src/compiler/glsl/lower_instructions.cpp

   1 /*
   2  * Copyright © 2010 Intel Corporation
   3  *
   4  * Permission is hereby granted, free of charge, to any person obtaining a
   5  * copy of this software and associated documentation files (the "Software"),
   6  * to deal in the Software without restriction, including without limitation
   7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
   8  * and/or sell copies of the Software, and to permit persons to whom the
   9  * Software is furnished to do so, subject to the following conditions:
  10  *
  11  * The above copyright notice and this permission notice (including the next
  12  * paragraph) shall be included in all copies or substantial portions of the
  13  * Software.
  14  *
  15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
  18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
  20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
  21  * DEALINGS IN THE SOFTWARE.
  22  */
  23
  24 /**
  25  * \file lower_instructions.cpp
  26  *
  27  * Many GPUs lack native instructions for certain expression operations, and
  28  * must replace them with some other expression tree.  This pass lowers some
  29  * of the most common cases, allowing the lowering code to be implemented once
  30  * rather than in each driver backend.
  31  *
  32  * Currently supported transformations:
  33  * - SUB_TO_ADD_NEG
  34  * - DIV_TO_MUL_RCP
  35  * - INT_DIV_TO_MUL_RCP
  36  * - EXP_TO_EXP2
  37  * - POW_TO_EXP2
  38  * - LOG_TO_LOG2
  39  * - MOD_TO_FLOOR
  40  * - LDEXP_TO_ARITH
   41  * - DFREXP_DLDEXP_TO_ARITH
  42  * - CARRY_TO_ARITH
  43  * - BORROW_TO_ARITH
  44  * - SAT_TO_CLAMP
  45  * - DOPS_TO_DFRAC
  46  *
  47  * SUB_TO_ADD_NEG:
  48  * ---------------
  49  * Breaks an ir_binop_sub expression down to add(op0, neg(op1))
  50  *
  51  * This simplifies expression reassociation, and for many backends
  52  * there is no subtract operation separate from adding the negation.
  53  * For backends with native subtract operations, they will probably
  54  * want to recognize add(op0, neg(op1)) or the other way around to
  55  * produce a subtract anyway.
  56  *
  57  * DIV_TO_MUL_RCP and INT_DIV_TO_MUL_RCP:
  58  * --------------------------------------
  59  * Breaks an ir_binop_div expression down to op0 * (rcp(op1)).
  60  *
  61  * Many GPUs don't have a divide instruction (945 and 965 included),
  62  * but they do have an RCP instruction to compute an approximate
  63  * reciprocal.  By breaking the operation down, constant reciprocals
  64  * can get constant folded.
  65  *
  66  * DIV_TO_MUL_RCP only lowers floating point division; INT_DIV_TO_MUL_RCP
  67  * handles the integer case, converting to and from floating point so that
  68  * RCP is possible.
  69  *
  70  * EXP_TO_EXP2 and LOG_TO_LOG2:
  71  * ----------------------------
  72  * Many GPUs don't have a base e log or exponent instruction, but they
  73  * do have base 2 versions, so this pass converts exp and log to exp2
  74  * and log2 operations.
  75  *
  76  * POW_TO_EXP2:
  77  * -----------
  78  * Many older GPUs don't have an x**y instruction.  For these GPUs, convert
  79  * x**y to 2**(y * log2(x)).
  80  *
  81  * MOD_TO_FLOOR:
  82  * -------------
  83  * Breaks an ir_binop_mod expression down to (op0 - op1 * floor(op0 / op1))
  84  *
  85  * Many GPUs don't have a MOD instruction (945 and 965 included), and
  86  * if we have to break it down like this anyway, it gives an
  87  * opportunity to do things like constant fold the (1.0 / op1) easily.
  88  *
   89  * Note: before we used to implement this as op1 * fract(op0 / op1) but this
  90  * implementation had significant precision errors.
  91  *
  92  * LDEXP_TO_ARITH:
  93  * -------------
  94  * Converts ir_binop_ldexp to arithmetic and bit operations for float sources.
  95  *
  96  * DFREXP_DLDEXP_TO_ARITH:
  97  * ---------------
  98  * Converts ir_binop_ldexp, ir_unop_frexp_sig, and ir_unop_frexp_exp to
  99  * arithmetic and bit ops for double arguments.
 100  *
 101  * CARRY_TO_ARITH:
 102  * ---------------
 103  * Converts ir_carry into (x + y) < x.
 104  *
 105  * BORROW_TO_ARITH:
 106  * ----------------
 107  * Converts ir_borrow into (x < y).
 108  *
 109  * SAT_TO_CLAMP:
 110  * -------------
 111  * Converts ir_unop_saturate into min(max(x, 0.0), 1.0)
 112  *
 113  * DOPS_TO_DFRAC:
 114  * --------------
 115  * Converts double trunc, ceil, floor, round to fract
 116  */
 117
 118 #include "c99_math.h"
 119 #include "program/prog_instruction.h" /* for swizzle */
 120 #include "compiler/glsl_types.h"
 121 #include "ir.h"
 122 #include "ir_builder.h"
 123 #include "ir_optimization.h"
 124
 125 using namespace ir_builder;
 126
 127 namespace {
 128
 129 class lower_instructions_visitor : public ir_hierarchical_visitor {
 130 public:
 131    lower_instructions_visitor(unsigned lower)
 132       : progress(false), lower(lower) { }
 133
 134    ir_visitor_status visit_leave(ir_expression *);
 135
 136    bool progress;
 137
 138 private:
 139    unsigned lower; /** Bitfield of which operations to lower */
 140
 141    void sub_to_add_neg(ir_expression *);
 142    void div_to_mul_rcp(ir_expression *);
 143    void int_div_to_mul_rcp(ir_expression *);
 144    void mod_to_floor(ir_expression *);
 145    void exp_to_exp2(ir_expression *);
 146    void pow_to_exp2(ir_expression *);
 147    void log_to_log2(ir_expression *);
 148    void ldexp_to_arith(ir_expression *);
 149    void dldexp_to_arith(ir_expression *);
 150    void dfrexp_sig_to_arith(ir_expression *);
 151    void dfrexp_exp_to_arith(ir_expression *);
 152    void carry_to_arith(ir_expression *);
 153    void borrow_to_arith(ir_expression *);
 154    void sat_to_clamp(ir_expression *);
 155    void double_dot_to_fma(ir_expression *);
 156    void double_lrp(ir_expression *);
 157    void dceil_to_dfrac(ir_expression *);
 158    void dfloor_to_dfrac(ir_expression *);
 159    void dround_even_to_dfrac(ir_expression *);
 160    void dtrunc_to_dfrac(ir_expression *);
 161    void dsign_to_csel(ir_expression *);
 162    void bit_count_to_math(ir_expression *);
 163    void extract_to_shifts(ir_expression *);
 164    void insert_to_shifts(ir_expression *);
 165    void reverse_to_shifts(ir_expression *ir);
 166    void find_lsb_to_float_cast(ir_expression *ir);
 167    void find_msb_to_float_cast(ir_expression *ir);
 168    void imul_high_to_mul(ir_expression *ir);
 169
 170    ir_expression *_carry(operand a, operand b);
 171 };
 172
 173 } /* anonymous namespace */
 174
 175 /**
 176  * Determine if a particular type of lowering should occur
 177  */
 178 #define lowering(x) (this->lower & x)
 179
 180 bool
 181 lower_instructions(exec_list *instructions, unsigned what_to_lower)
 182 {
 183    lower_instructions_visitor v(what_to_lower);
 184
 185    visit_list_elements(&v, instructions);
 186    return v.progress;
 187 }
 188
 189 void
 190 lower_instructions_visitor::sub_to_add_neg(ir_expression *ir)
 191 {
 192    ir->operation = ir_binop_add;
 193    ir->operands[1] = new(ir) ir_expression(ir_unop_neg, ir->operands[1]->type,
 194                                            ir->operands[1], NULL);
 195    this->progress = true;
 196 }
 197
 198 void
 199 lower_instructions_visitor::div_to_mul_rcp(ir_expression *ir)
 200 {
 201    assert(ir->operands[1]->type->is_float() || ir->operands[1]->type->is_double());
 202
 203    /* New expression for the 1.0 / op1 */
 204    ir_rvalue *expr;
 205    expr = new(ir) ir_expression(ir_unop_rcp,
 206                                 ir->operands[1]->type,
 207                                 ir->operands[1]);
 208
 209    /* op0 / op1 -> op0 * (1.0 / op1) */
 210    ir->operation = ir_binop_mul;
 211    ir->operands[1] = expr;
 212
 213    this->progress = true;
 214 }
 215
 216 void
 217 lower_instructions_visitor::int_div_to_mul_rcp(ir_expression *ir)
 218 {
 219    assert(ir->operands[1]->type->is_integer());
 220
 221    /* Be careful with integer division -- we need to do it as a
 222     * float and re-truncate, since rcp(n > 1) of an integer would
 223     * just be 0.
 224     */
 225    ir_rvalue *op0, *op1;
 226    const struct glsl_type *vec_type;
 227
 228    vec_type = glsl_type::get_instance(GLSL_TYPE_FLOAT,
 229                                       ir->operands[1]->type->vector_elements,
 230                                       ir->operands[1]->type->matrix_columns);
 231
 232    if (ir->operands[1]->type->base_type == GLSL_TYPE_INT)
 233       op1 = new(ir) ir_expression(ir_unop_i2f, vec_type, ir->operands[1], NULL);
 234    else
 235       op1 = new(ir) ir_expression(ir_unop_u2f, vec_type, ir->operands[1], NULL);
 236
 237    op1 = new(ir) ir_expression(ir_unop_rcp, op1->type, op1, NULL);
 238
 239    vec_type = glsl_type::get_instance(GLSL_TYPE_FLOAT,
 240                                       ir->operands[0]->type->vector_elements,
 241                                       ir->operands[0]->type->matrix_columns);
 242
 243    if (ir->operands[0]->type->base_type == GLSL_TYPE_INT)
 244       op0 = new(ir) ir_expression(ir_unop_i2f, vec_type, ir->operands[0], NULL);
 245    else
 246       op0 = new(ir) ir_expression(ir_unop_u2f, vec_type, ir->operands[0], NULL);
 247
 248    vec_type = glsl_type::get_instance(GLSL_TYPE_FLOAT,
 249                                       ir->type->vector_elements,
 250                                       ir->type->matrix_columns);
 251
 252    op0 = new(ir) ir_expression(ir_binop_mul, vec_type, op0, op1);
 253
 254    if (ir->operands[1]->type->base_type == GLSL_TYPE_INT) {
 255       ir->operation = ir_unop_f2i;
 256       ir->operands[0] = op0;
 257    } else {
 258       ir->operation = ir_unop_i2u;
 259       ir->operands[0] = new(ir) ir_expression(ir_unop_f2i, op0);
 260    }
 261    ir->operands[1] = NULL;
 262
 263    this->progress = true;
 264 }
 265
 266 void
 267 lower_instructions_visitor::exp_to_exp2(ir_expression *ir)
 268 {
 269    ir_constant *log2_e = new(ir) ir_constant(float(M_LOG2E));
 270
 271    ir->operation = ir_unop_exp2;
 272    ir->operands[0] = new(ir) ir_expression(ir_binop_mul, ir->operands[0]->type,
 273                                            ir->operands[0], log2_e);
 274    this->progress = true;
 275 }
 276
 277 void
 278 lower_instructions_visitor::pow_to_exp2(ir_expression *ir)
 279 {
 280    ir_expression *const log2_x =
 281       new(ir) ir_expression(ir_unop_log2, ir->operands[0]->type,
 282                             ir->operands[0]);
 283
 284    ir->operation = ir_unop_exp2;
 285    ir->operands[0] = new(ir) ir_expression(ir_binop_mul, ir->operands[1]->type,
 286                                            ir->operands[1], log2_x);
 287    ir->operands[1] = NULL;
 288    this->progress = true;
 289 }
 290
 291 void
 292 lower_instructions_visitor::log_to_log2(ir_expression *ir)
 293 {
 294    ir->operation = ir_binop_mul;
 295    ir->operands[0] = new(ir) ir_expression(ir_unop_log2, ir->operands[0]->type,
 296                                            ir->operands[0], NULL);
 297    ir->operands[1] = new(ir) ir_constant(float(1.0 / M_LOG2E));
 298    this->progress = true;
 299 }
 300
 301 void
 302 lower_instructions_visitor::mod_to_floor(ir_expression *ir)
 303 {
 304    ir_variable *x = new(ir) ir_variable(ir->operands[0]->type, "mod_x",
 305                                          ir_var_temporary);
 306    ir_variable *y = new(ir) ir_variable(ir->operands[1]->type, "mod_y",
 307                                          ir_var_temporary);
 308    this->base_ir->insert_before(x);
 309    this->base_ir->insert_before(y);
 310
 311    ir_assignment *const assign_x =
 312       new(ir) ir_assignment(new(ir) ir_dereference_variable(x),
 313                             ir->operands[0], NULL);
 314    ir_assignment *const assign_y =
 315       new(ir) ir_assignment(new(ir) ir_dereference_variable(y),
 316                             ir->operands[1], NULL);
 317
 318    this->base_ir->insert_before(assign_x);
 319    this->base_ir->insert_before(assign_y);
 320
 321    ir_expression *const div_expr =
 322       new(ir) ir_expression(ir_binop_div, x->type,
 323                             new(ir) ir_dereference_variable(x),
 324                             new(ir) ir_dereference_variable(y));
 325
 326    /* Don't generate new IR that would need to be lowered in an additional
 327     * pass.
 328     */
 329    if (lowering(DIV_TO_MUL_RCP) && (ir->type->is_float() || ir->type->is_double()))
 330       div_to_mul_rcp(div_expr);
 331
 332    ir_expression *const floor_expr =
 333       new(ir) ir_expression(ir_unop_floor, x->type, div_expr);
 334
 335    if (lowering(DOPS_TO_DFRAC) && ir->type->is_double())
 336       dfloor_to_dfrac(floor_expr);
 337
 338    ir_expression *const mul_expr =
 339       new(ir) ir_expression(ir_binop_mul,
 340                             new(ir) ir_dereference_variable(y),
 341                             floor_expr);
 342
 343    ir->operation = ir_binop_sub;
 344    ir->operands[0] = new(ir) ir_dereference_variable(x);
 345    ir->operands[1] = mul_expr;
 346    this->progress = true;
 347 }
 348
 349 void
 350 lower_instructions_visitor::ldexp_to_arith(ir_expression *ir)
 351 {
 352    /* Translates
 353     *    ir_binop_ldexp x exp
 354     * into
 355     *
 356     *    extracted_biased_exp = rshift(bitcast_f2i(abs(x)), exp_shift);
 357     *    resulting_biased_exp = extracted_biased_exp + exp;
 358     *
 359     *    if (resulting_biased_exp < 1 || x == 0.0f) {
 360     *       return copysign(0.0, x);
 361     *    }
 362     *
 363     *    return bitcast_u2f((bitcast_f2u(x) & sign_mantissa_mask) |
 364     *                       lshift(i2u(resulting_biased_exp), exp_shift));
 365     *
 366     * which we can't actually implement as such, since the GLSL IR doesn't
 367     * have vectorized if-statements. We actually implement it without branches
 368     * using conditional-select:
 369     *
 370     *    extracted_biased_exp = rshift(bitcast_f2i(abs(x)), exp_shift);
 371     *    resulting_biased_exp = extracted_biased_exp + exp;
 372     *
 373     *    is_not_zero_or_underflow = logic_and(nequal(x, 0.0f),
 374     *                                         gequal(resulting_biased_exp, 1);
 375     *    x = csel(is_not_zero_or_underflow, x, copysign(0.0f, x));
 376     *    resulting_biased_exp = csel(is_not_zero_or_underflow,
 377     *                                resulting_biased_exp, 0);
 378     *
 379     *    return bitcast_u2f((bitcast_f2u(x) & sign_mantissa_mask) |
 380     *                       lshift(i2u(resulting_biased_exp), exp_shift));
 381     */
 382
 383    const unsigned vec_elem = ir->type->vector_elements;
 384
 385    /* Types */
 386    const glsl_type *ivec = glsl_type::get_instance(GLSL_TYPE_INT, vec_elem, 1);
 387    const glsl_type *bvec = glsl_type::get_instance(GLSL_TYPE_BOOL, vec_elem, 1);
 388
 389    /* Constants */
 390    ir_constant *zeroi = ir_constant::zero(ir, ivec);
 391
 392    ir_constant *sign_mask = new(ir) ir_constant(0x80000000u, vec_elem);
 393
 394    ir_constant *exp_shift = new(ir) ir_constant(23, vec_elem);
 395
 396    /* Temporary variables */
 397    ir_variable *x = new(ir) ir_variable(ir->type, "x", ir_var_temporary);
 398    ir_variable *exp = new(ir) ir_variable(ivec, "exp", ir_var_temporary);
 399
 400    ir_variable *zero_sign_x = new(ir) ir_variable(ir->type, "zero_sign_x",
 401                                                   ir_var_temporary);
 402
 403    ir_variable *extracted_biased_exp =
 404       new(ir) ir_variable(ivec, "extracted_biased_exp", ir_var_temporary);
 405    ir_variable *resulting_biased_exp =
 406       new(ir) ir_variable(ivec, "resulting_biased_exp", ir_var_temporary);
 407
 408    ir_variable *is_not_zero_or_underflow =
 409       new(ir) ir_variable(bvec, "is_not_zero_or_underflow", ir_var_temporary);
 410
 411    ir_instruction &i = *base_ir;
 412
 413    /* Copy <x> and <exp> arguments. */
 414    i.insert_before(x);
 415    i.insert_before(assign(x, ir->operands[0]));
 416    i.insert_before(exp);
 417    i.insert_before(assign(exp, ir->operands[1]));
 418
 419    /* Extract the biased exponent from <x>. */
 420    i.insert_before(extracted_biased_exp);
 421    i.insert_before(assign(extracted_biased_exp,
 422                           rshift(bitcast_f2i(abs(x)), exp_shift)));
 423
 424    i.insert_before(resulting_biased_exp);
 425    i.insert_before(assign(resulting_biased_exp,
 426                           add(extracted_biased_exp, exp)));
 427
 428    /* Test if result is ±0.0, subnormal, or underflow by checking if the
 429     * resulting biased exponent would be less than 0x1. If so, the result is
 430     * 0.0 with the sign of x. (Actually, invert the conditions so that
 431     * immediate values are the second arguments, which is better for i965)
 432     */
 433    i.insert_before(zero_sign_x);
 434    i.insert_before(assign(zero_sign_x,
 435                           bitcast_u2f(bit_and(bitcast_f2u(x), sign_mask))));
 436
 437    i.insert_before(is_not_zero_or_underflow);
 438    i.insert_before(assign(is_not_zero_or_underflow,
 439                           logic_and(nequal(x, new(ir) ir_constant(0.0f, vec_elem)),
 440                                     gequal(resulting_biased_exp,
 441                                            new(ir) ir_constant(0x1, vec_elem)))));
 442    i.insert_before(assign(x, csel(is_not_zero_or_underflow,
 443                                   x, zero_sign_x)));
 444    i.insert_before(assign(resulting_biased_exp,
 445                           csel(is_not_zero_or_underflow,
 446                                resulting_biased_exp, zeroi)));
 447
 448    /* We could test for overflows by checking if the resulting biased exponent
 449     * would be greater than 0xFE. Turns out we don't need to because the GLSL
 450     * spec says:
 451     *
 452     *    "If this product is too large to be represented in the
 453     *     floating-point type, the result is undefined."
 454     */
 455
 456    ir_constant *exp_shift_clone = exp_shift->clone(ir, NULL);
 457
 458    /* Don't generate new IR that would need to be lowered in an additional
 459     * pass.
 460     */
 461    if (!lowering(INSERT_TO_SHIFTS)) {
 462       ir_constant *exp_width = new(ir) ir_constant(8, vec_elem);
 463       ir->operation = ir_unop_bitcast_i2f;
 464       ir->operands[0] = bitfield_insert(bitcast_f2i(x), resulting_biased_exp,
 465                                         exp_shift_clone, exp_width);
 466       ir->operands[1] = NULL;
 467    } else {
 468       ir_constant *sign_mantissa_mask = new(ir) ir_constant(0x807fffffu, vec_elem);
 469       ir->operation = ir_unop_bitcast_u2f;
 470       ir->operands[0] = bit_or(bit_and(bitcast_f2u(x), sign_mantissa_mask),
 471                                lshift(i2u(resulting_biased_exp), exp_shift_clone));
 472    }
 473
 474    this->progress = true;
 475 }
 476
 477 void
 478 lower_instructions_visitor::dldexp_to_arith(ir_expression *ir)
 479 {
 480    /* See ldexp_to_arith for structure. Uses frexp_exp to extract the exponent
 481     * from the significand.
 482     */
 483
 484    const unsigned vec_elem = ir->type->vector_elements;
 485
 486    /* Types */
 487    const glsl_type *ivec = glsl_type::get_instance(GLSL_TYPE_INT, vec_elem, 1);
 488    const glsl_type *bvec = glsl_type::get_instance(GLSL_TYPE_BOOL, vec_elem, 1);
 489
 490    /* Constants */
 491    ir_constant *zeroi = ir_constant::zero(ir, ivec);
 492
 493    ir_constant *sign_mask = new(ir) ir_constant(0x80000000u);
 494
 495    ir_constant *exp_shift = new(ir) ir_constant(20u);
 496    ir_constant *exp_width = new(ir) ir_constant(11u);
 497    ir_constant *exp_bias = new(ir) ir_constant(1022, vec_elem);
 498
 499    /* Temporary variables */
 500    ir_variable *x = new(ir) ir_variable(ir->type, "x", ir_var_temporary);
 501    ir_variable *exp = new(ir) ir_variable(ivec, "exp", ir_var_temporary);
 502
 503    ir_variable *zero_sign_x = new(ir) ir_variable(ir->type, "zero_sign_x",
 504                                                   ir_var_temporary);
 505
 506    ir_variable *extracted_biased_exp =
 507       new(ir) ir_variable(ivec, "extracted_biased_exp", ir_var_temporary);
 508    ir_variable *resulting_biased_exp =
 509       new(ir) ir_variable(ivec, "resulting_biased_exp", ir_var_temporary);
 510
 511    ir_variable *is_not_zero_or_underflow =
 512       new(ir) ir_variable(bvec, "is_not_zero_or_underflow", ir_var_temporary);
 513
 514    ir_instruction &i = *base_ir;
 515
 516    /* Copy <x> and <exp> arguments. */
 517    i.insert_before(x);
 518    i.insert_before(assign(x, ir->operands[0]));
 519    i.insert_before(exp);
 520    i.insert_before(assign(exp, ir->operands[1]));
 521
 522    ir_expression *frexp_exp = expr(ir_unop_frexp_exp, x);
 523    if (lowering(DFREXP_DLDEXP_TO_ARITH))
 524       dfrexp_exp_to_arith(frexp_exp);
 525
 526    /* Extract the biased exponent from <x>. */
 527    i.insert_before(extracted_biased_exp);
 528    i.insert_before(assign(extracted_biased_exp, add(frexp_exp, exp_bias)));
 529
 530    i.insert_before(resulting_biased_exp);
 531    i.insert_before(assign(resulting_biased_exp,
 532                           add(extracted_biased_exp, exp)));
 533
 534    /* Test if result is ±0.0, subnormal, or underflow by checking if the
 535     * resulting biased exponent would be less than 0x1. If so, the result is
 536     * 0.0 with the sign of x. (Actually, invert the conditions so that
 537     * immediate values are the second arguments, which is better for i965)
 538     * TODO: Implement in a vector fashion.
 539     */
 540    i.insert_before(zero_sign_x);
 541    for (unsigned elem = 0; elem < vec_elem; elem++) {
 542       ir_variable *unpacked =
 543          new(ir) ir_variable(glsl_type::uvec2_type, "unpacked", ir_var_temporary);
 544       i.insert_before(unpacked);
 545       i.insert_before(
 546             assign(unpacked,
 547                    expr(ir_unop_unpack_double_2x32, swizzle(x, elem, 1))));
 548       i.insert_before(assign(unpacked, bit_and(swizzle_y(unpacked), sign_mask->clone(ir, NULL)),
 549                              WRITEMASK_Y));
 550       i.insert_before(assign(unpacked, ir_constant::zero(ir, glsl_type::uint_type), WRITEMASK_X));
 551       i.insert_before(assign(zero_sign_x,
 552                              expr(ir_unop_pack_double_2x32, unpacked),
 553                              1 << elem));
 554    }
 555    i.insert_before(is_not_zero_or_underflow);
 556    i.insert_before(assign(is_not_zero_or_underflow,
 557                           gequal(resulting_biased_exp,
 558                                   new(ir) ir_constant(0x1, vec_elem))));
 559    i.insert_before(assign(x, csel(is_not_zero_or_underflow,
 560                                   x, zero_sign_x)));
 561    i.insert_before(assign(resulting_biased_exp,
 562                           csel(is_not_zero_or_underflow,
 563                                resulting_biased_exp, zeroi)));
 564
 565    /* We could test for overflows by checking if the resulting biased exponent
 566     * would be greater than 0xFE. Turns out we don't need to because the GLSL
 567     * spec says:
 568     *
 569     *    "If this product is too large to be represented in the
 570     *     floating-point type, the result is undefined."
 571     */
 572
 573    ir_rvalue *results[4] = {NULL};
 574    for (unsigned elem = 0; elem < vec_elem; elem++) {
 575       ir_variable *unpacked =
 576          new(ir) ir_variable(glsl_type::uvec2_type, "unpacked", ir_var_temporary);
 577       i.insert_before(unpacked);
 578       i.insert_before(
 579             assign(unpacked,
 580                    expr(ir_unop_unpack_double_2x32, swizzle(x, elem, 1))));
 581
 582       ir_expression *bfi = bitfield_insert(
 583             swizzle_y(unpacked),
 584             i2u(swizzle(resulting_biased_exp, elem, 1)),
 585             exp_shift->clone(ir, NULL),
 586             exp_width->clone(ir, NULL));
 587
 588       i.insert_before(assign(unpacked, bfi, WRITEMASK_Y));
 589
 590       results[elem] = expr(ir_unop_pack_double_2x32, unpacked);
 591    }
 592
 593    ir->operation = ir_quadop_vector;
 594    ir->operands[0] = results[0];
 595    ir->operands[1] = results[1];
 596    ir->operands[2] = results[2];
 597    ir->operands[3] = results[3];
 598
 599    /* Don't generate new IR that would need to be lowered in an additional
 600     * pass.
 601     */
 602
 603    this->progress = true;
 604 }
 605
 606 void
 607 lower_instructions_visitor::dfrexp_sig_to_arith(ir_expression *ir)
 608 {
 609    const unsigned vec_elem = ir->type->vector_elements;
 610    const glsl_type *bvec = glsl_type::get_instance(GLSL_TYPE_BOOL, vec_elem, 1);
 611
 612    /* Double-precision floating-point values are stored as
 613     *   1 sign bit;
 614     *   11 exponent bits;
 615     *   52 mantissa bits.
 616     *
 617     * We're just extracting the significand here, so we only need to modify
 618     * the upper 32-bit uint. Unfortunately we must extract each double
 619     * independently as there is no vector version of unpackDouble.
 620     */
 621
 622    ir_instruction &i = *base_ir;
 623
 624    ir_variable *is_not_zero =
 625       new(ir) ir_variable(bvec, "is_not_zero", ir_var_temporary);
 626    ir_rvalue *results[4] = {NULL};
 627
 628    ir_constant *dzero = new(ir) ir_constant(0.0, vec_elem);
 629    i.insert_before(is_not_zero);
 630    i.insert_before(
 631          assign(is_not_zero,
 632                 nequal(abs(ir->operands[0]->clone(ir, NULL)), dzero)));
 633
 634    /* TODO: Remake this as more vector-friendly when int64 support is
 635     * available.
 636     */
 637    for (unsigned elem = 0; elem < vec_elem; elem++) {
 638       ir_constant *zero = new(ir) ir_constant(0u, 1);
 639       ir_constant *sign_mantissa_mask = new(ir) ir_constant(0x800fffffu, 1);
 640
 641       /* Exponent of double floating-point values in the range [0.5, 1.0). */
 642       ir_constant *exponent_value = new(ir) ir_constant(0x3fe00000u, 1);
 643
 644       ir_variable *bits =
 645          new(ir) ir_variable(glsl_type::uint_type, "bits", ir_var_temporary);
 646       ir_variable *unpacked =
 647          new(ir) ir_variable(glsl_type::uvec2_type, "unpacked", ir_var_temporary);
 648
 649       ir_rvalue *x = swizzle(ir->operands[0]->clone(ir, NULL), elem, 1);
 650
 651       i.insert_before(bits);
 652       i.insert_before(unpacked);
 653       i.insert_before(assign(unpacked, expr(ir_unop_unpack_double_2x32, x)));
 654
 655       /* Manipulate the high uint to remove the exponent and replace it with
 656        * either the default exponent or zero.
 657        */
 658       i.insert_before(assign(bits, swizzle_y(unpacked)));
 659       i.insert_before(assign(bits, bit_and(bits, sign_mantissa_mask)));
 660       i.insert_before(assign(bits, bit_or(bits,
 661                                           csel(swizzle(is_not_zero, elem, 1),
 662                                                exponent_value,
 663                                                zero))));
 664       i.insert_before(assign(unpacked, bits, WRITEMASK_Y));
 665       results[elem] = expr(ir_unop_pack_double_2x32, unpacked);
 666    }
 667
 668    /* Put the dvec back together */
 669    ir->operation = ir_quadop_vector;
 670    ir->operands[0] = results[0];
 671    ir->operands[1] = results[1];
 672    ir->operands[2] = results[2];
 673    ir->operands[3] = results[3];
 674
 675    this->progress = true;
 676 }
 677
 678 void
 679 lower_instructions_visitor::dfrexp_exp_to_arith(ir_expression *ir)
 680 {
 681    const unsigned vec_elem = ir->type->vector_elements;
 682    const glsl_type *bvec = glsl_type::get_instance(GLSL_TYPE_BOOL, vec_elem, 1);
 683    const glsl_type *uvec = glsl_type::get_instance(GLSL_TYPE_UINT, vec_elem, 1);
 684
 685    /* Double-precision floating-point values are stored as
 686     *   1 sign bit;
 687     *   11 exponent bits;
 688     *   52 mantissa bits.
 689     *
 690     * We're just extracting the exponent here, so we only care about the upper
 691     * 32-bit uint.
 692     */
 693
 694    ir_instruction &i = *base_ir;
 695
 696    ir_variable *is_not_zero =
 697       new(ir) ir_variable(bvec, "is_not_zero", ir_var_temporary);
 698    ir_variable *high_words =
 699       new(ir) ir_variable(uvec, "high_words", ir_var_temporary);
 700    ir_constant *dzero = new(ir) ir_constant(0.0, vec_elem);
 701    ir_constant *izero = new(ir) ir_constant(0, vec_elem);
 702
 703    ir_rvalue *absval = abs(ir->operands[0]);
 704
 705    i.insert_before(is_not_zero);
 706    i.insert_before(high_words);
 707    i.insert_before(assign(is_not_zero, nequal(absval->clone(ir, NULL), dzero)));
 708
 709    /* Extract all of the upper uints. */
 710    for (unsigned elem = 0; elem < vec_elem; elem++) {
 711       ir_rvalue *x = swizzle(absval->clone(ir, NULL), elem, 1);
 712
 713       i.insert_before(assign(high_words,
 714                              swizzle_y(expr(ir_unop_unpack_double_2x32, x)),
 715                              1 << elem));
 716
 717    }
 718    ir_constant *exponent_shift = new(ir) ir_constant(20, vec_elem);
 719    ir_constant *exponent_bias = new(ir) ir_constant(-1022, vec_elem);
 720
 721    /* For non-zero inputs, shift the exponent down and apply bias. */
 722    ir->operation = ir_triop_csel;
 723    ir->operands[0] = new(ir) ir_dereference_variable(is_not_zero);
 724    ir->operands[1] = add(exponent_bias, u2i(rshift(high_words, exponent_shift)));
 725    ir->operands[2] = izero;
 726
 727    this->progress = true;
 728 }
 729
 730 void
 731 lower_instructions_visitor::carry_to_arith(ir_expression *ir)
 732 {
 733    /* Translates
 734     *   ir_binop_carry x y
 735     * into
 736     *   sum = ir_binop_add x y
 737     *   bcarry = ir_binop_less sum x
 738     *   carry = ir_unop_b2i bcarry
 739     */
 740
 741    ir_rvalue *x_clone = ir->operands[0]->clone(ir, NULL);
 742    ir->operation = ir_unop_i2u;
 743    ir->operands[0] = b2i(less(add(ir->operands[0], ir->operands[1]), x_clone));
 744    ir->operands[1] = NULL;
 745
 746    this->progress = true;
 747 }
 748
 749 void
 750 lower_instructions_visitor::borrow_to_arith(ir_expression *ir)
 751 {
 752    /* Translates
 753     *   ir_binop_borrow x y
 754     * into
 755     *   bcarry = ir_binop_less x y
 756     *   carry = ir_unop_b2i bcarry
 757     */
 758
 759    ir->operation = ir_unop_i2u;
 760    ir->operands[0] = b2i(less(ir->operands[0], ir->operands[1]));
 761    ir->operands[1] = NULL;
 762
 763    this->progress = true;
 764 }
 765
 766 void
 767 lower_instructions_visitor::sat_to_clamp(ir_expression *ir)
 768 {
 769    /* Translates
 770     *   ir_unop_saturate x
 771     * into
 772     *   ir_binop_min (ir_binop_max(x, 0.0), 1.0)
 773     */
 774
 775    ir->operation = ir_binop_min;
 776    ir->operands[0] = new(ir) ir_expression(ir_binop_max, ir->operands[0]->type,
 777                                            ir->operands[0],
 778                                            new(ir) ir_constant(0.0f));
 779    ir->operands[1] = new(ir) ir_constant(1.0f);
 780
 781    this->progress = true;
 782 }
 783
 784 void
 785 lower_instructions_visitor::double_dot_to_fma(ir_expression *ir)
 786 {
 787    ir_variable *temp = new(ir) ir_variable(ir->operands[0]->type->get_base_type(), "dot_res",
 788                                            ir_var_temporary);
 789    this->base_ir->insert_before(temp);
 790
 791    int nc = ir->operands[0]->type->components();
 792    for (int i = nc - 1; i >= 1; i--) {
 793       ir_assignment *assig;
 794       if (i == (nc - 1)) {
 795          assig = assign(temp, mul(swizzle(ir->operands[0]->clone(ir, NULL), i, 1),
 796                                   swizzle(ir->operands[1]->clone(ir, NULL), i, 1)));
 797       } else {
 798          assig = assign(temp, fma(swizzle(ir->operands[0]->clone(ir, NULL), i, 1),
 799                                   swizzle(ir->operands[1]->clone(ir, NULL), i, 1),
 800                                   temp));
 801       }
 802       this->base_ir->insert_before(assig);
 803    }
 804
 805    ir->operation = ir_triop_fma;
 806    ir->operands[0] = swizzle(ir->operands[0], 0, 1);
 807    ir->operands[1] = swizzle(ir->operands[1], 0, 1);
 808    ir->operands[2] = new(ir) ir_dereference_variable(temp);
 809
 810    this->progress = true;
 811
 812 }
 813
 814 void
 815 lower_instructions_visitor::double_lrp(ir_expression *ir)
 816 {
 817    int swizval;
 818    ir_rvalue *op0 = ir->operands[0], *op2 = ir->operands[2];
 819    ir_constant *one = new(ir) ir_constant(1.0, op2->type->vector_elements);
 820
 821    switch (op2->type->vector_elements) {
 822    case 1:
 823       swizval = SWIZZLE_XXXX;
 824       break;
 825    default:
 826       assert(op0->type->vector_elements == op2->type->vector_elements);
 827       swizval = SWIZZLE_XYZW;
 828       break;
 829    }
 830
 831    ir->operation = ir_triop_fma;
 832    ir->operands[0] = swizzle(op2, swizval, op0->type->vector_elements);
 833    ir->operands[2] = mul(sub(one, op2->clone(ir, NULL)), op0);
 834
 835    this->progress = true;
 836 }
 837
 838 void
 839 lower_instructions_visitor::dceil_to_dfrac(ir_expression *ir)
 840 {
 841    /*
 842     * frtemp = frac(x);
 843     * temp = sub(x, frtemp);
 844     * result = temp + ((frtemp != 0.0) ? 1.0 : 0.0);
 845     */
 846    ir_instruction &i = *base_ir;
 847    ir_constant *zero = new(ir) ir_constant(0.0, ir->operands[0]->type->vector_elements);
 848    ir_constant *one = new(ir) ir_constant(1.0, ir->operands[0]->type->vector_elements);
 849    ir_variable *frtemp = new(ir) ir_variable(ir->operands[0]->type, "frtemp",
 850                                              ir_var_temporary);
 851
 852    i.insert_before(frtemp);
 853    i.insert_before(assign(frtemp, fract(ir->operands[0])));
 854
 855    ir->operation = ir_binop_add;
 856    ir->operands[0] = sub(ir->operands[0]->clone(ir, NULL), frtemp);
 857    ir->operands[1] = csel(nequal(frtemp, zero), one, zero->clone(ir, NULL));
 858
 859    this->progress = true;
 860 }
 861
 862 void
 863 lower_instructions_visitor::dfloor_to_dfrac(ir_expression *ir)
 864 {
 865    /*
 866     * frtemp = frac(x);
 867     * result = sub(x, frtemp);
 868     */
 869    ir->operation = ir_binop_sub;
 870    ir->operands[1] = fract(ir->operands[0]->clone(ir, NULL));
 871
 872    this->progress = true;
 873 }
 874 void
 875 lower_instructions_visitor::dround_even_to_dfrac(ir_expression *ir)
 876 {
 877    /*
 878     * insane but works
 879     * temp = x + 0.5;
 880     * frtemp = frac(temp);
 881     * t2 = sub(temp, frtemp);
 882     * if (frac(x) == 0.5)
 883     *     result = frac(t2 * 0.5) == 0 ? t2 : t2 - 1;
 884     *  else
 885     *     result = t2;
 886
 887     */
 888    ir_instruction &i = *base_ir;
 889    ir_variable *frtemp = new(ir) ir_variable(ir->operands[0]->type, "frtemp",
 890                                              ir_var_temporary);
 891    ir_variable *temp = new(ir) ir_variable(ir->operands[0]->type, "temp",
 892                                            ir_var_temporary);
 893    ir_variable *t2 = new(ir) ir_variable(ir->operands[0]->type, "t2",
 894                                            ir_var_temporary);
 895    ir_constant *p5 = new(ir) ir_constant(0.5, ir->operands[0]->type->vector_elements);
 896    ir_constant *one = new(ir) ir_constant(1.0, ir->operands[0]->type->vector_elements);
 897    ir_constant *zero = new(ir) ir_constant(0.0, ir->operands[0]->type->vector_elements);
 898
 899    i.insert_before(temp);
 900    i.insert_before(assign(temp, add(ir->operands[0], p5)));
 901
 902    i.insert_before(frtemp);
 903    i.insert_before(assign(frtemp, fract(temp)));
 904
 905    i.insert_before(t2);
 906    i.insert_before(assign(t2, sub(temp, frtemp)));
 907
 908    ir->operation = ir_triop_csel;
 909    ir->operands[0] = equal(fract(ir->operands[0]->clone(ir, NULL)),
 910                            p5->clone(ir, NULL));
 911    ir->operands[1] = csel(equal(fract(mul(t2, p5->clone(ir, NULL))),
 912                                 zero),
 913                           t2,
 914                           sub(t2, one));
 915    ir->operands[2] = new(ir) ir_dereference_variable(t2);
 916
 917    this->progress = true;
 918 }
 919
 920 void
 921 lower_instructions_visitor::dtrunc_to_dfrac(ir_expression *ir)
 922 {
 923    /*
 924     * frtemp = frac(x);
 925     * temp = sub(x, frtemp);
 926     * result = x >= 0 ? temp : temp + (frtemp == 0.0) ? 0 : 1;
 927     */
 928    ir_rvalue *arg = ir->operands[0];
 929    ir_instruction &i = *base_ir;
 930
 931    ir_constant *zero = new(ir) ir_constant(0.0, arg->type->vector_elements);
 932    ir_constant *one = new(ir) ir_constant(1.0, arg->type->vector_elements);
 933    ir_variable *frtemp = new(ir) ir_variable(arg->type, "frtemp",
 934                                              ir_var_temporary);
 935    ir_variable *temp = new(ir) ir_variable(ir->operands[0]->type, "temp",
 936                                            ir_var_temporary);
 937
 938    i.insert_before(frtemp);
 939    i.insert_before(assign(frtemp, fract(arg)));
 940    i.insert_before(temp);
 941    i.insert_before(assign(temp, sub(arg->clone(ir, NULL), frtemp)));
 942
 943    ir->operation = ir_triop_csel;
 944    ir->operands[0] = gequal(arg->clone(ir, NULL), zero);
 945    ir->operands[1] = new (ir) ir_dereference_variable(temp);
 946    ir->operands[2] = add(temp,
 947                          csel(equal(frtemp, zero->clone(ir, NULL)),
 948                               zero->clone(ir, NULL),
 949                               one));
 950
 951    this->progress = true;
 952 }
 953
 954 void
 955 lower_instructions_visitor::dsign_to_csel(ir_expression *ir)
 956 {
 957    /*
 958     * temp = x > 0.0 ? 1.0 : 0.0;
 959     * result = x < 0.0 ? -1.0 : temp;
 960     */
 961    ir_rvalue *arg = ir->operands[0];
 962    ir_constant *zero = new(ir) ir_constant(0.0, arg->type->vector_elements);
 963    ir_constant *one = new(ir) ir_constant(1.0, arg->type->vector_elements);
 964    ir_constant *neg_one = new(ir) ir_constant(-1.0, arg->type->vector_elements);
 965
 966    ir->operation = ir_triop_csel;
 967    ir->operands[0] = less(arg->clone(ir, NULL),
 968                           zero->clone(ir, NULL));
 969    ir->operands[1] = neg_one;
 970    ir->operands[2] = csel(greater(arg, zero),
 971                           one,
 972                           zero->clone(ir, NULL));
 973
 974    this->progress = true;
 975 }
 976
 977 void
 978 lower_instructions_visitor::bit_count_to_math(ir_expression *ir)
 979 {
 980    /* For more details, see:
 981     *
 982     * http://graphics.stanford.edu/~seander/bithacks.html#CountBitsSetPaallel
 983     */
 984    const unsigned elements = ir->operands[0]->type->vector_elements;
 985    ir_variable *temp = new(ir) ir_variable(glsl_type::uvec(elements), "temp",
 986                                            ir_var_temporary);
 987    ir_constant *c55555555 = new(ir) ir_constant(0x55555555u);
 988    ir_constant *c33333333 = new(ir) ir_constant(0x33333333u);
 989    ir_constant *c0F0F0F0F = new(ir) ir_constant(0x0F0F0F0Fu);
 990    ir_constant *c01010101 = new(ir) ir_constant(0x01010101u);
 991    ir_constant *c1 = new(ir) ir_constant(1u);
 992    ir_constant *c2 = new(ir) ir_constant(2u);
 993    ir_constant *c4 = new(ir) ir_constant(4u);
 994    ir_constant *c24 = new(ir) ir_constant(24u);
 995
 996    base_ir->insert_before(temp);
 997
 998    if (ir->operands[0]->type->base_type == GLSL_TYPE_UINT) {
 999       base_ir->insert_before(assign(temp, ir->operands[0]));
1000    } else {
1001       assert(ir->operands[0]->type->base_type == GLSL_TYPE_INT);
1002       base_ir->insert_before(assign(temp, i2u(ir->operands[0])));
1003    }
1004
1005    /* temp = temp - ((temp >> 1) & 0x55555555u); */
1006    base_ir->insert_before(assign(temp, sub(temp, bit_and(rshift(temp, c1),
1007                                                          c55555555))));
1008
1009    /* temp = (temp & 0x33333333u) + ((temp >> 2) & 0x33333333u); */
1010    base_ir->insert_before(assign(temp, add(bit_and(temp, c33333333),
1011                                            bit_and(rshift(temp, c2),
1012                                                    c33333333->clone(ir, NULL)))));
1013
1014    /* int(((temp + (temp >> 4) & 0xF0F0F0Fu) * 0x1010101u) >> 24); */
1015    ir->operation = ir_unop_u2i;
1016    ir->operands[0] = rshift(mul(bit_and(add(temp, rshift(temp, c4)), c0F0F0F0F),
1017                                 c01010101),
1018                             c24);
1019
1020    this->progress = true;
1021 }
1022
1023 void
1024 lower_instructions_visitor::extract_to_shifts(ir_expression *ir)
1025 {
1026    ir_variable *bits =
1027       new(ir) ir_variable(ir->operands[0]->type, "bits", ir_var_temporary);
1028
1029    base_ir->insert_before(bits);
1030    base_ir->insert_before(assign(bits, ir->operands[2]));
1031
1032    if (ir->operands[0]->type->base_type == GLSL_TYPE_UINT) {
1033       ir_constant *c1 =
1034          new(ir) ir_constant(1u, ir->operands[0]->type->vector_elements);
1035       ir_constant *c32 =
1036          new(ir) ir_constant(32u, ir->operands[0]->type->vector_elements);
1037       ir_constant *cFFFFFFFF =
1038          new(ir) ir_constant(0xFFFFFFFFu, ir->operands[0]->type->vector_elements);
1039
1040       /* At least some hardware treats (x << y) as (x << (y%32)).  This means
1041        * we'd get a mask of 0 when bits is 32.  Special case it.
1042        *
1043        * mask = bits == 32 ? 0xffffffff : (1u << bits) - 1u;
1044        */
1045       ir_expression *mask = csel(equal(bits, c32),
1046                                  cFFFFFFFF,
1047                                  sub(lshift(c1, bits), c1->clone(ir, NULL)));
1048
1049       /* Section 8.8 (Integer Functions) of the GLSL 4.50 spec says:
1050        *
1051        *    If bits is zero, the result will be zero.
1052        *
1053        * Since (1 << 0) - 1 == 0, we don't need to bother with the conditional
1054        * select as in the signed integer case.
1055        *
1056        * (value >> offset) & mask;
1057        */
1058       ir->operation = ir_binop_bit_and;
1059       ir->operands[0] = rshift(ir->operands[0], ir->operands[1]);
1060       ir->operands[1] = mask;
1061       ir->operands[2] = NULL;
1062    } else {
1063       ir_constant *c0 =
1064          new(ir) ir_constant(int(0), ir->operands[0]->type->vector_elements);
1065       ir_constant *c32 =
1066          new(ir) ir_constant(int(32), ir->operands[0]->type->vector_elements);
1067       ir_variable *temp =
1068          new(ir) ir_variable(ir->operands[0]->type, "temp", ir_var_temporary);
1069
1070       /* temp = 32 - bits; */
1071       base_ir->insert_before(temp);
1072       base_ir->insert_before(assign(temp, sub(c32, bits)));
1073
1074       /* expr = value << (temp - offset)) >> temp; */
1075       ir_expression *expr =
1076          rshift(lshift(ir->operands[0], sub(temp, ir->operands[1])), temp);
1077
1078       /* Section 8.8 (Integer Functions) of the GLSL 4.50 spec says:
1079        *
1080        *    If bits is zero, the result will be zero.
1081        *
1082        * Due to the (x << (y%32)) behavior mentioned before, the (value <<
1083        * (32-0)) doesn't "erase" all of the data as we would like, so finish
1084        * up with:
1085        *
1086        * (bits == 0) ? 0 : e;
1087        */
1088       ir->operation = ir_triop_csel;
1089       ir->operands[0] = equal(c0, bits);
1090       ir->operands[1] = c0->clone(ir, NULL);
1091       ir->operands[2] = expr;
1092    }
1093
1094    this->progress = true;
1095 }
1096
1097 void
1098 lower_instructions_visitor::insert_to_shifts(ir_expression *ir)
1099 {
1100    ir_constant *c1;
1101    ir_constant *c32;
1102    ir_constant *cFFFFFFFF;
1103    ir_variable *offset =
1104       new(ir) ir_variable(ir->operands[0]->type, "offset", ir_var_temporary);
1105    ir_variable *bits =
1106       new(ir) ir_variable(ir->operands[0]->type, "bits", ir_var_temporary);
1107    ir_variable *mask =
1108       new(ir) ir_variable(ir->operands[0]->type, "mask", ir_var_temporary);
1109
1110    if (ir->operands[0]->type->base_type == GLSL_TYPE_INT) {
1111       c1 = new(ir) ir_constant(int(1), ir->operands[0]->type->vector_elements);
1112       c32 = new(ir) ir_constant(int(32), ir->operands[0]->type->vector_elements);
1113       cFFFFFFFF = new(ir) ir_constant(int(0xFFFFFFFF), ir->operands[0]->type->vector_elements);
1114    } else {
1115       assert(ir->operands[0]->type->base_type == GLSL_TYPE_UINT);
1116
1117       c1 = new(ir) ir_constant(1u, ir->operands[0]->type->vector_elements);
1118       c32 = new(ir) ir_constant(32u, ir->operands[0]->type->vector_elements);
1119       cFFFFFFFF = new(ir) ir_constant(0xFFFFFFFFu, ir->operands[0]->type->vector_elements);
1120    }
1121
1122    base_ir->insert_before(offset);
1123    base_ir->insert_before(assign(offset, ir->operands[2]));
1124
1125    base_ir->insert_before(bits);
1126    base_ir->insert_before(assign(bits, ir->operands[3]));
1127
1128    /* At least some hardware treats (x << y) as (x << (y%32)).  This means
1129     * we'd get a mask of 0 when bits is 32.  Special case it.
1130     *
1131     * mask = (bits == 32 ? 0xffffffff : (1u << bits) - 1u) << offset;
1132     *
1133     * Section 8.8 (Integer Functions) of the GLSL 4.50 spec says:
1134     *
1135     *    The result will be undefined if offset or bits is negative, or if the
1136     *    sum of offset and bits is greater than the number of bits used to
1137     *    store the operand.
1138     *
1139     * Since it's undefined, there are a couple other ways this could be
1140     * implemented.  The other way that was considered was to put the csel
1141     * around the whole thing:
1142     *
1143     *    final_result = bits == 32 ? insert : ... ;
1144     */
1145    base_ir->insert_before(mask);
1146
1147    base_ir->insert_before(assign(mask, csel(equal(bits, c32),
1148                                             cFFFFFFFF,
1149                                             lshift(sub(lshift(c1, bits),
1150                                                        c1->clone(ir, NULL)),
1151                                                    offset))));
1152
1153    /* (base & ~mask) | ((insert << offset) & mask) */
1154    ir->operation = ir_binop_bit_or;
1155    ir->operands[0] = bit_and(ir->operands[0], bit_not(mask));
1156    ir->operands[1] = bit_and(lshift(ir->operands[1], offset), mask);
1157    ir->operands[2] = NULL;
1158    ir->operands[3] = NULL;
1159
1160    this->progress = true;
1161 }
1162
1163 void
1164 lower_instructions_visitor::reverse_to_shifts(ir_expression *ir)
1165 {
1166    /* For more details, see:
1167     *
1168     * http://graphics.stanford.edu/~seander/bithacks.html#ReverseParallel
1169     */
1170    ir_constant *c1 =
1171       new(ir) ir_constant(1u, ir->operands[0]->type->vector_elements);
1172    ir_constant *c2 =
1173       new(ir) ir_constant(2u, ir->operands[0]->type->vector_elements);
1174    ir_constant *c4 =
1175       new(ir) ir_constant(4u, ir->operands[0]->type->vector_elements);
1176    ir_constant *c8 =
1177       new(ir) ir_constant(8u, ir->operands[0]->type->vector_elements);
1178    ir_constant *c16 =
1179       new(ir) ir_constant(16u, ir->operands[0]->type->vector_elements);
1180    ir_constant *c33333333 =
1181       new(ir) ir_constant(0x33333333u, ir->operands[0]->type->vector_elements);
1182    ir_constant *c55555555 =
1183       new(ir) ir_constant(0x55555555u, ir->operands[0]->type->vector_elements);
1184    ir_constant *c0F0F0F0F =
1185       new(ir) ir_constant(0x0F0F0F0Fu, ir->operands[0]->type->vector_elements);
1186    ir_constant *c00FF00FF =
1187       new(ir) ir_constant(0x00FF00FFu, ir->operands[0]->type->vector_elements);
1188    ir_variable *temp =
1189       new(ir) ir_variable(glsl_type::uvec(ir->operands[0]->type->vector_elements),
1190                           "temp", ir_var_temporary);
1191    ir_instruction &i = *base_ir;
1192
1193    i.insert_before(temp);
1194
1195    if (ir->operands[0]->type->base_type == GLSL_TYPE_UINT) {
1196       i.insert_before(assign(temp, ir->operands[0]));
1197    } else {
1198       assert(ir->operands[0]->type->base_type == GLSL_TYPE_INT);
1199       i.insert_before(assign(temp, i2u(ir->operands[0])));
1200    }
1201
1202    /* Swap odd and even bits.
1203     *
1204     * temp = ((temp >> 1) & 0x55555555u) | ((temp & 0x55555555u) << 1);
1205     */
1206    i.insert_before(assign(temp, bit_or(bit_and(rshift(temp, c1), c55555555),
1207                                        lshift(bit_and(temp, c55555555->clone(ir, NULL)),
1208                                               c1->clone(ir, NULL)))));
1209    /* Swap consecutive pairs.
1210     *
1211     * temp = ((temp >> 2) & 0x33333333u) | ((temp & 0x33333333u) << 2);
1212     */
1213    i.insert_before(assign(temp, bit_or(bit_and(rshift(temp, c2), c33333333),
1214                                        lshift(bit_and(temp, c33333333->clone(ir, NULL)),
1215                                               c2->clone(ir, NULL)))));
1216
1217    /* Swap nibbles.
1218     *
1219     * temp = ((temp >> 4) & 0x0F0F0F0Fu) | ((temp & 0x0F0F0F0Fu) << 4);
1220     */
1221    i.insert_before(assign(temp, bit_or(bit_and(rshift(temp, c4), c0F0F0F0F),
1222                                        lshift(bit_and(temp, c0F0F0F0F->clone(ir, NULL)),
1223                                               c4->clone(ir, NULL)))));
1224
1225    /* The last step is, basically, bswap.  Swap the bytes, then swap the
1226     * words.  When this code is run through GCC on x86, it does generate a
1227     * bswap instruction.
1228     *
1229     * temp = ((temp >> 8) & 0x00FF00FFu) | ((temp & 0x00FF00FFu) << 8);
1230     * temp = ( temp >> 16              ) | ( temp                << 16);
1231     */
1232    i.insert_before(assign(temp, bit_or(bit_and(rshift(temp, c8), c00FF00FF),
1233                                        lshift(bit_and(temp, c00FF00FF->clone(ir, NULL)),
1234                                               c8->clone(ir, NULL)))));
1235
1236    if (ir->operands[0]->type->base_type == GLSL_TYPE_UINT) {
1237       ir->operation = ir_binop_bit_or;
1238       ir->operands[0] = rshift(temp, c16);
1239       ir->operands[1] = lshift(temp, c16->clone(ir, NULL));
1240    } else {
1241       ir->operation = ir_unop_u2i;
1242       ir->operands[0] = bit_or(rshift(temp, c16),
1243                                lshift(temp, c16->clone(ir, NULL)));
1244    }
1245
1246    this->progress = true;
1247 }
1248
1249 void
1250 lower_instructions_visitor::find_lsb_to_float_cast(ir_expression *ir)
1251 {
1252    /* For more details, see:
1253     *
1254     * http://graphics.stanford.edu/~seander/bithacks.html#ZerosOnRightFloatCast
1255     */
1256    const unsigned elements = ir->operands[0]->type->vector_elements;
1257    ir_constant *c0 = new(ir) ir_constant(unsigned(0), elements);
1258    ir_constant *cminus1 = new(ir) ir_constant(int(-1), elements);
1259    ir_constant *c23 = new(ir) ir_constant(int(23), elements);
1260    ir_constant *c7F = new(ir) ir_constant(int(0x7F), elements);
1261    ir_variable *temp =
1262       new(ir) ir_variable(glsl_type::ivec(elements), "temp", ir_var_temporary);
1263    ir_variable *lsb_only =
1264       new(ir) ir_variable(glsl_type::uvec(elements), "lsb_only", ir_var_temporary);
1265    ir_variable *as_float =
1266       new(ir) ir_variable(glsl_type::vec(elements), "as_float", ir_var_temporary);
1267    ir_variable *lsb =
1268       new(ir) ir_variable(glsl_type::ivec(elements), "lsb", ir_var_temporary);
1269
1270    ir_instruction &i = *base_ir;
1271
1272    i.insert_before(temp);
1273
1274    if (ir->operands[0]->type->base_type == GLSL_TYPE_INT) {
1275       i.insert_before(assign(temp, ir->operands[0]));
1276    } else {
1277       assert(ir->operands[0]->type->base_type == GLSL_TYPE_UINT);
1278       i.insert_before(assign(temp, u2i(ir->operands[0])));
1279    }
1280
1281    /* The int-to-float conversion is lossless because (value & -value) is
1282     * either a power of two or zero.  We don't use the result in the zero
1283     * case.  The uint() cast is necessary so that 0x80000000 does not
1284     * generate a negative value.
1285     *
1286     * uint lsb_only = uint(value & -value);
1287     * float as_float = float(lsb_only);
1288     */
1289    i.insert_before(lsb_only);
1290    i.insert_before(assign(lsb_only, i2u(bit_and(temp, neg(temp)))));
1291
1292    i.insert_before(as_float);
1293    i.insert_before(assign(as_float, u2f(lsb_only)));
1294
1295    /* This is basically an open-coded frexp.  Implementations that have a
1296     * native frexp instruction would be better served by that.  This is
1297     * optimized versus a full-featured open-coded implementation in two ways:
1298     *
1299     * - We don't care about a correct result from subnormal numbers (including
1300     *   0.0), so the raw exponent can always be safely unbiased.
1301     *
1302     * - The value cannot be negative, so it does not need to be masked off to
1303     *   extract the exponent.
1304     *
1305     * int lsb = (floatBitsToInt(as_float) >> 23) - 0x7f;
1306     */
1307    i.insert_before(lsb);
1308    i.insert_before(assign(lsb, sub(rshift(bitcast_f2i(as_float), c23), c7F)));
1309
1310    /* Use lsb_only in the comparison instead of temp so that the & (far above)
1311     * can possibly generate the result without an explicit comparison.
1312     *
1313     * (lsb_only == 0) ? -1 : lsb;
1314     *
1315     * Since our input values are all integers, the unbiased exponent must not
1316     * be negative.  It will only be negative (-0x7f, in fact) if lsb_only is
1317     * 0.  Instead of using (lsb_only == 0), we could use (lsb >= 0).  Which is
1318     * better is likely GPU dependent.  Either way, the difference should be
1319     * small.
1320     */
1321    ir->operation = ir_triop_csel;
1322    ir->operands[0] = equal(lsb_only, c0);
1323    ir->operands[1] = cminus1;
1324    ir->operands[2] = new(ir) ir_dereference_variable(lsb);
1325
1326    this->progress = true;
1327 }
1328
1329 void
1330 lower_instructions_visitor::find_msb_to_float_cast(ir_expression *ir)
1331 {
1332    /* For more details, see:
1333     *
1334     * http://graphics.stanford.edu/~seander/bithacks.html#ZerosOnRightFloatCast
1335     */
1336    const unsigned elements = ir->operands[0]->type->vector_elements;
1337    ir_constant *c0 = new(ir) ir_constant(int(0), elements);
1338    ir_constant *cminus1 = new(ir) ir_constant(int(-1), elements);
1339    ir_constant *c23 = new(ir) ir_constant(int(23), elements);
1340    ir_constant *c7F = new(ir) ir_constant(int(0x7F), elements);
1341    ir_constant *c000000FF = new(ir) ir_constant(0x000000FFu, elements);
1342    ir_constant *cFFFFFF00 = new(ir) ir_constant(0xFFFFFF00u, elements);
1343    ir_variable *temp =
1344       new(ir) ir_variable(glsl_type::uvec(elements), "temp", ir_var_temporary);
1345    ir_variable *as_float =
1346       new(ir) ir_variable(glsl_type::vec(elements), "as_float", ir_var_temporary);
1347    ir_variable *msb =
1348       new(ir) ir_variable(glsl_type::ivec(elements), "msb", ir_var_temporary);
1349
1350    ir_instruction &i = *base_ir;
1351
1352    i.insert_before(temp);
1353
1354    if (ir->operands[0]->type->base_type == GLSL_TYPE_UINT) {
1355       i.insert_before(assign(temp, ir->operands[0]));
1356    } else {
1357       assert(ir->operands[0]->type->base_type == GLSL_TYPE_INT);
1358
1359       /* findMSB(uint(abs(some_int))) almost always does the right thing.
1360        * There are two problem values:
1361        *
1362        * * 0x80000000.  Since abs(0x80000000) == 0x80000000, findMSB returns
1363        *   31.  However, findMSB(int(0x80000000)) == 30.
1364        *
1365        * * 0xffffffff.  Since abs(0xffffffff) == 1, findMSB returns
1366        *   31.  Section 8.8 (Integer Functions) of the GLSL 4.50 spec says:
1367        *
1368        *    For a value of zero or negative one, -1 will be returned.
1369        *
1370        * For all negative number cases, including 0x80000000 and 0xffffffff,
1371        * the correct value is obtained from findMSB if instead of negating the
1372        * (already negative) value the logical-not is used.  A conditonal
1373        * logical-not can be achieved in two instructions.
1374        */
1375       ir_variable *as_int =
1376          new(ir) ir_variable(glsl_type::ivec(elements), "as_int", ir_var_temporary);
1377       ir_constant *c31 = new(ir) ir_constant(int(31), elements);
1378
1379       i.insert_before(as_int);
1380       i.insert_before(assign(as_int, ir->operands[0]));
1381       i.insert_before(assign(temp, i2u(expr(ir_binop_bit_xor,
1382                                             as_int,
1383                                             rshift(as_int, c31)))));
1384    }
1385
1386    /* The int-to-float conversion is lossless because bits are conditionally
1387     * masked off the bottom of temp to ensure the value has at most 24 bits of
1388     * data or is zero.  We don't use the result in the zero case.  The uint()
1389     * cast is necessary so that 0x80000000 does not generate a negative value.
1390     *
1391     * float as_float = float(temp > 255 ? temp & ~255 : temp);
1392     */
1393    i.insert_before(as_float);
1394    i.insert_before(assign(as_float, u2f(csel(greater(temp, c000000FF),
1395                                              bit_and(temp, cFFFFFF00),
1396                                              temp))));
1397
1398    /* This is basically an open-coded frexp.  Implementations that have a
1399     * native frexp instruction would be better served by that.  This is
1400     * optimized versus a full-featured open-coded implementation in two ways:
1401     *
1402     * - We don't care about a correct result from subnormal numbers (including
1403     *   0.0), so the raw exponent can always be safely unbiased.
1404     *
1405     * - The value cannot be negative, so it does not need to be masked off to
1406     *   extract the exponent.
1407     *
1408     * int msb = (floatBitsToInt(as_float) >> 23) - 0x7f;
1409     */
1410    i.insert_before(msb);
1411    i.insert_before(assign(msb, sub(rshift(bitcast_f2i(as_float), c23), c7F)));
1412
1413    /* Use msb in the comparison instead of temp so that the subtract can
1414     * possibly generate the result without an explicit comparison.
1415     *
1416     * (msb < 0) ? -1 : msb;
1417     *
1418     * Since our input values are all integers, the unbiased exponent must not
1419     * be negative.  It will only be negative (-0x7f, in fact) if temp is 0.
1420     */
1421    ir->operation = ir_triop_csel;
1422    ir->operands[0] = less(msb, c0);
1423    ir->operands[1] = cminus1;
1424    ir->operands[2] = new(ir) ir_dereference_variable(msb);
1425
1426    this->progress = true;
1427 }
1428
1429 ir_expression *
1430 lower_instructions_visitor::_carry(operand a, operand b)
1431 {
1432    if (lowering(CARRY_TO_ARITH))
1433       return i2u(b2i(less(add(a, b),
1434                           a.val->clone(ralloc_parent(a.val), NULL))));
1435    else
1436       return carry(a, b);
1437 }
1438
1439 void
1440 lower_instructions_visitor::imul_high_to_mul(ir_expression *ir)
1441 {
1442    /*   ABCD
1443     * * EFGH
1444     * ======
1445     * (GH * CD) + (GH * AB) << 16 + (EF * CD) << 16 + (EF * AB) << 32
1446     *
1447     * In GLSL, (a * b) becomes
1448     *
1449     * uint m1 = (a & 0x0000ffffu) * (b & 0x0000ffffu);
1450     * uint m2 = (a & 0x0000ffffu) * (b >> 16);
1451     * uint m3 = (a >> 16)         * (b & 0x0000ffffu);
1452     * uint m4 = (a >> 16)         * (b >> 16);
1453     *
1454     * uint c1;
1455     * uint c2;
1456     * uint lo_result;
1457     * uint hi_result;
1458     *
1459     * lo_result = uaddCarry(m1, m2 << 16, c1);
1460     * hi_result = m4 + c1;
1461     * lo_result = uaddCarry(lo_result, m3 << 16, c2);
1462     * hi_result = hi_result + c2;
1463     * hi_result = hi_result + (m2 >> 16) + (m3 >> 16);
1464     */
1465    const unsigned elements = ir->operands[0]->type->vector_elements;
1466    ir_variable *src1 =
1467       new(ir) ir_variable(glsl_type::uvec(elements), "src1", ir_var_temporary);
1468    ir_variable *src1h =
1469       new(ir) ir_variable(glsl_type::uvec(elements), "src1h", ir_var_temporary);
1470    ir_variable *src1l =
1471       new(ir) ir_variable(glsl_type::uvec(elements), "src1l", ir_var_temporary);
1472    ir_variable *src2 =
1473       new(ir) ir_variable(glsl_type::uvec(elements), "src2", ir_var_temporary);
1474    ir_variable *src2h =
1475       new(ir) ir_variable(glsl_type::uvec(elements), "src2h", ir_var_temporary);
1476    ir_variable *src2l =
1477       new(ir) ir_variable(glsl_type::uvec(elements), "src2l", ir_var_temporary);
1478    ir_variable *t1 =
1479       new(ir) ir_variable(glsl_type::uvec(elements), "t1", ir_var_temporary);
1480    ir_variable *t2 =
1481       new(ir) ir_variable(glsl_type::uvec(elements), "t2", ir_var_temporary);
1482    ir_variable *lo =
1483       new(ir) ir_variable(glsl_type::uvec(elements), "lo", ir_var_temporary);
1484    ir_variable *hi =
1485       new(ir) ir_variable(glsl_type::uvec(elements), "hi", ir_var_temporary);
1486    ir_variable *different_signs = NULL;
1487    ir_constant *c0000FFFF = new(ir) ir_constant(0x0000FFFFu, elements);
1488    ir_constant *c16 = new(ir) ir_constant(16u, elements);
1489
1490    ir_instruction &i = *base_ir;
1491
1492    i.insert_before(src1);
1493    i.insert_before(src2);
1494    i.insert_before(src1h);
1495    i.insert_before(src2h);
1496    i.insert_before(src1l);
1497    i.insert_before(src2l);
1498
1499    if (ir->operands[0]->type->base_type == GLSL_TYPE_UINT) {
1500       i.insert_before(assign(src1, ir->operands[0]));
1501       i.insert_before(assign(src2, ir->operands[1]));
1502    } else {
1503       assert(ir->operands[0]->type->base_type == GLSL_TYPE_INT);
1504
1505       ir_variable *itmp1 =
1506          new(ir) ir_variable(glsl_type::ivec(elements), "itmp1", ir_var_temporary);
1507       ir_variable *itmp2 =
1508          new(ir) ir_variable(glsl_type::ivec(elements), "itmp2", ir_var_temporary);
1509       ir_constant *c0 = new(ir) ir_constant(int(0), elements);
1510
1511       i.insert_before(itmp1);
1512       i.insert_before(itmp2);
1513       i.insert_before(assign(itmp1, ir->operands[0]));
1514       i.insert_before(assign(itmp2, ir->operands[1]));
1515
1516       different_signs =
1517          new(ir) ir_variable(glsl_type::bvec(elements), "different_signs",
1518                              ir_var_temporary);
1519
1520       i.insert_before(different_signs);
1521       i.insert_before(assign(different_signs, expr(ir_binop_logic_xor,
1522                                                    less(itmp1, c0),
1523                                                    less(itmp2, c0->clone(ir, NULL)))));
1524
1525       i.insert_before(assign(src1, i2u(abs(itmp1))));
1526       i.insert_before(assign(src2, i2u(abs(itmp2))));
1527    }
1528
1529    i.insert_before(assign(src1l, bit_and(src1, c0000FFFF)));
1530    i.insert_before(assign(src2l, bit_and(src2, c0000FFFF->clone(ir, NULL))));
1531    i.insert_before(assign(src1h, rshift(src1, c16)));
1532    i.insert_before(assign(src2h, rshift(src2, c16->clone(ir, NULL))));
1533
1534    i.insert_before(lo);
1535    i.insert_before(hi);
1536    i.insert_before(t1);
1537    i.insert_before(t2);
1538
1539    i.insert_before(assign(lo, mul(src1l, src2l)));
1540    i.insert_before(assign(t1, mul(src1l, src2h)));
1541    i.insert_before(assign(t2, mul(src1h, src2l)));
1542    i.insert_before(assign(hi, mul(src1h, src2h)));
1543
1544    i.insert_before(assign(hi, add(hi, _carry(lo, lshift(t1, c16->clone(ir, NULL))))));
1545    i.insert_before(assign(lo,            add(lo, lshift(t1, c16->clone(ir, NULL)))));
1546
1547    i.insert_before(assign(hi, add(hi, _carry(lo, lshift(t2, c16->clone(ir, NULL))))));
1548    i.insert_before(assign(lo,            add(lo, lshift(t2, c16->clone(ir, NULL)))));
1549
1550    if (different_signs == NULL) {
1551       assert(ir->operands[0]->type->base_type == GLSL_TYPE_UINT);
1552
1553       ir->operation = ir_binop_add;
1554       ir->operands[0] = add(hi, rshift(t1, c16->clone(ir, NULL)));
1555       ir->operands[1] = rshift(t2, c16->clone(ir, NULL));
1556    } else {
1557       assert(ir->operands[0]->type->base_type == GLSL_TYPE_INT);
1558
1559       i.insert_before(assign(hi, add(add(hi, rshift(t1, c16->clone(ir, NULL))),
1560                                      rshift(t2, c16->clone(ir, NULL)))));
1561
1562       /* For channels where different_signs is set we have to perform a 64-bit
1563        * negation.  This is *not* the same as just negating the high 32-bits.
1564        * Consider -3 * 2.  The high 32-bits is 0, but the desired result is
1565        * -1, not -0!  Recall -x == ~x + 1.
1566        */
1567       ir_variable *neg_hi =
1568          new(ir) ir_variable(glsl_type::ivec(elements), "neg_hi", ir_var_temporary);
1569       ir_constant *c1 = new(ir) ir_constant(1u, elements);
1570
1571       i.insert_before(neg_hi);
1572       i.insert_before(assign(neg_hi, add(bit_not(u2i(hi)),
1573                                          u2i(_carry(bit_not(lo), c1)))));
1574
1575       ir->operation = ir_triop_csel;
1576       ir->operands[0] = new(ir) ir_dereference_variable(different_signs);
1577       ir->operands[1] = new(ir) ir_dereference_variable(neg_hi);
1578       ir->operands[2] = u2i(hi);
1579    }
1580 }
1581
1582 ir_visitor_status
1583 lower_instructions_visitor::visit_leave(ir_expression *ir)
1584 {
1585    switch (ir->operation) {
1586    case ir_binop_dot:
1587       if (ir->operands[0]->type->is_double())
1588          double_dot_to_fma(ir);
1589       break;
1590    case ir_triop_lrp:
1591       if (ir->operands[0]->type->is_double())
1592          double_lrp(ir);
1593       break;
1594    case ir_binop_sub:
1595       if (lowering(SUB_TO_ADD_NEG))
1596          sub_to_add_neg(ir);
1597       break;
1598
1599    case ir_binop_div:
1600       if (ir->operands[1]->type->is_integer() && lowering(INT_DIV_TO_MUL_RCP))
1601          int_div_to_mul_rcp(ir);
1602       else if ((ir->operands[1]->type->is_float() ||
1603                 ir->operands[1]->type->is_double()) && lowering(DIV_TO_MUL_RCP))
1604          div_to_mul_rcp(ir);
1605       break;
1606
1607    case ir_unop_exp:
1608       if (lowering(EXP_TO_EXP2))
1609          exp_to_exp2(ir);
1610       break;
1611
1612    case ir_unop_log:
1613       if (lowering(LOG_TO_LOG2))
1614          log_to_log2(ir);
1615       break;
1616
1617    case ir_binop_mod:
1618       if (lowering(MOD_TO_FLOOR) && (ir->type->is_float() || ir->type->is_double()))
1619          mod_to_floor(ir);
1620       break;
1621
1622    case ir_binop_pow:
1623       if (lowering(POW_TO_EXP2))
1624          pow_to_exp2(ir);
1625       break;
1626
1627    case ir_binop_ldexp:
1628       if (lowering(LDEXP_TO_ARITH) && ir->type->is_float())
1629          ldexp_to_arith(ir);
1630       if (lowering(DFREXP_DLDEXP_TO_ARITH) && ir->type->is_double())
1631          dldexp_to_arith(ir);
1632       break;
1633
1634    case ir_unop_frexp_exp:
1635       if (lowering(DFREXP_DLDEXP_TO_ARITH) && ir->operands[0]->type->is_double())
1636          dfrexp_exp_to_arith(ir);
1637       break;
1638
1639    case ir_unop_frexp_sig:
1640       if (lowering(DFREXP_DLDEXP_TO_ARITH) && ir->operands[0]->type->is_double())
1641          dfrexp_sig_to_arith(ir);
1642       break;
1643
1644    case ir_binop_carry:
1645       if (lowering(CARRY_TO_ARITH))
1646          carry_to_arith(ir);
1647       break;
1648
1649    case ir_binop_borrow:
1650       if (lowering(BORROW_TO_ARITH))
1651          borrow_to_arith(ir);
1652       break;
1653
1654    case ir_unop_saturate:
1655       if (lowering(SAT_TO_CLAMP))
1656          sat_to_clamp(ir);
1657       break;
1658
1659    case ir_unop_trunc:
1660       if (lowering(DOPS_TO_DFRAC) && ir->type->is_double())
1661          dtrunc_to_dfrac(ir);
1662       break;
1663
1664    case ir_unop_ceil:
1665       if (lowering(DOPS_TO_DFRAC) && ir->type->is_double())
1666          dceil_to_dfrac(ir);
1667       break;
1668
1669    case ir_unop_floor:
1670       if (lowering(DOPS_TO_DFRAC) && ir->type->is_double())
1671          dfloor_to_dfrac(ir);
1672       break;
1673
1674    case ir_unop_round_even:
1675       if (lowering(DOPS_TO_DFRAC) && ir->type->is_double())
1676          dround_even_to_dfrac(ir);
1677       break;
1678
1679    case ir_unop_sign:
1680       if (lowering(DOPS_TO_DFRAC) && ir->type->is_double())
1681          dsign_to_csel(ir);
1682       break;
1683
1684    case ir_unop_bit_count:
1685       if (lowering(BIT_COUNT_TO_MATH))
1686          bit_count_to_math(ir);
1687       break;
1688
1689    case ir_triop_bitfield_extract:
1690       if (lowering(EXTRACT_TO_SHIFTS))
1691          extract_to_shifts(ir);
1692       break;
1693
1694    case ir_quadop_bitfield_insert:
1695       if (lowering(INSERT_TO_SHIFTS))
1696          insert_to_shifts(ir);
1697       break;
1698
1699    case ir_unop_bitfield_reverse:
1700       if (lowering(REVERSE_TO_SHIFTS))
1701          reverse_to_shifts(ir);
1702       break;
1703
1704    case ir_unop_find_lsb:
1705       if (lowering(FIND_LSB_TO_FLOAT_CAST))
1706          find_lsb_to_float_cast(ir);
1707       break;
1708
1709    case ir_unop_find_msb:
1710       if (lowering(FIND_MSB_TO_FLOAT_CAST))
1711          find_msb_to_float_cast(ir);
1712       break;
1713
1714    case ir_binop_imul_high:
1715       if (lowering(IMUL_HIGH_TO_MUL))
1716          imul_high_to_mul(ir);
1717       break;
1718
1719    default:
1720       return visit_continue;
1721    }
1722
1723    return visit_continue;
1724 }