src/compiler/glsl/lower_instructions.cpp

   1 /*
   2  * Copyright © 2010 Intel Corporation
   3  *
   4  * Permission is hereby granted, free of charge, to any person obtaining a
   5  * copy of this software and associated documentation files (the "Software"),
   6  * to deal in the Software without restriction, including without limitation
   7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
   8  * and/or sell copies of the Software, and to permit persons to whom the
   9  * Software is furnished to do so, subject to the following conditions:
  10  *
  11  * The above copyright notice and this permission notice (including the next
  12  * paragraph) shall be included in all copies or substantial portions of the
  13  * Software.
  14  *
  15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
  18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
  20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
  21  * DEALINGS IN THE SOFTWARE.
  22  */
  23
  24 /**
  25  * \file lower_instructions.cpp
  26  *
  27  * Many GPUs lack native instructions for certain expression operations, and
  28  * must replace them with some other expression tree.  This pass lowers some
  29  * of the most common cases, allowing the lowering code to be implemented once
  30  * rather than in each driver backend.
  31  *
  32  * Currently supported transformations:
  33  * - SUB_TO_ADD_NEG
  34  * - DIV_TO_MUL_RCP
  35  * - INT_DIV_TO_MUL_RCP
  36  * - EXP_TO_EXP2
  37  * - POW_TO_EXP2
  38  * - LOG_TO_LOG2
  39  * - MOD_TO_FLOOR
  40  * - LDEXP_TO_ARITH
   41  * - DFREXP_DLDEXP_TO_ARITH
  42  * - CARRY_TO_ARITH
  43  * - BORROW_TO_ARITH
  44  * - SAT_TO_CLAMP
  45  * - DOPS_TO_DFRAC
  46  *
  47  * SUB_TO_ADD_NEG:
  48  * ---------------
  49  * Breaks an ir_binop_sub expression down to add(op0, neg(op1))
  50  *
  51  * This simplifies expression reassociation, and for many backends
  52  * there is no subtract operation separate from adding the negation.
  53  * For backends with native subtract operations, they will probably
  54  * want to recognize add(op0, neg(op1)) or the other way around to
  55  * produce a subtract anyway.
  56  *
  57  * DIV_TO_MUL_RCP and INT_DIV_TO_MUL_RCP:
  58  * --------------------------------------
  59  * Breaks an ir_binop_div expression down to op0 * (rcp(op1)).
  60  *
  61  * Many GPUs don't have a divide instruction (945 and 965 included),
  62  * but they do have an RCP instruction to compute an approximate
  63  * reciprocal.  By breaking the operation down, constant reciprocals
  64  * can get constant folded.
  65  *
  66  * DIV_TO_MUL_RCP only lowers floating point division; INT_DIV_TO_MUL_RCP
  67  * handles the integer case, converting to and from floating point so that
  68  * RCP is possible.
  69  *
  70  * EXP_TO_EXP2 and LOG_TO_LOG2:
  71  * ----------------------------
  72  * Many GPUs don't have a base e log or exponent instruction, but they
  73  * do have base 2 versions, so this pass converts exp and log to exp2
  74  * and log2 operations.
  75  *
  76  * POW_TO_EXP2:
  77  * -----------
  78  * Many older GPUs don't have an x**y instruction.  For these GPUs, convert
  79  * x**y to 2**(y * log2(x)).
  80  *
  81  * MOD_TO_FLOOR:
  82  * -------------
  83  * Breaks an ir_binop_mod expression down to (op0 - op1 * floor(op0 / op1))
  84  *
  85  * Many GPUs don't have a MOD instruction (945 and 965 included), and
  86  * if we have to break it down like this anyway, it gives an
  87  * opportunity to do things like constant fold the (1.0 / op1) easily.
  88  *
   89  * Note: before we used to implement this as op1 * fract(op0 / op1) but this
  90  * implementation had significant precision errors.
  91  *
  92  * LDEXP_TO_ARITH:
  93  * -------------
  94  * Converts ir_binop_ldexp to arithmetic and bit operations for float sources.
  95  *
  96  * DFREXP_DLDEXP_TO_ARITH:
  97  * ---------------
  98  * Converts ir_binop_ldexp, ir_unop_frexp_sig, and ir_unop_frexp_exp to
  99  * arithmetic and bit ops for double arguments.
 100  *
 101  * CARRY_TO_ARITH:
 102  * ---------------
 103  * Converts ir_carry into (x + y) < x.
 104  *
 105  * BORROW_TO_ARITH:
 106  * ----------------
 107  * Converts ir_borrow into (x < y).
 108  *
 109  * SAT_TO_CLAMP:
 110  * -------------
 111  * Converts ir_unop_saturate into min(max(x, 0.0), 1.0)
 112  *
 113  * DOPS_TO_DFRAC:
 114  * --------------
 115  * Converts double trunc, ceil, floor, round to fract
 116  */
 117
 118 #include "c99_math.h"
 119 #include "program/prog_instruction.h" /* for swizzle */
 120 #include "compiler/glsl_types.h"
 121 #include "ir.h"
 122 #include "ir_builder.h"
 123 #include "ir_optimization.h"
 124
 125 using namespace ir_builder;
 126
 127 namespace {
 128
 129 class lower_instructions_visitor : public ir_hierarchical_visitor {
 130 public:
 131    lower_instructions_visitor(unsigned lower)
 132       : progress(false), lower(lower) { }
 133
 134    ir_visitor_status visit_leave(ir_expression *);
 135
 136    bool progress;
 137
 138 private:
 139    unsigned lower; /** Bitfield of which operations to lower */
 140
 141    void sub_to_add_neg(ir_expression *);
 142    void div_to_mul_rcp(ir_expression *);
 143    void int_div_to_mul_rcp(ir_expression *);
 144    void mod_to_floor(ir_expression *);
 145    void exp_to_exp2(ir_expression *);
 146    void pow_to_exp2(ir_expression *);
 147    void log_to_log2(ir_expression *);
 148    void ldexp_to_arith(ir_expression *);
 149    void dldexp_to_arith(ir_expression *);
 150    void dfrexp_sig_to_arith(ir_expression *);
 151    void dfrexp_exp_to_arith(ir_expression *);
 152    void carry_to_arith(ir_expression *);
 153    void borrow_to_arith(ir_expression *);
 154    void sat_to_clamp(ir_expression *);
 155    void double_dot_to_fma(ir_expression *);
 156    void double_lrp(ir_expression *);
 157    void dceil_to_dfrac(ir_expression *);
 158    void dfloor_to_dfrac(ir_expression *);
 159    void dround_even_to_dfrac(ir_expression *);
 160    void dtrunc_to_dfrac(ir_expression *);
 161    void dsign_to_csel(ir_expression *);
 162    void bit_count_to_math(ir_expression *);
 163    void extract_to_shifts(ir_expression *);
 164    void insert_to_shifts(ir_expression *);
 165    void reverse_to_shifts(ir_expression *ir);
 166    void find_lsb_to_float_cast(ir_expression *ir);
 167    void find_msb_to_float_cast(ir_expression *ir);
 168    void imul_high_to_mul(ir_expression *ir);
 169 };
 170
 171 } /* anonymous namespace */
 172
 173 /**
 174  * Determine if a particular type of lowering should occur
 175  */
 176 #define lowering(x) (this->lower & x)
 177
 178 bool
 179 lower_instructions(exec_list *instructions, unsigned what_to_lower)
 180 {
 181    lower_instructions_visitor v(what_to_lower);
 182
 183    visit_list_elements(&v, instructions);
 184    return v.progress;
 185 }
 186
 187 void
 188 lower_instructions_visitor::sub_to_add_neg(ir_expression *ir)
 189 {
 190    ir->operation = ir_binop_add;
 191    ir->operands[1] = new(ir) ir_expression(ir_unop_neg, ir->operands[1]->type,
 192                                            ir->operands[1], NULL);
 193    this->progress = true;
 194 }
 195
 196 void
 197 lower_instructions_visitor::div_to_mul_rcp(ir_expression *ir)
 198 {
 199    assert(ir->operands[1]->type->is_float() || ir->operands[1]->type->is_double());
 200
 201    /* New expression for the 1.0 / op1 */
 202    ir_rvalue *expr;
 203    expr = new(ir) ir_expression(ir_unop_rcp,
 204                                 ir->operands[1]->type,
 205                                 ir->operands[1]);
 206
 207    /* op0 / op1 -> op0 * (1.0 / op1) */
 208    ir->operation = ir_binop_mul;
 209    ir->operands[1] = expr;
 210
 211    this->progress = true;
 212 }
 213
 214 void
 215 lower_instructions_visitor::int_div_to_mul_rcp(ir_expression *ir)
 216 {
 217    assert(ir->operands[1]->type->is_integer());
 218
 219    /* Be careful with integer division -- we need to do it as a
 220     * float and re-truncate, since rcp(n > 1) of an integer would
 221     * just be 0.
 222     */
 223    ir_rvalue *op0, *op1;
 224    const struct glsl_type *vec_type;
 225
 226    vec_type = glsl_type::get_instance(GLSL_TYPE_FLOAT,
 227                                       ir->operands[1]->type->vector_elements,
 228                                       ir->operands[1]->type->matrix_columns);
 229
 230    if (ir->operands[1]->type->base_type == GLSL_TYPE_INT)
 231       op1 = new(ir) ir_expression(ir_unop_i2f, vec_type, ir->operands[1], NULL);
 232    else
 233       op1 = new(ir) ir_expression(ir_unop_u2f, vec_type, ir->operands[1], NULL);
 234
 235    op1 = new(ir) ir_expression(ir_unop_rcp, op1->type, op1, NULL);
 236
 237    vec_type = glsl_type::get_instance(GLSL_TYPE_FLOAT,
 238                                       ir->operands[0]->type->vector_elements,
 239                                       ir->operands[0]->type->matrix_columns);
 240
 241    if (ir->operands[0]->type->base_type == GLSL_TYPE_INT)
 242       op0 = new(ir) ir_expression(ir_unop_i2f, vec_type, ir->operands[0], NULL);
 243    else
 244       op0 = new(ir) ir_expression(ir_unop_u2f, vec_type, ir->operands[0], NULL);
 245
 246    vec_type = glsl_type::get_instance(GLSL_TYPE_FLOAT,
 247                                       ir->type->vector_elements,
 248                                       ir->type->matrix_columns);
 249
 250    op0 = new(ir) ir_expression(ir_binop_mul, vec_type, op0, op1);
 251
 252    if (ir->operands[1]->type->base_type == GLSL_TYPE_INT) {
 253       ir->operation = ir_unop_f2i;
 254       ir->operands[0] = op0;
 255    } else {
 256       ir->operation = ir_unop_i2u;
 257       ir->operands[0] = new(ir) ir_expression(ir_unop_f2i, op0);
 258    }
 259    ir->operands[1] = NULL;
 260
 261    this->progress = true;
 262 }
 263
 264 void
 265 lower_instructions_visitor::exp_to_exp2(ir_expression *ir)
 266 {
 267    ir_constant *log2_e = new(ir) ir_constant(float(M_LOG2E));
 268
 269    ir->operation = ir_unop_exp2;
 270    ir->operands[0] = new(ir) ir_expression(ir_binop_mul, ir->operands[0]->type,
 271                                            ir->operands[0], log2_e);
 272    this->progress = true;
 273 }
 274
 275 void
 276 lower_instructions_visitor::pow_to_exp2(ir_expression *ir)
 277 {
 278    ir_expression *const log2_x =
 279       new(ir) ir_expression(ir_unop_log2, ir->operands[0]->type,
 280                             ir->operands[0]);
 281
 282    ir->operation = ir_unop_exp2;
 283    ir->operands[0] = new(ir) ir_expression(ir_binop_mul, ir->operands[1]->type,
 284                                            ir->operands[1], log2_x);
 285    ir->operands[1] = NULL;
 286    this->progress = true;
 287 }
 288
 289 void
 290 lower_instructions_visitor::log_to_log2(ir_expression *ir)
 291 {
 292    ir->operation = ir_binop_mul;
 293    ir->operands[0] = new(ir) ir_expression(ir_unop_log2, ir->operands[0]->type,
 294                                            ir->operands[0], NULL);
 295    ir->operands[1] = new(ir) ir_constant(float(1.0 / M_LOG2E));
 296    this->progress = true;
 297 }
 298
 299 void
 300 lower_instructions_visitor::mod_to_floor(ir_expression *ir)
 301 {
 302    ir_variable *x = new(ir) ir_variable(ir->operands[0]->type, "mod_x",
 303                                          ir_var_temporary);
 304    ir_variable *y = new(ir) ir_variable(ir->operands[1]->type, "mod_y",
 305                                          ir_var_temporary);
 306    this->base_ir->insert_before(x);
 307    this->base_ir->insert_before(y);
 308
 309    ir_assignment *const assign_x =
 310       new(ir) ir_assignment(new(ir) ir_dereference_variable(x),
 311                             ir->operands[0], NULL);
 312    ir_assignment *const assign_y =
 313       new(ir) ir_assignment(new(ir) ir_dereference_variable(y),
 314                             ir->operands[1], NULL);
 315
 316    this->base_ir->insert_before(assign_x);
 317    this->base_ir->insert_before(assign_y);
 318
 319    ir_expression *const div_expr =
 320       new(ir) ir_expression(ir_binop_div, x->type,
 321                             new(ir) ir_dereference_variable(x),
 322                             new(ir) ir_dereference_variable(y));
 323
 324    /* Don't generate new IR that would need to be lowered in an additional
 325     * pass.
 326     */
 327    if (lowering(DIV_TO_MUL_RCP) && (ir->type->is_float() || ir->type->is_double()))
 328       div_to_mul_rcp(div_expr);
 329
 330    ir_expression *const floor_expr =
 331       new(ir) ir_expression(ir_unop_floor, x->type, div_expr);
 332
 333    if (lowering(DOPS_TO_DFRAC) && ir->type->is_double())
 334       dfloor_to_dfrac(floor_expr);
 335
 336    ir_expression *const mul_expr =
 337       new(ir) ir_expression(ir_binop_mul,
 338                             new(ir) ir_dereference_variable(y),
 339                             floor_expr);
 340
 341    ir->operation = ir_binop_sub;
 342    ir->operands[0] = new(ir) ir_dereference_variable(x);
 343    ir->operands[1] = mul_expr;
 344    this->progress = true;
 345 }
 346
 347 void
 348 lower_instructions_visitor::ldexp_to_arith(ir_expression *ir)
 349 {
 350    /* Translates
 351     *    ir_binop_ldexp x exp
 352     * into
 353     *
 354     *    extracted_biased_exp = rshift(bitcast_f2i(abs(x)), exp_shift);
 355     *    resulting_biased_exp = extracted_biased_exp + exp;
 356     *
 357     *    if (resulting_biased_exp < 1 || x == 0.0f) {
 358     *       return copysign(0.0, x);
 359     *    }
 360     *
 361     *    return bitcast_u2f((bitcast_f2u(x) & sign_mantissa_mask) |
 362     *                       lshift(i2u(resulting_biased_exp), exp_shift));
 363     *
 364     * which we can't actually implement as such, since the GLSL IR doesn't
 365     * have vectorized if-statements. We actually implement it without branches
 366     * using conditional-select:
 367     *
 368     *    extracted_biased_exp = rshift(bitcast_f2i(abs(x)), exp_shift);
 369     *    resulting_biased_exp = extracted_biased_exp + exp;
 370     *
 371     *    is_not_zero_or_underflow = logic_and(nequal(x, 0.0f),
 372     *                                         gequal(resulting_biased_exp, 1);
 373     *    x = csel(is_not_zero_or_underflow, x, copysign(0.0f, x));
 374     *    resulting_biased_exp = csel(is_not_zero_or_underflow,
 375     *                                resulting_biased_exp, 0);
 376     *
 377     *    return bitcast_u2f((bitcast_f2u(x) & sign_mantissa_mask) |
 378     *                       lshift(i2u(resulting_biased_exp), exp_shift));
 379     */
 380
 381    const unsigned vec_elem = ir->type->vector_elements;
 382
 383    /* Types */
 384    const glsl_type *ivec = glsl_type::get_instance(GLSL_TYPE_INT, vec_elem, 1);
 385    const glsl_type *bvec = glsl_type::get_instance(GLSL_TYPE_BOOL, vec_elem, 1);
 386
 387    /* Constants */
 388    ir_constant *zeroi = ir_constant::zero(ir, ivec);
 389
 390    ir_constant *sign_mask = new(ir) ir_constant(0x80000000u, vec_elem);
 391
 392    ir_constant *exp_shift = new(ir) ir_constant(23, vec_elem);
 393    ir_constant *exp_width = new(ir) ir_constant(8, vec_elem);
 394
 395    /* Temporary variables */
 396    ir_variable *x = new(ir) ir_variable(ir->type, "x", ir_var_temporary);
 397    ir_variable *exp = new(ir) ir_variable(ivec, "exp", ir_var_temporary);
 398
 399    ir_variable *zero_sign_x = new(ir) ir_variable(ir->type, "zero_sign_x",
 400                                                   ir_var_temporary);
 401
 402    ir_variable *extracted_biased_exp =
 403       new(ir) ir_variable(ivec, "extracted_biased_exp", ir_var_temporary);
 404    ir_variable *resulting_biased_exp =
 405       new(ir) ir_variable(ivec, "resulting_biased_exp", ir_var_temporary);
 406
 407    ir_variable *is_not_zero_or_underflow =
 408       new(ir) ir_variable(bvec, "is_not_zero_or_underflow", ir_var_temporary);
 409
 410    ir_instruction &i = *base_ir;
 411
 412    /* Copy <x> and <exp> arguments. */
 413    i.insert_before(x);
 414    i.insert_before(assign(x, ir->operands[0]));
 415    i.insert_before(exp);
 416    i.insert_before(assign(exp, ir->operands[1]));
 417
 418    /* Extract the biased exponent from <x>. */
 419    i.insert_before(extracted_biased_exp);
 420    i.insert_before(assign(extracted_biased_exp,
 421                           rshift(bitcast_f2i(abs(x)), exp_shift)));
 422
 423    i.insert_before(resulting_biased_exp);
 424    i.insert_before(assign(resulting_biased_exp,
 425                           add(extracted_biased_exp, exp)));
 426
 427    /* Test if result is ±0.0, subnormal, or underflow by checking if the
 428     * resulting biased exponent would be less than 0x1. If so, the result is
 429     * 0.0 with the sign of x. (Actually, invert the conditions so that
 430     * immediate values are the second arguments, which is better for i965)
 431     */
 432    i.insert_before(zero_sign_x);
 433    i.insert_before(assign(zero_sign_x,
 434                           bitcast_u2f(bit_and(bitcast_f2u(x), sign_mask))));
 435
 436    i.insert_before(is_not_zero_or_underflow);
 437    i.insert_before(assign(is_not_zero_or_underflow,
 438                           logic_and(nequal(x, new(ir) ir_constant(0.0f, vec_elem)),
 439                                     gequal(resulting_biased_exp,
 440                                            new(ir) ir_constant(0x1, vec_elem)))));
 441    i.insert_before(assign(x, csel(is_not_zero_or_underflow,
 442                                   x, zero_sign_x)));
 443    i.insert_before(assign(resulting_biased_exp,
 444                           csel(is_not_zero_or_underflow,
 445                                resulting_biased_exp, zeroi)));
 446
 447    /* We could test for overflows by checking if the resulting biased exponent
 448     * would be greater than 0xFE. Turns out we don't need to because the GLSL
 449     * spec says:
 450     *
 451     *    "If this product is too large to be represented in the
 452     *     floating-point type, the result is undefined."
 453     */
 454
 455    ir_constant *exp_shift_clone = exp_shift->clone(ir, NULL);
 456    ir->operation = ir_unop_bitcast_i2f;
 457    ir->operands[0] = bitfield_insert(bitcast_f2i(x), resulting_biased_exp,
 458                                      exp_shift_clone, exp_width);
 459    ir->operands[1] = NULL;
 460
 461    this->progress = true;
 462 }
 463
 464 void
 465 lower_instructions_visitor::dldexp_to_arith(ir_expression *ir)
 466 {
 467    /* See ldexp_to_arith for structure. Uses frexp_exp to extract the exponent
 468     * from the significand.
 469     */
 470
 471    const unsigned vec_elem = ir->type->vector_elements;
 472
 473    /* Types */
 474    const glsl_type *ivec = glsl_type::get_instance(GLSL_TYPE_INT, vec_elem, 1);
 475    const glsl_type *bvec = glsl_type::get_instance(GLSL_TYPE_BOOL, vec_elem, 1);
 476
 477    /* Constants */
 478    ir_constant *zeroi = ir_constant::zero(ir, ivec);
 479
 480    ir_constant *sign_mask = new(ir) ir_constant(0x80000000u);
 481
 482    ir_constant *exp_shift = new(ir) ir_constant(20u);
 483    ir_constant *exp_width = new(ir) ir_constant(11u);
 484    ir_constant *exp_bias = new(ir) ir_constant(1022, vec_elem);
 485
 486    /* Temporary variables */
 487    ir_variable *x = new(ir) ir_variable(ir->type, "x", ir_var_temporary);
 488    ir_variable *exp = new(ir) ir_variable(ivec, "exp", ir_var_temporary);
 489
 490    ir_variable *zero_sign_x = new(ir) ir_variable(ir->type, "zero_sign_x",
 491                                                   ir_var_temporary);
 492
 493    ir_variable *extracted_biased_exp =
 494       new(ir) ir_variable(ivec, "extracted_biased_exp", ir_var_temporary);
 495    ir_variable *resulting_biased_exp =
 496       new(ir) ir_variable(ivec, "resulting_biased_exp", ir_var_temporary);
 497
 498    ir_variable *is_not_zero_or_underflow =
 499       new(ir) ir_variable(bvec, "is_not_zero_or_underflow", ir_var_temporary);
 500
 501    ir_instruction &i = *base_ir;
 502
 503    /* Copy <x> and <exp> arguments. */
 504    i.insert_before(x);
 505    i.insert_before(assign(x, ir->operands[0]));
 506    i.insert_before(exp);
 507    i.insert_before(assign(exp, ir->operands[1]));
 508
 509    ir_expression *frexp_exp = expr(ir_unop_frexp_exp, x);
 510    if (lowering(DFREXP_DLDEXP_TO_ARITH))
 511       dfrexp_exp_to_arith(frexp_exp);
 512
 513    /* Extract the biased exponent from <x>. */
 514    i.insert_before(extracted_biased_exp);
 515    i.insert_before(assign(extracted_biased_exp, add(frexp_exp, exp_bias)));
 516
 517    i.insert_before(resulting_biased_exp);
 518    i.insert_before(assign(resulting_biased_exp,
 519                           add(extracted_biased_exp, exp)));
 520
 521    /* Test if result is ±0.0, subnormal, or underflow by checking if the
 522     * resulting biased exponent would be less than 0x1. If so, the result is
 523     * 0.0 with the sign of x. (Actually, invert the conditions so that
 524     * immediate values are the second arguments, which is better for i965)
 525     * TODO: Implement in a vector fashion.
 526     */
 527    i.insert_before(zero_sign_x);
 528    for (unsigned elem = 0; elem < vec_elem; elem++) {
 529       ir_variable *unpacked =
 530          new(ir) ir_variable(glsl_type::uvec2_type, "unpacked", ir_var_temporary);
 531       i.insert_before(unpacked);
 532       i.insert_before(
 533             assign(unpacked,
 534                    expr(ir_unop_unpack_double_2x32, swizzle(x, elem, 1))));
 535       i.insert_before(assign(unpacked, bit_and(swizzle_y(unpacked), sign_mask->clone(ir, NULL)),
 536                              WRITEMASK_Y));
 537       i.insert_before(assign(unpacked, ir_constant::zero(ir, glsl_type::uint_type), WRITEMASK_X));
 538       i.insert_before(assign(zero_sign_x,
 539                              expr(ir_unop_pack_double_2x32, unpacked),
 540                              1 << elem));
 541    }
 542    i.insert_before(is_not_zero_or_underflow);
 543    i.insert_before(assign(is_not_zero_or_underflow,
 544                           gequal(resulting_biased_exp,
 545                                   new(ir) ir_constant(0x1, vec_elem))));
 546    i.insert_before(assign(x, csel(is_not_zero_or_underflow,
 547                                   x, zero_sign_x)));
 548    i.insert_before(assign(resulting_biased_exp,
 549                           csel(is_not_zero_or_underflow,
 550                                resulting_biased_exp, zeroi)));
 551
 552    /* We could test for overflows by checking if the resulting biased exponent
 553     * would be greater than 0xFE. Turns out we don't need to because the GLSL
 554     * spec says:
 555     *
 556     *    "If this product is too large to be represented in the
 557     *     floating-point type, the result is undefined."
 558     */
 559
 560    ir_rvalue *results[4] = {NULL};
 561    for (unsigned elem = 0; elem < vec_elem; elem++) {
 562       ir_variable *unpacked =
 563          new(ir) ir_variable(glsl_type::uvec2_type, "unpacked", ir_var_temporary);
 564       i.insert_before(unpacked);
 565       i.insert_before(
 566             assign(unpacked,
 567                    expr(ir_unop_unpack_double_2x32, swizzle(x, elem, 1))));
 568
 569       ir_expression *bfi = bitfield_insert(
 570             swizzle_y(unpacked),
 571             i2u(swizzle(resulting_biased_exp, elem, 1)),
 572             exp_shift->clone(ir, NULL),
 573             exp_width->clone(ir, NULL));
 574
 575       i.insert_before(assign(unpacked, bfi, WRITEMASK_Y));
 576
 577       results[elem] = expr(ir_unop_pack_double_2x32, unpacked);
 578    }
 579
 580    ir->operation = ir_quadop_vector;
 581    ir->operands[0] = results[0];
 582    ir->operands[1] = results[1];
 583    ir->operands[2] = results[2];
 584    ir->operands[3] = results[3];
 585
 586    /* Don't generate new IR that would need to be lowered in an additional
 587     * pass.
 588     */
 589
 590    this->progress = true;
 591 }
 592
 593 void
 594 lower_instructions_visitor::dfrexp_sig_to_arith(ir_expression *ir)
 595 {
 596    const unsigned vec_elem = ir->type->vector_elements;
 597    const glsl_type *bvec = glsl_type::get_instance(GLSL_TYPE_BOOL, vec_elem, 1);
 598
 599    /* Double-precision floating-point values are stored as
 600     *   1 sign bit;
 601     *   11 exponent bits;
 602     *   52 mantissa bits.
 603     *
 604     * We're just extracting the significand here, so we only need to modify
 605     * the upper 32-bit uint. Unfortunately we must extract each double
 606     * independently as there is no vector version of unpackDouble.
 607     */
 608
 609    ir_instruction &i = *base_ir;
 610
 611    ir_variable *is_not_zero =
 612       new(ir) ir_variable(bvec, "is_not_zero", ir_var_temporary);
 613    ir_rvalue *results[4] = {NULL};
 614
 615    ir_constant *dzero = new(ir) ir_constant(0.0, vec_elem);
 616    i.insert_before(is_not_zero);
 617    i.insert_before(
 618          assign(is_not_zero,
 619                 nequal(abs(ir->operands[0]->clone(ir, NULL)), dzero)));
 620
 621    /* TODO: Remake this as more vector-friendly when int64 support is
 622     * available.
 623     */
 624    for (unsigned elem = 0; elem < vec_elem; elem++) {
 625       ir_constant *zero = new(ir) ir_constant(0u, 1);
 626       ir_constant *sign_mantissa_mask = new(ir) ir_constant(0x800fffffu, 1);
 627
 628       /* Exponent of double floating-point values in the range [0.5, 1.0). */
 629       ir_constant *exponent_value = new(ir) ir_constant(0x3fe00000u, 1);
 630
 631       ir_variable *bits =
 632          new(ir) ir_variable(glsl_type::uint_type, "bits", ir_var_temporary);
 633       ir_variable *unpacked =
 634          new(ir) ir_variable(glsl_type::uvec2_type, "unpacked", ir_var_temporary);
 635
 636       ir_rvalue *x = swizzle(ir->operands[0]->clone(ir, NULL), elem, 1);
 637
 638       i.insert_before(bits);
 639       i.insert_before(unpacked);
 640       i.insert_before(assign(unpacked, expr(ir_unop_unpack_double_2x32, x)));
 641
 642       /* Manipulate the high uint to remove the exponent and replace it with
 643        * either the default exponent or zero.
 644        */
 645       i.insert_before(assign(bits, swizzle_y(unpacked)));
 646       i.insert_before(assign(bits, bit_and(bits, sign_mantissa_mask)));
 647       i.insert_before(assign(bits, bit_or(bits,
 648                                           csel(swizzle(is_not_zero, elem, 1),
 649                                                exponent_value,
 650                                                zero))));
 651       i.insert_before(assign(unpacked, bits, WRITEMASK_Y));
 652       results[elem] = expr(ir_unop_pack_double_2x32, unpacked);
 653    }
 654
 655    /* Put the dvec back together */
 656    ir->operation = ir_quadop_vector;
 657    ir->operands[0] = results[0];
 658    ir->operands[1] = results[1];
 659    ir->operands[2] = results[2];
 660    ir->operands[3] = results[3];
 661
 662    this->progress = true;
 663 }
 664
 665 void
 666 lower_instructions_visitor::dfrexp_exp_to_arith(ir_expression *ir)
 667 {
 668    const unsigned vec_elem = ir->type->vector_elements;
 669    const glsl_type *bvec = glsl_type::get_instance(GLSL_TYPE_BOOL, vec_elem, 1);
 670    const glsl_type *uvec = glsl_type::get_instance(GLSL_TYPE_UINT, vec_elem, 1);
 671
 672    /* Double-precision floating-point values are stored as
 673     *   1 sign bit;
 674     *   11 exponent bits;
 675     *   52 mantissa bits.
 676     *
 677     * We're just extracting the exponent here, so we only care about the upper
 678     * 32-bit uint.
 679     */
 680
 681    ir_instruction &i = *base_ir;
 682
 683    ir_variable *is_not_zero =
 684       new(ir) ir_variable(bvec, "is_not_zero", ir_var_temporary);
 685    ir_variable *high_words =
 686       new(ir) ir_variable(uvec, "high_words", ir_var_temporary);
 687    ir_constant *dzero = new(ir) ir_constant(0.0, vec_elem);
 688    ir_constant *izero = new(ir) ir_constant(0, vec_elem);
 689
 690    ir_rvalue *absval = abs(ir->operands[0]);
 691
 692    i.insert_before(is_not_zero);
 693    i.insert_before(high_words);
 694    i.insert_before(assign(is_not_zero, nequal(absval->clone(ir, NULL), dzero)));
 695
 696    /* Extract all of the upper uints. */
 697    for (unsigned elem = 0; elem < vec_elem; elem++) {
 698       ir_rvalue *x = swizzle(absval->clone(ir, NULL), elem, 1);
 699
 700       i.insert_before(assign(high_words,
 701                              swizzle_y(expr(ir_unop_unpack_double_2x32, x)),
 702                              1 << elem));
 703
 704    }
 705    ir_constant *exponent_shift = new(ir) ir_constant(20, vec_elem);
 706    ir_constant *exponent_bias = new(ir) ir_constant(-1022, vec_elem);
 707
 708    /* For non-zero inputs, shift the exponent down and apply bias. */
 709    ir->operation = ir_triop_csel;
 710    ir->operands[0] = new(ir) ir_dereference_variable(is_not_zero);
 711    ir->operands[1] = add(exponent_bias, u2i(rshift(high_words, exponent_shift)));
 712    ir->operands[2] = izero;
 713
 714    this->progress = true;
 715 }
 716
 717 void
 718 lower_instructions_visitor::carry_to_arith(ir_expression *ir)
 719 {
 720    /* Translates
 721     *   ir_binop_carry x y
 722     * into
 723     *   sum = ir_binop_add x y
 724     *   bcarry = ir_binop_less sum x
 725     *   carry = ir_unop_b2i bcarry
 726     */
 727
 728    ir_rvalue *x_clone = ir->operands[0]->clone(ir, NULL);
 729    ir->operation = ir_unop_i2u;
 730    ir->operands[0] = b2i(less(add(ir->operands[0], ir->operands[1]), x_clone));
 731    ir->operands[1] = NULL;
 732
 733    this->progress = true;
 734 }
 735
 736 void
 737 lower_instructions_visitor::borrow_to_arith(ir_expression *ir)
 738 {
 739    /* Translates
 740     *   ir_binop_borrow x y
 741     * into
 742     *   bcarry = ir_binop_less x y
 743     *   carry = ir_unop_b2i bcarry
 744     */
 745
 746    ir->operation = ir_unop_i2u;
 747    ir->operands[0] = b2i(less(ir->operands[0], ir->operands[1]));
 748    ir->operands[1] = NULL;
 749
 750    this->progress = true;
 751 }
 752
 753 void
 754 lower_instructions_visitor::sat_to_clamp(ir_expression *ir)
 755 {
 756    /* Translates
 757     *   ir_unop_saturate x
 758     * into
 759     *   ir_binop_min (ir_binop_max(x, 0.0), 1.0)
 760     */
 761
 762    ir->operation = ir_binop_min;
 763    ir->operands[0] = new(ir) ir_expression(ir_binop_max, ir->operands[0]->type,
 764                                            ir->operands[0],
 765                                            new(ir) ir_constant(0.0f));
 766    ir->operands[1] = new(ir) ir_constant(1.0f);
 767
 768    this->progress = true;
 769 }
 770
 771 void
 772 lower_instructions_visitor::double_dot_to_fma(ir_expression *ir)
 773 {
 774    ir_variable *temp = new(ir) ir_variable(ir->operands[0]->type->get_base_type(), "dot_res",
 775                                            ir_var_temporary);
 776    this->base_ir->insert_before(temp);
 777
 778    int nc = ir->operands[0]->type->components();
 779    for (int i = nc - 1; i >= 1; i--) {
 780       ir_assignment *assig;
 781       if (i == (nc - 1)) {
 782          assig = assign(temp, mul(swizzle(ir->operands[0]->clone(ir, NULL), i, 1),
 783                                   swizzle(ir->operands[1]->clone(ir, NULL), i, 1)));
 784       } else {
 785          assig = assign(temp, fma(swizzle(ir->operands[0]->clone(ir, NULL), i, 1),
 786                                   swizzle(ir->operands[1]->clone(ir, NULL), i, 1),
 787                                   temp));
 788       }
 789       this->base_ir->insert_before(assig);
 790    }
 791
 792    ir->operation = ir_triop_fma;
 793    ir->operands[0] = swizzle(ir->operands[0], 0, 1);
 794    ir->operands[1] = swizzle(ir->operands[1], 0, 1);
 795    ir->operands[2] = new(ir) ir_dereference_variable(temp);
 796
 797    this->progress = true;
 798
 799 }
 800
 801 void
 802 lower_instructions_visitor::double_lrp(ir_expression *ir)
 803 {
 804    int swizval;
 805    ir_rvalue *op0 = ir->operands[0], *op2 = ir->operands[2];
 806    ir_constant *one = new(ir) ir_constant(1.0, op2->type->vector_elements);
 807
 808    switch (op2->type->vector_elements) {
 809    case 1:
 810       swizval = SWIZZLE_XXXX;
 811       break;
 812    default:
 813       assert(op0->type->vector_elements == op2->type->vector_elements);
 814       swizval = SWIZZLE_XYZW;
 815       break;
 816    }
 817
 818    ir->operation = ir_triop_fma;
 819    ir->operands[0] = swizzle(op2, swizval, op0->type->vector_elements);
 820    ir->operands[2] = mul(sub(one, op2->clone(ir, NULL)), op0);
 821
 822    this->progress = true;
 823 }
 824
 825 void
 826 lower_instructions_visitor::dceil_to_dfrac(ir_expression *ir)
 827 {
 828    /*
 829     * frtemp = frac(x);
 830     * temp = sub(x, frtemp);
 831     * result = temp + ((frtemp != 0.0) ? 1.0 : 0.0);
 832     */
 833    ir_instruction &i = *base_ir;
 834    ir_constant *zero = new(ir) ir_constant(0.0, ir->operands[0]->type->vector_elements);
 835    ir_constant *one = new(ir) ir_constant(1.0, ir->operands[0]->type->vector_elements);
 836    ir_variable *frtemp = new(ir) ir_variable(ir->operands[0]->type, "frtemp",
 837                                              ir_var_temporary);
 838
 839    i.insert_before(frtemp);
 840    i.insert_before(assign(frtemp, fract(ir->operands[0])));
 841
 842    ir->operation = ir_binop_add;
 843    ir->operands[0] = sub(ir->operands[0]->clone(ir, NULL), frtemp);
 844    ir->operands[1] = csel(nequal(frtemp, zero), one, zero->clone(ir, NULL));
 845
 846    this->progress = true;
 847 }
 848
 849 void
 850 lower_instructions_visitor::dfloor_to_dfrac(ir_expression *ir)
 851 {
 852    /*
 853     * frtemp = frac(x);
 854     * result = sub(x, frtemp);
 855     */
 856    ir->operation = ir_binop_sub;
 857    ir->operands[1] = fract(ir->operands[0]->clone(ir, NULL));
 858
 859    this->progress = true;
 860 }
 861 void
 862 lower_instructions_visitor::dround_even_to_dfrac(ir_expression *ir)
 863 {
 864    /*
 865     * insane but works
 866     * temp = x + 0.5;
 867     * frtemp = frac(temp);
 868     * t2 = sub(temp, frtemp);
 869     * if (frac(x) == 0.5)
 870     *     result = frac(t2 * 0.5) == 0 ? t2 : t2 - 1;
 871     *  else
 872     *     result = t2;
 873
 874     */
 875    ir_instruction &i = *base_ir;
 876    ir_variable *frtemp = new(ir) ir_variable(ir->operands[0]->type, "frtemp",
 877                                              ir_var_temporary);
 878    ir_variable *temp = new(ir) ir_variable(ir->operands[0]->type, "temp",
 879                                            ir_var_temporary);
 880    ir_variable *t2 = new(ir) ir_variable(ir->operands[0]->type, "t2",
 881                                            ir_var_temporary);
 882    ir_constant *p5 = new(ir) ir_constant(0.5, ir->operands[0]->type->vector_elements);
 883    ir_constant *one = new(ir) ir_constant(1.0, ir->operands[0]->type->vector_elements);
 884    ir_constant *zero = new(ir) ir_constant(0.0, ir->operands[0]->type->vector_elements);
 885
 886    i.insert_before(temp);
 887    i.insert_before(assign(temp, add(ir->operands[0], p5)));
 888
 889    i.insert_before(frtemp);
 890    i.insert_before(assign(frtemp, fract(temp)));
 891
 892    i.insert_before(t2);
 893    i.insert_before(assign(t2, sub(temp, frtemp)));
 894
 895    ir->operation = ir_triop_csel;
 896    ir->operands[0] = equal(fract(ir->operands[0]->clone(ir, NULL)),
 897                            p5->clone(ir, NULL));
 898    ir->operands[1] = csel(equal(fract(mul(t2, p5->clone(ir, NULL))),
 899                                 zero),
 900                           t2,
 901                           sub(t2, one));
 902    ir->operands[2] = new(ir) ir_dereference_variable(t2);
 903
 904    this->progress = true;
 905 }
 906
 907 void
 908 lower_instructions_visitor::dtrunc_to_dfrac(ir_expression *ir)
 909 {
 910    /*
 911     * frtemp = frac(x);
 912     * temp = sub(x, frtemp);
 913     * result = x >= 0 ? temp : temp + (frtemp == 0.0) ? 0 : 1;
 914     */
 915    ir_rvalue *arg = ir->operands[0];
 916    ir_instruction &i = *base_ir;
 917
 918    ir_constant *zero = new(ir) ir_constant(0.0, arg->type->vector_elements);
 919    ir_constant *one = new(ir) ir_constant(1.0, arg->type->vector_elements);
 920    ir_variable *frtemp = new(ir) ir_variable(arg->type, "frtemp",
 921                                              ir_var_temporary);
 922    ir_variable *temp = new(ir) ir_variable(ir->operands[0]->type, "temp",
 923                                            ir_var_temporary);
 924
 925    i.insert_before(frtemp);
 926    i.insert_before(assign(frtemp, fract(arg)));
 927    i.insert_before(temp);
 928    i.insert_before(assign(temp, sub(arg->clone(ir, NULL), frtemp)));
 929
 930    ir->operation = ir_triop_csel;
 931    ir->operands[0] = gequal(arg->clone(ir, NULL), zero);
 932    ir->operands[1] = new (ir) ir_dereference_variable(temp);
 933    ir->operands[2] = add(temp,
 934                          csel(equal(frtemp, zero->clone(ir, NULL)),
 935                               zero->clone(ir, NULL),
 936                               one));
 937
 938    this->progress = true;
 939 }
 940
 941 void
 942 lower_instructions_visitor::dsign_to_csel(ir_expression *ir)
 943 {
 944    /*
 945     * temp = x > 0.0 ? 1.0 : 0.0;
 946     * result = x < 0.0 ? -1.0 : temp;
 947     */
 948    ir_rvalue *arg = ir->operands[0];
 949    ir_constant *zero = new(ir) ir_constant(0.0, arg->type->vector_elements);
 950    ir_constant *one = new(ir) ir_constant(1.0, arg->type->vector_elements);
 951    ir_constant *neg_one = new(ir) ir_constant(-1.0, arg->type->vector_elements);
 952
 953    ir->operation = ir_triop_csel;
 954    ir->operands[0] = less(arg->clone(ir, NULL),
 955                           zero->clone(ir, NULL));
 956    ir->operands[1] = neg_one;
 957    ir->operands[2] = csel(greater(arg, zero),
 958                           one,
 959                           zero->clone(ir, NULL));
 960
 961    this->progress = true;
 962 }
 963
 964 void
 965 lower_instructions_visitor::bit_count_to_math(ir_expression *ir)
 966 {
 967    /* For more details, see:
 968     *
 969     * http://graphics.stanford.edu/~seander/bithacks.html#CountBitsSetPaallel
 970     */
 971    const unsigned elements = ir->operands[0]->type->vector_elements;
 972    ir_variable *temp = new(ir) ir_variable(glsl_type::uvec(elements), "temp",
 973                                            ir_var_temporary);
 974    ir_constant *c55555555 = new(ir) ir_constant(0x55555555u);
 975    ir_constant *c33333333 = new(ir) ir_constant(0x33333333u);
 976    ir_constant *c0F0F0F0F = new(ir) ir_constant(0x0F0F0F0Fu);
 977    ir_constant *c01010101 = new(ir) ir_constant(0x01010101u);
 978    ir_constant *c1 = new(ir) ir_constant(1u);
 979    ir_constant *c2 = new(ir) ir_constant(2u);
 980    ir_constant *c4 = new(ir) ir_constant(4u);
 981    ir_constant *c24 = new(ir) ir_constant(24u);
 982
 983    base_ir->insert_before(temp);
 984
 985    if (ir->operands[0]->type->base_type == GLSL_TYPE_UINT) {
 986       base_ir->insert_before(assign(temp, ir->operands[0]));
 987    } else {
 988       assert(ir->operands[0]->type->base_type == GLSL_TYPE_INT);
 989       base_ir->insert_before(assign(temp, i2u(ir->operands[0])));
 990    }
 991
 992    /* temp = temp - ((temp >> 1) & 0x55555555u); */
 993    base_ir->insert_before(assign(temp, sub(temp, bit_and(rshift(temp, c1),
 994                                                          c55555555))));
 995
 996    /* temp = (temp & 0x33333333u) + ((temp >> 2) & 0x33333333u); */
 997    base_ir->insert_before(assign(temp, add(bit_and(temp, c33333333),
 998                                            bit_and(rshift(temp, c2),
 999                                                    c33333333->clone(ir, NULL)))));
1000
1001    /* int(((temp + (temp >> 4) & 0xF0F0F0Fu) * 0x1010101u) >> 24); */
1002    ir->operation = ir_unop_u2i;
1003    ir->operands[0] = rshift(mul(bit_and(add(temp, rshift(temp, c4)), c0F0F0F0F),
1004                                 c01010101),
1005                             c24);
1006
1007    this->progress = true;
1008 }
1009
1010 void
1011 lower_instructions_visitor::extract_to_shifts(ir_expression *ir)
1012 {
1013    ir_variable *bits =
1014       new(ir) ir_variable(ir->operands[0]->type, "bits", ir_var_temporary);
1015
1016    base_ir->insert_before(bits);
1017    base_ir->insert_before(assign(bits, ir->operands[2]));
1018
1019    if (ir->operands[0]->type->base_type == GLSL_TYPE_UINT) {
1020       ir_constant *c1 =
1021          new(ir) ir_constant(1u, ir->operands[0]->type->vector_elements);
1022       ir_constant *c32 =
1023          new(ir) ir_constant(32u, ir->operands[0]->type->vector_elements);
1024       ir_constant *cFFFFFFFF =
1025          new(ir) ir_constant(0xFFFFFFFFu, ir->operands[0]->type->vector_elements);
1026
1027       /* At least some hardware treats (x << y) as (x << (y%32)).  This means
1028        * we'd get a mask of 0 when bits is 32.  Special case it.
1029        *
1030        * mask = bits == 32 ? 0xffffffff : (1u << bits) - 1u;
1031        */
1032       ir_expression *mask = csel(equal(bits, c32),
1033                                  cFFFFFFFF,
1034                                  sub(lshift(c1, bits), c1->clone(ir, NULL)));
1035
1036       /* Section 8.8 (Integer Functions) of the GLSL 4.50 spec says:
1037        *
1038        *    If bits is zero, the result will be zero.
1039        *
1040        * Since (1 << 0) - 1 == 0, we don't need to bother with the conditional
1041        * select as in the signed integer case.
1042        *
1043        * (value >> offset) & mask;
1044        */
1045       ir->operation = ir_binop_bit_and;
1046       ir->operands[0] = rshift(ir->operands[0], ir->operands[1]);
1047       ir->operands[1] = mask;
1048       ir->operands[2] = NULL;
1049    } else {
1050       ir_constant *c0 =
1051          new(ir) ir_constant(int(0), ir->operands[0]->type->vector_elements);
1052       ir_constant *c32 =
1053          new(ir) ir_constant(int(32), ir->operands[0]->type->vector_elements);
1054       ir_variable *temp =
1055          new(ir) ir_variable(ir->operands[0]->type, "temp", ir_var_temporary);
1056
1057       /* temp = 32 - bits; */
1058       base_ir->insert_before(temp);
1059       base_ir->insert_before(assign(temp, sub(c32, bits)));
1060
1061       /* expr = value << (temp - offset)) >> temp; */
1062       ir_expression *expr =
1063          rshift(lshift(ir->operands[0], sub(temp, ir->operands[1])), temp);
1064
1065       /* Section 8.8 (Integer Functions) of the GLSL 4.50 spec says:
1066        *
1067        *    If bits is zero, the result will be zero.
1068        *
1069        * Due to the (x << (y%32)) behavior mentioned before, the (value <<
1070        * (32-0)) doesn't "erase" all of the data as we would like, so finish
1071        * up with:
1072        *
1073        * (bits == 0) ? 0 : e;
1074        */
1075       ir->operation = ir_triop_csel;
1076       ir->operands[0] = equal(c0, bits);
1077       ir->operands[1] = c0->clone(ir, NULL);
1078       ir->operands[2] = expr;
1079    }
1080
1081    this->progress = true;
1082 }
1083
1084 void
1085 lower_instructions_visitor::insert_to_shifts(ir_expression *ir)
1086 {
1087    ir_constant *c1;
1088    ir_constant *c32;
1089    ir_constant *cFFFFFFFF;
1090    ir_variable *offset =
1091       new(ir) ir_variable(ir->operands[0]->type, "offset", ir_var_temporary);
1092    ir_variable *bits =
1093       new(ir) ir_variable(ir->operands[0]->type, "bits", ir_var_temporary);
1094    ir_variable *mask =
1095       new(ir) ir_variable(ir->operands[0]->type, "mask", ir_var_temporary);
1096
1097    if (ir->operands[0]->type->base_type == GLSL_TYPE_INT) {
1098       c1 = new(ir) ir_constant(int(1), ir->operands[0]->type->vector_elements);
1099       c32 = new(ir) ir_constant(int(32), ir->operands[0]->type->vector_elements);
1100       cFFFFFFFF = new(ir) ir_constant(int(0xFFFFFFFF), ir->operands[0]->type->vector_elements);
1101    } else {
1102       assert(ir->operands[0]->type->base_type == GLSL_TYPE_UINT);
1103
1104       c1 = new(ir) ir_constant(1u, ir->operands[0]->type->vector_elements);
1105       c32 = new(ir) ir_constant(32u, ir->operands[0]->type->vector_elements);
1106       cFFFFFFFF = new(ir) ir_constant(0xFFFFFFFFu, ir->operands[0]->type->vector_elements);
1107    }
1108
1109    base_ir->insert_before(offset);
1110    base_ir->insert_before(assign(offset, ir->operands[2]));
1111
1112    base_ir->insert_before(bits);
1113    base_ir->insert_before(assign(bits, ir->operands[3]));
1114
1115    /* At least some hardware treats (x << y) as (x << (y%32)).  This means
1116     * we'd get a mask of 0 when bits is 32.  Special case it.
1117     *
1118     * mask = (bits == 32 ? 0xffffffff : (1u << bits) - 1u) << offset;
1119     *
1120     * Section 8.8 (Integer Functions) of the GLSL 4.50 spec says:
1121     *
1122     *    The result will be undefined if offset or bits is negative, or if the
1123     *    sum of offset and bits is greater than the number of bits used to
1124     *    store the operand.
1125     *
1126     * Since it's undefined, there are a couple other ways this could be
1127     * implemented.  The other way that was considered was to put the csel
1128     * around the whole thing:
1129     *
1130     *    final_result = bits == 32 ? insert : ... ;
1131     */
1132    base_ir->insert_before(mask);
1133
1134    base_ir->insert_before(assign(mask, csel(equal(bits, c32),
1135                                             cFFFFFFFF,
1136                                             lshift(sub(lshift(c1, bits),
1137                                                        c1->clone(ir, NULL)),
1138                                                    offset))));
1139
1140    /* (base & ~mask) | ((insert << offset) & mask) */
1141    ir->operation = ir_binop_bit_or;
1142    ir->operands[0] = bit_and(ir->operands[0], bit_not(mask));
1143    ir->operands[1] = bit_and(lshift(ir->operands[1], offset), mask);
1144    ir->operands[2] = NULL;
1145    ir->operands[3] = NULL;
1146
1147    this->progress = true;
1148 }
1149
1150 void
1151 lower_instructions_visitor::reverse_to_shifts(ir_expression *ir)
1152 {
1153    /* For more details, see:
1154     *
1155     * http://graphics.stanford.edu/~seander/bithacks.html#ReverseParallel
1156     */
1157    ir_constant *c1 =
1158       new(ir) ir_constant(1u, ir->operands[0]->type->vector_elements);
1159    ir_constant *c2 =
1160       new(ir) ir_constant(2u, ir->operands[0]->type->vector_elements);
1161    ir_constant *c4 =
1162       new(ir) ir_constant(4u, ir->operands[0]->type->vector_elements);
1163    ir_constant *c8 =
1164       new(ir) ir_constant(8u, ir->operands[0]->type->vector_elements);
1165    ir_constant *c16 =
1166       new(ir) ir_constant(16u, ir->operands[0]->type->vector_elements);
1167    ir_constant *c33333333 =
1168       new(ir) ir_constant(0x33333333u, ir->operands[0]->type->vector_elements);
1169    ir_constant *c55555555 =
1170       new(ir) ir_constant(0x55555555u, ir->operands[0]->type->vector_elements);
1171    ir_constant *c0F0F0F0F =
1172       new(ir) ir_constant(0x0F0F0F0Fu, ir->operands[0]->type->vector_elements);
1173    ir_constant *c00FF00FF =
1174       new(ir) ir_constant(0x00FF00FFu, ir->operands[0]->type->vector_elements);
1175    ir_variable *temp =
1176       new(ir) ir_variable(glsl_type::uvec(ir->operands[0]->type->vector_elements),
1177                           "temp", ir_var_temporary);
1178    ir_instruction &i = *base_ir;
1179
1180    i.insert_before(temp);
1181
1182    if (ir->operands[0]->type->base_type == GLSL_TYPE_UINT) {
1183       i.insert_before(assign(temp, ir->operands[0]));
1184    } else {
1185       assert(ir->operands[0]->type->base_type == GLSL_TYPE_INT);
1186       i.insert_before(assign(temp, i2u(ir->operands[0])));
1187    }
1188
1189    /* Swap odd and even bits.
1190     *
1191     * temp = ((temp >> 1) & 0x55555555u) | ((temp & 0x55555555u) << 1);
1192     */
1193    i.insert_before(assign(temp, bit_or(bit_and(rshift(temp, c1), c55555555),
1194                                        lshift(bit_and(temp, c55555555->clone(ir, NULL)),
1195                                               c1->clone(ir, NULL)))));
1196    /* Swap consecutive pairs.
1197     *
1198     * temp = ((temp >> 2) & 0x33333333u) | ((temp & 0x33333333u) << 2);
1199     */
1200    i.insert_before(assign(temp, bit_or(bit_and(rshift(temp, c2), c33333333),
1201                                        lshift(bit_and(temp, c33333333->clone(ir, NULL)),
1202                                               c2->clone(ir, NULL)))));
1203
1204    /* Swap nibbles.
1205     *
1206     * temp = ((temp >> 4) & 0x0F0F0F0Fu) | ((temp & 0x0F0F0F0Fu) << 4);
1207     */
1208    i.insert_before(assign(temp, bit_or(bit_and(rshift(temp, c4), c0F0F0F0F),
1209                                        lshift(bit_and(temp, c0F0F0F0F->clone(ir, NULL)),
1210                                               c4->clone(ir, NULL)))));
1211
1212    /* The last step is, basically, bswap.  Swap the bytes, then swap the
1213     * words.  When this code is run through GCC on x86, it does generate a
1214     * bswap instruction.
1215     *
1216     * temp = ((temp >> 8) & 0x00FF00FFu) | ((temp & 0x00FF00FFu) << 8);
1217     * temp = ( temp >> 16              ) | ( temp                << 16);
1218     */
1219    i.insert_before(assign(temp, bit_or(bit_and(rshift(temp, c8), c00FF00FF),
1220                                        lshift(bit_and(temp, c00FF00FF->clone(ir, NULL)),
1221                                               c8->clone(ir, NULL)))));
1222
1223    if (ir->operands[0]->type->base_type == GLSL_TYPE_UINT) {
1224       ir->operation = ir_binop_bit_or;
1225       ir->operands[0] = rshift(temp, c16);
1226       ir->operands[1] = lshift(temp, c16->clone(ir, NULL));
1227    } else {
1228       ir->operation = ir_unop_u2i;
1229       ir->operands[0] = bit_or(rshift(temp, c16),
1230                                lshift(temp, c16->clone(ir, NULL)));
1231    }
1232
1233    this->progress = true;
1234 }
1235
1236 void
1237 lower_instructions_visitor::find_lsb_to_float_cast(ir_expression *ir)
1238 {
1239    /* For more details, see:
1240     *
1241     * http://graphics.stanford.edu/~seander/bithacks.html#ZerosOnRightFloatCast
1242     */
1243    const unsigned elements = ir->operands[0]->type->vector_elements;
1244    ir_constant *c0 = new(ir) ir_constant(unsigned(0), elements);
1245    ir_constant *cminus1 = new(ir) ir_constant(int(-1), elements);
1246    ir_constant *c23 = new(ir) ir_constant(int(23), elements);
1247    ir_constant *c7F = new(ir) ir_constant(int(0x7F), elements);
1248    ir_variable *temp =
1249       new(ir) ir_variable(glsl_type::ivec(elements), "temp", ir_var_temporary);
1250    ir_variable *lsb_only =
1251       new(ir) ir_variable(glsl_type::uvec(elements), "lsb_only", ir_var_temporary);
1252    ir_variable *as_float =
1253       new(ir) ir_variable(glsl_type::vec(elements), "as_float", ir_var_temporary);
1254    ir_variable *lsb =
1255       new(ir) ir_variable(glsl_type::ivec(elements), "lsb", ir_var_temporary);
1256
1257    ir_instruction &i = *base_ir;
1258
1259    i.insert_before(temp);
1260
1261    if (ir->operands[0]->type->base_type == GLSL_TYPE_INT) {
1262       i.insert_before(assign(temp, ir->operands[0]));
1263    } else {
1264       assert(ir->operands[0]->type->base_type == GLSL_TYPE_UINT);
1265       i.insert_before(assign(temp, u2i(ir->operands[0])));
1266    }
1267
1268    /* The int-to-float conversion is lossless because (value & -value) is
1269     * either a power of two or zero.  We don't use the result in the zero
1270     * case.  The uint() cast is necessary so that 0x80000000 does not
1271     * generate a negative value.
1272     *
1273     * uint lsb_only = uint(value & -value);
1274     * float as_float = float(lsb_only);
1275     */
1276    i.insert_before(lsb_only);
1277    i.insert_before(assign(lsb_only, i2u(bit_and(temp, neg(temp)))));
1278
1279    i.insert_before(as_float);
1280    i.insert_before(assign(as_float, u2f(lsb_only)));
1281
1282    /* This is basically an open-coded frexp.  Implementations that have a
1283     * native frexp instruction would be better served by that.  This is
1284     * optimized versus a full-featured open-coded implementation in two ways:
1285     *
1286     * - We don't care about a correct result from subnormal numbers (including
1287     *   0.0), so the raw exponent can always be safely unbiased.
1288     *
1289     * - The value cannot be negative, so it does not need to be masked off to
1290     *   extract the exponent.
1291     *
1292     * int lsb = (floatBitsToInt(as_float) >> 23) - 0x7f;
1293     */
1294    i.insert_before(lsb);
1295    i.insert_before(assign(lsb, sub(rshift(bitcast_f2i(as_float), c23), c7F)));
1296
1297    /* Use lsb_only in the comparison instead of temp so that the & (far above)
1298     * can possibly generate the result without an explicit comparison.
1299     *
1300     * (lsb_only == 0) ? -1 : lsb;
1301     *
1302     * Since our input values are all integers, the unbiased exponent must not
1303     * be negative.  It will only be negative (-0x7f, in fact) if lsb_only is
1304     * 0.  Instead of using (lsb_only == 0), we could use (lsb >= 0).  Which is
1305     * better is likely GPU dependent.  Either way, the difference should be
1306     * small.
1307     */
1308    ir->operation = ir_triop_csel;
1309    ir->operands[0] = equal(lsb_only, c0);
1310    ir->operands[1] = cminus1;
1311    ir->operands[2] = new(ir) ir_dereference_variable(lsb);
1312
1313    this->progress = true;
1314 }
1315
1316 void
1317 lower_instructions_visitor::find_msb_to_float_cast(ir_expression *ir)
1318 {
1319    /* For more details, see:
1320     *
1321     * http://graphics.stanford.edu/~seander/bithacks.html#ZerosOnRightFloatCast
1322     */
1323    const unsigned elements = ir->operands[0]->type->vector_elements;
1324    ir_constant *c0 = new(ir) ir_constant(int(0), elements);
1325    ir_constant *cminus1 = new(ir) ir_constant(int(-1), elements);
1326    ir_constant *c23 = new(ir) ir_constant(int(23), elements);
1327    ir_constant *c7F = new(ir) ir_constant(int(0x7F), elements);
1328    ir_constant *c000000FF = new(ir) ir_constant(0x000000FFu, elements);
1329    ir_constant *cFFFFFF00 = new(ir) ir_constant(0xFFFFFF00u, elements);
1330    ir_variable *temp =
1331       new(ir) ir_variable(glsl_type::uvec(elements), "temp", ir_var_temporary);
1332    ir_variable *as_float =
1333       new(ir) ir_variable(glsl_type::vec(elements), "as_float", ir_var_temporary);
1334    ir_variable *msb =
1335       new(ir) ir_variable(glsl_type::ivec(elements), "msb", ir_var_temporary);
1336
1337    ir_instruction &i = *base_ir;
1338
1339    i.insert_before(temp);
1340
1341    if (ir->operands[0]->type->base_type == GLSL_TYPE_UINT) {
1342       i.insert_before(assign(temp, ir->operands[0]));
1343    } else {
1344       assert(ir->operands[0]->type->base_type == GLSL_TYPE_INT);
1345
1346       /* findMSB(uint(abs(some_int))) almost always does the right thing.
1347        * There are two problem values:
1348        *
1349        * * 0x80000000.  Since abs(0x80000000) == 0x80000000, findMSB returns
1350        *   31.  However, findMSB(int(0x80000000)) == 30.
1351        *
1352        * * 0xffffffff.  Since abs(0xffffffff) == 1, findMSB returns
1353        *   31.  Section 8.8 (Integer Functions) of the GLSL 4.50 spec says:
1354        *
1355        *    For a value of zero or negative one, -1 will be returned.
1356        *
1357        * For all negative number cases, including 0x80000000 and 0xffffffff,
1358        * the correct value is obtained from findMSB if instead of negating the
1359        * (already negative) value the logical-not is used.  A conditonal
1360        * logical-not can be achieved in two instructions.
1361        */
1362       ir_variable *as_int =
1363          new(ir) ir_variable(glsl_type::ivec(elements), "as_int", ir_var_temporary);
1364       ir_constant *c31 = new(ir) ir_constant(int(31), elements);
1365
1366       i.insert_before(as_int);
1367       i.insert_before(assign(as_int, ir->operands[0]));
1368       i.insert_before(assign(temp, i2u(expr(ir_binop_bit_xor,
1369                                             as_int,
1370                                             rshift(as_int, c31)))));
1371    }
1372
1373    /* The int-to-float conversion is lossless because bits are conditionally
1374     * masked off the bottom of temp to ensure the value has at most 24 bits of
1375     * data or is zero.  We don't use the result in the zero case.  The uint()
1376     * cast is necessary so that 0x80000000 does not generate a negative value.
1377     *
1378     * float as_float = float(temp > 255 ? temp & ~255 : temp);
1379     */
1380    i.insert_before(as_float);
1381    i.insert_before(assign(as_float, u2f(csel(greater(temp, c000000FF),
1382                                              bit_and(temp, cFFFFFF00),
1383                                              temp))));
1384
1385    /* This is basically an open-coded frexp.  Implementations that have a
1386     * native frexp instruction would be better served by that.  This is
1387     * optimized versus a full-featured open-coded implementation in two ways:
1388     *
1389     * - We don't care about a correct result from subnormal numbers (including
1390     *   0.0), so the raw exponent can always be safely unbiased.
1391     *
1392     * - The value cannot be negative, so it does not need to be masked off to
1393     *   extract the exponent.
1394     *
1395     * int msb = (floatBitsToInt(as_float) >> 23) - 0x7f;
1396     */
1397    i.insert_before(msb);
1398    i.insert_before(assign(msb, sub(rshift(bitcast_f2i(as_float), c23), c7F)));
1399
1400    /* Use msb in the comparison instead of temp so that the subtract can
1401     * possibly generate the result without an explicit comparison.
1402     *
1403     * (msb < 0) ? -1 : msb;
1404     *
1405     * Since our input values are all integers, the unbiased exponent must not
1406     * be negative.  It will only be negative (-0x7f, in fact) if temp is 0.
1407     */
1408    ir->operation = ir_triop_csel;
1409    ir->operands[0] = less(msb, c0);
1410    ir->operands[1] = cminus1;
1411    ir->operands[2] = new(ir) ir_dereference_variable(msb);
1412
1413    this->progress = true;
1414 }
1415
1416 void
1417 lower_instructions_visitor::imul_high_to_mul(ir_expression *ir)
1418 {
1419    /*   ABCD
1420     * * EFGH
1421     * ======
1422     * (GH * CD) + (GH * AB) << 16 + (EF * CD) << 16 + (EF * AB) << 32
1423     *
1424     * In GLSL, (a * b) becomes
1425     *
1426     * uint m1 = (a & 0x0000ffffu) * (b & 0x0000ffffu);
1427     * uint m2 = (a & 0x0000ffffu) * (b >> 16);
1428     * uint m3 = (a >> 16)         * (b & 0x0000ffffu);
1429     * uint m4 = (a >> 16)         * (b >> 16);
1430     *
1431     * uint c1;
1432     * uint c2;
1433     * uint lo_result;
1434     * uint hi_result;
1435     *
1436     * lo_result = uaddCarry(m1, m2 << 16, c1);
1437     * hi_result = m4 + c1;
1438     * lo_result = uaddCarry(lo_result, m3 << 16, c2);
1439     * hi_result = hi_result + c2;
1440     * hi_result = hi_result + (m2 >> 16) + (m3 >> 16);
1441     */
1442    const unsigned elements = ir->operands[0]->type->vector_elements;
1443    ir_variable *src1 =
1444       new(ir) ir_variable(glsl_type::uvec(elements), "src1", ir_var_temporary);
1445    ir_variable *src1h =
1446       new(ir) ir_variable(glsl_type::uvec(elements), "src1h", ir_var_temporary);
1447    ir_variable *src1l =
1448       new(ir) ir_variable(glsl_type::uvec(elements), "src1l", ir_var_temporary);
1449    ir_variable *src2 =
1450       new(ir) ir_variable(glsl_type::uvec(elements), "src2", ir_var_temporary);
1451    ir_variable *src2h =
1452       new(ir) ir_variable(glsl_type::uvec(elements), "src2h", ir_var_temporary);
1453    ir_variable *src2l =
1454       new(ir) ir_variable(glsl_type::uvec(elements), "src2l", ir_var_temporary);
1455    ir_variable *t1 =
1456       new(ir) ir_variable(glsl_type::uvec(elements), "t1", ir_var_temporary);
1457    ir_variable *t2 =
1458       new(ir) ir_variable(glsl_type::uvec(elements), "t2", ir_var_temporary);
1459    ir_variable *lo =
1460       new(ir) ir_variable(glsl_type::uvec(elements), "lo", ir_var_temporary);
1461    ir_variable *hi =
1462       new(ir) ir_variable(glsl_type::uvec(elements), "hi", ir_var_temporary);
1463    ir_variable *different_signs = NULL;
1464    ir_constant *c0000FFFF = new(ir) ir_constant(0x0000FFFFu, elements);
1465    ir_constant *c16 = new(ir) ir_constant(16u, elements);
1466
1467    ir_instruction &i = *base_ir;
1468
1469    i.insert_before(src1);
1470    i.insert_before(src2);
1471    i.insert_before(src1h);
1472    i.insert_before(src2h);
1473    i.insert_before(src1l);
1474    i.insert_before(src2l);
1475
1476    if (ir->operands[0]->type->base_type == GLSL_TYPE_UINT) {
1477       i.insert_before(assign(src1, ir->operands[0]));
1478       i.insert_before(assign(src2, ir->operands[1]));
1479    } else {
1480       assert(ir->operands[0]->type->base_type == GLSL_TYPE_INT);
1481
1482       ir_variable *itmp1 =
1483          new(ir) ir_variable(glsl_type::ivec(elements), "itmp1", ir_var_temporary);
1484       ir_variable *itmp2 =
1485          new(ir) ir_variable(glsl_type::ivec(elements), "itmp2", ir_var_temporary);
1486       ir_constant *c0 = new(ir) ir_constant(int(0), elements);
1487
1488       i.insert_before(itmp1);
1489       i.insert_before(itmp2);
1490       i.insert_before(assign(itmp1, ir->operands[0]));
1491       i.insert_before(assign(itmp2, ir->operands[1]));
1492
1493       different_signs =
1494          new(ir) ir_variable(glsl_type::bvec(elements), "different_signs",
1495                              ir_var_temporary);
1496
1497       i.insert_before(different_signs);
1498       i.insert_before(assign(different_signs, expr(ir_binop_logic_xor,
1499                                                    less(itmp1, c0),
1500                                                    less(itmp2, c0->clone(ir, NULL)))));
1501
1502       i.insert_before(assign(src1, i2u(abs(itmp1))));
1503       i.insert_before(assign(src2, i2u(abs(itmp2))));
1504    }
1505
1506    i.insert_before(assign(src1l, bit_and(src1, c0000FFFF)));
1507    i.insert_before(assign(src2l, bit_and(src2, c0000FFFF->clone(ir, NULL))));
1508    i.insert_before(assign(src1h, rshift(src1, c16)));
1509    i.insert_before(assign(src2h, rshift(src2, c16->clone(ir, NULL))));
1510
1511    i.insert_before(lo);
1512    i.insert_before(hi);
1513    i.insert_before(t1);
1514    i.insert_before(t2);
1515
1516    i.insert_before(assign(lo, mul(src1l, src2l)));
1517    i.insert_before(assign(t1, mul(src1l, src2h)));
1518    i.insert_before(assign(t2, mul(src1h, src2l)));
1519    i.insert_before(assign(hi, mul(src1h, src2h)));
1520
1521    i.insert_before(assign(hi, add(hi, carry(lo, lshift(t1, c16->clone(ir, NULL))))));
1522    i.insert_before(assign(lo,           add(lo, lshift(t1, c16->clone(ir, NULL)))));
1523
1524    i.insert_before(assign(hi, add(hi, carry(lo, lshift(t2, c16->clone(ir, NULL))))));
1525    i.insert_before(assign(lo,           add(lo, lshift(t2, c16->clone(ir, NULL)))));
1526
1527    if (different_signs == NULL) {
1528       assert(ir->operands[0]->type->base_type == GLSL_TYPE_UINT);
1529
1530       ir->operation = ir_binop_add;
1531       ir->operands[0] = add(hi, rshift(t1, c16->clone(ir, NULL)));
1532       ir->operands[1] = rshift(t2, c16->clone(ir, NULL));
1533    } else {
1534       assert(ir->operands[0]->type->base_type == GLSL_TYPE_INT);
1535
1536       i.insert_before(assign(hi, add(add(hi, rshift(t1, c16->clone(ir, NULL))),
1537                                      rshift(t2, c16->clone(ir, NULL)))));
1538
1539       /* For channels where different_signs is set we have to perform a 64-bit
1540        * negation.  This is *not* the same as just negating the high 32-bits.
1541        * Consider -3 * 2.  The high 32-bits is 0, but the desired result is
1542        * -1, not -0!  Recall -x == ~x + 1.
1543        */
1544       ir_variable *neg_hi =
1545          new(ir) ir_variable(glsl_type::ivec(elements), "neg_hi", ir_var_temporary);
1546       ir_constant *c1 = new(ir) ir_constant(1u, elements);
1547
1548       i.insert_before(neg_hi);
1549       i.insert_before(assign(neg_hi, add(bit_not(u2i(hi)),
1550                                          u2i(carry(bit_not(lo), c1)))));
1551
1552       ir->operation = ir_triop_csel;
1553       ir->operands[0] = new(ir) ir_dereference_variable(different_signs);
1554       ir->operands[1] = new(ir) ir_dereference_variable(neg_hi);
1555       ir->operands[2] = u2i(hi);
1556    }
1557 }
1558
1559 ir_visitor_status
1560 lower_instructions_visitor::visit_leave(ir_expression *ir)
1561 {
1562    switch (ir->operation) {
1563    case ir_binop_dot:
1564       if (ir->operands[0]->type->is_double())
1565          double_dot_to_fma(ir);
1566       break;
1567    case ir_triop_lrp:
1568       if (ir->operands[0]->type->is_double())
1569          double_lrp(ir);
1570       break;
1571    case ir_binop_sub:
1572       if (lowering(SUB_TO_ADD_NEG))
1573          sub_to_add_neg(ir);
1574       break;
1575
1576    case ir_binop_div:
1577       if (ir->operands[1]->type->is_integer() && lowering(INT_DIV_TO_MUL_RCP))
1578          int_div_to_mul_rcp(ir);
1579       else if ((ir->operands[1]->type->is_float() ||
1580                 ir->operands[1]->type->is_double()) && lowering(DIV_TO_MUL_RCP))
1581          div_to_mul_rcp(ir);
1582       break;
1583
1584    case ir_unop_exp:
1585       if (lowering(EXP_TO_EXP2))
1586          exp_to_exp2(ir);
1587       break;
1588
1589    case ir_unop_log:
1590       if (lowering(LOG_TO_LOG2))
1591          log_to_log2(ir);
1592       break;
1593
1594    case ir_binop_mod:
1595       if (lowering(MOD_TO_FLOOR) && (ir->type->is_float() || ir->type->is_double()))
1596          mod_to_floor(ir);
1597       break;
1598
1599    case ir_binop_pow:
1600       if (lowering(POW_TO_EXP2))
1601          pow_to_exp2(ir);
1602       break;
1603
1604    case ir_binop_ldexp:
1605       if (lowering(LDEXP_TO_ARITH) && ir->type->is_float())
1606          ldexp_to_arith(ir);
1607       if (lowering(DFREXP_DLDEXP_TO_ARITH) && ir->type->is_double())
1608          dldexp_to_arith(ir);
1609       break;
1610
1611    case ir_unop_frexp_exp:
1612       if (lowering(DFREXP_DLDEXP_TO_ARITH) && ir->operands[0]->type->is_double())
1613          dfrexp_exp_to_arith(ir);
1614       break;
1615
1616    case ir_unop_frexp_sig:
1617       if (lowering(DFREXP_DLDEXP_TO_ARITH) && ir->operands[0]->type->is_double())
1618          dfrexp_sig_to_arith(ir);
1619       break;
1620
1621    case ir_binop_carry:
1622       if (lowering(CARRY_TO_ARITH))
1623          carry_to_arith(ir);
1624       break;
1625
1626    case ir_binop_borrow:
1627       if (lowering(BORROW_TO_ARITH))
1628          borrow_to_arith(ir);
1629       break;
1630
1631    case ir_unop_saturate:
1632       if (lowering(SAT_TO_CLAMP))
1633          sat_to_clamp(ir);
1634       break;
1635
1636    case ir_unop_trunc:
1637       if (lowering(DOPS_TO_DFRAC) && ir->type->is_double())
1638          dtrunc_to_dfrac(ir);
1639       break;
1640
1641    case ir_unop_ceil:
1642       if (lowering(DOPS_TO_DFRAC) && ir->type->is_double())
1643          dceil_to_dfrac(ir);
1644       break;
1645
1646    case ir_unop_floor:
1647       if (lowering(DOPS_TO_DFRAC) && ir->type->is_double())
1648          dfloor_to_dfrac(ir);
1649       break;
1650
1651    case ir_unop_round_even:
1652       if (lowering(DOPS_TO_DFRAC) && ir->type->is_double())
1653          dround_even_to_dfrac(ir);
1654       break;
1655
1656    case ir_unop_sign:
1657       if (lowering(DOPS_TO_DFRAC) && ir->type->is_double())
1658          dsign_to_csel(ir);
1659       break;
1660
1661    case ir_unop_bit_count:
1662       if (lowering(BIT_COUNT_TO_MATH))
1663          bit_count_to_math(ir);
1664       break;
1665
1666    case ir_triop_bitfield_extract:
1667       if (lowering(EXTRACT_TO_SHIFTS))
1668          extract_to_shifts(ir);
1669       break;
1670
1671    case ir_quadop_bitfield_insert:
1672       if (lowering(INSERT_TO_SHIFTS))
1673          insert_to_shifts(ir);
1674       break;
1675
1676    case ir_unop_bitfield_reverse:
1677       if (lowering(REVERSE_TO_SHIFTS))
1678          reverse_to_shifts(ir);
1679       break;
1680
1681    case ir_unop_find_lsb:
1682       if (lowering(FIND_LSB_TO_FLOAT_CAST))
1683          find_lsb_to_float_cast(ir);
1684       break;
1685
1686    case ir_unop_find_msb:
1687       if (lowering(FIND_MSB_TO_FLOAT_CAST))
1688          find_msb_to_float_cast(ir);
1689       break;
1690
1691    case ir_binop_imul_high:
1692       if (lowering(IMUL_HIGH_TO_MUL))
1693          imul_high_to_mul(ir);
1694       break;
1695
1696    default:
1697       return visit_continue;
1698    }
1699
1700    return visit_continue;
1701 }