src/compiler/glsl/lower_blend_equation_advanced.cpp

   1 /*
   2  * Copyright © 2016 Intel Corporation
   3  *
   4  * Permission is hereby granted, free of charge, to any person obtaining a
   5  * copy of this software and associated documentation files (the "Software"),
   6  * to deal in the Software without restriction, including without limitation
   7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
   8  * and/or sell copies of the Software, and to permit persons to whom the
   9  * Software is furnished to do so, subject to the following conditions:
  10  *
  11  * The above copyright notice and this permission notice (including the next
  12  * paragraph) shall be included in all copies or substantial portions of the
  13  * Software.
  14  *
  15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
  18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
  20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
  21  * DEALINGS IN THE SOFTWARE.
  22  */
  23
#include <string.h>

#include "ir.h"
#include "ir_builder.h"
#include "ir_optimization.h"
#include "ir_hierarchical_visitor.h"
#include "program/prog_instruction.h"
#include "program/prog_statevars.h"
#include "util/bitscan.h"
  31
  32 using namespace ir_builder;
  33
  34 #define imm1(x) new(mem_ctx) ir_constant((float) (x), 1)
  35 #define imm3(x) new(mem_ctx) ir_constant((float) (x), 3)
  36
  37 static ir_rvalue *
  38 blend_multiply(ir_variable *src, ir_variable *dst)
  39 {
  40    /* f(Cs,Cd) = Cs*Cd */
  41    return mul(src, dst);
  42 }
  43
  44 static ir_rvalue *
  45 blend_screen(ir_variable *src, ir_variable *dst)
  46 {
  47    /* f(Cs,Cd) = Cs+Cd-Cs*Cd */
  48    return sub(add(src, dst), mul(src, dst));
  49 }
  50
  51 static ir_rvalue *
  52 blend_overlay(ir_variable *src, ir_variable *dst)
  53 {
  54    void *mem_ctx = ralloc_parent(src);
  55
  56    /* f(Cs,Cd) = 2*Cs*Cd, if Cd <= 0.5
  57     *            1-2*(1-Cs)*(1-Cd), otherwise
  58     */
  59    ir_rvalue *rule_1 = mul(imm3(2), mul(src, dst));
  60    ir_rvalue *rule_2 =
  61       sub(imm3(1), mul(imm3(2), mul(sub(imm3(1), src), sub(imm3(1), dst))));
  62    return csel(lequal(dst, imm3(0.5f)), rule_1, rule_2);
  63 }
  64
  65 static ir_rvalue *
  66 blend_darken(ir_variable *src, ir_variable *dst)
  67 {
  68    /* f(Cs,Cd) = min(Cs,Cd) */
  69    return min2(src, dst);
  70 }
  71
  72 static ir_rvalue *
  73 blend_lighten(ir_variable *src, ir_variable *dst)
  74 {
  75    /* f(Cs,Cd) = max(Cs,Cd) */
  76    return max2(src, dst);
  77 }
  78
  79 static ir_rvalue *
  80 blend_colordodge(ir_variable *src, ir_variable *dst)
  81 {
  82    void *mem_ctx = ralloc_parent(src);
  83
  84    /* f(Cs,Cd) =
  85     *   0, if Cd <= 0
  86     *   min(1,Cd/(1-Cs)), if Cd > 0 and Cs < 1
  87     *   1, if Cd > 0 and Cs >= 1
  88     */
  89    return csel(lequal(dst, imm3(0)), imm3(0),
  90                csel(gequal(src, imm3(1)), imm3(1),
  91                     min2(imm3(1), div(dst, sub(imm3(1), src)))));
  92 }
  93
  94 static ir_rvalue *
  95 blend_colorburn(ir_variable *src, ir_variable *dst)
  96 {
  97    void *mem_ctx = ralloc_parent(src);
  98
  99    /* f(Cs,Cd) =
 100     *   1, if Cd >= 1
 101     *   1 - min(1,(1-Cd)/Cs), if Cd < 1 and Cs > 0
 102     *   0, if Cd < 1 and Cs <= 0
 103     */
 104    return csel(gequal(dst, imm3(1)), imm3(1),
 105                csel(lequal(src, imm3(0)), imm3(0),
 106                     sub(imm3(1), min2(imm3(1), div(sub(imm3(1), dst), src)))));
 107 }
 108
 109 static ir_rvalue *
 110 blend_hardlight(ir_variable *src, ir_variable *dst)
 111 {
 112    void *mem_ctx = ralloc_parent(src);
 113
 114    /* f(Cs,Cd) = 2*Cs*Cd, if Cs <= 0.5
 115     *            1-2*(1-Cs)*(1-Cd), otherwise
 116     */
 117    ir_rvalue *rule_1 = mul(imm3(2), mul(src, dst));
 118    ir_rvalue *rule_2 =
 119       sub(imm3(1), mul(imm3(2), mul(sub(imm3(1), src), sub(imm3(1), dst))));
 120    return csel(lequal(src, imm3(0.5f)), rule_1, rule_2);
 121 }
 122
 123 static ir_rvalue *
 124 blend_softlight(ir_variable *src, ir_variable *dst)
 125 {
 126    void *mem_ctx = ralloc_parent(src);
 127
 128    /* f(Cs,Cd) =
 129     *   Cd-(1-2*Cs)*Cd*(1-Cd),
 130     *     if Cs <= 0.5
 131     *   Cd+(2*Cs-1)*Cd*((16*Cd-12)*Cd+3),
 132     *     if Cs > 0.5 and Cd <= 0.25
 133     *   Cd+(2*Cs-1)*(sqrt(Cd)-Cd),
 134     *     if Cs > 0.5 and Cd > 0.25
 135     *
 136     * We can simplify this to
 137     *
 138     * f(Cs,Cd) = Cd+(2*Cs-1)*g(Cs,Cd) where
 139     * g(Cs,Cd) = Cd*Cd-Cd             if Cs <= 0.5
 140     *            Cd*((16*Cd-12)*Cd+3) if Cs > 0.5 and Cd <= 0.25
 141     *            sqrt(Cd)-Cd,         otherwise
 142     */
 143    ir_rvalue *factor_1 = mul(dst, sub(imm3(1), dst));
 144    ir_rvalue *factor_2 =
 145       mul(dst, add(mul(sub(mul(imm3(16), dst), imm3(12)), dst), imm3(3)));
 146    ir_rvalue *factor_3 = sub(sqrt(dst), dst);
 147    ir_rvalue *factor = csel(lequal(src, imm3(0.5f)), factor_1,
 148                             csel(lequal(dst, imm3(0.25f)),
 149                                         factor_2, factor_3));
 150    return add(dst, mul(sub(mul(imm3(2), src), imm3(1)), factor));
 151 }
 152
 153 static ir_rvalue *
 154 blend_difference(ir_variable *src, ir_variable *dst)
 155 {
 156    return abs(sub(dst, src));
 157 }
 158
 159 static ir_rvalue *
 160 blend_exclusion(ir_variable *src, ir_variable *dst)
 161 {
 162    void *mem_ctx = ralloc_parent(src);
 163
 164    return add(src, sub(dst, mul(imm3(2), mul(src, dst))));
 165 }
 166
 167 /* Return the minimum of a vec3's components */
 168 static ir_rvalue *
 169 minv3(ir_variable *v)
 170 {
 171    return min2(min2(swizzle_x(v), swizzle_y(v)), swizzle_z(v));
 172 }
 173
 174 /* Return the maximum of a vec3's components */
 175 static ir_rvalue *
 176 maxv3(ir_variable *v)
 177 {
 178    return max2(max2(swizzle_x(v), swizzle_y(v)), swizzle_z(v));
 179 }
 180
 181 static ir_rvalue *
 182 lumv3(ir_variable *c)
 183 {
 184    ir_constant_data data;
 185    data.f[0] = 0.30;
 186    data.f[1] = 0.59;
 187    data.f[2] = 0.11;
 188
 189    void *mem_ctx = ralloc_parent(c);
 190
 191    /* dot(c, vec3(0.30, 0.59, 0.11)) */
 192    return dot(c, new(mem_ctx) ir_constant(glsl_type::vec3_type, &data));
 193 }
 194
 195 static ir_rvalue *
 196 satv3(ir_variable *c)
 197 {
 198    return sub(maxv3(c), minv3(c));
 199 }
 200
 201 /* Take the base RGB color <cbase> and override its luminosity with that
 202  * of the RGB color <clum>.
 203  *
 204  * This follows the equations given in the ES 3.2 (June 15th, 2016)
 205  * specification.  Revision 16 of GL_KHR_blend_equation_advanced and
 206  * revision 9 of GL_NV_blend_equation_advanced specify a different set
 207  * of equations.  Older revisions match ES 3.2's text, and dEQP expects
 208  * the ES 3.2 rules implemented here.
 209  */
 210 static void
 211 set_lum(ir_factory *f,
 212         ir_variable *color,
 213         ir_variable *cbase,
 214         ir_variable *clum)
 215 {
 216    void *mem_ctx = f->mem_ctx;
 217    f->emit(assign(color, add(cbase, sub(lumv3(clum), lumv3(cbase)))));
 218
 219    ir_variable *llum = f->make_temp(glsl_type::float_type, "__blend_lum");
 220    ir_variable *mincol = f->make_temp(glsl_type::float_type, "__blend_mincol");
 221    ir_variable *maxcol = f->make_temp(glsl_type::float_type, "__blend_maxcol");
 222
 223    f->emit(assign(llum, lumv3(color)));
 224    f->emit(assign(mincol, minv3(color)));
 225    f->emit(assign(maxcol, maxv3(color)));
 226
 227    f->emit(if_tree(less(mincol, imm1(0)),
 228                    assign(color, add(llum, div(mul(sub(color, llum), llum),
 229                                                sub(llum, mincol)))),
 230                    if_tree(greater(maxcol, imm1(1)),
 231                            assign(color, add(llum, div(mul(sub(color, llum),
 232                                                            sub(imm3(1), llum)),
 233                                                        sub(maxcol, llum)))))));
 234
 235 }
 236
 237 /* Take the base RGB color <cbase> and override its saturation with
 238  * that of the RGB color <csat>.  The override the luminosity of the
 239  * result with that of the RGB color <clum>.
 240  */
 241 static void
 242 set_lum_sat(ir_factory *f,
 243             ir_variable *color,
 244             ir_variable *cbase,
 245             ir_variable *csat,
 246             ir_variable *clum)
 247 {
 248    void *mem_ctx = f->mem_ctx;
 249
 250    ir_rvalue *minbase = minv3(cbase);
 251    ir_rvalue *ssat = satv3(csat);
 252
 253    ir_variable *sbase = f->make_temp(glsl_type::float_type, "__blend_sbase");
 254    f->emit(assign(sbase, satv3(cbase)));
 255
 256    /* Equivalent (modulo rounding errors) to setting the
 257     * smallest (R,G,B) component to 0, the largest to <ssat>,
 258     * and interpolating the "middle" component based on its
 259     * original value relative to the smallest/largest.
 260     */
 261    f->emit(if_tree(greater(sbase, imm1(0)),
 262                    assign(color, div(mul(sub(cbase, minbase), ssat), sbase)),
 263                    assign(color, imm3(0))));
 264    set_lum(f, color, color, clum);
 265 }
 266
 267 static ir_rvalue *
 268 is_mode(ir_variable *mode, enum gl_advanced_blend_mode q)
 269 {
 270    return equal(mode, new(ralloc_parent(mode)) ir_constant(unsigned(q)));
 271 }
 272
 273 static ir_variable *
 274 calc_blend_result(ir_factory f,
 275                   ir_variable *mode,
 276                   ir_variable *fb,
 277                   ir_rvalue *blend_src,
 278                   GLbitfield blend_qualifiers)
 279 {
 280    void *mem_ctx = f.mem_ctx;
 281    ir_variable *result = f.make_temp(glsl_type::vec4_type, "__blend_result");
 282
 283    /* Save blend_src to a temporary so we can reference it multiple times. */
 284    ir_variable *src = f.make_temp(glsl_type::vec4_type, "__blend_src");
 285    f.emit(assign(src, blend_src));
 286
 287    /* If we're not doing advanced blending, just write the original value. */
 288    ir_if *if_blending = new(mem_ctx) ir_if(is_mode(mode, BLEND_NONE));
 289    f.emit(if_blending);
 290    if_blending->then_instructions.push_tail(assign(result, src));
 291
 292    f.instructions = &if_blending->else_instructions;
 293
 294    /* (Rs', Gs', Bs') =
 295     *   (0, 0, 0),              if As == 0
 296     *   (Rs/As, Gs/As, Bs/As),  otherwise
 297     */
 298    ir_variable *src_rgb = f.make_temp(glsl_type::vec3_type, "__blend_src_rgb");
 299    ir_variable *src_alpha = f.make_temp(glsl_type::float_type, "__blend_src_a");
 300
 301    /* (Rd', Gd', Bd') =
 302     *   (0, 0, 0),              if Ad == 0
 303     *   (Rd/Ad, Gd/Ad, Bd/Ad),  otherwise
 304     */
 305    ir_variable *dst_rgb = f.make_temp(glsl_type::vec3_type, "__blend_dst_rgb");
 306    ir_variable *dst_alpha = f.make_temp(glsl_type::float_type, "__blend_dst_a");
 307
 308    f.emit(assign(dst_alpha, swizzle_w(fb)));
 309    f.emit(if_tree(equal(dst_alpha, imm1(0)),
 310                      assign(dst_rgb, imm3(0)),
 311                      assign(dst_rgb, div(swizzle_xyz(fb), dst_alpha))));
 312
 313    f.emit(assign(src_alpha, swizzle_w(src)));
 314    f.emit(if_tree(equal(src_alpha, imm1(0)),
 315                      assign(src_rgb, imm3(0)),
 316                      assign(src_rgb, div(swizzle_xyz(src), src_alpha))));
 317
 318    ir_variable *factor = f.make_temp(glsl_type::vec3_type, "__blend_factor");
 319
 320    ir_factory casefactory = f;
 321
 322    unsigned choices = blend_qualifiers;
 323    while (choices) {
 324       enum gl_advanced_blend_mode choice = (enum gl_advanced_blend_mode)
 325          (1u << u_bit_scan(&choices));
 326
 327       ir_if *iff = new(mem_ctx) ir_if(is_mode(mode, choice));
 328       casefactory.emit(iff);
 329       casefactory.instructions = &iff->then_instructions;
 330
 331       ir_rvalue *val = NULL;
 332
 333       switch (choice) {
 334       case BLEND_MULTIPLY:
 335          val = blend_multiply(src_rgb, dst_rgb);
 336          break;
 337       case BLEND_SCREEN:
 338          val = blend_screen(src_rgb, dst_rgb);
 339          break;
 340       case BLEND_OVERLAY:
 341          val = blend_overlay(src_rgb, dst_rgb);
 342          break;
 343       case BLEND_DARKEN:
 344          val = blend_darken(src_rgb, dst_rgb);
 345          break;
 346       case BLEND_LIGHTEN:
 347          val = blend_lighten(src_rgb, dst_rgb);
 348          break;
 349       case BLEND_COLORDODGE:
 350          val = blend_colordodge(src_rgb, dst_rgb);
 351          break;
 352       case BLEND_COLORBURN:
 353          val = blend_colorburn(src_rgb, dst_rgb);
 354          break;
 355       case BLEND_HARDLIGHT:
 356          val = blend_hardlight(src_rgb, dst_rgb);
 357          break;
 358       case BLEND_SOFTLIGHT:
 359          val = blend_softlight(src_rgb, dst_rgb);
 360          break;
 361       case BLEND_DIFFERENCE:
 362          val = blend_difference(src_rgb, dst_rgb);
 363          break;
 364       case BLEND_EXCLUSION:
 365          val = blend_exclusion(src_rgb, dst_rgb);
 366          break;
 367       case BLEND_HSL_HUE:
 368          set_lum_sat(&casefactory, factor, src_rgb, dst_rgb, dst_rgb);
 369          break;
 370       case BLEND_HSL_SATURATION:
 371          set_lum_sat(&casefactory, factor, dst_rgb, src_rgb, dst_rgb);
 372          break;
 373       case BLEND_HSL_COLOR:
 374          set_lum(&casefactory, factor, src_rgb, dst_rgb);
 375          break;
 376       case BLEND_HSL_LUMINOSITY:
 377          set_lum(&casefactory, factor, dst_rgb, src_rgb);
 378          break;
 379       case BLEND_NONE:
 380       case BLEND_ALL:
 381          unreachable("not real cases");
 382       }
 383
 384       if (val)
 385          casefactory.emit(assign(factor, val));
 386
 387       casefactory.instructions = &iff->else_instructions;
 388    }
 389
 390    /* p0(As,Ad) = As*Ad
 391     * p1(As,Ad) = As*(1-Ad)
 392     * p2(As,Ad) = Ad*(1-As)
 393     */
 394    ir_variable *p0 = f.make_temp(glsl_type::float_type, "__blend_p0");
 395    ir_variable *p1 = f.make_temp(glsl_type::float_type, "__blend_p1");
 396    ir_variable *p2 = f.make_temp(glsl_type::float_type, "__blend_p2");
 397
 398    f.emit(assign(p0, mul(src_alpha, dst_alpha)));
 399    f.emit(assign(p1, mul(src_alpha, sub(imm1(1), dst_alpha))));
 400    f.emit(assign(p2, mul(dst_alpha, sub(imm1(1), src_alpha))));
 401
 402    /* R = f(Rs',Rd')*p0(As,Ad) + Y*Rs'*p1(As,Ad) + Z*Rd'*p2(As,Ad)
 403     * G = f(Gs',Gd')*p0(As,Ad) + Y*Gs'*p1(As,Ad) + Z*Gd'*p2(As,Ad)
 404     * B = f(Bs',Bd')*p0(As,Ad) + Y*Bs'*p1(As,Ad) + Z*Bd'*p2(As,Ad)
 405     * A =          X*p0(As,Ad) +     Y*p1(As,Ad) +     Z*p2(As,Ad)
 406     *
 407     * <X, Y, Z> is always <1, 1, 1>, so we can ignore it.
 408     *
 409     * In vector form, this is:
 410     * RGB = factor * p0 + Cs * p1 + Cd * p2
 411     *   A = p0 + p1 + p2
 412     */
 413    f.emit(assign(result,
 414                  add(add(mul(factor, p0), mul(src_rgb, p1)), mul(dst_rgb, p2)),
 415                  WRITEMASK_XYZ));
 416    f.emit(assign(result, add(add(p0, p1), p2), WRITEMASK_W));
 417
 418    return result;
 419 }
 420
 421 /**
 422  * Dereference var, or var[0] if it's an array.
 423  */
 424 static ir_dereference *
 425 deref_output(ir_variable *var)
 426 {
 427    void *mem_ctx = ralloc_parent(var);
 428
 429    ir_dereference *val = new(mem_ctx) ir_dereference_variable(var);
 430    if (val->type->is_array()) {
 431       ir_constant *index = new(mem_ctx) ir_constant(0);
 432       val = new(mem_ctx) ir_dereference_array(val, index);
 433    }
 434
 435    return val;
 436 }
 437
 438 static ir_function_signature *
 439 get_main(gl_linked_shader *sh)
 440 {
 441    ir_function_signature *sig = NULL;
 442    /* We can't use _mesa_get_main_function_signature() because we don't
 443     * have a symbol table at this point.  Just go find main() by hand.
 444     */
 445    foreach_in_list(ir_instruction, ir, sh->ir) {
 446       ir_function *f = ir->as_function();
 447       if (f && strcmp(f->name, "main") == 0) {
 448          exec_list void_parameters;
 449          sig = f->matching_signature(NULL, &void_parameters, false);
 450          break;
 451       }
 452    }
 453    assert(sig != NULL); /* main() must exist */
 454    return sig;
 455 }
 456
 457 bool
 458 lower_blend_equation_advanced(struct gl_linked_shader *sh)
 459 {
 460    if (sh->info.BlendSupport == 0)
 461       return false;
 462
 463    /* Lower early returns in main() so there's a single exit point
 464     * where we can insert our lowering code.
 465     */
 466    do_lower_jumps(sh->ir, false, false, true, false, false);
 467
 468    void *mem_ctx = ralloc_parent(sh->ir);
 469
 470    ir_variable *fb = new(mem_ctx) ir_variable(glsl_type::vec4_type,
 471                                               "__blend_fb_fetch",
 472                                               ir_var_shader_out);
 473    fb->data.location = FRAG_RESULT_DATA0;
 474    fb->data.read_only = 1;
 475    fb->data.fb_fetch_output = 1;
 476    fb->data.how_declared = ir_var_hidden;
 477
 478    ir_variable *mode = new(mem_ctx) ir_variable(glsl_type::uint_type,
 479                                                 "gl_AdvancedBlendModeMESA",
 480                                                 ir_var_uniform);
 481    mode->data.how_declared = ir_var_hidden;
 482    mode->allocate_state_slots(1);
 483    ir_state_slot *slot0 = &mode->get_state_slots()[0];
 484    slot0->swizzle = SWIZZLE_XXXX;
 485    slot0->tokens[0] = STATE_INTERNAL;
 486    slot0->tokens[1] = STATE_ADVANCED_BLENDING_MODE;
 487    for (int i = 2; i < STATE_LENGTH; i++)
 488       slot0->tokens[i] = 0;
 489
 490    sh->ir->push_head(fb);
 491    sh->ir->push_head(mode);
 492
 493    /* Gather any output variables referring to render target 0.
 494     *
 495     * ARB_enhanced_layouts irritatingly allows the shader to specify
 496     * multiple output variables for the same render target, each of
 497     * which writes a subset of the components, starting at location_frac.
 498     * The variables can't overlap, thankfully.
 499     */
 500    ir_variable *outputs[4] = { NULL, NULL, NULL, NULL };
 501    foreach_in_list(ir_instruction, ir, sh->ir) {
 502       ir_variable *var = ir->as_variable();
 503       if (!var || var->data.mode != ir_var_shader_out)
 504          continue;
 505
 506       if (var->data.location == FRAG_RESULT_DATA0 ||
 507           var->data.location == FRAG_RESULT_COLOR) {
 508          const int components = var->type->without_array()->vector_elements;
 509
 510          for (int i = 0; i < components; i++) {
 511             outputs[var->data.location_frac + i] = var;
 512          }
 513       }
 514    }
 515
 516    /* Combine values written to outputs into a single RGBA blend source.
 517     * We assign <0, 0, 0, 1> to any components with no corresponding output.
 518     */
 519    ir_rvalue *blend_source;
 520    if (outputs[0] && outputs[0]->type->without_array()->vector_elements == 4) {
 521       blend_source = deref_output(outputs[0]);
 522    } else {
 523       ir_rvalue *blend_comps[4];
 524       for (int i = 0; i < 4; i++) {
 525          ir_variable *var = outputs[i];
 526          if (var) {
 527             blend_comps[i] = swizzle(deref_output(outputs[i]),
 528                                      i - outputs[i]->data.location_frac, 1);
 529          } else {
 530             blend_comps[i] = new(mem_ctx) ir_constant(i < 3 ? 0.0f : 1.0f);
 531          }
 532       }
 533
 534       blend_source =
 535          new(mem_ctx) ir_expression(ir_quadop_vector, glsl_type::vec4_type,
 536                                     blend_comps[0], blend_comps[1],
 537                                     blend_comps[2], blend_comps[3]);
 538    }
 539
 540    ir_function_signature *main = get_main(sh);
 541    ir_factory f(&main->body, mem_ctx);
 542
 543    ir_variable *result_dest =
 544       calc_blend_result(f, mode, fb, blend_source, sh->info.BlendSupport);
 545
 546    /* Copy the result back to the original values.  It would be simpler
 547     * to demote the program's output variables, and create a new vec4
 548     * output for our result, but this pass runs before we create the
 549     * ARB_program_interface_query resource list.  So we have to leave
 550     * the original outputs in place and use them.
 551     */
 552    for (int i = 0; i < 4; i++) {
 553       if (!outputs[i])
 554          continue;
 555
 556       f.emit(assign(deref_output(outputs[i]), swizzle(result_dest, i, 1),
 557                     1 << i));
 558    }
 559
 560    validate_ir_tree(sh->ir);
 561    return true;
 562 }