src/mesa/drivers/dri/i965/brw_wm_emit.c

   1 /*
   2  Copyright (C) Intel Corp.  2006.  All Rights Reserved.
   3  Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
   4  develop this 3D driver.
   5
   6  Permission is hereby granted, free of charge, to any person obtaining
   7  a copy of this software and associated documentation files (the
   8  "Software"), to deal in the Software without restriction, including
   9  without limitation the rights to use, copy, modify, merge, publish,
  10  distribute, sublicense, and/or sell copies of the Software, and to
  11  permit persons to whom the Software is furnished to do so, subject to
  12  the following conditions:
  13
  14  The above copyright notice and this permission notice (including the
  15  next paragraph) shall be included in all copies or substantial
  16  portions of the Software.
  17
  18  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
  19  EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  20  MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
  21  IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
  22  LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
  23  OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
  24  WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
  25
  26  **********************************************************************/
  27  /*
  28   * Authors:
  29   *   Keith Whitwell <keith@tungstengraphics.com>
  30   */
  31
  32
  33 #include "main/macros.h"
  34 #include "brw_context.h"
  35 #include "brw_wm.h"
  36
  37 static bool
  38 can_do_pln(struct intel_context *intel, const struct brw_reg *deltas)
  39 {
  40    struct brw_context *brw = brw_context(&intel->ctx);
  41
  42    if (!brw->has_pln)
  43       return false;
  44
  45    if (deltas[1].nr != deltas[0].nr + 1)
  46       return false;
  47
  48    if (intel->gen < 6 && ((deltas[0].nr & 1) != 0))
  49       return false;
  50
  51    return true;
  52 }
  53
  54 /* Return the SrcReg index of the channels that can be immediate float operands
  55  * instead of usage of PROGRAM_CONSTANT values through push/pull.
  56  */
  57 bool
  58 brw_wm_arg_can_be_immediate(enum prog_opcode opcode, int arg)
  59 {
  60    int opcode_array[] = {
  61       [OPCODE_ADD] = 2,
  62       [OPCODE_CMP] = 3,
  63       [OPCODE_DP3] = 2,
  64       [OPCODE_DP4] = 2,
  65       [OPCODE_DPH] = 2,
  66       [OPCODE_MAX] = 2,
  67       [OPCODE_MIN] = 2,
  68       [OPCODE_MOV] = 1,
  69       [OPCODE_MUL] = 2,
  70       [OPCODE_SEQ] = 2,
  71       [OPCODE_SGE] = 2,
  72       [OPCODE_SGT] = 2,
  73       [OPCODE_SLE] = 2,
  74       [OPCODE_SLT] = 2,
  75       [OPCODE_SNE] = 2,
  76       [OPCODE_SWZ] = 1,
  77       [OPCODE_XPD] = 2,
  78    };
  79
  80    /* These opcodes get broken down in a way that allow two
  81     * args to be immediates.
  82     */
  83    if (opcode == OPCODE_MAD || opcode == OPCODE_LRP) {
  84       if (arg == 1 || arg == 2)
  85          return true;
  86    }
  87
  88    if (opcode > ARRAY_SIZE(opcode_array))
  89       return false;
  90
  91    return arg == opcode_array[opcode] - 1;
  92 }
  93
  94 /**
  95  * Computes the screen-space x,y position of the pixels.
  96  *
  97  * This will be used by emit_delta_xy() or emit_wpos_xy() for
  98  * interpolation of attributes..
  99  *
 100  * Payload R0:
 101  *
 102  * R0.0 -- pixel mask, one bit for each of 4 pixels in 4 tiles,
 103  *         corresponding to each of the 16 execution channels.
 104  * R0.1..8 -- ?
 105  * R1.0 -- triangle vertex 0.X
 106  * R1.1 -- triangle vertex 0.Y
 107  * R1.2 -- tile 0 x,y coords (2 packed uwords)
 108  * R1.3 -- tile 1 x,y coords (2 packed uwords)
 109  * R1.4 -- tile 2 x,y coords (2 packed uwords)
 110  * R1.5 -- tile 3 x,y coords (2 packed uwords)
 111  * R1.6 -- ?
 112  * R1.7 -- ?
 113  * R1.8 -- ?
 114  */
 115 void emit_pixel_xy(struct brw_wm_compile *c,
 116                    const struct brw_reg *dst,
 117                    GLuint mask)
 118 {
 119    struct brw_compile *p = &c->func;
 120    struct brw_reg r1 = brw_vec1_grf(1, 0);
 121    struct brw_reg r1_uw = retype(r1, BRW_REGISTER_TYPE_UW);
 122    struct brw_reg dst0_uw, dst1_uw;
 123
 124    brw_push_insn_state(p);
 125    brw_set_compression_control(p, BRW_COMPRESSION_NONE);
 126
 127    if (c->dispatch_width == 16) {
 128       dst0_uw = vec16(retype(dst[0], BRW_REGISTER_TYPE_UW));
 129       dst1_uw = vec16(retype(dst[1], BRW_REGISTER_TYPE_UW));
 130    } else {
 131       dst0_uw = vec8(retype(dst[0], BRW_REGISTER_TYPE_UW));
 132       dst1_uw = vec8(retype(dst[1], BRW_REGISTER_TYPE_UW));
 133    }
 134
 135    /* Calculate pixel centers by adding 1 or 0 to each of the
 136     * micro-tile coordinates passed in r1.
 137     */
 138    if (mask & WRITEMASK_X) {
 139       brw_ADD(p,
 140               dst0_uw,
 141               stride(suboffset(r1_uw, 4), 2, 4, 0),
 142               brw_imm_v(0x10101010));
 143    }
 144
 145    if (mask & WRITEMASK_Y) {
 146       brw_ADD(p,
 147               dst1_uw,
 148               stride(suboffset(r1_uw,5), 2, 4, 0),
 149               brw_imm_v(0x11001100));
 150    }
 151    brw_pop_insn_state(p);
 152 }
 153
 154 /**
 155  * Computes the screen-space x,y distance of the pixels from the start
 156  * vertex.
 157  *
 158  * This will be used in linterp or pinterp with the start vertex value
 159  * and the Cx, Cy, and C0 coefficients passed in from the setup engine
 160  * to produce interpolated attribute values.
 161  */
 162 void emit_delta_xy(struct brw_compile *p,
 163                    const struct brw_reg *dst,
 164                    GLuint mask,
 165                    const struct brw_reg *arg0)
 166 {
 167    struct intel_context *intel = &p->brw->intel;
 168    struct brw_reg r1 = brw_vec1_grf(1, 0);
 169
 170    if (mask == 0)
 171       return;
 172
 173    assert(mask == WRITEMASK_XY);
 174
 175    if (intel->gen >= 6) {
 176        /* XXX Gen6 WM doesn't have Xstart/Ystart in payload r1.0/r1.1.
 177           Just add them with 0.0 for dst reg.. */
 178        r1 = brw_imm_v(0x00000000);
 179        brw_ADD(p,
 180                dst[0],
 181                retype(arg0[0], BRW_REGISTER_TYPE_UW),
 182                r1);
 183        brw_ADD(p,
 184                dst[1],
 185                retype(arg0[1], BRW_REGISTER_TYPE_UW),
 186                r1);
 187        return;
 188    }
 189
 190    /* Calc delta X,Y by subtracting origin in r1 from the pixel
 191     * centers produced by emit_pixel_xy().
 192     */
 193    brw_ADD(p,
 194            dst[0],
 195            retype(arg0[0], BRW_REGISTER_TYPE_UW),
 196            negate(r1));
 197    brw_ADD(p,
 198            dst[1],
 199            retype(arg0[1], BRW_REGISTER_TYPE_UW),
 200            negate(suboffset(r1,1)));
 201 }
 202
 203 /**
 204  * Computes the pixel offset from the window origin for gl_FragCoord().
 205  */
 206 void emit_wpos_xy(struct brw_wm_compile *c,
 207                   const struct brw_reg *dst,
 208                   GLuint mask,
 209                   const struct brw_reg *arg0)
 210 {
 211    struct brw_compile *p = &c->func;
 212    struct intel_context *intel = &p->brw->intel;
 213    struct brw_reg delta_x = retype(arg0[0], BRW_REGISTER_TYPE_W);
 214    struct brw_reg delta_y = retype(arg0[1], BRW_REGISTER_TYPE_W);
 215
 216    if (mask & WRITEMASK_X) {
 217       if (intel->gen >= 6) {
 218          struct brw_reg delta_x_f = retype(delta_x, BRW_REGISTER_TYPE_F);
 219          brw_MOV(p, delta_x_f, delta_x);
 220          delta_x = delta_x_f;
 221       }
 222
 223       if (c->fp->program.PixelCenterInteger) {
 224          /* X' = X */
 225          brw_MOV(p, dst[0], delta_x);
 226       } else {
 227          /* X' = X + 0.5 */
 228          brw_ADD(p, dst[0], delta_x, brw_imm_f(0.5));
 229       }
 230    }
 231
 232    if (mask & WRITEMASK_Y) {
 233       if (intel->gen >= 6) {
 234          struct brw_reg delta_y_f = retype(delta_y, BRW_REGISTER_TYPE_F);
 235          brw_MOV(p, delta_y_f, delta_y);
 236          delta_y = delta_y_f;
 237       }
 238
 239       if (c->fp->program.OriginUpperLeft) {
 240          if (c->fp->program.PixelCenterInteger) {
 241             /* Y' = Y */
 242             brw_MOV(p, dst[1], delta_y);
 243          } else {
 244             brw_ADD(p, dst[1], delta_y, brw_imm_f(0.5));
 245          }
 246       } else {
 247          float center_offset = c->fp->program.PixelCenterInteger ? 0.0 : 0.5;
 248
 249          /* Y' = (height - 1) - Y + center */
 250          brw_ADD(p, dst[1], negate(delta_y),
 251                  brw_imm_f(c->key.drawable_height - 1 + center_offset));
 252       }
 253    }
 254 }
 255
 256
 257 void emit_pixel_w(struct brw_wm_compile *c,
 258                   const struct brw_reg *dst,
 259                   GLuint mask,
 260                   const struct brw_reg *arg0,
 261                   const struct brw_reg *deltas)
 262 {
 263    struct brw_compile *p = &c->func;
 264    struct intel_context *intel = &p->brw->intel;
 265    struct brw_reg src;
 266    struct brw_reg temp_dst;
 267
 268    if (intel->gen >= 6)
 269         temp_dst = dst[3];
 270    else
 271         temp_dst = brw_message_reg(2);
 272
 273    assert(intel->gen < 6);
 274
 275    /* Don't need this if all you are doing is interpolating color, for
 276     * instance.
 277     */
 278    if (mask & WRITEMASK_W) {
 279       struct brw_reg interp3 = brw_vec1_grf(arg0[0].nr+1, 4);
 280
 281       /* Calc 1/w - just linterp wpos[3] optimized by putting the
 282        * result straight into a message reg.
 283        */
 284       if (can_do_pln(intel, deltas)) {
 285          brw_PLN(p, temp_dst, interp3, deltas[0]);
 286       } else {
 287          brw_LINE(p, brw_null_reg(), interp3, deltas[0]);
 288          brw_MAC(p, temp_dst, suboffset(interp3, 1), deltas[1]);
 289       }
 290
 291       /* Calc w */
 292       if (intel->gen >= 6)
 293          src = temp_dst;
 294       else
 295          src = brw_null_reg();
 296
 297       if (c->dispatch_width == 16) {
 298          brw_math_16(p, dst[3],
 299                      BRW_MATH_FUNCTION_INV,
 300                      BRW_MATH_SATURATE_NONE,
 301                      2, src,
 302                      BRW_MATH_PRECISION_FULL);
 303       } else {
 304          brw_math(p, dst[3],
 305                   BRW_MATH_FUNCTION_INV,
 306                   BRW_MATH_SATURATE_NONE,
 307                   2, src,
 308                   BRW_MATH_DATA_VECTOR,
 309                   BRW_MATH_PRECISION_FULL);
 310       }
 311    }
 312 }
 313
 314 void emit_linterp(struct brw_compile *p,
 315                   const struct brw_reg *dst,
 316                   GLuint mask,
 317                   const struct brw_reg *arg0,
 318                   const struct brw_reg *deltas)
 319 {
 320    struct intel_context *intel = &p->brw->intel;
 321    struct brw_reg interp[4];
 322    GLuint nr = arg0[0].nr;
 323    GLuint i;
 324
 325    interp[0] = brw_vec1_grf(nr, 0);
 326    interp[1] = brw_vec1_grf(nr, 4);
 327    interp[2] = brw_vec1_grf(nr+1, 0);
 328    interp[3] = brw_vec1_grf(nr+1, 4);
 329
 330    for (i = 0; i < 4; i++) {
 331       if (mask & (1<<i)) {
 332          if (intel->gen >= 6) {
 333             brw_PLN(p, dst[i], interp[i], brw_vec8_grf(2, 0));
 334          } else if (can_do_pln(intel, deltas)) {
 335             brw_PLN(p, dst[i], interp[i], deltas[0]);
 336          } else {
 337             brw_LINE(p, brw_null_reg(), interp[i], deltas[0]);
 338             brw_MAC(p, dst[i], suboffset(interp[i],1), deltas[1]);
 339          }
 340       }
 341    }
 342 }
 343
 344
 345 void emit_pinterp(struct brw_compile *p,
 346                   const struct brw_reg *dst,
 347                   GLuint mask,
 348                   const struct brw_reg *arg0,
 349                   const struct brw_reg *deltas,
 350                   const struct brw_reg *w)
 351 {
 352    struct intel_context *intel = &p->brw->intel;
 353    struct brw_reg interp[4];
 354    GLuint nr = arg0[0].nr;
 355    GLuint i;
 356
 357    if (intel->gen >= 6) {
 358       emit_linterp(p, dst, mask, arg0, interp);
 359       return;
 360    }
 361
 362    interp[0] = brw_vec1_grf(nr, 0);
 363    interp[1] = brw_vec1_grf(nr, 4);
 364    interp[2] = brw_vec1_grf(nr+1, 0);
 365    interp[3] = brw_vec1_grf(nr+1, 4);
 366
 367    for (i = 0; i < 4; i++) {
 368       if (mask & (1<<i)) {
 369          if (can_do_pln(intel, deltas)) {
 370             brw_PLN(p, dst[i], interp[i], deltas[0]);
 371          } else {
 372             brw_LINE(p, brw_null_reg(), interp[i], deltas[0]);
 373             brw_MAC(p, dst[i], suboffset(interp[i],1), deltas[1]);
 374          }
 375       }
 376    }
 377    for (i = 0; i < 4; i++) {
 378       if (mask & (1<<i)) {
 379          brw_MUL(p, dst[i], dst[i], w[3]);
 380       }
 381    }
 382 }
 383
 384
 385 void emit_cinterp(struct brw_compile *p,
 386                   const struct brw_reg *dst,
 387                   GLuint mask,
 388                   const struct brw_reg *arg0)
 389 {
 390    struct brw_reg interp[4];
 391    GLuint nr = arg0[0].nr;
 392    GLuint i;
 393
 394    interp[0] = brw_vec1_grf(nr, 0);
 395    interp[1] = brw_vec1_grf(nr, 4);
 396    interp[2] = brw_vec1_grf(nr+1, 0);
 397    interp[3] = brw_vec1_grf(nr+1, 4);
 398
 399    for (i = 0; i < 4; i++) {
 400       if (mask & (1<<i)) {
 401          brw_MOV(p, dst[i], suboffset(interp[i],3));    /* TODO: optimize away like other moves */
 402       }
 403    }
 404 }
 405
 406 /* Sets the destination channels to 1.0 or 0.0 according to glFrontFacing. */
 407 void emit_frontfacing(struct brw_compile *p,
 408                       const struct brw_reg *dst,
 409                       GLuint mask)
 410 {
 411    struct brw_reg r1_6ud = retype(brw_vec1_grf(1, 6), BRW_REGISTER_TYPE_UD);
 412    GLuint i;
 413
 414    if (!(mask & WRITEMASK_XYZW))
 415       return;
 416
 417    for (i = 0; i < 4; i++) {
 418       if (mask & (1<<i)) {
 419          brw_MOV(p, dst[i], brw_imm_f(0.0));
 420       }
 421    }
 422
 423    /* bit 31 is "primitive is back face", so checking < (1 << 31) gives
 424     * us front face
 425     */
 426    brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_L, r1_6ud, brw_imm_ud(1 << 31));
 427    for (i = 0; i < 4; i++) {
 428       if (mask & (1<<i)) {
 429          brw_MOV(p, dst[i], brw_imm_f(1.0));
 430       }
 431    }
 432    brw_set_predicate_control_flag_value(p, 0xff);
 433 }
 434
 435 /* For OPCODE_DDX and OPCODE_DDY, per channel of output we've got input
 436  * looking like:
 437  *
 438  * arg0: ss0.tl ss0.tr ss0.bl ss0.br ss1.tl ss1.tr ss1.bl ss1.br
 439  *
 440  * and we're trying to produce:
 441  *
 442  *           DDX                     DDY
 443  * dst: (ss0.tr - ss0.tl)     (ss0.tl - ss0.bl)
 444  *      (ss0.tr - ss0.tl)     (ss0.tr - ss0.br)
 445  *      (ss0.br - ss0.bl)     (ss0.tl - ss0.bl)
 446  *      (ss0.br - ss0.bl)     (ss0.tr - ss0.br)
 447  *      (ss1.tr - ss1.tl)     (ss1.tl - ss1.bl)
 448  *      (ss1.tr - ss1.tl)     (ss1.tr - ss1.br)
 449  *      (ss1.br - ss1.bl)     (ss1.tl - ss1.bl)
 450  *      (ss1.br - ss1.bl)     (ss1.tr - ss1.br)
 451  *
 452  * and add another set of two more subspans if in 16-pixel dispatch mode.
 453  *
 454  * For DDX, it ends up being easy: width = 2, horiz=0 gets us the same result
 455  * for each pair, and vertstride = 2 jumps us 2 elements after processing a
 456  * pair. But for DDY, it's harder, as we want to produce the pairs swizzled
 457  * between each other.  We could probably do it like ddx and swizzle the right
 458  * order later, but bail for now and just produce
 459  * ((ss0.tl - ss0.bl)x4 (ss1.tl - ss1.bl)x4)
 460  */
 461 void emit_ddxy(struct brw_compile *p,
 462                const struct brw_reg *dst,
 463                GLuint mask,
 464                bool is_ddx,
 465                const struct brw_reg *arg0)
 466 {
 467    int i;
 468    struct brw_reg src0, src1;
 469
 470    if (mask & SATURATE)
 471       brw_set_saturate(p, 1);
 472    for (i = 0; i < 4; i++ ) {
 473       if (mask & (1<<i)) {
 474          if (is_ddx) {
 475             src0 = brw_reg(arg0[i].file, arg0[i].nr, 1,
 476                            BRW_REGISTER_TYPE_F,
 477                            BRW_VERTICAL_STRIDE_2,
 478                            BRW_WIDTH_2,
 479                            BRW_HORIZONTAL_STRIDE_0,
 480                            BRW_SWIZZLE_XYZW, WRITEMASK_XYZW);
 481             src1 = brw_reg(arg0[i].file, arg0[i].nr, 0,
 482                            BRW_REGISTER_TYPE_F,
 483                            BRW_VERTICAL_STRIDE_2,
 484                            BRW_WIDTH_2,
 485                            BRW_HORIZONTAL_STRIDE_0,
 486                            BRW_SWIZZLE_XYZW, WRITEMASK_XYZW);
 487          } else {
 488             src0 = brw_reg(arg0[i].file, arg0[i].nr, 0,
 489                            BRW_REGISTER_TYPE_F,
 490                            BRW_VERTICAL_STRIDE_4,
 491                            BRW_WIDTH_4,
 492                            BRW_HORIZONTAL_STRIDE_0,
 493                            BRW_SWIZZLE_XYZW, WRITEMASK_XYZW);
 494             src1 = brw_reg(arg0[i].file, arg0[i].nr, 2,
 495                            BRW_REGISTER_TYPE_F,
 496                            BRW_VERTICAL_STRIDE_4,
 497                            BRW_WIDTH_4,
 498                            BRW_HORIZONTAL_STRIDE_0,
 499                            BRW_SWIZZLE_XYZW, WRITEMASK_XYZW);
 500          }
 501          brw_ADD(p, dst[i], src0, negate(src1));
 502       }
 503    }
 504    if (mask & SATURATE)
 505       brw_set_saturate(p, 0);
 506 }
 507
 508 void emit_alu1(struct brw_compile *p,
 509                struct brw_instruction *(*func)(struct brw_compile *,
 510                                                struct brw_reg,
 511                                                struct brw_reg),
 512                const struct brw_reg *dst,
 513                GLuint mask,
 514                const struct brw_reg *arg0)
 515 {
 516    GLuint i;
 517
 518    if (mask & SATURATE)
 519       brw_set_saturate(p, 1);
 520
 521    for (i = 0; i < 4; i++) {
 522       if (mask & (1<<i)) {
 523          func(p, dst[i], arg0[i]);
 524       }
 525    }
 526
 527    if (mask & SATURATE)
 528       brw_set_saturate(p, 0);
 529 }
 530
 531
 532 void emit_alu2(struct brw_compile *p,
 533                struct brw_instruction *(*func)(struct brw_compile *,
 534                                                struct brw_reg,
 535                                                struct brw_reg,
 536                                                struct brw_reg),
 537                const struct brw_reg *dst,
 538                GLuint mask,
 539                const struct brw_reg *arg0,
 540                const struct brw_reg *arg1)
 541 {
 542    GLuint i;
 543
 544    if (mask & SATURATE)
 545       brw_set_saturate(p, 1);
 546
 547    for (i = 0; i < 4; i++) {
 548       if (mask & (1<<i)) {
 549          func(p, dst[i], arg0[i], arg1[i]);
 550       }
 551    }
 552
 553    if (mask & SATURATE)
 554       brw_set_saturate(p, 0);
 555 }
 556
 557
 558 void emit_mad(struct brw_compile *p,
 559               const struct brw_reg *dst,
 560               GLuint mask,
 561               const struct brw_reg *arg0,
 562               const struct brw_reg *arg1,
 563               const struct brw_reg *arg2)
 564 {
 565    GLuint i;
 566
 567    for (i = 0; i < 4; i++) {
 568       if (mask & (1<<i)) {
 569          brw_MUL(p, dst[i], arg0[i], arg1[i]);
 570
 571          brw_set_saturate(p, (mask & SATURATE) ? 1 : 0);
 572          brw_ADD(p, dst[i], dst[i], arg2[i]);
 573          brw_set_saturate(p, 0);
 574       }
 575    }
 576 }
 577
 578 void emit_lrp(struct brw_compile *p,
 579               const struct brw_reg *dst,
 580               GLuint mask,
 581               const struct brw_reg *arg0,
 582               const struct brw_reg *arg1,
 583               const struct brw_reg *arg2)
 584 {
 585    GLuint i;
 586
 587    /* Uses dst as a temporary:
 588     */
 589    for (i = 0; i < 4; i++) {
 590       if (mask & (1<<i)) {
 591          /* Can I use the LINE instruction for this?
 592           */
 593          brw_ADD(p, dst[i], negate(arg0[i]), brw_imm_f(1.0));
 594          brw_MUL(p, brw_null_reg(), dst[i], arg2[i]);
 595
 596          brw_set_saturate(p, (mask & SATURATE) ? 1 : 0);
 597          brw_MAC(p, dst[i], arg0[i], arg1[i]);
 598          brw_set_saturate(p, 0);
 599       }
 600    }
 601 }
 602
 603 void emit_sop(struct brw_compile *p,
 604               const struct brw_reg *dst,
 605               GLuint mask,
 606               GLuint cond,
 607               const struct brw_reg *arg0,
 608               const struct brw_reg *arg1)
 609 {
 610    GLuint i;
 611
 612    for (i = 0; i < 4; i++) {
 613       if (mask & (1<<i)) {
 614          brw_push_insn_state(p);
 615          brw_CMP(p, brw_null_reg(), cond, arg0[i], arg1[i]);
 616          brw_set_predicate_control(p, BRW_PREDICATE_NONE);
 617          brw_MOV(p, dst[i], brw_imm_f(0));
 618          brw_set_predicate_control(p, BRW_PREDICATE_NORMAL);
 619          brw_MOV(p, dst[i], brw_imm_f(1.0));
 620          brw_pop_insn_state(p);
 621       }
 622    }
 623 }
 624
 625 static void emit_slt( struct brw_compile *p,
 626                       const struct brw_reg *dst,
 627                       GLuint mask,
 628                       const struct brw_reg *arg0,
 629                       const struct brw_reg *arg1 )
 630 {
 631    emit_sop(p, dst, mask, BRW_CONDITIONAL_L, arg0, arg1);
 632 }
 633
 634 static void emit_sle( struct brw_compile *p,
 635                       const struct brw_reg *dst,
 636                       GLuint mask,
 637                       const struct brw_reg *arg0,
 638                       const struct brw_reg *arg1 )
 639 {
 640    emit_sop(p, dst, mask, BRW_CONDITIONAL_LE, arg0, arg1);
 641 }
 642
 643 static void emit_sgt( struct brw_compile *p,
 644                       const struct brw_reg *dst,
 645                       GLuint mask,
 646                       const struct brw_reg *arg0,
 647                       const struct brw_reg *arg1 )
 648 {
 649    emit_sop(p, dst, mask, BRW_CONDITIONAL_G, arg0, arg1);
 650 }
 651
 652 static void emit_sge( struct brw_compile *p,
 653                       const struct brw_reg *dst,
 654                       GLuint mask,
 655                       const struct brw_reg *arg0,
 656                       const struct brw_reg *arg1 )
 657 {
 658    emit_sop(p, dst, mask, BRW_CONDITIONAL_GE, arg0, arg1);
 659 }
 660
 661 static void emit_seq( struct brw_compile *p,
 662                       const struct brw_reg *dst,
 663                       GLuint mask,
 664                       const struct brw_reg *arg0,
 665                       const struct brw_reg *arg1 )
 666 {
 667    emit_sop(p, dst, mask, BRW_CONDITIONAL_EQ, arg0, arg1);
 668 }
 669
 670 static void emit_sne( struct brw_compile *p,
 671                       const struct brw_reg *dst,
 672                       GLuint mask,
 673                       const struct brw_reg *arg0,
 674                       const struct brw_reg *arg1 )
 675 {
 676    emit_sop(p, dst, mask, BRW_CONDITIONAL_NEQ, arg0, arg1);
 677 }
 678
 679 void emit_cmp(struct brw_compile *p,
 680               const struct brw_reg *dst,
 681               GLuint mask,
 682               const struct brw_reg *arg0,
 683               const struct brw_reg *arg1,
 684               const struct brw_reg *arg2)
 685 {
 686    GLuint i;
 687
 688    for (i = 0; i < 4; i++) {
 689       if (mask & (1<<i)) {
 690          brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_L, arg0[i], brw_imm_f(0));
 691
 692          brw_set_saturate(p, (mask & SATURATE) ? 1 : 0);
 693          brw_SEL(p, dst[i], arg1[i], arg2[i]);
 694          brw_set_saturate(p, 0);
 695          brw_set_predicate_control_flag_value(p, 0xff);
 696       }
 697    }
 698 }
 699
 700 void emit_sign(struct brw_compile *p,
 701                const struct brw_reg *dst,
 702                GLuint mask,
 703                const struct brw_reg *arg0)
 704 {
 705    GLuint i;
 706
 707    for (i = 0; i < 4; i++) {
 708       if (mask & (1<<i)) {
 709          brw_MOV(p, dst[i], brw_imm_f(0.0));
 710
 711          brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_L, arg0[i], brw_imm_f(0));
 712          brw_MOV(p, dst[i], brw_imm_f(-1.0));
 713          brw_set_predicate_control(p, BRW_PREDICATE_NONE);
 714
 715          brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_G, arg0[i], brw_imm_f(0));
 716          brw_MOV(p, dst[i], brw_imm_f(1.0));
 717          brw_set_predicate_control(p, BRW_PREDICATE_NONE);
 718       }
 719    }
 720 }
 721
 722 void emit_max(struct brw_compile *p,
 723               const struct brw_reg *dst,
 724               GLuint mask,
 725               const struct brw_reg *arg0,
 726               const struct brw_reg *arg1)
 727 {
 728    GLuint i;
 729
 730    for (i = 0; i < 4; i++) {
 731       if (mask & (1<<i)) {
 732          brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_GE, arg0[i], arg1[i]);
 733
 734          brw_set_saturate(p, (mask & SATURATE) ? 1 : 0);
 735          brw_SEL(p, dst[i], arg0[i], arg1[i]);
 736          brw_set_saturate(p, 0);
 737          brw_set_predicate_control_flag_value(p, 0xff);
 738       }
 739    }
 740 }
 741
 742 void emit_min(struct brw_compile *p,
 743               const struct brw_reg *dst,
 744               GLuint mask,
 745               const struct brw_reg *arg0,
 746               const struct brw_reg *arg1)
 747 {
 748    GLuint i;
 749
 750    for (i = 0; i < 4; i++) {
 751       if (mask & (1<<i)) {
 752          brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_L, arg0[i], arg1[i]);
 753
 754          brw_set_saturate(p, (mask & SATURATE) ? 1 : 0);
 755          brw_SEL(p, dst[i], arg0[i], arg1[i]);
 756          brw_set_saturate(p, 0);
 757          brw_set_predicate_control_flag_value(p, 0xff);
 758       }
 759    }
 760 }
 761
 762
 763 void emit_dp2(struct brw_compile *p,
 764               const struct brw_reg *dst,
 765               GLuint mask,
 766               const struct brw_reg *arg0,
 767               const struct brw_reg *arg1)
 768 {
 769    int dst_chan = _mesa_ffs(mask & WRITEMASK_XYZW) - 1;
 770
 771    if (!(mask & WRITEMASK_XYZW))
 772       return; /* Do not emit dead code */
 773
 774    assert(is_power_of_two(mask & WRITEMASK_XYZW));
 775
 776    brw_MUL(p, brw_null_reg(), arg0[0], arg1[0]);
 777
 778    brw_set_saturate(p, (mask & SATURATE) ? 1 : 0);
 779    brw_MAC(p, dst[dst_chan], arg0[1], arg1[1]);
 780    brw_set_saturate(p, 0);
 781 }
 782
 783
 784 void emit_dp3(struct brw_compile *p,
 785               const struct brw_reg *dst,
 786               GLuint mask,
 787               const struct brw_reg *arg0,
 788               const struct brw_reg *arg1)
 789 {
 790    int dst_chan = _mesa_ffs(mask & WRITEMASK_XYZW) - 1;
 791
 792    if (!(mask & WRITEMASK_XYZW))
 793       return; /* Do not emit dead code */
 794
 795    assert(is_power_of_two(mask & WRITEMASK_XYZW));
 796
 797    brw_MUL(p, brw_null_reg(), arg0[0], arg1[0]);
 798    brw_MAC(p, brw_null_reg(), arg0[1], arg1[1]);
 799
 800    brw_set_saturate(p, (mask & SATURATE) ? 1 : 0);
 801    brw_MAC(p, dst[dst_chan], arg0[2], arg1[2]);
 802    brw_set_saturate(p, 0);
 803 }
 804
 805
 806 void emit_dp4(struct brw_compile *p,
 807               const struct brw_reg *dst,
 808               GLuint mask,
 809               const struct brw_reg *arg0,
 810               const struct brw_reg *arg1)
 811 {
 812    int dst_chan = _mesa_ffs(mask & WRITEMASK_XYZW) - 1;
 813
 814    if (!(mask & WRITEMASK_XYZW))
 815       return; /* Do not emit dead code */
 816
 817    assert(is_power_of_two(mask & WRITEMASK_XYZW));
 818
 819    brw_MUL(p, brw_null_reg(), arg0[0], arg1[0]);
 820    brw_MAC(p, brw_null_reg(), arg0[1], arg1[1]);
 821    brw_MAC(p, brw_null_reg(), arg0[2], arg1[2]);
 822
 823    brw_set_saturate(p, (mask & SATURATE) ? 1 : 0);
 824    brw_MAC(p, dst[dst_chan], arg0[3], arg1[3]);
 825    brw_set_saturate(p, 0);
 826 }
 827
 828
 829 void emit_dph(struct brw_compile *p,
 830               const struct brw_reg *dst,
 831               GLuint mask,
 832               const struct brw_reg *arg0,
 833               const struct brw_reg *arg1)
 834 {
 835    const int dst_chan = _mesa_ffs(mask & WRITEMASK_XYZW) - 1;
 836
 837    if (!(mask & WRITEMASK_XYZW))
 838       return; /* Do not emit dead code */
 839
 840    assert(is_power_of_two(mask & WRITEMASK_XYZW));
 841
 842    brw_MUL(p, brw_null_reg(), arg0[0], arg1[0]);
 843    brw_MAC(p, brw_null_reg(), arg0[1], arg1[1]);
 844    brw_MAC(p, dst[dst_chan], arg0[2], arg1[2]);
 845
 846    brw_set_saturate(p, (mask & SATURATE) ? 1 : 0);
 847    brw_ADD(p, dst[dst_chan], dst[dst_chan], arg1[3]);
 848    brw_set_saturate(p, 0);
 849 }
 850
 851
 852 void emit_xpd(struct brw_compile *p,
 853               const struct brw_reg *dst,
 854               GLuint mask,
 855               const struct brw_reg *arg0,
 856               const struct brw_reg *arg1)
 857 {
 858    GLuint i;
 859
 860    assert((mask & WRITEMASK_W) != WRITEMASK_W);
 861
 862    for (i = 0 ; i < 3; i++) {
 863       if (mask & (1<<i)) {
 864          GLuint i2 = (i+2)%3;
 865          GLuint i1 = (i+1)%3;
 866
 867          brw_MUL(p, brw_null_reg(), negate(arg0[i2]), arg1[i1]);
 868
 869          brw_set_saturate(p, (mask & SATURATE) ? 1 : 0);
 870          brw_MAC(p, dst[i], arg0[i1], arg1[i2]);
 871          brw_set_saturate(p, 0);
 872       }
 873    }
 874 }
 875
 876
 877 void emit_math1(struct brw_wm_compile *c,
 878                 GLuint function,
 879                 const struct brw_reg *dst,
 880                 GLuint mask,
 881                 const struct brw_reg *arg0)
 882 {
 883    struct brw_compile *p = &c->func;
 884    struct intel_context *intel = &p->brw->intel;
 885    int dst_chan = _mesa_ffs(mask & WRITEMASK_XYZW) - 1;
 886    GLuint saturate = ((mask & SATURATE) ?
 887                       BRW_MATH_SATURATE_SATURATE :
 888                       BRW_MATH_SATURATE_NONE);
 889    struct brw_reg src;
 890
 891    if (!(mask & WRITEMASK_XYZW))
 892       return; /* Do not emit dead code */
 893
 894    assert(is_power_of_two(mask & WRITEMASK_XYZW));
 895
 896    if (intel->gen >= 6 && ((arg0[0].hstride == BRW_HORIZONTAL_STRIDE_0 ||
 897                             arg0[0].file != BRW_GENERAL_REGISTER_FILE) ||
 898                            arg0[0].negate || arg0[0].abs)) {
 899       /* Gen6 math requires that source and dst horizontal stride be 1,
 900        * and that the argument be in the GRF.
 901        *
 902        * The hardware ignores source modifiers (negate and abs) on math
 903        * instructions, so we also move to a temp to set those up.
 904        */
 905       src = dst[dst_chan];
 906       brw_MOV(p, src, arg0[0]);
 907    } else {
 908       src = arg0[0];
 909    }
 910
 911    /* Send two messages to perform all 16 operations:
 912     */
 913    brw_push_insn_state(p);
 914    brw_set_compression_control(p, BRW_COMPRESSION_NONE);
 915    brw_math(p,
 916             dst[dst_chan],
 917             function,
 918             saturate,
 919             2,
 920             src,
 921             BRW_MATH_DATA_VECTOR,
 922             BRW_MATH_PRECISION_FULL);
 923
 924    if (c->dispatch_width == 16) {
 925       brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF);
 926       brw_math(p,
 927                offset(dst[dst_chan],1),
 928                function,
 929                saturate,
 930                3,
 931                sechalf(src),
 932                BRW_MATH_DATA_VECTOR,
 933                BRW_MATH_PRECISION_FULL);
 934    }
 935    brw_pop_insn_state(p);
 936 }
 937
 938
 939 void emit_math2(struct brw_wm_compile *c,
 940                 GLuint function,
 941                 const struct brw_reg *dst,
 942                 GLuint mask,
 943                 const struct brw_reg *arg0,
 944                 const struct brw_reg *arg1)
 945 {
 946    struct brw_compile *p = &c->func;
 947    struct intel_context *intel = &p->brw->intel;
 948    int dst_chan = _mesa_ffs(mask & WRITEMASK_XYZW) - 1;
 949
 950    if (!(mask & WRITEMASK_XYZW))
 951       return; /* Do not emit dead code */
 952
 953    assert(is_power_of_two(mask & WRITEMASK_XYZW));
 954
 955    brw_push_insn_state(p);
 956
 957    /* math can only operate on up to a vec8 at a time, so in
 958     * dispatch_width==16 we have to do the second half manually.
 959     */
 960    if (intel->gen >= 6) {
 961       struct brw_reg src0 = arg0[0];
 962       struct brw_reg src1 = arg1[0];
 963       struct brw_reg temp_dst = dst[dst_chan];
 964
 965       if (arg0[0].hstride == BRW_HORIZONTAL_STRIDE_0) {
 966          brw_MOV(p, temp_dst, src0);
 967          src0 = temp_dst;
 968       }
 969
 970       if (arg1[0].hstride == BRW_HORIZONTAL_STRIDE_0) {
 971          /* This is a heinous hack to get a temporary register for use
 972           * in case both arg0 and arg1 are constants.  Why you're
 973           * doing exponentiation on constant values in the shader, we
 974           * don't know.
 975           *
 976           * max_wm_grf is almost surely less than the maximum GRF, and
 977           * gen6 doesn't care about the number of GRFs used in a
 978           * shader like pre-gen6 did.
 979           */
 980          struct brw_reg temp = brw_vec8_grf(c->max_wm_grf, 0);
 981          brw_MOV(p, temp, src1);
 982          src1 = temp;
 983       }
 984
 985       brw_set_saturate(p, (mask & SATURATE) ? 1 : 0);
 986       brw_set_compression_control(p, BRW_COMPRESSION_NONE);
 987       brw_math2(p,
 988                 temp_dst,
 989                 function,
 990                 src0,
 991                 src1);
 992       if (c->dispatch_width == 16) {
 993          brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF);
 994          brw_math2(p,
 995                    sechalf(temp_dst),
 996                    function,
 997                    sechalf(src0),
 998                    sechalf(src1));
 999       }
1000    } else {
1001       GLuint saturate = ((mask & SATURATE) ?
1002                          BRW_MATH_SATURATE_SATURATE :
1003                          BRW_MATH_SATURATE_NONE);
1004
1005       brw_set_compression_control(p, BRW_COMPRESSION_NONE);
1006       brw_MOV(p, brw_message_reg(3), arg1[0]);
1007       if (c->dispatch_width == 16) {
1008          brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF);
1009          brw_MOV(p, brw_message_reg(5), sechalf(arg1[0]));
1010       }
1011
1012       brw_set_compression_control(p, BRW_COMPRESSION_NONE);
1013       brw_math(p,
1014                dst[dst_chan],
1015                function,
1016                saturate,
1017                2,
1018                arg0[0],
1019                BRW_MATH_DATA_VECTOR,
1020                BRW_MATH_PRECISION_FULL);
1021
1022       /* Send two messages to perform all 16 operations:
1023        */
1024       if (c->dispatch_width == 16) {
1025          brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF);
1026          brw_math(p,
1027                   offset(dst[dst_chan],1),
1028                   function,
1029                   saturate,
1030                   4,
1031                   sechalf(arg0[0]),
1032                   BRW_MATH_DATA_VECTOR,
1033                   BRW_MATH_PRECISION_FULL);
1034       }
1035    }
1036    brw_pop_insn_state(p);
1037 }
1038
1039
1040 void emit_tex(struct brw_wm_compile *c,
1041               struct brw_reg *dst,
1042               GLuint dst_flags,
1043               struct brw_reg *arg,
1044               struct brw_reg depth_payload,
1045               GLuint tex_idx,
1046               GLuint sampler,
1047               bool shadow)
1048 {
1049    struct brw_compile *p = &c->func;
1050    struct intel_context *intel = &p->brw->intel;
1051    struct brw_reg dst_retyped;
1052    GLuint cur_mrf = 2, response_length;
1053    GLuint i, nr_texcoords;
1054    GLuint emit;
1055    GLuint msg_type;
1056    GLuint mrf_per_channel;
1057    GLuint simd_mode;
1058
1059    if (c->dispatch_width == 16) {
1060       mrf_per_channel = 2;
1061       response_length = 8;
1062       dst_retyped = retype(vec16(dst[0]), BRW_REGISTER_TYPE_UW);
1063       simd_mode = BRW_SAMPLER_SIMD_MODE_SIMD16;
1064    } else {
1065       mrf_per_channel = 1;
1066       response_length = 4;
1067       dst_retyped = retype(vec8(dst[0]), BRW_REGISTER_TYPE_UW);
1068       simd_mode = BRW_SAMPLER_SIMD_MODE_SIMD8;
1069    }
1070
1071    /* How many input regs are there?
1072     */
1073    switch (tex_idx) {
1074    case TEXTURE_1D_INDEX:
1075       emit = WRITEMASK_X;
1076       nr_texcoords = 1;
1077       break;
1078    case TEXTURE_2D_INDEX:
1079    case TEXTURE_1D_ARRAY_INDEX:
1080    case TEXTURE_RECT_INDEX:
1081       emit = WRITEMASK_XY;
1082       nr_texcoords = 2;
1083       break;
1084    case TEXTURE_3D_INDEX:
1085    case TEXTURE_2D_ARRAY_INDEX:
1086    case TEXTURE_CUBE_INDEX:
1087       emit = WRITEMASK_XYZ;
1088       nr_texcoords = 3;
1089       break;
1090    default:
1091       /* unexpected target */
1092       abort();
1093    }
1094
1095    /* Pre-Ironlake, the 8-wide sampler always took u,v,r. */
1096    if (intel->gen < 5 && c->dispatch_width == 8)
1097       nr_texcoords = 3;
1098
1099    if (shadow) {
1100       if (intel->gen < 7) {
1101          /* For shadow comparisons, we have to supply u,v,r. */
1102          nr_texcoords = 3;
1103       } else {
1104          /* On Ivybridge, the shadow comparitor comes first. Just load it. */
1105          brw_MOV(p, brw_message_reg(cur_mrf), arg[2]);
1106          cur_mrf += mrf_per_channel;
1107       }
1108    }
1109
1110    /* Emit the texcoords. */
1111    for (i = 0; i < nr_texcoords; i++) {
1112       if (c->key.tex.gl_clamp_mask[i] & (1 << sampler))
1113          brw_set_saturate(p, true);
1114
1115       if (emit & (1<<i))
1116          brw_MOV(p, brw_message_reg(cur_mrf), arg[i]);
1117       else
1118          brw_MOV(p, brw_message_reg(cur_mrf), brw_imm_f(0));
1119       cur_mrf += mrf_per_channel;
1120
1121       brw_set_saturate(p, false);
1122    }
1123
1124    /* Fill in the shadow comparison reference value. */
1125    if (shadow && intel->gen < 7) {
1126       if (intel->gen >= 5) {
1127          /* Fill in the cube map array index value. */
1128          brw_MOV(p, brw_message_reg(cur_mrf), brw_imm_f(0));
1129          cur_mrf += mrf_per_channel;
1130       } else if (c->dispatch_width == 8) {
1131          /* Fill in the LOD bias value. */
1132          brw_MOV(p, brw_message_reg(cur_mrf), brw_imm_f(0));
1133          cur_mrf += mrf_per_channel;
1134       }
1135       brw_MOV(p, brw_message_reg(cur_mrf), arg[2]);
1136       cur_mrf += mrf_per_channel;
1137    }
1138
1139    if (intel->gen >= 5) {
1140       if (shadow)
1141          msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_COMPARE;
1142       else
1143          msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE;
1144    } else {
1145       /* Note that G45 and older determines shadow compare and dispatch width
1146        * from message length for most messages.
1147        */
1148       if (c->dispatch_width == 16 && shadow)
1149          msg_type = BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_COMPARE;
1150       else
1151          msg_type = BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE;
1152    }
1153
1154    brw_SAMPLE(p,
1155               dst_retyped,
1156               1,
1157               retype(depth_payload, BRW_REGISTER_TYPE_UW),
1158               SURF_INDEX_TEXTURE(sampler),
1159               sampler,
1160               dst_flags & WRITEMASK_XYZW,
1161               msg_type,
1162               response_length,
1163               cur_mrf - 1,
1164               1,
1165               simd_mode,
1166               BRW_SAMPLER_RETURN_FORMAT_FLOAT32);
1167 }
1168
1169
1170 void emit_txb(struct brw_wm_compile *c,
1171               struct brw_reg *dst,
1172               GLuint dst_flags,
1173               struct brw_reg *arg,
1174               struct brw_reg depth_payload,
1175               GLuint tex_idx,
1176               GLuint sampler)
1177 {
1178    struct brw_compile *p = &c->func;
1179    struct intel_context *intel = &p->brw->intel;
1180    GLuint msgLength;
1181    GLuint msg_type;
1182    GLuint mrf_per_channel;
1183    GLuint response_length;
1184    struct brw_reg dst_retyped;
1185
1186    /* The G45 and older chipsets don't support 8-wide dispatch for LOD biased
1187     * samples, so we'll use the 16-wide instruction, leave the second halves
1188     * undefined, and trust the execution mask to keep the undefined pixels
1189     * from mattering.
1190     */
1191    if (c->dispatch_width == 16 || intel->gen < 5) {
1192       if (intel->gen >= 5)
1193          msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_BIAS;
1194       else
1195          msg_type = BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_BIAS;
1196       mrf_per_channel = 2;
1197       dst_retyped = retype(vec16(dst[0]), BRW_REGISTER_TYPE_UW);
1198       response_length = 8;
1199    } else {
1200       msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_BIAS;
1201       mrf_per_channel = 1;
1202       dst_retyped = retype(vec8(dst[0]), BRW_REGISTER_TYPE_UW);
1203       response_length = 4;
1204    }
1205
1206    /* Shadow ignored for txb. */
1207    switch (tex_idx) {
1208    case TEXTURE_1D_INDEX:
1209       brw_MOV(p, brw_message_reg(2 + 0 * mrf_per_channel), arg[0]);
1210       brw_MOV(p, brw_message_reg(2 + 1 * mrf_per_channel), brw_imm_f(0));
1211       brw_MOV(p, brw_message_reg(2 + 2 * mrf_per_channel), brw_imm_f(0));
1212       break;
1213    case TEXTURE_2D_INDEX:
1214    case TEXTURE_RECT_INDEX:
1215       brw_MOV(p, brw_message_reg(2 + 0 * mrf_per_channel), arg[0]);
1216       brw_MOV(p, brw_message_reg(2 + 1 * mrf_per_channel), arg[1]);
1217       brw_MOV(p, brw_message_reg(2 + 2 * mrf_per_channel), brw_imm_f(0));
1218       break;
1219    case TEXTURE_3D_INDEX:
1220    case TEXTURE_CUBE_INDEX:
1221       brw_MOV(p, brw_message_reg(2 + 0 * mrf_per_channel), arg[0]);
1222       brw_MOV(p, brw_message_reg(2 + 1 * mrf_per_channel), arg[1]);
1223       brw_MOV(p, brw_message_reg(2 + 2 * mrf_per_channel), arg[2]);
1224       break;
1225    default:
1226       /* unexpected target */
1227       abort();
1228    }
1229
1230    brw_MOV(p, brw_message_reg(2 + 3 * mrf_per_channel), arg[3]);
1231    msgLength = 2 + 4 * mrf_per_channel - 1;
1232
1233    brw_SAMPLE(p,
1234               dst_retyped,
1235               1,
1236               retype(depth_payload, BRW_REGISTER_TYPE_UW),
1237               SURF_INDEX_TEXTURE(sampler),
1238               sampler,
1239               dst_flags & WRITEMASK_XYZW,
1240               msg_type,
1241               response_length,
1242               msgLength,
1243               1,
1244               BRW_SAMPLER_SIMD_MODE_SIMD16,
1245               BRW_SAMPLER_RETURN_FORMAT_FLOAT32);
1246 }
1247
1248
1249 static void emit_lit(struct brw_wm_compile *c,
1250                      const struct brw_reg *dst,
1251                      GLuint mask,
1252                      const struct brw_reg *arg0)
1253 {
1254    struct brw_compile *p = &c->func;
1255
1256    assert((mask & WRITEMASK_XW) == 0);
1257
1258    if (mask & WRITEMASK_Y) {
1259       brw_set_saturate(p, (mask & SATURATE) ? 1 : 0);
1260       brw_MOV(p, dst[1], arg0[0]);
1261       brw_set_saturate(p, 0);
1262    }
1263
1264    if (mask & WRITEMASK_Z) {
1265       emit_math2(c, BRW_MATH_FUNCTION_POW,
1266                  &dst[2],
1267                  WRITEMASK_X | (mask & SATURATE),
1268                  &arg0[1],
1269                  &arg0[3]);
1270    }
1271
1272    /* Ordinarily you'd use an iff statement to skip or shortcircuit
1273     * some of the POW calculations above, but 16-wide iff statements
1274     * seem to lock c1 hardware, so this is a nasty workaround:
1275     */
1276    brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_LE, arg0[0], brw_imm_f(0));
1277    {
1278       if (mask & WRITEMASK_Y)
1279          brw_MOV(p, dst[1], brw_imm_f(0));
1280
1281       if (mask & WRITEMASK_Z)
1282          brw_MOV(p, dst[2], brw_imm_f(0));
1283    }
1284    brw_set_predicate_control(p, BRW_PREDICATE_NONE);
1285 }
1286
1287
1288 /* Kill pixel - set execution mask to zero for those pixels which
1289  * fail.
1290  */
1291 static void emit_kil( struct brw_wm_compile *c,
1292                       struct brw_reg *arg0)
1293 {
1294    struct brw_compile *p = &c->func;
1295    struct intel_context *intel = &p->brw->intel;
1296    struct brw_reg pixelmask;
1297    GLuint i, j;
1298
1299    if (intel->gen >= 6)
1300       pixelmask = retype(brw_vec1_grf(1, 7), BRW_REGISTER_TYPE_UW);
1301    else
1302       pixelmask = retype(brw_vec1_grf(0, 0), BRW_REGISTER_TYPE_UW);
1303
1304    for (i = 0; i < 4; i++) {
1305       /* Check if we've already done the comparison for this reg
1306        * -- common when someone does KIL TEMP.wwww.
1307        */
1308       for (j = 0; j < i; j++) {
1309          if (memcmp(&arg0[j], &arg0[i], sizeof(arg0[0])) == 0)
1310             break;
1311       }
1312       if (j != i)
1313          continue;
1314
1315       brw_push_insn_state(p);
1316       brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_GE, arg0[i], brw_imm_f(0));
1317       brw_set_predicate_control_flag_value(p, 0xff);
1318       brw_set_compression_control(p, BRW_COMPRESSION_NONE);
1319       brw_AND(p, pixelmask, brw_flag_reg(), pixelmask);
1320       brw_pop_insn_state(p);
1321    }
1322 }
1323
1324 static void fire_fb_write( struct brw_wm_compile *c,
1325                            GLuint base_reg,
1326                            GLuint nr,
1327                            GLuint target,
1328                            GLuint eot )
1329 {
1330    struct brw_compile *p = &c->func;
1331    struct intel_context *intel = &p->brw->intel;
1332
1333    /* Pass through control information:
1334     *
1335     * Gen6 has done m1 mov in emit_fb_write() for current SIMD16 case.
1336     */
1337 /*  mov (8) m1.0<1>:ud   r1.0<8;8,1>:ud   { Align1 NoMask } */
1338    if (intel->gen < 6)
1339    {
1340       brw_push_insn_state(p);
1341       brw_set_mask_control(p, BRW_MASK_DISABLE); /* ? */
1342       brw_set_compression_control(p, BRW_COMPRESSION_NONE);
1343       brw_MOV(p,
1344                brw_message_reg(base_reg + 1),
1345                brw_vec8_grf(1, 0));
1346       brw_pop_insn_state(p);
1347    }
1348
1349    /* Send framebuffer write message: */
1350 /*  send (16) null.0<1>:uw m0               r0.0<8;8,1>:uw   0x85a04000:ud    { Align1 EOT } */
1351    brw_fb_WRITE(p,
1352                 c->dispatch_width,
1353                 base_reg,
1354                 retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UW),
1355                 target,
1356                 nr,
1357                 0,
1358                 eot,
1359                 true);
1360 }
1361
1362
1363 static void emit_aa( struct brw_wm_compile *c,
1364                      struct brw_reg *arg1,
1365                      GLuint reg )
1366 {
1367    struct brw_compile *p = &c->func;
1368    GLuint comp = c->aa_dest_stencil_reg / 2;
1369    GLuint off = c->aa_dest_stencil_reg % 2;
1370    struct brw_reg aa = offset(arg1[comp], off);
1371
1372    brw_push_insn_state(p);
1373    brw_set_compression_control(p, BRW_COMPRESSION_NONE); /* ?? */
1374    brw_MOV(p, brw_message_reg(reg), aa);
1375    brw_pop_insn_state(p);
1376 }
1377
1378
1379 /* Post-fragment-program processing.  Send the results to the
1380  * framebuffer.
1381  * \param arg0  the fragment color
1382  * \param arg1  the pass-through depth value
1383  * \param arg2  the shader-computed depth value
1384  */
1385 void emit_fb_write(struct brw_wm_compile *c,
1386                    struct brw_reg *arg0,
1387                    struct brw_reg *arg1,
1388                    struct brw_reg *arg2,
1389                    GLuint target,
1390                    GLuint eot)
1391 {
1392    struct brw_compile *p = &c->func;
1393    struct brw_context *brw = p->brw;
1394    struct intel_context *intel = &brw->intel;
1395    GLuint nr = 2;
1396    GLuint channel;
1397
1398    /* Reserve a space for AA - may not be needed:
1399     */
1400    if (c->aa_dest_stencil_reg)
1401       nr += 1;
1402
1403    /* I don't really understand how this achieves the color interleave
1404     * (ie RGBARGBA) in the result:  [Do the saturation here]
1405     */
1406    brw_push_insn_state(p);
1407
1408    if (c->key.clamp_fragment_color)
1409       brw_set_saturate(p, 1);
1410
1411    for (channel = 0; channel < 4; channel++) {
1412       if (intel->gen >= 6) {
1413          /* gen6 SIMD16 single source DP write looks like:
1414           * m + 0: r0
1415           * m + 1: r1
1416           * m + 2: g0
1417           * m + 3: g1
1418           * m + 4: b0
1419           * m + 5: b1
1420           * m + 6: a0
1421           * m + 7: a1
1422           */
1423          if (c->dispatch_width == 16) {
1424             brw_MOV(p, brw_message_reg(nr + channel * 2), arg0[channel]);
1425          } else {
1426             brw_MOV(p, brw_message_reg(nr + channel), arg0[channel]);
1427          }
1428       } else if (c->dispatch_width == 16 && brw->has_compr4) {
1429          /* pre-gen6 SIMD16 single source DP write looks like:
1430           * m + 0: r0
1431           * m + 1: g0
1432           * m + 2: b0
1433           * m + 3: a0
1434           * m + 4: r1
1435           * m + 5: g1
1436           * m + 6: b1
1437           * m + 7: a1
1438           *
1439           * By setting the high bit of the MRF register number, we indicate
1440           * that we want COMPR4 mode - instead of doing the usual destination
1441           * + 1 for the second half we get destination + 4.
1442           */
1443          brw_MOV(p,
1444                  brw_message_reg(nr + channel + BRW_MRF_COMPR4),
1445                  arg0[channel]);
1446       } else {
1447          /*  mov (8) m2.0<1>:ud   r28.0<8;8,1>:ud  { Align1 } */
1448          /*  mov (8) m6.0<1>:ud   r29.0<8;8,1>:ud  { Align1 SecHalf } */
1449          brw_set_compression_control(p, BRW_COMPRESSION_NONE);
1450          brw_MOV(p,
1451                  brw_message_reg(nr + channel),
1452                  arg0[channel]);
1453
1454          if (c->dispatch_width == 16) {
1455             brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF);
1456             brw_MOV(p,
1457                     brw_message_reg(nr + channel + 4),
1458                     sechalf(arg0[channel]));
1459          }
1460       }
1461    }
1462
1463    brw_set_saturate(p, 0);
1464
1465    /* skip over the regs populated above:
1466     */
1467    if (c->dispatch_width == 16)
1468       nr += 8;
1469    else
1470       nr += 4;
1471
1472    brw_pop_insn_state(p);
1473
1474    if (c->source_depth_to_render_target)
1475    {
1476       if (c->computes_depth)
1477          brw_MOV(p, brw_message_reg(nr), arg2[2]);
1478       else
1479          brw_MOV(p, brw_message_reg(nr), arg1[1]); /* ? */
1480
1481       nr += 2;
1482    }
1483
1484    if (c->dest_depth_reg)
1485    {
1486       GLuint comp = c->dest_depth_reg / 2;
1487       GLuint off = c->dest_depth_reg % 2;
1488
1489       if (off != 0) {
1490          brw_push_insn_state(p);
1491          brw_set_compression_control(p, BRW_COMPRESSION_NONE);
1492
1493          brw_MOV(p, brw_message_reg(nr), offset(arg1[comp],1));
1494          /* 2nd half? */
1495          brw_MOV(p, brw_message_reg(nr+1), arg1[comp+1]);
1496          brw_pop_insn_state(p);
1497       }
1498       else {
1499          brw_MOV(p, brw_message_reg(nr), arg1[comp]);
1500       }
1501       nr += 2;
1502    }
1503
1504    if (intel->gen >= 6) {
1505       /* Load the message header.  There's no implied move from src0
1506        * to the base mrf on gen6.
1507        */
1508       brw_push_insn_state(p);
1509       brw_set_mask_control(p, BRW_MASK_DISABLE);
1510       brw_MOV(p, retype(brw_message_reg(0), BRW_REGISTER_TYPE_UD),
1511               retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD));
1512       brw_pop_insn_state(p);
1513
1514       if (target != 0) {
1515          brw_MOV(p, retype(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE,
1516                                         0,
1517                                         2), BRW_REGISTER_TYPE_UD),
1518                  brw_imm_ud(target));
1519       }
1520    }
1521
1522    if (!c->runtime_check_aads_emit) {
1523       if (c->aa_dest_stencil_reg)
1524          emit_aa(c, arg1, 2);
1525
1526       fire_fb_write(c, 0, nr, target, eot);
1527    }
1528    else {
1529       struct brw_reg v1_null_ud = vec1(retype(brw_null_reg(), BRW_REGISTER_TYPE_UD));
1530       struct brw_reg ip = brw_ip_reg();
1531       int jmp;
1532
1533       brw_set_compression_control(p, BRW_COMPRESSION_NONE);
1534       brw_set_conditionalmod(p, BRW_CONDITIONAL_Z);
1535       brw_AND(p,
1536               v1_null_ud,
1537               get_element_ud(brw_vec8_grf(1,0), 6),
1538               brw_imm_ud(1<<26));
1539
1540       jmp = brw_JMPI(p, ip, ip, brw_imm_w(0)) - p->store;
1541       {
1542          emit_aa(c, arg1, 2);
1543          fire_fb_write(c, 0, nr, target, eot);
1544          /* note - thread killed in subroutine */
1545       }
1546       brw_land_fwd_jump(p, jmp);
1547
1548       /* ELSE: Shuffle up one register to fill in the hole left for AA:
1549        */
1550       fire_fb_write(c, 1, nr-1, target, eot);
1551    }
1552 }
1553
1554 /**
1555  * Move a GPR to scratch memory.
1556  */
1557 static void emit_spill( struct brw_wm_compile *c,
1558                         struct brw_reg reg,
1559                         GLuint slot )
1560 {
1561    struct brw_compile *p = &c->func;
1562
1563    /*
1564      mov (16) m2.0<1>:ud   r2.0<8;8,1>:ud   { Align1 Compr }
1565    */
1566    brw_MOV(p, brw_message_reg(2), reg);
1567
1568    /*
1569      mov (1) r0.2<1>:d    0x00000080:d     { Align1 NoMask }
1570      send (16) null.0<1>:uw m1               r0.0<8;8,1>:uw   0x053003ff:ud    { Align1 }
1571    */
1572    brw_oword_block_write_scratch(p, brw_message_reg(1), 2, slot);
1573 }
1574
1575
1576 /**
1577  * Load a GPR from scratch memory.
1578  */
1579 static void emit_unspill( struct brw_wm_compile *c,
1580                           struct brw_reg reg,
1581                           GLuint slot )
1582 {
1583    struct brw_compile *p = &c->func;
1584
1585    /* Slot 0 is the undef value.
1586     */
1587    if (slot == 0) {
1588       brw_MOV(p, reg, brw_imm_f(0));
1589       return;
1590    }
1591
1592    /*
1593      mov (1) r0.2<1>:d    0x000000c0:d     { Align1 NoMask }
1594      send (16) r110.0<1>:uw m1               r0.0<8;8,1>:uw   0x041243ff:ud    { Align1 }
1595    */
1596
1597    brw_oword_block_read(p, vec16(reg), brw_message_reg(1), 2, slot);
1598 }
1599
1600
1601 /**
1602  * Retrieve up to 4 GEN4 register pairs for the given wm reg:
1603  * Args with unspill_reg != 0 will be loaded from scratch memory.
1604  */
1605 static void get_argument_regs( struct brw_wm_compile *c,
1606                                struct brw_wm_ref *arg[],
1607                                struct brw_reg *regs )
1608 {
1609    GLuint i;
1610
1611    for (i = 0; i < 4; i++) {
1612       if (arg[i]) {
1613          if (arg[i]->unspill_reg)
1614             emit_unspill(c,
1615                          brw_vec8_grf(arg[i]->unspill_reg, 0),
1616                          arg[i]->value->spill_slot);
1617
1618          regs[i] = arg[i]->hw_reg;
1619       }
1620       else {
1621          regs[i] = brw_null_reg();
1622       }
1623    }
1624 }
1625
1626
1627 /**
1628  * For values that have a spill_slot!=0, write those regs to scratch memory.
1629  */
1630 static void spill_values( struct brw_wm_compile *c,
1631                           struct brw_wm_value *values,
1632                           GLuint nr )
1633 {
1634    GLuint i;
1635
1636    for (i = 0; i < nr; i++)
1637       if (values[i].spill_slot)
1638          emit_spill(c, values[i].hw_reg, values[i].spill_slot);
1639 }
1640
1641
1642 /* Emit the fragment program instructions here.
1643  */
1644 void brw_wm_emit( struct brw_wm_compile *c )
1645 {
1646    struct brw_compile *p = &c->func;
1647    struct intel_context *intel = &p->brw->intel;
1648    GLuint insn;
1649
1650    brw_set_compression_control(p, BRW_COMPRESSION_COMPRESSED);
1651    if (intel->gen >= 6)
1652         brw_set_acc_write_control(p, 1);
1653
1654    /* Check if any of the payload regs need to be spilled:
1655     */
1656    spill_values(c, c->payload.depth, 4);
1657    spill_values(c, c->creg, c->nr_creg);
1658    spill_values(c, c->payload.input_interp, FRAG_ATTRIB_MAX);
1659
1660
1661    for (insn = 0; insn < c->nr_insns; insn++) {
1662
1663       struct brw_wm_instruction *inst = &c->instruction[insn];
1664       struct brw_reg args[3][4], dst[4];
1665       GLuint i, dst_flags;
1666
1667       /* Get argument regs:
1668        */
1669       for (i = 0; i < 3; i++)
1670          get_argument_regs(c, inst->src[i], args[i]);
1671
1672       /* Get dest regs:
1673        */
1674       for (i = 0; i < 4; i++)
1675          if (inst->dst[i])
1676             dst[i] = inst->dst[i]->hw_reg;
1677          else
1678             dst[i] = brw_null_reg();
1679
1680       /* Flags
1681        */
1682       dst_flags = inst->writemask;
1683       if (inst->saturate)
1684          dst_flags |= SATURATE;
1685
1686       switch (inst->opcode) {
1687          /* Generated instructions for calculating triangle interpolants:
1688           */
1689       case WM_PIXELXY:
1690          emit_pixel_xy(c, dst, dst_flags);
1691          break;
1692
1693       case WM_DELTAXY:
1694          emit_delta_xy(p, dst, dst_flags, args[0]);
1695          break;
1696
1697       case WM_WPOSXY:
1698          emit_wpos_xy(c, dst, dst_flags, args[0]);
1699          break;
1700
1701       case WM_PIXELW:
1702          emit_pixel_w(c, dst, dst_flags, args[0], args[1]);
1703          break;
1704
1705       case WM_LINTERP:
1706          emit_linterp(p, dst, dst_flags, args[0], args[1]);
1707          break;
1708
1709       case WM_PINTERP:
1710          emit_pinterp(p, dst, dst_flags, args[0], args[1], args[2]);
1711          break;
1712
1713       case WM_CINTERP:
1714          emit_cinterp(p, dst, dst_flags, args[0]);
1715          break;
1716
1717       case WM_FB_WRITE:
1718          emit_fb_write(c, args[0], args[1], args[2], inst->target, inst->eot);
1719          break;
1720
1721       case WM_FRONTFACING:
1722          emit_frontfacing(p, dst, dst_flags);
1723          break;
1724
1725          /* Straightforward arithmetic:
1726           */
1727       case OPCODE_ADD:
1728          emit_alu2(p, brw_ADD, dst, dst_flags, args[0], args[1]);
1729          break;
1730
1731       case OPCODE_FRC:
1732          emit_alu1(p, brw_FRC, dst, dst_flags, args[0]);
1733          break;
1734
1735       case OPCODE_FLR:
1736          emit_alu1(p, brw_RNDD, dst, dst_flags, args[0]);
1737          break;
1738
1739       case OPCODE_DDX:
1740          emit_ddxy(p, dst, dst_flags, true, args[0]);
1741          break;
1742
1743       case OPCODE_DDY:
1744          emit_ddxy(p, dst, dst_flags, false, args[0]);
1745          break;
1746
1747       case OPCODE_DP2:
1748          emit_dp2(p, dst, dst_flags, args[0], args[1]);
1749          break;
1750
1751       case OPCODE_DP3:
1752          emit_dp3(p, dst, dst_flags, args[0], args[1]);
1753          break;
1754
1755       case OPCODE_DP4:
1756          emit_dp4(p, dst, dst_flags, args[0], args[1]);
1757          break;
1758
1759       case OPCODE_DPH:
1760          emit_dph(p, dst, dst_flags, args[0], args[1]);
1761          break;
1762
1763       case OPCODE_TRUNC:
1764          for (i = 0; i < 4; i++) {
1765             if (dst_flags & (1<<i)) {
1766                brw_RNDZ(p, dst[i], args[0][i]);
1767             }
1768          }
1769          break;
1770
1771       case OPCODE_LRP:
1772          emit_lrp(p, dst, dst_flags, args[0], args[1], args[2]);
1773          break;
1774
1775       case OPCODE_MAD:
1776          emit_mad(p, dst, dst_flags, args[0], args[1], args[2]);
1777          break;
1778
1779       case OPCODE_MOV:
1780       case OPCODE_SWZ:
1781          emit_alu1(p, brw_MOV, dst, dst_flags, args[0]);
1782          break;
1783
1784       case OPCODE_MUL:
1785          emit_alu2(p, brw_MUL, dst, dst_flags, args[0], args[1]);
1786          break;
1787
1788       case OPCODE_XPD:
1789          emit_xpd(p, dst, dst_flags, args[0], args[1]);
1790          break;
1791
1792          /* Higher math functions:
1793           */
1794       case OPCODE_RCP:
1795          emit_math1(c, BRW_MATH_FUNCTION_INV, dst, dst_flags, args[0]);
1796          break;
1797
1798       case OPCODE_RSQ:
1799          emit_math1(c, BRW_MATH_FUNCTION_RSQ, dst, dst_flags, args[0]);
1800          break;
1801
1802       case OPCODE_SIN:
1803          emit_math1(c, BRW_MATH_FUNCTION_SIN, dst, dst_flags, args[0]);
1804          break;
1805
1806       case OPCODE_COS:
1807          emit_math1(c, BRW_MATH_FUNCTION_COS, dst, dst_flags, args[0]);
1808          break;
1809
1810       case OPCODE_EX2:
1811          emit_math1(c, BRW_MATH_FUNCTION_EXP, dst, dst_flags, args[0]);
1812          break;
1813
1814       case OPCODE_LG2:
1815          emit_math1(c, BRW_MATH_FUNCTION_LOG, dst, dst_flags, args[0]);
1816          break;
1817
1818       case OPCODE_SCS:
1819          /* There is an scs math function, but it would need some
1820           * fixup for 16-element execution.
1821           */
1822          if (dst_flags & WRITEMASK_X)
1823             emit_math1(c, BRW_MATH_FUNCTION_COS, dst, (dst_flags&SATURATE)|WRITEMASK_X, args[0]);
1824          if (dst_flags & WRITEMASK_Y)
1825             emit_math1(c, BRW_MATH_FUNCTION_SIN, dst+1, (dst_flags&SATURATE)|WRITEMASK_X, args[0]);
1826          break;
1827
1828       case OPCODE_POW:
1829          emit_math2(c, BRW_MATH_FUNCTION_POW, dst, dst_flags, args[0], args[1]);
1830          break;
1831
1832          /* Comparisons:
1833           */
1834       case OPCODE_CMP:
1835          emit_cmp(p, dst, dst_flags, args[0], args[1], args[2]);
1836          break;
1837
1838       case OPCODE_MAX:
1839          emit_max(p, dst, dst_flags, args[0], args[1]);
1840          break;
1841
1842       case OPCODE_MIN:
1843          emit_min(p, dst, dst_flags, args[0], args[1]);
1844          break;
1845
1846       case OPCODE_SLT:
1847          emit_slt(p, dst, dst_flags, args[0], args[1]);
1848          break;
1849
1850       case OPCODE_SLE:
1851          emit_sle(p, dst, dst_flags, args[0], args[1]);
1852         break;
1853       case OPCODE_SGT:
1854          emit_sgt(p, dst, dst_flags, args[0], args[1]);
1855         break;
1856       case OPCODE_SGE:
1857          emit_sge(p, dst, dst_flags, args[0], args[1]);
1858          break;
1859       case OPCODE_SEQ:
1860          emit_seq(p, dst, dst_flags, args[0], args[1]);
1861         break;
1862       case OPCODE_SNE:
1863          emit_sne(p, dst, dst_flags, args[0], args[1]);
1864         break;
1865
1866       case OPCODE_SSG:
1867          emit_sign(p, dst, dst_flags, args[0]);
1868          break;
1869
1870       case OPCODE_LIT:
1871          emit_lit(c, dst, dst_flags, args[0]);
1872          break;
1873
1874          /* Texturing operations:
1875           */
1876       case OPCODE_TEX:
1877          emit_tex(c, dst, dst_flags, args[0], c->payload.depth[0].hw_reg,
1878                   inst->tex_idx, inst->tex_unit,
1879                   inst->tex_shadow);
1880          break;
1881
1882       case OPCODE_TXB:
1883          emit_txb(c, dst, dst_flags, args[0], c->payload.depth[0].hw_reg,
1884                   inst->tex_idx, inst->tex_unit);
1885          break;
1886
1887       case OPCODE_KIL:
1888          emit_kil(c, args[0]);
1889          break;
1890
1891       default:
1892          printf("Unsupported opcode %i (%s) in fragment shader\n",
1893                 inst->opcode, inst->opcode < MAX_OPCODE ?
1894                 _mesa_opcode_string(inst->opcode) :
1895                 "unknown");
1896       }
1897
1898       for (i = 0; i < 4; i++)
1899         if (inst->dst[i] && inst->dst[i]->spill_slot)
1900            emit_spill(c,
1901                       inst->dst[i]->hw_reg,
1902                       inst->dst[i]->spill_slot);
1903    }
1904
1905    /* Only properly tested on ILK */
1906    if (p->brw->intel.gen == 5) {
1907      brw_remove_duplicate_mrf_moves(p);
1908      if (c->dispatch_width == 16)
1909         brw_remove_grf_to_mrf_moves(p);
1910    }
1911
1912    if (unlikely(INTEL_DEBUG & DEBUG_WM)) {
1913       int i;
1914
1915      printf("wm-native:\n");
1916      for (i = 0; i < p->nr_insn; i++)
1917          brw_disasm(stdout, &p->store[i], p->brw->intel.gen);
1918       printf("\n");
1919    }
1920 }
1921