src/mesa/drivers/dri/i965/brw_wm_emit.c

   1 /*
   2  Copyright (C) Intel Corp.  2006.  All Rights Reserved.
   3  Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
   4  develop this 3D driver.
   5
   6  Permission is hereby granted, free of charge, to any person obtaining
   7  a copy of this software and associated documentation files (the
   8  "Software"), to deal in the Software without restriction, including
   9  without limitation the rights to use, copy, modify, merge, publish,
  10  distribute, sublicense, and/or sell copies of the Software, and to
  11  permit persons to whom the Software is furnished to do so, subject to
  12  the following conditions:
  13
  14  The above copyright notice and this permission notice (including the
  15  next paragraph) shall be included in all copies or substantial
  16  portions of the Software.
  17
  18  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
  19  EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  20  MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
  21  IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
  22  LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
  23  OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
  24  WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
  25
  26  **********************************************************************/
  27  /*
  28   * Authors:
  29   *   Keith Whitwell <keith@tungstengraphics.com>
  30   */
  31
  32
  33 #include "main/macros.h"
  34 #include "brw_context.h"
  35 #include "brw_wm.h"
  36
  37 /* Not quite sure how correct this is - need to understand horiz
  38  * vs. vertical strides a little better.
  39  */
  40 static INLINE struct brw_reg sechalf( struct brw_reg reg )
  41 {
  42    if (reg.vstride)
  43       reg.nr++;
  44    return reg;
  45 }
  46
  47 /* Payload R0:
  48  *
  49  * R0.0 -- pixel mask, one bit for each of 4 pixels in 4 tiles,
  50  *         corresponding to each of the 16 execution channels.
  51  * R0.1..8 -- ?
  52  * R1.0 -- triangle vertex 0.X
  53  * R1.1 -- triangle vertex 0.Y
  54  * R1.2 -- tile 0 x,y coords (2 packed uwords)
  55  * R1.3 -- tile 1 x,y coords (2 packed uwords)
  56  * R1.4 -- tile 2 x,y coords (2 packed uwords)
  57  * R1.5 -- tile 3 x,y coords (2 packed uwords)
  58  * R1.6 -- ?
  59  * R1.7 -- ?
  60  * R1.8 -- ?
  61  */
  62
  63
  64 static void emit_pixel_xy(struct brw_compile *p,
  65                           const struct brw_reg *dst,
  66                           GLuint mask)
  67 {
  68    struct brw_reg r1 = brw_vec1_grf(1, 0);
  69    struct brw_reg r1_uw = retype(r1, BRW_REGISTER_TYPE_UW);
  70
  71    brw_set_compression_control(p, BRW_COMPRESSION_NONE);
  72
  73    /* Calculate pixel centers by adding 1 or 0 to each of the
  74     * micro-tile coordinates passed in r1.
  75     */
  76    if (mask & WRITEMASK_X) {
  77       brw_ADD(p,
  78               vec16(retype(dst[0], BRW_REGISTER_TYPE_UW)),
  79               stride(suboffset(r1_uw, 4), 2, 4, 0),
  80               brw_imm_v(0x10101010));
  81    }
  82
  83    if (mask & WRITEMASK_Y) {
  84       brw_ADD(p,
  85               vec16(retype(dst[1], BRW_REGISTER_TYPE_UW)),
  86               stride(suboffset(r1_uw,5), 2, 4, 0),
  87               brw_imm_v(0x11001100));
  88    }
  89
  90    brw_set_compression_control(p, BRW_COMPRESSION_COMPRESSED);
  91 }
  92
  93
  94
  95 static void emit_delta_xy(struct brw_compile *p,
  96                           const struct brw_reg *dst,
  97                           GLuint mask,
  98                           const struct brw_reg *arg0)
  99 {
 100    struct brw_reg r1 = brw_vec1_grf(1, 0);
 101
 102    /* Calc delta X,Y by subtracting origin in r1 from the pixel
 103     * centers.
 104     */
 105    if (mask & WRITEMASK_X) {
 106       brw_ADD(p,
 107               dst[0],
 108               retype(arg0[0], BRW_REGISTER_TYPE_UW),
 109               negate(r1));
 110    }
 111
 112    if (mask & WRITEMASK_Y) {
 113       brw_ADD(p,
 114               dst[1],
 115               retype(arg0[1], BRW_REGISTER_TYPE_UW),
 116               negate(suboffset(r1,1)));
 117
 118    }
 119 }
 120
 121 static void emit_wpos_xy(struct brw_wm_compile *c,
 122                          const struct brw_reg *dst,
 123                          GLuint mask,
 124                          const struct brw_reg *arg0)
 125 {
 126    struct brw_compile *p = &c->func;
 127
 128    /* Calculate the pixel offset from window bottom left into destination
 129     * X and Y channels.
 130     */
 131    if (mask & WRITEMASK_X) {
 132       /* X' = X - origin */
 133       brw_ADD(p,
 134               dst[0],
 135               retype(arg0[0], BRW_REGISTER_TYPE_W),
 136               brw_imm_d(0 - c->key.origin_x));
 137    }
 138
 139    if (mask & WRITEMASK_Y) {
 140       /* Y' = height - (Y - origin_y) = height + origin_y - Y */
 141       brw_ADD(p,
 142               dst[1],
 143               negate(retype(arg0[1], BRW_REGISTER_TYPE_W)),
 144               brw_imm_d(c->key.origin_y + c->key.drawable_height - 1));
 145    }
 146 }
 147
 148
 149 static void emit_pixel_w( struct brw_compile *p,
 150                           const struct brw_reg *dst,
 151                           GLuint mask,
 152                           const struct brw_reg *arg0,
 153                           const struct brw_reg *deltas)
 154 {
 155    /* Don't need this if all you are doing is interpolating color, for
 156     * instance.
 157     */
 158    if (mask & WRITEMASK_W) {
 159       struct brw_reg interp3 = brw_vec1_grf(arg0[0].nr+1, 4);
 160
 161       /* Calc 1/w - just linterp wpos[3] optimized by putting the
 162        * result straight into a message reg.
 163        */
 164       brw_LINE(p, brw_null_reg(), interp3, deltas[0]);
 165       brw_MAC(p, brw_message_reg(2), suboffset(interp3, 1), deltas[1]);
 166
 167       /* Calc w */
 168       brw_math_16( p, dst[3],
 169                    BRW_MATH_FUNCTION_INV,
 170                    BRW_MATH_SATURATE_NONE,
 171                    2, brw_null_reg(),
 172                    BRW_MATH_PRECISION_FULL);
 173    }
 174 }
 175
 176
 177
 178 static void emit_linterp( struct brw_compile *p,
 179                          const struct brw_reg *dst,
 180                          GLuint mask,
 181                          const struct brw_reg *arg0,
 182                          const struct brw_reg *deltas )
 183 {
 184    struct brw_reg interp[4];
 185    GLuint nr = arg0[0].nr;
 186    GLuint i;
 187
 188    interp[0] = brw_vec1_grf(nr, 0);
 189    interp[1] = brw_vec1_grf(nr, 4);
 190    interp[2] = brw_vec1_grf(nr+1, 0);
 191    interp[3] = brw_vec1_grf(nr+1, 4);
 192
 193    for (i = 0; i < 4; i++) {
 194       if (mask & (1<<i)) {
 195          brw_LINE(p, brw_null_reg(), interp[i], deltas[0]);
 196          brw_MAC(p, dst[i], suboffset(interp[i],1), deltas[1]);
 197       }
 198    }
 199 }
 200
 201
 202 static void emit_pinterp( struct brw_compile *p,
 203                           const struct brw_reg *dst,
 204                           GLuint mask,
 205                           const struct brw_reg *arg0,
 206                           const struct brw_reg *deltas,
 207                           const struct brw_reg *w)
 208 {
 209    struct brw_reg interp[4];
 210    GLuint nr = arg0[0].nr;
 211    GLuint i;
 212
 213    interp[0] = brw_vec1_grf(nr, 0);
 214    interp[1] = brw_vec1_grf(nr, 4);
 215    interp[2] = brw_vec1_grf(nr+1, 0);
 216    interp[3] = brw_vec1_grf(nr+1, 4);
 217
 218    for (i = 0; i < 4; i++) {
 219       if (mask & (1<<i)) {
 220          brw_LINE(p, brw_null_reg(), interp[i], deltas[0]);
 221          brw_MAC(p, dst[i], suboffset(interp[i],1), deltas[1]);
 222       }
 223    }
 224    for (i = 0; i < 4; i++) {
 225       if (mask & (1<<i)) {
 226          brw_MUL(p, dst[i], dst[i], w[3]);
 227       }
 228    }
 229 }
 230
 231
 232 static void emit_cinterp( struct brw_compile *p,
 233                          const struct brw_reg *dst,
 234                          GLuint mask,
 235                          const struct brw_reg *arg0 )
 236 {
 237    struct brw_reg interp[4];
 238    GLuint nr = arg0[0].nr;
 239    GLuint i;
 240
 241    interp[0] = brw_vec1_grf(nr, 0);
 242    interp[1] = brw_vec1_grf(nr, 4);
 243    interp[2] = brw_vec1_grf(nr+1, 0);
 244    interp[3] = brw_vec1_grf(nr+1, 4);
 245
 246    for (i = 0; i < 4; i++) {
 247       if (mask & (1<<i)) {
 248          brw_MOV(p, dst[i], suboffset(interp[i],3));    /* TODO: optimize away like other moves */
 249       }
 250    }
 251 }
 252
 253 /* Sets the destination channels to 1.0 or 0.0 according to glFrontFacing. */
 254 static void emit_frontfacing( struct brw_compile *p,
 255                               const struct brw_reg *dst,
 256                               GLuint mask )
 257 {
 258    struct brw_reg r1_6ud = retype(brw_vec1_grf(1, 6), BRW_REGISTER_TYPE_UD);
 259    GLuint i;
 260
 261    if (!(mask & WRITEMASK_XYZW))
 262       return;
 263
 264    for (i = 0; i < 4; i++) {
 265       if (mask & (1<<i)) {
 266          brw_MOV(p, dst[i], brw_imm_f(0.0));
 267       }
 268    }
 269
 270    /* bit 31 is "primitive is back face", so checking < (1 << 31) gives
 271     * us front face
 272     */
 273    brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_L, r1_6ud, brw_imm_ud(1 << 31));
 274    for (i = 0; i < 4; i++) {
 275       if (mask & (1<<i)) {
 276          brw_MOV(p, dst[i], brw_imm_f(1.0));
 277       }
 278    }
 279    brw_set_predicate_control_flag_value(p, 0xff);
 280 }
 281
 282 /* For OPCODE_DDX and OPCODE_DDY, per channel of output we've got input
 283  * looking like:
 284  *
 285  * arg0: ss0.tl ss0.tr ss0.bl ss0.br ss1.tl ss1.tr ss1.bl ss1.br
 286  *
 287  * and we're trying to produce:
 288  *
 289  *           DDX                     DDY
 290  * dst: (ss0.tr - ss0.tl)     (ss0.tl - ss0.bl)
 291  *      (ss0.tr - ss0.tl)     (ss0.tr - ss0.br)
 292  *      (ss0.br - ss0.bl)     (ss0.tl - ss0.bl)
 293  *      (ss0.br - ss0.bl)     (ss0.tr - ss0.br)
 294  *      (ss1.tr - ss1.tl)     (ss1.tl - ss1.bl)
 295  *      (ss1.tr - ss1.tl)     (ss1.tr - ss1.br)
 296  *      (ss1.br - ss1.bl)     (ss1.tl - ss1.bl)
 297  *      (ss1.br - ss1.bl)     (ss1.tr - ss1.br)
 298  *
 299  * and add another set of two more subspans if in 16-pixel dispatch mode.
 300  *
 301  * For DDX, it ends up being easy: width = 2, horiz=0 gets us the same result
 302  * for each pair, and vertstride = 2 jumps us 2 elements after processing a
 303  * pair. But for DDY, it's harder, as we want to produce the pairs swizzled
 304  * between each other.  We could probably do it like ddx and swizzle the right
 305  * order later, but bail for now and just produce
 306  * ((ss0.tl - ss0.bl)x4 (ss1.tl - ss1.bl)x4)
 307  */
 308 void emit_ddxy(struct brw_compile *p,
 309                const struct brw_reg *dst,
 310                GLuint mask,
 311                GLboolean is_ddx,
 312                const struct brw_reg *arg0)
 313 {
 314    int i;
 315    struct brw_reg src0, src1;
 316
 317    if (mask & SATURATE)
 318       brw_set_saturate(p, 1);
 319    for (i = 0; i < 4; i++ ) {
 320       if (mask & (1<<i)) {
 321          if (is_ddx) {
 322             src0 = brw_reg(arg0[i].file, arg0[i].nr, 1,
 323                            BRW_REGISTER_TYPE_F,
 324                            BRW_VERTICAL_STRIDE_2,
 325                            BRW_WIDTH_2,
 326                            BRW_HORIZONTAL_STRIDE_0,
 327                            BRW_SWIZZLE_XYZW, WRITEMASK_XYZW);
 328             src1 = brw_reg(arg0[i].file, arg0[i].nr, 0,
 329                            BRW_REGISTER_TYPE_F,
 330                            BRW_VERTICAL_STRIDE_2,
 331                            BRW_WIDTH_2,
 332                            BRW_HORIZONTAL_STRIDE_0,
 333                            BRW_SWIZZLE_XYZW, WRITEMASK_XYZW);
 334          } else {
 335             src0 = brw_reg(arg0[i].file, arg0[i].nr, 0,
 336                            BRW_REGISTER_TYPE_F,
 337                            BRW_VERTICAL_STRIDE_4,
 338                            BRW_WIDTH_4,
 339                            BRW_HORIZONTAL_STRIDE_0,
 340                            BRW_SWIZZLE_XYZW, WRITEMASK_XYZW);
 341             src1 = brw_reg(arg0[i].file, arg0[i].nr, 2,
 342                            BRW_REGISTER_TYPE_F,
 343                            BRW_VERTICAL_STRIDE_4,
 344                            BRW_WIDTH_4,
 345                            BRW_HORIZONTAL_STRIDE_0,
 346                            BRW_SWIZZLE_XYZW, WRITEMASK_XYZW);
 347          }
 348          brw_ADD(p, dst[i], src0, negate(src1));
 349       }
 350    }
 351    if (mask & SATURATE)
 352       brw_set_saturate(p, 0);
 353 }
 354
 355 void emit_alu1(struct brw_compile *p,
 356                struct brw_instruction *(*func)(struct brw_compile *,
 357                                                struct brw_reg,
 358                                                struct brw_reg),
 359                const struct brw_reg *dst,
 360                GLuint mask,
 361                const struct brw_reg *arg0)
 362 {
 363    GLuint i;
 364
 365    if (mask & SATURATE)
 366       brw_set_saturate(p, 1);
 367
 368    for (i = 0; i < 4; i++) {
 369       if (mask & (1<<i)) {
 370          func(p, dst[i], arg0[i]);
 371       }
 372    }
 373
 374    if (mask & SATURATE)
 375       brw_set_saturate(p, 0);
 376 }
 377
 378
 379 void emit_alu2(struct brw_compile *p,
 380                struct brw_instruction *(*func)(struct brw_compile *,
 381                                                struct brw_reg,
 382                                                struct brw_reg,
 383                                                struct brw_reg),
 384                const struct brw_reg *dst,
 385                GLuint mask,
 386                const struct brw_reg *arg0,
 387                const struct brw_reg *arg1)
 388 {
 389    GLuint i;
 390
 391    if (mask & SATURATE)
 392       brw_set_saturate(p, 1);
 393
 394    for (i = 0; i < 4; i++) {
 395       if (mask & (1<<i)) {
 396          func(p, dst[i], arg0[i], arg1[i]);
 397       }
 398    }
 399
 400    if (mask & SATURATE)
 401       brw_set_saturate(p, 0);
 402 }
 403
 404
 405 static void emit_mad( struct brw_compile *p,
 406                       const struct brw_reg *dst,
 407                       GLuint mask,
 408                       const struct brw_reg *arg0,
 409                       const struct brw_reg *arg1,
 410                       const struct brw_reg *arg2 )
 411 {
 412    GLuint i;
 413
 414    for (i = 0; i < 4; i++) {
 415       if (mask & (1<<i)) {
 416          brw_MUL(p, dst[i], arg0[i], arg1[i]);
 417
 418          brw_set_saturate(p, (mask & SATURATE) ? 1 : 0);
 419          brw_ADD(p, dst[i], dst[i], arg2[i]);
 420          brw_set_saturate(p, 0);
 421       }
 422    }
 423 }
 424
 425 static void emit_lrp( struct brw_compile *p,
 426                       const struct brw_reg *dst,
 427                       GLuint mask,
 428                       const struct brw_reg *arg0,
 429                       const struct brw_reg *arg1,
 430                       const struct brw_reg *arg2 )
 431 {
 432    GLuint i;
 433
 434    /* Uses dst as a temporary:
 435     */
 436    for (i = 0; i < 4; i++) {
 437       if (mask & (1<<i)) {
 438          /* Can I use the LINE instruction for this?
 439           */
 440          brw_ADD(p, dst[i], negate(arg0[i]), brw_imm_f(1.0));
 441          brw_MUL(p, brw_null_reg(), dst[i], arg2[i]);
 442
 443          brw_set_saturate(p, (mask & SATURATE) ? 1 : 0);
 444          brw_MAC(p, dst[i], arg0[i], arg1[i]);
 445          brw_set_saturate(p, 0);
 446       }
 447    }
 448 }
 449
 450 static void emit_sop( struct brw_compile *p,
 451                       const struct brw_reg *dst,
 452                       GLuint mask,
 453                       GLuint cond,
 454                       const struct brw_reg *arg0,
 455                       const struct brw_reg *arg1 )
 456 {
 457    GLuint i;
 458
 459    for (i = 0; i < 4; i++) {
 460       if (mask & (1<<i)) {
 461          brw_MOV(p, dst[i], brw_imm_f(0));
 462          brw_CMP(p, brw_null_reg(), cond, arg0[i], arg1[i]);
 463          brw_MOV(p, dst[i], brw_imm_f(1.0));
 464          brw_set_predicate_control_flag_value(p, 0xff);
 465       }
 466    }
 467 }
 468
 469 static void emit_slt( struct brw_compile *p,
 470                       const struct brw_reg *dst,
 471                       GLuint mask,
 472                       const struct brw_reg *arg0,
 473                       const struct brw_reg *arg1 )
 474 {
 475    emit_sop(p, dst, mask, BRW_CONDITIONAL_L, arg0, arg1);
 476 }
 477
 478 static void emit_sle( struct brw_compile *p,
 479                       const struct brw_reg *dst,
 480                       GLuint mask,
 481                       const struct brw_reg *arg0,
 482                       const struct brw_reg *arg1 )
 483 {
 484    emit_sop(p, dst, mask, BRW_CONDITIONAL_LE, arg0, arg1);
 485 }
 486
 487 static void emit_sgt( struct brw_compile *p,
 488                       const struct brw_reg *dst,
 489                       GLuint mask,
 490                       const struct brw_reg *arg0,
 491                       const struct brw_reg *arg1 )
 492 {
 493    emit_sop(p, dst, mask, BRW_CONDITIONAL_G, arg0, arg1);
 494 }
 495
 496 static void emit_sge( struct brw_compile *p,
 497                       const struct brw_reg *dst,
 498                       GLuint mask,
 499                       const struct brw_reg *arg0,
 500                       const struct brw_reg *arg1 )
 501 {
 502    emit_sop(p, dst, mask, BRW_CONDITIONAL_GE, arg0, arg1);
 503 }
 504
 505 static void emit_seq( struct brw_compile *p,
 506                       const struct brw_reg *dst,
 507                       GLuint mask,
 508                       const struct brw_reg *arg0,
 509                       const struct brw_reg *arg1 )
 510 {
 511    emit_sop(p, dst, mask, BRW_CONDITIONAL_EQ, arg0, arg1);
 512 }
 513
 514 static void emit_sne( struct brw_compile *p,
 515                       const struct brw_reg *dst,
 516                       GLuint mask,
 517                       const struct brw_reg *arg0,
 518                       const struct brw_reg *arg1 )
 519 {
 520    emit_sop(p, dst, mask, BRW_CONDITIONAL_NEQ, arg0, arg1);
 521 }
 522
 523 static void emit_cmp( struct brw_compile *p,
 524                       const struct brw_reg *dst,
 525                       GLuint mask,
 526                       const struct brw_reg *arg0,
 527                       const struct brw_reg *arg1,
 528                       const struct brw_reg *arg2 )
 529 {
 530    GLuint i;
 531
 532    for (i = 0; i < 4; i++) {
 533       if (mask & (1<<i)) {
 534          brw_set_saturate(p, (mask & SATURATE) ? 1 : 0);
 535          brw_MOV(p, dst[i], arg2[i]);
 536          brw_set_saturate(p, 0);
 537
 538          brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_L, arg0[i], brw_imm_f(0));
 539
 540          brw_set_saturate(p, (mask & SATURATE) ? 1 : 0);
 541          brw_MOV(p, dst[i], arg1[i]);
 542          brw_set_saturate(p, 0);
 543          brw_set_predicate_control_flag_value(p, 0xff);
 544       }
 545    }
 546 }
 547
 548 static void emit_max( struct brw_compile *p,
 549                       const struct brw_reg *dst,
 550                       GLuint mask,
 551                       const struct brw_reg *arg0,
 552                       const struct brw_reg *arg1 )
 553 {
 554    GLuint i;
 555
 556    for (i = 0; i < 4; i++) {
 557       if (mask & (1<<i)) {
 558          brw_set_saturate(p, (mask & SATURATE) ? 1 : 0);
 559          brw_MOV(p, dst[i], arg0[i]);
 560          brw_set_saturate(p, 0);
 561
 562          brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_L, arg0[i], arg1[i]);
 563
 564          brw_set_saturate(p, (mask & SATURATE) ? 1 : 0);
 565          brw_MOV(p, dst[i], arg1[i]);
 566          brw_set_saturate(p, 0);
 567          brw_set_predicate_control_flag_value(p, 0xff);
 568       }
 569    }
 570 }
 571
 572 static void emit_min( struct brw_compile *p,
 573                       const struct brw_reg *dst,
 574                       GLuint mask,
 575                       const struct brw_reg *arg0,
 576                       const struct brw_reg *arg1 )
 577 {
 578    GLuint i;
 579
 580    for (i = 0; i < 4; i++) {
 581       if (mask & (1<<i)) {
 582          brw_set_saturate(p, (mask & SATURATE) ? 1 : 0);
 583          brw_MOV(p, dst[i], arg1[i]);
 584          brw_set_saturate(p, 0);
 585
 586          brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_L, arg0[i], arg1[i]);
 587
 588          brw_set_saturate(p, (mask & SATURATE) ? 1 : 0);
 589          brw_MOV(p, dst[i], arg0[i]);
 590          brw_set_saturate(p, 0);
 591          brw_set_predicate_control_flag_value(p, 0xff);
 592       }
 593    }
 594 }
 595
 596
 597 static void emit_dp3( struct brw_compile *p,
 598                       const struct brw_reg *dst,
 599                       GLuint mask,
 600                       const struct brw_reg *arg0,
 601                       const struct brw_reg *arg1 )
 602 {
 603    int dst_chan = _mesa_ffs(mask & WRITEMASK_XYZW) - 1;
 604
 605    if (!(mask & WRITEMASK_XYZW))
 606       return; /* Do not emit dead code */
 607
 608    assert(is_power_of_two(mask & WRITEMASK_XYZW));
 609
 610    brw_MUL(p, brw_null_reg(), arg0[0], arg1[0]);
 611    brw_MAC(p, brw_null_reg(), arg0[1], arg1[1]);
 612
 613    brw_set_saturate(p, (mask & SATURATE) ? 1 : 0);
 614    brw_MAC(p, dst[dst_chan], arg0[2], arg1[2]);
 615    brw_set_saturate(p, 0);
 616 }
 617
 618
 619 static void emit_dp4( struct brw_compile *p,
 620                       const struct brw_reg *dst,
 621                       GLuint mask,
 622                       const struct brw_reg *arg0,
 623                       const struct brw_reg *arg1 )
 624 {
 625    int dst_chan = _mesa_ffs(mask & WRITEMASK_XYZW) - 1;
 626
 627    if (!(mask & WRITEMASK_XYZW))
 628       return; /* Do not emit dead code */
 629
 630    assert(is_power_of_two(mask & WRITEMASK_XYZW));
 631
 632    brw_MUL(p, brw_null_reg(), arg0[0], arg1[0]);
 633    brw_MAC(p, brw_null_reg(), arg0[1], arg1[1]);
 634    brw_MAC(p, brw_null_reg(), arg0[2], arg1[2]);
 635
 636    brw_set_saturate(p, (mask & SATURATE) ? 1 : 0);
 637    brw_MAC(p, dst[dst_chan], arg0[3], arg1[3]);
 638    brw_set_saturate(p, 0);
 639 }
 640
 641
 642 static void emit_dph( struct brw_compile *p,
 643                       const struct brw_reg *dst,
 644                       GLuint mask,
 645                       const struct brw_reg *arg0,
 646                       const struct brw_reg *arg1 )
 647 {
 648    const int dst_chan = _mesa_ffs(mask & WRITEMASK_XYZW) - 1;
 649
 650    if (!(mask & WRITEMASK_XYZW))
 651       return; /* Do not emit dead code */
 652
 653    assert(is_power_of_two(mask & WRITEMASK_XYZW));
 654
 655    brw_MUL(p, brw_null_reg(), arg0[0], arg1[0]);
 656    brw_MAC(p, brw_null_reg(), arg0[1], arg1[1]);
 657    brw_MAC(p, dst[dst_chan], arg0[2], arg1[2]);
 658
 659    brw_set_saturate(p, (mask & SATURATE) ? 1 : 0);
 660    brw_ADD(p, dst[dst_chan], dst[dst_chan], arg1[3]);
 661    brw_set_saturate(p, 0);
 662 }
 663
 664
 665 static void emit_xpd( struct brw_compile *p,
 666                       const struct brw_reg *dst,
 667                       GLuint mask,
 668                       const struct brw_reg *arg0,
 669                       const struct brw_reg *arg1 )
 670 {
 671    GLuint i;
 672
 673    assert(!(mask & WRITEMASK_W) == WRITEMASK_X);
 674
 675    for (i = 0 ; i < 3; i++) {
 676       if (mask & (1<<i)) {
 677          GLuint i2 = (i+2)%3;
 678          GLuint i1 = (i+1)%3;
 679
 680          brw_MUL(p, brw_null_reg(), negate(arg0[i2]), arg1[i1]);
 681
 682          brw_set_saturate(p, (mask & SATURATE) ? 1 : 0);
 683          brw_MAC(p, dst[i], arg0[i1], arg1[i2]);
 684          brw_set_saturate(p, 0);
 685       }
 686    }
 687 }
 688
 689
 690 static void emit_math1( struct brw_compile *p,
 691                         GLuint function,
 692                         const struct brw_reg *dst,
 693                         GLuint mask,
 694                         const struct brw_reg *arg0 )
 695 {
 696    int dst_chan = _mesa_ffs(mask & WRITEMASK_XYZW) - 1;
 697
 698    if (!(mask & WRITEMASK_XYZW))
 699       return; /* Do not emit dead code */
 700
 701    assert(is_power_of_two(mask & WRITEMASK_XYZW));
 702
 703    brw_MOV(p, brw_message_reg(2), arg0[0]);
 704
 705    /* Send two messages to perform all 16 operations:
 706     */
 707    brw_math_16(p,
 708                dst[dst_chan],
 709                function,
 710                (mask & SATURATE) ? BRW_MATH_SATURATE_SATURATE : BRW_MATH_SATURATE_NONE,
 711                2,
 712                brw_null_reg(),
 713                BRW_MATH_PRECISION_FULL);
 714 }
 715
 716
 717 static void emit_math2( struct brw_compile *p,
 718                         GLuint function,
 719                         const struct brw_reg *dst,
 720                         GLuint mask,
 721                         const struct brw_reg *arg0,
 722                         const struct brw_reg *arg1)
 723 {
 724    int dst_chan = _mesa_ffs(mask & WRITEMASK_XYZW) - 1;
 725
 726    if (!(mask & WRITEMASK_XYZW))
 727       return; /* Do not emit dead code */
 728
 729    assert(is_power_of_two(mask & WRITEMASK_XYZW));
 730
 731    brw_push_insn_state(p);
 732
 733    brw_set_compression_control(p, BRW_COMPRESSION_NONE);
 734    brw_MOV(p, brw_message_reg(2), arg0[0]);
 735    brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF);
 736    brw_MOV(p, brw_message_reg(4), sechalf(arg0[0]));
 737
 738    brw_set_compression_control(p, BRW_COMPRESSION_NONE);
 739    brw_MOV(p, brw_message_reg(3), arg1[0]);
 740    brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF);
 741    brw_MOV(p, brw_message_reg(5), sechalf(arg1[0]));
 742
 743
 744    /* Send two messages to perform all 16 operations:
 745     */
 746    brw_set_compression_control(p, BRW_COMPRESSION_NONE);
 747    brw_math(p,
 748             dst[dst_chan],
 749             function,
 750             (mask & SATURATE) ? BRW_MATH_SATURATE_SATURATE : BRW_MATH_SATURATE_NONE,
 751             2,
 752             brw_null_reg(),
 753             BRW_MATH_DATA_VECTOR,
 754             BRW_MATH_PRECISION_FULL);
 755
 756    brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF);
 757    brw_math(p,
 758             offset(dst[dst_chan],1),
 759             function,
 760             (mask & SATURATE) ? BRW_MATH_SATURATE_SATURATE : BRW_MATH_SATURATE_NONE,
 761             4,
 762             brw_null_reg(),
 763             BRW_MATH_DATA_VECTOR,
 764             BRW_MATH_PRECISION_FULL);
 765
 766    brw_pop_insn_state(p);
 767 }
 768
 769
 770
 771 static void emit_tex( struct brw_wm_compile *c,
 772                       const struct brw_wm_instruction *inst,
 773                       struct brw_reg *dst,
 774                       GLuint dst_flags,
 775                       struct brw_reg *arg )
 776 {
 777    struct brw_compile *p = &c->func;
 778    GLuint msgLength, responseLength;
 779    GLuint i, nr;
 780    GLuint emit;
 781    GLuint msg_type;
 782
 783    /* How many input regs are there?
 784     */
 785    switch (inst->tex_idx) {
 786    case TEXTURE_1D_INDEX:
 787       emit = WRITEMASK_X;
 788       nr = 1;
 789       break;
 790    case TEXTURE_2D_INDEX:
 791    case TEXTURE_RECT_INDEX:
 792       emit = WRITEMASK_XY;
 793       nr = 2;
 794       break;
 795    case TEXTURE_3D_INDEX:
 796    case TEXTURE_CUBE_INDEX:
 797       emit = WRITEMASK_XYZ;
 798       nr = 3;
 799       break;
 800    default:
 801       /* unexpected target */
 802       abort();
 803    }
 804
 805    if (inst->tex_shadow) {
 806       nr = 4;
 807       emit |= WRITEMASK_W;
 808    }
 809
 810    msgLength = 1;
 811
 812    for (i = 0; i < nr; i++) {
 813       static const GLuint swz[4] = {0,1,2,2};
 814       if (emit & (1<<i))
 815          brw_MOV(p, brw_message_reg(msgLength+1), arg[swz[i]]);
 816       else
 817          brw_MOV(p, brw_message_reg(msgLength+1), brw_imm_f(0));
 818       msgLength += 2;
 819    }
 820
 821    responseLength = 8;          /* always */
 822
 823    if (BRW_IS_IGDNG(p->brw)) {
 824        if (inst->tex_shadow)
 825            msg_type = BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_COMPARE_IGDNG;
 826        else
 827            msg_type = BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_IGDNG;
 828    } else {
 829        if (inst->tex_shadow)
 830            msg_type = BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_COMPARE;
 831        else
 832            msg_type = BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE;
 833    }
 834
 835    brw_SAMPLE(p,
 836               retype(vec16(dst[0]), BRW_REGISTER_TYPE_UW),
 837               1,
 838               retype(c->payload.depth[0].hw_reg, BRW_REGISTER_TYPE_UW),
 839               SURF_INDEX_TEXTURE(inst->tex_unit),
 840               inst->tex_unit,     /* sampler */
 841               inst->writemask,
 842               msg_type,
 843               responseLength,
 844               msgLength,
 845               0,
 846               1,
 847               BRW_SAMPLER_SIMD_MODE_SIMD16);
 848 }
 849
 850
 851 static void emit_txb( struct brw_wm_compile *c,
 852                       const struct brw_wm_instruction *inst,
 853                       struct brw_reg *dst,
 854                       GLuint dst_flags,
 855                       struct brw_reg *arg )
 856 {
 857    struct brw_compile *p = &c->func;
 858    GLuint msgLength;
 859    GLuint msg_type;
 860    /* Shadow ignored for txb.
 861     */
 862    switch (inst->tex_idx) {
 863    case TEXTURE_1D_INDEX:
 864       brw_MOV(p, brw_message_reg(2), arg[0]);
 865       brw_MOV(p, brw_message_reg(4), brw_imm_f(0));
 866       brw_MOV(p, brw_message_reg(6), brw_imm_f(0));
 867       break;
 868    case TEXTURE_2D_INDEX:
 869    case TEXTURE_RECT_INDEX:
 870       brw_MOV(p, brw_message_reg(2), arg[0]);
 871       brw_MOV(p, brw_message_reg(4), arg[1]);
 872       brw_MOV(p, brw_message_reg(6), brw_imm_f(0));
 873       break;
 874    case TEXTURE_3D_INDEX:
 875    case TEXTURE_CUBE_INDEX:
 876       brw_MOV(p, brw_message_reg(2), arg[0]);
 877       brw_MOV(p, brw_message_reg(4), arg[1]);
 878       brw_MOV(p, brw_message_reg(6), arg[2]);
 879       break;
 880    default:
 881       /* unexpected target */
 882       abort();
 883    }
 884
 885    brw_MOV(p, brw_message_reg(8), arg[3]);
 886    msgLength = 9;
 887
 888    if (BRW_IS_IGDNG(p->brw))
 889        msg_type = BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_BIAS_IGDNG;
 890    else
 891        msg_type = BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_BIAS;
 892
 893    brw_SAMPLE(p,
 894               retype(vec16(dst[0]), BRW_REGISTER_TYPE_UW),
 895               1,
 896               retype(c->payload.depth[0].hw_reg, BRW_REGISTER_TYPE_UW),
 897               SURF_INDEX_TEXTURE(inst->tex_unit),
 898               inst->tex_unit,     /* sampler */
 899               inst->writemask,
 900               msg_type,
 901               8,                /* responseLength */
 902               msgLength,
 903               0,
 904               1,
 905               BRW_SAMPLER_SIMD_MODE_SIMD16);
 906 }
 907
 908
 909 static void emit_lit( struct brw_compile *p,
 910                       const struct brw_reg *dst,
 911                       GLuint mask,
 912                       const struct brw_reg *arg0 )
 913 {
 914    assert((mask & WRITEMASK_XW) == 0);
 915
 916    if (mask & WRITEMASK_Y) {
 917       brw_set_saturate(p, (mask & SATURATE) ? 1 : 0);
 918       brw_MOV(p, dst[1], arg0[0]);
 919       brw_set_saturate(p, 0);
 920    }
 921
 922    if (mask & WRITEMASK_Z) {
 923       emit_math2(p, BRW_MATH_FUNCTION_POW,
 924                  &dst[2],
 925                  WRITEMASK_X | (mask & SATURATE),
 926                  &arg0[1],
 927                  &arg0[3]);
 928    }
 929
 930    /* Ordinarily you'd use an iff statement to skip or shortcircuit
 931     * some of the POW calculations above, but 16-wide iff statements
 932     * seem to lock c1 hardware, so this is a nasty workaround:
 933     */
 934    brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_LE, arg0[0], brw_imm_f(0));
 935    {
 936       if (mask & WRITEMASK_Y)
 937          brw_MOV(p, dst[1], brw_imm_f(0));
 938
 939       if (mask & WRITEMASK_Z)
 940          brw_MOV(p, dst[2], brw_imm_f(0));
 941    }
 942    brw_set_predicate_control(p, BRW_PREDICATE_NONE);
 943 }
 944
 945
 946 /* Kill pixel - set execution mask to zero for those pixels which
 947  * fail.
 948  */
 949 static void emit_kil( struct brw_wm_compile *c,
 950                       struct brw_reg *arg0)
 951 {
 952    struct brw_compile *p = &c->func;
 953    struct brw_reg r0uw = retype(brw_vec1_grf(0, 0), BRW_REGISTER_TYPE_UW);
 954    GLuint i;
 955
 956    /* XXX - usually won't need 4 compares!
 957     */
 958    for (i = 0; i < 4; i++) {
 959       brw_push_insn_state(p);
 960       brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_GE, arg0[i], brw_imm_f(0));
 961       brw_set_predicate_control_flag_value(p, 0xff);
 962       brw_set_compression_control(p, BRW_COMPRESSION_NONE);
 963       brw_AND(p, r0uw, brw_flag_reg(), r0uw);
 964       brw_pop_insn_state(p);
 965    }
 966 }
 967
 968 /* KIL_NV kills the pixels that are currently executing, not based on a test
 969  * of the arguments.
 970  */
 971 static void emit_kil_nv( struct brw_wm_compile *c )
 972 {
 973    struct brw_compile *p = &c->func;
 974    struct brw_reg r0uw = retype(brw_vec1_grf(0, 0), BRW_REGISTER_TYPE_UW);
 975
 976    brw_push_insn_state(p);
 977    brw_set_mask_control(p, BRW_MASK_DISABLE);
 978    brw_NOT(p, c->emit_mask_reg, brw_mask_reg(1)); //IMASK
 979    brw_AND(p, r0uw, c->emit_mask_reg, r0uw);
 980    brw_pop_insn_state(p);
 981 }
 982
 983 static void fire_fb_write( struct brw_wm_compile *c,
 984                            GLuint base_reg,
 985                            GLuint nr,
 986                            GLuint target,
 987                            GLuint eot )
 988 {
 989    struct brw_compile *p = &c->func;
 990
 991    /* Pass through control information:
 992     */
 993 /*  mov (8) m1.0<1>:ud   r1.0<8;8,1>:ud   { Align1 NoMask } */
 994    {
 995       brw_push_insn_state(p);
 996       brw_set_mask_control(p, BRW_MASK_DISABLE); /* ? */
 997       brw_set_compression_control(p, BRW_COMPRESSION_NONE);
 998       brw_MOV(p,
 999                brw_message_reg(base_reg + 1),
1000                brw_vec8_grf(1, 0));
1001       brw_pop_insn_state(p);
1002    }
1003
1004    /* Send framebuffer write message: */
1005 /*  send (16) null.0<1>:uw m0               r0.0<8;8,1>:uw   0x85a04000:ud    { Align1 EOT } */
1006    brw_fb_WRITE(p,
1007                 retype(vec16(brw_null_reg()), BRW_REGISTER_TYPE_UW),
1008                 base_reg,
1009                 retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UW),
1010                 target,
1011                 nr,
1012                 0,
1013                 eot);
1014 }
1015
1016
1017 static void emit_aa( struct brw_wm_compile *c,
1018                      struct brw_reg *arg1,
1019                      GLuint reg )
1020 {
1021    struct brw_compile *p = &c->func;
1022    GLuint comp = c->key.aa_dest_stencil_reg / 2;
1023    GLuint off = c->key.aa_dest_stencil_reg % 2;
1024    struct brw_reg aa = offset(arg1[comp], off);
1025
1026    brw_push_insn_state(p);
1027    brw_set_compression_control(p, BRW_COMPRESSION_NONE); /* ?? */
1028    brw_MOV(p, brw_message_reg(reg), aa);
1029    brw_pop_insn_state(p);
1030 }
1031
1032
1033 /* Post-fragment-program processing.  Send the results to the
1034  * framebuffer.
1035  * \param arg0  the fragment color
1036  * \param arg1  the pass-through depth value
1037  * \param arg2  the shader-computed depth value
1038  */
1039 static void emit_fb_write( struct brw_wm_compile *c,
1040                            struct brw_reg *arg0,
1041                            struct brw_reg *arg1,
1042                            struct brw_reg *arg2,
1043                            GLuint target,
1044                            GLuint eot)
1045 {
1046    struct brw_compile *p = &c->func;
1047    GLuint nr = 2;
1048    GLuint channel;
1049
1050    /* Reserve a space for AA - may not be needed:
1051     */
1052    if (c->key.aa_dest_stencil_reg)
1053       nr += 1;
1054
1055    /* I don't really understand how this achieves the color interleave
1056     * (ie RGBARGBA) in the result:  [Do the saturation here]
1057     */
1058    {
1059       brw_push_insn_state(p);
1060
1061       for (channel = 0; channel < 4; channel++) {
1062          /*  mov (8) m2.0<1>:ud   r28.0<8;8,1>:ud  { Align1 } */
1063          /*  mov (8) m6.0<1>:ud   r29.0<8;8,1>:ud  { Align1 SecHalf } */
1064
1065          brw_set_compression_control(p, BRW_COMPRESSION_NONE);
1066          brw_MOV(p,
1067                  brw_message_reg(nr + channel),
1068                  arg0[channel]);
1069
1070          brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF);
1071          brw_MOV(p,
1072                  brw_message_reg(nr + channel + 4),
1073                  sechalf(arg0[channel]));
1074       }
1075
1076       /* skip over the regs populated above:
1077        */
1078       nr += 8;
1079
1080       brw_pop_insn_state(p);
1081    }
1082
1083    if (c->key.source_depth_to_render_target)
1084    {
1085       if (c->key.computes_depth)
1086          brw_MOV(p, brw_message_reg(nr), arg2[2]);
1087       else
1088          brw_MOV(p, brw_message_reg(nr), arg1[1]); /* ? */
1089
1090       nr += 2;
1091    }
1092
1093    if (c->key.dest_depth_reg)
1094    {
1095       GLuint comp = c->key.dest_depth_reg / 2;
1096       GLuint off = c->key.dest_depth_reg % 2;
1097
1098       if (off != 0) {
1099          brw_push_insn_state(p);
1100          brw_set_compression_control(p, BRW_COMPRESSION_NONE);
1101
1102          brw_MOV(p, brw_message_reg(nr), offset(arg1[comp],1));
1103          /* 2nd half? */
1104          brw_MOV(p, brw_message_reg(nr+1), arg1[comp+1]);
1105          brw_pop_insn_state(p);
1106       }
1107       else {
1108          brw_MOV(p, brw_message_reg(nr), arg1[comp]);
1109       }
1110       nr += 2;
1111    }
1112
1113    if (!c->key.runtime_check_aads_emit) {
1114       if (c->key.aa_dest_stencil_reg)
1115          emit_aa(c, arg1, 2);
1116
1117       fire_fb_write(c, 0, nr, target, eot);
1118    }
1119    else {
1120       struct brw_reg v1_null_ud = vec1(retype(brw_null_reg(), BRW_REGISTER_TYPE_UD));
1121       struct brw_reg ip = brw_ip_reg();
1122       struct brw_instruction *jmp;
1123
1124       brw_set_compression_control(p, BRW_COMPRESSION_NONE);
1125       brw_set_conditionalmod(p, BRW_CONDITIONAL_Z);
1126       brw_AND(p,
1127               v1_null_ud,
1128               get_element_ud(brw_vec8_grf(1,0), 6),
1129               brw_imm_ud(1<<26));
1130
1131       jmp = brw_JMPI(p, ip, ip, brw_imm_d(0));
1132       {
1133          emit_aa(c, arg1, 2);
1134          fire_fb_write(c, 0, nr, target, eot);
1135          /* note - thread killed in subroutine */
1136       }
1137       brw_land_fwd_jump(p, jmp);
1138
1139       /* ELSE: Shuffle up one register to fill in the hole left for AA:
1140        */
1141       fire_fb_write(c, 1, nr-1, target, eot);
1142    }
1143 }
1144
1145
1146 /**
1147  * Move a GPR to scratch memory.
1148  */
1149 static void emit_spill( struct brw_wm_compile *c,
1150                         struct brw_reg reg,
1151                         GLuint slot )
1152 {
1153    struct brw_compile *p = &c->func;
1154
1155    /*
1156      mov (16) m2.0<1>:ud   r2.0<8;8,1>:ud   { Align1 Compr }
1157    */
1158    brw_MOV(p, brw_message_reg(2), reg);
1159
1160    /*
1161      mov (1) r0.2<1>:d    0x00000080:d     { Align1 NoMask }
1162      send (16) null.0<1>:uw m1               r0.0<8;8,1>:uw   0x053003ff:ud    { Align1 }
1163    */
1164    brw_dp_WRITE_16(p,
1165                    retype(vec16(brw_vec8_grf(0, 0)), BRW_REGISTER_TYPE_UW),
1166                    slot);
1167 }
1168
1169
1170 /**
1171  * Load a GPR from scratch memory.
1172  */
1173 static void emit_unspill( struct brw_wm_compile *c,
1174                           struct brw_reg reg,
1175                           GLuint slot )
1176 {
1177    struct brw_compile *p = &c->func;
1178
1179    /* Slot 0 is the undef value.
1180     */
1181    if (slot == 0) {
1182       brw_MOV(p, reg, brw_imm_f(0));
1183       return;
1184    }
1185
1186    /*
1187      mov (1) r0.2<1>:d    0x000000c0:d     { Align1 NoMask }
1188      send (16) r110.0<1>:uw m1               r0.0<8;8,1>:uw   0x041243ff:ud    { Align1 }
1189    */
1190
1191    brw_dp_READ_16(p,
1192                   retype(vec16(reg), BRW_REGISTER_TYPE_UW),
1193                   slot);
1194 }
1195
1196
1197 /**
1198  * Retrieve up to 4 GEN4 register pairs for the given wm reg:
1199  * Args with unspill_reg != 0 will be loaded from scratch memory.
1200  */
1201 static void get_argument_regs( struct brw_wm_compile *c,
1202                                struct brw_wm_ref *arg[],
1203                                struct brw_reg *regs )
1204 {
1205    GLuint i;
1206
1207    for (i = 0; i < 4; i++) {
1208       if (arg[i]) {
1209          if (arg[i]->unspill_reg)
1210             emit_unspill(c,
1211                          brw_vec8_grf(arg[i]->unspill_reg, 0),
1212                          arg[i]->value->spill_slot);
1213
1214          regs[i] = arg[i]->hw_reg;
1215       }
1216       else {
1217          regs[i] = brw_null_reg();
1218       }
1219    }
1220 }
1221
1222
1223 /**
1224  * For values that have a spill_slot!=0, write those regs to scratch memory.
1225  */
1226 static void spill_values( struct brw_wm_compile *c,
1227                           struct brw_wm_value *values,
1228                           GLuint nr )
1229 {
1230    GLuint i;
1231
1232    for (i = 0; i < nr; i++)
1233       if (values[i].spill_slot)
1234          emit_spill(c, values[i].hw_reg, values[i].spill_slot);
1235 }
1236
1237
1238 /* Emit the fragment program instructions here.
1239  */
1240 void brw_wm_emit( struct brw_wm_compile *c )
1241 {
1242    struct brw_compile *p = &c->func;
1243    GLuint insn;
1244
1245    brw_set_compression_control(p, BRW_COMPRESSION_COMPRESSED);
1246
1247    /* Check if any of the payload regs need to be spilled:
1248     */
1249    spill_values(c, c->payload.depth, 4);
1250    spill_values(c, c->creg, c->nr_creg);
1251    spill_values(c, c->payload.input_interp, FRAG_ATTRIB_MAX);
1252
1253
1254    for (insn = 0; insn < c->nr_insns; insn++) {
1255
1256       struct brw_wm_instruction *inst = &c->instruction[insn];
1257       struct brw_reg args[3][4], dst[4];
1258       GLuint i, dst_flags;
1259
1260       /* Get argument regs:
1261        */
1262       for (i = 0; i < 3; i++)
1263          get_argument_regs(c, inst->src[i], args[i]);
1264
1265       /* Get dest regs:
1266        */
1267       for (i = 0; i < 4; i++)
1268          if (inst->dst[i])
1269             dst[i] = inst->dst[i]->hw_reg;
1270          else
1271             dst[i] = brw_null_reg();
1272
1273       /* Flags
1274        */
1275       dst_flags = inst->writemask;
1276       if (inst->saturate)
1277          dst_flags |= SATURATE;
1278
1279       switch (inst->opcode) {
1280          /* Generated instructions for calculating triangle interpolants:
1281           */
1282       case WM_PIXELXY:
1283          emit_pixel_xy(p, dst, dst_flags);
1284          break;
1285
1286       case WM_DELTAXY:
1287          emit_delta_xy(p, dst, dst_flags, args[0]);
1288          break;
1289
1290       case WM_WPOSXY:
1291          emit_wpos_xy(c, dst, dst_flags, args[0]);
1292          break;
1293
1294       case WM_PIXELW:
1295          emit_pixel_w(p, dst, dst_flags, args[0], args[1]);
1296          break;
1297
1298       case WM_LINTERP:
1299          emit_linterp(p, dst, dst_flags, args[0], args[1]);
1300          break;
1301
1302       case WM_PINTERP:
1303          emit_pinterp(p, dst, dst_flags, args[0], args[1], args[2]);
1304          break;
1305
1306       case WM_CINTERP:
1307          emit_cinterp(p, dst, dst_flags, args[0]);
1308          break;
1309
1310       case WM_FB_WRITE:
1311          emit_fb_write(c, args[0], args[1], args[2], inst->target, inst->eot);
1312          break;
1313
1314       case WM_FRONTFACING:
1315          emit_frontfacing(p, dst, dst_flags);
1316          break;
1317
1318          /* Straightforward arithmetic:
1319           */
1320       case OPCODE_ADD:
1321          emit_alu2(p, brw_ADD, dst, dst_flags, args[0], args[1]);
1322          break;
1323
1324       case OPCODE_FRC:
1325          emit_alu1(p, brw_FRC, dst, dst_flags, args[0]);
1326          break;
1327
1328       case OPCODE_FLR:
1329          emit_alu1(p, brw_RNDD, dst, dst_flags, args[0]);
1330          break;
1331
1332       case OPCODE_DDX:
1333          emit_ddxy(p, dst, dst_flags, GL_TRUE, args[0]);
1334          break;
1335
1336       case OPCODE_DDY:
1337          emit_ddxy(p, dst, dst_flags, GL_FALSE, args[0]);
1338          break;
1339
1340       case OPCODE_DP3:
1341          emit_dp3(p, dst, dst_flags, args[0], args[1]);
1342          break;
1343
1344       case OPCODE_DP4:
1345          emit_dp4(p, dst, dst_flags, args[0], args[1]);
1346          break;
1347
1348       case OPCODE_DPH:
1349          emit_dph(p, dst, dst_flags, args[0], args[1]);
1350          break;
1351
1352       case OPCODE_TRUNC:
1353          emit_alu1(p, brw_RNDZ, dst, dst_flags, args[0]);
1354          break;
1355
1356       case OPCODE_LRP:
1357          emit_lrp(p, dst, dst_flags, args[0], args[1], args[2]);
1358          break;
1359
1360       case OPCODE_MAD:
1361          emit_mad(p, dst, dst_flags, args[0], args[1], args[2]);
1362          break;
1363
1364       case OPCODE_MOV:
1365       case OPCODE_SWZ:
1366          emit_alu1(p, brw_MOV, dst, dst_flags, args[0]);
1367          break;
1368
1369       case OPCODE_MUL:
1370          emit_alu2(p, brw_MUL, dst, dst_flags, args[0], args[1]);
1371          break;
1372
1373       case OPCODE_XPD:
1374          emit_xpd(p, dst, dst_flags, args[0], args[1]);
1375          break;
1376
1377          /* Higher math functions:
1378           */
1379       case OPCODE_RCP:
1380          emit_math1(p, BRW_MATH_FUNCTION_INV, dst, dst_flags, args[0]);
1381          break;
1382
1383       case OPCODE_RSQ:
1384          emit_math1(p, BRW_MATH_FUNCTION_RSQ, dst, dst_flags, args[0]);
1385          break;
1386
1387       case OPCODE_SIN:
1388          emit_math1(p, BRW_MATH_FUNCTION_SIN, dst, dst_flags, args[0]);
1389          break;
1390
1391       case OPCODE_COS:
1392          emit_math1(p, BRW_MATH_FUNCTION_COS, dst, dst_flags, args[0]);
1393          break;
1394
1395       case OPCODE_EX2:
1396          emit_math1(p, BRW_MATH_FUNCTION_EXP, dst, dst_flags, args[0]);
1397          break;
1398
1399       case OPCODE_LG2:
1400          emit_math1(p, BRW_MATH_FUNCTION_LOG, dst, dst_flags, args[0]);
1401          break;
1402
1403       case OPCODE_SCS:
1404          /* There is an scs math function, but it would need some
1405           * fixup for 16-element execution.
1406           */
1407          if (dst_flags & WRITEMASK_X)
1408             emit_math1(p, BRW_MATH_FUNCTION_COS, dst, (dst_flags&SATURATE)|WRITEMASK_X, args[0]);
1409          if (dst_flags & WRITEMASK_Y)
1410             emit_math1(p, BRW_MATH_FUNCTION_SIN, dst+1, (dst_flags&SATURATE)|WRITEMASK_X, args[0]);
1411          break;
1412
1413       case OPCODE_POW:
1414          emit_math2(p, BRW_MATH_FUNCTION_POW, dst, dst_flags, args[0], args[1]);
1415          break;
1416
1417          /* Comparisons:
1418           */
1419       case OPCODE_CMP:
1420          emit_cmp(p, dst, dst_flags, args[0], args[1], args[2]);
1421          break;
1422
1423       case OPCODE_MAX:
1424          emit_max(p, dst, dst_flags, args[0], args[1]);
1425          break;
1426
1427       case OPCODE_MIN:
1428          emit_min(p, dst, dst_flags, args[0], args[1]);
1429          break;
1430
1431       case OPCODE_SLT:
1432          emit_slt(p, dst, dst_flags, args[0], args[1]);
1433          break;
1434
1435       case OPCODE_SLE:
1436          emit_sle(p, dst, dst_flags, args[0], args[1]);
1437         break;
1438       case OPCODE_SGT:
1439          emit_sgt(p, dst, dst_flags, args[0], args[1]);
1440         break;
1441       case OPCODE_SGE:
1442          emit_sge(p, dst, dst_flags, args[0], args[1]);
1443          break;
1444       case OPCODE_SEQ:
1445          emit_seq(p, dst, dst_flags, args[0], args[1]);
1446         break;
1447       case OPCODE_SNE:
1448          emit_sne(p, dst, dst_flags, args[0], args[1]);
1449         break;
1450
1451       case OPCODE_LIT:
1452          emit_lit(p, dst, dst_flags, args[0]);
1453          break;
1454
1455          /* Texturing operations:
1456           */
1457       case OPCODE_TEX:
1458          emit_tex(c, inst, dst, dst_flags, args[0]);
1459          break;
1460
1461       case OPCODE_TXB:
1462          emit_txb(c, inst, dst, dst_flags, args[0]);
1463          break;
1464
1465       case OPCODE_KIL:
1466          emit_kil(c, args[0]);
1467          break;
1468
1469       case OPCODE_KIL_NV:
1470          emit_kil_nv(c);
1471          break;
1472
1473       default:
1474          _mesa_printf("Unsupported opcode %i (%s) in fragment shader\n",
1475                       inst->opcode, inst->opcode < MAX_OPCODE ?
1476                                     _mesa_opcode_string(inst->opcode) :
1477                                     "unknown");
1478       }
1479
1480       for (i = 0; i < 4; i++)
1481         if (inst->dst[i] && inst->dst[i]->spill_slot)
1482            emit_spill(c,
1483                       inst->dst[i]->hw_reg,
1484                       inst->dst[i]->spill_slot);
1485    }
1486
1487    if (INTEL_DEBUG & DEBUG_WM) {
1488       int i;
1489
1490       _mesa_printf("wm-native:\n");
1491       for (i = 0; i < p->nr_insn; i++)
1492          brw_disasm(stderr, &p->store[i]);
1493       _mesa_printf("\n");
1494    }
1495 }