src/gallium/drivers/i965/brw_wm_emit.c

   1 /*
   2  Copyright (C) Intel Corp.  2006.  All Rights Reserved.
   3  Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
   4  develop this 3D driver.
   5
   6  Permission is hereby granted, free of charge, to any person obtaining
   7  a copy of this software and associated documentation files (the
   8  "Software"), to deal in the Software without restriction, including
   9  without limitation the rights to use, copy, modify, merge, publish,
  10  distribute, sublicense, and/or sell copies of the Software, and to
  11  permit persons to whom the Software is furnished to do so, subject to
  12  the following conditions:
  13
  14  The above copyright notice and this permission notice (including the
  15  next paragraph) shall be included in all copies or substantial
  16  portions of the Software.
  17
  18  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
  19  EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  20  MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
  21  IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
  22  LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
  23  OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
  24  WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
  25
  26  **********************************************************************/
  27  /*
  28   * Authors:
  29   *   Keith Whitwell <keith@tungstengraphics.com>
  30   */
  31
  32 #include "util/u_math.h"
  33 #include "tgsi/tgsi_info.h"
  34
  35 #include "brw_context.h"
  36 #include "brw_wm.h"
  37 #include "brw_debug.h"
  38
  39 /* Not quite sure how correct this is - need to understand horiz
  40  * vs. vertical strides a little better.
  41  */
  42 static INLINE struct brw_reg sechalf( struct brw_reg reg )
  43 {
  44    if (reg.vstride)
  45       reg.nr++;
  46    return reg;
  47 }
  48
  49 /* Payload R0:
  50  *
  51  * R0.0 -- pixel mask, one bit for each of 4 pixels in 4 quads,
  52  *         corresponding to each of the 16 execution channels.
  53  * R0.1..8 -- ?
  54  * R1.0 -- triangle vertex 0.X
  55  * R1.1 -- triangle vertex 0.Y
  56  * R1.2 -- quad 0 x,y coords (2 packed uwords)
  57  * R1.3 -- quad 1 x,y coords (2 packed uwords)
  58  * R1.4 -- quad 2 x,y coords (2 packed uwords)
  59  * R1.5 -- quad 3 x,y coords (2 packed uwords)
  60  * R1.6 -- ?
  61  * R1.7 -- ?
  62  * R1.8 -- ?
  63  */
  64
  65
  66 static void emit_pixel_xy(struct brw_compile *p,
  67                           const struct brw_reg *dst,
  68                           GLuint mask)
  69 {
  70    struct brw_reg r1 = brw_vec1_grf(1, 0);
  71    struct brw_reg r1_uw = retype(r1, BRW_REGISTER_TYPE_UW);
  72
  73    brw_set_compression_control(p, BRW_COMPRESSION_NONE);
  74
  75    /* Calculate pixel centers by adding 1 or 0 to each of the
  76     * micro-tile coordinates passed in r1.
  77     */
  78    if (mask & BRW_WRITEMASK_X) {
  79       brw_ADD(p,
  80               vec16(retype(dst[0], BRW_REGISTER_TYPE_UW)),
  81               stride(suboffset(r1_uw, 4), 2, 4, 0),
  82               brw_imm_v(0x10101010));
  83    }
  84
  85    if (mask & BRW_WRITEMASK_Y) {
  86       brw_ADD(p,
  87               vec16(retype(dst[1], BRW_REGISTER_TYPE_UW)),
  88               stride(suboffset(r1_uw,5), 2, 4, 0),
  89               brw_imm_v(0x11001100));
  90    }
  91
  92    brw_set_compression_control(p, BRW_COMPRESSION_COMPRESSED);
  93 }
  94
  95
  96
  97 static void emit_delta_xy(struct brw_compile *p,
  98                           const struct brw_reg *dst,
  99                           GLuint mask,
 100                           const struct brw_reg *arg0)
 101 {
 102    struct brw_reg r1 = brw_vec1_grf(1, 0);
 103
 104    /* Calc delta X,Y by subtracting origin in r1 from the pixel
 105     * centers.
 106     */
 107    if (mask & BRW_WRITEMASK_X) {
 108       brw_ADD(p,
 109               dst[0],
 110               retype(arg0[0], BRW_REGISTER_TYPE_UW),
 111               negate(r1));
 112    }
 113
 114    if (mask & BRW_WRITEMASK_Y) {
 115       brw_ADD(p,
 116               dst[1],
 117               retype(arg0[1], BRW_REGISTER_TYPE_UW),
 118               negate(suboffset(r1,1)));
 119
 120    }
 121 }
 122
 123 static void emit_wpos_xy(struct brw_wm_compile *c,
 124                          const struct brw_reg *dst,
 125                          GLuint mask,
 126                          const struct brw_reg *arg0)
 127 {
 128    struct brw_compile *p = &c->func;
 129
 130    if (mask & BRW_WRITEMASK_X) {
 131       /* X' = X */
 132       brw_MOV(p,
 133               dst[0],
 134               retype(arg0[0], BRW_REGISTER_TYPE_W));
 135    }
 136
 137    /* XXX: is this needed any more, or is this a NOOP?
 138     */
 139    if (mask & BRW_WRITEMASK_Y) {
 140 #if 0
 141       /* Y' = height - 1 - Y */
 142       brw_ADD(p,
 143               dst[1],
 144               negate(retype(arg0[1], BRW_REGISTER_TYPE_W)),
 145               brw_imm_d(c->key.drawable_height - 1));
 146 #else
 147       brw_MOV(p,
 148               dst[0],
 149               retype(arg0[0], BRW_REGISTER_TYPE_W));
 150 #endif
 151    }
 152 }
 153
 154
 155 static void emit_pixel_w( struct brw_compile *p,
 156                           const struct brw_reg *dst,
 157                           GLuint mask,
 158                           const struct brw_reg *arg0,
 159                           const struct brw_reg *deltas)
 160 {
 161    /* Don't need this if all you are doing is interpolating color, for
 162     * instance.
 163     */
 164    if (mask & BRW_WRITEMASK_W) {
 165       struct brw_reg interp3 = brw_vec1_grf(arg0[0].nr+1, 4);
 166
 167       /* Calc 1/w - just linterp wpos[3] optimized by putting the
 168        * result straight into a message reg.
 169        */
 170       brw_LINE(p, brw_null_reg(), interp3, deltas[0]);
 171       brw_MAC(p, brw_message_reg(2), suboffset(interp3, 1), deltas[1]);
 172
 173       /* Calc w */
 174       brw_math_16( p, dst[3],
 175                    BRW_MATH_FUNCTION_INV,
 176                    BRW_MATH_SATURATE_NONE,
 177                    2, brw_null_reg(),
 178                    BRW_MATH_PRECISION_FULL);
 179    }
 180 }
 181
 182
 183
 184 static void emit_linterp( struct brw_compile *p,
 185                          const struct brw_reg *dst,
 186                          GLuint mask,
 187                          const struct brw_reg *arg0,
 188                          const struct brw_reg *deltas )
 189 {
 190    struct brw_reg interp[4];
 191    GLuint nr = arg0[0].nr;
 192    GLuint i;
 193
 194    interp[0] = brw_vec1_grf(nr, 0);
 195    interp[1] = brw_vec1_grf(nr, 4);
 196    interp[2] = brw_vec1_grf(nr+1, 0);
 197    interp[3] = brw_vec1_grf(nr+1, 4);
 198
 199    for (i = 0; i < 4; i++) {
 200       if (mask & (1<<i)) {
 201          brw_LINE(p, brw_null_reg(), interp[i], deltas[0]);
 202          brw_MAC(p, dst[i], suboffset(interp[i],1), deltas[1]);
 203       }
 204    }
 205 }
 206
 207
 208 static void emit_pinterp( struct brw_compile *p,
 209                           const struct brw_reg *dst,
 210                           GLuint mask,
 211                           const struct brw_reg *arg0,
 212                           const struct brw_reg *deltas,
 213                           const struct brw_reg *w)
 214 {
 215    struct brw_reg interp[4];
 216    GLuint nr = arg0[0].nr;
 217    GLuint i;
 218
 219    interp[0] = brw_vec1_grf(nr, 0);
 220    interp[1] = brw_vec1_grf(nr, 4);
 221    interp[2] = brw_vec1_grf(nr+1, 0);
 222    interp[3] = brw_vec1_grf(nr+1, 4);
 223
 224    for (i = 0; i < 4; i++) {
 225       if (mask & (1<<i)) {
 226          brw_LINE(p, brw_null_reg(), interp[i], deltas[0]);
 227          brw_MAC(p, dst[i], suboffset(interp[i],1), deltas[1]);
 228       }
 229    }
 230    for (i = 0; i < 4; i++) {
 231       if (mask & (1<<i)) {
 232          brw_MUL(p, dst[i], dst[i], w[3]);
 233       }
 234    }
 235 }
 236
 237
 238 static void emit_cinterp( struct brw_compile *p,
 239                          const struct brw_reg *dst,
 240                          GLuint mask,
 241                          const struct brw_reg *arg0 )
 242 {
 243    struct brw_reg interp[4];
 244    GLuint nr = arg0[0].nr;
 245    GLuint i;
 246
 247    interp[0] = brw_vec1_grf(nr, 0);
 248    interp[1] = brw_vec1_grf(nr, 4);
 249    interp[2] = brw_vec1_grf(nr+1, 0);
 250    interp[3] = brw_vec1_grf(nr+1, 4);
 251
 252    for (i = 0; i < 4; i++) {
 253       if (mask & (1<<i)) {
 254          brw_MOV(p, dst[i], suboffset(interp[i],3));    /* TODO: optimize away like other moves */
 255       }
 256    }
 257 }
 258
 259 /* Sets the destination channels to 1.0 or 0.0 according to glFrontFacing. */
 260 static void emit_frontfacing( struct brw_compile *p,
 261                               const struct brw_reg *dst,
 262                               GLuint mask )
 263 {
 264    struct brw_reg r1_6ud = retype(brw_vec1_grf(1, 6), BRW_REGISTER_TYPE_UD);
 265    GLuint i;
 266
 267    if (!(mask & BRW_WRITEMASK_XYZW))
 268       return;
 269
 270    for (i = 0; i < 4; i++) {
 271       if (mask & (1<<i)) {
 272          brw_MOV(p, dst[i], brw_imm_f(0.0));
 273       }
 274    }
 275
 276    /* bit 31 is "primitive is back face", so checking < (1 << 31) gives
 277     * us front face
 278     */
 279    brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_L, r1_6ud, brw_imm_ud(1 << 31));
 280    for (i = 0; i < 4; i++) {
 281       if (mask & (1<<i)) {
 282          brw_MOV(p, dst[i], brw_imm_f(1.0));
 283       }
 284    }
 285    brw_set_predicate_control_flag_value(p, 0xff);
 286 }
 287
 288 /* For OPCODE_DDX and OPCODE_DDY, per channel of output we've got input
 289  * looking like:
 290  *
 291  * arg0: q0.tl q0.tr q0.bl q0.br q1.tl q1.tr q1.bl q1.br
 292  *
 293  * and we're trying to produce:
 294  *
 295  *           DDX                     DDY
 296  * dst: (q0.tr - q0.tl)     (q0.tl - q0.bl)
 297  *      (q0.tr - q0.tl)     (q0.tr - q0.br)
 298  *      (q0.br - q0.bl)     (q0.tl - q0.bl)
 299  *      (q0.br - q0.bl)     (q0.tr - q0.br)
 300  *      (q1.tr - q1.tl)     (q1.tl - q1.bl)
 301  *      (q1.tr - q1.tl)     (q1.tr - q1.br)
 302  *      (q1.br - q1.bl)     (q1.tl - q1.bl)
 303  *      (q1.br - q1.bl)     (q1.tr - q1.br)
 304  *
 305  * and add two more quads if in 16-pixel dispatch mode.
 306  *
 307  * For DDX, it ends up being easy: width = 2, horiz=0 gets us the same result
 308  * for each pair, and vertstride = 2 jumps us 2 elements after processing a
 309  * pair. But for DDY, it's harder, as we want to produce the pairs swizzled
 310  * between each other.  We could probably do it like ddx and swizzle the right
 311  * order later, but bail for now and just produce
 312  * ((q0.tl - q0.bl)x4 (q1.tl - q1.bl)x4)
 313  */
 314 void emit_ddxy(struct brw_compile *p,
 315                const struct brw_reg *dst,
 316                GLuint mask,
 317                GLboolean is_ddx,
 318                const struct brw_reg *arg0)
 319 {
 320    int i;
 321    struct brw_reg src0, src1;
 322
 323    if (mask & SATURATE)
 324       brw_set_saturate(p, 1);
 325    for (i = 0; i < 4; i++ ) {
 326       if (mask & (1<<i)) {
 327          if (is_ddx) {
 328             src0 = brw_reg(arg0[i].file, arg0[i].nr, 1,
 329                            BRW_REGISTER_TYPE_F,
 330                            BRW_VERTICAL_STRIDE_2,
 331                            BRW_WIDTH_2,
 332                            BRW_HORIZONTAL_STRIDE_0,
 333                            BRW_SWIZZLE_XYZW, BRW_WRITEMASK_XYZW);
 334             src1 = brw_reg(arg0[i].file, arg0[i].nr, 0,
 335                            BRW_REGISTER_TYPE_F,
 336                            BRW_VERTICAL_STRIDE_2,
 337                            BRW_WIDTH_2,
 338                            BRW_HORIZONTAL_STRIDE_0,
 339                            BRW_SWIZZLE_XYZW, BRW_WRITEMASK_XYZW);
 340          } else {
 341             src0 = brw_reg(arg0[i].file, arg0[i].nr, 0,
 342                            BRW_REGISTER_TYPE_F,
 343                            BRW_VERTICAL_STRIDE_4,
 344                            BRW_WIDTH_4,
 345                            BRW_HORIZONTAL_STRIDE_0,
 346                            BRW_SWIZZLE_XYZW, BRW_WRITEMASK_XYZW);
 347             src1 = brw_reg(arg0[i].file, arg0[i].nr, 2,
 348                            BRW_REGISTER_TYPE_F,
 349                            BRW_VERTICAL_STRIDE_4,
 350                            BRW_WIDTH_4,
 351                            BRW_HORIZONTAL_STRIDE_0,
 352                            BRW_SWIZZLE_XYZW, BRW_WRITEMASK_XYZW);
 353          }
 354          brw_ADD(p, dst[i], src0, negate(src1));
 355       }
 356    }
 357    if (mask & SATURATE)
 358       brw_set_saturate(p, 0);
 359 }
 360
 361 static void emit_alu1( struct brw_compile *p,
 362                        struct brw_instruction *(*func)(struct brw_compile *,
 363                                                        struct brw_reg,
 364                                                        struct brw_reg),
 365                        const struct brw_reg *dst,
 366                        GLuint mask,
 367                        const struct brw_reg *arg0 )
 368 {
 369    GLuint i;
 370
 371    if (mask & SATURATE)
 372       brw_set_saturate(p, 1);
 373
 374    for (i = 0; i < 4; i++) {
 375       if (mask & (1<<i)) {
 376          func(p, dst[i], arg0[i]);
 377       }
 378    }
 379
 380    if (mask & SATURATE)
 381       brw_set_saturate(p, 0);
 382 }
 383
 384
 385 static void emit_alu2( struct brw_compile *p,
 386                        struct brw_instruction *(*func)(struct brw_compile *,
 387                                                        struct brw_reg,
 388                                                        struct brw_reg,
 389                                                        struct brw_reg),
 390                        const struct brw_reg *dst,
 391                        GLuint mask,
 392                        const struct brw_reg *arg0,
 393                        const struct brw_reg *arg1 )
 394 {
 395    GLuint i;
 396
 397    if (mask & SATURATE)
 398       brw_set_saturate(p, 1);
 399
 400    for (i = 0; i < 4; i++) {
 401       if (mask & (1<<i)) {
 402          func(p, dst[i], arg0[i], arg1[i]);
 403       }
 404    }
 405
 406    if (mask & SATURATE)
 407       brw_set_saturate(p, 0);
 408 }
 409
 410
 411 static void emit_mad( struct brw_compile *p,
 412                       const struct brw_reg *dst,
 413                       GLuint mask,
 414                       const struct brw_reg *arg0,
 415                       const struct brw_reg *arg1,
 416                       const struct brw_reg *arg2 )
 417 {
 418    GLuint i;
 419
 420    for (i = 0; i < 4; i++) {
 421       if (mask & (1<<i)) {
 422          brw_MUL(p, dst[i], arg0[i], arg1[i]);
 423
 424          brw_set_saturate(p, (mask & SATURATE) ? 1 : 0);
 425          brw_ADD(p, dst[i], dst[i], arg2[i]);
 426          brw_set_saturate(p, 0);
 427       }
 428    }
 429 }
 430
 431 static void emit_trunc( struct brw_compile *p,
 432                       const struct brw_reg *dst,
 433                       GLuint mask,
 434                       const struct brw_reg *arg0)
 435 {
 436    GLuint i;
 437
 438    for (i = 0; i < 4; i++) {
 439       if (mask & (1<<i)) {
 440          brw_RNDZ(p, dst[i], arg0[i]);
 441       }
 442    }
 443 }
 444
 445 static void emit_lrp( struct brw_compile *p,
 446                       const struct brw_reg *dst,
 447                       GLuint mask,
 448                       const struct brw_reg *arg0,
 449                       const struct brw_reg *arg1,
 450                       const struct brw_reg *arg2 )
 451 {
 452    GLuint i;
 453
 454    /* Uses dst as a temporary:
 455     */
 456    for (i = 0; i < 4; i++) {
 457       if (mask & (1<<i)) {
 458          /* Can I use the LINE instruction for this?
 459           */
 460          brw_ADD(p, dst[i], negate(arg0[i]), brw_imm_f(1.0));
 461          brw_MUL(p, brw_null_reg(), dst[i], arg2[i]);
 462
 463          brw_set_saturate(p, (mask & SATURATE) ? 1 : 0);
 464          brw_MAC(p, dst[i], arg0[i], arg1[i]);
 465          brw_set_saturate(p, 0);
 466       }
 467    }
 468 }
 469
 470 static void emit_sop( struct brw_compile *p,
 471                       const struct brw_reg *dst,
 472                       GLuint mask,
 473                       GLuint cond,
 474                       const struct brw_reg *arg0,
 475                       const struct brw_reg *arg1 )
 476 {
 477    GLuint i;
 478
 479    for (i = 0; i < 4; i++) {
 480       if (mask & (1<<i)) {
 481          brw_MOV(p, dst[i], brw_imm_f(0));
 482          brw_CMP(p, brw_null_reg(), cond, arg0[i], arg1[i]);
 483          brw_MOV(p, dst[i], brw_imm_f(1.0));
 484          brw_set_predicate_control_flag_value(p, 0xff);
 485       }
 486    }
 487 }
 488
 489 static void emit_slt( struct brw_compile *p,
 490                       const struct brw_reg *dst,
 491                       GLuint mask,
 492                       const struct brw_reg *arg0,
 493                       const struct brw_reg *arg1 )
 494 {
 495    emit_sop(p, dst, mask, BRW_CONDITIONAL_L, arg0, arg1);
 496 }
 497
 498 static void emit_sle( struct brw_compile *p,
 499                       const struct brw_reg *dst,
 500                       GLuint mask,
 501                       const struct brw_reg *arg0,
 502                       const struct brw_reg *arg1 )
 503 {
 504    emit_sop(p, dst, mask, BRW_CONDITIONAL_LE, arg0, arg1);
 505 }
 506
 507 static void emit_sgt( struct brw_compile *p,
 508                       const struct brw_reg *dst,
 509                       GLuint mask,
 510                       const struct brw_reg *arg0,
 511                       const struct brw_reg *arg1 )
 512 {
 513    emit_sop(p, dst, mask, BRW_CONDITIONAL_G, arg0, arg1);
 514 }
 515
 516 static void emit_sge( struct brw_compile *p,
 517                       const struct brw_reg *dst,
 518                       GLuint mask,
 519                       const struct brw_reg *arg0,
 520                       const struct brw_reg *arg1 )
 521 {
 522    emit_sop(p, dst, mask, BRW_CONDITIONAL_GE, arg0, arg1);
 523 }
 524
 525 static void emit_seq( struct brw_compile *p,
 526                       const struct brw_reg *dst,
 527                       GLuint mask,
 528                       const struct brw_reg *arg0,
 529                       const struct brw_reg *arg1 )
 530 {
 531    emit_sop(p, dst, mask, BRW_CONDITIONAL_EQ, arg0, arg1);
 532 }
 533
 534 static void emit_sne( struct brw_compile *p,
 535                       const struct brw_reg *dst,
 536                       GLuint mask,
 537                       const struct brw_reg *arg0,
 538                       const struct brw_reg *arg1 )
 539 {
 540    emit_sop(p, dst, mask, BRW_CONDITIONAL_NEQ, arg0, arg1);
 541 }
 542
 543 static void emit_cmp( struct brw_compile *p,
 544                       const struct brw_reg *dst,
 545                       GLuint mask,
 546                       const struct brw_reg *arg0,
 547                       const struct brw_reg *arg1,
 548                       const struct brw_reg *arg2 )
 549 {
 550    GLuint i;
 551
 552    for (i = 0; i < 4; i++) {
 553       if (mask & (1<<i)) {
 554          brw_set_saturate(p, (mask & SATURATE) ? 1 : 0);
 555          brw_MOV(p, dst[i], arg2[i]);
 556          brw_set_saturate(p, 0);
 557
 558          brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_L, arg0[i], brw_imm_f(0));
 559
 560          brw_set_saturate(p, (mask & SATURATE) ? 1 : 0);
 561          brw_MOV(p, dst[i], arg1[i]);
 562          brw_set_saturate(p, 0);
 563          brw_set_predicate_control_flag_value(p, 0xff);
 564       }
 565    }
 566 }
 567
 568 static void emit_max( struct brw_compile *p,
 569                       const struct brw_reg *dst,
 570                       GLuint mask,
 571                       const struct brw_reg *arg0,
 572                       const struct brw_reg *arg1 )
 573 {
 574    GLuint i;
 575
 576    for (i = 0; i < 4; i++) {
 577       if (mask & (1<<i)) {
 578          brw_set_saturate(p, (mask & SATURATE) ? 1 : 0);
 579          brw_MOV(p, dst[i], arg0[i]);
 580          brw_set_saturate(p, 0);
 581
 582          brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_L, arg0[i], arg1[i]);
 583
 584          brw_set_saturate(p, (mask & SATURATE) ? 1 : 0);
 585          brw_MOV(p, dst[i], arg1[i]);
 586          brw_set_saturate(p, 0);
 587          brw_set_predicate_control_flag_value(p, 0xff);
 588       }
 589    }
 590 }
 591
 592 static void emit_min( struct brw_compile *p,
 593                       const struct brw_reg *dst,
 594                       GLuint mask,
 595                       const struct brw_reg *arg0,
 596                       const struct brw_reg *arg1 )
 597 {
 598    GLuint i;
 599
 600    for (i = 0; i < 4; i++) {
 601       if (mask & (1<<i)) {
 602          brw_set_saturate(p, (mask & SATURATE) ? 1 : 0);
 603          brw_MOV(p, dst[i], arg1[i]);
 604          brw_set_saturate(p, 0);
 605
 606          brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_L, arg0[i], arg1[i]);
 607
 608          brw_set_saturate(p, (mask & SATURATE) ? 1 : 0);
 609          brw_MOV(p, dst[i], arg0[i]);
 610          brw_set_saturate(p, 0);
 611          brw_set_predicate_control_flag_value(p, 0xff);
 612       }
 613    }
 614 }
 615
 616
 617 static void emit_dp3( struct brw_compile *p,
 618                       const struct brw_reg *dst,
 619                       GLuint mask,
 620                       const struct brw_reg *arg0,
 621                       const struct brw_reg *arg1 )
 622 {
 623    int dst_chan = ffs(mask & BRW_WRITEMASK_XYZW) - 1;
 624
 625    if (!(mask & BRW_WRITEMASK_XYZW))
 626       return; /* Do not emit dead code */
 627
 628    assert(util_is_power_of_two(mask & BRW_WRITEMASK_XYZW));
 629
 630    brw_MUL(p, brw_null_reg(), arg0[0], arg1[0]);
 631    brw_MAC(p, brw_null_reg(), arg0[1], arg1[1]);
 632
 633    brw_set_saturate(p, (mask & SATURATE) ? 1 : 0);
 634    brw_MAC(p, dst[dst_chan], arg0[2], arg1[2]);
 635    brw_set_saturate(p, 0);
 636 }
 637
 638
 639 static void emit_dp4( struct brw_compile *p,
 640                       const struct brw_reg *dst,
 641                       GLuint mask,
 642                       const struct brw_reg *arg0,
 643                       const struct brw_reg *arg1 )
 644 {
 645    int dst_chan = ffs(mask & BRW_WRITEMASK_XYZW) - 1;
 646
 647    if (!(mask & BRW_WRITEMASK_XYZW))
 648       return; /* Do not emit dead code */
 649
 650    assert(util_is_power_of_two(mask & BRW_WRITEMASK_XYZW));
 651
 652    brw_MUL(p, brw_null_reg(), arg0[0], arg1[0]);
 653    brw_MAC(p, brw_null_reg(), arg0[1], arg1[1]);
 654    brw_MAC(p, brw_null_reg(), arg0[2], arg1[2]);
 655
 656    brw_set_saturate(p, (mask & SATURATE) ? 1 : 0);
 657    brw_MAC(p, dst[dst_chan], arg0[3], arg1[3]);
 658    brw_set_saturate(p, 0);
 659 }
 660
 661
 662 static void emit_dph( struct brw_compile *p,
 663                       const struct brw_reg *dst,
 664                       GLuint mask,
 665                       const struct brw_reg *arg0,
 666                       const struct brw_reg *arg1 )
 667 {
 668    const int dst_chan = ffs(mask & BRW_WRITEMASK_XYZW) - 1;
 669
 670    if (!(mask & BRW_WRITEMASK_XYZW))
 671       return; /* Do not emit dead code */
 672
 673    assert(util_is_power_of_two(mask & BRW_WRITEMASK_XYZW));
 674
 675    brw_MUL(p, brw_null_reg(), arg0[0], arg1[0]);
 676    brw_MAC(p, brw_null_reg(), arg0[1], arg1[1]);
 677    brw_MAC(p, dst[dst_chan], arg0[2], arg1[2]);
 678
 679    brw_set_saturate(p, (mask & SATURATE) ? 1 : 0);
 680    brw_ADD(p, dst[dst_chan], dst[dst_chan], arg1[3]);
 681    brw_set_saturate(p, 0);
 682 }
 683
 684
 685 static void emit_xpd( struct brw_compile *p,
 686                       const struct brw_reg *dst,
 687                       GLuint mask,
 688                       const struct brw_reg *arg0,
 689                       const struct brw_reg *arg1 )
 690 {
 691    GLuint i;
 692
 693    assert(!(mask & BRW_WRITEMASK_W) == BRW_WRITEMASK_X);
 694
 695    for (i = 0 ; i < 3; i++) {
 696       if (mask & (1<<i)) {
 697          GLuint i2 = (i+2)%3;
 698          GLuint i1 = (i+1)%3;
 699
 700          brw_MUL(p, brw_null_reg(), negate(arg0[i2]), arg1[i1]);
 701
 702          brw_set_saturate(p, (mask & SATURATE) ? 1 : 0);
 703          brw_MAC(p, dst[i], arg0[i1], arg1[i2]);
 704          brw_set_saturate(p, 0);
 705       }
 706    }
 707 }
 708
 709
 710 static void emit_math1( struct brw_compile *p,
 711                         GLuint function,
 712                         const struct brw_reg *dst,
 713                         GLuint mask,
 714                         const struct brw_reg *arg0 )
 715 {
 716    int dst_chan = ffs(mask & BRW_WRITEMASK_XYZW) - 1;
 717
 718    if (!(mask & BRW_WRITEMASK_XYZW))
 719       return; /* Do not emit dead code */
 720
 721    assert(util_is_power_of_two(mask & BRW_WRITEMASK_XYZW));
 722
 723    brw_MOV(p, brw_message_reg(2), arg0[0]);
 724
 725    /* Send two messages to perform all 16 operations:
 726     */
 727    brw_math_16(p,
 728                dst[dst_chan],
 729                function,
 730                (mask & SATURATE) ? BRW_MATH_SATURATE_SATURATE : BRW_MATH_SATURATE_NONE,
 731                2,
 732                brw_null_reg(),
 733                BRW_MATH_PRECISION_FULL);
 734 }
 735
 736
 737 static void emit_math2( struct brw_compile *p,
 738                         GLuint function,
 739                         const struct brw_reg *dst,
 740                         GLuint mask,
 741                         const struct brw_reg *arg0,
 742                         const struct brw_reg *arg1)
 743 {
 744    int dst_chan = ffs(mask & BRW_WRITEMASK_XYZW) - 1;
 745
 746    if (!(mask & BRW_WRITEMASK_XYZW))
 747       return; /* Do not emit dead code */
 748
 749    assert(util_is_power_of_two(mask & BRW_WRITEMASK_XYZW));
 750
 751    brw_push_insn_state(p);
 752
 753    brw_set_compression_control(p, BRW_COMPRESSION_NONE);
 754    brw_MOV(p, brw_message_reg(2), arg0[0]);
 755    brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF);
 756    brw_MOV(p, brw_message_reg(4), sechalf(arg0[0]));
 757
 758    brw_set_compression_control(p, BRW_COMPRESSION_NONE);
 759    brw_MOV(p, brw_message_reg(3), arg1[0]);
 760    brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF);
 761    brw_MOV(p, brw_message_reg(5), sechalf(arg1[0]));
 762
 763
 764    /* Send two messages to perform all 16 operations:
 765     */
 766    brw_set_compression_control(p, BRW_COMPRESSION_NONE);
 767    brw_math(p,
 768             dst[dst_chan],
 769             function,
 770             (mask & SATURATE) ? BRW_MATH_SATURATE_SATURATE : BRW_MATH_SATURATE_NONE,
 771             2,
 772             brw_null_reg(),
 773             BRW_MATH_DATA_VECTOR,
 774             BRW_MATH_PRECISION_FULL);
 775
 776    brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF);
 777    brw_math(p,
 778             offset(dst[dst_chan],1),
 779             function,
 780             (mask & SATURATE) ? BRW_MATH_SATURATE_SATURATE : BRW_MATH_SATURATE_NONE,
 781             4,
 782             brw_null_reg(),
 783             BRW_MATH_DATA_VECTOR,
 784             BRW_MATH_PRECISION_FULL);
 785
 786    brw_pop_insn_state(p);
 787 }
 788
 789
 790
 791 static void emit_tex( struct brw_wm_compile *c,
 792                       const struct brw_wm_instruction *inst,
 793                       struct brw_reg *dst,
 794                       GLuint dst_flags,
 795                       struct brw_reg *arg )
 796 {
 797    struct brw_compile *p = &c->func;
 798    GLuint msgLength, responseLength;
 799    GLuint i, nr;
 800    GLuint emit;
 801    GLuint msg_type;
 802    GLboolean shadow = FALSE;
 803
 804    /* How many input regs are there?
 805     */
 806    switch (inst->tex_target) {
 807    case TGSI_TEXTURE_1D:
 808       emit = BRW_WRITEMASK_X;
 809       nr = 1;
 810       break;
 811    case TGSI_TEXTURE_SHADOW1D:
 812       emit = BRW_WRITEMASK_XW;
 813       nr = 4;
 814       shadow = TRUE;
 815       break;
 816    case TGSI_TEXTURE_2D:
 817       emit = BRW_WRITEMASK_XY;
 818       nr = 2;
 819       break;
 820    case TGSI_TEXTURE_SHADOW2D:
 821    case TGSI_TEXTURE_SHADOWRECT:
 822       emit = BRW_WRITEMASK_XYW;
 823       nr = 4;
 824       shadow = TRUE;
 825       break;
 826    case TGSI_TEXTURE_3D:
 827    case TGSI_TEXTURE_CUBE:
 828       emit = BRW_WRITEMASK_XYZ;
 829       nr = 3;
 830       break;
 831    default:
 832       /* unexpected target */
 833       abort();
 834    }
 835
 836    msgLength = 1;
 837
 838    for (i = 0; i < nr; i++) {
 839       static const GLuint swz[4] = {0,1,2,2};
 840       if (emit & (1<<i))
 841          brw_MOV(p, brw_message_reg(msgLength+1), arg[swz[i]]);
 842       else
 843          brw_MOV(p, brw_message_reg(msgLength+1), brw_imm_f(0));
 844       msgLength += 2;
 845    }
 846
 847    responseLength = 8;          /* always */
 848
 849    if (BRW_IS_IGDNG(p->brw)) {
 850        if (shadow)
 851            msg_type = BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_COMPARE_IGDNG;
 852        else
 853            msg_type = BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_IGDNG;
 854    } else {
 855        if (shadow)
 856            msg_type = BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_COMPARE;
 857        else
 858            msg_type = BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE;
 859    }
 860
 861    brw_SAMPLE(p,
 862               retype(vec16(dst[0]), BRW_REGISTER_TYPE_UW),
 863               1,
 864               retype(c->payload.depth[0].hw_reg, BRW_REGISTER_TYPE_UW),
 865               SURF_INDEX_TEXTURE(inst->tex_unit),
 866               inst->tex_unit,     /* sampler */
 867               inst->writemask,
 868               msg_type,
 869               responseLength,
 870               msgLength,
 871               0,
 872               1,
 873               BRW_SAMPLER_SIMD_MODE_SIMD16);
 874 }
 875
 876
 877 static void emit_txb( struct brw_wm_compile *c,
 878                       const struct brw_wm_instruction *inst,
 879                       struct brw_reg *dst,
 880                       GLuint dst_flags,
 881                       struct brw_reg *arg )
 882 {
 883    struct brw_compile *p = &c->func;
 884    GLuint msgLength;
 885    GLuint msg_type;
 886    /* Shadow ignored for txb.
 887     */
 888    switch (inst->tex_target) {
 889    case TGSI_TEXTURE_1D:
 890    case TGSI_TEXTURE_SHADOW1D:
 891       brw_MOV(p, brw_message_reg(2), arg[0]);
 892       brw_MOV(p, brw_message_reg(4), brw_imm_f(0));
 893       brw_MOV(p, brw_message_reg(6), brw_imm_f(0));
 894       break;
 895    case TGSI_TEXTURE_2D:
 896    case TGSI_TEXTURE_RECT:
 897    case TGSI_TEXTURE_SHADOW2D:
 898    case TGSI_TEXTURE_SHADOWRECT:
 899       brw_MOV(p, brw_message_reg(2), arg[0]);
 900       brw_MOV(p, brw_message_reg(4), arg[1]);
 901       brw_MOV(p, brw_message_reg(6), brw_imm_f(0));
 902       break;
 903    case TGSI_TEXTURE_3D:
 904    case TGSI_TEXTURE_CUBE:
 905       brw_MOV(p, brw_message_reg(2), arg[0]);
 906       brw_MOV(p, brw_message_reg(4), arg[1]);
 907       brw_MOV(p, brw_message_reg(6), arg[2]);
 908       break;
 909    default:
 910       /* unexpected target */
 911       abort();
 912    }
 913
 914    brw_MOV(p, brw_message_reg(8), arg[3]);
 915    msgLength = 9;
 916
 917    if (BRW_IS_IGDNG(p->brw))
 918        msg_type = BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_BIAS_IGDNG;
 919    else
 920        msg_type = BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_BIAS;
 921
 922    brw_SAMPLE(p,
 923               retype(vec16(dst[0]), BRW_REGISTER_TYPE_UW),
 924               1,
 925               retype(c->payload.depth[0].hw_reg, BRW_REGISTER_TYPE_UW),
 926               SURF_INDEX_TEXTURE(inst->tex_unit),
 927               inst->tex_unit,     /* sampler */
 928               inst->writemask,
 929               msg_type,
 930               8,                /* responseLength */
 931               msgLength,
 932               0,
 933               1,
 934               BRW_SAMPLER_SIMD_MODE_SIMD16);
 935 }
 936
 937
 938 static void emit_lit( struct brw_compile *p,
 939                       const struct brw_reg *dst,
 940                       GLuint mask,
 941                       const struct brw_reg *arg0 )
 942 {
 943    assert((mask & BRW_WRITEMASK_XW) == 0);
 944
 945    if (mask & BRW_WRITEMASK_Y) {
 946       brw_set_saturate(p, (mask & SATURATE) ? 1 : 0);
 947       brw_MOV(p, dst[1], arg0[0]);
 948       brw_set_saturate(p, 0);
 949    }
 950
 951    if (mask & BRW_WRITEMASK_Z) {
 952       emit_math2(p, BRW_MATH_FUNCTION_POW,
 953                  &dst[2],
 954                  BRW_WRITEMASK_X | (mask & SATURATE),
 955                  &arg0[1],
 956                  &arg0[3]);
 957    }
 958
 959    /* Ordinarily you'd use an iff statement to skip or shortcircuit
 960     * some of the POW calculations above, but 16-wide iff statements
 961     * seem to lock c1 hardware, so this is a nasty workaround:
 962     */
 963    brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_LE, arg0[0], brw_imm_f(0));
 964    {
 965       if (mask & BRW_WRITEMASK_Y)
 966          brw_MOV(p, dst[1], brw_imm_f(0));
 967
 968       if (mask & BRW_WRITEMASK_Z)
 969          brw_MOV(p, dst[2], brw_imm_f(0));
 970    }
 971    brw_set_predicate_control(p, BRW_PREDICATE_NONE);
 972 }
 973
 974
 975 /* Kill pixel - set execution mask to zero for those pixels which
 976  * fail.
 977  */
 978 static void emit_kil( struct brw_wm_compile *c,
 979                       struct brw_reg *arg0)
 980 {
 981    struct brw_compile *p = &c->func;
 982    struct brw_reg r0uw = retype(brw_vec1_grf(0, 0), BRW_REGISTER_TYPE_UW);
 983    GLuint i;
 984
 985    /* XXX - usually won't need 4 compares!
 986     */
 987    for (i = 0; i < 4; i++) {
 988       brw_push_insn_state(p);
 989       brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_GE, arg0[i], brw_imm_f(0));
 990       brw_set_predicate_control_flag_value(p, 0xff);
 991       brw_set_compression_control(p, BRW_COMPRESSION_NONE);
 992       brw_AND(p, r0uw, brw_flag_reg(), r0uw);
 993       brw_pop_insn_state(p);
 994    }
 995 }
 996
 997 /* KILLP kills the pixels that are currently executing, not based on a test
 998  * of the arguments.
 999  */
1000 static void emit_killp( struct brw_wm_compile *c )
1001 {
1002    struct brw_compile *p = &c->func;
1003    struct brw_reg r0uw = retype(brw_vec1_grf(0, 0), BRW_REGISTER_TYPE_UW);
1004
1005    brw_push_insn_state(p);
1006    brw_set_mask_control(p, BRW_MASK_DISABLE);
1007    brw_NOT(p, c->emit_mask_reg, brw_mask_reg(1)); //IMASK
1008    brw_AND(p, r0uw, c->emit_mask_reg, r0uw);
1009    brw_pop_insn_state(p);
1010 }
1011
1012 static void fire_fb_write( struct brw_wm_compile *c,
1013                            GLuint base_reg,
1014                            GLuint nr,
1015                            GLuint target,
1016                            GLuint eot )
1017 {
1018    struct brw_compile *p = &c->func;
1019
1020    /* Pass through control information:
1021     */
1022 /*  mov (8) m1.0<1>:ud   r1.0<8;8,1>:ud   { Align1 NoMask } */
1023    {
1024       brw_push_insn_state(p);
1025       brw_set_mask_control(p, BRW_MASK_DISABLE); /* ? */
1026       brw_set_compression_control(p, BRW_COMPRESSION_NONE);
1027       brw_MOV(p,
1028                brw_message_reg(base_reg + 1),
1029                brw_vec8_grf(1, 0));
1030       brw_pop_insn_state(p);
1031    }
1032
1033    /* Send framebuffer write message: */
1034 /*  send (16) null.0<1>:uw m0               r0.0<8;8,1>:uw   0x85a04000:ud    { Align1 EOT } */
1035    brw_fb_WRITE(p,
1036                 retype(vec16(brw_null_reg()), BRW_REGISTER_TYPE_UW),
1037                 base_reg,
1038                 retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UW),
1039                 target,
1040                 nr,
1041                 0,
1042                 eot);
1043 }
1044
1045
1046 static void emit_aa( struct brw_wm_compile *c,
1047                      struct brw_reg *arg1,
1048                      GLuint reg )
1049 {
1050    struct brw_compile *p = &c->func;
1051    GLuint comp = c->key.aa_dest_stencil_reg / 2;
1052    GLuint off = c->key.aa_dest_stencil_reg % 2;
1053    struct brw_reg aa = offset(arg1[comp], off);
1054
1055    brw_push_insn_state(p);
1056    brw_set_compression_control(p, BRW_COMPRESSION_NONE); /* ?? */
1057    brw_MOV(p, brw_message_reg(reg), aa);
1058    brw_pop_insn_state(p);
1059 }
1060
1061
1062 /* Post-fragment-program processing.  Send the results to the
1063  * framebuffer.
1064  * \param arg0  the fragment color
1065  * \param arg1  the pass-through depth value
1066  * \param arg2  the shader-computed depth value
1067  */
1068 static void emit_fb_write( struct brw_wm_compile *c,
1069                            struct brw_reg *arg0,
1070                            struct brw_reg *arg1,
1071                            struct brw_reg *arg2,
1072                            GLuint target,
1073                            GLuint eot)
1074 {
1075    struct brw_compile *p = &c->func;
1076    GLuint nr = 2;
1077    GLuint channel;
1078
1079    /* Reserve a space for AA - may not be needed:
1080     */
1081    if (c->key.aa_dest_stencil_reg)
1082       nr += 1;
1083
1084    /* I don't really understand how this achieves the color interleave
1085     * (ie RGBARGBA) in the result:  [Do the saturation here]
1086     */
1087    {
1088       brw_push_insn_state(p);
1089
1090       for (channel = 0; channel < 4; channel++) {
1091          /*  mov (8) m2.0<1>:ud   r28.0<8;8,1>:ud  { Align1 } */
1092          /*  mov (8) m6.0<1>:ud   r29.0<8;8,1>:ud  { Align1 SecHalf } */
1093
1094          brw_set_compression_control(p, BRW_COMPRESSION_NONE);
1095          brw_MOV(p,
1096                  brw_message_reg(nr + channel),
1097                  arg0[channel]);
1098
1099          brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF);
1100          brw_MOV(p,
1101                  brw_message_reg(nr + channel + 4),
1102                  sechalf(arg0[channel]));
1103       }
1104
1105       /* skip over the regs populated above:
1106        */
1107       nr += 8;
1108
1109       brw_pop_insn_state(p);
1110    }
1111
1112    if (c->key.source_depth_to_render_target)
1113    {
1114       if (c->key.computes_depth)
1115          brw_MOV(p, brw_message_reg(nr), arg2[2]);
1116       else
1117          brw_MOV(p, brw_message_reg(nr), arg1[1]); /* ? */
1118
1119       nr += 2;
1120    }
1121
1122    if (c->key.dest_depth_reg)
1123    {
1124       GLuint comp = c->key.dest_depth_reg / 2;
1125       GLuint off = c->key.dest_depth_reg % 2;
1126
1127       if (off != 0) {
1128          brw_push_insn_state(p);
1129          brw_set_compression_control(p, BRW_COMPRESSION_NONE);
1130
1131          brw_MOV(p, brw_message_reg(nr), offset(arg1[comp],1));
1132          /* 2nd half? */
1133          brw_MOV(p, brw_message_reg(nr+1), arg1[comp+1]);
1134          brw_pop_insn_state(p);
1135       }
1136       else {
1137          brw_MOV(p, brw_message_reg(nr), arg1[comp]);
1138       }
1139       nr += 2;
1140    }
1141
1142    if (!c->key.runtime_check_aads_emit) {
1143       if (c->key.aa_dest_stencil_reg)
1144          emit_aa(c, arg1, 2);
1145
1146       fire_fb_write(c, 0, nr, target, eot);
1147    }
1148    else {
1149       struct brw_reg v1_null_ud = vec1(retype(brw_null_reg(), BRW_REGISTER_TYPE_UD));
1150       struct brw_reg ip = brw_ip_reg();
1151       struct brw_instruction *jmp;
1152
1153       brw_set_compression_control(p, BRW_COMPRESSION_NONE);
1154       brw_set_conditionalmod(p, BRW_CONDITIONAL_Z);
1155       brw_AND(p,
1156               v1_null_ud,
1157               get_element_ud(brw_vec8_grf(1,0), 6),
1158               brw_imm_ud(1<<26));
1159
1160       jmp = brw_JMPI(p, ip, ip, brw_imm_d(0));
1161       {
1162          emit_aa(c, arg1, 2);
1163          fire_fb_write(c, 0, nr, target, eot);
1164          /* note - thread killed in subroutine */
1165       }
1166       brw_land_fwd_jump(p, jmp);
1167
1168       /* ELSE: Shuffle up one register to fill in the hole left for AA:
1169        */
1170       fire_fb_write(c, 1, nr-1, target, eot);
1171    }
1172 }
1173
1174
1175 /**
1176  * Move a GPR to scratch memory.
1177  */
1178 static void emit_spill( struct brw_wm_compile *c,
1179                         struct brw_reg reg,
1180                         GLuint slot )
1181 {
1182    struct brw_compile *p = &c->func;
1183
1184    /*
1185      mov (16) m2.0<1>:ud   r2.0<8;8,1>:ud   { Align1 Compr }
1186    */
1187    brw_MOV(p, brw_message_reg(2), reg);
1188
1189    /*
1190      mov (1) r0.2<1>:d    0x00000080:d     { Align1 NoMask }
1191      send (16) null.0<1>:uw m1               r0.0<8;8,1>:uw   0x053003ff:ud    { Align1 }
1192    */
1193    brw_dp_WRITE_16(p,
1194                    retype(vec16(brw_vec8_grf(0, 0)), BRW_REGISTER_TYPE_UW),
1195                    slot);
1196 }
1197
1198
1199 /**
1200  * Load a GPR from scratch memory.
1201  */
1202 static void emit_unspill( struct brw_wm_compile *c,
1203                           struct brw_reg reg,
1204                           GLuint slot )
1205 {
1206    struct brw_compile *p = &c->func;
1207
1208    /* Slot 0 is the undef value.
1209     */
1210    if (slot == 0) {
1211       brw_MOV(p, reg, brw_imm_f(0));
1212       return;
1213    }
1214
1215    /*
1216      mov (1) r0.2<1>:d    0x000000c0:d     { Align1 NoMask }
1217      send (16) r110.0<1>:uw m1               r0.0<8;8,1>:uw   0x041243ff:ud    { Align1 }
1218    */
1219
1220    brw_dp_READ_16(p,
1221                   retype(vec16(reg), BRW_REGISTER_TYPE_UW),
1222                   slot);
1223 }
1224
1225
1226 /**
1227  * Retrieve up to 4 GEN4 register pairs for the given wm reg:
1228  * Args with unspill_reg != 0 will be loaded from scratch memory.
1229  */
1230 static void get_argument_regs( struct brw_wm_compile *c,
1231                                struct brw_wm_ref *arg[],
1232                                struct brw_reg *regs )
1233 {
1234    GLuint i;
1235
1236    for (i = 0; i < 4; i++) {
1237       if (arg[i]) {
1238          if (arg[i]->unspill_reg)
1239             emit_unspill(c,
1240                          brw_vec8_grf(arg[i]->unspill_reg, 0),
1241                          arg[i]->value->spill_slot);
1242
1243          regs[i] = arg[i]->hw_reg;
1244       }
1245       else {
1246          regs[i] = brw_null_reg();
1247       }
1248    }
1249 }
1250
1251
1252 /**
1253  * For values that have a spill_slot!=0, write those regs to scratch memory.
1254  */
1255 static void spill_values( struct brw_wm_compile *c,
1256                           struct brw_wm_value *values,
1257                           GLuint nr )
1258 {
1259    GLuint i;
1260
1261    for (i = 0; i < nr; i++)
1262       if (values[i].spill_slot)
1263          emit_spill(c, values[i].hw_reg, values[i].spill_slot);
1264 }
1265
1266
1267 /* Emit the fragment program instructions here.
1268  */
1269 void brw_wm_emit( struct brw_wm_compile *c )
1270 {
1271    struct brw_compile *p = &c->func;
1272    GLuint insn;
1273
1274    brw_set_compression_control(p, BRW_COMPRESSION_COMPRESSED);
1275
1276    /* Check if any of the payload regs need to be spilled:
1277     */
1278    spill_values(c, c->payload.depth, 4);
1279    spill_values(c, c->creg, c->nr_creg);
1280    spill_values(c, c->payload.input_interp, PIPE_MAX_SHADER_INPUTS);
1281
1282
1283    for (insn = 0; insn < c->nr_insns; insn++) {
1284
1285       struct brw_wm_instruction *inst = &c->instruction[insn];
1286       struct brw_reg args[3][4], dst[4];
1287       GLuint i, dst_flags;
1288
1289       /* Get argument regs:
1290        */
1291       for (i = 0; i < 3; i++)
1292          get_argument_regs(c, inst->src[i], args[i]);
1293
1294       /* Get dest regs:
1295        */
1296       for (i = 0; i < 4; i++)
1297          if (inst->dst[i])
1298             dst[i] = inst->dst[i]->hw_reg;
1299          else
1300             dst[i] = brw_null_reg();
1301
1302       /* Flags
1303        */
1304       dst_flags = inst->writemask;
1305       if (inst->saturate)
1306          dst_flags |= SATURATE;
1307
1308       switch (inst->opcode) {
1309          /* Generated instructions for calculating triangle interpolants:
1310           */
1311       case WM_PIXELXY:
1312          emit_pixel_xy(p, dst, dst_flags);
1313          break;
1314
1315       case WM_DELTAXY:
1316          emit_delta_xy(p, dst, dst_flags, args[0]);
1317          break;
1318
1319       case WM_WPOSXY:
1320          emit_wpos_xy(c, dst, dst_flags, args[0]);
1321          break;
1322
1323       case WM_PIXELW:
1324          emit_pixel_w(p, dst, dst_flags, args[0], args[1]);
1325          break;
1326
1327       case WM_LINTERP:
1328          emit_linterp(p, dst, dst_flags, args[0], args[1]);
1329          break;
1330
1331       case WM_PINTERP:
1332          emit_pinterp(p, dst, dst_flags, args[0], args[1], args[2]);
1333          break;
1334
1335       case WM_CINTERP:
1336          emit_cinterp(p, dst, dst_flags, args[0]);
1337          break;
1338
1339       case WM_FB_WRITE:
1340          emit_fb_write(c, args[0], args[1], args[2], inst->target, inst->eot);
1341          break;
1342
1343       case WM_FRONTFACING:
1344          emit_frontfacing(p, dst, dst_flags);
1345          break;
1346
1347          /* Straightforward arithmetic:
1348           */
1349       case TGSI_OPCODE_ADD:
1350          emit_alu2(p, brw_ADD, dst, dst_flags, args[0], args[1]);
1351          break;
1352
1353       case TGSI_OPCODE_FRC:
1354          emit_alu1(p, brw_FRC, dst, dst_flags, args[0]);
1355          break;
1356
1357       case TGSI_OPCODE_FLR:
1358          emit_alu1(p, brw_RNDD, dst, dst_flags, args[0]);
1359          break;
1360
1361       case TGSI_OPCODE_DDX:
1362          emit_ddxy(p, dst, dst_flags, GL_TRUE, args[0]);
1363          break;
1364
1365       case TGSI_OPCODE_DDY:
1366          emit_ddxy(p, dst, dst_flags, GL_FALSE, args[0]);
1367          break;
1368
1369       case TGSI_OPCODE_DP3:
1370          emit_dp3(p, dst, dst_flags, args[0], args[1]);
1371          break;
1372
1373       case TGSI_OPCODE_DP4:
1374          emit_dp4(p, dst, dst_flags, args[0], args[1]);
1375          break;
1376
1377       case TGSI_OPCODE_DPH:
1378          emit_dph(p, dst, dst_flags, args[0], args[1]);
1379          break;
1380
1381       case TGSI_OPCODE_TRUNC:
1382          emit_trunc(p, dst, dst_flags, args[0]);
1383          break;
1384
1385       case TGSI_OPCODE_LRP:
1386          emit_lrp(p, dst, dst_flags, args[0], args[1], args[2]);
1387          break;
1388
1389       case TGSI_OPCODE_MAD:
1390          emit_mad(p, dst, dst_flags, args[0], args[1], args[2]);
1391          break;
1392
1393       case TGSI_OPCODE_MOV:
1394          emit_alu1(p, brw_MOV, dst, dst_flags, args[0]);
1395          break;
1396
1397       case TGSI_OPCODE_MUL:
1398          emit_alu2(p, brw_MUL, dst, dst_flags, args[0], args[1]);
1399          break;
1400
1401       case TGSI_OPCODE_XPD:
1402          emit_xpd(p, dst, dst_flags, args[0], args[1]);
1403          break;
1404
1405          /* Higher math functions:
1406           */
1407       case TGSI_OPCODE_RCP:
1408          emit_math1(p, BRW_MATH_FUNCTION_INV, dst, dst_flags, args[0]);
1409          break;
1410
1411       case TGSI_OPCODE_RSQ:
1412          emit_math1(p, BRW_MATH_FUNCTION_RSQ, dst, dst_flags, args[0]);
1413          break;
1414
1415       case TGSI_OPCODE_SIN:
1416          emit_math1(p, BRW_MATH_FUNCTION_SIN, dst, dst_flags, args[0]);
1417          break;
1418
1419       case TGSI_OPCODE_COS:
1420          emit_math1(p, BRW_MATH_FUNCTION_COS, dst, dst_flags, args[0]);
1421          break;
1422
1423       case TGSI_OPCODE_EX2:
1424          emit_math1(p, BRW_MATH_FUNCTION_EXP, dst, dst_flags, args[0]);
1425          break;
1426
1427       case TGSI_OPCODE_LG2:
1428          emit_math1(p, BRW_MATH_FUNCTION_LOG, dst, dst_flags, args[0]);
1429          break;
1430
1431       case TGSI_OPCODE_SCS:
1432          /* There is an scs math function, but it would need some
1433           * fixup for 16-element execution.
1434           */
1435          if (dst_flags & BRW_WRITEMASK_X)
1436             emit_math1(p, BRW_MATH_FUNCTION_COS, dst, (dst_flags&SATURATE)|BRW_WRITEMASK_X, args[0]);
1437          if (dst_flags & BRW_WRITEMASK_Y)
1438             emit_math1(p, BRW_MATH_FUNCTION_SIN, dst+1, (dst_flags&SATURATE)|BRW_WRITEMASK_X, args[0]);
1439          break;
1440
1441       case TGSI_OPCODE_POW:
1442          emit_math2(p, BRW_MATH_FUNCTION_POW, dst, dst_flags, args[0], args[1]);
1443          break;
1444
1445          /* Comparisons:
1446           */
1447       case TGSI_OPCODE_CMP:
1448          emit_cmp(p, dst, dst_flags, args[0], args[1], args[2]);
1449          break;
1450
1451       case TGSI_OPCODE_MAX:
1452          emit_max(p, dst, dst_flags, args[0], args[1]);
1453          break;
1454
1455       case TGSI_OPCODE_MIN:
1456          emit_min(p, dst, dst_flags, args[0], args[1]);
1457          break;
1458
1459       case TGSI_OPCODE_SLT:
1460          emit_slt(p, dst, dst_flags, args[0], args[1]);
1461          break;
1462
1463       case TGSI_OPCODE_SLE:
1464          emit_sle(p, dst, dst_flags, args[0], args[1]);
1465         break;
1466       case TGSI_OPCODE_SGT:
1467          emit_sgt(p, dst, dst_flags, args[0], args[1]);
1468         break;
1469       case TGSI_OPCODE_SGE:
1470          emit_sge(p, dst, dst_flags, args[0], args[1]);
1471          break;
1472       case TGSI_OPCODE_SEQ:
1473          emit_seq(p, dst, dst_flags, args[0], args[1]);
1474         break;
1475       case TGSI_OPCODE_SNE:
1476          emit_sne(p, dst, dst_flags, args[0], args[1]);
1477         break;
1478
1479       case TGSI_OPCODE_LIT:
1480          emit_lit(p, dst, dst_flags, args[0]);
1481          break;
1482
1483          /* Texturing operations:
1484           */
1485       case TGSI_OPCODE_TEX:
1486          emit_tex(c, inst, dst, dst_flags, args[0]);
1487          break;
1488
1489       case TGSI_OPCODE_TXB:
1490          emit_txb(c, inst, dst, dst_flags, args[0]);
1491          break;
1492
1493       case TGSI_OPCODE_KIL:
1494          emit_kil(c, args[0]);
1495          break;
1496
1497       case TGSI_OPCODE_KILP:
1498          emit_killp(c);
1499          break;
1500
1501       default:
1502          debug_printf("Unsupported opcode %i (%s) in fragment shader\n",
1503                       inst->opcode,
1504                       tgsi_get_opcode_info(inst->opcode)->mnemonic);
1505       }
1506
1507       for (i = 0; i < 4; i++)
1508         if (inst->dst[i] && inst->dst[i]->spill_slot)
1509            emit_spill(c,
1510                       inst->dst[i]->hw_reg,
1511                       inst->dst[i]->spill_slot);
1512    }
1513
1514    if (BRW_DEBUG & DEBUG_WM) {
1515       debug_printf("wm-native:\n");
1516       brw_disasm(stderr, p->store, p->nr_insn);
1517    }
1518 }