src/mesa/drivers/dri/i965/brw_wm_emit.c

   1 /*
   2  Copyright (C) Intel Corp.  2006.  All Rights Reserved.
   3  Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
   4  develop this 3D driver.
   5
   6  Permission is hereby granted, free of charge, to any person obtaining
   7  a copy of this software and associated documentation files (the
   8  "Software"), to deal in the Software without restriction, including
   9  without limitation the rights to use, copy, modify, merge, publish,
  10  distribute, sublicense, and/or sell copies of the Software, and to
  11  permit persons to whom the Software is furnished to do so, subject to
  12  the following conditions:
  13
  14  The above copyright notice and this permission notice (including the
  15  next paragraph) shall be included in all copies or substantial
  16  portions of the Software.
  17
  18  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
  19  EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  20  MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
  21  IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
  22  LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
  23  OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
  24  WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
  25
  26  **********************************************************************/
  27  /*
  28   * Authors:
  29   *   Keith Whitwell <keith@tungstengraphics.com>
  30   */
  31
  32
  33 #include "main/macros.h"
  34 #include "brw_context.h"
  35 #include "brw_wm.h"
  36
  37 /* Not quite sure how correct this is - need to understand horiz
  38  * vs. vertical strides a little better.
  39  */
  40 static INLINE struct brw_reg sechalf( struct brw_reg reg )
  41 {
  42    if (reg.vstride)
  43       reg.nr++;
  44    return reg;
  45 }
  46
  47
  48 /* Payload R0:
  49  *
  50  * R0.0 -- pixel mask, one bit for each of 4 pixels in 4 tiles,
  51  *         corresponding to each of the 16 execution channels.
  52  * R0.1..8 -- ?
  53  * R1.0 -- triangle vertex 0.X
  54  * R1.1 -- triangle vertex 0.Y
  55  * R1.2 -- tile 0 x,y coords (2 packed uwords)
  56  * R1.3 -- tile 1 x,y coords (2 packed uwords)
  57  * R1.4 -- tile 2 x,y coords (2 packed uwords)
  58  * R1.5 -- tile 3 x,y coords (2 packed uwords)
  59  * R1.6 -- ?
  60  * R1.7 -- ?
  61  * R1.8 -- ?
  62  */
  63
  64 void emit_pixel_xy(struct brw_wm_compile *c,
  65                    const struct brw_reg *dst,
  66                    GLuint mask)
  67 {
  68    struct brw_compile *p = &c->func;
  69    struct brw_reg r1 = brw_vec1_grf(1, 0);
  70    struct brw_reg r1_uw = retype(r1, BRW_REGISTER_TYPE_UW);
  71    struct brw_reg dst0_uw, dst1_uw;
  72
  73    brw_push_insn_state(p);
  74    brw_set_compression_control(p, BRW_COMPRESSION_NONE);
  75
  76    if (c->dispatch_width == 16) {
  77       dst0_uw = vec16(retype(dst[0], BRW_REGISTER_TYPE_UW));
  78       dst1_uw = vec16(retype(dst[1], BRW_REGISTER_TYPE_UW));
  79    } else {
  80       dst0_uw = vec8(retype(dst[0], BRW_REGISTER_TYPE_UW));
  81       dst1_uw = vec8(retype(dst[1], BRW_REGISTER_TYPE_UW));
  82    }
  83
  84    /* Calculate pixel centers by adding 1 or 0 to each of the
  85     * micro-tile coordinates passed in r1.
  86     */
  87    if (mask & WRITEMASK_X) {
  88       brw_ADD(p,
  89               dst0_uw,
  90               stride(suboffset(r1_uw, 4), 2, 4, 0),
  91               brw_imm_v(0x10101010));
  92    }
  93
  94    if (mask & WRITEMASK_Y) {
  95       brw_ADD(p,
  96               dst1_uw,
  97               stride(suboffset(r1_uw,5), 2, 4, 0),
  98               brw_imm_v(0x11001100));
  99    }
 100    brw_pop_insn_state(p);
 101 }
 102
 103
 104 void emit_delta_xy(struct brw_compile *p,
 105                    const struct brw_reg *dst,
 106                    GLuint mask,
 107                    const struct brw_reg *arg0)
 108 {
 109    struct brw_reg r1 = brw_vec1_grf(1, 0);
 110
 111    /* Calc delta X,Y by subtracting origin in r1 from the pixel
 112     * centers.
 113     */
 114    if (mask & WRITEMASK_X) {
 115       brw_ADD(p,
 116               dst[0],
 117               retype(arg0[0], BRW_REGISTER_TYPE_UW),
 118               negate(r1));
 119    }
 120
 121    if (mask & WRITEMASK_Y) {
 122       brw_ADD(p,
 123               dst[1],
 124               retype(arg0[1], BRW_REGISTER_TYPE_UW),
 125               negate(suboffset(r1,1)));
 126
 127    }
 128 }
 129
 130 void emit_wpos_xy(struct brw_wm_compile *c,
 131                   const struct brw_reg *dst,
 132                   GLuint mask,
 133                   const struct brw_reg *arg0)
 134 {
 135    struct brw_compile *p = &c->func;
 136
 137    /* Calculate the pixel offset from window bottom left into destination
 138     * X and Y channels.
 139     */
 140    if (mask & WRITEMASK_X) {
 141       /* X' = X - origin */
 142       brw_ADD(p,
 143               dst[0],
 144               retype(arg0[0], BRW_REGISTER_TYPE_W),
 145               brw_imm_d(0 - c->key.origin_x));
 146    }
 147
 148    if (mask & WRITEMASK_Y) {
 149       /* Y' = height - (Y - origin_y) = height + origin_y - Y */
 150       brw_ADD(p,
 151               dst[1],
 152               negate(retype(arg0[1], BRW_REGISTER_TYPE_W)),
 153               brw_imm_d(c->key.origin_y + c->key.drawable_height - 1));
 154    }
 155 }
 156
 157
 158 void emit_pixel_w(struct brw_wm_compile *c,
 159                   const struct brw_reg *dst,
 160                   GLuint mask,
 161                   const struct brw_reg *arg0,
 162                   const struct brw_reg *deltas)
 163 {
 164    struct brw_compile *p = &c->func;
 165
 166    /* Don't need this if all you are doing is interpolating color, for
 167     * instance.
 168     */
 169    if (mask & WRITEMASK_W) {
 170       struct brw_reg interp3 = brw_vec1_grf(arg0[0].nr+1, 4);
 171
 172       /* Calc 1/w - just linterp wpos[3] optimized by putting the
 173        * result straight into a message reg.
 174        */
 175       brw_LINE(p, brw_null_reg(), interp3, deltas[0]);
 176       brw_MAC(p, brw_message_reg(2), suboffset(interp3, 1), deltas[1]);
 177
 178       /* Calc w */
 179       if (c->dispatch_width == 16) {
 180          brw_math_16(p, dst[3],
 181                      BRW_MATH_FUNCTION_INV,
 182                      BRW_MATH_SATURATE_NONE,
 183                      2, brw_null_reg(),
 184                      BRW_MATH_PRECISION_FULL);
 185       } else {
 186          brw_math(p, dst[3],
 187                   BRW_MATH_FUNCTION_INV,
 188                   BRW_MATH_SATURATE_NONE,
 189                   2, brw_null_reg(),
 190                   BRW_MATH_DATA_VECTOR,
 191                   BRW_MATH_PRECISION_FULL);
 192       }
 193    }
 194 }
 195
 196
 197 void emit_linterp(struct brw_compile *p,
 198                   const struct brw_reg *dst,
 199                   GLuint mask,
 200                   const struct brw_reg *arg0,
 201                   const struct brw_reg *deltas)
 202 {
 203    struct brw_reg interp[4];
 204    GLuint nr = arg0[0].nr;
 205    GLuint i;
 206
 207    interp[0] = brw_vec1_grf(nr, 0);
 208    interp[1] = brw_vec1_grf(nr, 4);
 209    interp[2] = brw_vec1_grf(nr+1, 0);
 210    interp[3] = brw_vec1_grf(nr+1, 4);
 211
 212    for (i = 0; i < 4; i++) {
 213       if (mask & (1<<i)) {
 214          brw_LINE(p, brw_null_reg(), interp[i], deltas[0]);
 215          brw_MAC(p, dst[i], suboffset(interp[i],1), deltas[1]);
 216       }
 217    }
 218 }
 219
 220
 221 void emit_pinterp(struct brw_compile *p,
 222                   const struct brw_reg *dst,
 223                   GLuint mask,
 224                   const struct brw_reg *arg0,
 225                   const struct brw_reg *deltas,
 226                   const struct brw_reg *w)
 227 {
 228    struct brw_reg interp[4];
 229    GLuint nr = arg0[0].nr;
 230    GLuint i;
 231
 232    interp[0] = brw_vec1_grf(nr, 0);
 233    interp[1] = brw_vec1_grf(nr, 4);
 234    interp[2] = brw_vec1_grf(nr+1, 0);
 235    interp[3] = brw_vec1_grf(nr+1, 4);
 236
 237    for (i = 0; i < 4; i++) {
 238       if (mask & (1<<i)) {
 239          brw_LINE(p, brw_null_reg(), interp[i], deltas[0]);
 240          brw_MAC(p, dst[i], suboffset(interp[i],1), deltas[1]);
 241       }
 242    }
 243    for (i = 0; i < 4; i++) {
 244       if (mask & (1<<i)) {
 245          brw_MUL(p, dst[i], dst[i], w[3]);
 246       }
 247    }
 248 }
 249
 250
 251 void emit_cinterp(struct brw_compile *p,
 252                   const struct brw_reg *dst,
 253                   GLuint mask,
 254                   const struct brw_reg *arg0)
 255 {
 256    struct brw_reg interp[4];
 257    GLuint nr = arg0[0].nr;
 258    GLuint i;
 259
 260    interp[0] = brw_vec1_grf(nr, 0);
 261    interp[1] = brw_vec1_grf(nr, 4);
 262    interp[2] = brw_vec1_grf(nr+1, 0);
 263    interp[3] = brw_vec1_grf(nr+1, 4);
 264
 265    for (i = 0; i < 4; i++) {
 266       if (mask & (1<<i)) {
 267          brw_MOV(p, dst[i], suboffset(interp[i],3));    /* TODO: optimize away like other moves */
 268       }
 269    }
 270 }
 271
 272 /* Sets the destination channels to 1.0 or 0.0 according to glFrontFacing. */
 273 void emit_frontfacing(struct brw_compile *p,
 274                       const struct brw_reg *dst,
 275                       GLuint mask)
 276 {
 277    struct brw_reg r1_6ud = retype(brw_vec1_grf(1, 6), BRW_REGISTER_TYPE_UD);
 278    GLuint i;
 279
 280    if (!(mask & WRITEMASK_XYZW))
 281       return;
 282
 283    for (i = 0; i < 4; i++) {
 284       if (mask & (1<<i)) {
 285          brw_MOV(p, dst[i], brw_imm_f(0.0));
 286       }
 287    }
 288
 289    /* bit 31 is "primitive is back face", so checking < (1 << 31) gives
 290     * us front face
 291     */
 292    brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_L, r1_6ud, brw_imm_ud(1 << 31));
 293    for (i = 0; i < 4; i++) {
 294       if (mask & (1<<i)) {
 295          brw_MOV(p, dst[i], brw_imm_f(1.0));
 296       }
 297    }
 298    brw_set_predicate_control_flag_value(p, 0xff);
 299 }
 300
 301 /* For OPCODE_DDX and OPCODE_DDY, per channel of output we've got input
 302  * looking like:
 303  *
 304  * arg0: ss0.tl ss0.tr ss0.bl ss0.br ss1.tl ss1.tr ss1.bl ss1.br
 305  *
 306  * and we're trying to produce:
 307  *
 308  *           DDX                     DDY
 309  * dst: (ss0.tr - ss0.tl)     (ss0.tl - ss0.bl)
 310  *      (ss0.tr - ss0.tl)     (ss0.tr - ss0.br)
 311  *      (ss0.br - ss0.bl)     (ss0.tl - ss0.bl)
 312  *      (ss0.br - ss0.bl)     (ss0.tr - ss0.br)
 313  *      (ss1.tr - ss1.tl)     (ss1.tl - ss1.bl)
 314  *      (ss1.tr - ss1.tl)     (ss1.tr - ss1.br)
 315  *      (ss1.br - ss1.bl)     (ss1.tl - ss1.bl)
 316  *      (ss1.br - ss1.bl)     (ss1.tr - ss1.br)
 317  *
 318  * and add another set of two more subspans if in 16-pixel dispatch mode.
 319  *
 320  * For DDX, it ends up being easy: width = 2, horiz=0 gets us the same result
 321  * for each pair, and vertstride = 2 jumps us 2 elements after processing a
 322  * pair. But for DDY, it's harder, as we want to produce the pairs swizzled
 323  * between each other.  We could probably do it like ddx and swizzle the right
 324  * order later, but bail for now and just produce
 325  * ((ss0.tl - ss0.bl)x4 (ss1.tl - ss1.bl)x4)
 326  */
 327 void emit_ddxy(struct brw_compile *p,
 328                const struct brw_reg *dst,
 329                GLuint mask,
 330                GLboolean is_ddx,
 331                const struct brw_reg *arg0)
 332 {
 333    int i;
 334    struct brw_reg src0, src1;
 335
 336    if (mask & SATURATE)
 337       brw_set_saturate(p, 1);
 338    for (i = 0; i < 4; i++ ) {
 339       if (mask & (1<<i)) {
 340          if (is_ddx) {
 341             src0 = brw_reg(arg0[i].file, arg0[i].nr, 1,
 342                            BRW_REGISTER_TYPE_F,
 343                            BRW_VERTICAL_STRIDE_2,
 344                            BRW_WIDTH_2,
 345                            BRW_HORIZONTAL_STRIDE_0,
 346                            BRW_SWIZZLE_XYZW, WRITEMASK_XYZW);
 347             src1 = brw_reg(arg0[i].file, arg0[i].nr, 0,
 348                            BRW_REGISTER_TYPE_F,
 349                            BRW_VERTICAL_STRIDE_2,
 350                            BRW_WIDTH_2,
 351                            BRW_HORIZONTAL_STRIDE_0,
 352                            BRW_SWIZZLE_XYZW, WRITEMASK_XYZW);
 353          } else {
 354             src0 = brw_reg(arg0[i].file, arg0[i].nr, 0,
 355                            BRW_REGISTER_TYPE_F,
 356                            BRW_VERTICAL_STRIDE_4,
 357                            BRW_WIDTH_4,
 358                            BRW_HORIZONTAL_STRIDE_0,
 359                            BRW_SWIZZLE_XYZW, WRITEMASK_XYZW);
 360             src1 = brw_reg(arg0[i].file, arg0[i].nr, 2,
 361                            BRW_REGISTER_TYPE_F,
 362                            BRW_VERTICAL_STRIDE_4,
 363                            BRW_WIDTH_4,
 364                            BRW_HORIZONTAL_STRIDE_0,
 365                            BRW_SWIZZLE_XYZW, WRITEMASK_XYZW);
 366          }
 367          brw_ADD(p, dst[i], src0, negate(src1));
 368       }
 369    }
 370    if (mask & SATURATE)
 371       brw_set_saturate(p, 0);
 372 }
 373
 374 void emit_alu1(struct brw_compile *p,
 375                struct brw_instruction *(*func)(struct brw_compile *,
 376                                                struct brw_reg,
 377                                                struct brw_reg),
 378                const struct brw_reg *dst,
 379                GLuint mask,
 380                const struct brw_reg *arg0)
 381 {
 382    GLuint i;
 383
 384    if (mask & SATURATE)
 385       brw_set_saturate(p, 1);
 386
 387    for (i = 0; i < 4; i++) {
 388       if (mask & (1<<i)) {
 389          func(p, dst[i], arg0[i]);
 390       }
 391    }
 392
 393    if (mask & SATURATE)
 394       brw_set_saturate(p, 0);
 395 }
 396
 397
 398 void emit_alu2(struct brw_compile *p,
 399                struct brw_instruction *(*func)(struct brw_compile *,
 400                                                struct brw_reg,
 401                                                struct brw_reg,
 402                                                struct brw_reg),
 403                const struct brw_reg *dst,
 404                GLuint mask,
 405                const struct brw_reg *arg0,
 406                const struct brw_reg *arg1)
 407 {
 408    GLuint i;
 409
 410    if (mask & SATURATE)
 411       brw_set_saturate(p, 1);
 412
 413    for (i = 0; i < 4; i++) {
 414       if (mask & (1<<i)) {
 415          func(p, dst[i], arg0[i], arg1[i]);
 416       }
 417    }
 418
 419    if (mask & SATURATE)
 420       brw_set_saturate(p, 0);
 421 }
 422
 423
 424 void emit_mad(struct brw_compile *p,
 425               const struct brw_reg *dst,
 426               GLuint mask,
 427               const struct brw_reg *arg0,
 428               const struct brw_reg *arg1,
 429               const struct brw_reg *arg2)
 430 {
 431    GLuint i;
 432
 433    for (i = 0; i < 4; i++) {
 434       if (mask & (1<<i)) {
 435          brw_MUL(p, dst[i], arg0[i], arg1[i]);
 436
 437          brw_set_saturate(p, (mask & SATURATE) ? 1 : 0);
 438          brw_ADD(p, dst[i], dst[i], arg2[i]);
 439          brw_set_saturate(p, 0);
 440       }
 441    }
 442 }
 443
 444 void emit_lrp(struct brw_compile *p,
 445               const struct brw_reg *dst,
 446               GLuint mask,
 447               const struct brw_reg *arg0,
 448               const struct brw_reg *arg1,
 449               const struct brw_reg *arg2)
 450 {
 451    GLuint i;
 452
 453    /* Uses dst as a temporary:
 454     */
 455    for (i = 0; i < 4; i++) {
 456       if (mask & (1<<i)) {
 457          /* Can I use the LINE instruction for this?
 458           */
 459          brw_ADD(p, dst[i], negate(arg0[i]), brw_imm_f(1.0));
 460          brw_MUL(p, brw_null_reg(), dst[i], arg2[i]);
 461
 462          brw_set_saturate(p, (mask & SATURATE) ? 1 : 0);
 463          brw_MAC(p, dst[i], arg0[i], arg1[i]);
 464          brw_set_saturate(p, 0);
 465       }
 466    }
 467 }
 468
 469 void emit_sop(struct brw_compile *p,
 470               const struct brw_reg *dst,
 471               GLuint mask,
 472               GLuint cond,
 473               const struct brw_reg *arg0,
 474               const struct brw_reg *arg1)
 475 {
 476    GLuint i;
 477
 478    for (i = 0; i < 4; i++) {
 479       if (mask & (1<<i)) {
 480          brw_push_insn_state(p);
 481          brw_CMP(p, brw_null_reg(), cond, arg0[i], arg1[i]);
 482          brw_set_predicate_control(p, BRW_PREDICATE_NONE);
 483          brw_MOV(p, dst[i], brw_imm_f(0));
 484          brw_set_predicate_control(p, BRW_PREDICATE_NORMAL);
 485          brw_MOV(p, dst[i], brw_imm_f(1.0));
 486          brw_pop_insn_state(p);
 487       }
 488    }
 489 }
 490
 491 static void emit_slt( struct brw_compile *p,
 492                       const struct brw_reg *dst,
 493                       GLuint mask,
 494                       const struct brw_reg *arg0,
 495                       const struct brw_reg *arg1 )
 496 {
 497    emit_sop(p, dst, mask, BRW_CONDITIONAL_L, arg0, arg1);
 498 }
 499
 500 static void emit_sle( struct brw_compile *p,
 501                       const struct brw_reg *dst,
 502                       GLuint mask,
 503                       const struct brw_reg *arg0,
 504                       const struct brw_reg *arg1 )
 505 {
 506    emit_sop(p, dst, mask, BRW_CONDITIONAL_LE, arg0, arg1);
 507 }
 508
 509 static void emit_sgt( struct brw_compile *p,
 510                       const struct brw_reg *dst,
 511                       GLuint mask,
 512                       const struct brw_reg *arg0,
 513                       const struct brw_reg *arg1 )
 514 {
 515    emit_sop(p, dst, mask, BRW_CONDITIONAL_G, arg0, arg1);
 516 }
 517
 518 static void emit_sge( struct brw_compile *p,
 519                       const struct brw_reg *dst,
 520                       GLuint mask,
 521                       const struct brw_reg *arg0,
 522                       const struct brw_reg *arg1 )
 523 {
 524    emit_sop(p, dst, mask, BRW_CONDITIONAL_GE, arg0, arg1);
 525 }
 526
 527 static void emit_seq( struct brw_compile *p,
 528                       const struct brw_reg *dst,
 529                       GLuint mask,
 530                       const struct brw_reg *arg0,
 531                       const struct brw_reg *arg1 )
 532 {
 533    emit_sop(p, dst, mask, BRW_CONDITIONAL_EQ, arg0, arg1);
 534 }
 535
 536 static void emit_sne( struct brw_compile *p,
 537                       const struct brw_reg *dst,
 538                       GLuint mask,
 539                       const struct brw_reg *arg0,
 540                       const struct brw_reg *arg1 )
 541 {
 542    emit_sop(p, dst, mask, BRW_CONDITIONAL_NEQ, arg0, arg1);
 543 }
 544
 545 static void emit_cmp( struct brw_compile *p,
 546                       const struct brw_reg *dst,
 547                       GLuint mask,
 548                       const struct brw_reg *arg0,
 549                       const struct brw_reg *arg1,
 550                       const struct brw_reg *arg2 )
 551 {
 552    GLuint i;
 553
 554    for (i = 0; i < 4; i++) {
 555       if (mask & (1<<i)) {
 556          brw_set_saturate(p, (mask & SATURATE) ? 1 : 0);
 557          brw_MOV(p, dst[i], arg2[i]);
 558          brw_set_saturate(p, 0);
 559
 560          brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_L, arg0[i], brw_imm_f(0));
 561
 562          brw_set_saturate(p, (mask & SATURATE) ? 1 : 0);
 563          brw_MOV(p, dst[i], arg1[i]);
 564          brw_set_saturate(p, 0);
 565          brw_set_predicate_control_flag_value(p, 0xff);
 566       }
 567    }
 568 }
 569
 570 void emit_max(struct brw_compile *p,
 571               const struct brw_reg *dst,
 572               GLuint mask,
 573               const struct brw_reg *arg0,
 574               const struct brw_reg *arg1)
 575 {
 576    GLuint i;
 577
 578    for (i = 0; i < 4; i++) {
 579       if (mask & (1<<i)) {
 580          brw_set_saturate(p, (mask & SATURATE) ? 1 : 0);
 581          brw_MOV(p, dst[i], arg0[i]);
 582          brw_set_saturate(p, 0);
 583
 584          brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_L, arg0[i], arg1[i]);
 585
 586          brw_set_saturate(p, (mask & SATURATE) ? 1 : 0);
 587          brw_MOV(p, dst[i], arg1[i]);
 588          brw_set_saturate(p, 0);
 589          brw_set_predicate_control_flag_value(p, 0xff);
 590       }
 591    }
 592 }
 593
 594 void emit_min(struct brw_compile *p,
 595               const struct brw_reg *dst,
 596               GLuint mask,
 597               const struct brw_reg *arg0,
 598               const struct brw_reg *arg1)
 599 {
 600    GLuint i;
 601
 602    for (i = 0; i < 4; i++) {
 603       if (mask & (1<<i)) {
 604          brw_set_saturate(p, (mask & SATURATE) ? 1 : 0);
 605          brw_MOV(p, dst[i], arg1[i]);
 606          brw_set_saturate(p, 0);
 607
 608          brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_L, arg0[i], arg1[i]);
 609
 610          brw_set_saturate(p, (mask & SATURATE) ? 1 : 0);
 611          brw_MOV(p, dst[i], arg0[i]);
 612          brw_set_saturate(p, 0);
 613          brw_set_predicate_control_flag_value(p, 0xff);
 614       }
 615    }
 616 }
 617
 618
 619 void emit_dp3(struct brw_compile *p,
 620               const struct brw_reg *dst,
 621               GLuint mask,
 622               const struct brw_reg *arg0,
 623               const struct brw_reg *arg1)
 624 {
 625    int dst_chan = _mesa_ffs(mask & WRITEMASK_XYZW) - 1;
 626
 627    if (!(mask & WRITEMASK_XYZW))
 628       return; /* Do not emit dead code */
 629
 630    assert(is_power_of_two(mask & WRITEMASK_XYZW));
 631
 632    brw_MUL(p, brw_null_reg(), arg0[0], arg1[0]);
 633    brw_MAC(p, brw_null_reg(), arg0[1], arg1[1]);
 634
 635    brw_set_saturate(p, (mask & SATURATE) ? 1 : 0);
 636    brw_MAC(p, dst[dst_chan], arg0[2], arg1[2]);
 637    brw_set_saturate(p, 0);
 638 }
 639
 640
 641 void emit_dp4(struct brw_compile *p,
 642               const struct brw_reg *dst,
 643               GLuint mask,
 644               const struct brw_reg *arg0,
 645               const struct brw_reg *arg1)
 646 {
 647    int dst_chan = _mesa_ffs(mask & WRITEMASK_XYZW) - 1;
 648
 649    if (!(mask & WRITEMASK_XYZW))
 650       return; /* Do not emit dead code */
 651
 652    assert(is_power_of_two(mask & WRITEMASK_XYZW));
 653
 654    brw_MUL(p, brw_null_reg(), arg0[0], arg1[0]);
 655    brw_MAC(p, brw_null_reg(), arg0[1], arg1[1]);
 656    brw_MAC(p, brw_null_reg(), arg0[2], arg1[2]);
 657
 658    brw_set_saturate(p, (mask & SATURATE) ? 1 : 0);
 659    brw_MAC(p, dst[dst_chan], arg0[3], arg1[3]);
 660    brw_set_saturate(p, 0);
 661 }
 662
 663
 664 void emit_dph(struct brw_compile *p,
 665               const struct brw_reg *dst,
 666               GLuint mask,
 667               const struct brw_reg *arg0,
 668               const struct brw_reg *arg1)
 669 {
 670    const int dst_chan = _mesa_ffs(mask & WRITEMASK_XYZW) - 1;
 671
 672    if (!(mask & WRITEMASK_XYZW))
 673       return; /* Do not emit dead code */
 674
 675    assert(is_power_of_two(mask & WRITEMASK_XYZW));
 676
 677    brw_MUL(p, brw_null_reg(), arg0[0], arg1[0]);
 678    brw_MAC(p, brw_null_reg(), arg0[1], arg1[1]);
 679    brw_MAC(p, dst[dst_chan], arg0[2], arg1[2]);
 680
 681    brw_set_saturate(p, (mask & SATURATE) ? 1 : 0);
 682    brw_ADD(p, dst[dst_chan], dst[dst_chan], arg1[3]);
 683    brw_set_saturate(p, 0);
 684 }
 685
 686
 687 void emit_xpd(struct brw_compile *p,
 688               const struct brw_reg *dst,
 689               GLuint mask,
 690               const struct brw_reg *arg0,
 691               const struct brw_reg *arg1)
 692 {
 693    GLuint i;
 694
 695    assert(!(mask & WRITEMASK_W) == WRITEMASK_X);
 696
 697    for (i = 0 ; i < 3; i++) {
 698       if (mask & (1<<i)) {
 699          GLuint i2 = (i+2)%3;
 700          GLuint i1 = (i+1)%3;
 701
 702          brw_MUL(p, brw_null_reg(), negate(arg0[i2]), arg1[i1]);
 703
 704          brw_set_saturate(p, (mask & SATURATE) ? 1 : 0);
 705          brw_MAC(p, dst[i], arg0[i1], arg1[i2]);
 706          brw_set_saturate(p, 0);
 707       }
 708    }
 709 }
 710
 711
 712 void emit_math1(struct brw_wm_compile *c,
 713                 GLuint function,
 714                 const struct brw_reg *dst,
 715                 GLuint mask,
 716                 const struct brw_reg *arg0)
 717 {
 718    struct brw_compile *p = &c->func;
 719    int dst_chan = _mesa_ffs(mask & WRITEMASK_XYZW) - 1;
 720    GLuint saturate = ((mask & SATURATE) ?
 721                       BRW_MATH_SATURATE_SATURATE :
 722                       BRW_MATH_SATURATE_NONE);
 723
 724    if (!(mask & WRITEMASK_XYZW))
 725       return; /* Do not emit dead code */
 726
 727    assert(is_power_of_two(mask & WRITEMASK_XYZW));
 728
 729    /* If compressed, this will write message reg 2,3 from arg0.x's 16
 730     * channels.
 731     */
 732    brw_MOV(p, brw_message_reg(2), arg0[0]);
 733
 734    /* Send two messages to perform all 16 operations:
 735     */
 736    brw_push_insn_state(p);
 737    brw_set_compression_control(p, BRW_COMPRESSION_NONE);
 738    brw_math(p,
 739             dst[dst_chan],
 740             function,
 741             saturate,
 742             2,
 743             brw_null_reg(),
 744             BRW_MATH_DATA_VECTOR,
 745             BRW_MATH_PRECISION_FULL);
 746
 747    if (c->dispatch_width == 16) {
 748       brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF);
 749       brw_math(p,
 750                offset(dst[dst_chan],1),
 751                function,
 752                saturate,
 753                3,
 754                brw_null_reg(),
 755                BRW_MATH_DATA_VECTOR,
 756                BRW_MATH_PRECISION_FULL);
 757    }
 758    brw_pop_insn_state(p);
 759 }
 760
 761
 762 void emit_math2(struct brw_wm_compile *c,
 763                 GLuint function,
 764                 const struct brw_reg *dst,
 765                 GLuint mask,
 766                 const struct brw_reg *arg0,
 767                 const struct brw_reg *arg1)
 768 {
 769    struct brw_compile *p = &c->func;
 770    int dst_chan = _mesa_ffs(mask & WRITEMASK_XYZW) - 1;
 771    GLuint saturate = ((mask & SATURATE) ?
 772                       BRW_MATH_SATURATE_SATURATE :
 773                       BRW_MATH_SATURATE_NONE);
 774
 775    if (!(mask & WRITEMASK_XYZW))
 776       return; /* Do not emit dead code */
 777
 778    assert(is_power_of_two(mask & WRITEMASK_XYZW));
 779
 780    brw_push_insn_state(p);
 781
 782    brw_set_compression_control(p, BRW_COMPRESSION_NONE);
 783    brw_MOV(p, brw_message_reg(2), arg0[0]);
 784    if (c->dispatch_width == 16) {
 785       brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF);
 786       brw_MOV(p, brw_message_reg(4), sechalf(arg0[0]));
 787    }
 788
 789    brw_set_compression_control(p, BRW_COMPRESSION_NONE);
 790    brw_MOV(p, brw_message_reg(3), arg1[0]);
 791    if (c->dispatch_width == 16) {
 792       brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF);
 793       brw_MOV(p, brw_message_reg(5), sechalf(arg1[0]));
 794    }
 795
 796    brw_set_compression_control(p, BRW_COMPRESSION_NONE);
 797    brw_math(p,
 798             dst[dst_chan],
 799             function,
 800             saturate,
 801             2,
 802             brw_null_reg(),
 803             BRW_MATH_DATA_VECTOR,
 804             BRW_MATH_PRECISION_FULL);
 805
 806    /* Send two messages to perform all 16 operations:
 807     */
 808    if (c->dispatch_width == 16) {
 809       brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF);
 810       brw_math(p,
 811                offset(dst[dst_chan],1),
 812                function,
 813                saturate,
 814                4,
 815                brw_null_reg(),
 816                BRW_MATH_DATA_VECTOR,
 817                BRW_MATH_PRECISION_FULL);
 818    }
 819    brw_pop_insn_state(p);
 820 }
 821
 822
 823
 824 static void emit_tex( struct brw_wm_compile *c,
 825                       const struct brw_wm_instruction *inst,
 826                       struct brw_reg *dst,
 827                       GLuint dst_flags,
 828                       struct brw_reg *arg )
 829 {
 830    struct brw_compile *p = &c->func;
 831    GLuint cur_mrf = 2, response_length;
 832    GLuint i, nr_texcoords;
 833    GLuint emit;
 834    GLuint msg_type;
 835
 836    /* How many input regs are there?
 837     */
 838    switch (inst->tex_idx) {
 839    case TEXTURE_1D_INDEX:
 840       emit = WRITEMASK_X;
 841       nr_texcoords = 1;
 842       break;
 843    case TEXTURE_2D_INDEX:
 844    case TEXTURE_RECT_INDEX:
 845       emit = WRITEMASK_XY;
 846       nr_texcoords = 2;
 847       break;
 848    case TEXTURE_3D_INDEX:
 849    case TEXTURE_CUBE_INDEX:
 850       emit = WRITEMASK_XYZ;
 851       nr_texcoords = 3;
 852       break;
 853    default:
 854       /* unexpected target */
 855       abort();
 856    }
 857
 858    /* For shadow comparisons, we have to supply u,v,r. */
 859    if (inst->tex_shadow)
 860       nr_texcoords = 3;
 861
 862    for (i = 0; i < nr_texcoords; i++) {
 863       if (emit & (1<<i))
 864          brw_MOV(p, brw_message_reg(cur_mrf), arg[i]);
 865       else
 866          brw_MOV(p, brw_message_reg(cur_mrf), brw_imm_f(0));
 867       cur_mrf += 2;
 868    }
 869
 870    /* Fill in the shadow comparison reference value. */
 871    if (inst->tex_shadow) {
 872       if (BRW_IS_IGDNG(p->brw)) {
 873          /* Fill in the cube map array index value. */
 874          brw_MOV(p, brw_message_reg(cur_mrf), brw_imm_f(0));
 875          cur_mrf += 2;
 876       }
 877       brw_MOV(p, brw_message_reg(cur_mrf), arg[2]);
 878       cur_mrf += 2;
 879    }
 880
 881    response_length = 8;         /* always */
 882
 883    if (BRW_IS_IGDNG(p->brw)) {
 884        if (inst->tex_shadow)
 885            msg_type = BRW_SAMPLER_MESSAGE_SAMPLE_COMPARE_IGDNG;
 886        else
 887            msg_type = BRW_SAMPLER_MESSAGE_SAMPLE_IGDNG;
 888    } else {
 889        if (inst->tex_shadow)
 890            msg_type = BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_COMPARE;
 891        else
 892            msg_type = BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE;
 893    }
 894
 895    brw_SAMPLE(p,
 896               retype(vec16(dst[0]), BRW_REGISTER_TYPE_UW),
 897               1,
 898               retype(c->payload.depth[0].hw_reg, BRW_REGISTER_TYPE_UW),
 899               SURF_INDEX_TEXTURE(inst->tex_unit),
 900               inst->tex_unit,     /* sampler */
 901               inst->writemask,
 902               msg_type,
 903               response_length,
 904               cur_mrf - 1,
 905               0,
 906               1,
 907               BRW_SAMPLER_SIMD_MODE_SIMD16);
 908 }
 909
 910
 911 static void emit_txb( struct brw_wm_compile *c,
 912                       const struct brw_wm_instruction *inst,
 913                       struct brw_reg *dst,
 914                       GLuint dst_flags,
 915                       struct brw_reg *arg )
 916 {
 917    struct brw_compile *p = &c->func;
 918    GLuint msgLength;
 919    GLuint msg_type;
 920    /* Shadow ignored for txb.
 921     */
 922    switch (inst->tex_idx) {
 923    case TEXTURE_1D_INDEX:
 924       brw_MOV(p, brw_message_reg(2), arg[0]);
 925       brw_MOV(p, brw_message_reg(4), brw_imm_f(0));
 926       brw_MOV(p, brw_message_reg(6), brw_imm_f(0));
 927       break;
 928    case TEXTURE_2D_INDEX:
 929    case TEXTURE_RECT_INDEX:
 930       brw_MOV(p, brw_message_reg(2), arg[0]);
 931       brw_MOV(p, brw_message_reg(4), arg[1]);
 932       brw_MOV(p, brw_message_reg(6), brw_imm_f(0));
 933       break;
 934    case TEXTURE_3D_INDEX:
 935    case TEXTURE_CUBE_INDEX:
 936       brw_MOV(p, brw_message_reg(2), arg[0]);
 937       brw_MOV(p, brw_message_reg(4), arg[1]);
 938       brw_MOV(p, brw_message_reg(6), arg[2]);
 939       break;
 940    default:
 941       /* unexpected target */
 942       abort();
 943    }
 944
 945    brw_MOV(p, brw_message_reg(8), arg[3]);
 946    msgLength = 9;
 947
 948    if (BRW_IS_IGDNG(p->brw))
 949        msg_type = BRW_SAMPLER_MESSAGE_SAMPLE_BIAS_IGDNG;
 950    else
 951        msg_type = BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_BIAS;
 952
 953    brw_SAMPLE(p,
 954               retype(vec16(dst[0]), BRW_REGISTER_TYPE_UW),
 955               1,
 956               retype(c->payload.depth[0].hw_reg, BRW_REGISTER_TYPE_UW),
 957               SURF_INDEX_TEXTURE(inst->tex_unit),
 958               inst->tex_unit,     /* sampler */
 959               inst->writemask,
 960               msg_type,
 961               8,                /* responseLength */
 962               msgLength,
 963               0,
 964               1,
 965               BRW_SAMPLER_SIMD_MODE_SIMD16);
 966 }
 967
 968
 969 static void emit_lit(struct brw_wm_compile *c,
 970                      const struct brw_reg *dst,
 971                      GLuint mask,
 972                      const struct brw_reg *arg0)
 973 {
 974    struct brw_compile *p = &c->func;
 975
 976    assert((mask & WRITEMASK_XW) == 0);
 977
 978    if (mask & WRITEMASK_Y) {
 979       brw_set_saturate(p, (mask & SATURATE) ? 1 : 0);
 980       brw_MOV(p, dst[1], arg0[0]);
 981       brw_set_saturate(p, 0);
 982    }
 983
 984    if (mask & WRITEMASK_Z) {
 985       emit_math2(c, BRW_MATH_FUNCTION_POW,
 986                  &dst[2],
 987                  WRITEMASK_X | (mask & SATURATE),
 988                  &arg0[1],
 989                  &arg0[3]);
 990    }
 991
 992    /* Ordinarily you'd use an iff statement to skip or shortcircuit
 993     * some of the POW calculations above, but 16-wide iff statements
 994     * seem to lock c1 hardware, so this is a nasty workaround:
 995     */
 996    brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_LE, arg0[0], brw_imm_f(0));
 997    {
 998       if (mask & WRITEMASK_Y)
 999          brw_MOV(p, dst[1], brw_imm_f(0));
1000
1001       if (mask & WRITEMASK_Z)
1002          brw_MOV(p, dst[2], brw_imm_f(0));
1003    }
1004    brw_set_predicate_control(p, BRW_PREDICATE_NONE);
1005 }
1006
1007
1008 /* Kill pixel - set execution mask to zero for those pixels which
1009  * fail.
1010  */
1011 static void emit_kil( struct brw_wm_compile *c,
1012                       struct brw_reg *arg0)
1013 {
1014    struct brw_compile *p = &c->func;
1015    struct brw_reg r0uw = retype(brw_vec1_grf(0, 0), BRW_REGISTER_TYPE_UW);
1016    GLuint i;
1017
1018    /* XXX - usually won't need 4 compares!
1019     */
1020    for (i = 0; i < 4; i++) {
1021       brw_push_insn_state(p);
1022       brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_GE, arg0[i], brw_imm_f(0));
1023       brw_set_predicate_control_flag_value(p, 0xff);
1024       brw_set_compression_control(p, BRW_COMPRESSION_NONE);
1025       brw_AND(p, r0uw, brw_flag_reg(), r0uw);
1026       brw_pop_insn_state(p);
1027    }
1028 }
1029
1030 /* KIL_NV kills the pixels that are currently executing, not based on a test
1031  * of the arguments.
1032  */
1033 static void emit_kil_nv( struct brw_wm_compile *c )
1034 {
1035    struct brw_compile *p = &c->func;
1036    struct brw_reg r0uw = retype(brw_vec1_grf(0, 0), BRW_REGISTER_TYPE_UW);
1037
1038    brw_push_insn_state(p);
1039    brw_set_mask_control(p, BRW_MASK_DISABLE);
1040    brw_NOT(p, c->emit_mask_reg, brw_mask_reg(1)); //IMASK
1041    brw_AND(p, r0uw, c->emit_mask_reg, r0uw);
1042    brw_pop_insn_state(p);
1043 }
1044
1045 static void fire_fb_write( struct brw_wm_compile *c,
1046                            GLuint base_reg,
1047                            GLuint nr,
1048                            GLuint target,
1049                            GLuint eot )
1050 {
1051    struct brw_compile *p = &c->func;
1052    struct brw_reg dst;
1053
1054    if (c->dispatch_width == 16)
1055       dst = retype(vec16(brw_null_reg()), BRW_REGISTER_TYPE_UW);
1056    else
1057       dst = retype(vec8(brw_null_reg()), BRW_REGISTER_TYPE_UW);
1058
1059    /* Pass through control information:
1060     */
1061 /*  mov (8) m1.0<1>:ud   r1.0<8;8,1>:ud   { Align1 NoMask } */
1062    {
1063       brw_push_insn_state(p);
1064       brw_set_mask_control(p, BRW_MASK_DISABLE); /* ? */
1065       brw_set_compression_control(p, BRW_COMPRESSION_NONE);
1066       brw_MOV(p,
1067                brw_message_reg(base_reg + 1),
1068                brw_vec8_grf(1, 0));
1069       brw_pop_insn_state(p);
1070    }
1071
1072    /* Send framebuffer write message: */
1073 /*  send (16) null.0<1>:uw m0               r0.0<8;8,1>:uw   0x85a04000:ud    { Align1 EOT } */
1074    brw_fb_WRITE(p,
1075                 dst,
1076                 base_reg,
1077                 retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UW),
1078                 target,
1079                 nr,
1080                 0,
1081                 eot);
1082 }
1083
1084
1085 static void emit_aa( struct brw_wm_compile *c,
1086                      struct brw_reg *arg1,
1087                      GLuint reg )
1088 {
1089    struct brw_compile *p = &c->func;
1090    GLuint comp = c->key.aa_dest_stencil_reg / 2;
1091    GLuint off = c->key.aa_dest_stencil_reg % 2;
1092    struct brw_reg aa = offset(arg1[comp], off);
1093
1094    brw_push_insn_state(p);
1095    brw_set_compression_control(p, BRW_COMPRESSION_NONE); /* ?? */
1096    brw_MOV(p, brw_message_reg(reg), aa);
1097    brw_pop_insn_state(p);
1098 }
1099
1100
1101 /* Post-fragment-program processing.  Send the results to the
1102  * framebuffer.
1103  * \param arg0  the fragment color
1104  * \param arg1  the pass-through depth value
1105  * \param arg2  the shader-computed depth value
1106  */
1107 void emit_fb_write(struct brw_wm_compile *c,
1108                    struct brw_reg *arg0,
1109                    struct brw_reg *arg1,
1110                    struct brw_reg *arg2,
1111                    GLuint target,
1112                    GLuint eot)
1113 {
1114    struct brw_compile *p = &c->func;
1115    struct brw_context *brw = p->brw;
1116    GLuint nr = 2;
1117    GLuint channel;
1118
1119    /* Reserve a space for AA - may not be needed:
1120     */
1121    if (c->key.aa_dest_stencil_reg)
1122       nr += 1;
1123
1124    /* I don't really understand how this achieves the color interleave
1125     * (ie RGBARGBA) in the result:  [Do the saturation here]
1126     */
1127    brw_push_insn_state(p);
1128
1129    for (channel = 0; channel < 4; channel++) {
1130       if (c->dispatch_width == 16 && (BRW_IS_G4X(brw) || BRW_IS_IGDNG(brw))) {
1131          /* By setting the high bit of the MRF register number, we indicate
1132           * that we want COMPR4 mode - instead of doing the usual destination
1133           * + 1 for the second half we get destination + 4.
1134           */
1135          brw_MOV(p,
1136                  brw_message_reg(nr + channel + (1 << 7)),
1137                  arg0[channel]);
1138       } else {
1139          /*  mov (8) m2.0<1>:ud   r28.0<8;8,1>:ud  { Align1 } */
1140          /*  mov (8) m6.0<1>:ud   r29.0<8;8,1>:ud  { Align1 SecHalf } */
1141          brw_set_compression_control(p, BRW_COMPRESSION_NONE);
1142          brw_MOV(p,
1143                  brw_message_reg(nr + channel),
1144                  arg0[channel]);
1145
1146          if (c->dispatch_width == 16) {
1147             brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF);
1148             brw_MOV(p,
1149                     brw_message_reg(nr + channel + 4),
1150                     sechalf(arg0[channel]));
1151          }
1152       }
1153    }
1154    /* skip over the regs populated above:
1155     */
1156    nr += 8;
1157    brw_pop_insn_state(p);
1158
1159    if (c->key.source_depth_to_render_target)
1160    {
1161       if (c->key.computes_depth)
1162          brw_MOV(p, brw_message_reg(nr), arg2[2]);
1163       else
1164          brw_MOV(p, brw_message_reg(nr), arg1[1]); /* ? */
1165
1166       nr += 2;
1167    }
1168
1169    if (c->key.dest_depth_reg)
1170    {
1171       GLuint comp = c->key.dest_depth_reg / 2;
1172       GLuint off = c->key.dest_depth_reg % 2;
1173
1174       if (off != 0) {
1175          brw_push_insn_state(p);
1176          brw_set_compression_control(p, BRW_COMPRESSION_NONE);
1177
1178          brw_MOV(p, brw_message_reg(nr), offset(arg1[comp],1));
1179          /* 2nd half? */
1180          brw_MOV(p, brw_message_reg(nr+1), arg1[comp+1]);
1181          brw_pop_insn_state(p);
1182       }
1183       else {
1184          brw_MOV(p, brw_message_reg(nr), arg1[comp]);
1185       }
1186       nr += 2;
1187    }
1188
1189    if (!c->key.runtime_check_aads_emit) {
1190       if (c->key.aa_dest_stencil_reg)
1191          emit_aa(c, arg1, 2);
1192
1193       fire_fb_write(c, 0, nr, target, eot);
1194    }
1195    else {
1196       struct brw_reg v1_null_ud = vec1(retype(brw_null_reg(), BRW_REGISTER_TYPE_UD));
1197       struct brw_reg ip = brw_ip_reg();
1198       struct brw_instruction *jmp;
1199
1200       brw_set_compression_control(p, BRW_COMPRESSION_NONE);
1201       brw_set_conditionalmod(p, BRW_CONDITIONAL_Z);
1202       brw_AND(p,
1203               v1_null_ud,
1204               get_element_ud(brw_vec8_grf(1,0), 6),
1205               brw_imm_ud(1<<26));
1206
1207       jmp = brw_JMPI(p, ip, ip, brw_imm_w(0));
1208       {
1209          emit_aa(c, arg1, 2);
1210          fire_fb_write(c, 0, nr, target, eot);
1211          /* note - thread killed in subroutine */
1212       }
1213       brw_land_fwd_jump(p, jmp);
1214
1215       /* ELSE: Shuffle up one register to fill in the hole left for AA:
1216        */
1217       fire_fb_write(c, 1, nr-1, target, eot);
1218    }
1219 }
1220
1221 /**
1222  * Move a GPR to scratch memory.
1223  */
1224 static void emit_spill( struct brw_wm_compile *c,
1225                         struct brw_reg reg,
1226                         GLuint slot )
1227 {
1228    struct brw_compile *p = &c->func;
1229
1230    /*
1231      mov (16) m2.0<1>:ud   r2.0<8;8,1>:ud   { Align1 Compr }
1232    */
1233    brw_MOV(p, brw_message_reg(2), reg);
1234
1235    /*
1236      mov (1) r0.2<1>:d    0x00000080:d     { Align1 NoMask }
1237      send (16) null.0<1>:uw m1               r0.0<8;8,1>:uw   0x053003ff:ud    { Align1 }
1238    */
1239    brw_dp_WRITE_16(p,
1240                    retype(vec16(brw_vec8_grf(0, 0)), BRW_REGISTER_TYPE_UW),
1241                    slot);
1242 }
1243
1244
1245 /**
1246  * Load a GPR from scratch memory.
1247  */
1248 static void emit_unspill( struct brw_wm_compile *c,
1249                           struct brw_reg reg,
1250                           GLuint slot )
1251 {
1252    struct brw_compile *p = &c->func;
1253
1254    /* Slot 0 is the undef value.
1255     */
1256    if (slot == 0) {
1257       brw_MOV(p, reg, brw_imm_f(0));
1258       return;
1259    }
1260
1261    /*
1262      mov (1) r0.2<1>:d    0x000000c0:d     { Align1 NoMask }
1263      send (16) r110.0<1>:uw m1               r0.0<8;8,1>:uw   0x041243ff:ud    { Align1 }
1264    */
1265
1266    brw_dp_READ_16(p,
1267                   retype(vec16(reg), BRW_REGISTER_TYPE_UW),
1268                   slot);
1269 }
1270
1271
1272 /**
1273  * Retrieve up to 4 GEN4 register pairs for the given wm reg:
1274  * Args with unspill_reg != 0 will be loaded from scratch memory.
1275  */
1276 static void get_argument_regs( struct brw_wm_compile *c,
1277                                struct brw_wm_ref *arg[],
1278                                struct brw_reg *regs )
1279 {
1280    GLuint i;
1281
1282    for (i = 0; i < 4; i++) {
1283       if (arg[i]) {
1284          if (arg[i]->unspill_reg)
1285             emit_unspill(c,
1286                          brw_vec8_grf(arg[i]->unspill_reg, 0),
1287                          arg[i]->value->spill_slot);
1288
1289          regs[i] = arg[i]->hw_reg;
1290       }
1291       else {
1292          regs[i] = brw_null_reg();
1293       }
1294    }
1295 }
1296
1297
1298 /**
1299  * For values that have a spill_slot!=0, write those regs to scratch memory.
1300  */
1301 static void spill_values( struct brw_wm_compile *c,
1302                           struct brw_wm_value *values,
1303                           GLuint nr )
1304 {
1305    GLuint i;
1306
1307    for (i = 0; i < nr; i++)
1308       if (values[i].spill_slot)
1309          emit_spill(c, values[i].hw_reg, values[i].spill_slot);
1310 }
1311
1312
1313 /* Emit the fragment program instructions here.
1314  */
1315 void brw_wm_emit( struct brw_wm_compile *c )
1316 {
1317    struct brw_compile *p = &c->func;
1318    GLuint insn;
1319
1320    brw_set_compression_control(p, BRW_COMPRESSION_COMPRESSED);
1321
1322    /* Check if any of the payload regs need to be spilled:
1323     */
1324    spill_values(c, c->payload.depth, 4);
1325    spill_values(c, c->creg, c->nr_creg);
1326    spill_values(c, c->payload.input_interp, FRAG_ATTRIB_MAX);
1327
1328
1329    for (insn = 0; insn < c->nr_insns; insn++) {
1330
1331       struct brw_wm_instruction *inst = &c->instruction[insn];
1332       struct brw_reg args[3][4], dst[4];
1333       GLuint i, dst_flags;
1334
1335       /* Get argument regs:
1336        */
1337       for (i = 0; i < 3; i++)
1338          get_argument_regs(c, inst->src[i], args[i]);
1339
1340       /* Get dest regs:
1341        */
1342       for (i = 0; i < 4; i++)
1343          if (inst->dst[i])
1344             dst[i] = inst->dst[i]->hw_reg;
1345          else
1346             dst[i] = brw_null_reg();
1347
1348       /* Flags
1349        */
1350       dst_flags = inst->writemask;
1351       if (inst->saturate)
1352          dst_flags |= SATURATE;
1353
1354       switch (inst->opcode) {
1355          /* Generated instructions for calculating triangle interpolants:
1356           */
1357       case WM_PIXELXY:
1358          emit_pixel_xy(c, dst, dst_flags);
1359          break;
1360
1361       case WM_DELTAXY:
1362          emit_delta_xy(p, dst, dst_flags, args[0]);
1363          break;
1364
1365       case WM_WPOSXY:
1366          emit_wpos_xy(c, dst, dst_flags, args[0]);
1367          break;
1368
1369       case WM_PIXELW:
1370          emit_pixel_w(c, dst, dst_flags, args[0], args[1]);
1371          break;
1372
1373       case WM_LINTERP:
1374          emit_linterp(p, dst, dst_flags, args[0], args[1]);
1375          break;
1376
1377       case WM_PINTERP:
1378          emit_pinterp(p, dst, dst_flags, args[0], args[1], args[2]);
1379          break;
1380
1381       case WM_CINTERP:
1382          emit_cinterp(p, dst, dst_flags, args[0]);
1383          break;
1384
1385       case WM_FB_WRITE:
1386          emit_fb_write(c, args[0], args[1], args[2], inst->target, inst->eot);
1387          break;
1388
1389       case WM_FRONTFACING:
1390          emit_frontfacing(p, dst, dst_flags);
1391          break;
1392
1393          /* Straightforward arithmetic:
1394           */
1395       case OPCODE_ADD:
1396          emit_alu2(p, brw_ADD, dst, dst_flags, args[0], args[1]);
1397          break;
1398
1399       case OPCODE_FRC:
1400          emit_alu1(p, brw_FRC, dst, dst_flags, args[0]);
1401          break;
1402
1403       case OPCODE_FLR:
1404          emit_alu1(p, brw_RNDD, dst, dst_flags, args[0]);
1405          break;
1406
1407       case OPCODE_DDX:
1408          emit_ddxy(p, dst, dst_flags, GL_TRUE, args[0]);
1409          break;
1410
1411       case OPCODE_DDY:
1412          emit_ddxy(p, dst, dst_flags, GL_FALSE, args[0]);
1413          break;
1414
1415       case OPCODE_DP3:
1416          emit_dp3(p, dst, dst_flags, args[0], args[1]);
1417          break;
1418
1419       case OPCODE_DP4:
1420          emit_dp4(p, dst, dst_flags, args[0], args[1]);
1421          break;
1422
1423       case OPCODE_DPH:
1424          emit_dph(p, dst, dst_flags, args[0], args[1]);
1425          break;
1426
1427       case OPCODE_TRUNC:
1428          emit_alu1(p, brw_RNDZ, dst, dst_flags, args[0]);
1429          break;
1430
1431       case OPCODE_LRP:
1432          emit_lrp(p, dst, dst_flags, args[0], args[1], args[2]);
1433          break;
1434
1435       case OPCODE_MAD:
1436          emit_mad(p, dst, dst_flags, args[0], args[1], args[2]);
1437          break;
1438
1439       case OPCODE_MOV:
1440       case OPCODE_SWZ:
1441          emit_alu1(p, brw_MOV, dst, dst_flags, args[0]);
1442          break;
1443
1444       case OPCODE_MUL:
1445          emit_alu2(p, brw_MUL, dst, dst_flags, args[0], args[1]);
1446          break;
1447
1448       case OPCODE_XPD:
1449          emit_xpd(p, dst, dst_flags, args[0], args[1]);
1450          break;
1451
1452          /* Higher math functions:
1453           */
1454       case OPCODE_RCP:
1455          emit_math1(c, BRW_MATH_FUNCTION_INV, dst, dst_flags, args[0]);
1456          break;
1457
1458       case OPCODE_RSQ:
1459          emit_math1(c, BRW_MATH_FUNCTION_RSQ, dst, dst_flags, args[0]);
1460          break;
1461
1462       case OPCODE_SIN:
1463          emit_math1(c, BRW_MATH_FUNCTION_SIN, dst, dst_flags, args[0]);
1464          break;
1465
1466       case OPCODE_COS:
1467          emit_math1(c, BRW_MATH_FUNCTION_COS, dst, dst_flags, args[0]);
1468          break;
1469
1470       case OPCODE_EX2:
1471          emit_math1(c, BRW_MATH_FUNCTION_EXP, dst, dst_flags, args[0]);
1472          break;
1473
1474       case OPCODE_LG2:
1475          emit_math1(c, BRW_MATH_FUNCTION_LOG, dst, dst_flags, args[0]);
1476          break;
1477
1478       case OPCODE_SCS:
1479          /* There is an scs math function, but it would need some
1480           * fixup for 16-element execution.
1481           */
1482          if (dst_flags & WRITEMASK_X)
1483             emit_math1(c, BRW_MATH_FUNCTION_COS, dst, (dst_flags&SATURATE)|WRITEMASK_X, args[0]);
1484          if (dst_flags & WRITEMASK_Y)
1485             emit_math1(c, BRW_MATH_FUNCTION_SIN, dst+1, (dst_flags&SATURATE)|WRITEMASK_X, args[0]);
1486          break;
1487
1488       case OPCODE_POW:
1489          emit_math2(c, BRW_MATH_FUNCTION_POW, dst, dst_flags, args[0], args[1]);
1490          break;
1491
1492          /* Comparisons:
1493           */
1494       case OPCODE_CMP:
1495          emit_cmp(p, dst, dst_flags, args[0], args[1], args[2]);
1496          break;
1497
1498       case OPCODE_MAX:
1499          emit_max(p, dst, dst_flags, args[0], args[1]);
1500          break;
1501
1502       case OPCODE_MIN:
1503          emit_min(p, dst, dst_flags, args[0], args[1]);
1504          break;
1505
1506       case OPCODE_SLT:
1507          emit_slt(p, dst, dst_flags, args[0], args[1]);
1508          break;
1509
1510       case OPCODE_SLE:
1511          emit_sle(p, dst, dst_flags, args[0], args[1]);
1512         break;
1513       case OPCODE_SGT:
1514          emit_sgt(p, dst, dst_flags, args[0], args[1]);
1515         break;
1516       case OPCODE_SGE:
1517          emit_sge(p, dst, dst_flags, args[0], args[1]);
1518          break;
1519       case OPCODE_SEQ:
1520          emit_seq(p, dst, dst_flags, args[0], args[1]);
1521         break;
1522       case OPCODE_SNE:
1523          emit_sne(p, dst, dst_flags, args[0], args[1]);
1524         break;
1525
1526       case OPCODE_LIT:
1527          emit_lit(c, dst, dst_flags, args[0]);
1528          break;
1529
1530          /* Texturing operations:
1531           */
1532       case OPCODE_TEX:
1533          emit_tex(c, inst, dst, dst_flags, args[0]);
1534          break;
1535
1536       case OPCODE_TXB:
1537          emit_txb(c, inst, dst, dst_flags, args[0]);
1538          break;
1539
1540       case OPCODE_KIL:
1541          emit_kil(c, args[0]);
1542          break;
1543
1544       case OPCODE_KIL_NV:
1545          emit_kil_nv(c);
1546          break;
1547
1548       default:
1549          _mesa_printf("Unsupported opcode %i (%s) in fragment shader\n",
1550                       inst->opcode, inst->opcode < MAX_OPCODE ?
1551                                     _mesa_opcode_string(inst->opcode) :
1552                                     "unknown");
1553       }
1554
1555       for (i = 0; i < 4; i++)
1556         if (inst->dst[i] && inst->dst[i]->spill_slot)
1557            emit_spill(c,
1558                       inst->dst[i]->hw_reg,
1559                       inst->dst[i]->spill_slot);
1560    }
1561
1562    if (INTEL_DEBUG & DEBUG_WM) {
1563       int i;
1564
1565       _mesa_printf("wm-native:\n");
1566       for (i = 0; i < p->nr_insn; i++)
1567          brw_disasm(stderr, &p->store[i]);
1568       _mesa_printf("\n");
1569    }
1570 }