src/gallium/drivers/i965/brw_wm_emit.c

   1 /*
   2  Copyright (C) Intel Corp.  2006.  All Rights Reserved.
   3  Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
   4  develop this 3D driver.
   5
   6  Permission is hereby granted, free of charge, to any person obtaining
   7  a copy of this software and associated documentation files (the
   8  "Software"), to deal in the Software without restriction, including
   9  without limitation the rights to use, copy, modify, merge, publish,
  10  distribute, sublicense, and/or sell copies of the Software, and to
  11  permit persons to whom the Software is furnished to do so, subject to
  12  the following conditions:
  13
  14  The above copyright notice and this permission notice (including the
  15  next paragraph) shall be included in all copies or substantial
  16  portions of the Software.
  17
  18  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
  19  EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  20  MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
  21  IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
  22  LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
  23  OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
  24  WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
  25
  26  **********************************************************************/
  27  /*
  28   * Authors:
  29   *   Keith Whitwell <keith@tungstengraphics.com>
  30   */
  31
  32 #include "util/u_math.h"
  33 #include "tgsi/tgsi_info.h"
  34
  35 #include "brw_context.h"
  36 #include "brw_wm.h"
  37 #include "brw_debug.h"
  38 #include "brw_disasm.h"
  39
  40 /* Not quite sure how correct this is - need to understand horiz
  41  * vs. vertical strides a little better.
  42  */
  43 static INLINE struct brw_reg sechalf( struct brw_reg reg )
  44 {
  45    if (reg.vstride)
  46       reg.nr++;
  47    return reg;
  48 }
  49
  50 /* Payload R0:
  51  *
  52  * R0.0 -- pixel mask, one bit for each of 4 pixels in 4 quads,
  53  *         corresponding to each of the 16 execution channels.
  54  * R0.1..8 -- ?
  55  * R1.0 -- triangle vertex 0.X
  56  * R1.1 -- triangle vertex 0.Y
  57  * R1.2 -- quad 0 x,y coords (2 packed uwords)
  58  * R1.3 -- quad 1 x,y coords (2 packed uwords)
  59  * R1.4 -- quad 2 x,y coords (2 packed uwords)
  60  * R1.5 -- quad 3 x,y coords (2 packed uwords)
  61  * R1.6 -- ?
  62  * R1.7 -- ?
  63  * R1.8 -- ?
  64  */
  65
  66
  67 static void emit_pixel_xy(struct brw_compile *p,
  68                           const struct brw_reg *dst,
  69                           GLuint mask)
  70 {
  71    struct brw_reg r1 = brw_vec1_grf(1, 0);
  72    struct brw_reg r1_uw = retype(r1, BRW_REGISTER_TYPE_UW);
  73
  74    brw_set_compression_control(p, BRW_COMPRESSION_NONE);
  75
  76    /* Calculate pixel centers by adding 1 or 0 to each of the
  77     * micro-tile coordinates passed in r1.
  78     */
  79    if (mask & BRW_WRITEMASK_X) {
  80       brw_ADD(p,
  81               vec16(retype(dst[0], BRW_REGISTER_TYPE_UW)),
  82               stride(suboffset(r1_uw, 4), 2, 4, 0),
  83               brw_imm_v(0x10101010));
  84    }
  85
  86    if (mask & BRW_WRITEMASK_Y) {
  87       brw_ADD(p,
  88               vec16(retype(dst[1], BRW_REGISTER_TYPE_UW)),
  89               stride(suboffset(r1_uw,5), 2, 4, 0),
  90               brw_imm_v(0x11001100));
  91    }
  92
  93    brw_set_compression_control(p, BRW_COMPRESSION_COMPRESSED);
  94 }
  95
  96
  97
  98 static void emit_delta_xy(struct brw_compile *p,
  99                           const struct brw_reg *dst,
 100                           GLuint mask,
 101                           const struct brw_reg *arg0)
 102 {
 103    struct brw_reg r1 = brw_vec1_grf(1, 0);
 104
 105    /* Calc delta X,Y by subtracting origin in r1 from the pixel
 106     * centers.
 107     */
 108    if (mask & BRW_WRITEMASK_X) {
 109       brw_ADD(p,
 110               dst[0],
 111               retype(arg0[0], BRW_REGISTER_TYPE_UW),
 112               negate(r1));
 113    }
 114
 115    if (mask & BRW_WRITEMASK_Y) {
 116       brw_ADD(p,
 117               dst[1],
 118               retype(arg0[1], BRW_REGISTER_TYPE_UW),
 119               negate(suboffset(r1,1)));
 120
 121    }
 122 }
 123
 124 static void emit_wpos_xy(struct brw_wm_compile *c,
 125                          const struct brw_reg *dst,
 126                          GLuint mask,
 127                          const struct brw_reg *arg0)
 128 {
 129    struct brw_compile *p = &c->func;
 130
 131    if (mask & BRW_WRITEMASK_X) {
 132       /* X' = X */
 133       brw_MOV(p,
 134               dst[0],
 135               retype(arg0[0], BRW_REGISTER_TYPE_W));
 136    }
 137
 138    /* XXX: is this needed any more, or is this a NOOP?
 139     */
 140    if (mask & BRW_WRITEMASK_Y) {
 141 #if 0
 142       /* Y' = height - 1 - Y */
 143       brw_ADD(p,
 144               dst[1],
 145               negate(retype(arg0[1], BRW_REGISTER_TYPE_W)),
 146               brw_imm_d(c->key.drawable_height - 1));
 147 #else
 148       brw_MOV(p,
 149               dst[0],
 150               retype(arg0[0], BRW_REGISTER_TYPE_W));
 151 #endif
 152    }
 153 }
 154
 155
 156 static void emit_pixel_w( struct brw_compile *p,
 157                           const struct brw_reg *dst,
 158                           GLuint mask,
 159                           const struct brw_reg *arg0,
 160                           const struct brw_reg *deltas)
 161 {
 162    /* Don't need this if all you are doing is interpolating color, for
 163     * instance.
 164     */
 165    if (mask & BRW_WRITEMASK_W) {
 166       struct brw_reg interp3 = brw_vec1_grf(arg0[0].nr+1, 4);
 167
 168       /* Calc 1/w - just linterp wpos[3] optimized by putting the
 169        * result straight into a message reg.
 170        */
 171       brw_LINE(p, brw_null_reg(), interp3, deltas[0]);
 172       brw_MAC(p, brw_message_reg(2), suboffset(interp3, 1), deltas[1]);
 173
 174       /* Calc w */
 175       brw_math_16( p, dst[3],
 176                    BRW_MATH_FUNCTION_INV,
 177                    BRW_MATH_SATURATE_NONE,
 178                    2, brw_null_reg(),
 179                    BRW_MATH_PRECISION_FULL);
 180    }
 181 }
 182
 183
 184
 185 static void emit_linterp( struct brw_compile *p,
 186                          const struct brw_reg *dst,
 187                          GLuint mask,
 188                          const struct brw_reg *arg0,
 189                          const struct brw_reg *deltas )
 190 {
 191    struct brw_reg interp[4];
 192    GLuint nr = arg0[0].nr;
 193    GLuint i;
 194
 195    interp[0] = brw_vec1_grf(nr, 0);
 196    interp[1] = brw_vec1_grf(nr, 4);
 197    interp[2] = brw_vec1_grf(nr+1, 0);
 198    interp[3] = brw_vec1_grf(nr+1, 4);
 199
 200    for (i = 0; i < 4; i++) {
 201       if (mask & (1<<i)) {
 202          brw_LINE(p, brw_null_reg(), interp[i], deltas[0]);
 203          brw_MAC(p, dst[i], suboffset(interp[i],1), deltas[1]);
 204       }
 205    }
 206 }
 207
 208
 209 static void emit_pinterp( struct brw_compile *p,
 210                           const struct brw_reg *dst,
 211                           GLuint mask,
 212                           const struct brw_reg *arg0,
 213                           const struct brw_reg *deltas,
 214                           const struct brw_reg *w)
 215 {
 216    struct brw_reg interp[4];
 217    GLuint nr = arg0[0].nr;
 218    GLuint i;
 219
 220    interp[0] = brw_vec1_grf(nr, 0);
 221    interp[1] = brw_vec1_grf(nr, 4);
 222    interp[2] = brw_vec1_grf(nr+1, 0);
 223    interp[3] = brw_vec1_grf(nr+1, 4);
 224
 225    for (i = 0; i < 4; i++) {
 226       if (mask & (1<<i)) {
 227          brw_LINE(p, brw_null_reg(), interp[i], deltas[0]);
 228          brw_MAC(p, dst[i], suboffset(interp[i],1), deltas[1]);
 229       }
 230    }
 231    for (i = 0; i < 4; i++) {
 232       if (mask & (1<<i)) {
 233          brw_MUL(p, dst[i], dst[i], w[3]);
 234       }
 235    }
 236 }
 237
 238
 239 static void emit_cinterp( struct brw_compile *p,
 240                          const struct brw_reg *dst,
 241                          GLuint mask,
 242                          const struct brw_reg *arg0 )
 243 {
 244    struct brw_reg interp[4];
 245    GLuint nr = arg0[0].nr;
 246    GLuint i;
 247
 248    interp[0] = brw_vec1_grf(nr, 0);
 249    interp[1] = brw_vec1_grf(nr, 4);
 250    interp[2] = brw_vec1_grf(nr+1, 0);
 251    interp[3] = brw_vec1_grf(nr+1, 4);
 252
 253    for (i = 0; i < 4; i++) {
 254       if (mask & (1<<i)) {
 255          brw_MOV(p, dst[i], suboffset(interp[i],3));    /* TODO: optimize away like other moves */
 256       }
 257    }
 258 }
 259
 260 /* Sets the destination channels to 1.0 or 0.0 according to glFrontFacing. */
 261 static void emit_frontfacing( struct brw_compile *p,
 262                               const struct brw_reg *dst,
 263                               GLuint mask )
 264 {
 265    struct brw_reg r1_6ud = retype(brw_vec1_grf(1, 6), BRW_REGISTER_TYPE_UD);
 266    GLuint i;
 267
 268    if (!(mask & BRW_WRITEMASK_XYZW))
 269       return;
 270
 271    for (i = 0; i < 4; i++) {
 272       if (mask & (1<<i)) {
 273          brw_MOV(p, dst[i], brw_imm_f(0.0));
 274       }
 275    }
 276
 277    /* bit 31 is "primitive is back face", so checking < (1 << 31) gives
 278     * us front face
 279     */
 280    brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_L, r1_6ud, brw_imm_ud(1 << 31));
 281    for (i = 0; i < 4; i++) {
 282       if (mask & (1<<i)) {
 283          brw_MOV(p, dst[i], brw_imm_f(1.0));
 284       }
 285    }
 286    brw_set_predicate_control_flag_value(p, 0xff);
 287 }
 288
 289 /* For OPCODE_DDX and OPCODE_DDY, per channel of output we've got input
 290  * looking like:
 291  *
 292  * arg0: q0.tl q0.tr q0.bl q0.br q1.tl q1.tr q1.bl q1.br
 293  *
 294  * and we're trying to produce:
 295  *
 296  *           DDX                     DDY
 297  * dst: (q0.tr - q0.tl)     (q0.tl - q0.bl)
 298  *      (q0.tr - q0.tl)     (q0.tr - q0.br)
 299  *      (q0.br - q0.bl)     (q0.tl - q0.bl)
 300  *      (q0.br - q0.bl)     (q0.tr - q0.br)
 301  *      (q1.tr - q1.tl)     (q1.tl - q1.bl)
 302  *      (q1.tr - q1.tl)     (q1.tr - q1.br)
 303  *      (q1.br - q1.bl)     (q1.tl - q1.bl)
 304  *      (q1.br - q1.bl)     (q1.tr - q1.br)
 305  *
 306  * and add two more quads if in 16-pixel dispatch mode.
 307  *
 308  * For DDX, it ends up being easy: width = 2, horiz=0 gets us the same result
 309  * for each pair, and vertstride = 2 jumps us 2 elements after processing a
 310  * pair. But for DDY, it's harder, as we want to produce the pairs swizzled
 311  * between each other.  We could probably do it like ddx and swizzle the right
 312  * order later, but bail for now and just produce
 313  * ((q0.tl - q0.bl)x4 (q1.tl - q1.bl)x4)
 314  */
 315 void emit_ddxy(struct brw_compile *p,
 316                const struct brw_reg *dst,
 317                GLuint mask,
 318                GLboolean is_ddx,
 319                const struct brw_reg *arg0)
 320 {
 321    int i;
 322    struct brw_reg src0, src1;
 323
 324    if (mask & SATURATE)
 325       brw_set_saturate(p, 1);
 326    for (i = 0; i < 4; i++ ) {
 327       if (mask & (1<<i)) {
 328          if (is_ddx) {
 329             src0 = brw_reg(arg0[i].file, arg0[i].nr, 1,
 330                            BRW_REGISTER_TYPE_F,
 331                            BRW_VERTICAL_STRIDE_2,
 332                            BRW_WIDTH_2,
 333                            BRW_HORIZONTAL_STRIDE_0,
 334                            BRW_SWIZZLE_XYZW, BRW_WRITEMASK_XYZW);
 335             src1 = brw_reg(arg0[i].file, arg0[i].nr, 0,
 336                            BRW_REGISTER_TYPE_F,
 337                            BRW_VERTICAL_STRIDE_2,
 338                            BRW_WIDTH_2,
 339                            BRW_HORIZONTAL_STRIDE_0,
 340                            BRW_SWIZZLE_XYZW, BRW_WRITEMASK_XYZW);
 341          } else {
 342             src0 = brw_reg(arg0[i].file, arg0[i].nr, 0,
 343                            BRW_REGISTER_TYPE_F,
 344                            BRW_VERTICAL_STRIDE_4,
 345                            BRW_WIDTH_4,
 346                            BRW_HORIZONTAL_STRIDE_0,
 347                            BRW_SWIZZLE_XYZW, BRW_WRITEMASK_XYZW);
 348             src1 = brw_reg(arg0[i].file, arg0[i].nr, 2,
 349                            BRW_REGISTER_TYPE_F,
 350                            BRW_VERTICAL_STRIDE_4,
 351                            BRW_WIDTH_4,
 352                            BRW_HORIZONTAL_STRIDE_0,
 353                            BRW_SWIZZLE_XYZW, BRW_WRITEMASK_XYZW);
 354          }
 355          brw_ADD(p, dst[i], src0, negate(src1));
 356       }
 357    }
 358    if (mask & SATURATE)
 359       brw_set_saturate(p, 0);
 360 }
 361
 362 static void emit_alu1( struct brw_compile *p,
 363                        struct brw_instruction *(*func)(struct brw_compile *,
 364                                                        struct brw_reg,
 365                                                        struct brw_reg),
 366                        const struct brw_reg *dst,
 367                        GLuint mask,
 368                        const struct brw_reg *arg0 )
 369 {
 370    GLuint i;
 371
 372    if (mask & SATURATE)
 373       brw_set_saturate(p, 1);
 374
 375    for (i = 0; i < 4; i++) {
 376       if (mask & (1<<i)) {
 377          func(p, dst[i], arg0[i]);
 378       }
 379    }
 380
 381    if (mask & SATURATE)
 382       brw_set_saturate(p, 0);
 383 }
 384
 385
 386 static void emit_alu2( struct brw_compile *p,
 387                        struct brw_instruction *(*func)(struct brw_compile *,
 388                                                        struct brw_reg,
 389                                                        struct brw_reg,
 390                                                        struct brw_reg),
 391                        const struct brw_reg *dst,
 392                        GLuint mask,
 393                        const struct brw_reg *arg0,
 394                        const struct brw_reg *arg1 )
 395 {
 396    GLuint i;
 397
 398    if (mask & SATURATE)
 399       brw_set_saturate(p, 1);
 400
 401    for (i = 0; i < 4; i++) {
 402       if (mask & (1<<i)) {
 403          func(p, dst[i], arg0[i], arg1[i]);
 404       }
 405    }
 406
 407    if (mask & SATURATE)
 408       brw_set_saturate(p, 0);
 409 }
 410
 411
 412 static void emit_mad( struct brw_compile *p,
 413                       const struct brw_reg *dst,
 414                       GLuint mask,
 415                       const struct brw_reg *arg0,
 416                       const struct brw_reg *arg1,
 417                       const struct brw_reg *arg2 )
 418 {
 419    GLuint i;
 420
 421    for (i = 0; i < 4; i++) {
 422       if (mask & (1<<i)) {
 423          brw_MUL(p, dst[i], arg0[i], arg1[i]);
 424
 425          brw_set_saturate(p, (mask & SATURATE) ? 1 : 0);
 426          brw_ADD(p, dst[i], dst[i], arg2[i]);
 427          brw_set_saturate(p, 0);
 428       }
 429    }
 430 }
 431
 432 static void emit_trunc( struct brw_compile *p,
 433                       const struct brw_reg *dst,
 434                       GLuint mask,
 435                       const struct brw_reg *arg0)
 436 {
 437    GLuint i;
 438
 439    for (i = 0; i < 4; i++) {
 440       if (mask & (1<<i)) {
 441          brw_RNDZ(p, dst[i], arg0[i]);
 442       }
 443    }
 444 }
 445
 446 static void emit_lrp( struct brw_compile *p,
 447                       const struct brw_reg *dst,
 448                       GLuint mask,
 449                       const struct brw_reg *arg0,
 450                       const struct brw_reg *arg1,
 451                       const struct brw_reg *arg2 )
 452 {
 453    GLuint i;
 454
 455    /* Uses dst as a temporary:
 456     */
 457    for (i = 0; i < 4; i++) {
 458       if (mask & (1<<i)) {
 459          /* Can I use the LINE instruction for this?
 460           */
 461          brw_ADD(p, dst[i], negate(arg0[i]), brw_imm_f(1.0));
 462          brw_MUL(p, brw_null_reg(), dst[i], arg2[i]);
 463
 464          brw_set_saturate(p, (mask & SATURATE) ? 1 : 0);
 465          brw_MAC(p, dst[i], arg0[i], arg1[i]);
 466          brw_set_saturate(p, 0);
 467       }
 468    }
 469 }
 470
 471 static void emit_sop( struct brw_compile *p,
 472                       const struct brw_reg *dst,
 473                       GLuint mask,
 474                       GLuint cond,
 475                       const struct brw_reg *arg0,
 476                       const struct brw_reg *arg1 )
 477 {
 478    GLuint i;
 479
 480    for (i = 0; i < 4; i++) {
 481       if (mask & (1<<i)) {
 482          brw_MOV(p, dst[i], brw_imm_f(0));
 483          brw_CMP(p, brw_null_reg(), cond, arg0[i], arg1[i]);
 484          brw_MOV(p, dst[i], brw_imm_f(1.0));
 485          brw_set_predicate_control_flag_value(p, 0xff);
 486       }
 487    }
 488 }
 489
 490 static void emit_slt( struct brw_compile *p,
 491                       const struct brw_reg *dst,
 492                       GLuint mask,
 493                       const struct brw_reg *arg0,
 494                       const struct brw_reg *arg1 )
 495 {
 496    emit_sop(p, dst, mask, BRW_CONDITIONAL_L, arg0, arg1);
 497 }
 498
 499 static void emit_sle( struct brw_compile *p,
 500                       const struct brw_reg *dst,
 501                       GLuint mask,
 502                       const struct brw_reg *arg0,
 503                       const struct brw_reg *arg1 )
 504 {
 505    emit_sop(p, dst, mask, BRW_CONDITIONAL_LE, arg0, arg1);
 506 }
 507
 508 static void emit_sgt( struct brw_compile *p,
 509                       const struct brw_reg *dst,
 510                       GLuint mask,
 511                       const struct brw_reg *arg0,
 512                       const struct brw_reg *arg1 )
 513 {
 514    emit_sop(p, dst, mask, BRW_CONDITIONAL_G, arg0, arg1);
 515 }
 516
 517 static void emit_sge( struct brw_compile *p,
 518                       const struct brw_reg *dst,
 519                       GLuint mask,
 520                       const struct brw_reg *arg0,
 521                       const struct brw_reg *arg1 )
 522 {
 523    emit_sop(p, dst, mask, BRW_CONDITIONAL_GE, arg0, arg1);
 524 }
 525
 526 static void emit_seq( struct brw_compile *p,
 527                       const struct brw_reg *dst,
 528                       GLuint mask,
 529                       const struct brw_reg *arg0,
 530                       const struct brw_reg *arg1 )
 531 {
 532    emit_sop(p, dst, mask, BRW_CONDITIONAL_EQ, arg0, arg1);
 533 }
 534
 535 static void emit_sne( struct brw_compile *p,
 536                       const struct brw_reg *dst,
 537                       GLuint mask,
 538                       const struct brw_reg *arg0,
 539                       const struct brw_reg *arg1 )
 540 {
 541    emit_sop(p, dst, mask, BRW_CONDITIONAL_NEQ, arg0, arg1);
 542 }
 543
 544 static void emit_cmp( struct brw_compile *p,
 545                       const struct brw_reg *dst,
 546                       GLuint mask,
 547                       const struct brw_reg *arg0,
 548                       const struct brw_reg *arg1,
 549                       const struct brw_reg *arg2 )
 550 {
 551    GLuint i;
 552
 553    for (i = 0; i < 4; i++) {
 554       if (mask & (1<<i)) {
 555          brw_set_saturate(p, (mask & SATURATE) ? 1 : 0);
 556          brw_MOV(p, dst[i], arg2[i]);
 557          brw_set_saturate(p, 0);
 558
 559          brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_L, arg0[i], brw_imm_f(0));
 560
 561          brw_set_saturate(p, (mask & SATURATE) ? 1 : 0);
 562          brw_MOV(p, dst[i], arg1[i]);
 563          brw_set_saturate(p, 0);
 564          brw_set_predicate_control_flag_value(p, 0xff);
 565       }
 566    }
 567 }
 568
 569 static void emit_max( struct brw_compile *p,
 570                       const struct brw_reg *dst,
 571                       GLuint mask,
 572                       const struct brw_reg *arg0,
 573                       const struct brw_reg *arg1 )
 574 {
 575    GLuint i;
 576
 577    for (i = 0; i < 4; i++) {
 578       if (mask & (1<<i)) {
 579          brw_set_saturate(p, (mask & SATURATE) ? 1 : 0);
 580          brw_MOV(p, dst[i], arg0[i]);
 581          brw_set_saturate(p, 0);
 582
 583          brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_L, arg0[i], arg1[i]);
 584
 585          brw_set_saturate(p, (mask & SATURATE) ? 1 : 0);
 586          brw_MOV(p, dst[i], arg1[i]);
 587          brw_set_saturate(p, 0);
 588          brw_set_predicate_control_flag_value(p, 0xff);
 589       }
 590    }
 591 }
 592
 593 static void emit_min( struct brw_compile *p,
 594                       const struct brw_reg *dst,
 595                       GLuint mask,
 596                       const struct brw_reg *arg0,
 597                       const struct brw_reg *arg1 )
 598 {
 599    GLuint i;
 600
 601    for (i = 0; i < 4; i++) {
 602       if (mask & (1<<i)) {
 603          brw_set_saturate(p, (mask & SATURATE) ? 1 : 0);
 604          brw_MOV(p, dst[i], arg1[i]);
 605          brw_set_saturate(p, 0);
 606
 607          brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_L, arg0[i], arg1[i]);
 608
 609          brw_set_saturate(p, (mask & SATURATE) ? 1 : 0);
 610          brw_MOV(p, dst[i], arg0[i]);
 611          brw_set_saturate(p, 0);
 612          brw_set_predicate_control_flag_value(p, 0xff);
 613       }
 614    }
 615 }
 616
 617
 618 static void emit_dp3( struct brw_compile *p,
 619                       const struct brw_reg *dst,
 620                       GLuint mask,
 621                       const struct brw_reg *arg0,
 622                       const struct brw_reg *arg1 )
 623 {
 624    int dst_chan = ffs(mask & BRW_WRITEMASK_XYZW) - 1;
 625
 626    if (!(mask & BRW_WRITEMASK_XYZW))
 627       return; /* Do not emit dead code */
 628
 629    assert(util_is_power_of_two(mask & BRW_WRITEMASK_XYZW));
 630
 631    brw_MUL(p, brw_null_reg(), arg0[0], arg1[0]);
 632    brw_MAC(p, brw_null_reg(), arg0[1], arg1[1]);
 633
 634    brw_set_saturate(p, (mask & SATURATE) ? 1 : 0);
 635    brw_MAC(p, dst[dst_chan], arg0[2], arg1[2]);
 636    brw_set_saturate(p, 0);
 637 }
 638
 639
 640 static void emit_dp4( struct brw_compile *p,
 641                       const struct brw_reg *dst,
 642                       GLuint mask,
 643                       const struct brw_reg *arg0,
 644                       const struct brw_reg *arg1 )
 645 {
 646    int dst_chan = ffs(mask & BRW_WRITEMASK_XYZW) - 1;
 647
 648    if (!(mask & BRW_WRITEMASK_XYZW))
 649       return; /* Do not emit dead code */
 650
 651    assert(util_is_power_of_two(mask & BRW_WRITEMASK_XYZW));
 652
 653    brw_MUL(p, brw_null_reg(), arg0[0], arg1[0]);
 654    brw_MAC(p, brw_null_reg(), arg0[1], arg1[1]);
 655    brw_MAC(p, brw_null_reg(), arg0[2], arg1[2]);
 656
 657    brw_set_saturate(p, (mask & SATURATE) ? 1 : 0);
 658    brw_MAC(p, dst[dst_chan], arg0[3], arg1[3]);
 659    brw_set_saturate(p, 0);
 660 }
 661
 662
 663 static void emit_dph( struct brw_compile *p,
 664                       const struct brw_reg *dst,
 665                       GLuint mask,
 666                       const struct brw_reg *arg0,
 667                       const struct brw_reg *arg1 )
 668 {
 669    const int dst_chan = ffs(mask & BRW_WRITEMASK_XYZW) - 1;
 670
 671    if (!(mask & BRW_WRITEMASK_XYZW))
 672       return; /* Do not emit dead code */
 673
 674    assert(util_is_power_of_two(mask & BRW_WRITEMASK_XYZW));
 675
 676    brw_MUL(p, brw_null_reg(), arg0[0], arg1[0]);
 677    brw_MAC(p, brw_null_reg(), arg0[1], arg1[1]);
 678    brw_MAC(p, dst[dst_chan], arg0[2], arg1[2]);
 679
 680    brw_set_saturate(p, (mask & SATURATE) ? 1 : 0);
 681    brw_ADD(p, dst[dst_chan], dst[dst_chan], arg1[3]);
 682    brw_set_saturate(p, 0);
 683 }
 684
 685
 686 static void emit_xpd( struct brw_compile *p,
 687                       const struct brw_reg *dst,
 688                       GLuint mask,
 689                       const struct brw_reg *arg0,
 690                       const struct brw_reg *arg1 )
 691 {
 692    GLuint i;
 693
 694    assert((mask & BRW_WRITEMASK_W) != BRW_WRITEMASK_W);
 695
 696    for (i = 0 ; i < 3; i++) {
 697       if (mask & (1<<i)) {
 698          GLuint i2 = (i+2)%3;
 699          GLuint i1 = (i+1)%3;
 700
 701          brw_MUL(p, brw_null_reg(), negate(arg0[i2]), arg1[i1]);
 702
 703          brw_set_saturate(p, (mask & SATURATE) ? 1 : 0);
 704          brw_MAC(p, dst[i], arg0[i1], arg1[i2]);
 705          brw_set_saturate(p, 0);
 706       }
 707    }
 708 }
 709
 710
 711 static void emit_math1( struct brw_compile *p,
 712                         GLuint function,
 713                         const struct brw_reg *dst,
 714                         GLuint mask,
 715                         const struct brw_reg *arg0 )
 716 {
 717    int dst_chan = ffs(mask & BRW_WRITEMASK_XYZW) - 1;
 718
 719    if (!(mask & BRW_WRITEMASK_XYZW))
 720       return; /* Do not emit dead code */
 721
 722    assert(util_is_power_of_two(mask & BRW_WRITEMASK_XYZW));
 723
 724    brw_MOV(p, brw_message_reg(2), arg0[0]);
 725
 726    /* Send two messages to perform all 16 operations:
 727     */
 728    brw_math_16(p,
 729                dst[dst_chan],
 730                function,
 731                (mask & SATURATE) ? BRW_MATH_SATURATE_SATURATE : BRW_MATH_SATURATE_NONE,
 732                2,
 733                brw_null_reg(),
 734                BRW_MATH_PRECISION_FULL);
 735 }
 736
 737
 738 static void emit_math2( struct brw_compile *p,
 739                         GLuint function,
 740                         const struct brw_reg *dst,
 741                         GLuint mask,
 742                         const struct brw_reg *arg0,
 743                         const struct brw_reg *arg1)
 744 {
 745    int dst_chan = ffs(mask & BRW_WRITEMASK_XYZW) - 1;
 746
 747    if (!(mask & BRW_WRITEMASK_XYZW))
 748       return; /* Do not emit dead code */
 749
 750    assert(util_is_power_of_two(mask & BRW_WRITEMASK_XYZW));
 751
 752    brw_push_insn_state(p);
 753
 754    brw_set_compression_control(p, BRW_COMPRESSION_NONE);
 755    brw_MOV(p, brw_message_reg(2), arg0[0]);
 756    brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF);
 757    brw_MOV(p, brw_message_reg(4), sechalf(arg0[0]));
 758
 759    brw_set_compression_control(p, BRW_COMPRESSION_NONE);
 760    brw_MOV(p, brw_message_reg(3), arg1[0]);
 761    brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF);
 762    brw_MOV(p, brw_message_reg(5), sechalf(arg1[0]));
 763
 764
 765    /* Send two messages to perform all 16 operations:
 766     */
 767    brw_set_compression_control(p, BRW_COMPRESSION_NONE);
 768    brw_math(p,
 769             dst[dst_chan],
 770             function,
 771             (mask & SATURATE) ? BRW_MATH_SATURATE_SATURATE : BRW_MATH_SATURATE_NONE,
 772             2,
 773             brw_null_reg(),
 774             BRW_MATH_DATA_VECTOR,
 775             BRW_MATH_PRECISION_FULL);
 776
 777    brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF);
 778    brw_math(p,
 779             offset(dst[dst_chan],1),
 780             function,
 781             (mask & SATURATE) ? BRW_MATH_SATURATE_SATURATE : BRW_MATH_SATURATE_NONE,
 782             4,
 783             brw_null_reg(),
 784             BRW_MATH_DATA_VECTOR,
 785             BRW_MATH_PRECISION_FULL);
 786
 787    brw_pop_insn_state(p);
 788 }
 789
 790
 791
 792 static void emit_tex( struct brw_wm_compile *c,
 793                       const struct brw_wm_instruction *inst,
 794                       struct brw_reg *dst,
 795                       GLuint dst_flags,
 796                       struct brw_reg *coord,
 797                       GLuint sampler)
 798 {
 799    struct brw_compile *p = &c->func;
 800    GLuint msgLength, responseLength;
 801    GLuint i, nr;
 802    GLuint emit;
 803    GLuint msg_type;
 804    GLboolean shadow = FALSE;
 805
 806    /* How many input regs are there?
 807     */
 808    switch (inst->target) {
 809    case TGSI_TEXTURE_1D:
 810       emit = BRW_WRITEMASK_X;
 811       nr = 1;
 812       break;
 813    case TGSI_TEXTURE_SHADOW1D:
 814       emit = BRW_WRITEMASK_XW;
 815       nr = 4;
 816       shadow = TRUE;
 817       break;
 818    case TGSI_TEXTURE_2D:
 819       emit = BRW_WRITEMASK_XY;
 820       nr = 2;
 821       break;
 822    case TGSI_TEXTURE_SHADOW2D:
 823    case TGSI_TEXTURE_SHADOWRECT:
 824       emit = BRW_WRITEMASK_XYW;
 825       nr = 4;
 826       shadow = TRUE;
 827       break;
 828    case TGSI_TEXTURE_3D:
 829    case TGSI_TEXTURE_CUBE:
 830       emit = BRW_WRITEMASK_XYZ;
 831       nr = 3;
 832       break;
 833    default:
 834       /* unexpected target */
 835       abort();
 836    }
 837
 838    msgLength = 1;
 839
 840    for (i = 0; i < nr; i++) {
 841       static const GLuint swz[4] = {0,1,2,2};
 842       if (emit & (1<<i))
 843          brw_MOV(p, brw_message_reg(msgLength+1), coord[swz[i]]);
 844       else
 845          brw_MOV(p, brw_message_reg(msgLength+1), brw_imm_f(0));
 846       msgLength += 2;
 847    }
 848
 849    responseLength = 8;          /* always */
 850
 851    if (BRW_IS_IGDNG(p->brw)) {
 852        if (shadow)
 853            msg_type = BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_COMPARE_IGDNG;
 854        else
 855            msg_type = BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_IGDNG;
 856    } else {
 857        if (shadow)
 858            msg_type = BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_COMPARE;
 859        else
 860            msg_type = BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE;
 861    }
 862
 863    brw_SAMPLE(p,
 864               retype(vec16(dst[0]), BRW_REGISTER_TYPE_UW),
 865               1,
 866               retype(c->payload.depth[0].hw_reg, BRW_REGISTER_TYPE_UW),
 867               BTI_TEXTURE(inst->tex_unit),
 868               sampler,          /* sampler index */
 869               inst->writemask,
 870               msg_type,
 871               responseLength,
 872               msgLength,
 873               0,
 874               1,
 875               BRW_SAMPLER_SIMD_MODE_SIMD16);
 876 }
 877
 878
 879 static void emit_txb( struct brw_wm_compile *c,
 880                       const struct brw_wm_instruction *inst,
 881                       struct brw_reg *dst,
 882                       GLuint dst_flags,
 883                       struct brw_reg *coord,
 884                       GLuint sampler )
 885 {
 886    struct brw_compile *p = &c->func;
 887    GLuint msgLength;
 888    GLuint msg_type;
 889    /* Shadow ignored for txb.
 890     */
 891    switch (inst->target) {
 892    case TGSI_TEXTURE_1D:
 893    case TGSI_TEXTURE_SHADOW1D:
 894       brw_MOV(p, brw_message_reg(2), coord[0]);
 895       brw_MOV(p, brw_message_reg(4), brw_imm_f(0));
 896       brw_MOV(p, brw_message_reg(6), brw_imm_f(0));
 897       break;
 898    case TGSI_TEXTURE_2D:
 899    case TGSI_TEXTURE_RECT:
 900    case TGSI_TEXTURE_SHADOW2D:
 901    case TGSI_TEXTURE_SHADOWRECT:
 902       brw_MOV(p, brw_message_reg(2), coord[0]);
 903       brw_MOV(p, brw_message_reg(4), coord[1]);
 904       brw_MOV(p, brw_message_reg(6), brw_imm_f(0));
 905       break;
 906    case TGSI_TEXTURE_3D:
 907    case TGSI_TEXTURE_CUBE:
 908       brw_MOV(p, brw_message_reg(2), coord[0]);
 909       brw_MOV(p, brw_message_reg(4), coord[1]);
 910       brw_MOV(p, brw_message_reg(6), coord[2]);
 911       break;
 912    default:
 913       /* unexpected target */
 914       abort();
 915    }
 916
 917    brw_MOV(p, brw_message_reg(8), coord[3]);
 918    msgLength = 9;
 919
 920    if (BRW_IS_IGDNG(p->brw))
 921        msg_type = BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_BIAS_IGDNG;
 922    else
 923        msg_type = BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_BIAS;
 924
 925    brw_SAMPLE(p,
 926               retype(vec16(dst[0]), BRW_REGISTER_TYPE_UW),
 927               1,
 928               retype(c->payload.depth[0].hw_reg, BRW_REGISTER_TYPE_UW),
 929               BTI_TEXTURE(inst->tex_unit),
 930               sampler,          /* sampler index */
 931               inst->writemask,
 932               msg_type,
 933               8,                /* responseLength */
 934               msgLength,
 935               0,
 936               1,
 937               BRW_SAMPLER_SIMD_MODE_SIMD16);
 938 }
 939
 940
 941 static void emit_lit( struct brw_compile *p,
 942                       const struct brw_reg *dst,
 943                       GLuint mask,
 944                       const struct brw_reg *arg0 )
 945 {
 946    assert((mask & BRW_WRITEMASK_XW) == 0);
 947
 948    if (mask & BRW_WRITEMASK_Y) {
 949       brw_set_saturate(p, (mask & SATURATE) ? 1 : 0);
 950       brw_MOV(p, dst[1], arg0[0]);
 951       brw_set_saturate(p, 0);
 952    }
 953
 954    if (mask & BRW_WRITEMASK_Z) {
 955       emit_math2(p, BRW_MATH_FUNCTION_POW,
 956                  &dst[2],
 957                  BRW_WRITEMASK_X | (mask & SATURATE),
 958                  &arg0[1],
 959                  &arg0[3]);
 960    }
 961
 962    /* Ordinarily you'd use an iff statement to skip or shortcircuit
 963     * some of the POW calculations above, but 16-wide iff statements
 964     * seem to lock c1 hardware, so this is a nasty workaround:
 965     */
 966    brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_LE, arg0[0], brw_imm_f(0));
 967    {
 968       if (mask & BRW_WRITEMASK_Y)
 969          brw_MOV(p, dst[1], brw_imm_f(0));
 970
 971       if (mask & BRW_WRITEMASK_Z)
 972          brw_MOV(p, dst[2], brw_imm_f(0));
 973    }
 974    brw_set_predicate_control(p, BRW_PREDICATE_NONE);
 975 }
 976
 977
 978 /* Kill pixel - set execution mask to zero for those pixels which
 979  * fail.
 980  */
 981 static void emit_kil( struct brw_wm_compile *c,
 982                       struct brw_reg *arg0)
 983 {
 984    struct brw_compile *p = &c->func;
 985    struct brw_reg r0uw = retype(brw_vec1_grf(0, 0), BRW_REGISTER_TYPE_UW);
 986    GLuint i;
 987
 988    /* XXX - usually won't need 4 compares!
 989     */
 990    for (i = 0; i < 4; i++) {
 991       brw_push_insn_state(p);
 992       brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_GE, arg0[i], brw_imm_f(0));
 993       brw_set_predicate_control_flag_value(p, 0xff);
 994       brw_set_compression_control(p, BRW_COMPRESSION_NONE);
 995       brw_AND(p, r0uw, brw_flag_reg(), r0uw);
 996       brw_pop_insn_state(p);
 997    }
 998 }
 999
1000 /* KILLP kills the pixels that are currently executing, not based on a test
1001  * of the arguments.
1002  */
1003 static void emit_killp( struct brw_wm_compile *c )
1004 {
1005    struct brw_compile *p = &c->func;
1006    struct brw_reg r0uw = retype(brw_vec1_grf(0, 0), BRW_REGISTER_TYPE_UW);
1007
1008    brw_push_insn_state(p);
1009    brw_set_mask_control(p, BRW_MASK_DISABLE);
1010    brw_NOT(p, c->emit_mask_reg, brw_mask_reg(1)); /* IMASK */
1011    brw_AND(p, r0uw, c->emit_mask_reg, r0uw);
1012    brw_pop_insn_state(p);
1013 }
1014
1015 static void fire_fb_write( struct brw_wm_compile *c,
1016                            GLuint base_reg,
1017                            GLuint nr,
1018                            GLuint target,
1019                            GLuint eot )
1020 {
1021    struct brw_compile *p = &c->func;
1022
1023    /* Pass through control information:
1024     */
1025 /*  mov (8) m1.0<1>:ud   r1.0<8;8,1>:ud   { Align1 NoMask } */
1026    {
1027       brw_push_insn_state(p);
1028       brw_set_mask_control(p, BRW_MASK_DISABLE); /* ? */
1029       brw_set_compression_control(p, BRW_COMPRESSION_NONE);
1030       brw_MOV(p,
1031                brw_message_reg(base_reg + 1),
1032                brw_vec8_grf(1, 0));
1033       brw_pop_insn_state(p);
1034    }
1035
1036    /* Send framebuffer write message: */
1037 /*  send (16) null.0<1>:uw m0               r0.0<8;8,1>:uw   0x85a04000:ud    { Align1 EOT } */
1038    brw_fb_WRITE(p,
1039                 retype(vec16(brw_null_reg()), BRW_REGISTER_TYPE_UW),
1040                 base_reg,
1041                 retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UW),
1042                 target,
1043                 nr,
1044                 0,
1045                 eot);
1046 }
1047
1048
1049 static void emit_aa( struct brw_wm_compile *c,
1050                      struct brw_reg *arg1,
1051                      GLuint reg )
1052 {
1053    struct brw_compile *p = &c->func;
1054    GLuint comp = c->key.aa_dest_stencil_reg / 2;
1055    GLuint off = c->key.aa_dest_stencil_reg % 2;
1056    struct brw_reg aa = offset(arg1[comp], off);
1057
1058    brw_push_insn_state(p);
1059    brw_set_compression_control(p, BRW_COMPRESSION_NONE); /* ?? */
1060    brw_MOV(p, brw_message_reg(reg), aa);
1061    brw_pop_insn_state(p);
1062 }
1063
1064
1065 /* Post-fragment-program processing.  Send the results to the
1066  * framebuffer.
1067  * \param arg0  the fragment color
1068  * \param arg1  the pass-through depth value
1069  * \param arg2  the shader-computed depth value
1070  */
1071 static void emit_fb_write( struct brw_wm_compile *c,
1072                            struct brw_reg *arg0,
1073                            struct brw_reg *arg1,
1074                            struct brw_reg *arg2,
1075                            GLuint target,
1076                            GLuint eot)
1077 {
1078    struct brw_compile *p = &c->func;
1079    GLuint nr = 2;
1080    GLuint channel;
1081
1082    /* Reserve a space for AA - may not be needed:
1083     */
1084    if (c->key.aa_dest_stencil_reg)
1085       nr += 1;
1086
1087    /* I don't really understand how this achieves the color interleave
1088     * (ie RGBARGBA) in the result:  [Do the saturation here]
1089     */
1090    {
1091       brw_push_insn_state(p);
1092
1093       for (channel = 0; channel < 4; channel++) {
1094          /*  mov (8) m2.0<1>:ud   r28.0<8;8,1>:ud  { Align1 } */
1095          /*  mov (8) m6.0<1>:ud   r29.0<8;8,1>:ud  { Align1 SecHalf } */
1096
1097          brw_set_compression_control(p, BRW_COMPRESSION_NONE);
1098          brw_MOV(p,
1099                  brw_message_reg(nr + channel),
1100                  arg0[channel]);
1101
1102          brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF);
1103          brw_MOV(p,
1104                  brw_message_reg(nr + channel + 4),
1105                  sechalf(arg0[channel]));
1106       }
1107
1108       /* skip over the regs populated above:
1109        */
1110       nr += 8;
1111
1112       brw_pop_insn_state(p);
1113    }
1114
1115    if (c->key.source_depth_to_render_target)
1116    {
1117       if (c->key.computes_depth)
1118          brw_MOV(p, brw_message_reg(nr), arg2[2]);
1119       else
1120          brw_MOV(p, brw_message_reg(nr), arg1[1]); /* ? */
1121
1122       nr += 2;
1123    }
1124
1125    if (c->key.dest_depth_reg)
1126    {
1127       GLuint comp = c->key.dest_depth_reg / 2;
1128       GLuint off = c->key.dest_depth_reg % 2;
1129
1130       if (off != 0) {
1131          brw_push_insn_state(p);
1132          brw_set_compression_control(p, BRW_COMPRESSION_NONE);
1133
1134          brw_MOV(p, brw_message_reg(nr), offset(arg1[comp],1));
1135          /* 2nd half? */
1136          brw_MOV(p, brw_message_reg(nr+1), arg1[comp+1]);
1137          brw_pop_insn_state(p);
1138       }
1139       else {
1140          brw_MOV(p, brw_message_reg(nr), arg1[comp]);
1141       }
1142       nr += 2;
1143    }
1144
1145    if (!c->key.runtime_check_aads_emit) {
1146       if (c->key.aa_dest_stencil_reg)
1147          emit_aa(c, arg1, 2);
1148
1149       fire_fb_write(c, 0, nr, target, eot);
1150    }
1151    else {
1152       struct brw_reg v1_null_ud = vec1(retype(brw_null_reg(), BRW_REGISTER_TYPE_UD));
1153       struct brw_reg ip = brw_ip_reg();
1154       struct brw_instruction *jmp;
1155
1156       brw_set_compression_control(p, BRW_COMPRESSION_NONE);
1157       brw_set_conditionalmod(p, BRW_CONDITIONAL_Z);
1158       brw_AND(p,
1159               v1_null_ud,
1160               get_element_ud(brw_vec8_grf(1,0), 6),
1161               brw_imm_ud(1<<26));
1162
1163       jmp = brw_JMPI(p, ip, ip, brw_imm_d(0));
1164       {
1165          emit_aa(c, arg1, 2);
1166          fire_fb_write(c, 0, nr, target, eot);
1167          /* note - thread killed in subroutine */
1168       }
1169       brw_land_fwd_jump(p, jmp);
1170
1171       /* ELSE: Shuffle up one register to fill in the hole left for AA:
1172        */
1173       fire_fb_write(c, 1, nr-1, target, eot);
1174    }
1175 }
1176
1177
1178 /**
1179  * Move a GPR to scratch memory.
1180  */
1181 static void emit_spill( struct brw_wm_compile *c,
1182                         struct brw_reg reg,
1183                         GLuint slot )
1184 {
1185    struct brw_compile *p = &c->func;
1186
1187    /*
1188      mov (16) m2.0<1>:ud   r2.0<8;8,1>:ud   { Align1 Compr }
1189    */
1190    brw_MOV(p, brw_message_reg(2), reg);
1191
1192    /*
1193      mov (1) r0.2<1>:d    0x00000080:d     { Align1 NoMask }
1194      send (16) null.0<1>:uw m1               r0.0<8;8,1>:uw   0x053003ff:ud    { Align1 }
1195    */
1196    brw_dp_WRITE_16(p,
1197                    retype(vec16(brw_vec8_grf(0, 0)), BRW_REGISTER_TYPE_UW),
1198                    slot);
1199 }
1200
1201
1202 /**
1203  * Load a GPR from scratch memory.
1204  */
1205 static void emit_unspill( struct brw_wm_compile *c,
1206                           struct brw_reg reg,
1207                           GLuint slot )
1208 {
1209    struct brw_compile *p = &c->func;
1210
1211    /* Slot 0 is the undef value.
1212     */
1213    if (slot == 0) {
1214       brw_MOV(p, reg, brw_imm_f(0));
1215       return;
1216    }
1217
1218    /*
1219      mov (1) r0.2<1>:d    0x000000c0:d     { Align1 NoMask }
1220      send (16) r110.0<1>:uw m1               r0.0<8;8,1>:uw   0x041243ff:ud    { Align1 }
1221    */
1222
1223    brw_dp_READ_16(p,
1224                   retype(vec16(reg), BRW_REGISTER_TYPE_UW),
1225                   slot);
1226 }
1227
1228
1229 /**
1230  * Retrieve up to 4 GEN4 register pairs for the given wm reg:
1231  * Args with unspill_reg != 0 will be loaded from scratch memory.
1232  */
1233 static void get_argument_regs( struct brw_wm_compile *c,
1234                                struct brw_wm_ref *arg[],
1235                                struct brw_reg *regs )
1236 {
1237    GLuint i;
1238
1239    for (i = 0; i < 4; i++) {
1240       if (arg[i]) {
1241          if (arg[i]->unspill_reg)
1242             emit_unspill(c,
1243                          brw_vec8_grf(arg[i]->unspill_reg, 0),
1244                          arg[i]->value->spill_slot);
1245
1246          regs[i] = arg[i]->hw_reg;
1247       }
1248       else {
1249          regs[i] = brw_null_reg();
1250       }
1251    }
1252 }
1253
1254
1255 /**
1256  * For values that have a spill_slot!=0, write those regs to scratch memory.
1257  */
1258 static void spill_values( struct brw_wm_compile *c,
1259                           struct brw_wm_value *values,
1260                           GLuint nr )
1261 {
1262    GLuint i;
1263
1264    for (i = 0; i < nr; i++)
1265       if (values[i].spill_slot)
1266          emit_spill(c, values[i].hw_reg, values[i].spill_slot);
1267 }
1268
1269
1270 /* Emit the fragment program instructions here.
1271  */
1272 void brw_wm_emit( struct brw_wm_compile *c )
1273 {
1274    struct brw_compile *p = &c->func;
1275    GLuint insn;
1276
1277    brw_set_compression_control(p, BRW_COMPRESSION_COMPRESSED);
1278
1279    /* Check if any of the payload regs need to be spilled:
1280     */
1281    spill_values(c, c->payload.depth, 4);
1282    spill_values(c, c->creg, c->nr_creg);
1283    spill_values(c, c->payload.input_interp, PIPE_MAX_SHADER_INPUTS);
1284
1285
1286    for (insn = 0; insn < c->nr_insns; insn++) {
1287
1288       struct brw_wm_instruction *inst = &c->instruction[insn];
1289       struct brw_reg args[3][4], dst[4];
1290       GLuint i, dst_flags;
1291
1292       /* Get argument regs:
1293        */
1294       for (i = 0; i < 3; i++)
1295          get_argument_regs(c, inst->src[i], args[i]);
1296
1297       /* Get dest regs:
1298        */
1299       for (i = 0; i < 4; i++)
1300          if (inst->dst[i])
1301             dst[i] = inst->dst[i]->hw_reg;
1302          else
1303             dst[i] = brw_null_reg();
1304
1305       /* Flags
1306        */
1307       dst_flags = inst->writemask;
1308       if (inst->saturate)
1309          dst_flags |= SATURATE;
1310
1311       switch (inst->opcode) {
1312          /* Generated instructions for calculating triangle interpolants:
1313           */
1314       case WM_PIXELXY:
1315          emit_pixel_xy(p, dst, dst_flags);
1316          break;
1317
1318       case WM_DELTAXY:
1319          emit_delta_xy(p, dst, dst_flags, args[0]);
1320          break;
1321
1322       case WM_WPOSXY:
1323          emit_wpos_xy(c, dst, dst_flags, args[0]);
1324          break;
1325
1326       case WM_PIXELW:
1327          emit_pixel_w(p, dst, dst_flags, args[0], args[1]);
1328          break;
1329
1330       case WM_LINTERP:
1331          emit_linterp(p, dst, dst_flags, args[0], args[1]);
1332          break;
1333
1334       case WM_PINTERP:
1335          emit_pinterp(p, dst, dst_flags, args[0], args[1], args[2]);
1336          break;
1337
1338       case WM_CINTERP:
1339          emit_cinterp(p, dst, dst_flags, args[0]);
1340          break;
1341
1342       case WM_FB_WRITE:
1343          emit_fb_write(c, args[0], args[1], args[2], inst->target, inst->eot);
1344          break;
1345
1346       case WM_FRONTFACING:
1347          emit_frontfacing(p, dst, dst_flags);
1348          break;
1349
1350          /* Straightforward arithmetic:
1351           */
1352       case TGSI_OPCODE_ADD:
1353          emit_alu2(p, brw_ADD, dst, dst_flags, args[0], args[1]);
1354          break;
1355
1356       case TGSI_OPCODE_FRC:
1357          emit_alu1(p, brw_FRC, dst, dst_flags, args[0]);
1358          break;
1359
1360       case TGSI_OPCODE_FLR:
1361          emit_alu1(p, brw_RNDD, dst, dst_flags, args[0]);
1362          break;
1363
1364       case TGSI_OPCODE_DDX:
1365          emit_ddxy(p, dst, dst_flags, GL_TRUE, args[0]);
1366          break;
1367
1368       case TGSI_OPCODE_DDY:
1369          emit_ddxy(p, dst, dst_flags, GL_FALSE, args[0]);
1370          break;
1371
1372       case TGSI_OPCODE_DP3:
1373          emit_dp3(p, dst, dst_flags, args[0], args[1]);
1374          break;
1375
1376       case TGSI_OPCODE_DP4:
1377          emit_dp4(p, dst, dst_flags, args[0], args[1]);
1378          break;
1379
1380       case TGSI_OPCODE_DPH:
1381          emit_dph(p, dst, dst_flags, args[0], args[1]);
1382          break;
1383
1384       case TGSI_OPCODE_TRUNC:
1385          emit_trunc(p, dst, dst_flags, args[0]);
1386          break;
1387
1388       case TGSI_OPCODE_LRP:
1389          emit_lrp(p, dst, dst_flags, args[0], args[1], args[2]);
1390          break;
1391
1392       case TGSI_OPCODE_MAD:
1393          emit_mad(p, dst, dst_flags, args[0], args[1], args[2]);
1394          break;
1395
1396       case TGSI_OPCODE_MOV:
1397          emit_alu1(p, brw_MOV, dst, dst_flags, args[0]);
1398          break;
1399
1400       case TGSI_OPCODE_MUL:
1401          emit_alu2(p, brw_MUL, dst, dst_flags, args[0], args[1]);
1402          break;
1403
1404       case TGSI_OPCODE_XPD:
1405          emit_xpd(p, dst, dst_flags, args[0], args[1]);
1406          break;
1407
1408          /* Higher math functions:
1409           */
1410       case TGSI_OPCODE_RCP:
1411          emit_math1(p, BRW_MATH_FUNCTION_INV, dst, dst_flags, args[0]);
1412          break;
1413
1414       case TGSI_OPCODE_RSQ:
1415          emit_math1(p, BRW_MATH_FUNCTION_RSQ, dst, dst_flags, args[0]);
1416          break;
1417
1418       case TGSI_OPCODE_SIN:
1419          emit_math1(p, BRW_MATH_FUNCTION_SIN, dst, dst_flags, args[0]);
1420          break;
1421
1422       case TGSI_OPCODE_COS:
1423          emit_math1(p, BRW_MATH_FUNCTION_COS, dst, dst_flags, args[0]);
1424          break;
1425
1426       case TGSI_OPCODE_EX2:
1427          emit_math1(p, BRW_MATH_FUNCTION_EXP, dst, dst_flags, args[0]);
1428          break;
1429
1430       case TGSI_OPCODE_LG2:
1431          emit_math1(p, BRW_MATH_FUNCTION_LOG, dst, dst_flags, args[0]);
1432          break;
1433
1434       case TGSI_OPCODE_SCS:
1435          /* There is an scs math function, but it would need some
1436           * fixup for 16-element execution.
1437           */
1438          if (dst_flags & BRW_WRITEMASK_X)
1439             emit_math1(p, BRW_MATH_FUNCTION_COS, dst, (dst_flags&SATURATE)|BRW_WRITEMASK_X, args[0]);
1440          if (dst_flags & BRW_WRITEMASK_Y)
1441             emit_math1(p, BRW_MATH_FUNCTION_SIN, dst+1, (dst_flags&SATURATE)|BRW_WRITEMASK_X, args[0]);
1442          break;
1443
1444       case TGSI_OPCODE_POW:
1445          emit_math2(p, BRW_MATH_FUNCTION_POW, dst, dst_flags, args[0], args[1]);
1446          break;
1447
1448          /* Comparisons:
1449           */
1450       case TGSI_OPCODE_CMP:
1451          emit_cmp(p, dst, dst_flags, args[0], args[1], args[2]);
1452          break;
1453
1454       case TGSI_OPCODE_MAX:
1455          emit_max(p, dst, dst_flags, args[0], args[1]);
1456          break;
1457
1458       case TGSI_OPCODE_MIN:
1459          emit_min(p, dst, dst_flags, args[0], args[1]);
1460          break;
1461
1462       case TGSI_OPCODE_SLT:
1463          emit_slt(p, dst, dst_flags, args[0], args[1]);
1464          break;
1465
1466       case TGSI_OPCODE_SLE:
1467          emit_sle(p, dst, dst_flags, args[0], args[1]);
1468         break;
1469       case TGSI_OPCODE_SGT:
1470          emit_sgt(p, dst, dst_flags, args[0], args[1]);
1471         break;
1472       case TGSI_OPCODE_SGE:
1473          emit_sge(p, dst, dst_flags, args[0], args[1]);
1474          break;
1475       case TGSI_OPCODE_SEQ:
1476          emit_seq(p, dst, dst_flags, args[0], args[1]);
1477         break;
1478       case TGSI_OPCODE_SNE:
1479          emit_sne(p, dst, dst_flags, args[0], args[1]);
1480         break;
1481
1482       case TGSI_OPCODE_LIT:
1483          emit_lit(p, dst, dst_flags, args[0]);
1484          break;
1485
1486          /* Texturing operations:
1487           */
1488       case TGSI_OPCODE_TEX:
1489          emit_tex(c, inst, dst, dst_flags, args[0], inst->sampler);
1490          break;
1491
1492       case TGSI_OPCODE_TXB:
1493          emit_txb(c, inst, dst, dst_flags, args[0], inst->sampler);
1494          break;
1495
1496       case TGSI_OPCODE_KIL:
1497          emit_kil(c, args[0]);
1498          break;
1499
1500       case TGSI_OPCODE_KILP:
1501          emit_killp(c);
1502          break;
1503
1504       default:
1505          debug_printf("Unsupported opcode %i (%s) in fragment shader\n",
1506                       inst->opcode,
1507                       tgsi_get_opcode_info(inst->opcode)->mnemonic);
1508       }
1509
1510       for (i = 0; i < 4; i++)
1511         if (inst->dst[i] && inst->dst[i]->spill_slot)
1512            emit_spill(c,
1513                       inst->dst[i]->hw_reg,
1514                       inst->dst[i]->spill_slot);
1515    }
1516
1517    if (BRW_DEBUG & DEBUG_WM) {
1518       debug_printf("wm-native:\n");
1519       brw_disasm(stderr, p->store, p->nr_insn);
1520    }
1521 }