src/mesa/drivers/dri/i965/brw_wm_emit.c

   1 /*
   2  Copyright (C) Intel Corp.  2006.  All Rights Reserved.
   3  Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
   4  develop this 3D driver.
   5
   6  Permission is hereby granted, free of charge, to any person obtaining
   7  a copy of this software and associated documentation files (the
   8  "Software"), to deal in the Software without restriction, including
   9  without limitation the rights to use, copy, modify, merge, publish,
  10  distribute, sublicense, and/or sell copies of the Software, and to
  11  permit persons to whom the Software is furnished to do so, subject to
  12  the following conditions:
  13
  14  The above copyright notice and this permission notice (including the
  15  next paragraph) shall be included in all copies or substantial
  16  portions of the Software.
  17
  18  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
  19  EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  20  MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
  21  IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
  22  LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
  23  OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
  24  WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
  25
  26  **********************************************************************/
  27  /*
  28   * Authors:
  29   *   Keith Whitwell <keith@tungstengraphics.com>
  30   */
  31
  32
  33 #include "main/macros.h"
  34 #include "brw_context.h"
  35 #include "brw_wm.h"
  36
  37 #define SATURATE (1<<5)
  38
  39 /* Not quite sure how correct this is - need to understand horiz
  40  * vs. vertical strides a little better.
  41  */
  42 static INLINE struct brw_reg sechalf( struct brw_reg reg )
  43 {
  44    if (reg.vstride)
  45       reg.nr++;
  46    return reg;
  47 }
  48
  49 /* Payload R0:
  50  *
  51  * R0.0 -- pixel mask, one bit for each of 4 pixels in 4 tiles,
  52  *         corresponding to each of the 16 execution channels.
  53  * R0.1..8 -- ?
  54  * R1.0 -- triangle vertex 0.X
  55  * R1.1 -- triangle vertex 0.Y
  56  * R1.2 -- tile 0 x,y coords (2 packed uwords)
  57  * R1.3 -- tile 1 x,y coords (2 packed uwords)
  58  * R1.4 -- tile 2 x,y coords (2 packed uwords)
  59  * R1.5 -- tile 3 x,y coords (2 packed uwords)
  60  * R1.6 -- ?
  61  * R1.7 -- ?
  62  * R1.8 -- ?
  63  */
  64
  65
  66 static void emit_pixel_xy(struct brw_compile *p,
  67                           const struct brw_reg *dst,
  68                           GLuint mask)
  69 {
  70    struct brw_reg r1 = brw_vec1_grf(1, 0);
  71    struct brw_reg r1_uw = retype(r1, BRW_REGISTER_TYPE_UW);
  72
  73    brw_set_compression_control(p, BRW_COMPRESSION_NONE);
  74
  75    /* Calculate pixel centers by adding 1 or 0 to each of the
  76     * micro-tile coordinates passed in r1.
  77     */
  78    if (mask & WRITEMASK_X) {
  79       brw_ADD(p,
  80               vec16(retype(dst[0], BRW_REGISTER_TYPE_UW)),
  81               stride(suboffset(r1_uw, 4), 2, 4, 0),
  82               brw_imm_v(0x10101010));
  83    }
  84
  85    if (mask & WRITEMASK_Y) {
  86       brw_ADD(p,
  87               vec16(retype(dst[1], BRW_REGISTER_TYPE_UW)),
  88               stride(suboffset(r1_uw,5), 2, 4, 0),
  89               brw_imm_v(0x11001100));
  90    }
  91
  92    brw_set_compression_control(p, BRW_COMPRESSION_COMPRESSED);
  93 }
  94
  95
  96
  97 static void emit_delta_xy(struct brw_compile *p,
  98                           const struct brw_reg *dst,
  99                           GLuint mask,
 100                           const struct brw_reg *arg0)
 101 {
 102    struct brw_reg r1 = brw_vec1_grf(1, 0);
 103
 104    /* Calc delta X,Y by subtracting origin in r1 from the pixel
 105     * centers.
 106     */
 107    if (mask & WRITEMASK_X) {
 108       brw_ADD(p,
 109               dst[0],
 110               retype(arg0[0], BRW_REGISTER_TYPE_UW),
 111               negate(r1));
 112    }
 113
 114    if (mask & WRITEMASK_Y) {
 115       brw_ADD(p,
 116               dst[1],
 117               retype(arg0[1], BRW_REGISTER_TYPE_UW),
 118               negate(suboffset(r1,1)));
 119
 120    }
 121 }
 122
 123 static void emit_wpos_xy(struct brw_wm_compile *c,
 124                          const struct brw_reg *dst,
 125                          GLuint mask,
 126                          const struct brw_reg *arg0)
 127 {
 128    struct brw_compile *p = &c->func;
 129
 130    /* Calculate the pixel offset from window bottom left into destination
 131     * X and Y channels.
 132     */
 133    if (mask & WRITEMASK_X) {
 134       /* X' = X - origin */
 135       brw_ADD(p,
 136               dst[0],
 137               retype(arg0[0], BRW_REGISTER_TYPE_W),
 138               brw_imm_d(0 - c->key.origin_x));
 139    }
 140
 141    if (mask & WRITEMASK_Y) {
 142       /* Y' = height - (Y - origin_y) = height + origin_y - Y */
 143       brw_ADD(p,
 144               dst[1],
 145               negate(retype(arg0[1], BRW_REGISTER_TYPE_W)),
 146               brw_imm_d(c->key.origin_y + c->key.drawable_height - 1));
 147    }
 148 }
 149
 150
 151 static void emit_pixel_w( struct brw_compile *p,
 152                           const struct brw_reg *dst,
 153                           GLuint mask,
 154                           const struct brw_reg *arg0,
 155                           const struct brw_reg *deltas)
 156 {
 157    /* Don't need this if all you are doing is interpolating color, for
 158     * instance.
 159     */
 160    if (mask & WRITEMASK_W) {
 161       struct brw_reg interp3 = brw_vec1_grf(arg0[0].nr+1, 4);
 162
 163       /* Calc 1/w - just linterp wpos[3] optimized by putting the
 164        * result straight into a message reg.
 165        */
 166       brw_LINE(p, brw_null_reg(), interp3, deltas[0]);
 167       brw_MAC(p, brw_message_reg(2), suboffset(interp3, 1), deltas[1]);
 168
 169       /* Calc w */
 170       brw_math_16( p, dst[3],
 171                    BRW_MATH_FUNCTION_INV,
 172                    BRW_MATH_SATURATE_NONE,
 173                    2, brw_null_reg(),
 174                    BRW_MATH_PRECISION_FULL);
 175    }
 176 }
 177
 178
 179
 180 static void emit_linterp( struct brw_compile *p,
 181                          const struct brw_reg *dst,
 182                          GLuint mask,
 183                          const struct brw_reg *arg0,
 184                          const struct brw_reg *deltas )
 185 {
 186    struct brw_reg interp[4];
 187    GLuint nr = arg0[0].nr;
 188    GLuint i;
 189
 190    interp[0] = brw_vec1_grf(nr, 0);
 191    interp[1] = brw_vec1_grf(nr, 4);
 192    interp[2] = brw_vec1_grf(nr+1, 0);
 193    interp[3] = brw_vec1_grf(nr+1, 4);
 194
 195    for (i = 0; i < 4; i++) {
 196       if (mask & (1<<i)) {
 197          brw_LINE(p, brw_null_reg(), interp[i], deltas[0]);
 198          brw_MAC(p, dst[i], suboffset(interp[i],1), deltas[1]);
 199       }
 200    }
 201 }
 202
 203
 204 static void emit_pinterp( struct brw_compile *p,
 205                           const struct brw_reg *dst,
 206                           GLuint mask,
 207                           const struct brw_reg *arg0,
 208                           const struct brw_reg *deltas,
 209                           const struct brw_reg *w)
 210 {
 211    struct brw_reg interp[4];
 212    GLuint nr = arg0[0].nr;
 213    GLuint i;
 214
 215    interp[0] = brw_vec1_grf(nr, 0);
 216    interp[1] = brw_vec1_grf(nr, 4);
 217    interp[2] = brw_vec1_grf(nr+1, 0);
 218    interp[3] = brw_vec1_grf(nr+1, 4);
 219
 220    for (i = 0; i < 4; i++) {
 221       if (mask & (1<<i)) {
 222          brw_LINE(p, brw_null_reg(), interp[i], deltas[0]);
 223          brw_MAC(p, dst[i], suboffset(interp[i],1), deltas[1]);
 224       }
 225    }
 226    for (i = 0; i < 4; i++) {
 227       if (mask & (1<<i)) {
 228          brw_MUL(p, dst[i], dst[i], w[3]);
 229       }
 230    }
 231 }
 232
 233
 234 static void emit_cinterp( struct brw_compile *p,
 235                          const struct brw_reg *dst,
 236                          GLuint mask,
 237                          const struct brw_reg *arg0 )
 238 {
 239    struct brw_reg interp[4];
 240    GLuint nr = arg0[0].nr;
 241    GLuint i;
 242
 243    interp[0] = brw_vec1_grf(nr, 0);
 244    interp[1] = brw_vec1_grf(nr, 4);
 245    interp[2] = brw_vec1_grf(nr+1, 0);
 246    interp[3] = brw_vec1_grf(nr+1, 4);
 247
 248    for (i = 0; i < 4; i++) {
 249       if (mask & (1<<i)) {
 250          brw_MOV(p, dst[i], suboffset(interp[i],3));    /* TODO: optimize away like other moves */
 251       }
 252    }
 253 }
 254
 255 /* Sets the destination channels to 1.0 or 0.0 according to glFrontFacing. */
 256 static void emit_frontfacing( struct brw_compile *p,
 257                               const struct brw_reg *dst,
 258                               GLuint mask )
 259 {
 260    struct brw_reg r1_6ud = retype(brw_vec1_grf(1, 6), BRW_REGISTER_TYPE_UD);
 261    GLuint i;
 262
 263    if (!(mask & WRITEMASK_XYZW))
 264       return;
 265
 266    for (i = 0; i < 4; i++) {
 267       if (mask & (1<<i)) {
 268          brw_MOV(p, dst[i], brw_imm_f(0.0));
 269       }
 270    }
 271
 272    /* bit 31 is "primitive is back face", so checking < (1 << 31) gives
 273     * us front face
 274     */
 275    brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_L, r1_6ud, brw_imm_ud(1 << 31));
 276    for (i = 0; i < 4; i++) {
 277       if (mask & (1<<i)) {
 278          brw_MOV(p, dst[i], brw_imm_f(1.0));
 279       }
 280    }
 281    brw_set_predicate_control_flag_value(p, 0xff);
 282 }
 283
 284 static void emit_alu1( struct brw_compile *p,
 285                        struct brw_instruction *(*func)(struct brw_compile *,
 286                                                        struct brw_reg,
 287                                                        struct brw_reg),
 288                        const struct brw_reg *dst,
 289                        GLuint mask,
 290                        const struct brw_reg *arg0 )
 291 {
 292    GLuint i;
 293
 294    if (mask & SATURATE)
 295       brw_set_saturate(p, 1);
 296
 297    for (i = 0; i < 4; i++) {
 298       if (mask & (1<<i)) {
 299          func(p, dst[i], arg0[i]);
 300       }
 301    }
 302
 303    if (mask & SATURATE)
 304       brw_set_saturate(p, 0);
 305 }
 306
 307
 308 static void emit_alu2( struct brw_compile *p,
 309                        struct brw_instruction *(*func)(struct brw_compile *,
 310                                                        struct brw_reg,
 311                                                        struct brw_reg,
 312                                                        struct brw_reg),
 313                        const struct brw_reg *dst,
 314                        GLuint mask,
 315                        const struct brw_reg *arg0,
 316                        const struct brw_reg *arg1 )
 317 {
 318    GLuint i;
 319
 320    if (mask & SATURATE)
 321       brw_set_saturate(p, 1);
 322
 323    for (i = 0; i < 4; i++) {
 324       if (mask & (1<<i)) {
 325          func(p, dst[i], arg0[i], arg1[i]);
 326       }
 327    }
 328
 329    if (mask & SATURATE)
 330       brw_set_saturate(p, 0);
 331 }
 332
 333
 334 static void emit_mad( struct brw_compile *p,
 335                       const struct brw_reg *dst,
 336                       GLuint mask,
 337                       const struct brw_reg *arg0,
 338                       const struct brw_reg *arg1,
 339                       const struct brw_reg *arg2 )
 340 {
 341    GLuint i;
 342
 343    for (i = 0; i < 4; i++) {
 344       if (mask & (1<<i)) {
 345          brw_MUL(p, dst[i], arg0[i], arg1[i]);
 346
 347          brw_set_saturate(p, (mask & SATURATE) ? 1 : 0);
 348          brw_ADD(p, dst[i], dst[i], arg2[i]);
 349          brw_set_saturate(p, 0);
 350       }
 351    }
 352 }
 353
 354 static void emit_trunc( struct brw_compile *p,
 355                       const struct brw_reg *dst,
 356                       GLuint mask,
 357                       const struct brw_reg *arg0)
 358 {
 359    GLuint i;
 360
 361    for (i = 0; i < 4; i++) {
 362       if (mask & (1<<i)) {
 363          brw_RNDZ(p, dst[i], arg0[i]);
 364       }
 365    }
 366 }
 367
 368 static void emit_lrp( struct brw_compile *p,
 369                       const struct brw_reg *dst,
 370                       GLuint mask,
 371                       const struct brw_reg *arg0,
 372                       const struct brw_reg *arg1,
 373                       const struct brw_reg *arg2 )
 374 {
 375    GLuint i;
 376
 377    /* Uses dst as a temporary:
 378     */
 379    for (i = 0; i < 4; i++) {
 380       if (mask & (1<<i)) {
 381          /* Can I use the LINE instruction for this?
 382           */
 383          brw_ADD(p, dst[i], negate(arg0[i]), brw_imm_f(1.0));
 384          brw_MUL(p, brw_null_reg(), dst[i], arg2[i]);
 385
 386          brw_set_saturate(p, (mask & SATURATE) ? 1 : 0);
 387          brw_MAC(p, dst[i], arg0[i], arg1[i]);
 388          brw_set_saturate(p, 0);
 389       }
 390    }
 391 }
 392
 393 static void emit_sop( struct brw_compile *p,
 394                       const struct brw_reg *dst,
 395                       GLuint mask,
 396                       GLuint cond,
 397                       const struct brw_reg *arg0,
 398                       const struct brw_reg *arg1 )
 399 {
 400    GLuint i;
 401
 402    for (i = 0; i < 4; i++) {
 403       if (mask & (1<<i)) {
 404          brw_MOV(p, dst[i], brw_imm_f(0));
 405          brw_CMP(p, brw_null_reg(), cond, arg0[i], arg1[i]);
 406          brw_MOV(p, dst[i], brw_imm_f(1.0));
 407          brw_set_predicate_control_flag_value(p, 0xff);
 408       }
 409    }
 410 }
 411
 412 static void emit_slt( struct brw_compile *p,
 413                       const struct brw_reg *dst,
 414                       GLuint mask,
 415                       const struct brw_reg *arg0,
 416                       const struct brw_reg *arg1 )
 417 {
 418    emit_sop(p, dst, mask, BRW_CONDITIONAL_L, arg0, arg1);
 419 }
 420
 421 static void emit_sle( struct brw_compile *p,
 422                       const struct brw_reg *dst,
 423                       GLuint mask,
 424                       const struct brw_reg *arg0,
 425                       const struct brw_reg *arg1 )
 426 {
 427    emit_sop(p, dst, mask, BRW_CONDITIONAL_LE, arg0, arg1);
 428 }
 429
 430 static void emit_sgt( struct brw_compile *p,
 431                       const struct brw_reg *dst,
 432                       GLuint mask,
 433                       const struct brw_reg *arg0,
 434                       const struct brw_reg *arg1 )
 435 {
 436    emit_sop(p, dst, mask, BRW_CONDITIONAL_G, arg0, arg1);
 437 }
 438
 439 static void emit_sge( struct brw_compile *p,
 440                       const struct brw_reg *dst,
 441                       GLuint mask,
 442                       const struct brw_reg *arg0,
 443                       const struct brw_reg *arg1 )
 444 {
 445    emit_sop(p, dst, mask, BRW_CONDITIONAL_GE, arg0, arg1);
 446 }
 447
 448 static void emit_seq( struct brw_compile *p,
 449                       const struct brw_reg *dst,
 450                       GLuint mask,
 451                       const struct brw_reg *arg0,
 452                       const struct brw_reg *arg1 )
 453 {
 454    emit_sop(p, dst, mask, BRW_CONDITIONAL_EQ, arg0, arg1);
 455 }
 456
 457 static void emit_sne( struct brw_compile *p,
 458                       const struct brw_reg *dst,
 459                       GLuint mask,
 460                       const struct brw_reg *arg0,
 461                       const struct brw_reg *arg1 )
 462 {
 463    emit_sop(p, dst, mask, BRW_CONDITIONAL_NEQ, arg0, arg1);
 464 }
 465
 466 static void emit_cmp( struct brw_compile *p,
 467                       const struct brw_reg *dst,
 468                       GLuint mask,
 469                       const struct brw_reg *arg0,
 470                       const struct brw_reg *arg1,
 471                       const struct brw_reg *arg2 )
 472 {
 473    GLuint i;
 474
 475    for (i = 0; i < 4; i++) {
 476       if (mask & (1<<i)) {
 477          brw_set_saturate(p, (mask & SATURATE) ? 1 : 0);
 478          brw_MOV(p, dst[i], arg2[i]);
 479          brw_set_saturate(p, 0);
 480
 481          brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_L, arg0[i], brw_imm_f(0));
 482
 483          brw_set_saturate(p, (mask & SATURATE) ? 1 : 0);
 484          brw_MOV(p, dst[i], arg1[i]);
 485          brw_set_saturate(p, 0);
 486          brw_set_predicate_control_flag_value(p, 0xff);
 487       }
 488    }
 489 }
 490
 491 static void emit_max( struct brw_compile *p,
 492                       const struct brw_reg *dst,
 493                       GLuint mask,
 494                       const struct brw_reg *arg0,
 495                       const struct brw_reg *arg1 )
 496 {
 497    GLuint i;
 498
 499    for (i = 0; i < 4; i++) {
 500       if (mask & (1<<i)) {
 501          brw_set_saturate(p, (mask & SATURATE) ? 1 : 0);
 502          brw_MOV(p, dst[i], arg0[i]);
 503          brw_set_saturate(p, 0);
 504
 505          brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_L, arg0[i], arg1[i]);
 506
 507          brw_set_saturate(p, (mask & SATURATE) ? 1 : 0);
 508          brw_MOV(p, dst[i], arg1[i]);
 509          brw_set_saturate(p, 0);
 510          brw_set_predicate_control_flag_value(p, 0xff);
 511       }
 512    }
 513 }
 514
 515 static void emit_min( struct brw_compile *p,
 516                       const struct brw_reg *dst,
 517                       GLuint mask,
 518                       const struct brw_reg *arg0,
 519                       const struct brw_reg *arg1 )
 520 {
 521    GLuint i;
 522
 523    for (i = 0; i < 4; i++) {
 524       if (mask & (1<<i)) {
 525          brw_set_saturate(p, (mask & SATURATE) ? 1 : 0);
 526          brw_MOV(p, dst[i], arg1[i]);
 527          brw_set_saturate(p, 0);
 528
 529          brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_L, arg0[i], arg1[i]);
 530
 531          brw_set_saturate(p, (mask & SATURATE) ? 1 : 0);
 532          brw_MOV(p, dst[i], arg0[i]);
 533          brw_set_saturate(p, 0);
 534          brw_set_predicate_control_flag_value(p, 0xff);
 535       }
 536    }
 537 }
 538
 539
 540 static void emit_dp3( struct brw_compile *p,
 541                       const struct brw_reg *dst,
 542                       GLuint mask,
 543                       const struct brw_reg *arg0,
 544                       const struct brw_reg *arg1 )
 545 {
 546    int dst_chan = _mesa_ffs(mask & WRITEMASK_XYZW) - 1;
 547
 548    if (!(mask & WRITEMASK_XYZW))
 549       return; /* Do not emit dead code */
 550
 551    assert(is_power_of_two(mask & WRITEMASK_XYZW));
 552
 553    brw_MUL(p, brw_null_reg(), arg0[0], arg1[0]);
 554    brw_MAC(p, brw_null_reg(), arg0[1], arg1[1]);
 555
 556    brw_set_saturate(p, (mask & SATURATE) ? 1 : 0);
 557    brw_MAC(p, dst[dst_chan], arg0[2], arg1[2]);
 558    brw_set_saturate(p, 0);
 559 }
 560
 561
 562 static void emit_dp4( struct brw_compile *p,
 563                       const struct brw_reg *dst,
 564                       GLuint mask,
 565                       const struct brw_reg *arg0,
 566                       const struct brw_reg *arg1 )
 567 {
 568    int dst_chan = _mesa_ffs(mask & WRITEMASK_XYZW) - 1;
 569
 570    if (!(mask & WRITEMASK_XYZW))
 571       return; /* Do not emit dead code */
 572
 573    assert(is_power_of_two(mask & WRITEMASK_XYZW));
 574
 575    brw_MUL(p, brw_null_reg(), arg0[0], arg1[0]);
 576    brw_MAC(p, brw_null_reg(), arg0[1], arg1[1]);
 577    brw_MAC(p, brw_null_reg(), arg0[2], arg1[2]);
 578
 579    brw_set_saturate(p, (mask & SATURATE) ? 1 : 0);
 580    brw_MAC(p, dst[dst_chan], arg0[3], arg1[3]);
 581    brw_set_saturate(p, 0);
 582 }
 583
 584
 585 static void emit_dph( struct brw_compile *p,
 586                       const struct brw_reg *dst,
 587                       GLuint mask,
 588                       const struct brw_reg *arg0,
 589                       const struct brw_reg *arg1 )
 590 {
 591    const int dst_chan = _mesa_ffs(mask & WRITEMASK_XYZW) - 1;
 592
 593    if (!(mask & WRITEMASK_XYZW))
 594       return; /* Do not emit dead code */
 595
 596    assert(is_power_of_two(mask & WRITEMASK_XYZW));
 597
 598    brw_MUL(p, brw_null_reg(), arg0[0], arg1[0]);
 599    brw_MAC(p, brw_null_reg(), arg0[1], arg1[1]);
 600    brw_MAC(p, dst[dst_chan], arg0[2], arg1[2]);
 601
 602    brw_set_saturate(p, (mask & SATURATE) ? 1 : 0);
 603    brw_ADD(p, dst[dst_chan], dst[dst_chan], arg1[3]);
 604    brw_set_saturate(p, 0);
 605 }
 606
 607
 608 static void emit_xpd( struct brw_compile *p,
 609                       const struct brw_reg *dst,
 610                       GLuint mask,
 611                       const struct brw_reg *arg0,
 612                       const struct brw_reg *arg1 )
 613 {
 614    GLuint i;
 615
 616    assert(!(mask & WRITEMASK_W) == WRITEMASK_X);
 617
 618    for (i = 0 ; i < 3; i++) {
 619       if (mask & (1<<i)) {
 620          GLuint i2 = (i+2)%3;
 621          GLuint i1 = (i+1)%3;
 622
 623          brw_MUL(p, brw_null_reg(), negate(arg0[i2]), arg1[i1]);
 624
 625          brw_set_saturate(p, (mask & SATURATE) ? 1 : 0);
 626          brw_MAC(p, dst[i], arg0[i1], arg1[i2]);
 627          brw_set_saturate(p, 0);
 628       }
 629    }
 630 }
 631
 632
 633 static void emit_math1( struct brw_compile *p,
 634                         GLuint function,
 635                         const struct brw_reg *dst,
 636                         GLuint mask,
 637                         const struct brw_reg *arg0 )
 638 {
 639    int dst_chan = _mesa_ffs(mask & WRITEMASK_XYZW) - 1;
 640
 641    if (!(mask & WRITEMASK_XYZW))
 642       return; /* Do not emit dead code */
 643
 644    assert(is_power_of_two(mask & WRITEMASK_XYZW));
 645
 646    brw_MOV(p, brw_message_reg(2), arg0[0]);
 647
 648    /* Send two messages to perform all 16 operations:
 649     */
 650    brw_math_16(p,
 651                dst[dst_chan],
 652                function,
 653                (mask & SATURATE) ? BRW_MATH_SATURATE_SATURATE : BRW_MATH_SATURATE_NONE,
 654                2,
 655                brw_null_reg(),
 656                BRW_MATH_PRECISION_FULL);
 657 }
 658
 659
 660 static void emit_math2( struct brw_compile *p,
 661                         GLuint function,
 662                         const struct brw_reg *dst,
 663                         GLuint mask,
 664                         const struct brw_reg *arg0,
 665                         const struct brw_reg *arg1)
 666 {
 667    int dst_chan = _mesa_ffs(mask & WRITEMASK_XYZW) - 1;
 668
 669    if (!(mask & WRITEMASK_XYZW))
 670       return; /* Do not emit dead code */
 671
 672    assert(is_power_of_two(mask & WRITEMASK_XYZW));
 673
 674    brw_push_insn_state(p);
 675
 676    brw_set_compression_control(p, BRW_COMPRESSION_NONE);
 677    brw_MOV(p, brw_message_reg(2), arg0[0]);
 678    brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF);
 679    brw_MOV(p, brw_message_reg(4), sechalf(arg0[0]));
 680
 681    brw_set_compression_control(p, BRW_COMPRESSION_NONE);
 682    brw_MOV(p, brw_message_reg(3), arg1[0]);
 683    brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF);
 684    brw_MOV(p, brw_message_reg(5), sechalf(arg1[0]));
 685
 686
 687    /* Send two messages to perform all 16 operations:
 688     */
 689    brw_set_compression_control(p, BRW_COMPRESSION_NONE);
 690    brw_math(p,
 691             dst[dst_chan],
 692             function,
 693             (mask & SATURATE) ? BRW_MATH_SATURATE_SATURATE : BRW_MATH_SATURATE_NONE,
 694             2,
 695             brw_null_reg(),
 696             BRW_MATH_DATA_VECTOR,
 697             BRW_MATH_PRECISION_FULL);
 698
 699    brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF);
 700    brw_math(p,
 701             offset(dst[dst_chan],1),
 702             function,
 703             (mask & SATURATE) ? BRW_MATH_SATURATE_SATURATE : BRW_MATH_SATURATE_NONE,
 704             4,
 705             brw_null_reg(),
 706             BRW_MATH_DATA_VECTOR,
 707             BRW_MATH_PRECISION_FULL);
 708
 709    brw_pop_insn_state(p);
 710 }
 711
 712
 713
 714 static void emit_tex( struct brw_wm_compile *c,
 715                       const struct brw_wm_instruction *inst,
 716                       struct brw_reg *dst,
 717                       GLuint dst_flags,
 718                       struct brw_reg *arg )
 719 {
 720    struct brw_compile *p = &c->func;
 721    GLuint msgLength, responseLength;
 722    GLuint i, nr;
 723    GLuint emit;
 724    GLuint msg_type;
 725
 726    /* How many input regs are there?
 727     */
 728    switch (inst->tex_idx) {
 729    case TEXTURE_1D_INDEX:
 730       emit = WRITEMASK_X;
 731       nr = 1;
 732       break;
 733    case TEXTURE_2D_INDEX:
 734    case TEXTURE_RECT_INDEX:
 735       emit = WRITEMASK_XY;
 736       nr = 2;
 737       break;
 738    case TEXTURE_3D_INDEX:
 739    case TEXTURE_CUBE_INDEX:
 740       emit = WRITEMASK_XYZ;
 741       nr = 3;
 742       break;
 743    default:
 744       /* unexpected target */
 745       abort();
 746    }
 747
 748    if (inst->tex_shadow) {
 749       nr = 4;
 750       emit |= WRITEMASK_W;
 751    }
 752
 753    msgLength = 1;
 754
 755    for (i = 0; i < nr; i++) {
 756       static const GLuint swz[4] = {0,1,2,2};
 757       if (emit & (1<<i))
 758          brw_MOV(p, brw_message_reg(msgLength+1), arg[swz[i]]);
 759       else
 760          brw_MOV(p, brw_message_reg(msgLength+1), brw_imm_f(0));
 761       msgLength += 2;
 762    }
 763
 764    responseLength = 8;          /* always */
 765
 766    if (BRW_IS_IGDNG(p->brw)) {
 767        if (inst->tex_shadow)
 768            msg_type = BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_COMPARE_IGDNG;
 769        else
 770            msg_type = BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_IGDNG;
 771    } else {
 772        if (inst->tex_shadow)
 773            msg_type = BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_COMPARE;
 774        else
 775            msg_type = BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE;
 776    }
 777
 778    brw_SAMPLE(p,
 779               retype(vec16(dst[0]), BRW_REGISTER_TYPE_UW),
 780               1,
 781               retype(c->payload.depth[0].hw_reg, BRW_REGISTER_TYPE_UW),
 782               SURF_INDEX_TEXTURE(inst->tex_unit),
 783               inst->tex_unit,     /* sampler */
 784               inst->writemask,
 785               msg_type,
 786               responseLength,
 787               msgLength,
 788               0,
 789               1,
 790               BRW_SAMPLER_SIMD_MODE_SIMD16);
 791 }
 792
 793
 794 static void emit_txb( struct brw_wm_compile *c,
 795                       const struct brw_wm_instruction *inst,
 796                       struct brw_reg *dst,
 797                       GLuint dst_flags,
 798                       struct brw_reg *arg )
 799 {
 800    struct brw_compile *p = &c->func;
 801    GLuint msgLength;
 802    GLuint msg_type;
 803    /* Shadow ignored for txb.
 804     */
 805    switch (inst->tex_idx) {
 806    case TEXTURE_1D_INDEX:
 807       brw_MOV(p, brw_message_reg(2), arg[0]);
 808       brw_MOV(p, brw_message_reg(4), brw_imm_f(0));
 809       brw_MOV(p, brw_message_reg(6), brw_imm_f(0));
 810       break;
 811    case TEXTURE_2D_INDEX:
 812    case TEXTURE_RECT_INDEX:
 813       brw_MOV(p, brw_message_reg(2), arg[0]);
 814       brw_MOV(p, brw_message_reg(4), arg[1]);
 815       brw_MOV(p, brw_message_reg(6), brw_imm_f(0));
 816       break;
 817    case TEXTURE_3D_INDEX:
 818    case TEXTURE_CUBE_INDEX:
 819       brw_MOV(p, brw_message_reg(2), arg[0]);
 820       brw_MOV(p, brw_message_reg(4), arg[1]);
 821       brw_MOV(p, brw_message_reg(6), arg[2]);
 822       break;
 823    default:
 824       /* unexpected target */
 825       abort();
 826    }
 827
 828    brw_MOV(p, brw_message_reg(8), arg[3]);
 829    msgLength = 9;
 830
 831    if (BRW_IS_IGDNG(p->brw))
 832        msg_type = BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_BIAS_IGDNG;
 833    else
 834        msg_type = BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_BIAS;
 835
 836    brw_SAMPLE(p,
 837               retype(vec16(dst[0]), BRW_REGISTER_TYPE_UW),
 838               1,
 839               retype(c->payload.depth[0].hw_reg, BRW_REGISTER_TYPE_UW),
 840               SURF_INDEX_TEXTURE(inst->tex_unit),
 841               inst->tex_unit,     /* sampler */
 842               inst->writemask,
 843               msg_type,
 844               8,                /* responseLength */
 845               msgLength,
 846               0,
 847               1,
 848               BRW_SAMPLER_SIMD_MODE_SIMD16);
 849 }
 850
 851
 852 static void emit_lit( struct brw_compile *p,
 853                       const struct brw_reg *dst,
 854                       GLuint mask,
 855                       const struct brw_reg *arg0 )
 856 {
 857    assert((mask & WRITEMASK_XW) == 0);
 858
 859    if (mask & WRITEMASK_Y) {
 860       brw_set_saturate(p, (mask & SATURATE) ? 1 : 0);
 861       brw_MOV(p, dst[1], arg0[0]);
 862       brw_set_saturate(p, 0);
 863    }
 864
 865    if (mask & WRITEMASK_Z) {
 866       emit_math2(p, BRW_MATH_FUNCTION_POW,
 867                  &dst[2],
 868                  WRITEMASK_X | (mask & SATURATE),
 869                  &arg0[1],
 870                  &arg0[3]);
 871    }
 872
 873    /* Ordinarily you'd use an iff statement to skip or shortcircuit
 874     * some of the POW calculations above, but 16-wide iff statements
 875     * seem to lock c1 hardware, so this is a nasty workaround:
 876     */
 877    brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_LE, arg0[0], brw_imm_f(0));
 878    {
 879       if (mask & WRITEMASK_Y)
 880          brw_MOV(p, dst[1], brw_imm_f(0));
 881
 882       if (mask & WRITEMASK_Z)
 883          brw_MOV(p, dst[2], brw_imm_f(0));
 884    }
 885    brw_set_predicate_control(p, BRW_PREDICATE_NONE);
 886 }
 887
 888
 889 /* Kill pixel - set execution mask to zero for those pixels which
 890  * fail.
 891  */
 892 static void emit_kil( struct brw_wm_compile *c,
 893                       struct brw_reg *arg0)
 894 {
 895    struct brw_compile *p = &c->func;
 896    struct brw_reg r0uw = retype(brw_vec1_grf(0, 0), BRW_REGISTER_TYPE_UW);
 897    GLuint i;
 898
 899    /* XXX - usually won't need 4 compares!
 900     */
 901    for (i = 0; i < 4; i++) {
 902       brw_push_insn_state(p);
 903       brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_GE, arg0[i], brw_imm_f(0));
 904       brw_set_predicate_control_flag_value(p, 0xff);
 905       brw_set_compression_control(p, BRW_COMPRESSION_NONE);
 906       brw_AND(p, r0uw, brw_flag_reg(), r0uw);
 907       brw_pop_insn_state(p);
 908    }
 909 }
 910
 911
 912 static void fire_fb_write( struct brw_wm_compile *c,
 913                            GLuint base_reg,
 914                            GLuint nr,
 915                            GLuint target,
 916                            GLuint eot )
 917 {
 918    struct brw_compile *p = &c->func;
 919
 920    /* Pass through control information:
 921     */
 922 /*  mov (8) m1.0<1>:ud   r1.0<8;8,1>:ud   { Align1 NoMask } */
 923    {
 924       brw_push_insn_state(p);
 925       brw_set_mask_control(p, BRW_MASK_DISABLE); /* ? */
 926       brw_set_compression_control(p, BRW_COMPRESSION_NONE);
 927       brw_MOV(p,
 928                brw_message_reg(base_reg + 1),
 929                brw_vec8_grf(1, 0));
 930       brw_pop_insn_state(p);
 931    }
 932
 933    /* Send framebuffer write message: */
 934 /*  send (16) null.0<1>:uw m0               r0.0<8;8,1>:uw   0x85a04000:ud    { Align1 EOT } */
 935    brw_fb_WRITE(p,
 936                 retype(vec16(brw_null_reg()), BRW_REGISTER_TYPE_UW),
 937                 base_reg,
 938                 retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UW),
 939                 target,
 940                 nr,
 941                 0,
 942                 eot);
 943 }
 944
 945
 946 static void emit_aa( struct brw_wm_compile *c,
 947                      struct brw_reg *arg1,
 948                      GLuint reg )
 949 {
 950    struct brw_compile *p = &c->func;
 951    GLuint comp = c->key.aa_dest_stencil_reg / 2;
 952    GLuint off = c->key.aa_dest_stencil_reg % 2;
 953    struct brw_reg aa = offset(arg1[comp], off);
 954
 955    brw_push_insn_state(p);
 956    brw_set_compression_control(p, BRW_COMPRESSION_NONE); /* ?? */
 957    brw_MOV(p, brw_message_reg(reg), aa);
 958    brw_pop_insn_state(p);
 959 }
 960
 961
 962 /* Post-fragment-program processing.  Send the results to the
 963  * framebuffer.
 964  * \param arg0  the fragment color
 965  * \param arg1  the pass-through depth value
 966  * \param arg2  the shader-computed depth value
 967  */
 968 static void emit_fb_write( struct brw_wm_compile *c,
 969                            struct brw_reg *arg0,
 970                            struct brw_reg *arg1,
 971                            struct brw_reg *arg2,
 972                            GLuint target,
 973                            GLuint eot)
 974 {
 975    struct brw_compile *p = &c->func;
 976    GLuint nr = 2;
 977    GLuint channel;
 978
 979    /* Reserve a space for AA - may not be needed:
 980     */
 981    if (c->key.aa_dest_stencil_reg)
 982       nr += 1;
 983
 984    /* I don't really understand how this achieves the color interleave
 985     * (ie RGBARGBA) in the result:  [Do the saturation here]
 986     */
 987    {
 988       brw_push_insn_state(p);
 989
 990       for (channel = 0; channel < 4; channel++) {
 991          /*  mov (8) m2.0<1>:ud   r28.0<8;8,1>:ud  { Align1 } */
 992          /*  mov (8) m6.0<1>:ud   r29.0<8;8,1>:ud  { Align1 SecHalf } */
 993
 994          brw_set_compression_control(p, BRW_COMPRESSION_NONE);
 995          brw_MOV(p,
 996                  brw_message_reg(nr + channel),
 997                  arg0[channel]);
 998
 999          brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF);
1000          brw_MOV(p,
1001                  brw_message_reg(nr + channel + 4),
1002                  sechalf(arg0[channel]));
1003       }
1004
1005       /* skip over the regs populated above:
1006        */
1007       nr += 8;
1008
1009       brw_pop_insn_state(p);
1010    }
1011
1012    if (c->key.source_depth_to_render_target)
1013    {
1014       if (c->key.computes_depth)
1015          brw_MOV(p, brw_message_reg(nr), arg2[2]);
1016       else
1017          brw_MOV(p, brw_message_reg(nr), arg1[1]); /* ? */
1018
1019       nr += 2;
1020    }
1021
1022    if (c->key.dest_depth_reg)
1023    {
1024       GLuint comp = c->key.dest_depth_reg / 2;
1025       GLuint off = c->key.dest_depth_reg % 2;
1026
1027       if (off != 0) {
1028          brw_push_insn_state(p);
1029          brw_set_compression_control(p, BRW_COMPRESSION_NONE);
1030
1031          brw_MOV(p, brw_message_reg(nr), offset(arg1[comp],1));
1032          /* 2nd half? */
1033          brw_MOV(p, brw_message_reg(nr+1), arg1[comp+1]);
1034          brw_pop_insn_state(p);
1035       }
1036       else {
1037          brw_MOV(p, brw_message_reg(nr), arg1[comp]);
1038       }
1039       nr += 2;
1040    }
1041
1042    if (!c->key.runtime_check_aads_emit) {
1043       if (c->key.aa_dest_stencil_reg)
1044          emit_aa(c, arg1, 2);
1045
1046       fire_fb_write(c, 0, nr, target, eot);
1047    }
1048    else {
1049       struct brw_reg v1_null_ud = vec1(retype(brw_null_reg(), BRW_REGISTER_TYPE_UD));
1050       struct brw_reg ip = brw_ip_reg();
1051       struct brw_instruction *jmp;
1052
1053       brw_set_compression_control(p, BRW_COMPRESSION_NONE);
1054       brw_set_conditionalmod(p, BRW_CONDITIONAL_Z);
1055       brw_AND(p,
1056               v1_null_ud,
1057               get_element_ud(brw_vec8_grf(1,0), 6),
1058               brw_imm_ud(1<<26));
1059
1060       jmp = brw_JMPI(p, ip, ip, brw_imm_d(0));
1061       {
1062          emit_aa(c, arg1, 2);
1063          fire_fb_write(c, 0, nr, target, eot);
1064          /* note - thread killed in subroutine */
1065       }
1066       brw_land_fwd_jump(p, jmp);
1067
1068       /* ELSE: Shuffle up one register to fill in the hole left for AA:
1069        */
1070       fire_fb_write(c, 1, nr-1, target, eot);
1071    }
1072 }
1073
1074
1075 /**
1076  * Move a GPR to scratch memory.
1077  */
1078 static void emit_spill( struct brw_wm_compile *c,
1079                         struct brw_reg reg,
1080                         GLuint slot )
1081 {
1082    struct brw_compile *p = &c->func;
1083
1084    /*
1085      mov (16) m2.0<1>:ud   r2.0<8;8,1>:ud   { Align1 Compr }
1086    */
1087    brw_MOV(p, brw_message_reg(2), reg);
1088
1089    /*
1090      mov (1) r0.2<1>:d    0x00000080:d     { Align1 NoMask }
1091      send (16) null.0<1>:uw m1               r0.0<8;8,1>:uw   0x053003ff:ud    { Align1 }
1092    */
1093    brw_dp_WRITE_16(p,
1094                    retype(vec16(brw_vec8_grf(0, 0)), BRW_REGISTER_TYPE_UW),
1095                    slot);
1096 }
1097
1098
1099 /**
1100  * Load a GPR from scratch memory.
1101  */
1102 static void emit_unspill( struct brw_wm_compile *c,
1103                           struct brw_reg reg,
1104                           GLuint slot )
1105 {
1106    struct brw_compile *p = &c->func;
1107
1108    /* Slot 0 is the undef value.
1109     */
1110    if (slot == 0) {
1111       brw_MOV(p, reg, brw_imm_f(0));
1112       return;
1113    }
1114
1115    /*
1116      mov (1) r0.2<1>:d    0x000000c0:d     { Align1 NoMask }
1117      send (16) r110.0<1>:uw m1               r0.0<8;8,1>:uw   0x041243ff:ud    { Align1 }
1118    */
1119
1120    brw_dp_READ_16(p,
1121                   retype(vec16(reg), BRW_REGISTER_TYPE_UW),
1122                   slot);
1123 }
1124
1125
1126 /**
1127  * Retrieve up to 4 GEN4 register pairs for the given wm reg:
1128  * Args with unspill_reg != 0 will be loaded from scratch memory.
1129  */
1130 static void get_argument_regs( struct brw_wm_compile *c,
1131                                struct brw_wm_ref *arg[],
1132                                struct brw_reg *regs )
1133 {
1134    GLuint i;
1135
1136    for (i = 0; i < 4; i++) {
1137       if (arg[i]) {
1138          if (arg[i]->unspill_reg)
1139             emit_unspill(c,
1140                          brw_vec8_grf(arg[i]->unspill_reg, 0),
1141                          arg[i]->value->spill_slot);
1142
1143          regs[i] = arg[i]->hw_reg;
1144       }
1145       else {
1146          regs[i] = brw_null_reg();
1147       }
1148    }
1149 }
1150
1151
1152 /**
1153  * For values that have a spill_slot!=0, write those regs to scratch memory.
1154  */
1155 static void spill_values( struct brw_wm_compile *c,
1156                           struct brw_wm_value *values,
1157                           GLuint nr )
1158 {
1159    GLuint i;
1160
1161    for (i = 0; i < nr; i++)
1162       if (values[i].spill_slot)
1163          emit_spill(c, values[i].hw_reg, values[i].spill_slot);
1164 }
1165
1166
1167 /* Emit the fragment program instructions here.
1168  */
1169 void brw_wm_emit( struct brw_wm_compile *c )
1170 {
1171    struct brw_compile *p = &c->func;
1172    GLuint insn;
1173
1174    brw_set_compression_control(p, BRW_COMPRESSION_COMPRESSED);
1175
1176    /* Check if any of the payload regs need to be spilled:
1177     */
1178    spill_values(c, c->payload.depth, 4);
1179    spill_values(c, c->creg, c->nr_creg);
1180    spill_values(c, c->payload.input_interp, FRAG_ATTRIB_MAX);
1181
1182
1183    for (insn = 0; insn < c->nr_insns; insn++) {
1184
1185       struct brw_wm_instruction *inst = &c->instruction[insn];
1186       struct brw_reg args[3][4], dst[4];
1187       GLuint i, dst_flags;
1188
1189       /* Get argument regs:
1190        */
1191       for (i = 0; i < 3; i++)
1192          get_argument_regs(c, inst->src[i], args[i]);
1193
1194       /* Get dest regs:
1195        */
1196       for (i = 0; i < 4; i++)
1197          if (inst->dst[i])
1198             dst[i] = inst->dst[i]->hw_reg;
1199          else
1200             dst[i] = brw_null_reg();
1201
1202       /* Flags
1203        */
1204       dst_flags = inst->writemask;
1205       if (inst->saturate)
1206          dst_flags |= SATURATE;
1207
1208       switch (inst->opcode) {
1209          /* Generated instructions for calculating triangle interpolants:
1210           */
1211       case WM_PIXELXY:
1212          emit_pixel_xy(p, dst, dst_flags);
1213          break;
1214
1215       case WM_DELTAXY:
1216          emit_delta_xy(p, dst, dst_flags, args[0]);
1217          break;
1218
1219       case WM_WPOSXY:
1220          emit_wpos_xy(c, dst, dst_flags, args[0]);
1221          break;
1222
1223       case WM_PIXELW:
1224          emit_pixel_w(p, dst, dst_flags, args[0], args[1]);
1225          break;
1226
1227       case WM_LINTERP:
1228          emit_linterp(p, dst, dst_flags, args[0], args[1]);
1229          break;
1230
1231       case WM_PINTERP:
1232          emit_pinterp(p, dst, dst_flags, args[0], args[1], args[2]);
1233          break;
1234
1235       case WM_CINTERP:
1236          emit_cinterp(p, dst, dst_flags, args[0]);
1237          break;
1238
1239       case WM_FB_WRITE:
1240          emit_fb_write(c, args[0], args[1], args[2], inst->target, inst->eot);
1241          break;
1242
1243       case WM_FRONTFACING:
1244          emit_frontfacing(p, dst, dst_flags);
1245          break;
1246
1247          /* Straightforward arithmetic:
1248           */
1249       case OPCODE_ADD:
1250          emit_alu2(p, brw_ADD, dst, dst_flags, args[0], args[1]);
1251          break;
1252
1253       case OPCODE_FRC:
1254          emit_alu1(p, brw_FRC, dst, dst_flags, args[0]);
1255          break;
1256
1257       case OPCODE_FLR:
1258          emit_alu1(p, brw_RNDD, dst, dst_flags, args[0]);
1259          break;
1260
1261       case OPCODE_DP3:
1262          emit_dp3(p, dst, dst_flags, args[0], args[1]);
1263          break;
1264
1265       case OPCODE_DP4:
1266          emit_dp4(p, dst, dst_flags, args[0], args[1]);
1267          break;
1268
1269       case OPCODE_DPH:
1270          emit_dph(p, dst, dst_flags, args[0], args[1]);
1271          break;
1272
1273       case OPCODE_TRUNC:
1274          emit_trunc(p, dst, dst_flags, args[0]);
1275          break;
1276
1277       case OPCODE_LRP:
1278          emit_lrp(p, dst, dst_flags, args[0], args[1], args[2]);
1279          break;
1280
1281       case OPCODE_MAD:
1282          emit_mad(p, dst, dst_flags, args[0], args[1], args[2]);
1283          break;
1284
1285       case OPCODE_MOV:
1286       case OPCODE_SWZ:
1287          emit_alu1(p, brw_MOV, dst, dst_flags, args[0]);
1288          break;
1289
1290       case OPCODE_MUL:
1291          emit_alu2(p, brw_MUL, dst, dst_flags, args[0], args[1]);
1292          break;
1293
1294       case OPCODE_XPD:
1295          emit_xpd(p, dst, dst_flags, args[0], args[1]);
1296          break;
1297
1298          /* Higher math functions:
1299           */
1300       case OPCODE_RCP:
1301          emit_math1(p, BRW_MATH_FUNCTION_INV, dst, dst_flags, args[0]);
1302          break;
1303
1304       case OPCODE_RSQ:
1305          emit_math1(p, BRW_MATH_FUNCTION_RSQ, dst, dst_flags, args[0]);
1306          break;
1307
1308       case OPCODE_SIN:
1309          emit_math1(p, BRW_MATH_FUNCTION_SIN, dst, dst_flags, args[0]);
1310          break;
1311
1312       case OPCODE_COS:
1313          emit_math1(p, BRW_MATH_FUNCTION_COS, dst, dst_flags, args[0]);
1314          break;
1315
1316       case OPCODE_EX2:
1317          emit_math1(p, BRW_MATH_FUNCTION_EXP, dst, dst_flags, args[0]);
1318          break;
1319
1320       case OPCODE_LG2:
1321          emit_math1(p, BRW_MATH_FUNCTION_LOG, dst, dst_flags, args[0]);
1322          break;
1323
1324       case OPCODE_SCS:
1325          /* There is an scs math function, but it would need some
1326           * fixup for 16-element execution.
1327           */
1328          if (dst_flags & WRITEMASK_X)
1329             emit_math1(p, BRW_MATH_FUNCTION_COS, dst, (dst_flags&SATURATE)|WRITEMASK_X, args[0]);
1330          if (dst_flags & WRITEMASK_Y)
1331             emit_math1(p, BRW_MATH_FUNCTION_SIN, dst+1, (dst_flags&SATURATE)|WRITEMASK_X, args[0]);
1332          break;
1333
1334       case OPCODE_POW:
1335          emit_math2(p, BRW_MATH_FUNCTION_POW, dst, dst_flags, args[0], args[1]);
1336          break;
1337
1338          /* Comparisons:
1339           */
1340       case OPCODE_CMP:
1341          emit_cmp(p, dst, dst_flags, args[0], args[1], args[2]);
1342          break;
1343
1344       case OPCODE_MAX:
1345          emit_max(p, dst, dst_flags, args[0], args[1]);
1346          break;
1347
1348       case OPCODE_MIN:
1349          emit_min(p, dst, dst_flags, args[0], args[1]);
1350          break;
1351
1352       case OPCODE_SLT:
1353          emit_slt(p, dst, dst_flags, args[0], args[1]);
1354          break;
1355
1356       case OPCODE_SLE:
1357          emit_sle(p, dst, dst_flags, args[0], args[1]);
1358         break;
1359       case OPCODE_SGT:
1360          emit_sgt(p, dst, dst_flags, args[0], args[1]);
1361         break;
1362       case OPCODE_SGE:
1363          emit_sge(p, dst, dst_flags, args[0], args[1]);
1364          break;
1365       case OPCODE_SEQ:
1366          emit_seq(p, dst, dst_flags, args[0], args[1]);
1367         break;
1368       case OPCODE_SNE:
1369          emit_sne(p, dst, dst_flags, args[0], args[1]);
1370         break;
1371
1372       case OPCODE_LIT:
1373          emit_lit(p, dst, dst_flags, args[0]);
1374          break;
1375
1376          /* Texturing operations:
1377           */
1378       case OPCODE_TEX:
1379          emit_tex(c, inst, dst, dst_flags, args[0]);
1380          break;
1381
1382       case OPCODE_TXB:
1383          emit_txb(c, inst, dst, dst_flags, args[0]);
1384          break;
1385
1386       case OPCODE_KIL:
1387          emit_kil(c, args[0]);
1388          break;
1389
1390       default:
1391          _mesa_printf("Unsupported opcode %i (%s) in fragment shader\n",
1392                       inst->opcode, inst->opcode < MAX_OPCODE ?
1393                                     _mesa_opcode_string(inst->opcode) :
1394                                     "unknown");
1395       }
1396
1397       for (i = 0; i < 4; i++)
1398         if (inst->dst[i] && inst->dst[i]->spill_slot)
1399            emit_spill(c,
1400                       inst->dst[i]->hw_reg,
1401                       inst->dst[i]->spill_slot);
1402    }
1403
1404    if (INTEL_DEBUG & DEBUG_WM) {
1405       int i;
1406
1407       _mesa_printf("wm-native:\n");
1408       for (i = 0; i < p->nr_insn; i++)
1409          brw_disasm(stderr, &p->store[i]);
1410       _mesa_printf("\n");
1411    }
1412 }