src/mesa/drivers/dri/i965/brw_wm_emit.c

   1 /*
   2  Copyright (C) Intel Corp.  2006.  All Rights Reserved.
   3  Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
   4  develop this 3D driver.
   5
   6  Permission is hereby granted, free of charge, to any person obtaining
   7  a copy of this software and associated documentation files (the
   8  "Software"), to deal in the Software without restriction, including
   9  without limitation the rights to use, copy, modify, merge, publish,
  10  distribute, sublicense, and/or sell copies of the Software, and to
  11  permit persons to whom the Software is furnished to do so, subject to
  12  the following conditions:
  13
  14  The above copyright notice and this permission notice (including the
  15  next paragraph) shall be included in all copies or substantial
  16  portions of the Software.
  17
  18  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
  19  EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  20  MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
  21  IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
  22  LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
  23  OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
  24  WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
  25
  26  **********************************************************************/
  27  /*
  28   * Authors:
  29   *   Keith Whitwell <keith@tungstengraphics.com>
  30   */
  31
  32
  33 #include "main/macros.h"
  34 #include "brw_context.h"
  35 #include "brw_wm.h"
  36
  37 #define SATURATE (1<<5)
  38
  39 /* Not quite sure how correct this is - need to understand horiz
  40  * vs. vertical strides a little better.
  41  */
  42 static INLINE struct brw_reg sechalf( struct brw_reg reg )
  43 {
  44    if (reg.vstride)
  45       reg.nr++;
  46    return reg;
  47 }
  48
  49 /* Payload R0:
  50  *
  51  * R0.0 -- pixel mask, one bit for each of 4 pixels in 4 tiles,
  52  *         corresponding to each of the 16 execution channels.
  53  * R0.1..8 -- ?
  54  * R1.0 -- triangle vertex 0.X
  55  * R1.1 -- triangle vertex 0.Y
  56  * R1.2 -- tile 0 x,y coords (2 packed uwords)
  57  * R1.3 -- tile 1 x,y coords (2 packed uwords)
  58  * R1.4 -- tile 2 x,y coords (2 packed uwords)
  59  * R1.5 -- tile 3 x,y coords (2 packed uwords)
  60  * R1.6 -- ?
  61  * R1.7 -- ?
  62  * R1.8 -- ?
  63  */
  64
  65
  66 static void emit_pixel_xy(struct brw_compile *p,
  67                           const struct brw_reg *dst,
  68                           GLuint mask)
  69 {
  70    struct brw_reg r1 = brw_vec1_grf(1, 0);
  71    struct brw_reg r1_uw = retype(r1, BRW_REGISTER_TYPE_UW);
  72
  73    brw_set_compression_control(p, BRW_COMPRESSION_NONE);
  74
  75    /* Calculate pixel centers by adding 1 or 0 to each of the
  76     * micro-tile coordinates passed in r1.
  77     */
  78    if (mask & WRITEMASK_X) {
  79       brw_ADD(p,
  80               vec16(retype(dst[0], BRW_REGISTER_TYPE_UW)),
  81               stride(suboffset(r1_uw, 4), 2, 4, 0),
  82               brw_imm_v(0x10101010));
  83    }
  84
  85    if (mask & WRITEMASK_Y) {
  86       brw_ADD(p,
  87               vec16(retype(dst[1], BRW_REGISTER_TYPE_UW)),
  88               stride(suboffset(r1_uw,5), 2, 4, 0),
  89               brw_imm_v(0x11001100));
  90    }
  91
  92    brw_set_compression_control(p, BRW_COMPRESSION_COMPRESSED);
  93 }
  94
  95
  96
  97 static void emit_delta_xy(struct brw_compile *p,
  98                           const struct brw_reg *dst,
  99                           GLuint mask,
 100                           const struct brw_reg *arg0)
 101 {
 102    struct brw_reg r1 = brw_vec1_grf(1, 0);
 103
 104    /* Calc delta X,Y by subtracting origin in r1 from the pixel
 105     * centers.
 106     */
 107    if (mask & WRITEMASK_X) {
 108       brw_ADD(p,
 109               dst[0],
 110               retype(arg0[0], BRW_REGISTER_TYPE_UW),
 111               negate(r1));
 112    }
 113
 114    if (mask & WRITEMASK_Y) {
 115       brw_ADD(p,
 116               dst[1],
 117               retype(arg0[1], BRW_REGISTER_TYPE_UW),
 118               negate(suboffset(r1,1)));
 119
 120    }
 121 }
 122
 123 static void emit_wpos_xy(struct brw_wm_compile *c,
 124                          const struct brw_reg *dst,
 125                          GLuint mask,
 126                          const struct brw_reg *arg0)
 127 {
 128    struct brw_compile *p = &c->func;
 129
 130    /* Calculate the pixel offset from window bottom left into destination
 131     * X and Y channels.
 132     */
 133    if (mask & WRITEMASK_X) {
 134       /* X' = X - origin */
 135       brw_ADD(p,
 136               dst[0],
 137               retype(arg0[0], BRW_REGISTER_TYPE_W),
 138               brw_imm_d(0 - c->key.origin_x));
 139    }
 140
 141    if (mask & WRITEMASK_Y) {
 142       /* Y' = height - (Y - origin_y) = height + origin_y - Y */
 143       brw_ADD(p,
 144               dst[1],
 145               negate(retype(arg0[1], BRW_REGISTER_TYPE_W)),
 146               brw_imm_d(c->key.origin_y + c->key.drawable_height - 1));
 147    }
 148 }
 149
 150
 151 static void emit_pixel_w( struct brw_compile *p,
 152                           const struct brw_reg *dst,
 153                           GLuint mask,
 154                           const struct brw_reg *arg0,
 155                           const struct brw_reg *deltas)
 156 {
 157    /* Don't need this if all you are doing is interpolating color, for
 158     * instance.
 159     */
 160    if (mask & WRITEMASK_W) {
 161       struct brw_reg interp3 = brw_vec1_grf(arg0[0].nr+1, 4);
 162
 163       /* Calc 1/w - just linterp wpos[3] optimized by putting the
 164        * result straight into a message reg.
 165        */
 166       brw_LINE(p, brw_null_reg(), interp3, deltas[0]);
 167       brw_MAC(p, brw_message_reg(2), suboffset(interp3, 1), deltas[1]);
 168
 169       /* Calc w */
 170       brw_math_16( p, dst[3],
 171                    BRW_MATH_FUNCTION_INV,
 172                    BRW_MATH_SATURATE_NONE,
 173                    2, brw_null_reg(),
 174                    BRW_MATH_PRECISION_FULL);
 175    }
 176 }
 177
 178
 179
 180 static void emit_linterp( struct brw_compile *p,
 181                          const struct brw_reg *dst,
 182                          GLuint mask,
 183                          const struct brw_reg *arg0,
 184                          const struct brw_reg *deltas )
 185 {
 186    struct brw_reg interp[4];
 187    GLuint nr = arg0[0].nr;
 188    GLuint i;
 189
 190    interp[0] = brw_vec1_grf(nr, 0);
 191    interp[1] = brw_vec1_grf(nr, 4);
 192    interp[2] = brw_vec1_grf(nr+1, 0);
 193    interp[3] = brw_vec1_grf(nr+1, 4);
 194
 195    for (i = 0; i < 4; i++) {
 196       if (mask & (1<<i)) {
 197          brw_LINE(p, brw_null_reg(), interp[i], deltas[0]);
 198          brw_MAC(p, dst[i], suboffset(interp[i],1), deltas[1]);
 199       }
 200    }
 201 }
 202
 203
 204 static void emit_pinterp( struct brw_compile *p,
 205                           const struct brw_reg *dst,
 206                           GLuint mask,
 207                           const struct brw_reg *arg0,
 208                           const struct brw_reg *deltas,
 209                           const struct brw_reg *w)
 210 {
 211    struct brw_reg interp[4];
 212    GLuint nr = arg0[0].nr;
 213    GLuint i;
 214
 215    interp[0] = brw_vec1_grf(nr, 0);
 216    interp[1] = brw_vec1_grf(nr, 4);
 217    interp[2] = brw_vec1_grf(nr+1, 0);
 218    interp[3] = brw_vec1_grf(nr+1, 4);
 219
 220    for (i = 0; i < 4; i++) {
 221       if (mask & (1<<i)) {
 222          brw_LINE(p, brw_null_reg(), interp[i], deltas[0]);
 223          brw_MAC(p, dst[i], suboffset(interp[i],1), deltas[1]);
 224       }
 225    }
 226    for (i = 0; i < 4; i++) {
 227       if (mask & (1<<i)) {
 228          brw_MUL(p, dst[i], dst[i], w[3]);
 229       }
 230    }
 231 }
 232
 233
 234 static void emit_cinterp( struct brw_compile *p,
 235                          const struct brw_reg *dst,
 236                          GLuint mask,
 237                          const struct brw_reg *arg0 )
 238 {
 239    struct brw_reg interp[4];
 240    GLuint nr = arg0[0].nr;
 241    GLuint i;
 242
 243    interp[0] = brw_vec1_grf(nr, 0);
 244    interp[1] = brw_vec1_grf(nr, 4);
 245    interp[2] = brw_vec1_grf(nr+1, 0);
 246    interp[3] = brw_vec1_grf(nr+1, 4);
 247
 248    for (i = 0; i < 4; i++) {
 249       if (mask & (1<<i)) {
 250          brw_MOV(p, dst[i], suboffset(interp[i],3));    /* TODO: optimize away like other moves */
 251       }
 252    }
 253 }
 254
 255 /* Sets the destination channels to 1.0 or 0.0 according to glFrontFacing. */
 256 static void emit_frontfacing( struct brw_compile *p,
 257                               const struct brw_reg *dst,
 258                               GLuint mask )
 259 {
 260    struct brw_reg r1_6ud = retype(brw_vec1_grf(1, 6), BRW_REGISTER_TYPE_UD);
 261    GLuint i;
 262
 263    if (!(mask & WRITEMASK_XYZW))
 264       return;
 265
 266    for (i = 0; i < 4; i++) {
 267       if (mask & (1<<i)) {
 268          brw_MOV(p, dst[i], brw_imm_f(0.0));
 269       }
 270    }
 271
 272    /* bit 31 is "primitive is back face", so checking < (1 << 31) gives
 273     * us front face
 274     */
 275    brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_L, r1_6ud, brw_imm_ud(1 << 31));
 276    for (i = 0; i < 4; i++) {
 277       if (mask & (1<<i)) {
 278          brw_MOV(p, dst[i], brw_imm_f(1.0));
 279       }
 280    }
 281    brw_set_predicate_control_flag_value(p, 0xff);
 282 }
 283
 284 static void emit_alu1( struct brw_compile *p,
 285                        struct brw_instruction *(*func)(struct brw_compile *,
 286                                                        struct brw_reg,
 287                                                        struct brw_reg),
 288                        const struct brw_reg *dst,
 289                        GLuint mask,
 290                        const struct brw_reg *arg0 )
 291 {
 292    GLuint i;
 293
 294    if (mask & SATURATE)
 295       brw_set_saturate(p, 1);
 296
 297    for (i = 0; i < 4; i++) {
 298       if (mask & (1<<i)) {
 299          func(p, dst[i], arg0[i]);
 300       }
 301    }
 302
 303    if (mask & SATURATE)
 304       brw_set_saturate(p, 0);
 305 }
 306
 307
 308 static void emit_alu2( struct brw_compile *p,
 309                        struct brw_instruction *(*func)(struct brw_compile *,
 310                                                        struct brw_reg,
 311                                                        struct brw_reg,
 312                                                        struct brw_reg),
 313                        const struct brw_reg *dst,
 314                        GLuint mask,
 315                        const struct brw_reg *arg0,
 316                        const struct brw_reg *arg1 )
 317 {
 318    GLuint i;
 319
 320    if (mask & SATURATE)
 321       brw_set_saturate(p, 1);
 322
 323    for (i = 0; i < 4; i++) {
 324       if (mask & (1<<i)) {
 325          func(p, dst[i], arg0[i], arg1[i]);
 326       }
 327    }
 328
 329    if (mask & SATURATE)
 330       brw_set_saturate(p, 0);
 331 }
 332
 333
 334 static void emit_mad( struct brw_compile *p,
 335                       const struct brw_reg *dst,
 336                       GLuint mask,
 337                       const struct brw_reg *arg0,
 338                       const struct brw_reg *arg1,
 339                       const struct brw_reg *arg2 )
 340 {
 341    GLuint i;
 342
 343    for (i = 0; i < 4; i++) {
 344       if (mask & (1<<i)) {
 345          brw_MUL(p, dst[i], arg0[i], arg1[i]);
 346
 347          brw_set_saturate(p, (mask & SATURATE) ? 1 : 0);
 348          brw_ADD(p, dst[i], dst[i], arg2[i]);
 349          brw_set_saturate(p, 0);
 350       }
 351    }
 352 }
 353
 354 static void emit_trunc( struct brw_compile *p,
 355                       const struct brw_reg *dst,
 356                       GLuint mask,
 357                       const struct brw_reg *arg0)
 358 {
 359    GLuint i;
 360
 361    for (i = 0; i < 4; i++) {
 362       if (mask & (1<<i)) {
 363          brw_RNDZ(p, dst[i], arg0[i]);
 364       }
 365    }
 366 }
 367
 368 static void emit_lrp( struct brw_compile *p,
 369                       const struct brw_reg *dst,
 370                       GLuint mask,
 371                       const struct brw_reg *arg0,
 372                       const struct brw_reg *arg1,
 373                       const struct brw_reg *arg2 )
 374 {
 375    GLuint i;
 376
 377    /* Uses dst as a temporary:
 378     */
 379    for (i = 0; i < 4; i++) {
 380       if (mask & (1<<i)) {
 381          /* Can I use the LINE instruction for this?
 382           */
 383          brw_ADD(p, dst[i], negate(arg0[i]), brw_imm_f(1.0));
 384          brw_MUL(p, brw_null_reg(), dst[i], arg2[i]);
 385
 386          brw_set_saturate(p, (mask & SATURATE) ? 1 : 0);
 387          brw_MAC(p, dst[i], arg0[i], arg1[i]);
 388          brw_set_saturate(p, 0);
 389       }
 390    }
 391 }
 392
 393 static void emit_sop( struct brw_compile *p,
 394                       const struct brw_reg *dst,
 395                       GLuint mask,
 396                       GLuint cond,
 397                       const struct brw_reg *arg0,
 398                       const struct brw_reg *arg1 )
 399 {
 400    GLuint i;
 401
 402    for (i = 0; i < 4; i++) {
 403       if (mask & (1<<i)) {
 404          brw_MOV(p, dst[i], brw_imm_f(0));
 405          brw_CMP(p, brw_null_reg(), cond, arg0[i], arg1[i]);
 406          brw_MOV(p, dst[i], brw_imm_f(1.0));
 407          brw_set_predicate_control_flag_value(p, 0xff);
 408       }
 409    }
 410 }
 411
 412 static void emit_slt( struct brw_compile *p,
 413                       const struct brw_reg *dst,
 414                       GLuint mask,
 415                       const struct brw_reg *arg0,
 416                       const struct brw_reg *arg1 )
 417 {
 418    emit_sop(p, dst, mask, BRW_CONDITIONAL_L, arg0, arg1);
 419 }
 420
 421 static void emit_sle( struct brw_compile *p,
 422                       const struct brw_reg *dst,
 423                       GLuint mask,
 424                       const struct brw_reg *arg0,
 425                       const struct brw_reg *arg1 )
 426 {
 427    emit_sop(p, dst, mask, BRW_CONDITIONAL_LE, arg0, arg1);
 428 }
 429
 430 static void emit_sgt( struct brw_compile *p,
 431                       const struct brw_reg *dst,
 432                       GLuint mask,
 433                       const struct brw_reg *arg0,
 434                       const struct brw_reg *arg1 )
 435 {
 436    emit_sop(p, dst, mask, BRW_CONDITIONAL_G, arg0, arg1);
 437 }
 438
 439 static void emit_sge( struct brw_compile *p,
 440                       const struct brw_reg *dst,
 441                       GLuint mask,
 442                       const struct brw_reg *arg0,
 443                       const struct brw_reg *arg1 )
 444 {
 445    emit_sop(p, dst, mask, BRW_CONDITIONAL_GE, arg0, arg1);
 446 }
 447
 448 static void emit_seq( struct brw_compile *p,
 449                       const struct brw_reg *dst,
 450                       GLuint mask,
 451                       const struct brw_reg *arg0,
 452                       const struct brw_reg *arg1 )
 453 {
 454    emit_sop(p, dst, mask, BRW_CONDITIONAL_EQ, arg0, arg1);
 455 }
 456
 457 static void emit_sne( struct brw_compile *p,
 458                       const struct brw_reg *dst,
 459                       GLuint mask,
 460                       const struct brw_reg *arg0,
 461                       const struct brw_reg *arg1 )
 462 {
 463    emit_sop(p, dst, mask, BRW_CONDITIONAL_NEQ, arg0, arg1);
 464 }
 465
 466 static void emit_cmp( struct brw_compile *p,
 467                       const struct brw_reg *dst,
 468                       GLuint mask,
 469                       const struct brw_reg *arg0,
 470                       const struct brw_reg *arg1,
 471                       const struct brw_reg *arg2 )
 472 {
 473    GLuint i;
 474
 475    for (i = 0; i < 4; i++) {
 476       if (mask & (1<<i)) {
 477          brw_set_saturate(p, (mask & SATURATE) ? 1 : 0);
 478          brw_MOV(p, dst[i], arg2[i]);
 479          brw_set_saturate(p, 0);
 480
 481          brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_L, arg0[i], brw_imm_f(0));
 482
 483          brw_set_saturate(p, (mask & SATURATE) ? 1 : 0);
 484          brw_MOV(p, dst[i], arg1[i]);
 485          brw_set_saturate(p, 0);
 486          brw_set_predicate_control_flag_value(p, 0xff);
 487       }
 488    }
 489 }
 490
 491 static void emit_max( struct brw_compile *p,
 492                       const struct brw_reg *dst,
 493                       GLuint mask,
 494                       const struct brw_reg *arg0,
 495                       const struct brw_reg *arg1 )
 496 {
 497    GLuint i;
 498
 499    for (i = 0; i < 4; i++) {
 500       if (mask & (1<<i)) {
 501          brw_set_saturate(p, (mask & SATURATE) ? 1 : 0);
 502          brw_MOV(p, dst[i], arg0[i]);
 503          brw_set_saturate(p, 0);
 504
 505          brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_L, arg0[i], arg1[i]);
 506
 507          brw_set_saturate(p, (mask & SATURATE) ? 1 : 0);
 508          brw_MOV(p, dst[i], arg1[i]);
 509          brw_set_saturate(p, 0);
 510          brw_set_predicate_control_flag_value(p, 0xff);
 511       }
 512    }
 513 }
 514
 515 static void emit_min( struct brw_compile *p,
 516                       const struct brw_reg *dst,
 517                       GLuint mask,
 518                       const struct brw_reg *arg0,
 519                       const struct brw_reg *arg1 )
 520 {
 521    GLuint i;
 522
 523    for (i = 0; i < 4; i++) {
 524       if (mask & (1<<i)) {
 525          brw_set_saturate(p, (mask & SATURATE) ? 1 : 0);
 526          brw_MOV(p, dst[i], arg1[i]);
 527          brw_set_saturate(p, 0);
 528
 529          brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_L, arg0[i], arg1[i]);
 530
 531          brw_set_saturate(p, (mask & SATURATE) ? 1 : 0);
 532          brw_MOV(p, dst[i], arg0[i]);
 533          brw_set_saturate(p, 0);
 534          brw_set_predicate_control_flag_value(p, 0xff);
 535       }
 536    }
 537 }
 538
 539
 540 static void emit_dp3( struct brw_compile *p,
 541                       const struct brw_reg *dst,
 542                       GLuint mask,
 543                       const struct brw_reg *arg0,
 544                       const struct brw_reg *arg1 )
 545 {
 546    int dst_chan = _mesa_ffs(mask & WRITEMASK_XYZW) - 1;
 547
 548    if (!(mask & WRITEMASK_XYZW))
 549       return; /* Do not emit dead code */
 550
 551    assert(is_power_of_two(mask & WRITEMASK_XYZW));
 552
 553    brw_MUL(p, brw_null_reg(), arg0[0], arg1[0]);
 554    brw_MAC(p, brw_null_reg(), arg0[1], arg1[1]);
 555
 556    brw_set_saturate(p, (mask & SATURATE) ? 1 : 0);
 557    brw_MAC(p, dst[dst_chan], arg0[2], arg1[2]);
 558    brw_set_saturate(p, 0);
 559 }
 560
 561
 562 static void emit_dp4( struct brw_compile *p,
 563                       const struct brw_reg *dst,
 564                       GLuint mask,
 565                       const struct brw_reg *arg0,
 566                       const struct brw_reg *arg1 )
 567 {
 568    int dst_chan = _mesa_ffs(mask & WRITEMASK_XYZW) - 1;
 569
 570    if (!(mask & WRITEMASK_XYZW))
 571       return; /* Do not emit dead code */
 572
 573    assert(is_power_of_two(mask & WRITEMASK_XYZW));
 574
 575    brw_MUL(p, brw_null_reg(), arg0[0], arg1[0]);
 576    brw_MAC(p, brw_null_reg(), arg0[1], arg1[1]);
 577    brw_MAC(p, brw_null_reg(), arg0[2], arg1[2]);
 578
 579    brw_set_saturate(p, (mask & SATURATE) ? 1 : 0);
 580    brw_MAC(p, dst[dst_chan], arg0[3], arg1[3]);
 581    brw_set_saturate(p, 0);
 582 }
 583
 584
 585 static void emit_dph( struct brw_compile *p,
 586                       const struct brw_reg *dst,
 587                       GLuint mask,
 588                       const struct brw_reg *arg0,
 589                       const struct brw_reg *arg1 )
 590 {
 591    const int dst_chan = _mesa_ffs(mask & WRITEMASK_XYZW) - 1;
 592
 593    if (!(mask & WRITEMASK_XYZW))
 594       return; /* Do not emit dead code */
 595
 596    assert(is_power_of_two(mask & WRITEMASK_XYZW));
 597
 598    brw_MUL(p, brw_null_reg(), arg0[0], arg1[0]);
 599    brw_MAC(p, brw_null_reg(), arg0[1], arg1[1]);
 600    brw_MAC(p, dst[dst_chan], arg0[2], arg1[2]);
 601
 602    brw_set_saturate(p, (mask & SATURATE) ? 1 : 0);
 603    brw_ADD(p, dst[dst_chan], dst[dst_chan], arg1[3]);
 604    brw_set_saturate(p, 0);
 605 }
 606
 607
 608 static void emit_xpd( struct brw_compile *p,
 609                       const struct brw_reg *dst,
 610                       GLuint mask,
 611                       const struct brw_reg *arg0,
 612                       const struct brw_reg *arg1 )
 613 {
 614    GLuint i;
 615
 616    assert(!(mask & WRITEMASK_W) == WRITEMASK_X);
 617
 618    for (i = 0 ; i < 3; i++) {
 619       if (mask & (1<<i)) {
 620          GLuint i2 = (i+2)%3;
 621          GLuint i1 = (i+1)%3;
 622
 623          brw_MUL(p, brw_null_reg(), negate(arg0[i2]), arg1[i1]);
 624
 625          brw_set_saturate(p, (mask & SATURATE) ? 1 : 0);
 626          brw_MAC(p, dst[i], arg0[i1], arg1[i2]);
 627          brw_set_saturate(p, 0);
 628       }
 629    }
 630 }
 631
 632
 633 static void emit_math1( struct brw_compile *p,
 634                         GLuint function,
 635                         const struct brw_reg *dst,
 636                         GLuint mask,
 637                         const struct brw_reg *arg0 )
 638 {
 639    int dst_chan = _mesa_ffs(mask & WRITEMASK_XYZW) - 1;
 640
 641    if (!(mask & WRITEMASK_XYZW))
 642       return; /* Do not emit dead code */
 643
 644    assert(is_power_of_two(mask & WRITEMASK_XYZW));
 645
 646    brw_MOV(p, brw_message_reg(2), arg0[0]);
 647
 648    /* Send two messages to perform all 16 operations:
 649     */
 650    brw_math_16(p,
 651                dst[dst_chan],
 652                function,
 653                (mask & SATURATE) ? BRW_MATH_SATURATE_SATURATE : BRW_MATH_SATURATE_NONE,
 654                2,
 655                brw_null_reg(),
 656                BRW_MATH_PRECISION_FULL);
 657 }
 658
 659
 660 static void emit_math2( struct brw_compile *p,
 661                         GLuint function,
 662                         const struct brw_reg *dst,
 663                         GLuint mask,
 664                         const struct brw_reg *arg0,
 665                         const struct brw_reg *arg1)
 666 {
 667    int dst_chan = _mesa_ffs(mask & WRITEMASK_XYZW) - 1;
 668
 669    if (!(mask & WRITEMASK_XYZW))
 670       return; /* Do not emit dead code */
 671
 672    assert(is_power_of_two(mask & WRITEMASK_XYZW));
 673
 674    brw_push_insn_state(p);
 675
 676    brw_set_compression_control(p, BRW_COMPRESSION_NONE);
 677    brw_MOV(p, brw_message_reg(2), arg0[0]);
 678    brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF);
 679    brw_MOV(p, brw_message_reg(4), sechalf(arg0[0]));
 680
 681    brw_set_compression_control(p, BRW_COMPRESSION_NONE);
 682    brw_MOV(p, brw_message_reg(3), arg1[0]);
 683    brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF);
 684    brw_MOV(p, brw_message_reg(5), sechalf(arg1[0]));
 685
 686
 687    /* Send two messages to perform all 16 operations:
 688     */
 689    brw_set_compression_control(p, BRW_COMPRESSION_NONE);
 690    brw_math(p,
 691             dst[dst_chan],
 692             function,
 693             (mask & SATURATE) ? BRW_MATH_SATURATE_SATURATE : BRW_MATH_SATURATE_NONE,
 694             2,
 695             brw_null_reg(),
 696             BRW_MATH_DATA_VECTOR,
 697             BRW_MATH_PRECISION_FULL);
 698
 699    brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF);
 700    brw_math(p,
 701             offset(dst[dst_chan],1),
 702             function,
 703             (mask & SATURATE) ? BRW_MATH_SATURATE_SATURATE : BRW_MATH_SATURATE_NONE,
 704             4,
 705             brw_null_reg(),
 706             BRW_MATH_DATA_VECTOR,
 707             BRW_MATH_PRECISION_FULL);
 708
 709    brw_pop_insn_state(p);
 710 }
 711
 712
 713
 714 static void emit_tex( struct brw_wm_compile *c,
 715                       const struct brw_wm_instruction *inst,
 716                       struct brw_reg *dst,
 717                       GLuint dst_flags,
 718                       struct brw_reg *arg )
 719 {
 720    struct brw_compile *p = &c->func;
 721    GLuint msgLength, responseLength;
 722    GLuint i, nr;
 723    GLuint emit;
 724    GLuint msg_type;
 725
 726    /* How many input regs are there?
 727     */
 728    switch (inst->tex_idx) {
 729    case TEXTURE_1D_INDEX:
 730       emit = WRITEMASK_X;
 731       nr = 1;
 732       break;
 733    case TEXTURE_2D_INDEX:
 734    case TEXTURE_RECT_INDEX:
 735       emit = WRITEMASK_XY;
 736       nr = 2;
 737       break;
 738    default:
 739       emit = WRITEMASK_XYZ;
 740       nr = 3;
 741       break;
 742    }
 743
 744    if (inst->tex_shadow) {
 745       nr = 4;
 746       emit |= WRITEMASK_W;
 747    }
 748
 749    msgLength = 1;
 750
 751    for (i = 0; i < nr; i++) {
 752       static const GLuint swz[4] = {0,1,2,2};
 753       if (emit & (1<<i))
 754          brw_MOV(p, brw_message_reg(msgLength+1), arg[swz[i]]);
 755       else
 756          brw_MOV(p, brw_message_reg(msgLength+1), brw_imm_f(0));
 757       msgLength += 2;
 758    }
 759
 760    responseLength = 8;          /* always */
 761
 762    if (BRW_IS_IGDNG(p->brw)) {
 763        if (inst->tex_shadow)
 764            msg_type = BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_COMPARE_IGDNG;
 765        else
 766            msg_type = BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_IGDNG;
 767    } else {
 768        if (inst->tex_shadow)
 769            msg_type = BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_COMPARE;
 770        else
 771            msg_type = BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE;
 772    }
 773
 774    brw_SAMPLE(p,
 775               retype(vec16(dst[0]), BRW_REGISTER_TYPE_UW),
 776               1,
 777               retype(c->payload.depth[0].hw_reg, BRW_REGISTER_TYPE_UW),
 778               SURF_INDEX_TEXTURE(inst->tex_unit),
 779               inst->tex_unit,     /* sampler */
 780               inst->writemask,
 781               msg_type,
 782               responseLength,
 783               msgLength,
 784               0,
 785               1,
 786               BRW_SAMPLER_SIMD_MODE_SIMD16);
 787 }
 788
 789
 790 static void emit_txb( struct brw_wm_compile *c,
 791                       const struct brw_wm_instruction *inst,
 792                       struct brw_reg *dst,
 793                       GLuint dst_flags,
 794                       struct brw_reg *arg )
 795 {
 796    struct brw_compile *p = &c->func;
 797    GLuint msgLength;
 798    GLuint msg_type;
 799    /* Shadow ignored for txb.
 800     */
 801    switch (inst->tex_idx) {
 802    case TEXTURE_1D_INDEX:
 803       brw_MOV(p, brw_message_reg(2), arg[0]);
 804       brw_MOV(p, brw_message_reg(4), brw_imm_f(0));
 805       brw_MOV(p, brw_message_reg(6), brw_imm_f(0));
 806       break;
 807    case TEXTURE_2D_INDEX:
 808    case TEXTURE_RECT_INDEX:
 809       brw_MOV(p, brw_message_reg(2), arg[0]);
 810       brw_MOV(p, brw_message_reg(4), arg[1]);
 811       brw_MOV(p, brw_message_reg(6), brw_imm_f(0));
 812       break;
 813    default:
 814       brw_MOV(p, brw_message_reg(2), arg[0]);
 815       brw_MOV(p, brw_message_reg(4), arg[1]);
 816       brw_MOV(p, brw_message_reg(6), arg[2]);
 817       break;
 818    }
 819
 820    brw_MOV(p, brw_message_reg(8), arg[3]);
 821    msgLength = 9;
 822
 823    if (BRW_IS_IGDNG(p->brw))
 824        msg_type = BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_BIAS_IGDNG;
 825    else
 826        msg_type = BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_BIAS;
 827
 828    brw_SAMPLE(p,
 829               retype(vec16(dst[0]), BRW_REGISTER_TYPE_UW),
 830               1,
 831               retype(c->payload.depth[0].hw_reg, BRW_REGISTER_TYPE_UW),
 832               SURF_INDEX_TEXTURE(inst->tex_unit),
 833               inst->tex_unit,     /* sampler */
 834               inst->writemask,
 835               msg_type,
 836               8,                /* responseLength */
 837               msgLength,
 838               0,
 839               1,
 840               BRW_SAMPLER_SIMD_MODE_SIMD16);
 841 }
 842
 843
 844 static void emit_lit( struct brw_compile *p,
 845                       const struct brw_reg *dst,
 846                       GLuint mask,
 847                       const struct brw_reg *arg0 )
 848 {
 849    assert((mask & WRITEMASK_XW) == 0);
 850
 851    if (mask & WRITEMASK_Y) {
 852       brw_set_saturate(p, (mask & SATURATE) ? 1 : 0);
 853       brw_MOV(p, dst[1], arg0[0]);
 854       brw_set_saturate(p, 0);
 855    }
 856
 857    if (mask & WRITEMASK_Z) {
 858       emit_math2(p, BRW_MATH_FUNCTION_POW,
 859                  &dst[2],
 860                  WRITEMASK_X | (mask & SATURATE),
 861                  &arg0[1],
 862                  &arg0[3]);
 863    }
 864
 865    /* Ordinarily you'd use an iff statement to skip or shortcircuit
 866     * some of the POW calculations above, but 16-wide iff statements
 867     * seem to lock c1 hardware, so this is a nasty workaround:
 868     */
 869    brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_LE, arg0[0], brw_imm_f(0));
 870    {
 871       if (mask & WRITEMASK_Y)
 872          brw_MOV(p, dst[1], brw_imm_f(0));
 873
 874       if (mask & WRITEMASK_Z)
 875          brw_MOV(p, dst[2], brw_imm_f(0));
 876    }
 877    brw_set_predicate_control(p, BRW_PREDICATE_NONE);
 878 }
 879
 880
 881 /* Kill pixel - set execution mask to zero for those pixels which
 882  * fail.
 883  */
 884 static void emit_kil( struct brw_wm_compile *c,
 885                       struct brw_reg *arg0)
 886 {
 887    struct brw_compile *p = &c->func;
 888    struct brw_reg r0uw = retype(brw_vec1_grf(0, 0), BRW_REGISTER_TYPE_UW);
 889    GLuint i;
 890
 891    /* XXX - usually won't need 4 compares!
 892     */
 893    for (i = 0; i < 4; i++) {
 894       brw_push_insn_state(p);
 895       brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_GE, arg0[i], brw_imm_f(0));
 896       brw_set_predicate_control_flag_value(p, 0xff);
 897       brw_set_compression_control(p, BRW_COMPRESSION_NONE);
 898       brw_AND(p, r0uw, brw_flag_reg(), r0uw);
 899       brw_pop_insn_state(p);
 900    }
 901 }
 902
 903
 904 static void fire_fb_write( struct brw_wm_compile *c,
 905                            GLuint base_reg,
 906                            GLuint nr,
 907                            GLuint target,
 908                            GLuint eot )
 909 {
 910    struct brw_compile *p = &c->func;
 911
 912    /* Pass through control information:
 913     */
 914 /*  mov (8) m1.0<1>:ud   r1.0<8;8,1>:ud   { Align1 NoMask } */
 915    {
 916       brw_push_insn_state(p);
 917       brw_set_mask_control(p, BRW_MASK_DISABLE); /* ? */
 918       brw_set_compression_control(p, BRW_COMPRESSION_NONE);
 919       brw_MOV(p,
 920                brw_message_reg(base_reg + 1),
 921                brw_vec8_grf(1, 0));
 922       brw_pop_insn_state(p);
 923    }
 924
 925    /* Send framebuffer write message: */
 926 /*  send (16) null.0<1>:uw m0               r0.0<8;8,1>:uw   0x85a04000:ud    { Align1 EOT } */
 927    brw_fb_WRITE(p,
 928                 retype(vec16(brw_null_reg()), BRW_REGISTER_TYPE_UW),
 929                 base_reg,
 930                 retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UW),
 931                 target,
 932                 nr,
 933                 0,
 934                 eot);
 935 }
 936
 937
 938 static void emit_aa( struct brw_wm_compile *c,
 939                      struct brw_reg *arg1,
 940                      GLuint reg )
 941 {
 942    struct brw_compile *p = &c->func;
 943    GLuint comp = c->key.aa_dest_stencil_reg / 2;
 944    GLuint off = c->key.aa_dest_stencil_reg % 2;
 945    struct brw_reg aa = offset(arg1[comp], off);
 946
 947    brw_push_insn_state(p);
 948    brw_set_compression_control(p, BRW_COMPRESSION_NONE); /* ?? */
 949    brw_MOV(p, brw_message_reg(reg), aa);
 950    brw_pop_insn_state(p);
 951 }
 952
 953
 954 /* Post-fragment-program processing.  Send the results to the
 955  * framebuffer.
 956  * \param arg0  the fragment color
 957  * \param arg1  the pass-through depth value
 958  * \param arg2  the shader-computed depth value
 959  */
 960 static void emit_fb_write( struct brw_wm_compile *c,
 961                            struct brw_reg *arg0,
 962                            struct brw_reg *arg1,
 963                            struct brw_reg *arg2,
 964                            GLuint target,
 965                            GLuint eot)
 966 {
 967    struct brw_compile *p = &c->func;
 968    GLuint nr = 2;
 969    GLuint channel;
 970
 971    /* Reserve a space for AA - may not be needed:
 972     */
 973    if (c->key.aa_dest_stencil_reg)
 974       nr += 1;
 975
 976    /* I don't really understand how this achieves the color interleave
 977     * (ie RGBARGBA) in the result:  [Do the saturation here]
 978     */
 979    {
 980       brw_push_insn_state(p);
 981
 982       for (channel = 0; channel < 4; channel++) {
 983          /*  mov (8) m2.0<1>:ud   r28.0<8;8,1>:ud  { Align1 } */
 984          /*  mov (8) m6.0<1>:ud   r29.0<8;8,1>:ud  { Align1 SecHalf } */
 985
 986          brw_set_compression_control(p, BRW_COMPRESSION_NONE);
 987          brw_MOV(p,
 988                  brw_message_reg(nr + channel),
 989                  arg0[channel]);
 990
 991          brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF);
 992          brw_MOV(p,
 993                  brw_message_reg(nr + channel + 4),
 994                  sechalf(arg0[channel]));
 995       }
 996
 997       /* skip over the regs populated above:
 998        */
 999       nr += 8;
1000
1001       brw_pop_insn_state(p);
1002    }
1003
1004    if (c->key.source_depth_to_render_target)
1005    {
1006       if (c->key.computes_depth)
1007          brw_MOV(p, brw_message_reg(nr), arg2[2]);
1008       else
1009          brw_MOV(p, brw_message_reg(nr), arg1[1]); /* ? */
1010
1011       nr += 2;
1012    }
1013
1014    if (c->key.dest_depth_reg)
1015    {
1016       GLuint comp = c->key.dest_depth_reg / 2;
1017       GLuint off = c->key.dest_depth_reg % 2;
1018
1019       if (off != 0) {
1020          brw_push_insn_state(p);
1021          brw_set_compression_control(p, BRW_COMPRESSION_NONE);
1022
1023          brw_MOV(p, brw_message_reg(nr), offset(arg1[comp],1));
1024          /* 2nd half? */
1025          brw_MOV(p, brw_message_reg(nr+1), arg1[comp+1]);
1026          brw_pop_insn_state(p);
1027       }
1028       else {
1029          brw_MOV(p, brw_message_reg(nr), arg1[comp]);
1030       }
1031       nr += 2;
1032    }
1033
1034    if (!c->key.runtime_check_aads_emit) {
1035       if (c->key.aa_dest_stencil_reg)
1036          emit_aa(c, arg1, 2);
1037
1038       fire_fb_write(c, 0, nr, target, eot);
1039    }
1040    else {
1041       struct brw_reg v1_null_ud = vec1(retype(brw_null_reg(), BRW_REGISTER_TYPE_UD));
1042       struct brw_reg ip = brw_ip_reg();
1043       struct brw_instruction *jmp;
1044
1045       brw_set_compression_control(p, BRW_COMPRESSION_NONE);
1046       brw_set_conditionalmod(p, BRW_CONDITIONAL_Z);
1047       brw_AND(p,
1048               v1_null_ud,
1049               get_element_ud(brw_vec8_grf(1,0), 6),
1050               brw_imm_ud(1<<26));
1051
1052       jmp = brw_JMPI(p, ip, ip, brw_imm_d(0));
1053       {
1054          emit_aa(c, arg1, 2);
1055          fire_fb_write(c, 0, nr, target, eot);
1056          /* note - thread killed in subroutine */
1057       }
1058       brw_land_fwd_jump(p, jmp);
1059
1060       /* ELSE: Shuffle up one register to fill in the hole left for AA:
1061        */
1062       fire_fb_write(c, 1, nr-1, target, eot);
1063    }
1064 }
1065
1066
1067 /**
1068  * Move a GPR to scratch memory.
1069  */
1070 static void emit_spill( struct brw_wm_compile *c,
1071                         struct brw_reg reg,
1072                         GLuint slot )
1073 {
1074    struct brw_compile *p = &c->func;
1075
1076    /*
1077      mov (16) m2.0<1>:ud   r2.0<8;8,1>:ud   { Align1 Compr }
1078    */
1079    brw_MOV(p, brw_message_reg(2), reg);
1080
1081    /*
1082      mov (1) r0.2<1>:d    0x00000080:d     { Align1 NoMask }
1083      send (16) null.0<1>:uw m1               r0.0<8;8,1>:uw   0x053003ff:ud    { Align1 }
1084    */
1085    brw_dp_WRITE_16(p,
1086                    retype(vec16(brw_vec8_grf(0, 0)), BRW_REGISTER_TYPE_UW),
1087                    slot);
1088 }
1089
1090
1091 /**
1092  * Load a GPR from scratch memory.
1093  */
1094 static void emit_unspill( struct brw_wm_compile *c,
1095                           struct brw_reg reg,
1096                           GLuint slot )
1097 {
1098    struct brw_compile *p = &c->func;
1099
1100    /* Slot 0 is the undef value.
1101     */
1102    if (slot == 0) {
1103       brw_MOV(p, reg, brw_imm_f(0));
1104       return;
1105    }
1106
1107    /*
1108      mov (1) r0.2<1>:d    0x000000c0:d     { Align1 NoMask }
1109      send (16) r110.0<1>:uw m1               r0.0<8;8,1>:uw   0x041243ff:ud    { Align1 }
1110    */
1111
1112    brw_dp_READ_16(p,
1113                   retype(vec16(reg), BRW_REGISTER_TYPE_UW),
1114                   slot);
1115 }
1116
1117
1118 /**
1119  * Retrieve up to 4 GEN4 register pairs for the given wm reg:
1120  * Args with unspill_reg != 0 will be loaded from scratch memory.
1121  */
1122 static void get_argument_regs( struct brw_wm_compile *c,
1123                                struct brw_wm_ref *arg[],
1124                                struct brw_reg *regs )
1125 {
1126    GLuint i;
1127
1128    for (i = 0; i < 4; i++) {
1129       if (arg[i]) {
1130          if (arg[i]->unspill_reg)
1131             emit_unspill(c,
1132                          brw_vec8_grf(arg[i]->unspill_reg, 0),
1133                          arg[i]->value->spill_slot);
1134
1135          regs[i] = arg[i]->hw_reg;
1136       }
1137       else {
1138          regs[i] = brw_null_reg();
1139       }
1140    }
1141 }
1142
1143
1144 /**
1145  * For values that have a spill_slot!=0, write those regs to scratch memory.
1146  */
1147 static void spill_values( struct brw_wm_compile *c,
1148                           struct brw_wm_value *values,
1149                           GLuint nr )
1150 {
1151    GLuint i;
1152
1153    for (i = 0; i < nr; i++)
1154       if (values[i].spill_slot)
1155          emit_spill(c, values[i].hw_reg, values[i].spill_slot);
1156 }
1157
1158
1159 /* Emit the fragment program instructions here.
1160  */
1161 void brw_wm_emit( struct brw_wm_compile *c )
1162 {
1163    struct brw_compile *p = &c->func;
1164    GLuint insn;
1165
1166    brw_set_compression_control(p, BRW_COMPRESSION_COMPRESSED);
1167
1168    /* Check if any of the payload regs need to be spilled:
1169     */
1170    spill_values(c, c->payload.depth, 4);
1171    spill_values(c, c->creg, c->nr_creg);
1172    spill_values(c, c->payload.input_interp, FRAG_ATTRIB_MAX);
1173
1174
1175    for (insn = 0; insn < c->nr_insns; insn++) {
1176
1177       struct brw_wm_instruction *inst = &c->instruction[insn];
1178       struct brw_reg args[3][4], dst[4];
1179       GLuint i, dst_flags;
1180
1181       /* Get argument regs:
1182        */
1183       for (i = 0; i < 3; i++)
1184          get_argument_regs(c, inst->src[i], args[i]);
1185
1186       /* Get dest regs:
1187        */
1188       for (i = 0; i < 4; i++)
1189          if (inst->dst[i])
1190             dst[i] = inst->dst[i]->hw_reg;
1191          else
1192             dst[i] = brw_null_reg();
1193
1194       /* Flags
1195        */
1196       dst_flags = inst->writemask;
1197       if (inst->saturate)
1198          dst_flags |= SATURATE;
1199
1200       switch (inst->opcode) {
1201          /* Generated instructions for calculating triangle interpolants:
1202           */
1203       case WM_PIXELXY:
1204          emit_pixel_xy(p, dst, dst_flags);
1205          break;
1206
1207       case WM_DELTAXY:
1208          emit_delta_xy(p, dst, dst_flags, args[0]);
1209          break;
1210
1211       case WM_WPOSXY:
1212          emit_wpos_xy(c, dst, dst_flags, args[0]);
1213          break;
1214
1215       case WM_PIXELW:
1216          emit_pixel_w(p, dst, dst_flags, args[0], args[1]);
1217          break;
1218
1219       case WM_LINTERP:
1220          emit_linterp(p, dst, dst_flags, args[0], args[1]);
1221          break;
1222
1223       case WM_PINTERP:
1224          emit_pinterp(p, dst, dst_flags, args[0], args[1], args[2]);
1225          break;
1226
1227       case WM_CINTERP:
1228          emit_cinterp(p, dst, dst_flags, args[0]);
1229          break;
1230
1231       case WM_FB_WRITE:
1232          emit_fb_write(c, args[0], args[1], args[2], inst->target, inst->eot);
1233          break;
1234
1235       case WM_FRONTFACING:
1236          emit_frontfacing(p, dst, dst_flags);
1237          break;
1238
1239          /* Straightforward arithmetic:
1240           */
1241       case OPCODE_ADD:
1242          emit_alu2(p, brw_ADD, dst, dst_flags, args[0], args[1]);
1243          break;
1244
1245       case OPCODE_FRC:
1246          emit_alu1(p, brw_FRC, dst, dst_flags, args[0]);
1247          break;
1248
1249       case OPCODE_FLR:
1250          emit_alu1(p, brw_RNDD, dst, dst_flags, args[0]);
1251          break;
1252
1253       case OPCODE_DP3:
1254          emit_dp3(p, dst, dst_flags, args[0], args[1]);
1255          break;
1256
1257       case OPCODE_DP4:
1258          emit_dp4(p, dst, dst_flags, args[0], args[1]);
1259          break;
1260
1261       case OPCODE_DPH:
1262          emit_dph(p, dst, dst_flags, args[0], args[1]);
1263          break;
1264
1265       case OPCODE_TRUNC:
1266          emit_trunc(p, dst, dst_flags, args[0]);
1267          break;
1268
1269       case OPCODE_LRP:
1270          emit_lrp(p, dst, dst_flags, args[0], args[1], args[2]);
1271          break;
1272
1273       case OPCODE_MAD:
1274          emit_mad(p, dst, dst_flags, args[0], args[1], args[2]);
1275          break;
1276
1277       case OPCODE_MOV:
1278       case OPCODE_SWZ:
1279          emit_alu1(p, brw_MOV, dst, dst_flags, args[0]);
1280          break;
1281
1282       case OPCODE_MUL:
1283          emit_alu2(p, brw_MUL, dst, dst_flags, args[0], args[1]);
1284          break;
1285
1286       case OPCODE_XPD:
1287          emit_xpd(p, dst, dst_flags, args[0], args[1]);
1288          break;
1289
1290          /* Higher math functions:
1291           */
1292       case OPCODE_RCP:
1293          emit_math1(p, BRW_MATH_FUNCTION_INV, dst, dst_flags, args[0]);
1294          break;
1295
1296       case OPCODE_RSQ:
1297          emit_math1(p, BRW_MATH_FUNCTION_RSQ, dst, dst_flags, args[0]);
1298          break;
1299
1300       case OPCODE_SIN:
1301          emit_math1(p, BRW_MATH_FUNCTION_SIN, dst, dst_flags, args[0]);
1302          break;
1303
1304       case OPCODE_COS:
1305          emit_math1(p, BRW_MATH_FUNCTION_COS, dst, dst_flags, args[0]);
1306          break;
1307
1308       case OPCODE_EX2:
1309          emit_math1(p, BRW_MATH_FUNCTION_EXP, dst, dst_flags, args[0]);
1310          break;
1311
1312       case OPCODE_LG2:
1313          emit_math1(p, BRW_MATH_FUNCTION_LOG, dst, dst_flags, args[0]);
1314          break;
1315
1316       case OPCODE_SCS:
1317          /* There is an scs math function, but it would need some
1318           * fixup for 16-element execution.
1319           */
1320          if (dst_flags & WRITEMASK_X)
1321             emit_math1(p, BRW_MATH_FUNCTION_COS, dst, (dst_flags&SATURATE)|WRITEMASK_X, args[0]);
1322          if (dst_flags & WRITEMASK_Y)
1323             emit_math1(p, BRW_MATH_FUNCTION_SIN, dst+1, (dst_flags&SATURATE)|WRITEMASK_X, args[0]);
1324          break;
1325
1326       case OPCODE_POW:
1327          emit_math2(p, BRW_MATH_FUNCTION_POW, dst, dst_flags, args[0], args[1]);
1328          break;
1329
1330          /* Comparisons:
1331           */
1332       case OPCODE_CMP:
1333          emit_cmp(p, dst, dst_flags, args[0], args[1], args[2]);
1334          break;
1335
1336       case OPCODE_MAX:
1337          emit_max(p, dst, dst_flags, args[0], args[1]);
1338          break;
1339
1340       case OPCODE_MIN:
1341          emit_min(p, dst, dst_flags, args[0], args[1]);
1342          break;
1343
1344       case OPCODE_SLT:
1345          emit_slt(p, dst, dst_flags, args[0], args[1]);
1346          break;
1347
1348       case OPCODE_SLE:
1349          emit_sle(p, dst, dst_flags, args[0], args[1]);
1350         break;
1351       case OPCODE_SGT:
1352          emit_sgt(p, dst, dst_flags, args[0], args[1]);
1353         break;
1354       case OPCODE_SGE:
1355          emit_sge(p, dst, dst_flags, args[0], args[1]);
1356          break;
1357       case OPCODE_SEQ:
1358          emit_seq(p, dst, dst_flags, args[0], args[1]);
1359         break;
1360       case OPCODE_SNE:
1361          emit_sne(p, dst, dst_flags, args[0], args[1]);
1362         break;
1363
1364       case OPCODE_LIT:
1365          emit_lit(p, dst, dst_flags, args[0]);
1366          break;
1367
1368          /* Texturing operations:
1369           */
1370       case OPCODE_TEX:
1371          emit_tex(c, inst, dst, dst_flags, args[0]);
1372          break;
1373
1374       case OPCODE_TXB:
1375          emit_txb(c, inst, dst, dst_flags, args[0]);
1376          break;
1377
1378       case OPCODE_KIL:
1379          emit_kil(c, args[0]);
1380          break;
1381
1382       default:
1383          _mesa_printf("Unsupported opcode %i (%s) in fragment shader\n",
1384                       inst->opcode, inst->opcode < MAX_OPCODE ?
1385                                     _mesa_opcode_string(inst->opcode) :
1386                                     "unknown");
1387       }
1388
1389       for (i = 0; i < 4; i++)
1390         if (inst->dst[i] && inst->dst[i]->spill_slot)
1391            emit_spill(c,
1392                       inst->dst[i]->hw_reg,
1393                       inst->dst[i]->spill_slot);
1394    }
1395
1396    if (INTEL_DEBUG & DEBUG_WM) {
1397       int i;
1398
1399       _mesa_printf("wm-native:\n");
1400       for (i = 0; i < p->nr_insn; i++)
1401          brw_disasm(stderr, &p->store[i]);
1402       _mesa_printf("\n");
1403    }
1404 }