src/mesa/drivers/dri/i965/brw_wm_emit.c

   1 /*
   2  Copyright (C) Intel Corp.  2006.  All Rights Reserved.
   3  Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
   4  develop this 3D driver.
   5
   6  Permission is hereby granted, free of charge, to any person obtaining
   7  a copy of this software and associated documentation files (the
   8  "Software"), to deal in the Software without restriction, including
   9  without limitation the rights to use, copy, modify, merge, publish,
  10  distribute, sublicense, and/or sell copies of the Software, and to
  11  permit persons to whom the Software is furnished to do so, subject to
  12  the following conditions:
  13
  14  The above copyright notice and this permission notice (including the
  15  next paragraph) shall be included in all copies or substantial
  16  portions of the Software.
  17
  18  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
  19  EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  20  MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
  21  IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
  22  LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
  23  OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
  24  WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
  25
  26  **********************************************************************/
  27  /*
  28   * Authors:
  29   *   Keith Whitwell <keith@tungstengraphics.com>
  30   */
  31
  32
  33 #include "main/macros.h"
  34 #include "brw_context.h"
  35 #include "brw_wm.h"
  36
  37 #define SATURATE (1<<5)
  38
  39 /* Not quite sure how correct this is - need to understand horiz
  40  * vs. vertical strides a little better.
  41  */
  42 static INLINE struct brw_reg sechalf( struct brw_reg reg )
  43 {
  44    if (reg.vstride)
  45       reg.nr++;
  46    return reg;
  47 }
  48
  49 /* Payload R0:
  50  *
  51  * R0.0 -- pixel mask, one bit for each of 4 pixels in 4 tiles,
  52  *         corresponding to each of the 16 execution channels.
  53  * R0.1..8 -- ?
  54  * R1.0 -- triangle vertex 0.X
  55  * R1.1 -- triangle vertex 0.Y
  56  * R1.2 -- tile 0 x,y coords (2 packed uwords)
  57  * R1.3 -- tile 1 x,y coords (2 packed uwords)
  58  * R1.4 -- tile 2 x,y coords (2 packed uwords)
  59  * R1.5 -- tile 3 x,y coords (2 packed uwords)
  60  * R1.6 -- ?
  61  * R1.7 -- ?
  62  * R1.8 -- ?
  63  */
  64
  65
  66 static void emit_pixel_xy(struct brw_compile *p,
  67                           const struct brw_reg *dst,
  68                           GLuint mask,
  69                           const struct brw_reg *arg0)
  70 {
  71    struct brw_reg r1 = brw_vec1_grf(1, 0);
  72    struct brw_reg r1_uw = retype(r1, BRW_REGISTER_TYPE_UW);
  73
  74    brw_set_compression_control(p, BRW_COMPRESSION_NONE);
  75
  76    /* Calculate pixel centers by adding 1 or 0 to each of the
  77     * micro-tile coordinates passed in r1.
  78     */
  79    if (mask & WRITEMASK_X) {
  80       brw_ADD(p,
  81               vec16(retype(dst[0], BRW_REGISTER_TYPE_UW)),
  82               stride(suboffset(r1_uw, 4), 2, 4, 0),
  83               brw_imm_v(0x10101010));
  84    }
  85
  86    if (mask & WRITEMASK_Y) {
  87       brw_ADD(p,
  88               vec16(retype(dst[1], BRW_REGISTER_TYPE_UW)),
  89               stride(suboffset(r1_uw,5), 2, 4, 0),
  90               brw_imm_v(0x11001100));
  91    }
  92
  93    brw_set_compression_control(p, BRW_COMPRESSION_COMPRESSED);
  94 }
  95
  96
  97
  98 static void emit_delta_xy(struct brw_compile *p,
  99                           const struct brw_reg *dst,
 100                           GLuint mask,
 101                           const struct brw_reg *arg0,
 102                           const struct brw_reg *arg1)
 103 {
 104    struct brw_reg r1 = brw_vec1_grf(1, 0);
 105
 106    /* Calc delta X,Y by subtracting origin in r1 from the pixel
 107     * centers.
 108     */
 109    if (mask & WRITEMASK_X) {
 110       brw_ADD(p,
 111               dst[0],
 112               retype(arg0[0], BRW_REGISTER_TYPE_UW),
 113               negate(r1));
 114    }
 115
 116    if (mask & WRITEMASK_Y) {
 117       brw_ADD(p,
 118               dst[1],
 119               retype(arg0[1], BRW_REGISTER_TYPE_UW),
 120               negate(suboffset(r1,1)));
 121
 122    }
 123 }
 124
 125 static void emit_wpos_xy(struct brw_wm_compile *c,
 126                          const struct brw_reg *dst,
 127                          GLuint mask,
 128                          const struct brw_reg *arg0)
 129 {
 130    struct brw_compile *p = &c->func;
 131
 132    /* Calculate the pixel offset from window bottom left into destination
 133     * X and Y channels.
 134     */
 135    if (mask & WRITEMASK_X) {
 136       /* X' = X - origin */
 137       brw_ADD(p,
 138               dst[0],
 139               retype(arg0[0], BRW_REGISTER_TYPE_W),
 140               brw_imm_d(0 - c->key.origin_x));
 141    }
 142
 143    if (mask & WRITEMASK_Y) {
 144       /* Y' = height - (Y - origin_y) = height + origin_y - Y */
 145       brw_ADD(p,
 146               dst[1],
 147               negate(retype(arg0[1], BRW_REGISTER_TYPE_W)),
 148               brw_imm_d(c->key.origin_y + c->key.drawable_height - 1));
 149    }
 150 }
 151
 152
 153 static void emit_pixel_w( struct brw_compile *p,
 154                           const struct brw_reg *dst,
 155                           GLuint mask,
 156                           const struct brw_reg *arg0,
 157                           const struct brw_reg *deltas)
 158 {
 159    /* Don't need this if all you are doing is interpolating color, for
 160     * instance.
 161     */
 162    if (mask & WRITEMASK_W) {
 163       struct brw_reg interp3 = brw_vec1_grf(arg0[0].nr+1, 4);
 164
 165       /* Calc 1/w - just linterp wpos[3] optimized by putting the
 166        * result straight into a message reg.
 167        */
 168       brw_LINE(p, brw_null_reg(), interp3, deltas[0]);
 169       brw_MAC(p, brw_message_reg(2), suboffset(interp3, 1), deltas[1]);
 170
 171       /* Calc w */
 172       brw_math_16( p, dst[3],
 173                    BRW_MATH_FUNCTION_INV,
 174                    BRW_MATH_SATURATE_NONE,
 175                    2, brw_null_reg(),
 176                    BRW_MATH_PRECISION_FULL);
 177    }
 178 }
 179
 180
 181
 182 static void emit_linterp( struct brw_compile *p,
 183                          const struct brw_reg *dst,
 184                          GLuint mask,
 185                          const struct brw_reg *arg0,
 186                          const struct brw_reg *deltas )
 187 {
 188    struct brw_reg interp[4];
 189    GLuint nr = arg0[0].nr;
 190    GLuint i;
 191
 192    interp[0] = brw_vec1_grf(nr, 0);
 193    interp[1] = brw_vec1_grf(nr, 4);
 194    interp[2] = brw_vec1_grf(nr+1, 0);
 195    interp[3] = brw_vec1_grf(nr+1, 4);
 196
 197    for (i = 0; i < 4; i++) {
 198       if (mask & (1<<i)) {
 199          brw_LINE(p, brw_null_reg(), interp[i], deltas[0]);
 200          brw_MAC(p, dst[i], suboffset(interp[i],1), deltas[1]);
 201       }
 202    }
 203 }
 204
 205
 206 static void emit_pinterp( struct brw_compile *p,
 207                           const struct brw_reg *dst,
 208                           GLuint mask,
 209                           const struct brw_reg *arg0,
 210                           const struct brw_reg *deltas,
 211                           const struct brw_reg *w)
 212 {
 213    struct brw_reg interp[4];
 214    GLuint nr = arg0[0].nr;
 215    GLuint i;
 216
 217    interp[0] = brw_vec1_grf(nr, 0);
 218    interp[1] = brw_vec1_grf(nr, 4);
 219    interp[2] = brw_vec1_grf(nr+1, 0);
 220    interp[3] = brw_vec1_grf(nr+1, 4);
 221
 222    for (i = 0; i < 4; i++) {
 223       if (mask & (1<<i)) {
 224          brw_LINE(p, brw_null_reg(), interp[i], deltas[0]);
 225          brw_MAC(p, dst[i], suboffset(interp[i],1), deltas[1]);
 226       }
 227    }
 228    for (i = 0; i < 4; i++) {
 229       if (mask & (1<<i)) {
 230          brw_MUL(p, dst[i], dst[i], w[3]);
 231       }
 232    }
 233 }
 234
 235
 236 static void emit_cinterp( struct brw_compile *p,
 237                          const struct brw_reg *dst,
 238                          GLuint mask,
 239                          const struct brw_reg *arg0 )
 240 {
 241    struct brw_reg interp[4];
 242    GLuint nr = arg0[0].nr;
 243    GLuint i;
 244
 245    interp[0] = brw_vec1_grf(nr, 0);
 246    interp[1] = brw_vec1_grf(nr, 4);
 247    interp[2] = brw_vec1_grf(nr+1, 0);
 248    interp[3] = brw_vec1_grf(nr+1, 4);
 249
 250    for (i = 0; i < 4; i++) {
 251       if (mask & (1<<i)) {
 252          brw_MOV(p, dst[i], suboffset(interp[i],3));    /* TODO: optimize away like other moves */
 253       }
 254    }
 255 }
 256
 257 /* Sets the destination channels to 1.0 or 0.0 according to glFrontFacing. */
 258 static void emit_frontfacing( struct brw_compile *p,
 259                               const struct brw_reg *dst,
 260                               GLuint mask )
 261 {
 262    struct brw_reg r1_6ud = retype(brw_vec1_grf(1, 6), BRW_REGISTER_TYPE_UD);
 263    GLuint i;
 264
 265    if (!(mask & WRITEMASK_XYZW))
 266       return;
 267
 268    for (i = 0; i < 4; i++) {
 269       if (mask & (1<<i)) {
 270          brw_MOV(p, dst[i], brw_imm_f(0.0));
 271       }
 272    }
 273
 274    /* bit 31 is "primitive is back face", so checking < (1 << 31) gives
 275     * us front face
 276     */
 277    brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_L, r1_6ud, brw_imm_ud(1 << 31));
 278    for (i = 0; i < 4; i++) {
 279       if (mask & (1<<i)) {
 280          brw_MOV(p, dst[i], brw_imm_f(1.0));
 281       }
 282    }
 283    brw_set_predicate_control_flag_value(p, 0xff);
 284 }
 285
 286 static void emit_alu1( struct brw_compile *p,
 287                        struct brw_instruction *(*func)(struct brw_compile *,
 288                                                        struct brw_reg,
 289                                                        struct brw_reg),
 290                        const struct brw_reg *dst,
 291                        GLuint mask,
 292                        const struct brw_reg *arg0 )
 293 {
 294    GLuint i;
 295
 296    if (mask & SATURATE)
 297       brw_set_saturate(p, 1);
 298
 299    for (i = 0; i < 4; i++) {
 300       if (mask & (1<<i)) {
 301          func(p, dst[i], arg0[i]);
 302       }
 303    }
 304
 305    if (mask & SATURATE)
 306       brw_set_saturate(p, 0);
 307 }
 308
 309
 310 static void emit_alu2( struct brw_compile *p,
 311                        struct brw_instruction *(*func)(struct brw_compile *,
 312                                                        struct brw_reg,
 313                                                        struct brw_reg,
 314                                                        struct brw_reg),
 315                        const struct brw_reg *dst,
 316                        GLuint mask,
 317                        const struct brw_reg *arg0,
 318                        const struct brw_reg *arg1 )
 319 {
 320    GLuint i;
 321
 322    if (mask & SATURATE)
 323       brw_set_saturate(p, 1);
 324
 325    for (i = 0; i < 4; i++) {
 326       if (mask & (1<<i)) {
 327          func(p, dst[i], arg0[i], arg1[i]);
 328       }
 329    }
 330
 331    if (mask & SATURATE)
 332       brw_set_saturate(p, 0);
 333 }
 334
 335
 336 static void emit_mad( struct brw_compile *p,
 337                       const struct brw_reg *dst,
 338                       GLuint mask,
 339                       const struct brw_reg *arg0,
 340                       const struct brw_reg *arg1,
 341                       const struct brw_reg *arg2 )
 342 {
 343    GLuint i;
 344
 345    for (i = 0; i < 4; i++) {
 346       if (mask & (1<<i)) {
 347          brw_MUL(p, dst[i], arg0[i], arg1[i]);
 348
 349          brw_set_saturate(p, (mask & SATURATE) ? 1 : 0);
 350          brw_ADD(p, dst[i], dst[i], arg2[i]);
 351          brw_set_saturate(p, 0);
 352       }
 353    }
 354 }
 355
 356 static void emit_trunc( struct brw_compile *p,
 357                       const struct brw_reg *dst,
 358                       GLuint mask,
 359                       const struct brw_reg *arg0)
 360 {
 361    GLuint i;
 362
 363    for (i = 0; i < 4; i++) {
 364       if (mask & (1<<i)) {
 365          brw_RNDZ(p, dst[i], arg0[i]);
 366       }
 367    }
 368 }
 369
 370 static void emit_lrp( struct brw_compile *p,
 371                       const struct brw_reg *dst,
 372                       GLuint mask,
 373                       const struct brw_reg *arg0,
 374                       const struct brw_reg *arg1,
 375                       const struct brw_reg *arg2 )
 376 {
 377    GLuint i;
 378
 379    /* Uses dst as a temporary:
 380     */
 381    for (i = 0; i < 4; i++) {
 382       if (mask & (1<<i)) {
 383          /* Can I use the LINE instruction for this?
 384           */
 385          brw_ADD(p, dst[i], negate(arg0[i]), brw_imm_f(1.0));
 386          brw_MUL(p, brw_null_reg(), dst[i], arg2[i]);
 387
 388          brw_set_saturate(p, (mask & SATURATE) ? 1 : 0);
 389          brw_MAC(p, dst[i], arg0[i], arg1[i]);
 390          brw_set_saturate(p, 0);
 391       }
 392    }
 393 }
 394
 395 static void emit_sop( struct brw_compile *p,
 396                       const struct brw_reg *dst,
 397                       GLuint mask,
 398                       GLuint cond,
 399                       const struct brw_reg *arg0,
 400                       const struct brw_reg *arg1 )
 401 {
 402    GLuint i;
 403
 404    for (i = 0; i < 4; i++) {
 405       if (mask & (1<<i)) {
 406          brw_MOV(p, dst[i], brw_imm_f(0));
 407          brw_CMP(p, brw_null_reg(), cond, arg0[i], arg1[i]);
 408          brw_MOV(p, dst[i], brw_imm_f(1.0));
 409          brw_set_predicate_control_flag_value(p, 0xff);
 410       }
 411    }
 412 }
 413
 414 static void emit_slt( struct brw_compile *p,
 415                       const struct brw_reg *dst,
 416                       GLuint mask,
 417                       const struct brw_reg *arg0,
 418                       const struct brw_reg *arg1 )
 419 {
 420    emit_sop(p, dst, mask, BRW_CONDITIONAL_L, arg0, arg1);
 421 }
 422
 423 static void emit_sle( struct brw_compile *p,
 424                       const struct brw_reg *dst,
 425                       GLuint mask,
 426                       const struct brw_reg *arg0,
 427                       const struct brw_reg *arg1 )
 428 {
 429    emit_sop(p, dst, mask, BRW_CONDITIONAL_LE, arg0, arg1);
 430 }
 431
 432 static void emit_sgt( struct brw_compile *p,
 433                       const struct brw_reg *dst,
 434                       GLuint mask,
 435                       const struct brw_reg *arg0,
 436                       const struct brw_reg *arg1 )
 437 {
 438    emit_sop(p, dst, mask, BRW_CONDITIONAL_G, arg0, arg1);
 439 }
 440
 441 static void emit_sge( struct brw_compile *p,
 442                       const struct brw_reg *dst,
 443                       GLuint mask,
 444                       const struct brw_reg *arg0,
 445                       const struct brw_reg *arg1 )
 446 {
 447    emit_sop(p, dst, mask, BRW_CONDITIONAL_GE, arg0, arg1);
 448 }
 449
 450 static void emit_seq( struct brw_compile *p,
 451                       const struct brw_reg *dst,
 452                       GLuint mask,
 453                       const struct brw_reg *arg0,
 454                       const struct brw_reg *arg1 )
 455 {
 456    emit_sop(p, dst, mask, BRW_CONDITIONAL_EQ, arg0, arg1);
 457 }
 458
 459 static void emit_sne( struct brw_compile *p,
 460                       const struct brw_reg *dst,
 461                       GLuint mask,
 462                       const struct brw_reg *arg0,
 463                       const struct brw_reg *arg1 )
 464 {
 465    emit_sop(p, dst, mask, BRW_CONDITIONAL_NEQ, arg0, arg1);
 466 }
 467
 468 static void emit_cmp( struct brw_compile *p,
 469                       const struct brw_reg *dst,
 470                       GLuint mask,
 471                       const struct brw_reg *arg0,
 472                       const struct brw_reg *arg1,
 473                       const struct brw_reg *arg2 )
 474 {
 475    GLuint i;
 476
 477    for (i = 0; i < 4; i++) {
 478       if (mask & (1<<i)) {
 479          brw_set_saturate(p, (mask & SATURATE) ? 1 : 0);
 480          brw_MOV(p, dst[i], arg2[i]);
 481          brw_set_saturate(p, 0);
 482
 483          brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_L, arg0[i], brw_imm_f(0));
 484
 485          brw_set_saturate(p, (mask & SATURATE) ? 1 : 0);
 486          brw_MOV(p, dst[i], arg1[i]);
 487          brw_set_saturate(p, 0);
 488          brw_set_predicate_control_flag_value(p, 0xff);
 489       }
 490    }
 491 }
 492
 493 static void emit_max( struct brw_compile *p,
 494                       const struct brw_reg *dst,
 495                       GLuint mask,
 496                       const struct brw_reg *arg0,
 497                       const struct brw_reg *arg1 )
 498 {
 499    GLuint i;
 500
 501    for (i = 0; i < 4; i++) {
 502       if (mask & (1<<i)) {
 503          brw_set_saturate(p, (mask & SATURATE) ? 1 : 0);
 504          brw_MOV(p, dst[i], arg0[i]);
 505          brw_set_saturate(p, 0);
 506
 507          brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_L, arg0[i], arg1[i]);
 508
 509          brw_set_saturate(p, (mask & SATURATE) ? 1 : 0);
 510          brw_MOV(p, dst[i], arg1[i]);
 511          brw_set_saturate(p, 0);
 512          brw_set_predicate_control_flag_value(p, 0xff);
 513       }
 514    }
 515 }
 516
 517 static void emit_min( struct brw_compile *p,
 518                       const struct brw_reg *dst,
 519                       GLuint mask,
 520                       const struct brw_reg *arg0,
 521                       const struct brw_reg *arg1 )
 522 {
 523    GLuint i;
 524
 525    for (i = 0; i < 4; i++) {
 526       if (mask & (1<<i)) {
 527          brw_set_saturate(p, (mask & SATURATE) ? 1 : 0);
 528          brw_MOV(p, dst[i], arg1[i]);
 529          brw_set_saturate(p, 0);
 530
 531          brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_L, arg0[i], arg1[i]);
 532
 533          brw_set_saturate(p, (mask & SATURATE) ? 1 : 0);
 534          brw_MOV(p, dst[i], arg0[i]);
 535          brw_set_saturate(p, 0);
 536          brw_set_predicate_control_flag_value(p, 0xff);
 537       }
 538    }
 539 }
 540
 541
 542 static void emit_dp3( struct brw_compile *p,
 543                       const struct brw_reg *dst,
 544                       GLuint mask,
 545                       const struct brw_reg *arg0,
 546                       const struct brw_reg *arg1 )
 547 {
 548    if (!(mask & WRITEMASK_XYZW))
 549       return; /* Do not emit dead code */
 550
 551    assert((mask & WRITEMASK_XYZW) == WRITEMASK_X);
 552
 553    brw_MUL(p, brw_null_reg(), arg0[0], arg1[0]);
 554    brw_MAC(p, brw_null_reg(), arg0[1], arg1[1]);
 555
 556    brw_set_saturate(p, (mask & SATURATE) ? 1 : 0);
 557    brw_MAC(p, dst[0], arg0[2], arg1[2]);
 558    brw_set_saturate(p, 0);
 559 }
 560
 561
 562 static void emit_dp4( struct brw_compile *p,
 563                       const struct brw_reg *dst,
 564                       GLuint mask,
 565                       const struct brw_reg *arg0,
 566                       const struct brw_reg *arg1 )
 567 {
 568    if (!(mask & WRITEMASK_XYZW))
 569       return; /* Do not emit dead code */
 570
 571    assert((mask & WRITEMASK_XYZW) == WRITEMASK_X);
 572
 573    brw_MUL(p, brw_null_reg(), arg0[0], arg1[0]);
 574    brw_MAC(p, brw_null_reg(), arg0[1], arg1[1]);
 575    brw_MAC(p, brw_null_reg(), arg0[2], arg1[2]);
 576
 577    brw_set_saturate(p, (mask & SATURATE) ? 1 : 0);
 578    brw_MAC(p, dst[0], arg0[3], arg1[3]);
 579    brw_set_saturate(p, 0);
 580 }
 581
 582
 583 static void emit_dph( struct brw_compile *p,
 584                       const struct brw_reg *dst,
 585                       GLuint mask,
 586                       const struct brw_reg *arg0,
 587                       const struct brw_reg *arg1 )
 588 {
 589    if (!(mask & WRITEMASK_XYZW))
 590       return; /* Do not emit dead code */
 591
 592    assert((mask & WRITEMASK_XYZW) == WRITEMASK_X);
 593
 594    brw_MUL(p, brw_null_reg(), arg0[0], arg1[0]);
 595    brw_MAC(p, brw_null_reg(), arg0[1], arg1[1]);
 596    brw_MAC(p, dst[0], arg0[2], arg1[2]);
 597
 598    brw_set_saturate(p, (mask & SATURATE) ? 1 : 0);
 599    brw_ADD(p, dst[0], dst[0], arg1[3]);
 600    brw_set_saturate(p, 0);
 601 }
 602
 603
 604 static void emit_xpd( struct brw_compile *p,
 605                       const struct brw_reg *dst,
 606                       GLuint mask,
 607                       const struct brw_reg *arg0,
 608                       const struct brw_reg *arg1 )
 609 {
 610    GLuint i;
 611
 612    assert(!(mask & WRITEMASK_W) == WRITEMASK_X);
 613
 614    for (i = 0 ; i < 3; i++) {
 615       if (mask & (1<<i)) {
 616          GLuint i2 = (i+2)%3;
 617          GLuint i1 = (i+1)%3;
 618
 619          brw_MUL(p, brw_null_reg(), negate(arg0[i2]), arg1[i1]);
 620
 621          brw_set_saturate(p, (mask & SATURATE) ? 1 : 0);
 622          brw_MAC(p, dst[i], arg0[i1], arg1[i2]);
 623          brw_set_saturate(p, 0);
 624       }
 625    }
 626 }
 627
 628
 629 static void emit_math1( struct brw_compile *p,
 630                         GLuint function,
 631                         const struct brw_reg *dst,
 632                         GLuint mask,
 633                         const struct brw_reg *arg0 )
 634 {
 635    if (!(mask & WRITEMASK_XYZW))
 636       return; /* Do not emit dead code */
 637
 638    //assert((mask & WRITEMASK_XYZW) == WRITEMASK_X ||
 639    //     function == BRW_MATH_FUNCTION_SINCOS);
 640
 641    brw_MOV(p, brw_message_reg(2), arg0[0]);
 642
 643    /* Send two messages to perform all 16 operations:
 644     */
 645    brw_math_16(p,
 646                dst[0],
 647                function,
 648                (mask & SATURATE) ? BRW_MATH_SATURATE_SATURATE : BRW_MATH_SATURATE_NONE,
 649                2,
 650                brw_null_reg(),
 651                BRW_MATH_PRECISION_FULL);
 652 }
 653
 654
 655 static void emit_math2( struct brw_compile *p,
 656                         GLuint function,
 657                         const struct brw_reg *dst,
 658                         GLuint mask,
 659                         const struct brw_reg *arg0,
 660                         const struct brw_reg *arg1)
 661 {
 662    if (!(mask & WRITEMASK_XYZW))
 663       return; /* Do not emit dead code */
 664
 665    assert((mask & WRITEMASK_XYZW) == WRITEMASK_X);
 666
 667    brw_push_insn_state(p);
 668
 669    brw_set_compression_control(p, BRW_COMPRESSION_NONE);
 670    brw_MOV(p, brw_message_reg(2), arg0[0]);
 671    brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF);
 672    brw_MOV(p, brw_message_reg(4), sechalf(arg0[0]));
 673
 674    brw_set_compression_control(p, BRW_COMPRESSION_NONE);
 675    brw_MOV(p, brw_message_reg(3), arg1[0]);
 676    brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF);
 677    brw_MOV(p, brw_message_reg(5), sechalf(arg1[0]));
 678
 679
 680    /* Send two messages to perform all 16 operations:
 681     */
 682    brw_set_compression_control(p, BRW_COMPRESSION_NONE);
 683    brw_math(p,
 684             dst[0],
 685             function,
 686             (mask & SATURATE) ? BRW_MATH_SATURATE_SATURATE : BRW_MATH_SATURATE_NONE,
 687             2,
 688             brw_null_reg(),
 689             BRW_MATH_DATA_VECTOR,
 690             BRW_MATH_PRECISION_FULL);
 691
 692    brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF);
 693    brw_math(p,
 694             offset(dst[0],1),
 695             function,
 696             (mask & SATURATE) ? BRW_MATH_SATURATE_SATURATE : BRW_MATH_SATURATE_NONE,
 697             4,
 698             brw_null_reg(),
 699             BRW_MATH_DATA_VECTOR,
 700             BRW_MATH_PRECISION_FULL);
 701
 702    brw_pop_insn_state(p);
 703 }
 704
 705
 706
 707 static void emit_tex( struct brw_wm_compile *c,
 708                       const struct brw_wm_instruction *inst,
 709                       struct brw_reg *dst,
 710                       GLuint dst_flags,
 711                       struct brw_reg *arg )
 712 {
 713    struct brw_compile *p = &c->func;
 714    GLuint msgLength, responseLength;
 715    GLuint i, nr;
 716    GLuint emit;
 717
 718    /* How many input regs are there?
 719     */
 720    switch (inst->tex_idx) {
 721    case TEXTURE_1D_INDEX:
 722       emit = WRITEMASK_X;
 723       nr = 1;
 724       break;
 725    case TEXTURE_2D_INDEX:
 726    case TEXTURE_RECT_INDEX:
 727       emit = WRITEMASK_XY;
 728       nr = 2;
 729       break;
 730    default:
 731       emit = WRITEMASK_XYZ;
 732       nr = 3;
 733       break;
 734    }
 735
 736    if (inst->tex_shadow) {
 737       nr = 4;
 738       emit |= WRITEMASK_W;
 739    }
 740
 741    msgLength = 1;
 742
 743    for (i = 0; i < nr; i++) {
 744       static const GLuint swz[4] = {0,1,2,2};
 745       if (emit & (1<<i))
 746          brw_MOV(p, brw_message_reg(msgLength+1), arg[swz[i]]);
 747       else
 748          brw_MOV(p, brw_message_reg(msgLength+1), brw_imm_f(0));
 749       msgLength += 2;
 750    }
 751
 752    responseLength = 8;          /* always */
 753
 754    brw_SAMPLE(p,
 755               retype(vec16(dst[0]), BRW_REGISTER_TYPE_UW),
 756               1,
 757               retype(c->payload.depth[0].hw_reg, BRW_REGISTER_TYPE_UW),
 758               SURF_INDEX_TEXTURE(inst->tex_unit),
 759               inst->tex_unit,     /* sampler */
 760               inst->writemask,
 761               (inst->tex_shadow ?
 762                BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_COMPARE :
 763                BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE),
 764               responseLength,
 765               msgLength,
 766               0);
 767 }
 768
 769
 770 static void emit_txb( struct brw_wm_compile *c,
 771                       const struct brw_wm_instruction *inst,
 772                       struct brw_reg *dst,
 773                       GLuint dst_flags,
 774                       struct brw_reg *arg )
 775 {
 776    struct brw_compile *p = &c->func;
 777    GLuint msgLength;
 778
 779    /* Shadow ignored for txb.
 780     */
 781    switch (inst->tex_idx) {
 782    case TEXTURE_1D_INDEX:
 783       brw_MOV(p, brw_message_reg(2), arg[0]);
 784       brw_MOV(p, brw_message_reg(4), brw_imm_f(0));
 785       brw_MOV(p, brw_message_reg(6), brw_imm_f(0));
 786       break;
 787    case TEXTURE_2D_INDEX:
 788    case TEXTURE_RECT_INDEX:
 789       brw_MOV(p, brw_message_reg(2), arg[0]);
 790       brw_MOV(p, brw_message_reg(4), arg[1]);
 791       brw_MOV(p, brw_message_reg(6), brw_imm_f(0));
 792       break;
 793    default:
 794       brw_MOV(p, brw_message_reg(2), arg[0]);
 795       brw_MOV(p, brw_message_reg(4), arg[1]);
 796       brw_MOV(p, brw_message_reg(6), arg[2]);
 797       break;
 798    }
 799
 800    brw_MOV(p, brw_message_reg(8), arg[3]);
 801    msgLength = 9;
 802
 803    brw_SAMPLE(p,
 804               retype(vec16(dst[0]), BRW_REGISTER_TYPE_UW),
 805               1,
 806               retype(c->payload.depth[0].hw_reg, BRW_REGISTER_TYPE_UW),
 807               SURF_INDEX_TEXTURE(inst->tex_unit),
 808               inst->tex_unit,     /* sampler */
 809               inst->writemask,
 810               BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_BIAS,
 811               8,                /* responseLength */
 812               msgLength,
 813               0);
 814 }
 815
 816
 817 static void emit_lit( struct brw_compile *p,
 818                       const struct brw_reg *dst,
 819                       GLuint mask,
 820                       const struct brw_reg *arg0 )
 821 {
 822    assert((mask & WRITEMASK_XW) == 0);
 823
 824    if (mask & WRITEMASK_Y) {
 825       brw_set_saturate(p, (mask & SATURATE) ? 1 : 0);
 826       brw_MOV(p, dst[1], arg0[0]);
 827       brw_set_saturate(p, 0);
 828    }
 829
 830    if (mask & WRITEMASK_Z) {
 831       emit_math2(p, BRW_MATH_FUNCTION_POW,
 832                  &dst[2],
 833                  WRITEMASK_X | (mask & SATURATE),
 834                  &arg0[1],
 835                  &arg0[3]);
 836    }
 837
 838    /* Ordinarily you'd use an iff statement to skip or shortcircuit
 839     * some of the POW calculations above, but 16-wide iff statements
 840     * seem to lock c1 hardware, so this is a nasty workaround:
 841     */
 842    brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_LE, arg0[0], brw_imm_f(0));
 843    {
 844       if (mask & WRITEMASK_Y)
 845          brw_MOV(p, dst[1], brw_imm_f(0));
 846
 847       if (mask & WRITEMASK_Z)
 848          brw_MOV(p, dst[2], brw_imm_f(0));
 849    }
 850    brw_set_predicate_control(p, BRW_PREDICATE_NONE);
 851 }
 852
 853
 854 /* Kill pixel - set execution mask to zero for those pixels which
 855  * fail.
 856  */
 857 static void emit_kil( struct brw_wm_compile *c,
 858                       struct brw_reg *arg0)
 859 {
 860    struct brw_compile *p = &c->func;
 861    struct brw_reg r0uw = retype(brw_vec1_grf(0, 0), BRW_REGISTER_TYPE_UW);
 862    GLuint i;
 863
 864    /* XXX - usually won't need 4 compares!
 865     */
 866    for (i = 0; i < 4; i++) {
 867       brw_push_insn_state(p);
 868       brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_GE, arg0[i], brw_imm_f(0));
 869       brw_set_predicate_control_flag_value(p, 0xff);
 870       brw_set_compression_control(p, BRW_COMPRESSION_NONE);
 871       brw_AND(p, r0uw, brw_flag_reg(), r0uw);
 872       brw_pop_insn_state(p);
 873    }
 874 }
 875
 876
 877 static void fire_fb_write( struct brw_wm_compile *c,
 878                            GLuint base_reg,
 879                            GLuint nr,
 880                            GLuint target,
 881                            GLuint eot )
 882 {
 883    struct brw_compile *p = &c->func;
 884
 885    /* Pass through control information:
 886     */
 887 /*  mov (8) m1.0<1>:ud   r1.0<8;8,1>:ud   { Align1 NoMask } */
 888    {
 889       brw_push_insn_state(p);
 890       brw_set_mask_control(p, BRW_MASK_DISABLE); /* ? */
 891       brw_set_compression_control(p, BRW_COMPRESSION_NONE);
 892       brw_MOV(p,
 893                brw_message_reg(base_reg + 1),
 894                brw_vec8_grf(1, 0));
 895       brw_pop_insn_state(p);
 896    }
 897
 898    /* Send framebuffer write message: */
 899 /*  send (16) null.0<1>:uw m0               r0.0<8;8,1>:uw   0x85a04000:ud    { Align1 EOT } */
 900    brw_fb_WRITE(p,
 901                 retype(vec16(brw_null_reg()), BRW_REGISTER_TYPE_UW),
 902                 base_reg,
 903                 retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UW),
 904                 target,
 905                 nr,
 906                 0,
 907                 eot);
 908 }
 909
 910
 911 static void emit_aa( struct brw_wm_compile *c,
 912                      struct brw_reg *arg1,
 913                      GLuint reg )
 914 {
 915    struct brw_compile *p = &c->func;
 916    GLuint comp = c->key.aa_dest_stencil_reg / 2;
 917    GLuint off = c->key.aa_dest_stencil_reg % 2;
 918    struct brw_reg aa = offset(arg1[comp], off);
 919
 920    brw_push_insn_state(p);
 921    brw_set_compression_control(p, BRW_COMPRESSION_NONE); /* ?? */
 922    brw_MOV(p, brw_message_reg(reg), aa);
 923    brw_pop_insn_state(p);
 924 }
 925
 926
 927 /* Post-fragment-program processing.  Send the results to the
 928  * framebuffer.
 929  * \param arg0  the fragment color
 930  * \param arg1  the pass-through depth value
 931  * \param arg2  the shader-computed depth value
 932  */
 933 static void emit_fb_write( struct brw_wm_compile *c,
 934                            struct brw_reg *arg0,
 935                            struct brw_reg *arg1,
 936                            struct brw_reg *arg2,
 937                            GLuint target,
 938                            GLuint eot)
 939 {
 940    struct brw_compile *p = &c->func;
 941    GLuint nr = 2;
 942    GLuint channel;
 943
 944    /* Reserve a space for AA - may not be needed:
 945     */
 946    if (c->key.aa_dest_stencil_reg)
 947       nr += 1;
 948
 949    /* I don't really understand how this achieves the color interleave
 950     * (ie RGBARGBA) in the result:  [Do the saturation here]
 951     */
 952    {
 953       brw_push_insn_state(p);
 954
 955       for (channel = 0; channel < 4; channel++) {
 956          /*  mov (8) m2.0<1>:ud   r28.0<8;8,1>:ud  { Align1 } */
 957          /*  mov (8) m6.0<1>:ud   r29.0<8;8,1>:ud  { Align1 SecHalf } */
 958
 959          brw_set_compression_control(p, BRW_COMPRESSION_NONE);
 960          brw_MOV(p,
 961                  brw_message_reg(nr + channel),
 962                  arg0[channel]);
 963
 964          brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF);
 965          brw_MOV(p,
 966                  brw_message_reg(nr + channel + 4),
 967                  sechalf(arg0[channel]));
 968       }
 969
 970       /* skip over the regs populated above:
 971        */
 972       nr += 8;
 973
 974       brw_pop_insn_state(p);
 975    }
 976
 977    if (c->key.source_depth_to_render_target)
 978    {
 979       if (c->key.computes_depth)
 980          brw_MOV(p, brw_message_reg(nr), arg2[2]);
 981       else
 982          brw_MOV(p, brw_message_reg(nr), arg1[1]); /* ? */
 983
 984       nr += 2;
 985    }
 986
 987    if (c->key.dest_depth_reg)
 988    {
 989       GLuint comp = c->key.dest_depth_reg / 2;
 990       GLuint off = c->key.dest_depth_reg % 2;
 991
 992       if (off != 0) {
 993          brw_push_insn_state(p);
 994          brw_set_compression_control(p, BRW_COMPRESSION_NONE);
 995
 996          brw_MOV(p, brw_message_reg(nr), offset(arg1[comp],1));
 997          /* 2nd half? */
 998          brw_MOV(p, brw_message_reg(nr+1), arg1[comp+1]);
 999          brw_pop_insn_state(p);
1000       }
1001       else {
1002          brw_MOV(p, brw_message_reg(nr), arg1[comp]);
1003       }
1004       nr += 2;
1005    }
1006
1007    if (!c->key.runtime_check_aads_emit) {
1008       if (c->key.aa_dest_stencil_reg)
1009          emit_aa(c, arg1, 2);
1010
1011       fire_fb_write(c, 0, nr, target, eot);
1012    }
1013    else {
1014       struct brw_reg v1_null_ud = vec1(retype(brw_null_reg(), BRW_REGISTER_TYPE_UD));
1015       struct brw_reg ip = brw_ip_reg();
1016       struct brw_instruction *jmp;
1017
1018       brw_set_compression_control(p, BRW_COMPRESSION_NONE);
1019       brw_set_conditionalmod(p, BRW_CONDITIONAL_Z);
1020       brw_AND(p,
1021               v1_null_ud,
1022               get_element_ud(brw_vec8_grf(1,0), 6),
1023               brw_imm_ud(1<<26));
1024
1025       jmp = brw_JMPI(p, ip, ip, brw_imm_d(0));
1026       {
1027          emit_aa(c, arg1, 2);
1028          fire_fb_write(c, 0, nr, target, eot);
1029          /* note - thread killed in subroutine */
1030       }
1031       brw_land_fwd_jump(p, jmp);
1032
1033       /* ELSE: Shuffle up one register to fill in the hole left for AA:
1034        */
1035       fire_fb_write(c, 1, nr-1, target, eot);
1036    }
1037 }
1038
1039
1040 /**
1041  * Move a GPR to scratch memory.
1042  */
1043 static void emit_spill( struct brw_wm_compile *c,
1044                         struct brw_reg reg,
1045                         GLuint slot )
1046 {
1047    struct brw_compile *p = &c->func;
1048
1049    /*
1050      mov (16) m2.0<1>:ud   r2.0<8;8,1>:ud   { Align1 Compr }
1051    */
1052    brw_MOV(p, brw_message_reg(2), reg);
1053
1054    /*
1055      mov (1) r0.2<1>:d    0x00000080:d     { Align1 NoMask }
1056      send (16) null.0<1>:uw m1               r0.0<8;8,1>:uw   0x053003ff:ud    { Align1 }
1057    */
1058    brw_dp_WRITE_16(p,
1059                    retype(vec16(brw_vec8_grf(0, 0)), BRW_REGISTER_TYPE_UW),
1060                    slot);
1061 }
1062
1063
1064 /**
1065  * Load a GPR from scratch memory.
1066  */
1067 static void emit_unspill( struct brw_wm_compile *c,
1068                           struct brw_reg reg,
1069                           GLuint slot )
1070 {
1071    struct brw_compile *p = &c->func;
1072
1073    /* Slot 0 is the undef value.
1074     */
1075    if (slot == 0) {
1076       brw_MOV(p, reg, brw_imm_f(0));
1077       return;
1078    }
1079
1080    /*
1081      mov (1) r0.2<1>:d    0x000000c0:d     { Align1 NoMask }
1082      send (16) r110.0<1>:uw m1               r0.0<8;8,1>:uw   0x041243ff:ud    { Align1 }
1083    */
1084
1085    brw_dp_READ_16(p,
1086                   retype(vec16(reg), BRW_REGISTER_TYPE_UW),
1087                   slot);
1088 }
1089
1090
1091 /**
1092  * Retrieve up to 4 GEN4 register pairs for the given wm reg:
1093  * Args with unspill_reg != 0 will be loaded from scratch memory.
1094  */
1095 static void get_argument_regs( struct brw_wm_compile *c,
1096                                struct brw_wm_ref *arg[],
1097                                struct brw_reg *regs )
1098 {
1099    GLuint i;
1100
1101    for (i = 0; i < 4; i++) {
1102       if (arg[i]) {
1103          if (arg[i]->unspill_reg)
1104             emit_unspill(c,
1105                          brw_vec8_grf(arg[i]->unspill_reg, 0),
1106                          arg[i]->value->spill_slot);
1107
1108          regs[i] = arg[i]->hw_reg;
1109       }
1110       else {
1111          regs[i] = brw_null_reg();
1112       }
1113    }
1114 }
1115
1116
1117 /**
1118  * For values that have a spill_slot!=0, write those regs to scratch memory.
1119  */
1120 static void spill_values( struct brw_wm_compile *c,
1121                           struct brw_wm_value *values,
1122                           GLuint nr )
1123 {
1124    GLuint i;
1125
1126    for (i = 0; i < nr; i++)
1127       if (values[i].spill_slot)
1128          emit_spill(c, values[i].hw_reg, values[i].spill_slot);
1129 }
1130
1131
1132 /* Emit the fragment program instructions here.
1133  */
1134 void brw_wm_emit( struct brw_wm_compile *c )
1135 {
1136    struct brw_compile *p = &c->func;
1137    GLuint insn;
1138
1139    brw_set_compression_control(p, BRW_COMPRESSION_COMPRESSED);
1140
1141    /* Check if any of the payload regs need to be spilled:
1142     */
1143    spill_values(c, c->payload.depth, 4);
1144    spill_values(c, c->creg, c->nr_creg);
1145    spill_values(c, c->payload.input_interp, FRAG_ATTRIB_MAX);
1146
1147
1148    for (insn = 0; insn < c->nr_insns; insn++) {
1149
1150       struct brw_wm_instruction *inst = &c->instruction[insn];
1151       struct brw_reg args[3][4], dst[4];
1152       GLuint i, dst_flags;
1153
1154       /* Get argument regs:
1155        */
1156       for (i = 0; i < 3; i++)
1157          get_argument_regs(c, inst->src[i], args[i]);
1158
1159       /* Get dest regs:
1160        */
1161       for (i = 0; i < 4; i++)
1162          if (inst->dst[i])
1163             dst[i] = inst->dst[i]->hw_reg;
1164          else
1165             dst[i] = brw_null_reg();
1166
1167       /* Flags
1168        */
1169       dst_flags = inst->writemask;
1170       if (inst->saturate)
1171          dst_flags |= SATURATE;
1172
1173       switch (inst->opcode) {
1174          /* Generated instructions for calculating triangle interpolants:
1175           */
1176       case WM_PIXELXY:
1177          emit_pixel_xy(p, dst, dst_flags, args[0]);
1178          break;
1179
1180       case WM_DELTAXY:
1181          emit_delta_xy(p, dst, dst_flags, args[0], args[1]);
1182          break;
1183
1184       case WM_WPOSXY:
1185          emit_wpos_xy(c, dst, dst_flags, args[0]);
1186          break;
1187
1188       case WM_PIXELW:
1189          emit_pixel_w(p, dst, dst_flags, args[0], args[1]);
1190          break;
1191
1192       case WM_LINTERP:
1193          emit_linterp(p, dst, dst_flags, args[0], args[1]);
1194          break;
1195
1196       case WM_PINTERP:
1197          emit_pinterp(p, dst, dst_flags, args[0], args[1], args[2]);
1198          break;
1199
1200       case WM_CINTERP:
1201          emit_cinterp(p, dst, dst_flags, args[0]);
1202          break;
1203
1204       case WM_FB_WRITE:
1205          emit_fb_write(c, args[0], args[1], args[2], inst->target, inst->eot);
1206          break;
1207
1208       case WM_FRONTFACING:
1209          emit_frontfacing(p, dst, dst_flags);
1210          break;
1211
1212          /* Straightforward arithmetic:
1213           */
1214       case OPCODE_ADD:
1215          emit_alu2(p, brw_ADD, dst, dst_flags, args[0], args[1]);
1216          break;
1217
1218       case OPCODE_FRC:
1219          emit_alu1(p, brw_FRC, dst, dst_flags, args[0]);
1220          break;
1221
1222       case OPCODE_FLR:
1223          emit_alu1(p, brw_RNDD, dst, dst_flags, args[0]);
1224          break;
1225
1226       case OPCODE_DP3:
1227          emit_dp3(p, dst, dst_flags, args[0], args[1]);
1228          break;
1229
1230       case OPCODE_DP4:
1231          emit_dp4(p, dst, dst_flags, args[0], args[1]);
1232          break;
1233
1234       case OPCODE_DPH:
1235          emit_dph(p, dst, dst_flags, args[0], args[1]);
1236          break;
1237
1238       case OPCODE_TRUNC:
1239          emit_trunc(p, dst, dst_flags, args[0]);
1240          break;
1241
1242       case OPCODE_LRP:
1243          emit_lrp(p, dst, dst_flags, args[0], args[1], args[2]);
1244          break;
1245
1246       case OPCODE_MAD:
1247          emit_mad(p, dst, dst_flags, args[0], args[1], args[2]);
1248          break;
1249
1250       case OPCODE_MOV:
1251       case OPCODE_SWZ:
1252          emit_alu1(p, brw_MOV, dst, dst_flags, args[0]);
1253          break;
1254
1255       case OPCODE_MUL:
1256          emit_alu2(p, brw_MUL, dst, dst_flags, args[0], args[1]);
1257          break;
1258
1259       case OPCODE_XPD:
1260          emit_xpd(p, dst, dst_flags, args[0], args[1]);
1261          break;
1262
1263          /* Higher math functions:
1264           */
1265       case OPCODE_RCP:
1266          emit_math1(p, BRW_MATH_FUNCTION_INV, dst, dst_flags, args[0]);
1267          break;
1268
1269       case OPCODE_RSQ:
1270          emit_math1(p, BRW_MATH_FUNCTION_RSQ, dst, dst_flags, args[0]);
1271          break;
1272
1273       case OPCODE_SIN:
1274          emit_math1(p, BRW_MATH_FUNCTION_SIN, dst, dst_flags, args[0]);
1275          break;
1276
1277       case OPCODE_COS:
1278          emit_math1(p, BRW_MATH_FUNCTION_COS, dst, dst_flags, args[0]);
1279          break;
1280
1281       case OPCODE_EX2:
1282          emit_math1(p, BRW_MATH_FUNCTION_EXP, dst, dst_flags, args[0]);
1283          break;
1284
1285       case OPCODE_LG2:
1286          emit_math1(p, BRW_MATH_FUNCTION_LOG, dst, dst_flags, args[0]);
1287          break;
1288
1289       case OPCODE_SCS:
1290          /* There is an scs math function, but it would need some
1291           * fixup for 16-element execution.
1292           */
1293          if (dst_flags & WRITEMASK_X)
1294             emit_math1(p, BRW_MATH_FUNCTION_COS, dst, (dst_flags&SATURATE)|WRITEMASK_X, args[0]);
1295          if (dst_flags & WRITEMASK_Y)
1296             emit_math1(p, BRW_MATH_FUNCTION_SIN, dst+1, (dst_flags&SATURATE)|WRITEMASK_X, args[0]);
1297          break;
1298
1299       case OPCODE_POW:
1300          emit_math2(p, BRW_MATH_FUNCTION_POW, dst, dst_flags, args[0], args[1]);
1301          break;
1302
1303          /* Comparisons:
1304           */
1305       case OPCODE_CMP:
1306          emit_cmp(p, dst, dst_flags, args[0], args[1], args[2]);
1307          break;
1308
1309       case OPCODE_MAX:
1310          emit_max(p, dst, dst_flags, args[0], args[1]);
1311          break;
1312
1313       case OPCODE_MIN:
1314          emit_min(p, dst, dst_flags, args[0], args[1]);
1315          break;
1316
1317       case OPCODE_SLT:
1318          emit_slt(p, dst, dst_flags, args[0], args[1]);
1319          break;
1320
1321       case OPCODE_SLE:
1322          emit_sle(p, dst, dst_flags, args[0], args[1]);
1323         break;
1324       case OPCODE_SGT:
1325          emit_sgt(p, dst, dst_flags, args[0], args[1]);
1326         break;
1327       case OPCODE_SGE:
1328          emit_sge(p, dst, dst_flags, args[0], args[1]);
1329          break;
1330       case OPCODE_SEQ:
1331          emit_seq(p, dst, dst_flags, args[0], args[1]);
1332         break;
1333       case OPCODE_SNE:
1334          emit_sne(p, dst, dst_flags, args[0], args[1]);
1335         break;
1336
1337       case OPCODE_LIT:
1338          emit_lit(p, dst, dst_flags, args[0]);
1339          break;
1340
1341          /* Texturing operations:
1342           */
1343       case OPCODE_TEX:
1344          emit_tex(c, inst, dst, dst_flags, args[0]);
1345          break;
1346
1347       case OPCODE_TXB:
1348          emit_txb(c, inst, dst, dst_flags, args[0]);
1349          break;
1350
1351       case OPCODE_KIL:
1352          emit_kil(c, args[0]);
1353          break;
1354
1355       default:
1356          _mesa_printf("Unsupported opcode %i (%s) in fragment shader\n",
1357                       inst->opcode, inst->opcode < MAX_OPCODE ?
1358                                     _mesa_opcode_string(inst->opcode) :
1359                                     "unknown");
1360       }
1361
1362       for (i = 0; i < 4; i++)
1363         if (inst->dst[i] && inst->dst[i]->spill_slot)
1364            emit_spill(c,
1365                       inst->dst[i]->hw_reg,
1366                       inst->dst[i]->spill_slot);
1367    }
1368 }