src/gallium/drivers/i965/brw_wm_emit.c

   1 /*
   2  Copyright (C) Intel Corp.  2006.  All Rights Reserved.
   3  Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
   4  develop this 3D driver.
   5
   6  Permission is hereby granted, free of charge, to any person obtaining
   7  a copy of this software and associated documentation files (the
   8  "Software"), to deal in the Software without restriction, including
   9  without limitation the rights to use, copy, modify, merge, publish,
  10  distribute, sublicense, and/or sell copies of the Software, and to
  11  permit persons to whom the Software is furnished to do so, subject to
  12  the following conditions:
  13
  14  The above copyright notice and this permission notice (including the
  15  next paragraph) shall be included in all copies or substantial
  16  portions of the Software.
  17
  18  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
  19  EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  20  MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
  21  IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
  22  LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
  23  OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
  24  WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
  25
  26  **********************************************************************/
  27  /*
  28   * Authors:
  29   *   Keith Whitwell <keith@tungstengraphics.com>
  30   */
  31
  32
  33 #include "main/macros.h"
  34 #include "brw_context.h"
  35 #include "brw_wm.h"
  36
  37 /* Not quite sure how correct this is - need to understand horiz
  38  * vs. vertical strides a little better.
  39  */
  40 static INLINE struct brw_reg sechalf( struct brw_reg reg )
  41 {
  42    if (reg.vstride)
  43       reg.nr++;
  44    return reg;
  45 }
  46
  47 /* Payload R0:
  48  *
  49  * R0.0 -- pixel mask, one bit for each of 4 pixels in 4 tiles,
  50  *         corresponding to each of the 16 execution channels.
  51  * R0.1..8 -- ?
  52  * R1.0 -- triangle vertex 0.X
  53  * R1.1 -- triangle vertex 0.Y
  54  * R1.2 -- tile 0 x,y coords (2 packed uwords)
  55  * R1.3 -- tile 1 x,y coords (2 packed uwords)
  56  * R1.4 -- tile 2 x,y coords (2 packed uwords)
  57  * R1.5 -- tile 3 x,y coords (2 packed uwords)
  58  * R1.6 -- ?
  59  * R1.7 -- ?
  60  * R1.8 -- ?
  61  */
  62
  63
  64 static void emit_pixel_xy(struct brw_compile *p,
  65                           const struct brw_reg *dst,
  66                           GLuint mask)
  67 {
  68    struct brw_reg r1 = brw_vec1_grf(1, 0);
  69    struct brw_reg r1_uw = retype(r1, BRW_REGISTER_TYPE_UW);
  70
  71    brw_set_compression_control(p, BRW_COMPRESSION_NONE);
  72
  73    /* Calculate pixel centers by adding 1 or 0 to each of the
  74     * micro-tile coordinates passed in r1.
  75     */
  76    if (mask & WRITEMASK_X) {
  77       brw_ADD(p,
  78               vec16(retype(dst[0], BRW_REGISTER_TYPE_UW)),
  79               stride(suboffset(r1_uw, 4), 2, 4, 0),
  80               brw_imm_v(0x10101010));
  81    }
  82
  83    if (mask & WRITEMASK_Y) {
  84       brw_ADD(p,
  85               vec16(retype(dst[1], BRW_REGISTER_TYPE_UW)),
  86               stride(suboffset(r1_uw,5), 2, 4, 0),
  87               brw_imm_v(0x11001100));
  88    }
  89
  90    brw_set_compression_control(p, BRW_COMPRESSION_COMPRESSED);
  91 }
  92
  93
  94
  95 static void emit_delta_xy(struct brw_compile *p,
  96                           const struct brw_reg *dst,
  97                           GLuint mask,
  98                           const struct brw_reg *arg0)
  99 {
 100    struct brw_reg r1 = brw_vec1_grf(1, 0);
 101
 102    /* Calc delta X,Y by subtracting origin in r1 from the pixel
 103     * centers.
 104     */
 105    if (mask & WRITEMASK_X) {
 106       brw_ADD(p,
 107               dst[0],
 108               retype(arg0[0], BRW_REGISTER_TYPE_UW),
 109               negate(r1));
 110    }
 111
 112    if (mask & WRITEMASK_Y) {
 113       brw_ADD(p,
 114               dst[1],
 115               retype(arg0[1], BRW_REGISTER_TYPE_UW),
 116               negate(suboffset(r1,1)));
 117
 118    }
 119 }
 120
 121 static void emit_wpos_xy(struct brw_wm_compile *c,
 122                          const struct brw_reg *dst,
 123                          GLuint mask,
 124                          const struct brw_reg *arg0)
 125 {
 126    struct brw_compile *p = &c->func;
 127
 128    /* Calculate the pixel offset from window bottom left into destination
 129     * X and Y channels.
 130     */
 131    if (mask & WRITEMASK_X) {
 132       /* X' = X - origin */
 133       brw_ADD(p,
 134               dst[0],
 135               retype(arg0[0], BRW_REGISTER_TYPE_W),
 136               brw_imm_d(0 - c->key.origin_x));
 137    }
 138
 139    if (mask & WRITEMASK_Y) {
 140       /* Y' = height - (Y - origin_y) = height + origin_y - Y */
 141       brw_ADD(p,
 142               dst[1],
 143               negate(retype(arg0[1], BRW_REGISTER_TYPE_W)),
 144               brw_imm_d(c->key.origin_y + c->key.drawable_height - 1));
 145    }
 146 }
 147
 148
 149 static void emit_pixel_w( struct brw_compile *p,
 150                           const struct brw_reg *dst,
 151                           GLuint mask,
 152                           const struct brw_reg *arg0,
 153                           const struct brw_reg *deltas)
 154 {
 155    /* Don't need this if all you are doing is interpolating color, for
 156     * instance.
 157     */
 158    if (mask & WRITEMASK_W) {
 159       struct brw_reg interp3 = brw_vec1_grf(arg0[0].nr+1, 4);
 160
 161       /* Calc 1/w - just linterp wpos[3] optimized by putting the
 162        * result straight into a message reg.
 163        */
 164       brw_LINE(p, brw_null_reg(), interp3, deltas[0]);
 165       brw_MAC(p, brw_message_reg(2), suboffset(interp3, 1), deltas[1]);
 166
 167       /* Calc w */
 168       brw_math_16( p, dst[3],
 169                    BRW_MATH_FUNCTION_INV,
 170                    BRW_MATH_SATURATE_NONE,
 171                    2, brw_null_reg(),
 172                    BRW_MATH_PRECISION_FULL);
 173    }
 174 }
 175
 176
 177
 178 static void emit_linterp( struct brw_compile *p,
 179                          const struct brw_reg *dst,
 180                          GLuint mask,
 181                          const struct brw_reg *arg0,
 182                          const struct brw_reg *deltas )
 183 {
 184    struct brw_reg interp[4];
 185    GLuint nr = arg0[0].nr;
 186    GLuint i;
 187
 188    interp[0] = brw_vec1_grf(nr, 0);
 189    interp[1] = brw_vec1_grf(nr, 4);
 190    interp[2] = brw_vec1_grf(nr+1, 0);
 191    interp[3] = brw_vec1_grf(nr+1, 4);
 192
 193    for (i = 0; i < 4; i++) {
 194       if (mask & (1<<i)) {
 195          brw_LINE(p, brw_null_reg(), interp[i], deltas[0]);
 196          brw_MAC(p, dst[i], suboffset(interp[i],1), deltas[1]);
 197       }
 198    }
 199 }
 200
 201
 202 static void emit_pinterp( struct brw_compile *p,
 203                           const struct brw_reg *dst,
 204                           GLuint mask,
 205                           const struct brw_reg *arg0,
 206                           const struct brw_reg *deltas,
 207                           const struct brw_reg *w)
 208 {
 209    struct brw_reg interp[4];
 210    GLuint nr = arg0[0].nr;
 211    GLuint i;
 212
 213    interp[0] = brw_vec1_grf(nr, 0);
 214    interp[1] = brw_vec1_grf(nr, 4);
 215    interp[2] = brw_vec1_grf(nr+1, 0);
 216    interp[3] = brw_vec1_grf(nr+1, 4);
 217
 218    for (i = 0; i < 4; i++) {
 219       if (mask & (1<<i)) {
 220          brw_LINE(p, brw_null_reg(), interp[i], deltas[0]);
 221          brw_MAC(p, dst[i], suboffset(interp[i],1), deltas[1]);
 222       }
 223    }
 224    for (i = 0; i < 4; i++) {
 225       if (mask & (1<<i)) {
 226          brw_MUL(p, dst[i], dst[i], w[3]);
 227       }
 228    }
 229 }
 230
 231
 232 static void emit_cinterp( struct brw_compile *p,
 233                          const struct brw_reg *dst,
 234                          GLuint mask,
 235                          const struct brw_reg *arg0 )
 236 {
 237    struct brw_reg interp[4];
 238    GLuint nr = arg0[0].nr;
 239    GLuint i;
 240
 241    interp[0] = brw_vec1_grf(nr, 0);
 242    interp[1] = brw_vec1_grf(nr, 4);
 243    interp[2] = brw_vec1_grf(nr+1, 0);
 244    interp[3] = brw_vec1_grf(nr+1, 4);
 245
 246    for (i = 0; i < 4; i++) {
 247       if (mask & (1<<i)) {
 248          brw_MOV(p, dst[i], suboffset(interp[i],3));    /* TODO: optimize away like other moves */
 249       }
 250    }
 251 }
 252
 253 /* Sets the destination channels to 1.0 or 0.0 according to glFrontFacing. */
 254 static void emit_frontfacing( struct brw_compile *p,
 255                               const struct brw_reg *dst,
 256                               GLuint mask )
 257 {
 258    struct brw_reg r1_6ud = retype(brw_vec1_grf(1, 6), BRW_REGISTER_TYPE_UD);
 259    GLuint i;
 260
 261    if (!(mask & WRITEMASK_XYZW))
 262       return;
 263
 264    for (i = 0; i < 4; i++) {
 265       if (mask & (1<<i)) {
 266          brw_MOV(p, dst[i], brw_imm_f(0.0));
 267       }
 268    }
 269
 270    /* bit 31 is "primitive is back face", so checking < (1 << 31) gives
 271     * us front face
 272     */
 273    brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_L, r1_6ud, brw_imm_ud(1 << 31));
 274    for (i = 0; i < 4; i++) {
 275       if (mask & (1<<i)) {
 276          brw_MOV(p, dst[i], brw_imm_f(1.0));
 277       }
 278    }
 279    brw_set_predicate_control_flag_value(p, 0xff);
 280 }
 281
 282 /* For OPCODE_DDX and OPCODE_DDY, per channel of output we've got input
 283  * looking like:
 284  *
 285  * arg0: ss0.tl ss0.tr ss0.bl ss0.br ss1.tl ss1.tr ss1.bl ss1.br
 286  *
 287  * and we're trying to produce:
 288  *
 289  *           DDX                     DDY
 290  * dst: (ss0.tr - ss0.tl)     (ss0.tl - ss0.bl)
 291  *      (ss0.tr - ss0.tl)     (ss0.tr - ss0.br)
 292  *      (ss0.br - ss0.bl)     (ss0.tl - ss0.bl)
 293  *      (ss0.br - ss0.bl)     (ss0.tr - ss0.br)
 294  *      (ss1.tr - ss1.tl)     (ss1.tl - ss1.bl)
 295  *      (ss1.tr - ss1.tl)     (ss1.tr - ss1.br)
 296  *      (ss1.br - ss1.bl)     (ss1.tl - ss1.bl)
 297  *      (ss1.br - ss1.bl)     (ss1.tr - ss1.br)
 298  *
 299  * and add another set of two more subspans if in 16-pixel dispatch mode.
 300  *
 301  * For DDX, it ends up being easy: width = 2, horiz=0 gets us the same result
 302  * for each pair, and vertstride = 2 jumps us 2 elements after processing a
 303  * pair. But for DDY, it's harder, as we want to produce the pairs swizzled
 304  * between each other.  We could probably do it like ddx and swizzle the right
 305  * order later, but bail for now and just produce
 306  * ((ss0.tl - ss0.bl)x4 (ss1.tl - ss1.bl)x4)
 307  */
 308 void emit_ddxy(struct brw_compile *p,
 309                const struct brw_reg *dst,
 310                GLuint mask,
 311                GLboolean is_ddx,
 312                const struct brw_reg *arg0)
 313 {
 314    int i;
 315    struct brw_reg src0, src1;
 316
 317    if (mask & SATURATE)
 318       brw_set_saturate(p, 1);
 319    for (i = 0; i < 4; i++ ) {
 320       if (mask & (1<<i)) {
 321          if (is_ddx) {
 322             src0 = brw_reg(arg0[i].file, arg0[i].nr, 1,
 323                            BRW_REGISTER_TYPE_F,
 324                            BRW_VERTICAL_STRIDE_2,
 325                            BRW_WIDTH_2,
 326                            BRW_HORIZONTAL_STRIDE_0,
 327                            BRW_SWIZZLE_XYZW, WRITEMASK_XYZW);
 328             src1 = brw_reg(arg0[i].file, arg0[i].nr, 0,
 329                            BRW_REGISTER_TYPE_F,
 330                            BRW_VERTICAL_STRIDE_2,
 331                            BRW_WIDTH_2,
 332                            BRW_HORIZONTAL_STRIDE_0,
 333                            BRW_SWIZZLE_XYZW, WRITEMASK_XYZW);
 334          } else {
 335             src0 = brw_reg(arg0[i].file, arg0[i].nr, 0,
 336                            BRW_REGISTER_TYPE_F,
 337                            BRW_VERTICAL_STRIDE_4,
 338                            BRW_WIDTH_4,
 339                            BRW_HORIZONTAL_STRIDE_0,
 340                            BRW_SWIZZLE_XYZW, WRITEMASK_XYZW);
 341             src1 = brw_reg(arg0[i].file, arg0[i].nr, 2,
 342                            BRW_REGISTER_TYPE_F,
 343                            BRW_VERTICAL_STRIDE_4,
 344                            BRW_WIDTH_4,
 345                            BRW_HORIZONTAL_STRIDE_0,
 346                            BRW_SWIZZLE_XYZW, WRITEMASK_XYZW);
 347          }
 348          brw_ADD(p, dst[i], src0, negate(src1));
 349       }
 350    }
 351    if (mask & SATURATE)
 352       brw_set_saturate(p, 0);
 353 }
 354
 355 static void emit_alu1( struct brw_compile *p,
 356                        struct brw_instruction *(*func)(struct brw_compile *,
 357                                                        struct brw_reg,
 358                                                        struct brw_reg),
 359                        const struct brw_reg *dst,
 360                        GLuint mask,
 361                        const struct brw_reg *arg0 )
 362 {
 363    GLuint i;
 364
 365    if (mask & SATURATE)
 366       brw_set_saturate(p, 1);
 367
 368    for (i = 0; i < 4; i++) {
 369       if (mask & (1<<i)) {
 370          func(p, dst[i], arg0[i]);
 371       }
 372    }
 373
 374    if (mask & SATURATE)
 375       brw_set_saturate(p, 0);
 376 }
 377
 378
 379 static void emit_alu2( struct brw_compile *p,
 380                        struct brw_instruction *(*func)(struct brw_compile *,
 381                                                        struct brw_reg,
 382                                                        struct brw_reg,
 383                                                        struct brw_reg),
 384                        const struct brw_reg *dst,
 385                        GLuint mask,
 386                        const struct brw_reg *arg0,
 387                        const struct brw_reg *arg1 )
 388 {
 389    GLuint i;
 390
 391    if (mask & SATURATE)
 392       brw_set_saturate(p, 1);
 393
 394    for (i = 0; i < 4; i++) {
 395       if (mask & (1<<i)) {
 396          func(p, dst[i], arg0[i], arg1[i]);
 397       }
 398    }
 399
 400    if (mask & SATURATE)
 401       brw_set_saturate(p, 0);
 402 }
 403
 404
 405 static void emit_mad( struct brw_compile *p,
 406                       const struct brw_reg *dst,
 407                       GLuint mask,
 408                       const struct brw_reg *arg0,
 409                       const struct brw_reg *arg1,
 410                       const struct brw_reg *arg2 )
 411 {
 412    GLuint i;
 413
 414    for (i = 0; i < 4; i++) {
 415       if (mask & (1<<i)) {
 416          brw_MUL(p, dst[i], arg0[i], arg1[i]);
 417
 418          brw_set_saturate(p, (mask & SATURATE) ? 1 : 0);
 419          brw_ADD(p, dst[i], dst[i], arg2[i]);
 420          brw_set_saturate(p, 0);
 421       }
 422    }
 423 }
 424
 425 static void emit_trunc( struct brw_compile *p,
 426                       const struct brw_reg *dst,
 427                       GLuint mask,
 428                       const struct brw_reg *arg0)
 429 {
 430    GLuint i;
 431
 432    for (i = 0; i < 4; i++) {
 433       if (mask & (1<<i)) {
 434          brw_RNDZ(p, dst[i], arg0[i]);
 435       }
 436    }
 437 }
 438
 439 static void emit_lrp( struct brw_compile *p,
 440                       const struct brw_reg *dst,
 441                       GLuint mask,
 442                       const struct brw_reg *arg0,
 443                       const struct brw_reg *arg1,
 444                       const struct brw_reg *arg2 )
 445 {
 446    GLuint i;
 447
 448    /* Uses dst as a temporary:
 449     */
 450    for (i = 0; i < 4; i++) {
 451       if (mask & (1<<i)) {
 452          /* Can I use the LINE instruction for this?
 453           */
 454          brw_ADD(p, dst[i], negate(arg0[i]), brw_imm_f(1.0));
 455          brw_MUL(p, brw_null_reg(), dst[i], arg2[i]);
 456
 457          brw_set_saturate(p, (mask & SATURATE) ? 1 : 0);
 458          brw_MAC(p, dst[i], arg0[i], arg1[i]);
 459          brw_set_saturate(p, 0);
 460       }
 461    }
 462 }
 463
 464 static void emit_sop( struct brw_compile *p,
 465                       const struct brw_reg *dst,
 466                       GLuint mask,
 467                       GLuint cond,
 468                       const struct brw_reg *arg0,
 469                       const struct brw_reg *arg1 )
 470 {
 471    GLuint i;
 472
 473    for (i = 0; i < 4; i++) {
 474       if (mask & (1<<i)) {
 475          brw_MOV(p, dst[i], brw_imm_f(0));
 476          brw_CMP(p, brw_null_reg(), cond, arg0[i], arg1[i]);
 477          brw_MOV(p, dst[i], brw_imm_f(1.0));
 478          brw_set_predicate_control_flag_value(p, 0xff);
 479       }
 480    }
 481 }
 482
 483 static void emit_slt( struct brw_compile *p,
 484                       const struct brw_reg *dst,
 485                       GLuint mask,
 486                       const struct brw_reg *arg0,
 487                       const struct brw_reg *arg1 )
 488 {
 489    emit_sop(p, dst, mask, BRW_CONDITIONAL_L, arg0, arg1);
 490 }
 491
 492 static void emit_sle( struct brw_compile *p,
 493                       const struct brw_reg *dst,
 494                       GLuint mask,
 495                       const struct brw_reg *arg0,
 496                       const struct brw_reg *arg1 )
 497 {
 498    emit_sop(p, dst, mask, BRW_CONDITIONAL_LE, arg0, arg1);
 499 }
 500
 501 static void emit_sgt( struct brw_compile *p,
 502                       const struct brw_reg *dst,
 503                       GLuint mask,
 504                       const struct brw_reg *arg0,
 505                       const struct brw_reg *arg1 )
 506 {
 507    emit_sop(p, dst, mask, BRW_CONDITIONAL_G, arg0, arg1);
 508 }
 509
 510 static void emit_sge( struct brw_compile *p,
 511                       const struct brw_reg *dst,
 512                       GLuint mask,
 513                       const struct brw_reg *arg0,
 514                       const struct brw_reg *arg1 )
 515 {
 516    emit_sop(p, dst, mask, BRW_CONDITIONAL_GE, arg0, arg1);
 517 }
 518
 519 static void emit_seq( struct brw_compile *p,
 520                       const struct brw_reg *dst,
 521                       GLuint mask,
 522                       const struct brw_reg *arg0,
 523                       const struct brw_reg *arg1 )
 524 {
 525    emit_sop(p, dst, mask, BRW_CONDITIONAL_EQ, arg0, arg1);
 526 }
 527
 528 static void emit_sne( struct brw_compile *p,
 529                       const struct brw_reg *dst,
 530                       GLuint mask,
 531                       const struct brw_reg *arg0,
 532                       const struct brw_reg *arg1 )
 533 {
 534    emit_sop(p, dst, mask, BRW_CONDITIONAL_NEQ, arg0, arg1);
 535 }
 536
 537 static void emit_cmp( struct brw_compile *p,
 538                       const struct brw_reg *dst,
 539                       GLuint mask,
 540                       const struct brw_reg *arg0,
 541                       const struct brw_reg *arg1,
 542                       const struct brw_reg *arg2 )
 543 {
 544    GLuint i;
 545
 546    for (i = 0; i < 4; i++) {
 547       if (mask & (1<<i)) {
 548          brw_set_saturate(p, (mask & SATURATE) ? 1 : 0);
 549          brw_MOV(p, dst[i], arg2[i]);
 550          brw_set_saturate(p, 0);
 551
 552          brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_L, arg0[i], brw_imm_f(0));
 553
 554          brw_set_saturate(p, (mask & SATURATE) ? 1 : 0);
 555          brw_MOV(p, dst[i], arg1[i]);
 556          brw_set_saturate(p, 0);
 557          brw_set_predicate_control_flag_value(p, 0xff);
 558       }
 559    }
 560 }
 561
 562 static void emit_max( struct brw_compile *p,
 563                       const struct brw_reg *dst,
 564                       GLuint mask,
 565                       const struct brw_reg *arg0,
 566                       const struct brw_reg *arg1 )
 567 {
 568    GLuint i;
 569
 570    for (i = 0; i < 4; i++) {
 571       if (mask & (1<<i)) {
 572          brw_set_saturate(p, (mask & SATURATE) ? 1 : 0);
 573          brw_MOV(p, dst[i], arg0[i]);
 574          brw_set_saturate(p, 0);
 575
 576          brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_L, arg0[i], arg1[i]);
 577
 578          brw_set_saturate(p, (mask & SATURATE) ? 1 : 0);
 579          brw_MOV(p, dst[i], arg1[i]);
 580          brw_set_saturate(p, 0);
 581          brw_set_predicate_control_flag_value(p, 0xff);
 582       }
 583    }
 584 }
 585
 586 static void emit_min( struct brw_compile *p,
 587                       const struct brw_reg *dst,
 588                       GLuint mask,
 589                       const struct brw_reg *arg0,
 590                       const struct brw_reg *arg1 )
 591 {
 592    GLuint i;
 593
 594    for (i = 0; i < 4; i++) {
 595       if (mask & (1<<i)) {
 596          brw_set_saturate(p, (mask & SATURATE) ? 1 : 0);
 597          brw_MOV(p, dst[i], arg1[i]);
 598          brw_set_saturate(p, 0);
 599
 600          brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_L, arg0[i], arg1[i]);
 601
 602          brw_set_saturate(p, (mask & SATURATE) ? 1 : 0);
 603          brw_MOV(p, dst[i], arg0[i]);
 604          brw_set_saturate(p, 0);
 605          brw_set_predicate_control_flag_value(p, 0xff);
 606       }
 607    }
 608 }
 609
 610
 611 static void emit_dp3( struct brw_compile *p,
 612                       const struct brw_reg *dst,
 613                       GLuint mask,
 614                       const struct brw_reg *arg0,
 615                       const struct brw_reg *arg1 )
 616 {
 617    int dst_chan = _mesa_ffs(mask & WRITEMASK_XYZW) - 1;
 618
 619    if (!(mask & WRITEMASK_XYZW))
 620       return; /* Do not emit dead code */
 621
 622    assert(is_power_of_two(mask & WRITEMASK_XYZW));
 623
 624    brw_MUL(p, brw_null_reg(), arg0[0], arg1[0]);
 625    brw_MAC(p, brw_null_reg(), arg0[1], arg1[1]);
 626
 627    brw_set_saturate(p, (mask & SATURATE) ? 1 : 0);
 628    brw_MAC(p, dst[dst_chan], arg0[2], arg1[2]);
 629    brw_set_saturate(p, 0);
 630 }
 631
 632
 633 static void emit_dp4( struct brw_compile *p,
 634                       const struct brw_reg *dst,
 635                       GLuint mask,
 636                       const struct brw_reg *arg0,
 637                       const struct brw_reg *arg1 )
 638 {
 639    int dst_chan = _mesa_ffs(mask & WRITEMASK_XYZW) - 1;
 640
 641    if (!(mask & WRITEMASK_XYZW))
 642       return; /* Do not emit dead code */
 643
 644    assert(is_power_of_two(mask & WRITEMASK_XYZW));
 645
 646    brw_MUL(p, brw_null_reg(), arg0[0], arg1[0]);
 647    brw_MAC(p, brw_null_reg(), arg0[1], arg1[1]);
 648    brw_MAC(p, brw_null_reg(), arg0[2], arg1[2]);
 649
 650    brw_set_saturate(p, (mask & SATURATE) ? 1 : 0);
 651    brw_MAC(p, dst[dst_chan], arg0[3], arg1[3]);
 652    brw_set_saturate(p, 0);
 653 }
 654
 655
 656 static void emit_dph( struct brw_compile *p,
 657                       const struct brw_reg *dst,
 658                       GLuint mask,
 659                       const struct brw_reg *arg0,
 660                       const struct brw_reg *arg1 )
 661 {
 662    const int dst_chan = _mesa_ffs(mask & WRITEMASK_XYZW) - 1;
 663
 664    if (!(mask & WRITEMASK_XYZW))
 665       return; /* Do not emit dead code */
 666
 667    assert(is_power_of_two(mask & WRITEMASK_XYZW));
 668
 669    brw_MUL(p, brw_null_reg(), arg0[0], arg1[0]);
 670    brw_MAC(p, brw_null_reg(), arg0[1], arg1[1]);
 671    brw_MAC(p, dst[dst_chan], arg0[2], arg1[2]);
 672
 673    brw_set_saturate(p, (mask & SATURATE) ? 1 : 0);
 674    brw_ADD(p, dst[dst_chan], dst[dst_chan], arg1[3]);
 675    brw_set_saturate(p, 0);
 676 }
 677
 678
 679 static void emit_xpd( struct brw_compile *p,
 680                       const struct brw_reg *dst,
 681                       GLuint mask,
 682                       const struct brw_reg *arg0,
 683                       const struct brw_reg *arg1 )
 684 {
 685    GLuint i;
 686
 687    assert(!(mask & WRITEMASK_W) == WRITEMASK_X);
 688
 689    for (i = 0 ; i < 3; i++) {
 690       if (mask & (1<<i)) {
 691          GLuint i2 = (i+2)%3;
 692          GLuint i1 = (i+1)%3;
 693
 694          brw_MUL(p, brw_null_reg(), negate(arg0[i2]), arg1[i1]);
 695
 696          brw_set_saturate(p, (mask & SATURATE) ? 1 : 0);
 697          brw_MAC(p, dst[i], arg0[i1], arg1[i2]);
 698          brw_set_saturate(p, 0);
 699       }
 700    }
 701 }
 702
 703
 704 static void emit_math1( struct brw_compile *p,
 705                         GLuint function,
 706                         const struct brw_reg *dst,
 707                         GLuint mask,
 708                         const struct brw_reg *arg0 )
 709 {
 710    int dst_chan = _mesa_ffs(mask & WRITEMASK_XYZW) - 1;
 711
 712    if (!(mask & WRITEMASK_XYZW))
 713       return; /* Do not emit dead code */
 714
 715    assert(is_power_of_two(mask & WRITEMASK_XYZW));
 716
 717    brw_MOV(p, brw_message_reg(2), arg0[0]);
 718
 719    /* Send two messages to perform all 16 operations:
 720     */
 721    brw_math_16(p,
 722                dst[dst_chan],
 723                function,
 724                (mask & SATURATE) ? BRW_MATH_SATURATE_SATURATE : BRW_MATH_SATURATE_NONE,
 725                2,
 726                brw_null_reg(),
 727                BRW_MATH_PRECISION_FULL);
 728 }
 729
 730
 731 static void emit_math2( struct brw_compile *p,
 732                         GLuint function,
 733                         const struct brw_reg *dst,
 734                         GLuint mask,
 735                         const struct brw_reg *arg0,
 736                         const struct brw_reg *arg1)
 737 {
 738    int dst_chan = _mesa_ffs(mask & WRITEMASK_XYZW) - 1;
 739
 740    if (!(mask & WRITEMASK_XYZW))
 741       return; /* Do not emit dead code */
 742
 743    assert(is_power_of_two(mask & WRITEMASK_XYZW));
 744
 745    brw_push_insn_state(p);
 746
 747    brw_set_compression_control(p, BRW_COMPRESSION_NONE);
 748    brw_MOV(p, brw_message_reg(2), arg0[0]);
 749    brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF);
 750    brw_MOV(p, brw_message_reg(4), sechalf(arg0[0]));
 751
 752    brw_set_compression_control(p, BRW_COMPRESSION_NONE);
 753    brw_MOV(p, brw_message_reg(3), arg1[0]);
 754    brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF);
 755    brw_MOV(p, brw_message_reg(5), sechalf(arg1[0]));
 756
 757
 758    /* Send two messages to perform all 16 operations:
 759     */
 760    brw_set_compression_control(p, BRW_COMPRESSION_NONE);
 761    brw_math(p,
 762             dst[dst_chan],
 763             function,
 764             (mask & SATURATE) ? BRW_MATH_SATURATE_SATURATE : BRW_MATH_SATURATE_NONE,
 765             2,
 766             brw_null_reg(),
 767             BRW_MATH_DATA_VECTOR,
 768             BRW_MATH_PRECISION_FULL);
 769
 770    brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF);
 771    brw_math(p,
 772             offset(dst[dst_chan],1),
 773             function,
 774             (mask & SATURATE) ? BRW_MATH_SATURATE_SATURATE : BRW_MATH_SATURATE_NONE,
 775             4,
 776             brw_null_reg(),
 777             BRW_MATH_DATA_VECTOR,
 778             BRW_MATH_PRECISION_FULL);
 779
 780    brw_pop_insn_state(p);
 781 }
 782
 783
 784
 785 static void emit_tex( struct brw_wm_compile *c,
 786                       const struct brw_wm_instruction *inst,
 787                       struct brw_reg *dst,
 788                       GLuint dst_flags,
 789                       struct brw_reg *arg )
 790 {
 791    struct brw_compile *p = &c->func;
 792    GLuint msgLength, responseLength;
 793    GLuint i, nr;
 794    GLuint emit;
 795    GLuint msg_type;
 796
 797    /* How many input regs are there?
 798     */
 799    switch (inst->tex_idx) {
 800    case TEXTURE_1D_INDEX:
 801       emit = WRITEMASK_X;
 802       nr = 1;
 803       break;
 804    case TEXTURE_2D_INDEX:
 805    case TEXTURE_RECT_INDEX:
 806       emit = WRITEMASK_XY;
 807       nr = 2;
 808       break;
 809    case TEXTURE_3D_INDEX:
 810    case TEXTURE_CUBE_INDEX:
 811       emit = WRITEMASK_XYZ;
 812       nr = 3;
 813       break;
 814    default:
 815       /* unexpected target */
 816       abort();
 817    }
 818
 819    if (inst->tex_shadow) {
 820       nr = 4;
 821       emit |= WRITEMASK_W;
 822    }
 823
 824    msgLength = 1;
 825
 826    for (i = 0; i < nr; i++) {
 827       static const GLuint swz[4] = {0,1,2,2};
 828       if (emit & (1<<i))
 829          brw_MOV(p, brw_message_reg(msgLength+1), arg[swz[i]]);
 830       else
 831          brw_MOV(p, brw_message_reg(msgLength+1), brw_imm_f(0));
 832       msgLength += 2;
 833    }
 834
 835    responseLength = 8;          /* always */
 836
 837    if (BRW_IS_IGDNG(p->brw)) {
 838        if (inst->tex_shadow)
 839            msg_type = BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_COMPARE_IGDNG;
 840        else
 841            msg_type = BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_IGDNG;
 842    } else {
 843        if (inst->tex_shadow)
 844            msg_type = BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_COMPARE;
 845        else
 846            msg_type = BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE;
 847    }
 848
 849    brw_SAMPLE(p,
 850               retype(vec16(dst[0]), BRW_REGISTER_TYPE_UW),
 851               1,
 852               retype(c->payload.depth[0].hw_reg, BRW_REGISTER_TYPE_UW),
 853               SURF_INDEX_TEXTURE(inst->tex_unit),
 854               inst->tex_unit,     /* sampler */
 855               inst->writemask,
 856               msg_type,
 857               responseLength,
 858               msgLength,
 859               0,
 860               1,
 861               BRW_SAMPLER_SIMD_MODE_SIMD16);
 862 }
 863
 864
 865 static void emit_txb( struct brw_wm_compile *c,
 866                       const struct brw_wm_instruction *inst,
 867                       struct brw_reg *dst,
 868                       GLuint dst_flags,
 869                       struct brw_reg *arg )
 870 {
 871    struct brw_compile *p = &c->func;
 872    GLuint msgLength;
 873    GLuint msg_type;
 874    /* Shadow ignored for txb.
 875     */
 876    switch (inst->tex_idx) {
 877    case TEXTURE_1D_INDEX:
 878       brw_MOV(p, brw_message_reg(2), arg[0]);
 879       brw_MOV(p, brw_message_reg(4), brw_imm_f(0));
 880       brw_MOV(p, brw_message_reg(6), brw_imm_f(0));
 881       break;
 882    case TEXTURE_2D_INDEX:
 883    case TEXTURE_RECT_INDEX:
 884       brw_MOV(p, brw_message_reg(2), arg[0]);
 885       brw_MOV(p, brw_message_reg(4), arg[1]);
 886       brw_MOV(p, brw_message_reg(6), brw_imm_f(0));
 887       break;
 888    case TEXTURE_3D_INDEX:
 889    case TEXTURE_CUBE_INDEX:
 890       brw_MOV(p, brw_message_reg(2), arg[0]);
 891       brw_MOV(p, brw_message_reg(4), arg[1]);
 892       brw_MOV(p, brw_message_reg(6), arg[2]);
 893       break;
 894    default:
 895       /* unexpected target */
 896       abort();
 897    }
 898
 899    brw_MOV(p, brw_message_reg(8), arg[3]);
 900    msgLength = 9;
 901
 902    if (BRW_IS_IGDNG(p->brw))
 903        msg_type = BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_BIAS_IGDNG;
 904    else
 905        msg_type = BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_BIAS;
 906
 907    brw_SAMPLE(p,
 908               retype(vec16(dst[0]), BRW_REGISTER_TYPE_UW),
 909               1,
 910               retype(c->payload.depth[0].hw_reg, BRW_REGISTER_TYPE_UW),
 911               SURF_INDEX_TEXTURE(inst->tex_unit),
 912               inst->tex_unit,     /* sampler */
 913               inst->writemask,
 914               msg_type,
 915               8,                /* responseLength */
 916               msgLength,
 917               0,
 918               1,
 919               BRW_SAMPLER_SIMD_MODE_SIMD16);
 920 }
 921
 922
 923 static void emit_lit( struct brw_compile *p,
 924                       const struct brw_reg *dst,
 925                       GLuint mask,
 926                       const struct brw_reg *arg0 )
 927 {
 928    assert((mask & WRITEMASK_XW) == 0);
 929
 930    if (mask & WRITEMASK_Y) {
 931       brw_set_saturate(p, (mask & SATURATE) ? 1 : 0);
 932       brw_MOV(p, dst[1], arg0[0]);
 933       brw_set_saturate(p, 0);
 934    }
 935
 936    if (mask & WRITEMASK_Z) {
 937       emit_math2(p, BRW_MATH_FUNCTION_POW,
 938                  &dst[2],
 939                  WRITEMASK_X | (mask & SATURATE),
 940                  &arg0[1],
 941                  &arg0[3]);
 942    }
 943
 944    /* Ordinarily you'd use an iff statement to skip or shortcircuit
 945     * some of the POW calculations above, but 16-wide iff statements
 946     * seem to lock c1 hardware, so this is a nasty workaround:
 947     */
 948    brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_LE, arg0[0], brw_imm_f(0));
 949    {
 950       if (mask & WRITEMASK_Y)
 951          brw_MOV(p, dst[1], brw_imm_f(0));
 952
 953       if (mask & WRITEMASK_Z)
 954          brw_MOV(p, dst[2], brw_imm_f(0));
 955    }
 956    brw_set_predicate_control(p, BRW_PREDICATE_NONE);
 957 }
 958
 959
 960 /* Kill pixel - set execution mask to zero for those pixels which
 961  * fail.
 962  */
 963 static void emit_kil( struct brw_wm_compile *c,
 964                       struct brw_reg *arg0)
 965 {
 966    struct brw_compile *p = &c->func;
 967    struct brw_reg r0uw = retype(brw_vec1_grf(0, 0), BRW_REGISTER_TYPE_UW);
 968    GLuint i;
 969
 970    /* XXX - usually won't need 4 compares!
 971     */
 972    for (i = 0; i < 4; i++) {
 973       brw_push_insn_state(p);
 974       brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_GE, arg0[i], brw_imm_f(0));
 975       brw_set_predicate_control_flag_value(p, 0xff);
 976       brw_set_compression_control(p, BRW_COMPRESSION_NONE);
 977       brw_AND(p, r0uw, brw_flag_reg(), r0uw);
 978       brw_pop_insn_state(p);
 979    }
 980 }
 981
 982 /* KIL_NV kills the pixels that are currently executing, not based on a test
 983  * of the arguments.
 984  */
 985 static void emit_kil_nv( struct brw_wm_compile *c )
 986 {
 987    struct brw_compile *p = &c->func;
 988    struct brw_reg r0uw = retype(brw_vec1_grf(0, 0), BRW_REGISTER_TYPE_UW);
 989
 990    brw_push_insn_state(p);
 991    brw_set_mask_control(p, BRW_MASK_DISABLE);
 992    brw_NOT(p, c->emit_mask_reg, brw_mask_reg(1)); //IMASK
 993    brw_AND(p, r0uw, c->emit_mask_reg, r0uw);
 994    brw_pop_insn_state(p);
 995 }
 996
 997 static void fire_fb_write( struct brw_wm_compile *c,
 998                            GLuint base_reg,
 999                            GLuint nr,
1000                            GLuint target,
1001                            GLuint eot )
1002 {
1003    struct brw_compile *p = &c->func;
1004
1005    /* Pass through control information:
1006     */
1007 /*  mov (8) m1.0<1>:ud   r1.0<8;8,1>:ud   { Align1 NoMask } */
1008    {
1009       brw_push_insn_state(p);
1010       brw_set_mask_control(p, BRW_MASK_DISABLE); /* ? */
1011       brw_set_compression_control(p, BRW_COMPRESSION_NONE);
1012       brw_MOV(p,
1013                brw_message_reg(base_reg + 1),
1014                brw_vec8_grf(1, 0));
1015       brw_pop_insn_state(p);
1016    }
1017
1018    /* Send framebuffer write message: */
1019 /*  send (16) null.0<1>:uw m0               r0.0<8;8,1>:uw   0x85a04000:ud    { Align1 EOT } */
1020    brw_fb_WRITE(p,
1021                 retype(vec16(brw_null_reg()), BRW_REGISTER_TYPE_UW),
1022                 base_reg,
1023                 retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UW),
1024                 target,
1025                 nr,
1026                 0,
1027                 eot);
1028 }
1029
1030
1031 static void emit_aa( struct brw_wm_compile *c,
1032                      struct brw_reg *arg1,
1033                      GLuint reg )
1034 {
1035    struct brw_compile *p = &c->func;
1036    GLuint comp = c->key.aa_dest_stencil_reg / 2;
1037    GLuint off = c->key.aa_dest_stencil_reg % 2;
1038    struct brw_reg aa = offset(arg1[comp], off);
1039
1040    brw_push_insn_state(p);
1041    brw_set_compression_control(p, BRW_COMPRESSION_NONE); /* ?? */
1042    brw_MOV(p, brw_message_reg(reg), aa);
1043    brw_pop_insn_state(p);
1044 }
1045
1046
1047 /* Post-fragment-program processing.  Send the results to the
1048  * framebuffer.
1049  * \param arg0  the fragment color
1050  * \param arg1  the pass-through depth value
1051  * \param arg2  the shader-computed depth value
1052  */
1053 static void emit_fb_write( struct brw_wm_compile *c,
1054                            struct brw_reg *arg0,
1055                            struct brw_reg *arg1,
1056                            struct brw_reg *arg2,
1057                            GLuint target,
1058                            GLuint eot)
1059 {
1060    struct brw_compile *p = &c->func;
1061    GLuint nr = 2;
1062    GLuint channel;
1063
1064    /* Reserve a space for AA - may not be needed:
1065     */
1066    if (c->key.aa_dest_stencil_reg)
1067       nr += 1;
1068
1069    /* I don't really understand how this achieves the color interleave
1070     * (ie RGBARGBA) in the result:  [Do the saturation here]
1071     */
1072    {
1073       brw_push_insn_state(p);
1074
1075       for (channel = 0; channel < 4; channel++) {
1076          /*  mov (8) m2.0<1>:ud   r28.0<8;8,1>:ud  { Align1 } */
1077          /*  mov (8) m6.0<1>:ud   r29.0<8;8,1>:ud  { Align1 SecHalf } */
1078
1079          brw_set_compression_control(p, BRW_COMPRESSION_NONE);
1080          brw_MOV(p,
1081                  brw_message_reg(nr + channel),
1082                  arg0[channel]);
1083
1084          brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF);
1085          brw_MOV(p,
1086                  brw_message_reg(nr + channel + 4),
1087                  sechalf(arg0[channel]));
1088       }
1089
1090       /* skip over the regs populated above:
1091        */
1092       nr += 8;
1093
1094       brw_pop_insn_state(p);
1095    }
1096
1097    if (c->key.source_depth_to_render_target)
1098    {
1099       if (c->key.computes_depth)
1100          brw_MOV(p, brw_message_reg(nr), arg2[2]);
1101       else
1102          brw_MOV(p, brw_message_reg(nr), arg1[1]); /* ? */
1103
1104       nr += 2;
1105    }
1106
1107    if (c->key.dest_depth_reg)
1108    {
1109       GLuint comp = c->key.dest_depth_reg / 2;
1110       GLuint off = c->key.dest_depth_reg % 2;
1111
1112       if (off != 0) {
1113          brw_push_insn_state(p);
1114          brw_set_compression_control(p, BRW_COMPRESSION_NONE);
1115
1116          brw_MOV(p, brw_message_reg(nr), offset(arg1[comp],1));
1117          /* 2nd half? */
1118          brw_MOV(p, brw_message_reg(nr+1), arg1[comp+1]);
1119          brw_pop_insn_state(p);
1120       }
1121       else {
1122          brw_MOV(p, brw_message_reg(nr), arg1[comp]);
1123       }
1124       nr += 2;
1125    }
1126
1127    if (!c->key.runtime_check_aads_emit) {
1128       if (c->key.aa_dest_stencil_reg)
1129          emit_aa(c, arg1, 2);
1130
1131       fire_fb_write(c, 0, nr, target, eot);
1132    }
1133    else {
1134       struct brw_reg v1_null_ud = vec1(retype(brw_null_reg(), BRW_REGISTER_TYPE_UD));
1135       struct brw_reg ip = brw_ip_reg();
1136       struct brw_instruction *jmp;
1137
1138       brw_set_compression_control(p, BRW_COMPRESSION_NONE);
1139       brw_set_conditionalmod(p, BRW_CONDITIONAL_Z);
1140       brw_AND(p,
1141               v1_null_ud,
1142               get_element_ud(brw_vec8_grf(1,0), 6),
1143               brw_imm_ud(1<<26));
1144
1145       jmp = brw_JMPI(p, ip, ip, brw_imm_d(0));
1146       {
1147          emit_aa(c, arg1, 2);
1148          fire_fb_write(c, 0, nr, target, eot);
1149          /* note - thread killed in subroutine */
1150       }
1151       brw_land_fwd_jump(p, jmp);
1152
1153       /* ELSE: Shuffle up one register to fill in the hole left for AA:
1154        */
1155       fire_fb_write(c, 1, nr-1, target, eot);
1156    }
1157 }
1158
1159
1160 /**
1161  * Move a GPR to scratch memory.
1162  */
1163 static void emit_spill( struct brw_wm_compile *c,
1164                         struct brw_reg reg,
1165                         GLuint slot )
1166 {
1167    struct brw_compile *p = &c->func;
1168
1169    /*
1170      mov (16) m2.0<1>:ud   r2.0<8;8,1>:ud   { Align1 Compr }
1171    */
1172    brw_MOV(p, brw_message_reg(2), reg);
1173
1174    /*
1175      mov (1) r0.2<1>:d    0x00000080:d     { Align1 NoMask }
1176      send (16) null.0<1>:uw m1               r0.0<8;8,1>:uw   0x053003ff:ud    { Align1 }
1177    */
1178    brw_dp_WRITE_16(p,
1179                    retype(vec16(brw_vec8_grf(0, 0)), BRW_REGISTER_TYPE_UW),
1180                    slot);
1181 }
1182
1183
1184 /**
1185  * Load a GPR from scratch memory.
1186  */
1187 static void emit_unspill( struct brw_wm_compile *c,
1188                           struct brw_reg reg,
1189                           GLuint slot )
1190 {
1191    struct brw_compile *p = &c->func;
1192
1193    /* Slot 0 is the undef value.
1194     */
1195    if (slot == 0) {
1196       brw_MOV(p, reg, brw_imm_f(0));
1197       return;
1198    }
1199
1200    /*
1201      mov (1) r0.2<1>:d    0x000000c0:d     { Align1 NoMask }
1202      send (16) r110.0<1>:uw m1               r0.0<8;8,1>:uw   0x041243ff:ud    { Align1 }
1203    */
1204
1205    brw_dp_READ_16(p,
1206                   retype(vec16(reg), BRW_REGISTER_TYPE_UW),
1207                   slot);
1208 }
1209
1210
1211 /**
1212  * Retrieve up to 4 GEN4 register pairs for the given wm reg:
1213  * Args with unspill_reg != 0 will be loaded from scratch memory.
1214  */
1215 static void get_argument_regs( struct brw_wm_compile *c,
1216                                struct brw_wm_ref *arg[],
1217                                struct brw_reg *regs )
1218 {
1219    GLuint i;
1220
1221    for (i = 0; i < 4; i++) {
1222       if (arg[i]) {
1223          if (arg[i]->unspill_reg)
1224             emit_unspill(c,
1225                          brw_vec8_grf(arg[i]->unspill_reg, 0),
1226                          arg[i]->value->spill_slot);
1227
1228          regs[i] = arg[i]->hw_reg;
1229       }
1230       else {
1231          regs[i] = brw_null_reg();
1232       }
1233    }
1234 }
1235
1236
1237 /**
1238  * For values that have a spill_slot!=0, write those regs to scratch memory.
1239  */
1240 static void spill_values( struct brw_wm_compile *c,
1241                           struct brw_wm_value *values,
1242                           GLuint nr )
1243 {
1244    GLuint i;
1245
1246    for (i = 0; i < nr; i++)
1247       if (values[i].spill_slot)
1248          emit_spill(c, values[i].hw_reg, values[i].spill_slot);
1249 }
1250
1251
1252 /* Emit the fragment program instructions here.
1253  */
1254 void brw_wm_emit( struct brw_wm_compile *c )
1255 {
1256    struct brw_compile *p = &c->func;
1257    GLuint insn;
1258
1259    brw_set_compression_control(p, BRW_COMPRESSION_COMPRESSED);
1260
1261    /* Check if any of the payload regs need to be spilled:
1262     */
1263    spill_values(c, c->payload.depth, 4);
1264    spill_values(c, c->creg, c->nr_creg);
1265    spill_values(c, c->payload.input_interp, FRAG_ATTRIB_MAX);
1266
1267
1268    for (insn = 0; insn < c->nr_insns; insn++) {
1269
1270       struct brw_wm_instruction *inst = &c->instruction[insn];
1271       struct brw_reg args[3][4], dst[4];
1272       GLuint i, dst_flags;
1273
1274       /* Get argument regs:
1275        */
1276       for (i = 0; i < 3; i++)
1277          get_argument_regs(c, inst->src[i], args[i]);
1278
1279       /* Get dest regs:
1280        */
1281       for (i = 0; i < 4; i++)
1282          if (inst->dst[i])
1283             dst[i] = inst->dst[i]->hw_reg;
1284          else
1285             dst[i] = brw_null_reg();
1286
1287       /* Flags
1288        */
1289       dst_flags = inst->writemask;
1290       if (inst->saturate)
1291          dst_flags |= SATURATE;
1292
1293       switch (inst->opcode) {
1294          /* Generated instructions for calculating triangle interpolants:
1295           */
1296       case WM_PIXELXY:
1297          emit_pixel_xy(p, dst, dst_flags);
1298          break;
1299
1300       case WM_DELTAXY:
1301          emit_delta_xy(p, dst, dst_flags, args[0]);
1302          break;
1303
1304       case WM_WPOSXY:
1305          emit_wpos_xy(c, dst, dst_flags, args[0]);
1306          break;
1307
1308       case WM_PIXELW:
1309          emit_pixel_w(p, dst, dst_flags, args[0], args[1]);
1310          break;
1311
1312       case WM_LINTERP:
1313          emit_linterp(p, dst, dst_flags, args[0], args[1]);
1314          break;
1315
1316       case WM_PINTERP:
1317          emit_pinterp(p, dst, dst_flags, args[0], args[1], args[2]);
1318          break;
1319
1320       case WM_CINTERP:
1321          emit_cinterp(p, dst, dst_flags, args[0]);
1322          break;
1323
1324       case WM_FB_WRITE:
1325          emit_fb_write(c, args[0], args[1], args[2], inst->target, inst->eot);
1326          break;
1327
1328       case WM_FRONTFACING:
1329          emit_frontfacing(p, dst, dst_flags);
1330          break;
1331
1332          /* Straightforward arithmetic:
1333           */
1334       case OPCODE_ADD:
1335          emit_alu2(p, brw_ADD, dst, dst_flags, args[0], args[1]);
1336          break;
1337
1338       case OPCODE_FRC:
1339          emit_alu1(p, brw_FRC, dst, dst_flags, args[0]);
1340          break;
1341
1342       case OPCODE_FLR:
1343          emit_alu1(p, brw_RNDD, dst, dst_flags, args[0]);
1344          break;
1345
1346       case OPCODE_DDX:
1347          emit_ddxy(p, dst, dst_flags, GL_TRUE, args[0]);
1348          break;
1349
1350       case OPCODE_DDY:
1351          emit_ddxy(p, dst, dst_flags, GL_FALSE, args[0]);
1352          break;
1353
1354       case OPCODE_DP3:
1355          emit_dp3(p, dst, dst_flags, args[0], args[1]);
1356          break;
1357
1358       case OPCODE_DP4:
1359          emit_dp4(p, dst, dst_flags, args[0], args[1]);
1360          break;
1361
1362       case OPCODE_DPH:
1363          emit_dph(p, dst, dst_flags, args[0], args[1]);
1364          break;
1365
1366       case OPCODE_TRUNC:
1367          emit_trunc(p, dst, dst_flags, args[0]);
1368          break;
1369
1370       case OPCODE_LRP:
1371          emit_lrp(p, dst, dst_flags, args[0], args[1], args[2]);
1372          break;
1373
1374       case OPCODE_MAD:
1375          emit_mad(p, dst, dst_flags, args[0], args[1], args[2]);
1376          break;
1377
1378       case OPCODE_MOV:
1379       case OPCODE_SWZ:
1380          emit_alu1(p, brw_MOV, dst, dst_flags, args[0]);
1381          break;
1382
1383       case OPCODE_MUL:
1384          emit_alu2(p, brw_MUL, dst, dst_flags, args[0], args[1]);
1385          break;
1386
1387       case OPCODE_XPD:
1388          emit_xpd(p, dst, dst_flags, args[0], args[1]);
1389          break;
1390
1391          /* Higher math functions:
1392           */
1393       case OPCODE_RCP:
1394          emit_math1(p, BRW_MATH_FUNCTION_INV, dst, dst_flags, args[0]);
1395          break;
1396
1397       case OPCODE_RSQ:
1398          emit_math1(p, BRW_MATH_FUNCTION_RSQ, dst, dst_flags, args[0]);
1399          break;
1400
1401       case OPCODE_SIN:
1402          emit_math1(p, BRW_MATH_FUNCTION_SIN, dst, dst_flags, args[0]);
1403          break;
1404
1405       case OPCODE_COS:
1406          emit_math1(p, BRW_MATH_FUNCTION_COS, dst, dst_flags, args[0]);
1407          break;
1408
1409       case OPCODE_EX2:
1410          emit_math1(p, BRW_MATH_FUNCTION_EXP, dst, dst_flags, args[0]);
1411          break;
1412
1413       case OPCODE_LG2:
1414          emit_math1(p, BRW_MATH_FUNCTION_LOG, dst, dst_flags, args[0]);
1415          break;
1416
1417       case OPCODE_SCS:
1418          /* There is an scs math function, but it would need some
1419           * fixup for 16-element execution.
1420           */
1421          if (dst_flags & WRITEMASK_X)
1422             emit_math1(p, BRW_MATH_FUNCTION_COS, dst, (dst_flags&SATURATE)|WRITEMASK_X, args[0]);
1423          if (dst_flags & WRITEMASK_Y)
1424             emit_math1(p, BRW_MATH_FUNCTION_SIN, dst+1, (dst_flags&SATURATE)|WRITEMASK_X, args[0]);
1425          break;
1426
1427       case OPCODE_POW:
1428          emit_math2(p, BRW_MATH_FUNCTION_POW, dst, dst_flags, args[0], args[1]);
1429          break;
1430
1431          /* Comparisons:
1432           */
1433       case OPCODE_CMP:
1434          emit_cmp(p, dst, dst_flags, args[0], args[1], args[2]);
1435          break;
1436
1437       case OPCODE_MAX:
1438          emit_max(p, dst, dst_flags, args[0], args[1]);
1439          break;
1440
1441       case OPCODE_MIN:
1442          emit_min(p, dst, dst_flags, args[0], args[1]);
1443          break;
1444
1445       case OPCODE_SLT:
1446          emit_slt(p, dst, dst_flags, args[0], args[1]);
1447          break;
1448
1449       case OPCODE_SLE:
1450          emit_sle(p, dst, dst_flags, args[0], args[1]);
1451         break;
1452       case OPCODE_SGT:
1453          emit_sgt(p, dst, dst_flags, args[0], args[1]);
1454         break;
1455       case OPCODE_SGE:
1456          emit_sge(p, dst, dst_flags, args[0], args[1]);
1457          break;
1458       case OPCODE_SEQ:
1459          emit_seq(p, dst, dst_flags, args[0], args[1]);
1460         break;
1461       case OPCODE_SNE:
1462          emit_sne(p, dst, dst_flags, args[0], args[1]);
1463         break;
1464
1465       case OPCODE_LIT:
1466          emit_lit(p, dst, dst_flags, args[0]);
1467          break;
1468
1469          /* Texturing operations:
1470           */
1471       case OPCODE_TEX:
1472          emit_tex(c, inst, dst, dst_flags, args[0]);
1473          break;
1474
1475       case OPCODE_TXB:
1476          emit_txb(c, inst, dst, dst_flags, args[0]);
1477          break;
1478
1479       case OPCODE_KIL:
1480          emit_kil(c, args[0]);
1481          break;
1482
1483       case OPCODE_KIL_NV:
1484          emit_kil_nv(c);
1485          break;
1486
1487       default:
1488          _mesa_printf("Unsupported opcode %i (%s) in fragment shader\n",
1489                       inst->opcode, inst->opcode < MAX_OPCODE ?
1490                                     _mesa_opcode_string(inst->opcode) :
1491                                     "unknown");
1492       }
1493
1494       for (i = 0; i < 4; i++)
1495         if (inst->dst[i] && inst->dst[i]->spill_slot)
1496            emit_spill(c,
1497                       inst->dst[i]->hw_reg,
1498                       inst->dst[i]->spill_slot);
1499    }
1500
1501    if (INTEL_DEBUG & DEBUG_WM) {
1502       int i;
1503
1504       _mesa_printf("wm-native:\n");
1505       for (i = 0; i < p->nr_insn; i++)
1506          brw_disasm(stderr, &p->store[i]);
1507       _mesa_printf("\n");
1508    }
1509 }