src/mesa/drivers/dri/i965/brw_wm_emit.c

   1 /*
   2  Copyright (C) Intel Corp.  2006.  All Rights Reserved.
   3  Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
   4  develop this 3D driver.
   5
   6  Permission is hereby granted, free of charge, to any person obtaining
   7  a copy of this software and associated documentation files (the
   8  "Software"), to deal in the Software without restriction, including
   9  without limitation the rights to use, copy, modify, merge, publish,
  10  distribute, sublicense, and/or sell copies of the Software, and to
  11  permit persons to whom the Software is furnished to do so, subject to
  12  the following conditions:
  13
  14  The above copyright notice and this permission notice (including the
  15  next paragraph) shall be included in all copies or substantial
  16  portions of the Software.
  17
  18  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
  19  EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  20  MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
  21  IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
  22  LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
  23  OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
  24  WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
  25
  26  **********************************************************************/
  27  /*
  28   * Authors:
  29   *   Keith Whitwell <keith@tungstengraphics.com>
  30   */
  31
  32
  33 #include "macros.h"
  34 #include "brw_context.h"
  35 #include "brw_wm.h"
  36
  37 #define SATURATE (1<<5)
  38
  39 /* Not quite sure how correct this is - need to understand horiz
  40  * vs. vertical strides a little better.
  41  */
  42 static __inline struct brw_reg sechalf( struct brw_reg reg )
  43 {
  44    if (reg.vstride)
  45       reg.nr++;
  46    return reg;
  47 }
  48
  49 /* Payload R0:
  50  *
  51  * R0.0 -- pixel mask, one bit for each of 4 pixels in 4 tiles,
  52  *         corresponding to each of the 16 execution channels.
  53  * R0.1..8 -- ?
  54  * R1.0 -- triangle vertex 0.X
  55  * R1.1 -- triangle vertex 0.Y
  56  * R1.2 -- tile 0 x,y coords (2 packed uwords)
  57  * R1.3 -- tile 1 x,y coords (2 packed uwords)
  58  * R1.4 -- tile 2 x,y coords (2 packed uwords)
  59  * R1.5 -- tile 3 x,y coords (2 packed uwords)
  60  * R1.6 -- ?
  61  * R1.7 -- ?
  62  * R1.8 -- ?
  63  */
  64
  65
  66 static void emit_pixel_xy(struct brw_compile *p,
  67                           const struct brw_reg *dst,
  68                           GLuint mask,
  69                           const struct brw_reg *arg0)
  70 {
  71    struct brw_reg r1 = brw_vec1_grf(1, 0);
  72    struct brw_reg r1_uw = retype(r1, BRW_REGISTER_TYPE_UW);
  73
  74    brw_set_compression_control(p, BRW_COMPRESSION_NONE);
  75
  76    /* Calculate pixel centers by adding 1 or 0 to each of the
  77     * micro-tile coordinates passed in r1.
  78     */
  79    if (mask & WRITEMASK_X) {
  80       brw_ADD(p,
  81               vec16(retype(dst[0], BRW_REGISTER_TYPE_UW)),
  82               stride(suboffset(r1_uw, 4), 2, 4, 0),
  83               brw_imm_v(0x10101010));
  84    }
  85
  86    if (mask & WRITEMASK_Y) {
  87       brw_ADD(p,
  88               vec16(retype(dst[1], BRW_REGISTER_TYPE_UW)),
  89               stride(suboffset(r1_uw,5), 2, 4, 0),
  90               brw_imm_v(0x11001100));
  91    }
  92
  93    brw_set_compression_control(p, BRW_COMPRESSION_COMPRESSED);
  94 }
  95
  96
  97
  98 static void emit_delta_xy(struct brw_compile *p,
  99                           const struct brw_reg *dst,
 100                           GLuint mask,
 101                           const struct brw_reg *arg0,
 102                           const struct brw_reg *arg1)
 103 {
 104    struct brw_reg r1 = brw_vec1_grf(1, 0);
 105
 106    /* Calc delta X,Y by subtracting origin in r1 from the pixel
 107     * centers.
 108     */
 109    if (mask & WRITEMASK_X) {
 110       brw_ADD(p,
 111               dst[0],
 112               retype(arg0[0], BRW_REGISTER_TYPE_UW),
 113               negate(r1));
 114    }
 115
 116    if (mask & WRITEMASK_Y) {
 117       brw_ADD(p,
 118               dst[1],
 119               retype(arg0[1], BRW_REGISTER_TYPE_UW),
 120               negate(suboffset(r1,1)));
 121
 122    }
 123 }
 124
 125 static void emit_wpos_xy(struct brw_compile *p,
 126                            const struct brw_reg *dst,
 127                            GLuint mask,
 128                            const struct brw_reg *arg0)
 129 {
 130    /* Calc delta X,Y by subtracting origin in r1 from the pixel
 131     * centers.
 132     */
 133    if (mask & WRITEMASK_X) {
 134       brw_MOV(p,
 135               dst[0],
 136               retype(arg0[0], BRW_REGISTER_TYPE_UW));
 137    }
 138
 139    if (mask & WRITEMASK_Y) {
 140       /* TODO -- window_height - Y */
 141       brw_MOV(p,
 142               dst[1],
 143               negate(retype(arg0[1], BRW_REGISTER_TYPE_UW)));
 144
 145    }
 146 }
 147
 148
 149 static void emit_pixel_w( struct brw_compile *p,
 150                           const struct brw_reg *dst,
 151                           GLuint mask,
 152                           const struct brw_reg *arg0,
 153                           const struct brw_reg *deltas)
 154 {
 155    /* Don't need this if all you are doing is interpolating color, for
 156     * instance.
 157     */
 158    if (mask & WRITEMASK_W) {
 159       struct brw_reg interp3 = brw_vec1_grf(arg0[0].nr+1, 4);
 160
 161       /* Calc 1/w - just linterp wpos[3] optimized by putting the
 162        * result straight into a message reg.
 163        */
 164       brw_LINE(p, brw_null_reg(), interp3, deltas[0]);
 165       brw_MAC(p, brw_message_reg(2), suboffset(interp3, 1), deltas[1]);
 166
 167       /* Calc w */
 168       brw_math_16( p, dst[3],
 169                    BRW_MATH_FUNCTION_INV,
 170                    BRW_MATH_SATURATE_NONE,
 171                    2, brw_null_reg(),
 172                    BRW_MATH_PRECISION_FULL);
 173    }
 174 }
 175
 176
 177
 178 static void emit_linterp( struct brw_compile *p,
 179                          const struct brw_reg *dst,
 180                          GLuint mask,
 181                          const struct brw_reg *arg0,
 182                          const struct brw_reg *deltas )
 183 {
 184    struct brw_reg interp[4];
 185    GLuint nr = arg0[0].nr;
 186    GLuint i;
 187
 188    interp[0] = brw_vec1_grf(nr, 0);
 189    interp[1] = brw_vec1_grf(nr, 4);
 190    interp[2] = brw_vec1_grf(nr+1, 0);
 191    interp[3] = brw_vec1_grf(nr+1, 4);
 192
 193    for(i = 0; i < 4; i++ ) {
 194       if (mask & (1<<i)) {
 195          brw_LINE(p, brw_null_reg(), interp[i], deltas[0]);
 196          brw_MAC(p, dst[i], suboffset(interp[i],1), deltas[1]);
 197       }
 198    }
 199 }
 200
 201
 202 static void emit_pinterp( struct brw_compile *p,
 203                           const struct brw_reg *dst,
 204                           GLuint mask,
 205                           const struct brw_reg *arg0,
 206                           const struct brw_reg *deltas,
 207                           const struct brw_reg *w)
 208 {
 209    struct brw_reg interp[4];
 210    GLuint nr = arg0[0].nr;
 211    GLuint i;
 212
 213    interp[0] = brw_vec1_grf(nr, 0);
 214    interp[1] = brw_vec1_grf(nr, 4);
 215    interp[2] = brw_vec1_grf(nr+1, 0);
 216    interp[3] = brw_vec1_grf(nr+1, 4);
 217
 218    for(i = 0; i < 4; i++ ) {
 219       if (mask & (1<<i)) {
 220          brw_LINE(p, brw_null_reg(), interp[i], deltas[0]);
 221          brw_MAC(p, dst[i], suboffset(interp[i],1), deltas[1]);
 222          brw_MUL(p, dst[i], dst[i], w[3]);
 223       }
 224    }
 225 }
 226
 227 static void emit_cinterp( struct brw_compile *p,
 228                          const struct brw_reg *dst,
 229                          GLuint mask,
 230                          const struct brw_reg *arg0 )
 231 {
 232         struct brw_reg interp[4];
 233         GLuint nr = arg0[0].nr;
 234         GLuint i;
 235
 236         interp[0] = brw_vec1_grf(nr, 0);
 237         interp[1] = brw_vec1_grf(nr, 4);
 238         interp[2] = brw_vec1_grf(nr+1, 0);
 239         interp[3] = brw_vec1_grf(nr+1, 4);
 240
 241         for(i = 0; i < 4; i++ ) {
 242                 if (mask & (1<<i)) {
 243                         brw_MOV(p, dst[i], suboffset(interp[i],3));     /* TODO: optimize away like other moves */
 244                 }
 245         }
 246 }
 247
 248
 249
 250
 251
 252 static void emit_alu1( struct brw_compile *p,
 253                        struct brw_instruction *(*func)(struct brw_compile *,
 254                                                        struct brw_reg,
 255                                                        struct brw_reg),
 256                        const struct brw_reg *dst,
 257                        GLuint mask,
 258                        const struct brw_reg *arg0 )
 259 {
 260    GLuint i;
 261
 262    if (mask & SATURATE)
 263       brw_set_saturate(p, 1);
 264
 265    for (i = 0; i < 4; i++) {
 266       if (mask & (1<<i)) {
 267          func(p, dst[i], arg0[i]);
 268       }
 269    }
 270
 271    if (mask & SATURATE)
 272       brw_set_saturate(p, 0);
 273 }
 274
 275 static void emit_alu2( struct brw_compile *p,
 276                        struct brw_instruction *(*func)(struct brw_compile *,
 277                                                        struct brw_reg,
 278                                                        struct brw_reg,
 279                                                        struct brw_reg),
 280                        const struct brw_reg *dst,
 281                        GLuint mask,
 282                        const struct brw_reg *arg0,
 283                        const struct brw_reg *arg1 )
 284 {
 285    GLuint i;
 286
 287    if (mask & SATURATE)
 288       brw_set_saturate(p, 1);
 289
 290    for (i = 0; i < 4; i++) {
 291       if (mask & (1<<i)) {
 292          func(p, dst[i], arg0[i], arg1[i]);
 293       }
 294    }
 295
 296    if (mask & SATURATE)
 297       brw_set_saturate(p, 0);
 298 }
 299
 300
 301 static void emit_mad( struct brw_compile *p,
 302                       const struct brw_reg *dst,
 303                       GLuint mask,
 304                       const struct brw_reg *arg0,
 305                       const struct brw_reg *arg1,
 306                       const struct brw_reg *arg2 )
 307 {
 308    GLuint i;
 309
 310    for (i = 0; i < 4; i++) {
 311       if (mask & (1<<i)) {
 312          brw_MUL(p, dst[i], arg0[i], arg1[i]);
 313
 314          brw_set_saturate(p, (mask & SATURATE) ? 1 : 0);
 315          brw_ADD(p, dst[i], dst[i], arg2[i]);
 316          brw_set_saturate(p, 0);
 317       }
 318    }
 319 }
 320
 321
 322 static void emit_lrp( struct brw_compile *p,
 323                       const struct brw_reg *dst,
 324                       GLuint mask,
 325                       const struct brw_reg *arg0,
 326                       const struct brw_reg *arg1,
 327                       const struct brw_reg *arg2 )
 328 {
 329    GLuint i;
 330
 331    /* Uses dst as a temporary:
 332     */
 333    for (i = 0; i < 4; i++) {
 334       if (mask & (1<<i)) {
 335          /* Can I use the LINE instruction for this?
 336           */
 337          brw_ADD(p, dst[i], negate(arg0[i]), brw_imm_f(1.0));
 338          brw_MUL(p, brw_null_reg(), dst[i], arg2[i]);
 339
 340          brw_set_saturate(p, (mask & SATURATE) ? 1 : 0);
 341          brw_MAC(p, dst[i], arg0[i], arg1[i]);
 342          brw_set_saturate(p, 0);
 343       }
 344    }
 345 }
 346 static void emit_sop( struct brw_compile *p,
 347                       const struct brw_reg *dst,
 348                       GLuint mask,
 349                       GLuint cond,
 350                       const struct brw_reg *arg0,
 351                       const struct brw_reg *arg1 )
 352 {
 353    GLuint i;
 354
 355    for (i = 0; i < 4; i++) {
 356       if (mask & (1<<i)) {
 357          brw_MOV(p, dst[i], brw_imm_f(0));
 358          brw_CMP(p, brw_null_reg(), cond, arg0[i], arg1[i]);
 359          brw_MOV(p, dst[i], brw_imm_f(1.0));
 360          brw_set_predicate_control_flag_value(p, 0xff);
 361       }
 362    }
 363 }
 364
 365 static void emit_slt( struct brw_compile *p,
 366                       const struct brw_reg *dst,
 367                       GLuint mask,
 368                       const struct brw_reg *arg0,
 369                       const struct brw_reg *arg1 )
 370 {
 371          emit_sop(p, dst, mask, BRW_CONDITIONAL_L, arg0, arg1);
 372 }
 373
 374 static void emit_sle( struct brw_compile *p,
 375                       const struct brw_reg *dst,
 376                       GLuint mask,
 377                       const struct brw_reg *arg0,
 378                       const struct brw_reg *arg1 )
 379 {
 380          emit_sop(p, dst, mask, BRW_CONDITIONAL_LE, arg0, arg1);
 381 }
 382
 383 static void emit_sgt( struct brw_compile *p,
 384                       const struct brw_reg *dst,
 385                       GLuint mask,
 386                       const struct brw_reg *arg0,
 387                       const struct brw_reg *arg1 )
 388 {
 389          emit_sop(p, dst, mask, BRW_CONDITIONAL_G, arg0, arg1);
 390 }
 391
 392 static void emit_sge( struct brw_compile *p,
 393                       const struct brw_reg *dst,
 394                       GLuint mask,
 395                       const struct brw_reg *arg0,
 396                       const struct brw_reg *arg1 )
 397 {
 398          emit_sop(p, dst, mask, BRW_CONDITIONAL_GE, arg0, arg1);
 399 }
 400
 401 static void emit_seq( struct brw_compile *p,
 402                       const struct brw_reg *dst,
 403                       GLuint mask,
 404                       const struct brw_reg *arg0,
 405                       const struct brw_reg *arg1 )
 406 {
 407          emit_sop(p, dst, mask, BRW_CONDITIONAL_EQ, arg0, arg1);
 408 }
 409
 410 static void emit_sne( struct brw_compile *p,
 411                       const struct brw_reg *dst,
 412                       GLuint mask,
 413                       const struct brw_reg *arg0,
 414                       const struct brw_reg *arg1 )
 415 {
 416          emit_sop(p, dst, mask, BRW_CONDITIONAL_NEQ, arg0, arg1);
 417 }
 418
 419 static void emit_cmp( struct brw_compile *p,
 420                       const struct brw_reg *dst,
 421                       GLuint mask,
 422                       const struct brw_reg *arg0,
 423                       const struct brw_reg *arg1,
 424                       const struct brw_reg *arg2 )
 425 {
 426    GLuint i;
 427
 428    for (i = 0; i < 4; i++) {
 429       if (mask & (1<<i)) {
 430          brw_set_saturate(p, (mask & SATURATE) ? 1 : 0);
 431          brw_MOV(p, dst[i], arg2[i]);
 432          brw_set_saturate(p, 0);
 433
 434          brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_L, arg0[i], brw_imm_f(0));
 435
 436          brw_set_saturate(p, (mask & SATURATE) ? 1 : 0);
 437          brw_MOV(p, dst[i], arg1[i]);
 438          brw_set_saturate(p, 0);
 439          brw_set_predicate_control_flag_value(p, 0xff);
 440       }
 441    }
 442 }
 443
 444 static void emit_max( struct brw_compile *p,
 445                       const struct brw_reg *dst,
 446                       GLuint mask,
 447                       const struct brw_reg *arg0,
 448                       const struct brw_reg *arg1 )
 449 {
 450    GLuint i;
 451
 452    for (i = 0; i < 4; i++) {
 453       if (mask & (1<<i)) {
 454          brw_set_saturate(p, (mask & SATURATE) ? 1 : 0);
 455          brw_MOV(p, dst[i], arg0[i]);
 456          brw_set_saturate(p, 0);
 457
 458          brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_L, arg0[i], arg1[i]);
 459
 460          brw_set_saturate(p, (mask & SATURATE) ? 1 : 0);
 461          brw_MOV(p, dst[i], arg1[i]);
 462          brw_set_saturate(p, 0);
 463          brw_set_predicate_control_flag_value(p, 0xff);
 464       }
 465    }
 466 }
 467
 468 static void emit_min( struct brw_compile *p,
 469                       const struct brw_reg *dst,
 470                       GLuint mask,
 471                       const struct brw_reg *arg0,
 472                       const struct brw_reg *arg1 )
 473 {
 474    GLuint i;
 475
 476    for (i = 0; i < 4; i++) {
 477       if (mask & (1<<i)) {
 478          brw_set_saturate(p, (mask & SATURATE) ? 1 : 0);
 479          brw_MOV(p, dst[i], arg1[i]);
 480          brw_set_saturate(p, 0);
 481
 482          brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_L, arg0[i], arg1[i]);
 483
 484          brw_set_saturate(p, (mask & SATURATE) ? 1 : 0);
 485          brw_MOV(p, dst[i], arg0[i]);
 486          brw_set_saturate(p, 0);
 487          brw_set_predicate_control_flag_value(p, 0xff);
 488       }
 489    }
 490 }
 491
 492
 493 static void emit_dp3( struct brw_compile *p,
 494                       const struct brw_reg *dst,
 495                       GLuint mask,
 496                       const struct brw_reg *arg0,
 497                       const struct brw_reg *arg1 )
 498 {
 499    assert((mask & WRITEMASK_XYZW) == WRITEMASK_X);
 500
 501    brw_MUL(p, brw_null_reg(), arg0[0], arg1[0]);
 502    brw_MAC(p, brw_null_reg(), arg0[1], arg1[1]);
 503
 504    brw_set_saturate(p, (mask & SATURATE) ? 1 : 0);
 505    brw_MAC(p, dst[0], arg0[2], arg1[2]);
 506    brw_set_saturate(p, 0);
 507 }
 508
 509
 510 static void emit_dp4( struct brw_compile *p,
 511                       const struct brw_reg *dst,
 512                       GLuint mask,
 513                       const struct brw_reg *arg0,
 514                       const struct brw_reg *arg1 )
 515 {
 516    assert((mask & WRITEMASK_XYZW) == WRITEMASK_X);
 517
 518    brw_MUL(p, brw_null_reg(), arg0[0], arg1[0]);
 519    brw_MAC(p, brw_null_reg(), arg0[1], arg1[1]);
 520    brw_MAC(p, brw_null_reg(), arg0[2], arg1[2]);
 521
 522    brw_set_saturate(p, (mask & SATURATE) ? 1 : 0);
 523    brw_MAC(p, dst[0], arg0[3], arg1[3]);
 524    brw_set_saturate(p, 0);
 525 }
 526
 527
 528 static void emit_dph( struct brw_compile *p,
 529                       const struct brw_reg *dst,
 530                       GLuint mask,
 531                       const struct brw_reg *arg0,
 532                       const struct brw_reg *arg1 )
 533 {
 534    assert((mask & WRITEMASK_XYZW) == WRITEMASK_X);
 535
 536    brw_MUL(p, brw_null_reg(), arg0[0], arg1[0]);
 537    brw_MAC(p, brw_null_reg(), arg0[1], arg1[1]);
 538    brw_MAC(p, dst[0], arg0[2], arg1[2]);
 539
 540    brw_set_saturate(p, (mask & SATURATE) ? 1 : 0);
 541    brw_ADD(p, dst[0], dst[0], arg1[3]);
 542    brw_set_saturate(p, 0);
 543 }
 544
 545
 546 static void emit_xpd( struct brw_compile *p,
 547                       const struct brw_reg *dst,
 548                       GLuint mask,
 549                       const struct brw_reg *arg0,
 550                       const struct brw_reg *arg1 )
 551 {
 552    GLuint i;
 553
 554    assert(!(mask & WRITEMASK_W) == WRITEMASK_X);
 555
 556    for (i = 0 ; i < 3; i++) {
 557       if (mask & (1<<i)) {
 558          GLuint i2 = (i+2)%3;
 559          GLuint i1 = (i+1)%3;
 560
 561          brw_MUL(p, brw_null_reg(), negate(arg0[i2]), arg1[i1]);
 562
 563          brw_set_saturate(p, (mask & SATURATE) ? 1 : 0);
 564          brw_MAC(p, dst[i], arg0[i1], arg1[i2]);
 565          brw_set_saturate(p, 0);
 566       }
 567    }
 568 }
 569
 570
 571 static void emit_math1( struct brw_compile *p,
 572                         GLuint function,
 573                         const struct brw_reg *dst,
 574                         GLuint mask,
 575                         const struct brw_reg *arg0 )
 576 {
 577    //assert((mask & WRITEMASK_XYZW) == WRITEMASK_X ||
 578    //     function == BRW_MATH_FUNCTION_SINCOS);
 579
 580    brw_MOV(p, brw_message_reg(2), arg0[0]);
 581
 582    /* Send two messages to perform all 16 operations:
 583     */
 584    brw_math_16(p,
 585                dst[0],
 586                function,
 587                (mask & SATURATE) ? BRW_MATH_SATURATE_SATURATE : BRW_MATH_SATURATE_NONE,
 588                2,
 589                brw_null_reg(),
 590                BRW_MATH_PRECISION_FULL);
 591 }
 592
 593
 594 static void emit_math2( struct brw_compile *p,
 595                         GLuint function,
 596                         const struct brw_reg *dst,
 597                         GLuint mask,
 598                         const struct brw_reg *arg0,
 599                         const struct brw_reg *arg1)
 600 {
 601    assert((mask & WRITEMASK_XYZW) == WRITEMASK_X);
 602
 603    brw_push_insn_state(p);
 604
 605    brw_set_compression_control(p, BRW_COMPRESSION_NONE);
 606    brw_MOV(p, brw_message_reg(2), arg0[0]);
 607    brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF);
 608    brw_MOV(p, brw_message_reg(4), sechalf(arg0[0]));
 609
 610    brw_set_compression_control(p, BRW_COMPRESSION_NONE);
 611    brw_MOV(p, brw_message_reg(3), arg1[0]);
 612    brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF);
 613    brw_MOV(p, brw_message_reg(5), sechalf(arg1[0]));
 614
 615
 616    /* Send two messages to perform all 16 operations:
 617     */
 618    brw_set_compression_control(p, BRW_COMPRESSION_NONE);
 619    brw_math(p,
 620             dst[0],
 621             function,
 622             (mask & SATURATE) ? BRW_MATH_SATURATE_SATURATE : BRW_MATH_SATURATE_NONE,
 623             2,
 624             brw_null_reg(),
 625             BRW_MATH_DATA_VECTOR,
 626             BRW_MATH_PRECISION_FULL);
 627
 628    brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF);
 629    brw_math(p,
 630             offset(dst[0],1),
 631             function,
 632             (mask & SATURATE) ? BRW_MATH_SATURATE_SATURATE : BRW_MATH_SATURATE_NONE,
 633             4,
 634             brw_null_reg(),
 635             BRW_MATH_DATA_VECTOR,
 636             BRW_MATH_PRECISION_FULL);
 637
 638    brw_pop_insn_state(p);
 639 }
 640
 641
 642
 643 static void emit_tex( struct brw_wm_compile *c,
 644                       const struct brw_wm_instruction *inst,
 645                       struct brw_reg *dst,
 646                       GLuint dst_flags,
 647                       struct brw_reg *arg )
 648 {
 649    struct brw_compile *p = &c->func;
 650    GLuint msgLength, responseLength;
 651    GLboolean shadow = (c->key.shadowtex_mask & (1<<inst->tex_unit)) ? 1 : 0;
 652    GLuint i, nr;
 653    GLuint emit;
 654
 655    /* How many input regs are there?
 656     */
 657    switch (inst->tex_idx) {
 658    case TEXTURE_1D_INDEX:
 659       emit = WRITEMASK_X;
 660       nr = 1;
 661       break;
 662    case TEXTURE_2D_INDEX:
 663    case TEXTURE_RECT_INDEX:
 664       emit = WRITEMASK_XY;
 665       nr = 2;
 666       break;
 667    default:
 668       emit = WRITEMASK_XYZ;
 669       nr = 3;
 670       break;
 671    }
 672
 673    if (shadow) {
 674       nr = 4;
 675       emit |= WRITEMASK_W;
 676    }
 677
 678    msgLength = 1;
 679
 680    for (i = 0; i < nr; i++) {
 681       static const GLuint swz[4] = {0,1,2,2};
 682       if (emit & (1<<i))
 683          brw_MOV(p, brw_message_reg(msgLength+1), arg[swz[i]]);
 684       else
 685          brw_MOV(p, brw_message_reg(msgLength+1), brw_imm_f(0));
 686       msgLength += 2;
 687    }
 688
 689    responseLength = 8;          /* always */
 690
 691    brw_SAMPLE(p,
 692               retype(vec16(dst[0]), BRW_REGISTER_TYPE_UW),
 693               1,
 694               retype(c->payload.depth[0].hw_reg, BRW_REGISTER_TYPE_UW),
 695               inst->tex_unit + 1, /* surface */
 696               inst->tex_unit,     /* sampler */
 697               inst->writemask,
 698               (shadow ?
 699                BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_COMPARE :
 700                BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE),
 701               responseLength,
 702               msgLength,
 703               0);
 704
 705 }
 706
 707
 708 static void emit_txb( struct brw_wm_compile *c,
 709                       const struct brw_wm_instruction *inst,
 710                       struct brw_reg *dst,
 711                       GLuint dst_flags,
 712                       struct brw_reg *arg )
 713 {
 714    struct brw_compile *p = &c->func;
 715    GLuint msgLength;
 716
 717    /* Shadow ignored for txb.
 718     */
 719    switch (inst->tex_idx) {
 720    case TEXTURE_1D_INDEX:
 721       brw_MOV(p, brw_message_reg(2), arg[0]);
 722       brw_MOV(p, brw_message_reg(4), brw_imm_f(0));
 723       brw_MOV(p, brw_message_reg(6), brw_imm_f(0));
 724       break;
 725    case TEXTURE_2D_INDEX:
 726    case TEXTURE_RECT_INDEX:
 727       brw_MOV(p, brw_message_reg(2), arg[0]);
 728       brw_MOV(p, brw_message_reg(4), arg[1]);
 729       brw_MOV(p, brw_message_reg(6), brw_imm_f(0));
 730       break;
 731    default:
 732       brw_MOV(p, brw_message_reg(2), arg[0]);
 733       brw_MOV(p, brw_message_reg(4), arg[1]);
 734       brw_MOV(p, brw_message_reg(6), arg[2]);
 735       break;
 736    }
 737
 738    brw_MOV(p, brw_message_reg(8), arg[3]);
 739    msgLength = 9;
 740
 741
 742    brw_SAMPLE(p,
 743               retype(vec16(dst[0]), BRW_REGISTER_TYPE_UW),
 744               1,
 745               retype(c->payload.depth[0].hw_reg, BRW_REGISTER_TYPE_UW),
 746               inst->tex_unit + 1, /* surface */
 747               inst->tex_unit,     /* sampler */
 748               inst->writemask,
 749               BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_BIAS,
 750               8,                /* responseLength */
 751               msgLength,
 752               0);
 753
 754 }
 755
 756
 757 static void emit_lit( struct brw_compile *p,
 758                       const struct brw_reg *dst,
 759                       GLuint mask,
 760                       const struct brw_reg *arg0 )
 761 {
 762    assert((mask & WRITEMASK_XW) == 0);
 763
 764    if (mask & WRITEMASK_Y) {
 765       brw_set_saturate(p, (mask & SATURATE) ? 1 : 0);
 766       brw_MOV(p, dst[1], arg0[0]);
 767       brw_set_saturate(p, 0);
 768    }
 769
 770    if (mask & WRITEMASK_Z) {
 771       emit_math2(p, BRW_MATH_FUNCTION_POW,
 772                  &dst[2],
 773                  WRITEMASK_X | (mask & SATURATE),
 774                  &arg0[1],
 775                  &arg0[3]);
 776    }
 777
 778    /* Ordinarily you'd use an iff statement to skip or shortcircuit
 779     * some of the POW calculations above, but 16-wide iff statements
 780     * seem to lock c1 hardware, so this is a nasty workaround:
 781     */
 782    brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_LE, arg0[0], brw_imm_f(0));
 783    {
 784       if (mask & WRITEMASK_Y)
 785          brw_MOV(p, dst[1], brw_imm_f(0));
 786
 787       if (mask & WRITEMASK_Z)
 788          brw_MOV(p, dst[2], brw_imm_f(0));
 789    }
 790    brw_set_predicate_control(p, BRW_PREDICATE_NONE);
 791 }
 792
 793
 794 /* Kill pixel - set execution mask to zero for those pixels which
 795  * fail.
 796  */
 797 static void emit_kil( struct brw_wm_compile *c,
 798                       struct brw_reg *arg0)
 799 {
 800    struct brw_compile *p = &c->func;
 801    struct brw_reg r0uw = retype(brw_vec1_grf(0, 0), BRW_REGISTER_TYPE_UW);
 802    GLuint i;
 803
 804
 805    /* XXX - usually won't need 4 compares!
 806     */
 807    for (i = 0; i < 4; i++) {
 808       brw_push_insn_state(p);
 809       brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_GE, arg0[i], brw_imm_f(0));
 810       brw_set_predicate_control_flag_value(p, 0xff);
 811       brw_AND(p, r0uw, brw_flag_reg(), r0uw);
 812       brw_pop_insn_state(p);
 813    }
 814 }
 815
 816 static void fire_fb_write( struct brw_wm_compile *c,
 817                            GLuint base_reg,
 818                            GLuint nr )
 819 {
 820    struct brw_compile *p = &c->func;
 821
 822    /* Pass through control information:
 823     */
 824 /*  mov (8) m1.0<1>:ud   r1.0<8;8,1>:ud   { Align1 NoMask } */
 825    {
 826       brw_push_insn_state(p);
 827       brw_set_mask_control(p, BRW_MASK_DISABLE); /* ? */
 828       brw_set_compression_control(p, BRW_COMPRESSION_NONE);
 829       brw_MOV(p,
 830                brw_message_reg(base_reg + 1),
 831                brw_vec8_grf(1, 0));
 832       brw_pop_insn_state(p);
 833    }
 834
 835    /* Send framebuffer write message: */
 836 /*  send (16) null.0<1>:uw m0               r0.0<8;8,1>:uw   0x85a04000:ud    { Align1 EOT } */
 837    brw_fb_WRITE(p,
 838                 retype(vec16(brw_null_reg()), BRW_REGISTER_TYPE_UW),
 839                 base_reg,
 840                 retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UW),
 841                 0,              /* render surface always 0 */
 842                 nr,
 843                 0,
 844                 1);
 845 }
 846
 847 static void emit_aa( struct brw_wm_compile *c,
 848                      struct brw_reg *arg1,
 849                      GLuint reg )
 850 {
 851    struct brw_compile *p = &c->func;
 852    GLuint comp = c->key.aa_dest_stencil_reg / 2;
 853    GLuint off = c->key.aa_dest_stencil_reg % 2;
 854    struct brw_reg aa = offset(arg1[comp], off);
 855
 856    brw_push_insn_state(p);
 857    brw_set_compression_control(p, BRW_COMPRESSION_NONE); /* ?? */
 858    brw_MOV(p, brw_message_reg(reg), aa);
 859    brw_pop_insn_state(p);
 860 }
 861
 862
 863 /* Post-fragment-program processing.  Send the results to the
 864  * framebuffer.
 865  */
 866 static void emit_fb_write( struct brw_wm_compile *c,
 867                            struct brw_reg *arg0,
 868                            struct brw_reg *arg1,
 869                            struct brw_reg *arg2)
 870 {
 871    struct brw_compile *p = &c->func;
 872    GLuint nr = 2;
 873    GLuint channel;
 874
 875    /* Reserve a space for AA - may not be needed:
 876     */
 877    if (c->key.aa_dest_stencil_reg)
 878       nr += 1;
 879
 880    /* I don't really understand how this achieves the color interleave
 881     * (ie RGBARGBA) in the result:  [Do the saturation here]
 882     */
 883    {
 884       brw_push_insn_state(p);
 885
 886       for (channel = 0; channel < 4; channel++) {
 887          /*  mov (8) m2.0<1>:ud   r28.0<8;8,1>:ud  { Align1 } */
 888          /*  mov (8) m6.0<1>:ud   r29.0<8;8,1>:ud  { Align1 SecHalf } */
 889
 890          brw_set_compression_control(p, BRW_COMPRESSION_NONE);
 891          brw_MOV(p,
 892                  brw_message_reg(nr + channel),
 893                  arg0[channel]);
 894
 895          brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF);
 896          brw_MOV(p,
 897                  brw_message_reg(nr + channel + 4),
 898                  sechalf(arg0[channel]));
 899       }
 900
 901       /* skip over the regs populated above:
 902        */
 903       nr += 8;
 904
 905       brw_pop_insn_state(p);
 906    }
 907
 908    if (c->key.source_depth_to_render_target)
 909    {
 910       if (c->key.computes_depth)
 911          brw_MOV(p, brw_message_reg(nr), arg2[2]);
 912       else
 913          brw_MOV(p, brw_message_reg(nr), arg1[1]); /* ? */
 914
 915       nr += 2;
 916    }
 917
 918    if (c->key.dest_depth_reg)
 919    {
 920       GLuint comp = c->key.dest_depth_reg / 2;
 921       GLuint off = c->key.dest_depth_reg % 2;
 922
 923       if (off != 0) {
 924          brw_push_insn_state(p);
 925          brw_set_compression_control(p, BRW_COMPRESSION_NONE);
 926          brw_MOV(p, brw_message_reg(nr), arg1[comp]);
 927          /* 2nd half? */
 928          brw_MOV(p, brw_message_reg(nr+1), offset(arg1[comp],1));
 929          brw_pop_insn_state(p);
 930       }
 931       else {
 932          brw_MOV(p, brw_message_reg(nr), arg1[comp]);
 933       }
 934       nr += 2;
 935    }
 936
 937
 938    if (!c->key.runtime_check_aads_emit) {
 939       if (c->key.aa_dest_stencil_reg)
 940          emit_aa(c, arg1, 2);
 941
 942       fire_fb_write(c, 0, nr);
 943    }
 944    else {
 945       struct brw_reg v1_null_ud = vec1(retype(brw_null_reg(), BRW_REGISTER_TYPE_UD));
 946       struct brw_reg ip = brw_ip_reg();
 947       struct brw_instruction *jmp;
 948
 949       brw_set_compression_control(p, BRW_COMPRESSION_NONE);
 950       brw_set_conditionalmod(p, BRW_CONDITIONAL_Z);
 951       brw_AND(p,
 952               v1_null_ud,
 953               get_element_ud(brw_vec8_grf(1,0), 6),
 954               brw_imm_ud(1<<26));
 955
 956       jmp = brw_JMPI(p, ip, ip, brw_imm_w(0));
 957       {
 958          emit_aa(c, arg1, 2);
 959          fire_fb_write(c, 0, nr);
 960          /* note - thread killed in subroutine */
 961       }
 962       brw_land_fwd_jump(p, jmp);
 963
 964       /* ELSE: Shuffle up one register to fill in the hole left for AA:
 965        */
 966       fire_fb_write(c, 1, nr-1);
 967    }
 968 }
 969
 970
 971
 972
 973 /* Post-fragment-program processing.  Send the results to the
 974  * framebuffer.
 975  */
 976 static void emit_spill( struct brw_wm_compile *c,
 977                         struct brw_reg reg,
 978                         GLuint slot )
 979 {
 980    struct brw_compile *p = &c->func;
 981
 982    /*
 983      mov (16) m2.0<1>:ud   r2.0<8;8,1>:ud   { Align1 Compr }
 984    */
 985    brw_MOV(p, brw_message_reg(2), reg);
 986
 987    /*
 988      mov (1) r0.2<1>:d    0x00000080:d     { Align1 NoMask }
 989      send (16) null.0<1>:uw m1               r0.0<8;8,1>:uw   0x053003ff:ud    { Align1 }
 990    */
 991    brw_dp_WRITE_16(p,
 992                    retype(vec16(brw_vec8_grf(0, 0)), BRW_REGISTER_TYPE_UW),
 993                    1,
 994                    slot);
 995 }
 996
 997 static void emit_unspill( struct brw_wm_compile *c,
 998                           struct brw_reg reg,
 999                           GLuint slot )
1000 {
1001    struct brw_compile *p = &c->func;
1002
1003    /* Slot 0 is the undef value.
1004     */
1005    if (slot == 0) {
1006       brw_MOV(p, reg, brw_imm_f(0));
1007       return;
1008    }
1009
1010    /*
1011      mov (1) r0.2<1>:d    0x000000c0:d     { Align1 NoMask }
1012      send (16) r110.0<1>:uw m1               r0.0<8;8,1>:uw   0x041243ff:ud    { Align1 }
1013    */
1014
1015    brw_dp_READ_16(p,
1016                   retype(vec16(reg), BRW_REGISTER_TYPE_UW),
1017                   1,
1018                   slot);
1019 }
1020
1021
1022
1023 /**
1024  * Retrieve upto 4 GEN4 register pairs for the given wm reg:
1025  */
1026 static void get_argument_regs( struct brw_wm_compile *c,
1027                                struct brw_wm_ref *arg[],
1028                                struct brw_reg *regs )
1029 {
1030    GLuint i;
1031
1032    for (i = 0; i < 4; i++) {
1033       if (arg[i]) {
1034
1035          if (arg[i]->unspill_reg)
1036             emit_unspill(c,
1037                          brw_vec8_grf(arg[i]->unspill_reg, 0),
1038                          arg[i]->value->spill_slot);
1039
1040          regs[i] = arg[i]->hw_reg;
1041       }
1042       else {
1043          regs[i] = brw_null_reg();
1044       }
1045    }
1046 }
1047
1048 static void spill_values( struct brw_wm_compile *c,
1049                           struct brw_wm_value *values,
1050                           GLuint nr )
1051 {
1052    GLuint i;
1053
1054    for (i = 0; i < nr; i++)
1055       if (values[i].spill_slot)
1056          emit_spill(c, values[i].hw_reg, values[i].spill_slot);
1057 }
1058
1059
1060
1061 /* Emit the fragment program instructions here.
1062  */
1063 void brw_wm_emit( struct brw_wm_compile *c )
1064 {
1065    struct brw_compile *p = &c->func;
1066    GLuint insn;
1067
1068    brw_set_compression_control(p, BRW_COMPRESSION_COMPRESSED);
1069
1070    /* Check if any of the payload regs need to be spilled:
1071     */
1072    spill_values(c, c->payload.depth, 4);
1073    spill_values(c, c->creg, c->nr_creg);
1074    spill_values(c, c->payload.input_interp, FRAG_ATTRIB_MAX);
1075
1076
1077    for (insn = 0; insn < c->nr_insns; insn++) {
1078
1079       struct brw_wm_instruction *inst = &c->instruction[insn];
1080       struct brw_reg args[3][4], dst[4];
1081       GLuint i, dst_flags;
1082
1083       /* Get argument regs:
1084        */
1085       for (i = 0; i < 3; i++)
1086          get_argument_regs(c, inst->src[i], args[i]);
1087
1088       /* Get dest regs:
1089        */
1090       for (i = 0; i < 4; i++)
1091          if (inst->dst[i])
1092             dst[i] = inst->dst[i]->hw_reg;
1093          else
1094             dst[i] = brw_null_reg();
1095
1096       /* Flags
1097        */
1098       dst_flags = inst->writemask;
1099       if (inst->saturate)
1100          dst_flags |= SATURATE;
1101
1102       switch (inst->opcode) {
1103          /* Generated instructions for calculating triangle interpolants:
1104           */
1105       case WM_PIXELXY:
1106          emit_pixel_xy(p, dst, dst_flags, args[0]);
1107          break;
1108
1109       case WM_DELTAXY:
1110          emit_delta_xy(p, dst, dst_flags, args[0], args[1]);
1111          break;
1112
1113       case WM_WPOSXY:
1114          emit_wpos_xy(p, dst, dst_flags, args[0]);
1115          break;
1116
1117       case WM_PIXELW:
1118          emit_pixel_w(p, dst, dst_flags, args[0], args[1]);
1119          break;
1120
1121       case WM_LINTERP:
1122          emit_linterp(p, dst, dst_flags, args[0], args[1]);
1123          break;
1124
1125       case WM_PINTERP:
1126          emit_pinterp(p, dst, dst_flags, args[0], args[1], args[2]);
1127          break;
1128
1129       case WM_CINTERP:
1130          emit_cinterp(p, dst, dst_flags, args[0]);
1131          break;
1132
1133       case WM_FB_WRITE:
1134          emit_fb_write(c, args[0], args[1], args[2]);
1135          break;
1136
1137          /* Straightforward arithmetic:
1138           */
1139       case OPCODE_ADD:
1140          emit_alu2(p, brw_ADD, dst, dst_flags, args[0], args[1]);
1141          break;
1142
1143       case OPCODE_FRC:
1144          emit_alu1(p, brw_FRC, dst, dst_flags, args[0]);
1145          break;
1146
1147       case OPCODE_FLR:
1148          emit_alu1(p, brw_RNDD, dst, dst_flags, args[0]);
1149          break;
1150
1151       case OPCODE_DP3:  /*  */
1152          emit_dp3(p, dst, dst_flags, args[0], args[1]);
1153          break;
1154
1155       case OPCODE_DP4:
1156          emit_dp4(p, dst, dst_flags, args[0], args[1]);
1157          break;
1158
1159       case OPCODE_DPH:
1160          emit_dph(p, dst, dst_flags, args[0], args[1]);
1161          break;
1162
1163       case OPCODE_LRP:  /*  */
1164          emit_lrp(p, dst, dst_flags, args[0], args[1], args[2]);
1165          break;
1166
1167       case OPCODE_MAD:
1168          emit_mad(p, dst, dst_flags, args[0], args[1], args[2]);
1169          break;
1170
1171       case OPCODE_MOV:
1172       case OPCODE_SWZ:
1173          emit_alu1(p, brw_MOV, dst, dst_flags, args[0]);
1174          break;
1175
1176       case OPCODE_MUL:
1177          emit_alu2(p, brw_MUL, dst, dst_flags, args[0], args[1]);
1178          break;
1179
1180       case OPCODE_XPD:
1181          emit_xpd(p, dst, dst_flags, args[0], args[1]);
1182          break;
1183
1184          /* Higher math functions:
1185           */
1186       case OPCODE_RCP:
1187          emit_math1(p, BRW_MATH_FUNCTION_INV, dst, dst_flags, args[0]);
1188          break;
1189
1190       case OPCODE_RSQ:
1191          emit_math1(p, BRW_MATH_FUNCTION_RSQ, dst, dst_flags, args[0]);
1192          break;
1193
1194       case OPCODE_SIN:
1195          emit_math1(p, BRW_MATH_FUNCTION_SIN, dst, dst_flags, args[0]);
1196          break;
1197
1198       case OPCODE_COS:
1199          emit_math1(p, BRW_MATH_FUNCTION_COS, dst, dst_flags, args[0]);
1200          break;
1201
1202       case OPCODE_EX2:
1203          emit_math1(p, BRW_MATH_FUNCTION_EXP, dst, dst_flags, args[0]);
1204          break;
1205
1206       case OPCODE_LG2:
1207          emit_math1(p, BRW_MATH_FUNCTION_LOG, dst, dst_flags, args[0]);
1208          break;
1209
1210       case OPCODE_SCS:
1211          /* There is an scs math function, but it would need some
1212           * fixup for 16-element execution.
1213           */
1214          if (dst_flags & WRITEMASK_X)
1215             emit_math1(p, BRW_MATH_FUNCTION_COS, dst, (dst_flags&SATURATE)|WRITEMASK_X, args[0]);
1216          if (dst_flags & WRITEMASK_Y)
1217             emit_math1(p, BRW_MATH_FUNCTION_SIN, dst+1, (dst_flags&SATURATE)|WRITEMASK_X, args[0]);
1218          break;
1219
1220       case OPCODE_POW:
1221          emit_math2(p, BRW_MATH_FUNCTION_POW, dst, dst_flags, args[0], args[1]);
1222          break;
1223
1224          /* Comparisons:
1225           */
1226       case OPCODE_CMP:
1227          emit_cmp(p, dst, dst_flags, args[0], args[1], args[2]);
1228          break;
1229
1230       case OPCODE_MAX:
1231          emit_max(p, dst, dst_flags, args[0], args[1]);
1232          break;
1233
1234       case OPCODE_MIN:
1235          emit_min(p, dst, dst_flags, args[0], args[1]);
1236          break;
1237
1238       case OPCODE_SLT:
1239          emit_slt(p, dst, dst_flags, args[0], args[1]);
1240          break;
1241
1242       case OPCODE_SLE:
1243          emit_sle(p, dst, dst_flags, args[0], args[1]);
1244         break;
1245       case OPCODE_SGT:
1246          emit_sgt(p, dst, dst_flags, args[0], args[1]);
1247         break;
1248       case OPCODE_SGE:
1249          emit_sge(p, dst, dst_flags, args[0], args[1]);
1250          break;
1251       case OPCODE_SEQ:
1252          emit_seq(p, dst, dst_flags, args[0], args[1]);
1253         break;
1254       case OPCODE_SNE:
1255          emit_sne(p, dst, dst_flags, args[0], args[1]);
1256         break;
1257
1258       case OPCODE_LIT:
1259          emit_lit(p, dst, dst_flags, args[0]);
1260          break;
1261
1262          /* Texturing operations:
1263           */
1264       case OPCODE_TEX:
1265          emit_tex(c, inst, dst, dst_flags, args[0]);
1266          break;
1267
1268       case OPCODE_TXB:
1269          emit_txb(c, inst, dst, dst_flags, args[0]);
1270          break;
1271
1272       case OPCODE_KIL:
1273          emit_kil(c, args[0]);
1274          break;
1275
1276       default:
1277         _mesa_printf("unsupport opcode %d in fragment program\n",
1278                 inst->opcode);
1279       }
1280
1281       for (i = 0; i < 4; i++)
1282         if (inst->dst[i] && inst->dst[i]->spill_slot)
1283            emit_spill(c,
1284                       inst->dst[i]->hw_reg,
1285                       inst->dst[i]->spill_slot);
1286    }
1287 }
1288
1289
1290
1291
1292