src/mesa/drivers/dri/i965/brw_wm_emit.c

   1 /*
   2  Copyright (C) Intel Corp.  2006.  All Rights Reserved.
   3  Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
   4  develop this 3D driver.
   5
   6  Permission is hereby granted, free of charge, to any person obtaining
   7  a copy of this software and associated documentation files (the
   8  "Software"), to deal in the Software without restriction, including
   9  without limitation the rights to use, copy, modify, merge, publish,
  10  distribute, sublicense, and/or sell copies of the Software, and to
  11  permit persons to whom the Software is furnished to do so, subject to
  12  the following conditions:
  13
  14  The above copyright notice and this permission notice (including the
  15  next paragraph) shall be included in all copies or substantial
  16  portions of the Software.
  17
  18  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
  19  EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  20  MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
  21  IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
  22  LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
  23  OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
  24  WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
  25
  26  **********************************************************************/
  27  /*
  28   * Authors:
  29   *   Keith Whitwell <keith@tungstengraphics.com>
  30   */
  31
  32
  33 #include "brw_context.h"
  34 #include "program.h"
  35 #include "program_instruction.h"
  36 #include "macros.h"
  37 #include "brw_wm.h"
  38
  39 #define SATURATE (1<<5)
  40
  41 /* Not quite sure how correct this is - need to understand horiz
  42  * vs. vertical strides a little better.
  43  */
  44 static __inline struct brw_reg sechalf( struct brw_reg reg )
  45 {
  46    if (reg.vstride)
  47       reg.nr++;
  48    return reg;
  49 }
  50
  51 /* Payload R0:
  52  *
  53  * R0.0 -- pixel mask, one bit for each of 4 pixels in 4 tiles,
  54  *         corresponding to each of the 16 execution channels.
  55  * R0.1..8 -- ?
  56  * R1.0 -- triangle vertex 0.X
  57  * R1.1 -- triangle vertex 0.Y
  58  * R1.2 -- tile 0 x,y coords (2 packed uwords)
  59  * R1.3 -- tile 1 x,y coords (2 packed uwords)
  60  * R1.4 -- tile 2 x,y coords (2 packed uwords)
  61  * R1.5 -- tile 3 x,y coords (2 packed uwords)
  62  * R1.6 -- ?
  63  * R1.7 -- ?
  64  * R1.8 -- ?
  65  */
  66
  67
  68 static void emit_pixel_xy(struct brw_compile *p,
  69                           const struct brw_reg *dst,
  70                           GLuint mask,
  71                           const struct brw_reg *arg0)
  72 {
  73    struct brw_reg r1 = brw_vec1_grf(1, 0);
  74    struct brw_reg r1_uw = retype(r1, BRW_REGISTER_TYPE_UW);
  75
  76    brw_set_compression_control(p, BRW_COMPRESSION_NONE);
  77
  78    /* Calculate pixel centers by adding 1 or 0 to each of the
  79     * micro-tile coordinates passed in r1.
  80     */
  81    if (mask & WRITEMASK_X) {
  82       brw_ADD(p,
  83               vec16(retype(dst[0], BRW_REGISTER_TYPE_UW)),
  84               stride(suboffset(r1_uw, 4), 2, 4, 0),
  85               brw_imm_v(0x10101010));
  86    }
  87
  88    if (mask & WRITEMASK_Y) {
  89       brw_ADD(p,
  90               vec16(retype(dst[1], BRW_REGISTER_TYPE_UW)),
  91               stride(suboffset(r1_uw,5), 2, 4, 0),
  92               brw_imm_v(0x11001100));
  93    }
  94
  95    brw_set_compression_control(p, BRW_COMPRESSION_COMPRESSED);
  96 }
  97
  98
  99
 100 static void emit_delta_xy(struct brw_compile *p,
 101                           const struct brw_reg *dst,
 102                           GLuint mask,
 103                           const struct brw_reg *arg0,
 104                           const struct brw_reg *arg1)
 105 {
 106    struct brw_reg r1 = brw_vec1_grf(1, 0);
 107
 108    /* Calc delta X,Y by subtracting origin in r1 from the pixel
 109     * centers.
 110     */
 111    if (mask & WRITEMASK_X) {
 112       brw_ADD(p,
 113               dst[0],
 114               retype(arg0[0], BRW_REGISTER_TYPE_UW),
 115               negate(r1));
 116    }
 117
 118    if (mask & WRITEMASK_Y) {
 119       brw_ADD(p,
 120               dst[1],
 121               retype(arg0[1], BRW_REGISTER_TYPE_UW),
 122               negate(suboffset(r1,1)));
 123
 124    }
 125 }
 126
 127 static void emit_wpos_xy(struct brw_compile *p,
 128                            const struct brw_reg *dst,
 129                            GLuint mask,
 130                            const struct brw_reg *arg0)
 131 {
 132    /* Calc delta X,Y by subtracting origin in r1 from the pixel
 133     * centers.
 134     */
 135    if (mask & WRITEMASK_X) {
 136       brw_MOV(p,
 137               dst[0],
 138               retype(arg0[0], BRW_REGISTER_TYPE_UW));
 139    }
 140
 141    if (mask & WRITEMASK_Y) {
 142       /* TODO -- window_height - Y */
 143       brw_MOV(p,
 144               dst[1],
 145               negate(retype(arg0[1], BRW_REGISTER_TYPE_UW)));
 146
 147    }
 148 }
 149
 150
 151 static void emit_pixel_w( struct brw_compile *p,
 152                           const struct brw_reg *dst,
 153                           GLuint mask,
 154                           const struct brw_reg *arg0,
 155                           const struct brw_reg *deltas)
 156 {
 157    /* Don't need this if all you are doing is interpolating color, for
 158     * instance.
 159     */
 160    if (mask & WRITEMASK_W) {
 161       struct brw_reg interp3 = brw_vec1_grf(arg0[0].nr+1, 4);
 162
 163       /* Calc 1/w - just linterp wpos[3] optimized by putting the
 164        * result straight into a message reg.
 165        */
 166       brw_LINE(p, brw_null_reg(), interp3, deltas[0]);
 167       brw_MAC(p, brw_message_reg(2), suboffset(interp3, 1), deltas[1]);
 168
 169       /* Calc w */
 170       brw_math_16( p, dst[3],
 171                    BRW_MATH_FUNCTION_INV,
 172                    BRW_MATH_SATURATE_NONE,
 173                    2, brw_null_reg(),
 174                    BRW_MATH_PRECISION_FULL);
 175    }
 176 }
 177
 178
 179
 180 static void emit_linterp( struct brw_compile *p,
 181                          const struct brw_reg *dst,
 182                          GLuint mask,
 183                          const struct brw_reg *arg0,
 184                          const struct brw_reg *deltas )
 185 {
 186    struct brw_reg interp[4];
 187    GLuint nr = arg0[0].nr;
 188    GLuint i;
 189
 190    interp[0] = brw_vec1_grf(nr, 0);
 191    interp[1] = brw_vec1_grf(nr, 4);
 192    interp[2] = brw_vec1_grf(nr+1, 0);
 193    interp[3] = brw_vec1_grf(nr+1, 4);
 194
 195    for(i = 0; i < 4; i++ ) {
 196       if (mask & (1<<i)) {
 197          brw_LINE(p, brw_null_reg(), interp[i], deltas[0]);
 198          brw_MAC(p, dst[i], suboffset(interp[i],1), deltas[1]);
 199       }
 200    }
 201 }
 202
 203
 204 static void emit_pinterp( struct brw_compile *p,
 205                           const struct brw_reg *dst,
 206                           GLuint mask,
 207                           const struct brw_reg *arg0,
 208                           const struct brw_reg *deltas,
 209                           const struct brw_reg *w)
 210 {
 211    struct brw_reg interp[4];
 212    GLuint nr = arg0[0].nr;
 213    GLuint i;
 214
 215    interp[0] = brw_vec1_grf(nr, 0);
 216    interp[1] = brw_vec1_grf(nr, 4);
 217    interp[2] = brw_vec1_grf(nr+1, 0);
 218    interp[3] = brw_vec1_grf(nr+1, 4);
 219
 220    for(i = 0; i < 4; i++ ) {
 221       if (mask & (1<<i)) {
 222          brw_LINE(p, brw_null_reg(), interp[i], deltas[0]);
 223          brw_MAC(p, dst[i], suboffset(interp[i],1), deltas[1]);
 224          brw_MUL(p, dst[i], dst[i], w[3]);
 225       }
 226    }
 227 }
 228
 229 static void emit_cinterp( struct brw_compile *p,
 230                          const struct brw_reg *dst,
 231                          GLuint mask,
 232                          const struct brw_reg *arg0 )
 233 {
 234    struct brw_reg interp[4];
 235    GLuint nr = arg0[0].nr;
 236    GLuint i;
 237
 238    interp[0] = brw_vec1_grf(nr, 0);
 239    interp[1] = brw_vec1_grf(nr, 4);
 240    interp[2] = brw_vec1_grf(nr+1, 0);
 241    interp[3] = brw_vec1_grf(nr+1, 4);
 242
 243    for(i = 0; i < 4; i++ ) {
 244       if (mask & (1<<i)) {
 245          brw_MOV(p, dst[i], suboffset(interp[i],3));    /* TODO: optimize away like other moves */
 246       }
 247    }
 248 }
 249
 250
 251
 252
 253
 254 static void emit_alu1( struct brw_compile *p,
 255                        struct brw_instruction *(*func)(struct brw_compile *,
 256                                                        struct brw_reg,
 257                                                        struct brw_reg),
 258                        const struct brw_reg *dst,
 259                        GLuint mask,
 260                        const struct brw_reg *arg0 )
 261 {
 262    GLuint i;
 263
 264    if (mask & SATURATE)
 265       brw_set_saturate(p, 1);
 266
 267    for (i = 0; i < 4; i++) {
 268       if (mask & (1<<i)) {
 269          func(p, dst[i], arg0[i]);
 270       }
 271    }
 272
 273    if (mask & SATURATE)
 274       brw_set_saturate(p, 0);
 275 }
 276
 277 static void emit_alu2( struct brw_compile *p,
 278                        struct brw_instruction *(*func)(struct brw_compile *,
 279                                                        struct brw_reg,
 280                                                        struct brw_reg,
 281                                                        struct brw_reg),
 282                        const struct brw_reg *dst,
 283                        GLuint mask,
 284                        const struct brw_reg *arg0,
 285                        const struct brw_reg *arg1 )
 286 {
 287    GLuint i;
 288
 289    if (mask & SATURATE)
 290       brw_set_saturate(p, 1);
 291
 292    for (i = 0; i < 4; i++) {
 293       if (mask & (1<<i)) {
 294          func(p, dst[i], arg0[i], arg1[i]);
 295       }
 296    }
 297
 298    if (mask & SATURATE)
 299       brw_set_saturate(p, 0);
 300 }
 301
 302
 303 static void emit_mad( struct brw_compile *p,
 304                       const struct brw_reg *dst,
 305                       GLuint mask,
 306                       const struct brw_reg *arg0,
 307                       const struct brw_reg *arg1,
 308                       const struct brw_reg *arg2 )
 309 {
 310    GLuint i;
 311
 312    for (i = 0; i < 4; i++) {
 313       if (mask & (1<<i)) {
 314          brw_MUL(p, dst[i], arg0[i], arg1[i]);
 315
 316          brw_set_saturate(p, (mask & SATURATE) ? 1 : 0);
 317          brw_ADD(p, dst[i], dst[i], arg2[i]);
 318          brw_set_saturate(p, 0);
 319       }
 320    }
 321 }
 322
 323
 324 static void emit_lrp( struct brw_compile *p,
 325                       const struct brw_reg *dst,
 326                       GLuint mask,
 327                       const struct brw_reg *arg0,
 328                       const struct brw_reg *arg1,
 329                       const struct brw_reg *arg2 )
 330 {
 331    GLuint i;
 332
 333    /* Uses dst as a temporary:
 334     */
 335    for (i = 0; i < 4; i++) {
 336       if (mask & (1<<i)) {
 337          /* Can I use the LINE instruction for this?
 338           */
 339          brw_ADD(p, dst[i], negate(arg0[i]), brw_imm_f(1.0));
 340          brw_MUL(p, brw_null_reg(), dst[i], arg2[i]);
 341
 342          brw_set_saturate(p, (mask & SATURATE) ? 1 : 0);
 343          brw_MAC(p, dst[i], arg0[i], arg1[i]);
 344          brw_set_saturate(p, 0);
 345       }
 346    }
 347 }
 348
 349
 350 static void emit_slt( struct brw_compile *p,
 351                       const struct brw_reg *dst,
 352                       GLuint mask,
 353                       const struct brw_reg *arg0,
 354                       const struct brw_reg *arg1 )
 355 {
 356    GLuint i;
 357
 358    for (i = 0; i < 4; i++) {
 359       if (mask & (1<<i)) {
 360          brw_MOV(p, dst[i], brw_imm_f(0));
 361          brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_L, arg0[i], arg1[i]);
 362          brw_MOV(p, dst[i], brw_imm_f(1.0));
 363          brw_set_predicate_control_flag_value(p, 0xff);
 364       }
 365    }
 366 }
 367
 368 /* Isn't this just the same as the above with the args swapped?
 369  */
 370 static void emit_sge( struct brw_compile *p,
 371                       const struct brw_reg *dst,
 372                       GLuint mask,
 373                       const struct brw_reg *arg0,
 374                       const struct brw_reg *arg1 )
 375 {
 376    GLuint i;
 377
 378    for (i = 0; i < 4; i++) {
 379       if (mask & (1<<i)) {
 380          brw_MOV(p, dst[i], brw_imm_f(0));
 381          brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_GE, arg0[i], arg1[i]);
 382          brw_MOV(p, dst[i], brw_imm_f(1.0));
 383          brw_set_predicate_control_flag_value(p, 0xff);
 384       }
 385    }
 386 }
 387
 388
 389
 390 static void emit_cmp( struct brw_compile *p,
 391                       const struct brw_reg *dst,
 392                       GLuint mask,
 393                       const struct brw_reg *arg0,
 394                       const struct brw_reg *arg1,
 395                       const struct brw_reg *arg2 )
 396 {
 397    GLuint i;
 398
 399    for (i = 0; i < 4; i++) {
 400       if (mask & (1<<i)) {
 401          brw_set_saturate(p, (mask & SATURATE) ? 1 : 0);
 402          brw_MOV(p, dst[i], arg2[i]);
 403          brw_set_saturate(p, 0);
 404
 405          brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_L, arg0[i], brw_imm_f(0));
 406
 407          brw_set_saturate(p, (mask & SATURATE) ? 1 : 0);
 408          brw_MOV(p, dst[i], arg1[i]);
 409          brw_set_saturate(p, 0);
 410          brw_set_predicate_control_flag_value(p, 0xff);
 411       }
 412    }
 413 }
 414
 415 static void emit_max( struct brw_compile *p,
 416                       const struct brw_reg *dst,
 417                       GLuint mask,
 418                       const struct brw_reg *arg0,
 419                       const struct brw_reg *arg1 )
 420 {
 421    GLuint i;
 422
 423    for (i = 0; i < 4; i++) {
 424       if (mask & (1<<i)) {
 425          brw_set_saturate(p, (mask & SATURATE) ? 1 : 0);
 426          brw_MOV(p, dst[i], arg0[i]);
 427          brw_set_saturate(p, 0);
 428
 429          brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_L, arg0[i], arg1[i]);
 430
 431          brw_set_saturate(p, (mask & SATURATE) ? 1 : 0);
 432          brw_MOV(p, dst[i], arg1[i]);
 433          brw_set_saturate(p, 0);
 434          brw_set_predicate_control_flag_value(p, 0xff);
 435       }
 436    }
 437 }
 438
 439 static void emit_min( struct brw_compile *p,
 440                       const struct brw_reg *dst,
 441                       GLuint mask,
 442                       const struct brw_reg *arg0,
 443                       const struct brw_reg *arg1 )
 444 {
 445    GLuint i;
 446
 447    for (i = 0; i < 4; i++) {
 448       if (mask & (1<<i)) {
 449          brw_set_saturate(p, (mask & SATURATE) ? 1 : 0);
 450          brw_MOV(p, dst[i], arg1[i]);
 451          brw_set_saturate(p, 0);
 452
 453          brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_L, arg0[i], arg1[i]);
 454
 455          brw_set_saturate(p, (mask & SATURATE) ? 1 : 0);
 456          brw_MOV(p, dst[i], arg0[i]);
 457          brw_set_saturate(p, 0);
 458          brw_set_predicate_control_flag_value(p, 0xff);
 459       }
 460    }
 461 }
 462
 463
 464 static void emit_dp3( struct brw_compile *p,
 465                       const struct brw_reg *dst,
 466                       GLuint mask,
 467                       const struct brw_reg *arg0,
 468                       const struct brw_reg *arg1 )
 469 {
 470    assert((mask & WRITEMASK_XYZW) == WRITEMASK_X);
 471
 472    brw_MUL(p, brw_null_reg(), arg0[0], arg1[0]);
 473    brw_MAC(p, brw_null_reg(), arg0[1], arg1[1]);
 474
 475    brw_set_saturate(p, (mask & SATURATE) ? 1 : 0);
 476    brw_MAC(p, dst[0], arg0[2], arg1[2]);
 477    brw_set_saturate(p, 0);
 478 }
 479
 480
 481 static void emit_dp4( struct brw_compile *p,
 482                       const struct brw_reg *dst,
 483                       GLuint mask,
 484                       const struct brw_reg *arg0,
 485                       const struct brw_reg *arg1 )
 486 {
 487    assert((mask & WRITEMASK_XYZW) == WRITEMASK_X);
 488
 489    brw_MUL(p, brw_null_reg(), arg0[0], arg1[0]);
 490    brw_MAC(p, brw_null_reg(), arg0[1], arg1[1]);
 491    brw_MAC(p, brw_null_reg(), arg0[2], arg1[2]);
 492
 493    brw_set_saturate(p, (mask & SATURATE) ? 1 : 0);
 494    brw_MAC(p, dst[0], arg0[3], arg1[3]);
 495    brw_set_saturate(p, 0);
 496 }
 497
 498
 499 static void emit_dph( struct brw_compile *p,
 500                       const struct brw_reg *dst,
 501                       GLuint mask,
 502                       const struct brw_reg *arg0,
 503                       const struct brw_reg *arg1 )
 504 {
 505    assert((mask & WRITEMASK_XYZW) == WRITEMASK_X);
 506
 507    brw_MUL(p, brw_null_reg(), arg0[0], arg1[0]);
 508    brw_MAC(p, brw_null_reg(), arg0[1], arg1[1]);
 509    brw_MAC(p, dst[0], arg0[2], arg1[2]);
 510
 511    brw_set_saturate(p, (mask & SATURATE) ? 1 : 0);
 512    brw_ADD(p, dst[0], dst[0], arg1[3]);
 513    brw_set_saturate(p, 0);
 514 }
 515
 516
 517 static void emit_xpd( struct brw_compile *p,
 518                       const struct brw_reg *dst,
 519                       GLuint mask,
 520                       const struct brw_reg *arg0,
 521                       const struct brw_reg *arg1 )
 522 {
 523    GLuint i;
 524
 525    assert(!(mask & WRITEMASK_W) == WRITEMASK_X);
 526
 527    for (i = 0 ; i < 3; i++) {
 528       if (mask & (1<<i)) {
 529          GLuint i2 = (i+2)%3;
 530          GLuint i1 = (i+1)%3;
 531
 532          brw_MUL(p, brw_null_reg(), negate(arg0[i2]), arg1[i1]);
 533
 534          brw_set_saturate(p, (mask & SATURATE) ? 1 : 0);
 535          brw_MAC(p, dst[i], arg0[i1], arg1[i2]);
 536          brw_set_saturate(p, 0);
 537       }
 538    }
 539 }
 540
 541
 542 static void emit_math1( struct brw_compile *p,
 543                         GLuint function,
 544                         const struct brw_reg *dst,
 545                         GLuint mask,
 546                         const struct brw_reg *arg0 )
 547 {
 548    assert((mask & WRITEMASK_XYZW) == WRITEMASK_X ||
 549           function == BRW_MATH_FUNCTION_SINCOS);
 550
 551    brw_MOV(p, brw_message_reg(2), arg0[0]);
 552
 553    /* Send two messages to perform all 16 operations:
 554     */
 555    brw_math_16(p,
 556                dst[0],
 557                function,
 558                (mask & SATURATE) ? BRW_MATH_SATURATE_SATURATE : BRW_MATH_SATURATE_NONE,
 559                2,
 560                brw_null_reg(),
 561                BRW_MATH_PRECISION_FULL);
 562 }
 563
 564
 565 static void emit_math2( struct brw_compile *p,
 566                         GLuint function,
 567                         const struct brw_reg *dst,
 568                         GLuint mask,
 569                         const struct brw_reg *arg0,
 570                         const struct brw_reg *arg1)
 571 {
 572    assert((mask & WRITEMASK_XYZW) == WRITEMASK_X);
 573
 574    brw_push_insn_state(p);
 575
 576    brw_set_compression_control(p, BRW_COMPRESSION_NONE);
 577    brw_MOV(p, brw_message_reg(2), arg0[0]);
 578    brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF);
 579    brw_MOV(p, brw_message_reg(4), sechalf(arg0[0]));
 580
 581    brw_set_compression_control(p, BRW_COMPRESSION_NONE);
 582    brw_MOV(p, brw_message_reg(3), arg1[0]);
 583    brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF);
 584    brw_MOV(p, brw_message_reg(5), sechalf(arg1[0]));
 585
 586
 587    /* Send two messages to perform all 16 operations:
 588     */
 589    brw_set_compression_control(p, BRW_COMPRESSION_NONE);
 590    brw_math(p,
 591             dst[0],
 592             function,
 593             (mask & SATURATE) ? BRW_MATH_SATURATE_SATURATE : BRW_MATH_SATURATE_NONE,
 594             2,
 595             brw_null_reg(),
 596             BRW_MATH_DATA_VECTOR,
 597             BRW_MATH_PRECISION_FULL);
 598
 599    brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF);
 600    brw_math(p,
 601             offset(dst[0],1),
 602             function,
 603             (mask & SATURATE) ? BRW_MATH_SATURATE_SATURATE : BRW_MATH_SATURATE_NONE,
 604             4,
 605             brw_null_reg(),
 606             BRW_MATH_DATA_VECTOR,
 607             BRW_MATH_PRECISION_FULL);
 608
 609    brw_pop_insn_state(p);
 610 }
 611
 612
 613
 614 static void emit_tex( struct brw_wm_compile *c,
 615                       const struct brw_wm_instruction *inst,
 616                       struct brw_reg *dst,
 617                       GLuint dst_flags,
 618                       struct brw_reg *arg )
 619 {
 620    struct brw_compile *p = &c->func;
 621    GLuint msgLength, responseLength;
 622    GLboolean shadow = (c->key.shadowtex_mask & (1<<inst->tex_unit)) ? 1 : 0;
 623    GLuint i, nr;
 624    GLuint emit;
 625
 626    /* How many input regs are there?
 627     */
 628    switch (inst->tex_idx) {
 629    case TEXTURE_1D_INDEX:
 630       emit = WRITEMASK_X;
 631       nr = 1;
 632       break;
 633    case TEXTURE_2D_INDEX:
 634    case TEXTURE_RECT_INDEX:
 635       emit = WRITEMASK_XY;
 636       nr = 2;
 637       break;
 638    default:
 639       emit = WRITEMASK_XYZ;
 640       nr = 3;
 641       break;
 642    }
 643
 644    if (shadow) {
 645       nr = 4;
 646       emit |= WRITEMASK_W;
 647    }
 648
 649    msgLength = 1;
 650
 651    for (i = 0; i < nr; i++) {
 652       static const GLuint swz[4] = {0,1,2,2};
 653       if (emit & (1<<i))
 654          brw_MOV(p, brw_message_reg(msgLength+1), arg[swz[i]]);
 655       else
 656          brw_MOV(p, brw_message_reg(msgLength+1), brw_imm_f(0));
 657       msgLength += 2;
 658    }
 659
 660    responseLength = 8;          /* always */
 661
 662    brw_SAMPLE(p,
 663               retype(vec16(dst[0]), BRW_REGISTER_TYPE_UW),
 664               1,
 665               retype(c->payload.depth[0].hw_reg, BRW_REGISTER_TYPE_UW),
 666               inst->tex_unit + 1, /* surface */
 667               inst->tex_unit,     /* sampler */
 668               inst->writemask,
 669               (shadow ?
 670                BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_COMPARE :
 671                BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE),
 672               responseLength,
 673               msgLength,
 674               0);
 675
 676 }
 677
 678
 679 static void emit_txb( struct brw_wm_compile *c,
 680                       const struct brw_wm_instruction *inst,
 681                       struct brw_reg *dst,
 682                       GLuint dst_flags,
 683                       struct brw_reg *arg )
 684 {
 685    struct brw_compile *p = &c->func;
 686    GLuint msgLength;
 687
 688    /* Shadow ignored for txb.
 689     */
 690    switch (inst->tex_idx) {
 691    case TEXTURE_1D_INDEX:
 692       brw_MOV(p, brw_message_reg(2), arg[0]);
 693       brw_MOV(p, brw_message_reg(4), brw_imm_f(0));
 694       brw_MOV(p, brw_message_reg(6), brw_imm_f(0));
 695       break;
 696    case TEXTURE_2D_INDEX:
 697    case TEXTURE_RECT_INDEX:
 698       brw_MOV(p, brw_message_reg(2), arg[0]);
 699       brw_MOV(p, brw_message_reg(4), arg[1]);
 700       brw_MOV(p, brw_message_reg(6), brw_imm_f(0));
 701       break;
 702    default:
 703       brw_MOV(p, brw_message_reg(2), arg[0]);
 704       brw_MOV(p, brw_message_reg(4), arg[1]);
 705       brw_MOV(p, brw_message_reg(6), arg[2]);
 706       break;
 707    }
 708
 709    brw_MOV(p, brw_message_reg(8), arg[3]);
 710    msgLength = 9;
 711
 712
 713    brw_SAMPLE(p,
 714               retype(vec16(dst[0]), BRW_REGISTER_TYPE_UW),
 715               1,
 716               retype(c->payload.depth[0].hw_reg, BRW_REGISTER_TYPE_UW),
 717               inst->tex_unit + 1, /* surface */
 718               inst->tex_unit,     /* sampler */
 719               inst->writemask,
 720               BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_BIAS,
 721               8,                /* responseLength */
 722               msgLength,
 723               0);
 724
 725 }
 726
 727
 728 static void emit_lit( struct brw_compile *p,
 729                       const struct brw_reg *dst,
 730                       GLuint mask,
 731                       const struct brw_reg *arg0 )
 732 {
 733    assert((mask & WRITEMASK_XW) == 0);
 734
 735    if (mask & WRITEMASK_Y) {
 736       brw_set_saturate(p, (mask & SATURATE) ? 1 : 0);
 737       brw_MOV(p, dst[1], arg0[0]);
 738       brw_set_saturate(p, 0);
 739    }
 740
 741    if (mask & WRITEMASK_Z) {
 742       emit_math2(p, BRW_MATH_FUNCTION_POW,
 743                  &dst[2],
 744                  WRITEMASK_X | (mask & SATURATE),
 745                  &arg0[1],
 746                  &arg0[3]);
 747    }
 748
 749    /* Ordinarily you'd use an iff statement to skip or shortcircuit
 750     * some of the POW calculations above, but 16-wide iff statements
 751     * seem to lock c1 hardware, so this is a nasty workaround:
 752     */
 753    brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_LE, arg0[0], brw_imm_f(0));
 754    {
 755       if (mask & WRITEMASK_Y)
 756          brw_MOV(p, dst[1], brw_imm_f(0));
 757
 758       if (mask & WRITEMASK_Z)
 759          brw_MOV(p, dst[2], brw_imm_f(0));
 760    }
 761    brw_set_predicate_control(p, BRW_PREDICATE_NONE);
 762 }
 763
 764
 765 /* Kill pixel - set execution mask to zero for those pixels which
 766  * fail.
 767  */
 768 static void emit_kil( struct brw_wm_compile *c,
 769                       struct brw_reg *arg0)
 770 {
 771    struct brw_compile *p = &c->func;
 772    struct brw_reg r0uw = retype(brw_vec1_grf(0, 0), BRW_REGISTER_TYPE_UW);
 773    GLuint i;
 774
 775
 776    /* XXX - usually won't need 4 compares!
 777     */
 778    for (i = 0; i < 4; i++) {
 779       brw_push_insn_state(p);
 780       brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_GE, arg0[i], brw_imm_f(0));
 781       brw_set_predicate_control_flag_value(p, 0xff);
 782       brw_AND(p, r0uw, brw_flag_reg(), r0uw);
 783       brw_pop_insn_state(p);
 784    }
 785 }
 786
 787 static void fire_fb_write( struct brw_wm_compile *c,
 788                            GLuint base_reg,
 789                            GLuint nr )
 790 {
 791    struct brw_compile *p = &c->func;
 792
 793    /* Pass through control information:
 794     */
 795 /*  mov (8) m1.0<1>:ud   r1.0<8;8,1>:ud   { Align1 NoMask } */
 796    {
 797       brw_push_insn_state(p);
 798       brw_set_mask_control(p, BRW_MASK_DISABLE); /* ? */
 799       brw_set_compression_control(p, BRW_COMPRESSION_NONE);
 800       brw_MOV(p,
 801                brw_message_reg(base_reg + 1),
 802                brw_vec8_grf(1, 0));
 803       brw_pop_insn_state(p);
 804    }
 805
 806    /* Send framebuffer write message: */
 807 /*  send (16) null.0<1>:uw m0               r0.0<8;8,1>:uw   0x85a04000:ud    { Align1 EOT } */
 808    brw_fb_WRITE(p,
 809                 retype(vec16(brw_null_reg()), BRW_REGISTER_TYPE_UW),
 810                 base_reg,
 811                 retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UW),
 812                 0,              /* render surface always 0 */
 813                 nr,
 814                 0,
 815                 1);
 816 }
 817
 818 static void emit_aa( struct brw_wm_compile *c,
 819                      struct brw_reg *arg1,
 820                      GLuint reg )
 821 {
 822    struct brw_compile *p = &c->func;
 823    GLuint comp = c->key.aa_dest_stencil_reg / 2;
 824    GLuint off = c->key.aa_dest_stencil_reg % 2;
 825    struct brw_reg aa = offset(arg1[comp], off);
 826
 827    brw_push_insn_state(p);
 828    brw_set_compression_control(p, BRW_COMPRESSION_NONE); /* ?? */
 829    brw_MOV(p, brw_message_reg(reg), aa);
 830    brw_pop_insn_state(p);
 831 }
 832
 833
 834 /* Post-fragment-program processing.  Send the results to the
 835  * framebuffer.
 836  */
 837 static void emit_fb_write( struct brw_wm_compile *c,
 838                            struct brw_reg *arg0,
 839                            struct brw_reg *arg1,
 840                            struct brw_reg *arg2)
 841 {
 842    struct brw_compile *p = &c->func;
 843    GLuint nr = 2;
 844    GLuint channel;
 845
 846    /* Reserve a space for AA - may not be needed:
 847     */
 848    if (c->key.aa_dest_stencil_reg)
 849       nr += 1;
 850
 851    /* I don't really understand how this achieves the color interleave
 852     * (ie RGBARGBA) in the result:  [Do the saturation here]
 853     */
 854    {
 855       brw_push_insn_state(p);
 856
 857       for (channel = 0; channel < 4; channel++) {
 858          /*  mov (8) m2.0<1>:ud   r28.0<8;8,1>:ud  { Align1 } */
 859          /*  mov (8) m6.0<1>:ud   r29.0<8;8,1>:ud  { Align1 SecHalf } */
 860
 861          brw_set_compression_control(p, BRW_COMPRESSION_NONE);
 862          brw_MOV(p,
 863                  brw_message_reg(nr + channel),
 864                  arg0[channel]);
 865
 866          brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF);
 867          brw_MOV(p,
 868                  brw_message_reg(nr + channel + 4),
 869                  sechalf(arg0[channel]));
 870       }
 871
 872       /* skip over the regs populated above:
 873        */
 874       nr += 8;
 875
 876       brw_pop_insn_state(p);
 877    }
 878
 879    if (c->key.source_depth_to_render_target)
 880    {
 881       if (c->key.computes_depth)
 882          brw_MOV(p, brw_message_reg(nr), arg2[2]);
 883       else
 884          brw_MOV(p, brw_message_reg(nr), arg1[1]); /* ? */
 885
 886       nr += 2;
 887    }
 888
 889    if (c->key.dest_depth_reg)
 890    {
 891       GLuint comp = c->key.dest_depth_reg / 2;
 892       GLuint off = c->key.dest_depth_reg % 2;
 893
 894       if (off != 0) {
 895          brw_push_insn_state(p);
 896          brw_set_compression_control(p, BRW_COMPRESSION_NONE);
 897          brw_MOV(p, brw_message_reg(nr), arg1[comp]);
 898          /* 2nd half? */
 899          brw_MOV(p, brw_message_reg(nr+1), offset(arg1[comp],1));
 900          brw_pop_insn_state(p);
 901       }
 902       else {
 903          brw_MOV(p, brw_message_reg(nr), arg1[comp]);
 904       }
 905       nr += 2;
 906    }
 907
 908
 909    if (!c->key.runtime_check_aads_emit) {
 910       if (c->key.aa_dest_stencil_reg)
 911          emit_aa(c, arg1, 2);
 912
 913       fire_fb_write(c, 0, nr);
 914    }
 915    else {
 916       struct brw_reg v1_null_ud = vec1(retype(brw_null_reg(), BRW_REGISTER_TYPE_UD));
 917       struct brw_reg ip = brw_ip_reg();
 918       struct brw_instruction *jmp;
 919
 920       brw_set_compression_control(p, BRW_COMPRESSION_NONE);
 921       brw_set_conditionalmod(p, BRW_CONDITIONAL_Z);
 922       brw_AND(p,
 923               v1_null_ud,
 924               get_element_ud(brw_vec8_grf(1,0), 6),
 925               brw_imm_ud(1<<26));
 926
 927       jmp = brw_JMPI(p, ip, ip, brw_imm_w(0));
 928       {
 929          emit_aa(c, arg1, 2);
 930          fire_fb_write(c, 0, nr);
 931          /* note - thread killed in subroutine */
 932       }
 933       brw_land_fwd_jump(p, jmp);
 934
 935       /* ELSE: Shuffle up one register to fill in the hole left for AA:
 936        */
 937       fire_fb_write(c, 1, nr-1);
 938    }
 939 }
 940
 941
 942
 943
 944 /* Post-fragment-program processing.  Send the results to the
 945  * framebuffer.
 946  */
 947 static void emit_spill( struct brw_wm_compile *c,
 948                         struct brw_reg reg,
 949                         GLuint slot )
 950 {
 951    struct brw_compile *p = &c->func;
 952
 953    /*
 954      mov (16) m2.0<1>:ud   r2.0<8;8,1>:ud   { Align1 Compr }
 955    */
 956    brw_MOV(p, brw_message_reg(2), reg);
 957
 958    /*
 959      mov (1) r0.2<1>:d    0x00000080:d     { Align1 NoMask }
 960      send (16) null.0<1>:uw m1               r0.0<8;8,1>:uw   0x053003ff:ud    { Align1 }
 961    */
 962    brw_dp_WRITE_16(p,
 963                    retype(vec16(brw_vec8_grf(0, 0)), BRW_REGISTER_TYPE_UW),
 964                    1,
 965                    slot);
 966 }
 967
 968 static void emit_unspill( struct brw_wm_compile *c,
 969                           struct brw_reg reg,
 970                           GLuint slot )
 971 {
 972    struct brw_compile *p = &c->func;
 973
 974    /* Slot 0 is the undef value.
 975     */
 976    if (slot == 0) {
 977       brw_MOV(p, reg, brw_imm_f(0));
 978       return;
 979    }
 980
 981    /*
 982      mov (1) r0.2<1>:d    0x000000c0:d     { Align1 NoMask }
 983      send (16) r110.0<1>:uw m1               r0.0<8;8,1>:uw   0x041243ff:ud    { Align1 }
 984    */
 985
 986    brw_dp_READ_16(p,
 987                   retype(vec16(reg), BRW_REGISTER_TYPE_UW),
 988                   1,
 989                   slot);
 990 }
 991
 992
 993
 994 /**
 995  * Retrieve upto 4 GEN4 register pairs for the given wm reg:
 996  */
 997 static void get_argument_regs( struct brw_wm_compile *c,
 998                                struct brw_wm_ref *arg[],
 999                                struct brw_reg *regs )
1000 {
1001    GLuint i;
1002
1003    for (i = 0; i < 4; i++) {
1004       if (arg[i]) {
1005
1006          if (arg[i]->unspill_reg)
1007             emit_unspill(c,
1008                          brw_vec8_grf(arg[i]->unspill_reg, 0),
1009                          arg[i]->value->spill_slot);
1010
1011          regs[i] = arg[i]->hw_reg;
1012       }
1013       else {
1014          regs[i] = brw_null_reg();
1015       }
1016    }
1017 }
1018
1019 static void spill_values( struct brw_wm_compile *c,
1020                           struct brw_wm_value *values,
1021                           GLuint nr )
1022 {
1023    GLuint i;
1024
1025    for (i = 0; i < nr; i++)
1026       if (values[i].spill_slot)
1027          emit_spill(c, values[i].hw_reg, values[i].spill_slot);
1028 }
1029
1030
1031
1032 /* Emit the fragment program instructions here.
1033  */
1034 void brw_wm_emit( struct brw_wm_compile *c )
1035 {
1036    struct brw_compile *p = &c->func;
1037    GLuint insn;
1038
1039    brw_set_compression_control(p, BRW_COMPRESSION_COMPRESSED);
1040
1041    /* Check if any of the payload regs need to be spilled:
1042     */
1043    spill_values(c, c->payload.depth, 4);
1044    spill_values(c, c->creg, c->nr_creg);
1045    spill_values(c, c->payload.input_interp, FRAG_ATTRIB_MAX);
1046
1047
1048    for (insn = 0; insn < c->nr_insns; insn++) {
1049
1050       struct brw_wm_instruction *inst = &c->instruction[insn];
1051       struct brw_reg args[3][4], dst[4];
1052       GLuint i, dst_flags;
1053
1054       /* Get argument regs:
1055        */
1056       for (i = 0; i < 3; i++)
1057          get_argument_regs(c, inst->src[i], args[i]);
1058
1059       /* Get dest regs:
1060        */
1061       for (i = 0; i < 4; i++)
1062          if (inst->dst[i])
1063             dst[i] = inst->dst[i]->hw_reg;
1064          else
1065             dst[i] = brw_null_reg();
1066
1067       /* Flags
1068        */
1069       dst_flags = inst->writemask;
1070       if (inst->saturate)
1071          dst_flags |= SATURATE;
1072
1073       switch (inst->opcode) {
1074          /* Generated instructions for calculating triangle interpolants:
1075           */
1076       case WM_PIXELXY:
1077          emit_pixel_xy(p, dst, dst_flags, args[0]);
1078          break;
1079
1080       case WM_DELTAXY:
1081          emit_delta_xy(p, dst, dst_flags, args[0], args[1]);
1082          break;
1083
1084       case WM_WPOSXY:
1085          emit_wpos_xy(p, dst, dst_flags, args[0]);
1086          break;
1087
1088       case WM_PIXELW:
1089          emit_pixel_w(p, dst, dst_flags, args[0], args[1]);
1090          break;
1091
1092       case WM_LINTERP:
1093          emit_linterp(p, dst, dst_flags, args[0], args[1]);
1094          break;
1095
1096       case WM_PINTERP:
1097          emit_pinterp(p, dst, dst_flags, args[0], args[1], args[2]);
1098          break;
1099
1100       case WM_CINTERP:
1101          emit_cinterp(p, dst, dst_flags, args[0]);
1102          break;
1103
1104       case WM_FB_WRITE:
1105          emit_fb_write(c, args[0], args[1], args[2]);
1106          break;
1107
1108          /* Straightforward arithmetic:
1109           */
1110       case OPCODE_ADD:
1111          emit_alu2(p, brw_ADD, dst, dst_flags, args[0], args[1]);
1112          break;
1113
1114       case OPCODE_FRC:
1115          emit_alu1(p, brw_FRC, dst, dst_flags, args[0]);
1116          break;
1117
1118       case OPCODE_FLR:
1119          emit_alu1(p, brw_RNDD, dst, dst_flags, args[0]);
1120          break;
1121
1122       case OPCODE_DP3:  /*  */
1123          emit_dp3(p, dst, dst_flags, args[0], args[1]);
1124          break;
1125
1126       case OPCODE_DP4:
1127          emit_dp4(p, dst, dst_flags, args[0], args[1]);
1128          break;
1129
1130       case OPCODE_DPH:
1131          emit_dph(p, dst, dst_flags, args[0], args[1]);
1132          break;
1133
1134       case OPCODE_LRP:  /*  */
1135          emit_lrp(p, dst, dst_flags, args[0], args[1], args[2]);
1136          break;
1137
1138       case OPCODE_MAD:
1139          emit_mad(p, dst, dst_flags, args[0], args[1], args[2]);
1140          break;
1141
1142       case OPCODE_MOV:
1143       case OPCODE_SWZ:
1144          emit_alu1(p, brw_MOV, dst, dst_flags, args[0]);
1145          break;
1146
1147       case OPCODE_MUL:
1148          emit_alu2(p, brw_MUL, dst, dst_flags, args[0], args[1]);
1149          break;
1150
1151       case OPCODE_XPD:
1152          emit_xpd(p, dst, dst_flags, args[0], args[1]);
1153          break;
1154
1155          /* Higher math functions:
1156           */
1157       case OPCODE_RCP:
1158          emit_math1(p, BRW_MATH_FUNCTION_INV, dst, dst_flags, args[0]);
1159          break;
1160
1161       case OPCODE_RSQ:
1162          emit_math1(p, BRW_MATH_FUNCTION_RSQ, dst, dst_flags, args[0]);
1163          break;
1164
1165       case OPCODE_SIN:
1166          emit_math1(p, BRW_MATH_FUNCTION_SIN, dst, dst_flags, args[0]);
1167          break;
1168
1169       case OPCODE_COS:
1170          emit_math1(p, BRW_MATH_FUNCTION_COS, dst, dst_flags, args[0]);
1171          break;
1172
1173       case OPCODE_EX2:
1174          emit_math1(p, BRW_MATH_FUNCTION_EXP, dst, dst_flags, args[0]);
1175          break;
1176
1177       case OPCODE_LG2:
1178          emit_math1(p, BRW_MATH_FUNCTION_LOG, dst, dst_flags, args[0]);
1179          break;
1180
1181       case OPCODE_SCS:
1182          /* There is an scs math function, but it would need some
1183           * fixup for 16-element execution.
1184           */
1185          if (dst_flags & WRITEMASK_X)
1186             emit_math1(p, BRW_MATH_FUNCTION_COS, dst, (dst_flags&SATURATE)|WRITEMASK_X, args[0]);
1187          if (dst_flags & WRITEMASK_Y)
1188             emit_math1(p, BRW_MATH_FUNCTION_SIN, dst+1, (dst_flags&SATURATE)|WRITEMASK_X, args[0]);
1189          break;
1190
1191       case OPCODE_POW:
1192          emit_math2(p, BRW_MATH_FUNCTION_POW, dst, dst_flags, args[0], args[1]);
1193          break;
1194
1195          /* Comparisons:
1196           */
1197       case OPCODE_CMP:
1198          emit_cmp(p, dst, dst_flags, args[0], args[1], args[2]);
1199          break;
1200
1201       case OPCODE_MAX:
1202          emit_max(p, dst, dst_flags, args[0], args[1]);
1203          break;
1204
1205       case OPCODE_MIN:
1206          emit_min(p, dst, dst_flags, args[0], args[1]);
1207          break;
1208
1209       case OPCODE_SLT:
1210          emit_slt(p, dst, dst_flags, args[0], args[1]);
1211          break;
1212
1213       case OPCODE_SGE:
1214          emit_sge(p, dst, dst_flags, args[0], args[1]);
1215          break;
1216
1217       case OPCODE_LIT:
1218          emit_lit(p, dst, dst_flags, args[0]);
1219          break;
1220
1221          /* Texturing operations:
1222           */
1223       case OPCODE_TEX:
1224          emit_tex(c, inst, dst, dst_flags, args[0]);
1225          break;
1226
1227       case OPCODE_TXB:
1228          emit_txb(c, inst, dst, dst_flags, args[0]);
1229          break;
1230
1231       case OPCODE_KIL:
1232          emit_kil(c, args[0]);
1233          break;
1234
1235       default:
1236          assert(0);
1237       }
1238
1239       for (i = 0; i < 4; i++)
1240         if (inst->dst[i] && inst->dst[i]->spill_slot)
1241            emit_spill(c,
1242                       inst->dst[i]->hw_reg,
1243                       inst->dst[i]->spill_slot);
1244    }
1245 }
1246
1247
1248
1249
1250