src/mesa/drivers/dri/i965/brw_wm_emit.c

   1 /*
   2  Copyright (C) Intel Corp.  2006.  All Rights Reserved.
   3  Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
   4  develop this 3D driver.
   5
   6  Permission is hereby granted, free of charge, to any person obtaining
   7  a copy of this software and associated documentation files (the
   8  "Software"), to deal in the Software without restriction, including
   9  without limitation the rights to use, copy, modify, merge, publish,
  10  distribute, sublicense, and/or sell copies of the Software, and to
  11  permit persons to whom the Software is furnished to do so, subject to
  12  the following conditions:
  13
  14  The above copyright notice and this permission notice (including the
  15  next paragraph) shall be included in all copies or substantial
  16  portions of the Software.
  17
  18  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
  19  EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  20  MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
  21  IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
  22  LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
  23  OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
  24  WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
  25
  26  **********************************************************************/
  27  /*
  28   * Authors:
  29   *   Keith Whitwell <keith@tungstengraphics.com>
  30   */
  31
  32
  33 #include "macros.h"
  34 #include "brw_context.h"
  35 #include "brw_wm.h"
  36
  37 #define SATURATE (1<<5)
  38
  39 /* Not quite sure how correct this is - need to understand horiz
  40  * vs. vertical strides a little better.
  41  */
  42 static __inline struct brw_reg sechalf( struct brw_reg reg )
  43 {
  44    if (reg.vstride)
  45       reg.nr++;
  46    return reg;
  47 }
  48
  49 /* Payload R0:
  50  *
  51  * R0.0 -- pixel mask, one bit for each of 4 pixels in 4 tiles,
  52  *         corresponding to each of the 16 execution channels.
  53  * R0.1..8 -- ?
  54  * R1.0 -- triangle vertex 0.X
  55  * R1.1 -- triangle vertex 0.Y
  56  * R1.2 -- tile 0 x,y coords (2 packed uwords)
  57  * R1.3 -- tile 1 x,y coords (2 packed uwords)
  58  * R1.4 -- tile 2 x,y coords (2 packed uwords)
  59  * R1.5 -- tile 3 x,y coords (2 packed uwords)
  60  * R1.6 -- ?
  61  * R1.7 -- ?
  62  * R1.8 -- ?
  63  */
  64
  65
  66 static void emit_pixel_xy(struct brw_compile *p,
  67                           const struct brw_reg *dst,
  68                           GLuint mask,
  69                           const struct brw_reg *arg0)
  70 {
  71    struct brw_reg r1 = brw_vec1_grf(1, 0);
  72    struct brw_reg r1_uw = retype(r1, BRW_REGISTER_TYPE_UW);
  73
  74    brw_set_compression_control(p, BRW_COMPRESSION_NONE);
  75
  76    /* Calculate pixel centers by adding 1 or 0 to each of the
  77     * micro-tile coordinates passed in r1.
  78     */
  79    if (mask & WRITEMASK_X) {
  80       brw_ADD(p,
  81               vec16(retype(dst[0], BRW_REGISTER_TYPE_UW)),
  82               stride(suboffset(r1_uw, 4), 2, 4, 0),
  83               brw_imm_v(0x10101010));
  84    }
  85
  86    if (mask & WRITEMASK_Y) {
  87       brw_ADD(p,
  88               vec16(retype(dst[1], BRW_REGISTER_TYPE_UW)),
  89               stride(suboffset(r1_uw,5), 2, 4, 0),
  90               brw_imm_v(0x11001100));
  91    }
  92
  93    brw_set_compression_control(p, BRW_COMPRESSION_COMPRESSED);
  94 }
  95
  96
  97
  98 static void emit_delta_xy(struct brw_compile *p,
  99                           const struct brw_reg *dst,
 100                           GLuint mask,
 101                           const struct brw_reg *arg0,
 102                           const struct brw_reg *arg1)
 103 {
 104    struct brw_reg r1 = brw_vec1_grf(1, 0);
 105
 106    /* Calc delta X,Y by subtracting origin in r1 from the pixel
 107     * centers.
 108     */
 109    if (mask & WRITEMASK_X) {
 110       brw_ADD(p,
 111               dst[0],
 112               retype(arg0[0], BRW_REGISTER_TYPE_UW),
 113               negate(r1));
 114    }
 115
 116    if (mask & WRITEMASK_Y) {
 117       brw_ADD(p,
 118               dst[1],
 119               retype(arg0[1], BRW_REGISTER_TYPE_UW),
 120               negate(suboffset(r1,1)));
 121
 122    }
 123 }
 124
 125 static void emit_wpos_xy(struct brw_compile *p,
 126                            const struct brw_reg *dst,
 127                            GLuint mask,
 128                            const struct brw_reg *arg0)
 129 {
 130    /* Calc delta X,Y by subtracting origin in r1 from the pixel
 131     * centers.
 132     */
 133    if (mask & WRITEMASK_X) {
 134       brw_MOV(p,
 135               dst[0],
 136               retype(arg0[0], BRW_REGISTER_TYPE_UW));
 137    }
 138
 139    if (mask & WRITEMASK_Y) {
 140       /* TODO -- window_height - Y */
 141       brw_MOV(p,
 142               dst[1],
 143               negate(retype(arg0[1], BRW_REGISTER_TYPE_UW)));
 144
 145    }
 146 }
 147
 148
 149 static void emit_pixel_w( struct brw_compile *p,
 150                           const struct brw_reg *dst,
 151                           GLuint mask,
 152                           const struct brw_reg *arg0,
 153                           const struct brw_reg *deltas)
 154 {
 155    /* Don't need this if all you are doing is interpolating color, for
 156     * instance.
 157     */
 158    if (mask & WRITEMASK_W) {
 159       struct brw_reg interp3 = brw_vec1_grf(arg0[0].nr+1, 4);
 160
 161       /* Calc 1/w - just linterp wpos[3] optimized by putting the
 162        * result straight into a message reg.
 163        */
 164       brw_LINE(p, brw_null_reg(), interp3, deltas[0]);
 165       brw_MAC(p, brw_message_reg(2), suboffset(interp3, 1), deltas[1]);
 166
 167       /* Calc w */
 168       brw_math_16( p, dst[3],
 169                    BRW_MATH_FUNCTION_INV,
 170                    BRW_MATH_SATURATE_NONE,
 171                    2, brw_null_reg(),
 172                    BRW_MATH_PRECISION_FULL);
 173    }
 174 }
 175
 176
 177
 178 static void emit_linterp( struct brw_compile *p,
 179                          const struct brw_reg *dst,
 180                          GLuint mask,
 181                          const struct brw_reg *arg0,
 182                          const struct brw_reg *deltas )
 183 {
 184    struct brw_reg interp[4];
 185    GLuint nr = arg0[0].nr;
 186    GLuint i;
 187
 188    interp[0] = brw_vec1_grf(nr, 0);
 189    interp[1] = brw_vec1_grf(nr, 4);
 190    interp[2] = brw_vec1_grf(nr+1, 0);
 191    interp[3] = brw_vec1_grf(nr+1, 4);
 192
 193    for(i = 0; i < 4; i++ ) {
 194       if (mask & (1<<i)) {
 195          brw_LINE(p, brw_null_reg(), interp[i], deltas[0]);
 196          brw_MAC(p, dst[i], suboffset(interp[i],1), deltas[1]);
 197       }
 198    }
 199 }
 200
 201
 202 static void emit_pinterp( struct brw_compile *p,
 203                           const struct brw_reg *dst,
 204                           GLuint mask,
 205                           const struct brw_reg *arg0,
 206                           const struct brw_reg *deltas,
 207                           const struct brw_reg *w)
 208 {
 209    struct brw_reg interp[4];
 210    GLuint nr = arg0[0].nr;
 211    GLuint i;
 212
 213    interp[0] = brw_vec1_grf(nr, 0);
 214    interp[1] = brw_vec1_grf(nr, 4);
 215    interp[2] = brw_vec1_grf(nr+1, 0);
 216    interp[3] = brw_vec1_grf(nr+1, 4);
 217
 218    for(i = 0; i < 4; i++ ) {
 219       if (mask & (1<<i)) {
 220          brw_LINE(p, brw_null_reg(), interp[i], deltas[0]);
 221          brw_MAC(p, dst[i], suboffset(interp[i],1), deltas[1]);
 222          brw_MUL(p, dst[i], dst[i], w[3]);
 223       }
 224    }
 225 }
 226
 227 static void emit_cinterp( struct brw_compile *p,
 228                          const struct brw_reg *dst,
 229                          GLuint mask,
 230                          const struct brw_reg *arg0 )
 231 {
 232    struct brw_reg interp[4];
 233    GLuint nr = arg0[0].nr;
 234    GLuint i;
 235
 236    interp[0] = brw_vec1_grf(nr, 0);
 237    interp[1] = brw_vec1_grf(nr, 4);
 238    interp[2] = brw_vec1_grf(nr+1, 0);
 239    interp[3] = brw_vec1_grf(nr+1, 4);
 240
 241    for(i = 0; i < 4; i++ ) {
 242       if (mask & (1<<i)) {
 243          brw_MOV(p, dst[i], suboffset(interp[i],3));    /* TODO: optimize away like other moves */
 244       }
 245    }
 246 }
 247
 248
 249
 250
 251
 252 static void emit_alu1( struct brw_compile *p,
 253                        struct brw_instruction *(*func)(struct brw_compile *,
 254                                                        struct brw_reg,
 255                                                        struct brw_reg),
 256                        const struct brw_reg *dst,
 257                        GLuint mask,
 258                        const struct brw_reg *arg0 )
 259 {
 260    GLuint i;
 261
 262    if (mask & SATURATE)
 263       brw_set_saturate(p, 1);
 264
 265    for (i = 0; i < 4; i++) {
 266       if (mask & (1<<i)) {
 267          func(p, dst[i], arg0[i]);
 268       }
 269    }
 270
 271    if (mask & SATURATE)
 272       brw_set_saturate(p, 0);
 273 }
 274
 275 static void emit_alu2( struct brw_compile *p,
 276                        struct brw_instruction *(*func)(struct brw_compile *,
 277                                                        struct brw_reg,
 278                                                        struct brw_reg,
 279                                                        struct brw_reg),
 280                        const struct brw_reg *dst,
 281                        GLuint mask,
 282                        const struct brw_reg *arg0,
 283                        const struct brw_reg *arg1 )
 284 {
 285    GLuint i;
 286
 287    if (mask & SATURATE)
 288       brw_set_saturate(p, 1);
 289
 290    for (i = 0; i < 4; i++) {
 291       if (mask & (1<<i)) {
 292          func(p, dst[i], arg0[i], arg1[i]);
 293       }
 294    }
 295
 296    if (mask & SATURATE)
 297       brw_set_saturate(p, 0);
 298 }
 299
 300
 301 static void emit_mad( struct brw_compile *p,
 302                       const struct brw_reg *dst,
 303                       GLuint mask,
 304                       const struct brw_reg *arg0,
 305                       const struct brw_reg *arg1,
 306                       const struct brw_reg *arg2 )
 307 {
 308    GLuint i;
 309
 310    for (i = 0; i < 4; i++) {
 311       if (mask & (1<<i)) {
 312          brw_MUL(p, dst[i], arg0[i], arg1[i]);
 313
 314          brw_set_saturate(p, (mask & SATURATE) ? 1 : 0);
 315          brw_ADD(p, dst[i], dst[i], arg2[i]);
 316          brw_set_saturate(p, 0);
 317       }
 318    }
 319 }
 320
 321
 322 static void emit_lrp( struct brw_compile *p,
 323                       const struct brw_reg *dst,
 324                       GLuint mask,
 325                       const struct brw_reg *arg0,
 326                       const struct brw_reg *arg1,
 327                       const struct brw_reg *arg2 )
 328 {
 329    GLuint i;
 330
 331    /* Uses dst as a temporary:
 332     */
 333    for (i = 0; i < 4; i++) {
 334       if (mask & (1<<i)) {
 335          /* Can I use the LINE instruction for this?
 336           */
 337          brw_ADD(p, dst[i], negate(arg0[i]), brw_imm_f(1.0));
 338          brw_MUL(p, brw_null_reg(), dst[i], arg2[i]);
 339
 340          brw_set_saturate(p, (mask & SATURATE) ? 1 : 0);
 341          brw_MAC(p, dst[i], arg0[i], arg1[i]);
 342          brw_set_saturate(p, 0);
 343       }
 344    }
 345 }
 346
 347
 348 static void emit_slt( struct brw_compile *p,
 349                       const struct brw_reg *dst,
 350                       GLuint mask,
 351                       const struct brw_reg *arg0,
 352                       const struct brw_reg *arg1 )
 353 {
 354    GLuint i;
 355
 356    for (i = 0; i < 4; i++) {
 357       if (mask & (1<<i)) {
 358          brw_MOV(p, dst[i], brw_imm_f(0));
 359          brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_L, arg0[i], arg1[i]);
 360          brw_MOV(p, dst[i], brw_imm_f(1.0));
 361          brw_set_predicate_control_flag_value(p, 0xff);
 362       }
 363    }
 364 }
 365
 366 /* Isn't this just the same as the above with the args swapped?
 367  */
 368 static void emit_sge( struct brw_compile *p,
 369                       const struct brw_reg *dst,
 370                       GLuint mask,
 371                       const struct brw_reg *arg0,
 372                       const struct brw_reg *arg1 )
 373 {
 374    GLuint i;
 375
 376    for (i = 0; i < 4; i++) {
 377       if (mask & (1<<i)) {
 378          brw_MOV(p, dst[i], brw_imm_f(0));
 379          brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_GE, arg0[i], arg1[i]);
 380          brw_MOV(p, dst[i], brw_imm_f(1.0));
 381          brw_set_predicate_control_flag_value(p, 0xff);
 382       }
 383    }
 384 }
 385
 386
 387
 388 static void emit_cmp( struct brw_compile *p,
 389                       const struct brw_reg *dst,
 390                       GLuint mask,
 391                       const struct brw_reg *arg0,
 392                       const struct brw_reg *arg1,
 393                       const struct brw_reg *arg2 )
 394 {
 395    GLuint i;
 396
 397    for (i = 0; i < 4; i++) {
 398       if (mask & (1<<i)) {
 399          brw_set_saturate(p, (mask & SATURATE) ? 1 : 0);
 400          brw_MOV(p, dst[i], arg2[i]);
 401          brw_set_saturate(p, 0);
 402
 403          brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_L, arg0[i], brw_imm_f(0));
 404
 405          brw_set_saturate(p, (mask & SATURATE) ? 1 : 0);
 406          brw_MOV(p, dst[i], arg1[i]);
 407          brw_set_saturate(p, 0);
 408          brw_set_predicate_control_flag_value(p, 0xff);
 409       }
 410    }
 411 }
 412
 413 static void emit_max( struct brw_compile *p,
 414                       const struct brw_reg *dst,
 415                       GLuint mask,
 416                       const struct brw_reg *arg0,
 417                       const struct brw_reg *arg1 )
 418 {
 419    GLuint i;
 420
 421    for (i = 0; i < 4; i++) {
 422       if (mask & (1<<i)) {
 423          brw_set_saturate(p, (mask & SATURATE) ? 1 : 0);
 424          brw_MOV(p, dst[i], arg0[i]);
 425          brw_set_saturate(p, 0);
 426
 427          brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_L, arg0[i], arg1[i]);
 428
 429          brw_set_saturate(p, (mask & SATURATE) ? 1 : 0);
 430          brw_MOV(p, dst[i], arg1[i]);
 431          brw_set_saturate(p, 0);
 432          brw_set_predicate_control_flag_value(p, 0xff);
 433       }
 434    }
 435 }
 436
 437 static void emit_min( struct brw_compile *p,
 438                       const struct brw_reg *dst,
 439                       GLuint mask,
 440                       const struct brw_reg *arg0,
 441                       const struct brw_reg *arg1 )
 442 {
 443    GLuint i;
 444
 445    for (i = 0; i < 4; i++) {
 446       if (mask & (1<<i)) {
 447          brw_set_saturate(p, (mask & SATURATE) ? 1 : 0);
 448          brw_MOV(p, dst[i], arg1[i]);
 449          brw_set_saturate(p, 0);
 450
 451          brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_L, arg0[i], arg1[i]);
 452
 453          brw_set_saturate(p, (mask & SATURATE) ? 1 : 0);
 454          brw_MOV(p, dst[i], arg0[i]);
 455          brw_set_saturate(p, 0);
 456          brw_set_predicate_control_flag_value(p, 0xff);
 457       }
 458    }
 459 }
 460
 461
 462 static void emit_dp3( struct brw_compile *p,
 463                       const struct brw_reg *dst,
 464                       GLuint mask,
 465                       const struct brw_reg *arg0,
 466                       const struct brw_reg *arg1 )
 467 {
 468    assert((mask & WRITEMASK_XYZW) == WRITEMASK_X);
 469
 470    brw_MUL(p, brw_null_reg(), arg0[0], arg1[0]);
 471    brw_MAC(p, brw_null_reg(), arg0[1], arg1[1]);
 472
 473    brw_set_saturate(p, (mask & SATURATE) ? 1 : 0);
 474    brw_MAC(p, dst[0], arg0[2], arg1[2]);
 475    brw_set_saturate(p, 0);
 476 }
 477
 478
 479 static void emit_dp4( struct brw_compile *p,
 480                       const struct brw_reg *dst,
 481                       GLuint mask,
 482                       const struct brw_reg *arg0,
 483                       const struct brw_reg *arg1 )
 484 {
 485    assert((mask & WRITEMASK_XYZW) == WRITEMASK_X);
 486
 487    brw_MUL(p, brw_null_reg(), arg0[0], arg1[0]);
 488    brw_MAC(p, brw_null_reg(), arg0[1], arg1[1]);
 489    brw_MAC(p, brw_null_reg(), arg0[2], arg1[2]);
 490
 491    brw_set_saturate(p, (mask & SATURATE) ? 1 : 0);
 492    brw_MAC(p, dst[0], arg0[3], arg1[3]);
 493    brw_set_saturate(p, 0);
 494 }
 495
 496
 497 static void emit_dph( struct brw_compile *p,
 498                       const struct brw_reg *dst,
 499                       GLuint mask,
 500                       const struct brw_reg *arg0,
 501                       const struct brw_reg *arg1 )
 502 {
 503    assert((mask & WRITEMASK_XYZW) == WRITEMASK_X);
 504
 505    brw_MUL(p, brw_null_reg(), arg0[0], arg1[0]);
 506    brw_MAC(p, brw_null_reg(), arg0[1], arg1[1]);
 507    brw_MAC(p, dst[0], arg0[2], arg1[2]);
 508
 509    brw_set_saturate(p, (mask & SATURATE) ? 1 : 0);
 510    brw_ADD(p, dst[0], dst[0], arg1[3]);
 511    brw_set_saturate(p, 0);
 512 }
 513
 514
 515 static void emit_xpd( struct brw_compile *p,
 516                       const struct brw_reg *dst,
 517                       GLuint mask,
 518                       const struct brw_reg *arg0,
 519                       const struct brw_reg *arg1 )
 520 {
 521    GLuint i;
 522
 523    assert(!(mask & WRITEMASK_W) == WRITEMASK_X);
 524
 525    for (i = 0 ; i < 3; i++) {
 526       if (mask & (1<<i)) {
 527          GLuint i2 = (i+2)%3;
 528          GLuint i1 = (i+1)%3;
 529
 530          brw_MUL(p, brw_null_reg(), negate(arg0[i2]), arg1[i1]);
 531
 532          brw_set_saturate(p, (mask & SATURATE) ? 1 : 0);
 533          brw_MAC(p, dst[i], arg0[i1], arg1[i2]);
 534          brw_set_saturate(p, 0);
 535       }
 536    }
 537 }
 538
 539
 540 static void emit_math1( struct brw_compile *p,
 541                         GLuint function,
 542                         const struct brw_reg *dst,
 543                         GLuint mask,
 544                         const struct brw_reg *arg0 )
 545 {
 546    assert((mask & WRITEMASK_XYZW) == WRITEMASK_X ||
 547           function == BRW_MATH_FUNCTION_SINCOS);
 548
 549    brw_MOV(p, brw_message_reg(2), arg0[0]);
 550
 551    /* Send two messages to perform all 16 operations:
 552     */
 553    brw_math_16(p,
 554                dst[0],
 555                function,
 556                (mask & SATURATE) ? BRW_MATH_SATURATE_SATURATE : BRW_MATH_SATURATE_NONE,
 557                2,
 558                brw_null_reg(),
 559                BRW_MATH_PRECISION_FULL);
 560 }
 561
 562
 563 static void emit_math2( struct brw_compile *p,
 564                         GLuint function,
 565                         const struct brw_reg *dst,
 566                         GLuint mask,
 567                         const struct brw_reg *arg0,
 568                         const struct brw_reg *arg1)
 569 {
 570    assert((mask & WRITEMASK_XYZW) == WRITEMASK_X);
 571
 572    brw_push_insn_state(p);
 573
 574    brw_set_compression_control(p, BRW_COMPRESSION_NONE);
 575    brw_MOV(p, brw_message_reg(2), arg0[0]);
 576    brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF);
 577    brw_MOV(p, brw_message_reg(4), sechalf(arg0[0]));
 578
 579    brw_set_compression_control(p, BRW_COMPRESSION_NONE);
 580    brw_MOV(p, brw_message_reg(3), arg1[0]);
 581    brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF);
 582    brw_MOV(p, brw_message_reg(5), sechalf(arg1[0]));
 583
 584
 585    /* Send two messages to perform all 16 operations:
 586     */
 587    brw_set_compression_control(p, BRW_COMPRESSION_NONE);
 588    brw_math(p,
 589             dst[0],
 590             function,
 591             (mask & SATURATE) ? BRW_MATH_SATURATE_SATURATE : BRW_MATH_SATURATE_NONE,
 592             2,
 593             brw_null_reg(),
 594             BRW_MATH_DATA_VECTOR,
 595             BRW_MATH_PRECISION_FULL);
 596
 597    brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF);
 598    brw_math(p,
 599             offset(dst[0],1),
 600             function,
 601             (mask & SATURATE) ? BRW_MATH_SATURATE_SATURATE : BRW_MATH_SATURATE_NONE,
 602             4,
 603             brw_null_reg(),
 604             BRW_MATH_DATA_VECTOR,
 605             BRW_MATH_PRECISION_FULL);
 606
 607    brw_pop_insn_state(p);
 608 }
 609
 610
 611
 612 static void emit_tex( struct brw_wm_compile *c,
 613                       const struct brw_wm_instruction *inst,
 614                       struct brw_reg *dst,
 615                       GLuint dst_flags,
 616                       struct brw_reg *arg )
 617 {
 618    struct brw_compile *p = &c->func;
 619    GLuint msgLength, responseLength;
 620    GLboolean shadow = (c->key.shadowtex_mask & (1<<inst->tex_unit)) ? 1 : 0;
 621    GLuint i, nr;
 622    GLuint emit;
 623
 624    /* How many input regs are there?
 625     */
 626    switch (inst->tex_idx) {
 627    case TEXTURE_1D_INDEX:
 628       emit = WRITEMASK_X;
 629       nr = 1;
 630       break;
 631    case TEXTURE_2D_INDEX:
 632    case TEXTURE_RECT_INDEX:
 633       emit = WRITEMASK_XY;
 634       nr = 2;
 635       break;
 636    default:
 637       emit = WRITEMASK_XYZ;
 638       nr = 3;
 639       break;
 640    }
 641
 642    if (shadow) {
 643       nr = 4;
 644       emit |= WRITEMASK_W;
 645    }
 646
 647    msgLength = 1;
 648
 649    for (i = 0; i < nr; i++) {
 650       static const GLuint swz[4] = {0,1,2,2};
 651       if (emit & (1<<i))
 652          brw_MOV(p, brw_message_reg(msgLength+1), arg[swz[i]]);
 653       else
 654          brw_MOV(p, brw_message_reg(msgLength+1), brw_imm_f(0));
 655       msgLength += 2;
 656    }
 657
 658    responseLength = 8;          /* always */
 659
 660    brw_SAMPLE(p,
 661               retype(vec16(dst[0]), BRW_REGISTER_TYPE_UW),
 662               1,
 663               retype(c->payload.depth[0].hw_reg, BRW_REGISTER_TYPE_UW),
 664               inst->tex_unit + 1, /* surface */
 665               inst->tex_unit,     /* sampler */
 666               inst->writemask,
 667               (shadow ?
 668                BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_COMPARE :
 669                BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE),
 670               responseLength,
 671               msgLength,
 672               0);
 673
 674 }
 675
 676
 677 static void emit_txb( struct brw_wm_compile *c,
 678                       const struct brw_wm_instruction *inst,
 679                       struct brw_reg *dst,
 680                       GLuint dst_flags,
 681                       struct brw_reg *arg )
 682 {
 683    struct brw_compile *p = &c->func;
 684    GLuint msgLength;
 685
 686    /* Shadow ignored for txb.
 687     */
 688    switch (inst->tex_idx) {
 689    case TEXTURE_1D_INDEX:
 690       brw_MOV(p, brw_message_reg(2), arg[0]);
 691       brw_MOV(p, brw_message_reg(4), brw_imm_f(0));
 692       brw_MOV(p, brw_message_reg(6), brw_imm_f(0));
 693       break;
 694    case TEXTURE_2D_INDEX:
 695    case TEXTURE_RECT_INDEX:
 696       brw_MOV(p, brw_message_reg(2), arg[0]);
 697       brw_MOV(p, brw_message_reg(4), arg[1]);
 698       brw_MOV(p, brw_message_reg(6), brw_imm_f(0));
 699       break;
 700    default:
 701       brw_MOV(p, brw_message_reg(2), arg[0]);
 702       brw_MOV(p, brw_message_reg(4), arg[1]);
 703       brw_MOV(p, brw_message_reg(6), arg[2]);
 704       break;
 705    }
 706
 707    brw_MOV(p, brw_message_reg(8), arg[3]);
 708    msgLength = 9;
 709
 710
 711    brw_SAMPLE(p,
 712               retype(vec16(dst[0]), BRW_REGISTER_TYPE_UW),
 713               1,
 714               retype(c->payload.depth[0].hw_reg, BRW_REGISTER_TYPE_UW),
 715               inst->tex_unit + 1, /* surface */
 716               inst->tex_unit,     /* sampler */
 717               inst->writemask,
 718               BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_BIAS,
 719               8,                /* responseLength */
 720               msgLength,
 721               0);
 722
 723 }
 724
 725
 726 static void emit_lit( struct brw_compile *p,
 727                       const struct brw_reg *dst,
 728                       GLuint mask,
 729                       const struct brw_reg *arg0 )
 730 {
 731    assert((mask & WRITEMASK_XW) == 0);
 732
 733    if (mask & WRITEMASK_Y) {
 734       brw_set_saturate(p, (mask & SATURATE) ? 1 : 0);
 735       brw_MOV(p, dst[1], arg0[0]);
 736       brw_set_saturate(p, 0);
 737    }
 738
 739    if (mask & WRITEMASK_Z) {
 740       emit_math2(p, BRW_MATH_FUNCTION_POW,
 741                  &dst[2],
 742                  WRITEMASK_X | (mask & SATURATE),
 743                  &arg0[1],
 744                  &arg0[3]);
 745    }
 746
 747    /* Ordinarily you'd use an iff statement to skip or shortcircuit
 748     * some of the POW calculations above, but 16-wide iff statements
 749     * seem to lock c1 hardware, so this is a nasty workaround:
 750     */
 751    brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_LE, arg0[0], brw_imm_f(0));
 752    {
 753       if (mask & WRITEMASK_Y)
 754          brw_MOV(p, dst[1], brw_imm_f(0));
 755
 756       if (mask & WRITEMASK_Z)
 757          brw_MOV(p, dst[2], brw_imm_f(0));
 758    }
 759    brw_set_predicate_control(p, BRW_PREDICATE_NONE);
 760 }
 761
 762
 763 /* Kill pixel - set execution mask to zero for those pixels which
 764  * fail.
 765  */
 766 static void emit_kil( struct brw_wm_compile *c,
 767                       struct brw_reg *arg0)
 768 {
 769    struct brw_compile *p = &c->func;
 770    struct brw_reg r0uw = retype(brw_vec1_grf(0, 0), BRW_REGISTER_TYPE_UW);
 771    GLuint i;
 772
 773
 774    /* XXX - usually won't need 4 compares!
 775     */
 776    for (i = 0; i < 4; i++) {
 777       brw_push_insn_state(p);
 778       brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_GE, arg0[i], brw_imm_f(0));
 779       brw_set_predicate_control_flag_value(p, 0xff);
 780       brw_AND(p, r0uw, brw_flag_reg(), r0uw);
 781       brw_pop_insn_state(p);
 782    }
 783 }
 784
 785 static void fire_fb_write( struct brw_wm_compile *c,
 786                            GLuint base_reg,
 787                            GLuint nr )
 788 {
 789    struct brw_compile *p = &c->func;
 790
 791    /* Pass through control information:
 792     */
 793 /*  mov (8) m1.0<1>:ud   r1.0<8;8,1>:ud   { Align1 NoMask } */
 794    {
 795       brw_push_insn_state(p);
 796       brw_set_mask_control(p, BRW_MASK_DISABLE); /* ? */
 797       brw_set_compression_control(p, BRW_COMPRESSION_NONE);
 798       brw_MOV(p,
 799                brw_message_reg(base_reg + 1),
 800                brw_vec8_grf(1, 0));
 801       brw_pop_insn_state(p);
 802    }
 803
 804    /* Send framebuffer write message: */
 805 /*  send (16) null.0<1>:uw m0               r0.0<8;8,1>:uw   0x85a04000:ud    { Align1 EOT } */
 806    brw_fb_WRITE(p,
 807                 retype(vec16(brw_null_reg()), BRW_REGISTER_TYPE_UW),
 808                 base_reg,
 809                 retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UW),
 810                 0,              /* render surface always 0 */
 811                 nr,
 812                 0,
 813                 1);
 814 }
 815
 816 static void emit_aa( struct brw_wm_compile *c,
 817                      struct brw_reg *arg1,
 818                      GLuint reg )
 819 {
 820    struct brw_compile *p = &c->func;
 821    GLuint comp = c->key.aa_dest_stencil_reg / 2;
 822    GLuint off = c->key.aa_dest_stencil_reg % 2;
 823    struct brw_reg aa = offset(arg1[comp], off);
 824
 825    brw_push_insn_state(p);
 826    brw_set_compression_control(p, BRW_COMPRESSION_NONE); /* ?? */
 827    brw_MOV(p, brw_message_reg(reg), aa);
 828    brw_pop_insn_state(p);
 829 }
 830
 831
 832 /* Post-fragment-program processing.  Send the results to the
 833  * framebuffer.
 834  */
 835 static void emit_fb_write( struct brw_wm_compile *c,
 836                            struct brw_reg *arg0,
 837                            struct brw_reg *arg1,
 838                            struct brw_reg *arg2)
 839 {
 840    struct brw_compile *p = &c->func;
 841    GLuint nr = 2;
 842    GLuint channel;
 843
 844    /* Reserve a space for AA - may not be needed:
 845     */
 846    if (c->key.aa_dest_stencil_reg)
 847       nr += 1;
 848
 849    /* I don't really understand how this achieves the color interleave
 850     * (ie RGBARGBA) in the result:  [Do the saturation here]
 851     */
 852    {
 853       brw_push_insn_state(p);
 854
 855       for (channel = 0; channel < 4; channel++) {
 856          /*  mov (8) m2.0<1>:ud   r28.0<8;8,1>:ud  { Align1 } */
 857          /*  mov (8) m6.0<1>:ud   r29.0<8;8,1>:ud  { Align1 SecHalf } */
 858
 859          brw_set_compression_control(p, BRW_COMPRESSION_NONE);
 860          brw_MOV(p,
 861                  brw_message_reg(nr + channel),
 862                  arg0[channel]);
 863
 864          brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF);
 865          brw_MOV(p,
 866                  brw_message_reg(nr + channel + 4),
 867                  sechalf(arg0[channel]));
 868       }
 869
 870       /* skip over the regs populated above:
 871        */
 872       nr += 8;
 873
 874       brw_pop_insn_state(p);
 875    }
 876
 877    if (c->key.source_depth_to_render_target)
 878    {
 879       if (c->key.computes_depth)
 880          brw_MOV(p, brw_message_reg(nr), arg2[2]);
 881       else
 882          brw_MOV(p, brw_message_reg(nr), arg1[1]); /* ? */
 883
 884       nr += 2;
 885    }
 886
 887    if (c->key.dest_depth_reg)
 888    {
 889       GLuint comp = c->key.dest_depth_reg / 2;
 890       GLuint off = c->key.dest_depth_reg % 2;
 891
 892       if (off != 0) {
 893          brw_push_insn_state(p);
 894          brw_set_compression_control(p, BRW_COMPRESSION_NONE);
 895          brw_MOV(p, brw_message_reg(nr), arg1[comp]);
 896          /* 2nd half? */
 897          brw_MOV(p, brw_message_reg(nr+1), offset(arg1[comp],1));
 898          brw_pop_insn_state(p);
 899       }
 900       else {
 901          brw_MOV(p, brw_message_reg(nr), arg1[comp]);
 902       }
 903       nr += 2;
 904    }
 905
 906
 907    if (!c->key.runtime_check_aads_emit) {
 908       if (c->key.aa_dest_stencil_reg)
 909          emit_aa(c, arg1, 2);
 910
 911       fire_fb_write(c, 0, nr);
 912    }
 913    else {
 914       struct brw_reg v1_null_ud = vec1(retype(brw_null_reg(), BRW_REGISTER_TYPE_UD));
 915       struct brw_reg ip = brw_ip_reg();
 916       struct brw_instruction *jmp;
 917
 918       brw_set_compression_control(p, BRW_COMPRESSION_NONE);
 919       brw_set_conditionalmod(p, BRW_CONDITIONAL_Z);
 920       brw_AND(p,
 921               v1_null_ud,
 922               get_element_ud(brw_vec8_grf(1,0), 6),
 923               brw_imm_ud(1<<26));
 924
 925       jmp = brw_JMPI(p, ip, ip, brw_imm_w(0));
 926       {
 927          emit_aa(c, arg1, 2);
 928          fire_fb_write(c, 0, nr);
 929          /* note - thread killed in subroutine */
 930       }
 931       brw_land_fwd_jump(p, jmp);
 932
 933       /* ELSE: Shuffle up one register to fill in the hole left for AA:
 934        */
 935       fire_fb_write(c, 1, nr-1);
 936    }
 937 }
 938
 939
 940
 941
 942 /* Post-fragment-program processing.  Send the results to the
 943  * framebuffer.
 944  */
 945 static void emit_spill( struct brw_wm_compile *c,
 946                         struct brw_reg reg,
 947                         GLuint slot )
 948 {
 949    struct brw_compile *p = &c->func;
 950
 951    /*
 952      mov (16) m2.0<1>:ud   r2.0<8;8,1>:ud   { Align1 Compr }
 953    */
 954    brw_MOV(p, brw_message_reg(2), reg);
 955
 956    /*
 957      mov (1) r0.2<1>:d    0x00000080:d     { Align1 NoMask }
 958      send (16) null.0<1>:uw m1               r0.0<8;8,1>:uw   0x053003ff:ud    { Align1 }
 959    */
 960    brw_dp_WRITE_16(p,
 961                    retype(vec16(brw_vec8_grf(0, 0)), BRW_REGISTER_TYPE_UW),
 962                    1,
 963                    slot);
 964 }
 965
 966 static void emit_unspill( struct brw_wm_compile *c,
 967                           struct brw_reg reg,
 968                           GLuint slot )
 969 {
 970    struct brw_compile *p = &c->func;
 971
 972    /* Slot 0 is the undef value.
 973     */
 974    if (slot == 0) {
 975       brw_MOV(p, reg, brw_imm_f(0));
 976       return;
 977    }
 978
 979    /*
 980      mov (1) r0.2<1>:d    0x000000c0:d     { Align1 NoMask }
 981      send (16) r110.0<1>:uw m1               r0.0<8;8,1>:uw   0x041243ff:ud    { Align1 }
 982    */
 983
 984    brw_dp_READ_16(p,
 985                   retype(vec16(reg), BRW_REGISTER_TYPE_UW),
 986                   1,
 987                   slot);
 988 }
 989
 990
 991
 992 /**
 993  * Retrieve upto 4 GEN4 register pairs for the given wm reg:
 994  */
 995 static void get_argument_regs( struct brw_wm_compile *c,
 996                                struct brw_wm_ref *arg[],
 997                                struct brw_reg *regs )
 998 {
 999    GLuint i;
1000
1001    for (i = 0; i < 4; i++) {
1002       if (arg[i]) {
1003
1004          if (arg[i]->unspill_reg)
1005             emit_unspill(c,
1006                          brw_vec8_grf(arg[i]->unspill_reg, 0),
1007                          arg[i]->value->spill_slot);
1008
1009          regs[i] = arg[i]->hw_reg;
1010       }
1011       else {
1012          regs[i] = brw_null_reg();
1013       }
1014    }
1015 }
1016
1017 static void spill_values( struct brw_wm_compile *c,
1018                           struct brw_wm_value *values,
1019                           GLuint nr )
1020 {
1021    GLuint i;
1022
1023    for (i = 0; i < nr; i++)
1024       if (values[i].spill_slot)
1025          emit_spill(c, values[i].hw_reg, values[i].spill_slot);
1026 }
1027
1028
1029
1030 /* Emit the fragment program instructions here.
1031  */
1032 void brw_wm_emit( struct brw_wm_compile *c )
1033 {
1034    struct brw_compile *p = &c->func;
1035    GLuint insn;
1036
1037    brw_set_compression_control(p, BRW_COMPRESSION_COMPRESSED);
1038
1039    /* Check if any of the payload regs need to be spilled:
1040     */
1041    spill_values(c, c->payload.depth, 4);
1042    spill_values(c, c->creg, c->nr_creg);
1043    spill_values(c, c->payload.input_interp, FRAG_ATTRIB_MAX);
1044
1045
1046    for (insn = 0; insn < c->nr_insns; insn++) {
1047
1048       struct brw_wm_instruction *inst = &c->instruction[insn];
1049       struct brw_reg args[3][4], dst[4];
1050       GLuint i, dst_flags;
1051
1052       /* Get argument regs:
1053        */
1054       for (i = 0; i < 3; i++)
1055          get_argument_regs(c, inst->src[i], args[i]);
1056
1057       /* Get dest regs:
1058        */
1059       for (i = 0; i < 4; i++)
1060          if (inst->dst[i])
1061             dst[i] = inst->dst[i]->hw_reg;
1062          else
1063             dst[i] = brw_null_reg();
1064
1065       /* Flags
1066        */
1067       dst_flags = inst->writemask;
1068       if (inst->saturate)
1069          dst_flags |= SATURATE;
1070
1071       switch (inst->opcode) {
1072          /* Generated instructions for calculating triangle interpolants:
1073           */
1074       case WM_PIXELXY:
1075          emit_pixel_xy(p, dst, dst_flags, args[0]);
1076          break;
1077
1078       case WM_DELTAXY:
1079          emit_delta_xy(p, dst, dst_flags, args[0], args[1]);
1080          break;
1081
1082       case WM_WPOSXY:
1083          emit_wpos_xy(p, dst, dst_flags, args[0]);
1084          break;
1085
1086       case WM_PIXELW:
1087          emit_pixel_w(p, dst, dst_flags, args[0], args[1]);
1088          break;
1089
1090       case WM_LINTERP:
1091          emit_linterp(p, dst, dst_flags, args[0], args[1]);
1092          break;
1093
1094       case WM_PINTERP:
1095          emit_pinterp(p, dst, dst_flags, args[0], args[1], args[2]);
1096          break;
1097
1098       case WM_CINTERP:
1099          emit_cinterp(p, dst, dst_flags, args[0]);
1100          break;
1101
1102       case WM_FB_WRITE:
1103          emit_fb_write(c, args[0], args[1], args[2]);
1104          break;
1105
1106          /* Straightforward arithmetic:
1107           */
1108       case OPCODE_ADD:
1109          emit_alu2(p, brw_ADD, dst, dst_flags, args[0], args[1]);
1110          break;
1111
1112       case OPCODE_FRC:
1113          emit_alu1(p, brw_FRC, dst, dst_flags, args[0]);
1114          break;
1115
1116       case OPCODE_FLR:
1117          emit_alu1(p, brw_RNDD, dst, dst_flags, args[0]);
1118          break;
1119
1120       case OPCODE_DP3:  /*  */
1121          emit_dp3(p, dst, dst_flags, args[0], args[1]);
1122          break;
1123
1124       case OPCODE_DP4:
1125          emit_dp4(p, dst, dst_flags, args[0], args[1]);
1126          break;
1127
1128       case OPCODE_DPH:
1129          emit_dph(p, dst, dst_flags, args[0], args[1]);
1130          break;
1131
1132       case OPCODE_LRP:  /*  */
1133          emit_lrp(p, dst, dst_flags, args[0], args[1], args[2]);
1134          break;
1135
1136       case OPCODE_MAD:
1137          emit_mad(p, dst, dst_flags, args[0], args[1], args[2]);
1138          break;
1139
1140       case OPCODE_MOV:
1141       case OPCODE_SWZ:
1142          emit_alu1(p, brw_MOV, dst, dst_flags, args[0]);
1143          break;
1144
1145       case OPCODE_MUL:
1146          emit_alu2(p, brw_MUL, dst, dst_flags, args[0], args[1]);
1147          break;
1148
1149       case OPCODE_XPD:
1150          emit_xpd(p, dst, dst_flags, args[0], args[1]);
1151          break;
1152
1153          /* Higher math functions:
1154           */
1155       case OPCODE_RCP:
1156          emit_math1(p, BRW_MATH_FUNCTION_INV, dst, dst_flags, args[0]);
1157          break;
1158
1159       case OPCODE_RSQ:
1160          emit_math1(p, BRW_MATH_FUNCTION_RSQ, dst, dst_flags, args[0]);
1161          break;
1162
1163       case OPCODE_SIN:
1164          emit_math1(p, BRW_MATH_FUNCTION_SIN, dst, dst_flags, args[0]);
1165          break;
1166
1167       case OPCODE_COS:
1168          emit_math1(p, BRW_MATH_FUNCTION_COS, dst, dst_flags, args[0]);
1169          break;
1170
1171       case OPCODE_EX2:
1172          emit_math1(p, BRW_MATH_FUNCTION_EXP, dst, dst_flags, args[0]);
1173          break;
1174
1175       case OPCODE_LG2:
1176          emit_math1(p, BRW_MATH_FUNCTION_LOG, dst, dst_flags, args[0]);
1177          break;
1178
1179       case OPCODE_SCS:
1180          /* There is an scs math function, but it would need some
1181           * fixup for 16-element execution.
1182           */
1183          if (dst_flags & WRITEMASK_X)
1184             emit_math1(p, BRW_MATH_FUNCTION_COS, dst, (dst_flags&SATURATE)|WRITEMASK_X, args[0]);
1185          if (dst_flags & WRITEMASK_Y)
1186             emit_math1(p, BRW_MATH_FUNCTION_SIN, dst+1, (dst_flags&SATURATE)|WRITEMASK_X, args[0]);
1187          break;
1188
1189       case OPCODE_POW:
1190          emit_math2(p, BRW_MATH_FUNCTION_POW, dst, dst_flags, args[0], args[1]);
1191          break;
1192
1193          /* Comparisons:
1194           */
1195       case OPCODE_CMP:
1196          emit_cmp(p, dst, dst_flags, args[0], args[1], args[2]);
1197          break;
1198
1199       case OPCODE_MAX:
1200          emit_max(p, dst, dst_flags, args[0], args[1]);
1201          break;
1202
1203       case OPCODE_MIN:
1204          emit_min(p, dst, dst_flags, args[0], args[1]);
1205          break;
1206
1207       case OPCODE_SLT:
1208          emit_slt(p, dst, dst_flags, args[0], args[1]);
1209          break;
1210
1211       case OPCODE_SGE:
1212          emit_sge(p, dst, dst_flags, args[0], args[1]);
1213          break;
1214
1215       case OPCODE_LIT:
1216          emit_lit(p, dst, dst_flags, args[0]);
1217          break;
1218
1219          /* Texturing operations:
1220           */
1221       case OPCODE_TEX:
1222          emit_tex(c, inst, dst, dst_flags, args[0]);
1223          break;
1224
1225       case OPCODE_TXB:
1226          emit_txb(c, inst, dst, dst_flags, args[0]);
1227          break;
1228
1229       case OPCODE_KIL:
1230          emit_kil(c, args[0]);
1231          break;
1232
1233       default:
1234          assert(0);
1235       }
1236
1237       for (i = 0; i < 4; i++)
1238         if (inst->dst[i] && inst->dst[i]->spill_slot)
1239            emit_spill(c,
1240                       inst->dst[i]->hw_reg,
1241                       inst->dst[i]->spill_slot);
1242    }
1243 }
1244
1245
1246
1247
1248