8958aeb8153f02fbbcfaeca84d4ecaa581b9eb99
[mesa.git] / src / mesa / drivers / dri / i965 / brw_fs_emit.cpp
1 /*
2 * Copyright © 2010 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 */
23
24 /** @file brw_fs_emit.cpp
25 *
26 * This file supports emitting code from the FS LIR to the actual
27 * native instructions.
28 */
29
30 extern "C" {
31 #include "main/macros.h"
32 #include "brw_context.h"
33 #include "brw_eu.h"
34 } /* extern "C" */
35
36 #include "brw_fs.h"
37 #include "../glsl/ir_print_visitor.h"
38
/**
 * Emit the framebuffer-write SEND for an FS_OPCODE_FB_WRITE instruction.
 *
 * When inst->header_present is set, the message header is staged into the
 * MRF first (under WE_all, so helper pixels participate): gen6 copies g0
 * into the first two message registers and, for render target > 0, writes
 * the target index into m0.2 for BLEND_STATE selection; pre-gen6 instead
 * copies g1 into base_mrf + 1 and lets the SEND pick up g0 as an implied
 * header move.
 */
void
fs_visitor::generate_fb_write(fs_inst *inst)
{
   GLboolean eot = inst->eot;
   struct brw_reg implied_header;

   /* Header is 2 regs, g0 and g1 are the contents.  g0 will be implied
    * move, here's g1.
    */
   brw_push_insn_state(p);
   brw_set_mask_control(p, BRW_MASK_DISABLE);
   brw_set_compression_control(p, BRW_COMPRESSION_NONE);

   if (inst->header_present) {
      if (intel->gen >= 6) {
         /* A single compressed MOV fills both header registers from g0. */
         brw_set_compression_control(p, BRW_COMPRESSION_COMPRESSED);
         brw_MOV(p,
                 retype(brw_message_reg(inst->base_mrf), BRW_REGISTER_TYPE_UD),
                 retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD));
         brw_set_compression_control(p, BRW_COMPRESSION_NONE);

         if (inst->target > 0) {
            /* Set the render target index for choosing BLEND_STATE. */
            brw_MOV(p, retype(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE, 0, 2),
                              BRW_REGISTER_TYPE_UD),
                    brw_imm_ud(inst->target));
         }

         implied_header = brw_null_reg();
      } else {
         /* Pre-gen6: g0 is supplied to brw_fb_WRITE as the implied header
          * source; only g1 needs an explicit copy into the payload.
          */
         implied_header = retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UW);

         brw_MOV(p,
                 brw_message_reg(inst->base_mrf + 1),
                 brw_vec8_grf(1, 0));
      }
   } else {
      implied_header = brw_null_reg();
   }

   brw_pop_insn_state(p);

   brw_fb_WRITE(p,
                c->dispatch_width,
                inst->base_mrf,
                implied_header,
                inst->target,
                inst->mlen,
                0,
                eot,
                inst->header_present);
}
91
92 /* Computes the integer pixel x,y values from the origin.
93 *
94 * This is the basis of gl_FragCoord computation, but is also used
95 * pre-gen6 for computing the deltas from v0 for computing
96 * interpolation.
97 */
98 void
99 fs_visitor::generate_pixel_xy(struct brw_reg dst, bool is_x)
100 {
101 struct brw_reg g1_uw = retype(brw_vec1_grf(1, 0), BRW_REGISTER_TYPE_UW);
102 struct brw_reg src;
103 struct brw_reg deltas;
104
105 if (is_x) {
106 src = stride(suboffset(g1_uw, 4), 2, 4, 0);
107 deltas = brw_imm_v(0x10101010);
108 } else {
109 src = stride(suboffset(g1_uw, 5), 2, 4, 0);
110 deltas = brw_imm_v(0x11001100);
111 }
112
113 if (c->dispatch_width == 16) {
114 dst = vec16(dst);
115 }
116
117 /* We do this 8 or 16-wide, but since the destination is UW we
118 * don't do compression in the 16-wide case.
119 */
120 brw_push_insn_state(p);
121 brw_set_compression_control(p, BRW_COMPRESSION_NONE);
122 brw_ADD(p, dst, src, deltas);
123 brw_pop_insn_state(p);
124 }
125
126 void
127 fs_visitor::generate_linterp(fs_inst *inst,
128 struct brw_reg dst, struct brw_reg *src)
129 {
130 struct brw_reg delta_x = src[0];
131 struct brw_reg delta_y = src[1];
132 struct brw_reg interp = src[2];
133
134 if (brw->has_pln &&
135 delta_y.nr == delta_x.nr + 1 &&
136 (intel->gen >= 6 || (delta_x.nr & 1) == 0)) {
137 brw_PLN(p, dst, interp, delta_x);
138 } else {
139 brw_LINE(p, brw_null_reg(), interp, delta_x);
140 brw_MAC(p, dst, suboffset(interp, 1), delta_y);
141 }
142 }
143
/**
 * Emit native code for one of the FS math opcodes (RCP, RSQ, SQRT, EXP2,
 * LOG2, POW, SIN, COS).
 *
 * On gen6+ the math instruction reads its sources straight from the GRF
 * (hence the mlen == 0 assert) and is emitted uncompressed; 16-wide
 * dispatch is handled by emitting a second 2nd-half instruction on
 * sechalf'd registers.  Pre-gen6 math is a message whose payload was
 * staged at inst->base_mrf (and base_mrf + 1 for the second half).
 */
void
fs_visitor::generate_math(fs_inst *inst,
                          struct brw_reg dst, struct brw_reg *src)
{
   int op;

   /* Translate the LIR opcode into the hardware math function encoding. */
   switch (inst->opcode) {
   case FS_OPCODE_RCP:
      op = BRW_MATH_FUNCTION_INV;
      break;
   case FS_OPCODE_RSQ:
      op = BRW_MATH_FUNCTION_RSQ;
      break;
   case FS_OPCODE_SQRT:
      op = BRW_MATH_FUNCTION_SQRT;
      break;
   case FS_OPCODE_EXP2:
      op = BRW_MATH_FUNCTION_EXP;
      break;
   case FS_OPCODE_LOG2:
      op = BRW_MATH_FUNCTION_LOG;
      break;
   case FS_OPCODE_POW:
      op = BRW_MATH_FUNCTION_POW;
      break;
   case FS_OPCODE_SIN:
      op = BRW_MATH_FUNCTION_SIN;
      break;
   case FS_OPCODE_COS:
      op = BRW_MATH_FUNCTION_COS;
      break;
   default:
      assert(!"not reached: unknown math function");
      op = 0;
      break;
   }

   if (intel->gen >= 6) {
      assert(inst->mlen == 0);

      if (inst->opcode == FS_OPCODE_POW) {
         /* POW takes two sources, so it goes through the two-source
          * math helper.
          */
         brw_set_compression_control(p, BRW_COMPRESSION_NONE);
         brw_math2(p, dst, op, src[0], src[1]);

         if (c->dispatch_width == 16) {
            brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF);
            brw_math2(p, sechalf(dst), op, sechalf(src[0]), sechalf(src[1]));
            brw_set_compression_control(p, BRW_COMPRESSION_COMPRESSED);
         }
      } else {
         brw_set_compression_control(p, BRW_COMPRESSION_NONE);
         brw_math(p, dst,
                  op,
                  inst->saturate ? BRW_MATH_SATURATE_SATURATE :
                  BRW_MATH_SATURATE_NONE,
                  0, src[0],
                  BRW_MATH_DATA_VECTOR,
                  BRW_MATH_PRECISION_FULL);

         if (c->dispatch_width == 16) {
            /* Second half of the 16-wide op on the upper register halves. */
            brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF);
            brw_math(p, sechalf(dst),
                     op,
                     inst->saturate ? BRW_MATH_SATURATE_SATURATE :
                     BRW_MATH_SATURATE_NONE,
                     0, sechalf(src[0]),
                     BRW_MATH_DATA_VECTOR,
                     BRW_MATH_PRECISION_FULL);
            brw_set_compression_control(p, BRW_COMPRESSION_COMPRESSED);
         }
      }
   } else /* gen <= 5 */{
      assert(inst->mlen >= 1);

      /* Pre-gen6 math is a send; the message payload lives at base_mrf. */
      brw_set_compression_control(p, BRW_COMPRESSION_NONE);
      brw_math(p, dst,
               op,
               inst->saturate ? BRW_MATH_SATURATE_SATURATE :
               BRW_MATH_SATURATE_NONE,
               inst->base_mrf, src[0],
               BRW_MATH_DATA_VECTOR,
               BRW_MATH_PRECISION_FULL);

      if (c->dispatch_width == 16) {
         /* The second half's payload was staged one MRF later. */
         brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF);
         brw_math(p, sechalf(dst),
                  op,
                  inst->saturate ? BRW_MATH_SATURATE_SATURATE :
                  BRW_MATH_SATURATE_NONE,
                  inst->base_mrf + 1, sechalf(src[0]),
                  BRW_MATH_DATA_VECTOR,
                  BRW_MATH_PRECISION_FULL);

         brw_set_compression_control(p, BRW_COMPRESSION_COMPRESSED);
      }
   }
}
241
/**
 * Emit the sampler SEND for a texture instruction (TEX/TXB/TXL; TXD is
 * not supported here yet and asserts).
 *
 * Selects the sampler message type for the target generation: gen5+ has
 * explicit per-opcode/compare message types, while gen4 partly encodes
 * shadow compare and SIMD width through the message length (hence the
 * mlen asserts).  SIMD16 messages double the response length and take a
 * vec16 destination.
 */
void
fs_visitor::generate_tex(fs_inst *inst, struct brw_reg dst, struct brw_reg src)
{
   int msg_type = -1;
   int rlen = 4;
   uint32_t simd_mode = BRW_SAMPLER_SIMD_MODE_SIMD8;

   if (c->dispatch_width == 16)
      simd_mode = BRW_SAMPLER_SIMD_MODE_SIMD16;

   if (intel->gen >= 5) {
      switch (inst->opcode) {
      case FS_OPCODE_TEX:
	 if (inst->shadow_compare) {
	    msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_COMPARE;
	 } else {
	    msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE;
	 }
	 break;
      case FS_OPCODE_TXB:
	 if (inst->shadow_compare) {
	    msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_BIAS_COMPARE;
	 } else {
	    msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_BIAS;
	 }
	 break;
      case FS_OPCODE_TXL:
	 if (inst->shadow_compare) {
	    msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_LOD_COMPARE;
	 } else {
	    msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_LOD;
	 }
	 break;
      case FS_OPCODE_TXD:
	 assert(!"TXD isn't supported on gen5+ yet.");
	 break;
      }
   } else {
      switch (inst->opcode) {
      case FS_OPCODE_TEX:
	 /* Note that G45 and older determines shadow compare and dispatch width
	  * from message length for most messages.
	  */
	 assert(c->dispatch_width == 8);
	 msg_type = BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE;
	 if (inst->shadow_compare) {
	    assert(inst->mlen == 6);
	 } else {
	    assert(inst->mlen <= 4);
	 }
	 break;
      case FS_OPCODE_TXB:
	 if (inst->shadow_compare) {
	    assert(inst->mlen == 6);
	    msg_type = BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE_BIAS_COMPARE;
	 } else {
	    /* The non-compare bias message only exists in SIMD16 form here. */
	    assert(inst->mlen == 9);
	    msg_type = BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_BIAS;
	    simd_mode = BRW_SAMPLER_SIMD_MODE_SIMD16;
	 }
	 break;
      case FS_OPCODE_TXL:
	 if (inst->shadow_compare) {
	    assert(inst->mlen == 6);
	    msg_type = BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE_LOD_COMPARE;
	 } else {
	    /* Likewise, the non-compare LOD message is SIMD16-only. */
	    assert(inst->mlen == 9);
	    msg_type = BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_LOD;
	    simd_mode = BRW_SAMPLER_SIMD_MODE_SIMD16;
	 }
	 break;
      case FS_OPCODE_TXD:
	 assert(!"TXD isn't supported on gen4 yet.");
	 break;
      }
   }
   assert(msg_type != -1);

   if (simd_mode == BRW_SAMPLER_SIMD_MODE_SIMD16) {
      /* SIMD16 responses are twice the size: 8 regs for XYZW. */
      rlen = 8;
      dst = vec16(dst);
   }

   brw_SAMPLE(p,
	      retype(dst, BRW_REGISTER_TYPE_UW),
	      inst->base_mrf,
	      src,
	      SURF_INDEX_TEXTURE(inst->sampler),
	      inst->sampler,
	      WRITEMASK_XYZW,
	      msg_type,
	      rlen,
	      inst->mlen,
	      0,
	      inst->header_present,
	      simd_mode);
}
339
340
341 /* For OPCODE_DDX and OPCODE_DDY, per channel of output we've got input
342 * looking like:
343 *
344 * arg0: ss0.tl ss0.tr ss0.bl ss0.br ss1.tl ss1.tr ss1.bl ss1.br
345 *
346 * and we're trying to produce:
347 *
348 * DDX DDY
349 * dst: (ss0.tr - ss0.tl) (ss0.tl - ss0.bl)
350 * (ss0.tr - ss0.tl) (ss0.tr - ss0.br)
351 * (ss0.br - ss0.bl) (ss0.tl - ss0.bl)
352 * (ss0.br - ss0.bl) (ss0.tr - ss0.br)
353 * (ss1.tr - ss1.tl) (ss1.tl - ss1.bl)
354 * (ss1.tr - ss1.tl) (ss1.tr - ss1.br)
355 * (ss1.br - ss1.bl) (ss1.tl - ss1.bl)
356 * (ss1.br - ss1.bl) (ss1.tr - ss1.br)
357 *
358 * and add another set of two more subspans if in 16-pixel dispatch mode.
359 *
360 * For DDX, it ends up being easy: width = 2, horiz=0 gets us the same result
361 * for each pair, and vertstride = 2 jumps us 2 elements after processing a
362 * pair. But for DDY, it's harder, as we want to produce the pairs swizzled
363 * between each other. We could probably do it like ddx and swizzle the right
364 * order later, but bail for now and just produce
365 * ((ss0.tl - ss0.bl)x4 (ss1.tl - ss1.bl)x4)
366 */
367 void
368 fs_visitor::generate_ddx(fs_inst *inst, struct brw_reg dst, struct brw_reg src)
369 {
370 struct brw_reg src0 = brw_reg(src.file, src.nr, 1,
371 BRW_REGISTER_TYPE_F,
372 BRW_VERTICAL_STRIDE_2,
373 BRW_WIDTH_2,
374 BRW_HORIZONTAL_STRIDE_0,
375 BRW_SWIZZLE_XYZW, WRITEMASK_XYZW);
376 struct brw_reg src1 = brw_reg(src.file, src.nr, 0,
377 BRW_REGISTER_TYPE_F,
378 BRW_VERTICAL_STRIDE_2,
379 BRW_WIDTH_2,
380 BRW_HORIZONTAL_STRIDE_0,
381 BRW_SWIZZLE_XYZW, WRITEMASK_XYZW);
382 brw_ADD(p, dst, src0, negate(src1));
383 }
384
385 void
386 fs_visitor::generate_ddy(fs_inst *inst, struct brw_reg dst, struct brw_reg src)
387 {
388 struct brw_reg src0 = brw_reg(src.file, src.nr, 0,
389 BRW_REGISTER_TYPE_F,
390 BRW_VERTICAL_STRIDE_4,
391 BRW_WIDTH_4,
392 BRW_HORIZONTAL_STRIDE_0,
393 BRW_SWIZZLE_XYZW, WRITEMASK_XYZW);
394 struct brw_reg src1 = brw_reg(src.file, src.nr, 2,
395 BRW_REGISTER_TYPE_F,
396 BRW_VERTICAL_STRIDE_4,
397 BRW_WIDTH_4,
398 BRW_HORIZONTAL_STRIDE_0,
399 BRW_SWIZZLE_XYZW, WRITEMASK_XYZW);
400 brw_ADD(p, dst, src0, negate(src1));
401 }
402
/**
 * Emit code for FS_OPCODE_DISCARD: knock the discarded channels out of
 * the pixel mask so later FB writes skip them.
 *
 * Both paths accumulate the "still live" channels through the flag
 * register f0.  This is expected to run predicated on the channels being
 * discarded (the AND below masks them out of the tracked pixel word).
 */
void
fs_visitor::generate_discard(fs_inst *inst)
{
   struct brw_reg f0 = brw_flag_reg();

   if (intel->gen >= 6) {
      /* g1.7 is where this path keeps the live-pixel word on gen6. */
      struct brw_reg g1 = retype(brw_vec1_grf(1, 7), BRW_REGISTER_TYPE_UW);
      struct brw_reg some_register;

      /* As of gen6, we no longer have the mask register to look at,
       * so life gets a bit more complicated.
       */

      /* Load the flag register with all ones. */
      brw_push_insn_state(p);
      brw_set_mask_control(p, BRW_MASK_DISABLE);
      brw_MOV(p, f0, brw_imm_uw(0xffff));
      brw_pop_insn_state(p);

      /* Do a comparison that should always fail, to produce 0s in the flag
       * reg where we have active channels.
       */
      some_register = retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UW);
      brw_CMP(p, retype(brw_null_reg(), BRW_REGISTER_TYPE_UD),
	      BRW_CONDITIONAL_NZ, some_register, some_register);

      /* Undo CMP's whacking of predication*/
      brw_set_predicate_control(p, BRW_PREDICATE_NONE);

      /* AND the surviving channels into the tracked pixel word. */
      brw_push_insn_state(p);
      brw_set_mask_control(p, BRW_MASK_DISABLE);
      brw_AND(p, g1, f0, g1);
      brw_pop_insn_state(p);
   } else {
      /* Pre-gen6 keeps the live-pixel word in g0.0. */
      struct brw_reg g0 = retype(brw_vec1_grf(0, 0), BRW_REGISTER_TYPE_UW);

      brw_push_insn_state(p);
      brw_set_mask_control(p, BRW_MASK_DISABLE);
      brw_set_compression_control(p, BRW_COMPRESSION_NONE);

      /* Unlike the 965, we have the mask reg, so we just need
       * somewhere to invert that (containing channels to be disabled)
       * so it can be ANDed with the mask of pixels still to be
       * written. Use the flag reg for consistency with gen6+.
       */
      brw_NOT(p, f0, brw_mask_reg(1)); /* IMASK */
      brw_AND(p, g0, f0, g0);

      brw_pop_insn_state(p);
   }
}
454
455 void
456 fs_visitor::generate_spill(fs_inst *inst, struct brw_reg src)
457 {
458 assert(inst->mlen != 0);
459
460 brw_MOV(p,
461 retype(brw_message_reg(inst->base_mrf + 1), BRW_REGISTER_TYPE_UD),
462 retype(src, BRW_REGISTER_TYPE_UD));
463 brw_oword_block_write_scratch(p, brw_message_reg(inst->base_mrf), 1,
464 inst->offset);
465 }
466
467 void
468 fs_visitor::generate_unspill(fs_inst *inst, struct brw_reg dst)
469 {
470 assert(inst->mlen != 0);
471
472 /* Clear any post destination dependencies that would be ignored by
473 * the block read. See the B-Spec for pre-gen5 send instruction.
474 *
475 * This could use a better solution, since texture sampling and
476 * math reads could potentially run into it as well -- anywhere
477 * that we have a SEND with a destination that is a register that
478 * was written but not read within the last N instructions (what's
479 * N? unsure). This is rare because of dead code elimination, but
480 * not impossible.
481 */
482 if (intel->gen == 4 && !intel->is_g4x)
483 brw_MOV(p, brw_null_reg(), dst);
484
485 brw_oword_block_read_scratch(p, dst, brw_message_reg(inst->base_mrf), 1,
486 inst->offset);
487
488 if (intel->gen == 4 && !intel->is_g4x) {
489 /* gen4 errata: destination from a send can't be used as a
490 * destination until it's been read. Just read it so we don't
491 * have to worry.
492 */
493 brw_MOV(p, brw_null_reg(), dst);
494 }
495 }
496
497 void
498 fs_visitor::generate_pull_constant_load(fs_inst *inst, struct brw_reg dst)
499 {
500 assert(inst->mlen != 0);
501
502 /* Clear any post destination dependencies that would be ignored by
503 * the block read. See the B-Spec for pre-gen5 send instruction.
504 *
505 * This could use a better solution, since texture sampling and
506 * math reads could potentially run into it as well -- anywhere
507 * that we have a SEND with a destination that is a register that
508 * was written but not read within the last N instructions (what's
509 * N? unsure). This is rare because of dead code elimination, but
510 * not impossible.
511 */
512 if (intel->gen == 4 && !intel->is_g4x)
513 brw_MOV(p, brw_null_reg(), dst);
514
515 brw_oword_block_read(p, dst, brw_message_reg(inst->base_mrf),
516 inst->offset, SURF_INDEX_FRAG_CONST_BUFFER);
517
518 if (intel->gen == 4 && !intel->is_g4x) {
519 /* gen4 errata: destination from a send can't be used as a
520 * destination until it's been read. Just read it so we don't
521 * have to worry.
522 */
523 brw_MOV(p, brw_null_reg(), dst);
524 }
525 }
526
527 static struct brw_reg
528 brw_reg_from_fs_reg(fs_reg *reg)
529 {
530 struct brw_reg brw_reg;
531
532 switch (reg->file) {
533 case GRF:
534 case ARF:
535 case MRF:
536 if (reg->smear == -1) {
537 brw_reg = brw_vec8_reg(reg->file,
538 reg->hw_reg, 0);
539 } else {
540 brw_reg = brw_vec1_reg(reg->file,
541 reg->hw_reg, reg->smear);
542 }
543 brw_reg = retype(brw_reg, reg->type);
544 if (reg->sechalf)
545 brw_reg = sechalf(brw_reg);
546 break;
547 case IMM:
548 switch (reg->type) {
549 case BRW_REGISTER_TYPE_F:
550 brw_reg = brw_imm_f(reg->imm.f);
551 break;
552 case BRW_REGISTER_TYPE_D:
553 brw_reg = brw_imm_d(reg->imm.i);
554 break;
555 case BRW_REGISTER_TYPE_UD:
556 brw_reg = brw_imm_ud(reg->imm.u);
557 break;
558 default:
559 assert(!"not reached");
560 brw_reg = brw_null_reg();
561 break;
562 }
563 break;
564 case FIXED_HW_REG:
565 brw_reg = reg->fixed_hw_reg;
566 break;
567 case BAD_FILE:
568 /* Probably unused. */
569 brw_reg = brw_null_reg();
570 break;
571 case UNIFORM:
572 assert(!"not reached");
573 brw_reg = brw_null_reg();
574 break;
575 default:
576 assert(!"not reached");
577 brw_reg = brw_null_reg();
578 break;
579 }
580 if (reg->abs)
581 brw_reg = brw_abs(brw_reg);
582 if (reg->negate)
583 brw_reg = negate(brw_reg);
584
585 return brw_reg;
586 }
587
/**
 * Main codegen loop: walks this->instructions (the FS LIR) and emits
 * native instructions through the brw_eu assembler in p.
 *
 * Maintains a dynamically-grown stack of DO instructions so that
 * BREAK/CONTINUE/WHILE can locate their loop, plus a per-loop-level IF
 * nesting count used by the pre-gen6 BREAK/CONT pop counts.  With
 * INTEL_DEBUG=wm set, prints the source IR annotations and a disassembly
 * of the instructions generated for each LIR instruction.
 */
void
fs_visitor::generate_code()
{
   int last_native_inst = p->nr_insn;
   const char *last_annotation_string = NULL;
   ir_instruction *last_annotation_ir = NULL;

   /* Loop bookkeeping, grown on demand when nesting exceeds 16. */
   int loop_stack_array_size = 16;
   int loop_stack_depth = 0;
   brw_instruction **loop_stack =
      rzalloc_array(this->mem_ctx, brw_instruction *, loop_stack_array_size);
   int *if_depth_in_loop =
      rzalloc_array(this->mem_ctx, int, loop_stack_array_size);


   if (unlikely(INTEL_DEBUG & DEBUG_WM)) {
      printf("Native code for fragment shader %d (%d-wide dispatch):\n",
	     ctx->Shader.CurrentFragmentProgram->Name, c->dispatch_width);
   }

   foreach_iter(exec_list_iterator, iter, this->instructions) {
      fs_inst *inst = (fs_inst *)iter.get();
      struct brw_reg src[3], dst;

      /* Print the IR node / annotation once per group of LIR instructions
       * that came from it.
       */
      if (unlikely(INTEL_DEBUG & DEBUG_WM)) {
	 if (last_annotation_ir != inst->ir) {
	    last_annotation_ir = inst->ir;
	    if (last_annotation_ir) {
	       printf("   ");
	       last_annotation_ir->print();
	       printf("\n");
	    }
	 }
	 if (last_annotation_string != inst->annotation) {
	    last_annotation_string = inst->annotation;
	    if (last_annotation_string)
	       printf("   %s\n", last_annotation_string);
	 }
      }

      /* Translate the LIR registers to hardware register encodings. */
      for (unsigned int i = 0; i < 3; i++) {
	 src[i] = brw_reg_from_fs_reg(&inst->src[i]);
      }
      dst = brw_reg_from_fs_reg(&inst->dst);

      /* Set up the default instruction state for this emit. */
      brw_set_conditionalmod(p, inst->conditional_mod);
      brw_set_predicate_control(p, inst->predicated);
      brw_set_predicate_inverse(p, inst->predicate_inverse);
      brw_set_saturate(p, inst->saturate);

      if (inst->force_uncompressed || c->dispatch_width == 8) {
	 brw_set_compression_control(p, BRW_COMPRESSION_NONE);
      } else if (inst->force_sechalf) {
	 brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF);
      } else {
	 brw_set_compression_control(p, BRW_COMPRESSION_COMPRESSED);
      }

      switch (inst->opcode) {
      case BRW_OPCODE_MOV:
	 brw_MOV(p, dst, src[0]);
	 break;
      case BRW_OPCODE_ADD:
	 brw_ADD(p, dst, src[0], src[1]);
	 break;
      case BRW_OPCODE_MUL:
	 brw_MUL(p, dst, src[0], src[1]);
	 break;

      case BRW_OPCODE_FRC:
	 brw_FRC(p, dst, src[0]);
	 break;
      case BRW_OPCODE_RNDD:
	 brw_RNDD(p, dst, src[0]);
	 break;
      case BRW_OPCODE_RNDE:
	 brw_RNDE(p, dst, src[0]);
	 break;
      case BRW_OPCODE_RNDZ:
	 brw_RNDZ(p, dst, src[0]);
	 break;

      case BRW_OPCODE_AND:
	 brw_AND(p, dst, src[0], src[1]);
	 break;
      case BRW_OPCODE_OR:
	 brw_OR(p, dst, src[0], src[1]);
	 break;
      case BRW_OPCODE_XOR:
	 brw_XOR(p, dst, src[0], src[1]);
	 break;
      case BRW_OPCODE_NOT:
	 brw_NOT(p, dst, src[0]);
	 break;
      case BRW_OPCODE_ASR:
	 brw_ASR(p, dst, src[0], src[1]);
	 break;
      case BRW_OPCODE_SHR:
	 brw_SHR(p, dst, src[0], src[1]);
	 break;
      case BRW_OPCODE_SHL:
	 brw_SHL(p, dst, src[0], src[1]);
	 break;

      case BRW_OPCODE_CMP:
	 brw_CMP(p, dst, inst->conditional_mod, src[0], src[1]);
	 break;
      case BRW_OPCODE_SEL:
	 brw_SEL(p, dst, src[0], src[1]);
	 break;

      case BRW_OPCODE_IF:
	 if (inst->src[0].file != BAD_FILE) {
	    /* The instruction has an embedded compare (only allowed on gen6) */
	    assert(intel->gen == 6);
	    gen6_IF(p, inst->conditional_mod, src[0], src[1]);
	 } else {
	    brw_IF(p, c->dispatch_width == 16 ? BRW_EXECUTE_16 : BRW_EXECUTE_8);
	 }
	 if_depth_in_loop[loop_stack_depth]++;
	 break;

      case BRW_OPCODE_ELSE:
	 brw_ELSE(p);
	 break;
      case BRW_OPCODE_ENDIF:
	 brw_ENDIF(p);
	 if_depth_in_loop[loop_stack_depth]--;
	 break;

      case BRW_OPCODE_DO:
	 /* Push the DO on the loop stack, growing both arrays if needed. */
	 loop_stack[loop_stack_depth++] = brw_DO(p, BRW_EXECUTE_8);
	 if (loop_stack_array_size <= loop_stack_depth) {
	    loop_stack_array_size *= 2;
	    loop_stack = reralloc(this->mem_ctx, loop_stack, brw_instruction *,
				  loop_stack_array_size);
	    if_depth_in_loop = reralloc(this->mem_ctx, if_depth_in_loop, int,
				        loop_stack_array_size);
	 }
	 if_depth_in_loop[loop_stack_depth] = 0;
	 break;

      case BRW_OPCODE_BREAK:
	 brw_BREAK(p, if_depth_in_loop[loop_stack_depth]);
	 brw_set_predicate_control(p, BRW_PREDICATE_NONE);
	 break;
      case BRW_OPCODE_CONTINUE:
	 /* FINISHME: We need to write the loop instruction support still. */
	 if (intel->gen >= 6)
	    gen6_CONT(p, loop_stack[loop_stack_depth - 1]);
	 else
	    brw_CONT(p, if_depth_in_loop[loop_stack_depth]);
	 brw_set_predicate_control(p, BRW_PREDICATE_NONE);
	 break;

      case BRW_OPCODE_WHILE: {
	 struct brw_instruction *inst0, *inst1;
	 GLuint br = 1;

	 /* Jump counts are in units of 64 bits on gen5+, 128 bits before. */
	 if (intel->gen >= 5)
	    br = 2;

	 assert(loop_stack_depth > 0);
	 loop_stack_depth--;
	 inst0 = inst1 = brw_WHILE(p, loop_stack[loop_stack_depth]);
	 if (intel->gen < 6) {
	    /* patch all the BREAK/CONT instructions from last BGNLOOP */
	    while (inst0 > loop_stack[loop_stack_depth]) {
	       inst0--;
	       if (inst0->header.opcode == BRW_OPCODE_BREAK &&
		   inst0->bits3.if_else.jump_count == 0) {
		  inst0->bits3.if_else.jump_count = br * (inst1 - inst0 + 1);
	       }
	       else if (inst0->header.opcode == BRW_OPCODE_CONTINUE &&
			inst0->bits3.if_else.jump_count == 0) {
		  inst0->bits3.if_else.jump_count = br * (inst1 - inst0);
	       }
	    }
	 }
      }
	 break;

      case FS_OPCODE_RCP:
      case FS_OPCODE_RSQ:
      case FS_OPCODE_SQRT:
      case FS_OPCODE_EXP2:
      case FS_OPCODE_LOG2:
      case FS_OPCODE_POW:
      case FS_OPCODE_SIN:
      case FS_OPCODE_COS:
	 generate_math(inst, dst, src);
	 break;
      case FS_OPCODE_PIXEL_X:
	 generate_pixel_xy(dst, true);
	 break;
      case FS_OPCODE_PIXEL_Y:
	 generate_pixel_xy(dst, false);
	 break;
      case FS_OPCODE_CINTERP:
	 brw_MOV(p, dst, src[0]);
	 break;
      case FS_OPCODE_LINTERP:
	 generate_linterp(inst, dst, src);
	 break;
      case FS_OPCODE_TEX:
      case FS_OPCODE_TXB:
      case FS_OPCODE_TXD:
      case FS_OPCODE_TXL:
	 generate_tex(inst, dst, src[0]);
	 break;
      case FS_OPCODE_DISCARD:
	 generate_discard(inst);
	 break;
      case FS_OPCODE_DDX:
	 generate_ddx(inst, dst, src[0]);
	 break;
      case FS_OPCODE_DDY:
	 generate_ddy(inst, dst, src[0]);
	 break;

      case FS_OPCODE_SPILL:
	 generate_spill(inst, src[0]);
	 break;

      case FS_OPCODE_UNSPILL:
	 generate_unspill(inst, dst);
	 break;

      case FS_OPCODE_PULL_CONSTANT_LOAD:
	 generate_pull_constant_load(inst, dst);
	 break;

      case FS_OPCODE_FB_WRITE:
	 generate_fb_write(inst);
	 break;
      default:
	 if (inst->opcode < (int)ARRAY_SIZE(brw_opcodes)) {
	    _mesa_problem(ctx, "Unsupported opcode `%s' in FS",
			  brw_opcodes[inst->opcode].name);
	 } else {
	    _mesa_problem(ctx, "Unsupported opcode %d in FS", inst->opcode);
	 }
	 fail("unsupported opcode in FS\n");
      }

      /* Disassemble whatever native instructions this LIR inst produced. */
      if (unlikely(INTEL_DEBUG & DEBUG_WM)) {
	 for (unsigned int i = last_native_inst; i < p->nr_insn; i++) {
	    if (0) {
	       printf("0x%08x 0x%08x 0x%08x 0x%08x ",
		      ((uint32_t *)&p->store[i])[3],
		      ((uint32_t *)&p->store[i])[2],
		      ((uint32_t *)&p->store[i])[1],
		      ((uint32_t *)&p->store[i])[0]);
	    }
	    brw_disasm(stdout, &p->store[i], intel->gen);
	 }
      }

      last_native_inst = p->nr_insn;
   }

   if (unlikely(INTEL_DEBUG & DEBUG_WM)) {
      printf("\n");
   }

   ralloc_free(loop_stack);
   ralloc_free(if_depth_in_loop);

   /* Fix up the UIP/JIP fields of flow-control instructions now that all
    * targets are known.
    */
   brw_set_uip_jip(p);

   /* OK, while the INTEL_DEBUG=wm above is very nice for debugging FS
    * emit issues, it doesn't get the jump distances into the output,
    * which is often something we want to debug.  So this is here in
    * case you're doing that.
    */
   if (0) {
      if (unlikely(INTEL_DEBUG & DEBUG_WM)) {
	 for (unsigned int i = 0; i < p->nr_insn; i++) {
	    printf("0x%08x 0x%08x 0x%08x 0x%08x ",
		   ((uint32_t *)&p->store[i])[3],
		   ((uint32_t *)&p->store[i])[2],
		   ((uint32_t *)&p->store[i])[1],
		   ((uint32_t *)&p->store[i])[0]);
	    brw_disasm(stdout, &p->store[i], intel->gen);
	 }
      }
   }
}