src/mesa/drivers/dri/i965/brw_fs_fp.cpp

   1 /*
   2  * Copyright © 2012 Intel Corporation
   3  *
   4  * Permission is hereby granted, free of charge, to any person obtaining a
   5  * copy of this software and associated documentation files (the "Software"),
   6  * to deal in the Software without restriction, including without limitation
   7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
   8  * and/or sell copies of the Software, and to permit persons to whom the
   9  * Software is furnished to do so, subject to the following conditions:
  10  *
  11  * The above copyright notice and this permission notice (including the next
  12  * paragraph) shall be included in all copies or substantial portions of the
  13  * Software.
  14  *
  15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
  18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
  20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
  21  * IN THE SOFTWARE.
  22  */
  23
  24 /** @file brw_fs_fp.cpp
  25  *
  26  * Implementation of the compiler for GL_ARB_fragment_program shaders on top
  27  * of the GLSL compiler backend.
  28  */
  29
  30 #include "brw_context.h"
  31 #include "brw_fs.h"
  32
  33 void
  34 fs_visitor::emit_fp_alu1(enum opcode opcode,
  35                          const struct prog_instruction *fpi,
  36                          fs_reg dst, fs_reg src)
  37 {
  38    for (int i = 0; i < 4; i++) {
  39       if (fpi->DstReg.WriteMask & (1 << i))
  40          emit(opcode, offset(dst, i), offset(src, i));
  41    }
  42 }
  43
  44 void
  45 fs_visitor::emit_fp_alu2(enum opcode opcode,
  46                          const struct prog_instruction *fpi,
  47                          fs_reg dst, fs_reg src0, fs_reg src1)
  48 {
  49    for (int i = 0; i < 4; i++) {
  50       if (fpi->DstReg.WriteMask & (1 << i))
  51          emit(opcode, offset(dst, i),
  52               offset(src0, i), offset(src1, i));
  53    }
  54 }
  55
  56 void
  57 fs_visitor::emit_fp_minmax(const prog_instruction *fpi,
  58                            fs_reg dst, fs_reg src0, fs_reg src1)
  59 {
  60    enum brw_conditional_mod conditionalmod;
  61    if (fpi->Opcode == OPCODE_MIN)
  62       conditionalmod = BRW_CONDITIONAL_L;
  63    else
  64       conditionalmod = BRW_CONDITIONAL_GE;
  65
  66    for (int i = 0; i < 4; i++) {
  67       if (fpi->DstReg.WriteMask & (1 << i)) {
  68          emit_minmax(conditionalmod, offset(dst, i),
  69                      offset(src0, i), offset(src1, i));
  70       }
  71    }
  72 }
  73
  74 void
  75 fs_visitor::emit_fp_sop(enum brw_conditional_mod conditional_mod,
  76                         const struct prog_instruction *fpi,
  77                         fs_reg dst, fs_reg src0, fs_reg src1,
  78                         fs_reg one)
  79 {
  80    for (int i = 0; i < 4; i++) {
  81       if (fpi->DstReg.WriteMask & (1 << i)) {
  82          fs_inst *inst;
  83
  84          emit(CMP(reg_null_d, offset(src0, i), offset(src1, i),
  85                   conditional_mod));
  86
  87          inst = emit(BRW_OPCODE_SEL, offset(dst, i), one, fs_reg(0.0f));
  88          inst->predicate = BRW_PREDICATE_NORMAL;
  89       }
  90    }
  91 }
  92
  93 void
  94 fs_visitor::emit_fp_scalar_write(const struct prog_instruction *fpi,
  95                                  fs_reg dst, fs_reg src)
  96 {
  97    for (int i = 0; i < 4; i++) {
  98       if (fpi->DstReg.WriteMask & (1 << i))
  99          emit(MOV(offset(dst, i), src));
 100    }
 101 }
 102
 103 void
 104 fs_visitor::emit_fp_scalar_math(enum opcode opcode,
 105                                 const struct prog_instruction *fpi,
 106                                 fs_reg dst, fs_reg src)
 107 {
 108    fs_reg temp = vgrf(glsl_type::float_type);
 109    emit_math(opcode, temp, src);
 110    emit_fp_scalar_write(fpi, dst, temp);
 111 }
 112
 113 void
 114 fs_visitor::emit_fragment_program_code()
 115 {
 116    setup_fp_regs();
 117
 118    /* Keep a reg with 1.0 around, for reuse by emit_fp_sop so that it can just
 119     * be:
 120     *
 121     * sel.f0 dst 1.0 0.0
 122     *
 123     * instead of
 124     *
 125     * mov    dst 0.0
 126     * mov.f0 dst 1.0
 127     */
 128    fs_reg one = vgrf(glsl_type::float_type);
 129    emit(MOV(one, fs_reg(1.0f)));
 130
 131    for (unsigned int insn = 0; insn < prog->NumInstructions; insn++) {
 132       const struct prog_instruction *fpi = &prog->Instructions[insn];
 133       base_ir = fpi;
 134
 135       fs_reg dst;
 136       fs_reg src[3];
 137
 138       /* We always emit into a temporary destination register to avoid
 139        * aliasing issues.
 140        */
 141       dst = vgrf(glsl_type::vec4_type);
 142
 143       for (int i = 0; i < 3; i++)
 144          src[i] = get_fp_src_reg(&fpi->SrcReg[i]);
 145
 146       switch (fpi->Opcode) {
 147       case OPCODE_ABS:
 148          src[0].abs = true;
 149          src[0].negate = false;
 150          emit_fp_alu1(BRW_OPCODE_MOV, fpi, dst, src[0]);
 151          break;
 152
 153       case OPCODE_ADD:
 154          emit_fp_alu2(BRW_OPCODE_ADD, fpi, dst, src[0], src[1]);
 155          break;
 156
 157       case OPCODE_CMP:
 158          for (int i = 0; i < 4; i++) {
 159             if (fpi->DstReg.WriteMask & (1 << i)) {
 160                fs_inst *inst;
 161
 162                emit(CMP(reg_null_f, offset(src[0], i), fs_reg(0.0f),
 163                         BRW_CONDITIONAL_L));
 164
 165                inst = emit(BRW_OPCODE_SEL, offset(dst, i),
 166                            offset(src[1], i), offset(src[2], i));
 167                inst->predicate = BRW_PREDICATE_NORMAL;
 168             }
 169          }
 170          break;
 171
 172       case OPCODE_COS:
 173          emit_fp_scalar_math(SHADER_OPCODE_COS, fpi, dst, src[0]);
 174          break;
 175
 176       case OPCODE_DP2:
 177       case OPCODE_DP3:
 178       case OPCODE_DP4:
 179       case OPCODE_DPH: {
 180          fs_reg mul = vgrf(glsl_type::float_type);
 181          fs_reg acc = vgrf(glsl_type::float_type);
 182          int count;
 183
 184          switch (fpi->Opcode) {
 185          case OPCODE_DP2: count = 2; break;
 186          case OPCODE_DP3: count = 3; break;
 187          case OPCODE_DP4: count = 4; break;
 188          case OPCODE_DPH: count = 3; break;
 189          default: unreachable("not reached");
 190          }
 191
 192          emit(MUL(acc, offset(src[0], 0), offset(src[1], 0)));
 193          for (int i = 1; i < count; i++) {
 194             emit(MUL(mul, offset(src[0], i), offset(src[1], i)));
 195             emit(ADD(acc, acc, mul));
 196          }
 197
 198          if (fpi->Opcode == OPCODE_DPH)
 199             emit(ADD(acc, acc, offset(src[1], 3)));
 200
 201          emit_fp_scalar_write(fpi, dst, acc);
 202          break;
 203       }
 204
 205       case OPCODE_DST:
 206          if (fpi->DstReg.WriteMask & WRITEMASK_X)
 207             emit(MOV(dst, fs_reg(1.0f)));
 208          if (fpi->DstReg.WriteMask & WRITEMASK_Y) {
 209             emit(MUL(offset(dst, 1),
 210                      offset(src[0], 1), offset(src[1], 1)));
 211          }
 212          if (fpi->DstReg.WriteMask & WRITEMASK_Z)
 213             emit(MOV(offset(dst, 2), offset(src[0], 2)));
 214          if (fpi->DstReg.WriteMask & WRITEMASK_W)
 215             emit(MOV(offset(dst, 3), offset(src[1], 3)));
 216          break;
 217
 218       case OPCODE_EX2:
 219          emit_fp_scalar_math(SHADER_OPCODE_EXP2, fpi, dst, src[0]);
 220          break;
 221
 222       case OPCODE_FLR:
 223          emit_fp_alu1(BRW_OPCODE_RNDD, fpi, dst, src[0]);
 224          break;
 225
 226       case OPCODE_FRC:
 227          emit_fp_alu1(BRW_OPCODE_FRC, fpi, dst, src[0]);
 228          break;
 229
 230       case OPCODE_KIL: {
 231          for (int i = 0; i < 4; i++) {
 232             /* In most cases the argument to a KIL will be something like
 233              * TEMP[0].wwww, so there's no point in checking whether .w is < 0
 234              * 4 times in a row.
 235              */
 236             if (i > 0 &&
 237                 GET_SWZ(fpi->SrcReg[0].Swizzle, i) ==
 238                 GET_SWZ(fpi->SrcReg[0].Swizzle, i - 1) &&
 239                 ((fpi->SrcReg[0].Negate >> i) & 1) ==
 240                 ((fpi->SrcReg[0].Negate >> (i - 1)) & 1)) {
 241                continue;
 242             }
 243
 244
 245             /* Emit an instruction that's predicated on the current
 246              * undiscarded pixels, and updates just those pixels to be
 247              * turned off.
 248              */
 249             fs_inst *cmp = emit(CMP(reg_null_f, offset(src[0], i),
 250                                     fs_reg(0.0f), BRW_CONDITIONAL_GE));
 251             cmp->predicate = BRW_PREDICATE_NORMAL;
 252             cmp->flag_subreg = 1;
 253          }
 254          break;
 255       }
 256
 257       case OPCODE_LG2:
 258          emit_fp_scalar_math(SHADER_OPCODE_LOG2, fpi, dst, src[0]);
 259          break;
 260
 261       case OPCODE_LIT:
 262          /* From the ARB_fragment_program spec:
 263           *
 264           *      tmp = VectorLoad(op0);
 265           *      if (tmp.x < 0) tmp.x = 0;
 266           *      if (tmp.y < 0) tmp.y = 0;
 267           *      if (tmp.w < -(128.0-epsilon)) tmp.w = -(128.0-epsilon);
 268           *      else if (tmp.w > 128-epsilon) tmp.w = 128-epsilon;
 269           *      result.x = 1.0;
 270           *      result.y = tmp.x;
 271           *      result.z = (tmp.x > 0) ? RoughApproxPower(tmp.y, tmp.w) : 0.0;
 272           *      result.w = 1.0;
 273           *
 274           * Note that we don't do the clamping to +/- 128.  We didn't in
 275           * brw_wm_emit.c either.
 276           */
 277          if (fpi->DstReg.WriteMask & WRITEMASK_X)
 278             emit(MOV(offset(dst, 0), fs_reg(1.0f)));
 279
 280          if (fpi->DstReg.WriteMask & WRITEMASK_YZ) {
 281             fs_inst *inst;
 282             emit(CMP(reg_null_f, offset(src[0], 0), fs_reg(0.0f),
 283                      BRW_CONDITIONAL_LE));
 284
 285             if (fpi->DstReg.WriteMask & WRITEMASK_Y) {
 286                emit(MOV(offset(dst, 1), offset(src[0], 0)));
 287                inst = emit(MOV(offset(dst, 1), fs_reg(0.0f)));
 288                inst->predicate = BRW_PREDICATE_NORMAL;
 289             }
 290
 291             if (fpi->DstReg.WriteMask & WRITEMASK_Z) {
 292                emit_math(SHADER_OPCODE_POW, offset(dst, 2),
 293                          offset(src[0], 1), offset(src[0], 3));
 294
 295                inst = emit(MOV(offset(dst, 2), fs_reg(0.0f)));
 296                inst->predicate = BRW_PREDICATE_NORMAL;
 297             }
 298          }
 299
 300          if (fpi->DstReg.WriteMask & WRITEMASK_W)
 301             emit(MOV(offset(dst, 3), fs_reg(1.0f)));
 302
 303          break;
 304
 305       case OPCODE_LRP:
 306          for (int i = 0; i < 4; i++) {
 307             if (fpi->DstReg.WriteMask & (1 << i)) {
 308                fs_reg a = offset(src[0], i);
 309                fs_reg y = offset(src[1], i);
 310                fs_reg x = offset(src[2], i);
 311                emit_lrp(offset(dst, i), x, y, a);
 312             }
 313          }
 314          break;
 315
 316       case OPCODE_MAD:
 317          for (int i = 0; i < 4; i++) {
 318             if (fpi->DstReg.WriteMask & (1 << i)) {
 319                fs_reg temp = vgrf(glsl_type::float_type);
 320                emit(MUL(temp, offset(src[0], i), offset(src[1], i)));
 321                emit(ADD(offset(dst, i), temp, offset(src[2], i)));
 322             }
 323          }
 324          break;
 325
 326       case OPCODE_MAX:
 327          emit_fp_minmax(fpi, dst, src[0], src[1]);
 328          break;
 329
 330       case OPCODE_MOV:
 331          emit_fp_alu1(BRW_OPCODE_MOV, fpi, dst, src[0]);
 332          break;
 333
 334       case OPCODE_MIN:
 335          emit_fp_minmax(fpi, dst, src[0], src[1]);
 336          break;
 337
 338       case OPCODE_MUL:
 339          emit_fp_alu2(BRW_OPCODE_MUL, fpi, dst, src[0], src[1]);
 340          break;
 341
 342       case OPCODE_POW: {
 343          fs_reg temp = vgrf(glsl_type::float_type);
 344          emit_math(SHADER_OPCODE_POW, temp, src[0], src[1]);
 345          emit_fp_scalar_write(fpi, dst, temp);
 346          break;
 347       }
 348
 349       case OPCODE_RCP:
 350          emit_fp_scalar_math(SHADER_OPCODE_RCP, fpi, dst, src[0]);
 351          break;
 352
 353       case OPCODE_RSQ:
 354          emit_fp_scalar_math(SHADER_OPCODE_RSQ, fpi, dst, src[0]);
 355          break;
 356
 357       case OPCODE_SCS:
 358          if (fpi->DstReg.WriteMask & WRITEMASK_X) {
 359             emit_math(SHADER_OPCODE_COS, offset(dst, 0),
 360                       offset(src[0], 0));
 361          }
 362
 363          if (fpi->DstReg.WriteMask & WRITEMASK_Y) {
 364             emit_math(SHADER_OPCODE_SIN, offset(dst, 1),
 365                       offset(src[0], 1));
 366          }
 367          break;
 368
 369       case OPCODE_SGE:
 370          emit_fp_sop(BRW_CONDITIONAL_GE, fpi, dst, src[0], src[1], one);
 371          break;
 372
 373       case OPCODE_SIN:
 374          emit_fp_scalar_math(SHADER_OPCODE_SIN, fpi, dst, src[0]);
 375          break;
 376
 377       case OPCODE_SLT:
 378          emit_fp_sop(BRW_CONDITIONAL_L, fpi, dst, src[0], src[1], one);
 379          break;
 380
 381       case OPCODE_SUB: {
 382          fs_reg neg_src1 = src[1];
 383          neg_src1.negate = !src[1].negate;
 384
 385          emit_fp_alu2(BRW_OPCODE_ADD, fpi, dst, src[0], neg_src1);
 386          break;
 387       }
 388
 389       case OPCODE_TEX:
 390       case OPCODE_TXB:
 391       case OPCODE_TXP: {
 392          ir_texture_opcode op;
 393          fs_reg lod;
 394          fs_reg dpdy;
 395          fs_reg coordinate = src[0];
 396          fs_reg shadow_c;
 397          fs_reg sample_index;
 398          fs_reg texel_offset; /* No offsets; leave as BAD_FILE. */
 399
 400          switch (fpi->Opcode) {
 401          case OPCODE_TEX:
 402             op = ir_tex;
 403             break;
 404          case OPCODE_TXP: {
 405             op = ir_tex;
 406
 407             coordinate = vgrf(glsl_type::vec3_type);
 408             fs_reg invproj = vgrf(glsl_type::float_type);
 409             emit_math(SHADER_OPCODE_RCP, invproj, offset(src[0], 3));
 410             for (int i = 0; i < 3; i++) {
 411                emit(MUL(offset(coordinate, i),
 412                         offset(src[0], i), invproj));
 413             }
 414             break;
 415          }
 416          case OPCODE_TXB:
 417             op = ir_txb;
 418             lod = offset(src[0], 3);
 419             break;
 420          default:
 421             unreachable("not reached");
 422          }
 423
 424          int coord_components;
 425          switch (fpi->TexSrcTarget) {
 426          case TEXTURE_1D_INDEX:
 427             coord_components = 1;
 428             break;
 429
 430          case TEXTURE_2D_INDEX:
 431          case TEXTURE_1D_ARRAY_INDEX:
 432          case TEXTURE_RECT_INDEX:
 433          case TEXTURE_EXTERNAL_INDEX:
 434             coord_components = 2;
 435             break;
 436
 437          case TEXTURE_3D_INDEX:
 438          case TEXTURE_2D_ARRAY_INDEX:
 439             coord_components = 3;
 440             break;
 441
 442          case TEXTURE_CUBE_INDEX: {
 443             coord_components = 4;
 444
 445             fs_reg temp = vgrf(glsl_type::float_type);
 446             fs_reg cubecoord = vgrf(glsl_type::vec3_type);
 447             fs_reg abscoord = coordinate;
 448             abscoord.negate = false;
 449             abscoord.abs = true;
 450             emit_minmax(BRW_CONDITIONAL_GE, temp,
 451                         offset(abscoord, 0), offset(abscoord, 1));
 452             emit_minmax(BRW_CONDITIONAL_GE, temp,
 453                         temp, offset(abscoord, 2));
 454             emit_math(SHADER_OPCODE_RCP, temp, temp);
 455             for (int i = 0; i < 3; i++) {
 456                emit(MUL(offset(cubecoord, i),
 457                         offset(coordinate, i), temp));
 458             }
 459
 460             coordinate = cubecoord;
 461             break;
 462          }
 463
 464          default:
 465             unreachable("not reached");
 466          }
 467
 468          if (fpi->TexShadow)
 469             shadow_c = offset(coordinate, 2);
 470
 471          emit_texture(op, glsl_type::vec4_type, coordinate, coord_components,
 472                       shadow_c, lod, dpdy, 0, sample_index,
 473                       reg_undef, 0, /* offset, components */
 474                       reg_undef, /* mcs */
 475                       0, /* gather component */
 476                       false, /* is cube array */
 477                       fpi->TexSrcTarget == TEXTURE_RECT_INDEX,
 478                       fpi->TexSrcUnit, fs_reg(fpi->TexSrcUnit),
 479                       fpi->TexSrcUnit);
 480          dst = this->result;
 481
 482          break;
 483       }
 484
 485       case OPCODE_SWZ:
 486          /* Note that SWZ's extended swizzles are handled in the general
 487           * get_src_reg() code.
 488           */
 489          emit_fp_alu1(BRW_OPCODE_MOV, fpi, dst, src[0]);
 490          break;
 491
 492       case OPCODE_XPD:
 493          for (int i = 0; i < 3; i++) {
 494             if (fpi->DstReg.WriteMask & (1 << i)) {
 495                int i1 = (i + 1) % 3;
 496                int i2 = (i + 2) % 3;
 497
 498                fs_reg temp = vgrf(glsl_type::float_type);
 499                fs_reg neg_src1_1 = offset(src[1], i1);
 500                neg_src1_1.negate = !neg_src1_1.negate;
 501                emit(MUL(temp, offset(src[0], i2), neg_src1_1));
 502                emit(MUL(offset(dst, i),
 503                         offset(src[0], i1), offset(src[1], i2)));
 504                emit(ADD(offset(dst, i), offset(dst, i), temp));
 505             }
 506          }
 507          break;
 508
 509       case OPCODE_END:
 510          break;
 511
 512       default:
 513          _mesa_problem(ctx, "Unsupported opcode %s in fragment program\n",
 514                        _mesa_opcode_string(fpi->Opcode));
 515       }
 516
 517       /* To handle saturates, we emit a MOV with a saturate bit, which
 518        * optimization should fold into the preceding instructions when safe.
 519        */
 520       if (fpi->Opcode != OPCODE_END) {
 521          fs_reg real_dst = get_fp_dst_reg(&fpi->DstReg);
 522
 523          for (int i = 0; i < 4; i++) {
 524             if (fpi->DstReg.WriteMask & (1 << i)) {
 525                fs_inst *inst = emit(MOV(offset(real_dst, i),
 526                                         offset(dst, i)));
 527                inst->saturate = fpi->SaturateMode;
 528             }
 529          }
 530       }
 531    }
 532
 533    /* Epilogue:
 534     *
 535     * Fragment depth has this strange convention of being the .z component of
 536     * a vec4.  emit_fb_write() wants to see a float value, instead.
 537     */
 538    this->current_annotation = "result.depth write";
 539    if (frag_depth.file != BAD_FILE) {
 540       fs_reg temp = vgrf(glsl_type::float_type);
 541       emit(MOV(temp, offset(frag_depth, 2)));
 542       frag_depth = temp;
 543    }
 544 }
 545
 546 void
 547 fs_visitor::setup_fp_regs()
 548 {
 549    /* PROGRAM_TEMPORARY */
 550    int num_temp = prog->NumTemporaries;
 551    fp_temp_regs = rzalloc_array(mem_ctx, fs_reg, num_temp);
 552    for (int i = 0; i < num_temp; i++)
 553       fp_temp_regs[i] = vgrf(glsl_type::vec4_type);
 554
 555    /* PROGRAM_STATE_VAR etc. */
 556    if (dispatch_width == 8) {
 557       for (unsigned p = 0;
 558            p < prog->Parameters->NumParameters; p++) {
 559          for (unsigned int i = 0; i < 4; i++) {
 560             stage_prog_data->param[uniforms++] =
 561                &prog->Parameters->ParameterValues[p][i];
 562          }
 563       }
 564    }
 565
 566    fp_input_regs = rzalloc_array(mem_ctx, fs_reg, VARYING_SLOT_MAX);
 567    for (int i = 0; i < VARYING_SLOT_MAX; i++) {
 568       if (prog->InputsRead & BITFIELD64_BIT(i)) {
 569          this->current_annotation = ralloc_asprintf(ctx, "interpolate input %d",
 570                                                     i);
 571
 572          switch (i) {
 573          case VARYING_SLOT_POS:
 574             {
 575                assert(stage == MESA_SHADER_FRAGMENT);
 576                gl_fragment_program *fp = (gl_fragment_program*) prog;
 577                fp_input_regs[i] =
 578                   *emit_fragcoord_interpolation(fp->PixelCenterInteger,
 579                                                 fp->OriginUpperLeft);
 580             }
 581             break;
 582          case VARYING_SLOT_FACE:
 583             fp_input_regs[i] = *emit_frontfacing_interpolation();
 584             break;
 585          default:
 586             fp_input_regs[i] = vgrf(glsl_type::vec4_type);
 587             emit_general_interpolation(fp_input_regs[i], "fp_input",
 588                                        glsl_type::vec4_type,
 589                                        INTERP_QUALIFIER_NONE,
 590                                        i, false, false);
 591
 592             if (i == VARYING_SLOT_FOGC) {
 593                emit(MOV(offset(fp_input_regs[i], 1), fs_reg(0.0f)));
 594                emit(MOV(offset(fp_input_regs[i], 2), fs_reg(0.0f)));
 595                emit(MOV(offset(fp_input_regs[i], 3), fs_reg(1.0f)));
 596             }
 597
 598             break;
 599          }
 600
 601          this->current_annotation = NULL;
 602       }
 603    }
 604 }
 605
 606 fs_reg
 607 fs_visitor::get_fp_dst_reg(const prog_dst_register *dst)
 608 {
 609    assert(stage == MESA_SHADER_FRAGMENT);
 610    brw_wm_prog_key *key = (brw_wm_prog_key*) this->key;
 611
 612    switch (dst->File) {
 613    case PROGRAM_TEMPORARY:
 614       return fp_temp_regs[dst->Index];
 615
 616    case PROGRAM_OUTPUT:
 617       if (dst->Index == FRAG_RESULT_DEPTH) {
 618          if (frag_depth.file == BAD_FILE)
 619             frag_depth = vgrf(glsl_type::vec4_type);
 620          return frag_depth;
 621       } else if (dst->Index == FRAG_RESULT_COLOR) {
 622          if (outputs[0].file == BAD_FILE) {
 623             outputs[0] = vgrf(glsl_type::vec4_type);
 624             output_components[0] = 4;
 625
 626             /* Tell emit_fb_writes() to smear fragment.color across all the
 627              * color attachments.
 628              */
 629             for (int i = 1; i < key->nr_color_regions; i++) {
 630                outputs[i] = outputs[0];
 631                output_components[i] = output_components[0];
 632             }
 633          }
 634          return outputs[0];
 635       } else {
 636          int output_index = dst->Index - FRAG_RESULT_DATA0;
 637          if (outputs[output_index].file == BAD_FILE) {
 638             outputs[output_index] = vgrf(glsl_type::vec4_type);
 639          }
 640          output_components[output_index] = 4;
 641          return outputs[output_index];
 642       }
 643
 644    case PROGRAM_UNDEFINED:
 645       return fs_reg();
 646
 647    default:
 648       _mesa_problem(ctx, "bad dst register file: %s\n",
 649                     _mesa_register_file_name((gl_register_file)dst->File));
 650       return vgrf(glsl_type::vec4_type);
 651    }
 652 }
 653
 654 fs_reg
 655 fs_visitor::get_fp_src_reg(const prog_src_register *src)
 656 {
 657    struct gl_program_parameter_list *plist = prog->Parameters;
 658
 659    fs_reg result;
 660
 661    assert(!src->Abs);
 662
 663    switch (src->File) {
 664    case PROGRAM_UNDEFINED:
 665       return fs_reg();
 666    case PROGRAM_TEMPORARY:
 667       result = fp_temp_regs[src->Index];
 668       break;
 669
 670    case PROGRAM_INPUT:
 671       result = fp_input_regs[src->Index];
 672       break;
 673
 674    case PROGRAM_STATE_VAR:
 675    case PROGRAM_UNIFORM:
 676    case PROGRAM_CONSTANT:
 677       /* We actually want to look at the type in the Parameters list for this,
 678        * because this lets us upload constant builtin uniforms, as actual
 679        * constants.
 680        */
 681       switch (plist->Parameters[src->Index].Type) {
 682       case PROGRAM_CONSTANT: {
 683          result = vgrf(glsl_type::vec4_type);
 684
 685          for (int i = 0; i < 4; i++) {
 686             emit(MOV(offset(result, i),
 687                      fs_reg(plist->ParameterValues[src->Index][i].f)));
 688          }
 689          break;
 690       }
 691
 692       case PROGRAM_STATE_VAR:
 693       case PROGRAM_UNIFORM:
 694          result = fs_reg(UNIFORM, src->Index * 4);
 695          break;
 696
 697       default:
 698          _mesa_problem(ctx, "bad uniform src register file: %s\n",
 699                        _mesa_register_file_name((gl_register_file)src->File));
 700          return vgrf(glsl_type::vec4_type);
 701       }
 702       break;
 703
 704    default:
 705       _mesa_problem(ctx, "bad src register file: %s\n",
 706                     _mesa_register_file_name((gl_register_file)src->File));
 707       return vgrf(glsl_type::vec4_type);
 708    }
 709
 710    if (src->Swizzle != SWIZZLE_NOOP || src->Negate) {
 711       fs_reg unswizzled = result;
 712       result = vgrf(glsl_type::vec4_type);
 713       for (int i = 0; i < 4; i++) {
 714          bool negate = src->Negate & (1 << i);
 715          /* The ZERO, ONE, and Negate options are only used for OPCODE_SWZ,
 716           * but it costs us nothing to support it.
 717           */
 718          int src_swiz = GET_SWZ(src->Swizzle, i);
 719          if (src_swiz == SWIZZLE_ZERO) {
 720             emit(MOV(offset(result, i), fs_reg(0.0f)));
 721          } else if (src_swiz == SWIZZLE_ONE) {
 722             emit(MOV(offset(result, i),
 723                      negate ? fs_reg(-1.0f) : fs_reg(1.0f)));
 724          } else {
 725             fs_reg src = offset(unswizzled, src_swiz);
 726             if (negate)
 727                src.negate = !src.negate;
 728             emit(MOV(offset(result, i), src));
 729          }
 730       }
 731    }
 732
 733    return result;
 734 }