src/mesa/drivers/dri/i965/brw_fs_fp.cpp

   1 /*
   2  * Copyright © 2012 Intel Corporation
   3  *
   4  * Permission is hereby granted, free of charge, to any person obtaining a
   5  * copy of this software and associated documentation files (the "Software"),
   6  * to deal in the Software without restriction, including without limitation
   7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
   8  * and/or sell copies of the Software, and to permit persons to whom the
   9  * Software is furnished to do so, subject to the following conditions:
  10  *
  11  * The above copyright notice and this permission notice (including the next
  12  * paragraph) shall be included in all copies or substantial portions of the
  13  * Software.
  14  *
  15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
  18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
  20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
  21  * IN THE SOFTWARE.
  22  */
  23
  24 /** @file brw_fs_fp.cpp
  25  *
  26  * Implementation of the compiler for GL_ARB_fragment_program shaders on top
  27  * of the GLSL compiler backend.
  28  */
  29
  30 #include "brw_context.h"
  31 #include "brw_fs.h"
  32
  33 void
  34 fs_visitor::emit_fp_alu1(enum opcode opcode,
  35                          const struct prog_instruction *fpi,
  36                          fs_reg dst, fs_reg src)
  37 {
  38    for (int i = 0; i < 4; i++) {
  39       if (fpi->DstReg.WriteMask & (1 << i))
  40          emit(opcode, offset(dst, i), offset(src, i));
  41    }
  42 }
  43
  44 void
  45 fs_visitor::emit_fp_alu2(enum opcode opcode,
  46                          const struct prog_instruction *fpi,
  47                          fs_reg dst, fs_reg src0, fs_reg src1)
  48 {
  49    for (int i = 0; i < 4; i++) {
  50       if (fpi->DstReg.WriteMask & (1 << i))
  51          emit(opcode, offset(dst, i),
  52               offset(src0, i), offset(src1, i));
  53    }
  54 }
  55
  56 void
  57 fs_visitor::emit_fp_minmax(const prog_instruction *fpi,
  58                            fs_reg dst, fs_reg src0, fs_reg src1)
  59 {
  60    uint32_t conditionalmod;
  61    if (fpi->Opcode == OPCODE_MIN)
  62       conditionalmod = BRW_CONDITIONAL_L;
  63    else
  64       conditionalmod = BRW_CONDITIONAL_GE;
  65
  66    for (int i = 0; i < 4; i++) {
  67       if (fpi->DstReg.WriteMask & (1 << i)) {
  68          emit_minmax(conditionalmod, offset(dst, i),
  69                      offset(src0, i), offset(src1, i));
  70       }
  71    }
  72 }
  73
  74 void
  75 fs_visitor::emit_fp_sop(uint32_t conditional_mod,
  76                         const struct prog_instruction *fpi,
  77                         fs_reg dst, fs_reg src0, fs_reg src1,
  78                         fs_reg one)
  79 {
  80    for (int i = 0; i < 4; i++) {
  81       if (fpi->DstReg.WriteMask & (1 << i)) {
  82          fs_inst *inst;
  83
  84          emit(CMP(reg_null_d, offset(src0, i), offset(src1, i),
  85                   conditional_mod));
  86
  87          inst = emit(BRW_OPCODE_SEL, offset(dst, i), one, fs_reg(0.0f));
  88          inst->predicate = BRW_PREDICATE_NORMAL;
  89       }
  90    }
  91 }
  92
  93 void
  94 fs_visitor::emit_fp_scalar_write(const struct prog_instruction *fpi,
  95                                  fs_reg dst, fs_reg src)
  96 {
  97    for (int i = 0; i < 4; i++) {
  98       if (fpi->DstReg.WriteMask & (1 << i))
  99          emit(MOV(offset(dst, i), src));
 100    }
 101 }
 102
 103 void
 104 fs_visitor::emit_fp_scalar_math(enum opcode opcode,
 105                                 const struct prog_instruction *fpi,
 106                                 fs_reg dst, fs_reg src)
 107 {
 108    fs_reg temp = fs_reg(this, glsl_type::float_type);
 109    emit_math(opcode, temp, src);
 110    emit_fp_scalar_write(fpi, dst, temp);
 111 }
 112
/**
 * Translates every instruction of the ARB fragment program in prog into FS
 * IR.
 *
 * For each prog_instruction, the three source operands are fetched with
 * get_fp_src_reg(), the result is computed into a fresh vec4 temporary, and
 * the channels enabled in the instruction's write mask are then copied to the
 * real destination (from get_fp_dst_reg()) with the instruction's saturate
 * mode applied.  After the last instruction, the fragment depth output, if
 * one was written, is narrowed from the .z channel of its vec4 to a float.
 */
void
fs_visitor::emit_fragment_program_code()
{
   setup_fp_regs();

   /* Null destination for comparisons that are emitted only for their flag
    * result.
    */
   fs_reg null = fs_reg(brw_null_reg());

   /* Keep a reg with 1.0 around, for reuse by emit_fp_sop so that it can just
    * be:
    *
    * sel.f0 dst 1.0 0.0
    *
    * instead of
    *
    * mov    dst 0.0
    * mov.f0 dst 1.0
    */
   fs_reg one = fs_reg(this, glsl_type::float_type);
   emit(MOV(one, fs_reg(1.0f)));

   for (unsigned int insn = 0; insn < prog->NumInstructions; insn++) {
      const struct prog_instruction *fpi = &prog->Instructions[insn];
      base_ir = fpi;

      //_mesa_print_instruction(fpi);

      fs_reg dst;
      fs_reg src[3];

      /* We always emit into a temporary destination register to avoid
       * aliasing issues.
       */
      dst = fs_reg(this, glsl_type::vec4_type);

      /* All three operands are fetched regardless of how many the opcode
       * uses; get_fp_src_reg() returns a default fs_reg for
       * PROGRAM_UNDEFINED operands.
       */
      for (int i = 0; i < 3; i++)
         src[i] = get_fp_src_reg(&fpi->SrcReg[i]);

      switch (fpi->Opcode) {
      case OPCODE_ABS:
         /* Force the abs modifier and drop any negate, then copy. */
         src[0].abs = true;
         src[0].negate = false;
         emit_fp_alu1(BRW_OPCODE_MOV, fpi, dst, src[0]);
         break;

      case OPCODE_ADD:
         emit_fp_alu2(BRW_OPCODE_ADD, fpi, dst, src[0], src[1]);
         break;

      case OPCODE_CMP:
         /* Per channel: dst = (src0 < 0.0) ? src1 : src2. */
         for (int i = 0; i < 4; i++) {
            if (fpi->DstReg.WriteMask & (1 << i)) {
               fs_inst *inst;

               emit(CMP(null, offset(src[0], i), fs_reg(0.0f),
                        BRW_CONDITIONAL_L));

               inst = emit(BRW_OPCODE_SEL, offset(dst, i),
                           offset(src[1], i), offset(src[2], i));
               inst->predicate = BRW_PREDICATE_NORMAL;
            }
         }
         break;

      case OPCODE_COS:
         emit_fp_scalar_math(SHADER_OPCODE_COS, fpi, dst, src[0]);
         break;

      case OPCODE_DP2:
      case OPCODE_DP3:
      case OPCODE_DP4:
      case OPCODE_DPH: {
         /* Dot products: accumulate the products of the first `count`
          * channels in acc, using mul as scratch for each product.
          */
         fs_reg mul = fs_reg(this, glsl_type::float_type);
         fs_reg acc = fs_reg(this, glsl_type::float_type);
         int count;

         switch (fpi->Opcode) {
         case OPCODE_DP2: count = 2; break;
         case OPCODE_DP3: count = 3; break;
         case OPCODE_DP4: count = 4; break;
         case OPCODE_DPH: count = 3; break;
         default: assert(!"not reached"); count = 0; break;
         }

         emit(MUL(acc, offset(src[0], 0), offset(src[1], 0)));
         for (int i = 1; i < count; i++) {
            emit(MUL(mul, offset(src[0], i), offset(src[1], i)));
            emit(ADD(acc, acc, mul));
         }

         /* DPH is the three-channel dot product plus src1.w. */
         if (fpi->Opcode == OPCODE_DPH)
            emit(ADD(acc, acc, offset(src[1], 3)));

         emit_fp_scalar_write(fpi, dst, acc);
         break;
      }

      case OPCODE_DST:
         /* dst = (1.0, src0.y * src1.y, src0.z, src1.w), limited to the
          * channels in the write mask.
          */
         if (fpi->DstReg.WriteMask & WRITEMASK_X)
            emit(MOV(dst, fs_reg(1.0f)));
         if (fpi->DstReg.WriteMask & WRITEMASK_Y) {
            emit(MUL(offset(dst, 1),
                     offset(src[0], 1), offset(src[1], 1)));
         }
         if (fpi->DstReg.WriteMask & WRITEMASK_Z)
            emit(MOV(offset(dst, 2), offset(src[0], 2)));
         if (fpi->DstReg.WriteMask & WRITEMASK_W)
            emit(MOV(offset(dst, 3), offset(src[1], 3)));
         break;

      case OPCODE_EX2:
         emit_fp_scalar_math(SHADER_OPCODE_EXP2, fpi, dst, src[0]);
         break;

      case OPCODE_FLR:
         emit_fp_alu1(BRW_OPCODE_RNDD, fpi, dst, src[0]);
         break;

      case OPCODE_FRC:
         emit_fp_alu1(BRW_OPCODE_FRC, fpi, dst, src[0]);
         break;

      case OPCODE_KIL: {
         for (int i = 0; i < 4; i++) {
            /* In most cases the argument to a KIL will be something like
             * TEMP[0].wwww, so there's no point in checking whether .w is < 0
             * 4 times in a row.
             */
            if (i > 0 &&
                GET_SWZ(fpi->SrcReg[0].Swizzle, i) ==
                GET_SWZ(fpi->SrcReg[0].Swizzle, i - 1) &&
                ((fpi->SrcReg[0].Negate >> i) & 1) ==
                ((fpi->SrcReg[0].Negate >> (i - 1)) & 1)) {
               continue;
            }


            /* Emit an instruction that's predicated on the current
             * undiscarded pixels, and updates just those pixels to be
             * turned off.
             *
             * NOTE(review): flag subregister 1 presumably holds the
             * undiscarded-pixel mask -- confirm against the discard handling
             * elsewhere in the backend.
             */
            fs_inst *cmp = emit(CMP(null, offset(src[0], i), fs_reg(0.0f),
                                    BRW_CONDITIONAL_GE));
            cmp->predicate = BRW_PREDICATE_NORMAL;
            cmp->flag_subreg = 1;
         }
         break;
      }

      case OPCODE_LG2:
         emit_fp_scalar_math(SHADER_OPCODE_LOG2, fpi, dst, src[0]);
         break;

      case OPCODE_LIT:
         /* From the ARB_fragment_program spec:
          *
          *      tmp = VectorLoad(op0);
          *      if (tmp.x < 0) tmp.x = 0;
          *      if (tmp.y < 0) tmp.y = 0;
          *      if (tmp.w < -(128.0-epsilon)) tmp.w = -(128.0-epsilon);
          *      else if (tmp.w > 128-epsilon) tmp.w = 128-epsilon;
          *      result.x = 1.0;
          *      result.y = tmp.x;
          *      result.z = (tmp.x > 0) ? RoughApproxPower(tmp.y, tmp.w) : 0.0;
          *      result.w = 1.0;
          *
          * Note that we don't do the clamping to +/- 128.  We didn't in
          * brw_wm_emit.c either.
          */
         if (fpi->DstReg.WriteMask & WRITEMASK_X)
            emit(MOV(offset(dst, 0), fs_reg(1.0f)));

         if (fpi->DstReg.WriteMask & WRITEMASK_YZ) {
            fs_inst *inst;
            /* One src0.x <= 0 comparison feeds the predicated MOVs of 0.0
             * for both the .y and the .z result.
             */
            emit(CMP(null, offset(src[0], 0), fs_reg(0.0f),
                     BRW_CONDITIONAL_LE));

            if (fpi->DstReg.WriteMask & WRITEMASK_Y) {
               emit(MOV(offset(dst, 1), offset(src[0], 0)));
               inst = emit(MOV(offset(dst, 1), fs_reg(0.0f)));
               inst->predicate = BRW_PREDICATE_NORMAL;
            }

            if (fpi->DstReg.WriteMask & WRITEMASK_Z) {
               emit_math(SHADER_OPCODE_POW, offset(dst, 2),
                         offset(src[0], 1), offset(src[0], 3));

               inst = emit(MOV(offset(dst, 2), fs_reg(0.0f)));
               inst->predicate = BRW_PREDICATE_NORMAL;
            }
         }

         if (fpi->DstReg.WriteMask & WRITEMASK_W)
            emit(MOV(offset(dst, 3), fs_reg(1.0f)));

         break;

      case OPCODE_LRP:
         /* Forwarded to emit_lrp() with the operands in reverse order:
          * (src2, src1, src0).
          */
         for (int i = 0; i < 4; i++) {
            if (fpi->DstReg.WriteMask & (1 << i)) {
               fs_reg a = offset(src[0], i);
               fs_reg y = offset(src[1], i);
               fs_reg x = offset(src[2], i);
               emit_lrp(offset(dst, i), x, y, a);
            }
         }
         break;

      case OPCODE_MAD:
         /* Per channel: dst = src0 * src1 + src2, as a separate MUL and
          * ADD through a float temporary.
          */
         for (int i = 0; i < 4; i++) {
            if (fpi->DstReg.WriteMask & (1 << i)) {
               fs_reg temp = fs_reg(this, glsl_type::float_type);
               emit(MUL(temp, offset(src[0], i), offset(src[1], i)));
               emit(ADD(offset(dst, i), temp, offset(src[2], i)));
            }
         }
         break;

      case OPCODE_MAX:
         emit_fp_minmax(fpi, dst, src[0], src[1]);
         break;

      case OPCODE_MOV:
         emit_fp_alu1(BRW_OPCODE_MOV, fpi, dst, src[0]);
         break;

      case OPCODE_MIN:
         emit_fp_minmax(fpi, dst, src[0], src[1]);
         break;

      case OPCODE_MUL:
         emit_fp_alu2(BRW_OPCODE_MUL, fpi, dst, src[0], src[1]);
         break;

      case OPCODE_POW: {
         /* Two-operand scalar math; the result is broadcast to the enabled
          * channels.
          */
         fs_reg temp = fs_reg(this, glsl_type::float_type);
         emit_math(SHADER_OPCODE_POW, temp, src[0], src[1]);
         emit_fp_scalar_write(fpi, dst, temp);
         break;
      }

      case OPCODE_RCP:
         emit_fp_scalar_math(SHADER_OPCODE_RCP, fpi, dst, src[0]);
         break;

      case OPCODE_RSQ:
         emit_fp_scalar_math(SHADER_OPCODE_RSQ, fpi, dst, src[0]);
         break;

      case OPCODE_SCS:
         /* dst.x = cos(src0.x), dst.y = sin(src0.y); .z and .w are not
          * written here.
          */
         if (fpi->DstReg.WriteMask & WRITEMASK_X) {
            emit_math(SHADER_OPCODE_COS, offset(dst, 0),
                      offset(src[0], 0));
         }

         if (fpi->DstReg.WriteMask & WRITEMASK_Y) {
            emit_math(SHADER_OPCODE_SIN, offset(dst, 1),
                      offset(src[0], 1));
         }
         break;

      case OPCODE_SGE:
         emit_fp_sop(BRW_CONDITIONAL_GE, fpi, dst, src[0], src[1], one);
         break;

      case OPCODE_SIN:
         emit_fp_scalar_math(SHADER_OPCODE_SIN, fpi, dst, src[0]);
         break;

      case OPCODE_SLT:
         emit_fp_sop(BRW_CONDITIONAL_L, fpi, dst, src[0], src[1], one);
         break;

      case OPCODE_SUB: {
         /* src0 - src1 is emitted as src0 + (-src1) by flipping src1's
          * negate modifier on a copy.
          */
         fs_reg neg_src1 = src[1];
         neg_src1.negate = !src[1].negate;

         emit_fp_alu2(BRW_OPCODE_ADD, fpi, dst, src[0], neg_src1);
         break;
      }

      case OPCODE_TEX:
      case OPCODE_TXB:
      case OPCODE_TXP: {
         /* We piggy-back on the GLSL IR support for texture setup.  To do so,
          * we have to cook up an ir_texture that has the coordinate field
          * with appropriate type, and shadow_comparitor set or not.  All the
          * other properties of ir_texture are passed in as arguments to the
          * emit_texture_gen* function.
          */
         ir_texture *ir = NULL;

         fs_reg lod;
         fs_reg dpdy;
         fs_reg coordinate = src[0];
         fs_reg shadow_c;
         fs_reg sample_index;

         switch (fpi->Opcode) {
         case OPCODE_TEX:
            ir = new(mem_ctx) ir_texture(ir_tex);
            break;
         case OPCODE_TXP: {
            ir = new(mem_ctx) ir_texture(ir_tex);

            /* Projective lookup: multiply the first three coordinate
             * channels by the reciprocal of src0.w.
             */
            coordinate = fs_reg(this, glsl_type::vec3_type);
            fs_reg invproj = fs_reg(this, glsl_type::float_type);
            emit_math(SHADER_OPCODE_RCP, invproj, offset(src[0], 3));
            for (int i = 0; i < 3; i++) {
               emit(MUL(offset(coordinate, i),
                        offset(src[0], i), invproj));
            }
            break;
         }
         case OPCODE_TXB:
            /* The lod operand handed to the texture emitter is src0.w. */
            ir = new(mem_ctx) ir_texture(ir_txb);
            lod = offset(src[0], 3);
            break;
         default:
            assert(!"not reached");
            break;
         }

         ir->type = glsl_type::vec4_type;

         /* Pick the coordinate type from the texture target. */
         const glsl_type *coordinate_type;
         switch (fpi->TexSrcTarget) {
         case TEXTURE_1D_INDEX:
            coordinate_type = glsl_type::float_type;
            break;

         case TEXTURE_2D_INDEX:
         case TEXTURE_1D_ARRAY_INDEX:
         case TEXTURE_RECT_INDEX:
         case TEXTURE_EXTERNAL_INDEX:
            coordinate_type = glsl_type::vec2_type;
            break;

         case TEXTURE_3D_INDEX:
         case TEXTURE_2D_ARRAY_INDEX:
            coordinate_type = glsl_type::vec3_type;
            break;

         case TEXTURE_CUBE_INDEX: {
            coordinate_type = glsl_type::vec3_type;

            /* Scale the cube coordinate by the reciprocal of its largest
             * absolute component: temp = 1 / max(|x|, |y|, |z|).
             */
            fs_reg temp = fs_reg(this, glsl_type::float_type);
            fs_reg cubecoord = fs_reg(this, glsl_type::vec3_type);
            fs_reg abscoord = coordinate;
            abscoord.negate = false;
            abscoord.abs = true;
            emit_minmax(BRW_CONDITIONAL_GE, temp,
                        offset(abscoord, 0), offset(abscoord, 1));
            emit_minmax(BRW_CONDITIONAL_GE, temp,
                        temp, offset(abscoord, 2));
            emit_math(SHADER_OPCODE_RCP, temp, temp);
            for (int i = 0; i < 3; i++) {
               emit(MUL(offset(cubecoord, i),
                        offset(coordinate, i), temp));
            }

            coordinate = cubecoord;
            break;
         }

         default:
            assert(!"not reached");
            coordinate_type = glsl_type::vec2_type;
            break;
         }

         /* The constant exists to give ir->coordinate the right type (see
          * the comment at the top of this case); its data is deliberately
          * left uninitialized.
          */
         ir_constant_data junk_data;
         ir->coordinate = new(mem_ctx) ir_constant(coordinate_type, &junk_data);

         /* For shadow lookups the comparison value is the third channel of
          * the coordinate; the ir_constant only marks the comparitor as
          * present.
          */
         if (fpi->TexShadow) {
            shadow_c = offset(coordinate, 2);
            ir->shadow_comparitor = new(mem_ctx) ir_constant(0.0f);
         }

         coordinate = rescale_texcoord(ir, coordinate,
                                       fpi->TexSrcTarget == TEXTURE_RECT_INDEX,
                                       fpi->TexSrcUnit, fpi->TexSrcUnit);

         /* Dispatch to the texture emitter matching the hardware
          * generation.
          */
         fs_inst *inst;
         if (brw->gen >= 7) {
            inst = emit_texture_gen7(ir, dst, coordinate, shadow_c, lod, dpdy, sample_index, fs_reg(0u), fpi->TexSrcUnit);
         } else if (brw->gen >= 5) {
            inst = emit_texture_gen5(ir, dst, coordinate, shadow_c, lod, dpdy, sample_index);
         } else {
            inst = emit_texture_gen4(ir, dst, coordinate, shadow_c, lod, dpdy);
         }

         inst->sampler = fpi->TexSrcUnit;
         inst->shadow_compare = fpi->TexShadow;

         /* Reuse the GLSL swizzle_result() handler. */
         swizzle_result(ir, dst, fpi->TexSrcUnit);
         dst = this->result;

         break;
      }

      case OPCODE_SWZ:
         /* Note that SWZ's extended swizzles are handled in the general
          * get_fp_src_reg() code.
          */
         emit_fp_alu1(BRW_OPCODE_MOV, fpi, dst, src[0]);
         break;

      case OPCODE_XPD:
         /* Cross product over the first three channels:
          * dst[i] = src0[i1] * src1[i2] - src0[i2] * src1[i1],
          * with i1 = (i + 1) % 3 and i2 = (i + 2) % 3.  The subtraction is
          * done by negating src1[i1] in the first MUL.
          */
         for (int i = 0; i < 3; i++) {
            if (fpi->DstReg.WriteMask & (1 << i)) {
               int i1 = (i + 1) % 3;
               int i2 = (i + 2) % 3;

               fs_reg temp = fs_reg(this, glsl_type::float_type);
               fs_reg neg_src1_1 = offset(src[1], i1);
               neg_src1_1.negate = !neg_src1_1.negate;
               emit(MUL(temp, offset(src[0], i2), neg_src1_1));
               emit(MUL(offset(dst, i),
                        offset(src[0], i1), offset(src[1], i2)));
               emit(ADD(offset(dst, i), offset(dst, i), temp));
            }
         }
         break;

      case OPCODE_END:
         break;

      default:
         _mesa_problem(ctx, "Unsupported opcode %s in fragment program\n",
                       _mesa_opcode_string(fpi->Opcode));
      }

      /* To handle saturates, we emit a MOV with a saturate bit, which
       * optimization should fold into the preceding instructions when safe.
       */
      if (fpi->Opcode != OPCODE_END) {
         fs_reg real_dst = get_fp_dst_reg(&fpi->DstReg);

         for (int i = 0; i < 4; i++) {
            if (fpi->DstReg.WriteMask & (1 << i)) {
               fs_inst *inst = emit(MOV(offset(real_dst, i),
                                        offset(dst, i)));
               inst->saturate = fpi->SaturateMode;
            }
         }
      }
   }

   /* Epilogue:
    *
    * Fragment depth has this strange convention of being the .z component of
    * a vec4.  emit_fb_write() wants to see a float value, instead.
    */
   this->current_annotation = "result.depth write";
   if (frag_depth.file != BAD_FILE) {
      fs_reg temp = fs_reg(this, glsl_type::float_type);
      emit(MOV(temp, offset(frag_depth, 2)));
      frag_depth = temp;
   }
}
 574
 575 void
 576 fs_visitor::setup_fp_regs()
 577 {
 578    /* PROGRAM_TEMPORARY */
 579    int num_temp = prog->NumTemporaries;
 580    fp_temp_regs = rzalloc_array(mem_ctx, fs_reg, num_temp);
 581    for (int i = 0; i < num_temp; i++)
 582       fp_temp_regs[i] = fs_reg(this, glsl_type::vec4_type);
 583
 584    /* PROGRAM_STATE_VAR etc. */
 585    if (dispatch_width == 8) {
 586       for (unsigned p = 0;
 587            p < prog->Parameters->NumParameters; p++) {
 588          for (unsigned int i = 0; i < 4; i++) {
 589             stage_prog_data->param[uniforms++] =
 590                &prog->Parameters->ParameterValues[p][i].f;
 591          }
 592       }
 593    }
 594
 595    fp_input_regs = rzalloc_array(mem_ctx, fs_reg, VARYING_SLOT_MAX);
 596    for (int i = 0; i < VARYING_SLOT_MAX; i++) {
 597       if (prog->InputsRead & BITFIELD64_BIT(i)) {
 598          /* Make up a dummy instruction to reuse code for emitting
 599           * interpolation.
 600           */
 601          ir_variable *ir = new(mem_ctx) ir_variable(glsl_type::vec4_type,
 602                                                     "fp_input",
 603                                                     ir_var_shader_in);
 604          ir->data.location = i;
 605
 606          this->current_annotation = ralloc_asprintf(ctx, "interpolate input %d",
 607                                                     i);
 608
 609          switch (i) {
 610          case VARYING_SLOT_POS:
 611             ir->data.pixel_center_integer = fp->PixelCenterInteger;
 612             ir->data.origin_upper_left = fp->OriginUpperLeft;
 613             fp_input_regs[i] = *emit_fragcoord_interpolation(ir);
 614             break;
 615          case VARYING_SLOT_FACE:
 616             fp_input_regs[i] = *emit_frontfacing_interpolation(ir);
 617             break;
 618          default:
 619             fp_input_regs[i] = *emit_general_interpolation(ir);
 620
 621             if (i == VARYING_SLOT_FOGC) {
 622                emit(MOV(offset(fp_input_regs[i], 1), fs_reg(0.0f)));
 623                emit(MOV(offset(fp_input_regs[i], 2), fs_reg(0.0f)));
 624                emit(MOV(offset(fp_input_regs[i], 3), fs_reg(1.0f)));
 625             }
 626
 627             break;
 628          }
 629
 630          this->current_annotation = NULL;
 631       }
 632    }
 633 }
 634
 635 fs_reg
 636 fs_visitor::get_fp_dst_reg(const prog_dst_register *dst)
 637 {
 638    switch (dst->File) {
 639    case PROGRAM_TEMPORARY:
 640       return fp_temp_regs[dst->Index];
 641
 642    case PROGRAM_OUTPUT:
 643       if (dst->Index == FRAG_RESULT_DEPTH) {
 644          if (frag_depth.file == BAD_FILE)
 645             frag_depth = fs_reg(this, glsl_type::vec4_type);
 646          return frag_depth;
 647       } else if (dst->Index == FRAG_RESULT_COLOR) {
 648          if (outputs[0].file == BAD_FILE) {
 649             outputs[0] = fs_reg(this, glsl_type::vec4_type);
 650             output_components[0] = 4;
 651
 652             /* Tell emit_fb_writes() to smear fragment.color across all the
 653              * color attachments.
 654              */
 655             for (int i = 1; i < c->key.nr_color_regions; i++) {
 656                outputs[i] = outputs[0];
 657                output_components[i] = output_components[0];
 658             }
 659          }
 660          return outputs[0];
 661       } else {
 662          int output_index = dst->Index - FRAG_RESULT_DATA0;
 663          if (outputs[output_index].file == BAD_FILE) {
 664             outputs[output_index] = fs_reg(this, glsl_type::vec4_type);
 665          }
 666          output_components[output_index] = 4;
 667          return outputs[output_index];
 668       }
 669
 670    case PROGRAM_UNDEFINED:
 671       return fs_reg();
 672
 673    default:
 674       _mesa_problem(ctx, "bad dst register file: %s\n",
 675                     _mesa_register_file_name((gl_register_file)dst->File));
 676       return fs_reg(this, glsl_type::vec4_type);
 677    }
 678 }
 679
 680 fs_reg
 681 fs_visitor::get_fp_src_reg(const prog_src_register *src)
 682 {
 683    struct gl_program_parameter_list *plist = prog->Parameters;
 684
 685    fs_reg result;
 686
 687    assert(!src->Abs);
 688
 689    switch (src->File) {
 690    case PROGRAM_UNDEFINED:
 691       return fs_reg();
 692    case PROGRAM_TEMPORARY:
 693       result = fp_temp_regs[src->Index];
 694       break;
 695
 696    case PROGRAM_INPUT:
 697       result = fp_input_regs[src->Index];
 698       break;
 699
 700    case PROGRAM_STATE_VAR:
 701    case PROGRAM_UNIFORM:
 702    case PROGRAM_CONSTANT:
 703       /* We actually want to look at the type in the Parameters list for this,
 704        * because this lets us upload constant builtin uniforms, as actual
 705        * constants.
 706        */
 707       switch (plist->Parameters[src->Index].Type) {
 708       case PROGRAM_CONSTANT: {
 709          result = fs_reg(this, glsl_type::vec4_type);
 710
 711          for (int i = 0; i < 4; i++) {
 712             emit(MOV(offset(result, i),
 713                      fs_reg(plist->ParameterValues[src->Index][i].f)));
 714          }
 715          break;
 716       }
 717
 718       case PROGRAM_STATE_VAR:
 719       case PROGRAM_UNIFORM:
 720          result = fs_reg(UNIFORM, src->Index * 4);
 721          break;
 722
 723       default:
 724          _mesa_problem(ctx, "bad uniform src register file: %s\n",
 725                        _mesa_register_file_name((gl_register_file)src->File));
 726          return fs_reg(this, glsl_type::vec4_type);
 727       }
 728       break;
 729
 730    default:
 731       _mesa_problem(ctx, "bad src register file: %s\n",
 732                     _mesa_register_file_name((gl_register_file)src->File));
 733       return fs_reg(this, glsl_type::vec4_type);
 734    }
 735
 736    if (src->Swizzle != SWIZZLE_NOOP || src->Negate) {
 737       fs_reg unswizzled = result;
 738       result = fs_reg(this, glsl_type::vec4_type);
 739       for (int i = 0; i < 4; i++) {
 740          bool negate = src->Negate & (1 << i);
 741          /* The ZERO, ONE, and Negate options are only used for OPCODE_SWZ,
 742           * but it costs us nothing to support it.
 743           */
 744          int src_swiz = GET_SWZ(src->Swizzle, i);
 745          if (src_swiz == SWIZZLE_ZERO) {
 746             emit(MOV(offset(result, i), fs_reg(0.0f)));
 747          } else if (src_swiz == SWIZZLE_ONE) {
 748             emit(MOV(offset(result, i),
 749                      negate ? fs_reg(-1.0f) : fs_reg(1.0f)));
 750          } else {
 751             fs_reg src = offset(unswizzled, src_swiz);
 752             if (negate)
 753                src.negate = !src.negate;
 754             emit(MOV(offset(result, i), src));
 755          }
 756       }
 757    }
 758
 759    return result;
 760 }