src/mesa/drivers/dri/i965/brw_fs_fp.cpp

   1 /*
   2  * Copyright © 2012 Intel Corporation
   3  *
   4  * Permission is hereby granted, free of charge, to any person obtaining a
   5  * copy of this software and associated documentation files (the "Software"),
   6  * to deal in the Software without restriction, including without limitation
   7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
   8  * and/or sell copies of the Software, and to permit persons to whom the
   9  * Software is furnished to do so, subject to the following conditions:
  10  *
  11  * The above copyright notice and this permission notice (including the next
  12  * paragraph) shall be included in all copies or substantial portions of the
  13  * Software.
  14  *
  15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
  18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
  20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
  21  * IN THE SOFTWARE.
  22  */
  23
  24 /** @file brw_fs_fp.cpp
  25  *
  26  * Implementation of the compiler for GL_ARB_fragment_program shaders on top
  27  * of the GLSL compiler backend.
  28  */
  29
  30 #include "brw_context.h"
  31 #include "brw_fs.h"
  32
  33 static fs_reg
  34 regoffset(fs_reg reg, int i)
  35 {
  36    reg.reg_offset += i;
  37    return reg;
  38 }
  39
  40 void
  41 fs_visitor::emit_fp_alu1(enum opcode opcode,
  42                          const struct prog_instruction *fpi,
  43                          fs_reg dst, fs_reg src)
  44 {
  45    for (int i = 0; i < 4; i++) {
  46       if (fpi->DstReg.WriteMask & (1 << i))
  47          emit(opcode, regoffset(dst, i), regoffset(src, i));
  48    }
  49 }
  50
  51 void
  52 fs_visitor::emit_fp_alu2(enum opcode opcode,
  53                          const struct prog_instruction *fpi,
  54                          fs_reg dst, fs_reg src0, fs_reg src1)
  55 {
  56    for (int i = 0; i < 4; i++) {
  57       if (fpi->DstReg.WriteMask & (1 << i))
  58          emit(opcode, regoffset(dst, i),
  59               regoffset(src0, i), regoffset(src1, i));
  60    }
  61 }
  62
  63 void
  64 fs_visitor::emit_fp_minmax(const prog_instruction *fpi,
  65                            fs_reg dst, fs_reg src0, fs_reg src1)
  66 {
  67    uint32_t conditionalmod;
  68    if (fpi->Opcode == OPCODE_MIN)
  69       conditionalmod = BRW_CONDITIONAL_L;
  70    else
  71       conditionalmod = BRW_CONDITIONAL_GE;
  72
  73    for (int i = 0; i < 4; i++) {
  74       if (fpi->DstReg.WriteMask & (1 << i)) {
  75          emit_minmax(conditionalmod, regoffset(dst, i),
  76                      regoffset(src0, i), regoffset(src1, i));
  77       }
  78    }
  79 }
  80
  81 void
  82 fs_visitor::emit_fp_sop(uint32_t conditional_mod,
  83                         const struct prog_instruction *fpi,
  84                         fs_reg dst, fs_reg src0, fs_reg src1,
  85                         fs_reg one)
  86 {
  87    for (int i = 0; i < 4; i++) {
  88       if (fpi->DstReg.WriteMask & (1 << i)) {
  89          fs_inst *inst;
  90
  91          emit(CMP(reg_null_d, regoffset(src0, i), regoffset(src1, i),
  92                   conditional_mod));
  93
  94          inst = emit(BRW_OPCODE_SEL, regoffset(dst, i), one, fs_reg(0.0f));
  95          inst->predicate = BRW_PREDICATE_NORMAL;
  96       }
  97    }
  98 }
  99
 100 void
 101 fs_visitor::emit_fp_scalar_write(const struct prog_instruction *fpi,
 102                                  fs_reg dst, fs_reg src)
 103 {
 104    for (int i = 0; i < 4; i++) {
 105       if (fpi->DstReg.WriteMask & (1 << i))
 106          emit(MOV(regoffset(dst, i), src));
 107    }
 108 }
 109
 110 void
 111 fs_visitor::emit_fp_scalar_math(enum opcode opcode,
 112                                 const struct prog_instruction *fpi,
 113                                 fs_reg dst, fs_reg src)
 114 {
 115    fs_reg temp = fs_reg(this, glsl_type::float_type);
 116    emit_math(opcode, temp, src);
 117    emit_fp_scalar_write(fpi, dst, temp);
 118 }
 119
 120 void
 121 fs_visitor::emit_fragment_program_code()
 122 {
 123    setup_fp_regs();
 124
 125    fs_reg null = fs_reg(brw_null_reg());
 126
 127    /* Keep a reg with 1.0 around, for reuse by emit_fp_sop so that it can just
 128     * be:
 129     *
 130     * sel.f0 dst 1.0 0.0
 131     *
 132     * instead of
 133     *
 134     * mov    dst 0.0
 135     * mov.f0 dst 1.0
 136     */
 137    fs_reg one = fs_reg(this, glsl_type::float_type);
 138    emit(MOV(one, fs_reg(1.0f)));
 139
 140    for (unsigned int insn = 0; insn < fp->Base.NumInstructions; insn++) {
 141       const struct prog_instruction *fpi = &fp->Base.Instructions[insn];
 142       base_ir = fpi;
 143
 144       //_mesa_print_instruction(fpi);
 145
 146       fs_reg dst;
 147       fs_reg src[3];
 148
 149       /* We always emit into a temporary destination register to avoid
 150        * aliasing issues.
 151        */
 152       dst = fs_reg(this, glsl_type::vec4_type);
 153
 154       for (int i = 0; i < 3; i++)
 155          src[i] = get_fp_src_reg(&fpi->SrcReg[i]);
 156
 157       switch (fpi->Opcode) {
 158       case OPCODE_ABS:
 159          src[0].abs = true;
 160          src[0].negate = false;
 161          emit_fp_alu1(BRW_OPCODE_MOV, fpi, dst, src[0]);
 162          break;
 163
 164       case OPCODE_ADD:
 165          emit_fp_alu2(BRW_OPCODE_ADD, fpi, dst, src[0], src[1]);
 166          break;
 167
 168       case OPCODE_CMP:
 169          for (int i = 0; i < 4; i++) {
 170             if (fpi->DstReg.WriteMask & (1 << i)) {
 171                fs_inst *inst;
 172
 173                emit(CMP(null, regoffset(src[0], i), fs_reg(0.0f),
 174                         BRW_CONDITIONAL_L));
 175
 176                inst = emit(BRW_OPCODE_SEL, regoffset(dst, i),
 177                            regoffset(src[1], i), regoffset(src[2], i));
 178                inst->predicate = BRW_PREDICATE_NORMAL;
 179             }
 180          }
 181          break;
 182
 183       case OPCODE_COS:
 184          emit_fp_scalar_math(SHADER_OPCODE_COS, fpi, dst, src[0]);
 185          break;
 186
 187       case OPCODE_DP2:
 188       case OPCODE_DP3:
 189       case OPCODE_DP4:
 190       case OPCODE_DPH: {
 191          fs_reg mul = fs_reg(this, glsl_type::float_type);
 192          fs_reg acc = fs_reg(this, glsl_type::float_type);
 193          int count;
 194
 195          switch (fpi->Opcode) {
 196          case OPCODE_DP2: count = 2; break;
 197          case OPCODE_DP3: count = 3; break;
 198          case OPCODE_DP4: count = 4; break;
 199          case OPCODE_DPH: count = 3; break;
 200          default: assert(!"not reached"); count = 0; break;
 201          }
 202
 203          emit(MUL(acc, regoffset(src[0], 0), regoffset(src[1], 0)));
 204          for (int i = 1; i < count; i++) {
 205             emit(MUL(mul, regoffset(src[0], i), regoffset(src[1], i)));
 206             emit(ADD(acc, acc, mul));
 207          }
 208
 209          if (fpi->Opcode == OPCODE_DPH)
 210             emit(ADD(acc, acc, regoffset(src[1], 3)));
 211
 212          emit_fp_scalar_write(fpi, dst, acc);
 213          break;
 214       }
 215
 216       case OPCODE_DST:
 217          if (fpi->DstReg.WriteMask & WRITEMASK_X)
 218             emit(MOV(dst, fs_reg(1.0f)));
 219          if (fpi->DstReg.WriteMask & WRITEMASK_Y) {
 220             emit(MUL(regoffset(dst, 1),
 221                      regoffset(src[0], 1), regoffset(src[1], 1)));
 222          }
 223          if (fpi->DstReg.WriteMask & WRITEMASK_Z)
 224             emit(MOV(regoffset(dst, 2), regoffset(src[0], 2)));
 225          if (fpi->DstReg.WriteMask & WRITEMASK_W)
 226             emit(MOV(regoffset(dst, 3), regoffset(src[1], 3)));
 227          break;
 228
 229       case OPCODE_EX2:
 230          emit_fp_scalar_math(SHADER_OPCODE_EXP2, fpi, dst, src[0]);
 231          break;
 232
 233       case OPCODE_FLR:
 234          emit_fp_alu1(BRW_OPCODE_RNDD, fpi, dst, src[0]);
 235          break;
 236
 237       case OPCODE_FRC:
 238          emit_fp_alu1(BRW_OPCODE_FRC, fpi, dst, src[0]);
 239          break;
 240
 241       case OPCODE_KIL: {
 242          for (int i = 0; i < 4; i++) {
 243             /* In most cases the argument to a KIL will be something like
 244              * TEMP[0].wwww, so there's no point in checking whether .w is < 0
 245              * 4 times in a row.
 246              */
 247             if (i > 0 &&
 248                 GET_SWZ(fpi->SrcReg[0].Swizzle, i) ==
 249                 GET_SWZ(fpi->SrcReg[0].Swizzle, i - 1) &&
 250                 ((fpi->SrcReg[0].Negate >> i) & 1) ==
 251                 ((fpi->SrcReg[0].Negate >> (i - 1)) & 1)) {
 252                continue;
 253             }
 254
 255
 256             /* Emit an instruction that's predicated on the current
 257              * undiscarded pixels, and updates just those pixels to be
 258              * turned off.
 259              */
 260             fs_inst *cmp = emit(CMP(null, regoffset(src[0], i), fs_reg(0.0f),
 261                                     BRW_CONDITIONAL_GE));
 262             cmp->predicate = BRW_PREDICATE_NORMAL;
 263             cmp->flag_subreg = 1;
 264          }
 265          break;
 266       }
 267
 268       case OPCODE_LG2:
 269          emit_fp_scalar_math(SHADER_OPCODE_LOG2, fpi, dst, src[0]);
 270          break;
 271
 272       case OPCODE_LIT:
 273          /* From the ARB_fragment_program spec:
 274           *
 275           *      tmp = VectorLoad(op0);
 276           *      if (tmp.x < 0) tmp.x = 0;
 277           *      if (tmp.y < 0) tmp.y = 0;
 278           *      if (tmp.w < -(128.0-epsilon)) tmp.w = -(128.0-epsilon);
 279           *      else if (tmp.w > 128-epsilon) tmp.w = 128-epsilon;
 280           *      result.x = 1.0;
 281           *      result.y = tmp.x;
 282           *      result.z = (tmp.x > 0) ? RoughApproxPower(tmp.y, tmp.w) : 0.0;
 283           *      result.w = 1.0;
 284           *
 285           * Note that we don't do the clamping to +/- 128.  We didn't in
 286           * brw_wm_emit.c either.
 287           */
 288          if (fpi->DstReg.WriteMask & WRITEMASK_X)
 289             emit(MOV(regoffset(dst, 0), fs_reg(1.0f)));
 290
 291          if (fpi->DstReg.WriteMask & WRITEMASK_YZ) {
 292             fs_inst *inst;
 293             emit(CMP(null, regoffset(src[0], 0), fs_reg(0.0f),
 294                      BRW_CONDITIONAL_LE));
 295
 296             if (fpi->DstReg.WriteMask & WRITEMASK_Y) {
 297                emit(MOV(regoffset(dst, 1), regoffset(src[0], 0)));
 298                inst = emit(MOV(regoffset(dst, 1), fs_reg(0.0f)));
 299                inst->predicate = BRW_PREDICATE_NORMAL;
 300             }
 301
 302             if (fpi->DstReg.WriteMask & WRITEMASK_Z) {
 303                emit_math(SHADER_OPCODE_POW, regoffset(dst, 2),
 304                          regoffset(src[0], 1), regoffset(src[0], 3));
 305
 306                inst = emit(MOV(regoffset(dst, 2), fs_reg(0.0f)));
 307                inst->predicate = BRW_PREDICATE_NORMAL;
 308             }
 309          }
 310
 311          if (fpi->DstReg.WriteMask & WRITEMASK_W)
 312             emit(MOV(regoffset(dst, 3), fs_reg(1.0f)));
 313
 314          break;
 315
 316       case OPCODE_LRP:
 317          for (int i = 0; i < 4; i++) {
 318             if (fpi->DstReg.WriteMask & (1 << i)) {
 319                fs_reg a = regoffset(src[0], i);
 320                fs_reg y = regoffset(src[1], i);
 321                fs_reg x = regoffset(src[2], i);
 322                emit_lrp(regoffset(dst, i), x, y, a);
 323             }
 324          }
 325          break;
 326
 327       case OPCODE_MAD:
 328          for (int i = 0; i < 4; i++) {
 329             if (fpi->DstReg.WriteMask & (1 << i)) {
 330                fs_reg temp = fs_reg(this, glsl_type::float_type);
 331                emit(MUL(temp, regoffset(src[0], i), regoffset(src[1], i)));
 332                emit(ADD(regoffset(dst, i), temp, regoffset(src[2], i)));
 333             }
 334          }
 335          break;
 336
 337       case OPCODE_MAX:
 338          emit_fp_minmax(fpi, dst, src[0], src[1]);
 339          break;
 340
 341       case OPCODE_MOV:
 342          emit_fp_alu1(BRW_OPCODE_MOV, fpi, dst, src[0]);
 343          break;
 344
 345       case OPCODE_MIN:
 346          emit_fp_minmax(fpi, dst, src[0], src[1]);
 347          break;
 348
 349       case OPCODE_MUL:
 350          emit_fp_alu2(BRW_OPCODE_MUL, fpi, dst, src[0], src[1]);
 351          break;
 352
 353       case OPCODE_POW: {
 354          fs_reg temp = fs_reg(this, glsl_type::float_type);
 355          emit_math(SHADER_OPCODE_POW, temp, src[0], src[1]);
 356          emit_fp_scalar_write(fpi, dst, temp);
 357          break;
 358       }
 359
 360       case OPCODE_RCP:
 361          emit_fp_scalar_math(SHADER_OPCODE_RCP, fpi, dst, src[0]);
 362          break;
 363
 364       case OPCODE_RSQ:
 365          emit_fp_scalar_math(SHADER_OPCODE_RSQ, fpi, dst, src[0]);
 366          break;
 367
 368       case OPCODE_SCS:
 369          if (fpi->DstReg.WriteMask & WRITEMASK_X) {
 370             emit_math(SHADER_OPCODE_COS, regoffset(dst, 0),
 371                       regoffset(src[0], 0));
 372          }
 373
 374          if (fpi->DstReg.WriteMask & WRITEMASK_Y) {
 375             emit_math(SHADER_OPCODE_SIN, regoffset(dst, 1),
 376                       regoffset(src[0], 1));
 377          }
 378          break;
 379
 380       case OPCODE_SGE:
 381          emit_fp_sop(BRW_CONDITIONAL_GE, fpi, dst, src[0], src[1], one);
 382          break;
 383
 384       case OPCODE_SIN:
 385          emit_fp_scalar_math(SHADER_OPCODE_SIN, fpi, dst, src[0]);
 386          break;
 387
 388       case OPCODE_SLT:
 389          emit_fp_sop(BRW_CONDITIONAL_L, fpi, dst, src[0], src[1], one);
 390          break;
 391
 392       case OPCODE_SUB: {
 393          fs_reg neg_src1 = src[1];
 394          neg_src1.negate = !src[1].negate;
 395
 396          emit_fp_alu2(BRW_OPCODE_ADD, fpi, dst, src[0], neg_src1);
 397          break;
 398       }
 399
 400       case OPCODE_TEX:
 401       case OPCODE_TXB:
 402       case OPCODE_TXP: {
 403          /* We piggy-back on the GLSL IR support for texture setup.  To do so,
 404           * we have to cook up an ir_texture that has the coordinate field
 405           * with appropriate type, and shadow_comparitor set or not.  All the
 406           * other properties of ir_texture are passed in as arguments to the
 407           * emit_texture_gen* function.
 408           */
 409          ir_texture *ir = NULL;
 410
 411          fs_reg lod;
 412          fs_reg dpdy;
 413          fs_reg coordinate = src[0];
 414          fs_reg shadow_c;
 415          fs_reg sample_index;
 416
 417          switch (fpi->Opcode) {
 418          case OPCODE_TEX:
 419             ir = new(mem_ctx) ir_texture(ir_tex);
 420             break;
 421          case OPCODE_TXP: {
 422             ir = new(mem_ctx) ir_texture(ir_tex);
 423
 424             coordinate = fs_reg(this, glsl_type::vec3_type);
 425             fs_reg invproj = fs_reg(this, glsl_type::float_type);
 426             emit_math(SHADER_OPCODE_RCP, invproj, regoffset(src[0], 3));
 427             for (int i = 0; i < 3; i++) {
 428                emit(MUL(regoffset(coordinate, i),
 429                         regoffset(src[0], i), invproj));
 430             }
 431             break;
 432          }
 433          case OPCODE_TXB:
 434             ir = new(mem_ctx) ir_texture(ir_txb);
 435             lod = regoffset(src[0], 3);
 436             break;
 437          default:
 438             assert(!"not reached");
 439             break;
 440          }
 441
 442          ir->type = glsl_type::vec4_type;
 443
 444          const glsl_type *coordinate_type;
 445          switch (fpi->TexSrcTarget) {
 446          case TEXTURE_1D_INDEX:
 447             coordinate_type = glsl_type::float_type;
 448             break;
 449
 450          case TEXTURE_2D_INDEX:
 451          case TEXTURE_1D_ARRAY_INDEX:
 452          case TEXTURE_RECT_INDEX:
 453          case TEXTURE_EXTERNAL_INDEX:
 454             coordinate_type = glsl_type::vec2_type;
 455             break;
 456
 457          case TEXTURE_3D_INDEX:
 458          case TEXTURE_2D_ARRAY_INDEX:
 459             coordinate_type = glsl_type::vec3_type;
 460             break;
 461
 462          case TEXTURE_CUBE_INDEX: {
 463             coordinate_type = glsl_type::vec3_type;
 464
 465             fs_reg temp = fs_reg(this, glsl_type::float_type);
 466             fs_reg cubecoord = fs_reg(this, glsl_type::vec3_type);
 467             fs_reg abscoord = coordinate;
 468             abscoord.negate = false;
 469             abscoord.abs = true;
 470             emit_minmax(BRW_CONDITIONAL_GE, temp,
 471                         regoffset(abscoord, 0), regoffset(abscoord, 1));
 472             emit_minmax(BRW_CONDITIONAL_GE, temp,
 473                         temp, regoffset(abscoord, 2));
 474             emit_math(SHADER_OPCODE_RCP, temp, temp);
 475             for (int i = 0; i < 3; i++) {
 476                emit(MUL(regoffset(cubecoord, i),
 477                         regoffset(coordinate, i), temp));
 478             }
 479
 480             coordinate = cubecoord;
 481             break;
 482          }
 483
 484          default:
 485             assert(!"not reached");
 486             coordinate_type = glsl_type::vec2_type;
 487             break;
 488          }
 489
 490          ir_constant_data junk_data;
 491          ir->coordinate = new(mem_ctx) ir_constant(coordinate_type, &junk_data);
 492
 493          coordinate = rescale_texcoord(ir, coordinate,
 494                                        fpi->TexSrcTarget == TEXTURE_RECT_INDEX,
 495                                        fpi->TexSrcUnit, fpi->TexSrcUnit);
 496
 497          if (fpi->TexShadow) {
 498             shadow_c = regoffset(coordinate, 2);
 499             ir->shadow_comparitor = new(mem_ctx) ir_constant(0.0f);
 500          }
 501
 502          fs_inst *inst;
 503          if (brw->gen >= 7) {
 504             inst = emit_texture_gen7(ir, dst, coordinate, shadow_c, lod, dpdy, sample_index);
 505          } else if (brw->gen >= 5) {
 506             inst = emit_texture_gen5(ir, dst, coordinate, shadow_c, lod, dpdy, sample_index);
 507          } else {
 508             inst = emit_texture_gen4(ir, dst, coordinate, shadow_c, lod, dpdy);
 509          }
 510
 511          inst->sampler = fpi->TexSrcUnit;
 512          inst->shadow_compare = fpi->TexShadow;
 513
 514          /* Reuse the GLSL swizzle_result() handler. */
 515          swizzle_result(ir, dst, fpi->TexSrcUnit);
 516          dst = this->result;
 517
 518          break;
 519       }
 520
 521       case OPCODE_SWZ:
 522          /* Note that SWZ's extended swizzles are handled in the general
 523           * get_src_reg() code.
 524           */
 525          emit_fp_alu1(BRW_OPCODE_MOV, fpi, dst, src[0]);
 526          break;
 527
 528       case OPCODE_XPD:
 529          for (int i = 0; i < 3; i++) {
 530             if (fpi->DstReg.WriteMask & (1 << i)) {
 531                int i1 = (i + 1) % 3;
 532                int i2 = (i + 2) % 3;
 533
 534                fs_reg temp = fs_reg(this, glsl_type::float_type);
 535                fs_reg neg_src1_1 = regoffset(src[1], i1);
 536                neg_src1_1.negate = !neg_src1_1.negate;
 537                emit(MUL(temp, regoffset(src[0], i2), neg_src1_1));
 538                emit(MUL(regoffset(dst, i),
 539                         regoffset(src[0], i1), regoffset(src[1], i2)));
 540                emit(ADD(regoffset(dst, i), regoffset(dst, i), temp));
 541             }
 542          }
 543          break;
 544
 545       case OPCODE_END:
 546          break;
 547
 548       default:
 549          _mesa_problem(ctx, "Unsupported opcode %s in fragment program\n",
 550                        _mesa_opcode_string(fpi->Opcode));
 551       }
 552
 553       /* To handle saturates, we emit a MOV with a saturate bit, which
 554        * optimization should fold into the preceding instructions when safe.
 555        */
 556       if (fpi->Opcode != OPCODE_END) {
 557          fs_reg real_dst = get_fp_dst_reg(&fpi->DstReg);
 558
 559          for (int i = 0; i < 4; i++) {
 560             if (fpi->DstReg.WriteMask & (1 << i)) {
 561                fs_inst *inst = emit(MOV(regoffset(real_dst, i),
 562                                         regoffset(dst, i)));
 563                inst->saturate = fpi->SaturateMode;
 564             }
 565          }
 566       }
 567    }
 568
 569    /* Epilogue:
 570     *
 571     * Fragment depth has this strange convention of being the .z component of
 572     * a vec4.  emit_fb_write() wants to see a float value, instead.
 573     */
 574    this->current_annotation = "result.depth write";
 575    if (frag_depth.file != BAD_FILE) {
 576       fs_reg temp = fs_reg(this, glsl_type::float_type);
 577       emit(MOV(temp, regoffset(frag_depth, 2)));
 578       frag_depth = temp;
 579    }
 580 }
 581
 582 void
 583 fs_visitor::setup_fp_regs()
 584 {
 585    /* PROGRAM_TEMPORARY */
 586    int num_temp = fp->Base.NumTemporaries;
 587    fp_temp_regs = rzalloc_array(mem_ctx, fs_reg, num_temp);
 588    for (int i = 0; i < num_temp; i++)
 589       fp_temp_regs[i] = fs_reg(this, glsl_type::vec4_type);
 590
 591    /* PROGRAM_STATE_VAR etc. */
 592    if (dispatch_width == 8) {
 593       for (unsigned p = 0;
 594            p < fp->Base.Parameters->NumParameters; p++) {
 595          for (unsigned int i = 0; i < 4; i++) {
 596             c->prog_data.param[c->prog_data.nr_params++] =
 597                &fp->Base.Parameters->ParameterValues[p][i].f;
 598          }
 599       }
 600    }
 601
 602    fp_input_regs = rzalloc_array(mem_ctx, fs_reg, VARYING_SLOT_MAX);
 603    for (int i = 0; i < VARYING_SLOT_MAX; i++) {
 604       if (fp->Base.InputsRead & BITFIELD64_BIT(i)) {
 605          /* Make up a dummy instruction to reuse code for emitting
 606           * interpolation.
 607           */
 608          ir_variable *ir = new(mem_ctx) ir_variable(glsl_type::vec4_type,
 609                                                     "fp_input",
 610                                                     ir_var_shader_in);
 611          ir->location = i;
 612
 613          this->current_annotation = ralloc_asprintf(ctx, "interpolate input %d",
 614                                                     i);
 615
 616          switch (i) {
 617          case VARYING_SLOT_POS:
 618             ir->pixel_center_integer = fp->PixelCenterInteger;
 619             ir->origin_upper_left = fp->OriginUpperLeft;
 620             fp_input_regs[i] = *emit_fragcoord_interpolation(ir);
 621             break;
 622          case VARYING_SLOT_FACE:
 623             fp_input_regs[i] = *emit_frontfacing_interpolation(ir);
 624             break;
 625          default:
 626             fp_input_regs[i] = *emit_general_interpolation(ir);
 627
 628             if (i == VARYING_SLOT_FOGC) {
 629                emit(MOV(regoffset(fp_input_regs[i], 1), fs_reg(0.0f)));
 630                emit(MOV(regoffset(fp_input_regs[i], 2), fs_reg(0.0f)));
 631                emit(MOV(regoffset(fp_input_regs[i], 3), fs_reg(1.0f)));
 632             }
 633
 634             break;
 635          }
 636
 637          this->current_annotation = NULL;
 638       }
 639    }
 640 }
 641
 642 fs_reg
 643 fs_visitor::get_fp_dst_reg(const prog_dst_register *dst)
 644 {
 645    switch (dst->File) {
 646    case PROGRAM_TEMPORARY:
 647       return fp_temp_regs[dst->Index];
 648
 649    case PROGRAM_OUTPUT:
 650       if (dst->Index == FRAG_RESULT_DEPTH) {
 651          if (frag_depth.file == BAD_FILE)
 652             frag_depth = fs_reg(this, glsl_type::vec4_type);
 653          return frag_depth;
 654       } else if (dst->Index == FRAG_RESULT_COLOR) {
 655          if (outputs[0].file == BAD_FILE) {
 656             outputs[0] = fs_reg(this, glsl_type::vec4_type);
 657             output_components[0] = 4;
 658
 659             /* Tell emit_fb_writes() to smear fragment.color across all the
 660              * color attachments.
 661              */
 662             for (int i = 1; i < c->key.nr_color_regions; i++) {
 663                outputs[i] = outputs[0];
 664                output_components[i] = output_components[0];
 665             }
 666          }
 667          return outputs[0];
 668       } else {
 669          int output_index = dst->Index - FRAG_RESULT_DATA0;
 670          if (outputs[output_index].file == BAD_FILE) {
 671             outputs[output_index] = fs_reg(this, glsl_type::vec4_type);
 672          }
 673          output_components[output_index] = 4;
 674          return outputs[output_index];
 675       }
 676
 677    case PROGRAM_UNDEFINED:
 678       return fs_reg();
 679
 680    default:
 681       _mesa_problem(ctx, "bad dst register file: %s\n",
 682                     _mesa_register_file_name((gl_register_file)dst->File));
 683       return fs_reg(this, glsl_type::vec4_type);
 684    }
 685 }
 686
 687 fs_reg
 688 fs_visitor::get_fp_src_reg(const prog_src_register *src)
 689 {
 690    struct gl_program_parameter_list *plist = fp->Base.Parameters;
 691
 692    fs_reg result;
 693
 694    assert(!src->Abs);
 695
 696    switch (src->File) {
 697    case PROGRAM_UNDEFINED:
 698       return fs_reg();
 699    case PROGRAM_TEMPORARY:
 700       result = fp_temp_regs[src->Index];
 701       break;
 702
 703    case PROGRAM_INPUT:
 704       result = fp_input_regs[src->Index];
 705       break;
 706
 707    case PROGRAM_STATE_VAR:
 708    case PROGRAM_UNIFORM:
 709    case PROGRAM_CONSTANT:
 710       /* We actually want to look at the type in the Parameters list for this,
 711        * because this lets us upload constant builtin uniforms, as actual
 712        * constants.
 713        */
 714       switch (plist->Parameters[src->Index].Type) {
 715       case PROGRAM_CONSTANT: {
 716          result = fs_reg(this, glsl_type::vec4_type);
 717
 718          for (int i = 0; i < 4; i++) {
 719             emit(MOV(regoffset(result, i),
 720                      fs_reg(plist->ParameterValues[src->Index][i].f)));
 721          }
 722          break;
 723       }
 724
 725       case PROGRAM_STATE_VAR:
 726       case PROGRAM_UNIFORM:
 727          result = fs_reg(UNIFORM, src->Index * 4);
 728          break;
 729
 730       default:
 731          _mesa_problem(ctx, "bad uniform src register file: %s\n",
 732                        _mesa_register_file_name((gl_register_file)src->File));
 733          return fs_reg(this, glsl_type::vec4_type);
 734       }
 735       break;
 736
 737    default:
 738       _mesa_problem(ctx, "bad src register file: %s\n",
 739                     _mesa_register_file_name((gl_register_file)src->File));
 740       return fs_reg(this, glsl_type::vec4_type);
 741    }
 742
 743    if (src->Swizzle != SWIZZLE_NOOP || src->Negate) {
 744       fs_reg unswizzled = result;
 745       result = fs_reg(this, glsl_type::vec4_type);
 746       for (int i = 0; i < 4; i++) {
 747          bool negate = src->Negate & (1 << i);
 748          /* The ZERO, ONE, and Negate options are only used for OPCODE_SWZ,
 749           * but it costs us nothing to support it.
 750           */
 751          int src_swiz = GET_SWZ(src->Swizzle, i);
 752          if (src_swiz == SWIZZLE_ZERO) {
 753             emit(MOV(regoffset(result, i), fs_reg(0.0f)));
 754          } else if (src_swiz == SWIZZLE_ONE) {
 755             emit(MOV(regoffset(result, i),
 756                      negate ? fs_reg(-1.0f) : fs_reg(1.0f)));
 757          } else {
 758             fs_reg src = regoffset(unswizzled, src_swiz);
 759             if (negate)
 760                src.negate = !src.negate;
 761             emit(MOV(regoffset(result, i), src));
 762          }
 763       }
 764    }
 765
 766    return result;
 767 }