src/mesa/drivers/dri/i965/brw_fs_fp.cpp

   1 /*
   2  * Copyright © 2012 Intel Corporation
   3  *
   4  * Permission is hereby granted, free of charge, to any person obtaining a
   5  * copy of this software and associated documentation files (the "Software"),
   6  * to deal in the Software without restriction, including without limitation
   7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
   8  * and/or sell copies of the Software, and to permit persons to whom the
   9  * Software is furnished to do so, subject to the following conditions:
  10  *
  11  * The above copyright notice and this permission notice (including the next
  12  * paragraph) shall be included in all copies or substantial portions of the
  13  * Software.
  14  *
  15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
  18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
  20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
  21  * IN THE SOFTWARE.
  22  */
  23
  24 /** @file brw_fs_fp.cpp
  25  *
  26  * Implementation of the compiler for GL_ARB_fragment_program shaders on top
  27  * of the GLSL compiler backend.
  28  */
  29
  30 #include "brw_context.h"
  31 #include "brw_fs.h"
  32
  33 static fs_reg
  34 regoffset(fs_reg reg, int i)
  35 {
  36    reg.reg_offset += i;
  37    return reg;
  38 }
  39
  40 void
  41 fs_visitor::emit_fp_alu1(enum opcode opcode,
  42                          const struct prog_instruction *fpi,
  43                          fs_reg dst, fs_reg src)
  44 {
  45    for (int i = 0; i < 4; i++) {
  46       if (fpi->DstReg.WriteMask & (1 << i))
  47          emit(opcode, regoffset(dst, i), regoffset(src, i));
  48    }
  49 }
  50
  51 void
  52 fs_visitor::emit_fp_alu2(enum opcode opcode,
  53                          const struct prog_instruction *fpi,
  54                          fs_reg dst, fs_reg src0, fs_reg src1)
  55 {
  56    for (int i = 0; i < 4; i++) {
  57       if (fpi->DstReg.WriteMask & (1 << i))
  58          emit(opcode, regoffset(dst, i),
  59               regoffset(src0, i), regoffset(src1, i));
  60    }
  61 }
  62
  63 void
  64 fs_visitor::emit_fp_minmax(const prog_instruction *fpi,
  65                            fs_reg dst, fs_reg src0, fs_reg src1)
  66 {
  67    uint32_t conditionalmod;
  68    if (fpi->Opcode == OPCODE_MIN)
  69       conditionalmod = BRW_CONDITIONAL_L;
  70    else
  71       conditionalmod = BRW_CONDITIONAL_GE;
  72
  73    for (int i = 0; i < 4; i++) {
  74       if (fpi->DstReg.WriteMask & (1 << i)) {
  75          emit_minmax(conditionalmod, regoffset(dst, i),
  76                      regoffset(src0, i), regoffset(src1, i));
  77       }
  78    }
  79 }
  80
  81 void
  82 fs_visitor::emit_fp_sop(uint32_t conditional_mod,
  83                         const struct prog_instruction *fpi,
  84                         fs_reg dst, fs_reg src0, fs_reg src1,
  85                         fs_reg one)
  86 {
  87    for (int i = 0; i < 4; i++) {
  88       if (fpi->DstReg.WriteMask & (1 << i)) {
  89          fs_inst *inst;
  90
  91          emit(CMP(reg_null_d, regoffset(src0, i), regoffset(src1, i),
  92                   conditional_mod));
  93
  94          inst = emit(BRW_OPCODE_SEL, regoffset(dst, i), one, fs_reg(0.0f));
  95          inst->predicate = BRW_PREDICATE_NORMAL;
  96       }
  97    }
  98 }
  99
 100 void
 101 fs_visitor::emit_fp_scalar_write(const struct prog_instruction *fpi,
 102                                  fs_reg dst, fs_reg src)
 103 {
 104    for (int i = 0; i < 4; i++) {
 105       if (fpi->DstReg.WriteMask & (1 << i))
 106          emit(MOV(regoffset(dst, i), src));
 107    }
 108 }
 109
 110 void
 111 fs_visitor::emit_fp_scalar_math(enum opcode opcode,
 112                                 const struct prog_instruction *fpi,
 113                                 fs_reg dst, fs_reg src)
 114 {
 115    fs_reg temp = fs_reg(this, glsl_type::float_type);
 116    emit_math(opcode, temp, src);
 117    emit_fp_scalar_write(fpi, dst, temp);
 118 }
 119
 120 void
 121 fs_visitor::emit_fragment_program_code()
 122 {
 123    setup_fp_regs();
 124
 125    fs_reg null = fs_reg(brw_null_reg());
 126
 127    /* Keep a reg with 1.0 around, for reuse by emit_fp_sop so that it can just
 128     * be:
 129     *
 130     * sel.f0 dst 1.0 0.0
 131     *
 132     * instead of
 133     *
 134     * mov    dst 0.0
 135     * mov.f0 dst 1.0
 136     */
 137    fs_reg one = fs_reg(this, glsl_type::float_type);
 138    emit(MOV(one, fs_reg(1.0f)));
 139
 140    for (unsigned int insn = 0; insn < fp->Base.NumInstructions; insn++) {
 141       const struct prog_instruction *fpi = &fp->Base.Instructions[insn];
 142       base_ir = fpi;
 143
 144       //_mesa_print_instruction(fpi);
 145
 146       fs_reg dst;
 147       fs_reg src[3];
 148
 149       /* We always emit into a temporary destination register to avoid
 150        * aliasing issues.
 151        */
 152       dst = fs_reg(this, glsl_type::vec4_type);
 153
 154       for (int i = 0; i < 3; i++)
 155          src[i] = get_fp_src_reg(&fpi->SrcReg[i]);
 156
 157       switch (fpi->Opcode) {
 158       case OPCODE_ABS:
 159          src[0].abs = true;
 160          src[0].negate = false;
 161          emit_fp_alu1(BRW_OPCODE_MOV, fpi, dst, src[0]);
 162          break;
 163
 164       case OPCODE_ADD:
 165          emit_fp_alu2(BRW_OPCODE_ADD, fpi, dst, src[0], src[1]);
 166          break;
 167
 168       case OPCODE_CMP:
 169          for (int i = 0; i < 4; i++) {
 170             if (fpi->DstReg.WriteMask & (1 << i)) {
 171                fs_inst *inst;
 172
 173                emit(CMP(null, regoffset(src[0], i), fs_reg(0.0f),
 174                         BRW_CONDITIONAL_L));
 175
 176                inst = emit(BRW_OPCODE_SEL, regoffset(dst, i),
 177                            regoffset(src[1], i), regoffset(src[2], i));
 178                inst->predicate = BRW_PREDICATE_NORMAL;
 179             }
 180          }
 181          break;
 182
 183       case OPCODE_COS:
 184          emit_fp_scalar_math(SHADER_OPCODE_COS, fpi, dst, src[0]);
 185          break;
 186
 187       case OPCODE_DP2:
 188       case OPCODE_DP3:
 189       case OPCODE_DP4:
 190       case OPCODE_DPH: {
 191          fs_reg mul = fs_reg(this, glsl_type::float_type);
 192          fs_reg acc = fs_reg(this, glsl_type::float_type);
 193          int count;
 194
 195          switch (fpi->Opcode) {
 196          case OPCODE_DP2: count = 2; break;
 197          case OPCODE_DP3: count = 3; break;
 198          case OPCODE_DP4: count = 4; break;
 199          case OPCODE_DPH: count = 3; break;
 200          default: assert(!"not reached"); count = 0; break;
 201          }
 202
 203          emit(MUL(acc, regoffset(src[0], 0), regoffset(src[1], 0)));
 204          for (int i = 1; i < count; i++) {
 205             emit(MUL(mul, regoffset(src[0], i), regoffset(src[1], i)));
 206             emit(ADD(acc, acc, mul));
 207          }
 208
 209          if (fpi->Opcode == OPCODE_DPH)
 210             emit(ADD(acc, acc, regoffset(src[1], 3)));
 211
 212          emit_fp_scalar_write(fpi, dst, acc);
 213          break;
 214       }
 215
 216       case OPCODE_DST:
 217          if (fpi->DstReg.WriteMask & WRITEMASK_X)
 218             emit(MOV(dst, fs_reg(1.0f)));
 219          if (fpi->DstReg.WriteMask & WRITEMASK_Y) {
 220             emit(MUL(regoffset(dst, 1),
 221                      regoffset(src[0], 1), regoffset(src[1], 1)));
 222          }
 223          if (fpi->DstReg.WriteMask & WRITEMASK_Z)
 224             emit(MOV(regoffset(dst, 2), regoffset(src[0], 2)));
 225          if (fpi->DstReg.WriteMask & WRITEMASK_W)
 226             emit(MOV(regoffset(dst, 3), regoffset(src[1], 3)));
 227          break;
 228
 229       case OPCODE_EX2:
 230          emit_fp_scalar_math(SHADER_OPCODE_EXP2, fpi, dst, src[0]);
 231          break;
 232
 233       case OPCODE_FLR:
 234          emit_fp_alu1(BRW_OPCODE_RNDD, fpi, dst, src[0]);
 235          break;
 236
 237       case OPCODE_FRC:
 238          emit_fp_alu1(BRW_OPCODE_FRC, fpi, dst, src[0]);
 239          break;
 240
 241       case OPCODE_KIL: {
 242          for (int i = 0; i < 4; i++) {
 243             /* In most cases the argument to a KIL will be something like
 244              * TEMP[0].wwww, so there's no point in checking whether .w is < 0
 245              * 4 times in a row.
 246              */
 247             if (i > 0 &&
 248                 GET_SWZ(fpi->SrcReg[0].Swizzle, i) ==
 249                 GET_SWZ(fpi->SrcReg[0].Swizzle, i - 1) &&
 250                 ((fpi->SrcReg[0].Negate >> i) & 1) ==
 251                 ((fpi->SrcReg[0].Negate >> (i - 1)) & 1)) {
 252                continue;
 253             }
 254
 255             emit(CMP(null, regoffset(src[0], i), fs_reg(0.0f),
 256                      BRW_CONDITIONAL_L));
 257
 258             if (intel->gen < 6 && dispatch_width == 16)
 259                fail("Can't support (non-uniform) control flow on 16-wide");
 260             emit(IF(BRW_PREDICATE_NORMAL));
 261             emit(FS_OPCODE_DISCARD);
 262             emit(BRW_OPCODE_ENDIF);
 263          }
 264          break;
 265       }
 266
 267       case OPCODE_LG2:
 268          emit_fp_scalar_math(SHADER_OPCODE_LOG2, fpi, dst, src[0]);
 269          break;
 270
 271       case OPCODE_LIT:
 272          /* From the ARB_fragment_program spec:
 273           *
 274           *      tmp = VectorLoad(op0);
 275           *      if (tmp.x < 0) tmp.x = 0;
 276           *      if (tmp.y < 0) tmp.y = 0;
 277           *      if (tmp.w < -(128.0-epsilon)) tmp.w = -(128.0-epsilon);
 278           *      else if (tmp.w > 128-epsilon) tmp.w = 128-epsilon;
 279           *      result.x = 1.0;
 280           *      result.y = tmp.x;
 281           *      result.z = (tmp.x > 0) ? RoughApproxPower(tmp.y, tmp.w) : 0.0;
 282           *      result.w = 1.0;
 283           *
 284           * Note that we don't do the clamping to +/- 128.  We didn't in
 285           * brw_wm_emit.c either.
 286           */
 287          if (fpi->DstReg.WriteMask & WRITEMASK_X)
 288             emit(MOV(regoffset(dst, 0), fs_reg(1.0f)));
 289
 290          if (fpi->DstReg.WriteMask & WRITEMASK_YZ) {
 291             fs_inst *inst;
 292             emit(CMP(null, regoffset(src[0], 0), fs_reg(0.0f),
 293                      BRW_CONDITIONAL_LE));
 294
 295             if (fpi->DstReg.WriteMask & WRITEMASK_Y) {
 296                emit(MOV(regoffset(dst, 1), regoffset(src[0], 0)));
 297                inst = emit(MOV(regoffset(dst, 1), fs_reg(0.0f)));
 298                inst->predicate = BRW_PREDICATE_NORMAL;
 299             }
 300
 301             if (fpi->DstReg.WriteMask & WRITEMASK_Z) {
 302                emit_math(SHADER_OPCODE_POW, regoffset(dst, 2),
 303                          regoffset(src[0], 1), regoffset(src[0], 3));
 304
 305                inst = emit(MOV(regoffset(dst, 2), fs_reg(0.0f)));
 306                inst->predicate = BRW_PREDICATE_NORMAL;
 307             }
 308          }
 309
 310          if (fpi->DstReg.WriteMask & WRITEMASK_W)
 311             emit(MOV(regoffset(dst, 3), fs_reg(1.0f)));
 312
 313          break;
 314
 315       case OPCODE_LRP:
 316          for (int i = 0; i < 4; i++) {
 317             if (fpi->DstReg.WriteMask & (1 << i)) {
 318                fs_reg neg_src0 = regoffset(src[0], i);
 319                neg_src0.negate = !neg_src0.negate;
 320                fs_reg temp = fs_reg(this, glsl_type::float_type);
 321                fs_reg temp2 = fs_reg(this, glsl_type::float_type);
 322                emit(ADD(temp, neg_src0, fs_reg(1.0f)));
 323                emit(MUL(temp, temp, regoffset(src[2], i)));
 324                emit(MUL(temp2, regoffset(src[0], i), regoffset(src[1], i)));
 325                emit(ADD(regoffset(dst, i), temp, temp2));
 326             }
 327          }
 328          break;
 329
 330       case OPCODE_MAD:
 331          for (int i = 0; i < 4; i++) {
 332             if (fpi->DstReg.WriteMask & (1 << i)) {
 333                fs_reg temp = fs_reg(this, glsl_type::float_type);
 334                emit(MUL(temp, regoffset(src[0], i), regoffset(src[1], i)));
 335                emit(ADD(regoffset(dst, i), temp, regoffset(src[2], i)));
 336             }
 337          }
 338          break;
 339
 340       case OPCODE_MAX:
 341          emit_fp_minmax(fpi, dst, src[0], src[1]);
 342          break;
 343
 344       case OPCODE_MOV:
 345          emit_fp_alu1(BRW_OPCODE_MOV, fpi, dst, src[0]);
 346          break;
 347
 348       case OPCODE_MIN:
 349          emit_fp_minmax(fpi, dst, src[0], src[1]);
 350          break;
 351
 352       case OPCODE_MUL:
 353          emit_fp_alu2(BRW_OPCODE_MUL, fpi, dst, src[0], src[1]);
 354          break;
 355
 356       case OPCODE_POW: {
 357          fs_reg temp = fs_reg(this, glsl_type::float_type);
 358          emit_math(SHADER_OPCODE_POW, temp, src[0], src[1]);
 359          emit_fp_scalar_write(fpi, dst, temp);
 360          break;
 361       }
 362
 363       case OPCODE_RCP:
 364          emit_fp_scalar_math(SHADER_OPCODE_RCP, fpi, dst, src[0]);
 365          break;
 366
 367       case OPCODE_RSQ:
 368          emit_fp_scalar_math(SHADER_OPCODE_RSQ, fpi, dst, src[0]);
 369          break;
 370
 371       case OPCODE_SCS:
 372          if (fpi->DstReg.WriteMask & WRITEMASK_X) {
 373             emit_math(SHADER_OPCODE_COS, regoffset(dst, 0),
 374                       regoffset(src[0], 0));
 375          }
 376
 377          if (fpi->DstReg.WriteMask & WRITEMASK_Y) {
 378             emit_math(SHADER_OPCODE_SIN, regoffset(dst, 1),
 379                       regoffset(src[0], 1));
 380          }
 381          break;
 382
 383       case OPCODE_SGE:
 384          emit_fp_sop(BRW_CONDITIONAL_GE, fpi, dst, src[0], src[1], one);
 385          break;
 386
 387       case OPCODE_SIN:
 388          emit_fp_scalar_math(SHADER_OPCODE_SIN, fpi, dst, src[0]);
 389          break;
 390
 391       case OPCODE_SLT:
 392          emit_fp_sop(BRW_CONDITIONAL_L, fpi, dst, src[0], src[1], one);
 393          break;
 394
 395       case OPCODE_SUB: {
 396          fs_reg neg_src1 = src[1];
 397          neg_src1.negate = !src[1].negate;
 398
 399          emit_fp_alu2(BRW_OPCODE_ADD, fpi, dst, src[0], neg_src1);
 400          break;
 401       }
 402
 403       case OPCODE_TEX:
 404       case OPCODE_TXB:
 405       case OPCODE_TXP: {
 406          /* We piggy-back on the GLSL IR support for texture setup.  To do so,
 407           * we have to cook up an ir_texture that has the coordinate field
 408           * with appropriate type, and shadow_comparitor set or not.  All the
 409           * other properties of ir_texture are passed in as arguments to the
 410           * emit_texture_gen* function.
 411           */
 412          ir_texture *ir = NULL;
 413
 414          fs_reg lod;
 415          fs_reg dpdy;
 416          fs_reg coordinate = src[0];
 417          fs_reg shadow_c;
 418
 419          switch (fpi->Opcode) {
 420          case OPCODE_TEX:
 421             ir = new(mem_ctx) ir_texture(ir_tex);
 422             break;
 423          case OPCODE_TXP: {
 424             ir = new(mem_ctx) ir_texture(ir_tex);
 425
 426             coordinate = fs_reg(this, glsl_type::vec3_type);
 427             fs_reg invproj = fs_reg(this, glsl_type::float_type);
 428             emit_math(SHADER_OPCODE_RCP, invproj, regoffset(src[0], 3));
 429             for (int i = 0; i < 3; i++) {
 430                emit(MUL(regoffset(coordinate, i),
 431                         regoffset(src[0], i), invproj));
 432             }
 433             break;
 434          }
 435          case OPCODE_TXB:
 436             ir = new(mem_ctx) ir_texture(ir_txb);
 437             lod = regoffset(src[0], 3);
 438             break;
 439          default:
 440             assert(!"not reached");
 441             break;
 442          }
 443
 444          const glsl_type *coordinate_type;
 445          switch (fpi->TexSrcTarget) {
 446          case TEXTURE_1D_INDEX:
 447             coordinate_type = glsl_type::float_type;
 448             break;
 449
 450          case TEXTURE_2D_INDEX:
 451          case TEXTURE_1D_ARRAY_INDEX:
 452          case TEXTURE_RECT_INDEX:
 453          case TEXTURE_EXTERNAL_INDEX:
 454             coordinate_type = glsl_type::vec2_type;
 455             break;
 456
 457          case TEXTURE_3D_INDEX:
 458          case TEXTURE_2D_ARRAY_INDEX:
 459             coordinate_type = glsl_type::vec3_type;
 460             break;
 461
 462          case TEXTURE_CUBE_INDEX: {
 463             coordinate_type = glsl_type::vec3_type;
 464
 465             fs_reg temp = fs_reg(this, glsl_type::float_type);
 466             fs_reg cubecoord = fs_reg(this, glsl_type::vec3_type);
 467             fs_reg abscoord = coordinate;
 468             abscoord.negate = false;
 469             abscoord.abs = true;
 470             emit_minmax(BRW_CONDITIONAL_GE, temp,
 471                         regoffset(abscoord, 0), regoffset(abscoord, 1));
 472             emit_minmax(BRW_CONDITIONAL_GE, temp,
 473                         temp, regoffset(abscoord, 2));
 474             emit_math(SHADER_OPCODE_RCP, temp, temp);
 475             for (int i = 0; i < 3; i++) {
 476                emit(MUL(regoffset(cubecoord, i),
 477                         regoffset(coordinate, i), temp));
 478             }
 479
 480             coordinate = cubecoord;
 481             break;
 482          }
 483
 484          default:
 485             assert(!"not reached");
 486             coordinate_type = glsl_type::vec2_type;
 487             break;
 488          }
 489
 490          ir_constant_data junk_data;
 491          ir->coordinate = new(mem_ctx) ir_constant(coordinate_type, &junk_data);
 492
 493          coordinate = rescale_texcoord(ir, coordinate,
 494                                        fpi->TexSrcTarget == TEXTURE_RECT_INDEX,
 495                                        fpi->TexSrcUnit, fpi->TexSrcUnit);
 496
 497          if (fpi->TexShadow) {
 498             shadow_c = regoffset(coordinate, 2);
 499             ir->shadow_comparitor = new(mem_ctx) ir_constant(0.0f);
 500          }
 501
 502          fs_inst *inst;
 503          if (intel->gen >= 7) {
 504             inst = emit_texture_gen7(ir, dst, coordinate, shadow_c, lod, dpdy);
 505          } else if (intel->gen >= 5) {
 506             inst = emit_texture_gen5(ir, dst, coordinate, shadow_c, lod, dpdy);
 507          } else {
 508             inst = emit_texture_gen4(ir, dst, coordinate, shadow_c, lod, dpdy);
 509          }
 510
 511          inst->sampler = fpi->TexSrcUnit;
 512          inst->shadow_compare = fpi->TexShadow;
 513
 514          /* Reuse the GLSL swizzle_result() handler. */
 515          swizzle_result(ir, dst, fpi->TexSrcUnit);
 516          dst = this->result;
 517
 518          break;
 519       }
 520
 521       case OPCODE_SWZ:
 522          /* Note that SWZ's extended swizzles are handled in the general
 523           * get_src_reg() code.
 524           */
 525          emit_fp_alu1(BRW_OPCODE_MOV, fpi, dst, src[0]);
 526          break;
 527
 528       case OPCODE_XPD:
 529          for (int i = 0; i < 3; i++) {
 530             if (fpi->DstReg.WriteMask & (1 << i)) {
 531                int i1 = (i + 1) % 3;
 532                int i2 = (i + 2) % 3;
 533
 534                fs_reg temp = fs_reg(this, glsl_type::float_type);
 535                fs_reg neg_src1_1 = regoffset(src[1], i1);
 536                neg_src1_1.negate = !neg_src1_1.negate;
 537                emit(MUL(temp, regoffset(src[0], i2), neg_src1_1));
 538                emit(MUL(regoffset(dst, i),
 539                         regoffset(src[0], i1), regoffset(src[1], i2)));
 540                emit(ADD(regoffset(dst, i), regoffset(dst, i), temp));
 541             }
 542          }
 543          break;
 544
 545       case OPCODE_END:
 546          break;
 547
 548       default:
 549          _mesa_problem(ctx, "Unsupported opcode %s in fragment program\n",
 550                        _mesa_opcode_string(fpi->Opcode));
 551       }
 552
 553       /* To handle saturates, we emit a MOV with a saturate bit, which
 554        * optimization should fold into the preceding instructions when safe.
 555        */
 556       if (fpi->Opcode != OPCODE_END) {
 557          fs_reg real_dst = get_fp_dst_reg(&fpi->DstReg);
 558
 559          for (int i = 0; i < 4; i++) {
 560             if (fpi->DstReg.WriteMask & (1 << i)) {
 561                fs_inst *inst = emit(MOV(regoffset(real_dst, i),
 562                                         regoffset(dst, i)));
 563                inst->saturate = fpi->SaturateMode;
 564             }
 565          }
 566       }
 567    }
 568
 569    /* Epilogue:
 570     *
 571     * Fragment depth has this strange convention of being the .z component of
 572     * a vec4.  emit_fb_write() wants to see a float value, instead.
 573     */
 574    this->current_annotation = "result.depth write";
 575    if (frag_depth.file != BAD_FILE) {
 576       fs_reg temp = fs_reg(this, glsl_type::float_type);
 577       emit(MOV(temp, regoffset(frag_depth, 2)));
 578       frag_depth = temp;
 579    }
 580 }
 581
 582 void
 583 fs_visitor::setup_fp_regs()
 584 {
 585    /* PROGRAM_TEMPORARY */
 586    int num_temp = fp->Base.NumTemporaries;
 587    fp_temp_regs = rzalloc_array(mem_ctx, fs_reg, num_temp);
 588    for (int i = 0; i < num_temp; i++)
 589       fp_temp_regs[i] = fs_reg(this, glsl_type::vec4_type);
 590
 591    /* PROGRAM_STATE_VAR etc. */
 592    if (dispatch_width == 8) {
 593       for (unsigned p = 0;
 594            p < fp->Base.Parameters->NumParameters; p++) {
 595          for (unsigned int i = 0; i < 4; i++) {
 596             this->param_index[c->prog_data.nr_params] = p;
 597             this->param_offset[c->prog_data.nr_params] = i;
 598             c->prog_data.nr_params++;
 599          }
 600       }
 601    }
 602
 603    fp_input_regs = rzalloc_array(mem_ctx, fs_reg, FRAG_ATTRIB_MAX);
 604    for (int i = 0; i < FRAG_ATTRIB_MAX; i++) {
 605       if (fp->Base.InputsRead & BITFIELD64_BIT(i)) {
 606          /* Make up a dummy instruction to reuse code for emitting
 607           * interpolation.
 608           */
 609          ir_variable *ir = new(mem_ctx) ir_variable(glsl_type::vec4_type,
 610                                                     "fp_input",
 611                                                     ir_var_in);
 612          ir->location = i;
 613
 614          this->current_annotation = ralloc_asprintf(ctx, "interpolate input %d",
 615                                                     i);
 616
 617          switch (i) {
 618          case FRAG_ATTRIB_WPOS:
 619             ir->pixel_center_integer = fp->PixelCenterInteger;
 620             ir->origin_upper_left = fp->OriginUpperLeft;
 621             fp_input_regs[i] = *emit_fragcoord_interpolation(ir);
 622             break;
 623          case FRAG_ATTRIB_FACE:
 624             fp_input_regs[i] = *emit_frontfacing_interpolation(ir);
 625             break;
 626          default:
 627             fp_input_regs[i] = *emit_general_interpolation(ir);
 628
 629             if (i == FRAG_ATTRIB_FOGC) {
 630                emit(MOV(regoffset(fp_input_regs[i], 1), fs_reg(0.0f)));
 631                emit(MOV(regoffset(fp_input_regs[i], 2), fs_reg(0.0f)));
 632                emit(MOV(regoffset(fp_input_regs[i], 3), fs_reg(1.0f)));
 633             }
 634
 635             break;
 636          }
 637
 638          this->current_annotation = NULL;
 639       }
 640    }
 641 }
 642
 643 fs_reg
 644 fs_visitor::get_fp_dst_reg(const prog_dst_register *dst)
 645 {
 646    switch (dst->File) {
 647    case PROGRAM_TEMPORARY:
 648       return fp_temp_regs[dst->Index];
 649
 650    case PROGRAM_OUTPUT:
 651       if (dst->Index == FRAG_RESULT_DEPTH) {
 652          if (frag_depth.file == BAD_FILE)
 653             frag_depth = fs_reg(this, glsl_type::vec4_type);
 654          return frag_depth;
 655       } else if (dst->Index == FRAG_RESULT_COLOR) {
 656          if (outputs[0].file == BAD_FILE) {
 657             outputs[0] = fs_reg(this, glsl_type::vec4_type);
 658             output_components[0] = 4;
 659
 660             /* Tell emit_fb_writes() to smear fragment.color across all the
 661              * color attachments.
 662              */
 663             for (int i = 1; i < c->key.nr_color_regions; i++) {
 664                outputs[i] = outputs[0];
 665                output_components[i] = output_components[0];
 666             }
 667          }
 668          return outputs[0];
 669       } else {
 670          int output_index = dst->Index - FRAG_RESULT_DATA0;
 671          if (outputs[output_index].file == BAD_FILE) {
 672             outputs[output_index] = fs_reg(this, glsl_type::vec4_type);
 673          }
 674          output_components[output_index] = 4;
 675          return outputs[output_index];
 676       }
 677
 678    case PROGRAM_UNDEFINED:
 679       return fs_reg();
 680
 681    default:
 682       _mesa_problem(ctx, "bad dst register file: %s\n",
 683                     _mesa_register_file_name((gl_register_file)dst->File));
 684       return fs_reg(this, glsl_type::vec4_type);
 685    }
 686 }
 687
 688 fs_reg
 689 fs_visitor::get_fp_src_reg(const prog_src_register *src)
 690 {
 691    struct gl_program_parameter_list *plist = fp->Base.Parameters;
 692
 693    fs_reg result;
 694
 695    assert(!src->Abs);
 696
 697    switch (src->File) {
 698    case PROGRAM_UNDEFINED:
 699       return fs_reg();
 700    case PROGRAM_TEMPORARY:
 701       result = fp_temp_regs[src->Index];
 702       break;
 703
 704    case PROGRAM_INPUT:
 705       result = fp_input_regs[src->Index];
 706       break;
 707
 708    case PROGRAM_STATE_VAR:
 709    case PROGRAM_UNIFORM:
 710    case PROGRAM_CONSTANT:
 711       /* We actually want to look at the type in the Parameters list for this,
 712        * because this lets us upload constant builtin uniforms, as actual
 713        * constants.
 714        */
 715       switch (plist->Parameters[src->Index].Type) {
 716       case PROGRAM_CONSTANT: {
 717          result = fs_reg(this, glsl_type::vec4_type);
 718
 719          for (int i = 0; i < 4; i++) {
 720             emit(MOV(regoffset(result, i),
 721                      fs_reg(plist->ParameterValues[src->Index][i].f)));
 722          }
 723          break;
 724       }
 725
 726       case PROGRAM_STATE_VAR:
 727       case PROGRAM_UNIFORM:
 728          result = fs_reg(UNIFORM, src->Index * 4);
 729          break;
 730
 731       default:
 732          _mesa_problem(ctx, "bad uniform src register file: %s\n",
 733                        _mesa_register_file_name((gl_register_file)src->File));
 734          return fs_reg(this, glsl_type::vec4_type);
 735       }
 736       break;
 737
 738    default:
 739       _mesa_problem(ctx, "bad src register file: %s\n",
 740                     _mesa_register_file_name((gl_register_file)src->File));
 741       return fs_reg(this, glsl_type::vec4_type);
 742    }
 743
 744    if (src->Swizzle != SWIZZLE_NOOP || src->Negate) {
 745       fs_reg unswizzled = result;
 746       result = fs_reg(this, glsl_type::vec4_type);
 747       for (int i = 0; i < 4; i++) {
 748          bool negate = src->Negate & (1 << i);
 749          /* The ZERO, ONE, and Negate options are only used for OPCODE_SWZ,
 750           * but it costs us nothing to support it.
 751           */
 752          int src_swiz = GET_SWZ(src->Swizzle, i);
 753          if (src_swiz == SWIZZLE_ZERO) {
 754             emit(MOV(regoffset(result, i), fs_reg(0.0f)));
 755          } else if (src_swiz == SWIZZLE_ONE) {
 756             emit(MOV(regoffset(result, i),
 757                      negate ? fs_reg(-1.0f) : fs_reg(1.0f)));
 758          } else {
 759             fs_reg src = regoffset(unswizzled, src_swiz);
 760             if (negate)
 761                src.negate = !src.negate;
 762             emit(MOV(regoffset(result, i), src));
 763          }
 764       }
 765    }
 766
 767    return result;
 768 }