src/mesa/drivers/dri/i965/brw_fs_fp.cpp

   1 /*
   2  * Copyright © 2012 Intel Corporation
   3  *
   4  * Permission is hereby granted, free of charge, to any person obtaining a
   5  * copy of this software and associated documentation files (the "Software"),
   6  * to deal in the Software without restriction, including without limitation
   7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
   8  * and/or sell copies of the Software, and to permit persons to whom the
   9  * Software is furnished to do so, subject to the following conditions:
  10  *
  11  * The above copyright notice and this permission notice (including the next
  12  * paragraph) shall be included in all copies or substantial portions of the
  13  * Software.
  14  *
  15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
  18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
  20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
  21  * IN THE SOFTWARE.
  22  */
  23
  24 /** @file brw_fs_fp.cpp
  25  *
  26  * Implementation of the compiler for GL_ARB_fragment_program shaders on top
  27  * of the GLSL compiler backend.
  28  */
  29
  30 #include "brw_context.h"
  31 #include "brw_fs.h"
  32
  33 static fs_reg
  34 regoffset(fs_reg reg, int i)
  35 {
  36    reg.reg_offset += i;
  37    return reg;
  38 }
  39
  40 void
  41 fs_visitor::emit_fp_alu1(enum opcode opcode,
  42                          const struct prog_instruction *fpi,
  43                          fs_reg dst, fs_reg src)
  44 {
  45    for (int i = 0; i < 4; i++) {
  46       if (fpi->DstReg.WriteMask & (1 << i))
  47          emit(opcode, regoffset(dst, i), regoffset(src, i));
  48    }
  49 }
  50
  51 void
  52 fs_visitor::emit_fp_alu2(enum opcode opcode,
  53                          const struct prog_instruction *fpi,
  54                          fs_reg dst, fs_reg src0, fs_reg src1)
  55 {
  56    for (int i = 0; i < 4; i++) {
  57       if (fpi->DstReg.WriteMask & (1 << i))
  58          emit(opcode, regoffset(dst, i),
  59               regoffset(src0, i), regoffset(src1, i));
  60    }
  61 }
  62
  63 void
  64 fs_visitor::emit_fp_minmax(const prog_instruction *fpi,
  65                            fs_reg dst, fs_reg src0, fs_reg src1)
  66 {
  67    uint32_t conditionalmod;
  68    if (fpi->Opcode == OPCODE_MIN)
  69       conditionalmod = BRW_CONDITIONAL_L;
  70    else
  71       conditionalmod = BRW_CONDITIONAL_GE;
  72
  73    for (int i = 0; i < 4; i++) {
  74       if (fpi->DstReg.WriteMask & (1 << i)) {
  75          emit_minmax(conditionalmod, regoffset(dst, i),
  76                      regoffset(src0, i), regoffset(src1, i));
  77       }
  78    }
  79 }
  80
  81 void
  82 fs_visitor::emit_fp_sop(uint32_t conditional_mod,
  83                         const struct prog_instruction *fpi,
  84                         fs_reg dst, fs_reg src0, fs_reg src1,
  85                         fs_reg one)
  86 {
  87    for (int i = 0; i < 4; i++) {
  88       if (fpi->DstReg.WriteMask & (1 << i)) {
  89          fs_inst *inst;
  90
  91          inst = emit(BRW_OPCODE_CMP, fs_reg(brw_null_reg()),
  92                      regoffset(src0, i), regoffset(src1, i));
  93          inst->conditional_mod = conditional_mod;
  94
  95          inst = emit(BRW_OPCODE_SEL, regoffset(dst, i), one, fs_reg(0.0f));
  96          inst->predicated = true;
  97       }
  98    }
  99 }
 100
 101 void
 102 fs_visitor::emit_fp_scalar_write(const struct prog_instruction *fpi,
 103                                  fs_reg dst, fs_reg src)
 104 {
 105    for (int i = 0; i < 4; i++) {
 106       if (fpi->DstReg.WriteMask & (1 << i))
 107          emit(BRW_OPCODE_MOV, regoffset(dst, i), src);
 108    }
 109 }
 110
 111 void
 112 fs_visitor::emit_fp_scalar_math(enum opcode opcode,
 113                                 const struct prog_instruction *fpi,
 114                                 fs_reg dst, fs_reg src)
 115 {
 116    fs_reg temp = fs_reg(this, glsl_type::float_type);
 117    emit_math(opcode, temp, src);
 118    emit_fp_scalar_write(fpi, dst, temp);
 119 }
 120
 121 void
 122 fs_visitor::emit_fragment_program_code()
 123 {
 124    setup_fp_regs();
 125
 126    fs_reg null = fs_reg(brw_null_reg());
 127
 128    /* Keep a reg with 1.0 around, for reuse by emit_fp_sop so that it can just
 129     * be:
 130     *
 131     * sel.f0 dst 1.0 0.0
 132     *
 133     * instead of
 134     *
 135     * mov    dst 0.0
 136     * mov.f0 dst 1.0
 137     */
 138    fs_reg one = fs_reg(this, glsl_type::float_type);
 139    emit(BRW_OPCODE_MOV, one, fs_reg(1.0f));
 140
 141    for (unsigned int insn = 0; insn < fp->Base.NumInstructions; insn++) {
 142       const struct prog_instruction *fpi = &fp->Base.Instructions[insn];
 143       base_ir = fpi;
 144
 145       //_mesa_print_instruction(fpi);
 146
 147       fs_reg dst;
 148       fs_reg src[3];
 149
 150       /* We always emit into a temporary destination register to avoid
 151        * aliasing issues.
 152        */
 153       dst = fs_reg(this, glsl_type::vec4_type);
 154
 155       for (int i = 0; i < 3; i++)
 156          src[i] = get_fp_src_reg(&fpi->SrcReg[i]);
 157
 158       switch (fpi->Opcode) {
 159       case OPCODE_ABS:
 160          src[0].abs = true;
 161          src[0].negate = false;
 162          emit_fp_alu1(BRW_OPCODE_MOV, fpi, dst, src[0]);
 163          break;
 164
 165       case OPCODE_ADD:
 166          emit_fp_alu2(BRW_OPCODE_ADD, fpi, dst, src[0], src[1]);
 167          break;
 168
 169       case OPCODE_CMP:
 170          for (int i = 0; i < 4; i++) {
 171             if (fpi->DstReg.WriteMask & (1 << i)) {
 172                fs_inst *inst;
 173
 174                inst = emit(BRW_OPCODE_CMP, null,
 175                            regoffset(src[0], i), fs_reg(0.0f));
 176                inst->conditional_mod = BRW_CONDITIONAL_L;
 177
 178                inst = emit(BRW_OPCODE_SEL, regoffset(dst, i),
 179                            regoffset(src[1], i), regoffset(src[2], i));
 180                inst->predicated = true;
 181             }
 182          }
 183          break;
 184
 185       case OPCODE_COS:
 186          emit_fp_scalar_math(SHADER_OPCODE_COS, fpi, dst, src[0]);
 187          break;
 188
 189       case OPCODE_DP2:
 190       case OPCODE_DP3:
 191       case OPCODE_DP4:
 192       case OPCODE_DPH: {
 193          fs_reg mul = fs_reg(this, glsl_type::float_type);
 194          fs_reg acc = fs_reg(this, glsl_type::float_type);
 195          int count;
 196
 197          switch (fpi->Opcode) {
 198          case OPCODE_DP2: count = 2; break;
 199          case OPCODE_DP3: count = 3; break;
 200          case OPCODE_DP4: count = 4; break;
 201          case OPCODE_DPH: count = 3; break;
 202          default: assert(!"not reached"); count = 0; break;
 203          }
 204
 205          emit(BRW_OPCODE_MUL, acc,
 206               regoffset(src[0], 0), regoffset(src[1], 0));
 207          for (int i = 1; i < count; i++) {
 208             emit(BRW_OPCODE_MUL, mul,
 209                  regoffset(src[0], i), regoffset(src[1], i));
 210             emit(BRW_OPCODE_ADD, acc, acc, mul);
 211          }
 212
 213          if (fpi->Opcode == OPCODE_DPH)
 214             emit(BRW_OPCODE_ADD, acc, acc, regoffset(src[1], 3));
 215
 216          emit_fp_scalar_write(fpi, dst, acc);
 217          break;
 218       }
 219
 220       case OPCODE_DST:
 221          if (fpi->DstReg.WriteMask & WRITEMASK_X)
 222             emit(BRW_OPCODE_MOV, dst, fs_reg(1.0f));
 223          if (fpi->DstReg.WriteMask & WRITEMASK_Y) {
 224             emit(BRW_OPCODE_MUL, regoffset(dst, 1),
 225                  regoffset(src[0], 1), regoffset(src[1], 1));
 226          }
 227          if (fpi->DstReg.WriteMask & WRITEMASK_Z)
 228             emit(BRW_OPCODE_MOV, regoffset(dst, 2), regoffset(src[0], 2));
 229          if (fpi->DstReg.WriteMask & WRITEMASK_W)
 230             emit(BRW_OPCODE_MOV, regoffset(dst, 3), regoffset(src[1], 3));
 231          break;
 232
 233       case OPCODE_EX2:
 234          emit_fp_scalar_math(SHADER_OPCODE_EXP2, fpi, dst, src[0]);
 235          break;
 236
 237       case OPCODE_FLR:
 238          emit_fp_alu1(BRW_OPCODE_RNDD, fpi, dst, src[0]);
 239          break;
 240
 241       case OPCODE_FRC:
 242          emit_fp_alu1(BRW_OPCODE_FRC, fpi, dst, src[0]);
 243          break;
 244
 245       case OPCODE_KIL: {
 246          for (int i = 0; i < 4; i++) {
 247             /* In most cases the argument to a KIL will be something like
 248              * TEMP[0].wwww, so there's no point in checking whether .w is < 0
 249              * 4 times in a row.
 250              */
 251             if (i > 0 &&
 252                 GET_SWZ(fpi->SrcReg[0].Swizzle, i) ==
 253                 GET_SWZ(fpi->SrcReg[0].Swizzle, i - 1) &&
 254                 ((fpi->SrcReg[0].Negate >> i) & 1) ==
 255                 ((fpi->SrcReg[0].Negate >> (i - 1)) & 1)) {
 256                continue;
 257             }
 258
 259             fs_inst *inst = emit(BRW_OPCODE_CMP, null,
 260                                  regoffset(src[0], i), 0.0f);
 261             inst->conditional_mod = BRW_CONDITIONAL_L;
 262
 263             inst = emit(BRW_OPCODE_IF);
 264             inst->predicated = true;
 265             emit(FS_OPCODE_DISCARD);
 266             emit(BRW_OPCODE_ENDIF);
 267          }
 268          break;
 269       }
 270
 271       case OPCODE_LG2:
 272          emit_fp_scalar_math(SHADER_OPCODE_LOG2, fpi, dst, src[0]);
 273          break;
 274
 275       case OPCODE_LIT:
 276          /* From the ARB_fragment_program spec:
 277           *
 278           *      tmp = VectorLoad(op0);
 279           *      if (tmp.x < 0) tmp.x = 0;
 280           *      if (tmp.y < 0) tmp.y = 0;
 281           *      if (tmp.w < -(128.0-epsilon)) tmp.w = -(128.0-epsilon);
 282           *      else if (tmp.w > 128-epsilon) tmp.w = 128-epsilon;
 283           *      result.x = 1.0;
 284           *      result.y = tmp.x;
 285           *      result.z = (tmp.x > 0) ? RoughApproxPower(tmp.y, tmp.w) : 0.0;
 286           *      result.w = 1.0;
 287           *
 288           * Note that we don't do the clamping to +/- 128.  We didn't in
 289           * brw_wm_emit.c either.
 290           */
 291          if (fpi->DstReg.WriteMask & WRITEMASK_X)
 292             emit(BRW_OPCODE_MOV, regoffset(dst, 0), fs_reg(1.0f));
 293
 294          if (fpi->DstReg.WriteMask & WRITEMASK_YZ) {
 295             fs_inst *inst;
 296             inst = emit(BRW_OPCODE_CMP, null,
 297                         regoffset(src[0], 0), fs_reg(0.0f));
 298             inst->conditional_mod = BRW_CONDITIONAL_LE;
 299
 300             if (fpi->DstReg.WriteMask & WRITEMASK_Y) {
 301                emit(BRW_OPCODE_MOV, regoffset(dst, 1), regoffset(src[0], 0));
 302                inst = emit(BRW_OPCODE_MOV, regoffset(dst, 1), fs_reg(0.0f));
 303                inst->predicated = true;
 304             }
 305
 306             if (fpi->DstReg.WriteMask & WRITEMASK_Z) {
 307                emit_math(SHADER_OPCODE_POW, regoffset(dst, 2),
 308                          regoffset(src[0], 1), regoffset(src[0], 3));
 309
 310                inst = emit(BRW_OPCODE_MOV, regoffset(dst, 2), fs_reg(0.0f));
 311                inst->predicated = true;
 312             }
 313          }
 314
 315          if (fpi->DstReg.WriteMask & WRITEMASK_W)
 316             emit(BRW_OPCODE_MOV, regoffset(dst, 3), fs_reg(1.0f));
 317
 318          break;
 319
 320       case OPCODE_LRP:
 321          for (int i = 0; i < 4; i++) {
 322             if (fpi->DstReg.WriteMask & (1 << i)) {
 323                fs_reg neg_src0 = regoffset(src[0], i);
 324                neg_src0.negate = !neg_src0.negate;
 325                fs_reg temp = fs_reg(this, glsl_type::float_type);
 326                fs_reg temp2 = fs_reg(this, glsl_type::float_type);
 327                emit(BRW_OPCODE_ADD, temp, neg_src0, fs_reg(1.0f));
 328                emit(BRW_OPCODE_MUL, temp, temp, regoffset(src[2], i));
 329                emit(BRW_OPCODE_MUL, temp2,
 330                     regoffset(src[0], i), regoffset(src[1], i));
 331                emit(BRW_OPCODE_ADD, regoffset(dst, i), temp, temp2);
 332             }
 333          }
 334          break;
 335
 336       case OPCODE_MAD:
 337          for (int i = 0; i < 4; i++) {
 338             if (fpi->DstReg.WriteMask & (1 << i)) {
 339                fs_reg temp = fs_reg(this, glsl_type::float_type);
 340                emit(BRW_OPCODE_MUL, temp,
 341                     regoffset(src[0], i), regoffset(src[1], i));
 342                emit(BRW_OPCODE_ADD, regoffset(dst, i),
 343                     temp, regoffset(src[2], i));
 344             }
 345          }
 346          break;
 347
 348       case OPCODE_MAX:
 349          emit_fp_minmax(fpi, dst, src[0], src[1]);
 350          break;
 351
 352       case OPCODE_MOV:
 353          emit_fp_alu1(BRW_OPCODE_MOV, fpi, dst, src[0]);
 354          break;
 355
 356       case OPCODE_MIN:
 357          emit_fp_minmax(fpi, dst, src[0], src[1]);
 358          break;
 359
 360       case OPCODE_MUL:
 361          emit_fp_alu2(BRW_OPCODE_MUL, fpi, dst, src[0], src[1]);
 362          break;
 363
 364       case OPCODE_POW: {
 365          fs_reg temp = fs_reg(this, glsl_type::float_type);
 366          emit_math(SHADER_OPCODE_POW, temp, src[0], src[1]);
 367          emit_fp_scalar_write(fpi, dst, temp);
 368          break;
 369       }
 370
 371       case OPCODE_RCP:
 372          emit_fp_scalar_math(SHADER_OPCODE_RCP, fpi, dst, src[0]);
 373          break;
 374
 375       case OPCODE_RSQ:
 376          emit_fp_scalar_math(SHADER_OPCODE_RSQ, fpi, dst, src[0]);
 377          break;
 378
 379       case OPCODE_SCS:
 380          if (fpi->DstReg.WriteMask & WRITEMASK_X) {
 381             emit_math(SHADER_OPCODE_COS, regoffset(dst, 0),
 382                       regoffset(src[0], 0));
 383          }
 384
 385          if (fpi->DstReg.WriteMask & WRITEMASK_Y) {
 386             emit_math(SHADER_OPCODE_SIN, regoffset(dst, 1),
 387                       regoffset(src[0], 1));
 388          }
 389          break;
 390
 391       case OPCODE_SGE:
 392          emit_fp_sop(BRW_CONDITIONAL_GE, fpi, dst, src[0], src[1], one);
 393          break;
 394
 395       case OPCODE_SIN:
 396          emit_fp_scalar_math(SHADER_OPCODE_SIN, fpi, dst, src[0]);
 397          break;
 398
 399       case OPCODE_SLT:
 400          emit_fp_sop(BRW_CONDITIONAL_L, fpi, dst, src[0], src[1], one);
 401          break;
 402
 403       case OPCODE_SUB: {
 404          fs_reg neg_src1 = src[1];
 405          neg_src1.negate = !src[1].negate;
 406
 407          emit_fp_alu2(BRW_OPCODE_ADD, fpi, dst, src[0], neg_src1);
 408          break;
 409       }
 410
 411       case OPCODE_TEX:
 412       case OPCODE_TXB:
 413       case OPCODE_TXP: {
 414          /* We piggy-back on the GLSL IR support for texture setup.  To do so,
 415           * we have to cook up an ir_texture that has the coordinate field
 416           * with appropriate type, and shadow_comparitor set or not.  All the
 417           * other properties of ir_texture are passed in as arguments to the
 418           * emit_texture_gen* function.
 419           */
 420          ir_texture *ir = NULL;
 421
 422          fs_reg lod;
 423          fs_reg dpdy;
 424          fs_reg coordinate = src[0];
 425          fs_reg shadow_c;
 426
 427          switch (fpi->Opcode) {
 428          case OPCODE_TEX:
 429             ir = new(mem_ctx) ir_texture(ir_tex);
 430             break;
 431          case OPCODE_TXP: {
 432             ir = new(mem_ctx) ir_texture(ir_tex);
 433
 434             coordinate = fs_reg(this, glsl_type::vec3_type);
 435             fs_reg invproj = fs_reg(this, glsl_type::float_type);
 436             emit_math(SHADER_OPCODE_RCP, invproj, regoffset(src[0], 3));
 437             for (int i = 0; i < 3; i++) {
 438                emit(BRW_OPCODE_MUL, regoffset(coordinate, i),
 439                     regoffset(src[0], i), invproj);
 440             }
 441             break;
 442          }
 443          case OPCODE_TXB:
 444             ir = new(mem_ctx) ir_texture(ir_txb);
 445             lod = regoffset(src[0], 3);
 446             break;
 447          default:
 448             assert(!"not reached");
 449             break;
 450          }
 451
 452          const glsl_type *coordinate_type;
 453          switch (fpi->TexSrcTarget) {
 454          case TEXTURE_1D_INDEX:
 455             coordinate_type = glsl_type::float_type;
 456             break;
 457
 458          case TEXTURE_2D_INDEX:
 459          case TEXTURE_1D_ARRAY_INDEX:
 460          case TEXTURE_RECT_INDEX:
 461          case TEXTURE_EXTERNAL_INDEX:
 462             coordinate_type = glsl_type::vec2_type;
 463             break;
 464
 465          case TEXTURE_3D_INDEX:
 466          case TEXTURE_2D_ARRAY_INDEX:
 467             coordinate_type = glsl_type::vec3_type;
 468             break;
 469
 470          case TEXTURE_CUBE_INDEX: {
 471             coordinate_type = glsl_type::vec3_type;
 472
 473             fs_reg temp = fs_reg(this, glsl_type::float_type);
 474             fs_reg cubecoord = fs_reg(this, glsl_type::vec3_type);
 475             fs_reg abscoord = coordinate;
 476             abscoord.negate = false;
 477             abscoord.abs = true;
 478             emit_minmax(BRW_CONDITIONAL_GE, temp,
 479                         regoffset(abscoord, 0), regoffset(abscoord, 1));
 480             emit_minmax(BRW_CONDITIONAL_GE, temp,
 481                         temp, regoffset(abscoord, 2));
 482             emit_math(SHADER_OPCODE_RCP, temp, temp);
 483             for (int i = 0; i < 3; i++) {
 484                emit(BRW_OPCODE_MUL, regoffset(cubecoord, i),
 485                     regoffset(coordinate, i), temp);
 486             }
 487
 488             coordinate = cubecoord;
 489             break;
 490          }
 491
 492          default:
 493             assert(!"not reached");
 494             coordinate_type = glsl_type::vec2_type;
 495             break;
 496          }
 497
 498          ir_constant_data junk_data;
 499          ir->coordinate = new(mem_ctx) ir_constant(coordinate_type, &junk_data);
 500
 501          coordinate = rescale_texcoord(ir, coordinate,
 502                                        fpi->TexSrcTarget == TEXTURE_RECT_INDEX,
 503                                        fpi->TexSrcUnit, fpi->TexSrcUnit);
 504
 505          if (fpi->TexShadow) {
 506             shadow_c = regoffset(coordinate, 2);
 507             ir->shadow_comparitor = new(mem_ctx) ir_constant(0.0f);
 508          }
 509
 510          fs_inst *inst;
 511          if (intel->gen >= 7) {
 512             inst = emit_texture_gen7(ir, dst, coordinate, shadow_c, lod, dpdy);
 513          } else if (intel->gen >= 5) {
 514             inst = emit_texture_gen5(ir, dst, coordinate, shadow_c, lod, dpdy);
 515          } else {
 516             inst = emit_texture_gen4(ir, dst, coordinate, shadow_c, lod, dpdy);
 517          }
 518
 519          inst->sampler = fpi->TexSrcUnit;
 520          inst->shadow_compare = fpi->TexShadow;
 521
 522          /* Reuse the GLSL swizzle_result() handler. */
 523          swizzle_result(ir, dst, fpi->TexSrcUnit);
 524          dst = this->result;
 525
 526          break;
 527       }
 528
 529       case OPCODE_SWZ:
 530          /* Note that SWZ's extended swizzles are handled in the general
 531           * get_src_reg() code.
 532           */
 533          emit_fp_alu1(BRW_OPCODE_MOV, fpi, dst, src[0]);
 534          break;
 535
 536       case OPCODE_XPD:
 537          for (int i = 0; i < 3; i++) {
 538             if (fpi->DstReg.WriteMask & (1 << i)) {
 539                int i1 = (i + 1) % 3;
 540                int i2 = (i + 2) % 3;
 541
 542                fs_reg temp = fs_reg(this, glsl_type::float_type);
 543                fs_reg neg_src1_1 = regoffset(src[1], i1);
 544                neg_src1_1.negate = !neg_src1_1.negate;
 545                emit(BRW_OPCODE_MUL, temp,
 546                     regoffset(src[0], i2), neg_src1_1);
 547                emit(BRW_OPCODE_MUL, regoffset(dst, i),
 548                     regoffset(src[0], i1), regoffset(src[1], i2));
 549                emit(BRW_OPCODE_ADD, regoffset(dst, i),
 550                     regoffset(dst, i), temp);
 551             }
 552          }
 553          break;
 554
 555       case OPCODE_END:
 556          break;
 557
 558       default:
 559          _mesa_problem(ctx, "Unsupported opcode %s in fragment program\n",
 560                        _mesa_opcode_string(fpi->Opcode));
 561       }
 562
 563       /* To handle saturates, we emit a MOV with a saturate bit, which
 564        * optimization should fold into the preceding instructions when safe.
 565        */
 566       if (fpi->Opcode != OPCODE_END) {
 567          fs_reg real_dst = get_fp_dst_reg(&fpi->DstReg);
 568
 569          for (int i = 0; i < 4; i++) {
 570             if (fpi->DstReg.WriteMask & (1 << i)) {
 571                fs_inst *inst = emit(BRW_OPCODE_MOV,
 572                                     regoffset(real_dst, i),
 573                                     regoffset(dst, i));
 574                inst->saturate = fpi->SaturateMode;
 575             }
 576          }
 577       }
 578    }
 579
 580    /* Epilogue:
 581     *
 582     * Fragment depth has this strange convention of being the .z component of
 583     * a vec4.  emit_fb_write() wants to see a float value, instead.
 584     */
 585    this->current_annotation = "result.depth write";
 586    if (frag_depth.file != BAD_FILE) {
 587       fs_reg temp = fs_reg(this, glsl_type::float_type);
 588       emit(BRW_OPCODE_MOV, temp, regoffset(frag_depth, 2));
 589       frag_depth = temp;
 590    }
 591 }
 592
 593 void
 594 fs_visitor::setup_fp_regs()
 595 {
 596    /* PROGRAM_TEMPORARY */
 597    int num_temp = fp->Base.NumTemporaries;
 598    fp_temp_regs = rzalloc_array(mem_ctx, fs_reg, num_temp);
 599    for (int i = 0; i < num_temp; i++)
 600       fp_temp_regs[i] = fs_reg(this, glsl_type::vec4_type);
 601
 602    /* PROGRAM_STATE_VAR etc. */
 603    if (c->dispatch_width == 8) {
 604       for (unsigned p = 0;
 605            p < c->fp->program.Base.Parameters->NumParameters; p++) {
 606          for (unsigned int i = 0; i < 4; i++) {
 607             this->param_index[c->prog_data.nr_params] = p;
 608             this->param_offset[c->prog_data.nr_params] = i;
 609             c->prog_data.nr_params++;
 610          }
 611       }
 612    }
 613
 614    fp_input_regs = rzalloc_array(mem_ctx, fs_reg, FRAG_ATTRIB_MAX);
 615    for (int i = 0; i < FRAG_ATTRIB_MAX; i++) {
 616       if (fp->Base.InputsRead & BITFIELD64_BIT(i)) {
 617          /* Make up a dummy instruction to reuse code for emitting
 618           * interpolation.
 619           */
 620          ir_variable *ir = new(mem_ctx) ir_variable(glsl_type::vec4_type,
 621                                                     "fp_input",
 622                                                     ir_var_in);
 623          ir->location = i;
 624
 625          this->current_annotation = ralloc_asprintf(ctx, "interpolate input %d",
 626                                                     i);
 627
 628          switch (i) {
 629          case FRAG_ATTRIB_WPOS:
 630             ir->pixel_center_integer = fp->PixelCenterInteger;
 631             ir->origin_upper_left = fp->OriginUpperLeft;
 632             fp_input_regs[i] = *emit_fragcoord_interpolation(ir);
 633             break;
 634          case FRAG_ATTRIB_FACE:
 635             fp_input_regs[i] = *emit_frontfacing_interpolation(ir);
 636             break;
 637          default:
 638             fp_input_regs[i] = *emit_general_interpolation(ir);
 639
 640             if (i == FRAG_ATTRIB_FOGC) {
 641                emit(BRW_OPCODE_MOV,
 642                     regoffset(fp_input_regs[i], 1), fs_reg(0.0f));
 643                emit(BRW_OPCODE_MOV,
 644                     regoffset(fp_input_regs[i], 2), fs_reg(0.0f));
 645                emit(BRW_OPCODE_MOV,
 646                     regoffset(fp_input_regs[i], 3), fs_reg(1.0f));
 647             }
 648
 649             break;
 650          }
 651
 652          this->current_annotation = NULL;
 653       }
 654    }
 655 }
 656
 657 fs_reg
 658 fs_visitor::get_fp_dst_reg(const prog_dst_register *dst)
 659 {
 660    switch (dst->File) {
 661    case PROGRAM_TEMPORARY:
 662       return fp_temp_regs[dst->Index];
 663
 664    case PROGRAM_OUTPUT:
 665       if (dst->Index == FRAG_RESULT_DEPTH) {
 666          if (frag_depth.file == BAD_FILE)
 667             frag_depth = fs_reg(this, glsl_type::vec4_type);
 668          return frag_depth;
 669       } else if (dst->Index == FRAG_RESULT_COLOR) {
 670          if (outputs[0].file == BAD_FILE) {
 671             outputs[0] = fs_reg(this, glsl_type::vec4_type);
 672             output_components[0] = 4;
 673
 674             /* Tell emit_fb_writes() to smear fragment.color across all the
 675              * color attachments.
 676              */
 677             for (int i = 1; i < c->key.nr_color_regions; i++) {
 678                outputs[i] = outputs[0];
 679                output_components[i] = output_components[0];
 680             }
 681          }
 682          return outputs[0];
 683       } else {
 684          int output_index = dst->Index - FRAG_RESULT_DATA0;
 685          if (outputs[output_index].file == BAD_FILE) {
 686             outputs[output_index] = fs_reg(this, glsl_type::vec4_type);
 687          }
 688          output_components[output_index] = 4;
 689          return outputs[output_index];
 690       }
 691
 692    case PROGRAM_UNDEFINED:
 693       return fs_reg();
 694
 695    default:
 696       _mesa_problem(ctx, "bad dst register file: %s\n",
 697                     _mesa_register_file_name((gl_register_file)dst->File));
 698       return fs_reg(this, glsl_type::vec4_type);
 699    }
 700 }
 701
 702 fs_reg
 703 fs_visitor::get_fp_src_reg(const prog_src_register *src)
 704 {
 705    struct gl_program_parameter_list *plist = c->fp->program.Base.Parameters;
 706
 707    fs_reg result;
 708
 709    assert(!src->Abs);
 710
 711    switch (src->File) {
 712    case PROGRAM_UNDEFINED:
 713       return fs_reg();
 714    case PROGRAM_TEMPORARY:
 715       result = fp_temp_regs[src->Index];
 716       break;
 717
 718    case PROGRAM_INPUT:
 719       result = fp_input_regs[src->Index];
 720       break;
 721
 722    case PROGRAM_STATE_VAR:
 723    case PROGRAM_UNIFORM:
 724    case PROGRAM_CONSTANT:
 725       /* We actually want to look at the type in the Parameters list for this,
 726        * because this lets us upload constant builtin uniforms, as actual
 727        * constants.
 728        */
 729       switch (plist->Parameters[src->Index].Type) {
 730       case PROGRAM_CONSTANT: {
 731          result = fs_reg(this, glsl_type::vec4_type);
 732
 733          for (int i = 0; i < 4; i++) {
 734             emit(BRW_OPCODE_MOV, regoffset(result, i),
 735                  fs_reg(plist->ParameterValues[src->Index][i].f));
 736          }
 737          break;
 738       }
 739
 740       case PROGRAM_STATE_VAR:
 741       case PROGRAM_UNIFORM:
 742          result = fs_reg(UNIFORM, src->Index * 4);
 743          break;
 744
 745       default:
 746          _mesa_problem(ctx, "bad uniform src register file: %s\n",
 747                        _mesa_register_file_name((gl_register_file)src->File));
 748          return fs_reg(this, glsl_type::vec4_type);
 749       }
 750       break;
 751
 752    default:
 753       _mesa_problem(ctx, "bad src register file: %s\n",
 754                     _mesa_register_file_name((gl_register_file)src->File));
 755       return fs_reg(this, glsl_type::vec4_type);
 756    }
 757
 758    if (src->Swizzle != SWIZZLE_NOOP || src->Negate) {
 759       fs_reg unswizzled = result;
 760       result = fs_reg(this, glsl_type::vec4_type);
 761       for (int i = 0; i < 4; i++) {
 762          bool negate = src->Negate & (1 << i);
 763          /* The ZERO, ONE, and Negate options are only used for OPCODE_SWZ,
 764           * but it costs us nothing to support it.
 765           */
 766          int src_swiz = GET_SWZ(src->Swizzle, i);
 767          if (src_swiz == SWIZZLE_ZERO) {
 768             emit(BRW_OPCODE_MOV, regoffset(result, i), fs_reg(0.0f));
 769          } else if (src_swiz == SWIZZLE_ONE) {
 770             emit(BRW_OPCODE_MOV, regoffset(result, i),
 771                  negate ? fs_reg(-1.0f) : fs_reg(1.0f));
 772          } else {
 773             fs_reg src = regoffset(unswizzled, src_swiz);
 774             if (negate)
 775                src.negate = !src.negate;
 776             emit(BRW_OPCODE_MOV, regoffset(result, i), src);
 777          }
 778       }
 779    }
 780
 781    return result;
 782 }