src/mesa/drivers/dri/i965/brw_fs_builder.h

   1 /* -*- c++ -*- */
   2 /*
   3  * Copyright © 2010-2015 Intel Corporation
   4  *
   5  * Permission is hereby granted, free of charge, to any person obtaining a
   6  * copy of this software and associated documentation files (the "Software"),
   7  * to deal in the Software without restriction, including without limitation
   8  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
   9  * and/or sell copies of the Software, and to permit persons to whom the
  10  * Software is furnished to do so, subject to the following conditions:
  11  *
  12  * The above copyright notice and this permission notice (including the next
  13  * paragraph) shall be included in all copies or substantial portions of the
  14  * Software.
  15  *
  16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
  19  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  20  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
  21  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
  22  * IN THE SOFTWARE.
  23  */
  24
  25 #ifndef BRW_FS_BUILDER_H
  26 #define BRW_FS_BUILDER_H
  27
  28 #include "brw_ir_fs.h"
  29 #include "brw_shader.h"
  30 #include "brw_context.h"
  31
  32 namespace brw {
  33    /**
  34     * Toolbox to assemble an FS IR program out of individual instructions.
  35     *
  36     * This object is meant to have an interface consistent with
  37     * brw::vec4_builder.  They cannot be fully interchangeable because
  38     * brw::fs_builder generates scalar code while brw::vec4_builder generates
  39     * vector code.
  40     */
  41    class fs_builder {
  42    public:
  43       /** Type used in this IR to represent a source of an instruction. */
  44       typedef fs_reg src_reg;
  45
  46       /** Type used in this IR to represent the destination of an instruction. */
  47       typedef fs_reg dst_reg;
  48
  49       /** Type used in this IR to represent an instruction. */
  50       typedef fs_inst instruction;
  51
  52       /**
  53        * Construct an fs_builder that inserts instructions into \p shader.
  54        * \p dispatch_width gives the native execution width of the program.
  55        */
  56       fs_builder(backend_shader *shader,
  57                  unsigned dispatch_width) :
  58          shader(shader), block(NULL), cursor(NULL),
  59          _dispatch_width(dispatch_width),
  60          _group(0),
  61          force_writemask_all(false),
  62          annotation()
  63       {
  64       }
  65
  66       /**
  67        * Construct an fs_builder that inserts instructions into \p shader
  68        * before instruction \p inst in basic block \p block.  The default
  69        * execution controls and debug annotation are initialized from the
  70        * instruction passed as argument.
  71        */
  72       fs_builder(backend_shader *shader, bblock_t *block, fs_inst *inst) :
  73          shader(shader), block(block), cursor(inst),
  74          _dispatch_width(inst->exec_size),
  75          _group(inst->force_sechalf ? 8 : 0),
  76          force_writemask_all(inst->force_writemask_all)
  77       {
  78          annotation.str = inst->annotation;
  79          annotation.ir = inst->ir;
  80       }
  81
  82       /**
  83        * Construct an fs_builder that inserts instructions before \p cursor in
  84        * basic block \p block, inheriting other code generation parameters
  85        * from this.
  86        */
  87       fs_builder
  88       at(bblock_t *block, exec_node *cursor) const
  89       {
  90          fs_builder bld = *this;
  91          bld.block = block;
  92          bld.cursor = cursor;
  93          return bld;
  94       }
  95
  96       /**
  97        * Construct an fs_builder appending instructions at the end of the
  98        * instruction list of the shader, inheriting other code generation
  99        * parameters from this.
 100        */
 101       fs_builder
 102       at_end() const
 103       {
 104          return at(NULL, (exec_node *)&shader->instructions.tail);
 105       }
 106
 107       /**
 108        * Construct a builder specifying the default SIMD width and group of
 109        * channel enable signals, inheriting other code generation parameters
 110        * from this.
 111        *
 112        * \p n gives the default SIMD width, \p i gives the slot group used for
 113        * predication and control flow masking in multiples of \p n channels.
 114        */
 115       fs_builder
 116       group(unsigned n, unsigned i) const
 117       {
 118          assert(force_writemask_all ||
 119                 (n <= dispatch_width() && i < dispatch_width() / n));
 120          fs_builder bld = *this;
 121          bld._dispatch_width = n;
 122          bld._group += i * n;
 123          return bld;
 124       }
 125
 126       /**
 127        * Alias for group() with width equal to eight.
 128        */
 129       fs_builder
 130       half(unsigned i) const
 131       {
 132          return group(8, i);
 133       }
 134
 135       /**
 136        * Construct a builder with per-channel control flow execution masking
 137        * disabled if \p b is true.  If control flow execution masking is
 138        * already disabled this has no effect.
 139        */
 140       fs_builder
 141       exec_all(bool b = true) const
 142       {
 143          fs_builder bld = *this;
 144          if (b)
 145             bld.force_writemask_all = true;
 146          return bld;
 147       }
 148
 149       /**
 150        * Construct a builder with the given debug annotation info.
 151        */
 152       fs_builder
 153       annotate(const char *str, const void *ir = NULL) const
 154       {
 155          fs_builder bld = *this;
 156          bld.annotation.str = str;
 157          bld.annotation.ir = ir;
 158          return bld;
 159       }
 160
 161       /**
 162        * Get the SIMD width in use.
 163        */
 164       unsigned
 165       dispatch_width() const
 166       {
 167          return _dispatch_width;
 168       }
 169
 170       /**
 171        * Allocate a virtual register of natural vector size (one for this IR)
 172        * and SIMD width.  \p n gives the amount of space to allocate in
 173        * dispatch_width units (which is just enough space for one logical
 174        * component in this IR).
 175        */
 176       dst_reg
 177       vgrf(enum brw_reg_type type, unsigned n = 1) const
 178       {
 179          if (n > 0)
 180             return dst_reg(GRF, shader->alloc.allocate(
 181                               DIV_ROUND_UP(n * type_sz(type) * dispatch_width(),
 182                                            REG_SIZE)),
 183                            type);
 184          else
 185             return retype(null_reg_ud(), type);
 186       }
 187
 188       /**
 189        * Create a null register of floating type.
 190        */
 191       dst_reg
 192       null_reg_f() const
 193       {
 194          return dst_reg(retype(brw_null_vec(dispatch_width()),
 195                                BRW_REGISTER_TYPE_F));
 196       }
 197
 198       /**
 199        * Create a null register of signed integer type.
 200        */
 201       dst_reg
 202       null_reg_d() const
 203       {
 204          return dst_reg(retype(brw_null_vec(dispatch_width()),
 205                                BRW_REGISTER_TYPE_D));
 206       }
 207
 208       /**
 209        * Create a null register of unsigned integer type.
 210        */
 211       dst_reg
 212       null_reg_ud() const
 213       {
 214          return dst_reg(retype(brw_null_vec(dispatch_width()),
 215                                BRW_REGISTER_TYPE_UD));
 216       }
 217
 218       /**
 219        * Get the mask of SIMD channels enabled by dispatch and not yet
 220        * disabled by discard.
 221        */
 222       src_reg
 223       sample_mask_reg() const
 224       {
 225          const bool uses_kill =
 226             (shader->stage == MESA_SHADER_FRAGMENT &&
 227              ((brw_wm_prog_data *)shader->stage_prog_data)->uses_kill);
 228          return (shader->stage != MESA_SHADER_FRAGMENT ? src_reg(0xffff) :
 229                  uses_kill ? brw_flag_reg(0, 1) :
 230                  retype(brw_vec1_grf(1, 7), BRW_REGISTER_TYPE_UD));
 231       }
 232
 233       /**
 234        * Insert an instruction into the program.
 235        */
 236       instruction *
 237       emit(const instruction &inst) const
 238       {
 239          return emit(new(shader->mem_ctx) instruction(inst));
 240       }
 241
 242       /**
 243        * Create and insert a nullary control instruction into the program.
 244        */
 245       instruction *
 246       emit(enum opcode opcode) const
 247       {
 248          return emit(instruction(opcode, dispatch_width()));
 249       }
 250
 251       /**
 252        * Create and insert a nullary instruction into the program.
 253        */
 254       instruction *
 255       emit(enum opcode opcode, const dst_reg &dst) const
 256       {
 257          return emit(instruction(opcode, dispatch_width(), dst));
 258       }
 259
 260       /**
 261        * Create and insert a unary instruction into the program.
 262        */
 263       instruction *
 264       emit(enum opcode opcode, const dst_reg &dst, const src_reg &src0) const
 265       {
 266          switch (opcode) {
 267          case SHADER_OPCODE_RCP:
 268          case SHADER_OPCODE_RSQ:
 269          case SHADER_OPCODE_SQRT:
 270          case SHADER_OPCODE_EXP2:
 271          case SHADER_OPCODE_LOG2:
 272          case SHADER_OPCODE_SIN:
 273          case SHADER_OPCODE_COS:
 274             return fix_math_instruction(
 275                emit(instruction(opcode, dispatch_width(), dst,
 276                                 fix_math_operand(src0))));
 277
 278          default:
 279             return emit(instruction(opcode, dispatch_width(), dst, src0));
 280          }
 281       }
 282
 283       /**
 284        * Create and insert a binary instruction into the program.
 285        */
 286       instruction *
 287       emit(enum opcode opcode, const dst_reg &dst, const src_reg &src0,
 288            const src_reg &src1) const
 289       {
 290          switch (opcode) {
 291          case SHADER_OPCODE_POW:
 292          case SHADER_OPCODE_INT_QUOTIENT:
 293          case SHADER_OPCODE_INT_REMAINDER:
 294             return fix_math_instruction(
 295                emit(instruction(opcode, dispatch_width(), dst,
 296                                 fix_math_operand(src0),
 297                                 fix_math_operand(src1))));
 298
 299          default:
 300             return emit(instruction(opcode, dispatch_width(), dst, src0, src1));
 301
 302          }
 303       }
 304
 305       /**
 306        * Create and insert a ternary instruction into the program.
 307        */
 308       instruction *
 309       emit(enum opcode opcode, const dst_reg &dst, const src_reg &src0,
 310            const src_reg &src1, const src_reg &src2) const
 311       {
 312          switch (opcode) {
 313          case BRW_OPCODE_BFE:
 314          case BRW_OPCODE_BFI2:
 315          case BRW_OPCODE_MAD:
 316          case BRW_OPCODE_LRP:
 317             return emit(instruction(opcode, dispatch_width(), dst,
 318                                     fix_3src_operand(src0),
 319                                     fix_3src_operand(src1),
 320                                     fix_3src_operand(src2)));
 321
 322          default:
 323             return emit(instruction(opcode, dispatch_width(), dst,
 324                                     src0, src1, src2));
 325          }
 326       }
 327
 328       /**
 329        * Create and insert an instruction with a variable number of sources
 330        * into the program.
 331        */
 332       instruction *
 333       emit(enum opcode opcode, const dst_reg &dst, const src_reg srcs[],
 334            unsigned n) const
 335       {
 336          return emit(instruction(opcode, dispatch_width(), dst, srcs, n));
 337       }
 338
 339       /**
 340        * Insert a preallocated instruction into the program.
 341        */
 342       instruction *
 343       emit(instruction *inst) const
 344       {
 345          assert(inst->exec_size == dispatch_width() ||
 346                 force_writemask_all);
 347          assert(_group == 0 || _group == 8);
 348
 349          inst->force_sechalf = (_group == 8);
 350          inst->force_writemask_all = force_writemask_all;
 351          inst->annotation = annotation.str;
 352          inst->ir = annotation.ir;
 353
 354          if (block)
 355             static_cast<instruction *>(cursor)->insert_before(block, inst);
 356          else
 357             cursor->insert_before(inst);
 358
 359          return inst;
 360       }
 361
 362       /**
 363        * Select \p src0 if the comparison of both sources with the given
 364        * conditional mod evaluates to true, otherwise select \p src1.
 365        *
 366        * Generally useful to get the minimum or maximum of two values.
 367        */
 368       void
 369       emit_minmax(const dst_reg &dst, const src_reg &src0,
 370                   const src_reg &src1, brw_conditional_mod mod) const
 371       {
 372          if (shader->devinfo->gen >= 6) {
 373             set_condmod(mod, SEL(dst, fix_unsigned_negate(src0),
 374                                  fix_unsigned_negate(src1)));
 375          } else {
 376             CMP(null_reg_d(), src0, src1, mod);
 377             set_predicate(BRW_PREDICATE_NORMAL,
 378                           SEL(dst, src0, src1));
 379          }
 380       }
 381
 382       /**
 383        * Copy any live channel from \p src to the first channel of the result.
 384        */
 385       src_reg
 386       emit_uniformize(const src_reg &src) const
 387       {
 388          const fs_builder ubld = exec_all();
 389          const dst_reg chan_index = component(vgrf(BRW_REGISTER_TYPE_UD), 0);
 390          const dst_reg dst = component(vgrf(src.type), 0);
 391
 392          ubld.emit(SHADER_OPCODE_FIND_LIVE_CHANNEL, chan_index);
 393          ubld.emit(SHADER_OPCODE_BROADCAST, dst, src, chan_index);
 394
 395          return src_reg(dst);
 396       }
 397
 398       /**
 399        * Assorted arithmetic ops.
 400        * @{
 401        */
 402 #define ALU1(op)                                        \
 403       instruction *                                     \
 404       op(const dst_reg &dst, const src_reg &src0) const \
 405       {                                                 \
 406          return emit(BRW_OPCODE_##op, dst, src0);       \
 407       }
 408
 409 #define ALU2(op)                                                        \
 410       instruction *                                                     \
 411       op(const dst_reg &dst, const src_reg &src0, const src_reg &src1) const \
 412       {                                                                 \
 413          return emit(BRW_OPCODE_##op, dst, src0, src1);                 \
 414       }
 415
 416 #define ALU2_ACC(op)                                                    \
 417       instruction *                                                     \
 418       op(const dst_reg &dst, const src_reg &src0, const src_reg &src1) const \
 419       {                                                                 \
 420          instruction *inst = emit(BRW_OPCODE_##op, dst, src0, src1);    \
 421          inst->writes_accumulator = true;                               \
 422          return inst;                                                   \
 423       }
 424
 425 #define ALU3(op)                                                        \
 426       instruction *                                                     \
 427       op(const dst_reg &dst, const src_reg &src0, const src_reg &src1,  \
 428          const src_reg &src2) const                                     \
 429       {                                                                 \
 430          return emit(BRW_OPCODE_##op, dst, src0, src1, src2);           \
 431       }
 432
 433       ALU2(ADD)
 434       ALU2_ACC(ADDC)
 435       ALU2(AND)
 436       ALU2(ASR)
 437       ALU2(AVG)
 438       ALU3(BFE)
 439       ALU2(BFI1)
 440       ALU3(BFI2)
 441       ALU1(BFREV)
 442       ALU1(CBIT)
 443       ALU2(CMPN)
 444       ALU3(CSEL)
 445       ALU2(DP2)
 446       ALU2(DP3)
 447       ALU2(DP4)
 448       ALU2(DPH)
 449       ALU1(F16TO32)
 450       ALU1(F32TO16)
 451       ALU1(FBH)
 452       ALU1(FBL)
 453       ALU1(FRC)
 454       ALU2(LINE)
 455       ALU1(LZD)
 456       ALU2(MAC)
 457       ALU2_ACC(MACH)
 458       ALU3(MAD)
 459       ALU1(MOV)
 460       ALU2(MUL)
 461       ALU1(NOT)
 462       ALU2(OR)
 463       ALU2(PLN)
 464       ALU1(RNDD)
 465       ALU1(RNDE)
 466       ALU1(RNDU)
 467       ALU1(RNDZ)
 468       ALU2(SAD2)
 469       ALU2_ACC(SADA2)
 470       ALU2(SEL)
 471       ALU2(SHL)
 472       ALU2(SHR)
 473       ALU2_ACC(SUBB)
 474       ALU2(XOR)
 475
 476 #undef ALU3
 477 #undef ALU2_ACC
 478 #undef ALU2
 479 #undef ALU1
 480       /** @} */
 481
 482       /**
 483        * CMP: Sets the low bit of the destination channels with the result
 484        * of the comparison, while the upper bits are undefined, and updates
 485        * the flag register with the packed 16 bits of the result.
 486        */
 487       instruction *
 488       CMP(const dst_reg &dst, const src_reg &src0, const src_reg &src1,
 489           brw_conditional_mod condition) const
 490       {
 491          /* Take the instruction:
 492           *
 493           * CMP null<d> src0<f> src1<f>
 494           *
 495           * Original gen4 does type conversion to the destination type
 496           * before comparison, producing garbage results for floating
 497           * point comparisons.
 498           *
 499           * The destination type doesn't matter on newer generations,
 500           * so we set the type to match src0 so we can compact the
 501           * instruction.
 502           */
 503          return set_condmod(condition,
 504                             emit(BRW_OPCODE_CMP, retype(dst, src0.type),
 505                                  fix_unsigned_negate(src0),
 506                                  fix_unsigned_negate(src1)));
 507       }
 508
 509       /**
 510        * Gen4 predicated IF.
 511        */
 512       instruction *
 513       IF(brw_predicate predicate) const
 514       {
 515          return set_predicate(predicate, emit(BRW_OPCODE_IF));
 516       }
 517
 518       /**
 519        * Emit a linear interpolation instruction.
 520        */
 521       instruction *
 522       LRP(const dst_reg &dst, const src_reg &x, const src_reg &y,
 523           const src_reg &a) const
 524       {
 525          if (shader->devinfo->gen >= 6) {
 526             /* The LRP instruction actually does op1 * op0 + op2 * (1 - op0), so
 527              * we need to reorder the operands.
 528              */
 529             return emit(BRW_OPCODE_LRP, dst, a, y, x);
 530
 531          } else {
 532             /* We can't use the LRP instruction.  Emit x*(1-a) + y*a. */
 533             const dst_reg y_times_a = vgrf(dst.type);
 534             const dst_reg one_minus_a = vgrf(dst.type);
 535             const dst_reg x_times_one_minus_a = vgrf(dst.type);
 536
 537             MUL(y_times_a, y, a);
 538             ADD(one_minus_a, negate(a), src_reg(1.0f));
 539             MUL(x_times_one_minus_a, x, src_reg(one_minus_a));
 540             return ADD(dst, src_reg(x_times_one_minus_a), src_reg(y_times_a));
 541          }
 542       }
 543
 544       /**
 545        * Collect a number of registers in a contiguous range of registers.
 546        */
 547       instruction *
 548       LOAD_PAYLOAD(const dst_reg &dst, const src_reg *src,
 549                    unsigned sources, unsigned header_size) const
 550       {
 551          instruction *inst = emit(SHADER_OPCODE_LOAD_PAYLOAD, dst, src, sources);
 552          inst->header_size = header_size;
 553          inst->regs_written = header_size +
 554                               (sources - header_size) * (dispatch_width() / 8);
 555
 556          return inst;
 557       }
 558
 559       backend_shader *shader;
 560
 561    private:
 562       /**
 563        * Workaround for negation of UD registers.  See comment in
 564        * fs_generator::generate_code() for more details.
 565        */
 566       src_reg
 567       fix_unsigned_negate(const src_reg &src) const
 568       {
 569          if (src.type == BRW_REGISTER_TYPE_UD &&
 570              src.negate) {
 571             dst_reg temp = vgrf(BRW_REGISTER_TYPE_UD);
 572             MOV(temp, src);
 573             return src_reg(temp);
 574          } else {
 575             return src;
 576          }
 577       }
 578
 579       /**
 580        * Workaround for source register modes not supported by the ternary
 581        * instruction encoding.
 582        */
 583       src_reg
 584       fix_3src_operand(const src_reg &src) const
 585       {
 586          if (src.file == GRF || src.file == UNIFORM || src.stride > 1) {
 587             return src;
 588          } else {
 589             dst_reg expanded = vgrf(src.type);
 590             MOV(expanded, src);
 591             return expanded;
 592          }
 593       }
 594
 595       /**
 596        * Workaround for source register modes not supported by the math
 597        * instruction.
 598        */
 599       src_reg
 600       fix_math_operand(const src_reg &src) const
 601       {
 602          /* Can't do hstride == 0 args on gen6 math, so expand it out. We
 603           * might be able to do better by doing execsize = 1 math and then
 604           * expanding that result out, but we would need to be careful with
 605           * masking.
 606           *
 607           * Gen6 hardware ignores source modifiers (negate and abs) on math
 608           * instructions, so we also move to a temp to set those up.
 609           *
 610           * Gen7 relaxes most of the above restrictions, but still can't use IMM
 611           * operands to math
 612           */
 613          if ((shader->devinfo->gen == 6 &&
 614               (src.file == IMM || src.file == UNIFORM ||
 615                src.abs || src.negate)) ||
 616              (shader->devinfo->gen == 7 && src.file == IMM)) {
 617             const dst_reg tmp = vgrf(src.type);
 618             MOV(tmp, src);
 619             return tmp;
 620          } else {
 621             return src;
 622          }
 623       }
 624
 625       /**
 626        * Workaround other weirdness of the math instruction.
 627        */
 628       instruction *
 629       fix_math_instruction(instruction *inst) const
 630       {
 631          if (shader->devinfo->gen < 6) {
 632             inst->base_mrf = 2;
 633             inst->mlen = inst->sources * dispatch_width() / 8;
 634
 635             if (inst->sources > 1) {
 636                /* From the Ironlake PRM, Volume 4, Part 1, Section 6.1.13
 637                 * "Message Payload":
 638                 *
 639                 * "Operand0[7].  For the INT DIV functions, this operand is the
 640                 *  denominator."
 641                 *  ...
 642                 * "Operand1[7].  For the INT DIV functions, this operand is the
 643                 *  numerator."
 644                 */
 645                const bool is_int_div = inst->opcode != SHADER_OPCODE_POW;
 646                const fs_reg src0 = is_int_div ? inst->src[1] : inst->src[0];
 647                const fs_reg src1 = is_int_div ? inst->src[0] : inst->src[1];
 648
 649                inst->resize_sources(1);
 650                inst->src[0] = src0;
 651
 652                at(block, inst).MOV(fs_reg(MRF, inst->base_mrf + 1, src1.type),
 653                                    src1);
 654             }
 655          }
 656
 657          return inst;
 658       }
 659
 660       bblock_t *block;
 661       exec_node *cursor;
 662
 663       unsigned _dispatch_width;
 664       unsigned _group;
 665       bool force_writemask_all;
 666
 667       /** Debug annotation info. */
 668       struct {
 669          const char *str;
 670          const void *ir;
 671       } annotation;
 672    };
 673 }
 674
 675 #endif