src/mesa/drivers/dri/i965/brw_vec4_builder.h

   1 /* -*- c++ -*- */
   2 /*
   3  * Copyright © 2010-2015 Intel Corporation
   4  *
   5  * Permission is hereby granted, free of charge, to any person obtaining a
   6  * copy of this software and associated documentation files (the "Software"),
   7  * to deal in the Software without restriction, including without limitation
   8  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
   9  * and/or sell copies of the Software, and to permit persons to whom the
  10  * Software is furnished to do so, subject to the following conditions:
  11  *
  12  * The above copyright notice and this permission notice (including the next
  13  * paragraph) shall be included in all copies or substantial portions of the
  14  * Software.
  15  *
  16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
  19  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  20  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
  21  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
  22  * IN THE SOFTWARE.
  23  */
  24
  25 #ifndef BRW_VEC4_BUILDER_H
  26 #define BRW_VEC4_BUILDER_H
  27
  28 #include "brw_ir_vec4.h"
  29 #include "brw_ir_allocator.h"
  30 #include "brw_context.h"
  31
  32 namespace brw {
  33    /**
  34     * Toolbox to assemble a VEC4 IR program out of individual instructions.
  35     *
  36     * This object is meant to have an interface consistent with
  37     * brw::fs_builder.  They cannot be fully interchangeable because
  38     * brw::fs_builder generates scalar code while brw::vec4_builder generates
  39     * vector code.
  40     */
  41    class vec4_builder {
  42    public:
  43       /** Type used in this IR to represent a source of an instruction. */
  44       typedef brw::src_reg src_reg;
  45
  46       /** Type used in this IR to represent the destination of an instruction. */
  47       typedef brw::dst_reg dst_reg;
  48
  49       /** Type used in this IR to represent an instruction. */
  50       typedef vec4_instruction instruction;
  51
  52       /**
  53        * Construct a vec4_builder that inserts instructions into \p shader.
  54        */
  55       vec4_builder(backend_shader *shader) :
  56          shader(shader), block(NULL), cursor(NULL),
  57          force_writemask_all(false),
  58          annotation()
  59       {
  60       }
  61
  62       /**
  63        * Construct a vec4_builder that inserts instructions into \p shader
  64        * before instruction \p inst in basic block \p block.  The default
  65        * execution controls and debug annotation are initialized from the
  66        * instruction passed as argument.
  67        */
  68       vec4_builder(backend_shader *shader, bblock_t *block, instruction *inst) :
  69          shader(shader), block(block), cursor(inst),
  70          force_writemask_all(inst->force_writemask_all)
  71       {
  72          annotation.str = inst->annotation;
  73          annotation.ir = inst->ir;
  74       }
  75
  76       /**
  77        * Construct a vec4_builder that inserts instructions before \p cursor
  78        * in basic block \p block, inheriting other code generation parameters
  79        * from this.
  80        */
  81       vec4_builder
  82       at(bblock_t *block, exec_node *cursor) const
  83       {
  84          vec4_builder bld = *this;
  85          bld.block = block;
  86          bld.cursor = cursor;
  87          return bld;
  88       }
  89
  90       /**
  91        * Construct a vec4_builder appending instructions at the end of the
  92        * instruction list of the shader, inheriting other code generation
  93        * parameters from this.
  94        */
  95       vec4_builder
  96       at_end() const
  97       {
  98          return at(NULL, (exec_node *)&shader->instructions.tail_sentinel);
  99       }
 100
 101       /**
 102        * Construct a builder with per-channel control flow execution masking
 103        * disabled if \p b is true.  If control flow execution masking is
 104        * already disabled this has no effect.
 105        */
 106       vec4_builder
 107       exec_all(bool b = true) const
 108       {
 109          vec4_builder bld = *this;
 110          if (b)
 111             bld.force_writemask_all = true;
 112          return bld;
 113       }
 114
 115       /**
 116        * Construct a builder with the given debug annotation info.
 117        */
 118       vec4_builder
 119       annotate(const char *str, const void *ir = NULL) const
 120       {
 121          vec4_builder bld = *this;
 122          bld.annotation.str = str;
 123          bld.annotation.ir = ir;
 124          return bld;
 125       }
 126
 127       /**
 128        * Get the SIMD width in use.
 129        */
 130       unsigned
 131       dispatch_width() const
 132       {
 133          return 8;
 134       }
 135
 136       /**
 137        * Allocate a virtual register of natural vector size (four for this IR)
 138        * and SIMD width.  \p n gives the amount of space to allocate in
 139        * dispatch_width units (which is just enough space for four logical
 140        * components in this IR).
 141        */
 142       dst_reg
 143       vgrf(enum brw_reg_type type, unsigned n = 1) const
 144       {
 145          assert(dispatch_width() <= 32);
 146
 147          if (n > 0)
 148             return retype(dst_reg(VGRF, shader->alloc.allocate(
 149                                      n * DIV_ROUND_UP(type_sz(type), 4))),
 150                            type);
 151          else
 152             return retype(null_reg_ud(), type);
 153       }
 154
 155       /**
 156        * Create a null register of floating type.
 157        */
 158       dst_reg
 159       null_reg_f() const
 160       {
 161          return dst_reg(retype(brw_null_vec(dispatch_width()),
 162                                BRW_REGISTER_TYPE_F));
 163       }
 164
 165       /**
 166        * Create a null register of signed integer type.
 167        */
 168       dst_reg
 169       null_reg_d() const
 170       {
 171          return dst_reg(retype(brw_null_vec(dispatch_width()),
 172                                BRW_REGISTER_TYPE_D));
 173       }
 174
 175       /**
 176        * Create a null register of unsigned integer type.
 177        */
 178       dst_reg
 179       null_reg_ud() const
 180       {
 181          return dst_reg(retype(brw_null_vec(dispatch_width()),
 182                                BRW_REGISTER_TYPE_UD));
 183       }
 184
 185       /**
 186        * Insert an instruction into the program.
 187        */
 188       instruction *
 189       emit(const instruction &inst) const
 190       {
 191          return emit(new(shader->mem_ctx) instruction(inst));
 192       }
 193
 194       /**
 195        * Create and insert a nullary control instruction into the program.
 196        */
 197       instruction *
 198       emit(enum opcode opcode) const
 199       {
 200          return emit(instruction(opcode));
 201       }
 202
 203       /**
 204        * Create and insert a nullary instruction into the program.
 205        */
 206       instruction *
 207       emit(enum opcode opcode, const dst_reg &dst) const
 208       {
 209          return emit(instruction(opcode, dst));
 210       }
 211
 212       /**
 213        * Create and insert a unary instruction into the program.
 214        */
 215       instruction *
 216       emit(enum opcode opcode, const dst_reg &dst, const src_reg &src0) const
 217       {
 218          switch (opcode) {
 219          case SHADER_OPCODE_RCP:
 220          case SHADER_OPCODE_RSQ:
 221          case SHADER_OPCODE_SQRT:
 222          case SHADER_OPCODE_EXP2:
 223          case SHADER_OPCODE_LOG2:
 224          case SHADER_OPCODE_SIN:
 225          case SHADER_OPCODE_COS:
 226             return fix_math_instruction(
 227                emit(instruction(opcode, dst,
 228                                 fix_math_operand(src0))));
 229
 230          default:
 231             return emit(instruction(opcode, dst, src0));
 232          }
 233       }
 234
 235       /**
 236        * Create and insert a binary instruction into the program.
 237        */
 238       instruction *
 239       emit(enum opcode opcode, const dst_reg &dst, const src_reg &src0,
 240            const src_reg &src1) const
 241       {
 242          switch (opcode) {
 243          case SHADER_OPCODE_POW:
 244          case SHADER_OPCODE_INT_QUOTIENT:
 245          case SHADER_OPCODE_INT_REMAINDER:
 246             return fix_math_instruction(
 247                emit(instruction(opcode, dst,
 248                                 fix_math_operand(src0),
 249                                 fix_math_operand(src1))));
 250
 251          default:
 252             return emit(instruction(opcode, dst, src0, src1));
 253          }
 254       }
 255
 256       /**
 257        * Create and insert a ternary instruction into the program.
 258        */
 259       instruction *
 260       emit(enum opcode opcode, const dst_reg &dst, const src_reg &src0,
 261            const src_reg &src1, const src_reg &src2) const
 262       {
 263          switch (opcode) {
 264          case BRW_OPCODE_BFE:
 265          case BRW_OPCODE_BFI2:
 266          case BRW_OPCODE_MAD:
 267          case BRW_OPCODE_LRP:
 268             return emit(instruction(opcode, dst,
 269                                     fix_3src_operand(src0),
 270                                     fix_3src_operand(src1),
 271                                     fix_3src_operand(src2)));
 272
 273          default:
 274             return emit(instruction(opcode, dst, src0, src1, src2));
 275          }
 276       }
 277
 278       /**
 279        * Insert a preallocated instruction into the program.
 280        */
 281       instruction *
 282       emit(instruction *inst) const
 283       {
 284          inst->force_writemask_all = force_writemask_all;
 285          inst->annotation = annotation.str;
 286          inst->ir = annotation.ir;
 287
 288          if (block)
 289             static_cast<instruction *>(cursor)->insert_before(block, inst);
 290          else
 291             cursor->insert_before(inst);
 292
 293          return inst;
 294       }
 295
 296       /**
 297        * Select \p src0 if the comparison of both sources with the given
 298        * conditional mod evaluates to true, otherwise select \p src1.
 299        *
 300        * Generally useful to get the minimum or maximum of two values.
 301        */
 302       instruction *
 303       emit_minmax(const dst_reg &dst, const src_reg &src0,
 304                   const src_reg &src1, brw_conditional_mod mod) const
 305       {
 306          assert(mod == BRW_CONDITIONAL_GE || mod == BRW_CONDITIONAL_L);
 307
 308          return set_condmod(mod, SEL(dst, fix_unsigned_negate(src0),
 309                                      fix_unsigned_negate(src1)));
 310       }
 311
 312       /**
 313        * Copy any live channel from \p src to the first channel of the result.
 314        */
 315       src_reg
 316       emit_uniformize(const src_reg &src) const
 317       {
 318          const vec4_builder ubld = exec_all();
 319          const dst_reg chan_index =
 320             writemask(vgrf(BRW_REGISTER_TYPE_UD), WRITEMASK_X);
 321          const dst_reg dst = vgrf(src.type);
 322
 323          ubld.emit(SHADER_OPCODE_FIND_LIVE_CHANNEL, chan_index);
 324          ubld.emit(SHADER_OPCODE_BROADCAST, dst, src, src_reg(chan_index));
 325
 326          return src_reg(dst);
 327       }
 328
 329       /**
 330        * Assorted arithmetic ops.
 331        * @{
 332        */
 333 #define ALU1(op)                                        \
 334       instruction *                                     \
 335       op(const dst_reg &dst, const src_reg &src0) const \
 336       {                                                 \
 337          return emit(BRW_OPCODE_##op, dst, src0);       \
 338       }
 339
 340 #define ALU2(op)                                                        \
 341       instruction *                                                     \
 342       op(const dst_reg &dst, const src_reg &src0, const src_reg &src1) const \
 343       {                                                                 \
 344          return emit(BRW_OPCODE_##op, dst, src0, src1);                 \
 345       }
 346
 347 #define ALU2_ACC(op)                                                    \
 348       instruction *                                                     \
 349       op(const dst_reg &dst, const src_reg &src0, const src_reg &src1) const \
 350       {                                                                 \
 351          instruction *inst = emit(BRW_OPCODE_##op, dst, src0, src1);    \
 352          inst->writes_accumulator = true;                               \
 353          return inst;                                                   \
 354       }
 355
 356 #define ALU3(op)                                                        \
 357       instruction *                                                     \
 358       op(const dst_reg &dst, const src_reg &src0, const src_reg &src1,  \
 359          const src_reg &src2) const                                     \
 360       {                                                                 \
 361          return emit(BRW_OPCODE_##op, dst, src0, src1, src2);           \
 362       }
 363
 364       ALU2(ADD)
 365       ALU2_ACC(ADDC)
 366       ALU2(AND)
 367       ALU2(ASR)
 368       ALU2(AVG)
 369       ALU3(BFE)
 370       ALU2(BFI1)
 371       ALU3(BFI2)
 372       ALU1(BFREV)
 373       ALU1(CBIT)
 374       ALU2(CMPN)
 375       ALU3(CSEL)
 376       ALU1(DIM)
 377       ALU2(DP2)
 378       ALU2(DP3)
 379       ALU2(DP4)
 380       ALU2(DPH)
 381       ALU1(F16TO32)
 382       ALU1(F32TO16)
 383       ALU1(FBH)
 384       ALU1(FBL)
 385       ALU1(FRC)
 386       ALU2(LINE)
 387       ALU1(LZD)
 388       ALU2(MAC)
 389       ALU2_ACC(MACH)
 390       ALU3(MAD)
 391       ALU1(MOV)
 392       ALU2(MUL)
 393       ALU1(NOT)
 394       ALU2(OR)
 395       ALU2(PLN)
 396       ALU1(RNDD)
 397       ALU1(RNDE)
 398       ALU1(RNDU)
 399       ALU1(RNDZ)
 400       ALU2(SAD2)
 401       ALU2_ACC(SADA2)
 402       ALU2(SEL)
 403       ALU2(SHL)
 404       ALU2(SHR)
 405       ALU2_ACC(SUBB)
 406       ALU2(XOR)
 407
 408 #undef ALU3
 409 #undef ALU2_ACC
 410 #undef ALU2
 411 #undef ALU1
 412       /** @} */
 413
 414       /**
 415        * CMP: Sets the low bit of the destination channels with the result
 416        * of the comparison, while the upper bits are undefined, and updates
 417        * the flag register with the packed 16 bits of the result.
 418        */
 419       instruction *
 420       CMP(const dst_reg &dst, const src_reg &src0, const src_reg &src1,
 421           brw_conditional_mod condition) const
 422       {
 423          /* Take the instruction:
 424           *
 425           * CMP null<d> src0<f> src1<f>
 426           *
 427           * Original gen4 does type conversion to the destination type
 428           * before comparison, producing garbage results for floating
 429           * point comparisons.
 430           *
 431           * The destination type doesn't matter on newer generations,
 432           * so we set the type to match src0 so we can compact the
 433           * instruction.
 434           */
 435          return set_condmod(condition,
 436                             emit(BRW_OPCODE_CMP, retype(dst, src0.type),
 437                                  fix_unsigned_negate(src0),
 438                                  fix_unsigned_negate(src1)));
 439       }
 440
 441       /**
 442        * Gen4 predicated IF.
 443        */
 444       instruction *
 445       IF(brw_predicate predicate) const
 446       {
 447          return set_predicate(predicate, emit(BRW_OPCODE_IF));
 448       }
 449
 450       /**
 451        * Gen6 IF with embedded comparison.
 452        */
 453       instruction *
 454       IF(const src_reg &src0, const src_reg &src1,
 455          brw_conditional_mod condition) const
 456       {
 457          assert(shader->devinfo->gen == 6);
 458          return set_condmod(condition,
 459                             emit(BRW_OPCODE_IF,
 460                                  null_reg_d(),
 461                                  fix_unsigned_negate(src0),
 462                                  fix_unsigned_negate(src1)));
 463       }
 464
 465       /**
 466        * Emit a linear interpolation instruction.
 467        */
 468       instruction *
 469       LRP(const dst_reg &dst, const src_reg &x, const src_reg &y,
 470           const src_reg &a) const
 471       {
 472          if (shader->devinfo->gen >= 6) {
 473             /* The LRP instruction actually does op1 * op0 + op2 * (1 - op0), so
 474              * we need to reorder the operands.
 475              */
 476             return emit(BRW_OPCODE_LRP, dst, a, y, x);
 477
 478          } else {
 479             /* We can't use the LRP instruction.  Emit x*(1-a) + y*a. */
 480             const dst_reg y_times_a = vgrf(dst.type);
 481             const dst_reg one_minus_a = vgrf(dst.type);
 482             const dst_reg x_times_one_minus_a = vgrf(dst.type);
 483
 484             MUL(y_times_a, y, a);
 485             ADD(one_minus_a, negate(a), brw_imm_f(1.0f));
 486             MUL(x_times_one_minus_a, x, src_reg(one_minus_a));
 487             return ADD(dst, src_reg(x_times_one_minus_a), src_reg(y_times_a));
 488          }
 489       }
 490
 491       backend_shader *shader;
 492
 493    protected:
 494       /**
 495        * Workaround for negation of UD registers.  See comment in
 496        * fs_generator::generate_code() for the details.
 497        */
 498       src_reg
 499       fix_unsigned_negate(const src_reg &src) const
 500       {
 501          if (src.type == BRW_REGISTER_TYPE_UD && src.negate) {
 502             dst_reg temp = vgrf(BRW_REGISTER_TYPE_UD);
 503             MOV(temp, src);
 504             return src_reg(temp);
 505          } else {
 506             return src;
 507          }
 508       }
 509
 510       /**
 511        * Workaround for register access modes not supported by the ternary
 512        * instruction encoding.
 513        */
 514       src_reg
 515       fix_3src_operand(const src_reg &src) const
 516       {
 517          /* Using vec4 uniforms in SIMD4x2 programs is difficult. You'd like to be
 518           * able to use vertical stride of zero to replicate the vec4 uniform, like
 519           *
 520           *    g3<0;4,1>:f - [0, 4][1, 5][2, 6][3, 7]
 521           *
 522           * But you can't, since vertical stride is always four in three-source
 523           * instructions. Instead, insert a MOV instruction to do the replication so
 524           * that the three-source instruction can consume it.
 525           */
 526
 527          /* The MOV is only needed if the source is a uniform or immediate. */
 528          if (src.file != UNIFORM && src.file != IMM)
 529             return src;
 530
 531          if (src.file == UNIFORM && brw_is_single_value_swizzle(src.swizzle))
 532             return src;
 533
 534          const dst_reg expanded = vgrf(src.type);
 535          emit(VEC4_OPCODE_UNPACK_UNIFORM, expanded, src);
 536          return src_reg(expanded);
 537       }
 538
 539       /**
 540        * Workaround for register access modes not supported by the math
 541        * instruction.
 542        */
 543       src_reg
 544       fix_math_operand(const src_reg &src) const
 545       {
 546          /* The gen6 math instruction ignores the source modifiers --
 547           * swizzle, abs, negate, and at least some parts of the register
 548           * region description.
 549           *
 550           * Rather than trying to enumerate all these cases, *always* expand the
 551           * operand to a temp GRF for gen6.
 552           *
 553           * For gen7, keep the operand as-is, except if immediate, which gen7 still
 554           * can't use.
 555           */
 556          if (shader->devinfo->gen == 6 ||
 557              (shader->devinfo->gen == 7 && src.file == IMM)) {
 558             const dst_reg tmp = vgrf(src.type);
 559             MOV(tmp, src);
 560             return src_reg(tmp);
 561          } else {
 562             return src;
 563          }
 564       }
 565
 566       /**
 567        * Workaround other weirdness of the math instruction.
 568        */
 569       instruction *
 570       fix_math_instruction(instruction *inst) const
 571       {
 572          if (shader->devinfo->gen == 6 &&
 573              inst->dst.writemask != WRITEMASK_XYZW) {
 574             const dst_reg tmp = vgrf(inst->dst.type);
 575             MOV(inst->dst, src_reg(tmp));
 576             inst->dst = tmp;
 577
 578          } else if (shader->devinfo->gen < 6) {
 579             const unsigned sources = (inst->src[1].file == BAD_FILE ? 1 : 2);
 580             inst->base_mrf = 1;
 581             inst->mlen = sources;
 582          }
 583
 584          return inst;
 585       }
 586
 587       bblock_t *block;
 588       exec_node *cursor;
 589
 590       bool force_writemask_all;
 591
 592       /** Debug annotation info. */
 593       struct {
 594          const char *str;
 595          const void *ir;
 596       } annotation;
 597    };
 598 }
 599
 600 #endif