src/mesa/drivers/dri/i965/brw_vec4_builder.h

   1 /* -*- c++ -*- */
   2 /*
   3  * Copyright © 2010-2015 Intel Corporation
   4  *
   5  * Permission is hereby granted, free of charge, to any person obtaining a
   6  * copy of this software and associated documentation files (the "Software"),
   7  * to deal in the Software without restriction, including without limitation
   8  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
   9  * and/or sell copies of the Software, and to permit persons to whom the
  10  * Software is furnished to do so, subject to the following conditions:
  11  *
  12  * The above copyright notice and this permission notice (including the next
  13  * paragraph) shall be included in all copies or substantial portions of the
  14  * Software.
  15  *
  16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
  19  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  20  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
  21  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
  22  * IN THE SOFTWARE.
  23  */
  24
  25 #ifndef BRW_VEC4_BUILDER_H
  26 #define BRW_VEC4_BUILDER_H
  27
  28 #include "brw_ir_vec4.h"
  29 #include "brw_ir_allocator.h"
  30 #include "brw_context.h"
  31
  32 namespace brw {
  33    /**
  34     * Toolbox to assemble a VEC4 IR program out of individual instructions.
  35     *
  36     * This object is meant to have an interface consistent with
  37     * brw::fs_builder.  They cannot be fully interchangeable because
  38     * brw::fs_builder generates scalar code while brw::vec4_builder generates
  39     * vector code.
  40     */
  41    class vec4_builder {
  42    public:
  43       /** Type used in this IR to represent a source of an instruction. */
  44       typedef brw::src_reg src_reg;
  45
  46       /** Type used in this IR to represent the destination of an instruction. */
  47       typedef brw::dst_reg dst_reg;
  48
  49       /** Type used in this IR to represent an instruction. */
  50       typedef vec4_instruction instruction;
  51
  52       /**
  53        * Construct a vec4_builder that inserts instructions into \p shader.
  54        */
  55       vec4_builder(backend_shader *shader) :
  56          shader(shader), block(NULL), cursor(NULL),
  57          force_writemask_all(false),
  58          annotation()
  59       {
  60       }
  61
  62       /**
  63        * Construct a vec4_builder that inserts instructions into \p shader
  64        * before instruction \p inst in basic block \p block.  The default
  65        * execution controls and debug annotation are initialized from the
  66        * instruction passed as argument.
  67        */
  68       vec4_builder(backend_shader *shader, bblock_t *block, instruction *inst) :
  69          shader(shader), block(block), cursor(inst),
  70          force_writemask_all(inst->force_writemask_all)
  71       {
  72          annotation.str = inst->annotation;
  73          annotation.ir = inst->ir;
  74       }
  75
  76       /**
  77        * Construct a vec4_builder that inserts instructions before \p cursor
  78        * in basic block \p block, inheriting other code generation parameters
  79        * from this.
  80        */
  81       vec4_builder
  82       at(bblock_t *block, exec_node *cursor) const
  83       {
  84          vec4_builder bld = *this;
  85          bld.block = block;
  86          bld.cursor = cursor;
  87          return bld;
  88       }
  89
  90       /**
  91        * Construct a vec4_builder appending instructions at the end of the
  92        * instruction list of the shader, inheriting other code generation
  93        * parameters from this.
  94        */
  95       vec4_builder
  96       at_end() const
  97       {
  98          return at(NULL, (exec_node *)&shader->instructions.tail);
  99       }
 100
 101       /**
 102        * Construct a builder with per-channel control flow execution masking
 103        * disabled if \p b is true.  If control flow execution masking is
 104        * already disabled this has no effect.
 105        */
 106       vec4_builder
 107       exec_all(bool b = true) const
 108       {
 109          vec4_builder bld = *this;
 110          if (b)
 111             bld.force_writemask_all = true;
 112          return bld;
 113       }
 114
 115       /**
 116        * Construct a builder with the given debug annotation info.
 117        */
 118       vec4_builder
 119       annotate(const char *str, const void *ir = NULL) const
 120       {
 121          vec4_builder bld = *this;
 122          bld.annotation.str = str;
 123          bld.annotation.ir = ir;
 124          return bld;
 125       }
 126
 127       /**
 128        * Get the SIMD width in use.
 129        */
 130       unsigned
 131       dispatch_width() const
 132       {
 133          return 8;
 134       }
 135
 136       /**
 137        * Allocate a virtual register of natural vector size (four for this IR)
 138        * and SIMD width.  \p n gives the amount of space to allocate in
 139        * dispatch_width units (which is just enough space for four logical
 140        * components in this IR).
 141        */
 142       dst_reg
 143       vgrf(enum brw_reg_type type, unsigned n = 1) const
 144       {
 145          assert(dispatch_width() <= 32);
 146
 147          if (n > 0)
 148             return retype(dst_reg(VGRF, shader->alloc.allocate(
 149                                      n * DIV_ROUND_UP(type_sz(type), 4))),
 150                            type);
 151          else
 152             return retype(null_reg_ud(), type);
 153       }
 154
 155       /**
 156        * Create a null register of floating type.
 157        */
 158       dst_reg
 159       null_reg_f() const
 160       {
 161          return dst_reg(retype(brw_null_vec(dispatch_width()),
 162                                BRW_REGISTER_TYPE_F));
 163       }
 164
 165       /**
 166        * Create a null register of signed integer type.
 167        */
 168       dst_reg
 169       null_reg_d() const
 170       {
 171          return dst_reg(retype(brw_null_vec(dispatch_width()),
 172                                BRW_REGISTER_TYPE_D));
 173       }
 174
 175       /**
 176        * Create a null register of unsigned integer type.
 177        */
 178       dst_reg
 179       null_reg_ud() const
 180       {
 181          return dst_reg(retype(brw_null_vec(dispatch_width()),
 182                                BRW_REGISTER_TYPE_UD));
 183       }
 184
 185       /**
 186        * Insert an instruction into the program.
 187        */
 188       instruction *
 189       emit(const instruction &inst) const
 190       {
 191          return emit(new(shader->mem_ctx) instruction(inst));
 192       }
 193
 194       /**
 195        * Create and insert a nullary control instruction into the program.
 196        */
 197       instruction *
 198       emit(enum opcode opcode) const
 199       {
 200          return emit(instruction(opcode));
 201       }
 202
 203       /**
 204        * Create and insert a nullary instruction into the program.
 205        */
 206       instruction *
 207       emit(enum opcode opcode, const dst_reg &dst) const
 208       {
 209          return emit(instruction(opcode, dst));
 210       }
 211
 212       /**
 213        * Create and insert a unary instruction into the program.
 214        */
 215       instruction *
 216       emit(enum opcode opcode, const dst_reg &dst, const src_reg &src0) const
 217       {
 218          switch (opcode) {
 219          case SHADER_OPCODE_RCP:
 220          case SHADER_OPCODE_RSQ:
 221          case SHADER_OPCODE_SQRT:
 222          case SHADER_OPCODE_EXP2:
 223          case SHADER_OPCODE_LOG2:
 224          case SHADER_OPCODE_SIN:
 225          case SHADER_OPCODE_COS:
 226             return fix_math_instruction(
 227                emit(instruction(opcode, dst,
 228                                 fix_math_operand(src0))));
 229
 230          default:
 231             return emit(instruction(opcode, dst, src0));
 232          }
 233       }
 234
 235       /**
 236        * Create and insert a binary instruction into the program.
 237        */
 238       instruction *
 239       emit(enum opcode opcode, const dst_reg &dst, const src_reg &src0,
 240            const src_reg &src1) const
 241       {
 242          switch (opcode) {
 243          case SHADER_OPCODE_POW:
 244          case SHADER_OPCODE_INT_QUOTIENT:
 245          case SHADER_OPCODE_INT_REMAINDER:
 246             return fix_math_instruction(
 247                emit(instruction(opcode, dst,
 248                                 fix_math_operand(src0),
 249                                 fix_math_operand(src1))));
 250
 251          default:
 252             return emit(instruction(opcode, dst, src0, src1));
 253          }
 254       }
 255
 256       /**
 257        * Create and insert a ternary instruction into the program.
 258        */
 259       instruction *
 260       emit(enum opcode opcode, const dst_reg &dst, const src_reg &src0,
 261            const src_reg &src1, const src_reg &src2) const
 262       {
 263          switch (opcode) {
 264          case BRW_OPCODE_BFE:
 265          case BRW_OPCODE_BFI2:
 266          case BRW_OPCODE_MAD:
 267          case BRW_OPCODE_LRP:
 268             return emit(instruction(opcode, dst,
 269                                     fix_3src_operand(src0),
 270                                     fix_3src_operand(src1),
 271                                     fix_3src_operand(src2)));
 272
 273          default:
 274             return emit(instruction(opcode, dst, src0, src1, src2));
 275          }
 276       }
 277
 278       /**
 279        * Insert a preallocated instruction into the program.
 280        */
 281       instruction *
 282       emit(instruction *inst) const
 283       {
 284          inst->force_writemask_all = force_writemask_all;
 285          inst->annotation = annotation.str;
 286          inst->ir = annotation.ir;
 287
 288          if (block)
 289             static_cast<instruction *>(cursor)->insert_before(block, inst);
 290          else
 291             cursor->insert_before(inst);
 292
 293          return inst;
 294       }
 295
 296       /**
 297        * Select \p src0 if the comparison of both sources with the given
 298        * conditional mod evaluates to true, otherwise select \p src1.
 299        *
 300        * Generally useful to get the minimum or maximum of two values.
 301        */
 302       instruction *
 303       emit_minmax(const dst_reg &dst, const src_reg &src0,
 304                   const src_reg &src1, brw_conditional_mod mod) const
 305       {
 306          assert(mod == BRW_CONDITIONAL_GE || mod == BRW_CONDITIONAL_L);
 307
 308          return set_condmod(mod, SEL(dst, fix_unsigned_negate(src0),
 309                                      fix_unsigned_negate(src1)));
 310       }
 311
 312       /**
 313        * Copy any live channel from \p src to the first channel of the result.
 314        */
 315       src_reg
 316       emit_uniformize(const src_reg &src) const
 317       {
 318          const vec4_builder ubld = exec_all();
 319          const dst_reg chan_index =
 320             writemask(vgrf(BRW_REGISTER_TYPE_UD), WRITEMASK_X);
 321          const dst_reg dst = vgrf(src.type);
 322
 323          ubld.emit(SHADER_OPCODE_FIND_LIVE_CHANNEL, chan_index);
 324          ubld.emit(SHADER_OPCODE_BROADCAST, dst, src, src_reg(chan_index));
 325
 326          return src_reg(dst);
 327       }
 328
 329       /**
 330        * Assorted arithmetic ops.
 331        * @{
 332        */
 333 #define ALU1(op)                                        \
 334       instruction *                                     \
 335       op(const dst_reg &dst, const src_reg &src0) const \
 336       {                                                 \
 337          return emit(BRW_OPCODE_##op, dst, src0);       \
 338       }
 339
 340 #define ALU2(op)                                                        \
 341       instruction *                                                     \
 342       op(const dst_reg &dst, const src_reg &src0, const src_reg &src1) const \
 343       {                                                                 \
 344          return emit(BRW_OPCODE_##op, dst, src0, src1);                 \
 345       }
 346
 347 #define ALU2_ACC(op)                                                    \
 348       instruction *                                                     \
 349       op(const dst_reg &dst, const src_reg &src0, const src_reg &src1) const \
 350       {                                                                 \
 351          instruction *inst = emit(BRW_OPCODE_##op, dst, src0, src1);    \
 352          inst->writes_accumulator = true;                               \
 353          return inst;                                                   \
 354       }
 355
 356 #define ALU3(op)                                                        \
 357       instruction *                                                     \
 358       op(const dst_reg &dst, const src_reg &src0, const src_reg &src1,  \
 359          const src_reg &src2) const                                     \
 360       {                                                                 \
 361          return emit(BRW_OPCODE_##op, dst, src0, src1, src2);           \
 362       }
 363
 364       ALU2(ADD)
 365       ALU2_ACC(ADDC)
 366       ALU2(AND)
 367       ALU2(ASR)
 368       ALU2(AVG)
 369       ALU3(BFE)
 370       ALU2(BFI1)
 371       ALU3(BFI2)
 372       ALU1(BFREV)
 373       ALU1(CBIT)
 374       ALU2(CMPN)
 375       ALU3(CSEL)
 376       ALU2(DP2)
 377       ALU2(DP3)
 378       ALU2(DP4)
 379       ALU2(DPH)
 380       ALU1(F16TO32)
 381       ALU1(F32TO16)
 382       ALU1(FBH)
 383       ALU1(FBL)
 384       ALU1(FRC)
 385       ALU2(LINE)
 386       ALU1(LZD)
 387       ALU2(MAC)
 388       ALU2_ACC(MACH)
 389       ALU3(MAD)
 390       ALU1(MOV)
 391       ALU2(MUL)
 392       ALU1(NOT)
 393       ALU2(OR)
 394       ALU2(PLN)
 395       ALU1(RNDD)
 396       ALU1(RNDE)
 397       ALU1(RNDU)
 398       ALU1(RNDZ)
 399       ALU2(SAD2)
 400       ALU2_ACC(SADA2)
 401       ALU2(SEL)
 402       ALU2(SHL)
 403       ALU2(SHR)
 404       ALU2_ACC(SUBB)
 405       ALU2(XOR)
 406
 407 #undef ALU3
 408 #undef ALU2_ACC
 409 #undef ALU2
 410 #undef ALU1
 411       /** @} */
 412
 413       /**
 414        * CMP: Sets the low bit of the destination channels with the result
 415        * of the comparison, while the upper bits are undefined, and updates
 416        * the flag register with the packed 16 bits of the result.
 417        */
 418       instruction *
 419       CMP(const dst_reg &dst, const src_reg &src0, const src_reg &src1,
 420           brw_conditional_mod condition) const
 421       {
 422          /* Take the instruction:
 423           *
 424           * CMP null<d> src0<f> src1<f>
 425           *
 426           * Original gen4 does type conversion to the destination type
 427           * before comparison, producing garbage results for floating
 428           * point comparisons.
 429           *
 430           * The destination type doesn't matter on newer generations,
 431           * so we set the type to match src0 so we can compact the
 432           * instruction.
 433           */
 434          return set_condmod(condition,
 435                             emit(BRW_OPCODE_CMP, retype(dst, src0.type),
 436                                  fix_unsigned_negate(src0),
 437                                  fix_unsigned_negate(src1)));
 438       }
 439
 440       /**
 441        * Gen4 predicated IF.
 442        */
 443       instruction *
 444       IF(brw_predicate predicate) const
 445       {
 446          return set_predicate(predicate, emit(BRW_OPCODE_IF));
 447       }
 448
 449       /**
 450        * Gen6 IF with embedded comparison.
 451        */
 452       instruction *
 453       IF(const src_reg &src0, const src_reg &src1,
 454          brw_conditional_mod condition) const
 455       {
 456          assert(shader->devinfo->gen == 6);
 457          return set_condmod(condition,
 458                             emit(BRW_OPCODE_IF,
 459                                  null_reg_d(),
 460                                  fix_unsigned_negate(src0),
 461                                  fix_unsigned_negate(src1)));
 462       }
 463
 464       /**
 465        * Emit a linear interpolation instruction.
 466        */
 467       instruction *
 468       LRP(const dst_reg &dst, const src_reg &x, const src_reg &y,
 469           const src_reg &a) const
 470       {
 471          if (shader->devinfo->gen >= 6) {
 472             /* The LRP instruction actually does op1 * op0 + op2 * (1 - op0), so
 473              * we need to reorder the operands.
 474              */
 475             return emit(BRW_OPCODE_LRP, dst, a, y, x);
 476
 477          } else {
 478             /* We can't use the LRP instruction.  Emit x*(1-a) + y*a. */
 479             const dst_reg y_times_a = vgrf(dst.type);
 480             const dst_reg one_minus_a = vgrf(dst.type);
 481             const dst_reg x_times_one_minus_a = vgrf(dst.type);
 482
 483             MUL(y_times_a, y, a);
 484             ADD(one_minus_a, negate(a), brw_imm_f(1.0f));
 485             MUL(x_times_one_minus_a, x, src_reg(one_minus_a));
 486             return ADD(dst, src_reg(x_times_one_minus_a), src_reg(y_times_a));
 487          }
 488       }
 489
      /**
       * Shader the builder works on: emitted instruction copies are
       * allocated out of its mem_ctx, virtual registers come from its
       * allocator, and its devinfo selects the hardware workarounds applied.
       */
      backend_shader *shader;
 491
 492    protected:
 493       /**
 494        * Workaround for negation of UD registers.  See comment in
 495        * fs_generator::generate_code() for the details.
 496        */
 497       src_reg
 498       fix_unsigned_negate(const src_reg &src) const
 499       {
 500          if (src.type == BRW_REGISTER_TYPE_UD && src.negate) {
 501             dst_reg temp = vgrf(BRW_REGISTER_TYPE_UD);
 502             MOV(temp, src);
 503             return src_reg(temp);
 504          } else {
 505             return src;
 506          }
 507       }
 508
 509       /**
 510        * Workaround for register access modes not supported by the ternary
 511        * instruction encoding.
 512        */
 513       src_reg
 514       fix_3src_operand(const src_reg &src) const
 515       {
 516          /* Using vec4 uniforms in SIMD4x2 programs is difficult. You'd like to be
 517           * able to use vertical stride of zero to replicate the vec4 uniform, like
 518           *
 519           *    g3<0;4,1>:f - [0, 4][1, 5][2, 6][3, 7]
 520           *
 521           * But you can't, since vertical stride is always four in three-source
 522           * instructions. Instead, insert a MOV instruction to do the replication so
 523           * that the three-source instruction can consume it.
 524           */
 525
 526          /* The MOV is only needed if the source is a uniform or immediate. */
 527          if (src.file != UNIFORM && src.file != IMM)
 528             return src;
 529
 530          if (src.file == UNIFORM && brw_is_single_value_swizzle(src.swizzle))
 531             return src;
 532
 533          const dst_reg expanded = vgrf(src.type);
 534          emit(VEC4_OPCODE_UNPACK_UNIFORM, expanded, src);
 535          return src_reg(expanded);
 536       }
 537
 538       /**
 539        * Workaround for register access modes not supported by the math
 540        * instruction.
 541        */
 542       src_reg
 543       fix_math_operand(const src_reg &src) const
 544       {
 545          /* The gen6 math instruction ignores the source modifiers --
 546           * swizzle, abs, negate, and at least some parts of the register
 547           * region description.
 548           *
 549           * Rather than trying to enumerate all these cases, *always* expand the
 550           * operand to a temp GRF for gen6.
 551           *
 552           * For gen7, keep the operand as-is, except if immediate, which gen7 still
 553           * can't use.
 554           */
 555          if (shader->devinfo->gen == 6 ||
 556              (shader->devinfo->gen == 7 && src.file == IMM)) {
 557             const dst_reg tmp = vgrf(src.type);
 558             MOV(tmp, src);
 559             return src_reg(tmp);
 560          } else {
 561             return src;
 562          }
 563       }
 564
 565       /**
 566        * Workaround other weirdness of the math instruction.
 567        */
 568       instruction *
 569       fix_math_instruction(instruction *inst) const
 570       {
 571          if (shader->devinfo->gen == 6 &&
 572              inst->dst.writemask != WRITEMASK_XYZW) {
 573             const dst_reg tmp = vgrf(inst->dst.type);
 574             MOV(inst->dst, src_reg(tmp));
 575             inst->dst = tmp;
 576
 577          } else if (shader->devinfo->gen < 6) {
 578             const unsigned sources = (inst->src[1].file == BAD_FILE ? 1 : 2);
 579             inst->base_mrf = 1;
 580             inst->mlen = sources;
 581          }
 582
 583          return inst;
 584       }
 585
      /**
       * Basic block passed to insert_before() when instructions are
       * emitted, or NULL to insert through the plain list node interface.
       */
      bblock_t *block;
      /** List node new instructions are inserted before. */
      exec_node *cursor;

      /** Value copied into force_writemask_all of every emitted instruction. */
      bool force_writemask_all;

      /**
       * Debug annotation info: \c str and \c ir are copied into the
       * annotation and ir fields of every emitted instruction.
       */
      struct {
         const char *str;
         const void *ir;
      } annotation;
 596    };
 597 }
 598
 599 #endif