src/arch/gcn3/operand.hh

   1 /*
   2  * Copyright (c) 2017 Advanced Micro Devices, Inc.
   3  * All rights reserved.
   4  *
   5  * For use for simulation and test purposes only
   6  *
   7  * Redistribution and use in source and binary forms, with or without
   8  * modification, are permitted provided that the following conditions are met:
   9  *
  10  * 1. Redistributions of source code must retain the above copyright notice,
  11  * this list of conditions and the following disclaimer.
  12  *
  13  * 2. Redistributions in binary form must reproduce the above copyright notice,
  14  * this list of conditions and the following disclaimer in the documentation
  15  * and/or other materials provided with the distribution.
  16  *
  17  * 3. Neither the name of the copyright holder nor the names of its
  18  * contributors may be used to endorse or promote products derived from this
  19  * software without specific prior written permission.
  20  *
  21  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
  22  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  23  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  24  * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
  25  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
  26  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
  27  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
  28  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
  29  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
  30  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
  31  * POSSIBILITY OF SUCH DAMAGE.
  32  *
  33  * Authors: Anthony Gutierrez
  34  */
  35
  36 #ifndef __ARCH_GCN3_OPERAND_HH__
  37 #define __ARCH_GCN3_OPERAND_HH__
  38
  39 #include <array>
  40
  41 #include "arch/gcn3/registers.hh"
  42 #include "arch/generic/vec_reg.hh"
  43 #include "gpu-compute/scalar_register_file.hh"
  44 #include "gpu-compute/vector_register_file.hh"
  45 #include "gpu-compute/wavefront.hh"
  46
  47 /**
  48  * classes that represnt vector/scalar operands in GCN3 ISA. these classes
  49  * wrap the generic vector register type (i.e., src/arch/generic/vec_reg.hh)
  50  * and allow them to be manipulated in ways that are unique to GCN3 insts.
  51  */
  52
  53 namespace Gcn3ISA
  54 {
  55     /**
  56      * convenience traits so we can automatically infer the correct FP type
  57      * without looking at the number of dwords (i.e., to determine if we
  58      * need a float or a double when creating FP constants).
  59      */
  60     template<typename T> struct OpTraits { typedef float FloatT; };
  61     template<> struct OpTraits<ScalarRegF64> { typedef double FloatT; };
  62     template<> struct OpTraits<ScalarRegU64> { typedef double FloatT; };
  63
  64     class Operand
  65     {
  66       public:
  67         Operand() = delete;
  68
  69         Operand(GPUDynInstPtr gpuDynInst, int opIdx)
  70             : _gpuDynInst(gpuDynInst), _opIdx(opIdx)
  71         {
  72             assert(_gpuDynInst);
  73             assert(_opIdx >= 0);
  74         }
  75
  76         /**
  77          * read from and write to the underlying register(s) that
  78          * this operand is referring to.
  79          */
  80         virtual void read() = 0;
  81         virtual void write() = 0;
  82
  83       protected:
  84         /**
  85          * instruction object that owns this operand
  86          */
  87         GPUDynInstPtr _gpuDynInst;
  88         /**
  89          * op selector value for this operand. note that this is not
  90          * the same as the register file index, be it scalar or vector.
  91          * this could refer to inline constants, system regs, or even
  92          * special values.
  93          */
  94         int _opIdx;
  95     };
  96
  97     template<typename DataType, bool Const, size_t NumDwords>
  98     class ScalarOperand;
  99
 100     template<typename DataType, bool Const,
 101         size_t NumDwords = sizeof(DataType) / sizeof(VecElemU32)>
 102     class VecOperand final : public Operand
 103     {
 104       static_assert(NumDwords >= 1 && NumDwords <= MaxOperandDwords,
 105             "Incorrect number of DWORDS for GCN3 operand.");
 106
 107       public:
 108         VecOperand() = delete;
 109
 110         VecOperand(GPUDynInstPtr gpuDynInst, int opIdx)
 111             : Operand(gpuDynInst, opIdx), scalar(false), absMod(false),
 112               negMod(false), scRegData(gpuDynInst, _opIdx),
 113               vrfData{{ nullptr }}
 114         {
 115             vecReg.zero();
 116         }
 117
 118         ~VecOperand()
 119         {
 120         }
 121
 122         /**
 123          * certain vector operands can read from the vrf/srf or constants.
 124          * we use this method to first determine the type of the operand,
 125          * then we read from the appropriate source. if vector we read
 126          * directly from the vrf. if scalar, we read in the data through
 127          * the scalar operand component. this should only be used for VSRC
 128          * operands.
 129          */
 130         void
 131         readSrc()
 132         {
 133             if (isVectorReg(_opIdx)) {
 134                 _opIdx = opSelectorToRegIdx(_opIdx, _gpuDynInst->wavefront()
 135                     ->reservedScalarRegs);
 136                 read();
 137             } else {
 138                 readScalar();
 139             }
 140         }
 141
 142         /**
 143          * read from the vrf. this should only be used by vector inst
 144          * source operands that are explicitly vector (i.e., VSRC).
 145          */
 146         void
 147         read() override
 148         {
 149             assert(_gpuDynInst);
 150             assert(_gpuDynInst->wavefront());
 151             assert(_gpuDynInst->computeUnit());
 152             Wavefront *wf = _gpuDynInst->wavefront();
 153             ComputeUnit *cu = _gpuDynInst->computeUnit();
 154
 155             for (auto i = 0; i < NumDwords; ++i) {
 156                 int vgprIdx = cu->registerManager.mapVgpr(wf, _opIdx + i);
 157                 vrfData[i] = &cu->vrf[wf->simdId]->readWriteable(vgprIdx);
 158
 159                 DPRINTF(GPUVRF, "Read v[%d]\n", vgprIdx);
 160                 cu->vrf[wf->simdId]->printReg(wf, vgprIdx);
 161             }
 162
 163             if (NumDwords == 1) {
 164                 assert(vrfData[0]);
 165                 auto vgpr = vecReg.template as<DataType>();
 166                 auto reg_file_vgpr = vrfData[0]->template as<VecElemU32>();
 167                 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
 168                     std::memcpy((void*)&vgpr[lane],
 169                         (void*)&reg_file_vgpr[lane], sizeof(DataType));
 170                 }
 171             } else if (NumDwords == 2) {
 172                 assert(vrfData[0]);
 173                 assert(vrfData[1]);
 174                 auto vgpr = vecReg.template as<VecElemU64>();
 175                 auto reg_file_vgpr0 = vrfData[0]->template as<VecElemU32>();
 176                 auto reg_file_vgpr1 = vrfData[1]->template as<VecElemU32>();
 177
 178                 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
 179                     VecElemU64 tmp_val(0);
 180                     ((VecElemU32*)&tmp_val)[0] = reg_file_vgpr0[lane];
 181                     ((VecElemU32*)&tmp_val)[1] = reg_file_vgpr1[lane];
 182                     vgpr[lane] = tmp_val;
 183                 }
 184             }
 185         }
 186
 187         /**
 188          * write to the vrf. we maintain a copy of the underlying vector
 189          * reg(s) for this operand (i.e., vrfData/scRegData), as well as a
 190          * temporary vector register representation (i.e., vecReg) of the
 191          * vector register, which allows the execute() methods of instructions
 192          * to easily write their operand data using operator[] regardless of
 193          * their size. after the result is calculated we use write() to write
 194          * the data to the actual register file storage. this allows us to do
 195          * type conversion, etc., in a single call as opposed to doing it
 196          * in each execute() method.
 197          */
 198         void
 199         write() override
 200         {
 201             assert(_gpuDynInst);
 202             assert(_gpuDynInst->wavefront());
 203             assert(_gpuDynInst->computeUnit());
 204             Wavefront *wf = _gpuDynInst->wavefront();
 205             ComputeUnit *cu = _gpuDynInst->computeUnit();
 206             VectorMask &exec_mask = _gpuDynInst->isLoad()
 207                 ? _gpuDynInst->exec_mask : wf->execMask();
 208
 209             if (NumDwords == 1) {
 210                 int vgprIdx = cu->registerManager.mapVgpr(wf, _opIdx);
 211                 vrfData[0] = &cu->vrf[wf->simdId]->readWriteable(vgprIdx);
 212                 assert(vrfData[0]);
 213                 auto reg_file_vgpr = vrfData[0]->template as<VecElemU32>();
 214                 auto vgpr = vecReg.template as<DataType>();
 215
 216                 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
 217                     if (exec_mask[lane] || _gpuDynInst->ignoreExec()) {
 218                         std::memcpy((void*)&reg_file_vgpr[lane],
 219                             (void*)&vgpr[lane], sizeof(DataType));
 220                     }
 221                 }
 222
 223                 DPRINTF(GPUVRF, "Write v[%d]\n", vgprIdx);
 224                 cu->vrf[wf->simdId]->printReg(wf, vgprIdx);
 225             } else if (NumDwords == 2) {
 226                 int vgprIdx0 = cu->registerManager.mapVgpr(wf, _opIdx);
 227                 int vgprIdx1 = cu->registerManager.mapVgpr(wf, _opIdx + 1);
 228                 vrfData[0] = &cu->vrf[wf->simdId]->readWriteable(vgprIdx0);
 229                 vrfData[1] = &cu->vrf[wf->simdId]->readWriteable(vgprIdx1);
 230                 assert(vrfData[0]);
 231                 assert(vrfData[1]);
 232                 auto reg_file_vgpr0 = vrfData[0]->template as<VecElemU32>();
 233                 auto reg_file_vgpr1 = vrfData[1]->template as<VecElemU32>();
 234                 auto vgpr = vecReg.template as<VecElemU64>();
 235
 236                 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
 237                     if (exec_mask[lane] || _gpuDynInst->ignoreExec()) {
 238                         reg_file_vgpr0[lane] = ((VecElemU32*)&vgpr[lane])[0];
 239                         reg_file_vgpr1[lane] = ((VecElemU32*)&vgpr[lane])[1];
 240                     }
 241                 }
 242
 243                 DPRINTF(GPUVRF, "Write v[%d:%d]\n", vgprIdx0, vgprIdx1);
 244                 cu->vrf[wf->simdId]->printReg(wf, vgprIdx0);
 245                 cu->vrf[wf->simdId]->printReg(wf, vgprIdx1);
 246             }
 247         }
 248
 249         void
 250         negModifier()
 251         {
 252             negMod = true;
 253         }
 254
 255         void
 256         absModifier()
 257         {
 258             absMod = true;
 259         }
 260
 261         /**
 262          * getter [] operator. only enable if this operand is constant
 263          * (i.e, a source operand) and if it can be represented using
 264          * primitive types (i.e., 8b to 64b primitives).
 265          */
 266         template<bool Condition = (NumDwords == 1 || NumDwords == 2) && Const>
 267         typename std::enable_if<Condition, const DataType>::type
 268         operator[](size_t idx) const
 269         {
 270             assert(idx < NumVecElemPerVecReg);
 271
 272             if (scalar) {
 273                 DataType ret_val = scRegData.rawData();
 274
 275                 if (absMod) {
 276                     assert(std::is_floating_point<DataType>::value);
 277                     ret_val = std::fabs(ret_val);
 278                 }
 279
 280                 if (negMod) {
 281                     assert(std::is_floating_point<DataType>::value);
 282                     ret_val = -ret_val;
 283                 }
 284
 285                 return ret_val;
 286             } else {
 287                 auto vgpr = vecReg.template as<DataType>();
 288                 DataType ret_val = vgpr[idx];
 289
 290                 if (absMod) {
 291                     assert(std::is_floating_point<DataType>::value);
 292                     ret_val = std::fabs(ret_val);
 293                 }
 294
 295                 if (negMod) {
 296                     assert(std::is_floating_point<DataType>::value);
 297                     ret_val = -ret_val;
 298                 }
 299
 300                 return ret_val;
 301             }
 302         }
 303
 304         /**
 305          * setter [] operator. only enable if this operand is non-constant
 306          * (i.e, a destination operand) and if it can be represented using
 307          * primitive types (i.e., 8b to 64b primitives).
 308          */
 309         template<bool Condition = (NumDwords == 1 || NumDwords == 2) && !Const>
 310         typename std::enable_if<Condition, DataType&>::type
 311         operator[](size_t idx)
 312         {
 313             assert(!scalar);
 314             assert(idx < NumVecElemPerVecReg);
 315
 316             return vecReg.template as<DataType>()[idx];
 317         }
 318
 319         private:
 320           /**
 321            * if we determine that this operand is a scalar (reg or constant)
 322            * then we read the scalar data into the scalar operand data member.
 323            */
 324           void
 325           readScalar()
 326           {
 327               scalar = true;
 328               scRegData.read();
 329           }
 330
 331           using VecRegCont = typename std::conditional<NumDwords == 2,
 332               VecRegContainerU64, typename std::conditional<sizeof(DataType)
 333                   == sizeof(VecElemU16), VecRegContainerU16,
 334                       typename std::conditional<sizeof(DataType)
 335                           == sizeof(VecElemU8), VecRegContainerU8,
 336                               VecRegContainerU32>::type>::type>::type;
 337
 338           /**
 339            * whether this operand a scalar or not.
 340            */
 341           bool scalar;
 342           /**
 343            * absolute value and negative modifiers. VOP3 instructions
 344            * may indicate that their input/output operands must be
 345            * modified, either by taking the absolute value or negating
 346            * them. these bools indicate which modifier, if any, to use.
 347            */
 348           bool absMod;
 349           bool negMod;
 350           /**
 351            * this holds all the operand data in a single vector register
 352            * object (i.e., if an operand is 64b, this will hold the data
 353            * from both registers the operand is using).
 354            */
 355           VecRegCont vecReg;
 356           /**
 357            * for src operands that read scalars (i.e., scalar regs or
 358            * a scalar constant).
 359            */
 360           ScalarOperand<DataType, Const, NumDwords> scRegData;
 361           /**
 362            * pointers to the underlyding registers (i.e., the actual
 363            * registers in the register file).
 364            */
 365           std::array<VecRegContainerU32*, NumDwords> vrfData;
 366     };
 367
 368     template<typename DataType, bool Const,
 369         size_t NumDwords = sizeof(DataType) / sizeof(ScalarRegU32)>
 370     class ScalarOperand final : public Operand
 371     {
 372       static_assert(NumDwords >= 1 && NumDwords <= MaxOperandDwords,
 373             "Incorrect number of DWORDS for GCN3 operand.");
 374       public:
 375         ScalarOperand() = delete;
 376
 377         ScalarOperand(GPUDynInstPtr gpuDynInst, int opIdx)
 378             : Operand(gpuDynInst, opIdx)
 379         {
 380             std::memset(srfData.data(), 0, NumDwords * sizeof(ScalarRegU32));
 381         }
 382
 383         ~ScalarOperand()
 384         {
 385         }
 386
 387         /**
 388          * we store scalar data in a std::array, however if we need the
 389          * full operand data we use this method to copy all elements of
 390          * the scalar operand data to a single primitive container. only
 391          * useful for 8b to 64b primitive types, as they are the only types
 392          * that we need to perform computation on.
 393          */
 394         template<bool Condition = NumDwords == 1 || NumDwords == 2>
 395         typename std::enable_if<Condition, DataType>::type
 396         rawData() const
 397         {
 398             assert(sizeof(DataType) <= sizeof(srfData));
 399             DataType raw_data((DataType)0);
 400             std::memcpy((void*)&raw_data, (void*)srfData.data(),
 401                 sizeof(DataType));
 402
 403             return raw_data;
 404         }
 405
 406         void*
 407         rawDataPtr()
 408         {
 409             return (void*)srfData.data();
 410         }
 411
 412         void
 413         read() override
 414         {
 415             Wavefront *wf = _gpuDynInst->wavefront();
 416             ComputeUnit *cu = _gpuDynInst->computeUnit();
 417
 418             if (!isScalarReg(_opIdx)) {
 419                 readSpecialVal();
 420             } else {
 421                 for (auto i = 0; i < NumDwords; ++i) {
 422                     int sgprIdx = regIdx(i);
 423                     srfData[i] = cu->srf[wf->simdId]->read(sgprIdx);
 424                     DPRINTF(GPUSRF, "Read s[%d]\n", sgprIdx);
 425                     cu->srf[wf->simdId]->printReg(wf, sgprIdx);
 426                 }
 427             }
 428         }
 429
 430         void
 431         write() override
 432         {
 433             Wavefront *wf = _gpuDynInst->wavefront();
 434             ComputeUnit *cu = _gpuDynInst->computeUnit();
 435
 436             if (!isScalarReg(_opIdx)) {
 437                 if (_opIdx == REG_EXEC_LO) {
 438                     uint64_t new_exec_mask_val(0);
 439                     std::memcpy((void*)&new_exec_mask_val,
 440                         (void*)srfData.data(), sizeof(new_exec_mask_val));
 441                     VectorMask new_exec_mask(new_exec_mask_val);
 442                     wf->execMask() = new_exec_mask;
 443                     DPRINTF(GPUSRF, "Write EXEC\n");
 444                     DPRINTF(GPUSRF, "EXEC = %#x\n", new_exec_mask_val);
 445                 } else {
 446                     _gpuDynInst->writeMiscReg(_opIdx, srfData[0]);
 447                 }
 448             } else {
 449                 for (auto i = 0; i < NumDwords; ++i) {
 450                     int sgprIdx = regIdx(i);
 451                     auto &sgpr = cu->srf[wf->simdId]->readWriteable(sgprIdx);
 452                     if (_gpuDynInst->isLoad()) {
 453                         assert(sizeof(DataType) <= sizeof(ScalarRegU64));
 454                         sgpr = reinterpret_cast<ScalarRegU32*>(
 455                             _gpuDynInst->scalar_data)[i];
 456                     } else {
 457                         sgpr = srfData[i];
 458                     }
 459                     DPRINTF(GPUSRF, "Write s[%d]\n", sgprIdx);
 460                     cu->srf[wf->simdId]->printReg(wf, sgprIdx);
 461                 }
 462             }
 463         }
 464
 465         /**
 466          * bit access to scalar data. primarily used for setting vcc bits.
 467          */
 468         template<bool Condition = NumDwords == 1 || NumDwords == 2>
 469         typename std::enable_if<Condition, void>::type
 470         setBit(int bit, int bit_val)
 471         {
 472             DataType &sgpr = *((DataType*)srfData.data());
 473             replaceBits(sgpr, bit, bit_val);
 474         }
 475
 476         template<bool Condition = (NumDwords == 1 || NumDwords == 2) && !Const>
 477         typename std::enable_if<Condition, ScalarOperand&>::type
 478         operator=(DataType rhs)
 479         {
 480             std::memcpy((void*)srfData.data(), (void*)&rhs, sizeof(DataType));
 481             return *this;
 482         }
 483
 484       private:
 485         /**
 486          * we have determined that we are not reading our scalar operand data
 487          * from the register file, so here we figure out which special value
 488          * we are reading (i.e., float constant, int constant, inline
 489          * constant, or various other system registers (e.g., exec mask).
 490          */
 491         void
 492         readSpecialVal()
 493         {
 494             assert(NumDwords == 1 || NumDwords == 2);
 495
 496             switch(_opIdx) {
 497               case REG_EXEC_LO:
 498                 {
 499                     assert(NumDwords == 2);
 500                     ScalarRegU64 exec_mask = _gpuDynInst->wavefront()->
 501                         execMask().to_ullong();
 502                     std::memcpy((void*)srfData.data(), (void*)&exec_mask,
 503                         sizeof(srfData));
 504                     DPRINTF(GPUSRF, "Read EXEC\n");
 505                     DPRINTF(GPUSRF, "EXEC = %#x\n", exec_mask);
 506                 }
 507                 break;
 508               case REG_SRC_SWDA:
 509               case REG_SRC_DPP:
 510               case REG_SRC_LITERAL:
 511                 assert(NumDwords == 1);
 512                 srfData[0] = _gpuDynInst->srcLiteral();
 513                 break;
 514               case REG_POS_HALF:
 515                 {
 516                     typename OpTraits<DataType>::FloatT pos_half = 0.5;
 517                     std::memcpy((void*)srfData.data(), (void*)&pos_half,
 518                         sizeof(srfData));
 519
 520                 }
 521                 break;
 522               case REG_NEG_HALF:
 523                 {
 524                     typename OpTraits<DataType>::FloatT neg_half = -0.5;
 525                     std::memcpy((void*)srfData.data(), (void*)&neg_half,
 526                         sizeof(srfData));
 527                 }
 528                 break;
 529               case REG_POS_ONE:
 530                 {
 531                     typename OpTraits<DataType>::FloatT pos_one = 1.0;
 532                     std::memcpy(srfData.data(), &pos_one, sizeof(srfData));
 533                 }
 534                 break;
 535               case REG_NEG_ONE:
 536                 {
 537                     typename OpTraits<DataType>::FloatT neg_one = -1.0;
 538                     std::memcpy(srfData.data(), &neg_one, sizeof(srfData));
 539                 }
 540                 break;
 541               case REG_POS_TWO:
 542                 {
 543                     typename OpTraits<DataType>::FloatT pos_two = 2.0;
 544                     std::memcpy(srfData.data(), &pos_two, sizeof(srfData));
 545                 }
 546                 break;
 547               case REG_NEG_TWO:
 548                 {
 549                     typename OpTraits<DataType>::FloatT neg_two = -2.0;
 550                     std::memcpy(srfData.data(), &neg_two, sizeof(srfData));
 551                 }
 552                 break;
 553               case REG_POS_FOUR:
 554                 {
 555                     typename OpTraits<DataType>::FloatT pos_four = 4.0;
 556                     std::memcpy(srfData.data(), &pos_four, sizeof(srfData));
 557                 }
 558                 break;
 559               case REG_NEG_FOUR:
 560                 {
 561                     typename OpTraits<DataType>::FloatT neg_four = -4.0;
 562                     std::memcpy((void*)srfData.data(), (void*)&neg_four ,
 563                         sizeof(srfData));
 564                 }
 565                 break;
 566                 case REG_PI:
 567                 {
 568                     assert(sizeof(DataType) == sizeof(ScalarRegF64)
 569                         || sizeof(DataType) == sizeof(ScalarRegF32));
 570
 571                     const ScalarRegU32 pi_u32(0x3e22f983UL);
 572                     const ScalarRegU64 pi_u64(0x3fc45f306dc9c882ULL);
 573
 574                     if (sizeof(DataType) == sizeof(ScalarRegF64)) {
 575                         std::memcpy((void*)srfData.data(),
 576                             (void*)&pi_u64, sizeof(srfData));
 577                     } else {
 578                         std::memcpy((void*)srfData.data(),
 579                             (void*)&pi_u32, sizeof(srfData));
 580                     }
 581                 }
 582                 break;
 583               default:
 584                 {
 585                     assert(sizeof(DataType) <= sizeof(srfData));
 586                     DataType misc_val
 587                         = (DataType)_gpuDynInst->readMiscReg(_opIdx);
 588                     std::memcpy((void*)srfData.data(), (void*)&misc_val,
 589                         sizeof(DataType));
 590                 }
 591             }
 592         }
 593
 594         /**
 595          * for scalars we need to do some extra work to figure out how to
 596          * map the op selector to the sgpr idx because some op selectors
 597          * do not map directly to the srf (i.e., vcc/flat_scratch).
 598          */
 599         int
 600         regIdx(int dword) const
 601         {
 602             Wavefront *wf = _gpuDynInst->wavefront();
 603             ComputeUnit *cu = _gpuDynInst->computeUnit();
 604             int sgprIdx(-1);
 605
 606             if (_opIdx == REG_VCC_LO) {
 607                 sgprIdx = cu->registerManager
 608                     .mapSgpr(wf, wf->reservedScalarRegs - 2 + dword);
 609             } else if (_opIdx == REG_FLAT_SCRATCH_HI) {
 610                 sgprIdx = cu->registerManager
 611                     .mapSgpr(wf, wf->reservedScalarRegs - 3 + dword);
 612             } else if (_opIdx == REG_FLAT_SCRATCH_LO) {
 613                 assert(NumDwords == 1);
 614                 sgprIdx = cu->registerManager
 615                     .mapSgpr(wf, wf->reservedScalarRegs - 4 + dword);
 616             } else {
 617                 sgprIdx = cu->registerManager.mapSgpr(wf, _opIdx + dword);
 618             }
 619
 620             assert(sgprIdx > -1);
 621
 622             return sgprIdx;
 623         }
 624
 625         /**
 626          * in GCN3 each register is represented as a 32b unsigned value,
 627          * however operands may require up to 16 registers, so we store
 628          * all the individual 32b components here. for sub-dword operand
 629          * we still consider them to be 1 dword because the minimum size
 630          * of a register is 1 dword. this class will take care to do the
 631          * proper packing/unpacking of sub-dword operands.
 632          */
 633         std::array<ScalarRegU32, NumDwords> srfData;
 634     };
 635
 636     // typedefs for the various sizes/types of scalar operands
 637     using ScalarOperandU8 = ScalarOperand<ScalarRegU8, false, 1>;
 638     using ScalarOperandI8 = ScalarOperand<ScalarRegI8, false, 1>;
 639     using ScalarOperandU16 = ScalarOperand<ScalarRegU16, false, 1>;
 640     using ScalarOperandI16 = ScalarOperand<ScalarRegI16, false, 1>;
 641     using ScalarOperandU32 = ScalarOperand<ScalarRegU32, false>;
 642     using ScalarOperandI32 = ScalarOperand<ScalarRegI32, false>;
 643     using ScalarOperandF32 = ScalarOperand<ScalarRegF32, false>;
 644     using ScalarOperandU64 = ScalarOperand<ScalarRegU64, false>;
 645     using ScalarOperandI64 = ScalarOperand<ScalarRegI64, false>;
 646     using ScalarOperandF64 = ScalarOperand<ScalarRegF64, false>;
 647     using ScalarOperandU128 = ScalarOperand<ScalarRegU32, false, 4>;
 648     using ScalarOperandU256 = ScalarOperand<ScalarRegU32, false, 8>;
 649     using ScalarOperandU512 = ScalarOperand<ScalarRegU32, false, 16>;
 650     // non-writeable versions of scalar operands
 651     using ConstScalarOperandU8 = ScalarOperand<ScalarRegU8, true, 1>;
 652     using ConstScalarOperandI8 = ScalarOperand<ScalarRegI8, true, 1>;
 653     using ConstScalarOperandU16 = ScalarOperand<ScalarRegU16, true, 1>;
 654     using ConstScalarOperandI16 = ScalarOperand<ScalarRegI16, true, 1>;
 655     using ConstScalarOperandU32 = ScalarOperand<ScalarRegU32, true>;
 656     using ConstScalarOperandI32 = ScalarOperand<ScalarRegI32, true>;
 657     using ConstScalarOperandF32 = ScalarOperand<ScalarRegF32, true>;
 658     using ConstScalarOperandU64 = ScalarOperand<ScalarRegU64, true>;
 659     using ConstScalarOperandI64 = ScalarOperand<ScalarRegI64, true>;
 660     using ConstScalarOperandF64 = ScalarOperand<ScalarRegF64, true>;
 661     using ConstScalarOperandU128 = ScalarOperand<ScalarRegU32, true, 4>;
 662     using ConstScalarOperandU256 = ScalarOperand<ScalarRegU32, true, 8>;
 663     using ConstScalarOperandU512 = ScalarOperand<ScalarRegU32, true, 16>;
 664     // typedefs for the various sizes/types of vector operands
 665     using VecOperandU8 = VecOperand<VecElemU8, false, 1>;
 666     using VecOperandI8 = VecOperand<VecElemI8, false, 1>;
 667     using VecOperandU16 = VecOperand<VecElemU16, false, 1>;
 668     using VecOperandI16 = VecOperand<VecElemI16, false, 1>;
 669     using VecOperandU32 = VecOperand<VecElemU32, false>;
 670     using VecOperandI32 = VecOperand<VecElemI32, false>;
 671     using VecOperandF32 = VecOperand<VecElemF32, false>;
 672     using VecOperandU64 = VecOperand<VecElemU64, false>;
 673     using VecOperandF64 = VecOperand<VecElemF64, false>;
 674     using VecOperandI64 = VecOperand<VecElemI64, false>;
 675     using VecOperandU96 = VecOperand<VecElemU32, false, 3>;
 676     using VecOperandU128 = VecOperand<VecElemU32, false, 4>;
 677     using VecOperandU256 = VecOperand<VecElemU32, false, 8>;
 678     using VecOperandU512 = VecOperand<VecElemU32, false, 16>;
 679     // non-writeable versions of vector operands
 680     using ConstVecOperandU8 = VecOperand<VecElemU8, true, 1>;
 681     using ConstVecOperandI8 = VecOperand<VecElemI8, true, 1>;
 682     using ConstVecOperandU16 = VecOperand<VecElemU16, true, 1>;
 683     using ConstVecOperandI16 = VecOperand<VecElemI16, true, 1>;
 684     using ConstVecOperandU32 = VecOperand<VecElemU32, true>;
 685     using ConstVecOperandI32 = VecOperand<VecElemI32, true>;
 686     using ConstVecOperandF32 = VecOperand<VecElemF32, true>;
 687     using ConstVecOperandU64 = VecOperand<VecElemU64, true>;
 688     using ConstVecOperandI64 = VecOperand<VecElemI64, true>;
 689     using ConstVecOperandF64 = VecOperand<VecElemF64, true>;
 690     using ConstVecOperandU96 = VecOperand<VecElemU32, true, 3>;
 691     using ConstVecOperandU128 = VecOperand<VecElemU32, true, 4>;
 692     using ConstVecOperandU256 = VecOperand<VecElemU32, true, 8>;
 693     using ConstVecOperandU512 = VecOperand<VecElemU32, true, 16>;
 694 }
 695
 696 #endif // __ARCH_GCN3_OPERAND_HH__