/*
 * Copyright (c) 2017 Advanced Micro Devices, Inc.
 *
 * For use for simulation and test purposes only
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 * 1. Redistributions of source code must retain the above copyright notice,
 * this list of conditions and the following disclaimer.
 *
 * 2. Redistributions in binary form must reproduce the above copyright notice,
 * this list of conditions and the following disclaimer in the documentation
 * and/or other materials provided with the distribution.
 *
 * 3. Neither the name of the copyright holder nor the names of its
 * contributors may be used to endorse or promote products derived from this
 * software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 *
 * Authors: Anthony Gutierrez
 */
36 #ifndef __ARCH_GCN3_OPERAND_HH__
37 #define __ARCH_GCN3_OPERAND_HH__
41 #include "arch/gcn3/registers.hh"
42 #include "arch/generic/vec_reg.hh"
43 #include "gpu-compute/scalar_register_file.hh"
44 #include "gpu-compute/vector_register_file.hh"
45 #include "gpu-compute/wavefront.hh"
/**
 * classes that represent vector/scalar operands in GCN3 ISA. these classes
 * wrap the generic vector register type (i.e., src/arch/generic/vec_reg.hh)
 * and allow them to be manipulated in ways that are unique to GCN3 insts.
 */
56 * convenience traits so we can automatically infer the correct FP type
57 * without looking at the number of dwords (i.e., to determine if we
58 * need a float or a double when creating FP constants).
60 template<typename T> struct OpTraits { typedef float FloatT; };
61 template<> struct OpTraits<ScalarRegF64> { typedef double FloatT; };
62 template<> struct OpTraits<ScalarRegU64> { typedef double FloatT; };
    /**
     * Construct an operand for the given dynamic instruction.
     * NOTE(review): the enclosing class header and the constructor body
     * braces fall outside the visible portion of this chunk.
     *
     * @param gpuDynInst the dynamic instruction that owns this operand
     * @param opIdx the operand selector value (not a raw register index)
     */
    Operand(GPUDynInstPtr gpuDynInst, int opIdx)
        : _gpuDynInst(gpuDynInst), _opIdx(opIdx)

    /**
     * read from and write to the underlying register(s) that
     * this operand is referring to. pure virtual: each concrete
     * operand type (vector/scalar) supplies its own implementation.
     */
    virtual void read() = 0;
    virtual void write() = 0;

    /**
     * instruction object that owns this operand
     */
    GPUDynInstPtr _gpuDynInst;

    /**
     * op selector value for this operand. note that this is not
     * the same as the register file index, be it scalar or vector.
     * this could refer to inline constants, system regs, or even
     * (remainder of this comment is not visible in this chunk)
     */
// forward declaration of ScalarOperand; VecOperand embeds one for
// scalar sources. NOTE(review): the `class ScalarOperand;` declaration
// line itself is not visible in this chunk.
template<typename DataType, bool Const, size_t NumDwords>

/**
 * A vector operand: wraps NumDwords VRF registers and presents them to
 * instruction execute() methods as one per-lane value of DataType.
 * Const selects a read-only (source) operand.
 */
template<typename DataType, bool Const,
    size_t NumDwords = sizeof(DataType) / sizeof(VecElemU32)>
class VecOperand final : public Operand
    static_assert(NumDwords >= 1 && NumDwords <= MaxOperandDwords,
        "Incorrect number of DWORDS for GCN3 operand.");

    VecOperand() = delete;

    VecOperand(GPUDynInstPtr gpuDynInst, int opIdx)
        : Operand(gpuDynInst, opIdx), scalar(false), absMod(false),
          negMod(false), scRegData(gpuDynInst, _opIdx),

    /**
     * certain vector operands can read from the vrf/srf or constants.
     * we use this method to first determine the type of the operand,
     * then we read from the appropriate source. if vector we read
     * directly from the vrf. if scalar, we read in the data through
     * the scalar operand component. this should only be used for VSRC
     * operands.
     */
        if (isVectorReg(_opIdx)) {
            // translate the op selector into a raw VGPR index; the
            // translation accounts for the wave's reserved scalar regs
            _opIdx = opSelectorToRegIdx(_opIdx, _gpuDynInst->wavefront()
                ->reservedScalarRegs);

    /**
     * read from the vrf. this should only be used by vector inst
     * source operands that are explicitly vector (i.e., VSRC).
     */
        assert(_gpuDynInst->wavefront());
        assert(_gpuDynInst->computeUnit());
        Wavefront *wf = _gpuDynInst->wavefront();
        ComputeUnit *cu = _gpuDynInst->computeUnit();

        // cache a pointer to each 32b VRF register backing this operand
        for (auto i = 0; i < NumDwords; ++i) {
            int vgprIdx = cu->registerManager.mapVgpr(wf, _opIdx + i);
            vrfData[i] = &cu->vrf[wf->simdId]->readWriteable(vgprIdx);

            DPRINTF(GPUVRF, "Read v[%d]\n", vgprIdx);
            cu->vrf[wf->simdId]->printReg(wf, vgprIdx);

        if (NumDwords == 1) {
            auto vgpr = vecReg.template as<DataType>();
            auto reg_file_vgpr = vrfData[0]->template as<VecElemU32>();
            for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
                // memcpy preserves the raw register bits regardless of
                // what DataType interprets them as
                std::memcpy((void*)&vgpr[lane],
                    (void*)&reg_file_vgpr[lane], sizeof(DataType));
        } else if (NumDwords == 2) {
            // 64b operand: stitch each lane together from the lo/hi
            // 32b registers
            auto vgpr = vecReg.template as<VecElemU64>();
            auto reg_file_vgpr0 = vrfData[0]->template as<VecElemU32>();
            auto reg_file_vgpr1 = vrfData[1]->template as<VecElemU32>();

            for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
                VecElemU64 tmp_val(0);
                ((VecElemU32*)&tmp_val)[0] = reg_file_vgpr0[lane];
                ((VecElemU32*)&tmp_val)[1] = reg_file_vgpr1[lane];
                vgpr[lane] = tmp_val;

    /**
     * write to the vrf. we maintain a copy of the underlying vector
     * reg(s) for this operand (i.e., vrfData/scRegData), as well as a
     * temporary vector register representation (i.e., vecReg) of the
     * vector register, which allows the execute() methods of instructions
     * to easily write their operand data using operator[] regardless of
     * their size. after the result is calculated we use write() to write
     * the data to the actual register file storage. this allows us to do
     * type conversion, etc., in a single call as opposed to doing it
     * in each execute() method.
     */
        assert(_gpuDynInst->wavefront());
        assert(_gpuDynInst->computeUnit());
        Wavefront *wf = _gpuDynInst->wavefront();
        ComputeUnit *cu = _gpuDynInst->computeUnit();
        // loads commit under the exec mask captured by the instruction;
        // everything else uses the wave's current exec mask
        VectorMask &exec_mask = _gpuDynInst->isLoad()
            ? _gpuDynInst->exec_mask : wf->execMask();

        if (NumDwords == 1) {
            int vgprIdx = cu->registerManager.mapVgpr(wf, _opIdx);
            vrfData[0] = &cu->vrf[wf->simdId]->readWriteable(vgprIdx);

            auto reg_file_vgpr = vrfData[0]->template as<VecElemU32>();
            auto vgpr = vecReg.template as<DataType>();

            for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
                // only write back lanes that are active, unless the
                // inst explicitly ignores the exec mask
                if (exec_mask[lane] || _gpuDynInst->ignoreExec()) {
                    std::memcpy((void*)&reg_file_vgpr[lane],
                        (void*)&vgpr[lane], sizeof(DataType));

            DPRINTF(GPUVRF, "Write v[%d]\n", vgprIdx);
            cu->vrf[wf->simdId]->printReg(wf, vgprIdx);
        } else if (NumDwords == 2) {
            int vgprIdx0 = cu->registerManager.mapVgpr(wf, _opIdx);
            int vgprIdx1 = cu->registerManager.mapVgpr(wf, _opIdx + 1);
            vrfData[0] = &cu->vrf[wf->simdId]->readWriteable(vgprIdx0);
            vrfData[1] = &cu->vrf[wf->simdId]->readWriteable(vgprIdx1);

            auto reg_file_vgpr0 = vrfData[0]->template as<VecElemU32>();
            auto reg_file_vgpr1 = vrfData[1]->template as<VecElemU32>();
            auto vgpr = vecReg.template as<VecElemU64>();

            for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
                if (exec_mask[lane] || _gpuDynInst->ignoreExec()) {
                    // split each 64b lane back into its lo/hi dwords
                    reg_file_vgpr0[lane] = ((VecElemU32*)&vgpr[lane])[0];
                    reg_file_vgpr1[lane] = ((VecElemU32*)&vgpr[lane])[1];

            DPRINTF(GPUVRF, "Write v[%d:%d]\n", vgprIdx0, vgprIdx1);
            cu->vrf[wf->simdId]->printReg(wf, vgprIdx0);
            cu->vrf[wf->simdId]->printReg(wf, vgprIdx1);

    /**
     * getter [] operator. only enable if this operand is constant
     * (i.e, a source operand) and if it can be represented using
     * primitive types (i.e., 8b to 64b primitives).
     */
    template<bool Condition = (NumDwords == 1 || NumDwords == 2) && Const>
    typename std::enable_if<Condition, const DataType>::type
    operator[](size_t idx) const
        assert(idx < NumVecElemPerVecReg);

            // scalar path: all lanes see the same broadcast value
            DataType ret_val = scRegData.rawData();
            // abs/neg modifiers are only defined for FP operands
            assert(std::is_floating_point<DataType>::value);
            ret_val = std::fabs(ret_val);
            assert(std::is_floating_point<DataType>::value);
        // vector path: per-lane value from the temporary vector reg
        auto vgpr = vecReg.template as<DataType>();
        DataType ret_val = vgpr[idx];
            assert(std::is_floating_point<DataType>::value);
            ret_val = std::fabs(ret_val);
            assert(std::is_floating_point<DataType>::value);

    /**
     * setter [] operator. only enable if this operand is non-constant
     * (i.e, a destination operand) and if it can be represented using
     * primitive types (i.e., 8b to 64b primitives).
     */
    template<bool Condition = (NumDwords == 1 || NumDwords == 2) && !Const>
    typename std::enable_if<Condition, DataType&>::type
    operator[](size_t idx)
        assert(idx < NumVecElemPerVecReg);
        return vecReg.template as<DataType>()[idx];

    /**
     * if we determine that this operand is a scalar (reg or constant)
     * then we read the scalar data into the scalar operand data member.
     */

    // choose the vector register container whose element size matches
    // DataType: 64b containers when NumDwords == 2, otherwise 16b/8b/32b
    // containers chosen by sizeof(DataType)
    using VecRegCont = typename std::conditional<NumDwords == 2,
        VecRegContainerU64, typename std::conditional<sizeof(DataType)
        == sizeof(VecElemU16), VecRegContainerU16,
        typename std::conditional<sizeof(DataType)
        == sizeof(VecElemU8), VecRegContainerU8,
        VecRegContainerU32>::type>::type>::type;

    /**
     * whether this operand a scalar or not.
     */

    /**
     * absolute value and negative modifiers. VOP3 instructions
     * may indicate that their input/output operands must be
     * modified, either by taking the absolute value or negating
     * them. these bools indicate which modifier, if any, to use.
     */

    /**
     * this holds all the operand data in a single vector register
     * object (i.e., if an operand is 64b, this will hold the data
     * from both registers the operand is using).
     */

    /**
     * for src operands that read scalars (i.e., scalar regs or
     * a scalar constant).
     */
    ScalarOperand<DataType, Const, NumDwords> scRegData;

    /**
     * pointers to the underlying registers (i.e., the actual
     * registers in the register file).
     */
    std::array<VecRegContainerU32*, NumDwords> vrfData;
/**
 * A scalar operand: wraps NumDwords SRF registers (or a special value
 * such as an inline constant or EXEC) as a single value of DataType.
 * Const selects a read-only (source) operand.
 */
template<typename DataType, bool Const,
    size_t NumDwords = sizeof(DataType) / sizeof(ScalarRegU32)>
class ScalarOperand final : public Operand
    static_assert(NumDwords >= 1 && NumDwords <= MaxOperandDwords,
        "Incorrect number of DWORDS for GCN3 operand.");

    ScalarOperand() = delete;

    ScalarOperand(GPUDynInstPtr gpuDynInst, int opIdx)
        : Operand(gpuDynInst, opIdx)
        // zero the backing store so a partially-populated operand never
        // exposes stale bytes
        std::memset(srfData.data(), 0, NumDwords * sizeof(ScalarRegU32));

    /**
     * we store scalar data in a std::array, however if we need the
     * full operand data we use this method to copy all elements of
     * the scalar operand data to a single primitive container. only
     * useful for 8b to 64b primitive types, as they are the only types
     * that we need to perform computation on.
     */
    template<bool Condition = NumDwords == 1 || NumDwords == 2>
    typename std::enable_if<Condition, DataType>::type
        assert(sizeof(DataType) <= sizeof(srfData));
        DataType raw_data((DataType)0);
        std::memcpy((void*)&raw_data, (void*)srfData.data(),

        // raw, untyped pointer to the operand's backing store
        return (void*)srfData.data();

        Wavefront *wf = _gpuDynInst->wavefront();
        ComputeUnit *cu = _gpuDynInst->computeUnit();

        // non-SRF selectors (constants, system regs) are handled on a
        // separate path
        if (!isScalarReg(_opIdx)) {
        // read each dword of this operand out of the SRF
        for (auto i = 0; i < NumDwords; ++i) {
            int sgprIdx = regIdx(i);
            srfData[i] = cu->srf[wf->simdId]->read(sgprIdx);
            DPRINTF(GPUSRF, "Read s[%d]\n", sgprIdx);
            cu->srf[wf->simdId]->printReg(wf, sgprIdx);

        Wavefront *wf = _gpuDynInst->wavefront();
        ComputeUnit *cu = _gpuDynInst->computeUnit();

        if (!isScalarReg(_opIdx)) {
            if (_opIdx == REG_EXEC_LO) {
                // writes to EXEC update the wavefront's mask directly
                // rather than going through the SRF
                uint64_t new_exec_mask_val(0);
                std::memcpy((void*)&new_exec_mask_val,
                    (void*)srfData.data(), sizeof(new_exec_mask_val));
                VectorMask new_exec_mask(new_exec_mask_val);
                wf->execMask() = new_exec_mask;
                DPRINTF(GPUSRF, "Write EXEC\n");
                DPRINTF(GPUSRF, "EXEC = %#x\n", new_exec_mask_val);
                // other non-SRF selectors go through the misc reg
                // interface
                _gpuDynInst->writeMiscReg(_opIdx, srfData[0]);

        for (auto i = 0; i < NumDwords; ++i) {
            int sgprIdx = regIdx(i);
            auto &sgpr = cu->srf[wf->simdId]->readWriteable(sgprIdx);
            if (_gpuDynInst->isLoad()) {
                assert(sizeof(DataType) <= sizeof(ScalarRegU64));
                // scalar loads deliver their result through the inst's
                // scalar_data buffer, one dword at a time
                sgpr = reinterpret_cast<ScalarRegU32*>(
                    _gpuDynInst->scalar_data)[i];

            DPRINTF(GPUSRF, "Write s[%d]\n", sgprIdx);
            cu->srf[wf->simdId]->printReg(wf, sgprIdx);

    /**
     * bit access to scalar data. primarily used for setting vcc bits.
     */
    template<bool Condition = NumDwords == 1 || NumDwords == 2>
    typename std::enable_if<Condition, void>::type
    setBit(int bit, int bit_val)
        DataType &sgpr = *((DataType*)srfData.data());
        replaceBits(sgpr, bit, bit_val);

    // assignment from a primitive value; enabled only for writable
    // (destination) operands of primitive size
    template<bool Condition = (NumDwords == 1 || NumDwords == 2) && !Const>
    typename std::enable_if<Condition, ScalarOperand&>::type
    operator=(DataType rhs)
        std::memcpy((void*)srfData.data(), (void*)&rhs, sizeof(DataType));

    /**
     * we have determined that we are not reading our scalar operand data
     * from the register file, so here we figure out which special value
     * we are reading (i.e., float constant, int constant, inline
     * constant, or various other system registers (e.g., exec mask).
     */
        assert(NumDwords == 1 || NumDwords == 2);

            // EXEC is a 64b mask, so it must span two dwords
            assert(NumDwords == 2);
            ScalarRegU64 exec_mask = _gpuDynInst->wavefront()->
                execMask().to_ullong();
            std::memcpy((void*)srfData.data(), (void*)&exec_mask,
            DPRINTF(GPUSRF, "Read EXEC\n");
            DPRINTF(GPUSRF, "EXEC = %#x\n", exec_mask);

          case REG_SRC_LITERAL:
            assert(NumDwords == 1);
            srfData[0] = _gpuDynInst->srcLiteral();

            // hardware inline FP constants; OpTraits selects float vs
            // double based on DataType
            typename OpTraits<DataType>::FloatT pos_half = 0.5;
            std::memcpy((void*)srfData.data(), (void*)&pos_half,

            typename OpTraits<DataType>::FloatT neg_half = -0.5;
            std::memcpy((void*)srfData.data(), (void*)&neg_half,

            typename OpTraits<DataType>::FloatT pos_one = 1.0;
            std::memcpy(srfData.data(), &pos_one, sizeof(srfData));

            typename OpTraits<DataType>::FloatT neg_one = -1.0;
            std::memcpy(srfData.data(), &neg_one, sizeof(srfData));

            typename OpTraits<DataType>::FloatT pos_two = 2.0;
            std::memcpy(srfData.data(), &pos_two, sizeof(srfData));

            typename OpTraits<DataType>::FloatT neg_two = -2.0;
            std::memcpy(srfData.data(), &neg_two, sizeof(srfData));

            typename OpTraits<DataType>::FloatT pos_four = 4.0;
            std::memcpy(srfData.data(), &pos_four, sizeof(srfData));

            typename OpTraits<DataType>::FloatT neg_four = -4.0;
            std::memcpy((void*)srfData.data(), (void*)&neg_four ,

            // inline constant with both 32b and 64b bit patterns;
            // pick the encoding that matches DataType's width
            assert(sizeof(DataType) == sizeof(ScalarRegF64)
                || sizeof(DataType) == sizeof(ScalarRegF32));

            const ScalarRegU32 pi_u32(0x3e22f983UL);
            const ScalarRegU64 pi_u64(0x3fc45f306dc9c882ULL);

            if (sizeof(DataType) == sizeof(ScalarRegF64)) {
                std::memcpy((void*)srfData.data(),
                    (void*)&pi_u64, sizeof(srfData));
                std::memcpy((void*)srfData.data(),
                    (void*)&pi_u32, sizeof(srfData));

            // anything else is read through the misc reg interface
            assert(sizeof(DataType) <= sizeof(srfData));
                = (DataType)_gpuDynInst->readMiscReg(_opIdx);
            std::memcpy((void*)srfData.data(), (void*)&misc_val,

    /**
     * for scalars we need to do some extra work to figure out how to
     * map the op selector to the sgpr idx because some op selectors
     * do not map directly to the srf (i.e., vcc/flat_scratch).
     */
    regIdx(int dword) const
        Wavefront *wf = _gpuDynInst->wavefront();
        ComputeUnit *cu = _gpuDynInst->computeUnit();

        if (_opIdx == REG_VCC_LO) {
            // VCC lives in the last two of the wave's reserved SGPRs
            sgprIdx = cu->registerManager
                .mapSgpr(wf, wf->reservedScalarRegs - 2 + dword);
        } else if (_opIdx == REG_FLAT_SCRATCH_HI) {
            sgprIdx = cu->registerManager
                .mapSgpr(wf, wf->reservedScalarRegs - 3 + dword);
        } else if (_opIdx == REG_FLAT_SCRATCH_LO) {
            assert(NumDwords == 1);
            sgprIdx = cu->registerManager
                .mapSgpr(wf, wf->reservedScalarRegs - 4 + dword);
            // ordinary SGPR: op selector maps directly, plus dword offset
            sgprIdx = cu->registerManager.mapSgpr(wf, _opIdx + dword);

        assert(sgprIdx > -1);

    /**
     * in GCN3 each register is represented as a 32b unsigned value,
     * however operands may require up to 16 registers, so we store
     * all the individual 32b components here. for sub-dword operand
     * we still consider them to be 1 dword because the minimum size
     * of a register is 1 dword. this class will take care to do the
     * proper packing/unpacking of sub-dword operands.
     */
    std::array<ScalarRegU32, NumDwords> srfData;
// typedefs for the various sizes/types of scalar operands; the trailing
// template argument (where given) is the explicit dword count
using ScalarOperandU8 = ScalarOperand<ScalarRegU8, false, 1>;
using ScalarOperandI8 = ScalarOperand<ScalarRegI8, false, 1>;
using ScalarOperandU16 = ScalarOperand<ScalarRegU16, false, 1>;
using ScalarOperandI16 = ScalarOperand<ScalarRegI16, false, 1>;
using ScalarOperandU32 = ScalarOperand<ScalarRegU32, false>;
using ScalarOperandI32 = ScalarOperand<ScalarRegI32, false>;
using ScalarOperandF32 = ScalarOperand<ScalarRegF32, false>;
using ScalarOperandU64 = ScalarOperand<ScalarRegU64, false>;
using ScalarOperandI64 = ScalarOperand<ScalarRegI64, false>;
using ScalarOperandF64 = ScalarOperand<ScalarRegF64, false>;
using ScalarOperandU128 = ScalarOperand<ScalarRegU32, false, 4>;
using ScalarOperandU256 = ScalarOperand<ScalarRegU32, false, 8>;
using ScalarOperandU512 = ScalarOperand<ScalarRegU32, false, 16>;
// non-writeable versions of scalar operands (Const == true enables the
// read-only accessors only)
using ConstScalarOperandU8 = ScalarOperand<ScalarRegU8, true, 1>;
using ConstScalarOperandI8 = ScalarOperand<ScalarRegI8, true, 1>;
using ConstScalarOperandU16 = ScalarOperand<ScalarRegU16, true, 1>;
using ConstScalarOperandI16 = ScalarOperand<ScalarRegI16, true, 1>;
using ConstScalarOperandU32 = ScalarOperand<ScalarRegU32, true>;
using ConstScalarOperandI32 = ScalarOperand<ScalarRegI32, true>;
using ConstScalarOperandF32 = ScalarOperand<ScalarRegF32, true>;
using ConstScalarOperandU64 = ScalarOperand<ScalarRegU64, true>;
using ConstScalarOperandI64 = ScalarOperand<ScalarRegI64, true>;
using ConstScalarOperandF64 = ScalarOperand<ScalarRegF64, true>;
using ConstScalarOperandU128 = ScalarOperand<ScalarRegU32, true, 4>;
using ConstScalarOperandU256 = ScalarOperand<ScalarRegU32, true, 8>;
using ConstScalarOperandU512 = ScalarOperand<ScalarRegU32, true, 16>;
// typedefs for the various sizes/types of vector operands
using VecOperandU8 = VecOperand<VecElemU8, false, 1>;
using VecOperandI8 = VecOperand<VecElemI8, false, 1>;
using VecOperandU16 = VecOperand<VecElemU16, false, 1>;
using VecOperandI16 = VecOperand<VecElemI16, false, 1>;
using VecOperandU32 = VecOperand<VecElemU32, false>;
using VecOperandI32 = VecOperand<VecElemI32, false>;
using VecOperandF32 = VecOperand<VecElemF32, false>;
using VecOperandU64 = VecOperand<VecElemU64, false>;
using VecOperandF64 = VecOperand<VecElemF64, false>;
using VecOperandI64 = VecOperand<VecElemI64, false>;
using VecOperandU96 = VecOperand<VecElemU32, false, 3>;
using VecOperandU128 = VecOperand<VecElemU32, false, 4>;
using VecOperandU256 = VecOperand<VecElemU32, false, 8>;
using VecOperandU512 = VecOperand<VecElemU32, false, 16>;
// non-writeable versions of vector operands
using ConstVecOperandU8 = VecOperand<VecElemU8, true, 1>;
using ConstVecOperandI8 = VecOperand<VecElemI8, true, 1>;
using ConstVecOperandU16 = VecOperand<VecElemU16, true, 1>;
using ConstVecOperandI16 = VecOperand<VecElemI16, true, 1>;
using ConstVecOperandU32 = VecOperand<VecElemU32, true>;
using ConstVecOperandI32 = VecOperand<VecElemI32, true>;
using ConstVecOperandF32 = VecOperand<VecElemF32, true>;
using ConstVecOperandU64 = VecOperand<VecElemU64, true>;
using ConstVecOperandI64 = VecOperand<VecElemI64, true>;
using ConstVecOperandF64 = VecOperand<VecElemF64, true>;
using ConstVecOperandU96 = VecOperand<VecElemU32, true, 3>;
using ConstVecOperandU128 = VecOperand<VecElemU32, true, 4>;
using ConstVecOperandU256 = VecOperand<VecElemU32, true, 8>;
using ConstVecOperandU512 = VecOperand<VecElemU32, true, 16>;
696 #endif // __ARCH_GCN3_OPERAND_HH__