2 * Copyright (c) 2015-2018 ARM Limited
5 * The license below extends only to copyright in the software and shall
6 * not be construed as granting a license to any other intellectual
7 * property including but not limited to intellectual property relating
8 * to a hardware implementation of the functionality of the software
9 * licensed hereunder. You may use the software subject to the license
10 * terms below provided that you ensure that this notice is replicated
11 * unmodified and in its entirety in all distributions of the software,
12 * modified or unmodified, in source code or in binary form.
14 * Redistribution and use in source and binary forms, with or without
15 * modification, are permitted provided that the following conditions are
16 * met: redistributions of source code must retain the above copyright
17 * notice, this list of conditions and the following disclaimer;
18 * redistributions in binary form must reproduce the above copyright
19 * notice, this list of conditions and the following disclaimer in the
20 * documentation and/or other materials provided with the distribution;
21 * neither the name of the copyright holders nor the names of its
22 * contributors may be used to endorse or promote products derived from
23 * this software without specific prior written permission.
25 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
26 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
27 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
28 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
29 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
30 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
31 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
32 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
33 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
34 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
35 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
38 /** \file arch/generic/vec_reg.hh
39 * Vector Registers layout specification.
41 * This register type is to be used to model the SIMD registers.
42 * It takes into account the possibility that different architectural names
43 * may overlap (like for ARMv8 AArch32 for example).
45 * The design is having a basic vector register container that holds the
46 * bytes, unaware of anything else. This is implemented by VecRegContainer.
47 * As the (maximum) length of the physical vector register is a compile-time
48 * constant, it is defined as a template parameter.
50 * This file also describes two views of the container that have semantic
51 * information about the bytes. The first of these views is VecRegT.
52 * A VecRegT is a view of a VecRegContainer (by reference). The VecRegT has
53 * a type (VecElem) to which bytes are cast, and the number of such
54 * elements that the vector contains (NumElems). The size of a view,
55 * calculated as sizeof(VecElem) * NumElems must match the size of the
56 * underlying container. As VecRegT has some degree of type information it
57 * has vector semantics, and defines the index operator ([]) to get
58 * references to particular bytes understood as a VecElem.
59 * The second view of a container implemented in this file is VecLaneT, which
60 * is a view of a subset of the container.
61 * A VecLaneT is a view of a lane of a vector register, where a lane is
62 * identified by a type (VecElem) and an index (although the view is
63 * unaware of its index). Operations on the lane are directly applied to
64 * the corresponding bytes of the underlying VecRegContainer through a
67 * The intended usage is requesting views to the VecRegContainer via the
68 * member 'as' for VecRegT and the member 'laneView' for VecLaneT. Kindly
69 * find an example of usage in the following.
72 * // We declare 512 bits vectors
73 * using Vec512 = VecRegContainer<64>;
75 * // We implement the physical vector register file
76 * Vec512 physicalVecRegFile[NUM_VREGS];
78 * // Usage example, for a macro op:
79 * VecFloat8Add(ExecContext* xc) {
80 * // Request source vector register to the execution context (const as it
82 * const Vec512& vsrc1raw = xc->readVecRegOperand(this, 0);
83 * // View it as a vector of floats (we could just specify the first
84 * // template parameter, the second has a default value that works, and the
85 * // last one is derived by the constness of vsrc1raw).
86 * VecRegT<float, 8, true>& vsrc1 = vsrc1raw->as<float, 8>();
88 * // Second source and view
89 * const Vec512& vsrc2raw = xc->readVecRegOperand(this, 1);
90 * VecRegT<float, 8, true>& vsrc2 = vsrc2raw->as<float, 8>();
92 * // Destination and view
94 * VecRegT<float, 8, false>& vdst = vdstraw->as<float, 8>();
96 * for (auto i = 0; i < 8; i++) {
97 * // This assignment sets the bits in the underlying Vec512: vdstraw
98 * vdst[i] = vsrc1[i] + vsrc2[i];
100 * xc->setWriteRegOperand(this, 0, vdstraw);
103 * // Usage example, for a micro op that operates over lane number _lidx:
104 * VecFloatLaneAdd(ExecContext* xc) {
105 * // Request source vector register to the execution context (const as it
107 * const Vec512& vsrc1raw = xc->readVecRegOperand(this, 0);
108 * // View it as a lane of a vector of floats (we could just specify the
109 * // first template parameter, the second is derived by the constness of
111 * VecLaneT<float, true>& src1 = vsrc1raw->laneView<float>(this->_lidx);
113 * // Second source and view
114 * const Vec512& vsrc2raw = xc->readVecRegOperand(this, 1);
115 * VecLaneT<float, true>& src2 = vsrc2raw->laneView<float>(this->_lidx);
117 * // (Writable) destination and view
118 * // As this is a partial write, we need the exec context to support that
119 * // through, e.g., 'readVecRegOperandToWrite' returning a writable
120 * // reference to the register
121 * Vec512& vdstraw = xc->readVecRegOperandToWrite(this, 3);
122 * VecLaneT<float, false>& dst = vdstraw->laneView<float>(this->_lidx);
125 * // There is no need to copy the value back into the exec context, as
126 * // the assignment to dst modifies the appropriate bytes in vdstraw which
127 * // is in turn, a reference to the register in the cpu model.
128 * // For operations that do conditional writeback, we can decouple the
130 * // auto tmp = src1 + src2;
132 * // dst = tmp; // do writeback
134 * // // do not do writeback
140 #ifndef __ARCH_GENERIC_VEC_REG_HH__
141 #define __ARCH_GENERIC_VEC_REG_HH__
147 #include <type_traits>
150 #include "base/cprintf.hh"
151 #include "base/logging.hh"
153 constexpr unsigned MaxVecRegLenInBytes = 4096;
156 class VecRegContainer;
158 /** Vector Register Abstraction
159 * This generic class is a view in a particularization of MVC, to vector
160 * registers. There is a VecRegContainer that implements the model, and
161 * contains the data. To that model we can interpose different instantiations
162 * of VecRegT to view the container as a vector of NumElems elems of type
164 * @tparam VecElem Type of each element of the vector.
165 * @tparam NumElems Amount of components of the vector.
166 * @tparam Const Indicate if the underlying container can be modified through
169 template <typename VecElem, size_t NumElems, bool Const>
172 /** Size of the register in bytes. */
173 static constexpr inline size_t
176 return sizeof(VecElem) * NumElems;
179 /** Container type alias. */
180 using Container = typename std::conditional<Const,
181 const VecRegContainer<size()>,
182 VecRegContainer<size()>>::type;
184 /** My type alias. */
185 using MyClass = VecRegT<VecElem, NumElems, Const>;
186 /** Reference to container. */
187 Container& container;
191 VecRegT(Container& cnt) : container(cnt) {};
193 /** Zero the container. */
194 template<bool Condition = !Const>
195 typename std::enable_if<Condition, void>::type
196 zero() { container.zero(); }
198 template<bool Condition = !Const>
199 typename std::enable_if<Condition, MyClass&>::type
200 operator=(const MyClass& that)
202 container = that.container;
206 /** Index operator. */
207 const VecElem& operator[](size_t idx) const
209 return container.template raw_ptr<VecElem>()[idx];
212 /** Index operator. */
213 template<bool Condition = !Const>
214 typename std::enable_if<Condition, VecElem&>::type
215 operator[](size_t idx)
217 return container.template raw_ptr<VecElem>()[idx];
220 /** Equality operator.
221 * Required to compare thread contexts.
223 template<typename VE2, size_t NE2, bool C2>
225 operator==(const VecRegT<VE2, NE2, C2>& that) const
227 return container == that.container;
229 /** Inequality operator.
230 * Required to compare thread contexts.
232 template<typename VE2, size_t NE2, bool C2>
234 operator!=(const VecRegT<VE2, NE2, C2>& that) const
236 return !operator==(that);
239 /** Output stream operator. */
241 operator<<(std::ostream& os, const MyClass& vr)
243 /* 0-sized is not allowed */
244 os << "[" << std::hex << (uint32_t)vr[0];
245 for (uint32_t e = 1; e < vr.size(); e++)
246 os << " " << std::hex << (uint32_t)vr[e];
251 const std::string print() const { return csprintf("%s", *this); }
253 * Cast to VecRegContainer&
254 * It is useful to get the reference to the container for ISA tricks,
255 * because casting to reference prevents unnecessary copies.
257 operator Container&() { return container; }
260 /* Forward declaration. */
261 template <typename VecElem, bool Const>
265 * Vector Register Abstraction
266 * This generic class is the model in a particularization of MVC, to vector
267 * registers. The model has functionality to create views of itself, or a
268 * portion through the method 'as'.
269 * @tparam SIZE Size of the container in bytes.
271 template <size_t SIZE>
272 class VecRegContainer
274 static_assert(SIZE > 0,
275 "Cannot create Vector Register Container of zero size");
276 static_assert(SIZE <= MaxVecRegLenInBytes,
277 "Vector Register size limit exceeded");
279 static constexpr inline size_t size() { return SIZE; };
280 using Container = std::array<uint8_t, SIZE>;
282 // 16-byte aligned to support 128bit element view
283 alignas(16) Container container;
284 using MyClass = VecRegContainer<SIZE>;
288 VecRegContainer(const VecRegContainer &) = default;
289 /* This is required for de-serialisation. */
290 VecRegContainer(const std::vector<uint8_t>& that)
292 assert(that.size() >= SIZE);
293 std::memcpy(container.data(), &that[0], SIZE);
296 /** Zero the container. */
297 void zero() { memset(container.data(), 0, SIZE); }
299 /** Assignment operators. */
301 /** From VecRegContainer */
302 MyClass& operator=(const MyClass& that)
306 memcpy(container.data(), that.container.data(), SIZE);
310 /** From appropriately sized uint8_t[]. */
311 MyClass& operator=(const Container& that)
313 std::memcpy(container.data(), that.data(), SIZE);
317 /** From vector<uint8_t>.
318 * This is required for de-serialisation.
320 MyClass& operator=(const std::vector<uint8_t>& that)
322 assert(that.size() >= SIZE);
323 std::memcpy(container.data(), that.data(), SIZE);
328 /** Copy the contents into the input buffer. */
330 /** To appropriately sized uint8_t[] */
331 void copyTo(Container& dst) const
333 std::memcpy(dst.data(), container.data(), SIZE);
336 /** To vector<uint8_t>
337 * This is required for serialisation.
339 void copyTo(std::vector<uint8_t>& dst) const
342 std::memcpy(dst.data(), container.data(), SIZE);
346 /** Equality operator.
347 * Required to compare thread contexts.
351 operator==(const VecRegContainer<S2>& that) const
354 !memcmp(container.data(), that.container.data(), SIZE);
356 /** Inequality operator.
357 * Required to compare thread contexts.
361 operator!=(const VecRegContainer<S2>& that) const
363 return !operator==(that);
366 const std::string print() const { return csprintf("%s", *this); }
367 /** Get pointer to bytes. */
368 template <typename Ret>
369 const Ret* raw_ptr() const { return (const Ret*)container.data(); }
371 template <typename Ret>
372 Ret* raw_ptr() { return (Ret*)container.data(); }
376 * Create a view of this container as a vector of VecElems with an
377 * optional amount of elements. If the amount of elements is provided,
378 * the size of the container is checked, to test bounds. If it is not
379 * provided, the length is inferred from the container size and the
381 * @tparam VecElem Type of each element of the vector for the view.
382 * @tparam NumElems Amount of elements in the view.
385 template <typename VecElem, size_t NumElems=(SIZE / sizeof(VecElem))>
386 VecRegT<VecElem, NumElems, true> as() const
388 static_assert(SIZE % sizeof(VecElem) == 0,
389 "VecElem does not evenly divide the register size");
390 static_assert(sizeof(VecElem) * NumElems <= SIZE,
391 "Viewing VecReg as something bigger than it is");
392 return VecRegT<VecElem, NumElems, true>(*this);
395 template <typename VecElem, size_t NumElems=(SIZE / sizeof(VecElem))>
396 VecRegT<VecElem, NumElems, false> as()
398 static_assert(SIZE % sizeof(VecElem) == 0,
399 "VecElem does not evenly divide the register size");
400 static_assert(sizeof(VecElem) * NumElems <= SIZE,
401 "Viewing VecReg as something bigger than it is");
402 return VecRegT<VecElem, NumElems, false>(*this);
405 template <typename VecElem, int LaneIdx>
406 VecLaneT<VecElem, false> laneView();
407 template <typename VecElem, int LaneIdx>
408 VecLaneT<VecElem, true> laneView() const;
409 template <typename VecElem>
410 VecLaneT<VecElem, false> laneView(int laneIdx);
411 template <typename VecElem>
412 VecLaneT<VecElem, true> laneView(int laneIdx) const;
416 * Used for serialization.
418 friend std::ostream& operator<<(std::ostream& os, const MyClass& v)
420 for (auto& b: v.container) {
421 os << csprintf("%02x", b);
427 /** We define an auxiliary abstraction for LaneData. The ISA should care
428 * about the semantics of a, e.g., 32bit element, treating it as a signed or
429 * unsigned int, or a float depending on the semantics of a particular
430 * instruction. On the other hand, the cpu model should only care about it
431 * being a 32-bit value. */
441 /** LaneSize is an abstraction of a LS byte value for the execution and thread
442 * contexts to handle values just depending on its width. That way, the ISA
443 * can request, for example, the second 4 byte lane of register 5 to the model.
444 * The model serves that value, agnostic of the semantics of those bits. Then,
445 * it is up to the ISA to interpret those bits as a float, or as an uint.
446 * To maximize the utility, this class implements the assignment operator and
447 * the casting to equal-size types.
448 * As opposed to a VecLaneT, LaneData is not 'backed' by a VecRegContainer.
450 * When data is passed and is susceptible to being copied, use LaneData, as
451 * copying the primitive type it is built on is cheap.
452 * When data is passed as references (const or not), use VecLaneT, as all
453 * operations happen 'in place', avoiding any copies (no copies is always
454 * cheaper than cheap copies), especially when things are inlined, and
455 * references are not explicitly passed.
457 template <LaneSize LS>
461 /** Alias to the native type of the appropriate size. */
462 using UnderlyingType =
463 typename std::conditional<LS == LaneSize::EightByte, uint64_t,
464 typename std::conditional<LS == LaneSize::FourByte, uint32_t,
465 typename std::conditional<LS == LaneSize::TwoByte, uint16_t,
466 typename std::conditional<LS == LaneSize::Byte, uint8_t,
472 static constexpr auto ByteSz = sizeof(UnderlyingType);
474 using MyClass = LaneData<LS>;
477 template <typename T> explicit
478 LaneData(typename std::enable_if<sizeof(T) == ByteSz, const T&>::type t)
481 template <typename T>
482 typename std::enable_if<sizeof(T) == ByteSz, MyClass&>::type
483 operator=(const T& that)
489 typename std::enable_if<sizeof(T) == ByteSz, int>::type I = 0>
491 return *static_cast<const T*>(&_val);
495 /** Output operator overload for LaneData<Size>. */
496 template <LaneSize LS>
498 operator<<(std::ostream& os, const LaneData<LS>& d)
500 return os << static_cast<typename LaneData<LS>::UnderlyingType>(d);
503 /** Vector Lane abstraction
504 * Another view of a container. This time only a part of it is exposed.
505 * @tparam VecElem Type of each element of the vector.
506 * @tparam Const Indicate if the underlying container can be modified through
511 template <typename VecElem, bool Const>
515 /** VecRegContainer friendship to access private VecLaneT constructors.
516 * Only VecRegContainers can build VecLanes.
519 friend VecLaneT<VecElem, !Const>;
521 /*template <size_t Sz>
522 friend class VecRegContainer;*/
523 friend class VecRegContainer<8>;
524 friend class VecRegContainer<16>;
525 friend class VecRegContainer<32>;
526 friend class VecRegContainer<64>;
527 friend class VecRegContainer<128>;
528 friend class VecRegContainer<256>;
529 friend class VecRegContainer<MaxVecRegLenInBytes>;
531 /** My type alias. */
532 using MyClass = VecLaneT<VecElem, Const>;
535 using Cont = typename std::conditional<Const,
538 static_assert(!std::is_const<VecElem>::value || Const,
539 "Asked for non-const lane of const type!");
540 static_assert(std::is_integral<VecElem>::value,
541 "VecElem type is not integral!");
542 /** Reference to data. */
546 VecLaneT(Cont& cont) : container(cont) { }
549 /** Assignment operators.
550 * Assignment operators are only enabled if the underlying container is
554 template <bool Assignable = !Const>
555 typename std::enable_if<Assignable, MyClass&>::type
556 operator=(const VecElem& that) {
562 * Generic bitwise assignment. Narrowing and widening assignments are
563 * not allowed, pre-treatment of the rhs is required to conform.
565 template <bool Assignable = !Const, typename T>
566 typename std::enable_if<Assignable, MyClass&>::type
567 operator=(const T& that) {
568 static_assert(sizeof(T) >= sizeof(VecElem),
569 "Attempt to perform widening bitwise copy.");
570 static_assert(sizeof(T) <= sizeof(VecElem),
571 "Attempt to perform narrowing bitwise copy.");
572 container = static_cast<VecElem>(that);
576 /** Cast to vecElem. */
577 operator VecElem() const { return container; }
579 /** Constification. */
580 template <bool Cond = !Const, typename std::enable_if<Cond, int>::type = 0>
581 operator VecLaneT<typename std::enable_if<Cond, VecElem>::type, true>()
583 return VecLaneT<VecElem, true>(container);
588 template<typename T, bool Const>
589 struct add_const<VecLaneT<T, Const>> { typedef VecLaneT<T, true> type; };
592 /** View as the Nth lane of type VecElem. */
594 template <typename VecElem, int LaneIdx>
595 VecLaneT<VecElem, false>
596 VecRegContainer<Sz>::laneView()
598 return VecLaneT<VecElem, false>(as<VecElem>()[LaneIdx]);
601 /** View as the const Nth lane of type VecElem. */
603 template <typename VecElem, int LaneIdx>
604 VecLaneT<VecElem, true>
605 VecRegContainer<Sz>::laneView() const
607 return VecLaneT<VecElem, true>(as<VecElem>()[LaneIdx]);
610 /** View as the Nth lane of type VecElem. */
612 template <typename VecElem>
613 VecLaneT<VecElem, false>
614 VecRegContainer<Sz>::laneView(int laneIdx)
616 return VecLaneT<VecElem, false>(as<VecElem>()[laneIdx]);
619 /** View as the const Nth lane of type VecElem. */
621 template <typename VecElem>
622 VecLaneT<VecElem, true>
623 VecRegContainer<Sz>::laneView(int laneIdx) const
625 return VecLaneT<VecElem, true>(as<VecElem>()[laneIdx]);
628 using VecLane8 = VecLaneT<uint8_t, false>;
629 using VecLane16 = VecLaneT<uint16_t, false>;
630 using VecLane32 = VecLaneT<uint32_t, false>;
631 using VecLane64 = VecLaneT<uint64_t, false>;
633 using ConstVecLane8 = VecLaneT<uint8_t, true>;
634 using ConstVecLane16 = VecLaneT<uint16_t, true>;
635 using ConstVecLane32 = VecLaneT<uint32_t, true>;
636 using ConstVecLane64 = VecLaneT<uint64_t, true>;
639 * Calls required for serialization/deserialization
644 to_number(const std::string& value, VecRegContainer<Sz>& v)
646 fatal_if(value.size() > 2 * VecRegContainer<Sz>::size(),
647 "Vector register value overflow at unserialize");
649 for (int i = 0; i < VecRegContainer<Sz>::size(); i++) {
651 if (2 * i < value.size())
652 b = stoul(value.substr(i * 2, 2), nullptr, 16);
653 v.template raw_ptr<uint8_t>()[i] = b;
660 * Dummy type aliases and constants for architectures that do not implement
664 using DummyVecElem = uint32_t;
665 constexpr unsigned DummyNumVecElemPerVecReg = 2;
666 using DummyVecReg = VecRegT<DummyVecElem, DummyNumVecElemPerVecReg, false>;
667 using DummyConstVecReg = VecRegT<DummyVecElem, DummyNumVecElemPerVecReg, true>;
668 using DummyVecRegContainer = DummyVecReg::Container;
669 constexpr size_t DummyVecRegSizeBytes = DummyNumVecElemPerVecReg *
670 sizeof(DummyVecElem);
673 #endif /* __ARCH_GENERIC_VEC_REG_HH__ */