From 25474167e5b247d1b91fbf802c5b396a63ae705e Mon Sep 17 00:00:00 2001 From: Giacomo Gabrielli Date: Tue, 16 Oct 2018 16:04:08 +0100 Subject: [PATCH] arch,cpu: Add vector predicate registers Latest-gen. vector/SIMD extensions, including the Arm Scalable Vector Extension (SVE), introduce the notion of a predicate register file. This changeset adds this feature across architectures and CPU models. Change-Id: Iebcadbad89c0a582ff8b1b70de353305db603946 Signed-off-by: Giacomo Gabrielli Reviewed-on: https://gem5-review.googlesource.com/c/13715 Maintainer: Andreas Sandberg Reviewed-by: Jason Lowe-Power --- src/arch/SConscript | 6 +- src/arch/alpha/isa.hh | 6 + src/arch/alpha/registers.hh | 27 +- src/arch/arm/isa.hh | 10 + src/arch/arm/registers.hh | 15 +- src/arch/generic/vec_pred_reg.hh | 404 ++++++++++++++++++++++++++++++ src/arch/generic/vec_reg.hh | 14 ++ src/arch/isa_parser.py | 94 ++++++- src/arch/mips/isa.hh | 6 + src/arch/mips/registers.hh | 27 +- src/arch/null/registers.hh | 23 +- src/arch/power/isa.hh | 6 + src/arch/power/registers.hh | 27 +- src/arch/riscv/isa.hh | 1 + src/arch/riscv/registers.hh | 29 ++- src/arch/sparc/isa.hh | 6 + src/arch/sparc/registers.hh | 27 +- src/arch/x86/isa.hh | 6 + src/arch/x86/registers.hh | 28 ++- src/cpu/base_dyn_inst.hh | 22 ++ src/cpu/checker/cpu.hh | 35 ++- src/cpu/checker/thread_context.hh | 24 +- src/cpu/exec_context.hh | 19 +- src/cpu/inst_res.hh | 24 +- src/cpu/minor/exec_context.hh | 27 +- src/cpu/minor/scoreboard.cc | 7 +- src/cpu/minor/scoreboard.hh | 5 +- src/cpu/o3/O3CPU.py | 2 + src/cpu/o3/comm.hh | 5 +- src/cpu/o3/cpu.cc | 84 +++++++ src/cpu/o3/cpu.hh | 19 ++ src/cpu/o3/dyn_inst.hh | 25 ++ src/cpu/o3/free_list.hh | 23 +- src/cpu/o3/inst_queue_impl.hh | 1 + src/cpu/o3/regfile.cc | 21 +- src/cpu/o3/regfile.hh | 46 +++- src/cpu/o3/rename.hh | 3 +- src/cpu/o3/rename_impl.hh | 11 +- src/cpu/o3/rename_map.cc | 4 +- src/cpu/o3/rename_map.hh | 25 +- src/cpu/o3/thread_context.hh | 23 +- src/cpu/o3/thread_context_impl.hh | 24 
++ src/cpu/reg_class.cc | 3 +- src/cpu/reg_class.hh | 7 +- src/cpu/simple/exec_context.hh | 34 ++- src/cpu/simple_thread.hh | 59 ++++- src/cpu/static_inst.hh | 29 ++- src/cpu/thread_context.cc | 26 +- src/cpu/thread_context.hh | 34 ++- src/sim/insttracer.hh | 38 ++- 50 files changed, 1370 insertions(+), 101 deletions(-) create mode 100644 src/arch/generic/vec_pred_reg.hh diff --git a/src/arch/SConscript b/src/arch/SConscript index 5ea7a6a75..ed583aa5a 100644 --- a/src/arch/SConscript +++ b/src/arch/SConscript @@ -1,6 +1,6 @@ # -*- mode:python -*- -# Copyright (c) 2016 ARM Limited +# Copyright (c) 2016-2017 ARM Limited # All rights reserved. # # The license below extends only to copyright in the software and shall @@ -226,6 +226,8 @@ Export('ISADesc') DebugFlag('IntRegs') DebugFlag('FloatRegs') DebugFlag('VecRegs') +DebugFlag('VecPredRegs') DebugFlag('CCRegs') DebugFlag('MiscRegs') -CompoundFlag('Registers', [ 'IntRegs', 'FloatRegs', 'CCRegs', 'MiscRegs' ]) +CompoundFlag('Registers', [ 'IntRegs', 'FloatRegs', 'VecRegs', 'VecPredRegs', + 'CCRegs', 'MiscRegs' ]) diff --git a/src/arch/alpha/isa.hh b/src/arch/alpha/isa.hh index 54e12022a..2b183f0e3 100644 --- a/src/arch/alpha/isa.hh +++ b/src/arch/alpha/isa.hh @@ -121,6 +121,12 @@ namespace AlphaISA return reg; } + int + flattenVecPredIndex(int reg) const + { + return reg; + } + // dummy int flattenCCIndex(int reg) const diff --git a/src/arch/alpha/registers.hh b/src/arch/alpha/registers.hh index 6c71320b6..218390597 100644 --- a/src/arch/alpha/registers.hh +++ b/src/arch/alpha/registers.hh @@ -34,6 +34,7 @@ #include "arch/alpha/generated/max_inst_regs.hh" #include "arch/alpha/ipr.hh" #include "arch/generic/types.hh" +#include "arch/generic/vec_pred_reg.hh" #include "arch/generic/vec_reg.hh" #include "base/types.hh" @@ -56,14 +57,20 @@ typedef RegVal MiscReg; // dummy typedef since we don't have CC regs typedef uint8_t CCReg; -// dummy typedefs since we don't have vector regs -constexpr unsigned NumVecElemPerVecReg = 2; 
-using VecElem = uint32_t; -using VecReg = ::VecRegT; -using ConstVecReg = ::VecRegT; -using VecRegContainer = VecReg::Container; -// This has to be one to prevent warnings that are treated as errors -constexpr unsigned NumVecRegs = 1; +// Not applicable to Alpha +using VecElem = ::DummyVecElem; +using VecReg = ::DummyVecReg; +using ConstVecReg = ::DummyConstVecReg; +using VecRegContainer = ::DummyVecRegContainer; +constexpr unsigned NumVecElemPerVecReg = ::DummyNumVecElemPerVecReg; +constexpr size_t VecRegSizeBytes = ::DummyVecRegSizeBytes; + +// Not applicable to Alpha +using VecPredReg = ::DummyVecPredReg; +using ConstVecPredReg = ::DummyConstVecPredReg; +using VecPredRegContainer = ::DummyVecPredRegContainer; +constexpr size_t VecPredRegSizeBits = ::DummyVecPredRegSizeBits; +constexpr bool VecPredRegHasPackedRepr = ::DummyVecPredRegHasPackedRepr; enum MiscRegIndex { @@ -96,6 +103,10 @@ const int NumFloatArchRegs = 32; const int NumIntRegs = NumIntArchRegs + NumPALShadowRegs; const int NumFloatRegs = NumFloatArchRegs; +const int NumVecRegs = 1; // Not applicable to Alpha + // (1 to prevent warnings) +const int NumVecPredRegs = 1; // Not applicable to Alpha + // (1 to prevent warnings) const int NumCCRegs = 0; const int NumMiscRegs = NUM_MISCREGS; diff --git a/src/arch/arm/isa.hh b/src/arch/arm/isa.hh index a3e89b544..b98610bfc 100644 --- a/src/arch/arm/isa.hh +++ b/src/arch/arm/isa.hh @@ -446,6 +446,9 @@ namespace ArmISA case VecElemClass: return RegId(VecElemClass, flattenVecElemIndex(regId.index()), regId.elemIndex()); + case VecPredRegClass: + return RegId(VecPredRegClass, + flattenVecPredIndex(regId.index())); case CCRegClass: return RegId(CCRegClass, flattenCCIndex(regId.index())); case MiscRegClass: @@ -507,6 +510,13 @@ namespace ArmISA return reg; } + int + flattenVecPredIndex(int reg) const + { + assert(reg >= 0); + return reg; + } + int flattenCCIndex(int reg) const { diff --git a/src/arch/arm/registers.hh b/src/arch/arm/registers.hh index 
8346f454b..8960f9f92 100644 --- a/src/arch/arm/registers.hh +++ b/src/arch/arm/registers.hh @@ -47,6 +47,8 @@ #include "arch/arm/generated/max_inst_regs.hh" #include "arch/arm/intregs.hh" #include "arch/arm/miscregs.hh" +#include "arch/arm/types.hh" +#include "arch/generic/vec_pred_reg.hh" #include "arch/generic/vec_reg.hh" namespace ArmISA { @@ -66,6 +68,15 @@ using VecReg = ::VecRegT; using ConstVecReg = ::VecRegT; using VecRegContainer = VecReg::Container; +constexpr size_t VecRegSizeBytes = NumVecElemPerVecReg * sizeof(VecElem); + +// Dummy typedefs +using VecPredReg = ::DummyVecPredReg; +using ConstVecPredReg = ::DummyConstVecPredReg; +using VecPredRegContainer = ::DummyVecPredRegContainer; +constexpr size_t VecPredRegSizeBits = ::DummyVecPredRegSizeBits; +constexpr bool VecPredRegHasPackedRepr = ::DummyVecPredRegHasPackedRepr; + // condition code register; must be at least 32 bits for FpCondCodes typedef uint64_t CCReg; @@ -82,12 +93,14 @@ const int NumVecSpecialRegs = 8; const int NumIntRegs = NUM_INTREGS; const int NumFloatRegs = NumFloatV8ArchRegs + NumFloatSpecialRegs; const int NumVecRegs = NumVecV8ArchRegs + NumVecSpecialRegs; +const int NumVecPredRegs = 1; const int NumCCRegs = NUM_CCREGS; const int NumMiscRegs = NUM_MISCREGS; #define ISA_HAS_CC_REGS -const int TotalNumRegs = NumIntRegs + NumFloatRegs + NumVecRegs + NumMiscRegs; +const int TotalNumRegs = NumIntRegs + NumFloatRegs + NumVecRegs + + NumVecPredRegs + NumMiscRegs; // semantically meaningful register indices const int ReturnValueReg = 0; diff --git a/src/arch/generic/vec_pred_reg.hh b/src/arch/generic/vec_pred_reg.hh new file mode 100644 index 000000000..9ff9915ef --- /dev/null +++ b/src/arch/generic/vec_pred_reg.hh @@ -0,0 +1,404 @@ +// Copyright (c) 2017 ARM Limited +// All rights reserved +// +// The license below extends only to copyright in the software and shall +// not be construed as granting a license to any other intellectual +// property including but not limited to intellectual 
property relating +// to a hardware implementation of the functionality of the software +// licensed hereunder. You may use the software subject to the license +// terms below provided that you ensure that this notice is replicated +// unmodified and in its entirety in all distributions of the software, +// modified or unmodified, in source code or in binary form. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer; +// redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution; +// neither the name of the copyright holders nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+// +// Authors: Giacomo Gabrielli +// Rekai Gonzalez +// Javier Setoain + +#ifndef __ARCH_GENERIC_VEC_PRED_REG_HH__ +#define __ARCH_GENERIC_VEC_PRED_REG_HH__ + +#include +#include +#include + +#include "arch/generic/vec_reg.hh" +#include "base/cprintf.hh" + +template +class VecPredRegContainer; + +/// Predicate register view. +/// +/// This generic class implements the View in an MVC pattern, similarly to +/// @see VecRegT. Since predicates are mainly used in conjunction with vectors +/// to specify which lanes are active in a vector operation, the class is +/// templated on the vector element type to simplify ISA definitions. +/// @tparam VecElem Type of the vector elements. +/// @tparam NumElems Number of vector elements making up the view. +/// @tparam Packed True if the predicate register relies on a packed +/// representation, i.e. adjacent bits refer to different vector elements +/// irrespective of the vector element size (e.g. this is the case for +/// AVX-512). If false, the predicate register relies on an unpacked +/// representation, where each bit refers to the corresponding byte in a vector +/// register (e.g. this is the case for ARM SVE). +/// @tparam Const True if the underlying container cannot be modified through +/// the view. +template +class VecPredRegT +{ + protected: + /// Size of the register in bits. + static constexpr size_t NUM_BITS = Packed ? NumElems : + sizeof(VecElem) * NumElems; + + public: + /// Container type alias. + using Container = typename std::conditional< + Const, + const VecPredRegContainer, + VecPredRegContainer>::type; + + protected: + // Alias for this type + using MyClass = VecPredRegT; + /// Container corresponding to this view. + Container& container; + + public: + VecPredRegT(Container& c) : container(c) {} + + /// Reset the register to an all-false value. + template + typename std::enable_if::type + reset() { container.reset(); } + + /// Set the register to an all-true value. 
+ template + typename std::enable_if::type + set() { container.set(); } + + template + typename std::enable_if::type + operator=(const MyClass& that) + { + container = that.container; + return *this; + } + + const bool& + operator[](size_t idx) const + { + return container[idx * (Packed ? 1 : sizeof(VecElem))]; + } + + template + typename std::enable_if::type + operator[](size_t idx) + { + return container[idx * (Packed ? 1 : sizeof(VecElem))]; + } + + /// Return an element of the predicate register as it appears + /// in the raw (untyped) internal representation + uint8_t + get_raw(size_t idx) const + { + return container.get_bits(idx * (Packed ? 1 : sizeof(VecElem)), + (Packed ? 1 : sizeof(VecElem))); + } + + /// Write a raw value in an element of the predicate register + template + typename std::enable_if::type + set_raw(size_t idx, uint8_t val) + { + container.set_bits(idx * (Packed ? 1 : sizeof(VecElem)), + (Packed ? 1 : sizeof(VecElem)), val); + } + + /// Equality operator, required to compare thread contexts. + template + bool + operator==(const VecPredRegT& that) const + { + return container == that.container; + } + + /// Inequality operator, required to compare thread contexts. + template + bool + operator!=(const VecPredRegT& that) const + { + return !operator==(that); + } + + friend std::ostream& + operator<<(std::ostream& os, const MyClass& p) + { + // 0-sized is not allowed: print bit 0 here, the others below + os << '[' << p.container[0]; + for (size_t i = 1; i < p.NUM_BITS; ++i) { + os << " " << (p.container[i] ? 1 : 0); + } + os << ']'; + return os; + } + + /// Returns a string representation of the register content. + const std::string print() const { return csprintf("%s", *this); } + + /// Returns true if the first active element of the register is true. + /// @param mask Input mask used to filter the predicates to be tested. 
+ /// @param actual_num_elems Actual number of vector elements considered for + /// the test (corresponding to the current vector length). 
+ template + bool + firstActive(const VecPredRegT& mask, + size_t actual_num_elems) const + { + assert(actual_num_elems <= NumElems); + for (int i = 0; i < actual_num_elems; ++i) { + if (mask[i]) { + return (*this)[i]; + } + } + return false; + } + + /// Returns true if there are no active elements in the register. + /// @param mask Input mask used to filter the predicates to be tested. + /// @param actual_num_elems Actual number of vector elements considered for + /// the test (corresponding to the current vector length). + template + bool + noneActive(const VecPredRegT& mask, + size_t actual_num_elems) const + { + assert(actual_num_elems <= NumElems); + for (int i = 0; i < actual_num_elems; ++i) { + if (mask[i] && operator[](i)) { + return false; + } + } + return true; + } + + /// Returns true if the last active element of the register is true. + /// @param mask Input mask used to filter the predicates to be tested. + /// @param actual_num_elems Actual number of vector elements considered for + /// the test (corresponding to the current vector length). + template + bool + lastActive(const VecPredRegT& mask, + size_t actual_num_elems) const + { + assert(actual_num_elems <= NumElems); + for (int i = actual_num_elems - 1; i >= 0; --i) { + if (mask[i]) { + return operator[](i); + } + } + return false; + } +}; + +/// Generic predicate register container. +/// +/// This generic class implements the Model in an MVC pattern, similarly to +/// @see VecRegContainer. +/// @tparam NumBits Size of the container in bits. +/// @tparam Packed See VecPredRegT. 
+template +class VecPredRegContainer +{ + static_assert(NumBits > 0, + "Size of a predicate register must be > 0"); + + public: + static constexpr size_t NUM_BITS = NumBits; + using Container = std::array; + + private: + Container container; + // Alias for this type + using MyClass = VecPredRegContainer; + + public: + VecPredRegContainer() {} + + MyClass& + operator=(const MyClass& that) + { + if (&that == this) + return *this; + container = that.container; + return *this; + } + + /// Required for de-serialization. + MyClass& + operator=(const std::vector& that) + { + assert(that.size() == NUM_BITS); + std::copy(that.begin(), that.end(), container.begin()); + return *this; + } + + /// Resets the predicate register to an all-false register. + void + reset() + { + container.fill(false); + } + + /// Sets the predicate register to an all-true value. + void + set() + { + container.fill(true); + } + + /// Equality operator, required to compare thread contexts. + template + inline bool + operator==(const VecPredRegContainer& that) const + { + return NumBits == N2 && Packed == P2 && container == that.container; + } + + /// Inequality operator, required to compare thread contexts. + template + bool + operator!=(const VecPredRegContainer& that) const + { + return !operator==(that); + } + + /// Returns a reference to a specific element of the internal container. + bool& operator[](size_t idx) { return container[idx]; } + + /// Returns a const reference to a specific element of the internal + /// container. + const bool& operator[](size_t idx) const { return container[idx]; } + + /// Returns a subset of bits starting from a specific element in the + /// container. 
+ uint8_t + get_bits(size_t idx, uint8_t nbits) const + { + assert(nbits > 0 && nbits <= 8 && (idx + nbits - 1) < NumBits); + uint8_t v = 0; + idx = idx + nbits - 1; + for (int i = 0; i < nbits; ++i, --idx) { + v <<= 1; + v |= container[idx]; + } + return v; + } + + /// Set a subset of bits starting from a specific element in the + /// container. + void + set_bits(size_t idx, uint8_t nbits, uint8_t bval) + { + assert(nbits > 0 && nbits <= 8 && (idx + nbits - 1) < NumBits); + for (int i = 0; i < nbits; ++i, ++idx) { + container[idx] = bval & 1; + bval >>= 1; + } + } + + /// Returns a string representation of the register content. + const std::string print() const { return csprintf("%s", *this); } + + friend std::ostream& + operator<<(std::ostream& os, const MyClass& v) + { + for (auto b: v.container) { + os << csprintf("%d", b); + } + return os; + } + + /// Create a view of this container. + /// + /// If NumElems is provided, the size of the container is bounds-checked, + /// otherwise the size is inferred from the container size. + /// @tparam VecElem Type of the vector elements. + /// @tparam NumElems Number of vector elements making up the view. 
+ /// @{ + template + VecPredRegT as() const + { + static_assert((Packed && NumElems <= NumBits) || + (!Packed && + NumBits % sizeof(VecElem) == 0 && + sizeof(VecElem) * NumElems <= NumBits), + "Container size incompatible with view size"); + return VecPredRegT(*this); + } + + template + VecPredRegT as() + { + static_assert((Packed && NumElems <= NumBits) || + (!Packed && + NumBits % sizeof(VecElem) == 0 && + sizeof(VecElem) * NumElems <= NumBits), + "Container size incompatible with view size"); + return VecPredRegT(*this); + } + /// @} +}; + +/// Helper functions used for serialization/de-serialization +template +inline bool +to_number(const std::string& value, VecPredRegContainer& p) +{ + size_t i = 0; + for (const auto& c: value) { + p[i++] = (c == '1'); // one register bit per character + } + return true; +} + +/// Dummy type aliases and constants for architectures that do not implement +/// vector predicate registers. +/// @{ +constexpr bool DummyVecPredRegHasPackedRepr = false; +using DummyVecPredReg = VecPredRegT; +using DummyConstVecPredReg = VecPredRegT; +using DummyVecPredRegContainer = DummyVecPredReg::Container; +constexpr size_t DummyVecPredRegSizeBits = 8; +/// @} + +#endif // __ARCH_GENERIC_VEC_PRED_REG_HH__ diff --git a/src/arch/generic/vec_reg.hh b/src/arch/generic/vec_reg.hh index 7145af4cf..f26a8c8ad 100644 --- a/src/arch/generic/vec_reg.hh +++ b/src/arch/generic/vec_reg.hh @@ -648,4 +648,18 @@ to_number(const std::string& value, VecRegContainer& v) } /** @} */ +/** + * Dummy type aliases and constants for architectures that do not implement + * vector registers. 
+ */ +/** @{ */ +using DummyVecElem = uint32_t; +constexpr unsigned DummyNumVecElemPerVecReg = 2; +using DummyVecReg = VecRegT; +using DummyConstVecReg = VecRegT; +using DummyVecRegContainer = DummyVecReg::Container; +constexpr size_t DummyVecRegSizeBytes = DummyNumVecElemPerVecReg * + sizeof(DummyVecElem); +/** @} */ + #endif /* __ARCH_GENERIC_VEC_REG_HH__ */ diff --git a/src/arch/isa_parser.py b/src/arch/isa_parser.py index 755f966eb..16004c009 100755 --- a/src/arch/isa_parser.py +++ b/src/arch/isa_parser.py @@ -490,6 +490,9 @@ class Operand(object): def isVecElem(self): return 0 + def isVecPredReg(self): + return 0 + def isPCState(self): return 0 @@ -795,10 +798,9 @@ class VecRegOperand(Operand): wb = ''' if (traceData) { - warn_once("Vectors not supported yet in tracedata"); - /*traceData->setData(final_val);*/ + traceData->setData(tmp_d%d); } - ''' + ''' % self.dest_reg_idx return wb def finalize(self, predRead, predWrite): @@ -860,6 +862,88 @@ class VecElemOperand(Operand): return c_write +class VecPredRegOperand(Operand): + reg_class = 'VecPredRegClass' + + def __init__(self, parser, full_name, ext, is_src, is_dest): + Operand.__init__(self, parser, full_name, ext, is_src, is_dest) + self.parser = parser + + def isReg(self): + return 1 + + def isVecPredReg(self): + return 1 + + def makeDecl(self): + return '' + + def makeConstructor(self, predRead, predWrite): + c_src = '' + c_dest = '' + + if self.is_src: + c_src = src_reg_constructor % (self.reg_class, self.reg_spec) + + if self.is_dest: + c_dest = dst_reg_constructor % (self.reg_class, self.reg_spec) + c_dest += '\n\t_numVecPredDestRegs++;' + + return c_src + c_dest + + def makeRead(self, predRead): + func = 'readVecPredRegOperand' + if self.read_code != None: + return self.buildReadCode(func) + + if predRead: + rindex = '_sourceIndex++' + else: + rindex = '%d' % self.src_reg_idx + + c_read = '\t\t%s& tmp_s%s = xc->%s(this, %s);\n' % ( + 'const TheISA::VecPredRegContainer', rindex, func, rindex) + if 
self.ext: + c_read += '\t\tauto %s = tmp_s%s.as<%s>();\n' % ( + self.base_name, rindex, + self.parser.operandTypeMap[self.ext]) + return c_read + + def makeReadW(self, predWrite): + func = 'getWritableVecPredRegOperand' + if self.read_code != None: + return self.buildReadCode(func) + + if predWrite: + rindex = '_destIndex++' + else: + rindex = '%d' % self.dest_reg_idx + + c_readw = '\t\t%s& tmp_d%s = xc->%s(this, %s);\n' % ( + 'TheISA::VecPredRegContainer', rindex, func, rindex) + if self.ext: + c_readw += '\t\tauto %s = tmp_d%s.as<%s>();\n' % ( + self.base_name, rindex, + self.parser.operandTypeMap[self.ext]) + return c_readw + + def makeWrite(self, predWrite): + func = 'setVecPredRegOperand' + if self.write_code != None: + return self.buildWriteCode(func) + + wb = ''' + if (traceData) { + traceData->setData(tmp_d%d); + } + ''' % self.dest_reg_idx + return wb + + def finalize(self, predRead, predWrite): + super(VecPredRegOperand, self).finalize(predRead, predWrite) + if self.is_dest: + self.op_rd = self.makeReadW(predWrite) + self.op_rd + class CCRegOperand(Operand): reg_class = 'CCRegClass' @@ -1113,6 +1197,7 @@ class OperandList(object): self.numFPDestRegs = 0 self.numIntDestRegs = 0 self.numVecDestRegs = 0 + self.numVecPredDestRegs = 0 self.numCCDestRegs = 0 self.numMiscDestRegs = 0 self.memOperand = None @@ -1136,6 +1221,8 @@ class OperandList(object): self.numIntDestRegs += 1 elif op_desc.isVecReg(): self.numVecDestRegs += 1 + elif op_desc.isVecPredReg(): + self.numVecPredDestRegs += 1 elif op_desc.isCCReg(): self.numCCDestRegs += 1 elif op_desc.isControlReg(): @@ -1344,6 +1431,7 @@ class InstObjParams(object): header += '\n\t_numFPDestRegs = 0;' header += '\n\t_numVecDestRegs = 0;' header += '\n\t_numVecElemDestRegs = 0;' + header += '\n\t_numVecPredDestRegs = 0;' header += '\n\t_numIntDestRegs = 0;' header += '\n\t_numCCDestRegs = 0;' diff --git a/src/arch/mips/isa.hh b/src/arch/mips/isa.hh index ffcb3f1dc..cea2d5412 100644 --- a/src/arch/mips/isa.hh +++ 
b/src/arch/mips/isa.hh @@ -165,6 +165,12 @@ namespace MipsISA return reg; } + int + flattenVecPredIndex(int reg) const + { + return reg; + } + // dummy int flattenCCIndex(int reg) const diff --git a/src/arch/mips/registers.hh b/src/arch/mips/registers.hh index 6f7097b08..633199c94 100644 --- a/src/arch/mips/registers.hh +++ b/src/arch/mips/registers.hh @@ -32,6 +32,7 @@ #ifndef __ARCH_MIPS_REGISTERS_HH__ #define __ARCH_MIPS_REGISTERS_HH__ +#include "arch/generic/vec_pred_reg.hh" #include "arch/generic/vec_reg.hh" #include "arch/mips/generated/max_inst_regs.hh" #include "base/logging.hh" @@ -55,6 +56,10 @@ const int NumFloatSpecialRegs = 5; const int MaxShadowRegSets = 16; // Maximum number of shadow register sets const int NumIntRegs = NumIntArchRegs + NumIntSpecialRegs; //HI & LO Regs const int NumFloatRegs = NumFloatArchRegs + NumFloatSpecialRegs;// +const int NumVecRegs = 1; // Not applicable to MIPS + // (1 to prevent warnings) +const int NumVecPredRegs = 1; // Not applicable to MIPS + // (1 to prevent warnings) const int NumCCRegs = 0; const uint32_t MIPS32_QNAN = 0x7fbfffff; @@ -289,14 +294,20 @@ typedef RegVal MiscReg; // dummy typedef since we don't have CC regs typedef uint8_t CCReg; -// dummy typedefs since we don't have vector regs -constexpr unsigned NumVecElemPerVecReg = 2; -using VecElem = uint32_t; -using VecReg = ::VecRegT; -using ConstVecReg = ::VecRegT; -using VecRegContainer = VecReg::Container; -// This has to be one to prevent warnings that are treated as errors -constexpr unsigned NumVecRegs = 1; +// Not applicable to MIPS +using VecElem = ::DummyVecElem; +using VecReg = ::DummyVecReg; +using ConstVecReg = ::DummyConstVecReg; +using VecRegContainer = ::DummyVecRegContainer; +constexpr unsigned NumVecElemPerVecReg = ::DummyNumVecElemPerVecReg; +constexpr size_t VecRegSizeBytes = ::DummyVecRegSizeBytes; + +// Not applicable to MIPS +using VecPredReg = ::DummyVecPredReg; +using ConstVecPredReg = ::DummyConstVecPredReg; +using VecPredRegContainer 
= ::DummyVecPredRegContainer; +constexpr size_t VecPredRegSizeBits = ::DummyVecPredRegSizeBits; +constexpr bool VecPredRegHasPackedRepr = ::DummyVecPredRegHasPackedRepr; } // namespace MipsISA diff --git a/src/arch/null/registers.hh b/src/arch/null/registers.hh index fb815af4a..ff9e0cda6 100644 --- a/src/arch/null/registers.hh +++ b/src/arch/null/registers.hh @@ -40,6 +40,7 @@ #ifndef __ARCH_NULL_REGISTERS_HH__ #define __ARCH_NULL_REGISTERS_HH__ +#include "arch/generic/vec_pred_reg.hh" #include "arch/generic/vec_reg.hh" #include "arch/types.hh" #include "base/types.hh" @@ -52,14 +53,20 @@ typedef uint8_t CCReg; typedef RegVal MiscReg; const RegIndex ZeroReg = 0; -// dummy typedefs since we don't have vector regs -constexpr unsigned NumVecElemPerVecReg = 2; -using VecElem = uint32_t; -using VecReg = ::VecRegT; -using ConstVecReg = ::VecRegT; -using VecRegContainer = VecReg::Container; -// This has to be one to prevent warnings that are treated as errors -constexpr unsigned NumVecRegs = 1; +// Not applicable to null +using VecElem = ::DummyVecElem; +using VecReg = ::DummyVecReg; +using ConstVecReg = ::DummyConstVecReg; +using VecRegContainer = ::DummyVecRegContainer; +constexpr unsigned NumVecElemPerVecReg = ::DummyNumVecElemPerVecReg; +constexpr size_t VecRegSizeBytes = ::DummyVecRegSizeBytes; + +// Not applicable to null +using VecPredReg = ::DummyVecPredReg; +using ConstVecPredReg = ::DummyConstVecPredReg; +using VecPredRegContainer = ::DummyVecPredRegContainer; +constexpr size_t VecPredRegSizeBits = ::DummyVecPredRegSizeBits; +constexpr bool VecPredRegHasPackedRepr = ::DummyVecPredRegHasPackedRepr; } diff --git a/src/arch/power/isa.hh b/src/arch/power/isa.hh index 4e9fdb00a..3f26f57de 100644 --- a/src/arch/power/isa.hh +++ b/src/arch/power/isa.hh @@ -113,6 +113,12 @@ class ISA : public SimObject return reg; } + int + flattenVecPredIndex(int reg) const + { + return reg; + } + // dummy int flattenCCIndex(int reg) const diff --git a/src/arch/power/registers.hh 
b/src/arch/power/registers.hh index 989b4c52a..e8de218e7 100644 --- a/src/arch/power/registers.hh +++ b/src/arch/power/registers.hh @@ -31,6 +31,7 @@ #ifndef __ARCH_POWER_REGISTERS_HH__ #define __ARCH_POWER_REGISTERS_HH__ +#include "arch/generic/vec_pred_reg.hh" #include "arch/generic/vec_reg.hh" #include "arch/power/generated/max_inst_regs.hh" #include "arch/power/miscregs.hh" @@ -54,14 +55,20 @@ typedef RegVal MiscReg; // dummy typedef since we don't have CC regs typedef uint8_t CCReg; -// dummy typedefs since we don't have vector regs -constexpr unsigned NumVecElemPerVecReg = 2; -using VecElem = uint32_t; -using VecReg = ::VecRegT; -using ConstVecReg = ::VecRegT; -using VecRegContainer = VecReg::Container; -// This has to be one to prevent warnings that are treated as errors -constexpr unsigned NumVecRegs = 1; +// Not applicable to Power +using VecElem = ::DummyVecElem; +using VecReg = ::DummyVecReg; +using ConstVecReg = ::DummyConstVecReg; +using VecRegContainer = ::DummyVecRegContainer; +constexpr unsigned NumVecElemPerVecReg = ::DummyNumVecElemPerVecReg; +constexpr size_t VecRegSizeBytes = ::DummyVecRegSizeBytes; + +// Not applicable to Power +using VecPredReg = ::DummyVecPredReg; +using ConstVecPredReg = ::DummyConstVecPredReg; +using VecPredRegContainer = ::DummyVecPredRegContainer; +constexpr size_t VecPredRegSizeBits = ::DummyVecPredRegSizeBits; +constexpr bool VecPredRegHasPackedRepr = ::DummyVecPredRegHasPackedRepr; // Constants Related to the number of registers const int NumIntArchRegs = 32; @@ -75,6 +82,10 @@ const int NumInternalProcRegs = 0; const int NumIntRegs = NumIntArchRegs + NumIntSpecialRegs; const int NumFloatRegs = NumFloatArchRegs + NumFloatSpecialRegs; +const int NumVecRegs = 1; // Not applicable to Power + // (1 to prevent warnings) +const int NumVecPredRegs = 1; // Not applicable to Power + // (1 to prevent warnings) const int NumCCRegs = 0; const int NumMiscRegs = NUM_MISCREGS; diff --git a/src/arch/riscv/isa.hh 
b/src/arch/riscv/isa.hh index 2602f6dde..0107f8e92 100644 --- a/src/arch/riscv/isa.hh +++ b/src/arch/riscv/isa.hh @@ -84,6 +84,7 @@ class ISA : public SimObject int flattenFloatIndex(int reg) const { return reg; } int flattenVecIndex(int reg) const { return reg; } int flattenVecElemIndex(int reg) const { return reg; } + int flattenVecPredIndex(int reg) const { return reg; } int flattenCCIndex(int reg) const { return reg; } int flattenMiscIndex(int reg) const { return reg; } diff --git a/src/arch/riscv/registers.hh b/src/arch/riscv/registers.hh index 2de154e22..a67274221 100644 --- a/src/arch/riscv/registers.hh +++ b/src/arch/riscv/registers.hh @@ -52,6 +52,7 @@ #include #include "arch/generic/types.hh" +#include "arch/generic/vec_pred_reg.hh" #include "arch/generic/vec_reg.hh" #include "arch/isa_traits.hh" #include "arch/riscv/generated/max_inst_regs.hh" @@ -68,19 +69,31 @@ typedef RegVal FloatRegBits; typedef uint8_t CCReg; // Not applicable to Riscv typedef RegVal MiscReg; -// dummy typedefs since we don't have vector regs -const unsigned NumVecElemPerVecReg = 2; -using VecElem = uint32_t; -using VecReg = ::VecRegT; -using ConstVecReg = ::VecRegT; -using VecRegContainer = VecReg::Container; +// Not applicable to RISC-V +using VecElem = ::DummyVecElem; +using VecReg = ::DummyVecReg; +using ConstVecReg = ::DummyConstVecReg; +using VecRegContainer = ::DummyVecRegContainer; +constexpr unsigned NumVecElemPerVecReg = ::DummyNumVecElemPerVecReg; +constexpr size_t VecRegSizeBytes = ::DummyVecRegSizeBytes; + +// Not applicable to RISC-V +using VecPredReg = ::DummyVecPredReg; +using ConstVecPredReg = ::DummyConstVecPredReg; +using VecPredRegContainer = ::DummyVecPredRegContainer; +constexpr size_t VecPredRegSizeBits = ::DummyVecPredRegSizeBits; +constexpr bool VecPredRegHasPackedRepr = ::DummyVecPredRegHasPackedRepr; const int NumIntArchRegs = 32; const int NumMicroIntRegs = 1; const int NumIntRegs = NumIntArchRegs + NumMicroIntRegs; const int NumFloatRegs = 32; -// This 
has to be one to prevent warnings that are treated as errors -const unsigned NumVecRegs = 1; + +const unsigned NumVecRegs = 1; // Not applicable to RISC-V + // (1 to prevent warnings) +const int NumVecPredRegs = 1; // Not applicable to RISC-V + // (1 to prevent warnings) + const int NumCCRegs = 0; // Semantically meaningful register indices diff --git a/src/arch/sparc/isa.hh b/src/arch/sparc/isa.hh index 8ad729862..6cda32038 100644 --- a/src/arch/sparc/isa.hh +++ b/src/arch/sparc/isa.hh @@ -234,6 +234,12 @@ class ISA : public SimObject return reg; } + int + flattenVecPredIndex(int reg) const + { + return reg; + } + // dummy int flattenCCIndex(int reg) const diff --git a/src/arch/sparc/registers.hh b/src/arch/sparc/registers.hh index 5f12b98cb..d9b182e7f 100644 --- a/src/arch/sparc/registers.hh +++ b/src/arch/sparc/registers.hh @@ -32,6 +32,7 @@ #ifndef __ARCH_SPARC_REGISTERS_HH__ #define __ARCH_SPARC_REGISTERS_HH__ +#include "arch/generic/vec_pred_reg.hh" #include "arch/generic/vec_reg.hh" #include "arch/sparc/generated/max_inst_regs.hh" #include "arch/sparc/miscregs.hh" @@ -48,14 +49,20 @@ using SparcISAInst::MaxMiscDestRegs; // dummy typedef since we don't have CC regs typedef uint8_t CCReg; -// dummy typedefs since we don't have vector regs -constexpr unsigned NumVecElemPerVecReg = 2; -using VecElem = uint32_t; -using VecReg = ::VecRegT; -using ConstVecReg = ::VecRegT; -using VecRegContainer = VecReg::Container; -// This has to be one to prevent warnings that are treated as errors -constexpr unsigned NumVecRegs = 1; +// Not applicable to SPARC +using VecElem = ::DummyVecElem; +using VecReg = ::DummyVecReg; +using ConstVecReg = ::DummyConstVecReg; +using VecRegContainer = ::DummyVecRegContainer; +constexpr unsigned NumVecElemPerVecReg = ::DummyNumVecElemPerVecReg; +constexpr size_t VecRegSizeBytes = ::DummyVecRegSizeBytes; + +// Not applicable to SPARC +using VecPredReg = ::DummyVecPredReg; +using ConstVecPredReg = ::DummyConstVecPredReg; +using 
VecPredRegContainer = ::DummyVecPredRegContainer; +constexpr size_t VecPredRegSizeBits = ::DummyVecPredRegSizeBits; +constexpr bool VecPredRegHasPackedRepr = ::DummyVecPredRegHasPackedRepr; // semantically meaningful register indices const int ZeroReg = 0; // architecturally meaningful @@ -70,6 +77,10 @@ const int SyscallPseudoReturnReg = 9; const int NumIntArchRegs = 32; const int NumIntRegs = (MaxGL + 1) * 8 + NWindows * 16 + NumMicroIntRegs; +const int NumVecRegs = 1; // Not applicable to SPARC + // (1 to prevent warnings) +const int NumVecPredRegs = 1; // Not applicable to SPARC + // (1 to prevent warnings) const int NumCCRegs = 0; const int TotalNumRegs = NumIntRegs + NumFloatRegs + NumMiscRegs; diff --git a/src/arch/x86/isa.hh b/src/arch/x86/isa.hh index b61face09..7ad464643 100644 --- a/src/arch/x86/isa.hh +++ b/src/arch/x86/isa.hh @@ -116,6 +116,12 @@ namespace X86ISA return reg; } + int + flattenVecPredIndex(int reg) const + { + return reg; + } + int flattenCCIndex(int reg) const { diff --git a/src/arch/x86/registers.hh b/src/arch/x86/registers.hh index 509f7a111..893822263 100644 --- a/src/arch/x86/registers.hh +++ b/src/arch/x86/registers.hh @@ -41,6 +41,7 @@ #ifndef __ARCH_X86_REGISTERS_HH__ #define __ARCH_X86_REGISTERS_HH__ +#include "arch/generic/vec_pred_reg.hh" #include "arch/generic/vec_reg.hh" #include "arch/x86/generated/max_inst_regs.hh" #include "arch/x86/regs/int.hh" @@ -77,6 +78,11 @@ enum DependenceTags { Max_Reg_Index = Misc_Reg_Base + NumMiscRegs }; +const int NumVecRegs = 1; // Not applicable to x86 + // (1 to prevent warnings) +const int NumVecPredRegs = 1; // Not applicable to x86 + // (1 to prevent warnings) + // semantically meaningful register indices //There is no such register in X86 const int ZeroReg = NUM_INTREGS; @@ -94,14 +100,20 @@ typedef RegVal IntReg; typedef uint64_t CCReg; typedef RegVal MiscReg; -// dummy typedefs since we don't have vector regs -constexpr unsigned NumVecElemPerVecReg = 2; -using VecElem = uint32_t; 
-using VecReg = ::VecRegT; -using ConstVecReg = ::VecRegT; -using VecRegContainer = VecReg::Container; -// This has to be one to prevent warnings that are treated as errors -constexpr unsigned NumVecRegs = 1; +// Not applicable to x86 +using VecElem = ::DummyVecElem; +using VecReg = ::DummyVecReg; +using ConstVecReg = ::DummyConstVecReg; +using VecRegContainer = ::DummyVecRegContainer; +constexpr unsigned NumVecElemPerVecReg = ::DummyNumVecElemPerVecReg; +constexpr size_t VecRegSizeBytes = ::DummyVecRegSizeBytes; + +// Not applicable to x86 +using VecPredReg = ::DummyVecPredReg; +using ConstVecPredReg = ::DummyConstVecPredReg; +using VecPredRegContainer = ::DummyVecPredRegContainer; +constexpr size_t VecPredRegSizeBits = ::DummyVecPredRegSizeBits; +constexpr bool VecPredRegHasPackedRepr = ::DummyVecPredRegHasPackedRepr; //These floating point types are correct for mmx, but not //technically for x87 (80 bits) or at all for xmm (128 bits) diff --git a/src/cpu/base_dyn_inst.hh b/src/cpu/base_dyn_inst.hh index d81b58bdf..b87fd8b4e 100644 --- a/src/cpu/base_dyn_inst.hh +++ b/src/cpu/base_dyn_inst.hh @@ -584,6 +584,11 @@ class BaseDynInst : public ExecContext, public RefCounted { return staticInst->numVecElemDestRegs(); } + int8_t + numVecPredDestRegs() const + { + return staticInst->numVecPredDestRegs(); + } /** Returns the logical register index of the i'th destination register. */ const RegId& destRegIdx(int i) const { return staticInst->destRegIdx(i); } @@ -638,6 +643,16 @@ class BaseDynInst : public ExecContext, public RefCounted InstResult::ResultType::VecElem)); } } + + /** Predicate result. */ + template + void setVecPredResult(T&& t) + { + if (instFlags[RecordResult]) { + instResult.push(InstResult(std::forward(t), + InstResult::ResultType::VecPredReg)); + } + } /** @} */ /** Records an integer register being set to a value. 
*/ @@ -672,6 +687,13 @@ class BaseDynInst : public ExecContext, public RefCounted setVecElemResult(val); } + /** Records a vector predicate register being set to a value. */ + void setVecPredRegOperand(const StaticInst *si, int idx, + const VecPredRegContainer& val) + { + setVecPredResult(val); + } + /** Records that one of the source registers is ready. */ void markSrcRegReady(); diff --git a/src/cpu/checker/cpu.hh b/src/cpu/checker/cpu.hh index 4468689bd..9d6061ad8 100644 --- a/src/cpu/checker/cpu.hh +++ b/src/cpu/checker/cpu.hh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2011, 2016 ARM Limited + * Copyright (c) 2011, 2016-2017 ARM Limited * Copyright (c) 2013 Advanced Micro Devices, Inc. * All rights reserved * @@ -304,6 +304,22 @@ class CheckerCPU : public BaseCPU, public ExecContext return thread->readVecElem(reg); } + const VecPredRegContainer& + readVecPredRegOperand(const StaticInst *si, int idx) const override + { + const RegId& reg = si->srcRegIdx(idx); + assert(reg.isVecPredReg()); + return thread->readVecPredReg(reg); + } + + VecPredRegContainer& + getWritableVecPredRegOperand(const StaticInst *si, int idx) override + { + const RegId& reg = si->destRegIdx(idx); + assert(reg.isVecPredReg()); + return thread->getWritableVecPredReg(reg); + } + CCReg readCCRegOperand(const StaticInst *si, int idx) override { @@ -336,6 +352,14 @@ class CheckerCPU : public BaseCPU, public ExecContext InstResult::ResultType::VecElem)); } + template + void + setVecPredResult(T&& t) + { + result.push(InstResult(std::forward(t), + InstResult::ResultType::VecPredReg)); + } + void setIntRegOperand(const StaticInst *si, int idx, RegVal val) override { @@ -383,6 +407,15 @@ class CheckerCPU : public BaseCPU, public ExecContext setVecElemResult(val); } + void setVecPredRegOperand(const StaticInst *si, int idx, + const VecPredRegContainer& val) override + { + const RegId& reg = si->destRegIdx(idx); + assert(reg.isVecPredReg()); + thread->setVecPredReg(reg, val); + setVecPredResult(val); + } + bool
readPredicate() const override { return thread->readPredicate(); } void diff --git a/src/cpu/checker/thread_context.hh b/src/cpu/checker/thread_context.hh index b5a2079ea..8ce5a740d 100644 --- a/src/cpu/checker/thread_context.hh +++ b/src/cpu/checker/thread_context.hh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2011-2012, 2016 ARM Limited + * Copyright (c) 2011-2012, 2016-2018 ARM Limited * Copyright (c) 2013 Advanced Micro Devices, Inc. * All rights reserved * @@ -263,6 +263,12 @@ class CheckerThreadContext : public ThreadContext const VecElem& readVecElem(const RegId& reg) const { return actualTC->readVecElem(reg); } + const VecPredRegContainer& readVecPredReg(const RegId& reg) const override + { return actualTC->readVecPredReg(reg); } + + VecPredRegContainer& getWritableVecPredReg(const RegId& reg) override + { return actualTC->getWritableVecPredReg(reg); } + CCReg readCCReg(int reg_idx) { return actualTC->readCCReg(reg_idx); } @@ -294,6 +300,13 @@ class CheckerThreadContext : public ThreadContext checkerTC->setVecElem(reg, val); } + void + setVecPredReg(const RegId& reg, const VecPredRegContainer& val) + { + actualTC->setVecPredReg(reg, val); + checkerTC->setVecPredReg(reg, val); + } + void setCCReg(int reg_idx, CCReg val) { @@ -428,6 +441,15 @@ class CheckerThreadContext : public ThreadContext const ElemIndex& elem_idx, const VecElem& val) { actualTC->setVecElemFlat(idx, elem_idx, val); } + const VecPredRegContainer& readVecPredRegFlat(int idx) const override + { return actualTC->readVecPredRegFlat(idx); } + + VecPredRegContainer& getWritableVecPredRegFlat(int idx) override + { return actualTC->getWritableVecPredRegFlat(idx); } + + void setVecPredRegFlat(int idx, const VecPredRegContainer& val) override + { actualTC->setVecPredRegFlat(idx, val); } + CCReg readCCRegFlat(int idx) { return actualTC->readCCRegFlat(idx); } diff --git a/src/cpu/exec_context.hh b/src/cpu/exec_context.hh index 75f428b87..87af91623 100644 --- a/src/cpu/exec_context.hh +++ 
b/src/cpu/exec_context.hh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2014, 2016 ARM Limited + * Copyright (c) 2014, 2016-2017 ARM Limited * All rights reserved * * The license below extends only to copyright in the software and shall @@ -77,6 +77,7 @@ class ExecContext { typedef TheISA::CCReg CCReg; using VecRegContainer = TheISA::VecRegContainer; using VecElem = TheISA::VecElem; + using VecPredRegContainer = TheISA::VecPredRegContainer; public: /** @@ -168,6 +169,22 @@ class ExecContext { const VecElem val) = 0; /** @} */ + /** Predicate registers interface. */ + /** @{ */ + /** Reads source predicate register operand. */ + virtual const VecPredRegContainer& + readVecPredRegOperand(const StaticInst *si, int idx) const = 0; + + /** Gets destination predicate register operand for modification. */ + virtual VecPredRegContainer& + getWritableVecPredRegOperand(const StaticInst *si, int idx) = 0; + + /** Sets a destination predicate register operand to a value. */ + virtual void + setVecPredRegOperand(const StaticInst *si, int idx, + const VecPredRegContainer& val) = 0; + /** @} */ + /** * @{ * @name Condition Code Registers diff --git a/src/cpu/inst_res.hh b/src/cpu/inst_res.hh index 9b6a23d95..bf9c649ef 100644 --- a/src/cpu/inst_res.hh +++ b/src/cpu/inst_res.hh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016 ARM Limited + * Copyright (c) 2016-2017 ARM Limited * All rights reserved * * The license below extends only to copyright in the software and shall @@ -48,12 +48,14 @@ class InstResult { using VecRegContainer = TheISA::VecRegContainer; using VecElem = TheISA::VecElem; + using VecPredRegContainer = TheISA::VecPredRegContainer; public: union MultiResult { uint64_t integer; double dbl; VecRegContainer vector; VecElem vecElem; + VecPredRegContainer pred; MultiResult() {} }; @@ -61,6 +63,7 @@ class InstResult { Scalar, VecElem, VecReg, + VecPredReg, NumResultTypes, Invalid }; @@ -87,6 +90,9 @@ class InstResult { /** Vector result. 
*/ explicit InstResult(const VecRegContainer& v, const ResultType& t) : type(t) { result.vector = v; } + /** Predicate result. */ + explicit InstResult(const VecPredRegContainer& v, const ResultType& t) + : type(t) { result.pred = v; } InstResult& operator=(const InstResult& that) { type = that.type; @@ -104,6 +110,10 @@ class InstResult { case ResultType::VecReg: result.vector = that.result.vector; break; + case ResultType::VecPredReg: + result.pred = that.result.pred; + break; + default: panic("Assigning result from unknown result type"); break; @@ -124,6 +134,8 @@ class InstResult { return result.vecElem == that.result.vecElem; case ResultType::VecReg: return result.vector == that.result.vector; + case ResultType::VecPredReg: + return result.pred == that.result.pred; case ResultType::Invalid: return false; default: @@ -143,6 +155,8 @@ class InstResult { bool isVector() const { return type == ResultType::VecReg; } /** Is this a vector element result?. */ bool isVecElem() const { return type == ResultType::VecElem; } + /** Is this a predicate result?. */ + bool isPred() const { return type == ResultType::VecPredReg; } /** Is this a valid result?. */ bool isValid() const { return type != ResultType::Invalid; } /** @} */ @@ -177,6 +191,14 @@ class InstResult { panic_if(!isVecElem(), "Converting scalar (or invalid) to vector!!"); return result.vecElem; } + + const VecPredRegContainer& + asPred() const + { + panic_if(!isPred(), "Converting scalar (or invalid) to predicate!!"); + return result.pred; + } + /** @} */ }; diff --git a/src/cpu/minor/exec_context.hh b/src/cpu/minor/exec_context.hh index b9ed3971f..4cb67372e 100644 --- a/src/cpu/minor/exec_context.hh +++ b/src/cpu/minor/exec_context.hh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2011-2014, 2016 ARM Limited + * Copyright (c) 2011-2014, 2016-2017 ARM Limited * Copyright (c) 2013 Advanced Micro Devices, Inc. 
* All rights reserved * @@ -161,6 +161,22 @@ class ExecContext : public ::ExecContext return thread.readVecElem(reg); } + const TheISA::VecPredRegContainer& + readVecPredRegOperand(const StaticInst *si, int idx) const override + { + const RegId& reg = si->srcRegIdx(idx); + assert(reg.isVecPredReg()); + return thread.readVecPredReg(reg); + } + + TheISA::VecPredRegContainer& + getWritableVecPredRegOperand(const StaticInst *si, int idx) override + { + const RegId& reg = si->destRegIdx(idx); + assert(reg.isVecPredReg()); + return thread.getWritableVecPredReg(reg); + } + void setIntRegOperand(const StaticInst *si, int idx, RegVal val) override { @@ -186,6 +202,15 @@ class ExecContext : public ::ExecContext thread.setVecReg(reg, val); } + void + setVecPredRegOperand(const StaticInst *si, int idx, + const TheISA::VecPredRegContainer& val) override + { + const RegId& reg = si->destRegIdx(idx); + assert(reg.isVecPredReg()); + thread.setVecPredReg(reg, val); + } + /** Vector Register Lane Interfaces. */ /** @{ */ /** Reads source vector 8bit operand.
*/ diff --git a/src/cpu/minor/scoreboard.cc b/src/cpu/minor/scoreboard.cc index 196d035eb..5c0e86a67 100644 --- a/src/cpu/minor/scoreboard.cc +++ b/src/cpu/minor/scoreboard.cc @@ -1,5 +1,5 @@ /* - * Copyright (c) 2013-2014, 2016 ARM Limited + * Copyright (c) 2013-2014, 2016-2017 ARM Limited * All rights reserved * * The license below extends only to copyright in the software and shall @@ -77,6 +77,11 @@ Scoreboard::findIndex(const RegId& reg, Index &scoreboard_index) TheISA::NumFloatRegs + reg.flatIndex(); ret = true; break; + case VecPredRegClass: + scoreboard_index = TheISA::NumIntRegs + TheISA::NumCCRegs + + TheISA::NumFloatRegs + TheISA::NumVecRegs + reg.index(); + ret = true; + break; case CCRegClass: scoreboard_index = TheISA::NumIntRegs + reg.index(); ret = true; diff --git a/src/cpu/minor/scoreboard.hh b/src/cpu/minor/scoreboard.hh index 37ae8da0a..b21e14e24 100644 --- a/src/cpu/minor/scoreboard.hh +++ b/src/cpu/minor/scoreboard.hh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2013-2014, 2016 ARM Limited + * Copyright (c) 2013-2014, 2016-2017 ARM Limited * All rights reserved * * The license below extends only to copyright in the software and shall @@ -95,7 +95,8 @@ class Scoreboard : public Named Named(name), numRegs(TheISA::NumIntRegs + TheISA::NumCCRegs + TheISA::NumFloatRegs + - (TheISA::NumVecRegs * TheISA::NumVecElemPerVecReg)), + (TheISA::NumVecRegs * TheISA::NumVecElemPerVecReg) + + TheISA::NumVecPredRegs), numResults(numRegs, 0), numUnpredictableResults(numRegs, 0), fuIndices(numRegs, 0), diff --git a/src/cpu/o3/O3CPU.py b/src/cpu/o3/O3CPU.py index 32cc19010..e73c09334 100644 --- a/src/cpu/o3/O3CPU.py +++ b/src/cpu/o3/O3CPU.py @@ -150,6 +150,8 @@ class DerivO3CPU(BaseCPU): _defaultNumPhysCCRegs = Self.numPhysIntRegs * 5 numPhysVecRegs = Param.Unsigned(256, "Number of physical vector " "registers") + numPhysVecPredRegs = Param.Unsigned(32, "Number of physical predicate " + "registers") numPhysCCRegs = Param.Unsigned(_defaultNumPhysCCRegs, "Number of physical 
cc registers") numIQEntries = Param.Unsigned(64, "Number of instruction queue entries") diff --git a/src/cpu/o3/comm.hh b/src/cpu/o3/comm.hh index f5be5a804..df518b1e4 100644 --- a/src/cpu/o3/comm.hh +++ b/src/cpu/o3/comm.hh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2011, 2016 ARM Limited + * Copyright (c) 2011, 2016-2017 ARM Limited * Copyright (c) 2013 Advanced Micro Devices, Inc. * All rights reserved * @@ -121,6 +121,9 @@ class PhysRegId : private RegId { /** @Return true if it is a vector element physical register. */ bool isVectorPhysElem() const { return isVecElem(); } + /** @return true if it is a vector predicate physical register. */ + bool isVecPredPhysReg() const { return isVecPredReg(); } + /** @Return true if it is a condition-code physical register. */ bool isMiscPhysReg() const { return isMiscReg(); } diff --git a/src/cpu/o3/cpu.cc b/src/cpu/o3/cpu.cc index 5d92d92dc..ef3b17202 100644 --- a/src/cpu/o3/cpu.cc +++ b/src/cpu/o3/cpu.cc @@ -159,6 +159,7 @@ FullO3CPU::FullO3CPU(DerivO3CPUParams *params) regFile(params->numPhysIntRegs, params->numPhysFloatRegs, params->numPhysVecRegs, + params->numPhysVecPredRegs, params->numPhysCCRegs, vecMode), @@ -258,6 +259,7 @@ FullO3CPU::FullO3CPU(DerivO3CPUParams *params) assert(params->numPhysIntRegs >= numThreads * TheISA::NumIntRegs); assert(params->numPhysFloatRegs >= numThreads * TheISA::NumFloatRegs); assert(params->numPhysVecRegs >= numThreads * TheISA::NumVecRegs); + assert(params->numPhysVecPredRegs >= numThreads * TheISA::NumVecPredRegs); assert(params->numPhysCCRegs >= numThreads * TheISA::NumCCRegs); rename.setScoreboard(&scoreboard); @@ -325,6 +327,13 @@ FullO3CPU::FullO3CPU(DerivO3CPUParams *params) } } + for (RegIndex ridx = 0; ridx < TheISA::NumVecPredRegs; ++ridx) { + PhysRegIdPtr phys_reg = freeList.getVecPredReg(); + renameMap[tid].setEntry(RegId(VecPredRegClass, ridx), phys_reg); + commitRenameMap[tid].setEntry( + RegId(VecPredRegClass, ridx), phys_reg); + } + for (RegIndex ridx = 0; ridx < 
TheISA::NumCCRegs; ++ridx) { PhysRegIdPtr phys_reg = freeList.getCCReg(); renameMap[tid].setEntry(RegId(CCRegClass, ridx), phys_reg); @@ -538,6 +547,16 @@ FullO3CPU::regStats() .desc("number of vector regfile writes") .prereq(vecRegfileWrites); + vecPredRegfileReads + .name(name() + ".pred_regfile_reads") + .desc("number of predicate regfile reads") + .prereq(vecPredRegfileReads); + + vecPredRegfileWrites + .name(name() + ".pred_regfile_writes") + .desc("number of predicate regfile writes") + .prereq(vecPredRegfileWrites); + ccRegfileReads .name(name() + ".cc_regfile_reads") .desc("number of cc regfile reads") @@ -883,6 +902,14 @@ FullO3CPU::removeThread(ThreadID tid) freeList.addReg(phys_reg); } + // Unbind vector predicate Regs from Rename Map + for (unsigned preg = 0; preg < TheISA::NumVecPredRegs; preg++) { + PhysRegIdPtr phys_reg = renameMap[tid].lookup( + RegId(VecPredRegClass, preg)); + scoreboard.unsetReg(phys_reg); + freeList.addReg(phys_reg); + } + // Unbind condition-code Regs from Rename Map for (RegId reg_id(CCRegClass, 0); reg_id.index() < TheISA::NumCCRegs; reg_id.index()++) { @@ -1333,6 +1360,24 @@ FullO3CPU::readVecElem(PhysRegIdPtr phys_reg) const -> const VecElem& return regFile.readVecElem(phys_reg); } +template +auto +FullO3CPU::readVecPredReg(PhysRegIdPtr phys_reg) const + -> const VecPredRegContainer& +{ + vecPredRegfileReads++; + return regFile.readVecPredReg(phys_reg); +} + +template +auto +FullO3CPU::getWritableVecPredReg(PhysRegIdPtr phys_reg) + -> VecPredRegContainer& +{ + vecPredRegfileWrites++; + return regFile.getWritableVecPredReg(phys_reg); +} + template CCReg FullO3CPU::readCCReg(PhysRegIdPtr phys_reg) @@ -1373,6 +1418,15 @@ FullO3CPU::setVecElem(PhysRegIdPtr phys_reg, const VecElem& val) regFile.setVecElem(phys_reg, val); } +template +void +FullO3CPU::setVecPredReg(PhysRegIdPtr phys_reg, + const VecPredRegContainer& val) +{ + vecPredRegfileWrites++; + regFile.setVecPredReg(phys_reg, val); +} + template void FullO3CPU::setCCReg(PhysRegIdPtr
phys_reg, CCReg val) @@ -1433,6 +1487,26 @@ FullO3CPU::readArchVecElem(const RegIndex& reg_idx, const ElemIndex& ldx, return readVecElem(phys_reg); } +template +auto +FullO3CPU::readArchVecPredReg(int reg_idx, ThreadID tid) const + -> const VecPredRegContainer& +{ + PhysRegIdPtr phys_reg = commitRenameMap[tid].lookup( + RegId(VecPredRegClass, reg_idx)); + return readVecPredReg(phys_reg); +} + +template +auto +FullO3CPU::getWritableArchVecPredReg(int reg_idx, ThreadID tid) + -> VecPredRegContainer& +{ + PhysRegIdPtr phys_reg = commitRenameMap[tid].lookup( + RegId(VecPredRegClass, reg_idx)); + return getWritableVecPredReg(phys_reg); +} + template CCReg FullO3CPU::readArchCCReg(int reg_idx, ThreadID tid) @@ -1486,6 +1560,16 @@ FullO3CPU::setArchVecElem(const RegIndex& reg_idx, const ElemIndex& ldx, setVecElem(phys_reg, val); } +template +void +FullO3CPU::setArchVecPredReg(int reg_idx, const VecPredRegContainer& val, + ThreadID tid) +{ + PhysRegIdPtr phys_reg = commitRenameMap[tid].lookup( + RegId(VecPredRegClass, reg_idx)); + setVecPredReg(phys_reg, val); +} + template void FullO3CPU::setArchCCReg(int reg_idx, CCReg val, ThreadID tid) diff --git a/src/cpu/o3/cpu.hh b/src/cpu/o3/cpu.hh index b5f754056..30ed4ef3b 100644 --- a/src/cpu/o3/cpu.hh +++ b/src/cpu/o3/cpu.hh @@ -107,6 +107,8 @@ class FullO3CPU : public BaseO3CPU using VecElem = TheISA::VecElem; using VecRegContainer = TheISA::VecRegContainer; + using VecPredRegContainer = TheISA::VecPredRegContainer; + typedef O3ThreadState ImplState; typedef O3ThreadState Thread; @@ -457,6 +459,10 @@ class FullO3CPU : public BaseO3CPU const VecElem& readVecElem(PhysRegIdPtr reg_idx) const; + const VecPredRegContainer& readVecPredReg(PhysRegIdPtr reg_idx) const; + + VecPredRegContainer& getWritableVecPredReg(PhysRegIdPtr reg_idx); + TheISA::CCReg readCCReg(PhysRegIdPtr phys_reg); void setIntReg(PhysRegIdPtr phys_reg, RegVal val); @@ -467,6 +473,8 @@ class FullO3CPU : public BaseO3CPU void setVecElem(PhysRegIdPtr reg_idx, const 
VecElem& val); + void setVecPredReg(PhysRegIdPtr reg_idx, const VecPredRegContainer& val); + void setCCReg(PhysRegIdPtr phys_reg, TheISA::CCReg val); RegVal readArchIntReg(int reg_idx, ThreadID tid); @@ -501,6 +509,11 @@ class FullO3CPU : public BaseO3CPU const VecElem& readArchVecElem(const RegIndex& reg_idx, const ElemIndex& ldx, ThreadID tid) const; + const VecPredRegContainer& readArchVecPredReg(int reg_idx, + ThreadID tid) const; + + VecPredRegContainer& getWritableArchVecPredReg(int reg_idx, ThreadID tid); + TheISA::CCReg readArchCCReg(int reg_idx, ThreadID tid); /** Architectural register accessors. Looks up in the commit @@ -512,6 +525,9 @@ class FullO3CPU : public BaseO3CPU void setArchFloatRegBits(int reg_idx, RegVal val, ThreadID tid); + void setArchVecPredReg(int reg_idx, const VecPredRegContainer& val, + ThreadID tid); + void setArchVecReg(int reg_idx, const VecRegContainer& val, ThreadID tid); void setArchVecElem(const RegIndex& reg_idx, const ElemIndex& ldx, @@ -805,6 +821,9 @@ class FullO3CPU : public BaseO3CPU //number of vector register file accesses mutable Stats::Scalar vecRegfileReads; Stats::Scalar vecRegfileWrites; + //number of predicate register file accesses + mutable Stats::Scalar vecPredRegfileReads; + Stats::Scalar vecPredRegfileWrites; //number of CC register file accesses Stats::Scalar ccRegfileReads; Stats::Scalar ccRegfileWrites; diff --git a/src/cpu/o3/dyn_inst.hh b/src/cpu/o3/dyn_inst.hh index 5bd0f8e47..9793f4ead 100644 --- a/src/cpu/o3/dyn_inst.hh +++ b/src/cpu/o3/dyn_inst.hh @@ -70,6 +70,7 @@ class BaseO3DynInst : public BaseDynInst using VecRegContainer = TheISA::VecRegContainer; using VecElem = TheISA::VecElem; static constexpr auto NumVecElemPerVecReg = TheISA::NumVecElemPerVecReg; + using VecPredRegContainer = TheISA::VecPredRegContainer; enum { MaxInstSrcRegs = TheISA::MaxInstSrcRegs, //< Max source regs @@ -231,6 +232,10 @@ class BaseO3DynInst : public BaseDynInst this->setVecElemOperand(this->staticInst.get(), idx, 
this->cpu->readVecElem(prev_phys_reg)); break; + case VecPredRegClass: + this->setVecPredRegOperand(this->staticInst.get(), idx, + this->cpu->readVecPredReg(prev_phys_reg)); + break; case CCRegClass: this->setCCRegOperand(this->staticInst.get(), idx, this->cpu->readCCReg(prev_phys_reg)); @@ -361,6 +366,18 @@ class BaseO3DynInst : public BaseDynInst return this->cpu->readVecElem(this->_srcRegIdx[idx]); } + const VecPredRegContainer& + readVecPredRegOperand(const StaticInst *si, int idx) const override + { + return this->cpu->readVecPredReg(this->_srcRegIdx[idx]); + } + + VecPredRegContainer& + getWritableVecPredRegOperand(const StaticInst *si, int idx) override + { + return this->cpu->getWritableVecPredReg(this->_destRegIdx[idx]); + } + CCReg readCCRegOperand(const StaticInst *si, int idx) { return this->cpu->readCCReg(this->_srcRegIdx[idx]); @@ -399,6 +416,14 @@ class BaseO3DynInst : public BaseDynInst BaseDynInst::setVecElemOperand(si, idx, val); } + void + setVecPredRegOperand(const StaticInst *si, int idx, + const VecPredRegContainer& val) override + { + this->cpu->setVecPredReg(this->_destRegIdx[idx], val); + BaseDynInst::setVecPredRegOperand(si, idx, val); + } + void setCCRegOperand(const StaticInst *si, int idx, CCReg val) { this->cpu->setCCReg(this->_destRegIdx[idx], val); diff --git a/src/cpu/o3/free_list.hh b/src/cpu/o3/free_list.hh index e7a899cdf..46bebf30d 100644 --- a/src/cpu/o3/free_list.hh +++ b/src/cpu/o3/free_list.hh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016 ARM Limited + * Copyright (c) 2016-2017 ARM Limited * All rights reserved * * The license below extends only to copyright in the software and shall @@ -138,6 +138,9 @@ class UnifiedFreeList SimpleFreeList vecElemList; /** @} */ + /** The list of free predicate registers. */ + SimpleFreeList predList; + /** The list of free condition-code registers. */ SimpleFreeList ccList; @@ -183,6 +186,9 @@ class UnifiedFreeList /** Gets a free vector elemenet register. 
*/ PhysRegIdPtr getVecElem() { return vecElemList.getReg(); } + /** Gets a free predicate register. */ + PhysRegIdPtr getVecPredReg() { return predList.getReg(); } + /** Gets a free cc register. */ PhysRegIdPtr getCCReg() { return ccList.getReg(); } @@ -207,6 +213,9 @@ class UnifiedFreeList vecElemList.addReg(freed_reg); } + /** Adds a predicate register back to the free list. */ + void addVecPredReg(PhysRegIdPtr freed_reg) { predList.addReg(freed_reg); } + /** Adds a cc register back to the free list. */ void addCCReg(PhysRegIdPtr freed_reg) { ccList.addReg(freed_reg); } @@ -222,6 +231,9 @@ class UnifiedFreeList /** Checks if there are any free vector registers. */ bool hasFreeVecElems() const { return vecElemList.hasFreeRegs(); } + /** Checks if there are any free predicate registers. */ + bool hasFreeVecPredRegs() const { return predList.hasFreeRegs(); } + /** Checks if there are any free cc registers. */ bool hasFreeCCRegs() const { return ccList.hasFreeRegs(); } @@ -237,6 +249,9 @@ class UnifiedFreeList /** Returns the number of free vector registers. */ unsigned numFreeVecElems() const { return vecElemList.numFreeRegs(); } + /** Returns the number of free predicate registers. */ + unsigned numFreeVecPredRegs() const { return predList.numFreeRegs(); } + /** Returns the number of free cc registers. 
*/ unsigned numFreeCCRegs() const { return ccList.numFreeRegs(); } }; @@ -267,6 +282,9 @@ UnifiedFreeList::addRegs(InputIt first, InputIt last) case VecElemClass: vecElemList.addRegs(first, last); break; + case VecPredRegClass: + predList.addRegs(first, last); + break; case CCRegClass: ccList.addRegs(first, last); break; @@ -297,6 +315,9 @@ UnifiedFreeList::addReg(PhysRegIdPtr freed_reg) case VecElemClass: vecElemList.addReg(freed_reg); break; + case VecPredRegClass: + predList.addReg(freed_reg); + break; case CCRegClass: ccList.addReg(freed_reg); break; diff --git a/src/cpu/o3/inst_queue_impl.hh b/src/cpu/o3/inst_queue_impl.hh index 4a55a91ea..ddd7b6d5f 100644 --- a/src/cpu/o3/inst_queue_impl.hh +++ b/src/cpu/o3/inst_queue_impl.hh @@ -104,6 +104,7 @@ InstructionQueue::InstructionQueue(O3CPU *cpu_ptr, IEW *iew_ptr, numPhysRegs = params->numPhysIntRegs + params->numPhysFloatRegs + params->numPhysVecRegs + params->numPhysVecRegs * TheISA::NumVecElemPerVecReg + + params->numPhysVecPredRegs + params->numPhysCCRegs; //Create an entry for each physical register within the diff --git a/src/cpu/o3/regfile.cc b/src/cpu/o3/regfile.cc index 2f41e2ac2..cc4bba6b0 100644 --- a/src/cpu/o3/regfile.cc +++ b/src/cpu/o3/regfile.cc @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016 ARM Limited + * Copyright (c) 2016-2017 ARM Limited * All rights reserved * * The license below extends only to copyright in the software and shall @@ -52,22 +52,26 @@ PhysRegFile::PhysRegFile(unsigned _numPhysicalIntRegs, unsigned _numPhysicalFloatRegs, unsigned _numPhysicalVecRegs, + unsigned _numPhysicalVecPredRegs, unsigned _numPhysicalCCRegs, VecMode vmode) : intRegFile(_numPhysicalIntRegs), floatRegFile(_numPhysicalFloatRegs), vectorRegFile(_numPhysicalVecRegs), + vecPredRegFile(_numPhysicalVecPredRegs), ccRegFile(_numPhysicalCCRegs), numPhysicalIntRegs(_numPhysicalIntRegs), numPhysicalFloatRegs(_numPhysicalFloatRegs), numPhysicalVecRegs(_numPhysicalVecRegs), numPhysicalVecElemRegs(_numPhysicalVecRegs * 
NumVecElemPerVecReg), + numPhysicalVecPredRegs(_numPhysicalVecPredRegs), numPhysicalCCRegs(_numPhysicalCCRegs), totalNumRegs(_numPhysicalIntRegs + _numPhysicalFloatRegs + _numPhysicalVecRegs + _numPhysicalVecRegs * NumVecElemPerVecReg + + _numPhysicalVecPredRegs + _numPhysicalCCRegs), vecMode(vmode) { @@ -108,6 +112,12 @@ PhysRegFile::PhysRegFile(unsigned _numPhysicalIntRegs, } } + // The next batch of the registers are the predicate physical + // registers; put them onto the predicate free list. + for (phys_reg = 0; phys_reg < numPhysicalVecPredRegs; phys_reg++) { + vecPredRegIds.emplace_back(VecPredRegClass, phys_reg, flat_reg_idx++); + } + // The rest of the registers are the condition-code physical // registers; put them onto the condition-code free list. for (phys_reg = 0; phys_reg < numPhysicalCCRegs; phys_reg++) { @@ -159,6 +169,13 @@ PhysRegFile::initFreeList(UnifiedFreeList *freeList) else freeList->addRegs(vecElemIds.begin(), vecElemIds.end()); + // The next batch of the registers are the predicate physical + // registers; put them onto the predicate free list. + for (reg_idx = 0; reg_idx < numPhysicalVecPredRegs; reg_idx++) { + assert(vecPredRegIds[reg_idx].index() == reg_idx); + } + freeList->addRegs(vecPredRegIds.begin(), vecPredRegIds.end()); + // The rest of the registers are the condition-code physical // registers; put them onto the condition-code free list. 
for (reg_idx = 0; reg_idx < numPhysicalCCRegs; reg_idx++) { @@ -191,6 +208,8 @@ PhysRegFile::getRegIds(RegClass cls) -> IdRange return std::make_pair(vecRegIds.begin(), vecRegIds.end()); case VecElemClass: return std::make_pair(vecElemIds.begin(), vecElemIds.end()); + case VecPredRegClass: + return std::make_pair(vecPredRegIds.begin(), vecPredRegIds.end()); case CCRegClass: return std::make_pair(ccRegIds.begin(), ccRegIds.end()); case MiscRegClass: diff --git a/src/cpu/o3/regfile.hh b/src/cpu/o3/regfile.hh index 9d9113240..4077c99a4 100644 --- a/src/cpu/o3/regfile.hh +++ b/src/cpu/o3/regfile.hh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016 ARM Limited + * Copyright (c) 2016-2017 ARM Limited * All rights reserved * * The license below extends only to copyright in the software and shall @@ -70,6 +70,7 @@ class PhysRegFile using VecRegContainer = TheISA::VecRegContainer; using PhysIds = std::vector; using VecMode = Enums::VecRegRenameMode; + using VecPredRegContainer = TheISA::VecPredRegContainer; public: using IdRange = std::pair; @@ -89,6 +90,10 @@ class PhysRegFile std::vector vecRegIds; std::vector vecElemIds; + /** Predicate register file. */ + std::vector vecPredRegFile; + std::vector vecPredRegIds; + /** Condition-code register file. */ std::vector ccRegFile; std::vector ccRegIds; @@ -116,6 +121,11 @@ class PhysRegFile */ unsigned numPhysicalVecElemRegs; + /** + * Number of physical predicate registers + */ + unsigned numPhysicalVecPredRegs; + /** * Number of physical CC registers */ @@ -135,6 +145,7 @@ class PhysRegFile PhysRegFile(unsigned _numPhysicalIntRegs, unsigned _numPhysicalFloatRegs, unsigned _numPhysicalVecRegs, + unsigned _numPhysicalVecPredRegs, unsigned _numPhysicalCCRegs, VecMode vmode ); @@ -154,6 +165,8 @@ class PhysRegFile unsigned numFloatPhysRegs() const { return numPhysicalFloatRegs; } /** @return the number of vector physical registers. 
*/ unsigned numVecPhysRegs() const { return numPhysicalVecRegs; } + /** @return the number of predicate physical registers. */ + unsigned numPredPhysRegs() const { return numPhysicalVecPredRegs; } /** @return the number of vector physical registers. */ unsigned numVecElemPhysRegs() const { return numPhysicalVecElemRegs; } @@ -201,7 +214,7 @@ class PhysRegFile DPRINTF(IEW, "RegFile: Access to vector register %i, has " "data %s\n", int(phys_reg->index()), - vectorRegFile[phys_reg->index()].as().print()); + vectorRegFile[phys_reg->index()].print()); return vectorRegFile[phys_reg->index()]; } @@ -258,6 +271,24 @@ class PhysRegFile return val; } + /** Reads a predicate register. */ + const VecPredRegContainer& readVecPredReg(PhysRegIdPtr phys_reg) const + { + assert(phys_reg->isVecPredPhysReg()); + + DPRINTF(IEW, "RegFile: Access to predicate register %i, has " + "data %s\n", int(phys_reg->index()), + vecPredRegFile[phys_reg->index()].print()); + + return vecPredRegFile[phys_reg->index()]; + } + + VecPredRegContainer& getWritableVecPredReg(PhysRegIdPtr phys_reg) + { + /* const_cast for not duplicating code above. */ + return const_cast(readVecPredReg(phys_reg)); + } + /** Reads a condition-code register. */ CCReg readCCReg(PhysRegIdPtr phys_reg) @@ -321,6 +352,17 @@ class PhysRegFile val; } + /** Sets a predicate register to the given value. */ + void setVecPredReg(PhysRegIdPtr phys_reg, const VecPredRegContainer& val) + { + assert(phys_reg->isVecPredPhysReg()); + + DPRINTF(IEW, "RegFile: Setting predicate register %i to %s\n", + int(phys_reg->index()), val.print()); + + vecPredRegFile[phys_reg->index()] = val; + } + /** Sets a condition-code register to the given value. 
*/ void setCCReg(PhysRegIdPtr phys_reg, CCReg val) diff --git a/src/cpu/o3/rename.hh b/src/cpu/o3/rename.hh index bd5e72dec..a091c0908 100644 --- a/src/cpu/o3/rename.hh +++ b/src/cpu/o3/rename.hh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2012 ARM Limited + * Copyright (c) 2012, 2017 ARM Limited * All rights reserved * * The license below extends only to copyright in the software and shall @@ -514,6 +514,7 @@ class DefaultRename Stats::Scalar intRenameLookups; Stats::Scalar fpRenameLookups; Stats::Scalar vecRenameLookups; + Stats::Scalar vecPredRenameLookups; /** Stat for total number of committed renaming mappings. */ Stats::Scalar renameCommittedMaps; /** Stat for total number of mappings that were undone due to a squash. */ diff --git a/src/cpu/o3/rename_impl.hh b/src/cpu/o3/rename_impl.hh index ed5dfb6eb..b63163f04 100644 --- a/src/cpu/o3/rename_impl.hh +++ b/src/cpu/o3/rename_impl.hh @@ -196,6 +196,10 @@ DefaultRename::regStats() .name(name() + ".vec_rename_lookups") .desc("Number of vector rename lookups") .prereq(vecRenameLookups); + vecPredRenameLookups + .name(name() + ".vec_pred_rename_lookups") + .desc("Number of vector predicate rename lookups") + .prereq(vecPredRenameLookups); } template @@ -659,6 +663,7 @@ DefaultRename::renameInsts(ThreadID tid) inst->numFPDestRegs(), inst->numVecDestRegs(), inst->numVecElemDestRegs(), + inst->numVecPredDestRegs(), inst->numCCDestRegs())) { DPRINTF(Rename, "Blocking due to lack of free " "physical registers to rename to.\n"); @@ -1041,6 +1046,9 @@ DefaultRename::renameSrcRegs(const DynInstPtr &inst, ThreadID tid) case VecElemClass: vecRenameLookups++; break; + case VecPredRegClass: + vecPredRenameLookups++; + break; case CCRegClass: case MiscRegClass: break; @@ -1256,7 +1264,7 @@ DefaultRename::readFreeEntries(ThreadID tid) } DPRINTF(Rename, "[tid:%i]: Free IQ: %i, Free ROB: %i, " - "Free LQ: %i, Free SQ: %i, FreeRM %i(%i %i %i %i)\n", + "Free LQ: %i, Free SQ: %i, FreeRM %i(%i %i %i %i %i)\n", tid, 
freeEntries[tid].iqEntries, freeEntries[tid].robEntries, @@ -1266,6 +1274,7 @@ DefaultRename::readFreeEntries(ThreadID tid) renameMap[tid]->numFreeIntEntries(), renameMap[tid]->numFreeFloatEntries(), renameMap[tid]->numFreeVecEntries(), + renameMap[tid]->numFreePredEntries(), renameMap[tid]->numFreeCCEntries()); DPRINTF(Rename, "[tid:%i]: %i instructions not yet in ROB\n", diff --git a/src/cpu/o3/rename_map.cc b/src/cpu/o3/rename_map.cc index 86c43932c..603f1ff36 100644 --- a/src/cpu/o3/rename_map.cc +++ b/src/cpu/o3/rename_map.cc @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016,2019 ARM Limited + * Copyright (c) 2016-2017,2019 ARM Limited * All rights reserved * * The license below extends only to copyright in the software and shall @@ -120,6 +120,8 @@ UnifiedRenameMap::init(PhysRegFile *_regFile, vecElemMap.init(TheISA::NumVecRegs * NVecElems, &(freeList->vecElemList), (RegIndex)-1); + predMap.init(TheISA::NumVecPredRegs, &(freeList->predList), (RegIndex)-1); + ccMap.init(TheISA::NumCCRegs, &(freeList->ccList), (RegIndex)-1); } diff --git a/src/cpu/o3/rename_map.hh b/src/cpu/o3/rename_map.hh index d30668027..5424633e5 100644 --- a/src/cpu/o3/rename_map.hh +++ b/src/cpu/o3/rename_map.hh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2016 ARM Limited + * Copyright (c) 2015-2017 ARM Limited * All rights reserved. 
* * The license below extends only to copyright in the software and shall @@ -172,6 +172,7 @@ class UnifiedRenameMap private: static constexpr uint32_t NVecElems = TheISA::NumVecElemPerVecReg; using VecReg = TheISA::VecReg; + using VecPredReg = TheISA::VecPredReg; /** The integer register rename map */ SimpleRenameMap intMap; @@ -188,6 +189,9 @@ class UnifiedRenameMap /** The vector element register rename map */ SimpleRenameMap vecElemMap; + /** The predicate register rename map */ + SimpleRenameMap predMap; + using VecMode = Enums::VecRegRenameMode; VecMode vecMode; @@ -235,6 +239,8 @@ class UnifiedRenameMap case VecElemClass: assert(vecMode == Enums::Elem); return vecElemMap.rename(arch_reg); + case VecPredRegClass: + return predMap.rename(arch_reg); case CCRegClass: return ccMap.rename(arch_reg); case MiscRegClass: @@ -276,6 +282,9 @@ class UnifiedRenameMap assert(vecMode == Enums::Elem); return vecElemMap.lookup(arch_reg); + case VecPredRegClass: + return predMap.lookup(arch_reg); + case CCRegClass: return ccMap.lookup(arch_reg); @@ -319,6 +328,10 @@ class UnifiedRenameMap assert(vecMode == Enums::Elem); return vecElemMap.setEntry(arch_reg, phys_reg); + case VecPredRegClass: + assert(phys_reg->isVecPredPhysReg()); + return predMap.setEntry(arch_reg, phys_reg); + case CCRegClass: assert(phys_reg->isCCPhysReg()); return ccMap.setEntry(arch_reg, phys_reg); @@ -345,10 +358,11 @@ class UnifiedRenameMap */ unsigned numFreeEntries() const { - return std::min( + return std::min(std::min( std::min(intMap.numFreeEntries(), floatMap.numFreeEntries()), vecMode == Enums::Full ? vecMap.numFreeEntries() - : vecElemMap.numFreeEntries()); + : vecElemMap.numFreeEntries()), + predMap.numFreeEntries()); } unsigned numFreeIntEntries() const { return intMap.numFreeEntries(); } @@ -359,18 +373,21 @@ class UnifiedRenameMap ? 
vecMap.numFreeEntries() : vecElemMap.numFreeEntries(); } + unsigned numFreePredEntries() const { return predMap.numFreeEntries(); } unsigned numFreeCCEntries() const { return ccMap.numFreeEntries(); } /** * Return whether there are enough registers to serve the request. */ bool canRename(uint32_t intRegs, uint32_t floatRegs, uint32_t vectorRegs, - uint32_t vecElemRegs, uint32_t ccRegs) const + uint32_t vecElemRegs, uint32_t vecPredRegs, + uint32_t ccRegs) const { return intRegs <= intMap.numFreeEntries() && floatRegs <= floatMap.numFreeEntries() && vectorRegs <= vecMap.numFreeEntries() && vecElemRegs <= vecElemMap.numFreeEntries() && + vecPredRegs <= predMap.numFreeEntries() && ccRegs <= ccMap.numFreeEntries(); } /** diff --git a/src/cpu/o3/thread_context.hh b/src/cpu/o3/thread_context.hh index c74936469..7858f5a0a 100644 --- a/src/cpu/o3/thread_context.hh +++ b/src/cpu/o3/thread_context.hh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2011-2012, 2016 ARM Limited + * Copyright (c) 2011-2012, 2016-2018 ARM Limited * Copyright (c) 2013 Advanced Micro Devices, Inc. 
* All rights reserved * @@ -263,6 +263,14 @@ class O3ThreadContext : public ThreadContext return readVecElemFlat(flattenRegId(reg).index(), reg.elemIndex()); } + virtual const VecPredRegContainer& readVecPredReg(const RegId& id) const { + return readVecPredRegFlat(flattenRegId(id).index()); + } + + virtual VecPredRegContainer& getWritableVecPredReg(const RegId& id) { + return getWritableVecPredRegFlat(flattenRegId(id).index()); + } + virtual CCReg readCCReg(int reg_idx) { return readCCRegFlat(flattenRegId(RegId(CCRegClass, reg_idx)).index()); @@ -294,6 +302,13 @@ class O3ThreadContext : public ThreadContext setVecElemFlat(flattenRegId(reg).index(), reg.elemIndex(), val); } + virtual void + setVecPredReg(const RegId& reg, + const VecPredRegContainer& val) + { + setVecPredRegFlat(flattenRegId(reg).index(), val); + } + virtual void setCCReg(int reg_idx, CCReg val) { @@ -403,6 +418,12 @@ class O3ThreadContext : public ThreadContext virtual void setVecElemFlat(const RegIndex& idx, const ElemIndex& elemIdx, const VecElem& val); + virtual const VecPredRegContainer& readVecPredRegFlat(int idx) + const override; + virtual VecPredRegContainer& getWritableVecPredRegFlat(int idx) override; + virtual void setVecPredRegFlat(int idx, + const VecPredRegContainer& val) override; + virtual CCReg readCCRegFlat(int idx); virtual void setCCRegFlat(int idx, CCReg val); }; diff --git a/src/cpu/o3/thread_context_impl.hh b/src/cpu/o3/thread_context_impl.hh index bd2bf63b0..59562ba3b 100644 --- a/src/cpu/o3/thread_context_impl.hh +++ b/src/cpu/o3/thread_context_impl.hh @@ -232,6 +232,20 @@ O3ThreadContext::readVecElemFlat(const RegIndex& idx, return cpu->readArchVecElem(idx, elemIndex, thread->threadId()); } +template +const TheISA::VecPredRegContainer& +O3ThreadContext::readVecPredRegFlat(int reg_id) const +{ + return cpu->readArchVecPredReg(reg_id, thread->threadId()); +} + +template +TheISA::VecPredRegContainer& +O3ThreadContext::getWritableVecPredRegFlat(int reg_id) +{ + return 
cpu->getWritableArchVecPredReg(reg_id, thread->threadId()); +} + template TheISA::CCReg O3ThreadContext::readCCRegFlat(int reg_idx) @@ -275,6 +289,16 @@ O3ThreadContext::setVecElemFlat(const RegIndex& idx, conditionalSquash(); } +template +void +O3ThreadContext::setVecPredRegFlat(int reg_idx, + const VecPredRegContainer& val) +{ + cpu->setArchVecPredReg(reg_idx, val, thread->threadId()); + + conditionalSquash(); +} + template void O3ThreadContext::setCCRegFlat(int reg_idx, TheISA::CCReg val) diff --git a/src/cpu/reg_class.cc b/src/cpu/reg_class.cc index 16c1949ee..eeb06adcc 100644 --- a/src/cpu/reg_class.cc +++ b/src/cpu/reg_class.cc @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016 ARM Limited + * Copyright (c) 2016-2017 ARM Limited * All rights reserved * * The license below extends only to copyright in the software and shall @@ -47,6 +47,7 @@ const char *RegId::regClassStrings[] = { "FloatRegClass", "VecRegClass", "VecElemClass", + "VecPredRegClass", "CCRegClass", "MiscRegClass" }; diff --git a/src/cpu/reg_class.hh b/src/cpu/reg_class.hh index 69da9cf7e..70cfab39e 100644 --- a/src/cpu/reg_class.hh +++ b/src/cpu/reg_class.hh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016 ARM Limited + * Copyright (c) 2016-2017 ARM Limited * All rights reserved * * The license below extends only to copyright in the software and shall @@ -60,6 +60,7 @@ enum RegClass { VecRegClass, /** Vector Register Native Elem lane. */ VecElemClass, + VecPredRegClass, CCRegClass, ///< Condition-code register MiscRegClass ///< Control (misc) register }; @@ -151,6 +152,9 @@ class RegId { /** @Return true if it is a condition-code physical register. */ bool isVecElem() const { return regClass == VecElemClass; } + /** @Return true if it is a predicate physical register. */ + bool isVecPredReg() const { return regClass == VecPredRegClass; } + /** @Return true if it is a condition-code physical register. 
*/ bool isCCReg() const { return regClass == CCRegClass; } @@ -179,6 +183,7 @@ class RegId { case IntRegClass: case FloatRegClass: case VecRegClass: + case VecPredRegClass: case CCRegClass: case MiscRegClass: return regIdx; diff --git a/src/cpu/simple/exec_context.hh b/src/cpu/simple/exec_context.hh index cbca34123..d2107b89a 100644 --- a/src/cpu/simple/exec_context.hh +++ b/src/cpu/simple/exec_context.hh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2014-2016 ARM Limited + * Copyright (c) 2014-2017 ARM Limited * All rights reserved * * The license below extends only to copyright in the software and shall @@ -121,6 +121,10 @@ class SimpleExecContext : public ExecContext { mutable Stats::Scalar numVecRegReads; Stats::Scalar numVecRegWrites; + // Number of predicate register file accesses + mutable Stats::Scalar numVecPredRegReads; + Stats::Scalar numVecPredRegWrites; + // Number of condition code register file accesses Stats::Scalar numCCRegReads; Stats::Scalar numCCRegWrites; @@ -333,6 +337,34 @@ class SimpleExecContext : public ExecContext { thread->setVecElem(reg, val); } + const VecPredRegContainer& + readVecPredRegOperand(const StaticInst *si, int idx) const override + { + numVecPredRegReads++; + const RegId& reg = si->srcRegIdx(idx); + assert(reg.isVecPredReg()); + return thread->readVecPredReg(reg); + } + + VecPredRegContainer& + getWritableVecPredRegOperand(const StaticInst *si, int idx) override + { + numVecPredRegWrites++; + const RegId& reg = si->destRegIdx(idx); + assert(reg.isVecPredReg()); + return thread->getWritableVecPredReg(reg); + } + + void + setVecPredRegOperand(const StaticInst *si, int idx, + const VecPredRegContainer& val) override + { + numVecPredRegWrites++; + const RegId& reg = si->destRegIdx(idx); + assert(reg.isVecPredReg()); + thread->setVecPredReg(reg, val); + } + CCReg readCCRegOperand(const StaticInst *si, int idx) override { diff --git a/src/cpu/simple_thread.hh b/src/cpu/simple_thread.hh index 211a4c89f..00355c602 100644 --- 
a/src/cpu/simple_thread.hh +++ b/src/cpu/simple_thread.hh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2011-2012, 2016 ARM Limited + * Copyright (c) 2011-2012, 2016-2018 ARM Limited * Copyright (c) 2013 Advanced Micro Devices, Inc. * All rights reserved * @@ -58,6 +58,7 @@ #include "debug/CCRegs.hh" #include "debug/FloatRegs.hh" #include "debug/IntRegs.hh" +#include "debug/VecPredRegs.hh" #include "debug/VecRegs.hh" #include "mem/page_table.hh" #include "mem/request.hh" @@ -102,6 +103,7 @@ class SimpleThread : public ThreadState typedef TheISA::CCReg CCReg; using VecRegContainer = TheISA::VecRegContainer; using VecElem = TheISA::VecElem; + using VecPredRegContainer = TheISA::VecPredRegContainer; public: typedef ThreadContext::Status Status; @@ -109,6 +111,7 @@ class SimpleThread : public ThreadState RegVal floatRegs[TheISA::NumFloatRegs]; RegVal intRegs[TheISA::NumIntRegs]; VecRegContainer vecRegs[TheISA::NumVecRegs]; + VecPredRegContainer vecPredRegs[TheISA::NumVecPredRegs]; #ifdef ISA_HAS_CC_REGS TheISA::CCReg ccRegs[TheISA::NumCCRegs]; #endif @@ -228,6 +231,9 @@ class SimpleThread : public ThreadState for (int i = 0; i < TheISA::NumVecRegs; i++) { vecRegs[i].zero(); } + for (int i = 0; i < TheISA::NumVecPredRegs; i++) { + vecPredRegs[i].reset(); + } #ifdef ISA_HAS_CC_REGS memset(ccRegs, 0, sizeof(ccRegs)); #endif @@ -266,7 +272,7 @@ class SimpleThread : public ThreadState assert(flatIndex < TheISA::NumVecRegs); const VecRegContainer& regVal = readVecRegFlat(flatIndex); DPRINTF(VecRegs, "Reading vector reg %d (%d) as %s.\n", - reg.index(), flatIndex, regVal.as().print()); + reg.index(), flatIndex, regVal.print()); return regVal; } @@ -277,7 +283,7 @@ class SimpleThread : public ThreadState assert(flatIndex < TheISA::NumVecRegs); VecRegContainer& regVal = getWritableVecRegFlat(flatIndex); DPRINTF(VecRegs, "Reading vector reg %d (%d) as %s for modify.\n", - reg.index(), flatIndex, regVal.as().print()); + reg.index(), flatIndex, regVal.print()); return regVal; } @@ -350,6 
+356,28 @@ class SimpleThread : public ThreadState return regVal; } + const VecPredRegContainer& + readVecPredReg(const RegId& reg) const + { + int flatIndex = isa->flattenVecPredIndex(reg.index()); + assert(flatIndex < TheISA::NumVecPredRegs); + const VecPredRegContainer& regVal = readVecPredRegFlat(flatIndex); + DPRINTF(VecPredRegs, "Reading predicate reg %d (%d) as %s.\n", + reg.index(), flatIndex, regVal.print()); + return regVal; + } + + VecPredRegContainer& + getWritableVecPredReg(const RegId& reg) + { + int flatIndex = isa->flattenVecPredIndex(reg.index()); + assert(flatIndex < TheISA::NumVecPredRegs); + VecPredRegContainer& regVal = getWritableVecPredRegFlat(flatIndex); + DPRINTF(VecPredRegs, + "Reading predicate reg %d (%d) as %s for modify.\n", + reg.index(), flatIndex, regVal.print()); + return regVal; + } CCReg readCCReg(int reg_idx) { @@ -410,6 +438,16 @@ class SimpleThread : public ThreadState " %#x.\n", reg.elemIndex(), reg.index(), flatIndex, val); } + void + setVecPredReg(const RegId& reg, const VecPredRegContainer& val) + { + int flatIndex = isa->flattenVecPredIndex(reg.index()); + assert(flatIndex < TheISA::NumVecPredRegs); + setVecPredRegFlat(flatIndex, val); + DPRINTF(VecPredRegs, "Setting predicate reg %d (%d) to %s.\n", + reg.index(), flatIndex, val.print()); + } + void setCCReg(int reg_idx, CCReg val) { @@ -568,6 +606,21 @@ class SimpleThread : public ThreadState vecRegs[reg].as()[elemIndex] = val; } + const VecPredRegContainer& readVecPredRegFlat(const RegIndex& reg) const + { + return vecPredRegs[reg]; + } + + VecPredRegContainer& getWritableVecPredRegFlat(const RegIndex& reg) + { + return vecPredRegs[reg]; + } + + void setVecPredRegFlat(const RegIndex& reg, const VecPredRegContainer& val) + { + vecPredRegs[reg] = val; + } + #ifdef ISA_HAS_CC_REGS CCReg readCCRegFlat(int idx) { return ccRegs[idx]; } void setCCRegFlat(int idx, CCReg val) { ccRegs[idx] = val; } diff --git a/src/cpu/static_inst.hh b/src/cpu/static_inst.hh index 
70d933c31..bcb53f5ea 100644 --- a/src/cpu/static_inst.hh +++ b/src/cpu/static_inst.hh @@ -1,4 +1,16 @@ /* + * Copyright (c) 2017 ARM Limited + * All rights reserved + * + * The license below extends only to copyright in the software and shall + * not be construed as granting a license to any other intellectual + * property including but not limited to intellectual property relating + * to a hardware implementation of the functionality of the software + * licensed hereunder. You may use the software subject to the license + * terms below provided that you ensure that this notice is replicated + * unmodified and in its entirety in all distributions of the software, + * modified or unmodified, in source code or in binary form. + * * Copyright (c) 2003-2005 The Regents of The University of Michigan * Copyright (c) 2013 Advanced Micro Devices, Inc. * All rights reserved. @@ -105,16 +117,17 @@ class StaticInst : public RefCounted, public StaticInstFlags /** @{ */ int8_t _numVecDestRegs; int8_t _numVecElemDestRegs; + int8_t _numVecPredDestRegs; /** @} */ public: /// @name Register information. - /// The sum of numFPDestRegs(), numIntDestRegs(), numVecDestRegs() and - /// numVecelemDestRegs() equals numDestRegs(). The former two functions - /// are used to track physical register usage for machines with separate - /// int & FP reg files, the next two is for machines with vector register - /// file. + /// The sum of numFPDestRegs(), numIntDestRegs(), numVecDestRegs(), + /// numVecElemDestRegs() and numVecPredDestRegs() equals numDestRegs(). + /// The former two functions are used to track physical register usage for + /// machines with separate int & FP reg files, the next three are for + /// machines with vector and predicate register files. //@{ /// Number of source registers. 
int8_t numSrcRegs() const { return _numSrcRegs; } @@ -128,6 +141,8 @@ class StaticInst : public RefCounted, public StaticInstFlags int8_t numVecDestRegs() const { return _numVecDestRegs; } /// Number of vector element destination regs. int8_t numVecElemDestRegs() const { return _numVecElemDestRegs; } + /// Number of predicate destination regs. + int8_t numVecPredDestRegs() const { return _numVecPredDestRegs; } /// Number of coprocesor destination regs. int8_t numCCDestRegs() const { return _numCCDestRegs; } //@} @@ -248,8 +263,8 @@ class StaticInst : public RefCounted, public StaticInstFlags StaticInst(const char *_mnemonic, ExtMachInst _machInst, OpClass __opClass) : _opClass(__opClass), _numSrcRegs(0), _numDestRegs(0), _numFPDestRegs(0), _numIntDestRegs(0), _numCCDestRegs(0), - _numVecDestRegs(0), _numVecElemDestRegs(0), machInst(_machInst), - mnemonic(_mnemonic), cachedDisassembly(0) + _numVecDestRegs(0), _numVecElemDestRegs(0), _numVecPredDestRegs(0), + machInst(_machInst), mnemonic(_mnemonic), cachedDisassembly(0) { } public: diff --git a/src/cpu/thread_context.cc b/src/cpu/thread_context.cc index 2d907a055..7597dbfb2 100644 --- a/src/cpu/thread_context.cc +++ b/src/cpu/thread_context.cc @@ -1,5 +1,5 @@ /* - * Copyright (c) 2012, 2016 ARM Limited + * Copyright (c) 2012, 2016-2017 ARM Limited * Copyright (c) 2013 Advanced Micro Devices, Inc. * All rights reserved * @@ -43,6 +43,7 @@ #include "cpu/thread_context.hh" +#include "arch/generic/vec_pred_reg.hh" #include "arch/kernel_stats.hh" #include "base/logging.hh" #include "base/trace.hh" @@ -86,6 +87,17 @@ ThreadContext::compare(ThreadContext *one, ThreadContext *two) panic("Vec reg idx %d doesn't match, one: %#x, two: %#x", i, t1, t2); } + + // Then loop through the predicate registers. 
+ for (int i = 0; i < TheISA::NumVecPredRegs; ++i) { + RegId rid(VecPredRegClass, i); + const TheISA::VecPredRegContainer& t1 = one->readVecPredReg(rid); + const TheISA::VecPredRegContainer& t2 = two->readVecPredReg(rid); + if (t1 != t2) + panic("Pred reg idx %d doesn't match, one: %#x, two: %#x", + i, t1, t2); + } + for (int i = 0; i < TheISA::NumMiscRegs; ++i) { RegVal t1 = one->readMiscRegNoEffect(i); RegVal t2 = two->readMiscRegNoEffect(i); @@ -168,6 +180,12 @@ serialize(ThreadContext &tc, CheckpointOut &cp) } SERIALIZE_CONTAINER(vecRegs); + std::vector vecPredRegs(NumVecPredRegs); + for (int i = 0; i < NumVecPredRegs; ++i) { + vecPredRegs[i] = tc.readVecPredRegFlat(i); + } + SERIALIZE_CONTAINER(vecPredRegs); + RegVal intRegs[NumIntRegs]; for (int i = 0; i < NumIntRegs; ++i) intRegs[i] = tc.readIntRegFlat(i); @@ -203,6 +221,12 @@ unserialize(ThreadContext &tc, CheckpointIn &cp) tc.setVecRegFlat(i, vecRegs[i]); } + std::vector vecPredRegs(NumVecPredRegs); + UNSERIALIZE_CONTAINER(vecPredRegs); + for (int i = 0; i < NumVecPredRegs; ++i) { + tc.setVecPredRegFlat(i, vecPredRegs[i]); + } + RegVal intRegs[NumIntRegs]; UNSERIALIZE_ARRAY(intRegs, NumIntRegs); for (int i = 0; i < NumIntRegs; ++i) diff --git a/src/cpu/thread_context.hh b/src/cpu/thread_context.hh index cad073b4f..6dde68650 100644 --- a/src/cpu/thread_context.hh +++ b/src/cpu/thread_context.hh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2011-2012, 2016 ARM Limited + * Copyright (c) 2011-2012, 2016-2018 ARM Limited * Copyright (c) 2013 Advanced Micro Devices, Inc. 
* All rights reserved * @@ -98,6 +98,8 @@ class ThreadContext typedef TheISA::CCReg CCReg; using VecRegContainer = TheISA::VecRegContainer; using VecElem = TheISA::VecElem; + using VecPredRegContainer = TheISA::VecPredRegContainer; + public: enum Status @@ -242,6 +244,10 @@ class ThreadContext virtual const VecElem& readVecElem(const RegId& reg) const = 0; + virtual const VecPredRegContainer& readVecPredReg(const RegId& reg) + const = 0; + virtual VecPredRegContainer& getWritableVecPredReg(const RegId& reg) = 0; + virtual CCReg readCCReg(int reg_idx) = 0; virtual void setIntReg(int reg_idx, RegVal val) = 0; @@ -252,6 +258,9 @@ class ThreadContext virtual void setVecElem(const RegId& reg, const VecElem& val) = 0; + virtual void setVecPredReg(const RegId& reg, + const VecPredRegContainer& val) = 0; + virtual void setCCReg(int reg_idx, CCReg val) = 0; virtual TheISA::PCState pcState() = 0; @@ -341,6 +350,11 @@ class ThreadContext virtual void setVecElemFlat(const RegIndex& idx, const ElemIndex& elemIdx, const VecElem& val) = 0; + virtual const VecPredRegContainer& readVecPredRegFlat(int idx) const = 0; + virtual VecPredRegContainer& getWritableVecPredRegFlat(int idx) = 0; + virtual void setVecPredRegFlat(int idx, + const VecPredRegContainer& val) = 0; + virtual CCReg readCCRegFlat(int idx) = 0; virtual void setCCRegFlat(int idx, CCReg val) = 0; /** @} */ @@ -502,6 +516,12 @@ class ProxyThreadContext : public ThreadContext const VecElem& readVecElem(const RegId& reg) const { return actualTC->readVecElem(reg); } + const VecPredRegContainer& readVecPredReg(const RegId& reg) const + { return actualTC->readVecPredReg(reg); } + + VecPredRegContainer& getWritableVecPredReg(const RegId& reg) + { return actualTC->getWritableVecPredReg(reg); } + CCReg readCCReg(int reg_idx) { return actualTC->readCCReg(reg_idx); } @@ -514,6 +534,9 @@ class ProxyThreadContext : public ThreadContext void setVecReg(const RegId& reg, const VecRegContainer& val) { actualTC->setVecReg(reg, val); } + 
void setVecPredReg(const RegId& reg, const VecPredRegContainer& val) + { actualTC->setVecPredReg(reg, val); } + void setVecElem(const RegId& reg, const VecElem& val) { actualTC->setVecElem(reg, val); } @@ -590,6 +613,15 @@ class ProxyThreadContext : public ThreadContext const VecElem& val) { actualTC->setVecElemFlat(id, elemIndex, val); } + const VecPredRegContainer& readVecPredRegFlat(int id) const + { return actualTC->readVecPredRegFlat(id); } + + VecPredRegContainer& getWritableVecPredRegFlat(int id) + { return actualTC->getWritableVecPredRegFlat(id); } + + void setVecPredRegFlat(int idx, const VecPredRegContainer& val) + { actualTC->setVecPredRegFlat(idx, val); } + CCReg readCCRegFlat(int idx) { return actualTC->readCCRegFlat(idx); } diff --git a/src/sim/insttracer.hh b/src/sim/insttracer.hh index d57f5a04d..c1efd2118 100644 --- a/src/sim/insttracer.hh +++ b/src/sim/insttracer.hh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2014 ARM Limited + * Copyright (c) 2014, 2017 ARM Limited * All rights reserved * * The license below extends only to copyright in the software and shall @@ -44,6 +44,8 @@ #ifndef __INSTRECORD_HH__ #define __INSTRECORD_HH__ +#include "arch/generic/vec_pred_reg.hh" +#include "arch/generic/vec_reg.hh" #include "base/types.hh" #include "cpu/inst_seq.hh" #include "cpu/static_inst.hh" @@ -95,6 +97,9 @@ class InstRecord union { uint64_t as_int; double as_double; + ::VecRegContainer* as_vec; + ::VecPredRegContainer* as_pred; } data; /** @defgroup fetch_seq @@ -118,7 +123,9 @@ class InstRecord DataInt16 = 2, DataInt32 = 4, DataInt64 = 8, - DataDouble = 3 + DataDouble = 3, + DataVec = 5, + DataVecPred = 6 } data_status; /** @ingroup memory @@ -150,7 +157,16 @@ class InstRecord fetch_seq_valid(false), cp_seq_valid(false), predicate(true) { } - virtual ~InstRecord() { } + virtual ~InstRecord() + { + if (data_status == DataVec) { + assert(data.as_vec); + delete data.as_vec; + } else if (data_status == DataVecPred) { + assert(data.as_pred); + delete 
data.as_pred; + } + } void setWhen(Tick new_when) { when = new_when; } void setMem(Addr a, Addr s, unsigned f) @@ -181,6 +197,22 @@ class InstRecord void setData(double d) { data.as_double = d; data_status = DataDouble; } + void + setData(::VecRegContainer& d) + { + data.as_vec = new ::VecRegContainer(d); + data_status = DataVec; + } + + void + setData(::VecPredRegContainer& d) + { + data.as_pred = new ::VecPredRegContainer< + TheISA::VecPredRegSizeBits, TheISA::VecPredRegHasPackedRepr>(d); + data_status = DataVecPred; + } + void setFetchSeq(InstSeqNum seq) { fetch_seq = seq; fetch_seq_valid = true; } -- 2.30.2