From 00da08902918da13fccc3f2266b7b2f5d0080708 Mon Sep 17 00:00:00 2001
From: Rekai Gonzalez-Alberquilla <Rekai.GonzalezAlberquilla@arm.com>
Date: Wed, 5 Apr 2017 13:24:00 -0500
Subject: [PATCH] cpu: Added interface for vector reg file

This patch adds some more functionality to the cpu model and the arch to
interface with the vector register file.

This change consists mainly of augmenting ThreadContexts and ExecContexts
with calls to get/set full vectors, underlying microarchitectural elements
or lanes. Those are meant to interface with the vector register file. All
classes that implement this interface also get an appropriate implementation.

This requires implementing the vector register file for the different
models using the VecRegContainer class.

This change set also updates the Result abstraction to allow for the
possibility of a vector as a result.

The changes also affect how the remote_gdb connection works.

There are some (nasty) side effects, such as the need to define dummy
numPhysVecRegs parameter values for architectures that do not implement
vector extensions.

This is Nathanael Premillieu's work, with a growing number of fixes and
improvements of mine.

Change-Id: Iee65f4e8b03abfe1e94e6940a51b68d0977fd5bb
Reviewed-by: Andreas Sandberg <andreas.sandberg@arm.com>
[ Fix RISCV build issues and CC reg free list initialisation ]
Signed-off-by: Andreas Sandberg <andreas.sandberg@arm.com>
Reviewed-on: https://gem5-review.googlesource.com/2705
---
 configs/common/cores/arm/O3_ARM_v7a.py |   1 +
 src/arch/SConscript                    |  13 ++
 src/arch/alpha/isa.hh                  |  12 ++
 src/arch/alpha/registers.hh            |  10 ++
 src/arch/arm/ArmISA.py                 |   5 +
 src/arch/arm/insts/static_inst.cc      |   6 +
 src/arch/arm/insts/static_inst.hh      |   1 +
 src/arch/arm/isa.cc                    |   1 +
 src/arch/arm/isa.hh                    |  40 ++++++
 src/arch/arm/nativetrace.cc            |  10 +-
 src/arch/arm/registers.hh              |  18 ++-
 src/arch/arm/remote_gdb.cc             |  10 +-
 src/arch/arm/remote_gdb.hh             |   4 +-
 src/arch/arm/utility.cc                |   3 +
 src/arch/generic/ISACommon.py          |  50 ++++++++
 src/arch/generic/SConscript            |  13 ++
 src/arch/generic/traits.hh             |  63 ++++++++++
 src/arch/generic/types.hh              |   3 +
 src/arch/isa_parser.py                 |   6 +-
 src/arch/mips/isa.hh                   |  12 ++
 src/arch/mips/registers.hh             |  10 ++
 src/arch/null/registers.hh             |  10 ++
 src/arch/power/isa.hh                  |  12 ++
 src/arch/power/registers.hh            |  10 ++
 src/arch/riscv/isa.hh                  |  12 ++
 src/arch/riscv/registers.hh            |  10 ++
 src/arch/sparc/isa.hh                  |  14 +++
 src/arch/sparc/registers.hh            |  10 ++
 src/arch/x86/isa.hh                    |  14 +++
 src/arch/x86/registers.hh              |  10 ++
 src/cpu/base_dyn_inst.hh               |  43 ++++++-
 src/cpu/checker/cpu.hh                 | 138 +++++++++++++++++++++
 src/cpu/checker/cpu_impl.hh            |  28 ++++-
 src/cpu/checker/thread_context.hh      |  83 ++++++++++++-
 src/cpu/exec_context.hh                |  61 +++++++++-
 src/cpu/inst_res.hh                    |  52 ++++++++
 src/cpu/minor/dyn_inst.cc              |  11 +-
 src/cpu/minor/exec_context.hh          | 122 ++++++++++++++++++-
 src/cpu/minor/scoreboard.cc            |  15 ++-
 src/cpu/minor/scoreboard.hh            |   4 +-
 src/cpu/o3/O3CPU.py                    |  14 +++
 src/cpu/o3/comm.hh                     |  25 +++-
 src/cpu/o3/cpu.cc                      | 143 +++++++++++++++++++++-
 src/cpu/o3/cpu.hh                      |  89 +++++++++++++-
 src/cpu/o3/dyn_inst.hh                 | 132 ++++++++++++++++++--
 src/cpu/o3/free_list.hh                | 100 +++++++++++++++
 src/cpu/o3/inst_queue_impl.hh          |   5 +-
 src/cpu/o3/regfile.cc                  | 120 +++++++++++++++++-
 src/cpu/o3/regfile.hh                  | 161 ++++++++++++++++++++++++-
 src/cpu/o3/rename.hh                   |   3 +-
 src/cpu/o3/rename_impl.hh              |  19 ++-
 src/cpu/o3/rename_map.cc               |  91 +++++++++++++-
 src/cpu/o3/rename_map.hh               |  82 ++++++++++++-
 src/cpu/o3/thread_context.hh           |  97 ++++++++++++++-
 src/cpu/o3/thread_context_impl.hh      |  42 ++++++-
 src/cpu/reg_class.cc                   |  14 +++
 src/cpu/reg_class.hh                   |  44 ++++++-
 src/cpu/reg_class_impl.hh              |   5 +
 src/cpu/simple/base.cc                 |  10 ++
 src/cpu/simple/exec_context.hh         | 126 ++++++++++++++++++-
 src/cpu/simple_thread.hh               | 158 +++++++++++++++++++++++-
 src/cpu/static_inst.hh                 |  23 +++-
 src/cpu/thread_context.cc              |  24 +++-
 src/cpu/thread_context.hh              | 120 +++++++++++++++++-
 src/sim/serialize.cc                   |   3 +
 65 files changed, 2525 insertions(+), 75 deletions(-)
 create mode 100644 src/arch/generic/ISACommon.py
 create mode 100644 src/arch/generic/traits.hh

diff --git a/configs/common/cores/arm/O3_ARM_v7a.py b/configs/common/cores/arm/O3_ARM_v7a.py
index f5c2c711a..fde4d3c60 100644
--- a/configs/common/cores/arm/O3_ARM_v7a.py
+++ b/configs/common/cores/arm/O3_ARM_v7a.py
@@ -139,6 +139,7 @@ class O3_ARM_v7a_3(DerivO3CPU):
     forwardComSize = 5
     numPhysIntRegs = 128
     numPhysFloatRegs = 192
+    numPhysVecRegs = 48
     numIQEntries = 32
     numROBEntries = 40
 
diff --git a/src/arch/SConscript b/src/arch/SConscript
index ea940560d..e30069c04 100644
--- a/src/arch/SConscript
+++ b/src/arch/SConscript
@@ -1,5 +1,17 @@
 # -*- mode:python -*-
 
+# Copyright (c) 2016 ARM Limited
+# All rights reserved.
+#
+# The license below extends only to copyright in the software and shall
+# not be construed as granting a license to any other intellectual
+# property including but not limited to intellectual property relating
+# to a hardware implementation of the functionality of the software
+# licensed hereunder.  You may use the software subject to the license
+# terms below provided that you ensure that this notice is replicated
+# unmodified and in its entirety in all distributions of the software,
+# modified or unmodified, in source code or in binary form.
+#
 # Copyright (c) 2006 The Regents of The University of Michigan
 # All rights reserved.
 #
@@ -202,6 +214,7 @@ env.Append(BUILDERS = {'ScanISA' :
 
 DebugFlag('IntRegs')
 DebugFlag('FloatRegs')
+DebugFlag('VecRegs')
 DebugFlag('CCRegs')
 DebugFlag('MiscRegs')
 CompoundFlag('Registers', [ 'IntRegs', 'FloatRegs', 'CCRegs', 'MiscRegs' ])
diff --git a/src/arch/alpha/isa.hh b/src/arch/alpha/isa.hh
index 80d8ab149..36e708450 100644
--- a/src/arch/alpha/isa.hh
+++ b/src/arch/alpha/isa.hh
@@ -110,6 +110,18 @@ namespace AlphaISA
             return reg;
         }
 
+        int
+        flattenVecIndex(int reg) const
+        {
+            return reg;
+        }
+
+        int
+        flattenVecElemIndex(int reg) const
+        {
+            return reg;
+        }
+
         // dummy
         int
         flattenCCIndex(int reg) const
diff --git a/src/arch/alpha/registers.hh b/src/arch/alpha/registers.hh
index 03bbd8aaf..151ea7d7c 100644
--- a/src/arch/alpha/registers.hh
+++ b/src/arch/alpha/registers.hh
@@ -34,6 +34,7 @@
 #include "arch/alpha/generated/max_inst_regs.hh"
 #include "arch/alpha/ipr.hh"
 #include "arch/generic/types.hh"
+#include "arch/generic/vec_reg.hh"
 #include "base/types.hh"
 
 namespace AlphaISA {
@@ -56,6 +57,15 @@ typedef uint64_t MiscReg;
 // dummy typedef since we don't have CC regs
 typedef uint8_t CCReg;
 
+// dummy typedefs since we don't have vector regs
+constexpr unsigned NumVecElemPerVecReg = 2;
+using VecElem = uint32_t;
+using VecReg = ::VecRegT<VecElem, NumVecElemPerVecReg, false>;
+using ConstVecReg = ::VecRegT<VecElem, NumVecElemPerVecReg, true>;
+using VecRegContainer = VecReg::Container;
+// This has to be one to prevent warnings that are treated as errors
+constexpr unsigned NumVecRegs = 1;
+
 union AnyReg
 {
     IntReg  intreg;
diff --git a/src/arch/arm/ArmISA.py b/src/arch/arm/ArmISA.py
index 73ef4a09d..7956570bd 100644
--- a/src/arch/arm/ArmISA.py
+++ b/src/arch/arm/ArmISA.py
@@ -41,6 +41,7 @@ from m5.proxy import *
 from m5.SimObject import SimObject
 
 from ArmPMU import ArmPMU
+from ISACommon import VecRegRenameMode
 
 # Enum for DecoderFlavour
 class DecoderFlavour(Enum): vals = ['Generic']
@@ -86,6 +87,10 @@ class ArmISA(SimObject):
     id_aa64afr1_el1 = Param.UInt64(0x0000000000000000,
         "AArch64 Auxiliary Feature Register 1")
 
+    # Initial vector register rename mode
+    vecRegRenameMode = Param.VecRegRenameMode('Full',
+        "Initial rename mode for vecregs")
+
     # 1 CTX CMPs | 2 WRPs | 2 BRPs | !PMU | !Trace | Debug v8-A
     id_aa64dfr0_el1 = Param.UInt64(0x0000000000101006,
         "AArch64 Debug Feature Register 0")
diff --git a/src/arch/arm/insts/static_inst.cc b/src/arch/arm/insts/static_inst.cc
index 99d1b817d..8501715d5 100644
--- a/src/arch/arm/insts/static_inst.cc
+++ b/src/arch/arm/insts/static_inst.cc
@@ -330,6 +330,12 @@ ArmStaticInst::printFloatReg(std::ostream &os, RegIndex reg_idx) const
     ccprintf(os, "f%d", reg_idx);
 }
 
+void
+ArmStaticInst::printVecReg(std::ostream &os, RegIndex reg_idx) const
+{
+    ccprintf(os, "v%d", reg_idx);
+}
+
 void
 ArmStaticInst::printCCReg(std::ostream &os, RegIndex reg_idx) const
 {
diff --git a/src/arch/arm/insts/static_inst.hh b/src/arch/arm/insts/static_inst.hh
index 19af99a0f..486d30fe4 100644
--- a/src/arch/arm/insts/static_inst.hh
+++ b/src/arch/arm/insts/static_inst.hh
@@ -157,6 +157,7 @@ class ArmStaticInst : public StaticInst
     /// dependence tag number (FP or int).
     void printIntReg(std::ostream &os, RegIndex reg_idx) const;
     void printFloatReg(std::ostream &os, RegIndex reg_idx) const;
+    void printVecReg(std::ostream &os, RegIndex reg_idx) const;
     void printCCReg(std::ostream &os, RegIndex reg_idx) const;
     void printMiscReg(std::ostream &os, RegIndex reg_idx) const;
     void printMnemonic(std::ostream &os,
diff --git a/src/arch/arm/isa.cc b/src/arch/arm/isa.cc
index c54d7746d..a490e5fb7 100644
--- a/src/arch/arm/isa.cc
+++ b/src/arch/arm/isa.cc
@@ -209,6 +209,7 @@ ISA::ISA(Params *p)
     : SimObject(p),
       system(NULL),
       _decoderFlavour(p->decoderFlavour),
+      _vecRegRenameMode(p->vecRegRenameMode),
       pmu(p->pmu),
       lookUpMiscReg(NUM_MISCREGS, {0,0})
 {
diff --git a/src/arch/arm/isa.hh b/src/arch/arm/isa.hh
index 8de90dc93..e96de7922 100644
--- a/src/arch/arm/isa.hh
+++ b/src/arch/arm/isa.hh
@@ -48,7 +48,9 @@
 #include "arch/arm/system.hh"
 #include "arch/arm/tlb.hh"
 #include "arch/arm/types.hh"
+#include "arch/generic/traits.hh"
 #include "debug/Checkpoint.hh"
+#include "enums/VecRegRenameMode.hh"
 #include "sim/sim_object.hh"
 #include "enums/DecoderFlavour.hh"
 
@@ -68,6 +70,7 @@ namespace ArmISA
 
         // Micro Architecture
         const Enums::DecoderFlavour _decoderFlavour;
+        const Enums::VecRegRenameMode _vecRegRenameMode;
 
         /** Dummy device for to handle non-existing ISA devices */
         DummyISADevice dummyDevice;
@@ -185,6 +188,10 @@ namespace ArmISA
                 return RegId(IntRegClass, flattenIntIndex(regId.index()));
               case FloatRegClass:
                 return RegId(FloatRegClass, flattenFloatIndex(regId.index()));
+              case VecRegClass:
+                return RegId(VecRegClass, flattenVecIndex(regId.index()));
+              case VecElemClass:
+                return RegId(VecElemClass, flattenVecElemIndex(regId.index()));
               case CCRegClass:
                 return RegId(CCRegClass, flattenCCIndex(regId.index()));
               case MiscRegClass:
@@ -232,6 +239,20 @@ namespace ArmISA
             return reg;
         }
 
+        int
+        flattenVecIndex(int reg) const
+        {
+            assert(reg >= 0);
+            return reg;
+        }
+
+        int
+        flattenVecElemIndex(int reg) const
+        {
+            assert(reg >= 0);
+            return reg;
+        }
+
         int
         flattenCCIndex(int reg) const
         {
@@ -406,6 +427,12 @@ namespace ArmISA
 
         Enums::DecoderFlavour decoderFlavour() const { return _decoderFlavour; }
 
+        Enums::VecRegRenameMode
+        vecRegRenameMode() const
+        {
+            return _vecRegRenameMode;
+        }
+
         /// Explicitly import the otherwise hidden startup
         using SimObject::startup;
 
@@ -417,4 +444,17 @@ namespace ArmISA
     };
 }
 
+template<>
+struct initRenameMode<ArmISA::ISA>
+{
+    static Enums::VecRegRenameMode mode(const ArmISA::ISA* isa)
+    {
+        return isa->vecRegRenameMode();
+    }
+    static bool equals(const ArmISA::ISA* isa1, const ArmISA::ISA* isa2)
+    {
+        return mode(isa1) == mode(isa2);
+    }
+};
+
 #endif
diff --git a/src/arch/arm/nativetrace.cc b/src/arch/arm/nativetrace.cc
index fcb13fb2a..395232e00 100644
--- a/src/arch/arm/nativetrace.cc
+++ b/src/arch/arm/nativetrace.cc
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2010-2011, 2014 ARM Limited
+ * Copyright (c) 2010-2011, 2014, 2016 ARM Limited
  * All rights reserved
  *
  * The license below extends only to copyright in the software and shall
@@ -125,10 +125,10 @@ Trace::ArmNativeTrace::ThreadState::update(ThreadContext *tc)
     newState[STATE_CPSR] = cpsr;
     changed[STATE_CPSR] = (newState[STATE_CPSR] != oldState[STATE_CPSR]);
 
-    for (int i = 0; i < NumFloatV7ArchRegs; i += 2) {
-        newState[STATE_F0 + (i >> 1)] =
-            static_cast<uint64_t>(tc->readFloatRegBits(i + 1)) << 32 |
-            tc->readFloatRegBits(i);
+    for (int i = 0; i < NumVecV7ArchRegs; i++) {
+        auto vec(tc->readVecReg(RegId(VecRegClass,i)).as<uint64_t, 2>());
+        newState[STATE_F0 + 2*i] = vec[0];
+        newState[STATE_F0 + 2*i + 1] = vec[1];
     }
     newState[STATE_FPSCR] = tc->readMiscRegNoEffect(MISCREG_FPSCR) |
                             tc->readCCReg(CCREG_FP);
diff --git a/src/arch/arm/registers.hh b/src/arch/arm/registers.hh
index 2e1ad1881..0a617e4dc 100644
--- a/src/arch/arm/registers.hh
+++ b/src/arch/arm/registers.hh
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2010-2011, 2014 ARM Limited
+ * Copyright (c) 2010-2011, 2014, 2016 ARM Limited
  * All rights reserved
  *
  * The license below extends only to copyright in the software and shall
@@ -43,10 +43,11 @@
 #ifndef __ARCH_ARM_REGISTERS_HH__
 #define __ARCH_ARM_REGISTERS_HH__
 
+#include "arch/arm/ccregs.hh"
 #include "arch/arm/generated/max_inst_regs.hh"
 #include "arch/arm/intregs.hh"
-#include "arch/arm/ccregs.hh"
 #include "arch/arm/miscregs.hh"
+#include "arch/generic/vec_reg.hh"
 
 namespace ArmISA {
 
@@ -64,6 +65,13 @@ typedef uint64_t IntReg;
 typedef uint32_t FloatRegBits;
 typedef float FloatReg;
 
+// Number of VecElem per Vector Register (4 x 32-bit = 128-bit vectors)
+constexpr unsigned NumVecElemPerVecReg = 4;
+using VecElem = uint32_t;
+using VecReg = ::VecRegT<VecElem, NumVecElemPerVecReg, false>;
+using ConstVecReg = ::VecRegT<VecElem, NumVecElemPerVecReg, true>;
+using VecRegContainer = VecReg::Container;
+
 // cop-0/cop-1 system control register
 typedef uint64_t MiscReg;
 
@@ -76,15 +84,19 @@ const int NumIntArchRegs = NUM_ARCH_INTREGS;
 const int NumFloatV7ArchRegs  = 64;
 const int NumFloatV8ArchRegs  = 128;
 const int NumFloatSpecialRegs = 32;
+const int NumVecV7ArchRegs  = 64;
+const int NumVecV8ArchRegs  = 32;
+const int NumVecSpecialRegs = 8;
 
 const int NumIntRegs = NUM_INTREGS;
 const int NumFloatRegs = NumFloatV8ArchRegs + NumFloatSpecialRegs;
+const int NumVecRegs = NumVecV8ArchRegs + NumVecSpecialRegs;
 const int NumCCRegs = NUM_CCREGS;
 const int NumMiscRegs = NUM_MISCREGS;
 
 #define ISA_HAS_CC_REGS
 
-const int TotalNumRegs = NumIntRegs + NumFloatRegs + NumMiscRegs;
+const int TotalNumRegs = NumIntRegs + NumFloatRegs + NumVecRegs + NumMiscRegs;
 
 // semantically meaningful register indices
 const int ReturnValueReg = 0;
diff --git a/src/arch/arm/remote_gdb.cc b/src/arch/arm/remote_gdb.cc
index eefe62b42..d934d53d3 100644
--- a/src/arch/arm/remote_gdb.cc
+++ b/src/arch/arm/remote_gdb.cc
@@ -1,7 +1,7 @@
 /*
  * Copyright 2015 LabWare
  * Copyright 2014 Google Inc.
- * Copyright (c) 2010, 2013 ARM Limited
+ * Copyright (c) 2010, 2013, 2016 ARM Limited
  * All rights reserved
  *
  * The license below extends only to copyright in the software and shall
@@ -212,6 +212,10 @@ RemoteGDB::AArch64GdbRegCache::getRegs(ThreadContext *context)
         r.v[i + 2] = context->readFloatRegBits(i + 0);
         r.v[i + 3] = context->readFloatRegBits(i + 1);
     }
+
+    for (int i = 0; i < 32; i ++) {
+        r.vec[i] = context->readVecReg(RegId(VecRegClass,i));
+    }
 }
 
 void
@@ -234,6 +238,10 @@ RemoteGDB::AArch64GdbRegCache::setRegs(ThreadContext *context) const
         context->setFloatRegBits(i + 0, r.v[i + 2]);
         context->setFloatRegBits(i + 1, r.v[i + 3]);
     }
+
+    for (int i = 0; i < 32; i ++) {
+        context->setVecReg(RegId(VecRegClass, i), r.vec[i]);
+    }
 }
 
 void
diff --git a/src/arch/arm/remote_gdb.hh b/src/arch/arm/remote_gdb.hh
index acd6f32d2..328fbadb3 100644
--- a/src/arch/arm/remote_gdb.hh
+++ b/src/arch/arm/remote_gdb.hh
@@ -1,7 +1,7 @@
 /*
  * Copyright 2015 LabWare
  * Copyright 2014 Google, Inc.
- * Copyright (c) 2013 ARM Limited
+ * Copyright (c) 2013, 2016 ARM Limited
  * All rights reserved
  *
  * The license below extends only to copyright in the software and shall
@@ -51,6 +51,7 @@
 #include <algorithm>
 
 #include "arch/arm/utility.hh"
+#include "arch/generic/vec_reg.hh"
 #include "base/remote_gdb.hh"
 
 class System;
@@ -96,6 +97,7 @@ class RemoteGDB : public BaseRemoteGDB
           uint64_t pc;
           uint64_t cpsr;
           uint32_t v[32*4];
+          ArmISA::VecRegContainer vec[32];
         } r;
       public:
         char *data() const { return (char *)&r; }
diff --git a/src/arch/arm/utility.cc b/src/arch/arm/utility.cc
index 1437801a2..a58ca8111 100644
--- a/src/arch/arm/utility.cc
+++ b/src/arch/arm/utility.cc
@@ -154,6 +154,9 @@ copyRegs(ThreadContext *src, ThreadContext *dest)
     for (int i = 0; i < NumFloatRegs; i++)
         dest->setFloatRegFlat(i, src->readFloatRegFlat(i));
 
+    for (int i = 0; i < NumVecRegs; i++)
+        dest->setVecRegFlat(i, src->readVecRegFlat(i));
+
     for (int i = 0; i < NumCCRegs; i++)
         dest->setCCReg(i, src->readCCReg(i));
 
diff --git a/src/arch/generic/ISACommon.py b/src/arch/generic/ISACommon.py
new file mode 100644
index 000000000..7777dc27e
--- /dev/null
+++ b/src/arch/generic/ISACommon.py
@@ -0,0 +1,50 @@
+# Copyright (c) 2016 ARM Limited
+# All rights reserved.
+#
+# The license below extends only to copyright in the software and shall
+# not be construed as granting a license to any other intellectual
+# property including but not limited to intellectual property relating
+# to a hardware implementation of the functionality of the software
+# licensed hereunder.  You may use the software subject to the license
+# terms below provided that you ensure that this notice is replicated
+# unmodified and in its entirety in all distributions of the software,
+# modified or unmodified, in source code or in binary form.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are
+# met: redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer;
+# redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in the
+# documentation and/or other materials provided with the distribution;
+# neither the name of the copyright holders nor the names of its
+# contributors may be used to endorse or promote products derived from
+# this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+#
+# Authors: Rekai Gonzalez
+
+from m5.params import *
+from m5.proxy import *
+from m5.SimObject import SimObject
+
+class VecRegRenameMode(Enum):
+    '''Rename mode used for vector registers in the rename map.
+        Elem: Native elements of a vector register are renamed independently.
+        Full: Each vector register is renamed as one unit.'''
+
+    vals = ['Full', 'Elem']
+
+
+__all__ = ['VecRegRenameMode']
diff --git a/src/arch/generic/SConscript b/src/arch/generic/SConscript
index c87ad671f..7123eaf4a 100644
--- a/src/arch/generic/SConscript
+++ b/src/arch/generic/SConscript
@@ -1,3 +1,15 @@
+# Copyright (c) 2016 ARM Limited
+# All rights reserved.
+#
+# The license below extends only to copyright in the software and shall
+# not be construed as granting a license to any other intellectual
+# property including but not limited to intellectual property relating
+# to a hardware implementation of the functionality of the software
+# licensed hereunder.  You may use the software subject to the license
+# terms below provided that you ensure that this notice is replicated
+# unmodified and in its entirety in all distributions of the software,
+# modified or unmodified, in source code or in binary form.
+#
 # Copyright (c) 2012 Google
 # All rights reserved.
 #
@@ -36,6 +48,7 @@ Source('mmapped_ipr.cc')
 Source('tlb.cc')
 
 SimObject('BaseTLB.py')
+SimObject('ISACommon.py')
 
 DebugFlag('TLB')
 Source('pseudo_inst.cc')
diff --git a/src/arch/generic/traits.hh b/src/arch/generic/traits.hh
new file mode 100644
index 000000000..3dc6b30ee
--- /dev/null
+++ b/src/arch/generic/traits.hh
@@ -0,0 +1,63 @@
+/*
+ * Copyright (c) 2016 ARM Limited
+ * All rights reserved
+ *
+ * The license below extends only to copyright in the software and shall
+ * not be construed as granting a license to any other intellectual
+ * property including but not limited to intellectual property relating
+ * to a hardware implementation of the functionality of the software
+ * licensed hereunder.  You may use the software subject to the license
+ * terms below provided that you ensure that this notice is replicated
+ * unmodified and in its entirety in all distributions of the software,
+ * modified or unmodified, in source code or in binary form.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met: redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer;
+ * redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution;
+ * neither the name of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * Authors: Rekai Gonzalez
+ */
+
+/* Auxiliary structs for architecture traits. */
+
+#ifndef __ARCH_COMMON_TRAITS_HH__
+#define __ARCH_COMMON_TRAITS_HH__
+
+#include "enums/VecRegRenameMode.hh"
+
+/** Helper trait yielding the initial vector register rename mode of an ISA.
+ * The primary template provides a default of Enums::Full, so only ISAs that
+ * care need to specialise it and forward the calls to the appropriate
+ * member of their ISA object.
+ */
+template <typename ISA>
+struct initRenameMode
+{
+    static Enums::VecRegRenameMode mode(const ISA*) { return Enums::Full; }
+    /**
+     * Compare the initial rename mode of two instances of the ISA.
+     * Always true here, since the default mode is Full for every instance.
+     */
+    static bool equals(const ISA*, const ISA*) { return true; }
+};
+
+#endif /* __ARCH_COMMON_TRAITS_HH__ */
diff --git a/src/arch/generic/types.hh b/src/arch/generic/types.hh
index bb6eafd66..353112913 100644
--- a/src/arch/generic/types.hh
+++ b/src/arch/generic/types.hh
@@ -40,6 +40,9 @@
 // Logical register index type.
 typedef uint16_t RegIndex;
 
+/** Logical vector register elem index type. */
+using ElemIndex = uint16_t;
+
 namespace GenericISA
 {
 
diff --git a/src/arch/isa_parser.py b/src/arch/isa_parser.py
index 610197e38..759b50c0d 100755
--- a/src/arch/isa_parser.py
+++ b/src/arch/isa_parser.py
@@ -1,4 +1,4 @@
-# Copyright (c) 2014 ARM Limited
+# Copyright (c) 2014, 2016 ARM Limited
 # All rights reserved
 #
 # The license below extends only to copyright in the software and shall
@@ -865,8 +865,8 @@ class OperandList(object):
             op_desc = self.find_base(op_base)
             if op_desc:
                 if op_desc.ext != op_ext:
-                    error('Inconsistent extensions for operand %s' % \
-                          op_base)
+                    error ('Inconsistent extensions for operand %s' % \
+                            op_base)
                 op_desc.is_src = op_desc.is_src or is_src
                 op_desc.is_dest = op_desc.is_dest or is_dest
             else:
diff --git a/src/arch/mips/isa.hh b/src/arch/mips/isa.hh
index c751cb168..c38b7cec0 100644
--- a/src/arch/mips/isa.hh
+++ b/src/arch/mips/isa.hh
@@ -180,6 +180,18 @@ namespace MipsISA
             return reg;
         }
 
+        int
+        flattenVecIndex(int reg) const
+        {
+            return reg;
+        }
+
+        int
+        flattenVecElemIndex(int reg) const
+        {
+            return reg;
+        }
+
         // dummy
         int
         flattenCCIndex(int reg) const
diff --git a/src/arch/mips/registers.hh b/src/arch/mips/registers.hh
index c7cdb6522..f5dd325cd 100644
--- a/src/arch/mips/registers.hh
+++ b/src/arch/mips/registers.hh
@@ -32,6 +32,7 @@
 #ifndef __ARCH_MIPS_REGISTERS_HH__
 #define __ARCH_MIPS_REGISTERS_HH__
 
+#include "arch/generic/vec_reg.hh"
 #include "arch/mips/generated/max_inst_regs.hh"
 #include "base/misc.hh"
 #include "base/types.hh"
@@ -289,6 +290,15 @@ typedef uint64_t MiscReg;
 // dummy typedef since we don't have CC regs
 typedef uint8_t CCReg;
 
+// dummy typedefs since we don't have vector regs
+constexpr unsigned NumVecElemPerVecReg = 2;
+using VecElem = uint32_t;
+using VecReg = ::VecRegT<VecElem, NumVecElemPerVecReg, false>;
+using ConstVecReg = ::VecRegT<VecElem, NumVecElemPerVecReg, true>;
+using VecRegContainer = VecReg::Container;
+// This has to be one to prevent warnings that are treated as errors
+constexpr unsigned NumVecRegs = 1;
+
 typedef union {
     IntReg   intreg;
     FloatReg fpreg;
diff --git a/src/arch/null/registers.hh b/src/arch/null/registers.hh
index 6d1ecf1c5..3d27d95a2 100644
--- a/src/arch/null/registers.hh
+++ b/src/arch/null/registers.hh
@@ -40,6 +40,7 @@
 #ifndef __ARCH_NULL_REGISTERS_HH__
 #define __ARCH_NULL_REGISTERS_HH__
 
+#include "arch/generic/vec_reg.hh"
 #include "arch/types.hh"
 #include "base/types.hh"
 
@@ -52,6 +53,15 @@ typedef uint8_t CCReg;
 typedef uint64_t MiscReg;
 const RegIndex ZeroReg = 0;
 
+// dummy typedefs since we don't have vector regs
+constexpr unsigned NumVecElemPerVecReg = 2;
+using VecElem = uint32_t;
+using VecReg = ::VecRegT<VecElem, NumVecElemPerVecReg, false>;
+using ConstVecReg = ::VecRegT<VecElem, NumVecElemPerVecReg, true>;
+using VecRegContainer = VecReg::Container;
+// This has to be one to prevent warnings that are treated as errors
+constexpr unsigned NumVecRegs = 1;
+
 }
 
 #endif // __ARCH_NULL_REGISTERS_HH__
diff --git a/src/arch/power/isa.hh b/src/arch/power/isa.hh
index 475b4d2f8..edac96d59 100644
--- a/src/arch/power/isa.hh
+++ b/src/arch/power/isa.hh
@@ -101,6 +101,18 @@ class ISA : public SimObject
         return reg;
     }
 
+    int
+    flattenVecIndex(int reg) const
+    {
+        return reg;
+    }
+
+    int
+    flattenVecElemIndex(int reg) const
+    {
+        return reg;
+    }
+
     // dummy
     int
     flattenCCIndex(int reg) const
diff --git a/src/arch/power/registers.hh b/src/arch/power/registers.hh
index 742809db1..4e8c9e9f4 100644
--- a/src/arch/power/registers.hh
+++ b/src/arch/power/registers.hh
@@ -31,6 +31,7 @@
 #ifndef __ARCH_POWER_REGISTERS_HH__
 #define __ARCH_POWER_REGISTERS_HH__
 
+#include "arch/generic/vec_reg.hh"
 #include "arch/power/generated/max_inst_regs.hh"
 #include "arch/power/miscregs.hh"
 
@@ -53,6 +54,15 @@ typedef uint64_t MiscReg;
 // dummy typedef since we don't have CC regs
 typedef uint8_t CCReg;
 
+// dummy typedefs since we don't have vector regs
+constexpr unsigned NumVecElemPerVecReg = 2;
+using VecElem = uint32_t;
+using VecReg = ::VecRegT<VecElem, NumVecElemPerVecReg, false>;
+using ConstVecReg = ::VecRegT<VecElem, NumVecElemPerVecReg, true>;
+using VecRegContainer = VecReg::Container;
+// This has to be one to prevent warnings that are treated as errors
+constexpr unsigned NumVecRegs = 1;
+
 // Constants Related to the number of registers
 const int NumIntArchRegs = 32;
 
diff --git a/src/arch/riscv/isa.hh b/src/arch/riscv/isa.hh
index 3f2412303..578057aa0 100644
--- a/src/arch/riscv/isa.hh
+++ b/src/arch/riscv/isa.hh
@@ -97,6 +97,18 @@ class ISA : public SimObject
         return reg;
     }
 
+    int
+    flattenVecIndex(int reg) const
+    {
+        return reg;
+    }
+
+    int
+    flattenVecElemIndex(int reg) const
+    {
+        return reg;
+    }
+
     // dummy
     int
     flattenCCIndex(int reg) const
diff --git a/src/arch/riscv/registers.hh b/src/arch/riscv/registers.hh
index 2666784e5..6ae1c1691 100644
--- a/src/arch/riscv/registers.hh
+++ b/src/arch/riscv/registers.hh
@@ -51,6 +51,7 @@
 #include <string>
 
 #include "arch/generic/types.hh"
+#include "arch/generic/vec_reg.hh"
 #include "arch/isa_traits.hh"
 #include "arch/riscv/generated/max_inst_regs.hh"
 #include "base/types.hh"
@@ -67,10 +68,19 @@ typedef double FloatReg;
 typedef uint8_t CCReg; // Not applicable to Riscv
 typedef uint64_t MiscReg;
 
+// dummy typedefs since we don't have vector regs
+const unsigned NumVecElemPerVecReg = 2;
+using VecElem = uint32_t;
+using VecReg = ::VecRegT<VecElem, NumVecElemPerVecReg, false>;
+using ConstVecReg = ::VecRegT<VecElem, NumVecElemPerVecReg, true>;
+using VecRegContainer = VecReg::Container;
+
 const int NumIntArchRegs = 32;
 const int NumMicroIntRegs = 1;
 const int NumIntRegs = NumIntArchRegs + NumMicroIntRegs;
 const int NumFloatRegs = 32;
+// This has to be one to prevent warnings that are treated as errors
+const unsigned NumVecRegs = 1;
 const int NumCCRegs = 0;
 const int NumMiscRegs = 4096;
 
diff --git a/src/arch/sparc/isa.hh b/src/arch/sparc/isa.hh
index ded5b34ff..82fee0d00 100644
--- a/src/arch/sparc/isa.hh
+++ b/src/arch/sparc/isa.hh
@@ -202,6 +202,8 @@ class ISA : public SimObject
             return RegId(CCRegClass, flattenCCIndex(regId.index()));
           case MiscRegClass:
             return RegId(MiscRegClass, flattenMiscIndex(regId.index()));
+          default:
+            break;
         }
         return regId;
     }
@@ -221,6 +223,18 @@ class ISA : public SimObject
         return reg;
     }
 
+    int
+    flattenVecIndex(int reg) const
+    {
+        return reg;
+    }
+
+    int
+    flattenVecElemIndex(int reg) const
+    {
+        return reg;
+    }
+
     // dummy
     int
     flattenCCIndex(int reg) const
diff --git a/src/arch/sparc/registers.hh b/src/arch/sparc/registers.hh
index 62c876f3d..596fdf4d0 100644
--- a/src/arch/sparc/registers.hh
+++ b/src/arch/sparc/registers.hh
@@ -32,6 +32,7 @@
 #ifndef __ARCH_SPARC_REGISTERS_HH__
 #define __ARCH_SPARC_REGISTERS_HH__
 
+#include "arch/generic/vec_reg.hh"
 #include "arch/sparc/generated/max_inst_regs.hh"
 #include "arch/sparc/miscregs.hh"
 #include "arch/sparc/sparc_traits.hh"
@@ -52,6 +53,15 @@ typedef uint32_t FloatRegBits;
 // dummy typedef since we don't have CC regs
 typedef uint8_t CCReg;
 
+// dummy typedefs since we don't have vector regs
+constexpr unsigned NumVecElemPerVecReg = 2;
+using VecElem = uint32_t;
+using VecReg = ::VecRegT<VecElem, NumVecElemPerVecReg, false>;
+using ConstVecReg = ::VecRegT<VecElem, NumVecElemPerVecReg, true>;
+using VecRegContainer = VecReg::Container;
+// This has to be one to prevent warnings that are treated as errors
+constexpr unsigned NumVecRegs = 1;
+
 typedef union
 {
     IntReg intReg;
diff --git a/src/arch/x86/isa.hh b/src/arch/x86/isa.hh
index 099d27c7c..b61face09 100644
--- a/src/arch/x86/isa.hh
+++ b/src/arch/x86/isa.hh
@@ -82,6 +82,8 @@ namespace X86ISA
                 return RegId(CCRegClass, flattenCCIndex(regId.index()));
               case MiscRegClass:
                 return RegId(MiscRegClass, flattenMiscIndex(regId.index()));
+              default:
+                break;
             }
             return regId;
         }
@@ -102,6 +104,18 @@ namespace X86ISA
             return reg;
         }
 
+        int
+        flattenVecIndex(int reg) const
+        {
+            return reg;
+        }
+
+        int
+        flattenVecElemIndex(int reg) const
+        {
+            return reg;
+        }
+
         int
         flattenCCIndex(int reg) const
         {
diff --git a/src/arch/x86/registers.hh b/src/arch/x86/registers.hh
index d23731977..9db3349f0 100644
--- a/src/arch/x86/registers.hh
+++ b/src/arch/x86/registers.hh
@@ -41,6 +41,7 @@
 #ifndef __ARCH_X86_REGISTERS_HH__
 #define __ARCH_X86_REGISTERS_HH__
 
+#include "arch/generic/vec_reg.hh"
 #include "arch/x86/generated/max_inst_regs.hh"
 #include "arch/x86/regs/int.hh"
 #include "arch/x86/regs/ccr.hh"
@@ -93,6 +94,15 @@ typedef uint64_t IntReg;
 typedef uint64_t CCReg;
 typedef uint64_t MiscReg;
 
+// dummy typedefs since we don't have vector regs
+constexpr unsigned NumVecElemPerVecReg = 2;
+using VecElem = uint32_t;
+using VecReg = ::VecRegT<VecElem, NumVecElemPerVecReg, false>;
+using ConstVecReg = ::VecRegT<VecElem, NumVecElemPerVecReg, true>;
+using VecRegContainer = VecReg::Container;
+// This has to be one to prevent warnings that are treated as errors
+constexpr unsigned NumVecRegs = 1;
+
 //These floating point types are correct for mmx, but not
 //technically for x87 (80 bits) or at all for xmm (128 bits)
 typedef double FloatReg;
diff --git a/src/cpu/base_dyn_inst.hh b/src/cpu/base_dyn_inst.hh
index a8e619cd9..132c390b3 100644
--- a/src/cpu/base_dyn_inst.hh
+++ b/src/cpu/base_dyn_inst.hh
@@ -48,8 +48,8 @@
 
 #include <array>
 #include <bitset>
+#include <deque>
 #include <list>
-#include <queue>
 #include <string>
 
 #include "arch/generic/tlb.hh"
@@ -82,6 +82,7 @@ class BaseDynInst : public ExecContext, public RefCounted
     // Typedef for the CPU.
     typedef typename Impl::CPUType ImplCPU;
     typedef typename ImplCPU::ImplState ImplState;
+    using VecRegContainer = TheISA::VecRegContainer;
 
     // The DynInstPtr type.
     typedef typename Impl::DynInstPtr DynInstPtr;
@@ -591,6 +592,10 @@ class BaseDynInst : public ExecContext, public RefCounted
     int8_t numFPDestRegs()  const { return staticInst->numFPDestRegs(); }
     int8_t numIntDestRegs() const { return staticInst->numIntDestRegs(); }
     int8_t numCCDestRegs() const { return staticInst->numCCDestRegs(); }
+    int8_t numVecDestRegs() const { return staticInst->numVecDestRegs(); }
+    int8_t numVecElemDestRegs() const {
+        return staticInst->numVecElemDestRegs();
+    }
 
     /** Returns the logical register index of the i'th destination register. */
     const RegId& destRegIdx(int i) const { return staticInst->destRegIdx(i); }
@@ -615,6 +620,8 @@ class BaseDynInst : public ExecContext, public RefCounted
     }
 
     /** Pushes a result onto the instResult queue. */
+    /** @{ */
+    /** Scalar result. */
     template<typename T>
     void setScalarResult(T&& t)
     {
@@ -624,6 +631,27 @@ class BaseDynInst : public ExecContext, public RefCounted
         }
     }
 
+    /** Full vector result. */
+    template<typename T>
+    void setVecResult(T&& t)
+    {
+        if (instFlags[RecordResult]) {
+            instResult.push(InstResult(std::forward<T>(t),
+                        InstResult::ResultType::VecReg));
+        }
+    }
+
+    /** Vector element result. */
+    template<typename T>
+    void setVecElemResult(T&& t)
+    {
+        if (instFlags[RecordResult]) {
+            instResult.push(InstResult(std::forward<T>(t),
+                        InstResult::ResultType::VecElem));
+        }
+    }
+    /** @} */
+
     /** Records an integer register being set to a value. */
     void setIntRegOperand(const StaticInst *si, int idx, IntReg val)
     {
@@ -642,6 +670,13 @@ class BaseDynInst : public ExecContext, public RefCounted
         setScalarResult(val);
     }
 
+    /** Record a vector register being set to a value */
+    void setVecRegOperand(const StaticInst *si, int idx,
+            const VecRegContainer& val)
+    {
+        setVecResult(val);
+    }
+
     /** Records an fp register being set to an integer value. */
     void
     setFloatRegOperandBits(const StaticInst *si, int idx, FloatRegBits val)
@@ -649,6 +684,12 @@ class BaseDynInst : public ExecContext, public RefCounted
         setScalarResult(val);
     }
 
+    /** Record a vector element being set to a value */
+    void setVecElemOperand(const StaticInst *si, int idx, const VecElem val)
+    {
+        setVecElemResult(val);
+    }
+
     /** Records that one of the source registers is ready. */
     void markSrcRegReady();
 
diff --git a/src/cpu/checker/cpu.hh b/src/cpu/checker/cpu.hh
index 6571d034a..213106bd2 100644
--- a/src/cpu/checker/cpu.hh
+++ b/src/cpu/checker/cpu.hh
@@ -96,6 +96,7 @@ class CheckerCPU : public BaseCPU, public ExecContext
     typedef TheISA::FloatReg FloatReg;
     typedef TheISA::FloatRegBits FloatRegBits;
     typedef TheISA::MiscReg MiscReg;
+    using VecRegContainer = TheISA::VecRegContainer;
 
     /** id attached to all issued requests */
     MasterID masterId;
@@ -225,6 +226,111 @@ class CheckerCPU : public BaseCPU, public ExecContext
         return thread->readFloatRegBits(reg.index());
     }
 
+    /**
+     * Read source vector register operand.
+     */
+    const VecRegContainer& readVecRegOperand(const StaticInst *si,
+                                             int idx) const override
+    {
+        const RegId& reg = si->srcRegIdx(idx);
+        assert(reg.isVecReg());
+        return thread->readVecReg(reg);
+    }
+
+    /**
+     * Read destination vector register operand for modification.
+     */
+    VecRegContainer& getWritableVecRegOperand(const StaticInst *si,
+                                             int idx) override
+    {
+        const RegId& reg = si->destRegIdx(idx);
+        assert(reg.isVecReg());
+        return thread->getWritableVecReg(reg);
+    }
+
+    /** Vector Register Lane Interfaces. */
+    /** @{ */
+    /** Reads the idx-th source operand of si as 8bit vector lanes. */
+    virtual ConstVecLane8
+    readVec8BitLaneOperand(const StaticInst *si, int idx) const override
+    {
+        // This is a read: resolve idx against the source operand list.
+        const RegId& reg = si->srcRegIdx(idx);
+        assert(reg.isVecReg());
+        return thread->readVec8BitLaneReg(reg);
+    }
+
+    /** Reads the idx-th source operand of si as 16bit vector lanes. */
+    virtual ConstVecLane16
+    readVec16BitLaneOperand(const StaticInst *si, int idx) const override
+    {
+        // This is a read: resolve idx against the source operand list.
+        const RegId& reg = si->srcRegIdx(idx);
+        assert(reg.isVecReg());
+        return thread->readVec16BitLaneReg(reg);
+    }
+
+    /** Reads the idx-th source operand of si as 32bit vector lanes. */
+    virtual ConstVecLane32
+    readVec32BitLaneOperand(const StaticInst *si, int idx) const override
+    {
+        // This is a read: resolve idx against the source operand list.
+        const RegId& reg = si->srcRegIdx(idx);
+        assert(reg.isVecReg());
+        return thread->readVec32BitLaneReg(reg);
+    }
+
+    /** Reads the idx-th source operand of si as 64bit vector lanes. */
+    virtual ConstVecLane64
+    readVec64BitLaneOperand(const StaticInst *si, int idx) const override
+    {
+        // This is a read: resolve idx against the source operand list.
+        const RegId& reg = si->srcRegIdx(idx);
+        assert(reg.isVecReg());
+        return thread->readVec64BitLaneReg(reg);
+    }
+
+    /** Write a lane of the destination vector operand. */
+    template <typename LD>
+    void
+    setVecLaneOperandT(const StaticInst *si, int idx, const LD& val)
+    {
+        const RegId& reg = si->destRegIdx(idx);
+        assert(reg.isVecReg());
+        return thread->setVecLane(reg, val);
+    }
+    virtual void
+    setVecLaneOperand(const StaticInst *si, int idx,
+            const LaneData<LaneSize::Byte>& val) override
+    {
+        setVecLaneOperandT(si, idx, val);
+    }
+    virtual void
+    setVecLaneOperand(const StaticInst *si, int idx,
+            const LaneData<LaneSize::TwoByte>& val) override
+    {
+        setVecLaneOperandT(si, idx, val);
+    }
+    virtual void
+    setVecLaneOperand(const StaticInst *si, int idx,
+            const LaneData<LaneSize::FourByte>& val) override
+    {
+        setVecLaneOperandT(si, idx, val);
+    }
+    virtual void
+    setVecLaneOperand(const StaticInst *si, int idx,
+            const LaneData<LaneSize::EightByte>& val) override
+    {
+        setVecLaneOperandT(si, idx, val);
+    }
+    /** @} */
+
+    VecElem readVecElemOperand(const StaticInst *si, int idx) const override
+    {
+        const RegId& reg = si->srcRegIdx(idx);
+        return thread->readVecElem(reg);
+    }
+
     CCReg readCCRegOperand(const StaticInst *si, int idx) override
     {
         const RegId& reg = si->srcRegIdx(idx);
@@ -239,6 +345,20 @@ class CheckerCPU : public BaseCPU, public ExecContext
                         InstResult::ResultType::Scalar));
     }
 
+    template<typename T>
+    void setVecResult(T&& t)
+    {
+        result.push(InstResult(std::forward<T>(t),
+                        InstResult::ResultType::VecReg));
+    }
+
+    template<typename T>
+    void setVecElemResult(T&& t)
+    {
+        result.push(InstResult(std::forward<T>(t),
+                        InstResult::ResultType::VecElem));
+    }
+
     void setIntRegOperand(const StaticInst *si, int idx,
                           IntReg val) override
     {
@@ -274,6 +394,24 @@ class CheckerCPU : public BaseCPU, public ExecContext
         setScalarResult((uint64_t)val);
     }
 
+    void setVecRegOperand(const StaticInst *si, int idx,
+                                const VecRegContainer& val) override
+    {
+        const RegId& reg = si->destRegIdx(idx);
+        assert(reg.isVecReg());
+        thread->setVecReg(reg, val);
+        setVecResult(val);
+    }
+
+    void setVecElemOperand(const StaticInst *si, int idx,
+                           const VecElem val) override
+    {
+        const RegId& reg = si->destRegIdx(idx);
+        assert(reg.isVecElem());
+        thread->setVecElem(reg, val);
+        setVecElemResult(val);
+    }
+
     bool readPredicate() override { return thread->readPredicate(); }
     void setPredicate(bool val) override
     {
diff --git a/src/cpu/checker/cpu_impl.hh b/src/cpu/checker/cpu_impl.hh
index ed86aec84..d81858c14 100644
--- a/src/cpu/checker/cpu_impl.hh
+++ b/src/cpu/checker/cpu_impl.hh
@@ -486,6 +486,7 @@ Checker<Impl>::validateExecution(DynInstPtr &inst)
     int idx = -1;
     bool result_mismatch = false;
     bool scalar_mismatch = false;
+    bool vector_mismatch = false;
 
     if (inst->isUnverifiable()) {
         // Unverifiable instructions assume they were executed
@@ -503,8 +504,10 @@ Checker<Impl>::validateExecution(DynInstPtr &inst)
             if (checker_val != inst_val) {
                 result_mismatch = true;
                 idx = i;
-                scalar_mismatch = true;
-                break;
+                scalar_mismatch = checker_val.isScalar();
+                vector_mismatch = checker_val.isVector();
+                panic_if(!(scalar_mismatch || vector_mismatch),
+                        "Unknown type of result\n");
             }
         }
     } // Checker CPU checks all the saved results in the dyninst passed by
@@ -610,6 +613,15 @@ Checker<Impl>::copyResult(DynInstPtr &inst, const InstResult& mismatch_val,
             panic_if(!mismatch_val.isScalar(), "Unexpected type of result");
             thread->setFloatRegBits(idx.index(), mismatch_val.asInteger());
             break;
+          case VecRegClass:
+            panic_if(!mismatch_val.isVector(), "Unexpected type of result");
+            thread->setVecReg(idx, mismatch_val.asVector());
+            break;
+          case VecElemClass:
+            panic_if(!mismatch_val.isVecElem(),
+                     "Unexpected type of result");
+            thread->setVecElem(idx, mismatch_val.asVectorElem());
+            break;
           case CCRegClass:
             panic_if(!mismatch_val.isScalar(), "Unexpected type of result");
             thread->setCCReg(idx.index(), mismatch_val.asInteger());
@@ -618,6 +630,8 @@ Checker<Impl>::copyResult(DynInstPtr &inst, const InstResult& mismatch_val,
             panic_if(!mismatch_val.isScalar(), "Unexpected type of result");
             thread->setMiscReg(idx.index(), mismatch_val.asInteger());
             break;
+          default:
+            panic("Unknown register class: %d", (int)idx.classValue());
         }
     }
     start_idx++;
@@ -634,6 +648,14 @@ Checker<Impl>::copyResult(DynInstPtr &inst, const InstResult& mismatch_val,
             panic_if(!res.isScalar(), "Unexpected type of result");
             thread->setFloatRegBits(idx.index(), res.asInteger());
             break;
+          case VecRegClass:
+            panic_if(!res.isVector(), "Unexpected type of result");
+            thread->setVecReg(idx, res.asVector());
+            break;
+          case VecElemClass:
+            panic_if(!res.isVecElem(), "Unexpected type of result");
+            thread->setVecElem(idx, res.asVectorElem());
+            break;
           case CCRegClass:
             panic_if(!res.isScalar(), "Unexpected type of result");
             thread->setCCReg(idx.index(), res.asInteger());
@@ -644,6 +666,8 @@ Checker<Impl>::copyResult(DynInstPtr &inst, const InstResult& mismatch_val,
             thread->setMiscReg(idx.index(), 0);
             break;
             // else Register is out of range...
+          default:
+            panic("Unknown register class: %d", (int)idx.classValue());
         }
     }
 }
diff --git a/src/cpu/checker/thread_context.hh b/src/cpu/checker/thread_context.hh
index e48f5936b..5208932de 100644
--- a/src/cpu/checker/thread_context.hh
+++ b/src/cpu/checker/thread_context.hh
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2011-2012 ARM Limited
+ * Copyright (c) 2011-2012, 2016 ARM Limited
  * Copyright (c) 2013 Advanced Micro Devices, Inc.
  * All rights reserved
  *
@@ -215,6 +215,55 @@ class CheckerThreadContext : public ThreadContext
     FloatRegBits readFloatRegBits(int reg_idx)
     { return actualTC->readFloatRegBits(reg_idx); }
 
+    const VecRegContainer& readVecReg(const RegId& reg) const
+    { return actualTC->readVecReg(reg); }
+
+    /**
+     * Read vector register for modification, hierarchical indexing.
+     */
+    VecRegContainer& getWritableVecReg(const RegId& reg)
+    { return actualTC->getWritableVecReg(reg); }
+
+    /** Vector Register Lane Interfaces. */
+    /** @{ */
+    /** Reads source vector 8bit operand. */
+    ConstVecLane8
+    readVec8BitLaneReg(const RegId& reg) const
+    { return actualTC->readVec8BitLaneReg(reg); }
+
+    /** Reads source vector 16bit operand. */
+    ConstVecLane16
+    readVec16BitLaneReg(const RegId& reg) const
+    { return actualTC->readVec16BitLaneReg(reg); }
+
+    /** Reads source vector 32bit operand. */
+    ConstVecLane32
+    readVec32BitLaneReg(const RegId& reg) const
+    { return actualTC->readVec32BitLaneReg(reg); }
+
+    /** Reads source vector 64bit operand. */
+    ConstVecLane64
+    readVec64BitLaneReg(const RegId& reg) const
+    { return actualTC->readVec64BitLaneReg(reg); }
+
+    /** Write a lane of the destination vector register. */
+    virtual void setVecLane(const RegId& reg,
+            const LaneData<LaneSize::Byte>& val)
+    { return actualTC->setVecLane(reg, val); }
+    virtual void setVecLane(const RegId& reg,
+            const LaneData<LaneSize::TwoByte>& val)
+    { return actualTC->setVecLane(reg, val); }
+    virtual void setVecLane(const RegId& reg,
+            const LaneData<LaneSize::FourByte>& val)
+    { return actualTC->setVecLane(reg, val); }
+    virtual void setVecLane(const RegId& reg,
+            const LaneData<LaneSize::EightByte>& val)
+    { return actualTC->setVecLane(reg, val); }
+    /** @} */
+
+    const VecElem& readVecElem(const RegId& reg) const
+    { return actualTC->readVecElem(reg); }
+
     CCReg readCCReg(int reg_idx)
     { return actualTC->readCCReg(reg_idx); }
 
@@ -236,6 +285,18 @@ class CheckerThreadContext : public ThreadContext
         checkerTC->setFloatRegBits(reg_idx, val);
     }
 
+    void setVecReg(const RegId& reg, const VecRegContainer& val)
+    {
+        actualTC->setVecReg(reg, val);
+        checkerTC->setVecReg(reg, val);
+    }
+
+    void setVecElem(const RegId& reg, const VecElem& val)
+    {
+        actualTC->setVecElem(reg, val);
+        checkerTC->setVecElem(reg, val);
+    }
+
     void setCCReg(int reg_idx, CCReg val)
     {
         actualTC->setCCReg(reg_idx, val);
@@ -333,6 +394,26 @@ class CheckerThreadContext : public ThreadContext
     void setFloatRegBitsFlat(int idx, FloatRegBits val)
     { actualTC->setFloatRegBitsFlat(idx, val); }
 
+    const VecRegContainer& readVecRegFlat(int idx) const
+    { return actualTC->readVecRegFlat(idx); }
+
+    /**
+     * Read vector register for modification, flat indexing.
+     */
+    VecRegContainer& getWritableVecRegFlat(int idx)
+    { return actualTC->getWritableVecRegFlat(idx); }
+
+    void setVecRegFlat(int idx, const VecRegContainer& val)
+    { actualTC->setVecRegFlat(idx, val); }
+
+    const VecElem& readVecElemFlat(const RegIndex& idx,
+                                   const ElemIndex& elem_idx) const
+    { return actualTC->readVecElemFlat(idx, elem_idx); }
+
+    void setVecElemFlat(const RegIndex& idx,
+                        const ElemIndex& elem_idx, const VecElem& val)
+    { actualTC->setVecElemFlat(idx, elem_idx, val); }
+
     CCReg readCCRegFlat(int idx)
     { return actualTC->readCCRegFlat(idx); }
 
diff --git a/src/cpu/exec_context.hh b/src/cpu/exec_context.hh
index d33147240..e7f5d37ac 100644
--- a/src/cpu/exec_context.hh
+++ b/src/cpu/exec_context.hh
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2014 ARM Limited
+ * Copyright (c) 2014, 2016 ARM Limited
  * All rights reserved
  *
  * The license below extends only to copyright in the software and shall
@@ -79,6 +79,8 @@ class ExecContext {
     typedef TheISA::MiscReg MiscReg;
 
     typedef TheISA::CCReg CCReg;
+    using VecRegContainer = TheISA::VecRegContainer;
+    using VecElem = TheISA::VecElem;
 
   public:
     /**
@@ -121,6 +123,63 @@ class ExecContext {
 
     /** @} */
 
+    /** Vector Register Interfaces. */
+    /** @{ */
+    /** Reads source vector register operand. */
+    virtual const VecRegContainer&
+    readVecRegOperand(const StaticInst *si, int idx) const = 0;
+
+    /** Gets destination vector register operand for modification. */
+    virtual VecRegContainer&
+    getWritableVecRegOperand(const StaticInst *si, int idx) = 0;
+
+    /** Sets a destination vector register operand to a value. */
+    virtual void
+    setVecRegOperand(const StaticInst *si, int idx,
+                     const VecRegContainer& val) = 0;
+    /** @} */
+
+    /** Vector Register Lane Interfaces. */
+    /** @{ */
+    /** Reads source vector 8bit operand. */
+    virtual ConstVecLane8
+    readVec8BitLaneOperand(const StaticInst *si, int idx) const = 0;
+
+    /** Reads source vector 16bit operand. */
+    virtual ConstVecLane16
+    readVec16BitLaneOperand(const StaticInst *si, int idx) const = 0;
+
+    /** Reads source vector 32bit operand. */
+    virtual ConstVecLane32
+    readVec32BitLaneOperand(const StaticInst *si, int idx) const = 0;
+
+    /** Reads source vector 64bit operand. */
+    virtual ConstVecLane64
+    readVec64BitLaneOperand(const StaticInst *si, int idx) const = 0;
+
+    /** Write a lane of the destination vector operand. */
+    /** @{ */
+    virtual void setVecLaneOperand(const StaticInst *si, int idx,
+            const LaneData<LaneSize::Byte>& val) = 0;
+    virtual void setVecLaneOperand(const StaticInst *si, int idx,
+            const LaneData<LaneSize::TwoByte>& val) = 0;
+    virtual void setVecLaneOperand(const StaticInst *si, int idx,
+            const LaneData<LaneSize::FourByte>& val) = 0;
+    virtual void setVecLaneOperand(const StaticInst *si, int idx,
+            const LaneData<LaneSize::EightByte>& val) = 0;
+    /** @} */
+
+    /** Vector Elem Interfaces. */
+    /** @{ */
+    /** Reads an element of a vector register. */
+    virtual VecElem readVecElemOperand(const StaticInst *si,
+                                        int idx) const = 0;
+
+    /** Sets an element of a vector register to a value. */
+    virtual void setVecElemOperand(const StaticInst *si, int idx,
+                                   const VecElem val) = 0;
+    /** @} */
+
     /**
      * @{
      * @name Condition Code Registers
diff --git a/src/cpu/inst_res.hh b/src/cpu/inst_res.hh
index f6f14fe19..9b6a23d95 100644
--- a/src/cpu/inst_res.hh
+++ b/src/cpu/inst_res.hh
@@ -43,17 +43,24 @@
 #include <type_traits>
 
 #include "arch/generic/types.hh"
+#include "arch/generic/vec_reg.hh"
 
 class InstResult {
+    using VecRegContainer = TheISA::VecRegContainer;
+    using VecElem = TheISA::VecElem;
   public:
     union MultiResult {
         uint64_t integer;
         double dbl;
+        VecRegContainer vector;
+        VecElem vecElem;
         MultiResult() {}
     };
 
     enum class ResultType {
         Scalar,
+        VecElem,
+        VecReg,
         NumResultTypes,
         Invalid
     };
@@ -77,7 +84,32 @@ class InstResult {
             result.dbl = i;
         }
     }
+    /** Vector result. */
+    explicit InstResult(const VecRegContainer& v, const ResultType& t)
+        : type(t) { result.vector = v; }
 
+    InstResult& operator=(const InstResult& that) {
+        type = that.type;
+        switch (type) {
+        /* Given that misc regs are not written to, there may be invalids in
+         * the result stack. */
+        case ResultType::Invalid:
+            break;
+        case ResultType::Scalar:
+            result.integer = that.result.integer;
+            break;
+        case ResultType::VecElem:
+            result.vecElem = that.result.vecElem;
+            break;
+        case ResultType::VecReg:
+            result.vector = that.result.vector;
+            break;
+        default:
+            panic("Assigning result from unknown result type");
+            break;
+        }
+        return *this;
+    }
     /**
      * Result comparison
      * Two invalid results always differ.
@@ -88,6 +120,10 @@ class InstResult {
         switch (type) {
         case ResultType::Scalar:
             return result.integer == that.result.integer;
+        case ResultType::VecElem:
+            return result.vecElem == that.result.vecElem;
+        case ResultType::VecReg:
+            return result.vector == that.result.vector;
         case ResultType::Invalid:
             return false;
         default:
@@ -103,6 +139,10 @@ class InstResult {
     /** @{ */
     /** Is this a scalar result?. */
     bool isScalar() const { return type == ResultType::Scalar; }
+    /** Is this a vector result?. */
+    bool isVector() const { return type == ResultType::VecReg; }
+    /** Is this a vector element result?. */
+    bool isVecElem() const { return type == ResultType::VecElem; }
     /** Is this a valid result?. */
     bool isValid() const { return type != ResultType::Invalid; }
     /** @} */
@@ -125,6 +165,18 @@ class InstResult {
     {
         return result.integer;
     }
+    const VecRegContainer&
+    asVector() const
+    {
+        panic_if(!isVector(), "Converting scalar (or invalid) to vector!!");
+        return result.vector;
+    }
+    const VecElem&
+    asVectorElem() const
+    {
+        panic_if(!isVecElem(), "Converting non-VecElem result to VecElem!!");
+        return result.vecElem; // only meaningful for ResultType::VecElem
+    }
     /** @} */
 };
 
diff --git a/src/cpu/minor/dyn_inst.cc b/src/cpu/minor/dyn_inst.cc
index 1ed598833..756b214bd 100644
--- a/src/cpu/minor/dyn_inst.cc
+++ b/src/cpu/minor/dyn_inst.cc
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2013-2014 ARM Limited
+ * Copyright (c) 2013-2014, 2016 ARM Limited
  * All rights reserved
  *
  * The license below extends only to copyright in the software and shall
@@ -153,6 +153,13 @@ printRegName(std::ostream &os, const RegId& reg)
       case FloatRegClass:
         os << 'f' << static_cast<unsigned int>(reg.index());
         break;
+      case VecRegClass:
+        os << 'v' << static_cast<unsigned int>(reg.index());
+        break;
+      case VecElemClass:
+        os << 'v' << static_cast<unsigned int>(reg.index()) << '[' <<
+              static_cast<unsigned int>(reg.elemIndex()) << ']';
+        break;
       case IntRegClass:
         if (reg.isZeroReg()) {
             os << 'z';
@@ -162,6 +169,9 @@ printRegName(std::ostream &os, const RegId& reg)
         break;
       case CCRegClass:
         os << 'c' << static_cast<unsigned int>(reg.index());
+        break;
+      default:
+        panic("Unknown register class: %d", (int)reg.classValue());
     }
 }
 
diff --git a/src/cpu/minor/exec_context.hh b/src/cpu/minor/exec_context.hh
index e91b7a6dd..4b3a02fca 100644
--- a/src/cpu/minor/exec_context.hh
+++ b/src/cpu/minor/exec_context.hh
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2011-2014 ARM Limited
+ * Copyright (c) 2011-2014, 2016 ARM Limited
  * Copyright (c) 2013 Advanced Micro Devices, Inc.
  * All rights reserved
  *
@@ -145,6 +145,30 @@ class ExecContext : public ::ExecContext
         return thread.readFloatRegBits(reg.index());
     }
 
+    const TheISA::VecRegContainer&
+    readVecRegOperand(const StaticInst *si, int idx) const override
+    {
+        const RegId& reg = si->srcRegIdx(idx);
+        assert(reg.isVecReg());
+        return thread.readVecReg(reg);
+    }
+
+    TheISA::VecRegContainer&
+    getWritableVecRegOperand(const StaticInst *si, int idx) override
+    {
+        const RegId& reg = si->destRegIdx(idx);
+        assert(reg.isVecReg());
+        return thread.getWritableVecReg(reg);
+    }
+
+    TheISA::VecElem
+    readVecElemOperand(const StaticInst *si, int idx) const override
+    {
+        const RegId& reg = si->srcRegIdx(idx);
+        assert(reg.isVecElem()); // operand names one element, not a reg
+        return thread.readVecElem(reg);
+    }
+
     void
     setIntRegOperand(const StaticInst *si, int idx, IntReg val) override
     {
@@ -171,6 +195,102 @@ class ExecContext : public ::ExecContext
         thread.setFloatRegBits(reg.index(), val);
     }
 
+    void
+    setVecRegOperand(const StaticInst *si, int idx,
+                     const TheISA::VecRegContainer& val) override
+    {
+        const RegId& reg = si->destRegIdx(idx);
+        assert(reg.isVecReg());
+        thread.setVecReg(reg, val);
+    }
+
+    /** Vector Register Lane Interfaces. */
+    /** @{ */
+    /** Reads source vector 8bit operand. */
+    ConstVecLane8
+    readVec8BitLaneOperand(const StaticInst *si, int idx) const
+                            override
+    {
+        const RegId& reg = si->srcRegIdx(idx);
+        assert(reg.isVecReg());
+        return thread.readVec8BitLaneReg(reg);
+    }
+
+    /** Reads source vector 16bit operand. */
+    ConstVecLane16
+    readVec16BitLaneOperand(const StaticInst *si, int idx) const
+                            override
+    {
+        const RegId& reg = si->srcRegIdx(idx);
+        assert(reg.isVecReg());
+        return thread.readVec16BitLaneReg(reg);
+    }
+
+    /** Reads source vector 32bit operand. */
+    ConstVecLane32
+    readVec32BitLaneOperand(const StaticInst *si, int idx) const
+                            override
+    {
+        const RegId& reg = si->srcRegIdx(idx);
+        assert(reg.isVecReg());
+        return thread.readVec32BitLaneReg(reg);
+    }
+
+    /** Reads source vector 64bit operand. */
+    ConstVecLane64
+    readVec64BitLaneOperand(const StaticInst *si, int idx) const
+                            override
+    {
+        const RegId& reg = si->srcRegIdx(idx);
+        assert(reg.isVecReg());
+        return thread.readVec64BitLaneReg(reg);
+    }
+
+    /** Write a lane of the destination vector operand. */
+    template <typename LD>
+    void
+    setVecLaneOperandT(const StaticInst *si, int idx,
+            const LD& val)
+    {
+        const RegId& reg = si->destRegIdx(idx);
+        assert(reg.isVecReg());
+        return thread.setVecLane(reg, val);
+    }
+    virtual void
+    setVecLaneOperand(const StaticInst *si, int idx,
+            const LaneData<LaneSize::Byte>& val) override
+    {
+        setVecLaneOperandT(si, idx, val);
+    }
+    virtual void
+    setVecLaneOperand(const StaticInst *si, int idx,
+            const LaneData<LaneSize::TwoByte>& val) override
+    {
+        setVecLaneOperandT(si, idx, val);
+    }
+    virtual void
+    setVecLaneOperand(const StaticInst *si, int idx,
+            const LaneData<LaneSize::FourByte>& val) override
+    {
+        setVecLaneOperandT(si, idx, val);
+    }
+    virtual void
+    setVecLaneOperand(const StaticInst *si, int idx,
+            const LaneData<LaneSize::EightByte>& val) override
+    {
+        setVecLaneOperandT(si, idx, val);
+    }
+    /** @} */
+
+    void
+    setVecElemOperand(const StaticInst *si, int idx,
+                      const TheISA::VecElem val) override
+    {
+        const RegId& reg = si->destRegIdx(idx);
+        assert(reg.isVecElem()); // operand names one element, not a reg
+        thread.setVecElem(reg, val);
+    }
+
     bool
     readPredicate() override
     {
diff --git a/src/cpu/minor/scoreboard.cc b/src/cpu/minor/scoreboard.cc
index e3497a5cf..c56d3b303 100644
--- a/src/cpu/minor/scoreboard.cc
+++ b/src/cpu/minor/scoreboard.cc
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2013-2014 ARM Limited
+ * Copyright (c) 2013-2014, 2016 ARM Limited
  * All rights reserved
  *
  * The license below extends only to copyright in the software and shall
@@ -67,6 +67,16 @@ Scoreboard::findIndex(const RegId& reg, Index &scoreboard_index)
                 reg.index();
             ret = true;
             break;
+          case VecRegClass:
+            scoreboard_index = TheISA::NumIntRegs + TheISA::NumCCRegs +
+                TheISA::NumFloatRegs + reg.index();
+            ret = true;
+            break;
+          case VecElemClass:
+            scoreboard_index = TheISA::NumIntRegs + TheISA::NumCCRegs +
+                TheISA::NumFloatRegs + TheISA::NumVecRegs + reg.index();
+            ret = true;
+            break;
           case CCRegClass:
             scoreboard_index = TheISA::NumIntRegs + reg.index();
             ret = true;
@@ -75,6 +85,9 @@ Scoreboard::findIndex(const RegId& reg, Index &scoreboard_index)
               /* Don't bother with Misc registers */
             ret = false;
             break;
+          default:
+            panic("Unknown register class: %d",
+                    static_cast<int>(reg.classValue()));
         }
     }
 
diff --git a/src/cpu/minor/scoreboard.hh b/src/cpu/minor/scoreboard.hh
index 7fe5002f9..9e42c2a6b 100644
--- a/src/cpu/minor/scoreboard.hh
+++ b/src/cpu/minor/scoreboard.hh
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2013-2014 ARM Limited
+ * Copyright (c) 2013-2014, 2016 ARM Limited
  * All rights reserved
  *
  * The license below extends only to copyright in the software and shall
@@ -94,7 +94,7 @@ class Scoreboard : public Named
     Scoreboard(const std::string &name) :
         Named(name),
         numRegs(TheISA::NumIntRegs + TheISA::NumCCRegs +
-            TheISA::NumFloatRegs),
+            TheISA::NumFloatRegs + TheISA::NumVecRegs),
         numResults(numRegs, 0),
         numUnpredictableResults(numRegs, 0),
         fuIndices(numRegs, 0),
diff --git a/src/cpu/o3/O3CPU.py b/src/cpu/o3/O3CPU.py
index cab2cf34f..8507ab6ff 100644
--- a/src/cpu/o3/O3CPU.py
+++ b/src/cpu/o3/O3CPU.py
@@ -1,3 +1,15 @@
+# Copyright (c) 2016 ARM Limited
+# All rights reserved.
+#
+# The license below extends only to copyright in the software and shall
+# not be construed as granting a license to any other intellectual
+# property including but not limited to intellectual property relating
+# to a hardware implementation of the functionality of the software
+# licensed hereunder.  You may use the software subject to the license
+# terms below provided that you ensure that this notice is replicated
+# unmodified and in its entirety in all distributions of the software,
+# modified or unmodified, in source code or in binary form.
+#
 # Copyright (c) 2005-2007 The Regents of The University of Michigan
 # All rights reserved.
 #
@@ -125,6 +137,8 @@ class DerivO3CPU(BaseCPU):
         # (it's a side effect of int reg renaming), so they should
         # never be the bottleneck here.
         _defaultNumPhysCCRegs = Self.numPhysIntRegs * 5
+    numPhysVecRegs = Param.Unsigned(256, "Number of physical vector "
+                                      "registers")
     numPhysCCRegs = Param.Unsigned(_defaultNumPhysCCRegs,
                                    "Number of physical cc registers")
     numIQEntries = Param.Unsigned(64, "Number of instruction queue entries")
diff --git a/src/cpu/o3/comm.hh b/src/cpu/o3/comm.hh
index 49e153a52..f5be5a804 100644
--- a/src/cpu/o3/comm.hh
+++ b/src/cpu/o3/comm.hh
@@ -75,12 +75,18 @@ class PhysRegId : private RegId {
         : RegId(_regClass, _regIdx), flatIdx(_flatIdx)
     {}
 
+    /** Vector PhysRegId constructor (w/ elemIndex). */
+    explicit PhysRegId(RegClass _regClass, PhysRegIndex _regIdx,
+              ElemIndex elem_idx, PhysRegIndex flat_idx)
+        : RegId(_regClass, _regIdx, elem_idx), flatIdx(flat_idx) { }
+
     /** Visible RegId methods */
     /** @{ */
     using RegId::index;
     using RegId::classValue;
     using RegId::isZeroReg;
     using RegId::className;
+    using RegId::elemIndex;
      /** @} */
     /**
      * Explicit forward methods, to prevent comparisons of PhysRegId with
@@ -109,6 +115,12 @@ class PhysRegId : private RegId {
     /** @Return true if it is a  condition-code physical register. */
     bool isCCPhysReg() const { return isCCReg(); }
 
+    /** @Return true if it is a vector physical register. */
+    bool isVectorPhysReg() const { return isVecReg(); }
+
+    /** @Return true if it is a vector element physical register. */
+    bool isVectorPhysElem() const { return isVecElem(); }
+
     /** @Return true if it is a  condition-code physical register. */
     bool isMiscPhysReg() const { return isMiscReg(); }
 
@@ -123,11 +135,18 @@ class PhysRegId : private RegId {
 
     /** Flat index accessor */
     const PhysRegIndex& flatIndex() const { return flatIdx; }
+    /** Element elem of vid. NOTE(review): elem is passed as _flatIdx? */
+    static PhysRegId elemId(const PhysRegId* vid, ElemIndex elem)
+    {
+        assert(vid->isVectorPhysReg());
+        return PhysRegId(VecElemClass, vid->index(), elem);
+    }
 };
 
-// PhysRegIds only need to be created once and then we can use the following
-// to work with them
-typedef const PhysRegId* PhysRegIdPtr;
+/** Constant pointer definition.
+ * PhysRegIds only need to be created once and then we can just share
+ * pointers */
+using PhysRegIdPtr = const PhysRegId*;
 
 /** Struct that defines the information passed from fetch to decode. */
 template<class Impl>
diff --git a/src/cpu/o3/cpu.cc b/src/cpu/o3/cpu.cc
index a7a39b72a..c249d90ba 100644
--- a/src/cpu/o3/cpu.cc
+++ b/src/cpu/o3/cpu.cc
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2011-2012, 2014 ARM Limited
+ * Copyright (c) 2011-2012, 2014, 2016 ARM Limited
  * Copyright (c) 2013 Advanced Micro Devices, Inc.
  * All rights reserved
  *
@@ -46,6 +46,7 @@
 
 #include "cpu/o3/cpu.hh"
 
+#include "arch/generic/traits.hh"
 #include "arch/kernel_stats.hh"
 #include "config/the_isa.hh"
 #include "cpu/activity.hh"
@@ -171,9 +172,14 @@ FullO3CPU<Impl>::FullO3CPU(DerivO3CPUParams *params)
       iew(this, params),
       commit(this, params),
 
+      /* It is mandatory that all SMT threads use the same renaming mode as
+       * they are sharing registers and rename */
+      vecMode(initRenameMode<TheISA::ISA>::mode(params->isa[0])),
       regFile(params->numPhysIntRegs,
               params->numPhysFloatRegs,
-              params->numPhysCCRegs),
+              params->numPhysVecRegs,
+              params->numPhysCCRegs,
+              vecMode),
 
       freeList(name() + ".freelist", &regFile),
 
@@ -270,6 +276,7 @@ FullO3CPU<Impl>::FullO3CPU(DerivO3CPUParams *params)
     //Make Sure That this a Valid Architeture
     assert(params->numPhysIntRegs   >= numThreads * TheISA::NumIntRegs);
     assert(params->numPhysFloatRegs >= numThreads * TheISA::NumFloatRegs);
+    assert(params->numPhysVecRegs >= numThreads * TheISA::NumVecRegs);
     assert(params->numPhysCCRegs >= numThreads * TheISA::NumCCRegs);
 
     rename.setScoreboard(&scoreboard);
@@ -278,6 +285,7 @@ FullO3CPU<Impl>::FullO3CPU(DerivO3CPUParams *params)
     // Setup the rename map for whichever stages need it.
     for (ThreadID tid = 0; tid < numThreads; tid++) {
         isa[tid] = params->isa[tid];
+        assert(initRenameMode<TheISA::ISA>::equals(isa[tid], isa[0]));
 
         // Only Alpha has an FP zero register, so for other ISAs we
         // use an invalid FP register index to avoid special treatment
@@ -287,10 +295,11 @@ FullO3CPU<Impl>::FullO3CPU(DerivO3CPUParams *params)
             (THE_ISA == ALPHA_ISA) ? TheISA::ZeroReg : invalidFPReg;
 
         commitRenameMap[tid].init(&regFile, TheISA::ZeroReg, fpZeroReg,
-                                  &freeList);
+                                  &freeList,
+                                  vecMode);
 
         renameMap[tid].init(&regFile, TheISA::ZeroReg, fpZeroReg,
-                            &freeList);
+                            &freeList, vecMode);
     }
 
     // Initialize rename map to assign physical registers to the
@@ -311,6 +320,30 @@ FullO3CPU<Impl>::FullO3CPU(DerivO3CPUParams *params)
                     RegId(FloatRegClass, ridx), phys_reg);
         }
 
+        /* Here we need two 'interfaces': the 'whole register' and the
+         * 'register element'. At any point only one of them will be
+         * active. */
+        if (vecMode == Enums::Full) {
+            /* Initialize the full-vector interface */
+            for (RegIndex ridx = 0; ridx < TheISA::NumVecRegs; ++ridx) {
+                RegId rid = RegId(VecRegClass, ridx);
+                PhysRegIdPtr phys_reg = freeList.getVecReg();
+                renameMap[tid].setEntry(rid, phys_reg);
+                commitRenameMap[tid].setEntry(rid, phys_reg);
+            }
+        } else {
+            /* Initialize the vector-element interface */
+            for (RegIndex ridx = 0; ridx < TheISA::NumVecRegs; ++ridx) {
+                for (ElemIndex ldx = 0; ldx < TheISA::NumVecElemPerVecReg;
+                        ++ldx) {
+                    RegId lrid = RegId(VecElemClass, ridx, ldx);
+                    PhysRegIdPtr phys_elem = freeList.getVecElem();
+                    renameMap[tid].setEntry(lrid, phys_elem);
+                    commitRenameMap[tid].setEntry(lrid, phys_elem);
+                }
+            }
+        }
+
         for (RegIndex ridx = 0; ridx < TheISA::NumCCRegs; ++ridx) {
             PhysRegIdPtr phys_reg = freeList.getCCReg();
             renameMap[tid].setEntry(RegId(CCRegClass, ridx), phys_reg);
@@ -514,6 +547,16 @@ FullO3CPU<Impl>::regStats()
         .desc("number of floating regfile writes")
         .prereq(fpRegfileWrites);
 
+    vecRegfileReads
+        .name(name() + ".vec_regfile_reads")
+        .desc("number of vector regfile reads")
+        .prereq(vecRegfileReads);
+
+    vecRegfileWrites
+        .name(name() + ".vec_regfile_writes")
+        .desc("number of vector regfile writes")
+        .prereq(vecRegfileWrites);
+
     ccRegfileReads
         .name(name() + ".cc_regfile_reads")
         .desc("number of cc regfile reads")
@@ -1256,6 +1299,32 @@ FullO3CPU<Impl>::readFloatRegBits(PhysRegIdPtr phys_reg)
     return regFile.readFloatRegBits(phys_reg);
 }
 
+template <class Impl>
+auto
+FullO3CPU<Impl>::readVecReg(PhysRegIdPtr phys_reg) const
+        -> const VecRegContainer&
+{
+    vecRegfileReads++;
+    return regFile.readVecReg(phys_reg);
+}
+
+template <class Impl>
+auto
+FullO3CPU<Impl>::getWritableVecReg(PhysRegIdPtr phys_reg)
+        -> VecRegContainer&
+{
+    vecRegfileWrites++;
+    return regFile.getWritableVecReg(phys_reg);
+}
+
+template <class Impl>
+auto
+FullO3CPU<Impl>::readVecElem(PhysRegIdPtr phys_reg) const -> const VecElem&
+{
+    vecRegfileReads++;
+    return regFile.readVecElem(phys_reg);
+}
+
 template <class Impl>
 CCReg
 FullO3CPU<Impl>::readCCReg(PhysRegIdPtr phys_reg)
@@ -1288,6 +1357,22 @@ FullO3CPU<Impl>::setFloatRegBits(PhysRegIdPtr phys_reg, FloatRegBits val)
     regFile.setFloatRegBits(phys_reg, val);
 }
 
+template <class Impl>
+void
+FullO3CPU<Impl>::setVecReg(PhysRegIdPtr phys_reg, const VecRegContainer& val)
+{
+    vecRegfileWrites++;
+    regFile.setVecReg(phys_reg, val);
+}
+
+template <class Impl>
+void
+FullO3CPU<Impl>::setVecElem(PhysRegIdPtr phys_reg, const VecElem& val)
+{
+    vecRegfileWrites++;
+    regFile.setVecElem(phys_reg, val);
+}
+
 template <class Impl>
 void
 FullO3CPU<Impl>::setCCReg(PhysRegIdPtr phys_reg, CCReg val)
@@ -1329,6 +1414,36 @@ FullO3CPU<Impl>::readArchFloatRegInt(int reg_idx, ThreadID tid)
     return regFile.readFloatRegBits(phys_reg);
 }
 
+template <class Impl>
+auto
+FullO3CPU<Impl>::readArchVecReg(int reg_idx, ThreadID tid) const
+        -> const VecRegContainer&
+{
+    PhysRegIdPtr phys_reg = commitRenameMap[tid].lookup(
+                RegId(VecRegClass, reg_idx));
+    return readVecReg(phys_reg);
+}
+
+template <class Impl>
+auto
+FullO3CPU<Impl>::getWritableArchVecReg(int reg_idx, ThreadID tid)
+        -> VecRegContainer&
+{
+    PhysRegIdPtr phys_reg = commitRenameMap[tid].lookup(
+                RegId(VecRegClass, reg_idx));
+    return getWritableVecReg(phys_reg);
+}
+
+template <class Impl>
+auto
+FullO3CPU<Impl>::readArchVecElem(const RegIndex& reg_idx, const ElemIndex& ldx,
+                                 ThreadID tid) const -> const VecElem&
+{
+    // Vector elements are renamed under VecElemClass ids; look up as such.
+    return readVecElem(commitRenameMap[tid].lookup(
+                           RegId(VecElemClass, reg_idx, ldx)));
+}
+
 template <class Impl>
 CCReg
 FullO3CPU<Impl>::readArchCCReg(int reg_idx, ThreadID tid)
@@ -1373,6 +1488,26 @@ FullO3CPU<Impl>::setArchFloatRegInt(int reg_idx, uint64_t val, ThreadID tid)
     regFile.setFloatRegBits(phys_reg, val);
 }
 
+template <class Impl>
+void
+FullO3CPU<Impl>::setArchVecReg(int reg_idx, const VecRegContainer& val,
+                               ThreadID tid)
+{
+    PhysRegIdPtr phys_reg = commitRenameMap[tid].lookup(
+                RegId(VecRegClass, reg_idx));
+    setVecReg(phys_reg, val);
+}
+
+template <class Impl>
+void
+FullO3CPU<Impl>::setArchVecElem(const RegIndex& reg_idx, const ElemIndex& ldx,
+                                const VecElem& val, ThreadID tid)
+{
+    // Vector elements are renamed under VecElemClass ids; look up as such.
+    setVecElem(commitRenameMap[tid].lookup(
+                   RegId(VecElemClass, reg_idx, ldx)), val);
+}
+
 template <class Impl>
 void
 FullO3CPU<Impl>::setArchCCReg(int reg_idx, CCReg val, ThreadID tid)
diff --git a/src/cpu/o3/cpu.hh b/src/cpu/o3/cpu.hh
index b5cbc5fe2..d78d1b9d3 100644
--- a/src/cpu/o3/cpu.hh
+++ b/src/cpu/o3/cpu.hh
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2011-2013 ARM Limited
+ * Copyright (c) 2011-2013, 2016 ARM Limited
  * Copyright (c) 2013 Advanced Micro Devices, Inc.
  * All rights reserved
  *
@@ -53,6 +53,7 @@
 #include <set>
 #include <vector>
 
+#include "arch/generic/types.hh"
 #include "arch/types.hh"
 #include "base/statistics.hh"
 #include "config/the_isa.hh"
@@ -103,6 +104,9 @@ class FullO3CPU : public BaseO3CPU
     typedef typename Impl::DynInstPtr DynInstPtr;
     typedef typename Impl::O3CPU O3CPU;
 
+    using VecElem =  TheISA::VecElem;
+    using VecRegContainer =  TheISA::VecRegContainer;
+
     typedef O3ThreadState<Impl> ImplState;
     typedef O3ThreadState<Impl> Thread;
 
@@ -417,6 +421,46 @@ class FullO3CPU : public BaseO3CPU
 
     TheISA::FloatRegBits readFloatRegBits(PhysRegIdPtr phys_reg);
 
+    const VecRegContainer& readVecReg(PhysRegIdPtr reg_idx) const;
+
+    /**
+     * Read physical vector register for modification.
+     */
+    VecRegContainer& getWritableVecReg(PhysRegIdPtr reg_idx);
+
+    /**
+     * Read a physical vector register lane; LaneIdx is a template argument.
+     */
+    template<typename VecElem, int LaneIdx>
+    VecLaneT<VecElem, true>
+    readVecLane(PhysRegIdPtr phys_reg) const
+    {
+        vecRegfileReads++;
+        return regFile.readVecLane<VecElem, LaneIdx>(phys_reg);
+    }
+
+    /**
+     * Read a physical vector register lane; no lane index is given here.
+     */
+    template<typename VecElem>
+    VecLaneT<VecElem, true>
+    readVecLane(PhysRegIdPtr phys_reg) const
+    {
+        vecRegfileReads++;
+        return regFile.readVecLane<VecElem>(phys_reg);
+    }
+
+    /** Count a vector regfile write, then store lane data val. */
+    template<typename LD>
+    void
+    setVecLane(PhysRegIdPtr phys_reg, const LD& val)
+    {
+        vecRegfileWrites++;
+        regFile.setVecLane(phys_reg, val);
+    }
+
+    const VecElem& readVecElem(PhysRegIdPtr reg_idx) const;
+
     TheISA::CCReg readCCReg(PhysRegIdPtr phys_reg);
 
     void setIntReg(PhysRegIdPtr phys_reg, uint64_t val);
@@ -425,6 +469,10 @@ class FullO3CPU : public BaseO3CPU
 
     void setFloatRegBits(PhysRegIdPtr phys_reg, TheISA::FloatRegBits val);
 
+    void setVecReg(PhysRegIdPtr reg_idx, const VecRegContainer& val);
+
+    void setVecElem(PhysRegIdPtr reg_idx, const VecElem& val);
+
     void setCCReg(PhysRegIdPtr phys_reg, TheISA::CCReg val);
 
     uint64_t readArchIntReg(int reg_idx, ThreadID tid);
@@ -433,6 +481,34 @@ class FullO3CPU : public BaseO3CPU
 
     uint64_t readArchFloatRegInt(int reg_idx, ThreadID tid);
 
+    const VecRegContainer& readArchVecReg(int reg_idx, ThreadID tid) const;
+    /** Read architectural vector register for modification. */
+    VecRegContainer& getWritableArchVecReg(int reg_idx, ThreadID tid);
+
+    /** Read an architectural vector register lane (lId is unused here). */
+    template<typename VecElem>
+    VecLaneT<VecElem, true>
+    readArchVecLane(int reg_idx, int lId, ThreadID tid) const
+    {
+        PhysRegIdPtr phys_reg = commitRenameMap[tid].lookup(
+                    RegId(VecRegClass, reg_idx));
+        return readVecLane<VecElem>(phys_reg);
+    }
+
+
+    /** Write an architectural vector register lane (lId is unused here). */
+    template<typename LD>
+    void
+    setArchVecLane(int reg_idx, int lId, ThreadID tid, const LD& val)
+    {
+        PhysRegIdPtr phys_reg = commitRenameMap[tid].lookup(
+                    RegId(VecRegClass, reg_idx));
+        setVecLane(phys_reg, val);
+    }
+
+    const VecElem& readArchVecElem(const RegIndex& reg_idx,
+                                   const ElemIndex& ldx, ThreadID tid) const;
+
     TheISA::CCReg readArchCCReg(int reg_idx, ThreadID tid);
 
     /** Architectural register accessors.  Looks up in the commit
@@ -446,6 +522,11 @@ class FullO3CPU : public BaseO3CPU
 
     void setArchFloatRegInt(int reg_idx, uint64_t val, ThreadID tid);
 
+    void setArchVecReg(int reg_idx, const VecRegContainer& val, ThreadID tid);
+
+    void setArchVecElem(const RegIndex& reg_idx, const ElemIndex& ldx,
+                        const VecElem& val, ThreadID tid);
+
     void setArchCCReg(int reg_idx, TheISA::CCReg val, ThreadID tid);
 
     /** Sets the commit PC state of a specific thread. */
@@ -540,6 +621,9 @@ class FullO3CPU : public BaseO3CPU
     /** The commit stage. */
     typename CPUPolicy::Commit commit;
 
+    /** The rename mode of the vector registers */
+    Enums::VecRegRenameMode vecMode;
+
     /** The register file. */
     PhysRegFile regFile;
 
@@ -722,6 +806,9 @@ class FullO3CPU : public BaseO3CPU
     //number of float register file accesses
     Stats::Scalar fpRegfileReads;
     Stats::Scalar fpRegfileWrites;
+    //number of vector register file accesses
+    mutable Stats::Scalar vecRegfileReads;
+    Stats::Scalar vecRegfileWrites;
     //number of CC register file accesses
     Stats::Scalar ccRegfileReads;
     Stats::Scalar ccRegfileWrites;
diff --git a/src/cpu/o3/dyn_inst.hh b/src/cpu/o3/dyn_inst.hh
index a6adb4c20..0643e7e30 100644
--- a/src/cpu/o3/dyn_inst.hh
+++ b/src/cpu/o3/dyn_inst.hh
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2010 ARM Limited
+ * Copyright (c) 2010, 2016 ARM Limited
  * Copyright (c) 2013 Advanced Micro Devices, Inc.
  * All rights reserved
  *
@@ -72,6 +72,9 @@ class BaseO3DynInst : public BaseDynInst<Impl>
     typedef TheISA::FloatReg FloatReg;
     typedef TheISA::FloatRegBits FloatRegBits;
     typedef TheISA::CCReg   CCReg;
+    using VecRegContainer = TheISA::VecRegContainer;
+    using VecElem = TheISA::VecElem;
+    static constexpr auto NumVecElemPerVecReg = TheISA::NumVecElemPerVecReg;
 
     /** Misc register type. */
     typedef TheISA::MiscReg  MiscReg;
@@ -83,9 +86,9 @@ class BaseO3DynInst : public BaseDynInst<Impl>
 
   public:
     /** BaseDynInst constructor given a binary instruction. */
-    BaseO3DynInst(const StaticInstPtr &staticInst, const StaticInstPtr &macroop,
-                  TheISA::PCState pc, TheISA::PCState predPC,
-                  InstSeqNum seq_num, O3CPU *cpu);
+    BaseO3DynInst(const StaticInstPtr &staticInst, const StaticInstPtr
+            &macroop, TheISA::PCState pc, TheISA::PCState predPC,
+            InstSeqNum seq_num, O3CPU *cpu);
 
     /** BaseDynInst constructor given a static inst pointer. */
     BaseO3DynInst(const StaticInstPtr &_staticInst,
@@ -107,6 +110,11 @@ class BaseO3DynInst : public BaseDynInst<Impl>
     void initVars();
 
   protected:
+    /** Make dependent base-class names usable without qualification. */
+    using BaseDynInst<Impl>::cpu;
+    using BaseDynInst<Impl>::_srcRegIdx;
+    using BaseDynInst<Impl>::_destRegIdx;
+
     /** Values to be written to the destination misc. registers. */
     std::array<MiscReg, TheISA::MaxMiscDestRegs> _destMiscRegVal;
 
@@ -213,19 +221,30 @@ class BaseO3DynInst : public BaseDynInst<Impl>
             switch (original_dest_reg.classValue()) {
               case IntRegClass:
                 this->setIntRegOperand(this->staticInst.get(), idx,
-                                       this->cpu->readIntReg(prev_phys_reg));
+                               this->cpu->readIntReg(prev_phys_reg));
                 break;
               case FloatRegClass:
                 this->setFloatRegOperandBits(this->staticInst.get(), idx,
-                                             this->cpu->readFloatRegBits(prev_phys_reg));
+                               this->cpu->readFloatRegBits(prev_phys_reg));
+                break;
+              case VecRegClass:
+                this->setVecRegOperand(this->staticInst.get(), idx,
+                               this->cpu->readVecReg(prev_phys_reg));
+                break;
+              case VecElemClass:
+                this->setVecElemOperand(this->staticInst.get(), idx,
+                               this->cpu->readVecElem(prev_phys_reg));
                 break;
               case CCRegClass:
                 this->setCCRegOperand(this->staticInst.get(), idx,
-                                      this->cpu->readCCReg(prev_phys_reg));
+                               this->cpu->readCCReg(prev_phys_reg));
                 break;
               case MiscRegClass:
                 // no need to forward misc reg values
                 break;
+              default:
+                panic("Unknown register class: %d",
+                        (int)original_dest_reg.classValue());
             }
         }
     }
@@ -266,6 +285,89 @@ class BaseO3DynInst : public BaseDynInst<Impl>
         return this->cpu->readFloatRegBits(this->_srcRegIdx[idx]);
     }
 
+    const VecRegContainer&
+    readVecRegOperand(const StaticInst *si, int idx) const
+    {
+        return this->cpu->readVecReg(this->_srcRegIdx[idx]);
+    }
+
+    /**
+     * Read destination vector register operand for modification.
+     */
+    VecRegContainer&
+    getWritableVecRegOperand(const StaticInst *si, int idx)
+    {
+        return this->cpu->getWritableVecReg(this->_destRegIdx[idx]);
+    }
+
+    /** Vector Register Lane Interfaces. */
+    /** @{ */
+    /** Reads source vector 8bit operand. */
+    ConstVecLane8
+    readVec8BitLaneOperand(const StaticInst *si, int idx) const
+    {
+        return cpu->template readVecLane<uint8_t>(_srcRegIdx[idx]);
+    }
+
+    /** Reads source vector 16bit operand. */
+    ConstVecLane16
+    readVec16BitLaneOperand(const StaticInst *si, int idx) const
+    {
+        return cpu->template readVecLane<uint16_t>(_srcRegIdx[idx]);
+    }
+
+    /** Reads source vector 32bit operand. */
+    ConstVecLane32
+    readVec32BitLaneOperand(const StaticInst *si, int idx) const
+    {
+        return cpu->template readVecLane<uint32_t>(_srcRegIdx[idx]);
+    }
+
+    /** Reads source vector 64bit operand. */
+    ConstVecLane64
+    readVec64BitLaneOperand(const StaticInst *si, int idx) const
+    {
+        return cpu->template readVecLane<uint64_t>(_srcRegIdx[idx]);
+    }
+
+    /** Write lane data val into the vector register of dest operand idx. */
+    template <typename LD>
+    void
+    setVecLaneOperandT(const StaticInst *si, int idx, const LD& val)
+    {
+        cpu->setVecLane(_destRegIdx[idx], val);
+    }
+    /** Byte-lane write; forwards to setVecLaneOperandT. */
+    virtual void setVecLaneOperand(const StaticInst *si, int idx,
+            const LaneData<LaneSize::Byte>& val)
+    {
+        setVecLaneOperandT(si, idx, val);
+    }
+    /** Two-byte-lane write; forwards to setVecLaneOperandT. */
+    virtual void setVecLaneOperand(const StaticInst *si, int idx,
+            const LaneData<LaneSize::TwoByte>& val)
+    {
+        setVecLaneOperandT(si, idx, val);
+    }
+    /** Four-byte-lane write; forwards to setVecLaneOperandT. */
+    virtual void setVecLaneOperand(const StaticInst *si, int idx,
+            const LaneData<LaneSize::FourByte>& val)
+    {
+        setVecLaneOperandT(si, idx, val);
+    }
+    /** Eight-byte-lane write; forwards to setVecLaneOperandT. */
+    virtual void setVecLaneOperand(const StaticInst *si, int idx,
+            const LaneData<LaneSize::EightByte>& val)
+    {
+        setVecLaneOperandT(si, idx, val);
+    }
+    /** @} */
+
+    VecElem readVecElemOperand(const StaticInst *si, int idx) const
+    {
+        return this->cpu->readVecElem(this->_srcRegIdx[idx]);
+    }
+
     CCReg readCCRegOperand(const StaticInst *si, int idx)
     {
         return this->cpu->readCCReg(this->_srcRegIdx[idx]);
@@ -293,6 +395,22 @@ class BaseO3DynInst : public BaseDynInst<Impl>
         BaseDynInst<Impl>::setFloatRegOperandBits(si, idx, val);
     }
 
+    /** Write the vector through the CPU, then forward to BaseDynInst. */
+    void setVecRegOperand(const StaticInst *si, int idx,
+                          const VecRegContainer& val)
+    {
+        this->cpu->setVecReg(this->_destRegIdx[idx], val);
+        BaseDynInst<Impl>::setVecRegOperand(si, idx, val);
+    }
+
+    /** Write the element through the CPU, then forward to BaseDynInst. */
+    void
+    setVecElemOperand(const StaticInst *si, int idx,
+                      const VecElem val)
+    {
+        this->cpu->setVecElem(this->_destRegIdx[idx], val);
+        BaseDynInst<Impl>::setVecElemOperand(si, idx, val);
+    }
+
     void setCCRegOperand(const StaticInst *si, int idx, CCReg val)
     {
         this->cpu->setCCReg(this->_destRegIdx[idx], val);
diff --git a/src/cpu/o3/free_list.hh b/src/cpu/o3/free_list.hh
index 6fc6cc909..f4c26a697 100644
--- a/src/cpu/o3/free_list.hh
+++ b/src/cpu/o3/free_list.hh
@@ -1,4 +1,16 @@
 /*
+ * Copyright (c) 2016 ARM Limited
+ * All rights reserved
+ *
+ * The license below extends only to copyright in the software and shall
+ * not be construed as granting a license to any other intellectual
+ * property including but not limited to intellectual property relating
+ * to a hardware implementation of the functionality of the software
+ * licensed hereunder. You may use the software subject to the license
+ * terms below provided that you ensure that this notice is replicated
+ * unmodified and in its entirety in all distributions of the software,
+ * modified or unmodified, in source code or in binary form.
+ *
  * Copyright (c) 2004-2005 The Regents of The University of Michigan
  * Copyright (c) 2013 Advanced Micro Devices, Inc.
  * All rights reserved.
@@ -63,6 +75,16 @@ class SimpleFreeList
     /** Add a physical register to the free list */
     void addReg(PhysRegIdPtr reg) { freeRegs.push(reg); }
 
+    /** Add every register in [first, last) to the free list. A pointer to
+     * each element is queued, so the elements must stay alive while queued. */
+    template<class InputIt>
+    void
+    addRegs(InputIt first, InputIt last)
+    {
+        for (; first != last; ++first)
+            freeRegs.push(&*first);
+    }
+
     /** Get the next available register from the free list */
     PhysRegIdPtr getReg()
     {
@@ -107,6 +129,15 @@ class UnifiedFreeList
     /** The list of free floating point registers. */
     SimpleFreeList floatList;
 
+    /** The following two are exclusive interfaces. */
+    /** @{ */
+    /** The list of free vector registers. */
+    SimpleFreeList vecList;
+
+    /** The list of free vector element registers. */
+    SimpleFreeList vecElemList;
+    /** @} */
+
     /** The list of free condition-code registers. */
     SimpleFreeList ccList;
 
@@ -146,18 +177,36 @@ class UnifiedFreeList
     /** Gets a free fp register. */
     PhysRegIdPtr getFloatReg() { return floatList.getReg(); }
 
+    /** Gets a free vector register. */
+    PhysRegIdPtr getVecReg() { return vecList.getReg(); }
+
+    /** Gets a free vector element register. */
+    PhysRegIdPtr getVecElem() { return vecElemList.getReg(); }
+
     /** Gets a free cc register. */
     PhysRegIdPtr getCCReg() { return ccList.getReg(); }
 
     /** Adds a register back to the free list. */
     void addReg(PhysRegIdPtr freed_reg);
 
+    /** Adds a range of registers back to the free list. */
+    template<class InputIt>
+    void addRegs(InputIt first, InputIt last);
+
     /** Adds an integer register back to the free list. */
     void addIntReg(PhysRegIdPtr freed_reg) { intList.addReg(freed_reg); }
 
     /** Adds a fp register back to the free list. */
     void addFloatReg(PhysRegIdPtr freed_reg) { floatList.addReg(freed_reg); }
 
+    /** Adds a vector register back to the free list. */
+    void addVecReg(PhysRegIdPtr freed_reg) { vecList.addReg(freed_reg); }
+
+    /** Adds a vector element register back to the free list. */
+    void addVecElem(PhysRegIdPtr freed_reg) {
+        vecElemList.addReg(freed_reg);
+    }
+
     /** Adds a cc register back to the free list. */
     void addCCReg(PhysRegIdPtr freed_reg) { ccList.addReg(freed_reg); }
 
@@ -167,6 +216,12 @@ class UnifiedFreeList
     /** Checks if there are any free fp registers. */
     bool hasFreeFloatRegs() const { return floatList.hasFreeRegs(); }
 
+    /** Checks if there are any free vector registers. */
+    bool hasFreeVecRegs() const { return vecList.hasFreeRegs(); }
+
+    /** Checks if there are any free vector element registers. */
+    bool hasFreeVecElems() const { return vecElemList.hasFreeRegs(); }
+
     /** Checks if there are any free cc registers. */
     bool hasFreeCCRegs() const { return ccList.hasFreeRegs(); }
 
@@ -176,10 +231,49 @@ class UnifiedFreeList
     /** Returns the number of free fp registers. */
     unsigned numFreeFloatRegs() const { return floatList.numFreeRegs(); }
 
+    /** Returns the number of free vector registers. */
+    unsigned numFreeVecRegs() const { return vecList.numFreeRegs(); }
+
     /** Returns the number of free cc registers. */
     unsigned numFreeCCRegs() const { return ccList.numFreeRegs(); }
 };
 
+template<class InputIt>
+inline void
+UnifiedFreeList::addRegs(InputIt first, InputIt last)
+{
+    // Nothing to do for an empty range (this also keeps last-1 valid below).
+    if (first == last)
+        return;
+
+    // Only the two ends of the range are compared. The range is known to be
+    // non-empty at this point, so the condition needs no emptiness test.
+    panic_if(first->classValue() != (last-1)->classValue(),
+            "Attempt to add mixed type regs: %s and %s",
+            first->className(), (last-1)->className());
+    // Hand the whole range to the free list of its register class.
+    switch (first->classValue()) {
+        case IntRegClass:
+            intList.addRegs(first, last);
+            break;
+        case FloatRegClass:
+            floatList.addRegs(first, last);
+            break;
+        case VecRegClass:
+            vecList.addRegs(first, last);
+            break;
+        case VecElemClass:
+            vecElemList.addRegs(first, last);
+            break;
+        case CCRegClass:
+            ccList.addRegs(first, last);
+            break;
+        default:
+            panic("Unexpected RegClass (%s)",
+                  first->className());
+    }
+}
+
 inline void
 UnifiedFreeList::addReg(PhysRegIdPtr freed_reg)
 {
@@ -194,6 +288,12 @@ UnifiedFreeList::addReg(PhysRegIdPtr freed_reg)
         case FloatRegClass:
             floatList.addReg(freed_reg);
             break;
+        case VecRegClass:
+            vecList.addReg(freed_reg);
+            break;
+        case VecElemClass:
+            vecElemList.addReg(freed_reg);
+            break;
         case CCRegClass:
             ccList.addReg(freed_reg);
             break;
diff --git a/src/cpu/o3/inst_queue_impl.hh b/src/cpu/o3/inst_queue_impl.hh
index f52cf2d6c..2b113ae04 100644
--- a/src/cpu/o3/inst_queue_impl.hh
+++ b/src/cpu/o3/inst_queue_impl.hh
@@ -98,8 +98,11 @@ InstructionQueue<Impl>::InstructionQueue(O3CPU *cpu_ptr, IEW *iew_ptr,
     numThreads = params->numThreads;
 
     // Set the number of total physical registers
+    // As the vector registers have two addressing modes, they are added twice
     numPhysRegs = params->numPhysIntRegs + params->numPhysFloatRegs +
-        params->numPhysCCRegs;
+                    params->numPhysVecRegs +
+                    params->numPhysVecRegs * TheISA::NumVecElemPerVecReg +
+                    params->numPhysCCRegs;
 
     //Create an entry for each physical register within the
     //dependency graph.
diff --git a/src/cpu/o3/regfile.cc b/src/cpu/o3/regfile.cc
index ea4370f48..2f41e2ac2 100644
--- a/src/cpu/o3/regfile.cc
+++ b/src/cpu/o3/regfile.cc
@@ -1,4 +1,16 @@
 /*
+ * Copyright (c) 2016 ARM Limited
+ * All rights reserved
+ *
+ * The license below extends only to copyright in the software and shall
+ * not be construed as granting a license to any other intellectual
+ * property including but not limited to intellectual property relating
+ * to a hardware implementation of the functionality of the software
+ * licensed hereunder. You may use the software subject to the license
+ * terms below provided that you ensure that this notice is replicated
+ * unmodified and in its entirety in all distributions of the software,
+ * modified or unmodified, in source code or in binary form.
+ *
  * Copyright (c) 2004-2005 The Regents of The University of Michigan
  * Copyright (c) 2013 Advanced Micro Devices, Inc.
  * All rights reserved.
@@ -33,20 +45,31 @@
 
 #include "cpu/o3/regfile.hh"
 
+#include "cpu/o3/free_list.hh"
+#include "arch/generic/types.hh"
 #include "cpu/o3/free_list.hh"
 
 PhysRegFile::PhysRegFile(unsigned _numPhysicalIntRegs,
                          unsigned _numPhysicalFloatRegs,
-                         unsigned _numPhysicalCCRegs)
+                         unsigned _numPhysicalVecRegs,
+                         unsigned _numPhysicalCCRegs,
+                         VecMode vmode)
     : intRegFile(_numPhysicalIntRegs),
       floatRegFile(_numPhysicalFloatRegs),
+      vectorRegFile(_numPhysicalVecRegs),
       ccRegFile(_numPhysicalCCRegs),
       numPhysicalIntRegs(_numPhysicalIntRegs),
       numPhysicalFloatRegs(_numPhysicalFloatRegs),
+      numPhysicalVecRegs(_numPhysicalVecRegs),
+      numPhysicalVecElemRegs(_numPhysicalVecRegs *
+                             NumVecElemPerVecReg),
       numPhysicalCCRegs(_numPhysicalCCRegs),
       totalNumRegs(_numPhysicalIntRegs
                    + _numPhysicalFloatRegs
-                   + _numPhysicalCCRegs)
+                   + _numPhysicalVecRegs
+                   + _numPhysicalVecRegs * NumVecElemPerVecReg
+                   + _numPhysicalCCRegs),
+      vecMode(vmode)
 {
     PhysRegIndex phys_reg;
     PhysRegIndex flat_reg_idx = 0;
@@ -68,6 +91,23 @@ PhysRegFile::PhysRegFile(unsigned _numPhysicalIntRegs,
         floatRegIds.emplace_back(FloatRegClass, phys_reg, flat_reg_idx++);
     }
 
+    // The next batch of the registers are the vector physical
+    // registers; put them onto the vector free list.
+    for (phys_reg = 0; phys_reg < numPhysicalVecRegs; phys_reg++) {
+        vectorRegFile[phys_reg].zero();
+        vecRegIds.emplace_back(VecRegClass, phys_reg, flat_reg_idx++);
+    }
+    // The next batch of the registers are the vector element physical
+    // registers; they refer to the same containers as the vector
+    // registers, just a different (and incompatible) way to access
+    // them; put them onto the vector free list.
+    for (phys_reg = 0; phys_reg < numPhysicalVecRegs; phys_reg++) {
+        for (ElemIndex eIdx = 0; eIdx < NumVecElemPerVecReg; eIdx++) {
+            vecElemIds.emplace_back(VecElemClass, phys_reg,
+                    eIdx, flat_reg_idx++);
+        }
+    }
+
     // The rest of the registers are the condition-code physical
     // registers; put them onto the condition-code free list.
     for (phys_reg = 0; phys_reg < numPhysicalCCRegs; phys_reg++) {
@@ -90,20 +130,90 @@ PhysRegFile::initFreeList(UnifiedFreeList *freeList)
     // The initial batch of registers are the integer ones
     for (reg_idx = 0; reg_idx < numPhysicalIntRegs; reg_idx++) {
         assert(intRegIds[reg_idx].index() == reg_idx);
-        freeList->addIntReg(&intRegIds[reg_idx]);
     }
+    freeList->addRegs(intRegIds.begin(), intRegIds.end());
 
     // The next batch of the registers are the floating-point physical
     // registers; put them onto the floating-point free list.
     for (reg_idx = 0; reg_idx < numPhysicalFloatRegs; reg_idx++) {
         assert(floatRegIds[reg_idx].index() == reg_idx);
-        freeList->addFloatReg(&floatRegIds[reg_idx]);
     }
+    freeList->addRegs(floatRegIds.begin(), floatRegIds.end());
+
+    /* The next batch of the registers are the vector physical
+     * registers; put them onto the vector free list. */
+    for (reg_idx = 0; reg_idx < numPhysicalVecRegs; reg_idx++) {
+        assert(vecRegIds[reg_idx].index() == reg_idx);
+        for (ElemIndex elemIdx = 0; elemIdx < NumVecElemPerVecReg; elemIdx++) {
+            assert(vecElemIds[reg_idx * NumVecElemPerVecReg +
+                    elemIdx].index() == reg_idx);
+            assert(vecElemIds[reg_idx * NumVecElemPerVecReg +
+                    elemIdx].elemIndex() == elemIdx);
+        }
+    }
+
+    /* depending on the mode we add the vector registers as whole units or
+     * as different elements. */
+    if (vecMode == Enums::Full)
+        freeList->addRegs(vecRegIds.begin(), vecRegIds.end());
+    else
+        freeList->addRegs(vecElemIds.begin(), vecElemIds.end());
 
     // The rest of the registers are the condition-code physical
     // registers; put them onto the condition-code free list.
     for (reg_idx = 0; reg_idx < numPhysicalCCRegs; reg_idx++) {
         assert(ccRegIds[reg_idx].index() == reg_idx);
-        freeList->addCCReg(&ccRegIds[reg_idx]);
     }
+    freeList->addRegs(ccRegIds.begin(), ccRegIds.end());
 }
+
+auto
+PhysRegFile::getRegElemIds(PhysRegIdPtr reg) -> IdRange
+{
+    // Only a full vector register can be broken down into elements.
+    panic_if(!reg->isVectorPhysReg(),
+            "Trying to get elems of a %s register", reg->className());
+    // The elems of register idx are contiguous, starting at idx * N.
+    const auto first = vecElemIds.begin() + reg->index() * NumVecElemPerVecReg;
+    return std::make_pair(first, first + NumVecElemPerVecReg);
+}
+
+auto
+PhysRegFile::getRegIds(RegClass cls) -> IdRange
+{
+    switch (cls)
+    {
+      case IntRegClass:
+        return std::make_pair(intRegIds.begin(), intRegIds.end());
+      case FloatRegClass:
+        return std::make_pair(floatRegIds.begin(), floatRegIds.end());
+      case VecRegClass:
+        return std::make_pair(vecRegIds.begin(), vecRegIds.end());
+      case VecElemClass:
+        return std::make_pair(vecElemIds.begin(), vecElemIds.end());
+      case CCRegClass:
+        return std::make_pair(ccRegIds.begin(), ccRegIds.end());
+      case MiscRegClass:
+        return std::make_pair(miscRegIds.begin(), miscRegIds.end());
+    }
+    /* Unhandled class: return a pair of value-initialised iterators */
+    return std::make_pair(PhysIds::const_iterator(),
+                          PhysIds::const_iterator());
+}
+
+PhysRegIdPtr
+PhysRegFile::getTrueId(PhysRegIdPtr reg)
+{
+    switch (reg->classValue()) {
+    case VecRegClass:
+        return &vecRegIds[reg->index()];
+    case VecElemClass:
+        return &vecElemIds[reg->index() * NumVecElemPerVecReg +
+            reg->elemIndex()];
+    default:
+        // Only vector registers and vector elements are handled here.
+        panic("Trying to get the register of a %s register", reg->className());
+    }
+    return nullptr;
+}
+
diff --git a/src/cpu/o3/regfile.hh b/src/cpu/o3/regfile.hh
index c353b2746..7feec933f 100644
--- a/src/cpu/o3/regfile.hh
+++ b/src/cpu/o3/regfile.hh
@@ -1,4 +1,16 @@
 /*
+ * Copyright (c) 2016 ARM Limited
+ * All rights reserved
+ *
+ * The license below extends only to copyright in the software and shall
+ * not be construed as granting a license to any other intellectual
+ * property including but not limited to intellectual property relating
+ * to a hardware implementation of the functionality of the software
+ * licensed hereunder. You may use the software subject to the license
+ * terms below provided that you ensure that this notice is replicated
+ * unmodified and in its entirety in all distributions of the software,
+ * modified or unmodified, in source code or in binary form.
+ *
  * Copyright (c) 2004-2005 The Regents of The University of Michigan
  * Copyright (c) 2013 Advanced Micro Devices, Inc.
  * All rights reserved.
@@ -42,6 +54,7 @@
 #include "config/the_isa.hh"
 #include "cpu/o3/comm.hh"
 #include "debug/IEW.hh"
+#include "enums/VecRegRenameMode.hh"
 
 class UnifiedFreeList;
 
@@ -56,6 +69,15 @@ class PhysRegFile
     typedef TheISA::FloatReg FloatReg;
     typedef TheISA::FloatRegBits FloatRegBits;
     typedef TheISA::CCReg CCReg;
+    using VecElem = TheISA::VecElem;
+    using VecRegContainer = TheISA::VecRegContainer;
+    using PhysIds = std::vector<PhysRegId>;
+    using VecMode = Enums::VecRegRenameMode;
+  public:
+    using IdRange = std::pair<PhysIds::const_iterator,
+                              PhysIds::const_iterator>;
+  private:
+    static constexpr auto NumVecElemPerVecReg = TheISA::NumVecElemPerVecReg;
 
     typedef union {
         FloatReg d;
@@ -70,6 +92,11 @@ class PhysRegFile
     std::vector<PhysFloatReg> floatRegFile;
     std::vector<PhysRegId> floatRegIds;
 
+    /** Vector register file. */
+    std::vector<VecRegContainer> vectorRegFile;
+    std::vector<PhysRegId> vecRegIds;
+    std::vector<PhysRegId> vecElemIds;
+
     /** Condition-code register file. */
     std::vector<CCReg> ccRegFile;
     std::vector<PhysRegId> ccRegIds;
@@ -83,18 +110,31 @@ class PhysRegFile
     unsigned numPhysicalIntRegs;
 
     /**
-     * Number of physical general purpose registers
+     * Number of physical floating point registers
      */
     unsigned numPhysicalFloatRegs;
 
     /**
-     * Number of physical general purpose registers
+     * Number of physical vector registers
+     */
+    unsigned numPhysicalVecRegs;
+
+    /**
+     * Number of physical vector element registers
+     */
+    unsigned numPhysicalVecElemRegs;
+
+    /**
+     * Number of physical CC registers
      */
     unsigned numPhysicalCCRegs;
 
     /** Total number of physical registers. */
     unsigned totalNumRegs;
 
+    /** Mode in which vector registers are addressed. */
+    VecMode vecMode;
+
   public:
     /**
      * Constructs a physical register file with the specified amount of
@@ -102,7 +142,10 @@ class PhysRegFile
      */
     PhysRegFile(unsigned _numPhysicalIntRegs,
                 unsigned _numPhysicalFloatRegs,
-                unsigned _numPhysicalCCRegs);
+                unsigned _numPhysicalVecRegs,
+                unsigned _numPhysicalCCRegs,
+                VecMode vmode
+                );
 
     /**
      * Destructor to free resources
@@ -117,6 +160,11 @@ class PhysRegFile
 
     /** @return the number of floating-point physical registers. */
     unsigned numFloatPhysRegs() const { return numPhysicalFloatRegs; }
+    /** @return the number of vector physical registers. */
+    unsigned numVecPhysRegs() const { return numPhysicalVecRegs; }
+
+    /** @return the number of vector element physical registers. */
+    unsigned numVecElemPhysRegs() const { return numPhysicalVecElemRegs; }
 
     /** @return the number of condition-code physical registers. */
     unsigned numCCPhysRegs() const { return numPhysicalCCRegs; }
@@ -164,6 +212,68 @@ class PhysRegFile
         return floatRegBits;
     }
 
+    /** Reads a vector register. */
+    const VecRegContainer& readVecReg(PhysRegIdPtr phys_reg) const
+    {
+        assert(phys_reg->isVectorPhysReg());
+
+        DPRINTF(IEW, "RegFile: Access to vector register %i, has "
+                "data %s\n", int(phys_reg->index()),
+                vectorRegFile[phys_reg->index()].as<VecElem>().print());
+
+        return vectorRegFile[phys_reg->index()];
+    }
+
+    /** Returns a modifiable reference to a vector register. */
+    VecRegContainer& getWritableVecReg(PhysRegIdPtr phys_reg)
+    {
+        /* Reuse readVecReg (check + trace) and drop the const qualifier. */
+        return const_cast<VecRegContainer&>(readVecReg(phys_reg));
+    }
+
+    /** Reads the vector register lane selected by the LaneIdx parameter. */
+    template <typename VecElem, int LaneIdx>
+    VecLaneT<VecElem, true>
+    readVecLane(PhysRegIdPtr phys_reg) const
+    {
+        return readVecReg(phys_reg).laneView<VecElem, LaneIdx>();
+    }
+
+    /** Reads the vector register lane selected by phys_reg's elemIndex. */
+    template <typename VecElem>
+    VecLaneT<VecElem, true>
+    readVecLane(PhysRegIdPtr phys_reg) const
+    {
+        return readVecReg(phys_reg).laneView<VecElem>(phys_reg->elemIndex());
+    }
+
+    /** Sets a vector register lane to the given value. */
+    template <typename LD>
+    void
+    setVecLane(PhysRegIdPtr phys_reg, const LD& val)
+    {
+        assert(phys_reg->isVectorPhysReg());
+
+        DPRINTF(IEW, "RegFile: Setting vector register %i[%d] to %lx\n",
+                int(phys_reg->index()), phys_reg->elemIndex(), val);
+
+        vectorRegFile[phys_reg->index()].laneView<typename LD::UnderlyingType>(
+                phys_reg->elemIndex()) = val;
+    }
+
+    /** Reads a vector element. */
+    const VecElem& readVecElem(PhysRegIdPtr phys_reg) const
+    {
+        assert(phys_reg->isVectorPhysElem());
+        auto ret = vectorRegFile[phys_reg->index()].as<VecElem>();
+        const VecElem& val = ret[phys_reg->elemIndex()];
+        DPRINTF(IEW, "RegFile: Access to element %d of vector register %i,"
+                " has data %#x\n", phys_reg->elemIndex(),
+                int(phys_reg->index()), val);
+
+        return val;
+    }
+
     /** Reads a condition-code register. */
     CCReg readCCReg(PhysRegIdPtr phys_reg)
     {
@@ -207,7 +317,31 @@ class PhysRegFile
         DPRINTF(IEW, "RegFile: Setting float register %i to %#x\n",
                 phys_reg->index(), (uint64_t)val);
 
-        floatRegFile[phys_reg->index()].q = val;
+        if (!phys_reg->isZeroReg())
+            floatRegFile[phys_reg->index()].q = val;
+    }
+
+    /** Sets a vector register to the given value. */
+    void setVecReg(PhysRegIdPtr phys_reg, const VecRegContainer& val)
+    {
+        assert(phys_reg->isVectorPhysReg());
+
+        DPRINTF(IEW, "RegFile: Setting vector register %i to %s\n",
+                int(phys_reg->index()), val.print());
+
+        vectorRegFile[phys_reg->index()] = val;
+    }
+
+    /** Sets a vector register element to the given value. */
+    void setVecElem(PhysRegIdPtr phys_reg, const VecElem val)
+    {
+        assert(phys_reg->isVectorPhysElem());
+
+        DPRINTF(IEW, "RegFile: Setting element %d of vector register %i to"
+                " %#x\n", phys_reg->elemIndex(), int(phys_reg->index()), val);
+
+        vectorRegFile[phys_reg->index()].as<VecElem>()[phys_reg->elemIndex()] =
+                val;
     }
 
     /** Sets a condition-code register to the given value. */
@@ -220,6 +354,25 @@ class PhysRegFile
 
         ccRegFile[phys_reg->index()] = val;
     }
+
+    /** Get the PhysRegIds of the elems of a vector register.
+     * Auxiliary function to transition from Full vector mode to Elem mode.
+     */
+    IdRange getRegElemIds(PhysRegIdPtr reg);
+
+    /**
+     * Get the PhysRegIds of all the registers of the given class.
+     * Auxiliary function to transition from Elem vector mode to Full mode
+     * and to initialise the rename map.
+     */
+    IdRange getRegIds(RegClass cls);
+
+     /**
+      * Get the true physical register id.
+      * As many parts work with PhysRegIdPtr, we need to be able to produce
+      * the pointer out of just class and register idx.
+      */
+     PhysRegIdPtr getTrueId(PhysRegIdPtr reg);
 };
 
 
diff --git a/src/cpu/o3/rename.hh b/src/cpu/o3/rename.hh
index 6d3861ba6..d0f6ba13d 100644
--- a/src/cpu/o3/rename.hh
+++ b/src/cpu/o3/rename.hh
@@ -464,8 +464,6 @@ class DefaultRename
     /** The maximum skid buffer size. */
     unsigned skidBufferMax;
 
-    PhysRegIndex maxPhysicalRegs;
-
     /** Enum to record the source of a structure full stall.  Can come from
      * either ROB, IQ, LSQ, and it is priortized in that order.
      */
@@ -515,6 +513,7 @@ class DefaultRename
     Stats::Scalar renameRenameLookups;
     Stats::Scalar intRenameLookups;
     Stats::Scalar fpRenameLookups;
+    Stats::Scalar vecRenameLookups;
     /** Stat for total number of committed renaming mappings. */
     Stats::Scalar renameCommittedMaps;
     /** Stat for total number of mappings that were undone due to a squash. */
diff --git a/src/cpu/o3/rename_impl.hh b/src/cpu/o3/rename_impl.hh
index 9c9b030f5..b9adcdff7 100644
--- a/src/cpu/o3/rename_impl.hh
+++ b/src/cpu/o3/rename_impl.hh
@@ -67,9 +67,7 @@ DefaultRename<Impl>::DefaultRename(O3CPU *_cpu, DerivO3CPUParams *params)
       commitToRenameDelay(params->commitToRenameDelay),
       renameWidth(params->renameWidth),
       commitWidth(params->commitWidth),
-      numThreads(params->numThreads),
-      maxPhysicalRegs(params->numPhysIntRegs + params->numPhysFloatRegs
-                      + params->numPhysCCRegs)
+      numThreads(params->numThreads)
 {
     if (renameWidth > Impl::MaxWidth)
         fatal("renameWidth (%d) is larger than compiled limit (%d),\n"
@@ -182,6 +180,10 @@ DefaultRename<Impl>::regStats()
         .name(name() + ".fp_rename_lookups")
         .desc("Number of floating rename lookups")
         .prereq(fpRenameLookups);
+    vecRenameLookups
+        .name(name() + ".vec_rename_lookups")
+        .desc("Number of vector rename lookups")
+        .prereq(vecRenameLookups);
 }
 
 template <class Impl>
@@ -645,6 +647,8 @@ DefaultRename<Impl>::renameInsts(ThreadID tid)
         // to rename to.  Otherwise block.
         if (!renameMap[tid]->canRename(inst->numIntDestRegs(),
                                        inst->numFPDestRegs(),
+                                       inst->numVecDestRegs(),
+                                       inst->numVecElemDestRegs(),
                                        inst->numCCDestRegs())) {
             DPRINTF(Rename, "Blocking due to lack of free "
                     "physical registers to rename to.\n");
@@ -1239,12 +1243,17 @@ DefaultRename<Impl>::readFreeEntries(ThreadID tid)
     }
 
     DPRINTF(Rename, "[tid:%i]: Free IQ: %i, Free ROB: %i, "
-                    "Free LQ: %i, Free SQ: %i\n",
+                    "Free LQ: %i, Free SQ: %i, FreeRM %i(%i %i %i %i)\n",
             tid,
             freeEntries[tid].iqEntries,
             freeEntries[tid].robEntries,
             freeEntries[tid].lqEntries,
-            freeEntries[tid].sqEntries);
+            freeEntries[tid].sqEntries,
+            renameMap[tid]->numFreeEntries(),
+            renameMap[tid]->numFreeIntEntries(),
+            renameMap[tid]->numFreeFloatEntries(),
+            renameMap[tid]->numFreeVecEntries(),
+            renameMap[tid]->numFreeCCEntries());
 
     DPRINTF(Rename, "[tid:%i]: %i instructions not yet in ROB\n",
             tid, instsInProgress[tid]);
diff --git a/src/cpu/o3/rename_map.cc b/src/cpu/o3/rename_map.cc
index 38ccc7ec9..bde2a6921 100644
--- a/src/cpu/o3/rename_map.cc
+++ b/src/cpu/o3/rename_map.cc
@@ -1,4 +1,16 @@
 /*
+ * Copyright (c) 2016 ARM Limited
+ * All rights reserved
+ *
+ * The license below extends only to copyright in the software and shall
+ * not be construed as granting a license to any other intellectual
+ * property including but not limited to intellectual property relating
+ * to a hardware implementation of the functionality of the software
+ * licensed hereunder. You may use the software subject to the license
+ * terms below provided that you ensure that this notice is replicated
+ * unmodified and in its entirety in all distributions of the software,
+ * modified or unmodified, in source code or in binary form.
+ *
  * Copyright (c) 2004-2005 The Regents of The University of Michigan
  * Copyright (c) 2013 Advanced Micro Devices, Inc.
  * All rights reserved.
@@ -93,15 +105,92 @@ void
 UnifiedRenameMap::init(PhysRegFile *_regFile,
                        RegIndex _intZeroReg,
                        RegIndex _floatZeroReg,
-                       UnifiedFreeList *freeList)
+                       UnifiedFreeList *freeList,
+                       VecMode _mode)
 {
     regFile = _regFile;
+    vecMode = _mode;
 
     intMap.init(TheISA::NumIntRegs, &(freeList->intList), _intZeroReg);
 
     floatMap.init(TheISA::NumFloatRegs, &(freeList->floatList), _floatZeroReg);
 
+    vecMap.init(TheISA::NumVecRegs, &(freeList->vecList), (RegIndex)-1);
+
+    vecElemMap.init(TheISA::NumVecRegs * NVecElems,
+            &(freeList->vecElemList), (RegIndex)-1);
+
     ccMap.init(TheISA::NumCCRegs, &(freeList->ccList), (RegIndex)-1);
 
 }
 
+void
+UnifiedRenameMap::switchMode(VecMode newVecMode, UnifiedFreeList* freeList)
+{
+    if (newVecMode == Enums::Elem && vecMode == Enums::Full) {
+        /* Switch to vector element rename mode: the free list must be
+         * tracking full registers only, all but the arch-mapped ones. */
+        panic_if(freeList->hasFreeVecElems(),
+                "The free list is already tracking Vec elems");
+        panic_if(freeList->numFreeVecRegs() !=
+                regFile->numVecPhysRegs() - TheISA::NumVecRegs,
+                "The free list has lost vector registers");
+        /* Split the mapping of each arch reg. */
+        int reg = 0;
+        for (auto &e: vecMap) {
+            PhysRegFile::IdRange range = this->regFile->getRegElemIds(e);
+            uint32_t i;
+            for (i = 0; range.first != range.second; i++, range.first++) {
+                vecElemMap.setEntry(RegId(VecElemClass, reg, i),
+                                    &(*range.first));
+            }
+            panic_if(i != NVecElems,
+                "Wrong number of elems: expecting %u, got %d\n",
+                TheISA::NumVecElemPerVecReg, i);
+            reg++;
+        }
+        /* Split the free regs. */
+        while (freeList->hasFreeVecRegs()) {
+            auto vr = freeList->getVecReg();
+            auto range = this->regFile->getRegElemIds(vr);
+            freeList->addRegs(range.first, range.second);
+        }
+        vecMode = Enums::Elem;
+    } else if (newVecMode == Enums::Full && vecMode == Enums::Elem) {
+        /* Switch to full vector register rename mode: the free list must
+         * be tracking elems only, all but the arch-mapped ones. */
+        panic_if(freeList->hasFreeVecRegs(),
+                "The free list is already tracking full Vec");
+        panic_if(freeList->vecElemList.numFreeRegs() !=
+                regFile->numVecElemPhysRegs() - TheISA::NumVecRegs * NVecElems,
+                "The free list has lost vector register elements");
+        /* To rebuild the arch regs we take the easy road:
+         *  1.- Stitch the elems together into vectors.
+         *  2.- Replace the contents of the register file with the vectors
+         *  3.- Set the remaining registers as free
+         */
+        TheISA::VecRegContainer new_RF[TheISA::NumVecRegs];
+        for (uint32_t i = 0; i < TheISA::NumVecRegs; i++) {
+            VecReg dst = new_RF[i].as<TheISA::VecElem>();
+            for (uint32_t l = 0; l < NVecElems; l++) {
+                RegId s_rid(VecElemClass, i, l);
+                PhysRegIdPtr s_prid = vecElemMap.lookup(s_rid);
+                dst[l] = regFile->readVecElem(s_prid);
+            }
+        }
+
+        for (uint32_t i = 0; i < TheISA::NumVecRegs; i++) {
+            PhysRegId pregId(VecRegClass, i, 0);
+            regFile->setVecReg(regFile->getTrueId(&pregId), new_RF[i]);
+        }
+
+        auto range = regFile->getRegIds(VecRegClass);
+        freeList->addRegs(range.first + TheISA::NumVecRegs, range.second);
+
+        /* We remove the elems from the free list. */
+        while (freeList->hasFreeVecElems())
+            freeList->getVecElem();
+        vecMode = Enums::Full;
+    }
+}
+
diff --git a/src/cpu/o3/rename_map.hh b/src/cpu/o3/rename_map.hh
index 028c32e3a..ab909f090 100644
--- a/src/cpu/o3/rename_map.hh
+++ b/src/cpu/o3/rename_map.hh
@@ -54,6 +54,7 @@
 #include "cpu/o3/free_list.hh"
 #include "cpu/o3/regfile.hh"
 #include "cpu/reg_class.hh"
+#include "enums/VecRegRenameMode.hh"
 
 /**
  * Register rename map for a single class of registers (e.g., integer
@@ -68,6 +69,10 @@ class SimpleRenameMap
     using Arch2PhysMap = std::vector<PhysRegIdPtr>;
     /** The acutal arch-to-phys register map */
     Arch2PhysMap map;
+  public:
+    using iterator = Arch2PhysMap::iterator;
+    using const_iterator = Arch2PhysMap::const_iterator;
+  private:
 
     /**
      * Pointer to the free list from which new physical registers
@@ -139,6 +144,20 @@ class SimpleRenameMap
 
     /** Return the number of free entries on the associated free list. */
     unsigned numFreeEntries() const { return freeList->numFreeRegs(); }
+
+    /** Forward begin/cbegin to the map. */
+    /** @{ */
+    iterator begin() { return map.begin(); }
+    const_iterator begin() const { return map.begin(); }
+    const_iterator cbegin() const { return map.cbegin(); }
+    /** @} */
+
+    /** Forward end/cend to the map. */
+    /** @{ */
+    iterator end() { return map.end(); }
+    const_iterator end() const { return map.end(); }
+    const_iterator cend() const { return map.cend(); }
+    /** @} */
 };
 
 
@@ -152,6 +171,8 @@ class SimpleRenameMap
 class UnifiedRenameMap
 {
   private:
+    static constexpr uint32_t NVecElems = TheISA::NumVecElemPerVecReg;
+    using VecReg = TheISA::VecReg;
 
     /** The integer register rename map */
     SimpleRenameMap intMap;
@@ -162,6 +183,15 @@ class UnifiedRenameMap
     /** The condition-code register rename map */
     SimpleRenameMap ccMap;
 
+    /** The vector register rename map */
+    SimpleRenameMap vecMap;
+
+    /** The vector element register rename map */
+    SimpleRenameMap vecElemMap;
+
+    using VecMode = Enums::VecRegRenameMode;
+    VecMode vecMode;
+
     /**
      * The register file object is used only to get PhysRegIdPtr
      * on MiscRegs, as they are stored in it.
@@ -182,7 +212,8 @@ class UnifiedRenameMap
     void init(PhysRegFile *_regFile,
               RegIndex _intZeroReg,
               RegIndex _floatZeroReg,
-              UnifiedFreeList *freeList);
+              UnifiedFreeList *freeList,
+              VecMode _mode);
 
     /**
      * Tell rename map to get a new free physical register to remap
@@ -199,6 +230,12 @@ class UnifiedRenameMap
             return intMap.rename(arch_reg);
           case FloatRegClass:
             return floatMap.rename(arch_reg);
+          case VecRegClass:
+            assert(vecMode == Enums::Full);
+            return vecMap.rename(arch_reg);
+          case VecElemClass:
+            assert(vecMode == Enums::Elem);
+            return vecElemMap.rename(arch_reg);
           case CCRegClass:
             return ccMap.rename(arch_reg);
           case MiscRegClass:
@@ -232,6 +269,14 @@ class UnifiedRenameMap
           case FloatRegClass:
             return  floatMap.lookup(arch_reg);
 
+          case VecRegClass:
+            assert(vecMode == Enums::Full);
+            return  vecMap.lookup(arch_reg);
+
+          case VecElemClass:
+            assert(vecMode == Enums::Elem);
+            return  vecElemMap.lookup(arch_reg);
+
           case CCRegClass:
             return ccMap.lookup(arch_reg);
 
@@ -265,6 +310,16 @@ class UnifiedRenameMap
             assert(phys_reg->isFloatPhysReg());
             return floatMap.setEntry(arch_reg, phys_reg);
 
+          case VecRegClass:
+            assert(phys_reg->isVectorPhysReg());
+            assert(vecMode == Enums::Full);
+            return vecMap.setEntry(arch_reg, phys_reg);
+
+          case VecElemClass:
+            assert(phys_reg->isVectorPhysElem());
+            assert(vecMode == Enums::Elem);
+            return vecElemMap.setEntry(arch_reg, phys_reg);
+
           case CCRegClass:
             assert(phys_reg->isCCPhysReg());
             return ccMap.setEntry(arch_reg, phys_reg);
@@ -291,18 +346,39 @@ class UnifiedRenameMap
      */
     unsigned numFreeEntries() const
     {
-        return std::min(intMap.numFreeEntries(), floatMap.numFreeEntries());
+        return std::min(
+                std::min(intMap.numFreeEntries(), floatMap.numFreeEntries()),
+                vecMode == Enums::Full ? vecMap.numFreeEntries()
+                                    : vecElemMap.numFreeEntries());
     }
 
+    unsigned numFreeIntEntries() const { return intMap.numFreeEntries(); }
+    unsigned numFreeFloatEntries() const { return floatMap.numFreeEntries(); }
+    unsigned numFreeVecEntries() const
+    {
+        return vecMode == Enums::Full
+                ? vecMap.numFreeEntries()
+                : vecElemMap.numFreeEntries();
+    }
+    unsigned numFreeCCEntries() const { return ccMap.numFreeEntries(); }
+
     /**
      * Return whether there are enough registers to serve the request.
      */
-    bool canRename(uint32_t intRegs, uint32_t floatRegs, uint32_t ccRegs) const
+    bool canRename(uint32_t intRegs, uint32_t floatRegs, uint32_t vectorRegs,
+                    uint32_t vecElemRegs, uint32_t ccRegs) const
     {
         return intRegs <= intMap.numFreeEntries() &&
             floatRegs <= floatMap.numFreeEntries() &&
+            vectorRegs <= vecMap.numFreeEntries() &&
+            vecElemRegs <= vecElemMap.numFreeEntries() &&
             ccRegs <= ccMap.numFreeEntries();
     }
+    /**
+     * Set vector mode to Full or Elem.
+     * Ignore 'silent' modifications.
+     */
+    void switchMode(VecMode newVecMode, UnifiedFreeList* freeList);
 
 };
 
diff --git a/src/cpu/o3/thread_context.hh b/src/cpu/o3/thread_context.hh
index 161d70b28..ac4ceed02 100755
--- a/src/cpu/o3/thread_context.hh
+++ b/src/cpu/o3/thread_context.hh
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2011-2012 ARM Limited
+ * Copyright (c) 2011-2012, 2016 ARM Limited
  * Copyright (c) 2013 Advanced Micro Devices, Inc.
  * All rights reserved
  *
@@ -194,6 +194,70 @@ class O3ThreadContext : public ThreadContext
                                                  reg_idx)).index());
     }
 
+    virtual const VecRegContainer& readVecReg(const RegId& id) const {
+        return readVecRegFlat(flattenRegId(id).index());
+    }
+
+    /**
+     * Read vector register operand for modification, hierarchical indexing.
+     */
+    virtual VecRegContainer& getWritableVecReg(const RegId& id) {
+        return getWritableVecRegFlat(flattenRegId(id).index());
+    }
+
+    /** Vector Register Lane Interfaces. */
+    /** @{ */
+    /** Reads source vector 8bit operand. */
+    virtual ConstVecLane8
+    readVec8BitLaneReg(const RegId& id) const
+    {
+        return readVecLaneFlat<uint8_t>(flattenRegId(id).index(),
+                    id.elemIndex());
+    }
+
+    /** Reads source vector 16bit operand. */
+    virtual ConstVecLane16
+    readVec16BitLaneReg(const RegId& id) const
+    {
+        return readVecLaneFlat<uint16_t>(flattenRegId(id).index(),
+                    id.elemIndex());
+    }
+
+    /** Reads source vector 32bit operand. */
+    virtual ConstVecLane32
+    readVec32BitLaneReg(const RegId& id) const
+    {
+        return readVecLaneFlat<uint32_t>(flattenRegId(id).index(),
+                    id.elemIndex());
+    }
+
+    /** Reads source vector 64bit operand. */
+    virtual ConstVecLane64
+    readVec64BitLaneReg(const RegId& id) const
+    {
+        return readVecLaneFlat<uint64_t>(flattenRegId(id).index(),
+                    id.elemIndex());
+    }
+
+    /** Write a lane of the destination vector register. */
+    virtual void setVecLane(const RegId& reg,
+            const LaneData<LaneSize::Byte>& val)
+    { return setVecLaneFlat(flattenRegId(reg).index(), reg.elemIndex(), val); }
+    virtual void setVecLane(const RegId& reg,
+            const LaneData<LaneSize::TwoByte>& val)
+    { return setVecLaneFlat(flattenRegId(reg).index(), reg.elemIndex(), val); }
+    virtual void setVecLane(const RegId& reg,
+            const LaneData<LaneSize::FourByte>& val)
+    { return setVecLaneFlat(flattenRegId(reg).index(), reg.elemIndex(), val); }
+    virtual void setVecLane(const RegId& reg,
+            const LaneData<LaneSize::EightByte>& val)
+    { return setVecLaneFlat(flattenRegId(reg).index(), reg.elemIndex(), val); }
+    /** @} */
+
+    virtual const VecElem& readVecElem(const RegId& reg) const {
+        return readVecElemFlat(flattenRegId(reg).index(), reg.elemIndex());
+    }
+
     virtual CCReg readCCReg(int reg_idx) {
         return readCCRegFlat(flattenRegId(RegId(CCRegClass,
                                                  reg_idx)).index());
@@ -214,6 +278,14 @@ class O3ThreadContext : public ThreadContext
                                                reg_idx)).index(), val);
     }
 
+    virtual void setVecReg(const RegId& reg, const VecRegContainer& val) {
+        setVecRegFlat(flattenRegId(reg).index(), val);
+    }
+
+    virtual void setVecElem(const RegId& reg, const VecElem& val) {
+        setVecElemFlat(flattenRegId(reg).index(), reg.elemIndex(), val);
+    }
+
     virtual void setCCReg(int reg_idx, CCReg val) {
         setCCRegFlat(flattenRegId(RegId(CCRegClass, reg_idx)).index(), val);
     }
@@ -298,6 +370,29 @@ class O3ThreadContext : public ThreadContext
     virtual FloatRegBits readFloatRegBitsFlat(int idx);
     virtual void setFloatRegBitsFlat(int idx, FloatRegBits val);
 
+    virtual const VecRegContainer& readVecRegFlat(int idx) const;
+    /** Read vector register operand for modification, flat indexing. */
+    virtual VecRegContainer& getWritableVecRegFlat(int idx);
+    virtual void setVecRegFlat(int idx, const VecRegContainer& val);
+
+    /** Forwards to cpu->readArchVecLane<T>() using this thread's id. */
+    template <typename T>
+    VecLaneT<T, true> readVecLaneFlat(int idx, int lId) const
+    {
+        return cpu->template readArchVecLane<T>(idx, lId, thread->threadId());
+    }
+
+    /** Forwards to cpu->setArchVecLane(); LD is deduced from val. */
+    template <typename LD>
+    void
+    setVecLaneFlat(int idx, int lId, const LD& val)
+    { cpu->setArchVecLane(idx, lId, thread->threadId(), val); }
+
+    virtual const VecElem& readVecElemFlat(const RegIndex& idx,
+                                           const ElemIndex& elemIndex) const;
+    virtual void setVecElemFlat(const RegIndex& idx, const ElemIndex& elemIdx,
+                                const VecElem& val);
+
     virtual CCReg readCCRegFlat(int idx);
     virtual void setCCRegFlat(int idx, CCReg val);
 };
diff --git a/src/cpu/o3/thread_context_impl.hh b/src/cpu/o3/thread_context_impl.hh
index c3f894275..2d109aea9 100755
--- a/src/cpu/o3/thread_context_impl.hh
+++ b/src/cpu/o3/thread_context_impl.hh
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2010-2012 ARM Limited
+ * Copyright (c) 2010-2012, 2016 ARM Limited
  * Copyright (c) 2013 Advanced Micro Devices, Inc.
  * All rights reserved
  *
@@ -208,6 +208,28 @@ O3ThreadContext<Impl>::readFloatRegBitsFlat(int reg_idx)
     return cpu->readArchFloatRegInt(reg_idx, thread->threadId());
 }
 
+template <class Impl>
+const TheISA::VecRegContainer&
+O3ThreadContext<Impl>::readVecRegFlat(int reg_id) const
+{
+    return cpu->readArchVecReg(reg_id, thread->threadId());
+}
+
+template <class Impl>
+TheISA::VecRegContainer&
+O3ThreadContext<Impl>::getWritableVecRegFlat(int reg_id)
+{
+    return cpu->getWritableArchVecReg(reg_id, thread->threadId());
+}
+
+template <class Impl>
+const TheISA::VecElem&
+O3ThreadContext<Impl>::readVecElemFlat(const RegIndex& idx,
+                                           const ElemIndex& elemIndex) const
+{
+    return cpu->readArchVecElem(idx, elemIndex, thread->threadId());
+}
+
 template <class Impl>
 TheISA::CCReg
 O3ThreadContext<Impl>::readCCRegFlat(int reg_idx)
@@ -242,6 +264,24 @@ O3ThreadContext<Impl>::setFloatRegBitsFlat(int reg_idx, FloatRegBits val)
     conditionalSquash();
 }
 
+template <class Impl>
+void
+O3ThreadContext<Impl>::setVecRegFlat(int reg_idx, const VecRegContainer& val)
+{
+    cpu->setArchVecReg(reg_idx, val, thread->threadId());
+
+    conditionalSquash();
+}
+
+template <class Impl>
+void
+O3ThreadContext<Impl>::setVecElemFlat(const RegIndex& idx,
+        const ElemIndex& elemIndex, const VecElem& val)
+{
+    cpu->setArchVecElem(idx, elemIndex, val, thread->threadId());
+    conditionalSquash();
+}
+
 template <class Impl>
 void
 O3ThreadContext<Impl>::setCCRegFlat(int reg_idx, TheISA::CCReg val)
diff --git a/src/cpu/reg_class.cc b/src/cpu/reg_class.cc
index 53a50ce8e..16c1949ee 100644
--- a/src/cpu/reg_class.cc
+++ b/src/cpu/reg_class.cc
@@ -1,4 +1,16 @@
 /*
+ * Copyright (c) 2016 ARM Limited
+ * All rights reserved
+ *
+ * The license below extends only to copyright in the software and shall
+ * not be construed as granting a license to any other intellectual
+ * property including but not limited to intellectual property relating
+ * to a hardware implementation of the functionality of the software
+ * licensed hereunder.  You may use the software subject to the license
+ * terms below provided that you ensure that this notice is replicated
+ * unmodified and in its entirety in all distributions of the software,
+ * modified or unmodified, in source code or in binary form.
+ *
  * Copyright (c) 2013 Advanced Micro Devices, Inc.
  * All rights reserved
  *
@@ -33,6 +45,8 @@
 const char *RegId::regClassStrings[] = {
     "IntRegClass",
     "FloatRegClass",
+    "VecRegClass",
+    "VecElemClass",
     "CCRegClass",
     "MiscRegClass"
 };
diff --git a/src/cpu/reg_class.hh b/src/cpu/reg_class.hh
index 05869e8fb..27bf59b19 100644
--- a/src/cpu/reg_class.hh
+++ b/src/cpu/reg_class.hh
@@ -39,6 +39,7 @@
  *
  * Authors: Steve Reinhardt
  *          Nathanael Premillieu
+ *          Rekai Gonzalez
  */
 
 #ifndef __CPU__REG_CLASS_HH__
@@ -55,6 +56,10 @@
 enum RegClass {
     IntRegClass,        ///< Integer register
     FloatRegClass,      ///< Floating-point register
+    /** Vector Register. */
+    VecRegClass,
+    /** Vector Register Native Elem lane. */
+    VecElemClass,
     CCRegClass,         ///< Condition-code register
     MiscRegClass        ///< Control (misc) register
 };
@@ -75,14 +80,27 @@ class RegId {
     static const char* regClassStrings[];
     RegClass regClass;
     RegIndex regIdx;
+    ElemIndex elemIdx;
+    static constexpr size_t Scale = TheISA::NumVecElemPerVecReg;
   public:
     RegId() {};
     RegId(RegClass reg_class, RegIndex reg_idx)
-        : regClass(reg_class), regIdx(reg_idx)
-    {}
+        : regClass(reg_class), regIdx(reg_idx), elemIdx(-1)
+    {
+        panic_if(regClass == VecElemClass,
+                "Creating vector physical index w/o element index");
+    }
+
+    explicit RegId(RegClass reg_class, RegIndex reg_idx, ElemIndex elem_idx)
+        : regClass(reg_class), regIdx(reg_idx), elemIdx(elem_idx)
+    {
+        panic_if(regClass != VecElemClass,
+                "Creating non-vector physical index w/ element index");
+    }
 
     bool operator==(const RegId& that) const {
-        return regClass == that.classValue() && regIdx == that.index();
+        return regClass == that.classValue() && regIdx == that.index()
+                                             && elemIdx == that.elemIndex();
     }
 
     bool operator!=(const RegId& that) const {
@@ -94,7 +112,9 @@ class RegId {
      */
     bool operator<(const RegId& that) const {
         return regClass < that.classValue() ||
-                (regClass == that.classValue() && regIdx < that.index());
+            (regClass == that.classValue() && (
+                   regIdx < that.index() ||
+                   (regIdx == that.index() && elemIdx < that.elemIndex())));
     }
 
     /**
@@ -119,12 +139,26 @@ class RegId {
     /** @return true if it is a floating-point physical register. */
     bool isFloatReg() const { return regClass == FloatRegClass; }
 
+    /** @return true if this register belongs to VecRegClass (vector reg). */
+    bool isVecReg() const { return regClass == VecRegClass; }
+
+    /** @return true if this register belongs to VecElemClass (vector elem). */
+    bool isVecElem() const { return regClass == VecElemClass; }
+
     /** @Return true if it is a  condition-code physical register. */
     bool isCCReg() const { return regClass == CCRegClass; }
 
     /** @Return true if it is a  condition-code physical register. */
     bool isMiscReg() const { return regClass == MiscRegClass; }
 
+    /**
+     * @return true if this register can be renamed (any class but Misc).
+     */
+    bool isRenameable() const
+    {
+        return regClass != MiscRegClass;
+    }
+
     /** Index accessors */
     /** @{ */
     const RegIndex& index() const { return regIdx; }
@@ -136,6 +170,8 @@ class RegId {
     inline RegIndex flatIndex() const;
     /** @} */
 
+    /** Elem accessor; typed as ElemIndex to match the elemIdx member. */
+    const ElemIndex& elemIndex() const { return elemIdx; }
     /** Class accessor */
     const RegClass& classValue() const { return regClass; }
     /** Return a const char* with the register class name. */
diff --git a/src/cpu/reg_class_impl.hh b/src/cpu/reg_class_impl.hh
index a47328b10..98b341e86 100644
--- a/src/cpu/reg_class_impl.hh
+++ b/src/cpu/reg_class_impl.hh
@@ -55,13 +55,18 @@ bool RegId::isZeroReg() const
              regIdx == TheISA::ZeroReg));
 }
 
+static constexpr size_t Scale = TheISA::NumVecElemPerVecReg;
+
 RegIndex RegId::flatIndex() const {
     switch (regClass) {
     case IntRegClass:
     case FloatRegClass:
+    case VecRegClass:
     case CCRegClass:
     case MiscRegClass:
         return regIdx;
+    case VecElemClass:
+        return Scale*regIdx + elemIdx;
     }
     panic("Trying to flatten a register without class!");
     return -1;
diff --git a/src/cpu/simple/base.cc b/src/cpu/simple/base.cc
index 77d2fb4ac..57cea4ba7 100644
--- a/src/cpu/simple/base.cc
+++ b/src/cpu/simple/base.cc
@@ -292,6 +292,16 @@ BaseSimpleCPU::regStats()
             .desc("number of times the floating registers were written")
             ;
 
+        t_info.numVecRegReads
+            .name(thread_str + ".num_vec_register_reads")
+            .desc("number of times the vector registers were read")
+            ;
+
+        t_info.numVecRegWrites
+            .name(thread_str + ".num_vec_register_writes")
+            .desc("number of times the vector registers were written")
+            ;
+
         t_info.numCCRegReads
             .name(thread_str + ".num_cc_register_reads")
             .desc("number of times the CC registers were read")
diff --git a/src/cpu/simple/exec_context.hh b/src/cpu/simple/exec_context.hh
index f221d6c93..0f546407d 100644
--- a/src/cpu/simple/exec_context.hh
+++ b/src/cpu/simple/exec_context.hh
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2014-2015 ARM Limited
+ * Copyright (c) 2014-2016 ARM Limited
  * All rights reserved
  *
  * The license below extends only to copyright in the software and shall
@@ -64,6 +64,8 @@ class SimpleExecContext : public ExecContext {
     typedef TheISA::FloatReg FloatReg;
     typedef TheISA::FloatRegBits FloatRegBits;
     typedef TheISA::CCReg CCReg;
+    using VecRegContainer = TheISA::VecRegContainer;
+    using VecElem = TheISA::VecElem;
 
   public:
     BaseSimpleCPU *cpu;
@@ -112,6 +114,10 @@ class SimpleExecContext : public ExecContext {
     Stats::Scalar numFpRegReads;
     Stats::Scalar numFpRegWrites;
 
+    // Vector register file access counts; reads are bumped from const methods
+    mutable Stats::Scalar numVecRegReads;
+    Stats::Scalar numVecRegWrites;
+
     // Number of condition code register file accesses
     Stats::Scalar numCCRegReads;
     Stats::Scalar numCCRegWrites;
@@ -219,6 +225,124 @@ class SimpleExecContext : public ExecContext {
         thread->setFloatRegBits(reg.index(), val);
     }
 
+    /** Fetch source operand idx of si, which must be a vector register. */
+    const VecRegContainer&
+    readVecRegOperand(const StaticInst *si, int idx) const override
+    {
+        ++numVecRegReads;
+        const RegId& src_reg = si->srcRegIdx(idx);
+        assert(src_reg.isVecReg());
+        return thread->readVecReg(src_reg);
+    }
+
+    /** Writable reference to dest operand idx of si (a vector register). */
+    VecRegContainer&
+    getWritableVecRegOperand(const StaticInst *si, int idx) override
+    {
+        ++numVecRegWrites;
+        const RegId& dest_reg = si->destRegIdx(idx);
+        assert(dest_reg.isVecReg());
+        return thread->getWritableVecReg(dest_reg);
+    }
+
+    /** Store val into dest operand idx of si, which must be a vector reg. */
+    void setVecRegOperand(const StaticInst *si, int idx,
+                          const VecRegContainer& val) override
+    {
+        ++numVecRegWrites;
+        const RegId& dest_reg = si->destRegIdx(idx);
+        assert(dest_reg.isVecReg());
+        thread->setVecReg(dest_reg, val);
+    }
+
+    /** Vector Register Lane Interfaces. */
+    /** @{ */
+    /** Lane view (element type T) of source operand idx of si. */
+    template <typename T>
+    VecLaneT<T, true>
+    readVecLaneOperand(const StaticInst *si, int idx) const
+    {
+        ++numVecRegReads;
+        const RegId& src_reg = si->srcRegIdx(idx);
+        assert(src_reg.isVecReg());
+        return thread->readVecLane<T>(src_reg);
+    }
+    /** 8-bit lane view of source operand idx of si. */
+    ConstVecLane8
+    readVec8BitLaneOperand(const StaticInst *si,
+                           int idx) const override
+    { return readVecLaneOperand<uint8_t>(si, idx); }
+
+    /** 16-bit lane view of source operand idx of si. */
+    ConstVecLane16
+    readVec16BitLaneOperand(const StaticInst *si,
+                            int idx) const override
+    { return readVecLaneOperand<uint16_t>(si, idx); }
+
+    /** 32-bit lane view of source operand idx of si. */
+    ConstVecLane32
+    readVec32BitLaneOperand(const StaticInst *si,
+                            int idx) const override
+    { return readVecLaneOperand<uint32_t>(si, idx); }
+
+    /** 64-bit lane view of source operand idx of si. */
+    ConstVecLane64
+    readVec64BitLaneOperand(const StaticInst *si,
+                            int idx) const override
+    { return readVecLaneOperand<uint64_t>(si, idx); }
+
+    /** Store val into the lane named by destination operand idx of si. */
+    template <typename LD>
+    void
+    setVecLaneOperandT(const StaticInst *si, int idx, const LD& val)
+    {
+        ++numVecRegWrites;
+        const RegId& dest_reg = si->destRegIdx(idx);
+        assert(dest_reg.isVecReg());
+
+        thread->setVecLane(dest_reg, val);
+    }
+    /** Byte-wide lane write to destination operand idx of si. */
+    void
+    setVecLaneOperand(const StaticInst *si, int idx,
+                      const LaneData<LaneSize::Byte>& val) override
+    { setVecLaneOperandT(si, idx, val); }
+    /** Two-byte lane write to destination operand idx of si. */
+    void
+    setVecLaneOperand(const StaticInst *si, int idx,
+                      const LaneData<LaneSize::TwoByte>& val) override
+    { setVecLaneOperandT(si, idx, val); }
+    /** Four-byte lane write to destination operand idx of si. */
+    void
+    setVecLaneOperand(const StaticInst *si, int idx,
+                      const LaneData<LaneSize::FourByte>& val) override
+    { setVecLaneOperandT(si, idx, val); }
+    /** Eight-byte lane write to destination operand idx of si. */
+    void
+    setVecLaneOperand(const StaticInst *si, int idx,
+                      const LaneData<LaneSize::EightByte>& val) override
+    { setVecLaneOperandT(si, idx, val); }
+    /** @} */
+
+    /** Reads the vector register element named by source operand idx. */
+    VecElem readVecElemOperand(const StaticInst *si, int idx) const override
+    {
+        numVecRegReads++;
+        const RegId& reg = si->srcRegIdx(idx);
+        assert(reg.isVecElem());
+        return thread->readVecElem(reg);
+    }
+
+    /** Store val into the vector element named by dest operand idx. */
+    void setVecElemOperand(const StaticInst *si, int idx,
+                           const VecElem val) override
+    {
+        ++numVecRegWrites;
+        const RegId& dest_reg = si->destRegIdx(idx);
+        assert(dest_reg.isVecElem());
+        thread->setVecElem(dest_reg, val);
+    }
+
     CCReg readCCRegOperand(const StaticInst *si, int idx) override
     {
         numCCRegReads++;
diff --git a/src/cpu/simple_thread.hh b/src/cpu/simple_thread.hh
index 286d91766..4ea8b91ba 100644
--- a/src/cpu/simple_thread.hh
+++ b/src/cpu/simple_thread.hh
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2011-2012 ARM Limited
+ * Copyright (c) 2011-2012, 2016 ARM Limited
  * Copyright (c) 2013 Advanced Micro Devices, Inc.
  * All rights reserved
  *
@@ -58,6 +58,7 @@
 #include "debug/CCRegs.hh"
 #include "debug/FloatRegs.hh"
 #include "debug/IntRegs.hh"
+#include "debug/VecRegs.hh"
 #include "mem/page_table.hh"
 #include "mem/request.hh"
 #include "sim/byteswap.hh"
@@ -102,6 +103,8 @@ class SimpleThread : public ThreadState
     typedef TheISA::FloatReg FloatReg;
     typedef TheISA::FloatRegBits FloatRegBits;
     typedef TheISA::CCReg CCReg;
+    using VecRegContainer = TheISA::VecRegContainer;
+    using VecElem = TheISA::VecElem;
   public:
     typedef ThreadContext::Status Status;
 
@@ -111,6 +114,7 @@ class SimpleThread : public ThreadState
         FloatRegBits i[TheISA::NumFloatRegs];
     } floatRegs;
     TheISA::IntReg intRegs[TheISA::NumIntRegs];
+    VecRegContainer vecRegs[TheISA::NumVecRegs];
 #ifdef ISA_HAS_CC_REGS
     TheISA::CCReg ccRegs[TheISA::NumCCRegs];
 #endif
@@ -227,6 +231,9 @@ class SimpleThread : public ThreadState
         _pcState = 0;
         memset(intRegs, 0, sizeof(intRegs));
         memset(floatRegs.i, 0, sizeof(floatRegs.i));
+        for (int i = 0; i < TheISA::NumVecRegs; i++) {
+            vecRegs[i].zero();
+        }
 #ifdef ISA_HAS_CC_REGS
         memset(ccRegs, 0, sizeof(ccRegs));
 #endif
@@ -266,6 +273,98 @@ class SimpleThread : public ThreadState
         return regVal;
     }
 
+    /** Read a vector register after flattening its index through the ISA. */
+    const VecRegContainer& readVecReg(const RegId& reg) const
+    {
+        const int flat_idx = isa->flattenVecIndex(reg.index());
+        assert(flat_idx < TheISA::NumVecRegs);
+        const VecRegContainer& vec = readVecRegFlat(flat_idx);
+        DPRINTF(VecRegs, "Reading vector reg %d (%d) as %s.\n",
+                reg.index(), flat_idx, vec.as<TheISA::VecElem>().print());
+        return vec;
+    }
+
+    /** Writable reference to a vector register (index flattened by ISA). */
+    VecRegContainer& getWritableVecReg(const RegId& reg)
+    {
+        const int flat_idx = isa->flattenVecIndex(reg.index());
+        assert(flat_idx < TheISA::NumVecRegs);
+        VecRegContainer& vec = getWritableVecRegFlat(flat_idx);
+        DPRINTF(VecRegs, "Reading vector reg %d (%d) as %s for modify.\n",
+                reg.index(), flat_idx, vec.as<TheISA::VecElem>().print());
+        return vec;
+    }
+
+    /** Vector Register Lane Interfaces. */
+    /** @{ */
+    /** Lane view (element type T) of the register/lane named by reg. */
+    template <typename T>
+    VecLaneT<T, true>
+    readVecLane(const RegId& reg) const
+    {
+        const int flat_idx = isa->flattenVecIndex(reg.index());
+        assert(flat_idx < TheISA::NumVecRegs);
+        VecLaneT<T, true> lane = readVecLaneFlat<T>(flat_idx, reg.elemIndex());
+        DPRINTF(VecRegs, "Reading vector lane %d (%d)[%d] as %lx.\n",
+                reg.index(), flat_idx, reg.elemIndex(), lane);
+        return lane;
+    }
+
+    /** Reads source vector 8bit operand. */
+    virtual ConstVecLane8
+    readVec8BitLaneReg(const RegId& reg) const
+    { return readVecLane<uint8_t>(reg); }
+
+    /** Reads source vector 16bit operand. */
+    virtual ConstVecLane16
+    readVec16BitLaneReg(const RegId& reg) const
+    { return readVecLane<uint16_t>(reg); }
+
+    /** Reads source vector 32bit operand. */
+    virtual ConstVecLane32
+    readVec32BitLaneReg(const RegId& reg) const
+    { return readVecLane<uint32_t>(reg); }
+
+    /** Reads source vector 64bit operand. */
+    virtual ConstVecLane64
+    readVec64BitLaneReg(const RegId& reg) const
+    { return readVecLane<uint64_t>(reg); }
+
+    /** Write a lane of the destination vector register. */
+    template <typename LD>
+    void setVecLaneT(const RegId& reg, const LD& val)
+    {
+        int flatIndex = isa->flattenVecIndex(reg.index());
+        assert(flatIndex < TheISA::NumVecRegs);
+        setVecLaneFlat(flatIndex, reg.elemIndex(), val);
+        DPRINTF(VecRegs, "Setting vector lane %d (%d)[%d] to %lx.\n",
+                reg.index(), flatIndex, reg.elemIndex(), val);
+    }
+    virtual void
+    setVecLane(const RegId& reg, const LaneData<LaneSize::Byte>& val)
+    { setVecLaneT(reg, val); }
+    virtual void
+    setVecLane(const RegId& reg, const LaneData<LaneSize::TwoByte>& val)
+    { setVecLaneT(reg, val); }
+    virtual void
+    setVecLane(const RegId& reg, const LaneData<LaneSize::FourByte>& val)
+    { setVecLaneT(reg, val); }
+    virtual void
+    setVecLane(const RegId& reg, const LaneData<LaneSize::EightByte>& val)
+    { setVecLaneT(reg, val); }
+    /** @} */
+
+    /** Read one vector element; the index is flattened through the ISA. */
+    const VecElem& readVecElem(const RegId& reg) const
+    {
+        const int flat_idx = isa->flattenVecElemIndex(reg.index());
+        assert(flat_idx < TheISA::NumVecRegs);
+        const VecElem& elem = readVecElemFlat(flat_idx, reg.elemIndex());
+        DPRINTF(VecRegs, "Reading element %d of vector reg %d (%d) as"
+                " %#x.\n", reg.elemIndex(), reg.index(), flat_idx, elem);
+        return elem;
+    }
+
     CCReg readCCReg(int reg_idx)
     {
 #ifdef ISA_HAS_CC_REGS
@@ -312,6 +411,24 @@ class SimpleThread : public ThreadState
                 reg_idx, flatIndex, val, floatRegs.f[flatIndex]);
     }
 
+    void
+    setVecReg(const RegId& reg, const VecRegContainer& val)
+    {
+        const int flat_idx = isa->flattenVecIndex(reg.index());
+        assert(flat_idx < TheISA::NumVecRegs);
+        setVecRegFlat(flat_idx, val);
+        DPRINTF(VecRegs, "Setting vector reg %d (%d) to %s.\n", reg.index(),
+                flat_idx, val.print()); }
+
+    void setVecElem(const RegId& reg, const VecElem& val)
+    {
+        const int flat_idx = isa->flattenVecElemIndex(reg.index());
+        assert(flat_idx < TheISA::NumVecRegs);
+        setVecElemFlat(flat_idx, reg.elemIndex(), val);
+        DPRINTF(VecRegs, "Setting element %d of vector reg %d (%d) to"
+                " %#x.\n", reg.elemIndex(), reg.index(), flat_idx, val);
+    }
+
     void setCCReg(int reg_idx, CCReg val)
     {
 #ifdef ISA_HAS_CC_REGS
@@ -428,6 +545,45 @@ class SimpleThread : public ThreadState
         floatRegs.i[idx] = val;
     }
 
+    const VecRegContainer& readVecRegFlat(const RegIndex& reg) const
+    {
+        return vecRegs[reg];
+    }
+
+    VecRegContainer& getWritableVecRegFlat(const RegIndex& reg)
+    {
+        return vecRegs[reg];
+    }
+
+    void setVecRegFlat(const RegIndex& reg, const VecRegContainer& val)
+    {
+        vecRegs[reg] = val;
+    }
+
+    template <typename T>
+    VecLaneT<T, true> readVecLaneFlat(const RegIndex& reg, int lId) const
+    {
+        return vecRegs[reg].laneView<T>(lId);
+    }
+
+    template <typename LD>
+    void setVecLaneFlat(const RegIndex& reg, int lId, const LD& val)
+    {
+        vecRegs[reg].laneView<typename LD::UnderlyingType>(lId) = val;
+    }
+
+    const VecElem& readVecElemFlat(const RegIndex& reg,
+                                   const ElemIndex& elemIndex) const
+    {
+        return vecRegs[reg].as<TheISA::VecElem>()[elemIndex];
+    }
+
+    void setVecElemFlat(const RegIndex& reg, const ElemIndex& elemIndex,
+                        const VecElem val)
+    {
+        vecRegs[reg].as<TheISA::VecElem>()[elemIndex] = val;
+    }
+
 #ifdef ISA_HAS_CC_REGS
     CCReg readCCRegFlat(int idx) { return ccRegs[idx]; }
     void setCCRegFlat(int idx, CCReg val) { ccRegs[idx] = val; }
diff --git a/src/cpu/static_inst.hh b/src/cpu/static_inst.hh
index d60afc019..e7507c6a6 100644
--- a/src/cpu/static_inst.hh
+++ b/src/cpu/static_inst.hh
@@ -100,13 +100,20 @@ class StaticInst : public RefCounted, public StaticInstFlags
     int8_t _numCCDestRegs;
     //@}
 
+    /** To use in architectures with vector register file. */
+    /** @{ */
+    int8_t _numVecDestRegs;
+    int8_t _numVecElemDestRegs;
+    /** @} */
+
   public:
 
     /// @name Register information.
-    /// The sum of numFPDestRegs() and numIntDestRegs() equals
-    /// numDestRegs().  The former two functions are used to track
-    /// physical register usage for machines with separate int & FP
-    /// reg files.
+    /// The sum of numFPDestRegs(), numIntDestRegs(), numVecDestRegs() and
+    /// numVecElemDestRegs() equals numDestRegs().  The former two functions
+    /// are used to track physical register usage for machines with separate
+    /// int & FP reg files; the latter two are for machines with a vector
+    /// register file.
     //@{
     /// Number of source registers.
     int8_t numSrcRegs()  const { return _numSrcRegs; }
@@ -116,7 +123,10 @@ class StaticInst : public RefCounted, public StaticInstFlags
     int8_t numFPDestRegs()  const { return _numFPDestRegs; }
     /// Number of integer destination regs.
     int8_t numIntDestRegs() const { return _numIntDestRegs; }
-    //@}
+    /// Number of vector destination regs.
+    int8_t numVecDestRegs() const { return _numVecDestRegs; }
+    /// Number of vector element destination regs.
+    int8_t numVecElemDestRegs() const { return _numVecElemDestRegs; }
     /// Number of coprocesor destination regs.
     int8_t numCCDestRegs() const { return _numCCDestRegs; }
     //@}
@@ -252,7 +262,8 @@ class StaticInst : public RefCounted, public StaticInstFlags
     StaticInst(const char *_mnemonic, ExtMachInst _machInst, OpClass __opClass)
         : _opClass(__opClass), _numSrcRegs(0), _numDestRegs(0),
           _numFPDestRegs(0), _numIntDestRegs(0), _numCCDestRegs(0),
-          machInst(_machInst), mnemonic(_mnemonic), cachedDisassembly(0)
+          _numVecDestRegs(0), _numVecElemDestRegs(0), machInst(_machInst),
+          mnemonic(_mnemonic), cachedDisassembly(0)
     { }
 
   public:
diff --git a/src/cpu/thread_context.cc b/src/cpu/thread_context.cc
index cea21e790..0d288de6f 100644
--- a/src/cpu/thread_context.cc
+++ b/src/cpu/thread_context.cc
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2012 ARM Limited
+ * Copyright (c) 2012, 2016 ARM Limited
  * Copyright (c) 2013 Advanced Micro Devices, Inc.
  * All rights reserved
  *
@@ -76,6 +76,16 @@ ThreadContext::compare(ThreadContext *one, ThreadContext *two)
             panic("Float reg idx %d doesn't match, one: %#x, two: %#x",
                   i, t1, t2);
     }
+
+    // Then loop through the vector registers.
+    for (int i = 0; i < TheISA::NumVecRegs; ++i) {
+        RegId rid(VecRegClass, i);
+        const TheISA::VecRegContainer& t1 = one->readVecReg(rid);
+        const TheISA::VecRegContainer& t2 = two->readVecReg(rid);
+        if (t1 != t2)
+            panic("Vec reg idx %d doesn't match, one: %#x, two: %#x",
+                  i, t1, t2);
+    }
     for (int i = 0; i < TheISA::NumMiscRegs; ++i) {
         TheISA::MiscReg t1 = one->readMiscRegNoEffect(i);
         TheISA::MiscReg t2 = two->readMiscRegNoEffect(i);
@@ -152,6 +162,12 @@ serialize(ThreadContext &tc, CheckpointOut &cp)
     // compatibility.
     arrayParamOut(cp, "floatRegs.i", floatRegs, NumFloatRegs);
 
+    std::vector<TheISA::VecRegContainer> vecRegs(NumVecRegs);
+    for (int i = 0; i < NumVecRegs; ++i) {
+        vecRegs[i] = tc.readVecRegFlat(i);
+    }
+    SERIALIZE_CONTAINER(vecRegs);
+
     IntReg intRegs[NumIntRegs];
     for (int i = 0; i < NumIntRegs; ++i)
         intRegs[i] = tc.readIntRegFlat(i);
@@ -181,6 +197,12 @@ unserialize(ThreadContext &tc, CheckpointIn &cp)
     for (int i = 0; i < NumFloatRegs; ++i)
         tc.setFloatRegBitsFlat(i, floatRegs[i]);
 
+    std::vector<TheISA::VecRegContainer> vecRegs(NumVecRegs);
+    UNSERIALIZE_CONTAINER(vecRegs);
+    for (int i = 0; i < NumVecRegs; ++i) {
+        tc.setVecRegFlat(i, vecRegs[i]);
+    }
+
     IntReg intRegs[NumIntRegs];
     UNSERIALIZE_ARRAY(intRegs, NumIntRegs);
     for (int i = 0; i < NumIntRegs; ++i)
diff --git a/src/cpu/thread_context.hh b/src/cpu/thread_context.hh
index 43c40481e..66b2f7554 100644
--- a/src/cpu/thread_context.hh
+++ b/src/cpu/thread_context.hh
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2011-2012 ARM Limited
+ * Copyright (c) 2011-2012, 2016 ARM Limited
  * Copyright (c) 2013 Advanced Micro Devices, Inc.
  * All rights reserved
  *
@@ -100,6 +100,8 @@ class ThreadContext
     typedef TheISA::FloatRegBits FloatRegBits;
     typedef TheISA::CCReg CCReg;
     typedef TheISA::MiscReg MiscReg;
+    using VecRegContainer = TheISA::VecRegContainer;
+    using VecElem = TheISA::VecElem;
   public:
 
     enum Status
@@ -212,6 +214,40 @@ class ThreadContext
 
     virtual FloatRegBits readFloatRegBits(int reg_idx) = 0;
 
+    virtual const VecRegContainer& readVecReg(const RegId& reg) const = 0;
+    virtual VecRegContainer& getWritableVecReg(const RegId& reg) = 0;
+
+    /** Vector Register Lane Interfaces. */
+    /** @{ */
+    /** Reads source vector 8bit operand. */
+    virtual ConstVecLane8
+    readVec8BitLaneReg(const RegId& reg) const = 0;
+
+    /** Reads source vector 16bit operand. */
+    virtual ConstVecLane16
+    readVec16BitLaneReg(const RegId& reg) const = 0;
+
+    /** Reads source vector 32bit operand. */
+    virtual ConstVecLane32
+    readVec32BitLaneReg(const RegId& reg) const = 0;
+
+    /** Reads source vector 64bit operand. */
+    virtual ConstVecLane64
+    readVec64BitLaneReg(const RegId& reg) const = 0;
+
+    /** Write a lane of the destination vector register. */
+    virtual void setVecLane(const RegId& reg,
+            const LaneData<LaneSize::Byte>& val) = 0;
+    virtual void setVecLane(const RegId& reg,
+            const LaneData<LaneSize::TwoByte>& val) = 0;
+    virtual void setVecLane(const RegId& reg,
+            const LaneData<LaneSize::FourByte>& val) = 0;
+    virtual void setVecLane(const RegId& reg,
+            const LaneData<LaneSize::EightByte>& val) = 0;
+    /** @} */
+
+    virtual const VecElem& readVecElem(const RegId& reg) const = 0;
+
     virtual CCReg readCCReg(int reg_idx) = 0;
 
     virtual void setIntReg(int reg_idx, uint64_t val) = 0;
@@ -220,6 +256,10 @@ class ThreadContext
 
     virtual void setFloatRegBits(int reg_idx, FloatRegBits val) = 0;
 
+    virtual void setVecReg(const RegId& reg, const VecRegContainer& val) = 0;
+
+    virtual void setVecElem(const RegId& reg, const VecElem& val) = 0;
+
     virtual void setCCReg(int reg_idx, CCReg val) = 0;
 
     virtual TheISA::PCState pcState() = 0;
@@ -303,6 +343,15 @@ class ThreadContext
     virtual FloatRegBits readFloatRegBitsFlat(int idx) = 0;
     virtual void setFloatRegBitsFlat(int idx, FloatRegBits val) = 0;
 
+    virtual const VecRegContainer& readVecRegFlat(int idx) const = 0;
+    virtual VecRegContainer& getWritableVecRegFlat(int idx) = 0;
+    virtual void setVecRegFlat(int idx, const VecRegContainer& val) = 0;
+
+    virtual const VecElem& readVecElemFlat(const RegIndex& idx,
+                                           const ElemIndex& elemIdx) const = 0;
+    virtual void setVecElemFlat(const RegIndex& idx, const ElemIndex& elemIdx,
+                                const VecElem& val) = 0;
+
     virtual CCReg readCCRegFlat(int idx) = 0;
     virtual void setCCRegFlat(int idx, CCReg val) = 0;
     /** @} */
@@ -421,6 +470,52 @@ class ProxyThreadContext : public ThreadContext
     FloatRegBits readFloatRegBits(int reg_idx)
     { return actualTC->readFloatRegBits(reg_idx); }
 
+    const VecRegContainer& readVecReg(const RegId& reg) const
+    { return actualTC->readVecReg(reg); }
+
+    VecRegContainer& getWritableVecReg(const RegId& reg)
+    { return actualTC->getWritableVecReg(reg); }
+
+    /** Vector Register Lane Interfaces. */
+    /** @{ */
+    /** Reads source vector 8bit operand. */
+    ConstVecLane8
+    readVec8BitLaneReg(const RegId& reg) const
+    { return actualTC->readVec8BitLaneReg(reg); }
+
+    /** Reads source vector 16bit operand. */
+    ConstVecLane16
+    readVec16BitLaneReg(const RegId& reg) const
+    { return actualTC->readVec16BitLaneReg(reg); }
+
+    /** Reads source vector 32bit operand. */
+    ConstVecLane32
+    readVec32BitLaneReg(const RegId& reg) const
+    { return actualTC->readVec32BitLaneReg(reg); }
+
+    /** Reads source vector 64bit operand. */
+    ConstVecLane64
+    readVec64BitLaneReg(const RegId& reg) const
+    { return actualTC->readVec64BitLaneReg(reg); }
+
+    /** Forward a lane write to the wrapped thread context (actualTC). */
+    void
+    setVecLane(const RegId& reg, const LaneData<LaneSize::Byte>& val)
+    { actualTC->setVecLane(reg, val); }
+    void
+    setVecLane(const RegId& reg, const LaneData<LaneSize::TwoByte>& val)
+    { actualTC->setVecLane(reg, val); }
+    void
+    setVecLane(const RegId& reg, const LaneData<LaneSize::FourByte>& val)
+    { actualTC->setVecLane(reg, val); }
+    void
+    setVecLane(const RegId& reg, const LaneData<LaneSize::EightByte>& val)
+    { actualTC->setVecLane(reg, val); }
+    /** @} */
+
+    const VecElem& readVecElem(const RegId& reg) const
+    { return actualTC->readVecElem(reg); }
+
     CCReg readCCReg(int reg_idx)
     { return actualTC->readCCReg(reg_idx); }
 
@@ -433,6 +528,12 @@ class ProxyThreadContext : public ThreadContext
     void setFloatRegBits(int reg_idx, FloatRegBits val)
     { actualTC->setFloatRegBits(reg_idx, val); }
 
+    void setVecReg(const RegId& reg, const VecRegContainer& val)
+    { actualTC->setVecReg(reg, val); } // write is applied through actualTC
+
+    void setVecElem(const RegId& reg, const VecElem& val)
+    { actualTC->setVecElem(reg, val); } // write is applied through actualTC
+
     void setCCReg(int reg_idx, CCReg val)
     { actualTC->setCCReg(reg_idx, val); }
 
@@ -495,6 +596,23 @@ class ProxyThreadContext : public ThreadContext
     void setFloatRegBitsFlat(int idx, FloatRegBits val)
     { actualTC->setFloatRegBitsFlat(idx, val); }
 
+    const VecRegContainer& readVecRegFlat(int id) const
+    { return actualTC->readVecRegFlat(id); } // takes an int id, not a RegId
+
+    VecRegContainer& getWritableVecRegFlat(int id)
+    { return actualTC->getWritableVecRegFlat(id); } // non-const, via actualTC
+
+    void setVecRegFlat(int idx, const VecRegContainer& val)
+    { actualTC->setVecRegFlat(idx, val); } // write is applied via actualTC
+
+    const VecElem& readVecElemFlat(const RegIndex& id,
+                                   const ElemIndex& elemIndex) const
+    { return actualTC->readVecElemFlat(id, elemIndex); } // via actualTC
+
+    void setVecElemFlat(const RegIndex& id, const ElemIndex& elemIndex,
+                        const VecElem& val)
+    { actualTC->setVecElemFlat(id, elemIndex, val); } // via actualTC
+
     CCReg readCCRegFlat(int idx)
     { return actualTC->readCCRegFlat(idx); }
 
diff --git a/src/sim/serialize.cc b/src/sim/serialize.cc
index f06aaf899..f49092d4d 100644
--- a/src/sim/serialize.cc
+++ b/src/sim/serialize.cc
@@ -57,6 +57,7 @@
 #include <string>
 #include <vector>
 
+#include "arch/generic/vec_reg.hh"
 #include "base/framebuffer.hh"
 #include "base/inifile.hh"
 #include "base/misc.hh"
@@ -471,6 +472,8 @@ INSTANTIATE_PARAM_TEMPLATES(float)
 INSTANTIATE_PARAM_TEMPLATES(double)
 INSTANTIATE_PARAM_TEMPLATES(string)
 INSTANTIATE_PARAM_TEMPLATES(Pixel)
+INSTANTIATE_PARAM_TEMPLATES(VecRegContainer<8>)  // NOTE(review): only <8> and
+INSTANTIATE_PARAM_TEMPLATES(VecRegContainer<16>) // <16> are covered - confirm
 
 // set is only used with strings and furthermore doesn't agree with Pixel
 template void
-- 
2.30.2