arm: Add support for ARMv8 (AArch64 & AArch32)
[gem5.git] / src / arch / arm / isa / insts / macromem.isa
index ca2c7c6abda6f2a3da506841aa2a70f2bf06ed59..f164595dde1a067b6c576d23b431e541ee68840b 100644 (file)
@@ -1,6 +1,6 @@
 // -*- mode:c++ -*-
 
-// Copyright (c) 2010 ARM Limited
+// Copyright (c) 2010-2013 ARM Limited
 // All rights reserved
 //
 // The license below extends only to copyright in the software and shall
 //
 
 let {{
-    microLdrUopCode = "IWRa = cSwap(Mem.uw, ((CPSR)Cpsr).e);"
+    microLdrUopCode = "IWRa = cSwap(Mem_uw, ((CPSR)Cpsr).e);"
     microLdrUopIop = InstObjParams('ldr_uop', 'MicroLdrUop',
                                    'MicroMemOp',
                                    {'memacc_code': microLdrUopCode,
-                                    'ea_code': 'EA = Rb + (up ? imm : -imm);',
+                                    'ea_code': 'EA = URb + (up ? imm : -imm);',
                                     'predicate_test': predicateTest},
                                    ['IsMicroop'])
 
-    microLdrFpUopCode = "Fa.uw = cSwap(Mem.uw, ((CPSR)Cpsr).e);"
+    microLdrFpUopCode = "Fa_uw = cSwap(Mem_uw, ((CPSR)Cpsr).e);"
     microLdrFpUopIop = InstObjParams('ldrfp_uop', 'MicroLdrFpUop',
-                                     'MicroMemOp',
-                                     {'memacc_code': microLdrFpUopCode,
-                                      'ea_code': 'EA = Rb + (up ? imm : -imm);',
-                                      'predicate_test': predicateTest},
-                                     ['IsMicroop'])
+                                      'MicroMemOp',
+                                      {'memacc_code': microLdrFpUopCode,
+                                       'ea_code': vfpEnabledCheckCode +
+                                           'EA = URb + (up ? imm : -imm);',
+                                       'predicate_test': predicateTest},
+                                      ['IsMicroop'])
+
+    microLdrDBFpUopCode = "Fa_uw = cSwap(Mem_uw, ((CPSR)Cpsr).e);"
+    microLdrDBFpUopIop = InstObjParams('ldrfp_uop', 'MicroLdrDBFpUop',
+                                      'MicroMemOp',
+                                      {'memacc_code': microLdrFpUopCode,
+                                       'ea_code': vfpEnabledCheckCode + '''
+                                        EA = URb + (up ? imm : -imm) +
+                                             (((CPSR)Cpsr).e ? 4 : 0);
+                                        ''',
+                                       'predicate_test': predicateTest},
+                                      ['IsMicroop'])
+
+    microLdrDTFpUopCode = "Fa_uw = cSwap(Mem_uw, ((CPSR)Cpsr).e);"
+    microLdrDTFpUopIop = InstObjParams('ldrfp_uop', 'MicroLdrDTFpUop',
+                                      'MicroMemOp',
+                                      {'memacc_code': microLdrFpUopCode,
+                                       'ea_code': vfpEnabledCheckCode + '''
+                                        EA = URb + (up ? imm : -imm) -
+                                             (((CPSR)Cpsr).e ? 4 : 0);
+                                        ''',
+                                       'predicate_test': predicateTest},
+                                      ['IsMicroop'])
 
-    microLdrRetUopCode = '''
-        CPSR cpsr = Cpsr;
+    microRetUopCode = '''
+        CPSR old_cpsr = Cpsr;
         SCTLR sctlr = Sctlr;
-        uint32_t newCpsr =
-            cpsrWriteByInstr(cpsr | CondCodes, Spsr, 0xF, true, sctlr.nmfi);
-        Cpsr = ~CondCodesMask & newCpsr;
-        CondCodes = CondCodesMask & newCpsr;
-        IWNPC = cSwap(Mem.uw, cpsr.e) | ((Spsr & 0x20) ? 1 : 0);
+
+        CPSR new_cpsr =
+            cpsrWriteByInstr(old_cpsr, Spsr, Scr, Nsacr, 0xF, true,
+                             sctlr.nmfi, xc->tcBase());
+        Cpsr = ~CondCodesMask & new_cpsr;
+        CondCodesNZ = new_cpsr.nz;
+        CondCodesC = new_cpsr.c;
+        CondCodesV = new_cpsr.v;
+        CondCodesGE = new_cpsr.ge;
+        IWNPC = cSwap(%s, old_cpsr.e) | ((Spsr & 0x20) ? 1 : 0);
+        NextItState = ((((CPSR)Spsr).it2 << 2) & 0xFC)
+                | (((CPSR)Spsr).it1 & 0x3);
+        SevMailbox = 1;
     '''
+
     microLdrRetUopIop = InstObjParams('ldr_ret_uop', 'MicroLdrRetUop',
                                       'MicroMemOp',
-                                      {'memacc_code': microLdrRetUopCode,
+                                      {'memacc_code':
+                                          microRetUopCode % 'Mem_uw',
                                        'ea_code':
-                                          'EA = Rb + (up ? imm : -imm);',
+                                          'EA = URb + (up ? imm : -imm);',
                                        'predicate_test': condPredicateTest},
-                                      ['IsMicroop'])
+                                      ['IsMicroop','IsNonSpeculative',
+                                       'IsSerializeAfter'])
 
-    microStrUopCode = "Mem = cSwap(Ra.uw, ((CPSR)Cpsr).e);"
+    microStrUopCode = "Mem = cSwap(URa_uw, ((CPSR)Cpsr).e);"
     microStrUopIop = InstObjParams('str_uop', 'MicroStrUop',
                                    'MicroMemOp',
                                    {'memacc_code': microStrUopCode,
                                     'postacc_code': "",
-                                    'ea_code': 'EA = Rb + (up ? imm : -imm);',
+                                    'ea_code': 'EA = URb + (up ? imm : -imm);',
                                     'predicate_test': predicateTest},
                                    ['IsMicroop'])
 
-    microStrFpUopCode = "Mem = cSwap(Fa.uw, ((CPSR)Cpsr).e);"
+    microStrFpUopCode = "Mem = cSwap(Fa_uw, ((CPSR)Cpsr).e);"
     microStrFpUopIop = InstObjParams('strfp_uop', 'MicroStrFpUop',
                                      'MicroMemOp',
                                      {'memacc_code': microStrFpUopCode,
                                       'postacc_code': "",
-                                      'ea_code': 'EA = Rb + (up ? imm : -imm);',
+                                      'ea_code': vfpEnabledCheckCode +
+                                           'EA = URb + (up ? imm : -imm);',
                                       'predicate_test': predicateTest},
                                      ['IsMicroop'])
 
+    microStrDBFpUopCode = "Mem = cSwap(Fa_uw, ((CPSR)Cpsr).e);"
+    microStrDBFpUopIop = InstObjParams('strfp_uop', 'MicroStrDBFpUop',
+                                       'MicroMemOp',
+                                       {'memacc_code': microStrFpUopCode,
+                                        'postacc_code': "",
+                                        'ea_code': vfpEnabledCheckCode + '''
+                                         EA = URb + (up ? imm : -imm) +
+                                              (((CPSR)Cpsr).e ? 4 : 0);
+                                         ''',
+                                        'predicate_test': predicateTest},
+                                       ['IsMicroop'])
+
+    microStrDTFpUopCode = "Mem = cSwap(Fa_uw, ((CPSR)Cpsr).e);"
+    microStrDTFpUopIop = InstObjParams('strfp_uop', 'MicroStrDTFpUop',
+                                       'MicroMemOp',
+                                       {'memacc_code': microStrFpUopCode,
+                                        'postacc_code': "",
+                                        'ea_code': vfpEnabledCheckCode + '''
+                                         EA = URb + (up ? imm : -imm) -
+                                              (((CPSR)Cpsr).e ? 4 : 0);
+                                         ''',
+                                        'predicate_test': predicateTest},
+                                       ['IsMicroop'])
+
     header_output = decoder_output = exec_output = ''
 
-    loadIops = (microLdrUopIop, microLdrFpUopIop, microLdrRetUopIop)
-    storeIops = (microStrUopIop, microStrFpUopIop)
+    loadIops = (microLdrUopIop, microLdrRetUopIop, microLdrFpUopIop,
+                microLdrDBFpUopIop, microLdrDTFpUopIop)
+    storeIops = (microStrUopIop, microStrFpUopIop,
+                 microStrDBFpUopIop, microStrDTFpUopIop)
     for iop in loadIops + storeIops:
         header_output += MicroMemDeclare.subst(iop)
         decoder_output += MicroMemConstructor.subst(iop)
@@ -115,6 +176,407 @@ let {{
                        StoreCompleteAcc.subst(iop)
 }};
 
+let {{
+    exec_output = header_output = ''
+
+    eaCode = 'EA = XURa + imm;'
+
+    for size in (1, 2, 3, 4, 6, 8, 12, 16):
+        # Set up the memory access.
+        regs = (size + 3) // 4
+        subst = { "size" : size, "regs" : regs }
+        memDecl = '''
+        union MemUnion {
+            uint8_t bytes[%(size)d];
+            Element elements[%(size)d / sizeof(Element)];
+            uint32_t floatRegBits[%(regs)d];
+        };
+        ''' % subst
+
+        # Do endian conversion for all the elements.
+        convCode = '''
+            const unsigned eCount = sizeof(memUnion.elements) /
+                                    sizeof(memUnion.elements[0]);
+            if (((CPSR)Cpsr).e) {
+                for (unsigned i = 0; i < eCount; i++) {
+                    memUnion.elements[i] = gtobe(memUnion.elements[i]);
+                }
+            } else {
+                for (unsigned i = 0; i < eCount; i++) {
+                    memUnion.elements[i] = gtole(memUnion.elements[i]);
+                }
+            }
+        '''
+
+        # Offload everything into registers
+        regSetCode = ''
+        for reg in range(regs):
+            mask = ''
+            if reg == regs - 1:
+                mask = ' & mask(%d)' % (32 - 8 * (regs * 4 - size))
+            regSetCode += '''
+            FpDestP%(reg)d_uw = gtoh(memUnion.floatRegBits[%(reg)d])%(mask)s;
+            ''' % { "reg" : reg, "mask" : mask }
+
+        # Pull everything in from registers
+        regGetCode = ''
+        for reg in range(regs):
+            regGetCode += '''
+            memUnion.floatRegBits[%(reg)d] = htog(FpDestP%(reg)d_uw);
+            ''' % { "reg" : reg }
+
+        loadMemAccCode = convCode + regSetCode
+        storeMemAccCode = regGetCode + convCode
+
+        loadIop = InstObjParams('ldrneon%(size)d_uop' % subst,
+                                'MicroLdrNeon%(size)dUop' % subst,
+                                'MicroNeonMemOp',
+                                { 'mem_decl' : memDecl,
+                                  'size' : size,
+                                  'memacc_code' : loadMemAccCode,
+                                  'ea_code' : simdEnabledCheckCode + eaCode,
+                                  'predicate_test' : predicateTest },
+                                [ 'IsMicroop', 'IsMemRef', 'IsLoad' ])
+        storeIop = InstObjParams('strneon%(size)d_uop' % subst,
+                                 'MicroStrNeon%(size)dUop' % subst,
+                                 'MicroNeonMemOp',
+                                 { 'mem_decl' : memDecl,
+                                   'size' : size,
+                                   'memacc_code' : storeMemAccCode,
+                                   'ea_code' : simdEnabledCheckCode + eaCode,
+                                   'predicate_test' : predicateTest },
+                                 [ 'IsMicroop', 'IsMemRef', 'IsStore' ])
+
+        exec_output += NeonLoadExecute.subst(loadIop) + \
+                       NeonLoadInitiateAcc.subst(loadIop) + \
+                       NeonLoadCompleteAcc.subst(loadIop) + \
+                       NeonStoreExecute.subst(storeIop) + \
+                       NeonStoreInitiateAcc.subst(storeIop) + \
+                       NeonStoreCompleteAcc.subst(storeIop)
+        header_output += MicroNeonMemDeclare.subst(loadIop) + \
+                         MicroNeonMemDeclare.subst(storeIop)
+}};
+
+let {{
+    exec_output = ''
+    for eSize, type in (1, 'uint8_t'), \
+                       (2, 'uint16_t'), \
+                       (4, 'uint32_t'), \
+                       (8, 'uint64_t'):
+        size = eSize
+        # An instruction handles no more than 16 bytes and no more than
+        # 4 elements, or the number of elements needed to fill 8 or 16 bytes.
+        sizes = set((16, 8))
+        for count in 1, 2, 3, 4:
+            size = count * eSize
+            if size <= 16:
+                sizes.add(size)
+        for size in sizes:
+            substDict = {
+                "class_name" : "MicroLdrNeon%dUop" % size,
+                "targs" : type
+            }
+            exec_output += MicroNeonMemExecDeclare.subst(substDict)
+            substDict["class_name"] = "MicroStrNeon%dUop" % size
+            exec_output += MicroNeonMemExecDeclare.subst(substDict)
+            size += eSize
+}};
+
+////////////////////////////////////////////////////////////////////
+//
+// Neon (de)interlacing microops
+//
+
+let {{
+    header_output = exec_output = ''
+    for dRegs in (2, 3, 4):
+        loadConv = ''
+        unloadConv = ''
+        for dReg in range(dRegs):
+            loadConv += '''
+                conv1.cRegs[%(sReg0)d] = htog(FpOp1P%(sReg0)d_uw);
+                conv1.cRegs[%(sReg1)d] = htog(FpOp1P%(sReg1)d_uw);
+            ''' % { "sReg0" : (dReg * 2), "sReg1" : (dReg * 2 + 1) }
+            unloadConv += '''
+                FpDestS%(dReg)dP0_uw = gtoh(conv2.cRegs[2 * %(dReg)d + 0]);
+                FpDestS%(dReg)dP1_uw = gtoh(conv2.cRegs[2 * %(dReg)d + 1]);
+            ''' % { "dReg" : dReg }
+        microDeintNeonCode = '''
+            const unsigned dRegs = %(dRegs)d;
+            const unsigned regs = 2 * dRegs;
+            const unsigned perDReg = (2 * sizeof(FloatRegBits)) /
+                                     sizeof(Element);
+            union convStruct {
+                FloatRegBits cRegs[regs];
+                Element elements[dRegs * perDReg];
+            } conv1, conv2;
+
+            %(loadConv)s
+
+            unsigned srcElem = 0;
+            for (unsigned destOffset = 0;
+                    destOffset < perDReg; destOffset++) {
+                for (unsigned dReg = 0; dReg < dRegs; dReg++) {
+                    conv2.elements[dReg * perDReg + destOffset] =
+                        conv1.elements[srcElem++];
+                }
+            }
+
+            %(unloadConv)s
+        ''' % { "dRegs" : dRegs,
+                "loadConv" : loadConv,
+                "unloadConv" : unloadConv }
+        microDeintNeonIop = \
+            InstObjParams('deintneon%duop' % (dRegs * 2),
+                          'MicroDeintNeon%dUop' % (dRegs * 2),
+                          'MicroNeonMixOp',
+                          { 'predicate_test': predicateTest,
+                            'code' : microDeintNeonCode },
+                            ['IsMicroop'])
+        header_output += MicroNeonMixDeclare.subst(microDeintNeonIop)
+        exec_output += MicroNeonMixExecute.subst(microDeintNeonIop)
+
+        loadConv = ''
+        unloadConv = ''
+        for dReg in range(dRegs):
+            loadConv += '''
+                conv1.cRegs[2 * %(dReg)d + 0] = htog(FpOp1S%(dReg)dP0_uw);
+                conv1.cRegs[2 * %(dReg)d + 1] = htog(FpOp1S%(dReg)dP1_uw);
+            ''' % { "dReg" : dReg }
+            unloadConv += '''
+                FpDestP%(sReg0)d_uw = gtoh(conv2.cRegs[%(sReg0)d]);
+                FpDestP%(sReg1)d_uw = gtoh(conv2.cRegs[%(sReg1)d]);
+            ''' % { "sReg0" : (dReg * 2), "sReg1" : (dReg * 2 + 1) }
+        microInterNeonCode = '''
+            const unsigned dRegs = %(dRegs)d;
+            const unsigned regs = 2 * dRegs;
+            const unsigned perDReg = (2 * sizeof(FloatRegBits)) /
+                                     sizeof(Element);
+            union convStruct {
+                FloatRegBits cRegs[regs];
+                Element elements[dRegs * perDReg];
+            } conv1, conv2;
+
+            %(loadConv)s
+
+            unsigned destElem = 0;
+            for (unsigned srcOffset = 0;
+                    srcOffset < perDReg; srcOffset++) {
+                for (unsigned dReg = 0; dReg < dRegs; dReg++) {
+                    conv2.elements[destElem++] =
+                        conv1.elements[dReg * perDReg + srcOffset];
+                }
+            }
+
+            %(unloadConv)s
+        ''' % { "dRegs" : dRegs,
+                "loadConv" : loadConv,
+                "unloadConv" : unloadConv }
+        microInterNeonIop = \
+            InstObjParams('interneon%duop' % (dRegs * 2),
+                          'MicroInterNeon%dUop' % (dRegs * 2),
+                          'MicroNeonMixOp',
+                          { 'predicate_test': predicateTest,
+                            'code' : microInterNeonCode },
+                            ['IsMicroop'])
+        header_output += MicroNeonMixDeclare.subst(microInterNeonIop)
+        exec_output += MicroNeonMixExecute.subst(microInterNeonIop)
+}};
+
+let {{
+    exec_output = ''
+    for type in ('uint8_t', 'uint16_t', 'uint32_t', 'uint64_t'):
+        for dRegs in (2, 3, 4):
+            Name = "MicroDeintNeon%dUop" % (dRegs * 2)
+            substDict = { "class_name" : Name, "targs" : type }
+            exec_output += MicroNeonExecDeclare.subst(substDict)
+            Name = "MicroInterNeon%dUop" % (dRegs * 2)
+            substDict = { "class_name" : Name, "targs" : type }
+            exec_output += MicroNeonExecDeclare.subst(substDict)
+}};
+
+////////////////////////////////////////////////////////////////////
+//
+// Neon microops to pack/unpack a single lane
+//
+
+let {{
+    header_output = exec_output = ''
+    for sRegs in 1, 2:
+        baseLoadRegs = ''
+        for reg in range(sRegs):
+            baseLoadRegs += '''
+                sourceRegs.fRegs[%(reg0)d] = htog(FpOp1P%(reg0)d_uw);
+                sourceRegs.fRegs[%(reg1)d] = htog(FpOp1P%(reg1)d_uw);
+            ''' % { "reg0" : (2 * reg + 0),
+                    "reg1" : (2 * reg + 1) }
+        for dRegs in range(sRegs, 5):
+            unloadRegs = ''
+            loadRegs = baseLoadRegs
+            for reg in range(dRegs):
+                loadRegs += '''
+                    destRegs[%(reg)d].fRegs[0] = htog(FpDestS%(reg)dP0_uw);
+                    destRegs[%(reg)d].fRegs[1] = htog(FpDestS%(reg)dP1_uw);
+                ''' % { "reg" : reg }
+                unloadRegs += '''
+                    FpDestS%(reg)dP0_uw = gtoh(destRegs[%(reg)d].fRegs[0]);
+                    FpDestS%(reg)dP1_uw = gtoh(destRegs[%(reg)d].fRegs[1]);
+                ''' % { "reg" : reg }
+            microUnpackNeonCode = '''
+            const unsigned perDReg = (2 * sizeof(FloatRegBits)) /
+                                     sizeof(Element);
+
+            union SourceRegs {
+                FloatRegBits fRegs[2 * %(sRegs)d];
+                Element elements[%(sRegs)d * perDReg];
+            } sourceRegs;
+
+            union DestReg {
+                FloatRegBits fRegs[2];
+                Element elements[perDReg];
+            } destRegs[%(dRegs)d];
+
+            %(loadRegs)s
+
+            for (unsigned i = 0; i < %(dRegs)d; i++) {
+                destRegs[i].elements[lane] = sourceRegs.elements[i];
+            }
+
+            %(unloadRegs)s
+            ''' % { "sRegs" : sRegs, "dRegs" : dRegs,
+                    "loadRegs" : loadRegs, "unloadRegs" : unloadRegs }
+
+            microUnpackNeonIop = \
+                InstObjParams('unpackneon%dto%duop' % (sRegs * 2, dRegs * 2),
+                              'MicroUnpackNeon%dto%dUop' %
+                                    (sRegs * 2, dRegs * 2),
+                              'MicroNeonMixLaneOp',
+                              { 'predicate_test': predicateTest,
+                                'code' : microUnpackNeonCode },
+                                ['IsMicroop'])
+            header_output += MicroNeonMixLaneDeclare.subst(microUnpackNeonIop)
+            exec_output += MicroNeonMixExecute.subst(microUnpackNeonIop)
+
+    for sRegs in 1, 2:
+        loadRegs = ''
+        for reg in range(sRegs):
+            loadRegs += '''
+                sourceRegs.fRegs[%(reg0)d] = htog(FpOp1P%(reg0)d_uw);
+                sourceRegs.fRegs[%(reg1)d] = htog(FpOp1P%(reg1)d_uw);
+            ''' % { "reg0" : (2 * reg + 0),
+                    "reg1" : (2 * reg + 1) }
+        for dRegs in range(sRegs, 5):
+            unloadRegs = ''
+            for reg in range(dRegs):
+                unloadRegs += '''
+                    FpDestS%(reg)dP0_uw = gtoh(destRegs[%(reg)d].fRegs[0]);
+                    FpDestS%(reg)dP1_uw = gtoh(destRegs[%(reg)d].fRegs[1]);
+                ''' % { "reg" : reg }
+            microUnpackAllNeonCode = '''
+            const unsigned perDReg = (2 * sizeof(FloatRegBits)) /
+                                     sizeof(Element);
+
+            union SourceRegs {
+                FloatRegBits fRegs[2 * %(sRegs)d];
+                Element elements[%(sRegs)d * perDReg];
+            } sourceRegs;
+
+            union DestReg {
+                FloatRegBits fRegs[2];
+                Element elements[perDReg];
+            } destRegs[%(dRegs)d];
+
+            %(loadRegs)s
+
+            for (unsigned i = 0; i < %(dRegs)d; i++) {
+                for (unsigned j = 0; j < perDReg; j++)
+                    destRegs[i].elements[j] = sourceRegs.elements[i];
+            }
+
+            %(unloadRegs)s
+            ''' % { "sRegs" : sRegs, "dRegs" : dRegs,
+                    "loadRegs" : loadRegs, "unloadRegs" : unloadRegs }
+
+            microUnpackAllNeonIop = \
+                InstObjParams('unpackallneon%dto%duop' % (sRegs * 2, dRegs * 2),
+                              'MicroUnpackAllNeon%dto%dUop' %
+                                    (sRegs * 2, dRegs * 2),
+                              'MicroNeonMixOp',
+                              { 'predicate_test': predicateTest,
+                                'code' : microUnpackAllNeonCode },
+                                ['IsMicroop'])
+            header_output += MicroNeonMixDeclare.subst(microUnpackAllNeonIop)
+            exec_output += MicroNeonMixExecute.subst(microUnpackAllNeonIop)
+
+    for dRegs in 1, 2:
+        unloadRegs = ''
+        for reg in range(dRegs):
+            unloadRegs += '''
+                FpDestP%(reg0)d_uw = gtoh(destRegs.fRegs[%(reg0)d]);
+                FpDestP%(reg1)d_uw = gtoh(destRegs.fRegs[%(reg1)d]);
+            ''' % { "reg0" : (2 * reg + 0),
+                    "reg1" : (2 * reg + 1) }
+        for sRegs in range(dRegs, 5):
+            loadRegs = ''
+            for reg in range(sRegs):
+                loadRegs += '''
+                    sourceRegs[%(reg)d].fRegs[0] = htog(FpOp1S%(reg)dP0_uw);
+                    sourceRegs[%(reg)d].fRegs[1] = htog(FpOp1S%(reg)dP1_uw);
+                ''' % { "reg" : reg }
+            microPackNeonCode = '''
+            const unsigned perDReg = (2 * sizeof(FloatRegBits)) /
+                                     sizeof(Element);
+
+            union SourceReg {
+                FloatRegBits fRegs[2];
+                Element elements[perDReg];
+            } sourceRegs[%(sRegs)d];
+
+            union DestRegs {
+                FloatRegBits fRegs[2 * %(dRegs)d];
+                Element elements[%(dRegs)d * perDReg];
+            } destRegs;
+
+            %(loadRegs)s
+
+            for (unsigned i = 0; i < %(sRegs)d; i++) {
+                destRegs.elements[i] = sourceRegs[i].elements[lane];
+            }
+            for (unsigned i = %(sRegs)d; i < %(dRegs)d * perDReg; ++i) {
+                destRegs.elements[i] = 0;
+            }
+
+            %(unloadRegs)s
+            ''' % { "sRegs" : sRegs, "dRegs" : dRegs,
+                    "loadRegs" : loadRegs, "unloadRegs" : unloadRegs }
+
+            microPackNeonIop = \
+                InstObjParams('packneon%dto%duop' % (sRegs * 2, dRegs * 2),
+                              'MicroPackNeon%dto%dUop' %
+                                    (sRegs * 2, dRegs * 2),
+                              'MicroNeonMixLaneOp',
+                              { 'predicate_test': predicateTest,
+                                'code' : microPackNeonCode },
+                                ['IsMicroop'])
+            header_output += MicroNeonMixLaneDeclare.subst(microPackNeonIop)
+            exec_output += MicroNeonMixExecute.subst(microPackNeonIop)
+}};
+
+let {{
+    exec_output = ''
+    for typeSize in (8, 16, 32):
+        for sRegs in 1, 2:
+            for dRegs in range(sRegs, min(sRegs * 64 / typeSize + 1, 5)):
+                for format in ("MicroUnpackNeon%(sRegs)dto%(dRegs)dUop",
+                               "MicroUnpackAllNeon%(sRegs)dto%(dRegs)dUop",
+                               "MicroPackNeon%(dRegs)dto%(sRegs)dUop"):
+                    Name = format % { "sRegs" : sRegs * 2,
+                                      "dRegs" : dRegs * 2 }
+                    substDict = { "class_name" : Name,
+                                  "targs" : "uint%d_t" % typeSize }
+                    exec_output += MicroNeonExecDeclare.subst(substDict)
+}};
+
 ////////////////////////////////////////////////////////////////////
 //
 // Integer = Integer op Immediate microops
@@ -122,23 +584,134 @@ let {{
 
 let {{
     microAddiUopIop = InstObjParams('addi_uop', 'MicroAddiUop',
-                                    'MicroIntOp',
-                                    {'code': 'Ra = Rb + imm;',
+                                    'MicroIntImmOp',
+                                    {'code': 'URa = URb + imm;',
                                      'predicate_test': predicateTest},
                                     ['IsMicroop'])
 
+    microAddUopCode = '''
+        URa = URb + shift_rm_imm(URc, shiftAmt, shiftType, OptShiftRmCondCodesC);
+    '''
+
+    microAddXiUopIop = InstObjParams('addxi_uop', 'MicroAddXiUop',
+                                     'MicroIntImmXOp',
+                                     'XURa = XURb + imm;',
+                                     ['IsMicroop'])
+
+    microAddXiSpAlignUopIop = InstObjParams('addxi_uop', 'MicroAddXiSpAlignUop',
+                                            'MicroIntImmXOp', '''
+        if (isSP((IntRegIndex) urb) && bits(XURb, 3, 0) &&
+            SPAlignmentCheckEnabled(xc->tcBase())) {
+            return new SPAlignmentFault();
+        }
+        XURa = XURb + imm;
+    ''', ['IsMicroop'])
+
+    microAddXERegUopIop = InstObjParams('addxr_uop', 'MicroAddXERegUop',
+                                        'MicroIntRegXOp',
+                                        'XURa = XURb + ' + \
+                                            'extendReg64(XURc, type, shiftAmt, 64);',
+                                        ['IsMicroop'])
+
+    microAddUopIop = InstObjParams('add_uop', 'MicroAddUop',
+                                   'MicroIntRegOp',
+                                   {'code': microAddUopCode,
+                                    'predicate_test': pickPredicate(microAddUopCode)},
+                                   ['IsMicroop'])
+
     microSubiUopIop = InstObjParams('subi_uop', 'MicroSubiUop',
-                                    'MicroIntOp',
-                                    {'code': 'Ra = Rb - imm;',
+                                    'MicroIntImmOp',
+                                    {'code': 'URa = URb - imm;',
                                      'predicate_test': predicateTest},
                                     ['IsMicroop'])
 
-    header_output = MicroIntDeclare.subst(microAddiUopIop) + \
-                    MicroIntDeclare.subst(microSubiUopIop)
-    decoder_output = MicroIntConstructor.subst(microAddiUopIop) + \
-                     MicroIntConstructor.subst(microSubiUopIop)
+    microSubXiUopIop = InstObjParams('subxi_uop', 'MicroSubXiUop',
+                                     'MicroIntImmXOp',
+                                     'XURa = XURb - imm;',
+                                     ['IsMicroop'])
+
+    microSubUopCode = '''
+        URa = URb - shift_rm_imm(URc, shiftAmt, shiftType, OptShiftRmCondCodesC);
+    '''
+    microSubUopIop = InstObjParams('sub_uop', 'MicroSubUop',
+                                   'MicroIntRegOp',
+                                   {'code': microSubUopCode,
+                                    'predicate_test': pickPredicate(microSubUopCode)},
+                                   ['IsMicroop'])
+
+    microUopRegMovIop = InstObjParams('uopReg_uop', 'MicroUopRegMov',
+                                   'MicroIntMov',
+                                   {'code': 'IWRa = URb;',
+                                    'predicate_test': predicateTest},
+                                   ['IsMicroop'])
+
+    microUopRegMovRetIop = InstObjParams('movret_uop', 'MicroUopRegMovRet',
+                                      'MicroIntMov',
+                                     {'code': microRetUopCode % 'URb',
+                                      'predicate_test': predicateTest},
+                                     ['IsMicroop', 'IsNonSpeculative',
+                                      'IsSerializeAfter'])
+
+    setPCCPSRDecl = '''
+                    CPSR cpsrOrCondCodes = URc;
+                    SCTLR sctlr = Sctlr;
+                    pNPC = URa;
+                    CPSR new_cpsr =
+                    cpsrWriteByInstr(cpsrOrCondCodes, URb, Scr, Nsacr,
+                                     0xF, true, sctlr.nmfi, xc->tcBase());
+                    Cpsr = ~CondCodesMask & new_cpsr;
+                    NextThumb = new_cpsr.t;
+                    NextJazelle = new_cpsr.j;
+                    NextItState = ((((CPSR)URb).it2 << 2) & 0xFC)
+                                    | (((CPSR)URb).it1 & 0x3);
+                    CondCodesNZ = new_cpsr.nz;
+                    CondCodesC = new_cpsr.c;
+                    CondCodesV = new_cpsr.v;
+                    CondCodesGE = new_cpsr.ge;
+                    '''
+
+    microUopSetPCCPSRIop = InstObjParams('uopSet_uop', 'MicroUopSetPCCPSR',
+                                         'MicroSetPCCPSR',
+                                         {'code': setPCCPSRDecl,
+                                          'predicate_test': predicateTest},
+                                         ['IsMicroop'])
+
+    header_output = MicroIntImmDeclare.subst(microAddiUopIop) + \
+                    MicroIntImmDeclare.subst(microAddXiUopIop) + \
+                    MicroIntImmDeclare.subst(microAddXiSpAlignUopIop) + \
+                    MicroIntImmDeclare.subst(microSubiUopIop) + \
+                    MicroIntImmDeclare.subst(microSubXiUopIop) + \
+                    MicroIntRegDeclare.subst(microAddUopIop) + \
+                    MicroIntRegDeclare.subst(microSubUopIop) + \
+                    MicroIntXERegDeclare.subst(microAddXERegUopIop) + \
+                    MicroIntMovDeclare.subst(microUopRegMovIop) + \
+                    MicroIntMovDeclare.subst(microUopRegMovRetIop) + \
+                    MicroSetPCCPSRDeclare.subst(microUopSetPCCPSRIop)
+
+    decoder_output = MicroIntImmConstructor.subst(microAddiUopIop) + \
+                     MicroIntImmXConstructor.subst(microAddXiUopIop) + \
+                     MicroIntImmXConstructor.subst(microAddXiSpAlignUopIop) + \
+                     MicroIntImmConstructor.subst(microSubiUopIop) + \
+                     MicroIntImmXConstructor.subst(microSubXiUopIop) + \
+                     MicroIntRegConstructor.subst(microAddUopIop) + \
+                     MicroIntRegConstructor.subst(microSubUopIop) + \
+                     MicroIntXERegConstructor.subst(microAddXERegUopIop) + \
+                     MicroIntMovConstructor.subst(microUopRegMovIop) + \
+                     MicroIntMovConstructor.subst(microUopRegMovRetIop) + \
+                     MicroSetPCCPSRConstructor.subst(microUopSetPCCPSRIop)
+
     exec_output = PredOpExecute.subst(microAddiUopIop) + \
-                  PredOpExecute.subst(microSubiUopIop)
+                  BasicExecute.subst(microAddXiUopIop) + \
+                  BasicExecute.subst(microAddXiSpAlignUopIop) + \
+                  PredOpExecute.subst(microSubiUopIop) + \
+                  BasicExecute.subst(microSubXiUopIop) + \
+                  PredOpExecute.subst(microAddUopIop) + \
+                  PredOpExecute.subst(microSubUopIop) + \
+                  BasicExecute.subst(microAddXERegUopIop) + \
+                  PredOpExecute.subst(microUopRegMovIop) + \
+                  PredOpExecute.subst(microUopRegMovRetIop) + \
+                  PredOpExecute.subst(microUopSetPCCPSRIop)
+
 }};
 
 let {{
@@ -146,6 +719,41 @@ let {{
     header_output = MacroMemDeclare.subst(iop)
     decoder_output = MacroMemConstructor.subst(iop)
 
+    iop = InstObjParams("ldpstp", "LdpStp", 'PairMemOp', "", [])
+    header_output += PairMemDeclare.subst(iop)
+    decoder_output += PairMemConstructor.subst(iop)
+
+    iopImm = InstObjParams("bigfpmemimm", "BigFpMemImm", "BigFpMemImmOp", "")
+    iopPre = InstObjParams("bigfpmempre", "BigFpMemPre", "BigFpMemPreOp", "")
+    iopPost = InstObjParams("bigfpmempost", "BigFpMemPost", "BigFpMemPostOp", "")
+    for iop in (iopImm, iopPre, iopPost):
+        header_output += BigFpMemImmDeclare.subst(iop)
+        decoder_output += BigFpMemImmConstructor.subst(iop)
+
+    iop = InstObjParams("bigfpmemreg", "BigFpMemReg", "BigFpMemRegOp", "")
+    header_output += BigFpMemRegDeclare.subst(iop)
+    decoder_output += BigFpMemRegConstructor.subst(iop)
+
+    iop = InstObjParams("bigfpmemlit", "BigFpMemLit", "BigFpMemLitOp", "")
+    header_output += BigFpMemLitDeclare.subst(iop)
+    decoder_output += BigFpMemLitConstructor.subst(iop)
+
+    iop = InstObjParams("vldmult", "VldMult", 'VldMultOp', "", [])
+    header_output += VMemMultDeclare.subst(iop)
+    decoder_output += VMemMultConstructor.subst(iop)
+
+    iop = InstObjParams("vldsingle", "VldSingle", 'VldSingleOp', "", [])
+    header_output += VMemSingleDeclare.subst(iop)
+    decoder_output += VMemSingleConstructor.subst(iop)
+
+    iop = InstObjParams("vstmult", "VstMult", 'VstMultOp', "", [])
+    header_output += VMemMultDeclare.subst(iop)
+    decoder_output += VMemMultConstructor.subst(iop)
+
+    iop = InstObjParams("vstsingle", "VstSingle", 'VstSingleOp', "", [])
+    header_output += VMemSingleDeclare.subst(iop)
+    decoder_output += VMemSingleConstructor.subst(iop)
+
     vfpIop = InstObjParams("vldmstm", "VLdmStm", 'MacroVFPMemOp', "", [])
     header_output += MacroVFPMemDeclare.subst(vfpIop)
     decoder_output += MacroVFPMemConstructor.subst(vfpIop)