ARM: Cleanup and small fixes to some NEON ops to match the spec.
author William Wang <William.Wang@arm.com>
Mon, 4 Apr 2011 16:42:28 +0000 (11:42 -0500)
committer William Wang <William.Wang@arm.com>
Mon, 4 Apr 2011 16:42:28 +0000 (11:42 -0500)
Only certain bits of the CPACR can be written; some must be equal.
Mult instructions that write the same register should do something sane.

src/arch/arm/isa.cc
src/arch/arm/isa/insts/mult.isa
src/arch/arm/isa/insts/neon.isa
src/arch/arm/miscregs.hh
src/arch/arm/utility.hh

index f3f73089634dee80978b746ba44989b49e282b40..216ae04e746b89c473969c1e5af296d40dac0a77 100644 (file)
@@ -268,19 +268,22 @@ ISA::setMiscReg(int misc_reg, const MiscReg &val, ThreadContext *tc)
         switch (misc_reg) {
           case MISCREG_CPACR:
             {
-                CPACR newCpacr = 0;
-                CPACR valCpacr = val;
-                newCpacr.cp10 = valCpacr.cp10;
-                newCpacr.cp11 = valCpacr.cp11;
-                //XXX d32dis isn't implemented. The manual says whether or not
-                //it works is implementation defined.
-                newCpacr.asedis = valCpacr.asedis;
-                newVal = newCpacr;
+
+                const uint32_t ones = (uint32_t)(-1);
+                CPACR cpacrMask = 0;
+                // Only cp10, cp11, and ase are implemented, nothing else should
+                // be writable
+                cpacrMask.cp10 = ones;
+                cpacrMask.cp11 = ones;
+                cpacrMask.asedis = ones;
+                newVal &= cpacrMask;
+                DPRINTF(MiscRegs, "Writing misc reg %s: %#x\n",
+                        miscRegName[misc_reg], newVal);
             }
             break;
           case MISCREG_CSSELR:
             warn_once("The csselr register isn't implemented.\n");
-            break;
+            return;
           case MISCREG_FPSCR:
             {
                 const uint32_t ones = (uint32_t)(-1);
@@ -320,6 +323,8 @@ ISA::setMiscReg(int misc_reg, const MiscReg &val, ThreadContext *tc)
             break;
           case MISCREG_FPEXC:
             {
+                // vfpv3 architecture, section B.6.1 of DDI04068
+                // bit 29 - valid only if fpexc[31] is 0
                 const uint32_t fpexcMask = 0x60000000;
                 newVal = (newVal & fpexcMask) |
                          (miscRegs[MISCREG_FPEXC] & ~fpexcMask);
index ae8f04a812b1732192bbc9574a452e2575e38f8f..b3a9fca5fc3a35428332488eb21f9fc935ab365c 100644 (file)
@@ -349,8 +349,8 @@ let {{
                                  ''')
     buildMult4Inst    ("smull", '''resTemp = (int64_t)Reg2.sw *
                                              (int64_t)Reg3.sw;
-                                   Reg0 = (int32_t)resTemp;
                                    Reg1 = (int32_t)(resTemp >> 32);
+                                   Reg0 = (int32_t)resTemp;
                                 ''', "llbit")
     buildMult3InstUnCc("smulwb", '''Reg0 = resTemp =
                                         (Reg1.sw *
@@ -374,16 +374,16 @@ let {{
                                  ''')
     buildMult4InstUnCc("umaal", '''resTemp = Reg2.ud * Reg3.ud +
                                              Reg0.ud + Reg1.ud;
-                                   Reg0.ud = (uint32_t)resTemp;
                                    Reg1.ud = (uint32_t)(resTemp >> 32);
+                                   Reg0.ud = (uint32_t)resTemp;
                                 ''')
     buildMult4Inst    ("umlal", '''resTemp = Reg2.ud * Reg3.ud + Reg0.ud +
                                              (Reg1.ud << 32);
-                                   Reg0.ud = (uint32_t)resTemp;
                                    Reg1.ud = (uint32_t)(resTemp >> 32);
+                                   Reg0.ud = (uint32_t)resTemp;
                                 ''', "llbit")
     buildMult4Inst    ("umull", '''resTemp = Reg2.ud * Reg3.ud;
-                                   Reg0 = (uint32_t)resTemp;
                                    Reg1 = (uint32_t)(resTemp >> 32);
+                                   Reg0 = (uint32_t)resTemp;
                                 ''', "llbit")
 }};
index 5aca525a4ca4d97aaa7449eddd2f7c77c85ee053..083d1ebaf20d4cec478a1dcadb4e08bf03c62884 100644 (file)
@@ -1761,8 +1761,8 @@ let {{
             }
         }
     '''
-    threeEqualRegInst("vshl", "VshlD", "SimdAluOp", allTypes, 2, vshlCode)
-    threeEqualRegInst("vshl", "VshlQ", "SimdAluOp", allTypes, 4, vshlCode)
+    threeEqualRegInst("vshl", "VshlD", "SimdShiftOp", allTypes, 2, vshlCode)
+    threeEqualRegInst("vshl", "VshlQ", "SimdShiftOp", allTypes, 4, vshlCode)
 
     vrshlCode = '''
         int16_t shiftAmt = (int8_t)srcElem2;
@@ -3204,8 +3204,8 @@ let {{
             substDict = { "targs" : type,
                           "class_name" : Name }
             exec_output += NeonExecDeclare.subst(substDict)
-    vdupGprInst("vdup", "NVdupDGpr", "SimdAluOp", smallUnsignedTypes, 2)
-    vdupGprInst("vdup", "NVdupQGpr", "SimdAluOp", smallUnsignedTypes, 4)
+    vdupGprInst("vdup", "NVdupDGpr", "SimdMiscOp", smallUnsignedTypes, 2)
+    vdupGprInst("vdup", "NVdupQGpr", "SimdMiscOp", smallUnsignedTypes, 4)
 
     vmovCode = 'destElem = imm;'
     oneRegImmInst("vmov", "NVmoviD", "SimdMiscOp", ("uint64_t",), 2, vmovCode)
@@ -3309,8 +3309,8 @@ let {{
             }
         }
     '''
-    buildVext("vext", "NVextD", "SimdAluOp", ("uint8_t",), 2, vextCode)
-    buildVext("vext", "NVextQ", "SimdAluOp", ("uint8_t",), 4, vextCode)
+    buildVext("vext", "NVextD", "SimdMiscOp", ("uint8_t",), 2, vextCode)
+    buildVext("vext", "NVextQ", "SimdMiscOp", ("uint8_t",), 4, vextCode)
 
     def buildVtbxl(name, Name, opClass, length, isVtbl):
         global header_output, decoder_output, exec_output
@@ -3366,13 +3366,13 @@ let {{
         decoder_output += RegRegRegOpConstructor.subst(iop)
         exec_output += PredOpExecute.subst(iop)
 
-    buildVtbxl("vtbl", "NVtbl1", "SimdAluOp", 1, "true")
-    buildVtbxl("vtbl", "NVtbl2", "SimdAluOp", 2, "true")
-    buildVtbxl("vtbl", "NVtbl3", "SimdAluOp", 3, "true")
-    buildVtbxl("vtbl", "NVtbl4", "SimdAluOp", 4, "true")
+    buildVtbxl("vtbl", "NVtbl1", "SimdMiscOp", 1, "true")
+    buildVtbxl("vtbl", "NVtbl2", "SimdMiscOp", 2, "true")
+    buildVtbxl("vtbl", "NVtbl3", "SimdMiscOp", 3, "true")
+    buildVtbxl("vtbl", "NVtbl4", "SimdMiscOp", 4, "true")
 
-    buildVtbxl("vtbx", "NVtbx1", "SimdAluOp", 1, "false")
-    buildVtbxl("vtbx", "NVtbx2", "SimdAluOp", 2, "false")
-    buildVtbxl("vtbx", "NVtbx3", "SimdAluOp", 3, "false")
-    buildVtbxl("vtbx", "NVtbx4", "SimdAluOp", 4, "false")
+    buildVtbxl("vtbx", "NVtbx1", "SimdMiscOp", 1, "false")
+    buildVtbxl("vtbx", "NVtbx2", "SimdMiscOp", 2, "false")
+    buildVtbxl("vtbx", "NVtbx3", "SimdMiscOp", 3, "false")
+    buildVtbxl("vtbx", "NVtbx4", "SimdMiscOp", 4, "false")
 }};
index 1e105799fbe6cd34d2c747654c010731717e513b..fc18fa11424c20b359f88fceac9f28c0072f7b6e 100644 (file)
@@ -310,6 +310,7 @@ namespace ArmISA
         Bitfield<23, 22> cp11;
         Bitfield<25, 24> cp12;
         Bitfield<27, 26> cp13;
+        Bitfield<29, 28> rsvd;
         Bitfield<30> d32dis;
         Bitfield<31> asedis;
     EndBitUnion(CPACR)
index 0fea44695db12d528742945342e874c1ad939e3f..20cb9b42625c8045a2eacc04720d3e5bb3e4257a 100644 (file)
@@ -146,7 +146,12 @@ vfpEnabled(CPACR cpacr, CPSR cpsr)
 static inline bool
 vfpEnabled(CPACR cpacr, CPSR cpsr, FPEXC fpexc)
 {
-    return fpexc.en && vfpEnabled(cpacr, cpsr);
+    if ((cpacr.cp11 == 0x3) ||
+        ((cpacr.cp11 == 0x1) && inPrivilegedMode(cpsr)))
+        return fpexc.en && vfpEnabled(cpacr, cpsr);
+    else
+        return fpexc.en && vfpEnabled(cpacr, cpsr) &&
+            (cpacr.cp11 == cpacr.cp10);
 }
 
 static inline bool