2 * Copyright (c) 2010-2014 ARM Limited
5 * The license below extends only to copyright in the software and shall
6 * not be construed as granting a license to any other intellectual
7 * property including but not limited to intellectual property relating
8 * to a hardware implementation of the functionality of the software
9 * licensed hereunder. You may use the software subject to the license
10 * terms below provided that you ensure that this notice is replicated
11 * unmodified and in its entirety in all distributions of the software,
12 * modified or unmodified, in source code or in binary form.
14 * Copyright (c) 2007-2008 The Florida State University
15 * All rights reserved.
17 * Redistribution and use in source and binary forms, with or without
18 * modification, are permitted provided that the following conditions are
19 * met: redistributions of source code must retain the above copyright
20 * notice, this list of conditions and the following disclaimer;
21 * redistributions in binary form must reproduce the above copyright
22 * notice, this list of conditions and the following disclaimer in the
23 * documentation and/or other materials provided with the distribution;
24 * neither the name of the copyright holders nor the names of its
25 * contributors may be used to endorse or promote products derived from
26 * this software without specific prior written permission.
28 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
29 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
30 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
31 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
32 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
33 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
34 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
35 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
36 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
37 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
38 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
40 * Authors: Stephen Hines
45 #include "arch/arm/insts/macromem.hh"
47 #include "arch/arm/generated/decoder.hh"
48 #include "arch/arm/insts/neon64_mem.hh"
51 using namespace ArmISAInst
;
56 MacroMemOp::MacroMemOp(const char *mnem
, ExtMachInst machInst
,
57 OpClass __opClass
, IntRegIndex rn
,
58 bool index
, bool up
, bool user
, bool writeback
,
59 bool load
, uint32_t reglist
) :
60 PredMacroOp(mnem
, machInst
, __opClass
)
62 uint32_t regs
= reglist
;
63 uint32_t ones
= number_of_ones(reglist
);
64 uint32_t mem_ops
= ones
;
66 // Copy the base address register if we overwrite it, or if this instruction
67 // is basically a no-op (we have to do something)
68 bool copy_base
= (bits(reglist
, rn
) && load
) || !ones
;
69 bool force_user
= user
& !bits(reglist
, 15);
70 bool exception_ret
= user
& bits(reglist
, 15);
71 bool pc_temp
= load
&& writeback
&& bits(reglist
, 15);
76 numMicroops
= ((ones
+ 1) / 2)
77 + ((ones
% 2 == 0 && exception_ret
) ? 1 : 0)
82 numMicroops
= ones
+ (writeback
? 1 : 0);
85 microOps
= new StaticInstPtr
[numMicroops
];
90 addr
= (ones
<< 2) - 4;
95 StaticInstPtr
*uop
= microOps
;
97 // Add 0 to Rn and stick it in ureg0.
98 // This is equivalent to a move.
100 *uop
++ = new MicroAddiUop(machInst
, INTREG_UREG0
, rn
, 0);
103 while (mem_ops
!= 0) {
104 // Do load operations in pairs if possible
105 if (load
&& mem_ops
>= 2 &&
106 !(mem_ops
== 2 && bits(regs
,INTREG_PC
) && exception_ret
)) {
107 // 64-bit memory operation
108 // Find 2 set register bits (clear them after finding)
112 // Find the first register
113 while (!bits(regs
, reg
)) reg
++;
114 replaceBits(regs
, reg
, 0);
115 reg_idx1
= force_user
? intRegInMode(MODE_USER
, reg
) : reg
;
117 // Find the second register
118 while (!bits(regs
, reg
)) reg
++;
119 replaceBits(regs
, reg
, 0);
120 reg_idx2
= force_user
? intRegInMode(MODE_USER
, reg
) : reg
;
122 // Load into temp reg if necessary
123 if (reg_idx2
== INTREG_PC
&& pc_temp
)
124 reg_idx2
= INTREG_UREG1
;
126 // Actually load both registers from memory
127 *uop
= new MicroLdr2Uop(machInst
, reg_idx1
, reg_idx2
,
128 copy_base
? INTREG_UREG0
: rn
, up
, addr
);
130 if (!writeback
&& reg_idx2
== INTREG_PC
) {
131 // No writeback if idx==pc, set appropriate flags
132 (*uop
)->setFlag(StaticInst::IsControl
);
133 (*uop
)->setFlag(StaticInst::IsIndirectControl
);
135 if (!(condCode
== COND_AL
|| condCode
== COND_UC
))
136 (*uop
)->setFlag(StaticInst::IsCondControl
);
138 (*uop
)->setFlag(StaticInst::IsUncondControl
);
145 // 32-bit memory operation
146 // Find register for operation
148 while(!bits(regs
, reg
)) reg
++;
149 replaceBits(regs
, reg
, 0);
150 reg_idx
= force_user
? intRegInMode(MODE_USER
, reg
) : reg
;
153 if (writeback
&& reg_idx
== INTREG_PC
) {
154 // If this instruction changes the PC and performs a
155 // writeback, ensure the pc load/branch is the last uop.
156 // Load into a temp reg here.
157 *uop
= new MicroLdrUop(machInst
, INTREG_UREG1
,
158 copy_base
? INTREG_UREG0
: rn
, up
, addr
);
159 } else if (reg_idx
== INTREG_PC
&& exception_ret
) {
160 // Special handling for exception return
161 *uop
= new MicroLdrRetUop(machInst
, reg_idx
,
162 copy_base
? INTREG_UREG0
: rn
, up
, addr
);
164 // standard single load uop
165 *uop
= new MicroLdrUop(machInst
, reg_idx
,
166 copy_base
? INTREG_UREG0
: rn
, up
, addr
);
169 // Loading pc as last operation? Set appropriate flags.
170 if (!writeback
&& reg_idx
== INTREG_PC
) {
171 (*uop
)->setFlag(StaticInst::IsControl
);
172 (*uop
)->setFlag(StaticInst::IsIndirectControl
);
174 if (!(condCode
== COND_AL
|| condCode
== COND_UC
))
175 (*uop
)->setFlag(StaticInst::IsCondControl
);
177 (*uop
)->setFlag(StaticInst::IsUncondControl
);
180 *uop
= new MicroStrUop(machInst
, reg_idx
, rn
, up
, addr
);
188 // Load/store micro-op generated, go to next uop
192 if (writeback
&& ones
) {
193 // Perform writeback uop operation
195 *uop
++ = new MicroAddiUop(machInst
, rn
, rn
, ones
* 4);
197 *uop
++ = new MicroSubiUop(machInst
, rn
, rn
, ones
* 4);
199 // Write PC after address writeback?
202 *uop
= new MicroUopRegMovRet(machInst
, 0, INTREG_UREG1
);
204 *uop
= new MicroUopRegMov(machInst
, INTREG_PC
, INTREG_UREG1
);
206 (*uop
)->setFlag(StaticInst::IsControl
);
207 (*uop
)->setFlag(StaticInst::IsIndirectControl
);
209 if (!(condCode
== COND_AL
|| condCode
== COND_UC
))
210 (*uop
)->setFlag(StaticInst::IsCondControl
);
212 (*uop
)->setFlag(StaticInst::IsUncondControl
);
215 (*uop
)->setFlag(StaticInst::IsReturn
);
222 (*uop
)->setLastMicroop();
224 /* Take the control flags from the last microop for the macroop */
225 if ((*uop
)->isControl())
226 setFlag(StaticInst::IsControl
);
227 if ((*uop
)->isCondCtrl())
228 setFlag(StaticInst::IsCondControl
);
229 if ((*uop
)->isUncondCtrl())
230 setFlag(StaticInst::IsUncondControl
);
231 if ((*uop
)->isIndirectCtrl())
232 setFlag(StaticInst::IsIndirectControl
);
233 if ((*uop
)->isReturn())
234 setFlag(StaticInst::IsReturn
);
236 for (StaticInstPtr
*uop
= microOps
; !(*uop
)->isLastMicroop(); uop
++) {
237 (*uop
)->setDelayedCommit();
241 PairMemOp::PairMemOp(const char *mnem
, ExtMachInst machInst
, OpClass __opClass
,
242 uint32_t size
, bool fp
, bool load
, bool noAlloc
,
243 bool signExt
, bool exclusive
, bool acrel
,
244 int64_t imm
, AddrMode mode
,
245 IntRegIndex rn
, IntRegIndex rt
, IntRegIndex rt2
) :
246 PredMacroOp(mnem
, machInst
, __opClass
)
248 bool post
= (mode
== AddrMd_PostIndex
);
249 bool writeback
= (mode
!= AddrMd_Offset
);
252 // Use integer rounding to round up loads of size 4
253 numMicroops
= (post
? 0 : 1) + ((size
+ 4) / 8) + (writeback
? 1 : 0);
255 numMicroops
= (post
? 0 : 1) + (size
/ 4) + (writeback
? 1 : 0);
257 microOps
= new StaticInstPtr
[numMicroops
];
259 StaticInstPtr
*uop
= microOps
;
264 *uop
++ = new MicroAddXiSpAlignUop(machInst
, INTREG_UREG0
, rn
,
271 *uop
++ = new MicroLdFp16Uop(machInst
, rt
,
272 post
? rn
: INTREG_UREG0
, 0, noAlloc
, exclusive
, acrel
);
273 *uop
++ = new MicroLdFp16Uop(machInst
, rt2
,
274 post
? rn
: INTREG_UREG0
, 16, noAlloc
, exclusive
, acrel
);
276 *uop
++ = new MicroStrQBFpXImmUop(machInst
, rt
,
277 post
? rn
: INTREG_UREG0
, 0, noAlloc
, exclusive
, acrel
);
278 *uop
++ = new MicroStrQTFpXImmUop(machInst
, rt
,
279 post
? rn
: INTREG_UREG0
, 0, noAlloc
, exclusive
, acrel
);
280 *uop
++ = new MicroStrQBFpXImmUop(machInst
, rt2
,
281 post
? rn
: INTREG_UREG0
, 16, noAlloc
, exclusive
, acrel
);
282 *uop
++ = new MicroStrQTFpXImmUop(machInst
, rt2
,
283 post
? rn
: INTREG_UREG0
, 16, noAlloc
, exclusive
, acrel
);
285 } else if (size
== 8) {
287 *uop
++ = new MicroLdPairFp8Uop(machInst
, rt
, rt2
,
288 post
? rn
: INTREG_UREG0
, 0, noAlloc
, exclusive
, acrel
);
290 *uop
++ = new MicroStrFpXImmUop(machInst
, rt
,
291 post
? rn
: INTREG_UREG0
, 0, noAlloc
, exclusive
, acrel
);
292 *uop
++ = new MicroStrFpXImmUop(machInst
, rt2
,
293 post
? rn
: INTREG_UREG0
, 8, noAlloc
, exclusive
, acrel
);
295 } else if (size
== 4) {
297 *uop
++ = new MicroLdrDFpXImmUop(machInst
, rt
, rt2
,
298 post
? rn
: INTREG_UREG0
, 0, noAlloc
, exclusive
, acrel
);
300 *uop
++ = new MicroStrDFpXImmUop(machInst
, rt
, rt2
,
301 post
? rn
: INTREG_UREG0
, 0, noAlloc
, exclusive
, acrel
);
307 *uop
++ = new MicroLdPairUop(machInst
, rt
, rt2
,
308 post
? rn
: INTREG_UREG0
, 0, noAlloc
, exclusive
, acrel
);
310 *uop
++ = new MicroStrXImmUop(machInst
, rt
, post
? rn
: INTREG_UREG0
,
311 0, noAlloc
, exclusive
, acrel
);
312 *uop
++ = new MicroStrXImmUop(machInst
, rt2
, post
? rn
: INTREG_UREG0
,
313 size
, noAlloc
, exclusive
, acrel
);
315 } else if (size
== 4) {
318 *uop
++ = new MicroLdrDSXImmUop(machInst
, rt
, rt2
,
319 post
? rn
: INTREG_UREG0
, 0, noAlloc
, exclusive
, acrel
);
321 *uop
++ = new MicroLdrDUXImmUop(machInst
, rt
, rt2
,
322 post
? rn
: INTREG_UREG0
, 0, noAlloc
, exclusive
, acrel
);
325 *uop
++ = new MicroStrDXImmUop(machInst
, rt
, rt2
,
326 post
? rn
: INTREG_UREG0
, 0, noAlloc
, exclusive
, acrel
);
332 *uop
++ = new MicroAddXiUop(machInst
, rn
, post
? rn
: INTREG_UREG0
,
336 assert(uop
== µOps
[numMicroops
]);
337 (*--uop
)->setLastMicroop();
339 for (StaticInstPtr
*curUop
= microOps
;
340 !(*curUop
)->isLastMicroop(); curUop
++) {
341 (*curUop
)->setDelayedCommit();
345 BigFpMemImmOp::BigFpMemImmOp(const char *mnem
, ExtMachInst machInst
,
346 OpClass __opClass
, bool load
, IntRegIndex dest
,
347 IntRegIndex base
, int64_t imm
) :
348 PredMacroOp(mnem
, machInst
, __opClass
)
350 numMicroops
= load
? 1 : 2;
351 microOps
= new StaticInstPtr
[numMicroops
];
353 StaticInstPtr
*uop
= microOps
;
356 *uop
= new MicroLdFp16Uop(machInst
, dest
, base
, imm
);
358 *uop
= new MicroStrQBFpXImmUop(machInst
, dest
, base
, imm
);
359 (*uop
)->setDelayedCommit();
360 *++uop
= new MicroStrQTFpXImmUop(machInst
, dest
, base
, imm
);
362 (*uop
)->setLastMicroop();
365 BigFpMemPostOp::BigFpMemPostOp(const char *mnem
, ExtMachInst machInst
,
366 OpClass __opClass
, bool load
, IntRegIndex dest
,
367 IntRegIndex base
, int64_t imm
) :
368 PredMacroOp(mnem
, machInst
, __opClass
)
370 numMicroops
= load
? 2 : 3;
371 microOps
= new StaticInstPtr
[numMicroops
];
373 StaticInstPtr
*uop
= microOps
;
376 *uop
++ = new MicroLdFp16Uop(machInst
, dest
, base
, 0);
378 *uop
++= new MicroStrQBFpXImmUop(machInst
, dest
, base
, 0);
379 *uop
++ = new MicroStrQTFpXImmUop(machInst
, dest
, base
, 0);
381 *uop
= new MicroAddXiUop(machInst
, base
, base
, imm
);
382 (*uop
)->setLastMicroop();
384 for (StaticInstPtr
*curUop
= microOps
;
385 !(*curUop
)->isLastMicroop(); curUop
++) {
386 (*curUop
)->setDelayedCommit();
390 BigFpMemPreOp::BigFpMemPreOp(const char *mnem
, ExtMachInst machInst
,
391 OpClass __opClass
, bool load
, IntRegIndex dest
,
392 IntRegIndex base
, int64_t imm
) :
393 PredMacroOp(mnem
, machInst
, __opClass
)
395 numMicroops
= load
? 2 : 3;
396 microOps
= new StaticInstPtr
[numMicroops
];
398 StaticInstPtr
*uop
= microOps
;
401 *uop
++ = new MicroLdFp16Uop(machInst
, dest
, base
, imm
);
403 *uop
++ = new MicroStrQBFpXImmUop(machInst
, dest
, base
, imm
);
404 *uop
++ = new MicroStrQTFpXImmUop(machInst
, dest
, base
, imm
);
406 *uop
= new MicroAddXiUop(machInst
, base
, base
, imm
);
407 (*uop
)->setLastMicroop();
409 for (StaticInstPtr
*curUop
= microOps
;
410 !(*curUop
)->isLastMicroop(); curUop
++) {
411 (*curUop
)->setDelayedCommit();
415 BigFpMemRegOp::BigFpMemRegOp(const char *mnem
, ExtMachInst machInst
,
416 OpClass __opClass
, bool load
, IntRegIndex dest
,
417 IntRegIndex base
, IntRegIndex offset
,
418 ArmExtendType type
, int64_t imm
) :
419 PredMacroOp(mnem
, machInst
, __opClass
)
421 numMicroops
= load
? 1 : 2;
422 microOps
= new StaticInstPtr
[numMicroops
];
424 StaticInstPtr
*uop
= microOps
;
427 *uop
= new MicroLdFp16RegUop(machInst
, dest
, base
,
430 *uop
= new MicroStrQBFpXRegUop(machInst
, dest
, base
,
432 (*uop
)->setDelayedCommit();
433 *++uop
= new MicroStrQTFpXRegUop(machInst
, dest
, base
,
437 (*uop
)->setLastMicroop();
440 BigFpMemLitOp::BigFpMemLitOp(const char *mnem
, ExtMachInst machInst
,
441 OpClass __opClass
, IntRegIndex dest
,
443 PredMacroOp(mnem
, machInst
, __opClass
)
446 microOps
= new StaticInstPtr
[numMicroops
];
448 microOps
[0] = new MicroLdFp16LitUop(machInst
, dest
, imm
);
449 microOps
[0]->setLastMicroop();
452 VldMultOp::VldMultOp(const char *mnem
, ExtMachInst machInst
, OpClass __opClass
,
453 unsigned elems
, RegIndex rn
, RegIndex vd
, unsigned regs
,
454 unsigned inc
, uint32_t size
, uint32_t align
, RegIndex rm
) :
455 PredMacroOp(mnem
, machInst
, __opClass
)
457 assert(regs
> 0 && regs
<= 4);
458 assert(regs
% elems
== 0);
460 numMicroops
= (regs
> 2) ? 2 : 1;
461 bool wb
= (rm
!= 15);
462 bool deinterleave
= (elems
> 1);
464 if (wb
) numMicroops
++;
465 if (deinterleave
) numMicroops
+= (regs
/ elems
);
466 microOps
= new StaticInstPtr
[numMicroops
];
468 RegIndex rMid
= deinterleave
? NumFloatV7ArchRegs
: vd
* 2;
470 uint32_t noAlign
= TLB::MustBeOne
;
475 microOps
[uopIdx
++] = newNeonMemInst
<MicroLdrNeon16Uop
>(
476 size
, machInst
, rMid
, rn
, 0, align
);
477 microOps
[uopIdx
++] = newNeonMemInst
<MicroLdrNeon16Uop
>(
478 size
, machInst
, rMid
+ 4, rn
, 16, noAlign
);
481 microOps
[uopIdx
++] = newNeonMemInst
<MicroLdrNeon16Uop
>(
482 size
, machInst
, rMid
, rn
, 0, align
);
483 microOps
[uopIdx
++] = newNeonMemInst
<MicroLdrNeon8Uop
>(
484 size
, machInst
, rMid
+ 4, rn
, 16, noAlign
);
487 microOps
[uopIdx
++] = newNeonMemInst
<MicroLdrNeon16Uop
>(
488 size
, machInst
, rMid
, rn
, 0, align
);
491 microOps
[uopIdx
++] = newNeonMemInst
<MicroLdrNeon8Uop
>(
492 size
, machInst
, rMid
, rn
, 0, align
);
495 // Unknown number of registers
496 microOps
[uopIdx
++] = new Unknown(machInst
);
499 if (rm
!= 15 && rm
!= 13) {
501 new MicroAddUop(machInst
, rn
, rn
, rm
, 0, ArmISA::LSL
);
504 new MicroAddiUop(machInst
, rn
, rn
, regs
* 8);
511 microOps
[uopIdx
++] = newNeonMixInst
<MicroDeintNeon8Uop
>(
512 size
, machInst
, vd
* 2, rMid
, inc
* 2);
516 microOps
[uopIdx
++] = newNeonMixInst
<MicroDeintNeon6Uop
>(
517 size
, machInst
, vd
* 2, rMid
, inc
* 2);
520 assert(regs
== 4 || regs
== 2);
522 microOps
[uopIdx
++] = newNeonMixInst
<MicroDeintNeon4Uop
>(
523 size
, machInst
, vd
* 2, rMid
, inc
* 2);
524 microOps
[uopIdx
++] = newNeonMixInst
<MicroDeintNeon4Uop
>(
525 size
, machInst
, vd
* 2 + 2, rMid
+ 4, inc
* 2);
527 microOps
[uopIdx
++] = newNeonMixInst
<MicroDeintNeon4Uop
>(
528 size
, machInst
, vd
* 2, rMid
, inc
* 2);
532 // Bad number of elements to deinterleave
533 microOps
[uopIdx
++] = new Unknown(machInst
);
536 assert(uopIdx
== numMicroops
);
538 for (unsigned i
= 0; i
< numMicroops
- 1; i
++) {
539 MicroOp
* uopPtr
= dynamic_cast<MicroOp
*>(microOps
[i
].get());
541 uopPtr
->setDelayedCommit();
543 microOps
[numMicroops
- 1]->setLastMicroop();
546 VldSingleOp::VldSingleOp(const char *mnem
, ExtMachInst machInst
,
547 OpClass __opClass
, bool all
, unsigned elems
,
548 RegIndex rn
, RegIndex vd
, unsigned regs
,
549 unsigned inc
, uint32_t size
, uint32_t align
,
550 RegIndex rm
, unsigned lane
) :
551 PredMacroOp(mnem
, machInst
, __opClass
)
553 assert(regs
> 0 && regs
<= 4);
554 assert(regs
% elems
== 0);
556 unsigned eBytes
= (1 << size
);
557 unsigned loadSize
= eBytes
* elems
;
558 unsigned loadRegs M5_VAR_USED
= (loadSize
+ sizeof(FloatRegBits
) - 1) /
559 sizeof(FloatRegBits
);
561 assert(loadRegs
> 0 && loadRegs
<= 4);
564 bool wb
= (rm
!= 15);
566 if (wb
) numMicroops
++;
567 numMicroops
+= (regs
/ elems
);
568 microOps
= new StaticInstPtr
[numMicroops
];
570 RegIndex ufp0
= NumFloatV7ArchRegs
;
575 microOps
[uopIdx
++] = new MicroLdrNeon1Uop
<uint8_t>(
576 machInst
, ufp0
, rn
, 0, align
);
580 microOps
[uopIdx
++] = new MicroLdrNeon2Uop
<uint16_t>(
581 machInst
, ufp0
, rn
, 0, align
);
583 microOps
[uopIdx
++] = new MicroLdrNeon2Uop
<uint8_t>(
584 machInst
, ufp0
, rn
, 0, align
);
588 microOps
[uopIdx
++] = new MicroLdrNeon3Uop
<uint8_t>(
589 machInst
, ufp0
, rn
, 0, align
);
594 microOps
[uopIdx
++] = new MicroLdrNeon4Uop
<uint8_t>(
595 machInst
, ufp0
, rn
, 0, align
);
598 microOps
[uopIdx
++] = new MicroLdrNeon4Uop
<uint16_t>(
599 machInst
, ufp0
, rn
, 0, align
);
602 microOps
[uopIdx
++] = new MicroLdrNeon4Uop
<uint32_t>(
603 machInst
, ufp0
, rn
, 0, align
);
608 microOps
[uopIdx
++] = new MicroLdrNeon6Uop
<uint16_t>(
609 machInst
, ufp0
, rn
, 0, align
);
614 microOps
[uopIdx
++] = new MicroLdrNeon8Uop
<uint16_t>(
615 machInst
, ufp0
, rn
, 0, align
);
618 microOps
[uopIdx
++] = new MicroLdrNeon8Uop
<uint32_t>(
619 machInst
, ufp0
, rn
, 0, align
);
624 microOps
[uopIdx
++] = new MicroLdrNeon12Uop
<uint32_t>(
625 machInst
, ufp0
, rn
, 0, align
);
628 microOps
[uopIdx
++] = new MicroLdrNeon16Uop
<uint32_t>(
629 machInst
, ufp0
, rn
, 0, align
);
632 // Unrecognized load size
633 microOps
[uopIdx
++] = new Unknown(machInst
);
636 if (rm
!= 15 && rm
!= 13) {
638 new MicroAddUop(machInst
, rn
, rn
, rm
, 0, ArmISA::LSL
);
641 new MicroAddiUop(machInst
, rn
, rn
, loadSize
);
650 microOps
[uopIdx
++] = new MicroUnpackAllNeon2to8Uop
<uint8_t>(
651 machInst
, vd
* 2, ufp0
, inc
* 2);
653 microOps
[uopIdx
++] = new MicroUnpackNeon2to8Uop
<uint8_t>(
654 machInst
, vd
* 2, ufp0
, inc
* 2, lane
);
659 microOps
[uopIdx
++] = new MicroUnpackAllNeon2to8Uop
<uint16_t>(
660 machInst
, vd
* 2, ufp0
, inc
* 2);
662 microOps
[uopIdx
++] = new MicroUnpackNeon2to8Uop
<uint16_t>(
663 machInst
, vd
* 2, ufp0
, inc
* 2, lane
);
668 microOps
[uopIdx
++] = new MicroUnpackAllNeon4to8Uop
<uint32_t>(
669 machInst
, vd
* 2, ufp0
, inc
* 2);
671 microOps
[uopIdx
++] = new MicroUnpackNeon4to8Uop
<uint32_t>(
672 machInst
, vd
* 2, ufp0
, inc
* 2, lane
);
677 microOps
[uopIdx
++] = new Unknown(machInst
);
686 microOps
[uopIdx
++] = new MicroUnpackAllNeon2to6Uop
<uint8_t>(
687 machInst
, vd
* 2, ufp0
, inc
* 2);
689 microOps
[uopIdx
++] = new MicroUnpackNeon2to6Uop
<uint8_t>(
690 machInst
, vd
* 2, ufp0
, inc
* 2, lane
);
695 microOps
[uopIdx
++] = new MicroUnpackAllNeon2to6Uop
<uint16_t>(
696 machInst
, vd
* 2, ufp0
, inc
* 2);
698 microOps
[uopIdx
++] = new MicroUnpackNeon2to6Uop
<uint16_t>(
699 machInst
, vd
* 2, ufp0
, inc
* 2, lane
);
704 microOps
[uopIdx
++] = new MicroUnpackAllNeon4to6Uop
<uint32_t>(
705 machInst
, vd
* 2, ufp0
, inc
* 2);
707 microOps
[uopIdx
++] = new MicroUnpackNeon4to6Uop
<uint32_t>(
708 machInst
, vd
* 2, ufp0
, inc
* 2, lane
);
713 microOps
[uopIdx
++] = new Unknown(machInst
);
719 assert(loadRegs
<= 2);
723 microOps
[uopIdx
++] = new MicroUnpackAllNeon2to4Uop
<uint8_t>(
724 machInst
, vd
* 2, ufp0
, inc
* 2);
726 microOps
[uopIdx
++] = new MicroUnpackNeon2to4Uop
<uint8_t>(
727 machInst
, vd
* 2, ufp0
, inc
* 2, lane
);
732 microOps
[uopIdx
++] = new MicroUnpackAllNeon2to4Uop
<uint16_t>(
733 machInst
, vd
* 2, ufp0
, inc
* 2);
735 microOps
[uopIdx
++] = new MicroUnpackNeon2to4Uop
<uint16_t>(
736 machInst
, vd
* 2, ufp0
, inc
* 2, lane
);
741 microOps
[uopIdx
++] = new MicroUnpackAllNeon2to4Uop
<uint32_t>(
742 machInst
, vd
* 2, ufp0
, inc
* 2);
744 microOps
[uopIdx
++] = new MicroUnpackNeon2to4Uop
<uint32_t>(
745 machInst
, vd
* 2, ufp0
, inc
* 2, lane
);
750 microOps
[uopIdx
++] = new Unknown(machInst
);
755 assert(regs
== 1 || (all
&& regs
== 2));
756 assert(loadRegs
<= 2);
757 for (unsigned offset
= 0; offset
< regs
; offset
++) {
762 new MicroUnpackAllNeon2to2Uop
<uint8_t>(
763 machInst
, (vd
+ offset
) * 2, ufp0
, inc
* 2);
766 new MicroUnpackNeon2to2Uop
<uint8_t>(
767 machInst
, (vd
+ offset
) * 2, ufp0
, inc
* 2, lane
);
773 new MicroUnpackAllNeon2to2Uop
<uint16_t>(
774 machInst
, (vd
+ offset
) * 2, ufp0
, inc
* 2);
777 new MicroUnpackNeon2to2Uop
<uint16_t>(
778 machInst
, (vd
+ offset
) * 2, ufp0
, inc
* 2, lane
);
784 new MicroUnpackAllNeon2to2Uop
<uint32_t>(
785 machInst
, (vd
+ offset
) * 2, ufp0
, inc
* 2);
788 new MicroUnpackNeon2to2Uop
<uint32_t>(
789 machInst
, (vd
+ offset
) * 2, ufp0
, inc
* 2, lane
);
794 microOps
[uopIdx
++] = new Unknown(machInst
);
800 // Bad number of elements to unpack
801 microOps
[uopIdx
++] = new Unknown(machInst
);
803 assert(uopIdx
== numMicroops
);
805 for (unsigned i
= 0; i
< numMicroops
- 1; i
++) {
806 MicroOp
* uopPtr
= dynamic_cast<MicroOp
*>(microOps
[i
].get());
808 uopPtr
->setDelayedCommit();
810 microOps
[numMicroops
- 1]->setLastMicroop();
813 VstMultOp::VstMultOp(const char *mnem
, ExtMachInst machInst
, OpClass __opClass
,
814 unsigned elems
, RegIndex rn
, RegIndex vd
, unsigned regs
,
815 unsigned inc
, uint32_t size
, uint32_t align
, RegIndex rm
) :
816 PredMacroOp(mnem
, machInst
, __opClass
)
818 assert(regs
> 0 && regs
<= 4);
819 assert(regs
% elems
== 0);
821 numMicroops
= (regs
> 2) ? 2 : 1;
822 bool wb
= (rm
!= 15);
823 bool interleave
= (elems
> 1);
825 if (wb
) numMicroops
++;
826 if (interleave
) numMicroops
+= (regs
/ elems
);
827 microOps
= new StaticInstPtr
[numMicroops
];
829 uint32_t noAlign
= TLB::MustBeOne
;
831 RegIndex rMid
= interleave
? NumFloatV7ArchRegs
: vd
* 2;
838 microOps
[uopIdx
++] = newNeonMixInst
<MicroInterNeon8Uop
>(
839 size
, machInst
, rMid
, vd
* 2, inc
* 2);
843 microOps
[uopIdx
++] = newNeonMixInst
<MicroInterNeon6Uop
>(
844 size
, machInst
, rMid
, vd
* 2, inc
* 2);
847 assert(regs
== 4 || regs
== 2);
849 microOps
[uopIdx
++] = newNeonMixInst
<MicroInterNeon4Uop
>(
850 size
, machInst
, rMid
, vd
* 2, inc
* 2);
851 microOps
[uopIdx
++] = newNeonMixInst
<MicroInterNeon4Uop
>(
852 size
, machInst
, rMid
+ 4, vd
* 2 + 2, inc
* 2);
854 microOps
[uopIdx
++] = newNeonMixInst
<MicroInterNeon4Uop
>(
855 size
, machInst
, rMid
, vd
* 2, inc
* 2);
859 // Bad number of elements to interleave
860 microOps
[uopIdx
++] = new Unknown(machInst
);
865 microOps
[uopIdx
++] = newNeonMemInst
<MicroStrNeon16Uop
>(
866 size
, machInst
, rMid
, rn
, 0, align
);
867 microOps
[uopIdx
++] = newNeonMemInst
<MicroStrNeon16Uop
>(
868 size
, machInst
, rMid
+ 4, rn
, 16, noAlign
);
871 microOps
[uopIdx
++] = newNeonMemInst
<MicroStrNeon16Uop
>(
872 size
, machInst
, rMid
, rn
, 0, align
);
873 microOps
[uopIdx
++] = newNeonMemInst
<MicroStrNeon8Uop
>(
874 size
, machInst
, rMid
+ 4, rn
, 16, noAlign
);
877 microOps
[uopIdx
++] = newNeonMemInst
<MicroStrNeon16Uop
>(
878 size
, machInst
, rMid
, rn
, 0, align
);
881 microOps
[uopIdx
++] = newNeonMemInst
<MicroStrNeon8Uop
>(
882 size
, machInst
, rMid
, rn
, 0, align
);
885 // Unknown number of registers
886 microOps
[uopIdx
++] = new Unknown(machInst
);
889 if (rm
!= 15 && rm
!= 13) {
891 new MicroAddUop(machInst
, rn
, rn
, rm
, 0, ArmISA::LSL
);
894 new MicroAddiUop(machInst
, rn
, rn
, regs
* 8);
897 assert(uopIdx
== numMicroops
);
899 for (unsigned i
= 0; i
< numMicroops
- 1; i
++) {
900 MicroOp
* uopPtr
= dynamic_cast<MicroOp
*>(microOps
[i
].get());
902 uopPtr
->setDelayedCommit();
904 microOps
[numMicroops
- 1]->setLastMicroop();
907 VstSingleOp::VstSingleOp(const char *mnem
, ExtMachInst machInst
,
908 OpClass __opClass
, bool all
, unsigned elems
,
909 RegIndex rn
, RegIndex vd
, unsigned regs
,
910 unsigned inc
, uint32_t size
, uint32_t align
,
911 RegIndex rm
, unsigned lane
) :
912 PredMacroOp(mnem
, machInst
, __opClass
)
915 assert(regs
> 0 && regs
<= 4);
916 assert(regs
% elems
== 0);
918 unsigned eBytes
= (1 << size
);
919 unsigned storeSize
= eBytes
* elems
;
920 unsigned storeRegs M5_VAR_USED
= (storeSize
+ sizeof(FloatRegBits
) - 1) /
921 sizeof(FloatRegBits
);
923 assert(storeRegs
> 0 && storeRegs
<= 4);
926 bool wb
= (rm
!= 15);
928 if (wb
) numMicroops
++;
929 numMicroops
+= (regs
/ elems
);
930 microOps
= new StaticInstPtr
[numMicroops
];
932 RegIndex ufp0
= NumFloatV7ArchRegs
;
940 microOps
[uopIdx
++] = new MicroPackNeon8to2Uop
<uint8_t>(
941 machInst
, ufp0
, vd
* 2, inc
* 2, lane
);
944 microOps
[uopIdx
++] = new MicroPackNeon8to2Uop
<uint16_t>(
945 machInst
, ufp0
, vd
* 2, inc
* 2, lane
);
948 microOps
[uopIdx
++] = new MicroPackNeon8to4Uop
<uint32_t>(
949 machInst
, ufp0
, vd
* 2, inc
* 2, lane
);
953 microOps
[uopIdx
++] = new Unknown(machInst
);
961 microOps
[uopIdx
++] = new MicroPackNeon6to2Uop
<uint8_t>(
962 machInst
, ufp0
, vd
* 2, inc
* 2, lane
);
965 microOps
[uopIdx
++] = new MicroPackNeon6to2Uop
<uint16_t>(
966 machInst
, ufp0
, vd
* 2, inc
* 2, lane
);
969 microOps
[uopIdx
++] = new MicroPackNeon6to4Uop
<uint32_t>(
970 machInst
, ufp0
, vd
* 2, inc
* 2, lane
);
974 microOps
[uopIdx
++] = new Unknown(machInst
);
980 assert(storeRegs
<= 2);
983 microOps
[uopIdx
++] = new MicroPackNeon4to2Uop
<uint8_t>(
984 machInst
, ufp0
, vd
* 2, inc
* 2, lane
);
987 microOps
[uopIdx
++] = new MicroPackNeon4to2Uop
<uint16_t>(
988 machInst
, ufp0
, vd
* 2, inc
* 2, lane
);
991 microOps
[uopIdx
++] = new MicroPackNeon4to2Uop
<uint32_t>(
992 machInst
, ufp0
, vd
* 2, inc
* 2, lane
);
996 microOps
[uopIdx
++] = new Unknown(machInst
);
1001 assert(regs
== 1 || (all
&& regs
== 2));
1002 assert(storeRegs
<= 2);
1003 for (unsigned offset
= 0; offset
< regs
; offset
++) {
1006 microOps
[uopIdx
++] = new MicroPackNeon2to2Uop
<uint8_t>(
1007 machInst
, ufp0
, (vd
+ offset
) * 2, inc
* 2, lane
);
1010 microOps
[uopIdx
++] = new MicroPackNeon2to2Uop
<uint16_t>(
1011 machInst
, ufp0
, (vd
+ offset
) * 2, inc
* 2, lane
);
1014 microOps
[uopIdx
++] = new MicroPackNeon2to2Uop
<uint32_t>(
1015 machInst
, ufp0
, (vd
+ offset
) * 2, inc
* 2, lane
);
1019 microOps
[uopIdx
++] = new Unknown(machInst
);
1025 // Bad number of elements to unpack
1026 microOps
[uopIdx
++] = new Unknown(machInst
);
1028 switch (storeSize
) {
1030 microOps
[uopIdx
++] = new MicroStrNeon1Uop
<uint8_t>(
1031 machInst
, ufp0
, rn
, 0, align
);
1035 microOps
[uopIdx
++] = new MicroStrNeon2Uop
<uint16_t>(
1036 machInst
, ufp0
, rn
, 0, align
);
1038 microOps
[uopIdx
++] = new MicroStrNeon2Uop
<uint8_t>(
1039 machInst
, ufp0
, rn
, 0, align
);
1043 microOps
[uopIdx
++] = new MicroStrNeon3Uop
<uint8_t>(
1044 machInst
, ufp0
, rn
, 0, align
);
1049 microOps
[uopIdx
++] = new MicroStrNeon4Uop
<uint8_t>(
1050 machInst
, ufp0
, rn
, 0, align
);
1053 microOps
[uopIdx
++] = new MicroStrNeon4Uop
<uint16_t>(
1054 machInst
, ufp0
, rn
, 0, align
);
1057 microOps
[uopIdx
++] = new MicroStrNeon4Uop
<uint32_t>(
1058 machInst
, ufp0
, rn
, 0, align
);
1063 microOps
[uopIdx
++] = new MicroStrNeon6Uop
<uint16_t>(
1064 machInst
, ufp0
, rn
, 0, align
);
1069 microOps
[uopIdx
++] = new MicroStrNeon8Uop
<uint16_t>(
1070 machInst
, ufp0
, rn
, 0, align
);
1073 microOps
[uopIdx
++] = new MicroStrNeon8Uop
<uint32_t>(
1074 machInst
, ufp0
, rn
, 0, align
);
1079 microOps
[uopIdx
++] = new MicroStrNeon12Uop
<uint32_t>(
1080 machInst
, ufp0
, rn
, 0, align
);
1083 microOps
[uopIdx
++] = new MicroStrNeon16Uop
<uint32_t>(
1084 machInst
, ufp0
, rn
, 0, align
);
1088 microOps
[uopIdx
++] = new Unknown(machInst
);
1091 if (rm
!= 15 && rm
!= 13) {
1092 microOps
[uopIdx
++] =
1093 new MicroAddUop(machInst
, rn
, rn
, rm
, 0, ArmISA::LSL
);
1095 microOps
[uopIdx
++] =
1096 new MicroAddiUop(machInst
, rn
, rn
, storeSize
);
1099 assert(uopIdx
== numMicroops
);
1101 for (unsigned i
= 0; i
< numMicroops
- 1; i
++) {
1102 MicroOp
* uopPtr
= dynamic_cast<MicroOp
*>(microOps
[i
].get());
1104 uopPtr
->setDelayedCommit();
1106 microOps
[numMicroops
- 1]->setLastMicroop();
1109 VldMultOp64::VldMultOp64(const char *mnem
, ExtMachInst machInst
,
1110 OpClass __opClass
, RegIndex rn
, RegIndex vd
,
1111 RegIndex rm
, uint8_t eSize
, uint8_t dataSize
,
1112 uint8_t numStructElems
, uint8_t numRegs
, bool wb
) :
1113 PredMacroOp(mnem
, machInst
, __opClass
)
1115 RegIndex vx
= NumFloatV8ArchRegs
/ 4;
1116 RegIndex rnsp
= (RegIndex
) makeSP((IntRegIndex
) rn
);
1117 bool baseIsSP
= isSP((IntRegIndex
) rnsp
);
1119 numMicroops
= wb
? 1 : 0;
1121 int totNumBytes
= numRegs
* dataSize
/ 8;
1122 assert(totNumBytes
<= 64);
1124 // The guiding principle here is that no more than 16 bytes can be
1125 // transferred at a time
1126 int numMemMicroops
= totNumBytes
/ 16;
1127 int residuum
= totNumBytes
% 16;
1130 numMicroops
+= numMemMicroops
;
1132 int numMarshalMicroops
= numRegs
/ 2 + (numRegs
% 2 ? 1 : 0);
1133 numMicroops
+= numMarshalMicroops
;
1135 microOps
= new StaticInstPtr
[numMicroops
];
1136 unsigned uopIdx
= 0;
1137 uint32_t memaccessFlags
= TLB::MustBeOne
| (TLB::ArmFlags
) eSize
|
1138 TLB::AllowUnaligned
;
1141 for(; i
< numMemMicroops
- 1; ++i
) {
1142 microOps
[uopIdx
++] = new MicroNeonLoad64(
1143 machInst
, vx
+ (RegIndex
) i
, rnsp
, 16 * i
, memaccessFlags
,
1144 baseIsSP
, 16 /* accSize */, eSize
);
1146 microOps
[uopIdx
++] = new MicroNeonLoad64(
1147 machInst
, vx
+ (RegIndex
) i
, rnsp
, 16 * i
, memaccessFlags
, baseIsSP
,
1148 residuum
? residuum
: 16 /* accSize */, eSize
);
1150 // Writeback microop: the post-increment amount is encoded in "Rm": a
1151 // 64-bit general register OR as '11111' for an immediate value equal to
1152 // the total number of bytes transferred (i.e. 8, 16, 24, 32, 48 or 64)
1154 if (rm
!= ((RegIndex
) INTREG_X31
)) {
1155 microOps
[uopIdx
++] = new MicroAddXERegUop(machInst
, rnsp
, rnsp
, rm
,
1158 microOps
[uopIdx
++] = new MicroAddXiUop(machInst
, rnsp
, rnsp
,
1163 for (int i
= 0; i
< numMarshalMicroops
; ++i
) {
1165 case 1: microOps
[uopIdx
++] = new MicroDeintNeon64_1Reg(
1166 machInst
, vd
+ (RegIndex
) (2 * i
), vx
, eSize
, dataSize
,
1167 numStructElems
, 1, i
/* step */);
1169 case 2: microOps
[uopIdx
++] = new MicroDeintNeon64_2Reg(
1170 machInst
, vd
+ (RegIndex
) (2 * i
), vx
, eSize
, dataSize
,
1171 numStructElems
, 2, i
/* step */);
1173 case 3: microOps
[uopIdx
++] = new MicroDeintNeon64_3Reg(
1174 machInst
, vd
+ (RegIndex
) (2 * i
), vx
, eSize
, dataSize
,
1175 numStructElems
, 3, i
/* step */);
1177 case 4: microOps
[uopIdx
++] = new MicroDeintNeon64_4Reg(
1178 machInst
, vd
+ (RegIndex
) (2 * i
), vx
, eSize
, dataSize
,
1179 numStructElems
, 4, i
/* step */);
1181 default: panic("Invalid number of registers");
1186 assert(uopIdx
== numMicroops
);
1188 for (int i
= 0; i
< numMicroops
- 1; ++i
) {
1189 microOps
[i
]->setDelayedCommit();
1191 microOps
[numMicroops
- 1]->setLastMicroop();
// VstMultOp64 constructor: expands the macro-op into an array of micro-ops.
// As visible here, the emitted order is: MicroIntNeon64_<N>Reg marshalling
// micro-ops, then MicroNeonStore64 micro-ops of at most 16 bytes each, then a
// writeback add micro-op (a slot is reserved for it only when wb is true).
//
//   rn              - base register; converted with makeSP() into rnsp
//   vd              - vector register operand handed to the marshalling
//                     micro-ops
//   rm              - compared against INTREG_X31 to choose between the
//                     MicroAddXERegUop and MicroAddXiUop writeback micro-ops
//   eSize           - OR'd into the memory access flags and forwarded to the
//                     micro-ops
//   dataSize,
//   numRegs         - totNumBytes = numRegs * dataSize / 8
//   numStructElems  - forwarded to the marshalling micro-ops
//   wb              - reserves one extra micro-op slot when true
//
// NOTE(review): this listing omits some original lines (braces, the switch
// header around the "case" labels, some trailing arguments), so the notes
// below describe only the statements that are visible.
1194 VstMultOp64::VstMultOp64(const char *mnem
, ExtMachInst machInst
,
1195 OpClass __opClass
, RegIndex rn
, RegIndex vd
,
1196 RegIndex rm
, uint8_t eSize
, uint8_t dataSize
,
1197 uint8_t numStructElems
, uint8_t numRegs
, bool wb
) :
1198 PredMacroOp(mnem
, machInst
, __opClass
)
// vx is the base index of the registers passed both to the marshalling
// micro-ops (vx + 2 * i) and to the store micro-ops (vx + i).
1200 RegIndex vx
= NumFloatV8ArchRegs
/ 4;
1201 RegIndex rnsp
= (RegIndex
) makeSP((IntRegIndex
) rn
);
1202 bool baseIsSP
= isSP((IntRegIndex
) rnsp
);
// Start the micro-op count with the optional writeback micro-op.
1204 numMicroops
= wb
? 1 : 0;
// Total number of bytes to transfer; asserted not to exceed 64.
1206 int totNumBytes
= numRegs
* dataSize
/ 8;
1207 assert(totNumBytes
<= 64);
1209 // The guiding principle here is that no more than 16 bytes can be
1210 // transferred at a time
1211 int numMemMicroops
= totNumBytes
/ 16;
1212 int residuum
= totNumBytes
% 16;
// NOTE(review): original lines 1213-1214 are missing from this listing;
// presumably they bump numMemMicroops when residuum is non-zero -- confirm.
1215 numMicroops
+= numMemMicroops
;
// One marshalling micro-op is used for up to 32 bytes, two above that.
1217 int numMarshalMicroops
= totNumBytes
> 32 ? 2 : 1;
1218 numMicroops
+= numMarshalMicroops
;
1220 microOps
= new StaticInstPtr
[numMicroops
];
1221 unsigned uopIdx
= 0;
// Emit the marshalling micro-ops. Each visible case label creates the
// matching MicroIntNeon64_<N>Reg variant, with i passed as the step.
1223 for(int i
= 0; i
< numMarshalMicroops
; ++i
) {
1225 case 1: microOps
[uopIdx
++] = new MicroIntNeon64_1Reg(
1226 machInst
, vx
+ (RegIndex
) (2 * i
), vd
, eSize
, dataSize
,
1227 numStructElems
, 1, i
/* step */);
1229 case 2: microOps
[uopIdx
++] = new MicroIntNeon64_2Reg(
1230 machInst
, vx
+ (RegIndex
) (2 * i
), vd
, eSize
, dataSize
,
1231 numStructElems
, 2, i
/* step */);
1233 case 3: microOps
[uopIdx
++] = new MicroIntNeon64_3Reg(
1234 machInst
, vx
+ (RegIndex
) (2 * i
), vd
, eSize
, dataSize
,
1235 numStructElems
, 3, i
/* step */);
1237 case 4: microOps
[uopIdx
++] = new MicroIntNeon64_4Reg(
1238 machInst
, vx
+ (RegIndex
) (2 * i
), vd
, eSize
, dataSize
,
1239 numStructElems
, 4, i
/* step */);
1241 default: panic("Invalid number of registers");
// Access flags shared by every store micro-op below.
1245 uint32_t memaccessFlags
= TLB::MustBeOne
| (TLB::ArmFlags
) eSize
|
1246 TLB::AllowUnaligned
;
// All but the final store transfer a full 16 bytes at offset 16 * i.
// (The declaration of i for this loop is on a line not shown here.)
1249 for(; i
< numMemMicroops
- 1; ++i
) {
1250 microOps
[uopIdx
++] = new MicroNeonStore64(
1251 machInst
, vx
+ (RegIndex
) i
, rnsp
, 16 * i
, memaccessFlags
,
1252 baseIsSP
, 16 /* accSize */, eSize
);
// Final store: transfers the residuum, or 16 bytes when the total is a
// multiple of 16.
1254 microOps
[uopIdx
++] = new MicroNeonStore64(
1255 machInst
, vx
+ (RegIndex
) i
, rnsp
, 16 * i
, memaccessFlags
, baseIsSP
,
1256 residuum
? residuum
: 16 /* accSize */, eSize
);
1258 // Writeback microop: the post-increment amount is encoded in "Rm": a
1259 // 64-bit general register OR as '11111' for an immediate value equal to
1260 // the total number of bytes transferred (i.e. 8, 16, 24, 32, 48 or 64)
1262 if (rm
!= ((RegIndex
) INTREG_X31
)) {
1263 microOps
[uopIdx
++] = new MicroAddXERegUop(machInst
, rnsp
, rnsp
, rm
,
1266 microOps
[uopIdx
++] = new MicroAddXiUop(machInst
, rnsp
, rnsp
,
// Every allocated slot must have been filled.
1271 assert(uopIdx
== numMicroops
);
// All micro-ops except the final one are flagged delayed-commit; the final
// one is flagged as the last micro-op.
1273 for (int i
= 0; i
< numMicroops
- 1; i
++) {
1274 microOps
[i
]->setDelayedCommit();
1276 microOps
[numMicroops
- 1]->setLastMicroop();
// VldSingleOp64 constructor: expands the macro-op into an array of micro-ops.
// As visible here, the emitted order is: MicroNeonLoad64 micro-ops of at most
// 16 bytes each, then a writeback add micro-op (a slot is reserved for it only
// when wb is true), then MicroUnpackNeon64 marshalling micro-ops.
//
//   rn              - base register; converted with makeSP() into rnsp
//   vd              - vector register operand handed to the unpack micro-ops
//   rm              - compared against INTREG_X31 to choose between the
//                     MicroAddXERegUop and MicroAddXiUop writeback micro-ops
//   eSize           - element size as a power of two (eSizeBytes = 1 << eSize)
//   dataSize,
//   index           - forwarded to the unpack micro-ops
//   numStructElems  - totNumBytes = numStructElems * eSizeBytes
//   wb              - reserves one extra micro-op slot when true
//
// NOTE(review): this listing omits some original lines (braces, part of the
// parameter list, some trailing arguments), so the notes below describe only
// the statements that are visible.
1279 VldSingleOp64::VldSingleOp64(const char *mnem
, ExtMachInst machInst
,
1280 OpClass __opClass
, RegIndex rn
, RegIndex vd
,
1281 RegIndex rm
, uint8_t eSize
, uint8_t dataSize
,
1282 uint8_t numStructElems
, uint8_t index
, bool wb
,
// NOTE(review): the initializers below set the listed members to zero/false
// rather than to the constructor arguments of the same name; the body then
// uses those names, which presumably resolve to the parameters -- confirm
// this is intended.
1284 PredMacroOp(mnem
, machInst
, __opClass
),
1285 eSize(0), dataSize(0), numStructElems(0), index(0),
1286 wb(false), replicate(false)
// vx is the base index of the registers passed both to the load micro-ops
// (vx + i) and to the unpack micro-ops.
1289 RegIndex vx
= NumFloatV8ArchRegs
/ 4;
1290 RegIndex rnsp
= (RegIndex
) makeSP((IntRegIndex
) rn
);
1291 bool baseIsSP
= isSP((IntRegIndex
) rnsp
);
// Start the micro-op count with the optional writeback micro-op.
1293 numMicroops
= wb
? 1 : 0;
// Total number of bytes to transfer; asserted not to exceed 64.
1295 int eSizeBytes
= 1 << eSize
;
1296 int totNumBytes
= numStructElems
* eSizeBytes
;
1297 assert(totNumBytes
<= 64);
1299 // The guiding principle here is that no more than 16 bytes can be
1300 // transferred at a time
1301 int numMemMicroops
= totNumBytes
/ 16;
1302 int residuum
= totNumBytes
% 16;
// NOTE(review): original lines 1303-1304 are missing from this listing;
// presumably they bump numMemMicroops when residuum is non-zero -- confirm.
1305 numMicroops
+= numMemMicroops
;
// One unpack micro-op per pair of structure elements, rounded up.
1307 int numMarshalMicroops
= numStructElems
/ 2 + (numStructElems
% 2 ? 1 : 0);
1308 numMicroops
+= numMarshalMicroops
;
1310 microOps
= new StaticInstPtr
[numMicroops
];
1311 unsigned uopIdx
= 0;
// Access flags shared by every load micro-op below.
1313 uint32_t memaccessFlags
= TLB::MustBeOne
| (TLB::ArmFlags
) eSize
|
1314 TLB::AllowUnaligned
;
// All but the final load transfer a full 16 bytes at offset 16 * i.
// (The declaration of i for this loop is on a line not shown here.)
1317 for (; i
< numMemMicroops
- 1; ++i
) {
1318 microOps
[uopIdx
++] = new MicroNeonLoad64(
1319 machInst
, vx
+ (RegIndex
) i
, rnsp
, 16 * i
, memaccessFlags
,
1320 baseIsSP
, 16 /* accSize */, eSize
);
// Final load: transfers the residuum, or 16 bytes when the total is a
// multiple of 16.
1322 microOps
[uopIdx
++] = new MicroNeonLoad64(
1323 machInst
, vx
+ (RegIndex
) i
, rnsp
, 16 * i
, memaccessFlags
, baseIsSP
,
1324 residuum
? residuum
: 16 /* accSize */, eSize
);
1326 // Writeback microop: the post-increment amount is encoded in "Rm": a
1327 // 64-bit general register OR as '11111' for an immediate value equal to
1328 // the total number of bytes transferred (i.e. 8, 16, 24, 32, 48 or 64)
1330 if (rm
!= ((RegIndex
) INTREG_X31
)) {
1331 microOps
[uopIdx
++] = new MicroAddXERegUop(machInst
, rnsp
, rnsp
, rm
,
1334 microOps
[uopIdx
++] = new MicroAddXiUop(machInst
, rnsp
, rnsp
,
// Emit the unpack micro-ops; each gets vd + 2 * i and i as the step.
1339 for(int i
= 0; i
< numMarshalMicroops
; ++i
) {
1340 microOps
[uopIdx
++] = new MicroUnpackNeon64(
1341 machInst
, vd
+ (RegIndex
) (2 * i
), vx
, eSize
, dataSize
,
1342 numStructElems
, index
, i
/* step */, replicate
);
// Every allocated slot must have been filled.
1345 assert(uopIdx
== numMicroops
);
// All micro-ops except the final one are flagged delayed-commit; the final
// one is flagged as the last micro-op.
1347 for (int i
= 0; i
< numMicroops
- 1; i
++) {
1348 microOps
[i
]->setDelayedCommit();
1350 microOps
[numMicroops
- 1]->setLastMicroop();
// VstSingleOp64 constructor: expands the macro-op into an array of micro-ops.
// As visible here, the emitted order is: MicroPackNeon64 marshalling
// micro-ops, then MicroNeonStore64 micro-ops of at most 16 bytes each, then a
// writeback add micro-op (a slot is reserved for it only when wb is true).
//
//   rn              - base register; converted with makeSP() into rnsp
//   vd              - vector register operand handed to the pack micro-ops
//   rm              - compared against INTREG_X31 to choose between the
//                     MicroAddXERegUop and MicroAddXiUop writeback micro-ops
//   eSize           - element size as a power of two (eSizeBytes = 1 << eSize)
//   dataSize,
//   index           - forwarded to the pack micro-ops
//   numStructElems  - totNumBytes = numStructElems * eSizeBytes
//   wb              - reserves one extra micro-op slot when true
//
// NOTE(review): this listing omits some original lines (braces, part of the
// parameter list, some trailing arguments), so the notes below describe only
// the statements that are visible.
1353 VstSingleOp64::VstSingleOp64(const char *mnem
, ExtMachInst machInst
,
1354 OpClass __opClass
, RegIndex rn
, RegIndex vd
,
1355 RegIndex rm
, uint8_t eSize
, uint8_t dataSize
,
1356 uint8_t numStructElems
, uint8_t index
, bool wb
,
// NOTE(review): the initializers below set the listed members to zero/false
// rather than to the constructor arguments of the same name; the body then
// uses those names, which presumably resolve to the parameters -- confirm
// this is intended.
1358 PredMacroOp(mnem
, machInst
, __opClass
),
1359 eSize(0), dataSize(0), numStructElems(0), index(0),
1360 wb(false), replicate(false)
// vx is the base index of the registers passed both to the pack micro-ops
// (vx + 2 * i) and to the store micro-ops (vx + i).
1362 RegIndex vx
= NumFloatV8ArchRegs
/ 4;
1363 RegIndex rnsp
= (RegIndex
) makeSP((IntRegIndex
) rn
);
1364 bool baseIsSP
= isSP((IntRegIndex
) rnsp
);
// Start the micro-op count with the optional writeback micro-op.
1366 numMicroops
= wb
? 1 : 0;
// Total number of bytes to transfer; asserted not to exceed 64.
1368 int eSizeBytes
= 1 << eSize
;
1369 int totNumBytes
= numStructElems
* eSizeBytes
;
1370 assert(totNumBytes
<= 64);
1372 // The guiding principle here is that no more than 16 bytes can be
1373 // transferred at a time
1374 int numMemMicroops
= totNumBytes
/ 16;
1375 int residuum
= totNumBytes
% 16;
// NOTE(review): original lines 1376-1377 are missing from this listing;
// presumably they bump numMemMicroops when residuum is non-zero -- confirm.
1378 numMicroops
+= numMemMicroops
;
// One pack micro-op is used for up to 32 bytes, two above that.
1380 int numMarshalMicroops
= totNumBytes
> 32 ? 2 : 1;
1381 numMicroops
+= numMarshalMicroops
;
1383 microOps
= new StaticInstPtr
[numMicroops
];
1384 unsigned uopIdx
= 0;
// Emit the pack micro-ops; each gets vx + 2 * i and i as the step.
1386 for(int i
= 0; i
< numMarshalMicroops
; ++i
) {
1387 microOps
[uopIdx
++] = new MicroPackNeon64(
1388 machInst
, vx
+ (RegIndex
) (2 * i
), vd
, eSize
, dataSize
,
1389 numStructElems
, index
, i
/* step */, replicate
);
// Access flags shared by every store micro-op below.
1392 uint32_t memaccessFlags
= TLB::MustBeOne
| (TLB::ArmFlags
) eSize
|
1393 TLB::AllowUnaligned
;
// All but the final store transfer a full 16 bytes at offset 16 * i.
// (The declaration of i for this loop is on a line not shown here.)
1396 for(; i
< numMemMicroops
- 1; ++i
) {
1397 microOps
[uopIdx
++] = new MicroNeonStore64(
1398 machInst
, vx
+ (RegIndex
) i
, rnsp
, 16 * i
, memaccessFlags
,
1399 baseIsSP
, 16 /* accsize */, eSize
);
// Final store: transfers the residuum, or 16 bytes when the total is a
// multiple of 16.
1401 microOps
[uopIdx
++] = new MicroNeonStore64(
1402 machInst
, vx
+ (RegIndex
) i
, rnsp
, 16 * i
, memaccessFlags
, baseIsSP
,
1403 residuum
? residuum
: 16 /* accSize */, eSize
);
1405 // Writeback microop: the post-increment amount is encoded in "Rm": a
1406 // 64-bit general register OR as '11111' for an immediate value equal to
1407 // the total number of bytes transferred (i.e. 8, 16, 24, 32, 48 or 64)
1409 if (rm
!= ((RegIndex
) INTREG_X31
)) {
1410 microOps
[uopIdx
++] = new MicroAddXERegUop(machInst
, rnsp
, rnsp
, rm
,
1413 microOps
[uopIdx
++] = new MicroAddXiUop(machInst
, rnsp
, rnsp
,
// Every allocated slot must have been filled.
1418 assert(uopIdx
== numMicroops
);
// All micro-ops except the final one are flagged delayed-commit; the final
// one is flagged as the last micro-op.
1420 for (int i
= 0; i
< numMicroops
- 1; i
++) {
1421 microOps
[i
]->setDelayedCommit();
1423 microOps
[numMicroops
- 1]->setLastMicroop();
// MacroVFPMemOp constructor: expands a VFP load/store multiple into an array
// of micro-ops, plus one base-register update micro-op when writeback is set.
//
//   rn         - base register passed to every micro-op
//   vd         - first FP register; post-incremented for each micro-op created
//   single     - true: one micro-op per register and a 4-byte address step;
//                false: two micro-ops (DB + DT) per register and an 8-byte step
//   up         - selects +4 or -4 for the address of the DT micro-op
//   writeback  - adds one micro-op to the total when true
//   load       - NOTE(review): presumably selects the Ldr vs Str micro-ops;
//                the condition is on a line not shown in this listing
//   offset     - register count field: count is offset (single) or offset / 2
//
// NOTE(review): this listing omits some original lines (braces, conditions,
// the declarations of i, addr and tempUp), so the notes below describe only
// the statements that are visible.
1426 MacroVFPMemOp::MacroVFPMemOp(const char *mnem
, ExtMachInst machInst
,
1427 OpClass __opClass
, IntRegIndex rn
,
1428 RegIndex vd
, bool single
, bool up
,
1429 bool writeback
, bool load
, uint32_t offset
) :
1430 PredMacroOp(mnem
, machInst
, __opClass
)
1434 // The lowest order bit selects fldmx (set) or fldmd (clear). These seem
1435 // to be functionally identical except that fldmx is deprecated. For now
1436 // we'll assume they're otherwise interchangeable.
1437 int count
= (single
? offset
: (offset
/ 2));
// An out-of-range count is only warned about (once), not rejected.
1438 if (count
== 0 || count
> NumFloatV7ArchRegs
)
1439 warn_once("Bad offset field for VFP load/store multiple.\n");
1441 // Force there to be at least one microop so the macroop makes sense.
// Clamp the count to the number of architectural FP registers.
1444 if (count
> NumFloatV7ArchRegs
)
1445 count
= NumFloatV7ArchRegs
;
// One or two micro-ops per register, plus the optional writeback micro-op.
1447 numMicroops
= count
* (single
? 1 : 2) + (writeback
? 1 : 0);
1448 microOps
= new StaticInstPtr
[numMicroops
];
// Emit the memory micro-ops for each register. The double-register case uses
// a DB/DT pair, with the DT micro-op addressed 4 bytes above or below addr
// depending on up.
1456 for (int j
= 0; j
< count
; j
++) {
1459 microOps
[i
++] = new MicroLdrFpUop(machInst
, vd
++, rn
,
1462 microOps
[i
++] = new MicroLdrDBFpUop(machInst
, vd
++, rn
,
1464 microOps
[i
++] = new MicroLdrDTFpUop(machInst
, vd
++, rn
, tempUp
,
1465 addr
+ (up
? 4 : -4));
1469 microOps
[i
++] = new MicroStrFpUop(machInst
, vd
++, rn
,
1472 microOps
[i
++] = new MicroStrDBFpUop(machInst
, vd
++, rn
,
1474 microOps
[i
++] = new MicroStrDTFpUop(machInst
, vd
++, rn
, tempUp
,
1475 addr
+ (up
? 4 : -4));
// Step the address by one register (4 bytes single, 8 bytes double).
1479 addr
-= (single
? 4 : 8);
1480 // The microops don't handle negative displacement, so if we
1481 // hit zero, flip polarity and start adding.
1487 addr
+= (single
? 4 : 8);
// Base-register update by 4 * offset bytes, as an add or a subtract.
// NOTE(review): the conditions choosing between them are not shown here.
1494 new MicroAddiUop(machInst
, rn
, rn
, 4 * offset
);
1497 new MicroSubiUop(machInst
, rn
, rn
, 4 * offset
);
// Every allocated slot must have been filled.
1501 assert(numMicroops
== i
);
1502 microOps
[numMicroops
- 1]->setLastMicroop();
// Walk the micro-ops up to (not including) the one flagged last and mark
// each as delayed-commit.
1504 for (StaticInstPtr
*curUop
= microOps
;
1505 !(*curUop
)->isLastMicroop(); curUop
++) {
1506 MicroOp
* uopPtr
= dynamic_cast<MicroOp
*>(curUop
->get());
1508 uopPtr
->setDelayedCommit();
// Builds the disassembly text for a MicroIntImmOp in a std::stringstream.
// The visible part appends the immediate formatted as "#<imm>".
// NOTE(review): other lines of this function are not shown in this listing.
1513 MicroIntImmOp::generateDisassembly(Addr pc
, const SymbolTable
*symtab
) const
1515 std::stringstream ss
;
1521 ccprintf(ss
, "#%d", imm
);
// Builds the disassembly text for a MicroIntImmXOp in a std::stringstream.
// The visible part appends the immediate formatted as "#<imm>".
// NOTE(review): other lines of this function are not shown in this listing.
1526 MicroIntImmXOp::generateDisassembly(Addr pc
, const SymbolTable
*symtab
) const
1528 std::stringstream ss
;
1534 ccprintf(ss
, "#%d", imm
);
// Disassembly routine for MicroSetPCCPSR; it works on a local
// std::stringstream.
// NOTE(review): only the stream declaration is shown in this listing, so the
// text it produces cannot be described from here.
1539 MicroSetPCCPSR::generateDisassembly(Addr pc
, const SymbolTable
*symtab
) const
1541 std::stringstream ss
;
// Builds the disassembly text for a MicroIntRegXOp in a std::stringstream.
// The visible part hands urc, type and shiftAmt to printExtendOperand().
// NOTE(review): other lines of this function are not shown in this listing.
1548 MicroIntRegXOp::generateDisassembly(Addr pc
, const SymbolTable
*symtab
) const
1550 std::stringstream ss
;
1555 printExtendOperand(false, ss
, (IntRegIndex
)urc
, type
, shiftAmt
);
// Disassembly routine for MicroIntMov; it works on a local std::stringstream.
// NOTE(review): only the stream declaration is shown in this listing, so the
// text it produces cannot be described from here.
1560 MicroIntMov::generateDisassembly(Addr pc
, const SymbolTable
*symtab
) const
1562 std::stringstream ss
;
// Disassembly routine for MicroIntOp; it works on a local std::stringstream.
// NOTE(review): only the stream declaration is shown in this listing, so the
// text it produces cannot be described from here.
1571 MicroIntOp::generateDisassembly(Addr pc
, const SymbolTable
*symtab
) const
1573 std::stringstream ss
;
// Builds the disassembly text for a MicroMemOp in a std::stringstream.
// The visible part prints register ura offset by FP_Reg_Base and appends the
// immediate formatted as "#<imm>".
// NOTE(review): other lines of this function (including whatever condition
// guards the FP_Reg_Base form) are not shown in this listing.
1584 MicroMemOp::generateDisassembly(Addr pc
, const SymbolTable
*symtab
) const
1586 std::stringstream ss
;
1589 printReg(ss
, ura
+ FP_Reg_Base
);
1595 ccprintf(ss
, "#%d", imm
);
1601 MicroMemPairOp::generateDisassembly(Addr pc
, const SymbolTable
*symtab
) const
1603 std::stringstream ss
;
1607 printReg(ss
, dest2
);
1611 ccprintf(ss
, "#%d", imm
);