/*
 * Copyright (c) 2010-2014 ARM Limited
 * All rights reserved
 *
 * The license below extends only to copyright in the software and shall
 * not be construed as granting a license to any other intellectual
 * property including but not limited to intellectual property relating
 * to a hardware implementation of the functionality of the software
 * licensed hereunder.  You may use the software subject to the license
 * terms below provided that you ensure that this notice is replicated
 * unmodified and in its entirety in all distributions of the software,
 * modified or unmodified, in source code or in binary form.
 *
 * Copyright (c) 2007-2008 The Florida State University
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are
 * met: redistributions of source code must retain the above copyright
 * notice, this list of conditions and the following disclaimer;
 * redistributions in binary form must reproduce the above copyright
 * notice, this list of conditions and the following disclaimer in the
 * documentation and/or other materials provided with the distribution;
 * neither the name of the copyright holders nor the names of its
 * contributors may be used to endorse or promote products derived from
 * this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT
 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
#include "arch/arm/insts/macromem.hh"

#include <sstream>

#include "arch/arm/generated/decoder.hh"
#include "arch/arm/insts/neon64_mem.hh"

using namespace ArmISAInst;

namespace ArmISA
{
MacroMemOp::MacroMemOp(const char *mnem, ExtMachInst machInst,
                       OpClass __opClass, IntRegIndex rn,
                       bool index, bool up, bool user, bool writeback,
                       bool load, uint32_t reglist) :
    PredMacroOp(mnem, machInst, __opClass)
{
    uint32_t regs = reglist;
    uint32_t ones = number_of_ones(reglist);
    uint32_t mem_ops = ones;

    // Copy the base address register if we overwrite it, or if this
    // instruction is basically a no-op (we have to do something).
    bool copy_base = (bits(reglist, rn) && load) || !ones;
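    // For example, "ldmia r1, {r1, r2}" reloads its own base register
    // mid-sequence, so the loads below address memory relative to a copy of
    // the base kept in ureg0 instead of relative to rn itself.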
    bool force_user = user && !bits(reglist, 15);
    bool exception_ret = user && bits(reglist, 15);
    bool pc_temp = load && writeback && bits(reglist, 15);
    if (load) {
        numMicroops = ((ones + 1) / 2)
                    + ((ones % 2 == 0 && exception_ret) ? 1 : 0)
                    + (copy_base ? 1 : 0)
                    + (writeback ? 1 : 0)
                    + (pc_temp ? 1 : 0);
    } else {
        numMicroops = ones + (writeback ? 1 : 0);
    }
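    // For example, "ldmia r0!, {r1-r5}" has ones == 5, giving
    // (5 + 1) / 2 == 3 paired-load microops plus one writeback microop;
    // the corresponding stmia takes one store microop per register instead.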
    microOps = new StaticInstPtr[numMicroops];

    uint32_t addr = 0;

    if (!up)
        addr = (ones << 2) - 4;

    if (index)
        addr += 4;

    StaticInstPtr *uop = microOps;

    // Add 0 to Rn and stick it in ureg0.
    // This is equivalent to a move.
    if (copy_base)
        *uop++ = new MicroAddiUop(machInst, INTREG_UREG0, rn, 0);
    while (mem_ops != 0) {
        // Do load operations in pairs if possible
        if (load && mem_ops >= 2 &&
            !(mem_ops == 2 && bits(regs, INTREG_PC) && exception_ret)) {
            // 64-bit memory operation
            // Find 2 set register bits (clear them after finding)
            unsigned reg = 0;
            unsigned reg_idx1;
            unsigned reg_idx2;

            // Find the first register
            while (!bits(regs, reg)) reg++;
            replaceBits(regs, reg, 0);
            reg_idx1 = force_user ? intRegInMode(MODE_USER, reg) : reg;

            // Find the second register
            while (!bits(regs, reg)) reg++;
            replaceBits(regs, reg, 0);
            reg_idx2 = force_user ? intRegInMode(MODE_USER, reg) : reg;

            // Load into temp reg if necessary
            if (reg_idx2 == INTREG_PC && pc_temp)
                reg_idx2 = INTREG_UREG1;

            // Actually load both registers from memory
            *uop = new MicroLdr2Uop(machInst, reg_idx1, reg_idx2,
                    copy_base ? INTREG_UREG0 : rn, up, addr);

            if (!writeback && reg_idx2 == INTREG_PC) {
                // No writeback if idx==pc, set appropriate flags
                (*uop)->setFlag(StaticInst::IsControl);
                (*uop)->setFlag(StaticInst::IsIndirectControl);

                if (!(condCode == COND_AL || condCode == COND_UC))
                    (*uop)->setFlag(StaticInst::IsCondControl);
                else
                    (*uop)->setFlag(StaticInst::IsUncondControl);
            }

            if (up)
                addr += 8;
            else
                addr -= 8;
            mem_ops -= 2;
        } else {
            // 32-bit memory operation
            // Find register for operation
            unsigned reg = 0;
            unsigned reg_idx;

            while (!bits(regs, reg)) reg++;
            replaceBits(regs, reg, 0);
            reg_idx = force_user ? intRegInMode(MODE_USER, reg) : reg;

            if (load) {
                if (writeback && reg_idx == INTREG_PC) {
                    // If this instruction changes the PC and performs a
                    // writeback, ensure the pc load/branch is the last uop.
                    // Load into a temp reg here.
                    *uop = new MicroLdrUop(machInst, INTREG_UREG1,
                            copy_base ? INTREG_UREG0 : rn, up, addr);
                } else if (reg_idx == INTREG_PC && exception_ret) {
                    // Special handling for exception return
                    *uop = new MicroLdrRetUop(machInst, reg_idx,
                            copy_base ? INTREG_UREG0 : rn, up, addr);
                } else {
                    // Standard single load uop
                    *uop = new MicroLdrUop(machInst, reg_idx,
                            copy_base ? INTREG_UREG0 : rn, up, addr);
                }

                // Loading pc as last operation? Set appropriate flags.
                if (!writeback && reg_idx == INTREG_PC) {
                    (*uop)->setFlag(StaticInst::IsControl);
                    (*uop)->setFlag(StaticInst::IsIndirectControl);

                    if (!(condCode == COND_AL || condCode == COND_UC))
                        (*uop)->setFlag(StaticInst::IsCondControl);
                    else
                        (*uop)->setFlag(StaticInst::IsUncondControl);
                }
            } else {
                *uop = new MicroStrUop(machInst, reg_idx, rn, up, addr);
            }

            if (up)
                addr += 4;
            else
                addr -= 4;
            --mem_ops;
        }

        // Load/store micro-op generated, go to next uop
        uop++;
    }

    if (writeback && ones) {
        // Perform writeback uop operation
        if (up)
            *uop++ = new MicroAddiUop(machInst, rn, rn, ones * 4);
        else
            *uop++ = new MicroSubiUop(machInst, rn, rn, ones * 4);

        // Write PC after address writeback?
        if (pc_temp) {
            if (exception_ret) {
                *uop = new MicroUopRegMovRet(machInst, 0, INTREG_UREG1);
            } else {
                *uop = new MicroUopRegMov(machInst, INTREG_PC, INTREG_UREG1);
            }
            (*uop)->setFlag(StaticInst::IsControl);
            (*uop)->setFlag(StaticInst::IsIndirectControl);

            if (!(condCode == COND_AL || condCode == COND_UC))
                (*uop)->setFlag(StaticInst::IsCondControl);
            else
                (*uop)->setFlag(StaticInst::IsUncondControl);

            if (rn == INTREG_SP)
                (*uop)->setFlag(StaticInst::IsReturn);

            ++uop;
        }
    }

    --uop;
    (*uop)->setLastMicroop();
    microOps[0]->setFirstMicroop();

    /* Take the control flags from the last microop for the macroop */
    if ((*uop)->isControl())
        setFlag(StaticInst::IsControl);
    if ((*uop)->isCondCtrl())
        setFlag(StaticInst::IsCondControl);
    if ((*uop)->isUncondCtrl())
        setFlag(StaticInst::IsUncondControl);
    if ((*uop)->isIndirectCtrl())
        setFlag(StaticInst::IsIndirectControl);
    if ((*uop)->isReturn())
        setFlag(StaticInst::IsReturn);

    for (StaticInstPtr *curUop = microOps;
         !(*curUop)->isLastMicroop(); curUop++) {
        (*curUop)->setDelayedCommit();
    }
}
PairMemOp::PairMemOp(const char *mnem, ExtMachInst machInst, OpClass __opClass,
                     uint32_t size, bool fp, bool load, bool noAlloc,
                     bool signExt, bool exclusive, bool acrel,
                     int64_t imm, AddrMode mode,
                     IntRegIndex rn, IntRegIndex rt, IntRegIndex rt2) :
    PredMacroOp(mnem, machInst, __opClass)
{
    bool post = (mode == AddrMd_PostIndex);
    bool writeback = (mode != AddrMd_Offset);
    if (load) {
        // Use integer rounding to round up loads of size 4
        numMicroops = (post ? 0 : 1) + ((size + 4) / 8) + (writeback ? 1 : 0);
    } else {
        numMicroops = (post ? 0 : 1) + (size / 4) + (writeback ? 1 : 0);
    }
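    // For example, a 64-bit ldp (size == 8) rounds up to (8 + 4) / 8 == 1
    // paired-load microop, while the matching stp takes 8 / 4 == 2 store
    // microops, one per register, as emitted below.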
    microOps = new StaticInstPtr[numMicroops];

    StaticInstPtr *uop = microOps;

    if (!post) {
        *uop++ = new MicroAddXiSpAlignUop(machInst, INTREG_UREG0, rn,
                imm);
    }

    if (fp) {
        if (size == 16) {
            if (load) {
                *uop++ = new MicroLdFp16Uop(machInst, rt,
                        post ? rn : INTREG_UREG0, 0, noAlloc, exclusive,
                        acrel);
                *uop++ = new MicroLdFp16Uop(machInst, rt2,
                        post ? rn : INTREG_UREG0, 16, noAlloc, exclusive,
                        acrel);
            } else {
                *uop++ = new MicroStrQBFpXImmUop(machInst, rt,
                        post ? rn : INTREG_UREG0, 0, noAlloc, exclusive,
                        acrel);
                *uop++ = new MicroStrQTFpXImmUop(machInst, rt,
                        post ? rn : INTREG_UREG0, 0, noAlloc, exclusive,
                        acrel);
                *uop++ = new MicroStrQBFpXImmUop(machInst, rt2,
                        post ? rn : INTREG_UREG0, 16, noAlloc, exclusive,
                        acrel);
                *uop++ = new MicroStrQTFpXImmUop(machInst, rt2,
                        post ? rn : INTREG_UREG0, 16, noAlloc, exclusive,
                        acrel);
            }
        } else if (size == 8) {
            if (load) {
                *uop++ = new MicroLdPairFp8Uop(machInst, rt, rt2,
                        post ? rn : INTREG_UREG0, 0, noAlloc, exclusive,
                        acrel);
            } else {
                *uop++ = new MicroStrFpXImmUop(machInst, rt,
                        post ? rn : INTREG_UREG0, 0, noAlloc, exclusive,
                        acrel);
                *uop++ = new MicroStrFpXImmUop(machInst, rt2,
                        post ? rn : INTREG_UREG0, 8, noAlloc, exclusive,
                        acrel);
            }
        } else if (size == 4) {
            if (load) {
                *uop++ = new MicroLdrDFpXImmUop(machInst, rt, rt2,
                        post ? rn : INTREG_UREG0, 0, noAlloc, exclusive,
                        acrel);
            } else {
                *uop++ = new MicroStrDFpXImmUop(machInst, rt, rt2,
                        post ? rn : INTREG_UREG0, 0, noAlloc, exclusive,
                        acrel);
            }
        }
    } else {
        if (size == 8) {
            if (load) {
                *uop++ = new MicroLdPairUop(machInst, rt, rt2,
                        post ? rn : INTREG_UREG0, 0, noAlloc, exclusive,
                        acrel);
            } else {
                *uop++ = new MicroStrXImmUop(machInst, rt,
                        post ? rn : INTREG_UREG0,
                        0, noAlloc, exclusive, acrel);
                *uop++ = new MicroStrXImmUop(machInst, rt2,
                        post ? rn : INTREG_UREG0,
                        size, noAlloc, exclusive, acrel);
            }
        } else if (size == 4) {
            if (load) {
                if (signExt) {
                    *uop++ = new MicroLdrDSXImmUop(machInst, rt, rt2,
                            post ? rn : INTREG_UREG0, 0, noAlloc, exclusive,
                            acrel);
                } else {
                    *uop++ = new MicroLdrDUXImmUop(machInst, rt, rt2,
                            post ? rn : INTREG_UREG0, 0, noAlloc, exclusive,
                            acrel);
                }
            } else {
                *uop++ = new MicroStrDXImmUop(machInst, rt, rt2,
                        post ? rn : INTREG_UREG0, 0, noAlloc, exclusive,
                        acrel);
            }
        }
    }

    if (writeback) {
        *uop++ = new MicroAddXiUop(machInst, rn, post ? rn : INTREG_UREG0,
                                   post ? imm : 0);
    }

    assert(uop == &microOps[numMicroops]);
    (*--uop)->setLastMicroop();
    microOps[0]->setFirstMicroop();

    for (StaticInstPtr *curUop = microOps;
         !(*curUop)->isLastMicroop(); curUop++) {
        (*curUop)->setDelayedCommit();
    }
}
BigFpMemImmOp::BigFpMemImmOp(const char *mnem, ExtMachInst machInst,
                             OpClass __opClass, bool load, IntRegIndex dest,
                             IntRegIndex base, int64_t imm) :
    PredMacroOp(mnem, machInst, __opClass)
{
    numMicroops = load ? 1 : 2;
    microOps = new StaticInstPtr[numMicroops];

    StaticInstPtr *uop = microOps;

    if (load) {
        *uop = new MicroLdFp16Uop(machInst, dest, base, imm);
    } else {
        *uop = new MicroStrQBFpXImmUop(machInst, dest, base, imm);
        (*uop)->setDelayedCommit();
        *++uop = new MicroStrQTFpXImmUop(machInst, dest, base, imm);
    }

    (*uop)->setLastMicroop();
    microOps[0]->setFirstMicroop();
}
BigFpMemPostOp::BigFpMemPostOp(const char *mnem, ExtMachInst machInst,
                               OpClass __opClass, bool load, IntRegIndex dest,
                               IntRegIndex base, int64_t imm) :
    PredMacroOp(mnem, machInst, __opClass)
{
    numMicroops = load ? 2 : 3;
    microOps = new StaticInstPtr[numMicroops];

    StaticInstPtr *uop = microOps;

    if (load) {
        *uop++ = new MicroLdFp16Uop(machInst, dest, base, 0);
    } else {
        *uop++ = new MicroStrQBFpXImmUop(machInst, dest, base, 0);
        *uop++ = new MicroStrQTFpXImmUop(machInst, dest, base, 0);
    }

    *uop = new MicroAddXiUop(machInst, base, base, imm);
    (*uop)->setLastMicroop();
    microOps[0]->setFirstMicroop();

    for (StaticInstPtr *curUop = microOps;
         !(*curUop)->isLastMicroop(); curUop++) {
        (*curUop)->setDelayedCommit();
    }
}
BigFpMemPreOp::BigFpMemPreOp(const char *mnem, ExtMachInst machInst,
                             OpClass __opClass, bool load, IntRegIndex dest,
                             IntRegIndex base, int64_t imm) :
    PredMacroOp(mnem, machInst, __opClass)
{
    numMicroops = load ? 2 : 3;
    microOps = new StaticInstPtr[numMicroops];

    StaticInstPtr *uop = microOps;

    if (load) {
        *uop++ = new MicroLdFp16Uop(machInst, dest, base, imm);
    } else {
        *uop++ = new MicroStrQBFpXImmUop(machInst, dest, base, imm);
        *uop++ = new MicroStrQTFpXImmUop(machInst, dest, base, imm);
    }

    *uop = new MicroAddXiUop(machInst, base, base, imm);
    (*uop)->setLastMicroop();
    microOps[0]->setFirstMicroop();

    for (StaticInstPtr *curUop = microOps;
         !(*curUop)->isLastMicroop(); curUop++) {
        (*curUop)->setDelayedCommit();
    }
}
BigFpMemRegOp::BigFpMemRegOp(const char *mnem, ExtMachInst machInst,
                             OpClass __opClass, bool load, IntRegIndex dest,
                             IntRegIndex base, IntRegIndex offset,
                             ArmExtendType type, int64_t imm) :
    PredMacroOp(mnem, machInst, __opClass)
{
    numMicroops = load ? 1 : 2;
    microOps = new StaticInstPtr[numMicroops];

    StaticInstPtr *uop = microOps;

    if (load) {
        *uop = new MicroLdFp16RegUop(machInst, dest, base,
                                     offset, type, imm);
    } else {
        *uop = new MicroStrQBFpXRegUop(machInst, dest, base,
                                       offset, type, imm);
        (*uop)->setDelayedCommit();
        *++uop = new MicroStrQTFpXRegUop(machInst, dest, base,
                                         offset, type, imm);
    }

    (*uop)->setLastMicroop();
    microOps[0]->setFirstMicroop();
}
BigFpMemLitOp::BigFpMemLitOp(const char *mnem, ExtMachInst machInst,
                             OpClass __opClass, IntRegIndex dest,
                             int64_t imm) :
    PredMacroOp(mnem, machInst, __opClass)
{
    numMicroops = 1;
    microOps = new StaticInstPtr[numMicroops];

    microOps[0] = new MicroLdFp16LitUop(machInst, dest, imm);
    microOps[0]->setLastMicroop();
    microOps[0]->setFirstMicroop();
}
VldMultOp::VldMultOp(const char *mnem, ExtMachInst machInst, OpClass __opClass,
                     unsigned elems, RegIndex rn, RegIndex vd, unsigned regs,
                     unsigned inc, uint32_t size, uint32_t align, RegIndex rm) :
    PredMacroOp(mnem, machInst, __opClass)
{
    assert(regs > 0 && regs <= 4);
    assert(regs % elems == 0);
    numMicroops = (regs > 2) ? 2 : 1;
    bool wb = (rm != 15);
    bool deinterleave = (elems > 1);

    if (wb) numMicroops++;
    if (deinterleave) numMicroops += (regs / elems);
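    // For example, a vld2 of four registers (regs == 4, elems == 2) costs
    // two load microops plus two deinterleave microops, and one more
    // microop when rm requests a base-register writeback.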
    microOps = new StaticInstPtr[numMicroops];

    RegIndex rMid = deinterleave ? VecSpecialElem : vd * 2;

    uint32_t noAlign = TLB::MustBeOne;

    unsigned uopIdx = 0;
    switch (regs) {
      case 4:
        microOps[uopIdx++] = newNeonMemInst<MicroLdrNeon16Uop>(
                size, machInst, rMid, rn, 0, align);
        microOps[uopIdx++] = newNeonMemInst<MicroLdrNeon16Uop>(
                size, machInst, rMid + 4, rn, 16, noAlign);
        break;
      case 3:
        microOps[uopIdx++] = newNeonMemInst<MicroLdrNeon16Uop>(
                size, machInst, rMid, rn, 0, align);
        microOps[uopIdx++] = newNeonMemInst<MicroLdrNeon8Uop>(
                size, machInst, rMid + 4, rn, 16, noAlign);
        break;
      case 2:
        microOps[uopIdx++] = newNeonMemInst<MicroLdrNeon16Uop>(
                size, machInst, rMid, rn, 0, align);
        break;
      case 1:
        microOps[uopIdx++] = newNeonMemInst<MicroLdrNeon8Uop>(
                size, machInst, rMid, rn, 0, align);
        break;
      default:
        // Unknown number of registers
        microOps[uopIdx++] = new Unknown(machInst);
    }

    if (wb) {
        if (rm != 15 && rm != 13) {
            microOps[uopIdx++] =
                new MicroAddUop(machInst, rn, rn, rm, 0, ArmISA::LSL);
        } else {
            microOps[uopIdx++] =
                new MicroAddiUop(machInst, rn, rn, regs * 8);
        }
    }

    if (deinterleave) {
        switch (elems) {
          case 4:
            microOps[uopIdx++] = newNeonMixInst<MicroDeintNeon8Uop>(
                    size, machInst, vd * 2, rMid, inc * 2);
            break;
          case 3:
            microOps[uopIdx++] = newNeonMixInst<MicroDeintNeon6Uop>(
                    size, machInst, vd * 2, rMid, inc * 2);
            break;
          case 2:
            assert(regs == 4 || regs == 2);
            if (regs == 4) {
                microOps[uopIdx++] = newNeonMixInst<MicroDeintNeon4Uop>(
                        size, machInst, vd * 2, rMid, inc * 2);
                microOps[uopIdx++] = newNeonMixInst<MicroDeintNeon4Uop>(
                        size, machInst, vd * 2 + 2, rMid + 4, inc * 2);
            } else {
                microOps[uopIdx++] = newNeonMixInst<MicroDeintNeon4Uop>(
                        size, machInst, vd * 2, rMid, inc * 2);
            }
            break;
          default:
            // Bad number of elements to deinterleave
            microOps[uopIdx++] = new Unknown(machInst);
        }
    }

    assert(uopIdx == numMicroops);

    for (unsigned i = 0; i < numMicroops - 1; i++) {
        MicroOp *uopPtr = dynamic_cast<MicroOp *>(microOps[i].get());
        assert(uopPtr);
        uopPtr->setDelayedCommit();
    }
    microOps[0]->setFirstMicroop();
    microOps[numMicroops - 1]->setLastMicroop();
}
VldSingleOp::VldSingleOp(const char *mnem, ExtMachInst machInst,
                         OpClass __opClass, bool all, unsigned elems,
                         RegIndex rn, RegIndex vd, unsigned regs,
                         unsigned inc, uint32_t size, uint32_t align,
                         RegIndex rm, unsigned lane) :
    PredMacroOp(mnem, machInst, __opClass)
{
    assert(regs > 0 && regs <= 4);
    assert(regs % elems == 0);

    unsigned eBytes = (1 << size);
    unsigned loadSize = eBytes * elems;
    unsigned loadRegs M5_VAR_USED =
        (loadSize + sizeof(uint32_t) - 1) / sizeof(uint32_t);
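    // For example, a vld4 to one lane of 16-bit elements (eBytes == 2,
    // elems == 4) transfers loadSize == 8 bytes, which arrive as
    // loadRegs == 2 32-bit chunks.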
    assert(loadRegs > 0 && loadRegs <= 4);

    numMicroops = 1;
    bool wb = (rm != 15);

    if (wb) numMicroops++;
    numMicroops += (regs / elems);
    microOps = new StaticInstPtr[numMicroops];

    RegIndex ufp0 = VecSpecialElem;

    unsigned uopIdx = 0;
    switch (loadSize) {
      case 1:
        microOps[uopIdx++] = new MicroLdrNeon1Uop<uint8_t>(
                machInst, ufp0, rn, 0, align);
        break;
      case 2:
        if (eBytes == 2) {
            microOps[uopIdx++] = new MicroLdrNeon2Uop<uint16_t>(
                    machInst, ufp0, rn, 0, align);
        } else {
            microOps[uopIdx++] = new MicroLdrNeon2Uop<uint8_t>(
                    machInst, ufp0, rn, 0, align);
        }
        break;
      case 3:
        microOps[uopIdx++] = new MicroLdrNeon3Uop<uint8_t>(
                machInst, ufp0, rn, 0, align);
        break;
      case 4:
        switch (eBytes) {
          case 1:
            microOps[uopIdx++] = new MicroLdrNeon4Uop<uint8_t>(
                    machInst, ufp0, rn, 0, align);
            break;
          case 2:
            microOps[uopIdx++] = new MicroLdrNeon4Uop<uint16_t>(
                    machInst, ufp0, rn, 0, align);
            break;
          case 4:
            microOps[uopIdx++] = new MicroLdrNeon4Uop<uint32_t>(
                    machInst, ufp0, rn, 0, align);
            break;
        }
        break;
      case 6:
        microOps[uopIdx++] = new MicroLdrNeon6Uop<uint16_t>(
                machInst, ufp0, rn, 0, align);
        break;
      case 8:
        switch (eBytes) {
          case 2:
            microOps[uopIdx++] = new MicroLdrNeon8Uop<uint16_t>(
                    machInst, ufp0, rn, 0, align);
            break;
          case 4:
            microOps[uopIdx++] = new MicroLdrNeon8Uop<uint32_t>(
                    machInst, ufp0, rn, 0, align);
            break;
        }
        break;
      case 12:
        microOps[uopIdx++] = new MicroLdrNeon12Uop<uint32_t>(
                machInst, ufp0, rn, 0, align);
        break;
      case 16:
        microOps[uopIdx++] = new MicroLdrNeon16Uop<uint32_t>(
                machInst, ufp0, rn, 0, align);
        break;
      default:
        // Unrecognized load size
        microOps[uopIdx++] = new Unknown(machInst);
    }

    if (wb) {
        if (rm != 15 && rm != 13) {
            microOps[uopIdx++] =
                new MicroAddUop(machInst, rn, rn, rm, 0, ArmISA::LSL);
        } else {
            microOps[uopIdx++] =
                new MicroAddiUop(machInst, rn, rn, loadSize);
        }
    }

    switch (elems) {
      case 4:
        switch (size) {
          case 0:
            if (all) {
                microOps[uopIdx++] = new MicroUnpackAllNeon2to8Uop<uint8_t>(
                        machInst, vd * 2, ufp0, inc * 2);
            } else {
                microOps[uopIdx++] = new MicroUnpackNeon2to8Uop<uint8_t>(
                        machInst, vd * 2, ufp0, inc * 2, lane);
            }
            break;
          case 1:
            if (all) {
                microOps[uopIdx++] = new MicroUnpackAllNeon2to8Uop<uint16_t>(
                        machInst, vd * 2, ufp0, inc * 2);
            } else {
                microOps[uopIdx++] = new MicroUnpackNeon2to8Uop<uint16_t>(
                        machInst, vd * 2, ufp0, inc * 2, lane);
            }
            break;
          case 2:
            if (all) {
                microOps[uopIdx++] = new MicroUnpackAllNeon4to8Uop<uint32_t>(
                        machInst, vd * 2, ufp0, inc * 2);
            } else {
                microOps[uopIdx++] = new MicroUnpackNeon4to8Uop<uint32_t>(
                        machInst, vd * 2, ufp0, inc * 2, lane);
            }
            break;
          default:
            microOps[uopIdx++] = new Unknown(machInst);
        }
        break;
      case 3:
        switch (size) {
          case 0:
            if (all) {
                microOps[uopIdx++] = new MicroUnpackAllNeon2to6Uop<uint8_t>(
                        machInst, vd * 2, ufp0, inc * 2);
            } else {
                microOps[uopIdx++] = new MicroUnpackNeon2to6Uop<uint8_t>(
                        machInst, vd * 2, ufp0, inc * 2, lane);
            }
            break;
          case 1:
            if (all) {
                microOps[uopIdx++] = new MicroUnpackAllNeon2to6Uop<uint16_t>(
                        machInst, vd * 2, ufp0, inc * 2);
            } else {
                microOps[uopIdx++] = new MicroUnpackNeon2to6Uop<uint16_t>(
                        machInst, vd * 2, ufp0, inc * 2, lane);
            }
            break;
          case 2:
            if (all) {
                microOps[uopIdx++] = new MicroUnpackAllNeon4to6Uop<uint32_t>(
                        machInst, vd * 2, ufp0, inc * 2);
            } else {
                microOps[uopIdx++] = new MicroUnpackNeon4to6Uop<uint32_t>(
                        machInst, vd * 2, ufp0, inc * 2, lane);
            }
            break;
          default:
            microOps[uopIdx++] = new Unknown(machInst);
        }
        break;
      case 2:
        assert(loadRegs <= 2);
        switch (size) {
          case 0:
            if (all) {
                microOps[uopIdx++] = new MicroUnpackAllNeon2to4Uop<uint8_t>(
                        machInst, vd * 2, ufp0, inc * 2);
            } else {
                microOps[uopIdx++] = new MicroUnpackNeon2to4Uop<uint8_t>(
                        machInst, vd * 2, ufp0, inc * 2, lane);
            }
            break;
          case 1:
            if (all) {
                microOps[uopIdx++] = new MicroUnpackAllNeon2to4Uop<uint16_t>(
                        machInst, vd * 2, ufp0, inc * 2);
            } else {
                microOps[uopIdx++] = new MicroUnpackNeon2to4Uop<uint16_t>(
                        machInst, vd * 2, ufp0, inc * 2, lane);
            }
            break;
          case 2:
            if (all) {
                microOps[uopIdx++] = new MicroUnpackAllNeon2to4Uop<uint32_t>(
                        machInst, vd * 2, ufp0, inc * 2);
            } else {
                microOps[uopIdx++] = new MicroUnpackNeon2to4Uop<uint32_t>(
                        machInst, vd * 2, ufp0, inc * 2, lane);
            }
            break;
          default:
            microOps[uopIdx++] = new Unknown(machInst);
        }
        break;
      case 1:
        assert(regs == 1 || (all && regs == 2));
        assert(loadRegs <= 2);
        for (unsigned offset = 0; offset < regs; offset++) {
            switch (size) {
              case 0:
                if (all) {
                    microOps[uopIdx++] =
                        new MicroUnpackAllNeon2to2Uop<uint8_t>(
                            machInst, (vd + offset) * 2, ufp0, inc * 2);
                } else {
                    microOps[uopIdx++] =
                        new MicroUnpackNeon2to2Uop<uint8_t>(
                            machInst, (vd + offset) * 2, ufp0, inc * 2, lane);
                }
                break;
              case 1:
                if (all) {
                    microOps[uopIdx++] =
                        new MicroUnpackAllNeon2to2Uop<uint16_t>(
                            machInst, (vd + offset) * 2, ufp0, inc * 2);
                } else {
                    microOps[uopIdx++] =
                        new MicroUnpackNeon2to2Uop<uint16_t>(
                            machInst, (vd + offset) * 2, ufp0, inc * 2, lane);
                }
                break;
              case 2:
                if (all) {
                    microOps[uopIdx++] =
                        new MicroUnpackAllNeon2to2Uop<uint32_t>(
                            machInst, (vd + offset) * 2, ufp0, inc * 2);
                } else {
                    microOps[uopIdx++] =
                        new MicroUnpackNeon2to2Uop<uint32_t>(
                            machInst, (vd + offset) * 2, ufp0, inc * 2, lane);
                }
                break;
              default:
                microOps[uopIdx++] = new Unknown(machInst);
            }
        }
        break;
      default:
        // Bad number of elements to unpack
        microOps[uopIdx++] = new Unknown(machInst);
    }

    assert(uopIdx == numMicroops);

    for (unsigned i = 0; i < numMicroops - 1; i++) {
        MicroOp *uopPtr = dynamic_cast<MicroOp *>(microOps[i].get());
        assert(uopPtr);
        uopPtr->setDelayedCommit();
    }
    microOps[0]->setFirstMicroop();
    microOps[numMicroops - 1]->setLastMicroop();
}
VstMultOp::VstMultOp(const char *mnem, ExtMachInst machInst, OpClass __opClass,
                     unsigned elems, RegIndex rn, RegIndex vd, unsigned regs,
                     unsigned inc, uint32_t size, uint32_t align, RegIndex rm) :
    PredMacroOp(mnem, machInst, __opClass)
{
    assert(regs > 0 && regs <= 4);
    assert(regs % elems == 0);
    numMicroops = (regs > 2) ? 2 : 1;
    bool wb = (rm != 15);
    bool interleave = (elems > 1);

    if (wb) numMicroops++;
    if (interleave) numMicroops += (regs / elems);
    microOps = new StaticInstPtr[numMicroops];

    uint32_t noAlign = TLB::MustBeOne;

    RegIndex rMid = interleave ? VecSpecialElem : vd * 2;

    unsigned uopIdx = 0;
    if (interleave) {
        switch (elems) {
          case 4:
            microOps[uopIdx++] = newNeonMixInst<MicroInterNeon8Uop>(
                    size, machInst, rMid, vd * 2, inc * 2);
            break;
          case 3:
            microOps[uopIdx++] = newNeonMixInst<MicroInterNeon6Uop>(
                    size, machInst, rMid, vd * 2, inc * 2);
            break;
          case 2:
            assert(regs == 4 || regs == 2);
            if (regs == 4) {
                microOps[uopIdx++] = newNeonMixInst<MicroInterNeon4Uop>(
                        size, machInst, rMid, vd * 2, inc * 2);
                microOps[uopIdx++] = newNeonMixInst<MicroInterNeon4Uop>(
                        size, machInst, rMid + 4, vd * 2 + 2, inc * 2);
            } else {
                microOps[uopIdx++] = newNeonMixInst<MicroInterNeon4Uop>(
                        size, machInst, rMid, vd * 2, inc * 2);
            }
            break;
          default:
            // Bad number of elements to interleave
            microOps[uopIdx++] = new Unknown(machInst);
        }
    }

    switch (regs) {
      case 4:
        microOps[uopIdx++] = newNeonMemInst<MicroStrNeon16Uop>(
                size, machInst, rMid, rn, 0, align);
        microOps[uopIdx++] = newNeonMemInst<MicroStrNeon16Uop>(
                size, machInst, rMid + 4, rn, 16, noAlign);
        break;
      case 3:
        microOps[uopIdx++] = newNeonMemInst<MicroStrNeon16Uop>(
                size, machInst, rMid, rn, 0, align);
        microOps[uopIdx++] = newNeonMemInst<MicroStrNeon8Uop>(
                size, machInst, rMid + 4, rn, 16, noAlign);
        break;
      case 2:
        microOps[uopIdx++] = newNeonMemInst<MicroStrNeon16Uop>(
                size, machInst, rMid, rn, 0, align);
        break;
      case 1:
        microOps[uopIdx++] = newNeonMemInst<MicroStrNeon8Uop>(
                size, machInst, rMid, rn, 0, align);
        break;
      default:
        // Unknown number of registers
        microOps[uopIdx++] = new Unknown(machInst);
    }

    if (wb) {
        if (rm != 15 && rm != 13) {
            microOps[uopIdx++] =
                new MicroAddUop(machInst, rn, rn, rm, 0, ArmISA::LSL);
        } else {
            microOps[uopIdx++] =
                new MicroAddiUop(machInst, rn, rn, regs * 8);
        }
    }

    assert(uopIdx == numMicroops);

    for (unsigned i = 0; i < numMicroops - 1; i++) {
        MicroOp *uopPtr = dynamic_cast<MicroOp *>(microOps[i].get());
        assert(uopPtr);
        uopPtr->setDelayedCommit();
    }
    microOps[0]->setFirstMicroop();
    microOps[numMicroops - 1]->setLastMicroop();
}
VstSingleOp::VstSingleOp(const char *mnem, ExtMachInst machInst,
                         OpClass __opClass, bool all, unsigned elems,
                         RegIndex rn, RegIndex vd, unsigned regs,
                         unsigned inc, uint32_t size, uint32_t align,
                         RegIndex rm, unsigned lane) :
    PredMacroOp(mnem, machInst, __opClass)
{
    assert(regs > 0 && regs <= 4);
    assert(regs % elems == 0);

    unsigned eBytes = (1 << size);
    unsigned storeSize = eBytes * elems;
    unsigned storeRegs M5_VAR_USED =
        (storeSize + sizeof(uint32_t) - 1) / sizeof(uint32_t);

    assert(storeRegs > 0 && storeRegs <= 4);
    numMicroops = 1;
    bool wb = (rm != 15);

    if (wb) numMicroops++;
    numMicroops += (regs / elems);
    microOps = new StaticInstPtr[numMicroops];

    RegIndex ufp0 = VecSpecialElem;

    unsigned uopIdx = 0;
    switch (elems) {
      case 4:
        switch (size) {
          case 0:
            microOps[uopIdx++] = new MicroPackNeon8to2Uop<uint8_t>(
                    machInst, ufp0, vd * 2, inc * 2, lane);
            break;
          case 1:
            microOps[uopIdx++] = new MicroPackNeon8to2Uop<uint16_t>(
                    machInst, ufp0, vd * 2, inc * 2, lane);
            break;
          case 2:
            microOps[uopIdx++] = new MicroPackNeon8to4Uop<uint32_t>(
                    machInst, ufp0, vd * 2, inc * 2, lane);
            break;
          default:
            microOps[uopIdx++] = new Unknown(machInst);
        }
        break;
      case 3:
        switch (size) {
          case 0:
            microOps[uopIdx++] = new MicroPackNeon6to2Uop<uint8_t>(
                    machInst, ufp0, vd * 2, inc * 2, lane);
            break;
          case 1:
            microOps[uopIdx++] = new MicroPackNeon6to2Uop<uint16_t>(
                    machInst, ufp0, vd * 2, inc * 2, lane);
            break;
          case 2:
            microOps[uopIdx++] = new MicroPackNeon6to4Uop<uint32_t>(
                    machInst, ufp0, vd * 2, inc * 2, lane);
            break;
          default:
            microOps[uopIdx++] = new Unknown(machInst);
        }
        break;
      case 2:
        assert(storeRegs <= 2);
        switch (size) {
          case 0:
            microOps[uopIdx++] = new MicroPackNeon4to2Uop<uint8_t>(
                    machInst, ufp0, vd * 2, inc * 2, lane);
            break;
          case 1:
            microOps[uopIdx++] = new MicroPackNeon4to2Uop<uint16_t>(
                    machInst, ufp0, vd * 2, inc * 2, lane);
            break;
          case 2:
            microOps[uopIdx++] = new MicroPackNeon4to2Uop<uint32_t>(
                    machInst, ufp0, vd * 2, inc * 2, lane);
            break;
          default:
            microOps[uopIdx++] = new Unknown(machInst);
        }
        break;
      case 1:
        assert(regs == 1 || (all && regs == 2));
        assert(storeRegs <= 2);
        for (unsigned offset = 0; offset < regs; offset++) {
            switch (size) {
              case 0:
                microOps[uopIdx++] = new MicroPackNeon2to2Uop<uint8_t>(
                        machInst, ufp0, (vd + offset) * 2, inc * 2, lane);
                break;
              case 1:
                microOps[uopIdx++] = new MicroPackNeon2to2Uop<uint16_t>(
                        machInst, ufp0, (vd + offset) * 2, inc * 2, lane);
                break;
              case 2:
                microOps[uopIdx++] = new MicroPackNeon2to2Uop<uint32_t>(
                        machInst, ufp0, (vd + offset) * 2, inc * 2, lane);
                break;
              default:
                microOps[uopIdx++] = new Unknown(machInst);
            }
        }
        break;
      default:
        // Bad number of elements to pack
        microOps[uopIdx++] = new Unknown(machInst);
    }

    switch (storeSize) {
      case 1:
        microOps[uopIdx++] = new MicroStrNeon1Uop<uint8_t>(
                machInst, ufp0, rn, 0, align);
        break;
      case 2:
        if (eBytes == 2) {
            microOps[uopIdx++] = new MicroStrNeon2Uop<uint16_t>(
                    machInst, ufp0, rn, 0, align);
        } else {
            microOps[uopIdx++] = new MicroStrNeon2Uop<uint8_t>(
                    machInst, ufp0, rn, 0, align);
        }
        break;
      case 3:
        microOps[uopIdx++] = new MicroStrNeon3Uop<uint8_t>(
                machInst, ufp0, rn, 0, align);
        break;
      case 4:
        switch (eBytes) {
          case 1:
            microOps[uopIdx++] = new MicroStrNeon4Uop<uint8_t>(
                    machInst, ufp0, rn, 0, align);
            break;
          case 2:
            microOps[uopIdx++] = new MicroStrNeon4Uop<uint16_t>(
                    machInst, ufp0, rn, 0, align);
            break;
          case 4:
            microOps[uopIdx++] = new MicroStrNeon4Uop<uint32_t>(
                    machInst, ufp0, rn, 0, align);
            break;
        }
        break;
      case 6:
        microOps[uopIdx++] = new MicroStrNeon6Uop<uint16_t>(
                machInst, ufp0, rn, 0, align);
        break;
      case 8:
        switch (eBytes) {
          case 2:
            microOps[uopIdx++] = new MicroStrNeon8Uop<uint16_t>(
                    machInst, ufp0, rn, 0, align);
            break;
          case 4:
            microOps[uopIdx++] = new MicroStrNeon8Uop<uint32_t>(
                    machInst, ufp0, rn, 0, align);
            break;
        }
        break;
      case 12:
        microOps[uopIdx++] = new MicroStrNeon12Uop<uint32_t>(
                machInst, ufp0, rn, 0, align);
        break;
      case 16:
        microOps[uopIdx++] = new MicroStrNeon16Uop<uint32_t>(
                machInst, ufp0, rn, 0, align);
        break;
      default:
        // Unrecognized store size
        microOps[uopIdx++] = new Unknown(machInst);
    }

    if (wb) {
        if (rm != 15 && rm != 13) {
            microOps[uopIdx++] =
                new MicroAddUop(machInst, rn, rn, rm, 0, ArmISA::LSL);
        } else {
            microOps[uopIdx++] =
                new MicroAddiUop(machInst, rn, rn, storeSize);
        }
    }

    assert(uopIdx == numMicroops);

    for (unsigned i = 0; i < numMicroops - 1; i++) {
        MicroOp *uopPtr = dynamic_cast<MicroOp *>(microOps[i].get());
        assert(uopPtr);
        uopPtr->setDelayedCommit();
    }
    microOps[0]->setFirstMicroop();
    microOps[numMicroops - 1]->setLastMicroop();
}
VldMultOp64::VldMultOp64(const char *mnem, ExtMachInst machInst,
                         OpClass __opClass, RegIndex rn, RegIndex vd,
                         RegIndex rm, uint8_t eSize, uint8_t dataSize,
                         uint8_t numStructElems, uint8_t numRegs, bool wb) :
    PredMacroOp(mnem, machInst, __opClass)
{
    RegIndex vx = NumVecV8ArchRegs;
    RegIndex rnsp = (RegIndex) makeSP((IntRegIndex) rn);
    bool baseIsSP = isSP((IntRegIndex) rnsp);

    numMicroops = wb ? 1 : 0;

    int totNumBytes = numRegs * dataSize / 8;
    assert(totNumBytes <= 64);

    // The guiding principle here is that no more than 16 bytes can be
    // transferred at a time
    int numMemMicroops = totNumBytes / 16;
    int residuum = totNumBytes % 16;
    if (residuum)
        ++numMemMicroops;
    numMicroops += numMemMicroops;
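    // For example, three 64-bit registers transfer 24 bytes: one full
    // 16-byte memory microop plus one residuum microop for the final
    // 8 bytes.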
    int numMarshalMicroops = numRegs / 2 + (numRegs % 2 ? 1 : 0);
    numMicroops += numMarshalMicroops;
    microOps = new StaticInstPtr[numMicroops];
    unsigned uopIdx = 0;
    uint32_t memaccessFlags = TLB::MustBeOne | (TLB::ArmFlags) eSize |
        TLB::AllowUnaligned;
    int i = 0;
    for (; i < numMemMicroops - 1; ++i) {
        microOps[uopIdx++] = new MicroNeonLoad64(
            machInst, vx + (RegIndex) i, rnsp, 16 * i, memaccessFlags,
            baseIsSP, 16 /* accSize */, eSize);
    }
    microOps[uopIdx++] = new MicroNeonLoad64(
        machInst, vx + (RegIndex) i, rnsp, 16 * i, memaccessFlags, baseIsSP,
        residuum ? residuum : 16 /* accSize */, eSize);
    // Writeback microop: the post-increment amount is encoded in "Rm": a
    // 64-bit general register OR as '11111' for an immediate value equal to
    // the total number of bytes transferred (i.e. 8, 16, 24, 32, 48 or 64)
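    // For example, "ld1 {v0.16b, v1.16b}, [x0], #32" uses the immediate
    // form (Rm == '11111'), so the base update below adds totNumBytes.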
    if (wb) {
        if (rm != ((RegIndex) INTREG_X31)) {
            microOps[uopIdx++] = new MicroAddXERegUop(machInst, rnsp, rnsp, rm,
                                                      UXTX, 0);
        } else {
            microOps[uopIdx++] = new MicroAddXiUop(machInst, rnsp, rnsp,
                                                   totNumBytes);
        }
    }
    for (int i = 0; i < numMarshalMicroops; ++i) {
        switch (numRegs) {
          case 1: microOps[uopIdx++] = new MicroDeintNeon64_1Reg(
                      machInst, vd + (RegIndex) (2 * i), vx, eSize, dataSize,
                      numStructElems, 1, i /* step */);
                  break;
          case 2: microOps[uopIdx++] = new MicroDeintNeon64_2Reg(
                      machInst, vd + (RegIndex) (2 * i), vx, eSize, dataSize,
                      numStructElems, 2, i /* step */);
                  break;
          case 3: microOps[uopIdx++] = new MicroDeintNeon64_3Reg(
                      machInst, vd + (RegIndex) (2 * i), vx, eSize, dataSize,
                      numStructElems, 3, i /* step */);
                  break;
          case 4: microOps[uopIdx++] = new MicroDeintNeon64_4Reg(
                      machInst, vd + (RegIndex) (2 * i), vx, eSize, dataSize,
                      numStructElems, 4, i /* step */);
                  break;
          default: panic("Invalid number of registers");
        }
    }
    assert(uopIdx == numMicroops);

    for (int i = 0; i < numMicroops - 1; ++i) {
        microOps[i]->setDelayedCommit();
    }
    microOps[numMicroops - 1]->setLastMicroop();
}
VstMultOp64::VstMultOp64(const char *mnem, ExtMachInst machInst,
                         OpClass __opClass, RegIndex rn, RegIndex vd,
                         RegIndex rm, uint8_t eSize, uint8_t dataSize,
                         uint8_t numStructElems, uint8_t numRegs, bool wb) :
    PredMacroOp(mnem, machInst, __opClass)
{
    RegIndex vx = NumVecV8ArchRegs;
    RegIndex rnsp = (RegIndex) makeSP((IntRegIndex) rn);
    bool baseIsSP = isSP((IntRegIndex) rnsp);

    numMicroops = wb ? 1 : 0;

    int totNumBytes = numRegs * dataSize / 8;
    assert(totNumBytes <= 64);

    // The guiding principle here is that no more than 16 bytes can be
    // transferred at a time
    int numMemMicroops = totNumBytes / 16;
    int residuum = totNumBytes % 16;
    if (residuum)
        ++numMemMicroops;
    numMicroops += numMemMicroops;
    int numMarshalMicroops = totNumBytes > 32 ? 2 : 1;
    numMicroops += numMarshalMicroops;
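    // Each interleave (marshaling) microop below handles at most 32 bytes
    // of source data, so transfers larger than 32 bytes need a second one.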
    microOps = new StaticInstPtr[numMicroops];
    unsigned uopIdx = 0;
    for (int i = 0; i < numMarshalMicroops; ++i) {
        switch (numRegs) {
          case 1: microOps[uopIdx++] = new MicroIntNeon64_1Reg(
                      machInst, vx + (RegIndex) (2 * i), vd, eSize, dataSize,
                      numStructElems, 1, i /* step */);
                  break;
          case 2: microOps[uopIdx++] = new MicroIntNeon64_2Reg(
                      machInst, vx + (RegIndex) (2 * i), vd, eSize, dataSize,
                      numStructElems, 2, i /* step */);
                  break;
          case 3: microOps[uopIdx++] = new MicroIntNeon64_3Reg(
                      machInst, vx + (RegIndex) (2 * i), vd, eSize, dataSize,
                      numStructElems, 3, i /* step */);
                  break;
          case 4: microOps[uopIdx++] = new MicroIntNeon64_4Reg(
                      machInst, vx + (RegIndex) (2 * i), vd, eSize, dataSize,
                      numStructElems, 4, i /* step */);
                  break;
          default: panic("Invalid number of registers");
        }
    }
    uint32_t memaccessFlags = TLB::MustBeOne | (TLB::ArmFlags) eSize |
        TLB::AllowUnaligned;
    int i = 0;
    for (; i < numMemMicroops - 1; ++i) {
        microOps[uopIdx++] = new MicroNeonStore64(
            machInst, vx + (RegIndex) i, rnsp, 16 * i, memaccessFlags,
            baseIsSP, 16 /* accSize */, eSize);
    }
    microOps[uopIdx++] = new MicroNeonStore64(
        machInst, vx + (RegIndex) i, rnsp, 16 * i, memaccessFlags, baseIsSP,
        residuum ? residuum : 16 /* accSize */, eSize);
    // Writeback microop: the post-increment amount is encoded in "Rm": a
    // 64-bit general register OR as '11111' for an immediate value equal to
    // the total number of bytes transferred (i.e. 8, 16, 24, 32, 48 or 64)
    if (wb) {
        if (rm != ((RegIndex) INTREG_X31)) {
            microOps[uopIdx++] = new MicroAddXERegUop(machInst, rnsp, rnsp, rm,
                                                      UXTX, 0);
        } else {
            microOps[uopIdx++] = new MicroAddXiUop(machInst, rnsp, rnsp,
                                                   totNumBytes);
        }
    }
    assert(uopIdx == numMicroops);

    for (int i = 0; i < numMicroops - 1; i++) {
        microOps[i]->setDelayedCommit();
    }
    microOps[numMicroops - 1]->setLastMicroop();
}
VldSingleOp64::VldSingleOp64(const char *mnem, ExtMachInst machInst,
                             OpClass __opClass, RegIndex rn, RegIndex vd,
                             RegIndex rm, uint8_t eSize, uint8_t dataSize,
                             uint8_t numStructElems, uint8_t index, bool wb,
                             bool replicate) :
    PredMacroOp(mnem, machInst, __opClass),
    eSize(0), dataSize(0), numStructElems(0), index(0),
    wb(false), replicate(false)
{
    RegIndex vx = NumVecV8ArchRegs;
    RegIndex rnsp = (RegIndex) makeSP((IntRegIndex) rn);
    bool baseIsSP = isSP((IntRegIndex) rnsp);

    numMicroops = wb ? 1 : 0;

    int eSizeBytes = 1 << eSize;
    int totNumBytes = numStructElems * eSizeBytes;
    assert(totNumBytes <= 64);

    // The guiding principle here is that no more than 16 bytes can be
    // transferred at a time
    int numMemMicroops = totNumBytes / 16;
    int residuum = totNumBytes % 16;
    if (residuum)
        ++numMemMicroops;
    numMicroops += numMemMicroops;
    int numMarshalMicroops = numStructElems / 2 + (numStructElems % 2 ? 1 : 0);
    numMicroops += numMarshalMicroops;
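    // Each unpack microop below distributes two structure elements, so an
    // ld3 to a single lane (numStructElems == 3) needs 3 / 2 + 1 == 2 of
    // them.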
    microOps = new StaticInstPtr[numMicroops];
    unsigned uopIdx = 0;

    uint32_t memaccessFlags = TLB::MustBeOne | (TLB::ArmFlags) eSize |
        TLB::AllowUnaligned;
    int i = 0;
    for (; i < numMemMicroops - 1; ++i) {
        microOps[uopIdx++] = new MicroNeonLoad64(
            machInst, vx + (RegIndex) i, rnsp, 16 * i, memaccessFlags,
            baseIsSP, 16 /* accSize */, eSize);
    }
    microOps[uopIdx++] = new MicroNeonLoad64(
        machInst, vx + (RegIndex) i, rnsp, 16 * i, memaccessFlags, baseIsSP,
        residuum ? residuum : 16 /* accSize */, eSize);
    // Writeback microop: the post-increment amount is encoded in "Rm": a
    // 64-bit general register OR as '11111' for an immediate value equal to
    // the total number of bytes transferred (i.e. 8, 16, 24, 32, 48 or 64)
    if (wb) {
        if (rm != ((RegIndex) INTREG_X31)) {
            microOps[uopIdx++] = new MicroAddXERegUop(machInst, rnsp, rnsp, rm,
                                                      UXTX, 0);
        } else {
            microOps[uopIdx++] = new MicroAddXiUop(machInst, rnsp, rnsp,
                                                   totNumBytes);
        }
    }
    for (int i = 0; i < numMarshalMicroops; ++i) {
        microOps[uopIdx++] = new MicroUnpackNeon64(
            machInst, vd + (RegIndex) (2 * i), vx, eSize, dataSize,
            numStructElems, index, i /* step */, replicate);
    }
    assert(uopIdx == numMicroops);

    for (int i = 0; i < numMicroops - 1; i++) {
        microOps[i]->setDelayedCommit();
    }
    microOps[numMicroops - 1]->setLastMicroop();
}
VstSingleOp64::VstSingleOp64(const char *mnem, ExtMachInst machInst,
                             OpClass __opClass, RegIndex rn, RegIndex vd,
                             RegIndex rm, uint8_t eSize, uint8_t dataSize,
                             uint8_t numStructElems, uint8_t index, bool wb,
                             bool replicate) :
    PredMacroOp(mnem, machInst, __opClass),
    eSize(0), dataSize(0), numStructElems(0), index(0),
    wb(false), replicate(false)
{
    RegIndex vx = NumVecV8ArchRegs;
    RegIndex rnsp = (RegIndex) makeSP((IntRegIndex) rn);
    bool baseIsSP = isSP((IntRegIndex) rnsp);

    numMicroops = wb ? 1 : 0;

    int eSizeBytes = 1 << eSize;
    int totNumBytes = numStructElems * eSizeBytes;
    assert(totNumBytes <= 64);

    // The guiding principle here is that no more than 16 bytes can be
    // transferred at a time
    int numMemMicroops = totNumBytes / 16;
    int residuum = totNumBytes % 16;
    if (residuum)
        ++numMemMicroops;
    numMicroops += numMemMicroops;
    int numMarshalMicroops = totNumBytes > 32 ? 2 : 1;
    numMicroops += numMarshalMicroops;
    microOps = new StaticInstPtr[numMicroops];
    unsigned uopIdx = 0;
    for (int i = 0; i < numMarshalMicroops; ++i) {
        microOps[uopIdx++] = new MicroPackNeon64(
            machInst, vx + (RegIndex) (2 * i), vd, eSize, dataSize,
            numStructElems, index, i /* step */, replicate);
    }
    uint32_t memaccessFlags = TLB::MustBeOne | (TLB::ArmFlags) eSize |
        TLB::AllowUnaligned;
    int i = 0;
    for (; i < numMemMicroops - 1; ++i) {
        microOps[uopIdx++] = new MicroNeonStore64(
            machInst, vx + (RegIndex) i, rnsp, 16 * i, memaccessFlags,
            baseIsSP, 16 /* accSize */, eSize);
    }
    microOps[uopIdx++] = new MicroNeonStore64(
        machInst, vx + (RegIndex) i, rnsp, 16 * i, memaccessFlags, baseIsSP,
        residuum ? residuum : 16 /* accSize */, eSize);
    // Writeback microop: the post-increment amount is encoded in "Rm": a
    // 64-bit general register OR as '11111' for an immediate value equal to
    // the total number of bytes transferred (i.e. 8, 16, 24, 32, 48 or 64)
    if (wb) {
        if (rm != ((RegIndex) INTREG_X31)) {
            microOps[uopIdx++] = new MicroAddXERegUop(machInst, rnsp, rnsp, rm,
                                                      UXTX, 0);
        } else {
            microOps[uopIdx++] = new MicroAddXiUop(machInst, rnsp, rnsp,
                                                   totNumBytes);
        }
    }
    assert(uopIdx == numMicroops);

    for (int i = 0; i < numMicroops - 1; i++) {
        microOps[i]->setDelayedCommit();
    }
    microOps[numMicroops - 1]->setLastMicroop();
}
MacroVFPMemOp::MacroVFPMemOp(const char *mnem, ExtMachInst machInst,
                             OpClass __opClass, IntRegIndex rn,
                             RegIndex vd, bool single, bool up,
                             bool writeback, bool load, uint32_t offset) :
    PredMacroOp(mnem, machInst, __opClass)
{
    int i = 0;
    // The lowest order bit selects fldmx (set) or fldmd (clear). These seem
    // to be functionally identical except that fldmx is deprecated. For now
    // we'll assume they're otherwise interchangeable.
    int count = (single ? offset : (offset / 2));
    numMicroops = count * (single ? 1 : 2) + (writeback ? 1 : 0);
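    // For example, "vldmia r0, {d0-d2}" encodes offset == 6 words, so
    // count == 3 doubles, and each double takes two microops (bottom and
    // top halves): six load microops in total.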
    microOps = new StaticInstPtr[numMicroops];

    int64_t addr = 0;

    if (!up)
        addr = 4 * offset;

    bool tempUp = up;
    for (int j = 0; j < count; j++) {
        if (load) {
            if (single) {
                microOps[i++] = new MicroLdrFpUop(machInst, vd++, rn,
                                                  tempUp, addr);
            } else {
                microOps[i++] = new MicroLdrDBFpUop(machInst, vd++, rn,
                                                    tempUp, addr);
                microOps[i++] = new MicroLdrDTFpUop(machInst, vd++, rn, tempUp,
                                                    addr + (up ? 4 : -4));
            }
        } else {
            if (single) {
                microOps[i++] = new MicroStrFpUop(machInst, vd++, rn,
                                                  tempUp, addr);
            } else {
                microOps[i++] = new MicroStrDBFpUop(machInst, vd++, rn,
                                                    tempUp, addr);
                microOps[i++] = new MicroStrDTFpUop(machInst, vd++, rn, tempUp,
                                                    addr + (up ? 4 : -4));
            }
        }

        if (!tempUp) {
            addr -= (single ? 4 : 8);
            // The microops don't handle negative displacement, so once the
            // address hits zero, flip the polarity and start adding.
            if (addr <= 0) {
                tempUp = true;
                addr = -addr;
            }
        } else {
            addr += (single ? 4 : 8);
        }
    }

    if (writeback) {
        if (up) {
            microOps[i++] =
                new MicroAddiUop(machInst, rn, rn, 4 * offset);
        } else {
            microOps[i++] =
                new MicroSubiUop(machInst, rn, rn, 4 * offset);
        }
    }

    assert(numMicroops == i);
    microOps[numMicroops - 1]->setLastMicroop();

    for (StaticInstPtr *curUop = microOps;
         !(*curUop)->isLastMicroop(); curUop++) {
        MicroOp *uopPtr = dynamic_cast<MicroOp *>(curUop->get());
        assert(uopPtr);
        uopPtr->setDelayedCommit();
    }
}
std::string
MicroIntImmOp::generateDisassembly(Addr pc, const SymbolTable *symtab) const
{
    std::stringstream ss;
    printMnemonic(ss);
    printIntReg(ss, ura);
    ss << ", ";
    printIntReg(ss, urb);
    ss << ", ";
    ccprintf(ss, "#%d", imm);
    return ss.str();
}
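// The disassemblers here and below produce strings of the rough shape
// "<mnemonic> ura, urb, #imm"; the exact mnemonic text comes from each
// microop's constructor, so any concrete rendering shown in comments is
// illustrative only.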
std::string
MicroIntImmXOp::generateDisassembly(Addr pc, const SymbolTable *symtab) const
{
    std::stringstream ss;
    printMnemonic(ss);
    printIntReg(ss, ura);
    ss << ", ";
    printIntReg(ss, urb);
    ss << ", ";
    ccprintf(ss, "#%d", imm);
    return ss.str();
}
std::string
MicroSetPCCPSR::generateDisassembly(Addr pc, const SymbolTable *symtab) const
{
    std::stringstream ss;
    printMnemonic(ss);
    ss << "[PC, CPSR]";
    return ss.str();
}
std::string
MicroIntRegXOp::generateDisassembly(Addr pc, const SymbolTable *symtab) const
{
    std::stringstream ss;
    printMnemonic(ss);
    printIntReg(ss, ura);
    ss << ", ";
    printIntReg(ss, urb);
    printExtendOperand(false, ss, (IntRegIndex)urc, type, shiftAmt);
    return ss.str();
}
std::string
MicroIntMov::generateDisassembly(Addr pc, const SymbolTable *symtab) const
{
    std::stringstream ss;
    printMnemonic(ss);
    printIntReg(ss, ura);
    ss << ", ";
    printIntReg(ss, urb);
    return ss.str();
}
std::string
MicroIntOp::generateDisassembly(Addr pc, const SymbolTable *symtab) const
{
    std::stringstream ss;
    printMnemonic(ss);
    printIntReg(ss, ura);
    ss << ", ";
    printIntReg(ss, urb);
    ss << ", ";
    printIntReg(ss, urc);
    return ss.str();
}
std::string
MicroMemOp::generateDisassembly(Addr pc, const SymbolTable *symtab) const
{
    std::stringstream ss;
    printMnemonic(ss);
    if (isFloating())
        printFloatReg(ss, ura);
    else
        printIntReg(ss, ura);
    ss << ", [";
    printIntReg(ss, urb);
    ss << ", ";
    ccprintf(ss, "#%d", imm);
    ss << "]";
    return ss.str();
}
std::string
MicroMemPairOp::generateDisassembly(Addr pc, const SymbolTable *symtab) const
{
    std::stringstream ss;
    printMnemonic(ss);
    printIntReg(ss, dest);
    ss << ", ";
    printIntReg(ss, dest2);
    ss << ", [";
    printIntReg(ss, urb);
    ss << ", ";
    ccprintf(ss, "#%d", imm);
    ss << "]";
    return ss.str();
}

}