2 * Copyright (c) 2010-2014 ARM Limited
5 * The license below extends only to copyright in the software and shall
6 * not be construed as granting a license to any other intellectual
7 * property including but not limited to intellectual property relating
8 * to a hardware implementation of the functionality of the software
9 * licensed hereunder. You may use the software subject to the license
10 * terms below provided that you ensure that this notice is replicated
11 * unmodified and in its entirety in all distributions of the software,
12 * modified or unmodified, in source code or in binary form.
14 * Copyright (c) 2007-2008 The Florida State University
15 * All rights reserved.
17 * Redistribution and use in source and binary forms, with or without
18 * modification, are permitted provided that the following conditions are
19 * met: redistributions of source code must retain the above copyright
20 * notice, this list of conditions and the following disclaimer;
21 * redistributions in binary form must reproduce the above copyright
22 * notice, this list of conditions and the following disclaimer in the
23 * documentation and/or other materials provided with the distribution;
24 * neither the name of the copyright holders nor the names of its
25 * contributors may be used to endorse or promote products derived from
26 * this software without specific prior written permission.
28 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
29 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
30 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
31 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
32 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
33 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
34 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
35 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
36 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
37 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
38 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
40 * Authors: Stephen Hines
45 #include "arch/arm/insts/macromem.hh"
47 #include "arch/arm/generated/decoder.hh"
48 #include "arch/arm/insts/neon64_mem.hh"
51 using namespace ArmISAInst
;
// MacroMemOp constructor: expands a multi-register load/store, described by
// the `reglist` bitmask, into an array of micro-ops stored in `microOps`.
// Visible steps: count the set bits of reglist, size and allocate the uop
// array, emit a MicroAddiUop that copies rn into INTREG_UREG0, emit one
// memory uop per listed register (loads may be emitted in pairs), optionally
// emit a base-register writeback uop and a final move into the PC, then mark
// the first/last micro-ops, copy the control flags of the last uop onto the
// macro-op and set delayed commit on every uop except the last.
// NOTE(review): the brace/if/else lines that choose between the alternative
// statements below are not visible in this excerpt; the comments added here
// describe only what each visible statement does.
56 MacroMemOp::MacroMemOp(const char *mnem
, ExtMachInst machInst
,
57 OpClass __opClass
, IntRegIndex rn
,
58 bool index
, bool up
, bool user
, bool writeback
,
59 bool load
, uint32_t reglist
) :
60 PredMacroOp(mnem
, machInst
, __opClass
)
// `regs` is the working copy of the register mask (bits are cleared as
// registers are consumed); `ones` is the number of registers listed.
62 uint32_t regs
= reglist
;
63 uint32_t ones
= number_of_ones(reglist
);
64 uint32_t mem_ops
= ones
;
66 // Copy the base address register if we overwrite it, or if this instruction
67 // is basically a no-op (we have to do something)
68 bool copy_base
= (bits(reglist
, rn
) && load
) || !ones
;
// Bit 15 of reglist (presumably the PC -- confirm) splits the `user` request
// into two cases: without it, registers are remapped to user mode; with it,
// the operation is treated as an exception return.
69 bool force_user
= user
& !bits(reglist
, 15);
70 bool exception_ret
= user
& bits(reglist
, 15);
// A load of bit 15 combined with writeback goes through a temporary register
// (INTREG_UREG1 below) so the PC update can be the final uop.
71 bool pc_temp
= load
&& writeback
&& bits(reglist
, 15);
// Two alternative uop-count formulas follow (paired accesses vs. one uop per
// register plus writeback); the condition selecting between them is not
// visible here.
76 numMicroops
= ((ones
+ 1) / 2)
77 + ((ones
% 2 == 0 && exception_ret
) ? 1 : 0)
82 numMicroops
= ones
+ (writeback
? 1 : 0);
85 microOps
= new StaticInstPtr
[numMicroops
];
// Offset derived from the register count: 4 bytes per register, minus 4.
// NOTE(review): the declaration of `addr` and the condition under which this
// assignment applies are not visible -- confirm against the full file.
90 addr
= (ones
<< 2) - 4;
95 StaticInstPtr
*uop
= microOps
;
97 // Add 0 to Rn and stick it in ureg0.
98 // This is equivalent to a move.
100 *uop
++ = new MicroAddiUop(machInst
, INTREG_UREG0
, rn
, 0);
103 while (mem_ops
!= 0) {
104 // Do load operations in pairs if possible
105 if (load
&& mem_ops
>= 2 &&
106 !(mem_ops
== 2 && bits(regs
,INTREG_PC
) && exception_ret
)) {
107 // 64-bit memory operation
108 // Find 2 set register bits (clear them after finding)
112 // Find the first register
113 while (!bits(regs
, reg
)) reg
++;
114 replaceBits(regs
, reg
, 0);
115 reg_idx1
= force_user
? intRegInMode(MODE_USER
, reg
) : reg
;
117 // Find the second register
118 while (!bits(regs
, reg
)) reg
++;
119 replaceBits(regs
, reg
, 0);
120 reg_idx2
= force_user
? intRegInMode(MODE_USER
, reg
) : reg
;
122 // Load into temp reg if necessary
123 if (reg_idx2
== INTREG_PC
&& pc_temp
)
124 reg_idx2
= INTREG_UREG1
;
126 // Actually load both registers from memory
127 *uop
= new MicroLdr2Uop(machInst
, reg_idx1
, reg_idx2
,
128 copy_base
? INTREG_UREG0
: rn
, up
, addr
);
130 if (!writeback
&& reg_idx2
== INTREG_PC
) {
131 // No writeback if idx==pc, set appropriate flags
132 (*uop
)->setFlag(StaticInst::IsControl
);
133 (*uop
)->setFlag(StaticInst::IsIndirectControl
);
135 if (!(condCode
== COND_AL
|| condCode
== COND_UC
))
136 (*uop
)->setFlag(StaticInst::IsCondControl
);
138 (*uop
)->setFlag(StaticInst::IsUncondControl
);
145 // 32-bit memory operation
146 // Find register for operation
148 while (!bits(regs
, reg
)) reg
++;
149 replaceBits(regs
, reg
, 0);
150 reg_idx
= force_user
? intRegInMode(MODE_USER
, reg
) : reg
;
153 if (writeback
&& reg_idx
== INTREG_PC
) {
154 // If this instruction changes the PC and performs a
155 // writeback, ensure the pc load/branch is the last uop.
156 // Load into a temp reg here.
157 *uop
= new MicroLdrUop(machInst
, INTREG_UREG1
,
158 copy_base
? INTREG_UREG0
: rn
, up
, addr
);
159 } else if (reg_idx
== INTREG_PC
&& exception_ret
) {
160 // Special handling for exception return
161 *uop
= new MicroLdrRetUop(machInst
, reg_idx
,
162 copy_base
? INTREG_UREG0
: rn
, up
, addr
);
164 // standard single load uop
165 *uop
= new MicroLdrUop(machInst
, reg_idx
,
166 copy_base
? INTREG_UREG0
: rn
, up
, addr
);
169 // Loading pc as last operation? Set appropriate flags.
170 if (!writeback
&& reg_idx
== INTREG_PC
) {
171 (*uop
)->setFlag(StaticInst::IsControl
);
172 (*uop
)->setFlag(StaticInst::IsIndirectControl
);
174 if (!(condCode
== COND_AL
|| condCode
== COND_UC
))
175 (*uop
)->setFlag(StaticInst::IsCondControl
);
177 (*uop
)->setFlag(StaticInst::IsUncondControl
);
// Store path: the store uop always addresses through rn, never INTREG_UREG0.
180 *uop
= new MicroStrUop(machInst
, reg_idx
, rn
, up
, addr
);
188 // Load/store micro-op generated, go to next uop
192 if (writeback
&& ones
) {
193 // Perform writeback uop operation
// The base register moves by 4 bytes per transferred register; the add and
// sub forms are alternatives (presumably chosen by `up` -- the selecting
// line is not visible).
195 *uop
++ = new MicroAddiUop(machInst
, rn
, rn
, ones
* 4);
197 *uop
++ = new MicroSubiUop(machInst
, rn
, rn
, ones
* 4);
199 // Write PC after address writeback?
// The value parked in INTREG_UREG1 is moved out here, either with the
// "Ret" variant or as a plain move into INTREG_PC flagged as control.
202 *uop
= new MicroUopRegMovRet(machInst
, 0, INTREG_UREG1
);
204 *uop
= new MicroUopRegMov(machInst
, INTREG_PC
, INTREG_UREG1
);
206 (*uop
)->setFlag(StaticInst::IsControl
);
207 (*uop
)->setFlag(StaticInst::IsIndirectControl
);
209 if (!(condCode
== COND_AL
|| condCode
== COND_UC
))
210 (*uop
)->setFlag(StaticInst::IsCondControl
);
212 (*uop
)->setFlag(StaticInst::IsUncondControl
);
215 (*uop
)->setFlag(StaticInst::IsReturn
);
222 (*uop
)->setLastMicroop();
223 microOps
[0]->setFirstMicroop();
225 /* Take the control flags from the last microop for the macroop */
226 if ((*uop
)->isControl())
227 setFlag(StaticInst::IsControl
);
228 if ((*uop
)->isCondCtrl())
229 setFlag(StaticInst::IsCondControl
);
230 if ((*uop
)->isUncondCtrl())
231 setFlag(StaticInst::IsUncondControl
);
232 if ((*uop
)->isIndirectCtrl())
233 setFlag(StaticInst::IsIndirectControl
);
234 if ((*uop
)->isReturn())
235 setFlag(StaticInst::IsReturn
);
// Every micro-op before the one marked last gets delayed commit.
237 for (StaticInstPtr
*uop
= microOps
; !(*uop
)->isLastMicroop(); uop
++) {
238 (*uop
)->setDelayedCommit();
// PairMemOp constructor: builds the micro-op sequence for a load/store of a
// register pair (rt, rt2) relative to base register rn.
// Visible behaviour:
//  - `post` is true for AddrMd_PostIndex; `writeback` is true for every mode
//    except AddrMd_Offset.
//  - Unless `post`, one leading uop (MicroAddXiSpAlignUop) writes an address
//    into INTREG_UREG0; the memory uops then use rn when `post` is set and
//    INTREG_UREG0 otherwise.
//  - With writeback, a MicroAddXiUop targeting rn is emitted.
//  - The final uop is marked last, microOps[0] first, and every uop before the
//    last gets delayed commit.
// NOTE(review): the if/else lines choosing between fp/integer, load/store and
// the first size case are not visible in this excerpt, nor are the trailing
// arguments of the address uops.
242 PairMemOp::PairMemOp(const char *mnem
, ExtMachInst machInst
, OpClass __opClass
,
243 uint32_t size
, bool fp
, bool load
, bool noAlloc
,
244 bool signExt
, bool exclusive
, bool acrel
,
245 int64_t imm
, AddrMode mode
,
246 IntRegIndex rn
, IntRegIndex rt
, IntRegIndex rt2
) :
247 PredMacroOp(mnem
, machInst
, __opClass
)
249 bool post
= (mode
== AddrMd_PostIndex
);
250 bool writeback
= (mode
!= AddrMd_Offset
);
// Two alternative uop counts: address uop (skipped when post-indexed) +
// memory uops + optional writeback. The selecting condition is not visible.
253 // Use integer rounding to round up loads of size 4
254 numMicroops
= (post
? 0 : 1) + ((size
+ 4) / 8) + (writeback
? 1 : 0);
256 numMicroops
= (post
? 0 : 1) + (size
/ 4) + (writeback
? 1 : 0);
258 microOps
= new StaticInstPtr
[numMicroops
];
260 StaticInstPtr
*uop
= microOps
;
265 *uop
++ = new MicroAddXiSpAlignUop(machInst
, INTREG_UREG0
, rn
,
// FP path, 16-byte-offset variants: one load uop per register; stores use two
// uops (QB and QT) per register. rt2 is accessed at offset 16.
272 *uop
++ = new MicroLdFp16Uop(machInst
, rt
,
273 post
? rn
: INTREG_UREG0
, 0, noAlloc
, exclusive
, acrel
);
274 *uop
++ = new MicroLdFp16Uop(machInst
, rt2
,
275 post
? rn
: INTREG_UREG0
, 16, noAlloc
, exclusive
, acrel
);
277 *uop
++ = new MicroStrQBFpXImmUop(machInst
, rt
,
278 post
? rn
: INTREG_UREG0
, 0, noAlloc
, exclusive
, acrel
);
279 *uop
++ = new MicroStrQTFpXImmUop(machInst
, rt
,
280 post
? rn
: INTREG_UREG0
, 0, noAlloc
, exclusive
, acrel
);
281 *uop
++ = new MicroStrQBFpXImmUop(machInst
, rt2
,
282 post
? rn
: INTREG_UREG0
, 16, noAlloc
, exclusive
, acrel
);
283 *uop
++ = new MicroStrQTFpXImmUop(machInst
, rt2
,
284 post
? rn
: INTREG_UREG0
, 16, noAlloc
, exclusive
, acrel
);
// size == 8: a single paired load uop, or two store uops at offsets 0 and 8.
286 } else if (size
== 8) {
288 *uop
++ = new MicroLdPairFp8Uop(machInst
, rt
, rt2
,
289 post
? rn
: INTREG_UREG0
, 0, noAlloc
, exclusive
, acrel
);
291 *uop
++ = new MicroStrFpXImmUop(machInst
, rt
,
292 post
? rn
: INTREG_UREG0
, 0, noAlloc
, exclusive
, acrel
);
293 *uop
++ = new MicroStrFpXImmUop(machInst
, rt2
,
294 post
? rn
: INTREG_UREG0
, 8, noAlloc
, exclusive
, acrel
);
// size == 4: both registers are handled by a single uop in either direction.
296 } else if (size
== 4) {
298 *uop
++ = new MicroLdrDFpXImmUop(machInst
, rt
, rt2
,
299 post
? rn
: INTREG_UREG0
, 0, noAlloc
, exclusive
, acrel
);
301 *uop
++ = new MicroStrDFpXImmUop(machInst
, rt
, rt2
,
302 post
? rn
: INTREG_UREG0
, 0, noAlloc
, exclusive
, acrel
);
// Non-FP uops follow (presumably the integer path -- the branch header is not
// visible): a paired load, or two stores with rt2 placed at offset `size`.
308 *uop
++ = new MicroLdPairUop(machInst
, rt
, rt2
,
309 post
? rn
: INTREG_UREG0
, 0, noAlloc
, exclusive
, acrel
);
311 *uop
++ = new MicroStrXImmUop(machInst
, rt
, post
? rn
: INTREG_UREG0
,
312 0, noAlloc
, exclusive
, acrel
);
313 *uop
++ = new MicroStrXImmUop(machInst
, rt2
, post
? rn
: INTREG_UREG0
,
314 size
, noAlloc
, exclusive
, acrel
);
316 } else if (size
== 4) {
// Two load variants (DSX / DUX) exist for size 4; presumably `signExt` picks
// between them -- the selecting line is not visible.
319 *uop
++ = new MicroLdrDSXImmUop(machInst
, rt
, rt2
,
320 post
? rn
: INTREG_UREG0
, 0, noAlloc
, exclusive
, acrel
);
322 *uop
++ = new MicroLdrDUXImmUop(machInst
, rt
, rt2
,
323 post
? rn
: INTREG_UREG0
, 0, noAlloc
, exclusive
, acrel
);
326 *uop
++ = new MicroStrDXImmUop(machInst
, rt
, rt2
,
327 post
? rn
: INTREG_UREG0
, 0, noAlloc
, exclusive
, acrel
);
// Writeback uop: updates rn from rn (post-index) or from INTREG_UREG0.
333 *uop
++ = new MicroAddXiUop(machInst
, rn
, post
? rn
: INTREG_UREG0
,
// Sanity check that exactly numMicroops uops were emitted.
// NOTE(review): "µOps" below looks like a mis-decoded "&microOps" (the
// text "&micro" rendered as the micro sign); as written it will not compile
// -- restore from the upstream file.
337 assert(uop
== µOps
[numMicroops
]);
338 (*--uop
)->setLastMicroop();
339 microOps
[0]->setFirstMicroop();
341 for (StaticInstPtr
*curUop
= microOps
;
342 !(*curUop
)->isLastMicroop(); curUop
++) {
343 (*curUop
)->setDelayedCommit();
// BigFpMemImmOp constructor: immediate-offset access of `dest` at base + imm.
// A load needs one micro-op (MicroLdFp16Uop); a store needs two
// (MicroStrQBFpXImmUop followed by MicroStrQTFpXImmUop), the first of which is
// given delayed commit. The uop `uop` points at on exit is marked last and
// microOps[0] is marked first.
// NOTE(review): the if (load)/else lines separating the two paths are not
// visible in this excerpt.
347 BigFpMemImmOp::BigFpMemImmOp(const char *mnem
, ExtMachInst machInst
,
348 OpClass __opClass
, bool load
, IntRegIndex dest
,
349 IntRegIndex base
, int64_t imm
) :
350 PredMacroOp(mnem
, machInst
, __opClass
)
352 numMicroops
= load
? 1 : 2;
353 microOps
= new StaticInstPtr
[numMicroops
];
355 StaticInstPtr
*uop
= microOps
;
358 *uop
= new MicroLdFp16Uop(machInst
, dest
, base
, imm
);
360 *uop
= new MicroStrQBFpXImmUop(machInst
, dest
, base
, imm
);
361 (*uop
)->setDelayedCommit();
// Pre-increment: the second store uop lands in the next array slot.
362 *++uop
= new MicroStrQTFpXImmUop(machInst
, dest
, base
, imm
);
364 (*uop
)->setLastMicroop();
365 microOps
[0]->setFirstMicroop();
// BigFpMemPostOp constructor: post-indexed access. The memory micro-op(s)
// access `dest` at base + 0, then a MicroAddXiUop adds `imm` to `base` as the
// final micro-op. A load therefore needs 2 uops and a store 3. Every uop
// before the last is given delayed commit.
// NOTE(review): the if (load)/else lines separating the load and store paths
// are not visible in this excerpt.
368 BigFpMemPostOp::BigFpMemPostOp(const char *mnem
, ExtMachInst machInst
,
369 OpClass __opClass
, bool load
, IntRegIndex dest
,
370 IntRegIndex base
, int64_t imm
) :
371 PredMacroOp(mnem
, machInst
, __opClass
)
373 numMicroops
= load
? 2 : 3;
374 microOps
= new StaticInstPtr
[numMicroops
];
376 StaticInstPtr
*uop
= microOps
;
379 *uop
++ = new MicroLdFp16Uop(machInst
, dest
, base
, 0);
381 *uop
++= new MicroStrQBFpXImmUop(machInst
, dest
, base
, 0);
382 *uop
++ = new MicroStrQTFpXImmUop(machInst
, dest
, base
, 0);
// Base update happens after the access (offset 0 was used above).
384 *uop
= new MicroAddXiUop(machInst
, base
, base
, imm
);
385 (*uop
)->setLastMicroop();
386 microOps
[0]->setFirstMicroop();
388 for (StaticInstPtr
*curUop
= microOps
;
389 !(*curUop
)->isLastMicroop(); curUop
++) {
390 (*curUop
)->setDelayedCommit();
// BigFpMemPreOp constructor: pre-indexed access. The memory micro-op(s)
// access `dest` at base + imm, then a MicroAddXiUop adds `imm` to `base` as
// the final micro-op. A load needs 2 uops and a store 3. Every uop before the
// last is given delayed commit.
// NOTE(review): the if (load)/else lines separating the load and store paths
// are not visible in this excerpt.
394 BigFpMemPreOp::BigFpMemPreOp(const char *mnem
, ExtMachInst machInst
,
395 OpClass __opClass
, bool load
, IntRegIndex dest
,
396 IntRegIndex base
, int64_t imm
) :
397 PredMacroOp(mnem
, machInst
, __opClass
)
399 numMicroops
= load
? 2 : 3;
400 microOps
= new StaticInstPtr
[numMicroops
];
402 StaticInstPtr
*uop
= microOps
;
405 *uop
++ = new MicroLdFp16Uop(machInst
, dest
, base
, imm
);
407 *uop
++ = new MicroStrQBFpXImmUop(machInst
, dest
, base
, imm
);
408 *uop
++ = new MicroStrQTFpXImmUop(machInst
, dest
, base
, imm
);
// The base register is updated after the memory uop(s), which already applied
// `imm` as their offset.
410 *uop
= new MicroAddXiUop(machInst
, base
, base
, imm
);
411 (*uop
)->setLastMicroop();
412 microOps
[0]->setFirstMicroop();
414 for (StaticInstPtr
*curUop
= microOps
;
415 !(*curUop
)->isLastMicroop(); curUop
++) {
416 (*curUop
)->setDelayedCommit();
// BigFpMemRegOp constructor: register-offset access of `dest` relative to
// `base`. A load needs one micro-op (MicroLdFp16RegUop); a store needs two
// (MicroStrQBFpXRegUop followed by MicroStrQTFpXRegUop), the first of which is
// given delayed commit. The uop `uop` points at on exit is marked last and
// microOps[0] is marked first.
// NOTE(review): the trailing arguments of the three uop constructors
// (presumably offset/type/imm) and the if (load)/else lines are not visible in
// this excerpt.
420 BigFpMemRegOp::BigFpMemRegOp(const char *mnem
, ExtMachInst machInst
,
421 OpClass __opClass
, bool load
, IntRegIndex dest
,
422 IntRegIndex base
, IntRegIndex offset
,
423 ArmExtendType type
, int64_t imm
) :
424 PredMacroOp(mnem
, machInst
, __opClass
)
426 numMicroops
= load
? 1 : 2;
427 microOps
= new StaticInstPtr
[numMicroops
];
429 StaticInstPtr
*uop
= microOps
;
432 *uop
= new MicroLdFp16RegUop(machInst
, dest
, base
,
435 *uop
= new MicroStrQBFpXRegUop(machInst
, dest
, base
,
437 (*uop
)->setDelayedCommit();
// Pre-increment: the second store uop lands in the next array slot.
438 *++uop
= new MicroStrQTFpXRegUop(machInst
, dest
, base
,
442 (*uop
)->setLastMicroop();
443 microOps
[0]->setFirstMicroop();
// BigFpMemLitOp constructor: emits a MicroLdFp16LitUop(machInst, dest, imm)
// into microOps[0] and marks it as both the first and the last micro-op.
// NOTE(review): the rest of the parameter list (where `imm` is declared) and
// the statement that sets numMicroops are not visible in this excerpt.
446 BigFpMemLitOp::BigFpMemLitOp(const char *mnem
, ExtMachInst machInst
,
447 OpClass __opClass
, IntRegIndex dest
,
449 PredMacroOp(mnem
, machInst
, __opClass
)
452 microOps
= new StaticInstPtr
[numMicroops
];
454 microOps
[0] = new MicroLdFp16LitUop(machInst
, dest
, imm
);
455 microOps
[0]->setLastMicroop();
456 microOps
[0]->setFirstMicroop();
// VldMultOp constructor: builds the micro-ops for a multi-register vector
// load from the address in rn.
// Visible behaviour:
//  - regs must be in [1, 4] and a multiple of elems (asserted).
//  - uop count = 1 or 2 load uops (2 when regs > 2), +1 when rm != 15
//    (writeback), + regs / elems deinterleave uops when elems > 1.
//  - Loads target rMid: a register index starting at NumFloatV7ArchRegs when
//    deinterleaving, otherwise vd * 2 directly.
//  - Writeback adds rm to rn unless rm is 15 or 13, in which case the
//    immediate regs * 8 is added instead.
//  - All uops except the last get delayed commit; first/last are marked.
// NOTE(review): the switch/case/if lines choosing between the alternative
// uops below, and the declaration of uopIdx, are not visible in this excerpt.
459 VldMultOp::VldMultOp(const char *mnem
, ExtMachInst machInst
, OpClass __opClass
,
460 unsigned elems
, RegIndex rn
, RegIndex vd
, unsigned regs
,
461 unsigned inc
, uint32_t size
, uint32_t align
, RegIndex rm
) :
462 PredMacroOp(mnem
, machInst
, __opClass
)
464 assert(regs
> 0 && regs
<= 4);
465 assert(regs
% elems
== 0);
467 numMicroops
= (regs
> 2) ? 2 : 1;
468 bool wb
= (rm
!= 15);
469 bool deinterleave
= (elems
> 1);
471 if (wb
) numMicroops
++;
472 if (deinterleave
) numMicroops
+= (regs
/ elems
);
473 microOps
= new StaticInstPtr
[numMicroops
];
475 RegIndex rMid
= deinterleave
? NumFloatV7ArchRegs
: vd
* 2;
// Alignment flags used for the second load uop (at offset 16); only the
// first load uop receives the caller's `align`.
477 uint32_t noAlign
= TLB::MustBeOne
;
// Load uops: alternatives are 16+16, 16+8, 16 or 8 (per the uop names); the
// second uop of a pair reads at offset 16 into rMid + 4.
482 microOps
[uopIdx
++] = newNeonMemInst
<MicroLdrNeon16Uop
>(
483 size
, machInst
, rMid
, rn
, 0, align
);
484 microOps
[uopIdx
++] = newNeonMemInst
<MicroLdrNeon16Uop
>(
485 size
, machInst
, rMid
+ 4, rn
, 16, noAlign
);
488 microOps
[uopIdx
++] = newNeonMemInst
<MicroLdrNeon16Uop
>(
489 size
, machInst
, rMid
, rn
, 0, align
);
490 microOps
[uopIdx
++] = newNeonMemInst
<MicroLdrNeon8Uop
>(
491 size
, machInst
, rMid
+ 4, rn
, 16, noAlign
);
494 microOps
[uopIdx
++] = newNeonMemInst
<MicroLdrNeon16Uop
>(
495 size
, machInst
, rMid
, rn
, 0, align
);
498 microOps
[uopIdx
++] = newNeonMemInst
<MicroLdrNeon8Uop
>(
499 size
, machInst
, rMid
, rn
, 0, align
);
502 // Unknown number of registers
503 microOps
[uopIdx
++] = new Unknown(machInst
);
// Writeback uop: register increment (rm) vs. immediate increment (regs * 8).
506 if (rm
!= 15 && rm
!= 13) {
508 new MicroAddUop(machInst
, rn
, rn
, rm
, 0, ArmISA::LSL
);
511 new MicroAddiUop(machInst
, rn
, rn
, regs
* 8);
// Deinterleave uops: move data from rMid into the destination registers
// starting at vd * 2, with a register stride of inc * 2.
518 microOps
[uopIdx
++] = newNeonMixInst
<MicroDeintNeon8Uop
>(
519 size
, machInst
, vd
* 2, rMid
, inc
* 2);
523 microOps
[uopIdx
++] = newNeonMixInst
<MicroDeintNeon6Uop
>(
524 size
, machInst
, vd
* 2, rMid
, inc
* 2);
527 assert(regs
== 4 || regs
== 2);
529 microOps
[uopIdx
++] = newNeonMixInst
<MicroDeintNeon4Uop
>(
530 size
, machInst
, vd
* 2, rMid
, inc
* 2);
531 microOps
[uopIdx
++] = newNeonMixInst
<MicroDeintNeon4Uop
>(
532 size
, machInst
, vd
* 2 + 2, rMid
+ 4, inc
* 2);
534 microOps
[uopIdx
++] = newNeonMixInst
<MicroDeintNeon4Uop
>(
535 size
, machInst
, vd
* 2, rMid
, inc
* 2);
539 // Bad number of elements to deinterleave
540 microOps
[uopIdx
++] = new Unknown(machInst
);
// The number of uops emitted must match the count computed up front.
543 assert(uopIdx
== numMicroops
);
545 for (unsigned i
= 0; i
< numMicroops
- 1; i
++) {
546 MicroOp
* uopPtr
= dynamic_cast<MicroOp
*>(microOps
[i
].get());
548 uopPtr
->setDelayedCommit();
550 microOps
[0]->setFirstMicroop();
551 microOps
[numMicroops
- 1]->setLastMicroop();
// VldSingleOp constructor: builds the micro-ops for a single-structure vector
// load from the address in rn.
// Visible behaviour:
//  - regs must be in [1, 4] and a multiple of elems (asserted).
//  - eBytes = 1 << size; loadSize = eBytes * elems bytes are loaded into the
//    temporary register ufp0 (index NumFloatV7ArchRegs) by one load uop.
//  - If rm != 15 a writeback uop is added: rn += rm, or rn += loadSize when rm
//    is 13.
//  - regs / elems unpack uops then move data from ufp0 to registers starting
//    at vd * 2 with stride inc * 2. The "UnpackAll" variants take no lane
//    argument; the others take `lane` (presumably selected by `all`).
//  - All uops except the last get delayed commit; first/last are marked.
// NOTE(review): the switch/case/if lines choosing between the alternative
// uops, the initial value of numMicroops and the declaration of uopIdx are not
// visible in this excerpt.
554 VldSingleOp::VldSingleOp(const char *mnem
, ExtMachInst machInst
,
555 OpClass __opClass
, bool all
, unsigned elems
,
556 RegIndex rn
, RegIndex vd
, unsigned regs
,
557 unsigned inc
, uint32_t size
, uint32_t align
,
558 RegIndex rm
, unsigned lane
) :
559 PredMacroOp(mnem
, machInst
, __opClass
)
561 assert(regs
> 0 && regs
<= 4);
562 assert(regs
% elems
== 0);
564 unsigned eBytes
= (1 << size
);
565 unsigned loadSize
= eBytes
* elems
;
// Number of FloatRegBits-sized registers needed to hold loadSize bytes
// (rounded up); only used in assertions, hence M5_VAR_USED.
566 unsigned loadRegs M5_VAR_USED
= (loadSize
+ sizeof(FloatRegBits
) - 1) /
567 sizeof(FloatRegBits
);
569 assert(loadRegs
> 0 && loadRegs
<= 4);
572 bool wb
= (rm
!= 15);
574 if (wb
) numMicroops
++;
575 numMicroops
+= (regs
/ elems
);
576 microOps
= new StaticInstPtr
[numMicroops
];
578 RegIndex ufp0
= NumFloatV7ArchRegs
;
// Load uop alternatives: the number in the uop name runs from 1 to 16 and the
// template argument is an element type; all load from rn + 0 into ufp0.
583 microOps
[uopIdx
++] = new MicroLdrNeon1Uop
<uint8_t>(
584 machInst
, ufp0
, rn
, 0, align
);
588 microOps
[uopIdx
++] = new MicroLdrNeon2Uop
<uint16_t>(
589 machInst
, ufp0
, rn
, 0, align
);
591 microOps
[uopIdx
++] = new MicroLdrNeon2Uop
<uint8_t>(
592 machInst
, ufp0
, rn
, 0, align
);
596 microOps
[uopIdx
++] = new MicroLdrNeon3Uop
<uint8_t>(
597 machInst
, ufp0
, rn
, 0, align
);
602 microOps
[uopIdx
++] = new MicroLdrNeon4Uop
<uint8_t>(
603 machInst
, ufp0
, rn
, 0, align
);
606 microOps
[uopIdx
++] = new MicroLdrNeon4Uop
<uint16_t>(
607 machInst
, ufp0
, rn
, 0, align
);
610 microOps
[uopIdx
++] = new MicroLdrNeon4Uop
<uint32_t>(
611 machInst
, ufp0
, rn
, 0, align
);
616 microOps
[uopIdx
++] = new MicroLdrNeon6Uop
<uint16_t>(
617 machInst
, ufp0
, rn
, 0, align
);
622 microOps
[uopIdx
++] = new MicroLdrNeon8Uop
<uint16_t>(
623 machInst
, ufp0
, rn
, 0, align
);
626 microOps
[uopIdx
++] = new MicroLdrNeon8Uop
<uint32_t>(
627 machInst
, ufp0
, rn
, 0, align
);
632 microOps
[uopIdx
++] = new MicroLdrNeon12Uop
<uint32_t>(
633 machInst
, ufp0
, rn
, 0, align
);
636 microOps
[uopIdx
++] = new MicroLdrNeon16Uop
<uint32_t>(
637 machInst
, ufp0
, rn
, 0, align
);
640 // Unrecognized load size
641 microOps
[uopIdx
++] = new Unknown(machInst
);
// Writeback uop: register increment (rm) vs. immediate increment (loadSize).
644 if (rm
!= 15 && rm
!= 13) {
646 new MicroAddUop(machInst
, rn
, rn
, rm
, 0, ArmISA::LSL
);
649 new MicroAddiUop(machInst
, rn
, rn
, loadSize
);
// Unpack uops targeting 8 registers (per the "to8" uop names).
658 microOps
[uopIdx
++] = new MicroUnpackAllNeon2to8Uop
<uint8_t>(
659 machInst
, vd
* 2, ufp0
, inc
* 2);
661 microOps
[uopIdx
++] = new MicroUnpackNeon2to8Uop
<uint8_t>(
662 machInst
, vd
* 2, ufp0
, inc
* 2, lane
);
667 microOps
[uopIdx
++] = new MicroUnpackAllNeon2to8Uop
<uint16_t>(
668 machInst
, vd
* 2, ufp0
, inc
* 2);
670 microOps
[uopIdx
++] = new MicroUnpackNeon2to8Uop
<uint16_t>(
671 machInst
, vd
* 2, ufp0
, inc
* 2, lane
);
676 microOps
[uopIdx
++] = new MicroUnpackAllNeon4to8Uop
<uint32_t>(
677 machInst
, vd
* 2, ufp0
, inc
* 2);
679 microOps
[uopIdx
++] = new MicroUnpackNeon4to8Uop
<uint32_t>(
680 machInst
, vd
* 2, ufp0
, inc
* 2, lane
);
685 microOps
[uopIdx
++] = new Unknown(machInst
);
// Unpack uops targeting 6 registers (per the "to6" uop names).
694 microOps
[uopIdx
++] = new MicroUnpackAllNeon2to6Uop
<uint8_t>(
695 machInst
, vd
* 2, ufp0
, inc
* 2);
697 microOps
[uopIdx
++] = new MicroUnpackNeon2to6Uop
<uint8_t>(
698 machInst
, vd
* 2, ufp0
, inc
* 2, lane
);
703 microOps
[uopIdx
++] = new MicroUnpackAllNeon2to6Uop
<uint16_t>(
704 machInst
, vd
* 2, ufp0
, inc
* 2);
706 microOps
[uopIdx
++] = new MicroUnpackNeon2to6Uop
<uint16_t>(
707 machInst
, vd
* 2, ufp0
, inc
* 2, lane
);
712 microOps
[uopIdx
++] = new MicroUnpackAllNeon4to6Uop
<uint32_t>(
713 machInst
, vd
* 2, ufp0
, inc
* 2);
715 microOps
[uopIdx
++] = new MicroUnpackNeon4to6Uop
<uint32_t>(
716 machInst
, vd
* 2, ufp0
, inc
* 2, lane
);
721 microOps
[uopIdx
++] = new Unknown(machInst
);
// Unpack uops targeting 4 registers (per the "to4" uop names); the loaded
// data must fit in at most two registers here.
727 assert(loadRegs
<= 2);
731 microOps
[uopIdx
++] = new MicroUnpackAllNeon2to4Uop
<uint8_t>(
732 machInst
, vd
* 2, ufp0
, inc
* 2);
734 microOps
[uopIdx
++] = new MicroUnpackNeon2to4Uop
<uint8_t>(
735 machInst
, vd
* 2, ufp0
, inc
* 2, lane
);
740 microOps
[uopIdx
++] = new MicroUnpackAllNeon2to4Uop
<uint16_t>(
741 machInst
, vd
* 2, ufp0
, inc
* 2);
743 microOps
[uopIdx
++] = new MicroUnpackNeon2to4Uop
<uint16_t>(
744 machInst
, vd
* 2, ufp0
, inc
* 2, lane
);
749 microOps
[uopIdx
++] = new MicroUnpackAllNeon2to4Uop
<uint32_t>(
750 machInst
, vd
* 2, ufp0
, inc
* 2);
752 microOps
[uopIdx
++] = new MicroUnpackNeon2to4Uop
<uint32_t>(
753 machInst
, vd
* 2, ufp0
, inc
* 2, lane
);
758 microOps
[uopIdx
++] = new Unknown(machInst
);
// Unpack uops targeting 2 registers: one unpack uop per destination
// register, each offset by `offset` from vd. Two registers are only allowed
// when `all` is set.
763 assert(regs
== 1 || (all
&& regs
== 2));
764 assert(loadRegs
<= 2);
765 for (unsigned offset
= 0; offset
< regs
; offset
++) {
770 new MicroUnpackAllNeon2to2Uop
<uint8_t>(
771 machInst
, (vd
+ offset
) * 2, ufp0
, inc
* 2);
774 new MicroUnpackNeon2to2Uop
<uint8_t>(
775 machInst
, (vd
+ offset
) * 2, ufp0
, inc
* 2, lane
);
781 new MicroUnpackAllNeon2to2Uop
<uint16_t>(
782 machInst
, (vd
+ offset
) * 2, ufp0
, inc
* 2);
785 new MicroUnpackNeon2to2Uop
<uint16_t>(
786 machInst
, (vd
+ offset
) * 2, ufp0
, inc
* 2, lane
);
792 new MicroUnpackAllNeon2to2Uop
<uint32_t>(
793 machInst
, (vd
+ offset
) * 2, ufp0
, inc
* 2);
796 new MicroUnpackNeon2to2Uop
<uint32_t>(
797 machInst
, (vd
+ offset
) * 2, ufp0
, inc
* 2, lane
);
802 microOps
[uopIdx
++] = new Unknown(machInst
);
808 // Bad number of elements to unpack
809 microOps
[uopIdx
++] = new Unknown(machInst
);
// The number of uops emitted must match the count computed up front.
811 assert(uopIdx
== numMicroops
);
813 for (unsigned i
= 0; i
< numMicroops
- 1; i
++) {
814 MicroOp
* uopPtr
= dynamic_cast<MicroOp
*>(microOps
[i
].get());
816 uopPtr
->setDelayedCommit();
818 microOps
[0]->setFirstMicroop();
819 microOps
[numMicroops
- 1]->setLastMicroop();
// VstMultOp constructor: builds the micro-ops for a multi-register vector
// store to the address in rn (the store counterpart of VldMultOp).
// Visible behaviour:
//  - regs must be in [1, 4] and a multiple of elems (asserted).
//  - uop count = 1 or 2 store uops (2 when regs > 2), +1 when rm != 15
//    (writeback), + regs / elems interleave uops when elems > 1.
//  - Interleave uops are emitted first and write to rMid (starting at
//    NumFloatV7ArchRegs); without interleaving rMid is vd * 2 and the stores
//    read the source registers directly.
//  - Writeback adds rm to rn unless rm is 15 or 13, in which case the
//    immediate regs * 8 is added instead.
//  - All uops except the last get delayed commit; first/last are marked.
// NOTE(review): the switch/case/if lines choosing between the alternative
// uops below, and the declaration of uopIdx, are not visible in this excerpt.
822 VstMultOp::VstMultOp(const char *mnem
, ExtMachInst machInst
, OpClass __opClass
,
823 unsigned elems
, RegIndex rn
, RegIndex vd
, unsigned regs
,
824 unsigned inc
, uint32_t size
, uint32_t align
, RegIndex rm
) :
825 PredMacroOp(mnem
, machInst
, __opClass
)
827 assert(regs
> 0 && regs
<= 4);
828 assert(regs
% elems
== 0);
830 numMicroops
= (regs
> 2) ? 2 : 1;
831 bool wb
= (rm
!= 15);
832 bool interleave
= (elems
> 1);
834 if (wb
) numMicroops
++;
835 if (interleave
) numMicroops
+= (regs
/ elems
);
836 microOps
= new StaticInstPtr
[numMicroops
];
// Alignment flags used for the second store uop (at offset 16); only the
// first store uop receives the caller's `align`.
838 uint32_t noAlign
= TLB::MustBeOne
;
840 RegIndex rMid
= interleave
? NumFloatV7ArchRegs
: vd
* 2;
// Interleave uops: gather from the source registers at vd * 2 (stride
// inc * 2) into rMid.
847 microOps
[uopIdx
++] = newNeonMixInst
<MicroInterNeon8Uop
>(
848 size
, machInst
, rMid
, vd
* 2, inc
* 2);
852 microOps
[uopIdx
++] = newNeonMixInst
<MicroInterNeon6Uop
>(
853 size
, machInst
, rMid
, vd
* 2, inc
* 2);
856 assert(regs
== 4 || regs
== 2);
858 microOps
[uopIdx
++] = newNeonMixInst
<MicroInterNeon4Uop
>(
859 size
, machInst
, rMid
, vd
* 2, inc
* 2);
860 microOps
[uopIdx
++] = newNeonMixInst
<MicroInterNeon4Uop
>(
861 size
, machInst
, rMid
+ 4, vd
* 2 + 2, inc
* 2);
863 microOps
[uopIdx
++] = newNeonMixInst
<MicroInterNeon4Uop
>(
864 size
, machInst
, rMid
, vd
* 2, inc
* 2);
868 // Bad number of elements to interleave
869 microOps
[uopIdx
++] = new Unknown(machInst
);
// Store uops: alternatives are 16+16, 16+8, 16 or 8 (per the uop names); the
// second uop of a pair writes at offset 16 from rMid + 4.
874 microOps
[uopIdx
++] = newNeonMemInst
<MicroStrNeon16Uop
>(
875 size
, machInst
, rMid
, rn
, 0, align
);
876 microOps
[uopIdx
++] = newNeonMemInst
<MicroStrNeon16Uop
>(
877 size
, machInst
, rMid
+ 4, rn
, 16, noAlign
);
880 microOps
[uopIdx
++] = newNeonMemInst
<MicroStrNeon16Uop
>(
881 size
, machInst
, rMid
, rn
, 0, align
);
882 microOps
[uopIdx
++] = newNeonMemInst
<MicroStrNeon8Uop
>(
883 size
, machInst
, rMid
+ 4, rn
, 16, noAlign
);
886 microOps
[uopIdx
++] = newNeonMemInst
<MicroStrNeon16Uop
>(
887 size
, machInst
, rMid
, rn
, 0, align
);
890 microOps
[uopIdx
++] = newNeonMemInst
<MicroStrNeon8Uop
>(
891 size
, machInst
, rMid
, rn
, 0, align
);
894 // Unknown number of registers
895 microOps
[uopIdx
++] = new Unknown(machInst
);
// Writeback uop: register increment (rm) vs. immediate increment (regs * 8).
898 if (rm
!= 15 && rm
!= 13) {
900 new MicroAddUop(machInst
, rn
, rn
, rm
, 0, ArmISA::LSL
);
903 new MicroAddiUop(machInst
, rn
, rn
, regs
* 8);
// The number of uops emitted must match the count computed up front.
906 assert(uopIdx
== numMicroops
);
908 for (unsigned i
= 0; i
< numMicroops
- 1; i
++) {
909 MicroOp
* uopPtr
= dynamic_cast<MicroOp
*>(microOps
[i
].get());
911 uopPtr
->setDelayedCommit();
913 microOps
[0]->setFirstMicroop();
914 microOps
[numMicroops
- 1]->setLastMicroop();
// VstSingleOp constructor: builds the micro-ops for a single-structure vector
// store to the address in rn (the store counterpart of VldSingleOp).
// Visible behaviour:
//  - regs must be in [1, 4] and a multiple of elems (asserted).
//  - eBytes = 1 << size; storeSize = eBytes * elems bytes are stored.
//  - regs / elems pack uops first gather the selected `lane` from registers
//    starting at vd * 2 (stride inc * 2) into the temporary register ufp0
//    (index NumFloatV7ArchRegs).
//  - One store uop, chosen by switch (storeSize), writes ufp0 to rn + 0.
//  - If rm != 15 a writeback uop is added: rn += rm, or rn += storeSize when
//    rm is 13.
//  - All uops except the last get delayed commit; first/last are marked.
// NOTE(review): most switch/case/if lines choosing between the alternative
// uops, the initial value of numMicroops and the declaration of uopIdx are not
// visible in this excerpt.
917 VstSingleOp::VstSingleOp(const char *mnem
, ExtMachInst machInst
,
918 OpClass __opClass
, bool all
, unsigned elems
,
919 RegIndex rn
, RegIndex vd
, unsigned regs
,
920 unsigned inc
, uint32_t size
, uint32_t align
,
921 RegIndex rm
, unsigned lane
) :
922 PredMacroOp(mnem
, machInst
, __opClass
)
925 assert(regs
> 0 && regs
<= 4);
926 assert(regs
% elems
== 0);
928 unsigned eBytes
= (1 << size
);
929 unsigned storeSize
= eBytes
* elems
;
// Number of FloatRegBits-sized registers needed to hold storeSize bytes
// (rounded up); only used in assertions, hence M5_VAR_USED.
930 unsigned storeRegs M5_VAR_USED
= (storeSize
+ sizeof(FloatRegBits
) - 1) /
931 sizeof(FloatRegBits
);
933 assert(storeRegs
> 0 && storeRegs
<= 4);
936 bool wb
= (rm
!= 15);
938 if (wb
) numMicroops
++;
939 numMicroops
+= (regs
/ elems
);
940 microOps
= new StaticInstPtr
[numMicroops
];
942 RegIndex ufp0
= NumFloatV7ArchRegs
;
// Pack uops reading 8 registers (per the "8to" uop names).
950 microOps
[uopIdx
++] = new MicroPackNeon8to2Uop
<uint8_t>(
951 machInst
, ufp0
, vd
* 2, inc
* 2, lane
);
954 microOps
[uopIdx
++] = new MicroPackNeon8to2Uop
<uint16_t>(
955 machInst
, ufp0
, vd
* 2, inc
* 2, lane
);
958 microOps
[uopIdx
++] = new MicroPackNeon8to4Uop
<uint32_t>(
959 machInst
, ufp0
, vd
* 2, inc
* 2, lane
);
963 microOps
[uopIdx
++] = new Unknown(machInst
);
// Pack uops reading 6 registers (per the "6to" uop names).
971 microOps
[uopIdx
++] = new MicroPackNeon6to2Uop
<uint8_t>(
972 machInst
, ufp0
, vd
* 2, inc
* 2, lane
);
975 microOps
[uopIdx
++] = new MicroPackNeon6to2Uop
<uint16_t>(
976 machInst
, ufp0
, vd
* 2, inc
* 2, lane
);
979 microOps
[uopIdx
++] = new MicroPackNeon6to4Uop
<uint32_t>(
980 machInst
, ufp0
, vd
* 2, inc
* 2, lane
);
984 microOps
[uopIdx
++] = new Unknown(machInst
);
// Pack uops reading 4 registers (per the "4to" uop names); the packed data
// must fit in at most two registers here.
990 assert(storeRegs
<= 2);
993 microOps
[uopIdx
++] = new MicroPackNeon4to2Uop
<uint8_t>(
994 machInst
, ufp0
, vd
* 2, inc
* 2, lane
);
997 microOps
[uopIdx
++] = new MicroPackNeon4to2Uop
<uint16_t>(
998 machInst
, ufp0
, vd
* 2, inc
* 2, lane
);
1001 microOps
[uopIdx
++] = new MicroPackNeon4to2Uop
<uint32_t>(
1002 machInst
, ufp0
, vd
* 2, inc
* 2, lane
);
1006 microOps
[uopIdx
++] = new Unknown(machInst
);
// Pack uops reading 2 registers: one pack uop per source register, each
// offset by `offset` from vd. Two registers are only allowed when `all` is
// set.
1011 assert(regs
== 1 || (all
&& regs
== 2));
1012 assert(storeRegs
<= 2);
1013 for (unsigned offset
= 0; offset
< regs
; offset
++) {
1016 microOps
[uopIdx
++] = new MicroPackNeon2to2Uop
<uint8_t>(
1017 machInst
, ufp0
, (vd
+ offset
) * 2, inc
* 2, lane
);
1020 microOps
[uopIdx
++] = new MicroPackNeon2to2Uop
<uint16_t>(
1021 machInst
, ufp0
, (vd
+ offset
) * 2, inc
* 2, lane
);
1024 microOps
[uopIdx
++] = new MicroPackNeon2to2Uop
<uint32_t>(
1025 machInst
, ufp0
, (vd
+ offset
) * 2, inc
* 2, lane
);
1029 microOps
[uopIdx
++] = new Unknown(machInst
);
1035 // Bad number of elements to unpack
1036 microOps
[uopIdx
++] = new Unknown(machInst
);
// Store uop selection by total byte count; all variants store ufp0 to rn + 0.
1038 switch (storeSize
) {
1040 microOps
[uopIdx
++] = new MicroStrNeon1Uop
<uint8_t>(
1041 machInst
, ufp0
, rn
, 0, align
);
1045 microOps
[uopIdx
++] = new MicroStrNeon2Uop
<uint16_t>(
1046 machInst
, ufp0
, rn
, 0, align
);
1048 microOps
[uopIdx
++] = new MicroStrNeon2Uop
<uint8_t>(
1049 machInst
, ufp0
, rn
, 0, align
);
1053 microOps
[uopIdx
++] = new MicroStrNeon3Uop
<uint8_t>(
1054 machInst
, ufp0
, rn
, 0, align
);
1059 microOps
[uopIdx
++] = new MicroStrNeon4Uop
<uint8_t>(
1060 machInst
, ufp0
, rn
, 0, align
);
1063 microOps
[uopIdx
++] = new MicroStrNeon4Uop
<uint16_t>(
1064 machInst
, ufp0
, rn
, 0, align
);
1067 microOps
[uopIdx
++] = new MicroStrNeon4Uop
<uint32_t>(
1068 machInst
, ufp0
, rn
, 0, align
);
1073 microOps
[uopIdx
++] = new MicroStrNeon6Uop
<uint16_t>(
1074 machInst
, ufp0
, rn
, 0, align
);
1079 microOps
[uopIdx
++] = new MicroStrNeon8Uop
<uint16_t>(
1080 machInst
, ufp0
, rn
, 0, align
);
1083 microOps
[uopIdx
++] = new MicroStrNeon8Uop
<uint32_t>(
1084 machInst
, ufp0
, rn
, 0, align
);
1089 microOps
[uopIdx
++] = new MicroStrNeon12Uop
<uint32_t>(
1090 machInst
, ufp0
, rn
, 0, align
);
1093 microOps
[uopIdx
++] = new MicroStrNeon16Uop
<uint32_t>(
1094 machInst
, ufp0
, rn
, 0, align
);
1098 microOps
[uopIdx
++] = new Unknown(machInst
);
// Writeback uop: register increment (rm) vs. immediate increment (storeSize).
1101 if (rm
!= 15 && rm
!= 13) {
1102 microOps
[uopIdx
++] =
1103 new MicroAddUop(machInst
, rn
, rn
, rm
, 0, ArmISA::LSL
);
1105 microOps
[uopIdx
++] =
1106 new MicroAddiUop(machInst
, rn
, rn
, storeSize
);
// The number of uops emitted must match the count computed up front.
1109 assert(uopIdx
== numMicroops
);
1111 for (unsigned i
= 0; i
< numMicroops
- 1; i
++) {
1112 MicroOp
* uopPtr
= dynamic_cast<MicroOp
*>(microOps
[i
].get());
1114 uopPtr
->setDelayedCommit();
1116 microOps
[0]->setFirstMicroop();
1117 microOps
[numMicroops
- 1]->setLastMicroop();
// VldMultOp64 constructor: builds the micro-ops for a 64-bit-ISA
// multi-structure vector load.
// Visible behaviour:
//  - The base register is rn passed through makeSP(); baseIsSP records whether
//    the result is the stack pointer.
//  - totNumBytes = numRegs * dataSize / 8 and must not exceed 64 (asserted).
//  - Memory uops (MicroNeonLoad64) transfer at most 16 bytes each into
//    temporary registers starting at vx = NumFloatV8ArchRegs / 4; the final
//    memory uop transfers `residuum` bytes if non-zero, otherwise 16.
//  - An optional writeback uop adds either register rm or an immediate to the
//    base; rm == INTREG_X31 selects the immediate form.
//  - ceil(numRegs / 2) marshalling (deinterleave) uops move data from vx to
//    the destination registers starting at vd, two registers per step.
//  - All uops except the last get delayed commit; only setLastMicroop() is
//    called in the visible code.
// NOTE(review): several lines are not visible in this excerpt (the adjustment
// of numMemMicroops for a non-zero residuum, the declaration of `i` used by
// the first loop, the `if (wb)` guard, the switch header over the register
// count, and the trailing arguments of the writeback uops) -- confirm against
// the full file.
1120 VldMultOp64::VldMultOp64(const char *mnem
, ExtMachInst machInst
,
1121 OpClass __opClass
, RegIndex rn
, RegIndex vd
,
1122 RegIndex rm
, uint8_t eSize
, uint8_t dataSize
,
1123 uint8_t numStructElems
, uint8_t numRegs
, bool wb
) :
1124 PredMacroOp(mnem
, machInst
, __opClass
)
1126 RegIndex vx
= NumFloatV8ArchRegs
/ 4;
1127 RegIndex rnsp
= (RegIndex
) makeSP((IntRegIndex
) rn
);
1128 bool baseIsSP
= isSP((IntRegIndex
) rnsp
);
// Start the uop count with the optional writeback uop.
1130 numMicroops
= wb
? 1 : 0;
1132 int totNumBytes
= numRegs
* dataSize
/ 8;
1133 assert(totNumBytes
<= 64);
1135 // The guiding principle here is that no more than 16 bytes can be
1136 // transferred at a time
1137 int numMemMicroops
= totNumBytes
/ 16;
1138 int residuum
= totNumBytes
% 16;
1141 numMicroops
+= numMemMicroops
;
// One marshalling uop per pair of registers, rounded up.
1143 int numMarshalMicroops
= numRegs
/ 2 + (numRegs
% 2 ? 1 : 0);
1144 numMicroops
+= numMarshalMicroops
;
1146 microOps
= new StaticInstPtr
[numMicroops
];
1147 unsigned uopIdx
= 0;
1148 uint32_t memaccessFlags
= TLB::MustBeOne
| (TLB::ArmFlags
) eSize
|
1149 TLB::AllowUnaligned
;
// All memory uops but the last transfer a full 16 bytes at offset 16 * i.
1152 for (; i
< numMemMicroops
- 1; ++i
) {
1153 microOps
[uopIdx
++] = new MicroNeonLoad64(
1154 machInst
, vx
+ (RegIndex
) i
, rnsp
, 16 * i
, memaccessFlags
,
1155 baseIsSP
, 16 /* accSize */, eSize
);
// Final memory uop: partial transfer when totNumBytes is not a multiple of 16.
1157 microOps
[uopIdx
++] = new MicroNeonLoad64(
1158 machInst
, vx
+ (RegIndex
) i
, rnsp
, 16 * i
, memaccessFlags
, baseIsSP
,
1159 residuum
? residuum
: 16 /* accSize */, eSize
);
1161 // Writeback microop: the post-increment amount is encoded in "Rm": a
1162 // 64-bit general register OR as '11111' for an immediate value equal to
1163 // the total number of bytes transferred (i.e. 8, 16, 24, 32, 48 or 64)
1165 if (rm
!= ((RegIndex
) INTREG_X31
)) {
1166 microOps
[uopIdx
++] = new MicroAddXERegUop(machInst
, rnsp
, rnsp
, rm
,
1169 microOps
[uopIdx
++] = new MicroAddXiUop(machInst
, rnsp
, rnsp
,
// Marshalling uops: step i writes destination registers starting at
// vd + 2 * i; the uop class matches the register-count case label.
1174 for (int i
= 0; i
< numMarshalMicroops
; ++i
) {
1176 case 1: microOps
[uopIdx
++] = new MicroDeintNeon64_1Reg(
1177 machInst
, vd
+ (RegIndex
) (2 * i
), vx
, eSize
, dataSize
,
1178 numStructElems
, 1, i
/* step */);
1180 case 2: microOps
[uopIdx
++] = new MicroDeintNeon64_2Reg(
1181 machInst
, vd
+ (RegIndex
) (2 * i
), vx
, eSize
, dataSize
,
1182 numStructElems
, 2, i
/* step */);
1184 case 3: microOps
[uopIdx
++] = new MicroDeintNeon64_3Reg(
1185 machInst
, vd
+ (RegIndex
) (2 * i
), vx
, eSize
, dataSize
,
1186 numStructElems
, 3, i
/* step */);
1188 case 4: microOps
[uopIdx
++] = new MicroDeintNeon64_4Reg(
1189 machInst
, vd
+ (RegIndex
) (2 * i
), vx
, eSize
, dataSize
,
1190 numStructElems
, 4, i
/* step */);
1192 default: panic("Invalid number of registers");
// The number of uops emitted must match the count computed up front.
1197 assert(uopIdx
== numMicroops
);
1199 for (int i
= 0; i
< numMicroops
- 1; ++i
) {
1200 microOps
[i
]->setDelayedCommit();
1202 microOps
[numMicroops
- 1]->setLastMicroop();
// VstMultOp64 constructor: fills microOps[] for this macroop.
//
// Visible steps, in emission order:
//   1. numMarshalMicroops (1, or 2 when more than 32 bytes are moved)
//      MicroIntNeon64_<n>Reg microops, one per step i.
//   2. numMemMicroops MicroNeonStore64 microops at offsets 16 * i from rnsp;
//      all but the last access 16 bytes, the last accesses the residuum
//      (or 16 when totNumBytes is a multiple of 16).
//   3. A MicroAddXERegUop / MicroAddXiUop on rnsp for the writeback.
// totNumBytes is numRegs * dataSize / 8 and is asserted to be <= 64.
// Every microop but the last is marked delayed-commit; the last one is
// marked as the last microop.
//
// NOTE(review): the statements selecting between the case labels, guarding
// the writeback, and adjusting numMemMicroops for a non-zero residuum are
// not visible here -- confirm against the complete source.
1205 VstMultOp64::VstMultOp64(const char *mnem
, ExtMachInst machInst
,
1206 OpClass __opClass
, RegIndex rn
, RegIndex vd
,
1207 RegIndex rm
, uint8_t eSize
, uint8_t dataSize
,
1208 uint8_t numStructElems
, uint8_t numRegs
, bool wb
) :
1209 PredMacroOp(mnem
, machInst
, __opClass
)
1211 RegIndex vx
= NumFloatV8ArchRegs
/ 4;
1212 RegIndex rnsp
= (RegIndex
) makeSP((IntRegIndex
) rn
);
1213 bool baseIsSP
= isSP((IntRegIndex
) rnsp
);
// One microop slot is reserved for the writeback when wb is set.
1215 numMicroops
= wb
? 1 : 0;
1217 int totNumBytes
= numRegs
* dataSize
/ 8;
1218 assert(totNumBytes
<= 64);
1220 // The guiding principle here is that no more than 16 bytes can be
1221 // transferred at a time
1222 int numMemMicroops
= totNumBytes
/ 16;
1223 int residuum
= totNumBytes
% 16;
1226 numMicroops
+= numMemMicroops
;
1228 int numMarshalMicroops
= totNumBytes
> 32 ? 2 : 1;
1229 numMicroops
+= numMarshalMicroops
;
1231 microOps
= new StaticInstPtr
[numMicroops
];
1232 unsigned uopIdx
= 0;
// Marshalling microops come first; step i uses register index vx + 2 * i.
// The case labels are presumably selected by numRegs -- TODO confirm.
1234 for (int i
= 0; i
< numMarshalMicroops
; ++i
) {
1236 case 1: microOps
[uopIdx
++] = new MicroIntNeon64_1Reg(
1237 machInst
, vx
+ (RegIndex
) (2 * i
), vd
, eSize
, dataSize
,
1238 numStructElems
, 1, i
/* step */);
1240 case 2: microOps
[uopIdx
++] = new MicroIntNeon64_2Reg(
1241 machInst
, vx
+ (RegIndex
) (2 * i
), vd
, eSize
, dataSize
,
1242 numStructElems
, 2, i
/* step */);
1244 case 3: microOps
[uopIdx
++] = new MicroIntNeon64_3Reg(
1245 machInst
, vx
+ (RegIndex
) (2 * i
), vd
, eSize
, dataSize
,
1246 numStructElems
, 3, i
/* step */);
1248 case 4: microOps
[uopIdx
++] = new MicroIntNeon64_4Reg(
1249 machInst
, vx
+ (RegIndex
) (2 * i
), vd
, eSize
, dataSize
,
1250 numStructElems
, 4, i
/* step */);
1252 default: panic("Invalid number of registers");
1256 uint32_t memaccessFlags
= TLB::MustBeOne
| (TLB::ArmFlags
) eSize
|
1257 TLB::AllowUnaligned
;
// All stores except the final one access a full 16 bytes.
1260 for (; i
< numMemMicroops
- 1; ++i
) {
1261 microOps
[uopIdx
++] = new MicroNeonStore64(
1262 machInst
, vx
+ (RegIndex
) i
, rnsp
, 16 * i
, memaccessFlags
,
1263 baseIsSP
, 16 /* accSize */, eSize
);
// Final store: access size is the residuum, or 16 when there is none.
1265 microOps
[uopIdx
++] = new MicroNeonStore64(
1266 machInst
, vx
+ (RegIndex
) i
, rnsp
, 16 * i
, memaccessFlags
, baseIsSP
,
1267 residuum
? residuum
: 16 /* accSize */, eSize
);
1269 // Writeback microop: the post-increment amount is encoded in "Rm": a
1270 // 64-bit general register OR as '11111' for an immediate value equal to
1271 // the total number of bytes transferred (i.e. 8, 16, 24, 32, 48 or 64)
1273 if (rm
!= ((RegIndex
) INTREG_X31
)) {
1274 microOps
[uopIdx
++] = new MicroAddXERegUop(machInst
, rnsp
, rnsp
, rm
,
1277 microOps
[uopIdx
++] = new MicroAddXiUop(machInst
, rnsp
, rnsp
,
// Sanity check: every reserved slot must have been filled.
1282 assert(uopIdx
== numMicroops
);
// Every microop except the last is flagged delayed-commit.
1284 for (int i
= 0; i
< numMicroops
- 1; i
++) {
1285 microOps
[i
]->setDelayedCommit();
1287 microOps
[numMicroops
- 1]->setLastMicroop();
// VldSingleOp64 constructor: fills microOps[] for this macroop.
//
// Visible steps, in emission order:
//   1. numMemMicroops MicroNeonLoad64 microops at offsets 16 * i from rnsp;
//      all but the last access 16 bytes, the last accesses the residuum
//      (or 16 when totNumBytes is a multiple of 16).
//   2. A MicroAddXERegUop / MicroAddXiUop on rnsp for the writeback.
//   3. numMarshalMicroops MicroUnpackNeon64 microops, one per step i, where
//      numMarshalMicroops is numStructElems / 2 rounded up.
// totNumBytes is numStructElems * (1 << eSize) and is asserted to be <= 64.
// Every microop but the last is marked delayed-commit; the last one is
// marked as the last microop.
//
// NOTE(review): the statements guarding the writeback and adjusting
// numMemMicroops for a non-zero residuum are not visible here -- confirm
// against the complete source.
1290 VldSingleOp64::VldSingleOp64(const char *mnem
, ExtMachInst machInst
,
1291 OpClass __opClass
, RegIndex rn
, RegIndex vd
,
1292 RegIndex rm
, uint8_t eSize
, uint8_t dataSize
,
1293 uint8_t numStructElems
, uint8_t index
, bool wb
,
1295 PredMacroOp(mnem
, machInst
, __opClass
),
// NOTE(review): these initializers use the constants 0/false, not the
// same-named constructor arguments (eSize, dataSize, numStructElems, index,
// wb) -- confirm that leaving the members at these values is intended.
1296 eSize(0), dataSize(0), numStructElems(0), index(0),
1297 wb(false), replicate(false)
1300 RegIndex vx
= NumFloatV8ArchRegs
/ 4;
1301 RegIndex rnsp
= (RegIndex
) makeSP((IntRegIndex
) rn
);
1302 bool baseIsSP
= isSP((IntRegIndex
) rnsp
);
// One microop slot is reserved for the writeback when wb is set.
1304 numMicroops
= wb
? 1 : 0;
1306 int eSizeBytes
= 1 << eSize
;
1307 int totNumBytes
= numStructElems
* eSizeBytes
;
1308 assert(totNumBytes
<= 64);
1310 // The guiding principle here is that no more than 16 bytes can be
1311 // transferred at a time
1312 int numMemMicroops
= totNumBytes
/ 16;
1313 int residuum
= totNumBytes
% 16;
1316 numMicroops
+= numMemMicroops
;
// Ceiling of numStructElems / 2.
1318 int numMarshalMicroops
= numStructElems
/ 2 + (numStructElems
% 2 ? 1 : 0);
1319 numMicroops
+= numMarshalMicroops
;
1321 microOps
= new StaticInstPtr
[numMicroops
];
1322 unsigned uopIdx
= 0;
1324 uint32_t memaccessFlags
= TLB::MustBeOne
| (TLB::ArmFlags
) eSize
|
1325 TLB::AllowUnaligned
;
// All loads except the final one access a full 16 bytes.
1328 for (; i
< numMemMicroops
- 1; ++i
) {
1329 microOps
[uopIdx
++] = new MicroNeonLoad64(
1330 machInst
, vx
+ (RegIndex
) i
, rnsp
, 16 * i
, memaccessFlags
,
1331 baseIsSP
, 16 /* accSize */, eSize
);
// Final load: access size is the residuum, or 16 when there is none.
1333 microOps
[uopIdx
++] = new MicroNeonLoad64(
1334 machInst
, vx
+ (RegIndex
) i
, rnsp
, 16 * i
, memaccessFlags
, baseIsSP
,
1335 residuum
? residuum
: 16 /* accSize */, eSize
);
1337 // Writeback microop: the post-increment amount is encoded in "Rm": a
1338 // 64-bit general register OR as '11111' for an immediate value equal to
1339 // the total number of bytes transferred (i.e. 8, 16, 24, 32, 48 or 64)
1341 if (rm
!= ((RegIndex
) INTREG_X31
)) {
1342 microOps
[uopIdx
++] = new MicroAddXERegUop(machInst
, rnsp
, rnsp
, rm
,
1345 microOps
[uopIdx
++] = new MicroAddXiUop(machInst
, rnsp
, rnsp
,
// Marshalling microops come last; step i uses register index vd + 2 * i.
1350 for (int i
= 0; i
< numMarshalMicroops
; ++i
) {
1351 microOps
[uopIdx
++] = new MicroUnpackNeon64(
1352 machInst
, vd
+ (RegIndex
) (2 * i
), vx
, eSize
, dataSize
,
1353 numStructElems
, index
, i
/* step */, replicate
);
// Sanity check: every reserved slot must have been filled.
1356 assert(uopIdx
== numMicroops
);
// Every microop except the last is flagged delayed-commit.
1358 for (int i
= 0; i
< numMicroops
- 1; i
++) {
1359 microOps
[i
]->setDelayedCommit();
1361 microOps
[numMicroops
- 1]->setLastMicroop();
// VstSingleOp64 constructor: fills microOps[] for this macroop.
//
// Visible steps, in emission order:
//   1. numMarshalMicroops (1, or 2 when more than 32 bytes are moved)
//      MicroPackNeon64 microops, one per step i.
//   2. numMemMicroops MicroNeonStore64 microops at offsets 16 * i from rnsp;
//      all but the last access 16 bytes, the last accesses the residuum
//      (or 16 when totNumBytes is a multiple of 16).
//   3. A MicroAddXERegUop / MicroAddXiUop on rnsp for the writeback.
// totNumBytes is numStructElems * (1 << eSize) and is asserted to be <= 64.
// Every microop but the last is marked delayed-commit; the last one is
// marked as the last microop.
//
// NOTE(review): the statements guarding the writeback and adjusting
// numMemMicroops for a non-zero residuum are not visible here -- confirm
// against the complete source.
1364 VstSingleOp64::VstSingleOp64(const char *mnem
, ExtMachInst machInst
,
1365 OpClass __opClass
, RegIndex rn
, RegIndex vd
,
1366 RegIndex rm
, uint8_t eSize
, uint8_t dataSize
,
1367 uint8_t numStructElems
, uint8_t index
, bool wb
,
1369 PredMacroOp(mnem
, machInst
, __opClass
),
// NOTE(review): these initializers use the constants 0/false, not the
// same-named constructor arguments (eSize, dataSize, numStructElems, index,
// wb) -- confirm that leaving the members at these values is intended.
1370 eSize(0), dataSize(0), numStructElems(0), index(0),
1371 wb(false), replicate(false)
1373 RegIndex vx
= NumFloatV8ArchRegs
/ 4;
1374 RegIndex rnsp
= (RegIndex
) makeSP((IntRegIndex
) rn
);
1375 bool baseIsSP
= isSP((IntRegIndex
) rnsp
);
// One microop slot is reserved for the writeback when wb is set.
1377 numMicroops
= wb
? 1 : 0;
1379 int eSizeBytes
= 1 << eSize
;
1380 int totNumBytes
= numStructElems
* eSizeBytes
;
1381 assert(totNumBytes
<= 64);
1383 // The guiding principle here is that no more than 16 bytes can be
1384 // transferred at a time
1385 int numMemMicroops
= totNumBytes
/ 16;
1386 int residuum
= totNumBytes
% 16;
1389 numMicroops
+= numMemMicroops
;
1391 int numMarshalMicroops
= totNumBytes
> 32 ? 2 : 1;
1392 numMicroops
+= numMarshalMicroops
;
1394 microOps
= new StaticInstPtr
[numMicroops
];
1395 unsigned uopIdx
= 0;
// Marshalling microops come first; step i uses register index vx + 2 * i.
1397 for (int i
= 0; i
< numMarshalMicroops
; ++i
) {
1398 microOps
[uopIdx
++] = new MicroPackNeon64(
1399 machInst
, vx
+ (RegIndex
) (2 * i
), vd
, eSize
, dataSize
,
1400 numStructElems
, index
, i
/* step */, replicate
);
1403 uint32_t memaccessFlags
= TLB::MustBeOne
| (TLB::ArmFlags
) eSize
|
1404 TLB::AllowUnaligned
;
// All stores except the final one access a full 16 bytes.
1407 for (; i
< numMemMicroops
- 1; ++i
) {
1408 microOps
[uopIdx
++] = new MicroNeonStore64(
1409 machInst
, vx
+ (RegIndex
) i
, rnsp
, 16 * i
, memaccessFlags
,
1410 baseIsSP
, 16 /* accsize */, eSize
);
// Final store: access size is the residuum, or 16 when there is none.
1412 microOps
[uopIdx
++] = new MicroNeonStore64(
1413 machInst
, vx
+ (RegIndex
) i
, rnsp
, 16 * i
, memaccessFlags
, baseIsSP
,
1414 residuum
? residuum
: 16 /* accSize */, eSize
);
1416 // Writeback microop: the post-increment amount is encoded in "Rm": a
1417 // 64-bit general register OR as '11111' for an immediate value equal to
1418 // the total number of bytes transferred (i.e. 8, 16, 24, 32, 48 or 64)
1420 if (rm
!= ((RegIndex
) INTREG_X31
)) {
1421 microOps
[uopIdx
++] = new MicroAddXERegUop(machInst
, rnsp
, rnsp
, rm
,
1424 microOps
[uopIdx
++] = new MicroAddXiUop(machInst
, rnsp
, rnsp
,
// Sanity check: every reserved slot must have been filled.
1429 assert(uopIdx
== numMicroops
);
// Every microop except the last is flagged delayed-commit.
1431 for (int i
= 0; i
< numMicroops
- 1; i
++) {
1432 microOps
[i
]->setDelayedCommit();
1434 microOps
[numMicroops
- 1]->setLastMicroop();
// MacroVFPMemOp constructor: fills microOps[] for a VFP load/store multiple.
//
// count is offset for single-precision transfers and offset / 2 otherwise.
// A count of 0 or above NumFloatV7ArchRegs triggers a one-time warning, and
// counts above NumFloatV7ArchRegs are clamped to it. The array holds one
// microop per register for single transfers, two per register otherwise,
// plus one for the writeback when requested.
//
// Each loop iteration emits either a single MicroLdrFpUop / MicroStrFpUop or
// a DB + DT pair (MicroLdrDBFpUop + MicroLdrDTFpUop, or the Str
// equivalents), advancing vd after each one. Presumably `load` and `single`
// choose between them; the selecting conditions are not visible here --
// TODO confirm. The writeback adds or subtracts 4 * offset on rn.
//
// Finally the last microop is flagged as such, and every microop before it
// is marked delayed-commit.
1437 MacroVFPMemOp::MacroVFPMemOp(const char *mnem
, ExtMachInst machInst
,
1438 OpClass __opClass
, IntRegIndex rn
,
1439 RegIndex vd
, bool single
, bool up
,
1440 bool writeback
, bool load
, uint32_t offset
) :
1441 PredMacroOp(mnem
, machInst
, __opClass
)
1445 // The lowest order bit selects fldmx (set) or fldmd (clear). These seem
1446 // to be functionally identical except that fldmx is deprecated. For now
1447 // we'll assume they're otherwise interchangeable.
1448 int count
= (single
? offset
: (offset
/ 2));
1449 if (count
== 0 || count
> NumFloatV7ArchRegs
)
1450 warn_once("Bad offset field for VFP load/store multiple.\n");
1452 // Force there to be at least one microop so the macroop makes sense.
1455 if (count
> NumFloatV7ArchRegs
)
1456 count
= NumFloatV7ArchRegs
;
1458 numMicroops
= count
* (single
? 1 : 2) + (writeback
? 1 : 0);
1459 microOps
= new StaticInstPtr
[numMicroops
];
1467 for (int j
= 0; j
< count
; j
++) {
1470 microOps
[i
++] = new MicroLdrFpUop(machInst
, vd
++, rn
,
1473 microOps
[i
++] = new MicroLdrDBFpUop(machInst
, vd
++, rn
,
// The DT microop's displacement is offset by 4 from addr, in the
// direction selected by `up`.
1475 microOps
[i
++] = new MicroLdrDTFpUop(machInst
, vd
++, rn
, tempUp
,
1476 addr
+ (up
? 4 : -4));
1480 microOps
[i
++] = new MicroStrFpUop(machInst
, vd
++, rn
,
1483 microOps
[i
++] = new MicroStrDBFpUop(machInst
, vd
++, rn
,
1485 microOps
[i
++] = new MicroStrDTFpUop(machInst
, vd
++, rn
, tempUp
,
1486 addr
+ (up
? 4 : -4));
1490 addr
-= (single
? 4 : 8);
1491 // The microops don't handle negative displacement, so if we
1492 // hit zero, flip polarity and start adding.
1498 addr
+= (single
? 4 : 8);
1505 new MicroAddiUop(machInst
, rn
, rn
, 4 * offset
);
1508 new MicroSubiUop(machInst
, rn
, rn
, 4 * offset
);
// Sanity check: every allocated slot must have been filled.
1512 assert(numMicroops
== i
);
1513 microOps
[numMicroops
- 1]->setLastMicroop();
// Walk the array up to (not including) the last microop and mark each
// entry delayed-commit.
1515 for (StaticInstPtr
*curUop
= microOps
;
1516 !(*curUop
)->isLastMicroop(); curUop
++) {
1517 MicroOp
* uopPtr
= dynamic_cast<MicroOp
*>(curUop
->get());
1519 uopPtr
->setDelayedCommit();
// Disassembly for MicroIntImmOp: the text is accumulated in a local
// stringstream, and the immediate is written as "#<imm>" in decimal.
// pc and symtab are not referenced by the statements visible here.
1524 MicroIntImmOp::generateDisassembly(Addr pc
, const SymbolTable
*symtab
) const
1526 std::stringstream ss
;
1532 ccprintf(ss
, "#%d", imm
);
// Disassembly for MicroIntImmXOp: the text is accumulated in a local
// stringstream, and the immediate is written as "#<imm>" in decimal.
// pc and symtab are not referenced by the statements visible here.
1537 MicroIntImmXOp::generateDisassembly(Addr pc
, const SymbolTable
*symtab
) const
1539 std::stringstream ss
;
1545 ccprintf(ss
, "#%d", imm
);
// Disassembly for MicroSetPCCPSR, accumulated in a local stringstream.
// NOTE(review): the statements that fill the stream are not visible here.
1550 MicroSetPCCPSR::generateDisassembly(Addr pc
, const SymbolTable
*symtab
) const
1552 std::stringstream ss
;
// Disassembly for MicroIntRegXOp, accumulated in a local stringstream.
// The urc operand is printed through printExtendOperand together with the
// extend type and shift amount held by this microop.
1559 MicroIntRegXOp::generateDisassembly(Addr pc
, const SymbolTable
*symtab
) const
1561 std::stringstream ss
;
1566 printExtendOperand(false, ss
, (IntRegIndex
)urc
, type
, shiftAmt
);
// Disassembly for MicroIntMov, accumulated in a local stringstream.
// NOTE(review): the statements that fill the stream are not visible here.
1571 MicroIntMov::generateDisassembly(Addr pc
, const SymbolTable
*symtab
) const
1573 std::stringstream ss
;
// Disassembly for MicroIntOp, accumulated in a local stringstream.
// NOTE(review): the statements that fill the stream are not visible here.
1582 MicroIntOp::generateDisassembly(Addr pc
, const SymbolTable
*symtab
) const
1584 std::stringstream ss
;
// Disassembly for MicroMemOp, accumulated in a local stringstream.
// One visible path prints the ura register offset by FP_Reg_Base
// (presumably the floating-point case -- TODO confirm the guarding
// condition); the immediate is written as "#<imm>" in decimal.
1595 MicroMemOp::generateDisassembly(Addr pc
, const SymbolTable
*symtab
) const
1597 std::stringstream ss
;
1600 printReg(ss
, ura
+ FP_Reg_Base
);
1606 ccprintf(ss
, "#%d", imm
);
// Disassembly for MicroMemPairOp, accumulated in a local stringstream.
// The visible statements print the dest2 register and then the immediate
// as "#<imm>" in decimal.
1612 MicroMemPairOp::generateDisassembly(Addr pc
, const SymbolTable
*symtab
) const
1614 std::stringstream ss
;
1618 printReg(ss
, dest2
);
1622 ccprintf(ss
, "#%d", imm
);