arm: Delete authors lists from the arm files.
[gem5.git] src/arch/arm/insts/macromem.cc
1 /*
2 * Copyright (c) 2010-2014 ARM Limited
3 * All rights reserved
4 *
5 * The license below extends only to copyright in the software and shall
6 * not be construed as granting a license to any other intellectual
7 * property including but not limited to intellectual property relating
8 * to a hardware implementation of the functionality of the software
9 * licensed hereunder. You may use the software subject to the license
10 * terms below provided that you ensure that this notice is replicated
11 * unmodified and in its entirety in all distributions of the software,
12 * modified or unmodified, in source code or in binary form.
13 *
14 * Copyright (c) 2007-2008 The Florida State University
15 * All rights reserved.
16 *
17 * Redistribution and use in source and binary forms, with or without
18 * modification, are permitted provided that the following conditions are
19 * met: redistributions of source code must retain the above copyright
20 * notice, this list of conditions and the following disclaimer;
21 * redistributions in binary form must reproduce the above copyright
22 * notice, this list of conditions and the following disclaimer in the
23 * documentation and/or other materials provided with the distribution;
24 * neither the name of the copyright holders nor the names of its
25 * contributors may be used to endorse or promote products derived from
26 * this software without specific prior written permission.
27 *
28 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
29 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
30 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
31 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
32 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
33 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
34 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
35 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
36 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
37 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
38 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
39 */
40
41 #include "arch/arm/insts/macromem.hh"
42
43 #include <sstream>
44
45 #include "arch/arm/generated/decoder.hh"
46 #include "arch/arm/insts/neon64_mem.hh"
47
48 using namespace std;
49 using namespace ArmISAInst;
50
51 namespace ArmISA
52 {
53
54 MacroMemOp::MacroMemOp(const char *mnem, ExtMachInst machInst,
55 OpClass __opClass, IntRegIndex rn,
56 bool index, bool up, bool user, bool writeback,
57 bool load, uint32_t reglist) :
58 PredMacroOp(mnem, machInst, __opClass)
59 {
60 uint32_t regs = reglist;
61 uint32_t ones = number_of_ones(reglist);
62 uint32_t mem_ops = ones;
63
64 // Copy the base address register if we overwrite it, or if this
65 // instruction is effectively a no-op (we still have to do something).
66 bool copy_base = (bits(reglist, rn) && load) || !ones;
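// For the S-bit (^) forms, bit 15 of the register list distinguishes an
// exception return (PC in the list; CPSR restored from SPSR) from a
// plain user-bank register transfer.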
67 bool force_user = user && !bits(reglist, 15);
68 bool exception_ret = user && bits(reglist, 15);
69 bool pc_temp = load && writeback && bits(reglist, 15);
70
71 if (!ones) {
72 numMicroops = 1;
73 } else if (load) {
74 numMicroops = ((ones + 1) / 2)
75 + ((ones % 2 == 0 && exception_ret) ? 1 : 0)
76 + (copy_base ? 1 : 0)
77 + (writeback? 1 : 0)
78 + (pc_temp ? 1 : 0);
79 } else {
80 numMicroops = ones + (writeback ? 1 : 0);
81 }
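// Worked example (illustrative): ldmia r0!, {r1-r4} gives ones == 4,
// so the loads pair into (4 + 1) / 2 == 2 MicroLdr2Uops and one
// writeback uop follows: numMicroops == 3.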
82
83 microOps = new StaticInstPtr[numMicroops];
84
85 uint32_t addr = 0;
86
87 if (!up)
88 addr = (ones << 2) - 4;
89
90 if (!index)
91 addr += 4;
92
93 StaticInstPtr *uop = microOps;
94
95 // Add 0 to Rn and stick it in ureg0.
96 // This is equivalent to a move.
97 if (copy_base)
98 *uop++ = new MicroAddiUop(machInst, INTREG_UREG0, rn, 0);
99
100 unsigned reg = 0;
101 while (mem_ops != 0) {
102 // Do load operations in pairs if possible
103 if (load && mem_ops >= 2 &&
104 !(mem_ops == 2 && bits(regs, INTREG_PC) && exception_ret)) {
105 // 64-bit memory operation
106 // Find 2 set register bits (clear them after finding)
107 unsigned reg_idx1;
108 unsigned reg_idx2;
109
110 // Find the first register
111 while (!bits(regs, reg)) reg++;
112 replaceBits(regs, reg, 0);
113 reg_idx1 = force_user ? intRegInMode(MODE_USER, reg) : reg;
114
115 // Find the second register
116 while (!bits(regs, reg)) reg++;
117 replaceBits(regs, reg, 0);
118 reg_idx2 = force_user ? intRegInMode(MODE_USER, reg) : reg;
119
120 // Load into temp reg if necessary
121 if (reg_idx2 == INTREG_PC && pc_temp)
122 reg_idx2 = INTREG_UREG1;
123
124 // Actually load both registers from memory
125 *uop = new MicroLdr2Uop(machInst, reg_idx1, reg_idx2,
126 copy_base ? INTREG_UREG0 : rn, up, addr);
127
128 if (!writeback && reg_idx2 == INTREG_PC) {
129 // No writeback if idx==pc, set appropriate flags
130 (*uop)->setFlag(StaticInst::IsControl);
131 (*uop)->setFlag(StaticInst::IsIndirectControl);
132
133 if (!(condCode == COND_AL || condCode == COND_UC))
134 (*uop)->setFlag(StaticInst::IsCondControl);
135 else
136 (*uop)->setFlag(StaticInst::IsUncondControl);
137 }
138
139 if (up) addr += 8;
140 else addr -= 8;
141 mem_ops -= 2;
142 } else {
143 // 32-bit memory operation
144 // Find register for operation
145 unsigned reg_idx;
146 while (!bits(regs, reg)) reg++;
147 replaceBits(regs, reg, 0);
148 reg_idx = force_user ? intRegInMode(MODE_USER, reg) : reg;
149
150 if (load) {
151 if (writeback && reg_idx == INTREG_PC) {
152 // If this instruction changes the PC and performs a
153 // writeback, ensure the pc load/branch is the last uop.
154 // Load into a temp reg here.
155 *uop = new MicroLdrUop(machInst, INTREG_UREG1,
156 copy_base ? INTREG_UREG0 : rn, up, addr);
157 } else if (reg_idx == INTREG_PC && exception_ret) {
158 // Special handling for exception return
159 *uop = new MicroLdrRetUop(machInst, reg_idx,
160 copy_base ? INTREG_UREG0 : rn, up, addr);
161 } else {
162 // standard single load uop
163 *uop = new MicroLdrUop(machInst, reg_idx,
164 copy_base ? INTREG_UREG0 : rn, up, addr);
165 }
166
167 // Loading pc as last operation? Set appropriate flags.
168 if (!writeback && reg_idx == INTREG_PC) {
169 (*uop)->setFlag(StaticInst::IsControl);
170 (*uop)->setFlag(StaticInst::IsIndirectControl);
171
172 if (!(condCode == COND_AL || condCode == COND_UC))
173 (*uop)->setFlag(StaticInst::IsCondControl);
174 else
175 (*uop)->setFlag(StaticInst::IsUncondControl);
176 }
177 } else {
178 *uop = new MicroStrUop(machInst, reg_idx, rn, up, addr);
179 }
180
181 if (up) addr += 4;
182 else addr -= 4;
183 --mem_ops;
184 }
185
186 // Load/store micro-op generated, go to next uop
187 ++uop;
188 }
189
190 if (writeback && ones) {
191 // Perform writeback uop operation
192 if (up)
193 *uop++ = new MicroAddiUop(machInst, rn, rn, ones * 4);
194 else
195 *uop++ = new MicroSubiUop(machInst, rn, rn, ones * 4);
196
197 // Write PC after address writeback?
198 if (pc_temp) {
199 if (exception_ret) {
200 *uop = new MicroUopRegMovRet(machInst, 0, INTREG_UREG1);
201 } else {
202 *uop = new MicroUopRegMov(machInst, INTREG_PC, INTREG_UREG1);
203 }
204 (*uop)->setFlag(StaticInst::IsControl);
205 (*uop)->setFlag(StaticInst::IsIndirectControl);
206
207 if (!(condCode == COND_AL || condCode == COND_UC))
208 (*uop)->setFlag(StaticInst::IsCondControl);
209 else
210 (*uop)->setFlag(StaticInst::IsUncondControl);
211
212 if (rn == INTREG_SP)
213 (*uop)->setFlag(StaticInst::IsReturn);
214
215 ++uop;
216 }
217 }
218
219 --uop;
220 (*uop)->setLastMicroop();
221 microOps[0]->setFirstMicroop();
222
223 /* Take the control flags from the last microop for the macroop */
224 if ((*uop)->isControl())
225 setFlag(StaticInst::IsControl);
226 if ((*uop)->isCondCtrl())
227 setFlag(StaticInst::IsCondControl);
228 if ((*uop)->isUncondCtrl())
229 setFlag(StaticInst::IsUncondControl);
230 if ((*uop)->isIndirectCtrl())
231 setFlag(StaticInst::IsIndirectControl);
232 if ((*uop)->isReturn())
233 setFlag(StaticInst::IsReturn);
234
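// Marking all but the last microop delayed-commit below keeps the CPU
// from, e.g., taking an interrupt in the middle of the macroop.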
235 for (StaticInstPtr *uop = microOps; !(*uop)->isLastMicroop(); uop++) {
236 (*uop)->setDelayedCommit();
237 }
238 }
239
240 PairMemOp::PairMemOp(const char *mnem, ExtMachInst machInst, OpClass __opClass,
241 uint32_t size, bool fp, bool load, bool noAlloc,
242 bool signExt, bool exclusive, bool acrel,
243 int64_t imm, AddrMode mode,
244 IntRegIndex rn, IntRegIndex rt, IntRegIndex rt2) :
245 PredMacroOp(mnem, machInst, __opClass)
246 {
247 bool post = (mode == AddrMd_PostIndex);
248 bool writeback = (mode != AddrMd_Offset);
249
250 if (load) {
251 // Adding 4 makes the integer division round up, so a size-4 load
// pair still takes one uop
252 numMicroops = (post ? 0 : 1) + ((size + 4) / 8) + (writeback ? 1 : 0);
253 } else {
254 numMicroops = (post ? 0 : 1) + (size / 4) + (writeback ? 1 : 0);
255 }
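// Note: size is the per-register access size in bytes. E.g. a 64-bit
// load pair (size == 8) folds into one MicroLdPairUop, while the
// matching store is split into two MicroStrXImmUops below.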
256 microOps = new StaticInstPtr[numMicroops];
257
258 StaticInstPtr *uop = microOps;
259
260 rn = makeSP(rn);
261
262 if (!post) {
263 *uop++ = new MicroAddXiSpAlignUop(machInst, INTREG_UREG0, rn,
264 post ? 0 : imm);
265 }
266
267 if (fp) {
268 if (size == 16) {
269 if (load) {
270 *uop++ = new MicroLdFp16Uop(machInst, rt,
271 post ? rn : INTREG_UREG0, 0, noAlloc, exclusive, acrel);
272 *uop++ = new MicroLdFp16Uop(machInst, rt2,
273 post ? rn : INTREG_UREG0, 16, noAlloc, exclusive, acrel);
274 } else {
275 *uop++ = new MicroStrQBFpXImmUop(machInst, rt,
276 post ? rn : INTREG_UREG0, 0, noAlloc, exclusive, acrel);
277 *uop++ = new MicroStrQTFpXImmUop(machInst, rt,
278 post ? rn : INTREG_UREG0, 0, noAlloc, exclusive, acrel);
279 *uop++ = new MicroStrQBFpXImmUop(machInst, rt2,
280 post ? rn : INTREG_UREG0, 16, noAlloc, exclusive, acrel);
281 *uop++ = new MicroStrQTFpXImmUop(machInst, rt2,
282 post ? rn : INTREG_UREG0, 16, noAlloc, exclusive, acrel);
283 }
284 } else if (size == 8) {
285 if (load) {
286 *uop++ = new MicroLdPairFp8Uop(machInst, rt, rt2,
287 post ? rn : INTREG_UREG0, 0, noAlloc, exclusive, acrel);
288 } else {
289 *uop++ = new MicroStrFpXImmUop(machInst, rt,
290 post ? rn : INTREG_UREG0, 0, noAlloc, exclusive, acrel);
291 *uop++ = new MicroStrFpXImmUop(machInst, rt2,
292 post ? rn : INTREG_UREG0, 8, noAlloc, exclusive, acrel);
293 }
294 } else if (size == 4) {
295 if (load) {
296 *uop++ = new MicroLdrDFpXImmUop(machInst, rt, rt2,
297 post ? rn : INTREG_UREG0, 0, noAlloc, exclusive, acrel);
298 } else {
299 *uop++ = new MicroStrDFpXImmUop(machInst, rt, rt2,
300 post ? rn : INTREG_UREG0, 0, noAlloc, exclusive, acrel);
301 }
302 }
303 } else {
304 if (size == 8) {
305 if (load) {
306 *uop++ = new MicroLdPairUop(machInst, rt, rt2,
307 post ? rn : INTREG_UREG0, 0, noAlloc, exclusive, acrel);
308 } else {
309 *uop++ = new MicroStrXImmUop(machInst, rt, post ? rn : INTREG_UREG0,
310 0, noAlloc, exclusive, acrel);
311 *uop++ = new MicroStrXImmUop(machInst, rt2, post ? rn : INTREG_UREG0,
312 size, noAlloc, exclusive, acrel);
313 }
314 } else if (size == 4) {
315 if (load) {
316 if (signExt) {
317 *uop++ = new MicroLdrDSXImmUop(machInst, rt, rt2,
318 post ? rn : INTREG_UREG0, 0, noAlloc, exclusive, acrel);
319 } else {
320 *uop++ = new MicroLdrDUXImmUop(machInst, rt, rt2,
321 post ? rn : INTREG_UREG0, 0, noAlloc, exclusive, acrel);
322 }
323 } else {
324 *uop++ = new MicroStrDXImmUop(machInst, rt, rt2,
325 post ? rn : INTREG_UREG0, 0, noAlloc, exclusive, acrel);
326 }
327 }
328 }
329
330 if (writeback) {
331 *uop++ = new MicroAddXiUop(machInst, rn, post ? rn : INTREG_UREG0,
332 post ? imm : 0);
333 }
334
335 assert(uop == &microOps[numMicroops]);
336 (*--uop)->setLastMicroop();
337 microOps[0]->setFirstMicroop();
338
339 for (StaticInstPtr *curUop = microOps;
340 !(*curUop)->isLastMicroop(); curUop++) {
341 (*curUop)->setDelayedCommit();
342 }
343 }
344
345 BigFpMemImmOp::BigFpMemImmOp(const char *mnem, ExtMachInst machInst,
346 OpClass __opClass, bool load, IntRegIndex dest,
347 IntRegIndex base, int64_t imm) :
348 PredMacroOp(mnem, machInst, __opClass)
349 {
350 numMicroops = load ? 1 : 2;
351 microOps = new StaticInstPtr[numMicroops];
352
353 StaticInstPtr *uop = microOps;
354
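// A 128-bit load fits in a single microop, but the store is split into
// bottom (QB) and top (QT) halves, presumably because one store microop
// moves at most 64 bits here.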
355 if (load) {
356 *uop = new MicroLdFp16Uop(machInst, dest, base, imm);
357 } else {
358 *uop = new MicroStrQBFpXImmUop(machInst, dest, base, imm);
359 (*uop)->setDelayedCommit();
360 *++uop = new MicroStrQTFpXImmUop(machInst, dest, base, imm);
361 }
362 (*uop)->setLastMicroop();
363 microOps[0]->setFirstMicroop();
364 }
365
366 BigFpMemPostOp::BigFpMemPostOp(const char *mnem, ExtMachInst machInst,
367 OpClass __opClass, bool load, IntRegIndex dest,
368 IntRegIndex base, int64_t imm) :
369 PredMacroOp(mnem, machInst, __opClass)
370 {
371 numMicroops = load ? 2 : 3;
372 microOps = new StaticInstPtr[numMicroops];
373
374 StaticInstPtr *uop = microOps;
375
376 if (load) {
377 *uop++ = new MicroLdFp16Uop(machInst, dest, base, 0);
378 } else {
379 *uop++ = new MicroStrQBFpXImmUop(machInst, dest, base, 0);
380 *uop++ = new MicroStrQTFpXImmUop(machInst, dest, base, 0);
381 }
382 *uop = new MicroAddXiUop(machInst, base, base, imm);
383 (*uop)->setLastMicroop();
384 microOps[0]->setFirstMicroop();
385
386 for (StaticInstPtr *curUop = microOps;
387 !(*curUop)->isLastMicroop(); curUop++) {
388 (*curUop)->setDelayedCommit();
389 }
390 }
391
392 BigFpMemPreOp::BigFpMemPreOp(const char *mnem, ExtMachInst machInst,
393 OpClass __opClass, bool load, IntRegIndex dest,
394 IntRegIndex base, int64_t imm) :
395 PredMacroOp(mnem, machInst, __opClass)
396 {
397 numMicroops = load ? 2 : 3;
398 microOps = new StaticInstPtr[numMicroops];
399
400 StaticInstPtr *uop = microOps;
401
402 if (load) {
403 *uop++ = new MicroLdFp16Uop(machInst, dest, base, imm);
404 } else {
405 *uop++ = new MicroStrQBFpXImmUop(machInst, dest, base, imm);
406 *uop++ = new MicroStrQTFpXImmUop(machInst, dest, base, imm);
407 }
408 *uop = new MicroAddXiUop(machInst, base, base, imm);
409 (*uop)->setLastMicroop();
410 microOps[0]->setFirstMicroop();
411
412 for (StaticInstPtr *curUop = microOps;
413 !(*curUop)->isLastMicroop(); curUop++) {
414 (*curUop)->setDelayedCommit();
415 }
416 }
417
418 BigFpMemRegOp::BigFpMemRegOp(const char *mnem, ExtMachInst machInst,
419 OpClass __opClass, bool load, IntRegIndex dest,
420 IntRegIndex base, IntRegIndex offset,
421 ArmExtendType type, int64_t imm) :
422 PredMacroOp(mnem, machInst, __opClass)
423 {
424 numMicroops = load ? 1 : 2;
425 microOps = new StaticInstPtr[numMicroops];
426
427 StaticInstPtr *uop = microOps;
428
429 if (load) {
430 *uop = new MicroLdFp16RegUop(machInst, dest, base,
431 offset, type, imm);
432 } else {
433 *uop = new MicroStrQBFpXRegUop(machInst, dest, base,
434 offset, type, imm);
435 (*uop)->setDelayedCommit();
436 *++uop = new MicroStrQTFpXRegUop(machInst, dest, base,
437 offset, type, imm);
438 }
439
440 (*uop)->setLastMicroop();
441 microOps[0]->setFirstMicroop();
442 }
443
444 BigFpMemLitOp::BigFpMemLitOp(const char *mnem, ExtMachInst machInst,
445 OpClass __opClass, IntRegIndex dest,
446 int64_t imm) :
447 PredMacroOp(mnem, machInst, __opClass)
448 {
449 numMicroops = 1;
450 microOps = new StaticInstPtr[numMicroops];
451
452 microOps[0] = new MicroLdFp16LitUop(machInst, dest, imm);
453 microOps[0]->setLastMicroop();
454 microOps[0]->setFirstMicroop();
455 }
456
457 VldMultOp::VldMultOp(const char *mnem, ExtMachInst machInst, OpClass __opClass,
458 unsigned elems, RegIndex rn, RegIndex vd, unsigned regs,
459 unsigned inc, uint32_t size, uint32_t align, RegIndex rm) :
460 PredMacroOp(mnem, machInst, __opClass)
461 {
462 assert(regs > 0 && regs <= 4);
463 assert(regs % elems == 0);
464
465 numMicroops = (regs > 2) ? 2 : 1;
466 bool wb = (rm != 15);
467 bool deinterleave = (elems > 1);
468
469 if (wb) numMicroops++;
470 if (deinterleave) numMicroops += (regs / elems);
471 microOps = new StaticInstPtr[numMicroops];
472
473 RegIndex rMid = deinterleave ? VecSpecialElem : vd * 2;
474
475 uint32_t noAlign = TLB::MustBeOne;
476
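// Sketch of the structure: deinterleaving loads first land in the
// special temporary vector area (rMid == VecSpecialElem), and separate
// deinterleave uops then scatter the elements into vd; single-element
// variants load straight into vd's registers.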
477 unsigned uopIdx = 0;
478 switch (regs) {
479 case 4:
480 microOps[uopIdx++] = newNeonMemInst<MicroLdrNeon16Uop>(
481 size, machInst, rMid, rn, 0, align);
482 microOps[uopIdx++] = newNeonMemInst<MicroLdrNeon16Uop>(
483 size, machInst, rMid + 4, rn, 16, noAlign);
484 break;
485 case 3:
486 microOps[uopIdx++] = newNeonMemInst<MicroLdrNeon16Uop>(
487 size, machInst, rMid, rn, 0, align);
488 microOps[uopIdx++] = newNeonMemInst<MicroLdrNeon8Uop>(
489 size, machInst, rMid + 4, rn, 16, noAlign);
490 break;
491 case 2:
492 microOps[uopIdx++] = newNeonMemInst<MicroLdrNeon16Uop>(
493 size, machInst, rMid, rn, 0, align);
494 break;
495 case 1:
496 microOps[uopIdx++] = newNeonMemInst<MicroLdrNeon8Uop>(
497 size, machInst, rMid, rn, 0, align);
498 break;
499 default:
500 // Unknown number of registers
501 microOps[uopIdx++] = new Unknown(machInst);
502 }
503 if (wb) {
504 if (rm != 15 && rm != 13) {
505 microOps[uopIdx++] =
506 new MicroAddUop(machInst, rn, rn, rm, 0, ArmISA::LSL);
507 } else {
508 microOps[uopIdx++] =
509 new MicroAddiUop(machInst, rn, rn, regs * 8);
510 }
511 }
512 if (deinterleave) {
513 switch (elems) {
514 case 4:
515 assert(regs == 4);
516 microOps[uopIdx++] = newNeonMixInst<MicroDeintNeon8Uop>(
517 size, machInst, vd * 2, rMid, inc * 2);
518 break;
519 case 3:
520 assert(regs == 3);
521 microOps[uopIdx++] = newNeonMixInst<MicroDeintNeon6Uop>(
522 size, machInst, vd * 2, rMid, inc * 2);
523 break;
524 case 2:
525 assert(regs == 4 || regs == 2);
526 if (regs == 4) {
527 microOps[uopIdx++] = newNeonMixInst<MicroDeintNeon4Uop>(
528 size, machInst, vd * 2, rMid, inc * 2);
529 microOps[uopIdx++] = newNeonMixInst<MicroDeintNeon4Uop>(
530 size, machInst, vd * 2 + 2, rMid + 4, inc * 2);
531 } else {
532 microOps[uopIdx++] = newNeonMixInst<MicroDeintNeon4Uop>(
533 size, machInst, vd * 2, rMid, inc * 2);
534 }
535 break;
536 default:
537 // Bad number of elements to deinterleave
538 microOps[uopIdx++] = new Unknown(machInst);
539 }
540 }
541 assert(uopIdx == numMicroops);
542
543 for (unsigned i = 0; i < numMicroops - 1; i++) {
544 MicroOp * uopPtr = dynamic_cast<MicroOp *>(microOps[i].get());
545 assert(uopPtr);
546 uopPtr->setDelayedCommit();
547 }
548 microOps[0]->setFirstMicroop();
549 microOps[numMicroops - 1]->setLastMicroop();
550 }
551
552 VldSingleOp::VldSingleOp(const char *mnem, ExtMachInst machInst,
553 OpClass __opClass, bool all, unsigned elems,
554 RegIndex rn, RegIndex vd, unsigned regs,
555 unsigned inc, uint32_t size, uint32_t align,
556 RegIndex rm, unsigned lane) :
557 PredMacroOp(mnem, machInst, __opClass)
558 {
559 assert(regs > 0 && regs <= 4);
560 assert(regs % elems == 0);
561
562 unsigned eBytes = (1 << size);
563 unsigned loadSize = eBytes * elems;
564 unsigned loadRegs M5_VAR_USED =
565 (loadSize + sizeof(uint32_t) - 1) / sizeof(uint32_t);
566
567 assert(loadRegs > 0 && loadRegs <= 4);
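// Illustrative example: a VLD4 to one lane of 16-bit elements has
// eBytes == 2 and elems == 4, so loadSize == 8 and the switch below
// picks MicroLdrNeon8Uop<uint16_t>.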
568
569 numMicroops = 1;
570 bool wb = (rm != 15);
571
572 if (wb) numMicroops++;
573 numMicroops += (regs / elems);
574 microOps = new StaticInstPtr[numMicroops];
575
576 RegIndex ufp0 = VecSpecialElem;
577
578 unsigned uopIdx = 0;
579 switch (loadSize) {
580 case 1:
581 microOps[uopIdx++] = new MicroLdrNeon1Uop<uint8_t>(
582 machInst, ufp0, rn, 0, align);
583 break;
584 case 2:
585 if (eBytes == 2) {
586 microOps[uopIdx++] = new MicroLdrNeon2Uop<uint16_t>(
587 machInst, ufp0, rn, 0, align);
588 } else {
589 microOps[uopIdx++] = new MicroLdrNeon2Uop<uint8_t>(
590 machInst, ufp0, rn, 0, align);
591 }
592 break;
593 case 3:
594 microOps[uopIdx++] = new MicroLdrNeon3Uop<uint8_t>(
595 machInst, ufp0, rn, 0, align);
596 break;
597 case 4:
598 switch (eBytes) {
599 case 1:
600 microOps[uopIdx++] = new MicroLdrNeon4Uop<uint8_t>(
601 machInst, ufp0, rn, 0, align);
602 break;
603 case 2:
604 microOps[uopIdx++] = new MicroLdrNeon4Uop<uint16_t>(
605 machInst, ufp0, rn, 0, align);
606 break;
607 case 4:
608 microOps[uopIdx++] = new MicroLdrNeon4Uop<uint32_t>(
609 machInst, ufp0, rn, 0, align);
610 break;
611 }
612 break;
613 case 6:
614 microOps[uopIdx++] = new MicroLdrNeon6Uop<uint16_t>(
615 machInst, ufp0, rn, 0, align);
616 break;
617 case 8:
618 switch (eBytes) {
619 case 2:
620 microOps[uopIdx++] = new MicroLdrNeon8Uop<uint16_t>(
621 machInst, ufp0, rn, 0, align);
622 break;
623 case 4:
624 microOps[uopIdx++] = new MicroLdrNeon8Uop<uint32_t>(
625 machInst, ufp0, rn, 0, align);
626 break;
627 }
628 break;
629 case 12:
630 microOps[uopIdx++] = new MicroLdrNeon12Uop<uint32_t>(
631 machInst, ufp0, rn, 0, align);
632 break;
633 case 16:
634 microOps[uopIdx++] = new MicroLdrNeon16Uop<uint32_t>(
635 machInst, ufp0, rn, 0, align);
636 break;
637 default:
638 // Unrecognized load size
639 microOps[uopIdx++] = new Unknown(machInst);
640 }
641 if (wb) {
642 if (rm != 15 && rm != 13) {
643 microOps[uopIdx++] =
644 new MicroAddUop(machInst, rn, rn, rm, 0, ArmISA::LSL);
645 } else {
646 microOps[uopIdx++] =
647 new MicroAddiUop(machInst, rn, rn, loadSize);
648 }
649 }
650 switch (elems) {
651 case 4:
652 assert(regs == 4);
653 switch (size) {
654 case 0:
655 if (all) {
656 microOps[uopIdx++] = new MicroUnpackAllNeon2to8Uop<uint8_t>(
657 machInst, vd * 2, ufp0, inc * 2);
658 } else {
659 microOps[uopIdx++] = new MicroUnpackNeon2to8Uop<uint8_t>(
660 machInst, vd * 2, ufp0, inc * 2, lane);
661 }
662 break;
663 case 1:
664 if (all) {
665 microOps[uopIdx++] = new MicroUnpackAllNeon2to8Uop<uint16_t>(
666 machInst, vd * 2, ufp0, inc * 2);
667 } else {
668 microOps[uopIdx++] = new MicroUnpackNeon2to8Uop<uint16_t>(
669 machInst, vd * 2, ufp0, inc * 2, lane);
670 }
671 break;
672 case 2:
673 if (all) {
674 microOps[uopIdx++] = new MicroUnpackAllNeon4to8Uop<uint32_t>(
675 machInst, vd * 2, ufp0, inc * 2);
676 } else {
677 microOps[uopIdx++] = new MicroUnpackNeon4to8Uop<uint32_t>(
678 machInst, vd * 2, ufp0, inc * 2, lane);
679 }
680 break;
681 default:
682 // Bad size
683 microOps[uopIdx++] = new Unknown(machInst);
684 break;
685 }
686 break;
687 case 3:
688 assert(regs == 3);
689 switch (size) {
690 case 0:
691 if (all) {
692 microOps[uopIdx++] = new MicroUnpackAllNeon2to6Uop<uint8_t>(
693 machInst, vd * 2, ufp0, inc * 2);
694 } else {
695 microOps[uopIdx++] = new MicroUnpackNeon2to6Uop<uint8_t>(
696 machInst, vd * 2, ufp0, inc * 2, lane);
697 }
698 break;
699 case 1:
700 if (all) {
701 microOps[uopIdx++] = new MicroUnpackAllNeon2to6Uop<uint16_t>(
702 machInst, vd * 2, ufp0, inc * 2);
703 } else {
704 microOps[uopIdx++] = new MicroUnpackNeon2to6Uop<uint16_t>(
705 machInst, vd * 2, ufp0, inc * 2, lane);
706 }
707 break;
708 case 2:
709 if (all) {
710 microOps[uopIdx++] = new MicroUnpackAllNeon4to6Uop<uint32_t>(
711 machInst, vd * 2, ufp0, inc * 2);
712 } else {
713 microOps[uopIdx++] = new MicroUnpackNeon4to6Uop<uint32_t>(
714 machInst, vd * 2, ufp0, inc * 2, lane);
715 }
716 break;
717 default:
718 // Bad size
719 microOps[uopIdx++] = new Unknown(machInst);
720 break;
721 }
722 break;
723 case 2:
724 assert(regs == 2);
725 assert(loadRegs <= 2);
726 switch (size) {
727 case 0:
728 if (all) {
729 microOps[uopIdx++] = new MicroUnpackAllNeon2to4Uop<uint8_t>(
730 machInst, vd * 2, ufp0, inc * 2);
731 } else {
732 microOps[uopIdx++] = new MicroUnpackNeon2to4Uop<uint8_t>(
733 machInst, vd * 2, ufp0, inc * 2, lane);
734 }
735 break;
736 case 1:
737 if (all) {
738 microOps[uopIdx++] = new MicroUnpackAllNeon2to4Uop<uint16_t>(
739 machInst, vd * 2, ufp0, inc * 2);
740 } else {
741 microOps[uopIdx++] = new MicroUnpackNeon2to4Uop<uint16_t>(
742 machInst, vd * 2, ufp0, inc * 2, lane);
743 }
744 break;
745 case 2:
746 if (all) {
747 microOps[uopIdx++] = new MicroUnpackAllNeon2to4Uop<uint32_t>(
748 machInst, vd * 2, ufp0, inc * 2);
749 } else {
750 microOps[uopIdx++] = new MicroUnpackNeon2to4Uop<uint32_t>(
751 machInst, vd * 2, ufp0, inc * 2, lane);
752 }
753 break;
754 default:
755 // Bad size
756 microOps[uopIdx++] = new Unknown(machInst);
757 break;
758 }
759 break;
760 case 1:
761 assert(regs == 1 || (all && regs == 2));
762 assert(loadRegs <= 2);
763 for (unsigned offset = 0; offset < regs; offset++) {
764 switch (size) {
765 case 0:
766 if (all) {
767 microOps[uopIdx++] =
768 new MicroUnpackAllNeon2to2Uop<uint8_t>(
769 machInst, (vd + offset) * 2, ufp0, inc * 2);
770 } else {
771 microOps[uopIdx++] =
772 new MicroUnpackNeon2to2Uop<uint8_t>(
773 machInst, (vd + offset) * 2, ufp0, inc * 2, lane);
774 }
775 break;
776 case 1:
777 if (all) {
778 microOps[uopIdx++] =
779 new MicroUnpackAllNeon2to2Uop<uint16_t>(
780 machInst, (vd + offset) * 2, ufp0, inc * 2);
781 } else {
782 microOps[uopIdx++] =
783 new MicroUnpackNeon2to2Uop<uint16_t>(
784 machInst, (vd + offset) * 2, ufp0, inc * 2, lane);
785 }
786 break;
787 case 2:
788 if (all) {
789 microOps[uopIdx++] =
790 new MicroUnpackAllNeon2to2Uop<uint32_t>(
791 machInst, (vd + offset) * 2, ufp0, inc * 2);
792 } else {
793 microOps[uopIdx++] =
794 new MicroUnpackNeon2to2Uop<uint32_t>(
795 machInst, (vd + offset) * 2, ufp0, inc * 2, lane);
796 }
797 break;
798 default:
799 // Bad size
800 microOps[uopIdx++] = new Unknown(machInst);
801 break;
802 }
803 }
804 break;
805 default:
806 // Bad number of elements to unpack
807 microOps[uopIdx++] = new Unknown(machInst);
808 }
809 assert(uopIdx == numMicroops);
810
811 for (unsigned i = 0; i < numMicroops - 1; i++) {
812 MicroOp * uopPtr = dynamic_cast<MicroOp *>(microOps[i].get());
813 assert(uopPtr);
814 uopPtr->setDelayedCommit();
815 }
816 microOps[0]->setFirstMicroop();
817 microOps[numMicroops - 1]->setLastMicroop();
818 }
819
820 VstMultOp::VstMultOp(const char *mnem, ExtMachInst machInst, OpClass __opClass,
821 unsigned elems, RegIndex rn, RegIndex vd, unsigned regs,
822 unsigned inc, uint32_t size, uint32_t align, RegIndex rm) :
823 PredMacroOp(mnem, machInst, __opClass)
824 {
825 assert(regs > 0 && regs <= 4);
826 assert(regs % elems == 0);
827
828 numMicroops = (regs > 2) ? 2 : 1;
829 bool wb = (rm != 15);
830 bool interleave = (elems > 1);
831
832 if (wb) numMicroops++;
833 if (interleave) numMicroops += (regs / elems);
834 microOps = new StaticInstPtr[numMicroops];
835
836 uint32_t noAlign = TLB::MustBeOne;
837
838 RegIndex rMid = interleave ? VecSpecialElem : vd * 2;
839
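// Stores mirror the load path: interleave uops first gather vd's
// elements into the temporary area (rMid), then the Neon store uops
// below write that area out to memory.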
840 unsigned uopIdx = 0;
841 if (interleave) {
842 switch (elems) {
843 case 4:
844 assert(regs == 4);
845 microOps[uopIdx++] = newNeonMixInst<MicroInterNeon8Uop>(
846 size, machInst, rMid, vd * 2, inc * 2);
847 break;
848 case 3:
849 assert(regs == 3);
850 microOps[uopIdx++] = newNeonMixInst<MicroInterNeon6Uop>(
851 size, machInst, rMid, vd * 2, inc * 2);
852 break;
853 case 2:
854 assert(regs == 4 || regs == 2);
855 if (regs == 4) {
856 microOps[uopIdx++] = newNeonMixInst<MicroInterNeon4Uop>(
857 size, machInst, rMid, vd * 2, inc * 2);
858 microOps[uopIdx++] = newNeonMixInst<MicroInterNeon4Uop>(
859 size, machInst, rMid + 4, vd * 2 + 2, inc * 2);
860 } else {
861 microOps[uopIdx++] = newNeonMixInst<MicroInterNeon4Uop>(
862 size, machInst, rMid, vd * 2, inc * 2);
863 }
864 break;
865 default:
866 // Bad number of elements to interleave
867 microOps[uopIdx++] = new Unknown(machInst);
868 }
869 }
870 switch (regs) {
871 case 4:
872 microOps[uopIdx++] = newNeonMemInst<MicroStrNeon16Uop>(
873 size, machInst, rMid, rn, 0, align);
874 microOps[uopIdx++] = newNeonMemInst<MicroStrNeon16Uop>(
875 size, machInst, rMid + 4, rn, 16, noAlign);
876 break;
877 case 3:
878 microOps[uopIdx++] = newNeonMemInst<MicroStrNeon16Uop>(
879 size, machInst, rMid, rn, 0, align);
880 microOps[uopIdx++] = newNeonMemInst<MicroStrNeon8Uop>(
881 size, machInst, rMid + 4, rn, 16, noAlign);
882 break;
883 case 2:
884 microOps[uopIdx++] = newNeonMemInst<MicroStrNeon16Uop>(
885 size, machInst, rMid, rn, 0, align);
886 break;
887 case 1:
888 microOps[uopIdx++] = newNeonMemInst<MicroStrNeon8Uop>(
889 size, machInst, rMid, rn, 0, align);
890 break;
891 default:
892 // Unknown number of registers
893 microOps[uopIdx++] = new Unknown(machInst);
894 }
895 if (wb) {
896 if (rm != 15 && rm != 13) {
897 microOps[uopIdx++] =
898 new MicroAddUop(machInst, rn, rn, rm, 0, ArmISA::LSL);
899 } else {
900 microOps[uopIdx++] =
901 new MicroAddiUop(machInst, rn, rn, regs * 8);
902 }
903 }
904 assert(uopIdx == numMicroops);
905
906 for (unsigned i = 0; i < numMicroops - 1; i++) {
907 MicroOp * uopPtr = dynamic_cast<MicroOp *>(microOps[i].get());
908 assert(uopPtr);
909 uopPtr->setDelayedCommit();
910 }
911 microOps[0]->setFirstMicroop();
912 microOps[numMicroops - 1]->setLastMicroop();
913 }
914
915 VstSingleOp::VstSingleOp(const char *mnem, ExtMachInst machInst,
916 OpClass __opClass, bool all, unsigned elems,
917 RegIndex rn, RegIndex vd, unsigned regs,
918 unsigned inc, uint32_t size, uint32_t align,
919 RegIndex rm, unsigned lane) :
920 PredMacroOp(mnem, machInst, __opClass)
921 {
922 assert(!all);
923 assert(regs > 0 && regs <= 4);
924 assert(regs % elems == 0);
925
926 unsigned eBytes = (1 << size);
927 unsigned storeSize = eBytes * elems;
928 unsigned storeRegs M5_VAR_USED =
929 (storeSize + sizeof(uint32_t) - 1) / sizeof(uint32_t);
930
931 assert(storeRegs > 0 && storeRegs <= 4);
932
933 numMicroops = 1;
934 bool wb = (rm != 15);
935
936 if (wb) numMicroops++;
937 numMicroops += (regs / elems);
938 microOps = new StaticInstPtr[numMicroops];
939
940 RegIndex ufp0 = VecSpecialElem;
941
942 unsigned uopIdx = 0;
943 switch (elems) {
944 case 4:
945 assert(regs == 4);
946 switch (size) {
947 case 0:
948 microOps[uopIdx++] = new MicroPackNeon8to2Uop<uint8_t>(
949 machInst, ufp0, vd * 2, inc * 2, lane);
950 break;
951 case 1:
952 microOps[uopIdx++] = new MicroPackNeon8to2Uop<uint16_t>(
953 machInst, ufp0, vd * 2, inc * 2, lane);
954 break;
955 case 2:
956 microOps[uopIdx++] = new MicroPackNeon8to4Uop<uint32_t>(
957 machInst, ufp0, vd * 2, inc * 2, lane);
958 break;
959 default:
960 // Bad size
961 microOps[uopIdx++] = new Unknown(machInst);
962 break;
963 }
964 break;
965 case 3:
966 assert(regs == 3);
967 switch (size) {
968 case 0:
969 microOps[uopIdx++] = new MicroPackNeon6to2Uop<uint8_t>(
970 machInst, ufp0, vd * 2, inc * 2, lane);
971 break;
972 case 1:
973 microOps[uopIdx++] = new MicroPackNeon6to2Uop<uint16_t>(
974 machInst, ufp0, vd * 2, inc * 2, lane);
975 break;
976 case 2:
977 microOps[uopIdx++] = new MicroPackNeon6to4Uop<uint32_t>(
978 machInst, ufp0, vd * 2, inc * 2, lane);
979 break;
980 default:
981 // Bad size
982 microOps[uopIdx++] = new Unknown(machInst);
983 break;
984 }
985 break;
986 case 2:
987 assert(regs == 2);
988 assert(storeRegs <= 2);
989 switch (size) {
990 case 0:
991 microOps[uopIdx++] = new MicroPackNeon4to2Uop<uint8_t>(
992 machInst, ufp0, vd * 2, inc * 2, lane);
993 break;
994 case 1:
995 microOps[uopIdx++] = new MicroPackNeon4to2Uop<uint16_t>(
996 machInst, ufp0, vd * 2, inc * 2, lane);
997 break;
998 case 2:
999 microOps[uopIdx++] = new MicroPackNeon4to2Uop<uint32_t>(
1000 machInst, ufp0, vd * 2, inc * 2, lane);
1001 break;
1002 default:
1003 // Bad size
1004 microOps[uopIdx++] = new Unknown(machInst);
1005 break;
1006 }
1007 break;
1008 case 1:
1009 assert(regs == 1 || (all && regs == 2));
1010 assert(storeRegs <= 2);
1011 for (unsigned offset = 0; offset < regs; offset++) {
1012 switch (size) {
1013 case 0:
1014 microOps[uopIdx++] = new MicroPackNeon2to2Uop<uint8_t>(
1015 machInst, ufp0, (vd + offset) * 2, inc * 2, lane);
1016 break;
1017 case 1:
1018 microOps[uopIdx++] = new MicroPackNeon2to2Uop<uint16_t>(
1019 machInst, ufp0, (vd + offset) * 2, inc * 2, lane);
1020 break;
1021 case 2:
1022 microOps[uopIdx++] = new MicroPackNeon2to2Uop<uint32_t>(
1023 machInst, ufp0, (vd + offset) * 2, inc * 2, lane);
1024 break;
1025 default:
1026 // Bad size
1027 microOps[uopIdx++] = new Unknown(machInst);
1028 break;
1029 }
1030 }
1031 break;
1032 default:
1033 // Bad number of elements to unpack
1034 microOps[uopIdx++] = new Unknown(machInst);
1035 }
1036 switch (storeSize) {
1037 case 1:
1038 microOps[uopIdx++] = new MicroStrNeon1Uop<uint8_t>(
1039 machInst, ufp0, rn, 0, align);
1040 break;
1041 case 2:
1042 if (eBytes == 2) {
1043 microOps[uopIdx++] = new MicroStrNeon2Uop<uint16_t>(
1044 machInst, ufp0, rn, 0, align);
1045 } else {
1046 microOps[uopIdx++] = new MicroStrNeon2Uop<uint8_t>(
1047 machInst, ufp0, rn, 0, align);
1048 }
1049 break;
1050 case 3:
1051 microOps[uopIdx++] = new MicroStrNeon3Uop<uint8_t>(
1052 machInst, ufp0, rn, 0, align);
1053 break;
1054 case 4:
1055 switch (eBytes) {
1056 case 1:
1057 microOps[uopIdx++] = new MicroStrNeon4Uop<uint8_t>(
1058 machInst, ufp0, rn, 0, align);
1059 break;
1060 case 2:
1061 microOps[uopIdx++] = new MicroStrNeon4Uop<uint16_t>(
1062 machInst, ufp0, rn, 0, align);
1063 break;
1064 case 4:
1065 microOps[uopIdx++] = new MicroStrNeon4Uop<uint32_t>(
1066 machInst, ufp0, rn, 0, align);
1067 break;
1068 }
1069 break;
1070 case 6:
1071 microOps[uopIdx++] = new MicroStrNeon6Uop<uint16_t>(
1072 machInst, ufp0, rn, 0, align);
1073 break;
1074 case 8:
1075 switch (eBytes) {
1076 case 2:
1077 microOps[uopIdx++] = new MicroStrNeon8Uop<uint16_t>(
1078 machInst, ufp0, rn, 0, align);
1079 break;
1080 case 4:
1081 microOps[uopIdx++] = new MicroStrNeon8Uop<uint32_t>(
1082 machInst, ufp0, rn, 0, align);
1083 break;
1084 }
1085 break;
1086 case 12:
1087 microOps[uopIdx++] = new MicroStrNeon12Uop<uint32_t>(
1088 machInst, ufp0, rn, 0, align);
1089 break;
1090 case 16:
1091 microOps[uopIdx++] = new MicroStrNeon16Uop<uint32_t>(
1092 machInst, ufp0, rn, 0, align);
1093 break;
1094 default:
1095 // Bad store size
1096 microOps[uopIdx++] = new Unknown(machInst);
1097 }
1098 if (wb) {
1099 if (rm != 15 && rm != 13) {
1100 microOps[uopIdx++] =
1101 new MicroAddUop(machInst, rn, rn, rm, 0, ArmISA::LSL);
1102 } else {
1103 microOps[uopIdx++] =
1104 new MicroAddiUop(machInst, rn, rn, storeSize);
1105 }
1106 }
1107 assert(uopIdx == numMicroops);
1108
1109 for (unsigned i = 0; i < numMicroops - 1; i++) {
1110 MicroOp * uopPtr = dynamic_cast<MicroOp *>(microOps[i].get());
1111 assert(uopPtr);
1112 uopPtr->setDelayedCommit();
1113 }
1114 microOps[0]->setFirstMicroop();
1115 microOps[numMicroops - 1]->setLastMicroop();
1116 }
1117
1118 VldMultOp64::VldMultOp64(const char *mnem, ExtMachInst machInst,
1119 OpClass __opClass, RegIndex rn, RegIndex vd,
1120 RegIndex rm, uint8_t eSize, uint8_t dataSize,
1121 uint8_t numStructElems, uint8_t numRegs, bool wb) :
1122 PredMacroOp(mnem, machInst, __opClass)
1123 {
1124 RegIndex vx = NumVecV8ArchRegs;
1125 RegIndex rnsp = (RegIndex) makeSP((IntRegIndex) rn);
1126 bool baseIsSP = isSP((IntRegIndex) rnsp);
1127
1128 numMicroops = wb ? 1 : 0;
1129
1130 int totNumBytes = numRegs * dataSize / 8;
1131 assert(totNumBytes <= 64);
1132
1133 // The guiding principle here is that no more than 16 bytes can be
1134 // transferred at a time
1135 int numMemMicroops = totNumBytes / 16;
1136 int residuum = totNumBytes % 16;
1137 if (residuum)
1138 ++numMemMicroops;
1139 numMicroops += numMemMicroops;
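// Illustrative split: totNumBytes == 48 gives three full 16-byte
// memory microops; totNumBytes == 20 gives one 16-byte microop plus
// a residuum microop of 4 bytes.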
1140
1141 int numMarshalMicroops = numRegs / 2 + (numRegs % 2 ? 1 : 0);
1142 numMicroops += numMarshalMicroops;
1143
1144 microOps = new StaticInstPtr[numMicroops];
1145 unsigned uopIdx = 0;
1146 uint32_t memaccessFlags = TLB::MustBeOne | (TLB::ArmFlags) eSize |
1147 TLB::AllowUnaligned;
1148
1149 int i = 0;
1150 for (; i < numMemMicroops - 1; ++i) {
1151 microOps[uopIdx++] = new MicroNeonLoad64(
1152 machInst, vx + (RegIndex) i, rnsp, 16 * i, memaccessFlags,
1153 baseIsSP, 16 /* accSize */, eSize);
1154 }
1155 microOps[uopIdx++] = new MicroNeonLoad64(
1156 machInst, vx + (RegIndex) i, rnsp, 16 * i, memaccessFlags, baseIsSP,
1157 residuum ? residuum : 16 /* accSize */, eSize);
1158
1159 // Writeback microop: the post-increment amount is encoded in "Rm": a
1160 // 64-bit general register OR as '11111' for an immediate value equal to
1161 // the total number of bytes transferred (i.e. 8, 16, 24, 32, 48 or 64)
1162 if (wb) {
1163 if (rm != ((RegIndex) INTREG_X31)) {
1164 microOps[uopIdx++] = new MicroAddXERegUop(machInst, rnsp, rnsp, rm,
1165 UXTX, 0);
1166 } else {
1167 microOps[uopIdx++] = new MicroAddXiUop(machInst, rnsp, rnsp,
1168 totNumBytes);
1169 }
1170 }
1171
1172 for (int i = 0; i < numMarshalMicroops; ++i) {
1173 switch(numRegs) {
1174 case 1: microOps[uopIdx++] = new MicroDeintNeon64_1Reg(
1175 machInst, vd + (RegIndex) (2 * i), vx, eSize, dataSize,
1176 numStructElems, 1, i /* step */);
1177 break;
1178 case 2: microOps[uopIdx++] = new MicroDeintNeon64_2Reg(
1179 machInst, vd + (RegIndex) (2 * i), vx, eSize, dataSize,
1180 numStructElems, 2, i /* step */);
1181 break;
1182 case 3: microOps[uopIdx++] = new MicroDeintNeon64_3Reg(
1183 machInst, vd + (RegIndex) (2 * i), vx, eSize, dataSize,
1184 numStructElems, 3, i /* step */);
1185 break;
1186 case 4: microOps[uopIdx++] = new MicroDeintNeon64_4Reg(
1187 machInst, vd + (RegIndex) (2 * i), vx, eSize, dataSize,
1188 numStructElems, 4, i /* step */);
1189 break;
1190 default: panic("Invalid number of registers");
1191 }
1192
1193 }
1194
1195 assert(uopIdx == numMicroops);
1196
1197 for (int i = 0; i < numMicroops - 1; ++i) {
1198 microOps[i]->setDelayedCommit();
1199 }
1200 microOps[numMicroops - 1]->setLastMicroop();
1201 }
1202
1203 VstMultOp64::VstMultOp64(const char *mnem, ExtMachInst machInst,
1204 OpClass __opClass, RegIndex rn, RegIndex vd,
1205 RegIndex rm, uint8_t eSize, uint8_t dataSize,
1206 uint8_t numStructElems, uint8_t numRegs, bool wb) :
1207 PredMacroOp(mnem, machInst, __opClass)
1208 {
1209 RegIndex vx = NumVecV8ArchRegs;
1210 RegIndex rnsp = (RegIndex) makeSP((IntRegIndex) rn);
1211 bool baseIsSP = isSP((IntRegIndex) rnsp);
1212
1213 numMicroops = wb ? 1 : 0;
1214
1215 int totNumBytes = numRegs * dataSize / 8;
1216 assert(totNumBytes <= 64);
1217
1218 // The guiding principle here is that no more than 16 bytes can be
1219 // transferred at a time
1220 int numMemMicroops = totNumBytes / 16;
1221 int residuum = totNumBytes % 16;
1222 if (residuum)
1223 ++numMemMicroops;
1224 numMicroops += numMemMicroops;
1225
1226 int numMarshalMicroops = totNumBytes > 32 ? 2 : 1;
1227 numMicroops += numMarshalMicroops;
1228
1229 microOps = new StaticInstPtr[numMicroops];
1230 unsigned uopIdx = 0;
1231
1232 for (int i = 0; i < numMarshalMicroops; ++i) {
1233 switch (numRegs) {
1234 case 1: microOps[uopIdx++] = new MicroIntNeon64_1Reg(
1235 machInst, vx + (RegIndex) (2 * i), vd, eSize, dataSize,
1236 numStructElems, 1, i /* step */);
1237 break;
1238 case 2: microOps[uopIdx++] = new MicroIntNeon64_2Reg(
1239 machInst, vx + (RegIndex) (2 * i), vd, eSize, dataSize,
1240 numStructElems, 2, i /* step */);
1241 break;
1242 case 3: microOps[uopIdx++] = new MicroIntNeon64_3Reg(
1243 machInst, vx + (RegIndex) (2 * i), vd, eSize, dataSize,
1244 numStructElems, 3, i /* step */);
1245 break;
1246 case 4: microOps[uopIdx++] = new MicroIntNeon64_4Reg(
1247 machInst, vx + (RegIndex) (2 * i), vd, eSize, dataSize,
1248 numStructElems, 4, i /* step */);
1249 break;
1250 default: panic("Invalid number of registers");
1251 }
1252 }
1253
1254 uint32_t memaccessFlags = TLB::MustBeOne | (TLB::ArmFlags) eSize |
1255 TLB::AllowUnaligned;
1256
1257 int i = 0;
1258 for (; i < numMemMicroops - 1; ++i) {
1259 microOps[uopIdx++] = new MicroNeonStore64(
1260 machInst, vx + (RegIndex) i, rnsp, 16 * i, memaccessFlags,
1261 baseIsSP, 16 /* accSize */, eSize);
1262 }
1263 microOps[uopIdx++] = new MicroNeonStore64(
1264 machInst, vx + (RegIndex) i, rnsp, 16 * i, memaccessFlags, baseIsSP,
1265 residuum ? residuum : 16 /* accSize */, eSize);
1266
1267 // Writeback microop: the post-increment amount is encoded in "Rm": a
1268 // 64-bit general register OR as '11111' for an immediate value equal to
1269 // the total number of bytes transferred (i.e. 8, 16, 24, 32, 48 or 64)
1270 if (wb) {
1271 if (rm != ((RegIndex) INTREG_X31)) {
1272 microOps[uopIdx++] = new MicroAddXERegUop(machInst, rnsp, rnsp, rm,
1273 UXTX, 0);
1274 } else {
1275 microOps[uopIdx++] = new MicroAddXiUop(machInst, rnsp, rnsp,
1276 totNumBytes);
1277 }
1278 }
1279
1280 assert(uopIdx == numMicroops);
1281
1282 for (int i = 0; i < numMicroops - 1; i++) {
1283 microOps[i]->setDelayedCommit();
1284 }
1285 microOps[numMicroops - 1]->setLastMicroop();
1286 }
1287
1288 VldSingleOp64::VldSingleOp64(const char *mnem, ExtMachInst machInst,
1289 OpClass __opClass, RegIndex rn, RegIndex vd,
1290 RegIndex rm, uint8_t eSize, uint8_t dataSize,
1291 uint8_t numStructElems, uint8_t index, bool wb,
1292 bool replicate) :
1293 PredMacroOp(mnem, machInst, __opClass),
1294 eSize(0), dataSize(0), numStructElems(0), index(0),
1295 wb(false), replicate(false)
1297 {
1298 RegIndex vx = NumVecV8ArchRegs;
1299 RegIndex rnsp = (RegIndex) makeSP((IntRegIndex) rn);
1300 bool baseIsSP = isSP((IntRegIndex) rnsp);
1301
1302 numMicroops = wb ? 1 : 0;
1303
1304 int eSizeBytes = 1 << eSize;
1305 int totNumBytes = numStructElems * eSizeBytes;
1306 assert(totNumBytes <= 64);
1307
1308 // The guiding principle here is that no more than 16 bytes can be
1309 // transferred at a time
1310 int numMemMicroops = totNumBytes / 16;
1311 int residuum = totNumBytes % 16;
1312 if (residuum)
1313 ++numMemMicroops;
1314 numMicroops += numMemMicroops;
1315
1316 int numMarshalMicroops = numStructElems / 2 + (numStructElems % 2 ? 1 : 0);
1317 numMicroops += numMarshalMicroops;
1318
1319 microOps = new StaticInstPtr[numMicroops];
1320 unsigned uopIdx = 0;
1321
1322 uint32_t memaccessFlags = TLB::MustBeOne | (TLB::ArmFlags) eSize |
1323 TLB::AllowUnaligned;
1324
1325 int i = 0;
1326 for (; i < numMemMicroops - 1; ++i) {
1327 microOps[uopIdx++] = new MicroNeonLoad64(
1328 machInst, vx + (RegIndex) i, rnsp, 16 * i, memaccessFlags,
1329 baseIsSP, 16 /* accSize */, eSize);
1330 }
1331 microOps[uopIdx++] = new MicroNeonLoad64(
1332 machInst, vx + (RegIndex) i, rnsp, 16 * i, memaccessFlags, baseIsSP,
1333 residuum ? residuum : 16 /* accSize */, eSize);
1334
1335 // Writeback microop: the post-increment amount is encoded in "Rm": a
1336 // 64-bit general register OR as '11111' for an immediate value equal to
1337 // the total number of bytes transferred (i.e. 8, 16, 24, 32, 48 or 64)
1338 if (wb) {
1339 if (rm != ((RegIndex) INTREG_X31)) {
1340 microOps[uopIdx++] = new MicroAddXERegUop(machInst, rnsp, rnsp, rm,
1341 UXTX, 0);
1342 } else {
1343 microOps[uopIdx++] = new MicroAddXiUop(machInst, rnsp, rnsp,
1344 totNumBytes);
1345 }
1346 }
1347
1348 for (int i = 0; i < numMarshalMicroops; ++i) {
1349 microOps[uopIdx++] = new MicroUnpackNeon64(
1350 machInst, vd + (RegIndex) (2 * i), vx, eSize, dataSize,
1351 numStructElems, index, i /* step */, replicate);
1352 }
1353
1354 assert(uopIdx == numMicroops);
1355
1356 for (int i = 0; i < numMicroops - 1; i++) {
1357 microOps[i]->setDelayedCommit();
1358 }
1359 microOps[numMicroops - 1]->setLastMicroop();
1360 }
1361
1362 VstSingleOp64::VstSingleOp64(const char *mnem, ExtMachInst machInst,
1363 OpClass __opClass, RegIndex rn, RegIndex vd,
1364 RegIndex rm, uint8_t eSize, uint8_t dataSize,
1365 uint8_t numStructElems, uint8_t index, bool wb,
1366 bool replicate) :
1367 PredMacroOp(mnem, machInst, __opClass),
1368 eSize(0), dataSize(0), numStructElems(0), index(0),
1369 wb(false), replicate(false)
1370 {
1371 RegIndex vx = NumVecV8ArchRegs;
1372 RegIndex rnsp = (RegIndex) makeSP((IntRegIndex) rn);
1373 bool baseIsSP = isSP((IntRegIndex) rnsp);
1374
1375 numMicroops = wb ? 1 : 0;
1376
1377 int eSizeBytes = 1 << eSize;
1378 int totNumBytes = numStructElems * eSizeBytes;
1379 assert(totNumBytes <= 64);
1380
1381 // The guiding principle here is that no more than 16 bytes can be
1382 // transferred at a time
1383 int numMemMicroops = totNumBytes / 16;
1384 int residuum = totNumBytes % 16;
1385 if (residuum)
1386 ++numMemMicroops;
1387 numMicroops += numMemMicroops;
1388
1389 int numMarshalMicroops = totNumBytes > 32 ? 2 : 1;
1390 numMicroops += numMarshalMicroops;
1391
1392 microOps = new StaticInstPtr[numMicroops];
1393 unsigned uopIdx = 0;
1394
1395 for (int i = 0; i < numMarshalMicroops; ++i) {
1396 microOps[uopIdx++] = new MicroPackNeon64(
1397 machInst, vx + (RegIndex) (2 * i), vd, eSize, dataSize,
1398 numStructElems, index, i /* step */, replicate);
1399 }
1400
1401 uint32_t memaccessFlags = TLB::MustBeOne | (TLB::ArmFlags) eSize |
1402 TLB::AllowUnaligned;
1403
1404 int i = 0;
1405 for (; i < numMemMicroops - 1; ++i) {
1406 microOps[uopIdx++] = new MicroNeonStore64(
1407 machInst, vx + (RegIndex) i, rnsp, 16 * i, memaccessFlags,
1408 baseIsSP, 16 /* accSize */, eSize);
1409 }
1410 microOps[uopIdx++] = new MicroNeonStore64(
1411 machInst, vx + (RegIndex) i, rnsp, 16 * i, memaccessFlags, baseIsSP,
1412 residuum ? residuum : 16 /* accSize */, eSize);
1413
1414 // Writeback microop: the post-increment amount is encoded in "Rm": a
1415 // 64-bit general register OR as '11111' for an immediate value equal to
1416 // the total number of bytes transferred (i.e. 8, 16, 24, 32, 48 or 64)
1417 if (wb) {
1418 if (rm != ((RegIndex) INTREG_X31)) {
1419 microOps[uopIdx++] = new MicroAddXERegUop(machInst, rnsp, rnsp, rm,
1420 UXTX, 0);
1421 } else {
1422 microOps[uopIdx++] = new MicroAddXiUop(machInst, rnsp, rnsp,
1423 totNumBytes);
1424 }
1425 }
1426
1427 assert(uopIdx == numMicroops);
1428
1429 for (int i = 0; i < numMicroops - 1; i++) {
1430 microOps[i]->setDelayedCommit();
1431 }
1432 microOps[numMicroops - 1]->setLastMicroop();
1433 }
1434
1435 MacroVFPMemOp::MacroVFPMemOp(const char *mnem, ExtMachInst machInst,
1436 OpClass __opClass, IntRegIndex rn,
1437 RegIndex vd, bool single, bool up,
1438 bool writeback, bool load, uint32_t offset) :
1439 PredMacroOp(mnem, machInst, __opClass)
1440 {
1441 int i = 0;
1442
1443 // The lowest order bit selects fldmx (set) or fldmd (clear). These seem
1444 // to be functionally identical except that fldmx is deprecated. For now
1445 // we'll assume they're otherwise interchangeable.
1446 int count = (single ? offset : (offset / 2));
1447 numMicroops = count * (single ? 1 : 2) + (writeback ? 1 : 0);
1448 microOps = new StaticInstPtr[numMicroops];
1449
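// offset is the transfer length in words, so a double-precision
// FLDM/FSTM moves offset / 2 D registers, each split into two 32-bit
// microops (the DB/DT halves) in the loop below.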
1450 int64_t addr = 0;
1451
1452 if (!up)
1453 addr = 4 * offset;
1454
1455 bool tempUp = up;
1456 for (int j = 0; j < count; j++) {
1457 if (load) {
1458 if (single) {
1459 microOps[i++] = new MicroLdrFpUop(machInst, vd++, rn,
1460 tempUp, addr);
1461 } else {
1462 microOps[i++] = new MicroLdrDBFpUop(machInst, vd++, rn,
1463 tempUp, addr);
1464 microOps[i++] = new MicroLdrDTFpUop(machInst, vd++, rn, tempUp,
1465 addr + (up ? 4 : -4));
1466 }
1467 } else {
1468 if (single) {
1469 microOps[i++] = new MicroStrFpUop(machInst, vd++, rn,
1470 tempUp, addr);
1471 } else {
1472 microOps[i++] = new MicroStrDBFpUop(machInst, vd++, rn,
1473 tempUp, addr);
1474 microOps[i++] = new MicroStrDTFpUop(machInst, vd++, rn, tempUp,
1475 addr + (up ? 4 : -4));
1476 }
1477 }
1478 if (!tempUp) {
1479 addr -= (single ? 4 : 8);
1480 // The microops don't handle negative displacement, so if we
1481 // hit zero, flip the polarity and start adding.
1482 if (addr <= 0) {
1483 tempUp = true;
1484 addr = -addr;
1485 }
1486 } else {
1487 addr += (single ? 4 : 8);
1488 }
1489 }
1490
1491 if (writeback) {
1492 if (up) {
1493 microOps[i++] =
1494 new MicroAddiUop(machInst, rn, rn, 4 * offset);
1495 } else {
1496 microOps[i++] =
1497 new MicroSubiUop(machInst, rn, rn, 4 * offset);
1498 }
1499 }
1500
1501 assert(numMicroops == i);
1502 microOps[numMicroops - 1]->setLastMicroop();
1503
1504 for (StaticInstPtr *curUop = microOps;
1505 !(*curUop)->isLastMicroop(); curUop++) {
1506 MicroOp * uopPtr = dynamic_cast<MicroOp *>(curUop->get());
1507 assert(uopPtr);
1508 uopPtr->setDelayedCommit();
1509 }
1510 }
1511
1512 std::string
1513 MicroIntImmOp::generateDisassembly(Addr pc, const SymbolTable *symtab) const
1514 {
1515 std::stringstream ss;
1516 printMnemonic(ss);
1517 printIntReg(ss, ura);
1518 ss << ", ";
1519 printIntReg(ss, urb);
1520 ss << ", ";
1521 ccprintf(ss, "#%d", imm);
1522 return ss.str();
1523 }
1524
1525 std::string
1526 MicroIntImmXOp::generateDisassembly(Addr pc, const SymbolTable *symtab) const
1527 {
1528 std::stringstream ss;
1529 printMnemonic(ss);
1530 printIntReg(ss, ura);
1531 ss << ", ";
1532 printIntReg(ss, urb);
1533 ss << ", ";
1534 ccprintf(ss, "#%d", imm);
1535 return ss.str();
1536 }
1537
1538 std::string
1539 MicroSetPCCPSR::generateDisassembly(Addr pc, const SymbolTable *symtab) const
1540 {
1541 std::stringstream ss;
1542 printMnemonic(ss);
1543 ss << "[PC,CPSR]";
1544 return ss.str();
1545 }
1546
1547 std::string
1548 MicroIntRegXOp::generateDisassembly(Addr pc, const SymbolTable *symtab) const
1549 {
1550 std::stringstream ss;
1551 printMnemonic(ss);
1552 printIntReg(ss, ura);
1553 ccprintf(ss, ", ");
1554 printIntReg(ss, urb);
1555 printExtendOperand(false, ss, (IntRegIndex)urc, type, shiftAmt);
1556 return ss.str();
1557 }
1558
1559 std::string
1560 MicroIntMov::generateDisassembly(Addr pc, const SymbolTable *symtab) const
1561 {
1562 std::stringstream ss;
1563 printMnemonic(ss);
1564 printIntReg(ss, ura);
1565 ss << ", ";
1566 printIntReg(ss, urb);
1567 return ss.str();
1568 }
1569
1570 std::string
1571 MicroIntOp::generateDisassembly(Addr pc, const SymbolTable *symtab) const
1572 {
1573 std::stringstream ss;
1574 printMnemonic(ss);
1575 printIntReg(ss, ura);
1576 ss << ", ";
1577 printIntReg(ss, urb);
1578 ss << ", ";
1579 printIntReg(ss, urc);
1580 return ss.str();
1581 }
1582
1583 std::string
1584 MicroMemOp::generateDisassembly(Addr pc, const SymbolTable *symtab) const
1585 {
1586 std::stringstream ss;
1587 printMnemonic(ss);
1588 if (isFloating())
1589 printFloatReg(ss, ura);
1590 else
1591 printIntReg(ss, ura);
1592 ss << ", [";
1593 printIntReg(ss, urb);
1594 ss << ", ";
1595 ccprintf(ss, "#%d", imm);
1596 ss << "]";
1597 return ss.str();
1598 }
1599
1600 std::string
1601 MicroMemPairOp::generateDisassembly(Addr pc, const SymbolTable *symtab) const
1602 {
1603 std::stringstream ss;
1604 printMnemonic(ss);
1605 printIntReg(ss, dest);
1606 ss << ",";
1607 printIntReg(ss, dest2);
1608 ss << ", [";
1609 printIntReg(ss, urb);
1610 ss << ", ";
1611 ccprintf(ss, "#%d", imm);
1612 ss << "]";
1613 return ss.str();
1614 }
1615
1616 }