// ARM: Fix subtle bug in LDM.
// [gem5.git] / src / arch / arm / insts / macromem.cc
1 /*
2 * Copyright (c) 2010 ARM Limited
3 * All rights reserved
4 *
5 * The license below extends only to copyright in the software and shall
6 * not be construed as granting a license to any other intellectual
7 * property including but not limited to intellectual property relating
8 * to a hardware implementation of the functionality of the software
9 * licensed hereunder. You may use the software subject to the license
10 * terms below provided that you ensure that this notice is replicated
11 * unmodified and in its entirety in all distributions of the software,
12 * modified or unmodified, in source code or in binary form.
13 *
14 * Copyright (c) 2007-2008 The Florida State University
15 * All rights reserved.
16 *
17 * Redistribution and use in source and binary forms, with or without
18 * modification, are permitted provided that the following conditions are
19 * met: redistributions of source code must retain the above copyright
20 * notice, this list of conditions and the following disclaimer;
21 * redistributions in binary form must reproduce the above copyright
22 * notice, this list of conditions and the following disclaimer in the
23 * documentation and/or other materials provided with the distribution;
24 * neither the name of the copyright holders nor the names of its
25 * contributors may be used to endorse or promote products derived from
26 * this software without specific prior written permission.
27 *
28 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
29 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
30 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
31 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
32 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
33 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
34 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
35 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
36 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
37 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
38 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
39 *
40 * Authors: Stephen Hines
41 */
42
43 #include "arch/arm/insts/macromem.hh"
44 #include "arch/arm/decoder.hh"
45 #include <sstream>
46
47 using namespace std;
48 using namespace ArmISAInst;
49
50 namespace ArmISA
51 {
52
// Break an LDM/STM (load/store multiple) macro-op into micro-ops:
// one uop that copies the base register into scratch register UREG0,
// one load/store uop per bit set in the register list, and, for the
// writeback forms, a base-update uop plus (for loads) a final move of
// the last loaded value out of scratch register UREG1.
MacroMemOp::MacroMemOp(const char *mnem, ExtMachInst machInst,
                       OpClass __opClass, IntRegIndex rn,
                       bool index, bool up, bool user, bool writeback,
                       bool load, uint32_t reglist) :
    PredMacroOp(mnem, machInst, __opClass)
{
    uint32_t regs = reglist;
    uint32_t ones = number_of_ones(reglist);
    // Remember that writeback adds a uop or two and the temp register adds one
    numMicroops = ones + (writeback ? (load ? 2 : 1) : 0) + 1;

    // It's technically legal to do a lot of nothing: an empty register
    // list still produces the single base-copy uop below.
    if (!ones)
        numMicroops = 1;

    microOps = new StaticInstPtr[numMicroops];
    // Byte offset (relative to the base copied into UREG0) of the first
    // transfer. Registers are always transferred lowest-numbered first,
    // so decrementing forms start at the top of the block.
    uint32_t addr = 0;

    if (!up)
        addr = (ones << 2) - 4;

    // Adjust for the before/after (index) addressing variant.
    if (!index)
        addr += 4;

    StaticInstPtr *uop = microOps;

    // Add 0 to Rn and stick it in ureg0.
    // This is equivalent to a move.
    *uop = new MicroAddiUop(machInst, INTREG_UREG0, rn, 0);

    unsigned reg = 0;
    unsigned regIdx = 0;
    // user with PC not in the list: LDM/STM of user-mode registers;
    // user with PC in the list: exception-return form of LDM.
    bool force_user = user & !bits(reglist, 15);
    bool exception_ret = user & bits(reglist, 15);

    for (int i = 0; i < ones; i++) {
        // Find the next register.
        while (!bits(regs, reg))
            reg++;
        replaceBits(regs, reg, 0);

        regIdx = reg;
        if (force_user) {
            // Redirect the access to the user-mode copy of the register.
            regIdx = intRegInMode(MODE_USER, regIdx);
        }

        if (load) {
            if (writeback && i == ones - 1) {
                // If it's a writeback and this is the last register
                // do the load into a temporary register which we'll move
                // into the final one later
                *++uop = new MicroLdrUop(machInst, INTREG_UREG1, INTREG_UREG0,
                        up, addr);
            } else {
                // Otherwise just do it normally
                if (reg == INTREG_PC && exception_ret) {
                    // This must be the exception return form of ldm.
                    *++uop = new MicroLdrRetUop(machInst, regIdx,
                                               INTREG_UREG0, up, addr);
                } else {
                    *++uop = new MicroLdrUop(machInst, regIdx,
                                             INTREG_UREG0, up, addr);
                }
            }
        } else {
            *++uop = new MicroStrUop(machInst, regIdx, INTREG_UREG0, up, addr);
        }

        // Step to the next word in transfer order.
        if (up)
            addr += 4;
        else
            addr -= 4;
    }

    if (writeback && ones) {
        // put the register update after we're done all loading
        if (up)
            *++uop = new MicroAddiUop(machInst, rn, rn, ones * 4);
        else
            *++uop = new MicroSubiUop(machInst, rn, rn, ones * 4);

        // If this was a load move the last temporary value into place
        // this way we can't take an exception after we update the base
        // register.
        // Note: 'reg'/'regIdx' still hold the last (highest) register
        // from the loop above.
        if (load && reg == INTREG_PC && exception_ret) {
            *++uop = new MicroUopRegMovRet(machInst, 0, INTREG_UREG1);
            // NOTE(review): this warn() looks like leftover debug output
            // on a relatively common instruction form — confirm whether
            // it should be removed or demoted.
            warn("creating instruction with exception return at curTick:%d\n",
                 curTick());
        } else if (load) {
            *++uop = new MicroUopRegMov(machInst, regIdx, INTREG_UREG1);
            if (reg == INTREG_PC) {
                // The final move writes the PC, so mark it as a control
                // instruction for the branch predictor.
                (*uop)->setFlag(StaticInstBase::IsControl);
                (*uop)->setFlag(StaticInstBase::IsCondControl);
                (*uop)->setFlag(StaticInstBase::IsIndirectControl);
                // This is created as a RAS POP
                if (rn == INTREG_SP)
                    (*uop)->setFlag(StaticInstBase::IsReturn);

            }
        }
    }

    (*uop)->setLastMicroop();

    // All uops before the last must delay their commit so the whole
    // macro-op can be squashed as a unit.
    for (StaticInstPtr *curUop = microOps;
         !(*curUop)->isLastMicroop(); curUop++) {
        MicroOp * uopPtr = dynamic_cast<MicroOp *>(curUop->get());
        assert(uopPtr);
        uopPtr->setDelayedCommit();
    }
}
164
// Break a NEON VLDn (multiple n-element structures) into micro-ops:
// one or two wide memory-load uops, an optional base writeback uop, and
// (for n > 1) deinterleave uops that shuffle the loaded data from
// scratch FP registers into the architectural destination registers.
// regs is the number of double registers transferred, elems is the
// interleave factor n, inc is the register spacing of the destination.
VldMultOp::VldMultOp(const char *mnem, ExtMachInst machInst, OpClass __opClass,
                     unsigned elems, RegIndex rn, RegIndex vd, unsigned regs,
                     unsigned inc, uint32_t size, uint32_t align, RegIndex rm) :
    PredMacroOp(mnem, machInst, __opClass)
{
    assert(regs > 0 && regs <= 4);
    assert(regs % elems == 0);

    // Up to 16 bytes per memory uop, so more than two double regs needs
    // a second load uop.
    numMicroops = (regs > 2) ? 2 : 1;
    // rm == 15 encodes "no writeback".
    bool wb = (rm != 15);
    bool deinterleave = (elems > 1);

    if (wb) numMicroops++;
    if (deinterleave) numMicroops += (regs / elems);
    microOps = new StaticInstPtr[numMicroops];

    // When deinterleaving, load into scratch FP registers (starting just
    // past the architectural ones) and shuffle afterwards; otherwise load
    // straight into the destination (vd in double regs -> single-reg index).
    RegIndex rMid = deinterleave ? NumFloatArchRegs : vd * 2;

    uint32_t noAlign = TLB::MustBeOne;

    unsigned uopIdx = 0;
    switch (regs) {
      case 4:
        microOps[uopIdx++] = newNeonMemInst<MicroLdrNeon16Uop>(
                size, machInst, rMid, rn, 0, align);
        // Only the first access carries the alignment requirement.
        microOps[uopIdx++] = newNeonMemInst<MicroLdrNeon16Uop>(
                size, machInst, rMid + 4, rn, 16, noAlign);
        break;
      case 3:
        microOps[uopIdx++] = newNeonMemInst<MicroLdrNeon16Uop>(
                size, machInst, rMid, rn, 0, align);
        microOps[uopIdx++] = newNeonMemInst<MicroLdrNeon8Uop>(
                size, machInst, rMid + 4, rn, 16, noAlign);
        break;
      case 2:
        microOps[uopIdx++] = newNeonMemInst<MicroLdrNeon16Uop>(
                size, machInst, rMid, rn, 0, align);
        break;
      case 1:
        microOps[uopIdx++] = newNeonMemInst<MicroLdrNeon8Uop>(
                size, machInst, rMid, rn, 0, align);
        break;
      default:
        // Unknown number of registers
        microOps[uopIdx++] = new Unknown(machInst);
    }
    if (wb) {
        if (rm != 15 && rm != 13) {
            // Writeback by register: Rn += Rm.
            microOps[uopIdx++] =
                new MicroAddUop(machInst, rn, rn, rm, 0, ArmISA::LSL);
        } else {
            // rm == 13 encodes writeback by the transfer size.
            microOps[uopIdx++] =
                new MicroAddiUop(machInst, rn, rn, regs * 8);
        }
    }
    if (deinterleave) {
        switch (elems) {
          case 4:
            assert(regs == 4);
            microOps[uopIdx++] = newNeonMixInst<MicroDeintNeon8Uop>(
                    size, machInst, vd * 2, rMid, inc * 2);
            break;
          case 3:
            assert(regs == 3);
            microOps[uopIdx++] = newNeonMixInst<MicroDeintNeon6Uop>(
                    size, machInst, vd * 2, rMid, inc * 2);
            break;
          case 2:
            assert(regs == 4 || regs == 2);
            if (regs == 4) {
                // Two deinterleave uops, one per pair of double regs.
                microOps[uopIdx++] = newNeonMixInst<MicroDeintNeon4Uop>(
                        size, machInst, vd * 2, rMid, inc * 2);
                microOps[uopIdx++] = newNeonMixInst<MicroDeintNeon4Uop>(
                        size, machInst, vd * 2 + 2, rMid + 4, inc * 2);
            } else {
                microOps[uopIdx++] = newNeonMixInst<MicroDeintNeon4Uop>(
                        size, machInst, vd * 2, rMid, inc * 2);
            }
            break;
          default:
            // Bad number of elements to deinterleave
            microOps[uopIdx++] = new Unknown(machInst);
        }
    }
    // Every uop slot must have been filled exactly once.
    assert(uopIdx == numMicroops);

    for (unsigned i = 0; i < numMicroops - 1; i++) {
        MicroOp * uopPtr = dynamic_cast<MicroOp *>(microOps[i].get());
        assert(uopPtr);
        uopPtr->setDelayedCommit();
    }
    microOps[numMicroops - 1]->setLastMicroop();
}
258
// Break a NEON VLDn "single structure" form into micro-ops: one memory
// load of the whole structure into scratch FP register(s), an optional
// base writeback uop, and unpack uops that place the loaded element(s)
// into one lane (or, for the to-all-lanes form, all lanes) of the
// destination registers. 'all' selects the to-all-lanes variant, 'lane'
// the destination lane otherwise.
VldSingleOp::VldSingleOp(const char *mnem, ExtMachInst machInst,
                         OpClass __opClass, bool all, unsigned elems,
                         RegIndex rn, RegIndex vd, unsigned regs,
                         unsigned inc, uint32_t size, uint32_t align,
                         RegIndex rm, unsigned lane) :
    PredMacroOp(mnem, machInst, __opClass)
{
    assert(regs > 0 && regs <= 4);
    assert(regs % elems == 0);

    unsigned eBytes = (1 << size);
    // Total bytes moved: one element per structure slot.
    unsigned loadSize = eBytes * elems;
    // Number of FP registers needed to hold the loaded data; only used
    // by the asserts below (hence M5_VAR_USED).
    unsigned loadRegs M5_VAR_USED = (loadSize + sizeof(FloatRegBits) - 1) /
                                    sizeof(FloatRegBits);

    assert(loadRegs > 0 && loadRegs <= 4);

    numMicroops = 1;
    // rm == 15 encodes "no writeback".
    bool wb = (rm != 15);

    if (wb) numMicroops++;
    numMicroops += (regs / elems);
    microOps = new StaticInstPtr[numMicroops];

    // Scratch FP register just past the architectural ones.
    RegIndex ufp0 = NumFloatArchRegs;

    unsigned uopIdx = 0;
    // Pick a load uop sized to the whole structure; the element type
    // selects the memory access granularity.
    switch (loadSize) {
      case 1:
        microOps[uopIdx++] = new MicroLdrNeon1Uop<uint8_t>(
                machInst, ufp0, rn, 0, align);
        break;
      case 2:
        if (eBytes == 2) {
            microOps[uopIdx++] = new MicroLdrNeon2Uop<uint16_t>(
                    machInst, ufp0, rn, 0, align);
        } else {
            microOps[uopIdx++] = new MicroLdrNeon2Uop<uint8_t>(
                    machInst, ufp0, rn, 0, align);
        }
        break;
      case 3:
        microOps[uopIdx++] = new MicroLdrNeon3Uop<uint8_t>(
                machInst, ufp0, rn, 0, align);
        break;
      case 4:
        switch (eBytes) {
          case 1:
            microOps[uopIdx++] = new MicroLdrNeon4Uop<uint8_t>(
                    machInst, ufp0, rn, 0, align);
            break;
          case 2:
            microOps[uopIdx++] = new MicroLdrNeon4Uop<uint16_t>(
                    machInst, ufp0, rn, 0, align);
            break;
          case 4:
            microOps[uopIdx++] = new MicroLdrNeon4Uop<uint32_t>(
                    machInst, ufp0, rn, 0, align);
            break;
        }
        break;
      case 6:
        microOps[uopIdx++] = new MicroLdrNeon6Uop<uint16_t>(
                machInst, ufp0, rn, 0, align);
        break;
      case 8:
        switch (eBytes) {
          case 2:
            microOps[uopIdx++] = new MicroLdrNeon8Uop<uint16_t>(
                    machInst, ufp0, rn, 0, align);
            break;
          case 4:
            microOps[uopIdx++] = new MicroLdrNeon8Uop<uint32_t>(
                    machInst, ufp0, rn, 0, align);
            break;
        }
        break;
      case 12:
        microOps[uopIdx++] = new MicroLdrNeon12Uop<uint32_t>(
                machInst, ufp0, rn, 0, align);
        break;
      case 16:
        microOps[uopIdx++] = new MicroLdrNeon16Uop<uint32_t>(
                machInst, ufp0, rn, 0, align);
        break;
      default:
        // Unrecognized load size
        microOps[uopIdx++] = new Unknown(machInst);
    }
    if (wb) {
        if (rm != 15 && rm != 13) {
            // Writeback by register: Rn += Rm.
            microOps[uopIdx++] =
                new MicroAddUop(machInst, rn, rn, rm, 0, ArmISA::LSL);
        } else {
            // rm == 13 encodes writeback by the transfer size.
            microOps[uopIdx++] =
                new MicroAddiUop(machInst, rn, rn, loadSize);
        }
    }
    // Unpack the loaded structure into the destination lane(s); the
    // uop type is chosen by the number of structure elements and the
    // element size, with the "All" variants replicating to every lane.
    switch (elems) {
      case 4:
        assert(regs == 4);
        switch (size) {
          case 0:
            if (all) {
                microOps[uopIdx++] = new MicroUnpackAllNeon2to8Uop<uint8_t>(
                        machInst, vd * 2, ufp0, inc * 2);
            } else {
                microOps[uopIdx++] = new MicroUnpackNeon2to8Uop<uint8_t>(
                        machInst, vd * 2, ufp0, inc * 2, lane);
            }
            break;
          case 1:
            if (all) {
                microOps[uopIdx++] = new MicroUnpackAllNeon2to8Uop<uint16_t>(
                        machInst, vd * 2, ufp0, inc * 2);
            } else {
                microOps[uopIdx++] = new MicroUnpackNeon2to8Uop<uint16_t>(
                        machInst, vd * 2, ufp0, inc * 2, lane);
            }
            break;
          case 2:
            if (all) {
                microOps[uopIdx++] = new MicroUnpackAllNeon4to8Uop<uint32_t>(
                        machInst, vd * 2, ufp0, inc * 2);
            } else {
                microOps[uopIdx++] = new MicroUnpackNeon4to8Uop<uint32_t>(
                        machInst, vd * 2, ufp0, inc * 2, lane);
            }
            break;
          default:
            // Bad size
            microOps[uopIdx++] = new Unknown(machInst);
            break;
        }
        break;
      case 3:
        assert(regs == 3);
        switch (size) {
          case 0:
            if (all) {
                microOps[uopIdx++] = new MicroUnpackAllNeon2to6Uop<uint8_t>(
                        machInst, vd * 2, ufp0, inc * 2);
            } else {
                microOps[uopIdx++] = new MicroUnpackNeon2to6Uop<uint8_t>(
                        machInst, vd * 2, ufp0, inc * 2, lane);
            }
            break;
          case 1:
            if (all) {
                microOps[uopIdx++] = new MicroUnpackAllNeon2to6Uop<uint16_t>(
                        machInst, vd * 2, ufp0, inc * 2);
            } else {
                microOps[uopIdx++] = new MicroUnpackNeon2to6Uop<uint16_t>(
                        machInst, vd * 2, ufp0, inc * 2, lane);
            }
            break;
          case 2:
            if (all) {
                microOps[uopIdx++] = new MicroUnpackAllNeon4to6Uop<uint32_t>(
                        machInst, vd * 2, ufp0, inc * 2);
            } else {
                microOps[uopIdx++] = new MicroUnpackNeon4to6Uop<uint32_t>(
                        machInst, vd * 2, ufp0, inc * 2, lane);
            }
            break;
          default:
            // Bad size
            microOps[uopIdx++] = new Unknown(machInst);
            break;
        }
        break;
      case 2:
        assert(regs == 2);
        assert(loadRegs <= 2);
        switch (size) {
          case 0:
            if (all) {
                microOps[uopIdx++] = new MicroUnpackAllNeon2to4Uop<uint8_t>(
                        machInst, vd * 2, ufp0, inc * 2);
            } else {
                microOps[uopIdx++] = new MicroUnpackNeon2to4Uop<uint8_t>(
                        machInst, vd * 2, ufp0, inc * 2, lane);
            }
            break;
          case 1:
            if (all) {
                microOps[uopIdx++] = new MicroUnpackAllNeon2to4Uop<uint16_t>(
                        machInst, vd * 2, ufp0, inc * 2);
            } else {
                microOps[uopIdx++] = new MicroUnpackNeon2to4Uop<uint16_t>(
                        machInst, vd * 2, ufp0, inc * 2, lane);
            }
            break;
          case 2:
            if (all) {
                microOps[uopIdx++] = new MicroUnpackAllNeon2to4Uop<uint32_t>(
                        machInst, vd * 2, ufp0, inc * 2);
            } else {
                microOps[uopIdx++] = new MicroUnpackNeon2to4Uop<uint32_t>(
                        machInst, vd * 2, ufp0, inc * 2, lane);
            }
            break;
          default:
            // Bad size
            microOps[uopIdx++] = new Unknown(machInst);
            break;
        }
        break;
      case 1:
        // VLD1 to-all-lanes may spread over two registers; one unpack
        // uop per destination register.
        assert(regs == 1 || (all && regs == 2));
        assert(loadRegs <= 2);
        for (unsigned offset = 0; offset < regs; offset++) {
            switch (size) {
              case 0:
                if (all) {
                    microOps[uopIdx++] =
                        new MicroUnpackAllNeon2to2Uop<uint8_t>(
                            machInst, (vd + offset) * 2, ufp0, inc * 2);
                } else {
                    microOps[uopIdx++] =
                        new MicroUnpackNeon2to2Uop<uint8_t>(
                            machInst, (vd + offset) * 2, ufp0, inc * 2, lane);
                }
                break;
              case 1:
                if (all) {
                    microOps[uopIdx++] =
                        new MicroUnpackAllNeon2to2Uop<uint16_t>(
                            machInst, (vd + offset) * 2, ufp0, inc * 2);
                } else {
                    microOps[uopIdx++] =
                        new MicroUnpackNeon2to2Uop<uint16_t>(
                            machInst, (vd + offset) * 2, ufp0, inc * 2, lane);
                }
                break;
              case 2:
                if (all) {
                    microOps[uopIdx++] =
                        new MicroUnpackAllNeon2to2Uop<uint32_t>(
                            machInst, (vd + offset) * 2, ufp0, inc * 2);
                } else {
                    microOps[uopIdx++] =
                        new MicroUnpackNeon2to2Uop<uint32_t>(
                            machInst, (vd + offset) * 2, ufp0, inc * 2, lane);
                }
                break;
              default:
                // Bad size
                microOps[uopIdx++] = new Unknown(machInst);
                break;
            }
        }
        break;
      default:
        // Bad number of elements to unpack
        microOps[uopIdx++] = new Unknown(machInst);
    }
    // Every uop slot must have been filled exactly once.
    assert(uopIdx == numMicroops);

    for (unsigned i = 0; i < numMicroops - 1; i++) {
        MicroOp * uopPtr = dynamic_cast<MicroOp *>(microOps[i].get());
        assert(uopPtr);
        uopPtr->setDelayedCommit();
    }
    microOps[numMicroops - 1]->setLastMicroop();
}
525
// Break a NEON VSTn (multiple n-element structures) into micro-ops:
// the mirror of VldMultOp — for n > 1, interleave uops first shuffle the
// source registers into scratch FP registers, then one or two wide
// memory-store uops write them out, followed by an optional base
// writeback uop.
VstMultOp::VstMultOp(const char *mnem, ExtMachInst machInst, OpClass __opClass,
                     unsigned elems, RegIndex rn, RegIndex vd, unsigned regs,
                     unsigned inc, uint32_t size, uint32_t align, RegIndex rm) :
    PredMacroOp(mnem, machInst, __opClass)
{
    assert(regs > 0 && regs <= 4);
    assert(regs % elems == 0);

    // Up to 16 bytes per memory uop, so more than two double regs needs
    // a second store uop.
    numMicroops = (regs > 2) ? 2 : 1;
    // rm == 15 encodes "no writeback".
    bool wb = (rm != 15);
    bool interleave = (elems > 1);

    if (wb) numMicroops++;
    if (interleave) numMicroops += (regs / elems);
    microOps = new StaticInstPtr[numMicroops];

    uint32_t noAlign = TLB::MustBeOne;

    // When interleaving, store from scratch FP registers filled by the
    // interleave uops; otherwise store straight from the source
    // registers (vd in double regs -> single-reg index).
    RegIndex rMid = interleave ? NumFloatArchRegs : vd * 2;

    unsigned uopIdx = 0;
    if (interleave) {
        switch (elems) {
          case 4:
            assert(regs == 4);
            microOps[uopIdx++] = newNeonMixInst<MicroInterNeon8Uop>(
                    size, machInst, rMid, vd * 2, inc * 2);
            break;
          case 3:
            assert(regs == 3);
            microOps[uopIdx++] = newNeonMixInst<MicroInterNeon6Uop>(
                    size, machInst, rMid, vd * 2, inc * 2);
            break;
          case 2:
            assert(regs == 4 || regs == 2);
            if (regs == 4) {
                // Two interleave uops, one per pair of double regs.
                microOps[uopIdx++] = newNeonMixInst<MicroInterNeon4Uop>(
                        size, machInst, rMid, vd * 2, inc * 2);
                microOps[uopIdx++] = newNeonMixInst<MicroInterNeon4Uop>(
                        size, machInst, rMid + 4, vd * 2 + 2, inc * 2);
            } else {
                microOps[uopIdx++] = newNeonMixInst<MicroInterNeon4Uop>(
                        size, machInst, rMid, vd * 2, inc * 2);
            }
            break;
          default:
            // Bad number of elements to interleave
            microOps[uopIdx++] = new Unknown(machInst);
        }
    }
    switch (regs) {
      case 4:
        microOps[uopIdx++] = newNeonMemInst<MicroStrNeon16Uop>(
                size, machInst, rMid, rn, 0, align);
        // Only the first access carries the alignment requirement.
        microOps[uopIdx++] = newNeonMemInst<MicroStrNeon16Uop>(
                size, machInst, rMid + 4, rn, 16, noAlign);
        break;
      case 3:
        microOps[uopIdx++] = newNeonMemInst<MicroStrNeon16Uop>(
                size, machInst, rMid, rn, 0, align);
        microOps[uopIdx++] = newNeonMemInst<MicroStrNeon8Uop>(
                size, machInst, rMid + 4, rn, 16, noAlign);
        break;
      case 2:
        microOps[uopIdx++] = newNeonMemInst<MicroStrNeon16Uop>(
                size, machInst, rMid, rn, 0, align);
        break;
      case 1:
        microOps[uopIdx++] = newNeonMemInst<MicroStrNeon8Uop>(
                size, machInst, rMid, rn, 0, align);
        break;
      default:
        // Unknown number of registers
        microOps[uopIdx++] = new Unknown(machInst);
    }
    if (wb) {
        if (rm != 15 && rm != 13) {
            // Writeback by register: Rn += Rm.
            microOps[uopIdx++] =
                new MicroAddUop(machInst, rn, rn, rm, 0, ArmISA::LSL);
        } else {
            // rm == 13 encodes writeback by the transfer size.
            microOps[uopIdx++] =
                new MicroAddiUop(machInst, rn, rn, regs * 8);
        }
    }
    // Every uop slot must have been filled exactly once.
    assert(uopIdx == numMicroops);

    for (unsigned i = 0; i < numMicroops - 1; i++) {
        MicroOp * uopPtr = dynamic_cast<MicroOp *>(microOps[i].get());
        assert(uopPtr);
        uopPtr->setDelayedCommit();
    }
    microOps[numMicroops - 1]->setLastMicroop();
}
619
// Break a NEON VSTn "single structure" form into micro-ops: pack uops
// gather one lane of the source registers into scratch FP register(s),
// then one memory store writes the structure out, followed by an
// optional base writeback uop. There is no store-to-all-lanes form,
// hence the assert on 'all'.
VstSingleOp::VstSingleOp(const char *mnem, ExtMachInst machInst,
                         OpClass __opClass, bool all, unsigned elems,
                         RegIndex rn, RegIndex vd, unsigned regs,
                         unsigned inc, uint32_t size, uint32_t align,
                         RegIndex rm, unsigned lane) :
    PredMacroOp(mnem, machInst, __opClass)
{
    assert(!all);
    assert(regs > 0 && regs <= 4);
    assert(regs % elems == 0);

    unsigned eBytes = (1 << size);
    // Total bytes moved: one element per structure slot.
    unsigned storeSize = eBytes * elems;
    // Number of FP registers holding the packed data; only used by the
    // asserts below (hence M5_VAR_USED).
    unsigned storeRegs M5_VAR_USED = (storeSize + sizeof(FloatRegBits) - 1) /
                                     sizeof(FloatRegBits);

    assert(storeRegs > 0 && storeRegs <= 4);

    numMicroops = 1;
    // rm == 15 encodes "no writeback".
    bool wb = (rm != 15);

    if (wb) numMicroops++;
    numMicroops += (regs / elems);
    microOps = new StaticInstPtr[numMicroops];

    // Scratch FP register just past the architectural ones.
    RegIndex ufp0 = NumFloatArchRegs;

    unsigned uopIdx = 0;
    // Pack the selected lane of each source register into the scratch
    // register(s); the uop type is chosen by element count and size.
    switch (elems) {
      case 4:
        assert(regs == 4);
        switch (size) {
          case 0:
            microOps[uopIdx++] = new MicroPackNeon8to2Uop<uint8_t>(
                    machInst, ufp0, vd * 2, inc * 2, lane);
            break;
          case 1:
            microOps[uopIdx++] = new MicroPackNeon8to2Uop<uint16_t>(
                    machInst, ufp0, vd * 2, inc * 2, lane);
            break;
          case 2:
            microOps[uopIdx++] = new MicroPackNeon8to4Uop<uint32_t>(
                    machInst, ufp0, vd * 2, inc * 2, lane);
            break;
          default:
            // Bad size
            microOps[uopIdx++] = new Unknown(machInst);
            break;
        }
        break;
      case 3:
        assert(regs == 3);
        switch (size) {
          case 0:
            microOps[uopIdx++] = new MicroPackNeon6to2Uop<uint8_t>(
                    machInst, ufp0, vd * 2, inc * 2, lane);
            break;
          case 1:
            microOps[uopIdx++] = new MicroPackNeon6to2Uop<uint16_t>(
                    machInst, ufp0, vd * 2, inc * 2, lane);
            break;
          case 2:
            microOps[uopIdx++] = new MicroPackNeon6to4Uop<uint32_t>(
                    machInst, ufp0, vd * 2, inc * 2, lane);
            break;
          default:
            // Bad size
            microOps[uopIdx++] = new Unknown(machInst);
            break;
        }
        break;
      case 2:
        assert(regs == 2);
        assert(storeRegs <= 2);
        switch (size) {
          case 0:
            microOps[uopIdx++] = new MicroPackNeon4to2Uop<uint8_t>(
                    machInst, ufp0, vd * 2, inc * 2, lane);
            break;
          case 1:
            microOps[uopIdx++] = new MicroPackNeon4to2Uop<uint16_t>(
                    machInst, ufp0, vd * 2, inc * 2, lane);
            break;
          case 2:
            microOps[uopIdx++] = new MicroPackNeon4to2Uop<uint32_t>(
                    machInst, ufp0, vd * 2, inc * 2, lane);
            break;
          default:
            // Bad size
            microOps[uopIdx++] = new Unknown(machInst);
            break;
        }
        break;
      case 1:
        // One pack uop per source register.
        assert(regs == 1 || (all && regs == 2));
        assert(storeRegs <= 2);
        for (unsigned offset = 0; offset < regs; offset++) {
            switch (size) {
              case 0:
                microOps[uopIdx++] = new MicroPackNeon2to2Uop<uint8_t>(
                        machInst, ufp0, (vd + offset) * 2, inc * 2, lane);
                break;
              case 1:
                microOps[uopIdx++] = new MicroPackNeon2to2Uop<uint16_t>(
                        machInst, ufp0, (vd + offset) * 2, inc * 2, lane);
                break;
              case 2:
                microOps[uopIdx++] = new MicroPackNeon2to2Uop<uint32_t>(
                        machInst, ufp0, (vd + offset) * 2, inc * 2, lane);
                break;
              default:
                // Bad size
                microOps[uopIdx++] = new Unknown(machInst);
                break;
            }
        }
        break;
      default:
        // Bad number of elements to unpack
        microOps[uopIdx++] = new Unknown(machInst);
    }
    // Pick a store uop sized to the whole structure; the element type
    // selects the memory access granularity.
    switch (storeSize) {
      case 1:
        microOps[uopIdx++] = new MicroStrNeon1Uop<uint8_t>(
                machInst, ufp0, rn, 0, align);
        break;
      case 2:
        if (eBytes == 2) {
            microOps[uopIdx++] = new MicroStrNeon2Uop<uint16_t>(
                    machInst, ufp0, rn, 0, align);
        } else {
            microOps[uopIdx++] = new MicroStrNeon2Uop<uint8_t>(
                    machInst, ufp0, rn, 0, align);
        }
        break;
      case 3:
        microOps[uopIdx++] = new MicroStrNeon3Uop<uint8_t>(
                machInst, ufp0, rn, 0, align);
        break;
      case 4:
        switch (eBytes) {
          case 1:
            microOps[uopIdx++] = new MicroStrNeon4Uop<uint8_t>(
                    machInst, ufp0, rn, 0, align);
            break;
          case 2:
            microOps[uopIdx++] = new MicroStrNeon4Uop<uint16_t>(
                    machInst, ufp0, rn, 0, align);
            break;
          case 4:
            microOps[uopIdx++] = new MicroStrNeon4Uop<uint32_t>(
                    machInst, ufp0, rn, 0, align);
            break;
        }
        break;
      case 6:
        microOps[uopIdx++] = new MicroStrNeon6Uop<uint16_t>(
                machInst, ufp0, rn, 0, align);
        break;
      case 8:
        switch (eBytes) {
          case 2:
            microOps[uopIdx++] = new MicroStrNeon8Uop<uint16_t>(
                    machInst, ufp0, rn, 0, align);
            break;
          case 4:
            microOps[uopIdx++] = new MicroStrNeon8Uop<uint32_t>(
                    machInst, ufp0, rn, 0, align);
            break;
        }
        break;
      case 12:
        microOps[uopIdx++] = new MicroStrNeon12Uop<uint32_t>(
                machInst, ufp0, rn, 0, align);
        break;
      case 16:
        microOps[uopIdx++] = new MicroStrNeon16Uop<uint32_t>(
                machInst, ufp0, rn, 0, align);
        break;
      default:
        // Bad store size
        microOps[uopIdx++] = new Unknown(machInst);
    }
    if (wb) {
        if (rm != 15 && rm != 13) {
            // Writeback by register: Rn += Rm.
            microOps[uopIdx++] =
                new MicroAddUop(machInst, rn, rn, rm, 0, ArmISA::LSL);
        } else {
            // rm == 13 encodes writeback by the transfer size.
            microOps[uopIdx++] =
                new MicroAddiUop(machInst, rn, rn, storeSize);
        }
    }
    // Every uop slot must have been filled exactly once.
    assert(uopIdx == numMicroops);

    for (unsigned i = 0; i < numMicroops - 1; i++) {
        MicroOp * uopPtr = dynamic_cast<MicroOp *>(microOps[i].get());
        assert(uopPtr);
        uopPtr->setDelayedCommit();
    }
    microOps[numMicroops - 1]->setLastMicroop();
}
821
// Break a VFP load/store multiple (VLDM/VSTM and friends) into per-word
// micro-ops: one uop per single-precision register, two per
// double-precision register, plus an optional base writeback uop.
MacroVFPMemOp::MacroVFPMemOp(const char *mnem, ExtMachInst machInst,
                             OpClass __opClass, IntRegIndex rn,
                             RegIndex vd, bool single, bool up,
                             bool writeback, bool load, uint32_t offset) :
    PredMacroOp(mnem, machInst, __opClass)
{
    int i = 0;

    // The lowest order bit selects fldmx (set) or fldmd (clear). These seem
    // to be functionally identical except that fldmx is deprecated. For now
    // we'll assume they're otherwise interchangeable.
    int count = (single ? offset : (offset / 2));
    if (count == 0 || count > NumFloatArchRegs)
        warn_once("Bad offset field for VFP load/store multiple.\n");
    if (count == 0) {
        // Force there to be at least one microop so the macroop makes sense.
        writeback = true;
    }
    if (count > NumFloatArchRegs)
        count = NumFloatArchRegs;

    numMicroops = count * (single ? 1 : 2) + (writeback ? 1 : 0);
    microOps = new StaticInstPtr[numMicroops];

    // Byte offset of the current transfer relative to the base register.
    int64_t addr = 0;

    // Decrementing forms start at the top of the block and work down.
    if (!up)
        addr = 4 * offset;

    bool tempUp = up;
    for (int j = 0; j < count; j++) {
        if (load) {
            if (single) {
                microOps[i++] = new MicroLdrFpUop(machInst, vd++, rn,
                                                  tempUp, addr);
            } else {
                // A double is transferred as two word-sized uops
                // (bottom then top half).
                microOps[i++] = new MicroLdrDBFpUop(machInst, vd++, rn,
                                                    tempUp, addr);
                // NOTE(review): the extra +/-4 for the top half uses the
                // original 'up', not 'tempUp' — confirm this is intended
                // once the offset polarity has been flipped below.
                microOps[i++] = new MicroLdrDTFpUop(machInst, vd++, rn, tempUp,
                                                    addr + (up ? 4 : -4));
            }
        } else {
            if (single) {
                microOps[i++] = new MicroStrFpUop(machInst, vd++, rn,
                                                  tempUp, addr);
            } else {
                microOps[i++] = new MicroStrDBFpUop(machInst, vd++, rn,
                                                    tempUp, addr);
                microOps[i++] = new MicroStrDTFpUop(machInst, vd++, rn, tempUp,
                                                    addr + (up ? 4 : -4));
            }
        }
        if (!tempUp) {
            addr -= (single ? 4 : 8);
            // The microops don't handle negative displacement, so if we
            // hit zero, flip the polarity and start adding.
            if (addr <= 0) {
                tempUp = true;
                addr = -addr;
            }
        } else {
            addr += (single ? 4 : 8);
        }
    }

    if (writeback) {
        // Update the base register by the total transfer size.
        if (up) {
            microOps[i++] =
                new MicroAddiUop(machInst, rn, rn, 4 * offset);
        } else {
            microOps[i++] =
                new MicroSubiUop(machInst, rn, rn, 4 * offset);
        }
    }

    // Every uop slot must have been filled exactly once.
    assert(numMicroops == i);
    microOps[numMicroops - 1]->setLastMicroop();

    // All uops before the last must delay their commit so the whole
    // macro-op can be squashed as a unit.
    for (StaticInstPtr *curUop = microOps;
         !(*curUop)->isLastMicroop(); curUop++) {
        MicroOp * uopPtr = dynamic_cast<MicroOp *>(curUop->get());
        assert(uopPtr);
        uopPtr->setDelayedCommit();
    }
}
907
908 std::string
909 MicroIntImmOp::generateDisassembly(Addr pc, const SymbolTable *symtab) const
910 {
911 std::stringstream ss;
912 printMnemonic(ss);
913 printReg(ss, ura);
914 ss << ", ";
915 printReg(ss, urb);
916 ss << ", ";
917 ccprintf(ss, "#%d", imm);
918 return ss.str();
919 }
920
921 std::string
922 MicroSetPCCPSR::generateDisassembly(Addr pc, const SymbolTable *symtab) const
923 {
924 std::stringstream ss;
925 printMnemonic(ss);
926 ss << "[PC,CPSR]";
927 return ss.str();
928 }
929
930 std::string
931 MicroIntMov::generateDisassembly(Addr pc, const SymbolTable *symtab) const
932 {
933 std::stringstream ss;
934 printMnemonic(ss);
935 printReg(ss, ura);
936 ss << ", ";
937 printReg(ss, urb);
938 return ss.str();
939 }
940
941 std::string
942 MicroIntOp::generateDisassembly(Addr pc, const SymbolTable *symtab) const
943 {
944 std::stringstream ss;
945 printMnemonic(ss);
946 printReg(ss, ura);
947 ss << ", ";
948 printReg(ss, urb);
949 ss << ", ";
950 printReg(ss, urc);
951 return ss.str();
952 }
953
954 std::string
955 MicroMemOp::generateDisassembly(Addr pc, const SymbolTable *symtab) const
956 {
957 std::stringstream ss;
958 printMnemonic(ss);
959 printReg(ss, ura);
960 ss << ", [";
961 printReg(ss, urb);
962 ss << ", ";
963 ccprintf(ss, "#%d", imm);
964 ss << "]";
965 return ss.str();
966 }
967
968 }