/* simulator.c -- Interface for the AArch64 simulator.

   Copyright (C) 2015-2016 Free Software Foundation, Inc.

   Contributed by Red Hat.

   This file is part of GDB.

   This program is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3 of the License, or
   (at your option) any later version.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program.  If not, see <http://www.gnu.org/licenses/>.  */

#include "config.h"
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <sys/types.h>
#include <math.h>
#include <time.h>
#include <limits.h>

#include "simulator.h"
#include "cpustate.h"
#include "memory.h"

#define NO_SP 0
#define SP_OK 1

#define TST(_flag)   (aarch64_test_CPSR_bit (cpu, _flag))
#define IS_SET(_X)   (TST (( _X )) ? 1 : 0)
#define IS_CLEAR(_X) (TST (( _X )) ? 0 : 1)

/* Space saver macro.  */
#define INSTR(HIGH, LOW) uimm (aarch64_get_instr (cpu), (HIGH), (LOW))

#define HALT_UNALLOC							\
  do									\
    {									\
      TRACE_DISASM (cpu, aarch64_get_PC (cpu));				\
      TRACE_INSN (cpu,							\
		  "Unallocated instruction detected at sim line %d,"	\
		  " exe addr %" PRIx64,					\
		  __LINE__, aarch64_get_PC (cpu));			\
      sim_engine_halt (CPU_STATE (cpu), cpu, NULL, aarch64_get_PC (cpu),\
		       sim_stopped, SIM_SIGILL);			\
    }									\
  while (0)

#define HALT_NYI							\
  do									\
    {									\
      TRACE_DISASM (cpu, aarch64_get_PC (cpu));				\
      TRACE_INSN (cpu,							\
		  "Unimplemented instruction detected at sim line %d,"	\
		  " exe addr %" PRIx64,					\
		  __LINE__, aarch64_get_PC (cpu));			\
      if (! TRACE_ANY_P (cpu))						\
	sim_io_eprintf (CPU_STATE (cpu), "SIM Error: Unimplemented instruction: %#08x\n", \
			aarch64_get_instr (cpu));			\
      sim_engine_halt (CPU_STATE (cpu), cpu, NULL, aarch64_get_PC (cpu),\
		       sim_stopped, SIM_SIGABRT);			\
    }									\
  while (0)

#define NYI_assert(HI, LO, EXPECTED)					\
  do									\
    {									\
      if (INSTR ((HI), (LO)) != (EXPECTED))				\
	HALT_NYI;							\
    }									\
  while (0)
/* Helper functions used by expandLogicalImmediate.  */

/* For i = 1, .., N result<i-1> = 1; all other bits are zero.  */
static inline uint64_t
ones (int N)
{
  return (N == 64 ? (uint64_t) -1 : ((1ULL << N) - 1));
}

/* result<0> = val<N>.  */
static inline uint64_t
pickbit (uint64_t val, int N)
{
  return pickbits64 (val, N, N);
}
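
/* Illustrative values (not part of the upstream source):

     ones (3)          => 0x7   (binary 111)
     ones (64)         => 0xffffffffffffffff
     pickbit (0xa, 3)  => 1     (bit 3 of 1010)
     pickbit (0xa, 0)  => 0

   pickbits64 (val, hi, lo), declared in the simulator headers,
   extracts bits hi..lo of val; pickbit is the single-bit case.  */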

static uint64_t
expand_logical_immediate (uint32_t S, uint32_t R, uint32_t N)
{
  uint64_t mask;
  uint64_t imm;
  unsigned simd_size;

  /* The immediate value is a run of S+1 ones, left rotated by
     simd_size - R (in other words, right rotated by R), then
     replicated across the 64 bit result.  */
  if (N != 0)
    {
      simd_size = 64;
      mask = 0xffffffffffffffffull;
    }
  else
    {
      switch (S)
	{
	case 0x00 ... 0x1f: /* 0xxxxx */ simd_size = 32;           break;
	case 0x20 ... 0x2f: /* 10xxxx */ simd_size = 16; S &= 0xf; break;
	case 0x30 ... 0x37: /* 110xxx */ simd_size =  8; S &= 0x7; break;
	case 0x38 ... 0x3b: /* 1110xx */ simd_size =  4; S &= 0x3; break;
	case 0x3c ... 0x3d: /* 11110x */ simd_size =  2; S &= 0x1; break;
	default: return 0;
	}
      mask = (1ull << simd_size) - 1;
      /* Top bits are IGNORED.  */
      R &= simd_size - 1;
    }

  /* NOTE: if S = simd_size - 1 we get 0xf..f which is rejected.  */
  if (S == simd_size - 1)
    return 0;

  /* S+1 consecutive bits to 1.  */
  /* NOTE: S can't be 63 due to detection above.  */
  imm = (1ull << (S + 1)) - 1;

  /* Rotate to the left by simd_size - R.  */
  if (R != 0)
    imm = ((imm << (simd_size - R)) & mask) | (imm >> R);

  /* Replicate the value according to SIMD size.  Each case doubles the
     width of the pattern and deliberately falls through to the next.  */
  switch (simd_size)
    {
    case  2: imm = (imm <<  2) | imm; /* Fall through.  */
    case  4: imm = (imm <<  4) | imm; /* Fall through.  */
    case  8: imm = (imm <<  8) | imm; /* Fall through.  */
    case 16: imm = (imm << 16) | imm; /* Fall through.  */
    case 32: imm = (imm << 32) | imm; /* Fall through.  */
    case 64: break;
    default: return 0;
    }

  return imm;
}
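
/* Worked example (illustrative, not from the upstream source): the
   encoding N = 0, immr = 1, imms = 0x3c selects the "11110x" row, so
   simd_size = 2 and S becomes 0.  The run of S+1 = 1 ones is 0b01;
   rotating left by simd_size - R = 1 gives 0b10; replication then
   doubles the pattern out to 64 bits:

     0b10 -> 0xa -> 0xaa -> 0xaaaa -> 0xaaaaaaaa -> 0xaaaaaaaaaaaaaaaa

   which is how ORR/AND bitmask immediates encode alternating-bit
   masks.  */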

/* Instr[22,10] encodes N, immr and imms.  We want a lookup table
   for each possible combination, i.e. 13 bits worth of int entries.  */
#define LI_TABLE_SIZE  (1 << 13)
static uint64_t LITable[LI_TABLE_SIZE];

void
aarch64_init_LIT_table (void)
{
  unsigned index;

  for (index = 0; index < LI_TABLE_SIZE; index++)
    {
      uint32_t N    = uimm (index, 12, 12);
      uint32_t immr = uimm (index, 11, 6);
      uint32_t imms = uimm (index, 5, 0);

      LITable [index] = expand_logical_immediate (imms, immr, N);
    }
}
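
/* Illustrative sketch (not part of the upstream source) of how a
   decoder consults the table: the 13 bit index is simply the
   concatenation N:immr:imms taken from instr[22,10], and a zero
   entry marks an unallocated encoding.

     uint32_t index = INSTR (22, 10);   -- N:immr:imms
     uint64_t imm   = LITable[index];
     if (imm == 0)
       HALT_UNALLOC;                    -- not a valid bitmask immediate
*/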

static void
dexNotify (sim_cpu *cpu)
{
  /* instr[14,0] == type : 0 ==> method entry, 1 ==> method reentry
                           2 ==> exit Java, 3 ==> start next bytecode.  */
  uint32_t type = INSTR (14, 0);

  TRACE_EVENTS (cpu, "Notify Insn encountered, type = 0x%x", type);

  switch (type)
    {
    case 0:
      /* aarch64_notifyMethodEntry (aarch64_get_reg_u64 (cpu, R23, 0),
	 aarch64_get_reg_u64 (cpu, R22, 0));  */
      break;
    case 1:
      /* aarch64_notifyMethodReentry (aarch64_get_reg_u64 (cpu, R23, 0),
	 aarch64_get_reg_u64 (cpu, R22, 0));  */
      break;
    case 2:
      /* aarch64_notifyMethodExit ();  */
      break;
    case 3:
      /* aarch64_notifyBCStart (aarch64_get_reg_u64 (cpu, R23, 0),
	 aarch64_get_reg_u64 (cpu, R22, 0));  */
      break;
    }
}

/* Secondary decode within top level groups.  */

static void
dexPseudo (sim_cpu *cpu)
{
  /* assert instr[28,27] = 00

     We provide 2 pseudo instructions:

     HALT stops execution of the simulator causing an immediate
     return to the x86 code which entered it.

     CALLOUT initiates recursive entry into x86 code.  A register
     argument holds the address of the x86 routine.  Immediate
     values in the instruction identify the number of general
     purpose and floating point register arguments to be passed
     and the type of any value to be returned.  */

  uint32_t PSEUDO_HALT     = 0xE0000000U;
  uint32_t PSEUDO_CALLOUT  = 0x00018000U;
  uint32_t PSEUDO_CALLOUTR = 0x00018001U;
  uint32_t PSEUDO_NOTIFY   = 0x00014000U;
  uint32_t dispatch;

  if (aarch64_get_instr (cpu) == PSEUDO_HALT)
    {
      TRACE_EVENTS (cpu, " Pseudo Halt Instruction");
      sim_engine_halt (CPU_STATE (cpu), cpu, NULL, aarch64_get_PC (cpu),
		       sim_stopped, SIM_SIGTRAP);
    }

  dispatch = INSTR (31, 15);

  /* We do not handle callouts at the moment.  */
  if (dispatch == PSEUDO_CALLOUT || dispatch == PSEUDO_CALLOUTR)
    {
      TRACE_EVENTS (cpu, " Callout");
      sim_engine_halt (CPU_STATE (cpu), cpu, NULL, aarch64_get_PC (cpu),
		       sim_stopped, SIM_SIGABRT);
    }

  else if (dispatch == PSEUDO_NOTIFY)
    dexNotify (cpu);

  else
    HALT_UNALLOC;
}

/* Load-store single register (unscaled offset)
   These instructions employ a base register plus an unscaled signed
   9 bit offset.

   N.B. the base register (source) can be Xn or SP.  All other
   registers may not be SP.  */

/* 32 bit load 32 bit unscaled signed 9 bit.  */
static void
ldur32 (sim_cpu *cpu, int32_t offset)
{
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_reg_u64 (cpu, rt, NO_SP, aarch64_get_mem_u32
		       (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK)
			+ offset));
}

/* 64 bit load 64 bit unscaled signed 9 bit.  */
static void
ldur64 (sim_cpu *cpu, int32_t offset)
{
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_reg_u64 (cpu, rt, NO_SP, aarch64_get_mem_u64
		       (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK)
			+ offset));
}

/* 32 bit load zero-extended byte unscaled signed 9 bit.  */
static void
ldurb32 (sim_cpu *cpu, int32_t offset)
{
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_reg_u64 (cpu, rt, NO_SP, aarch64_get_mem_u8
		       (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK)
			+ offset));
}

/* 32 bit load sign-extended byte unscaled signed 9 bit.  */
static void
ldursb32 (sim_cpu *cpu, int32_t offset)
{
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_reg_u64 (cpu, rt, NO_SP, (uint32_t) aarch64_get_mem_s8
		       (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK)
			+ offset));
}

/* 64 bit load sign-extended byte unscaled signed 9 bit.  */
static void
ldursb64 (sim_cpu *cpu, int32_t offset)
{
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_reg_s64 (cpu, rt, NO_SP, aarch64_get_mem_s8
		       (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK)
			+ offset));
}

/* 32 bit load zero-extended short unscaled signed 9 bit.  */
static void
ldurh32 (sim_cpu *cpu, int32_t offset)
{
  unsigned rn = INSTR (9, 5);
  unsigned rd = INSTR (4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_reg_u64 (cpu, rd, NO_SP, aarch64_get_mem_u16
		       (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK)
			+ offset));
}

/* 32 bit load sign-extended short unscaled signed 9 bit.  */
static void
ldursh32 (sim_cpu *cpu, int32_t offset)
{
  unsigned rn = INSTR (9, 5);
  unsigned rd = INSTR (4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_reg_u64 (cpu, rd, NO_SP, (uint32_t) aarch64_get_mem_s16
		       (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK)
			+ offset));
}

/* 64 bit load sign-extended short unscaled signed 9 bit.  */
static void
ldursh64 (sim_cpu *cpu, int32_t offset)
{
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_reg_s64 (cpu, rt, NO_SP, aarch64_get_mem_s16
		       (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK)
			+ offset));
}

/* 64 bit load sign-extended word unscaled signed 9 bit.  */
static void
ldursw (sim_cpu *cpu, int32_t offset)
{
  unsigned rn = INSTR (9, 5);
  unsigned rd = INSTR (4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_reg_u64 (cpu, rd, NO_SP, (uint32_t) aarch64_get_mem_s32
		       (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK)
			+ offset));
}

/* N.B. with stores the value in source is written to the address
   identified by source2 modified by offset.  */

/* 32 bit store 32 bit unscaled signed 9 bit.  */
static void
stur32 (sim_cpu *cpu, int32_t offset)
{
  unsigned rn = INSTR (9, 5);
  unsigned rd = INSTR (4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_mem_u32 (cpu,
		       aarch64_get_reg_u64 (cpu, rn, SP_OK) + offset,
		       aarch64_get_reg_u32 (cpu, rd, NO_SP));
}

/* 64 bit store 64 bit unscaled signed 9 bit.  */
static void
stur64 (sim_cpu *cpu, int32_t offset)
{
  unsigned rn = INSTR (9, 5);
  unsigned rd = INSTR (4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_mem_u64 (cpu,
		       aarch64_get_reg_u64 (cpu, rn, SP_OK) + offset,
		       aarch64_get_reg_u64 (cpu, rd, NO_SP));
}

/* 32 bit store byte unscaled signed 9 bit.  */
static void
sturb (sim_cpu *cpu, int32_t offset)
{
  unsigned rn = INSTR (9, 5);
  unsigned rd = INSTR (4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_mem_u8 (cpu,
		      aarch64_get_reg_u64 (cpu, rn, SP_OK) + offset,
		      aarch64_get_reg_u8 (cpu, rd, NO_SP));
}

/* 32 bit store short unscaled signed 9 bit.  */
static void
sturh (sim_cpu *cpu, int32_t offset)
{
  unsigned rn = INSTR (9, 5);
  unsigned rd = INSTR (4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_mem_u16 (cpu,
		       aarch64_get_reg_u64 (cpu, rn, SP_OK) + offset,
		       aarch64_get_reg_u16 (cpu, rd, NO_SP));
}

/* Load single register pc-relative label
   Offset is a signed 19 bit immediate count in words.
   rt may not be SP.  */

/* 32 bit pc-relative load.  */
static void
ldr32_pcrel (sim_cpu *cpu, int32_t offset)
{
  unsigned rd = INSTR (4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_reg_u64 (cpu, rd, NO_SP,
		       aarch64_get_mem_u32
		       (cpu, aarch64_get_PC (cpu) + offset * 4));
}

/* 64 bit pc-relative load.  */
static void
ldr_pcrel (sim_cpu *cpu, int32_t offset)
{
  unsigned rd = INSTR (4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_reg_u64 (cpu, rd, NO_SP,
		       aarch64_get_mem_u64
		       (cpu, aarch64_get_PC (cpu) + offset * 4));
}

/* sign extended 32 bit pc-relative load.  */
static void
ldrsw_pcrel (sim_cpu *cpu, int32_t offset)
{
  unsigned rd = INSTR (4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_reg_u64 (cpu, rd, NO_SP,
		       aarch64_get_mem_s32
		       (cpu, aarch64_get_PC (cpu) + offset * 4));
}

/* float pc-relative load.  */
static void
fldrs_pcrel (sim_cpu *cpu, int32_t offset)
{
  unsigned int rd = INSTR (4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_vec_u32 (cpu, rd, 0,
		       aarch64_get_mem_u32
		       (cpu, aarch64_get_PC (cpu) + offset * 4));
}

/* double pc-relative load.  */
static void
fldrd_pcrel (sim_cpu *cpu, int32_t offset)
{
  unsigned int st = INSTR (4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_vec_u64 (cpu, st, 0,
		       aarch64_get_mem_u64
		       (cpu, aarch64_get_PC (cpu) + offset * 4));
}

/* long double pc-relative load.  */
static void
fldrq_pcrel (sim_cpu *cpu, int32_t offset)
{
  unsigned int st = INSTR (4, 0);
  uint64_t addr = aarch64_get_PC (cpu) + offset * 4;
  FRegister a;

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_get_mem_long_double (cpu, addr, & a);
  aarch64_set_FP_long_double (cpu, st, a);
}

/* This can be used to scale an offset by applying
   the requisite shift.  The second argument is the
   element size in bits: 16, 32, 64 or 128.  */

#define SCALE(_offset, _elementSize) \
    ((_offset) << ScaleShift ## _elementSize)

/* This can be used to optionally scale a register derived offset
   by applying the requisite shift as indicated by the Scaling
   argument.  The second argument is the element size in bits,
   i.e. 16, 32, 64 or 128.  The third argument is either Scaled or
   Unscaled.  N.B. when _Scaling is Scaled the shift gets ANDed with
   all 1s while when it is Unscaled it gets ANDed with 0.  */

#define OPT_SCALE(_offset, _elementType, _Scaling) \
  ((_offset) << (_Scaling ? ScaleShift ## _elementType : 0))
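
/* Illustrative expansions (not part of the upstream source), assuming
   the usual ScaleShift definitions of 1, 2, 3 and 4 for element sizes
   16, 32, 64 and 128 respectively:

     SCALE (3, 32)               => 3 << 2 == 12 byte displacement
     OPT_SCALE (3, 64, Scaled)   => 3 << 3 == 24 byte displacement
     OPT_SCALE (3, 64, Unscaled) => 3       (shift suppressed)  */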

/* This can be used to zero- or sign-extend a 32 bit register derived
   value to a 64 bit value.  The first argument must be the value as
   a uint32_t and the second must be either UXTW or SXTW.  The result
   is returned as an int64_t.  */

static inline int64_t
extend (uint32_t value, Extension extension)
{
  union
  {
    uint32_t u;
    int32_t  n;
  } x;

  /* A branchless variant of this ought to be possible.  */
  if (extension == UXTW || extension == NoExtension)
    return value;

  x.u = value;
  return x.n;
}
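
/* Illustrative values (not part of the upstream source):

     extend (0xffffffff, UXTW) => 0x00000000ffffffff  (zero-extend)
     extend (0xffffffff, SXTW) => -1                  (sign-extend)
     extend (0x7fffffff, SXTW) => 0x000000007fffffff  (sign bit clear)

   The union type-pun is the portable C idiom for reinterpreting the
   bits as signed before the widening conversion.  */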

/* Scalar Floating Point

   FP load/store single register (4 addressing modes)

   N.B. the base register (source) can be the stack pointer.
   The secondary source register (source2) can only be an Xn register.  */

/* Load 32 bit unscaled signed 9 bit with pre- or post-writeback.  */
static void
fldrs_wb (sim_cpu *cpu, int32_t offset, WriteBack wb)
{
  unsigned rn = INSTR (9, 5);
  unsigned st = INSTR (4, 0);
  uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);

  if (wb != Post)
    address += offset;

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_vec_u32 (cpu, st, 0, aarch64_get_mem_u32 (cpu, address));
  if (wb == Post)
    address += offset;

  if (wb != NoWriteBack)
    aarch64_set_reg_u64 (cpu, rn, SP_OK, address);
}
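
/* All the *_wb loads and stores share this addressing pattern
   (illustrative summary, not part of the upstream source).  For a
   base register Xn holding 0x1000 and offset 16:

     Pre          access address = 0x1010, Xn becomes 0x1010
     Post         access address = 0x1000, Xn becomes 0x1010
     NoWriteBack  access address = 0x1010, Xn unchanged

   i.e. the offset is applied before the access unless the mode is
   Post, and the updated address is written back unless the mode is
   NoWriteBack.  */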

/* Load 8 bit with unsigned 12 bit offset.  */
static void
fldrb_abs (sim_cpu *cpu, uint32_t offset)
{
  unsigned rd = INSTR (4, 0);
  unsigned rn = INSTR (9, 5);
  uint64_t addr = aarch64_get_reg_u64 (cpu, rn, SP_OK) + offset;

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  /* A byte load must read just one byte, not a 32 bit word.  */
  aarch64_set_vec_u8 (cpu, rd, 0, aarch64_get_mem_u8 (cpu, addr));
}

/* Load 16 bit scaled unsigned 12 bit.  */
static void
fldrh_abs (sim_cpu *cpu, uint32_t offset)
{
  unsigned rd = INSTR (4, 0);
  unsigned rn = INSTR (9, 5);
  uint64_t addr = aarch64_get_reg_u64 (cpu, rn, SP_OK) + SCALE (offset, 16);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_vec_u16 (cpu, rd, 0, aarch64_get_mem_u16 (cpu, addr));
}

/* Load 32 bit scaled unsigned 12 bit.  */
static void
fldrs_abs (sim_cpu *cpu, uint32_t offset)
{
  unsigned rd = INSTR (4, 0);
  unsigned rn = INSTR (9, 5);
  uint64_t addr = aarch64_get_reg_u64 (cpu, rn, SP_OK) + SCALE (offset, 32);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_vec_u32 (cpu, rd, 0, aarch64_get_mem_u32 (cpu, addr));
}

/* Load 64 bit scaled unsigned 12 bit.  */
static void
fldrd_abs (sim_cpu *cpu, uint32_t offset)
{
  unsigned rd = INSTR (4, 0);
  unsigned rn = INSTR (9, 5);
  uint64_t addr = aarch64_get_reg_u64 (cpu, rn, SP_OK) + SCALE (offset, 64);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_vec_u64 (cpu, rd, 0, aarch64_get_mem_u64 (cpu, addr));
}

/* Load 128 bit scaled unsigned 12 bit.  */
static void
fldrq_abs (sim_cpu *cpu, uint32_t offset)
{
  unsigned rd = INSTR (4, 0);
  unsigned rn = INSTR (9, 5);
  uint64_t addr = aarch64_get_reg_u64 (cpu, rn, SP_OK) + SCALE (offset, 128);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_vec_u64 (cpu, rd, 0, aarch64_get_mem_u64 (cpu, addr));
  aarch64_set_vec_u64 (cpu, rd, 1, aarch64_get_mem_u64 (cpu, addr + 8));
}

/* Load 32 bit scaled or unscaled zero- or sign-extended
   32-bit register offset.  */
static void
fldrs_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
{
  unsigned rm = INSTR (20, 16);
  unsigned rn = INSTR (9, 5);
  unsigned st = INSTR (4, 0);
  uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
  int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP), extension);
  uint64_t displacement = OPT_SCALE (extended, 32, scaling);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_vec_u32 (cpu, st, 0, aarch64_get_mem_u32
		       (cpu, address + displacement));
}

/* Load 64 bit unscaled signed 9 bit with pre- or post-writeback.  */
static void
fldrd_wb (sim_cpu *cpu, int32_t offset, WriteBack wb)
{
  unsigned rn = INSTR (9, 5);
  unsigned st = INSTR (4, 0);
  uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);

  if (wb != Post)
    address += offset;

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_vec_u64 (cpu, st, 0, aarch64_get_mem_u64 (cpu, address));

  if (wb == Post)
    address += offset;

  if (wb != NoWriteBack)
    aarch64_set_reg_u64 (cpu, rn, SP_OK, address);
}

/* Load 64 bit scaled or unscaled zero- or sign-extended
   32-bit register offset.  */
static void
fldrd_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
{
  unsigned rm = INSTR (20, 16);
  int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP), extension);
  uint64_t displacement = OPT_SCALE (extended, 64, scaling);

  fldrd_wb (cpu, displacement, NoWriteBack);
}

/* Load 128 bit unscaled signed 9 bit with pre- or post-writeback.  */
static void
fldrq_wb (sim_cpu *cpu, int32_t offset, WriteBack wb)
{
  FRegister a;
  unsigned rn = INSTR (9, 5);
  unsigned st = INSTR (4, 0);
  uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);

  if (wb != Post)
    address += offset;

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_get_mem_long_double (cpu, address, & a);
  aarch64_set_FP_long_double (cpu, st, a);

  if (wb == Post)
    address += offset;

  if (wb != NoWriteBack)
    aarch64_set_reg_u64 (cpu, rn, SP_OK, address);
}

/* Load 128 bit scaled or unscaled zero- or sign-extended
   32-bit register offset.  */
static void
fldrq_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
{
  unsigned rm = INSTR (20, 16);
  int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP), extension);
  uint64_t displacement = OPT_SCALE (extended, 128, scaling);

  fldrq_wb (cpu, displacement, NoWriteBack);
}

/* Memory Access

   load-store single register
   There are four addressing modes available here which all employ a
   64 bit source (base) register.

   N.B. the base register (source) can be the stack pointer.
   The secondary source register (source2) can only be an Xn register.

   Scaled, 12-bit, unsigned immediate offset, without pre- and
   post-index options.
   Unscaled, 9-bit, signed immediate offset with pre- or post-index
   writeback.
   scaled or unscaled 64-bit register offset.
   scaled or unscaled 32-bit extended register offset.

   All offsets are assumed to be raw from the decode, i.e. the
   simulator is expected to adjust scaled offsets based on the
   accessed data size.  The same applies to the register and extended
   register offset versions, except that in the latter case the
   operation may also require a sign extend.

   A separate method is provided for each possible addressing mode.  */

/* 32 bit load 32 bit scaled unsigned 12 bit.  */
static void
ldr32_abs (sim_cpu *cpu, uint32_t offset)
{
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  /* The target register may not be SP but the source may be.  */
  aarch64_set_reg_u64 (cpu, rt, NO_SP, aarch64_get_mem_u32
		       (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK)
			+ SCALE (offset, 32)));
}

/* 32 bit load 32 bit unscaled signed 9 bit with pre- or post-writeback.  */
static void
ldr32_wb (sim_cpu *cpu, int32_t offset, WriteBack wb)
{
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);
  uint64_t address;

  if (rn == rt && wb != NoWriteBack)
    HALT_UNALLOC;

  address = aarch64_get_reg_u64 (cpu, rn, SP_OK);

  if (wb != Post)
    address += offset;

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_reg_u64 (cpu, rt, NO_SP, aarch64_get_mem_u32 (cpu, address));

  if (wb == Post)
    address += offset;

  if (wb != NoWriteBack)
    aarch64_set_reg_u64 (cpu, rn, SP_OK, address);
}

/* 32 bit load 32 bit scaled or unscaled
   zero- or sign-extended 32-bit register offset.  */
static void
ldr32_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
{
  unsigned rm = INSTR (20, 16);
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);
  /* rn may reference SP, rm and rt must reference ZR.  */

  uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
  int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP), extension);
  uint64_t displacement = OPT_SCALE (extended, 32, scaling);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_reg_u64 (cpu, rt, NO_SP,
		       aarch64_get_mem_u32 (cpu, address + displacement));
}

/* 64 bit load 64 bit scaled unsigned 12 bit.  */
static void
ldr_abs (sim_cpu *cpu, uint32_t offset)
{
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  /* The target register may not be SP but the source may be.  */
  aarch64_set_reg_u64 (cpu, rt, NO_SP, aarch64_get_mem_u64
		       (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK)
			+ SCALE (offset, 64)));
}

/* 64 bit load 64 bit unscaled signed 9 bit with pre- or post-writeback.  */
static void
ldr_wb (sim_cpu *cpu, int32_t offset, WriteBack wb)
{
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);
  uint64_t address;

  if (rn == rt && wb != NoWriteBack)
    HALT_UNALLOC;

  address = aarch64_get_reg_u64 (cpu, rn, SP_OK);

  if (wb != Post)
    address += offset;

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_reg_u64 (cpu, rt, NO_SP, aarch64_get_mem_u64 (cpu, address));

  if (wb == Post)
    address += offset;

  if (wb != NoWriteBack)
    aarch64_set_reg_u64 (cpu, rn, SP_OK, address);
}

/* 64 bit load 64 bit scaled or unscaled zero-
   or sign-extended 32-bit register offset.  */
static void
ldr_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
{
  unsigned rm = INSTR (20, 16);
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);
  /* rn may reference SP, rm and rt must reference ZR.  */

  uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
  int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP), extension);
  uint64_t displacement = OPT_SCALE (extended, 64, scaling);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_reg_u64 (cpu, rt, NO_SP,
		       aarch64_get_mem_u64 (cpu, address + displacement));
}
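
/* Illustrative decode (not part of the upstream source): for the
   instruction LDR X0, [X1, W2, SXTW #3] the fields give rm = 2,
   rn = 1 and rt = 0, with extension SXTW and scaling Scaled.  If W2
   holds 0xfffffffe (-2), the displacement computed above is
   (-2) << 3 = -16 bytes from the base address in X1.  */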

/* 32 bit load zero-extended byte scaled unsigned 12 bit.  */
static void
ldrb32_abs (sim_cpu *cpu, uint32_t offset)
{
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  /* The target register may not be SP but the source may be.
     There is no scaling required for a byte load.  */
  aarch64_set_reg_u64 (cpu, rt, NO_SP,
		       aarch64_get_mem_u8
		       (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK) + offset));
}

/* 32 bit load zero-extended byte unscaled signed 9 bit with
   pre- or post-writeback.  */
static void
ldrb32_wb (sim_cpu *cpu, int32_t offset, WriteBack wb)
{
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);
  uint64_t address;

  if (rn == rt && wb != NoWriteBack)
    HALT_UNALLOC;

  address = aarch64_get_reg_u64 (cpu, rn, SP_OK);

  if (wb != Post)
    address += offset;

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_reg_u64 (cpu, rt, NO_SP, aarch64_get_mem_u8 (cpu, address));

  if (wb == Post)
    address += offset;

  if (wb != NoWriteBack)
    aarch64_set_reg_u64 (cpu, rn, SP_OK, address);
}

/* 32 bit load zero-extended byte scaled or unscaled zero-
   or sign-extended 32-bit register offset.  */
static void
ldrb32_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
{
  unsigned rm = INSTR (20, 16);
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);
  /* rn may reference SP, rm and rt must reference ZR.  */

  uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
  int64_t displacement = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP),
				 extension);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  /* There is no scaling required for a byte load.  */
  aarch64_set_reg_u64 (cpu, rt, NO_SP,
		       aarch64_get_mem_u8 (cpu, address + displacement));
}

/* 64 bit load sign-extended byte unscaled signed 9 bit
   with pre- or post-writeback.  */
static void
ldrsb_wb (sim_cpu *cpu, int32_t offset, WriteBack wb)
{
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);
  uint64_t address;
  int64_t val;

  if (rn == rt && wb != NoWriteBack)
    HALT_UNALLOC;

  address = aarch64_get_reg_u64 (cpu, rn, SP_OK);

  if (wb != Post)
    address += offset;

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  val = aarch64_get_mem_s8 (cpu, address);
  aarch64_set_reg_s64 (cpu, rt, NO_SP, val);

  if (wb == Post)
    address += offset;

  if (wb != NoWriteBack)
    aarch64_set_reg_u64 (cpu, rn, SP_OK, address);
}

/* 64 bit load sign-extended byte scaled unsigned 12 bit.  */
static void
ldrsb_abs (sim_cpu *cpu, uint32_t offset)
{
  ldrsb_wb (cpu, offset, NoWriteBack);
}

/* 64 bit load sign-extended byte scaled or unscaled zero-
   or sign-extended 32-bit register offset.  */
static void
ldrsb_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
{
  unsigned rm = INSTR (20, 16);
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);
  /* rn may reference SP, rm and rt must reference ZR.  */

  uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
  int64_t displacement = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP),
				 extension);
  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  /* There is no scaling required for a byte load.  */
  aarch64_set_reg_s64 (cpu, rt, NO_SP,
		       aarch64_get_mem_s8 (cpu, address + displacement));
}

/* 32 bit load zero-extended short scaled unsigned 12 bit.  */
static void
ldrh32_abs (sim_cpu *cpu, uint32_t offset)
{
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);
  uint32_t val;

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  /* The target register may not be SP but the source may be.  */
  val = aarch64_get_mem_u16 (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK)
			     + SCALE (offset, 16));
  aarch64_set_reg_u32 (cpu, rt, NO_SP, val);
}

/* 32 bit load zero-extended short unscaled signed 9 bit
   with pre- or post-writeback.  */
static void
ldrh32_wb (sim_cpu *cpu, int32_t offset, WriteBack wb)
{
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);
  uint64_t address;

  if (rn == rt && wb != NoWriteBack)
    HALT_UNALLOC;

  address = aarch64_get_reg_u64 (cpu, rn, SP_OK);

  if (wb != Post)
    address += offset;

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_reg_u32 (cpu, rt, NO_SP, aarch64_get_mem_u16 (cpu, address));

  if (wb == Post)
    address += offset;

  if (wb != NoWriteBack)
    aarch64_set_reg_u64 (cpu, rn, SP_OK, address);
}

/* 32 bit load zero-extended short scaled or unscaled zero-
   or sign-extended 32-bit register offset.  */
static void
ldrh32_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
{
  unsigned rm = INSTR (20, 16);
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);
  /* rn may reference SP, rm and rt must reference ZR.  */

  uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
  int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP), extension);
  uint64_t displacement = OPT_SCALE (extended, 16, scaling);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_reg_u32 (cpu, rt, NO_SP,
		       aarch64_get_mem_u16 (cpu, address + displacement));
}

/* 32 bit load sign-extended short scaled unsigned 12 bit.  */
static void
ldrsh32_abs (sim_cpu *cpu, uint32_t offset)
{
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);
  int32_t val;

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  /* The target register may not be SP but the source may be.  */
  val = aarch64_get_mem_s16 (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK)
			     + SCALE (offset, 16));
  aarch64_set_reg_s32 (cpu, rt, NO_SP, val);
}

/* 32 bit load sign-extended short unscaled signed 9 bit
   with pre- or post-writeback.  */
static void
ldrsh32_wb (sim_cpu *cpu, int32_t offset, WriteBack wb)
{
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);
  uint64_t address;

  if (rn == rt && wb != NoWriteBack)
    HALT_UNALLOC;

  address = aarch64_get_reg_u64 (cpu, rn, SP_OK);

  if (wb != Post)
    address += offset;

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_reg_s32 (cpu, rt, NO_SP,
		       (int32_t) aarch64_get_mem_s16 (cpu, address));

  if (wb == Post)
    address += offset;

  if (wb != NoWriteBack)
    aarch64_set_reg_u64 (cpu, rn, SP_OK, address);
}

/* 32 bit load sign-extended short scaled or unscaled zero-
   or sign-extended 32-bit register offset.  */
static void
ldrsh32_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
{
  unsigned rm = INSTR (20, 16);
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);
  /* rn may reference SP, rm and rt must reference ZR.  */

  uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
  int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP), extension);
  uint64_t displacement = OPT_SCALE (extended, 16, scaling);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_reg_s32 (cpu, rt, NO_SP,
		       (int32_t) aarch64_get_mem_s16
		       (cpu, address + displacement));
}

/* 64 bit load sign-extended short scaled unsigned 12 bit.  */
static void
ldrsh_abs (sim_cpu *cpu, uint32_t offset)
{
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);
  int64_t val;

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  /* The target register may not be SP but the source may be.  */
  val = aarch64_get_mem_s16 (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK)
			     + SCALE (offset, 16));
  aarch64_set_reg_s64 (cpu, rt, NO_SP, val);
}

/* 64 bit load sign-extended short unscaled signed 9 bit
   with pre- or post-writeback.  */
static void
ldrsh64_wb (sim_cpu *cpu, int32_t offset, WriteBack wb)
{
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);
  uint64_t address;
  int64_t val;

  if (rn == rt && wb != NoWriteBack)
    HALT_UNALLOC;

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  address = aarch64_get_reg_u64 (cpu, rn, SP_OK);

  if (wb != Post)
    address += offset;

  val = aarch64_get_mem_s16 (cpu, address);
  aarch64_set_reg_s64 (cpu, rt, NO_SP, val);

  if (wb == Post)
    address += offset;

  if (wb != NoWriteBack)
    aarch64_set_reg_u64 (cpu, rn, SP_OK, address);
}

/* 64 bit load sign-extended short scaled or unscaled zero-
   or sign-extended 32-bit register offset.  */
static void
ldrsh_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
{
  unsigned rm = INSTR (20, 16);
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);

  /* rn may reference SP, rm and rt must reference ZR.  */

  uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
  int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP), extension);
  uint64_t displacement = OPT_SCALE (extended, 16, scaling);
  int64_t val;

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  val = aarch64_get_mem_s16 (cpu, address + displacement);
  aarch64_set_reg_s64 (cpu, rt, NO_SP, val);
}

/* 64 bit load sign-extended 32 bit scaled unsigned 12 bit.  */
static void
ldrsw_abs (sim_cpu *cpu, uint32_t offset)
{
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);
  int64_t val;

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  val = aarch64_get_mem_s32 (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK)
			     + SCALE (offset, 32));
  /* The target register may not be SP but the source may be.  */
  aarch64_set_reg_s64 (cpu, rt, NO_SP, val);
}

/* 64 bit load sign-extended 32 bit unscaled signed 9 bit
   with pre- or post-writeback.  */
static void
ldrsw_wb (sim_cpu *cpu, int32_t offset, WriteBack wb)
{
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);
  uint64_t address;

  if (rn == rt && wb != NoWriteBack)
    HALT_UNALLOC;

  address = aarch64_get_reg_u64 (cpu, rn, SP_OK);

  if (wb != Post)
    address += offset;

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_reg_s64 (cpu, rt, NO_SP, aarch64_get_mem_s32 (cpu, address));

  if (wb == Post)
    address += offset;

  if (wb != NoWriteBack)
    aarch64_set_reg_u64 (cpu, rn, SP_OK, address);
}

/* 64 bit load sign-extended 32 bit scaled or unscaled zero-
   or sign-extended 32-bit register offset.  */
static void
ldrsw_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
{
  unsigned rm = INSTR (20, 16);
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);
  /* rn may reference SP, rm and rt must reference ZR.  */

  uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
  int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP), extension);
  uint64_t displacement = OPT_SCALE (extended, 32, scaling);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_reg_s64 (cpu, rt, NO_SP,
		       aarch64_get_mem_s32 (cpu, address + displacement));
}

/* N.B. with stores the value in source is written to the
   address identified by source2 modified by source3/offset.  */

/* 32 bit store scaled unsigned 12 bit.  */
static void
str32_abs (sim_cpu *cpu, uint32_t offset)
{
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  /* The data register may not be SP but the base may be.  */
  aarch64_set_mem_u32 (cpu, (aarch64_get_reg_u64 (cpu, rn, SP_OK)
			     + SCALE (offset, 32)),
		       aarch64_get_reg_u32 (cpu, rt, NO_SP));
}

/* 32 bit store unscaled signed 9 bit with pre- or post-writeback.  */
static void
str32_wb (sim_cpu *cpu, int32_t offset, WriteBack wb)
{
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);
  uint64_t address;

  if (rn == rt && wb != NoWriteBack)
    HALT_UNALLOC;

  address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
  if (wb != Post)
    address += offset;

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_mem_u32 (cpu, address, aarch64_get_reg_u32 (cpu, rt, NO_SP));

  if (wb == Post)
    address += offset;

  if (wb != NoWriteBack)
    aarch64_set_reg_u64 (cpu, rn, SP_OK, address);
}

/* 32 bit store scaled or unscaled zero- or
   sign-extended 32-bit register offset.  */
static void
str32_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
{
  unsigned rm = INSTR (20, 16);
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);

  uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
  int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP), extension);
  uint64_t displacement = OPT_SCALE (extended, 32, scaling);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_mem_u32 (cpu, address + displacement,
		       aarch64_get_reg_u32 (cpu, rt, NO_SP));
}

/* 64 bit store scaled unsigned 12 bit.  */
static void
str_abs (sim_cpu *cpu, uint32_t offset)
{
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_mem_u64 (cpu,
		       aarch64_get_reg_u64 (cpu, rn, SP_OK)
		       + SCALE (offset, 64),
		       aarch64_get_reg_u64 (cpu, rt, NO_SP));
}

/* 64 bit store unscaled signed 9 bit with pre- or post-writeback.  */
static void
str_wb (sim_cpu *cpu, int32_t offset, WriteBack wb)
{
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);
  uint64_t address;

  if (rn == rt && wb != NoWriteBack)
    HALT_UNALLOC;

  address = aarch64_get_reg_u64 (cpu, rn, SP_OK);

  if (wb != Post)
    address += offset;

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_mem_u64 (cpu, address, aarch64_get_reg_u64 (cpu, rt, NO_SP));

  if (wb == Post)
    address += offset;

  if (wb != NoWriteBack)
    aarch64_set_reg_u64 (cpu, rn, SP_OK, address);
}

/* 64 bit store scaled or unscaled zero-
   or sign-extended 32-bit register offset.  */
static void
str_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
{
  unsigned rm = INSTR (20, 16);
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);
  /* rn may reference SP, rm and rt must reference ZR.  */

  uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
  int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP),
			     extension);
  uint64_t displacement = OPT_SCALE (extended, 64, scaling);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_mem_u64 (cpu, address + displacement,
		       aarch64_get_reg_u64 (cpu, rt, NO_SP));
}

/* 32 bit store byte scaled unsigned 12 bit.  */
static void
strb_abs (sim_cpu *cpu, uint32_t offset)
{
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  /* The data register may not be SP but the base may be.
     There is no scaling required for a byte store.  */
  aarch64_set_mem_u8 (cpu,
		      aarch64_get_reg_u64 (cpu, rn, SP_OK) + offset,
		      aarch64_get_reg_u8 (cpu, rt, NO_SP));
}

/* 32 bit store byte unscaled signed 9 bit with pre- or post-writeback.  */
static void
strb_wb (sim_cpu *cpu, int32_t offset, WriteBack wb)
{
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);
  uint64_t address;

  if (rn == rt && wb != NoWriteBack)
    HALT_UNALLOC;

  address = aarch64_get_reg_u64 (cpu, rn, SP_OK);

  if (wb != Post)
    address += offset;

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_mem_u8 (cpu, address, aarch64_get_reg_u8 (cpu, rt, NO_SP));

  if (wb == Post)
    address += offset;

  if (wb != NoWriteBack)
    aarch64_set_reg_u64 (cpu, rn, SP_OK, address);
}

/* 32 bit store byte scaled or unscaled zero-
   or sign-extended 32-bit register offset.  */
static void
strb_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
{
  unsigned rm = INSTR (20, 16);
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);
  /* rn may reference SP, rm and rt must reference ZR.  */

  uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
  int64_t displacement = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP),
				 extension);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  /* There is no scaling required for a byte store.  */
  aarch64_set_mem_u8 (cpu, address + displacement,
		      aarch64_get_reg_u8 (cpu, rt, NO_SP));
}

/* 32 bit store short scaled unsigned 12 bit.  */
static void
strh_abs (sim_cpu *cpu, uint32_t offset)
{
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  /* The data register may not be SP but the base may be.  */
  aarch64_set_mem_u16 (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK)
		       + SCALE (offset, 16),
		       aarch64_get_reg_u16 (cpu, rt, NO_SP));
}

/* 32 bit store short unscaled signed 9 bit with pre- or post-writeback.  */
static void
strh_wb (sim_cpu *cpu, int32_t offset, WriteBack wb)
{
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);
  uint64_t address;

  if (rn == rt && wb != NoWriteBack)
    HALT_UNALLOC;

  address = aarch64_get_reg_u64 (cpu, rn, SP_OK);

  if (wb != Post)
    address += offset;

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_mem_u16 (cpu, address, aarch64_get_reg_u16 (cpu, rt, NO_SP));

  if (wb == Post)
    address += offset;

  if (wb != NoWriteBack)
    aarch64_set_reg_u64 (cpu, rn, SP_OK, address);
}

/* 32 bit store short scaled or unscaled zero-
   or sign-extended 32-bit register offset.  */
static void
strh_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
{
  unsigned rm = INSTR (20, 16);
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);
  /* rn may reference SP, rm and rt must reference ZR.  */

  uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
  int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP), extension);
  uint64_t displacement = OPT_SCALE (extended, 16, scaling);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_mem_u16 (cpu, address + displacement,
		       aarch64_get_reg_u16 (cpu, rt, NO_SP));
}

/* Prefetch unsigned 12 bit.  */
static void
prfm_abs (sim_cpu *cpu, uint32_t offset)
{
  /* instr[4,0] = prfop : 00000 ==> PLDL1KEEP, 00001 ==> PLDL1STRM,
                          00010 ==> PLDL2KEEP, 00011 ==> PLDL2STRM,
                          00100 ==> PLDL3KEEP, 00101 ==> PLDL3STRM,
                          10000 ==> PSTL1KEEP, 10001 ==> PSTL1STRM,
                          10010 ==> PSTL2KEEP, 10011 ==> PSTL2STRM,
                          10100 ==> PSTL3KEEP, 10101 ==> PSTL3STRM,
                          ow ==> UNALLOC
     PrfOp prfop = prfop (instr, 4, 0);
     uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK)
     + SCALE (offset, 64).  */

  /* TODO : implement prefetch of address.  */
}

/* Prefetch scaled or unscaled zero- or sign-extended
   32-bit register offset.  */
static void
prfm_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
{
  /* instr[4,0] = prfop : 00000 ==> PLDL1KEEP, 00001 ==> PLDL1STRM,
                          00010 ==> PLDL2KEEP, 00011 ==> PLDL2STRM,
                          00100 ==> PLDL3KEEP, 00101 ==> PLDL3STRM,
                          10000 ==> PSTL1KEEP, 10001 ==> PSTL1STRM,
                          10010 ==> PSTL2KEEP, 10011 ==> PSTL2STRM,
                          10100 ==> PSTL3KEEP, 10101 ==> PSTL3STRM,
                          ow ==> UNALLOC
     rn may reference SP, rm may only reference ZR
     PrfOp prfop = prfop (instr, 4, 0);
     uint64_t base = aarch64_get_reg_u64 (cpu, rn, SP_OK);
     int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP),
                                extension);
     uint64_t displacement = OPT_SCALE (extended, 64, scaling);
     uint64_t address = base + displacement.  */

  /* TODO : implement prefetch of address.  */
}

/* 64 bit pc-relative prefetch.  */
static void
prfm_pcrel (sim_cpu *cpu, int32_t offset)
{
  /* instr[4,0] = prfop : 00000 ==> PLDL1KEEP, 00001 ==> PLDL1STRM,
                          00010 ==> PLDL2KEEP, 00011 ==> PLDL2STRM,
                          00100 ==> PLDL3KEEP, 00101 ==> PLDL3STRM,
                          10000 ==> PSTL1KEEP, 10001 ==> PSTL1STRM,
                          10010 ==> PSTL2KEEP, 10011 ==> PSTL2STRM,
                          10100 ==> PSTL3KEEP, 10101 ==> PSTL3STRM,
                          ow ==> UNALLOC
     PrfOp prfop = prfop (instr, 4, 0);
     uint64_t address = aarch64_get_PC (cpu) + offset.  */

  /* TODO : implement this.  */
}

/* Load-store exclusive.  */

static void
ldxr (sim_cpu *cpu)
{
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);
  uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
  int size = INSTR (31, 30);
  /* int ordered = INSTR (15, 15);  */
  /* int exclusive = ! INSTR (23, 23);  */

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  switch (size)
    {
    case 0:
      aarch64_set_reg_u64 (cpu, rt, NO_SP, aarch64_get_mem_u8 (cpu, address));
      break;
    case 1:
      aarch64_set_reg_u64 (cpu, rt, NO_SP, aarch64_get_mem_u16 (cpu, address));
      break;
    case 2:
      aarch64_set_reg_u64 (cpu, rt, NO_SP, aarch64_get_mem_u32 (cpu, address));
      break;
    case 3:
      aarch64_set_reg_u64 (cpu, rt, NO_SP, aarch64_get_mem_u64 (cpu, address));
      break;
    }
}

static void
stxr (sim_cpu *cpu)
{
  unsigned rn = INSTR (9, 5);
  unsigned rt = INSTR (4, 0);
  unsigned rs = INSTR (20, 16);
  uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
  int size = INSTR (31, 30);
  uint64_t data = aarch64_get_reg_u64 (cpu, rt, NO_SP);

  switch (size)
    {
    case 0: aarch64_set_mem_u8 (cpu, address, data); break;
    case 1: aarch64_set_mem_u16 (cpu, address, data); break;
    case 2: aarch64_set_mem_u32 (cpu, address, data); break;
    case 3: aarch64_set_mem_u64 (cpu, address, data); break;
    }

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  /* The exclusive monitor is not modelled; the store always
     succeeds, so the status register is always set to 0.  */
  aarch64_set_reg_u64 (cpu, rs, NO_SP, 0);
}
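
/* Illustrative consequence (not part of the upstream source): a
   typical atomic increment loop such as

     retry:
       ldxr  w1, [x0]
       add   w1, w1, #1
       stxr  w2, w1, [x0]
       cbnz  w2, retry

   never takes the backwards branch under this simulator, because
   stxr always writes 0 (success) into its status register.  Since
   the simulator executes a single thread, this is sufficient.  */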

static void
dexLoadLiteral (sim_cpu *cpu)
{
  /* instr[29,27] == 011
     instr[25,24] == 00
     instr[31,30:26] = opc: 000 ==> LDRW,  001 ==> FLDRS
                            010 ==> LDRX,  011 ==> FLDRD
                            100 ==> LDRSW, 101 ==> FLDRQ
                            110 ==> PRFM,  111 ==> UNALLOC
     instr[26] ==> V : 0 ==> GReg, 1 ==> FReg
     instr[23, 5] == simm19  */

  /* unsigned rt = INSTR (4, 0);  */
  uint32_t dispatch = (INSTR (31, 30) << 1) | INSTR (26, 26);
  int32_t imm = simm32 (aarch64_get_instr (cpu), 23, 5);

  switch (dispatch)
    {
    case 0: ldr32_pcrel (cpu, imm); break;
    case 1: fldrs_pcrel (cpu, imm); break;
    case 2: ldr_pcrel   (cpu, imm); break;
    case 3: fldrd_pcrel (cpu, imm); break;
    case 4: ldrsw_pcrel (cpu, imm); break;
    case 5: fldrq_pcrel (cpu, imm); break;
    case 6: prfm_pcrel  (cpu, imm); break;
    case 7:
    default:
      HALT_UNALLOC;
    }
}
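
/* Worked dispatch example (illustrative, not part of the upstream
   source): for LDRSW X5, <label> the opc field instr[31,30] is 10
   and the V bit instr[26] is 0, so dispatch = (2 << 1) | 0 = 4 and
   the signed 19 bit word offset is routed to ldrsw_pcrel above.  */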

/* Immediate arithmetic
   The aimm argument is a 12 bit unsigned value or a 12 bit unsigned
   value left shifted by 12 bits (done at decode).

   N.B. the register args (dest, source) can normally be Xn or SP.
   The exception occurs for flag setting instructions which may
   only use Xn for the output (dest).  */

/* 32 bit add immediate.  */
static void
add32 (sim_cpu *cpu, uint32_t aimm)
{
  unsigned rn = INSTR (9, 5);
  unsigned rd = INSTR (4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_reg_u64 (cpu, rd, SP_OK,
		       aarch64_get_reg_u32 (cpu, rn, SP_OK) + aimm);
}

/* 64 bit add immediate.  */
static void
add64 (sim_cpu *cpu, uint32_t aimm)
{
  unsigned rn = INSTR (9, 5);
  unsigned rd = INSTR (4, 0);

  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  aarch64_set_reg_u64 (cpu, rd, SP_OK,
		       aarch64_get_reg_u64 (cpu, rn, SP_OK) + aimm);
}

static void
set_flags_for_add32 (sim_cpu *cpu, int32_t value1, int32_t value2)
{
  int32_t result = value1 + value2;
  int64_t sresult = (int64_t) value1 + (int64_t) value2;
  uint64_t uresult = (uint64_t)(uint32_t) value1
    + (uint64_t)(uint32_t) value2;
  uint32_t flags = 0;

  if (result == 0)
    flags |= Z;

  if (result & (1 << 31))
    flags |= N;

  /* Compare against the truncated unsigned result; comparing against
     the (sign-extended) int32_t result would wrongly set C for
     negative results that did not actually carry.  */
  if (uresult != (uint32_t) result)
    flags |= C;

  if (sresult != result)
    flags |= V;

  aarch64_set_CPSR (cpu, flags);
}
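
/* Worked example (illustrative, not part of the upstream source):
   adding value1 = 0x7fffffff and value2 = 1 gives result =
   0x80000000, so N is set (bit 31), Z is clear, C is clear (the
   unsigned sum 0x80000000 fits in 32 bits) and V is set (the signed
   sum +2147483648 does not fit in an int32_t).  */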

static void
set_flags_for_add64 (sim_cpu *cpu, uint64_t value1, uint64_t value2)
{
  uint64_t result = value1 + value2;
  uint64_t signbit = 1ULL << 63;
  uint32_t flags = 0;

  if (result == 0)
    flags |= Z;

  if (result & signbit)
    flags |= N;

  /* An unsigned carry out occurs iff the addition wrapped, i.e. the
     truncated result is smaller than one of the operands.  */
  if (result < value1)
    flags |= C;

  /* Signed overflow occurs iff both operands have the same sign and
     the result has the opposite sign.  */
  if (((value1 ^ value2) & signbit) == 0
      && ((value1 ^ result) & signbit) != 0)
    flags |= V;

  aarch64_set_CPSR (cpu, flags);
}

#define NEG(a) (((a) & signbit) == signbit)
#define POS(a) (((a) & signbit) == 0)

static void
set_flags_for_sub32 (sim_cpu *cpu, uint32_t value1, uint32_t value2)
{
  uint32_t result = value1 - value2;
  uint32_t flags = 0;
  uint32_t signbit = 1U << 31;

  if (result == 0)
    flags |= Z;

  if (NEG (result))
    flags |= N;

  if (   (NEG (value1) && POS (value2))
      || (NEG (value1) && POS (result))
      || (POS (value2) && POS (result)))
    flags |= C;

  if (   (NEG (value1) && POS (value2) && POS (result))
      || (POS (value1) && NEG (value2) && NEG (result)))
    flags |= V;

  aarch64_set_CPSR (cpu, flags);
}

static void
set_flags_for_sub64 (sim_cpu *cpu, uint64_t value1, uint64_t value2)
{
  uint64_t result = value1 - value2;
  uint32_t flags = 0;
  uint64_t signbit = 1ULL << 63;

  if (result == 0)
    flags |= Z;

  if (NEG (result))
    flags |= N;

  if (   (NEG (value1) && POS (value2))
      || (NEG (value1) && POS (result))
      || (POS (value2) && POS (result)))
    flags |= C;

  if (   (NEG (value1) && POS (value2) && POS (result))
      || (POS (value1) && NEG (value2) && NEG (result)))
    flags |= V;

  aarch64_set_CPSR (cpu, flags);
}
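
/* Worked example (illustrative, not part of the upstream source):
   CMP is SUBS with the result discarded, so comparing equal values,
   say value1 = value2 = 5, gives result = 0: Z is set, N and V are
   clear, and C is set via POS (value2) && POS (result), meaning no
   borrow occurred -- exactly the condition that B.HS/B.CS test.  */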

static void
set_flags_for_binop32 (sim_cpu *cpu, uint32_t result)
{
  uint32_t flags = 0;

  /* flags starts out as zero, so only N and Z ever need setting.  */
  if (result == 0)
    flags |= Z;

  if (result & (1 << 31))
    flags |= N;

  aarch64_set_CPSR (cpu, flags);
}

static void
set_flags_for_binop64 (sim_cpu *cpu, uint64_t result)
{
  uint32_t flags = 0;

  if (result == 0)
    flags |= Z;

  if (result & (1ULL << 63))
    flags |= N;

  aarch64_set_CPSR (cpu, flags);
}
1796
1797 /* 32 bit add immediate set flags. */
1798 static void
1799 adds32 (sim_cpu *cpu, uint32_t aimm)
1800 {
1801 unsigned rn = INSTR (9, 5);
1802 unsigned rd = INSTR (4, 0);
1803 /* TODO : do we need to worry about signs here? */
1804 int32_t value1 = aarch64_get_reg_s32 (cpu, rn, SP_OK);
1805
1806 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
1807 aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 + aimm);
1808 set_flags_for_add32 (cpu, value1, aimm);
1809 }
1810
1811 /* 64 bit add immediate set flags. */
1812 static void
1813 adds64 (sim_cpu *cpu, uint32_t aimm)
1814 {
1815 unsigned rn = INSTR (9, 5);
1816 unsigned rd = INSTR (4, 0);
1817 uint64_t value1 = aarch64_get_reg_u64 (cpu, rn, SP_OK);
1818 uint64_t value2 = aimm;
1819
1820 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
1821 aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 + value2);
1822 set_flags_for_add64 (cpu, value1, value2);
1823 }
1824
1825 /* 32 bit sub immediate. */
1826 static void
1827 sub32 (sim_cpu *cpu, uint32_t aimm)
1828 {
1829 unsigned rn = INSTR (9, 5);
1830 unsigned rd = INSTR (4, 0);
1831
1832 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
1833 aarch64_set_reg_u64 (cpu, rd, SP_OK,
1834 aarch64_get_reg_u32 (cpu, rn, SP_OK) - aimm);
1835 }
1836
1837 /* 64 bit sub immediate. */
1838 static void
1839 sub64 (sim_cpu *cpu, uint32_t aimm)
1840 {
1841 unsigned rn = INSTR (9, 5);
1842 unsigned rd = INSTR (4, 0);
1843
1844 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
1845 aarch64_set_reg_u64 (cpu, rd, SP_OK,
1846 aarch64_get_reg_u64 (cpu, rn, SP_OK) - aimm);
1847 }
1848
1849 /* 32 bit sub immediate set flags. */
1850 static void
1851 subs32 (sim_cpu *cpu, uint32_t aimm)
1852 {
1853 unsigned rn = INSTR (9, 5);
1854 unsigned rd = INSTR (4, 0);
1855 uint32_t value1 = aarch64_get_reg_u64 (cpu, rn, SP_OK);
1856 uint32_t value2 = aimm;
1857
1858 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
1859 aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 - value2);
1860 set_flags_for_sub32 (cpu, value1, value2);
1861 }
1862
1863 /* 64 bit sub immediate set flags. */
1864 static void
1865 subs64 (sim_cpu *cpu, uint32_t aimm)
1866 {
1867 unsigned rn = INSTR (9, 5);
1868 unsigned rd = INSTR (4, 0);
1869 uint64_t value1 = aarch64_get_reg_u64 (cpu, rn, SP_OK);
1870 uint64_t value2 = aimm;
1871
1872 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
1873 aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 - value2);
1874 set_flags_for_sub64 (cpu, value1, value2);
1875 }
1876
1877 /* Data Processing Register. */
1878
1879 /* First two helpers to perform the shift operations. */
1880
1881 static inline uint32_t
1882 shifted32 (uint32_t value, Shift shift, uint32_t count)
1883 {
1884 switch (shift)
1885 {
1886 default:
1887 case LSL:
1888 return (value << count);
1889 case LSR:
1890 return (value >> count);
1891 case ASR:
1892 {
1893 int32_t svalue = value;
1894 return (svalue >> count);
1895 }
1896 case ROR:
1897 {
1898 uint32_t top = value >> count;
1899 uint32_t bottom = value << (32 - count);
1900 return (bottom | top);
1901 }
1902 }
1903 }
1904
1905 static inline uint64_t
1906 shifted64 (uint64_t value, Shift shift, uint32_t count)
1907 {
1908 switch (shift)
1909 {
1910 default:
1911 case LSL:
1912 return (value << count);
1913 case LSR:
1914 return (value >> count);
1915 case ASR:
1916 {
1917 int64_t svalue = value;
1918 return (svalue >> count);
1919 }
1920 case ROR:
1921 {
1922 uint64_t top = value >> count;
1923 uint64_t bottom = value << (64 - count);
1924 return (bottom | top);
1925 }
1926 }
1927 }
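
/* A minimal standalone sketch of the ROR arm above (illustrative;
   ror32 is a hypothetical helper, not simulator API):

     static uint32_t
     ror32 (uint32_t value, uint32_t count)
     {
       return (value >> count) | (value << (32 - count));
     }

   e.g. ror32 (0x80000001, 1) == 0xC0000000.  A rotate count of zero
   would shift by the full register width, which is undefined
   behaviour in C - a caveat that applies to the ROR cases above
   as well. */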
1928
1929 /* Arithmetic shifted register.
1930 These allow an optional LSL, ASR or LSR to the second source
1931 register with a count up to the register bit count.
1932
1933 N.B. register args may not be SP. */
1934
1935 /* 32 bit ADD shifted register. */
1936 static void
1937 add32_shift (sim_cpu *cpu, Shift shift, uint32_t count)
1938 {
1939 unsigned rm = INSTR (20, 16);
1940 unsigned rn = INSTR (9, 5);
1941 unsigned rd = INSTR (4, 0);
1942
1943 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
1944 aarch64_set_reg_u64 (cpu, rd, NO_SP,
1945 aarch64_get_reg_u32 (cpu, rn, NO_SP)
1946 + shifted32 (aarch64_get_reg_u32 (cpu, rm, NO_SP),
1947 shift, count));
1948 }
1949
1950 /* 64 bit ADD shifted register. */
1951 static void
1952 add64_shift (sim_cpu *cpu, Shift shift, uint32_t count)
1953 {
1954 unsigned rm = INSTR (20, 16);
1955 unsigned rn = INSTR (9, 5);
1956 unsigned rd = INSTR (4, 0);
1957
1958 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
1959 aarch64_set_reg_u64 (cpu, rd, NO_SP,
1960 aarch64_get_reg_u64 (cpu, rn, NO_SP)
1961 + shifted64 (aarch64_get_reg_u64 (cpu, rm, NO_SP),
1962 shift, count));
1963 }
1964
1965 /* 32 bit ADD shifted register setting flags. */
1966 static void
1967 adds32_shift (sim_cpu *cpu, Shift shift, uint32_t count)
1968 {
1969 unsigned rm = INSTR (20, 16);
1970 unsigned rn = INSTR (9, 5);
1971 unsigned rd = INSTR (4, 0);
1972
1973 uint32_t value1 = aarch64_get_reg_u32 (cpu, rn, NO_SP);
1974 uint32_t value2 = shifted32 (aarch64_get_reg_u32 (cpu, rm, NO_SP),
1975 shift, count);
1976
1977 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
1978 aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 + value2);
1979 set_flags_for_add32 (cpu, value1, value2);
1980 }
1981
1982 /* 64 bit ADD shifted register setting flags. */
1983 static void
1984 adds64_shift (sim_cpu *cpu, Shift shift, uint32_t count)
1985 {
1986 unsigned rm = INSTR (20, 16);
1987 unsigned rn = INSTR (9, 5);
1988 unsigned rd = INSTR (4, 0);
1989
1990 uint64_t value1 = aarch64_get_reg_u64 (cpu, rn, NO_SP);
1991 uint64_t value2 = shifted64 (aarch64_get_reg_u64 (cpu, rm, NO_SP),
1992 shift, count);
1993
1994 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
1995 aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 + value2);
1996 set_flags_for_add64 (cpu, value1, value2);
1997 }
1998
1999 /* 32 bit SUB shifted register. */
2000 static void
2001 sub32_shift (sim_cpu *cpu, Shift shift, uint32_t count)
2002 {
2003 unsigned rm = INSTR (20, 16);
2004 unsigned rn = INSTR (9, 5);
2005 unsigned rd = INSTR (4, 0);
2006
2007 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
2008 aarch64_set_reg_u64 (cpu, rd, NO_SP,
2009 aarch64_get_reg_u32 (cpu, rn, NO_SP)
2010 - shifted32 (aarch64_get_reg_u32 (cpu, rm, NO_SP),
2011 shift, count));
2012 }
2013
2014 /* 64 bit SUB shifted register. */
2015 static void
2016 sub64_shift (sim_cpu *cpu, Shift shift, uint32_t count)
2017 {
2018 unsigned rm = INSTR (20, 16);
2019 unsigned rn = INSTR (9, 5);
2020 unsigned rd = INSTR (4, 0);
2021
2022 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
2023 aarch64_set_reg_u64 (cpu, rd, NO_SP,
2024 aarch64_get_reg_u64 (cpu, rn, NO_SP)
2025 - shifted64 (aarch64_get_reg_u64 (cpu, rm, NO_SP),
2026 shift, count));
2027 }
2028
2029 /* 32 bit SUB shifted register setting flags. */
2030 static void
2031 subs32_shift (sim_cpu *cpu, Shift shift, uint32_t count)
2032 {
2033 unsigned rm = INSTR (20, 16);
2034 unsigned rn = INSTR (9, 5);
2035 unsigned rd = INSTR (4, 0);
2036
2037 uint32_t value1 = aarch64_get_reg_u32 (cpu, rn, NO_SP);
2038 uint32_t value2 = shifted32 (aarch64_get_reg_u32 (cpu, rm, NO_SP),
2039 shift, count);
2040
2041 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
2042 aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 - value2);
2043 set_flags_for_sub32 (cpu, value1, value2);
2044 }
2045
2046 /* 64 bit SUB shifted register setting flags. */
2047 static void
2048 subs64_shift (sim_cpu *cpu, Shift shift, uint32_t count)
2049 {
2050 unsigned rm = INSTR (20, 16);
2051 unsigned rn = INSTR (9, 5);
2052 unsigned rd = INSTR (4, 0);
2053
2054 uint64_t value1 = aarch64_get_reg_u64 (cpu, rn, NO_SP);
2055 uint64_t value2 = shifted64 (aarch64_get_reg_u64 (cpu, rm, NO_SP),
2056 shift, count);
2057
2058 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
2059 aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 - value2);
2060 set_flags_for_sub64 (cpu, value1, value2);
2061 }
2062
2063 /* First a couple more helpers to fetch the
2064 relevant source register element either
2065 sign or zero extended as required by the
2066 extension value. */
2067
2068 static uint32_t
2069 extreg32 (sim_cpu *cpu, unsigned int lo, Extension extension)
2070 {
2071 switch (extension)
2072 {
2073 case UXTB: return aarch64_get_reg_u8 (cpu, lo, NO_SP);
2074 case UXTH: return aarch64_get_reg_u16 (cpu, lo, NO_SP);
2075 case UXTW: /* Fall through. */
2076 case UXTX: return aarch64_get_reg_u32 (cpu, lo, NO_SP);
2077 case SXTB: return aarch64_get_reg_s8 (cpu, lo, NO_SP);
2078 case SXTH: return aarch64_get_reg_s16 (cpu, lo, NO_SP);
2079 case SXTW: /* Fall through. */
2080 case SXTX: /* Fall through. */
2081 default: return aarch64_get_reg_s32 (cpu, lo, NO_SP);
2082 }
2083 }
2084
2085 static uint64_t
2086 extreg64 (sim_cpu *cpu, unsigned int lo, Extension extension)
2087 {
2088 switch (extension)
2089 {
2090 case UXTB: return aarch64_get_reg_u8 (cpu, lo, NO_SP);
2091 case UXTH: return aarch64_get_reg_u16 (cpu, lo, NO_SP);
2092 case UXTW: return aarch64_get_reg_u32 (cpu, lo, NO_SP);
2093 case UXTX: return aarch64_get_reg_u64 (cpu, lo, NO_SP);
2094 case SXTB: return aarch64_get_reg_s8 (cpu, lo, NO_SP);
2095 case SXTH: return aarch64_get_reg_s16 (cpu, lo, NO_SP);
2096 case SXTW: return aarch64_get_reg_s32 (cpu, lo, NO_SP);
2097 case SXTX:
2098 default: return aarch64_get_reg_s64 (cpu, lo, NO_SP);
2099 }
2100 }
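
/* Example (illustrative): a low source byte of 0x80 contributes
   different values to a 64-bit ADD depending on the extension:

     extreg64 (cpu, rm, UXTB) => (uint64_t) (uint8_t) 0x80 == 0x80
     extreg64 (cpu, rm, SXTB) => (uint64_t) (int8_t) 0x80
                              == 0xFFFFFFFFFFFFFF80

   so "ADD X0, X1, W2, SXTB" effectively subtracts 128 when W2's low
   byte is 0x80. */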
2101
2102 /* Arithmetic extending register
2103 These allow an optional sign extension of some portion of the
2104 second source register followed by an optional left shift of
2105 between 0 and 4 bits.
2106
2107 N.B. output (dest) and first input arg (source) may normally be Xn
2108 or SP. However, for flag setting operations dest can only be
2109 Xn. Second input registers are always Xn. */
2110
2111 /* 32 bit ADD extending register. */
2112 static void
2113 add32_ext (sim_cpu *cpu, Extension extension, uint32_t shift)
2114 {
2115 unsigned rm = INSTR (20, 16);
2116 unsigned rn = INSTR (9, 5);
2117 unsigned rd = INSTR (4, 0);
2118
2119 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
2120 aarch64_set_reg_u64 (cpu, rd, SP_OK,
2121 aarch64_get_reg_u32 (cpu, rn, SP_OK)
2122 + (extreg32 (cpu, rm, extension) << shift));
2123 }
2124
2125 /* 64 bit ADD extending register.
2126 N.B. This subsumes the case with 64 bit source2 and UXTX #n or LSL #0. */
2127 static void
2128 add64_ext (sim_cpu *cpu, Extension extension, uint32_t shift)
2129 {
2130 unsigned rm = INSTR (20, 16);
2131 unsigned rn = INSTR (9, 5);
2132 unsigned rd = INSTR (4, 0);
2133
2134 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
2135 aarch64_set_reg_u64 (cpu, rd, SP_OK,
2136 aarch64_get_reg_u64 (cpu, rn, SP_OK)
2137 + (extreg64 (cpu, rm, extension) << shift));
2138 }
2139
2140 /* 32 bit ADD extending register setting flags. */
2141 static void
2142 adds32_ext (sim_cpu *cpu, Extension extension, uint32_t shift)
2143 {
2144 unsigned rm = INSTR (20, 16);
2145 unsigned rn = INSTR (9, 5);
2146 unsigned rd = INSTR (4, 0);
2147
2148 uint32_t value1 = aarch64_get_reg_u32 (cpu, rn, SP_OK);
2149 uint32_t value2 = extreg32 (cpu, rm, extension) << shift;
2150
2151 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
2152 aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 + value2);
2153 set_flags_for_add32 (cpu, value1, value2);
2154 }
2155
2156 /* 64 bit ADD extending register setting flags */
2157 /* N.B. this subsumes the case with 64 bit source2 and UXTX #n or LSL #0 */
2158 static void
2159 adds64_ext (sim_cpu *cpu, Extension extension, uint32_t shift)
2160 {
2161 unsigned rm = INSTR (20, 16);
2162 unsigned rn = INSTR (9, 5);
2163 unsigned rd = INSTR (4, 0);
2164
2165 uint64_t value1 = aarch64_get_reg_u64 (cpu, rn, SP_OK);
2166 uint64_t value2 = extreg64 (cpu, rm, extension) << shift;
2167
2168 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
2169 aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 + value2);
2170 set_flags_for_add64 (cpu, value1, value2);
2171 }
2172
2173 /* 32 bit SUB extending register. */
2174 static void
2175 sub32_ext (sim_cpu *cpu, Extension extension, uint32_t shift)
2176 {
2177 unsigned rm = INSTR (20, 16);
2178 unsigned rn = INSTR (9, 5);
2179 unsigned rd = INSTR (4, 0);
2180
2181 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
2182 aarch64_set_reg_u64 (cpu, rd, SP_OK,
2183 aarch64_get_reg_u32 (cpu, rn, SP_OK)
2184 - (extreg32 (cpu, rm, extension) << shift));
2185 }
2186
2187 /* 64 bit SUB extending register. */
2188 /* N.B. this subsumes the case with 64 bit source2 and UXTX #n or LSL #0. */
2189 static void
2190 sub64_ext (sim_cpu *cpu, Extension extension, uint32_t shift)
2191 {
2192 unsigned rm = INSTR (20, 16);
2193 unsigned rn = INSTR (9, 5);
2194 unsigned rd = INSTR (4, 0);
2195
2196 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
2197 aarch64_set_reg_u64 (cpu, rd, SP_OK,
2198 aarch64_get_reg_u64 (cpu, rn, SP_OK)
2199 - (extreg64 (cpu, rm, extension) << shift));
2200 }
2201
2202 /* 32 bit SUB extending register setting flags. */
2203 static void
2204 subs32_ext (sim_cpu *cpu, Extension extension, uint32_t shift)
2205 {
2206 unsigned rm = INSTR (20, 16);
2207 unsigned rn = INSTR (9, 5);
2208 unsigned rd = INSTR (4, 0);
2209
2210 uint32_t value1 = aarch64_get_reg_u32 (cpu, rn, SP_OK);
2211 uint32_t value2 = extreg32 (cpu, rm, extension) << shift;
2212
2213 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
2214 aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 - value2);
2215 set_flags_for_sub32 (cpu, value1, value2);
2216 }
2217
2218 /* 64 bit SUB extending register setting flags */
2219 /* N.B. this subsumes the case with 64 bit source2 and UXTX #n or LSL #0 */
2220 static void
2221 subs64_ext (sim_cpu *cpu, Extension extension, uint32_t shift)
2222 {
2223 unsigned rm = INSTR (20, 16);
2224 unsigned rn = INSTR (9, 5);
2225 unsigned rd = INSTR (4, 0);
2226
2227 uint64_t value1 = aarch64_get_reg_u64 (cpu, rn, SP_OK);
2228 uint64_t value2 = extreg64 (cpu, rm, extension) << shift;
2229
2230 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
2231 aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 - value2);
2232 set_flags_for_sub64 (cpu, value1, value2);
2233 }
2234
2235 static void
2236 dexAddSubtractImmediate (sim_cpu *cpu)
2237 {
2238 /* instr[31] = size : 0 ==> 32 bit, 1 ==> 64 bit
2239 instr[30] = op : 0 ==> ADD, 1 ==> SUB
2240 instr[29] = set : 0 ==> no flags, 1 ==> set flags
2241 instr[28,24] = 10001
2242 instr[23,22] = shift : 00 ==> LSL#0, 01 ==> LSL#12, 1x ==> UNALLOC
2243 instr[21,10] = uimm12
2244 instr[9,5] = Rn
2245 instr[4,0] = Rd */
2246
2247 /* N.B. the shift is applied at decode before calling the add/sub routine. */
2248 uint32_t shift = INSTR (23, 22);
2249 uint32_t imm = INSTR (21, 10);
2250 uint32_t dispatch = INSTR (31, 29);
2251
2252 NYI_assert (28, 24, 0x11);
2253
2254 if (shift > 1)
2255 HALT_UNALLOC;
2256
2257 if (shift)
2258 imm <<= 12;
2259
2260 switch (dispatch)
2261 {
2262 case 0: add32 (cpu, imm); break;
2263 case 1: adds32 (cpu, imm); break;
2264 case 2: sub32 (cpu, imm); break;
2265 case 3: subs32 (cpu, imm); break;
2266 case 4: add64 (cpu, imm); break;
2267 case 5: adds64 (cpu, imm); break;
2268 case 6: sub64 (cpu, imm); break;
2269 case 7: subs64 (cpu, imm); break;
2270 }
2271 }
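
/* Decode example (illustrative): "ADDS X1, X2, #1, LSL #12" has
   size=1, op=0, set=1, so dispatch == 5 and the switch above calls
   adds64 with imm == 1 << 12 == 0x1000. */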
2272
2273 static void
2274 dexAddSubtractShiftedRegister (sim_cpu *cpu)
2275 {
2276 /* instr[31] = size : 0 ==> 32 bit, 1 ==> 64 bit
2277 instr[30,29] = op : 00 ==> ADD, 01 ==> ADDS, 10 ==> SUB, 11 ==> SUBS
2278 instr[28,24] = 01011
2279 instr[23,22] = shift : 0 ==> LSL, 1 ==> LSR, 2 ==> ASR, 3 ==> UNALLOC
2280 instr[21] = 0
2281 instr[20,16] = Rm
2282 instr[15,10] = count : must be 0xxxxx for 32 bit
2283 instr[9,5] = Rn
2284 instr[4,0] = Rd */
2285
2286 uint32_t size = INSTR (31, 31);
2287 uint32_t count = INSTR (15, 10);
2288 Shift shiftType = INSTR (23, 22);
2289
2290 NYI_assert (28, 24, 0x0B);
2291 NYI_assert (21, 21, 0);
2292
2293 /* Shift encoded as ROR is unallocated. */
2294 if (shiftType == ROR)
2295 HALT_UNALLOC;
2296
2297 /* 32 bit operations must have count[5] = 0
2298 or else we have an UNALLOC. */
2299 if (size == 0 && uimm (count, 5, 5))
2300 HALT_UNALLOC;
2301
2302 /* Dispatch on size:op i.e instr [31,29]. */
2303 switch (INSTR (31, 29))
2304 {
2305 case 0: add32_shift (cpu, shiftType, count); break;
2306 case 1: adds32_shift (cpu, shiftType, count); break;
2307 case 2: sub32_shift (cpu, shiftType, count); break;
2308 case 3: subs32_shift (cpu, shiftType, count); break;
2309 case 4: add64_shift (cpu, shiftType, count); break;
2310 case 5: adds64_shift (cpu, shiftType, count); break;
2311 case 6: sub64_shift (cpu, shiftType, count); break;
2312 case 7: subs64_shift (cpu, shiftType, count); break;
2313 }
2314 }
2315
2316 static void
2317 dexAddSubtractExtendedRegister (sim_cpu *cpu)
2318 {
2319 /* instr[31] = size : 0 ==> 32 bit, 1 ==> 64 bit
2320 instr[30] = op : 0 ==> ADD, 1 ==> SUB
2321 instr[29] = set? : 0 ==> no flags, 1 ==> set flags
2322 instr[28,24] = 01011
2323 instr[23,22] = opt : 0 ==> ok, 1,2,3 ==> UNALLOC
2324 instr[21] = 1
2325 instr[20,16] = Rm
2326 instr[15,13] = option : 000 ==> UXTB, 001 ==> UXTH,
2327 010 ==> LSL|UXTW, 011 ==> UXTX,
2328 100 ==> SXTB, 101 ==> SXTH,
2329 110 ==> SXTW, 111 ==> SXTX,
2330 instr[12,10] = shift : 0,1,2,3,4 ==> ok, 5,6,7 ==> UNALLOC
2331 instr[9,5] = Rn
2332 instr[4,0] = Rd */
2333
2334 Extension extensionType = INSTR (15, 13);
2335 uint32_t shift = INSTR (12, 10);
2336
2337 NYI_assert (28, 24, 0x0B);
2338 NYI_assert (21, 21, 1);
2339
2340 /* Shift may not exceed 4. */
2341 if (shift > 4)
2342 HALT_UNALLOC;
2343
2344 /* Dispatch on size:op:set?. */
2345 switch (INSTR (31, 29))
2346 {
2347 case 0: add32_ext (cpu, extensionType, shift); break;
2348 case 1: adds32_ext (cpu, extensionType, shift); break;
2349 case 2: sub32_ext (cpu, extensionType, shift); break;
2350 case 3: subs32_ext (cpu, extensionType, shift); break;
2351 case 4: add64_ext (cpu, extensionType, shift); break;
2352 case 5: adds64_ext (cpu, extensionType, shift); break;
2353 case 6: sub64_ext (cpu, extensionType, shift); break;
2354 case 7: subs64_ext (cpu, extensionType, shift); break;
2355 }
2356 }
2357
2358 /* Conditional data processing
2359 Condition register is implicit 3rd source. */
2360
2361 /* 32 bit add with carry. */
2362 /* N.B. register args may not be SP. */
2363
2364 static void
2365 adc32 (sim_cpu *cpu)
2366 {
2367 unsigned rm = INSTR (20, 16);
2368 unsigned rn = INSTR (9, 5);
2369 unsigned rd = INSTR (4, 0);
2370
2371 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
2372 aarch64_set_reg_u64 (cpu, rd, NO_SP,
2373 aarch64_get_reg_u32 (cpu, rn, NO_SP)
2374 + aarch64_get_reg_u32 (cpu, rm, NO_SP)
2375 + IS_SET (C));
2376 }
2377
2378 /* 64 bit add with carry */
2379 static void
2380 adc64 (sim_cpu *cpu)
2381 {
2382 unsigned rm = INSTR (20, 16);
2383 unsigned rn = INSTR (9, 5);
2384 unsigned rd = INSTR (4, 0);
2385
2386 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
2387 aarch64_set_reg_u64 (cpu, rd, NO_SP,
2388 aarch64_get_reg_u64 (cpu, rn, NO_SP)
2389 + aarch64_get_reg_u64 (cpu, rm, NO_SP)
2390 + IS_SET (C));
2391 }
2392
2393 /* 32 bit add with carry setting flags. */
2394 static void
2395 adcs32 (sim_cpu *cpu)
2396 {
2397 unsigned rm = INSTR (20, 16);
2398 unsigned rn = INSTR (9, 5);
2399 unsigned rd = INSTR (4, 0);
2400
2401 uint32_t value1 = aarch64_get_reg_u32 (cpu, rn, NO_SP);
2402 uint32_t value2 = aarch64_get_reg_u32 (cpu, rm, NO_SP);
2403 uint32_t carry = IS_SET (C);
2404
2405 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
2406 aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 + value2 + carry);
2407 set_flags_for_add32 (cpu, value1, value2 + carry);
2408 }
2409
2410 /* 64 bit add with carry setting flags. */
2411 static void
2412 adcs64 (sim_cpu *cpu)
2413 {
2414 unsigned rm = INSTR (20, 16);
2415 unsigned rn = INSTR (9, 5);
2416 unsigned rd = INSTR (4, 0);
2417
2418 uint64_t value1 = aarch64_get_reg_u64 (cpu, rn, NO_SP);
2419 uint64_t value2 = aarch64_get_reg_u64 (cpu, rm, NO_SP);
2420 uint64_t carry = IS_SET (C);
2421
2422 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
2423 aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 + value2 + carry);
2424 set_flags_for_add64 (cpu, value1, value2 + carry);
2425 }
2426
2427 /* 32 bit sub with carry. */
2428 static void
2429 sbc32 (sim_cpu *cpu)
2430 {
2431 unsigned rm = INSTR (20, 16);
2432 unsigned rn = INSTR (9, 5); /* ngc iff rn == 31. */
2433 unsigned rd = INSTR (4, 0);
2434
2435 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
2436 aarch64_set_reg_u64 (cpu, rd, NO_SP,
2437 aarch64_get_reg_u32 (cpu, rn, NO_SP)
2438 - aarch64_get_reg_u32 (cpu, rm, NO_SP)
2439 - 1 + IS_SET (C));
2440 }
2441
2442 /* 64 bit sub with carry */
2443 static void
2444 sbc64 (sim_cpu *cpu)
2445 {
2446 unsigned rm = INSTR (20, 16);
2447 unsigned rn = INSTR (9, 5);
2448 unsigned rd = INSTR (4, 0);
2449
2450 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
2451 aarch64_set_reg_u64 (cpu, rd, NO_SP,
2452 aarch64_get_reg_u64 (cpu, rn, NO_SP)
2453 - aarch64_get_reg_u64 (cpu, rm, NO_SP)
2454 - 1 + IS_SET (C));
2455 }
2456
2457 /* 32 bit sub with carry setting flags */
2458 static void
2459 sbcs32 (sim_cpu *cpu)
2460 {
2461 unsigned rm = INSTR (20, 16);
2462 unsigned rn = INSTR (9, 5);
2463 unsigned rd = INSTR (4, 0);
2464
2465 uint32_t value1 = aarch64_get_reg_u32 (cpu, rn, NO_SP);
2466 uint32_t value2 = aarch64_get_reg_u32 (cpu, rm, NO_SP);
2467 uint32_t carry = IS_SET (C);
2468 uint32_t result = value1 - value2 - 1 + carry;
2469
2470 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
2471 aarch64_set_reg_u64 (cpu, rd, NO_SP, result);
2472 set_flags_for_sub32 (cpu, value1, value2 + 1 - carry);
2473 }
2474
2475 /* 64 bit sub with carry setting flags */
2476 static void
2477 sbcs64 (sim_cpu *cpu)
2478 {
2479 unsigned rm = INSTR (20, 16);
2480 unsigned rn = INSTR (9, 5);
2481 unsigned rd = INSTR (4, 0);
2482
2483 uint64_t value1 = aarch64_get_reg_u64 (cpu, rn, NO_SP);
2484 uint64_t value2 = aarch64_get_reg_u64 (cpu, rm, NO_SP);
2485 uint64_t carry = IS_SET (C);
2486 uint64_t result = value1 - value2 - 1 + carry;
2487
2488 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
2489 aarch64_set_reg_u64 (cpu, rd, NO_SP, result);
2490 set_flags_for_sub64 (cpu, value1, value2 + 1 - carry);
2491 }
2492
2493 static void
2494 dexAddSubtractWithCarry (sim_cpu *cpu)
2495 {
2496 /* instr[31] = size : 0 ==> 32 bit, 1 ==> 64 bit
2497 instr[30] = op : 0 ==> ADC, 1 ==> SBC
2498 instr[29] = set? : 0 ==> no flags, 1 ==> set flags
2499 instr[28,21] = 1 1010 000
2500 instr[20,16] = Rm
2501 instr[15,10] = op2 : 000000 ==> ok, otherwise ==> UNALLOC
2502 instr[9,5] = Rn
2503 instr[4,0] = Rd */
2504
2505 uint32_t op2 = INSTR (15, 10);
2506
2507 NYI_assert (28, 21, 0xD0);
2508
2509 if (op2 != 0)
2510 HALT_UNALLOC;
2511
2512 /* Dispatch on size:op:set?. */
2513 switch (INSTR (31, 29))
2514 {
2515 case 0: adc32 (cpu); break;
2516 case 1: adcs32 (cpu); break;
2517 case 2: sbc32 (cpu); break;
2518 case 3: sbcs32 (cpu); break;
2519 case 4: adc64 (cpu); break;
2520 case 5: adcs64 (cpu); break;
2521 case 6: sbc64 (cpu); break;
2522 case 7: sbcs64 (cpu); break;
2523 }
2524 }
2525
2526 static uint32_t
2527 testConditionCode (sim_cpu *cpu, CondCode cc)
2528 {
2529 /* This should be reducible to branchless logic
2530 by some careful testing of bits in CC followed
2531 by the requisite masking and combining of bits
2532 from the flag register.
2533
2534 For now we do it with a switch. */
2535 int res;
2536
2537 switch (cc)
2538 {
2539 case EQ: res = IS_SET (Z); break;
2540 case NE: res = IS_CLEAR (Z); break;
2541 case CS: res = IS_SET (C); break;
2542 case CC: res = IS_CLEAR (C); break;
2543 case MI: res = IS_SET (N); break;
2544 case PL: res = IS_CLEAR (N); break;
2545 case VS: res = IS_SET (V); break;
2546 case VC: res = IS_CLEAR (V); break;
2547 case HI: res = IS_SET (C) && IS_CLEAR (Z); break;
2548 case LS: res = IS_CLEAR (C) || IS_SET (Z); break;
2549 case GE: res = IS_SET (N) == IS_SET (V); break;
2550 case LT: res = IS_SET (N) != IS_SET (V); break;
2551 case GT: res = IS_CLEAR (Z) && (IS_SET (N) == IS_SET (V)); break;
2552 case LE: res = IS_SET (Z) || (IS_SET (N) != IS_SET (V)); break;
2553 case AL:
2554 case NV:
2555 default:
2556 res = 1;
2557 break;
2558 }
2559 return res;
2560 }
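
/* Example: after "CMP W0, W1" with W0 == -1 and W1 == 1 the flags are
   N=1 Z=0 C=1 V=0, so GE (N == V) is false and LT (N != V) is true,
   matching the signed view -1 < 1, while HI (C set, Z clear) is true,
   matching the unsigned view 0xFFFFFFFF > 1. */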
2561
2562 static void
2563 CondCompare (sim_cpu *cpu) /* aka: ccmp and ccmn */
2564 {
2565 /* instr[31] = size : 0 ==> 32 bit, 1 ==> 64 bit
2566 instr[30] = compare with positive (1) or negative value (0)
2567 instr[29,21] = 1 1101 0010
2568 instr[20,16] = Rm or const
2569 instr[15,12] = cond
2570 instr[11] = compare reg (0) or const (1)
2571 instr[10] = 0
2572 instr[9,5] = Rn
2573 instr[4] = 0
2574 instr[3,0] = value for CPSR bits if the comparison does not take place. */
2575 signed int negate;
2576 unsigned rm;
2577 unsigned rn;
2578
2579 NYI_assert (29, 21, 0x1d2);
2580 NYI_assert (10, 10, 0);
2581 NYI_assert (4, 4, 0);
2582
2583 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
2584 if (! testConditionCode (cpu, INSTR (15, 12)))
2585 {
2586 aarch64_set_CPSR (cpu, INSTR (3, 0));
2587 return;
2588 }
2589
2590 negate = INSTR (30, 30) ? 1 : -1;
2591 rm = INSTR (20, 16);
2592 rn = INSTR ( 9, 5);
2593
2594 if (INSTR (31, 31))
2595 {
2596 if (INSTR (11, 11))
2597 set_flags_for_sub64 (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK),
2598 negate * (uint64_t) rm);
2599 else
2600 set_flags_for_sub64 (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK),
2601 negate * aarch64_get_reg_u64 (cpu, rm, SP_OK));
2602 }
2603 else
2604 {
2605 if (INSTR (11, 11))
2606 set_flags_for_sub32 (cpu, aarch64_get_reg_u32 (cpu, rn, SP_OK),
2607 negate * rm);
2608 else
2609 set_flags_for_sub32 (cpu, aarch64_get_reg_u32 (cpu, rn, SP_OK),
2610 negate * aarch64_get_reg_u32 (cpu, rm, SP_OK));
2611 }
2612 }
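
/* Example (illustrative): "CCMP X0, X1, #0, EQ" - if Z is currently
   set the flags are recomputed as for "SUBS XZR, X0, X1" via
   set_flags_for_sub64; otherwise NZCV is loaded from the literal #0,
   i.e. all four flags are cleared. */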
2613
2614 static void
2615 do_vec_MOV_whole_vector (sim_cpu *cpu)
2616 {
2617 /* MOV Vd.T, Vs.T (alias for ORR Vd.T, Vn.T, Vm.T where Vn == Vm)
2618
2619 instr[31] = 0
2620 instr[30] = half(0)/full(1)
2621 instr[29,21] = 001110101
2622 instr[20,16] = Vs
2623 instr[15,10] = 000111
2624 instr[9,5] = Vs
2625 instr[4,0] = Vd */
2626
2627 unsigned vs = INSTR (9, 5);
2628 unsigned vd = INSTR (4, 0);
2629
2630 NYI_assert (29, 21, 0x075);
2631 NYI_assert (15, 10, 0x07);
2632
2633 if (INSTR (20, 16) != vs)
2634 HALT_NYI;
2635
2636 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
2637 if (INSTR (30, 30))
2638 aarch64_set_vec_u64 (cpu, vd, 1, aarch64_get_vec_u64 (cpu, vs, 1));
2639
2640 aarch64_set_vec_u64 (cpu, vd, 0, aarch64_get_vec_u64 (cpu, vs, 0));
2641 }
2642
2643 static void
2644 do_vec_MOV_into_scalar (sim_cpu *cpu)
2645 {
2646 /* instr[31] = 0
2647 instr[30] = word(0)/long(1)
2648 instr[29,21] = 00 1110 000
2649 instr[20,18] = element size and index
2650 instr[17,10] = 00 0011 11
2651 instr[9,5] = V source
2652 instr[4,0] = R dest */
2653
2654 unsigned vs = INSTR (9, 5);
2655 unsigned rd = INSTR (4, 0);
2656
2657 NYI_assert (29, 21, 0x070);
2658 NYI_assert (17, 10, 0x0F);
2659
2660 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
2661 switch (INSTR (20, 18))
2662 {
2663 case 0x2:
2664 aarch64_set_reg_u64 (cpu, rd, NO_SP, aarch64_get_vec_u64 (cpu, vs, 0));
2665 break;
2666
2667 case 0x6:
2668 aarch64_set_reg_u64 (cpu, rd, NO_SP, aarch64_get_vec_u64 (cpu, vs, 1));
2669 break;
2670
2671 case 0x1:
2672 case 0x3:
2673 case 0x5:
2674 case 0x7:
2675 aarch64_set_reg_u64 (cpu, rd, NO_SP, aarch64_get_vec_u32
2676 (cpu, vs, INSTR (20, 19)));
2677 break;
2678
2679 default:
2680 HALT_NYI;
2681 }
2682 }
2683
2684 static void
2685 do_vec_INS (sim_cpu *cpu)
2686 {
2687 /* instr[31,21] = 01001110000
2688 instr[20,16] = element size and index
2689 instr[15,10] = 000111
2690 instr[9,5] = W source
2691 instr[4,0] = V dest */
2692
2693 int index;
2694 unsigned rs = INSTR (9, 5);
2695 unsigned vd = INSTR (4, 0);
2696
2697 NYI_assert (31, 21, 0x270);
2698 NYI_assert (15, 10, 0x07);
2699
2700 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
2701 if (INSTR (16, 16))
2702 {
2703 index = INSTR (20, 17);
2704 aarch64_set_vec_u8 (cpu, vd, index,
2705 aarch64_get_reg_u8 (cpu, rs, NO_SP));
2706 }
2707 else if (INSTR (17, 17))
2708 {
2709 index = INSTR (20, 18);
2710 aarch64_set_vec_u16 (cpu, vd, index,
2711 aarch64_get_reg_u16 (cpu, rs, NO_SP));
2712 }
2713 else if (INSTR (18, 18))
2714 {
2715 index = INSTR (20, 19);
2716 aarch64_set_vec_u32 (cpu, vd, index,
2717 aarch64_get_reg_u32 (cpu, rs, NO_SP));
2718 }
2719 else if (INSTR (19, 19))
2720 {
2721 index = INSTR (20, 20);
2722 aarch64_set_vec_u64 (cpu, vd, index,
2723 aarch64_get_reg_u64 (cpu, rs, NO_SP));
2724 }
2725 else
2726 HALT_NYI;
2727 }
2728
2729 static void
2730 do_vec_DUP_vector_into_vector (sim_cpu *cpu)
2731 {
2732 /* instr[31] = 0
2733 instr[30] = half(0)/full(1)
2734 instr[29,21] = 00 1110 000
2735 instr[20,16] = element size and index
2736 instr[15,10] = 0000 01
2737 instr[9,5] = V source
2738 instr[4,0] = V dest. */
2739
2740 unsigned full = INSTR (30, 30);
2741 unsigned vs = INSTR (9, 5);
2742 unsigned vd = INSTR (4, 0);
2743 int i, index;
2744
2745 NYI_assert (29, 21, 0x070);
2746 NYI_assert (15, 10, 0x01);
2747
2748 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
2749 if (INSTR (16, 16))
2750 {
2751 index = INSTR (20, 17);
2752
2753 for (i = 0; i < (full ? 16 : 8); i++)
2754 aarch64_set_vec_u8 (cpu, vd, i, aarch64_get_vec_u8 (cpu, vs, index));
2755 }
2756 else if (INSTR (17, 17))
2757 {
2758 index = INSTR (20, 18);
2759
2760 for (i = 0; i < (full ? 8 : 4); i++)
2761 aarch64_set_vec_u16 (cpu, vd, i, aarch64_get_vec_u16 (cpu, vs, index));
2762 }
2763 else if (INSTR (18, 18))
2764 {
2765 index = INSTR (20, 19);
2766
2767 for (i = 0; i < (full ? 4 : 2); i++)
2768 aarch64_set_vec_u32 (cpu, vd, i, aarch64_get_vec_u32 (cpu, vs, index));
2769 }
2770 else
2771 {
2772 if (INSTR (19, 19) == 0)
2773 HALT_UNALLOC;
2774
2775 if (! full)
2776 HALT_UNALLOC;
2777
2778 index = INSTR (20, 20);
2779
2780 for (i = 0; i < 2; i++)
2781 aarch64_set_vec_u64 (cpu, vd, i, aarch64_get_vec_u64 (cpu, vs, index));
2782 }
2783 }
2784
2785 static void
2786 do_vec_TBL (sim_cpu *cpu)
2787 {
2788 /* instr[31] = 0
2789 instr[30] = half(0)/full(1)
2790 instr[29,21] = 00 1110 000
2791 instr[20,16] = Vm
2792 instr[15] = 0
2793 instr[14,13] = vec length
2794 instr[12,10] = 000
2795 instr[9,5] = V start
2796 instr[4,0] = V dest */
2797
2798 int full = INSTR (30, 30);
2799 int len = INSTR (14, 13) + 1;
2800 unsigned vm = INSTR (20, 16);
2801 unsigned vn = INSTR (9, 5);
2802 unsigned vd = INSTR (4, 0);
2803 unsigned i;
2804
2805 NYI_assert (29, 21, 0x070);
2806 NYI_assert (12, 10, 0);
2807
2808 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
2809 for (i = 0; i < (full ? 16 : 8); i++)
2810 {
2811 unsigned int selector = aarch64_get_vec_u8 (cpu, vm, i);
2812 uint8_t val;
2813
2814 if (selector < 16)
2815 val = aarch64_get_vec_u8 (cpu, vn, selector);
2816 else if (selector < 32)
2817 val = len < 2 ? 0 : aarch64_get_vec_u8 (cpu, vn + 1, selector - 16);
2818 else if (selector < 48)
2819 val = len < 3 ? 0 : aarch64_get_vec_u8 (cpu, vn + 2, selector - 32);
2820 else if (selector < 64)
2821 val = len < 4 ? 0 : aarch64_get_vec_u8 (cpu, vn + 3, selector - 48);
2822 else
2823 val = 0;
2824
2825 aarch64_set_vec_u8 (cpu, vd, i, val);
2826 }
2827 }
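
/* Example: with len == 1 (a single table register) a selector byte of
   3 fetches byte 3 of Vn, while any selector >= 16 yields 0 via the
   `len <' guards or the final else, so TBL doubles as a byte permute
   that zeroes out-of-range lanes. */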
2828
2829 static void
2830 do_vec_TRN (sim_cpu *cpu)
2831 {
2832 /* instr[31] = 0
2833 instr[30] = half(0)/full(1)
2834 instr[29,24] = 00 1110
2835 instr[23,22] = size
2836 instr[21] = 0
2837 instr[20,16] = Vm
2838 instr[15] = 0
2839 instr[14] = TRN1 (0) / TRN2 (1)
2840 instr[13,10] = 1010
2841 instr[9,5] = V source
2842 instr[4,0] = V dest. */
2843
2844 int full = INSTR (30, 30);
2845 int second = INSTR (14, 14);
2846 unsigned vm = INSTR (20, 16);
2847 unsigned vn = INSTR (9, 5);
2848 unsigned vd = INSTR (4, 0);
2849 unsigned i;
2850
2851 NYI_assert (29, 24, 0x0E);
2852 NYI_assert (13, 10, 0xA);
2853
2854 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
2855 switch (INSTR (23, 22))
2856 {
2857 case 0:
2858 for (i = 0; i < (full ? 8 : 4); i++)
2859 {
2860 aarch64_set_vec_u8
2861 (cpu, vd, i * 2,
2862 aarch64_get_vec_u8 (cpu, second ? vm : vn, i * 2));
2863 aarch64_set_vec_u8
2864 (cpu, vd, i * 2 + 1,
2865 aarch64_get_vec_u8 (cpu, second ? vn : vm, i * 2 + 1));
2866 }
2867 break;
2868
2869 case 1:
2870 for (i = 0; i < (full ? 4 : 2); i++)
2871 {
2872 aarch64_set_vec_u16
2873 (cpu, vd, i * 2,
2874 aarch64_get_vec_u16 (cpu, second ? vm : vn, i * 2));
2875 aarch64_set_vec_u16
2876 (cpu, vd, i * 2 + 1,
2877 aarch64_get_vec_u16 (cpu, second ? vn : vm, i * 2 + 1));
2878 }
2879 break;
2880
2881 case 2:
2882 aarch64_set_vec_u32
2883 (cpu, vd, 0, aarch64_get_vec_u32 (cpu, second ? vm : vn, 0));
2884 aarch64_set_vec_u32
2885 (cpu, vd, 1, aarch64_get_vec_u32 (cpu, second ? vn : vm, 1));
2886 aarch64_set_vec_u32
2887 (cpu, vd, 2, aarch64_get_vec_u32 (cpu, second ? vm : vn, 2));
2888 aarch64_set_vec_u32
2889 (cpu, vd, 3, aarch64_get_vec_u32 (cpu, second ? vn : vm, 3));
2890 break;
2891
2892 case 3:
2893 if (! full)
2894 HALT_UNALLOC;
2895
2896 aarch64_set_vec_u64 (cpu, vd, 0,
2897 aarch64_get_vec_u64 (cpu, second ? vm : vn, 0));
2898 aarch64_set_vec_u64 (cpu, vd, 1,
2899 aarch64_get_vec_u64 (cpu, second ? vn : vm, 1));
2900 break;
2901 }
2902 }
2903
2904 static void
2905 do_vec_DUP_scalar_into_vector (sim_cpu *cpu)
2906 {
2907 /* instr[31] = 0
2908 instr[30] = 0=> zero top 64-bits, 1=> duplicate into top 64-bits
2909 [must be 1 for 64-bit xfer]
2910 instr[29,20] = 00 1110 0000
2911 instr[19,16] = element size: 0001=> 8-bits, 0010=> 16-bits,
2912 0100=> 32-bits. 1000=>64-bits
2913 instr[15,10] = 0000 11
2914 instr[9,5] = W source
2915 instr[4,0] = V dest. */
2916
2917 unsigned i;
2918 unsigned Vd = INSTR (4, 0);
2919 unsigned Rs = INSTR (9, 5);
2920 int both = INSTR (30, 30);
2921
2922 NYI_assert (29, 20, 0x0E0);
2923 NYI_assert (15, 10, 0x03);
2924
2925 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
2926 switch (INSTR (19, 16))
2927 {
2928 case 1:
2929 for (i = 0; i < (both ? 16 : 8); i++)
2930 aarch64_set_vec_u8 (cpu, Vd, i, aarch64_get_reg_u8 (cpu, Rs, NO_SP));
2931 break;
2932
2933 case 2:
2934 for (i = 0; i < (both ? 8 : 4); i++)
2935 aarch64_set_vec_u16 (cpu, Vd, i, aarch64_get_reg_u16 (cpu, Rs, NO_SP));
2936 break;
2937
2938 case 4:
2939 for (i = 0; i < (both ? 4 : 2); i++)
2940 aarch64_set_vec_u32 (cpu, Vd, i, aarch64_get_reg_u32 (cpu, Rs, NO_SP));
2941 break;
2942
2943 case 8:
2944 if (!both)
2945 HALT_NYI;
2946 aarch64_set_vec_u64 (cpu, Vd, 0, aarch64_get_reg_u64 (cpu, Rs, NO_SP));
2947 aarch64_set_vec_u64 (cpu, Vd, 1, aarch64_get_reg_u64 (cpu, Rs, NO_SP));
2948 break;
2949
2950 default:
2951 HALT_NYI;
2952 }
2953 }
2954
2955 static void
2956 do_vec_UZP (sim_cpu *cpu)
2957 {
2958 /* instr[31] = 0
2959 instr[30] = half(0)/full(1)
2960 instr[29,24] = 00 1110
2961 instr[23,22] = size: byte(00), half(01), word (10), long (11)
2962 instr[21] = 0
2963 instr[20,16] = Vm
2964 instr[15] = 0
2965 instr[14] = lower (0) / upper (1)
2966 instr[13,10] = 0110
2967 instr[9,5] = Vn
2968 instr[4,0] = Vd. */
2969
2970 int full = INSTR (30, 30);
2971 int upper = INSTR (14, 14);
2972
2973 unsigned vm = INSTR (20, 16);
2974 unsigned vn = INSTR (9, 5);
2975 unsigned vd = INSTR (4, 0);
2976
2977 uint64_t val_m1 = aarch64_get_vec_u64 (cpu, vm, 0);
2978 uint64_t val_m2 = aarch64_get_vec_u64 (cpu, vm, 1);
2979 uint64_t val_n1 = aarch64_get_vec_u64 (cpu, vn, 0);
2980 uint64_t val_n2 = aarch64_get_vec_u64 (cpu, vn, 1);
2981
2982 uint64_t val1 = 0;
2983 uint64_t val2 = 0;
2984
2985 uint64_t input1 = upper ? val_n1 : val_m1;
2986 uint64_t input2 = upper ? val_n2 : val_m2;
2987 unsigned i;
2988
2989 NYI_assert (29, 24, 0x0E);
2990 NYI_assert (21, 21, 0);
2991 NYI_assert (15, 15, 0);
2992 NYI_assert (13, 10, 6);
2993
2994 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
2995 switch (INSTR (23, 22))
2996 {
2997 case 0:
2998 for (i = 0; i < 8; i++)
2999 {
3000 val1 |= (input1 >> (i * 8)) & (0xFFULL << (i * 8));
3001 val2 |= (input2 >> (i * 8)) & (0xFFULL << (i * 8));
3002 }
3003 break;
3004
3005 case 1:
3006 for (i = 0; i < 4; i++)
3007 {
3008 val1 |= (input1 >> (i * 16)) & (0xFFFFULL << (i * 16));
3009 val2 |= (input2 >> (i * 16)) & (0xFFFFULL << (i * 16));
3010 }
3011 break;
3012
3013 case 2:
3014 val1 = ((input1 & 0xFFFFFFFF) | ((input1 >> 32) & 0xFFFFFFFF00000000ULL));
3015 val2 = ((input2 & 0xFFFFFFFF) | ((input2 >> 32) & 0xFFFFFFFF00000000ULL));
break;
3016
3017 case 3:
3018 val1 = input1;
3019 val2 = input2;
3020 break;
3021 }
3022
3023 aarch64_set_vec_u64 (cpu, vd, 0, val1);
3024 if (full)
3025 aarch64_set_vec_u64 (cpu, vd, 1, val2);
3026 }
3027
3028 static void
3029 do_vec_ZIP (sim_cpu *cpu)
3030 {
3031 /* instr[31] = 0
3032 instr[30] = half(0)/full(1)
3033 instr[29,24] = 00 1110
3034 instr[23,22] = size: byte(00), half(01), word (10), long (11)
3035 instr[21] = 0
3036 instr[20,16] = Vm
3037 instr[15] = 0
3038 instr[14] = lower (0) / upper (1)
3039 instr[13,10] = 1110
3040 instr[9,5] = Vn
3041 instr[4,0] = Vd. */
3042
3043 int full = INSTR (30, 30);
3044 int upper = INSTR (14, 14);
3045
3046 unsigned vm = INSTR (20, 16);
3047 unsigned vn = INSTR (9, 5);
3048 unsigned vd = INSTR (4, 0);
3049
3050 uint64_t val_m1 = aarch64_get_vec_u64 (cpu, vm, 0);
3051 uint64_t val_m2 = aarch64_get_vec_u64 (cpu, vm, 1);
3052 uint64_t val_n1 = aarch64_get_vec_u64 (cpu, vn, 0);
3053 uint64_t val_n2 = aarch64_get_vec_u64 (cpu, vn, 1);
3054
3055 uint64_t val1 = 0;
3056 uint64_t val2 = 0;
3057
3058 uint64_t input1 = upper ? val_n1 : val_m1;
3059 uint64_t input2 = upper ? val_n2 : val_m2;
3060
3061 NYI_assert (29, 24, 0x0E);
3062 NYI_assert (21, 21, 0);
3063 NYI_assert (15, 15, 0);
3064 NYI_assert (13, 10, 0xE);
3065
3066 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
3067 switch (INSTR (23, 22))
3068 {
3069 case 0:
3070 val1 =
3071 ((input1 << 0) & (0xFF << 0))
3072 | ((input2 << 8) & (0xFF << 8))
3073 | ((input1 << 8) & (0xFF << 16))
3074 | ((input2 << 16) & (0xFFULL << 24))
3075 | ((input1 << 16) & (0xFFULL << 32))
3076 | ((input2 << 24) & (0xFFULL << 40))
3077 | ((input1 << 24) & (0xFFULL << 48))
3078 | ((input2 << 32) & (0xFFULL << 56));
3079
3080 val2 =
3081 ((input1 >> 32) & (0xFF << 0))
3082 | ((input2 >> 24) & (0xFF << 8))
3083 | ((input1 >> 24) & (0xFF << 16))
3084 | ((input2 >> 16) & (0xFFULL << 24))
3085 | ((input1 >> 16) & (0xFFULL << 32))
3086 | ((input2 >> 8) & (0xFFULL << 40))
3087 | ((input1 >> 8) & (0xFFULL << 48))
3088 | ((input2 >> 0) & (0xFFULL << 56));
3089 break;
3090
3091 case 1:
3092 val1 =
3093 ((input1 << 0) & (0xFFFF << 0))
3094 | ((input2 << 16) & (0xFFFFULL << 16))
3095 | ((input1 << 16) & (0xFFFFULL << 32))
3096 | ((input2 << 32) & (0xFFFFULL << 48));
3097
3098 val2 =
3099 ((input1 >> 32) & (0xFFFF << 0))
3100 | ((input2 >> 16) & (0xFFFFULL << 16))
3101 | ((input1 >> 16) & (0xFFFFULL << 32))
3102 | ((input2 >> 0) & (0xFFFFULL << 48));
3103 break;
3104
3105 case 2:
3106 val1 = (input1 & 0xFFFFFFFFULL) | (input2 << 32);
3107 val2 = (input2 & 0xFFFFFFFFULL) | (input1 << 32);
3108 break;
3109
3110 case 3:
3111 val1 = input1;
3112 val2 = input2;
3113 break;
3114 }
3115
3116 aarch64_set_vec_u64 (cpu, vd, 0, val1);
3117 if (full)
3118 aarch64_set_vec_u64 (cpu, vd, 1, val2);
3119 }
3120
3121 /* Floating point immediates are encoded in 8 bits.
3122 fpimm[7] = sign bit.
3123 fpimm[6:4] = signed exponent.
3124 fpimm[3:0] = fraction (assuming leading 1).
3125 i.e. F = s * 1.f * 2^(e - b). */
3126
3127 static float
3128 fp_immediate_for_encoding_32 (uint32_t imm8)
3129 {
3130 float u;
3131 uint32_t s, e, f, i;
3132
3133 s = (imm8 >> 7) & 0x1;
3134 e = (imm8 >> 4) & 0x7;
3135 f = imm8 & 0xf;
3136
3137 /* The fp value is n/16, scaled by a power of two derived from the signed 3-bit exponent e, where n is 16+f; s selects the sign. */
3138 u = (16.0 + f) / 16.0;
3139
3140 /* N.B. exponent is signed. */
3141 if (e < 4)
3142 {
3143 int epos = e;
3144
3145 for (i = 0; i <= epos; i++)
3146 u *= 2.0;
3147 }
3148 else
3149 {
3150 int eneg = 7 - e;
3151
3152 for (i = 0; i < eneg; i++)
3153 u /= 2.0;
3154 }
3155
3156 if (s)
3157 u = - u;
3158
3159 return u;
3160 }
3161
3162 static double
3163 fp_immediate_for_encoding_64 (uint32_t imm8)
3164 {
3165 double u;
3166 uint32_t s, e, f, i;
3167
3168 s = (imm8 >> 7) & 0x1;
3169 e = (imm8 >> 4) & 0x7;
3170 f = imm8 & 0xf;
3171
3172 /* The fp value is n/16, scaled by a power of two derived from the signed 3-bit exponent e, where n is 16+f; s selects the sign. */
3173 u = (16.0 + f) / 16.0;
3174
3175 /* N.B. exponent is signed. */
3176 if (e < 4)
3177 {
3178 int epos = e;
3179
3180 for (i = 0; i <= epos; i++)
3181 u *= 2.0;
3182 }
3183 else
3184 {
3185 int eneg = 7 - e;
3186
3187 for (i = 0; i < eneg; i++)
3188 u /= 2.0;
3189 }
3190
3191 if (s)
3192 u = - u;
3193
3194 return u;
3195 }
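
/* Worked examples (illustrative): imm8 == 0x70 has s=0, e=7, f=0, so
   u starts at 16/16 == 1.0 and eneg == 7 - 7 == 0 leaves it unscaled;
   this is the encoding used by "FMOV D0, #1.0".  imm8 == 0x00 has
   e == 0 < 4, so u is doubled once, decoding to 2.0. */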
3196
3197 static void
3198 do_vec_MOV_immediate (sim_cpu *cpu)
3199 {
3200 /* instr[31] = 0
3201 instr[30] = full/half selector
3202 instr[29,19] = 00111100000
3203 instr[18,16] = high 3 bits of uimm8
3204 instr[15,12] = size & shift:
3205 0000 => 32-bit
3206 0010 => 32-bit + LSL#8
3207 0100 => 32-bit + LSL#16
3208 0110 => 32-bit + LSL#24
3209 1010 => 16-bit + LSL#8
3210 1000 => 16-bit
3211 1101 => 32-bit + MSL#16
3212 1100 => 32-bit + MSL#8
3213 1110 => 8-bit
3214 1111 => double
3215 instr[11,10] = 01
3216 instr[9,5] = low 5-bits of uimm8
3217 instr[4,0] = Vd. */
3218
3219 int full = INSTR (30, 30);
3220 unsigned vd = INSTR (4, 0);
3221 unsigned val = (INSTR (18, 16) << 5) | INSTR (9, 5);
3222 unsigned i;
3223
3224 NYI_assert (29, 19, 0x1E0);
3225 NYI_assert (11, 10, 1);
3226
3227 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
3228 switch (INSTR (15, 12))
3229 {
3230 case 0x0: /* 32-bit, no shift. */
3231 case 0x2: /* 32-bit, shift by 8. */
3232 case 0x4: /* 32-bit, shift by 16. */
3233 case 0x6: /* 32-bit, shift by 24. */
3234 val <<= (8 * INSTR (14, 13));
3235 for (i = 0; i < (full ? 4 : 2); i++)
3236 aarch64_set_vec_u32 (cpu, vd, i, val);
3237 break;
3238
3239 case 0xa: /* 16-bit, shift by 8. */
3240 val <<= 8;
3241 /* Fall through. */
3242 case 0x8: /* 16-bit, no shift. */
3243 for (i = 0; i < (full ? 8 : 4); i++)
3244 aarch64_set_vec_u16 (cpu, vd, i, val);
3245 break;
3246 case 0xd: /* 32-bit, mask shift by 16. */
3247 val <<= 8;
3248 val |= 0xFF;
3249 /* Fall through. */
3250 case 0xc: /* 32-bit, mask shift by 8. */
3251 val <<= 8;
3252 val |= 0xFF;
3253 for (i = 0; i < (full ? 4 : 2); i++)
3254 aarch64_set_vec_u32 (cpu, vd, i, val);
3255 break;
3256
3257 case 0xe: /* 8-bit, no shift. */
3258 for (i = 0; i < (full ? 16 : 8); i++)
3259 aarch64_set_vec_u8 (cpu, vd, i, val);
3260 break;
3261
3262 case 0xf: /* FMOV Vs.{2|4}S, #fpimm. */
3263 {
3264 float u = fp_immediate_for_encoding_32 (val);
3265 for (i = 0; i < (full ? 4 : 2); i++)
3266 aarch64_set_vec_float (cpu, vd, i, u);
3267 break;
3268 }
3269
3270 default:
3271 HALT_NYI;
3272 }
3273 }
3274
3275 static void
3276 do_vec_MVNI (sim_cpu *cpu)
3277 {
3278 /* instr[31] = 0
3279 instr[30] = full/half selector
3280 instr[29,19] = 10111100000
3281 instr[18,16] = high 3 bits of uimm8
3282 instr[15,12] = selector
3283 instr[11,10] = 01
3284 instr[9,5] = low 5-bits of uimm8
3285 instr[4,0] = Vd. */
3286
3287 int full = INSTR (30, 30);
3288 unsigned vd = INSTR (4, 0);
3289 unsigned val = (INSTR (18, 16) << 5) | INSTR (9, 5);
3290 unsigned i;
3291
3292 NYI_assert (29, 19, 0x5E0);
3293 NYI_assert (11, 10, 1);
3294
3295 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
3296 switch (INSTR (15, 12))
3297 {
3298 case 0x0: /* 32-bit, no shift. */
3299 case 0x2: /* 32-bit, shift by 8. */
3300 case 0x4: /* 32-bit, shift by 16. */
3301 case 0x6: /* 32-bit, shift by 24. */
3302 val <<= (8 * INSTR (14, 13));
3303 val = ~ val;
3304 for (i = 0; i < (full ? 4 : 2); i++)
3305 aarch64_set_vec_u32 (cpu, vd, i, val);
3306 return;
3307
3308 case 0xa: /* 16-bit, 8 bit shift. */
3309 val <<= 8;
/* Fall through. */
3310 case 0x8: /* 16-bit, no shift. */
3311 val = ~ val;
3312 for (i = 0; i < (full ? 8 : 4); i++)
3313 aarch64_set_vec_u16 (cpu, vd, i, val);
3314 return;
3315
3316 case 0xd: /* 32-bit, mask shift by 16. */
3317 val <<= 8;
3318 val |= 0xFF;
/* Fall through. */
3319 case 0xc: /* 32-bit, mask shift by 8. */
3320 val <<= 8;
3321 val |= 0xFF;
3322 val = ~ val;
3323 for (i = 0; i < (full ? 4 : 2); i++)
3324 aarch64_set_vec_u32 (cpu, vd, i, val);
3325 return;
3326
3327 case 0xE: /* MOVI Dn, #mask64 */
3328 {
3329 uint64_t mask = 0;
3330
3331 for (i = 0; i < 8; i++)
3332 if (val & (1 << i))
3333 mask |= (0xFFUL << (i * 8));
3334 aarch64_set_vec_u64 (cpu, vd, 0, mask);
/* The scalar MOVI Dn form clears the upper half of the vector;
   the 2D form repeats the mask. */
3335 aarch64_set_vec_u64 (cpu, vd, 1, full ? mask : 0);
3336 return;
3337 }
3338
3339 case 0xf: /* FMOV Vd.2D, #fpimm. */
3340 {
3341 double u = fp_immediate_for_encoding_64 (val);
3342
3343 if (! full)
3344 HALT_UNALLOC;
3345
3346 aarch64_set_vec_double (cpu, vd, 0, u);
3347 aarch64_set_vec_double (cpu, vd, 1, u);
3348 return;
3349 }
3350
3351 default:
3352 HALT_NYI;
3353 }
3354 }
3355
3356 #define ABS(A) ((A) < 0 ? - (A) : (A))
3357
3358 static void
3359 do_vec_ABS (sim_cpu *cpu)
3360 {
3361 /* instr[31] = 0
3362 instr[30] = half(0)/full(1)
3363 instr[29,24] = 00 1110
3364 instr[23,22] = size: 00=> 8-bit, 01=> 16-bit, 10=> 32-bit, 11=> 64-bit
3365 instr[21,10] = 10 0000 1011 10
3366 instr[9,5] = Vn
3367 instr[4,0] = Vd. */
3368
3369 unsigned vn = INSTR (9, 5);
3370 unsigned vd = INSTR (4, 0);
3371 unsigned full = INSTR (30, 30);
3372 unsigned i;
3373
3374 NYI_assert (29, 24, 0x0E);
3375 NYI_assert (21, 10, 0x82E);
3376
3377 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
3378 switch (INSTR (23, 22))
3379 {
3380 case 0:
3381 for (i = 0; i < (full ? 16 : 8); i++)
3382 aarch64_set_vec_s8 (cpu, vd, i,
3383 ABS (aarch64_get_vec_s8 (cpu, vn, i)));
3384 break;
3385
3386 case 1:
3387 for (i = 0; i < (full ? 8 : 4); i++)
3388 aarch64_set_vec_s16 (cpu, vd, i,
3389 ABS (aarch64_get_vec_s16 (cpu, vn, i)));
3390 break;
3391
3392 case 2:
3393 for (i = 0; i < (full ? 4 : 2); i++)
3394 aarch64_set_vec_s32 (cpu, vd, i,
3395 ABS (aarch64_get_vec_s32 (cpu, vn, i)));
3396 break;
3397
3398 case 3:
3399 if (! full)
3400 HALT_NYI;
3401 for (i = 0; i < 2; i++)
3402 aarch64_set_vec_s64 (cpu, vd, i,
3403 ABS (aarch64_get_vec_s64 (cpu, vn, i)));
3404 break;
3405 }
3406 }
3407
3408 static void
3409 do_vec_ADDV (sim_cpu *cpu)
3410 {
3411 /* instr[31] = 0
3412 instr[30] = full/half selector
3413 instr[29,24] = 00 1110
3414 instr[23,22] = size: 00=> 8-bit, 01=> 16-bit, 10=> 32-bit, 11=> 64-bit
3415 instr[21,10] = 11 0001 1011 10
3416 instr[9,5] = Vm
3417 instr[4,0] = Rd. */
3418
3419 unsigned vm = INSTR (9, 5);
3420 unsigned rd = INSTR (4, 0);
3421 unsigned i;
3422 uint64_t val = 0;
3423 int full = INSTR (30, 30);
3424
3425 NYI_assert (29, 24, 0x0E);
3426 NYI_assert (21, 10, 0xC6E);
3427
3428 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
3429 switch (INSTR (23, 22))
3430 {
3431 case 0:
3432 for (i = 0; i < (full ? 16 : 8); i++)
3433 val += aarch64_get_vec_u8 (cpu, vm, i);
3434 aarch64_set_reg_u64 (cpu, rd, NO_SP, val);
3435 return;
3436
3437 case 1:
3438 for (i = 0; i < (full ? 8 : 4); i++)
3439 val += aarch64_get_vec_u16 (cpu, vm, i);
3440 aarch64_set_reg_u64 (cpu, rd, NO_SP, val);
3441 return;
3442
3443 case 2:
3444 for (i = 0; i < (full ? 4 : 2); i++)
3445 val += aarch64_get_vec_u32 (cpu, vm, i);
3446 aarch64_set_reg_u64 (cpu, rd, NO_SP, val);
3447 return;
3448
3449 case 3:
3450 if (! full)
3451 HALT_UNALLOC;
3452 val = aarch64_get_vec_u64 (cpu, vm, 0);
3453 val += aarch64_get_vec_u64 (cpu, vm, 1);
3454 aarch64_set_reg_u64 (cpu, rd, NO_SP, val);
3455 return;
3456 }
3457 }
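
/* Example: summing V1.8B with every byte equal to 1 produces 8.  The
   accumulation above is done in a 64-bit `val', so the lane sums
   cannot wrap regardless of element size. */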
3458
3459 static void
3460 do_vec_ins_2 (sim_cpu *cpu)
3461 {
3462 /* instr[31,21] = 01001110000
3463 instr[20,18] = size & element selector
3464 instr[17,14] = 0000
3465 instr[13] = direction: to vec(0), from vec (1)
3466 instr[12,10] = 111
3467 instr[9,5] = Vm
3468 instr[4,0] = Vd. */
3469
3470 unsigned elem;
3471 unsigned vm = INSTR (9, 5);
3472 unsigned vd = INSTR (4, 0);
3473
3474 NYI_assert (31, 21, 0x270);
3475 NYI_assert (17, 14, 0);
3476 NYI_assert (12, 10, 7);
3477
3478 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
3479 if (INSTR (13, 13) == 1)
3480 {
3481 if (INSTR (18, 18) == 1)
3482 {
3483 /* 32-bit moves. */
3484 elem = INSTR (20, 19);
3485 aarch64_set_reg_u64 (cpu, vd, NO_SP,
3486 aarch64_get_vec_u32 (cpu, vm, elem));
3487 }
3488 else
3489 {
3490 /* 64-bit moves. */
3491 if (INSTR (19, 19) != 1)
3492 HALT_NYI;
3493
3494 elem = INSTR (20, 20);
3495 aarch64_set_reg_u64 (cpu, vd, NO_SP,
3496 aarch64_get_vec_u64 (cpu, vm, elem));
3497 }
3498 }
3499 else
3500 {
3501 if (INSTR (18, 18) == 1)
3502 {
3503 /* 32-bit moves. */
3504 elem = INSTR (20, 19);
3505 aarch64_set_vec_u32 (cpu, vd, elem,
3506 aarch64_get_reg_u32 (cpu, vm, NO_SP));
3507 }
3508 else
3509 {
3510 /* 64-bit moves. */
3511 if (INSTR (19, 19) != 1)
3512 HALT_NYI;
3513
3514 elem = INSTR (20, 20);
3515 aarch64_set_vec_u64 (cpu, vd, elem,
3516 aarch64_get_reg_u64 (cpu, vm, NO_SP));
3517 }
3518 }
3519 }
3520
3521 #define DO_VEC_WIDENING_MUL(N, DST_TYPE, READ_TYPE, WRITE_TYPE) \
3522 do \
3523 { \
3524 DST_TYPE a[N], b[N]; \
3525 \
3526 for (i = 0; i < (N); i++) \
3527 { \
3528 a[i] = aarch64_get_vec_##READ_TYPE (cpu, vn, i + bias); \
3529 b[i] = aarch64_get_vec_##READ_TYPE (cpu, vm, i + bias); \
3530 } \
3531 for (i = 0; i < (N); i++) \
3532 aarch64_set_vec_##WRITE_TYPE (cpu, vd, i, a[i] * b[i]); \
3533 } \
3534 while (0)
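
/* Expansion sketch (illustrative): for the signed 8-bit case in
   do_vec_mull below, DO_VEC_WIDENING_MUL (8, int16_t, s8, s16)
   becomes

     int16_t a[8], b[8];
     for (i = 0; i < 8; i++)
       {
         a[i] = aarch64_get_vec_s8 (cpu, vn, i + bias);
         b[i] = aarch64_get_vec_s8 (cpu, vm, i + bias);
       }
     for (i = 0; i < 8; i++)
       aarch64_set_vec_s16 (cpu, vd, i, a[i] * b[i]);

   reading both sources in full before any write so that vd may alias
   vn or vm; `bias' selects the upper half for the *MULL2 forms. */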
3535
3536 static void
3537 do_vec_mull (sim_cpu *cpu)
3538 {
3539 /* instr[31] = 0
3540 instr[30] = lower(0)/upper(1) selector
3541 instr[29] = signed(0)/unsigned(1)
3542 instr[28,24] = 0 1110
3543 instr[23,22] = size: 8-bit (00), 16-bit (01), 32-bit (10)
3544 instr[21] = 1
3545 instr[20,16] = Vm
3546 instr[15,10] = 11 0000
3547 instr[9,5] = Vn
3548 instr[4,0] = Vd. */
3549
3550 int unsign = INSTR (29, 29);
3551 int bias = INSTR (30, 30);
3552 unsigned vm = INSTR (20, 16);
3553 unsigned vn = INSTR ( 9, 5);
3554 unsigned vd = INSTR ( 4, 0);
3555 unsigned i;
3556
3557 NYI_assert (28, 24, 0x0E);
3558 NYI_assert (15, 10, 0x30);
3559
3560 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
3561 /* NB: Read source values before writing results, in case
3562 the source and destination vectors are the same. */
3563 switch (INSTR (23, 22))
3564 {
3565 case 0:
3566 if (bias)
3567 bias = 8;
3568 if (unsign)
3569 DO_VEC_WIDENING_MUL (8, uint16_t, u8, u16);
3570 else
3571 DO_VEC_WIDENING_MUL (8, int16_t, s8, s16);
3572 return;
3573
3574 case 1:
3575 if (bias)
3576 bias = 4;
3577 if (unsign)
3578 DO_VEC_WIDENING_MUL (4, uint32_t, u16, u32);
3579 else
3580 DO_VEC_WIDENING_MUL (4, int32_t, s16, s32);
3581 return;
3582
3583 case 2:
3584 if (bias)
3585 bias = 2;
3586 if (unsign)
3587 DO_VEC_WIDENING_MUL (2, uint64_t, u32, u64);
3588 else
3589 DO_VEC_WIDENING_MUL (2, int64_t, s32, s64);
3590 return;
3591
3592 case 3:
3593 HALT_NYI;
3594 }
3595 }
3596
3597 static void
3598 do_vec_fadd (sim_cpu *cpu)
3599 {
3600 /* instr[31] = 0
3601 instr[30] = half(0)/full(1)
3602 instr[29,24] = 001110
3603 instr[23] = FADD(0)/FSUB(1)
3604 instr[22] = float (0)/double(1)
3605 instr[21] = 1
3606 instr[20,16] = Vm
3607 instr[15,10] = 110101
3608 instr[9,5] = Vn
3609 instr[4,0] = Vd. */
3610
3611 unsigned vm = INSTR (20, 16);
3612 unsigned vn = INSTR (9, 5);
3613 unsigned vd = INSTR (4, 0);
3614 unsigned i;
3615 int full = INSTR (30, 30);
3616
3617 NYI_assert (29, 24, 0x0E);
3618 NYI_assert (21, 21, 1);
3619 NYI_assert (15, 10, 0x35);
3620
3621 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
3622 if (INSTR (23, 23))
3623 {
3624 if (INSTR (22, 22))
3625 {
3626 if (! full)
3627 HALT_NYI;
3628
3629 for (i = 0; i < 2; i++)
3630 aarch64_set_vec_double (cpu, vd, i,
3631 aarch64_get_vec_double (cpu, vn, i)
3632 - aarch64_get_vec_double (cpu, vm, i));
3633 }
3634 else
3635 {
3636 for (i = 0; i < (full ? 4 : 2); i++)
3637 aarch64_set_vec_float (cpu, vd, i,
3638 aarch64_get_vec_float (cpu, vn, i)
3639 - aarch64_get_vec_float (cpu, vm, i));
3640 }
3641 }
3642 else
3643 {
3644 if (INSTR (22, 22))
3645 {
3646 if (! full)
3647 HALT_NYI;
3648
3649 for (i = 0; i < 2; i++)
3650 aarch64_set_vec_double (cpu, vd, i,
3651 aarch64_get_vec_double (cpu, vm, i)
3652 + aarch64_get_vec_double (cpu, vn, i));
3653 }
3654 else
3655 {
3656 for (i = 0; i < (full ? 4 : 2); i++)
3657 aarch64_set_vec_float (cpu, vd, i,
3658 aarch64_get_vec_float (cpu, vm, i)
3659 + aarch64_get_vec_float (cpu, vn, i));
3660 }
3661 }
3662 }
3663
3664 static void
3665 do_vec_add (sim_cpu *cpu)
3666 {
3667 /* instr[31] = 0
3668 instr[30] = full/half selector
3669 instr[29,24] = 001110
3670 instr[23,22] = size: 00=> 8-bit, 01=> 16-bit, 10=> 32-bit, 11=> 64-bit
3671 instr[21] = 1
3672 instr[20,16] = Vm
3673 instr[15,10] = 100001
3674 instr[9,5] = Vn
3675 instr[4,0] = Vd. */
3676
3677 unsigned vm = INSTR (20, 16);
3678 unsigned vn = INSTR (9, 5);
3679 unsigned vd = INSTR (4, 0);
3680 unsigned i;
3681 int full = INSTR (30, 30);
3682
3683 NYI_assert (29, 24, 0x0E);
3684 NYI_assert (21, 21, 1);
3685 NYI_assert (15, 10, 0x21);
3686
3687 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
3688 switch (INSTR (23, 22))
3689 {
3690 case 0:
3691 for (i = 0; i < (full ? 16 : 8); i++)
3692 aarch64_set_vec_u8 (cpu, vd, i, aarch64_get_vec_u8 (cpu, vn, i)
3693 + aarch64_get_vec_u8 (cpu, vm, i));
3694 return;
3695
3696 case 1:
3697 for (i = 0; i < (full ? 8 : 4); i++)
3698 aarch64_set_vec_u16 (cpu, vd, i, aarch64_get_vec_u16 (cpu, vn, i)
3699 + aarch64_get_vec_u16 (cpu, vm, i));
3700 return;
3701
3702 case 2:
3703 for (i = 0; i < (full ? 4 : 2); i++)
3704 aarch64_set_vec_u32 (cpu, vd, i, aarch64_get_vec_u32 (cpu, vn, i)
3705 + aarch64_get_vec_u32 (cpu, vm, i));
3706 return;
3707
3708 case 3:
3709 if (! full)
3710 HALT_UNALLOC;
3711 aarch64_set_vec_u64 (cpu, vd, 0, aarch64_get_vec_u64 (cpu, vn, 0)
3712 + aarch64_get_vec_u64 (cpu, vm, 0));
3713 aarch64_set_vec_u64 (cpu, vd, 1,
3714 aarch64_get_vec_u64 (cpu, vn, 1)
3715 + aarch64_get_vec_u64 (cpu, vm, 1));
3716 return;
3717 }
3718 }
3719
3720 static void
3721 do_vec_mul (sim_cpu *cpu)
3722 {
3723 /* instr[31] = 0
3724 instr[30] = full/half selector
3725 instr[29,24] = 00 1110
3726 instr[23,22] = size: 00=> 8-bit, 01=> 16-bit, 10=> 32-bit
3727 instr[21] = 1
3728 instr[20,16] = Vm
3729 instr[15,10] = 10 0111
3730 instr[9,5] = Vn
3731 instr[4,0] = Vd. */
3732
3733 unsigned vm = INSTR (20, 16);
3734 unsigned vn = INSTR (9, 5);
3735 unsigned vd = INSTR (4, 0);
3736 unsigned i;
3737 int full = INSTR (30, 30);
3738 int bias = 0;
3739
3740 NYI_assert (29, 24, 0x0E);
3741 NYI_assert (21, 21, 1);
3742 NYI_assert (15, 10, 0x27);
3743
3744 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
3745 switch (INSTR (23, 22))
3746 {
3747 case 0:
/* MUL writes back only the low half of each product, so the
   read and write element sizes must match. */
3748 DO_VEC_WIDENING_MUL (full ? 16 : 8, uint16_t, u8, u8);
3749 return;
3750
3751 case 1:
3752 DO_VEC_WIDENING_MUL (full ? 8 : 4, uint32_t, u16, u16);
3753 return;
3754
3755 case 2:
3756 DO_VEC_WIDENING_MUL (full ? 4 : 2, uint64_t, u32, u32);
3757 return;
3758
3759 case 3:
3760 HALT_UNALLOC;
3761 }
3762 }
3763
3764 static void
3765 do_vec_MLA (sim_cpu *cpu)
3766 {
3767 /* instr[31] = 0
3768 instr[30] = full/half selector
3769 instr[29,24] = 00 1110
3770 instr[23,22] = size: 00=> 8-bit, 01=> 16-bit, 10=> 32-bit
3771 instr[21] = 1
3772 instr[20,16] = Vm
3773 instr[15,10] = 1001 01
3774 instr[9,5] = Vn
3775 instr[4,0] = Vd. */
3776
3777 unsigned vm = INSTR (20, 16);
3778 unsigned vn = INSTR (9, 5);
3779 unsigned vd = INSTR (4, 0);
3780 unsigned i;
3781 int full = INSTR (30, 30);
3782
3783 NYI_assert (29, 24, 0x0E);
3784 NYI_assert (21, 21, 1);
3785 NYI_assert (15, 10, 0x25);
3786
3787 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
3788 switch (INSTR (23, 22))
3789 {
3790 case 0:
3791 {
3792 uint16_t a[16], b[16];
3793
3794 for (i = 0; i < (full ? 16 : 8); i++)
3795 {
3796 a[i] = aarch64_get_vec_u8 (cpu, vn, i);
3797 b[i] = aarch64_get_vec_u8 (cpu, vm, i);
3798 }
3799
3800 for (i = 0; i < (full ? 16 : 8); i++)
3801 {
3802 uint16_t v = aarch64_get_vec_u8 (cpu, vd, i);
3803
3804 aarch64_set_vec_u8 (cpu, vd, i, v + (a[i] * b[i]));
3805 }
3806 }
3807 return;
3808
3809 case 1:
3810 {
3811 uint32_t a[8], b[8];
3812
3813 for (i = 0; i < (full ? 8 : 4); i++)
3814 {
3815 a[i] = aarch64_get_vec_u16 (cpu, vn, i);
3816 b[i] = aarch64_get_vec_u16 (cpu, vm, i);
3817 }
3818
3819 for (i = 0; i < (full ? 8 : 4); i++)
3820 {
3821 uint32_t v = aarch64_get_vec_u16 (cpu, vd, i);
3822
3823 aarch64_set_vec_u16 (cpu, vd, i, v + (a[i] * b[i]));
3824 }
3825 }
3826 return;
3827
3828 case 2:
3829 {
3830 uint64_t a[4], b[4];
3831
3832 for (i = 0; i < (full ? 4 : 2); i++)
3833 {
3834 a[i] = aarch64_get_vec_u32 (cpu, vn, i);
3835 b[i] = aarch64_get_vec_u32 (cpu, vm, i);
3836 }
3837
3838 for (i = 0; i < (full ? 4 : 2); i++)
3839 {
3840 uint64_t v = aarch64_get_vec_u32 (cpu, vd, i);
3841
3842 aarch64_set_vec_u32 (cpu, vd, i, v + (a[i] * b[i]));
3843 }
3844 }
3845 return;
3846
3847 case 3:
3848 HALT_UNALLOC;
3849 }
3850 }
3851
/* NaN-ignoring max/min helpers for the FMAXNM/FMINNM family: a NaN
   operand is ignored unless both operands are NaN. */

3852 static float
3853 fmaxnm (float a, float b)
3854 {
3855 if (! isnan (a))
3856 {
3857 if (! isnan (b))
3858 return a > b ? a : b;
3859 return a;
3860 }
3861 else if (! isnan (b))
3862 return b;
3863 return a;
3864 }
3865
3866 static float
3867 fminnm (float a, float b)
3868 {
3869 if (! isnan (a))
3870 {
3871 if (! isnan (b))
3872 return a < b ? a : b;
3873 return a;
3874 }
3875 else if (! isnan (b))
3876 return b;
3877 return a;
3878 }
3879
3880 static double
3881 dmaxnm (double a, double b)
3882 {
3883 if (! isnan (a))
3884 {
3885 if (! isnan (b))
3886 return a > b ? a : b;
3887 return a;
3888 }
3889 else if (! isnan (b))
3890 return b;
3891 return a;
3892 }
3893
3894 static double
3895 dminnm (double a, double b)
3896 {
3897 if (! isnan (a))
3898 {
3899 if (! isnan (b))
3900 return a < b ? a : b;
3901 return a;
3902 }
3903 else if (! isnan (b))
3904 return b;
3905 return a;
3906 }
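
/* Example: with the isnan-based tests above, fmaxnm (NAN, 5.0f) and
   fmaxnm (5.0f, NAN) both return 5.0f, and a NaN is returned only
   when both operands are NaN - the "NM" behaviour of FMAXNM and
   friends.  (Signalling-NaN subtleties are not modelled.) */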
3907
3908 static void
3909 do_vec_FminmaxNMP (sim_cpu *cpu)
3910 {
3911 /* instr [31] = 0
3912 instr [30] = half (0)/full (1)
3913 instr [29,24] = 10 1110
3914 instr [23] = max(0)/min(1)
3915 instr [22] = float (0)/double (1)
3916 instr [21] = 1
3917 instr [20,16] = Vm
3918 instr [15,10] = 1100 01
3919 instr [9,5] = Vn
3920 instr [4,0] = Vd. */
3921
3922 unsigned vm = INSTR (20, 16);
3923 unsigned vn = INSTR (9, 5);
3924 unsigned vd = INSTR (4, 0);
3925 int full = INSTR (30, 30);
3926
3927 NYI_assert (29, 24, 0x2E);
3928 NYI_assert (21, 21, 1);
3929 NYI_assert (15, 10, 0x31);
3930
3931 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
3932 if (INSTR (22, 22))
3933 {
3934 double (* fn)(double, double) = INSTR (23, 23)
3935 ? dminnm : dmaxnm;
3936
3937 if (! full)
3938 HALT_NYI;
3939 aarch64_set_vec_double (cpu, vd, 0,
3940 fn (aarch64_get_vec_double (cpu, vn, 0),
3941 aarch64_get_vec_double (cpu, vn, 1)));
3942 aarch64_set_vec_double (cpu, vd, 1,
3943 fn (aarch64_get_vec_double (cpu, vm, 0),
3944 aarch64_get_vec_double (cpu, vm, 1)));
3945 }
3946 else
3947 {
3948 float (* fn)(float, float) = INSTR (23, 23)
3949 ? fminnm : fmaxnm;
3950
3951 aarch64_set_vec_float (cpu, vd, 0,
3952 fn (aarch64_get_vec_float (cpu, vn, 0),
3953 aarch64_get_vec_float (cpu, vn, 1)));
3954 if (full)
3955 aarch64_set_vec_float (cpu, vd, 1,
3956 fn (aarch64_get_vec_float (cpu, vn, 2),
3957 aarch64_get_vec_float (cpu, vn, 3)));
3958
3959 aarch64_set_vec_float (cpu, vd, (full ? 2 : 1),
3960 fn (aarch64_get_vec_float (cpu, vm, 0),
3961 aarch64_get_vec_float (cpu, vm, 1)));
3962 if (full)
3963 aarch64_set_vec_float (cpu, vd, 3,
3964 fn (aarch64_get_vec_float (cpu, vm, 2),
3965 aarch64_get_vec_float (cpu, vm, 3)));
3966 }
3967 }
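
/* Pairwise layout, for reference: FMAXNMP Vd.4S, Vn.4S, Vm.4S computes
   fn (Vn[0],Vn[1]) and fn (Vn[2],Vn[3]) into the lower two lanes of Vd
   and fn (Vm[0],Vm[1]), fn (Vm[2],Vm[3]) into the upper two, which is
   the ordering implemented above.  */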
3968
3969 static void
3970 do_vec_AND (sim_cpu *cpu)
3971 {
3972 /* instr[31] = 0
3973 instr[30] = half (0)/full (1)
3974 instr[29,21] = 001110001
3975 instr[20,16] = Vm
3976 instr[15,10] = 000111
3977 instr[9,5] = Vn
3978 instr[4,0] = Vd. */
3979
3980 unsigned vm = INSTR (20, 16);
3981 unsigned vn = INSTR (9, 5);
3982 unsigned vd = INSTR (4, 0);
3983 unsigned i;
3984 int full = INSTR (30, 30);
3985
3986 NYI_assert (29, 21, 0x071);
3987 NYI_assert (15, 10, 0x07);
3988
3989 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
3990 for (i = 0; i < (full ? 4 : 2); i++)
3991 aarch64_set_vec_u32 (cpu, vd, i,
3992 aarch64_get_vec_u32 (cpu, vn, i)
3993 & aarch64_get_vec_u32 (cpu, vm, i));
3994 }
3995
3996 static void
3997 do_vec_BSL (sim_cpu *cpu)
3998 {
3999 /* instr[31] = 0
4000 instr[30] = half (0)/full (1)
4001 instr[29,21] = 101110011
4002 instr[20,16] = Vm
4003 instr[15,10] = 000111
4004 instr[9,5] = Vn
4005 instr[4,0] = Vd. */
4006
4007 unsigned vm = INSTR (20, 16);
4008 unsigned vn = INSTR (9, 5);
4009 unsigned vd = INSTR (4, 0);
4010 unsigned i;
4011 int full = INSTR (30, 30);
4012
4013 NYI_assert (29, 21, 0x173);
4014 NYI_assert (15, 10, 0x07);
4015
4016 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
4017 for (i = 0; i < (full ? 16 : 8); i++)
4018 aarch64_set_vec_u8 (cpu, vd, i,
4019 ( aarch64_get_vec_u8 (cpu, vd, i)
4020 & aarch64_get_vec_u8 (cpu, vn, i))
4021 | ((~ aarch64_get_vec_u8 (cpu, vd, i))
4022 & aarch64_get_vec_u8 (cpu, vm, i)));
4023 }
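
/* Worked example of the BSL selection above: with byte values
   vd = 0xF0 (the pre-existing selector), vn = 0xAA and vm = 0x55,
   the result is (0xF0 & 0xAA) | (~0xF0 & 0x55) = 0xA0 | 0x05 = 0xA5.  */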
4024
4025 static void
4026 do_vec_EOR (sim_cpu *cpu)
4027 {
4028 /* instr[31] = 0
4029 instr[30] = half (0)/full (1)
4030 instr[29,21] = 10 1110 001
4031 instr[20,16] = Vm
4032 instr[15,10] = 000111
4033 instr[9,5] = Vn
4034 instr[4,0] = Vd. */
4035
4036 unsigned vm = INSTR (20, 16);
4037 unsigned vn = INSTR (9, 5);
4038 unsigned vd = INSTR (4, 0);
4039 unsigned i;
4040 int full = INSTR (30, 30);
4041
4042 NYI_assert (29, 21, 0x171);
4043 NYI_assert (15, 10, 0x07);
4044
4045 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
4046 for (i = 0; i < (full ? 4 : 2); i++)
4047 aarch64_set_vec_u32 (cpu, vd, i,
4048 aarch64_get_vec_u32 (cpu, vn, i)
4049 ^ aarch64_get_vec_u32 (cpu, vm, i));
4050 }
4051
4052 static void
4053 do_vec_bit (sim_cpu *cpu)
4054 {
4055 /* instr[31] = 0
4056 instr[30] = half (0)/full (1)
4057 instr[29,23] = 10 1110 1
4058 instr[22] = BIT (0) / BIF (1)
4059 instr[21] = 1
4060 instr[20,16] = Vm
4061 instr[15,10] = 0001 11
4062 instr[9,5] = Vn
4063 instr[4,0] = Vd. */
4064
4065 unsigned vm = INSTR (20, 16);
4066 unsigned vn = INSTR (9, 5);
4067 unsigned vd = INSTR (4, 0);
4068 unsigned full = INSTR (30, 30);
4069 unsigned test_false = INSTR (22, 22);
4070 unsigned i;
4071
4072 NYI_assert (29, 23, 0x5D);
4073 NYI_assert (21, 21, 1);
4074 NYI_assert (15, 10, 0x07);
4075
4076 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
4077 /* BIT/BIF are bitwise: each result bit comes from Vn where the
4078 selector (Vm for BIT, ~Vm for BIF) has that bit set, else from Vd. */
4079 for (i = 0; i < (full ? 2 : 1); i++)
4080 {
4081 uint64_t vn_val = aarch64_get_vec_u64 (cpu, vn, i);
4082 uint64_t vm_val = aarch64_get_vec_u64 (cpu, vm, i);
4083 uint64_t vd_val = aarch64_get_vec_u64 (cpu, vd, i);
4084 uint64_t sel = test_false ? ~vm_val : vm_val;
4085 
4086 aarch64_set_vec_u64 (cpu, vd, i,
4087 (vd_val & ~sel) | (vn_val & sel));
4088 }
4089 }
4090
4091 static void
4092 do_vec_ORN (sim_cpu *cpu)
4093 {
4094 /* instr[31] = 0
4095 instr[30] = half (0)/full (1)
4096 instr[29,21] = 00 1110 111
4097 instr[20,16] = Vm
4098 instr[15,10] = 00 0111
4099 instr[9,5] = Vn
4100 instr[4,0] = Vd. */
4101
4102 unsigned vm = INSTR (20, 16);
4103 unsigned vn = INSTR (9, 5);
4104 unsigned vd = INSTR (4, 0);
4105 unsigned i;
4106 int full = INSTR (30, 30);
4107
4108 NYI_assert (29, 21, 0x077);
4109 NYI_assert (15, 10, 0x07);
4110
4111 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
4112 for (i = 0; i < (full ? 16 : 8); i++)
4113 aarch64_set_vec_u8 (cpu, vd, i,
4114 aarch64_get_vec_u8 (cpu, vn, i)
4115 | ~ aarch64_get_vec_u8 (cpu, vm, i));
4116 }
4117
4118 static void
4119 do_vec_ORR (sim_cpu *cpu)
4120 {
4121 /* instr[31] = 0
4122 instr[30] = half (0)/full (1)
4123 instr[29,21] = 00 1110 101
4124 instr[20,16] = Vm
4125 instr[15,10] = 0001 11
4126 instr[9,5] = Vn
4127 instr[4,0] = Vd. */
4128
4129 unsigned vm = INSTR (20, 16);
4130 unsigned vn = INSTR (9, 5);
4131 unsigned vd = INSTR (4, 0);
4132 unsigned i;
4133 int full = INSTR (30, 30);
4134
4135 NYI_assert (29, 21, 0x075);
4136 NYI_assert (15, 10, 0x07);
4137
4138 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
4139 for (i = 0; i < (full ? 16 : 8); i++)
4140 aarch64_set_vec_u8 (cpu, vd, i,
4141 aarch64_get_vec_u8 (cpu, vn, i)
4142 | aarch64_get_vec_u8 (cpu, vm, i));
4143 }
4144
4145 static void
4146 do_vec_BIC (sim_cpu *cpu)
4147 {
4148 /* instr[31] = 0
4149 instr[30] = half (0)/full (1)
4150 instr[29,21] = 00 1110 011
4151 instr[20,16] = Vm
4152 instr[15,10] = 00 0111
4153 instr[9,5] = Vn
4154 instr[4,0] = Vd. */
4155
4156 unsigned vm = INSTR (20, 16);
4157 unsigned vn = INSTR (9, 5);
4158 unsigned vd = INSTR (4, 0);
4159 unsigned i;
4160 int full = INSTR (30, 30);
4161
4162 NYI_assert (29, 21, 0x073);
4163 NYI_assert (15, 10, 0x07);
4164
4165 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
4166 for (i = 0; i < (full ? 16 : 8); i++)
4167 aarch64_set_vec_u8 (cpu, vd, i,
4168 aarch64_get_vec_u8 (cpu, vn, i)
4169 & ~ aarch64_get_vec_u8 (cpu, vm, i));
4170 }
4171
4172 static void
4173 do_vec_XTN (sim_cpu *cpu)
4174 {
4175 /* instr[31] = 0
4176 instr[30] = first part (0)/ second part (1)
4177 instr[29,24] = 00 1110
4178 instr[23,22] = size: byte(00), half(01), word (10)
4179 instr[21,10] = 1000 0100 1010
4180 instr[9,5] = Vs
4181 instr[4,0] = Vd. */
4182
4183 unsigned vs = INSTR (9, 5);
4184 unsigned vd = INSTR (4, 0);
4185 unsigned bias = INSTR (30, 30);
4186 unsigned i;
4187
4188 NYI_assert (29, 24, 0x0E);
4189 NYI_assert (21, 10, 0x84A);
4190
4191 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
4192 switch (INSTR (23, 22))
4193 {
4194 case 0:
4195 if (bias)
4196 for (i = 0; i < 8; i++)
4197 aarch64_set_vec_u8 (cpu, vd, i + 8,
4198 aarch64_get_vec_u16 (cpu, vs, i));
4199 else
4200 for (i = 0; i < 8; i++)
4201 aarch64_set_vec_u8 (cpu, vd, i, aarch64_get_vec_u16 (cpu, vs, i));
4202 return;
4203
4204 case 1:
4205 if (bias)
4206 for (i = 0; i < 4; i++)
4207 aarch64_set_vec_u16 (cpu, vd, i + 4,
4208 aarch64_get_vec_u32 (cpu, vs, i));
4209 else
4210 for (i = 0; i < 4; i++)
4211 aarch64_set_vec_u16 (cpu, vd, i, aarch64_get_vec_u32 (cpu, vs, i));
4212 return;
4213
4214 case 2:
4215 if (bias)
4216 for (i = 0; i < 2; i++)
4217 aarch64_set_vec_u32 (cpu, vd, i + 2,
4218 aarch64_get_vec_u64 (cpu, vs, i));
4219 else
4220 for (i = 0; i < 2; i++)
4221 aarch64_set_vec_u32 (cpu, vd, i, aarch64_get_vec_u64 (cpu, vs, i));
4222 return;
4223 }
4224 }
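
/* Note on the narrowing above: XTN and XTN2 both keep the least
   significant half of each wide source element (the narrower setters
   truncate); XTN2 merely directs the results to the upper half of Vd,
   leaving the lower half intact.  */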
4225
4226 static void
4227 do_vec_maxv (sim_cpu *cpu)
4228 {
4229 /* instr[31] = 0
4230 instr[30] = half(0)/full(1)
4231 instr[29] = signed (0)/unsigned(1)
4232 instr[28,24] = 0 1110
4233 instr[23,22] = size: byte(00), half(01), word (10)
4234 instr[21] = 1
4235 instr[20,17] = 1 000
4236 instr[16] = max(0)/min(1)
4237 instr[15,10] = 1010 10
4238 instr[9,5] = V source
4239 instr[4,0] = R dest. */
4240
4241 unsigned vs = INSTR (9, 5);
4242 unsigned rd = INSTR (4, 0);
4243 unsigned full = INSTR (30, 30);
4244 unsigned i;
4245
4246 NYI_assert (28, 24, 0x0E);
4247 NYI_assert (21, 21, 1);
4248 NYI_assert (20, 17, 8);
4249 NYI_assert (15, 10, 0x2A);
4250
4251 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
4252 switch ((INSTR (29, 29) << 1) | INSTR (16, 16))
4253 {
4254 case 0: /* SMAXV. */
4255 {
4256 int64_t smax;
4257 switch (INSTR (23, 22))
4258 {
4259 case 0:
4260 smax = aarch64_get_vec_s8 (cpu, vs, 0);
4261 for (i = 1; i < (full ? 16 : 8); i++)
4262 smax = max (smax, aarch64_get_vec_s8 (cpu, vs, i));
4263 break;
4264 case 1:
4265 smax = aarch64_get_vec_s16 (cpu, vs, 0);
4266 for (i = 1; i < (full ? 8 : 4); i++)
4267 smax = max (smax, aarch64_get_vec_s16 (cpu, vs, i));
4268 break;
4269 case 2:
4270 smax = aarch64_get_vec_s32 (cpu, vs, 0);
4271 for (i = 1; i < (full ? 4 : 2); i++)
4272 smax = max (smax, aarch64_get_vec_s32 (cpu, vs, i));
4273 break;
4274 case 3:
4275 HALT_UNALLOC;
4276 }
4277 aarch64_set_reg_s64 (cpu, rd, NO_SP, smax);
4278 return;
4279 }
4280
4281 case 1: /* SMINV. */
4282 {
4283 int64_t smin;
4284 switch (INSTR (23, 22))
4285 {
4286 case 0:
4287 smin = aarch64_get_vec_s8 (cpu, vs, 0);
4288 for (i = 1; i < (full ? 16 : 8); i++)
4289 smin = min (smin, aarch64_get_vec_s8 (cpu, vs, i));
4290 break;
4291 case 1:
4292 smin = aarch64_get_vec_s16 (cpu, vs, 0);
4293 for (i = 1; i < (full ? 8 : 4); i++)
4294 smin = min (smin, aarch64_get_vec_s16 (cpu, vs, i));
4295 break;
4296 case 2:
4297 smin = aarch64_get_vec_s32 (cpu, vs, 0);
4298 for (i = 1; i < (full ? 4 : 2); i++)
4299 smin = min (smin, aarch64_get_vec_s32 (cpu, vs, i));
4300 break;
4301
4302 case 3:
4303 HALT_UNALLOC;
4304 }
4305 aarch64_set_reg_s64 (cpu, rd, NO_SP, smin);
4306 return;
4307 }
4308
4309 case 2: /* UMAXV. */
4310 {
4311 uint64_t umax;
4312 switch (INSTR (23, 22))
4313 {
4314 case 0:
4315 umax = aarch64_get_vec_u8 (cpu, vs, 0);
4316 for (i = 1; i < (full ? 16 : 8); i++)
4317 umax = max (umax, aarch64_get_vec_u8 (cpu, vs, i));
4318 break;
4319 case 1:
4320 umax = aarch64_get_vec_u16 (cpu, vs, 0);
4321 for (i = 1; i < (full ? 8 : 4); i++)
4322 umax = max (umax, aarch64_get_vec_u16 (cpu, vs, i));
4323 break;
4324 case 2:
4325 umax = aarch64_get_vec_u32 (cpu, vs, 0);
4326 for (i = 1; i < (full ? 4 : 2); i++)
4327 umax = max (umax, aarch64_get_vec_u32 (cpu, vs, i));
4328 break;
4329
4330 case 3:
4331 HALT_UNALLOC;
4332 }
4333 aarch64_set_reg_u64 (cpu, rd, NO_SP, umax);
4334 return;
4335 }
4336
4337 case 3: /* UMINV. */
4338 {
4339 uint64_t umin;
4340 switch (INSTR (23, 22))
4341 {
4342 case 0:
4343 umin = aarch64_get_vec_u8 (cpu, vs, 0);
4344 for (i = 1; i < (full ? 16 : 8); i++)
4345 umin = min (umin, aarch64_get_vec_u8 (cpu, vs, i));
4346 break;
4347 case 1:
4348 umin = aarch64_get_vec_u16 (cpu, vs, 0);
4349 for (i = 1; i < (full ? 8 : 4); i++)
4350 umin = min (umin, aarch64_get_vec_u16 (cpu, vs, i));
4351 break;
4352 case 2:
4353 umin = aarch64_get_vec_u32 (cpu, vs, 0);
4354 for (i = 1; i < (full ? 4 : 2); i++)
4355 umin = min (umin, aarch64_get_vec_u32 (cpu, vs, i));
4356 break;
4357
4358 case 3:
4359 HALT_UNALLOC;
4360 }
4361 aarch64_set_reg_u64 (cpu, rd, NO_SP, umin);
4362 return;
4363 }
4364 }
4365 }
4366
4367 static void
4368 do_vec_fminmaxV (sim_cpu *cpu)
4369 {
4370 /* instr[31,24] = 0110 1110
4371 instr[23] = max(0)/min(1)
4372 instr[22,14] = 011 0000 11
4373 instr[13,12] = nm(00)/normal(11)
4374 instr[11,10] = 10
4375 instr[9,5] = V source
4376 instr[4,0] = R dest. */
4377
4378 unsigned vs = INSTR (9, 5);
4379 unsigned rd = INSTR (4, 0);
4380 unsigned i;
4381 float res = aarch64_get_vec_float (cpu, vs, 0);
4382
4383 NYI_assert (31, 24, 0x6E);
4384 NYI_assert (22, 14, 0x0C3);
4385 NYI_assert (11, 10, 2);
4386
4387 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
4388 if (INSTR (23, 23))
4389 {
4390 switch (INSTR (13, 12))
4391 {
4392 case 0: /* FMINNMV. */
4393 for (i = 1; i < 4; i++)
4394 res = fminnm (res, aarch64_get_vec_float (cpu, vs, i));
4395 break;
4396
4397 case 3: /* FMINV. */
4398 for (i = 1; i < 4; i++)
4399 res = min (res, aarch64_get_vec_float (cpu, vs, i));
4400 break;
4401
4402 default:
4403 HALT_NYI;
4404 }
4405 }
4406 else
4407 {
4408 switch (INSTR (13, 12))
4409 {
4410 case 0: /* FMAXNMV. */
4411 for (i = 1; i < 4; i++)
4412 res = fmaxnm (res, aarch64_get_vec_float (cpu, vs, i));
4413 break;
4414
4415 case 3: /* FMAXV. */
4416 for (i = 1; i < 4; i++)
4417 res = max (res, aarch64_get_vec_float (cpu, vs, i));
4418 break;
4419
4420 default:
4421 HALT_NYI;
4422 }
4423 }
4424
4425 aarch64_set_FP_float (cpu, rd, res);
4426 }
4427
4428 static void
4429 do_vec_Fminmax (sim_cpu *cpu)
4430 {
4431 /* instr[31] = 0
4432 instr[30] = half(0)/full(1)
4433 instr[29,24] = 00 1110
4434 instr[23] = max(0)/min(1)
4435 instr[22] = float(0)/double(1)
4436 instr[21] = 1
4437 instr[20,16] = Vm
4438 instr[15,14] = 11
4439 instr[13,12] = nm(00)/normal(11)
4440 instr[11,10] = 01
4441 instr[9,5] = Vn
4442 instr[4,0] = Vd. */
4443
4444 unsigned vm = INSTR (20, 16);
4445 unsigned vn = INSTR (9, 5);
4446 unsigned vd = INSTR (4, 0);
4447 unsigned full = INSTR (30, 30);
4448 unsigned min = INSTR (23, 23);
4449 unsigned i;
4450
4451 NYI_assert (29, 24, 0x0E);
4452 NYI_assert (21, 21, 1);
4453 NYI_assert (15, 14, 3);
4454 NYI_assert (11, 10, 1);
4455
4456 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
4457 if (INSTR (22, 22))
4458 {
4459 double (* func)(double, double);
4460
4461 if (! full)
4462 HALT_NYI;
4463
4464 if (INSTR (13, 12) == 0)
4465 func = min ? dminnm : dmaxnm;
4466 else if (INSTR (13, 12) == 3)
4467 func = min ? fmin : fmax;
4468 else
4469 HALT_NYI;
4470
4471 for (i = 0; i < 2; i++)
4472 aarch64_set_vec_double (cpu, vd, i,
4473 func (aarch64_get_vec_double (cpu, vn, i),
4474 aarch64_get_vec_double (cpu, vm, i)));
4475 }
4476 else
4477 {
4478 float (* func)(float, float);
4479
4480 if (INSTR (13, 12) == 0)
4481 func = min ? fminnm : fmaxnm;
4482 else if (INSTR (13, 12) == 3)
4483 func = min ? fminf : fmaxf;
4484 else
4485 HALT_NYI;
4486
4487 for (i = 0; i < (full ? 4 : 2); i++)
4488 aarch64_set_vec_float (cpu, vd, i,
4489 func (aarch64_get_vec_float (cpu, vn, i),
4490 aarch64_get_vec_float (cpu, vm, i)));
4491 }
4492 }
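
/* Note: C99 fmax/fmin (and fmaxf/fminf) return the non-NaN operand when
   exactly one operand is a NaN, which is the architected FMAXNM/FMINNM
   behaviour; the architected FMAX/FMIN would propagate the NaN instead.
   The "normal" variants above are therefore only approximate when NaNs
   are present.  */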
4493
4494 static void
4495 do_vec_SCVTF (sim_cpu *cpu)
4496 {
4497 /* instr[31] = 0
4498 instr[30] = Q
4499 instr[29,23] = 00 1110 0
4500 instr[22] = float(0)/double(1)
4501 instr[21,10] = 10 0001 1101 10
4502 instr[9,5] = Vn
4503 instr[4,0] = Vd. */
4504
4505 unsigned vn = INSTR (9, 5);
4506 unsigned vd = INSTR (4, 0);
4507 unsigned full = INSTR (30, 30);
4508 unsigned size = INSTR (22, 22);
4509 unsigned i;
4510
4511 NYI_assert (29, 23, 0x1C);
4512 NYI_assert (21, 10, 0x876);
4513
4514 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
4515 if (size)
4516 {
4517 if (! full)
4518 HALT_UNALLOC;
4519
4520 for (i = 0; i < 2; i++)
4521 {
4522 double val = (double) aarch64_get_vec_s64 (cpu, vn, i);
4523 aarch64_set_vec_double (cpu, vd, i, val);
4524 }
4525 }
4526 else
4527 {
4528 for (i = 0; i < (full ? 4 : 2); i++)
4529 {
4530 float val = (float) aarch64_get_vec_s32 (cpu, vn, i);
4531 aarch64_set_vec_float (cpu, vd, i, val);
4532 }
4533 }
4534 }
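
/* Bit 29 is zero here (see the (29,23) assert), so this is the signed
   convert SCVTF; hence the signed element reads above.  As a sanity
   check, an s32 lane holding -1 converts to -1.0f, where an unsigned
   read would have produced 4294967295.0f.  */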
4535
4536 #define VEC_CMP(SOURCE, CMP) \
4537 do \
4538 { \
4539 switch (size) \
4540 { \
4541 case 0: \
4542 for (i = 0; i < (full ? 16 : 8); i++) \
4543 aarch64_set_vec_u8 (cpu, vd, i, \
4544 aarch64_get_vec_##SOURCE##8 (cpu, vn, i) \
4545 CMP \
4546 aarch64_get_vec_##SOURCE##8 (cpu, vm, i) \
4547 ? -1 : 0); \
4548 return; \
4549 case 1: \
4550 for (i = 0; i < (full ? 8 : 4); i++) \
4551 aarch64_set_vec_u16 (cpu, vd, i, \
4552 aarch64_get_vec_##SOURCE##16 (cpu, vn, i) \
4553 CMP \
4554 aarch64_get_vec_##SOURCE##16 (cpu, vm, i) \
4555 ? -1 : 0); \
4556 return; \
4557 case 2: \
4558 for (i = 0; i < (full ? 4 : 2); i++) \
4559 aarch64_set_vec_u32 (cpu, vd, i, \
4560 aarch64_get_vec_##SOURCE##32 (cpu, vn, i) \
4561 CMP \
4562 aarch64_get_vec_##SOURCE##32 (cpu, vm, i) \
4563 ? -1 : 0); \
4564 return; \
4565 case 3: \
4566 if (! full) \
4567 HALT_UNALLOC; \
4568 for (i = 0; i < 2; i++) \
4569 aarch64_set_vec_u64 (cpu, vd, i, \
4570 aarch64_get_vec_##SOURCE##64 (cpu, vn, i) \
4571 CMP \
4572 aarch64_get_vec_##SOURCE##64 (cpu, vm, i) \
4573 ? -1ULL : 0); \
4574 return; \
4575 } \
4576 } \
4577 while (0)
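
/* For reference, VEC_CMP (s, >) above implements the signed CMGT family:
   each destination lane becomes all ones when the comparison holds and
   all zeros otherwise, e.g. for byte lanes it expands to
     aarch64_set_vec_u8 (cpu, vd, i,
                         aarch64_get_vec_s8 (cpu, vn, i)
                         > aarch64_get_vec_s8 (cpu, vm, i) ? -1 : 0);  */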
4578
4579 #define VEC_CMP0(SOURCE, CMP) \
4580 do \
4581 { \
4582 switch (size) \
4583 { \
4584 case 0: \
4585 for (i = 0; i < (full ? 16 : 8); i++) \
4586 aarch64_set_vec_u8 (cpu, vd, i, \
4587 aarch64_get_vec_##SOURCE##8 (cpu, vn, i) \
4588 CMP 0 ? -1 : 0); \
4589 return; \
4590 case 1: \
4591 for (i = 0; i < (full ? 8 : 4); i++) \
4592 aarch64_set_vec_u16 (cpu, vd, i, \
4593 aarch64_get_vec_##SOURCE##16 (cpu, vn, i) \
4594 CMP 0 ? -1 : 0); \
4595 return; \
4596 case 2: \
4597 for (i = 0; i < (full ? 4 : 2); i++) \
4598 aarch64_set_vec_u32 (cpu, vd, i, \
4599 aarch64_get_vec_##SOURCE##32 (cpu, vn, i) \
4600 CMP 0 ? -1 : 0); \
4601 return; \
4602 case 3: \
4603 if (! full) \
4604 HALT_UNALLOC; \
4605 for (i = 0; i < 2; i++) \
4606 aarch64_set_vec_u64 (cpu, vd, i, \
4607 aarch64_get_vec_##SOURCE##64 (cpu, vn, i) \
4608 CMP 0 ? -1ULL : 0); \
4609 return; \
4610 } \
4611 } \
4612 while (0)
4613
4614 #define VEC_FCMP0(CMP) \
4615 do \
4616 { \
4617 if (vm != 0) \
4618 HALT_NYI; \
4619 if (INSTR (22, 22)) \
4620 { \
4621 if (! full) \
4622 HALT_NYI; \
4623 for (i = 0; i < 2; i++) \
4624 aarch64_set_vec_u64 (cpu, vd, i, \
4625 aarch64_get_vec_double (cpu, vn, i) \
4626 CMP 0.0 ? -1 : 0); \
4627 } \
4628 else \
4629 { \
4630 for (i = 0; i < (full ? 4 : 2); i++) \
4631 aarch64_set_vec_u32 (cpu, vd, i, \
4632 aarch64_get_vec_float (cpu, vn, i) \
4633 CMP 0.0 ? -1 : 0); \
4634 } \
4635 return; \
4636 } \
4637 while (0)
4638
4639 #define VEC_FCMP(CMP) \
4640 do \
4641 { \
4642 if (INSTR (22, 22)) \
4643 { \
4644 if (! full) \
4645 HALT_NYI; \
4646 for (i = 0; i < 2; i++) \
4647 aarch64_set_vec_u64 (cpu, vd, i, \
4648 aarch64_get_vec_double (cpu, vn, i) \
4649 CMP \
4650 aarch64_get_vec_double (cpu, vm, i) \
4651 ? -1 : 0); \
4652 } \
4653 else \
4654 { \
4655 for (i = 0; i < (full ? 4 : 2); i++) \
4656 aarch64_set_vec_u32 (cpu, vd, i, \
4657 aarch64_get_vec_float (cpu, vn, i) \
4658 CMP \
4659 aarch64_get_vec_float (cpu, vm, i) \
4660 ? -1 : 0); \
4661 } \
4662 return; \
4663 } \
4664 while (0)
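
/* The floating point variants follow the same convention: the mask is
   written through the same-sized unsigned setter, so a true 32-bit
   compare stores 0xffffffff and a true 64-bit compare stores
   0xffffffffffffffff.  */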
4665
4666 static void
4667 do_vec_compare (sim_cpu *cpu)
4668 {
4669 /* instr[31] = 0
4670 instr[30] = half(0)/full(1)
4671 instr[29] = part-of-comparison-type
4672 instr[28,24] = 0 1110
4673 instr[23,22] = size of integer compares: byte(00), half(01), word (10), long (11)
4674 type of float compares: single (x0) / double (x1)
4675 instr[21] = 1
4676 instr[20,16] = Vm or 00000 (compare vs 0)
4677 instr[15,10] = part-of-comparison-type
4678 instr[9,5] = Vn
4679 instr[4,0] = Vd. */
4680
4681 int full = INSTR (30, 30);
4682 int size = INSTR (23, 22);
4683 unsigned vm = INSTR (20, 16);
4684 unsigned vn = INSTR (9, 5);
4685 unsigned vd = INSTR (4, 0);
4686 unsigned i;
4687
4688 NYI_assert (28, 24, 0x0E);
4689 NYI_assert (21, 21, 1);
4690
4691 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
4692 if ((INSTR (11, 11)
4693 && INSTR (14, 14))
4694 || ((INSTR (11, 11) == 0
4695 && INSTR (10, 10) == 0)))
4696 {
4697 /* A compare vs 0. */
4698 if (vm != 0)
4699 {
4700 if (INSTR (15, 10) == 0x2A)
4701 do_vec_maxv (cpu);
4702 else if (INSTR (15, 10) == 0x32
4703 || INSTR (15, 10) == 0x3E)
4704 do_vec_fminmaxV (cpu);
4705 else if (INSTR (29, 23) == 0x1C
4706 && INSTR (21, 10) == 0x876)
4707 do_vec_SCVTF (cpu);
4708 else
4709 HALT_NYI;
4710 return;
4711 }
4712 }
4713
4714 if (INSTR (14, 14))
4715 {
4716 /* A floating point compare. */
4717 unsigned decode = (INSTR (29, 29) << 5) | (INSTR (23, 23) << 4)
4718 | INSTR (13, 10);
4719
4720 NYI_assert (15, 15, 1);
4721
4722 switch (decode)
4723 {
4724 case /* 0b010010: GT#0 */ 0x12: VEC_FCMP0 (>);
4725 case /* 0b110010: GE#0 */ 0x32: VEC_FCMP0 (>=);
4726 case /* 0b010110: EQ#0 */ 0x16: VEC_FCMP0 (==);
4727 case /* 0b110110: LE#0 */ 0x36: VEC_FCMP0 (<=);
4728 case /* 0b011010: LT#0 */ 0x1A: VEC_FCMP0 (<);
4729 case /* 0b111001: GT */ 0x39: VEC_FCMP (>);
4730 case /* 0b101001: GE */ 0x29: VEC_FCMP (>=);
4731 case /* 0b001001: EQ */ 0x09: VEC_FCMP (==);
4732
4733 default:
4734 HALT_NYI;
4735 }
4736 }
4737 else
4738 {
4739 unsigned decode = (INSTR (29, 29) << 6) | INSTR (15, 10);
4740
4741 switch (decode)
4742 {
4743 case 0x0D: /* 0001101 GT */ VEC_CMP (s, > );
4744 case 0x0F: /* 0001111 GE */ VEC_CMP (s, >= );
4745 case 0x22: /* 0100010 GT #0 */ VEC_CMP0 (s, > );
4746 case 0x26: /* 0100110 EQ #0 */ VEC_CMP0 (s, == );
4747 case 0x2A: /* 0101010 LT #0 */ VEC_CMP0 (s, < );
4748 case 0x4D: /* 1001101 HI */ VEC_CMP (u, > );
4749 case 0x4F: /* 1001111 HS */ VEC_CMP (u, >= );
4750 case 0x62: /* 1100010 GE #0 */ VEC_CMP0 (s, >= );
4751 case 0x63: /* 1100011 EQ */ VEC_CMP (u, == );
4752 case 0x66: /* 1100110 LE #0 */ VEC_CMP0 (s, <= );
4753 default:
4754 if (vm == 0)
4755 HALT_NYI;
4756 do_vec_maxv (cpu);
4757 }
4758 }
4759 }
4760
4761 static void
4762 do_vec_SSHL (sim_cpu *cpu)
4763 {
4764 /* instr[31] = 0
4765 instr[30] = first part (0)/ second part (1)
4766 instr[29,24] = 00 1110
4767 instr[23,22] = size: byte(00), half(01), word (10), long (11)
4768 instr[21] = 1
4769 instr[20,16] = Vm
4770 instr[15,10] = 0100 01
4771 instr[9,5] = Vn
4772 instr[4,0] = Vd. */
4773
4774 unsigned full = INSTR (30, 30);
4775 unsigned vm = INSTR (20, 16);
4776 unsigned vn = INSTR (9, 5);
4777 unsigned vd = INSTR (4, 0);
4778 unsigned i;
4779 signed int shift;
4780
4781 NYI_assert (29, 24, 0x0E);
4782 NYI_assert (21, 21, 1);
4783 NYI_assert (15, 10, 0x11);
4784
4785 /* The per-element shift count from Vm is signed: a negative count
4786 performs an arithmetic right shift instead of a left shift. */
4787 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
4788 switch (INSTR (23, 22))
4789 {
4790 case 0:
4791 for (i = 0; i < (full ? 16 : 8); i++)
4792 {
4793 shift = aarch64_get_vec_s8 (cpu, vm, i);
4794 if (shift >= 0)
4795 aarch64_set_vec_s8 (cpu, vd, i, aarch64_get_vec_s8 (cpu, vn, i)
4796 << shift);
4797 else
4798 aarch64_set_vec_s8 (cpu, vd, i, aarch64_get_vec_s8 (cpu, vn, i)
4799 >> - shift);
4800 }
4801 return;
4802
4803 case 1:
4804 for (i = 0; i < (full ? 8 : 4); i++)
4805 {
4806 shift = aarch64_get_vec_s8 (cpu, vm, i * 2);
4807 if (shift >= 0)
4808 aarch64_set_vec_s16 (cpu, vd, i, aarch64_get_vec_s16 (cpu, vn, i)
4809 << shift);
4810 else
4811 aarch64_set_vec_s16 (cpu, vd, i, aarch64_get_vec_s16 (cpu, vn, i)
4812 >> - shift);
4813 }
4814 return;
4815
4816 case 2:
4817 for (i = 0; i < (full ? 4 : 2); i++)
4818 {
4819 shift = aarch64_get_vec_s8 (cpu, vm, i * 4);
4820 if (shift >= 0)
4821 aarch64_set_vec_s32 (cpu, vd, i, aarch64_get_vec_s32 (cpu, vn, i)
4822 << shift);
4823 else
4824 aarch64_set_vec_s32 (cpu, vd, i, aarch64_get_vec_s32 (cpu, vn, i)
4825 >> - shift);
4826 }
4827 return;
4828
4829 case 3:
4830 if (! full)
4831 HALT_UNALLOC;
4832 for (i = 0; i < 2; i++)
4833 {
4834 shift = aarch64_get_vec_s8 (cpu, vm, i * 8);
4835 if (shift >= 0)
4836 aarch64_set_vec_s64 (cpu, vd, i, aarch64_get_vec_s64 (cpu, vn, i)
4837 << shift);
4838 else
4839 aarch64_set_vec_s64 (cpu, vd, i, aarch64_get_vec_s64 (cpu, vn, i)
4840 >> - shift);
4841 }
4842 return;
4843 }
4844 }
4845
4846 static void
4847 do_vec_USHL (sim_cpu *cpu)
4848 {
4849 /* instr[31] = 0
4850 instr[30] = first part (0)/ second part (1)
4851 instr[29,24] = 10 1110
4852 instr[23,22] = size: byte(00), half(01), word (10), long (11)
4853 instr[21] = 1
4854 instr[20,16] = Vm
4855 instr[15,10] = 0100 01
4856 instr[9,5] = Vn
4857 instr[4,0] = Vd */
4858
4859 unsigned full = INSTR (30, 30);
4860 unsigned vm = INSTR (20, 16);
4861 unsigned vn = INSTR (9, 5);
4862 unsigned vd = INSTR (4, 0);
4863 unsigned i;
4864 signed int shift;
4865
4866 NYI_assert (29, 24, 0x2E);
4867 NYI_assert (15, 10, 0x11);
4868
4869 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
4870 switch (INSTR (23, 22))
4871 {
4872 case 0:
4873 for (i = 0; i < (full ? 16 : 8); i++)
4874 {
4875 shift = aarch64_get_vec_s8 (cpu, vm, i);
4876 if (shift >= 0)
4877 aarch64_set_vec_u8 (cpu, vd, i, aarch64_get_vec_u8 (cpu, vn, i)
4878 << shift);
4879 else
4880 aarch64_set_vec_u8 (cpu, vd, i, aarch64_get_vec_u8 (cpu, vn, i)
4881 >> - shift);
4882 }
4883 return;
4884
4885 case 1:
4886 for (i = 0; i < (full ? 8 : 4); i++)
4887 {
4888 shift = aarch64_get_vec_s8 (cpu, vm, i * 2);
4889 if (shift >= 0)
4890 aarch64_set_vec_u16 (cpu, vd, i, aarch64_get_vec_u16 (cpu, vn, i)
4891 << shift);
4892 else
4893 aarch64_set_vec_u16 (cpu, vd, i, aarch64_get_vec_u16 (cpu, vn, i)
4894 >> - shift);
4895 }
4896 return;
4897
4898 case 2:
4899 for (i = 0; i < (full ? 4 : 2); i++)
4900 {
4901 shift = aarch64_get_vec_s8 (cpu, vm, i * 4);
4902 if (shift >= 0)
4903 aarch64_set_vec_u32 (cpu, vd, i, aarch64_get_vec_u32 (cpu, vn, i)
4904 << shift);
4905 else
4906 aarch64_set_vec_u32 (cpu, vd, i, aarch64_get_vec_u32 (cpu, vn, i)
4907 >> - shift);
4908 }
4909 return;
4910
4911 case 3:
4912 if (! full)
4913 HALT_UNALLOC;
4914 for (i = 0; i < 2; i++)
4915 {
4916 shift = aarch64_get_vec_s8 (cpu, vm, i * 8);
4917 if (shift >= 0)
4918 aarch64_set_vec_u64 (cpu, vd, i, aarch64_get_vec_u64 (cpu, vn, i)
4919 << shift);
4920 else
4921 aarch64_set_vec_u64 (cpu, vd, i, aarch64_get_vec_u64 (cpu, vn, i)
4922 >> - shift);
4923 }
4924 return;
4925 }
4926 }
4927
4928 static void
4929 do_vec_FMLA (sim_cpu *cpu)
4930 {
4931 /* instr[31] = 0
4932 instr[30] = full/half selector
4933 instr[29,23] = 0011100
4934 instr[22] = size: 0=>float, 1=>double
4935 instr[21] = 1
4936 instr[20,16] = Vm
4937 instr[15,10] = 1100 11
4938 instr[9,5] = Vn
4939 instr[4,0] = Vd. */
4940
4941 unsigned vm = INSTR (20, 16);
4942 unsigned vn = INSTR (9, 5);
4943 unsigned vd = INSTR (4, 0);
4944 unsigned i;
4945 int full = INSTR (30, 30);
4946
4947 NYI_assert (29, 23, 0x1C);
4948 NYI_assert (21, 21, 1);
4949 NYI_assert (15, 10, 0x33);
4950
4951 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
4952 if (INSTR (22, 22))
4953 {
4954 if (! full)
4955 HALT_UNALLOC;
4956 for (i = 0; i < 2; i++)
4957 aarch64_set_vec_double (cpu, vd, i,
4958 aarch64_get_vec_double (cpu, vn, i) *
4959 aarch64_get_vec_double (cpu, vm, i) +
4960 aarch64_get_vec_double (cpu, vd, i));
4961 }
4962 else
4963 {
4964 for (i = 0; i < (full ? 4 : 2); i++)
4965 aarch64_set_vec_float (cpu, vd, i,
4966 aarch64_get_vec_float (cpu, vn, i) *
4967 aarch64_get_vec_float (cpu, vm, i) +
4968 aarch64_get_vec_float (cpu, vd, i));
4969 }
4970 }
4971
4972 static void
4973 do_vec_max (sim_cpu *cpu)
4974 {
4975 /* instr[31] = 0
4976 instr[30] = full/half selector
4977 instr[29] = SMAX (0) / UMAX (1)
4978 instr[28,24] = 0 1110
4979 instr[23,22] = size: 00=> 8-bit, 01=> 16-bit, 10=> 32-bit
4980 instr[21] = 1
4981 instr[20,16] = Vm
4982 instr[15,10] = 0110 01
4983 instr[9,5] = Vn
4984 instr[4,0] = Vd. */
4985
4986 unsigned vm = INSTR (20, 16);
4987 unsigned vn = INSTR (9, 5);
4988 unsigned vd = INSTR (4, 0);
4989 unsigned i;
4990 int full = INSTR (30, 30);
4991
4992 NYI_assert (28, 24, 0x0E);
4993 NYI_assert (21, 21, 1);
4994 NYI_assert (15, 10, 0x19);
4995
4996 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
4997 if (INSTR (29, 29))
4998 {
4999 switch (INSTR (23, 22))
5000 {
5001 case 0:
5002 for (i = 0; i < (full ? 16 : 8); i++)
5003 aarch64_set_vec_u8 (cpu, vd, i,
5004 aarch64_get_vec_u8 (cpu, vn, i)
5005 > aarch64_get_vec_u8 (cpu, vm, i)
5006 ? aarch64_get_vec_u8 (cpu, vn, i)
5007 : aarch64_get_vec_u8 (cpu, vm, i));
5008 return;
5009
5010 case 1:
5011 for (i = 0; i < (full ? 8 : 4); i++)
5012 aarch64_set_vec_u16 (cpu, vd, i,
5013 aarch64_get_vec_u16 (cpu, vn, i)
5014 > aarch64_get_vec_u16 (cpu, vm, i)
5015 ? aarch64_get_vec_u16 (cpu, vn, i)
5016 : aarch64_get_vec_u16 (cpu, vm, i));
5017 return;
5018
5019 case 2:
5020 for (i = 0; i < (full ? 4 : 2); i++)
5021 aarch64_set_vec_u32 (cpu, vd, i,
5022 aarch64_get_vec_u32 (cpu, vn, i)
5023 > aarch64_get_vec_u32 (cpu, vm, i)
5024 ? aarch64_get_vec_u32 (cpu, vn, i)
5025 : aarch64_get_vec_u32 (cpu, vm, i));
5026 return;
5027
5028 case 3:
5029 HALT_UNALLOC;
5030 }
5031 }
5032 else
5033 {
5034 switch (INSTR (23, 22))
5035 {
5036 case 0:
5037 for (i = 0; i < (full ? 16 : 8); i++)
5038 aarch64_set_vec_s8 (cpu, vd, i,
5039 aarch64_get_vec_s8 (cpu, vn, i)
5040 > aarch64_get_vec_s8 (cpu, vm, i)
5041 ? aarch64_get_vec_s8 (cpu, vn, i)
5042 : aarch64_get_vec_s8 (cpu, vm, i));
5043 return;
5044
5045 case 1:
5046 for (i = 0; i < (full ? 8 : 4); i++)
5047 aarch64_set_vec_s16 (cpu, vd, i,
5048 aarch64_get_vec_s16 (cpu, vn, i)
5049 > aarch64_get_vec_s16 (cpu, vm, i)
5050 ? aarch64_get_vec_s16 (cpu, vn, i)
5051 : aarch64_get_vec_s16 (cpu, vm, i));
5052 return;
5053
5054 case 2:
5055 for (i = 0; i < (full ? 4 : 2); i++)
5056 aarch64_set_vec_s32 (cpu, vd, i,
5057 aarch64_get_vec_s32 (cpu, vn, i)
5058 > aarch64_get_vec_s32 (cpu, vm, i)
5059 ? aarch64_get_vec_s32 (cpu, vn, i)
5060 : aarch64_get_vec_s32 (cpu, vm, i));
5061 return;
5062
5063 case 3:
5064 HALT_UNALLOC;
5065 }
5066 }
5067 }
5068
5069 static void
5070 do_vec_min (sim_cpu *cpu)
5071 {
5072 /* instr[31] = 0
5073 instr[30] = full/half selector
5074 instr[29] = SMIN (0) / UMIN (1)
5075 instr[28,24] = 0 1110
5076 instr[23,22] = size: 00=> 8-bit, 01=> 16-bit, 10=> 32-bit
5077 instr[21] = 1
5078 instr[20,16] = Vm
5079 instr[15,10] = 0110 11
5080 instr[9,5] = Vn
5081 instr[4,0] = Vd. */
5082
5083 unsigned vm = INSTR (20, 16);
5084 unsigned vn = INSTR (9, 5);
5085 unsigned vd = INSTR (4, 0);
5086 unsigned i;
5087 int full = INSTR (30, 30);
5088
5089 NYI_assert (28, 24, 0x0E);
5090 NYI_assert (21, 21, 1);
5091 NYI_assert (15, 10, 0x1B);
5092
5093 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
5094 if (INSTR (29, 29))
5095 {
5096 switch (INSTR (23, 22))
5097 {
5098 case 0:
5099 for (i = 0; i < (full ? 16 : 8); i++)
5100 aarch64_set_vec_u8 (cpu, vd, i,
5101 aarch64_get_vec_u8 (cpu, vn, i)
5102 < aarch64_get_vec_u8 (cpu, vm, i)
5103 ? aarch64_get_vec_u8 (cpu, vn, i)
5104 : aarch64_get_vec_u8 (cpu, vm, i));
5105 return;
5106
5107 case 1:
5108 for (i = 0; i < (full ? 8 : 4); i++)
5109 aarch64_set_vec_u16 (cpu, vd, i,
5110 aarch64_get_vec_u16 (cpu, vn, i)
5111 < aarch64_get_vec_u16 (cpu, vm, i)
5112 ? aarch64_get_vec_u16 (cpu, vn, i)
5113 : aarch64_get_vec_u16 (cpu, vm, i));
5114 return;
5115
5116 case 2:
5117 for (i = 0; i < (full ? 4 : 2); i++)
5118 aarch64_set_vec_u32 (cpu, vd, i,
5119 aarch64_get_vec_u32 (cpu, vn, i)
5120 < aarch64_get_vec_u32 (cpu, vm, i)
5121 ? aarch64_get_vec_u32 (cpu, vn, i)
5122 : aarch64_get_vec_u32 (cpu, vm, i));
5123 return;
5124
5125 case 3:
5126 HALT_UNALLOC;
5127 }
5128 }
5129 else
5130 {
5131 switch (INSTR (23, 22))
5132 {
5133 case 0:
5134 for (i = 0; i < (full ? 16 : 8); i++)
5135 aarch64_set_vec_s8 (cpu, vd, i,
5136 aarch64_get_vec_s8 (cpu, vn, i)
5137 < aarch64_get_vec_s8 (cpu, vm, i)
5138 ? aarch64_get_vec_s8 (cpu, vn, i)
5139 : aarch64_get_vec_s8 (cpu, vm, i));
5140 return;
5141
5142 case 1:
5143 for (i = 0; i < (full ? 8 : 4); i++)
5144 aarch64_set_vec_s16 (cpu, vd, i,
5145 aarch64_get_vec_s16 (cpu, vn, i)
5146 < aarch64_get_vec_s16 (cpu, vm, i)
5147 ? aarch64_get_vec_s16 (cpu, vn, i)
5148 : aarch64_get_vec_s16 (cpu, vm, i));
5149 return;
5150
5151 case 2:
5152 for (i = 0; i < (full ? 4 : 2); i++)
5153 aarch64_set_vec_s32 (cpu, vd, i,
5154 aarch64_get_vec_s32 (cpu, vn, i)
5155 < aarch64_get_vec_s32 (cpu, vm, i)
5156 ? aarch64_get_vec_s32 (cpu, vn, i)
5157 : aarch64_get_vec_s32 (cpu, vm, i));
5158 return;
5159
5160 case 3:
5161 HALT_UNALLOC;
5162 }
5163 }
5164 }
5165
5166 static void
5167 do_vec_sub_long (sim_cpu *cpu)
5168 {
5169 /* instr[31] = 0
5170 instr[30] = lower (0) / upper (1)
5171 instr[29] = signed (0) / unsigned (1)
5172 instr[28,24] = 0 1110
5173 instr[23,22] = size: bytes (00), half (01), word (10)
5174 instr[21] = 1
5175 instr[20,16] = Vm
5176 instr[15,10] = 0010 00
5177 instr[9,5] = Vn
5178 instr[4,0] = V dest. */
5179
5180 unsigned size = INSTR (23, 22);
5181 unsigned vm = INSTR (20, 16);
5182 unsigned vn = INSTR (9, 5);
5183 unsigned vd = INSTR (4, 0);
5184 unsigned bias = 0;
5185 unsigned i;
5186
5187 NYI_assert (28, 24, 0x0E);
5188 NYI_assert (21, 21, 1);
5189 NYI_assert (15, 10, 0x08);
5190
5191 if (size == 3)
5192 HALT_UNALLOC;
5193
5194 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
5195 switch (INSTR (30, 29))
5196 {
5197 case 2: /* SSUBL2. */
5198 bias = 2; /* Fall through.  */
5199 case 0: /* SSUBL. */
5200 switch (size)
5201 {
5202 case 0:
5203 bias *= 4; /* Second part starts at byte element 8.  */
5204 for (i = 0; i < 8; i++)
5205 aarch64_set_vec_s16 (cpu, vd, i,
5206 aarch64_get_vec_s8 (cpu, vn, i + bias)
5207 - aarch64_get_vec_s8 (cpu, vm, i + bias));
5208 break;
5209
5210 case 1:
5211 bias *= 2;
5212 for (i = 0; i < 4; i++)
5213 aarch64_set_vec_s32 (cpu, vd, i,
5214 aarch64_get_vec_s16 (cpu, vn, i + bias)
5215 - aarch64_get_vec_s16 (cpu, vm, i + bias));
5216 break;
5217
5218 case 2:
5219 for (i = 0; i < 2; i++)
5220 aarch64_set_vec_s64 (cpu, vd, i,
5221 aarch64_get_vec_s32 (cpu, vn, i + bias)
5222 - aarch64_get_vec_s32 (cpu, vm, i + bias));
5223 break;
5224
5225 default:
5226 HALT_UNALLOC;
5227 }
5228 break;
5229
5230 case 3: /* USUBL2. */
5231 bias = 2; /* Fall through.  */
5232 case 1: /* USUBL. */
5233 switch (size)
5234 {
5235 case 0:
5236 bias *= 4; /* Second part starts at byte element 8.  */
5237 for (i = 0; i < 8; i++)
5238 aarch64_set_vec_u16 (cpu, vd, i,
5239 aarch64_get_vec_u8 (cpu, vn, i + bias)
5240 - aarch64_get_vec_u8 (cpu, vm, i + bias));
5241 break;
5242
5243 case 1:
5244 bias *= 2;
5245 for (i = 0; i < 4; i++)
5246 aarch64_set_vec_u32 (cpu, vd, i,
5247 aarch64_get_vec_u16 (cpu, vn, i + bias)
5248 - aarch64_get_vec_u16 (cpu, vm, i + bias));
5249 break;
5250
5251 case 2:
5252 for (i = 0; i < 2; i++)
5253 aarch64_set_vec_u64 (cpu, vd, i,
5254 aarch64_get_vec_u32 (cpu, vn, i + bias)
5255 - aarch64_get_vec_u32 (cpu, vm, i + bias));
5256 break;
5257
5258 default:
5259 HALT_UNALLOC;
5260 }
5261 break;
5262 }
5263 }
5264
5265 static void
5266 do_vec_ADDP (sim_cpu *cpu)
5267 {
5268 /* instr[31] = 0
5269 instr[30] = half(0)/full(1)
5270 instr[29,24] = 00 1110
5271 instr[23,22] = size: bytes (00), half (01), word (10), long (11)
5272 instr[21] = 1
5273 instr[20,16] = Vm
5274 instr[15,10] = 1011 11
5275 instr[9,5] = Vn
5276 instr[4,0] = V dest. */
5277
5278 FRegister copy_vn;
5279 FRegister copy_vm;
5280 unsigned full = INSTR (30, 30);
5281 unsigned size = INSTR (23, 22);
5282 unsigned vm = INSTR (20, 16);
5283 unsigned vn = INSTR (9, 5);
5284 unsigned vd = INSTR (4, 0);
5285 unsigned i, range;
5286
5287 NYI_assert (29, 24, 0x0E);
5288 NYI_assert (21, 21, 1);
5289 NYI_assert (15, 10, 0x2F);
5290
5291 /* Make copies of the source registers in case vd == vn/vm. */
5292 copy_vn = cpu->fr[vn];
5293 copy_vm = cpu->fr[vm];
5294
5295 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
5296 switch (size)
5297 {
5298 case 0:
5299 range = full ? 8 : 4;
5300 for (i = 0; i < range; i++)
5301 {
5302 aarch64_set_vec_u8 (cpu, vd, i,
5303 copy_vn.b[i * 2] + copy_vn.b[i * 2 + 1]);
5304 aarch64_set_vec_u8 (cpu, vd, i + range,
5305 copy_vm.b[i * 2] + copy_vm.b[i * 2 + 1]);
5306 }
5307 return;
5308
5309 case 1:
5310 range = full ? 4 : 2;
5311 for (i = 0; i < range; i++)
5312 {
5313 aarch64_set_vec_u16 (cpu, vd, i,
5314 copy_vn.h[i * 2] + copy_vn.h[i * 2 + 1]);
5315 aarch64_set_vec_u16 (cpu, vd, i + range,
5316 copy_vm.h[i * 2] + copy_vm.h[i * 2 + 1]);
5317 }
5318 return;
5319
5320 case 2:
5321 range = full ? 2 : 1;
5322 for (i = 0; i < range; i++)
5323 {
5324 aarch64_set_vec_u32 (cpu, vd, i,
5325 copy_vn.w[i * 2] + copy_vn.w[i * 2 + 1]);
5326 aarch64_set_vec_u32 (cpu, vd, i + range,
5327 copy_vm.w[i * 2] + copy_vm.w[i * 2 + 1]);
5328 }
5329 return;
5330
5331 case 3:
5332 if (! full)
5333 HALT_UNALLOC;
5334 aarch64_set_vec_u64 (cpu, vd, 0, copy_vn.v[0] + copy_vn.v[1]);
5335 aarch64_set_vec_u64 (cpu, vd, 1, copy_vm.v[0] + copy_vm.v[1]);
5336 return;
5337 }
5338 }
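
/* ADDP layout, for reference: with byte lanes on a full register,
   vd[i] = vn[2i] + vn[2i+1] for i = 0..7 and vd[i+8] = vm[2i] + vm[2i+1],
   which is why the sources are copied up front - Vd may be the same
   register as Vn or Vm.  */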
5339
5340 static void
5341 do_vec_UMOV (sim_cpu *cpu)
5342 {
5343 /* instr[31] = 0
5344 instr[30] = 32-bit(0)/64-bit(1)
5345 instr[29,21] = 00 1110 000
5346 instr[20,16] = size & index
5347 instr[15,10] = 0011 11
5348 instr[9,5] = V source
5349 instr[4,0] = R dest. */
5350
5351 unsigned vs = INSTR (9, 5);
5352 unsigned rd = INSTR (4, 0);
5353 unsigned index;
5354
5355 NYI_assert (29, 21, 0x070);
5356 NYI_assert (15, 10, 0x0F);
5357
5358 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
5359 if (INSTR (16, 16))
5360 {
5361 /* Byte transfer. */
5362 index = INSTR (20, 17);
5363 aarch64_set_reg_u64 (cpu, rd, NO_SP,
5364 aarch64_get_vec_u8 (cpu, vs, index));
5365 }
5366 else if (INSTR (17, 17))
5367 {
5368 index = INSTR (20, 18);
5369 aarch64_set_reg_u64 (cpu, rd, NO_SP,
5370 aarch64_get_vec_u16 (cpu, vs, index));
5371 }
5372 else if (INSTR (18, 18))
5373 {
5374 index = INSTR (20, 19);
5375 aarch64_set_reg_u64 (cpu, rd, NO_SP,
5376 aarch64_get_vec_u32 (cpu, vs, index));
5377 }
5378 else
5379 {
5380 if (INSTR (30, 30) != 1)
5381 HALT_UNALLOC;
5382
5383 index = INSTR (20, 20);
5384 aarch64_set_reg_u64 (cpu, rd, NO_SP,
5385 aarch64_get_vec_u64 (cpu, vs, index));
5386 }
5387 }
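
/* The element size in UMOV is encoded by the position of the lowest set
   bit of imm5 (bits 20,16), tested above from bit 16 upwards: imm5 =
   xxxx1 selects a byte with index imm5<4:1>, imm5 = xxx10 a halfword
   with index imm5<4:2>, imm5 = xx100 a word, and imm5 = x1000 a
   doubleword (which requires the 64-bit form).  */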
5388
5389 static void
5390 do_vec_FABS (sim_cpu *cpu)
5391 {
5392 /* instr[31] = 0
5393 instr[30] = half(0)/full(1)
5394 instr[29,23] = 00 1110 1
5395 instr[22] = float(0)/double(1)
5396 instr[21,16] = 10 0000
5397 instr[15,10] = 1111 10
5398 instr[9,5] = Vn
5399 instr[4,0] = Vd. */
5400
5401 unsigned vn = INSTR (9, 5);
5402 unsigned vd = INSTR (4, 0);
5403 unsigned full = INSTR (30, 30);
5404 unsigned i;
5405
5406 NYI_assert (29, 23, 0x1D);
5407 NYI_assert (21, 10, 0x83E);
5408
5409 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
5410 if (INSTR (22, 22))
5411 {
5412 if (! full)
5413 HALT_NYI;
5414
5415 for (i = 0; i < 2; i++)
5416 aarch64_set_vec_double (cpu, vd, i,
5417 fabs (aarch64_get_vec_double (cpu, vn, i)));
5418 }
5419 else
5420 {
5421 for (i = 0; i < (full ? 4 : 2); i++)
5422 aarch64_set_vec_float (cpu, vd, i,
5423 fabsf (aarch64_get_vec_float (cpu, vn, i)));
5424 }
5425 }
5426
5427 static void
5428 do_vec_FCVTZS (sim_cpu *cpu)
5429 {
5430 /* instr[31] = 0
5431 instr[30] = half (0) / all (1)
5432 instr[29,23] = 00 1110 1
5433 instr[22] = single (0) / double (1)
5434 instr[21,10] = 10 0001 1011 10
5435 instr[9,5] = Rn
5436 instr[4,0] = Rd. */
5437
5438 unsigned rn = INSTR (9, 5);
5439 unsigned rd = INSTR (4, 0);
5440 unsigned full = INSTR (30, 30);
5441 unsigned i;
5442
5443 NYI_assert (31, 31, 0);
5444 NYI_assert (29, 23, 0x1D);
5445 NYI_assert (21, 10, 0x86E);
5446
5447 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
5448 if (INSTR (22, 22))
5449 {
5450 if (! full)
5451 HALT_UNALLOC;
5452
5453 for (i = 0; i < 2; i++)
5454 aarch64_set_vec_s64 (cpu, rd, i,
5455 (int64_t) aarch64_get_vec_double (cpu, rn, i));
5456 }
5457 else
5458 for (i = 0; i < (full ? 4 : 2); i++)
5459 aarch64_set_vec_s32 (cpu, rd, i,
5460 (int32_t) aarch64_get_vec_float (cpu, rn, i));
5461 }
5462
5463 static void
5464 do_vec_REV64 (sim_cpu *cpu)
5465 {
5466 /* instr[31] = 0
5467 instr[30] = full/half
5468 instr[29,24] = 00 1110
5469 instr[23,22] = size
5470 instr[21,10] = 10 0000 0000 10
5471 instr[9,5] = Rn
5472 instr[4,0] = Rd. */
5473
5474 unsigned rn = INSTR (9, 5);
5475 unsigned rd = INSTR (4, 0);
5476 unsigned size = INSTR (23, 22);
5477 unsigned full = INSTR (30, 30);
5478 unsigned i;
5479 FRegister val;
5480
5481 NYI_assert (29, 24, 0x0E);
5482 NYI_assert (21, 10, 0x802);
5483
5484 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
5485 switch (size)
5486 {
5487 case 0:
5488 for (i = 0; i < (full ? 16 : 8); i++)
5489 val.b[i ^ 0x7] = aarch64_get_vec_u8 (cpu, rn, i);
5490 break;
5491
5492 case 1:
5493 for (i = 0; i < (full ? 8 : 4); i++)
5494 val.h[i ^ 0x3] = aarch64_get_vec_u16 (cpu, rn, i);
5495 break;
5496
5497 case 2:
5498 for (i = 0; i < (full ? 4 : 2); i++)
5499 val.w[i ^ 0x1] = aarch64_get_vec_u32 (cpu, rn, i);
5500 break;
5501
5502 case 3:
5503 HALT_UNALLOC;
5504 }
5505
5506 aarch64_set_vec_u64 (cpu, rd, 0, val.v[0]);
5507 if (full)
5508 aarch64_set_vec_u64 (cpu, rd, 1, val.v[1]);
5509 }
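
/* The "i ^ N" indexing above reverses lane order within each 64-bit
   doubleword: XOR with 7 maps byte lanes 0..7 to 7..0 and 8..15 to
   15..8, and likewise XOR with 3 (halfwords) and 1 (words) only permute
   lanes within the same doubleword.  */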
5510
5511 static void
5512 do_vec_REV16 (sim_cpu *cpu)
5513 {
5514 /* instr[31] = 0
5515 instr[30] = full/half
5516 instr[29,24] = 00 1110
5517 instr[23,22] = size
5518 instr[21,10] = 10 0000 0001 10
5519 instr[9,5] = Rn
5520 instr[4,0] = Rd. */
5521
5522 unsigned rn = INSTR (9, 5);
5523 unsigned rd = INSTR (4, 0);
5524 unsigned size = INSTR (23, 22);
5525 unsigned full = INSTR (30, 30);
5526 unsigned i;
5527 FRegister val;
5528
5529 NYI_assert (29, 24, 0x0E);
5530 NYI_assert (21, 10, 0x806);
5531
5532 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
5533 switch (size)
5534 {
5535 case 0:
5536 for (i = 0; i < (full ? 16 : 8); i++)
5537 val.b[i ^ 0x1] = aarch64_get_vec_u8 (cpu, rn, i);
5538 break;
5539
5540 default:
5541 HALT_UNALLOC;
5542 }
5543
5544 aarch64_set_vec_u64 (cpu, rd, 0, val.v[0]);
5545 if (full)
5546 aarch64_set_vec_u64 (cpu, rd, 1, val.v[1]);
5547 }
5548
5549 static void
5550 do_vec_op1 (sim_cpu *cpu)
5551 {
5552 /* instr[31] = 0
5553 instr[30] = half/full
5554 instr[29,24] = 00 1110
5555 instr[23,21] = ???
5556 instr[20,16] = Vm
5557 instr[15,10] = sub-opcode
5558 instr[9,5] = Vn
5559 instr[4,0] = Vd */
5560 NYI_assert (29, 24, 0x0E);
5561
5562 if (INSTR (21, 21) == 0)
5563 {
5564 if (INSTR (23, 22) == 0)
5565 {
5566 if (INSTR (30, 30) == 1
5567 && INSTR (17, 14) == 0
5568 && INSTR (12, 10) == 7)
5569 return do_vec_ins_2 (cpu);
5570
5571 switch (INSTR (15, 10))
5572 {
5573 case 0x01: do_vec_DUP_vector_into_vector (cpu); return;
5574 case 0x03: do_vec_DUP_scalar_into_vector (cpu); return;
5575 case 0x07: do_vec_INS (cpu); return;
5576 case 0x0A: do_vec_TRN (cpu); return;
5577
5578 case 0x0F:
5579 if (INSTR (17, 16) == 0)
5580 {
5581 do_vec_MOV_into_scalar (cpu);
5582 return;
5583 }
5584 break;
5585
5586 case 0x00:
5587 case 0x08:
5588 case 0x10:
5589 case 0x18:
5590 do_vec_TBL (cpu); return;
5591
5592 case 0x06:
5593 case 0x16:
5594 do_vec_UZP (cpu); return;
5595
5596 case 0x0E:
5597 case 0x1E:
5598 do_vec_ZIP (cpu); return;
5599
5600 default:
5601 HALT_NYI;
5602 }
5603 }
5604
5605 switch (INSTR (13, 10))
5606 {
5607 case 0x6: do_vec_UZP (cpu); return;
5608 case 0xE: do_vec_ZIP (cpu); return;
5609 case 0xA: do_vec_TRN (cpu); return;
5610 case 0xF: do_vec_UMOV (cpu); return;
5611 default: HALT_NYI;
5612 }
5613 }
5614
5615 switch (INSTR (15, 10))
5616 {
5617 case 0x02: do_vec_REV64 (cpu); return;
5618 case 0x06: do_vec_REV16 (cpu); return;
5619
5620 case 0x07:
5621 switch (INSTR (23, 21))
5622 {
5623 case 1: do_vec_AND (cpu); return;
5624 case 3: do_vec_BIC (cpu); return;
5625 case 5: do_vec_ORR (cpu); return;
5626 case 7: do_vec_ORN (cpu); return;
5627 default: HALT_NYI;
5628 }
5629
5630 case 0x08: do_vec_sub_long (cpu); return;
5631 case 0x0a: do_vec_XTN (cpu); return;
5632 case 0x11: do_vec_SSHL (cpu); return;
5633 case 0x19: do_vec_max (cpu); return;
5634 case 0x1B: do_vec_min (cpu); return;
5635 case 0x21: do_vec_add (cpu); return;
5636 case 0x25: do_vec_MLA (cpu); return;
5637 case 0x27: do_vec_mul (cpu); return;
5638 case 0x2F: do_vec_ADDP (cpu); return;
5639 case 0x30: do_vec_mull (cpu); return;
5640 case 0x33: do_vec_FMLA (cpu); return;
5641 case 0x35: do_vec_fadd (cpu); return;
5642
5643 case 0x2E:
5644 switch (INSTR (20, 16))
5645 {
5646 case 0x00: do_vec_ABS (cpu); return;
5647 case 0x01: do_vec_FCVTZS (cpu); return;
5648 case 0x11: do_vec_ADDV (cpu); return;
5649 default: HALT_NYI;
5650 }
5651
5652 case 0x31:
5653 case 0x3B:
5654 do_vec_Fminmax (cpu); return;
5655
5656 case 0x0D:
5657 case 0x0F:
5658 case 0x22:
5659 case 0x23:
5660 case 0x26:
5661 case 0x2A:
5662 case 0x32:
5663 case 0x36:
5664 case 0x39:
5665 case 0x3A:
5666 do_vec_compare (cpu); return;
5667
5668 case 0x3E:
5669 do_vec_FABS (cpu); return;
5670
5671 default:
5672 HALT_NYI;
5673 }
5674 }
5675
5676 static void
5677 do_vec_xtl (sim_cpu *cpu)
5678 {
5679 /* instr[31] = 0
5680 instr[30,29] = SXTL (00), UXTL (01), SXTL2 (10), UXTL2 (11)
5681 instr[28,22] = 0 1111 00
5682 instr[21,16] = size & shift (USHLL, SSHLL, USHLL2, SSHLL2)
5683 instr[15,10] = 1010 01
5684 instr[9,5] = V source
5685 instr[4,0] = V dest. */
5686
5687 unsigned vs = INSTR (9, 5);
5688 unsigned vd = INSTR (4, 0);
5689 unsigned i, shift, bias = 0;
5690
5691 NYI_assert (28, 22, 0x3C);
5692 NYI_assert (15, 10, 0x29);
5693
5694 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
5695 switch (INSTR (30, 29))
5696 {
5697 case 2: /* SXTL2, SSHLL2. */
5698 bias = 2; /* Fall through.  */
5699 case 0: /* SXTL, SSHLL. */
5700 if (INSTR (21, 21))
5701 {
5702 int64_t val1, val2;
5703
5704 shift = INSTR (20, 16);
5705 /* Get the source values before setting the destination values
5706 in case the source and destination are the same. */
5707 val1 = (int64_t) aarch64_get_vec_s32 (cpu, vs, bias) << shift;
5708 val2 = (int64_t) aarch64_get_vec_s32 (cpu, vs, bias + 1) << shift;
5709 aarch64_set_vec_s64 (cpu, vd, 0, val1);
5710 aarch64_set_vec_s64 (cpu, vd, 1, val2);
5711 }
5712 else if (INSTR (20, 20))
5713 {
5714 int32_t v[4];
5715 int32_t v1,v2,v3,v4;
5716
5717 shift = INSTR (19, 16);
5718 bias *= 2;
5719 for (i = 0; i < 4; i++)
5720 v[i] = aarch64_get_vec_s16 (cpu, vs, bias + i) << shift;
5721 for (i = 0; i < 4; i++)
5722 aarch64_set_vec_s32 (cpu, vd, i, v[i]);
5723 }
5724 else
5725 {
5726 int16_t v[8];
5727 NYI_assert (19, 19, 1);
5728
5729 shift = INSTR (18, 16);
5730 bias *= 4; /* Second part starts at byte element 8.  */
5731 for (i = 0; i < 8; i++)
5732 v[i] = aarch64_get_vec_s8 (cpu, vs, i + bias) << shift;
5733 for (i = 0; i < 8; i++)
5734 aarch64_set_vec_s16 (cpu, vd, i, v[i]);
5735 }
5736 return;
5737
5738 case 3: /* UXTL2, USHLL2. */
5739 bias = 2; /* Fall through.  */
5740 case 1: /* UXTL, USHLL. */
5741 if (INSTR (21, 21))
5742 {
5743 uint64_t v1, v2;
5744 shift = INSTR (20, 16);
5745 v1 = (uint64_t) aarch64_get_vec_u32 (cpu, vs, bias) << shift;
5746 v2 = (uint64_t) aarch64_get_vec_u32 (cpu, vs, bias + 1) << shift;
5747 aarch64_set_vec_u64 (cpu, vd, 0, v1);
5748 aarch64_set_vec_u64 (cpu, vd, 1, v2);
5749 }
5750 else if (INSTR (20, 20))
5751 {
5752 uint32_t v[4];
5753 shift = INSTR (19, 16);
5754 bias *= 2;
5755 for (i = 0; i < 4; i++)
5756 v[i] = aarch64_get_vec_u16 (cpu, vs, i + bias) << shift;
5757 for (i = 0; i < 4; i++)
5758 aarch64_set_vec_u32 (cpu, vd, i, v[i]);
5759 }
5760 else
5761 {
5762 uint16_t v[8];
5763 NYI_assert (19, 19, 1);
5764
5765 shift = INSTR (18, 16);
5766 bias *= 4; /* Second part starts at byte element 8.  */
5767 for (i = 0; i < 8; i++)
5768 v[i] = aarch64_get_vec_u8 (cpu, vs, i + bias) << shift;
5769 for (i = 0; i < 8; i++)
5770 aarch64_set_vec_u16 (cpu, vd, i, v[i]);
5771 }
5772 return;
5773 }
5774 }
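
/* Size/shift decode above, for reference: the position of the leading
   one in bits 21..19 selects the source element width (bit 21 => 32-bit,
   bit 20 => 16-bit, bit 19 => 8-bit) and the bits below it give the left
   shift; a shift of zero is the SXTL/UXTL alias of SSHLL/USHLL.  */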
5775
5776 static void
5777 do_vec_SHL (sim_cpu *cpu)
5778 {
5779 /* instr [31] = 0
5780 instr [30] = half(0)/full(1)
5781 instr [29,23] = 001 1110
5782 instr [22,16] = size and shift amount
5783 instr [15,10] = 01 0101
5784 instr [9, 5] = Vs
5785 instr [4, 0] = Vd. */
5786
5787 int shift;
5788 int full = INSTR (30, 30);
5789 unsigned vs = INSTR (9, 5);
5790 unsigned vd = INSTR (4, 0);
5791 unsigned i;
5792
5793 NYI_assert (29, 23, 0x1E);
5794 NYI_assert (15, 10, 0x15);
5795
5796 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
5797 if (INSTR (22, 22))
5798 {
5799 shift = INSTR (21, 16);
5800
5801 if (full == 0)
5802 HALT_UNALLOC;
5803
5804 for (i = 0; i < 2; i++)
5805 {
5806 uint64_t val = aarch64_get_vec_u64 (cpu, vs, i);
5807 aarch64_set_vec_u64 (cpu, vd, i, val << shift);
5808 }
5809
5810 return;
5811 }
5812
5813 if (INSTR (21, 21))
5814 {
5815 shift = INSTR (20, 16);
5816
5817 for (i = 0; i < (full ? 4 : 2); i++)
5818 {
5819 uint32_t val = aarch64_get_vec_u32 (cpu, vs, i);
5820 aarch64_set_vec_u32 (cpu, vd, i, val << shift);
5821 }
5822
5823 return;
5824 }
5825
5826 if (INSTR (20, 20))
5827 {
5828 shift = INSTR (19, 16);
5829
5830 for (i = 0; i < (full ? 8 : 4); i++)
5831 {
5832 uint16_t val = aarch64_get_vec_u16 (cpu, vs, i);
5833 aarch64_set_vec_u16 (cpu, vd, i, val << shift);
5834 }
5835
5836 return;
5837 }
5838
5839 if (INSTR (19, 19) == 0)
5840 HALT_UNALLOC;
5841
5842 shift = INSTR (18, 16);
5843
5844 for (i = 0; i < (full ? 16 : 8); i++)
5845 {
5846 uint8_t val = aarch64_get_vec_u8 (cpu, vs, i);
5847 aarch64_set_vec_u8 (cpu, vd, i, val << shift);
5848 }
5849 }
5850
5851 static void
5852 do_vec_SSHR_USHR (sim_cpu *cpu)
5853 {
5854 /* instr [31] = 0
5855 instr [30] = half(0)/full(1)
5856 instr [29] = signed(0)/unsigned(1)
5857 instr [28,23] = 0 1111 0
5858 instr [22,16] = size and shift amount
5859 instr [15,10] = 0000 01
5860 instr [9, 5] = Vs
5861 instr [4, 0] = Vd. */
5862
5863 int full = INSTR (30, 30);
5864 int sign = ! INSTR (29, 29);
5865 unsigned shift = INSTR (22, 16);
5866 unsigned vs = INSTR (9, 5);
5867 unsigned vd = INSTR (4, 0);
5868 unsigned i;
5869
5870 NYI_assert (28, 23, 0x1E);
5871 NYI_assert (15, 10, 0x01);
5872
5873 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
5874 if (INSTR (22, 22))
5875 {
5876 shift = 128 - shift;
5877
5878 if (full == 0)
5879 HALT_UNALLOC;
5880
5881 if (sign)
5882 for (i = 0; i < 2; i++)
5883 {
5884 int64_t val = aarch64_get_vec_s64 (cpu, vs, i);
5885 aarch64_set_vec_s64 (cpu, vd, i, val >> shift);
5886 }
5887 else
5888 for (i = 0; i < 2; i++)
5889 {
5890 uint64_t val = aarch64_get_vec_u64 (cpu, vs, i);
5891 aarch64_set_vec_u64 (cpu, vd, i, val >> shift);
5892 }
5893
5894 return;
5895 }
5896
5897 if (INSTR (21, 21))
5898 {
5899 shift = 64 - shift;
5900
5901 if (sign)
5902 for (i = 0; i < (full ? 4 : 2); i++)
5903 {
5904 int32_t val = aarch64_get_vec_s32 (cpu, vs, i);
5905 aarch64_set_vec_s32 (cpu, vd, i, val >> shift);
5906 }
5907 else
5908 for (i = 0; i < (full ? 4 : 2); i++)
5909 {
5910 uint32_t val = aarch64_get_vec_u32 (cpu, vs, i);
5911 aarch64_set_vec_u32 (cpu, vd, i, val >> shift);
5912 }
5913
5914 return;
5915 }
5916
5917 if (INSTR (20, 20))
5918 {
5919 shift = 32 - shift;
5920
5921 if (sign)
5922 for (i = 0; i < (full ? 8 : 4); i++)
5923 {
5924 int16_t val = aarch64_get_vec_s16 (cpu, vs, i);
5925 aarch64_set_vec_s16 (cpu, vd, i, val >> shift);
5926 }
5927 else
5928 for (i = 0; i < (full ? 8 : 4); i++)
5929 {
5930 uint16_t val = aarch64_get_vec_u16 (cpu, vs, i);
5931 aarch64_set_vec_u16 (cpu, vd, i, val >> shift);
5932 }
5933
5934 return;
5935 }
5936
5937 if (INSTR (19, 19) == 0)
5938 HALT_UNALLOC;
5939
5940 shift = 16 - shift;
5941
5942 if (sign)
5943 for (i = 0; i < (full ? 16 : 8); i++)
5944 {
5945 int8_t val = aarch64_get_vec_s8 (cpu, vs, i);
5946 aarch64_set_vec_s8 (cpu, vd, i, val >> shift);
5947 }
5948 else
5949 for (i = 0; i < (full ? 16 : 8); i++)
5950 {
5951 uint8_t val = aarch64_get_vec_u8 (cpu, vs, i);
5952 aarch64_set_vec_u8 (cpu, vd, i, val >> shift);
5953 }
5954 }
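
/* The "esize * 2 - field" adjustments above recover the right-shift
   amount from the immh:immb field, which encodes (2 * esize) - shift:
   for 64-bit lanes, for example, a field value of 0x7F denotes a shift
   of 128 - 0x7F = 1.  */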
5955
5956 static void
5957 do_vec_MUL_by_element (sim_cpu *cpu)
5958 {
5959 /* instr[31] = 0
5960 instr[30] = half/full
5961 instr[29,24] = 00 1111
5962 instr[23,22] = size
5963 instr[21] = L
5964 instr[20] = M
5965 instr[19,16] = m
5966 instr[15,12] = 1000
5967 instr[11] = H
5968 instr[10] = 0
5969 instr[9,5] = Vn
5970 instr[4,0] = Vd */
5971
5972 unsigned full = INSTR (30, 30);
5973 unsigned L = INSTR (21, 21);
5974 unsigned H = INSTR (11, 11);
5975 unsigned vn = INSTR (9, 5);
5976 unsigned vd = INSTR (4, 0);
5977 unsigned size = INSTR (23, 22);
5978 unsigned index;
5979 unsigned vm;
5980 unsigned e;
5981
5982 NYI_assert (29, 24, 0x0F);
5983 NYI_assert (15, 12, 0x8);
5984 NYI_assert (10, 10, 0);
5985
5986 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
5987 switch (size)
5988 {
5989 case 1:
5990 {
5991 /* 16 bit products. */
5992 uint16_t product;
5993 uint16_t element1;
5994 uint16_t element2;
5995
5996 index = (H << 2) | (L << 1) | INSTR (20, 20);
5997 vm = INSTR (19, 16);
5998 element2 = aarch64_get_vec_u16 (cpu, vm, index);
5999
6000 for (e = 0; e < (full ? 8 : 4); e ++)
6001 {
6002 element1 = aarch64_get_vec_u16 (cpu, vn, e);
6003 product = element1 * element2;
6004 aarch64_set_vec_u16 (cpu, vd, e, product);
6005 }
6006 }
6007 break;
6008
6009 case 2:
6010 {
6011 /* 32 bit products. */
6012 uint32_t product;
6013 uint32_t element1;
6014 uint32_t element2;
6015
6016 index = (H << 1) | L;
6017 vm = INSTR (20, 16);
6018 element2 = aarch64_get_vec_u32 (cpu, vm, index);
6019
6020 for (e = 0; e < (full ? 4 : 2); e ++)
6021 {
6022 element1 = aarch64_get_vec_u32 (cpu, vn, e);
6023 product = element1 * element2;
6024 aarch64_set_vec_u32 (cpu, vd, e, product);
6025 }
6026 }
6027 break;
6028
6029 default:
6030 HALT_UNALLOC;
6031 }
6032 }
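
/* Index decode, for reference: with 16-bit lanes the element index is
   H:L:M (so only four bits remain for Vm, restricting it to V0-V15),
   while with 32-bit lanes the index is H:L and all five bits name Vm.  */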
6033
6034 static void
6035 do_FMLA_by_element (sim_cpu *cpu)
6036 {
6037 /* instr[31] = 0
6038 instr[30] = half/full
6039 instr[29,23] = 00 1111 1
6040 instr[22] = size
6041 instr[21] = L
6042 instr[20,16] = m
6043 instr[15,12] = 0001
6044 instr[11] = H
6045 instr[10] = 0
6046 instr[9,5] = Vn
6047 instr[4,0] = Vd */
6048
6049 unsigned full = INSTR (30, 30);
6050 unsigned size = INSTR (22, 22);
6051 unsigned L = INSTR (21, 21);
6052 unsigned vm = INSTR (20, 16);
6053 unsigned H = INSTR (11, 11);
6054 unsigned vn = INSTR (9, 5);
6055 unsigned vd = INSTR (4, 0);
6056 unsigned e;
6057
6058 NYI_assert (29, 23, 0x1F);
6059 NYI_assert (15, 12, 0x1);
6060 NYI_assert (10, 10, 0);
6061
6062 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
6063 if (size)
6064 {
6065 double element1, element2;
6066
6067 if (! full || L)
6068 HALT_UNALLOC;
6069
6070 element2 = aarch64_get_vec_double (cpu, vm, H);
6071
6072 for (e = 0; e < 2; e++)
6073 {
6074 element1 = aarch64_get_vec_double (cpu, vn, e);
6075 element1 *= element2;
6076 element1 += aarch64_get_vec_double (cpu, vd, e);
6077 aarch64_set_vec_double (cpu, vd, e, element1);
6078 }
6079 }
6080 else
6081 {
6082 float element1;
6083 float element2 = aarch64_get_vec_float (cpu, vm, (H << 1) | L);
6084
6085 for (e = 0; e < (full ? 4 : 2); e++)
6086 {
6087 element1 = aarch64_get_vec_float (cpu, vn, e);
6088 element1 *= element2;
6089 element1 += aarch64_get_vec_float (cpu, vd, e);
6090 aarch64_set_vec_float (cpu, vd, e, element1);
6091 }
6092 }
6093 }
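
/* Note that element2 is fetched once, before the loop: if Vd and Vm are
   the same register, the accumulating writes to Vd must not disturb the
   multiplier element.  */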
6094
6095 static void
6096 do_vec_op2 (sim_cpu *cpu)
6097 {
6098 /* instr[31] = 0
6099 instr[30] = half/full
6100 instr[29,24] = 00 1111
6101 instr[23] = ?
6102 instr[22,16] = element size & index
6103 instr[15,10] = sub-opcode
6104 instr[9,5] = Vn
6105 instr[4,0] = Vd */
6106
6107 NYI_assert (29, 24, 0x0F);
6108
6109 if (INSTR (23, 23) != 0)
6110 {
6111 switch (INSTR (15, 10))
6112 {
6113 case 0x04:
6114 case 0x06:
6115 do_FMLA_by_element (cpu);
6116 return;
6117
6118 case 0x20:
6119 case 0x22:
6120 do_vec_MUL_by_element (cpu);
6121 return;
6122
6123 default:
6124 HALT_NYI;
6125 }
6126 }
6127 else
6128 {
6129 switch (INSTR (15, 10))
6130 {
6131 case 0x01: do_vec_SSHR_USHR (cpu); return;
6132 case 0x15: do_vec_SHL (cpu); return;
6133 case 0x20:
6134 case 0x22: do_vec_MUL_by_element (cpu); return;
6135 case 0x29: do_vec_xtl (cpu); return;
6136 default: HALT_NYI;
6137 }
6138 }
6139 }
6140
6141 static void
6142 do_vec_neg (sim_cpu *cpu)
6143 {
6144 /* instr[31] = 0
6145 instr[30] = full(1)/half(0)
6146 instr[29,24] = 10 1110
6147 instr[23,22] = size: byte(00), half (01), word (10), long (11)
6148 instr[21,10] = 1000 0010 1110
6149 instr[9,5] = Vs
6150 instr[4,0] = Vd */
6151
6152 int full = INSTR (30, 30);
6153 unsigned vs = INSTR (9, 5);
6154 unsigned vd = INSTR (4, 0);
6155 unsigned i;
6156
6157 NYI_assert (29, 24, 0x2E);
6158 NYI_assert (21, 10, 0x82E);
6159
6160 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
6161 switch (INSTR (23, 22))
6162 {
6163 case 0:
6164 for (i = 0; i < (full ? 16 : 8); i++)
6165 aarch64_set_vec_s8 (cpu, vd, i, - aarch64_get_vec_s8 (cpu, vs, i));
6166 return;
6167
6168 case 1:
6169 for (i = 0; i < (full ? 8 : 4); i++)
6170 aarch64_set_vec_s16 (cpu, vd, i, - aarch64_get_vec_s16 (cpu, vs, i));
6171 return;
6172
6173 case 2:
6174 for (i = 0; i < (full ? 4 : 2); i++)
6175 aarch64_set_vec_s32 (cpu, vd, i, - aarch64_get_vec_s32 (cpu, vs, i));
6176 return;
6177
6178 case 3:
6179 if (! full)
6180 HALT_NYI;
6181 for (i = 0; i < 2; i++)
6182 aarch64_set_vec_s64 (cpu, vd, i, - aarch64_get_vec_s64 (cpu, vs, i));
6183 return;
6184 }
6185 }
6186
6187 static void
6188 do_vec_sqrt (sim_cpu *cpu)
6189 {
6190 /* instr[31] = 0
6191 instr[30] = full(1)/half(0)
6192 instr[29,23] = 101 1101
6193 instr[22] = single(0)/double(1)
6194 instr[21,10] = 1000 0111 1110
6195 instr[9,5] = Vs
6196 instr[4,0] = Vd. */
6197
6198 int full = INSTR (30, 30);
6199 unsigned vs = INSTR (9, 5);
6200 unsigned vd = INSTR (4, 0);
6201 unsigned i;
6202
6203 NYI_assert (29, 23, 0x5B);
6204 NYI_assert (21, 10, 0x87E);
6205
6206 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
6207 if (INSTR (22, 22) == 0)
6208 for (i = 0; i < (full ? 4 : 2); i++)
6209 aarch64_set_vec_float (cpu, vd, i,
6210 sqrtf (aarch64_get_vec_float (cpu, vs, i)));
6211 else
6212 for (i = 0; i < 2; i++)
6213 aarch64_set_vec_double (cpu, vd, i,
6214 sqrt (aarch64_get_vec_double (cpu, vs, i)));
6215 }
6216
6217 static void
6218 do_vec_mls_indexed (sim_cpu *cpu)
6219 {
6220 /* instr[31] = 0
6221 instr[30] = half(0)/full(1)
6222 instr[29,24] = 10 1111
6223 instr[23,22] = 16-bit(01)/32-bit(10)
6224 instr[21,20],instr[11] = index (if 16-bit)
6225 instr[21],instr[11] = index (if 32-bit)
6226 instr[20,16] = Vm
6227 instr[15,12] = 0100
6228 instr[11] = part of index
6229 instr[10] = 0
6230 instr[9,5] = Vs
6231 instr[4,0] = Vd. */
6232
6233 int full = INSTR (30, 30);
6234 unsigned vs = INSTR (9, 5);
6235 unsigned vd = INSTR (4, 0);
6236 unsigned vm = INSTR (20, 16);
6237 unsigned i;
6238
6239 NYI_assert (15, 12, 4);
6240 NYI_assert (10, 10, 0);
6241
6242 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
6243 switch (INSTR (23, 22))
6244 {
6245 case 1:
6246 {
6247 unsigned elem;
6248 uint32_t val;
6249
6250 if (vm > 15)
6251 HALT_NYI;
6252
6253 	  elem = (INSTR (11, 11) << 2) | (INSTR (21, 21) << 1) | INSTR (20, 20);
6254 val = aarch64_get_vec_u16 (cpu, vm, elem);
6255
6256 for (i = 0; i < (full ? 8 : 4); i++)
6257 aarch64_set_vec_u32 (cpu, vd, i,
6258 aarch64_get_vec_u32 (cpu, vd, i) -
6259 (aarch64_get_vec_u32 (cpu, vs, i) * val));
6260 return;
6261 }
6262
6263 case 2:
6264 {
6265 	unsigned elem = (INSTR (11, 11) << 1) | INSTR (21, 21);
6266 uint64_t val = aarch64_get_vec_u32 (cpu, vm, elem);
6267
6268 for (i = 0; i < (full ? 4 : 2); i++)
6269 aarch64_set_vec_u64 (cpu, vd, i,
6270 aarch64_get_vec_u64 (cpu, vd, i) -
6271 (aarch64_get_vec_u64 (cpu, vs, i) * val));
6272 return;
6273 }
6274
6275 case 0:
6276 case 3:
6277 default:
6278 HALT_NYI;
6279 }
6280 }
6281
6282 static void
6283 do_vec_SUB (sim_cpu *cpu)
6284 {
6285 /* instr [31] = 0
6286 instr [30] = half(0)/full(1)
6287 instr [29,24] = 10 1110
6288     instr [23,22] = size: byte (00), half (01), word (10), long (11)
6289 instr [21] = 1
6290 instr [20,16] = Vm
6291 instr [15,10] = 10 0001
6292 instr [9, 5] = Vn
6293 instr [4, 0] = Vd. */
6294
6295 unsigned full = INSTR (30, 30);
6296 unsigned vm = INSTR (20, 16);
6297 unsigned vn = INSTR (9, 5);
6298 unsigned vd = INSTR (4, 0);
6299 unsigned i;
6300
6301 NYI_assert (29, 24, 0x2E);
6302 NYI_assert (21, 21, 1);
6303 NYI_assert (15, 10, 0x21);
6304
6305 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
6306 switch (INSTR (23, 22))
6307 {
6308 case 0:
6309 for (i = 0; i < (full ? 16 : 8); i++)
6310 aarch64_set_vec_s8 (cpu, vd, i,
6311 aarch64_get_vec_s8 (cpu, vn, i)
6312 - aarch64_get_vec_s8 (cpu, vm, i));
6313 return;
6314
6315 case 1:
6316 for (i = 0; i < (full ? 8 : 4); i++)
6317 aarch64_set_vec_s16 (cpu, vd, i,
6318 aarch64_get_vec_s16 (cpu, vn, i)
6319 - aarch64_get_vec_s16 (cpu, vm, i));
6320 return;
6321
6322 case 2:
6323 for (i = 0; i < (full ? 4 : 2); i++)
6324 aarch64_set_vec_s32 (cpu, vd, i,
6325 aarch64_get_vec_s32 (cpu, vn, i)
6326 - aarch64_get_vec_s32 (cpu, vm, i));
6327 return;
6328
6329 case 3:
6330 if (full == 0)
6331 HALT_UNALLOC;
6332
6333 for (i = 0; i < 2; i++)
6334 aarch64_set_vec_s64 (cpu, vd, i,
6335 aarch64_get_vec_s64 (cpu, vn, i)
6336 - aarch64_get_vec_s64 (cpu, vm, i));
6337 return;
6338 }
6339 }
6340
6341 static void
6342 do_vec_MLS (sim_cpu *cpu)
6343 {
6344 /* instr [31] = 0
6345 instr [30] = half(0)/full(1)
6346 instr [29,24] = 10 1110
6347     instr [23,22] = size: byte (00), half (01), word (10)
6348 instr [21] = 1
6349 instr [20,16] = Vm
6350 instr [15,10] = 10 0101
6351 instr [9, 5] = Vn
6352 instr [4, 0] = Vd. */
6353
6354 unsigned full = INSTR (30, 30);
6355 unsigned vm = INSTR (20, 16);
6356 unsigned vn = INSTR (9, 5);
6357 unsigned vd = INSTR (4, 0);
6358 unsigned i;
6359
6360 NYI_assert (29, 24, 0x2E);
6361 NYI_assert (21, 21, 1);
6362 NYI_assert (15, 10, 0x25);
6363
6364 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
6365 switch (INSTR (23, 22))
6366 {
6367      case 0:
6368        for (i = 0; i < (full ? 16 : 8); i++)
6369 	 aarch64_set_vec_u8 (cpu, vd, i,
6370 			     aarch64_get_vec_u8 (cpu, vd, i)
6371 			     - (aarch64_get_vec_u8 (cpu, vn, i)
6372 				* aarch64_get_vec_u8 (cpu, vm, i)));
6373        return;
6374 
6375      case 1:
6376        for (i = 0; i < (full ? 8 : 4); i++)
6377 	 aarch64_set_vec_u16 (cpu, vd, i,
6378 			      aarch64_get_vec_u16 (cpu, vd, i)
6379 			      - (aarch64_get_vec_u16 (cpu, vn, i)
6380 				 * aarch64_get_vec_u16 (cpu, vm, i)));
6381        return;
6382 
6383      case 2:
6384        for (i = 0; i < (full ? 4 : 2); i++)
6385 	 aarch64_set_vec_u32 (cpu, vd, i,
6386 			      aarch64_get_vec_u32 (cpu, vd, i)
6387 			      - (aarch64_get_vec_u32 (cpu, vn, i)
6388 				 * aarch64_get_vec_u32 (cpu, vm, i)));
6389        return;
6390
6391 default:
6392 HALT_UNALLOC;
6393 }
6394 }
6395
6396 static void
6397 do_vec_FDIV (sim_cpu *cpu)
6398 {
6399 /* instr [31] = 0
6400 instr [30] = half(0)/full(1)
6401 instr [29,23] = 10 1110 0
6402     instr [22] = float(0)/double(1)
6403 instr [21] = 1
6404 instr [20,16] = Vm
6405 instr [15,10] = 1111 11
6406 instr [9, 5] = Vn
6407 instr [4, 0] = Vd. */
6408
6409 unsigned full = INSTR (30, 30);
6410 unsigned vm = INSTR (20, 16);
6411 unsigned vn = INSTR (9, 5);
6412 unsigned vd = INSTR (4, 0);
6413 unsigned i;
6414
6415 NYI_assert (29, 23, 0x5C);
6416 NYI_assert (21, 21, 1);
6417 NYI_assert (15, 10, 0x3F);
6418
6419 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
6420 if (INSTR (22, 22))
6421 {
6422 if (! full)
6423 HALT_UNALLOC;
6424
6425 for (i = 0; i < 2; i++)
6426 aarch64_set_vec_double (cpu, vd, i,
6427 aarch64_get_vec_double (cpu, vn, i)
6428 / aarch64_get_vec_double (cpu, vm, i));
6429 }
6430 else
6431 for (i = 0; i < (full ? 4 : 2); i++)
6432 aarch64_set_vec_float (cpu, vd, i,
6433 aarch64_get_vec_float (cpu, vn, i)
6434 / aarch64_get_vec_float (cpu, vm, i));
6435 }
6436
6437 static void
6438 do_vec_FMUL (sim_cpu *cpu)
6439 {
6440 /* instr [31] = 0
6441 instr [30] = half(0)/full(1)
6442 instr [29,23] = 10 1110 0
6443 instr [22] = float(0)/double(1)
6444 instr [21] = 1
6445 instr [20,16] = Vm
6446 instr [15,10] = 1101 11
6447 instr [9, 5] = Vn
6448 instr [4, 0] = Vd. */
6449
6450 unsigned full = INSTR (30, 30);
6451 unsigned vm = INSTR (20, 16);
6452 unsigned vn = INSTR (9, 5);
6453 unsigned vd = INSTR (4, 0);
6454 unsigned i;
6455
6456 NYI_assert (29, 23, 0x5C);
6457 NYI_assert (21, 21, 1);
6458 NYI_assert (15, 10, 0x37);
6459
6460 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
6461 if (INSTR (22, 22))
6462 {
6463 if (! full)
6464 HALT_UNALLOC;
6465
6466 for (i = 0; i < 2; i++)
6467 aarch64_set_vec_double (cpu, vd, i,
6468 aarch64_get_vec_double (cpu, vn, i)
6469 * aarch64_get_vec_double (cpu, vm, i));
6470 }
6471 else
6472 for (i = 0; i < (full ? 4 : 2); i++)
6473 aarch64_set_vec_float (cpu, vd, i,
6474 aarch64_get_vec_float (cpu, vn, i)
6475 * aarch64_get_vec_float (cpu, vm, i));
6476 }
6477
6478 static void
6479 do_vec_FADDP (sim_cpu *cpu)
6480 {
6481 /* instr [31] = 0
6482 instr [30] = half(0)/full(1)
6483 instr [29,23] = 10 1110 0
6484 instr [22] = float(0)/double(1)
6485 instr [21] = 1
6486 instr [20,16] = Vm
6487 instr [15,10] = 1101 01
6488 instr [9, 5] = Vn
6489 instr [4, 0] = Vd. */
6490
6491 unsigned full = INSTR (30, 30);
6492 unsigned vm = INSTR (20, 16);
6493 unsigned vn = INSTR (9, 5);
6494 unsigned vd = INSTR (4, 0);
6495
6496 NYI_assert (29, 23, 0x5C);
6497 NYI_assert (21, 21, 1);
6498 NYI_assert (15, 10, 0x35);
6499
6500 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
6501 if (INSTR (22, 22))
6502 {
6503       /* Extract values before adding them in case vd == vn/vm.  */
6504 double tmp1 = aarch64_get_vec_double (cpu, vn, 0);
6505 double tmp2 = aarch64_get_vec_double (cpu, vn, 1);
6506 double tmp3 = aarch64_get_vec_double (cpu, vm, 0);
6507 double tmp4 = aarch64_get_vec_double (cpu, vm, 1);
6508
6509 if (! full)
6510 HALT_UNALLOC;
6511
6512 aarch64_set_vec_double (cpu, vd, 0, tmp1 + tmp2);
6513 aarch64_set_vec_double (cpu, vd, 1, tmp3 + tmp4);
6514 }
6515 else
6516 {
6517       /* Extract values before adding them in case vd == vn/vm.  */
6518 float tmp1 = aarch64_get_vec_float (cpu, vn, 0);
6519 float tmp2 = aarch64_get_vec_float (cpu, vn, 1);
6520 float tmp5 = aarch64_get_vec_float (cpu, vm, 0);
6521 float tmp6 = aarch64_get_vec_float (cpu, vm, 1);
6522
6523 if (full)
6524 {
6525 float tmp3 = aarch64_get_vec_float (cpu, vn, 2);
6526 float tmp4 = aarch64_get_vec_float (cpu, vn, 3);
6527 float tmp7 = aarch64_get_vec_float (cpu, vm, 2);
6528 float tmp8 = aarch64_get_vec_float (cpu, vm, 3);
6529
6530 aarch64_set_vec_float (cpu, vd, 0, tmp1 + tmp2);
6531 aarch64_set_vec_float (cpu, vd, 1, tmp3 + tmp4);
6532 aarch64_set_vec_float (cpu, vd, 2, tmp5 + tmp6);
6533 aarch64_set_vec_float (cpu, vd, 3, tmp7 + tmp8);
6534 }
6535 else
6536 {
6537 aarch64_set_vec_float (cpu, vd, 0, tmp1 + tmp2);
6538 aarch64_set_vec_float (cpu, vd, 1, tmp5 + tmp6);
6539 }
6540 }
6541 }
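
/* Worked illustration of the pairwise add above: with Vn = {a, b, c, d}
   and Vm = {e, f, g, h} (full, single precision), FADDP sums adjacent
   pairs from the concatenation of the two sources, giving
   Vd = {a + b, c + d, e + f, g + h}.  */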
6542
6543 static void
6544 do_vec_FSQRT (sim_cpu *cpu)
6545 {
6546 /* instr[31] = 0
6547 instr[30] = half(0)/full(1)
6548 instr[29,23] = 10 1110 1
6549 instr[22] = single(0)/double(1)
6550 instr[21,10] = 10 0001 1111 10
6551 instr[9,5] = Vsrc
6552 instr[4,0] = Vdest. */
6553
6554 unsigned vn = INSTR (9, 5);
6555 unsigned vd = INSTR (4, 0);
6556 unsigned full = INSTR (30, 30);
6557 int i;
6558
6559 NYI_assert (29, 23, 0x5D);
6560 NYI_assert (21, 10, 0x87E);
6561
6562 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
6563 if (INSTR (22, 22))
6564 {
6565 if (! full)
6566 HALT_UNALLOC;
6567
6568 for (i = 0; i < 2; i++)
6569 aarch64_set_vec_double (cpu, vd, i,
6570 sqrt (aarch64_get_vec_double (cpu, vn, i)));
6571 }
6572 else
6573 {
6574 for (i = 0; i < (full ? 4 : 2); i++)
6575 aarch64_set_vec_float (cpu, vd, i,
6576 sqrtf (aarch64_get_vec_float (cpu, vn, i)));
6577 }
6578 }
6579
6580 static void
6581 do_vec_FNEG (sim_cpu *cpu)
6582 {
6583 /* instr[31] = 0
6584 instr[30] = half (0)/full (1)
6585 instr[29,23] = 10 1110 1
6586 instr[22] = single (0)/double (1)
6587 instr[21,10] = 10 0000 1111 10
6588 instr[9,5] = Vsrc
6589 instr[4,0] = Vdest. */
6590
6591 unsigned vn = INSTR (9, 5);
6592 unsigned vd = INSTR (4, 0);
6593 unsigned full = INSTR (30, 30);
6594 int i;
6595
6596 NYI_assert (29, 23, 0x5D);
6597 NYI_assert (21, 10, 0x83E);
6598
6599 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
6600 if (INSTR (22, 22))
6601 {
6602 if (! full)
6603 HALT_UNALLOC;
6604
6605 for (i = 0; i < 2; i++)
6606 aarch64_set_vec_double (cpu, vd, i,
6607 - aarch64_get_vec_double (cpu, vn, i));
6608 }
6609 else
6610 {
6611 for (i = 0; i < (full ? 4 : 2); i++)
6612 aarch64_set_vec_float (cpu, vd, i,
6613 - aarch64_get_vec_float (cpu, vn, i));
6614 }
6615 }
6616
6617 static void
6618 do_vec_NOT (sim_cpu *cpu)
6619 {
6620 /* instr[31] = 0
6621 instr[30] = half (0)/full (1)
6622 instr[29,10] = 10 1110 0010 0000 0101 10
6623 instr[9,5] = Vn
6624     instr[4,0] = Vd.  */
6625
6626 unsigned vn = INSTR (9, 5);
6627 unsigned vd = INSTR (4, 0);
6628 unsigned i;
6629 int full = INSTR (30, 30);
6630
6631 NYI_assert (29, 10, 0xB8816);
6632
6633 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
6634 for (i = 0; i < (full ? 16 : 8); i++)
6635 aarch64_set_vec_u8 (cpu, vd, i, ~ aarch64_get_vec_u8 (cpu, vn, i));
6636 }
6637
6638 static unsigned int
6639 clz (uint64_t val, unsigned size)
6640 {
6641 uint64_t mask = 1;
6642 int count;
6643
6644 mask <<= (size - 1);
6645 count = 0;
6646 do
6647 {
6648 if (val & mask)
6649 break;
6650 mask >>= 1;
6651 count ++;
6652 }
6653 while (mask);
6654
6655 return count;
6656 }
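
/* For reference, a minimal equivalent of clz using the GCC builtin
   (a sketch assuming a GCC-compatible host compiler; the simulator
   does not rely on it):

     static unsigned int
     clz_builtin (uint64_t val, unsigned size)
     {
       return val ? __builtin_clzll (val) - (64 - size) : size;
     }  */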
6657
6658 static void
6659 do_vec_CLZ (sim_cpu *cpu)
6660 {
6661 /* instr[31] = 0
6662 instr[30] = half (0)/full (1)
6663 instr[29,24] = 10 1110
6664 instr[23,22] = size
6665 instr[21,10] = 10 0000 0100 10
6666 instr[9,5] = Vn
6667     instr[4,0] = Vd.  */
6668
6669 unsigned vn = INSTR (9, 5);
6670 unsigned vd = INSTR (4, 0);
6671 unsigned i;
6672 int full = INSTR (30,30);
6673
6674 NYI_assert (29, 24, 0x2E);
6675 NYI_assert (21, 10, 0x812);
6676
6677 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
6678 switch (INSTR (23, 22))
6679 {
6680 case 0:
6681 for (i = 0; i < (full ? 16 : 8); i++)
6682 aarch64_set_vec_u8 (cpu, vd, i, clz (aarch64_get_vec_u8 (cpu, vn, i), 8));
6683 break;
6684 case 1:
6685 for (i = 0; i < (full ? 8 : 4); i++)
6686 aarch64_set_vec_u16 (cpu, vd, i, clz (aarch64_get_vec_u16 (cpu, vn, i), 16));
6687 break;
6688 case 2:
6689 for (i = 0; i < (full ? 4 : 2); i++)
6690 aarch64_set_vec_u32 (cpu, vd, i, clz (aarch64_get_vec_u32 (cpu, vn, i), 32));
6691 break;
6692 case 3:
6693 if (! full)
6694 HALT_UNALLOC;
6695 aarch64_set_vec_u64 (cpu, vd, 0, clz (aarch64_get_vec_u64 (cpu, vn, 0), 64));
6696 aarch64_set_vec_u64 (cpu, vd, 1, clz (aarch64_get_vec_u64 (cpu, vn, 1), 64));
6697 break;
6698 }
6699 }
6700
6701 static void
6702 do_vec_MOV_element (sim_cpu *cpu)
6703 {
6704 /* instr[31,21] = 0110 1110 000
6705 instr[20,16] = size & dest index
6706 instr[15] = 0
6707 instr[14,11] = source index
6708 instr[10] = 1
6709 instr[9,5] = Vs
6710     instr[4,0] = Vd.  */
6711
6712 unsigned vs = INSTR (9, 5);
6713 unsigned vd = INSTR (4, 0);
6714 unsigned src_index;
6715 unsigned dst_index;
6716
6717 NYI_assert (31, 21, 0x370);
6718 NYI_assert (15, 15, 0);
6719 NYI_assert (10, 10, 1);
6720
6721 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
6722 if (INSTR (16, 16))
6723 {
6724 /* Move a byte. */
6725 src_index = INSTR (14, 11);
6726 dst_index = INSTR (20, 17);
6727 aarch64_set_vec_u8 (cpu, vd, dst_index,
6728 aarch64_get_vec_u8 (cpu, vs, src_index));
6729 }
6730 else if (INSTR (17, 17))
6731 {
6732 /* Move 16-bits. */
6733 NYI_assert (11, 11, 0);
6734 src_index = INSTR (14, 12);
6735 dst_index = INSTR (20, 18);
6736 aarch64_set_vec_u16 (cpu, vd, dst_index,
6737 aarch64_get_vec_u16 (cpu, vs, src_index));
6738 }
6739 else if (INSTR (18, 18))
6740 {
6741 /* Move 32-bits. */
6742 NYI_assert (12, 11, 0);
6743 src_index = INSTR (14, 13);
6744 dst_index = INSTR (20, 19);
6745 aarch64_set_vec_u32 (cpu, vd, dst_index,
6746 aarch64_get_vec_u32 (cpu, vs, src_index));
6747 }
6748 else
6749 {
6750 NYI_assert (19, 19, 1);
6751 NYI_assert (13, 11, 0);
6752 src_index = INSTR (14, 14);
6753 dst_index = INSTR (20, 20);
6754 aarch64_set_vec_u64 (cpu, vd, dst_index,
6755 aarch64_get_vec_u64 (cpu, vs, src_index));
6756 }
6757 }
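
/* Decode example for the element move above: INS v3.s[2], v7.s[1]
   marks the element size with the lowest set bit of instr[20,16]
   (bit 18 for 32-bit elements), so the destination index is
   instr[20,19] = 2 and the source index is instr[14,13] = 1.  */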
6758
6759 static void
6760 do_vec_REV32 (sim_cpu *cpu)
6761 {
6762 /* instr[31] = 0
6763 instr[30] = full/half
6764 instr[29,24] = 10 1110
6765 instr[23,22] = size
6766 instr[21,10] = 10 0000 0000 10
6767 instr[9,5] = Rn
6768 instr[4,0] = Rd. */
6769
6770 unsigned rn = INSTR (9, 5);
6771 unsigned rd = INSTR (4, 0);
6772 unsigned size = INSTR (23, 22);
6773 unsigned full = INSTR (30, 30);
6774 unsigned i;
6775 FRegister val;
6776
6777 NYI_assert (29, 24, 0x2E);
6778 NYI_assert (21, 10, 0x802);
6779
6780 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
6781 switch (size)
6782 {
6783 case 0:
6784 for (i = 0; i < (full ? 16 : 8); i++)
6785 val.b[i ^ 0x3] = aarch64_get_vec_u8 (cpu, rn, i);
6786 break;
6787
6788 case 1:
6789 for (i = 0; i < (full ? 8 : 4); i++)
6790 val.h[i ^ 0x1] = aarch64_get_vec_u16 (cpu, rn, i);
6791 break;
6792
6793 default:
6794 HALT_UNALLOC;
6795 }
6796
6797 aarch64_set_vec_u64 (cpu, rd, 0, val.v[0]);
6798 if (full)
6799 aarch64_set_vec_u64 (cpu, rd, 1, val.v[1]);
6800 }
6801
6802 static void
6803 do_vec_EXT (sim_cpu *cpu)
6804 {
6805 /* instr[31] = 0
6806 instr[30] = full/half
6807 instr[29,21] = 10 1110 000
6808 instr[20,16] = Vm
6809 instr[15] = 0
6810 instr[14,11] = source index
6811 instr[10] = 0
6812 instr[9,5] = Vn
6813     instr[4,0] = Vd.  */
6814
6815 unsigned vm = INSTR (20, 16);
6816 unsigned vn = INSTR (9, 5);
6817 unsigned vd = INSTR (4, 0);
6818 unsigned src_index = INSTR (14, 11);
6819 unsigned full = INSTR (30, 30);
6820 unsigned i;
6821 unsigned j;
6822 FRegister val;
6823
6824 NYI_assert (31, 21, 0x370);
6825 NYI_assert (15, 15, 0);
6826 NYI_assert (10, 10, 0);
6827
6828 if (!full && (src_index & 0x8))
6829 HALT_UNALLOC;
6830
6831 j = 0;
6832
6833 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
6834 for (i = src_index; i < (full ? 16 : 8); i++)
6835 val.b[j ++] = aarch64_get_vec_u8 (cpu, vn, i);
6836 for (i = 0; i < src_index; i++)
6837 val.b[j ++] = aarch64_get_vec_u8 (cpu, vm, i);
6838
6839 aarch64_set_vec_u64 (cpu, vd, 0, val.v[0]);
6840 if (full)
6841 aarch64_set_vec_u64 (cpu, vd, 1, val.v[1]);
6842 }
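
/* Worked example: EXT v0.16b, v1.16b, v2.16b, #3 (src_index = 3)
   concatenates the top of Vn with the bottom of Vm, so
   v0.b[0..12] = v1.b[3..15] and v0.b[13..15] = v2.b[0..2].  */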
6843
6844 static void
6845 dexAdvSIMD0 (sim_cpu *cpu)
6846 {
6847 /* instr [28,25] = 0 111. */
6848 if ( INSTR (15, 10) == 0x07
6849 && (INSTR (9, 5) ==
6850 INSTR (20, 16)))
6851 {
6852 if (INSTR (31, 21) == 0x075
6853 || INSTR (31, 21) == 0x275)
6854 {
6855 do_vec_MOV_whole_vector (cpu);
6856 return;
6857 }
6858 }
6859
6860 if (INSTR (29, 19) == 0x1E0)
6861 {
6862 do_vec_MOV_immediate (cpu);
6863 return;
6864 }
6865
6866 if (INSTR (29, 19) == 0x5E0)
6867 {
6868 do_vec_MVNI (cpu);
6869 return;
6870 }
6871
6872 if (INSTR (29, 19) == 0x1C0
6873 || INSTR (29, 19) == 0x1C1)
6874 {
6875 if (INSTR (15, 10) == 0x03)
6876 {
6877 do_vec_DUP_scalar_into_vector (cpu);
6878 return;
6879 }
6880 }
6881
6882 switch (INSTR (29, 24))
6883 {
6884 case 0x0E: do_vec_op1 (cpu); return;
6885 case 0x0F: do_vec_op2 (cpu); return;
6886
6887 case 0x2E:
6888 if (INSTR (21, 21) == 1)
6889 {
6890 switch (INSTR (15, 10))
6891 {
6892 case 0x02:
6893 do_vec_REV32 (cpu);
6894 return;
6895
6896 case 0x07:
6897 switch (INSTR (23, 22))
6898 {
6899 case 0: do_vec_EOR (cpu); return;
6900 case 1: do_vec_BSL (cpu); return;
6901 case 2:
6902 case 3: do_vec_bit (cpu); return;
6903 }
6904 break;
6905
6906 case 0x08: do_vec_sub_long (cpu); return;
6907 case 0x11: do_vec_USHL (cpu); return;
6908 case 0x12: do_vec_CLZ (cpu); return;
6909 case 0x16: do_vec_NOT (cpu); return;
6910 case 0x19: do_vec_max (cpu); return;
6911 case 0x1B: do_vec_min (cpu); return;
6912 case 0x21: do_vec_SUB (cpu); return;
6913 case 0x25: do_vec_MLS (cpu); return;
6914 case 0x31: do_vec_FminmaxNMP (cpu); return;
6915 case 0x35: do_vec_FADDP (cpu); return;
6916 case 0x37: do_vec_FMUL (cpu); return;
6917 case 0x3F: do_vec_FDIV (cpu); return;
6918
6919 case 0x3E:
6920 switch (INSTR (20, 16))
6921 {
6922 case 0x00: do_vec_FNEG (cpu); return;
6923 case 0x01: do_vec_FSQRT (cpu); return;
6924 default: HALT_NYI;
6925 }
6926
6927 case 0x0D:
6928 case 0x0F:
6929 case 0x22:
6930 case 0x23:
6931 case 0x26:
6932 case 0x2A:
6933 case 0x32:
6934 case 0x36:
6935 case 0x39:
6936 case 0x3A:
6937 do_vec_compare (cpu); return;
6938
6939 default:
6940 break;
6941 }
6942 }
6943
6944 if (INSTR (31, 21) == 0x370)
6945 {
6946 if (INSTR (10, 10))
6947 do_vec_MOV_element (cpu);
6948 else
6949 do_vec_EXT (cpu);
6950 return;
6951 }
6952
6953 switch (INSTR (21, 10))
6954 {
6955 case 0x82E: do_vec_neg (cpu); return;
6956 case 0x87E: do_vec_sqrt (cpu); return;
6957 default:
6958 if (INSTR (15, 10) == 0x30)
6959 {
6960 do_vec_mull (cpu);
6961 return;
6962 }
6963 break;
6964 }
6965 break;
6966
6967 case 0x2f:
6968 switch (INSTR (15, 10))
6969 {
6970 case 0x01: do_vec_SSHR_USHR (cpu); return;
6971 case 0x10:
6972 case 0x12: do_vec_mls_indexed (cpu); return;
6973 case 0x29: do_vec_xtl (cpu); return;
6974 default:
6975 HALT_NYI;
6976 }
6977
6978 default:
6979 break;
6980 }
6981
6982 HALT_NYI;
6983 }
6984
6985 /* 3 sources. */
6986
6987 /* Float multiply add. */
6988 static void
6989 fmadds (sim_cpu *cpu)
6990 {
6991 unsigned sa = INSTR (14, 10);
6992 unsigned sm = INSTR (20, 16);
6993 unsigned sn = INSTR ( 9, 5);
6994 unsigned sd = INSTR ( 4, 0);
6995
6996 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
6997 aarch64_set_FP_float (cpu, sd, aarch64_get_FP_float (cpu, sa)
6998 + aarch64_get_FP_float (cpu, sn)
6999 * aarch64_get_FP_float (cpu, sm));
7000 }
7001
7002 /* Double multiply add. */
7003 static void
7004 fmaddd (sim_cpu *cpu)
7005 {
7006 unsigned sa = INSTR (14, 10);
7007 unsigned sm = INSTR (20, 16);
7008 unsigned sn = INSTR ( 9, 5);
7009 unsigned sd = INSTR ( 4, 0);
7010
7011 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7012 aarch64_set_FP_double (cpu, sd, aarch64_get_FP_double (cpu, sa)
7013 + aarch64_get_FP_double (cpu, sn)
7014 * aarch64_get_FP_double (cpu, sm));
7015 }
7016
7017 /* Float multiply subtract. */
7018 static void
7019 fmsubs (sim_cpu *cpu)
7020 {
7021 unsigned sa = INSTR (14, 10);
7022 unsigned sm = INSTR (20, 16);
7023 unsigned sn = INSTR ( 9, 5);
7024 unsigned sd = INSTR ( 4, 0);
7025
7026 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7027 aarch64_set_FP_float (cpu, sd, aarch64_get_FP_float (cpu, sa)
7028 - aarch64_get_FP_float (cpu, sn)
7029 * aarch64_get_FP_float (cpu, sm));
7030 }
7031
7032 /* Double multiply subtract. */
7033 static void
7034 fmsubd (sim_cpu *cpu)
7035 {
7036 unsigned sa = INSTR (14, 10);
7037 unsigned sm = INSTR (20, 16);
7038 unsigned sn = INSTR ( 9, 5);
7039 unsigned sd = INSTR ( 4, 0);
7040
7041 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7042 aarch64_set_FP_double (cpu, sd, aarch64_get_FP_double (cpu, sa)
7043 - aarch64_get_FP_double (cpu, sn)
7044 * aarch64_get_FP_double (cpu, sm));
7045 }
7046
7047 /* Float negative multiply add. */
7048 static void
7049 fnmadds (sim_cpu *cpu)
7050 {
7051 unsigned sa = INSTR (14, 10);
7052 unsigned sm = INSTR (20, 16);
7053 unsigned sn = INSTR ( 9, 5);
7054 unsigned sd = INSTR ( 4, 0);
7055
7056 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7057 aarch64_set_FP_float (cpu, sd, - aarch64_get_FP_float (cpu, sa)
7058 + (- aarch64_get_FP_float (cpu, sn))
7059 * aarch64_get_FP_float (cpu, sm));
7060 }
7061
7062 /* Double negative multiply add. */
7063 static void
7064 fnmaddd (sim_cpu *cpu)
7065 {
7066 unsigned sa = INSTR (14, 10);
7067 unsigned sm = INSTR (20, 16);
7068 unsigned sn = INSTR ( 9, 5);
7069 unsigned sd = INSTR ( 4, 0);
7070
7071 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7072 aarch64_set_FP_double (cpu, sd, - aarch64_get_FP_double (cpu, sa)
7073 + (- aarch64_get_FP_double (cpu, sn))
7074 * aarch64_get_FP_double (cpu, sm));
7075 }
7076
7077 /* Float negative multiply subtract. */
7078 static void
7079 fnmsubs (sim_cpu *cpu)
7080 {
7081 unsigned sa = INSTR (14, 10);
7082 unsigned sm = INSTR (20, 16);
7083 unsigned sn = INSTR ( 9, 5);
7084 unsigned sd = INSTR ( 4, 0);
7085
7086 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7087 aarch64_set_FP_float (cpu, sd, - aarch64_get_FP_float (cpu, sa)
7088 + aarch64_get_FP_float (cpu, sn)
7089 * aarch64_get_FP_float (cpu, sm));
7090 }
7091
7092 /* Double negative multiply subtract. */
7093 static void
7094 fnmsubd (sim_cpu *cpu)
7095 {
7096 unsigned sa = INSTR (14, 10);
7097 unsigned sm = INSTR (20, 16);
7098 unsigned sn = INSTR ( 9, 5);
7099 unsigned sd = INSTR ( 4, 0);
7100
7101 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7102 aarch64_set_FP_double (cpu, sd, - aarch64_get_FP_double (cpu, sa)
7103 + aarch64_get_FP_double (cpu, sn)
7104 * aarch64_get_FP_double (cpu, sm));
7105 }
7106
7107 static void
7108 dexSimpleFPDataProc3Source (sim_cpu *cpu)
7109 {
7110 /* instr[31] ==> M : 0 ==> OK, 1 ==> UNALLOC
7111 instr[30] = 0
7112 instr[29] ==> S : 0 ==> OK, 1 ==> UNALLOC
7113 instr[28,25] = 1111
7114 instr[24] = 1
7115 instr[23,22] ==> type : 0 ==> single, 01 ==> double, 1x ==> UNALLOC
7116 instr[21] ==> o1 : 0 ==> unnegated, 1 ==> negated
7117 instr[15] ==> o2 : 0 ==> ADD, 1 ==> SUB */
7118
7119 uint32_t M_S = (INSTR (31, 31) << 1) | INSTR (29, 29);
7120 /* dispatch on combined type:o1:o2. */
7121 uint32_t dispatch = (INSTR (23, 21) << 1) | INSTR (15, 15);
7122
7123 if (M_S != 0)
7124 HALT_UNALLOC;
7125
7126 switch (dispatch)
7127 {
7128 case 0: fmadds (cpu); return;
7129 case 1: fmsubs (cpu); return;
7130 case 2: fnmadds (cpu); return;
7131 case 3: fnmsubs (cpu); return;
7132 case 4: fmaddd (cpu); return;
7133 case 5: fmsubd (cpu); return;
7134 case 6: fnmaddd (cpu); return;
7135 case 7: fnmsubd (cpu); return;
7136 default:
7137 /* type > 1 is currently unallocated. */
7138 HALT_UNALLOC;
7139 }
7140 }
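
/* Dispatch example for the decoder above: FNMSUB Dd, Dn, Dm, Da has
   type = 01, o1 = 1 and o2 = 1, so dispatch = (0b011 << 1) | 1 = 7
   and the instruction is handled by fnmsubd.  */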
7141
7142 static void
7143 dexSimpleFPFixedConvert (sim_cpu *cpu)
7144 {
7145 HALT_NYI;
7146 }
7147
7148 static void
7149 dexSimpleFPCondCompare (sim_cpu *cpu)
7150 {
7151 /* instr [31,23] = 0001 1110 0
7152 instr [22] = type
7153 instr [21] = 1
7154 instr [20,16] = Rm
7155 instr [15,12] = condition
7156 instr [11,10] = 01
7157 instr [9,5] = Rn
7158 instr [4] = 0
7159 instr [3,0] = nzcv */
7160
7161 unsigned rm = INSTR (20, 16);
7162 unsigned rn = INSTR (9, 5);
7163
7164 NYI_assert (31, 23, 0x3C);
7165 NYI_assert (11, 10, 0x1);
7166 NYI_assert (4, 4, 0);
7167
7168 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7169 if (! testConditionCode (cpu, INSTR (15, 12)))
7170 {
7171 aarch64_set_CPSR (cpu, INSTR (3, 0));
7172 return;
7173 }
7174
7175 if (INSTR (22, 22))
7176 {
7177 /* Double precision. */
7178 double val1 = aarch64_get_vec_double (cpu, rn, 0);
7179 double val2 = aarch64_get_vec_double (cpu, rm, 0);
7180
7181 /* FIXME: Check for NaNs. */
7182 if (val1 == val2)
7183 aarch64_set_CPSR (cpu, (Z | C));
7184 else if (val1 < val2)
7185 aarch64_set_CPSR (cpu, N);
7186 else /* val1 > val2 */
7187 aarch64_set_CPSR (cpu, C);
7188 }
7189 else
7190 {
7191 /* Single precision. */
7192 float val1 = aarch64_get_vec_float (cpu, rn, 0);
7193 float val2 = aarch64_get_vec_float (cpu, rm, 0);
7194
7195 /* FIXME: Check for NaNs. */
7196 if (val1 == val2)
7197 aarch64_set_CPSR (cpu, (Z | C));
7198 else if (val1 < val2)
7199 aarch64_set_CPSR (cpu, N);
7200 else /* val1 > val2 */
7201 aarch64_set_CPSR (cpu, C);
7202 }
7203 }
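
/* Example: FCCMP d0, d1, #0b0100, NE compares d0 with d1 only when NE
   holds; otherwise the flags are simply set from the immediate, here
   0b0100, i.e. just Z.  */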
7204
7205 /* 2 sources. */
7206
7207 /* Float add. */
7208 static void
7209 fadds (sim_cpu *cpu)
7210 {
7211 unsigned sm = INSTR (20, 16);
7212 unsigned sn = INSTR ( 9, 5);
7213 unsigned sd = INSTR ( 4, 0);
7214
7215 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7216 aarch64_set_FP_float (cpu, sd, aarch64_get_FP_float (cpu, sn)
7217 + aarch64_get_FP_float (cpu, sm));
7218 }
7219
7220 /* Double add. */
7221 static void
7222 faddd (sim_cpu *cpu)
7223 {
7224 unsigned sm = INSTR (20, 16);
7225 unsigned sn = INSTR ( 9, 5);
7226 unsigned sd = INSTR ( 4, 0);
7227
7228 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7229 aarch64_set_FP_double (cpu, sd, aarch64_get_FP_double (cpu, sn)
7230 + aarch64_get_FP_double (cpu, sm));
7231 }
7232
7233 /* Float divide. */
7234 static void
7235 fdivs (sim_cpu *cpu)
7236 {
7237 unsigned sm = INSTR (20, 16);
7238 unsigned sn = INSTR ( 9, 5);
7239 unsigned sd = INSTR ( 4, 0);
7240
7241 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7242 aarch64_set_FP_float (cpu, sd, aarch64_get_FP_float (cpu, sn)
7243 / aarch64_get_FP_float (cpu, sm));
7244 }
7245
7246 /* Double divide. */
7247 static void
7248 fdivd (sim_cpu *cpu)
7249 {
7250 unsigned sm = INSTR (20, 16);
7251 unsigned sn = INSTR ( 9, 5);
7252 unsigned sd = INSTR ( 4, 0);
7253
7254 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7255 aarch64_set_FP_double (cpu, sd, aarch64_get_FP_double (cpu, sn)
7256 / aarch64_get_FP_double (cpu, sm));
7257 }
7258
7259 /* Float multiply. */
7260 static void
7261 fmuls (sim_cpu *cpu)
7262 {
7263 unsigned sm = INSTR (20, 16);
7264 unsigned sn = INSTR ( 9, 5);
7265 unsigned sd = INSTR ( 4, 0);
7266
7267 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7268 aarch64_set_FP_float (cpu, sd, aarch64_get_FP_float (cpu, sn)
7269 * aarch64_get_FP_float (cpu, sm));
7270 }
7271
7272 /* Double multiply. */
7273 static void
7274 fmuld (sim_cpu *cpu)
7275 {
7276 unsigned sm = INSTR (20, 16);
7277 unsigned sn = INSTR ( 9, 5);
7278 unsigned sd = INSTR ( 4, 0);
7279
7280 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7281 aarch64_set_FP_double (cpu, sd, aarch64_get_FP_double (cpu, sn)
7282 * aarch64_get_FP_double (cpu, sm));
7283 }
7284
7285 /* Float negate and multiply. */
7286 static void
7287 fnmuls (sim_cpu *cpu)
7288 {
7289 unsigned sm = INSTR (20, 16);
7290 unsigned sn = INSTR ( 9, 5);
7291 unsigned sd = INSTR ( 4, 0);
7292
7293 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7294 aarch64_set_FP_float (cpu, sd, - (aarch64_get_FP_float (cpu, sn)
7295 * aarch64_get_FP_float (cpu, sm)));
7296 }
7297
7298 /* Double negate and multiply. */
7299 static void
7300 fnmuld (sim_cpu *cpu)
7301 {
7302 unsigned sm = INSTR (20, 16);
7303 unsigned sn = INSTR ( 9, 5);
7304 unsigned sd = INSTR ( 4, 0);
7305
7306 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7307 aarch64_set_FP_double (cpu, sd, - (aarch64_get_FP_double (cpu, sn)
7308 * aarch64_get_FP_double (cpu, sm)));
7309 }
7310
7311 /* Float subtract. */
7312 static void
7313 fsubs (sim_cpu *cpu)
7314 {
7315 unsigned sm = INSTR (20, 16);
7316 unsigned sn = INSTR ( 9, 5);
7317 unsigned sd = INSTR ( 4, 0);
7318
7319 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7320 aarch64_set_FP_float (cpu, sd, aarch64_get_FP_float (cpu, sn)
7321 - aarch64_get_FP_float (cpu, sm));
7322 }
7323
7324 /* Double subtract. */
7325 static void
7326 fsubd (sim_cpu *cpu)
7327 {
7328 unsigned sm = INSTR (20, 16);
7329 unsigned sn = INSTR ( 9, 5);
7330 unsigned sd = INSTR ( 4, 0);
7331
7332 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7333 aarch64_set_FP_double (cpu, sd, aarch64_get_FP_double (cpu, sn)
7334 - aarch64_get_FP_double (cpu, sm));
7335 }
7336
7337 static void
7338 do_FMINNM (sim_cpu *cpu)
7339 {
7340 /* instr[31,23] = 0 0011 1100
7341 instr[22] = float(0)/double(1)
7342 instr[21] = 1
7343 instr[20,16] = Sm
7344 instr[15,10] = 01 1110
7345 instr[9,5] = Sn
7346     instr[4,0]  = Sd */
7347
7348 unsigned sm = INSTR (20, 16);
7349 unsigned sn = INSTR ( 9, 5);
7350 unsigned sd = INSTR ( 4, 0);
7351
7352 NYI_assert (31, 23, 0x03C);
7353 NYI_assert (15, 10, 0x1E);
7354
7355 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7356 if (INSTR (22, 22))
7357 aarch64_set_FP_double (cpu, sd,
7358 dminnm (aarch64_get_FP_double (cpu, sn),
7359 aarch64_get_FP_double (cpu, sm)));
7360 else
7361 aarch64_set_FP_float (cpu, sd,
7362 fminnm (aarch64_get_FP_float (cpu, sn),
7363 aarch64_get_FP_float (cpu, sm)));
7364 }
7365
7366 static void
7367 do_FMAXNM (sim_cpu *cpu)
7368 {
7369 /* instr[31,23] = 0 0011 1100
7370 instr[22] = float(0)/double(1)
7371 instr[21] = 1
7372 instr[20,16] = Sm
7373 instr[15,10] = 01 1010
7374 instr[9,5] = Sn
7375     instr[4,0]  = Sd */
7376
7377 unsigned sm = INSTR (20, 16);
7378 unsigned sn = INSTR ( 9, 5);
7379 unsigned sd = INSTR ( 4, 0);
7380
7381 NYI_assert (31, 23, 0x03C);
7382 NYI_assert (15, 10, 0x1A);
7383
7384 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7385 if (INSTR (22, 22))
7386 aarch64_set_FP_double (cpu, sd,
7387 dmaxnm (aarch64_get_FP_double (cpu, sn),
7388 aarch64_get_FP_double (cpu, sm)));
7389 else
7390 aarch64_set_FP_float (cpu, sd,
7391 fmaxnm (aarch64_get_FP_float (cpu, sn),
7392 aarch64_get_FP_float (cpu, sm)));
7393 }
7394
7395 static void
7396 dexSimpleFPDataProc2Source (sim_cpu *cpu)
7397 {
7398 /* instr[31] ==> M : 0 ==> OK, 1 ==> UNALLOC
7399 instr[30] = 0
7400 instr[29] ==> S : 0 ==> OK, 1 ==> UNALLOC
7401 instr[28,25] = 1111
7402 instr[24] = 0
7403 instr[23,22] ==> type : 0 ==> single, 01 ==> double, 1x ==> UNALLOC
7404 instr[21] = 1
7405 instr[20,16] = Vm
7406 instr[15,12] ==> opcode : 0000 ==> FMUL, 0001 ==> FDIV
7407 0010 ==> FADD, 0011 ==> FSUB,
7408 0100 ==> FMAX, 0101 ==> FMIN
7409 0110 ==> FMAXNM, 0111 ==> FMINNM
7410 1000 ==> FNMUL, ow ==> UNALLOC
7411 instr[11,10] = 10
7412 instr[9,5] = Vn
7413 instr[4,0] = Vd */
7414
7415 uint32_t M_S = (INSTR (31, 31) << 1) | INSTR (29, 29);
7416 uint32_t type = INSTR (23, 22);
7417 /* Dispatch on opcode. */
7418 uint32_t dispatch = INSTR (15, 12);
7419
7420 if (type > 1)
7421 HALT_UNALLOC;
7422
7423 if (M_S != 0)
7424 HALT_UNALLOC;
7425
7426 if (type)
7427 switch (dispatch)
7428 {
7429 case 0: fmuld (cpu); return;
7430 case 1: fdivd (cpu); return;
7431 case 2: faddd (cpu); return;
7432 case 3: fsubd (cpu); return;
7433 case 6: do_FMAXNM (cpu); return;
7434 case 7: do_FMINNM (cpu); return;
7435 case 8: fnmuld (cpu); return;
7436
7437 /* Have not yet implemented fmax and fmin. */
7438 case 4:
7439 case 5:
7440 HALT_NYI;
7441
7442 default:
7443 HALT_UNALLOC;
7444 }
7445 else /* type == 0 => floats. */
7446 switch (dispatch)
7447 {
7448 case 0: fmuls (cpu); return;
7449 case 1: fdivs (cpu); return;
7450 case 2: fadds (cpu); return;
7451 case 3: fsubs (cpu); return;
7452 case 6: do_FMAXNM (cpu); return;
7453 case 7: do_FMINNM (cpu); return;
7454 case 8: fnmuls (cpu); return;
7455
7456 case 4:
7457 case 5:
7458 HALT_NYI;
7459
7460 default:
7461 HALT_UNALLOC;
7462 }
7463 }
7464
7465 static void
7466 dexSimpleFPCondSelect (sim_cpu *cpu)
7467 {
7468 /* FCSEL
7469 instr[31,23] = 0 0011 1100
7470 instr[22] = 0=>single 1=>double
7471 instr[21] = 1
7472 instr[20,16] = Sm
7473 instr[15,12] = cond
7474 instr[11,10] = 11
7475 instr[9,5] = Sn
7476      instr[4,0]  = Sd */
7477 unsigned sm = INSTR (20, 16);
7478 unsigned sn = INSTR ( 9, 5);
7479 unsigned sd = INSTR ( 4, 0);
7480 uint32_t set = testConditionCode (cpu, INSTR (15, 12));
7481
7482 NYI_assert (31, 23, 0x03C);
7483 NYI_assert (11, 10, 0x3);
7484
7485 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7486   if (INSTR (22, 22))
7487     aarch64_set_FP_double (cpu, sd, aarch64_get_FP_double (cpu, set ? sn : sm));
7488   else
7489     aarch64_set_FP_float (cpu, sd, aarch64_get_FP_float (cpu, set ? sn : sm));
7490 }
7491
7492 /* Store 32 bit unscaled signed 9 bit. */
7493 static void
7494 fsturs (sim_cpu *cpu, int32_t offset)
7495 {
7496 unsigned int rn = INSTR (9, 5);
7497 unsigned int st = INSTR (4, 0);
7498
7499 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7500   aarch64_set_mem_u32 (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK) + offset,
7501 		       aarch64_get_vec_u32 (cpu, st, 0));
7502 }
7503
7504 /* Store 64 bit unscaled signed 9 bit. */
7505 static void
7506 fsturd (sim_cpu *cpu, int32_t offset)
7507 {
7508 unsigned int rn = INSTR (9, 5);
7509 unsigned int st = INSTR (4, 0);
7510
7511 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7512   aarch64_set_mem_u64 (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK) + offset,
7513 		       aarch64_get_vec_u64 (cpu, st, 0));
7514 }
7515
7516 /* Store 128 bit unscaled signed 9 bit. */
7517 static void
7518 fsturq (sim_cpu *cpu, int32_t offset)
7519 {
7520 unsigned int rn = INSTR (9, 5);
7521 unsigned int st = INSTR (4, 0);
7522 FRegister a;
7523
7524 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7525   aarch64_get_FP_long_double (cpu, st, & a);
7526   aarch64_set_mem_long_double (cpu,
7527 			       aarch64_get_reg_u64 (cpu, rn, SP_OK)
7528 			       + offset, a);
7529 }
7530
7531 /* TODO FP move register. */
7532
7533 /* 32 bit fp to fp move register. */
7534 static void
7535 ffmovs (sim_cpu *cpu)
7536 {
7537 unsigned int rn = INSTR (9, 5);
7538 unsigned int st = INSTR (4, 0);
7539
7540 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7541 aarch64_set_FP_float (cpu, st, aarch64_get_FP_float (cpu, rn));
7542 }
7543
7544 /* 64 bit fp to fp move register. */
7545 static void
7546 ffmovd (sim_cpu *cpu)
7547 {
7548 unsigned int rn = INSTR (9, 5);
7549 unsigned int st = INSTR (4, 0);
7550
7551 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7552 aarch64_set_FP_double (cpu, st, aarch64_get_FP_double (cpu, rn));
7553 }
7554
7555 /* 32 bit GReg to Vec move register. */
7556 static void
7557 fgmovs (sim_cpu *cpu)
7558 {
7559 unsigned int rn = INSTR (9, 5);
7560 unsigned int st = INSTR (4, 0);
7561
7562 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7563 aarch64_set_vec_u32 (cpu, st, 0, aarch64_get_reg_u32 (cpu, rn, NO_SP));
7564 }
7565
7566 /* 64 bit g to fp move register. */
7567 static void
7568 fgmovd (sim_cpu *cpu)
7569 {
7570 unsigned int rn = INSTR (9, 5);
7571 unsigned int st = INSTR (4, 0);
7572
7573 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7574 aarch64_set_vec_u64 (cpu, st, 0, aarch64_get_reg_u64 (cpu, rn, NO_SP));
7575 }
7576
7577 /* 32 bit fp to g move register. */
7578 static void
7579 gfmovs (sim_cpu *cpu)
7580 {
7581 unsigned int rn = INSTR (9, 5);
7582 unsigned int st = INSTR (4, 0);
7583
7584 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7585 aarch64_set_reg_u64 (cpu, st, NO_SP, aarch64_get_vec_u32 (cpu, rn, 0));
7586 }
7587
7588 /* 64 bit fp to g move register. */
7589 static void
7590 gfmovd (sim_cpu *cpu)
7591 {
7592 unsigned int rn = INSTR (9, 5);
7593 unsigned int st = INSTR (4, 0);
7594
7595 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7596 aarch64_set_reg_u64 (cpu, st, NO_SP, aarch64_get_vec_u64 (cpu, rn, 0));
7597 }
7598
7599 /* FP move immediate
7600
7601 These install an immediate 8 bit value in the target register
7602 where the 8 bits comprise 1 sign bit, 4 bits of fraction and a 3
7603 bit exponent. */
7604
7605 static void
7606 fmovs (sim_cpu *cpu)
7607 {
7608 unsigned int sd = INSTR (4, 0);
7609 uint32_t imm = INSTR (20, 13);
7610 float f = fp_immediate_for_encoding_32 (imm);
7611
7612 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7613 aarch64_set_FP_float (cpu, sd, f);
7614 }
7615
7616 static void
7617 fmovd (sim_cpu *cpu)
7618 {
7619 unsigned int sd = INSTR (4, 0);
7620 uint32_t imm = INSTR (20, 13);
7621 double d = fp_immediate_for_encoding_64 (imm);
7622
7623 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7624 aarch64_set_FP_double (cpu, sd, d);
7625 }
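
/* A minimal sketch of the expansion done by fp_immediate_for_encoding_32,
   assuming the standard VFPExpandImm layout for single precision:
   sign : NOT(imm8<6>) : Replicate(imm8<6>, 5) : imm8<5:0> : Zeros(19).
   The helper name below is illustrative only and is not used by the
   simulator.  */

static float
example_expand_imm8_to_float (uint32_t imm8)
{
  uint32_t sign = (imm8 >> 7) & 1;
  uint32_t b6   = (imm8 >> 6) & 1;
  uint32_t bits = (sign << 31)
    | ((b6 ^ 1) << 30)		  /* Exponent MSB is NOT(imm8<6>).  */
    | ((b6 ? 0x1Fu : 0u) << 25)	  /* imm8<6> replicated five times.  */
    | (((imm8 >> 4) & 0x3) << 23) /* Exponent low bits imm8<5:4>.  */
    | ((imm8 & 0xF) << 19);	  /* Fraction imm8<3:0>, zero filled.  */
  float result;

  memcpy (&result, &bits, sizeof (result));
  return result;		  /* E.g. imm8 == 0x70 yields 1.0f.  */
}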
7626
7627 static void
7628 dexSimpleFPImmediate (sim_cpu *cpu)
7629 {
7630 /* instr[31,23] == 00111100
7631 instr[22] == type : single(0)/double(1)
7632 instr[21] == 1
7633 instr[20,13] == imm8
7634 instr[12,10] == 100
7635      instr[9,5]   == imm5 : 00000 ==> OK, ow ==> UNALLOC
7636 instr[4,0] == Rd */
7637 uint32_t imm5 = INSTR (9, 5);
7638
7639 NYI_assert (31, 23, 0x3C);
7640
7641 if (imm5 != 0)
7642 HALT_UNALLOC;
7643
7644 if (INSTR (22, 22))
7645 fmovd (cpu);
7646 else
7647 fmovs (cpu);
7648 }
7649
7650 /* TODO specific decode and execute for group Load Store. */
7651
7652 /* TODO FP load/store single register (unscaled offset). */
7653
7654 /* TODO load 8 bit unscaled signed 9 bit. */
7655 /* TODO load 16 bit unscaled signed 9 bit. */
7656
7657 /* Load 32 bit unscaled signed 9 bit. */
7658 static void
7659 fldurs (sim_cpu *cpu, int32_t offset)
7660 {
7661 unsigned int rn = INSTR (9, 5);
7662 unsigned int st = INSTR (4, 0);
7663
7664 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7665 aarch64_set_vec_u32 (cpu, st, 0, aarch64_get_mem_u32
7666 (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK) + offset));
7667 }
7668
7669 /* Load 64 bit unscaled signed 9 bit. */
7670 static void
7671 fldurd (sim_cpu *cpu, int32_t offset)
7672 {
7673 unsigned int rn = INSTR (9, 5);
7674 unsigned int st = INSTR (4, 0);
7675
7676 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7677 aarch64_set_vec_u64 (cpu, st, 0, aarch64_get_mem_u64
7678 (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK) + offset));
7679 }
7680
7681 /* Load 128 bit unscaled signed 9 bit. */
7682 static void
7683 fldurq (sim_cpu *cpu, int32_t offset)
7684 {
7685 unsigned int rn = INSTR (9, 5);
7686 unsigned int st = INSTR (4, 0);
7687 FRegister a;
7688 uint64_t addr = aarch64_get_reg_u64 (cpu, rn, SP_OK) + offset;
7689
7690 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7691 aarch64_get_mem_long_double (cpu, addr, & a);
7692 aarch64_set_FP_long_double (cpu, st, a);
7693 }
7694
7695 /* TODO store 8 bit unscaled signed 9 bit. */
7696 /* TODO store 16 bit unscaled signed 9 bit. */
7697
7698
7699 /* 1 source. */
7700
7701 /* Float absolute value. */
7702 static void
7703 fabss (sim_cpu *cpu)
7704 {
7705 unsigned sn = INSTR (9, 5);
7706 unsigned sd = INSTR (4, 0);
7707 float value = aarch64_get_FP_float (cpu, sn);
7708
7709 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7710 aarch64_set_FP_float (cpu, sd, fabsf (value));
7711 }
7712
7713 /* Double absolute value. */
7714 static void
7715 fabcpu (sim_cpu *cpu)
7716 {
7717 unsigned sn = INSTR (9, 5);
7718 unsigned sd = INSTR (4, 0);
7719 double value = aarch64_get_FP_double (cpu, sn);
7720
7721 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7722 aarch64_set_FP_double (cpu, sd, fabs (value));
7723 }
7724
7725 /* Float negative value. */
7726 static void
7727 fnegs (sim_cpu *cpu)
7728 {
7729 unsigned sn = INSTR (9, 5);
7730 unsigned sd = INSTR (4, 0);
7731
7732 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7733 aarch64_set_FP_float (cpu, sd, - aarch64_get_FP_float (cpu, sn));
7734 }
7735
7736 /* Double negative value. */
7737 static void
7738 fnegd (sim_cpu *cpu)
7739 {
7740 unsigned sn = INSTR (9, 5);
7741 unsigned sd = INSTR (4, 0);
7742
7743 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7744 aarch64_set_FP_double (cpu, sd, - aarch64_get_FP_double (cpu, sn));
7745 }
7746
7747 /* Float square root. */
7748 static void
7749 fsqrts (sim_cpu *cpu)
7750 {
7751 unsigned sn = INSTR (9, 5);
7752 unsigned sd = INSTR (4, 0);
7753
7754 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7755 aarch64_set_FP_float (cpu, sd, sqrtf (aarch64_get_FP_float (cpu, sn)));
7756 }
7757
7758 /* Double square root. */
7759 static void
7760 fsqrtd (sim_cpu *cpu)
7761 {
7762 unsigned sn = INSTR (9, 5);
7763 unsigned sd = INSTR (4, 0);
7764
7765 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7766 aarch64_set_FP_double (cpu, sd,
7767 sqrt (aarch64_get_FP_double (cpu, sn)));
7768 }
7769
7770 /* Convert double to float. */
7771 static void
7772 fcvtds (sim_cpu *cpu)
7773 {
7774 unsigned sn = INSTR (9, 5);
7775 unsigned sd = INSTR (4, 0);
7776
7777 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7778 aarch64_set_FP_float (cpu, sd, (float) aarch64_get_FP_double (cpu, sn));
7779 }
7780
7781 /* Convert float to double. */
7782 static void
7783 fcvtcpu (sim_cpu *cpu)
7784 {
7785 unsigned sn = INSTR (9, 5);
7786 unsigned sd = INSTR (4, 0);
7787
7788 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7789 aarch64_set_FP_double (cpu, sd, (double) aarch64_get_FP_float (cpu, sn));
7790 }
7791
7792 static void
7793 do_FRINT (sim_cpu *cpu)
7794 {
7795 /* instr[31,23] = 0001 1110 0
7796 instr[22] = single(0)/double(1)
7797 instr[21,18] = 1001
7798 instr[17,15] = rounding mode
7799 instr[14,10] = 10000
7800 instr[9,5] = source
7801 instr[4,0] = dest */
7802
7803 float val;
7804 unsigned rs = INSTR (9, 5);
7805 unsigned rd = INSTR (4, 0);
7806 unsigned int rmode = INSTR (17, 15);
7807
7808 NYI_assert (31, 23, 0x03C);
7809 NYI_assert (21, 18, 0x9);
7810 NYI_assert (14, 10, 0x10);
7811
7812 if (rmode == 6 || rmode == 7)
7813 /* FIXME: Add support for rmode == 6 exactness check. */
7814 rmode = uimm (aarch64_get_FPSR (cpu), 23, 22);
7815
7816 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7817 if (INSTR (22, 22))
7818 {
7819 double val = aarch64_get_FP_double (cpu, rs);
7820
7821 switch (rmode)
7822 {
7823 	case 0: /* mode N: nearest or even.  */
7824 	  {
7825 	    double rval = round (val);
7826 
7827 	    /* round () resolves ties away from zero; pull exact
7828 	       halfway cases back to the even neighbour.  */
7829 	    if (fabs (val - rval) == 0.5)
7830 	      rval = 2.0 * round (val / 2.0);
7831 
7832 	    aarch64_set_FP_double (cpu, rd, rval);
7833 	    return;
7834 	  }
7836
7837 	case 1: /* mode P: towards +inf.  */
7838 	  aarch64_set_FP_double (cpu, rd, ceil (val));
7839 	  return;
7840 
7841 	case 2: /* mode M: towards -inf.  */
7842 	  aarch64_set_FP_double (cpu, rd, floor (val));
7843 	  return;
7850
7851 case 3: /* mode Z: towards 0. */
7852 aarch64_set_FP_double (cpu, rd, trunc (val));
7853 return;
7854
7855 case 4: /* mode A: away from 0. */
7856 aarch64_set_FP_double (cpu, rd, round (val));
7857 return;
7858
7859 case 6: /* mode X: use FPCR with exactness check. */
7860 case 7: /* mode I: use FPCR mode. */
7861 HALT_NYI;
7862
7863 default:
7864 HALT_UNALLOC;
7865 }
7866 }
7867
7868 val = aarch64_get_FP_float (cpu, rs);
7869
7870 switch (rmode)
7871 {
7872     case 0: /* mode N: nearest or even.  */
7873       {
7874 	float rval = roundf (val);
7875 
7876 	/* roundf () resolves ties away from zero; pull exact
7877 	   halfway cases back to the even neighbour.  */
7878 	if (fabsf (val - rval) == 0.5f)
7879 	  rval = 2.0f * roundf (val / 2.0f);
7880 
7881 	aarch64_set_FP_float (cpu, rd, rval);
7882 	return;
7883       }
7885
7886     case 1: /* mode P: towards +inf.  */
7887       aarch64_set_FP_float (cpu, rd, ceilf (val));
7888       return;
7889 
7890     case 2: /* mode M: towards -inf.  */
7891       aarch64_set_FP_float (cpu, rd, floorf (val));
7892       return;
7899
7900 case 3: /* mode Z: towards 0. */
7901 aarch64_set_FP_float (cpu, rd, truncf (val));
7902 return;
7903
7904 case 4: /* mode A: away from 0. */
7905 aarch64_set_FP_float (cpu, rd, roundf (val));
7906 return;
7907
7908 case 6: /* mode X: use FPCR with exactness check. */
7909 case 7: /* mode I: use FPCR mode. */
7910 HALT_NYI;
7911
7912 default:
7913 HALT_UNALLOC;
7914 }
7915 }
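
/* Rounding examples for the code above: FRINTN (mode N) gives
   2.5 -> 2.0 and 3.5 -> 4.0 (ties to even), FRINTP (mode P) gives
   2.3 -> 3.0, FRINTM (mode M) gives -2.3 -> -3.0, and FRINTA (mode A)
   gives 2.5 -> 3.0 and -2.5 -> -3.0 (ties away from zero).  */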
7916
7917 /* Convert half to float. */
7918 static void
7919 do_FCVT_half_to_single (sim_cpu *cpu)
7920 {
7921 unsigned rn = INSTR (9, 5);
7922 unsigned rd = INSTR (4, 0);
7923
7924 NYI_assert (31, 10, 0x7B890);
7925
7926 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7927 aarch64_set_FP_float (cpu, rd, (float) aarch64_get_FP_half (cpu, rn));
7928 }
7929
7930 /* Convert half to double. */
7931 static void
7932 do_FCVT_half_to_double (sim_cpu *cpu)
7933 {
7934 unsigned rn = INSTR (9, 5);
7935 unsigned rd = INSTR (4, 0);
7936
7937 NYI_assert (31, 10, 0x7B8B0);
7938
7939 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7940 aarch64_set_FP_double (cpu, rd, (double) aarch64_get_FP_half (cpu, rn));
7941 }
7942
7943 static void
7944 do_FCVT_single_to_half (sim_cpu *cpu)
7945 {
7946 unsigned rn = INSTR (9, 5);
7947 unsigned rd = INSTR (4, 0);
7948
7949 NYI_assert (31, 10, 0x788F0);
7950
7951 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7952 aarch64_set_FP_half (cpu, rd, aarch64_get_FP_float (cpu, rn));
7953 }
7954
7955 /* Convert double to half. */
7956 static void
7957 do_FCVT_double_to_half (sim_cpu *cpu)
7958 {
7959 unsigned rn = INSTR (9, 5);
7960 unsigned rd = INSTR (4, 0);
7961
7962 NYI_assert (31, 10, 0x798F0);
7963
7964 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7965 aarch64_set_FP_half (cpu, rd, (float) aarch64_get_FP_double (cpu, rn));
7966 }
7967
7968 static void
7969 dexSimpleFPDataProc1Source (sim_cpu *cpu)
7970 {
7971 /* instr[31] ==> M : 0 ==> OK, 1 ==> UNALLOC
7972 instr[30] = 0
7973 instr[29] ==> S : 0 ==> OK, 1 ==> UNALLOC
7974 instr[28,25] = 1111
7975 instr[24] = 0
7976 instr[23,22] ==> type : 00 ==> source is single,
7977 01 ==> source is double
7978 10 ==> UNALLOC
7979 11 ==> UNALLOC or source is half
7980 instr[21] = 1
7981 instr[20,15] ==> opcode : with type 00 or 01
7982 000000 ==> FMOV, 000001 ==> FABS,
7983 000010 ==> FNEG, 000011 ==> FSQRT,
7984 000100 ==> UNALLOC, 000101 ==> FCVT,(to single/double)
7985 000110 ==> UNALLOC, 000111 ==> FCVT (to half)
7986 001000 ==> FRINTN, 001001 ==> FRINTP,
7987 001010 ==> FRINTM, 001011 ==> FRINTZ,
7988 001100 ==> FRINTA, 001101 ==> UNALLOC
7989 001110 ==> FRINTX, 001111 ==> FRINTI
7990 with type 11
7991 000100 ==> FCVT (half-to-single)
7992 000101 ==> FCVT (half-to-double)
7993 instr[14,10] = 10000. */
7994
7995 uint32_t M_S = (INSTR (31, 31) << 1) | INSTR (29, 29);
7996 uint32_t type = INSTR (23, 22);
7997 uint32_t opcode = INSTR (20, 15);
7998
7999 if (M_S != 0)
8000 HALT_UNALLOC;
8001
8002 if (type == 3)
8003 {
8004 if (opcode == 4)
8005 do_FCVT_half_to_single (cpu);
8006 else if (opcode == 5)
8007 do_FCVT_half_to_double (cpu);
8008 else
8009 HALT_UNALLOC;
8010 return;
8011 }
8012
8013 if (type == 2)
8014 HALT_UNALLOC;
8015
8016 switch (opcode)
8017 {
8018 case 0:
8019 if (type)
8020 ffmovd (cpu);
8021 else
8022 ffmovs (cpu);
8023 return;
8024
8025 case 1:
8026 if (type)
8027 fabcpu (cpu);
8028 else
8029 fabss (cpu);
8030 return;
8031
8032 case 2:
8033 if (type)
8034 fnegd (cpu);
8035 else
8036 fnegs (cpu);
8037 return;
8038
8039 case 3:
8040 if (type)
8041 fsqrtd (cpu);
8042 else
8043 fsqrts (cpu);
8044 return;
8045
8046 case 4:
8047 if (type)
8048 fcvtds (cpu);
8049 else
8050 HALT_UNALLOC;
8051 return;
8052
8053 case 5:
8054 if (type)
8055 HALT_UNALLOC;
8056 fcvtcpu (cpu);
8057 return;
8058
8059 case 8: /* FRINTN etc. */
8060 case 9:
8061 case 10:
8062 case 11:
8063 case 12:
8064 case 14:
8065 case 15:
8066 do_FRINT (cpu);
8067 return;
8068
8069 case 7:
8070 if (INSTR (22, 22))
8071 do_FCVT_double_to_half (cpu);
8072 else
8073 do_FCVT_single_to_half (cpu);
8074 return;
8075
8076 case 13:
8077 HALT_NYI;
8078
8079 default:
8080 HALT_UNALLOC;
8081 }
8082 }
8083
8084 /* 32 bit signed int to float. */
8085 static void
8086 scvtf32 (sim_cpu *cpu)
8087 {
8088 unsigned rn = INSTR (9, 5);
8089 unsigned sd = INSTR (4, 0);
8090
8091 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
8092 aarch64_set_FP_float
8093 (cpu, sd, (float) aarch64_get_reg_s32 (cpu, rn, NO_SP));
8094 }
8095
8096 /* signed int to float. */
8097 static void
8098 scvtf (sim_cpu *cpu)
8099 {
8100 unsigned rn = INSTR (9, 5);
8101 unsigned sd = INSTR (4, 0);
8102
8103 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
8104 aarch64_set_FP_float
8105 (cpu, sd, (float) aarch64_get_reg_s64 (cpu, rn, NO_SP));
8106 }
8107
8108 /* 32 bit signed int to double. */
8109 static void
8110 scvtd32 (sim_cpu *cpu)
8111 {
8112 unsigned rn = INSTR (9, 5);
8113 unsigned sd = INSTR (4, 0);
8114
8115 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
8116 aarch64_set_FP_double
8117 (cpu, sd, (double) aarch64_get_reg_s32 (cpu, rn, NO_SP));
8118 }
8119
8120 /* signed int to double. */
8121 static void
8122 scvtd (sim_cpu *cpu)
8123 {
8124 unsigned rn = INSTR (9, 5);
8125 unsigned sd = INSTR (4, 0);
8126
8127 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
8128 aarch64_set_FP_double
8129 (cpu, sd, (double) aarch64_get_reg_s64 (cpu, rn, NO_SP));
8130 }
8131
8132 static const float FLOAT_INT_MAX = (float) INT_MAX;
8133 static const float FLOAT_INT_MIN = (float) INT_MIN;
8134 static const double DOUBLE_INT_MAX = (double) INT_MAX;
8135 static const double DOUBLE_INT_MIN = (double) INT_MIN;
8136 static const float FLOAT_LONG_MAX = (float) LONG_MAX;
8137 static const float FLOAT_LONG_MIN = (float) LONG_MIN;
8138 static const double DOUBLE_LONG_MAX = (double) LONG_MAX;
8139 static const double DOUBLE_LONG_MIN = (double) LONG_MIN;
8140
8141 /* Check for FP exception conditions:
8142 NaN raises IO
8143 Infinity raises IO
8144 Out of Range raises IO and IX and saturates value
8145 Denormal raises ID and IX and sets to zero. */
8146 #define RAISE_EXCEPTIONS(F, VALUE, FTYPE, ITYPE) \
8147 do \
8148 { \
8149 switch (fpclassify (F)) \
8150 { \
8151 case FP_INFINITE: \
8152 case FP_NAN: \
8153 aarch64_set_FPSR (cpu, IO); \
8154 	  if (signbit (F))					\
8155 	    VALUE = ITYPE##_MIN;				\
8156 	  else							\
8157 	    VALUE = ITYPE##_MAX;				\
8158 break; \
8159 \
8160 case FP_NORMAL: \
8161 if (F >= FTYPE##_##ITYPE##_MAX) \
8162 { \
8163 aarch64_set_FPSR_bits (cpu, IO | IX, IO | IX); \
8164 VALUE = ITYPE##_MAX; \
8165 } \
8166 else if (F <= FTYPE##_##ITYPE##_MIN) \
8167 { \
8168 aarch64_set_FPSR_bits (cpu, IO | IX, IO | IX); \
8169 VALUE = ITYPE##_MIN; \
8170 } \
8171 break; \
8172 \
8173 case FP_SUBNORMAL: \
8174 aarch64_set_FPSR_bits (cpu, IO | IX | ID, IX | ID); \
8175 VALUE = 0; \
8176 break; \
8177 \
8178 default: \
8179 case FP_ZERO: \
8180 VALUE = 0; \
8181 break; \
8182 } \
8183 } \
8184 while (0)
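
/* Usage example: converting 3e9f (above INT_MAX) with
   RAISE_EXCEPTIONS (f, value, FLOAT, INT) saturates value to INT_MAX
   and sets the IO and IX cumulative exception bits in the FPSR.  */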
8185
8186 /* 32 bit convert float to signed int truncate towards zero. */
8187 static void
8188 fcvtszs32 (sim_cpu *cpu)
8189 {
8190 unsigned sn = INSTR (9, 5);
8191 unsigned rd = INSTR (4, 0);
8192 /* TODO : check that this rounds toward zero. */
8193 float f = aarch64_get_FP_float (cpu, sn);
8194 int32_t value = (int32_t) f;
8195
8196 RAISE_EXCEPTIONS (f, value, FLOAT, INT);
8197
8198 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
8199 /* Avoid sign extension to 64 bit. */
8200 aarch64_set_reg_u64 (cpu, rd, NO_SP, (uint32_t) value);
8201 }
8202
8203 /* 64 bit convert float to signed int truncate towards zero. */
8204 static void
8205 fcvtszs (sim_cpu *cpu)
8206 {
8207 unsigned sn = INSTR (9, 5);
8208 unsigned rd = INSTR (4, 0);
8209 float f = aarch64_get_FP_float (cpu, sn);
8210 int64_t value = (int64_t) f;
8211
8212 RAISE_EXCEPTIONS (f, value, FLOAT, LONG);
8213
8214 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
8215 aarch64_set_reg_s64 (cpu, rd, NO_SP, value);
8216 }
8217
8218 /* 32 bit convert double to signed int truncate towards zero. */
8219 static void
8220 fcvtszd32 (sim_cpu *cpu)
8221 {
8222 unsigned sn = INSTR (9, 5);
8223 unsigned rd = INSTR (4, 0);
8224 /* TODO : check that this rounds toward zero. */
8225 double d = aarch64_get_FP_double (cpu, sn);
8226 int32_t value = (int32_t) d;
8227
8228 RAISE_EXCEPTIONS (d, value, DOUBLE, INT);
8229
8230 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
8231 /* Avoid sign extension to 64 bit. */
8232 aarch64_set_reg_u64 (cpu, rd, NO_SP, (uint32_t) value);
8233 }
8234
8235 /* 64 bit convert double to signed int truncate towards zero. */
8236 static void
8237 fcvtszd (sim_cpu *cpu)
8238 {
8239 unsigned sn = INSTR (9, 5);
8240 unsigned rd = INSTR (4, 0);
8241 /* TODO : check that this rounds toward zero. */
8242 double d = aarch64_get_FP_double (cpu, sn);
8243 int64_t value;
8244
8245 value = (int64_t) d;
8246
8247 RAISE_EXCEPTIONS (d, value, DOUBLE, LONG);
8248
8249 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
8250 aarch64_set_reg_s64 (cpu, rd, NO_SP, value);
8251 }
8252
8253 static void
8254 do_fcvtzu (sim_cpu *cpu)
8255 {
8256 /* instr[31] = size: 32-bit (0), 64-bit (1)
8257 instr[30,23] = 00111100
8258 instr[22] = type: single (0)/ double (1)
8259      instr[21] = 1 ==> integer conversion, 0 ==> fixed-point (NYI)
8260 instr[20,16] = 11001
8261 instr[15,10] = precision
8262 instr[9,5] = Rs
8263 instr[4,0] = Rd. */
8264
8265 unsigned rs = INSTR (9, 5);
8266 unsigned rd = INSTR (4, 0);
8267
8268 NYI_assert (30, 23, 0x3C);
8269 NYI_assert (20, 16, 0x19);
8270
8271 if (INSTR (21, 21) != 1)
8272 /* Convert to fixed point. */
8273 HALT_NYI;
8274
8275 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
8276 if (INSTR (31, 31))
8277 {
8278 /* Convert to unsigned 64-bit integer. */
8279 if (INSTR (22, 22))
8280 {
8281 double d = aarch64_get_FP_double (cpu, rs);
8282 uint64_t value = (uint64_t) d;
8283
8284 /* Do not raise an exception if we have reached ULONG_MAX. */
8285 if (value != (1UL << 63))
8286 RAISE_EXCEPTIONS (d, value, DOUBLE, LONG);
8287
8288 aarch64_set_reg_u64 (cpu, rd, NO_SP, value);
8289 }
8290 else
8291 {
8292 float f = aarch64_get_FP_float (cpu, rs);
8293 uint64_t value = (uint64_t) f;
8294
8295 /* Do not raise an exception if we have reached ULONG_MAX. */
8296 if (value != (1UL << 63))
8297 RAISE_EXCEPTIONS (f, value, FLOAT, LONG);
8298
8299 aarch64_set_reg_u64 (cpu, rd, NO_SP, value);
8300 }
8301 }
8302 else
8303 {
8304 uint32_t value;
8305
8306 /* Convert to unsigned 32-bit integer. */
8307 if (INSTR (22, 22))
8308 {
8309 double d = aarch64_get_FP_double (cpu, rs);
8310
8311 value = (uint32_t) d;
8312 /* Do not raise an exception if we have reached UINT_MAX. */
8313 if (value != (1UL << 31))
8314 RAISE_EXCEPTIONS (d, value, DOUBLE, INT);
8315 }
8316 else
8317 {
8318 float f = aarch64_get_FP_float (cpu, rs);
8319
8320 value = (uint32_t) f;
8321 /* Do not raise an exception if we have reached UINT_MAX. */
8322 if (value != (1UL << 31))
8323 RAISE_EXCEPTIONS (f, value, FLOAT, INT);
8324 }
8325
8326 aarch64_set_reg_u64 (cpu, rd, NO_SP, value);
8327 }
8328 }
8329
8330 static void
8331 do_UCVTF (sim_cpu *cpu)
8332 {
8333 /* instr[31] = size: 32-bit (0), 64-bit (1)
8334 instr[30,23] = 001 1110 0
8335 instr[22] = type: single (0)/ double (1)
8336      instr[21] = 1 ==> integer conversion, 0 ==> fixed-point (NYI)
8337 instr[20,16] = 0 0011
8338 instr[15,10] = precision
8339 instr[9,5] = Rs
8340 instr[4,0] = Rd. */
8341
8342 unsigned rs = INSTR (9, 5);
8343 unsigned rd = INSTR (4, 0);
8344
8345 NYI_assert (30, 23, 0x3C);
8346 NYI_assert (20, 16, 0x03);
8347
8348 if (INSTR (21, 21) != 1)
8349 HALT_NYI;
8350
8351 /* FIXME: Add exception raising. */
8352 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
8353 if (INSTR (31, 31))
8354 {
8355 uint64_t value = aarch64_get_reg_u64 (cpu, rs, NO_SP);
8356
8357 if (INSTR (22, 22))
8358 aarch64_set_FP_double (cpu, rd, (double) value);
8359 else
8360 aarch64_set_FP_float (cpu, rd, (float) value);
8361 }
8362 else
8363 {
8364 uint32_t value = aarch64_get_reg_u32 (cpu, rs, NO_SP);
8365
8366 if (INSTR (22, 22))
8367 aarch64_set_FP_double (cpu, rd, (double) value);
8368 else
8369 aarch64_set_FP_float (cpu, rd, (float) value);
8370 }
8371 }
8372
8373 static void
8374 float_vector_move (sim_cpu *cpu)
8375 {
8376 /* instr[31,17] == 100 1111 0101 0111
8377 instr[16] ==> direction 0=> to GR, 1=> from GR
8378      instr[15,10] => 00 0000, ow ==> UNALLOC
8379 instr[9,5] ==> source
8380 instr[4,0] ==> dest. */
8381
8382 unsigned rn = INSTR (9, 5);
8383 unsigned rd = INSTR (4, 0);
8384
8385 NYI_assert (31, 17, 0x4F57);
8386
8387 if (INSTR (15, 10) != 0)
8388 HALT_UNALLOC;
8389
8390 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
8391 if (INSTR (16, 16))
8392 aarch64_set_vec_u64 (cpu, rd, 1, aarch64_get_reg_u64 (cpu, rn, NO_SP));
8393 else
8394 aarch64_set_reg_u64 (cpu, rd, NO_SP, aarch64_get_vec_u64 (cpu, rn, 1));
8395 }
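
/* Example: FMOV x0, v1.d[1] (instr[16] = 0) copies the upper 64 bits
   of V1 into X0, and FMOV v1.d[1], x0 (instr[16] = 1) goes the other
   way; moves of element 0 use the ordinary FMOV encodings instead.  */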
8396
8397 static void
8398 dexSimpleFPIntegerConvert (sim_cpu *cpu)
8399 {
8400 /* instr[31] = size : 0 ==> 32 bit, 1 ==> 64 bit
8401      instr[30] = 0
8402 instr[29] = S : 0 ==> OK, 1 ==> UNALLOC
8403 instr[28,25] = 1111
8404 instr[24] = 0
8405 instr[23,22] = type : 00 ==> single, 01 ==> double, 1x ==> UNALLOC
8406 instr[21] = 1
8407 instr[20,19] = rmode
8408 instr[18,16] = opcode
8409 instr[15,10] = 10 0000 */
8410
8411 uint32_t rmode_opcode;
8412 uint32_t size_type;
8413 uint32_t type;
8414 uint32_t size;
8415 uint32_t S;
8416
8417 if (INSTR (31, 17) == 0x4F57)
8418 {
8419 float_vector_move (cpu);
8420 return;
8421 }
8422
8423 size = INSTR (31, 31);
8424 S = INSTR (29, 29);
8425 if (S != 0)
8426 HALT_UNALLOC;
8427
8428 type = INSTR (23, 22);
8429 if (type > 1)
8430 HALT_UNALLOC;
8431
8432 rmode_opcode = INSTR (20, 16);
8433 size_type = (size << 1) | type; /* 0==32f, 1==32d, 2==64f, 3==64d. */
8434
8435 switch (rmode_opcode)
8436 {
8437 case 2: /* SCVTF. */
8438 switch (size_type)
8439 {
8440 case 0: scvtf32 (cpu); return;
8441 case 1: scvtd32 (cpu); return;
8442 case 2: scvtf (cpu); return;
8443 case 3: scvtd (cpu); return;
8444 }
8445
8446 case 6: /* FMOV GR, Vec. */
8447 switch (size_type)
8448 {
8449 case 0: gfmovs (cpu); return;
8450 case 3: gfmovd (cpu); return;
8451 default: HALT_UNALLOC;
8452 }
8453
8454 case 7: /* FMOV vec, GR. */
8455 switch (size_type)
8456 {
8457 case 0: fgmovs (cpu); return;
8458 case 3: fgmovd (cpu); return;
8459 default: HALT_UNALLOC;
8460 }
8461
8462 case 24: /* FCVTZS. */
8463 switch (size_type)
8464 {
8465 case 0: fcvtszs32 (cpu); return;
8466 case 1: fcvtszd32 (cpu); return;
8467 case 2: fcvtszs (cpu); return;
8468 case 3: fcvtszd (cpu); return;
8469 }
8470
8471 case 25: do_fcvtzu (cpu); return;
8472 case 3: do_UCVTF (cpu); return;
8473
8474 case 0: /* FCVTNS. */
8475 case 1: /* FCVTNU. */
8476 case 4: /* FCVTAS. */
8477 case 5: /* FCVTAU. */
8478 case 8: /* FCVTPS. */
8479 case 9: /* FCVTPU. */
8480 case 16: /* FCVTMS. */
8481 case 17: /* FCVTMU. */
8482 default:
8483 HALT_NYI;
8484 }
8485 }
8486
8487 static void
8488 set_flags_for_float_compare (sim_cpu *cpu, float fvalue1, float fvalue2)
8489 {
8490 uint32_t flags;
8491
8492 if (isnan (fvalue1) || isnan (fvalue2))
8493 flags = C|V;
8494 else
8495 {
8496 float result = fvalue1 - fvalue2;
8497
8498 if (result == 0.0)
8499 flags = Z|C;
8500 else if (result < 0)
8501 flags = N;
8502 else /* (result > 0). */
8503 flags = C;
8504 }
8505
8506 aarch64_set_CPSR (cpu, flags);
8507 }
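/* Illustrative sketch, kept out of the build: a stand-alone check of
   the NZCV outcomes produced by set_flags_for_float_compare above.
   The hypothetical helper below mirrors that function's logic; the
   flag masks N, Z, C and V are assumed to come from cpustate.h as
   elsewhere in this file.  */
#if 0
#include <assert.h>

static uint32_t
fcmp_flags (float a, float b)   /* Hypothetical, unused by the sim.  */
{
  if (isnan (a) || isnan (b))
    return C | V;               /* Unordered.  */
  if (a == b)
    return Z | C;               /* Equal.  */
  return a < b ? N : C;         /* Less than / greater than.  */
}

static void
check_fcmp_flags (void)
{
  assert (fcmp_flags (1.0f, 1.0f) == (Z | C));
  assert (fcmp_flags (1.0f, 2.0f) == N);
  assert (fcmp_flags (2.0f, 1.0f) == C);
  assert (fcmp_flags (NAN, 1.0f) == (C | V));
}
#endif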
8508
8509 static void
8510 fcmps (sim_cpu *cpu)
8511 {
8512 unsigned sm = INSTR (20, 16);
8513 unsigned sn = INSTR ( 9, 5);
8514
8515 float fvalue1 = aarch64_get_FP_float (cpu, sn);
8516 float fvalue2 = aarch64_get_FP_float (cpu, sm);
8517
8518 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
8519 set_flags_for_float_compare (cpu, fvalue1, fvalue2);
8520 }
8521
8522 /* Float compare to zero -- Invalid Operation exception
8523 only on signaling NaNs. */
8524 static void
8525 fcmpzs (sim_cpu *cpu)
8526 {
8527 unsigned sn = INSTR ( 9, 5);
8528 float fvalue1 = aarch64_get_FP_float (cpu, sn);
8529
8530 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
8531 set_flags_for_float_compare (cpu, fvalue1, 0.0f);
8532 }
8533
8534 /* Float compare -- Invalid Operation exception on all NaNs. */
8535 static void
8536 fcmpes (sim_cpu *cpu)
8537 {
8538 unsigned sm = INSTR (20, 16);
8539 unsigned sn = INSTR ( 9, 5);
8540
8541 float fvalue1 = aarch64_get_FP_float (cpu, sn);
8542 float fvalue2 = aarch64_get_FP_float (cpu, sm);
8543
8544 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
8545 set_flags_for_float_compare (cpu, fvalue1, fvalue2);
8546 }
8547
8548 /* Float compare to zero -- Invalid Operation exception on all NaNs. */
8549 static void
8550 fcmpzes (sim_cpu *cpu)
8551 {
8552 unsigned sn = INSTR ( 9, 5);
8553 float fvalue1 = aarch64_get_FP_float (cpu, sn);
8554
8555 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
8556 set_flags_for_float_compare (cpu, fvalue1, 0.0f);
8557 }
8558
8559 static void
8560 set_flags_for_double_compare (sim_cpu *cpu, double dval1, double dval2)
8561 {
8562 uint32_t flags;
8563
8564 if (isnan (dval1) || isnan (dval2))
8565 flags = C|V;
8566 else
8567 {
8568 double result = dval1 - dval2;
8569
8570 if (result == 0.0)
8571 flags = Z|C;
8572 else if (result < 0)
8573 flags = N;
8574 else /* (result > 0). */
8575 flags = C;
8576 }
8577
8578 aarch64_set_CPSR (cpu, flags);
8579 }
8580
8581 /* Double compare -- Invalid Operation exception only on signaling NaNs. */
8582 static void
8583 fcmpd (sim_cpu *cpu)
8584 {
8585 unsigned sm = INSTR (20, 16);
8586 unsigned sn = INSTR ( 9, 5);
8587
8588 double dvalue1 = aarch64_get_FP_double (cpu, sn);
8589 double dvalue2 = aarch64_get_FP_double (cpu, sm);
8590
8591 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
8592 set_flags_for_double_compare (cpu, dvalue1, dvalue2);
8593 }
8594
8595 /* Double compare to zero -- Invalid Operation exception
8596 only on signaling NaNs. */
8597 static void
8598 fcmpzd (sim_cpu *cpu)
8599 {
8600 unsigned sn = INSTR ( 9, 5);
8601 double dvalue1 = aarch64_get_FP_double (cpu, sn);
8602
8603 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
8604 set_flags_for_double_compare (cpu, dvalue1, 0.0);
8605 }
8606
8607 /* Double compare -- Invalid Operation exception on all NaNs. */
8608 static void
8609 fcmped (sim_cpu *cpu)
8610 {
8611 unsigned sm = INSTR (20, 16);
8612 unsigned sn = INSTR ( 9, 5);
8613
8614 double dvalue1 = aarch64_get_FP_double (cpu, sn);
8615 double dvalue2 = aarch64_get_FP_double (cpu, sm);
8616
8617 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
8618 set_flags_for_double_compare (cpu, dvalue1, dvalue2);
8619 }
8620
8621 /* Double compare to zero -- Invalid Operation exception on all NaNs. */
8622 static void
8623 fcmpzed (sim_cpu *cpu)
8624 {
8625 unsigned sn = INSTR ( 9, 5);
8626 double dvalue1 = aarch64_get_FP_double (cpu, sn);
8627
8628 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
8629 set_flags_for_double_compare (cpu, dvalue1, 0.0);
8630 }
8631
8632 static void
8633 dexSimpleFPCompare (sim_cpu *cpu)
8634 {
8635 /* assert instr[28,25] == 1111
8636 instr[30:24:21:13,10] = 0011000
8637 instr[31] = M : 0 ==> OK, 1 ==> UNALLOC
8638 instr[29] ==> S : 0 ==> OK, 1 ==> UNALLOC
8639 instr[23,22] ==> type : 0 ==> single, 01 ==> double, 1x ==> UNALLOC
8640 instr[15,14] ==> op : 00 ==> OK, ow ==> UNALLOC
8641 instr[4,0] ==> opcode2 : 00000 ==> FCMP, 10000 ==> FCMPE,
8642 01000 ==> FCMPZ, 11000 ==> FCMPEZ,
8643 ow ==> UNALLOC */
8644 uint32_t dispatch;
8645 uint32_t M_S = (INSTR (31, 31) << 1) | INSTR (29, 29);
8646 uint32_t type = INSTR (23, 22);
8647 uint32_t op = INSTR (15, 14);
8648 uint32_t op2_2_0 = INSTR (2, 0);
8649
8650 if (op2_2_0 != 0)
8651 HALT_UNALLOC;
8652
8653 if (M_S != 0)
8654 HALT_UNALLOC;
8655
8656 if (type > 1)
8657 HALT_UNALLOC;
8658
8659 if (op != 0)
8660 HALT_UNALLOC;
8661
8662 /* dispatch on type and top 2 bits of opcode. */
8663 dispatch = (type << 2) | INSTR (4, 3);
8664
8665 switch (dispatch)
8666 {
8667 case 0: fcmps (cpu); return;
8668 case 1: fcmpzs (cpu); return;
8669 case 2: fcmpes (cpu); return;
8670 case 3: fcmpzes (cpu); return;
8671 case 4: fcmpd (cpu); return;
8672 case 5: fcmpzd (cpu); return;
8673 case 6: fcmped (cpu); return;
8674 case 7: fcmpzed (cpu); return;
8675 }
8676 }
8677
8678 static void
8679 do_scalar_FADDP (sim_cpu *cpu)
8680 {
8681 /* instr [31,23] = 0111 1110 0
8682 instr [22] = single(0)/double(1)
8683 instr [21,10] = 11 0000 1101 10
8684 instr [9,5] = Fn
8685 instr [4,0] = Fd. */
8686
8687 unsigned Fn = INSTR (9, 5);
8688 unsigned Fd = INSTR (4, 0);
8689
8690 NYI_assert (31, 23, 0x0FC);
8691 NYI_assert (21, 10, 0xC36);
8692
8693 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
8694 if (INSTR (22, 22))
8695 {
8696 double val1 = aarch64_get_vec_double (cpu, Fn, 0);
8697 double val2 = aarch64_get_vec_double (cpu, Fn, 1);
8698
8699 aarch64_set_FP_double (cpu, Fd, val1 + val2);
8700 }
8701 else
8702 {
8703 float val1 = aarch64_get_vec_float (cpu, Fn, 0);
8704 float val2 = aarch64_get_vec_float (cpu, Fn, 1);
8705
8706 aarch64_set_FP_float (cpu, Fd, val1 + val2);
8707 }
8708 }
8709
8710 /* Floating point absolute difference. */
8711
8712 static void
8713 do_scalar_FABD (sim_cpu *cpu)
8714 {
8715 /* instr [31,23] = 0111 1110 1
8716 instr [22] = float(0)/double(1)
8717 instr [21] = 1
8718 instr [20,16] = Rm
8719 instr [15,10] = 1101 01
8720 instr [9, 5] = Rn
8721 instr [4, 0] = Rd. */
8722
8723 unsigned rm = INSTR (20, 16);
8724 unsigned rn = INSTR (9, 5);
8725 unsigned rd = INSTR (4, 0);
8726
8727 NYI_assert (31, 23, 0x0FD);
8728 NYI_assert (21, 21, 1);
8729 NYI_assert (15, 10, 0x35);
8730
8731 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
8732 if (INSTR (22, 22))
8733 aarch64_set_FP_double (cpu, rd,
8734 fabs (aarch64_get_FP_double (cpu, rn)
8735 - aarch64_get_FP_double (cpu, rm)));
8736 else
8737 aarch64_set_FP_float (cpu, rd,
8738 fabsf (aarch64_get_FP_float (cpu, rn)
8739 - aarch64_get_FP_float (cpu, rm)));
8740 }
8741
8742 static void
8743 do_scalar_CMGT (sim_cpu *cpu)
8744 {
8745 /* instr [31,21] = 0101 1110 111
8746 instr [20,16] = Rm
8747 instr [15,10] = 00 1101
8748 instr [9, 5] = Rn
8749 instr [4, 0] = Rd. */
8750
8751 unsigned rm = INSTR (20, 16);
8752 unsigned rn = INSTR (9, 5);
8753 unsigned rd = INSTR (4, 0);
8754
8755 NYI_assert (31, 21, 0x2F7);
8756 NYI_assert (15, 10, 0x0D);
8757
8758 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
8759 aarch64_set_vec_u64 (cpu, rd, 0,
8760 aarch64_get_vec_u64 (cpu, rn, 0) >
8761 aarch64_get_vec_u64 (cpu, rm, 0) ? -1L : 0L);
8762 }
8763
8764 static void
8765 do_scalar_USHR (sim_cpu *cpu)
8766 {
8767 /* instr [31,23] = 0111 1111 0
8768 instr [22,16] = shift amount
8769 instr [15,10] = 0000 01
8770 instr [9, 5] = Rn
8771 instr [4, 0] = Rd. */
8772
8773 unsigned amount = 128 - INSTR (22, 16);
8774 unsigned rn = INSTR (9, 5);
8775 unsigned rd = INSTR (4, 0);
8776
8777 NYI_assert (31, 23, 0x0FE);
8778 NYI_assert (15, 10, 0x01);
8779
8780 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
8781 aarch64_set_vec_u64 (cpu, rd, 0,
8782 aarch64_get_vec_u64 (cpu, rn, 0) >> amount);
8783 }
8784
8785 static void
8786 do_scalar_SSHL (sim_cpu *cpu)
8787 {
8788 /* instr [31,21] = 0101 1110 111
8789 instr [20,16] = Rm
8790 instr [15,10] = 0100 01
8791 instr [9, 5] = Rn
8792 instr [4, 0] = Rd. */
8793
8794 unsigned rm = INSTR (20, 16);
8795 unsigned rn = INSTR (9, 5);
8796 unsigned rd = INSTR (4, 0);
8797 signed int shift = aarch64_get_vec_s8 (cpu, rm, 0);
8798
8799 NYI_assert (31, 21, 0x2F7);
8800 NYI_assert (15, 10, 0x11);
8801
8802 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
8803 if (shift >= 0)
8804 aarch64_set_vec_s64 (cpu, rd, 0,
8805 aarch64_get_vec_s64 (cpu, rn, 0) << shift);
8806 else
8807 aarch64_set_vec_s64 (cpu, rd, 0,
8808 aarch64_get_vec_s64 (cpu, rn, 0) >> - shift);
8809 }
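/* Illustrative sketch, kept out of the build: SSHL reads its shift
   count from the bottom byte of Rm as a signed quantity, so a
   negative count becomes a right shift, exactly as modelled above.  */
#if 0
#include <assert.h>

static void
check_sshl_semantics (void)   /* Hypothetical check, unused by the sim.  */
{
  int64_t val = -8;
  int8_t shift = -2;          /* Negative count ==> shift right.  */
  int64_t res = shift >= 0 ? val << shift : val >> -shift;
  assert (res == -2);         /* Arithmetic shift: -8 >> 2 == -2.  */
}
#endif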
8810
8811 static void
8812 do_scalar_shift (sim_cpu *cpu)
8813 {
8814 /* instr [31,23] = 0101 1111 0
8815 instr [22,16] = shift amount
8816 instr [15,10] = 0101 01 [SHL]
8817 instr [15,10] = 0000 01 [SSHR]
8818 instr [9, 5] = Rn
8819 instr [4, 0] = Rd. */
8820
8821 unsigned rn = INSTR (9, 5);
8822 unsigned rd = INSTR (4, 0);
8823 unsigned amount;
8824
8825 NYI_assert (31, 23, 0x0BE);
8826
8827 if (INSTR (22, 22) == 0)
8828 HALT_UNALLOC;
8829
8830 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
8831 switch (INSTR (15, 10))
8832 {
8833 case 0x01: /* SSHR */
8834 amount = 128 - INSTR (22, 16);
8835 aarch64_set_vec_s64 (cpu, rd, 0,
8836 aarch64_get_vec_s64 (cpu, rn, 0) >> amount);
8837 return;
8838 case 0x15: /* SHL */
8839 amount = INSTR (22, 16) - 64;
8840 aarch64_set_vec_u64 (cpu, rd, 0,
8841 aarch64_get_vec_u64 (cpu, rn, 0) << amount);
8842 return;
8843 default:
8844 HALT_NYI;
8845 }
8846 }
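/* Illustrative sketch, kept out of the build: worked examples of the
   shift-amount decoding used above.  For 64-bit scalar shifts the
   immh:immb field (instr[22,16]) encodes SSHR as 128 - immh:immb and
   SHL as immh:immb - 64.  */
#if 0
#include <assert.h>

static void
check_scalar_shift_decode (void)   /* Hypothetical check.  */
{
  unsigned immh_immb = 0x7C;       /* instr[22,16] == 111 1100.  */
  assert (128 - immh_immb == 4);   /* SSHR Dd, Dn, #4.  */
  assert (immh_immb - 64 == 60);   /* SHL Dd, Dn, #60.  */
}
#endif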
8847
8848 /* FCMEQ FCMGT FCMGE. */
8849 static void
8850 do_scalar_FCM (sim_cpu *cpu)
8851 {
8852 /* instr [31,30] = 01
8853 instr [29] = U
8854 instr [28,24] = 1 1110
8855 instr [23] = E
8856 instr [22] = size
8857 instr [21] = 1
8858 instr [20,16] = Rm
8859 instr [15,12] = 1110
8860 instr [11] = AC
8861 instr [10] = 1
8862 instr [9, 5] = Rn
8863 instr [4, 0] = Rd. */
8864
8865 unsigned rm = INSTR (20, 16);
8866 unsigned rn = INSTR (9, 5);
8867 unsigned rd = INSTR (4, 0);
8868 unsigned EUac = (INSTR (23, 23) << 2) | (INSTR (29, 29) << 1) | INSTR (11, 11);
8869 unsigned result;
8870 float val1;
8871 float val2;
8872
8873 NYI_assert (31, 30, 1);
8874 NYI_assert (28, 24, 0x1E);
8875 NYI_assert (21, 21, 1);
8876 NYI_assert (15, 12, 0xE);
8877 NYI_assert (10, 10, 1);
8878
8879 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
8880 if (INSTR (22, 22))
8881 {
8882 double val1 = aarch64_get_FP_double (cpu, rn);
8883 double val2 = aarch64_get_FP_double (cpu, rm);
8884
8885 switch (EUac)
8886 {
8887 case 0: /* 000 */
8888 result = val1 == val2;
8889 break;
8890
8891 case 3: /* 011 */
8892 val1 = fabs (val1);
8893 val2 = fabs (val2);
8894 /* Fall through. */
8895 case 2: /* 010 */
8896 result = val1 >= val2;
8897 break;
8898
8899 case 7: /* 111 */
8900 val1 = fabs (val1);
8901 val2 = fabs (val2);
8902 /* Fall through. */
8903 case 6: /* 110 */
8904 result = val1 > val2;
8905 break;
8906
8907 default:
8908 HALT_UNALLOC;
8909 }
8910
8911 aarch64_set_vec_u32 (cpu, rd, 0, result ? -1 : 0);
8912 return;
8913 }
8914
8915 val1 = aarch64_get_FP_float (cpu, rn);
8916 val2 = aarch64_get_FP_float (cpu, rm);
8917
8918 switch (EUac)
8919 {
8920 case 0: /* 000 */
8921 result = val1 == val2;
8922 break;
8923
8924 case 3: /* 011 */
8925 val1 = fabsf (val1);
8926 val2 = fabsf (val2);
8927 /* Fall through. */
8928 case 2: /* 010 */
8929 result = val1 >= val2;
8930 break;
8931
8932 case 7: /* 111 */
8933 val1 = fabsf (val1);
8934 val2 = fabsf (val2);
8935 /* Fall through. */
8936 case 6: /* 110 */
8937 result = val1 > val2;
8938 break;
8939
8940 default:
8941 HALT_UNALLOC;
8942 }
8943
8944 aarch64_set_vec_u32 (cpu, rd, 0, result ? -1 : 0);
8945 }
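/* Illustrative sketch, kept out of the build: the EUac selector
   dispatched on above packs instruction bits 23 (E), 29 (U) and
   11 (ac).  Assuming the usual encodings, the selector values map to
   FCMEQ (000), FCMGE (010), FACGE (011), FCMGT (110), FACGT (111).  */
#if 0
#include <assert.h>

static unsigned
pack_EUac (unsigned E, unsigned U, unsigned ac)   /* Hypothetical helper.  */
{
  return (E << 2) | (U << 1) | ac;
}

static void
check_EUac_packing (void)
{
  assert (pack_EUac (0, 0, 0) == 0);   /* FCMEQ.  */
  assert (pack_EUac (0, 1, 0) == 2);   /* FCMGE.  */
  assert (pack_EUac (0, 1, 1) == 3);   /* FACGE.  */
  assert (pack_EUac (1, 1, 0) == 6);   /* FCMGT.  */
  assert (pack_EUac (1, 1, 1) == 7);   /* FACGT.  */
}
#endif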
8946
8947 /* An alias of DUP. */
8948 static void
8949 do_scalar_MOV (sim_cpu *cpu)
8950 {
8951 /* instr [31,21] = 0101 1110 000
8952 instr [20,16] = imm5
8953 instr [15,10] = 0000 01
8954 instr [9, 5] = Rn
8955 instr [4, 0] = Rd. */
8956
8957 unsigned rn = INSTR (9, 5);
8958 unsigned rd = INSTR (4, 0);
8959 unsigned index;
8960
8961 NYI_assert (31, 21, 0x2F0);
8962 NYI_assert (15, 10, 0x01);
8963
8964 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
8965 if (INSTR (16, 16))
8966 {
8967 /* 8-bit. */
8968 index = INSTR (20, 17);
8969 aarch64_set_vec_u8
8970 (cpu, rd, 0, aarch64_get_vec_u8 (cpu, rn, index));
8971 }
8972 else if (INSTR (17, 17))
8973 {
8974 /* 16-bit. */
8975 index = INSTR (20, 18);
8976 aarch64_set_vec_u16
8977 (cpu, rd, 0, aarch64_get_vec_u16 (cpu, rn, index));
8978 }
8979 else if (INSTR (18, 18))
8980 {
8981 /* 32-bit. */
8982 index = INSTR (20, 19);
8983 aarch64_set_vec_u32
8984 (cpu, rd, 0, aarch64_get_vec_u32 (cpu, rn, index));
8985 }
8986 else if (INSTR (19, 19))
8987 {
8988 /* 64-bit. */
8989 index = INSTR (20, 20);
8990 aarch64_set_vec_u64
8991 (cpu, rd, 0, aarch64_get_vec_u64 (cpu, rn, index));
8992 }
8993 else
8994 HALT_UNALLOC;
8995 }
8996
8997 static void
8998 do_scalar_NEG (sim_cpu *cpu)
8999 {
9000 /* instr [31,10] = 0111 1110 1110 0000 1011 10
9001 instr [9, 5] = Rn
9002 instr [4, 0] = Rd. */
9003
9004 unsigned rn = INSTR (9, 5);
9005 unsigned rd = INSTR (4, 0);
9006
9007 NYI_assert (31, 10, 0x1FB82E);
9008
9009 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9010 aarch64_set_vec_u64 (cpu, rd, 0, - aarch64_get_vec_u64 (cpu, rn, 0));
9011 }
9012
9013 static void
9014 do_scalar_USHL (sim_cpu *cpu)
9015 {
9016 /* instr [31,21] = 0111 1110 111
9017 instr [20,16] = Rm
9018 instr [15,10] = 0100 01
9019 instr [9, 5] = Rn
9020 instr [4, 0] = Rd. */
9021
9022 unsigned rm = INSTR (20, 16);
9023 unsigned rn = INSTR (9, 5);
9024 unsigned rd = INSTR (4, 0);
9025 signed int shift = aarch64_get_vec_s8 (cpu, rm, 0);
9026
9027 NYI_assert (31, 21, 0x3F7);
9028 NYI_assert (15, 10, 0x11);
9029
9030 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9031 if (shift >= 0)
9032 aarch64_set_vec_u64 (cpu, rd, 0, aarch64_get_vec_u64 (cpu, rn, 0) << shift);
9033 else
9034 aarch64_set_vec_u64 (cpu, rd, 0, aarch64_get_vec_u64 (cpu, rn, 0) >> - shift);
9035 }
9036
9037 static void
9038 do_double_add (sim_cpu *cpu)
9039 {
9040 /* instr [31,21] = 0101 1110 111
9041 instr [20,16] = Fn
9042 instr [15,10] = 1000 01
9043 instr [9,5] = Fm
9044 instr [4,0] = Fd. */
9045 unsigned Fd;
9046 unsigned Fm;
9047 unsigned Fn;
9048 double val1;
9049 double val2;
9050
9051 NYI_assert (31, 21, 0x2F7);
9052 NYI_assert (15, 10, 0x21);
9053
9054 Fd = INSTR (4, 0);
9055 Fm = INSTR (9, 5);
9056 Fn = INSTR (20, 16);
9057
9058 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9059 val1 = aarch64_get_FP_double (cpu, Fm);
9060 val2 = aarch64_get_FP_double (cpu, Fn);
9061
9062 aarch64_set_FP_double (cpu, Fd, val1 + val2);
9063 }
9064
9065 static void
9066 do_scalar_UCVTF (sim_cpu *cpu)
9067 {
9068 /* instr [31,23] = 0111 1110 0
9069 instr [22] = single(0)/double(1)
9070 instr [21,10] = 10 0001 1101 10
9071 instr [9,5] = rn
9072 instr [4,0] = rd. */
9073
9074 unsigned rn = INSTR (9, 5);
9075 unsigned rd = INSTR (4, 0);
9076
9077 NYI_assert (31, 23, 0x0FC);
9078 NYI_assert (21, 10, 0x876);
9079
9080 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9081 if (INSTR (22, 22))
9082 {
9083 uint64_t val = aarch64_get_vec_u64 (cpu, rn, 0);
9084
9085 aarch64_set_vec_double (cpu, rd, 0, (double) val);
9086 }
9087 else
9088 {
9089 uint32_t val = aarch64_get_vec_u32 (cpu, rn, 0);
9090
9091 aarch64_set_vec_float (cpu, rd, 0, (float) val);
9092 }
9093 }
9094
9095 static void
9096 do_scalar_vec (sim_cpu *cpu)
9097 {
9098 /* instr [30] = 1. */
9099 /* instr [28,25] = 1111. */
9100 switch (INSTR (31, 23))
9101 {
9102 case 0xBC:
9103 switch (INSTR (15, 10))
9104 {
9105 case 0x01: do_scalar_MOV (cpu); return;
9106 case 0x39: do_scalar_FCM (cpu); return;
9107 case 0x3B: do_scalar_FCM (cpu); return;
9108 }
9109 break;
9110
9111 case 0xBE: do_scalar_shift (cpu); return;
9112
9113 case 0xFC:
9114 switch (INSTR (15, 10))
9115 {
9116 case 0x36:
9117 switch (INSTR (21, 16))
9118 {
9119 case 0x30: do_scalar_FADDP (cpu); return;
9120 case 0x21: do_scalar_UCVTF (cpu); return;
9121 }
9122 HALT_NYI;
9123 case 0x39: do_scalar_FCM (cpu); return;
9124 case 0x3B: do_scalar_FCM (cpu); return;
9125 }
9126 break;
9127
9128 case 0xFD:
9129 switch (INSTR (15, 10))
9130 {
9131 case 0x0D: do_scalar_CMGT (cpu); return;
9132 case 0x11: do_scalar_USHL (cpu); return;
9133 case 0x2E: do_scalar_NEG (cpu); return;
9134 case 0x35: do_scalar_FABD (cpu); return;
9135 case 0x39: do_scalar_FCM (cpu); return;
9136 case 0x3B: do_scalar_FCM (cpu); return;
9137 default:
9138 HALT_NYI;
9139 }
9140
9141 case 0xFE: do_scalar_USHR (cpu); return;
9142
9143 case 0xBD:
9144 switch (INSTR (15, 10))
9145 {
9146 case 0x21: do_double_add (cpu); return;
9147 case 0x11: do_scalar_SSHL (cpu); return;
9148 default:
9149 HALT_NYI;
9150 }
9151
9152 default:
9153 HALT_NYI;
9154 }
9155 }
9156
9157 static void
9158 dexAdvSIMD1 (sim_cpu *cpu)
9159 {
9160 /* instr [28,25] = 1 111. */
9161
9162 /* We are currently only interested in the basic
9163 scalar fp routines which all have bit 30 = 0. */
9164 if (INSTR (30, 30))
9165 do_scalar_vec (cpu);
9166
9167 /* instr[24] is set for FP data processing 3-source and clear for
9168 all other basic scalar fp instruction groups. */
9169 else if (INSTR (24, 24))
9170 dexSimpleFPDataProc3Source (cpu);
9171
9172 /* instr[21] is clear for floating <-> fixed conversions and set for
9173 all other basic scalar fp instruction groups. */
9174 else if (!INSTR (21, 21))
9175 dexSimpleFPFixedConvert (cpu);
9176
9177 /* instr[11,10] : 01 ==> cond compare, 10 ==> Data Proc 2 Source
9178 11 ==> cond select, 00 ==> other. */
9179 else
9180 switch (INSTR (11, 10))
9181 {
9182 case 1: dexSimpleFPCondCompare (cpu); return;
9183 case 2: dexSimpleFPDataProc2Source (cpu); return;
9184 case 3: dexSimpleFPCondSelect (cpu); return;
9185
9186 default:
9187 /* Now an ordered cascade of tests.
9188 FP immediate has instr [12] == 1.
9189 FP compare has instr [13] == 1.
9190 FP Data Proc 1 Source has instr [14] == 1.
9191 FP floating <--> integer conversions has instr [15] == 0. */
9192 if (INSTR (12, 12))
9193 dexSimpleFPImmediate (cpu);
9194
9195 else if (INSTR (13, 13))
9196 dexSimpleFPCompare (cpu);
9197
9198 else if (INSTR (14, 14))
9199 dexSimpleFPDataProc1Source (cpu);
9200
9201 else if (!INSTR (15, 15))
9202 dexSimpleFPIntegerConvert (cpu);
9203
9204 else
9205 /* If we get here then instr[15] == 1 which means UNALLOC. */
9206 HALT_UNALLOC;
9207 }
9208 }
9209
9210 /* PC relative addressing. */
9211
9212 static void
9213 pcadr (sim_cpu *cpu)
9214 {
9215 /* instr[31] = op : 0 ==> ADR, 1 ==> ADRP
9216 instr[30,29] = immlo
9217 instr[23,5] = immhi. */
9218 uint64_t address;
9219 unsigned rd = INSTR (4, 0);
9220 uint32_t isPage = INSTR (31, 31);
9221 union { uint64_t u64; int64_t s64; } imm;
9222 uint64_t offset;
9223
9224 imm.s64 = simm64 (aarch64_get_instr (cpu), 23, 5);
9225 offset = imm.u64;
9226 offset = (offset << 2) | INSTR (30, 29);
9227
9228 address = aarch64_get_PC (cpu);
9229
9230 if (isPage)
9231 {
9232 offset <<= 12;
9233 address &= ~0xfff;
9234 }
9235
9236 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9237 aarch64_set_reg_u64 (cpu, rd, NO_SP, address + offset);
9238 }
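/* Illustrative sketch, kept out of the build: the ADRP form of the
   computation above with example numbers.  The immediate is shifted
   up by 12 and added to the PC with its low 12 bits cleared.  */
#if 0
#include <assert.h>

static void
check_adrp_address (void)           /* Hypothetical check.  */
{
  uint64_t pc = 0x400123ull;        /* Example PC.  */
  uint64_t offset = 1;              /* immhi:immlo == 1.  */
  uint64_t page = pc & ~0xfffull;   /* 0x400000.  */
  assert (page + (offset << 12) == 0x401000ull);
}
#endif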
9239
9240 /* Specific decode and execute for group Data Processing Immediate. */
9241
9242 static void
9243 dexPCRelAddressing (sim_cpu *cpu)
9244 {
9245 /* assert instr[28,24] = 10000. */
9246 pcadr (cpu);
9247 }
9248
9249 /* Immediate logical.
9250 The bimm32/64 argument is constructed by replicating a 2, 4, 8,
9251 16, 32 or 64 bit sequence pulled out at decode and possibly
9252 inverting it.
9253 
9254 N.B. the output register (dest) can normally be Xn or SP;
9255 the exception occurs for flag setting instructions, which may
9256 only use Xn for the output (dest). The input register can
9257 never be SP. */
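/* Illustrative sketch, kept out of the build: one way such a bimm32
   value can arise, here the 8 bit element 0x0F replicated across the
   32 bit word.  */
#if 0
static uint32_t
replicate_8bit_element (uint8_t pattern)   /* Hypothetical helper.  */
{
  uint32_t elem = pattern;
  /* Copy the element into each byte of the word.  */
  return elem | (elem << 8) | (elem << 16) | (elem << 24);
}
/* replicate_8bit_element (0x0F) == 0x0F0F0F0F.  */
#endif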
9258
9259 /* 32 bit and immediate. */
9260 static void
9261 and32 (sim_cpu *cpu, uint32_t bimm)
9262 {
9263 unsigned rn = INSTR (9, 5);
9264 unsigned rd = INSTR (4, 0);
9265
9266 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9267 aarch64_set_reg_u64 (cpu, rd, SP_OK,
9268 aarch64_get_reg_u32 (cpu, rn, NO_SP) & bimm);
9269 }
9270
9271 /* 64 bit and immediate. */
9272 static void
9273 and64 (sim_cpu *cpu, uint64_t bimm)
9274 {
9275 unsigned rn = INSTR (9, 5);
9276 unsigned rd = INSTR (4, 0);
9277
9278 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9279 aarch64_set_reg_u64 (cpu, rd, SP_OK,
9280 aarch64_get_reg_u64 (cpu, rn, NO_SP) & bimm);
9281 }
9282
9283 /* 32 bit and immediate set flags. */
9284 static void
9285 ands32 (sim_cpu *cpu, uint32_t bimm)
9286 {
9287 unsigned rn = INSTR (9, 5);
9288 unsigned rd = INSTR (4, 0);
9289
9290 uint32_t value1 = aarch64_get_reg_u32 (cpu, rn, NO_SP);
9291 uint32_t value2 = bimm;
9292
9293 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9294 aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 & value2);
9295 set_flags_for_binop32 (cpu, value1 & value2);
9296 }
9297
9298 /* 64 bit and immediate set flags. */
9299 static void
9300 ands64 (sim_cpu *cpu, uint64_t bimm)
9301 {
9302 unsigned rn = INSTR (9, 5);
9303 unsigned rd = INSTR (4, 0);
9304
9305 uint64_t value1 = aarch64_get_reg_u64 (cpu, rn, NO_SP);
9306 uint64_t value2 = bimm;
9307
9308 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9309 aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 & value2);
9310 set_flags_for_binop64 (cpu, value1 & value2);
9311 }
9312
9313 /* 32 bit exclusive or immediate. */
9314 static void
9315 eor32 (sim_cpu *cpu, uint32_t bimm)
9316 {
9317 unsigned rn = INSTR (9, 5);
9318 unsigned rd = INSTR (4, 0);
9319
9320 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9321 aarch64_set_reg_u64 (cpu, rd, SP_OK,
9322 aarch64_get_reg_u32 (cpu, rn, NO_SP) ^ bimm);
9323 }
9324
9325 /* 64 bit exclusive or immediate. */
9326 static void
9327 eor64 (sim_cpu *cpu, uint64_t bimm)
9328 {
9329 unsigned rn = INSTR (9, 5);
9330 unsigned rd = INSTR (4, 0);
9331
9332 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9333 aarch64_set_reg_u64 (cpu, rd, SP_OK,
9334 aarch64_get_reg_u64 (cpu, rn, NO_SP) ^ bimm);
9335 }
9336
9337 /* 32 bit or immediate. */
9338 static void
9339 orr32 (sim_cpu *cpu, uint32_t bimm)
9340 {
9341 unsigned rn = INSTR (9, 5);
9342 unsigned rd = INSTR (4, 0);
9343
9344 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9345 aarch64_set_reg_u64 (cpu, rd, SP_OK,
9346 aarch64_get_reg_u32 (cpu, rn, NO_SP) | bimm);
9347 }
9348
9349 /* 64 bit or immediate. */
9350 static void
9351 orr64 (sim_cpu *cpu, uint64_t bimm)
9352 {
9353 unsigned rn = INSTR (9, 5);
9354 unsigned rd = INSTR (4, 0);
9355
9356 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9357 aarch64_set_reg_u64 (cpu, rd, SP_OK,
9358 aarch64_get_reg_u64 (cpu, rn, NO_SP) | bimm);
9359 }
9360
9361 /* Logical shifted register.
9362 These allow an optional LSL, ASR, LSR or ROR to the second source
9363 register with a count up to the register bit count.
9364 N.B register args may not be SP. */
9365
9366 /* 32 bit AND shifted register. */
9367 static void
9368 and32_shift (sim_cpu *cpu, Shift shift, uint32_t count)
9369 {
9370 unsigned rm = INSTR (20, 16);
9371 unsigned rn = INSTR (9, 5);
9372 unsigned rd = INSTR (4, 0);
9373
9374 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9375 aarch64_set_reg_u64
9376 (cpu, rd, NO_SP, aarch64_get_reg_u32 (cpu, rn, NO_SP)
9377 & shifted32 (aarch64_get_reg_u32 (cpu, rm, NO_SP), shift, count));
9378 }
9379
9380 /* 64 bit AND shifted register. */
9381 static void
9382 and64_shift (sim_cpu *cpu, Shift shift, uint32_t count)
9383 {
9384 unsigned rm = INSTR (20, 16);
9385 unsigned rn = INSTR (9, 5);
9386 unsigned rd = INSTR (4, 0);
9387
9388 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9389 aarch64_set_reg_u64
9390 (cpu, rd, NO_SP, aarch64_get_reg_u64 (cpu, rn, NO_SP)
9391 & shifted64 (aarch64_get_reg_u64 (cpu, rm, NO_SP), shift, count));
9392 }
9393
9394 /* 32 bit AND shifted register setting flags. */
9395 static void
9396 ands32_shift (sim_cpu *cpu, Shift shift, uint32_t count)
9397 {
9398 unsigned rm = INSTR (20, 16);
9399 unsigned rn = INSTR (9, 5);
9400 unsigned rd = INSTR (4, 0);
9401
9402 uint32_t value1 = aarch64_get_reg_u32 (cpu, rn, NO_SP);
9403 uint32_t value2 = shifted32 (aarch64_get_reg_u32 (cpu, rm, NO_SP),
9404 shift, count);
9405
9406 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9407 aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 & value2);
9408 set_flags_for_binop32 (cpu, value1 & value2);
9409 }
9410
9411 /* 64 bit AND shifted register setting flags. */
9412 static void
9413 ands64_shift (sim_cpu *cpu, Shift shift, uint32_t count)
9414 {
9415 unsigned rm = INSTR (20, 16);
9416 unsigned rn = INSTR (9, 5);
9417 unsigned rd = INSTR (4, 0);
9418
9419 uint64_t value1 = aarch64_get_reg_u64 (cpu, rn, NO_SP);
9420 uint64_t value2 = shifted64 (aarch64_get_reg_u64 (cpu, rm, NO_SP),
9421 shift, count);
9422
9423 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9424 aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 & value2);
9425 set_flags_for_binop64 (cpu, value1 & value2);
9426 }
9427
9428 /* 32 bit BIC shifted register. */
9429 static void
9430 bic32_shift (sim_cpu *cpu, Shift shift, uint32_t count)
9431 {
9432 unsigned rm = INSTR (20, 16);
9433 unsigned rn = INSTR (9, 5);
9434 unsigned rd = INSTR (4, 0);
9435
9436 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9437 aarch64_set_reg_u64
9438 (cpu, rd, NO_SP, aarch64_get_reg_u32 (cpu, rn, NO_SP)
9439 & ~ shifted32 (aarch64_get_reg_u32 (cpu, rm, NO_SP), shift, count));
9440 }
9441
9442 /* 64 bit BIC shifted register. */
9443 static void
9444 bic64_shift (sim_cpu *cpu, Shift shift, uint32_t count)
9445 {
9446 unsigned rm = INSTR (20, 16);
9447 unsigned rn = INSTR (9, 5);
9448 unsigned rd = INSTR (4, 0);
9449
9450 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9451 aarch64_set_reg_u64
9452 (cpu, rd, NO_SP, aarch64_get_reg_u64 (cpu, rn, NO_SP)
9453 & ~ shifted64 (aarch64_get_reg_u64 (cpu, rm, NO_SP), shift, count));
9454 }
9455
9456 /* 32 bit BIC shifted register setting flags. */
9457 static void
9458 bics32_shift (sim_cpu *cpu, Shift shift, uint32_t count)
9459 {
9460 unsigned rm = INSTR (20, 16);
9461 unsigned rn = INSTR (9, 5);
9462 unsigned rd = INSTR (4, 0);
9463
9464 uint32_t value1 = aarch64_get_reg_u32 (cpu, rn, NO_SP);
9465 uint32_t value2 = ~ shifted32 (aarch64_get_reg_u32 (cpu, rm, NO_SP),
9466 shift, count);
9467
9468 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9469 aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 & value2);
9470 set_flags_for_binop32 (cpu, value1 & value2);
9471 }
9472
9473 /* 64 bit BIC shifted register setting flags. */
9474 static void
9475 bics64_shift (sim_cpu *cpu, Shift shift, uint32_t count)
9476 {
9477 unsigned rm = INSTR (20, 16);
9478 unsigned rn = INSTR (9, 5);
9479 unsigned rd = INSTR (4, 0);
9480
9481 uint64_t value1 = aarch64_get_reg_u64 (cpu, rn, NO_SP);
9482 uint64_t value2 = ~ shifted64 (aarch64_get_reg_u64 (cpu, rm, NO_SP),
9483 shift, count);
9484
9485 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9486 aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 & value2);
9487 set_flags_for_binop64 (cpu, value1 & value2);
9488 }
9489
9490 /* 32 bit EON shifted register. */
9491 static void
9492 eon32_shift (sim_cpu *cpu, Shift shift, uint32_t count)
9493 {
9494 unsigned rm = INSTR (20, 16);
9495 unsigned rn = INSTR (9, 5);
9496 unsigned rd = INSTR (4, 0);
9497
9498 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9499 aarch64_set_reg_u64
9500 (cpu, rd, NO_SP, aarch64_get_reg_u32 (cpu, rn, NO_SP)
9501 ^ ~ shifted32 (aarch64_get_reg_u32 (cpu, rm, NO_SP), shift, count));
9502 }
9503
9504 /* 64 bit EON shifted register. */
9505 static void
9506 eon64_shift (sim_cpu *cpu, Shift shift, uint32_t count)
9507 {
9508 unsigned rm = INSTR (20, 16);
9509 unsigned rn = INSTR (9, 5);
9510 unsigned rd = INSTR (4, 0);
9511
9512 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9513 aarch64_set_reg_u64
9514 (cpu, rd, NO_SP, aarch64_get_reg_u64 (cpu, rn, NO_SP)
9515 ^ ~ shifted64 (aarch64_get_reg_u64 (cpu, rm, NO_SP), shift, count));
9516 }
9517
9518 /* 32 bit EOR shifted register. */
9519 static void
9520 eor32_shift (sim_cpu *cpu, Shift shift, uint32_t count)
9521 {
9522 unsigned rm = INSTR (20, 16);
9523 unsigned rn = INSTR (9, 5);
9524 unsigned rd = INSTR (4, 0);
9525
9526 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9527 aarch64_set_reg_u64
9528 (cpu, rd, NO_SP, aarch64_get_reg_u32 (cpu, rn, NO_SP)
9529 ^ shifted32 (aarch64_get_reg_u32 (cpu, rm, NO_SP), shift, count));
9530 }
9531
9532 /* 64 bit EOR shifted register. */
9533 static void
9534 eor64_shift (sim_cpu *cpu, Shift shift, uint32_t count)
9535 {
9536 unsigned rm = INSTR (20, 16);
9537 unsigned rn = INSTR (9, 5);
9538 unsigned rd = INSTR (4, 0);
9539
9540 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9541 aarch64_set_reg_u64
9542 (cpu, rd, NO_SP, aarch64_get_reg_u64 (cpu, rn, NO_SP)
9543 ^ shifted64 (aarch64_get_reg_u64 (cpu, rm, NO_SP), shift, count));
9544 }
9545
9546 /* 32 bit ORR shifted register. */
9547 static void
9548 orr32_shift (sim_cpu *cpu, Shift shift, uint32_t count)
9549 {
9550 unsigned rm = INSTR (20, 16);
9551 unsigned rn = INSTR (9, 5);
9552 unsigned rd = INSTR (4, 0);
9553
9554 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9555 aarch64_set_reg_u64
9556 (cpu, rd, NO_SP, aarch64_get_reg_u32 (cpu, rn, NO_SP)
9557 | shifted32 (aarch64_get_reg_u32 (cpu, rm, NO_SP), shift, count));
9558 }
9559
9560 /* 64 bit ORR shifted register. */
9561 static void
9562 orr64_shift (sim_cpu *cpu, Shift shift, uint32_t count)
9563 {
9564 unsigned rm = INSTR (20, 16);
9565 unsigned rn = INSTR (9, 5);
9566 unsigned rd = INSTR (4, 0);
9567
9568 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9569 aarch64_set_reg_u64
9570 (cpu, rd, NO_SP, aarch64_get_reg_u64 (cpu, rn, NO_SP)
9571 | shifted64 (aarch64_get_reg_u64 (cpu, rm, NO_SP), shift, count));
9572 }
9573
9574 /* 32 bit ORN shifted register. */
9575 static void
9576 orn32_shift (sim_cpu *cpu, Shift shift, uint32_t count)
9577 {
9578 unsigned rm = INSTR (20, 16);
9579 unsigned rn = INSTR (9, 5);
9580 unsigned rd = INSTR (4, 0);
9581
9582 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9583 aarch64_set_reg_u64
9584 (cpu, rd, NO_SP, aarch64_get_reg_u32 (cpu, rn, NO_SP)
9585 | ~ shifted32 (aarch64_get_reg_u32 (cpu, rm, NO_SP), shift, count));
9586 }
9587
9588 /* 64 bit ORN shifted register. */
9589 static void
9590 orn64_shift (sim_cpu *cpu, Shift shift, uint32_t count)
9591 {
9592 unsigned rm = INSTR (20, 16);
9593 unsigned rn = INSTR (9, 5);
9594 unsigned rd = INSTR (4, 0);
9595
9596 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9597 aarch64_set_reg_u64
9598 (cpu, rd, NO_SP, aarch64_get_reg_u64 (cpu, rn, NO_SP)
9599 | ~ shifted64 (aarch64_get_reg_u64 (cpu, rm, NO_SP), shift, count));
9600 }
9601
9602 static void
9603 dexLogicalImmediate (sim_cpu *cpu)
9604 {
9605 /* assert instr[28,23] = 100100
9606 instr[31] = size : 0 ==> 32 bit, 1 ==> 64 bit
9607 instr[30,29] = op : 0 ==> AND, 1 ==> ORR, 2 ==> EOR, 3 ==> ANDS
9608 instr[22] = N : used to construct immediate mask
9609 instr[21,16] = immr
9610 instr[15,10] = imms
9611 instr[9,5] = Rn
9612 instr[4,0] = Rd */
9613
9614 /* 32 bit operations must have N = 0 or else we have an UNALLOC. */
9615 uint32_t size = INSTR (31, 31);
9616 uint32_t N = INSTR (22, 22);
9617 /* uint32_t immr = INSTR (21, 16); */
9618 /* uint32_t imms = INSTR (15, 10); */
9619 uint32_t index = INSTR (22, 10);
9620 uint64_t bimm64 = LITable [index];
9621 uint32_t dispatch = INSTR (30, 29);
9622
9623 if (~size & N)
9624 HALT_UNALLOC;
9625
9626 if (!bimm64)
9627 HALT_UNALLOC;
9628
9629 if (size == 0)
9630 {
9631 uint32_t bimm = (uint32_t) bimm64;
9632
9633 switch (dispatch)
9634 {
9635 case 0: and32 (cpu, bimm); return;
9636 case 1: orr32 (cpu, bimm); return;
9637 case 2: eor32 (cpu, bimm); return;
9638 case 3: ands32 (cpu, bimm); return;
9639 }
9640 }
9641 else
9642 {
9643 switch (dispatch)
9644 {
9645 case 0: and64 (cpu, bimm64); return;
9646 case 1: orr64 (cpu, bimm64); return;
9647 case 2: eor64 (cpu, bimm64); return;
9648 case 3: ands64 (cpu, bimm64); return;
9649 }
9650 }
9651 HALT_UNALLOC;
9652 }
9653
9654 /* Immediate move.
9655 The uimm argument is a 16 bit value to be inserted into the
9656 target register; the pos argument locates the 16 bit word in the
9657 dest register, i.e. it is in {0, 1} for 32 bit and {0, 1, 2,
9658 3} for 64 bit.
9659 N.B. the register arg may not be SP, so it should be
9660 accessed using the setGZRegisterXXX accessors. */
9661
9662 /* 32 bit move 16 bit immediate zero remaining shorts. */
9663 static void
9664 movz32 (sim_cpu *cpu, uint32_t val, uint32_t pos)
9665 {
9666 unsigned rd = INSTR (4, 0);
9667
9668 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9669 aarch64_set_reg_u64 (cpu, rd, NO_SP, val << (pos * 16));
9670 }
9671
9672 /* 64 bit move 16 bit immediate zero remaining shorts. */
9673 static void
9674 movz64 (sim_cpu *cpu, uint32_t val, uint32_t pos)
9675 {
9676 unsigned rd = INSTR (4, 0);
9677
9678 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9679 aarch64_set_reg_u64 (cpu, rd, NO_SP, ((uint64_t) val) << (pos * 16));
9680 }
9681
9682 /* 32 bit move 16 bit immediate negated. */
9683 static void
9684 movn32 (sim_cpu *cpu, uint32_t val, uint32_t pos)
9685 {
9686 unsigned rd = INSTR (4, 0);
9687
9688 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9689 aarch64_set_reg_u64 (cpu, rd, NO_SP, ((val << (pos * 16)) ^ 0xffffffffU));
9690 }
9691
9692 /* 64 bit move 16 bit immediate negated. */
9693 static void
9694 movn64 (sim_cpu *cpu, uint32_t val, uint32_t pos)
9695 {
9696 unsigned rd = INSTR (4, 0);
9697
9698 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9699 aarch64_set_reg_u64
9700 (cpu, rd, NO_SP, ((((uint64_t) val) << (pos * 16))
9701 ^ 0xffffffffffffffffULL));
9702 }
9703
9704 /* 32 bit move 16 bit immediate keep remaining shorts. */
9705 static void
9706 movk32 (sim_cpu *cpu, uint32_t val, uint32_t pos)
9707 {
9708 unsigned rd = INSTR (4, 0);
9709 uint32_t current = aarch64_get_reg_u32 (cpu, rd, NO_SP);
9710 uint32_t value = val << (pos * 16);
9711 uint32_t mask = ~(0xffffU << (pos * 16));
9712
9713 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9714 aarch64_set_reg_u64 (cpu, rd, NO_SP, (value | (current & mask)));
9715 }
9716
9717 /* 64 bit move 16 bit immediate keep remaining shorts. */
9718 static void
9719 movk64 (sim_cpu *cpu, uint32_t val, uint32_t pos)
9720 {
9721 unsigned rd = INSTR (4, 0);
9722 uint64_t current = aarch64_get_reg_u64 (cpu, rd, NO_SP);
9723 uint64_t value = (uint64_t) val << (pos * 16);
9724 uint64_t mask = ~(0xffffULL << (pos * 16));
9725
9726 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9727 aarch64_set_reg_u64 (cpu, rd, NO_SP, (value | (current & mask)));
9728 }
9729
9730 static void
9731 dexMoveWideImmediate (sim_cpu *cpu)
9732 {
9733 /* assert instr[28:23] = 100101
9734 instr[31] = size : 0 ==> 32 bit, 1 ==> 64 bit
9735 instr[30,29] = op : 0 ==> MOVN, 1 ==> UNALLOC, 2 ==> MOVZ, 3 ==> MOVK
9736 instr[22,21] = shift : 00 == LSL#0, 01 = LSL#16, 10 = LSL#32, 11 = LSL#48
9737 instr[20,5] = uimm16
9738 instr[4,0] = Rd */
9739
9740 /* N.B. the (multiple of 16) shift is applied by the called routine;
9741 we just pass the multiplier. */
9742
9743 uint32_t imm;
9744 uint32_t size = INSTR (31, 31);
9745 uint32_t op = INSTR (30, 29);
9746 uint32_t shift = INSTR (22, 21);
9747
9748 /* 32 bit can only shift 0 or 1 lot of 16.
9749 Anything else is an unallocated instruction. */
9750 if (size == 0 && (shift > 1))
9751 HALT_UNALLOC;
9752
9753 if (op == 1)
9754 HALT_UNALLOC;
9755
9756 imm = INSTR (20, 5);
9757
9758 if (size == 0)
9759 {
9760 if (op == 0)
9761 movn32 (cpu, imm, shift);
9762 else if (op == 2)
9763 movz32 (cpu, imm, shift);
9764 else
9765 movk32 (cpu, imm, shift);
9766 }
9767 else
9768 {
9769 if (op == 0)
9770 movn64 (cpu, imm, shift);
9771 else if (op == 2)
9772 movz64 (cpu, imm, shift);
9773 else
9774 movk64 (cpu, imm, shift);
9775 }
9776 }
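/* Illustrative sketch, kept out of the build: how a MOVZ/MOVK pair
   builds a 64 bit constant, mirroring movz64 and movk64 above.
   MOVZ zeroes the other shorts; MOVK keeps them.  */
#if 0
#include <assert.h>

static void
check_movz_movk (void)   /* Hypothetical check.  */
{
  uint64_t reg;

  /* movz x0, #0x1234, lsl #48 : val == 0x1234, pos == 3.  */
  reg = (uint64_t) 0x1234 << 48;

  /* movk x0, #0x5678 : val == 0x5678, pos == 0, other shorts kept.  */
  reg = (reg & ~0xffffull) | 0x5678;

  assert (reg == 0x1234000000005678ull);
}
#endif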
9777
9778 /* Bitfield operations.
9779 These take a pair of bit positions r and s which are in {0..31}
9780 or {0..63} depending on the instruction word size.
9781 N.B register args may not be SP. */
9782
9783 /* OK, we start with ubfm, which just needs to pick
9784 some bits out of the source, zero the rest, and write
9785 the result to dest; two logical shifts suffice. */
9786
9787 /* 32 bit bitfield move, left and right of affected zeroed
9788 if r <= s Wd<s-r:0> = Wn<s:r> else Wd<32+s-r,32-r> = Wn<s:0>. */
9789 static void
9790 ubfm32 (sim_cpu *cpu, uint32_t r, uint32_t s)
9791 {
9792 unsigned rd;
9793 unsigned rn = INSTR (9, 5);
9794 uint32_t value = aarch64_get_reg_u32 (cpu, rn, NO_SP);
9795
9796 /* Pick either s+1-r or s+1 consecutive bits out of the original word. */
9797 if (r <= s)
9798 {
9799 /* 31:...:s:xxx:r:...:0 ==> 31:...:s-r:xxx:0.
9800 We want only bits s:xxx:r at the bottom of the word
9801 so we LSL bit s up to bit 31 i.e. by 31 - s
9802 and then we LSR to bring bit 31 down to bit s - r
9803 i.e. by 31 + r - s. */
9804 value <<= 31 - s;
9805 value >>= 31 + r - s;
9806 }
9807 else
9808 {
9809 /* 31:...:s:xxx:0 ==> 31:...:31-(r-1)+s:xxx:31-(r-1):...:0
9810 We want only bits s:xxx:0 starting at bit 31-(r-1)
9811 so we LSL bit s up to bit 31 i.e. by 31 - s
9812 and then we LSL to bring bit 31 down to 31-(r-1)+s
9813 i.e. by r - (s + 1). */
9814 value <<= 31 - s;
9815 value >>= r - (s + 1);
9816 }
9817
9818 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9819 rd = INSTR (4, 0);
9820 aarch64_set_reg_u64 (cpu, rd, NO_SP, value);
9821 }
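/* Illustrative sketch, kept out of the build: the r <= s path above
   with r = 8, s = 15, which extracts bits 15:8 of the source into
   the bottom byte of the destination.  */
#if 0
#include <assert.h>

static void
check_ubfm32_extract (void)   /* Hypothetical check.  */
{
  uint32_t value = 0xAABBCCDD;
  unsigned r = 8, s = 15;

  value <<= 31 - s;           /* << 16 gives 0xCCDD0000.  */
  value >>= 31 + r - s;       /* >> 24 gives 0x000000CC.  */

  assert (value == 0xCC);
}
#endif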
9822
9823 /* 64 bit bitfield move, left and right of affected zeroed
9824 if r <= s Wd<s-r:0> = Wn<s:r> else Wd<64+s-r,64-r> = Wn<s:0>. */
9825 static void
9826 ubfm (sim_cpu *cpu, uint32_t r, uint32_t s)
9827 {
9828 unsigned rd;
9829 unsigned rn = INSTR (9, 5);
9830 uint64_t value = aarch64_get_reg_u64 (cpu, rn, NO_SP);
9831
9832 if (r <= s)
9833 {
9834 /* 63:...:s:xxx:r:...:0 ==> 63:...:s-r:xxx:0.
9835 We want only bits s:xxx:r at the bottom of the word.
9836 So we LSL bit s up to bit 63 i.e. by 63 - s
9837 and then we LSR to bring bit 63 down to bit s - r
9838 i.e. by 63 + r - s. */
9839 value <<= 63 - s;
9840 value >>= 63 + r - s;
9841 }
9842 else
9843 {
9844 /* 63:...:s:xxx:0 ==> 63:...:63-(r-1)+s:xxx:63-(r-1):...:0.
9845 We want only bits s:xxx:0 starting at bit 63-(r-1).
9846 So we LSL bit s up to bit 63 i.e. by 63 - s
9847 and then we LSL to bring bit 63 down to 63-(r-1)+s
9848 i.e. by r - (s + 1). */
9849 value <<= 63 - s;
9850 value >>= r - (s + 1);
9851 }
9852
9853 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9854 rd = INSTR (4, 0);
9855 aarch64_set_reg_u64 (cpu, rd, NO_SP, value);
9856 }
9857
9858 /* The signed versions need to insert sign bits
9859 on the left of the inserted bit field, so we do
9860 much the same as the unsigned version, except we
9861 use an arithmetic shift right -- this just means
9862 we need to operate on signed values. */
9863
9864 /* 32 bit bitfield move, left of affected sign-extended, right zeroed. */
9865 /* If r <= s Wd<s-r:0> = Wn<s:r> else Wd<32+s-r,32-r> = Wn<s:0>. */
9866 static void
9867 sbfm32 (sim_cpu *cpu, uint32_t r, uint32_t s)
9868 {
9869 unsigned rd;
9870 unsigned rn = INSTR (9, 5);
9871 /* As per ubfm32, but use an ASR instead of an LSR. */
9872 int32_t value = aarch64_get_reg_s32 (cpu, rn, NO_SP);
9873
9874 if (r <= s)
9875 {
9876 value <<= 31 - s;
9877 value >>= 31 + r - s;
9878 }
9879 else
9880 {
9881 value <<= 31 - s;
9882 value >>= r - (s + 1);
9883 }
9884
9885 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9886 rd = INSTR (4, 0);
9887 aarch64_set_reg_u64 (cpu, rd, NO_SP, (uint32_t) value);
9888 }
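/* Illustrative sketch, kept out of the build: sbfm32 with r = 0 and
   s = 7 is SXTB.  Like the code above it relies on >> of a signed
   value being an arithmetic shift.  */
#if 0
#include <assert.h>

static void
check_sbfm32_sxtb (void)      /* Hypothetical check.  */
{
  int32_t value = 0xF0;       /* Bottom byte is -16 when signed.  */

  value <<= 31 - 7;           /* << 24 puts the sign bit at bit 31.  */
  value >>= 31 + 0 - 7;       /* Arithmetic >> 24 gives 0xFFFFFFF0.  */

  assert (value == -16);
}
#endif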
9889
9890 /* 64 bit bitfield move, left of affected sign-extended, right zeroed. */
9891 /* If r <= s Wd<s-r:0> = Wn<s:r> else Wd<64+s-r,64-r> = Wn<s:0>. */
9892 static void
9893 sbfm (sim_cpu *cpu, uint32_t r, uint32_t s)
9894 {
9895 unsigned rd;
9896 unsigned rn = INSTR (9, 5);
9897 /* As per ubfm, but use an ASR instead of an LSR. */
9898 int64_t value = aarch64_get_reg_s64 (cpu, rn, NO_SP);
9899
9900 if (r <= s)
9901 {
9902 value <<= 63 - s;
9903 value >>= 63 + r - s;
9904 }
9905 else
9906 {
9907 value <<= 63 - s;
9908 value >>= r - (s + 1);
9909 }
9910
9911 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9912 rd = INSTR (4, 0);
9913 aarch64_set_reg_s64 (cpu, rd, NO_SP, value);
9914 }
9915
9916 /* Finally, these versions leave non-affected bits
9917 as is, so we need to generate the bits as per
9918 ubfm and also generate a mask to pick the
9919 bits from the original and computed values. */
9920
9921 /* 32 bit bitfield move, non-affected bits left as is.
9922 If r <= s Wd<s-r:0> = Wn<s:r> else Wd<32+s-r,32-r> = Wn<s:0>. */
9923 static void
9924 bfm32 (sim_cpu *cpu, uint32_t r, uint32_t s)
9925 {
9926 unsigned rn = INSTR (9, 5);
9927 uint32_t value = aarch64_get_reg_u32 (cpu, rn, NO_SP);
9928 uint32_t mask = -1;
9929 unsigned rd;
9930 uint32_t value2;
9931
9932 /* Pick either s+1-r or s+1 consecutive bits out of the original word. */
9933 if (r <= s)
9934 {
9935 /* 31:...:s:xxx:r:...:0 ==> 31:...:s-r:xxx:0.
9936 We want only bits s:xxx:r at the bottom of the word
9937 so we LSL bit s up to bit 31 i.e. by 31 - s
9938 and then we LSR to bring bit 31 down to bit s - r
9939 i.e. by 31 + r - s. */
9940 value <<= 31 - s;
9941 value >>= 31 + r - s;
9942 /* the mask must include the same bits. */
9943 mask <<= 31 - s;
9944 mask >>= 31 + r - s;
9945 }
9946 else
9947 {
9948 /* 31:...:s:xxx:0 ==> 31:...:31-(r-1)+s:xxx:31-(r-1):...:0.
9949 We want only bits s:xxx:0 starting at bit 31-(r-1)
9950 so we LSL bit s up to bit 31 i.e. by 31 - s
9951 and then we LSL to bring bit 31 down to 31-(r-1)+s
9952 i.e. by r - (s + 1). */
9953 value <<= 31 - s;
9954 value >>= r - (s + 1);
9955 /* The mask must include the same bits. */
9956 mask <<= 31 - s;
9957 mask >>= r - (s + 1);
9958 }
9959
9960 rd = INSTR (4, 0);
9961 value2 = aarch64_get_reg_u32 (cpu, rd, NO_SP);
9962
9963 value2 &= ~mask;
9964 value2 |= value;
9965
9966 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9967 aarch64_set_reg_u64 (cpu, rd, NO_SP, value2);
9969 }
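/* Illustrative sketch, kept out of the build: the mask computed
   above selects which destination bits are replaced.  With r = 0 and
   s = 7 the mask is 0xFF, so the bottom byte of Wn lands in the
   bottom byte of Wd and everything else is preserved.  */
#if 0
#include <assert.h>

static void
check_bfm32_insert (void)     /* Hypothetical check.  */
{
  uint32_t dest = 0xAABBCCDD;
  uint32_t src  = 0x11223344;
  uint32_t mask = 0xFF;

  uint32_t res = (dest & ~mask) | (src & mask);
  assert (res == 0xAABBCC44);
}
#endif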
9970
9971 /* 64 bit bitfield move, non-affected bits left as is.
9972 If r <= s Wd<s-r:0> = Wn<s:r> else Wd<64+s-r,64-r> = Wn<s:0>. */
9973 static void
9974 bfm (sim_cpu *cpu, uint32_t r, uint32_t s)
9975 {
9976 unsigned rd;
9977 unsigned rn = INSTR (9, 5);
9978 uint64_t value = aarch64_get_reg_u64 (cpu, rn, NO_SP);
9979 uint64_t mask = 0xffffffffffffffffULL;
9980
9981 if (r <= s)
9982 {
9983 /* 63:...:s:xxx:r:...:0 ==> 63:...:s-r:xxx:0.
9984 We want only bits s:xxx:r at the bottom of the word
9985 so we LSL bit s up to bit 63 i.e. by 63 - s
9986 and then we LSR to bring bit 63 down to bit s - r
9987 i.e. by 63 + r - s. */
9988 value <<= 63 - s;
9989 value >>= 63 + r - s;
9990 /* The mask must include the same bits. */
9991 mask <<= 63 - s;
9992 mask >>= 63 + r - s;
9993 }
9994 else
9995 {
9996 /* 63:...:s:xxx:0 ==> 63:...:63-(r-1)+s:xxx:63-(r-1):...:0
9997 We want only bits s:xxx:0 starting at bit 63-(r-1)
9998 so we LSL bit s up to bit 63 i.e. by 63 - s
9999 and then we LSL to bring bit 63 down to 63-(r-1)+s
10000 i.e. by r - (s + 1). */
10001 value <<= 63 - s;
10002 value >>= r - (s + 1);
10003 /* The mask must include the same bits. */
10004 mask <<= 63 - s;
10005 mask >>= r - (s + 1);
10006 }
10007
10008 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
10009 rd = INSTR (4, 0);
10010 aarch64_set_reg_u64
10011 (cpu, rd, NO_SP, (aarch64_get_reg_u64 (cpu, rd, NO_SP) & ~mask) | value);
10012 }
10013
10014 static void
10015 dexBitfieldImmediate (sim_cpu *cpu)
10016 {
10017 /* assert instr[28:23] = 100110
10018 instr[31] = size : 0 ==> 32 bit, 1 ==> 64 bit
10019 instr[30,29] = op : 0 ==> SBFM, 1 ==> BFM, 2 ==> UBFM, 3 ==> UNALLOC
10020 instr[22] = N : must be 0 for 32 bit, 1 for 64 bit ow UNALLOC
10021 instr[21,16] = immr : 0xxxxx for 32 bit, xxxxxx for 64 bit
10022 instr[15,10] = imms : 0xxxxx for 32 bit, xxxxxx for 64 bit
10023 instr[9,5] = Rn
10024 instr[4,0] = Rd */
10025
10026 /* 32 bit operations must have N = 0 or else we have an UNALLOC. */
10027 uint32_t dispatch;
10028 uint32_t imms;
10029 uint32_t size = INSTR (31, 31);
10030 uint32_t N = INSTR (22, 22);
10031 /* 32 bit operations must have immr[5] = 0 and imms[5] = 0. */
10032 /* or else we have an UNALLOC. */
10033 uint32_t immr = INSTR (21, 16);
10034
10035 if (~size & N)
10036 HALT_UNALLOC;
10037
10038 if (!size && uimm (immr, 5, 5))
10039 HALT_UNALLOC;
10040
10041 imms = INSTR (15, 10);
10042 if (!size && uimm (imms, 5, 5))
10043 HALT_UNALLOC;
10044
10045 /* Switch on combined size and op. */
10046 dispatch = INSTR (31, 29);
10047 switch (dispatch)
10048 {
10049 case 0: sbfm32 (cpu, immr, imms); return;
10050 case 1: bfm32 (cpu, immr, imms); return;
10051 case 2: ubfm32 (cpu, immr, imms); return;
10052 case 4: sbfm (cpu, immr, imms); return;
10053 case 5: bfm (cpu, immr, imms); return;
10054 case 6: ubfm (cpu, immr, imms); return;
10055 default: HALT_UNALLOC;
10056 }
10057 }
10058
10059 static void
10060 do_EXTR_32 (sim_cpu *cpu)
10061 {
10062 /* instr[31:21] = 00010011100
10063 instr[20,16] = Rm
10064 instr[15,10] = imms : 0xxxxx for 32 bit
10065 instr[9,5] = Rn
10066 instr[4,0] = Rd */
10067 unsigned rm = INSTR (20, 16);
10068 unsigned imms = INSTR (15, 10) & 31;
10069 unsigned rn = INSTR ( 9, 5);
10070 unsigned rd = INSTR ( 4, 0);
10071 uint64_t val1;
10072 uint64_t val2;
10073
10074 val1 = aarch64_get_reg_u32 (cpu, rm, NO_SP);
10075 val1 >>= imms;
10076 val2 = aarch64_get_reg_u32 (cpu, rn, NO_SP);
10077 val2 <<= (32 - imms);
10078
10079 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
10080 aarch64_set_reg_u64 (cpu, rd, NO_SP, (val1 | val2) & 0xffffffffULL); /* Mask off bits above 31. */
10081 }
10082
10083 static void
10084 do_EXTR_64 (sim_cpu *cpu)
10085 {
10086 /* instr[31:21] = 10010011100
10087 instr[20,16] = Rm
10088 instr[15,10] = imms
10089 instr[9,5] = Rn
10090 instr[4,0] = Rd */
10091 unsigned rm = INSTR (20, 16);
10092 unsigned imms = INSTR (15, 10) & 63;
10093 unsigned rn = INSTR ( 9, 5);
10094 unsigned rd = INSTR ( 4, 0);
10095 uint64_t val;
10096
10097 val = aarch64_get_reg_u64 (cpu, rm, NO_SP);
10098 val >>= imms;
10099 if (imms) val |= (aarch64_get_reg_u64 (cpu, rn, NO_SP) << (64 - imms)); /* Shift by 64 is undefined. */
10100
10101 aarch64_set_reg_u64 (cpu, rd, NO_SP, val);
10102 }
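/* Illustrative sketch, kept out of the build: with Rn == Rm, EXTR is
   a rotate right, which is why do_EXTR_64 combines a right shift of
   Rm with a left shift of Rn.  (imms == 0 is special-cased above
   because a shift by 64 would be undefined.)  */
#if 0
#include <assert.h>

static void
check_extr_as_ror (void)      /* Hypothetical check.  */
{
  uint64_t x = 0x0123456789ABCDEFull;
  unsigned imms = 8;

  uint64_t res = (x >> imms) | (x << (64 - imms));
  assert (res == 0xEF0123456789ABCDull);
}
#endif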
10103
10104 static void
10105 dexExtractImmediate (sim_cpu *cpu)
10106 {
10107 /* assert instr[28:23] = 100111
10108 instr[31] = size : 0 ==> 32 bit, 1 ==> 64 bit
10109 instr[30,29] = op21 : 0 ==> EXTR, 1,2,3 ==> UNALLOC
10110 instr[22] = N : must be 0 for 32 bit, 1 for 64 bit or UNALLOC
10111 instr[21] = op0 : must be 0 or UNALLOC
10112 instr[20,16] = Rm
10113 instr[15,10] = imms : 0xxxxx for 32 bit, xxxxxx for 64 bit
10114 instr[9,5] = Rn
10115 instr[4,0] = Rd */
10116
10117 /* 32 bit operations must have N = 0 or else we have an UNALLOC. */
10118 /* 64 bit operations must have N = 1 or else we have an UNALLOC. */
10119 uint32_t dispatch;
10120 uint32_t size = INSTR (31, 31);
10121 uint32_t N = INSTR (22, 22);
10122 /* 32 bit operations must have imms[5] = 0
10123 or else we have an UNALLOC. */
10124 uint32_t imms = INSTR (15, 10);
10125
10126 if (size ^ N)
10127 HALT_UNALLOC;
10128
10129 if (!size && uimm (imms, 5, 5))
10130 HALT_UNALLOC;
10131
10132 /* Switch on combined size and op. */
10133 dispatch = INSTR (31, 29);
10134
10135 if (dispatch == 0)
10136 do_EXTR_32 (cpu);
10137
10138 else if (dispatch == 4)
10139 do_EXTR_64 (cpu);
10140
10141 else if (dispatch == 1)
10142 HALT_NYI;
10143 else
10144 HALT_UNALLOC;
10145 }
10146
10147 static void
10148 dexDPImm (sim_cpu *cpu)
10149 {
10150 /* uint32_t group = dispatchGroup (aarch64_get_instr (cpu));
10151 assert group == GROUP_DPIMM_1000 || group == GROUP_DPIMM_1001
10152 bits [25,23] of a DPImm are the secondary dispatch vector. */
10153 uint32_t group2 = dispatchDPImm (aarch64_get_instr (cpu));
10154
10155 switch (group2)
10156 {
10157 case DPIMM_PCADR_000:
10158 case DPIMM_PCADR_001:
10159 dexPCRelAddressing (cpu);
10160 return;
10161
10162 case DPIMM_ADDSUB_010:
10163 case DPIMM_ADDSUB_011:
10164 dexAddSubtractImmediate (cpu);
10165 return;
10166
10167 case DPIMM_LOG_100:
10168 dexLogicalImmediate (cpu);
10169 return;
10170
10171 case DPIMM_MOV_101:
10172 dexMoveWideImmediate (cpu);
10173 return;
10174
10175 case DPIMM_BITF_110:
10176 dexBitfieldImmediate (cpu);
10177 return;
10178
10179 case DPIMM_EXTR_111:
10180 dexExtractImmediate (cpu);
10181 return;
10182
10183 default:
10184 /* Should never reach here. */
10185 HALT_NYI;
10186 }
10187 }
10188
10189 static void
10190 dexLoadUnscaledImmediate (sim_cpu *cpu)
10191 {
10192 /* instr[29,24] == 111_00
10193 instr[21] == 0
10194 instr[11,10] == 00
10195 instr[31,30] = size
10196 instr[26] = V
10197 instr[23,22] = opc
10198 instr[20,12] = simm9
10199 instr[9,5] = rn may be SP. */
10200 /* unsigned rt = INSTR (4, 0); */
10201 uint32_t V = INSTR (26, 26);
10202 uint32_t dispatch = ((INSTR (31, 30) << 2) | INSTR (23, 22));
10203 int32_t imm = simm32 (aarch64_get_instr (cpu), 20, 12);
10204
10205 if (!V)
10206 {
10207 /* GReg operations. */
10208 switch (dispatch)
10209 {
10210 case 0: sturb (cpu, imm); return;
10211 case 1: ldurb32 (cpu, imm); return;
10212 case 2: ldursb64 (cpu, imm); return;
10213 case 3: ldursb32 (cpu, imm); return;
10214 case 4: sturh (cpu, imm); return;
10215 case 5: ldurh32 (cpu, imm); return;
10216 case 6: ldursh64 (cpu, imm); return;
10217 case 7: ldursh32 (cpu, imm); return;
10218 case 8: stur32 (cpu, imm); return;
10219 case 9: ldur32 (cpu, imm); return;
10220 case 10: ldursw (cpu, imm); return;
10221 case 12: stur64 (cpu, imm); return;
10222 case 13: ldur64 (cpu, imm); return;
10223
10224 case 14:
10225 /* PRFUM NYI. */
10226 HALT_NYI;
10227
10228 default:
10229 case 11:
10230 case 15:
10231 HALT_UNALLOC;
10232 }
10233 }
10234
10235 /* FReg operations. */
10236 switch (dispatch)
10237 {
10238 case 2: fsturq (cpu, imm); return;
10239 case 3: fldurq (cpu, imm); return;
10240 case 8: fsturs (cpu, imm); return;
10241 case 9: fldurs (cpu, imm); return;
10242 case 12: fsturd (cpu, imm); return;
10243 case 13: fldurd (cpu, imm); return;
10244
10245 case 0: /* STUR 8 bit FP. */
10246 case 1: /* LDUR 8 bit FP. */
10247 case 4: /* STUR 16 bit FP. */
10248 case 5: /* LDUR 16 bit FP. */
10249 HALT_NYI;
10250
10251 default:
10252 case 6:
10253 case 7:
10254 case 10:
10255 case 11:
10256 case 14:
10257 case 15:
10258 HALT_UNALLOC;
10259 }
10260 }
10261
10262 /* N.B. A preliminary note regarding all the ldrs<x>32
10263 instructions
10264
10265 The signed value loaded by these instructions is cast to unsigned
10266 before being assigned to aarch64_get_reg_u64 (cpu, N) i.e. to the
10267 64 bit element of the GReg union. This performs a 32 bit sign extension
10268 (as required) but avoids 64 bit sign extension, thus ensuring that the
10269 top half of the register word is zero. This is what the spec demands
10270 when a 32 bit load occurs. */
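/* Illustrative sketch, kept out of the build: the double extension
   described above, i.e. sign-extend to 32 bits and then zero-extend
   to 64, which leaves the top half of the register clear.  */
#if 0
#include <assert.h>

static void
check_32bit_sign_extension (void)   /* Hypothetical check.  */
{
  int8_t loaded = -1;
  uint64_t reg = (uint32_t)(int32_t) loaded;
  assert (reg == 0xFFFFFFFFull);    /* Top 32 bits are zero.  */
}
#endif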
10271
10272 /* 32 bit load sign-extended byte scaled unsigned 12 bit. */
10273 static void
10274 ldrsb32_abs (sim_cpu *cpu, uint32_t offset)
10275 {
10276 unsigned int rn = INSTR (9, 5);
10277 unsigned int rt = INSTR (4, 0);
10278
10279 /* The target register may not be SP but the source may be.
10280 There is no scaling required for a byte load. */
10281 uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK) + offset;
10282 aarch64_set_reg_u64 (cpu, rt, NO_SP,
10283 (int64_t) aarch64_get_mem_s8 (cpu, address));
10284 }
10285
10286 /* 32 bit load sign-extended byte scaled or unscaled zero-
10287 or sign-extended 32-bit register offset. */
10288 static void
10289 ldrsb32_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
10290 {
10291 unsigned int rm = INSTR (20, 16);
10292 unsigned int rn = INSTR (9, 5);
10293 unsigned int rt = INSTR (4, 0);
10294
10295 /* rn may reference SP, rm and rt must reference ZR. */
10296
10297 uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
10298 int64_t displacement = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP),
10299 extension);
10300
10301 /* There is no scaling required for a byte load. */
10302 aarch64_set_reg_u64
10303 (cpu, rt, NO_SP, (int64_t) aarch64_get_mem_s8 (cpu, address
10304 + displacement));
10305 }
10306
10307 /* 32 bit load sign-extended byte unscaled signed 9 bit with
10308 pre- or post-writeback. */
10309 static void
10310 ldrsb32_wb (sim_cpu *cpu, int32_t offset, WriteBack wb)
10311 {
10312 uint64_t address;
10313 unsigned int rn = INSTR (9, 5);
10314 unsigned int rt = INSTR (4, 0);
10315
10316 if (rn == rt && wb != NoWriteBack)
10317 HALT_UNALLOC;
10318
10319 address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
10320
10321 if (wb == Pre)
10322 address += offset;
10323
10324 aarch64_set_reg_u64 (cpu, rt, NO_SP,
10325 (int64_t) aarch64_get_mem_s8 (cpu, address));
10326
10327 if (wb == Post)
10328 address += offset;
10329
10330 if (wb != NoWriteBack)
10331 aarch64_set_reg_u64 (cpu, rn, NO_SP, address);
10332 }
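/* Illustrative sketch, kept out of the build: pre-indexing applies
   the offset before the access, post-indexing after it; either way
   the updated address is written back to Rn, as in the writeback
   handling above.  */
#if 0
#include <assert.h>

static void
check_writeback_addressing (void)   /* Hypothetical check.  */
{
  uint64_t base = 0x1000, offset = 8;

  uint64_t pre_access  = base + offset;   /* ldrsb w1, [x0, #8]!  */
  uint64_t post_access = base;            /* ldrsb w1, [x0], #8   */
  uint64_t final_rn    = base + offset;   /* x0 == 0x1008 either way.  */

  assert (pre_access == 0x1008);
  assert (post_access == 0x1000);
  assert (final_rn == 0x1008);
}
#endif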
10333
10334 /* 8 bit store scaled. */
10335 static void
10336 fstrb_abs (sim_cpu *cpu, uint32_t offset)
10337 {
10338 unsigned st = INSTR (4, 0);
10339 unsigned rn = INSTR (9, 5);
10340
10341 aarch64_set_mem_u8 (cpu,
10342 aarch64_get_reg_u64 (cpu, rn, SP_OK) + offset,
10343 aarch64_get_vec_u8 (cpu, st, 0));
10344 }
10345
10346 /* 8 bit store scaled or unscaled zero- or
10347 sign-extended 8-bit register offset. */
10348 static void
10349 fstrb_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
10350 {
10351 unsigned rm = INSTR (20, 16);
10352 unsigned rn = INSTR (9, 5);
10353 unsigned st = INSTR (4, 0);
10354
10355 uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
10356 int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP),
10357 extension);
10358 uint64_t displacement = scaling == Scaled ? extended : 0;
10359
10360 aarch64_set_mem_u8
10361 (cpu, address + displacement, aarch64_get_vec_u8 (cpu, st, 0));
10362 }
10363
10364 /* 16 bit store scaled. */
10365 static void
10366 fstrh_abs (sim_cpu *cpu, uint32_t offset)
10367 {
10368 unsigned st = INSTR (4, 0);
10369 unsigned rn = INSTR (9, 5);
10370
10371 aarch64_set_mem_u16
10372 (cpu,
10373 aarch64_get_reg_u64 (cpu, rn, SP_OK) + SCALE (offset, 16),
10374 aarch64_get_vec_u16 (cpu, st, 0));
10375 }
10376
10377 /* 16 bit store scaled or unscaled zero-
10378 or sign-extended 16-bit register offset. */
10379 static void
10380 fstrh_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
10381 {
10382 unsigned rm = INSTR (20, 16);
10383 unsigned rn = INSTR (9, 5);
10384 unsigned st = INSTR (4, 0);
10385
10386 uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
10387 int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP),
10388 extension);
10389 uint64_t displacement = OPT_SCALE (extended, 16, scaling);
10390
10391 aarch64_set_mem_u16
10392 (cpu, address + displacement, aarch64_get_vec_u16 (cpu, st, 0));
10393 }
10394
10395 /* 32 bit store scaled unsigned 12 bit. */
10396 static void
10397 fstrs_abs (sim_cpu *cpu, uint32_t offset)
10398 {
10399 unsigned st = INSTR (4, 0);
10400 unsigned rn = INSTR (9, 5);
10401
10402 aarch64_set_mem_u32
10403 (cpu,
10404 aarch64_get_reg_u64 (cpu, rn, SP_OK) + SCALE (offset, 32),
10405 aarch64_get_vec_u32 (cpu, st, 0));
10406 }
10407
10408 /* 32 bit store unscaled signed 9 bit with pre- or post-writeback. */
10409 static void
10410 fstrs_wb (sim_cpu *cpu, int32_t offset, WriteBack wb)
10411 {
10412 unsigned rn = INSTR (9, 5);
10413 unsigned st = INSTR (4, 0);
10414
10415 uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
10416
10417 if (wb != Post)
10418 address += offset;
10419
10420 aarch64_set_mem_u32 (cpu, address, aarch64_get_vec_u32 (cpu, st, 0));
10421
10422 if (wb == Post)
10423 address += offset;
10424
10425 if (wb != NoWriteBack)
10426 aarch64_set_reg_u64 (cpu, rn, SP_OK, address);
10427 }
10428
10429 /* 32 bit store scaled or unscaled zero-
10430 or sign-extended 32-bit register offset. */
10431 static void
10432 fstrs_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
10433 {
10434 unsigned rm = INSTR (20, 16);
10435 unsigned rn = INSTR (9, 5);
10436 unsigned st = INSTR (4, 0);
10437
10438 uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
10439 int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP),
10440 extension);
10441 uint64_t displacement = OPT_SCALE (extended, 32, scaling);
10442
10443 aarch64_set_mem_u32
10444 (cpu, address + displacement, aarch64_get_vec_u32 (cpu, st, 0));
10445 }
10446
10447 /* 64 bit store scaled unsigned 12 bit. */
10448 static void
10449 fstrd_abs (sim_cpu *cpu, uint32_t offset)
10450 {
10451 unsigned st = INSTR (4, 0);
10452 unsigned rn = INSTR (9, 5);
10453
10454 aarch64_set_mem_u64
10455 (cpu,
10456 aarch64_get_reg_u64 (cpu, rn, SP_OK) + SCALE (offset, 64),
10457 aarch64_get_vec_u64 (cpu, st, 0));
10458 }
10459
10460 /* 64 bit store unscaled signed 9 bit with pre- or post-writeback. */
10461 static void
10462 fstrd_wb (sim_cpu *cpu, int32_t offset, WriteBack wb)
10463 {
10464 unsigned rn = INSTR (9, 5);
10465 unsigned st = INSTR (4, 0);
10466
10467 uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
10468
10469 if (wb != Post)
10470 address += offset;
10471
10472 aarch64_set_mem_u64 (cpu, address, aarch64_get_vec_u64 (cpu, st, 0));
10473
10474 if (wb == Post)
10475 address += offset;
10476
10477 if (wb != NoWriteBack)
10478 aarch64_set_reg_u64 (cpu, rn, SP_OK, address);
10479 }
10480
10481 /* 64 bit store scaled or unscaled zero-
10482 or sign-extended 32-bit register offset. */
10483 static void
10484 fstrd_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
10485 {
10486 unsigned rm = INSTR (20, 16);
10487 unsigned rn = INSTR (9, 5);
10488 unsigned st = INSTR (4, 0);
10489
10490 uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
10491 int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP),
10492 extension);
10493 uint64_t displacement = OPT_SCALE (extended, 64, scaling);
10494
10495 aarch64_set_mem_u64
10496 (cpu, address + displacement, aarch64_get_vec_u64 (cpu, st, 0));
10497 }
10498
10499 /* 128 bit store scaled unsigned 12 bit. */
10500 static void
10501 fstrq_abs (sim_cpu *cpu, uint32_t offset)
10502 {
10503 FRegister a;
10504 unsigned st = INSTR (4, 0);
10505 unsigned rn = INSTR (9, 5);
10506 uint64_t addr;
10507
10508 aarch64_get_FP_long_double (cpu, st, & a);
10509
10510 addr = aarch64_get_reg_u64 (cpu, rn, SP_OK) + SCALE (offset, 128);
10511 aarch64_set_mem_long_double (cpu, addr, a);
10512 }
10513
10514 /* 128 bit store unscaled signed 9 bit with pre- or post-writeback. */
10515 static void
10516 fstrq_wb (sim_cpu *cpu, int32_t offset, WriteBack wb)
10517 {
10518 FRegister a;
10519 unsigned rn = INSTR (9, 5);
10520 unsigned st = INSTR (4, 0);
10521 uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
10522
10523 if (wb != Post)
10524 address += offset;
10525
10526 aarch64_get_FP_long_double (cpu, st, & a);
10527 aarch64_set_mem_long_double (cpu, address, a);
10528
10529 if (wb == Post)
10530 address += offset;
10531
10532 if (wb != NoWriteBack)
10533 aarch64_set_reg_u64 (cpu, rn, SP_OK, address);
10534 }
10535
10536 /* 128 bit store scaled or unscaled zero-
10537 or sign-extended 32-bit register offset. */
10538 static void
10539 fstrq_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
10540 {
10541 unsigned rm = INSTR (20, 16);
10542 unsigned rn = INSTR (9, 5);
10543 unsigned st = INSTR (4, 0);
10544
10545 uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
10546 int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP),
10547 extension);
10548 uint64_t displacement = OPT_SCALE (extended, 128, scaling);
10549
10550 FRegister a;
10551
10552 aarch64_get_FP_long_double (cpu, st, & a);
10553 aarch64_set_mem_long_double (cpu, address + displacement, a);
10554 }
10555
10556 static void
10557 dexLoadImmediatePrePost (sim_cpu *cpu)
10558 {
10559 /* instr[31,30] = size
10560 instr[29,27] = 111
10561 instr[26] = V
10562 instr[25,24] = 00
10563 instr[23,22] = opc
10564 instr[21] = 0
10565 instr[20,12] = simm9
10566 instr[11] = wb : 0 ==> Post, 1 ==> Pre
10567 instr[10] = 0
10568 instr[9,5] = Rn may be SP.
10569 instr[4,0] = Rt */
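 /* Worked example, for illustration: a 64-bit "LDR Xt, [Xn, #simm]!"
    has size = 11 and opc = 01, so the dispatch value computed below is
    (3 << 2) | 1 = 13, selecting ldr_wb with wb = Pre (instr[11] = 1). */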
10570
10571 uint32_t V = INSTR (26, 26);
10572 uint32_t dispatch = ((INSTR (31, 30) << 2) | INSTR (23, 22));
10573 int32_t imm = simm32 (aarch64_get_instr (cpu), 20, 12);
10574 WriteBack wb = INSTR (11, 11);
10575
10576 if (!V)
10577 {
10578 /* GReg operations. */
10579 switch (dispatch)
10580 {
10581 case 0: strb_wb (cpu, imm, wb); return;
10582 case 1: ldrb32_wb (cpu, imm, wb); return;
10583 case 2: ldrsb_wb (cpu, imm, wb); return;
10584 case 3: ldrsb32_wb (cpu, imm, wb); return;
10585 case 4: strh_wb (cpu, imm, wb); return;
10586 case 5: ldrh32_wb (cpu, imm, wb); return;
10587 case 6: ldrsh64_wb (cpu, imm, wb); return;
10588 case 7: ldrsh32_wb (cpu, imm, wb); return;
10589 case 8: str32_wb (cpu, imm, wb); return;
10590 case 9: ldr32_wb (cpu, imm, wb); return;
10591 case 10: ldrsw_wb (cpu, imm, wb); return;
10592 case 12: str_wb (cpu, imm, wb); return;
10593 case 13: ldr_wb (cpu, imm, wb); return;
10594
10595 default:
10596 case 11:
10597 case 14:
10598 case 15:
10599 HALT_UNALLOC;
10600 }
10601 }
10602
10603 /* FReg operations. */
10604 switch (dispatch)
10605 {
10606 case 2: fstrq_wb (cpu, imm, wb); return;
10607 case 3: fldrq_wb (cpu, imm, wb); return;
10608 case 8: fstrs_wb (cpu, imm, wb); return;
10609 case 9: fldrs_wb (cpu, imm, wb); return;
10610 case 12: fstrd_wb (cpu, imm, wb); return;
10611 case 13: fldrd_wb (cpu, imm, wb); return;
10612
10613 case 0: /* STUR 8 bit FP. */
10614 case 1: /* LDUR 8 bit FP. */
10615 case 4: /* STUR 16 bit FP. */
10616 case 5: /* LDUR 16 bit FP. */
10617 HALT_NYI;
10618
10619 default:
10620 case 6:
10621 case 7:
10622 case 10:
10623 case 11:
10624 case 14:
10625 case 15:
10626 HALT_UNALLOC;
10627 }
10628 }
10629
10630 static void
10631 dexLoadRegisterOffset (sim_cpu *cpu)
10632 {
10633 /* instr[31,30] = size
10634 instr[29,27] = 111
10635 instr[26] = V
10636 instr[25,24] = 00
10637 instr[23,22] = opc
10638 instr[21] = 1
10639 instr[20,16] = rm
10640 instr[15,13] = option : 010 ==> UXTW, 011 ==> UXTX/LSL,
10641 110 ==> SXTW, 111 ==> SXTX,
10642 ow ==> RESERVED
10643 instr[12] = scaled
10644 instr[11,10] = 10
10645 instr[9,5] = rn
10646 instr[4,0] = rt. */
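 /* Worked example, for illustration: "LDR Xt, [Xn, Wm, UXTW #3]" has
    option = 010, scaled = 1 and size = 11, so the zero-extended Wm
    index is scaled by the 8-byte access size before being added to Xn. */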
10647
10648 uint32_t V = INSTR (26, 26);
10649 uint32_t dispatch = ((INSTR (31, 30) << 2) | INSTR (23, 22));
10650 Scaling scale = INSTR (12, 12);
10651 Extension extensionType = INSTR (15, 13);
10652
10653 /* Check for illegal extension types. */
10654 if (uimm (extensionType, 1, 1) == 0)
10655 HALT_UNALLOC;
10656
10657 if (extensionType == UXTX || extensionType == SXTX)
10658 extensionType = NoExtension;
10659
10660 if (!V)
10661 {
10662 /* GReg operations. */
10663 switch (dispatch)
10664 {
10665 case 0: strb_scale_ext (cpu, scale, extensionType); return;
10666 case 1: ldrb32_scale_ext (cpu, scale, extensionType); return;
10667 case 2: ldrsb_scale_ext (cpu, scale, extensionType); return;
10668 case 3: ldrsb32_scale_ext (cpu, scale, extensionType); return;
10669 case 4: strh_scale_ext (cpu, scale, extensionType); return;
10670 case 5: ldrh32_scale_ext (cpu, scale, extensionType); return;
10671 case 6: ldrsh_scale_ext (cpu, scale, extensionType); return;
10672 case 7: ldrsh32_scale_ext (cpu, scale, extensionType); return;
10673 case 8: str32_scale_ext (cpu, scale, extensionType); return;
10674 case 9: ldr32_scale_ext (cpu, scale, extensionType); return;
10675 case 10: ldrsw_scale_ext (cpu, scale, extensionType); return;
10676 case 12: str_scale_ext (cpu, scale, extensionType); return;
10677 case 13: ldr_scale_ext (cpu, scale, extensionType); return;
10678 case 14: prfm_scale_ext (cpu, scale, extensionType); return;
10679
10680 default:
10681 case 11:
10682 case 15:
10683 HALT_UNALLOC;
10684 }
10685 }
10686
10687 /* FReg operations. */
10688 switch (dispatch)
10689 {
10690 case 1: /* LDR 8 bit FP. */
10691 HALT_NYI;
10692 case 3: fldrq_scale_ext (cpu, scale, extensionType); return;
10693 case 5: /* LDR 16 bit FP. */
10694 HALT_NYI;
10695 case 9: fldrs_scale_ext (cpu, scale, extensionType); return;
10696 case 13: fldrd_scale_ext (cpu, scale, extensionType); return;
10697
10698 case 0: fstrb_scale_ext (cpu, scale, extensionType); return;
10699 case 2: fstrq_scale_ext (cpu, scale, extensionType); return;
10700 case 4: fstrh_scale_ext (cpu, scale, extensionType); return;
10701 case 8: fstrs_scale_ext (cpu, scale, extensionType); return;
10702 case 12: fstrd_scale_ext (cpu, scale, extensionType); return;
10703
10704 default:
10705 case 6:
10706 case 7:
10707 case 10:
10708 case 11:
10709 case 14:
10710 case 15:
10711 HALT_UNALLOC;
10712 }
10713 }
10714
10715 static void
10716 dexLoadUnsignedImmediate (sim_cpu *cpu)
10717 {
10718 /* instr[29,24] = 111_01
10719 instr[31,30] = size
10720 instr[26] = V
10721 instr[23,22] = opc
10722 instr[21,10] = uimm12 : unsigned immediate offset
10723 instr[9,5] = rn may be SP.
10724 instr[4,0] = rt. */
10725
10726 uint32_t V = INSTR (26,26);
10727 uint32_t dispatch = ((INSTR (31, 30) << 2) | INSTR (23, 22));
10728 uint32_t imm = INSTR (21, 10);
10729
10730 if (!V)
10731 {
10732 /* GReg operations. */
10733 switch (dispatch)
10734 {
10735 case 0: strb_abs (cpu, imm); return;
10736 case 1: ldrb32_abs (cpu, imm); return;
10737 case 2: ldrsb_abs (cpu, imm); return;
10738 case 3: ldrsb32_abs (cpu, imm); return;
10739 case 4: strh_abs (cpu, imm); return;
10740 case 5: ldrh32_abs (cpu, imm); return;
10741 case 6: ldrsh_abs (cpu, imm); return;
10742 case 7: ldrsh32_abs (cpu, imm); return;
10743 case 8: str32_abs (cpu, imm); return;
10744 case 9: ldr32_abs (cpu, imm); return;
10745 case 10: ldrsw_abs (cpu, imm); return;
10746 case 12: str_abs (cpu, imm); return;
10747 case 13: ldr_abs (cpu, imm); return;
10748 case 14: prfm_abs (cpu, imm); return;
10749
10750 default:
10751 case 11:
10752 case 15:
10753 HALT_UNALLOC;
10754 }
10755 }
10756
10757 /* FReg operations. */
10758 switch (dispatch)
10759 {
10760 case 0: fstrb_abs (cpu, imm); return;
10761 case 4: fstrh_abs (cpu, imm); return;
10762 case 8: fstrs_abs (cpu, imm); return;
10763 case 12: fstrd_abs (cpu, imm); return;
10764 case 2: fstrq_abs (cpu, imm); return;
10765
10766 case 1: fldrb_abs (cpu, imm); return;
10767 case 5: fldrh_abs (cpu, imm); return;
10768 case 9: fldrs_abs (cpu, imm); return;
10769 case 13: fldrd_abs (cpu, imm); return;
10770 case 3: fldrq_abs (cpu, imm); return;
10771
10772 default:
10773 case 6:
10774 case 7:
10775 case 10:
10776 case 11:
10777 case 14:
10778 case 15:
10779 HALT_UNALLOC;
10780 }
10781 }
10782
10783 static void
10784 dexLoadExclusive (sim_cpu *cpu)
10785 {
10786 /* assert instr[29:24] = 001000;
10787 instr[31,30] = size
10788 instr[23] = 0 if exclusive
10789 instr[22] = L : 1 if load, 0 if store
10790 instr[21] = 1 if pair
10791 instr[20,16] = Rs
10792 instr[15] = o0 : 1 if ordered
10793 instr[14,10] = Rt2
10794 instr[9,5] = Rn
10795 instr[4,0] = Rt. */
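 /* Only the single-register, non-pair forms are implemented: e.g.
    "LDXR Xt, [Xn]" has L = 1 and pair = 0, so instr[22,21] = 10 and
    ldxr is selected; "STXR Ws, Xt, [Xn]" gives instr[22,21] = 00 and
    selects stxr. The pair forms (instr[21] = 1) halt as NYI below. */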
10796
10797 switch (INSTR (22, 21))
10798 {
10799 case 2: ldxr (cpu); return;
10800 case 0: stxr (cpu); return;
10801 default: HALT_NYI;
10802 }
10803 }
10804
10805 static void
10806 dexLoadOther (sim_cpu *cpu)
10807 {
10808 uint32_t dispatch;
10809
10810 /* instr[29,25] = 111_0
10811 instr[24] == 0 ==> dispatch, 1 ==> ldst reg unsigned immediate
10812 instr[21] and instr[11,10] form the secondary dispatch vector. */
10813 if (INSTR (24, 24))
10814 {
10815 dexLoadUnsignedImmediate (cpu);
10816 return;
10817 }
10818
10819 dispatch = ((INSTR (21, 21) << 2) | INSTR (11, 10));
10820 switch (dispatch)
10821 {
10822 case 0: dexLoadUnscaledImmediate (cpu); return;
10823 case 1: dexLoadImmediatePrePost (cpu); return;
10824 case 3: dexLoadImmediatePrePost (cpu); return;
10825 case 6: dexLoadRegisterOffset (cpu); return;
10826
10827 default:
10828 case 2:
10829 case 4:
10830 case 5:
10831 case 7:
10832 HALT_NYI;
10833 }
10834 }
10835
10836 static void
10837 store_pair_u32 (sim_cpu *cpu, int32_t offset, WriteBack wb)
10838 {
10839 unsigned rn = INSTR (14, 10);
10840 unsigned rd = INSTR (9, 5);
10841 unsigned rm = INSTR (4, 0);
10842 uint64_t address = aarch64_get_reg_u64 (cpu, rd, SP_OK);
10843
10844 if ((rn == rd || rm == rd) && wb != NoWriteBack)
10845 HALT_UNALLOC; /* Writeback with the base equal to a transfer register is unpredictable. */
10846
10847 offset <<= 2;
10848
10849 if (wb != Post)
10850 address += offset;
10851
10852 aarch64_set_mem_u32 (cpu, address,
10853 aarch64_get_reg_u32 (cpu, rm, NO_SP));
10854 aarch64_set_mem_u32 (cpu, address + 4,
10855 aarch64_get_reg_u32 (cpu, rn, NO_SP));
10856
10857 if (wb == Post)
10858 address += offset;
10859
10860 if (wb != NoWriteBack)
10861 aarch64_set_reg_u64 (cpu, rd, SP_OK, address);
10862 }
10863
10864 static void
10865 store_pair_u64 (sim_cpu *cpu, int32_t offset, WriteBack wb)
10866 {
10867 unsigned rn = INSTR (14, 10);
10868 unsigned rd = INSTR (9, 5);
10869 unsigned rm = INSTR (4, 0);
10870 uint64_t address = aarch64_get_reg_u64 (cpu, rd, SP_OK);
10871
10872 if ((rn == rd || rm == rd) && wb != NoWriteBack)
10873 HALT_UNALLOC; /* Writeback with the base equal to a transfer register is unpredictable. */
10874
10875 offset <<= 3;
10876
10877 if (wb != Post)
10878 address += offset;
10879
10880 aarch64_set_mem_u64 (cpu, address,
10881 aarch64_get_reg_u64 (cpu, rm, NO_SP));
10882 aarch64_set_mem_u64 (cpu, address + 8,
10883 aarch64_get_reg_u64 (cpu, rn, NO_SP));
10884
10885 if (wb == Post)
10886 address += offset;
10887
10888 if (wb != NoWriteBack)
10889 aarch64_set_reg_u64 (cpu, rd, SP_OK, address);
10890 }
10891
10892 static void
10893 load_pair_u32 (sim_cpu *cpu, int32_t offset, WriteBack wb)
10894 {
10895 unsigned rn = INSTR (14, 10);
10896 unsigned rd = INSTR (9, 5);
10897 unsigned rm = INSTR (4, 0);
10898 uint64_t address = aarch64_get_reg_u64 (cpu, rd, SP_OK);
10899
10900 /* Rt == Rt2 in a load pair is unpredictable; treat it as unallocated. */
10901 if (rn == rm)
10902 HALT_UNALLOC;
10903
10904 offset <<= 2;
10905
10906 if (wb != Post)
10907 address += offset;
10908
10909 aarch64_set_reg_u64 (cpu, rm, SP_OK, aarch64_get_mem_u32 (cpu, address));
10910 aarch64_set_reg_u64 (cpu, rn, SP_OK, aarch64_get_mem_u32 (cpu, address + 4));
10911
10912 if (wb == Post)
10913 address += offset;
10914
10915 if (wb != NoWriteBack)
10916 aarch64_set_reg_u64 (cpu, rd, SP_OK, address);
10917 }
10918
10919 static void
10920 load_pair_s32 (sim_cpu *cpu, int32_t offset, WriteBack wb)
10921 {
10922 unsigned rn = INSTR (14, 10);
10923 unsigned rd = INSTR (9, 5);
10924 unsigned rm = INSTR (4, 0);
10925 uint64_t address = aarch64_get_reg_u64 (cpu, rd, SP_OK);
10926
10927 /* Rt == Rt2 in a load pair is unpredictable; treat it as unallocated. */
10928 if (rn == rm)
10929 HALT_UNALLOC;
10930
10931 offset <<= 2;
10932
10933 if (wb != Post)
10934 address += offset;
10935
10936 aarch64_set_reg_s64 (cpu, rm, SP_OK, aarch64_get_mem_s32 (cpu, address));
10937 aarch64_set_reg_s64 (cpu, rn, SP_OK, aarch64_get_mem_s32 (cpu, address + 4));
10938
10939 if (wb == Post)
10940 address += offset;
10941
10942 if (wb != NoWriteBack)
10943 aarch64_set_reg_u64 (cpu, rd, SP_OK, address);
10944 }
10945
10946 static void
10947 load_pair_u64 (sim_cpu *cpu, int32_t offset, WriteBack wb)
10948 {
10949 unsigned rn = INSTR (14, 10);
10950 unsigned rd = INSTR (9, 5);
10951 unsigned rm = INSTR (4, 0);
10952 uint64_t address = aarch64_get_reg_u64 (cpu, rd, SP_OK);
10953
10954 /* Rt == Rt2 in a load pair is unpredictable; treat it as unallocated. */
10955 if (rn == rm)
10956 HALT_UNALLOC;
10957
10958 offset <<= 3;
10959
10960 if (wb != Post)
10961 address += offset;
10962
10963 aarch64_set_reg_u64 (cpu, rm, SP_OK, aarch64_get_mem_u64 (cpu, address));
10964 aarch64_set_reg_u64 (cpu, rn, SP_OK, aarch64_get_mem_u64 (cpu, address + 8));
10965
10966 if (wb == Post)
10967 address += offset;
10968
10969 if (wb != NoWriteBack)
10970 aarch64_set_reg_u64 (cpu, rd, SP_OK, address);
10971 }
10972
10973 static void
10974 dex_load_store_pair_gr (sim_cpu *cpu)
10975 {
10976 /* instr[31,30] = size (10=> 64-bit, 01=> signed 32-bit, 00=> 32-bit)
10977 instr[29,25] = instruction encoding: 101_0
10978 instr[26] = V : 1 if fp 0 if gp
10979 instr[24,23] = addressing mode (10=> offset, 01=> post, 11=> pre)
10980 instr[22] = load/store (1=> load)
10981 instr[21,15] = signed, scaled, offset
10982 instr[14,10] = Rn
10983 instr[ 9, 5] = Rd
10984 instr[ 4, 0] = Rm. */
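 /* Worked example, for illustration: "STP X1, X2, [SP, #-16]!" encodes
    imm7 = -2 (store_pair_u64 below scales the offset by the 8-byte
    access size) with size = 10, mode = 11 (pre) and L = 0, giving
    dispatch (2 << 3) | 6 = 22. */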
10985
10986 uint32_t dispatch = ((INSTR (31, 30) << 3) | INSTR (24, 22));
10987 int32_t offset = simm32 (aarch64_get_instr (cpu), 21, 15);
10988
10989 switch (dispatch)
10990 {
10991 case 2: store_pair_u32 (cpu, offset, Post); return;
10992 case 3: load_pair_u32 (cpu, offset, Post); return;
10993 case 4: store_pair_u32 (cpu, offset, NoWriteBack); return;
10994 case 5: load_pair_u32 (cpu, offset, NoWriteBack); return;
10995 case 6: store_pair_u32 (cpu, offset, Pre); return;
10996 case 7: load_pair_u32 (cpu, offset, Pre); return;
10997
10998 case 11: load_pair_s32 (cpu, offset, Post); return;
10999 case 13: load_pair_s32 (cpu, offset, NoWriteBack); return;
11000 case 15: load_pair_s32 (cpu, offset, Pre); return;
11001
11002 case 18: store_pair_u64 (cpu, offset, Post); return;
11003 case 19: load_pair_u64 (cpu, offset, Post); return;
11004 case 20: store_pair_u64 (cpu, offset, NoWriteBack); return;
11005 case 21: load_pair_u64 (cpu, offset, NoWriteBack); return;
11006 case 22: store_pair_u64 (cpu, offset, Pre); return;
11007 case 23: load_pair_u64 (cpu, offset, Pre); return;
11008
11009 default:
11010 HALT_UNALLOC;
11011 }
11012 }
11013
11014 static void
11015 store_pair_float (sim_cpu *cpu, int32_t offset, WriteBack wb)
11016 {
11017 unsigned rn = INSTR (14, 10);
11018 unsigned rd = INSTR (9, 5);
11019 unsigned rm = INSTR (4, 0);
11020 uint64_t address = aarch64_get_reg_u64 (cpu, rd, SP_OK);
11021
11022 offset <<= 2;
11023
11024 if (wb != Post)
11025 address += offset;
11026
11027 aarch64_set_mem_u32 (cpu, address, aarch64_get_vec_u32 (cpu, rm, 0));
11028 aarch64_set_mem_u32 (cpu, address + 4, aarch64_get_vec_u32 (cpu, rn, 0));
11029
11030 if (wb == Post)
11031 address += offset;
11032
11033 if (wb != NoWriteBack)
11034 aarch64_set_reg_u64 (cpu, rd, SP_OK, address);
11035 }
11036
11037 static void
11038 store_pair_double (sim_cpu *cpu, int32_t offset, WriteBack wb)
11039 {
11040 unsigned rn = INSTR (14, 10);
11041 unsigned rd = INSTR (9, 5);
11042 unsigned rm = INSTR (4, 0);
11043 uint64_t address = aarch64_get_reg_u64 (cpu, rd, SP_OK);
11044
11045 offset <<= 3;
11046
11047 if (wb != Post)
11048 address += offset;
11049
11050 aarch64_set_mem_u64 (cpu, address, aarch64_get_vec_u64 (cpu, rm, 0));
11051 aarch64_set_mem_u64 (cpu, address + 8, aarch64_get_vec_u64 (cpu, rn, 0));
11052
11053 if (wb == Post)
11054 address += offset;
11055
11056 if (wb != NoWriteBack)
11057 aarch64_set_reg_u64 (cpu, rd, SP_OK, address);
11058 }
11059
11060 static void
11061 store_pair_long_double (sim_cpu *cpu, int32_t offset, WriteBack wb)
11062 {
11063 FRegister a;
11064 unsigned rn = INSTR (14, 10);
11065 unsigned rd = INSTR (9, 5);
11066 unsigned rm = INSTR (4, 0);
11067 uint64_t address = aarch64_get_reg_u64 (cpu, rd, SP_OK);
11068
11069 offset <<= 4;
11070
11071 if (wb != Post)
11072 address += offset;
11073
11074 aarch64_get_FP_long_double (cpu, rm, & a);
11075 aarch64_set_mem_long_double (cpu, address, a);
11076 aarch64_get_FP_long_double (cpu, rn, & a);
11077 aarch64_set_mem_long_double (cpu, address + 16, a);
11078
11079 if (wb == Post)
11080 address += offset;
11081
11082 if (wb != NoWriteBack)
11083 aarch64_set_reg_u64 (cpu, rd, SP_OK, address);
11084 }
11085
11086 static void
11087 load_pair_float (sim_cpu *cpu, int32_t offset, WriteBack wb)
11088 {
11089 unsigned rn = INSTR (14, 10);
11090 unsigned rd = INSTR (9, 5);
11091 unsigned rm = INSTR (4, 0);
11092 uint64_t address = aarch64_get_reg_u64 (cpu, rd, SP_OK);
11093
11094 if (rm == rn)
11095 HALT_UNALLOC;
11096
11097 offset <<= 2;
11098
11099 if (wb != Post)
11100 address += offset;
11101
11102 aarch64_set_vec_u32 (cpu, rm, 0, aarch64_get_mem_u32 (cpu, address));
11103 aarch64_set_vec_u32 (cpu, rn, 0, aarch64_get_mem_u32 (cpu, address + 4));
11104
11105 if (wb == Post)
11106 address += offset;
11107
11108 if (wb != NoWriteBack)
11109 aarch64_set_reg_u64 (cpu, rd, SP_OK, address);
11110 }
11111
11112 static void
11113 load_pair_double (sim_cpu *cpu, int32_t offset, WriteBack wb)
11114 {
11115 unsigned rn = INSTR (14, 10);
11116 unsigned rd = INSTR (9, 5);
11117 unsigned rm = INSTR (4, 0);
11118 uint64_t address = aarch64_get_reg_u64 (cpu, rd, SP_OK);
11119
11120 if (rm == rn)
11121 HALT_UNALLOC;
11122
11123 offset <<= 3;
11124
11125 if (wb != Post)
11126 address += offset;
11127
11128 aarch64_set_vec_u64 (cpu, rm, 0, aarch64_get_mem_u64 (cpu, address));
11129 aarch64_set_vec_u64 (cpu, rn, 0, aarch64_get_mem_u64 (cpu, address + 8));
11130
11131 if (wb == Post)
11132 address += offset;
11133
11134 if (wb != NoWriteBack)
11135 aarch64_set_reg_u64 (cpu, rd, SP_OK, address);
11136 }
11137
11138 static void
11139 load_pair_long_double (sim_cpu *cpu, int32_t offset, WriteBack wb)
11140 {
11141 FRegister a;
11142 unsigned rn = INSTR (14, 10);
11143 unsigned rd = INSTR (9, 5);
11144 unsigned rm = INSTR (4, 0);
11145 uint64_t address = aarch64_get_reg_u64 (cpu, rd, SP_OK);
11146
11147 if (rm == rn)
11148 HALT_UNALLOC;
11149
11150 offset <<= 4;
11151
11152 if (wb != Post)
11153 address += offset;
11154
11155 aarch64_get_mem_long_double (cpu, address, & a);
11156 aarch64_set_FP_long_double (cpu, rm, a);
11157 aarch64_get_mem_long_double (cpu, address + 16, & a);
11158 aarch64_set_FP_long_double (cpu, rn, a);
11159
11160 if (wb == Post)
11161 address += offset;
11162
11163 if (wb != NoWriteBack)
11164 aarch64_set_reg_u64 (cpu, rd, SP_OK, address);
11165 }
11166
11167 static void
11168 dex_load_store_pair_fp (sim_cpu *cpu)
11169 {
11170 /* instr[31,30] = size (10=> 128-bit, 01=> 64-bit, 00=> 32-bit)
11171 instr[29,25] = instruction encoding
11172 instr[24,23] = addressing mode (10=> offset, 01=> post, 11=> pre)
11173 instr[22] = load/store (1=> load)
11174 instr[21,15] = signed, scaled, offset
11175 instr[14,10] = Rn
11176 instr[ 9, 5] = Rd
11177 instr[ 4, 0] = Rm */
11178
11179 uint32_t dispatch = ((INSTR (31, 30) << 3) | INSTR (24, 22));
11180 int32_t offset = simm32 (aarch64_get_instr (cpu), 21, 15);
11181
11182 switch (dispatch)
11183 {
11184 case 2: store_pair_float (cpu, offset, Post); return;
11185 case 3: load_pair_float (cpu, offset, Post); return;
11186 case 4: store_pair_float (cpu, offset, NoWriteBack); return;
11187 case 5: load_pair_float (cpu, offset, NoWriteBack); return;
11188 case 6: store_pair_float (cpu, offset, Pre); return;
11189 case 7: load_pair_float (cpu, offset, Pre); return;
11190
11191 case 10: store_pair_double (cpu, offset, Post); return;
11192 case 11: load_pair_double (cpu, offset, Post); return;
11193 case 12: store_pair_double (cpu, offset, NoWriteBack); return;
11194 case 13: load_pair_double (cpu, offset, NoWriteBack); return;
11195 case 14: store_pair_double (cpu, offset, Pre); return;
11196 case 15: load_pair_double (cpu, offset, Pre); return;
11197
11198 case 18: store_pair_long_double (cpu, offset, Post); return;
11199 case 19: load_pair_long_double (cpu, offset, Post); return;
11200 case 20: store_pair_long_double (cpu, offset, NoWriteBack); return;
11201 case 21: load_pair_long_double (cpu, offset, NoWriteBack); return;
11202 case 22: store_pair_long_double (cpu, offset, Pre); return;
11203 case 23: load_pair_long_double (cpu, offset, Pre); return;
11204
11205 default:
11206 HALT_UNALLOC;
11207 }
11208 }
11209
11210 static inline unsigned
11211 vec_reg (unsigned v, unsigned o)
11212 {
11213 return (v + o) & 0x1F;
11214 }
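/* E.g. an LD4 naming Vd = V30 transfers V30, V31, V0 and V1: the
   register numbers of multi-register operations wrap modulo 32,
   hence the 0x1F mask above. */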
11215
11216 /* Load multiple N-element structures to N consecutive registers. */
11217 static void
11218 vec_load (sim_cpu *cpu, uint64_t address, unsigned N)
11219 {
11220 int all = INSTR (30, 30);
11221 unsigned size = INSTR (11, 10);
11222 unsigned vd = INSTR (4, 0);
11223 unsigned i;
11224
11225 switch (size)
11226 {
11227 case 0: /* 8-bit operations. */
11228 if (all)
11229 for (i = 0; i < (16 * N); i++)
11230 aarch64_set_vec_u8 (cpu, vec_reg (vd, i >> 4), i & 15,
11231 aarch64_get_mem_u8 (cpu, address + i));
11232 else
11233 for (i = 0; i < (8 * N); i++)
11234 aarch64_set_vec_u8 (cpu, vec_reg (vd, i >> 3), i & 7,
11235 aarch64_get_mem_u8 (cpu, address + i));
11236 return;
11237
11238 case 1: /* 16-bit operations. */
11239 if (all)
11240 for (i = 0; i < (8 * N); i++)
11241 aarch64_set_vec_u16 (cpu, vec_reg (vd, i >> 3), i & 7,
11242 aarch64_get_mem_u16 (cpu, address + i * 2));
11243 else
11244 for (i = 0; i < (4 * N); i++)
11245 aarch64_set_vec_u16 (cpu, vec_reg (vd, i >> 2), i & 3,
11246 aarch64_get_mem_u16 (cpu, address + i * 2));
11247 return;
11248
11249 case 2: /* 32-bit operations. */
11250 if (all)
11251 for (i = 0; i < (4 * N); i++)
11252 aarch64_set_vec_u32 (cpu, vec_reg (vd, i >> 2), i & 3,
11253 aarch64_get_mem_u32 (cpu, address + i * 4));
11254 else
11255 for (i = 0; i < (2 * N); i++)
11256 aarch64_set_vec_u32 (cpu, vec_reg (vd, i >> 1), i & 1,
11257 aarch64_get_mem_u32 (cpu, address + i * 4));
11258 return;
11259
11260 case 3: /* 64-bit operations. */
11261 if (all)
11262 for (i = 0; i < (2 * N); i++)
11263 aarch64_set_vec_u64 (cpu, vec_reg (vd, i >> 1), i & 1,
11264 aarch64_get_mem_u64 (cpu, address + i * 8));
11265 else
11266 for (i = 0; i < N; i++)
11267 aarch64_set_vec_u64 (cpu, vec_reg (vd, i), 0,
11268 aarch64_get_mem_u64 (cpu, address + i * 8));
11269 return;
11270 }
11271 }
11272
11273 /* LD4: load multiple 4-element to four consecutive registers. */
11274 static void
11275 LD4 (sim_cpu *cpu, uint64_t address)
11276 {
11277 vec_load (cpu, address, 4);
11278 }
11279
11280 /* LD3: load multiple 3-element structures to three consecutive registers. */
11281 static void
11282 LD3 (sim_cpu *cpu, uint64_t address)
11283 {
11284 vec_load (cpu, address, 3);
11285 }
11286
11287 /* LD2: load multiple 2-element structures to two consecutive registers. */
11288 static void
11289 LD2 (sim_cpu *cpu, uint64_t address)
11290 {
11291 vec_load (cpu, address, 2);
11292 }
11293
11294 /* Load multiple 1-element structures into one register. */
11295 static void
11296 LD1_1 (sim_cpu *cpu, uint64_t address)
11297 {
11298 int all = INSTR (30, 30);
11299 unsigned size = INSTR (11, 10);
11300 unsigned vd = INSTR (4, 0);
11301 unsigned i;
11302
11303 switch (size)
11304 {
11305 case 0:
11306 /* LD1 {Vd.16b}, addr, #16 */
11307 /* LD1 {Vd.8b}, addr, #8 */
11308 for (i = 0; i < (all ? 16 : 8); i++)
11309 aarch64_set_vec_u8 (cpu, vd, i,
11310 aarch64_get_mem_u8 (cpu, address + i));
11311 return;
11312
11313 case 1:
11314 /* LD1 {Vd.8h}, addr, #16 */
11315 /* LD1 {Vd.4h}, addr, #8 */
11316 for (i = 0; i < (all ? 8 : 4); i++)
11317 aarch64_set_vec_u16 (cpu, vd, i,
11318 aarch64_get_mem_u16 (cpu, address + i * 2));
11319 return;
11320
11321 case 2:
11322 /* LD1 {Vd.4s}, addr, #16 */
11323 /* LD1 {Vd.2s}, addr, #8 */
11324 for (i = 0; i < (all ? 4 : 2); i++)
11325 aarch64_set_vec_u32 (cpu, vd, i,
11326 aarch64_get_mem_u32 (cpu, address + i * 4));
11327 return;
11328
11329 case 3:
11330 /* LD1 {Vd.2d}, addr, #16 */
11331 /* LD1 {Vd.1d}, addr, #8 */
11332 for (i = 0; i < (all ? 2 : 1); i++)
11333 aarch64_set_vec_u64 (cpu, vd, i,
11334 aarch64_get_mem_u64 (cpu, address + i * 8));
11335 return;
11336 }
11337 }
11338
11339 /* Load multiple 1-element structures into two registers. */
11340 static void
11341 LD1_2 (sim_cpu *cpu, uint64_t address)
11342 {
11343 /* FIXME: This uses the same linear copy as the LD2 version. That
11344 is right for LD1, but LD2 is supposed to de-interleave its
11345 2-element structures, so one shared implementation cannot suit both. */
11346 vec_load (cpu, address, 2);
11347 }
11348
11349 /* Load multiple 1-element structures into three registers. */
11350 static void
11351 LD1_3 (sim_cpu *cpu, uint64_t address)
11352 {
11353 /* FIXME: This uses the same linear copy as the LD3 version. That
11354 is right for LD1, but LD3 is supposed to de-interleave its
11355 3-element structures, so one shared implementation cannot suit both. */
11356 vec_load (cpu, address, 3);
11357 }
11358
11359 /* Load multiple 1-element structures into four registers. */
11360 static void
11361 LD1_4 (sim_cpu *cpu, uint64_t address)
11362 {
11363 /* FIXME: This uses the same linear copy as the LD4 version. That
11364 is right for LD1, but LD4 is supposed to de-interleave its
11365 4-element structures, so one shared implementation cannot suit both. */
11366 vec_load (cpu, address, 4);
11367 }
11368
11369 /* Store multiple N-element structures to N consecutive registers. */
11370 static void
11371 vec_store (sim_cpu *cpu, uint64_t address, unsigned N)
11372 {
11373 int all = INSTR (30, 30);
11374 unsigned size = INSTR (11, 10);
11375 unsigned vd = INSTR (4, 0);
11376 unsigned i;
11377
11378 switch (size)
11379 {
11380 case 0: /* 8-bit operations. */
11381 if (all)
11382 for (i = 0; i < (16 * N); i++)
11383 aarch64_set_mem_u8
11384 (cpu, address + i,
11385 aarch64_get_vec_u8 (cpu, vec_reg (vd, i >> 4), i & 15));
11386 else
11387 for (i = 0; i < (8 * N); i++)
11388 aarch64_set_mem_u8
11389 (cpu, address + i,
11390 aarch64_get_vec_u8 (cpu, vec_reg (vd, i >> 3), i & 7));
11391 return;
11392
11393 case 1: /* 16-bit operations. */
11394 if (all)
11395 for (i = 0; i < (8 * N); i++)
11396 aarch64_set_mem_u16
11397 (cpu, address + i * 2,
11398 aarch64_get_vec_u16 (cpu, vec_reg (vd, i >> 3), i & 7));
11399 else
11400 for (i = 0; i < (4 * N); i++)
11401 aarch64_set_mem_u16
11402 (cpu, address + i * 2,
11403 aarch64_get_vec_u16 (cpu, vec_reg (vd, i >> 2), i & 3));
11404 return;
11405
11406 case 2: /* 32-bit operations. */
11407 if (all)
11408 for (i = 0; i < (4 * N); i++)
11409 aarch64_set_mem_u32
11410 (cpu, address + i * 4,
11411 aarch64_get_vec_u32 (cpu, vec_reg (vd, i >> 2), i & 3));
11412 else
11413 for (i = 0; i < (2 * N); i++)
11414 aarch64_set_mem_u32
11415 (cpu, address + i * 4,
11416 aarch64_get_vec_u32 (cpu, vec_reg (vd, i >> 1), i & 1));
11417 return;
11418
11419 case 3: /* 64-bit operations. */
11420 if (all)
11421 for (i = 0; i < (2 * N); i++)
11422 aarch64_set_mem_u64
11423 (cpu, address + i * 8,
11424 aarch64_get_vec_u64 (cpu, vec_reg (vd, i >> 1), i & 1));
11425 else
11426 for (i = 0; i < N; i++)
11427 aarch64_set_mem_u64
11428 (cpu, address + i * 8,
11429 aarch64_get_vec_u64 (cpu, vec_reg (vd, i), 0));
11430 return;
11431 }
11432 }
11433
11434 /* Store multiple 4-element structure to four consecutive registers. */
11435 static void
11436 ST4 (sim_cpu *cpu, uint64_t address)
11437 {
11438 vec_store (cpu, address, 4);
11439 }
11440
11441 /* Store multiple 3-element structures to three consecutive registers. */
11442 static void
11443 ST3 (sim_cpu *cpu, uint64_t address)
11444 {
11445 vec_store (cpu, address, 3);
11446 }
11447
11448 /* Store multiple 2-element structures to two consecutive registers. */
11449 static void
11450 ST2 (sim_cpu *cpu, uint64_t address)
11451 {
11452 vec_store (cpu, address, 2);
11453 }
11454
11455 /* Store multiple 1-element structures into one register. */
11456 static void
11457 ST1_1 (sim_cpu *cpu, uint64_t address)
11458 {
11459 int all = INSTR (30, 30);
11460 unsigned size = INSTR (11, 10);
11461 unsigned vd = INSTR (4, 0);
11462 unsigned i;
11463
11464 switch (size)
11465 {
11466 case 0:
11467 for (i = 0; i < (all ? 16 : 8); i++)
11468 aarch64_set_mem_u8 (cpu, address + i,
11469 aarch64_get_vec_u8 (cpu, vd, i));
11470 return;
11471
11472 case 1:
11473 for (i = 0; i < (all ? 8 : 4); i++)
11474 aarch64_set_mem_u16 (cpu, address + i * 2,
11475 aarch64_get_vec_u16 (cpu, vd, i));
11476 return;
11477
11478 case 2:
11479 for (i = 0; i < (all ? 4 : 2); i++)
11480 aarch64_set_mem_u32 (cpu, address + i * 4,
11481 aarch64_get_vec_u32 (cpu, vd, i));
11482 return;
11483
11484 case 3:
11485 for (i = 0; i < (all ? 2 : 1); i++)
11486 aarch64_set_mem_u64 (cpu, address + i * 8,
11487 aarch64_get_vec_u64 (cpu, vd, i));
11488 return;
11489 }
11490 }
11491
11492 /* Store multiple 1-element structures into two registers. */
11493 static void
11494 ST1_2 (sim_cpu *cpu, uint64_t address)
11495 {
11496 /* FIXME: This uses the same linear copy as the ST2 version. That
11497 is right for ST1, but ST2 is supposed to interleave its
11498 2-element structures, so one shared implementation cannot suit both. */
11499 vec_store (cpu, address, 2);
11500 }
11501
11502 /* Store multiple 1-element structures into three registers. */
11503 static void
11504 ST1_3 (sim_cpu *cpu, uint64_t address)
11505 {
11506 /* FIXME: This uses the same linear copy as the ST3 version. That
11507 is right for ST1, but ST3 is supposed to interleave its
11508 3-element structures, so one shared implementation cannot suit both. */
11509 vec_store (cpu, address, 3);
11510 }
11511
11512 /* Store multiple 1-element structures into four registers. */
11513 static void
11514 ST1_4 (sim_cpu *cpu, uint64_t address)
11515 {
11516 /* FIXME: This uses the same linear copy as the ST4 version. That
11517 is right for ST1, but ST4 is supposed to interleave its
11518 4-element structures, so one shared implementation cannot suit both. */
11519 vec_store (cpu, address, 4);
11520 }
11521
11522 static void
11523 do_vec_LDnR (sim_cpu *cpu, uint64_t address)
11524 {
11525 /* instr[31] = 0
11526 instr[30] = element selector 0=>half, 1=>all elements
11527 instr[29,24] = 00 1101
11528 instr[23] = 0=>simple, 1=>post
11529 instr[22] = 1
11530 instr[21] = width: LD1R-or-LD3R (0) / LD2R-or-LD4R (1)
11531 instr[20,16] = 0 0000 (simple), Vinc (reg-post-inc, no SP),
11532 11111 (immediate post inc)
11533 instr[15,14] = 11
11534 instr[13] = width: LD1R-or-LD2R (0) / LD3R-or-LD4R (1)
11535 instr[12] = 0
11536 instr[11,10] = element size 00=> byte(b), 01=> half(h),
11537 10=> word(s), 11=> double(d)
11538 instr[9,5] = address
11539 instr[4,0] = Vd */
11540
11541 unsigned full = INSTR (30, 30);
11542 unsigned vd = INSTR (4, 0);
11543 unsigned size = INSTR (11, 10);
11544 int i;
11545
11546 NYI_assert (29, 24, 0x0D);
11547 NYI_assert (22, 22, 1);
11548 NYI_assert (15, 14, 3);
11549 NYI_assert (12, 12, 0);
11550
11551 switch ((INSTR (13, 13) << 1) | INSTR (21, 21))
11552 {
11553 case 0: /* LD1R. */
11554 switch (size)
11555 {
11556 case 0:
11557 {
11558 uint8_t val = aarch64_get_mem_u8 (cpu, address);
11559 for (i = 0; i < (full ? 16 : 8); i++)
11560 aarch64_set_vec_u8 (cpu, vd, i, val);
11561 break;
11562 }
11563
11564 case 1:
11565 {
11566 uint16_t val = aarch64_get_mem_u16 (cpu, address);
11567 for (i = 0; i < (full ? 8 : 4); i++)
11568 aarch64_set_vec_u16 (cpu, vd, i, val);
11569 break;
11570 }
11571
11572 case 2:
11573 {
11574 uint32_t val = aarch64_get_mem_u32 (cpu, address);
11575 for (i = 0; i < (full ? 4 : 2); i++)
11576 aarch64_set_vec_u32 (cpu, vd, i, val);
11577 break;
11578 }
11579
11580 case 3:
11581 {
11582 uint64_t val = aarch64_get_mem_u64 (cpu, address);
11583 for (i = 0; i < (full ? 2 : 1); i++)
11584 aarch64_set_vec_u64 (cpu, vd, i, val);
11585 break;
11586 }
11587
11588 default:
11589 HALT_UNALLOC;
11590 }
11591 break;
11592
11593 case 1: /* LD2R. */
11594 switch (size)
11595 {
11596 case 0:
11597 {
11598 uint8_t val1 = aarch64_get_mem_u8 (cpu, address);
11599 uint8_t val2 = aarch64_get_mem_u8 (cpu, address + 1);
11600
11601 for (i = 0; i < (full ? 16 : 8); i++)
11602 {
11603 aarch64_set_vec_u8 (cpu, vd, i, val1);
11604 aarch64_set_vec_u8 (cpu, vec_reg (vd, 1), i, val2);
11605 }
11606 break;
11607 }
11608
11609 case 1:
11610 {
11611 uint16_t val1 = aarch64_get_mem_u16 (cpu, address);
11612 uint16_t val2 = aarch64_get_mem_u16 (cpu, address + 2);
11613
11614 for (i = 0; i < (full ? 8 : 4); i++)
11615 {
11616 aarch64_set_vec_u16 (cpu, vd, i, val1);
11617 aarch64_set_vec_u16 (cpu, vec_reg (vd, 1), i, val2);
11618 }
11619 break;
11620 }
11621
11622 case 2:
11623 {
11624 uint32_t val1 = aarch64_get_mem_u32 (cpu, address);
11625 uint32_t val2 = aarch64_get_mem_u32 (cpu, address + 4);
11626
11627 for (i = 0; i < (full ? 4 : 2); i++)
11628 {
11629 aarch64_set_vec_u32 (cpu, vd, i, val1);
11630 aarch64_set_vec_u32 (cpu, vec_reg (vd, 1), i, val2);
11631 }
11632 break;
11633 }
11634
11635 case 3:
11636 {
11637 uint64_t val1 = aarch64_get_mem_u64 (cpu, address);
11638 uint64_t val2 = aarch64_get_mem_u64 (cpu, address + 8);
11639
11640 for (i = 0; i < (full ? 2 : 1); i++)
11641 {
11642 aarch64_set_vec_u64 (cpu, vd, i, val1);
11643 aarch64_set_vec_u64 (cpu, vec_reg (vd, 1), i, val2);
11644 }
11645 break;
11646 }
11647
11648 default:
11649 HALT_UNALLOC;
11650 }
11651 break;
11652
11653 case 2: /* LD3R. */
11654 switch (size)
11655 {
11656 case 0:
11657 {
11658 uint8_t val1 = aarch64_get_mem_u8 (cpu, address);
11659 uint8_t val2 = aarch64_get_mem_u8 (cpu, address + 1);
11660 uint8_t val3 = aarch64_get_mem_u8 (cpu, address + 2);
11661
11662 for (i = 0; i < (full ? 16 : 8); i++)
11663 {
11664 aarch64_set_vec_u8 (cpu, vd, i, val1);
11665 aarch64_set_vec_u8 (cpu, vec_reg (vd, 1), i, val2);
11666 aarch64_set_vec_u8 (cpu, vec_reg (vd, 2), i, val3);
11667 }
11668 }
11669 break;
11670
11671 case 1:
11672 {
11673 uint16_t val1 = aarch64_get_mem_u16 (cpu, address);
11674 uint16_t val2 = aarch64_get_mem_u16 (cpu, address + 2);
11675 uint16_t val3 = aarch64_get_mem_u16 (cpu, address + 4);
11676
11677 for (i = 0; i < (full ? 8 : 4); i++)
11678 {
11679 aarch64_set_vec_u16 (cpu, vd, i, val1);
11680 aarch64_set_vec_u16 (cpu, vec_reg (vd, 1), i, val2);
11681 aarch64_set_vec_u16 (cpu, vec_reg (vd, 2), i, val3);
11682 }
11683 }
11684 break;
11685
11686 case 2:
11687 {
11688 uint32_t val1 = aarch64_get_mem_u32 (cpu, address);
11689 uint32_t val2 = aarch64_get_mem_u32 (cpu, address + 4);
11690 uint32_t val3 = aarch64_get_mem_u32 (cpu, address + 8);
11691
11692 for (i = 0; i < (full ? 4 : 2); i++)
11693 {
11694 aarch64_set_vec_u32 (cpu, vd, i, val1);
11695 aarch64_set_vec_u32 (cpu, vec_reg (vd, 1), i, val2);
11696 aarch64_set_vec_u32 (cpu, vec_reg (vd, 2), i, val3);
11697 }
11698 }
11699 break;
11700
11701 case 3:
11702 {
11703 uint64_t val1 = aarch64_get_mem_u64 (cpu, address);
11704 uint64_t val2 = aarch64_get_mem_u64 (cpu, address + 8);
11705 uint64_t val3 = aarch64_get_mem_u64 (cpu, address + 16);
11706
11707 for (i = 0; i < (full ? 2 : 1); i++)
11708 {
11709 aarch64_set_vec_u64 (cpu, vd, i, val1);
11710 aarch64_set_vec_u64 (cpu, vec_reg (vd, 1), i, val2);
11711 aarch64_set_vec_u64 (cpu, vec_reg (vd, 2), i, val3);
11712 }
11713 }
11714 break;
11715
11716 default:
11717 HALT_UNALLOC;
11718 }
11719 break;
11720
11721 case 3: /* LD4R. */
11722 switch (size)
11723 {
11724 case 0:
11725 {
11726 uint8_t val1 = aarch64_get_mem_u8 (cpu, address);
11727 uint8_t val2 = aarch64_get_mem_u8 (cpu, address + 1);
11728 uint8_t val3 = aarch64_get_mem_u8 (cpu, address + 2);
11729 uint8_t val4 = aarch64_get_mem_u8 (cpu, address + 3);
11730
11731 for (i = 0; i < (full ? 16 : 8); i++)
11732 {
11733 aarch64_set_vec_u8 (cpu, vd, i, val1);
11734 aarch64_set_vec_u8 (cpu, vec_reg (vd, 1), i, val2);
11735 aarch64_set_vec_u8 (cpu, vec_reg (vd, 2), i, val3);
11736 aarch64_set_vec_u8 (cpu, vec_reg (vd, 3), i, val4);
11737 }
11738 }
11739 break;
11740
11741 case 1:
11742 {
11743 uint16_t val1 = aarch64_get_mem_u16 (cpu, address);
11744 uint16_t val2 = aarch64_get_mem_u16 (cpu, address + 2);
11745 uint16_t val3 = aarch64_get_mem_u16 (cpu, address + 4);
11746 uint16_t val4 = aarch64_get_mem_u16 (cpu, address + 6);
11747
11748 for (i = 0; i < (full ? 8 : 4); i++)
11749 {
11750 aarch64_set_vec_u16 (cpu, vd, i, val1);
11751 aarch64_set_vec_u16 (cpu, vec_reg (vd, 1), i, val2);
11752 aarch64_set_vec_u16 (cpu, vec_reg (vd, 2), i, val3);
11753 aarch64_set_vec_u16 (cpu, vec_reg (vd, 3), i, val4);
11754 }
11755 }
11756 break;
11757
11758 case 2:
11759 {
11760 uint32_t val1 = aarch64_get_mem_u32 (cpu, address);
11761 uint32_t val2 = aarch64_get_mem_u32 (cpu, address + 4);
11762 uint32_t val3 = aarch64_get_mem_u32 (cpu, address + 8);
11763 uint32_t val4 = aarch64_get_mem_u32 (cpu, address + 12);
11764
11765 for (i = 0; i < (full ? 4 : 2); i++)
11766 {
11767 aarch64_set_vec_u32 (cpu, vd, i, val1);
11768 aarch64_set_vec_u32 (cpu, vec_reg (vd, 1), i, val2);
11769 aarch64_set_vec_u32 (cpu, vec_reg (vd, 2), i, val3);
11770 aarch64_set_vec_u32 (cpu, vec_reg (vd, 3), i, val4);
11771 }
11772 }
11773 break;
11774
11775 case 3:
11776 {
11777 uint64_t val1 = aarch64_get_mem_u64 (cpu, address);
11778 uint64_t val2 = aarch64_get_mem_u64 (cpu, address + 8);
11779 uint64_t val3 = aarch64_get_mem_u64 (cpu, address + 16);
11780 uint64_t val4 = aarch64_get_mem_u64 (cpu, address + 24);
11781
11782 for (i = 0; i < (full ? 2 : 1); i++)
11783 {
11784 aarch64_set_vec_u64 (cpu, vd, i, val1);
11785 aarch64_set_vec_u64 (cpu, vec_reg (vd, 1), i, val2);
11786 aarch64_set_vec_u64 (cpu, vec_reg (vd, 2), i, val3);
11787 aarch64_set_vec_u64 (cpu, vec_reg (vd, 3), i, val4);
11788 }
11789 }
11790 break;
11791
11792 default:
11793 HALT_UNALLOC;
11794 }
11795 break;
11796
11797 default:
11798 HALT_UNALLOC;
11799 }
11800 }
11801
11802 static void
11803 do_vec_load_store (sim_cpu *cpu)
11804 {
11805 /* {LD|ST}<N> {Vd..Vd+N}, vaddr
11806
11807 instr[31] = 0
11808 instr[30] = element selector 0=>half, 1=>all elements
11809 instr[29,25] = 00110
11810 instr[24] = ?
11811 instr[23] = 0=>simple, 1=>post
11812 instr[22] = 0=>store, 1=>load
11813 instr[21] = 0 (LDn) / small(0)-large(1) selector (LDnR)
11814 instr[20,16] = 00000 (simple), Vinc (reg-post-inc, no SP),
11815 11111 (immediate post inc)
11816 instr[15,12] = elements and destinations. eg for load:
11817 0000=>LD4 => load multiple 4-element to
11818 four consecutive registers
11819 0100=>LD3 => load multiple 3-element to
11820 three consecutive registers
11821 1000=>LD2 => load multiple 2-element to
11822 two consecutive registers
11823 0010=>LD1 => load multiple 1-element to
11824 four consecutive registers
11825 0110=>LD1 => load multiple 1-element to
11826 three consecutive registers
11827 1010=>LD1 => load multiple 1-element to
11828 two consecutive registers
11829 0111=>LD1 => load multiple 1-element to
11830 one register
11831 1100=>LD1R,LD2R
11832 1110=>LD3R,LD4R
11833 instr[11,10] = element size 00=> byte(b), 01=> half(h),
11834 10=> word(s), 11=> double(d)
11835 instr[9,5] = Vn, can be SP
11836 instr[4,0] = Vd */
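 /* Worked example, for illustration: "LD4 {V0.16B-V3.16B}, [X0], #64"
    is type 0000, so sizeof_operation below starts at 32 bytes and is
    doubled to 64 because instr[30] selects the 128-bit register form. */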
11837
11838 int post;
11839 int load;
11840 unsigned vn;
11841 uint64_t address;
11842 int type;
11843
11844 if (INSTR (31, 31) != 0 || INSTR (29, 25) != 0x06)
11845 HALT_NYI;
11846
11847 type = INSTR (15, 12);
11848 if (type != 0xE && type != 0xC && INSTR (21, 21) != 0)
11849 HALT_NYI;
11850
11851 post = INSTR (23, 23);
11852 load = INSTR (22, 22);
11853 vn = INSTR (9, 5);
11854 address = aarch64_get_reg_u64 (cpu, vn, SP_OK);
11855
11856 if (post)
11857 {
11858 unsigned vm = INSTR (20, 16);
11859
11860 if (vm == R31)
11861 {
11862 unsigned sizeof_operation;
11863
11864 switch (type)
11865 {
11866 case 0: sizeof_operation = 32; break;
11867 case 4: sizeof_operation = 24; break;
11868 case 8: sizeof_operation = 16; break;
11869
11870 case 0xC:
11871 sizeof_operation = INSTR (21, 21) ? 2 : 1;
11872 sizeof_operation <<= INSTR (11, 10);
11873 break;
11874
11875 case 0xE:
11876 sizeof_operation = INSTR (21, 21) ? 8 : 4;
11877 sizeof_operation <<= INSTR (11, 10);
11878 break;
11879
11880 case 7:
11881 /* One register, immediate offset variant. */
11882 sizeof_operation = 8;
11883 break;
11884
11885 case 10:
11886 /* Two registers, immediate offset variant. */
11887 sizeof_operation = 16;
11888 break;
11889
11890 case 6:
11891 /* Three registers, immediate offset variant. */
11892 sizeof_operation = 24;
11893 break;
11894
11895 case 2:
11896 /* Four registers, immediate offset variant. */
11897 sizeof_operation = 32;
11898 break;
11899
11900 default:
11901 HALT_UNALLOC;
11902 }
11903
11904 if (INSTR (30, 30))
11905 sizeof_operation *= 2;
11906
11907 aarch64_set_reg_u64 (cpu, vn, SP_OK, address + sizeof_operation);
11908 }
11909 else
11910 aarch64_set_reg_u64 (cpu, vn, SP_OK,
11911 address + aarch64_get_reg_u64 (cpu, vm, NO_SP));
11912 }
11913 else
11914 {
11915 NYI_assert (20, 16, 0);
11916 }
11917
11918 if (load)
11919 {
11920 switch (type)
11921 {
11922 case 0: LD4 (cpu, address); return;
11923 case 4: LD3 (cpu, address); return;
11924 case 8: LD2 (cpu, address); return;
11925 case 2: LD1_4 (cpu, address); return;
11926 case 6: LD1_3 (cpu, address); return;
11927 case 10: LD1_2 (cpu, address); return;
11928 case 7: LD1_1 (cpu, address); return;
11929
11930 case 0xE:
11931 case 0xC: do_vec_LDnR (cpu, address); return;
11932
11933 default:
11934 HALT_NYI;
11935 }
11936 }
11937
11938 /* Stores. */
11939 switch (type)
11940 {
11941 case 0: ST4 (cpu, address); return;
11942 case 4: ST3 (cpu, address); return;
11943 case 8: ST2 (cpu, address); return;
11944 case 2: ST1_4 (cpu, address); return;
11945 case 6: ST1_3 (cpu, address); return;
11946 case 10: ST1_2 (cpu, address); return;
11947 case 7: ST1_1 (cpu, address); return;
11948 default:
11949 HALT_NYI;
11950 }
11951 }
11952
11953 static void
11954 dexLdSt (sim_cpu *cpu)
11955 {
11956 /* uint32_t group = dispatchGroup (aarch64_get_instr (cpu));
11957 assert group == GROUP_LDST_0100 || group == GROUP_LDST_0110 ||
11958 group == GROUP_LDST_1100 || group == GROUP_LDST_1110
11959 bits [29,28:26] of a LS are the secondary dispatch vector. */
11960 uint32_t group2 = dispatchLS (aarch64_get_instr (cpu));
11961
11962 switch (group2)
11963 {
11964 case LS_EXCL_000:
11965 dexLoadExclusive (cpu); return;
11966
11967 case LS_LIT_010:
11968 case LS_LIT_011:
11969 dexLoadLiteral (cpu); return;
11970
11971 case LS_OTHER_110:
11972 case LS_OTHER_111:
11973 dexLoadOther (cpu); return;
11974
11975 case LS_ADVSIMD_001:
11976 do_vec_load_store (cpu); return;
11977
11978 case LS_PAIR_100:
11979 dex_load_store_pair_gr (cpu); return;
11980
11981 case LS_PAIR_101:
11982 dex_load_store_pair_fp (cpu); return;
11983
11984 default:
11985 /* Should never reach here. */
11986 HALT_NYI;
11987 }
11988 }
11989
11990 /* Specific decode and execute for group Data Processing Register. */
11991
11992 static void
11993 dexLogicalShiftedRegister (sim_cpu *cpu)
11994 {
11995 /* instr[31] = size : 0 ==> 32 bit, 1 ==> 64 bit
11996 instr[30,29] = op
11997 instr[28,24] = 01010
11998 instr[23,22] = shift : 0 ==> LSL, 1 ==> LSR, 2 ==> ASR, 3 ==> ROR
11999 instr[21] = N
12000 instr[20,16] = Rm
12001 instr[15,10] = count : must be 0xxxxx for 32 bit
12002 instr[9,5] = Rn
12003 instr[4,0] = Rd */
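 /* Worked example, for illustration: "ORR X0, X1, X2" has size = 1,
    op = 01 and N = 0, so the dispatch below is (5 << 1) | 0 = 10,
    selecting orr64_shift with a zero shift count. */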
12004
12005 uint32_t size = INSTR (31, 31);
12006 Shift shiftType = INSTR (23, 22);
12007 uint32_t count = INSTR (15, 10);
12008
12009 /* 32 bit operations must have count[5] = 0.
12010 or else we have an UNALLOC. */
12011 if (size == 0 && uimm (count, 5, 5))
12012 HALT_UNALLOC;
12013
12014 /* Dispatch on size:op:N. */
12015 switch ((INSTR (31, 29) << 1) | INSTR (21, 21))
12016 {
12017 case 0: and32_shift (cpu, shiftType, count); return;
12018 case 1: bic32_shift (cpu, shiftType, count); return;
12019 case 2: orr32_shift (cpu, shiftType, count); return;
12020 case 3: orn32_shift (cpu, shiftType, count); return;
12021 case 4: eor32_shift (cpu, shiftType, count); return;
12022 case 5: eon32_shift (cpu, shiftType, count); return;
12023 case 6: ands32_shift (cpu, shiftType, count); return;
12024 case 7: bics32_shift (cpu, shiftType, count); return;
12025 case 8: and64_shift (cpu, shiftType, count); return;
12026 case 9: bic64_shift (cpu, shiftType, count); return;
12027 case 10: orr64_shift (cpu, shiftType, count); return;
12028 case 11: orn64_shift (cpu, shiftType, count); return;
12029 case 12: eor64_shift (cpu, shiftType, count); return;
12030 case 13: eon64_shift (cpu, shiftType, count); return;
12031 case 14: ands64_shift (cpu, shiftType, count); return;
12032 case 15: bics64_shift (cpu, shiftType, count); return;
12033 }
12034 }
12035
12036 /* 32 bit conditional select. */
12037 static void
12038 csel32 (sim_cpu *cpu, CondCode cc)
12039 {
12040 unsigned rm = INSTR (20, 16);
12041 unsigned rn = INSTR (9, 5);
12042 unsigned rd = INSTR (4, 0);
12043
12044 aarch64_set_reg_u64 (cpu, rd, NO_SP,
12045 testConditionCode (cpu, cc)
12046 ? aarch64_get_reg_u32 (cpu, rn, NO_SP)
12047 : aarch64_get_reg_u32 (cpu, rm, NO_SP));
12048 }
12049
12050 /* 64 bit conditional select. */
12051 static void
12052 csel64 (sim_cpu *cpu, CondCode cc)
12053 {
12054 unsigned rm = INSTR (20, 16);
12055 unsigned rn = INSTR (9, 5);
12056 unsigned rd = INSTR (4, 0);
12057
12058 aarch64_set_reg_u64 (cpu, rd, NO_SP,
12059 testConditionCode (cpu, cc)
12060 ? aarch64_get_reg_u64 (cpu, rn, NO_SP)
12061 : aarch64_get_reg_u64 (cpu, rm, NO_SP));
12062 }
12063
12064 /* 32 bit conditional increment. */
12065 static void
12066 csinc32 (sim_cpu *cpu, CondCode cc)
12067 {
12068 unsigned rm = INSTR (20, 16);
12069 unsigned rn = INSTR (9, 5);
12070 unsigned rd = INSTR (4, 0);
12071
12072 aarch64_set_reg_u64 (cpu, rd, NO_SP,
12073 testConditionCode (cpu, cc)
12074 ? aarch64_get_reg_u32 (cpu, rn, NO_SP)
12075 : aarch64_get_reg_u32 (cpu, rm, NO_SP) + 1);
12076 }
12077
12078 /* 64 bit conditional increment. */
12079 static void
12080 csinc64 (sim_cpu *cpu, CondCode cc)
12081 {
12082 unsigned rm = INSTR (20, 16);
12083 unsigned rn = INSTR (9, 5);
12084 unsigned rd = INSTR (4, 0);
12085
12086 aarch64_set_reg_u64 (cpu, rd, NO_SP,
12087 testConditionCode (cpu, cc)
12088 ? aarch64_get_reg_u64 (cpu, rn, NO_SP)
12089 : aarch64_get_reg_u64 (cpu, rm, NO_SP) + 1);
12090 }
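/* N.B. the common aliases build on CSINC: "CINC Wd, Wn, cond" is
   "CSINC Wd, Wn, Wn, invert(cond)" and "CSET Wd, cond" is
   "CSINC Wd, WZR, WZR, invert(cond)". */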
12091
12092 /* 32 bit conditional invert. */
12093 static void
12094 csinv32 (sim_cpu *cpu, CondCode cc)
12095 {
12096 unsigned rm = INSTR (20, 16);
12097 unsigned rn = INSTR (9, 5);
12098 unsigned rd = INSTR (4, 0);
12099
12100 aarch64_set_reg_u64 (cpu, rd, NO_SP,
12101 testConditionCode (cpu, cc)
12102 ? aarch64_get_reg_u32 (cpu, rn, NO_SP)
12103 : ~ aarch64_get_reg_u32 (cpu, rm, NO_SP));
12104 }
12105
12106 /* 64 bit conditional invert. */
12107 static void
12108 csinv64 (sim_cpu *cpu, CondCode cc)
12109 {
12110 unsigned rm = INSTR (20, 16);
12111 unsigned rn = INSTR (9, 5);
12112 unsigned rd = INSTR (4, 0);
12113
12114 aarch64_set_reg_u64 (cpu, rd, NO_SP,
12115 testConditionCode (cpu, cc)
12116 ? aarch64_get_reg_u64 (cpu, rn, NO_SP)
12117 : ~ aarch64_get_reg_u64 (cpu, rm, NO_SP));
12118 }
12119
12120 /* 32 bit conditional negate. */
12121 static void
12122 csneg32 (sim_cpu *cpu, CondCode cc)
12123 {
12124 unsigned rm = INSTR (20, 16);
12125 unsigned rn = INSTR (9, 5);
12126 unsigned rd = INSTR (4, 0);
12127
12128 aarch64_set_reg_u64 (cpu, rd, NO_SP,
12129 testConditionCode (cpu, cc)
12130 ? aarch64_get_reg_u32 (cpu, rn, NO_SP)
12131 : - aarch64_get_reg_u32 (cpu, rm, NO_SP));
12132 }
12133
12134 /* 64 bit conditional negate. */
12135 static void
12136 csneg64 (sim_cpu *cpu, CondCode cc)
12137 {
12138 unsigned rm = INSTR (20, 16);
12139 unsigned rn = INSTR (9, 5);
12140 unsigned rd = INSTR (4, 0);
12141
12142 aarch64_set_reg_u64 (cpu, rd, NO_SP,
12143 testConditionCode (cpu, cc)
12144 ? aarch64_get_reg_u64 (cpu, rn, NO_SP)
12145 : - aarch64_get_reg_u64 (cpu, rm, NO_SP));
12146 }
12147
12148 static void
12149 dexCondSelect (sim_cpu *cpu)
12150 {
12151 /* instr[28,21] = 11010100
12152 instr[31] = size : 0 ==> 32 bit, 1 ==> 64 bit
12153 instr[30],instr[11,10] = op : 000 ==> CSEL, 001 ==> CSINC,
12154 100 ==> CSINV, 101 ==> CSNEG,
12155 _1_ ==> UNALLOC
12156 instr[29] = S : 0 ==> ok, 1 ==> UNALLOC
12157 instr[15,12] = cond
12158 instr[20,16] = Rm, instr[9,5] = Rn, instr[4,0] = Rd. */
12159
12160 CondCode cc = INSTR (15, 12);
12161 uint32_t S = INSTR (29, 29);
12162 uint32_t op2 = INSTR (11, 10);
12163
12164 if (S == 1)
12165 HALT_UNALLOC;
12166
12167 if (op2 & 0x2)
12168 HALT_UNALLOC;
12169
12170 switch ((INSTR (31, 30) << 1) | op2)
12171 {
12172 case 0: csel32 (cpu, cc); return;
12173 case 1: csinc32 (cpu, cc); return;
12174 case 2: csinv32 (cpu, cc); return;
12175 case 3: csneg32 (cpu, cc); return;
12176 case 4: csel64 (cpu, cc); return;
12177 case 5: csinc64 (cpu, cc); return;
12178 case 6: csinv64 (cpu, cc); return;
12179 case 7: csneg64 (cpu, cc); return;
12180 }
12181 }
12182
12183 /* Some helpers for counting leading 1 or 0 bits. */
12184
12185 /* Counts the number of leading bits which are the same
12186 in a 32 bit value; the result is in the range 1 to 32. */
12187 static uint32_t
12188 leading32 (uint32_t value)
12189 {
12190 int32_t mask = 0xffff0000;
12191 uint32_t count = 16; /* Counts number of bits set in mask. */
12192 uint32_t lo = 1; /* Lower bound for number of sign bits. */
12193 uint32_t hi = 32; /* Upper bound for number of sign bits. */
12194
12195 while (lo + 1 < hi)
12196 {
12197 int32_t test = (value & mask);
12198
12199 if (test == 0 || test == mask)
12200 {
12201 lo = count;
12202 count = (lo + hi) / 2;
12203 mask >>= (count - lo);
12204 }
12205 else
12206 {
12207 hi = count;
12208 count = (lo + hi) / 2;
12209 mask <<= hi - count;
12210 }
12211 }
12212
12213 if (lo != hi)
12214 {
12215 int32_t test;
12216
12217 mask >>= 1;
12218 test = (value & mask);
12219
12220 if (test == 0 || test == mask)
12221 count = hi;
12222 else
12223 count = lo;
12224 }
12225
12226 return count;
12227 }
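/* E.g. leading32 (0x00000001) returns 31, since bits [31,1] all match
   the sign bit; clz32 therefore reports 31 for this value and cls32
   reports 30. */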
12228
12229 /* Counts the number of leading bits which are the same
12230 in a 64 bit value; the result is in the range 1 to 64. */
12231 static uint64_t
12232 leading64 (uint64_t value)
12233 {
12234 int64_t mask = 0xffffffff00000000LL;
12235 uint64_t count = 32; /* Counts number of bits set in mask. */
12236 uint64_t lo = 1; /* Lower bound for number of sign bits. */
12237 uint64_t hi = 64; /* Upper bound for number of sign bits. */
12238
12239 while (lo + 1 < hi)
12240 {
12241 int64_t test = (value & mask);
12242
12243 if (test == 0 || test == mask)
12244 {
12245 lo = count;
12246 count = (lo + hi) / 2;
12247 mask >>= (count - lo);
12248 }
12249 else
12250 {
12251 hi = count;
12252 count = (lo + hi) / 2;
12253 mask <<= hi - count;
12254 }
12255 }
12256
12257 if (lo != hi)
12258 {
12259 int64_t test;
12260
12261 mask >>= 1;
12262 test = (value & mask);
12263
12264 if (test == 0 || test == mask)
12265 count = hi;
12266 else
12267 count = lo;
12268 }
12269
12270 return count;
12271 }
12272
12273 /* Bit operations. */
12274 /* N.B register args may not be SP. */
12275
12276 /* 32 bit count leading sign bits. */
12277 static void
12278 cls32 (sim_cpu *cpu)
12279 {
12280 unsigned rn = INSTR (9, 5);
12281 unsigned rd = INSTR (4, 0);
12282
12283 /* N.B. the result needs to exclude the leading bit. */
12284 aarch64_set_reg_u64
12285 (cpu, rd, NO_SP, leading32 (aarch64_get_reg_u32 (cpu, rn, NO_SP)) - 1);
12286 }
12287
12288 /* 64 bit count leading sign bits. */
12289 static void
12290 cls64 (sim_cpu *cpu)
12291 {
12292 unsigned rn = INSTR (9, 5);
12293 unsigned rd = INSTR (4, 0);
12294
12295 /* N.B. the result needs to exclude the leading bit. */
12296 aarch64_set_reg_u64
12297 (cpu, rd, NO_SP, leading64 (aarch64_get_reg_u64 (cpu, rn, NO_SP)) - 1);
12298 }
12299
12300 /* 32 bit count leading zero bits. */
12301 static void
12302 clz32 (sim_cpu *cpu)
12303 {
12304 unsigned rn = INSTR (9, 5);
12305 unsigned rd = INSTR (4, 0);
12306 uint32_t value = aarch64_get_reg_u32 (cpu, rn, NO_SP);
12307
12308 /* if the sign (top) bit is set then the count is 0. */
12309 if (pick32 (value, 31, 31))
12310 aarch64_set_reg_u64 (cpu, rd, NO_SP, 0L);
12311 else
12312 aarch64_set_reg_u64 (cpu, rd, NO_SP, leading32 (value));
12313 }
12314
12315 /* 64 bit count leading zero bits. */
12316 static void
12317 clz64 (sim_cpu *cpu)
12318 {
12319 unsigned rn = INSTR (9, 5);
12320 unsigned rd = INSTR (4, 0);
12321 uint64_t value = aarch64_get_reg_u64 (cpu, rn, NO_SP);
12322
12323 /* if the sign (top) bit is set then the count is 0. */
12324 if (pick64 (value, 63, 63))
12325 aarch64_set_reg_u64 (cpu, rd, NO_SP, 0L);
12326 else
12327 aarch64_set_reg_u64 (cpu, rd, NO_SP, leading64 (value));
12328 }
12329
12330 /* 32 bit reverse bits. */
12331 static void
12332 rbit32 (sim_cpu *cpu)
12333 {
12334 unsigned rn = INSTR (9, 5);
12335 unsigned rd = INSTR (4, 0);
12336 uint32_t value = aarch64_get_reg_u32 (cpu, rn, NO_SP);
12337 uint32_t result = 0;
12338 int i;
12339
12340 for (i = 0; i < 32; i++)
12341 {
12342 result <<= 1;
12343 result |= (value & 1);
12344 value >>= 1;
12345 }
12346 aarch64_set_reg_u64 (cpu, rd, NO_SP, result);
12347 }
12348
12349 /* 64 bit reverse bits. */
12350 static void
12351 rbit64 (sim_cpu *cpu)
12352 {
12353 unsigned rn = INSTR (9, 5);
12354 unsigned rd = INSTR (4, 0);
12355 uint64_t value = aarch64_get_reg_u64 (cpu, rn, NO_SP);
12356 uint64_t result = 0;
12357 int i;
12358
12359 for (i = 0; i < 64; i++)
12360 {
12361 result <<= 1;
12362 result |= (value & 1UL);
12363 value >>= 1;
12364 }
12365 aarch64_set_reg_u64 (cpu, rd, NO_SP, result);
12366 }
12367
12368 /* 32 bit reverse bytes. */
12369 static void
12370 rev32 (sim_cpu *cpu)
12371 {
12372 unsigned rn = INSTR (9, 5);
12373 unsigned rd = INSTR (4, 0);
12374 uint32_t value = aarch64_get_reg_u32 (cpu, rn, NO_SP);
12375 uint32_t result = 0;
12376 int i;
12377
12378 for (i = 0; i < 4; i++)
12379 {
12380 result <<= 8;
12381 result |= (value & 0xff);
12382 value >>= 8;
12383 }
12384 aarch64_set_reg_u64 (cpu, rd, NO_SP, result);
12385 }
12386
12387 /* 64 bit reverse bytes. */
12388 static void
12389 rev64 (sim_cpu *cpu)
12390 {
12391 unsigned rn = INSTR (9, 5);
12392 unsigned rd = INSTR (4, 0);
12393 uint64_t value = aarch64_get_reg_u64 (cpu, rn, NO_SP);
12394 uint64_t result = 0;
12395 int i;
12396
12397 for (i = 0; i < 8; i++)
12398 {
12399 result <<= 8;
12400 result |= (value & 0xffULL);
12401 value >>= 8;
12402 }
12403 aarch64_set_reg_u64 (cpu, rd, NO_SP, result);
12404 }
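
/* 64 bit reverse bytes in each word (the 64 bit form of REV32).
   The rev32 handler above only reads and writes the low word of the
   register, so the size == 1 dispatch in dexDataProc1Source needs a
   separate handler for this case; this is a minimal implementation
   following the same pattern as the other handlers here. */
static void
revw64 (sim_cpu *cpu)
{
  unsigned rn = INSTR (9, 5);
  unsigned rd = INSTR (4, 0);
  uint64_t value = aarch64_get_reg_u64 (cpu, rn, NO_SP);
  uint64_t result = 0;
  int i;

  /* Collect the bytes of each 32 bit word in reverse order. */
  for (i = 0; i < 4; i++)
    {
      result <<= 8;
      result |= (value & 0x000000ff000000ffULL);
      value >>= 8;
    }
  aarch64_set_reg_u64 (cpu, rd, NO_SP, result);
}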
12405
12406 /* 32 bit reverse shorts. */
/* N.B. this reverses the order of the bytes in each half word. */
12408 static void
12409 revh32 (sim_cpu *cpu)
12410 {
12411 unsigned rn = INSTR (9, 5);
12412 unsigned rd = INSTR (4, 0);
12413 uint32_t value = aarch64_get_reg_u32 (cpu, rn, NO_SP);
12414 uint32_t result = 0;
12415 int i;
12416
12417 for (i = 0; i < 2; i++)
12418 {
12419 result <<= 8;
12420 result |= (value & 0x00ff00ff);
12421 value >>= 8;
12422 }
12423 aarch64_set_reg_u64 (cpu, rd, NO_SP, result);
12424 }
12425
12426 /* 64 bit reverse shorts. */
/* N.B. this reverses the order of the bytes in each half word. */
12428 static void
12429 revh64 (sim_cpu *cpu)
12430 {
12431 unsigned rn = INSTR (9, 5);
12432 unsigned rd = INSTR (4, 0);
12433 uint64_t value = aarch64_get_reg_u64 (cpu, rn, NO_SP);
12434 uint64_t result = 0;
12435 int i;
12436
12437 for (i = 0; i < 2; i++)
12438 {
12439 result <<= 8;
12440 result |= (value & 0x00ff00ff00ff00ffULL);
12441 value >>= 8;
12442 }
12443 aarch64_set_reg_u64 (cpu, rd, NO_SP, result);
12444 }
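
/* Worked example of the byte-reversal flavours above for the sample
   value 0x0123456789abcdef (a self-check sketch, not called anywhere):
     rev64  ==> 0xefcdab8967452301   (whole register)
     revw64 ==> 0x67452301efcdab89   (within each word)
     revh64 ==> 0x23016745ab89efcd   (within each halfword)
   The helper below reruns the revh64 loop on that constant. */
static inline int
revh64_example_holds (void)
{
  uint64_t value = 0x0123456789abcdefULL;
  uint64_t result = 0;
  int i;

  for (i = 0; i < 2; i++)
    {
      result <<= 8;
      result |= (value & 0x00ff00ff00ff00ffULL);
      value >>= 8;
    }
  return result == 0x23016745ab89efcdULL;
}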
12445
12446 static void
12447 dexDataProc1Source (sim_cpu *cpu)
12448 {
12449 /* instr[30] = 1
     instr[28,21] = 11010110
12451 instr[31] = size : 0 ==> 32 bit, 1 ==> 64 bit
12452 instr[29] = S : 0 ==> ok, 1 ==> UNALLOC
12453 instr[20,16] = opcode2 : 00000 ==> ok, ow ==> UNALLOC
12454 instr[15,10] = opcode : 000000 ==> RBIT, 000001 ==> REV16,
                           000010 ==> REV (REV32 for 64 bit), 000011 ==> REV (64 bit only),
12456 000100 ==> CLZ, 000101 ==> CLS
12457 ow ==> UNALLOC
12458 instr[9,5] = rn : may not be SP
12459 instr[4,0] = rd : may not be SP. */
12460
12461 uint32_t S = INSTR (29, 29);
12462 uint32_t opcode2 = INSTR (20, 16);
12463 uint32_t opcode = INSTR (15, 10);
12464 uint32_t dispatch = ((INSTR (31, 31) << 3) | opcode);
12465
12466 if (S == 1)
12467 HALT_UNALLOC;
12468
12469 if (opcode2 != 0)
12470 HALT_UNALLOC;
12471
12472 if (opcode & 0x38)
12473 HALT_UNALLOC;
12474
12475 switch (dispatch)
12476 {
12477 case 0: rbit32 (cpu); return;
12478 case 1: revh32 (cpu); return;
12479 case 2: rev32 (cpu); return;
12480 case 4: clz32 (cpu); return;
12481 case 5: cls32 (cpu); return;
12482 case 8: rbit64 (cpu); return;
12483 case 9: revh64 (cpu); return;
    case 10: revw64 (cpu); return;
    case 11: rev64 (cpu); return;
    case 12: clz64 (cpu); return;
    case 13: cls64 (cpu); return;
12488 default: HALT_UNALLOC;
12489 }
12490 }
12491
12492 /* Variable shift.
12493 Shifts by count supplied in register.
   N.B. register args may not be SP.
12495 These all use the shifted auxiliary function for
12496 simplicity and clarity. Writing the actual shift
12497 inline would avoid a branch and so be faster but
12498 would also necessitate getting signs right. */
12499
12500 /* 32 bit arithmetic shift right. */
12501 static void
12502 asrv32 (sim_cpu *cpu)
12503 {
12504 unsigned rm = INSTR (20, 16);
12505 unsigned rn = INSTR (9, 5);
12506 unsigned rd = INSTR (4, 0);
12507
12508 aarch64_set_reg_u64
12509 (cpu, rd, NO_SP,
12510 shifted32 (aarch64_get_reg_u32 (cpu, rn, NO_SP), ASR,
12511 (aarch64_get_reg_u32 (cpu, rm, NO_SP) & 0x1f)));
12512 }
12513
12514 /* 64 bit arithmetic shift right. */
12515 static void
12516 asrv64 (sim_cpu *cpu)
12517 {
12518 unsigned rm = INSTR (20, 16);
12519 unsigned rn = INSTR (9, 5);
12520 unsigned rd = INSTR (4, 0);
12521
12522 aarch64_set_reg_u64
12523 (cpu, rd, NO_SP,
12524 shifted64 (aarch64_get_reg_u64 (cpu, rn, NO_SP), ASR,
12525 (aarch64_get_reg_u64 (cpu, rm, NO_SP) & 0x3f)));
12526 }
12527
12528 /* 32 bit logical shift left. */
12529 static void
12530 lslv32 (sim_cpu *cpu)
12531 {
12532 unsigned rm = INSTR (20, 16);
12533 unsigned rn = INSTR (9, 5);
12534 unsigned rd = INSTR (4, 0);
12535
12536 aarch64_set_reg_u64
12537 (cpu, rd, NO_SP,
12538 shifted32 (aarch64_get_reg_u32 (cpu, rn, NO_SP), LSL,
12539 (aarch64_get_reg_u32 (cpu, rm, NO_SP) & 0x1f)));
12540 }
12541
/* 64 bit logical shift left. */
12543 static void
12544 lslv64 (sim_cpu *cpu)
12545 {
12546 unsigned rm = INSTR (20, 16);
12547 unsigned rn = INSTR (9, 5);
12548 unsigned rd = INSTR (4, 0);
12549
12550 aarch64_set_reg_u64
12551 (cpu, rd, NO_SP,
12552 shifted64 (aarch64_get_reg_u64 (cpu, rn, NO_SP), LSL,
12553 (aarch64_get_reg_u64 (cpu, rm, NO_SP) & 0x3f)));
12554 }
12555
12556 /* 32 bit logical shift right. */
12557 static void
12558 lsrv32 (sim_cpu *cpu)
12559 {
12560 unsigned rm = INSTR (20, 16);
12561 unsigned rn = INSTR (9, 5);
12562 unsigned rd = INSTR (4, 0);
12563
12564 aarch64_set_reg_u64
12565 (cpu, rd, NO_SP,
12566 shifted32 (aarch64_get_reg_u32 (cpu, rn, NO_SP), LSR,
12567 (aarch64_get_reg_u32 (cpu, rm, NO_SP) & 0x1f)));
12568 }
12569
12570 /* 64 bit logical shift right. */
12571 static void
12572 lsrv64 (sim_cpu *cpu)
12573 {
12574 unsigned rm = INSTR (20, 16);
12575 unsigned rn = INSTR (9, 5);
12576 unsigned rd = INSTR (4, 0);
12577
12578 aarch64_set_reg_u64
12579 (cpu, rd, NO_SP,
12580 shifted64 (aarch64_get_reg_u64 (cpu, rn, NO_SP), LSR,
12581 (aarch64_get_reg_u64 (cpu, rm, NO_SP) & 0x3f)));
12582 }
12583
12584 /* 32 bit rotate right. */
12585 static void
12586 rorv32 (sim_cpu *cpu)
12587 {
12588 unsigned rm = INSTR (20, 16);
12589 unsigned rn = INSTR (9, 5);
12590 unsigned rd = INSTR (4, 0);
12591
12592 aarch64_set_reg_u64
12593 (cpu, rd, NO_SP,
12594 shifted32 (aarch64_get_reg_u32 (cpu, rn, NO_SP), ROR,
12595 (aarch64_get_reg_u32 (cpu, rm, NO_SP) & 0x1f)));
12596 }
12597
12598 /* 64 bit rotate right. */
12599 static void
12600 rorv64 (sim_cpu *cpu)
12601 {
12602 unsigned rm = INSTR (20, 16);
12603 unsigned rn = INSTR (9, 5);
12604 unsigned rd = INSTR (4, 0);
12605
12606 aarch64_set_reg_u64
12607 (cpu, rd, NO_SP,
12608 shifted64 (aarch64_get_reg_u64 (cpu, rn, NO_SP), ROR,
12609 (aarch64_get_reg_u64 (cpu, rm, NO_SP) & 0x3f)));
12610 }
12611
12612
/* Divide. */
12614
12615 /* 32 bit signed divide. */
12616 static void
sdiv32 (sim_cpu *cpu)
12618 {
12619 unsigned rm = INSTR (20, 16);
12620 unsigned rn = INSTR (9, 5);
12621 unsigned rd = INSTR (4, 0);
12622 /* N.B. the pseudo-code does the divide using 64 bit data. */
12623 /* TODO : check that this rounds towards zero as required. */
12624 int64_t dividend = aarch64_get_reg_s32 (cpu, rn, NO_SP);
12625 int64_t divisor = aarch64_get_reg_s32 (cpu, rm, NO_SP);
12626
12627 aarch64_set_reg_s64 (cpu, rd, NO_SP,
12628 divisor ? ((int32_t) (dividend / divisor)) : 0);
12629 }
12630
12631 /* 64 bit signed divide. */
12632 static void
sdiv64 (sim_cpu *cpu)
12634 {
12635 unsigned rm = INSTR (20, 16);
12636 unsigned rn = INSTR (9, 5);
12637 unsigned rd = INSTR (4, 0);
12638
12639 /* TODO : check that this rounds towards zero as required. */
12640 int64_t divisor = aarch64_get_reg_s64 (cpu, rm, NO_SP);
12641
12642 aarch64_set_reg_s64
12643 (cpu, rd, NO_SP,
12644 divisor ? (aarch64_get_reg_s64 (cpu, rn, NO_SP) / divisor) : 0);
12645 }
12646
12647 /* 32 bit unsigned divide. */
12648 static void
12649 udiv32 (sim_cpu *cpu)
12650 {
12651 unsigned rm = INSTR (20, 16);
12652 unsigned rn = INSTR (9, 5);
12653 unsigned rd = INSTR (4, 0);
12654
12655 /* N.B. the pseudo-code does the divide using 64 bit data. */
12656 uint64_t dividend = aarch64_get_reg_u32 (cpu, rn, NO_SP);
12657 uint64_t divisor = aarch64_get_reg_u32 (cpu, rm, NO_SP);
12658
12659 aarch64_set_reg_u64 (cpu, rd, NO_SP,
12660 divisor ? (uint32_t) (dividend / divisor) : 0);
12661 }
12662
12663 /* 64 bit unsigned divide. */
12664 static void
12665 udiv64 (sim_cpu *cpu)
12666 {
12667 unsigned rm = INSTR (20, 16);
12668 unsigned rn = INSTR (9, 5);
12669 unsigned rd = INSTR (4, 0);
12670
12671 /* TODO : check that this rounds towards zero as required. */
12672 uint64_t divisor = aarch64_get_reg_u64 (cpu, rm, NO_SP);
12673
12674 aarch64_set_reg_u64
12675 (cpu, rd, NO_SP,
12676 divisor ? (aarch64_get_reg_u64 (cpu, rn, NO_SP) / divisor) : 0);
12677 }
12678
12679 static void
12680 dexDataProc2Source (sim_cpu *cpu)
12681 {
12682 /* assert instr[30] == 0
12683 instr[28,21] == 11010110
12684 instr[31] = size : 0 ==> 32 bit, 1 ==> 64 bit
12685 instr[29] = S : 0 ==> ok, 1 ==> UNALLOC
     instr[15,10] = opcode : 000010 ==> UDIV, 000011 ==> SDIV,
12687 001000 ==> LSLV, 001001 ==> LSRV
12688 001010 ==> ASRV, 001011 ==> RORV
12689 ow ==> UNALLOC. */
12690
12691 uint32_t dispatch;
12692 uint32_t S = INSTR (29, 29);
12693 uint32_t opcode = INSTR (15, 10);
12694
12695 if (S == 1)
12696 HALT_UNALLOC;
12697
12698 if (opcode & 0x34)
12699 HALT_UNALLOC;
12700
12701 dispatch = ( (INSTR (31, 31) << 3)
12702 | (uimm (opcode, 3, 3) << 2)
12703 | uimm (opcode, 1, 0));
12704 switch (dispatch)
12705 {
12706 case 2: udiv32 (cpu); return;
    case 3: sdiv32 (cpu); return;
12708 case 4: lslv32 (cpu); return;
12709 case 5: lsrv32 (cpu); return;
12710 case 6: asrv32 (cpu); return;
12711 case 7: rorv32 (cpu); return;
12712 case 10: udiv64 (cpu); return;
    case 11: sdiv64 (cpu); return;
12714 case 12: lslv64 (cpu); return;
12715 case 13: lsrv64 (cpu); return;
12716 case 14: asrv64 (cpu); return;
12717 case 15: rorv64 (cpu); return;
12718 default: HALT_UNALLOC;
12719 }
12720 }
12721
12722
12723 /* Multiply. */
12724
12725 /* 32 bit multiply and add. */
12726 static void
12727 madd32 (sim_cpu *cpu)
12728 {
12729 unsigned rm = INSTR (20, 16);
12730 unsigned ra = INSTR (14, 10);
12731 unsigned rn = INSTR (9, 5);
12732 unsigned rd = INSTR (4, 0);
12733
12734 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
12735 aarch64_set_reg_u64 (cpu, rd, NO_SP,
12736 aarch64_get_reg_u32 (cpu, ra, NO_SP)
12737 + aarch64_get_reg_u32 (cpu, rn, NO_SP)
12738 * aarch64_get_reg_u32 (cpu, rm, NO_SP));
12739 }
12740
12741 /* 64 bit multiply and add. */
12742 static void
12743 madd64 (sim_cpu *cpu)
12744 {
12745 unsigned rm = INSTR (20, 16);
12746 unsigned ra = INSTR (14, 10);
12747 unsigned rn = INSTR (9, 5);
12748 unsigned rd = INSTR (4, 0);
12749
12750 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
12751 aarch64_set_reg_u64 (cpu, rd, NO_SP,
12752 aarch64_get_reg_u64 (cpu, ra, NO_SP)
12753 + (aarch64_get_reg_u64 (cpu, rn, NO_SP)
12754 * aarch64_get_reg_u64 (cpu, rm, NO_SP)));
12755 }
12756
12757 /* 32 bit multiply and sub. */
12758 static void
12759 msub32 (sim_cpu *cpu)
12760 {
12761 unsigned rm = INSTR (20, 16);
12762 unsigned ra = INSTR (14, 10);
12763 unsigned rn = INSTR (9, 5);
12764 unsigned rd = INSTR (4, 0);
12765
12766 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
12767 aarch64_set_reg_u64 (cpu, rd, NO_SP,
12768 aarch64_get_reg_u32 (cpu, ra, NO_SP)
12769 - aarch64_get_reg_u32 (cpu, rn, NO_SP)
12770 * aarch64_get_reg_u32 (cpu, rm, NO_SP));
12771 }
12772
12773 /* 64 bit multiply and sub. */
12774 static void
12775 msub64 (sim_cpu *cpu)
12776 {
12777 unsigned rm = INSTR (20, 16);
12778 unsigned ra = INSTR (14, 10);
12779 unsigned rn = INSTR (9, 5);
12780 unsigned rd = INSTR (4, 0);
12781
12782 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
12783 aarch64_set_reg_u64 (cpu, rd, NO_SP,
12784 aarch64_get_reg_u64 (cpu, ra, NO_SP)
12785 - aarch64_get_reg_u64 (cpu, rn, NO_SP)
12786 * aarch64_get_reg_u64 (cpu, rm, NO_SP));
12787 }
12788
12789 /* Signed multiply add long -- source, source2 : 32 bit, source3 : 64 bit. */
12790 static void
12791 smaddl (sim_cpu *cpu)
12792 {
12793 unsigned rm = INSTR (20, 16);
12794 unsigned ra = INSTR (14, 10);
12795 unsigned rn = INSTR (9, 5);
12796 unsigned rd = INSTR (4, 0);
12797
12798 /* N.B. we need to multiply the signed 32 bit values in rn, rm to
12799 obtain a 64 bit product. */
12800 aarch64_set_reg_s64
12801 (cpu, rd, NO_SP,
12802 aarch64_get_reg_s64 (cpu, ra, NO_SP)
12803 + ((int64_t) aarch64_get_reg_s32 (cpu, rn, NO_SP))
12804 * ((int64_t) aarch64_get_reg_s32 (cpu, rm, NO_SP)));
12805 }
12806
12807 /* Signed multiply sub long -- source, source2 : 32 bit, source3 : 64 bit. */
12808 static void
12809 smsubl (sim_cpu *cpu)
12810 {
12811 unsigned rm = INSTR (20, 16);
12812 unsigned ra = INSTR (14, 10);
12813 unsigned rn = INSTR (9, 5);
12814 unsigned rd = INSTR (4, 0);
12815
12816 /* N.B. we need to multiply the signed 32 bit values in rn, rm to
12817 obtain a 64 bit product. */
12818 aarch64_set_reg_s64
12819 (cpu, rd, NO_SP,
12820 aarch64_get_reg_s64 (cpu, ra, NO_SP)
12821 - ((int64_t) aarch64_get_reg_s32 (cpu, rn, NO_SP))
12822 * ((int64_t) aarch64_get_reg_s32 (cpu, rm, NO_SP)));
12823 }
12824
12825 /* Integer Multiply/Divide. */
12826
12827 /* First some macros and a helper function. */
12828 /* Macros to test or access elements of 64 bit words. */
12829
12830 /* Mask used to access lo 32 bits of 64 bit unsigned int. */
12831 #define LOW_WORD_MASK ((1ULL << 32) - 1)
12832 /* Return the lo 32 bit word of a 64 bit unsigned int as a 64 bit unsigned int. */
12833 #define lowWordToU64(_value_u64) ((_value_u64) & LOW_WORD_MASK)
12834 /* Return the hi 32 bit word of a 64 bit unsigned int as a 64 bit unsigned int. */
12835 #define highWordToU64(_value_u64) ((_value_u64) >> 32)
12836
/* Offset of the sign bit in a 64 bit signed integer. */
12838 #define SIGN_SHIFT_U64 63
12839 /* The sign bit itself -- also identifies the minimum negative int value. */
12840 #define SIGN_BIT_U64 (1UL << SIGN_SHIFT_U64)
12841 /* Return true if a 64 bit signed int presented as an unsigned int is the
12842 most negative value. */
12843 #define isMinimumU64(_value_u64) ((_value_u64) == SIGN_BIT_U64)
12844 /* Return true (non-zero) if a 64 bit signed int presented as an unsigned
   int has its sign bit set. */
12846 #define isSignSetU64(_value_u64) ((_value_u64) & SIGN_BIT_U64)
12847 /* Return 1L or -1L according to whether a 64 bit signed int presented as
12848 an unsigned int has its sign bit set or not. */
#define signOfU64(_value_u64) (1L + (((_value_u64) >> SIGN_SHIFT_U64) * -2L))
12850 /* Clear the sign bit of a 64 bit signed int presented as an unsigned int. */
12851 #define clearSignU64(_value_u64) ((_value_u64) &= ~SIGN_BIT_U64)
12852
/* Multiply two 64 bit ints and return
   the hi 64 bits of the 128 bit product. */
12855
12856 static uint64_t
12857 mul64hi (uint64_t value1, uint64_t value2)
12858 {
12859 uint64_t resultmid1;
12860 uint64_t result;
12861 uint64_t value1_lo = lowWordToU64 (value1);
  uint64_t value1_hi = highWordToU64 (value1);
12863 uint64_t value2_lo = lowWordToU64 (value2);
12864 uint64_t value2_hi = highWordToU64 (value2);
12865
12866 /* Cross-multiply and collect results. */
12867 uint64_t xproductlo = value1_lo * value2_lo;
12868 uint64_t xproductmid1 = value1_lo * value2_hi;
12869 uint64_t xproductmid2 = value1_hi * value2_lo;
12870 uint64_t xproducthi = value1_hi * value2_hi;
12871 uint64_t carry = 0;
12872 /* Start accumulating 64 bit results. */
12873 /* Drop bottom half of lowest cross-product. */
12874 uint64_t resultmid = xproductlo >> 32;
12875 /* Add in middle products. */
12876 resultmid = resultmid + xproductmid1;
12877
12878 /* Check for overflow. */
12879 if (resultmid < xproductmid1)
12880 /* Carry over 1 into top cross-product. */
12881 carry++;
12882
12883 resultmid1 = resultmid + xproductmid2;
12884
12885 /* Check for overflow. */
12886 if (resultmid1 < xproductmid2)
12887 /* Carry over 1 into top cross-product. */
12888 carry++;
12889
12890 /* Drop lowest 32 bits of middle cross-product. */
12891 result = resultmid1 >> 32;
12892
  /* Add in the top cross-product and any carry. */
12894 result += xproducthi + carry;
12895
12896 return result;
12897 }
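
/* Cross-check sketch for mul64hi: on compilers that provide the
   non-standard __int128 type, the high half of the product can be
   computed directly.  This helper is for reference only and is not
   called by the simulator. */
#ifdef __SIZEOF_INT128__
static inline uint64_t
mul64hi_ref (uint64_t value1, uint64_t value2)
{
  return (uint64_t) (((unsigned __int128) value1 * value2) >> 64);
}
#endif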
12898
12899 /* Signed multiply high, source, source2 :
12900 64 bit, dest <-- high 64-bit of result. */
12901 static void
12902 smulh (sim_cpu *cpu)
12903 {
12904 uint64_t uresult;
12905 int64_t result;
12906 unsigned rm = INSTR (20, 16);
12907 unsigned rn = INSTR (9, 5);
12908 unsigned rd = INSTR (4, 0);
12909 GReg ra = INSTR (14, 10);
12910 int64_t value1 = aarch64_get_reg_u64 (cpu, rn, NO_SP);
12911 int64_t value2 = aarch64_get_reg_u64 (cpu, rm, NO_SP);
12912 uint64_t uvalue1;
12913 uint64_t uvalue2;
12914 int64_t signum = 1;
12915
12916 if (ra != R31)
12917 HALT_UNALLOC;
12918
12919 /* Convert to unsigned and use the unsigned mul64hi routine
     then fix the sign up afterwards. */
12921 if (value1 < 0)
12922 {
12923 signum *= -1L;
12924 uvalue1 = -value1;
12925 }
12926 else
12927 {
12928 uvalue1 = value1;
12929 }
12930
12931 if (value2 < 0)
12932 {
12933 signum *= -1L;
12934 uvalue2 = -value2;
12935 }
12936 else
12937 {
12938 uvalue2 = value2;
12939 }
12940
12941 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  uresult = mul64hi (uvalue1, uvalue2);
  result = uresult;

  if (signum < 0)
    {
      /* Negating a 128 bit product inverts the high half and adds the
         carry out of negating the low half; that carry is 1 only when
         the low 64 bits of the product are zero. */
      result = ~result;
      if ((uvalue1 * uvalue2) == 0)
        result += 1;
    }
12945
12946 aarch64_set_reg_s64 (cpu, rd, NO_SP, result);
12947 }
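
/* Matching cross-check sketch for the signed high-half multiply
   (again reference only, guarded on the non-standard __int128 type
   and not called by the dispatch code). */
#ifdef __SIZEOF_INT128__
static inline int64_t
smulh_ref (int64_t value1, int64_t value2)
{
  return (int64_t) (((__int128) value1 * (__int128) value2) >> 64);
}
#endif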
12948
12949 /* Unsigned multiply add long -- source, source2 :
12950 32 bit, source3 : 64 bit. */
12951 static void
12952 umaddl (sim_cpu *cpu)
12953 {
12954 unsigned rm = INSTR (20, 16);
12955 unsigned ra = INSTR (14, 10);
12956 unsigned rn = INSTR (9, 5);
12957 unsigned rd = INSTR (4, 0);
12958
12959 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  /* N.B. we need to multiply the unsigned 32 bit values in rn, rm to
12961 obtain a 64 bit product. */
12962 aarch64_set_reg_u64
12963 (cpu, rd, NO_SP,
12964 aarch64_get_reg_u64 (cpu, ra, NO_SP)
12965 + ((uint64_t) aarch64_get_reg_u32 (cpu, rn, NO_SP))
12966 * ((uint64_t) aarch64_get_reg_u32 (cpu, rm, NO_SP)));
12967 }
12968
12969 /* Unsigned multiply sub long -- source, source2 : 32 bit, source3 : 64 bit. */
12970 static void
12971 umsubl (sim_cpu *cpu)
12972 {
12973 unsigned rm = INSTR (20, 16);
12974 unsigned ra = INSTR (14, 10);
12975 unsigned rn = INSTR (9, 5);
12976 unsigned rd = INSTR (4, 0);
12977
12978 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  /* N.B. we need to multiply the unsigned 32 bit values in rn, rm to
12980 obtain a 64 bit product. */
12981 aarch64_set_reg_u64
12982 (cpu, rd, NO_SP,
12983 aarch64_get_reg_u64 (cpu, ra, NO_SP)
12984 - ((uint64_t) aarch64_get_reg_u32 (cpu, rn, NO_SP))
12985 * ((uint64_t) aarch64_get_reg_u32 (cpu, rm, NO_SP)));
12986 }
12987
12988 /* Unsigned multiply high, source, source2 :
12989 64 bit, dest <-- high 64-bit of result. */
12990 static void
12991 umulh (sim_cpu *cpu)
12992 {
12993 unsigned rm = INSTR (20, 16);
12994 unsigned rn = INSTR (9, 5);
12995 unsigned rd = INSTR (4, 0);
12996 GReg ra = INSTR (14, 10);
12997
12998 if (ra != R31)
12999 HALT_UNALLOC;
13000
13001 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
13002 aarch64_set_reg_u64 (cpu, rd, NO_SP,
13003 mul64hi (aarch64_get_reg_u64 (cpu, rn, NO_SP),
13004 aarch64_get_reg_u64 (cpu, rm, NO_SP)));
13005 }
13006
13007 static void
13008 dexDataProc3Source (sim_cpu *cpu)
13009 {
13010 /* assert instr[28,24] == 11011. */
13011 /* instr[31] = size : 0 ==> 32 bit, 1 ==> 64 bit (for rd at least)
13012 instr[30,29] = op54 : 00 ==> ok, ow ==> UNALLOC
     instr[23,21] = op31 : 111 ==> UNALLOC, ow ==> ok (see dispatch below)
13014 instr[15] = o0 : 0/1 ==> ok
13015 instr[23,21:15] ==> op : 0000 ==> MADD, 0001 ==> MSUB, (32/64 bit)
13016 0010 ==> SMADDL, 0011 ==> SMSUBL, (64 bit only)
13017 0100 ==> SMULH, (64 bit only)
                          1010 ==> UMADDL, 1011 ==> UMSUBL, (64 bit only)
13019 1100 ==> UMULH (64 bit only)
13020 ow ==> UNALLOC. */
13021
13022 uint32_t dispatch;
13023 uint32_t size = INSTR (31, 31);
13024 uint32_t op54 = INSTR (30, 29);
13025 uint32_t op31 = INSTR (23, 21);
13026 uint32_t o0 = INSTR (15, 15);
13027
13028 if (op54 != 0)
13029 HALT_UNALLOC;
13030
13031 if (size == 0)
13032 {
13033 if (op31 != 0)
13034 HALT_UNALLOC;
13035
13036 if (o0 == 0)
13037 madd32 (cpu);
13038 else
13039 msub32 (cpu);
13040 return;
13041 }
13042
13043 dispatch = (op31 << 1) | o0;
13044
13045 switch (dispatch)
13046 {
13047 case 0: madd64 (cpu); return;
13048 case 1: msub64 (cpu); return;
13049 case 2: smaddl (cpu); return;
13050 case 3: smsubl (cpu); return;
13051 case 4: smulh (cpu); return;
13052 case 10: umaddl (cpu); return;
13053 case 11: umsubl (cpu); return;
13054 case 12: umulh (cpu); return;
13055 default: HALT_UNALLOC;
13056 }
13057 }
13058
13059 static void
13060 dexDPReg (sim_cpu *cpu)
13061 {
13062 /* uint32_t group = dispatchGroup (aarch64_get_instr (cpu));
13063 assert group == GROUP_DPREG_0101 || group == GROUP_DPREG_1101
13064 bits [28:24:21] of a DPReg are the secondary dispatch vector. */
13065 uint32_t group2 = dispatchDPReg (aarch64_get_instr (cpu));
13066
13067 switch (group2)
13068 {
13069 case DPREG_LOG_000:
13070 case DPREG_LOG_001:
13071 dexLogicalShiftedRegister (cpu); return;
13072
13073 case DPREG_ADDSHF_010:
13074 dexAddSubtractShiftedRegister (cpu); return;
13075
13076 case DPREG_ADDEXT_011:
13077 dexAddSubtractExtendedRegister (cpu); return;
13078
13079 case DPREG_ADDCOND_100:
13080 {
13081 /* This set bundles a variety of different operations. */
        /* Check for:  */
        /* 1) add/sub with carry. */
13084 uint32_t mask1 = 0x1FE00000U;
13085 uint32_t val1 = 0x1A000000U;
13086 /* 2) cond compare register/immediate. */
13087 uint32_t mask2 = 0x1FE00000U;
13088 uint32_t val2 = 0x1A400000U;
13089 /* 3) cond select. */
13090 uint32_t mask3 = 0x1FE00000U;
13091 uint32_t val3 = 0x1A800000U;
13092 /* 4) data proc 1/2 source. */
13093 uint32_t mask4 = 0x1FE00000U;
13094 uint32_t val4 = 0x1AC00000U;
13095
13096 if ((aarch64_get_instr (cpu) & mask1) == val1)
13097 dexAddSubtractWithCarry (cpu);
13098
13099 else if ((aarch64_get_instr (cpu) & mask2) == val2)
13100 CondCompare (cpu);
13101
13102 else if ((aarch64_get_instr (cpu) & mask3) == val3)
13103 dexCondSelect (cpu);
13104
13105 else if ((aarch64_get_instr (cpu) & mask4) == val4)
13106 {
13107 /* Bit 30 is clear for data proc 2 source
13108 and set for data proc 1 source. */
13109 if (aarch64_get_instr (cpu) & (1U << 30))
13110 dexDataProc1Source (cpu);
13111 else
13112 dexDataProc2Source (cpu);
13113 }
13114
13115 else
13116 /* Should not reach here. */
13117 HALT_NYI;
13118
13119 return;
13120 }
13121
13122 case DPREG_3SRC_110:
13123 dexDataProc3Source (cpu); return;
13124
13125 case DPREG_UNALLOC_101:
13126 HALT_UNALLOC;
13127
13128 case DPREG_3SRC_111:
13129 dexDataProc3Source (cpu); return;
13130
13131 default:
13132 /* Should never reach here. */
13133 HALT_NYI;
13134 }
13135 }
13136
/* Unconditional Branch immediate.
   Offset is a PC-relative byte offset in the range +/- 128MiB.
   The offset is assumed to be scaled already, i.e. the decode
   routine converts the word offset in the instruction to a byte
   offset before calling the execute routine. */
13141
13142 /* Unconditional branch. */
13143 static void
13144 buc (sim_cpu *cpu, int32_t offset)
13145 {
13146 aarch64_set_next_PC_by_offset (cpu, offset);
13147 }
13148
13149 static unsigned stack_depth = 0;
13150
13151 /* Unconditional branch and link -- writes return PC to LR. */
13152 static void
13153 bl (sim_cpu *cpu, int32_t offset)
13154 {
13155 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
13156 aarch64_save_LR (cpu);
13157 aarch64_set_next_PC_by_offset (cpu, offset);
13158
13159 if (TRACE_BRANCH_P (cpu))
13160 {
13161 ++ stack_depth;
13162 TRACE_BRANCH (cpu,
13163 " %*scall %" PRIx64 " [%s]"
13164 " [args: %" PRIx64 " %" PRIx64 " %" PRIx64 "]",
13165 stack_depth, " ", aarch64_get_next_PC (cpu),
13166 aarch64_get_func (aarch64_get_next_PC (cpu)),
13167 aarch64_get_reg_u64 (cpu, 0, NO_SP),
13168 aarch64_get_reg_u64 (cpu, 1, NO_SP),
13169 aarch64_get_reg_u64 (cpu, 2, NO_SP)
13170 );
13171 }
13172 }
13173
13174 /* Unconditional Branch register.
13175 Branch/return address is in source register. */
13176
13177 /* Unconditional branch. */
13178 static void
13179 br (sim_cpu *cpu)
13180 {
13181 unsigned rn = INSTR (9, 5);
13182 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
13183 aarch64_set_next_PC (cpu, aarch64_get_reg_u64 (cpu, rn, NO_SP));
13184 }
13185
13186 /* Unconditional branch and link -- writes return PC to LR. */
13187 static void
13188 blr (sim_cpu *cpu)
13189 {
13190 unsigned rn = INSTR (9, 5);
13191
13192 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  /* The pseudo code in the spec says we update LR before fetching
     the value from rn. */
13195 aarch64_save_LR (cpu);
13196 aarch64_set_next_PC (cpu, aarch64_get_reg_u64 (cpu, rn, NO_SP));
13197
13198 if (TRACE_BRANCH_P (cpu))
13199 {
13200 ++ stack_depth;
13201 TRACE_BRANCH (cpu,
13202 " %*scall %" PRIx64 " [%s]"
13203 " [args: %" PRIx64 " %" PRIx64 " %" PRIx64 "]",
13204 stack_depth, " ", aarch64_get_next_PC (cpu),
13205 aarch64_get_func (aarch64_get_next_PC (cpu)),
13206 aarch64_get_reg_u64 (cpu, 0, NO_SP),
13207 aarch64_get_reg_u64 (cpu, 1, NO_SP),
13208 aarch64_get_reg_u64 (cpu, 2, NO_SP)
13209 );
13210 }
13211 }
13212
/* Return -- the assembler will default the source register to LR.
   This is functionally equivalent to br but, unlike br, it is
   presumed to hint the branch predictor that this is a function
   return. */
13216 static void
13217 ret (sim_cpu *cpu)
13218 {
13219 unsigned rn = INSTR (9, 5);
13220 aarch64_set_next_PC (cpu, aarch64_get_reg_u64 (cpu, rn, NO_SP));
13221
13222 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
13223 if (TRACE_BRANCH_P (cpu))
13224 {
13225 TRACE_BRANCH (cpu,
13226 " %*sreturn [result: %" PRIx64 "]",
13227 stack_depth, " ", aarch64_get_reg_u64 (cpu, 0, NO_SP));
13228 -- stack_depth;
13229 }
13230 }
13231
13232 /* NOP -- we implement this and call it from the decode in case we
13233 want to intercept it later. */
13234
13235 static void
13236 nop (sim_cpu *cpu)
13237 {
13238 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
13239 }
13240
13241 /* Data synchronization barrier. */
13242
13243 static void
13244 dsb (sim_cpu *cpu)
13245 {
13246 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
13247 }
13248
13249 /* Data memory barrier. */
13250
13251 static void
13252 dmb (sim_cpu *cpu)
13253 {
13254 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
13255 }
13256
13257 /* Instruction synchronization barrier. */
13258
13259 static void
13260 isb (sim_cpu *cpu)
13261 {
13262 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
13263 }
13264
13265 static void
13266 dexBranchImmediate (sim_cpu *cpu)
13267 {
13268 /* assert instr[30,26] == 00101
13269 instr[31] ==> 0 == B, 1 == BL
13270 instr[25,0] == imm26 branch offset counted in words. */
13271
13272 uint32_t top = INSTR (31, 31);
  /* We have a 26 bit signed word offset which we need to pass to the
13274 execute routine as a signed byte offset. */
13275 int32_t offset = simm32 (aarch64_get_instr (cpu), 25, 0) << 2;
13276
13277 if (top)
13278 bl (cpu, offset);
13279 else
13280 buc (cpu, offset);
13281 }
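
/* Decode sketch (not used by the dispatch above): extracting the byte
   offset of a B/BL directly from a raw opcode word INSN, a hypothetical
   input.  imm26 sits in bits [25,0]; shifting it to the top and
   arithmetic-shifting back sign-extends it, and the final << 2 scales
   words to bytes, exactly as dexBranchImmediate does via simm32. */
static inline int32_t
branch_imm26_byte_offset (uint32_t insn)
{
  return ((int32_t) (insn << 6) >> 6) << 2;
}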
13282
13283 /* Control Flow. */
13284
/* Conditional branch

   Offset is a PC-relative byte offset in the range +/- 1MiB; pos is
   a bit position in the range 0 .. 63 (used by the test-bit branches
   below).

   cc is a CondCode enum value as pulled out of the decode.

   N.B. any offset register (source) can only be Xn or Wn. */
13293
13294 static void
13295 bcc (sim_cpu *cpu, int32_t offset, CondCode cc)
13296 {
13297 /* The test returns TRUE if CC is met. */
13298 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
13299 if (testConditionCode (cpu, cc))
13300 aarch64_set_next_PC_by_offset (cpu, offset);
13301 }
13302
13303 /* 32 bit branch on register non-zero. */
13304 static void
13305 cbnz32 (sim_cpu *cpu, int32_t offset)
13306 {
13307 unsigned rt = INSTR (4, 0);
13308
13309 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
13310 if (aarch64_get_reg_u32 (cpu, rt, NO_SP) != 0)
13311 aarch64_set_next_PC_by_offset (cpu, offset);
13312 }
13313
/* 64 bit branch on register non-zero. */
13315 static void
13316 cbnz (sim_cpu *cpu, int32_t offset)
13317 {
13318 unsigned rt = INSTR (4, 0);
13319
13320 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
13321 if (aarch64_get_reg_u64 (cpu, rt, NO_SP) != 0)
13322 aarch64_set_next_PC_by_offset (cpu, offset);
13323 }
13324
/* 32 bit branch on register zero. */
13326 static void
13327 cbz32 (sim_cpu *cpu, int32_t offset)
13328 {
13329 unsigned rt = INSTR (4, 0);
13330
13331 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
13332 if (aarch64_get_reg_u32 (cpu, rt, NO_SP) == 0)
13333 aarch64_set_next_PC_by_offset (cpu, offset);
13334 }
13335
13336 /* 64 bit branch on register zero. */
13337 static void
13338 cbz (sim_cpu *cpu, int32_t offset)
13339 {
13340 unsigned rt = INSTR (4, 0);
13341
13342 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
13343 if (aarch64_get_reg_u64 (cpu, rt, NO_SP) == 0)
13344 aarch64_set_next_PC_by_offset (cpu, offset);
13345 }
13346
13347 /* Branch on register bit test non-zero -- one size fits all. */
13348 static void
13349 tbnz (sim_cpu *cpu, uint32_t pos, int32_t offset)
13350 {
13351 unsigned rt = INSTR (4, 0);
13352
13353 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  if (aarch64_get_reg_u64 (cpu, rt, NO_SP) & (((uint64_t) 1) << pos))
13355 aarch64_set_next_PC_by_offset (cpu, offset);
13356 }
13357
13358 /* Branch on register bit test zero -- one size fits all. */
13359 static void
13360 tbz (sim_cpu *cpu, uint32_t pos, int32_t offset)
13361 {
13362 unsigned rt = INSTR (4, 0);
13363
13364 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
  if (!(aarch64_get_reg_u64 (cpu, rt, NO_SP) & (((uint64_t) 1) << pos)))
13366 aarch64_set_next_PC_by_offset (cpu, offset);
13367 }
13368
13369 static void
13370 dexCompareBranchImmediate (sim_cpu *cpu)
13371 {
13372 /* instr[30,25] = 01 1010
13373 instr[31] = size : 0 ==> 32, 1 ==> 64
13374 instr[24] = op : 0 ==> CBZ, 1 ==> CBNZ
13375 instr[23,5] = simm19 branch offset counted in words
13376 instr[4,0] = rt */
13377
13378 uint32_t size = INSTR (31, 31);
13379 uint32_t op = INSTR (24, 24);
13380 int32_t offset = simm32 (aarch64_get_instr (cpu), 23, 5) << 2;
13381
13382 if (size == 0)
13383 {
13384 if (op == 0)
13385 cbz32 (cpu, offset);
13386 else
13387 cbnz32 (cpu, offset);
13388 }
13389 else
13390 {
13391 if (op == 0)
13392 cbz (cpu, offset);
13393 else
13394 cbnz (cpu, offset);
13395 }
13396 }
13397
13398 static void
13399 dexTestBranchImmediate (sim_cpu *cpu)
13400 {
13401 /* instr[31] = b5 : bit 5 of test bit idx
13402 instr[30,25] = 01 1011
     instr[24] = op : 0 ==> TBZ, 1 ==> TBNZ
13404 instr[23,19] = b40 : bits 4 to 0 of test bit idx
13405 instr[18,5] = simm14 : signed offset counted in words
13406 instr[4,0] = uimm5 */
13407
  /* Bit 31 is b5, bit 5 of the test bit index; bits [23,19] are b40. */
  uint32_t pos = ((INSTR (31, 31) << 5) | INSTR (23, 19));
13409 int32_t offset = simm32 (aarch64_get_instr (cpu), 18, 5) << 2;
13410
13411 NYI_assert (30, 25, 0x1b);
13412
13413 if (INSTR (24, 24) == 0)
13414 tbz (cpu, pos, offset);
13415 else
13416 tbnz (cpu, pos, offset);
13417 }
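
/* Companion decode sketch for TBZ/TBNZ (not used by the dispatch
   above): recover the bit index and byte offset from a raw opcode
   word INSN, a hypothetical input.  b5 (bit 31) supplies bit 5 of the
   index and b40 (bits [23,19]) supplies bits 4 to 0; simm14
   (bits [18,5]) is a word offset which is sign-extended and scaled
   to bytes. */
static inline void
tb_fields (uint32_t insn, uint32_t *pos, int32_t *offset)
{
  *pos = (((insn >> 31) & 1) << 5) | ((insn >> 19) & 0x1f);
  *offset = ((int32_t) (insn << 13) >> 18) << 2;
}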
13418
13419 static void
13420 dexCondBranchImmediate (sim_cpu *cpu)
13421 {
13422 /* instr[31,25] = 010 1010
13423 instr[24] = op1; op => 00 ==> B.cond
13424 instr[23,5] = simm19 : signed offset counted in words
13425 instr[4] = op0
13426 instr[3,0] = cond */
13427
13428 int32_t offset;
13429 uint32_t op = ((INSTR (24, 24) << 1) | INSTR (4, 4));
13430
13431 NYI_assert (31, 25, 0x2a);
13432
13433 if (op != 0)
13434 HALT_UNALLOC;
13435
13436 offset = simm32 (aarch64_get_instr (cpu), 23, 5) << 2;
13437
13438 bcc (cpu, offset, INSTR (3, 0));
13439 }
13440
13441 static void
13442 dexBranchRegister (sim_cpu *cpu)
13443 {
13444 /* instr[31,25] = 110 1011
     instr[24,21] = op : 0 ==> BR, 1 ==> BLR, 2 ==> RET, 4 ==> ERET, 5 ==> DRPS
13446 instr[20,16] = op2 : must be 11111
13447 instr[15,10] = op3 : must be 000000
     instr[4,0] = op4 : must be 00000. */
13449
13450 uint32_t op = INSTR (24, 21);
13451 uint32_t op2 = INSTR (20, 16);
13452 uint32_t op3 = INSTR (15, 10);
13453 uint32_t op4 = INSTR (4, 0);
13454
13455 NYI_assert (31, 25, 0x6b);
13456
13457 if (op2 != 0x1F || op3 != 0 || op4 != 0)
13458 HALT_UNALLOC;
13459
13460 if (op == 0)
13461 br (cpu);
13462
13463 else if (op == 1)
13464 blr (cpu);
13465
13466 else if (op == 2)
13467 ret (cpu);
13468
13469 else
13470 {
      /* ERET and DRPS fix Rn (instr [9,5]) at 0b11111;
         anything else is unallocated. */
      uint32_t rn = INSTR (9, 5);
13474
13475 if (rn != 0x1f)
13476 HALT_UNALLOC;
13477
13478 if (op == 4 || op == 5)
13479 HALT_NYI;
13480
13481 HALT_UNALLOC;
13482 }
13483 }
13484
13485 /* FIXME: We should get the Angel SWI values from ../../libgloss/aarch64/svc.h
13486 but this may not be available. So instead we define the values we need
13487 here. */
13488 #define AngelSVC_Reason_Open 0x01
13489 #define AngelSVC_Reason_Close 0x02
13490 #define AngelSVC_Reason_Write 0x05
13491 #define AngelSVC_Reason_Read 0x06
13492 #define AngelSVC_Reason_IsTTY 0x09
13493 #define AngelSVC_Reason_Seek 0x0A
13494 #define AngelSVC_Reason_FLen 0x0C
13495 #define AngelSVC_Reason_Remove 0x0E
13496 #define AngelSVC_Reason_Rename 0x0F
13497 #define AngelSVC_Reason_Clock 0x10
13498 #define AngelSVC_Reason_Time 0x11
13499 #define AngelSVC_Reason_System 0x12
13500 #define AngelSVC_Reason_Errno 0x13
13501 #define AngelSVC_Reason_GetCmdLine 0x15
13502 #define AngelSVC_Reason_HeapInfo 0x16
13503 #define AngelSVC_Reason_ReportException 0x18
13504 #define AngelSVC_Reason_Elapsed 0x30
13505
13506
13507 static void
13508 handle_halt (sim_cpu *cpu, uint32_t val)
13509 {
13510 uint64_t result = 0;
13511
13512 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
13513 if (val != 0xf000)
13514 {
13515 TRACE_SYSCALL (cpu, " HLT [0x%x]", val);
13516 sim_engine_halt (CPU_STATE (cpu), cpu, NULL, aarch64_get_PC (cpu),
13517 sim_stopped, SIM_SIGTRAP);
13518 }
13519
13520 /* We have encountered an Angel SVC call. See if we can process it. */
13521 switch (aarch64_get_reg_u32 (cpu, 0, NO_SP))
13522 {
13523 case AngelSVC_Reason_HeapInfo:
13524 {
13525 /* Get the values. */
13526 uint64_t stack_top = aarch64_get_stack_start (cpu);
13527 uint64_t heap_base = aarch64_get_heap_start (cpu);
13528
        /* Get the pointer. */
13530 uint64_t ptr = aarch64_get_reg_u64 (cpu, 1, SP_OK);
13531 ptr = aarch64_get_mem_u64 (cpu, ptr);
13532
13533 /* Fill in the memory block. */
13534 /* Start addr of heap. */
13535 aarch64_set_mem_u64 (cpu, ptr + 0, heap_base);
13536 /* End addr of heap. */
13537 aarch64_set_mem_u64 (cpu, ptr + 8, stack_top);
13538 /* Lowest stack addr. */
13539 aarch64_set_mem_u64 (cpu, ptr + 16, heap_base);
13540 /* Initial stack addr. */
13541 aarch64_set_mem_u64 (cpu, ptr + 24, stack_top);
13542
13543 TRACE_SYSCALL (cpu, " AngelSVC: Get Heap Info");
13544 }
13545 break;
13546
13547 case AngelSVC_Reason_Open:
13548 {
        /* Get the pointer. */
        /* uint64_t ptr = aarch64_get_reg_u64 (cpu, 1, SP_OK); */
13551 /* FIXME: For now we just assume that we will only be asked
13552 to open the standard file descriptors. */
13553 static int fd = 0;
13554 result = fd ++;
13555
13556 TRACE_SYSCALL (cpu, " AngelSVC: Open file %d", fd - 1);
13557 }
13558 break;
13559
13560 case AngelSVC_Reason_Close:
13561 {
13562 uint64_t fh = aarch64_get_reg_u64 (cpu, 1, SP_OK);
13563 TRACE_SYSCALL (cpu, " AngelSVC: Close file %d", (int) fh);
13564 result = 0;
13565 }
13566 break;
13567
13568 case AngelSVC_Reason_Errno:
13569 result = 0;
13570 TRACE_SYSCALL (cpu, " AngelSVC: Get Errno");
13571 break;
13572
13573 case AngelSVC_Reason_Clock:
13574 result =
13575 #ifdef CLOCKS_PER_SEC
13576 (CLOCKS_PER_SEC >= 100)
13577 ? (clock () / (CLOCKS_PER_SEC / 100))
13578 : ((clock () * 100) / CLOCKS_PER_SEC)
13579 #else
13580 /* Presume unix... clock() returns microseconds. */
13581 (clock () / 10000)
13582 #endif
13583 ;
13584 TRACE_SYSCALL (cpu, " AngelSVC: Get Clock");
13585 break;
13586
13587 case AngelSVC_Reason_GetCmdLine:
13588 {
        /* Get the pointer. */
13590 uint64_t ptr = aarch64_get_reg_u64 (cpu, 1, SP_OK);
13591 ptr = aarch64_get_mem_u64 (cpu, ptr);
13592
13593 /* FIXME: No command line for now. */
13594 aarch64_set_mem_u64 (cpu, ptr, 0);
13595 TRACE_SYSCALL (cpu, " AngelSVC: Get Command Line");
13596 }
13597 break;
13598
13599 case AngelSVC_Reason_IsTTY:
13600 result = 1;
13601 TRACE_SYSCALL (cpu, " AngelSVC: IsTTY ?");
13602 break;
13603
13604 case AngelSVC_Reason_Write:
13605 {
        /* Get the pointer. */
13607 uint64_t ptr = aarch64_get_reg_u64 (cpu, 1, SP_OK);
13608 /* Get the write control block. */
13609 uint64_t fd = aarch64_get_mem_u64 (cpu, ptr);
13610 uint64_t buf = aarch64_get_mem_u64 (cpu, ptr + 8);
13611 uint64_t len = aarch64_get_mem_u64 (cpu, ptr + 16);
13612
13613 TRACE_SYSCALL (cpu, "write of %" PRIx64 " bytes from %"
13614 PRIx64 " on descriptor %" PRIx64,
13615 len, buf, fd);
13616
13617 if (len > 1280)
13618 {
13619 TRACE_SYSCALL (cpu,
13620 " AngelSVC: Write: Suspiciously long write: %ld",
13621 (long) len);
13622 sim_engine_halt (CPU_STATE (cpu), cpu, NULL, aarch64_get_PC (cpu),
13623 sim_stopped, SIM_SIGBUS);
13624 }
13625 else if (fd == 1)
13626 {
13627 printf ("%.*s", (int) len, aarch64_get_mem_ptr (cpu, buf));
13628 }
13629 else if (fd == 2)
13630 {
13631 TRACE (cpu, 0, "\n");
13632 sim_io_eprintf (CPU_STATE (cpu), "%.*s",
13633 (int) len, aarch64_get_mem_ptr (cpu, buf));
13634 TRACE (cpu, 0, "\n");
13635 }
13636 else
13637 {
13638 TRACE_SYSCALL (cpu,
13639 " AngelSVC: Write: Unexpected file handle: %d",
13640 (int) fd);
13641 sim_engine_halt (CPU_STATE (cpu), cpu, NULL, aarch64_get_PC (cpu),
13642 sim_stopped, SIM_SIGABRT);
13643 }
13644 }
13645 break;
13646
13647 case AngelSVC_Reason_ReportException:
13648 {
        /* Get the pointer. */
        uint64_t ptr = aarch64_get_reg_u64 (cpu, 1, SP_OK);
        /* ptr = aarch64_get_mem_u64 (cpu, ptr); */
13652 uint64_t type = aarch64_get_mem_u64 (cpu, ptr);
13653 uint64_t state = aarch64_get_mem_u64 (cpu, ptr + 8);
13654
13655 TRACE_SYSCALL (cpu,
13656 "Angel Exception: type 0x%" PRIx64 " state %" PRIx64,
13657 type, state);
13658
13659 if (type == 0x20026)
13660 sim_engine_halt (CPU_STATE (cpu), cpu, NULL, aarch64_get_PC (cpu),
13661 sim_exited, state);
13662 else
13663 sim_engine_halt (CPU_STATE (cpu), cpu, NULL, aarch64_get_PC (cpu),
13664 sim_stopped, SIM_SIGINT);
13665 }
13666 break;
13667
13668 case AngelSVC_Reason_Read:
13669 case AngelSVC_Reason_FLen:
13670 case AngelSVC_Reason_Seek:
13671 case AngelSVC_Reason_Remove:
13672 case AngelSVC_Reason_Time:
13673 case AngelSVC_Reason_System:
13674 case AngelSVC_Reason_Rename:
13675 case AngelSVC_Reason_Elapsed:
13676 default:
13677 TRACE_SYSCALL (cpu, " HLT [Unknown angel %x]",
13678 aarch64_get_reg_u32 (cpu, 0, NO_SP));
13679 sim_engine_halt (CPU_STATE (cpu), cpu, NULL, aarch64_get_PC (cpu),
13680 sim_stopped, SIM_SIGTRAP);
13681 }
13682
13683 aarch64_set_reg_u64 (cpu, 0, NO_SP, result);
13684 }
13685
13686 static void
13687 dexExcpnGen (sim_cpu *cpu)
13688 {
13689 /* instr[31:24] = 11010100
13690 instr[23,21] = opc : 000 ==> GEN EXCPN, 001 ==> BRK
13691 010 ==> HLT, 101 ==> DBG GEN EXCPN
13692 instr[20,5] = imm16
13693 instr[4,2] = opc2 000 ==> OK, ow ==> UNALLOC
13694 instr[1,0] = LL : discriminates opc */
13695
13696 uint32_t opc = INSTR (23, 21);
13697 uint32_t imm16 = INSTR (20, 5);
13698 uint32_t opc2 = INSTR (4, 2);
13699 uint32_t LL;
13700
13701 NYI_assert (31, 24, 0xd4);
13702
13703 if (opc2 != 0)
13704 HALT_UNALLOC;
13705
13706 LL = INSTR (1, 0);
13707
13708 /* We only implement HLT and BRK for now. */
13709 if (opc == 1 && LL == 0)
13710 {
13711 TRACE_EVENTS (cpu, " BRK [0x%x]", imm16);
13712 sim_engine_halt (CPU_STATE (cpu), cpu, NULL, aarch64_get_PC (cpu),
13713 sim_exited, aarch64_get_reg_s32 (cpu, R0, SP_OK));
13714 }
13715
13716 if (opc == 2 && LL == 0)
13717 handle_halt (cpu, imm16);
13718
13719 else if (opc == 0 || opc == 5)
13720 HALT_NYI;
13721
13722 else
13723 HALT_UNALLOC;
13724 }
13725
13726 /* Stub for accessing system registers. */
13727
13728 static uint64_t
13729 system_get (sim_cpu *cpu, unsigned op0, unsigned op1, unsigned crn,
13730 unsigned crm, unsigned op2)
13731 {
13732 if (crn == 0 && op1 == 3 && crm == 0 && op2 == 7)
13733 /* DCZID_EL0 - the Data Cache Zero ID register.
13734 We do not support DC ZVA at the moment, so
13735 we return a value with the disable bit set.
13736 We implement support for the DCZID register since
13737 it is used by the C library's memset function. */
13738 return ((uint64_t) 1) << 4;
13739
13740 if (crn == 0 && op1 == 3 && crm == 0 && op2 == 1)
13741 /* Cache Type Register. */
13742 return 0x80008000UL;
13743
13744 if (crn == 13 && op1 == 3 && crm == 0 && op2 == 2)
13745 /* TPIDR_EL0 - thread pointer id. */
13746 return aarch64_get_thread_id (cpu);
13747
13748 if (op1 == 3 && crm == 4 && op2 == 0)
13749 return aarch64_get_FPCR (cpu);
13750
13751 if (op1 == 3 && crm == 4 && op2 == 1)
13752 return aarch64_get_FPSR (cpu);
13753
13754 else if (op1 == 3 && crm == 2 && op2 == 0)
13755 return aarch64_get_CPSR (cpu);
13756
13757 HALT_NYI;
13758 }
13759
13760 static void
13761 system_set (sim_cpu *cpu, unsigned op0, unsigned op1, unsigned crn,
13762 unsigned crm, unsigned op2, uint64_t val)
13763 {
13764 if (op1 == 3 && crm == 4 && op2 == 0)
13765 aarch64_set_FPCR (cpu, val);
13766
13767 else if (op1 == 3 && crm == 4 && op2 == 1)
13768 aarch64_set_FPSR (cpu, val);
13769
13770 else if (op1 == 3 && crm == 2 && op2 == 0)
13771 aarch64_set_CPSR (cpu, val);
13772
13773 else
13774 HALT_NYI;
13775 }
13776
13777 static void
13778 do_mrs (sim_cpu *cpu)
13779 {
13780 /* instr[31:20] = 1101 0101 0001 1
13781 instr[19] = op0
13782 instr[18,16] = op1
13783 instr[15,12] = CRn
13784 instr[11,8] = CRm
13785 instr[7,5] = op2
13786 instr[4,0] = Rt */
13787 unsigned sys_op0 = INSTR (19, 19) + 2;
13788 unsigned sys_op1 = INSTR (18, 16);
13789 unsigned sys_crn = INSTR (15, 12);
13790 unsigned sys_crm = INSTR (11, 8);
13791 unsigned sys_op2 = INSTR (7, 5);
13792 unsigned rt = INSTR (4, 0);
13793
13794 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
13795 aarch64_set_reg_u64 (cpu, rt, NO_SP,
13796 system_get (cpu, sys_op0, sys_op1, sys_crn, sys_crm, sys_op2));
13797 }
13798
13799 static void
13800 do_MSR_immediate (sim_cpu *cpu)
13801 {
13802 /* instr[31:19] = 1101 0101 0000 0
13803 instr[18,16] = op1
13804 instr[15,12] = 0100
13805 instr[11,8] = CRm
13806 instr[7,5] = op2
13807 instr[4,0] = 1 1111 */
13808
13809 unsigned op1 = INSTR (18, 16);
13810 /*unsigned crm = INSTR (11, 8);*/
13811 unsigned op2 = INSTR (7, 5);
13812
13813 NYI_assert (31, 19, 0x1AA0);
13814 NYI_assert (15, 12, 0x4);
13815 NYI_assert (4, 0, 0x1F);
13816
13817 if (op1 == 0)
13818 {
13819 if (op2 == 5)
13820 HALT_NYI; /* set SPSel. */
13821 else
13822 HALT_UNALLOC;
13823 }
13824 else if (op1 == 3)
13825 {
13826 if (op2 == 6)
13827 HALT_NYI; /* set DAIFset. */
13828 else if (op2 == 7)
13829 HALT_NYI; /* set DAIFclr. */
13830 else
13831 HALT_UNALLOC;
13832 }
13833 else
13834 HALT_UNALLOC;
13835 }
13836
13837 static void
13838 do_MSR_reg (sim_cpu *cpu)
13839 {
13840 /* instr[31:20] = 1101 0101 0001
13841 instr[19] = op0
13842 instr[18,16] = op1
13843 instr[15,12] = CRn
13844 instr[11,8] = CRm
13845 instr[7,5] = op2
13846 instr[4,0] = Rt */
13847
13848 unsigned sys_op0 = INSTR (19, 19) + 2;
13849 unsigned sys_op1 = INSTR (18, 16);
13850 unsigned sys_crn = INSTR (15, 12);
13851 unsigned sys_crm = INSTR (11, 8);
13852 unsigned sys_op2 = INSTR (7, 5);
13853 unsigned rt = INSTR (4, 0);
13854
13855 NYI_assert (31, 20, 0xD51);
13856
13857 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
13858 system_set (cpu, sys_op0, sys_op1, sys_crn, sys_crm, sys_op2,
13859 aarch64_get_reg_u64 (cpu, rt, NO_SP));
13860 }
13861
13862 static void
13863 do_SYS (sim_cpu *cpu)
13864 {
13865 /* instr[31,19] = 1101 0101 0000 1
13866 instr[18,16] = op1
13867 instr[15,12] = CRn
13868 instr[11,8] = CRm
13869 instr[7,5] = op2
13870 instr[4,0] = Rt */
13871 NYI_assert (31, 19, 0x1AA1);
13872
13873 /* FIXME: For now we just silently accept system ops. */
13874 }
13875
13876 static void
13877 dexSystem (sim_cpu *cpu)
13878 {
13879 /* instr[31:22] = 1101 01010 0
13880 instr[21] = L
13881 instr[20,19] = op0
13882 instr[18,16] = op1
13883 instr[15,12] = CRn
13884 instr[11,8] = CRm
13885 instr[7,5] = op2
13886 instr[4,0] = uimm5 */
13887
13888 /* We are interested in HINT, DSB, DMB and ISB
13889
13890 Hint #0 encodes NOOP (this is the only hint we care about)
13891 L == 0, op0 == 0, op1 = 011, CRn = 0010, Rt = 11111,
     CRm:op2 == 0000 000 encodes NOP; hints with CRm == 0000 and
     op2 in 001 ... 101 are not implemented, and any other hint is
     treated as a NOP here
13893
     DSB, DMB, ISB are data synchronization barrier, data memory
     barrier and instruction synchronization barrier, respectively, where
13896
13897 L == 0, op0 == 0, op1 = 011, CRn = 0011, Rt = 11111,
13898 op2 : DSB ==> 100, DMB ==> 101, ISB ==> 110
13899 CRm<3:2> ==> domain, CRm<1:0> ==> types,
13900 domain : 00 ==> OuterShareable, 01 ==> Nonshareable,
               10 ==> InnerShareable, 11 ==> FullSystem
13902 types : 01 ==> Reads, 10 ==> Writes,
13903 11 ==> All, 00 ==> All (domain == FullSystem). */
13904
13905 unsigned rt = INSTR (4, 0);
13906
13907 NYI_assert (31, 22, 0x354);
13908
13909 switch (INSTR (21, 12))
13910 {
13911 case 0x032:
13912 if (rt == 0x1F)
13913 {
          /* Treat as NOP when CRm != 0000,
             or when CRm == 0000 and (op2 == 000 or op2 > 101). */
13916 uint32_t crm = INSTR (11, 8);
13917 uint32_t op2 = INSTR (7, 5);
13918
13919 if (crm != 0 || (op2 == 0 || op2 > 5))
13920 {
13921 /* Actually call nop method so we can reimplement it later. */
13922 nop (cpu);
13923 return;
13924 }
13925 }
13926 HALT_NYI;
13927
13928 case 0x033:
13929 {
13930 uint32_t op2 = INSTR (7, 5);
13931
13932 switch (op2)
13933 {
13934 case 2: HALT_NYI;
13935 case 4: dsb (cpu); return;
13936 case 5: dmb (cpu); return;
13937 case 6: isb (cpu); return;
13938 default: HALT_UNALLOC;
13939 }
13940 }
13941
13942 case 0x3B0:
13943 case 0x3B4:
13944 case 0x3BD:
13945 do_mrs (cpu);
13946 return;
13947
13948 case 0x0B7:
13949 do_SYS (cpu); /* DC is an alias of SYS. */
13950 return;
13951
13952 default:
13953 if (INSTR (21, 20) == 0x1)
13954 do_MSR_reg (cpu);
13955 else if (INSTR (21, 19) == 0 && INSTR (15, 12) == 0x4)
13956 do_MSR_immediate (cpu);
13957 else
13958 HALT_NYI;
13959 return;
13960 }
13961 }
13962
13963 static void
13964 dexBr (sim_cpu *cpu)
13965 {
13966 /* uint32_t group = dispatchGroup (aarch64_get_instr (cpu));
13967 assert group == GROUP_BREXSYS_1010 || group == GROUP_BREXSYS_1011
13968 bits [31,29] of a BrExSys are the secondary dispatch vector. */
13969 uint32_t group2 = dispatchBrExSys (aarch64_get_instr (cpu));
13970
13971 switch (group2)
13972 {
13973 case BR_IMM_000:
13974 return dexBranchImmediate (cpu);
13975
13976 case BR_IMMCMP_001:
13977 /* Compare has bit 25 clear while test has it set. */
13978 if (!INSTR (25, 25))
13979 dexCompareBranchImmediate (cpu);
13980 else
13981 dexTestBranchImmediate (cpu);
13982 return;
13983
13984 case BR_IMMCOND_010:
13985 /* This is a conditional branch if bit 25 is clear otherwise
13986 unallocated. */
13987 if (!INSTR (25, 25))
13988 dexCondBranchImmediate (cpu);
13989 else
13990 HALT_UNALLOC;
13991 return;
13992
13993 case BR_UNALLOC_011:
13994 HALT_UNALLOC;
13995
13996 case BR_IMM_100:
13997 dexBranchImmediate (cpu);
13998 return;
13999
14000 case BR_IMMCMP_101:
14001 /* Compare has bit 25 clear while test has it set. */
14002 if (!INSTR (25, 25))
14003 dexCompareBranchImmediate (cpu);
14004 else
14005 dexTestBranchImmediate (cpu);
14006 return;
14007
14008 case BR_REG_110:
14009 /* Unconditional branch reg has bit 25 set. */
14010 if (INSTR (25, 25))
14011 dexBranchRegister (cpu);
14012
14013 /* This includes both Excpn Gen, System and unalloc operations.
14014 We need to decode the Excpn Gen operation BRK so we can plant
14015 debugger entry points.
14016 Excpn Gen operations have instr [24] = 0.
14017 we need to decode at least one of the System operations NOP
14018 which is an alias for HINT #0.
14019 System operations have instr [24,22] = 100. */
14020 else if (INSTR (24, 24) == 0)
14021 dexExcpnGen (cpu);
14022
14023 else if (INSTR (24, 22) == 4)
14024 dexSystem (cpu);
14025
14026 else
14027 HALT_UNALLOC;
14028
14029 return;
14030
14031 case BR_UNALLOC_111:
14032 HALT_UNALLOC;
14033
14034 default:
14035 /* Should never reach here. */
14036 HALT_NYI;
14037 }
14038 }
14039
14040 static void
14041 aarch64_decode_and_execute (sim_cpu *cpu, uint64_t pc)
14042 {
  /* We would need to check here whether gdb wants to break in. */
  /* checkBreak (cpu); */
14045
14046 uint64_t group = dispatchGroup (aarch64_get_instr (cpu));
14047
14048 switch (group)
14049 {
14050 case GROUP_PSEUDO_0000: dexPseudo (cpu); break;
14051 case GROUP_LDST_0100: dexLdSt (cpu); break;
14052 case GROUP_DPREG_0101: dexDPReg (cpu); break;
14053 case GROUP_LDST_0110: dexLdSt (cpu); break;
14054 case GROUP_ADVSIMD_0111: dexAdvSIMD0 (cpu); break;
14055 case GROUP_DPIMM_1000: dexDPImm (cpu); break;
14056 case GROUP_DPIMM_1001: dexDPImm (cpu); break;
14057 case GROUP_BREXSYS_1010: dexBr (cpu); break;
14058 case GROUP_BREXSYS_1011: dexBr (cpu); break;
14059 case GROUP_LDST_1100: dexLdSt (cpu); break;
14060 case GROUP_DPREG_1101: dexDPReg (cpu); break;
14061 case GROUP_LDST_1110: dexLdSt (cpu); break;
14062 case GROUP_ADVSIMD_1111: dexAdvSIMD1 (cpu); break;
14063
14064 case GROUP_UNALLOC_0001:
14065 case GROUP_UNALLOC_0010:
14066 case GROUP_UNALLOC_0011:
14067 HALT_UNALLOC;
14068
14069 default:
14070 /* Should never reach here. */
14071 HALT_NYI;
14072 }
14073 }
14074
14075 bfd_boolean
14076 aarch64_step (sim_cpu *cpu)
14077 {
14078 uint64_t pc = aarch64_get_PC (cpu);
14079
14080 if (pc == TOP_LEVEL_RETURN_PC)
14081 return FALSE;
14082
14083 aarch64_set_next_PC (cpu, pc + 4);
14084
14085 /* Code is always little-endian. */
14086 sim_core_read_buffer (CPU_STATE (cpu), cpu, read_map,
14087 & aarch64_get_instr (cpu), pc, 4);
14088 aarch64_get_instr (cpu) = endian_le2h_4 (aarch64_get_instr (cpu));
14089
14090 TRACE_INSN (cpu, " pc = %" PRIx64 " instr = %08x", pc,
14091 aarch64_get_instr (cpu));
14092 TRACE_DISASM (cpu, pc);
14093
14094 aarch64_decode_and_execute (cpu, pc);
14095
14096 return TRUE;
14097 }
14098
14099 void
14100 aarch64_run (SIM_DESC sd)
14101 {
14102 sim_cpu *cpu = STATE_CPU (sd, 0);
14103
14104 while (aarch64_step (cpu))
14105 aarch64_update_PC (cpu);
14106
14107 sim_engine_halt (sd, NULL, NULL, aarch64_get_PC (cpu),
14108 sim_exited, aarch64_get_reg_s32 (cpu, R0, SP_OK));
14109 }
14110
14111 void
14112 aarch64_init (sim_cpu *cpu, uint64_t pc)
14113 {
14114 uint64_t sp = aarch64_get_stack_start (cpu);
14115
  /* Install SP, FP and PC and set LR to TOP_LEVEL_RETURN_PC
     so we can detect a top-level return. */
14118 aarch64_set_reg_u64 (cpu, SP, SP_OK, sp);
14119 aarch64_set_reg_u64 (cpu, FP, SP_OK, sp);
14120 aarch64_set_reg_u64 (cpu, LR, SP_OK, TOP_LEVEL_RETURN_PC);
14121 aarch64_set_next_PC (cpu, pc);
14122 aarch64_update_PC (cpu);
14123 aarch64_init_LIT_table ();
14124 }