1 /* simulator.c -- Interface for the AArch64 simulator.
2
3 Copyright (C) 2015-2023 Free Software Foundation, Inc.
4
5 Contributed by Red Hat.
6
7 This file is part of GDB.
8
9 This program is free software; you can redistribute it and/or modify
10 it under the terms of the GNU General Public License as published by
11 the Free Software Foundation; either version 3 of the License, or
12 (at your option) any later version.
13
14 This program is distributed in the hope that it will be useful,
15 but WITHOUT ANY WARRANTY; without even the implied warranty of
16 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 GNU General Public License for more details.
18
19 You should have received a copy of the GNU General Public License
20 along with this program. If not, see <http://www.gnu.org/licenses/>. */
21
22 /* This must come before any other includes. */
23 #include "defs.h"
24
25 #include <stdlib.h>
26 #include <stdio.h>
27 #include <string.h>
28 #include <sys/types.h>
29 #include <math.h>
30 #include <time.h>
31 #include <limits.h>
32
33 #include "aarch64-sim.h"
34 #include "simulator.h"
35 #include "cpustate.h"
36 #include "memory.h"
37
38 #include "sim-signal.h"
39
40 #define NO_SP 0
41 #define SP_OK 1
42
43 #define TST(_flag) (aarch64_test_CPSR_bit (cpu, _flag))
44 #define IS_SET(_X) (TST (( _X )) ? 1 : 0)
45 #define IS_CLEAR(_X) (TST (( _X )) ? 0 : 1)
46
47 /* Space saver macro. */
48 #define INSTR(HIGH, LOW) uimm (aarch64_get_instr (cpu), (HIGH), (LOW))
49
50 #define HALT_UNALLOC \
51 do \
52 { \
53 TRACE_DISASM (cpu, aarch64_get_PC (cpu)); \
54 TRACE_INSN (cpu, \
55 "Unallocated instruction detected at sim line %d," \
56 " exe addr %" PRIx64, \
57 __LINE__, aarch64_get_PC (cpu)); \
58 sim_engine_halt (CPU_STATE (cpu), cpu, NULL, aarch64_get_PC (cpu),\
59 sim_stopped, SIM_SIGILL); \
60 } \
61 while (0)
62
63 #define HALT_NYI \
64 do \
65 { \
66 TRACE_DISASM (cpu, aarch64_get_PC (cpu)); \
67 TRACE_INSN (cpu, \
68 "Unimplemented instruction detected at sim line %d," \
69 " exe addr %" PRIx64, \
70 __LINE__, aarch64_get_PC (cpu)); \
71 if (! TRACE_ANY_P (cpu)) \
72 sim_io_eprintf (CPU_STATE (cpu), "SIM Error: Unimplemented instruction: %#08x\n", \
73 aarch64_get_instr (cpu)); \
74 sim_engine_halt (CPU_STATE (cpu), cpu, NULL, aarch64_get_PC (cpu),\
75 sim_stopped, SIM_SIGABRT); \
76 } \
77 while (0)
78
79 #define NYI_assert(HI, LO, EXPECTED) \
80 do \
81 { \
82 if (INSTR ((HI), (LO)) != (EXPECTED)) \
83 HALT_NYI; \
84 } \
85 while (0)
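
/* Typical usage (editor's sketch, not from the original source):
   a decode routine whose bits [29,24] must equal 0b011010 would
   write

     NYI_assert (29, 24, 0x1a);

   and fall into HALT_NYI on any other encoding.  */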
86
87 static uint64_t
88 expand_logical_immediate (uint32_t S, uint32_t R, uint32_t N)
89 {
90 uint64_t mask;
91 uint64_t imm;
92 unsigned simd_size;
93
94 /* The immediate value is S+1 bits set to 1, left rotated by SIMDsize - R
95 (in other words, right rotated by R), then replicated. */
96 if (N != 0)
97 {
98 simd_size = 64;
99 mask = 0xffffffffffffffffull;
100 }
101 else
102 {
103 switch (S)
104 {
105 case 0x00 ... 0x1f: /* 0xxxxx */ simd_size = 32; break;
106 case 0x20 ... 0x2f: /* 10xxxx */ simd_size = 16; S &= 0xf; break;
107 case 0x30 ... 0x37: /* 110xxx */ simd_size = 8; S &= 0x7; break;
108 case 0x38 ... 0x3b: /* 1110xx */ simd_size = 4; S &= 0x3; break;
109 case 0x3c ... 0x3d: /* 11110x */ simd_size = 2; S &= 0x1; break;
110 default: return 0;
111 }
112 mask = (1ull << simd_size) - 1;
113 /* Top bits are IGNORED. */
114 R &= simd_size - 1;
115 }
116
117 /* NOTE: if S = simd_size - 1 we get 0xf..f which is rejected. */
118 if (S == simd_size - 1)
119 return 0;
120
121 /* S+1 consecutive bits to 1. */
122 /* NOTE: S can't be 63 due to detection above. */
123 imm = (1ull << (S + 1)) - 1;
124
125 /* Rotate to the left by simd_size - R. */
126 if (R != 0)
127 imm = ((imm << (simd_size - R)) & mask) | (imm >> R);
128
129 /* Replicate the value according to SIMD size. */
130 switch (simd_size)
131 {
132 case 2: imm = (imm << 2) | imm; /* Fall through. */
133 case 4: imm = (imm << 4) | imm; /* Fall through. */
134 case 8: imm = (imm << 8) | imm; /* Fall through. */
135 case 16: imm = (imm << 16) | imm; /* Fall through. */
136 case 32: imm = (imm << 32) | imm; /* Fall through. */
137 case 64: break;
138 default: return 0;
139 }
140
141 return imm;
142 }
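
/* Worked example (editor's sketch): with N = 0, S = 0x25
   (10_0101 ==> simd_size = 16, S becomes 5) and R = 2, the
   element is (1ull << 6) - 1 = 0x003f, rotated right by 2
   within 16 bits to give 0xc00f, then replicated to yield
   0xc00fc00fc00fc00f.  */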
143
144 /* Instr[22,10] encodes N, immr and imms.  We want a lookup table
145 for each possible combination, i.e. 13 bits worth of int entries. */
146 #define LI_TABLE_SIZE (1 << 13)
147 static uint64_t LITable[LI_TABLE_SIZE];
148
149 void
150 aarch64_init_LIT_table (void)
151 {
152 unsigned index;
153
154 for (index = 0; index < LI_TABLE_SIZE; index++)
155 {
156 uint32_t N = uimm (index, 12, 12);
157 uint32_t immr = uimm (index, 11, 6);
158 uint32_t imms = uimm (index, 5, 0);
159
160 LITable [index] = expand_logical_immediate (imms, immr, N);
161 }
162 }
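
/* A hypothetical consumer (editor's sketch): a decoder for a
   logical immediate instruction would index the table with the
   13 bits instr[22,10] and treat a zero entry as unallocated:

     uint64_t imm = LITable [INSTR (22, 10)];
     if (imm == 0)
       HALT_UNALLOC;  */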
163
164 static void
165 dexNotify (sim_cpu *cpu)
166 {
167 /* instr[14,0] == type : 0 ==> method entry, 1 ==> method reentry
168 2 ==> exit Java, 3 ==> start next bytecode. */
169 uint32_t type = INSTR (14, 0);
170
171 TRACE_EVENTS (cpu, "Notify Insn encountered, type = 0x%x", type);
172
173 switch (type)
174 {
175 case 0:
176 /* aarch64_notifyMethodEntry (aarch64_get_reg_u64 (cpu, R23, 0),
177 aarch64_get_reg_u64 (cpu, R22, 0)); */
178 break;
179 case 1:
180 /* aarch64_notifyMethodReentry (aarch64_get_reg_u64 (cpu, R23, 0),
181 aarch64_get_reg_u64 (cpu, R22, 0)); */
182 break;
183 case 2:
184 /* aarch64_notifyMethodExit (); */
185 break;
186 case 3:
187 /* aarch64_notifyBCStart (aarch64_get_reg_u64 (cpu, R23, 0),
188 aarch64_get_reg_u64 (cpu, R22, 0)); */
189 break;
190 }
191 }
192
193 /* secondary decode within top level groups */
194
195 static void
196 dexPseudo (sim_cpu *cpu)
197 {
198 /* assert instr[28,27] = 00
199
200 We provide 2 pseudo instructions:
201
202 HALT stops execution of the simulator causing an immediate
203 return to the x86 code which entered it.
204
205 CALLOUT initiates recursive entry into x86 code. A register
206 argument holds the address of the x86 routine. Immediate
207 values in the instruction identify the number of general
208 purpose and floating point register arguments to be passed
209 and the type of any value to be returned. */
210
211 uint32_t PSEUDO_HALT = 0xE0000000U;
212 uint32_t PSEUDO_CALLOUT = 0x00018000U;
213 uint32_t PSEUDO_CALLOUTR = 0x00018001U;
214 uint32_t PSEUDO_NOTIFY = 0x00014000U;
215 uint32_t dispatch;
216
217 if (aarch64_get_instr (cpu) == PSEUDO_HALT)
218 {
219 TRACE_EVENTS (cpu, " Pseudo Halt Instruction");
220 sim_engine_halt (CPU_STATE (cpu), cpu, NULL, aarch64_get_PC (cpu),
221 sim_stopped, SIM_SIGTRAP);
222 }
223
224 dispatch = INSTR (31, 15);
225
226 /* We do not handle callouts at the moment. */
227 if (dispatch == PSEUDO_CALLOUT || dispatch == PSEUDO_CALLOUTR)
228 {
229 TRACE_EVENTS (cpu, " Callout");
230 sim_engine_halt (CPU_STATE (cpu), cpu, NULL, aarch64_get_PC (cpu),
231 sim_stopped, SIM_SIGABRT);
232 }
233
234 else if (dispatch == PSEUDO_NOTIFY)
235 dexNotify (cpu);
236
237 else
238 HALT_UNALLOC;
239 }
240
241 /* Load-store single register (unscaled offset)
242 These instructions employ a base register plus an unscaled signed
243 9 bit offset.
244
245 N.B. the base register (source) can be Xn or SP.  All other
246 registers may not be SP. */
247
248 /* 32 bit load 32 bit unscaled signed 9 bit. */
249 static void
250 ldur32 (sim_cpu *cpu, int32_t offset)
251 {
252 unsigned rn = INSTR (9, 5);
253 unsigned rt = INSTR (4, 0);
254
255 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
256 aarch64_set_reg_u64 (cpu, rt, NO_SP, aarch64_get_mem_u32
257 (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK)
258 + offset));
259 }
260
261 /* 64 bit load 64 bit unscaled signed 9 bit. */
262 static void
263 ldur64 (sim_cpu *cpu, int32_t offset)
264 {
265 unsigned rn = INSTR (9, 5);
266 unsigned rt = INSTR (4, 0);
267
268 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
269 aarch64_set_reg_u64 (cpu, rt, NO_SP, aarch64_get_mem_u64
270 (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK)
271 + offset));
272 }
273
274 /* 32 bit load zero-extended byte unscaled signed 9 bit. */
275 static void
276 ldurb32 (sim_cpu *cpu, int32_t offset)
277 {
278 unsigned rn = INSTR (9, 5);
279 unsigned rt = INSTR (4, 0);
280
281 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
282 aarch64_set_reg_u64 (cpu, rt, NO_SP, aarch64_get_mem_u8
283 (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK)
284 + offset));
285 }
286
287 /* 32 bit load sign-extended byte unscaled signed 9 bit. */
288 static void
289 ldursb32 (sim_cpu *cpu, int32_t offset)
290 {
291 unsigned rn = INSTR (9, 5);
292 unsigned rt = INSTR (4, 0);
293
294 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
295 aarch64_set_reg_u64 (cpu, rt, NO_SP, (uint32_t) aarch64_get_mem_s8
296 (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK)
297 + offset));
298 }
299
300 /* 64 bit load sign-extended byte unscaled signed 9 bit. */
301 static void
302 ldursb64 (sim_cpu *cpu, int32_t offset)
303 {
304 unsigned rn = INSTR (9, 5);
305 unsigned rt = INSTR (4, 0);
306
307 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
308 aarch64_set_reg_s64 (cpu, rt, NO_SP, aarch64_get_mem_s8
309 (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK)
310 + offset));
311 }
312
313 /* 32 bit load zero-extended short unscaled signed 9 bit */
314 static void
315 ldurh32 (sim_cpu *cpu, int32_t offset)
316 {
317 unsigned rn = INSTR (9, 5);
318 unsigned rd = INSTR (4, 0);
319
320 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
321 aarch64_set_reg_u64 (cpu, rd, NO_SP, aarch64_get_mem_u16
322 (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK)
323 + offset));
324 }
325
326 /* 32 bit load sign-extended short unscaled signed 9 bit */
327 static void
328 ldursh32 (sim_cpu *cpu, int32_t offset)
329 {
330 unsigned rn = INSTR (9, 5);
331 unsigned rd = INSTR (4, 0);
332
333 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
334 aarch64_set_reg_u64 (cpu, rd, NO_SP, (uint32_t) aarch64_get_mem_s16
335 (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK)
336 + offset));
337 }
338
339 /* 64 bit load sign-extended short unscaled signed 9 bit */
340 static void
341 ldursh64 (sim_cpu *cpu, int32_t offset)
342 {
343 unsigned rn = INSTR (9, 5);
344 unsigned rt = INSTR (4, 0);
345
346 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
347 aarch64_set_reg_s64 (cpu, rt, NO_SP, aarch64_get_mem_s16
348 (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK)
349 + offset));
350 }
351
352 /* 64 bit load sign-extended word unscaled signed 9 bit */
353 static void
354 ldursw (sim_cpu *cpu, int32_t offset)
355 {
356 unsigned rn = INSTR (9, 5);
357 unsigned rd = INSTR (4, 0);
358
359 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
360 aarch64_set_reg_u64 (cpu, rd, NO_SP, (uint32_t) aarch64_get_mem_s32
361 (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK)
362 + offset));
363 }
364
365 /* N.B. with stores the value in source is written to the address
366 identified by source2 modified by offset. */
367
368 /* 32 bit store 32 bit unscaled signed 9 bit. */
369 static void
370 stur32 (sim_cpu *cpu, int32_t offset)
371 {
372 unsigned rn = INSTR (9, 5);
373 unsigned rd = INSTR (4, 0);
374
375 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
376 aarch64_set_mem_u32 (cpu,
377 aarch64_get_reg_u64 (cpu, rn, SP_OK) + offset,
378 aarch64_get_reg_u32 (cpu, rd, NO_SP));
379 }
380
381 /* 64 bit store 64 bit unscaled signed 9 bit */
382 static void
383 stur64 (sim_cpu *cpu, int32_t offset)
384 {
385 unsigned rn = INSTR (9, 5);
386 unsigned rd = INSTR (4, 0);
387
388 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
389 aarch64_set_mem_u64 (cpu,
390 aarch64_get_reg_u64 (cpu, rn, SP_OK) + offset,
391 aarch64_get_reg_u64 (cpu, rd, NO_SP));
392 }
393
394 /* 32 bit store byte unscaled signed 9 bit */
395 static void
396 sturb (sim_cpu *cpu, int32_t offset)
397 {
398 unsigned rn = INSTR (9, 5);
399 unsigned rd = INSTR (4, 0);
400
401 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
402 aarch64_set_mem_u8 (cpu,
403 aarch64_get_reg_u64 (cpu, rn, SP_OK) + offset,
404 aarch64_get_reg_u8 (cpu, rd, NO_SP));
405 }
406
407 /* 32 bit store short unscaled signed 9 bit */
408 static void
409 sturh (sim_cpu *cpu, int32_t offset)
410 {
411 unsigned rn = INSTR (9, 5);
412 unsigned rd = INSTR (4, 0);
413
414 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
415 aarch64_set_mem_u16 (cpu,
416 aarch64_get_reg_u64 (cpu, rn, SP_OK) + offset,
417 aarch64_get_reg_u16 (cpu, rd, NO_SP));
418 }
419
420 /* Load single register pc-relative label
421 Offset is a signed 19 bit immediate count in words
422 rt may not be SP. */
423
424 /* 32 bit pc-relative load */
425 static void
426 ldr32_pcrel (sim_cpu *cpu, int32_t offset)
427 {
428 unsigned rd = INSTR (4, 0);
429
430 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
431 aarch64_set_reg_u64 (cpu, rd, NO_SP,
432 aarch64_get_mem_u32
433 (cpu, aarch64_get_PC (cpu) + offset * 4));
434 }
435
436 /* 64 bit pc-relative load */
437 static void
438 ldr_pcrel (sim_cpu *cpu, int32_t offset)
439 {
440 unsigned rd = INSTR (4, 0);
441
442 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
443 aarch64_set_reg_u64 (cpu, rd, NO_SP,
444 aarch64_get_mem_u64
445 (cpu, aarch64_get_PC (cpu) + offset * 4));
446 }
447
448 /* sign extended 32 bit pc-relative load */
449 static void
450 ldrsw_pcrel (sim_cpu *cpu, int32_t offset)
451 {
452 unsigned rd = INSTR (4, 0);
453
454 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
455 aarch64_set_reg_u64 (cpu, rd, NO_SP,
456 aarch64_get_mem_s32
457 (cpu, aarch64_get_PC (cpu) + offset * 4));
458 }
459
460 /* float pc-relative load */
461 static void
462 fldrs_pcrel (sim_cpu *cpu, int32_t offset)
463 {
464 unsigned int rd = INSTR (4, 0);
465
466 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
467 aarch64_set_vec_u32 (cpu, rd, 0,
468 aarch64_get_mem_u32
469 (cpu, aarch64_get_PC (cpu) + offset * 4));
470 }
471
472 /* double pc-relative load */
473 static void
474 fldrd_pcrel (sim_cpu *cpu, int32_t offset)
475 {
476 unsigned int st = INSTR (4, 0);
477
478 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
479 aarch64_set_vec_u64 (cpu, st, 0,
480 aarch64_get_mem_u64
481 (cpu, aarch64_get_PC (cpu) + offset * 4));
482 }
483
484 /* long double pc-relative load. */
485 static void
486 fldrq_pcrel (sim_cpu *cpu, int32_t offset)
487 {
488 unsigned int st = INSTR (4, 0);
489 uint64_t addr = aarch64_get_PC (cpu) + offset * 4;
490 FRegister a;
491
492 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
493 aarch64_get_mem_long_double (cpu, addr, & a);
494 aarch64_set_FP_long_double (cpu, st, a);
495 }
496
497 /* This can be used to scale an offset by applying
498 the requisite shift.  The second argument is either
499 16, 32 or 64. */
500
501 #define SCALE(_offset, _elementSize) \
502 ((_offset) << ScaleShift ## _elementSize)
503
504 /* This can be used to optionally scale a register derived offset
505 by applying the requisite shift as indicated by the Scaling
506 argument. The second argument is either Byte, Short, Word
507 or Long. The third argument is either Scaled or Unscaled.
508 N.B. when _Scaling is Scaled the shift gets ANDed with
509 all 1s while when it is Unscaled it gets ANDed with 0. */
510
511 #define OPT_SCALE(_offset, _elementType, _Scaling) \
512 ((_offset) << (_Scaling ? ScaleShift ## _elementType : 0))
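
/* For example (editor's note): SCALE (offset, 32) expands to
   offset << ScaleShift32 (a multiply by 4, assuming the usual
   ScaleShift32 == 2), while OPT_SCALE (offset, 32, Unscaled)
   leaves the offset unchanged.  */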
513
514 /* This can be used to zero or sign extend a 32 bit register derived
515 value to a 64 bit value.  The first argument must be the value as
516 a uint32_t and the second must be either UXTW or SXTW. The result
517 is returned as an int64_t. */
518
519 static inline int64_t
520 extend (uint32_t value, Extension extension)
521 {
522 union
523 {
524 uint32_t u;
525 int32_t n;
526 } x;
527
528 /* A branchless variant of this ought to be possible. */
529 if (extension == UXTW || extension == NoExtension)
530 return value;
531
532 x.u = value;
533 return x.n;
534 }
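
/* For example (editor's note): extend (0xffffffff, SXTW) yields
   -1, while extend (0xffffffff, UXTW) yields 0x00000000ffffffff.  */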
535
536 /* Scalar Floating Point
537
538 FP load/store single register (4 addressing modes)
539
540 N.B. the base register (source) can be the stack pointer.
541 The secondary source register (source2) can only be an Xn register. */
542
543 /* Load 32 bit unscaled signed 9 bit with pre- or post-writeback. */
544 static void
545 fldrs_wb (sim_cpu *cpu, int32_t offset, WriteBack wb)
546 {
547 unsigned rn = INSTR (9, 5);
548 unsigned st = INSTR (4, 0);
549 uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
550
551 if (wb != Post)
552 address += offset;
553
554 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
555 aarch64_set_vec_u32 (cpu, st, 0, aarch64_get_mem_u32 (cpu, address));
556 if (wb == Post)
557 address += offset;
558
559 if (wb != NoWriteBack)
560 aarch64_set_reg_u64 (cpu, rn, SP_OK, address);
561 }
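
/* Editor's note on the wb argument: with Pre the offset is added
   before the access and written back; with Post it is added after
   the access and written back; with NoWriteBack it merely biases
   the address.  E.g. fldrs_wb (cpu, 8, Pre) loads from [Xn + 8]
   and leaves Xn + 8 in the base register.  */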
562
563 /* Load 8 bit with unsigned 12 bit offset. */
564 static void
565 fldrb_abs (sim_cpu *cpu, uint32_t offset)
566 {
567 unsigned rd = INSTR (4, 0);
568 unsigned rn = INSTR (9, 5);
569 uint64_t addr = aarch64_get_reg_u64 (cpu, rn, SP_OK) + offset;
570
571 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
572 aarch64_set_vec_u8 (cpu, rd, 0, aarch64_get_mem_u8 (cpu, addr));
573 }
574
575 /* Load 16 bit scaled unsigned 12 bit. */
576 static void
577 fldrh_abs (sim_cpu *cpu, uint32_t offset)
578 {
579 unsigned rd = INSTR (4, 0);
580 unsigned rn = INSTR (9, 5);
581 uint64_t addr = aarch64_get_reg_u64 (cpu, rn, SP_OK) + SCALE (offset, 16);
582
583 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
584 aarch64_set_vec_u16 (cpu, rd, 0, aarch64_get_mem_u16 (cpu, addr));
585 }
586
587 /* Load 32 bit scaled unsigned 12 bit. */
588 static void
589 fldrs_abs (sim_cpu *cpu, uint32_t offset)
590 {
591 unsigned rd = INSTR (4, 0);
592 unsigned rn = INSTR (9, 5);
593 uint64_t addr = aarch64_get_reg_u64 (cpu, rn, SP_OK) + SCALE (offset, 32);
594
595 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
596 aarch64_set_vec_u32 (cpu, rd, 0, aarch64_get_mem_u32 (cpu, addr));
597 }
598
599 /* Load 64 bit scaled unsigned 12 bit. */
600 static void
601 fldrd_abs (sim_cpu *cpu, uint32_t offset)
602 {
603 unsigned rd = INSTR (4, 0);
604 unsigned rn = INSTR (9, 5);
605 uint64_t addr = aarch64_get_reg_u64 (cpu, rn, SP_OK) + SCALE (offset, 64);
606
607 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
608 aarch64_set_vec_u64 (cpu, rd, 0, aarch64_get_mem_u64 (cpu, addr));
609 }
610
611 /* Load 128 bit scaled unsigned 12 bit. */
612 static void
613 fldrq_abs (sim_cpu *cpu, uint32_t offset)
614 {
615 unsigned rd = INSTR (4, 0);
616 unsigned rn = INSTR (9, 5);
617 uint64_t addr = aarch64_get_reg_u64 (cpu, rn, SP_OK) + SCALE (offset, 128);
618
619 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
620 aarch64_set_vec_u64 (cpu, rd, 0, aarch64_get_mem_u64 (cpu, addr));
621 aarch64_set_vec_u64 (cpu, rd, 1, aarch64_get_mem_u64 (cpu, addr + 8));
622 }
623
624 /* Load 32 bit scaled or unscaled zero- or sign-extended
625 32-bit register offset. */
626 static void
627 fldrs_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
628 {
629 unsigned rm = INSTR (20, 16);
630 unsigned rn = INSTR (9, 5);
631 unsigned st = INSTR (4, 0);
632 uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
633 int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP), extension);
634 uint64_t displacement = OPT_SCALE (extended, 32, scaling);
635
636 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
637 aarch64_set_vec_u32 (cpu, st, 0, aarch64_get_mem_u32
638 (cpu, address + displacement));
639 }
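
/* For example (editor's note): with rm holding 0xfffffffc, SXTW
   and Scaled, extended is -4 and the displacement is -4 << 2 = -16,
   so the load targets [Xn - 16].  */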
640
641 /* Load 64 bit unscaled signed 9 bit with pre- or post-writeback. */
642 static void
643 fldrd_wb (sim_cpu *cpu, int32_t offset, WriteBack wb)
644 {
645 unsigned rn = INSTR (9, 5);
646 unsigned st = INSTR (4, 0);
647 uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
648
649 if (wb != Post)
650 address += offset;
651
652 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
653 aarch64_set_vec_u64 (cpu, st, 0, aarch64_get_mem_u64 (cpu, address));
654
655 if (wb == Post)
656 address += offset;
657
658 if (wb != NoWriteBack)
659 aarch64_set_reg_u64 (cpu, rn, SP_OK, address);
660 }
661
662 /* Load 64 bit scaled or unscaled zero- or sign-extended 32-bit register offset. */
663 static void
664 fldrd_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
665 {
666 unsigned rm = INSTR (20, 16);
667 int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP), extension);
668 uint64_t displacement = OPT_SCALE (extended, 64, scaling);
669
670 fldrd_wb (cpu, displacement, NoWriteBack);
671 }
672
673 /* Load 128 bit unscaled signed 9 bit with pre- or post-writeback. */
674 static void
675 fldrq_wb (sim_cpu *cpu, int32_t offset, WriteBack wb)
676 {
677 FRegister a;
678 unsigned rn = INSTR (9, 5);
679 unsigned st = INSTR (4, 0);
680 uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
681
682 if (wb != Post)
683 address += offset;
684
685 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
686 aarch64_get_mem_long_double (cpu, address, & a);
687 aarch64_set_FP_long_double (cpu, st, a);
688
689 if (wb == Post)
690 address += offset;
691
692 if (wb != NoWriteBack)
693 aarch64_set_reg_u64 (cpu, rn, SP_OK, address);
694 }
695
696 /* Load 128 bit scaled or unscaled zero- or sign-extended 32-bit register offset */
697 static void
698 fldrq_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
699 {
700 unsigned rm = INSTR (20, 16);
701 int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP), extension);
702 uint64_t displacement = OPT_SCALE (extended, 128, scaling);
703
704 fldrq_wb (cpu, displacement, NoWriteBack);
705 }
706
707 /* Memory Access
708
709 load-store single register
710 There are four addressing modes available here which all employ a
711 64 bit source (base) register.
712
713 N.B. the base register (source) can be the stack pointer.
714 The secondary source register (source2) can only be an Xn register.
715
716 Scaled, 12-bit, unsigned immediate offset, without pre- and
717 post-index options.
718 Unscaled, 9-bit, signed immediate offset with pre- or post-index
719 writeback.
720 Scaled or unscaled 64-bit register offset.
721 Scaled or unscaled 32-bit extended register offset.
722
723 All offsets are assumed to be raw from the decode, i.e. the
724 simulator is expected to adjust scaled offsets based on the
725 accessed data size.  With register or extended register offset
726 versions the same applies, except that in the latter case the
727 operation may also require a sign extend.
728
729 A separate method is provided for each possible addressing mode. */
730
731 /* 32 bit load 32 bit scaled unsigned 12 bit */
732 static void
733 ldr32_abs (sim_cpu *cpu, uint32_t offset)
734 {
735 unsigned rn = INSTR (9, 5);
736 unsigned rt = INSTR (4, 0);
737
738 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
739 /* The target register may not be SP but the source may be. */
740 aarch64_set_reg_u64 (cpu, rt, NO_SP, aarch64_get_mem_u32
741 (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK)
742 + SCALE (offset, 32)));
743 }
744
745 /* 32 bit load 32 bit unscaled signed 9 bit with pre- or post-writeback. */
746 static void
747 ldr32_wb (sim_cpu *cpu, int32_t offset, WriteBack wb)
748 {
749 unsigned rn = INSTR (9, 5);
750 unsigned rt = INSTR (4, 0);
751 uint64_t address;
752
753 if (rn == rt && wb != NoWriteBack)
754 HALT_UNALLOC;
755
756 address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
757
758 if (wb != Post)
759 address += offset;
760
761 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
762 aarch64_set_reg_u64 (cpu, rt, NO_SP, aarch64_get_mem_u32 (cpu, address));
763
764 if (wb == Post)
765 address += offset;
766
767 if (wb != NoWriteBack)
768 aarch64_set_reg_u64 (cpu, rn, SP_OK, address);
769 }
770
771 /* 32 bit load 32 bit scaled or unscaled
772 zero- or sign-extended 32-bit register offset */
773 static void
774 ldr32_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
775 {
776 unsigned rm = INSTR (20, 16);
777 unsigned rn = INSTR (9, 5);
778 unsigned rt = INSTR (4, 0);
779 /* rn may reference SP, rm and rt must reference ZR */
780
781 uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
782 int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP), extension);
783 uint64_t displacement = OPT_SCALE (extended, 32, scaling);
784
785 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
786 aarch64_set_reg_u64 (cpu, rt, NO_SP,
787 aarch64_get_mem_u32 (cpu, address + displacement));
788 }
789
790 /* 64 bit load 64 bit scaled unsigned 12 bit */
791 static void
792 ldr_abs (sim_cpu *cpu, uint32_t offset)
793 {
794 unsigned rn = INSTR (9, 5);
795 unsigned rt = INSTR (4, 0);
796
797 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
798 /* The target register may not be SP but the source may be. */
799 aarch64_set_reg_u64 (cpu, rt, NO_SP, aarch64_get_mem_u64
800 (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK)
801 + SCALE (offset, 64)));
802 }
803
804 /* 64 bit load 64 bit unscaled signed 9 bit with pre- or post-writeback. */
805 static void
806 ldr_wb (sim_cpu *cpu, int32_t offset, WriteBack wb)
807 {
808 unsigned rn = INSTR (9, 5);
809 unsigned rt = INSTR (4, 0);
810 uint64_t address;
811
812 if (rn == rt && wb != NoWriteBack)
813 HALT_UNALLOC;
814
815 address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
816
817 if (wb != Post)
818 address += offset;
819
820 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
821 aarch64_set_reg_u64 (cpu, rt, NO_SP, aarch64_get_mem_u64 (cpu, address));
822
823 if (wb == Post)
824 address += offset;
825
826 if (wb != NoWriteBack)
827 aarch64_set_reg_u64 (cpu, rn, SP_OK, address);
828 }
829
830 /* 64 bit load 64 bit scaled or unscaled zero-
831 or sign-extended 32-bit register offset. */
832 static void
833 ldr_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
834 {
835 unsigned rm = INSTR (20, 16);
836 unsigned rn = INSTR (9, 5);
837 unsigned rt = INSTR (4, 0);
838 /* rn may reference SP, rm and rt must reference ZR */
839
840 uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
841 int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP), extension);
842 uint64_t displacement = OPT_SCALE (extended, 64, scaling);
843
844 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
845 aarch64_set_reg_u64 (cpu, rt, NO_SP,
846 aarch64_get_mem_u64 (cpu, address + displacement));
847 }
848
849 /* 32 bit load zero-extended byte scaled unsigned 12 bit. */
850 static void
851 ldrb32_abs (sim_cpu *cpu, uint32_t offset)
852 {
853 unsigned rn = INSTR (9, 5);
854 unsigned rt = INSTR (4, 0);
855
856 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
857 /* The target register may not be SP but the source may be;
858 there is no scaling required for a byte load. */
859 aarch64_set_reg_u64 (cpu, rt, NO_SP,
860 aarch64_get_mem_u8
861 (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK) + offset));
862 }
863
864 /* 32 bit load zero-extended byte unscaled signed 9 bit with pre- or post-writeback. */
865 static void
866 ldrb32_wb (sim_cpu *cpu, int32_t offset, WriteBack wb)
867 {
868 unsigned rn = INSTR (9, 5);
869 unsigned rt = INSTR (4, 0);
870 uint64_t address;
871
872 if (rn == rt && wb != NoWriteBack)
873 HALT_UNALLOC;
874
875 address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
876
877 if (wb != Post)
878 address += offset;
879
880 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
881 aarch64_set_reg_u64 (cpu, rt, NO_SP, aarch64_get_mem_u8 (cpu, address));
882
883 if (wb == Post)
884 address += offset;
885
886 if (wb != NoWriteBack)
887 aarch64_set_reg_u64 (cpu, rn, SP_OK, address);
888 }
889
890 /* 32 bit load zero-extended byte scaled or unscaled zero-
891 or sign-extended 32-bit register offset. */
892 static void
893 ldrb32_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
894 {
895 unsigned rm = INSTR (20, 16);
896 unsigned rn = INSTR (9, 5);
897 unsigned rt = INSTR (4, 0);
898 /* rn may reference SP, rm and rt must reference ZR */
899
900 uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
901 int64_t displacement = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP),
902 extension);
903
904 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
905 /* There is no scaling required for a byte load. */
906 aarch64_set_reg_u64 (cpu, rt, NO_SP,
907 aarch64_get_mem_u8 (cpu, address + displacement));
908 }
909
910 /* 64 bit load sign-extended byte unscaled signed 9 bit
911 with pre- or post-writeback. */
912 static void
913 ldrsb_wb (sim_cpu *cpu, int32_t offset, WriteBack wb)
914 {
915 unsigned rn = INSTR (9, 5);
916 unsigned rt = INSTR (4, 0);
917 uint64_t address;
918 int64_t val;
919
920 if (rn == rt && wb != NoWriteBack)
921 HALT_UNALLOC;
922
923 address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
924
925 if (wb != Post)
926 address += offset;
927
928 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
929 val = aarch64_get_mem_s8 (cpu, address);
930 aarch64_set_reg_s64 (cpu, rt, NO_SP, val);
931
932 if (wb == Post)
933 address += offset;
934
935 if (wb != NoWriteBack)
936 aarch64_set_reg_u64 (cpu, rn, SP_OK, address);
937 }
938
939 /* 64 bit load sign-extended byte scaled unsigned 12 bit. */
940 static void
941 ldrsb_abs (sim_cpu *cpu, uint32_t offset)
942 {
943 ldrsb_wb (cpu, offset, NoWriteBack);
944 }
945
946 /* 64 bit load sign-extended byte scaled or unscaled zero-
947 or sign-extended 32-bit register offset. */
948 static void
949 ldrsb_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
950 {
951 unsigned rm = INSTR (20, 16);
952 unsigned rn = INSTR (9, 5);
953 unsigned rt = INSTR (4, 0);
954 /* rn may reference SP, rm and rt must reference ZR */
955
956 uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
957 int64_t displacement = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP),
958 extension);
959 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
960 /* There is no scaling required for a byte load. */
961 aarch64_set_reg_s64 (cpu, rt, NO_SP,
962 aarch64_get_mem_s8 (cpu, address + displacement));
963 }
964
965 /* 32 bit load zero-extended short scaled unsigned 12 bit. */
966 static void
967 ldrh32_abs (sim_cpu *cpu, uint32_t offset)
968 {
969 unsigned rn = INSTR (9, 5);
970 unsigned rt = INSTR (4, 0);
971 uint32_t val;
972
973 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
974 /* The target register may not be SP but the source may be. */
975 val = aarch64_get_mem_u16 (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK)
976 + SCALE (offset, 16));
977 aarch64_set_reg_u32 (cpu, rt, NO_SP, val);
978 }
979
980 /* 32 bit load zero-extended short unscaled signed 9 bit
981 with pre- or post-writeback. */
982 static void
983 ldrh32_wb (sim_cpu *cpu, int32_t offset, WriteBack wb)
984 {
985 unsigned rn = INSTR (9, 5);
986 unsigned rt = INSTR (4, 0);
987 uint64_t address;
988
989 if (rn == rt && wb != NoWriteBack)
990 HALT_UNALLOC;
991
992 address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
993
994 if (wb != Post)
995 address += offset;
996
997 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
998 aarch64_set_reg_u32 (cpu, rt, NO_SP, aarch64_get_mem_u16 (cpu, address));
999
1000 if (wb == Post)
1001 address += offset;
1002
1003 if (wb != NoWriteBack)
1004 aarch64_set_reg_u64 (cpu, rn, SP_OK, address);
1005 }
1006
1007 /* 32 bit load zero-extended short scaled or unscaled zero-
1008 or sign-extended 32-bit register offset. */
1009 static void
1010 ldrh32_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
1011 {
1012 unsigned rm = INSTR (20, 16);
1013 unsigned rn = INSTR (9, 5);
1014 unsigned rt = INSTR (4, 0);
1015 /* rn may reference SP, rm and rt must reference ZR */
1016
1017 uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
1018 int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP), extension);
1019 uint64_t displacement = OPT_SCALE (extended, 16, scaling);
1020
1021 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
1022 aarch64_set_reg_u32 (cpu, rt, NO_SP,
1023 aarch64_get_mem_u16 (cpu, address + displacement));
1024 }
1025
1026 /* 32 bit load sign-extended short scaled unsigned 12 bit. */
1027 static void
1028 ldrsh32_abs (sim_cpu *cpu, uint32_t offset)
1029 {
1030 unsigned rn = INSTR (9, 5);
1031 unsigned rt = INSTR (4, 0);
1032 int32_t val;
1033
1034 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
1035 /* The target register may not be SP but the source may be. */
1036 val = aarch64_get_mem_s16 (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK)
1037 + SCALE (offset, 16));
1038 aarch64_set_reg_s32 (cpu, rt, NO_SP, val);
1039 }
1040
1041 /* 32 bit load sign-extended short unscaled signed 9 bit
1042 with pre- or post-writeback. */
1043 static void
1044 ldrsh32_wb (sim_cpu *cpu, int32_t offset, WriteBack wb)
1045 {
1046 unsigned rn = INSTR (9, 5);
1047 unsigned rt = INSTR (4, 0);
1048 uint64_t address;
1049
1050 if (rn == rt && wb != NoWriteBack)
1051 HALT_UNALLOC;
1052
1053 address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
1054
1055 if (wb != Post)
1056 address += offset;
1057
1058 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
1059 aarch64_set_reg_s32 (cpu, rt, NO_SP,
1060 (int32_t) aarch64_get_mem_s16 (cpu, address));
1061
1062 if (wb == Post)
1063 address += offset;
1064
1065 if (wb != NoWriteBack)
1066 aarch64_set_reg_u64 (cpu, rn, SP_OK, address);
1067 }
1068
1069 /* 32 bit load sign-extended short scaled or unscaled zero-
1070 or sign-extended 32-bit register offset. */
1071 static void
1072 ldrsh32_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
1073 {
1074 unsigned rm = INSTR (20, 16);
1075 unsigned rn = INSTR (9, 5);
1076 unsigned rt = INSTR (4, 0);
1077 /* rn may reference SP, rm and rt must reference ZR */
1078
1079 uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
1080 int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP), extension);
1081 uint64_t displacement = OPT_SCALE (extended, 16, scaling);
1082
1083 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
1084 aarch64_set_reg_s32 (cpu, rt, NO_SP,
1085 (int32_t) aarch64_get_mem_s16
1086 (cpu, address + displacement));
1087 }
1088
1089 /* 64 bit load sign-extended short scaled unsigned 12 bit. */
1090 static void
1091 ldrsh_abs (sim_cpu *cpu, uint32_t offset)
1092 {
1093 unsigned rn = INSTR (9, 5);
1094 unsigned rt = INSTR (4, 0);
1095 int64_t val;
1096
1097 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
1098 /* The target register may not be SP but the source may be. */
1099 val = aarch64_get_mem_s16 (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK)
1100 + SCALE (offset, 16));
1101 aarch64_set_reg_s64 (cpu, rt, NO_SP, val);
1102 }
1103
1104 /* 64 bit load sign-extended short unscaled signed 9 bit
1105 with pre- or post-writeback. */
1106 static void
1107 ldrsh64_wb (sim_cpu *cpu, int32_t offset, WriteBack wb)
1108 {
1109 unsigned rn = INSTR (9, 5);
1110 unsigned rt = INSTR (4, 0);
1111 uint64_t address;
1112 int64_t val;
1113
1114 if (rn == rt && wb != NoWriteBack)
1115 HALT_UNALLOC;
1116
1117 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
1118 address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
1119
1120 if (wb != Post)
1121 address += offset;
1122
1123 val = aarch64_get_mem_s16 (cpu, address);
1124 aarch64_set_reg_s64 (cpu, rt, NO_SP, val);
1125
1126 if (wb == Post)
1127 address += offset;
1128
1129 if (wb != NoWriteBack)
1130 aarch64_set_reg_u64 (cpu, rn, SP_OK, address);
1131 }
1132
1133 /* 64 bit load sign-extended short scaled or unscaled zero-
1134 or sign-extended 32-bit register offset. */
1135 static void
1136 ldrsh_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
1137 {
1138 unsigned rm = INSTR (20, 16);
1139 unsigned rn = INSTR (9, 5);
1140 unsigned rt = INSTR (4, 0);
1141
1142 /* rn may reference SP, rm and rt must reference ZR */
1143
1144 uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
1145 int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP), extension);
1146 uint64_t displacement = OPT_SCALE (extended, 16, scaling);
1147 int64_t val;
1148
1149 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
1150 val = aarch64_get_mem_s16 (cpu, address + displacement);
1151 aarch64_set_reg_s64 (cpu, rt, NO_SP, val);
1152 }
1153
1154 /* 64 bit load sign-extended 32 bit scaled unsigned 12 bit. */
1155 static void
1156 ldrsw_abs (sim_cpu *cpu, uint32_t offset)
1157 {
1158 unsigned rn = INSTR (9, 5);
1159 unsigned rt = INSTR (4, 0);
1160 int64_t val;
1161
1162 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
1163 val = aarch64_get_mem_s32 (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK)
1164 + SCALE (offset, 32));
1165 /* The target register may not be SP but the source may be. */
1166 aarch64_set_reg_s64 (cpu, rt, NO_SP, val);
1167 }
1168
1169 /* 64 bit load sign-extended 32 bit unscaled signed 9 bit
1170 with pre- or post-writeback. */
1171 static void
1172 ldrsw_wb (sim_cpu *cpu, int32_t offset, WriteBack wb)
1173 {
1174 unsigned rn = INSTR (9, 5);
1175 unsigned rt = INSTR (4, 0);
1176 uint64_t address;
1177
1178 if (rn == rt && wb != NoWriteBack)
1179 HALT_UNALLOC;
1180
1181 address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
1182
1183 if (wb != Post)
1184 address += offset;
1185
1186 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
1187 aarch64_set_reg_s64 (cpu, rt, NO_SP, aarch64_get_mem_s32 (cpu, address));
1188
1189 if (wb == Post)
1190 address += offset;
1191
1192 if (wb != NoWriteBack)
1193 aarch64_set_reg_u64 (cpu, rn, SP_OK, address);
1194 }
1195
1196 /* 64 bit load sign-extended 32 bit scaled or unscaled zero-
1197 or sign-extended 32-bit register offset. */
1198 static void
1199 ldrsw_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
1200 {
1201 unsigned rm = INSTR (20, 16);
1202 unsigned rn = INSTR (9, 5);
1203 unsigned rt = INSTR (4, 0);
1204 /* rn may reference SP, rm and rt must reference ZR */
1205
1206 uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
1207 int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP), extension);
1208 uint64_t displacement = OPT_SCALE (extended, 32, scaling);
1209
1210 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
1211 aarch64_set_reg_s64 (cpu, rt, NO_SP,
1212 aarch64_get_mem_s32 (cpu, address + displacement));
1213 }
1214
1215 /* N.B. with stores the value in source is written to the
1216 address identified by source2 modified by source3/offset. */
1217
1218 /* 32 bit store scaled unsigned 12 bit. */
1219 static void
1220 str32_abs (sim_cpu *cpu, uint32_t offset)
1221 {
1222 unsigned rn = INSTR (9, 5);
1223 unsigned rt = INSTR (4, 0);
1224
1225 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
1226 /* The target register may not be SP but the source may be. */
1227 aarch64_set_mem_u32 (cpu, (aarch64_get_reg_u64 (cpu, rn, SP_OK)
1228 + SCALE (offset, 32)),
1229 aarch64_get_reg_u32 (cpu, rt, NO_SP));
1230 }
1231
1232 /* 32 bit store unscaled signed 9 bit with pre- or post-writeback. */
1233 static void
1234 str32_wb (sim_cpu *cpu, int32_t offset, WriteBack wb)
1235 {
1236 unsigned rn = INSTR (9, 5);
1237 unsigned rt = INSTR (4, 0);
1238 uint64_t address;
1239
1240 if (rn == rt && wb != NoWriteBack)
1241 HALT_UNALLOC;
1242
1243 address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
1244 if (wb != Post)
1245 address += offset;
1246
1247 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
1248 aarch64_set_mem_u32 (cpu, address, aarch64_get_reg_u32 (cpu, rt, NO_SP));
1249
1250 if (wb == Post)
1251 address += offset;
1252
1253 if (wb != NoWriteBack)
1254 aarch64_set_reg_u64 (cpu, rn, SP_OK, address);
1255 }
1256
1257 /* 32 bit store scaled or unscaled zero- or
1258 sign-extended 32-bit register offset. */
1259 static void
1260 str32_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
1261 {
1262 unsigned rm = INSTR (20, 16);
1263 unsigned rn = INSTR (9, 5);
1264 unsigned rt = INSTR (4, 0);
1265
1266 uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
1267 int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP), extension);
1268 uint64_t displacement = OPT_SCALE (extended, 32, scaling);
1269
1270 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
1271 aarch64_set_mem_u32 (cpu, address + displacement,
1272 aarch64_get_reg_u32 (cpu, rt, NO_SP));
1273 }
1274
1275 /* 64 bit store scaled unsigned 12 bit. */
1276 static void
1277 str_abs (sim_cpu *cpu, uint32_t offset)
1278 {
1279 unsigned rn = INSTR (9, 5);
1280 unsigned rt = INSTR (4, 0);
1281
1282 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
1283 aarch64_set_mem_u64 (cpu,
1284 aarch64_get_reg_u64 (cpu, rn, SP_OK)
1285 + SCALE (offset, 64),
1286 aarch64_get_reg_u64 (cpu, rt, NO_SP));
1287 }
1288
1289 /* 64 bit store unscaled signed 9 bit with pre- or post-writeback. */
1290 static void
1291 str_wb (sim_cpu *cpu, int32_t offset, WriteBack wb)
1292 {
1293 unsigned rn = INSTR (9, 5);
1294 unsigned rt = INSTR (4, 0);
1295 uint64_t address;
1296
1297 if (rn == rt && wb != NoWriteBack)
1298 HALT_UNALLOC;
1299
1300 address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
1301
1302 if (wb != Post)
1303 address += offset;
1304
1305 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
1306 aarch64_set_mem_u64 (cpu, address, aarch64_get_reg_u64 (cpu, rt, NO_SP));
1307
1308 if (wb == Post)
1309 address += offset;
1310
1311 if (wb != NoWriteBack)
1312 aarch64_set_reg_u64 (cpu, rn, SP_OK, address);
1313 }
1314
1315 /* 64 bit store scaled or unscaled zero-
1316 or sign-extended 32-bit register offset. */
1317 static void
1318 str_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
1319 {
1320 unsigned rm = INSTR (20, 16);
1321 unsigned rn = INSTR (9, 5);
1322 unsigned rt = INSTR (4, 0);
1323 /* rn may reference SP, rm and rt must reference ZR */
1324
1325 uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
1326 int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP),
1327 extension);
1328 uint64_t displacement = OPT_SCALE (extended, 64, scaling);
1329
1330 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
1331 aarch64_set_mem_u64 (cpu, address + displacement,
1332 aarch64_get_reg_u64 (cpu, rt, NO_SP));
1333 }
1334
1335 /* 32 bit store byte scaled unsigned 12 bit. */
1336 static void
1337 strb_abs (sim_cpu *cpu, uint32_t offset)
1338 {
1339 unsigned rn = INSTR (9, 5);
1340 unsigned rt = INSTR (4, 0);
1341
1342 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
1343 /* The target register may not be SP but the source may be.
1344 There is no scaling required for a byte store. */
1345 aarch64_set_mem_u8 (cpu,
1346 aarch64_get_reg_u64 (cpu, rn, SP_OK) + offset,
1347 aarch64_get_reg_u8 (cpu, rt, NO_SP));
1348 }
1349
1350 /* 32 bit store byte unscaled signed 9 bit with pre- or post-writeback. */
1351 static void
1352 strb_wb (sim_cpu *cpu, int32_t offset, WriteBack wb)
1353 {
1354 unsigned rn = INSTR (9, 5);
1355 unsigned rt = INSTR (4, 0);
1356 uint64_t address;
1357
1358 if (rn == rt && wb != NoWriteBack)
1359 HALT_UNALLOC;
1360
1361 address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
1362
1363 if (wb != Post)
1364 address += offset;
1365
1366 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
1367 aarch64_set_mem_u8 (cpu, address, aarch64_get_reg_u8 (cpu, rt, NO_SP));
1368
1369 if (wb == Post)
1370 address += offset;
1371
1372 if (wb != NoWriteBack)
1373 aarch64_set_reg_u64 (cpu, rn, SP_OK, address);
1374 }
1375
1376 /* 32 bit store byte scaled or unscaled zero-
1377 or sign-extended 32-bit register offset. */
1378 static void
1379 strb_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
1380 {
1381 unsigned rm = INSTR (20, 16);
1382 unsigned rn = INSTR (9, 5);
1383 unsigned rt = INSTR (4, 0);
1384 /* rn may reference SP, rm and rt must reference ZR */
1385
1386 uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
1387 int64_t displacement = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP),
1388 extension);
1389
1390 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
1391 /* There is no scaling required for a byte store. */
1392 aarch64_set_mem_u8 (cpu, address + displacement,
1393 aarch64_get_reg_u8 (cpu, rt, NO_SP));
1394 }
1395
1396 /* 32 bit store short scaled unsigned 12 bit. */
1397 static void
1398 strh_abs (sim_cpu *cpu, uint32_t offset)
1399 {
1400 unsigned rn = INSTR (9, 5);
1401 unsigned rt = INSTR (4, 0);
1402
1403 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
1404 /* The target register may not be SP but the source may be. */
1405 aarch64_set_mem_u16 (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK)
1406 + SCALE (offset, 16),
1407 aarch64_get_reg_u16 (cpu, rt, NO_SP));
1408 }
1409
1410 /* 32 bit store short unscaled signed 9 bit with pre- or post-writeback. */
1411 static void
1412 strh_wb (sim_cpu *cpu, int32_t offset, WriteBack wb)
1413 {
1414 unsigned rn = INSTR (9, 5);
1415 unsigned rt = INSTR (4, 0);
1416 uint64_t address;
1417
1418 if (rn == rt && wb != NoWriteBack)
1419 HALT_UNALLOC;
1420
1421 address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
1422
1423 if (wb != Post)
1424 address += offset;
1425
1426 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
1427 aarch64_set_mem_u16 (cpu, address, aarch64_get_reg_u16 (cpu, rt, NO_SP));
1428
1429 if (wb == Post)
1430 address += offset;
1431
1432 if (wb != NoWriteBack)
1433 aarch64_set_reg_u64 (cpu, rn, SP_OK, address);
1434 }
1435
1436 /* 32 bit store short scaled or unscaled zero-
1437 or sign-extended 32-bit register offset. */
1438 static void
1439 strh_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
1440 {
1441 unsigned rm = INSTR (20, 16);
1442 unsigned rn = INSTR (9, 5);
1443 unsigned rt = INSTR (4, 0);
1444 /* rn may reference SP, rm and rt must reference ZR */
1445
1446 uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
1447 int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP), extension);
1448 uint64_t displacement = OPT_SCALE (extended, 16, scaling);
1449
1450 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
1451 aarch64_set_mem_u16 (cpu, address + displacement,
1452 aarch64_get_reg_u16 (cpu, rt, NO_SP));
1453 }
1454
1455 /* Prefetch unsigned 12 bit. */
1456 static void
1457 prfm_abs (sim_cpu *cpu, uint32_t offset)
1458 {
1459 /* instr[4,0] = prfop : 00000 ==> PLDL1KEEP, 00001 ==> PLDL1STRM,
1460 00010 ==> PLDL2KEEP, 00011 ==> PLDL2STRM,
1461 00100 ==> PLDL3KEEP, 00101 ==> PLDL3STRM,
1462 10000 ==> PSTL1KEEP, 10001 ==> PSTL1STRM,
1463 10010 ==> PSTL2KEEP, 10011 ==> PSTL2STRM,
1464 10100 ==> PSTL3KEEP, 10101 ==> PSTL3STRM,
1465 ow ==> UNALLOC
1466 PrfOp prfop = prfop (instr, 4, 0);
1467 uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK)
1468 + SCALE (offset, 64). */
1469
1470 /* TODO : implement prefetch of address. */
1471 }
1472
1473 /* Prefetch scaled or unscaled zero- or sign-extended 32-bit register offset. */
1474 static void
1475 prfm_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
1476 {
1477 /* instr[4,0] = prfop : 00000 ==> PLDL1KEEP, 00001 ==> PLDL1STRM,
1478 00010 ==> PLDL2KEEP, 00011 ==> PLDL2STRM,
1479 00100 ==> PLDL3KEEP, 00101 ==> PLDL3STRM,
1480 10000 ==> PSTL1KEEP, 10001 ==> PSTL1STRM,
1481 10010 ==> PSTL2KEEP, 10011 ==> PSTL2STRM,
1482 10100 ==> PSTL3KEEP, 10101 ==> PSTL3STRM,
1483 ow ==> UNALLOC
1484 rn may reference SP, rm may only reference ZR
1485 PrfOp prfop = prfop (instr, 4, 0);
1486 uint64_t base = aarch64_get_reg_u64 (cpu, rn, SP_OK);
1487 int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP),
1488 extension);
1489 uint64_t displacement = OPT_SCALE (extended, 64, scaling);
1490 uint64_t address = base + displacement. */
1491
1492 /* TODO : implement prefetch of address */
1493 }
1494
1495 /* 64 bit pc-relative prefetch. */
1496 static void
1497 prfm_pcrel (sim_cpu *cpu, int32_t offset)
1498 {
1499 /* instr[4,0] = prfop : 00000 ==> PLDL1KEEP, 00001 ==> PLDL1STRM,
1500 00010 ==> PLDL2KEEP, 00011 ==> PLDL2STRM,
1501 00100 ==> PLDL3KEEP, 00101 ==> PLDL3STRM,
1502 10000 ==> PSTL1KEEP, 10001 ==> PSTL1STRM,
1503 10010 ==> PSTL2KEEP, 10011 ==> PSTL2STRM,
1504 10100 ==> PSTL3KEEP, 10101 ==> PSTL3STRM,
1505 ow ==> UNALLOC
1506 PrfOp prfop = prfop (instr, 4, 0);
1507 uint64_t address = aarch64_get_PC (cpu) + offset. */
1508
1509 /* TODO : implement this */
1510 }
1511
1512 /* Load-store exclusive. */
1513
1514 static void
1515 ldxr (sim_cpu *cpu)
1516 {
1517 unsigned rn = INSTR (9, 5);
1518 unsigned rt = INSTR (4, 0);
1519 uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
1520 int size = INSTR (31, 30);
1521 /* int ordered = INSTR (15, 15); */
1522 /* int exclusive = ! INSTR (23, 23); */
1523
1524 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
1525 switch (size)
1526 {
1527 case 0:
1528 aarch64_set_reg_u64 (cpu, rt, NO_SP, aarch64_get_mem_u8 (cpu, address));
1529 break;
1530 case 1:
1531 aarch64_set_reg_u64 (cpu, rt, NO_SP, aarch64_get_mem_u16 (cpu, address));
1532 break;
1533 case 2:
1534 aarch64_set_reg_u64 (cpu, rt, NO_SP, aarch64_get_mem_u32 (cpu, address));
1535 break;
1536 case 3:
1537 aarch64_set_reg_u64 (cpu, rt, NO_SP, aarch64_get_mem_u64 (cpu, address));
1538 break;
1539 }
1540 }
1541
1542 static void
1543 stxr (sim_cpu *cpu)
1544 {
1545 unsigned rn = INSTR (9, 5);
1546 unsigned rt = INSTR (4, 0);
1547 unsigned rs = INSTR (20, 16);
1548 uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
1549 int size = INSTR (31, 30);
1550 uint64_t data = aarch64_get_reg_u64 (cpu, rt, NO_SP);
1551
1552 switch (size)
1553 {
1554 case 0: aarch64_set_mem_u8 (cpu, address, data); break;
1555 case 1: aarch64_set_mem_u16 (cpu, address, data); break;
1556 case 2: aarch64_set_mem_u32 (cpu, address, data); break;
1557 case 3: aarch64_set_mem_u64 (cpu, address, data); break;
1558 }
1559
1560 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
1561 aarch64_set_reg_u64 (cpu, rs, NO_SP, 0); /* Store always succeeds: the
simulator is single-threaded, so the exclusive monitor never fails. */
1562 }
1563
1564 static void
1565 dexLoadLiteral (sim_cpu *cpu)
1566 {
1567 /* instr[29,27] == 011
1568 instr[25,24] == 00
1569 instr[31,30:26] = opc: 000 ==> LDRW, 001 ==> FLDRS
1570 010 ==> LDRX, 011 ==> FLDRD
1571 100 ==> LDRSW, 101 ==> FLDRQ
1572 110 ==> PRFM, 111 ==> UNALLOC
1573 instr[26] ==> V : 0 ==> GReg, 1 ==> FReg
1574 instr[23, 5] == simm19 */
1575
1576 /* unsigned rt = INSTR (4, 0); */
1577 uint32_t dispatch = (INSTR (31, 30) << 1) | INSTR (26, 26);
1578 int32_t imm = simm32 (aarch64_get_instr (cpu), 23, 5);
1579
1580 switch (dispatch)
1581 {
1582 case 0: ldr32_pcrel (cpu, imm); break;
1583 case 1: fldrs_pcrel (cpu, imm); break;
1584 case 2: ldr_pcrel (cpu, imm); break;
1585 case 3: fldrd_pcrel (cpu, imm); break;
1586 case 4: ldrsw_pcrel (cpu, imm); break;
1587 case 5: fldrq_pcrel (cpu, imm); break;
1588 case 6: prfm_pcrel (cpu, imm); break;
1589 case 7:
1590 default:
1591 HALT_UNALLOC;
1592 }
1593 }
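
/* Example dispatch (editor's sketch): for the instruction word
   0x18000040 (LDR w0, [pc, #8]), instr[31,30] = 00 and
   instr[26] = 0, so dispatch is 0 and ldr32_pcrel is called with
   imm = 2, i.e. a load from PC + 8.  */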
1594
1595 /* Immediate arithmetic
1596 The aimm argument is a 12 bit unsigned value or a 12 bit unsigned
1597 value left shifted by 12 bits (done at decode).
1598
1599 N.B. the register args (dest, source) can normally be Xn or SP.
1600 The exception occurs for flag setting instructions which may
1601 only use Xn for the output (dest). */
1602
1603 /* 32 bit add immediate. */
1604 static void
1605 add32 (sim_cpu *cpu, uint32_t aimm)
1606 {
1607 unsigned rn = INSTR (9, 5);
1608 unsigned rd = INSTR (4, 0);
1609
1610 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
1611 aarch64_set_reg_u64 (cpu, rd, SP_OK,
1612 aarch64_get_reg_u32 (cpu, rn, SP_OK) + aimm);
1613 }
1614
1615 /* 64 bit add immediate. */
1616 static void
1617 add64 (sim_cpu *cpu, uint32_t aimm)
1618 {
1619 unsigned rn = INSTR (9, 5);
1620 unsigned rd = INSTR (4, 0);
1621
1622 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
1623 aarch64_set_reg_u64 (cpu, rd, SP_OK,
1624 aarch64_get_reg_u64 (cpu, rn, SP_OK) + aimm);
1625 }
1626
1627 static void
1628 set_flags_for_add32 (sim_cpu *cpu, int32_t value1, int32_t value2)
1629 {
1630 int32_t result = value1 + value2;
1631 int64_t sresult = (int64_t) value1 + (int64_t) value2;
1632 uint64_t uresult = (uint64_t)(uint32_t) value1
1633 + (uint64_t)(uint32_t) value2;
1634 uint32_t flags = 0;
1635
1636 if (result == 0)
1637 flags |= Z;
1638
1639 if (result & (1 << 31))
1640 flags |= N;
1641
1642 if (uresult != (uint32_t)uresult)
1643 flags |= C;
1644
1645 if (sresult != (int32_t)sresult)
1646 flags |= V;
1647
1648 aarch64_set_CPSR (cpu, flags);
1649 }
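
/* Worked example (editor's note): adding 0x7fffffff and 1 gives
   0x80000000, setting N and V (signed overflow) with C and Z
   clear; adding 0xffffffff and 1 gives 0, setting Z and C
   instead.  */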
1650
1651 #define NEG(a) (((a) & signbit) == signbit)
1652 #define POS(a) (((a) & signbit) == 0)
1653
1654 static void
1655 set_flags_for_add64 (sim_cpu *cpu, uint64_t value1, uint64_t value2)
1656 {
1657 uint64_t result = value1 + value2;
1658 uint32_t flags = 0;
1659 uint64_t signbit = 1ULL << 63;
1660
1661 if (result == 0)
1662 flags |= Z;
1663
1664 if (NEG (result))
1665 flags |= N;
1666
1667 if ( (NEG (value1) && NEG (value2))
1668 || (NEG (value1) && POS (result))
1669 || (NEG (value2) && POS (result)))
1670 flags |= C;
1671
1672 if ( (NEG (value1) && NEG (value2) && POS (result))
1673 || (POS (value1) && POS (value2) && NEG (result)))
1674 flags |= V;
1675
1676 aarch64_set_CPSR (cpu, flags);
1677 }
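
/* Worked example (editor's note): adding 0x8000000000000000 to
   itself gives 0, so Z, C and V are all set: both operands are
   negative (carry out) and the result is non-negative (overflow).  */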
1678
1679 static void
1680 set_flags_for_sub32 (sim_cpu *cpu, uint32_t value1, uint32_t value2)
1681 {
1682 uint32_t result = value1 - value2;
1683 uint32_t flags = 0;
1684 uint32_t signbit = 1U << 31;
1685
1686 if (result == 0)
1687 flags |= Z;
1688
1689 if (NEG (result))
1690 flags |= N;
1691
1692 if ( (NEG (value1) && POS (value2))
1693 || (NEG (value1) && POS (result))
1694 || (POS (value2) && POS (result)))
1695 flags |= C;
1696
1697 if ( (NEG (value1) && POS (value2) && POS (result))
1698 || (POS (value1) && NEG (value2) && NEG (result)))
1699 flags |= V;
1700
1701 aarch64_set_CPSR (cpu, flags);
1702 }
1703
1704 static void
1705 set_flags_for_sub64 (sim_cpu *cpu, uint64_t value1, uint64_t value2)
1706 {
1707 uint64_t result = value1 - value2;
1708 uint32_t flags = 0;
1709 uint64_t signbit = 1ULL << 63;
1710
1711 if (result == 0)
1712 flags |= Z;
1713
1714 if (NEG (result))
1715 flags |= N;
1716
1717 if ( (NEG (value1) && POS (value2))
1718 || (NEG (value1) && POS (result))
1719 || (POS (value2) && POS (result)))
1720 flags |= C;
1721
1722 if ( (NEG (value1) && POS (value2) && POS (result))
1723 || (POS (value1) && NEG (value2) && NEG (result)))
1724 flags |= V;
1725
1726 aarch64_set_CPSR (cpu, flags);
1727 }
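
/* Editor's note: for subtraction the C flag follows the AArch64
   carry = not-borrow convention.  E.g. in set_flags_for_sub32,
   5 - 3 sets C via POS (value2) && POS (result), while 3 - 5
   leaves C clear.  */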
1728
1729 static void
1730 set_flags_for_binop32 (sim_cpu *cpu, uint32_t result)
1731 {
1732 uint32_t flags = 0;
1733
1734 if (result == 0)
1735 flags |= Z;
1736 else
1737 flags &= ~ Z;
1738
1739 if (result & (1 << 31))
1740 flags |= N;
1741 else
1742 flags &= ~ N;
1743
1744 aarch64_set_CPSR (cpu, flags);
1745 }
1746
1747 static void
1748 set_flags_for_binop64 (sim_cpu *cpu, uint64_t result)
1749 {
1750 uint32_t flags = 0;
1751
1752 if (result == 0)
1753 flags |= Z;
1754 else
1755 flags &= ~ Z;
1756
1757 if (result & (1ULL << 63))
1758 flags |= N;
1759 else
1760 flags &= ~ N;
1761
1762 aarch64_set_CPSR (cpu, flags);
1763 }
1764
1765 /* 32 bit add immediate set flags. */
1766 static void
1767 adds32 (sim_cpu *cpu, uint32_t aimm)
1768 {
1769 unsigned rn = INSTR (9, 5);
1770 unsigned rd = INSTR (4, 0);
1771 /* TODO : do we need to worry about signs here? */
1772 int32_t value1 = aarch64_get_reg_s32 (cpu, rn, SP_OK);
1773
1774 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
1775 aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 + aimm);
1776 set_flags_for_add32 (cpu, value1, aimm);
1777 }
1778
1779 /* 64 bit add immediate set flags. */
1780 static void
1781 adds64 (sim_cpu *cpu, uint32_t aimm)
1782 {
1783 unsigned rn = INSTR (9, 5);
1784 unsigned rd = INSTR (4, 0);
1785 uint64_t value1 = aarch64_get_reg_u64 (cpu, rn, SP_OK);
1786 uint64_t value2 = aimm;
1787
1788 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
1789 aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 + value2);
1790 set_flags_for_add64 (cpu, value1, value2);
1791 }
1792
1793 /* 32 bit sub immediate. */
1794 static void
1795 sub32 (sim_cpu *cpu, uint32_t aimm)
1796 {
1797 unsigned rn = INSTR (9, 5);
1798 unsigned rd = INSTR (4, 0);
1799
1800 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
1801 aarch64_set_reg_u64 (cpu, rd, SP_OK,
1802 aarch64_get_reg_u32 (cpu, rn, SP_OK) - aimm);
1803 }
1804
1805 /* 64 bit sub immediate. */
1806 static void
1807 sub64 (sim_cpu *cpu, uint32_t aimm)
1808 {
1809 unsigned rn = INSTR (9, 5);
1810 unsigned rd = INSTR (4, 0);
1811
1812 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
1813 aarch64_set_reg_u64 (cpu, rd, SP_OK,
1814 aarch64_get_reg_u64 (cpu, rn, SP_OK) - aimm);
1815 }
1816
1817 /* 32 bit sub immediate set flags. */
1818 static void
1819 subs32 (sim_cpu *cpu, uint32_t aimm)
1820 {
1821 unsigned rn = INSTR (9, 5);
1822 unsigned rd = INSTR (4, 0);
1823 uint32_t value1 = aarch64_get_reg_u64 (cpu, rn, SP_OK);
1824 uint32_t value2 = aimm;
1825
1826 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
1827 aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 - value2);
1828 set_flags_for_sub32 (cpu, value1, value2);
1829 }
1830
1831 /* 64 bit sub immediate set flags. */
1832 static void
1833 subs64 (sim_cpu *cpu, uint32_t aimm)
1834 {
1835 unsigned rn = INSTR (9, 5);
1836 unsigned rd = INSTR (4, 0);
1837 uint64_t value1 = aarch64_get_reg_u64 (cpu, rn, SP_OK);
1838 uint32_t value2 = aimm;
1839
1840 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
1841 aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 - value2);
1842 set_flags_for_sub64 (cpu, value1, value2);
1843 }
1844
1845 /* Data Processing Register. */
1846
1847 /* First, two helpers to perform the shift operations. */
1848
1849 static inline uint32_t
1850 shifted32 (uint32_t value, Shift shift, uint32_t count)
1851 {
1852 switch (shift)
1853 {
1854 default:
1855 case LSL:
1856 return (value << count);
1857 case LSR:
1858 return (value >> count);
1859 case ASR:
1860 {
1861 int32_t svalue = value;
1862 return (svalue >> count);
1863 }
1864 case ROR:
1865 {
1866 uint32_t top = value >> count;
1867 uint32_t bottom = value << (32 - count);
1868 return (bottom | top);
1869 }
1870 }
1871 }
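/* Informative example: shifted32 (0x80000001, ROR, 1) yields 0xC0000000.
   N.B. a ROR count of zero would make the expression value << (32 - count)
   a shift by the full register width, which is undefined behaviour in C;
   the decode paths that reach these helpers are expected to supply a
   non-zero rotate count.  */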
1872
1873 static inline uint64_t
1874 shifted64 (uint64_t value, Shift shift, uint32_t count)
1875 {
1876 switch (shift)
1877 {
1878 default:
1879 case LSL:
1880 return (value << count);
1881 case LSR:
1882 return (value >> count);
1883 case ASR:
1884 {
1885 int64_t svalue = value;
1886 return (svalue >> count);
1887 }
1888 case ROR:
1889 {
1890 uint64_t top = value >> count;
1891 uint64_t bottom = value << (64 - count);
1892 return (bottom | top);
1893 }
1894 }
1895 }
1896
1897 /* Arithmetic shifted register.
1898 These allow an optional LSL, ASR or LSR applied to the second source
1899 register with a shift count of up to the register bit count minus one.
1900
1901 N.B. register args may not be SP. */
1902
1903 /* 32 bit ADD shifted register. */
1904 static void
1905 add32_shift (sim_cpu *cpu, Shift shift, uint32_t count)
1906 {
1907 unsigned rm = INSTR (20, 16);
1908 unsigned rn = INSTR (9, 5);
1909 unsigned rd = INSTR (4, 0);
1910
1911 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
1912 aarch64_set_reg_u64 (cpu, rd, NO_SP,
1913 aarch64_get_reg_u32 (cpu, rn, NO_SP)
1914 + shifted32 (aarch64_get_reg_u32 (cpu, rm, NO_SP),
1915 shift, count));
1916 }
1917
1918 /* 64 bit ADD shifted register. */
1919 static void
1920 add64_shift (sim_cpu *cpu, Shift shift, uint32_t count)
1921 {
1922 unsigned rm = INSTR (20, 16);
1923 unsigned rn = INSTR (9, 5);
1924 unsigned rd = INSTR (4, 0);
1925
1926 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
1927 aarch64_set_reg_u64 (cpu, rd, NO_SP,
1928 aarch64_get_reg_u64 (cpu, rn, NO_SP)
1929 + shifted64 (aarch64_get_reg_u64 (cpu, rm, NO_SP),
1930 shift, count));
1931 }
1932
1933 /* 32 bit ADD shifted register setting flags. */
1934 static void
1935 adds32_shift (sim_cpu *cpu, Shift shift, uint32_t count)
1936 {
1937 unsigned rm = INSTR (20, 16);
1938 unsigned rn = INSTR (9, 5);
1939 unsigned rd = INSTR (4, 0);
1940
1941 uint32_t value1 = aarch64_get_reg_u32 (cpu, rn, NO_SP);
1942 uint32_t value2 = shifted32 (aarch64_get_reg_u32 (cpu, rm, NO_SP),
1943 shift, count);
1944
1945 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
1946 aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 + value2);
1947 set_flags_for_add32 (cpu, value1, value2);
1948 }
1949
1950 /* 64 bit ADD shifted register setting flags. */
1951 static void
1952 adds64_shift (sim_cpu *cpu, Shift shift, uint32_t count)
1953 {
1954 unsigned rm = INSTR (20, 16);
1955 unsigned rn = INSTR (9, 5);
1956 unsigned rd = INSTR (4, 0);
1957
1958 uint64_t value1 = aarch64_get_reg_u64 (cpu, rn, NO_SP);
1959 uint64_t value2 = shifted64 (aarch64_get_reg_u64 (cpu, rm, NO_SP),
1960 shift, count);
1961
1962 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
1963 aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 + value2);
1964 set_flags_for_add64 (cpu, value1, value2);
1965 }
1966
1967 /* 32 bit SUB shifted register. */
1968 static void
1969 sub32_shift (sim_cpu *cpu, Shift shift, uint32_t count)
1970 {
1971 unsigned rm = INSTR (20, 16);
1972 unsigned rn = INSTR (9, 5);
1973 unsigned rd = INSTR (4, 0);
1974
1975 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
1976 aarch64_set_reg_u64 (cpu, rd, NO_SP,
1977 aarch64_get_reg_u32 (cpu, rn, NO_SP)
1978 - shifted32 (aarch64_get_reg_u32 (cpu, rm, NO_SP),
1979 shift, count));
1980 }
1981
1982 /* 64 bit SUB shifted register. */
1983 static void
1984 sub64_shift (sim_cpu *cpu, Shift shift, uint32_t count)
1985 {
1986 unsigned rm = INSTR (20, 16);
1987 unsigned rn = INSTR (9, 5);
1988 unsigned rd = INSTR (4, 0);
1989
1990 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
1991 aarch64_set_reg_u64 (cpu, rd, NO_SP,
1992 aarch64_get_reg_u64 (cpu, rn, NO_SP)
1993 - shifted64 (aarch64_get_reg_u64 (cpu, rm, NO_SP),
1994 shift, count));
1995 }
1996
1997 /* 32 bit SUB shifted register setting flags. */
1998 static void
1999 subs32_shift (sim_cpu *cpu, Shift shift, uint32_t count)
2000 {
2001 unsigned rm = INSTR (20, 16);
2002 unsigned rn = INSTR (9, 5);
2003 unsigned rd = INSTR (4, 0);
2004
2005 uint32_t value1 = aarch64_get_reg_u32 (cpu, rn, NO_SP);
2006 uint32_t value2 = shifted32 (aarch64_get_reg_u32 (cpu, rm, NO_SP),
2007 shift, count);
2008
2009 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
2010 aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 - value2);
2011 set_flags_for_sub32 (cpu, value1, value2);
2012 }
2013
2014 /* 64 bit SUB shifted register setting flags. */
2015 static void
2016 subs64_shift (sim_cpu *cpu, Shift shift, uint32_t count)
2017 {
2018 unsigned rm = INSTR (20, 16);
2019 unsigned rn = INSTR (9, 5);
2020 unsigned rd = INSTR (4, 0);
2021
2022 uint64_t value1 = aarch64_get_reg_u64 (cpu, rn, NO_SP);
2023 uint64_t value2 = shifted64 (aarch64_get_reg_u64 (cpu, rm, NO_SP),
2024 shift, count);
2025
2026 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
2027 aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 - value2);
2028 set_flags_for_sub64 (cpu, value1, value2);
2029 }
2030
2031 /* First a couple more helpers to fetch the
2032 relevant source register element either
2033 sign or zero extended as required by the
2034 extension value. */
2035
2036 static uint32_t
2037 extreg32 (sim_cpu *cpu, unsigned int lo, Extension extension)
2038 {
2039 switch (extension)
2040 {
2041 case UXTB: return aarch64_get_reg_u8 (cpu, lo, NO_SP);
2042 case UXTH: return aarch64_get_reg_u16 (cpu, lo, NO_SP);
2043 case UXTW: /* Fall through. */
2044 case UXTX: return aarch64_get_reg_u32 (cpu, lo, NO_SP);
2045 case SXTB: return aarch64_get_reg_s8 (cpu, lo, NO_SP);
2046 case SXTH: return aarch64_get_reg_s16 (cpu, lo, NO_SP);
2047 case SXTW: /* Fall through. */
2048 case SXTX: /* Fall through. */
2049 default: return aarch64_get_reg_s32 (cpu, lo, NO_SP);
2050 }
2051 }
2052
2053 static uint64_t
2054 extreg64 (sim_cpu *cpu, unsigned int lo, Extension extension)
2055 {
2056 switch (extension)
2057 {
2058 case UXTB: return aarch64_get_reg_u8 (cpu, lo, NO_SP);
2059 case UXTH: return aarch64_get_reg_u16 (cpu, lo, NO_SP);
2060 case UXTW: return aarch64_get_reg_u32 (cpu, lo, NO_SP);
2061 case UXTX: return aarch64_get_reg_u64 (cpu, lo, NO_SP);
2062 case SXTB: return aarch64_get_reg_s8 (cpu, lo, NO_SP);
2063 case SXTH: return aarch64_get_reg_s16 (cpu, lo, NO_SP);
2064 case SXTW: return aarch64_get_reg_s32 (cpu, lo, NO_SP);
2065 case SXTX:
2066 default: return aarch64_get_reg_s64 (cpu, lo, NO_SP);
2067 }
2068 }
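/* Informative example: with a source register holding
   0x00000000000000FF, extreg64 (..., UXTB) returns 0xFF, while
   extreg64 (..., SXTB) treats the byte as -1 and returns
   0xFFFFFFFFFFFFFFFF.  */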
2069
2070 /* Arithmetic extending register.
2071 These allow an optional zero or sign extension of some portion of
2072 the second source register followed by an optional left shift of
2073 between 0 and 4 bits.
2074
2075 N.B. output (dest) and first input arg (source) may normally be Xn
2076 or SP. However, for flag setting operations dest can only be
2077 Xn. Second input registers are always Xn. */
2078
2079 /* 32 bit ADD extending register. */
2080 static void
2081 add32_ext (sim_cpu *cpu, Extension extension, uint32_t shift)
2082 {
2083 unsigned rm = INSTR (20, 16);
2084 unsigned rn = INSTR (9, 5);
2085 unsigned rd = INSTR (4, 0);
2086
2087 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
2088 aarch64_set_reg_u64 (cpu, rd, SP_OK,
2089 aarch64_get_reg_u32 (cpu, rn, SP_OK)
2090 + (extreg32 (cpu, rm, extension) << shift));
2091 }
2092
2093 /* 64 bit ADD extending register.
2094 N.B. This subsumes the case with 64 bit source2 and UXTX #n or LSL #0. */
2095 static void
2096 add64_ext (sim_cpu *cpu, Extension extension, uint32_t shift)
2097 {
2098 unsigned rm = INSTR (20, 16);
2099 unsigned rn = INSTR (9, 5);
2100 unsigned rd = INSTR (4, 0);
2101
2102 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
2103 aarch64_set_reg_u64 (cpu, rd, SP_OK,
2104 aarch64_get_reg_u64 (cpu, rn, SP_OK)
2105 + (extreg64 (cpu, rm, extension) << shift));
2106 }
2107
2108 /* 32 bit ADD extending register setting flags. */
2109 static void
2110 adds32_ext (sim_cpu *cpu, Extension extension, uint32_t shift)
2111 {
2112 unsigned rm = INSTR (20, 16);
2113 unsigned rn = INSTR (9, 5);
2114 unsigned rd = INSTR (4, 0);
2115
2116 uint32_t value1 = aarch64_get_reg_u32 (cpu, rn, SP_OK);
2117 uint32_t value2 = extreg32 (cpu, rm, extension) << shift;
2118
2119 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
2120 aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 + value2);
2121 set_flags_for_add32 (cpu, value1, value2);
2122 }
2123
2124 /* 64 bit ADD extending register setting flags */
2125 /* N.B. this subsumes the case with 64 bit source2 and UXTX #n or LSL #0 */
2126 static void
2127 adds64_ext (sim_cpu *cpu, Extension extension, uint32_t shift)
2128 {
2129 unsigned rm = INSTR (20, 16);
2130 unsigned rn = INSTR (9, 5);
2131 unsigned rd = INSTR (4, 0);
2132
2133 uint64_t value1 = aarch64_get_reg_u64 (cpu, rn, SP_OK);
2134 uint64_t value2 = extreg64 (cpu, rm, extension) << shift;
2135
2136 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
2137 aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 + value2);
2138 set_flags_for_add64 (cpu, value1, value2);
2139 }
2140
2141 /* 32 bit SUB extending register. */
2142 static void
2143 sub32_ext (sim_cpu *cpu, Extension extension, uint32_t shift)
2144 {
2145 unsigned rm = INSTR (20, 16);
2146 unsigned rn = INSTR (9, 5);
2147 unsigned rd = INSTR (4, 0);
2148
2149 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
2150 aarch64_set_reg_u64 (cpu, rd, SP_OK,
2151 aarch64_get_reg_u32 (cpu, rn, SP_OK)
2152 - (extreg32 (cpu, rm, extension) << shift));
2153 }
2154
2155 /* 64 bit SUB extending register. */
2156 /* N.B. this subsumes the case with 64 bit source2 and UXTX #n or LSL #0. */
2157 static void
2158 sub64_ext (sim_cpu *cpu, Extension extension, uint32_t shift)
2159 {
2160 unsigned rm = INSTR (20, 16);
2161 unsigned rn = INSTR (9, 5);
2162 unsigned rd = INSTR (4, 0);
2163
2164 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
2165 aarch64_set_reg_u64 (cpu, rd, SP_OK,
2166 aarch64_get_reg_u64 (cpu, rn, SP_OK)
2167 - (extreg64 (cpu, rm, extension) << shift));
2168 }
2169
2170 /* 32 bit SUB extending register setting flags. */
2171 static void
2172 subs32_ext (sim_cpu *cpu, Extension extension, uint32_t shift)
2173 {
2174 unsigned rm = INSTR (20, 16);
2175 unsigned rn = INSTR (9, 5);
2176 unsigned rd = INSTR (4, 0);
2177
2178 uint32_t value1 = aarch64_get_reg_u32 (cpu, rn, SP_OK);
2179 uint32_t value2 = extreg32 (cpu, rm, extension) << shift;
2180
2181 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
2182 aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 - value2);
2183 set_flags_for_sub32 (cpu, value1, value2);
2184 }
2185
2186 /* 64 bit SUB extending register setting flags */
2187 /* N.B. this subsumes the case with 64 bit source2 and UXTX #n or LSL #0 */
2188 static void
2189 subs64_ext (sim_cpu *cpu, Extension extension, uint32_t shift)
2190 {
2191 unsigned rm = INSTR (20, 16);
2192 unsigned rn = INSTR (9, 5);
2193 unsigned rd = INSTR (4, 0);
2194
2195 uint64_t value1 = aarch64_get_reg_u64 (cpu, rn, SP_OK);
2196 uint64_t value2 = extreg64 (cpu, rm, extension) << shift;
2197
2198 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
2199 aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 - value2);
2200 set_flags_for_sub64 (cpu, value1, value2);
2201 }
2202
2203 static void
2204 dexAddSubtractImmediate (sim_cpu *cpu)
2205 {
2206 /* instr[31] = size : 0 ==> 32 bit, 1 ==> 64 bit
2207 instr[30] = op : 0 ==> ADD, 1 ==> SUB
2208 instr[29] = set : 0 ==> no flags, 1 ==> set flags
2209 instr[28,24] = 10001
2210 instr[23,22] = shift : 00 ==> LSL#0, 01 ==> LSL#12, 1x ==> UNALLOC
2211 instr[21,10] = uimm12
2212 instr[9,5] = Rn
2213 instr[4,0] = Rd */
2214
2215 /* N.B. the shift is applied at decode before calling the add/sub routine. */
2216 uint32_t shift = INSTR (23, 22);
2217 uint32_t imm = INSTR (21, 10);
2218 uint32_t dispatch = INSTR (31, 29);
2219
2220 NYI_assert (28, 24, 0x11);
2221
2222 if (shift > 1)
2223 HALT_UNALLOC;
2224
2225 if (shift)
2226 imm <<= 12;
2227
2228 switch (dispatch)
2229 {
2230 case 0: add32 (cpu, imm); break;
2231 case 1: adds32 (cpu, imm); break;
2232 case 2: sub32 (cpu, imm); break;
2233 case 3: subs32 (cpu, imm); break;
2234 case 4: add64 (cpu, imm); break;
2235 case 5: adds64 (cpu, imm); break;
2236 case 6: sub64 (cpu, imm); break;
2237 case 7: subs64 (cpu, imm); break;
2238 }
2239 }
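/* Informative example: SUBS W0, W1, #16 encodes size = 0, op = 1 and
   set = 1, so the three dispatch bits instr[31,29] are 0b011 = 3 and
   the instruction is routed to subs32 (cpu, 16).  */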
2240
2241 static void
2242 dexAddSubtractShiftedRegister (sim_cpu *cpu)
2243 {
2244 /* instr[31] = size : 0 ==> 32 bit, 1 ==> 64 bit
2245 instr[30,29] = op : 00 ==> ADD, 01 ==> ADDS, 10 ==> SUB, 11 ==> SUBS
2246 instr[28,24] = 01011
2247 instr[23,22] = shift : 0 ==> LSL, 1 ==> LSR, 2 ==> ASR, 3 ==> UNALLOC
2248 instr[21] = 0
2249 instr[20,16] = Rm
2250 instr[15,10] = count : must be 0xxxxx for 32 bit
2251 instr[9,5] = Rn
2252 instr[4,0] = Rd */
2253
2254 uint32_t size = INSTR (31, 31);
2255 uint32_t count = INSTR (15, 10);
2256 Shift shiftType = INSTR (23, 22);
2257
2258 NYI_assert (28, 24, 0x0B);
2259 NYI_assert (21, 21, 0);
2260
2261 /* Shift encoded as ROR is unallocated. */
2262 if (shiftType == ROR)
2263 HALT_UNALLOC;
2264
2265 /* 32 bit operations must have count[5] = 0
2266 or else we have an UNALLOC. */
2267 if (size == 0 && uimm (count, 5, 5))
2268 HALT_UNALLOC;
2269
2270 /* Dispatch on size:op i.e instr [31,29]. */
2271 switch (INSTR (31, 29))
2272 {
2273 case 0: add32_shift (cpu, shiftType, count); break;
2274 case 1: adds32_shift (cpu, shiftType, count); break;
2275 case 2: sub32_shift (cpu, shiftType, count); break;
2276 case 3: subs32_shift (cpu, shiftType, count); break;
2277 case 4: add64_shift (cpu, shiftType, count); break;
2278 case 5: adds64_shift (cpu, shiftType, count); break;
2279 case 6: sub64_shift (cpu, shiftType, count); break;
2280 case 7: subs64_shift (cpu, shiftType, count); break;
2281 }
2282 }
2283
2284 static void
2285 dexAddSubtractExtendedRegister (sim_cpu *cpu)
2286 {
2287 /* instr[31] = size : 0 ==> 32 bit, 1 ==> 64 bit
2288 instr[30] = op : 0 ==> ADD, 1 ==> SUB
2289 instr[29] = set? : 0 ==> no flags, 1 ==> set flags
2290 instr[28,24] = 01011
2291 instr[23,22] = opt : 0 ==> ok, 1,2,3 ==> UNALLOC
2292 instr[21] = 1
2293 instr[20,16] = Rm
2294 instr[15,13] = option : 000 ==> UXTB, 001 ==> UXTH,
2295 010 ==> LSL|UXTW, 011 ==> UXTX,
2296 100 ==> SXTB, 101 ==> SXTH,
2297 110 ==> SXTW, 111 ==> SXTX,
2298 instr[12,10] = shift : 0,1,2,3,4 ==> ok, 5,6,7 ==> UNALLOC
2299 instr[9,5] = Rn
2300 instr[4,0] = Rd */
2301
2302 Extension extensionType = INSTR (15, 13);
2303 uint32_t shift = INSTR (12, 10);
2304
2305 NYI_assert (28, 24, 0x0B);
2306 NYI_assert (21, 21, 1);
2307
2308 /* Shift may not exceed 4. */
2309 if (shift > 4)
2310 HALT_UNALLOC;
2311
2312 /* Dispatch on size:op:set?. */
2313 switch (INSTR (31, 29))
2314 {
2315 case 0: add32_ext (cpu, extensionType, shift); break;
2316 case 1: adds32_ext (cpu, extensionType, shift); break;
2317 case 2: sub32_ext (cpu, extensionType, shift); break;
2318 case 3: subs32_ext (cpu, extensionType, shift); break;
2319 case 4: add64_ext (cpu, extensionType, shift); break;
2320 case 5: adds64_ext (cpu, extensionType, shift); break;
2321 case 6: sub64_ext (cpu, extensionType, shift); break;
2322 case 7: subs64_ext (cpu, extensionType, shift); break;
2323 }
2324 }
2325
2326 /* Conditional data processing
2327 Condition register is implicit 3rd source. */
2328
2329 /* 32 bit add with carry. */
2330 /* N.B. register args may not be SP. */
2331
2332 static void
2333 adc32 (sim_cpu *cpu)
2334 {
2335 unsigned rm = INSTR (20, 16);
2336 unsigned rn = INSTR (9, 5);
2337 unsigned rd = INSTR (4, 0);
2338
2339 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
2340 aarch64_set_reg_u64 (cpu, rd, NO_SP,
2341 aarch64_get_reg_u32 (cpu, rn, NO_SP)
2342 + aarch64_get_reg_u32 (cpu, rm, NO_SP)
2343 + IS_SET (C));
2344 }
2345
2346 /* 64 bit add with carry */
2347 static void
2348 adc64 (sim_cpu *cpu)
2349 {
2350 unsigned rm = INSTR (20, 16);
2351 unsigned rn = INSTR (9, 5);
2352 unsigned rd = INSTR (4, 0);
2353
2354 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
2355 aarch64_set_reg_u64 (cpu, rd, NO_SP,
2356 aarch64_get_reg_u64 (cpu, rn, NO_SP)
2357 + aarch64_get_reg_u64 (cpu, rm, NO_SP)
2358 + IS_SET (C));
2359 }
2360
2361 /* 32 bit add with carry setting flags. */
2362 static void
2363 adcs32 (sim_cpu *cpu)
2364 {
2365 unsigned rm = INSTR (20, 16);
2366 unsigned rn = INSTR (9, 5);
2367 unsigned rd = INSTR (4, 0);
2368
2369 uint32_t value1 = aarch64_get_reg_u32 (cpu, rn, NO_SP);
2370 uint32_t value2 = aarch64_get_reg_u32 (cpu, rm, NO_SP);
2371 uint32_t carry = IS_SET (C);
2372
2373 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
2374 aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 + value2 + carry);
2375 set_flags_for_add32 (cpu, value1, value2 + carry);
2376 }
2377
2378 /* 64 bit add with carry setting flags. */
2379 static void
2380 adcs64 (sim_cpu *cpu)
2381 {
2382 unsigned rm = INSTR (20, 16);
2383 unsigned rn = INSTR (9, 5);
2384 unsigned rd = INSTR (4, 0);
2385
2386 uint64_t value1 = aarch64_get_reg_u64 (cpu, rn, NO_SP);
2387 uint64_t value2 = aarch64_get_reg_u64 (cpu, rm, NO_SP);
2388 uint64_t carry = IS_SET (C);
2389
2390 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
2391 aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 + value2 + carry);
2392 set_flags_for_add64 (cpu, value1, value2 + carry);
2393 }
2394
2395 /* 32 bit sub with carry. */
2396 static void
2397 sbc32 (sim_cpu *cpu)
2398 {
2399 unsigned rm = INSTR (20, 16);
2400 unsigned rn = INSTR (9, 5); /* ngc iff rn == 31. */
2401 unsigned rd = INSTR (4, 0);
2402
2403 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
2404 aarch64_set_reg_u64 (cpu, rd, NO_SP,
2405 aarch64_get_reg_u32 (cpu, rn, NO_SP)
2406 - aarch64_get_reg_u32 (cpu, rm, NO_SP)
2407 - 1 + IS_SET (C));
2408 }
2409
2410 /* 64 bit sub with carry */
2411 static void
2412 sbc64 (sim_cpu *cpu)
2413 {
2414 unsigned rm = INSTR (20, 16);
2415 unsigned rn = INSTR (9, 5);
2416 unsigned rd = INSTR (4, 0);
2417
2418 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
2419 aarch64_set_reg_u64 (cpu, rd, NO_SP,
2420 aarch64_get_reg_u64 (cpu, rn, NO_SP)
2421 - aarch64_get_reg_u64 (cpu, rm, NO_SP)
2422 - 1 + IS_SET (C));
2423 }
2424
2425 /* 32 bit sub with carry setting flags */
2426 static void
2427 sbcs32 (sim_cpu *cpu)
2428 {
2429 unsigned rm = INSTR (20, 16);
2430 unsigned rn = INSTR (9, 5);
2431 unsigned rd = INSTR (4, 0);
2432
2433 uint32_t value1 = aarch64_get_reg_u32 (cpu, rn, NO_SP);
2434 uint32_t value2 = aarch64_get_reg_u32 (cpu, rm, NO_SP);
2435 uint32_t carry = IS_SET (C);
2436 uint32_t result = value1 - value2 - 1 + carry;
2437
2438 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
2439 aarch64_set_reg_u64 (cpu, rd, NO_SP, result);
2440 set_flags_for_sub32 (cpu, value1, value2 + 1 - carry);
2441 }
2442
2443 /* 64 bit sub with carry setting flags */
2444 static void
2445 sbcs64 (sim_cpu *cpu)
2446 {
2447 unsigned rm = INSTR (20, 16);
2448 unsigned rn = INSTR (9, 5);
2449 unsigned rd = INSTR (4, 0);
2450
2451 uint64_t value1 = aarch64_get_reg_u64 (cpu, rn, NO_SP);
2452 uint64_t value2 = aarch64_get_reg_u64 (cpu, rm, NO_SP);
2453 uint64_t carry = IS_SET (C);
2454 uint64_t result = value1 - value2 - 1 + carry;
2455
2456 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
2457 aarch64_set_reg_u64 (cpu, rd, NO_SP, result);
2458 set_flags_for_sub64 (cpu, value1, value2 + 1 - carry);
2459 }
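/* Informative note on the SBC helpers above: AArch64 subtract-with-carry
   computes Rn - Rm - 1 + C, i.e. the carry flag acts as "not borrow".
   For example, with value1 = 5, value2 = 3 and the carry flag clear the
   result is 1; with the carry flag set it is 2, the plain subtraction.  */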
2460
2461 static void
2462 dexAddSubtractWithCarry (sim_cpu *cpu)
2463 {
2464 /* instr[31] = size : 0 ==> 32 bit, 1 ==> 64 bit
2465 instr[30] = op : 0 ==> ADC, 1 ==> SBC
2466 instr[29] = set? : 0 ==> no flags, 1 ==> set flags
2467 instr[28,21] = 1 1010 000
2468 instr[20,16] = Rm
2469 instr[15,10] = op2 : 000000 ==> ok, otherwise ==> UNALLOC
2470 instr[9,5] = Rn
2471 instr[4,0] = Rd */
2472
2473 uint32_t op2 = INSTR (15, 10);
2474
2475 NYI_assert (28, 21, 0xD0);
2476
2477 if (op2 != 0)
2478 HALT_UNALLOC;
2479
2480 /* Dispatch on size:op:set?. */
2481 switch (INSTR (31, 29))
2482 {
2483 case 0: adc32 (cpu); break;
2484 case 1: adcs32 (cpu); break;
2485 case 2: sbc32 (cpu); break;
2486 case 3: sbcs32 (cpu); break;
2487 case 4: adc64 (cpu); break;
2488 case 5: adcs64 (cpu); break;
2489 case 6: sbc64 (cpu); break;
2490 case 7: sbcs64 (cpu); break;
2491 }
2492 }
2493
2494 static uint32_t
2495 testConditionCode (sim_cpu *cpu, CondCode cc)
2496 {
2497 /* This should be reducible to branchless logic
2498 by some careful testing of bits in CC followed
2499 by the requisite masking and combining of bits
2500 from the flag register.
2501
2502 For now we do it with a switch. */
2503 int res;
2504
2505 switch (cc)
2506 {
2507 case EQ: res = IS_SET (Z); break;
2508 case NE: res = IS_CLEAR (Z); break;
2509 case CS: res = IS_SET (C); break;
2510 case CC: res = IS_CLEAR (C); break;
2511 case MI: res = IS_SET (N); break;
2512 case PL: res = IS_CLEAR (N); break;
2513 case VS: res = IS_SET (V); break;
2514 case VC: res = IS_CLEAR (V); break;
2515 case HI: res = IS_SET (C) && IS_CLEAR (Z); break;
2516 case LS: res = IS_CLEAR (C) || IS_SET (Z); break;
2517 case GE: res = IS_SET (N) == IS_SET (V); break;
2518 case LT: res = IS_SET (N) != IS_SET (V); break;
2519 case GT: res = IS_CLEAR (Z) && (IS_SET (N) == IS_SET (V)); break;
2520 case LE: res = IS_SET (Z) || (IS_SET (N) != IS_SET (V)); break;
2521 case AL:
2522 case NV:
2523 default:
2524 res = 1;
2525 break;
2526 }
2527 return res;
2528 }
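/* Informative example: after a compare of two equal values Z is set and
   N == V, so EQ, GE and LE report true here while NE, LT and GT report
   false, matching the architectural condition table.  */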
2529
2530 static void
2531 CondCompare (sim_cpu *cpu) /* aka: ccmp and ccmn */
2532 {
2533 /* instr[31] = size : 0 ==> 32 bit, 1 ==> 64 bit
2534 instr[30] = compare with positive (1) or negative value (0)
2535 instr[29,21] = 1 1101 0010
2536 instr[20,16] = Rm or const
2537 instr[15,12] = cond
2538 instr[11] = compare reg (0) or const (1)
2539 instr[10] = 0
2540 instr[9,5] = Rn
2541 instr[4] = 0
2542 instr[3,0] = value for CPSR bits if the comparison does not take place. */
2543 signed int negate;
2544 unsigned rm;
2545 unsigned rn;
2546
2547 NYI_assert (29, 21, 0x1d2);
2548 NYI_assert (10, 10, 0);
2549 NYI_assert (4, 4, 0);
2550
2551 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
2552 if (! testConditionCode (cpu, INSTR (15, 12)))
2553 {
2554 aarch64_set_CPSR (cpu, INSTR (3, 0));
2555 return;
2556 }
2557
2558 negate = INSTR (30, 30) ? 1 : -1;
2559 rm = INSTR (20, 16);
2560 rn = INSTR ( 9, 5);
2561
2562 if (INSTR (31, 31))
2563 {
2564 if (INSTR (11, 11))
2565 set_flags_for_sub64 (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK),
2566 negate * (uint64_t) rm);
2567 else
2568 set_flags_for_sub64 (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK),
2569 negate * aarch64_get_reg_u64 (cpu, rm, SP_OK));
2570 }
2571 else
2572 {
2573 if (INSTR (11, 11))
2574 set_flags_for_sub32 (cpu, aarch64_get_reg_u32 (cpu, rn, SP_OK),
2575 negate * rm);
2576 else
2577 set_flags_for_sub32 (cpu, aarch64_get_reg_u32 (cpu, rn, SP_OK),
2578 negate * aarch64_get_reg_u32 (cpu, rm, SP_OK));
2579 }
2580 }
2581
2582 static void
2583 do_vec_MOV_whole_vector (sim_cpu *cpu)
2584 {
2585 /* MOV Vd.T, Vs.T (alias for ORR Vd.T, Vn.T, Vm.T where Vn == Vm)
2586
2587 instr[31] = 0
2588 instr[30] = half(0)/full(1)
2589 instr[29,21] = 001110101
2590 instr[20,16] = Vs
2591 instr[15,10] = 000111
2592 instr[9,5] = Vs
2593 instr[4,0] = Vd */
2594
2595 unsigned vs = INSTR (9, 5);
2596 unsigned vd = INSTR (4, 0);
2597
2598 NYI_assert (29, 21, 0x075);
2599 NYI_assert (15, 10, 0x07);
2600
2601 if (INSTR (20, 16) != vs)
2602 HALT_NYI;
2603
2604 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
2605 if (INSTR (30, 30))
2606 aarch64_set_vec_u64 (cpu, vd, 1, aarch64_get_vec_u64 (cpu, vs, 1));
2607
2608 aarch64_set_vec_u64 (cpu, vd, 0, aarch64_get_vec_u64 (cpu, vs, 0));
2609 }
2610
2611 static void
2612 do_vec_SMOV_into_scalar (sim_cpu *cpu)
2613 {
2614 /* instr[31] = 0
2615 instr[30] = word(0)/long(1)
2616 instr[29,21] = 00 1110 000
2617 instr[20,16] = element size and index
2618 instr[15,10] = 00 0010 11
2619 instr[9,5] = V source
2620 instr[4,0] = R dest */
2621
2622 unsigned vs = INSTR (9, 5);
2623 unsigned rd = INSTR (4, 0);
2624 unsigned imm5 = INSTR (20, 16);
2625 unsigned full = INSTR (30, 30);
2626 int size, index;
2627
2628 NYI_assert (29, 21, 0x070);
2629 NYI_assert (15, 10, 0x0B);
2630
2631 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
2632
2633 if (imm5 & 0x1)
2634 {
2635 size = 0;
2636 index = (imm5 >> 1) & 0xF;
2637 }
2638 else if (imm5 & 0x2)
2639 {
2640 size = 1;
2641 index = (imm5 >> 2) & 0x7;
2642 }
2643 else if (full && (imm5 & 0x4))
2644 {
2645 size = 2;
2646 index = (imm5 >> 3) & 0x3;
2647 }
2648 else
2649 HALT_UNALLOC;
2650
2651 switch (size)
2652 {
2653 case 0:
2654 if (full)
2655 aarch64_set_reg_s64 (cpu, rd, NO_SP,
2656 aarch64_get_vec_s8 (cpu, vs, index));
2657 else
2658 aarch64_set_reg_s32 (cpu, rd, NO_SP,
2659 aarch64_get_vec_s8 (cpu, vs, index));
2660 break;
2661
2662 case 1:
2663 if (full)
2664 aarch64_set_reg_s64 (cpu, rd, NO_SP,
2665 aarch64_get_vec_s16 (cpu, vs, index));
2666 else
2667 aarch64_set_reg_s32 (cpu, rd, NO_SP,
2668 aarch64_get_vec_s16 (cpu, vs, index));
2669 break;
2670
2671 case 2:
2672 aarch64_set_reg_s64 (cpu, rd, NO_SP,
2673 aarch64_get_vec_s32 (cpu, vs, index));
2674 break;
2675
2676 default:
2677 HALT_UNALLOC;
2678 }
2679 }
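/* Informative example: SMOV W0, V1.B[3] encodes imm5 = 0b00111; the low
   set bit selects byte elements (size 0), index = (imm5 >> 1) & 0xF = 3,
   and byte 3 of V1 is sign extended into W0.  */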
2680
2681 static void
2682 do_vec_UMOV_into_scalar (sim_cpu *cpu)
2683 {
2684 /* instr[31] = 0
2685 instr[30] = word(0)/long(1)
2686 instr[29,21] = 00 1110 000
2687 instr[20,16] = element size and index
2688 instr[15,10] = 00 0011 11
2689 instr[9,5] = V source
2690 instr[4,0] = R dest */
2691
2692 unsigned vs = INSTR (9, 5);
2693 unsigned rd = INSTR (4, 0);
2694 unsigned imm5 = INSTR (20, 16);
2695 unsigned full = INSTR (30, 30);
2696 int size, index;
2697
2698 NYI_assert (29, 21, 0x070);
2699 NYI_assert (15, 10, 0x0F);
2700
2701 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
2702
2703 if (!full)
2704 {
2705 if (imm5 & 0x1)
2706 {
2707 size = 0;
2708 index = (imm5 >> 1) & 0xF;
2709 }
2710 else if (imm5 & 0x2)
2711 {
2712 size = 1;
2713 index = (imm5 >> 2) & 0x7;
2714 }
2715 else if (imm5 & 0x4)
2716 {
2717 size = 2;
2718 index = (imm5 >> 3) & 0x3;
2719 }
2720 else
2721 HALT_UNALLOC;
2722 }
2723 else if (imm5 & 0x8)
2724 {
2725 size = 3;
2726 index = (imm5 >> 4) & 0x1;
2727 }
2728 else
2729 HALT_UNALLOC;
2730
2731 switch (size)
2732 {
2733 case 0:
2734 aarch64_set_reg_u32 (cpu, rd, NO_SP,
2735 aarch64_get_vec_u8 (cpu, vs, index));
2736 break;
2737
2738 case 1:
2739 aarch64_set_reg_u32 (cpu, rd, NO_SP,
2740 aarch64_get_vec_u16 (cpu, vs, index));
2741 break;
2742
2743 case 2:
2744 aarch64_set_reg_u32 (cpu, rd, NO_SP,
2745 aarch64_get_vec_u32 (cpu, vs, index));
2746 break;
2747
2748 case 3:
2749 aarch64_set_reg_u64 (cpu, rd, NO_SP,
2750 aarch64_get_vec_u64 (cpu, vs, index));
2751 break;
2752
2753 default:
2754 HALT_UNALLOC;
2755 }
2756 }
2757
2758 static void
2759 do_vec_INS (sim_cpu *cpu)
2760 {
2761 /* instr[31,21] = 01001110000
2762 instr[20,16] = element size and index
2763 instr[15,10] = 000111
2764 instr[9,5] = W source
2765 instr[4,0] = V dest */
2766
2767 int index;
2768 unsigned rs = INSTR (9, 5);
2769 unsigned vd = INSTR (4, 0);
2770
2771 NYI_assert (31, 21, 0x270);
2772 NYI_assert (15, 10, 0x07);
2773
2774 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
2775 if (INSTR (16, 16))
2776 {
2777 index = INSTR (20, 17);
2778 aarch64_set_vec_u8 (cpu, vd, index,
2779 aarch64_get_reg_u8 (cpu, rs, NO_SP));
2780 }
2781 else if (INSTR (17, 17))
2782 {
2783 index = INSTR (20, 18);
2784 aarch64_set_vec_u16 (cpu, vd, index,
2785 aarch64_get_reg_u16 (cpu, rs, NO_SP));
2786 }
2787 else if (INSTR (18, 18))
2788 {
2789 index = INSTR (20, 19);
2790 aarch64_set_vec_u32 (cpu, vd, index,
2791 aarch64_get_reg_u32 (cpu, rs, NO_SP));
2792 }
2793 else if (INSTR (19, 19))
2794 {
2795 index = INSTR (20, 20);
2796 aarch64_set_vec_u64 (cpu, vd, index,
2797 aarch64_get_reg_u64 (cpu, rs, NO_SP));
2798 }
2799 else
2800 HALT_NYI;
2801 }
2802
2803 static void
2804 do_vec_DUP_vector_into_vector (sim_cpu *cpu)
2805 {
2806 /* instr[31] = 0
2807 instr[30] = half(0)/full(1)
2808 instr[29,21] = 00 1110 000
2809 instr[20,16] = element size and index
2810 instr[15,10] = 0000 01
2811 instr[9,5] = V source
2812 instr[4,0] = V dest. */
2813
2814 unsigned full = INSTR (30, 30);
2815 unsigned vs = INSTR (9, 5);
2816 unsigned vd = INSTR (4, 0);
2817 int i, index;
2818
2819 NYI_assert (29, 21, 0x070);
2820 NYI_assert (15, 10, 0x01);
2821
2822 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
2823 if (INSTR (16, 16))
2824 {
2825 index = INSTR (20, 17);
2826
2827 for (i = 0; i < (full ? 16 : 8); i++)
2828 aarch64_set_vec_u8 (cpu, vd, i, aarch64_get_vec_u8 (cpu, vs, index));
2829 }
2830 else if (INSTR (17, 17))
2831 {
2832 index = INSTR (20, 18);
2833
2834 for (i = 0; i < (full ? 8 : 4); i++)
2835 aarch64_set_vec_u16 (cpu, vd, i, aarch64_get_vec_u16 (cpu, vs, index));
2836 }
2837 else if (INSTR (18, 18))
2838 {
2839 index = INSTR (20, 19);
2840
2841 for (i = 0; i < (full ? 4 : 2); i++)
2842 aarch64_set_vec_u32 (cpu, vd, i, aarch64_get_vec_u32 (cpu, vs, index));
2843 }
2844 else
2845 {
2846 if (INSTR (19, 19) == 0)
2847 HALT_UNALLOC;
2848
2849 if (! full)
2850 HALT_UNALLOC;
2851
2852 index = INSTR (20, 20);
2853
2854 for (i = 0; i < 2; i++)
2855 aarch64_set_vec_u64 (cpu, vd, i, aarch64_get_vec_u64 (cpu, vs, index));
2856 }
2857 }
2858
2859 static void
2860 do_vec_TBL (sim_cpu *cpu)
2861 {
2862 /* instr[31] = 0
2863 instr[30] = half(0)/full(1)
2864 instr[29,21] = 00 1110 000
2865 instr[20,16] = Vm
2866 instr[15] = 0
2867 instr[14,13] = vec length
2868 instr[12,10] = 000
2869 instr[9,5] = V start
2870 instr[4,0] = V dest */
2871
2872 int full = INSTR (30, 30);
2873 int len = INSTR (14, 13) + 1;
2874 unsigned vm = INSTR (20, 16);
2875 unsigned vn = INSTR (9, 5);
2876 unsigned vd = INSTR (4, 0);
2877 unsigned i;
2878
2879 NYI_assert (29, 21, 0x070);
2880 NYI_assert (12, 10, 0);
2881
2882 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
2883 for (i = 0; i < (full ? 16 : 8); i++)
2884 {
2885 unsigned int selector = aarch64_get_vec_u8 (cpu, vm, i);
2886 uint8_t val;
2887
2888 if (selector < 16)
2889 val = aarch64_get_vec_u8 (cpu, vn, selector);
2890 else if (selector < 32)
2891 val = len < 2 ? 0 : aarch64_get_vec_u8 (cpu, vn + 1, selector - 16);
2892 else if (selector < 48)
2893 val = len < 3 ? 0 : aarch64_get_vec_u8 (cpu, vn + 2, selector - 32);
2894 else if (selector < 64)
2895 val = len < 4 ? 0 : aarch64_get_vec_u8 (cpu, vn + 3, selector - 48);
2896 else
2897 val = 0;
2898
2899 aarch64_set_vec_u8 (cpu, vd, i, val);
2900 }
2901 }
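/* Informative example: with a single table register (len == 1), a
   selector byte of 5 copies byte 5 of Vn into the result, while any
   selector of 16 or more yields 0, matching the architectural
   out-of-range TBL behaviour.  */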
2902
2903 static void
2904 do_vec_TRN (sim_cpu *cpu)
2905 {
2906 /* instr[31] = 0
2907 instr[30] = half(0)/full(1)
2908 instr[29,24] = 00 1110
2909 instr[23,22] = size
2910 instr[21] = 0
2911 instr[20,16] = Vm
2912 instr[15] = 0
2913 instr[14] = TRN1 (0) / TRN2 (1)
2914 instr[13,10] = 1010
2915 instr[9,5] = V source
2916 instr[4,0] = V dest. */
2917
2918 int full = INSTR (30, 30);
2919 int second = INSTR (14, 14);
2920 unsigned vm = INSTR (20, 16);
2921 unsigned vn = INSTR (9, 5);
2922 unsigned vd = INSTR (4, 0);
2923 unsigned i;
2924
2925 NYI_assert (29, 24, 0x0E);
2926 NYI_assert (13, 10, 0xA);
2927
2928 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
2929 switch (INSTR (23, 22))
2930 {
2931 case 0:
2932 for (i = 0; i < (full ? 8 : 4); i++)
2933 {
2934 aarch64_set_vec_u8
2935 (cpu, vd, i * 2,
2936 aarch64_get_vec_u8 (cpu, second ? vm : vn, i * 2));
2937 aarch64_set_vec_u8
2938 (cpu, vd, i * 2 + 1,
2939 aarch64_get_vec_u8 (cpu, second ? vn : vm, i * 2 + 1));
2940 }
2941 break;
2942
2943 case 1:
2944 for (i = 0; i < (full ? 4 : 2); i++)
2945 {
2946 aarch64_set_vec_u16
2947 (cpu, vd, i * 2,
2948 aarch64_get_vec_u16 (cpu, second ? vm : vn, i * 2));
2949 aarch64_set_vec_u16
2950 (cpu, vd, i * 2 + 1,
2951 aarch64_get_vec_u16 (cpu, second ? vn : vm, i * 2 + 1));
2952 }
2953 break;
2954
2955 case 2:
2956 aarch64_set_vec_u32
2957 (cpu, vd, 0, aarch64_get_vec_u32 (cpu, second ? vm : vn, 0));
2958 aarch64_set_vec_u32
2959 (cpu, vd, 1, aarch64_get_vec_u32 (cpu, second ? vn : vm, 1));
2960 aarch64_set_vec_u32
2961 (cpu, vd, 2, aarch64_get_vec_u32 (cpu, second ? vm : vn, 2));
2962 aarch64_set_vec_u32
2963 (cpu, vd, 3, aarch64_get_vec_u32 (cpu, second ? vn : vm, 3));
2964 break;
2965
2966 case 3:
2967 if (! full)
2968 HALT_UNALLOC;
2969
2970 aarch64_set_vec_u64 (cpu, vd, 0,
2971 aarch64_get_vec_u64 (cpu, second ? vm : vn, 0));
2972 aarch64_set_vec_u64 (cpu, vd, 1,
2973 aarch64_get_vec_u64 (cpu, second ? vn : vm, 1));
2974 break;
2975 }
2976 }
2977
2978 static void
2979 do_vec_DUP_scalar_into_vector (sim_cpu *cpu)
2980 {
2981 /* instr[31] = 0
2982 instr[30] = 0=> zero top 64-bits, 1=> duplicate into top 64-bits
2983 [must be 1 for 64-bit xfer]
2984 instr[29,20] = 00 1110 0000
2985 instr[19,16] = element size: 0001=> 8-bits, 0010=> 16-bits,
2986 0100=> 32-bits. 1000=>64-bits
2987 instr[15,10] = 0000 11
2988 instr[9,5] = W source
2989 instr[4,0] = V dest. */
2990
2991 unsigned i;
2992 unsigned Vd = INSTR (4, 0);
2993 unsigned Rs = INSTR (9, 5);
2994 int both = INSTR (30, 30);
2995
2996 NYI_assert (29, 20, 0x0E0);
2997 NYI_assert (15, 10, 0x03);
2998
2999 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
3000 switch (INSTR (19, 16))
3001 {
3002 case 1:
3003 for (i = 0; i < (both ? 16 : 8); i++)
3004 aarch64_set_vec_u8 (cpu, Vd, i, aarch64_get_reg_u8 (cpu, Rs, NO_SP));
3005 break;
3006
3007 case 2:
3008 for (i = 0; i < (both ? 8 : 4); i++)
3009 aarch64_set_vec_u16 (cpu, Vd, i, aarch64_get_reg_u16 (cpu, Rs, NO_SP));
3010 break;
3011
3012 case 4:
3013 for (i = 0; i < (both ? 4 : 2); i++)
3014 aarch64_set_vec_u32 (cpu, Vd, i, aarch64_get_reg_u32 (cpu, Rs, NO_SP));
3015 break;
3016
3017 case 8:
3018 if (!both)
3019 HALT_NYI;
3020 aarch64_set_vec_u64 (cpu, Vd, 0, aarch64_get_reg_u64 (cpu, Rs, NO_SP));
3021 aarch64_set_vec_u64 (cpu, Vd, 1, aarch64_get_reg_u64 (cpu, Rs, NO_SP));
3022 break;
3023
3024 default:
3025 HALT_NYI;
3026 }
3027 }
3028
3029 static void
3030 do_vec_UZP (sim_cpu *cpu)
3031 {
3032 /* instr[31] = 0
3033 instr[30] = half(0)/full(1)
3034 instr[29,24] = 00 1110
3035 instr[23,22] = size: byte(00), half(01), word (10), long (11)
3036 instr[21] = 0
3037 instr[20,16] = Vm
3038 instr[15] = 0
3039 instr[14] = lower (0) / upper (1)
3040 instr[13,10] = 0110
3041 instr[9,5] = Vn
3042 instr[4,0] = Vd. */
3043
3044 int full = INSTR (30, 30);
3045 int upper = INSTR (14, 14);
3046
3047 unsigned vm = INSTR (20, 16);
3048 unsigned vn = INSTR (9, 5);
3049 unsigned vd = INSTR (4, 0);
3050
3051 uint64_t val_m1 = aarch64_get_vec_u64 (cpu, vm, 0);
3052 uint64_t val_m2 = aarch64_get_vec_u64 (cpu, vm, 1);
3053 uint64_t val_n1 = aarch64_get_vec_u64 (cpu, vn, 0);
3054 uint64_t val_n2 = aarch64_get_vec_u64 (cpu, vn, 1);
3055
3056 uint64_t val1;
3057 uint64_t val2;
3058
3059 uint64_t input2 = full ? val_n2 : val_m1;
3060
3061 NYI_assert (29, 24, 0x0E);
3062 NYI_assert (21, 21, 0);
3063 NYI_assert (15, 15, 0);
3064 NYI_assert (13, 10, 6);
3065
3066 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
3067 switch (INSTR (23, 22))
3068 {
3069 case 0:
3070 val1 = (val_n1 >> (upper * 8)) & 0xFFULL;
3071 val1 |= (val_n1 >> ((upper * 8) + 8)) & 0xFF00ULL;
3072 val1 |= (val_n1 >> ((upper * 8) + 16)) & 0xFF0000ULL;
3073 val1 |= (val_n1 >> ((upper * 8) + 24)) & 0xFF000000ULL;
3074
3075 val1 |= (input2 << (32 - (upper * 8))) & 0xFF00000000ULL;
3076 val1 |= (input2 << (24 - (upper * 8))) & 0xFF0000000000ULL;
3077 val1 |= (input2 << (16 - (upper * 8))) & 0xFF000000000000ULL;
3078 val1 |= (input2 << (8 - (upper * 8))) & 0xFF00000000000000ULL;
3079
3080 if (full)
3081 {
3082 val2 = (val_m1 >> (upper * 8)) & 0xFFULL;
3083 val2 |= (val_m1 >> ((upper * 8) + 8)) & 0xFF00ULL;
3084 val2 |= (val_m1 >> ((upper * 8) + 16)) & 0xFF0000ULL;
3085 val2 |= (val_m1 >> ((upper * 8) + 24)) & 0xFF000000ULL;
3086
3087 val2 |= (val_m2 << (32 - (upper * 8))) & 0xFF00000000ULL;
3088 val2 |= (val_m2 << (24 - (upper * 8))) & 0xFF0000000000ULL;
3089 val2 |= (val_m2 << (16 - (upper * 8))) & 0xFF000000000000ULL;
3090 val2 |= (val_m2 << (8 - (upper * 8))) & 0xFF00000000000000ULL;
3091 }
3092 break;
3093
3094 case 1:
3095 val1 = (val_n1 >> (upper * 16)) & 0xFFFFULL;
3096 val1 |= (val_n1 >> ((upper * 16) + 16)) & 0xFFFF0000ULL;
3097
3098 val1 |= (input2 << (32 - (upper * 16))) & 0xFFFF00000000ULL;
3099 val1 |= (input2 << (16 - (upper * 16))) & 0xFFFF000000000000ULL;
3100
3101 if (full)
3102 {
3103 val2 = (val_m1 >> (upper * 16)) & 0xFFFFULL;
3104 val2 |= (val_m1 >> ((upper * 16) + 16)) & 0xFFFF0000ULL;
3105
3106 val2 |= (val_m2 << (32 - (upper * 16))) & 0xFFFF00000000ULL;
3107 val2 |= (val_m2 << (16 - (upper * 16))) & 0xFFFF000000000000ULL;
3108 }
3109 break;
3110
3111 case 2:
3112 val1 = (val_n1 >> (upper * 32)) & 0xFFFFFFFF;
3113 val1 |= (input2 << (32 - (upper * 32))) & 0xFFFFFFFF00000000ULL;
3114
3115 if (full)
3116 {
3117 val2 = (val_m1 >> (upper * 32)) & 0xFFFFFFFF;
3118 val2 |= (val_m2 << (32 - (upper * 32))) & 0xFFFFFFFF00000000ULL;
3119 }
3120 break;
3121
3122 case 3:
3123 if (! full)
3124 HALT_UNALLOC;
3125
3126 val1 = upper ? val_n2 : val_n1;
3127 val2 = upper ? val_m2 : val_m1;
3128 break;
3129 }
3130
3131 aarch64_set_vec_u64 (cpu, vd, 0, val1);
3132 if (full)
3133 aarch64_set_vec_u64 (cpu, vd, 1, val2);
3134 }
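/* Informative example: for UZP1 Vd.2D, Vn.2D, Vm.2D (size 11,
   upper == 0) the code above selects val1 = element 0 of Vn and
   val2 = element 0 of Vm, i.e. the even-numbered doublewords of the
   concatenated sources.  */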
3135
3136 static void
3137 do_vec_ZIP (sim_cpu *cpu)
3138 {
3139 /* instr[31] = 0
3140 instr[30] = half(0)/full(1)
3141 instr[29,24] = 00 1110
3142 instr[23,22] = size: byte(00), hald(01), word (10), long (11)
3143 instr[21] = 0
3144 instr[20,16] = Vm
3145 instr[15] = 0
3146 instr[14] = lower (0) / upper (1)
3147 instr[13,10] = 1110
3148 instr[9,5] = Vn
3149 instr[4,0] = Vd. */
3150
3151 int full = INSTR (30, 30);
3152 int upper = INSTR (14, 14);
3153
3154 unsigned vm = INSTR (20, 16);
3155 unsigned vn = INSTR (9, 5);
3156 unsigned vd = INSTR (4, 0);
3157
3158 uint64_t val_m1 = aarch64_get_vec_u64 (cpu, vm, 0);
3159 uint64_t val_m2 = aarch64_get_vec_u64 (cpu, vm, 1);
3160 uint64_t val_n1 = aarch64_get_vec_u64 (cpu, vn, 0);
3161 uint64_t val_n2 = aarch64_get_vec_u64 (cpu, vn, 1);
3162
3163 uint64_t val1 = 0;
3164 uint64_t val2 = 0;
3165
3166 uint64_t input1 = upper ? val_n2 : val_n1;
3167 uint64_t input2 = upper ? val_m2 : val_m1;
3168
3169 NYI_assert (29, 24, 0x0E);
3170 NYI_assert (21, 21, 0);
3171 NYI_assert (15, 15, 0);
3172 NYI_assert (13, 10, 0xE);
3173
3174 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
3175 switch (INSTR (23, 22))
3176 {
3177 case 0:
3178 val1 =
3179 ((input1 << 0) & (0xFF << 0))
3180 | ((input2 << 8) & (0xFF << 8))
3181 | ((input1 << 8) & (0xFF << 16))
3182 | ((input2 << 16) & (0xFF << 24))
3183 | ((input1 << 16) & (0xFFULL << 32))
3184 | ((input2 << 24) & (0xFFULL << 40))
3185 | ((input1 << 24) & (0xFFULL << 48))
3186 | ((input2 << 32) & (0xFFULL << 56));
3187
3188 val2 =
3189 ((input1 >> 32) & (0xFF << 0))
3190 | ((input2 >> 24) & (0xFF << 8))
3191 | ((input1 >> 24) & (0xFF << 16))
3192 | ((input2 >> 16) & (0xFF << 24))
3193 | ((input1 >> 16) & (0xFFULL << 32))
3194 | ((input2 >> 8) & (0xFFULL << 40))
3195 | ((input1 >> 8) & (0xFFULL << 48))
3196 | ((input2 >> 0) & (0xFFULL << 56));
3197 break;
3198
3199 case 1:
3200 val1 =
3201 ((input1 << 0) & (0xFFFF << 0))
3202 | ((input2 << 16) & (0xFFFF << 16))
3203 | ((input1 << 16) & (0xFFFFULL << 32))
3204 | ((input2 << 32) & (0xFFFFULL << 48));
3205
3206 val2 =
3207 ((input1 >> 32) & (0xFFFF << 0))
3208 | ((input2 >> 16) & (0xFFFF << 16))
3209 | ((input1 >> 16) & (0xFFFFULL << 32))
3210 | ((input2 >> 0) & (0xFFFFULL << 48));
3211 break;
3212
3213 case 2:
3214 val1 = (input1 & 0xFFFFFFFFULL) | (input2 << 32);
3215 val2 = (input2 & 0xFFFFFFFFULL) | (input1 << 32);
3216 break;
3217
3218 case 3:
3219 val1 = input1;
3220 val2 = input2;
3221 break;
3222 }
3223
3224 aarch64_set_vec_u64 (cpu, vd, 0, val1);
3225 if (full)
3226 aarch64_set_vec_u64 (cpu, vd, 1, val2);
3227 }
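/* Informative example: for ZIP1 Vd.2S, Vn.2S, Vm.2S the word case above
   produces val1 = (low word of Vn) | (low word of Vm) << 32, i.e.
   Vd[0] = Vn[0] and Vd[1] = Vm[0], the expected interleave of the lower
   halves.  */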
3228
3229 /* Floating point immediates are encoded in 8 bits.
3230 fpimm[7] = sign bit.
3231 fpimm[6:4] = signed exponent.
3232 fpimm[3:0] = fraction (assuming leading 1).
3233 i.e. F = s * 1.f * 2^(e - b). */
3234
3235 static float
3236 fp_immediate_for_encoding_32 (uint32_t imm8)
3237 {
3238 float u;
3239 uint32_t s, e, f, i;
3240
3241 s = (imm8 >> 7) & 0x1;
3242 e = (imm8 >> 4) & 0x7;
3243 f = imm8 & 0xf;
3244
3245 /* The fp value is s * n/16 * 2^r where n is 16+f and r is the signed exponent. */
3246 u = (16.0 + f) / 16.0;
3247
3248 /* N.B. exponent is signed. */
3249 if (e < 4)
3250 {
3251 int epos = e;
3252
3253 for (i = 0; i <= epos; i++)
3254 u *= 2.0;
3255 }
3256 else
3257 {
3258 int eneg = 7 - e;
3259
3260 for (i = 0; i < eneg; i++)
3261 u /= 2.0;
3262 }
3263
3264 if (s)
3265 u = - u;
3266
3267 return u;
3268 }
3269
3270 static double
3271 fp_immediate_for_encoding_64 (uint32_t imm8)
3272 {
3273 double u;
3274 uint32_t s, e, f, i;
3275
3276 s = (imm8 >> 7) & 0x1;
3277 e = (imm8 >> 4) & 0x7;
3278 f = imm8 & 0xf;
3279
3280 /* The fp value is s * n/16 * 2^r where n is 16+f and r is the signed exponent. */
3281 u = (16.0 + f) / 16.0;
3282
3283 /* N.B. exponent is signed. */
3284 if (e < 4)
3285 {
3286 int epos = e;
3287
3288 for (i = 0; i <= epos; i++)
3289 u *= 2.0;
3290 }
3291 else
3292 {
3293 int eneg = 7 - e;
3294
3295 for (i = 0; i < eneg; i++)
3296 u /= 2.0;
3297 }
3298
3299 if (s)
3300 u = - u;
3301
3302 return u;
3303 }
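/* Informative worked example: imm8 = 0x70 gives s = 0, e = 7, f = 0, so
   u starts at 16/16 = 1.0 and, since e >= 4, is divided by 2.0 exactly
   (7 - e) = 0 times; the encoding 0x70 therefore denotes 1.0, the
   canonical FMOV #1.0 immediate.  Similarly imm8 = 0x00 expands to 2.0.  */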
3304
3305 static void
3306 do_vec_MOV_immediate (sim_cpu *cpu)
3307 {
3308 /* instr[31] = 0
3309 instr[30] = full/half selector
3310 instr[29,19] = 00111100000
3311 instr[18,16] = high 3 bits of uimm8
3312 instr[15,12] = size & shift:
3313 0000 => 32-bit
3314 0010 => 32-bit + LSL#8
3315 0100 => 32-bit + LSL#16
3316 0110 => 32-bit + LSL#24
3317 1010 => 16-bit + LSL#8
3318 1000 => 16-bit
3319 1101 => 32-bit + MSL#16
3320 1100 => 32-bit + MSL#8
3321 1110 => 8-bit
3322 1111 => double
3323 instr[11,10] = 01
3324 instr[9,5] = low 5-bits of uimm8
3325 instr[4,0] = Vd. */
3326
3327 int full = INSTR (30, 30);
3328 unsigned vd = INSTR (4, 0);
3329 unsigned val = (INSTR (18, 16) << 5) | INSTR (9, 5);
3330 unsigned i;
3331
3332 NYI_assert (29, 19, 0x1E0);
3333 NYI_assert (11, 10, 1);
3334
3335 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
3336 switch (INSTR (15, 12))
3337 {
3338 case 0x0: /* 32-bit, no shift. */
3339 case 0x2: /* 32-bit, shift by 8. */
3340 case 0x4: /* 32-bit, shift by 16. */
3341 case 0x6: /* 32-bit, shift by 24. */
3342 val <<= (8 * INSTR (14, 13));
3343 for (i = 0; i < (full ? 4 : 2); i++)
3344 aarch64_set_vec_u32 (cpu, vd, i, val);
3345 break;
3346
3347 case 0xa: /* 16-bit, shift by 8. */
3348 val <<= 8;
3349 /* Fall through. */
3350 case 0x8: /* 16-bit, no shift. */
3351 for (i = 0; i < (full ? 8 : 4); i++)
3352 aarch64_set_vec_u16 (cpu, vd, i, val);
3353 break;
3354
3355 case 0xd: /* 32-bit, mask shift by 16. */
3356 val <<= 8;
3357 val |= 0xFF;
3358 /* Fall through. */
3359 case 0xc: /* 32-bit, mask shift by 8. */
3360 val <<= 8;
3361 val |= 0xFF;
3362 for (i = 0; i < (full ? 4 : 2); i++)
3363 aarch64_set_vec_u32 (cpu, vd, i, val);
3364 break;
3365
3366 case 0xe: /* 8-bit, no shift. */
3367 for (i = 0; i < (full ? 16 : 8); i++)
3368 aarch64_set_vec_u8 (cpu, vd, i, val);
3369 break;
3370
3371 case 0xf: /* FMOV Vs.{2|4}S, #fpimm. */
3372 {
3373 float u = fp_immediate_for_encoding_32 (val);
3374 for (i = 0; i < (full ? 4 : 2); i++)
3375 aarch64_set_vec_float (cpu, vd, i, u);
3376 break;
3377 }
3378
3379 default:
3380 HALT_NYI;
3381 }
3382 }
3383
3384 static void
3385 do_vec_MVNI (sim_cpu *cpu)
3386 {
3387 /* instr[31] = 0
3388 instr[30] = full/half selector
3389 instr[29,19] = 10111100000
3390 instr[18,16] = high 3 bits of uimm8
3391 instr[15,12] = selector
3392 instr[11,10] = 01
3393 instr[9,5] = low 5-bits of uimm8
3394 instr[4,0] = Vd. */
3395
3396 int full = INSTR (30, 30);
3397 unsigned vd = INSTR (4, 0);
3398 unsigned val = (INSTR (18, 16) << 5) | INSTR (9, 5);
3399 unsigned i;
3400
3401 NYI_assert (29, 19, 0x5E0);
3402 NYI_assert (11, 10, 1);
3403
3404 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
3405 switch (INSTR (15, 12))
3406 {
3407 case 0x0: /* 32-bit, no shift. */
3408 case 0x2: /* 32-bit, shift by 8. */
3409 case 0x4: /* 32-bit, shift by 16. */
3410 case 0x6: /* 32-bit, shift by 24. */
3411 val <<= (8 * INSTR (14, 13));
3412 val = ~ val;
3413 for (i = 0; i < (full ? 4 : 2); i++)
3414 aarch64_set_vec_u32 (cpu, vd, i, val);
3415 return;
3416
3417 case 0xa: /* 16-bit, shift by 8. */
3418 val <<= 8; /* Fall through. */
3419 case 0x8: /* 16-bit, no shift. */
3420 val = ~ val;
3421 for (i = 0; i < (full ? 8 : 4); i++)
3422 aarch64_set_vec_u16 (cpu, vd, i, val);
3423 return;
3424
3425 case 0xd: /* 32-bit, mask shift by 16. */
3426 val <<= 8;
3427 val |= 0xFF; /* Fall through. */
3428 case 0xc: /* 32-bit, mask shift by 8. */
3429 val <<= 8;
3430 val |= 0xFF;
3431 val = ~ val;
3432 for (i = 0; i < (full ? 4 : 2); i++)
3433 aarch64_set_vec_u32 (cpu, vd, i, val);
3434 return;
3435
3436 case 0xe: /* MOVI Dn, #mask64. */
3437 {
3438 uint64_t mask = 0;
3439
3440 for (i = 0; i < 8; i++)
3441 if (val & (1 << i))
3442 mask |= (0xFFULL << (i * 8));
3443 aarch64_set_vec_u64 (cpu, vd, 0, mask);
3444 aarch64_set_vec_u64 (cpu, vd, 1, mask);
3445 return;
3446 }
3447
3448 case 0xf: /* FMOV Vd.2D, #fpimm. */
3449 {
3450 double u = fp_immediate_for_encoding_64 (val);
3451
3452 if (! full)
3453 HALT_UNALLOC;
3454
3455 aarch64_set_vec_double (cpu, vd, 0, u);
3456 aarch64_set_vec_double (cpu, vd, 1, u);
3457 return;
3458 }
3459
3460 default:
3461 HALT_NYI;
3462 }
3463 }
3464
3465 #define ABS(A) ((A) < 0 ? - (A) : (A))
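/* N.B. as with the C library abs functions, ABS applied to the most
   negative representable value overflows; the vector ABS emulations
   below inherit that two's-complement wrap behaviour.  */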
3466
3467 static void
3468 do_vec_ABS (sim_cpu *cpu)
3469 {
3470 /* instr[31] = 0
3471 instr[30] = half(0)/full(1)
3472 instr[29,24] = 00 1110
3473 instr[23,22] = size: 00=> 8-bit, 01=> 16-bit, 10=> 32-bit, 11=> 64-bit
3474 instr[21,10] = 10 0000 1011 10
3475 instr[9,5] = Vn
3476 instr[4,0] = Vd. */
3477
3478 unsigned vn = INSTR (9, 5);
3479 unsigned vd = INSTR (4, 0);
3480 unsigned full = INSTR (30, 30);
3481 unsigned i;
3482
3483 NYI_assert (29, 24, 0x0E);
3484 NYI_assert (21, 10, 0x82E);
3485
3486 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
3487 switch (INSTR (23, 22))
3488 {
3489 case 0:
3490 for (i = 0; i < (full ? 16 : 8); i++)
3491 aarch64_set_vec_s8 (cpu, vd, i,
3492 ABS (aarch64_get_vec_s8 (cpu, vn, i)));
3493 break;
3494
3495 case 1:
3496 for (i = 0; i < (full ? 8 : 4); i++)
3497 aarch64_set_vec_s16 (cpu, vd, i,
3498 ABS (aarch64_get_vec_s16 (cpu, vn, i)));
3499 break;
3500
3501 case 2:
3502 for (i = 0; i < (full ? 4 : 2); i++)
3503 aarch64_set_vec_s32 (cpu, vd, i,
3504 ABS (aarch64_get_vec_s32 (cpu, vn, i)));
3505 break;
3506
3507 case 3:
3508 if (! full)
3509 HALT_NYI;
3510 for (i = 0; i < 2; i++)
3511 aarch64_set_vec_s64 (cpu, vd, i,
3512 ABS (aarch64_get_vec_s64 (cpu, vn, i)));
3513 break;
3514 }
3515 }
3516
3517 static void
3518 do_vec_ADDV (sim_cpu *cpu)
3519 {
3520 /* instr[31] = 0
3521 instr[30] = full/half selector
3522 instr[29,24] = 00 1110
3523 instr[23,22] = size: 00=> 8-bit, 01=> 16-bit, 10=> 32-bit, 11=> 64-bit
3524 instr[21,10] = 11 0001 1011 10
3525 instr[9,5] = Vm
3526 instr[4,0] = Rd. */
3527
3528 unsigned vm = INSTR (9, 5);
3529 unsigned rd = INSTR (4, 0);
3530 unsigned i;
3531 int full = INSTR (30, 30);
3532
3533 NYI_assert (29, 24, 0x0E);
3534 NYI_assert (21, 10, 0xC6E);
3535
3536 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
3537 switch (INSTR (23, 22))
3538 {
3539 case 0:
3540 {
3541 uint8_t val = 0;
3542 for (i = 0; i < (full ? 16 : 8); i++)
3543 val += aarch64_get_vec_u8 (cpu, vm, i);
3544 aarch64_set_vec_u64 (cpu, rd, 0, val);
3545 return;
3546 }
3547
3548 case 1:
3549 {
3550 uint16_t val = 0;
3551 for (i = 0; i < (full ? 8 : 4); i++)
3552 val += aarch64_get_vec_u16 (cpu, vm, i);
3553 aarch64_set_vec_u64 (cpu, rd, 0, val);
3554 return;
3555 }
3556
3557 case 2:
3558 {
3559 uint32_t val = 0;
3560 if (! full)
3561 HALT_UNALLOC;
3562 for (i = 0; i < 4; i++)
3563 val += aarch64_get_vec_u32 (cpu, vm, i);
3564 aarch64_set_vec_u64 (cpu, rd, 0, val);
3565 return;
3566 }
3567
3568 case 3:
3569 HALT_UNALLOC;
3570 }
3571 }
3572
3573 static void
3574 do_vec_ins_2 (sim_cpu *cpu)
3575 {
3576 /* instr[31,21] = 01001110000
3577 instr[20,18] = size & element selector
3578 instr[17,14] = 0000
3579 instr[13] = direction: to vec(0), from vec (1)
3580 instr[12,10] = 111
3581 instr[9,5] = Vm
3582 instr[4,0] = Vd. */
3583
3584 unsigned elem;
3585 unsigned vm = INSTR (9, 5);
3586 unsigned vd = INSTR (4, 0);
3587
3588 NYI_assert (31, 21, 0x270);
3589 NYI_assert (17, 14, 0);
3590 NYI_assert (12, 10, 7);
3591
3592 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
3593 if (INSTR (13, 13) == 1)
3594 {
3595 if (INSTR (18, 18) == 1)
3596 {
3597 /* 32-bit moves. */
3598 elem = INSTR (20, 19);
3599 aarch64_set_reg_u64 (cpu, vd, NO_SP,
3600 aarch64_get_vec_u32 (cpu, vm, elem));
3601 }
3602 else
3603 {
3604 /* 64-bit moves. */
3605 if (INSTR (19, 19) != 1)
3606 HALT_NYI;
3607
3608 elem = INSTR (20, 20);
3609 aarch64_set_reg_u64 (cpu, vd, NO_SP,
3610 aarch64_get_vec_u64 (cpu, vm, elem));
3611 }
3612 }
3613 else
3614 {
3615 if (INSTR (18, 18) == 1)
3616 {
3617 /* 32-bit moves. */
3618 elem = INSTR (20, 19);
3619 aarch64_set_vec_u32 (cpu, vd, elem,
3620 aarch64_get_reg_u32 (cpu, vm, NO_SP));
3621 }
3622 else
3623 {
3624 /* 64-bit moves. */
3625 if (INSTR (19, 19) != 1)
3626 HALT_NYI;
3627
3628 elem = INSTR (20, 20);
3629 aarch64_set_vec_u64 (cpu, vd, elem,
3630 aarch64_get_reg_u64 (cpu, vm, NO_SP));
3631 }
3632 }
3633 }
3634
3635 #define DO_VEC_WIDENING_MUL(N, DST_TYPE, READ_TYPE, WRITE_TYPE) \
3636 do \
3637 { \
3638 DST_TYPE a[N], b[N]; \
3639 \
3640 for (i = 0; i < (N); i++) \
3641 { \
3642 a[i] = aarch64_get_vec_##READ_TYPE (cpu, vn, i + bias); \
3643 b[i] = aarch64_get_vec_##READ_TYPE (cpu, vm, i + bias); \
3644 } \
3645 for (i = 0; i < (N); i++) \
3646 aarch64_set_vec_##WRITE_TYPE (cpu, vd, i, a[i] * b[i]); \
3647 } \
3648 while (0)
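/* N.B. the macro above buffers every source element before any result is
   written, so it remains correct when vd aliases vn or vm; the bias
   variable offsets the element reads, which is how the upper-half forms
   (e.g. UMULL2) are handled.  */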
3649
3650 static void
3651 do_vec_mull (sim_cpu *cpu)
3652 {
3653 /* instr[31] = 0
3654 instr[30] = lower(0)/upper(1) selector
3655 instr[29] = signed(0)/unsigned(1)
3656 instr[28,24] = 0 1110
3657 instr[23,22] = size: 8-bit (00), 16-bit (01), 32-bit (10)
3658 instr[21] = 1
3659 instr[20,16] = Vm
3660 instr[15,10] = 11 0000
3661 instr[9,5] = Vn
3662 instr[4,0] = Vd. */
3663
3664 int unsign = INSTR (29, 29);
3665 int bias = INSTR (30, 30);
3666 unsigned vm = INSTR (20, 16);
3667 unsigned vn = INSTR ( 9, 5);
3668 unsigned vd = INSTR ( 4, 0);
3669 unsigned i;
3670
3671 NYI_assert (28, 24, 0x0E);
3672 NYI_assert (15, 10, 0x30);
3673
3674 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
3675 /* NB: Read source values before writing results, in case
3676 the source and destination vectors are the same. */
3677 switch (INSTR (23, 22))
3678 {
3679 case 0:
3680 if (bias)
3681 bias = 8;
3682 if (unsign)
3683 DO_VEC_WIDENING_MUL (8, uint16_t, u8, u16);
3684 else
3685 DO_VEC_WIDENING_MUL (8, int16_t, s8, s16);
3686 return;
3687
3688 case 1:
3689 if (bias)
3690 bias = 4;
3691 if (unsign)
3692 DO_VEC_WIDENING_MUL (4, uint32_t, u16, u32);
3693 else
3694 DO_VEC_WIDENING_MUL (4, int32_t, s16, s32);
3695 return;
3696
3697 case 2:
3698 if (bias)
3699 bias = 2;
3700 if (unsign)
3701 DO_VEC_WIDENING_MUL (2, uint64_t, u32, u64);
3702 else
3703 DO_VEC_WIDENING_MUL (2, int64_t, s32, s64);
3704 return;
3705
3706 case 3:
3707 HALT_NYI;
3708 }
3709 }
3710
3711 static void
3712 do_vec_fadd (sim_cpu *cpu)
3713 {
3714 /* instr[31] = 0
3715 instr[30] = half(0)/full(1)
3716 instr[29,24] = 001110
3717 instr[23] = FADD(0)/FSUB(1)
3718 instr[22] = float (0)/double(1)
3719 instr[21] = 1
3720 instr[20,16] = Vm
3721 instr[15,10] = 110101
3722 instr[9,5] = Vn
3723 instr[4,0] = Vd. */
3724
3725 unsigned vm = INSTR (20, 16);
3726 unsigned vn = INSTR (9, 5);
3727 unsigned vd = INSTR (4, 0);
3728 unsigned i;
3729 int full = INSTR (30, 30);
3730
3731 NYI_assert (29, 24, 0x0E);
3732 NYI_assert (21, 21, 1);
3733 NYI_assert (15, 10, 0x35);
3734
3735 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
3736 if (INSTR (23, 23))
3737 {
3738 if (INSTR (22, 22))
3739 {
3740 if (! full)
3741 HALT_NYI;
3742
3743 for (i = 0; i < 2; i++)
3744 aarch64_set_vec_double (cpu, vd, i,
3745 aarch64_get_vec_double (cpu, vn, i)
3746 - aarch64_get_vec_double (cpu, vm, i));
3747 }
3748 else
3749 {
3750 for (i = 0; i < (full ? 4 : 2); i++)
3751 aarch64_set_vec_float (cpu, vd, i,
3752 aarch64_get_vec_float (cpu, vn, i)
3753 - aarch64_get_vec_float (cpu, vm, i));
3754 }
3755 }
3756 else
3757 {
3758 if (INSTR (22, 22))
3759 {
3760 if (! full)
3761 HALT_NYI;
3762
3763 for (i = 0; i < 2; i++)
3764 aarch64_set_vec_double (cpu, vd, i,
3765 aarch64_get_vec_double (cpu, vm, i)
3766 + aarch64_get_vec_double (cpu, vn, i));
3767 }
3768 else
3769 {
3770 for (i = 0; i < (full ? 4 : 2); i++)
3771 aarch64_set_vec_float (cpu, vd, i,
3772 aarch64_get_vec_float (cpu, vm, i)
3773 + aarch64_get_vec_float (cpu, vn, i));
3774 }
3775 }
3776 }
3777
3778 static void
3779 do_vec_add (sim_cpu *cpu)
3780 {
3781 /* instr[31] = 0
3782 instr[30] = full/half selector
3783 instr[29,24] = 001110
3784 instr[23,22] = size: 00=> 8-bit, 01=> 16-bit, 10=> 32-bit, 11=> 64-bit
3785 instr[21] = 1
3786 instr[20,16] = Vm
3787 instr[15,10] = 100001
3788 instr[9,5] = Vn
3789 instr[4,0] = Vd. */
3790
3791 unsigned vm = INSTR (20, 16);
3792 unsigned vn = INSTR (9, 5);
3793 unsigned vd = INSTR (4, 0);
3794 unsigned i;
3795 int full = INSTR (30, 30);
3796
3797 NYI_assert (29, 24, 0x0E);
3798 NYI_assert (21, 21, 1);
3799 NYI_assert (15, 10, 0x21);
3800
3801 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
3802 switch (INSTR (23, 22))
3803 {
3804 case 0:
3805 for (i = 0; i < (full ? 16 : 8); i++)
3806 aarch64_set_vec_u8 (cpu, vd, i, aarch64_get_vec_u8 (cpu, vn, i)
3807 + aarch64_get_vec_u8 (cpu, vm, i));
3808 return;
3809
3810 case 1:
3811 for (i = 0; i < (full ? 8 : 4); i++)
3812 aarch64_set_vec_u16 (cpu, vd, i, aarch64_get_vec_u16 (cpu, vn, i)
3813 + aarch64_get_vec_u16 (cpu, vm, i));
3814 return;
3815
3816 case 2:
3817 for (i = 0; i < (full ? 4 : 2); i++)
3818 aarch64_set_vec_u32 (cpu, vd, i, aarch64_get_vec_u32 (cpu, vn, i)
3819 + aarch64_get_vec_u32 (cpu, vm, i));
3820 return;
3821
3822 case 3:
3823 if (! full)
3824 HALT_UNALLOC;
3825 aarch64_set_vec_u64 (cpu, vd, 0, aarch64_get_vec_u64 (cpu, vn, 0)
3826 + aarch64_get_vec_u64 (cpu, vm, 0));
3827 aarch64_set_vec_u64 (cpu, vd, 1,
3828 aarch64_get_vec_u64 (cpu, vn, 1)
3829 + aarch64_get_vec_u64 (cpu, vm, 1));
3830 return;
3831 }
3832 }
3833
3834 static void
3835 do_vec_mul (sim_cpu *cpu)
3836 {
3837 /* instr[31] = 0
3838 instr[30] = full/half selector
3839 instr[29,24] = 00 1110
3840 instr[23,22] = size: 00=> 8-bit, 01=> 16-bit, 10=> 32-bit
3841 instr[21] = 1
3842 instr[20,16] = Vm
3843 instr[15,10] = 10 0111
3844 instr[9,5] = Vn
3845 instr[4,0] = Vd. */
3846
3847 unsigned vm = INSTR (20, 16);
3848 unsigned vn = INSTR (9, 5);
3849 unsigned vd = INSTR (4, 0);
3850 unsigned i;
3851 int full = INSTR (30, 30);
3852 int bias = 0;
3853
3854 NYI_assert (29, 24, 0x0E);
3855 NYI_assert (21, 21, 1);
3856 NYI_assert (15, 10, 0x27);
3857
3858 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
3859 switch (INSTR (23, 22))
3860 {
3861 case 0:
3862 DO_VEC_WIDENING_MUL (full ? 16 : 8, uint8_t, u8, u8);
3863 return;
3864
3865 case 1:
3866 DO_VEC_WIDENING_MUL (full ? 8 : 4, uint16_t, u16, u16);
3867 return;
3868
3869 case 2:
3870 DO_VEC_WIDENING_MUL (full ? 4 : 2, uint32_t, u32, u32);
3871 return;
3872
3873 case 3:
3874 HALT_UNALLOC;
3875 }
3876 }
3877
3878 static void
3879 do_vec_MLA (sim_cpu *cpu)
3880 {
3881 /* instr[31] = 0
3882 instr[30] = full/half selector
3883 instr[29,24] = 00 1110
3884 instr[23,22] = size: 00=> 8-bit, 01=> 16-bit, 10=> 32-bit
3885 instr[21] = 1
3886 instr[20,16] = Vm
3887 instr[15,10] = 1001 01
3888 instr[9,5] = Vn
3889 instr[4,0] = Vd. */
3890
3891 unsigned vm = INSTR (20, 16);
3892 unsigned vn = INSTR (9, 5);
3893 unsigned vd = INSTR (4, 0);
3894 unsigned i;
3895 int full = INSTR (30, 30);
3896
3897 NYI_assert (29, 24, 0x0E);
3898 NYI_assert (21, 21, 1);
3899 NYI_assert (15, 10, 0x25);
3900
3901 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
3902 switch (INSTR (23, 22))
3903 {
3904 case 0:
3905 for (i = 0; i < (full ? 16 : 8); i++)
3906 aarch64_set_vec_u8 (cpu, vd, i,
3907 aarch64_get_vec_u8 (cpu, vd, i)
3908 + (aarch64_get_vec_u8 (cpu, vn, i)
3909 * aarch64_get_vec_u8 (cpu, vm, i)));
3910 return;
3911
3912 case 1:
3913 for (i = 0; i < (full ? 8 : 4); i++)
3914 aarch64_set_vec_u16 (cpu, vd, i,
3915 aarch64_get_vec_u16 (cpu, vd, i)
3916 + (aarch64_get_vec_u16 (cpu, vn, i)
3917 * aarch64_get_vec_u16 (cpu, vm, i)));
3918 return;
3919
3920 case 2:
3921 for (i = 0; i < (full ? 4 : 2); i++)
3922 aarch64_set_vec_u32 (cpu, vd, i,
3923 aarch64_get_vec_u32 (cpu, vd, i)
3924 + (aarch64_get_vec_u32 (cpu, vn, i)
3925 * aarch64_get_vec_u32 (cpu, vm, i)));
3926 return;
3927
3928 default:
3929 HALT_UNALLOC;
3930 }
3931 }
3932
3933 static float
3934 fmaxnm (float a, float b)
3935 {
3936 if (! isnan (a))
3937 {
3938 if (! isnan (b))
3939 return a > b ? a : b;
3940 return a;
3941 }
3942 else if (! isnan (b))
3943 return b;
3944 return a;
3945 }
3946
3947 static float
3948 fminnm (float a, float b)
3949 {
3950 if (! isnan (a))
3951 {
3952 if (! isnan (b))
3953 return a < b ? a : b;
3954 return a;
3955 }
3956 else if (! isnan (b))
3957 return b;
3958 return a;
3959 }
3960
3961 static double
3962 dmaxnm (double a, double b)
3963 {
3964 if (! isnan (a))
3965 {
3966 if (! isnan (b))
3967 return a > b ? a : b;
3968 return a;
3969 }
3970 else if (! isnan (b))
3971 return b;
3972 return a;
3973 }
3974
3975 static double
3976 dminnm (double a, double b)
3977 {
3978 if (! isnan (a))
3979 {
3980 if (! isnan (b))
3981 return a < b ? a : b;
3982 return a;
3983 }
3984 else if (! isnan (b))
3985 return b;
3986 return a;
3987 }
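/* The four helpers above implement the FMAXNM/FMINNM treatment of
NaNs: if exactly one operand is a NaN the other operand is returned,
and a NaN results only when both operands are NaNs. (The quiet
versus signalling NaN distinction made by IEEE 754 maxNum/minNum is
not modelled here.) */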
3988
3989 static void
3990 do_vec_FminmaxNMP (sim_cpu *cpu)
3991 {
3992 /* instr [31] = 0
3993 instr [30] = half (0)/full (1)
3994 instr [29,24] = 10 1110
3995 instr [23] = max(0)/min(1)
3996 instr [22] = float (0)/double (1)
3997 instr [21] = 1
3998 instr [20,16] = Vn
3999 instr [15,10] = 1100 01
4000 instr [9,5] = Vm
4001 instr [4,0] = Vd. */
4002
4003 unsigned vm = INSTR (20, 16);
4004 unsigned vn = INSTR (9, 5);
4005 unsigned vd = INSTR (4, 0);
4006 int full = INSTR (30, 30);
4007
4008 NYI_assert (29, 24, 0x2E);
4009 NYI_assert (21, 21, 1);
4010 NYI_assert (15, 10, 0x31);
4011
4012 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
4013 if (INSTR (22, 22))
4014 {
4015 double (* fn)(double, double) = INSTR (23, 23)
4016 ? dminnm : dmaxnm;
4017
4018 if (! full)
4019 HALT_NYI;
4020 aarch64_set_vec_double (cpu, vd, 0,
4021 fn (aarch64_get_vec_double (cpu, vn, 0),
4022 aarch64_get_vec_double (cpu, vn, 1)));
4023 aarch64_set_vec_double (cpu, vd, 1,
4024 fn (aarch64_get_vec_double (cpu, vm, 0),
4025 aarch64_get_vec_double (cpu, vm, 1)));
4026 }
4027 else
4028 {
4029 float (* fn)(float, float) = INSTR (23, 23)
4030 ? fminnm : fmaxnm;
4031
4032 aarch64_set_vec_float (cpu, vd, 0,
4033 fn (aarch64_get_vec_float (cpu, vn, 0),
4034 aarch64_get_vec_float (cpu, vn, 1)));
4035 if (full)
4036 aarch64_set_vec_float (cpu, vd, 1,
4037 fn (aarch64_get_vec_float (cpu, vn, 2),
4038 aarch64_get_vec_float (cpu, vn, 3)));
4039
4040 aarch64_set_vec_float (cpu, vd, (full ? 2 : 1),
4041 fn (aarch64_get_vec_float (cpu, vm, 0),
4042 aarch64_get_vec_float (cpu, vm, 1)));
4043 if (full)
4044 aarch64_set_vec_float (cpu, vd, 3,
4045 fn (aarch64_get_vec_float (cpu, vm, 2),
4046 aarch64_get_vec_float (cpu, vm, 3)));
4047 }
4048 }
4049
4050 static void
4051 do_vec_AND (sim_cpu *cpu)
4052 {
4053 /* instr[31] = 0
4054 instr[30] = half (0)/full (1)
4055 instr[29,21] = 001110001
4056 instr[20,16] = Vm
4057 instr[15,10] = 000111
4058 instr[9,5] = Vn
4059 instr[4,0] = Vd. */
4060
4061 unsigned vm = INSTR (20, 16);
4062 unsigned vn = INSTR (9, 5);
4063 unsigned vd = INSTR (4, 0);
4064 unsigned i;
4065 int full = INSTR (30, 30);
4066
4067 NYI_assert (29, 21, 0x071);
4068 NYI_assert (15, 10, 0x07);
4069
4070 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
4071 for (i = 0; i < (full ? 4 : 2); i++)
4072 aarch64_set_vec_u32 (cpu, vd, i,
4073 aarch64_get_vec_u32 (cpu, vn, i)
4074 & aarch64_get_vec_u32 (cpu, vm, i));
4075 }
4076
4077 static void
4078 do_vec_BSL (sim_cpu *cpu)
4079 {
4080 /* instr[31] = 0
4081 instr[30] = half (0)/full (1)
4082 instr[29,21] = 101110011
4083 instr[20,16] = Vm
4084 instr[15,10] = 000111
4085 instr[9,5] = Vn
4086 instr[4,0] = Vd. */
4087
4088 unsigned vm = INSTR (20, 16);
4089 unsigned vn = INSTR (9, 5);
4090 unsigned vd = INSTR (4, 0);
4091 unsigned i;
4092 int full = INSTR (30, 30);
4093
4094 NYI_assert (29, 21, 0x173);
4095 NYI_assert (15, 10, 0x07);
4096
4097 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
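/* Bitwise select: each result bit comes from Vn where the
corresponding Vd bit is set, and from Vm where it is clear. The
operation is purely bitwise, so iterating in 8-bit lanes is as good
as any other width. */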
4098 for (i = 0; i < (full ? 16 : 8); i++)
4099 aarch64_set_vec_u8 (cpu, vd, i,
4100 ( aarch64_get_vec_u8 (cpu, vd, i)
4101 & aarch64_get_vec_u8 (cpu, vn, i))
4102 | ((~ aarch64_get_vec_u8 (cpu, vd, i))
4103 & aarch64_get_vec_u8 (cpu, vm, i)));
4104 }
4105
4106 static void
4107 do_vec_EOR (sim_cpu *cpu)
4108 {
4109 /* instr[31] = 0
4110 instr[30] = half (0)/full (1)
4111 instr[29,21] = 10 1110 001
4112 instr[20,16] = Vm
4113 instr[15,10] = 000111
4114 instr[9,5] = Vn
4115 instr[4,0] = Vd. */
4116
4117 unsigned vm = INSTR (20, 16);
4118 unsigned vn = INSTR (9, 5);
4119 unsigned vd = INSTR (4, 0);
4120 unsigned i;
4121 int full = INSTR (30, 30);
4122
4123 NYI_assert (29, 21, 0x171);
4124 NYI_assert (15, 10, 0x07);
4125
4126 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
4127 for (i = 0; i < (full ? 4 : 2); i++)
4128 aarch64_set_vec_u32 (cpu, vd, i,
4129 aarch64_get_vec_u32 (cpu, vn, i)
4130 ^ aarch64_get_vec_u32 (cpu, vm, i));
4131 }
4132
4133 static void
4134 do_vec_bit (sim_cpu *cpu)
4135 {
4136 /* instr[31] = 0
4137 instr[30] = half (0)/full (1)
4138 instr[29,23] = 10 1110 1
4139 instr[22] = BIT (0) / BIF (1)
4140 instr[21] = 1
4141 instr[20,16] = Vm
4142 instr[15,10] = 0001 11
4143 instr[9,5] = Vn
4144 instr[4,0] = Vd. */
4145
4146 unsigned vm = INSTR (20, 16);
4147 unsigned vn = INSTR (9, 5);
4148 unsigned vd = INSTR (4, 0);
4149 unsigned full = INSTR (30, 30);
4150 unsigned test_false = INSTR (22, 22);
4151 unsigned i;
4152
4153 NYI_assert (29, 23, 0x5D);
4154 NYI_assert (21, 21, 1);
4155 NYI_assert (15, 10, 0x07);
4156
4157 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
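/* BIT copies Vn bits into Vd wherever the Vm mask bit is set; BIF
(test_false) copies them wherever the mask bit is clear. Again the
operation is bitwise, so the 32-bit lane width used below is
arbitrary. */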
4158 for (i = 0; i < (full ? 4 : 2); i++)
4159 {
4160 uint32_t vd_val = aarch64_get_vec_u32 (cpu, vd, i);
4161 uint32_t vn_val = aarch64_get_vec_u32 (cpu, vn, i);
4162 uint32_t vm_val = aarch64_get_vec_u32 (cpu, vm, i);
4163 if (test_false)
4164 aarch64_set_vec_u32 (cpu, vd, i,
4165 (vd_val & vm_val) | (vn_val & ~vm_val));
4166 else
4167 aarch64_set_vec_u32 (cpu, vd, i,
4168 (vd_val & ~vm_val) | (vn_val & vm_val));
4169 }
4170 }
4171
4172 static void
4173 do_vec_ORN (sim_cpu *cpu)
4174 {
4175 /* instr[31] = 0
4176 instr[30] = half (0)/full (1)
4177 instr[29,21] = 00 1110 111
4178 instr[20,16] = Vm
4179 instr[15,10] = 00 0111
4180 instr[9,5] = Vn
4181 instr[4,0] = Vd. */
4182
4183 unsigned vm = INSTR (20, 16);
4184 unsigned vn = INSTR (9, 5);
4185 unsigned vd = INSTR (4, 0);
4186 unsigned i;
4187 int full = INSTR (30, 30);
4188
4189 NYI_assert (29, 21, 0x077);
4190 NYI_assert (15, 10, 0x07);
4191
4192 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
4193 for (i = 0; i < (full ? 16 : 8); i++)
4194 aarch64_set_vec_u8 (cpu, vd, i,
4195 aarch64_get_vec_u8 (cpu, vn, i)
4196 | ~ aarch64_get_vec_u8 (cpu, vm, i));
4197 }
4198
4199 static void
4200 do_vec_ORR (sim_cpu *cpu)
4201 {
4202 /* instr[31] = 0
4203 instr[30] = half (0)/full (1)
4204 instr[29,21] = 00 1110 101
4205 instr[20,16] = Vm
4206 instr[15,10] = 0001 11
4207 instr[9,5] = Vn
4208 instr[4,0] = Vd. */
4209
4210 unsigned vm = INSTR (20, 16);
4211 unsigned vn = INSTR (9, 5);
4212 unsigned vd = INSTR (4, 0);
4213 unsigned i;
4214 int full = INSTR (30, 30);
4215
4216 NYI_assert (29, 21, 0x075);
4217 NYI_assert (15, 10, 0x07);
4218
4219 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
4220 for (i = 0; i < (full ? 16 : 8); i++)
4221 aarch64_set_vec_u8 (cpu, vd, i,
4222 aarch64_get_vec_u8 (cpu, vn, i)
4223 | aarch64_get_vec_u8 (cpu, vm, i));
4224 }
4225
4226 static void
4227 do_vec_BIC (sim_cpu *cpu)
4228 {
4229 /* instr[31] = 0
4230 instr[30] = half (0)/full (1)
4231 instr[29,21] = 00 1110 011
4232 instr[20,16] = Vm
4233 instr[15,10] = 00 0111
4234 instr[9,5] = Vn
4235 instr[4,0] = Vd. */
4236
4237 unsigned vm = INSTR (20, 16);
4238 unsigned vn = INSTR (9, 5);
4239 unsigned vd = INSTR (4, 0);
4240 unsigned i;
4241 int full = INSTR (30, 30);
4242
4243 NYI_assert (29, 21, 0x073);
4244 NYI_assert (15, 10, 0x07);
4245
4246 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
4247 for (i = 0; i < (full ? 16 : 8); i++)
4248 aarch64_set_vec_u8 (cpu, vd, i,
4249 aarch64_get_vec_u8 (cpu, vn, i)
4250 & ~ aarch64_get_vec_u8 (cpu, vm, i));
4251 }
4252
4253 static void
4254 do_vec_XTN (sim_cpu *cpu)
4255 {
4256 /* instr[31] = 0
4257 instr[30] = first part (0)/ second part (1)
4258 instr[29,24] = 00 1110
4259 instr[23,22] = size: byte(00), half(01), word (10)
4260 instr[21,10] = 1000 0100 1010
4261 instr[9,5] = Vs
4262 instr[4,0] = Vd. */
4263
4264 unsigned vs = INSTR (9, 5);
4265 unsigned vd = INSTR (4, 0);
4266 unsigned bias = INSTR (30, 30);
4267 unsigned i;
4268
4269 NYI_assert (29, 24, 0x0E);
4270 NYI_assert (21, 10, 0x84A);
4271
4272 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
4273 switch (INSTR (23, 22))
4274 {
4275 case 0:
4276 for (i = 0; i < 8; i++)
4277 aarch64_set_vec_u8 (cpu, vd, i + (bias * 8),
4278 aarch64_get_vec_u16 (cpu, vs, i));
4279 return;
4280
4281 case 1:
4282 for (i = 0; i < 4; i++)
4283 aarch64_set_vec_u16 (cpu, vd, i + (bias * 4),
4284 aarch64_get_vec_u32 (cpu, vs, i));
4285 return;
4286
4287 case 2:
4288 for (i = 0; i < 2; i++)
4289 aarch64_set_vec_u32 (cpu, vd, i + (bias * 2),
4290 aarch64_get_vec_u64 (cpu, vs, i));
4291 return;
4292 }
4293 }
4294
4295 /* Return the number of bits set in the input value. */
4296 #if __GNUC__ > 3 || (__GNUC__ == 3 && __GNUC_MINOR__ >= 4)
4297 # define popcount __builtin_popcount
4298 #else
4299 static int
4300 popcount (unsigned char x)
4301 {
4302 static const unsigned char popcnt[16] =
4303 {
4304 0, 1, 1, 2,
4305 1, 2, 2, 3,
4306 1, 2, 2, 3,
4307 2, 3, 3, 4
4308 };
4309
4310 /* Only counts the low 8 bits of the input as that is all we need. */
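/* For example, popcount (0xA5) = popcnt[0x5] + popcnt[0xA]
= 2 + 2 = 4. */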
4311 return popcnt[x % 16] + popcnt[x / 16];
4312 }
4313 #endif
4314
4315 static void
4316 do_vec_CNT (sim_cpu *cpu)
4317 {
4318 /* instr[31] = 0
4319 instr[30] = half (0)/ full (1)
4320 instr[29,24] = 00 1110
4321 instr[23,22] = size: byte(00)
4322 instr[21,10] = 1000 0001 0110
4323 instr[9,5] = Vs
4324 instr[4,0] = Vd. */
4325
4326 unsigned vs = INSTR (9, 5);
4327 unsigned vd = INSTR (4, 0);
4328 int full = INSTR (30, 30);
4329 int size = INSTR (23, 22);
4330 int i;
4331
4332 NYI_assert (29, 24, 0x0E);
4333 NYI_assert (21, 10, 0x816);
4334
4335 if (size != 0)
4336 HALT_UNALLOC;
4337
4338 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
4339
4340 for (i = 0; i < (full ? 16 : 8); i++)
4341 aarch64_set_vec_u8 (cpu, vd, i,
4342 popcount (aarch64_get_vec_u8 (cpu, vs, i)));
4343 }
4344
4345 static void
4346 do_vec_maxv (sim_cpu *cpu)
4347 {
4348 /* instr[31] = 0
4349 instr[30] = half(0)/full(1)
4350 instr[29] = signed (0)/unsigned(1)
4351 instr[28,24] = 0 1110
4352 instr[23,22] = size: byte(00), half(01), word (10)
4353 instr[21] = 1
4354 instr[20,17] = 1 000
4355 instr[16] = max(0)/min(1)
4356 instr[15,10] = 1010 10
4357 instr[9,5] = V source
4358 instr[4,0] = R dest. */
4359
4360 unsigned vs = INSTR (9, 5);
4361 unsigned rd = INSTR (4, 0);
4362 unsigned full = INSTR (30, 30);
4363 unsigned i;
4364
4365 NYI_assert (28, 24, 0x0E);
4366 NYI_assert (21, 21, 1);
4367 NYI_assert (20, 17, 8);
4368 NYI_assert (15, 10, 0x2A);
4369
4370 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
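/* Bit 29 selects the unsigned forms and bit 16 the minimum forms,
giving SMAXV, SMINV, UMAXV and UMINV. Each folds every lane of Vs
into a single scalar, starting from lane 0. */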
4371 switch ((INSTR (29, 29) << 1) | INSTR (16, 16))
4372 {
4373 case 0: /* SMAXV. */
4374 {
4375 int64_t smax;
4376 switch (INSTR (23, 22))
4377 {
4378 case 0:
4379 smax = aarch64_get_vec_s8 (cpu, vs, 0);
4380 for (i = 1; i < (full ? 16 : 8); i++)
4381 smax = max (smax, aarch64_get_vec_s8 (cpu, vs, i));
4382 break;
4383 case 1:
4384 smax = aarch64_get_vec_s16 (cpu, vs, 0);
4385 for (i = 1; i < (full ? 8 : 4); i++)
4386 smax = max (smax, aarch64_get_vec_s16 (cpu, vs, i));
4387 break;
4388 case 2:
4389 smax = aarch64_get_vec_s32 (cpu, vs, 0);
4390 for (i = 1; i < (full ? 4 : 2); i++)
4391 smax = max (smax, aarch64_get_vec_s32 (cpu, vs, i));
4392 break;
4393 case 3:
4394 HALT_UNALLOC;
4395 }
4396 aarch64_set_reg_s64 (cpu, rd, NO_SP, smax);
4397 return;
4398 }
4399
4400 case 1: /* SMINV. */
4401 {
4402 int64_t smin;
4403 switch (INSTR (23, 22))
4404 {
4405 case 0:
4406 smin = aarch64_get_vec_s8 (cpu, vs, 0);
4407 for (i = 1; i < (full ? 16 : 8); i++)
4408 smin = min (smin, aarch64_get_vec_s8 (cpu, vs, i));
4409 break;
4410 case 1:
4411 smin = aarch64_get_vec_s16 (cpu, vs, 0);
4412 for (i = 1; i < (full ? 8 : 4); i++)
4413 smin = min (smin, aarch64_get_vec_s16 (cpu, vs, i));
4414 break;
4415 case 2:
4416 smin = aarch64_get_vec_s32 (cpu, vs, 0);
4417 for (i = 1; i < (full ? 4 : 2); i++)
4418 smin = min (smin, aarch64_get_vec_s32 (cpu, vs, i));
4419 break;
4420
4421 case 3:
4422 HALT_UNALLOC;
4423 }
4424 aarch64_set_reg_s64 (cpu, rd, NO_SP, smin);
4425 return;
4426 }
4427
4428 case 2: /* UMAXV. */
4429 {
4430 uint64_t umax;
4431 switch (INSTR (23, 22))
4432 {
4433 case 0:
4434 umax = aarch64_get_vec_u8 (cpu, vs, 0);
4435 for (i = 1; i < (full ? 16 : 8); i++)
4436 umax = max (umax, aarch64_get_vec_u8 (cpu, vs, i));
4437 break;
4438 case 1:
4439 umax = aarch64_get_vec_u16 (cpu, vs, 0);
4440 for (i = 1; i < (full ? 8 : 4); i++)
4441 umax = max (umax, aarch64_get_vec_u16 (cpu, vs, i));
4442 break;
4443 case 2:
4444 umax = aarch64_get_vec_u32 (cpu, vs, 0);
4445 for (i = 1; i < (full ? 4 : 2); i++)
4446 umax = max (umax, aarch64_get_vec_u32 (cpu, vs, i));
4447 break;
4448
4449 case 3:
4450 HALT_UNALLOC;
4451 }
4452 aarch64_set_reg_u64 (cpu, rd, NO_SP, umax);
4453 return;
4454 }
4455
4456 case 3: /* UMINV. */
4457 {
4458 uint64_t umin;
4459 switch (INSTR (23, 22))
4460 {
4461 case 0:
4462 umin = aarch64_get_vec_u8 (cpu, vs, 0);
4463 for (i = 1; i < (full ? 16 : 8); i++)
4464 umin = min (umin, aarch64_get_vec_u8 (cpu, vs, i));
4465 break;
4466 case 1:
4467 umin = aarch64_get_vec_u16 (cpu, vs, 0);
4468 for (i = 1; i < (full ? 8 : 4); i++)
4469 umin = min (umin, aarch64_get_vec_u16 (cpu, vs, i));
4470 break;
4471 case 2:
4472 umin = aarch64_get_vec_u32 (cpu, vs, 0);
4473 for (i = 1; i < (full ? 4 : 2); i++)
4474 umin = min (umin, aarch64_get_vec_u32 (cpu, vs, i));
4475 break;
4476
4477 case 3:
4478 HALT_UNALLOC;
4479 }
4480 aarch64_set_reg_u64 (cpu, rd, NO_SP, umin);
4481 return;
4482 }
4483 }
4484 }
4485
4486 static void
4487 do_vec_fminmaxV (sim_cpu *cpu)
4488 {
4489 /* instr[31,24] = 0110 1110
4490 instr[23] = max(0)/min(1)
4491 instr[22,14] = 011 0000 11
4492 instr[13,12] = nm(00)/normal(11)
4493 instr[11,10] = 10
4494 instr[9,5] = V source
4495 instr[4,0] = R dest. */
4496
4497 unsigned vs = INSTR (9, 5);
4498 unsigned rd = INSTR (4, 0);
4499 unsigned i;
4500 float res = aarch64_get_vec_float (cpu, vs, 0);
4501
4502 NYI_assert (31, 24, 0x6E);
4503 NYI_assert (22, 14, 0x0C3);
4504 NYI_assert (11, 10, 2);
4505
4506 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
4507 if (INSTR (23, 23))
4508 {
4509 switch (INSTR (13, 12))
4510 {
4511 case 0: /* FMINNMV. */
4512 for (i = 1; i < 4; i++)
4513 res = fminnm (res, aarch64_get_vec_float (cpu, vs, i));
4514 break;
4515
4516 case 3: /* FMINV. */
4517 for (i = 1; i < 4; i++)
4518 res = min (res, aarch64_get_vec_float (cpu, vs, i));
4519 break;
4520
4521 default:
4522 HALT_NYI;
4523 }
4524 }
4525 else
4526 {
4527 switch (INSTR (13, 12))
4528 {
4529 case 0: /* FMAXNMV. */
4530 for (i = 1; i < 4; i++)
4531 res = fmaxnm (res, aarch64_get_vec_float (cpu, vs, i));
4532 break;
4533
4534 case 3: /* FMAXV. */
4535 for (i = 1; i < 4; i++)
4536 res = max (res, aarch64_get_vec_float (cpu, vs, i));
4537 break;
4538
4539 default:
4540 HALT_NYI;
4541 }
4542 }
4543
4544 aarch64_set_FP_float (cpu, rd, res);
4545 }
4546
4547 static void
4548 do_vec_Fminmax (sim_cpu *cpu)
4549 {
4550 /* instr[31] = 0
4551 instr[30] = half(0)/full(1)
4552 instr[29,24] = 00 1110
4553 instr[23] = max(0)/min(1)
4554 instr[22] = float(0)/double(1)
4555 instr[21] = 1
4556 instr[20,16] = Vm
4557 instr[15,14] = 11
4558 instr[13,12] = nm(00)/normal(11)
4559 instr[11,10] = 01
4560 instr[9,5] = Vn
4561 instr[4,0] = Vd. */
4562
4563 unsigned vm = INSTR (20, 16);
4564 unsigned vn = INSTR (9, 5);
4565 unsigned vd = INSTR (4, 0);
4566 unsigned full = INSTR (30, 30);
4567 unsigned min = INSTR (23, 23);
4568 unsigned i;
4569
4570 NYI_assert (29, 24, 0x0E);
4571 NYI_assert (21, 21, 1);
4572 NYI_assert (15, 14, 3);
4573 NYI_assert (11, 10, 1);
4574
4575 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
4576 if (INSTR (22, 22))
4577 {
4578 double (* func)(double, double);
4579
4580 if (! full)
4581 HALT_NYI;
4582
4583 if (INSTR (13, 12) == 0)
4584 func = min ? dminnm : dmaxnm;
4585 else if (INSTR (13, 12) == 3)
4586 func = min ? fmin : fmax;
4587 else
4588 HALT_NYI;
4589
4590 for (i = 0; i < 2; i++)
4591 aarch64_set_vec_double (cpu, vd, i,
4592 func (aarch64_get_vec_double (cpu, vn, i),
4593 aarch64_get_vec_double (cpu, vm, i)));
4594 }
4595 else
4596 {
4597 float (* func)(float, float);
4598
4599 if (INSTR (13, 12) == 0)
4600 func = min ? fminnm : fmaxnm;
4601 else if (INSTR (13, 12) == 3)
4602 func = min ? fminf : fmaxf;
4603 else
4604 HALT_NYI;
4605
4606 for (i = 0; i < (full ? 4 : 2); i++)
4607 aarch64_set_vec_float (cpu, vd, i,
4608 func (aarch64_get_vec_float (cpu, vn, i),
4609 aarch64_get_vec_float (cpu, vm, i)));
4610 }
4611 }
4612
4613 static void
4614 do_vec_SCVTF (sim_cpu *cpu)
4615 {
4616 /* instr[31] = 0
4617 instr[30] = Q
4618 instr[29,23] = 00 1110 0
4619 instr[22] = float(0)/double(1)
4620 instr[21,10] = 10 0001 1101 10
4621 instr[9,5] = Vn
4622 instr[4,0] = Vd. */
4623
4624 unsigned vn = INSTR (9, 5);
4625 unsigned vd = INSTR (4, 0);
4626 unsigned full = INSTR (30, 30);
4627 unsigned size = INSTR (22, 22);
4628 unsigned i;
4629
4630 NYI_assert (29, 23, 0x1C);
4631 NYI_assert (21, 10, 0x876);
4632
4633 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
4634 if (size)
4635 {
4636 if (! full)
4637 HALT_UNALLOC;
4638
4639 for (i = 0; i < 2; i++)
4640 {
4641 double val = (double) aarch64_get_vec_s64 (cpu, vn, i); /* SCVTF: elements are signed. */
4642 aarch64_set_vec_double (cpu, vd, i, val);
4643 }
4644 }
4645 else
4646 {
4647 for (i = 0; i < (full ? 4 : 2); i++)
4648 {
4649 float val = (float) aarch64_get_vec_s32 (cpu, vn, i);
4650 aarch64_set_vec_float (cpu, vd, i, val);
4651 }
4652 }
4653 }
4654
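/* Helpers for the SIMD compare instructions. Each lane of Vd is set
to all ones when the comparison holds and to all zeroes otherwise,
producing the element-wide masks the architecture defines. The
macros expect i, size, full, vd, vn and vm to be in scope at the
expansion site. */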
4655 #define VEC_CMP(SOURCE, CMP) \
4656 do \
4657 { \
4658 switch (size) \
4659 { \
4660 case 0: \
4661 for (i = 0; i < (full ? 16 : 8); i++) \
4662 aarch64_set_vec_u8 (cpu, vd, i, \
4663 aarch64_get_vec_##SOURCE##8 (cpu, vn, i) \
4664 CMP \
4665 aarch64_get_vec_##SOURCE##8 (cpu, vm, i) \
4666 ? -1 : 0); \
4667 return; \
4668 case 1: \
4669 for (i = 0; i < (full ? 8 : 4); i++) \
4670 aarch64_set_vec_u16 (cpu, vd, i, \
4671 aarch64_get_vec_##SOURCE##16 (cpu, vn, i) \
4672 CMP \
4673 aarch64_get_vec_##SOURCE##16 (cpu, vm, i) \
4674 ? -1 : 0); \
4675 return; \
4676 case 2: \
4677 for (i = 0; i < (full ? 4 : 2); i++) \
4678 aarch64_set_vec_u32 (cpu, vd, i, \
4679 aarch64_get_vec_##SOURCE##32 (cpu, vn, i) \
4680 CMP \
4681 aarch64_get_vec_##SOURCE##32 (cpu, vm, i) \
4682 ? -1 : 0); \
4683 return; \
4684 case 3: \
4685 if (! full) \
4686 HALT_UNALLOC; \
4687 for (i = 0; i < 2; i++) \
4688 aarch64_set_vec_u64 (cpu, vd, i, \
4689 aarch64_get_vec_##SOURCE##64 (cpu, vn, i) \
4690 CMP \
4691 aarch64_get_vec_##SOURCE##64 (cpu, vm, i) \
4692 ? -1ULL : 0); \
4693 return; \
4694 } \
4695 } \
4696 while (0)
4697
4698 #define VEC_CMP0(SOURCE, CMP) \
4699 do \
4700 { \
4701 switch (size) \
4702 { \
4703 case 0: \
4704 for (i = 0; i < (full ? 16 : 8); i++) \
4705 aarch64_set_vec_u8 (cpu, vd, i, \
4706 aarch64_get_vec_##SOURCE##8 (cpu, vn, i) \
4707 CMP 0 ? -1 : 0); \
4708 return; \
4709 case 1: \
4710 for (i = 0; i < (full ? 8 : 4); i++) \
4711 aarch64_set_vec_u16 (cpu, vd, i, \
4712 aarch64_get_vec_##SOURCE##16 (cpu, vn, i) \
4713 CMP 0 ? -1 : 0); \
4714 return; \
4715 case 2: \
4716 for (i = 0; i < (full ? 4 : 2); i++) \
4717 aarch64_set_vec_u32 (cpu, vd, i, \
4718 aarch64_get_vec_##SOURCE##32 (cpu, vn, i) \
4719 CMP 0 ? -1 : 0); \
4720 return; \
4721 case 3: \
4722 if (! full) \
4723 HALT_UNALLOC; \
4724 for (i = 0; i < 2; i++) \
4725 aarch64_set_vec_u64 (cpu, vd, i, \
4726 aarch64_get_vec_##SOURCE##64 (cpu, vn, i) \
4727 CMP 0 ? -1ULL : 0); \
4728 return; \
4729 } \
4730 } \
4731 while (0)
4732
4733 #define VEC_FCMP0(CMP) \
4734 do \
4735 { \
4736 if (vm != 0) \
4737 HALT_NYI; \
4738 if (INSTR (22, 22)) \
4739 { \
4740 if (! full) \
4741 HALT_NYI; \
4742 for (i = 0; i < 2; i++) \
4743 aarch64_set_vec_u64 (cpu, vd, i, \
4744 aarch64_get_vec_double (cpu, vn, i) \
4745 CMP 0.0 ? -1 : 0); \
4746 } \
4747 else \
4748 { \
4749 for (i = 0; i < (full ? 4 : 2); i++) \
4750 aarch64_set_vec_u32 (cpu, vd, i, \
4751 aarch64_get_vec_float (cpu, vn, i) \
4752 CMP 0.0 ? -1 : 0); \
4753 } \
4754 return; \
4755 } \
4756 while (0)
4757
4758 #define VEC_FCMP(CMP) \
4759 do \
4760 { \
4761 if (INSTR (22, 22)) \
4762 { \
4763 if (! full) \
4764 HALT_NYI; \
4765 for (i = 0; i < 2; i++) \
4766 aarch64_set_vec_u64 (cpu, vd, i, \
4767 aarch64_get_vec_double (cpu, vn, i) \
4768 CMP \
4769 aarch64_get_vec_double (cpu, vm, i) \
4770 ? -1 : 0); \
4771 } \
4772 else \
4773 { \
4774 for (i = 0; i < (full ? 4 : 2); i++) \
4775 aarch64_set_vec_u32 (cpu, vd, i, \
4776 aarch64_get_vec_float (cpu, vn, i) \
4777 CMP \
4778 aarch64_get_vec_float (cpu, vm, i) \
4779 ? -1 : 0); \
4780 } \
4781 return; \
4782 } \
4783 while (0)
4784
4785 static void
4786 do_vec_compare (sim_cpu *cpu)
4787 {
4788 /* instr[31] = 0
4789 instr[30] = half(0)/full(1)
4790 instr[29] = part-of-comparison-type
4791 instr[28,24] = 0 1110
4792 instr[23,22] = size of integer compares: byte(00), half(01), word (10), long (11)
4793 type of float compares: single (x0) / double (x1)
4794 instr[21] = 1
4795 instr[20,16] = Vm or 00000 (compare vs 0)
4796 instr[15,10] = part-of-comparison-type
4797 instr[9,5] = Vn
4798 instr[4,0] = Vd. */
4799
4800 int full = INSTR (30, 30);
4801 int size = INSTR (23, 22);
4802 unsigned vm = INSTR (20, 16);
4803 unsigned vn = INSTR (9, 5);
4804 unsigned vd = INSTR (4, 0);
4805 unsigned i;
4806
4807 NYI_assert (28, 24, 0x0E);
4808 NYI_assert (21, 21, 1);
4809
4810 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
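/* Several encodings in this space are not compares at all: when the
Vm field of a compare-against-zero pattern is non-zero, the
instruction is really a reduction or a convert, so re-dispatch it. */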
4811 if ((INSTR (11, 11)
4812 && INSTR (14, 14))
4813 || ((INSTR (11, 11) == 0
4814 && INSTR (10, 10) == 0)))
4815 {
4816 /* A compare vs 0. */
4817 if (vm != 0)
4818 {
4819 if (INSTR (15, 10) == 0x2A)
4820 do_vec_maxv (cpu);
4821 else if (INSTR (15, 10) == 0x32
4822 || INSTR (15, 10) == 0x3E)
4823 do_vec_fminmaxV (cpu);
4824 else if (INSTR (29, 23) == 0x1C
4825 && INSTR (21, 10) == 0x876)
4826 do_vec_SCVTF (cpu);
4827 else
4828 HALT_NYI;
4829 return;
4830 }
4831 }
4832
4833 if (INSTR (14, 14))
4834 {
4835 /* A floating point compare. */
4836 unsigned decode = (INSTR (29, 29) << 5) | (INSTR (23, 23) << 4)
4837 | INSTR (13, 10);
4838
4839 NYI_assert (15, 15, 1);
4840
4841 switch (decode)
4842 {
4843 case /* 0b010010: GT#0 */ 0x12: VEC_FCMP0 (>);
4844 case /* 0b110010: GE#0 */ 0x32: VEC_FCMP0 (>=);
4845 case /* 0b010110: EQ#0 */ 0x16: VEC_FCMP0 (==);
4846 case /* 0b110110: LE#0 */ 0x36: VEC_FCMP0 (<=);
4847 case /* 0b011010: LT#0 */ 0x1A: VEC_FCMP0 (<);
4848 case /* 0b111001: GT */ 0x39: VEC_FCMP (>);
4849 case /* 0b101001: GE */ 0x29: VEC_FCMP (>=);
4850 case /* 0b001001: EQ */ 0x09: VEC_FCMP (==);
4851
4852 default:
4853 HALT_NYI;
4854 }
4855 }
4856 else
4857 {
4858 unsigned decode = (INSTR (29, 29) << 6) | INSTR (15, 10);
4859
4860 switch (decode)
4861 {
4862 case 0x0D: /* 0001101 GT */ VEC_CMP (s, > );
4863 case 0x0F: /* 0001111 GE */ VEC_CMP (s, >= );
4864 case 0x22: /* 0100010 GT #0 */ VEC_CMP0 (s, > );
4865 case 0x23: /* 0100011 TST */ VEC_CMP (u, & );
4866 case 0x26: /* 0100110 EQ #0 */ VEC_CMP0 (s, == );
4867 case 0x2A: /* 0101010 LT #0 */ VEC_CMP0 (s, < );
4868 case 0x4D: /* 1001101 HI */ VEC_CMP (u, > );
4869 case 0x4F: /* 1001111 HS */ VEC_CMP (u, >= );
4870 case 0x62: /* 1100010 GE #0 */ VEC_CMP0 (s, >= );
4871 case 0x63: /* 1100011 EQ */ VEC_CMP (u, == );
4872 case 0x66: /* 1100110 LE #0 */ VEC_CMP0 (s, <= );
4873 default:
4874 if (vm == 0)
4875 HALT_NYI;
4876 do_vec_maxv (cpu);
4877 }
4878 }
4879 }
4880
4881 static void
4882 do_vec_SSHL (sim_cpu *cpu)
4883 {
4884 /* instr[31] = 0
4885 instr[30] = first part (0)/ second part (1)
4886 instr[29,24] = 00 1110
4887 instr[23,22] = size: byte(00), half(01), word (10), long (11)
4888 instr[21] = 1
4889 instr[20,16] = Vm
4890 instr[15,10] = 0100 01
4891 instr[9,5] = Vn
4892 instr[4,0] = Vd. */
4893
4894 unsigned full = INSTR (30, 30);
4895 unsigned vm = INSTR (20, 16);
4896 unsigned vn = INSTR (9, 5);
4897 unsigned vd = INSTR (4, 0);
4898 unsigned i;
4899 signed int shift;
4900
4901 NYI_assert (29, 24, 0x0E);
4902 NYI_assert (21, 21, 1);
4903 NYI_assert (15, 10, 0x11);
4904
4905 /* The shift count in Vm is signed: a negative count shifts the
corresponding element of Vn right instead of left. Note that C's >>
on a negative signed value is implementation-defined, though most
compilers implement the arithmetic shift that SSHL requires. */
4906
4907 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
4908 switch (INSTR (23, 22))
4909 {
4910 case 0:
4911 for (i = 0; i < (full ? 16 : 8); i++)
4912 {
4913 shift = aarch64_get_vec_s8 (cpu, vm, i);
4914 if (shift >= 0)
4915 aarch64_set_vec_s8 (cpu, vd, i, aarch64_get_vec_s8 (cpu, vn, i)
4916 << shift);
4917 else
4918 aarch64_set_vec_s8 (cpu, vd, i, aarch64_get_vec_s8 (cpu, vn, i)
4919 >> - shift);
4920 }
4921 return;
4922
4923 case 1:
4924 for (i = 0; i < (full ? 8 : 4); i++)
4925 {
4926 shift = aarch64_get_vec_s8 (cpu, vm, i * 2);
4927 if (shift >= 0)
4928 aarch64_set_vec_s16 (cpu, vd, i, aarch64_get_vec_s16 (cpu, vn, i)
4929 << shift);
4930 else
4931 aarch64_set_vec_s16 (cpu, vd, i, aarch64_get_vec_s16 (cpu, vn, i)
4932 >> - shift);
4933 }
4934 return;
4935
4936 case 2:
4937 for (i = 0; i < (full ? 4 : 2); i++)
4938 {
4939 shift = aarch64_get_vec_s8 (cpu, vm, i * 4);
4940 if (shift >= 0)
4941 aarch64_set_vec_s32 (cpu, vd, i, aarch64_get_vec_s32 (cpu, vn, i)
4942 << shift);
4943 else
4944 aarch64_set_vec_s32 (cpu, vd, i, aarch64_get_vec_s32 (cpu, vn, i)
4945 >> - shift);
4946 }
4947 return;
4948
4949 case 3:
4950 if (! full)
4951 HALT_UNALLOC;
4952 for (i = 0; i < 2; i++)
4953 {
4954 shift = aarch64_get_vec_s8 (cpu, vm, i * 8);
4955 if (shift >= 0)
4956 aarch64_set_vec_s64 (cpu, vd, i, aarch64_get_vec_s64 (cpu, vn, i)
4957 << shift);
4958 else
4959 aarch64_set_vec_s64 (cpu, vd, i, aarch64_get_vec_s64 (cpu, vn, i)
4960 >> - shift);
4961 }
4962 return;
4963 }
4964 }
4965
4966 static void
4967 do_vec_USHL (sim_cpu *cpu)
4968 {
4969 /* instr[31] = 0
4970 instr[30] = first part (0)/ second part (1)
4971 instr[29,24] = 10 1110
4972 instr[23,22] = size: byte(00), half(01), word (10), long (11)
4973 instr[21] = 1
4974 instr[20,16] = Vm
4975 instr[15,10] = 0100 01
4976 instr[9,5] = Vn
4977 instr[4,0] = Vd */
4978
4979 unsigned full = INSTR (30, 30);
4980 unsigned vm = INSTR (20, 16);
4981 unsigned vn = INSTR (9, 5);
4982 unsigned vd = INSTR (4, 0);
4983 unsigned i;
4984 signed int shift;
4985
4986 NYI_assert (29, 24, 0x2E);
4987 NYI_assert (15, 10, 0x11);
4988
4989 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
4990 switch (INSTR (23, 22))
4991 {
4992 case 0:
4993 for (i = 0; i < (full ? 16 : 8); i++)
4994 {
4995 shift = aarch64_get_vec_s8 (cpu, vm, i);
4996 if (shift >= 0)
4997 aarch64_set_vec_u8 (cpu, vd, i, aarch64_get_vec_u8 (cpu, vn, i)
4998 << shift);
4999 else
5000 aarch64_set_vec_u8 (cpu, vd, i, aarch64_get_vec_u8 (cpu, vn, i)
5001 >> - shift);
5002 }
5003 return;
5004
5005 case 1:
5006 for (i = 0; i < (full ? 8 : 4); i++)
5007 {
5008 shift = aarch64_get_vec_s8 (cpu, vm, i * 2);
5009 if (shift >= 0)
5010 aarch64_set_vec_u16 (cpu, vd, i, aarch64_get_vec_u16 (cpu, vn, i)
5011 << shift);
5012 else
5013 aarch64_set_vec_u16 (cpu, vd, i, aarch64_get_vec_u16 (cpu, vn, i)
5014 >> - shift);
5015 }
5016 return;
5017
5018 case 2:
5019 for (i = 0; i < (full ? 4 : 2); i++)
5020 {
5021 shift = aarch64_get_vec_s8 (cpu, vm, i * 4);
5022 if (shift >= 0)
5023 aarch64_set_vec_u32 (cpu, vd, i, aarch64_get_vec_u32 (cpu, vn, i)
5024 << shift);
5025 else
5026 aarch64_set_vec_u32 (cpu, vd, i, aarch64_get_vec_u32 (cpu, vn, i)
5027 >> - shift);
5028 }
5029 return;
5030
5031 case 3:
5032 if (! full)
5033 HALT_UNALLOC;
5034 for (i = 0; i < 2; i++)
5035 {
5036 shift = aarch64_get_vec_s8 (cpu, vm, i * 8);
5037 if (shift >= 0)
5038 aarch64_set_vec_u64 (cpu, vd, i, aarch64_get_vec_u64 (cpu, vn, i)
5039 << shift);
5040 else
5041 aarch64_set_vec_u64 (cpu, vd, i, aarch64_get_vec_u64 (cpu, vn, i)
5042 >> - shift);
5043 }
5044 return;
5045 }
5046 }
5047
5048 static void
5049 do_vec_FMLA (sim_cpu *cpu)
5050 {
5051 /* instr[31] = 0
5052 instr[30] = full/half selector
5053 instr[29,23] = 0011100
5054 instr[22] = size: 0=>float, 1=>double
5055 instr[21] = 1
5056 instr[20,16] = Vm
5057 instr[15,10] = 1100 11
5058 instr[9,5] = Vn
5059 instr[4,0] = Vd. */
5060
5061 unsigned vm = INSTR (20, 16);
5062 unsigned vn = INSTR (9, 5);
5063 unsigned vd = INSTR (4, 0);
5064 unsigned i;
5065 int full = INSTR (30, 30);
5066
5067 NYI_assert (29, 23, 0x1C);
5068 NYI_assert (21, 21, 1);
5069 NYI_assert (15, 10, 0x33);
5070
5071 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
5072 if (INSTR (22, 22))
5073 {
5074 if (! full)
5075 HALT_UNALLOC;
5076 for (i = 0; i < 2; i++)
5077 aarch64_set_vec_double (cpu, vd, i,
5078 aarch64_get_vec_double (cpu, vn, i) *
5079 aarch64_get_vec_double (cpu, vm, i) +
5080 aarch64_get_vec_double (cpu, vd, i));
5081 }
5082 else
5083 {
5084 for (i = 0; i < (full ? 4 : 2); i++)
5085 aarch64_set_vec_float (cpu, vd, i,
5086 aarch64_get_vec_float (cpu, vn, i) *
5087 aarch64_get_vec_float (cpu, vm, i) +
5088 aarch64_get_vec_float (cpu, vd, i));
5089 }
5090 }
5091
5092 static void
5093 do_vec_max (sim_cpu *cpu)
5094 {
5095 /* instr[31] = 0
5096 instr[30] = full/half selector
5097 instr[29] = SMAX (0) / UMAX (1)
5098 instr[28,24] = 0 1110
5099 instr[23,22] = size: 00=> 8-bit, 01=> 16-bit, 10=> 32-bit
5100 instr[21] = 1
5101 instr[20,16] = Vm
5102 instr[15,10] = 0110 01
5103 instr[9,5] = Vn
5104 instr[4,0] = Vd. */
5105
5106 unsigned vm = INSTR (20, 16);
5107 unsigned vn = INSTR (9, 5);
5108 unsigned vd = INSTR (4, 0);
5109 unsigned i;
5110 int full = INSTR (30, 30);
5111
5112 NYI_assert (28, 24, 0x0E);
5113 NYI_assert (21, 21, 1);
5114 NYI_assert (15, 10, 0x19);
5115
5116 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
5117 if (INSTR (29, 29))
5118 {
5119 switch (INSTR (23, 22))
5120 {
5121 case 0:
5122 for (i = 0; i < (full ? 16 : 8); i++)
5123 aarch64_set_vec_u8 (cpu, vd, i,
5124 aarch64_get_vec_u8 (cpu, vn, i)
5125 > aarch64_get_vec_u8 (cpu, vm, i)
5126 ? aarch64_get_vec_u8 (cpu, vn, i)
5127 : aarch64_get_vec_u8 (cpu, vm, i));
5128 return;
5129
5130 case 1:
5131 for (i = 0; i < (full ? 8 : 4); i++)
5132 aarch64_set_vec_u16 (cpu, vd, i,
5133 aarch64_get_vec_u16 (cpu, vn, i)
5134 > aarch64_get_vec_u16 (cpu, vm, i)
5135 ? aarch64_get_vec_u16 (cpu, vn, i)
5136 : aarch64_get_vec_u16 (cpu, vm, i));
5137 return;
5138
5139 case 2:
5140 for (i = 0; i < (full ? 4 : 2); i++)
5141 aarch64_set_vec_u32 (cpu, vd, i,
5142 aarch64_get_vec_u32 (cpu, vn, i)
5143 > aarch64_get_vec_u32 (cpu, vm, i)
5144 ? aarch64_get_vec_u32 (cpu, vn, i)
5145 : aarch64_get_vec_u32 (cpu, vm, i));
5146 return;
5147
5148 case 3:
5149 HALT_UNALLOC;
5150 }
5151 }
5152 else
5153 {
5154 switch (INSTR (23, 22))
5155 {
5156 case 0:
5157 for (i = 0; i < (full ? 16 : 8); i++)
5158 aarch64_set_vec_s8 (cpu, vd, i,
5159 aarch64_get_vec_s8 (cpu, vn, i)
5160 > aarch64_get_vec_s8 (cpu, vm, i)
5161 ? aarch64_get_vec_s8 (cpu, vn, i)
5162 : aarch64_get_vec_s8 (cpu, vm, i));
5163 return;
5164
5165 case 1:
5166 for (i = 0; i < (full ? 8 : 4); i++)
5167 aarch64_set_vec_s16 (cpu, vd, i,
5168 aarch64_get_vec_s16 (cpu, vn, i)
5169 > aarch64_get_vec_s16 (cpu, vm, i)
5170 ? aarch64_get_vec_s16 (cpu, vn, i)
5171 : aarch64_get_vec_s16 (cpu, vm, i));
5172 return;
5173
5174 case 2:
5175 for (i = 0; i < (full ? 4 : 2); i++)
5176 aarch64_set_vec_s32 (cpu, vd, i,
5177 aarch64_get_vec_s32 (cpu, vn, i)
5178 > aarch64_get_vec_s32 (cpu, vm, i)
5179 ? aarch64_get_vec_s32 (cpu, vn, i)
5180 : aarch64_get_vec_s32 (cpu, vm, i));
5181 return;
5182
5183 case 3:
5184 HALT_UNALLOC;
5185 }
5186 }
5187 }
5188
5189 static void
5190 do_vec_min (sim_cpu *cpu)
5191 {
5192 /* instr[31] = 0
5193 instr[30] = full/half selector
5194 instr[29] = SMIN (0) / UMIN (1)
5195 instr[28,24] = 0 1110
5196 instr[23,22] = size: 00=> 8-bit, 01=> 16-bit, 10=> 32-bit
5197 instr[21] = 1
5198 instr[20,16] = Vm
5199 instr[15,10] = 0110 11
5200 instr[9,5] = Vn
5201 instr[4,0] = Vd. */
5202
5203 unsigned vm = INSTR (20, 16);
5204 unsigned vn = INSTR (9, 5);
5205 unsigned vd = INSTR (4, 0);
5206 unsigned i;
5207 int full = INSTR (30, 30);
5208
5209 NYI_assert (28, 24, 0x0E);
5210 NYI_assert (21, 21, 1);
5211 NYI_assert (15, 10, 0x1B);
5212
5213 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
5214 if (INSTR (29, 29))
5215 {
5216 switch (INSTR (23, 22))
5217 {
5218 case 0:
5219 for (i = 0; i < (full ? 16 : 8); i++)
5220 aarch64_set_vec_u8 (cpu, vd, i,
5221 aarch64_get_vec_u8 (cpu, vn, i)
5222 < aarch64_get_vec_u8 (cpu, vm, i)
5223 ? aarch64_get_vec_u8 (cpu, vn, i)
5224 : aarch64_get_vec_u8 (cpu, vm, i));
5225 return;
5226
5227 case 1:
5228 for (i = 0; i < (full ? 8 : 4); i++)
5229 aarch64_set_vec_u16 (cpu, vd, i,
5230 aarch64_get_vec_u16 (cpu, vn, i)
5231 < aarch64_get_vec_u16 (cpu, vm, i)
5232 ? aarch64_get_vec_u16 (cpu, vn, i)
5233 : aarch64_get_vec_u16 (cpu, vm, i));
5234 return;
5235
5236 case 2:
5237 for (i = 0; i < (full ? 4 : 2); i++)
5238 aarch64_set_vec_u32 (cpu, vd, i,
5239 aarch64_get_vec_u32 (cpu, vn, i)
5240 < aarch64_get_vec_u32 (cpu, vm, i)
5241 ? aarch64_get_vec_u32 (cpu, vn, i)
5242 : aarch64_get_vec_u32 (cpu, vm, i));
5243 return;
5244
5245 case 3:
5246 HALT_UNALLOC;
5247 }
5248 }
5249 else
5250 {
5251 switch (INSTR (23, 22))
5252 {
5253 case 0:
5254 for (i = 0; i < (full ? 16 : 8); i++)
5255 aarch64_set_vec_s8 (cpu, vd, i,
5256 aarch64_get_vec_s8 (cpu, vn, i)
5257 < aarch64_get_vec_s8 (cpu, vm, i)
5258 ? aarch64_get_vec_s8 (cpu, vn, i)
5259 : aarch64_get_vec_s8 (cpu, vm, i));
5260 return;
5261
5262 case 1:
5263 for (i = 0; i < (full ? 8 : 4); i++)
5264 aarch64_set_vec_s16 (cpu, vd, i,
5265 aarch64_get_vec_s16 (cpu, vn, i)
5266 < aarch64_get_vec_s16 (cpu, vm, i)
5267 ? aarch64_get_vec_s16 (cpu, vn, i)
5268 : aarch64_get_vec_s16 (cpu, vm, i));
5269 return;
5270
5271 case 2:
5272 for (i = 0; i < (full ? 4 : 2); i++)
5273 aarch64_set_vec_s32 (cpu, vd, i,
5274 aarch64_get_vec_s32 (cpu, vn, i)
5275 < aarch64_get_vec_s32 (cpu, vm, i)
5276 ? aarch64_get_vec_s32 (cpu, vn, i)
5277 : aarch64_get_vec_s32 (cpu, vm, i));
5278 return;
5279
5280 case 3:
5281 HALT_UNALLOC;
5282 }
5283 }
5284 }
5285
5286 static void
5287 do_vec_sub_long (sim_cpu *cpu)
5288 {
5289 /* instr[31] = 0
5290 instr[30] = lower (0) / upper (1)
5291 instr[29] = signed (0) / unsigned (1)
5292 instr[28,24] = 0 1110
5293 instr[23,22] = size: bytes (00), half (01), word (10)
5294 instr[21] = 1
5295 instr[20,16] = Vm
5296 instr[15,10] = 0010 00
5297 instr[9,5] = Vn
5298 instr[4,0] = V dest. */
5299
5300 unsigned size = INSTR (23, 22);
5301 unsigned vm = INSTR (20, 16);
5302 unsigned vn = INSTR (9, 5);
5303 unsigned vd = INSTR (4, 0);
5304 unsigned bias = 0;
5305 unsigned i;
5306
5307 NYI_assert (28, 24, 0x0E);
5308 NYI_assert (21, 21, 1);
5309 NYI_assert (15, 10, 0x08);
5310
5311 if (size == 3)
5312 HALT_UNALLOC;
5313
5314 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
5315 switch (INSTR (30, 29))
5316 {
5317 case 2: /* SSUBL2. */
5318 bias = 2;
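/* Fall through. */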
5319 case 0: /* SSUBL. */
5320 switch (size)
5321 {
5322 case 0:
5323 bias *= 4; /* Upper-half byte elements start at index 8. */
5324 for (i = 0; i < 8; i++)
5325 aarch64_set_vec_s16 (cpu, vd, i,
5326 aarch64_get_vec_s8 (cpu, vn, i + bias)
5327 - aarch64_get_vec_s8 (cpu, vm, i + bias));
5328 break;
5329
5330 case 1:
5331 bias *= 2;
5332 for (i = 0; i < 4; i++)
5333 aarch64_set_vec_s32 (cpu, vd, i,
5334 aarch64_get_vec_s16 (cpu, vn, i + bias)
5335 - aarch64_get_vec_s16 (cpu, vm, i + bias));
5336 break;
5337
5338 case 2:
5339 for (i = 0; i < 2; i++)
5340 aarch64_set_vec_s64 (cpu, vd, i,
5341 aarch64_get_vec_s32 (cpu, vn, i + bias)
5342 - aarch64_get_vec_s32 (cpu, vm, i + bias));
5343 break;
5344
5345 default:
5346 HALT_UNALLOC;
5347 }
5348 break;
5349
5350 case 3: /* USUBL2. */
5351 bias = 2;
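/* Fall through. */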
5352 case 1: /* USUBL. */
5353 switch (size)
5354 {
5355 case 0:
5356 bias *= 4; /* Upper-half byte elements start at index 8. */
5357 for (i = 0; i < 8; i++)
5358 aarch64_set_vec_u16 (cpu, vd, i,
5359 aarch64_get_vec_u8 (cpu, vn, i + bias)
5360 - aarch64_get_vec_u8 (cpu, vm, i + bias));
5361 break;
5362
5363 case 1:
5364 bias *= 2;
5365 for (i = 0; i < 4; i++)
5366 aarch64_set_vec_u32 (cpu, vd, i,
5367 aarch64_get_vec_u16 (cpu, vn, i + bias)
5368 - aarch64_get_vec_u16 (cpu, vm, i + bias));
5369 break;
5370
5371 case 2:
5372 for (i = 0; i < 2; i++)
5373 aarch64_set_vec_u64 (cpu, vd, i,
5374 aarch64_get_vec_u32 (cpu, vn, i + bias)
5375 - aarch64_get_vec_u32 (cpu, vm, i + bias));
5376 break;
5377
5378 default:
5379 HALT_UNALLOC;
5380 }
5381 break;
5382 }
5383 }
5384
5385 static void
5386 do_vec_ADDP (sim_cpu *cpu)
5387 {
5388 /* instr[31] = 0
5389 instr[30] = half(0)/full(1)
5390 instr[29,24] = 00 1110
5391 instr[23,22] = size: bytes (00), half (01), word (10), long (11)
5392 instr[21] = 1
5393 instr[20,16] = Vm
5394 instr[15,10] = 1011 11
5395 instr[9,5] = Vn
5396 instr[4,0] = V dest. */
5397
5398 struct aarch64_sim_cpu *aarch64_cpu = AARCH64_SIM_CPU (cpu);
5399 FRegister copy_vn;
5400 FRegister copy_vm;
5401 unsigned full = INSTR (30, 30);
5402 unsigned size = INSTR (23, 22);
5403 unsigned vm = INSTR (20, 16);
5404 unsigned vn = INSTR (9, 5);
5405 unsigned vd = INSTR (4, 0);
5406 unsigned i, range;
5407
5408 NYI_assert (29, 24, 0x0E);
5409 NYI_assert (21, 21, 1);
5410 NYI_assert (15, 10, 0x2F);
5411
5412 /* Make copies of the source registers in case vd == vn/vm. */
5413 copy_vn = aarch64_cpu->fr[vn];
5414 copy_vm = aarch64_cpu->fr[vm];
5415
5416 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
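/* Pairwise add: the low half of Vd receives the sums of adjacent
pairs of Vn, the high half those of Vm, i.e. vd[i] = vn[2i] +
vn[2i+1] and vd[i + range] = vm[2i] + vm[2i+1]. */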
5417 switch (size)
5418 {
5419 case 0:
5420 range = full ? 8 : 4;
5421 for (i = 0; i < range; i++)
5422 {
5423 aarch64_set_vec_u8 (cpu, vd, i,
5424 copy_vn.b[i * 2] + copy_vn.b[i * 2 + 1]);
5425 aarch64_set_vec_u8 (cpu, vd, i + range,
5426 copy_vm.b[i * 2] + copy_vm.b[i * 2 + 1]);
5427 }
5428 return;
5429
5430 case 1:
5431 range = full ? 4 : 2;
5432 for (i = 0; i < range; i++)
5433 {
5434 aarch64_set_vec_u16 (cpu, vd, i,
5435 copy_vn.h[i * 2] + copy_vn.h[i * 2 + 1]);
5436 aarch64_set_vec_u16 (cpu, vd, i + range,
5437 copy_vm.h[i * 2] + copy_vm.h[i * 2 + 1]);
5438 }
5439 return;
5440
5441 case 2:
5442 range = full ? 2 : 1;
5443 for (i = 0; i < range; i++)
5444 {
5445 aarch64_set_vec_u32 (cpu, vd, i,
5446 copy_vn.w[i * 2] + copy_vn.w[i * 2 + 1]);
5447 aarch64_set_vec_u32 (cpu, vd, i + range,
5448 copy_vm.w[i * 2] + copy_vm.w[i * 2 + 1]);
5449 }
5450 return;
5451
5452 case 3:
5453 if (! full)
5454 HALT_UNALLOC;
5455 aarch64_set_vec_u64 (cpu, vd, 0, copy_vn.v[0] + copy_vn.v[1]);
5456 aarch64_set_vec_u64 (cpu, vd, 1, copy_vm.v[0] + copy_vm.v[1]);
5457 return;
5458 }
5459 }
5460
5461 /* Floating-point vector convert to longer (higher precision). */
5462 static void
5463 do_vec_FCVTL (sim_cpu *cpu)
5464 {
5465 /* instr[31] = 0
5466 instr[30] = half (0) / all (1)
5467 instr[29,23] = 00 1110 0
5468 instr[22] = single (0) / double (1)
5469 instr[21,10] = 10 0001 0111 10
5470 instr[9,5] = Rn
5471 instr[4,0] = Rd. */
5472
5473 unsigned rn = INSTR (9, 5);
5474 unsigned rd = INSTR (4, 0);
5475 unsigned full = INSTR (30, 30);
5476 unsigned i;
5477
5478 NYI_assert (31, 31, 0);
5479 NYI_assert (29, 23, 0x1C);
5480 NYI_assert (21, 10, 0x85E);
5481
5482 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
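/* FCVTL converts the lower half of the source register, FCVTL2
(bit 30 set) the upper half, hence the i + 2*full source index. */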
5483 if (INSTR (22, 22))
5484 {
5485 for (i = 0; i < 2; i++)
5486 aarch64_set_vec_double (cpu, rd, i,
5487 aarch64_get_vec_float (cpu, rn, i + 2*full));
5488 }
5489 else
5490 {
5491 HALT_NYI;
5492
5493 #if 0
5494 /* TODO: Implement missing half-float support. */
5495 for (i = 0; i < 4; i++)
5496 aarch64_set_vec_float (cpu, rd, i,
5497 aarch64_get_vec_halffloat (cpu, rn, i + 4*full));
5498 #endif
5499 }
5500 }
5501
5502 static void
5503 do_vec_FABS (sim_cpu *cpu)
5504 {
5505 /* instr[31] = 0
5506 instr[30] = half(0)/full(1)
5507 instr[29,23] = 00 1110 1
5508 instr[22] = float(0)/double(1)
5509 instr[21,16] = 10 0000
5510 instr[15,10] = 1111 10
5511 instr[9,5] = Vn
5512 instr[4,0] = Vd. */
5513
5514 unsigned vn = INSTR (9, 5);
5515 unsigned vd = INSTR (4, 0);
5516 unsigned full = INSTR (30, 30);
5517 unsigned i;
5518
5519 NYI_assert (29, 23, 0x1D);
5520 NYI_assert (21, 10, 0x83E);
5521
5522 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
5523 if (INSTR (22, 22))
5524 {
5525 if (! full)
5526 HALT_NYI;
5527
5528 for (i = 0; i < 2; i++)
5529 aarch64_set_vec_double (cpu, vd, i,
5530 fabs (aarch64_get_vec_double (cpu, vn, i)));
5531 }
5532 else
5533 {
5534 for (i = 0; i < (full ? 4 : 2); i++)
5535 aarch64_set_vec_float (cpu, vd, i,
5536 fabsf (aarch64_get_vec_float (cpu, vn, i)));
5537 }
5538 }
5539
5540 static void
5541 do_vec_FCVTZS (sim_cpu *cpu)
5542 {
5543 /* instr[31] = 0
5544 instr[30] = half (0) / all (1)
5545 instr[29,23] = 00 1110 1
5546 instr[22] = single (0) / double (1)
5547 instr[21,10] = 10 0001 1011 10
5548 instr[9,5] = Rn
5549 instr[4,0] = Rd. */
5550
5551 unsigned rn = INSTR (9, 5);
5552 unsigned rd = INSTR (4, 0);
5553 unsigned full = INSTR (30, 30);
5554 unsigned i;
5555
5556 NYI_assert (31, 31, 0);
5557 NYI_assert (29, 23, 0x1D);
5558 NYI_assert (21, 10, 0x86E);
5559
5560 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
5561 if (INSTR (22, 22))
5562 {
5563 if (! full)
5564 HALT_UNALLOC;
5565
5566 for (i = 0; i < 2; i++)
5567 aarch64_set_vec_s64 (cpu, rd, i,
5568 (int64_t) aarch64_get_vec_double (cpu, rn, i));
5569 }
5570 else
5571 for (i = 0; i < (full ? 4 : 2); i++)
5572 aarch64_set_vec_s32 (cpu, rd, i,
5573 (int32_t) aarch64_get_vec_float (cpu, rn, i));
5574 }
5575
5576 static void
5577 do_vec_REV64 (sim_cpu *cpu)
5578 {
5579 /* instr[31] = 0
5580 instr[30] = full/half
5581 instr[29,24] = 00 1110
5582 instr[23,22] = size
5583 instr[21,10] = 10 0000 0000 10
5584 instr[9,5] = Rn
5585 instr[4,0] = Rd. */
5586
5587 unsigned rn = INSTR (9, 5);
5588 unsigned rd = INSTR (4, 0);
5589 unsigned size = INSTR (23, 22);
5590 unsigned full = INSTR (30, 30);
5591 unsigned i;
5592 FRegister val;
5593
5594 NYI_assert (29, 24, 0x0E);
5595 NYI_assert (21, 10, 0x802);
5596
5597 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
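/* Reversal is done by XORing the lane index: ^7 reverses bytes
within each 64-bit container, ^3 halfwords and ^1 words. */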
5598 switch (size)
5599 {
5600 case 0:
5601 for (i = 0; i < (full ? 16 : 8); i++)
5602 val.b[i ^ 0x7] = aarch64_get_vec_u8 (cpu, rn, i);
5603 break;
5604
5605 case 1:
5606 for (i = 0; i < (full ? 8 : 4); i++)
5607 val.h[i ^ 0x3] = aarch64_get_vec_u16 (cpu, rn, i);
5608 break;
5609
5610 case 2:
5611 for (i = 0; i < (full ? 4 : 2); i++)
5612 val.w[i ^ 0x1] = aarch64_get_vec_u32 (cpu, rn, i);
5613 break;
5614
5615 case 3:
5616 HALT_UNALLOC;
5617 }
5618
5619 aarch64_set_vec_u64 (cpu, rd, 0, val.v[0]);
5620 if (full)
5621 aarch64_set_vec_u64 (cpu, rd, 1, val.v[1]);
5622 }
5623
5624 static void
5625 do_vec_REV16 (sim_cpu *cpu)
5626 {
5627 /* instr[31] = 0
5628 instr[30] = full/half
5629 instr[29,24] = 00 1110
5630 instr[23,22] = size
5631 instr[21,10] = 10 0000 0001 10
5632 instr[9,5] = Rn
5633 instr[4,0] = Rd. */
5634
5635 unsigned rn = INSTR (9, 5);
5636 unsigned rd = INSTR (4, 0);
5637 unsigned size = INSTR (23, 22);
5638 unsigned full = INSTR (30, 30);
5639 unsigned i;
5640 FRegister val;
5641
5642 NYI_assert (29, 24, 0x0E);
5643 NYI_assert (21, 10, 0x806);
5644
5645 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
5646 switch (size)
5647 {
5648 case 0:
5649 for (i = 0; i < (full ? 16 : 8); i++)
5650 val.b[i ^ 0x1] = aarch64_get_vec_u8 (cpu, rn, i);
5651 break;
5652
5653 default:
5654 HALT_UNALLOC;
5655 }
5656
5657 aarch64_set_vec_u64 (cpu, rd, 0, val.v[0]);
5658 if (full)
5659 aarch64_set_vec_u64 (cpu, rd, 1, val.v[1]);
5660 }
5661
5662 static void
5663 do_vec_op1 (sim_cpu *cpu)
5664 {
5665 /* instr[31] = 0
5666 instr[30] = half/full
5667 instr[29,24] = 00 1110
5668 instr[23,21] = ???
5669 instr[20,16] = Vm
5670 instr[15,10] = sub-opcode
5671 instr[9,5] = Vn
5672 instr[4,0] = Vd */
5673 NYI_assert (29, 24, 0x0E);
5674
5675 if (INSTR (21, 21) == 0)
5676 {
5677 if (INSTR (23, 22) == 0)
5678 {
5679 if (INSTR (30, 30) == 1
5680 && INSTR (17, 14) == 0
5681 && INSTR (12, 10) == 7)
5682 return do_vec_ins_2 (cpu);
5683
5684 switch (INSTR (15, 10))
5685 {
5686 case 0x01: do_vec_DUP_vector_into_vector (cpu); return;
5687 case 0x03: do_vec_DUP_scalar_into_vector (cpu); return;
5688 case 0x07: do_vec_INS (cpu); return;
5689 case 0x0B: do_vec_SMOV_into_scalar (cpu); return;
5690 case 0x0F: do_vec_UMOV_into_scalar (cpu); return;
5691
5692 case 0x00:
5693 case 0x08:
5694 case 0x10:
5695 case 0x18:
5696 do_vec_TBL (cpu); return;
5697
5698 case 0x06:
5699 case 0x16:
5700 do_vec_UZP (cpu); return;
5701
5702 case 0x0A: do_vec_TRN (cpu); return;
5703
5704 case 0x0E:
5705 case 0x1E:
5706 do_vec_ZIP (cpu); return;
5707
5708 default:
5709 HALT_NYI;
5710 }
5711 }
5712
5713 switch (INSTR (13, 10))
5714 {
5715 case 0x6: do_vec_UZP (cpu); return;
5716 case 0xE: do_vec_ZIP (cpu); return;
5717 case 0xA: do_vec_TRN (cpu); return;
5718 default: HALT_NYI;
5719 }
5720 }
5721
5722 switch (INSTR (15, 10))
5723 {
5724 case 0x02: do_vec_REV64 (cpu); return;
5725 case 0x06: do_vec_REV16 (cpu); return;
5726
5727 case 0x07:
5728 switch (INSTR (23, 21))
5729 {
5730 case 1: do_vec_AND (cpu); return;
5731 case 3: do_vec_BIC (cpu); return;
5732 case 5: do_vec_ORR (cpu); return;
5733 case 7: do_vec_ORN (cpu); return;
5734 default: HALT_NYI;
5735 }
5736
5737 case 0x08: do_vec_sub_long (cpu); return;
5738 case 0x0a: do_vec_XTN (cpu); return;
5739 case 0x11: do_vec_SSHL (cpu); return;
5740 case 0x16: do_vec_CNT (cpu); return;
5741 case 0x19: do_vec_max (cpu); return;
5742 case 0x1B: do_vec_min (cpu); return;
5743 case 0x21: do_vec_add (cpu); return;
5744 case 0x25: do_vec_MLA (cpu); return;
5745 case 0x27: do_vec_mul (cpu); return;
5746 case 0x2F: do_vec_ADDP (cpu); return;
5747 case 0x30: do_vec_mull (cpu); return;
5748 case 0x33: do_vec_FMLA (cpu); return;
5749 case 0x35: do_vec_fadd (cpu); return;
5750
5751 case 0x1E:
5752 switch (INSTR (20, 16))
5753 {
5754 case 0x01: do_vec_FCVTL (cpu); return;
5755 default: HALT_NYI;
5756 }
5757
5758 case 0x2E:
5759 switch (INSTR (20, 16))
5760 {
5761 case 0x00: do_vec_ABS (cpu); return;
5762 case 0x01: do_vec_FCVTZS (cpu); return;
5763 case 0x11: do_vec_ADDV (cpu); return;
5764 default: HALT_NYI;
5765 }
5766
5767 case 0x31:
5768 case 0x3B:
5769 do_vec_Fminmax (cpu); return;
5770
5771 case 0x0D:
5772 case 0x0F:
5773 case 0x22:
5774 case 0x23:
5775 case 0x26:
5776 case 0x2A:
5777 case 0x32:
5778 case 0x36:
5779 case 0x39:
5780 case 0x3A:
5781 do_vec_compare (cpu); return;
5782
5783 case 0x3E:
5784 do_vec_FABS (cpu); return;
5785
5786 default:
5787 HALT_NYI;
5788 }
5789 }
5790
5791 static void
5792 do_vec_xtl (sim_cpu *cpu)
5793 {
5794 /* instr[31] = 0
5795 instr[30,29] = SXTL (00), UXTL (01), SXTL2 (10), UXTL2 (11)
5796 instr[28,22] = 0 1111 00
5797 instr[21,16] = size & shift (USHLL, SSHLL, USHLL2, SSHLL2)
5798 instr[15,10] = 1010 01
5799 instr[9,5] = V source
5800 instr[4,0] = V dest. */
5801
5802 unsigned vs = INSTR (9, 5);
5803 unsigned vd = INSTR (4, 0);
5804 unsigned i, shift, bias = 0;
5805
5806 NYI_assert (28, 22, 0x3C);
5807 NYI_assert (15, 10, 0x29);
5808
5809 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
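/* SXTL and UXTL are simply SSHLL/USHLL with a zero shift amount.
The leading one in instr[21,19] encodes the source element size, the
bits below it the shift, and bias selects the upper half of the
source register for the "2" forms. */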
5810 switch (INSTR (30, 29))
5811 {
5812 case 2: /* SXTL2, SSHLL2. */
5813 bias = 2;
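/* Fall through. */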
5814 case 0: /* SXTL, SSHLL. */
5815 if (INSTR (21, 21))
5816 {
5817 int64_t val1, val2;
5818
5819 shift = INSTR (20, 16);
5820 /* Get the source values before setting the destination values
5821 in case the source and destination are the same. */
5822 val1 = aarch64_get_vec_s32 (cpu, vs, bias) << shift;
5823 val2 = aarch64_get_vec_s32 (cpu, vs, bias + 1) << shift;
5824 aarch64_set_vec_s64 (cpu, vd, 0, val1);
5825 aarch64_set_vec_s64 (cpu, vd, 1, val2);
5826 }
5827 else if (INSTR (20, 20))
5828 {
5829 int32_t v[4];
5831
5832 shift = INSTR (19, 16);
5833 bias *= 2;
5834 for (i = 0; i < 4; i++)
5835 v[i] = aarch64_get_vec_s16 (cpu, vs, bias + i) << shift;
5836 for (i = 0; i < 4; i++)
5837 aarch64_set_vec_s32 (cpu, vd, i, v[i]);
5838 }
5839 else
5840 {
5841 int16_t v[8];
5842 NYI_assert (19, 19, 1);
5843
5844 shift = INSTR (18, 16);
5845 bias *= 4;
5846 for (i = 0; i < 8; i++)
5847 v[i] = aarch64_get_vec_s8 (cpu, vs, i + bias) << shift;
5848 for (i = 0; i < 8; i++)
5849 aarch64_set_vec_s16 (cpu, vd, i, v[i]);
5850 }
5851 return;
5852
5853 case 3: /* UXTL2, USHLL2. */
5854 bias = 2;
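/* Fall through. */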
5855 case 1: /* UXTL, USHLL. */
5856 if (INSTR (21, 21))
5857 {
5858 uint64_t v1, v2;
5859 shift = INSTR (20, 16);
5860 v1 = aarch64_get_vec_u32 (cpu, vs, bias) << shift;
5861 v2 = aarch64_get_vec_u32 (cpu, vs, bias + 1) << shift;
5862 aarch64_set_vec_u64 (cpu, vd, 0, v1);
5863 aarch64_set_vec_u64 (cpu, vd, 1, v2);
5864 }
5865 else if (INSTR (20, 20))
5866 {
5867 uint32_t v[4];
5868 shift = INSTR (19, 16);
5869 bias *= 2;
5870 for (i = 0; i < 4; i++)
5871 v[i] = aarch64_get_vec_u16 (cpu, vs, i + bias) << shift;
5872 for (i = 0; i < 4; i++)
5873 aarch64_set_vec_u32 (cpu, vd, i, v[i]);
5874 }
5875 else
5876 {
5877 uint16_t v[8];
5878 NYI_assert (19, 19, 1);
5879
5880 shift = INSTR (18, 16);
5881 bias *= 4;
5882 for (i = 0; i < 8; i++)
5883 v[i] = aarch64_get_vec_u8 (cpu, vs, i + bias) << shift;
5884 for (i = 0; i < 8; i++)
5885 aarch64_set_vec_u16 (cpu, vd, i, v[i]);
5886 }
5887 return;
5888 }
5889 }
5890
5891 static void
5892 do_vec_SHL (sim_cpu *cpu)
5893 {
5894 /* instr [31] = 0
5895 instr [30] = half(0)/full(1)
5896 instr [29,23] = 001 1110
5897 instr [22,16] = size and shift amount
5898 instr [15,10] = 01 0101
5899 instr [9, 5] = Vs
5900 instr [4, 0] = Vd. */
5901
5902 int shift;
5903 int full = INSTR (30, 30);
5904 unsigned vs = INSTR (9, 5);
5905 unsigned vd = INSTR (4, 0);
5906 unsigned i;
5907
5908 NYI_assert (29, 23, 0x1E);
5909 NYI_assert (15, 10, 0x15);
5910
5911 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
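/* The immh:immb field (bits 22,16) encodes both the element size
and the shift: the position of the most significant set bit of immh
gives the size, and the shift amount is immh:immb minus the element
width, which is what the cascade of bit tests below computes. */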
5912 if (INSTR (22, 22))
5913 {
5914 shift = INSTR (21, 16);
5915
5916 if (full == 0)
5917 HALT_UNALLOC;
5918
5919 for (i = 0; i < 2; i++)
5920 {
5921 uint64_t val = aarch64_get_vec_u64 (cpu, vs, i);
5922 aarch64_set_vec_u64 (cpu, vd, i, val << shift);
5923 }
5924
5925 return;
5926 }
5927
5928 if (INSTR (21, 21))
5929 {
5930 shift = INSTR (20, 16);
5931
5932 for (i = 0; i < (full ? 4 : 2); i++)
5933 {
5934 uint32_t val = aarch64_get_vec_u32 (cpu, vs, i);
5935 aarch64_set_vec_u32 (cpu, vd, i, val << shift);
5936 }
5937
5938 return;
5939 }
5940
5941 if (INSTR (20, 20))
5942 {
5943 shift = INSTR (19, 16);
5944
5945 for (i = 0; i < (full ? 8 : 4); i++)
5946 {
5947 uint16_t val = aarch64_get_vec_u16 (cpu, vs, i);
5948 aarch64_set_vec_u16 (cpu, vd, i, val << shift);
5949 }
5950
5951 return;
5952 }
5953
5954 if (INSTR (19, 19) == 0)
5955 HALT_UNALLOC;
5956
5957 shift = INSTR (18, 16);
5958
5959 for (i = 0; i < (full ? 16 : 8); i++)
5960 {
5961 uint8_t val = aarch64_get_vec_u8 (cpu, vs, i);
5962 aarch64_set_vec_u8 (cpu, vd, i, val << shift);
5963 }
5964 }
5965
5966 static void
5967 do_vec_SSHR_USHR (sim_cpu *cpu)
5968 {
5969 /* instr [31] = 0
5970 instr [30] = half(0)/full(1)
5971 instr [29] = signed(0)/unsigned(1)
5972 instr [28,23] = 0 1111 0
5973 instr [22,16] = size and shift amount
5974 instr [15,10] = 0000 01
5975 instr [9, 5] = Vs
5976 instr [4, 0] = Vd. */
5977
5978 int full = INSTR (30, 30);
5979 int sign = ! INSTR (29, 29);
5980 unsigned shift = INSTR (22, 16);
5981 unsigned vs = INSTR (9, 5);
5982 unsigned vd = INSTR (4, 0);
5983 unsigned i;
5984
5985 NYI_assert (28, 23, 0x1E);
5986 NYI_assert (15, 10, 0x01);
5987
5988 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
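/* For the right shifts the encoding is inverted: the shift amount
is twice the element width minus immh:immb, so each size case below
rescales the raw field accordingly (128 - x for 64-bit lanes,
64 - x for 32-bit lanes, and so on). */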
5989 if (INSTR (22, 22))
5990 {
5991 shift = 128 - shift;
5992
5993 if (full == 0)
5994 HALT_UNALLOC;
5995
5996 if (sign)
5997 for (i = 0; i < 2; i++)
5998 {
5999 int64_t val = aarch64_get_vec_s64 (cpu, vs, i);
6000 aarch64_set_vec_s64 (cpu, vd, i, val >> shift);
6001 }
6002 else
6003 for (i = 0; i < 2; i++)
6004 {
6005 uint64_t val = aarch64_get_vec_u64 (cpu, vs, i);
6006 aarch64_set_vec_u64 (cpu, vd, i, val >> shift);
6007 }
6008
6009 return;
6010 }
6011
6012 if (INSTR (21, 21))
6013 {
6014 shift = 64 - shift;
6015
6016 if (sign)
6017 for (i = 0; i < (full ? 4 : 2); i++)
6018 {
6019 int32_t val = aarch64_get_vec_s32 (cpu, vs, i);
6020 aarch64_set_vec_s32 (cpu, vd, i, val >> shift);
6021 }
6022 else
6023 for (i = 0; i < (full ? 4 : 2); i++)
6024 {
6025 uint32_t val = aarch64_get_vec_u32 (cpu, vs, i);
6026 aarch64_set_vec_u32 (cpu, vd, i, val >> shift);
6027 }
6028
6029 return;
6030 }
6031
6032 if (INSTR (20, 20))
6033 {
6034 shift = 32 - shift;
6035
6036 if (sign)
6037 for (i = 0; i < (full ? 8 : 4); i++)
6038 {
6039 int16_t val = aarch64_get_vec_s16 (cpu, vs, i);
6040 aarch64_set_vec_s16 (cpu, vd, i, val >> shift);
6041 }
6042 else
6043 for (i = 0; i < (full ? 8 : 4); i++)
6044 {
6045 uint16_t val = aarch64_get_vec_u16 (cpu, vs, i);
6046 aarch64_set_vec_u16 (cpu, vd, i, val >> shift);
6047 }
6048
6049 return;
6050 }
6051
6052 if (INSTR (19, 19) == 0)
6053 HALT_UNALLOC;
6054
6055 shift = 16 - shift;
6056
6057 if (sign)
6058 for (i = 0; i < (full ? 16 : 8); i++)
6059 {
6060 int8_t val = aarch64_get_vec_s8 (cpu, vs, i);
6061 aarch64_set_vec_s8 (cpu, vd, i, val >> shift);
6062 }
6063 else
6064 for (i = 0; i < (full ? 16 : 8); i++)
6065 {
6066 uint8_t val = aarch64_get_vec_u8 (cpu, vs, i);
6067 aarch64_set_vec_u8 (cpu, vd, i, val >> shift);
6068 }
6069 }
6070
6071 static void
6072 do_vec_MUL_by_element (sim_cpu *cpu)
6073 {
6074 /* instr[31] = 0
6075 instr[30] = half/full
6076 instr[29,24] = 00 1111
6077 instr[23,22] = size
6078 instr[21] = L
6079 instr[20] = M
6080 instr[19,16] = m
6081 instr[15,12] = 1000
6082 instr[11] = H
6083 instr[10] = 0
6084 instr[9,5] = Vn
6085 instr[4,0] = Vd */
6086
6087 unsigned full = INSTR (30, 30);
6088 unsigned L = INSTR (21, 21);
6089 unsigned H = INSTR (11, 11);
6090 unsigned vn = INSTR (9, 5);
6091 unsigned vd = INSTR (4, 0);
6092 unsigned size = INSTR (23, 22);
6093 unsigned index;
6094 unsigned vm;
6095 unsigned e;
6096
6097 NYI_assert (29, 24, 0x0F);
6098 NYI_assert (15, 12, 0x8);
6099 NYI_assert (10, 10, 0);
6100
6101 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
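/* The index of the scalar element in Vm is assembled from the H, L
and M bits; for 16-bit lanes all three are used and Vm is restricted
to V0-V15, while for 32-bit lanes the index is just H:L. */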
6102 switch (size)
6103 {
6104 case 1:
6105 {
6106 /* 16 bit products. */
6107 uint16_t product;
6108 uint16_t element1;
6109 uint16_t element2;
6110
6111 index = (H << 2) | (L << 1) | INSTR (20, 20);
6112 vm = INSTR (19, 16);
6113 element2 = aarch64_get_vec_u16 (cpu, vm, index);
6114
6115 for (e = 0; e < (full ? 8 : 4); e ++)
6116 {
6117 element1 = aarch64_get_vec_u16 (cpu, vn, e);
6118 product = element1 * element2;
6119 aarch64_set_vec_u16 (cpu, vd, e, product);
6120 }
6121 }
6122 break;
6123
6124 case 2:
6125 {
6126 /* 32 bit products. */
6127 uint32_t product;
6128 uint32_t element1;
6129 uint32_t element2;
6130
6131 index = (H << 1) | L;
6132 vm = INSTR (20, 16);
6133 element2 = aarch64_get_vec_u32 (cpu, vm, index);
6134
6135 for (e = 0; e < (full ? 4 : 2); e ++)
6136 {
6137 element1 = aarch64_get_vec_u32 (cpu, vn, e);
6138 product = element1 * element2;
6139 aarch64_set_vec_u32 (cpu, vd, e, product);
6140 }
6141 }
6142 break;
6143
6144 default:
6145 HALT_UNALLOC;
6146 }
6147 }
6148
6149 static void
6150 do_FMLA_by_element (sim_cpu *cpu)
6151 {
6152 /* instr[31] = 0
6153 instr[30] = half/full
6154 instr[29,23] = 00 1111 1
6155 instr[22] = size
6156 instr[21] = L
6157 instr[20,16] = m
6158 instr[15,12] = 0001
6159 instr[11] = H
6160 instr[10] = 0
6161 instr[9,5] = Vn
6162 instr[4,0] = Vd */
6163
6164 unsigned full = INSTR (30, 30);
6165 unsigned size = INSTR (22, 22);
6166 unsigned L = INSTR (21, 21);
6167 unsigned vm = INSTR (20, 16);
6168 unsigned H = INSTR (11, 11);
6169 unsigned vn = INSTR (9, 5);
6170 unsigned vd = INSTR (4, 0);
6171 unsigned e;
6172
6173 NYI_assert (29, 23, 0x1F);
6174 NYI_assert (15, 12, 0x1);
6175 NYI_assert (10, 10, 0);
6176
6177 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
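/* For doubles the element index is just H (L must be zero and the
full form is required); for floats it is H:L. Note the
multiply-accumulate below is computed with two roundings rather than
as a fused IEEE FMA. */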
6178 if (size)
6179 {
6180 double element1, element2;
6181
6182 if (! full || L)
6183 HALT_UNALLOC;
6184
6185 element2 = aarch64_get_vec_double (cpu, vm, H);
6186
6187 for (e = 0; e < 2; e++)
6188 {
6189 element1 = aarch64_get_vec_double (cpu, vn, e);
6190 element1 *= element2;
6191 element1 += aarch64_get_vec_double (cpu, vd, e);
6192 aarch64_set_vec_double (cpu, vd, e, element1);
6193 }
6194 }
6195 else
6196 {
6197 float element1;
6198 float element2 = aarch64_get_vec_float (cpu, vm, (H << 1) | L);
6199
6200 for (e = 0; e < (full ? 4 : 2); e++)
6201 {
6202 element1 = aarch64_get_vec_float (cpu, vn, e);
6203 element1 *= element2;
6204 element1 += aarch64_get_vec_float (cpu, vd, e);
6205 aarch64_set_vec_float (cpu, vd, e, element1);
6206 }
6207 }
6208 }
6209
6210 static void
6211 do_vec_op2 (sim_cpu *cpu)
6212 {
6213 /* instr[31] = 0
6214 instr[30] = half/full
6215 instr[29,24] = 00 1111
6216 instr[23] = ?
6217 instr[22,16] = element size & index
6218 instr[15,10] = sub-opcode
6219 instr[9,5] = Vn
6220 instr[4,0] = Vd */
6221
6222 NYI_assert (29, 24, 0x0F);
6223
6224 if (INSTR (23, 23) != 0)
6225 {
6226 switch (INSTR (15, 10))
6227 {
6228 case 0x04:
6229 case 0x06:
6230 do_FMLA_by_element (cpu);
6231 return;
6232
6233 case 0x20:
6234 case 0x22:
6235 do_vec_MUL_by_element (cpu);
6236 return;
6237
6238 default:
6239 HALT_NYI;
6240 }
6241 }
6242 else
6243 {
6244 switch (INSTR (15, 10))
6245 {
6246 case 0x01: do_vec_SSHR_USHR (cpu); return;
6247 case 0x15: do_vec_SHL (cpu); return;
6248 case 0x20:
6249 case 0x22: do_vec_MUL_by_element (cpu); return;
6250 case 0x29: do_vec_xtl (cpu); return;
6251 default: HALT_NYI;
6252 }
6253 }
6254 }
6255
6256 static void
6257 do_vec_neg (sim_cpu *cpu)
6258 {
6259 /* instr[31] = 0
6260 instr[30] = full(1)/half(0)
6261 instr[29,24] = 10 1110
6262 instr[23,22] = size: byte(00), half (01), word (10), long (11)
6263 instr[21,10] = 1000 0010 1110
6264 instr[9,5] = Vs
6265 instr[4,0] = Vd */
6266
6267 int full = INSTR (30, 30);
6268 unsigned vs = INSTR (9, 5);
6269 unsigned vd = INSTR (4, 0);
6270 unsigned i;
6271
6272 NYI_assert (29, 24, 0x2E);
6273 NYI_assert (21, 10, 0x82E);
6274
6275 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
6276 switch (INSTR (23, 22))
6277 {
6278 case 0:
6279 for (i = 0; i < (full ? 16 : 8); i++)
6280 aarch64_set_vec_s8 (cpu, vd, i, - aarch64_get_vec_s8 (cpu, vs, i));
6281 return;
6282
6283 case 1:
6284 for (i = 0; i < (full ? 8 : 4); i++)
6285 aarch64_set_vec_s16 (cpu, vd, i, - aarch64_get_vec_s16 (cpu, vs, i));
6286 return;
6287
6288 case 2:
6289 for (i = 0; i < (full ? 4 : 2); i++)
6290 aarch64_set_vec_s32 (cpu, vd, i, - aarch64_get_vec_s32 (cpu, vs, i));
6291 return;
6292
6293 case 3:
6294 if (! full)
6295 HALT_NYI;
6296 for (i = 0; i < 2; i++)
6297 aarch64_set_vec_s64 (cpu, vd, i, - aarch64_get_vec_s64 (cpu, vs, i));
6298 return;
6299 }
6300 }
6301
6302 static void
6303 do_vec_sqrt (sim_cpu *cpu)
6304 {
6305 /* instr[31] = 0
6306 instr[30] = full(1)/half(0)
6307 instr[29,23] = 101 1101
6308 instr[22] = single(0)/double(1)
6309 instr[21,10] = 1000 0111 1110
6310 instr[9,5] = Vs
6311 instr[4,0] = Vd. */
6312
6313 int full = INSTR (30, 30);
6314 unsigned vs = INSTR (9, 5);
6315 unsigned vd = INSTR (4, 0);
6316 unsigned i;
6317
6318 NYI_assert (29, 23, 0x5B);
6319 NYI_assert (21, 10, 0x87E);
6320
6321 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
6322 if (INSTR (22, 22) == 0)
6323 for (i = 0; i < (full ? 4 : 2); i++)
6324 aarch64_set_vec_float (cpu, vd, i,
6325 sqrtf (aarch64_get_vec_float (cpu, vs, i)));
6326 else
6327 for (i = 0; i < 2; i++)
6328 aarch64_set_vec_double (cpu, vd, i,
6329 sqrt (aarch64_get_vec_double (cpu, vs, i)));
6330 }
6331
6332 static void
6333 do_vec_mls_indexed (sim_cpu *cpu)
6334 {
6335 /* instr[31] = 0
6336 instr[30] = half(0)/full(1)
6337 instr[29,24] = 10 1111
6338 instr[23,22] = 16-bit(01)/32-bit(10)
6339 instr[21,20],instr[11] = index (if 16-bit)
6340 instr[21],instr[11] = index (if 32-bit)
6341 instr[20,16] = Vm
6342 instr[15,12] = 0100
6343 instr[11] = part of index
6344 instr[10] = 0
6345 instr[9,5] = Vs
6346 instr[4,0] = Vd. */
6347
6348 int full = INSTR (30, 30);
6349 unsigned vs = INSTR (9, 5);
6350 unsigned vd = INSTR (4, 0);
6351 unsigned vm = INSTR (20, 16);
6352 unsigned i;
6353
6354 NYI_assert (15, 12, 4);
6355 NYI_assert (10, 10, 0);
6356
6357 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
6358 switch (INSTR (23, 22))
6359 {
6360 case 1:
6361 {
6362 unsigned elem;
6363 uint32_t val;
6364
6365 if (vm > 15)
6366 HALT_NYI;
6367
6368 elem = (INSTR (21, 20) << 1) | INSTR (11, 11);
6369 val = aarch64_get_vec_u16 (cpu, vm, elem);
6370
6371 for (i = 0; i < (full ? 8 : 4); i++)
6372 aarch64_set_vec_u32 (cpu, vd, i,
6373 aarch64_get_vec_u32 (cpu, vd, i) -
6374 (aarch64_get_vec_u32 (cpu, vs, i) * val));
6375 return;
6376 }
6377
6378 case 2:
6379 {
6380 unsigned elem = (INSTR (21, 21) << 1) | INSTR (11, 11);
6381 uint64_t val = aarch64_get_vec_u32 (cpu, vm, elem);
6382
6383 for (i = 0; i < (full ? 4 : 2); i++)
6384 aarch64_set_vec_u64 (cpu, vd, i,
6385 aarch64_get_vec_u64 (cpu, vd, i) -
6386 (aarch64_get_vec_u64 (cpu, vs, i) * val));
6387 return;
6388 }
6389
6390 case 0:
6391 case 3:
6392 default:
6393 HALT_NYI;
6394 }
6395 }
6396
6397 static void
6398 do_vec_SUB (sim_cpu *cpu)
6399 {
6400 /* instr [31] = 0
6401 instr [30] = half(0)/full(1)
6402 instr [29,24] = 10 1110
6403 instr [23,22] = size: byte(00), half(01), word(10), long(11)
6404 instr [21] = 1
6405 instr [20,16] = Vm
6406 instr [15,10] = 10 0001
6407 instr [9, 5] = Vn
6408 instr [4, 0] = Vd. */
6409
6410 unsigned full = INSTR (30, 30);
6411 unsigned vm = INSTR (20, 16);
6412 unsigned vn = INSTR (9, 5);
6413 unsigned vd = INSTR (4, 0);
6414 unsigned i;
6415
6416 NYI_assert (29, 24, 0x2E);
6417 NYI_assert (21, 21, 1);
6418 NYI_assert (15, 10, 0x21);
6419
6420 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
6421 switch (INSTR (23, 22))
6422 {
6423 case 0:
6424 for (i = 0; i < (full ? 16 : 8); i++)
6425 aarch64_set_vec_s8 (cpu, vd, i,
6426 aarch64_get_vec_s8 (cpu, vn, i)
6427 - aarch64_get_vec_s8 (cpu, vm, i));
6428 return;
6429
6430 case 1:
6431 for (i = 0; i < (full ? 8 : 4); i++)
6432 aarch64_set_vec_s16 (cpu, vd, i,
6433 aarch64_get_vec_s16 (cpu, vn, i)
6434 - aarch64_get_vec_s16 (cpu, vm, i));
6435 return;
6436
6437 case 2:
6438 for (i = 0; i < (full ? 4 : 2); i++)
6439 aarch64_set_vec_s32 (cpu, vd, i,
6440 aarch64_get_vec_s32 (cpu, vn, i)
6441 - aarch64_get_vec_s32 (cpu, vm, i));
6442 return;
6443
6444 case 3:
6445 if (full == 0)
6446 HALT_UNALLOC;
6447
6448 for (i = 0; i < 2; i++)
6449 aarch64_set_vec_s64 (cpu, vd, i,
6450 aarch64_get_vec_s64 (cpu, vn, i)
6451 - aarch64_get_vec_s64 (cpu, vm, i));
6452 return;
6453 }
6454 }
6455
6456 static void
6457 do_vec_MLS (sim_cpu *cpu)
6458 {
6459 /* instr [31] = 0
6460 instr [30] = half(0)/full(1)
6461 instr [29,24] = 10 1110
6462 instr [23,22] = size: byte(00), half(01), word(10)
6463 instr [21] = 1
6464 instr [20,16] = Vm
6465 instr [15,10] = 10 0101
6466 instr [9, 5] = Vn
6467 instr [4, 0] = Vd. */
6468
6469 unsigned full = INSTR (30, 30);
6470 unsigned vm = INSTR (20, 16);
6471 unsigned vn = INSTR (9, 5);
6472 unsigned vd = INSTR (4, 0);
6473 unsigned i;
6474
6475 NYI_assert (29, 24, 0x2E);
6476 NYI_assert (21, 21, 1);
6477 NYI_assert (15, 10, 0x25);
6478
6479 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
6480 switch (INSTR (23, 22))
6481 {
6482 case 0:
6483 for (i = 0; i < (full ? 16 : 8); i++)
6484 aarch64_set_vec_u8 (cpu, vd, i,
6485 aarch64_get_vec_u8 (cpu, vd, i)
6486 - (aarch64_get_vec_u8 (cpu, vn, i)
6487 * aarch64_get_vec_u8 (cpu, vm, i)));
6488 return;
6489
6490 case 1:
6491 for (i = 0; i < (full ? 8 : 4); i++)
6492 aarch64_set_vec_u16 (cpu, vd, i,
6493 aarch64_get_vec_u16 (cpu, vd, i)
6494 - (aarch64_get_vec_u16 (cpu, vn, i)
6495 * aarch64_get_vec_u16 (cpu, vm, i)));
6496 return;
6497
6498 case 2:
6499 for (i = 0; i < (full ? 4 : 2); i++)
6500 aarch64_set_vec_u32 (cpu, vd, i,
6501 aarch64_get_vec_u32 (cpu, vd, i)
6502 - (aarch64_get_vec_u32 (cpu, vn, i)
6503 * aarch64_get_vec_u32 (cpu, vm, i)));
6504 return;
6505
6506 default:
6507 HALT_UNALLOC;
6508 }
6509 }
6510
6511 static void
6512 do_vec_FDIV (sim_cpu *cpu)
6513 {
6514 /* instr [31] = 0
6515 instr [30] = half(0)/full(1)
6516 instr [29,23] = 10 1110 0
6517 instr [22] = float(0)/double(1)
6518 instr [21] = 1
6519 instr [20,16] = Vm
6520 instr [15,10] = 1111 11
6521 instr [9, 5] = Vn
6522 instr [4, 0] = Vd. */
6523
6524 unsigned full = INSTR (30, 30);
6525 unsigned vm = INSTR (20, 16);
6526 unsigned vn = INSTR (9, 5);
6527 unsigned vd = INSTR (4, 0);
6528 unsigned i;
6529
6530 NYI_assert (29, 23, 0x5C);
6531 NYI_assert (21, 21, 1);
6532 NYI_assert (15, 10, 0x3F);
6533
6534 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
6535 if (INSTR (22, 22))
6536 {
6537 if (! full)
6538 HALT_UNALLOC;
6539
6540 for (i = 0; i < 2; i++)
6541 aarch64_set_vec_double (cpu, vd, i,
6542 aarch64_get_vec_double (cpu, vn, i)
6543 / aarch64_get_vec_double (cpu, vm, i));
6544 }
6545 else
6546 for (i = 0; i < (full ? 4 : 2); i++)
6547 aarch64_set_vec_float (cpu, vd, i,
6548 aarch64_get_vec_float (cpu, vn, i)
6549 / aarch64_get_vec_float (cpu, vm, i));
6550 }
6551
6552 static void
6553 do_vec_FMUL (sim_cpu *cpu)
6554 {
6555 /* instr [31] = 0
6556 instr [30] = half(0)/full(1)
6557 instr [29,23] = 10 1110 0
6558 instr [22] = float(0)/double(1)
6559 instr [21] = 1
6560 instr [20,16] = Vm
6561 instr [15,10] = 1101 11
6562 instr [9, 5] = Vn
6563 instr [4, 0] = Vd. */
6564
6565 unsigned full = INSTR (30, 30);
6566 unsigned vm = INSTR (20, 16);
6567 unsigned vn = INSTR (9, 5);
6568 unsigned vd = INSTR (4, 0);
6569 unsigned i;
6570
6571 NYI_assert (29, 23, 0x5C);
6572 NYI_assert (21, 21, 1);
6573 NYI_assert (15, 10, 0x37);
6574
6575 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
6576 if (INSTR (22, 22))
6577 {
6578 if (! full)
6579 HALT_UNALLOC;
6580
6581 for (i = 0; i < 2; i++)
6582 aarch64_set_vec_double (cpu, vd, i,
6583 aarch64_get_vec_double (cpu, vn, i)
6584 * aarch64_get_vec_double (cpu, vm, i));
6585 }
6586 else
6587 for (i = 0; i < (full ? 4 : 2); i++)
6588 aarch64_set_vec_float (cpu, vd, i,
6589 aarch64_get_vec_float (cpu, vn, i)
6590 * aarch64_get_vec_float (cpu, vm, i));
6591 }
6592
6593 static void
6594 do_vec_FADDP (sim_cpu *cpu)
6595 {
6596 /* instr [31] = 0
6597 instr [30] = half(0)/full(1)
6598 instr [29,23] = 10 1110 0
6599 instr [22] = float(0)/double(1)
6600 instr [21] = 1
6601 instr [20,16] = Vm
6602 instr [15,10] = 1101 01
6603 instr [9, 5] = Vn
6604 instr [4, 0] = Vd. */
6605
6606 unsigned full = INSTR (30, 30);
6607 unsigned vm = INSTR (20, 16);
6608 unsigned vn = INSTR (9, 5);
6609 unsigned vd = INSTR (4, 0);
6610
6611 NYI_assert (29, 23, 0x5C);
6612 NYI_assert (21, 21, 1);
6613 NYI_assert (15, 10, 0x35);
6614
6615 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
6616 if (INSTR (22, 22))
6617 {
6618 /* Extract values before adding them in case vd == vn/vm. */
6619 double tmp1 = aarch64_get_vec_double (cpu, vn, 0);
6620 double tmp2 = aarch64_get_vec_double (cpu, vn, 1);
6621 double tmp3 = aarch64_get_vec_double (cpu, vm, 0);
6622 double tmp4 = aarch64_get_vec_double (cpu, vm, 1);
6623
6624 if (! full)
6625 HALT_UNALLOC;
6626
6627 aarch64_set_vec_double (cpu, vd, 0, tmp1 + tmp2);
6628 aarch64_set_vec_double (cpu, vd, 1, tmp3 + tmp4);
6629 }
6630 else
6631 {
6632 /* Extract values before adding them in case vd == vn/vm. */
6633 float tmp1 = aarch64_get_vec_float (cpu, vn, 0);
6634 float tmp2 = aarch64_get_vec_float (cpu, vn, 1);
6635 float tmp5 = aarch64_get_vec_float (cpu, vm, 0);
6636 float tmp6 = aarch64_get_vec_float (cpu, vm, 1);
6637
6638 if (full)
6639 {
6640 float tmp3 = aarch64_get_vec_float (cpu, vn, 2);
6641 float tmp4 = aarch64_get_vec_float (cpu, vn, 3);
6642 float tmp7 = aarch64_get_vec_float (cpu, vm, 2);
6643 float tmp8 = aarch64_get_vec_float (cpu, vm, 3);
6644
6645 aarch64_set_vec_float (cpu, vd, 0, tmp1 + tmp2);
6646 aarch64_set_vec_float (cpu, vd, 1, tmp3 + tmp4);
6647 aarch64_set_vec_float (cpu, vd, 2, tmp5 + tmp6);
6648 aarch64_set_vec_float (cpu, vd, 3, tmp7 + tmp8);
6649 }
6650 else
6651 {
6652 aarch64_set_vec_float (cpu, vd, 0, tmp1 + tmp2);
6653 aarch64_set_vec_float (cpu, vd, 1, tmp5 + tmp6);
6654 }
6655 }
6656 }
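
/* The pairwise layout implemented above: the result is the vector of
   pair-sums of the concatenation Vn:Vm, with the Vn pairs filling the
   low half of Vd and the Vm pairs the high half.  A stand-alone
   sketch over plain arrays (hypothetical helper, not used by the
   decoder):  */

static inline void
faddp_pairs (const float *vn, const float *vm, float *vd, int lanes)
{
  int i;

  for (i = 0; i < lanes / 2; i++)
    vd[i] = vn[2 * i] + vn[2 * i + 1];
  for (i = 0; i < lanes / 2; i++)
    vd[lanes / 2 + i] = vm[2 * i] + vm[2 * i + 1];
}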
6657
6658 static void
6659 do_vec_FSQRT (sim_cpu *cpu)
6660 {
6661 /* instr[31] = 0
6662 instr[30] = half(0)/full(1)
6663 instr[29,23] = 10 1110 1
6664 instr[22] = single(0)/double(1)
6665 instr[21,10] = 10 0001 1111 10
6666 instr[9,5] = Vsrc
6667 instr[4,0] = Vdest. */
6668
6669 unsigned vn = INSTR (9, 5);
6670 unsigned vd = INSTR (4, 0);
6671 unsigned full = INSTR (30, 30);
6672 int i;
6673
6674 NYI_assert (29, 23, 0x5D);
6675 NYI_assert (21, 10, 0x87E);
6676
6677 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
6678 if (INSTR (22, 22))
6679 {
6680 if (! full)
6681 HALT_UNALLOC;
6682
6683 for (i = 0; i < 2; i++)
6684 aarch64_set_vec_double (cpu, vd, i,
6685 sqrt (aarch64_get_vec_double (cpu, vn, i)));
6686 }
6687 else
6688 {
6689 for (i = 0; i < (full ? 4 : 2); i++)
6690 aarch64_set_vec_float (cpu, vd, i,
6691 sqrtf (aarch64_get_vec_float (cpu, vn, i)));
6692 }
6693 }
6694
6695 static void
6696 do_vec_FNEG (sim_cpu *cpu)
6697 {
6698 /* instr[31] = 0
6699 instr[30] = half (0)/full (1)
6700 instr[29,23] = 10 1110 1
6701 instr[22] = single (0)/double (1)
6702 instr[21,10] = 10 0000 1111 10
6703 instr[9,5] = Vsrc
6704 instr[4,0] = Vdest. */
6705
6706 unsigned vn = INSTR (9, 5);
6707 unsigned vd = INSTR (4, 0);
6708 unsigned full = INSTR (30, 30);
6709 int i;
6710
6711 NYI_assert (29, 23, 0x5D);
6712 NYI_assert (21, 10, 0x83E);
6713
6714 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
6715 if (INSTR (22, 22))
6716 {
6717 if (! full)
6718 HALT_UNALLOC;
6719
6720 for (i = 0; i < 2; i++)
6721 aarch64_set_vec_double (cpu, vd, i,
6722 - aarch64_get_vec_double (cpu, vn, i));
6723 }
6724 else
6725 {
6726 for (i = 0; i < (full ? 4 : 2); i++)
6727 aarch64_set_vec_float (cpu, vd, i,
6728 - aarch64_get_vec_float (cpu, vn, i));
6729 }
6730 }
6731
6732 static void
6733 do_vec_NOT (sim_cpu *cpu)
6734 {
6735 /* instr[31] = 0
6736 instr[30] = half (0)/full (1)
6737 instr[29,10] = 10 1110 0010 0000 0101 10
6738 instr[9,5] = Vn
6739 instr[4,0] = Vd. */
6740
6741 unsigned vn = INSTR (9, 5);
6742 unsigned vd = INSTR (4, 0);
6743 unsigned i;
6744 int full = INSTR (30, 30);
6745
6746 NYI_assert (29, 10, 0xB8816);
6747
6748 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
6749 for (i = 0; i < (full ? 16 : 8); i++)
6750 aarch64_set_vec_u8 (cpu, vd, i, ~ aarch64_get_vec_u8 (cpu, vn, i));
6751 }
6752
6753 static unsigned int
6754 clz (uint64_t val, unsigned size)
6755 {
6756 uint64_t mask = 1;
6757 int count;
6758
6759 mask <<= (size - 1);
6760 count = 0;
6761 do
6762 {
6763 if (val & mask)
6764 break;
6765 mask >>= 1;
6766 count ++;
6767 }
6768 while (mask);
6769
6770 return count;
6771 }
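
/* Example: clz (0x0F, 8) == 4, clz (1, 64) == 63 and, by
   construction, clz (0, size) == size.  On GCC or Clang hosts the
   same result could be computed (for val != 0) as

     __builtin_clzll (val) - (64 - size);

   but the loop above stays portable to other compilers.  */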
6772
6773 static void
6774 do_vec_CLZ (sim_cpu *cpu)
6775 {
6776 /* instr[31] = 0
6777 instr[30] = half (0)/full (1)
6778 instr[29,24] = 10 1110
6779 instr[23,22] = size
6780 instr[21,10] = 10 0000 0100 10
6781 instr[9,5] = Vn
6782 instr[4,0] = Vd. */
6783
6784 unsigned vn = INSTR (9, 5);
6785 unsigned vd = INSTR (4, 0);
6786 unsigned i;
6787 int full = INSTR (30,30);
6788
6789 NYI_assert (29, 24, 0x2E);
6790 NYI_assert (21, 10, 0x812);
6791
6792 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
6793 switch (INSTR (23, 22))
6794 {
6795 case 0:
6796 for (i = 0; i < (full ? 16 : 8); i++)
6797 aarch64_set_vec_u8 (cpu, vd, i, clz (aarch64_get_vec_u8 (cpu, vn, i), 8));
6798 break;
6799 case 1:
6800 for (i = 0; i < (full ? 8 : 4); i++)
6801 aarch64_set_vec_u16 (cpu, vd, i, clz (aarch64_get_vec_u16 (cpu, vn, i), 16));
6802 break;
6803 case 2:
6804 for (i = 0; i < (full ? 4 : 2); i++)
6805 aarch64_set_vec_u32 (cpu, vd, i, clz (aarch64_get_vec_u32 (cpu, vn, i), 32));
6806 break;
6807 case 3:
6808 if (! full)
6809 HALT_UNALLOC;
6810 aarch64_set_vec_u64 (cpu, vd, 0, clz (aarch64_get_vec_u64 (cpu, vn, 0), 64));
6811 aarch64_set_vec_u64 (cpu, vd, 1, clz (aarch64_get_vec_u64 (cpu, vn, 1), 64));
6812 break;
6813 }
6814 }
6815
6816 static void
6817 do_vec_MOV_element (sim_cpu *cpu)
6818 {
6819 /* instr[31,21] = 0110 1110 000
6820 instr[20,16] = size & dest index
6821 instr[15] = 0
6822 instr[14,11] = source index
6823 instr[10] = 1
6824 instr[9,5] = Vs
6825 instr[4,0] = Vd. */
6826
6827 unsigned vs = INSTR (9, 5);
6828 unsigned vd = INSTR (4, 0);
6829 unsigned src_index;
6830 unsigned dst_index;
6831
6832 NYI_assert (31, 21, 0x370);
6833 NYI_assert (15, 15, 0);
6834 NYI_assert (10, 10, 1);
6835
6836 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
6837 if (INSTR (16, 16))
6838 {
6839 /* Move a byte. */
6840 src_index = INSTR (14, 11);
6841 dst_index = INSTR (20, 17);
6842 aarch64_set_vec_u8 (cpu, vd, dst_index,
6843 aarch64_get_vec_u8 (cpu, vs, src_index));
6844 }
6845 else if (INSTR (17, 17))
6846 {
6847 /* Move 16-bits. */
6848 NYI_assert (11, 11, 0);
6849 src_index = INSTR (14, 12);
6850 dst_index = INSTR (20, 18);
6851 aarch64_set_vec_u16 (cpu, vd, dst_index,
6852 aarch64_get_vec_u16 (cpu, vs, src_index));
6853 }
6854 else if (INSTR (18, 18))
6855 {
6856 /* Move 32-bits. */
6857 NYI_assert (12, 11, 0);
6858 src_index = INSTR (14, 13);
6859 dst_index = INSTR (20, 19);
6860 aarch64_set_vec_u32 (cpu, vd, dst_index,
6861 aarch64_get_vec_u32 (cpu, vs, src_index));
6862 }
6863 else
6864 {
6865 NYI_assert (19, 19, 1);
6866 NYI_assert (13, 11, 0);
6867 src_index = INSTR (14, 14);
6868 dst_index = INSTR (20, 20);
6869 aarch64_set_vec_u64 (cpu, vd, dst_index,
6870 aarch64_get_vec_u64 (cpu, vs, src_index));
6871 }
6872 }
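
/* The imm5 field (instr[20,16]) decoded bit-by-bit above encodes the
   element size in its lowest set bit: xxxx1 = byte, xxx10 = half,
   xx100 = word, x1000 = double, with the remaining high bits holding
   the destination index.  A sketch of the same decode (hypothetical
   helper, not used by the decoder):  */

static inline unsigned
ins_element_size (unsigned imm5)
{
  /* Element size in bytes: 1, 2, 4 or 8; any other result is an
     unallocated encoding.  */
  return imm5 & - imm5;   /* Isolate the lowest set bit.  */
}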
6873
6874 static void
6875 do_vec_REV32 (sim_cpu *cpu)
6876 {
6877 /* instr[31] = 0
6878 instr[30] = full/half
6879 instr[29,24] = 10 1110
6880 instr[23,22] = size
6881 instr[21,10] = 10 0000 0000 10
6882 instr[9,5] = Rn
6883 instr[4,0] = Rd. */
6884
6885 unsigned rn = INSTR (9, 5);
6886 unsigned rd = INSTR (4, 0);
6887 unsigned size = INSTR (23, 22);
6888 unsigned full = INSTR (30, 30);
6889 unsigned i;
6890 FRegister val;
6891
6892 NYI_assert (29, 24, 0x2E);
6893 NYI_assert (21, 10, 0x802);
6894
6895 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
6896 switch (size)
6897 {
6898 case 0:
6899 for (i = 0; i < (full ? 16 : 8); i++)
6900 val.b[i ^ 0x3] = aarch64_get_vec_u8 (cpu, rn, i);
6901 break;
6902
6903 case 1:
6904 for (i = 0; i < (full ? 8 : 4); i++)
6905 val.h[i ^ 0x1] = aarch64_get_vec_u16 (cpu, rn, i);
6906 break;
6907
6908 default:
6909 HALT_UNALLOC;
6910 }
6911
6912 aarch64_set_vec_u64 (cpu, rd, 0, val.v[0]);
6913 if (full)
6914 aarch64_set_vec_u64 (cpu, rd, 1, val.v[1]);
6915 }
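
/* The i ^ 0x3 and i ^ 0x1 index flips above are the whole of REV32:
   XOR-ing an element index with (4 / element-size-in-bytes) - 1
   reverses the element order within each 32-bit chunk.  With byte
   elements, for example, indices 0,1,2,3 map to 3,2,1,0 and indices
   4,5,6,7 map to 7,6,5,4.  */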
6916
6917 static void
6918 do_vec_EXT (sim_cpu *cpu)
6919 {
6920 /* instr[31] = 0
6921 instr[30] = full/half
6922 instr[29,21] = 10 1110 000
6923 instr[20,16] = Vm
6924 instr[15] = 0
6925 instr[14,11] = source index
6926 instr[10] = 0
6927 instr[9,5] = Vn
6928 instr[4,0] = Vd. */
6929
6930 unsigned vm = INSTR (20, 16);
6931 unsigned vn = INSTR (9, 5);
6932 unsigned vd = INSTR (4, 0);
6933 unsigned src_index = INSTR (14, 11);
6934 unsigned full = INSTR (30, 30);
6935 unsigned i;
6936 unsigned j;
6937 FRegister val;
6938
6939 NYI_assert (31, 21, 0x370);
6940 NYI_assert (15, 15, 0);
6941 NYI_assert (10, 10, 0);
6942
6943 if (!full && (src_index & 0x8))
6944 HALT_UNALLOC;
6945
6946 j = 0;
6947
6948 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
6949 for (i = src_index; i < (full ? 16 : 8); i++)
6950 val.b[j ++] = aarch64_get_vec_u8 (cpu, vn, i);
6951 for (i = 0; i < src_index; i++)
6952 val.b[j ++] = aarch64_get_vec_u8 (cpu, vm, i);
6953
6954 aarch64_set_vec_u64 (cpu, vd, 0, val.v[0]);
6955 if (full)
6956 aarch64_set_vec_u64 (cpu, vd, 1, val.v[1]);
6957 }
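
/* EXT extracts Vd from the byte concatenation Vn:Vm shifted right by
   the index: bytes index..top of Vn followed by bytes 0..index-1 of
   Vm.  For a full-width vector with index 3, for example:

     Vd = { Vn[3], ..., Vn[15], Vm[0], Vm[1], Vm[2] }  */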
6958
6959 static void
6960 dexAdvSIMD0 (sim_cpu *cpu)
6961 {
6962 /* instr [28,25] = 0 111. */
6963 if ( INSTR (15, 10) == 0x07
6964 && (INSTR (9, 5) ==
6965 INSTR (20, 16)))
6966 {
6967 if (INSTR (31, 21) == 0x075
6968 || INSTR (31, 21) == 0x275)
6969 {
6970 do_vec_MOV_whole_vector (cpu);
6971 return;
6972 }
6973 }
6974
6975 if (INSTR (29, 19) == 0x1E0)
6976 {
6977 do_vec_MOV_immediate (cpu);
6978 return;
6979 }
6980
6981 if (INSTR (29, 19) == 0x5E0)
6982 {
6983 do_vec_MVNI (cpu);
6984 return;
6985 }
6986
6987 if (INSTR (29, 19) == 0x1C0
6988 || INSTR (29, 19) == 0x1C1)
6989 {
6990 if (INSTR (15, 10) == 0x03)
6991 {
6992 do_vec_DUP_scalar_into_vector (cpu);
6993 return;
6994 }
6995 }
6996
6997 switch (INSTR (29, 24))
6998 {
6999 case 0x0E: do_vec_op1 (cpu); return;
7000 case 0x0F: do_vec_op2 (cpu); return;
7001
7002 case 0x2E:
7003 if (INSTR (21, 21) == 1)
7004 {
7005 switch (INSTR (15, 10))
7006 {
7007 case 0x02:
7008 do_vec_REV32 (cpu);
7009 return;
7010
7011 case 0x07:
7012 switch (INSTR (23, 22))
7013 {
7014 case 0: do_vec_EOR (cpu); return;
7015 case 1: do_vec_BSL (cpu); return;
7016 case 2:
7017 case 3: do_vec_bit (cpu); return;
7018 }
7019 break;
7020
7021 case 0x08: do_vec_sub_long (cpu); return;
7022 case 0x11: do_vec_USHL (cpu); return;
7023 case 0x12: do_vec_CLZ (cpu); return;
7024 case 0x16: do_vec_NOT (cpu); return;
7025 case 0x19: do_vec_max (cpu); return;
7026 case 0x1B: do_vec_min (cpu); return;
7027 case 0x21: do_vec_SUB (cpu); return;
7028 case 0x25: do_vec_MLS (cpu); return;
7029 case 0x31: do_vec_FminmaxNMP (cpu); return;
7030 case 0x35: do_vec_FADDP (cpu); return;
7031 case 0x37: do_vec_FMUL (cpu); return;
7032 case 0x3F: do_vec_FDIV (cpu); return;
7033
7034 case 0x3E:
7035 switch (INSTR (20, 16))
7036 {
7037 case 0x00: do_vec_FNEG (cpu); return;
7038 case 0x01: do_vec_FSQRT (cpu); return;
7039 default: HALT_NYI;
7040 }
7041
7042 case 0x0D:
7043 case 0x0F:
7044 case 0x22:
7045 case 0x23:
7046 case 0x26:
7047 case 0x2A:
7048 case 0x32:
7049 case 0x36:
7050 case 0x39:
7051 case 0x3A:
7052 do_vec_compare (cpu); return;
7053
7054 default:
7055 break;
7056 }
7057 }
7058
7059 if (INSTR (31, 21) == 0x370)
7060 {
7061 if (INSTR (10, 10))
7062 do_vec_MOV_element (cpu);
7063 else
7064 do_vec_EXT (cpu);
7065 return;
7066 }
7067
7068 switch (INSTR (21, 10))
7069 {
7070 case 0x82E: do_vec_neg (cpu); return;
7071 case 0x87E: do_vec_sqrt (cpu); return;
7072 default:
7073 if (INSTR (15, 10) == 0x30)
7074 {
7075 do_vec_mull (cpu);
7076 return;
7077 }
7078 break;
7079 }
7080 break;
7081
7082 case 0x2f:
7083 switch (INSTR (15, 10))
7084 {
7085 case 0x01: do_vec_SSHR_USHR (cpu); return;
7086 case 0x10:
7087 case 0x12: do_vec_mls_indexed (cpu); return;
7088 case 0x29: do_vec_xtl (cpu); return;
7089 default:
7090 HALT_NYI;
7091 }
7092
7093 default:
7094 break;
7095 }
7096
7097 HALT_NYI;
7098 }
7099
7100 /* 3 sources. */
7101
7102 /* Float multiply add. */
7103 static void
7104 fmadds (sim_cpu *cpu)
7105 {
7106 unsigned sa = INSTR (14, 10);
7107 unsigned sm = INSTR (20, 16);
7108 unsigned sn = INSTR ( 9, 5);
7109 unsigned sd = INSTR ( 4, 0);
7110
7111 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7112 aarch64_set_FP_float (cpu, sd, aarch64_get_FP_float (cpu, sa)
7113 + aarch64_get_FP_float (cpu, sn)
7114 * aarch64_get_FP_float (cpu, sm));
7115 }
7116
7117 /* Double multiply add. */
7118 static void
7119 fmaddd (sim_cpu *cpu)
7120 {
7121 unsigned sa = INSTR (14, 10);
7122 unsigned sm = INSTR (20, 16);
7123 unsigned sn = INSTR ( 9, 5);
7124 unsigned sd = INSTR ( 4, 0);
7125
7126 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7127 aarch64_set_FP_double (cpu, sd, aarch64_get_FP_double (cpu, sa)
7128 + aarch64_get_FP_double (cpu, sn)
7129 * aarch64_get_FP_double (cpu, sm));
7130 }
7131
7132 /* Float multiply subtract. */
7133 static void
7134 fmsubs (sim_cpu *cpu)
7135 {
7136 unsigned sa = INSTR (14, 10);
7137 unsigned sm = INSTR (20, 16);
7138 unsigned sn = INSTR ( 9, 5);
7139 unsigned sd = INSTR ( 4, 0);
7140
7141 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7142 aarch64_set_FP_float (cpu, sd, aarch64_get_FP_float (cpu, sa)
7143 - aarch64_get_FP_float (cpu, sn)
7144 * aarch64_get_FP_float (cpu, sm));
7145 }
7146
7147 /* Double multiply subtract. */
7148 static void
7149 fmsubd (sim_cpu *cpu)
7150 {
7151 unsigned sa = INSTR (14, 10);
7152 unsigned sm = INSTR (20, 16);
7153 unsigned sn = INSTR ( 9, 5);
7154 unsigned sd = INSTR ( 4, 0);
7155
7156 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7157 aarch64_set_FP_double (cpu, sd, aarch64_get_FP_double (cpu, sa)
7158 - aarch64_get_FP_double (cpu, sn)
7159 * aarch64_get_FP_double (cpu, sm));
7160 }
7161
7162 /* Float negative multiply add. */
7163 static void
7164 fnmadds (sim_cpu *cpu)
7165 {
7166 unsigned sa = INSTR (14, 10);
7167 unsigned sm = INSTR (20, 16);
7168 unsigned sn = INSTR ( 9, 5);
7169 unsigned sd = INSTR ( 4, 0);
7170
7171 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7172 aarch64_set_FP_float (cpu, sd, - aarch64_get_FP_float (cpu, sa)
7173 + (- aarch64_get_FP_float (cpu, sn))
7174 * aarch64_get_FP_float (cpu, sm));
7175 }
7176
7177 /* Double negative multiply add. */
7178 static void
7179 fnmaddd (sim_cpu *cpu)
7180 {
7181 unsigned sa = INSTR (14, 10);
7182 unsigned sm = INSTR (20, 16);
7183 unsigned sn = INSTR ( 9, 5);
7184 unsigned sd = INSTR ( 4, 0);
7185
7186 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7187 aarch64_set_FP_double (cpu, sd, - aarch64_get_FP_double (cpu, sa)
7188 + (- aarch64_get_FP_double (cpu, sn))
7189 * aarch64_get_FP_double (cpu, sm));
7190 }
7191
7192 /* Float negative multiply subtract. */
7193 static void
7194 fnmsubs (sim_cpu *cpu)
7195 {
7196 unsigned sa = INSTR (14, 10);
7197 unsigned sm = INSTR (20, 16);
7198 unsigned sn = INSTR ( 9, 5);
7199 unsigned sd = INSTR ( 4, 0);
7200
7201 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7202 aarch64_set_FP_float (cpu, sd, - aarch64_get_FP_float (cpu, sa)
7203 + aarch64_get_FP_float (cpu, sn)
7204 * aarch64_get_FP_float (cpu, sm));
7205 }
7206
7207 /* Double negative multiply subtract. */
7208 static void
7209 fnmsubd (sim_cpu *cpu)
7210 {
7211 unsigned sa = INSTR (14, 10);
7212 unsigned sm = INSTR (20, 16);
7213 unsigned sn = INSTR ( 9, 5);
7214 unsigned sd = INSTR ( 4, 0);
7215
7216 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7217 aarch64_set_FP_double (cpu, sd, - aarch64_get_FP_double (cpu, sa)
7218 + aarch64_get_FP_double (cpu, sn)
7219 * aarch64_get_FP_double (cpu, sm));
7220 }
7221
7222 static void
7223 dexSimpleFPDataProc3Source (sim_cpu *cpu)
7224 {
7225 /* instr[31] ==> M : 0 ==> OK, 1 ==> UNALLOC
7226 instr[30] = 0
7227 instr[29] ==> S : 0 ==> OK, 1 ==> UNALLOC
7228 instr[28,25] = 1111
7229 instr[24] = 1
7230 instr[23,22] ==> type : 00 ==> single, 01 ==> double, 1x ==> UNALLOC
7231 instr[21] ==> o1 : 0 ==> unnegated, 1 ==> negated
7232 instr[15] ==> o2 : 0 ==> ADD, 1 ==> SUB */
7233
7234 uint32_t M_S = (INSTR (31, 31) << 1) | INSTR (29, 29);
7235 /* dispatch on combined type:o1:o2. */
7236 uint32_t dispatch = (INSTR (23, 21) << 1) | INSTR (15, 15);
7237
7238 if (M_S != 0)
7239 HALT_UNALLOC;
7240
7241 switch (dispatch)
7242 {
7243 case 0: fmadds (cpu); return;
7244 case 1: fmsubs (cpu); return;
7245 case 2: fnmadds (cpu); return;
7246 case 3: fnmsubs (cpu); return;
7247 case 4: fmaddd (cpu); return;
7248 case 5: fmsubd (cpu); return;
7249 case 6: fnmaddd (cpu); return;
7250 case 7: fnmsubd (cpu); return;
7251 default:
7252 /* type > 1 is currently unallocated. */
7253 HALT_UNALLOC;
7254 }
7255 }
7256
7257 static void
7258 dexSimpleFPFixedConvert (sim_cpu *cpu)
7259 {
7260 HALT_NYI;
7261 }
7262
7263 static void
7264 dexSimpleFPCondCompare (sim_cpu *cpu)
7265 {
7266 /* instr [31,23] = 0001 1110 0
7267 instr [22] = type
7268 instr [21] = 1
7269 instr [20,16] = Rm
7270 instr [15,12] = condition
7271 instr [11,10] = 01
7272 instr [9,5] = Rn
7273 instr [4] = 0
7274 instr [3,0] = nzcv */
7275
7276 unsigned rm = INSTR (20, 16);
7277 unsigned rn = INSTR (9, 5);
7278
7279 NYI_assert (31, 23, 0x3C);
7280 NYI_assert (11, 10, 0x1);
7281 NYI_assert (4, 4, 0);
7282
7283 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7284 if (! testConditionCode (cpu, INSTR (15, 12)))
7285 {
7286 aarch64_set_CPSR (cpu, INSTR (3, 0));
7287 return;
7288 }
7289
7290 if (INSTR (22, 22))
7291 {
7292 /* Double precision. */
7293 double val1 = aarch64_get_vec_double (cpu, rn, 0);
7294 double val2 = aarch64_get_vec_double (cpu, rm, 0);
7295
7296 /* FIXME: Check for NaNs. */
7297 if (val1 == val2)
7298 aarch64_set_CPSR (cpu, (Z | C));
7299 else if (val1 < val2)
7300 aarch64_set_CPSR (cpu, N);
7301 else /* val1 > val2 */
7302 aarch64_set_CPSR (cpu, C);
7303 }
7304 else
7305 {
7306 /* Single precision. */
7307 float val1 = aarch64_get_vec_float (cpu, rn, 0);
7308 float val2 = aarch64_get_vec_float (cpu, rm, 0);
7309
7310 /* FIXME: Check for NaNs. */
7311 if (val1 == val2)
7312 aarch64_set_CPSR (cpu, (Z | C));
7313 else if (val1 < val2)
7314 aarch64_set_CPSR (cpu, N);
7315 else /* val1 > val2 */
7316 aarch64_set_CPSR (cpu, C);
7317 }
7318 }
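
/* A sketch of the NaN handling the FIXMEs above ask for: an AArch64
   floating-point compare that is unordered sets NZCV to 0011, so each
   arm could begin with (assuming isnan from <math.h> and the V flag
   macro alongside N, Z and C; signalling-NaN traps, i.e. FCCMPE
   semantics, still not modelled):

     if (isnan (val1) || isnan (val2))
       aarch64_set_CPSR (cpu, C | V);
     else if (val1 == val2)
       ...  */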
7319
7320 /* 2 sources. */
7321
7322 /* Float add. */
7323 static void
7324 fadds (sim_cpu *cpu)
7325 {
7326 unsigned sm = INSTR (20, 16);
7327 unsigned sn = INSTR ( 9, 5);
7328 unsigned sd = INSTR ( 4, 0);
7329
7330 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7331 aarch64_set_FP_float (cpu, sd, aarch64_get_FP_float (cpu, sn)
7332 + aarch64_get_FP_float (cpu, sm));
7333 }
7334
7335 /* Double add. */
7336 static void
7337 faddd (sim_cpu *cpu)
7338 {
7339 unsigned sm = INSTR (20, 16);
7340 unsigned sn = INSTR ( 9, 5);
7341 unsigned sd = INSTR ( 4, 0);
7342
7343 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7344 aarch64_set_FP_double (cpu, sd, aarch64_get_FP_double (cpu, sn)
7345 + aarch64_get_FP_double (cpu, sm));
7346 }
7347
7348 /* Float divide. */
7349 static void
7350 fdivs (sim_cpu *cpu)
7351 {
7352 unsigned sm = INSTR (20, 16);
7353 unsigned sn = INSTR ( 9, 5);
7354 unsigned sd = INSTR ( 4, 0);
7355
7356 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7357 aarch64_set_FP_float (cpu, sd, aarch64_get_FP_float (cpu, sn)
7358 / aarch64_get_FP_float (cpu, sm));
7359 }
7360
7361 /* Double divide. */
7362 static void
7363 fdivd (sim_cpu *cpu)
7364 {
7365 unsigned sm = INSTR (20, 16);
7366 unsigned sn = INSTR ( 9, 5);
7367 unsigned sd = INSTR ( 4, 0);
7368
7369 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7370 aarch64_set_FP_double (cpu, sd, aarch64_get_FP_double (cpu, sn)
7371 / aarch64_get_FP_double (cpu, sm));
7372 }
7373
7374 /* Float multiply. */
7375 static void
7376 fmuls (sim_cpu *cpu)
7377 {
7378 unsigned sm = INSTR (20, 16);
7379 unsigned sn = INSTR ( 9, 5);
7380 unsigned sd = INSTR ( 4, 0);
7381
7382 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7383 aarch64_set_FP_float (cpu, sd, aarch64_get_FP_float (cpu, sn)
7384 * aarch64_get_FP_float (cpu, sm));
7385 }
7386
7387 /* Double multiply. */
7388 static void
7389 fmuld (sim_cpu *cpu)
7390 {
7391 unsigned sm = INSTR (20, 16);
7392 unsigned sn = INSTR ( 9, 5);
7393 unsigned sd = INSTR ( 4, 0);
7394
7395 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7396 aarch64_set_FP_double (cpu, sd, aarch64_get_FP_double (cpu, sn)
7397 * aarch64_get_FP_double (cpu, sm));
7398 }
7399
7400 /* Float negate and multiply. */
7401 static void
7402 fnmuls (sim_cpu *cpu)
7403 {
7404 unsigned sm = INSTR (20, 16);
7405 unsigned sn = INSTR ( 9, 5);
7406 unsigned sd = INSTR ( 4, 0);
7407
7408 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7409 aarch64_set_FP_float (cpu, sd, - (aarch64_get_FP_float (cpu, sn)
7410 * aarch64_get_FP_float (cpu, sm)));
7411 }
7412
7413 /* Double negate and multiply. */
7414 static void
7415 fnmuld (sim_cpu *cpu)
7416 {
7417 unsigned sm = INSTR (20, 16);
7418 unsigned sn = INSTR ( 9, 5);
7419 unsigned sd = INSTR ( 4, 0);
7420
7421 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7422 aarch64_set_FP_double (cpu, sd, - (aarch64_get_FP_double (cpu, sn)
7423 * aarch64_get_FP_double (cpu, sm)));
7424 }
7425
7426 /* Float subtract. */
7427 static void
7428 fsubs (sim_cpu *cpu)
7429 {
7430 unsigned sm = INSTR (20, 16);
7431 unsigned sn = INSTR ( 9, 5);
7432 unsigned sd = INSTR ( 4, 0);
7433
7434 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7435 aarch64_set_FP_float (cpu, sd, aarch64_get_FP_float (cpu, sn)
7436 - aarch64_get_FP_float (cpu, sm));
7437 }
7438
7439 /* Double subtract. */
7440 static void
7441 fsubd (sim_cpu *cpu)
7442 {
7443 unsigned sm = INSTR (20, 16);
7444 unsigned sn = INSTR ( 9, 5);
7445 unsigned sd = INSTR ( 4, 0);
7446
7447 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7448 aarch64_set_FP_double (cpu, sd, aarch64_get_FP_double (cpu, sn)
7449 - aarch64_get_FP_double (cpu, sm));
7450 }
7451
7452 static void
7453 do_FMINNM (sim_cpu *cpu)
7454 {
7455 /* instr[31,23] = 0 0011 1100
7456 instr[22] = float(0)/double(1)
7457 instr[21] = 1
7458 instr[20,16] = Sm
7459 instr[15,10] = 01 1110
7460 instr[9,5] = Sn
7461 instr[4,0] = Sd */
7462
7463 unsigned sm = INSTR (20, 16);
7464 unsigned sn = INSTR ( 9, 5);
7465 unsigned sd = INSTR ( 4, 0);
7466
7467 NYI_assert (31, 23, 0x03C);
7468 NYI_assert (15, 10, 0x1E);
7469
7470 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7471 if (INSTR (22, 22))
7472 aarch64_set_FP_double (cpu, sd,
7473 dminnm (aarch64_get_FP_double (cpu, sn),
7474 aarch64_get_FP_double (cpu, sm)));
7475 else
7476 aarch64_set_FP_float (cpu, sd,
7477 fminnm (aarch64_get_FP_float (cpu, sn),
7478 aarch64_get_FP_float (cpu, sm)));
7479 }
7480
7481 static void
7482 do_FMAXNM (sim_cpu *cpu)
7483 {
7484 /* instr[31,23] = 0 0011 1100
7485 instr[22] = float(0)/double(1)
7486 instr[21] = 1
7487 instr[20,16] = Sm
7488 instr[15,10] = 01 1010
7489 instr[9,5] = Sn
7490 instr[4,0] = Sd */
7491
7492 unsigned sm = INSTR (20, 16);
7493 unsigned sn = INSTR ( 9, 5);
7494 unsigned sd = INSTR ( 4, 0);
7495
7496 NYI_assert (31, 23, 0x03C);
7497 NYI_assert (15, 10, 0x1A);
7498
7499 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7500 if (INSTR (22, 22))
7501 aarch64_set_FP_double (cpu, sd,
7502 dmaxnm (aarch64_get_FP_double (cpu, sn),
7503 aarch64_get_FP_double (cpu, sm)));
7504 else
7505 aarch64_set_FP_float (cpu, sd,
7506 fmaxnm (aarch64_get_FP_float (cpu, sn),
7507 aarch64_get_FP_float (cpu, sm)));
7508 }
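
/* fminnm/dminnm and fmaxnm/dmaxnm are presumably meant to follow the
   FMINNM/FMAXNM rule that a single quiet-NaN operand loses and the
   other operand is returned.  C99 fmin/fmax implement the same
   quiet-NaN rule, so a minimal host-library definition of the helpers
   (an assumption about how they could be written, modulo signalling
   NaNs and the sign of zero) would be:

     #define fminnm(a, b) fminf ((a), (b))
     #define dminnm(a, b) fmin ((a), (b))
     #define fmaxnm(a, b) fmaxf ((a), (b))
     #define dmaxnm(a, b) fmax ((a), (b))  */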
7509
7510 static void
7511 dexSimpleFPDataProc2Source (sim_cpu *cpu)
7512 {
7513 /* instr[31] ==> M : 0 ==> OK, 1 ==> UNALLOC
7514 instr[30] = 0
7515 instr[29] ==> S : 0 ==> OK, 1 ==> UNALLOC
7516 instr[28,25] = 1111
7517 instr[24] = 0
7518 instr[23,22] ==> type : 00 ==> single, 01 ==> double, 1x ==> UNALLOC
7519 instr[21] = 1
7520 instr[20,16] = Vm
7521 instr[15,12] ==> opcode : 0000 ==> FMUL, 0001 ==> FDIV
7522 0010 ==> FADD, 0011 ==> FSUB,
7523 0100 ==> FMAX, 0101 ==> FMIN
7524 0110 ==> FMAXNM, 0111 ==> FMINNM
7525 1000 ==> FNMUL, ow ==> UNALLOC
7526 instr[11,10] = 10
7527 instr[9,5] = Vn
7528 instr[4,0] = Vd */
7529
7530 uint32_t M_S = (INSTR (31, 31) << 1) | INSTR (29, 29);
7531 uint32_t type = INSTR (23, 22);
7532 /* Dispatch on opcode. */
7533 uint32_t dispatch = INSTR (15, 12);
7534
7535 if (type > 1)
7536 HALT_UNALLOC;
7537
7538 if (M_S != 0)
7539 HALT_UNALLOC;
7540
7541 if (type)
7542 switch (dispatch)
7543 {
7544 case 0: fmuld (cpu); return;
7545 case 1: fdivd (cpu); return;
7546 case 2: faddd (cpu); return;
7547 case 3: fsubd (cpu); return;
7548 case 6: do_FMAXNM (cpu); return;
7549 case 7: do_FMINNM (cpu); return;
7550 case 8: fnmuld (cpu); return;
7551
7552 /* Have not yet implemented fmax and fmin. */
7553 case 4:
7554 case 5:
7555 HALT_NYI;
7556
7557 default:
7558 HALT_UNALLOC;
7559 }
7560 else /* type == 0 => floats. */
7561 switch (dispatch)
7562 {
7563 case 0: fmuls (cpu); return;
7564 case 1: fdivs (cpu); return;
7565 case 2: fadds (cpu); return;
7566 case 3: fsubs (cpu); return;
7567 case 6: do_FMAXNM (cpu); return;
7568 case 7: do_FMINNM (cpu); return;
7569 case 8: fnmuls (cpu); return;
7570
7571 case 4:
7572 case 5:
7573 HALT_NYI;
7574
7575 default:
7576 HALT_UNALLOC;
7577 }
7578 }
7579
7580 static void
7581 dexSimpleFPCondSelect (sim_cpu *cpu)
7582 {
7583 /* FCSEL
7584 instr[31,23] = 0 0011 1100
7585 instr[22] = 0=>single 1=>double
7586 instr[21] = 1
7587 instr[20,16] = Sm
7588 instr[15,12] = cond
7589 instr[11,10] = 11
7590 instr[9,5] = Sn
7591 instr[4,0] = Sd */
7592 unsigned sm = INSTR (20, 16);
7593 unsigned sn = INSTR ( 9, 5);
7594 unsigned sd = INSTR ( 4, 0);
7595 uint32_t set = testConditionCode (cpu, INSTR (15, 12));
7596
7597 NYI_assert (31, 23, 0x03C);
7598 NYI_assert (11, 10, 0x3);
7599
7600 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7601 if (INSTR (22, 22))
7602 aarch64_set_FP_double (cpu, sd, (set ? aarch64_get_FP_double (cpu, sn)
7603 : aarch64_get_FP_double (cpu, sm)));
7604 else
7605 aarch64_set_FP_float (cpu, sd, (set ? aarch64_get_FP_float (cpu, sn)
7606 : aarch64_get_FP_float (cpu, sm)));
7607 }
7608
7609 /* Store 32 bit unscaled signed 9 bit. */
7610 static void
7611 fsturs (sim_cpu *cpu, int32_t offset)
7612 {
7613 unsigned int rn = INSTR (9, 5);
7614 unsigned int st = INSTR (4, 0);
7615
7616 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7617 aarch64_set_mem_u32 (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK) + offset,
7618 aarch64_get_vec_u32 (cpu, st, 0));
7619 }
7620
7621 /* Store 64 bit unscaled signed 9 bit. */
7622 static void
7623 fsturd (sim_cpu *cpu, int32_t offset)
7624 {
7625 unsigned int rn = INSTR (9, 5);
7626 unsigned int st = INSTR (4, 0);
7627
7628 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7629 aarch64_set_mem_u64 (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK) + offset,
7630 aarch64_get_vec_u64 (cpu, st, 0));
7631 }
7632
7633 /* Store 128 bit unscaled signed 9 bit. */
7634 static void
7635 fsturq (sim_cpu *cpu, int32_t offset)
7636 {
7637 unsigned int rn = INSTR (9, 5);
7638 unsigned int st = INSTR (4, 0);
7639 FRegister a;
7640
7641 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7642 aarch64_get_FP_long_double (cpu, st, & a);
7643 aarch64_set_mem_long_double (cpu,
7644 aarch64_get_reg_u64 (cpu, rn, SP_OK)
7645 + offset, a);
7646 }
7647
7648 /* TODO FP move register. */
7649
7650 /* 32 bit fp to fp move register. */
7651 static void
7652 ffmovs (sim_cpu *cpu)
7653 {
7654 unsigned int rn = INSTR (9, 5);
7655 unsigned int st = INSTR (4, 0);
7656
7657 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7658 aarch64_set_FP_float (cpu, st, aarch64_get_FP_float (cpu, rn));
7659 }
7660
7661 /* 64 bit fp to fp move register. */
7662 static void
7663 ffmovd (sim_cpu *cpu)
7664 {
7665 unsigned int rn = INSTR (9, 5);
7666 unsigned int st = INSTR (4, 0);
7667
7668 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7669 aarch64_set_FP_double (cpu, st, aarch64_get_FP_double (cpu, rn));
7670 }
7671
7672 /* 32 bit GReg to Vec move register. */
7673 static void
7674 fgmovs (sim_cpu *cpu)
7675 {
7676 unsigned int rn = INSTR (9, 5);
7677 unsigned int st = INSTR (4, 0);
7678
7679 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7680 aarch64_set_vec_u32 (cpu, st, 0, aarch64_get_reg_u32 (cpu, rn, NO_SP));
7681 }
7682
7683 /* 64 bit g to fp move register. */
7684 static void
7685 fgmovd (sim_cpu *cpu)
7686 {
7687 unsigned int rn = INSTR (9, 5);
7688 unsigned int st = INSTR (4, 0);
7689
7690 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7691 aarch64_set_vec_u64 (cpu, st, 0, aarch64_get_reg_u64 (cpu, rn, NO_SP));
7692 }
7693
7694 /* 32 bit fp to g move register. */
7695 static void
7696 gfmovs (sim_cpu *cpu)
7697 {
7698 unsigned int rn = INSTR (9, 5);
7699 unsigned int st = INSTR (4, 0);
7700
7701 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7702 aarch64_set_reg_u64 (cpu, st, NO_SP, aarch64_get_vec_u32 (cpu, rn, 0));
7703 }
7704
7705 /* 64 bit fp to g move register. */
7706 static void
7707 gfmovd (sim_cpu *cpu)
7708 {
7709 unsigned int rn = INSTR (9, 5);
7710 unsigned int st = INSTR (4, 0);
7711
7712 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7713 aarch64_set_reg_u64 (cpu, st, NO_SP, aarch64_get_vec_u64 (cpu, rn, 0));
7714 }
7715
7716 /* FP move immediate
7717
7718 These install an immediate 8 bit value in the target register
7719 where the 8 bits comprise 1 sign bit, 4 bits of fraction and a 3
7720 bit exponent. */
7721
7722 static void
7723 fmovs (sim_cpu *cpu)
7724 {
7725 unsigned int sd = INSTR (4, 0);
7726 uint32_t imm = INSTR (20, 13);
7727 float f = fp_immediate_for_encoding_32 (imm);
7728
7729 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7730 aarch64_set_FP_float (cpu, sd, f);
7731 }
7732
7733 static void
7734 fmovd (sim_cpu *cpu)
7735 {
7736 unsigned int sd = INSTR (4, 0);
7737 uint32_t imm = INSTR (20, 13);
7738 double d = fp_immediate_for_encoding_64 (imm);
7739
7740 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7741 aarch64_set_FP_double (cpu, sd, d);
7742 }
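
/* The expansion performed by fp_immediate_for_encoding_32/64 follows
   the architectural VFPExpandImm pattern: sign s = imm8<7>, a 3-bit
   exponent field imm8<6:4> and a 4-bit fraction imm8<3:0>, giving
   (-1)^s * (1 + frac/16) * 2^e with e in [-3,4].  A stand-alone
   sketch of that expansion (hypothetical helper, not the routine the
   simulator calls):  */

static inline double
vfp_expand_imm8 (uint32_t imm8)
{
  int sign = (imm8 >> 7) & 1;
  int b6   = (imm8 >> 6) & 1;
  int xx   = (imm8 >> 4) & 3;
  int frac = imm8 & 15;
  /* b6 == 1 selects exponents -3..0, b6 == 0 selects 1..4.  */
  int e    = b6 ? xx - 3 : xx + 1;
  double v = (1.0 + frac / 16.0) * ldexp (1.0, e);

  return sign ? -v : v;   /* E.g. imm8 == 0x70 expands to 1.0.  */
}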
7743
7744 static void
7745 dexSimpleFPImmediate (sim_cpu *cpu)
7746 {
7747 /* instr[31,23] == 00111100
7748 instr[22] == type : single(0)/double(1)
7749 instr[21] == 1
7750 instr[20,13] == imm8
7751 instr[12,10] == 100
7752 instr[9,5] == imm5 : 00000 ==> OK, ow ==> UNALLOC
7753 instr[4,0] == Rd */
7754 uint32_t imm5 = INSTR (9, 5);
7755
7756 NYI_assert (31, 23, 0x3C);
7757
7758 if (imm5 != 0)
7759 HALT_UNALLOC;
7760
7761 if (INSTR (22, 22))
7762 fmovd (cpu);
7763 else
7764 fmovs (cpu);
7765 }
7766
7767 /* TODO specific decode and execute for group Load Store. */
7768
7769 /* TODO FP load/store single register (unscaled offset). */
7770
7771 /* TODO load 8 bit unscaled signed 9 bit. */
7772 /* TODO load 16 bit unscaled signed 9 bit. */
7773
7774 /* Load 32 bit unscaled signed 9 bit. */
7775 static void
7776 fldurs (sim_cpu *cpu, int32_t offset)
7777 {
7778 unsigned int rn = INSTR (9, 5);
7779 unsigned int st = INSTR (4, 0);
7780
7781 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7782 aarch64_set_vec_u32 (cpu, st, 0, aarch64_get_mem_u32
7783 (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK) + offset));
7784 }
7785
7786 /* Load 64 bit unscaled signed 9 bit. */
7787 static void
7788 fldurd (sim_cpu *cpu, int32_t offset)
7789 {
7790 unsigned int rn = INSTR (9, 5);
7791 unsigned int st = INSTR (4, 0);
7792
7793 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7794 aarch64_set_vec_u64 (cpu, st, 0, aarch64_get_mem_u64
7795 (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK) + offset));
7796 }
7797
7798 /* Load 128 bit unscaled signed 9 bit. */
7799 static void
7800 fldurq (sim_cpu *cpu, int32_t offset)
7801 {
7802 unsigned int rn = INSTR (9, 5);
7803 unsigned int st = INSTR (4, 0);
7804 FRegister a;
7805 uint64_t addr = aarch64_get_reg_u64 (cpu, rn, SP_OK) + offset;
7806
7807 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7808 aarch64_get_mem_long_double (cpu, addr, & a);
7809 aarch64_set_FP_long_double (cpu, st, a);
7810 }
7811
7812 /* TODO store 8 bit unscaled signed 9 bit. */
7813 /* TODO store 16 bit unscaled signed 9 bit. */
7814
7815
7816 /* 1 source. */
7817
7818 /* Float absolute value. */
7819 static void
7820 fabss (sim_cpu *cpu)
7821 {
7822 unsigned sn = INSTR (9, 5);
7823 unsigned sd = INSTR (4, 0);
7824 float value = aarch64_get_FP_float (cpu, sn);
7825
7826 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7827 aarch64_set_FP_float (cpu, sd, fabsf (value));
7828 }
7829
7830 /* Double absolute value. */
7831 static void
7832 fabcpu (sim_cpu *cpu)
7833 {
7834 unsigned sn = INSTR (9, 5);
7835 unsigned sd = INSTR (4, 0);
7836 double value = aarch64_get_FP_double (cpu, sn);
7837
7838 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7839 aarch64_set_FP_double (cpu, sd, fabs (value));
7840 }
7841
7842 /* Float negative value. */
7843 static void
7844 fnegs (sim_cpu *cpu)
7845 {
7846 unsigned sn = INSTR (9, 5);
7847 unsigned sd = INSTR (4, 0);
7848
7849 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7850 aarch64_set_FP_float (cpu, sd, - aarch64_get_FP_float (cpu, sn));
7851 }
7852
7853 /* Double negative value. */
7854 static void
7855 fnegd (sim_cpu *cpu)
7856 {
7857 unsigned sn = INSTR (9, 5);
7858 unsigned sd = INSTR (4, 0);
7859
7860 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7861 aarch64_set_FP_double (cpu, sd, - aarch64_get_FP_double (cpu, sn));
7862 }
7863
7864 /* Float square root. */
7865 static void
7866 fsqrts (sim_cpu *cpu)
7867 {
7868 unsigned sn = INSTR (9, 5);
7869 unsigned sd = INSTR (4, 0);
7870
7871 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7872 aarch64_set_FP_float (cpu, sd, sqrtf (aarch64_get_FP_float (cpu, sn)));
7873 }
7874
7875 /* Double square root. */
7876 static void
7877 fsqrtd (sim_cpu *cpu)
7878 {
7879 unsigned sn = INSTR (9, 5);
7880 unsigned sd = INSTR (4, 0);
7881
7882 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7883 aarch64_set_FP_double (cpu, sd,
7884 sqrt (aarch64_get_FP_double (cpu, sn)));
7885 }
7886
7887 /* Convert double to float. */
7888 static void
7889 fcvtds (sim_cpu *cpu)
7890 {
7891 unsigned sn = INSTR (9, 5);
7892 unsigned sd = INSTR (4, 0);
7893
7894 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7895 aarch64_set_FP_float (cpu, sd, (float) aarch64_get_FP_double (cpu, sn));
7896 }
7897
7898 /* Convert float to double. */
7899 static void
7900 fcvtcpu (sim_cpu *cpu)
7901 {
7902 unsigned sn = INSTR (9, 5);
7903 unsigned sd = INSTR (4, 0);
7904
7905 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7906 aarch64_set_FP_double (cpu, sd, (double) aarch64_get_FP_float (cpu, sn));
7907 }
7908
7909 static void
7910 do_FRINT (sim_cpu *cpu)
7911 {
7912 /* instr[31,23] = 0001 1110 0
7913 instr[22] = single(0)/double(1)
7914 instr[21,18] = 1001
7915 instr[17,15] = rounding mode
7916 instr[14,10] = 10000
7917 instr[9,5] = source
7918 instr[4,0] = dest */
7919
7920 float val;
7921 unsigned rs = INSTR (9, 5);
7922 unsigned rd = INSTR (4, 0);
7923 unsigned int rmode = INSTR (17, 15);
7924
7925 NYI_assert (31, 23, 0x03C);
7926 NYI_assert (21, 18, 0x9);
7927 NYI_assert (14, 10, 0x10);
7928
7929 if (rmode == 6 || rmode == 7)
7930 /* FIXME: Add support for rmode == 6 exactness check. */
7931 rmode = uimm (aarch64_get_FPSR (cpu), 23, 22);
7932
7933 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7934 if (INSTR (22, 22))
7935 {
7936 double val = aarch64_get_FP_double (cpu, rs);
7937
7938 switch (rmode)
7939 {
7940 case 0: /* mode N: nearest or even. */
7941 {
7942 double rval = round (val);
7943
7944 /* round () breaks ties away from zero; steer an odd result
7945 back towards val to give the intended round-to-even. */
7946 if (fabs (val - rval) == 0.5
7947 && ((rval / 2.0) * 2.0) != rval)
7948 rval += (val > rval) ? 1.0 : -1.0;
7949
7950 aarch64_set_FP_double (cpu, rd, rval);
7951 return;
7952 }
7953
7954 case 1: /* mode P: towards +inf. */
7955 aarch64_set_FP_double (cpu, rd, ceil (val));
7956 return;
7960
7961 case 2: /* mode M: towards -inf. */
7962 aarch64_set_FP_double (cpu, rd, floor (val));
7963 return;
7967
7968 case 3: /* mode Z: towards 0. */
7969 aarch64_set_FP_double (cpu, rd, trunc (val));
7970 return;
7971
7972 case 4: /* mode A: away from 0. */
7973 aarch64_set_FP_double (cpu, rd, round (val));
7974 return;
7975
7976 case 6: /* mode X: use FPCR with exactness check. */
7977 case 7: /* mode I: use FPCR mode. */
7978 HALT_NYI;
7979
7980 default:
7981 HALT_UNALLOC;
7982 }
7983 }
7984
7985 val = aarch64_get_FP_float (cpu, rs);
7986
7987 switch (rmode)
7988 {
7989 case 0: /* mode N: nearest or even. */
7990 {
7991 float rval = roundf (val);
7992
7993 /* roundf () breaks ties away from zero; steer an odd result
7994 back towards val to give the intended round-to-even. */
7995 if (fabsf (val - rval) == 0.5f
7996 && ((rval / 2.0f) * 2.0f) != rval)
7997 rval += (val > rval) ? 1.0f : -1.0f;
7998
7999 aarch64_set_FP_float (cpu, rd, rval);
8000 return;
8001 }
8002
8003 case 1: /* mode P: towards +inf. */
8004 aarch64_set_FP_float (cpu, rd, ceilf (val));
8005 return;
8006
8007 case 2: /* mode M: towards -inf. */
8008 aarch64_set_FP_float (cpu, rd, floorf (val));
8009 return;
8016
8017 case 3: /* mode Z: towards 0. */
8018 aarch64_set_FP_float (cpu, rd, truncf (val));
8019 return;
8020
8021 case 4: /* mode A: away from 0. */
8022 aarch64_set_FP_float (cpu, rd, roundf (val));
8023 return;
8024
8025 case 6: /* mode X: use FPCR with exactness check. */
8026 case 7: /* mode I: use FPCR mode. */
8027 HALT_NYI;
8028
8029 default:
8030 HALT_UNALLOC;
8031 }
8032 }
8033
8034 /* Convert half to float. */
8035 static void
8036 do_FCVT_half_to_single (sim_cpu *cpu)
8037 {
8038 unsigned rn = INSTR (9, 5);
8039 unsigned rd = INSTR (4, 0);
8040
8041 NYI_assert (31, 10, 0x7B890);
8042
8043 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
8044 aarch64_set_FP_float (cpu, rd, (float) aarch64_get_FP_half (cpu, rn));
8045 }
8046
8047 /* Convert half to double. */
8048 static void
8049 do_FCVT_half_to_double (sim_cpu *cpu)
8050 {
8051 unsigned rn = INSTR (9, 5);
8052 unsigned rd = INSTR (4, 0);
8053
8054 NYI_assert (31, 10, 0x7B8B0);
8055
8056 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
8057 aarch64_set_FP_double (cpu, rd, (double) aarch64_get_FP_half (cpu, rn));
8058 }
8059
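/* Convert float to half. */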
8060 static void
8061 do_FCVT_single_to_half (sim_cpu *cpu)
8062 {
8063 unsigned rn = INSTR (9, 5);
8064 unsigned rd = INSTR (4, 0);
8065
8066 NYI_assert (31, 10, 0x788F0);
8067
8068 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
8069 aarch64_set_FP_half (cpu, rd, aarch64_get_FP_float (cpu, rn));
8070 }
8071
8072 /* Convert double to half. */
8073 static void
8074 do_FCVT_double_to_half (sim_cpu *cpu)
8075 {
8076 unsigned rn = INSTR (9, 5);
8077 unsigned rd = INSTR (4, 0);
8078
8079 NYI_assert (31, 10, 0x798F0);
8080
8081 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
8082 aarch64_set_FP_half (cpu, rd, (float) aarch64_get_FP_double (cpu, rn));
8083 }
8084
8085 static void
8086 dexSimpleFPDataProc1Source (sim_cpu *cpu)
8087 {
8088 /* instr[31] ==> M : 0 ==> OK, 1 ==> UNALLOC
8089 instr[30] = 0
8090 instr[29] ==> S : 0 ==> OK, 1 ==> UNALLOC
8091 instr[28,25] = 1111
8092 instr[24] = 0
8093 instr[23,22] ==> type : 00 ==> source is single,
8094 01 ==> source is double
8095 10 ==> UNALLOC
8096 11 ==> UNALLOC or source is half
8097 instr[21] = 1
8098 instr[20,15] ==> opcode : with type 00 or 01
8099 000000 ==> FMOV, 000001 ==> FABS,
8100 000010 ==> FNEG, 000011 ==> FSQRT,
8101 000100 ==> UNALLOC, 000101 ==> FCVT,(to single/double)
8102 000110 ==> UNALLOC, 000111 ==> FCVT (to half)
8103 001000 ==> FRINTN, 001001 ==> FRINTP,
8104 001010 ==> FRINTM, 001011 ==> FRINTZ,
8105 001100 ==> FRINTA, 001101 ==> UNALLOC
8106 001110 ==> FRINTX, 001111 ==> FRINTI
8107 with type 11
8108 000100 ==> FCVT (half-to-single)
8109 000101 ==> FCVT (half-to-double)
8110 instr[14,10] = 10000. */
8111
8112 uint32_t M_S = (INSTR (31, 31) << 1) | INSTR (29, 29);
8113 uint32_t type = INSTR (23, 22);
8114 uint32_t opcode = INSTR (20, 15);
8115
8116 if (M_S != 0)
8117 HALT_UNALLOC;
8118
8119 if (type == 3)
8120 {
8121 if (opcode == 4)
8122 do_FCVT_half_to_single (cpu);
8123 else if (opcode == 5)
8124 do_FCVT_half_to_double (cpu);
8125 else
8126 HALT_UNALLOC;
8127 return;
8128 }
8129
8130 if (type == 2)
8131 HALT_UNALLOC;
8132
8133 switch (opcode)
8134 {
8135 case 0:
8136 if (type)
8137 ffmovd (cpu);
8138 else
8139 ffmovs (cpu);
8140 return;
8141
8142 case 1:
8143 if (type)
8144 fabcpu (cpu);
8145 else
8146 fabss (cpu);
8147 return;
8148
8149 case 2:
8150 if (type)
8151 fnegd (cpu);
8152 else
8153 fnegs (cpu);
8154 return;
8155
8156 case 3:
8157 if (type)
8158 fsqrtd (cpu);
8159 else
8160 fsqrts (cpu);
8161 return;
8162
8163 case 4:
8164 if (type)
8165 fcvtds (cpu);
8166 else
8167 HALT_UNALLOC;
8168 return;
8169
8170 case 5:
8171 if (type)
8172 HALT_UNALLOC;
8173 fcvtcpu (cpu);
8174 return;
8175
8176 case 8: /* FRINTN etc. */
8177 case 9:
8178 case 10:
8179 case 11:
8180 case 12:
8181 case 14:
8182 case 15:
8183 do_FRINT (cpu);
8184 return;
8185
8186 case 7:
8187 if (INSTR (22, 22))
8188 do_FCVT_double_to_half (cpu);
8189 else
8190 do_FCVT_single_to_half (cpu);
8191 return;
8192
8193 case 13:
8194 HALT_NYI;
8195
8196 default:
8197 HALT_UNALLOC;
8198 }
8199 }
8200
8201 /* 32 bit signed int to float. */
8202 static void
8203 scvtf32 (sim_cpu *cpu)
8204 {
8205 unsigned rn = INSTR (9, 5);
8206 unsigned sd = INSTR (4, 0);
8207
8208 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
8209 aarch64_set_FP_float
8210 (cpu, sd, (float) aarch64_get_reg_s32 (cpu, rn, NO_SP));
8211 }
8212
8213 /* 64 bit signed int to float. */
8214 static void
8215 scvtf (sim_cpu *cpu)
8216 {
8217 unsigned rn = INSTR (9, 5);
8218 unsigned sd = INSTR (4, 0);
8219
8220 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
8221 aarch64_set_FP_float
8222 (cpu, sd, (float) aarch64_get_reg_s64 (cpu, rn, NO_SP));
8223 }
8224
8225 /* 32 bit signed int to double. */
8226 static void
8227 scvtd32 (sim_cpu *cpu)
8228 {
8229 unsigned rn = INSTR (9, 5);
8230 unsigned sd = INSTR (4, 0);
8231
8232 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
8233 aarch64_set_FP_double
8234 (cpu, sd, (double) aarch64_get_reg_s32 (cpu, rn, NO_SP));
8235 }
8236
8237 /* 64 bit signed int to double. */
8238 static void
8239 scvtd (sim_cpu *cpu)
8240 {
8241 unsigned rn = INSTR (9, 5);
8242 unsigned sd = INSTR (4, 0);
8243
8244 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
8245 aarch64_set_FP_double
8246 (cpu, sd, (double) aarch64_get_reg_s64 (cpu, rn, NO_SP));
8247 }
8248
8249 static const float FLOAT_INT_MAX = (float) INT_MAX;
8250 static const float FLOAT_INT_MIN = (float) INT_MIN;
8251 static const double DOUBLE_INT_MAX = (double) INT_MAX;
8252 static const double DOUBLE_INT_MIN = (double) INT_MIN;
8253 static const float FLOAT_LONG_MAX = (float) LONG_MAX;
8254 static const float FLOAT_LONG_MIN = (float) LONG_MIN;
8255 static const double DOUBLE_LONG_MAX = (double) LONG_MAX;
8256 static const double DOUBLE_LONG_MIN = (double) LONG_MIN;
8257
8258 #define UINT_MIN 0
8259 #define ULONG_MIN 0
8260 static const float FLOAT_UINT_MAX = (float) UINT_MAX;
8261 static const float FLOAT_UINT_MIN = (float) UINT_MIN;
8262 static const double DOUBLE_UINT_MAX = (double) UINT_MAX;
8263 static const double DOUBLE_UINT_MIN = (double) UINT_MIN;
8264 static const float FLOAT_ULONG_MAX = (float) ULONG_MAX;
8265 static const float FLOAT_ULONG_MIN = (float) ULONG_MIN;
8266 static const double DOUBLE_ULONG_MAX = (double) ULONG_MAX;
8267 static const double DOUBLE_ULONG_MIN = (double) ULONG_MIN;
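
/* Note that the LONG/ULONG constants above use the host's long, which
   is 64 bits wide only on LP64 hosts; on an LLP64 host (such as
   64-bit Windows) they would be 32-bit limits.  A host-independent
   variant, assuming the <stdint.h> limits were preferred (UINT64_MIN
   would need defining, just as UINT_MIN and ULONG_MIN are above):

     static const double DOUBLE_INT64_MAX = (double) INT64_MAX;
     static const double DOUBLE_UINT64_MAX = (double) UINT64_MAX;

   with the RAISE_EXCEPTIONS call sites below passing INT64/UINT64 as
   the ITYPE token accordingly.  */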
8268
8269 /* Check for FP exception conditions:
8270 NaN raises IO
8271 Infinity raises IO
8272 Out of Range raises IO and IX and saturates value
8273 Denormal raises ID and IX and sets to zero. */
8274 #define RAISE_EXCEPTIONS(F, VALUE, FTYPE, ITYPE) \
8275 do \
8276 { \
8277 switch (fpclassify (F)) \
8278 { \
8279 case FP_INFINITE: \
8280 case FP_NAN: \
8281 aarch64_set_FPSR (cpu, IO); \
8282 if (signbit (F)) \
8283 VALUE = ITYPE##_MIN; \
8284 else \
8285 VALUE = ITYPE##_MAX; \
8286 break; \
8287 \
8288 case FP_NORMAL: \
8289 if (F >= FTYPE##_##ITYPE##_MAX) \
8290 { \
8291 aarch64_set_FPSR_bits (cpu, IO | IX, IO | IX); \
8292 VALUE = ITYPE##_MAX; \
8293 } \
8294 else if (F <= FTYPE##_##ITYPE##_MIN) \
8295 { \
8296 aarch64_set_FPSR_bits (cpu, IO | IX, IO | IX); \
8297 VALUE = ITYPE##_MIN; \
8298 } \
8299 break; \
8300 \
8301 case FP_SUBNORMAL: \
8302 aarch64_set_FPSR_bits (cpu, IO | IX | ID, IX | ID); \
8303 VALUE = 0; \
8304 break; \
8305 \
8306 default: \
8307 case FP_ZERO: \
8308 VALUE = 0; \
8309 break; \
8310 } \
8311 } \
8312 while (0)
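
/* Worked example of the macro for a float-to-int32 conversion:
   1.0e30f is FP_NORMAL and above FLOAT_INT_MAX, so VALUE saturates to
   INT_MAX and IO | IX are raised; an infinity or NaN raises IO and
   saturates by sign; a subnormal raises ID and IX and yields 0; and
   an in-range normal value leaves VALUE as converted.  */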
8313
8314 /* 32 bit convert float to signed int truncate towards zero. */
8315 static void
8316 fcvtszs32 (sim_cpu *cpu)
8317 {
8318 unsigned sn = INSTR (9, 5);
8319 unsigned rd = INSTR (4, 0);
8320 /* ISO C casts from floating-point truncate, i.e. round towards zero. */
8321 float f = aarch64_get_FP_float (cpu, sn);
8322 int32_t value = (int32_t) f;
8323
8324 RAISE_EXCEPTIONS (f, value, FLOAT, INT);
8325
8326 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
8327 /* Avoid sign extension to 64 bit. */
8328 aarch64_set_reg_u64 (cpu, rd, NO_SP, (uint32_t) value);
8329 }
8330
8331 /* 64 bit convert float to signed int truncate towards zero. */
8332 static void
8333 fcvtszs (sim_cpu *cpu)
8334 {
8335 unsigned sn = INSTR (9, 5);
8336 unsigned rd = INSTR (4, 0);
8337 float f = aarch64_get_FP_float (cpu, sn);
8338 int64_t value = (int64_t) f;
8339
8340 RAISE_EXCEPTIONS (f, value, FLOAT, LONG);
8341
8342 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
8343 aarch64_set_reg_s64 (cpu, rd, NO_SP, value);
8344 }
8345
8346 /* 32 bit convert double to signed int truncate towards zero. */
8347 static void
8348 fcvtszd32 (sim_cpu *cpu)
8349 {
8350 unsigned sn = INSTR (9, 5);
8351 unsigned rd = INSTR (4, 0);
8352 /* ISO C casts from floating-point truncate, i.e. round towards zero. */
8353 double d = aarch64_get_FP_double (cpu, sn);
8354 int32_t value = (int32_t) d;
8355
8356 RAISE_EXCEPTIONS (d, value, DOUBLE, INT);
8357
8358 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
8359 /* Avoid sign extension to 64 bit. */
8360 aarch64_set_reg_u64 (cpu, rd, NO_SP, (uint32_t) value);
8361 }
8362
8363 /* 64 bit convert double to signed int truncate towards zero. */
8364 static void
8365 fcvtszd (sim_cpu *cpu)
8366 {
8367 unsigned sn = INSTR (9, 5);
8368 unsigned rd = INSTR (4, 0);
8369 /* ISO C casts from floating-point truncate, i.e. round towards zero. */
8370 double d = aarch64_get_FP_double (cpu, sn);
8371 int64_t value;
8372
8373 value = (int64_t) d;
8374
8375 RAISE_EXCEPTIONS (d, value, DOUBLE, LONG);
8376
8377 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
8378 aarch64_set_reg_s64 (cpu, rd, NO_SP, value);
8379 }
8380
8381 static void
8382 do_fcvtzu (sim_cpu *cpu)
8383 {
8384 /* instr[31] = size: 32-bit (0), 64-bit (1)
8385 instr[30,23] = 00111100
8386 instr[22] = type: single (0)/ double (1)
8387 instr[21] = scaled fixed-point (0)/integer (1)
8388 instr[20,16] = 11001
8389 instr[15,10] = precision
8390 instr[9,5] = Rs
8391 instr[4,0] = Rd. */
8392
8393 unsigned rs = INSTR (9, 5);
8394 unsigned rd = INSTR (4, 0);
8395
8396 NYI_assert (30, 23, 0x3C);
8397 NYI_assert (20, 16, 0x19);
8398
8399 if (INSTR (21, 21) != 1)
8400 /* Convert to fixed point. */
8401 HALT_NYI;
8402
8403 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
8404 if (INSTR (31, 31))
8405 {
8406 /* Convert to unsigned 64-bit integer. */
8407 if (INSTR (22, 22))
8408 {
8409 double d = aarch64_get_FP_double (cpu, rs);
8410 uint64_t value = (uint64_t) d;
8411
8412 /* Do not raise an exception if the conversion produced 1ULL << 63, the out-of-range result on common hosts. */
8413 if (value != (1ULL << 63))
8414 RAISE_EXCEPTIONS (d, value, DOUBLE, ULONG);
8415
8416 aarch64_set_reg_u64 (cpu, rd, NO_SP, value);
8417 }
8418 else
8419 {
8420 float f = aarch64_get_FP_float (cpu, rs);
8421 uint64_t value = (uint64_t) f;
8422
8423 /* Do not raise an exception if the conversion overflowed to the 1 << 63 marker, i.e. we have reached ULONG_MAX. */
8424 if (value != (1ULL << 63))
8425 RAISE_EXCEPTIONS (f, value, FLOAT, ULONG);
8426
8427 aarch64_set_reg_u64 (cpu, rd, NO_SP, value);
8428 }
8429 }
8430 else
8431 {
8432 uint32_t value;
8433
8434 /* Convert to unsigned 32-bit integer. */
8435 if (INSTR (22, 22))
8436 {
8437 double d = aarch64_get_FP_double (cpu, rs);
8438
8439 value = (uint32_t) d;
8440 /* Do not raise an exception if the conversion overflowed to the 1 << 31 marker, i.e. we have reached UINT_MAX. */
8441 if (value != (1UL << 31))
8442 RAISE_EXCEPTIONS (d, value, DOUBLE, UINT);
8443 }
8444 else
8445 {
8446 float f = aarch64_get_FP_float (cpu, rs);
8447
8448 value = (uint32_t) f;
8449 /* Do not raise an exception if the conversion overflowed to the 1 << 31 marker, i.e. we have reached UINT_MAX. */
8450 if (value != (1UL << 31))
8451 RAISE_EXCEPTIONS (f, value, FLOAT, UINT);
8452 }
8453
8454 aarch64_set_reg_u64 (cpu, rd, NO_SP, value);
8455 }
8456 }
8457
8458 static void
8459 do_UCVTF (sim_cpu *cpu)
8460 {
8461 /* instr[31] = size: 32-bit (0), 64-bit (1)
8462 instr[30,23] = 001 1110 0
8463 instr[22] = type: single (0)/ double (1)
8464 instr[21] = 1 ==> integer conversion, 0 ==> fixed-point via the precision field (NYI)
8465 instr[20,16] = 0 0011
8466 instr[15,10] = precision
8467 instr[9,5] = Rs
8468 instr[4,0] = Rd. */
8469
8470 unsigned rs = INSTR (9, 5);
8471 unsigned rd = INSTR (4, 0);
8472
8473 NYI_assert (30, 23, 0x3C);
8474 NYI_assert (20, 16, 0x03);
8475
8476 if (INSTR (21, 21) != 1)
8477 HALT_NYI;
8478
8479 /* FIXME: Add exception raising. */
8480 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
8481 if (INSTR (31, 31))
8482 {
8483 uint64_t value = aarch64_get_reg_u64 (cpu, rs, NO_SP);
8484
8485 if (INSTR (22, 22))
8486 aarch64_set_FP_double (cpu, rd, (double) value);
8487 else
8488 aarch64_set_FP_float (cpu, rd, (float) value);
8489 }
8490 else
8491 {
8492 uint32_t value = aarch64_get_reg_u32 (cpu, rs, NO_SP);
8493
8494 if (INSTR (22, 22))
8495 aarch64_set_FP_double (cpu, rd, (double) value);
8496 else
8497 aarch64_set_FP_float (cpu, rd, (float) value);
8498 }
8499 }
8500
8501 static void
8502 float_vector_move (sim_cpu *cpu)
8503 {
8504 /* instr[31,17] == 100 1111 0101 0111
8505 instr[16] ==> direction 0=> to GR, 1=> from GR
8506 instr[15,10] ==> 00 0000, ow UNALLOC
8507 instr[9,5] ==> source
8508 instr[4,0] ==> dest. */
8509
8510 unsigned rn = INSTR (9, 5);
8511 unsigned rd = INSTR (4, 0);
8512
8513 NYI_assert (31, 17, 0x4F57);
8514
8515 if (INSTR (15, 10) != 0)
8516 HALT_UNALLOC;
8517
8518 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
8519 if (INSTR (16, 16))
8520 aarch64_set_vec_u64 (cpu, rd, 1, aarch64_get_reg_u64 (cpu, rn, NO_SP));
8521 else
8522 aarch64_set_reg_u64 (cpu, rd, NO_SP, aarch64_get_vec_u64 (cpu, rn, 1));
8523 }
8524
8525 static void
8526 dexSimpleFPIntegerConvert (sim_cpu *cpu)
8527 {
8528 /* instr[31] = size : 0 ==> 32 bit, 1 ==> 64 bit
8529 instr[30] = 0
8530 instr[29] = S : 0 ==> OK, 1 ==> UNALLOC
8531 instr[28,25] = 1111
8532 instr[24] = 0
8533 instr[23,22] = type : 00 ==> single, 01 ==> double, 1x ==> UNALLOC
8534 instr[21] = 1
8535 instr[20,19] = rmode
8536 instr[18,16] = opcode
8537 instr[15,10] = 10 0000 */
8538
8539 uint32_t rmode_opcode;
8540 uint32_t size_type;
8541 uint32_t type;
8542 uint32_t size;
8543 uint32_t S;
8544
8545 if (INSTR (31, 17) == 0x4F57)
8546 {
8547 float_vector_move (cpu);
8548 return;
8549 }
8550
8551 size = INSTR (31, 31);
8552 S = INSTR (29, 29);
8553 if (S != 0)
8554 HALT_UNALLOC;
8555
8556 type = INSTR (23, 22);
8557 if (type > 1)
8558 HALT_UNALLOC;
8559
8560 rmode_opcode = INSTR (20, 16);
8561 size_type = (size << 1) | type; /* 0==32f, 1==32d, 2==64f, 3==64d. */
8562
8563 switch (rmode_opcode)
8564 {
8565 case 2: /* SCVTF. */
8566 switch (size_type)
8567 {
8568 case 0: scvtf32 (cpu); return;
8569 case 1: scvtd32 (cpu); return;
8570 case 2: scvtf (cpu); return;
8571 case 3: scvtd (cpu); return;
8572 }
8573
8574 case 6: /* FMOV GR, Vec. */
8575 switch (size_type)
8576 {
8577 case 0: gfmovs (cpu); return;
8578 case 3: gfmovd (cpu); return;
8579 default: HALT_UNALLOC;
8580 }
8581
8582 case 7: /* FMOV vec, GR. */
8583 switch (size_type)
8584 {
8585 case 0: fgmovs (cpu); return;
8586 case 3: fgmovd (cpu); return;
8587 default: HALT_UNALLOC;
8588 }
8589
8590 case 24: /* FCVTZS. */
8591 switch (size_type)
8592 {
8593 case 0: fcvtszs32 (cpu); return;
8594 case 1: fcvtszd32 (cpu); return;
8595 case 2: fcvtszs (cpu); return;
8596 case 3: fcvtszd (cpu); return;
8597 }
8598
8599 case 25: do_fcvtzu (cpu); return;
8600 case 3: do_UCVTF (cpu); return;
8601
8602 case 0: /* FCVTNS. */
8603 case 1: /* FCVTNU. */
8604 case 4: /* FCVTAS. */
8605 case 5: /* FCVTAU. */
8606 case 8: /* FCVTPS. */
8607 case 9: /* FCVTPU. */
8608 case 16: /* FCVTMS. */
8609 case 17: /* FCVTMU. */
8610 default:
8611 HALT_NYI;
8612 }
8613 }
8614
8615 static void
8616 set_flags_for_float_compare (sim_cpu *cpu, float fvalue1, float fvalue2)
8617 {
8618 uint32_t flags;
8619
8620 /* FIXME: Add exception raising. */
8621 if (isnan (fvalue1) || isnan (fvalue2))
8622 flags = C|V;
8623 else if (isinf (fvalue1) && isinf (fvalue2))
8624 {
8625 /* Subtracting two infinities may give a NaN. We only need to compare
8626 the signs, which we can get from isinf (this relies on isinf returning +1/-1 for +/-inf, as glibc does). */
8627 int result = isinf (fvalue1) - isinf (fvalue2);
8628
8629 if (result == 0)
8630 flags = Z|C;
8631 else if (result < 0)
8632 flags = N;
8633 else /* (result > 0). */
8634 flags = C;
8635 }
8636 else
8637 {
8638 float result = fvalue1 - fvalue2;
8639
8640 if (result == 0.0)
8641 flags = Z|C;
8642 else if (result < 0)
8643 flags = N;
8644 else /* (result > 0). */
8645 flags = C;
8646 }
8647
8648 aarch64_set_CPSR (cpu, flags);
8649 }
8650
8651 static void
8652 fcmps (sim_cpu *cpu)
8653 {
8654 unsigned sm = INSTR (20, 16);
8655 unsigned sn = INSTR ( 9, 5);
8656
8657 float fvalue1 = aarch64_get_FP_float (cpu, sn);
8658 float fvalue2 = aarch64_get_FP_float (cpu, sm);
8659
8660 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
8661 set_flags_for_float_compare (cpu, fvalue1, fvalue2);
8662 }
8663
8664 /* Float compare to zero -- Invalid Operation exception
8665 only on signaling NaNs. */
8666 static void
8667 fcmpzs (sim_cpu *cpu)
8668 {
8669 unsigned sn = INSTR ( 9, 5);
8670 float fvalue1 = aarch64_get_FP_float (cpu, sn);
8671
8672 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
8673 set_flags_for_float_compare (cpu, fvalue1, 0.0f);
8674 }
8675
8676 /* Float compare -- Invalid Operation exception on all NaNs. */
8677 static void
8678 fcmpes (sim_cpu *cpu)
8679 {
8680 unsigned sm = INSTR (20, 16);
8681 unsigned sn = INSTR ( 9, 5);
8682
8683 float fvalue1 = aarch64_get_FP_float (cpu, sn);
8684 float fvalue2 = aarch64_get_FP_float (cpu, sm);
8685
8686 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
8687 set_flags_for_float_compare (cpu, fvalue1, fvalue2);
8688 }
8689
8690 /* Float compare to zero -- Invalid Operation exception on all NaNs. */
8691 static void
8692 fcmpzes (sim_cpu *cpu)
8693 {
8694 unsigned sn = INSTR ( 9, 5);
8695 float fvalue1 = aarch64_get_FP_float (cpu, sn);
8696
8697 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
8698 set_flags_for_float_compare (cpu, fvalue1, 0.0f);
8699 }
8700
8701 static void
8702 set_flags_for_double_compare (sim_cpu *cpu, double dval1, double dval2)
8703 {
8704 uint32_t flags;
8705
8706 /* FIXME: Add exception raising. */
8707 if (isnan (dval1) || isnan (dval2))
8708 flags = C|V;
8709 else if (isinf (dval1) && isinf (dval2))
8710 {
8711 /* Subtracting two infinities may give a NaN. We only need to compare
8712 the signs, which we can get from isinf (this relies on isinf returning +1/-1 for +/-inf, as glibc does). */
8713 int result = isinf (dval1) - isinf (dval2);
8714
8715 if (result == 0)
8716 flags = Z|C;
8717 else if (result < 0)
8718 flags = N;
8719 else /* (result > 0). */
8720 flags = C;
8721 }
8722 else
8723 {
8724 double result = dval1 - dval2;
8725
8726 if (result == 0.0)
8727 flags = Z|C;
8728 else if (result < 0)
8729 flags = N;
8730 else /* (result > 0). */
8731 flags = C;
8732 }
8733
8734 aarch64_set_CPSR (cpu, flags);
8735 }
8736
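/* Editor's sketch (not simulator code): the NZCV nibble that FCMP/FCMPE
   produce for each outcome, mirroring set_flags_for_double_compare above.
   The literal values 8/4/2/1 for N/Z/C/V are an assumption of this demo;
   the simulator uses its own N, Z, C, V masks.  */
#ifdef AARCH64_SIM_FCMP_DEMO
static unsigned
fcmp_nzcv_demo (double a, double b)
{
  if (isnan (a) || isnan (b)) return 0x3;   /* C|V : unordered.  */
  if (a == b)                 return 0x6;   /* Z|C : equal.  */
  if (a < b)                  return 0x8;   /* N   : less than.  */
  return 0x2;                               /* C   : greater than.  */
}
#endif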
8737 /* Double compare -- Invalid Operation exception only on signaling NaNs. */
8738 static void
8739 fcmpd (sim_cpu *cpu)
8740 {
8741 unsigned sm = INSTR (20, 16);
8742 unsigned sn = INSTR ( 9, 5);
8743
8744 double dvalue1 = aarch64_get_FP_double (cpu, sn);
8745 double dvalue2 = aarch64_get_FP_double (cpu, sm);
8746
8747 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
8748 set_flags_for_double_compare (cpu, dvalue1, dvalue2);
8749 }
8750
8751 /* Double compare to zero -- Invalid Operation exception
8752 only on signaling NaNs. */
8753 static void
8754 fcmpzd (sim_cpu *cpu)
8755 {
8756 unsigned sn = INSTR ( 9, 5);
8757 double dvalue1 = aarch64_get_FP_double (cpu, sn);
8758
8759 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
8760 set_flags_for_double_compare (cpu, dvalue1, 0.0);
8761 }
8762
8763 /* Double compare -- Invalid Operation exception on all NaNs. */
8764 static void
8765 fcmped (sim_cpu *cpu)
8766 {
8767 unsigned sm = INSTR (20, 16);
8768 unsigned sn = INSTR ( 9, 5);
8769
8770 double dvalue1 = aarch64_get_FP_double (cpu, sn);
8771 double dvalue2 = aarch64_get_FP_double (cpu, sm);
8772
8773 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
8774 set_flags_for_double_compare (cpu, dvalue1, dvalue2);
8775 }
8776
8777 /* Double compare to zero -- Invalid Operation exception on all NaNs. */
8778 static void
8779 fcmpzed (sim_cpu *cpu)
8780 {
8781 unsigned sn = INSTR ( 9, 5);
8782 double dvalue1 = aarch64_get_FP_double (cpu, sn);
8783
8784 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
8785 set_flags_for_double_compare (cpu, dvalue1, 0.0);
8786 }
8787
8788 static void
8789 dexSimpleFPCompare (sim_cpu *cpu)
8790 {
8791 /* assert instr[28,25] == 1111
8792 instr[30] = 0, instr[24] = 0, instr[21] = 1, instr[13,10] = 1000
8793 instr[31] = M : 0 ==> OK, 1 ==> UNALLOC
8794 instr[29] ==> S : 0 ==> OK, 1 ==> UNALLOC
8795 instr[23,22] ==> type : 0 ==> single, 01 ==> double, 1x ==> UNALLOC
8796 instr[15,14] ==> op : 00 ==> OK, ow ==> UNALLOC
8797 instr[4,0] ==> opcode2 : 00000 ==> FCMP, 10000 ==> FCMPE,
8798 01000 ==> FCMPZ, 11000 ==> FCMPEZ,
8799 ow ==> UNALLOC */
8800 uint32_t dispatch;
8801 uint32_t M_S = (INSTR (31, 31) << 1) | INSTR (29, 29);
8802 uint32_t type = INSTR (23, 22);
8803 uint32_t op = INSTR (15, 14);
8804 uint32_t op2_2_0 = INSTR (2, 0);
8805
8806 if (op2_2_0 != 0)
8807 HALT_UNALLOC;
8808
8809 if (M_S != 0)
8810 HALT_UNALLOC;
8811
8812 if (type > 1)
8813 HALT_UNALLOC;
8814
8815 if (op != 0)
8816 HALT_UNALLOC;
8817
8818 /* dispatch on type and top 2 bits of opcode. */
8819 dispatch = (type << 2) | INSTR (4, 3);
8820
8821 switch (dispatch)
8822 {
8823 case 0: fcmps (cpu); return;
8824 case 1: fcmpzs (cpu); return;
8825 case 2: fcmpes (cpu); return;
8826 case 3: fcmpzes (cpu); return;
8827 case 4: fcmpd (cpu); return;
8828 case 5: fcmpzd (cpu); return;
8829 case 6: fcmped (cpu); return;
8830 case 7: fcmpzed (cpu); return;
8831 }
8832 }
8833
8834 static void
8835 do_scalar_FADDP (sim_cpu *cpu)
8836 {
8837 /* instr [31,23] = 0111 1110 0
8838 instr [22] = single(0)/double(1)
8839 instr [21,10] = 11 0000 1101 10
8840 instr [9,5] = Fn
8841 instr [4,0] = Fd. */
8842
8843 unsigned Fn = INSTR (9, 5);
8844 unsigned Fd = INSTR (4, 0);
8845
8846 NYI_assert (31, 23, 0x0FC);
8847 NYI_assert (21, 10, 0xC36);
8848
8849 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
8850 if (INSTR (22, 22))
8851 {
8852 double val1 = aarch64_get_vec_double (cpu, Fn, 0);
8853 double val2 = aarch64_get_vec_double (cpu, Fn, 1);
8854
8855 aarch64_set_FP_double (cpu, Fd, val1 + val2);
8856 }
8857 else
8858 {
8859 float val1 = aarch64_get_vec_float (cpu, Fn, 0);
8860 float val2 = aarch64_get_vec_float (cpu, Fn, 1);
8861
8862 aarch64_set_FP_float (cpu, Fd, val1 + val2);
8863 }
8864 }
8865
8866 /* Floating point absolute difference. */
8867
8868 static void
8869 do_scalar_FABD (sim_cpu *cpu)
8870 {
8871 /* instr [31,23] = 0111 1110 1
8872 instr [22] = float(0)/double(1)
8873 instr [21] = 1
8874 instr [20,16] = Rm
8875 instr [15,10] = 1101 01
8876 instr [9, 5] = Rn
8877 instr [4, 0] = Rd. */
8878
8879 unsigned rm = INSTR (20, 16);
8880 unsigned rn = INSTR (9, 5);
8881 unsigned rd = INSTR (4, 0);
8882
8883 NYI_assert (31, 23, 0x0FD);
8884 NYI_assert (21, 21, 1);
8885 NYI_assert (15, 10, 0x35);
8886
8887 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
8888 if (INSTR (22, 22))
8889 aarch64_set_FP_double (cpu, rd,
8890 fabs (aarch64_get_FP_double (cpu, rn)
8891 - aarch64_get_FP_double (cpu, rm)));
8892 else
8893 aarch64_set_FP_float (cpu, rd,
8894 fabsf (aarch64_get_FP_float (cpu, rn)
8895 - aarch64_get_FP_float (cpu, rm)));
8896 }
8897
8898 static void
8899 do_scalar_CMGT (sim_cpu *cpu)
8900 {
8901 /* instr [31,21] = 0101 1110 111
8902 instr [20,16] = Rm
8903 instr [15,10] = 00 1101
8904 instr [9, 5] = Rn
8905 instr [4, 0] = Rd. */
8906
8907 unsigned rm = INSTR (20, 16);
8908 unsigned rn = INSTR (9, 5);
8909 unsigned rd = INSTR (4, 0);
8910
8911 NYI_assert (31, 21, 0x2F7);
8912 NYI_assert (15, 10, 0x0D);
8913
8914 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
8915 aarch64_set_vec_u64 (cpu, rd, 0,
8916 aarch64_get_vec_u64 (cpu, rn, 0) >
8917 aarch64_get_vec_u64 (cpu, rm, 0) ? -1L : 0L);
8918 }
8919
8920 static void
8921 do_scalar_USHR (sim_cpu *cpu)
8922 {
8923 /* instr [31,23] = 0111 1111 0
8924 instr [22,16] = shift amount
8925 instr [15,10] = 0000 01
8926 instr [9, 5] = Rn
8927 instr [4, 0] = Rd. */
8928
8929 unsigned amount = 128 - INSTR (22, 16);
8930 unsigned rn = INSTR (9, 5);
8931 unsigned rd = INSTR (4, 0);
8932
8933 NYI_assert (31, 23, 0x0FE);
8934 NYI_assert (15, 10, 0x01);
8935
8936 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
8937 aarch64_set_vec_u64 (cpu, rd, 0,
8938 aarch64_get_vec_u64 (cpu, rn, 0) >> amount);
8939 }
8940
8941 static void
8942 do_scalar_SSHL (sim_cpu *cpu)
8943 {
8944 /* instr [31,21] = 0101 1110 111
8945 instr [20,16] = Rm
8946 instr [15,10] = 0100 01
8947 instr [9, 5] = Rn
8948 instr [4, 0] = Rd. */
8949
8950 unsigned rm = INSTR (20, 16);
8951 unsigned rn = INSTR (9, 5);
8952 unsigned rd = INSTR (4, 0);
8953 signed int shift = aarch64_get_vec_s8 (cpu, rm, 0);
8954
8955 NYI_assert (31, 21, 0x2F7);
8956 NYI_assert (15, 10, 0x11);
8957
8958 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
8959 if (shift >= 0)
8960 aarch64_set_vec_s64 (cpu, rd, 0,
8961 aarch64_get_vec_s64 (cpu, rn, 0) << shift);
8962 else
8963 aarch64_set_vec_s64 (cpu, rd, 0,
8964 aarch64_get_vec_s64 (cpu, rn, 0) >> - shift);
8965 }
8966
8967 /* Floating point scalar compare greater than or equal to 0. */
8968 static void
8969 do_scalar_FCMGE_zero (sim_cpu *cpu)
8970 {
8971 /* instr [31,23] = 0111 1110 1
8972 instr [22,22] = size
8973 instr [21,16] = 1000 00
8974 instr [15,10] = 1100 10
8975 instr [9, 5] = Rn
8976 instr [4, 0] = Rd. */
8977
8978 unsigned size = INSTR (22, 22);
8979 unsigned rn = INSTR (9, 5);
8980 unsigned rd = INSTR (4, 0);
8981
8982 NYI_assert (31, 23, 0x0FD);
8983 NYI_assert (21, 16, 0x20);
8984 NYI_assert (15, 10, 0x32);
8985
8986 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
8987 if (size)
8988 aarch64_set_vec_u64 (cpu, rd, 0,
8989 aarch64_get_vec_double (cpu, rn, 0) >= 0.0 ? -1 : 0);
8990 else
8991 aarch64_set_vec_u32 (cpu, rd, 0,
8992 aarch64_get_vec_float (cpu, rn, 0) >= 0.0 ? -1 : 0);
8993 }
8994
8995 /* Floating point scalar compare less than or equal to 0. */
8996 static void
8997 do_scalar_FCMLE_zero (sim_cpu *cpu)
8998 {
8999 /* instr [31,23] = 0111 1110 1
9000 instr [22,22] = size
9001 instr [21,16] = 1000 00
9002 instr [15,10] = 1101 10
9003 instr [9, 5] = Rn
9004 instr [4, 0] = Rd. */
9005
9006 unsigned size = INSTR (22, 22);
9007 unsigned rn = INSTR (9, 5);
9008 unsigned rd = INSTR (4, 0);
9009
9010 NYI_assert (31, 23, 0x0FD);
9011 NYI_assert (21, 16, 0x20);
9012 NYI_assert (15, 10, 0x36);
9013
9014 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9015 if (size)
9016 aarch64_set_vec_u64 (cpu, rd, 0,
9017 aarch64_get_vec_double (cpu, rn, 0) <= 0.0 ? -1 : 0);
9018 else
9019 aarch64_set_vec_u32 (cpu, rd, 0,
9020 aarch64_get_vec_float (cpu, rn, 0) <= 0.0 ? -1 : 0);
9021 }
9022
9023 /* Floating point scalar compare greater than 0. */
9024 static void
9025 do_scalar_FCMGT_zero (sim_cpu *cpu)
9026 {
9027 /* instr [31,23] = 0101 1110 1
9028 instr [22,22] = size
9029 instr [21,16] = 1000 00
9030 instr [15,10] = 1100 10
9031 instr [9, 5] = Rn
9032 instr [4, 0] = Rd. */
9033
9034 unsigned size = INSTR (22, 22);
9035 unsigned rn = INSTR (9, 5);
9036 unsigned rd = INSTR (4, 0);
9037
9038 NYI_assert (31, 23, 0x0BD);
9039 NYI_assert (21, 16, 0x20);
9040 NYI_assert (15, 10, 0x32);
9041
9042 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9043 if (size)
9044 aarch64_set_vec_u64 (cpu, rd, 0,
9045 aarch64_get_vec_double (cpu, rn, 0) > 0.0 ? -1 : 0);
9046 else
9047 aarch64_set_vec_u32 (cpu, rd, 0,
9048 aarch64_get_vec_float (cpu, rn, 0) > 0.0 ? -1 : 0);
9049 }
9050
9051 /* Floating point scalar compare equal to 0. */
9052 static void
9053 do_scalar_FCMEQ_zero (sim_cpu *cpu)
9054 {
9055 /* instr [31,23] = 0101 1110 1
9056 instr [22,22] = size
9057 instr [21,16] = 1000 00
9058 instr [15,10] = 1101 10
9059 instr [9, 5] = Rn
9060 instr [4, 0] = Rd. */
9061
9062 unsigned size = INSTR (22, 22);
9063 unsigned rn = INSTR (9, 5);
9064 unsigned rd = INSTR (4, 0);
9065
9066 NYI_assert (31, 23, 0x0BD);
9067 NYI_assert (21, 16, 0x20);
9068 NYI_assert (15, 10, 0x36);
9069
9070 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9071 if (size)
9072 aarch64_set_vec_u64 (cpu, rd, 0,
9073 aarch64_get_vec_double (cpu, rn, 0) == 0.0 ? -1 : 0);
9074 else
9075 aarch64_set_vec_u32 (cpu, rd, 0,
9076 aarch64_get_vec_float (cpu, rn, 0) == 0.0 ? -1 : 0);
9077 }
9078
9079 /* Floating point scalar compare less than 0. */
9080 static void
9081 do_scalar_FCMLT_zero (sim_cpu *cpu)
9082 {
9083 /* instr [31,23] = 0101 1110 1
9084 instr [22,22] = size
9085 instr [21,16] = 1000 00
9086 instr [15,10] = 1110 10
9087 instr [9, 5] = Rn
9088 instr [4, 0] = Rd. */
9089
9090 unsigned size = INSTR (22, 22);
9091 unsigned rn = INSTR (9, 5);
9092 unsigned rd = INSTR (4, 0);
9093
9094 NYI_assert (31, 23, 0x0BD);
9095 NYI_assert (21, 16, 0x20);
9096 NYI_assert (15, 10, 0x3A);
9097
9098 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9099 if (size)
9100 aarch64_set_vec_u64 (cpu, rd, 0,
9101 aarch64_get_vec_double (cpu, rn, 0) < 0.0 ? -1 : 0);
9102 else
9103 aarch64_set_vec_u32 (cpu, rd, 0,
9104 aarch64_get_vec_float (cpu, rn, 0) < 0.0 ? -1 : 0);
9105 }
9106
9107 static void
9108 do_scalar_shift (sim_cpu *cpu)
9109 {
9110 /* instr [31,23] = 0101 1111 0
9111 instr [22,16] = shift amount
9112 instr [15,10] = 0101 01 [SHL]
9113 instr [15,10] = 0000 01 [SSHR]
9114 instr [9, 5] = Rn
9115 instr [4, 0] = Rd. */
9116
9117 unsigned rn = INSTR (9, 5);
9118 unsigned rd = INSTR (4, 0);
9119 unsigned amount;
9120
9121 NYI_assert (31, 23, 0x0BE);
9122
9123 if (INSTR (22, 22) == 0)
9124 HALT_UNALLOC;
9125
9126 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9127 switch (INSTR (15, 10))
9128 {
9129 case 0x01: /* SSHR */
9130 amount = 128 - INSTR (22, 16);
9131 aarch64_set_vec_s64 (cpu, rd, 0,
9132 aarch64_get_vec_s64 (cpu, rn, 0) >> amount);
9133 return;
9134 case 0x15: /* SHL */
9135 amount = INSTR (22, 16) - 64;
9136 aarch64_set_vec_u64 (cpu, rd, 0,
9137 aarch64_get_vec_u64 (cpu, rn, 0) << amount);
9138 return;
9139 default:
9140 HALT_NYI;
9141 }
9142 }
9143
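/* Editor's sketch (not simulator code) of the immediate decodings used by
   do_scalar_shift and do_scalar_USHR above: for the 64-bit scalar right
   shifts the 7-bit field instr[22,16] encodes 128 - shift, while for SHL
   it encodes 64 + shift.  */
#ifdef AARCH64_SIM_SHIFT_IMM_DEMO
static unsigned
sshr64_shift (unsigned field)   /* field = instr[22,16], bit 22 set.  */
{
  return 128 - field;           /* valid encodings give 1..64.  */
}

static unsigned
shl64_shift (unsigned field)
{
  return field - 64;            /* valid encodings give 0..63.  */
}
#endif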
9144 /* FCMEQ FCMGT FCMGE. */
9145 static void
9146 do_scalar_FCM (sim_cpu *cpu)
9147 {
9148 /* instr [31,30] = 01
9149 instr [29] = U
9150 instr [28,24] = 1 1110
9151 instr [23] = E
9152 instr [22] = size
9153 instr [21] = 1
9154 instr [20,16] = Rm
9155 instr [15,12] = 1110
9156 instr [11] = AC
9157 instr [10] = 1
9158 instr [9, 5] = Rn
9159 instr [4, 0] = Rd. */
9160
9161 unsigned rm = INSTR (20, 16);
9162 unsigned rn = INSTR (9, 5);
9163 unsigned rd = INSTR (4, 0);
9164 unsigned EUac = (INSTR (23, 23) << 2) | (INSTR (29, 29) << 1) | INSTR (11, 11);
9165 unsigned result;
9166 float val1;
9167 float val2;
9168
9169 NYI_assert (31, 30, 1);
9170 NYI_assert (28, 24, 0x1E);
9171 NYI_assert (21, 21, 1);
9172 NYI_assert (15, 12, 0xE);
9173 NYI_assert (10, 10, 1);
9174
9175 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9176 if (INSTR (22, 22))
9177 {
9178 double val1 = aarch64_get_FP_double (cpu, rn);
9179 double val2 = aarch64_get_FP_double (cpu, rm);
9180
9181 switch (EUac)
9182 {
9183 case 0: /* 000 */
9184 result = val1 == val2;
9185 break;
9186
9187 case 3: /* 011 */
9188 val1 = fabs (val1);
9189 val2 = fabs (val2);
9190 /* Fall through. */
9191 case 2: /* 010 */
9192 result = val1 >= val2;
9193 break;
9194
9195 case 7: /* 111 */
9196 val1 = fabs (val1);
9197 val2 = fabs (val2);
9198 /* Fall through. */
9199 case 6: /* 110 */
9200 result = val1 > val2;
9201 break;
9202
9203 default:
9204 HALT_UNALLOC;
9205 }
9206
9207 aarch64_set_vec_u32 (cpu, rd, 0, result ? -1 : 0);
9208 return;
9209 }
9210
9211 val1 = aarch64_get_FP_float (cpu, rn);
9212 val2 = aarch64_get_FP_float (cpu, rm);
9213
9214 switch (EUac)
9215 {
9216 case 0: /* 000 */
9217 result = val1 == val2;
9218 break;
9219
9220 case 3: /* 011 */
9221 val1 = fabsf (val1);
9222 val2 = fabsf (val2);
9223 /* Fall through. */
9224 case 2: /* 010 */
9225 result = val1 >= val2;
9226 break;
9227
9228 case 7: /* 111 */
9229 val1 = fabsf (val1);
9230 val2 = fabsf (val2);
9231 /* Fall through. */
9232 case 6: /* 110 */
9233 result = val1 > val2;
9234 break;
9235
9236 default:
9237 HALT_UNALLOC;
9238 }
9239
9240 aarch64_set_vec_u32 (cpu, rd, 0, result ? -1 : 0);
9241 }
9242
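/* Editor's sketch (not simulator code) of the E:U:ac dispatch used by
   do_scalar_FCM above: 000 is FCMEQ (==), 01x is FCMGE/FACGE (>=), 11x is
   FCMGT/FACGT (>), where ac = 1 selects the absolute-value ("FAC") forms
   and the remaining encodings are unallocated.  */
#ifdef AARCH64_SIM_FCM_DEMO
static int
fcm_demo (double a, double b, unsigned euac)
{
  double x = (euac & 1) ? fabs (a) : a;
  double y = (euac & 1) ? fabs (b) : b;

  switch (euac)
    {
    case 0:         return x == y;   /* FCMEQ.  */
    case 2: case 3: return x >= y;   /* FCMGE / FACGE.  */
    case 6: case 7: return x >  y;   /* FCMGT / FACGT.  */
    default:        return 0;        /* Unallocated.  */
    }
}
#endif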
9243 /* An alias of DUP. */
9244 static void
9245 do_scalar_MOV (sim_cpu *cpu)
9246 {
9247 /* instr [31,21] = 0101 1110 000
9248 instr [20,16] = imm5
9249 instr [15,10] = 0000 01
9250 instr [9, 5] = Rn
9251 instr [4, 0] = Rd. */
9252
9253 unsigned rn = INSTR (9, 5);
9254 unsigned rd = INSTR (4, 0);
9255 unsigned index;
9256
9257 NYI_assert (31, 21, 0x2F0);
9258 NYI_assert (15, 10, 0x01);
9259
9260 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9261 if (INSTR (16, 16))
9262 {
9263 /* 8-bit. */
9264 index = INSTR (20, 17);
9265 aarch64_set_vec_u8
9266 (cpu, rd, 0, aarch64_get_vec_u8 (cpu, rn, index));
9267 }
9268 else if (INSTR (17, 17))
9269 {
9270 /* 16-bit. */
9271 index = INSTR (20, 18);
9272 aarch64_set_vec_u16
9273 (cpu, rd, 0, aarch64_get_vec_u16 (cpu, rn, index));
9274 }
9275 else if (INSTR (18, 18))
9276 {
9277 /* 32-bit. */
9278 index = INSTR (20, 19);
9279 aarch64_set_vec_u32
9280 (cpu, rd, 0, aarch64_get_vec_u32 (cpu, rn, index));
9281 }
9282 else if (INSTR (19, 19))
9283 {
9284 /* 64-bit. */
9285 index = INSTR (20, 20);
9286 aarch64_set_vec_u64
9287 (cpu, rd, 0, aarch64_get_vec_u64 (cpu, rn, index));
9288 }
9289 else
9290 HALT_UNALLOC;
9291 }
9292
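/* Editor's sketch (not simulator code) of the imm5 decoding used by
   do_scalar_MOV above: the lowest set bit of imm5 picks the element width
   (bit 0 ==> byte ... bit 3 ==> doubleword) and the bits above it form
   the element index.  */
#ifdef AARCH64_SIM_IMM5_DEMO
static unsigned
imm5_element_bits (unsigned imm5, unsigned *index)
{
  unsigned b;

  for (b = 0; b < 4; b++)
    if (imm5 & (1u << b))
      {
        *index = imm5 >> (b + 1);
        return 8u << b;       /* 8, 16, 32 or 64 bits.  */
      }
  return 0;                   /* imm5 with no low bit set ==> UNALLOC.  */
}
#endif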
9293 static void
9294 do_scalar_NEG (sim_cpu *cpu)
9295 {
9296 /* instr [31,10] = 0111 1110 1110 0000 1011 10
9297 instr [9, 5] = Rn
9298 instr [4, 0] = Rd. */
9299
9300 unsigned rn = INSTR (9, 5);
9301 unsigned rd = INSTR (4, 0);
9302
9303 NYI_assert (31, 10, 0x1FB82E);
9304
9305 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9306 aarch64_set_vec_u64 (cpu, rd, 0, - aarch64_get_vec_u64 (cpu, rn, 0));
9307 }
9308
9309 static void
9310 do_scalar_USHL (sim_cpu *cpu)
9311 {
9312 /* instr [31,21] = 0111 1110 111
9313 instr [20,16] = Rm
9314 instr [15,10] = 0100 01
9315 instr [9, 5] = Rn
9316 instr [4, 0] = Rd. */
9317
9318 unsigned rm = INSTR (20, 16);
9319 unsigned rn = INSTR (9, 5);
9320 unsigned rd = INSTR (4, 0);
9321 signed int shift = aarch64_get_vec_s8 (cpu, rm, 0);
9322
9323 NYI_assert (31, 21, 0x3F7);
9324 NYI_assert (15, 10, 0x11);
9325
9326 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9327 if (shift >= 0)
9328 aarch64_set_vec_u64 (cpu, rd, 0, aarch64_get_vec_u64 (cpu, rn, 0) << shift);
9329 else
9330 aarch64_set_vec_u64 (cpu, rd, 0, aarch64_get_vec_u64 (cpu, rn, 0) >> - shift);
9331 }
9332
9333 static void
9334 do_double_add (sim_cpu *cpu)
9335 {
9336 /* instr [31,21] = 0101 1110 111
9337 instr [20,16] = Fn
9338 instr [15,10] = 1000 01
9339 instr [9,5] = Fm
9340 instr [4,0] = Fd. */
9341 unsigned Fd;
9342 unsigned Fm;
9343 unsigned Fn;
9344 double val1;
9345 double val2;
9346
9347 NYI_assert (31, 21, 0x2F7);
9348 NYI_assert (15, 10, 0x21);
9349
9350 Fd = INSTR (4, 0);
9351 Fm = INSTR (9, 5);
9352 Fn = INSTR (20, 16);
9353
9354 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9355 val1 = aarch64_get_FP_double (cpu, Fm);
9356 val2 = aarch64_get_FP_double (cpu, Fn);
9357
9358 aarch64_set_FP_double (cpu, Fd, val1 + val2);
9359 }
9360
9361 static void
9362 do_scalar_UCVTF (sim_cpu *cpu)
9363 {
9364 /* instr [31,23] = 0111 1110 0
9365 instr [22] = single(0)/double(1)
9366 instr [21,10] = 10 0001 1101 10
9367 instr [9,5] = rn
9368 instr [4,0] = rd. */
9369
9370 unsigned rn = INSTR (9, 5);
9371 unsigned rd = INSTR (4, 0);
9372
9373 NYI_assert (31, 23, 0x0FC);
9374 NYI_assert (21, 10, 0x876);
9375
9376 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9377 if (INSTR (22, 22))
9378 {
9379 uint64_t val = aarch64_get_vec_u64 (cpu, rn, 0);
9380
9381 aarch64_set_vec_double (cpu, rd, 0, (double) val);
9382 }
9383 else
9384 {
9385 uint32_t val = aarch64_get_vec_u32 (cpu, rn, 0);
9386
9387 aarch64_set_vec_float (cpu, rd, 0, (float) val);
9388 }
9389 }
9390
9391 static void
9392 do_scalar_vec (sim_cpu *cpu)
9393 {
9394 /* instr [30] = 1. */
9395 /* instr [28,25] = 1111. */
9396 switch (INSTR (31, 23))
9397 {
9398 case 0xBC:
9399 switch (INSTR (15, 10))
9400 {
9401 case 0x01: do_scalar_MOV (cpu); return;
9402 case 0x39: do_scalar_FCM (cpu); return;
9403 case 0x3B: do_scalar_FCM (cpu); return;
9404 }
9405 break;
9406
9407 case 0xBE: do_scalar_shift (cpu); return;
9408
9409 case 0xFC:
9410 switch (INSTR (15, 10))
9411 {
9412 case 0x36:
9413 switch (INSTR (21, 16))
9414 {
9415 case 0x30: do_scalar_FADDP (cpu); return;
9416 case 0x21: do_scalar_UCVTF (cpu); return;
9417 }
9418 HALT_NYI;
9419 case 0x39: do_scalar_FCM (cpu); return;
9420 case 0x3B: do_scalar_FCM (cpu); return;
9421 }
9422 break;
9423
9424 case 0xFD:
9425 switch (INSTR (15, 10))
9426 {
9427 case 0x0D: do_scalar_CMGT (cpu); return;
9428 case 0x11: do_scalar_USHL (cpu); return;
9429 case 0x2E: do_scalar_NEG (cpu); return;
9430 case 0x32: do_scalar_FCMGE_zero (cpu); return;
9431 case 0x35: do_scalar_FABD (cpu); return;
9432 case 0x36: do_scalar_FCMLE_zero (cpu); return;
9433 case 0x39: do_scalar_FCM (cpu); return;
9434 case 0x3B: do_scalar_FCM (cpu); return;
9435 default:
9436 HALT_NYI;
9437 }
9438
9439 case 0xFE: do_scalar_USHR (cpu); return;
9440
9441 case 0xBD:
9442 switch (INSTR (15, 10))
9443 {
9444 case 0x21: do_double_add (cpu); return;
9445 case 0x11: do_scalar_SSHL (cpu); return;
9446 case 0x32: do_scalar_FCMGT_zero (cpu); return;
9447 case 0x36: do_scalar_FCMEQ_zero (cpu); return;
9448 case 0x3A: do_scalar_FCMLT_zero (cpu); return;
9449 default:
9450 HALT_NYI;
9451 }
9452
9453 default:
9454 HALT_NYI;
9455 }
9456 }
9457
9458 static void
9459 dexAdvSIMD1 (sim_cpu *cpu)
9460 {
9461 /* instr [28,25] = 1 111. */
9462
9463 /* We are currently only interested in the basic
9464 scalar fp routines which all have bit 30 = 0. */
9465 if (INSTR (30, 30))
9466 do_scalar_vec (cpu);
9467
9468 /* instr[24] is set for FP data processing 3-source and clear for
9469 all other basic scalar fp instruction groups. */
9470 else if (INSTR (24, 24))
9471 dexSimpleFPDataProc3Source (cpu);
9472
9473 /* instr[21] is clear for floating <-> fixed conversions and set for
9474 all other basic scalar fp instruction groups. */
9475 else if (!INSTR (21, 21))
9476 dexSimpleFPFixedConvert (cpu);
9477
9478 /* instr[11,10] : 01 ==> cond compare, 10 ==> Data Proc 2 Source
9479 11 ==> cond select, 00 ==> other. */
9480 else
9481 switch (INSTR (11, 10))
9482 {
9483 case 1: dexSimpleFPCondCompare (cpu); return;
9484 case 2: dexSimpleFPDataProc2Source (cpu); return;
9485 case 3: dexSimpleFPCondSelect (cpu); return;
9486
9487 default:
9488 /* Now an ordered cascade of tests.
9489 FP immediate has instr [12] == 1.
9490 FP compare has instr [13] == 1.
9491 FP Data Proc 1 Source has instr [14] == 1.
9492 FP floating <--> integer conversions has instr [15] == 0. */
9493 if (INSTR (12, 12))
9494 dexSimpleFPImmediate (cpu);
9495
9496 else if (INSTR (13, 13))
9497 dexSimpleFPCompare (cpu);
9498
9499 else if (INSTR (14, 14))
9500 dexSimpleFPDataProc1Source (cpu);
9501
9502 else if (!INSTR (15, 15))
9503 dexSimpleFPIntegerConvert (cpu);
9504
9505 else
9506 /* If we get here then instr[15] == 1 which means UNALLOC. */
9507 HALT_UNALLOC;
9508 }
9509 }
9510
9511 /* PC relative addressing. */
9512
9513 static void
9514 pcadr (sim_cpu *cpu)
9515 {
9516 /* instr[31] = op : 0 ==> ADR, 1 ==> ADRP
9517 instr[30,29] = immlo
9518 instr[23,5] = immhi. */
9519 uint64_t address;
9520 unsigned rd = INSTR (4, 0);
9521 uint32_t isPage = INSTR (31, 31);
9522 union { int64_t s64; uint64_t u64; } imm;
9523 uint64_t offset;
9524
9525 imm.s64 = simm64 (aarch64_get_instr (cpu), 23, 5);
9526 offset = imm.u64;
9527 offset = (offset << 2) | INSTR (30, 29);
9528
9529 address = aarch64_get_PC (cpu);
9530
9531 if (isPage)
9532 {
9533 offset <<= 12;
9534 address &= ~0xfff;
9535 }
9536
9537 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9538 aarch64_set_reg_u64 (cpu, rd, NO_SP, address + offset);
9539 }
9540
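/* Editor's sketch (not simulator code) of the two address computations in
   pcadr above, assuming `imm21' is the sign-extended immhi:immlo field:
   ADR adds the byte offset to the PC, ADRP adds a page offset to the PC
   with its low 12 bits cleared.  */
#ifdef AARCH64_SIM_ADR_DEMO
static uint64_t
adr_target (uint64_t pc, int64_t imm21)
{
  return pc + (uint64_t) imm21;
}

static uint64_t
adrp_target (uint64_t pc, int64_t imm21)
{
  return (pc & ~(uint64_t) 0xfff) + ((uint64_t) imm21 << 12);
}
#endif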
9541 /* Specific decode and execute for group Data Processing Immediate. */
9542
9543 static void
9544 dexPCRelAddressing (sim_cpu *cpu)
9545 {
9546 /* assert instr[28,24] = 10000. */
9547 pcadr (cpu);
9548 }
9549
9550 /* Immediate logical.
9551 The bimm32/64 argument is constructed by replicating a 2, 4, 8,
9552 16, 32 or 64 bit sequence pulled out at decode and possibly
9553 inverting it.
9554
9555 N.B. the output register (dest) can normally be Xn or SP;
9556 the exception occurs for flag-setting instructions, which may
9557 only use Xn for the output (dest). The input register can
9558 never be SP. */
9559
9560 /* 32 bit and immediate. */
9561 static void
9562 and32 (sim_cpu *cpu, uint32_t bimm)
9563 {
9564 unsigned rn = INSTR (9, 5);
9565 unsigned rd = INSTR (4, 0);
9566
9567 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9568 aarch64_set_reg_u64 (cpu, rd, SP_OK,
9569 aarch64_get_reg_u32 (cpu, rn, NO_SP) & bimm);
9570 }
9571
9572 /* 64 bit and immediate. */
9573 static void
9574 and64 (sim_cpu *cpu, uint64_t bimm)
9575 {
9576 unsigned rn = INSTR (9, 5);
9577 unsigned rd = INSTR (4, 0);
9578
9579 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9580 aarch64_set_reg_u64 (cpu, rd, SP_OK,
9581 aarch64_get_reg_u64 (cpu, rn, NO_SP) & bimm);
9582 }
9583
9584 /* 32 bit and immediate set flags. */
9585 static void
9586 ands32 (sim_cpu *cpu, uint32_t bimm)
9587 {
9588 unsigned rn = INSTR (9, 5);
9589 unsigned rd = INSTR (4, 0);
9590
9591 uint32_t value1 = aarch64_get_reg_u32 (cpu, rn, NO_SP);
9592 uint32_t value2 = bimm;
9593
9594 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9595 aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 & value2);
9596 set_flags_for_binop32 (cpu, value1 & value2);
9597 }
9598
9599 /* 64 bit and immediate set flags. */
9600 static void
9601 ands64 (sim_cpu *cpu, uint64_t bimm)
9602 {
9603 unsigned rn = INSTR (9, 5);
9604 unsigned rd = INSTR (4, 0);
9605
9606 uint64_t value1 = aarch64_get_reg_u64 (cpu, rn, NO_SP);
9607 uint64_t value2 = bimm;
9608
9609 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9610 aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 & value2);
9611 set_flags_for_binop64 (cpu, value1 & value2);
9612 }
9613
9614 /* 32 bit exclusive or immediate. */
9615 static void
9616 eor32 (sim_cpu *cpu, uint32_t bimm)
9617 {
9618 unsigned rn = INSTR (9, 5);
9619 unsigned rd = INSTR (4, 0);
9620
9621 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9622 aarch64_set_reg_u64 (cpu, rd, SP_OK,
9623 aarch64_get_reg_u32 (cpu, rn, NO_SP) ^ bimm);
9624 }
9625
9626 /* 64 bit exclusive or immediate. */
9627 static void
9628 eor64 (sim_cpu *cpu, uint64_t bimm)
9629 {
9630 unsigned rn = INSTR (9, 5);
9631 unsigned rd = INSTR (4, 0);
9632
9633 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9634 aarch64_set_reg_u64 (cpu, rd, SP_OK,
9635 aarch64_get_reg_u64 (cpu, rn, NO_SP) ^ bimm);
9636 }
9637
9638 /* 32 bit or immediate. */
9639 static void
9640 orr32 (sim_cpu *cpu, uint32_t bimm)
9641 {
9642 unsigned rn = INSTR (9, 5);
9643 unsigned rd = INSTR (4, 0);
9644
9645 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9646 aarch64_set_reg_u64 (cpu, rd, SP_OK,
9647 aarch64_get_reg_u32 (cpu, rn, NO_SP) | bimm);
9648 }
9649
9650 /* 64 bit or immediate. */
9651 static void
9652 orr64 (sim_cpu *cpu, uint64_t bimm)
9653 {
9654 unsigned rn = INSTR (9, 5);
9655 unsigned rd = INSTR (4, 0);
9656
9657 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9658 aarch64_set_reg_u64 (cpu, rd, SP_OK,
9659 aarch64_get_reg_u64 (cpu, rn, NO_SP) | bimm);
9660 }
9661
9662 /* Logical shifted register.
9663 These allow an optional LSL, ASR, LSR or ROR to the second source
9664 register with a count up to the register bit count.
9665 N.B register args may not be SP. */
9666
9667 /* 32 bit AND shifted register. */
9668 static void
9669 and32_shift (sim_cpu *cpu, Shift shift, uint32_t count)
9670 {
9671 unsigned rm = INSTR (20, 16);
9672 unsigned rn = INSTR (9, 5);
9673 unsigned rd = INSTR (4, 0);
9674
9675 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9676 aarch64_set_reg_u64
9677 (cpu, rd, NO_SP, aarch64_get_reg_u32 (cpu, rn, NO_SP)
9678 & shifted32 (aarch64_get_reg_u32 (cpu, rm, NO_SP), shift, count));
9679 }
9680
9681 /* 64 bit AND shifted register. */
9682 static void
9683 and64_shift (sim_cpu *cpu, Shift shift, uint32_t count)
9684 {
9685 unsigned rm = INSTR (20, 16);
9686 unsigned rn = INSTR (9, 5);
9687 unsigned rd = INSTR (4, 0);
9688
9689 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9690 aarch64_set_reg_u64
9691 (cpu, rd, NO_SP, aarch64_get_reg_u64 (cpu, rn, NO_SP)
9692 & shifted64 (aarch64_get_reg_u64 (cpu, rm, NO_SP), shift, count));
9693 }
9694
9695 /* 32 bit AND shifted register setting flags. */
9696 static void
9697 ands32_shift (sim_cpu *cpu, Shift shift, uint32_t count)
9698 {
9699 unsigned rm = INSTR (20, 16);
9700 unsigned rn = INSTR (9, 5);
9701 unsigned rd = INSTR (4, 0);
9702
9703 uint32_t value1 = aarch64_get_reg_u32 (cpu, rn, NO_SP);
9704 uint32_t value2 = shifted32 (aarch64_get_reg_u32 (cpu, rm, NO_SP),
9705 shift, count);
9706
9707 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9708 aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 & value2);
9709 set_flags_for_binop32 (cpu, value1 & value2);
9710 }
9711
9712 /* 64 bit AND shifted register setting flags. */
9713 static void
9714 ands64_shift (sim_cpu *cpu, Shift shift, uint32_t count)
9715 {
9716 unsigned rm = INSTR (20, 16);
9717 unsigned rn = INSTR (9, 5);
9718 unsigned rd = INSTR (4, 0);
9719
9720 uint64_t value1 = aarch64_get_reg_u64 (cpu, rn, NO_SP);
9721 uint64_t value2 = shifted64 (aarch64_get_reg_u64 (cpu, rm, NO_SP),
9722 shift, count);
9723
9724 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9725 aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 & value2);
9726 set_flags_for_binop64 (cpu, value1 & value2);
9727 }
9728
9729 /* 32 bit BIC shifted register. */
9730 static void
9731 bic32_shift (sim_cpu *cpu, Shift shift, uint32_t count)
9732 {
9733 unsigned rm = INSTR (20, 16);
9734 unsigned rn = INSTR (9, 5);
9735 unsigned rd = INSTR (4, 0);
9736
9737 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9738 aarch64_set_reg_u64
9739 (cpu, rd, NO_SP, aarch64_get_reg_u32 (cpu, rn, NO_SP)
9740 & ~ shifted32 (aarch64_get_reg_u32 (cpu, rm, NO_SP), shift, count));
9741 }
9742
9743 /* 64 bit BIC shifted register. */
9744 static void
9745 bic64_shift (sim_cpu *cpu, Shift shift, uint32_t count)
9746 {
9747 unsigned rm = INSTR (20, 16);
9748 unsigned rn = INSTR (9, 5);
9749 unsigned rd = INSTR (4, 0);
9750
9751 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9752 aarch64_set_reg_u64
9753 (cpu, rd, NO_SP, aarch64_get_reg_u64 (cpu, rn, NO_SP)
9754 & ~ shifted64 (aarch64_get_reg_u64 (cpu, rm, NO_SP), shift, count));
9755 }
9756
9757 /* 32 bit BIC shifted register setting flags. */
9758 static void
9759 bics32_shift (sim_cpu *cpu, Shift shift, uint32_t count)
9760 {
9761 unsigned rm = INSTR (20, 16);
9762 unsigned rn = INSTR (9, 5);
9763 unsigned rd = INSTR (4, 0);
9764
9765 uint32_t value1 = aarch64_get_reg_u32 (cpu, rn, NO_SP);
9766 uint32_t value2 = ~ shifted32 (aarch64_get_reg_u32 (cpu, rm, NO_SP),
9767 shift, count);
9768
9769 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9770 aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 & value2);
9771 set_flags_for_binop32 (cpu, value1 & value2);
9772 }
9773
9774 /* 64 bit BIC shifted register setting flags. */
9775 static void
9776 bics64_shift (sim_cpu *cpu, Shift shift, uint32_t count)
9777 {
9778 unsigned rm = INSTR (20, 16);
9779 unsigned rn = INSTR (9, 5);
9780 unsigned rd = INSTR (4, 0);
9781
9782 uint64_t value1 = aarch64_get_reg_u64 (cpu, rn, NO_SP);
9783 uint64_t value2 = ~ shifted64 (aarch64_get_reg_u64 (cpu, rm, NO_SP),
9784 shift, count);
9785
9786 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9787 aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 & value2);
9788 set_flags_for_binop64 (cpu, value1 & value2);
9789 }
9790
9791 /* 32 bit EON shifted register. */
9792 static void
9793 eon32_shift (sim_cpu *cpu, Shift shift, uint32_t count)
9794 {
9795 unsigned rm = INSTR (20, 16);
9796 unsigned rn = INSTR (9, 5);
9797 unsigned rd = INSTR (4, 0);
9798
9799 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9800 aarch64_set_reg_u64
9801 (cpu, rd, NO_SP, aarch64_get_reg_u32 (cpu, rn, NO_SP)
9802 ^ ~ shifted32 (aarch64_get_reg_u32 (cpu, rm, NO_SP), shift, count));
9803 }
9804
9805 /* 64 bit EON shifted register. */
9806 static void
9807 eon64_shift (sim_cpu *cpu, Shift shift, uint32_t count)
9808 {
9809 unsigned rm = INSTR (20, 16);
9810 unsigned rn = INSTR (9, 5);
9811 unsigned rd = INSTR (4, 0);
9812
9813 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9814 aarch64_set_reg_u64
9815 (cpu, rd, NO_SP, aarch64_get_reg_u64 (cpu, rn, NO_SP)
9816 ^ ~ shifted64 (aarch64_get_reg_u64 (cpu, rm, NO_SP), shift, count));
9817 }
9818
9819 /* 32 bit EOR shifted register. */
9820 static void
9821 eor32_shift (sim_cpu *cpu, Shift shift, uint32_t count)
9822 {
9823 unsigned rm = INSTR (20, 16);
9824 unsigned rn = INSTR (9, 5);
9825 unsigned rd = INSTR (4, 0);
9826
9827 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9828 aarch64_set_reg_u64
9829 (cpu, rd, NO_SP, aarch64_get_reg_u32 (cpu, rn, NO_SP)
9830 ^ shifted32 (aarch64_get_reg_u32 (cpu, rm, NO_SP), shift, count));
9831 }
9832
9833 /* 64 bit EOR shifted register. */
9834 static void
9835 eor64_shift (sim_cpu *cpu, Shift shift, uint32_t count)
9836 {
9837 unsigned rm = INSTR (20, 16);
9838 unsigned rn = INSTR (9, 5);
9839 unsigned rd = INSTR (4, 0);
9840
9841 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9842 aarch64_set_reg_u64
9843 (cpu, rd, NO_SP, aarch64_get_reg_u64 (cpu, rn, NO_SP)
9844 ^ shifted64 (aarch64_get_reg_u64 (cpu, rm, NO_SP), shift, count));
9845 }
9846
9847 /* 32 bit ORR shifted register. */
9848 static void
9849 orr32_shift (sim_cpu *cpu, Shift shift, uint32_t count)
9850 {
9851 unsigned rm = INSTR (20, 16);
9852 unsigned rn = INSTR (9, 5);
9853 unsigned rd = INSTR (4, 0);
9854
9855 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9856 aarch64_set_reg_u64
9857 (cpu, rd, NO_SP, aarch64_get_reg_u32 (cpu, rn, NO_SP)
9858 | shifted32 (aarch64_get_reg_u32 (cpu, rm, NO_SP), shift, count));
9859 }
9860
9861 /* 64 bit ORR shifted register. */
9862 static void
9863 orr64_shift (sim_cpu *cpu, Shift shift, uint32_t count)
9864 {
9865 unsigned rm = INSTR (20, 16);
9866 unsigned rn = INSTR (9, 5);
9867 unsigned rd = INSTR (4, 0);
9868
9869 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9870 aarch64_set_reg_u64
9871 (cpu, rd, NO_SP, aarch64_get_reg_u64 (cpu, rn, NO_SP)
9872 | shifted64 (aarch64_get_reg_u64 (cpu, rm, NO_SP), shift, count));
9873 }
9874
9875 /* 32 bit ORN shifted register. */
9876 static void
9877 orn32_shift (sim_cpu *cpu, Shift shift, uint32_t count)
9878 {
9879 unsigned rm = INSTR (20, 16);
9880 unsigned rn = INSTR (9, 5);
9881 unsigned rd = INSTR (4, 0);
9882
9883 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9884 aarch64_set_reg_u64
9885 (cpu, rd, NO_SP, aarch64_get_reg_u32 (cpu, rn, NO_SP)
9886 | ~ shifted32 (aarch64_get_reg_u32 (cpu, rm, NO_SP), shift, count));
9887 }
9888
9889 /* 64 bit ORN shifted register. */
9890 static void
9891 orn64_shift (sim_cpu *cpu, Shift shift, uint32_t count)
9892 {
9893 unsigned rm = INSTR (20, 16);
9894 unsigned rn = INSTR (9, 5);
9895 unsigned rd = INSTR (4, 0);
9896
9897 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9898 aarch64_set_reg_u64
9899 (cpu, rd, NO_SP, aarch64_get_reg_u64 (cpu, rn, NO_SP)
9900 | ~ shifted64 (aarch64_get_reg_u64 (cpu, rm, NO_SP), shift, count));
9901 }
9902
9903 static void
9904 dexLogicalImmediate (sim_cpu *cpu)
9905 {
9906 /* assert instr[28,23] = 100100
9907 instr[31] = size : 0 ==> 32 bit, 1 ==> 64 bit
9908 instr[30,29] = op : 0 ==> AND, 1 ==> ORR, 2 ==> EOR, 3 ==> ANDS
9909 instr[22] = N : used to construct immediate mask
9910 instr[21,16] = immr
9911 instr[15,10] = imms
9912 instr[9,5] = Rn
9913 instr[4,0] = Rd */
9914
9915 /* 32 bit operations must have N = 0 or else we have an UNALLOC. */
9916 uint32_t size = INSTR (31, 31);
9917 uint32_t N = INSTR (22, 22);
9918 /* uint32_t immr = INSTR (21, 16); */
9919 /* uint32_t imms = INSTR (15, 10); */
9920 uint32_t index = INSTR (22, 10);
9921 uint64_t bimm64 = LITable [index];
9922 uint32_t dispatch = INSTR (30, 29);
9923
9924 if (~size & N)
9925 HALT_UNALLOC;
9926
9927 if (!bimm64)
9928 HALT_UNALLOC;
9929
9930 if (size == 0)
9931 {
9932 uint32_t bimm = (uint32_t) bimm64;
9933
9934 switch (dispatch)
9935 {
9936 case 0: and32 (cpu, bimm); return;
9937 case 1: orr32 (cpu, bimm); return;
9938 case 2: eor32 (cpu, bimm); return;
9939 case 3: ands32 (cpu, bimm); return;
9940 }
9941 }
9942 else
9943 {
9944 switch (dispatch)
9945 {
9946 case 0: and64 (cpu, bimm64); return;
9947 case 1: orr64 (cpu, bimm64); return;
9948 case 2: eor64 (cpu, bimm64); return;
9949 case 3: ands64 (cpu, bimm64); return;
9950 }
9951 }
9952 HALT_UNALLOC;
9953 }
9954
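/* Editor's sketch (not simulator code): dexLogicalImmediate above indexes
   LITable with the 13 bits N:immr:imms taken verbatim from instr[22,10];
   entries for invalid encodings are zero, which is why a zero bimm64 is
   treated as UNALLOC.  */
#ifdef AARCH64_SIM_LITABLE_DEMO
static uint32_t
logical_imm_index (uint32_t n, uint32_t immr, uint32_t imms)
{
  return (n << 12) | (immr << 6) | imms;   /* == INSTR (22, 10).  */
}
#endif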
9955 /* Immediate move.
9956 The uimm argument is a 16 bit value to be inserted into the
9957 target register.  The pos argument locates the 16 bit word in the
9958 dest register, i.e. it is in {0, 1} for 32 bit and {0, 1, 2,
9959 3} for 64 bit.
9960 N.B. the register arg may not be SP, so it should be
9961 accessed using the setGZRegisterXXX accessors. */
9962
9963 /* 32 bit move 16 bit immediate zero remaining shorts. */
9964 static void
9965 movz32 (sim_cpu *cpu, uint32_t val, uint32_t pos)
9966 {
9967 unsigned rd = INSTR (4, 0);
9968
9969 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9970 aarch64_set_reg_u64 (cpu, rd, NO_SP, val << (pos * 16));
9971 }
9972
9973 /* 64 bit move 16 bit immediate zero remaining shorts. */
9974 static void
9975 movz64 (sim_cpu *cpu, uint32_t val, uint32_t pos)
9976 {
9977 unsigned rd = INSTR (4, 0);
9978
9979 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9980 aarch64_set_reg_u64 (cpu, rd, NO_SP, ((uint64_t) val) << (pos * 16));
9981 }
9982
9983 /* 32 bit move 16 bit immediate negated. */
9984 static void
9985 movn32 (sim_cpu *cpu, uint32_t val, uint32_t pos)
9986 {
9987 unsigned rd = INSTR (4, 0);
9988
9989 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9990 aarch64_set_reg_u64 (cpu, rd, NO_SP, ((val << (pos * 16)) ^ 0xffffffffU));
9991 }
9992
9993 /* 64 bit move 16 bit immediate negated. */
9994 static void
9995 movn64 (sim_cpu *cpu, uint32_t val, uint32_t pos)
9996 {
9997 unsigned rd = INSTR (4, 0);
9998
9999 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
10000 aarch64_set_reg_u64
10001 (cpu, rd, NO_SP, ((((uint64_t) val) << (pos * 16))
10002 ^ 0xffffffffffffffffULL));
10003 }
10004
10005 /* 32 bit move 16 bit immediate keep remaining shorts. */
10006 static void
10007 movk32 (sim_cpu *cpu, uint32_t val, uint32_t pos)
10008 {
10009 unsigned rd = INSTR (4, 0);
10010 uint32_t current = aarch64_get_reg_u32 (cpu, rd, NO_SP);
10011 uint32_t value = val << (pos * 16);
10012 uint32_t mask = ~(0xffffU << (pos * 16));
10013
10014 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
10015 aarch64_set_reg_u64 (cpu, rd, NO_SP, (value | (current & mask)));
10016 }
10017
10018 /* 64 bit move 16 bit immediate keep remaining shorts. */
10019 static void
10020 movk64 (sim_cpu *cpu, uint32_t val, uint32_t pos)
10021 {
10022 unsigned rd = INSTR (4, 0);
10023 uint64_t current = aarch64_get_reg_u64 (cpu, rd, NO_SP);
10024 uint64_t value = (uint64_t) val << (pos * 16);
10025 uint64_t mask = ~(0xffffULL << (pos * 16));
10026
10027 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
10028 aarch64_set_reg_u64 (cpu, rd, NO_SP, (value | (current & mask)));
10029 }
10030
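/* Editor's sketch (not simulator code): how a MOVZ/MOVK sequence builds a
   full 64-bit constant using the same insert-at-position logic as
   movz64/movk64 above.  The constant is arbitrary.  */
#ifdef AARCH64_SIM_MOVK_DEMO
static uint64_t
movk_demo (void)
{
  uint64_t x;

  x = (uint64_t) 0xdef0;                              /* MOVZ x, #0xdef0.  */
  x = (x & ~(0xffffULL << 16)) | (0x9abcULL << 16);   /* MOVK, LSL #16.  */
  x = (x & ~(0xffffULL << 32)) | (0x5678ULL << 32);   /* MOVK, LSL #32.  */
  x = (x & ~(0xffffULL << 48)) | (0x1234ULL << 48);   /* MOVK, LSL #48.  */
  return x;                                           /* 0x123456789abcdef0.  */
}
#endif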
10031 static void
10032 dexMoveWideImmediate (sim_cpu *cpu)
10033 {
10034 /* assert instr[28:23] = 100101
10035 instr[31] = size : 0 ==> 32 bit, 1 ==> 64 bit
10036 instr[30,29] = op : 0 ==> MOVN, 1 ==> UNALLOC, 2 ==> MOVZ, 3 ==> MOVK
10037 instr[22,21] = shift : 00 == LSL#0, 01 = LSL#16, 10 = LSL#32, 11 = LSL#48
10038 instr[20,5] = uimm16
10039 instr[4,0] = Rd */
10040
10041 /* N.B. the (multiple of 16) shift is applied by the called routine;
10042 we just pass the multiplier. */
10043
10044 uint32_t imm;
10045 uint32_t size = INSTR (31, 31);
10046 uint32_t op = INSTR (30, 29);
10047 uint32_t shift = INSTR (22, 21);
10048
10049 /* 32 bit can only shift by 0 or 1 lot of 16;
10050 anything else is an unallocated instruction. */
10051 if (size == 0 && (shift > 1))
10052 HALT_UNALLOC;
10053
10054 if (op == 1)
10055 HALT_UNALLOC;
10056
10057 imm = INSTR (20, 5);
10058
10059 if (size == 0)
10060 {
10061 if (op == 0)
10062 movn32 (cpu, imm, shift);
10063 else if (op == 2)
10064 movz32 (cpu, imm, shift);
10065 else
10066 movk32 (cpu, imm, shift);
10067 }
10068 else
10069 {
10070 if (op == 0)
10071 movn64 (cpu, imm, shift);
10072 else if (op == 2)
10073 movz64 (cpu, imm, shift);
10074 else
10075 movk64 (cpu, imm, shift);
10076 }
10077 }
10078
10079 /* Bitfield operations.
10080 These take a pair of bit positions r and s which are in {0..31}
10081 or {0..63} depending on the instruction word size.
10082 N.B register args may not be SP. */
10083
10084 /* OK, we start with ubfm, which just needs to pick
10085 some bits out of the source, zero the rest, and write
10086 the result to dest.  Just needs two logical shifts. */
10087
10088 /* 32 bit bitfield move, left and right of affected zeroed
10089 if r <= s Wd<s-r:0> = Wn<s:r> else Wd<32+s-r,32-r> = Wn<s:0>. */
10090 static void
10091 ubfm32 (sim_cpu *cpu, uint32_t r, uint32_t s)
10092 {
10093 unsigned rd;
10094 unsigned rn = INSTR (9, 5);
10095 uint32_t value = aarch64_get_reg_u32 (cpu, rn, NO_SP);
10096
10097 /* Pick either s+1-r or s+1 consecutive bits out of the original word. */
10098 if (r <= s)
10099 {
10100 /* 31:...:s:xxx:r:...:0 ==> 31:...:s-r:xxx:0.
10101 We want only bits s:xxx:r at the bottom of the word
10102 so we LSL bit s up to bit 31 i.e. by 31 - s
10103 and then we LSR to bring bit 31 down to bit s - r
10104 i.e. by 31 + r - s. */
10105 value <<= 31 - s;
10106 value >>= 31 + r - s;
10107 }
10108 else
10109 {
10110 /* 31:...:s:xxx:0 ==> 31:...:31-(r-1)+s:xxx:31-(r-1):...:0
10111 We want only bits s:xxx:0 starting at bit 31-(r-1)
10112 so we LSL bit s up to bit 31 i.e. by 31 - s
10113 and then we LSL to bring bit 31 down to 31-(r-1)+s
10114 i.e. by r - (s + 1). */
10115 value <<= 31 - s;
10116 value >>= r - (s + 1);
10117 }
10118
10119 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
10120 rd = INSTR (4, 0);
10121 aarch64_set_reg_u64 (cpu, rd, NO_SP, value);
10122 }
10123
10124 /* 64 bit bitfield move, left and right of affected zeroed
10125 if r <= s Wd<s-r:0> = Wn<s:r> else Wd<64+s-r,64-r> = Wn<s:0>. */
10126 static void
10127 ubfm (sim_cpu *cpu, uint32_t r, uint32_t s)
10128 {
10129 unsigned rd;
10130 unsigned rn = INSTR (9, 5);
10131 uint64_t value = aarch64_get_reg_u64 (cpu, rn, NO_SP);
10132
10133 if (r <= s)
10134 {
10135 /* 63:...:s:xxx:r:...:0 ==> 63:...:s-r:xxx:0.
10136 We want only bits s:xxx:r at the bottom of the word.
10137 So we LSL bit s up to bit 63 i.e. by 63 - s
10138 and then we LSR to bring bit 63 down to bit s - r
10139 i.e. by 63 + r - s. */
10140 value <<= 63 - s;
10141 value >>= 63 + r - s;
10142 }
10143 else
10144 {
10145 /* 63:...:s:xxx:0 ==> 63:...:63-(r-1)+s:xxx:63-(r-1):...:0.
10146 We want only bits s:xxx:0 starting at bit 63-(r-1).
10147 So we LSL bit s up to bit 63 i.e. by 63 - s
10148 and then we LSL to bring bit 63 down to 63-(r-1)+s
10149 i.e. by r - (s + 1). */
10150 value <<= 63 - s;
10151 value >>= r - (s + 1);
10152 }
10153
10154 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
10155 rd = INSTR (4, 0);
10156 aarch64_set_reg_u64 (cpu, rd, NO_SP, value);
10157 }
10158
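/* Editor's sketch (not simulator code) of the two-shift trick used by
   ubfm32/ubfm above for the r <= s case: left-shift so bit s becomes the
   top bit, then logical right shift so bit r lands at bit 0, zeroing
   everything else.  */
#ifdef AARCH64_SIM_UBFM_DEMO
static uint32_t
extract_bits_32 (uint32_t w, unsigned r, unsigned s)   /* needs r <= s.  */
{
  w <<= 31 - s;         /* Bit s is now bit 31.  */
  w >>= 31 + r - s;     /* Bit r is now bit 0; high bits are zero.  */
  return w;
}
#endif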
10159 /* The signed versions need to insert sign bits
10160 on the left of the inserted bit field. so we do
10161 much the same as the unsigned version except we
10162 use an arithmetic shift right -- this just means
10163 we need to operate on signed values. */
10164
10165 /* 32 bit bitfield move, left of affected sign-extended, right zeroed. */
10166 /* If r <= s Wd<s-r:0> = Wn<s:r> else Wd<32+s-r,32-r> = Wn<s:0>. */
10167 static void
10168 sbfm32 (sim_cpu *cpu, uint32_t r, uint32_t s)
10169 {
10170 unsigned rd;
10171 unsigned rn = INSTR (9, 5);
10172 /* as per ubfm32 but use an ASR instead of an LSR. */
10173 int32_t value = aarch64_get_reg_s32 (cpu, rn, NO_SP);
10174
10175 if (r <= s)
10176 {
10177 value <<= 31 - s;
10178 value >>= 31 + r - s;
10179 }
10180 else
10181 {
10182 value <<= 31 - s;
10183 value >>= r - (s + 1);
10184 }
10185
10186 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
10187 rd = INSTR (4, 0);
10188 aarch64_set_reg_u64 (cpu, rd, NO_SP, (uint32_t) value);
10189 }
10190
10191 /* 64 bit bitfield move, left of affected sign-extended, right zeroed. */
10192 /* If r <= s Wd<s-r:0> = Wn<s:r> else Wd<64+s-r,64-r> = Wn<s:0>. */
10193 static void
10194 sbfm (sim_cpu *cpu, uint32_t r, uint32_t s)
10195 {
10196 unsigned rd;
10197 unsigned rn = INSTR (9, 5);
10198 /* As per ubfm but use an ASR instead of an LSR. */
10199 int64_t value = aarch64_get_reg_s64 (cpu, rn, NO_SP);
10200
10201 if (r <= s)
10202 {
10203 value <<= 63 - s;
10204 value >>= 63 + r - s;
10205 }
10206 else
10207 {
10208 value <<= 63 - s;
10209 value >>= r - (s + 1);
10210 }
10211
10212 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
10213 rd = INSTR (4, 0);
10214 aarch64_set_reg_s64 (cpu, rd, NO_SP, value);
10215 }
10216
10217 /* Finally, these versions leave non-affected bits
10218 as is. so we need to generate the bits as per
10219 ubfm and also generate a mask to pick the
10220 bits from the original and computed values. */
10221
10222 /* 32 bit bitfield move, non-affected bits left as is.
10223 If r <= s Wd<s-r:0> = Wn<s:r> else Wd<32+s-r,32-r> = Wn<s:0>. */
10224 static void
10225 bfm32 (sim_cpu *cpu, uint32_t r, uint32_t s)
10226 {
10227 unsigned rn = INSTR (9, 5);
10228 uint32_t value = aarch64_get_reg_u32 (cpu, rn, NO_SP);
10229 uint32_t mask = -1;
10230 unsigned rd;
10231 uint32_t value2;
10232
10233 /* Pick either s+1-r or s+1 consecutive bits out of the original word. */
10234 if (r <= s)
10235 {
10236 /* 31:...:s:xxx:r:...:0 ==> 31:...:s-r:xxx:0.
10237 We want only bits s:xxx:r at the bottom of the word
10238 so we LSL bit s up to bit 31 i.e. by 31 - s
10239 and then we LSR to bring bit 31 down to bit s - r
10240 i.e. by 31 + r - s. */
10241 value <<= 31 - s;
10242 value >>= 31 + r - s;
10243 /* the mask must include the same bits. */
10244 mask <<= 31 - s;
10245 mask >>= 31 + r - s;
10246 }
10247 else
10248 {
10249 /* 31:...:s:xxx:0 ==> 31:...:31-(r-1)+s:xxx:31-(r-1):...:0.
10250 We want only bits s:xxx:0 starting at bit 31-(r-1)
10251 so we LSL bit s up to bit 31 i.e. by 31 - s
10252 and then we LSL to bring bit 31 down to 31-(r-1)+s
10253 i.e. by r - (s + 1). */
10254 value <<= 31 - s;
10255 value >>= r - (s + 1);
10256 /* The mask must include the same bits. */
10257 mask <<= 31 - s;
10258 mask >>= r - (s + 1);
10259 }
10260
10261 rd = INSTR (4, 0);
10262 value2 = aarch64_get_reg_u32 (cpu, rd, NO_SP);
10263
10264 value2 &= ~mask;
10265 value2 |= value;
10266
10267 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
10268 aarch64_set_reg_u64 (cpu, rd, NO_SP, value2);
10270 }
10271
10272 /* 64 bit bitfield move, non-affected bits left as is.
10273 If r <= s Wd<s-r:0> = Wn<s:r> else Wd<64+s-r,64-r> = Wn<s:0>. */
10274 static void
10275 bfm (sim_cpu *cpu, uint32_t r, uint32_t s)
10276 {
10277 unsigned rd;
10278 unsigned rn = INSTR (9, 5);
10279 uint64_t value = aarch64_get_reg_u64 (cpu, rn, NO_SP);
10280 uint64_t mask = 0xffffffffffffffffULL;
10281
10282 if (r <= s)
10283 {
10284 /* 63:...:s:xxx:r:...:0 ==> 63:...:s-r:xxx:0.
10285 We want only bits s:xxx:r at the bottom of the word
10286 so we LSL bit s up to bit 63 i.e. by 63 - s
10287 and then we LSR to bring bit 63 down to bit s - r
10288 i.e. by 63 + r - s. */
10289 value <<= 63 - s;
10290 value >>= 63 + r - s;
10291 /* The mask must include the same bits. */
10292 mask <<= 63 - s;
10293 mask >>= 63 + r - s;
10294 }
10295 else
10296 {
10297 /* 63:...:s:xxx:0 ==> 63:...:63-(r-1)+s:xxx:63-(r-1):...:0
10298 We want only bits s:xxx:0 starting at bit 63-(r-1)
10299 so we LSL bit s up to bit 63 i.e. by 63 - s
10300 and then we LSL to bring bit 63 down to 63-(r-1)+s
10301 i.e. by r - (s + 1). */
10302 value <<= 63 - s;
10303 value >>= r - (s + 1);
10304 /* The mask must include the same bits. */
10305 mask <<= 63 - s;
10306 mask >>= r - (s + 1);
10307 }
10308
10309 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
10310 rd = INSTR (4, 0);
10311 aarch64_set_reg_u64
10312 (cpu, rd, NO_SP, (aarch64_get_reg_u64 (cpu, rd, NO_SP) & ~mask) | value);
10313 }
10314
10315 static void
10316 dexBitfieldImmediate (sim_cpu *cpu)
10317 {
10318 /* assert instr[28:23] = 100110
10319 instr[31] = size : 0 ==> 32 bit, 1 ==> 64 bit
10320 instr[30,29] = op : 0 ==> SBFM, 1 ==> BFM, 2 ==> UBFM, 3 ==> UNALLOC
10321 instr[22] = N : must be 0 for 32 bit, 1 for 64 bit ow UNALLOC
10322 instr[21,16] = immr : 0xxxxx for 32 bit, xxxxxx for 64 bit
10323 instr[15,10] = imms : 0xxxxx for 32 bit, xxxxxx for 64 bit
10324 instr[9,5] = Rn
10325 instr[4,0] = Rd */
10326
10327 /* 32 bit operations must have N = 0 or else we have an UNALLOC. */
10328 uint32_t dispatch;
10329 uint32_t imms;
10330 uint32_t size = INSTR (31, 31);
10331 uint32_t N = INSTR (22, 22);
10332 /* 32 bit operations must have immr[5] = 0 and imms[5] = 0. */
10333 /* or else we have an UNALLOC. */
10334 uint32_t immr = INSTR (21, 16);
10335
10336 if (~size & N)
10337 HALT_UNALLOC;
10338
10339 if (!size && uimm (immr, 5, 5))
10340 HALT_UNALLOC;
10341
10342 imms = INSTR (15, 10);
10343 if (!size && uimm (imms, 5, 5))
10344 HALT_UNALLOC;
10345
10346 /* Switch on combined size and op. */
10347 dispatch = INSTR (31, 29);
10348 switch (dispatch)
10349 {
10350 case 0: sbfm32 (cpu, immr, imms); return;
10351 case 1: bfm32 (cpu, immr, imms); return;
10352 case 2: ubfm32 (cpu, immr, imms); return;
10353 case 4: sbfm (cpu, immr, imms); return;
10354 case 5: bfm (cpu, immr, imms); return;
10355 case 6: ubfm (cpu, immr, imms); return;
10356 default: HALT_UNALLOC;
10357 }
10358 }
10359
10360 static void
10361 do_EXTR_32 (sim_cpu *cpu)
10362 {
10363 /* instr[31:21] = 00010011100
10364 instr[20,16] = Rm
10365 instr[15,10] = imms : 0xxxxx for 32 bit
10366 instr[9,5] = Rn
10367 instr[4,0] = Rd */
10368 unsigned rm = INSTR (20, 16);
10369 unsigned imms = INSTR (15, 10) & 31;
10370 unsigned rn = INSTR ( 9, 5);
10371 unsigned rd = INSTR ( 4, 0);
10372 uint64_t val1;
10373 uint64_t val2;
10374
10375 val1 = aarch64_get_reg_u32 (cpu, rm, NO_SP);
10376 val1 >>= imms;
10377 val2 = aarch64_get_reg_u32 (cpu, rn, NO_SP);
10378 val2 <<= (32 - imms);
10379
10380 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
10381 aarch64_set_reg_u64 (cpu, rd, NO_SP, (uint32_t) (val1 | val2)); /* Truncate: val2 carries junk above bit 31. */
10382 }
10383
10384 static void
10385 do_EXTR_64 (sim_cpu *cpu)
10386 {
10387 /* instr[31:21] = 10010011100
10388 instr[20,16] = Rm
10389 instr[15,10] = imms
10390 instr[9,5] = Rn
10391 instr[4,0] = Rd */
10392 unsigned rm = INSTR (20, 16);
10393 unsigned imms = INSTR (15, 10) & 63;
10394 unsigned rn = INSTR ( 9, 5);
10395 unsigned rd = INSTR ( 4, 0);
10396 uint64_t val;
10397
10398 val = aarch64_get_reg_u64 (cpu, rm, NO_SP);
10399 val >>= imms;
10400 if (imms != 0) val |= (aarch64_get_reg_u64 (cpu, rn, NO_SP) << (64 - imms)); /* A shift by 64 would be undefined; imms == 0 just selects Rm. */
10401
10402 aarch64_set_reg_u64 (cpu, rd, NO_SP, val);
10403 }
10404
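/* Editor's sketch (not simulator code): EXTR extracts 64 bits from the
   concatenation Rn:Rm starting at bit `lsb', mirroring do_EXTR_64 above
   (with the imms == 0 case separated out, since a C shift by 64 would be
   undefined); EXTR with Rn == Rm is ROR.  */
#ifdef AARCH64_SIM_EXTR_DEMO
static uint64_t
extr64_demo (uint64_t rn, uint64_t rm, unsigned lsb)   /* lsb in 0..63.  */
{
  if (lsb == 0)
    return rm;
  return (rm >> lsb) | (rn << (64 - lsb));
}

static uint64_t
ror64_demo (uint64_t x, unsigned amount)
{
  return extr64_demo (x, x, amount & 63);
}
#endif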
10405 static void
10406 dexExtractImmediate (sim_cpu *cpu)
10407 {
10408 /* assert instr[28:23] = 100111
10409 instr[31] = size : 0 ==> 32 bit, 1 ==> 64 bit
10410 instr[30,29] = op21 : 0 ==> EXTR, 1,2,3 ==> UNALLOC
10411 instr[22] = N : must be 0 for 32 bit, 1 for 64 bit or UNALLOC
10412 instr[21] = op0 : must be 0 or UNALLOC
10413 instr[20,16] = Rm
10414 instr[15,10] = imms : 0xxxxx for 32 bit, xxxxxx for 64 bit
10415 instr[9,5] = Rn
10416 instr[4,0] = Rd */
10417
10418 /* 32 bit operations must have N = 0 or else we have an UNALLOC. */
10419 /* 64 bit operations must have N = 1 or else we have an UNALLOC. */
10420 uint32_t dispatch;
10421 uint32_t size = INSTR (31, 31);
10422 uint32_t N = INSTR (22, 22);
10423 /* 32 bit operations must have imms[5] = 0
10424 or else we have an UNALLOC. */
10425 uint32_t imms = INSTR (15, 10);
10426
10427 if (size ^ N)
10428 HALT_UNALLOC;
10429
10430 if (!size && uimm (imms, 5, 5))
10431 HALT_UNALLOC;
10432
10433 /* Switch on combined size and op. */
10434 dispatch = INSTR (31, 29);
10435
10436 if (dispatch == 0)
10437 do_EXTR_32 (cpu);
10438
10439 else if (dispatch == 4)
10440 do_EXTR_64 (cpu);
10441
10442 else if (dispatch == 1)
10443 HALT_NYI;
10444 else
10445 HALT_UNALLOC;
10446 }
10447
10448 static void
10449 dexDPImm (sim_cpu *cpu)
10450 {
10451 /* uint32_t group = dispatchGroup (aarch64_get_instr (cpu));
10452 assert group == GROUP_DPIMM_1000 || group == GROUP_DPIMM_1001
10453 bits [25,23] of a DPImm are the secondary dispatch vector. */
10454 uint32_t group2 = dispatchDPImm (aarch64_get_instr (cpu));
10455
10456 switch (group2)
10457 {
10458 case DPIMM_PCADR_000:
10459 case DPIMM_PCADR_001:
10460 dexPCRelAddressing (cpu);
10461 return;
10462
10463 case DPIMM_ADDSUB_010:
10464 case DPIMM_ADDSUB_011:
10465 dexAddSubtractImmediate (cpu);
10466 return;
10467
10468 case DPIMM_LOG_100:
10469 dexLogicalImmediate (cpu);
10470 return;
10471
10472 case DPIMM_MOV_101:
10473 dexMoveWideImmediate (cpu);
10474 return;
10475
10476 case DPIMM_BITF_110:
10477 dexBitfieldImmediate (cpu);
10478 return;
10479
10480 case DPIMM_EXTR_111:
10481 dexExtractImmediate (cpu);
10482 return;
10483
10484 default:
10485 /* Should never reach here. */
10486 HALT_NYI;
10487 }
10488 }
10489
10490 static void
10491 dexLoadUnscaledImmediate (sim_cpu *cpu)
10492 {
10493 /* instr[29,24] == 111_00
10494 instr[21] == 0
10495 instr[11,10] == 00
10496 instr[31,30] = size
10497 instr[26] = V
10498 instr[23,22] = opc
10499 instr[20,12] = simm9
10500 instr[9,5] = rn may be SP. */
10501 /* unsigned rt = INSTR (4, 0); */
10502 uint32_t V = INSTR (26, 26);
10503 uint32_t dispatch = ((INSTR (31, 30) << 2) | INSTR (23, 22));
10504 int32_t imm = simm32 (aarch64_get_instr (cpu), 20, 12);
10505
10506 if (!V)
10507 {
10508 /* GReg operations. */
10509 switch (dispatch)
10510 {
10511 case 0: sturb (cpu, imm); return;
10512 case 1: ldurb32 (cpu, imm); return;
10513 case 2: ldursb64 (cpu, imm); return;
10514 case 3: ldursb32 (cpu, imm); return;
10515 case 4: sturh (cpu, imm); return;
10516 case 5: ldurh32 (cpu, imm); return;
10517 case 6: ldursh64 (cpu, imm); return;
10518 case 7: ldursh32 (cpu, imm); return;
10519 case 8: stur32 (cpu, imm); return;
10520 case 9: ldur32 (cpu, imm); return;
10521 case 10: ldursw (cpu, imm); return;
10522 case 12: stur64 (cpu, imm); return;
10523 case 13: ldur64 (cpu, imm); return;
10524
10525 case 14:
10526 /* PRFUM NYI. */
10527 HALT_NYI;
10528
10529 default:
10530 case 11:
10531 case 15:
10532 HALT_UNALLOC;
10533 }
10534 }
10535
10536 /* FReg operations. */
10537 switch (dispatch)
10538 {
10539 case 2: fsturq (cpu, imm); return;
10540 case 3: fldurq (cpu, imm); return;
10541 case 8: fsturs (cpu, imm); return;
10542 case 9: fldurs (cpu, imm); return;
10543 case 12: fsturd (cpu, imm); return;
10544 case 13: fldurd (cpu, imm); return;
10545
10546 case 0: /* STUR 8 bit FP. */
10547 case 1: /* LDUR 8 bit FP. */
10548 case 4: /* STUR 16 bit FP. */
10549 case 5: /* LDUR 16 bit FP. */
10550 HALT_NYI;
10551
10552 default:
10553 case 6:
10554 case 7:
10555 case 10:
10556 case 11:
10557 case 14:
10558 case 15:
10559 HALT_UNALLOC;
10560 }
10561 }
10562
10563 /* N.B. A preliminary note regarding all the ldrs<x>32
10564 instructions
10565
10566 The signed value loaded by these instructions is cast to unsigned
10567 before being assigned to aarch64_get_reg_u64 (cpu, N) i.e. to the
10568 64 bit element of the GReg union.  This performs a 32 bit sign extension
10569 (as required) but avoids 64 bit sign extension, thus ensuring that the
10570 top half of the register word is zero.  This is what the spec demands
10571 when a 32 bit load occurs.  */
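/* Example: a 32-bit LDRSB of the memory byte 0x80 goes through the
   (uint32_t) cast as 0xFFFFFF80, so the full 64-bit register reads
   0x00000000FFFFFF80 -- sign-extended to 32 bits, zero top half.  */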
10572
10573 /* 32 bit load sign-extended byte scaled unsigned 12 bit. */
10574 static void
10575 ldrsb32_abs (sim_cpu *cpu, uint32_t offset)
10576 {
10577 unsigned int rn = INSTR (9, 5);
10578 unsigned int rt = INSTR (4, 0);
10579
10580 /* The target register may not be SP but the source register may be;
10581 there is no scaling required for a byte load.  */
10582 uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK) + offset;
10583 aarch64_set_reg_u64 (cpu, rt, NO_SP,
10584 (uint32_t) aarch64_get_mem_s8 (cpu, address));
10585 }
10586
10587 /* 32 bit load sign-extended byte scaled or unscaled zero-
10588 or sign-extended 32-bit register offset. */
10589 static void
10590 ldrsb32_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
10591 {
10592 unsigned int rm = INSTR (20, 16);
10593 unsigned int rn = INSTR (9, 5);
10594 unsigned int rt = INSTR (4, 0);
10595
10596 /* rn may reference SP, rm and rt must reference ZR. */
10597
10598 uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
10599 int64_t displacement = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP),
10600 extension);
10601
10602 /* There is no scaling required for a byte load. */
10603 aarch64_set_reg_u64
10604 (cpu, rt, NO_SP, (uint32_t) aarch64_get_mem_s8 (cpu, address
10605 + displacement));
10606 }
10607
10608 /* 32 bit load sign-extended byte unscaled signed 9 bit with
10609 pre- or post-writeback. */
10610 static void
10611 ldrsb32_wb (sim_cpu *cpu, int32_t offset, WriteBack wb)
10612 {
10613 uint64_t address;
10614 unsigned int rn = INSTR (9, 5);
10615 unsigned int rt = INSTR (4, 0);
10616
10617 if (rn == rt && wb != NoWriteBack)
10618 HALT_UNALLOC;
10619
10620 address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
10621
10622 if (wb == Pre)
10623 address += offset;
10624
10625 aarch64_set_reg_u64 (cpu, rt, NO_SP,
10626 (uint32_t) aarch64_get_mem_s8 (cpu, address));
10627
10628 if (wb == Post)
10629 address += offset;
10630
10631 if (wb != NoWriteBack)
10632 aarch64_set_reg_u64 (cpu, rn, NO_SP, address);
10633 }
10634
10635 /* 8 bit store scaled. */
10636 static void
10637 fstrb_abs (sim_cpu *cpu, uint32_t offset)
10638 {
10639 unsigned st = INSTR (4, 0);
10640 unsigned rn = INSTR (9, 5);
10641
10642 aarch64_set_mem_u8 (cpu,
10643 aarch64_get_reg_u64 (cpu, rn, SP_OK) + offset,
10644 aarch64_get_vec_u8 (cpu, st, 0));
10645 }
10646
10647 /* 8 bit store scaled or unscaled zero- or
10648 sign-extended 8-bit register offset. */
10649 static void
10650 fstrb_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
10651 {
10652 unsigned rm = INSTR (20, 16);
10653 unsigned rn = INSTR (9, 5);
10654 unsigned st = INSTR (4, 0);
10655
10656 uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
10657 int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP),
10658 extension);
10659 uint64_t displacement = extended; /* No scaling for a byte access; the register offset is always applied. */
10660
10661 aarch64_set_mem_u8
10662 (cpu, address + displacement, aarch64_get_vec_u8 (cpu, st, 0));
10663 }
10664
10665 /* 16 bit store scaled. */
10666 static void
10667 fstrh_abs (sim_cpu *cpu, uint32_t offset)
10668 {
10669 unsigned st = INSTR (4, 0);
10670 unsigned rn = INSTR (9, 5);
10671
10672 aarch64_set_mem_u16
10673 (cpu,
10674 aarch64_get_reg_u64 (cpu, rn, SP_OK) + SCALE (offset, 16),
10675 aarch64_get_vec_u16 (cpu, st, 0));
10676 }
10677
10678 /* 16 bit store scaled or unscaled zero-
10679 or sign-extended 16-bit register offset. */
10680 static void
10681 fstrh_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
10682 {
10683 unsigned rm = INSTR (20, 16);
10684 unsigned rn = INSTR (9, 5);
10685 unsigned st = INSTR (4, 0);
10686
10687 uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
10688 int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP),
10689 extension);
10690 uint64_t displacement = OPT_SCALE (extended, 16, scaling);
10691
10692 aarch64_set_mem_u16
10693 (cpu, address + displacement, aarch64_get_vec_u16 (cpu, st, 0));
10694 }
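/* Illustration (assuming OPT_SCALE, defined earlier in this file, applies
   the log2 of the access size in bytes only when SCALING is Scaled): for
   this halfword store a scaled SXTW offset register holding 6 produces a
   displacement of 6 << 1 = 12, while the unscaled form uses 6 directly.  */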
10695
10696 /* 32 bit store scaled unsigned 12 bit. */
10697 static void
10698 fstrs_abs (sim_cpu *cpu, uint32_t offset)
10699 {
10700 unsigned st = INSTR (4, 0);
10701 unsigned rn = INSTR (9, 5);
10702
10703 aarch64_set_mem_u32
10704 (cpu,
10705 aarch64_get_reg_u64 (cpu, rn, SP_OK) + SCALE (offset, 32),
10706 aarch64_get_vec_u32 (cpu, st, 0));
10707 }
10708
10709 /* 32 bit store unscaled signed 9 bit with pre- or post-writeback. */
10710 static void
10711 fstrs_wb (sim_cpu *cpu, int32_t offset, WriteBack wb)
10712 {
10713 unsigned rn = INSTR (9, 5);
10714 unsigned st = INSTR (4, 0);
10715
10716 uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
10717
10718 if (wb != Post)
10719 address += offset;
10720
10721 aarch64_set_mem_u32 (cpu, address, aarch64_get_vec_u32 (cpu, st, 0));
10722
10723 if (wb == Post)
10724 address += offset;
10725
10726 if (wb != NoWriteBack)
10727 aarch64_set_reg_u64 (cpu, rn, SP_OK, address);
10728 }
10729
10730 /* 32 bit store scaled or unscaled zero-
10731 or sign-extended 32-bit register offset. */
10732 static void
10733 fstrs_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
10734 {
10735 unsigned rm = INSTR (20, 16);
10736 unsigned rn = INSTR (9, 5);
10737 unsigned st = INSTR (4, 0);
10738
10739 uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
10740 int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP),
10741 extension);
10742 uint64_t displacement = OPT_SCALE (extended, 32, scaling);
10743
10744 aarch64_set_mem_u32
10745 (cpu, address + displacement, aarch64_get_vec_u32 (cpu, st, 0));
10746 }
10747
10748 /* 64 bit store scaled unsigned 12 bit. */
10749 static void
10750 fstrd_abs (sim_cpu *cpu, uint32_t offset)
10751 {
10752 unsigned st = INSTR (4, 0);
10753 unsigned rn = INSTR (9, 5);
10754
10755 aarch64_set_mem_u64
10756 (cpu,
10757 aarch64_get_reg_u64 (cpu, rn, SP_OK) + SCALE (offset, 64),
10758 aarch64_get_vec_u64 (cpu, st, 0));
10759 }
10760
10761 /* 64 bit store unscaled signed 9 bit with pre- or post-writeback. */
10762 static void
10763 fstrd_wb (sim_cpu *cpu, int32_t offset, WriteBack wb)
10764 {
10765 unsigned rn = INSTR (9, 5);
10766 unsigned st = INSTR (4, 0);
10767
10768 uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
10769
10770 if (wb != Post)
10771 address += offset;
10772
10773 aarch64_set_mem_u64 (cpu, address, aarch64_get_vec_u64 (cpu, st, 0));
10774
10775 if (wb == Post)
10776 address += offset;
10777
10778 if (wb != NoWriteBack)
10779 aarch64_set_reg_u64 (cpu, rn, SP_OK, address);
10780 }
10781
10782 /* 64 bit store scaled or unscaled zero-
10783 or sign-extended 32-bit register offset. */
10784 static void
10785 fstrd_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
10786 {
10787 unsigned rm = INSTR (20, 16);
10788 unsigned rn = INSTR (9, 5);
10789 unsigned st = INSTR (4, 0);
10790
10791 uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
10792 int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP),
10793 extension);
10794 uint64_t displacement = OPT_SCALE (extended, 64, scaling);
10795
10796 aarch64_set_mem_u64
10797 (cpu, address + displacement, aarch64_get_vec_u64 (cpu, st, 0));
10798 }
10799
10800 /* 128 bit store scaled unsigned 12 bit. */
10801 static void
10802 fstrq_abs (sim_cpu *cpu, uint32_t offset)
10803 {
10804 FRegister a;
10805 unsigned st = INSTR (4, 0);
10806 unsigned rn = INSTR (9, 5);
10807 uint64_t addr;
10808
10809 aarch64_get_FP_long_double (cpu, st, & a);
10810
10811 addr = aarch64_get_reg_u64 (cpu, rn, SP_OK) + SCALE (offset, 128);
10812 aarch64_set_mem_long_double (cpu, addr, a);
10813 }
10814
10815 /* 128 bit store unscaled signed 9 bit with pre- or post-writeback. */
10816 static void
10817 fstrq_wb (sim_cpu *cpu, int32_t offset, WriteBack wb)
10818 {
10819 FRegister a;
10820 unsigned rn = INSTR (9, 5);
10821 unsigned st = INSTR (4, 0);
10822 uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
10823
10824 if (wb != Post)
10825 address += offset;
10826
10827 aarch64_get_FP_long_double (cpu, st, & a);
10828 aarch64_set_mem_long_double (cpu, address, a);
10829
10830 if (wb == Post)
10831 address += offset;
10832
10833 if (wb != NoWriteBack)
10834 aarch64_set_reg_u64 (cpu, rn, SP_OK, address);
10835 }
10836
10837 /* 128 bit store scaled or unscaled zero-
10838 or sign-extended 32-bit register offset. */
10839 static void
10840 fstrq_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
10841 {
10842 unsigned rm = INSTR (20, 16);
10843 unsigned rn = INSTR (9, 5);
10844 unsigned st = INSTR (4, 0);
10845
10846 uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
10847 int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP),
10848 extension);
10849 uint64_t displacement = OPT_SCALE (extended, 128, scaling);
10850
10851 FRegister a;
10852
10853 aarch64_get_FP_long_double (cpu, st, & a);
10854 aarch64_set_mem_long_double (cpu, address + displacement, a);
10855 }
10856
10857 static void
10858 dexLoadImmediatePrePost (sim_cpu *cpu)
10859 {
10860 /* instr[31,30] = size
10861 instr[29,27] = 111
10862 instr[26] = V
10863 instr[25,24] = 00
10864 instr[23,22] = opc
10865 instr[21] = 0
10866 instr[20,12] = simm9
10867 instr[11] = wb : 0 ==> Post, 1 ==> Pre
10868 instr[10] = 0
10869 instr[9,5] = Rn may be SP.
10870 instr[4,0] = Rt */
10871
10872 uint32_t V = INSTR (26, 26);
10873 uint32_t dispatch = ((INSTR (31, 30) << 2) | INSTR (23, 22));
10874 int32_t imm = simm32 (aarch64_get_instr (cpu), 20, 12);
10875 WriteBack wb = INSTR (11, 11);
10876
10877 if (!V)
10878 {
10879 /* GReg operations. */
10880 switch (dispatch)
10881 {
10882 case 0: strb_wb (cpu, imm, wb); return;
10883 case 1: ldrb32_wb (cpu, imm, wb); return;
10884 case 2: ldrsb_wb (cpu, imm, wb); return;
10885 case 3: ldrsb32_wb (cpu, imm, wb); return;
10886 case 4: strh_wb (cpu, imm, wb); return;
10887 case 5: ldrh32_wb (cpu, imm, wb); return;
10888 case 6: ldrsh64_wb (cpu, imm, wb); return;
10889 case 7: ldrsh32_wb (cpu, imm, wb); return;
10890 case 8: str32_wb (cpu, imm, wb); return;
10891 case 9: ldr32_wb (cpu, imm, wb); return;
10892 case 10: ldrsw_wb (cpu, imm, wb); return;
10893 case 12: str_wb (cpu, imm, wb); return;
10894 case 13: ldr_wb (cpu, imm, wb); return;
10895
10896 default:
10897 case 11:
10898 case 14:
10899 case 15:
10900 HALT_UNALLOC;
10901 }
10902 }
10903
10904 /* FReg operations. */
10905 switch (dispatch)
10906 {
10907 case 2: fstrq_wb (cpu, imm, wb); return;
10908 case 3: fldrq_wb (cpu, imm, wb); return;
10909 case 8: fstrs_wb (cpu, imm, wb); return;
10910 case 9: fldrs_wb (cpu, imm, wb); return;
10911 case 12: fstrd_wb (cpu, imm, wb); return;
10912 case 13: fldrd_wb (cpu, imm, wb); return;
10913
10914 case 0: /* STUR 8 bit FP. */
10915 case 1: /* LDUR 8 bit FP. */
10916 case 4: /* STUR 16 bit FP. */
10917 case 5: /* LDUR 16 bit FP. */
10918 HALT_NYI;
10919
10920 default:
10921 case 6:
10922 case 7:
10923 case 10:
10924 case 11:
10925 case 14:
10926 case 15:
10927 HALT_UNALLOC;
10928 }
10929 }
10930
10931 static void
10932 dexLoadRegisterOffset (sim_cpu *cpu)
10933 {
10934 /* instr[31,30] = size
10935 instr[29,27] = 111
10936 instr[26] = V
10937 instr[25,24] = 00
10938 instr[23,22] = opc
10939 instr[21] = 1
10940 instr[20,16] = rm
10941 instr[15,13] = option : 010 ==> UXTW, 011 ==> UXTX/LSL,
10942 110 ==> SXTW, 111 ==> SXTX,
10943 ow ==> RESERVED
10944 instr[12] = scaled
10945 instr[11,10] = 10
10946 instr[9,5] = rn
10947 instr[4,0] = rt. */
10948
10949 uint32_t V = INSTR (26, 26);
10950 uint32_t dispatch = ((INSTR (31, 30) << 2) | INSTR (23, 22));
10951 Scaling scale = INSTR (12, 12);
10952 Extension extensionType = INSTR (15, 13);
10953
10954 /* Check for illegal extension types. */
10955 if (uimm (extensionType, 1, 1) == 0)
10956 HALT_UNALLOC;
10957
10958 if (extensionType == UXTX || extensionType == SXTX)
10959 extensionType = NoExtension;
10960
10961 if (!V)
10962 {
10963 /* GReg operations. */
10964 switch (dispatch)
10965 {
10966 case 0: strb_scale_ext (cpu, scale, extensionType); return;
10967 case 1: ldrb32_scale_ext (cpu, scale, extensionType); return;
10968 case 2: ldrsb_scale_ext (cpu, scale, extensionType); return;
10969 case 3: ldrsb32_scale_ext (cpu, scale, extensionType); return;
10970 case 4: strh_scale_ext (cpu, scale, extensionType); return;
10971 case 5: ldrh32_scale_ext (cpu, scale, extensionType); return;
10972 case 6: ldrsh_scale_ext (cpu, scale, extensionType); return;
10973 case 7: ldrsh32_scale_ext (cpu, scale, extensionType); return;
10974 case 8: str32_scale_ext (cpu, scale, extensionType); return;
10975 case 9: ldr32_scale_ext (cpu, scale, extensionType); return;
10976 case 10: ldrsw_scale_ext (cpu, scale, extensionType); return;
10977 case 12: str_scale_ext (cpu, scale, extensionType); return;
10978 case 13: ldr_scale_ext (cpu, scale, extensionType); return;
10979 case 14: prfm_scale_ext (cpu, scale, extensionType); return;
10980
10981 default:
10982 case 11:
10983 case 15:
10984 HALT_UNALLOC;
10985 }
10986 }
10987
10988 /* FReg operations. */
10989 switch (dispatch)
10990 {
10991 case 1: /* LDUR 8 bit FP. */
10992 HALT_NYI;
10993 case 3: fldrq_scale_ext (cpu, scale, extensionType); return;
10994 case 5: /* LDUR 16 bit FP. */
10995 HALT_NYI;
10996 case 9: fldrs_scale_ext (cpu, scale, extensionType); return;
10997 case 13: fldrd_scale_ext (cpu, scale, extensionType); return;
10998
10999 case 0: fstrb_scale_ext (cpu, scale, extensionType); return;
11000 case 2: fstrq_scale_ext (cpu, scale, extensionType); return;
11001 case 4: fstrh_scale_ext (cpu, scale, extensionType); return;
11002 case 8: fstrs_scale_ext (cpu, scale, extensionType); return;
11003 case 12: fstrd_scale_ext (cpu, scale, extensionType); return;
11004
11005 default:
11006 case 6:
11007 case 7:
11008 case 10:
11009 case 11:
11010 case 14:
11011 case 15:
11012 HALT_UNALLOC;
11013 }
11014 }
11015
11016 static void
11017 dexLoadUnsignedImmediate (sim_cpu *cpu)
11018 {
11019 /* instr[29,24] == 111_01
11020 instr[31,30] = size
11021 instr[26] = V
11022 instr[23,22] = opc
11023 instr[21,10] = uimm12 : unsigned immediate offset
11024 instr[9,5] = rn may be SP.
11025 instr[4,0] = rt. */
11026
11027 uint32_t V = INSTR (26,26);
11028 uint32_t dispatch = ((INSTR (31, 30) << 2) | INSTR (23, 22));
11029 uint32_t imm = INSTR (21, 10);
11030
11031 if (!V)
11032 {
11033 /* GReg operations. */
11034 switch (dispatch)
11035 {
11036 case 0: strb_abs (cpu, imm); return;
11037 case 1: ldrb32_abs (cpu, imm); return;
11038 case 2: ldrsb_abs (cpu, imm); return;
11039 case 3: ldrsb32_abs (cpu, imm); return;
11040 case 4: strh_abs (cpu, imm); return;
11041 case 5: ldrh32_abs (cpu, imm); return;
11042 case 6: ldrsh_abs (cpu, imm); return;
11043 case 7: ldrsh32_abs (cpu, imm); return;
11044 case 8: str32_abs (cpu, imm); return;
11045 case 9: ldr32_abs (cpu, imm); return;
11046 case 10: ldrsw_abs (cpu, imm); return;
11047 case 12: str_abs (cpu, imm); return;
11048 case 13: ldr_abs (cpu, imm); return;
11049 case 14: prfm_abs (cpu, imm); return;
11050
11051 default:
11052 case 11:
11053 case 15:
11054 HALT_UNALLOC;
11055 }
11056 }
11057
11058 /* FReg operations. */
11059 switch (dispatch)
11060 {
11061 case 0: fstrb_abs (cpu, imm); return;
11062 case 4: fstrh_abs (cpu, imm); return;
11063 case 8: fstrs_abs (cpu, imm); return;
11064 case 12: fstrd_abs (cpu, imm); return;
11065 case 2: fstrq_abs (cpu, imm); return;
11066
11067 case 1: fldrb_abs (cpu, imm); return;
11068 case 5: fldrh_abs (cpu, imm); return;
11069 case 9: fldrs_abs (cpu, imm); return;
11070 case 13: fldrd_abs (cpu, imm); return;
11071 case 3: fldrq_abs (cpu, imm); return;
11072
11073 default:
11074 case 6:
11075 case 7:
11076 case 10:
11077 case 11:
11078 case 14:
11079 case 15:
11080 HALT_UNALLOC;
11081 }
11082 }
11083
11084 static void
11085 dexLoadExclusive (sim_cpu *cpu)
11086 {
11087 /* assert instr[29:24] = 001000;
11088 instr[31,30] = size
11089 instr[23] = 0 if exclusive
11090 instr[22] = L : 1 if load, 0 if store
11091 instr[21] = 1 if pair
11092 instr[20,16] = Rs
11093 instr[15] = o0 : 1 if ordered
11094 instr[14,10] = Rt2
11095 instr[9,5] = Rn
11096 instr[4,0] = Rt. */
11097
11098 switch (INSTR (22, 21))
11099 {
11100 case 2: ldxr (cpu); return;
11101 case 0: stxr (cpu); return;
11102 default: HALT_NYI;
11103 }
11104 }
11105
11106 static void
11107 dexLoadOther (sim_cpu *cpu)
11108 {
11109 uint32_t dispatch;
11110
11111 /* instr[29,25] = 111_0
11112 instr[24] == 0 ==> dispatch, 1 ==> ldst reg unsigned immediate
11113 instr[21] and instr[11,10] form the secondary dispatch. */
11114 if (INSTR (24, 24))
11115 {
11116 dexLoadUnsignedImmediate (cpu);
11117 return;
11118 }
11119
11120 dispatch = ((INSTR (21, 21) << 2) | INSTR (11, 10));
11121 switch (dispatch)
11122 {
11123 case 0: dexLoadUnscaledImmediate (cpu); return;
11124 case 1: dexLoadImmediatePrePost (cpu); return;
11125 case 3: dexLoadImmediatePrePost (cpu); return;
11126 case 6: dexLoadRegisterOffset (cpu); return;
11127
11128 default:
11129 case 2:
11130 case 4:
11131 case 5:
11132 case 7:
11133 HALT_NYI;
11134 }
11135 }
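/* Secondary dispatch examples: the unscaled-immediate form has
   instr[21] = 0 and instr[11,10] = 00, giving dispatch 0, while the
   register-offset form (instr[21] = 1, instr[11,10] = 10) gives 6.
   Dispatches 1 and 3 differ only in instr[11], the pre/post flag.  */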
11136
11137 static void
11138 store_pair_u32 (sim_cpu *cpu, int32_t offset, WriteBack wb)
11139 {
11140 unsigned rn = INSTR (14, 10);
11141 unsigned rd = INSTR (9, 5);
11142 unsigned rm = INSTR (4, 0);
11143 uint64_t address = aarch64_get_reg_u64 (cpu, rd, SP_OK);
11144
11145 if ((rn == rd || rm == rd) && wb != NoWriteBack)
11146 HALT_UNALLOC; /* Writeback with Rn equal to Rt or Rt2 is UNPREDICTABLE. */
11147
11148 offset <<= 2;
11149
11150 if (wb != Post)
11151 address += offset;
11152
11153 aarch64_set_mem_u32 (cpu, address,
11154 aarch64_get_reg_u32 (cpu, rm, NO_SP));
11155 aarch64_set_mem_u32 (cpu, address + 4,
11156 aarch64_get_reg_u32 (cpu, rn, NO_SP));
11157
11158 if (wb == Post)
11159 address += offset;
11160
11161 if (wb != NoWriteBack)
11162 aarch64_set_reg_u64 (cpu, rd, SP_OK, address);
11163 }
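/* Offset scaling example: the signed imm7 field is scaled by the access
   size, so for this 32-bit pair an offset field of 2 becomes 8 bytes and
   the two words are stored at address and address + 4; the 64-bit
   variant below shifts by 3 instead.  */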
11164
11165 static void
11166 store_pair_u64 (sim_cpu *cpu, int32_t offset, WriteBack wb)
11167 {
11168 unsigned rn = INSTR (14, 10);
11169 unsigned rd = INSTR (9, 5);
11170 unsigned rm = INSTR (4, 0);
11171 uint64_t address = aarch64_get_reg_u64 (cpu, rd, SP_OK);
11172
11173 if ((rn == rd || rm == rd) && wb != NoWriteBack)
11174 HALT_UNALLOC; /* Writeback with Rn equal to Rt or Rt2 is UNPREDICTABLE. */
11175
11176 offset <<= 3;
11177
11178 if (wb != Post)
11179 address += offset;
11180
11181 aarch64_set_mem_u64 (cpu, address,
11182 aarch64_get_reg_u64 (cpu, rm, NO_SP));
11183 aarch64_set_mem_u64 (cpu, address + 8,
11184 aarch64_get_reg_u64 (cpu, rn, NO_SP));
11185
11186 if (wb == Post)
11187 address += offset;
11188
11189 if (wb != NoWriteBack)
11190 aarch64_set_reg_u64 (cpu, rd, SP_OK, address);
11191 }
11192
11193 static void
11194 load_pair_u32 (sim_cpu *cpu, int32_t offset, WriteBack wb)
11195 {
11196 unsigned rn = INSTR (14, 10);
11197 unsigned rd = INSTR (9, 5);
11198 unsigned rm = INSTR (4, 0);
11199 uint64_t address = aarch64_get_reg_u64 (cpu, rd, SP_OK);
11200
11201 /* Rt == Rt2 is UNPREDICTABLE for a load pair; treat it as UNALLOC so it is never executed. */
11202 if (rn == rm)
11203 HALT_UNALLOC;
11204
11205 offset <<= 2;
11206
11207 if (wb != Post)
11208 address += offset;
11209
11210 aarch64_set_reg_u64 (cpu, rm, SP_OK, aarch64_get_mem_u32 (cpu, address));
11211 aarch64_set_reg_u64 (cpu, rn, SP_OK, aarch64_get_mem_u32 (cpu, address + 4));
11212
11213 if (wb == Post)
11214 address += offset;
11215
11216 if (wb != NoWriteBack)
11217 aarch64_set_reg_u64 (cpu, rd, SP_OK, address);
11218 }
11219
11220 static void
11221 load_pair_s32 (sim_cpu *cpu, int32_t offset, WriteBack wb)
11222 {
11223 unsigned rn = INSTR (14, 10);
11224 unsigned rd = INSTR (9, 5);
11225 unsigned rm = INSTR (4, 0);
11226 uint64_t address = aarch64_get_reg_u64 (cpu, rd, SP_OK);
11227
11228 /* Rt == Rt2 is UNPREDICTABLE for a load pair; treat it as UNALLOC so it is never executed. */
11229 if (rn == rm)
11230 HALT_UNALLOC;
11231
11232 offset <<= 2;
11233
11234 if (wb != Post)
11235 address += offset;
11236
11237 aarch64_set_reg_s64 (cpu, rm, SP_OK, aarch64_get_mem_s32 (cpu, address));
11238 aarch64_set_reg_s64 (cpu, rn, SP_OK, aarch64_get_mem_s32 (cpu, address + 4));
11239
11240 if (wb == Post)
11241 address += offset;
11242
11243 if (wb != NoWriteBack)
11244 aarch64_set_reg_u64 (cpu, rd, SP_OK, address);
11245 }
11246
11247 static void
11248 load_pair_u64 (sim_cpu *cpu, int32_t offset, WriteBack wb)
11249 {
11250 unsigned rn = INSTR (14, 10);
11251 unsigned rd = INSTR (9, 5);
11252 unsigned rm = INSTR (4, 0);
11253 uint64_t address = aarch64_get_reg_u64 (cpu, rd, SP_OK);
11254
11255 /* Rt == Rt2 is UNPREDICTABLE for a load pair; treat it as UNALLOC so it is never executed. */
11256 if (rn == rm)
11257 HALT_UNALLOC;
11258
11259 offset <<= 3;
11260
11261 if (wb != Post)
11262 address += offset;
11263
11264 aarch64_set_reg_u64 (cpu, rm, SP_OK, aarch64_get_mem_u64 (cpu, address));
11265 aarch64_set_reg_u64 (cpu, rn, SP_OK, aarch64_get_mem_u64 (cpu, address + 8));
11266
11267 if (wb == Post)
11268 address += offset;
11269
11270 if (wb != NoWriteBack)
11271 aarch64_set_reg_u64 (cpu, rd, SP_OK, address);
11272 }
11273
11274 static void
11275 dex_load_store_pair_gr (sim_cpu *cpu)
11276 {
11277 /* instr[31,30] = size (10=> 64-bit, 01=> signed 32-bit, 00=> 32-bit)
11278 instr[29,25] = instruction encoding: 101_0
11279 instr[26] = V : 1 if fp 0 if gp
11280 instr[24,23] = addressing mode (10=> offset, 01=> post, 11=> pre)
11281 instr[22] = load/store (1=> load)
11282 instr[21,15] = signed, scaled, offset
11283 instr[14,10] = Rn
11284 instr[ 9, 5] = Rd
11285 instr[ 4, 0] = Rm. */
11286
11287 uint32_t dispatch = ((INSTR (31, 30) << 3) | INSTR (24, 22));
11288 int32_t offset = simm32 (aarch64_get_instr (cpu), 21, 15);
11289
11290 switch (dispatch)
11291 {
11292 case 2: store_pair_u32 (cpu, offset, Post); return;
11293 case 3: load_pair_u32 (cpu, offset, Post); return;
11294 case 4: store_pair_u32 (cpu, offset, NoWriteBack); return;
11295 case 5: load_pair_u32 (cpu, offset, NoWriteBack); return;
11296 case 6: store_pair_u32 (cpu, offset, Pre); return;
11297 case 7: load_pair_u32 (cpu, offset, Pre); return;
11298
11299 case 11: load_pair_s32 (cpu, offset, Post); return;
11300 case 13: load_pair_s32 (cpu, offset, NoWriteBack); return;
11301 case 15: load_pair_s32 (cpu, offset, Pre); return;
11302
11303 case 18: store_pair_u64 (cpu, offset, Post); return;
11304 case 19: load_pair_u64 (cpu, offset, Post); return;
11305 case 20: store_pair_u64 (cpu, offset, NoWriteBack); return;
11306 case 21: load_pair_u64 (cpu, offset, NoWriteBack); return;
11307 case 22: store_pair_u64 (cpu, offset, Pre); return;
11308 case 23: load_pair_u64 (cpu, offset, Pre); return;
11309
11310 default:
11311 HALT_UNALLOC;
11312 }
11313 }
11314
11315 static void
11316 store_pair_float (sim_cpu *cpu, int32_t offset, WriteBack wb)
11317 {
11318 unsigned rn = INSTR (14, 10);
11319 unsigned rd = INSTR (9, 5);
11320 unsigned rm = INSTR (4, 0);
11321 uint64_t address = aarch64_get_reg_u64 (cpu, rd, SP_OK);
11322
11323 offset <<= 2;
11324
11325 if (wb != Post)
11326 address += offset;
11327
11328 aarch64_set_mem_u32 (cpu, address, aarch64_get_vec_u32 (cpu, rm, 0));
11329 aarch64_set_mem_u32 (cpu, address + 4, aarch64_get_vec_u32 (cpu, rn, 0));
11330
11331 if (wb == Post)
11332 address += offset;
11333
11334 if (wb != NoWriteBack)
11335 aarch64_set_reg_u64 (cpu, rd, SP_OK, address);
11336 }
11337
11338 static void
11339 store_pair_double (sim_cpu *cpu, int32_t offset, WriteBack wb)
11340 {
11341 unsigned rn = INSTR (14, 10);
11342 unsigned rd = INSTR (9, 5);
11343 unsigned rm = INSTR (4, 0);
11344 uint64_t address = aarch64_get_reg_u64 (cpu, rd, SP_OK);
11345
11346 offset <<= 3;
11347
11348 if (wb != Post)
11349 address += offset;
11350
11351 aarch64_set_mem_u64 (cpu, address, aarch64_get_vec_u64 (cpu, rm, 0));
11352 aarch64_set_mem_u64 (cpu, address + 8, aarch64_get_vec_u64 (cpu, rn, 0));
11353
11354 if (wb == Post)
11355 address += offset;
11356
11357 if (wb != NoWriteBack)
11358 aarch64_set_reg_u64 (cpu, rd, SP_OK, address);
11359 }
11360
11361 static void
11362 store_pair_long_double (sim_cpu *cpu, int32_t offset, WriteBack wb)
11363 {
11364 FRegister a;
11365 unsigned rn = INSTR (14, 10);
11366 unsigned rd = INSTR (9, 5);
11367 unsigned rm = INSTR (4, 0);
11368 uint64_t address = aarch64_get_reg_u64 (cpu, rd, SP_OK);
11369
11370 offset <<= 4;
11371
11372 if (wb != Post)
11373 address += offset;
11374
11375 aarch64_get_FP_long_double (cpu, rm, & a);
11376 aarch64_set_mem_long_double (cpu, address, a);
11377 aarch64_get_FP_long_double (cpu, rn, & a);
11378 aarch64_set_mem_long_double (cpu, address + 16, a);
11379
11380 if (wb == Post)
11381 address += offset;
11382
11383 if (wb != NoWriteBack)
11384 aarch64_set_reg_u64 (cpu, rd, SP_OK, address);
11385 }
11386
11387 static void
11388 load_pair_float (sim_cpu *cpu, int32_t offset, WriteBack wb)
11389 {
11390 unsigned rn = INSTR (14, 10);
11391 unsigned rd = INSTR (9, 5);
11392 unsigned rm = INSTR (4, 0);
11393 uint64_t address = aarch64_get_reg_u64 (cpu, rd, SP_OK);
11394
11395 if (rm == rn)
11396 HALT_UNALLOC;
11397
11398 offset <<= 2;
11399
11400 if (wb != Post)
11401 address += offset;
11402
11403 aarch64_set_vec_u32 (cpu, rm, 0, aarch64_get_mem_u32 (cpu, address));
11404 aarch64_set_vec_u32 (cpu, rn, 0, aarch64_get_mem_u32 (cpu, address + 4));
11405
11406 if (wb == Post)
11407 address += offset;
11408
11409 if (wb != NoWriteBack)
11410 aarch64_set_reg_u64 (cpu, rd, SP_OK, address);
11411 }
11412
11413 static void
11414 load_pair_double (sim_cpu *cpu, int32_t offset, WriteBack wb)
11415 {
11416 unsigned rn = INSTR (14, 10);
11417 unsigned rd = INSTR (9, 5);
11418 unsigned rm = INSTR (4, 0);
11419 uint64_t address = aarch64_get_reg_u64 (cpu, rd, SP_OK);
11420
11421 if (rm == rn)
11422 HALT_UNALLOC;
11423
11424 offset <<= 3;
11425
11426 if (wb != Post)
11427 address += offset;
11428
11429 aarch64_set_vec_u64 (cpu, rm, 0, aarch64_get_mem_u64 (cpu, address));
11430 aarch64_set_vec_u64 (cpu, rn, 0, aarch64_get_mem_u64 (cpu, address + 8));
11431
11432 if (wb == Post)
11433 address += offset;
11434
11435 if (wb != NoWriteBack)
11436 aarch64_set_reg_u64 (cpu, rd, SP_OK, address);
11437 }
11438
11439 static void
11440 load_pair_long_double (sim_cpu *cpu, int32_t offset, WriteBack wb)
11441 {
11442 FRegister a;
11443 unsigned rn = INSTR (14, 10);
11444 unsigned rd = INSTR (9, 5);
11445 unsigned rm = INSTR (4, 0);
11446 uint64_t address = aarch64_get_reg_u64 (cpu, rd, SP_OK);
11447
11448 if (rm == rn)
11449 HALT_UNALLOC;
11450
11451 offset <<= 4;
11452
11453 if (wb != Post)
11454 address += offset;
11455
11456 aarch64_get_mem_long_double (cpu, address, & a);
11457 aarch64_set_FP_long_double (cpu, rm, a);
11458 aarch64_get_mem_long_double (cpu, address + 16, & a);
11459 aarch64_set_FP_long_double (cpu, rn, a);
11460
11461 if (wb == Post)
11462 address += offset;
11463
11464 if (wb != NoWriteBack)
11465 aarch64_set_reg_u64 (cpu, rd, SP_OK, address);
11466 }
11467
11468 static void
11469 dex_load_store_pair_fp (sim_cpu *cpu)
11470 {
11471 /* instr[31,30] = size (10=> 128-bit, 01=> 64-bit, 00=> 32-bit)
11472 instr[29,25] = instruction encoding
11473 instr[24,23] = addressing mode (10=> offset, 01=> post, 11=> pre)
11474 instr[22] = load/store (1=> load)
11475 instr[21,15] = signed, scaled, offset
11476 instr[14,10] = Rn
11477 instr[ 9, 5] = Rd
11478 instr[ 4, 0] = Rm */
11479
11480 uint32_t dispatch = ((INSTR (31, 30) << 3) | INSTR (24, 22));
11481 int32_t offset = simm32 (aarch64_get_instr (cpu), 21, 15);
11482
11483 switch (dispatch)
11484 {
11485 case 2: store_pair_float (cpu, offset, Post); return;
11486 case 3: load_pair_float (cpu, offset, Post); return;
11487 case 4: store_pair_float (cpu, offset, NoWriteBack); return;
11488 case 5: load_pair_float (cpu, offset, NoWriteBack); return;
11489 case 6: store_pair_float (cpu, offset, Pre); return;
11490 case 7: load_pair_float (cpu, offset, Pre); return;
11491
11492 case 10: store_pair_double (cpu, offset, Post); return;
11493 case 11: load_pair_double (cpu, offset, Post); return;
11494 case 12: store_pair_double (cpu, offset, NoWriteBack); return;
11495 case 13: load_pair_double (cpu, offset, NoWriteBack); return;
11496 case 14: store_pair_double (cpu, offset, Pre); return;
11497 case 15: load_pair_double (cpu, offset, Pre); return;
11498
11499 case 18: store_pair_long_double (cpu, offset, Post); return;
11500 case 19: load_pair_long_double (cpu, offset, Post); return;
11501 case 20: store_pair_long_double (cpu, offset, NoWriteBack); return;
11502 case 21: load_pair_long_double (cpu, offset, NoWriteBack); return;
11503 case 22: store_pair_long_double (cpu, offset, Pre); return;
11504 case 23: load_pair_long_double (cpu, offset, Pre); return;
11505
11506 default:
11507 HALT_UNALLOC;
11508 }
11509 }
11510
11511 static inline unsigned
11512 vec_reg (unsigned v, unsigned o)
11513 {
11514 return (v + o) & 0x1F; /* Register numbers wrap modulo the 32 vector registers. */
11515 }
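/* Example: vec_reg (31, 1) == 0, so a multi-register transfer starting
   at V31 wraps round to V0, matching the architected (t + n) MOD 32
   register numbering.  */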
11516
11517 /* Load multiple N-element structures to M consecutive registers. */
11518 static void
11519 vec_load (sim_cpu *cpu, uint64_t address, unsigned N, unsigned M)
11520 {
11521 int all = INSTR (30, 30);
11522 unsigned size = INSTR (11, 10);
11523 unsigned vd = INSTR (4, 0);
11524 unsigned rpt = (N == M) ? 1 : M;
11525 unsigned selem = N;
11526 unsigned i, j, k;
11527
11528 switch (size)
11529 {
11530 case 0: /* 8-bit operations. */
11531 for (i = 0; i < rpt; i++)
11532 for (j = 0; j < (8 + (8 * all)); j++)
11533 for (k = 0; k < selem; k++)
11534 {
11535 aarch64_set_vec_u8 (cpu, vec_reg (vd, i + k), j,
11536 aarch64_get_mem_u8 (cpu, address));
11537 address += 1;
11538 }
11539 return;
11540
11541 case 1: /* 16-bit operations. */
11542 for (i = 0; i < rpt; i++)
11543 for (j = 0; j < (4 + (4 * all)); j++)
11544 for (k = 0; k < selem; k++)
11545 {
11546 aarch64_set_vec_u16 (cpu, vec_reg (vd, i + k), j,
11547 aarch64_get_mem_u16 (cpu, address));
11548 address += 2;
11549 }
11550 return;
11551
11552 case 2: /* 32-bit operations. */
11553 for (i = 0; i < rpt; i++)
11554 for (j = 0; j < (2 + (2 * all)); j++)
11555 for (k = 0; k < selem; k++)
11556 {
11557 aarch64_set_vec_u32 (cpu, vec_reg (vd, i + k), j,
11558 aarch64_get_mem_u32 (cpu, address));
11559 address += 4;
11560 }
11561 return;
11562
11563 case 3: /* 64-bit operations. */
11564 for (i = 0; i < rpt; i++)
11565 for (j = 0; j < (1 + all); j++)
11566 for (k = 0; k < selem; k++)
11567 {
11568 aarch64_set_vec_u64 (cpu, vec_reg (vd, i + k), j,
11569 aarch64_get_mem_u64 (cpu, address));
11570 address += 8;
11571 }
11572 return;
11573 }
11574 }
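/* Layout example: LD2 (selem = 2, rpt = 1) with 8-bit elements and
   all = 0 de-interleaves 16 bytes m[0..15] so Vd.b[j] = m[2*j] and
   V(d+1).b[j] = m[2*j + 1] for j = 0..7.  The LD1 multi-register forms
   instead use rpt = M with selem = 1, filling each register in turn.  */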
11575
11576 /* Load multiple 4-element structures into four consecutive registers. */
11577 static void
11578 LD4 (sim_cpu *cpu, uint64_t address)
11579 {
11580 vec_load (cpu, address, 4, 4);
11581 }
11582
11583 /* Load multiple 3-element structures into three consecutive registers. */
11584 static void
11585 LD3 (sim_cpu *cpu, uint64_t address)
11586 {
11587 vec_load (cpu, address, 3, 3);
11588 }
11589
11590 /* Load multiple 2-element structures into two consecutive registers. */
11591 static void
11592 LD2 (sim_cpu *cpu, uint64_t address)
11593 {
11594 vec_load (cpu, address, 2, 2);
11595 }
11596
11597 /* Load multiple 1-element structures into one register. */
11598 static void
11599 LD1_1 (sim_cpu *cpu, uint64_t address)
11600 {
11601 vec_load (cpu, address, 1, 1);
11602 }
11603
11604 /* Load multiple 1-element structures into two registers. */
11605 static void
11606 LD1_2 (sim_cpu *cpu, uint64_t address)
11607 {
11608 vec_load (cpu, address, 1, 2);
11609 }
11610
11611 /* Load multiple 1-element structures into three registers. */
11612 static void
11613 LD1_3 (sim_cpu *cpu, uint64_t address)
11614 {
11615 vec_load (cpu, address, 1, 3);
11616 }
11617
11618 /* Load multiple 1-element structures into four registers. */
11619 static void
11620 LD1_4 (sim_cpu *cpu, uint64_t address)
11621 {
11622 vec_load (cpu, address, 1, 4);
11623 }
11624
11625 /* Store multiple N-element structures from M consecutive registers. */
11626 static void
11627 vec_store (sim_cpu *cpu, uint64_t address, unsigned N, unsigned M)
11628 {
11629 int all = INSTR (30, 30);
11630 unsigned size = INSTR (11, 10);
11631 unsigned vd = INSTR (4, 0);
11632 unsigned rpt = (N == M) ? 1 : M;
11633 unsigned selem = N;
11634 unsigned i, j, k;
11635
11636 switch (size)
11637 {
11638 case 0: /* 8-bit operations. */
11639 for (i = 0; i < rpt; i++)
11640 for (j = 0; j < (8 + (8 * all)); j++)
11641 for (k = 0; k < selem; k++)
11642 {
11643 aarch64_set_mem_u8
11644 (cpu, address,
11645 aarch64_get_vec_u8 (cpu, vec_reg (vd, i + k), j));
11646 address += 1;
11647 }
11648 return;
11649
11650 case 1: /* 16-bit operations. */
11651 for (i = 0; i < rpt; i++)
11652 for (j = 0; j < (4 + (4 * all)); j++)
11653 for (k = 0; k < selem; k++)
11654 {
11655 aarch64_set_mem_u16
11656 (cpu, address,
11657 aarch64_get_vec_u16 (cpu, vec_reg (vd, i + k), j));
11658 address += 2;
11659 }
11660 return;
11661
11662 case 2: /* 32-bit operations. */
11663 for (i = 0; i < rpt; i++)
11664 for (j = 0; j < (2 + (2 * all)); j++)
11665 for (k = 0; k < selem; k++)
11666 {
11667 aarch64_set_mem_u32
11668 (cpu, address,
11669 aarch64_get_vec_u32 (cpu, vec_reg (vd, i + k), j));
11670 address += 4;
11671 }
11672 return;
11673
11674 case 3: /* 64-bit operations. */
11675 for (i = 0; i < rpt; i++)
11676 for (j = 0; j < (1 + all); j++)
11677 for (k = 0; k < selem; k++)
11678 {
11679 aarch64_set_mem_u64
11680 (cpu, address,
11681 aarch64_get_vec_u64 (cpu, vec_reg (vd, i + k), j));
11682 address += 8;
11683 }
11684 return;
11685 }
11686 }
11687
11688 /* Store multiple 4-element structure from four consecutive registers. */
11689 static void
11690 ST4 (sim_cpu *cpu, uint64_t address)
11691 {
11692 vec_store (cpu, address, 4, 4);
11693 }
11694
11695 /* Store multiple 3-element structures from three consecutive registers. */
11696 static void
11697 ST3 (sim_cpu *cpu, uint64_t address)
11698 {
11699 vec_store (cpu, address, 3, 3);
11700 }
11701
11702 /* Store multiple 2-element structures from two consecutive registers. */
11703 static void
11704 ST2 (sim_cpu *cpu, uint64_t address)
11705 {
11706 vec_store (cpu, address, 2, 2);
11707 }
11708
11709 /* Store multiple 1-element structures from one register. */
11710 static void
11711 ST1_1 (sim_cpu *cpu, uint64_t address)
11712 {
11713 vec_store (cpu, address, 1, 1);
11714 }
11715
11716 /* Store multiple 1-element structures from two registers. */
11717 static void
11718 ST1_2 (sim_cpu *cpu, uint64_t address)
11719 {
11720 vec_store (cpu, address, 1, 2);
11721 }
11722
11723 /* Store multiple 1-element structures from three registers. */
11724 static void
11725 ST1_3 (sim_cpu *cpu, uint64_t address)
11726 {
11727 vec_store (cpu, address, 1, 3);
11728 }
11729
11730 /* Store multiple 1-element structures from four registers. */
11731 static void
11732 ST1_4 (sim_cpu *cpu, uint64_t address)
11733 {
11734 vec_store (cpu, address, 1, 4);
11735 }
11736
11737 #define LDn_STn_SINGLE_LANE_AND_SIZE() \
11738 do \
11739 { \
11740 switch (INSTR (15, 14)) \
11741 { \
11742 case 0: \
11743 lane = (full << 3) | (s << 2) | size; \
11744 size = 0; \
11745 break; \
11746 \
11747 case 1: \
11748 if ((size & 1) == 1) \
11749 HALT_UNALLOC; \
11750 lane = (full << 2) | (s << 1) | (size >> 1); \
11751 size = 1; \
11752 break; \
11753 \
11754 case 2: \
11755 if ((size & 2) == 2) \
11756 HALT_UNALLOC; \
11757 \
11758 if ((size & 1) == 0) \
11759 { \
11760 lane = (full << 1) | s; \
11761 size = 2; \
11762 } \
11763 else \
11764 { \
11765 if (s) \
11766 HALT_UNALLOC; \
11767 lane = full; \
11768 size = 3; \
11769 } \
11770 break; \
11771 \
11772 default: \
11773 HALT_UNALLOC; \
11774 } \
11775 } \
11776 while (0)
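/* Worked example: for the 16-bit case (INSTR (15, 14) == 1) with Q = 1,
   S = 0 and a size field of 0b10, the macro computes
   lane = (1 << 2) | (0 << 1) | (0b10 >> 1) = 5 and rewrites size to 1,
   i.e. lane 5 of a halfword vector; a size field of 0b01 would be
   rejected as UNALLOC.  */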
11777
11778 /* Load single structure into one lane of N registers. */
11779 static void
11780 do_vec_LDn_single (sim_cpu *cpu, uint64_t address)
11781 {
11782 /* instr[31] = 0
11783 instr[30] = element selector 0=>half, 1=>all elements
11784 instr[29,24] = 00 1101
11785 instr[23] = 0=>simple, 1=>post
11786 instr[22] = 1
11787 instr[21] = width: LD1-or-LD3 (0) / LD2-or-LD4 (1)
11788 instr[20,16] = 0 0000 (simple), Vinc (reg-post-inc, no SP),
11789 11111 (immediate post inc)
11790 instr[15,13] = opcode
11791 instr[12] = S, used for lane number
11792 instr[11,10] = size, also used for lane number
11793 instr[9,5] = address
11794 instr[4,0] = Vd */
11795
11796 unsigned full = INSTR (30, 30);
11797 unsigned vd = INSTR (4, 0);
11798 unsigned size = INSTR (11, 10);
11799 unsigned s = INSTR (12, 12);
11800 int nregs = ((INSTR (13, 13) << 1) | INSTR (21, 21)) + 1;
11801 int lane = 0;
11802 int i;
11803
11804 NYI_assert (29, 24, 0x0D);
11805 NYI_assert (22, 22, 1);
11806
11807 /* Compute the lane number first (using size), and then compute size. */
11808 LDn_STn_SINGLE_LANE_AND_SIZE ();
11809
11810 for (i = 0; i < nregs; i++)
11811 switch (size)
11812 {
11813 case 0:
11814 {
11815 uint8_t val = aarch64_get_mem_u8 (cpu, address + i);
11816 aarch64_set_vec_u8 (cpu, vec_reg (vd, i), lane, val);
11817 break;
11818 }
11819
11820 case 1:
11821 {
11822 uint16_t val = aarch64_get_mem_u16 (cpu, address + (i * 2));
11823 aarch64_set_vec_u16 (cpu, vec_reg (vd, i), lane, val);
11824 break;
11825 }
11826
11827 case 2:
11828 {
11829 uint32_t val = aarch64_get_mem_u32 (cpu, address + (i * 4));
11830 aarch64_set_vec_u32 (cpu, vec_reg (vd, i), lane, val);
11831 break;
11832 }
11833
11834 case 3:
11835 {
11836 uint64_t val = aarch64_get_mem_u64 (cpu, address + (i * 8));
11837 aarch64_set_vec_u64 (cpu, vec_reg (vd, i), lane, val);
11838 break;
11839 }
11840 }
11841 }
11842
11843 /* Store single structure from one lane from N registers. */
11844 static void
11845 do_vec_STn_single (sim_cpu *cpu, uint64_t address)
11846 {
11847 /* instr[31] = 0
11848 instr[30] = element selector 0=>half, 1=>all elements
11849 instr[29,24] = 00 1101
11850 instr[23] = 0=>simple, 1=>post
11851 instr[22] = 0
11852 instr[21] = width: LD1-or-LD3 (0) / LD2-or-LD4 (1)
11853 instr[20,16] = 0 0000 (simple), Vinc (reg-post-inc, no SP),
11854 11111 (immediate post inc)
11855 instr[15,13] = opcode
11856 instr[12] = S, used for lane number
11857 instr[11,10] = size, also used for lane number
11858 instr[9,5] = address
11859 instr[4,0] = Vd */
11860
11861 unsigned full = INSTR (30, 30);
11862 unsigned vd = INSTR (4, 0);
11863 unsigned size = INSTR (11, 10);
11864 unsigned s = INSTR (12, 12);
11865 int nregs = ((INSTR (13, 13) << 1) | INSTR (21, 21)) + 1;
11866 int lane = 0;
11867 int i;
11868
11869 NYI_assert (29, 24, 0x0D);
11870 NYI_assert (22, 22, 0);
11871
11872 /* Compute the lane number first (using size), and then compute size. */
11873 LDn_STn_SINGLE_LANE_AND_SIZE ();
11874
11875 for (i = 0; i < nregs; i++)
11876 switch (size)
11877 {
11878 case 0:
11879 {
11880 uint8_t val = aarch64_get_vec_u8 (cpu, vec_reg (vd, i), lane);
11881 aarch64_set_mem_u8 (cpu, address + i, val);
11882 break;
11883 }
11884
11885 case 1:
11886 {
11887 uint16_t val = aarch64_get_vec_u16 (cpu, vec_reg (vd, i), lane);
11888 aarch64_set_mem_u16 (cpu, address + (i * 2), val);
11889 break;
11890 }
11891
11892 case 2:
11893 {
11894 uint32_t val = aarch64_get_vec_u32 (cpu, vec_reg (vd, i), lane);
11895 aarch64_set_mem_u32 (cpu, address + (i * 4), val);
11896 break;
11897 }
11898
11899 case 3:
11900 {
11901 uint64_t val = aarch64_get_vec_u64 (cpu, vec_reg (vd, i), lane);
11902 aarch64_set_mem_u64 (cpu, address + (i * 8), val);
11903 break;
11904 }
11905 }
11906 }
11907
11908 /* Load single structure into all lanes of N registers. */
11909 static void
11910 do_vec_LDnR (sim_cpu *cpu, uint64_t address)
11911 {
11912 /* instr[31] = 0
11913 instr[30] = element selector 0=>half, 1=>all elements
11914 instr[29,24] = 00 1101
11915 instr[23] = 0=>simple, 1=>post
11916 instr[22] = 1
11917 instr[21] = width: LD1R-or-LD3R (0) / LD2R-or-LD4R (1)
11918 instr[20,16] = 0 0000 (simple), Vinc (reg-post-inc, no SP),
11919 11111 (immediate post inc)
11920 instr[15,14] = 11
11921 instr[13] = width: LD1R-or-LD2R (0) / LD3R-or-LD4R (1)
11922 instr[12] = 0
11923 instr[11,10] = element size 00=> byte(b), 01=> half(h),
11924 10=> word(s), 11=> double(d)
11925 instr[9,5] = address
11926 instr[4,0] = Vd */
11927
11928 unsigned full = INSTR (30, 30);
11929 unsigned vd = INSTR (4, 0);
11930 unsigned size = INSTR (11, 10);
11931 int nregs = ((INSTR (13, 13) << 1) | INSTR (21, 21)) + 1;
11932 int i, n;
11933
11934 NYI_assert (29, 24, 0x0D);
11935 NYI_assert (22, 22, 1);
11936 NYI_assert (15, 14, 3);
11937 NYI_assert (12, 12, 0);
11938
11939 for (n = 0; n < nregs; n++)
11940 switch (size)
11941 {
11942 case 0:
11943 {
11944 uint8_t val = aarch64_get_mem_u8 (cpu, address + n);
11945 for (i = 0; i < (full ? 16 : 8); i++)
11946 aarch64_set_vec_u8 (cpu, vec_reg (vd, n), i, val);
11947 break;
11948 }
11949
11950 case 1:
11951 {
11952 uint16_t val = aarch64_get_mem_u16 (cpu, address + (n * 2));
11953 for (i = 0; i < (full ? 8 : 4); i++)
11954 aarch64_set_vec_u16 (cpu, vec_reg (vd, n), i, val);
11955 break;
11956 }
11957
11958 case 2:
11959 {
11960 uint32_t val = aarch64_get_mem_u32 (cpu, address + (n * 4));
11961 for (i = 0; i < (full ? 4 : 2); i++)
11962 aarch64_set_vec_u32 (cpu, vec_reg (vd, n), i, val);
11963 break;
11964 }
11965
11966 case 3:
11967 {
11968 uint64_t val = aarch64_get_mem_u64 (cpu, address + (n * 8));
11969 for (i = 0; i < (full ? 2 : 1); i++)
11970 aarch64_set_vec_u64 (cpu, vec_reg (vd, n), i, val);
11971 break;
11972 }
11973
11974 default:
11975 HALT_UNALLOC;
11976 }
11977 }
11978
11979 static void
11980 do_vec_load_store (sim_cpu *cpu)
11981 {
11982 /* {LD|ST}<N> {Vd..Vd+N}, vaddr
11983
11984 instr[31] = 0
11985 instr[30] = element selector 0=>half, 1=>all elements
11986 instr[29,25] = 00110
11987 instr[24] = 0=>multiple struct, 1=>single struct
11988 instr[23] = 0=>simple, 1=>post
11989 instr[22] = 0=>store, 1=>load
11990 instr[21] = 0 (LDn) / small(0)-large(1) selector (LDnR)
11991 instr[20,16] = 00000 (simple), Vinc (reg-post-inc, no SP),
11992 11111 (immediate post inc)
11993 instr[15,12] = elements and destinations. eg for load:
11994 0000=>LD4 => load multiple 4-element to
11995 four consecutive registers
11996 0100=>LD3 => load multiple 3-element to
11997 three consecutive registers
11998 1000=>LD2 => load multiple 2-element to
11999 two consecutive registers
12000 0010=>LD1 => load multiple 1-element to
12001 four consecutive registers
12002 0110=>LD1 => load multiple 1-element to
12003 three consecutive registers
12004 1010=>LD1 => load multiple 1-element to
12005 two consecutive registers
12006 0111=>LD1 => load multiple 1-element to
12007 one register
12008 1100=>LD1R,LD2R
12009 1110=>LD3R,LD4R
12010 instr[11,10] = element size 00=> byte(b), 01=> half(h),
12011 10=> word(s), 11=> double(d)
12012 instr[9,5] = Vn, can be SP
12013 instr[4,0] = Vd */
12014
12015 int single;
12016 int post;
12017 int load;
12018 unsigned vn;
12019 uint64_t address;
12020 int type;
12021
12022 if (INSTR (31, 31) != 0 || INSTR (29, 25) != 0x06)
12023 HALT_NYI;
12024
12025 single = INSTR (24, 24);
12026 post = INSTR (23, 23);
12027 load = INSTR (22, 22);
12028 type = INSTR (15, 12);
12029 vn = INSTR (9, 5);
12030 address = aarch64_get_reg_u64 (cpu, vn, SP_OK);
12031
12032 if (! single && INSTR (21, 21) != 0)
12033 HALT_UNALLOC;
12034
12035 if (post)
12036 {
12037 unsigned vm = INSTR (20, 16);
12038
12039 if (vm == R31)
12040 {
12041 unsigned sizeof_operation;
12042
12043 if (single)
12044 {
12045 if ((type >= 0) && (type <= 11))
12046 {
12047 int nregs = ((INSTR (13, 13) << 1) | INSTR (21, 21)) + 1;
12048 switch (INSTR (15, 14))
12049 {
12050 case 0:
12051 sizeof_operation = nregs * 1;
12052 break;
12053 case 1:
12054 sizeof_operation = nregs * 2;
12055 break;
12056 case 2:
12057 if (INSTR (10, 10) == 0)
12058 sizeof_operation = nregs * 4;
12059 else
12060 sizeof_operation = nregs * 8;
12061 break;
12062 default:
12063 HALT_UNALLOC;
12064 }
12065 }
12066 else if (type == 0xC)
12067 {
12068 sizeof_operation = INSTR (21, 21) ? 2 : 1;
12069 sizeof_operation <<= INSTR (11, 10);
12070 }
12071 else if (type == 0xE)
12072 {
12073 sizeof_operation = INSTR (21, 21) ? 4 : 3;
12074 sizeof_operation <<= INSTR (11, 10);
12075 }
12076 else
12077 HALT_UNALLOC;
12078 }
12079 else
12080 {
12081 switch (type)
12082 {
12083 case 0: sizeof_operation = 32; break;
12084 case 4: sizeof_operation = 24; break;
12085 case 8: sizeof_operation = 16; break;
12086
12087 case 7:
12088 /* One register, immediate offset variant. */
12089 sizeof_operation = 8;
12090 break;
12091
12092 case 10:
12093 /* Two registers, immediate offset variant. */
12094 sizeof_operation = 16;
12095 break;
12096
12097 case 6:
12098 /* Three registers, immediate offset variant. */
12099 sizeof_operation = 24;
12100 break;
12101
12102 case 2:
12103 /* Four registers, immediate offset variant. */
12104 sizeof_operation = 32;
12105 break;
12106
12107 default:
12108 HALT_UNALLOC;
12109 }
12110
12111 if (INSTR (30, 30))
12112 sizeof_operation *= 2;
12113 }
12114
12115 aarch64_set_reg_u64 (cpu, vn, SP_OK, address + sizeof_operation);
12116 }
12117 else
12118 aarch64_set_reg_u64 (cpu, vn, SP_OK,
12119 address + aarch64_get_reg_u64 (cpu, vm, NO_SP));
12120 }
12121 else
12122 {
12123 NYI_assert (20, 16, 0);
12124 }
12125
12126 if (single)
12127 {
12128 if (load)
12129 {
12130 if ((type >= 0) && (type <= 11))
12131 do_vec_LDn_single (cpu, address);
12132 else if ((type == 0xC) || (type == 0xE))
12133 do_vec_LDnR (cpu, address);
12134 else
12135 HALT_UNALLOC;
12136 return;
12137 }
12138
12139 /* Stores. */
12140 if ((type >= 0) && (type <= 11))
12141 {
12142 do_vec_STn_single (cpu, address);
12143 return;
12144 }
12145
12146 HALT_UNALLOC;
12147 }
12148
12149 if (load)
12150 {
12151 switch (type)
12152 {
12153 case 0: LD4 (cpu, address); return;
12154 case 4: LD3 (cpu, address); return;
12155 case 8: LD2 (cpu, address); return;
12156 case 2: LD1_4 (cpu, address); return;
12157 case 6: LD1_3 (cpu, address); return;
12158 case 10: LD1_2 (cpu, address); return;
12159 case 7: LD1_1 (cpu, address); return;
12160
12161 default:
12162 HALT_UNALLOC;
12163 }
12164 }
12165
12166 /* Stores. */
12167 switch (type)
12168 {
12169 case 0: ST4 (cpu, address); return;
12170 case 4: ST3 (cpu, address); return;
12171 case 8: ST2 (cpu, address); return;
12172 case 2: ST1_4 (cpu, address); return;
12173 case 6: ST1_3 (cpu, address); return;
12174 case 10: ST1_2 (cpu, address); return;
12175 case 7: ST1_1 (cpu, address); return;
12176 default:
12177 HALT_UNALLOC;
12178 }
12179 }
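/* Post-increment example: with Vm == 31 (the immediate variant) the base
   register advances by the bytes transferred, e.g. LD4 of multiple
   structures with Q = 1 (type 0) adds 32 * 2 = 64 -- four 16-byte
   registers.  */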
12180
12181 static void
12182 dexLdSt (sim_cpu *cpu)
12183 {
12184 /* uint32_t group = dispatchGroup (aarch64_get_instr (cpu));
12185 assert group == GROUP_LDST_0100 || group == GROUP_LDST_0110 ||
12186 group == GROUP_LDST_1100 || group == GROUP_LDST_1110
12187 bits [29,28:26] of a LS are the secondary dispatch vector. */
12188 uint32_t group2 = dispatchLS (aarch64_get_instr (cpu));
12189
12190 switch (group2)
12191 {
12192 case LS_EXCL_000:
12193 dexLoadExclusive (cpu); return;
12194
12195 case LS_LIT_010:
12196 case LS_LIT_011:
12197 dexLoadLiteral (cpu); return;
12198
12199 case LS_OTHER_110:
12200 case LS_OTHER_111:
12201 dexLoadOther (cpu); return;
12202
12203 case LS_ADVSIMD_001:
12204 do_vec_load_store (cpu); return;
12205
12206 case LS_PAIR_100:
12207 dex_load_store_pair_gr (cpu); return;
12208
12209 case LS_PAIR_101:
12210 dex_load_store_pair_fp (cpu); return;
12211
12212 default:
12213 /* Should never reach here. */
12214 HALT_NYI;
12215 }
12216 }
12217
12218 /* Specific decode and execute for group Data Processing Register. */
12219
12220 static void
12221 dexLogicalShiftedRegister (sim_cpu *cpu)
12222 {
12223 /* instr[31] = size : 0 ==> 32 bit, 1 ==> 64 bit
12224 instr[30,29] = op
12225 instr[28:24] = 01010
12226 instr[23,22] = shift : 0 ==> LSL, 1 ==> LSR, 2 ==> ASR, 3 ==> ROR
12227 instr[21] = N
12228 instr[20,16] = Rm
12229 instr[15,10] = count : must be 0xxxxx for 32 bit
12230 instr[9,5] = Rn
12231 instr[4,0] = Rd */
12232
12233 uint32_t size = INSTR (31, 31);
12234 Shift shiftType = INSTR (23, 22);
12235 uint32_t count = INSTR (15, 10);
12236
12237 /* 32 bit operations must have count[5] = 0.
12238 or else we have an UNALLOC. */
12239 if (size == 0 && uimm (count, 5, 5))
12240 HALT_UNALLOC;
12241
12242 /* Dispatch on size:op:N. */
12243 switch ((INSTR (31, 29) << 1) | INSTR (21, 21))
12244 {
12245 case 0: and32_shift (cpu, shiftType, count); return;
12246 case 1: bic32_shift (cpu, shiftType, count); return;
12247 case 2: orr32_shift (cpu, shiftType, count); return;
12248 case 3: orn32_shift (cpu, shiftType, count); return;
12249 case 4: eor32_shift (cpu, shiftType, count); return;
12250 case 5: eon32_shift (cpu, shiftType, count); return;
12251 case 6: ands32_shift (cpu, shiftType, count); return;
12252 case 7: bics32_shift (cpu, shiftType, count); return;
12253 case 8: and64_shift (cpu, shiftType, count); return;
12254 case 9: bic64_shift (cpu, shiftType, count); return;
12255 case 10: orr64_shift (cpu, shiftType, count); return;
12256 case 11: orn64_shift (cpu, shiftType, count); return;
12257 case 12: eor64_shift (cpu, shiftType, count); return;
12258 case 13: eon64_shift (cpu, shiftType, count); return;
12259 case 14: ands64_shift (cpu, shiftType, count); return;
12260 case 15: bics64_shift (cpu, shiftType, count); return;
12261 }
12262 }
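/* Dispatch example: 64-bit ORR (shifted register) has size = 1, op = 01,
   N = 0, so (INSTR (31, 29) << 1) | N = (0b101 << 1) | 0 = 10, handled
   by orr64_shift.  MVN is just ORN with Rn == ZR and therefore decodes
   through cases 3 and 11.  */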
12263
12264 /* 32 bit conditional select. */
12265 static void
12266 csel32 (sim_cpu *cpu, CondCode cc)
12267 {
12268 unsigned rm = INSTR (20, 16);
12269 unsigned rn = INSTR (9, 5);
12270 unsigned rd = INSTR (4, 0);
12271
12272 aarch64_set_reg_u64 (cpu, rd, NO_SP,
12273 testConditionCode (cpu, cc)
12274 ? aarch64_get_reg_u32 (cpu, rn, NO_SP)
12275 : aarch64_get_reg_u32 (cpu, rm, NO_SP));
12276 }
12277
12278 /* 64 bit conditional select. */
12279 static void
12280 csel64 (sim_cpu *cpu, CondCode cc)
12281 {
12282 unsigned rm = INSTR (20, 16);
12283 unsigned rn = INSTR (9, 5);
12284 unsigned rd = INSTR (4, 0);
12285
12286 aarch64_set_reg_u64 (cpu, rd, NO_SP,
12287 testConditionCode (cpu, cc)
12288 ? aarch64_get_reg_u64 (cpu, rn, NO_SP)
12289 : aarch64_get_reg_u64 (cpu, rm, NO_SP));
12290 }
12291
12292 /* 32 bit conditional increment. */
12293 static void
12294 csinc32 (sim_cpu *cpu, CondCode cc)
12295 {
12296 unsigned rm = INSTR (20, 16);
12297 unsigned rn = INSTR (9, 5);
12298 unsigned rd = INSTR (4, 0);
12299
12300 aarch64_set_reg_u64 (cpu, rd, NO_SP,
12301 testConditionCode (cpu, cc)
12302 ? aarch64_get_reg_u32 (cpu, rn, NO_SP)
12303 : aarch64_get_reg_u32 (cpu, rm, NO_SP) + 1);
12304 }
12305
12306 /* 64 bit conditional increment. */
12307 static void
12308 csinc64 (sim_cpu *cpu, CondCode cc)
12309 {
12310 unsigned rm = INSTR (20, 16);
12311 unsigned rn = INSTR (9, 5);
12312 unsigned rd = INSTR (4, 0);
12313
12314 aarch64_set_reg_u64 (cpu, rd, NO_SP,
12315 testConditionCode (cpu, cc)
12316 ? aarch64_get_reg_u64 (cpu, rn, NO_SP)
12317 : aarch64_get_reg_u64 (cpu, rm, NO_SP) + 1);
12318 }
12319
12320 /* 32 bit conditional invert. */
12321 static void
12322 csinv32 (sim_cpu *cpu, CondCode cc)
12323 {
12324 unsigned rm = INSTR (20, 16);
12325 unsigned rn = INSTR (9, 5);
12326 unsigned rd = INSTR (4, 0);
12327
12328 aarch64_set_reg_u64 (cpu, rd, NO_SP,
12329 testConditionCode (cpu, cc)
12330 ? aarch64_get_reg_u32 (cpu, rn, NO_SP)
12331 : ~ aarch64_get_reg_u32 (cpu, rm, NO_SP));
12332 }
12333
12334 /* 64 bit conditional invert. */
12335 static void
12336 csinv64 (sim_cpu *cpu, CondCode cc)
12337 {
12338 unsigned rm = INSTR (20, 16);
12339 unsigned rn = INSTR (9, 5);
12340 unsigned rd = INSTR (4, 0);
12341
12342 aarch64_set_reg_u64 (cpu, rd, NO_SP,
12343 testConditionCode (cpu, cc)
12344 ? aarch64_get_reg_u64 (cpu, rn, NO_SP)
12345 : ~ aarch64_get_reg_u64 (cpu, rm, NO_SP));
12346 }
12347
12348 /* 32 bit conditional negate. */
12349 static void
12350 csneg32 (sim_cpu *cpu, CondCode cc)
12351 {
12352 unsigned rm = INSTR (20, 16);
12353 unsigned rn = INSTR (9, 5);
12354 unsigned rd = INSTR (4, 0);
12355
12356 aarch64_set_reg_u64 (cpu, rd, NO_SP,
12357 testConditionCode (cpu, cc)
12358 ? aarch64_get_reg_u32 (cpu, rn, NO_SP)
12359 : - aarch64_get_reg_u32 (cpu, rm, NO_SP));
12360 }
12361
12362 /* 64 bit conditional negate. */
12363 static void
12364 csneg64 (sim_cpu *cpu, CondCode cc)
12365 {
12366 unsigned rm = INSTR (20, 16);
12367 unsigned rn = INSTR (9, 5);
12368 unsigned rd = INSTR (4, 0);
12369
12370 aarch64_set_reg_u64 (cpu, rd, NO_SP,
12371 testConditionCode (cpu, cc)
12372 ? aarch64_get_reg_u64 (cpu, rn, NO_SP)
12373 : - aarch64_get_reg_u64 (cpu, rm, NO_SP));
12374 }
12375
12376 static void
12377 dexCondSelect (sim_cpu *cpu)
12378 {
12379 /* instr[28,21] = 11010100
12380 instr[31] = size : 0 ==> 32 bit, 1 ==> 64 bit
12381 instr[30],instr[11,10] = op : 000 ==> CSEL, 001 ==> CSINC,
12382 100 ==> CSINV, 101 ==> CSNEG,
12383 _1_ ==> UNALLOC
12384 instr[29] = S : 0 ==> ok, 1 ==> UNALLOC
12385 instr[15,12] = cond */
12387
12388 CondCode cc = INSTR (15, 12);
12389 uint32_t S = INSTR (29, 29);
12390 uint32_t op2 = INSTR (11, 10);
12391
12392 if (S == 1)
12393 HALT_UNALLOC;
12394
12395 if (op2 & 0x2)
12396 HALT_UNALLOC;
12397
12398 switch ((INSTR (31, 30) << 1) | op2)
12399 {
12400 case 0: csel32 (cpu, cc); return;
12401 case 1: csinc32 (cpu, cc); return;
12402 case 2: csinv32 (cpu, cc); return;
12403 case 3: csneg32 (cpu, cc); return;
12404 case 4: csel64 (cpu, cc); return;
12405 case 5: csinc64 (cpu, cc); return;
12406 case 6: csinv64 (cpu, cc); return;
12407 case 7: csneg64 (cpu, cc); return;
12408 }
12409 }
12410
12411 /* Some helpers for counting leading 1 or 0 bits. */
12412
12413 /* Counts the number of leading bits which are the same
12414 in a 32 bit value; the result is in the range 1 to 32. */
12415 static uint32_t
12416 leading32 (uint32_t value)
12417 {
12418 int32_t mask = 0xffff0000;
12419 uint32_t count = 16; /* Counts number of bits set in mask. */
12420 uint32_t lo = 1; /* Lower bound for number of sign bits. */
12421 uint32_t hi = 32; /* Upper bound for number of sign bits. */
12422
12423 while (lo + 1 < hi)
12424 {
12425 int32_t test = (value & mask);
12426
12427 if (test == 0 || test == mask)
12428 {
12429 lo = count;
12430 count = (lo + hi) / 2;
12431 mask >>= (count - lo);
12432 }
12433 else
12434 {
12435 hi = count;
12436 count = (lo + hi) / 2;
12437 mask <<= hi - count;
12438 }
12439 }
12440
12441 if (lo != hi)
12442 {
12443 int32_t test;
12444
12445 mask >>= 1;
12446 test = (value & mask);
12447
12448 if (test == 0 || test == mask)
12449 count = hi;
12450 else
12451 count = lo;
12452 }
12453
12454 return count;
12455 }
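
/* Worked example (illustrative): for value == 0x0000ffff the top 16
   bits are all zero and bit 15 is set, so the binary search above
   converges on count == 16; clz32 therefore returns 16 and cls32
   (which excludes the leading bit itself) returns 15. */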
12456
12457 /* Counts the number of leading bits which are the same
12458 in a 64 bit value; the result is in the range 1 to 64. */
12459 static uint64_t
12460 leading64 (uint64_t value)
12461 {
12462 int64_t mask = 0xffffffff00000000ULL;
12463 uint64_t count = 32; /* Counts number of bits set in mask. */
12464 uint64_t lo = 1; /* Lower bound for number of sign bits. */
12465 uint64_t hi = 64; /* Upper bound for number of sign bits. */
12466
12467 while (lo + 1 < hi)
12468 {
12469 int64_t test = (value & mask);
12470
12471 if (test == 0 || test == mask)
12472 {
12473 lo = count;
12474 count = (lo + hi) / 2;
12475 mask >>= (count - lo);
12476 }
12477 else
12478 {
12479 hi = count;
12480 count = (lo + hi) / 2;
12481 mask <<= hi - count;
12482 }
12483 }
12484
12485 if (lo != hi)
12486 {
12487 int64_t test;
12488
12489 mask >>= 1;
12490 test = (value & mask);
12491
12492 if (test == 0 || test == mask)
12493 count = hi;
12494 else
12495 count = lo;
12496 }
12497
12498 return count;
12499 }
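
/* For reference only: a loop-free equivalent, assuming a GCC/Clang
   style __builtin_clzll is available and that >> on a signed value is
   an arithmetic shift (an untested sketch, not part of the original
   simulator).  v ^ (v >> 1) has its highest set bit at the first
   position where adjacent bits differ:

     static uint64_t
     leading64_alt (uint64_t value)
     {
       int64_t v = value;
       // Arithmetic shift replicates the top bit; diff == 0 means all
       // 64 bits are identical, i.e. 64 leading same bits.
       uint64_t diff = v ^ (v >> 1);
       return diff ? __builtin_clzll (diff) : 64;
     }  */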
12500
12501 /* Bit operations. */
12502 /* N.B. register args may not be SP. */
12503
12504 /* 32 bit count leading sign bits. */
12505 static void
12506 cls32 (sim_cpu *cpu)
12507 {
12508 unsigned rn = INSTR (9, 5);
12509 unsigned rd = INSTR (4, 0);
12510
12511 /* N.B. the result needs to exclude the leading bit. */
12512 aarch64_set_reg_u64
12513 (cpu, rd, NO_SP, leading32 (aarch64_get_reg_u32 (cpu, rn, NO_SP)) - 1);
12514 }
12515
12516 /* 64 bit count leading sign bits. */
12517 static void
12518 cls64 (sim_cpu *cpu)
12519 {
12520 unsigned rn = INSTR (9, 5);
12521 unsigned rd = INSTR (4, 0);
12522
12523 /* N.B. the result needs to exclude the leading bit. */
12524 aarch64_set_reg_u64
12525 (cpu, rd, NO_SP, leading64 (aarch64_get_reg_u64 (cpu, rn, NO_SP)) - 1);
12526 }
12527
12528 /* 32 bit count leading zero bits. */
12529 static void
12530 clz32 (sim_cpu *cpu)
12531 {
12532 unsigned rn = INSTR (9, 5);
12533 unsigned rd = INSTR (4, 0);
12534 uint32_t value = aarch64_get_reg_u32 (cpu, rn, NO_SP);
12535
12536 /* if the sign (top) bit is set then the count is 0. */
12537 if (pick32 (value, 31, 31))
12538 aarch64_set_reg_u64 (cpu, rd, NO_SP, 0L);
12539 else
12540 aarch64_set_reg_u64 (cpu, rd, NO_SP, leading32 (value));
12541 }
12542
12543 /* 64 bit count leading zero bits. */
12544 static void
12545 clz64 (sim_cpu *cpu)
12546 {
12547 unsigned rn = INSTR (9, 5);
12548 unsigned rd = INSTR (4, 0);
12549 uint64_t value = aarch64_get_reg_u64 (cpu, rn, NO_SP);
12550
12551 /* if the sign (top) bit is set then the count is 0. */
12552 if (pick64 (value, 63, 63))
12553 aarch64_set_reg_u64 (cpu, rd, NO_SP, 0L);
12554 else
12555 aarch64_set_reg_u64 (cpu, rd, NO_SP, leading64 (value));
12556 }
12557
12558 /* 32 bit reverse bits. */
12559 static void
12560 rbit32 (sim_cpu *cpu)
12561 {
12562 unsigned rn = INSTR (9, 5);
12563 unsigned rd = INSTR (4, 0);
12564 uint32_t value = aarch64_get_reg_u32 (cpu, rn, NO_SP);
12565 uint32_t result = 0;
12566 int i;
12567
12568 for (i = 0; i < 32; i++)
12569 {
12570 result <<= 1;
12571 result |= (value & 1);
12572 value >>= 1;
12573 }
12574 aarch64_set_reg_u64 (cpu, rd, NO_SP, result);
12575 }
12576
12577 /* 64 bit reverse bits. */
12578 static void
12579 rbit64 (sim_cpu *cpu)
12580 {
12581 unsigned rn = INSTR (9, 5);
12582 unsigned rd = INSTR (4, 0);
12583 uint64_t value = aarch64_get_reg_u64 (cpu, rn, NO_SP);
12584 uint64_t result = 0;
12585 int i;
12586
12587 for (i = 0; i < 64; i++)
12588 {
12589 result <<= 1;
12590 result |= (value & 1UL);
12591 value >>= 1;
12592 }
12593 aarch64_set_reg_u64 (cpu, rd, NO_SP, result);
12594 }
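
/* Illustrative check values (not from the original sources):
   rbit32 (0x00000001) == 0x80000000 and
   rbit32 (0x12345678) == 0x1e6a2c48; rbit64 behaves identically
   over all 64 bits. */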
12595
12596 /* 32 bit reverse bytes. */
12597 static void
12598 rev32 (sim_cpu *cpu)
12599 {
12600 unsigned rn = INSTR (9, 5);
12601 unsigned rd = INSTR (4, 0);
12602 uint32_t value = aarch64_get_reg_u32 (cpu, rn, NO_SP);
12603 uint32_t result = 0;
12604 int i;
12605
12606 for (i = 0; i < 4; i++)
12607 {
12608 result <<= 8;
12609 result |= (value & 0xff);
12610 value >>= 8;
12611 }
12612 aarch64_set_reg_u64 (cpu, rd, NO_SP, result);
12613 }
12614
12615 /* 64 bit reverse bytes. */
12616 static void
12617 rev64 (sim_cpu *cpu)
12618 {
12619 unsigned rn = INSTR (9, 5);
12620 unsigned rd = INSTR (4, 0);
12621 uint64_t value = aarch64_get_reg_u64 (cpu, rn, NO_SP);
12622 uint64_t result = 0;
12623 int i;
12624
12625 for (i = 0; i < 8; i++)
12626 {
12627 result <<= 8;
12628 result |= (value & 0xffULL);
12629 value >>= 8;
12630 }
12631 aarch64_set_reg_u64 (cpu, rd, NO_SP, result);
12632 }
12633
12634 /* 32 bit reverse shorts. */
12635 /* N.B. this reverses the order of the bytes in each half word. */
12636 static void
12637 revh32 (sim_cpu *cpu)
12638 {
12639 unsigned rn = INSTR (9, 5);
12640 unsigned rd = INSTR (4, 0);
12641 uint32_t value = aarch64_get_reg_u32 (cpu, rn, NO_SP);
12642 uint32_t result = 0;
12643 int i;
12644
12645 for (i = 0; i < 2; i++)
12646 {
12647 result <<= 8;
12648 result |= (value & 0x00ff00ff);
12649 value >>= 8;
12650 }
12651 aarch64_set_reg_u64 (cpu, rd, NO_SP, result);
12652 }
12653
12654 /* 64 bit reverse shorts. */
12655 /* N.B. this reverses the order of the bytes in each half word. */
12656 static void
12657 revh64 (sim_cpu *cpu)
12658 {
12659 unsigned rn = INSTR (9, 5);
12660 unsigned rd = INSTR (4, 0);
12661 uint64_t value = aarch64_get_reg_u64 (cpu, rn, NO_SP);
12662 uint64_t result = 0;
12663 int i;
12664
12665 for (i = 0; i < 2; i++)
12666 {
12667 result <<= 8;
12668 result |= (value & 0x00ff00ff00ff00ffULL);
12669 value >>= 8;
12670 }
12671 aarch64_set_reg_u64 (cpu, rd, NO_SP, result);
12672 }
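
/* Illustrative check values (not from the original sources),
   contrasting the two byte reverses on 0x11223344:
     rev32  (whole word)     ==> 0x44332211
     revh32 (each half word) ==> 0x22114433  */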
12673
12674 static void
12675 dexDataProc1Source (sim_cpu *cpu)
12676 {
12677 /* instr[30] = 1
12678 instr[28,21] = 11010110
12679 instr[31] = size : 0 ==> 32 bit, 1 ==> 64 bit
12680 instr[29] = S : 0 ==> ok, 1 ==> UNALLOC
12681 instr[20,16] = opcode2 : 00000 ==> ok, ow ==> UNALLOC
12682 instr[15,10] = opcode : 000000 ==> RBIT, 000001 ==> REV16,
12683 000010 ==> REV, 000011 ==> UNALLOC
12684 000100 ==> CLZ, 000101 ==> CLS
12685 ow ==> UNALLOC
12686 instr[9,5] = rn : may not be SP
12687 instr[4,0] = rd : may not be SP. */
12688
12689 uint32_t S = INSTR (29, 29);
12690 uint32_t opcode2 = INSTR (20, 16);
12691 uint32_t opcode = INSTR (15, 10);
12692 uint32_t dispatch = ((INSTR (31, 31) << 3) | opcode);
12693
12694 if (S == 1)
12695 HALT_UNALLOC;
12696
12697 if (opcode2 != 0)
12698 HALT_UNALLOC;
12699
12700 if (opcode & 0x38)
12701 HALT_UNALLOC;
12702
12703 switch (dispatch)
12704 {
12705 case 0: rbit32 (cpu); return;
12706 case 1: revh32 (cpu); return;
12707 case 2: rev32 (cpu); return;
12708 case 4: clz32 (cpu); return;
12709 case 5: cls32 (cpu); return;
12710 case 8: rbit64 (cpu); return;
12711 case 9: revh64 (cpu); return;
12712 case 10: rev32 (cpu); return; /* FIXME: REV32 should also byte-reverse the upper word. */
12713 case 11: rev64 (cpu); return;
12714 case 12: clz64 (cpu); return;
12715 case 13: cls64 (cpu); return;
12716 default: HALT_UNALLOC;
12717 }
12718 }
12719
12720 /* Variable shift.
12721 Shifts by count supplied in register.
12722 N.B. register args may not be SP.
12723 These all use the shifted auxiliary function for
12724 simplicity and clarity. Writing the actual shift
12725 inline would avoid a branch and so be faster but
12726 would also necessitate getting signs right. */
12727
12728 /* 32 bit arithmetic shift right. */
12729 static void
12730 asrv32 (sim_cpu *cpu)
12731 {
12732 unsigned rm = INSTR (20, 16);
12733 unsigned rn = INSTR (9, 5);
12734 unsigned rd = INSTR (4, 0);
12735
12736 aarch64_set_reg_u64
12737 (cpu, rd, NO_SP,
12738 shifted32 (aarch64_get_reg_u32 (cpu, rn, NO_SP), ASR,
12739 (aarch64_get_reg_u32 (cpu, rm, NO_SP) & 0x1f)));
12740 }
12741
12742 /* 64 bit arithmetic shift right. */
12743 static void
12744 asrv64 (sim_cpu *cpu)
12745 {
12746 unsigned rm = INSTR (20, 16);
12747 unsigned rn = INSTR (9, 5);
12748 unsigned rd = INSTR (4, 0);
12749
12750 aarch64_set_reg_u64
12751 (cpu, rd, NO_SP,
12752 shifted64 (aarch64_get_reg_u64 (cpu, rn, NO_SP), ASR,
12753 (aarch64_get_reg_u64 (cpu, rm, NO_SP) & 0x3f)));
12754 }
12755
12756 /* 32 bit logical shift left. */
12757 static void
12758 lslv32 (sim_cpu *cpu)
12759 {
12760 unsigned rm = INSTR (20, 16);
12761 unsigned rn = INSTR (9, 5);
12762 unsigned rd = INSTR (4, 0);
12763
12764 aarch64_set_reg_u64
12765 (cpu, rd, NO_SP,
12766 shifted32 (aarch64_get_reg_u32 (cpu, rn, NO_SP), LSL,
12767 (aarch64_get_reg_u32 (cpu, rm, NO_SP) & 0x1f)));
12768 }
12769
12770 /* 64 bit logical shift left. */
12771 static void
12772 lslv64 (sim_cpu *cpu)
12773 {
12774 unsigned rm = INSTR (20, 16);
12775 unsigned rn = INSTR (9, 5);
12776 unsigned rd = INSTR (4, 0);
12777
12778 aarch64_set_reg_u64
12779 (cpu, rd, NO_SP,
12780 shifted64 (aarch64_get_reg_u64 (cpu, rn, NO_SP), LSL,
12781 (aarch64_get_reg_u64 (cpu, rm, NO_SP) & 0x3f)));
12782 }
12783
12784 /* 32 bit logical shift right. */
12785 static void
12786 lsrv32 (sim_cpu *cpu)
12787 {
12788 unsigned rm = INSTR (20, 16);
12789 unsigned rn = INSTR (9, 5);
12790 unsigned rd = INSTR (4, 0);
12791
12792 aarch64_set_reg_u64
12793 (cpu, rd, NO_SP,
12794 shifted32 (aarch64_get_reg_u32 (cpu, rn, NO_SP), LSR,
12795 (aarch64_get_reg_u32 (cpu, rm, NO_SP) & 0x1f)));
12796 }
12797
12798 /* 64 bit logical shift right. */
12799 static void
12800 lsrv64 (sim_cpu *cpu)
12801 {
12802 unsigned rm = INSTR (20, 16);
12803 unsigned rn = INSTR (9, 5);
12804 unsigned rd = INSTR (4, 0);
12805
12806 aarch64_set_reg_u64
12807 (cpu, rd, NO_SP,
12808 shifted64 (aarch64_get_reg_u64 (cpu, rn, NO_SP), LSR,
12809 (aarch64_get_reg_u64 (cpu, rm, NO_SP) & 0x3f)));
12810 }
12811
12812 /* 32 bit rotate right. */
12813 static void
12814 rorv32 (sim_cpu *cpu)
12815 {
12816 unsigned rm = INSTR (20, 16);
12817 unsigned rn = INSTR (9, 5);
12818 unsigned rd = INSTR (4, 0);
12819
12820 aarch64_set_reg_u64
12821 (cpu, rd, NO_SP,
12822 shifted32 (aarch64_get_reg_u32 (cpu, rn, NO_SP), ROR,
12823 (aarch64_get_reg_u32 (cpu, rm, NO_SP) & 0x1f)));
12824 }
12825
12826 /* 64 bit rotate right. */
12827 static void
12828 rorv64 (sim_cpu *cpu)
12829 {
12830 unsigned rm = INSTR (20, 16);
12831 unsigned rn = INSTR (9, 5);
12832 unsigned rd = INSTR (4, 0);
12833
12834 aarch64_set_reg_u64
12835 (cpu, rd, NO_SP,
12836 shifted64 (aarch64_get_reg_u64 (cpu, rn, NO_SP), ROR,
12837 (aarch64_get_reg_u64 (cpu, rm, NO_SP) & 0x3f)));
12838 }
12839
12840
12841 /* divide. */
12842
12843 /* 32 bit signed divide. */
12844 static void
12845 sdiv32 (sim_cpu *cpu)
12846 {
12847 unsigned rm = INSTR (20, 16);
12848 unsigned rn = INSTR (9, 5);
12849 unsigned rd = INSTR (4, 0);
12850 /* N.B. the pseudo-code does the divide using 64 bit data. */
12851 /* N.B. C99 division truncates towards zero, as required. */
12852 int64_t dividend = aarch64_get_reg_s32 (cpu, rn, NO_SP);
12853 int64_t divisor = aarch64_get_reg_s32 (cpu, rm, NO_SP);
12854
12855 aarch64_set_reg_s64 (cpu, rd, NO_SP,
12856 divisor ? ((int32_t) (dividend / divisor)) : 0);
12857 }
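
/* A note on the 32 bit signed divide above: doing the division in
   64 bit arithmetic, as the ARM pseudo-code does, also sidesteps the
   one overflowing case.  Worked example (illustrative):
   0x80000000 / -1 evaluates to +2^31 in 64 bits, and truncating back
   to 32 bits yields 0x80000000 -- the architected result -- where a
   direct 32 bit C division would overflow. */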
12858
12859 /* 64 bit signed divide. */
12860 static void
12861 sdiv64 (sim_cpu *cpu)
12862 {
12863 unsigned rm = INSTR (20, 16);
12864 unsigned rn = INSTR (9, 5);
12865 unsigned rd = INSTR (4, 0);
12866 int64_t dividend = aarch64_get_reg_s64 (cpu, rn, NO_SP);
12867 int64_t divisor = aarch64_get_reg_s64 (cpu, rm, NO_SP);
12868 /* N.B. C99 division truncates towards zero, as required.  INT64_MIN / -1
12869 overflows (undefined behaviour in C); the architected result is INT64_MIN. */
12870 aarch64_set_reg_s64 (cpu, rd, NO_SP, divisor == 0 ? 0
12871 : (dividend == INT64_MIN && divisor == -1) ? INT64_MIN
12872 : dividend / divisor);
12873 }
12874
12875 /* 32 bit unsigned divide. */
12876 static void
12877 udiv32 (sim_cpu *cpu)
12878 {
12879 unsigned rm = INSTR (20, 16);
12880 unsigned rn = INSTR (9, 5);
12881 unsigned rd = INSTR (4, 0);
12882
12883 /* N.B. the pseudo-code does the divide using 64 bit data. */
12884 uint64_t dividend = aarch64_get_reg_u32 (cpu, rn, NO_SP);
12885 uint64_t divisor = aarch64_get_reg_u32 (cpu, rm, NO_SP);
12886
12887 aarch64_set_reg_u64 (cpu, rd, NO_SP,
12888 divisor ? (uint32_t) (dividend / divisor) : 0);
12889 }
12890
12891 /* 64 bit unsigned divide. */
12892 static void
12893 udiv64 (sim_cpu *cpu)
12894 {
12895 unsigned rm = INSTR (20, 16);
12896 unsigned rn = INSTR (9, 5);
12897 unsigned rd = INSTR (4, 0);
12898
12899 /* N.B. C division of unsigned values truncates, as required. */
12900 uint64_t divisor = aarch64_get_reg_u64 (cpu, rm, NO_SP);
12901
12902 aarch64_set_reg_u64
12903 (cpu, rd, NO_SP,
12904 divisor ? (aarch64_get_reg_u64 (cpu, rn, NO_SP) / divisor) : 0);
12905 }
12906
12907 static void
12908 dexDataProc2Source (sim_cpu *cpu)
12909 {
12910 /* assert instr[30] == 0
12911 instr[28,21] == 11010110
12912 instr[31] = size : 0 ==> 32 bit, 1 ==> 64 bit
12913 instr[29] = S : 0 ==> ok, 1 ==> UNALLOC
12914 instr[15,10] = opcode : 000010 ==> UDIV, 000011 ==> SDIV,
12915 001000 ==> LSLV, 001001 ==> LSRV
12916 001010 ==> ASRV, 001011 ==> RORV
12917 ow ==> UNALLOC. */
12918
12919 uint32_t dispatch;
12920 uint32_t S = INSTR (29, 29);
12921 uint32_t opcode = INSTR (15, 10);
12922
12923 if (S == 1)
12924 HALT_UNALLOC;
12925
12926 if (opcode & 0x34)
12927 HALT_UNALLOC;
12928
12929 dispatch = ( (INSTR (31, 31) << 3)
12930 | (uimm (opcode, 3, 3) << 2)
12931 | uimm (opcode, 1, 0));
12932 switch (dispatch)
12933 {
12934 case 2: udiv32 (cpu); return;
12935 case 3: sdiv32 (cpu); return;
12936 case 4: lslv32 (cpu); return;
12937 case 5: lsrv32 (cpu); return;
12938 case 6: asrv32 (cpu); return;
12939 case 7: rorv32 (cpu); return;
12940 case 10: udiv64 (cpu); return;
12941 case 11: sdiv64 (cpu); return;
12942 case 12: lslv64 (cpu); return;
12943 case 13: lsrv64 (cpu); return;
12944 case 14: asrv64 (cpu); return;
12945 case 15: rorv64 (cpu); return;
12946 default: HALT_UNALLOC;
12947 }
12948 }
12949
12950
12951 /* Multiply. */
12952
12953 /* 32 bit multiply and add. */
12954 static void
12955 madd32 (sim_cpu *cpu)
12956 {
12957 unsigned rm = INSTR (20, 16);
12958 unsigned ra = INSTR (14, 10);
12959 unsigned rn = INSTR (9, 5);
12960 unsigned rd = INSTR (4, 0);
12961
12962 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
12963 aarch64_set_reg_u64 (cpu, rd, NO_SP,
12964 aarch64_get_reg_u32 (cpu, ra, NO_SP)
12965 + aarch64_get_reg_u32 (cpu, rn, NO_SP)
12966 * aarch64_get_reg_u32 (cpu, rm, NO_SP));
12967 }
12968
12969 /* 64 bit multiply and add. */
12970 static void
12971 madd64 (sim_cpu *cpu)
12972 {
12973 unsigned rm = INSTR (20, 16);
12974 unsigned ra = INSTR (14, 10);
12975 unsigned rn = INSTR (9, 5);
12976 unsigned rd = INSTR (4, 0);
12977
12978 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
12979 aarch64_set_reg_u64 (cpu, rd, NO_SP,
12980 aarch64_get_reg_u64 (cpu, ra, NO_SP)
12981 + (aarch64_get_reg_u64 (cpu, rn, NO_SP)
12982 * aarch64_get_reg_u64 (cpu, rm, NO_SP)));
12983 }
12984
12985 /* 32 bit multiply and sub. */
12986 static void
12987 msub32 (sim_cpu *cpu)
12988 {
12989 unsigned rm = INSTR (20, 16);
12990 unsigned ra = INSTR (14, 10);
12991 unsigned rn = INSTR (9, 5);
12992 unsigned rd = INSTR (4, 0);
12993
12994 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
12995 aarch64_set_reg_u64 (cpu, rd, NO_SP,
12996 aarch64_get_reg_u32 (cpu, ra, NO_SP)
12997 - aarch64_get_reg_u32 (cpu, rn, NO_SP)
12998 * aarch64_get_reg_u32 (cpu, rm, NO_SP));
12999 }
13000
13001 /* 64 bit multiply and sub. */
13002 static void
13003 msub64 (sim_cpu *cpu)
13004 {
13005 unsigned rm = INSTR (20, 16);
13006 unsigned ra = INSTR (14, 10);
13007 unsigned rn = INSTR (9, 5);
13008 unsigned rd = INSTR (4, 0);
13009
13010 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
13011 aarch64_set_reg_u64 (cpu, rd, NO_SP,
13012 aarch64_get_reg_u64 (cpu, ra, NO_SP)
13013 - aarch64_get_reg_u64 (cpu, rn, NO_SP)
13014 * aarch64_get_reg_u64 (cpu, rm, NO_SP));
13015 }
13016
13017 /* Signed multiply add long -- source, source2 : 32 bit, source3 : 64 bit. */
13018 static void
13019 smaddl (sim_cpu *cpu)
13020 {
13021 unsigned rm = INSTR (20, 16);
13022 unsigned ra = INSTR (14, 10);
13023 unsigned rn = INSTR (9, 5);
13024 unsigned rd = INSTR (4, 0);
13025
13026 /* N.B. we need to multiply the signed 32 bit values in rn, rm to
13027 obtain a 64 bit product. */
13028 aarch64_set_reg_s64
13029 (cpu, rd, NO_SP,
13030 aarch64_get_reg_s64 (cpu, ra, NO_SP)
13031 + ((int64_t) aarch64_get_reg_s32 (cpu, rn, NO_SP))
13032 * ((int64_t) aarch64_get_reg_s32 (cpu, rm, NO_SP)));
13033 }
13034
13035 /* Signed multiply sub long -- source, source2 : 32 bit, source3 : 64 bit. */
13036 static void
13037 smsubl (sim_cpu *cpu)
13038 {
13039 unsigned rm = INSTR (20, 16);
13040 unsigned ra = INSTR (14, 10);
13041 unsigned rn = INSTR (9, 5);
13042 unsigned rd = INSTR (4, 0);
13043
13044 /* N.B. we need to multiply the signed 32 bit values in rn, rm to
13045 obtain a 64 bit product. */
13046 aarch64_set_reg_s64
13047 (cpu, rd, NO_SP,
13048 aarch64_get_reg_s64 (cpu, ra, NO_SP)
13049 - ((int64_t) aarch64_get_reg_s32 (cpu, rn, NO_SP))
13050 * ((int64_t) aarch64_get_reg_s32 (cpu, rm, NO_SP)));
13051 }
13052
13053 /* Integer Multiply/Divide. */
13054
13055 /* First some macros and a helper function. */
13056 /* Macros to test or access elements of 64 bit words. */
13057
13058 /* Mask used to access lo 32 bits of 64 bit unsigned int. */
13059 #define LOW_WORD_MASK ((1ULL << 32) - 1)
13060 /* Return the lo 32 bit word of a 64 bit unsigned int as a 64 bit unsigned int. */
13061 #define lowWordToU64(_value_u64) ((_value_u64) & LOW_WORD_MASK)
13062 /* Return the hi 32 bit word of a 64 bit unsigned int as a 64 bit unsigned int. */
13063 #define highWordToU64(_value_u64) ((_value_u64) >> 32)
13064
13065 /* Offset of sign bit in 64 bit signed integer. */
13066 #define SIGN_SHIFT_U64 63
13067 /* The sign bit itself -- also identifies the minimum negative int value. */
13068 #define SIGN_BIT_U64 (1ULL << SIGN_SHIFT_U64)
13069 /* Return true if a 64 bit signed int presented as an unsigned int is the
13070 most negative value. */
13071 #define isMinimumU64(_value_u64) ((_value_u64) == SIGN_BIT_U64)
13072 /* Return true (non-zero) if a 64 bit signed int presented as an unsigned
13073 int has its sign bit set. */
13074 #define isSignSetU64(_value_u64) ((_value_u64) & SIGN_BIT_U64)
13075 /* Return -1L or 1L according to whether a 64 bit signed int presented as
13076 an unsigned int has its sign bit set or not. */
13077 #define signOfU64(_value_u64) (1L + (((_value_u64) >> SIGN_SHIFT_U64) * -2L))
13078 /* Clear the sign bit of a 64 bit signed int presented as an unsigned int. */
13079 #define clearSignU64(_value_u64) ((_value_u64) &= ~SIGN_BIT_U64)
13080
13081 /* Multiply two 64 bit ints and return
13082 the hi 64 bits of the 128 bit product. */
13083
13084 static uint64_t
13085 mul64hi (uint64_t value1, uint64_t value2)
13086 {
13087 uint64_t resultmid1;
13088 uint64_t result;
13089 uint64_t value1_lo = lowWordToU64 (value1);
13090 uint64_t value1_hi = highWordToU64 (value1);
13091 uint64_t value2_lo = lowWordToU64 (value2);
13092 uint64_t value2_hi = highWordToU64 (value2);
13093
13094 /* Cross-multiply and collect results. */
13095 uint64_t xproductlo = value1_lo * value2_lo;
13096 uint64_t xproductmid1 = value1_lo * value2_hi;
13097 uint64_t xproductmid2 = value1_hi * value2_lo;
13098 uint64_t xproducthi = value1_hi * value2_hi;
13099 uint64_t carry = 0;
13100 /* Start accumulating 64 bit results. */
13101 /* Drop bottom half of lowest cross-product. */
13102 uint64_t resultmid = xproductlo >> 32;
13103 /* Add in middle products. */
13104 resultmid = resultmid + xproductmid1;
13105
13106 /* Check for overflow. */
13107 if (resultmid < xproductmid1)
13108 /* Carry over 1 into top cross-product. */
13109 carry++;
13110
13111 resultmid1 = resultmid + xproductmid2;
13112
13113 /* Check for overflow. */
13114 if (resultmid1 < xproductmid2)
13115 /* Carry over 1 into top cross-product. */
13116 carry++;
13117
13118 /* Drop lowest 32 bits of middle cross-product. */
13119 result = resultmid1 >> 32;
13120 /* Move carry bit to just above middle cross-product highest bit. */
13121 carry = carry << 32;
13122
13123 /* Add the top cross-product and any carry. */
13124 result += xproducthi + carry;
13125
13126 return result;
13127 }
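
/* Sanity-check values for mul64hi (illustrative, not from the original
   sources): mul64hi (1ULL << 32, 1ULL << 32) == 1, and
   mul64hi (~0ULL, ~0ULL) == ~0ULL - 1.  For reference only, on
   compilers providing a 128 bit type the whole routine reduces to an
   (untested) one-liner:

     static uint64_t
     mul64hi_alt (uint64_t a, uint64_t b)
     {
       // Widen, multiply, keep the high 64 bits of the 128 bit product.
       return (uint64_t) (((unsigned __int128) a * b) >> 64);
     }  */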
13128
13129 /* Signed multiply high, source, source2 :
13130 64 bit, dest <-- high 64-bit of result. */
13131 static void
13132 smulh (sim_cpu *cpu)
13133 {
13134 uint64_t uresult;
13135 int64_t result;
13136 unsigned rm = INSTR (20, 16);
13137 unsigned rn = INSTR (9, 5);
13138 unsigned rd = INSTR (4, 0);
13139 GReg ra = INSTR (14, 10);
13140 int64_t value1 = aarch64_get_reg_u64 (cpu, rn, NO_SP);
13141 int64_t value2 = aarch64_get_reg_u64 (cpu, rm, NO_SP);
13142 uint64_t uvalue1;
13143 uint64_t uvalue2;
13144 int negate = 0;
13145
13146 if (ra != R31)
13147 HALT_UNALLOC;
13148
13149 /* Convert to unsigned and use the unsigned mul64hi routine
13150 then fix the sign up afterwards. */
13151 if (value1 < 0)
13152 {
13153 negate = !negate;
13154 uvalue1 = -value1;
13155 }
13156 else
13157 {
13158 uvalue1 = value1;
13159 }
13160
13161 if (value2 < 0)
13162 {
13163 negate = !negate;
13164 uvalue2 = -value2;
13165 }
13166 else
13167 {
13168 uvalue2 = value2;
13169 }
13170
13171 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
13172
13173 uresult = mul64hi (uvalue1, uvalue2);
13174 result = uresult;
13175
13176 if (negate)
13177 {
13178 /* Multiply the 128-bit result by -1: the high part gets inverted,
13179 with a carry added in only if the low part is 0. */
13180 result = ~result;
13181 if ((uvalue1 * uvalue2) == 0)
13182 result += 1;
13183 }
13184
13185 aarch64_set_reg_s64 (cpu, rd, NO_SP, result);
13186 }
13187
13188 /* Unsigned multiply add long -- source, source2 :
13189 32 bit, source3 : 64 bit. */
13190 static void
13191 umaddl (sim_cpu *cpu)
13192 {
13193 unsigned rm = INSTR (20, 16);
13194 unsigned ra = INSTR (14, 10);
13195 unsigned rn = INSTR (9, 5);
13196 unsigned rd = INSTR (4, 0);
13197
13198 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
13199 /* N.B. we need to multiply the unsigned 32 bit values in rn, rm to
13200 obtain a 64 bit product. */
13201 aarch64_set_reg_u64
13202 (cpu, rd, NO_SP,
13203 aarch64_get_reg_u64 (cpu, ra, NO_SP)
13204 + ((uint64_t) aarch64_get_reg_u32 (cpu, rn, NO_SP))
13205 * ((uint64_t) aarch64_get_reg_u32 (cpu, rm, NO_SP)));
13206 }
13207
13208 /* Unsigned multiply sub long -- source, source2 : 32 bit, source3 : 64 bit. */
13209 static void
13210 umsubl (sim_cpu *cpu)
13211 {
13212 unsigned rm = INSTR (20, 16);
13213 unsigned ra = INSTR (14, 10);
13214 unsigned rn = INSTR (9, 5);
13215 unsigned rd = INSTR (4, 0);
13216
13217 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
13218 /* N.B. we need to multiply the unsigned 32 bit values in rn, rm to
13219 obtain a 64 bit product. */
13220 aarch64_set_reg_u64
13221 (cpu, rd, NO_SP,
13222 aarch64_get_reg_u64 (cpu, ra, NO_SP)
13223 - ((uint64_t) aarch64_get_reg_u32 (cpu, rn, NO_SP))
13224 * ((uint64_t) aarch64_get_reg_u32 (cpu, rm, NO_SP)));
13225 }
13226
13227 /* Unsigned multiply high, source, source2 :
13228 64 bit, dest <-- high 64-bit of result. */
13229 static void
13230 umulh (sim_cpu *cpu)
13231 {
13232 unsigned rm = INSTR (20, 16);
13233 unsigned rn = INSTR (9, 5);
13234 unsigned rd = INSTR (4, 0);
13235 GReg ra = INSTR (14, 10);
13236
13237 if (ra != R31)
13238 HALT_UNALLOC;
13239
13240 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
13241 aarch64_set_reg_u64 (cpu, rd, NO_SP,
13242 mul64hi (aarch64_get_reg_u64 (cpu, rn, NO_SP),
13243 aarch64_get_reg_u64 (cpu, rm, NO_SP)));
13244 }
13245
13246 static void
13247 dexDataProc3Source (sim_cpu *cpu)
13248 {
13249 /* assert instr[28,24] == 11011. */
13250 /* instr[31] = size : 0 ==> 32 bit, 1 ==> 64 bit (for rd at least)
13251 instr[30,29] = op54 : 00 ==> ok, ow ==> UNALLOC
13252 instr[23,21] = op31 : 111 ==> UNALLOC, ow ==> ok
13253 instr[15] = o0 : 0/1 ==> ok
13254 instr[23,21],instr[15] ==> op : 0000 ==> MADD, 0001 ==> MSUB, (32/64 bit)
13255 0010 ==> SMADDL, 0011 ==> SMSUBL, (64 bit only)
13256 0100 ==> SMULH, (64 bit only)
13257 1010 ==> UMADDL, 1011 ==> UMSUBL, (64 bit only)
13258 1100 ==> UMULH (64 bit only)
13259 ow ==> UNALLOC. */
13260
13261 uint32_t dispatch;
13262 uint32_t size = INSTR (31, 31);
13263 uint32_t op54 = INSTR (30, 29);
13264 uint32_t op31 = INSTR (23, 21);
13265 uint32_t o0 = INSTR (15, 15);
13266
13267 if (op54 != 0)
13268 HALT_UNALLOC;
13269
13270 if (size == 0)
13271 {
13272 if (op31 != 0)
13273 HALT_UNALLOC;
13274
13275 if (o0 == 0)
13276 madd32 (cpu);
13277 else
13278 msub32 (cpu);
13279 return;
13280 }
13281
13282 dispatch = (op31 << 1) | o0;
13283
13284 switch (dispatch)
13285 {
13286 case 0: madd64 (cpu); return;
13287 case 1: msub64 (cpu); return;
13288 case 2: smaddl (cpu); return;
13289 case 3: smsubl (cpu); return;
13290 case 4: smulh (cpu); return;
13291 case 10: umaddl (cpu); return;
13292 case 11: umsubl (cpu); return;
13293 case 12: umulh (cpu); return;
13294 default: HALT_UNALLOC;
13295 }
13296 }
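
/* Dispatch example for dexDataProc3Source (illustrative): for
   UMADDL Xd, Wn, Wm, Xa the fields are size == 1, op31 == 101 and
   o0 == 0, giving dispatch == (5 << 1) | 0 == 10, which selects
   umaddl above. */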
13297
13298 static void
13299 dexDPReg (sim_cpu *cpu)
13300 {
13301 /* uint32_t group = dispatchGroup (aarch64_get_instr (cpu));
13302 assert group == GROUP_DPREG_0101 || group == GROUP_DPREG_1101
13303 bits [28], [24] and [21] of a DPReg are the secondary dispatch vector. */
13304 uint32_t group2 = dispatchDPReg (aarch64_get_instr (cpu));
13305
13306 switch (group2)
13307 {
13308 case DPREG_LOG_000:
13309 case DPREG_LOG_001:
13310 dexLogicalShiftedRegister (cpu); return;
13311
13312 case DPREG_ADDSHF_010:
13313 dexAddSubtractShiftedRegister (cpu); return;
13314
13315 case DPREG_ADDEXT_011:
13316 dexAddSubtractExtendedRegister (cpu); return;
13317
13318 case DPREG_ADDCOND_100:
13319 {
13320 /* This set bundles a variety of different operations. */
13321 /* Check for one of : */
13322 /* 1) add/sub w carry. */
13323 uint32_t mask1 = 0x1FE00000U;
13324 uint32_t val1 = 0x1A000000U;
13325 /* 2) cond compare register/immediate. */
13326 uint32_t mask2 = 0x1FE00000U;
13327 uint32_t val2 = 0x1A400000U;
13328 /* 3) cond select. */
13329 uint32_t mask3 = 0x1FE00000U;
13330 uint32_t val3 = 0x1A800000U;
13331 /* 4) data proc 1/2 source. */
13332 uint32_t mask4 = 0x1FE00000U;
13333 uint32_t val4 = 0x1AC00000U;
13334
13335 if ((aarch64_get_instr (cpu) & mask1) == val1)
13336 dexAddSubtractWithCarry (cpu);
13337
13338 else if ((aarch64_get_instr (cpu) & mask2) == val2)
13339 CondCompare (cpu);
13340
13341 else if ((aarch64_get_instr (cpu) & mask3) == val3)
13342 dexCondSelect (cpu);
13343
13344 else if ((aarch64_get_instr (cpu) & mask4) == val4)
13345 {
13346 /* Bit 30 is clear for data proc 2 source
13347 and set for data proc 1 source. */
13348 if (aarch64_get_instr (cpu) & (1U << 30))
13349 dexDataProc1Source (cpu);
13350 else
13351 dexDataProc2Source (cpu);
13352 }
13353
13354 else
13355 /* Should not reach here. */
13356 HALT_NYI;
13357
13358 return;
13359 }
13360
13361 case DPREG_3SRC_110:
13362 dexDataProc3Source (cpu); return;
13363
13364 case DPREG_UNALLOC_101:
13365 HALT_UNALLOC;
13366
13367 case DPREG_3SRC_111:
13368 dexDataProc3Source (cpu); return;
13369
13370 default:
13371 /* Should never reach here. */
13372 HALT_NYI;
13373 }
13374 }
13375
13376 /* Unconditional Branch immediate.
13377 Offset is a PC-relative byte offset in the range +/- 128MiB.
13378 The offset is assumed to be raw from the decode, i.e. the
13379 simulator is expected to scale it from a word offset to a byte offset. */
13380
13381 /* Unconditional branch. */
13382 static void
13383 buc (sim_cpu *cpu, int32_t offset)
13384 {
13385 aarch64_set_next_PC_by_offset (cpu, offset);
13386 }
13387
13388 static unsigned stack_depth = 0;
13389
13390 /* Unconditional branch and link -- writes return PC to LR. */
13391 static void
13392 bl (sim_cpu *cpu, int32_t offset)
13393 {
13394 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
13395 aarch64_save_LR (cpu);
13396 aarch64_set_next_PC_by_offset (cpu, offset);
13397
13398 if (TRACE_BRANCH_P (cpu))
13399 {
13400 ++ stack_depth;
13401 TRACE_BRANCH (cpu,
13402 " %*scall %" PRIx64 " [%s]"
13403 " [args: %" PRIx64 " %" PRIx64 " %" PRIx64 "]",
13404 stack_depth, " ", aarch64_get_next_PC (cpu),
13405 aarch64_get_func (CPU_STATE (cpu),
13406 aarch64_get_next_PC (cpu)),
13407 aarch64_get_reg_u64 (cpu, 0, NO_SP),
13408 aarch64_get_reg_u64 (cpu, 1, NO_SP),
13409 aarch64_get_reg_u64 (cpu, 2, NO_SP)
13410 );
13411 }
13412 }
13413
13414 /* Unconditional Branch register.
13415 Branch/return address is in source register. */
13416
13417 /* Unconditional branch. */
13418 static void
13419 br (sim_cpu *cpu)
13420 {
13421 unsigned rn = INSTR (9, 5);
13422 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
13423 aarch64_set_next_PC (cpu, aarch64_get_reg_u64 (cpu, rn, NO_SP));
13424 }
13425
13426 /* Unconditional branch and link -- writes return PC to LR. */
13427 static void
13428 blr (sim_cpu *cpu)
13429 {
13430 /* Ensure we read the destination before we write LR. */
13431 uint64_t target = aarch64_get_reg_u64 (cpu, INSTR (9, 5), NO_SP);
13432
13433 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
13434 aarch64_save_LR (cpu);
13435 aarch64_set_next_PC (cpu, target);
13436
13437 if (TRACE_BRANCH_P (cpu))
13438 {
13439 ++ stack_depth;
13440 TRACE_BRANCH (cpu,
13441 " %*scall %" PRIx64 " [%s]"
13442 " [args: %" PRIx64 " %" PRIx64 " %" PRIx64 "]",
13443 stack_depth, " ", aarch64_get_next_PC (cpu),
13444 aarch64_get_func (CPU_STATE (cpu),
13445 aarch64_get_next_PC (cpu)),
13446 aarch64_get_reg_u64 (cpu, 0, NO_SP),
13447 aarch64_get_reg_u64 (cpu, 1, NO_SP),
13448 aarch64_get_reg_u64 (cpu, 2, NO_SP)
13449 );
13450 }
13451 }
13452
13453 /* Return -- the assembler will default the source register to LR.  This
13454 is functionally equivalent to br but, presumably, unlike br it side
13455 effects the branch predictor. */
13456 static void
13457 ret (sim_cpu *cpu)
13458 {
13459 unsigned rn = INSTR (9, 5);
13460 aarch64_set_next_PC (cpu, aarch64_get_reg_u64 (cpu, rn, NO_SP));
13461
13462 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
13463 if (TRACE_BRANCH_P (cpu))
13464 {
13465 TRACE_BRANCH (cpu,
13466 " %*sreturn [result: %" PRIx64 "]",
13467 stack_depth, " ", aarch64_get_reg_u64 (cpu, 0, NO_SP));
13468 -- stack_depth;
13469 }
13470 }
13471
13472 /* NOP -- we implement this and call it from the decode in case we
13473 want to intercept it later. */
13474
13475 static void
13476 nop (sim_cpu *cpu)
13477 {
13478 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
13479 }
13480
13481 /* Data synchronization barrier. */
13482
13483 static void
13484 dsb (sim_cpu *cpu)
13485 {
13486 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
13487 }
13488
13489 /* Data memory barrier. */
13490
13491 static void
13492 dmb (sim_cpu *cpu)
13493 {
13494 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
13495 }
13496
13497 /* Instruction synchronization barrier. */
13498
13499 static void
13500 isb (sim_cpu *cpu)
13501 {
13502 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
13503 }
13504
13505 static void
13506 dexBranchImmediate (sim_cpu *cpu)
13507 {
13508 /* assert instr[30,26] == 00101
13509 instr[31] : 0 ==> B, 1 ==> BL
13510 instr[25,0] == imm26 branch offset counted in words. */
13511
13512 uint32_t top = INSTR (31, 31);
13513 /* We have a 26 bit signed word offset which we need to pass to the
13514 execute routine as a signed byte offset. */
13515 int32_t offset = simm32 (aarch64_get_instr (cpu), 25, 0) << 2;
13516
13517 if (top)
13518 bl (cpu, offset);
13519 else
13520 buc (cpu, offset);
13521 }
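
/* Scaling example (illustrative): imm26 == 1 gives offset == 4 (one
   instruction forward), imm26 == 0x3ffffff (i.e. -1) gives offset ==
   -4, and the 26 bit field spans +/- 2^25 words == +/- 128MiB. */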
13522
13523 /* Control Flow. */
13524
13525 /* Conditional branch
13526
13527 Offset is a PC-relative byte offset in the range +/- 1MiB.  pos is
13528 a bit position in the range 0 .. 63 (used only by the test branches).
13529
13530 cc is a CondCode enum value as pulled out of the decode.
13531
13532 N.B. any test register (source) can only be Xn or Wn. */
13533
13534 static void
13535 bcc (sim_cpu *cpu, int32_t offset, CondCode cc)
13536 {
13537 /* The test returns TRUE if CC is met. */
13538 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
13539 if (testConditionCode (cpu, cc))
13540 aarch64_set_next_PC_by_offset (cpu, offset);
13541 }
13542
13543 /* 32 bit branch on register non-zero. */
13544 static void
13545 cbnz32 (sim_cpu *cpu, int32_t offset)
13546 {
13547 unsigned rt = INSTR (4, 0);
13548
13549 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
13550 if (aarch64_get_reg_u32 (cpu, rt, NO_SP) != 0)
13551 aarch64_set_next_PC_by_offset (cpu, offset);
13552 }
13553
13554 /* 64 bit branch on register non-zero. */
13555 static void
13556 cbnz (sim_cpu *cpu, int32_t offset)
13557 {
13558 unsigned rt = INSTR (4, 0);
13559
13560 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
13561 if (aarch64_get_reg_u64 (cpu, rt, NO_SP) != 0)
13562 aarch64_set_next_PC_by_offset (cpu, offset);
13563 }
13564
13565 /* 32 bit branch on register zero. */
13566 static void
13567 cbz32 (sim_cpu *cpu, int32_t offset)
13568 {
13569 unsigned rt = INSTR (4, 0);
13570
13571 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
13572 if (aarch64_get_reg_u32 (cpu, rt, NO_SP) == 0)
13573 aarch64_set_next_PC_by_offset (cpu, offset);
13574 }
13575
13576 /* 64 bit branch on register zero. */
13577 static void
13578 cbz (sim_cpu *cpu, int32_t offset)
13579 {
13580 unsigned rt = INSTR (4, 0);
13581
13582 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
13583 if (aarch64_get_reg_u64 (cpu, rt, NO_SP) == 0)
13584 aarch64_set_next_PC_by_offset (cpu, offset);
13585 }
13586
13587 /* Branch on register bit test non-zero -- one size fits all. */
13588 static void
13589 tbnz (sim_cpu *cpu, uint32_t pos, int32_t offset)
13590 {
13591 unsigned rt = INSTR (4, 0);
13592
13593 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
13594 if (aarch64_get_reg_u64 (cpu, rt, NO_SP) & (((uint64_t) 1) << pos))
13595 aarch64_set_next_PC_by_offset (cpu, offset);
13596 }
13597
13598 /* Branch on register bit test zero -- one size fits all. */
13599 static void
13600 tbz (sim_cpu *cpu, uint32_t pos, int32_t offset)
13601 {
13602 unsigned rt = INSTR (4, 0);
13603
13604 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
13605 if (!(aarch64_get_reg_u64 (cpu, rt, NO_SP) & (((uint64_t) 1) << pos)))
13606 aarch64_set_next_PC_by_offset (cpu, offset);
13607 }
13608
13609 static void
13610 dexCompareBranchImmediate (sim_cpu *cpu)
13611 {
13612 /* instr[30,25] = 01 1010
13613 instr[31] = size : 0 ==> 32, 1 ==> 64
13614 instr[24] = op : 0 ==> CBZ, 1 ==> CBNZ
13615 instr[23,5] = simm19 branch offset counted in words
13616 instr[4,0] = rt */
13617
13618 uint32_t size = INSTR (31, 31);
13619 uint32_t op = INSTR (24, 24);
13620 int32_t offset = simm32 (aarch64_get_instr (cpu), 23, 5) << 2;
13621
13622 if (size == 0)
13623 {
13624 if (op == 0)
13625 cbz32 (cpu, offset);
13626 else
13627 cbnz32 (cpu, offset);
13628 }
13629 else
13630 {
13631 if (op == 0)
13632 cbz (cpu, offset);
13633 else
13634 cbnz (cpu, offset);
13635 }
13636 }
13637
13638 static void
13639 dexTestBranchImmediate (sim_cpu *cpu)
13640 {
13641 /* instr[31] = b5 : bit 5 of test bit idx
13642 instr[30,25] = 01 1011
13643 instr[24] = op : 0 ==> TBZ, 1 ==> TBNZ
13644 instr[23,19] = b40 : bits 4 to 0 of test bit idx
13645 instr[18,5] = simm14 : signed offset counted in words
13646 instr[4,0] = uimm5 */
13647
13648 uint32_t pos = ((INSTR (31, 31) << 5) | INSTR (23, 19));
13649 int32_t offset = simm32 (aarch64_get_instr (cpu), 18, 5) << 2;
13650
13651 NYI_assert (30, 25, 0x1b);
13652
13653 if (INSTR (24, 24) == 0)
13654 tbz (cpu, pos, offset);
13655 else
13656 tbnz (cpu, pos, offset);
13657 }
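
/* Bit-position example (illustrative): TBNZ X5, #63, label encodes
   b5 == 1 and b40 == 0b11111, so pos == (1 << 5) | 31 == 63, i.e. the
   sign bit of X5 is tested. */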
13658
13659 static void
13660 dexCondBranchImmediate (sim_cpu *cpu)
13661 {
13662 /* instr[31,25] = 010 1010
13663 instr[24] = op1 : (op1,op0) == 00 ==> B.cond, ow ==> UNALLOC
13664 instr[23,5] = simm19 : signed offset counted in words
13665 instr[4] = op0
13666 instr[3,0] = cond */
13667
13668 int32_t offset;
13669 uint32_t op = ((INSTR (24, 24) << 1) | INSTR (4, 4));
13670
13671 NYI_assert (31, 25, 0x2a);
13672
13673 if (op != 0)
13674 HALT_UNALLOC;
13675
13676 offset = simm32 (aarch64_get_instr (cpu), 23, 5) << 2;
13677
13678 bcc (cpu, offset, INSTR (3, 0));
13679 }
13680
13681 static void
13682 dexBranchRegister (sim_cpu *cpu)
13683 {
13684 /* instr[31,25] = 110 1011
13685 instr[24,21] = op : 0 ==> BR, 1 ==> BLR, 2 ==> RET, 4 ==> ERET, 5 ==> DRPS
13686 instr[20,16] = op2 : must be 11111
13687 instr[15,10] = op3 : must be 000000
13688 instr[4,0] = op4 : must be 00000. */
13689
13690 uint32_t op = INSTR (24, 21);
13691 uint32_t op2 = INSTR (20, 16);
13692 uint32_t op3 = INSTR (15, 10);
13693 uint32_t op4 = INSTR (4, 0);
13694
13695 NYI_assert (31, 25, 0x6b);
13696
13697 if (op2 != 0x1F || op3 != 0 || op4 != 0)
13698 HALT_UNALLOC;
13699
13700 if (op == 0)
13701 br (cpu);
13702
13703 else if (op == 1)
13704 blr (cpu);
13705
13706 else if (op == 2)
13707 ret (cpu);
13708
13709 else
13710 {
13711 /* ERET and DRPS require Rn (instr [9,5]) == 0b11111;
13712 anything else is unallocated. */
13713 uint32_t rn = INSTR (9, 5);
13714
13715 if (rn != 0x1f)
13716 HALT_UNALLOC;
13717
13718 if (op == 4 || op == 5)
13719 HALT_NYI;
13720
13721 HALT_UNALLOC;
13722 }
13723 }
13724
13725 /* FIXME: We should get the Angel SWI values from ../../libgloss/aarch64/svc.h
13726 but this may not be available. So instead we define the values we need
13727 here. */
13728 #define AngelSVC_Reason_Open 0x01
13729 #define AngelSVC_Reason_Close 0x02
13730 #define AngelSVC_Reason_Write 0x05
13731 #define AngelSVC_Reason_Read 0x06
13732 #define AngelSVC_Reason_IsTTY 0x09
13733 #define AngelSVC_Reason_Seek 0x0A
13734 #define AngelSVC_Reason_FLen 0x0C
13735 #define AngelSVC_Reason_Remove 0x0E
13736 #define AngelSVC_Reason_Rename 0x0F
13737 #define AngelSVC_Reason_Clock 0x10
13738 #define AngelSVC_Reason_Time 0x11
13739 #define AngelSVC_Reason_System 0x12
13740 #define AngelSVC_Reason_Errno 0x13
13741 #define AngelSVC_Reason_GetCmdLine 0x15
13742 #define AngelSVC_Reason_HeapInfo 0x16
13743 #define AngelSVC_Reason_ReportException 0x18
13744 #define AngelSVC_Reason_Elapsed 0x30
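
/* The Angel/semihosting calling convention implemented by handle_halt
   below: the reason code is passed in W0, X1 points at any parameter
   block, and the result is returned in X0.  For example the HeapInfo
   call hands over a pointer to a block of four 64 bit words which the
   host fills in as { heap start, heap end, lowest stack, initial
   stack }. */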
13745
13746
13747 static void
13748 handle_halt (sim_cpu *cpu, uint32_t val)
13749 {
13750 uint64_t result = 0;
13751
13752 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
13753 if (val != 0xf000)
13754 {
13755 TRACE_SYSCALL (cpu, " HLT [0x%x]", val);
13756 sim_engine_halt (CPU_STATE (cpu), cpu, NULL, aarch64_get_PC (cpu),
13757 sim_stopped, SIM_SIGTRAP);
13758 }
13759
13760 /* We have encountered an Angel SVC call. See if we can process it. */
13761 switch (aarch64_get_reg_u32 (cpu, 0, NO_SP))
13762 {
13763 case AngelSVC_Reason_HeapInfo:
13764 {
13765 /* Get the values. */
13766 uint64_t stack_top = aarch64_get_stack_start (cpu);
13767 uint64_t heap_base = aarch64_get_heap_start (cpu);
13768
13769 /* Get the pointer */
13770 uint64_t ptr = aarch64_get_reg_u64 (cpu, 1, SP_OK);
13771 ptr = aarch64_get_mem_u64 (cpu, ptr);
13772
13773 /* Fill in the memory block. */
13774 /* Start addr of heap. */
13775 aarch64_set_mem_u64 (cpu, ptr + 0, heap_base);
13776 /* End addr of heap. */
13777 aarch64_set_mem_u64 (cpu, ptr + 8, stack_top);
13778 /* Lowest stack addr. */
13779 aarch64_set_mem_u64 (cpu, ptr + 16, heap_base);
13780 /* Initial stack addr. */
13781 aarch64_set_mem_u64 (cpu, ptr + 24, stack_top);
13782
13783 TRACE_SYSCALL (cpu, " AngelSVC: Get Heap Info");
13784 }
13785 break;
13786
13787 case AngelSVC_Reason_Open:
13788 {
13789 /* Get the pointer */
13790 /* uint64_t ptr = aarch64_get_reg_u64 (cpu, 1, SP_OK); */
13791 /* FIXME: For now we just assume that we will only be asked
13792 to open the standard file descriptors. */
13793 static int fd = 0;
13794 result = fd ++;
13795
13796 TRACE_SYSCALL (cpu, " AngelSVC: Open file %d", fd - 1);
13797 }
13798 break;
13799
13800 case AngelSVC_Reason_Close:
13801 {
13802 uint64_t fh = aarch64_get_reg_u64 (cpu, 1, SP_OK);
13803 TRACE_SYSCALL (cpu, " AngelSVC: Close file %d", (int) fh);
13804 result = 0;
13805 }
13806 break;
13807
13808 case AngelSVC_Reason_Errno:
13809 result = 0;
13810 TRACE_SYSCALL (cpu, " AngelSVC: Get Errno");
13811 break;
13812
13813 case AngelSVC_Reason_Clock:
13814 result =
13815 #ifdef CLOCKS_PER_SEC
13816 (CLOCKS_PER_SEC >= 100)
13817 ? (clock () / (CLOCKS_PER_SEC / 100))
13818 : ((clock () * 100) / CLOCKS_PER_SEC)
13819 #else
13820 /* Presume unix... clock() returns microseconds. */
13821 (clock () / 10000)
13822 #endif
13823 ;
13824 TRACE_SYSCALL (cpu, " AngelSVC: Get Clock");
13825 break;
13826
13827 case AngelSVC_Reason_GetCmdLine:
13828 {
13829 /* Get the pointer */
13830 uint64_t ptr = aarch64_get_reg_u64 (cpu, 1, SP_OK);
13831 ptr = aarch64_get_mem_u64 (cpu, ptr);
13832
13833 /* FIXME: No command line for now. */
13834 aarch64_set_mem_u64 (cpu, ptr, 0);
13835 TRACE_SYSCALL (cpu, " AngelSVC: Get Command Line");
13836 }
13837 break;
13838
13839 case AngelSVC_Reason_IsTTY:
13840 result = 1;
13841 TRACE_SYSCALL (cpu, " AngelSVC: IsTTY ?");
13842 break;
13843
13844 case AngelSVC_Reason_Write:
13845 {
13846 /* Get the pointer */
13847 uint64_t ptr = aarch64_get_reg_u64 (cpu, 1, SP_OK);
13848 /* Get the write control block. */
13849 uint64_t fd = aarch64_get_mem_u64 (cpu, ptr);
13850 uint64_t buf = aarch64_get_mem_u64 (cpu, ptr + 8);
13851 uint64_t len = aarch64_get_mem_u64 (cpu, ptr + 16);
13852
13853 TRACE_SYSCALL (cpu, "write of %" PRIx64 " bytes from %"
13854 PRIx64 " on descriptor %" PRIx64,
13855 len, buf, fd);
13856
13857 if (len > 1280)
13858 {
13859 TRACE_SYSCALL (cpu,
13860 " AngelSVC: Write: Suspiciously long write: %ld",
13861 (long) len);
13862 sim_engine_halt (CPU_STATE (cpu), cpu, NULL, aarch64_get_PC (cpu),
13863 sim_stopped, SIM_SIGBUS);
13864 }
13865 else if (fd == 1)
13866 {
13867 printf ("%.*s", (int) len, aarch64_get_mem_ptr (cpu, buf));
13868 }
13869 else if (fd == 2)
13870 {
13871 TRACE (cpu, 0, "\n");
13872 sim_io_eprintf (CPU_STATE (cpu), "%.*s",
13873 (int) len, aarch64_get_mem_ptr (cpu, buf));
13874 TRACE (cpu, 0, "\n");
13875 }
13876 else
13877 {
13878 TRACE_SYSCALL (cpu,
13879 " AngelSVC: Write: Unexpected file handle: %d",
13880 (int) fd);
13881 sim_engine_halt (CPU_STATE (cpu), cpu, NULL, aarch64_get_PC (cpu),
13882 sim_stopped, SIM_SIGABRT);
13883 }
13884 }
13885 break;
13886
13887 case AngelSVC_Reason_ReportException:
13888 {
13889 /* Get the pointer */
13890 uint64_t ptr = aarch64_get_reg_u64 (cpu, 1, SP_OK);
13891 /* ptr = aarch64_get_mem_u64 (cpu, ptr); */
13892 uint64_t type = aarch64_get_mem_u64 (cpu, ptr);
13893 uint64_t state = aarch64_get_mem_u64 (cpu, ptr + 8);
13894
13895 TRACE_SYSCALL (cpu,
13896 "Angel Exception: type 0x%" PRIx64 " state %" PRIx64,
13897 type, state);
13898
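/* 0x20026 is the semihosting ADP_Stopped_ApplicationExit reason
   code: the application is exiting normally, with its exit status in
   the second word of the block. */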
13899 if (type == 0x20026)
13900 sim_engine_halt (CPU_STATE (cpu), cpu, NULL, aarch64_get_PC (cpu),
13901 sim_exited, state);
13902 else
13903 sim_engine_halt (CPU_STATE (cpu), cpu, NULL, aarch64_get_PC (cpu),
13904 sim_stopped, SIM_SIGINT);
13905 }
13906 break;
13907
13908 case AngelSVC_Reason_Read:
13909 case AngelSVC_Reason_FLen:
13910 case AngelSVC_Reason_Seek:
13911 case AngelSVC_Reason_Remove:
13912 case AngelSVC_Reason_Time:
13913 case AngelSVC_Reason_System:
13914 case AngelSVC_Reason_Rename:
13915 case AngelSVC_Reason_Elapsed:
13916 default:
13917 TRACE_SYSCALL (cpu, " HLT [Unknown angel %x]",
13918 aarch64_get_reg_u32 (cpu, 0, NO_SP));
13919 sim_engine_halt (CPU_STATE (cpu), cpu, NULL, aarch64_get_PC (cpu),
13920 sim_stopped, SIM_SIGTRAP);
13921 }
13922
13923 aarch64_set_reg_u64 (cpu, 0, NO_SP, result);
13924 }
13925
13926 static void
13927 dexExcpnGen (sim_cpu *cpu)
13928 {
13929 /* instr[31:24] = 11010100
13930 instr[23,21] = opc : 000 ==> GEN EXCPN, 001 ==> BRK
13931 010 ==> HLT, 101 ==> DBG GEN EXCPN
13932 instr[20,5] = imm16
13933 instr[4,2] = opc2 000 ==> OK, ow ==> UNALLOC
13934 instr[1,0] = LL : discriminates opc */
13935
13936 uint32_t opc = INSTR (23, 21);
13937 uint32_t imm16 = INSTR (20, 5);
13938 uint32_t opc2 = INSTR (4, 2);
13939 uint32_t LL;
13940
13941 NYI_assert (31, 24, 0xd4);
13942
13943 if (opc2 != 0)
13944 HALT_UNALLOC;
13945
13946 LL = INSTR (1, 0);
13947
13948 /* We only implement HLT and BRK for now. */
13949 if (opc == 1 && LL == 0)
13950 {
13951 TRACE_EVENTS (cpu, " BRK [0x%x]", imm16);
13952 sim_engine_halt (CPU_STATE (cpu), cpu, NULL, aarch64_get_PC (cpu),
13953 sim_exited, aarch64_get_reg_s32 (cpu, R0, SP_OK));
13954 }
13955
13956 if (opc == 2 && LL == 0)
13957 handle_halt (cpu, imm16);
13958
13959 else if (opc == 0 || opc == 5)
13960 HALT_NYI;
13961
13962 else
13963 HALT_UNALLOC;
13964 }
13965
13966 /* Stub for accessing system registers. */
13967
13968 static uint64_t
13969 system_get (sim_cpu *cpu, unsigned op0, unsigned op1, unsigned crn,
13970 unsigned crm, unsigned op2)
13971 {
13972 if (crn == 0 && op1 == 3 && crm == 0 && op2 == 7)
13973 /* DCZID_EL0 - the Data Cache Zero ID register.
13974 We do not support DC ZVA at the moment, so
13975 we return a value with the disable bit set.
13976 We implement support for the DCZID register since
13977 it is used by the C library's memset function. */
13978 return ((uint64_t) 1) << 4;
13979
13980 if (crn == 0 && op1 == 3 && crm == 0 && op2 == 1)
13981 /* Cache Type Register. */
13982 return 0x80008000UL;
13983
13984 if (crn == 13 && op1 == 3 && crm == 0 && op2 == 2)
13985 /* TPIDR_EL0 - thread pointer id. */
13986 return aarch64_get_thread_id (cpu);
13987
13988 if (op1 == 3 && crm == 4 && op2 == 0)
13989 return aarch64_get_FPCR (cpu);
13990
13991 if (op1 == 3 && crm == 4 && op2 == 1)
13992 return aarch64_get_FPSR (cpu);
13993
13994 else if (op1 == 3 && crm == 2 && op2 == 0)
13995 return aarch64_get_CPSR (cpu);
13996
13997 HALT_NYI;
13998 }
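
/* Encoding examples for system_get (illustrative): "mrs x0, tpidr_el0"
   has op0 == 3, op1 == 3, CRn == 13, CRm == 0, op2 == 2 and so returns
   the thread id above, while "mrs x0, fpcr" (op1 == 3, CRm == 4,
   op2 == 0) returns the FPCR.  N.B. the FPCR/FPSR/CPSR matches above
   do not check CRn. */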
13999
14000 static void
14001 system_set (sim_cpu *cpu, unsigned op0, unsigned op1, unsigned crn,
14002 unsigned crm, unsigned op2, uint64_t val)
14003 {
14004 if (op1 == 3 && crm == 4 && op2 == 0)
14005 aarch64_set_FPCR (cpu, val);
14006
14007 else if (op1 == 3 && crm == 4 && op2 == 1)
14008 aarch64_set_FPSR (cpu, val);
14009
14010 else if (op1 == 3 && crm == 2 && op2 == 0)
14011 aarch64_set_CPSR (cpu, val);
14012
14013 else
14014 HALT_NYI;
14015 }
14016
14017 static void
14018 do_mrs (sim_cpu *cpu)
14019 {
14020 /* instr[31:20] = 1101 0101 0011
14021 instr[19] = op0
14022 instr[18,16] = op1
14023 instr[15,12] = CRn
14024 instr[11,8] = CRm
14025 instr[7,5] = op2
14026 instr[4,0] = Rt */
14027 unsigned sys_op0 = INSTR (19, 19) + 2;
14028 unsigned sys_op1 = INSTR (18, 16);
14029 unsigned sys_crn = INSTR (15, 12);
14030 unsigned sys_crm = INSTR (11, 8);
14031 unsigned sys_op2 = INSTR (7, 5);
14032 unsigned rt = INSTR (4, 0);
14033
14034 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
14035 aarch64_set_reg_u64 (cpu, rt, NO_SP,
14036 system_get (cpu, sys_op0, sys_op1, sys_crn, sys_crm, sys_op2));
14037 }
14038
14039 static void
14040 do_MSR_immediate (sim_cpu *cpu)
14041 {
14042 /* instr[31:19] = 1101 0101 0000 0
14043 instr[18,16] = op1
14044 instr[15,12] = 0100
14045 instr[11,8] = CRm
14046 instr[7,5] = op2
14047 instr[4,0] = 1 1111 */
14048
14049 unsigned op1 = INSTR (18, 16);
14050 /* unsigned crm = INSTR (11, 8); */
14051 unsigned op2 = INSTR (7, 5);
14052
14053 NYI_assert (31, 19, 0x1AA0);
14054 NYI_assert (15, 12, 0x4);
14055 NYI_assert (4, 0, 0x1F);
14056
14057 if (op1 == 0)
14058 {
14059 if (op2 == 5)
14060 HALT_NYI; /* set SPSel. */
14061 else
14062 HALT_UNALLOC;
14063 }
14064 else if (op1 == 3)
14065 {
14066 if (op2 == 6)
14067 HALT_NYI; /* set DAIFset. */
14068 else if (op2 == 7)
14069 HALT_NYI; /* set DAIFclr. */
14070 else
14071 HALT_UNALLOC;
14072 }
14073 else
14074 HALT_UNALLOC;
14075 }
14076
14077 static void
14078 do_MSR_reg (sim_cpu *cpu)
14079 {
14080 /* instr[31:20] = 1101 0101 0001
14081 instr[19] = op0
14082 instr[18,16] = op1
14083 instr[15,12] = CRn
14084 instr[11,8] = CRm
14085 instr[7,5] = op2
14086 instr[4,0] = Rt */
14087
14088 unsigned sys_op0 = INSTR (19, 19) + 2;
14089 unsigned sys_op1 = INSTR (18, 16);
14090 unsigned sys_crn = INSTR (15, 12);
14091 unsigned sys_crm = INSTR (11, 8);
14092 unsigned sys_op2 = INSTR (7, 5);
14093 unsigned rt = INSTR (4, 0);
14094
14095 NYI_assert (31, 20, 0xD51);
14096
14097 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
14098 system_set (cpu, sys_op0, sys_op1, sys_crn, sys_crm, sys_op2,
14099 aarch64_get_reg_u64 (cpu, rt, NO_SP));
14100 }
14101
14102 static void
14103 do_SYS (sim_cpu *cpu)
14104 {
14105 /* instr[31,19] = 1101 0101 0000 1
14106 instr[18,16] = op1
14107 instr[15,12] = CRn
14108 instr[11,8] = CRm
14109 instr[7,5] = op2
14110 instr[4,0] = Rt */
14111 NYI_assert (31, 19, 0x1AA1);
14112
14113 /* FIXME: For now we just silently accept system ops. */
14114 }
14115
14116 static void
14117 dexSystem (sim_cpu *cpu)
14118 {
14119 /* instr[31:22] = 1101 0101 00
14120 instr[21] = L
14121 instr[20,19] = op0
14122 instr[18,16] = op1
14123 instr[15,12] = CRn
14124 instr[11,8] = CRm
14125 instr[7,5] = op2
14126 instr[4,0] = uimm5 */
14127
14128 /* We are interested in HINT, DSB, DMB and ISB
14129
14130 Hint #0 encodes NOOP (this is the only hint we care about)
14131 L == 0, op0 == 0, op1 = 011, CRn = 0010, Rt = 11111,
14132 CRm:op2 == 0000 000 (CRm == 0000 with op2 == 001..101 encodes other hints, NYI; any other encoding is executed as a NOP)
14133
14134 DSB, DMB, ISB are data synchronization barrier, data memory barrier and
14135 instruction synchronization barrier, respectively, where
14136
14137 L == 0, op0 == 0, op1 = 011, CRn = 0011, Rt = 11111,
14138 op2 : DSB ==> 100, DMB ==> 101, ISB ==> 110
14139 CRm<3:2> ==> domain, CRm<1:0> ==> types,
14140 domain : 00 ==> OuterShareable, 01 ==> Nonshareable,
14141 10 ==> InnerShareable, 11 ==> FullSystem
14142 types : 01 ==> Reads, 10 ==> Writes,
14143 11 ==> All, 00 ==> All (domain == FullSystem). */
14144
14145 unsigned rt = INSTR (4, 0);
14146
14147 NYI_assert (31, 22, 0x354);
14148
14149 switch (INSTR (21, 12))
14150 {
14151 case 0x032:
14152 if (rt == 0x1F)
14153 {
14154 /* NOP has CRm != 0000 OR
14155 (CRm == 0000 AND (op2 == 000 OR op2 > 101)). */
14156 uint32_t crm = INSTR (11, 8);
14157 uint32_t op2 = INSTR (7, 5);
14158
14159 if (crm != 0 || (op2 == 0 || op2 > 5))
14160 {
14161 /* Actually call nop method so we can reimplement it later. */
14162 nop (cpu);
14163 return;
14164 }
14165 }
14166 HALT_NYI;
14167
14168 case 0x033:
14169 {
14170 uint32_t op2 = INSTR (7, 5);
14171
14172 switch (op2)
14173 {
14174 case 2: HALT_NYI;
14175 case 4: dsb (cpu); return;
14176 case 5: dmb (cpu); return;
14177 case 6: isb (cpu); return;
14178 default: HALT_UNALLOC;
14179 }
14180 }
14181
14182 case 0x3B0:
14183 case 0x3B4:
14184 case 0x3BD:
14185 do_mrs (cpu);
14186 return;
14187
14188 case 0x0B7:
14189 do_SYS (cpu); /* DC is an alias of SYS. */
14190 return;
14191
14192 default:
14193 if (INSTR (21, 20) == 0x1)
14194 do_MSR_reg (cpu);
14195 else if (INSTR (21, 19) == 0 && INSTR (15, 12) == 0x4)
14196 do_MSR_immediate (cpu);
14197 else
14198 HALT_NYI;
14199 return;
14200 }
14201 }
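
/* Barrier encoding example (illustrative): "dmb ish" has CRn == 0011,
   CRm == 1011 (inner shareable, all accesses) and op2 == 101, so
   INSTR (21, 12) == 0x033 and op2 selects the dmb handler above; all
   barrier variants are no-ops in this simulator. */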
14202
14203 static void
14204 dexBr (sim_cpu *cpu)
14205 {
14206 /* uint32_t group = dispatchGroup (aarch64_get_instr (cpu));
14207 assert group == GROUP_BREXSYS_1010 || group == GROUP_BREXSYS_1011
14208 bits [31,29] of a BrExSys are the secondary dispatch vector. */
14209 uint32_t group2 = dispatchBrExSys (aarch64_get_instr (cpu));
14210
14211 switch (group2)
14212 {
14213 case BR_IMM_000:
14214 return dexBranchImmediate (cpu);
14215
14216 case BR_IMMCMP_001:
14217 /* Compare has bit 25 clear while test has it set. */
14218 if (!INSTR (25, 25))
14219 dexCompareBranchImmediate (cpu);
14220 else
14221 dexTestBranchImmediate (cpu);
14222 return;
14223
14224 case BR_IMMCOND_010:
14225 /* This is a conditional branch if bit 25 is clear otherwise
14226 unallocated. */
14227 if (!INSTR (25, 25))
14228 dexCondBranchImmediate (cpu);
14229 else
14230 HALT_UNALLOC;
14231 return;
14232
14233 case BR_UNALLOC_011:
14234 HALT_UNALLOC;
14235
14236 case BR_IMM_100:
14237 dexBranchImmediate (cpu);
14238 return;
14239
14240 case BR_IMMCMP_101:
14241 /* Compare has bit 25 clear while test has it set. */
14242 if (!INSTR (25, 25))
14243 dexCompareBranchImmediate (cpu);
14244 else
14245 dexTestBranchImmediate (cpu);
14246 return;
14247
14248 case BR_REG_110:
14249 /* Unconditional branch reg has bit 25 set. */
14250 if (INSTR (25, 25))
14251 dexBranchRegister (cpu);
14252
14253 /* This includes both Excpn Gen, System and unalloc operations.
14254 We need to decode the Excpn Gen operation BRK so we can plant
14255 debugger entry points.
14256 Excpn Gen operations have instr [24] = 0.
14257 we need to decode at least one of the System operations NOP
14258 which is an alias for HINT #0.
14259 System operations have instr [24,22] = 100. */
14260 else if (INSTR (24, 24) == 0)
14261 dexExcpnGen (cpu);
14262
14263 else if (INSTR (24, 22) == 4)
14264 dexSystem (cpu);
14265
14266 else
14267 HALT_UNALLOC;
14268
14269 return;
14270
14271 case BR_UNALLOC_111:
14272 HALT_UNALLOC;
14273
14274 default:
14275 /* Should never reach here. */
14276 HALT_NYI;
14277 }
14278 }
14279
14280 static void
14281 aarch64_decode_and_execute (sim_cpu *cpu, uint64_t pc)
14282 {
14283 /* We need to check if gdb wants a break in here. */
14284 /* checkBreak (cpu); */
14285
14286 uint64_t group = dispatchGroup (aarch64_get_instr (cpu));
14287
14288 switch (group)
14289 {
14290 case GROUP_PSEUDO_0000: dexPseudo (cpu); break;
14291 case GROUP_LDST_0100: dexLdSt (cpu); break;
14292 case GROUP_DPREG_0101: dexDPReg (cpu); break;
14293 case GROUP_LDST_0110: dexLdSt (cpu); break;
14294 case GROUP_ADVSIMD_0111: dexAdvSIMD0 (cpu); break;
14295 case GROUP_DPIMM_1000: dexDPImm (cpu); break;
14296 case GROUP_DPIMM_1001: dexDPImm (cpu); break;
14297 case GROUP_BREXSYS_1010: dexBr (cpu); break;
14298 case GROUP_BREXSYS_1011: dexBr (cpu); break;
14299 case GROUP_LDST_1100: dexLdSt (cpu); break;
14300 case GROUP_DPREG_1101: dexDPReg (cpu); break;
14301 case GROUP_LDST_1110: dexLdSt (cpu); break;
14302 case GROUP_ADVSIMD_1111: dexAdvSIMD1 (cpu); break;
14303
14304 case GROUP_UNALLOC_0001:
14305 case GROUP_UNALLOC_0010:
14306 case GROUP_UNALLOC_0011:
14307 HALT_UNALLOC;
14308
14309 default:
14310 /* Should never reach here. */
14311 HALT_NYI;
14312 }
14313 }
14314
14315 static bfd_boolean
14316 aarch64_step (sim_cpu *cpu)
14317 {
14318 uint64_t pc = aarch64_get_PC (cpu);
14319
14320 if (pc == TOP_LEVEL_RETURN_PC)
14321 return FALSE;
14322
14323 aarch64_set_next_PC (cpu, pc + 4);
14324
14325 /* Code is always little-endian. */
14326 sim_core_read_buffer (CPU_STATE (cpu), cpu, read_map,
14327 & aarch64_get_instr (cpu), pc, 4);
14328 aarch64_get_instr (cpu) = endian_le2h_4 (aarch64_get_instr (cpu));
14329
14330 TRACE_INSN (cpu, " pc = %" PRIx64 " instr = %08x", pc,
14331 aarch64_get_instr (cpu));
14332 TRACE_DISASM (cpu, pc);
14333
14334 aarch64_decode_and_execute (cpu, pc);
14335
14336 return TRUE;
14337 }
14338
14339 void
14340 aarch64_run (SIM_DESC sd)
14341 {
14342 sim_cpu *cpu = STATE_CPU (sd, 0);
14343
14344 while (aarch64_step (cpu))
14345 {
14346 aarch64_update_PC (cpu);
14347
14348 if (sim_events_tick (sd))
14349 sim_events_process (sd);
14350 }
14351
14352 sim_engine_halt (sd, cpu, NULL, aarch64_get_PC (cpu),
14353 sim_exited, aarch64_get_reg_s32 (cpu, R0, NO_SP));
14354 }
14355
14356 void
14357 aarch64_init (sim_cpu *cpu, uint64_t pc)
14358 {
14359 uint64_t sp = aarch64_get_stack_start (cpu);
14360
14361 /* Install SP, FP and PC and set LR to -20
14362 so we can detect a top-level return. */
14363 aarch64_set_reg_u64 (cpu, SP, SP_OK, sp);
14364 aarch64_set_reg_u64 (cpu, FP, SP_OK, sp);
14365 aarch64_set_reg_u64 (cpu, LR, SP_OK, TOP_LEVEL_RETURN_PC);
14366 aarch64_set_next_PC (cpu, pc);
14367 aarch64_update_PC (cpu);
14368 aarch64_init_LIT_table ();
14369 }