1 /* simulator.c -- Interface for the AArch64 simulator.
2
3 Copyright (C) 2015-2023 Free Software Foundation, Inc.
4
5 Contributed by Red Hat.
6
7 This file is part of GDB.
8
9 This program is free software; you can redistribute it and/or modify
10 it under the terms of the GNU General Public License as published by
11 the Free Software Foundation; either version 3 of the License, or
12 (at your option) any later version.
13
14 This program is distributed in the hope that it will be useful,
15 but WITHOUT ANY WARRANTY; without even the implied warranty of
16 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 GNU General Public License for more details.
18
19 You should have received a copy of the GNU General Public License
20 along with this program. If not, see <http://www.gnu.org/licenses/>. */
21
22 /* This must come before any other includes. */
23 #include "defs.h"
24
25 #include <stdlib.h>
26 #include <stdio.h>
27 #include <string.h>
28 #include <sys/types.h>
29 #include <math.h>
30 #include <time.h>
31 #include <limits.h>
32
33 #include "aarch64-sim.h"
34 #include "simulator.h"
35 #include "cpustate.h"
36 #include "memory.h"
37
38 #include "sim-signal.h"
39
40 #define NO_SP 0
41 #define SP_OK 1
42
43 #define TST(_flag) (aarch64_test_CPSR_bit (cpu, _flag))
44 #define IS_SET(_X) (TST (( _X )) ? 1 : 0)
45 #define IS_CLEAR(_X) (TST (( _X )) ? 0 : 1)
46
47 /* Space saver macro. */
48 #define INSTR(HIGH, LOW) uimm (aarch64_get_instr (cpu), (HIGH), (LOW))
49
50 #define HALT_UNALLOC \
51 do \
52 { \
53 TRACE_DISASM (cpu, aarch64_get_PC (cpu)); \
54 TRACE_INSN (cpu, \
55 "Unallocated instruction detected at sim line %d," \
56 " exe addr %" PRIx64, \
57 __LINE__, aarch64_get_PC (cpu)); \
58 sim_engine_halt (CPU_STATE (cpu), cpu, NULL, aarch64_get_PC (cpu),\
59 sim_stopped, SIM_SIGILL); \
60 } \
61 while (0)
62
63 #define HALT_NYI \
64 do \
65 { \
66 TRACE_DISASM (cpu, aarch64_get_PC (cpu)); \
67 TRACE_INSN (cpu, \
68 "Unimplemented instruction detected at sim line %d," \
69 " exe addr %" PRIx64, \
70 __LINE__, aarch64_get_PC (cpu)); \
71 if (! TRACE_ANY_P (cpu)) \
72 sim_io_eprintf (CPU_STATE (cpu), "SIM Error: Unimplemented instruction: %#08x\n", \
73 aarch64_get_instr (cpu)); \
74 sim_engine_halt (CPU_STATE (cpu), cpu, NULL, aarch64_get_PC (cpu),\
75 sim_stopped, SIM_SIGABRT); \
76 } \
77 while (0)
78
79 #define NYI_assert(HI, LO, EXPECTED) \
80 do \
81 { \
82 if (INSTR ((HI), (LO)) != (EXPECTED)) \
83 HALT_NYI; \
84 } \
85 while (0)
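
/* Typical usage (editor's sketch, not from the original source):
   a decode routine whose bits [29,24] must equal 0b011010 would
   write

     NYI_assert (29, 24, 0x1a);

   and fall into HALT_NYI on any other encoding.  */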
86
87 static uint64_t
88 expand_logical_immediate (uint32_t S, uint32_t R, uint32_t N)
89 {
90 uint64_t mask;
91 uint64_t imm;
92 unsigned simd_size;
93
94 /* The immediate value is S+1 bits set to 1, left rotated by SIMDsize - R
95 (in other words, right rotated by R), then replicated. */
96 if (N != 0)
97 {
98 simd_size = 64;
99 mask = 0xffffffffffffffffull;
100 }
101 else
102 {
103 switch (S)
104 {
105 case 0x00 ... 0x1f: /* 0xxxxx */ simd_size = 32; break;
106 case 0x20 ... 0x2f: /* 10xxxx */ simd_size = 16; S &= 0xf; break;
107 case 0x30 ... 0x37: /* 110xxx */ simd_size = 8; S &= 0x7; break;
108 case 0x38 ... 0x3b: /* 1110xx */ simd_size = 4; S &= 0x3; break;
109 case 0x3c ... 0x3d: /* 11110x */ simd_size = 2; S &= 0x1; break;
110 default: return 0;
111 }
112 mask = (1ull << simd_size) - 1;
113 /* Top bits are IGNORED. */
114 R &= simd_size - 1;
115 }
116
117 /* NOTE: if S = simd_size - 1 we get 0xf..f which is rejected. */
118 if (S == simd_size - 1)
119 return 0;
120
121 /* S+1 consecutive bits to 1. */
122 /* NOTE: S can't be 63 due to detection above. */
123 imm = (1ull << (S + 1)) - 1;
124
125 /* Rotate to the left by simd_size - R. */
126 if (R != 0)
127 imm = ((imm << (simd_size - R)) & mask) | (imm >> R);
128
129 /* Replicate the value according to SIMD size. */
130 switch (simd_size)
131 {
132 case 2: imm = (imm << 2) | imm; /* Fall through. */
133 case 4: imm = (imm << 4) | imm; /* Fall through. */
134 case 8: imm = (imm << 8) | imm; /* Fall through. */
135 case 16: imm = (imm << 16) | imm; /* Fall through. */
136 case 32: imm = (imm << 32) | imm; /* Fall through. */
137 case 64: break;
138 default: return 0;
139 }
140
141 return imm;
142 }
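
/* Worked example (editor's sketch): with N = 0, S = 0x25
   (10_0101 ==> simd_size = 16, S becomes 5) and R = 2, the
   element is (1ull << 6) - 1 = 0x003f, rotated right by 2
   within 16 bits to give 0xc00f, then replicated to yield
   0xc00fc00fc00fc00f.  */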
143
144 /* Instr[22,10] encodes N, immr and imms.  We want a lookup table
145 for each possible combination, i.e. 13 bits worth of int entries. */
146 #define LI_TABLE_SIZE (1 << 13)
147 static uint64_t LITable[LI_TABLE_SIZE];
148
149 void
150 aarch64_init_LIT_table (void)
151 {
152 unsigned index;
153
154 for (index = 0; index < LI_TABLE_SIZE; index++)
155 {
156 uint32_t N = uimm (index, 12, 12);
157 uint32_t immr = uimm (index, 11, 6);
158 uint32_t imms = uimm (index, 5, 0);
159
160 LITable [index] = expand_logical_immediate (imms, immr, N);
161 }
162 }
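
/* A hypothetical consumer (editor's sketch): a decoder for a
   logical immediate instruction would index the table with the
   13 bits instr[22,10] and treat a zero entry as unallocated:

     uint64_t imm = LITable [INSTR (22, 10)];
     if (imm == 0)
       HALT_UNALLOC;  */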
163
164 static void
165 dexNotify (sim_cpu *cpu)
166 {
167 /* instr[14,0] == type : 0 ==> method entry, 1 ==> method reentry
168 2 ==> exit Java, 3 ==> start next bytecode. */
169 uint32_t type = INSTR (14, 0);
170
171 TRACE_EVENTS (cpu, "Notify Insn encountered, type = 0x%x", type);
172
173 switch (type)
174 {
175 case 0:
176 /* aarch64_notifyMethodEntry (aarch64_get_reg_u64 (cpu, R23, 0),
177 aarch64_get_reg_u64 (cpu, R22, 0)); */
178 break;
179 case 1:
180 /* aarch64_notifyMethodReentry (aarch64_get_reg_u64 (cpu, R23, 0),
181 aarch64_get_reg_u64 (cpu, R22, 0)); */
182 break;
183 case 2:
184 /* aarch64_notifyMethodExit (); */
185 break;
186 case 3:
187 /* aarch64_notifyBCStart (aarch64_get_reg_u64 (cpu, R23, 0),
188 aarch64_get_reg_u64 (cpu, R22, 0)); */
189 break;
190 }
191 }
192
193 /* secondary decode within top level groups */
194
195 static void
196 dexPseudo (sim_cpu *cpu)
197 {
198 /* assert instr[28,27] = 00
199
200 We provide 2 pseudo instructions:
201
202 HALT stops execution of the simulator causing an immediate
203 return to the x86 code which entered it.
204
205 CALLOUT initiates recursive entry into x86 code. A register
206 argument holds the address of the x86 routine. Immediate
207 values in the instruction identify the number of general
208 purpose and floating point register arguments to be passed
209 and the type of any value to be returned. */
210
211 uint32_t PSEUDO_HALT = 0xE0000000U;
212 uint32_t PSEUDO_CALLOUT = 0x00018000U;
213 uint32_t PSEUDO_CALLOUTR = 0x00018001U;
214 uint32_t PSEUDO_NOTIFY = 0x00014000U;
215 uint32_t dispatch;
216
217 if (aarch64_get_instr (cpu) == PSEUDO_HALT)
218 {
219 TRACE_EVENTS (cpu, " Pseudo Halt Instruction");
220 sim_engine_halt (CPU_STATE (cpu), cpu, NULL, aarch64_get_PC (cpu),
221 sim_stopped, SIM_SIGTRAP);
222 }
223
224 dispatch = INSTR (31, 15);
225
226 /* We do not handle callouts at the moment. */
227 if (dispatch == PSEUDO_CALLOUT || dispatch == PSEUDO_CALLOUTR)
228 {
229 TRACE_EVENTS (cpu, " Callout");
230 sim_engine_halt (CPU_STATE (cpu), cpu, NULL, aarch64_get_PC (cpu),
231 sim_stopped, SIM_SIGABRT);
232 }
233
234 else if (dispatch == PSEUDO_NOTIFY)
235 dexNotify (cpu);
236
237 else
238 HALT_UNALLOC;
239 }
240
241 /* Load-store single register (unscaled offset)
242 These instructions employ a base register plus an unscaled signed
243 9 bit offset.
244
245 N.B. the base register (source) can be Xn or SP.  All other
246 registers may not be SP. */
247
248 /* 32 bit load 32 bit unscaled signed 9 bit. */
249 static void
250 ldur32 (sim_cpu *cpu, int32_t offset)
251 {
252 unsigned rn = INSTR (9, 5);
253 unsigned rt = INSTR (4, 0);
254
255 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
256 aarch64_set_reg_u64 (cpu, rt, NO_SP, aarch64_get_mem_u32
257 (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK)
258 + offset));
259 }
260
261 /* 64 bit load 64 bit unscaled signed 9 bit. */
262 static void
263 ldur64 (sim_cpu *cpu, int32_t offset)
264 {
265 unsigned rn = INSTR (9, 5);
266 unsigned rt = INSTR (4, 0);
267
268 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
269 aarch64_set_reg_u64 (cpu, rt, NO_SP, aarch64_get_mem_u64
270 (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK)
271 + offset));
272 }
273
274 /* 32 bit load zero-extended byte unscaled signed 9 bit. */
275 static void
276 ldurb32 (sim_cpu *cpu, int32_t offset)
277 {
278 unsigned rn = INSTR (9, 5);
279 unsigned rt = INSTR (4, 0);
280
281 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
282 aarch64_set_reg_u64 (cpu, rt, NO_SP, aarch64_get_mem_u8
283 (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK)
284 + offset));
285 }
286
287 /* 32 bit load sign-extended byte unscaled signed 9 bit. */
288 static void
289 ldursb32 (sim_cpu *cpu, int32_t offset)
290 {
291 unsigned rn = INSTR (9, 5);
292 unsigned rt = INSTR (4, 0);
293
294 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
295 aarch64_set_reg_u64 (cpu, rt, NO_SP, (uint32_t) aarch64_get_mem_s8
296 (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK)
297 + offset));
298 }
299
300 /* 64 bit load sign-extended byte unscaled signed 9 bit. */
301 static void
302 ldursb64 (sim_cpu *cpu, int32_t offset)
303 {
304 unsigned rn = INSTR (9, 5);
305 unsigned rt = INSTR (4, 0);
306
307 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
308 aarch64_set_reg_s64 (cpu, rt, NO_SP, aarch64_get_mem_s8
309 (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK)
310 + offset));
311 }
312
313 /* 32 bit load zero-extended short unscaled signed 9 bit */
314 static void
315 ldurh32 (sim_cpu *cpu, int32_t offset)
316 {
317 unsigned rn = INSTR (9, 5);
318 unsigned rd = INSTR (4, 0);
319
320 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
321 aarch64_set_reg_u64 (cpu, rd, NO_SP, aarch64_get_mem_u16
322 (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK)
323 + offset));
324 }
325
326 /* 32 bit load sign-extended short unscaled signed 9 bit */
327 static void
328 ldursh32 (sim_cpu *cpu, int32_t offset)
329 {
330 unsigned rn = INSTR (9, 5);
331 unsigned rd = INSTR (4, 0);
332
333 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
334 aarch64_set_reg_u64 (cpu, rd, NO_SP, (uint32_t) aarch64_get_mem_s16
335 (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK)
336 + offset));
337 }
338
339 /* 64 bit load sign-extended short unscaled signed 9 bit */
340 static void
341 ldursh64 (sim_cpu *cpu, int32_t offset)
342 {
343 unsigned rn = INSTR (9, 5);
344 unsigned rt = INSTR (4, 0);
345
346 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
347 aarch64_set_reg_s64 (cpu, rt, NO_SP, aarch64_get_mem_s16
348 (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK)
349 + offset));
350 }
351
352 /* 64 bit load sign-extended word unscaled signed 9 bit */
353 static void
354 ldursw (sim_cpu *cpu, int32_t offset)
355 {
356 unsigned rn = INSTR (9, 5);
357 unsigned rd = INSTR (4, 0);
358
359 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
360 aarch64_set_reg_u64 (cpu, rd, NO_SP, (uint32_t) aarch64_get_mem_s32
361 (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK)
362 + offset));
363 }
364
365 /* N.B. with stores the value in source is written to the address
366 identified by source2 modified by offset. */
367
368 /* 32 bit store 32 bit unscaled signed 9 bit. */
369 static void
370 stur32 (sim_cpu *cpu, int32_t offset)
371 {
372 unsigned rn = INSTR (9, 5);
373 unsigned rd = INSTR (4, 0);
374
375 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
376 aarch64_set_mem_u32 (cpu,
377 aarch64_get_reg_u64 (cpu, rn, SP_OK) + offset,
378 aarch64_get_reg_u32 (cpu, rd, NO_SP));
379 }
380
381 /* 64 bit store 64 bit unscaled signed 9 bit */
382 static void
383 stur64 (sim_cpu *cpu, int32_t offset)
384 {
385 unsigned rn = INSTR (9, 5);
386 unsigned rd = INSTR (4, 0);
387
388 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
389 aarch64_set_mem_u64 (cpu,
390 aarch64_get_reg_u64 (cpu, rn, SP_OK) + offset,
391 aarch64_get_reg_u64 (cpu, rd, NO_SP));
392 }
393
394 /* 32 bit store byte unscaled signed 9 bit */
395 static void
396 sturb (sim_cpu *cpu, int32_t offset)
397 {
398 unsigned rn = INSTR (9, 5);
399 unsigned rd = INSTR (4, 0);
400
401 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
402 aarch64_set_mem_u8 (cpu,
403 aarch64_get_reg_u64 (cpu, rn, SP_OK) + offset,
404 aarch64_get_reg_u8 (cpu, rd, NO_SP));
405 }
406
407 /* 32 bit store short unscaled signed 9 bit */
408 static void
409 sturh (sim_cpu *cpu, int32_t offset)
410 {
411 unsigned rn = INSTR (9, 5);
412 unsigned rd = INSTR (4, 0);
413
414 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
415 aarch64_set_mem_u16 (cpu,
416 aarch64_get_reg_u64 (cpu, rn, SP_OK) + offset,
417 aarch64_get_reg_u16 (cpu, rd, NO_SP));
418 }
419
420 /* Load single register pc-relative label
421 Offset is a signed 19 bit immediate count in words
422 rt may not be SP. */
423
424 /* 32 bit pc-relative load */
425 static void
426 ldr32_pcrel (sim_cpu *cpu, int32_t offset)
427 {
428 unsigned rd = INSTR (4, 0);
429
430 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
431 aarch64_set_reg_u64 (cpu, rd, NO_SP,
432 aarch64_get_mem_u32
433 (cpu, aarch64_get_PC (cpu) + offset * 4));
434 }
435
436 /* 64 bit pc-relative load */
437 static void
438 ldr_pcrel (sim_cpu *cpu, int32_t offset)
439 {
440 unsigned rd = INSTR (4, 0);
441
442 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
443 aarch64_set_reg_u64 (cpu, rd, NO_SP,
444 aarch64_get_mem_u64
445 (cpu, aarch64_get_PC (cpu) + offset * 4));
446 }
447
448 /* sign extended 32 bit pc-relative load */
449 static void
450 ldrsw_pcrel (sim_cpu *cpu, int32_t offset)
451 {
452 unsigned rd = INSTR (4, 0);
453
454 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
455 aarch64_set_reg_u64 (cpu, rd, NO_SP,
456 aarch64_get_mem_s32
457 (cpu, aarch64_get_PC (cpu) + offset * 4));
458 }
459
460 /* float pc-relative load */
461 static void
462 fldrs_pcrel (sim_cpu *cpu, int32_t offset)
463 {
464 unsigned int rd = INSTR (4, 0);
465
466 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
467 aarch64_set_vec_u32 (cpu, rd, 0,
468 aarch64_get_mem_u32
469 (cpu, aarch64_get_PC (cpu) + offset * 4));
470 }
471
472 /* double pc-relative load */
473 static void
474 fldrd_pcrel (sim_cpu *cpu, int32_t offset)
475 {
476 unsigned int st = INSTR (4, 0);
477
478 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
479 aarch64_set_vec_u64 (cpu, st, 0,
480 aarch64_get_mem_u64
481 (cpu, aarch64_get_PC (cpu) + offset * 4));
482 }
483
484 /* long double pc-relative load. */
485 static void
486 fldrq_pcrel (sim_cpu *cpu, int32_t offset)
487 {
488 unsigned int st = INSTR (4, 0);
489 uint64_t addr = aarch64_get_PC (cpu) + offset * 4;
490 FRegister a;
491
492 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
493 aarch64_get_mem_long_double (cpu, addr, & a);
494 aarch64_set_FP_long_double (cpu, st, a);
495 }
496
497 /* This can be used to scale an offset by applying
498 the requisite shift.  The second argument is either
499 16, 32 or 64. */
500
501 #define SCALE(_offset, _elementSize) \
502 ((_offset) << ScaleShift ## _elementSize)
503
504 /* This can be used to optionally scale a register derived offset
505 by applying the requisite shift as indicated by the Scaling
506 argument. The second argument is either Byte, Short, Word
507 or Long. The third argument is either Scaled or Unscaled.
508 N.B. when _Scaling is Scaled the shift gets ANDed with
509 all 1s while when it is Unscaled it gets ANDed with 0. */
510
511 #define OPT_SCALE(_offset, _elementType, _Scaling) \
512 ((_offset) << (_Scaling ? ScaleShift ## _elementType : 0))
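
/* For example (editor's note): SCALE (offset, 32) expands to
   offset << ScaleShift32 (a multiply by 4, assuming the usual
   ScaleShift32 == 2), while OPT_SCALE (offset, 32, Unscaled)
   leaves the offset unchanged.  */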
513
514 /* This can be used to zero or sign extend a 32 bit register derived
515 value to a 64 bit value.  The first argument must be the value as
516 a uint32_t and the second must be either UXTW or SXTW. The result
517 is returned as an int64_t. */
518
519 static inline int64_t
520 extend (uint32_t value, Extension extension)
521 {
522 union
523 {
524 uint32_t u;
525 int32_t n;
526 } x;
527
528 /* A branchless variant of this ought to be possible. */
529 if (extension == UXTW || extension == NoExtension)
530 return value;
531
532 x.u = value;
533 return x.n;
534 }
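
/* For example (editor's note): extend (0xffffffff, SXTW) yields
   -1, while extend (0xffffffff, UXTW) yields 0x00000000ffffffff.  */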
535
536 /* Scalar Floating Point
537
538 FP load/store single register (4 addressing modes)
539
540 N.B. the base register (source) can be the stack pointer.
541 The secondary source register (source2) can only be an Xn register. */
542
543 /* Load 32 bit unscaled signed 9 bit with pre- or post-writeback. */
544 static void
545 fldrs_wb (sim_cpu *cpu, int32_t offset, WriteBack wb)
546 {
547 unsigned rn = INSTR (9, 5);
548 unsigned st = INSTR (4, 0);
549 uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
550
551 if (wb != Post)
552 address += offset;
553
554 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
555 aarch64_set_vec_u32 (cpu, st, 0, aarch64_get_mem_u32 (cpu, address));
556 if (wb == Post)
557 address += offset;
558
559 if (wb != NoWriteBack)
560 aarch64_set_reg_u64 (cpu, rn, SP_OK, address);
561 }
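
/* Editor's note on the wb argument: with Pre the offset is added
   before the access and written back; with Post it is added after
   the access and written back; with NoWriteBack it merely biases
   the address.  E.g. fldrs_wb (cpu, 8, Pre) loads from [Xn + 8]
   and leaves Xn + 8 in the base register.  */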
562
563 /* Load 8 bit with unsigned 12 bit offset. */
564 static void
565 fldrb_abs (sim_cpu *cpu, uint32_t offset)
566 {
567 unsigned rd = INSTR (4, 0);
568 unsigned rn = INSTR (9, 5);
569 uint64_t addr = aarch64_get_reg_u64 (cpu, rn, SP_OK) + offset;
570
571 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
572 aarch64_set_vec_u8 (cpu, rd, 0, aarch64_get_mem_u8 (cpu, addr));
573 }
574
575 /* Load 16 bit scaled unsigned 12 bit. */
576 static void
577 fldrh_abs (sim_cpu *cpu, uint32_t offset)
578 {
579 unsigned rd = INSTR (4, 0);
580 unsigned rn = INSTR (9, 5);
581 uint64_t addr = aarch64_get_reg_u64 (cpu, rn, SP_OK) + SCALE (offset, 16);
582
583 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
584 aarch64_set_vec_u16 (cpu, rd, 0, aarch64_get_mem_u16 (cpu, addr));
585 }
586
587 /* Load 32 bit scaled unsigned 12 bit. */
588 static void
589 fldrs_abs (sim_cpu *cpu, uint32_t offset)
590 {
591 unsigned rd = INSTR (4, 0);
592 unsigned rn = INSTR (9, 5);
593 uint64_t addr = aarch64_get_reg_u64 (cpu, rn, SP_OK) + SCALE (offset, 32);
594
595 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
596 aarch64_set_vec_u32 (cpu, rd, 0, aarch64_get_mem_u32 (cpu, addr));
597 }
598
599 /* Load 64 bit scaled unsigned 12 bit. */
600 static void
601 fldrd_abs (sim_cpu *cpu, uint32_t offset)
602 {
603 unsigned rd = INSTR (4, 0);
604 unsigned rn = INSTR (9, 5);
605 uint64_t addr = aarch64_get_reg_u64 (cpu, rn, SP_OK) + SCALE (offset, 64);
606
607 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
608 aarch64_set_vec_u64 (cpu, rd, 0, aarch64_get_mem_u64 (cpu, addr));
609 }
610
611 /* Load 128 bit scaled unsigned 12 bit. */
612 static void
613 fldrq_abs (sim_cpu *cpu, uint32_t offset)
614 {
615 unsigned rd = INSTR (4, 0);
616 unsigned rn = INSTR (9, 5);
617 uint64_t addr = aarch64_get_reg_u64 (cpu, rn, SP_OK) + SCALE (offset, 128);
618
619 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
620 aarch64_set_vec_u64 (cpu, rd, 0, aarch64_get_mem_u64 (cpu, addr));
621 aarch64_set_vec_u64 (cpu, rd, 1, aarch64_get_mem_u64 (cpu, addr + 8));
622 }
623
624 /* Load 32 bit scaled or unscaled zero- or sign-extended
625 32-bit register offset. */
626 static void
627 fldrs_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
628 {
629 unsigned rm = INSTR (20, 16);
630 unsigned rn = INSTR (9, 5);
631 unsigned st = INSTR (4, 0);
632 uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
633 int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP), extension);
634 uint64_t displacement = OPT_SCALE (extended, 32, scaling);
635
636 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
637 aarch64_set_vec_u32 (cpu, st, 0, aarch64_get_mem_u32
638 (cpu, address + displacement));
639 }
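
/* For example (editor's note): with rm holding 0xfffffffc, SXTW
   and Scaled, extended is -4 and the displacement is -4 << 2 = -16,
   so the load targets [Xn - 16].  */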
640
641 /* Load 64 bit unscaled signed 9 bit with pre- or post-writeback. */
642 static void
643 fldrd_wb (sim_cpu *cpu, int32_t offset, WriteBack wb)
644 {
645 unsigned rn = INSTR (9, 5);
646 unsigned st = INSTR (4, 0);
647 uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
648
649 if (wb != Post)
650 address += offset;
651
652 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
653 aarch64_set_vec_u64 (cpu, st, 0, aarch64_get_mem_u64 (cpu, address));
654
655 if (wb == Post)
656 address += offset;
657
658 if (wb != NoWriteBack)
659 aarch64_set_reg_u64 (cpu, rn, SP_OK, address);
660 }
661
662 /* Load 64 bit scaled or unscaled zero- or sign-extended 32-bit register offset. */
663 static void
664 fldrd_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
665 {
666 unsigned rm = INSTR (20, 16);
667 int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP), extension);
668 uint64_t displacement = OPT_SCALE (extended, 64, scaling);
669
670 fldrd_wb (cpu, displacement, NoWriteBack);
671 }
672
673 /* Load 128 bit unscaled signed 9 bit with pre- or post-writeback. */
674 static void
675 fldrq_wb (sim_cpu *cpu, int32_t offset, WriteBack wb)
676 {
677 FRegister a;
678 unsigned rn = INSTR (9, 5);
679 unsigned st = INSTR (4, 0);
680 uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
681
682 if (wb != Post)
683 address += offset;
684
685 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
686 aarch64_get_mem_long_double (cpu, address, & a);
687 aarch64_set_FP_long_double (cpu, st, a);
688
689 if (wb == Post)
690 address += offset;
691
692 if (wb != NoWriteBack)
693 aarch64_set_reg_u64 (cpu, rn, SP_OK, address);
694 }
695
696 /* Load 128 bit scaled or unscaled zero- or sign-extended 32-bit register offset */
697 static void
698 fldrq_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
699 {
700 unsigned rm = INSTR (20, 16);
701 int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP), extension);
702 uint64_t displacement = OPT_SCALE (extended, 128, scaling);
703
704 fldrq_wb (cpu, displacement, NoWriteBack);
705 }
706
707 /* Memory Access
708
709 load-store single register
710 There are four addressing modes available here which all employ a
711 64 bit source (base) register.
712
713 N.B. the base register (source) can be the stack pointer.
714 The secondary source register (source2) can only be an Xn register.
715
716 Scaled, 12-bit, unsigned immediate offset, without pre- and
717 post-index options.
718 Unscaled, 9-bit, signed immediate offset with pre- or post-index
719 writeback.
720 Scaled or unscaled 64-bit register offset.
721 Scaled or unscaled 32-bit extended register offset.
722
723 All offsets are assumed to be raw from the decode, i.e. the
724 simulator is expected to adjust scaled offsets based on the
725 accessed data size.  With register or extended register offset
726 versions the same applies, except that in the latter case the
727 operation may also require a sign extend.
728
729 A separate method is provided for each possible addressing mode. */
730
731 /* 32 bit load 32 bit scaled unsigned 12 bit */
732 static void
733 ldr32_abs (sim_cpu *cpu, uint32_t offset)
734 {
735 unsigned rn = INSTR (9, 5);
736 unsigned rt = INSTR (4, 0);
737
738 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
739 /* The target register may not be SP but the source may be. */
740 aarch64_set_reg_u64 (cpu, rt, NO_SP, aarch64_get_mem_u32
741 (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK)
742 + SCALE (offset, 32)));
743 }
744
745 /* 32 bit load 32 bit unscaled signed 9 bit with pre- or post-writeback. */
746 static void
747 ldr32_wb (sim_cpu *cpu, int32_t offset, WriteBack wb)
748 {
749 unsigned rn = INSTR (9, 5);
750 unsigned rt = INSTR (4, 0);
751 uint64_t address;
752
753 if (rn == rt && wb != NoWriteBack)
754 HALT_UNALLOC;
755
756 address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
757
758 if (wb != Post)
759 address += offset;
760
761 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
762 aarch64_set_reg_u64 (cpu, rt, NO_SP, aarch64_get_mem_u32 (cpu, address));
763
764 if (wb == Post)
765 address += offset;
766
767 if (wb != NoWriteBack)
768 aarch64_set_reg_u64 (cpu, rn, SP_OK, address);
769 }
770
771 /* 32 bit load 32 bit scaled or unscaled
772 zero- or sign-extended 32-bit register offset */
773 static void
774 ldr32_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
775 {
776 unsigned rm = INSTR (20, 16);
777 unsigned rn = INSTR (9, 5);
778 unsigned rt = INSTR (4, 0);
779 /* rn may reference SP, rm and rt must reference ZR */
780
781 uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
782 int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP), extension);
783 uint64_t displacement = OPT_SCALE (extended, 32, scaling);
784
785 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
786 aarch64_set_reg_u64 (cpu, rt, NO_SP,
787 aarch64_get_mem_u32 (cpu, address + displacement));
788 }
789
790 /* 64 bit load 64 bit scaled unsigned 12 bit */
791 static void
792 ldr_abs (sim_cpu *cpu, uint32_t offset)
793 {
794 unsigned rn = INSTR (9, 5);
795 unsigned rt = INSTR (4, 0);
796
797 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
798 /* The target register may not be SP but the source may be. */
799 aarch64_set_reg_u64 (cpu, rt, NO_SP, aarch64_get_mem_u64
800 (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK)
801 + SCALE (offset, 64)));
802 }
803
804 /* 64 bit load 64 bit unscaled signed 9 bit with pre- or post-writeback. */
805 static void
806 ldr_wb (sim_cpu *cpu, int32_t offset, WriteBack wb)
807 {
808 unsigned rn = INSTR (9, 5);
809 unsigned rt = INSTR (4, 0);
810 uint64_t address;
811
812 if (rn == rt && wb != NoWriteBack)
813 HALT_UNALLOC;
814
815 address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
816
817 if (wb != Post)
818 address += offset;
819
820 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
821 aarch64_set_reg_u64 (cpu, rt, NO_SP, aarch64_get_mem_u64 (cpu, address));
822
823 if (wb == Post)
824 address += offset;
825
826 if (wb != NoWriteBack)
827 aarch64_set_reg_u64 (cpu, rn, SP_OK, address);
828 }
829
830 /* 64 bit load 64 bit scaled or unscaled zero-
831 or sign-extended 32-bit register offset. */
832 static void
833 ldr_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
834 {
835 unsigned rm = INSTR (20, 16);
836 unsigned rn = INSTR (9, 5);
837 unsigned rt = INSTR (4, 0);
838 /* rn may reference SP, rm and rt must reference ZR */
839
840 uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
841 int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP), extension);
842 uint64_t displacement = OPT_SCALE (extended, 64, scaling);
843
844 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
845 aarch64_set_reg_u64 (cpu, rt, NO_SP,
846 aarch64_get_mem_u64 (cpu, address + displacement));
847 }
848
849 /* 32 bit load zero-extended byte scaled unsigned 12 bit. */
850 static void
851 ldrb32_abs (sim_cpu *cpu, uint32_t offset)
852 {
853 unsigned rn = INSTR (9, 5);
854 unsigned rt = INSTR (4, 0);
855
856 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
857 /* The target register may not be SP but the source may be;
858 there is no scaling required for a byte load. */
859 aarch64_set_reg_u64 (cpu, rt, NO_SP,
860 aarch64_get_mem_u8
861 (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK) + offset));
862 }
863
864 /* 32 bit load zero-extended byte unscaled signed 9 bit with pre- or post-writeback. */
865 static void
866 ldrb32_wb (sim_cpu *cpu, int32_t offset, WriteBack wb)
867 {
868 unsigned rn = INSTR (9, 5);
869 unsigned rt = INSTR (4, 0);
870 uint64_t address;
871
872 if (rn == rt && wb != NoWriteBack)
873 HALT_UNALLOC;
874
875 address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
876
877 if (wb != Post)
878 address += offset;
879
880 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
881 aarch64_set_reg_u64 (cpu, rt, NO_SP, aarch64_get_mem_u8 (cpu, address));
882
883 if (wb == Post)
884 address += offset;
885
886 if (wb != NoWriteBack)
887 aarch64_set_reg_u64 (cpu, rn, SP_OK, address);
888 }
889
890 /* 32 bit load zero-extended byte scaled or unscaled zero-
891 or sign-extended 32-bit register offset. */
892 static void
893 ldrb32_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
894 {
895 unsigned rm = INSTR (20, 16);
896 unsigned rn = INSTR (9, 5);
897 unsigned rt = INSTR (4, 0);
898 /* rn may reference SP, rm and rt must reference ZR */
899
900 uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
901 int64_t displacement = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP),
902 extension);
903
904 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
905 /* There is no scaling required for a byte load. */
906 aarch64_set_reg_u64 (cpu, rt, NO_SP,
907 aarch64_get_mem_u8 (cpu, address + displacement));
908 }
909
910 /* 64 bit load sign-extended byte unscaled signed 9 bit
911 with pre- or post-writeback. */
912 static void
913 ldrsb_wb (sim_cpu *cpu, int32_t offset, WriteBack wb)
914 {
915 unsigned rn = INSTR (9, 5);
916 unsigned rt = INSTR (4, 0);
917 uint64_t address;
918 int64_t val;
919
920 if (rn == rt && wb != NoWriteBack)
921 HALT_UNALLOC;
922
923 address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
924
925 if (wb != Post)
926 address += offset;
927
928 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
929 val = aarch64_get_mem_s8 (cpu, address);
930 aarch64_set_reg_s64 (cpu, rt, NO_SP, val);
931
932 if (wb == Post)
933 address += offset;
934
935 if (wb != NoWriteBack)
936 aarch64_set_reg_u64 (cpu, rn, SP_OK, address);
937 }
938
939 /* 64 bit load sign-extended byte scaled unsigned 12 bit. */
940 static void
941 ldrsb_abs (sim_cpu *cpu, uint32_t offset)
942 {
943 ldrsb_wb (cpu, offset, NoWriteBack);
944 }
945
946 /* 64 bit load sign-extended byte scaled or unscaled zero-
947 or sign-extended 32-bit register offset. */
948 static void
949 ldrsb_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
950 {
951 unsigned rm = INSTR (20, 16);
952 unsigned rn = INSTR (9, 5);
953 unsigned rt = INSTR (4, 0);
954 /* rn may reference SP, rm and rt must reference ZR */
955
956 uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
957 int64_t displacement = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP),
958 extension);
959 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
960 /* There is no scaling required for a byte load. */
961 aarch64_set_reg_s64 (cpu, rt, NO_SP,
962 aarch64_get_mem_s8 (cpu, address + displacement));
963 }
964
965 /* 32 bit load zero-extended short scaled unsigned 12 bit. */
966 static void
967 ldrh32_abs (sim_cpu *cpu, uint32_t offset)
968 {
969 unsigned rn = INSTR (9, 5);
970 unsigned rt = INSTR (4, 0);
971 uint32_t val;
972
973 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
974 /* The target register may not be SP but the source may be. */
975 val = aarch64_get_mem_u16 (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK)
976 + SCALE (offset, 16));
977 aarch64_set_reg_u32 (cpu, rt, NO_SP, val);
978 }
979
980 /* 32 bit load zero-extended short unscaled signed 9 bit
981 with pre- or post-writeback. */
982 static void
983 ldrh32_wb (sim_cpu *cpu, int32_t offset, WriteBack wb)
984 {
985 unsigned rn = INSTR (9, 5);
986 unsigned rt = INSTR (4, 0);
987 uint64_t address;
988
989 if (rn == rt && wb != NoWriteBack)
990 HALT_UNALLOC;
991
992 address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
993
994 if (wb != Post)
995 address += offset;
996
997 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
998 aarch64_set_reg_u32 (cpu, rt, NO_SP, aarch64_get_mem_u16 (cpu, address));
999
1000 if (wb == Post)
1001 address += offset;
1002
1003 if (wb != NoWriteBack)
1004 aarch64_set_reg_u64 (cpu, rn, SP_OK, address);
1005 }
1006
1007 /* 32 bit load zero-extended short scaled or unscaled zero-
1008 or sign-extended 32-bit register offset. */
1009 static void
1010 ldrh32_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
1011 {
1012 unsigned rm = INSTR (20, 16);
1013 unsigned rn = INSTR (9, 5);
1014 unsigned rt = INSTR (4, 0);
1015 /* rn may reference SP, rm and rt must reference ZR */
1016
1017 uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
1018 int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP), extension);
1019 uint64_t displacement = OPT_SCALE (extended, 16, scaling);
1020
1021 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
1022 aarch64_set_reg_u32 (cpu, rt, NO_SP,
1023 aarch64_get_mem_u16 (cpu, address + displacement));
1024 }
1025
1026 /* 32 bit load sign-extended short scaled unsigned 12 bit. */
1027 static void
1028 ldrsh32_abs (sim_cpu *cpu, uint32_t offset)
1029 {
1030 unsigned rn = INSTR (9, 5);
1031 unsigned rt = INSTR (4, 0);
1032 int32_t val;
1033
1034 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
1035 /* The target register may not be SP but the source may be. */
1036 val = aarch64_get_mem_s16 (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK)
1037 + SCALE (offset, 16));
1038 aarch64_set_reg_s32 (cpu, rt, NO_SP, val);
1039 }
1040
1041 /* 32 bit load sign-extended short unscaled signed 9 bit
1042 with pre- or post-writeback. */
1043 static void
1044 ldrsh32_wb (sim_cpu *cpu, int32_t offset, WriteBack wb)
1045 {
1046 unsigned rn = INSTR (9, 5);
1047 unsigned rt = INSTR (4, 0);
1048 uint64_t address;
1049
1050 if (rn == rt && wb != NoWriteBack)
1051 HALT_UNALLOC;
1052
1053 address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
1054
1055 if (wb != Post)
1056 address += offset;
1057
1058 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
1059 aarch64_set_reg_s32 (cpu, rt, NO_SP,
1060 (int32_t) aarch64_get_mem_s16 (cpu, address));
1061
1062 if (wb == Post)
1063 address += offset;
1064
1065 if (wb != NoWriteBack)
1066 aarch64_set_reg_u64 (cpu, rn, SP_OK, address);
1067 }
1068
1069 /* 32 bit load sign-extended short scaled or unscaled zero-
1070 or sign-extended 32-bit register offset. */
1071 static void
1072 ldrsh32_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
1073 {
1074 unsigned rm = INSTR (20, 16);
1075 unsigned rn = INSTR (9, 5);
1076 unsigned rt = INSTR (4, 0);
1077 /* rn may reference SP, rm and rt must reference ZR */
1078
1079 uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
1080 int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP), extension);
1081 uint64_t displacement = OPT_SCALE (extended, 16, scaling);
1082
1083 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
1084 aarch64_set_reg_s32 (cpu, rt, NO_SP,
1085 (int32_t) aarch64_get_mem_s16
1086 (cpu, address + displacement));
1087 }
1088
1089 /* 64 bit load sign-extended short scaled unsigned 12 bit. */
1090 static void
1091 ldrsh_abs (sim_cpu *cpu, uint32_t offset)
1092 {
1093 unsigned rn = INSTR (9, 5);
1094 unsigned rt = INSTR (4, 0);
1095 int64_t val;
1096
1097 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
1098 /* The target register may not be SP but the source may be. */
1099 val = aarch64_get_mem_s16 (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK)
1100 + SCALE (offset, 16));
1101 aarch64_set_reg_s64 (cpu, rt, NO_SP, val);
1102 }
1103
1104 /* 64 bit load sign-extended short unscaled signed 9 bit
1105 with pre- or post-writeback. */
1106 static void
1107 ldrsh64_wb (sim_cpu *cpu, int32_t offset, WriteBack wb)
1108 {
1109 unsigned rn = INSTR (9, 5);
1110 unsigned rt = INSTR (4, 0);
1111 uint64_t address;
1112 int64_t val;
1113
1114 if (rn == rt && wb != NoWriteBack)
1115 HALT_UNALLOC;
1116
1117 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
1118 address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
1119
1120 if (wb != Post)
1121 address += offset;
1122
1123 val = aarch64_get_mem_s16 (cpu, address);
1124 aarch64_set_reg_s64 (cpu, rt, NO_SP, val);
1125
1126 if (wb == Post)
1127 address += offset;
1128
1129 if (wb != NoWriteBack)
1130 aarch64_set_reg_u64 (cpu, rn, SP_OK, address);
1131 }
1132
1133 /* 64 bit load sign-extended short scaled or unscaled zero-
1134 or sign-extended 32-bit register offset. */
1135 static void
1136 ldrsh_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
1137 {
1138 unsigned rm = INSTR (20, 16);
1139 unsigned rn = INSTR (9, 5);
1140 unsigned rt = INSTR (4, 0);
1141
1142 /* rn may reference SP, rm and rt must reference ZR */
1143
1144 uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
1145 int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP), extension);
1146 uint64_t displacement = OPT_SCALE (extended, 16, scaling);
1147 int64_t val;
1148
1149 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
1150 val = aarch64_get_mem_s16 (cpu, address + displacement);
1151 aarch64_set_reg_s64 (cpu, rt, NO_SP, val);
1152 }
1153
1154 /* 64 bit load sign-extended 32 bit scaled unsigned 12 bit. */
1155 static void
1156 ldrsw_abs (sim_cpu *cpu, uint32_t offset)
1157 {
1158 unsigned rn = INSTR (9, 5);
1159 unsigned rt = INSTR (4, 0);
1160 int64_t val;
1161
1162 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
1163 val = aarch64_get_mem_s32 (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK)
1164 + SCALE (offset, 32));
1165 /* The target register may not be SP but the source may be. */
1166 aarch64_set_reg_s64 (cpu, rt, NO_SP, val);
1167 }
1168
1169 /* 64 bit load sign-extended 32 bit unscaled signed 9 bit
1170 with pre- or post-writeback. */
1171 static void
1172 ldrsw_wb (sim_cpu *cpu, int32_t offset, WriteBack wb)
1173 {
1174 unsigned rn = INSTR (9, 5);
1175 unsigned rt = INSTR (4, 0);
1176 uint64_t address;
1177
1178 if (rn == rt && wb != NoWriteBack)
1179 HALT_UNALLOC;
1180
1181 address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
1182
1183 if (wb != Post)
1184 address += offset;
1185
1186 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
1187 aarch64_set_reg_s64 (cpu, rt, NO_SP, aarch64_get_mem_s32 (cpu, address));
1188
1189 if (wb == Post)
1190 address += offset;
1191
1192 if (wb != NoWriteBack)
1193 aarch64_set_reg_u64 (cpu, rn, SP_OK, address);
1194 }
1195
1196 /* 64 bit load sign-extended 32 bit scaled or unscaled zero-
1197 or sign-extended 32-bit register offset. */
1198 static void
1199 ldrsw_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
1200 {
1201 unsigned rm = INSTR (20, 16);
1202 unsigned rn = INSTR (9, 5);
1203 unsigned rt = INSTR (4, 0);
1204 /* rn may reference SP, rm and rt must reference ZR */
1205
1206 uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
1207 int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP), extension);
1208 uint64_t displacement = OPT_SCALE (extended, 32, scaling);
1209
1210 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
1211 aarch64_set_reg_s64 (cpu, rt, NO_SP,
1212 aarch64_get_mem_s32 (cpu, address + displacement));
1213 }
1214
1215 /* N.B. with stores the value in source is written to the
1216 address identified by source2 modified by source3/offset. */
1217
1218 /* 32 bit store scaled unsigned 12 bit. */
1219 static void
1220 str32_abs (sim_cpu *cpu, uint32_t offset)
1221 {
1222 unsigned rn = INSTR (9, 5);
1223 unsigned rt = INSTR (4, 0);
1224
1225 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
1226 /* The target register may not be SP but the source may be. */
1227 aarch64_set_mem_u32 (cpu, (aarch64_get_reg_u64 (cpu, rn, SP_OK)
1228 + SCALE (offset, 32)),
1229 aarch64_get_reg_u32 (cpu, rt, NO_SP));
1230 }
1231
1232 /* 32 bit store unscaled signed 9 bit with pre- or post-writeback. */
1233 static void
1234 str32_wb (sim_cpu *cpu, int32_t offset, WriteBack wb)
1235 {
1236 unsigned rn = INSTR (9, 5);
1237 unsigned rt = INSTR (4, 0);
1238 uint64_t address;
1239
1240 if (rn == rt && wb != NoWriteBack)
1241 HALT_UNALLOC;
1242
1243 address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
1244 if (wb != Post)
1245 address += offset;
1246
1247 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
1248 aarch64_set_mem_u32 (cpu, address, aarch64_get_reg_u32 (cpu, rt, NO_SP));
1249
1250 if (wb == Post)
1251 address += offset;
1252
1253 if (wb != NoWriteBack)
1254 aarch64_set_reg_u64 (cpu, rn, SP_OK, address);
1255 }
1256
1257 /* 32 bit store scaled or unscaled zero- or
1258 sign-extended 32-bit register offset. */
1259 static void
1260 str32_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
1261 {
1262 unsigned rm = INSTR (20, 16);
1263 unsigned rn = INSTR (9, 5);
1264 unsigned rt = INSTR (4, 0);
1265
1266 uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
1267 int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP), extension);
1268 uint64_t displacement = OPT_SCALE (extended, 32, scaling);
1269
1270 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
1271 aarch64_set_mem_u32 (cpu, address + displacement,
1272 aarch64_get_reg_u32 (cpu, rt, NO_SP));
1273 }
1274
1275 /* 64 bit store scaled unsigned 12 bit. */
1276 static void
1277 str_abs (sim_cpu *cpu, uint32_t offset)
1278 {
1279 unsigned rn = INSTR (9, 5);
1280 unsigned rt = INSTR (4, 0);
1281
1282 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
1283 aarch64_set_mem_u64 (cpu,
1284 aarch64_get_reg_u64 (cpu, rn, SP_OK)
1285 + SCALE (offset, 64),
1286 aarch64_get_reg_u64 (cpu, rt, NO_SP));
1287 }
1288
1289 /* 64 bit store unscaled signed 9 bit with pre- or post-writeback. */
1290 static void
1291 str_wb (sim_cpu *cpu, int32_t offset, WriteBack wb)
1292 {
1293 unsigned rn = INSTR (9, 5);
1294 unsigned rt = INSTR (4, 0);
1295 uint64_t address;
1296
1297 if (rn == rt && wb != NoWriteBack)
1298 HALT_UNALLOC;
1299
1300 address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
1301
1302 if (wb != Post)
1303 address += offset;
1304
1305 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
1306 aarch64_set_mem_u64 (cpu, address, aarch64_get_reg_u64 (cpu, rt, NO_SP));
1307
1308 if (wb == Post)
1309 address += offset;
1310
1311 if (wb != NoWriteBack)
1312 aarch64_set_reg_u64 (cpu, rn, SP_OK, address);
1313 }
1314
1315 /* 64 bit store scaled or unscaled zero-
1316 or sign-extended 32-bit register offset. */
1317 static void
1318 str_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
1319 {
1320 unsigned rm = INSTR (20, 16);
1321 unsigned rn = INSTR (9, 5);
1322 unsigned rt = INSTR (4, 0);
1323 /* rn may reference SP, rm and rt must reference ZR */
1324
1325 uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
1326 int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP),
1327 extension);
1328 uint64_t displacement = OPT_SCALE (extended, 64, scaling);
1329
1330 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
1331 aarch64_set_mem_u64 (cpu, address + displacement,
1332 aarch64_get_reg_u64 (cpu, rt, NO_SP));
1333 }
1334
1335 /* 32 bit store byte scaled unsigned 12 bit. */
1336 static void
1337 strb_abs (sim_cpu *cpu, uint32_t offset)
1338 {
1339 unsigned rn = INSTR (9, 5);
1340 unsigned rt = INSTR (4, 0);
1341
1342 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
1343 /* The target register may not be SP but the source may be.
1344 There is no scaling required for a byte store. */
1345 aarch64_set_mem_u8 (cpu,
1346 aarch64_get_reg_u64 (cpu, rn, SP_OK) + offset,
1347 aarch64_get_reg_u8 (cpu, rt, NO_SP));
1348 }
1349
1350 /* 32 bit store byte unscaled signed 9 bit with pre- or post-writeback. */
1351 static void
1352 strb_wb (sim_cpu *cpu, int32_t offset, WriteBack wb)
1353 {
1354 unsigned rn = INSTR (9, 5);
1355 unsigned rt = INSTR (4, 0);
1356 uint64_t address;
1357
1358 if (rn == rt && wb != NoWriteBack)
1359 HALT_UNALLOC;
1360
1361 address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
1362
1363 if (wb != Post)
1364 address += offset;
1365
1366 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
1367 aarch64_set_mem_u8 (cpu, address, aarch64_get_reg_u8 (cpu, rt, NO_SP));
1368
1369 if (wb == Post)
1370 address += offset;
1371
1372 if (wb != NoWriteBack)
1373 aarch64_set_reg_u64 (cpu, rn, SP_OK, address);
1374 }
1375
1376 /* 32 bit store byte scaled or unscaled zero-
1377 or sign-extended 32-bit register offset. */
1378 static void
1379 strb_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
1380 {
1381 unsigned rm = INSTR (20, 16);
1382 unsigned rn = INSTR (9, 5);
1383 unsigned rt = INSTR (4, 0);
1384 /* rn may reference SP, rm and rt must reference ZR */
1385
1386 uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
1387 int64_t displacement = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP),
1388 extension);
1389
1390 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
1391 /* There is no scaling required for a byte store. */
1392 aarch64_set_mem_u8 (cpu, address + displacement,
1393 aarch64_get_reg_u8 (cpu, rt, NO_SP));
1394 }
1395
1396 /* 32 bit store short scaled unsigned 12 bit. */
1397 static void
1398 strh_abs (sim_cpu *cpu, uint32_t offset)
1399 {
1400 unsigned rn = INSTR (9, 5);
1401 unsigned rt = INSTR (4, 0);
1402
1403 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
1404 /* The target register may not be SP but the source may be. */
1405 aarch64_set_mem_u16 (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK)
1406 + SCALE (offset, 16),
1407 aarch64_get_reg_u16 (cpu, rt, NO_SP));
1408 }
1409
1410 /* 32 bit store short unscaled signed 9 bit with pre- or post-writeback. */
1411 static void
1412 strh_wb (sim_cpu *cpu, int32_t offset, WriteBack wb)
1413 {
1414 unsigned rn = INSTR (9, 5);
1415 unsigned rt = INSTR (4, 0);
1416 uint64_t address;
1417
1418 if (rn == rt && wb != NoWriteBack)
1419 HALT_UNALLOC;
1420
1421 address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
1422
1423 if (wb != Post)
1424 address += offset;
1425
1426 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
1427 aarch64_set_mem_u16 (cpu, address, aarch64_get_reg_u16 (cpu, rt, NO_SP));
1428
1429 if (wb == Post)
1430 address += offset;
1431
1432 if (wb != NoWriteBack)
1433 aarch64_set_reg_u64 (cpu, rn, SP_OK, address);
1434 }
1435
1436 /* 32 bit store short scaled or unscaled zero-
1437 or sign-extended 32-bit register offset. */
1438 static void
1439 strh_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
1440 {
1441 unsigned rm = INSTR (20, 16);
1442 unsigned rn = INSTR (9, 5);
1443 unsigned rt = INSTR (4, 0);
1444 /* rn may reference SP, rm and rt must reference ZR */
1445
1446 uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
1447 int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP), extension);
1448 uint64_t displacement = OPT_SCALE (extended, 16, scaling);
1449
1450 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
1451 aarch64_set_mem_u16 (cpu, address + displacement,
1452 aarch64_get_reg_u16 (cpu, rt, NO_SP));
1453 }
1454
1455 /* Prefetch unsigned 12 bit. */
1456 static void
1457 prfm_abs (sim_cpu *cpu, uint32_t offset)
1458 {
1459 /* instr[4,0] = prfop : 00000 ==> PLDL1KEEP, 00001 ==> PLDL1STRM,
1460 00010 ==> PLDL2KEEP, 00011 ==> PLDL2STRM,
1461 00100 ==> PLDL3KEEP, 00101 ==> PLDL3STRM,
1462 10000 ==> PSTL1KEEP, 10001 ==> PSTL1STRM,
1463 10010 ==> PSTL2KEEP, 10011 ==> PSTL2STRM,
1464 10100 ==> PSTL3KEEP, 10101 ==> PSTL3STRM,
1465 ow ==> UNALLOC
1466 PrfOp prfop = prfop (instr, 4, 0);
1467 uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK)
1468 + SCALE (offset, 64). */
1469
1470 /* TODO : implement prefetch of address. */
1471 }
1472
1473 /* Prefetch scaled or unscaled zero- or sign-extended 32-bit register offset. */
1474 static void
1475 prfm_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
1476 {
1477 /* instr[4,0] = prfop : 00000 ==> PLDL1KEEP, 00001 ==> PLDL1STRM,
1478 00010 ==> PLDL2KEEP, 00011 ==> PLDL2STRM,
1479 00100 ==> PLDL3KEEP, 00101 ==> PLDL3STRM,
1480 10000 ==> PSTL1KEEP, 10001 ==> PSTL1STRM,
1481 10010 ==> PSTL2KEEP, 10011 ==> PSTL2STRM,
1482 10100 ==> PSTL3KEEP, 10101 ==> PSTL3STRM,
1483 ow ==> UNALLOC
1484 rn may reference SP, rm may only reference ZR
1485 PrfOp prfop = prfop (instr, 4, 0);
1486 uint64_t base = aarch64_get_reg_u64 (cpu, rn, SP_OK);
1487 int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP),
1488 extension);
1489 uint64_t displacement = OPT_SCALE (extended, 64, scaling);
1490 uint64_t address = base + displacement. */
1491
1492 /* TODO : implement prefetch of address */
1493 }
1494
1495 /* 64 bit pc-relative prefetch. */
1496 static void
1497 prfm_pcrel (sim_cpu *cpu, int32_t offset)
1498 {
1499 /* instr[4,0] = prfop : 00000 ==> PLDL1KEEP, 00001 ==> PLDL1STRM,
1500 00010 ==> PLDL2KEEP, 00011 ==> PLDL2STRM,
1501 00100 ==> PLDL3KEEP, 00101 ==> PLDL3STRM,
1502 10000 ==> PSTL1KEEP, 10001 ==> PSTL1STRM,
1503 10010 ==> PSTL2KEEP, 10011 ==> PSTL2STRM,
1504 10100 ==> PSTL3KEEP, 10101 ==> PSTL3STRM,
1505 ow ==> UNALLOC
1506 PrfOp prfop = prfop (instr, 4, 0);
1507 uint64_t address = aarch64_get_PC (cpu) + offset. */
1508
1509 /* TODO : implement this */
1510 }
1511
1512 /* Load-store exclusive. */
1513
1514 static void
1515 ldxr (sim_cpu *cpu)
1516 {
1517 unsigned rn = INSTR (9, 5);
1518 unsigned rt = INSTR (4, 0);
1519 uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
1520 int size = INSTR (31, 30);
1521 /* int ordered = INSTR (15, 15); */
1522 /* int exclusive = ! INSTR (23, 23); */
1523
1524 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
1525 switch (size)
1526 {
1527 case 0:
1528 aarch64_set_reg_u64 (cpu, rt, NO_SP, aarch64_get_mem_u8 (cpu, address));
1529 break;
1530 case 1:
1531 aarch64_set_reg_u64 (cpu, rt, NO_SP, aarch64_get_mem_u16 (cpu, address));
1532 break;
1533 case 2:
1534 aarch64_set_reg_u64 (cpu, rt, NO_SP, aarch64_get_mem_u32 (cpu, address));
1535 break;
1536 case 3:
1537 aarch64_set_reg_u64 (cpu, rt, NO_SP, aarch64_get_mem_u64 (cpu, address));
1538 break;
1539 }
1540 }
1541
1542 static void
1543 stxr (sim_cpu *cpu)
1544 {
1545 unsigned rn = INSTR (9, 5);
1546 unsigned rt = INSTR (4, 0);
1547 unsigned rs = INSTR (20, 16);
1548 uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
1549 int size = INSTR (31, 30);
1550 uint64_t data = aarch64_get_reg_u64 (cpu, rt, NO_SP);
1551
1552 switch (size)
1553 {
1554 case 0: aarch64_set_mem_u8 (cpu, address, data); break;
1555 case 1: aarch64_set_mem_u16 (cpu, address, data); break;
1556 case 2: aarch64_set_mem_u32 (cpu, address, data); break;
1557 case 3: aarch64_set_mem_u64 (cpu, address, data); break;
1558 }
1559
1560 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
1561 aarch64_set_reg_u64 (cpu, rs, NO_SP, 0); /* Store always succeeds: the
simulator is single-threaded, so the exclusive monitor never fails. */
1562 }
1563
1564 static void
1565 dexLoadLiteral (sim_cpu *cpu)
1566 {
1567 /* instr[29,27] == 011
1568 instr[25,24] == 00
1569 instr[31,30:26] = opc: 000 ==> LDRW, 001 ==> FLDRS
1570 010 ==> LDRX, 011 ==> FLDRD
1571 100 ==> LDRSW, 101 ==> FLDRQ
1572 110 ==> PRFM, 111 ==> UNALLOC
1573 instr[26] ==> V : 0 ==> GReg, 1 ==> FReg
1574 instr[23, 5] == simm19 */
1575
1576 /* unsigned rt = INSTR (4, 0); */
1577 uint32_t dispatch = (INSTR (31, 30) << 1) | INSTR (26, 26);
1578 int32_t imm = simm32 (aarch64_get_instr (cpu), 23, 5);
1579
1580 switch (dispatch)
1581 {
1582 case 0: ldr32_pcrel (cpu, imm); break;
1583 case 1: fldrs_pcrel (cpu, imm); break;
1584 case 2: ldr_pcrel (cpu, imm); break;
1585 case 3: fldrd_pcrel (cpu, imm); break;
1586 case 4: ldrsw_pcrel (cpu, imm); break;
1587 case 5: fldrq_pcrel (cpu, imm); break;
1588 case 6: prfm_pcrel (cpu, imm); break;
1589 case 7:
1590 default:
1591 HALT_UNALLOC;
1592 }
1593 }
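
/* Example dispatch (editor's sketch): for the instruction word
   0x18000040 (LDR w0, [pc, #8]), instr[31,30] = 00 and
   instr[26] = 0, so dispatch is 0 and ldr32_pcrel is called with
   imm = 2, i.e. a load from PC + 8.  */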
1594
1595 /* Immediate arithmetic
1596 The aimm argument is a 12 bit unsigned value or a 12 bit unsigned
1597 value left shifted by 12 bits (done at decode).
1598
1599 N.B. the register args (dest, source) can normally be Xn or SP.
1600 The exception occurs for flag setting instructions which may
1601 only use Xn for the output (dest). */
1602
1603 /* 32 bit add immediate. */
1604 static void
1605 add32 (sim_cpu *cpu, uint32_t aimm)
1606 {
1607 unsigned rn = INSTR (9, 5);
1608 unsigned rd = INSTR (4, 0);
1609
1610 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
1611 aarch64_set_reg_u64 (cpu, rd, SP_OK,
1612 aarch64_get_reg_u32 (cpu, rn, SP_OK) + aimm);
1613 }
1614
1615 /* 64 bit add immediate. */
1616 static void
1617 add64 (sim_cpu *cpu, uint32_t aimm)
1618 {
1619 unsigned rn = INSTR (9, 5);
1620 unsigned rd = INSTR (4, 0);
1621
1622 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
1623 aarch64_set_reg_u64 (cpu, rd, SP_OK,
1624 aarch64_get_reg_u64 (cpu, rn, SP_OK) + aimm);
1625 }
1626
1627 static void
1628 set_flags_for_add32 (sim_cpu *cpu, int32_t value1, int32_t value2)
1629 {
1630 int32_t result = value1 + value2;
1631 int64_t sresult = (int64_t) value1 + (int64_t) value2;
1632 uint64_t uresult = (uint64_t)(uint32_t) value1
1633 + (uint64_t)(uint32_t) value2;
1634 uint32_t flags = 0;
1635
1636 if (result == 0)
1637 flags |= Z;
1638
1639 if (result & (1 << 31))
1640 flags |= N;
1641
1642 if (uresult != (uint32_t)uresult)
1643 flags |= C;
1644
1645 if (sresult != (int32_t)sresult)
1646 flags |= V;
1647
1648 aarch64_set_CPSR (cpu, flags);
1649 }
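
/* Worked example (editor's note): adding 0x7fffffff and 1 gives
   0x80000000, setting N and V (signed overflow) with C and Z
   clear; adding 0xffffffff and 1 gives 0, setting Z and C
   instead.  */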
1650
1651 #define NEG(a) (((a) & signbit) == signbit)
1652 #define POS(a) (((a) & signbit) == 0)
1653
1654 static void
1655 set_flags_for_add64 (sim_cpu *cpu, uint64_t value1, uint64_t value2)
1656 {
1657 uint64_t result = value1 + value2;
1658 uint32_t flags = 0;
1659 uint64_t signbit = 1ULL << 63;
1660
1661 if (result == 0)
1662 flags |= Z;
1663
1664 if (NEG (result))
1665 flags |= N;
1666
1667 if ( (NEG (value1) && NEG (value2))
1668 || (NEG (value1) && POS (result))
1669 || (NEG (value2) && POS (result)))
1670 flags |= C;
1671
1672 if ( (NEG (value1) && NEG (value2) && POS (result))
1673 || (POS (value1) && POS (value2) && NEG (result)))
1674 flags |= V;
1675
1676 aarch64_set_CPSR (cpu, flags);
1677 }
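
/* Worked example (editor's note): adding 0x8000000000000000 to
   itself gives 0, so Z, C and V are all set: both operands are
   negative (carry out) and the result is non-negative (overflow).  */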
1678
1679 static void
1680 set_flags_for_sub32 (sim_cpu *cpu, uint32_t value1, uint32_t value2)
1681 {
1682 uint32_t result = value1 - value2;
1683 uint32_t flags = 0;
1684 uint32_t signbit = 1U << 31;
1685
1686 if (result == 0)
1687 flags |= Z;
1688
1689 if (NEG (result))
1690 flags |= N;
1691
1692 if ( (NEG (value1) && POS (value2))
1693 || (NEG (value1) && POS (result))
1694 || (POS (value2) && POS (result)))
1695 flags |= C;
1696
1697 if ( (NEG (value1) && POS (value2) && POS (result))
1698 || (POS (value1) && NEG (value2) && NEG (result)))
1699 flags |= V;
1700
1701 aarch64_set_CPSR (cpu, flags);
1702 }
1703
1704 static void
1705 set_flags_for_sub64 (sim_cpu *cpu, uint64_t value1, uint64_t value2)
1706 {
1707 uint64_t result = value1 - value2;
1708 uint32_t flags = 0;
1709 uint64_t signbit = 1ULL << 63;
1710
1711 if (result == 0)
1712 flags |= Z;
1713
1714 if (NEG (result))
1715 flags |= N;
1716
1717 if ( (NEG (value1) && POS (value2))
1718 || (NEG (value1) && POS (result))
1719 || (POS (value2) && POS (result)))
1720 flags |= C;
1721
1722 if ( (NEG (value1) && POS (value2) && POS (result))
1723 || (POS (value1) && NEG (value2) && NEG (result)))
1724 flags |= V;
1725
1726 aarch64_set_CPSR (cpu, flags);
1727 }
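
/* Editor's note: for subtraction the C flag follows the AArch64
   carry = not-borrow convention.  E.g. in set_flags_for_sub32,
   5 - 3 sets C via POS (value2) && POS (result), while 3 - 5
   leaves C clear.  */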
1728
1729 static void
1730 set_flags_for_binop32 (sim_cpu *cpu, uint32_t result)
1731 {
1732 uint32_t flags = 0;
1733
1734 if (result == 0)
1735 flags |= Z;
1736 else
1737 flags &= ~ Z;
1738
1739 if (result & (1 << 31))
1740 flags |= N;
1741 else
1742 flags &= ~ N;
1743
1744 aarch64_set_CPSR (cpu, flags);
1745 }
1746
1747 static void
1748 set_flags_for_binop64 (sim_cpu *cpu, uint64_t result)
1749 {
1750 uint32_t flags = 0;
1751
1752 if (result == 0)
1753 flags |= Z;
1754 else
1755 flags &= ~ Z;
1756
1757 if (result & (1ULL << 63))
1758 flags |= N;
1759 else
1760 flags &= ~ N;
1761
1762 aarch64_set_CPSR (cpu, flags);
1763 }
1764
1765 /* 32 bit add immediate set flags. */
1766 static void
1767 adds32 (sim_cpu *cpu, uint32_t aimm)
1768 {
1769 unsigned rn = INSTR (9, 5);
1770 unsigned rd = INSTR (4, 0);
1771 /* TODO : do we need to worry about signs here? */
1772 int32_t value1 = aarch64_get_reg_s32 (cpu, rn, SP_OK);
1773
1774 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
1775 aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 + aimm);
1776 set_flags_for_add32 (cpu, value1, aimm);
1777 }
1778
1779 /* 64 bit add immediate set flags. */
1780 static void
1781 adds64 (sim_cpu *cpu, uint32_t aimm)
1782 {
1783 unsigned rn = INSTR (9, 5);
1784 unsigned rd = INSTR (4, 0);
1785 uint64_t value1 = aarch64_get_reg_u64 (cpu, rn, SP_OK);
1786 uint64_t value2 = aimm;
1787
1788 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
1789 aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 + value2);
1790 set_flags_for_add64 (cpu, value1, value2);
1791 }
1792
1793 /* 32 bit sub immediate. */
1794 static void
1795 sub32 (sim_cpu *cpu, uint32_t aimm)
1796 {
1797 unsigned rn = INSTR (9, 5);
1798 unsigned rd = INSTR (4, 0);
1799
1800 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
1801 aarch64_set_reg_u64 (cpu, rd, SP_OK,
1802 aarch64_get_reg_u32 (cpu, rn, SP_OK) - aimm);
1803 }
1804
1805 /* 64 bit sub immediate. */
1806 static void
1807 sub64 (sim_cpu *cpu, uint32_t aimm)
1808 {
1809 unsigned rn = INSTR (9, 5);
1810 unsigned rd = INSTR (4, 0);
1811
1812 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
1813 aarch64_set_reg_u64 (cpu, rd, SP_OK,
1814 aarch64_get_reg_u64 (cpu, rn, SP_OK) - aimm);
1815 }
1816
1817 /* 32 bit sub immediate set flags. */
1818 static void
1819 subs32 (sim_cpu *cpu, uint32_t aimm)
1820 {
1821 unsigned rn = INSTR (9, 5);
1822 unsigned rd = INSTR (4, 0);
1823 uint32_t value1 = aarch64_get_reg_u64 (cpu, rn, SP_OK);
1824 uint32_t value2 = aimm;
1825
1826 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
1827 aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 - value2);
1828 set_flags_for_sub32 (cpu, value1, value2);
1829 }
1830
1831 /* 64 bit sub immediate set flags. */
1832 static void
1833 subs64 (sim_cpu *cpu, uint32_t aimm)
1834 {
1835 unsigned rn = INSTR (9, 5);
1836 unsigned rd = INSTR (4, 0);
1837 uint64_t value1 = aarch64_get_reg_u64 (cpu, rn, SP_OK);
1838 uint32_t value2 = aimm;
1839
1840 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
1841 aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 - value2);
1842 set_flags_for_sub64 (cpu, value1, value2);
1843 }
1844
1845 /* Data Processing Register. */
1846
1847 /* First, two helpers to perform the shift operations. */
1848
1849 static inline uint32_t
1850 shifted32 (uint32_t value, Shift shift, uint32_t count)
1851 {
1852 switch (shift)
1853 {
1854 default:
1855 case LSL:
1856 return (value << count);
1857 case LSR:
1858 return (value >> count);
1859 case ASR:
1860 {
1861 int32_t svalue = value;
1862 return (svalue >> count);
1863 }
1864 case ROR:
1865 {
1866 uint32_t top = value >> count;
1867 uint32_t bottom = value << (32 - count);
1868 return (bottom | top);
1869 }
1870 }
1871 }
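/* Informative example: shifted32 (0x80000001, ROR, 1) yields 0xC0000000.
   N.B. a ROR count of zero would make the expression value << (32 - count)
   a shift by the full register width, which is undefined behaviour in C;
   the decode paths that reach these helpers are expected to supply a
   non-zero rotate count.  */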
1872
1873 static inline uint64_t
1874 shifted64 (uint64_t value, Shift shift, uint32_t count)
1875 {
1876 switch (shift)
1877 {
1878 default:
1879 case LSL:
1880 return (value << count);
1881 case LSR:
1882 return (value >> count);
1883 case ASR:
1884 {
1885 int64_t svalue = value;
1886 return (svalue >> count);
1887 }
1888 case ROR:
1889 {
1890 uint64_t top = value >> count;
1891 uint64_t bottom = value << (64 - count);
1892 return (bottom | top);
1893 }
1894 }
1895 }
1896
1897 /* Arithmetic shifted register.
1898 These allow an optional LSL, ASR or LSR applied to the second source
1899 register with a shift count of up to the register bit count minus one.
1900
1901 N.B. register args may not be SP. */
1902
1903 /* 32 bit ADD shifted register. */
1904 static void
1905 add32_shift (sim_cpu *cpu, Shift shift, uint32_t count)
1906 {
1907 unsigned rm = INSTR (20, 16);
1908 unsigned rn = INSTR (9, 5);
1909 unsigned rd = INSTR (4, 0);
1910
1911 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
1912 aarch64_set_reg_u64 (cpu, rd, NO_SP,
1913 aarch64_get_reg_u32 (cpu, rn, NO_SP)
1914 + shifted32 (aarch64_get_reg_u32 (cpu, rm, NO_SP),
1915 shift, count));
1916 }
1917
1918 /* 64 bit ADD shifted register. */
1919 static void
1920 add64_shift (sim_cpu *cpu, Shift shift, uint32_t count)
1921 {
1922 unsigned rm = INSTR (20, 16);
1923 unsigned rn = INSTR (9, 5);
1924 unsigned rd = INSTR (4, 0);
1925
1926 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
1927 aarch64_set_reg_u64 (cpu, rd, NO_SP,
1928 aarch64_get_reg_u64 (cpu, rn, NO_SP)
1929 + shifted64 (aarch64_get_reg_u64 (cpu, rm, NO_SP),
1930 shift, count));
1931 }
1932
1933 /* 32 bit ADD shifted register setting flags. */
1934 static void
1935 adds32_shift (sim_cpu *cpu, Shift shift, uint32_t count)
1936 {
1937 unsigned rm = INSTR (20, 16);
1938 unsigned rn = INSTR (9, 5);
1939 unsigned rd = INSTR (4, 0);
1940
1941 uint32_t value1 = aarch64_get_reg_u32 (cpu, rn, NO_SP);
1942 uint32_t value2 = shifted32 (aarch64_get_reg_u32 (cpu, rm, NO_SP),
1943 shift, count);
1944
1945 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
1946 aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 + value2);
1947 set_flags_for_add32 (cpu, value1, value2);
1948 }
1949
1950 /* 64 bit ADD shifted register setting flags. */
1951 static void
1952 adds64_shift (sim_cpu *cpu, Shift shift, uint32_t count)
1953 {
1954 unsigned rm = INSTR (20, 16);
1955 unsigned rn = INSTR (9, 5);
1956 unsigned rd = INSTR (4, 0);
1957
1958 uint64_t value1 = aarch64_get_reg_u64 (cpu, rn, NO_SP);
1959 uint64_t value2 = shifted64 (aarch64_get_reg_u64 (cpu, rm, NO_SP),
1960 shift, count);
1961
1962 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
1963 aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 + value2);
1964 set_flags_for_add64 (cpu, value1, value2);
1965 }
1966
1967 /* 32 bit SUB shifted register. */
1968 static void
1969 sub32_shift (sim_cpu *cpu, Shift shift, uint32_t count)
1970 {
1971 unsigned rm = INSTR (20, 16);
1972 unsigned rn = INSTR (9, 5);
1973 unsigned rd = INSTR (4, 0);
1974
1975 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
1976 aarch64_set_reg_u64 (cpu, rd, NO_SP,
1977 aarch64_get_reg_u32 (cpu, rn, NO_SP)
1978 - shifted32 (aarch64_get_reg_u32 (cpu, rm, NO_SP),
1979 shift, count));
1980 }
1981
1982 /* 64 bit SUB shifted register. */
1983 static void
1984 sub64_shift (sim_cpu *cpu, Shift shift, uint32_t count)
1985 {
1986 unsigned rm = INSTR (20, 16);
1987 unsigned rn = INSTR (9, 5);
1988 unsigned rd = INSTR (4, 0);
1989
1990 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
1991 aarch64_set_reg_u64 (cpu, rd, NO_SP,
1992 aarch64_get_reg_u64 (cpu, rn, NO_SP)
1993 - shifted64 (aarch64_get_reg_u64 (cpu, rm, NO_SP),
1994 shift, count));
1995 }
1996
1997 /* 32 bit SUB shifted register setting flags. */
1998 static void
1999 subs32_shift (sim_cpu *cpu, Shift shift, uint32_t count)
2000 {
2001 unsigned rm = INSTR (20, 16);
2002 unsigned rn = INSTR (9, 5);
2003 unsigned rd = INSTR (4, 0);
2004
2005 uint32_t value1 = aarch64_get_reg_u32 (cpu, rn, NO_SP);
2006 uint32_t value2 = shifted32 (aarch64_get_reg_u32 (cpu, rm, NO_SP),
2007 shift, count);
2008
2009 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
2010 aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 - value2);
2011 set_flags_for_sub32 (cpu, value1, value2);
2012 }
2013
2014 /* 64 bit SUB shifted register setting flags. */
2015 static void
2016 subs64_shift (sim_cpu *cpu, Shift shift, uint32_t count)
2017 {
2018 unsigned rm = INSTR (20, 16);
2019 unsigned rn = INSTR (9, 5);
2020 unsigned rd = INSTR (4, 0);
2021
2022 uint64_t value1 = aarch64_get_reg_u64 (cpu, rn, NO_SP);
2023 uint64_t value2 = shifted64 (aarch64_get_reg_u64 (cpu, rm, NO_SP),
2024 shift, count);
2025
2026 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
2027 aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 - value2);
2028 set_flags_for_sub64 (cpu, value1, value2);
2029 }
2030
2031 /* First a couple more helpers to fetch the
2032 relevant source register element either
2033 sign or zero extended as required by the
2034 extension value. */
2035
2036 static uint32_t
2037 extreg32 (sim_cpu *cpu, unsigned int lo, Extension extension)
2038 {
2039 switch (extension)
2040 {
2041 case UXTB: return aarch64_get_reg_u8 (cpu, lo, NO_SP);
2042 case UXTH: return aarch64_get_reg_u16 (cpu, lo, NO_SP);
2043 case UXTW: /* Fall through. */
2044 case UXTX: return aarch64_get_reg_u32 (cpu, lo, NO_SP);
2045 case SXTB: return aarch64_get_reg_s8 (cpu, lo, NO_SP);
2046 case SXTH: return aarch64_get_reg_s16 (cpu, lo, NO_SP);
2047 case SXTW: /* Fall through. */
2048 case SXTX: /* Fall through. */
2049 default: return aarch64_get_reg_s32 (cpu, lo, NO_SP);
2050 }
2051 }
2052
2053 static uint64_t
2054 extreg64 (sim_cpu *cpu, unsigned int lo, Extension extension)
2055 {
2056 switch (extension)
2057 {
2058 case UXTB: return aarch64_get_reg_u8 (cpu, lo, NO_SP);
2059 case UXTH: return aarch64_get_reg_u16 (cpu, lo, NO_SP);
2060 case UXTW: return aarch64_get_reg_u32 (cpu, lo, NO_SP);
2061 case UXTX: return aarch64_get_reg_u64 (cpu, lo, NO_SP);
2062 case SXTB: return aarch64_get_reg_s8 (cpu, lo, NO_SP);
2063 case SXTH: return aarch64_get_reg_s16 (cpu, lo, NO_SP);
2064 case SXTW: return aarch64_get_reg_s32 (cpu, lo, NO_SP);
2065 case SXTX:
2066 default: return aarch64_get_reg_s64 (cpu, lo, NO_SP);
2067 }
2068 }
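/* Informative example: with a source register holding
   0x00000000000000FF, extreg64 (..., UXTB) returns 0xFF, while
   extreg64 (..., SXTB) treats the byte as -1 and returns
   0xFFFFFFFFFFFFFFFF.  */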
2069
2070 /* Arithmetic extending register.
2071 These allow an optional zero or sign extension of some portion of
2072 the second source register followed by an optional left shift of
2073 between 0 and 4 bits.
2074
2075 N.B. output (dest) and first input arg (source) may normally be Xn
2076 or SP. However, for flag setting operations dest can only be
2077 Xn. Second input registers are always Xn. */
2078
2079 /* 32 bit ADD extending register. */
2080 static void
2081 add32_ext (sim_cpu *cpu, Extension extension, uint32_t shift)
2082 {
2083 unsigned rm = INSTR (20, 16);
2084 unsigned rn = INSTR (9, 5);
2085 unsigned rd = INSTR (4, 0);
2086
2087 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
2088 aarch64_set_reg_u64 (cpu, rd, SP_OK,
2089 aarch64_get_reg_u32 (cpu, rn, SP_OK)
2090 + (extreg32 (cpu, rm, extension) << shift));
2091 }
2092
2093 /* 64 bit ADD extending register.
2094 N.B. This subsumes the case with 64 bit source2 and UXTX #n or LSL #0. */
2095 static void
2096 add64_ext (sim_cpu *cpu, Extension extension, uint32_t shift)
2097 {
2098 unsigned rm = INSTR (20, 16);
2099 unsigned rn = INSTR (9, 5);
2100 unsigned rd = INSTR (4, 0);
2101
2102 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
2103 aarch64_set_reg_u64 (cpu, rd, SP_OK,
2104 aarch64_get_reg_u64 (cpu, rn, SP_OK)
2105 + (extreg64 (cpu, rm, extension) << shift));
2106 }
2107
2108 /* 32 bit ADD extending register setting flags. */
2109 static void
2110 adds32_ext (sim_cpu *cpu, Extension extension, uint32_t shift)
2111 {
2112 unsigned rm = INSTR (20, 16);
2113 unsigned rn = INSTR (9, 5);
2114 unsigned rd = INSTR (4, 0);
2115
2116 uint32_t value1 = aarch64_get_reg_u32 (cpu, rn, SP_OK);
2117 uint32_t value2 = extreg32 (cpu, rm, extension) << shift;
2118
2119 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
2120 aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 + value2);
2121 set_flags_for_add32 (cpu, value1, value2);
2122 }
2123
2124 /* 64 bit ADD extending register setting flags */
2125 /* N.B. this subsumes the case with 64 bit source2 and UXTX #n or LSL #0 */
2126 static void
2127 adds64_ext (sim_cpu *cpu, Extension extension, uint32_t shift)
2128 {
2129 unsigned rm = INSTR (20, 16);
2130 unsigned rn = INSTR (9, 5);
2131 unsigned rd = INSTR (4, 0);
2132
2133 uint64_t value1 = aarch64_get_reg_u64 (cpu, rn, SP_OK);
2134 uint64_t value2 = extreg64 (cpu, rm, extension) << shift;
2135
2136 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
2137 aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 + value2);
2138 set_flags_for_add64 (cpu, value1, value2);
2139 }
2140
2141 /* 32 bit SUB extending register. */
2142 static void
2143 sub32_ext (sim_cpu *cpu, Extension extension, uint32_t shift)
2144 {
2145 unsigned rm = INSTR (20, 16);
2146 unsigned rn = INSTR (9, 5);
2147 unsigned rd = INSTR (4, 0);
2148
2149 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
2150 aarch64_set_reg_u64 (cpu, rd, SP_OK,
2151 aarch64_get_reg_u32 (cpu, rn, SP_OK)
2152 - (extreg32 (cpu, rm, extension) << shift));
2153 }
2154
2155 /* 64 bit SUB extending register. */
2156 /* N.B. this subsumes the case with 64 bit source2 and UXTX #n or LSL #0. */
2157 static void
2158 sub64_ext (sim_cpu *cpu, Extension extension, uint32_t shift)
2159 {
2160 unsigned rm = INSTR (20, 16);
2161 unsigned rn = INSTR (9, 5);
2162 unsigned rd = INSTR (4, 0);
2163
2164 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
2165 aarch64_set_reg_u64 (cpu, rd, SP_OK,
2166 aarch64_get_reg_u64 (cpu, rn, SP_OK)
2167 - (extreg64 (cpu, rm, extension) << shift));
2168 }
2169
2170 /* 32 bit SUB extending register setting flags. */
2171 static void
2172 subs32_ext (sim_cpu *cpu, Extension extension, uint32_t shift)
2173 {
2174 unsigned rm = INSTR (20, 16);
2175 unsigned rn = INSTR (9, 5);
2176 unsigned rd = INSTR (4, 0);
2177
2178 uint32_t value1 = aarch64_get_reg_u32 (cpu, rn, SP_OK);
2179 uint32_t value2 = extreg32 (cpu, rm, extension) << shift;
2180
2181 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
2182 aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 - value2);
2183 set_flags_for_sub32 (cpu, value1, value2);
2184 }
2185
2186 /* 64 bit SUB extending register setting flags */
2187 /* N.B. this subsumes the case with 64 bit source2 and UXTX #n or LSL #0 */
2188 static void
2189 subs64_ext (sim_cpu *cpu, Extension extension, uint32_t shift)
2190 {
2191 unsigned rm = INSTR (20, 16);
2192 unsigned rn = INSTR (9, 5);
2193 unsigned rd = INSTR (4, 0);
2194
2195 uint64_t value1 = aarch64_get_reg_u64 (cpu, rn, SP_OK);
2196 uint64_t value2 = extreg64 (cpu, rm, extension) << shift;
2197
2198 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
2199 aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 - value2);
2200 set_flags_for_sub64 (cpu, value1, value2);
2201 }
2202
2203 static void
2204 dexAddSubtractImmediate (sim_cpu *cpu)
2205 {
2206 /* instr[31] = size : 0 ==> 32 bit, 1 ==> 64 bit
2207 instr[30] = op : 0 ==> ADD, 1 ==> SUB
2208 instr[29] = set : 0 ==> no flags, 1 ==> set flags
2209 instr[28,24] = 10001
2210 instr[23,22] = shift : 00 ==> LSL#0, 01 ==> LSL#12, 1x ==> UNALLOC
2211 instr[21,10] = uimm12
2212 instr[9,5] = Rn
2213 instr[4,0] = Rd */
2214
2215 /* N.B. the shift is applied at decode before calling the add/sub routine. */
2216 uint32_t shift = INSTR (23, 22);
2217 uint32_t imm = INSTR (21, 10);
2218 uint32_t dispatch = INSTR (31, 29);
2219
2220 NYI_assert (28, 24, 0x11);
2221
2222 if (shift > 1)
2223 HALT_UNALLOC;
2224
2225 if (shift)
2226 imm <<= 12;
2227
2228 switch (dispatch)
2229 {
2230 case 0: add32 (cpu, imm); break;
2231 case 1: adds32 (cpu, imm); break;
2232 case 2: sub32 (cpu, imm); break;
2233 case 3: subs32 (cpu, imm); break;
2234 case 4: add64 (cpu, imm); break;
2235 case 5: adds64 (cpu, imm); break;
2236 case 6: sub64 (cpu, imm); break;
2237 case 7: subs64 (cpu, imm); break;
2238 }
2239 }
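/* Informative example: SUBS W0, W1, #16 encodes size = 0, op = 1 and
   set = 1, so the three dispatch bits instr[31,29] are 0b011 = 3 and
   the instruction is routed to subs32 (cpu, 16).  */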
2240
2241 static void
2242 dexAddSubtractShiftedRegister (sim_cpu *cpu)
2243 {
2244 /* instr[31] = size : 0 ==> 32 bit, 1 ==> 64 bit
2245 instr[30,29] = op : 00 ==> ADD, 01 ==> ADDS, 10 ==> SUB, 11 ==> SUBS
2246 instr[28,24] = 01011
2247 instr[23,22] = shift : 0 ==> LSL, 1 ==> LSR, 2 ==> ASR, 3 ==> UNALLOC
2248 instr[21] = 0
2249 instr[20,16] = Rm
2250 instr[15,10] = count : must be 0xxxxx for 32 bit
2251 instr[9,5] = Rn
2252 instr[4,0] = Rd */
2253
2254 uint32_t size = INSTR (31, 31);
2255 uint32_t count = INSTR (15, 10);
2256 Shift shiftType = INSTR (23, 22);
2257
2258 NYI_assert (28, 24, 0x0B);
2259 NYI_assert (21, 21, 0);
2260
2261 /* Shift encoded as ROR is unallocated. */
2262 if (shiftType == ROR)
2263 HALT_UNALLOC;
2264
2265 /* 32 bit operations must have count[5] = 0
2266 or else we have an UNALLOC. */
2267 if (size == 0 && uimm (count, 5, 5))
2268 HALT_UNALLOC;
2269
2270 /* Dispatch on size:op i.e instr [31,29]. */
2271 switch (INSTR (31, 29))
2272 {
2273 case 0: add32_shift (cpu, shiftType, count); break;
2274 case 1: adds32_shift (cpu, shiftType, count); break;
2275 case 2: sub32_shift (cpu, shiftType, count); break;
2276 case 3: subs32_shift (cpu, shiftType, count); break;
2277 case 4: add64_shift (cpu, shiftType, count); break;
2278 case 5: adds64_shift (cpu, shiftType, count); break;
2279 case 6: sub64_shift (cpu, shiftType, count); break;
2280 case 7: subs64_shift (cpu, shiftType, count); break;
2281 }
2282 }
2283
2284 static void
2285 dexAddSubtractExtendedRegister (sim_cpu *cpu)
2286 {
2287 /* instr[31] = size : 0 ==> 32 bit, 1 ==> 64 bit
2288 instr[30] = op : 0 ==> ADD, 1 ==> SUB
2289 instr[29] = set? : 0 ==> no flags, 1 ==> set flags
2290 instr[28,24] = 01011
2291 instr[23,22] = opt : 0 ==> ok, 1,2,3 ==> UNALLOC
2292 instr[21] = 1
2293 instr[20,16] = Rm
2294 instr[15,13] = option : 000 ==> UXTB, 001 ==> UXTH,
2295 010 ==> LSL|UXTW, 011 ==> UXTX,
2296 100 ==> SXTB, 101 ==> SXTH,
2297 110 ==> SXTW, 111 ==> SXTX,
2298 instr[12,10] = shift : 0,1,2,3,4 ==> ok, 5,6,7 ==> UNALLOC
2299 instr[9,5] = Rn
2300 instr[4,0] = Rd */
2301
2302 Extension extensionType = INSTR (15, 13);
2303 uint32_t shift = INSTR (12, 10);
2304
2305 NYI_assert (28, 24, 0x0B);
2306 NYI_assert (21, 21, 1);
2307
2308 /* Shift may not exceed 4. */
2309 if (shift > 4)
2310 HALT_UNALLOC;
2311
2312 /* Dispatch on size:op:set?. */
2313 switch (INSTR (31, 29))
2314 {
2315 case 0: add32_ext (cpu, extensionType, shift); break;
2316 case 1: adds32_ext (cpu, extensionType, shift); break;
2317 case 2: sub32_ext (cpu, extensionType, shift); break;
2318 case 3: subs32_ext (cpu, extensionType, shift); break;
2319 case 4: add64_ext (cpu, extensionType, shift); break;
2320 case 5: adds64_ext (cpu, extensionType, shift); break;
2321 case 6: sub64_ext (cpu, extensionType, shift); break;
2322 case 7: subs64_ext (cpu, extensionType, shift); break;
2323 }
2324 }
2325
2326 /* Conditional data processing
2327 Condition register is implicit 3rd source. */
2328
2329 /* 32 bit add with carry. */
2330 /* N.B. register args may not be SP. */
2331
2332 static void
2333 adc32 (sim_cpu *cpu)
2334 {
2335 unsigned rm = INSTR (20, 16);
2336 unsigned rn = INSTR (9, 5);
2337 unsigned rd = INSTR (4, 0);
2338
2339 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
2340 aarch64_set_reg_u64 (cpu, rd, NO_SP,
2341 aarch64_get_reg_u32 (cpu, rn, NO_SP)
2342 + aarch64_get_reg_u32 (cpu, rm, NO_SP)
2343 + IS_SET (C));
2344 }
2345
2346 /* 64 bit add with carry */
2347 static void
2348 adc64 (sim_cpu *cpu)
2349 {
2350 unsigned rm = INSTR (20, 16);
2351 unsigned rn = INSTR (9, 5);
2352 unsigned rd = INSTR (4, 0);
2353
2354 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
2355 aarch64_set_reg_u64 (cpu, rd, NO_SP,
2356 aarch64_get_reg_u64 (cpu, rn, NO_SP)
2357 + aarch64_get_reg_u64 (cpu, rm, NO_SP)
2358 + IS_SET (C));
2359 }
2360
2361 /* 32 bit add with carry setting flags. */
2362 static void
2363 adcs32 (sim_cpu *cpu)
2364 {
2365 unsigned rm = INSTR (20, 16);
2366 unsigned rn = INSTR (9, 5);
2367 unsigned rd = INSTR (4, 0);
2368
2369 uint32_t value1 = aarch64_get_reg_u32 (cpu, rn, NO_SP);
2370 uint32_t value2 = aarch64_get_reg_u32 (cpu, rm, NO_SP);
2371 uint32_t carry = IS_SET (C);
2372
2373 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
2374 aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 + value2 + carry);
2375 set_flags_for_add32 (cpu, value1, value2 + carry);
2376 }
2377
2378 /* 64 bit add with carry setting flags. */
2379 static void
2380 adcs64 (sim_cpu *cpu)
2381 {
2382 unsigned rm = INSTR (20, 16);
2383 unsigned rn = INSTR (9, 5);
2384 unsigned rd = INSTR (4, 0);
2385
2386 uint64_t value1 = aarch64_get_reg_u64 (cpu, rn, NO_SP);
2387 uint64_t value2 = aarch64_get_reg_u64 (cpu, rm, NO_SP);
2388 uint64_t carry = IS_SET (C);
2389
2390 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
2391 aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 + value2 + carry);
2392 set_flags_for_add64 (cpu, value1, value2 + carry);
2393 }
2394
2395 /* 32 bit sub with carry. */
2396 static void
2397 sbc32 (sim_cpu *cpu)
2398 {
2399 unsigned rm = INSTR (20, 16);
2400 unsigned rn = INSTR (9, 5); /* ngc iff rn == 31. */
2401 unsigned rd = INSTR (4, 0);
2402
2403 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
2404 aarch64_set_reg_u64 (cpu, rd, NO_SP,
2405 aarch64_get_reg_u32 (cpu, rn, NO_SP)
2406 - aarch64_get_reg_u32 (cpu, rm, NO_SP)
2407 - 1 + IS_SET (C));
2408 }
2409
2410 /* 64 bit sub with carry */
2411 static void
2412 sbc64 (sim_cpu *cpu)
2413 {
2414 unsigned rm = INSTR (20, 16);
2415 unsigned rn = INSTR (9, 5);
2416 unsigned rd = INSTR (4, 0);
2417
2418 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
2419 aarch64_set_reg_u64 (cpu, rd, NO_SP,
2420 aarch64_get_reg_u64 (cpu, rn, NO_SP)
2421 - aarch64_get_reg_u64 (cpu, rm, NO_SP)
2422 - 1 + IS_SET (C));
2423 }
2424
2425 /* 32 bit sub with carry setting flags */
2426 static void
2427 sbcs32 (sim_cpu *cpu)
2428 {
2429 unsigned rm = INSTR (20, 16);
2430 unsigned rn = INSTR (9, 5);
2431 unsigned rd = INSTR (4, 0);
2432
2433 uint32_t value1 = aarch64_get_reg_u32 (cpu, rn, NO_SP);
2434 uint32_t value2 = aarch64_get_reg_u32 (cpu, rm, NO_SP);
2435 uint32_t carry = IS_SET (C);
2436 uint32_t result = value1 - value2 - 1 + carry;
2437
2438 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
2439 aarch64_set_reg_u64 (cpu, rd, NO_SP, result);
2440 set_flags_for_sub32 (cpu, value1, value2 + 1 - carry);
2441 }
2442
2443 /* 64 bit sub with carry setting flags */
2444 static void
2445 sbcs64 (sim_cpu *cpu)
2446 {
2447 unsigned rm = INSTR (20, 16);
2448 unsigned rn = INSTR (9, 5);
2449 unsigned rd = INSTR (4, 0);
2450
2451 uint64_t value1 = aarch64_get_reg_u64 (cpu, rn, NO_SP);
2452 uint64_t value2 = aarch64_get_reg_u64 (cpu, rm, NO_SP);
2453 uint64_t carry = IS_SET (C);
2454 uint64_t result = value1 - value2 - 1 + carry;
2455
2456 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
2457 aarch64_set_reg_u64 (cpu, rd, NO_SP, result);
2458 set_flags_for_sub64 (cpu, value1, value2 + 1 - carry);
2459 }
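/* Informative note on the SBC helpers above: AArch64 subtract-with-carry
   computes Rn - Rm - 1 + C, i.e. the carry flag acts as "not borrow".
   For example, with value1 = 5, value2 = 3 and the carry flag clear the
   result is 1; with the carry flag set it is 2, the plain subtraction.  */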
2460
2461 static void
2462 dexAddSubtractWithCarry (sim_cpu *cpu)
2463 {
2464 /* instr[31] = size : 0 ==> 32 bit, 1 ==> 64 bit
2465 instr[30] = op : 0 ==> ADC, 1 ==> SBC
2466 instr[29] = set? : 0 ==> no flags, 1 ==> set flags
2467 instr[28,21] = 1 1010 000
2468 instr[20,16] = Rm
2469 instr[15,10] = op2 : 000000 ==> ok, otherwise ==> UNALLOC
2470 instr[9,5] = Rn
2471 instr[4,0] = Rd */
2472
2473 uint32_t op2 = INSTR (15, 10);
2474
2475 NYI_assert (28, 21, 0xD0);
2476
2477 if (op2 != 0)
2478 HALT_UNALLOC;
2479
2480 /* Dispatch on size:op:set?. */
2481 switch (INSTR (31, 29))
2482 {
2483 case 0: adc32 (cpu); break;
2484 case 1: adcs32 (cpu); break;
2485 case 2: sbc32 (cpu); break;
2486 case 3: sbcs32 (cpu); break;
2487 case 4: adc64 (cpu); break;
2488 case 5: adcs64 (cpu); break;
2489 case 6: sbc64 (cpu); break;
2490 case 7: sbcs64 (cpu); break;
2491 }
2492 }
2493
2494 static uint32_t
2495 testConditionCode (sim_cpu *cpu, CondCode cc)
2496 {
2497 /* This should be reducible to branchless logic
2498 by some careful testing of bits in CC followed
2499 by the requisite masking and combining of bits
2500 from the flag register.
2501
2502 For now we do it with a switch. */
2503 int res;
2504
2505 switch (cc)
2506 {
2507 case EQ: res = IS_SET (Z); break;
2508 case NE: res = IS_CLEAR (Z); break;
2509 case CS: res = IS_SET (C); break;
2510 case CC: res = IS_CLEAR (C); break;
2511 case MI: res = IS_SET (N); break;
2512 case PL: res = IS_CLEAR (N); break;
2513 case VS: res = IS_SET (V); break;
2514 case VC: res = IS_CLEAR (V); break;
2515 case HI: res = IS_SET (C) && IS_CLEAR (Z); break;
2516 case LS: res = IS_CLEAR (C) || IS_SET (Z); break;
2517 case GE: res = IS_SET (N) == IS_SET (V); break;
2518 case LT: res = IS_SET (N) != IS_SET (V); break;
2519 case GT: res = IS_CLEAR (Z) && (IS_SET (N) == IS_SET (V)); break;
2520 case LE: res = IS_SET (Z) || (IS_SET (N) != IS_SET (V)); break;
2521 case AL:
2522 case NV:
2523 default:
2524 res = 1;
2525 break;
2526 }
2527 return res;
2528 }
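/* Informative example: after a compare of two equal values Z is set and
   N == V, so EQ, GE and LE report true here while NE, LT and GT report
   false, matching the architectural condition table.  */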
2529
2530 static void
2531 CondCompare (sim_cpu *cpu) /* aka: ccmp and ccmn */
2532 {
2533 /* instr[31] = size : 0 ==> 32 bit, 1 ==> 64 bit
2534 instr[30] = compare with positive (1) or negative value (0)
2535 instr[29,21] = 1 1101 0010
2536 instr[20,16] = Rm or const
2537 instr[15,12] = cond
2538 instr[11] = compare reg (0) or const (1)
2539 instr[10] = 0
2540 instr[9,5] = Rn
2541 instr[4] = 0
2542 instr[3,0] = value for CPSR bits if the comparison does not take place. */
2543 signed int negate;
2544 unsigned rm;
2545 unsigned rn;
2546
2547 NYI_assert (29, 21, 0x1d2);
2548 NYI_assert (10, 10, 0);
2549 NYI_assert (4, 4, 0);
2550
2551 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
2552 if (! testConditionCode (cpu, INSTR (15, 12)))
2553 {
2554 aarch64_set_CPSR (cpu, INSTR (3, 0));
2555 return;
2556 }
2557
2558 negate = INSTR (30, 30) ? 1 : -1;
2559 rm = INSTR (20, 16);
2560 rn = INSTR ( 9, 5);
2561
2562 if (INSTR (31, 31))
2563 {
2564 if (INSTR (11, 11))
2565 set_flags_for_sub64 (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK),
2566 negate * (uint64_t) rm);
2567 else
2568 set_flags_for_sub64 (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK),
2569 negate * aarch64_get_reg_u64 (cpu, rm, SP_OK));
2570 }
2571 else
2572 {
2573 if (INSTR (11, 11))
2574 set_flags_for_sub32 (cpu, aarch64_get_reg_u32 (cpu, rn, SP_OK),
2575 negate * rm);
2576 else
2577 set_flags_for_sub32 (cpu, aarch64_get_reg_u32 (cpu, rn, SP_OK),
2578 negate * aarch64_get_reg_u32 (cpu, rm, SP_OK));
2579 }
2580 }
2581
2582 static void
2583 do_vec_MOV_whole_vector (sim_cpu *cpu)
2584 {
2585 /* MOV Vd.T, Vs.T (alias for ORR Vd.T, Vn.T, Vm.T where Vn == Vm)
2586
2587 instr[31] = 0
2588 instr[30] = half(0)/full(1)
2589 instr[29,21] = 001110101
2590 instr[20,16] = Vs
2591 instr[15,10] = 000111
2592 instr[9,5] = Vs
2593 instr[4,0] = Vd */
2594
2595 unsigned vs = INSTR (9, 5);
2596 unsigned vd = INSTR (4, 0);
2597
2598 NYI_assert (29, 21, 0x075);
2599 NYI_assert (15, 10, 0x07);
2600
2601 if (INSTR (20, 16) != vs)
2602 HALT_NYI;
2603
2604 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
2605 if (INSTR (30, 30))
2606 aarch64_set_vec_u64 (cpu, vd, 1, aarch64_get_vec_u64 (cpu, vs, 1));
2607
2608 aarch64_set_vec_u64 (cpu, vd, 0, aarch64_get_vec_u64 (cpu, vs, 0));
2609 }
2610
2611 static void
2612 do_vec_SMOV_into_scalar (sim_cpu *cpu)
2613 {
2614 /* instr[31] = 0
2615 instr[30] = word(0)/long(1)
2616 instr[29,21] = 00 1110 000
2617 instr[20,16] = element size and index
2618 instr[15,10] = 00 0010 11
2619 instr[9,5] = V source
2620 instr[4,0] = R dest */
2621
2622 unsigned vs = INSTR (9, 5);
2623 unsigned rd = INSTR (4, 0);
2624 unsigned imm5 = INSTR (20, 16);
2625 unsigned full = INSTR (30, 30);
2626 int size, index;
2627
2628 NYI_assert (29, 21, 0x070);
2629 NYI_assert (15, 10, 0x0B);
2630
2631 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
2632
2633 if (imm5 & 0x1)
2634 {
2635 size = 0;
2636 index = (imm5 >> 1) & 0xF;
2637 }
2638 else if (imm5 & 0x2)
2639 {
2640 size = 1;
2641 index = (imm5 >> 2) & 0x7;
2642 }
2643 else if (full && (imm5 & 0x4))
2644 {
2645 size = 2;
2646 index = (imm5 >> 3) & 0x3;
2647 }
2648 else
2649 HALT_UNALLOC;
2650
2651 switch (size)
2652 {
2653 case 0:
2654 if (full)
2655 aarch64_set_reg_s64 (cpu, rd, NO_SP,
2656 aarch64_get_vec_s8 (cpu, vs, index));
2657 else
2658 aarch64_set_reg_s32 (cpu, rd, NO_SP,
2659 aarch64_get_vec_s8 (cpu, vs, index));
2660 break;
2661
2662 case 1:
2663 if (full)
2664 aarch64_set_reg_s64 (cpu, rd, NO_SP,
2665 aarch64_get_vec_s16 (cpu, vs, index));
2666 else
2667 aarch64_set_reg_s32 (cpu, rd, NO_SP,
2668 aarch64_get_vec_s16 (cpu, vs, index));
2669 break;
2670
2671 case 2:
2672 aarch64_set_reg_s64 (cpu, rd, NO_SP,
2673 aarch64_get_vec_s32 (cpu, vs, index));
2674 break;
2675
2676 default:
2677 HALT_UNALLOC;
2678 }
2679 }
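/* Informative example: SMOV W0, V1.B[3] encodes imm5 = 0b00111; the low
   set bit selects byte elements (size 0), index = (imm5 >> 1) & 0xF = 3,
   and byte 3 of V1 is sign extended into W0.  */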
2680
2681 static void
2682 do_vec_UMOV_into_scalar (sim_cpu *cpu)
2683 {
2684 /* instr[31] = 0
2685 instr[30] = word(0)/long(1)
2686 instr[29,21] = 00 1110 000
2687 instr[20,16] = element size and index
2688 instr[15,10] = 00 0011 11
2689 instr[9,5] = V source
2690 instr[4,0] = R dest */
2691
2692 unsigned vs = INSTR (9, 5);
2693 unsigned rd = INSTR (4, 0);
2694 unsigned imm5 = INSTR (20, 16);
2695 unsigned full = INSTR (30, 30);
2696 int size, index;
2697
2698 NYI_assert (29, 21, 0x070);
2699 NYI_assert (15, 10, 0x0F);
2700
2701 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
2702
2703 if (!full)
2704 {
2705 if (imm5 & 0x1)
2706 {
2707 size = 0;
2708 index = (imm5 >> 1) & 0xF;
2709 }
2710 else if (imm5 & 0x2)
2711 {
2712 size = 1;
2713 index = (imm5 >> 2) & 0x7;
2714 }
2715 else if (imm5 & 0x4)
2716 {
2717 size = 2;
2718 index = (imm5 >> 3) & 0x3;
2719 }
2720 else
2721 HALT_UNALLOC;
2722 }
2723 else if (imm5 & 0x8)
2724 {
2725 size = 3;
2726 index = (imm5 >> 4) & 0x1;
2727 }
2728 else
2729 HALT_UNALLOC;
2730
2731 switch (size)
2732 {
2733 case 0:
2734 aarch64_set_reg_u32 (cpu, rd, NO_SP,
2735 aarch64_get_vec_u8 (cpu, vs, index));
2736 break;
2737
2738 case 1:
2739 aarch64_set_reg_u32 (cpu, rd, NO_SP,
2740 aarch64_get_vec_u16 (cpu, vs, index));
2741 break;
2742
2743 case 2:
2744 aarch64_set_reg_u32 (cpu, rd, NO_SP,
2745 aarch64_get_vec_u32 (cpu, vs, index));
2746 break;
2747
2748 case 3:
2749 aarch64_set_reg_u64 (cpu, rd, NO_SP,
2750 aarch64_get_vec_u64 (cpu, vs, index));
2751 break;
2752
2753 default:
2754 HALT_UNALLOC;
2755 }
2756 }
2757
2758 static void
2759 do_vec_INS (sim_cpu *cpu)
2760 {
2761 /* instr[31,21] = 01001110000
2762 instr[20,16] = element size and index
2763 instr[15,10] = 000111
2764 instr[9,5] = W source
2765 instr[4,0] = V dest */
2766
2767 int index;
2768 unsigned rs = INSTR (9, 5);
2769 unsigned vd = INSTR (4, 0);
2770
2771 NYI_assert (31, 21, 0x270);
2772 NYI_assert (15, 10, 0x07);
2773
2774 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
2775 if (INSTR (16, 16))
2776 {
2777 index = INSTR (20, 17);
2778 aarch64_set_vec_u8 (cpu, vd, index,
2779 aarch64_get_reg_u8 (cpu, rs, NO_SP));
2780 }
2781 else if (INSTR (17, 17))
2782 {
2783 index = INSTR (20, 18);
2784 aarch64_set_vec_u16 (cpu, vd, index,
2785 aarch64_get_reg_u16 (cpu, rs, NO_SP));
2786 }
2787 else if (INSTR (18, 18))
2788 {
2789 index = INSTR (20, 19);
2790 aarch64_set_vec_u32 (cpu, vd, index,
2791 aarch64_get_reg_u32 (cpu, rs, NO_SP));
2792 }
2793 else if (INSTR (19, 19))
2794 {
2795 index = INSTR (20, 20);
2796 aarch64_set_vec_u64 (cpu, vd, index,
2797 aarch64_get_reg_u64 (cpu, rs, NO_SP));
2798 }
2799 else
2800 HALT_NYI;
2801 }
2802
2803 static void
2804 do_vec_DUP_vector_into_vector (sim_cpu *cpu)
2805 {
2806 /* instr[31] = 0
2807 instr[30] = half(0)/full(1)
2808 instr[29,21] = 00 1110 000
2809 instr[20,16] = element size and index
2810 instr[15,10] = 0000 01
2811 instr[9,5] = V source
2812 instr[4,0] = V dest. */
2813
2814 unsigned full = INSTR (30, 30);
2815 unsigned vs = INSTR (9, 5);
2816 unsigned vd = INSTR (4, 0);
2817 int i, index;
2818
2819 NYI_assert (29, 21, 0x070);
2820 NYI_assert (15, 10, 0x01);
2821
2822 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
2823 if (INSTR (16, 16))
2824 {
2825 index = INSTR (20, 17);
2826
2827 for (i = 0; i < (full ? 16 : 8); i++)
2828 aarch64_set_vec_u8 (cpu, vd, i, aarch64_get_vec_u8 (cpu, vs, index));
2829 }
2830 else if (INSTR (17, 17))
2831 {
2832 index = INSTR (20, 18);
2833
2834 for (i = 0; i < (full ? 8 : 4); i++)
2835 aarch64_set_vec_u16 (cpu, vd, i, aarch64_get_vec_u16 (cpu, vs, index));
2836 }
2837 else if (INSTR (18, 18))
2838 {
2839 index = INSTR (20, 19);
2840
2841 for (i = 0; i < (full ? 4 : 2); i++)
2842 aarch64_set_vec_u32 (cpu, vd, i, aarch64_get_vec_u32 (cpu, vs, index));
2843 }
2844 else
2845 {
2846 if (INSTR (19, 19) == 0)
2847 HALT_UNALLOC;
2848
2849 if (! full)
2850 HALT_UNALLOC;
2851
2852 index = INSTR (20, 20);
2853
2854 for (i = 0; i < 2; i++)
2855 aarch64_set_vec_u64 (cpu, vd, i, aarch64_get_vec_u64 (cpu, vs, index));
2856 }
2857 }
2858
2859 static void
2860 do_vec_TBL (sim_cpu *cpu)
2861 {
2862 /* instr[31] = 0
2863 instr[30] = half(0)/full(1)
2864 instr[29,21] = 00 1110 000
2865 instr[20,16] = Vm
2866 instr[15] = 0
2867 instr[14,13] = vec length
2868 instr[12,10] = 000
2869 instr[9,5] = V start
2870 instr[4,0] = V dest */
2871
2872 int full = INSTR (30, 30);
2873 int len = INSTR (14, 13) + 1;
2874 unsigned vm = INSTR (20, 16);
2875 unsigned vn = INSTR (9, 5);
2876 unsigned vd = INSTR (4, 0);
2877 unsigned i;
2878
2879 NYI_assert (29, 21, 0x070);
2880 NYI_assert (12, 10, 0);
2881
2882 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
2883 for (i = 0; i < (full ? 16 : 8); i++)
2884 {
2885 unsigned int selector = aarch64_get_vec_u8 (cpu, vm, i);
2886 uint8_t val;
2887
2888 if (selector < 16)
2889 val = aarch64_get_vec_u8 (cpu, vn, selector);
2890 else if (selector < 32)
2891 val = len < 2 ? 0 : aarch64_get_vec_u8 (cpu, vn + 1, selector - 16);
2892 else if (selector < 48)
2893 val = len < 3 ? 0 : aarch64_get_vec_u8 (cpu, vn + 2, selector - 32);
2894 else if (selector < 64)
2895 val = len < 4 ? 0 : aarch64_get_vec_u8 (cpu, vn + 3, selector - 48);
2896 else
2897 val = 0;
2898
2899 aarch64_set_vec_u8 (cpu, vd, i, val);
2900 }
2901 }
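/* Informative example: with a single table register (len == 1), a
   selector byte of 5 copies byte 5 of Vn into the result, while any
   selector of 16 or more yields 0, matching the architectural
   out-of-range TBL behaviour.  */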
2902
2903 static void
2904 do_vec_TRN (sim_cpu *cpu)
2905 {
2906 /* instr[31] = 0
2907 instr[30] = half(0)/full(1)
2908 instr[29,24] = 00 1110
2909 instr[23,22] = size
2910 instr[21] = 0
2911 instr[20,16] = Vm
2912 instr[15] = 0
2913 instr[14] = TRN1 (0) / TRN2 (1)
2914 instr[13,10] = 1010
2915 instr[9,5] = V source
2916 instr[4,0] = V dest. */
2917
2918 int full = INSTR (30, 30);
2919 int second = INSTR (14, 14);
2920 unsigned vm = INSTR (20, 16);
2921 unsigned vn = INSTR (9, 5);
2922 unsigned vd = INSTR (4, 0);
2923 unsigned i;
2924
2925 NYI_assert (29, 24, 0x0E);
2926 NYI_assert (13, 10, 0xA);
2927
2928 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
2929 switch (INSTR (23, 22))
2930 {
2931 case 0:
2932 for (i = 0; i < (full ? 8 : 4); i++)
2933 {
2934 aarch64_set_vec_u8
2935 (cpu, vd, i * 2,
2936 aarch64_get_vec_u8 (cpu, second ? vm : vn, i * 2));
2937 aarch64_set_vec_u8
2938 (cpu, vd, i * 2 + 1,
2939 aarch64_get_vec_u8 (cpu, second ? vn : vm, i * 2 + 1));
2940 }
2941 break;
2942
2943 case 1:
2944 for (i = 0; i < (full ? 4 : 2); i++)
2945 {
2946 aarch64_set_vec_u16
2947 (cpu, vd, i * 2,
2948 aarch64_get_vec_u16 (cpu, second ? vm : vn, i * 2));
2949 aarch64_set_vec_u16
2950 (cpu, vd, i * 2 + 1,
2951 aarch64_get_vec_u16 (cpu, second ? vn : vm, i * 2 + 1));
2952 }
2953 break;
2954
2955 case 2:
2956 aarch64_set_vec_u32
2957 (cpu, vd, 0, aarch64_get_vec_u32 (cpu, second ? vm : vn, 0));
2958 aarch64_set_vec_u32
2959 (cpu, vd, 1, aarch64_get_vec_u32 (cpu, second ? vn : vm, 1));
2960 aarch64_set_vec_u32
2961 (cpu, vd, 2, aarch64_get_vec_u32 (cpu, second ? vm : vn, 2));
2962 aarch64_set_vec_u32
2963 (cpu, vd, 3, aarch64_get_vec_u32 (cpu, second ? vn : vm, 3));
2964 break;
2965
2966 case 3:
2967 if (! full)
2968 HALT_UNALLOC;
2969
2970 aarch64_set_vec_u64 (cpu, vd, 0,
2971 aarch64_get_vec_u64 (cpu, second ? vm : vn, 0));
2972 aarch64_set_vec_u64 (cpu, vd, 1,
2973 aarch64_get_vec_u64 (cpu, second ? vn : vm, 1));
2974 break;
2975 }
2976 }
2977
2978 static void
2979 do_vec_DUP_scalar_into_vector (sim_cpu *cpu)
2980 {
2981 /* instr[31] = 0
2982 instr[30] = 0=> zero top 64-bits, 1=> duplicate into top 64-bits
2983 [must be 1 for 64-bit xfer]
2984 instr[29,20] = 00 1110 0000
2985 instr[19,16] = element size: 0001=> 8-bits, 0010=> 16-bits,
2986 0100=> 32-bits. 1000=>64-bits
2987 instr[15,10] = 0000 11
2988 instr[9,5] = W source
2989 instr[4,0] = V dest. */
2990
2991 unsigned i;
2992 unsigned Vd = INSTR (4, 0);
2993 unsigned Rs = INSTR (9, 5);
2994 int both = INSTR (30, 30);
2995
2996 NYI_assert (29, 20, 0x0E0);
2997 NYI_assert (15, 10, 0x03);
2998
2999 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
3000 switch (INSTR (19, 16))
3001 {
3002 case 1:
3003 for (i = 0; i < (both ? 16 : 8); i++)
3004 aarch64_set_vec_u8 (cpu, Vd, i, aarch64_get_reg_u8 (cpu, Rs, NO_SP));
3005 break;
3006
3007 case 2:
3008 for (i = 0; i < (both ? 8 : 4); i++)
3009 aarch64_set_vec_u16 (cpu, Vd, i, aarch64_get_reg_u16 (cpu, Rs, NO_SP));
3010 break;
3011
3012 case 4:
3013 for (i = 0; i < (both ? 4 : 2); i++)
3014 aarch64_set_vec_u32 (cpu, Vd, i, aarch64_get_reg_u32 (cpu, Rs, NO_SP));
3015 break;
3016
3017 case 8:
3018 if (!both)
3019 HALT_NYI;
3020 aarch64_set_vec_u64 (cpu, Vd, 0, aarch64_get_reg_u64 (cpu, Rs, NO_SP));
3021 aarch64_set_vec_u64 (cpu, Vd, 1, aarch64_get_reg_u64 (cpu, Rs, NO_SP));
3022 break;
3023
3024 default:
3025 HALT_NYI;
3026 }
3027 }
3028
3029 static void
3030 do_vec_UZP (sim_cpu *cpu)
3031 {
3032 /* instr[31] = 0
3033 instr[30] = half(0)/full(1)
3034 instr[29,24] = 00 1110
3035 instr[23,22] = size: byte(00), half(01), word (10), long (11)
3036 instr[21] = 0
3037 instr[20,16] = Vm
3038 instr[15] = 0
3039 instr[14] = lower (0) / upper (1)
3040 instr[13,10] = 0110
3041 instr[9,5] = Vn
3042 instr[4,0] = Vd. */
3043
3044 int full = INSTR (30, 30);
3045 int upper = INSTR (14, 14);
3046
3047 unsigned vm = INSTR (20, 16);
3048 unsigned vn = INSTR (9, 5);
3049 unsigned vd = INSTR (4, 0);
3050
3051 uint64_t val_m1 = aarch64_get_vec_u64 (cpu, vm, 0);
3052 uint64_t val_m2 = aarch64_get_vec_u64 (cpu, vm, 1);
3053 uint64_t val_n1 = aarch64_get_vec_u64 (cpu, vn, 0);
3054 uint64_t val_n2 = aarch64_get_vec_u64 (cpu, vn, 1);
3055
3056 uint64_t val1;
3057 uint64_t val2;
3058
3059 uint64_t input2 = full ? val_n2 : val_m1;
3060
3061 NYI_assert (29, 24, 0x0E);
3062 NYI_assert (21, 21, 0);
3063 NYI_assert (15, 15, 0);
3064 NYI_assert (13, 10, 6);
3065
3066 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
3067 switch (INSTR (23, 22))
3068 {
3069 case 0:
3070 val1 = (val_n1 >> (upper * 8)) & 0xFFULL;
3071 val1 |= (val_n1 >> ((upper * 8) + 8)) & 0xFF00ULL;
3072 val1 |= (val_n1 >> ((upper * 8) + 16)) & 0xFF0000ULL;
3073 val1 |= (val_n1 >> ((upper * 8) + 24)) & 0xFF000000ULL;
3074
3075 val1 |= (input2 << (32 - (upper * 8))) & 0xFF00000000ULL;
3076 val1 |= (input2 << (24 - (upper * 8))) & 0xFF0000000000ULL;
3077 val1 |= (input2 << (16 - (upper * 8))) & 0xFF000000000000ULL;
3078 val1 |= (input2 << (8 - (upper * 8))) & 0xFF00000000000000ULL;
3079
3080 if (full)
3081 {
3082 val2 = (val_m1 >> (upper * 8)) & 0xFFULL;
3083 val2 |= (val_m1 >> ((upper * 8) + 8)) & 0xFF00ULL;
3084 val2 |= (val_m1 >> ((upper * 8) + 16)) & 0xFF0000ULL;
3085 val2 |= (val_m1 >> ((upper * 8) + 24)) & 0xFF000000ULL;
3086
3087 val2 |= (val_m2 << (32 - (upper * 8))) & 0xFF00000000ULL;
3088 val2 |= (val_m2 << (24 - (upper * 8))) & 0xFF0000000000ULL;
3089 val2 |= (val_m2 << (16 - (upper * 8))) & 0xFF000000000000ULL;
3090 val2 |= (val_m2 << (8 - (upper * 8))) & 0xFF00000000000000ULL;
3091 }
3092 break;
3093
3094 case 1:
3095 val1 = (val_n1 >> (upper * 16)) & 0xFFFFULL;
3096 val1 |= (val_n1 >> ((upper * 16) + 16)) & 0xFFFF0000ULL;
3097
3098 val1 |= (input2 << (32 - (upper * 16))) & 0xFFFF00000000ULL;
3099 val1 |= (input2 << (16 - (upper * 16))) & 0xFFFF000000000000ULL;
3100
3101 if (full)
3102 {
3103 val2 = (val_m1 >> (upper * 16)) & 0xFFFFULL;
3104 val2 |= (val_m1 >> ((upper * 16) + 16)) & 0xFFFF0000ULL;
3105
3106 val2 |= (val_m2 << (32 - (upper * 16))) & 0xFFFF00000000ULL;
3107 val2 |= (val_m2 << (16 - (upper * 16))) & 0xFFFF000000000000ULL;
3108 }
3109 break;
3110
3111 case 2:
3112 val1 = (val_n1 >> (upper * 32)) & 0xFFFFFFFF;
3113 val1 |= (input2 << (32 - (upper * 32))) & 0xFFFFFFFF00000000ULL;
3114
3115 if (full)
3116 {
3117 val2 = (val_m1 >> (upper * 32)) & 0xFFFFFFFF;
3118 val2 |= (val_m2 << (32 - (upper * 32))) & 0xFFFFFFFF00000000ULL;
3119 }
3120 break;
3121
3122 case 3:
3123 if (! full)
3124 HALT_UNALLOC;
3125
3126 val1 = upper ? val_n2 : val_n1;
3127 val2 = upper ? val_m2 : val_m1;
3128 break;
3129 }
3130
3131 aarch64_set_vec_u64 (cpu, vd, 0, val1);
3132 if (full)
3133 aarch64_set_vec_u64 (cpu, vd, 1, val2);
3134 }
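/* Informative example: for UZP1 Vd.2D, Vn.2D, Vm.2D (size 11,
   upper == 0) the code above selects val1 = element 0 of Vn and
   val2 = element 0 of Vm, i.e. the even-numbered doublewords of the
   concatenated sources.  */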
3135
3136 static void
3137 do_vec_ZIP (sim_cpu *cpu)
3138 {
3139 /* instr[31] = 0
3140 instr[30] = half(0)/full(1)
3141 instr[29,24] = 00 1110
3142 instr[23,22] = size: byte(00), hald(01), word (10), long (11)
3143 instr[21] = 0
3144 instr[20,16] = Vm
3145 instr[15] = 0
3146 instr[14] = lower (0) / upper (1)
3147 instr[13,10] = 1110
3148 instr[9,5] = Vn
3149 instr[4,0] = Vd. */
3150
3151 int full = INSTR (30, 30);
3152 int upper = INSTR (14, 14);
3153
3154 unsigned vm = INSTR (20, 16);
3155 unsigned vn = INSTR (9, 5);
3156 unsigned vd = INSTR (4, 0);
3157
3158 uint64_t val_m1 = aarch64_get_vec_u64 (cpu, vm, 0);
3159 uint64_t val_m2 = aarch64_get_vec_u64 (cpu, vm, 1);
3160 uint64_t val_n1 = aarch64_get_vec_u64 (cpu, vn, 0);
3161 uint64_t val_n2 = aarch64_get_vec_u64 (cpu, vn, 1);
3162
3163 uint64_t val1 = 0;
3164 uint64_t val2 = 0;
3165
3166 uint64_t input1 = upper ? val_n2 : val_n1;
3167 uint64_t input2 = upper ? val_m2 : val_m1;
3168
3169 NYI_assert (29, 24, 0x0E);
3170 NYI_assert (21, 21, 0);
3171 NYI_assert (15, 15, 0);
3172 NYI_assert (13, 10, 0xE);
3173
3174 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
3175 switch (INSTR (23, 22))
3176 {
3177 case 0:
3178 val1 =
3179 ((input1 << 0) & (0xFF << 0))
3180 | ((input2 << 8) & (0xFF << 8))
3181 | ((input1 << 8) & (0xFF << 16))
3182 | ((input2 << 16) & (0xFF << 24))
3183 | ((input1 << 16) & (0xFFULL << 32))
3184 | ((input2 << 24) & (0xFFULL << 40))
3185 | ((input1 << 24) & (0xFFULL << 48))
3186 | ((input2 << 32) & (0xFFULL << 56));
3187
3188 val2 =
3189 ((input1 >> 32) & (0xFF << 0))
3190 | ((input2 >> 24) & (0xFF << 8))
3191 | ((input1 >> 24) & (0xFF << 16))
3192 | ((input2 >> 16) & (0xFF << 24))
3193 | ((input1 >> 16) & (0xFFULL << 32))
3194 | ((input2 >> 8) & (0xFFULL << 40))
3195 | ((input1 >> 8) & (0xFFULL << 48))
3196 | ((input2 >> 0) & (0xFFULL << 56));
3197 break;
3198
3199 case 1:
3200 val1 =
3201 ((input1 << 0) & (0xFFFF << 0))
3202 | ((input2 << 16) & (0xFFFF << 16))
3203 | ((input1 << 16) & (0xFFFFULL << 32))
3204 | ((input2 << 32) & (0xFFFFULL << 48));
3205
3206 val2 =
3207 ((input1 >> 32) & (0xFFFF << 0))
3208 | ((input2 >> 16) & (0xFFFF << 16))
3209 | ((input1 >> 16) & (0xFFFFULL << 32))
3210 | ((input2 >> 0) & (0xFFFFULL << 48));
3211 break;
3212
3213 case 2:
3214 val1 = (input1 & 0xFFFFFFFFULL) | (input2 << 32);
3215 val2 = (input2 & 0xFFFFFFFFULL) | (input1 << 32);
3216 break;
3217
3218 case 3:
3219 val1 = input1;
3220 val2 = input2;
3221 break;
3222 }
3223
3224 aarch64_set_vec_u64 (cpu, vd, 0, val1);
3225 if (full)
3226 aarch64_set_vec_u64 (cpu, vd, 1, val2);
3227 }
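/* Informative example: for ZIP1 Vd.2S, Vn.2S, Vm.2S the word case above
   produces val1 = (low word of Vn) | (low word of Vm) << 32, i.e.
   Vd[0] = Vn[0] and Vd[1] = Vm[0], the expected interleave of the lower
   halves.  */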
3228
3229 /* Floating point immediates are encoded in 8 bits.
3230 fpimm[7] = sign bit.
3231 fpimm[6:4] = signed exponent.
3232 fpimm[3:0] = fraction (assuming leading 1).
3233 i.e. F = s * 1.f * 2^(e - b). */
3234
3235 static float
3236 fp_immediate_for_encoding_32 (uint32_t imm8)
3237 {
3238 float u;
3239 uint32_t s, e, f, i;
3240
3241 s = (imm8 >> 7) & 0x1;
3242 e = (imm8 >> 4) & 0x7;
3243 f = imm8 & 0xf;
3244
3245 /* The fp value is s * n/16 * 2^r where n is 16+f and r is the signed exponent. */
3246 u = (16.0 + f) / 16.0;
3247
3248 /* N.B. exponent is signed. */
3249 if (e < 4)
3250 {
3251 int epos = e;
3252
3253 for (i = 0; i <= epos; i++)
3254 u *= 2.0;
3255 }
3256 else
3257 {
3258 int eneg = 7 - e;
3259
3260 for (i = 0; i < eneg; i++)
3261 u /= 2.0;
3262 }
3263
3264 if (s)
3265 u = - u;
3266
3267 return u;
3268 }
3269
3270 static double
3271 fp_immediate_for_encoding_64 (uint32_t imm8)
3272 {
3273 double u;
3274 uint32_t s, e, f, i;
3275
3276 s = (imm8 >> 7) & 0x1;
3277 e = (imm8 >> 4) & 0x7;
3278 f = imm8 & 0xf;
3279
3280 /* The fp value is s * n/16 * 2^r where n is 16+f and r is the signed exponent. */
3281 u = (16.0 + f) / 16.0;
3282
3283 /* N.B. exponent is signed. */
3284 if (e < 4)
3285 {
3286 int epos = e;
3287
3288 for (i = 0; i <= epos; i++)
3289 u *= 2.0;
3290 }
3291 else
3292 {
3293 int eneg = 7 - e;
3294
3295 for (i = 0; i < eneg; i++)
3296 u /= 2.0;
3297 }
3298
3299 if (s)
3300 u = - u;
3301
3302 return u;
3303 }
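/* Informative worked example: imm8 = 0x70 gives s = 0, e = 7, f = 0, so
   u starts at 16/16 = 1.0 and, since e >= 4, is divided by 2.0 exactly
   (7 - e) = 0 times; the encoding 0x70 therefore denotes 1.0, the
   canonical FMOV #1.0 immediate.  Similarly imm8 = 0x00 expands to 2.0.  */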
3304
3305 static void
3306 do_vec_MOV_immediate (sim_cpu *cpu)
3307 {
3308 /* instr[31] = 0
3309 instr[30] = full/half selector
3310 instr[29,19] = 00111100000
3311 instr[18,16] = high 3 bits of uimm8
3312 instr[15,12] = size & shift:
3313 0000 => 32-bit
3314 0010 => 32-bit + LSL#8
3315 0100 => 32-bit + LSL#16
3316 0110 => 32-bit + LSL#24
3317 1010 => 16-bit + LSL#8
3318 1000 => 16-bit
3319 1101 => 32-bit + MSL#16
3320 1100 => 32-bit + MSL#8
3321 1110 => 8-bit
3322 1111 => double
3323 instr[11,10] = 01
3324 instr[9,5] = low 5-bits of uimm8
3325 instr[4,0] = Vd. */
3326
3327 int full = INSTR (30, 30);
3328 unsigned vd = INSTR (4, 0);
3329 unsigned val = (INSTR (18, 16) << 5) | INSTR (9, 5);
3330 unsigned i;
3331
3332 NYI_assert (29, 19, 0x1E0);
3333 NYI_assert (11, 10, 1);
3334
3335 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
3336 switch (INSTR (15, 12))
3337 {
3338 case 0x0: /* 32-bit, no shift. */
3339 case 0x2: /* 32-bit, shift by 8. */
3340 case 0x4: /* 32-bit, shift by 16. */
3341 case 0x6: /* 32-bit, shift by 24. */
3342 val <<= (8 * INSTR (14, 13));
3343 for (i = 0; i < (full ? 4 : 2); i++)
3344 aarch64_set_vec_u32 (cpu, vd, i, val);
3345 break;
3346
3347 case 0xa: /* 16-bit, shift by 8. */
3348 val <<= 8;
3349 /* Fall through. */
3350 case 0x8: /* 16-bit, no shift. */
3351 for (i = 0; i < (full ? 8 : 4); i++)
3352 aarch64_set_vec_u16 (cpu, vd, i, val);
3353 break;
3354
3355 case 0xd: /* 32-bit, mask shift by 16. */
3356 val <<= 8;
3357 val |= 0xFF;
3358 /* Fall through. */
3359 case 0xc: /* 32-bit, mask shift by 8. */
3360 val <<= 8;
3361 val |= 0xFF;
3362 for (i = 0; i < (full ? 4 : 2); i++)
3363 aarch64_set_vec_u32 (cpu, vd, i, val);
3364 break;
3365
3366 case 0xe: /* 8-bit, no shift. */
3367 for (i = 0; i < (full ? 16 : 8); i++)
3368 aarch64_set_vec_u8 (cpu, vd, i, val);
3369 break;
3370
3371 case 0xf: /* FMOV Vs.{2|4}S, #fpimm. */
3372 {
3373 float u = fp_immediate_for_encoding_32 (val);
3374 for (i = 0; i < (full ? 4 : 2); i++)
3375 aarch64_set_vec_float (cpu, vd, i, u);
3376 break;
3377 }
3378
3379 default:
3380 HALT_NYI;
3381 }
3382 }
3383
3384 static void
3385 do_vec_MVNI (sim_cpu *cpu)
3386 {
3387 /* instr[31] = 0
3388 instr[30] = full/half selector
3389 instr[29,19] = 10111100000
3390 instr[18,16] = high 3 bits of uimm8
3391 instr[15,12] = selector
3392 instr[11,10] = 01
3393 instr[9,5] = low 5-bits of uimm8
3394 instr[4,0] = Vd. */
3395
3396 int full = INSTR (30, 30);
3397 unsigned vd = INSTR (4, 0);
3398 unsigned val = (INSTR (18, 16) << 5) | INSTR (9, 5);
3399 unsigned i;
3400
3401 NYI_assert (29, 19, 0x5E0);
3402 NYI_assert (11, 10, 1);
3403
3404 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
3405 switch (INSTR (15, 12))
3406 {
3407 case 0x0: /* 32-bit, no shift. */
3408 case 0x2: /* 32-bit, shift by 8. */
3409 case 0x4: /* 32-bit, shift by 16. */
3410 case 0x6: /* 32-bit, shift by 24. */
3411 val <<= (8 * INSTR (14, 13));
3412 val = ~ val;
3413 for (i = 0; i < (full ? 4 : 2); i++)
3414 aarch64_set_vec_u32 (cpu, vd, i, val);
3415 return;
3416
3417 case 0xa: /* 16-bit, shift by 8. */
3418 val <<= 8; /* Fall through. */
3419 case 0x8: /* 16-bit, no shift. */
3420 val = ~ val;
3421 for (i = 0; i < (full ? 8 : 4); i++)
3422 aarch64_set_vec_u16 (cpu, vd, i, val);
3423 return;
3424
3425 case 0xd: /* 32-bit, mask shift by 16. */
3426 val <<= 8;
3427 val |= 0xFF; /* Fall through. */
3428 case 0xc: /* 32-bit, mask shift by 8. */
3429 val <<= 8;
3430 val |= 0xFF;
3431 val = ~ val;
3432 for (i = 0; i < (full ? 4 : 2); i++)
3433 aarch64_set_vec_u32 (cpu, vd, i, val);
3434 return;
3435
3436 case 0xe: /* MOVI Dn, #mask64. */
3437 {
3438 uint64_t mask = 0;
3439
3440 for (i = 0; i < 8; i++)
3441 if (val & (1 << i))
3442 mask |= (0xFFULL << (i * 8));
3443 aarch64_set_vec_u64 (cpu, vd, 0, mask);
3444 aarch64_set_vec_u64 (cpu, vd, 1, mask);
3445 return;
3446 }
3447
3448 case 0xf: /* FMOV Vd.2D, #fpimm. */
3449 {
3450 double u = fp_immediate_for_encoding_64 (val);
3451
3452 if (! full)
3453 HALT_UNALLOC;
3454
3455 aarch64_set_vec_double (cpu, vd, 0, u);
3456 aarch64_set_vec_double (cpu, vd, 1, u);
3457 return;
3458 }
3459
3460 default:
3461 HALT_NYI;
3462 }
3463 }
3464
3465 #define ABS(A) ((A) < 0 ? - (A) : (A))
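/* N.B. as with the C library abs functions, ABS applied to the most
   negative representable value overflows; the vector ABS emulations
   below inherit that two's-complement wrap behaviour.  */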
3466
3467 static void
3468 do_vec_ABS (sim_cpu *cpu)
3469 {
3470 /* instr[31] = 0
3471 instr[30] = half(0)/full(1)
3472 instr[29,24] = 00 1110
3473 instr[23,22] = size: 00=> 8-bit, 01=> 16-bit, 10=> 32-bit, 11=> 64-bit
3474 instr[21,10] = 10 0000 1011 10
3475 instr[9,5] = Vn
3476 instr[4,0] = Vd. */
3477
3478 unsigned vn = INSTR (9, 5);
3479 unsigned vd = INSTR (4, 0);
3480 unsigned full = INSTR (30, 30);
3481 unsigned i;
3482
3483 NYI_assert (29, 24, 0x0E);
3484 NYI_assert (21, 10, 0x82E);
3485
3486 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
3487 switch (INSTR (23, 22))
3488 {
3489 case 0:
3490 for (i = 0; i < (full ? 16 : 8); i++)
3491 aarch64_set_vec_s8 (cpu, vd, i,
3492 ABS (aarch64_get_vec_s8 (cpu, vn, i)));
3493 break;
3494
3495 case 1:
3496 for (i = 0; i < (full ? 8 : 4); i++)
3497 aarch64_set_vec_s16 (cpu, vd, i,
3498 ABS (aarch64_get_vec_s16 (cpu, vn, i)));
3499 break;
3500
3501 case 2:
3502 for (i = 0; i < (full ? 4 : 2); i++)
3503 aarch64_set_vec_s32 (cpu, vd, i,
3504 ABS (aarch64_get_vec_s32 (cpu, vn, i)));
3505 break;
3506
3507 case 3:
3508 if (! full)
3509 HALT_NYI;
3510 for (i = 0; i < 2; i++)
3511 aarch64_set_vec_s64 (cpu, vd, i,
3512 ABS (aarch64_get_vec_s64 (cpu, vn, i)));
3513 break;
3514 }
3515 }
3516
3517 static void
3518 do_vec_ADDV (sim_cpu *cpu)
3519 {
3520 /* instr[31] = 0
3521 instr[30] = full/half selector
3522 instr[29,24] = 00 1110
3523 instr[23,22] = size: 00=> 8-bit, 01=> 16-bit, 10=> 32-bit, 11=> 64-bit
3524 instr[21,10] = 11 0001 1011 10
3525 instr[9,5] = Vm
3526 instr[4,0] = Rd. */
3527
3528 unsigned vm = INSTR (9, 5);
3529 unsigned rd = INSTR (4, 0);
3530 unsigned i;
3531 int full = INSTR (30, 30);
3532
3533 NYI_assert (29, 24, 0x0E);
3534 NYI_assert (21, 10, 0xC6E);
3535
3536 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
3537 switch (INSTR (23, 22))
3538 {
3539 case 0:
3540 {
3541 uint8_t val = 0;
3542 for (i = 0; i < (full ? 16 : 8); i++)
3543 val += aarch64_get_vec_u8 (cpu, vm, i);
3544 aarch64_set_vec_u64 (cpu, rd, 0, val);
3545 return;
3546 }
3547
3548 case 1:
3549 {
3550 uint16_t val = 0;
3551 for (i = 0; i < (full ? 8 : 4); i++)
3552 val += aarch64_get_vec_u16 (cpu, vm, i);
3553 aarch64_set_vec_u64 (cpu, rd, 0, val);
3554 return;
3555 }
3556
3557 case 2:
3558 {
3559 uint32_t val = 0;
3560 if (! full)
3561 HALT_UNALLOC;
3562 for (i = 0; i < 4; i++)
3563 val += aarch64_get_vec_u32 (cpu, vm, i);
3564 aarch64_set_vec_u64 (cpu, rd, 0, val);
3565 return;
3566 }
3567
3568 case 3:
3569 HALT_UNALLOC;
3570 }
3571 }
3572
3573 static void
3574 do_vec_ins_2 (sim_cpu *cpu)
3575 {
3576 /* instr[31,21] = 01001110000
3577 instr[20,18] = size & element selector
3578 instr[17,14] = 0000
3579 instr[13] = direction: to vec(0), from vec (1)
3580 instr[12,10] = 111
3581 instr[9,5] = Vm
3582 instr[4,0] = Vd. */
3583
3584 unsigned elem;
3585 unsigned vm = INSTR (9, 5);
3586 unsigned vd = INSTR (4, 0);
3587
3588 NYI_assert (31, 21, 0x270);
3589 NYI_assert (17, 14, 0);
3590 NYI_assert (12, 10, 7);
3591
3592 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
3593 if (INSTR (13, 13) == 1)
3594 {
3595 if (INSTR (18, 18) == 1)
3596 {
3597 /* 32-bit moves. */
3598 elem = INSTR (20, 19);
3599 aarch64_set_reg_u64 (cpu, vd, NO_SP,
3600 aarch64_get_vec_u32 (cpu, vm, elem));
3601 }
3602 else
3603 {
3604 /* 64-bit moves. */
3605 if (INSTR (19, 19) != 1)
3606 HALT_NYI;
3607
3608 elem = INSTR (20, 20);
3609 aarch64_set_reg_u64 (cpu, vd, NO_SP,
3610 aarch64_get_vec_u64 (cpu, vm, elem));
3611 }
3612 }
3613 else
3614 {
3615 if (INSTR (18, 18) == 1)
3616 {
3617 /* 32-bit moves. */
3618 elem = INSTR (20, 19);
3619 aarch64_set_vec_u32 (cpu, vd, elem,
3620 aarch64_get_reg_u32 (cpu, vm, NO_SP));
3621 }
3622 else
3623 {
3624 /* 64-bit moves. */
3625 if (INSTR (19, 19) != 1)
3626 HALT_NYI;
3627
3628 elem = INSTR (20, 20);
3629 aarch64_set_vec_u64 (cpu, vd, elem,
3630 aarch64_get_reg_u64 (cpu, vm, NO_SP));
3631 }
3632 }
3633 }
3634
3635 #define DO_VEC_WIDENING_MUL(N, DST_TYPE, READ_TYPE, WRITE_TYPE) \
3636 do \
3637 { \
3638 DST_TYPE a[N], b[N]; \
3639 \
3640 for (i = 0; i < (N); i++) \
3641 { \
3642 a[i] = aarch64_get_vec_##READ_TYPE (cpu, vn, i + bias); \
3643 b[i] = aarch64_get_vec_##READ_TYPE (cpu, vm, i + bias); \
3644 } \
3645 for (i = 0; i < (N); i++) \
3646 aarch64_set_vec_##WRITE_TYPE (cpu, vd, i, a[i] * b[i]); \
3647 } \
3648 while (0)
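/* N.B. the macro above buffers every source element before any result is
   written, so it remains correct when vd aliases vn or vm; the bias
   variable offsets the element reads, which is how the upper-half forms
   (e.g. UMULL2) are handled.  */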
3649
3650 static void
3651 do_vec_mull (sim_cpu *cpu)
3652 {
3653 /* instr[31] = 0
3654 instr[30] = lower(0)/upper(1) selector
3655 instr[29] = signed(0)/unsigned(1)
3656 instr[28,24] = 0 1110
3657 instr[23,22] = size: 8-bit (00), 16-bit (01), 32-bit (10)
3658 instr[21] = 1
3659 instr[20,16] = Vm
3660 instr[15,10] = 11 0000
3661 instr[9,5] = Vn
3662 instr[4,0] = Vd. */
3663
3664 int unsign = INSTR (29, 29);
3665 int bias = INSTR (30, 30);
3666 unsigned vm = INSTR (20, 16);
3667 unsigned vn = INSTR ( 9, 5);
3668 unsigned vd = INSTR ( 4, 0);
3669 unsigned i;
3670
3671 NYI_assert (28, 24, 0x0E);
3672 NYI_assert (15, 10, 0x30);
3673
3674 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
3675 /* NB: Read source values before writing results, in case
3676 the source and destination vectors are the same. */
3677 switch (INSTR (23, 22))
3678 {
3679 case 0:
3680 if (bias)
3681 bias = 8;
3682 if (unsign)
3683 DO_VEC_WIDENING_MUL (8, uint16_t, u8, u16);
3684 else
3685 DO_VEC_WIDENING_MUL (8, int16_t, s8, s16);
3686 return;
3687
3688 case 1:
3689 if (bias)
3690 bias = 4;
3691 if (unsign)
3692 DO_VEC_WIDENING_MUL (4, uint32_t, u16, u32);
3693 else
3694 DO_VEC_WIDENING_MUL (4, int32_t, s16, s32);
3695 return;
3696
3697 case 2:
3698 if (bias)
3699 bias = 2;
3700 if (unsign)
3701 DO_VEC_WIDENING_MUL (2, uint64_t, u32, u64);
3702 else
3703 DO_VEC_WIDENING_MUL (2, int64_t, s32, s64);
3704 return;
3705
3706 case 3:
3707 HALT_NYI;
3708 }
3709 }
3710
3711 static void
3712 do_vec_fadd (sim_cpu *cpu)
3713 {
3714 /* instr[31] = 0
3715 instr[30] = half(0)/full(1)
3716 instr[29,24] = 001110
3717 instr[23] = FADD(0)/FSUB(1)
3718 instr[22] = float (0)/double(1)
3719 instr[21] = 1
3720 instr[20,16] = Vm
3721 instr[15,10] = 110101
3722 instr[9,5] = Vn
3723 instr[4,0] = Vd. */
3724
3725 unsigned vm = INSTR (20, 16);
3726 unsigned vn = INSTR (9, 5);
3727 unsigned vd = INSTR (4, 0);
3728 unsigned i;
3729 int full = INSTR (30, 30);
3730
3731 NYI_assert (29, 24, 0x0E);
3732 NYI_assert (21, 21, 1);
3733 NYI_assert (15, 10, 0x35);
3734
3735 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
3736 if (INSTR (23, 23))
3737 {
3738 if (INSTR (22, 22))
3739 {
3740 if (! full)
3741 HALT_NYI;
3742
3743 for (i = 0; i < 2; i++)
3744 aarch64_set_vec_double (cpu, vd, i,
3745 aarch64_get_vec_double (cpu, vn, i)
3746 - aarch64_get_vec_double (cpu, vm, i));
3747 }
3748 else
3749 {
3750 for (i = 0; i < (full ? 4 : 2); i++)
3751 aarch64_set_vec_float (cpu, vd, i,
3752 aarch64_get_vec_float (cpu, vn, i)
3753 - aarch64_get_vec_float (cpu, vm, i));
3754 }
3755 }
3756 else
3757 {
3758 if (INSTR (22, 22))
3759 {
3760 if (! full)
3761 HALT_NYI;
3762
3763 for (i = 0; i < 2; i++)
3764 aarch64_set_vec_double (cpu, vd, i,
3765 aarch64_get_vec_double (cpu, vm, i)
3766 + aarch64_get_vec_double (cpu, vn, i));
3767 }
3768 else
3769 {
3770 for (i = 0; i < (full ? 4 : 2); i++)
3771 aarch64_set_vec_float (cpu, vd, i,
3772 aarch64_get_vec_float (cpu, vm, i)
3773 + aarch64_get_vec_float (cpu, vn, i));
3774 }
3775 }
3776 }
3777
3778 static void
3779 do_vec_add (sim_cpu *cpu)
3780 {
3781 /* instr[31] = 0
3782 instr[30] = full/half selector
3783 instr[29,24] = 001110
3784 instr[23,22] = size: 00=> 8-bit, 01=> 16-bit, 10=> 32-bit, 11=> 64-bit
3785 instr[21] = 1
3786 instr[20,16] = Vm
3787 instr[15,10] = 100001
3788 instr[9,5] = Vn
3789 instr[4,0] = Vd. */
3790
3791 unsigned vm = INSTR (20, 16);
3792 unsigned vn = INSTR (9, 5);
3793 unsigned vd = INSTR (4, 0);
3794 unsigned i;
3795 int full = INSTR (30, 30);
3796
3797 NYI_assert (29, 24, 0x0E);
3798 NYI_assert (21, 21, 1);
3799 NYI_assert (15, 10, 0x21);
3800
3801 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
3802 switch (INSTR (23, 22))
3803 {
3804 case 0:
3805 for (i = 0; i < (full ? 16 : 8); i++)
3806 aarch64_set_vec_u8 (cpu, vd, i, aarch64_get_vec_u8 (cpu, vn, i)
3807 + aarch64_get_vec_u8 (cpu, vm, i));
3808 return;
3809
3810 case 1:
3811 for (i = 0; i < (full ? 8 : 4); i++)
3812 aarch64_set_vec_u16 (cpu, vd, i, aarch64_get_vec_u16 (cpu, vn, i)
3813 + aarch64_get_vec_u16 (cpu, vm, i));
3814 return;
3815
3816 case 2:
3817 for (i = 0; i < (full ? 4 : 2); i++)
3818 aarch64_set_vec_u32 (cpu, vd, i, aarch64_get_vec_u32 (cpu, vn, i)
3819 + aarch64_get_vec_u32 (cpu, vm, i));
3820 return;
3821
3822 case 3:
3823 if (! full)
3824 HALT_UNALLOC;
3825 aarch64_set_vec_u64 (cpu, vd, 0, aarch64_get_vec_u64 (cpu, vn, 0)
3826 + aarch64_get_vec_u64 (cpu, vm, 0));
3827 aarch64_set_vec_u64 (cpu, vd, 1,
3828 aarch64_get_vec_u64 (cpu, vn, 1)
3829 + aarch64_get_vec_u64 (cpu, vm, 1));
3830 return;
3831 }
3832 }
3833
3834 static void
3835 do_vec_mul (sim_cpu *cpu)
3836 {
3837 /* instr[31] = 0
3838 instr[30] = full/half selector
3839 instr[29,24] = 00 1110
3840 instr[23,22] = size: 00=> 8-bit, 01=> 16-bit, 10=> 32-bit
3841 instr[21] = 1
3842 instr[20,16] = Vm
3843 instr[15,10] = 10 0111
3844 instr[9,5] = Vn
3845 instr[4,0] = Vd. */
3846
3847 unsigned vm = INSTR (20, 16);
3848 unsigned vn = INSTR (9, 5);
3849 unsigned vd = INSTR (4, 0);
3850 unsigned i;
3851 int full = INSTR (30, 30);
3852 int bias = 0;
3853
3854 NYI_assert (29, 24, 0x0E);
3855 NYI_assert (21, 21, 1);
3856 NYI_assert (15, 10, 0x27);
3857
3858 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
3859 switch (INSTR (23, 22))
3860 {
3861 case 0:
3862 DO_VEC_WIDENING_MUL (full ? 16 : 8, uint8_t, u8, u8);
3863 return;
3864
3865 case 1:
3866 DO_VEC_WIDENING_MUL (full ? 8 : 4, uint16_t, u16, u16);
3867 return;
3868
3869 case 2:
3870 DO_VEC_WIDENING_MUL (full ? 4 : 2, uint32_t, u32, u32);
3871 return;
3872
3873 case 3:
3874 HALT_UNALLOC;
3875 }
3876 }
3877
3878 static void
3879 do_vec_MLA (sim_cpu *cpu)
3880 {
3881 /* instr[31] = 0
3882 instr[30] = full/half selector
3883 instr[29,24] = 00 1110
3884 instr[23,22] = size: 00=> 8-bit, 01=> 16-bit, 10=> 32-bit
3885 instr[21] = 1
3886 instr[20,16] = Vm
3887 instr[15,10] = 1001 01
3888 instr[9,5] = Vn
3889 instr[4,0] = Vd. */
3890
3891 unsigned vm = INSTR (20, 16);
3892 unsigned vn = INSTR (9, 5);
3893 unsigned vd = INSTR (4, 0);
3894 unsigned i;
3895 int full = INSTR (30, 30);
3896
3897 NYI_assert (29, 24, 0x0E);
3898 NYI_assert (21, 21, 1);
3899 NYI_assert (15, 10, 0x25);
3900
3901 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
3902 switch (INSTR (23, 22))
3903 {
3904 case 0:
3905 for (i = 0; i < (full ? 16 : 8); i++)
3906 aarch64_set_vec_u8 (cpu, vd, i,
3907 aarch64_get_vec_u8 (cpu, vd, i)
3908 + (aarch64_get_vec_u8 (cpu, vn, i)
3909 * aarch64_get_vec_u8 (cpu, vm, i)));
3910 return;
3911
3912 case 1:
3913 for (i = 0; i < (full ? 8 : 4); i++)
3914 aarch64_set_vec_u16 (cpu, vd, i,
3915 aarch64_get_vec_u16 (cpu, vd, i)
3916 + (aarch64_get_vec_u16 (cpu, vn, i)
3917 * aarch64_get_vec_u16 (cpu, vm, i)));
3918 return;
3919
3920 case 2:
3921 for (i = 0; i < (full ? 4 : 2); i++)
3922 aarch64_set_vec_u32 (cpu, vd, i,
3923 aarch64_get_vec_u32 (cpu, vd, i)
3924 + (aarch64_get_vec_u32 (cpu, vn, i)
3925 * aarch64_get_vec_u32 (cpu, vm, i)));
3926 return;
3927
3928 default:
3929 HALT_UNALLOC;
3930 }
3931 }
3932
3933 static float
3934 fmaxnm (float a, float b)
3935 {
3936 if (! isnan (a))
3937 {
3938 if (! isnan (b))
3939 return a > b ? a : b;
3940 return a;
3941 }
3942 else if (! isnan (b))
3943 return b;
3944 return a;
3945 }
3946
3947 static float
3948 fminnm (float a, float b)
3949 {
3950 if (! isnan (a))
3951 {
3952 if (! isnan (b))
3953 return a < b ? a : b;
3954 return a;
3955 }
3956 else if (! isnan (b))
3957 return b;
3958 return a;
3959 }
3960
3961 static double
3962 dmaxnm (double a, double b)
3963 {
3964 if (! isnan (a))
3965 {
3966 if (! isnan (b))
3967 return a > b ? a : b;
3968 return a;
3969 }
3970 else if (! isnan (b))
3971 return b;
3972 return a;
3973 }
3974
3975 static double
3976 dminnm (double a, double b)
3977 {
3978 if (! isnan (a))
3979 {
3980 if (! isnan (b))
3981 return a < b ? a : b;
3982 return a;
3983 }
3984 else if (! isnan (b))
3985 return b;
3986 return a;
3987 }
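/* The four helpers above implement the FMAXNM/FMINNM treatment of
NaNs: if exactly one operand is a NaN the other operand is returned,
and a NaN results only when both operands are NaNs. (The quiet
versus signalling NaN distinction made by IEEE 754 maxNum/minNum is
not modelled here.) */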
3988
3989 static void
3990 do_vec_FminmaxNMP (sim_cpu *cpu)
3991 {
3992 /* instr [31] = 0
3993 instr [30] = half (0)/full (1)
3994 instr [29,24] = 10 1110
3995 instr [23] = max(0)/min(1)
3996 instr [22] = float (0)/double (1)
3997 instr [21] = 1
3998 instr [20,16] = Vn
3999 instr [15,10] = 1100 01
4000 instr [9,5] = Vm
4001 instr [4,0] = Vd. */
4002
4003 unsigned vm = INSTR (20, 16);
4004 unsigned vn = INSTR (9, 5);
4005 unsigned vd = INSTR (4, 0);
4006 int full = INSTR (30, 30);
4007
4008 NYI_assert (29, 24, 0x2E);
4009 NYI_assert (21, 21, 1);
4010 NYI_assert (15, 10, 0x31);
4011
4012 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
4013 if (INSTR (22, 22))
4014 {
4015 double (* fn)(double, double) = INSTR (23, 23)
4016 ? dminnm : dmaxnm;
4017
4018 if (! full)
4019 HALT_NYI;
4020 aarch64_set_vec_double (cpu, vd, 0,
4021 fn (aarch64_get_vec_double (cpu, vn, 0),
4022 aarch64_get_vec_double (cpu, vn, 1)));
4023 aarch64_set_vec_double (cpu, vd, 1,
4024 fn (aarch64_get_vec_double (cpu, vm, 0),
4025 aarch64_get_vec_double (cpu, vm, 1)));
4026 }
4027 else
4028 {
4029 float (* fn)(float, float) = INSTR (23, 23)
4030 ? fminnm : fmaxnm;
4031
4032 aarch64_set_vec_float (cpu, vd, 0,
4033 fn (aarch64_get_vec_float (cpu, vn, 0),
4034 aarch64_get_vec_float (cpu, vn, 1)));
4035 if (full)
4036 aarch64_set_vec_float (cpu, vd, 1,
4037 fn (aarch64_get_vec_float (cpu, vn, 2),
4038 aarch64_get_vec_float (cpu, vn, 3)));
4039
4040 aarch64_set_vec_float (cpu, vd, (full ? 2 : 1),
4041 fn (aarch64_get_vec_float (cpu, vm, 0),
4042 aarch64_get_vec_float (cpu, vm, 1)));
4043 if (full)
4044 aarch64_set_vec_float (cpu, vd, 3,
4045 fn (aarch64_get_vec_float (cpu, vm, 2),
4046 aarch64_get_vec_float (cpu, vm, 3)));
4047 }
4048 }
4049
4050 static void
4051 do_vec_AND (sim_cpu *cpu)
4052 {
4053 /* instr[31] = 0
4054 instr[30] = half (0)/full (1)
4055 instr[29,21] = 001110001
4056 instr[20,16] = Vm
4057 instr[15,10] = 000111
4058 instr[9,5] = Vn
4059 instr[4,0] = Vd. */
4060
4061 unsigned vm = INSTR (20, 16);
4062 unsigned vn = INSTR (9, 5);
4063 unsigned vd = INSTR (4, 0);
4064 unsigned i;
4065 int full = INSTR (30, 30);
4066
4067 NYI_assert (29, 21, 0x071);
4068 NYI_assert (15, 10, 0x07);
4069
4070 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
4071 for (i = 0; i < (full ? 4 : 2); i++)
4072 aarch64_set_vec_u32 (cpu, vd, i,
4073 aarch64_get_vec_u32 (cpu, vn, i)
4074 & aarch64_get_vec_u32 (cpu, vm, i));
4075 }
4076
4077 static void
4078 do_vec_BSL (sim_cpu *cpu)
4079 {
4080 /* instr[31] = 0
4081 instr[30] = half (0)/full (1)
4082 instr[29,21] = 101110011
4083 instr[20,16] = Vm
4084 instr[15,10] = 000111
4085 instr[9,5] = Vn
4086 instr[4,0] = Vd. */
4087
4088 unsigned vm = INSTR (20, 16);
4089 unsigned vn = INSTR (9, 5);
4090 unsigned vd = INSTR (4, 0);
4091 unsigned i;
4092 int full = INSTR (30, 30);
4093
4094 NYI_assert (29, 21, 0x173);
4095 NYI_assert (15, 10, 0x07);
4096
4097 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
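/* Bitwise select: each result bit comes from Vn where the
corresponding Vd bit is set, and from Vm where it is clear. The
operation is purely bitwise, so iterating in 8-bit lanes is as good
as any other width. */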
4098 for (i = 0; i < (full ? 16 : 8); i++)
4099 aarch64_set_vec_u8 (cpu, vd, i,
4100 ( aarch64_get_vec_u8 (cpu, vd, i)
4101 & aarch64_get_vec_u8 (cpu, vn, i))
4102 | ((~ aarch64_get_vec_u8 (cpu, vd, i))
4103 & aarch64_get_vec_u8 (cpu, vm, i)));
4104 }
4105
4106 static void
4107 do_vec_EOR (sim_cpu *cpu)
4108 {
4109 /* instr[31] = 0
4110 instr[30] = half (0)/full (1)
4111 instr[29,21] = 10 1110 001
4112 instr[20,16] = Vm
4113 instr[15,10] = 000111
4114 instr[9,5] = Vn
4115 instr[4,0] = Vd. */
4116
4117 unsigned vm = INSTR (20, 16);
4118 unsigned vn = INSTR (9, 5);
4119 unsigned vd = INSTR (4, 0);
4120 unsigned i;
4121 int full = INSTR (30, 30);
4122
4123 NYI_assert (29, 21, 0x171);
4124 NYI_assert (15, 10, 0x07);
4125
4126 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
4127 for (i = 0; i < (full ? 4 : 2); i++)
4128 aarch64_set_vec_u32 (cpu, vd, i,
4129 aarch64_get_vec_u32 (cpu, vn, i)
4130 ^ aarch64_get_vec_u32 (cpu, vm, i));
4131 }
4132
4133 static void
4134 do_vec_bit (sim_cpu *cpu)
4135 {
4136 /* instr[31] = 0
4137 instr[30] = half (0)/full (1)
4138 instr[29,23] = 10 1110 1
4139 instr[22] = BIT (0) / BIF (1)
4140 instr[21] = 1
4141 instr[20,16] = Vm
4142 instr[15,10] = 0001 11
4143 instr[9,5] = Vn
4144 instr[4,0] = Vd. */
4145
4146 unsigned vm = INSTR (20, 16);
4147 unsigned vn = INSTR (9, 5);
4148 unsigned vd = INSTR (4, 0);
4149 unsigned full = INSTR (30, 30);
4150 unsigned test_false = INSTR (22, 22);
4151 unsigned i;
4152
4153 NYI_assert (29, 23, 0x5D);
4154 NYI_assert (21, 21, 1);
4155 NYI_assert (15, 10, 0x07);
4156
4157 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
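/* BIT copies Vn bits into Vd wherever the Vm mask bit is set; BIF
(test_false) copies them wherever the mask bit is clear. Again the
operation is bitwise, so the 32-bit lane width used below is
arbitrary. */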
4158 for (i = 0; i < (full ? 4 : 2); i++)
4159 {
4160 uint32_t vd_val = aarch64_get_vec_u32 (cpu, vd, i);
4161 uint32_t vn_val = aarch64_get_vec_u32 (cpu, vn, i);
4162 uint32_t vm_val = aarch64_get_vec_u32 (cpu, vm, i);
4163 if (test_false)
4164 aarch64_set_vec_u32 (cpu, vd, i,
4165 (vd_val & vm_val) | (vn_val & ~vm_val));
4166 else
4167 aarch64_set_vec_u32 (cpu, vd, i,
4168 (vd_val & ~vm_val) | (vn_val & vm_val));
4169 }
4170 }
4171
4172 static void
4173 do_vec_ORN (sim_cpu *cpu)
4174 {
4175 /* instr[31] = 0
4176 instr[30] = half (0)/full (1)
4177 instr[29,21] = 00 1110 111
4178 instr[20,16] = Vm
4179 instr[15,10] = 00 0111
4180 instr[9,5] = Vn
4181 instr[4,0] = Vd. */
4182
4183 unsigned vm = INSTR (20, 16);
4184 unsigned vn = INSTR (9, 5);
4185 unsigned vd = INSTR (4, 0);
4186 unsigned i;
4187 int full = INSTR (30, 30);
4188
4189 NYI_assert (29, 21, 0x077);
4190 NYI_assert (15, 10, 0x07);
4191
4192 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
4193 for (i = 0; i < (full ? 16 : 8); i++)
4194 aarch64_set_vec_u8 (cpu, vd, i,
4195 aarch64_get_vec_u8 (cpu, vn, i)
4196 | ~ aarch64_get_vec_u8 (cpu, vm, i));
4197 }
4198
4199 static void
4200 do_vec_ORR (sim_cpu *cpu)
4201 {
4202 /* instr[31] = 0
4203 instr[30] = half (0)/full (1)
4204 instr[29,21] = 00 1110 101
4205 instr[20,16] = Vm
4206 instr[15,10] = 0001 11
4207 instr[9,5] = Vn
4208 instr[4,0] = Vd. */
4209
4210 unsigned vm = INSTR (20, 16);
4211 unsigned vn = INSTR (9, 5);
4212 unsigned vd = INSTR (4, 0);
4213 unsigned i;
4214 int full = INSTR (30, 30);
4215
4216 NYI_assert (29, 21, 0x075);
4217 NYI_assert (15, 10, 0x07);
4218
4219 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
4220 for (i = 0; i < (full ? 16 : 8); i++)
4221 aarch64_set_vec_u8 (cpu, vd, i,
4222 aarch64_get_vec_u8 (cpu, vn, i)
4223 | aarch64_get_vec_u8 (cpu, vm, i));
4224 }
4225
4226 static void
4227 do_vec_BIC (sim_cpu *cpu)
4228 {
4229 /* instr[31] = 0
4230 instr[30] = half (0)/full (1)
4231 instr[29,21] = 00 1110 011
4232 instr[20,16] = Vm
4233 instr[15,10] = 00 0111
4234 instr[9,5] = Vn
4235 instr[4,0] = Vd. */
4236
4237 unsigned vm = INSTR (20, 16);
4238 unsigned vn = INSTR (9, 5);
4239 unsigned vd = INSTR (4, 0);
4240 unsigned i;
4241 int full = INSTR (30, 30);
4242
4243 NYI_assert (29, 21, 0x073);
4244 NYI_assert (15, 10, 0x07);
4245
4246 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
4247 for (i = 0; i < (full ? 16 : 8); i++)
4248 aarch64_set_vec_u8 (cpu, vd, i,
4249 aarch64_get_vec_u8 (cpu, vn, i)
4250 & ~ aarch64_get_vec_u8 (cpu, vm, i));
4251 }
4252
4253 static void
4254 do_vec_XTN (sim_cpu *cpu)
4255 {
4256 /* instr[31] = 0
4257 instr[30] = first part (0)/ second part (1)
4258 instr[29,24] = 00 1110
4259 instr[23,22] = size: byte(00), half(01), word (10)
4260 instr[21,10] = 1000 0100 1010
4261 instr[9,5] = Vs
4262 instr[4,0] = Vd. */
4263
4264 unsigned vs = INSTR (9, 5);
4265 unsigned vd = INSTR (4, 0);
4266 unsigned bias = INSTR (30, 30);
4267 unsigned i;
4268
4269 NYI_assert (29, 24, 0x0E);
4270 NYI_assert (21, 10, 0x84A);
4271
4272 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
4273 switch (INSTR (23, 22))
4274 {
4275 case 0:
4276 for (i = 0; i < 8; i++)
4277 aarch64_set_vec_u8 (cpu, vd, i + (bias * 8),
4278 aarch64_get_vec_u16 (cpu, vs, i));
4279 return;
4280
4281 case 1:
4282 for (i = 0; i < 4; i++)
4283 aarch64_set_vec_u16 (cpu, vd, i + (bias * 4),
4284 aarch64_get_vec_u32 (cpu, vs, i));
4285 return;
4286
4287 case 2:
4288 for (i = 0; i < 2; i++)
4289 aarch64_set_vec_u32 (cpu, vd, i + (bias * 2),
4290 aarch64_get_vec_u64 (cpu, vs, i));
4291 return;
4292 }
4293 }
4294
4295 /* Return the number of bits set in the input value. */
4296 #if __GNUC__ > 3 || (__GNUC__ == 3 && __GNUC_MINOR__ >= 4)
4297 # define popcount __builtin_popcount
4298 #else
4299 static int
4300 popcount (unsigned char x)
4301 {
4302 static const unsigned char popcnt[16] =
4303 {
4304 0, 1, 1, 2,
4305 1, 2, 2, 3,
4306 1, 2, 2, 3,
4307 2, 3, 3, 4
4308 };
4309
4310 /* Only counts the low 8 bits of the input as that is all we need. */
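/* For example, popcount (0xA5) = popcnt[0x5] + popcnt[0xA]
= 2 + 2 = 4. */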
4311 return popcnt[x % 16] + popcnt[x / 16];
4312 }
4313 #endif
4314
4315 static void
4316 do_vec_CNT (sim_cpu *cpu)
4317 {
4318 /* instr[31] = 0
4319 instr[30] = half (0)/ full (1)
4320 instr[29,24] = 00 1110
4321 instr[23,22] = size: byte(00)
4322 instr[21,10] = 1000 0001 0110
4323 instr[9,5] = Vs
4324 instr[4,0] = Vd. */
4325
4326 unsigned vs = INSTR (9, 5);
4327 unsigned vd = INSTR (4, 0);
4328 int full = INSTR (30, 30);
4329 int size = INSTR (23, 22);
4330 int i;
4331
4332 NYI_assert (29, 24, 0x0E);
4333 NYI_assert (21, 10, 0x816);
4334
4335 if (size != 0)
4336 HALT_UNALLOC;
4337
4338 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
4339
4340 for (i = 0; i < (full ? 16 : 8); i++)
4341 aarch64_set_vec_u8 (cpu, vd, i,
4342 popcount (aarch64_get_vec_u8 (cpu, vs, i)));
4343 }
4344
4345 static void
4346 do_vec_maxv (sim_cpu *cpu)
4347 {
4348 /* instr[31] = 0
4349 instr[30] = half(0)/full(1)
4350 instr[29] = signed (0)/unsigned(1)
4351 instr[28,24] = 0 1110
4352 instr[23,22] = size: byte(00), half(01), word (10)
4353 instr[21] = 1
4354 instr[20,17] = 1 000
4355 instr[16] = max(0)/min(1)
4356 instr[15,10] = 1010 10
4357 instr[9,5] = V source
4358 instr[4,0] = R dest. */
4359
4360 unsigned vs = INSTR (9, 5);
4361 unsigned rd = INSTR (4, 0);
4362 unsigned full = INSTR (30, 30);
4363 unsigned i;
4364
4365 NYI_assert (28, 24, 0x0E);
4366 NYI_assert (21, 21, 1);
4367 NYI_assert (20, 17, 8);
4368 NYI_assert (15, 10, 0x2A);
4369
4370 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
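/* Bit 29 selects the unsigned forms and bit 16 the minimum forms,
giving SMAXV, SMINV, UMAXV and UMINV. Each folds every lane of Vs
into a single scalar, starting from lane 0. */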
4371 switch ((INSTR (29, 29) << 1) | INSTR (16, 16))
4372 {
4373 case 0: /* SMAXV. */
4374 {
4375 int64_t smax;
4376 switch (INSTR (23, 22))
4377 {
4378 case 0:
4379 smax = aarch64_get_vec_s8 (cpu, vs, 0);
4380 for (i = 1; i < (full ? 16 : 8); i++)
4381 smax = max (smax, aarch64_get_vec_s8 (cpu, vs, i));
4382 break;
4383 case 1:
4384 smax = aarch64_get_vec_s16 (cpu, vs, 0);
4385 for (i = 1; i < (full ? 8 : 4); i++)
4386 smax = max (smax, aarch64_get_vec_s16 (cpu, vs, i));
4387 break;
4388 case 2:
4389 smax = aarch64_get_vec_s32 (cpu, vs, 0);
4390 for (i = 1; i < (full ? 4 : 2); i++)
4391 smax = max (smax, aarch64_get_vec_s32 (cpu, vs, i));
4392 break;
4393 case 3:
4394 HALT_UNALLOC;
4395 }
4396 aarch64_set_reg_s64 (cpu, rd, NO_SP, smax);
4397 return;
4398 }
4399
4400 case 1: /* SMINV. */
4401 {
4402 int64_t smin;
4403 switch (INSTR (23, 22))
4404 {
4405 case 0:
4406 smin = aarch64_get_vec_s8 (cpu, vs, 0);
4407 for (i = 1; i < (full ? 16 : 8); i++)
4408 smin = min (smin, aarch64_get_vec_s8 (cpu, vs, i));
4409 break;
4410 case 1:
4411 smin = aarch64_get_vec_s16 (cpu, vs, 0);
4412 for (i = 1; i < (full ? 8 : 4); i++)
4413 smin = min (smin, aarch64_get_vec_s16 (cpu, vs, i));
4414 break;
4415 case 2:
4416 smin = aarch64_get_vec_s32 (cpu, vs, 0);
4417 for (i = 1; i < (full ? 4 : 2); i++)
4418 smin = min (smin, aarch64_get_vec_s32 (cpu, vs, i));
4419 break;
4420
4421 case 3:
4422 HALT_UNALLOC;
4423 }
4424 aarch64_set_reg_s64 (cpu, rd, NO_SP, smin);
4425 return;
4426 }
4427
4428 case 2: /* UMAXV. */
4429 {
4430 uint64_t umax;
4431 switch (INSTR (23, 22))
4432 {
4433 case 0:
4434 umax = aarch64_get_vec_u8 (cpu, vs, 0);
4435 for (i = 1; i < (full ? 16 : 8); i++)
4436 umax = max (umax, aarch64_get_vec_u8 (cpu, vs, i));
4437 break;
4438 case 1:
4439 umax = aarch64_get_vec_u16 (cpu, vs, 0);
4440 for (i = 1; i < (full ? 8 : 4); i++)
4441 umax = max (umax, aarch64_get_vec_u16 (cpu, vs, i));
4442 break;
4443 case 2:
4444 umax = aarch64_get_vec_u32 (cpu, vs, 0);
4445 for (i = 1; i < (full ? 4 : 2); i++)
4446 umax = max (umax, aarch64_get_vec_u32 (cpu, vs, i));
4447 break;
4448
4449 case 3:
4450 HALT_UNALLOC;
4451 }
4452 aarch64_set_reg_u64 (cpu, rd, NO_SP, umax);
4453 return;
4454 }
4455
4456 case 3: /* UMINV. */
4457 {
4458 uint64_t umin;
4459 switch (INSTR (23, 22))
4460 {
4461 case 0:
4462 umin = aarch64_get_vec_u8 (cpu, vs, 0);
4463 for (i = 1; i < (full ? 16 : 8); i++)
4464 umin = min (umin, aarch64_get_vec_u8 (cpu, vs, i));
4465 break;
4466 case 1:
4467 umin = aarch64_get_vec_u16 (cpu, vs, 0);
4468 for (i = 1; i < (full ? 8 : 4); i++)
4469 umin = min (umin, aarch64_get_vec_u16 (cpu, vs, i));
4470 break;
4471 case 2:
4472 umin = aarch64_get_vec_u32 (cpu, vs, 0);
4473 for (i = 1; i < (full ? 4 : 2); i++)
4474 umin = min (umin, aarch64_get_vec_u32 (cpu, vs, i));
4475 break;
4476
4477 case 3:
4478 HALT_UNALLOC;
4479 }
4480 aarch64_set_reg_u64 (cpu, rd, NO_SP, umin);
4481 return;
4482 }
4483 }
4484 }
4485
4486 static void
4487 do_vec_fminmaxV (sim_cpu *cpu)
4488 {
4489 /* instr[31,24] = 0110 1110
4490 instr[23] = max(0)/min(1)
4491 instr[22,14] = 011 0000 11
4492 instr[13,12] = nm(00)/normal(11)
4493 instr[11,10] = 10
4494 instr[9,5] = V source
4495 instr[4,0] = R dest. */
4496
4497 unsigned vs = INSTR (9, 5);
4498 unsigned rd = INSTR (4, 0);
4499 unsigned i;
4500 float res = aarch64_get_vec_float (cpu, vs, 0);
4501
4502 NYI_assert (31, 24, 0x6E);
4503 NYI_assert (22, 14, 0x0C3);
4504 NYI_assert (11, 10, 2);
4505
4506 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
4507 if (INSTR (23, 23))
4508 {
4509 switch (INSTR (13, 12))
4510 {
4511 case 0: /* FMINNMV. */
4512 for (i = 1; i < 4; i++)
4513 res = fminnm (res, aarch64_get_vec_float (cpu, vs, i));
4514 break;
4515
4516 case 3: /* FMINV. */
4517 for (i = 1; i < 4; i++)
4518 res = min (res, aarch64_get_vec_float (cpu, vs, i));
4519 break;
4520
4521 default:
4522 HALT_NYI;
4523 }
4524 }
4525 else
4526 {
4527 switch (INSTR (13, 12))
4528 {
4529 case 0: /* FMAXNMV. */
4530 for (i = 1; i < 4; i++)
4531 res = fmaxnm (res, aarch64_get_vec_float (cpu, vs, i));
4532 break;
4533
4534 case 3: /* FMAXV. */
4535 for (i = 1; i < 4; i++)
4536 res = max (res, aarch64_get_vec_float (cpu, vs, i));
4537 break;
4538
4539 default:
4540 HALT_NYI;
4541 }
4542 }
4543
4544 aarch64_set_FP_float (cpu, rd, res);
4545 }
4546
4547 static void
4548 do_vec_Fminmax (sim_cpu *cpu)
4549 {
4550 /* instr[31] = 0
4551 instr[30] = half(0)/full(1)
4552 instr[29,24] = 00 1110
4553 instr[23] = max(0)/min(1)
4554 instr[22] = float(0)/double(1)
4555 instr[21] = 1
4556 instr[20,16] = Vm
4557 instr[15,14] = 11
4558 instr[13,12] = nm(00)/normal(11)
4559 instr[11,10] = 01
4560 instr[9,5] = Vn
4561 instr[4,0] = Vd. */
4562
4563 unsigned vm = INSTR (20, 16);
4564 unsigned vn = INSTR (9, 5);
4565 unsigned vd = INSTR (4, 0);
4566 unsigned full = INSTR (30, 30);
4567 unsigned min = INSTR (23, 23);
4568 unsigned i;
4569
4570 NYI_assert (29, 24, 0x0E);
4571 NYI_assert (21, 21, 1);
4572 NYI_assert (15, 14, 3);
4573 NYI_assert (11, 10, 1);
4574
4575 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
4576 if (INSTR (22, 22))
4577 {
4578 double (* func)(double, double);
4579
4580 if (! full)
4581 HALT_NYI;
4582
4583 if (INSTR (13, 12) == 0)
4584 func = min ? dminnm : dmaxnm;
4585 else if (INSTR (13, 12) == 3)
4586 func = min ? fmin : fmax;
4587 else
4588 HALT_NYI;
4589
4590 for (i = 0; i < 2; i++)
4591 aarch64_set_vec_double (cpu, vd, i,
4592 func (aarch64_get_vec_double (cpu, vn, i),
4593 aarch64_get_vec_double (cpu, vm, i)));
4594 }
4595 else
4596 {
4597 float (* func)(float, float);
4598
4599 if (INSTR (13, 12) == 0)
4600 func = min ? fminnm : fmaxnm;
4601 else if (INSTR (13, 12) == 3)
4602 func = min ? fminf : fmaxf;
4603 else
4604 HALT_NYI;
4605
4606 for (i = 0; i < (full ? 4 : 2); i++)
4607 aarch64_set_vec_float (cpu, vd, i,
4608 func (aarch64_get_vec_float (cpu, vn, i),
4609 aarch64_get_vec_float (cpu, vm, i)));
4610 }
4611 }
4612
4613 static void
4614 do_vec_SCVTF (sim_cpu *cpu)
4615 {
4616 /* instr[31] = 0
4617 instr[30] = Q
4618 instr[29,23] = 00 1110 0
4619 instr[22] = float(0)/double(1)
4620 instr[21,10] = 10 0001 1101 10
4621 instr[9,5] = Vn
4622 instr[4,0] = Vd. */
4623
4624 unsigned vn = INSTR (9, 5);
4625 unsigned vd = INSTR (4, 0);
4626 unsigned full = INSTR (30, 30);
4627 unsigned size = INSTR (22, 22);
4628 unsigned i;
4629
4630 NYI_assert (29, 23, 0x1C);
4631 NYI_assert (21, 10, 0x876);
4632
4633 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
4634 if (size)
4635 {
4636 if (! full)
4637 HALT_UNALLOC;
4638
4639 for (i = 0; i < 2; i++)
4640 {
4641 double val = (double) aarch64_get_vec_s64 (cpu, vn, i); /* SCVTF: elements are signed. */
4642 aarch64_set_vec_double (cpu, vd, i, val);
4643 }
4644 }
4645 else
4646 {
4647 for (i = 0; i < (full ? 4 : 2); i++)
4648 {
4649 float val = (float) aarch64_get_vec_s32 (cpu, vn, i);
4650 aarch64_set_vec_float (cpu, vd, i, val);
4651 }
4652 }
4653 }
4654
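/* Helpers for the SIMD compare instructions. Each lane of Vd is set
to all ones when the comparison holds and to all zeroes otherwise,
producing the element-wide masks the architecture defines. The
macros expect i, size, full, vd, vn and vm to be in scope at the
expansion site. */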
4655 #define VEC_CMP(SOURCE, CMP) \
4656 do \
4657 { \
4658 switch (size) \
4659 { \
4660 case 0: \
4661 for (i = 0; i < (full ? 16 : 8); i++) \
4662 aarch64_set_vec_u8 (cpu, vd, i, \
4663 aarch64_get_vec_##SOURCE##8 (cpu, vn, i) \
4664 CMP \
4665 aarch64_get_vec_##SOURCE##8 (cpu, vm, i) \
4666 ? -1 : 0); \
4667 return; \
4668 case 1: \
4669 for (i = 0; i < (full ? 8 : 4); i++) \
4670 aarch64_set_vec_u16 (cpu, vd, i, \
4671 aarch64_get_vec_##SOURCE##16 (cpu, vn, i) \
4672 CMP \
4673 aarch64_get_vec_##SOURCE##16 (cpu, vm, i) \
4674 ? -1 : 0); \
4675 return; \
4676 case 2: \
4677 for (i = 0; i < (full ? 4 : 2); i++) \
4678 aarch64_set_vec_u32 (cpu, vd, i, \
4679 aarch64_get_vec_##SOURCE##32 (cpu, vn, i) \
4680 CMP \
4681 aarch64_get_vec_##SOURCE##32 (cpu, vm, i) \
4682 ? -1 : 0); \
4683 return; \
4684 case 3: \
4685 if (! full) \
4686 HALT_UNALLOC; \
4687 for (i = 0; i < 2; i++) \
4688 aarch64_set_vec_u64 (cpu, vd, i, \
4689 aarch64_get_vec_##SOURCE##64 (cpu, vn, i) \
4690 CMP \
4691 aarch64_get_vec_##SOURCE##64 (cpu, vm, i) \
4692 ? -1ULL : 0); \
4693 return; \
4694 } \
4695 } \
4696 while (0)
4697
4698 #define VEC_CMP0(SOURCE, CMP) \
4699 do \
4700 { \
4701 switch (size) \
4702 { \
4703 case 0: \
4704 for (i = 0; i < (full ? 16 : 8); i++) \
4705 aarch64_set_vec_u8 (cpu, vd, i, \
4706 aarch64_get_vec_##SOURCE##8 (cpu, vn, i) \
4707 CMP 0 ? -1 : 0); \
4708 return; \
4709 case 1: \
4710 for (i = 0; i < (full ? 8 : 4); i++) \
4711 aarch64_set_vec_u16 (cpu, vd, i, \
4712 aarch64_get_vec_##SOURCE##16 (cpu, vn, i) \
4713 CMP 0 ? -1 : 0); \
4714 return; \
4715 case 2: \
4716 for (i = 0; i < (full ? 4 : 2); i++) \
4717 aarch64_set_vec_u32 (cpu, vd, i, \
4718 aarch64_get_vec_##SOURCE##32 (cpu, vn, i) \
4719 CMP 0 ? -1 : 0); \
4720 return; \
4721 case 3: \
4722 if (! full) \
4723 HALT_UNALLOC; \
4724 for (i = 0; i < 2; i++) \
4725 aarch64_set_vec_u64 (cpu, vd, i, \
4726 aarch64_get_vec_##SOURCE##64 (cpu, vn, i) \
4727 CMP 0 ? -1ULL : 0); \
4728 return; \
4729 } \
4730 } \
4731 while (0)
4732
4733 #define VEC_FCMP0(CMP) \
4734 do \
4735 { \
4736 if (vm != 0) \
4737 HALT_NYI; \
4738 if (INSTR (22, 22)) \
4739 { \
4740 if (! full) \
4741 HALT_NYI; \
4742 for (i = 0; i < 2; i++) \
4743 aarch64_set_vec_u64 (cpu, vd, i, \
4744 aarch64_get_vec_double (cpu, vn, i) \
4745 CMP 0.0 ? -1 : 0); \
4746 } \
4747 else \
4748 { \
4749 for (i = 0; i < (full ? 4 : 2); i++) \
4750 aarch64_set_vec_u32 (cpu, vd, i, \
4751 aarch64_get_vec_float (cpu, vn, i) \
4752 CMP 0.0 ? -1 : 0); \
4753 } \
4754 return; \
4755 } \
4756 while (0)
4757
4758 #define VEC_FCMP(CMP) \
4759 do \
4760 { \
4761 if (INSTR (22, 22)) \
4762 { \
4763 if (! full) \
4764 HALT_NYI; \
4765 for (i = 0; i < 2; i++) \
4766 aarch64_set_vec_u64 (cpu, vd, i, \
4767 aarch64_get_vec_double (cpu, vn, i) \
4768 CMP \
4769 aarch64_get_vec_double (cpu, vm, i) \
4770 ? -1 : 0); \
4771 } \
4772 else \
4773 { \
4774 for (i = 0; i < (full ? 4 : 2); i++) \
4775 aarch64_set_vec_u32 (cpu, vd, i, \
4776 aarch64_get_vec_float (cpu, vn, i) \
4777 CMP \
4778 aarch64_get_vec_float (cpu, vm, i) \
4779 ? -1 : 0); \
4780 } \
4781 return; \
4782 } \
4783 while (0)
4784
4785 static void
4786 do_vec_compare (sim_cpu *cpu)
4787 {
4788 /* instr[31] = 0
4789 instr[30] = half(0)/full(1)
4790 instr[29] = part-of-comparison-type
4791 instr[28,24] = 0 1110
4792 instr[23,22] = size of integer compares: byte(00), half(01), word (10), long (11)
4793 type of float compares: single (x0) / double (x1)
4794 instr[21] = 1
4795 instr[20,16] = Vm or 00000 (compare vs 0)
4796 instr[15,10] = part-of-comparison-type
4797 instr[9,5] = Vn
4798 instr[4,0] = Vd. */
4799
4800 int full = INSTR (30, 30);
4801 int size = INSTR (23, 22);
4802 unsigned vm = INSTR (20, 16);
4803 unsigned vn = INSTR (9, 5);
4804 unsigned vd = INSTR (4, 0);
4805 unsigned i;
4806
4807 NYI_assert (28, 24, 0x0E);
4808 NYI_assert (21, 21, 1);
4809
4810 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
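/* Several encodings in this space are not compares at all: when the
Vm field of a compare-against-zero pattern is non-zero, the
instruction is really a reduction or a convert, so re-dispatch it. */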
4811 if ((INSTR (11, 11)
4812 && INSTR (14, 14))
4813 || ((INSTR (11, 11) == 0
4814 && INSTR (10, 10) == 0)))
4815 {
4816 /* A compare vs 0. */
4817 if (vm != 0)
4818 {
4819 if (INSTR (15, 10) == 0x2A)
4820 do_vec_maxv (cpu);
4821 else if (INSTR (15, 10) == 0x32
4822 || INSTR (15, 10) == 0x3E)
4823 do_vec_fminmaxV (cpu);
4824 else if (INSTR (29, 23) == 0x1C
4825 && INSTR (21, 10) == 0x876)
4826 do_vec_SCVTF (cpu);
4827 else
4828 HALT_NYI;
4829 return;
4830 }
4831 }
4832
4833 if (INSTR (14, 14))
4834 {
4835 /* A floating point compare. */
4836 unsigned decode = (INSTR (29, 29) << 5) | (INSTR (23, 23) << 4)
4837 | INSTR (13, 10);
4838
4839 NYI_assert (15, 15, 1);
4840
4841 switch (decode)
4842 {
4843 case /* 0b010010: GT#0 */ 0x12: VEC_FCMP0 (>);
4844 case /* 0b110010: GE#0 */ 0x32: VEC_FCMP0 (>=);
4845 case /* 0b010110: EQ#0 */ 0x16: VEC_FCMP0 (==);
4846 case /* 0b110110: LE#0 */ 0x36: VEC_FCMP0 (<=);
4847 case /* 0b011010: LT#0 */ 0x1A: VEC_FCMP0 (<);
4848 case /* 0b111001: GT */ 0x39: VEC_FCMP (>);
4849 case /* 0b101001: GE */ 0x29: VEC_FCMP (>=);
4850 case /* 0b001001: EQ */ 0x09: VEC_FCMP (==);
4851
4852 default:
4853 HALT_NYI;
4854 }
4855 }
4856 else
4857 {
4858 unsigned decode = (INSTR (29, 29) << 6) | INSTR (15, 10);
4859
4860 switch (decode)
4861 {
4862 case 0x0D: /* 0001101 GT */ VEC_CMP (s, > );
4863 case 0x0F: /* 0001111 GE */ VEC_CMP (s, >= );
4864 case 0x22: /* 0100010 GT #0 */ VEC_CMP0 (s, > );
4865 case 0x23: /* 0100011 TST */ VEC_CMP (u, & );
4866 case 0x26: /* 0100110 EQ #0 */ VEC_CMP0 (s, == );
4867 case 0x2A: /* 0101010 LT #0 */ VEC_CMP0 (s, < );
4868 case 0x4D: /* 1001101 HI */ VEC_CMP (u, > );
4869 case 0x4F: /* 1001111 HS */ VEC_CMP (u, >= );
4870 case 0x62: /* 1100010 GE #0 */ VEC_CMP0 (s, >= );
4871 case 0x63: /* 1100011 EQ */ VEC_CMP (u, == );
4872 case 0x66: /* 1100110 LE #0 */ VEC_CMP0 (s, <= );
4873 default:
4874 if (vm == 0)
4875 HALT_NYI;
4876 do_vec_maxv (cpu);
4877 }
4878 }
4879 }
4880
4881 static void
4882 do_vec_SSHL (sim_cpu *cpu)
4883 {
4884 /* instr[31] = 0
4885 instr[30] = first part (0)/ second part (1)
4886 instr[29,24] = 00 1110
4887 instr[23,22] = size: byte(00), half(01), word (10), long (11)
4888 instr[21] = 1
4889 instr[20,16] = Vm
4890 instr[15,10] = 0100 01
4891 instr[9,5] = Vn
4892 instr[4,0] = Vd. */
4893
4894 unsigned full = INSTR (30, 30);
4895 unsigned vm = INSTR (20, 16);
4896 unsigned vn = INSTR (9, 5);
4897 unsigned vd = INSTR (4, 0);
4898 unsigned i;
4899 signed int shift;
4900
4901 NYI_assert (29, 24, 0x0E);
4902 NYI_assert (21, 21, 1);
4903 NYI_assert (15, 10, 0x11);
4904
4905 /* The shift count in Vm is signed: a negative count shifts the
corresponding element of Vn right instead of left. Note that C's >>
on a negative signed value is implementation-defined, though most
compilers implement the arithmetic shift that SSHL requires. */
4906
4907 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
4908 switch (INSTR (23, 22))
4909 {
4910 case 0:
4911 for (i = 0; i < (full ? 16 : 8); i++)
4912 {
4913 shift = aarch64_get_vec_s8 (cpu, vm, i);
4914 if (shift >= 0)
4915 aarch64_set_vec_s8 (cpu, vd, i, aarch64_get_vec_s8 (cpu, vn, i)
4916 << shift);
4917 else
4918 aarch64_set_vec_s8 (cpu, vd, i, aarch64_get_vec_s8 (cpu, vn, i)
4919 >> - shift);
4920 }
4921 return;
4922
4923 case 1:
4924 for (i = 0; i < (full ? 8 : 4); i++)
4925 {
4926 shift = aarch64_get_vec_s8 (cpu, vm, i * 2);
4927 if (shift >= 0)
4928 aarch64_set_vec_s16 (cpu, vd, i, aarch64_get_vec_s16 (cpu, vn, i)
4929 << shift);
4930 else
4931 aarch64_set_vec_s16 (cpu, vd, i, aarch64_get_vec_s16 (cpu, vn, i)
4932 >> - shift);
4933 }
4934 return;
4935
4936 case 2:
4937 for (i = 0; i < (full ? 4 : 2); i++)
4938 {
4939 shift = aarch64_get_vec_s8 (cpu, vm, i * 4);
4940 if (shift >= 0)
4941 aarch64_set_vec_s32 (cpu, vd, i, aarch64_get_vec_s32 (cpu, vn, i)
4942 << shift);
4943 else
4944 aarch64_set_vec_s32 (cpu, vd, i, aarch64_get_vec_s32 (cpu, vn, i)
4945 >> - shift);
4946 }
4947 return;
4948
4949 case 3:
4950 if (! full)
4951 HALT_UNALLOC;
4952 for (i = 0; i < 2; i++)
4953 {
4954 shift = aarch64_get_vec_s8 (cpu, vm, i * 8);
4955 if (shift >= 0)
4956 aarch64_set_vec_s64 (cpu, vd, i, aarch64_get_vec_s64 (cpu, vn, i)
4957 << shift);
4958 else
4959 aarch64_set_vec_s64 (cpu, vd, i, aarch64_get_vec_s64 (cpu, vn, i)
4960 >> - shift);
4961 }
4962 return;
4963 }
4964 }
4965
4966 static void
4967 do_vec_USHL (sim_cpu *cpu)
4968 {
4969 /* instr[31] = 0
4970 instr[30] = first part (0)/ second part (1)
4971 instr[29,24] = 10 1110
4972 instr[23,22] = size: byte(00), half(01), word (10), long (11)
4973 instr[21] = 1
4974 instr[20,16] = Vm
4975 instr[15,10] = 0100 01
4976 instr[9,5] = Vn
4977 instr[4,0] = Vd */
4978
4979 unsigned full = INSTR (30, 30);
4980 unsigned vm = INSTR (20, 16);
4981 unsigned vn = INSTR (9, 5);
4982 unsigned vd = INSTR (4, 0);
4983 unsigned i;
4984 signed int shift;
4985
4986 NYI_assert (29, 24, 0x2E);
4987 NYI_assert (15, 10, 0x11);
4988
4989 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
4990 switch (INSTR (23, 22))
4991 {
4992 case 0:
4993 for (i = 0; i < (full ? 16 : 8); i++)
4994 {
4995 shift = aarch64_get_vec_s8 (cpu, vm, i);
4996 if (shift >= 0)
4997 aarch64_set_vec_u8 (cpu, vd, i, aarch64_get_vec_u8 (cpu, vn, i)
4998 << shift);
4999 else
5000 aarch64_set_vec_u8 (cpu, vd, i, aarch64_get_vec_u8 (cpu, vn, i)
5001 >> - shift);
5002 }
5003 return;
5004
5005 case 1:
5006 for (i = 0; i < (full ? 8 : 4); i++)
5007 {
5008 shift = aarch64_get_vec_s8 (cpu, vm, i * 2);
5009 if (shift >= 0)
5010 aarch64_set_vec_u16 (cpu, vd, i, aarch64_get_vec_u16 (cpu, vn, i)
5011 << shift);
5012 else
5013 aarch64_set_vec_u16 (cpu, vd, i, aarch64_get_vec_u16 (cpu, vn, i)
5014 >> - shift);
5015 }
5016 return;
5017
5018 case 2:
5019 for (i = 0; i < (full ? 4 : 2); i++)
5020 {
5021 shift = aarch64_get_vec_s8 (cpu, vm, i * 4);
5022 if (shift >= 0)
5023 aarch64_set_vec_u32 (cpu, vd, i, aarch64_get_vec_u32 (cpu, vn, i)
5024 << shift);
5025 else
5026 aarch64_set_vec_u32 (cpu, vd, i, aarch64_get_vec_u32 (cpu, vn, i)
5027 >> - shift);
5028 }
5029 return;
5030
5031 case 3:
5032 if (! full)
5033 HALT_UNALLOC;
5034 for (i = 0; i < 2; i++)
5035 {
5036 shift = aarch64_get_vec_s8 (cpu, vm, i * 8);
5037 if (shift >= 0)
5038 aarch64_set_vec_u64 (cpu, vd, i, aarch64_get_vec_u64 (cpu, vn, i)
5039 << shift);
5040 else
5041 aarch64_set_vec_u64 (cpu, vd, i, aarch64_get_vec_u64 (cpu, vn, i)
5042 >> - shift);
5043 }
5044 return;
5045 }
5046 }
5047
5048 static void
5049 do_vec_FMLA (sim_cpu *cpu)
5050 {
5051 /* instr[31] = 0
5052 instr[30] = full/half selector
5053 instr[29,23] = 0011100
5054 instr[22] = size: 0=>float, 1=>double
5055 instr[21] = 1
5056 instr[20,16] = Vm
5057 instr[15,10] = 1100 11
5058 instr[9,5] = Vn
5059 instr[4,0] = Vd. */
5060
5061 unsigned vm = INSTR (20, 16);
5062 unsigned vn = INSTR (9, 5);
5063 unsigned vd = INSTR (4, 0);
5064 unsigned i;
5065 int full = INSTR (30, 30);
5066
5067 NYI_assert (29, 23, 0x1C);
5068 NYI_assert (21, 21, 1);
5069 NYI_assert (15, 10, 0x33);
5070
5071 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
5072 if (INSTR (22, 22))
5073 {
5074 if (! full)
5075 HALT_UNALLOC;
5076 for (i = 0; i < 2; i++)
5077 aarch64_set_vec_double (cpu, vd, i,
5078 aarch64_get_vec_double (cpu, vn, i) *
5079 aarch64_get_vec_double (cpu, vm, i) +
5080 aarch64_get_vec_double (cpu, vd, i));
5081 }
5082 else
5083 {
5084 for (i = 0; i < (full ? 4 : 2); i++)
5085 aarch64_set_vec_float (cpu, vd, i,
5086 aarch64_get_vec_float (cpu, vn, i) *
5087 aarch64_get_vec_float (cpu, vm, i) +
5088 aarch64_get_vec_float (cpu, vd, i));
5089 }
5090 }
5091
5092 static void
5093 do_vec_max (sim_cpu *cpu)
5094 {
5095 /* instr[31] = 0
5096 instr[30] = full/half selector
5097 instr[29] = SMAX (0) / UMAX (1)
5098 instr[28,24] = 0 1110
5099 instr[23,22] = size: 00=> 8-bit, 01=> 16-bit, 10=> 32-bit
5100 instr[21] = 1
5101 instr[20,16] = Vm
5102 instr[15,10] = 0110 01
5103 instr[9,5] = Vn
5104 instr[4,0] = Vd. */
5105
5106 unsigned vm = INSTR (20, 16);
5107 unsigned vn = INSTR (9, 5);
5108 unsigned vd = INSTR (4, 0);
5109 unsigned i;
5110 int full = INSTR (30, 30);
5111
5112 NYI_assert (28, 24, 0x0E);
5113 NYI_assert (21, 21, 1);
5114 NYI_assert (15, 10, 0x19);
5115
5116 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
5117 if (INSTR (29, 29))
5118 {
5119 switch (INSTR (23, 22))
5120 {
5121 case 0:
5122 for (i = 0; i < (full ? 16 : 8); i++)
5123 aarch64_set_vec_u8 (cpu, vd, i,
5124 aarch64_get_vec_u8 (cpu, vn, i)
5125 > aarch64_get_vec_u8 (cpu, vm, i)
5126 ? aarch64_get_vec_u8 (cpu, vn, i)
5127 : aarch64_get_vec_u8 (cpu, vm, i));
5128 return;
5129
5130 case 1:
5131 for (i = 0; i < (full ? 8 : 4); i++)
5132 aarch64_set_vec_u16 (cpu, vd, i,
5133 aarch64_get_vec_u16 (cpu, vn, i)
5134 > aarch64_get_vec_u16 (cpu, vm, i)
5135 ? aarch64_get_vec_u16 (cpu, vn, i)
5136 : aarch64_get_vec_u16 (cpu, vm, i));
5137 return;
5138
5139 case 2:
5140 for (i = 0; i < (full ? 4 : 2); i++)
5141 aarch64_set_vec_u32 (cpu, vd, i,
5142 aarch64_get_vec_u32 (cpu, vn, i)
5143 > aarch64_get_vec_u32 (cpu, vm, i)
5144 ? aarch64_get_vec_u32 (cpu, vn, i)
5145 : aarch64_get_vec_u32 (cpu, vm, i));
5146 return;
5147
5148 case 3:
5149 HALT_UNALLOC;
5150 }
5151 }
5152 else
5153 {
5154 switch (INSTR (23, 22))
5155 {
5156 case 0:
5157 for (i = 0; i < (full ? 16 : 8); i++)
5158 aarch64_set_vec_s8 (cpu, vd, i,
5159 aarch64_get_vec_s8 (cpu, vn, i)
5160 > aarch64_get_vec_s8 (cpu, vm, i)
5161 ? aarch64_get_vec_s8 (cpu, vn, i)
5162 : aarch64_get_vec_s8 (cpu, vm, i));
5163 return;
5164
5165 case 1:
5166 for (i = 0; i < (full ? 8 : 4); i++)
5167 aarch64_set_vec_s16 (cpu, vd, i,
5168 aarch64_get_vec_s16 (cpu, vn, i)
5169 > aarch64_get_vec_s16 (cpu, vm, i)
5170 ? aarch64_get_vec_s16 (cpu, vn, i)
5171 : aarch64_get_vec_s16 (cpu, vm, i));
5172 return;
5173
5174 case 2:
5175 for (i = 0; i < (full ? 4 : 2); i++)
5176 aarch64_set_vec_s32 (cpu, vd, i,
5177 aarch64_get_vec_s32 (cpu, vn, i)
5178 > aarch64_get_vec_s32 (cpu, vm, i)
5179 ? aarch64_get_vec_s32 (cpu, vn, i)
5180 : aarch64_get_vec_s32 (cpu, vm, i));
5181 return;
5182
5183 case 3:
5184 HALT_UNALLOC;
5185 }
5186 }
5187 }
5188
5189 static void
5190 do_vec_min (sim_cpu *cpu)
5191 {
5192 /* instr[31] = 0
5193 instr[30] = full/half selector
5194 instr[29] = SMIN (0) / UMIN (1)
5195 instr[28,24] = 0 1110
5196 instr[23,22] = size: 00=> 8-bit, 01=> 16-bit, 10=> 32-bit
5197 instr[21] = 1
5198 instr[20,16] = Vm
5199 instr[15,10] = 0110 11
5200 instr[9,5] = Vn
5201 instr[4,0] = Vd. */
5202
5203 unsigned vm = INSTR (20, 16);
5204 unsigned vn = INSTR (9, 5);
5205 unsigned vd = INSTR (4, 0);
5206 unsigned i;
5207 int full = INSTR (30, 30);
5208
5209 NYI_assert (28, 24, 0x0E);
5210 NYI_assert (21, 21, 1);
5211 NYI_assert (15, 10, 0x1B);
5212
5213 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
5214 if (INSTR (29, 29))
5215 {
5216 switch (INSTR (23, 22))
5217 {
5218 case 0:
5219 for (i = 0; i < (full ? 16 : 8); i++)
5220 aarch64_set_vec_u8 (cpu, vd, i,
5221 aarch64_get_vec_u8 (cpu, vn, i)
5222 < aarch64_get_vec_u8 (cpu, vm, i)
5223 ? aarch64_get_vec_u8 (cpu, vn, i)
5224 : aarch64_get_vec_u8 (cpu, vm, i));
5225 return;
5226
5227 case 1:
5228 for (i = 0; i < (full ? 8 : 4); i++)
5229 aarch64_set_vec_u16 (cpu, vd, i,
5230 aarch64_get_vec_u16 (cpu, vn, i)
5231 < aarch64_get_vec_u16 (cpu, vm, i)
5232 ? aarch64_get_vec_u16 (cpu, vn, i)
5233 : aarch64_get_vec_u16 (cpu, vm, i));
5234 return;
5235
5236 case 2:
5237 for (i = 0; i < (full ? 4 : 2); i++)
5238 aarch64_set_vec_u32 (cpu, vd, i,
5239 aarch64_get_vec_u32 (cpu, vn, i)
5240 < aarch64_get_vec_u32 (cpu, vm, i)
5241 ? aarch64_get_vec_u32 (cpu, vn, i)
5242 : aarch64_get_vec_u32 (cpu, vm, i));
5243 return;
5244
5245 case 3:
5246 HALT_UNALLOC;
5247 }
5248 }
5249 else
5250 {
5251 switch (INSTR (23, 22))
5252 {
5253 case 0:
5254 for (i = 0; i < (full ? 16 : 8); i++)
5255 aarch64_set_vec_s8 (cpu, vd, i,
5256 aarch64_get_vec_s8 (cpu, vn, i)
5257 < aarch64_get_vec_s8 (cpu, vm, i)
5258 ? aarch64_get_vec_s8 (cpu, vn, i)
5259 : aarch64_get_vec_s8 (cpu, vm, i));
5260 return;
5261
5262 case 1:
5263 for (i = 0; i < (full ? 8 : 4); i++)
5264 aarch64_set_vec_s16 (cpu, vd, i,
5265 aarch64_get_vec_s16 (cpu, vn, i)
5266 < aarch64_get_vec_s16 (cpu, vm, i)
5267 ? aarch64_get_vec_s16 (cpu, vn, i)
5268 : aarch64_get_vec_s16 (cpu, vm, i));
5269 return;
5270
5271 case 2:
5272 for (i = 0; i < (full ? 4 : 2); i++)
5273 aarch64_set_vec_s32 (cpu, vd, i,
5274 aarch64_get_vec_s32 (cpu, vn, i)
5275 < aarch64_get_vec_s32 (cpu, vm, i)
5276 ? aarch64_get_vec_s32 (cpu, vn, i)
5277 : aarch64_get_vec_s32 (cpu, vm, i));
5278 return;
5279
5280 case 3:
5281 HALT_UNALLOC;
5282 }
5283 }
5284 }
5285
5286 static void
5287 do_vec_sub_long (sim_cpu *cpu)
5288 {
5289 /* instr[31] = 0
5290 instr[30] = lower (0) / upper (1)
5291 instr[29] = signed (0) / unsigned (1)
5292 instr[28,24] = 0 1110
5293 instr[23,22] = size: bytes (00), half (01), word (10)
5294 instr[21] = 1
5295 instr[20,16] = Vm
5296 instr[15,10] = 0010 00
5297 instr[9,5] = Vn
5298 instr[4,0] = V dest. */
5299
5300 unsigned size = INSTR (23, 22);
5301 unsigned vm = INSTR (20, 16);
5302 unsigned vn = INSTR (9, 5);
5303 unsigned vd = INSTR (4, 0);
5304 unsigned bias = 0;
5305 unsigned i;
5306
5307 NYI_assert (28, 24, 0x0E);
5308 NYI_assert (21, 21, 1);
5309 NYI_assert (15, 10, 0x08);
5310
5311 if (size == 3)
5312 HALT_UNALLOC;
5313
5314 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
5315 switch (INSTR (30, 29))
5316 {
5317 case 2: /* SSUBL2. */
5318 bias = 2;
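/* Fall through. */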
5319 case 0: /* SSUBL. */
5320 switch (size)
5321 {
5322 case 0:
5323 bias *= 4; /* Upper-half byte elements start at index 8. */
5324 for (i = 0; i < 8; i++)
5325 aarch64_set_vec_s16 (cpu, vd, i,
5326 aarch64_get_vec_s8 (cpu, vn, i + bias)
5327 - aarch64_get_vec_s8 (cpu, vm, i + bias));
5328 break;
5329
5330 case 1:
5331 bias *= 2;
5332 for (i = 0; i < 4; i++)
5333 aarch64_set_vec_s32 (cpu, vd, i,
5334 aarch64_get_vec_s16 (cpu, vn, i + bias)
5335 - aarch64_get_vec_s16 (cpu, vm, i + bias));
5336 break;
5337
5338 case 2:
5339 for (i = 0; i < 2; i++)
5340 aarch64_set_vec_s64 (cpu, vd, i,
5341 aarch64_get_vec_s32 (cpu, vn, i + bias)
5342 - aarch64_get_vec_s32 (cpu, vm, i + bias));
5343 break;
5344
5345 default:
5346 HALT_UNALLOC;
5347 }
5348 break;
5349
5350 case 3: /* USUBL2. */
5351 bias = 2;
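/* Fall through. */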
5352 case 1: /* USUBL. */
5353 switch (size)
5354 {
5355 case 0:
5356 bias *= 4; /* Upper-half byte elements start at index 8. */
5357 for (i = 0; i < 8; i++)
5358 aarch64_set_vec_u16 (cpu, vd, i,
5359 aarch64_get_vec_u8 (cpu, vn, i + bias)
5360 - aarch64_get_vec_u8 (cpu, vm, i + bias));
5361 break;
5362
5363 case 1:
5364 bias *= 2;
5365 for (i = 0; i < 4; i++)
5366 aarch64_set_vec_u32 (cpu, vd, i,
5367 aarch64_get_vec_u16 (cpu, vn, i + bias)
5368 - aarch64_get_vec_u16 (cpu, vm, i + bias));
5369 break;
5370
5371 case 2:
5372 for (i = 0; i < 2; i++)
5373 aarch64_set_vec_u64 (cpu, vd, i,
5374 aarch64_get_vec_u32 (cpu, vn, i + bias)
5375 - aarch64_get_vec_u32 (cpu, vm, i + bias));
5376 break;
5377
5378 default:
5379 HALT_UNALLOC;
5380 }
5381 break;
5382 }
5383 }
5384
5385 static void
5386 do_vec_ADDP (sim_cpu *cpu)
5387 {
5388 /* instr[31] = 0
5389 instr[30] = half(0)/full(1)
5390 instr[29,24] = 00 1110
5391 instr[23,22] = size: bytes (00), half (01), word (10), long (11)
5392 instr[21] = 1
5393 instr[20,16] = Vm
5394 instr[15,10] = 1011 11
5395 instr[9,5] = Vn
5396 instr[4,0] = V dest. */
5397
5398 struct aarch64_sim_cpu *aarch64_cpu = AARCH64_SIM_CPU (cpu);
5399 FRegister copy_vn;
5400 FRegister copy_vm;
5401 unsigned full = INSTR (30, 30);
5402 unsigned size = INSTR (23, 22);
5403 unsigned vm = INSTR (20, 16);
5404 unsigned vn = INSTR (9, 5);
5405 unsigned vd = INSTR (4, 0);
5406 unsigned i, range;
5407
5408 NYI_assert (29, 24, 0x0E);
5409 NYI_assert (21, 21, 1);
5410 NYI_assert (15, 10, 0x2F);
5411
5412 /* Make copies of the source registers in case vd == vn/vm. */
5413 copy_vn = aarch64_cpu->fr[vn];
5414 copy_vm = aarch64_cpu->fr[vm];
5415
5416 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
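/* Pairwise add: the low half of Vd receives the sums of adjacent
pairs of Vn, the high half those of Vm, i.e. vd[i] = vn[2i] +
vn[2i+1] and vd[i + range] = vm[2i] + vm[2i+1]. */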
5417 switch (size)
5418 {
5419 case 0:
5420 range = full ? 8 : 4;
5421 for (i = 0; i < range; i++)
5422 {
5423 aarch64_set_vec_u8 (cpu, vd, i,
5424 copy_vn.b[i * 2] + copy_vn.b[i * 2 + 1]);
5425 aarch64_set_vec_u8 (cpu, vd, i + range,
5426 copy_vm.b[i * 2] + copy_vm.b[i * 2 + 1]);
5427 }
5428 return;
5429
5430 case 1:
5431 range = full ? 4 : 2;
5432 for (i = 0; i < range; i++)
5433 {
5434 aarch64_set_vec_u16 (cpu, vd, i,
5435 copy_vn.h[i * 2] + copy_vn.h[i * 2 + 1]);
5436 aarch64_set_vec_u16 (cpu, vd, i + range,
5437 copy_vm.h[i * 2] + copy_vm.h[i * 2 + 1]);
5438 }
5439 return;
5440
5441 case 2:
5442 range = full ? 2 : 1;
5443 for (i = 0; i < range; i++)
5444 {
5445 aarch64_set_vec_u32 (cpu, vd, i,
5446 copy_vn.w[i * 2] + copy_vn.w[i * 2 + 1]);
5447 aarch64_set_vec_u32 (cpu, vd, i + range,
5448 copy_vm.w[i * 2] + copy_vm.w[i * 2 + 1]);
5449 }
5450 return;
5451
5452 case 3:
5453 if (! full)
5454 HALT_UNALLOC;
5455 aarch64_set_vec_u64 (cpu, vd, 0, copy_vn.v[0] + copy_vn.v[1]);
5456 aarch64_set_vec_u64 (cpu, vd, 1, copy_vm.v[0] + copy_vm.v[1]);
5457 return;
5458 }
5459 }
5460
5461 /* Floating-point vector convert to longer (higher precision). */
5462 static void
5463 do_vec_FCVTL (sim_cpu *cpu)
5464 {
5465 /* instr[31] = 0
5466 instr[30] = half (0) / all (1)
5467 instr[29,23] = 00 1110 0
5468 instr[22] = single (0) / double (1)
5469 instr[21,10] = 10 0001 0111 10
5470 instr[9,5] = Rn
5471 instr[4,0] = Rd. */
5472
5473 unsigned rn = INSTR (9, 5);
5474 unsigned rd = INSTR (4, 0);
5475 unsigned full = INSTR (30, 30);
5476 unsigned i;
5477
5478 NYI_assert (31, 31, 0);
5479 NYI_assert (29, 23, 0x1C);
5480 NYI_assert (21, 10, 0x85E);
5481
5482 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
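/* FCVTL converts the lower half of the source register, FCVTL2
(bit 30 set) the upper half, hence the i + 2*full source index. */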
5483 if (INSTR (22, 22))
5484 {
5485 for (i = 0; i < 2; i++)
5486 aarch64_set_vec_double (cpu, rd, i,
5487 aarch64_get_vec_float (cpu, rn, i + 2*full));
5488 }
5489 else
5490 {
5491 HALT_NYI;
5492
5493 #if 0
5494 /* TODO: Implement missing half-float support. */
5495 for (i = 0; i < 4; i++)
5496 aarch64_set_vec_float (cpu, rd, i,
5497 aarch64_get_vec_halffloat (cpu, rn, i + 4*full));
5498 #endif
5499 }
5500 }
5501
5502 static void
5503 do_vec_FABS (sim_cpu *cpu)
5504 {
5505 /* instr[31] = 0
5506 instr[30] = half(0)/full(1)
5507 instr[29,23] = 00 1110 1
5508 instr[22] = float(0)/double(1)
5509 instr[21,16] = 10 0000
5510 instr[15,10] = 1111 10
5511 instr[9,5] = Vn
5512 instr[4,0] = Vd. */
5513
5514 unsigned vn = INSTR (9, 5);
5515 unsigned vd = INSTR (4, 0);
5516 unsigned full = INSTR (30, 30);
5517 unsigned i;
5518
5519 NYI_assert (29, 23, 0x1D);
5520 NYI_assert (21, 10, 0x83E);
5521
5522 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
5523 if (INSTR (22, 22))
5524 {
5525 if (! full)
5526 HALT_NYI;
5527
5528 for (i = 0; i < 2; i++)
5529 aarch64_set_vec_double (cpu, vd, i,
5530 fabs (aarch64_get_vec_double (cpu, vn, i)));
5531 }
5532 else
5533 {
5534 for (i = 0; i < (full ? 4 : 2); i++)
5535 aarch64_set_vec_float (cpu, vd, i,
5536 fabsf (aarch64_get_vec_float (cpu, vn, i)));
5537 }
5538 }
5539
5540 static void
5541 do_vec_FCVTZS (sim_cpu *cpu)
5542 {
5543 /* instr[31] = 0
5544 instr[30] = half (0) / all (1)
5545 instr[29,23] = 00 1110 1
5546 instr[22] = single (0) / double (1)
5547 instr[21,10] = 10 0001 1011 10
5548 instr[9,5] = Rn
5549 instr[4,0] = Rd. */
5550
5551 unsigned rn = INSTR (9, 5);
5552 unsigned rd = INSTR (4, 0);
5553 unsigned full = INSTR (30, 30);
5554 unsigned i;
5555
5556 NYI_assert (31, 31, 0);
5557 NYI_assert (29, 23, 0x1D);
5558 NYI_assert (21, 10, 0x86E);
5559
5560 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
5561 if (INSTR (22, 22))
5562 {
5563 if (! full)
5564 HALT_UNALLOC;
5565
5566 for (i = 0; i < 2; i++)
5567 aarch64_set_vec_s64 (cpu, rd, i,
5568 (int64_t) aarch64_get_vec_double (cpu, rn, i));
5569 }
5570 else
5571 for (i = 0; i < (full ? 4 : 2); i++)
5572 aarch64_set_vec_s32 (cpu, rd, i,
5573 (int32_t) aarch64_get_vec_float (cpu, rn, i));
5574 }
5575
5576 static void
5577 do_vec_REV64 (sim_cpu *cpu)
5578 {
5579 /* instr[31] = 0
5580 instr[30] = full/half
5581 instr[29,24] = 00 1110
5582 instr[23,22] = size
5583 instr[21,10] = 10 0000 0000 10
5584 instr[9,5] = Rn
5585 instr[4,0] = Rd. */
5586
5587 unsigned rn = INSTR (9, 5);
5588 unsigned rd = INSTR (4, 0);
5589 unsigned size = INSTR (23, 22);
5590 unsigned full = INSTR (30, 30);
5591 unsigned i;
5592 FRegister val;
5593
5594 NYI_assert (29, 24, 0x0E);
5595 NYI_assert (21, 10, 0x802);
5596
5597 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
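/* Reversal is done by XORing the lane index: ^7 reverses bytes
within each 64-bit container, ^3 halfwords and ^1 words. */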
5598 switch (size)
5599 {
5600 case 0:
5601 for (i = 0; i < (full ? 16 : 8); i++)
5602 val.b[i ^ 0x7] = aarch64_get_vec_u8 (cpu, rn, i);
5603 break;
5604
5605 case 1:
5606 for (i = 0; i < (full ? 8 : 4); i++)
5607 val.h[i ^ 0x3] = aarch64_get_vec_u16 (cpu, rn, i);
5608 break;
5609
5610 case 2:
5611 for (i = 0; i < (full ? 4 : 2); i++)
5612 val.w[i ^ 0x1] = aarch64_get_vec_u32 (cpu, rn, i);
5613 break;
5614
5615 case 3:
5616 HALT_UNALLOC;
5617 }
5618
5619 aarch64_set_vec_u64 (cpu, rd, 0, val.v[0]);
5620 if (full)
5621 aarch64_set_vec_u64 (cpu, rd, 1, val.v[1]);
5622 }
5623
5624 static void
5625 do_vec_REV16 (sim_cpu *cpu)
5626 {
5627 /* instr[31] = 0
5628 instr[30] = full/half
5629 instr[29,24] = 00 1110
5630 instr[23,22] = size
5631 instr[21,10] = 10 0000 0001 10
5632 instr[9,5] = Rn
5633 instr[4,0] = Rd. */
5634
5635 unsigned rn = INSTR (9, 5);
5636 unsigned rd = INSTR (4, 0);
5637 unsigned size = INSTR (23, 22);
5638 unsigned full = INSTR (30, 30);
5639 unsigned i;
5640 FRegister val;
5641
5642 NYI_assert (29, 24, 0x0E);
5643 NYI_assert (21, 10, 0x806);
5644
5645 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
5646 switch (size)
5647 {
5648 case 0:
5649 for (i = 0; i < (full ? 16 : 8); i++)
5650 val.b[i ^ 0x1] = aarch64_get_vec_u8 (cpu, rn, i);
5651 break;
5652
5653 default:
5654 HALT_UNALLOC;
5655 }
5656
5657 aarch64_set_vec_u64 (cpu, rd, 0, val.v[0]);
5658 if (full)
5659 aarch64_set_vec_u64 (cpu, rd, 1, val.v[1]);
5660 }
5661
5662 static void
5663 do_vec_op1 (sim_cpu *cpu)
5664 {
5665 /* instr[31] = 0
5666 instr[30] = half/full
5667 instr[29,24] = 00 1110
5668 instr[23,21] = ???
5669 instr[20,16] = Vm
5670 instr[15,10] = sub-opcode
5671 instr[9,5] = Vn
5672 instr[4,0] = Vd */
5673 NYI_assert (29, 24, 0x0E);
5674
5675 if (INSTR (21, 21) == 0)
5676 {
5677 if (INSTR (23, 22) == 0)
5678 {
5679 if (INSTR (30, 30) == 1
5680 && INSTR (17, 14) == 0
5681 && INSTR (12, 10) == 7)
5682 return do_vec_ins_2 (cpu);
5683
5684 switch (INSTR (15, 10))
5685 {
5686 case 0x01: do_vec_DUP_vector_into_vector (cpu); return;
5687 case 0x03: do_vec_DUP_scalar_into_vector (cpu); return;
5688 case 0x07: do_vec_INS (cpu); return;
5689 case 0x0B: do_vec_SMOV_into_scalar (cpu); return;
5690 case 0x0F: do_vec_UMOV_into_scalar (cpu); return;
5691
5692 case 0x00:
5693 case 0x08:
5694 case 0x10:
5695 case 0x18:
5696 do_vec_TBL (cpu); return;
5697
5698 case 0x06:
5699 case 0x16:
5700 do_vec_UZP (cpu); return;
5701
5702 case 0x0A: do_vec_TRN (cpu); return;
5703
5704 case 0x0E:
5705 case 0x1E:
5706 do_vec_ZIP (cpu); return;
5707
5708 default:
5709 HALT_NYI;
5710 }
5711 }
5712
5713 switch (INSTR (13, 10))
5714 {
5715 case 0x6: do_vec_UZP (cpu); return;
5716 case 0xE: do_vec_ZIP (cpu); return;
5717 case 0xA: do_vec_TRN (cpu); return;
5718 default: HALT_NYI;
5719 }
5720 }
5721
5722 switch (INSTR (15, 10))
5723 {
5724 case 0x02: do_vec_REV64 (cpu); return;
5725 case 0x06: do_vec_REV16 (cpu); return;
5726
5727 case 0x07:
5728 switch (INSTR (23, 21))
5729 {
5730 case 1: do_vec_AND (cpu); return;
5731 case 3: do_vec_BIC (cpu); return;
5732 case 5: do_vec_ORR (cpu); return;
5733 case 7: do_vec_ORN (cpu); return;
5734 default: HALT_NYI;
5735 }
5736
5737 case 0x08: do_vec_sub_long (cpu); return;
5738 case 0x0a: do_vec_XTN (cpu); return;
5739 case 0x11: do_vec_SSHL (cpu); return;
5740 case 0x16: do_vec_CNT (cpu); return;
5741 case 0x19: do_vec_max (cpu); return;
5742 case 0x1B: do_vec_min (cpu); return;
5743 case 0x21: do_vec_add (cpu); return;
5744 case 0x25: do_vec_MLA (cpu); return;
5745 case 0x27: do_vec_mul (cpu); return;
5746 case 0x2F: do_vec_ADDP (cpu); return;
5747 case 0x30: do_vec_mull (cpu); return;
5748 case 0x33: do_vec_FMLA (cpu); return;
5749 case 0x35: do_vec_fadd (cpu); return;
5750
5751 case 0x1E:
5752 switch (INSTR (20, 16))
5753 {
5754 case 0x01: do_vec_FCVTL (cpu); return;
5755 default: HALT_NYI;
5756 }
5757
5758 case 0x2E:
5759 switch (INSTR (20, 16))
5760 {
5761 case 0x00: do_vec_ABS (cpu); return;
5762 case 0x01: do_vec_FCVTZS (cpu); return;
5763 case 0x11: do_vec_ADDV (cpu); return;
5764 default: HALT_NYI;
5765 }
5766
5767 case 0x31:
5768 case 0x3B:
5769 do_vec_Fminmax (cpu); return;
5770
5771 case 0x0D:
5772 case 0x0F:
5773 case 0x22:
5774 case 0x23:
5775 case 0x26:
5776 case 0x2A:
5777 case 0x32:
5778 case 0x36:
5779 case 0x39:
5780 case 0x3A:
5781 do_vec_compare (cpu); return;
5782
5783 case 0x3E:
5784 do_vec_FABS (cpu); return;
5785
5786 default:
5787 HALT_NYI;
5788 }
5789 }
5790
5791 static void
5792 do_vec_xtl (sim_cpu *cpu)
5793 {
5794 /* instr[31] = 0
5795 instr[30,29] = SXTL (00), UXTL (01), SXTL2 (10), UXTL2 (11)
5796 instr[28,22] = 0 1111 00
5797 instr[21,16] = size & shift (USHLL, SSHLL, USHLL2, SSHLL2)
5798 instr[15,10] = 1010 01
5799 instr[9,5] = V source
5800 instr[4,0] = V dest. */
5801
5802 unsigned vs = INSTR (9, 5);
5803 unsigned vd = INSTR (4, 0);
5804 unsigned i, shift, bias = 0;
5805
5806 NYI_assert (28, 22, 0x3C);
5807 NYI_assert (15, 10, 0x29);
5808
5809 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
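/* SXTL and UXTL are simply SSHLL/USHLL with a zero shift amount.
The leading one in instr[21,19] encodes the source element size, the
bits below it the shift, and bias selects the upper half of the
source register for the "2" forms. */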
5810 switch (INSTR (30, 29))
5811 {
5812 case 2: /* SXTL2, SSHLL2. */
5813 bias = 2;
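/* Fall through. */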
5814 case 0: /* SXTL, SSHLL. */
5815 if (INSTR (21, 21))
5816 {
5817 int64_t val1, val2;
5818
5819 shift = INSTR (20, 16);
5820 /* Get the source values before setting the destination values
5821 in case the source and destination are the same. */
5822 val1 = aarch64_get_vec_s32 (cpu, vs, bias) << shift;
5823 val2 = aarch64_get_vec_s32 (cpu, vs, bias + 1) << shift;
5824 aarch64_set_vec_s64 (cpu, vd, 0, val1);
5825 aarch64_set_vec_s64 (cpu, vd, 1, val2);
5826 }
5827 else if (INSTR (20, 20))
5828 {
5829 int32_t v[4];
5831
5832 shift = INSTR (19, 16);
5833 bias *= 2;
5834 for (i = 0; i < 4; i++)
5835 v[i] = aarch64_get_vec_s16 (cpu, vs, bias + i) << shift;
5836 for (i = 0; i < 4; i++)
5837 aarch64_set_vec_s32 (cpu, vd, i, v[i]);
5838 }
5839 else
5840 {
5841 int16_t v[8];
5842 NYI_assert (19, 19, 1);
5843
5844 shift = INSTR (18, 16);
5845 bias *= 4;
5846 for (i = 0; i < 8; i++)
5847 v[i] = aarch64_get_vec_s8 (cpu, vs, i + bias) << shift;
5848 for (i = 0; i < 8; i++)
5849 aarch64_set_vec_s16 (cpu, vd, i, v[i]);
5850 }
5851 return;
5852
5853 case 3: /* UXTL2, USHLL2. */
5854 bias = 2;
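/* Fall through. */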
5855 case 1: /* UXTL, USHLL. */
5856 if (INSTR (21, 21))
5857 {
5858 uint64_t v1, v2;
5859 shift = INSTR (20, 16);
5860 v1 = aarch64_get_vec_u32 (cpu, vs, bias) << shift;
5861 v2 = aarch64_get_vec_u32 (cpu, vs, bias + 1) << shift;
5862 aarch64_set_vec_u64 (cpu, vd, 0, v1);
5863 aarch64_set_vec_u64 (cpu, vd, 1, v2);
5864 }
5865 else if (INSTR (20, 20))
5866 {
5867 uint32_t v[4];
5868 shift = INSTR (19, 16);
5869 bias *= 2;
5870 for (i = 0; i < 4; i++)
5871 v[i] = aarch64_get_vec_u16 (cpu, vs, i + bias) << shift;
5872 for (i = 0; i < 4; i++)
5873 aarch64_set_vec_u32 (cpu, vd, i, v[i]);
5874 }
5875 else
5876 {
5877 uint16_t v[8];
5878 NYI_assert (19, 19, 1);
5879
5880 shift = INSTR (18, 16);
5881 bias *= 4;
5882 for (i = 0; i < 8; i++)
5883 v[i] = aarch64_get_vec_u8 (cpu, vs, i + bias) << shift;
5884 for (i = 0; i < 8; i++)
5885 aarch64_set_vec_u16 (cpu, vd, i, v[i]);
5886 }
5887 return;
5888 }
5889 }
5890
5891 static void
5892 do_vec_SHL (sim_cpu *cpu)
5893 {
5894 /* instr [31] = 0
5895 instr [30] = half(0)/full(1)
5896 instr [29,23] = 001 1110
5897 instr [22,16] = size and shift amount
5898 instr [15,10] = 01 0101
5899 instr [9, 5] = Vs
5900 instr [4, 0] = Vd. */
5901
5902 int shift;
5903 int full = INSTR (30, 30);
5904 unsigned vs = INSTR (9, 5);
5905 unsigned vd = INSTR (4, 0);
5906 unsigned i;
5907
5908 NYI_assert (29, 23, 0x1E);
5909 NYI_assert (15, 10, 0x15);
5910
5911 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
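/* The immh:immb field (bits 22,16) encodes both the element size
and the shift: the position of the most significant set bit of immh
gives the size, and the shift amount is immh:immb minus the element
width, which is what the cascade of bit tests below computes. */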
5912 if (INSTR (22, 22))
5913 {
5914 shift = INSTR (21, 16);
5915
5916 if (full == 0)
5917 HALT_UNALLOC;
5918
5919 for (i = 0; i < 2; i++)
5920 {
5921 uint64_t val = aarch64_get_vec_u64 (cpu, vs, i);
5922 aarch64_set_vec_u64 (cpu, vd, i, val << shift);
5923 }
5924
5925 return;
5926 }
5927
5928 if (INSTR (21, 21))
5929 {
5930 shift = INSTR (20, 16);
5931
5932 for (i = 0; i < (full ? 4 : 2); i++)
5933 {
5934 uint32_t val = aarch64_get_vec_u32 (cpu, vs, i);
5935 aarch64_set_vec_u32 (cpu, vd, i, val << shift);
5936 }
5937
5938 return;
5939 }
5940
5941 if (INSTR (20, 20))
5942 {
5943 shift = INSTR (19, 16);
5944
5945 for (i = 0; i < (full ? 8 : 4); i++)
5946 {
5947 uint16_t val = aarch64_get_vec_u16 (cpu, vs, i);
5948 aarch64_set_vec_u16 (cpu, vd, i, val << shift);
5949 }
5950
5951 return;
5952 }
5953
5954 if (INSTR (19, 19) == 0)
5955 HALT_UNALLOC;
5956
5957 shift = INSTR (18, 16);
5958
5959 for (i = 0; i < (full ? 16 : 8); i++)
5960 {
5961 uint8_t val = aarch64_get_vec_u8 (cpu, vs, i);
5962 aarch64_set_vec_u8 (cpu, vd, i, val << shift);
5963 }
5964 }
5965
5966 static void
5967 do_vec_SSHR_USHR (sim_cpu *cpu)
5968 {
5969 /* instr [31] = 0
5970 instr [30] = half(0)/full(1)
5971 instr [29] = signed(0)/unsigned(1)
5972 instr [28,23] = 0 1111 0
5973 instr [22,16] = size and shift amount
5974 instr [15,10] = 0000 01
5975 instr [9, 5] = Vs
5976 instr [4, 0] = Vd. */
5977
5978 int full = INSTR (30, 30);
5979 int sign = ! INSTR (29, 29);
5980 unsigned shift = INSTR (22, 16);
5981 unsigned vs = INSTR (9, 5);
5982 unsigned vd = INSTR (4, 0);
5983 unsigned i;
5984
5985 NYI_assert (28, 23, 0x1E);
5986 NYI_assert (15, 10, 0x01);
5987
5988 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
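/* For the right shifts the encoding is inverted: the shift amount
is twice the element width minus immh:immb, so each size case below
rescales the raw field accordingly (128 - x for 64-bit lanes,
64 - x for 32-bit lanes, and so on). */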
5989 if (INSTR (22, 22))
5990 {
5991 shift = 128 - shift;
5992
5993 if (full == 0)
5994 HALT_UNALLOC;
5995
5996 if (sign)
5997 for (i = 0; i < 2; i++)
5998 {
5999 int64_t val = aarch64_get_vec_s64 (cpu, vs, i);
6000 aarch64_set_vec_s64 (cpu, vd, i, val >> shift);
6001 }
6002 else
6003 for (i = 0; i < 2; i++)
6004 {
6005 uint64_t val = aarch64_get_vec_u64 (cpu, vs, i);
6006 aarch64_set_vec_u64 (cpu, vd, i, val >> shift);
6007 }
6008
6009 return;
6010 }
6011
6012 if (INSTR (21, 21))
6013 {
6014 shift = 64 - shift;
6015
6016 if (sign)
6017 for (i = 0; i < (full ? 4 : 2); i++)
6018 {
6019 int32_t val = aarch64_get_vec_s32 (cpu, vs, i);
6020 aarch64_set_vec_s32 (cpu, vd, i, val >> shift);
6021 }
6022 else
6023 for (i = 0; i < (full ? 4 : 2); i++)
6024 {
6025 uint32_t val = aarch64_get_vec_u32 (cpu, vs, i);
6026 aarch64_set_vec_u32 (cpu, vd, i, val >> shift);
6027 }
6028
6029 return;
6030 }
6031
6032 if (INSTR (20, 20))
6033 {
6034 shift = 32 - shift;
6035
6036 if (sign)
6037 for (i = 0; i < (full ? 8 : 4); i++)
6038 {
6039 int16_t val = aarch64_get_vec_s16 (cpu, vs, i);
6040 aarch64_set_vec_s16 (cpu, vd, i, val >> shift);
6041 }
6042 else
6043 for (i = 0; i < (full ? 8 : 4); i++)
6044 {
6045 uint16_t val = aarch64_get_vec_u16 (cpu, vs, i);
6046 aarch64_set_vec_u16 (cpu, vd, i, val >> shift);
6047 }
6048
6049 return;
6050 }
6051
6052 if (INSTR (19, 19) == 0)
6053 HALT_UNALLOC;
6054
6055 shift = 16 - shift;
6056
6057 if (sign)
6058 for (i = 0; i < (full ? 16 : 8); i++)
6059 {
6060 int8_t val = aarch64_get_vec_s8 (cpu, vs, i);
6061 aarch64_set_vec_s8 (cpu, vd, i, val >> shift);
6062 }
6063 else
6064 for (i = 0; i < (full ? 16 : 8); i++)
6065 {
6066 uint8_t val = aarch64_get_vec_u8 (cpu, vs, i);
6067 aarch64_set_vec_u8 (cpu, vd, i, val >> shift);
6068 }
6069 }
6070
6071 static void
6072 do_vec_MUL_by_element (sim_cpu *cpu)
6073 {
6074 /* instr[31] = 0
6075 instr[30] = half/full
6076 instr[29,24] = 00 1111
6077 instr[23,22] = size
6078 instr[21] = L
6079 instr[20] = M
6080 instr[19,16] = m
6081 instr[15,12] = 1000
6082 instr[11] = H
6083 instr[10] = 0
6084 instr[9,5] = Vn
6085 instr[4,0] = Vd */
6086
6087 unsigned full = INSTR (30, 30);
6088 unsigned L = INSTR (21, 21);
6089 unsigned H = INSTR (11, 11);
6090 unsigned vn = INSTR (9, 5);
6091 unsigned vd = INSTR (4, 0);
6092 unsigned size = INSTR (23, 22);
6093 unsigned index;
6094 unsigned vm;
6095 unsigned e;
6096
6097 NYI_assert (29, 24, 0x0F);
6098 NYI_assert (15, 12, 0x8);
6099 NYI_assert (10, 10, 0);
6100
6101 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
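/* The index of the scalar element in Vm is assembled from the H, L
and M bits; for 16-bit lanes all three are used and Vm is restricted
to V0-V15, while for 32-bit lanes the index is just H:L. */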
6102 switch (size)
6103 {
6104 case 1:
6105 {
6106 /* 16 bit products. */
6107 uint16_t product;
6108 uint16_t element1;
6109 uint16_t element2;
6110
6111 index = (H << 2) | (L << 1) | INSTR (20, 20);
6112 vm = INSTR (19, 16);
6113 element2 = aarch64_get_vec_u16 (cpu, vm, index);
6114
6115 for (e = 0; e < (full ? 8 : 4); e ++)
6116 {
6117 element1 = aarch64_get_vec_u16 (cpu, vn, e);
6118 product = element1 * element2;
6119 aarch64_set_vec_u16 (cpu, vd, e, product);
6120 }
6121 }
6122 break;
6123
6124 case 2:
6125 {
6126 /* 32 bit products. */
6127 uint32_t product;
6128 uint32_t element1;
6129 uint32_t element2;
6130
6131 index = (H << 1) | L;
6132 vm = INSTR (20, 16);
6133 element2 = aarch64_get_vec_u32 (cpu, vm, index);
6134
6135 for (e = 0; e < (full ? 4 : 2); e ++)
6136 {
6137 element1 = aarch64_get_vec_u32 (cpu, vn, e);
6138 product = element1 * element2;
6139 aarch64_set_vec_u32 (cpu, vd, e, product);
6140 }
6141 }
6142 break;
6143
6144 default:
6145 HALT_UNALLOC;
6146 }
6147 }
6148
6149 static void
6150 do_FMLA_by_element (sim_cpu *cpu)
6151 {
6152 /* instr[31] = 0
6153 instr[30] = half/full
6154 instr[29,23] = 00 1111 1
6155 instr[22] = size
6156 instr[21] = L
6157 instr[20,16] = m
6158 instr[15,12] = 0001
6159 instr[11] = H
6160 instr[10] = 0
6161 instr[9,5] = Vn
6162 instr[4,0] = Vd */
6163
6164 unsigned full = INSTR (30, 30);
6165 unsigned size = INSTR (22, 22);
6166 unsigned L = INSTR (21, 21);
6167 unsigned vm = INSTR (20, 16);
6168 unsigned H = INSTR (11, 11);
6169 unsigned vn = INSTR (9, 5);
6170 unsigned vd = INSTR (4, 0);
6171 unsigned e;
6172
6173 NYI_assert (29, 23, 0x1F);
6174 NYI_assert (15, 12, 0x1);
6175 NYI_assert (10, 10, 0);
6176
6177 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
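/* For doubles the element index is just H (L must be zero and the
full form is required); for floats it is H:L. Note the
multiply-accumulate below is computed with two roundings rather than
as a fused IEEE FMA. */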
6178 if (size)
6179 {
6180 double element1, element2;
6181
6182 if (! full || L)
6183 HALT_UNALLOC;
6184
6185 element2 = aarch64_get_vec_double (cpu, vm, H);
6186
6187 for (e = 0; e < 2; e++)
6188 {
6189 element1 = aarch64_get_vec_double (cpu, vn, e);
6190 element1 *= element2;
6191 element1 += aarch64_get_vec_double (cpu, vd, e);
6192 aarch64_set_vec_double (cpu, vd, e, element1);
6193 }
6194 }
6195 else
6196 {
6197 float element1;
6198 float element2 = aarch64_get_vec_float (cpu, vm, (H << 1) | L);
6199
6200 for (e = 0; e < (full ? 4 : 2); e++)
6201 {
6202 element1 = aarch64_get_vec_float (cpu, vn, e);
6203 element1 *= element2;
6204 element1 += aarch64_get_vec_float (cpu, vd, e);
6205 aarch64_set_vec_float (cpu, vd, e, element1);
6206 }
6207 }
6208 }
6209
6210 static void
6211 do_vec_op2 (sim_cpu *cpu)
6212 {
6213 /* instr[31] = 0
6214 instr[30] = half/full
6215 instr[29,24] = 00 1111
6216 instr[23] = ?
6217 instr[22,16] = element size & index
6218 instr[15,10] = sub-opcode
6219 instr[9,5] = Vn
6220 instr[4,0] = Vd */
6221
6222 NYI_assert (29, 24, 0x0F);
6223
6224 if (INSTR (23, 23) != 0)
6225 {
6226 switch (INSTR (15, 10))
6227 {
6228 case 0x04:
6229 case 0x06:
6230 do_FMLA_by_element (cpu);
6231 return;
6232
6233 case 0x20:
6234 case 0x22:
6235 do_vec_MUL_by_element (cpu);
6236 return;
6237
6238 default:
6239 HALT_NYI;
6240 }
6241 }
6242 else
6243 {
6244 switch (INSTR (15, 10))
6245 {
6246 case 0x01: do_vec_SSHR_USHR (cpu); return;
6247 case 0x15: do_vec_SHL (cpu); return;
6248 case 0x20:
6249 case 0x22: do_vec_MUL_by_element (cpu); return;
6250 case 0x29: do_vec_xtl (cpu); return;
6251 default: HALT_NYI;
6252 }
6253 }
6254 }
6255
6256 static void
6257 do_vec_neg (sim_cpu *cpu)
6258 {
6259 /* instr[31] = 0
6260 instr[30] = full(1)/half(0)
6261 instr[29,24] = 10 1110
6262 instr[23,22] = size: byte(00), half (01), word (10), long (11)
6263 instr[21,10] = 1000 0010 1110
6264 instr[9,5] = Vs
6265 instr[4,0] = Vd */
6266
6267 int full = INSTR (30, 30);
6268 unsigned vs = INSTR (9, 5);
6269 unsigned vd = INSTR (4, 0);
6270 unsigned i;
6271
6272 NYI_assert (29, 24, 0x2E);
6273 NYI_assert (21, 10, 0x82E);
6274
6275 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
6276 switch (INSTR (23, 22))
6277 {
6278 case 0:
6279 for (i = 0; i < (full ? 16 : 8); i++)
6280 aarch64_set_vec_s8 (cpu, vd, i, - aarch64_get_vec_s8 (cpu, vs, i));
6281 return;
6282
6283 case 1:
6284 for (i = 0; i < (full ? 8 : 4); i++)
6285 aarch64_set_vec_s16 (cpu, vd, i, - aarch64_get_vec_s16 (cpu, vs, i));
6286 return;
6287
6288 case 2:
6289 for (i = 0; i < (full ? 4 : 2); i++)
6290 aarch64_set_vec_s32 (cpu, vd, i, - aarch64_get_vec_s32 (cpu, vs, i));
6291 return;
6292
6293 case 3:
6294 if (! full)
6295 HALT_NYI;
6296 for (i = 0; i < 2; i++)
6297 aarch64_set_vec_s64 (cpu, vd, i, - aarch64_get_vec_s64 (cpu, vs, i));
6298 return;
6299 }
6300 }
6301
6302 static void
6303 do_vec_sqrt (sim_cpu *cpu)
6304 {
6305 /* instr[31] = 0
6306 instr[30] = full(1)/half(0)
6307 instr[29,23] = 101 1101
6308 instr[22] = single(0)/double(1)
6309 instr[21,10] = 1000 0111 1110
6310 instr[9,5] = Vs
6311 instr[4,0] = Vd. */
6312
6313 int full = INSTR (30, 30);
6314 unsigned vs = INSTR (9, 5);
6315 unsigned vd = INSTR (4, 0);
6316 unsigned i;
6317
6318 NYI_assert (29, 23, 0x5B);
6319 NYI_assert (21, 10, 0x87E);
6320
6321 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
6322 if (INSTR (22, 22) == 0)
6323 for (i = 0; i < (full ? 4 : 2); i++)
6324 aarch64_set_vec_float (cpu, vd, i,
6325 sqrtf (aarch64_get_vec_float (cpu, vs, i)));
6326 else
6327 for (i = 0; i < 2; i++)
6328 aarch64_set_vec_double (cpu, vd, i,
6329 sqrt (aarch64_get_vec_double (cpu, vs, i)));
6330 }
6331
6332 static void
6333 do_vec_mls_indexed (sim_cpu *cpu)
6334 {
6335 /* instr[31] = 0
6336 instr[30] = half(0)/full(1)
6337 instr[29,24] = 10 1111
6338 instr[23,22] = 16-bit(01)/32-bit(10)
6339 instr[21,20],instr[11] = index (if 16-bit)
6340 instr[21],instr[11] = index (if 32-bit)
6341 instr[20,16] = Vm
6342 instr[15,12] = 0100
6343 instr[11] = part of index
6344 instr[10] = 0
6345 instr[9,5] = Vs
6346 instr[4,0] = Vd. */
6347
6348 int full = INSTR (30, 30);
6349 unsigned vs = INSTR (9, 5);
6350 unsigned vd = INSTR (4, 0);
6351 unsigned vm = INSTR (20, 16);
6352 unsigned i;
6353
6354 NYI_assert (15, 12, 4);
6355 NYI_assert (10, 10, 0);
6356
6357 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
6358 switch (INSTR (23, 22))
6359 {
6360 case 1:
6361 {
6362 unsigned elem;
6363 uint32_t val;
6364
6365 if (vm > 15)
6366 HALT_NYI;
6367
6368 elem = (INSTR (21, 20) << 1) | INSTR (11, 11);
6369 val = aarch64_get_vec_u16 (cpu, vm, elem);
6370
6371 for (i = 0; i < (full ? 8 : 4); i++)
6372 aarch64_set_vec_u32 (cpu, vd, i,
6373 aarch64_get_vec_u32 (cpu, vd, i) -
6374 (aarch64_get_vec_u32 (cpu, vs, i) * val));
6375 return;
6376 }
6377
6378 case 2:
6379 {
6380 unsigned elem = (INSTR (21, 21) << 1) | INSTR (11, 11);
6381 uint64_t val = aarch64_get_vec_u32 (cpu, vm, elem);
6382
6383 for (i = 0; i < (full ? 4 : 2); i++)
6384 aarch64_set_vec_u64 (cpu, vd, i,
6385 aarch64_get_vec_u64 (cpu, vd, i) -
6386 (aarch64_get_vec_u64 (cpu, vs, i) * val));
6387 return;
6388 }
6389
6390 case 0:
6391 case 3:
6392 default:
6393 HALT_NYI;
6394 }
6395 }
6396
6397 static void
6398 do_vec_SUB (sim_cpu *cpu)
6399 {
6400 /* instr [31] = 0
6401 instr [30] = half(0)/full(1)
6402 instr [29,24] = 10 1110
6403 instr [23,22] = size: byte(00), half(01), word(10), long(11)
6404 instr [21] = 1
6405 instr [20,16] = Vm
6406 instr [15,10] = 10 0001
6407 instr [9, 5] = Vn
6408 instr [4, 0] = Vd. */
6409
6410 unsigned full = INSTR (30, 30);
6411 unsigned vm = INSTR (20, 16);
6412 unsigned vn = INSTR (9, 5);
6413 unsigned vd = INSTR (4, 0);
6414 unsigned i;
6415
6416 NYI_assert (29, 24, 0x2E);
6417 NYI_assert (21, 21, 1);
6418 NYI_assert (15, 10, 0x21);
6419
6420 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
6421 switch (INSTR (23, 22))
6422 {
6423 case 0:
6424 for (i = 0; i < (full ? 16 : 8); i++)
6425 aarch64_set_vec_s8 (cpu, vd, i,
6426 aarch64_get_vec_s8 (cpu, vn, i)
6427 - aarch64_get_vec_s8 (cpu, vm, i));
6428 return;
6429
6430 case 1:
6431 for (i = 0; i < (full ? 8 : 4); i++)
6432 aarch64_set_vec_s16 (cpu, vd, i,
6433 aarch64_get_vec_s16 (cpu, vn, i)
6434 - aarch64_get_vec_s16 (cpu, vm, i));
6435 return;
6436
6437 case 2:
6438 for (i = 0; i < (full ? 4 : 2); i++)
6439 aarch64_set_vec_s32 (cpu, vd, i,
6440 aarch64_get_vec_s32 (cpu, vn, i)
6441 - aarch64_get_vec_s32 (cpu, vm, i));
6442 return;
6443
6444 case 3:
6445 if (full == 0)
6446 HALT_UNALLOC;
6447
6448 for (i = 0; i < 2; i++)
6449 aarch64_set_vec_s64 (cpu, vd, i,
6450 aarch64_get_vec_s64 (cpu, vn, i)
6451 - aarch64_get_vec_s64 (cpu, vm, i));
6452 return;
6453 }
6454 }
6455
6456 static void
6457 do_vec_MLS (sim_cpu *cpu)
6458 {
6459 /* instr [31] = 0
6460 instr [30] = half(0)/full(1)
6461 instr [29,24] = 10 1110
6462 instr [23,22] = size: byte(00), half(01), word(10)
6463 instr [21] = 1
6464 instr [20,16] = Vm
6465 instr [15,10] = 10 0101
6466 instr [9, 5] = Vn
6467 instr [4, 0] = Vd. */
6468
6469 unsigned full = INSTR (30, 30);
6470 unsigned vm = INSTR (20, 16);
6471 unsigned vn = INSTR (9, 5);
6472 unsigned vd = INSTR (4, 0);
6473 unsigned i;
6474
6475 NYI_assert (29, 24, 0x2E);
6476 NYI_assert (21, 21, 1);
6477 NYI_assert (15, 10, 0x25);
6478
6479 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
6480 switch (INSTR (23, 22))
6481 {
6482 case 0:
6483 for (i = 0; i < (full ? 16 : 8); i++)
6484 aarch64_set_vec_u8 (cpu, vd, i,
6485 aarch64_get_vec_u8 (cpu, vd, i)
6486 - (aarch64_get_vec_u8 (cpu, vn, i)
6487 * aarch64_get_vec_u8 (cpu, vm, i)));
6488 return;
6489
6490 case 1:
6491 for (i = 0; i < (full ? 8 : 4); i++)
6492 aarch64_set_vec_u16 (cpu, vd, i,
6493 aarch64_get_vec_u16 (cpu, vd, i)
6494 - (aarch64_get_vec_u16 (cpu, vn, i)
6495 * aarch64_get_vec_u16 (cpu, vm, i)));
6496 return;
6497
6498 case 2:
6499 for (i = 0; i < (full ? 4 : 2); i++)
6500 aarch64_set_vec_u32 (cpu, vd, i,
6501 aarch64_get_vec_u32 (cpu, vd, i)
6502 - (aarch64_get_vec_u32 (cpu, vn, i)
6503 * aarch64_get_vec_u32 (cpu, vm, i)));
6504 return;
6505
6506 default:
6507 HALT_UNALLOC;
6508 }
6509 }
6510
6511 static void
6512 do_vec_FDIV (sim_cpu *cpu)
6513 {
6514 /* instr [31] = 0
6515 instr [30] = half(0)/full(1)
6516 instr [29,23] = 10 1110 0
6517 instr [22] = float(0)/double(1)
6518 instr [21] = 1
6519 instr [20,16] = Vm
6520 instr [15,10] = 1111 11
6521 instr [9, 5] = Vn
6522 instr [4, 0] = Vd. */
6523
6524 unsigned full = INSTR (30, 30);
6525 unsigned vm = INSTR (20, 16);
6526 unsigned vn = INSTR (9, 5);
6527 unsigned vd = INSTR (4, 0);
6528 unsigned i;
6529
6530 NYI_assert (29, 23, 0x5C);
6531 NYI_assert (21, 21, 1);
6532 NYI_assert (15, 10, 0x3F);
6533
6534 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
6535 if (INSTR (22, 22))
6536 {
6537 if (! full)
6538 HALT_UNALLOC;
6539
6540 for (i = 0; i < 2; i++)
6541 aarch64_set_vec_double (cpu, vd, i,
6542 aarch64_get_vec_double (cpu, vn, i)
6543 / aarch64_get_vec_double (cpu, vm, i));
6544 }
6545 else
6546 for (i = 0; i < (full ? 4 : 2); i++)
6547 aarch64_set_vec_float (cpu, vd, i,
6548 aarch64_get_vec_float (cpu, vn, i)
6549 / aarch64_get_vec_float (cpu, vm, i));
6550 }
6551
6552 static void
6553 do_vec_FMUL (sim_cpu *cpu)
6554 {
6555 /* instr [31] = 0
6556 instr [30] = half(0)/full(1)
6557 instr [29,23] = 10 1110 0
6558 instr [22] = float(0)/double(1)
6559 instr [21] = 1
6560 instr [20,16] = Vm
6561 instr [15,10] = 1101 11
6562 instr [9, 5] = Vn
6563 instr [4, 0] = Vd. */
6564
6565 unsigned full = INSTR (30, 30);
6566 unsigned vm = INSTR (20, 16);
6567 unsigned vn = INSTR (9, 5);
6568 unsigned vd = INSTR (4, 0);
6569 unsigned i;
6570
6571 NYI_assert (29, 23, 0x5C);
6572 NYI_assert (21, 21, 1);
6573 NYI_assert (15, 10, 0x37);
6574
6575 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
6576 if (INSTR (22, 22))
6577 {
6578 if (! full)
6579 HALT_UNALLOC;
6580
6581 for (i = 0; i < 2; i++)
6582 aarch64_set_vec_double (cpu, vd, i,
6583 aarch64_get_vec_double (cpu, vn, i)
6584 * aarch64_get_vec_double (cpu, vm, i));
6585 }
6586 else
6587 for (i = 0; i < (full ? 4 : 2); i++)
6588 aarch64_set_vec_float (cpu, vd, i,
6589 aarch64_get_vec_float (cpu, vn, i)
6590 * aarch64_get_vec_float (cpu, vm, i));
6591 }
6592
6593 static void
6594 do_vec_FADDP (sim_cpu *cpu)
6595 {
6596 /* instr [31] = 0
6597 instr [30] = half(0)/full(1)
6598 instr [29,23] = 10 1110 0
6599 instr [22] = float(0)/double(1)
6600 instr [21] = 1
6601 instr [20,16] = Vm
6602 instr [15,10] = 1101 01
6603 instr [9, 5] = Vn
6604 instr [4, 0] = Vd. */
6605
6606 unsigned full = INSTR (30, 30);
6607 unsigned vm = INSTR (20, 16);
6608 unsigned vn = INSTR (9, 5);
6609 unsigned vd = INSTR (4, 0);
6610
6611 NYI_assert (29, 23, 0x5C);
6612 NYI_assert (21, 21, 1);
6613 NYI_assert (15, 10, 0x35);
6614
6615 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
6616 if (INSTR (22, 22))
6617 {
6618 /* Extract values before adding them in case vd == vn/vm. */
6619 double tmp1 = aarch64_get_vec_double (cpu, vn, 0);
6620 double tmp2 = aarch64_get_vec_double (cpu, vn, 1);
6621 double tmp3 = aarch64_get_vec_double (cpu, vm, 0);
6622 double tmp4 = aarch64_get_vec_double (cpu, vm, 1);
6623
6624 if (! full)
6625 HALT_UNALLOC;
6626
6627 aarch64_set_vec_double (cpu, vd, 0, tmp1 + tmp2);
6628 aarch64_set_vec_double (cpu, vd, 1, tmp3 + tmp4);
6629 }
6630 else
6631 {
6632 /* Extract values before adding them in case vd == vn/vm. */
6633 float tmp1 = aarch64_get_vec_float (cpu, vn, 0);
6634 float tmp2 = aarch64_get_vec_float (cpu, vn, 1);
6635 float tmp5 = aarch64_get_vec_float (cpu, vm, 0);
6636 float tmp6 = aarch64_get_vec_float (cpu, vm, 1);
6637
6638 if (full)
6639 {
6640 float tmp3 = aarch64_get_vec_float (cpu, vn, 2);
6641 float tmp4 = aarch64_get_vec_float (cpu, vn, 3);
6642 float tmp7 = aarch64_get_vec_float (cpu, vm, 2);
6643 float tmp8 = aarch64_get_vec_float (cpu, vm, 3);
6644
6645 aarch64_set_vec_float (cpu, vd, 0, tmp1 + tmp2);
6646 aarch64_set_vec_float (cpu, vd, 1, tmp3 + tmp4);
6647 aarch64_set_vec_float (cpu, vd, 2, tmp5 + tmp6);
6648 aarch64_set_vec_float (cpu, vd, 3, tmp7 + tmp8);
6649 }
6650 else
6651 {
6652 aarch64_set_vec_float (cpu, vd, 0, tmp1 + tmp2);
6653 aarch64_set_vec_float (cpu, vd, 1, tmp5 + tmp6);
6654 }
6655 }
6656 }
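
/* The pairwise layout implemented above: the result is the vector of
   pair-sums of the concatenation Vn:Vm, with the Vn pairs filling the
   low half of Vd and the Vm pairs the high half.  A stand-alone
   sketch over plain arrays (hypothetical helper, not used by the
   decoder):  */

static inline void
faddp_pairs (const float *vn, const float *vm, float *vd, int lanes)
{
  int i;

  for (i = 0; i < lanes / 2; i++)
    vd[i] = vn[2 * i] + vn[2 * i + 1];
  for (i = 0; i < lanes / 2; i++)
    vd[lanes / 2 + i] = vm[2 * i] + vm[2 * i + 1];
}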
6657
6658 static void
6659 do_vec_FSQRT (sim_cpu *cpu)
6660 {
6661 /* instr[31] = 0
6662 instr[30] = half(0)/full(1)
6663 instr[29,23] = 10 1110 1
6664 instr[22] = single(0)/double(1)
6665 instr[21,10] = 10 0001 1111 10
6666 instr[9,5] = Vsrc
6667 instr[4,0] = Vdest. */
6668
6669 unsigned vn = INSTR (9, 5);
6670 unsigned vd = INSTR (4, 0);
6671 unsigned full = INSTR (30, 30);
6672 int i;
6673
6674 NYI_assert (29, 23, 0x5D);
6675 NYI_assert (21, 10, 0x87E);
6676
6677 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
6678 if (INSTR (22, 22))
6679 {
6680 if (! full)
6681 HALT_UNALLOC;
6682
6683 for (i = 0; i < 2; i++)
6684 aarch64_set_vec_double (cpu, vd, i,
6685 sqrt (aarch64_get_vec_double (cpu, vn, i)));
6686 }
6687 else
6688 {
6689 for (i = 0; i < (full ? 4 : 2); i++)
6690 aarch64_set_vec_float (cpu, vd, i,
6691 sqrtf (aarch64_get_vec_float (cpu, vn, i)));
6692 }
6693 }
6694
6695 static void
6696 do_vec_FNEG (sim_cpu *cpu)
6697 {
6698 /* instr[31] = 0
6699 instr[30] = half (0)/full (1)
6700 instr[29,23] = 10 1110 1
6701 instr[22] = single (0)/double (1)
6702 instr[21,10] = 10 0000 1111 10
6703 instr[9,5] = Vsrc
6704 instr[4,0] = Vdest. */
6705
6706 unsigned vn = INSTR (9, 5);
6707 unsigned vd = INSTR (4, 0);
6708 unsigned full = INSTR (30, 30);
6709 int i;
6710
6711 NYI_assert (29, 23, 0x5D);
6712 NYI_assert (21, 10, 0x83E);
6713
6714 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
6715 if (INSTR (22, 22))
6716 {
6717 if (! full)
6718 HALT_UNALLOC;
6719
6720 for (i = 0; i < 2; i++)
6721 aarch64_set_vec_double (cpu, vd, i,
6722 - aarch64_get_vec_double (cpu, vn, i));
6723 }
6724 else
6725 {
6726 for (i = 0; i < (full ? 4 : 2); i++)
6727 aarch64_set_vec_float (cpu, vd, i,
6728 - aarch64_get_vec_float (cpu, vn, i));
6729 }
6730 }
6731
6732 static void
6733 do_vec_NOT (sim_cpu *cpu)
6734 {
6735 /* instr[31] = 0
6736 instr[30] = half (0)/full (1)
6737 instr[29,10] = 10 1110 0010 0000 0101 10
6738 instr[9,5] = Vn
6739 instr[4,0] = Vd. */
6740
6741 unsigned vn = INSTR (9, 5);
6742 unsigned vd = INSTR (4, 0);
6743 unsigned i;
6744 int full = INSTR (30, 30);
6745
6746 NYI_assert (29, 10, 0xB8816);
6747
6748 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
6749 for (i = 0; i < (full ? 16 : 8); i++)
6750 aarch64_set_vec_u8 (cpu, vd, i, ~ aarch64_get_vec_u8 (cpu, vn, i));
6751 }
6752
6753 static unsigned int
6754 clz (uint64_t val, unsigned size)
6755 {
6756 uint64_t mask = 1;
6757 int count;
6758
6759 mask <<= (size - 1);
6760 count = 0;
6761 do
6762 {
6763 if (val & mask)
6764 break;
6765 mask >>= 1;
6766 count ++;
6767 }
6768 while (mask);
6769
6770 return count;
6771 }
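
/* Example: clz (0x0F, 8) == 4, clz (1, 64) == 63 and, by
   construction, clz (0, size) == size.  On GCC or Clang hosts the
   same result could be computed (for val != 0) as

     __builtin_clzll (val) - (64 - size);

   but the loop above stays portable to other compilers.  */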
6772
6773 static void
6774 do_vec_CLZ (sim_cpu *cpu)
6775 {
6776 /* instr[31] = 0
6777 instr[30] = half (0)/full (1)
6778 instr[29,24] = 10 1110
6779 instr[23,22] = size
6780 instr[21,10] = 10 0000 0100 10
6781 instr[9,5] = Vn
6782 instr[4,0] = Vd. */
6783
6784 unsigned vn = INSTR (9, 5);
6785 unsigned vd = INSTR (4, 0);
6786 unsigned i;
6787 int full = INSTR (30,30);
6788
6789 NYI_assert (29, 24, 0x2E);
6790 NYI_assert (21, 10, 0x812);
6791
6792 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
6793 switch (INSTR (23, 22))
6794 {
6795 case 0:
6796 for (i = 0; i < (full ? 16 : 8); i++)
6797 aarch64_set_vec_u8 (cpu, vd, i, clz (aarch64_get_vec_u8 (cpu, vn, i), 8));
6798 break;
6799 case 1:
6800 for (i = 0; i < (full ? 8 : 4); i++)
6801 aarch64_set_vec_u16 (cpu, vd, i, clz (aarch64_get_vec_u16 (cpu, vn, i), 16));
6802 break;
6803 case 2:
6804 for (i = 0; i < (full ? 4 : 2); i++)
6805 aarch64_set_vec_u32 (cpu, vd, i, clz (aarch64_get_vec_u32 (cpu, vn, i), 32));
6806 break;
6807 case 3:
6808 if (! full)
6809 HALT_UNALLOC;
6810 aarch64_set_vec_u64 (cpu, vd, 0, clz (aarch64_get_vec_u64 (cpu, vn, 0), 64));
6811 aarch64_set_vec_u64 (cpu, vd, 1, clz (aarch64_get_vec_u64 (cpu, vn, 1), 64));
6812 break;
6813 }
6814 }
6815
6816 static void
6817 do_vec_MOV_element (sim_cpu *cpu)
6818 {
6819 /* instr[31,21] = 0110 1110 000
6820 instr[20,16] = size & dest index
6821 instr[15] = 0
6822 instr[14,11] = source index
6823 instr[10] = 1
6824 instr[9,5] = Vs
6825 instr[4,0] = Vd. */
6826
6827 unsigned vs = INSTR (9, 5);
6828 unsigned vd = INSTR (4, 0);
6829 unsigned src_index;
6830 unsigned dst_index;
6831
6832 NYI_assert (31, 21, 0x370);
6833 NYI_assert (15, 15, 0);
6834 NYI_assert (10, 10, 1);
6835
6836 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
6837 if (INSTR (16, 16))
6838 {
6839 /* Move a byte. */
6840 src_index = INSTR (14, 11);
6841 dst_index = INSTR (20, 17);
6842 aarch64_set_vec_u8 (cpu, vd, dst_index,
6843 aarch64_get_vec_u8 (cpu, vs, src_index));
6844 }
6845 else if (INSTR (17, 17))
6846 {
6847 /* Move 16-bits. */
6848 NYI_assert (11, 11, 0);
6849 src_index = INSTR (14, 12);
6850 dst_index = INSTR (20, 18);
6851 aarch64_set_vec_u16 (cpu, vd, dst_index,
6852 aarch64_get_vec_u16 (cpu, vs, src_index));
6853 }
6854 else if (INSTR (18, 18))
6855 {
6856 /* Move 32-bits. */
6857 NYI_assert (12, 11, 0);
6858 src_index = INSTR (14, 13);
6859 dst_index = INSTR (20, 19);
6860 aarch64_set_vec_u32 (cpu, vd, dst_index,
6861 aarch64_get_vec_u32 (cpu, vs, src_index));
6862 }
6863 else
6864 {
6865 NYI_assert (19, 19, 1);
6866 NYI_assert (13, 11, 0);
6867 src_index = INSTR (14, 14);
6868 dst_index = INSTR (20, 20);
6869 aarch64_set_vec_u64 (cpu, vd, dst_index,
6870 aarch64_get_vec_u64 (cpu, vs, src_index));
6871 }
6872 }
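
/* The imm5 field (instr[20,16]) decoded bit-by-bit above encodes the
   element size in its lowest set bit: xxxx1 = byte, xxx10 = half,
   xx100 = word, x1000 = double, with the remaining high bits holding
   the destination index.  A sketch of the same decode (hypothetical
   helper, not used by the decoder):  */

static inline unsigned
ins_element_size (unsigned imm5)
{
  /* Element size in bytes: 1, 2, 4 or 8; any other result is an
     unallocated encoding.  */
  return imm5 & - imm5;   /* Isolate the lowest set bit.  */
}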
6873
6874 static void
6875 do_vec_REV32 (sim_cpu *cpu)
6876 {
6877 /* instr[31] = 0
6878 instr[30] = full/half
6879 instr[29,24] = 10 1110
6880 instr[23,22] = size
6881 instr[21,10] = 10 0000 0000 10
6882 instr[9,5] = Rn
6883 instr[4,0] = Rd. */
6884
6885 unsigned rn = INSTR (9, 5);
6886 unsigned rd = INSTR (4, 0);
6887 unsigned size = INSTR (23, 22);
6888 unsigned full = INSTR (30, 30);
6889 unsigned i;
6890 FRegister val;
6891
6892 NYI_assert (29, 24, 0x2E);
6893 NYI_assert (21, 10, 0x802);
6894
6895 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
6896 switch (size)
6897 {
6898 case 0:
6899 for (i = 0; i < (full ? 16 : 8); i++)
6900 val.b[i ^ 0x3] = aarch64_get_vec_u8 (cpu, rn, i);
6901 break;
6902
6903 case 1:
6904 for (i = 0; i < (full ? 8 : 4); i++)
6905 val.h[i ^ 0x1] = aarch64_get_vec_u16 (cpu, rn, i);
6906 break;
6907
6908 default:
6909 HALT_UNALLOC;
6910 }
6911
6912 aarch64_set_vec_u64 (cpu, rd, 0, val.v[0]);
6913 if (full)
6914 aarch64_set_vec_u64 (cpu, rd, 1, val.v[1]);
6915 }
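
/* The i ^ 0x3 and i ^ 0x1 index flips above are the whole of REV32:
   XOR-ing an element index with (4 / element-size-in-bytes) - 1
   reverses the element order within each 32-bit chunk.  With byte
   elements, for example, indices 0,1,2,3 map to 3,2,1,0 and indices
   4,5,6,7 map to 7,6,5,4.  */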
6916
6917 static void
6918 do_vec_EXT (sim_cpu *cpu)
6919 {
6920 /* instr[31] = 0
6921 instr[30] = full/half
6922 instr[29,21] = 10 1110 000
6923 instr[20,16] = Vm
6924 instr[15] = 0
6925 instr[14,11] = source index
6926 instr[10] = 0
6927 instr[9,5] = Vn
6928 instr[4,0] = Vd. */
6929
6930 unsigned vm = INSTR (20, 16);
6931 unsigned vn = INSTR (9, 5);
6932 unsigned vd = INSTR (4, 0);
6933 unsigned src_index = INSTR (14, 11);
6934 unsigned full = INSTR (30, 30);
6935 unsigned i;
6936 unsigned j;
6937 FRegister val;
6938
6939 NYI_assert (31, 21, 0x370);
6940 NYI_assert (15, 15, 0);
6941 NYI_assert (10, 10, 0);
6942
6943 if (!full && (src_index & 0x8))
6944 HALT_UNALLOC;
6945
6946 j = 0;
6947
6948 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
6949 for (i = src_index; i < (full ? 16 : 8); i++)
6950 val.b[j ++] = aarch64_get_vec_u8 (cpu, vn, i);
6951 for (i = 0; i < src_index; i++)
6952 val.b[j ++] = aarch64_get_vec_u8 (cpu, vm, i);
6953
6954 aarch64_set_vec_u64 (cpu, vd, 0, val.v[0]);
6955 if (full)
6956 aarch64_set_vec_u64 (cpu, vd, 1, val.v[1]);
6957 }
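
/* EXT extracts Vd from the byte concatenation Vn:Vm shifted right by
   the index: bytes index..top of Vn followed by bytes 0..index-1 of
   Vm.  For a full-width vector with index 3, for example:

     Vd = { Vn[3], ..., Vn[15], Vm[0], Vm[1], Vm[2] }  */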
6958
6959 static void
6960 dexAdvSIMD0 (sim_cpu *cpu)
6961 {
6962 /* instr [28,25] = 0 111. */
6963 if ( INSTR (15, 10) == 0x07
6964 && (INSTR (9, 5) ==
6965 INSTR (20, 16)))
6966 {
6967 if (INSTR (31, 21) == 0x075
6968 || INSTR (31, 21) == 0x275)
6969 {
6970 do_vec_MOV_whole_vector (cpu);
6971 return;
6972 }
6973 }
6974
6975 if (INSTR (29, 19) == 0x1E0)
6976 {
6977 do_vec_MOV_immediate (cpu);
6978 return;
6979 }
6980
6981 if (INSTR (29, 19) == 0x5E0)
6982 {
6983 do_vec_MVNI (cpu);
6984 return;
6985 }
6986
6987 if (INSTR (29, 19) == 0x1C0
6988 || INSTR (29, 19) == 0x1C1)
6989 {
6990 if (INSTR (15, 10) == 0x03)
6991 {
6992 do_vec_DUP_scalar_into_vector (cpu);
6993 return;
6994 }
6995 }
6996
6997 switch (INSTR (29, 24))
6998 {
6999 case 0x0E: do_vec_op1 (cpu); return;
7000 case 0x0F: do_vec_op2 (cpu); return;
7001
7002 case 0x2E:
7003 if (INSTR (21, 21) == 1)
7004 {
7005 switch (INSTR (15, 10))
7006 {
7007 case 0x02:
7008 do_vec_REV32 (cpu);
7009 return;
7010
7011 case 0x07:
7012 switch (INSTR (23, 22))
7013 {
7014 case 0: do_vec_EOR (cpu); return;
7015 case 1: do_vec_BSL (cpu); return;
7016 case 2:
7017 case 3: do_vec_bit (cpu); return;
7018 }
7019 break;
7020
7021 case 0x08: do_vec_sub_long (cpu); return;
7022 case 0x11: do_vec_USHL (cpu); return;
7023 case 0x12: do_vec_CLZ (cpu); return;
7024 case 0x16: do_vec_NOT (cpu); return;
7025 case 0x19: do_vec_max (cpu); return;
7026 case 0x1B: do_vec_min (cpu); return;
7027 case 0x21: do_vec_SUB (cpu); return;
7028 case 0x25: do_vec_MLS (cpu); return;
7029 case 0x31: do_vec_FminmaxNMP (cpu); return;
7030 case 0x35: do_vec_FADDP (cpu); return;
7031 case 0x37: do_vec_FMUL (cpu); return;
7032 case 0x3F: do_vec_FDIV (cpu); return;
7033
7034 case 0x3E:
7035 switch (INSTR (20, 16))
7036 {
7037 case 0x00: do_vec_FNEG (cpu); return;
7038 case 0x01: do_vec_FSQRT (cpu); return;
7039 default: HALT_NYI;
7040 }
7041
7042 case 0x0D:
7043 case 0x0F:
7044 case 0x22:
7045 case 0x23:
7046 case 0x26:
7047 case 0x2A:
7048 case 0x32:
7049 case 0x36:
7050 case 0x39:
7051 case 0x3A:
7052 do_vec_compare (cpu); return;
7053
7054 default:
7055 break;
7056 }
7057 }
7058
7059 if (INSTR (31, 21) == 0x370)
7060 {
7061 if (INSTR (10, 10))
7062 do_vec_MOV_element (cpu);
7063 else
7064 do_vec_EXT (cpu);
7065 return;
7066 }
7067
7068 switch (INSTR (21, 10))
7069 {
7070 case 0x82E: do_vec_neg (cpu); return;
7071 case 0x87E: do_vec_sqrt (cpu); return;
7072 default:
7073 if (INSTR (15, 10) == 0x30)
7074 {
7075 do_vec_mull (cpu);
7076 return;
7077 }
7078 break;
7079 }
7080 break;
7081
7082 case 0x2f:
7083 switch (INSTR (15, 10))
7084 {
7085 case 0x01: do_vec_SSHR_USHR (cpu); return;
7086 case 0x10:
7087 case 0x12: do_vec_mls_indexed (cpu); return;
7088 case 0x29: do_vec_xtl (cpu); return;
7089 default:
7090 HALT_NYI;
7091 }
7092
7093 default:
7094 break;
7095 }
7096
7097 HALT_NYI;
7098 }
7099
7100 /* 3 sources. */
7101
7102 /* Float multiply add. */
7103 static void
7104 fmadds (sim_cpu *cpu)
7105 {
7106 unsigned sa = INSTR (14, 10);
7107 unsigned sm = INSTR (20, 16);
7108 unsigned sn = INSTR ( 9, 5);
7109 unsigned sd = INSTR ( 4, 0);
7110
7111 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7112 aarch64_set_FP_float (cpu, sd, aarch64_get_FP_float (cpu, sa)
7113 + aarch64_get_FP_float (cpu, sn)
7114 * aarch64_get_FP_float (cpu, sm));
7115 }
7116
7117 /* Double multiply add. */
7118 static void
7119 fmaddd (sim_cpu *cpu)
7120 {
7121 unsigned sa = INSTR (14, 10);
7122 unsigned sm = INSTR (20, 16);
7123 unsigned sn = INSTR ( 9, 5);
7124 unsigned sd = INSTR ( 4, 0);
7125
7126 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7127 aarch64_set_FP_double (cpu, sd, aarch64_get_FP_double (cpu, sa)
7128 + aarch64_get_FP_double (cpu, sn)
7129 * aarch64_get_FP_double (cpu, sm));
7130 }
7131
7132 /* Float multiply subtract. */
7133 static void
7134 fmsubs (sim_cpu *cpu)
7135 {
7136 unsigned sa = INSTR (14, 10);
7137 unsigned sm = INSTR (20, 16);
7138 unsigned sn = INSTR ( 9, 5);
7139 unsigned sd = INSTR ( 4, 0);
7140
7141 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7142 aarch64_set_FP_float (cpu, sd, aarch64_get_FP_float (cpu, sa)
7143 - aarch64_get_FP_float (cpu, sn)
7144 * aarch64_get_FP_float (cpu, sm));
7145 }
7146
7147 /* Double multiply subtract. */
7148 static void
7149 fmsubd (sim_cpu *cpu)
7150 {
7151 unsigned sa = INSTR (14, 10);
7152 unsigned sm = INSTR (20, 16);
7153 unsigned sn = INSTR ( 9, 5);
7154 unsigned sd = INSTR ( 4, 0);
7155
7156 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7157 aarch64_set_FP_double (cpu, sd, aarch64_get_FP_double (cpu, sa)
7158 - aarch64_get_FP_double (cpu, sn)
7159 * aarch64_get_FP_double (cpu, sm));
7160 }
7161
7162 /* Float negative multiply add. */
7163 static void
7164 fnmadds (sim_cpu *cpu)
7165 {
7166 unsigned sa = INSTR (14, 10);
7167 unsigned sm = INSTR (20, 16);
7168 unsigned sn = INSTR ( 9, 5);
7169 unsigned sd = INSTR ( 4, 0);
7170
7171 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7172 aarch64_set_FP_float (cpu, sd, - aarch64_get_FP_float (cpu, sa)
7173 + (- aarch64_get_FP_float (cpu, sn))
7174 * aarch64_get_FP_float (cpu, sm));
7175 }
7176
7177 /* Double negative multiply add. */
7178 static void
7179 fnmaddd (sim_cpu *cpu)
7180 {
7181 unsigned sa = INSTR (14, 10);
7182 unsigned sm = INSTR (20, 16);
7183 unsigned sn = INSTR ( 9, 5);
7184 unsigned sd = INSTR ( 4, 0);
7185
7186 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7187 aarch64_set_FP_double (cpu, sd, - aarch64_get_FP_double (cpu, sa)
7188 + (- aarch64_get_FP_double (cpu, sn))
7189 * aarch64_get_FP_double (cpu, sm));
7190 }
7191
7192 /* Float negative multiply subtract. */
7193 static void
7194 fnmsubs (sim_cpu *cpu)
7195 {
7196 unsigned sa = INSTR (14, 10);
7197 unsigned sm = INSTR (20, 16);
7198 unsigned sn = INSTR ( 9, 5);
7199 unsigned sd = INSTR ( 4, 0);
7200
7201 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7202 aarch64_set_FP_float (cpu, sd, - aarch64_get_FP_float (cpu, sa)
7203 + aarch64_get_FP_float (cpu, sn)
7204 * aarch64_get_FP_float (cpu, sm));
7205 }
7206
7207 /* Double negative multiply subtract. */
7208 static void
7209 fnmsubd (sim_cpu *cpu)
7210 {
7211 unsigned sa = INSTR (14, 10);
7212 unsigned sm = INSTR (20, 16);
7213 unsigned sn = INSTR ( 9, 5);
7214 unsigned sd = INSTR ( 4, 0);
7215
7216 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7217 aarch64_set_FP_double (cpu, sd, - aarch64_get_FP_double (cpu, sa)
7218 + aarch64_get_FP_double (cpu, sn)
7219 * aarch64_get_FP_double (cpu, sm));
7220 }
7221
7222 static void
7223 dexSimpleFPDataProc3Source (sim_cpu *cpu)
7224 {
7225 /* instr[31] ==> M : 0 ==> OK, 1 ==> UNALLOC
7226 instr[30] = 0
7227 instr[29] ==> S : 0 ==> OK, 1 ==> UNALLOC
7228 instr[28,25] = 1111
7229 instr[24] = 1
7230 instr[23,22] ==> type : 00 ==> single, 01 ==> double, 1x ==> UNALLOC
7231 instr[21] ==> o1 : 0 ==> unnegated, 1 ==> negated
7232 instr[15] ==> o2 : 0 ==> ADD, 1 ==> SUB */
7233
7234 uint32_t M_S = (INSTR (31, 31) << 1) | INSTR (29, 29);
7235 /* dispatch on combined type:o1:o2. */
7236 uint32_t dispatch = (INSTR (23, 21) << 1) | INSTR (15, 15);
7237
7238 if (M_S != 0)
7239 HALT_UNALLOC;
7240
7241 switch (dispatch)
7242 {
7243 case 0: fmadds (cpu); return;
7244 case 1: fmsubs (cpu); return;
7245 case 2: fnmadds (cpu); return;
7246 case 3: fnmsubs (cpu); return;
7247 case 4: fmaddd (cpu); return;
7248 case 5: fmsubd (cpu); return;
7249 case 6: fnmaddd (cpu); return;
7250 case 7: fnmsubd (cpu); return;
7251 default:
7252 /* type > 1 is currently unallocated. */
7253 HALT_UNALLOC;
7254 }
7255 }
7256
7257 static void
7258 dexSimpleFPFixedConvert (sim_cpu *cpu)
7259 {
7260 HALT_NYI;
7261 }
7262
7263 static void
7264 dexSimpleFPCondCompare (sim_cpu *cpu)
7265 {
7266 /* instr [31,23] = 0001 1110 0
7267 instr [22] = type
7268 instr [21] = 1
7269 instr [20,16] = Rm
7270 instr [15,12] = condition
7271 instr [11,10] = 01
7272 instr [9,5] = Rn
7273 instr [4] = 0
7274 instr [3,0] = nzcv */
7275
7276 unsigned rm = INSTR (20, 16);
7277 unsigned rn = INSTR (9, 5);
7278
7279 NYI_assert (31, 23, 0x3C);
7280 NYI_assert (11, 10, 0x1);
7281 NYI_assert (4, 4, 0);
7282
7283 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7284 if (! testConditionCode (cpu, INSTR (15, 12)))
7285 {
7286 aarch64_set_CPSR (cpu, INSTR (3, 0));
7287 return;
7288 }
7289
7290 if (INSTR (22, 22))
7291 {
7292 /* Double precision. */
7293 double val1 = aarch64_get_vec_double (cpu, rn, 0);
7294 double val2 = aarch64_get_vec_double (cpu, rm, 0);
7295
7296 /* FIXME: Check for NaNs. */
7297 if (val1 == val2)
7298 aarch64_set_CPSR (cpu, (Z | C));
7299 else if (val1 < val2)
7300 aarch64_set_CPSR (cpu, N);
7301 else /* val1 > val2 */
7302 aarch64_set_CPSR (cpu, C);
7303 }
7304 else
7305 {
7306 /* Single precision. */
7307 float val1 = aarch64_get_vec_float (cpu, rn, 0);
7308 float val2 = aarch64_get_vec_float (cpu, rm, 0);
7309
7310 /* FIXME: Check for NaNs. */
7311 if (val1 == val2)
7312 aarch64_set_CPSR (cpu, (Z | C));
7313 else if (val1 < val2)
7314 aarch64_set_CPSR (cpu, N);
7315 else /* val1 > val2 */
7316 aarch64_set_CPSR (cpu, C);
7317 }
7318 }
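
/* A sketch of the NaN handling the FIXMEs above ask for: an AArch64
   floating-point compare that is unordered sets NZCV to 0011, so each
   arm could begin with (assuming isnan from <math.h> and the V flag
   macro alongside N, Z and C; signalling-NaN traps, i.e. FCCMPE
   semantics, still not modelled):

     if (isnan (val1) || isnan (val2))
       aarch64_set_CPSR (cpu, C | V);
     else if (val1 == val2)
       ...  */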
7319
7320 /* 2 sources. */
7321
7322 /* Float add. */
7323 static void
7324 fadds (sim_cpu *cpu)
7325 {
7326 unsigned sm = INSTR (20, 16);
7327 unsigned sn = INSTR ( 9, 5);
7328 unsigned sd = INSTR ( 4, 0);
7329
7330 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7331 aarch64_set_FP_float (cpu, sd, aarch64_get_FP_float (cpu, sn)
7332 + aarch64_get_FP_float (cpu, sm));
7333 }
7334
7335 /* Double add. */
7336 static void
7337 faddd (sim_cpu *cpu)
7338 {
7339 unsigned sm = INSTR (20, 16);
7340 unsigned sn = INSTR ( 9, 5);
7341 unsigned sd = INSTR ( 4, 0);
7342
7343 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7344 aarch64_set_FP_double (cpu, sd, aarch64_get_FP_double (cpu, sn)
7345 + aarch64_get_FP_double (cpu, sm));
7346 }
7347
7348 /* Float divide. */
7349 static void
7350 fdivs (sim_cpu *cpu)
7351 {
7352 unsigned sm = INSTR (20, 16);
7353 unsigned sn = INSTR ( 9, 5);
7354 unsigned sd = INSTR ( 4, 0);
7355
7356 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7357 aarch64_set_FP_float (cpu, sd, aarch64_get_FP_float (cpu, sn)
7358 / aarch64_get_FP_float (cpu, sm));
7359 }
7360
7361 /* Double divide. */
7362 static void
7363 fdivd (sim_cpu *cpu)
7364 {
7365 unsigned sm = INSTR (20, 16);
7366 unsigned sn = INSTR ( 9, 5);
7367 unsigned sd = INSTR ( 4, 0);
7368
7369 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7370 aarch64_set_FP_double (cpu, sd, aarch64_get_FP_double (cpu, sn)
7371 / aarch64_get_FP_double (cpu, sm));
7372 }
7373
7374 /* Float multiply. */
7375 static void
7376 fmuls (sim_cpu *cpu)
7377 {
7378 unsigned sm = INSTR (20, 16);
7379 unsigned sn = INSTR ( 9, 5);
7380 unsigned sd = INSTR ( 4, 0);
7381
7382 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7383 aarch64_set_FP_float (cpu, sd, aarch64_get_FP_float (cpu, sn)
7384 * aarch64_get_FP_float (cpu, sm));
7385 }
7386
7387 /* Double multiply. */
7388 static void
7389 fmuld (sim_cpu *cpu)
7390 {
7391 unsigned sm = INSTR (20, 16);
7392 unsigned sn = INSTR ( 9, 5);
7393 unsigned sd = INSTR ( 4, 0);
7394
7395 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7396 aarch64_set_FP_double (cpu, sd, aarch64_get_FP_double (cpu, sn)
7397 * aarch64_get_FP_double (cpu, sm));
7398 }
7399
7400 /* Float negate and multiply. */
7401 static void
7402 fnmuls (sim_cpu *cpu)
7403 {
7404 unsigned sm = INSTR (20, 16);
7405 unsigned sn = INSTR ( 9, 5);
7406 unsigned sd = INSTR ( 4, 0);
7407
7408 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7409 aarch64_set_FP_float (cpu, sd, - (aarch64_get_FP_float (cpu, sn)
7410 * aarch64_get_FP_float (cpu, sm)));
7411 }
7412
7413 /* Double negate and multiply. */
7414 static void
7415 fnmuld (sim_cpu *cpu)
7416 {
7417 unsigned sm = INSTR (20, 16);
7418 unsigned sn = INSTR ( 9, 5);
7419 unsigned sd = INSTR ( 4, 0);
7420
7421 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7422 aarch64_set_FP_double (cpu, sd, - (aarch64_get_FP_double (cpu, sn)
7423 * aarch64_get_FP_double (cpu, sm)));
7424 }
7425
7426 /* Float subtract. */
7427 static void
7428 fsubs (sim_cpu *cpu)
7429 {
7430 unsigned sm = INSTR (20, 16);
7431 unsigned sn = INSTR ( 9, 5);
7432 unsigned sd = INSTR ( 4, 0);
7433
7434 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7435 aarch64_set_FP_float (cpu, sd, aarch64_get_FP_float (cpu, sn)
7436 - aarch64_get_FP_float (cpu, sm));
7437 }
7438
7439 /* Double subtract. */
7440 static void
7441 fsubd (sim_cpu *cpu)
7442 {
7443 unsigned sm = INSTR (20, 16);
7444 unsigned sn = INSTR ( 9, 5);
7445 unsigned sd = INSTR ( 4, 0);
7446
7447 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7448 aarch64_set_FP_double (cpu, sd, aarch64_get_FP_double (cpu, sn)
7449 - aarch64_get_FP_double (cpu, sm));
7450 }
7451
7452 static void
7453 do_FMINNM (sim_cpu *cpu)
7454 {
7455 /* instr[31,23] = 0 0011 1100
7456 instr[22] = float(0)/double(1)
7457 instr[21] = 1
7458 instr[20,16] = Sm
7459 instr[15,10] = 01 1110
7460 instr[9,5] = Sn
7461 instr[4,0] = Sd */
7462
7463 unsigned sm = INSTR (20, 16);
7464 unsigned sn = INSTR ( 9, 5);
7465 unsigned sd = INSTR ( 4, 0);
7466
7467 NYI_assert (31, 23, 0x03C);
7468 NYI_assert (15, 10, 0x1E);
7469
7470 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7471 if (INSTR (22, 22))
7472 aarch64_set_FP_double (cpu, sd,
7473 dminnm (aarch64_get_FP_double (cpu, sn),
7474 aarch64_get_FP_double (cpu, sm)));
7475 else
7476 aarch64_set_FP_float (cpu, sd,
7477 fminnm (aarch64_get_FP_float (cpu, sn),
7478 aarch64_get_FP_float (cpu, sm)));
7479 }
7480
7481 static void
7482 do_FMAXNM (sim_cpu *cpu)
7483 {
7484 /* instr[31,23] = 0 0011 1100
7485 instr[22] = float(0)/double(1)
7486 instr[21] = 1
7487 instr[20,16] = Sm
7488 instr[15,10] = 01 1010
7489 instr[9,5] = Sn
7490 instr[4,0] = Sd */
7491
7492 unsigned sm = INSTR (20, 16);
7493 unsigned sn = INSTR ( 9, 5);
7494 unsigned sd = INSTR ( 4, 0);
7495
7496 NYI_assert (31, 23, 0x03C);
7497 NYI_assert (15, 10, 0x1A);
7498
7499 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7500 if (INSTR (22, 22))
7501 aarch64_set_FP_double (cpu, sd,
7502 dmaxnm (aarch64_get_FP_double (cpu, sn),
7503 aarch64_get_FP_double (cpu, sm)));
7504 else
7505 aarch64_set_FP_float (cpu, sd,
7506 fmaxnm (aarch64_get_FP_float (cpu, sn),
7507 aarch64_get_FP_float (cpu, sm)));
7508 }
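
/* fminnm/dminnm and fmaxnm/dmaxnm are presumably meant to follow the
   FMINNM/FMAXNM rule that a single quiet-NaN operand loses and the
   other operand is returned.  C99 fmin/fmax implement the same
   quiet-NaN rule, so a minimal host-library definition of the helpers
   (an assumption about how they could be written, modulo signalling
   NaNs and the sign of zero) would be:

     #define fminnm(a, b) fminf ((a), (b))
     #define dminnm(a, b) fmin ((a), (b))
     #define fmaxnm(a, b) fmaxf ((a), (b))
     #define dmaxnm(a, b) fmax ((a), (b))  */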
7509
7510 static void
7511 dexSimpleFPDataProc2Source (sim_cpu *cpu)
7512 {
7513 /* instr[31] ==> M : 0 ==> OK, 1 ==> UNALLOC
7514 instr[30] = 0
7515 instr[29] ==> S : 0 ==> OK, 1 ==> UNALLOC
7516 instr[28,25] = 1111
7517 instr[24] = 0
7518 instr[23,22] ==> type : 00 ==> single, 01 ==> double, 1x ==> UNALLOC
7519 instr[21] = 1
7520 instr[20,16] = Vm
7521 instr[15,12] ==> opcode : 0000 ==> FMUL, 0001 ==> FDIV
7522 0010 ==> FADD, 0011 ==> FSUB,
7523 0100 ==> FMAX, 0101 ==> FMIN
7524 0110 ==> FMAXNM, 0111 ==> FMINNM
7525 1000 ==> FNMUL, ow ==> UNALLOC
7526 instr[11,10] = 10
7527 instr[9,5] = Vn
7528 instr[4,0] = Vd */
7529
7530 uint32_t M_S = (INSTR (31, 31) << 1) | INSTR (29, 29);
7531 uint32_t type = INSTR (23, 22);
7532 /* Dispatch on opcode. */
7533 uint32_t dispatch = INSTR (15, 12);
7534
7535 if (type > 1)
7536 HALT_UNALLOC;
7537
7538 if (M_S != 0)
7539 HALT_UNALLOC;
7540
7541 if (type)
7542 switch (dispatch)
7543 {
7544 case 0: fmuld (cpu); return;
7545 case 1: fdivd (cpu); return;
7546 case 2: faddd (cpu); return;
7547 case 3: fsubd (cpu); return;
7548 case 6: do_FMAXNM (cpu); return;
7549 case 7: do_FMINNM (cpu); return;
7550 case 8: fnmuld (cpu); return;
7551
7552 /* Have not yet implemented fmax and fmin. */
7553 case 4:
7554 case 5:
7555 HALT_NYI;
7556
7557 default:
7558 HALT_UNALLOC;
7559 }
7560 else /* type == 0 => floats. */
7561 switch (dispatch)
7562 {
7563 case 0: fmuls (cpu); return;
7564 case 1: fdivs (cpu); return;
7565 case 2: fadds (cpu); return;
7566 case 3: fsubs (cpu); return;
7567 case 6: do_FMAXNM (cpu); return;
7568 case 7: do_FMINNM (cpu); return;
7569 case 8: fnmuls (cpu); return;
7570
7571 case 4:
7572 case 5:
7573 HALT_NYI;
7574
7575 default:
7576 HALT_UNALLOC;
7577 }
7578 }
7579
7580 static void
7581 dexSimpleFPCondSelect (sim_cpu *cpu)
7582 {
7583 /* FCSEL
7584 instr[31,23] = 0 0011 1100
7585 instr[22] = 0=>single 1=>double
7586 instr[21] = 1
7587 instr[20,16] = Sm
7588 instr[15,12] = cond
7589 instr[11,10] = 11
7590 instr[9,5] = Sn
7591 instr[4,0] = Sd */
7592 unsigned sm = INSTR (20, 16);
7593 unsigned sn = INSTR ( 9, 5);
7594 unsigned sd = INSTR ( 4, 0);
7595 uint32_t set = testConditionCode (cpu, INSTR (15, 12));
7596
7597 NYI_assert (31, 23, 0x03C);
7598 NYI_assert (11, 10, 0x3);
7599
7600 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7601 if (INSTR (22, 22))
7602 aarch64_set_FP_double (cpu, sd, (set ? aarch64_get_FP_double (cpu, sn)
7603 : aarch64_get_FP_double (cpu, sm)));
7604 else
7605 aarch64_set_FP_float (cpu, sd, (set ? aarch64_get_FP_float (cpu, sn)
7606 : aarch64_get_FP_float (cpu, sm)));
7607 }
7608
7609 /* Store 32 bit unscaled signed 9 bit. */
7610 static void
7611 fsturs (sim_cpu *cpu, int32_t offset)
7612 {
7613 unsigned int rn = INSTR (9, 5);
7614 unsigned int st = INSTR (4, 0);
7615
7616 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7617 aarch64_set_mem_u32 (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK) + offset,
7618 aarch64_get_vec_u32 (cpu, st, 0));
7619 }
7620
7621 /* Store 64 bit unscaled signed 9 bit. */
7622 static void
7623 fsturd (sim_cpu *cpu, int32_t offset)
7624 {
7625 unsigned int rn = INSTR (9, 5);
7626 unsigned int st = INSTR (4, 0);
7627
7628 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7629 aarch64_set_mem_u64 (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK) + offset,
7630 aarch64_get_vec_u64 (cpu, st, 0));
7631 }
7632
7633 /* Store 128 bit unscaled signed 9 bit. */
7634 static void
7635 fsturq (sim_cpu *cpu, int32_t offset)
7636 {
7637 unsigned int rn = INSTR (9, 5);
7638 unsigned int st = INSTR (4, 0);
7639 FRegister a;
7640
7641 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7642 aarch64_get_FP_long_double (cpu, st, & a);
7643 aarch64_set_mem_long_double (cpu,
7644 aarch64_get_reg_u64 (cpu, rn, SP_OK)
7645 + offset, a);
7646 }
7647
7648 /* TODO FP move register. */
7649
7650 /* 32 bit fp to fp move register. */
7651 static void
7652 ffmovs (sim_cpu *cpu)
7653 {
7654 unsigned int rn = INSTR (9, 5);
7655 unsigned int st = INSTR (4, 0);
7656
7657 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7658 aarch64_set_FP_float (cpu, st, aarch64_get_FP_float (cpu, rn));
7659 }
7660
7661 /* 64 bit fp to fp move register. */
7662 static void
7663 ffmovd (sim_cpu *cpu)
7664 {
7665 unsigned int rn = INSTR (9, 5);
7666 unsigned int st = INSTR (4, 0);
7667
7668 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7669 aarch64_set_FP_double (cpu, st, aarch64_get_FP_double (cpu, rn));
7670 }
7671
7672 /* 32 bit GReg to Vec move register. */
7673 static void
7674 fgmovs (sim_cpu *cpu)
7675 {
7676 unsigned int rn = INSTR (9, 5);
7677 unsigned int st = INSTR (4, 0);
7678
7679 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7680 aarch64_set_vec_u32 (cpu, st, 0, aarch64_get_reg_u32 (cpu, rn, NO_SP));
7681 }
7682
7683 /* 64 bit g to fp move register. */
7684 static void
7685 fgmovd (sim_cpu *cpu)
7686 {
7687 unsigned int rn = INSTR (9, 5);
7688 unsigned int st = INSTR (4, 0);
7689
7690 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7691 aarch64_set_vec_u64 (cpu, st, 0, aarch64_get_reg_u64 (cpu, rn, NO_SP));
7692 }
7693
7694 /* 32 bit fp to g move register. */
7695 static void
7696 gfmovs (sim_cpu *cpu)
7697 {
7698 unsigned int rn = INSTR (9, 5);
7699 unsigned int st = INSTR (4, 0);
7700
7701 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7702 aarch64_set_reg_u64 (cpu, st, NO_SP, aarch64_get_vec_u32 (cpu, rn, 0));
7703 }
7704
7705 /* 64 bit fp to g move register. */
7706 static void
7707 gfmovd (sim_cpu *cpu)
7708 {
7709 unsigned int rn = INSTR (9, 5);
7710 unsigned int st = INSTR (4, 0);
7711
7712 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7713 aarch64_set_reg_u64 (cpu, st, NO_SP, aarch64_get_vec_u64 (cpu, rn, 0));
7714 }
7715
7716 /* FP move immediate
7717
7718 These install an immediate 8 bit value in the target register
7719 where the 8 bits comprise 1 sign bit, 4 bits of fraction and a 3
7720 bit exponent. */
7721
7722 static void
7723 fmovs (sim_cpu *cpu)
7724 {
7725 unsigned int sd = INSTR (4, 0);
7726 uint32_t imm = INSTR (20, 13);
7727 float f = fp_immediate_for_encoding_32 (imm);
7728
7729 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7730 aarch64_set_FP_float (cpu, sd, f);
7731 }
7732
7733 static void
7734 fmovd (sim_cpu *cpu)
7735 {
7736 unsigned int sd = INSTR (4, 0);
7737 uint32_t imm = INSTR (20, 13);
7738 double d = fp_immediate_for_encoding_64 (imm);
7739
7740 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7741 aarch64_set_FP_double (cpu, sd, d);
7742 }
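
/* The expansion performed by fp_immediate_for_encoding_32/64 follows
   the architectural VFPExpandImm pattern: sign s = imm8<7>, a 3-bit
   exponent field imm8<6:4> and a 4-bit fraction imm8<3:0>, giving
   (-1)^s * (1 + frac/16) * 2^e with e in [-3,4].  A stand-alone
   sketch of that expansion (hypothetical helper, not the routine the
   simulator calls):  */

static inline double
vfp_expand_imm8 (uint32_t imm8)
{
  int sign = (imm8 >> 7) & 1;
  int b6   = (imm8 >> 6) & 1;
  int xx   = (imm8 >> 4) & 3;
  int frac = imm8 & 15;
  /* b6 == 1 selects exponents -3..0, b6 == 0 selects 1..4.  */
  int e    = b6 ? xx - 3 : xx + 1;
  double v = (1.0 + frac / 16.0) * ldexp (1.0, e);

  return sign ? -v : v;   /* E.g. imm8 == 0x70 expands to 1.0.  */
}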
7743
7744 static void
7745 dexSimpleFPImmediate (sim_cpu *cpu)
7746 {
7747 /* instr[31,23] == 00111100
7748 instr[22] == type : single(0)/double(1)
7749 instr[21] == 1
7750 instr[20,13] == imm8
7751 instr[12,10] == 100
7752 instr[9,5] == imm5 : 00000 ==> OK, ow ==> UNALLOC
7753 instr[4,0] == Rd */
7754 uint32_t imm5 = INSTR (9, 5);
7755
7756 NYI_assert (31, 23, 0x3C);
7757
7758 if (imm5 != 0)
7759 HALT_UNALLOC;
7760
7761 if (INSTR (22, 22))
7762 fmovd (cpu);
7763 else
7764 fmovs (cpu);
7765 }
7766
7767 /* TODO specific decode and execute for group Load Store. */
7768
7769 /* TODO FP load/store single register (unscaled offset). */
7770
7771 /* TODO load 8 bit unscaled signed 9 bit. */
7772 /* TODO load 16 bit unscaled signed 9 bit. */
7773
7774 /* Load 32 bit unscaled signed 9 bit. */
7775 static void
7776 fldurs (sim_cpu *cpu, int32_t offset)
7777 {
7778 unsigned int rn = INSTR (9, 5);
7779 unsigned int st = INSTR (4, 0);
7780
7781 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7782 aarch64_set_vec_u32 (cpu, st, 0, aarch64_get_mem_u32
7783 (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK) + offset));
7784 }
7785
7786 /* Load 64 bit unscaled signed 9 bit. */
7787 static void
7788 fldurd (sim_cpu *cpu, int32_t offset)
7789 {
7790 unsigned int rn = INSTR (9, 5);
7791 unsigned int st = INSTR (4, 0);
7792
7793 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7794 aarch64_set_vec_u64 (cpu, st, 0, aarch64_get_mem_u64
7795 (cpu, aarch64_get_reg_u64 (cpu, rn, SP_OK) + offset));
7796 }
7797
7798 /* Load 128 bit unscaled signed 9 bit. */
7799 static void
7800 fldurq (sim_cpu *cpu, int32_t offset)
7801 {
7802 unsigned int rn = INSTR (9, 5);
7803 unsigned int st = INSTR (4, 0);
7804 FRegister a;
7805 uint64_t addr = aarch64_get_reg_u64 (cpu, rn, SP_OK) + offset;
7806
7807 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7808 aarch64_get_mem_long_double (cpu, addr, & a);
7809 aarch64_set_FP_long_double (cpu, st, a);
7810 }
7811
7812 /* TODO store 8 bit unscaled signed 9 bit. */
7813 /* TODO store 16 bit unscaled signed 9 bit. */
7814
7815
7816 /* 1 source. */
7817
7818 /* Float absolute value. */
7819 static void
7820 fabss (sim_cpu *cpu)
7821 {
7822 unsigned sn = INSTR (9, 5);
7823 unsigned sd = INSTR (4, 0);
7824 float value = aarch64_get_FP_float (cpu, sn);
7825
7826 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7827 aarch64_set_FP_float (cpu, sd, fabsf (value));
7828 }
7829
7830 /* Double absolute value. */
7831 static void
7832 fabcpu (sim_cpu *cpu)
7833 {
7834 unsigned sn = INSTR (9, 5);
7835 unsigned sd = INSTR (4, 0);
7836 double value = aarch64_get_FP_double (cpu, sn);
7837
7838 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7839 aarch64_set_FP_double (cpu, sd, fabs (value));
7840 }
7841
7842 /* Float negative value. */
7843 static void
7844 fnegs (sim_cpu *cpu)
7845 {
7846 unsigned sn = INSTR (9, 5);
7847 unsigned sd = INSTR (4, 0);
7848
7849 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7850 aarch64_set_FP_float (cpu, sd, - aarch64_get_FP_float (cpu, sn));
7851 }
7852
7853 /* Double negative value. */
7854 static void
7855 fnegd (sim_cpu *cpu)
7856 {
7857 unsigned sn = INSTR (9, 5);
7858 unsigned sd = INSTR (4, 0);
7859
7860 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7861 aarch64_set_FP_double (cpu, sd, - aarch64_get_FP_double (cpu, sn));
7862 }
7863
7864 /* Float square root. */
7865 static void
7866 fsqrts (sim_cpu *cpu)
7867 {
7868 unsigned sn = INSTR (9, 5);
7869 unsigned sd = INSTR (4, 0);
7870
7871 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7872 aarch64_set_FP_float (cpu, sd, sqrtf (aarch64_get_FP_float (cpu, sn)));
7873 }
7874
7875 /* Double square root. */
7876 static void
7877 fsqrtd (sim_cpu *cpu)
7878 {
7879 unsigned sn = INSTR (9, 5);
7880 unsigned sd = INSTR (4, 0);
7881
7882 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7883 aarch64_set_FP_double (cpu, sd,
7884 sqrt (aarch64_get_FP_double (cpu, sn)));
7885 }
7886
7887 /* Convert double to float. */
7888 static void
7889 fcvtds (sim_cpu *cpu)
7890 {
7891 unsigned sn = INSTR (9, 5);
7892 unsigned sd = INSTR (4, 0);
7893
7894 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7895 aarch64_set_FP_float (cpu, sd, (float) aarch64_get_FP_double (cpu, sn));
7896 }
7897
7898 /* Convert float to double. */
7899 static void
7900 fcvtcpu (sim_cpu *cpu)
7901 {
7902 unsigned sn = INSTR (9, 5);
7903 unsigned sd = INSTR (4, 0);
7904
7905 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7906 aarch64_set_FP_double (cpu, sd, (double) aarch64_get_FP_float (cpu, sn));
7907 }
7908
7909 static void
7910 do_FRINT (sim_cpu *cpu)
7911 {
7912 /* instr[31,23] = 0001 1110 0
7913 instr[22] = single(0)/double(1)
7914 instr[21,18] = 1001
7915 instr[17,15] = rounding mode
7916 instr[14,10] = 10000
7917 instr[9,5] = source
7918 instr[4,0] = dest */
7919
7920 float val;
7921 unsigned rs = INSTR (9, 5);
7922 unsigned rd = INSTR (4, 0);
7923 unsigned int rmode = INSTR (17, 15);
7924
7925 NYI_assert (31, 23, 0x03C);
7926 NYI_assert (21, 18, 0x9);
7927 NYI_assert (14, 10, 0x10);
7928
7929 if (rmode == 6 || rmode == 7)
7930 /* FIXME: Add support for rmode == 6 exactness check. */
7931 rmode = uimm (aarch64_get_FPSR (cpu), 23, 22);
7932
7933 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
7934 if (INSTR (22, 22))
7935 {
7936 double val = aarch64_get_FP_double (cpu, rs);
7937
7938 switch (rmode)
7939 {
7940 case 0: /* mode N: nearest or even. */
7941 {
7942 double rval = round (val);
7943
7944 /* round () breaks ties away from zero; steer an odd result
7945 back towards val to give the intended round-to-even. */
7946 if (fabs (val - rval) == 0.5
7947 && ((rval / 2.0) * 2.0) != rval)
7948 rval += (val > rval) ? 1.0 : -1.0;
7949
7950 aarch64_set_FP_double (cpu, rd, rval);
7951 return;
7952 }
7953
7954 case 1: /* mode P: towards +inf. */
7955 aarch64_set_FP_double (cpu, rd, ceil (val));
7956 return;
7960
7961 case 2: /* mode M: towards -inf. */
7962 aarch64_set_FP_double (cpu, rd, floor (val));
7963 return;
7967
7968 case 3: /* mode Z: towards 0. */
7969 aarch64_set_FP_double (cpu, rd, trunc (val));
7970 return;
7971
7972 case 4: /* mode A: away from 0. */
7973 aarch64_set_FP_double (cpu, rd, round (val));
7974 return;
7975
7976 case 6: /* mode X: use FPCR with exactness check. */
7977 case 7: /* mode I: use FPCR mode. */
7978 HALT_NYI;
7979
7980 default:
7981 HALT_UNALLOC;
7982 }
7983 }
7984
7985 val = aarch64_get_FP_float (cpu, rs);
7986
7987 switch (rmode)
7988 {
7989 case 0: /* mode N: nearest or even. */
7990 {
7991 float rval = roundf (val);
7992
7993 /* roundf () breaks ties away from zero; steer an odd result
7994 back towards val to give the intended round-to-even. */
7995 if (fabsf (val - rval) == 0.5f
7996 && ((rval / 2.0f) * 2.0f) != rval)
7997 rval += (val > rval) ? 1.0f : -1.0f;
7998
7999 aarch64_set_FP_float (cpu, rd, rval);
8000 return;
8001 }
8002
8003 case 1: /* mode P: towards +inf. */
8004 aarch64_set_FP_float (cpu, rd, ceilf (val));
8005 return;
8006
8007 case 2: /* mode M: towards -inf. */
8008 aarch64_set_FP_float (cpu, rd, floorf (val));
8009 return;
8016
8017 case 3: /* mode Z: towards 0. */
8018 aarch64_set_FP_float (cpu, rd, truncf (val));
8019 return;
8020
8021 case 4: /* mode A: away from 0. */
8022 aarch64_set_FP_float (cpu, rd, roundf (val));
8023 return;
8024
8025 case 6: /* mode X: use FPCR with exactness check. */
8026 case 7: /* mode I: use FPCR mode. */
8027 HALT_NYI;
8028
8029 default:
8030 HALT_UNALLOC;
8031 }
8032 }
8033
8034 /* Convert half to float. */
8035 static void
8036 do_FCVT_half_to_single (sim_cpu *cpu)
8037 {
8038 unsigned rn = INSTR (9, 5);
8039 unsigned rd = INSTR (4, 0);
8040
8041 NYI_assert (31, 10, 0x7B890);
8042
8043 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
8044 aarch64_set_FP_float (cpu, rd, (float) aarch64_get_FP_half (cpu, rn));
8045 }
8046
8047 /* Convert half to double. */
8048 static void
8049 do_FCVT_half_to_double (sim_cpu *cpu)
8050 {
8051 unsigned rn = INSTR (9, 5);
8052 unsigned rd = INSTR (4, 0);
8053
8054 NYI_assert (31, 10, 0x7B8B0);
8055
8056 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
8057 aarch64_set_FP_double (cpu, rd, (double) aarch64_get_FP_half (cpu, rn));
8058 }
8059
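/* Convert float to half. */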
8060 static void
8061 do_FCVT_single_to_half (sim_cpu *cpu)
8062 {
8063 unsigned rn = INSTR (9, 5);
8064 unsigned rd = INSTR (4, 0);
8065
8066 NYI_assert (31, 10, 0x788F0);
8067
8068 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
8069 aarch64_set_FP_half (cpu, rd, aarch64_get_FP_float (cpu, rn));
8070 }
8071
8072 /* Convert double to half. */
8073 static void
8074 do_FCVT_double_to_half (sim_cpu *cpu)
8075 {
8076 unsigned rn = INSTR (9, 5);
8077 unsigned rd = INSTR (4, 0);
8078
8079 NYI_assert (31, 10, 0x798F0);
8080
8081 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
8082 aarch64_set_FP_half (cpu, rd, (float) aarch64_get_FP_double (cpu, rn));
8083 }
8084
8085 static void
8086 dexSimpleFPDataProc1Source (sim_cpu *cpu)
8087 {
8088 /* instr[31] ==> M : 0 ==> OK, 1 ==> UNALLOC
8089 instr[30] = 0
8090 instr[29] ==> S : 0 ==> OK, 1 ==> UNALLOC
8091 instr[28,25] = 1111
8092 instr[24] = 0
8093 instr[23,22] ==> type : 00 ==> source is single,
8094 01 ==> source is double
8095 10 ==> UNALLOC
8096 11 ==> UNALLOC or source is half
8097 instr[21] = 1
8098 instr[20,15] ==> opcode : with type 00 or 01
8099 000000 ==> FMOV, 000001 ==> FABS,
8100 000010 ==> FNEG, 000011 ==> FSQRT,
8101 000100 ==> UNALLOC, 000101 ==> FCVT,(to single/double)
8102 000110 ==> UNALLOC, 000111 ==> FCVT (to half)
8103 001000 ==> FRINTN, 001001 ==> FRINTP,
8104 001010 ==> FRINTM, 001011 ==> FRINTZ,
8105 001100 ==> FRINTA, 001101 ==> UNALLOC
8106 001110 ==> FRINTX, 001111 ==> FRINTI
8107 with type 11
8108 000100 ==> FCVT (half-to-single)
8109 000101 ==> FCVT (half-to-double)
8110 instr[14,10] = 10000. */
8111
8112 uint32_t M_S = (INSTR (31, 31) << 1) | INSTR (29, 29);
8113 uint32_t type = INSTR (23, 22);
8114 uint32_t opcode = INSTR (20, 15);
8115
8116 if (M_S != 0)
8117 HALT_UNALLOC;
8118
8119 if (type == 3)
8120 {
8121 if (opcode == 4)
8122 do_FCVT_half_to_single (cpu);
8123 else if (opcode == 5)
8124 do_FCVT_half_to_double (cpu);
8125 else
8126 HALT_UNALLOC;
8127 return;
8128 }
8129
8130 if (type == 2)
8131 HALT_UNALLOC;
8132
8133 switch (opcode)
8134 {
8135 case 0:
8136 if (type)
8137 ffmovd (cpu);
8138 else
8139 ffmovs (cpu);
8140 return;
8141
8142 case 1:
8143 if (type)
8144 fabcpu (cpu);
8145 else
8146 fabss (cpu);
8147 return;
8148
8149 case 2:
8150 if (type)
8151 fnegd (cpu);
8152 else
8153 fnegs (cpu);
8154 return;
8155
8156 case 3:
8157 if (type)
8158 fsqrtd (cpu);
8159 else
8160 fsqrts (cpu);
8161 return;
8162
8163 case 4:
8164 if (type)
8165 fcvtds (cpu);
8166 else
8167 HALT_UNALLOC;
8168 return;
8169
8170 case 5:
8171 if (type)
8172 HALT_UNALLOC;
8173 fcvtcpu (cpu);
8174 return;
8175
8176 case 8: /* FRINTN etc. */
8177 case 9:
8178 case 10:
8179 case 11:
8180 case 12:
8181 case 14:
8182 case 15:
8183 do_FRINT (cpu);
8184 return;
8185
8186 case 7:
8187 if (INSTR (22, 22))
8188 do_FCVT_double_to_half (cpu);
8189 else
8190 do_FCVT_single_to_half (cpu);
8191 return;
8192
8193 case 13:
8194 HALT_NYI;
8195
8196 default:
8197 HALT_UNALLOC;
8198 }
8199 }
8200
8201 /* 32 bit signed int to float. */
8202 static void
8203 scvtf32 (sim_cpu *cpu)
8204 {
8205 unsigned rn = INSTR (9, 5);
8206 unsigned sd = INSTR (4, 0);
8207
8208 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
8209 aarch64_set_FP_float
8210 (cpu, sd, (float) aarch64_get_reg_s32 (cpu, rn, NO_SP));
8211 }
8212
8213 /* 64 bit signed int to float. */
8214 static void
8215 scvtf (sim_cpu *cpu)
8216 {
8217 unsigned rn = INSTR (9, 5);
8218 unsigned sd = INSTR (4, 0);
8219
8220 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
8221 aarch64_set_FP_float
8222 (cpu, sd, (float) aarch64_get_reg_s64 (cpu, rn, NO_SP));
8223 }
8224
8225 /* 32 bit signed int to double. */
8226 static void
8227 scvtd32 (sim_cpu *cpu)
8228 {
8229 unsigned rn = INSTR (9, 5);
8230 unsigned sd = INSTR (4, 0);
8231
8232 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
8233 aarch64_set_FP_double
8234 (cpu, sd, (double) aarch64_get_reg_s32 (cpu, rn, NO_SP));
8235 }
8236
8237 /* 64 bit signed int to double. */
8238 static void
8239 scvtd (sim_cpu *cpu)
8240 {
8241 unsigned rn = INSTR (9, 5);
8242 unsigned sd = INSTR (4, 0);
8243
8244 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
8245 aarch64_set_FP_double
8246 (cpu, sd, (double) aarch64_get_reg_s64 (cpu, rn, NO_SP));
8247 }
8248
8249 static const float FLOAT_INT_MAX = (float) INT_MAX;
8250 static const float FLOAT_INT_MIN = (float) INT_MIN;
8251 static const double DOUBLE_INT_MAX = (double) INT_MAX;
8252 static const double DOUBLE_INT_MIN = (double) INT_MIN;
8253 static const float FLOAT_LONG_MAX = (float) LONG_MAX;
8254 static const float FLOAT_LONG_MIN = (float) LONG_MIN;
8255 static const double DOUBLE_LONG_MAX = (double) LONG_MAX;
8256 static const double DOUBLE_LONG_MIN = (double) LONG_MIN;
8257
8258 #define UINT_MIN 0
8259 #define ULONG_MIN 0
8260 static const float FLOAT_UINT_MAX = (float) UINT_MAX;
8261 static const float FLOAT_UINT_MIN = (float) UINT_MIN;
8262 static const double DOUBLE_UINT_MAX = (double) UINT_MAX;
8263 static const double DOUBLE_UINT_MIN = (double) UINT_MIN;
8264 static const float FLOAT_ULONG_MAX = (float) ULONG_MAX;
8265 static const float FLOAT_ULONG_MIN = (float) ULONG_MIN;
8266 static const double DOUBLE_ULONG_MAX = (double) ULONG_MAX;
8267 static const double DOUBLE_ULONG_MIN = (double) ULONG_MIN;
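
/* Note that the LONG/ULONG constants above use the host's long, which
   is 64 bits wide only on LP64 hosts; on an LLP64 host (such as
   64-bit Windows) they would be 32-bit limits.  A host-independent
   variant, assuming the <stdint.h> limits were preferred (UINT64_MIN
   would need defining, just as UINT_MIN and ULONG_MIN are above):

     static const double DOUBLE_INT64_MAX = (double) INT64_MAX;
     static const double DOUBLE_UINT64_MAX = (double) UINT64_MAX;

   with the RAISE_EXCEPTIONS call sites below passing INT64/UINT64 as
   the ITYPE token accordingly.  */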
8268
8269 /* Check for FP exception conditions:
8270 NaN raises IO
8271 Infinity raises IO
8272 Out of Range raises IO and IX and saturates value
8273 Denormal raises ID and IX and sets to zero. */
8274 #define RAISE_EXCEPTIONS(F, VALUE, FTYPE, ITYPE) \
8275 do \
8276 { \
8277 switch (fpclassify (F)) \
8278 { \
8279 case FP_INFINITE: \
8280 case FP_NAN: \
8281 aarch64_set_FPSR (cpu, IO); \
8282 if (signbit (F)) \
8283 VALUE = ITYPE##_MIN; \
8284 else \
8285 VALUE = ITYPE##_MAX; \
8286 break; \
8287 \
8288 case FP_NORMAL: \
8289 if (F >= FTYPE##_##ITYPE##_MAX) \
8290 { \
8291 aarch64_set_FPSR_bits (cpu, IO | IX, IO | IX); \
8292 VALUE = ITYPE##_MAX; \
8293 } \
8294 else if (F <= FTYPE##_##ITYPE##_MIN) \
8295 { \
8296 aarch64_set_FPSR_bits (cpu, IO | IX, IO | IX); \
8297 VALUE = ITYPE##_MIN; \
8298 } \
8299 break; \
8300 \
8301 case FP_SUBNORMAL: \
8302 aarch64_set_FPSR_bits (cpu, IO | IX | ID, IX | ID); \
8303 VALUE = 0; \
8304 break; \
8305 \
8306 default: \
8307 case FP_ZERO: \
8308 VALUE = 0; \
8309 break; \
8310 } \
8311 } \
8312 while (0)
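
/* Worked example of the macro for a float-to-int32 conversion:
   1.0e30f is FP_NORMAL and above FLOAT_INT_MAX, so VALUE saturates to
   INT_MAX and IO | IX are raised; an infinity or NaN raises IO and
   saturates by sign; a subnormal raises ID and IX and yields 0; and
   an in-range normal value leaves VALUE as converted.  */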
8313
8314 /* 32 bit convert float to signed int truncate towards zero. */
8315 static void
8316 fcvtszs32 (sim_cpu *cpu)
8317 {
8318 unsigned sn = INSTR (9, 5);
8319 unsigned rd = INSTR (4, 0);
8320 /* ISO C casts from floating-point truncate, i.e. round towards zero. */
8321 float f = aarch64_get_FP_float (cpu, sn);
8322 int32_t value = (int32_t) f;
8323
8324 RAISE_EXCEPTIONS (f, value, FLOAT, INT);
8325
8326 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
8327 /* Avoid sign extension to 64 bit. */
8328 aarch64_set_reg_u64 (cpu, rd, NO_SP, (uint32_t) value);
8329 }
8330
8331 /* 64 bit convert float to signed int truncate towards zero. */
8332 static void
8333 fcvtszs (sim_cpu *cpu)
8334 {
8335 unsigned sn = INSTR (9, 5);
8336 unsigned rd = INSTR (4, 0);
8337 float f = aarch64_get_FP_float (cpu, sn);
8338 int64_t value = (int64_t) f;
8339
8340 RAISE_EXCEPTIONS (f, value, FLOAT, LONG);
8341
8342 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
8343 aarch64_set_reg_s64 (cpu, rd, NO_SP, value);
8344 }
8345
8346 /* 32 bit convert double to signed int truncate towards zero. */
8347 static void
8348 fcvtszd32 (sim_cpu *cpu)
8349 {
8350 unsigned sn = INSTR (9, 5);
8351 unsigned rd = INSTR (4, 0);
8352 /* ISO C casts from floating-point truncate, i.e. round towards zero. */
8353 double d = aarch64_get_FP_double (cpu, sn);
8354 int32_t value = (int32_t) d;
8355
8356 RAISE_EXCEPTIONS (d, value, DOUBLE, INT);
8357
8358 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
8359 /* Avoid sign extension to 64 bit. */
8360 aarch64_set_reg_u64 (cpu, rd, NO_SP, (uint32_t) value);
8361 }
8362
8363 /* 64 bit convert double to signed int truncate towards zero. */
8364 static void
8365 fcvtszd (sim_cpu *cpu)
8366 {
8367 unsigned sn = INSTR (9, 5);
8368 unsigned rd = INSTR (4, 0);
8369 /* ISO C casts from floating-point truncate, i.e. round towards zero. */
8370 double d = aarch64_get_FP_double (cpu, sn);
8371 int64_t value;
8372
8373 value = (int64_t) d;
8374
8375 RAISE_EXCEPTIONS (d, value, DOUBLE, LONG);
8376
8377 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
8378 aarch64_set_reg_s64 (cpu, rd, NO_SP, value);
8379 }
8380
8381 static void
8382 do_fcvtzu (sim_cpu *cpu)
8383 {
8384 /* instr[31] = size: 32-bit (0), 64-bit (1)
8385 instr[30,23] = 00111100
8386 instr[22] = type: single (0)/ double (1)
8387 instr[21] = scaled fixed-point (0)/integer (1)
8388 instr[20,16] = 11001
8389 instr[15,10] = precision
8390 instr[9,5] = Rs
8391 instr[4,0] = Rd. */
8392
8393 unsigned rs = INSTR (9, 5);
8394 unsigned rd = INSTR (4, 0);
8395
8396 NYI_assert (30, 23, 0x3C);
8397 NYI_assert (20, 16, 0x19);
8398
8399 if (INSTR (21, 21) != 1)
8400 /* Convert to fixed point. */
8401 HALT_NYI;
8402
8403 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
8404 if (INSTR (31, 31))
8405 {
8406 /* Convert to unsigned 64-bit integer. */
8407 if (INSTR (22, 22))
8408 {
8409 double d = aarch64_get_FP_double (cpu, rs);
8410 uint64_t value = (uint64_t) d;
8411
8412 /* Do not raise an exception if the conversion produced 1ULL << 63, the out-of-range result on common hosts. */
8413 if (value != (1ULL << 63))
8414 RAISE_EXCEPTIONS (d, value, DOUBLE, ULONG);
8415
8416 aarch64_set_reg_u64 (cpu, rd, NO_SP, value);
8417 }
8418 else
8419 {
8420 float f = aarch64_get_FP_float (cpu, rs);
8421 uint64_t value = (uint64_t) f;
8422
8423 /* Do not raise an exception if the conversion overflowed to the 1 << 63 marker, i.e. we have reached ULONG_MAX. */
8424 if (value != (1ULL << 63))
8425 RAISE_EXCEPTIONS (f, value, FLOAT, ULONG);
8426
8427 aarch64_set_reg_u64 (cpu, rd, NO_SP, value);
8428 }
8429 }
8430 else
8431 {
8432 uint32_t value;
8433
8434 /* Convert to unsigned 32-bit integer. */
8435 if (INSTR (22, 22))
8436 {
8437 double d = aarch64_get_FP_double (cpu, rs);
8438
8439 value = (uint32_t) d;
8440 /* Do not raise an exception if the conversion overflowed to the 1 << 31 marker, i.e. we have reached UINT_MAX. */
8441 if (value != (1UL << 31))
8442 RAISE_EXCEPTIONS (d, value, DOUBLE, UINT);
8443 }
8444 else
8445 {
8446 float f = aarch64_get_FP_float (cpu, rs);
8447
8448 value = (uint32_t) f;
8449 /* Do not raise an exception if the conversion overflowed to the 1 << 31 marker, i.e. we have reached UINT_MAX. */
8450 if (value != (1UL << 31))
8451 RAISE_EXCEPTIONS (f, value, FLOAT, UINT);
8452 }
8453
8454 aarch64_set_reg_u64 (cpu, rd, NO_SP, value);
8455 }
8456 }
8457
8458 static void
8459 do_UCVTF (sim_cpu *cpu)
8460 {
8461 /* instr[31] = size: 32-bit (0), 64-bit (1)
8462 instr[30,23] = 001 1110 0
8463 instr[22] = type: single (0)/ double (1)
8464 instr[21] = 1 ==> integer conversion, 0 ==> fixed-point via the precision field (NYI)
8465 instr[20,16] = 0 0011
8466 instr[15,10] = precision
8467 instr[9,5] = Rs
8468 instr[4,0] = Rd. */
8469
8470 unsigned rs = INSTR (9, 5);
8471 unsigned rd = INSTR (4, 0);
8472
8473 NYI_assert (30, 23, 0x3C);
8474 NYI_assert (20, 16, 0x03);
8475
8476 if (INSTR (21, 21) != 1)
8477 HALT_NYI;
8478
8479 /* FIXME: Add exception raising. */
8480 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
8481 if (INSTR (31, 31))
8482 {
8483 uint64_t value = aarch64_get_reg_u64 (cpu, rs, NO_SP);
8484
8485 if (INSTR (22, 22))
8486 aarch64_set_FP_double (cpu, rd, (double) value);
8487 else
8488 aarch64_set_FP_float (cpu, rd, (float) value);
8489 }
8490 else
8491 {
8492 uint32_t value = aarch64_get_reg_u32 (cpu, rs, NO_SP);
8493
8494 if (INSTR (22, 22))
8495 aarch64_set_FP_double (cpu, rd, (double) value);
8496 else
8497 aarch64_set_FP_float (cpu, rd, (float) value);
8498 }
8499 }
8500
8501 static void
8502 float_vector_move (sim_cpu *cpu)
8503 {
8504 /* instr[31,17] == 100 1111 0101 0111
8505 instr[16] ==> direction 0=> to GR, 1=> from GR
8506 instr[15,10] ==> 00 0000, ow UNALLOC
8507 instr[9,5] ==> source
8508 instr[4,0] ==> dest. */
8509
8510 unsigned rn = INSTR (9, 5);
8511 unsigned rd = INSTR (4, 0);
8512
8513 NYI_assert (31, 17, 0x4F57);
8514
8515 if (INSTR (15, 10) != 0)
8516 HALT_UNALLOC;
8517
8518 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
8519 if (INSTR (16, 16))
8520 aarch64_set_vec_u64 (cpu, rd, 1, aarch64_get_reg_u64 (cpu, rn, NO_SP));
8521 else
8522 aarch64_set_reg_u64 (cpu, rd, NO_SP, aarch64_get_vec_u64 (cpu, rn, 1));
8523 }
8524
8525 static void
8526 dexSimpleFPIntegerConvert (sim_cpu *cpu)
8527 {
8528 /* instr[31] = size : 0 ==> 32 bit, 1 ==> 64 bit
8529 instr[30] = 0
8530 instr[29] = S : 0 ==> OK, 1 ==> UNALLOC
8531 instr[28,25] = 1111
8532 instr[24] = 0
8533 instr[23,22] = type : 00 ==> single, 01 ==> double, 1x ==> UNALLOC
8534 instr[21] = 1
8535 instr[20,19] = rmode
8536 instr[18,16] = opcode
8537 instr[15,10] = 10 0000 */
8538
8539 uint32_t rmode_opcode;
8540 uint32_t size_type;
8541 uint32_t type;
8542 uint32_t size;
8543 uint32_t S;
8544
8545 if (INSTR (31, 17) == 0x4F57)
8546 {
8547 float_vector_move (cpu);
8548 return;
8549 }
8550
8551 size = INSTR (31, 31);
8552 S = INSTR (29, 29);
8553 if (S != 0)
8554 HALT_UNALLOC;
8555
8556 type = INSTR (23, 22);
8557 if (type > 1)
8558 HALT_UNALLOC;
8559
8560 rmode_opcode = INSTR (20, 16);
8561 size_type = (size << 1) | type; /* 0==32f, 1==32d, 2==64f, 3==64d. */
8562
8563 switch (rmode_opcode)
8564 {
8565 case 2: /* SCVTF. */
8566 switch (size_type)
8567 {
8568 case 0: scvtf32 (cpu); return;
8569 case 1: scvtd32 (cpu); return;
8570 case 2: scvtf (cpu); return;
8571 case 3: scvtd (cpu); return;
8572 }
8573
8574 case 6: /* FMOV GR, Vec. */
8575 switch (size_type)
8576 {
8577 case 0: gfmovs (cpu); return;
8578 case 3: gfmovd (cpu); return;
8579 default: HALT_UNALLOC;
8580 }
8581
8582 case 7: /* FMOV vec, GR. */
8583 switch (size_type)
8584 {
8585 case 0: fgmovs (cpu); return;
8586 case 3: fgmovd (cpu); return;
8587 default: HALT_UNALLOC;
8588 }
8589
8590 case 24: /* FCVTZS. */
8591 switch (size_type)
8592 {
8593 case 0: fcvtszs32 (cpu); return;
8594 case 1: fcvtszd32 (cpu); return;
8595 case 2: fcvtszs (cpu); return;
8596 case 3: fcvtszd (cpu); return;
8597 }
8598
8599 case 25: do_fcvtzu (cpu); return;
8600 case 3: do_UCVTF (cpu); return;
8601
8602 case 0: /* FCVTNS. */
8603 case 1: /* FCVTNU. */
8604 case 4: /* FCVTAS. */
8605 case 5: /* FCVTAU. */
8606 case 8: /* FCVTPS. */
8607 case 9: /* FCVTPU. */
8608 case 16: /* FCVTMS. */
8609 case 17: /* FCVTMU. */
8610 default:
8611 HALT_NYI;
8612 }
8613 }
8614
8615 static void
8616 set_flags_for_float_compare (sim_cpu *cpu, float fvalue1, float fvalue2)
8617 {
8618 uint32_t flags;
8619
8620 /* FIXME: Add exception raising. */
8621 if (isnan (fvalue1) || isnan (fvalue2))
8622 flags = C|V;
8623 else if (isinf (fvalue1) && isinf (fvalue2))
8624 {
8625 /* Subtracting two infinities may give a NaN. We only need to compare
8626 the signs, which we can get from isinf (this relies on isinf returning +1/-1 for +/-inf, as glibc does). */
8627 int result = isinf (fvalue1) - isinf (fvalue2);
8628
8629 if (result == 0)
8630 flags = Z|C;
8631 else if (result < 0)
8632 flags = N;
8633 else /* (result > 0). */
8634 flags = C;
8635 }
8636 else
8637 {
8638 float result = fvalue1 - fvalue2;
8639
8640 if (result == 0.0)
8641 flags = Z|C;
8642 else if (result < 0)
8643 flags = N;
8644 else /* (result > 0). */
8645 flags = C;
8646 }
8647
8648 aarch64_set_CPSR (cpu, flags);
8649 }
8650
8651 static void
8652 fcmps (sim_cpu *cpu)
8653 {
8654 unsigned sm = INSTR (20, 16);
8655 unsigned sn = INSTR ( 9, 5);
8656
8657 float fvalue1 = aarch64_get_FP_float (cpu, sn);
8658 float fvalue2 = aarch64_get_FP_float (cpu, sm);
8659
8660 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
8661 set_flags_for_float_compare (cpu, fvalue1, fvalue2);
8662 }
8663
8664 /* Float compare to zero -- Invalid Operation exception
8665 only on signaling NaNs. */
8666 static void
8667 fcmpzs (sim_cpu *cpu)
8668 {
8669 unsigned sn = INSTR ( 9, 5);
8670 float fvalue1 = aarch64_get_FP_float (cpu, sn);
8671
8672 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
8673 set_flags_for_float_compare (cpu, fvalue1, 0.0f);
8674 }
8675
8676 /* Float compare -- Invalid Operation exception on all NaNs. */
8677 static void
8678 fcmpes (sim_cpu *cpu)
8679 {
8680 unsigned sm = INSTR (20, 16);
8681 unsigned sn = INSTR ( 9, 5);
8682
8683 float fvalue1 = aarch64_get_FP_float (cpu, sn);
8684 float fvalue2 = aarch64_get_FP_float (cpu, sm);
8685
8686 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
8687 set_flags_for_float_compare (cpu, fvalue1, fvalue2);
8688 }
8689
8690 /* Float compare to zero -- Invalid Operation exception on all NaNs. */
8691 static void
8692 fcmpzes (sim_cpu *cpu)
8693 {
8694 unsigned sn = INSTR ( 9, 5);
8695 float fvalue1 = aarch64_get_FP_float (cpu, sn);
8696
8697 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
8698 set_flags_for_float_compare (cpu, fvalue1, 0.0f);
8699 }
8700
8701 static void
8702 set_flags_for_double_compare (sim_cpu *cpu, double dval1, double dval2)
8703 {
8704 uint32_t flags;
8705
8706 /* FIXME: Add exception raising. */
8707 if (isnan (dval1) || isnan (dval2))
8708 flags = C|V;
8709 else if (isinf (dval1) && isinf (dval2))
8710 {
8711 /* Subtracting two infinities may give a NaN. We only need to compare
8712 the signs, which we can get from isinf (this relies on isinf returning +1/-1 for +/-inf, as glibc does). */
8713 int result = isinf (dval1) - isinf (dval2);
8714
8715 if (result == 0)
8716 flags = Z|C;
8717 else if (result < 0)
8718 flags = N;
8719 else /* (result > 0). */
8720 flags = C;
8721 }
8722 else
8723 {
8724 double result = dval1 - dval2;
8725
8726 if (result == 0.0)
8727 flags = Z|C;
8728 else if (result < 0)
8729 flags = N;
8730 else /* (result > 0). */
8731 flags = C;
8732 }
8733
8734 aarch64_set_CPSR (cpu, flags);
8735 }
8736
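/* Editor's sketch (not simulator code): the NZCV nibble that FCMP/FCMPE
   produce for each outcome, mirroring set_flags_for_double_compare above.
   The literal values 8/4/2/1 for N/Z/C/V are an assumption of this demo;
   the simulator uses its own N, Z, C, V masks.  */
#ifdef AARCH64_SIM_FCMP_DEMO
static unsigned
fcmp_nzcv_demo (double a, double b)
{
  if (isnan (a) || isnan (b)) return 0x3;   /* C|V : unordered.  */
  if (a == b)                 return 0x6;   /* Z|C : equal.  */
  if (a < b)                  return 0x8;   /* N   : less than.  */
  return 0x2;                               /* C   : greater than.  */
}
#endif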
8737 /* Double compare -- Invalid Operation exception only on signaling NaNs. */
8738 static void
8739 fcmpd (sim_cpu *cpu)
8740 {
8741 unsigned sm = INSTR (20, 16);
8742 unsigned sn = INSTR ( 9, 5);
8743
8744 double dvalue1 = aarch64_get_FP_double (cpu, sn);
8745 double dvalue2 = aarch64_get_FP_double (cpu, sm);
8746
8747 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
8748 set_flags_for_double_compare (cpu, dvalue1, dvalue2);
8749 }
8750
8751 /* Double compare to zero -- Invalid Operation exception
8752 only on signaling NaNs. */
8753 static void
8754 fcmpzd (sim_cpu *cpu)
8755 {
8756 unsigned sn = INSTR ( 9, 5);
8757 double dvalue1 = aarch64_get_FP_double (cpu, sn);
8758
8759 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
8760 set_flags_for_double_compare (cpu, dvalue1, 0.0);
8761 }
8762
8763 /* Double compare -- Invalid Operation exception on all NaNs. */
8764 static void
8765 fcmped (sim_cpu *cpu)
8766 {
8767 unsigned sm = INSTR (20, 16);
8768 unsigned sn = INSTR ( 9, 5);
8769
8770 double dvalue1 = aarch64_get_FP_double (cpu, sn);
8771 double dvalue2 = aarch64_get_FP_double (cpu, sm);
8772
8773 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
8774 set_flags_for_double_compare (cpu, dvalue1, dvalue2);
8775 }
8776
8777 /* Double compare to zero -- Invalid Operation exception on all NaNs. */
8778 static void
8779 fcmpzed (sim_cpu *cpu)
8780 {
8781 unsigned sn = INSTR ( 9, 5);
8782 double dvalue1 = aarch64_get_FP_double (cpu, sn);
8783
8784 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
8785 set_flags_for_double_compare (cpu, dvalue1, 0.0);
8786 }
8787
8788 static void
8789 dexSimpleFPCompare (sim_cpu *cpu)
8790 {
8791 /* assert instr[28,25] == 1111
8792 instr[30] = 0, instr[24] = 0, instr[21] = 1, instr[13,10] = 1000
8793 instr[31] = M : 0 ==> OK, 1 ==> UNALLOC
8794 instr[29] ==> S : 0 ==> OK, 1 ==> UNALLOC
8795 instr[23,22] ==> type : 0 ==> single, 01 ==> double, 1x ==> UNALLOC
8796 instr[15,14] ==> op : 00 ==> OK, ow ==> UNALLOC
8797 instr[4,0] ==> opcode2 : 00000 ==> FCMP, 10000 ==> FCMPE,
8798 01000 ==> FCMPZ, 11000 ==> FCMPEZ,
8799 ow ==> UNALLOC */
8800 uint32_t dispatch;
8801 uint32_t M_S = (INSTR (31, 31) << 1) | INSTR (29, 29);
8802 uint32_t type = INSTR (23, 22);
8803 uint32_t op = INSTR (15, 14);
8804 uint32_t op2_2_0 = INSTR (2, 0);
8805
8806 if (op2_2_0 != 0)
8807 HALT_UNALLOC;
8808
8809 if (M_S != 0)
8810 HALT_UNALLOC;
8811
8812 if (type > 1)
8813 HALT_UNALLOC;
8814
8815 if (op != 0)
8816 HALT_UNALLOC;
8817
8818 /* dispatch on type and top 2 bits of opcode. */
8819 dispatch = (type << 2) | INSTR (4, 3);
8820
8821 switch (dispatch)
8822 {
8823 case 0: fcmps (cpu); return;
8824 case 1: fcmpzs (cpu); return;
8825 case 2: fcmpes (cpu); return;
8826 case 3: fcmpzes (cpu); return;
8827 case 4: fcmpd (cpu); return;
8828 case 5: fcmpzd (cpu); return;
8829 case 6: fcmped (cpu); return;
8830 case 7: fcmpzed (cpu); return;
8831 }
8832 }
8833
8834 static void
8835 do_scalar_FADDP (sim_cpu *cpu)
8836 {
8837 /* instr [31,23] = 0111 1110 0
8838 instr [22] = single(0)/double(1)
8839 instr [21,10] = 11 0000 1101 10
8840 instr [9,5] = Fn
8841 instr [4,0] = Fd. */
8842
8843 unsigned Fn = INSTR (9, 5);
8844 unsigned Fd = INSTR (4, 0);
8845
8846 NYI_assert (31, 23, 0x0FC);
8847 NYI_assert (21, 10, 0xC36);
8848
8849 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
8850 if (INSTR (22, 22))
8851 {
8852 double val1 = aarch64_get_vec_double (cpu, Fn, 0);
8853 double val2 = aarch64_get_vec_double (cpu, Fn, 1);
8854
8855 aarch64_set_FP_double (cpu, Fd, val1 + val2);
8856 }
8857 else
8858 {
8859 float val1 = aarch64_get_vec_float (cpu, Fn, 0);
8860 float val2 = aarch64_get_vec_float (cpu, Fn, 1);
8861
8862 aarch64_set_FP_float (cpu, Fd, val1 + val2);
8863 }
8864 }
8865
8866 /* Floating point absolute difference. */
8867
8868 static void
8869 do_scalar_FABD (sim_cpu *cpu)
8870 {
8871 /* instr [31,23] = 0111 1110 1
8872 instr [22] = float(0)/double(1)
8873 instr [21] = 1
8874 instr [20,16] = Rm
8875 instr [15,10] = 1101 01
8876 instr [9, 5] = Rn
8877 instr [4, 0] = Rd. */
8878
8879 unsigned rm = INSTR (20, 16);
8880 unsigned rn = INSTR (9, 5);
8881 unsigned rd = INSTR (4, 0);
8882
8883 NYI_assert (31, 23, 0x0FD);
8884 NYI_assert (21, 21, 1);
8885 NYI_assert (15, 10, 0x35);
8886
8887 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
8888 if (INSTR (22, 22))
8889 aarch64_set_FP_double (cpu, rd,
8890 fabs (aarch64_get_FP_double (cpu, rn)
8891 - aarch64_get_FP_double (cpu, rm)));
8892 else
8893 aarch64_set_FP_float (cpu, rd,
8894 fabsf (aarch64_get_FP_float (cpu, rn)
8895 - aarch64_get_FP_float (cpu, rm)));
8896 }
8897
8898 static void
8899 do_scalar_CMGT (sim_cpu *cpu)
8900 {
8901 /* instr [31,21] = 0101 1110 111
8902 instr [20,16] = Rm
8903 instr [15,10] = 00 1101
8904 instr [9, 5] = Rn
8905 instr [4, 0] = Rd. */
8906
8907 unsigned rm = INSTR (20, 16);
8908 unsigned rn = INSTR (9, 5);
8909 unsigned rd = INSTR (4, 0);
8910
8911 NYI_assert (31, 21, 0x2F7);
8912 NYI_assert (15, 10, 0x0D);
8913
8914 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
8915 aarch64_set_vec_u64 (cpu, rd, 0,
8916 aarch64_get_vec_u64 (cpu, rn, 0) >
8917 aarch64_get_vec_u64 (cpu, rm, 0) ? -1L : 0L);
8918 }
8919
8920 static void
8921 do_scalar_USHR (sim_cpu *cpu)
8922 {
8923 /* instr [31,23] = 0111 1111 0
8924 instr [22,16] = shift amount
8925 instr [15,10] = 0000 01
8926 instr [9, 5] = Rn
8927 instr [4, 0] = Rd. */
8928
8929 unsigned amount = 128 - INSTR (22, 16);
8930 unsigned rn = INSTR (9, 5);
8931 unsigned rd = INSTR (4, 0);
8932
8933 NYI_assert (31, 23, 0x0FE);
8934 NYI_assert (15, 10, 0x01);
8935
8936 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
8937 aarch64_set_vec_u64 (cpu, rd, 0,
8938 aarch64_get_vec_u64 (cpu, rn, 0) >> amount);
8939 }
8940
8941 static void
8942 do_scalar_SSHL (sim_cpu *cpu)
8943 {
8944 /* instr [31,21] = 0101 1110 111
8945 instr [20,16] = Rm
8946 instr [15,10] = 0100 01
8947 instr [9, 5] = Rn
8948 instr [4, 0] = Rd. */
8949
8950 unsigned rm = INSTR (20, 16);
8951 unsigned rn = INSTR (9, 5);
8952 unsigned rd = INSTR (4, 0);
8953 signed int shift = aarch64_get_vec_s8 (cpu, rm, 0);
8954
8955 NYI_assert (31, 21, 0x2F7);
8956 NYI_assert (15, 10, 0x11);
8957
8958 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
8959 if (shift >= 0)
8960 aarch64_set_vec_s64 (cpu, rd, 0,
8961 aarch64_get_vec_s64 (cpu, rn, 0) << shift);
8962 else
8963 aarch64_set_vec_s64 (cpu, rd, 0,
8964 aarch64_get_vec_s64 (cpu, rn, 0) >> - shift);
8965 }
8966
8967 /* Floating point scalar compare greater than or equal to 0. */
8968 static void
8969 do_scalar_FCMGE_zero (sim_cpu *cpu)
8970 {
8971 /* instr [31,23] = 0111 1110 1
8972 instr [22,22] = size
8973 instr [21,16] = 1000 00
8974 instr [15,10] = 1100 10
8975 instr [9, 5] = Rn
8976 instr [4, 0] = Rd. */
8977
8978 unsigned size = INSTR (22, 22);
8979 unsigned rn = INSTR (9, 5);
8980 unsigned rd = INSTR (4, 0);
8981
8982 NYI_assert (31, 23, 0x0FD);
8983 NYI_assert (21, 16, 0x20);
8984 NYI_assert (15, 10, 0x32);
8985
8986 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
8987 if (size)
8988 aarch64_set_vec_u64 (cpu, rd, 0,
8989 aarch64_get_vec_double (cpu, rn, 0) >= 0.0 ? -1 : 0);
8990 else
8991 aarch64_set_vec_u32 (cpu, rd, 0,
8992 aarch64_get_vec_float (cpu, rn, 0) >= 0.0 ? -1 : 0);
8993 }
8994
8995 /* Floating point scalar compare less than or equal to 0. */
8996 static void
8997 do_scalar_FCMLE_zero (sim_cpu *cpu)
8998 {
8999 /* instr [31,23] = 0111 1110 1
9000 instr [22,22] = size
9001 instr [21,16] = 1000 00
9002 instr [15,10] = 1101 10
9003 instr [9, 5] = Rn
9004 instr [4, 0] = Rd. */
9005
9006 unsigned size = INSTR (22, 22);
9007 unsigned rn = INSTR (9, 5);
9008 unsigned rd = INSTR (4, 0);
9009
9010 NYI_assert (31, 23, 0x0FD);
9011 NYI_assert (21, 16, 0x20);
9012 NYI_assert (15, 10, 0x36);
9013
9014 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9015 if (size)
9016 aarch64_set_vec_u64 (cpu, rd, 0,
9017 aarch64_get_vec_double (cpu, rn, 0) <= 0.0 ? -1 : 0);
9018 else
9019 aarch64_set_vec_u32 (cpu, rd, 0,
9020 aarch64_get_vec_float (cpu, rn, 0) <= 0.0 ? -1 : 0);
9021 }
9022
9023 /* Floating point scalar compare greater than 0. */
9024 static void
9025 do_scalar_FCMGT_zero (sim_cpu *cpu)
9026 {
9027 /* instr [31,23] = 0101 1110 1
9028 instr [22,22] = size
9029 instr [21,16] = 1000 00
9030 instr [15,10] = 1100 10
9031 instr [9, 5] = Rn
9032 instr [4, 0] = Rd. */
9033
9034 unsigned size = INSTR (22, 22);
9035 unsigned rn = INSTR (9, 5);
9036 unsigned rd = INSTR (4, 0);
9037
9038 NYI_assert (31, 23, 0x0BD);
9039 NYI_assert (21, 16, 0x20);
9040 NYI_assert (15, 10, 0x32);
9041
9042 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9043 if (size)
9044 aarch64_set_vec_u64 (cpu, rd, 0,
9045 aarch64_get_vec_double (cpu, rn, 0) > 0.0 ? -1 : 0);
9046 else
9047 aarch64_set_vec_u32 (cpu, rd, 0,
9048 aarch64_get_vec_float (cpu, rn, 0) > 0.0 ? -1 : 0);
9049 }
9050
9051 /* Floating point scalar compare equal to 0. */
9052 static void
9053 do_scalar_FCMEQ_zero (sim_cpu *cpu)
9054 {
9055 /* instr [31,23] = 0101 1110 1
9056 instr [22,22] = size
9057 instr [21,16] = 1000 00
9058 instr [15,10] = 1101 10
9059 instr [9, 5] = Rn
9060 instr [4, 0] = Rd. */
9061
9062 unsigned size = INSTR (22, 22);
9063 unsigned rn = INSTR (9, 5);
9064 unsigned rd = INSTR (4, 0);
9065
9066 NYI_assert (31, 23, 0x0BD);
9067 NYI_assert (21, 16, 0x20);
9068 NYI_assert (15, 10, 0x36);
9069
9070 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9071 if (size)
9072 aarch64_set_vec_u64 (cpu, rd, 0,
9073 aarch64_get_vec_double (cpu, rn, 0) == 0.0 ? -1 : 0);
9074 else
9075 aarch64_set_vec_u32 (cpu, rd, 0,
9076 aarch64_get_vec_float (cpu, rn, 0) == 0.0 ? -1 : 0);
9077 }
9078
9079 /* Floating point scalar compare less than 0. */
9080 static void
9081 do_scalar_FCMLT_zero (sim_cpu *cpu)
9082 {
9083 /* instr [31,23] = 0101 1110 1
9084 instr [22,22] = size
9085 instr [21,16] = 1000 00
9086 instr [15,10] = 1110 10
9087 instr [9, 5] = Rn
9088 instr [4, 0] = Rd. */
9089
9090 unsigned size = INSTR (22, 22);
9091 unsigned rn = INSTR (9, 5);
9092 unsigned rd = INSTR (4, 0);
9093
9094 NYI_assert (31, 23, 0x0BD);
9095 NYI_assert (21, 16, 0x20);
9096 NYI_assert (15, 10, 0x3A);
9097
9098 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9099 if (size)
9100 aarch64_set_vec_u64 (cpu, rd, 0,
9101 aarch64_get_vec_double (cpu, rn, 0) < 0.0 ? -1 : 0);
9102 else
9103 aarch64_set_vec_u32 (cpu, rd, 0,
9104 aarch64_get_vec_float (cpu, rn, 0) < 0.0 ? -1 : 0);
9105 }
9106
9107 static void
9108 do_scalar_shift (sim_cpu *cpu)
9109 {
9110 /* instr [31,23] = 0101 1111 0
9111 instr [22,16] = shift amount
9112 instr [15,10] = 0101 01 [SHL]
9113 instr [15,10] = 0000 01 [SSHR]
9114 instr [9, 5] = Rn
9115 instr [4, 0] = Rd. */
9116
9117 unsigned rn = INSTR (9, 5);
9118 unsigned rd = INSTR (4, 0);
9119 unsigned amount;
9120
9121 NYI_assert (31, 23, 0x0BE);
9122
9123 if (INSTR (22, 22) == 0)
9124 HALT_UNALLOC;
9125
9126 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9127 switch (INSTR (15, 10))
9128 {
9129 case 0x01: /* SSHR */
9130 amount = 128 - INSTR (22, 16);
9131 aarch64_set_vec_s64 (cpu, rd, 0,
9132 aarch64_get_vec_s64 (cpu, rn, 0) >> amount);
9133 return;
9134 case 0x15: /* SHL */
9135 amount = INSTR (22, 16) - 64;
9136 aarch64_set_vec_u64 (cpu, rd, 0,
9137 aarch64_get_vec_u64 (cpu, rn, 0) << amount);
9138 return;
9139 default:
9140 HALT_NYI;
9141 }
9142 }
9143
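/* Editor's sketch (not simulator code) of the immediate decodings used by
   do_scalar_shift and do_scalar_USHR above: for the 64-bit scalar right
   shifts the 7-bit field instr[22,16] encodes 128 - shift, while for SHL
   it encodes 64 + shift.  */
#ifdef AARCH64_SIM_SHIFT_IMM_DEMO
static unsigned
sshr64_shift (unsigned field)   /* field = instr[22,16], bit 22 set.  */
{
  return 128 - field;           /* valid encodings give 1..64.  */
}

static unsigned
shl64_shift (unsigned field)
{
  return field - 64;            /* valid encodings give 0..63.  */
}
#endif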
9144 /* FCMEQ FCMGT FCMGE. */
9145 static void
9146 do_scalar_FCM (sim_cpu *cpu)
9147 {
9148 /* instr [31,30] = 01
9149 instr [29] = U
9150 instr [28,24] = 1 1110
9151 instr [23] = E
9152 instr [22] = size
9153 instr [21] = 1
9154 instr [20,16] = Rm
9155 instr [15,12] = 1110
9156 instr [11] = AC
9157 instr [10] = 1
9158 instr [9, 5] = Rn
9159 instr [4, 0] = Rd. */
9160
9161 unsigned rm = INSTR (20, 16);
9162 unsigned rn = INSTR (9, 5);
9163 unsigned rd = INSTR (4, 0);
9164 unsigned EUac = (INSTR (23, 23) << 2) | (INSTR (29, 29) << 1) | INSTR (11, 11);
9165 unsigned result;
9166 float val1;
9167 float val2;
9168
9169 NYI_assert (31, 30, 1);
9170 NYI_assert (28, 24, 0x1E);
9171 NYI_assert (21, 21, 1);
9172 NYI_assert (15, 12, 0xE);
9173 NYI_assert (10, 10, 1);
9174
9175 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9176 if (INSTR (22, 22))
9177 {
9178 double val1 = aarch64_get_FP_double (cpu, rn);
9179 double val2 = aarch64_get_FP_double (cpu, rm);
9180
9181 switch (EUac)
9182 {
9183 case 0: /* 000 */
9184 result = val1 == val2;
9185 break;
9186
9187 case 3: /* 011 */
9188 val1 = fabs (val1);
9189 val2 = fabs (val2);
9190 /* Fall through. */
9191 case 2: /* 010 */
9192 result = val1 >= val2;
9193 break;
9194
9195 case 7: /* 111 */
9196 val1 = fabs (val1);
9197 val2 = fabs (val2);
9198 /* Fall through. */
9199 case 6: /* 110 */
9200 result = val1 > val2;
9201 break;
9202
9203 default:
9204 HALT_UNALLOC;
9205 }
9206
9207 aarch64_set_vec_u32 (cpu, rd, 0, result ? -1 : 0);
9208 return;
9209 }
9210
9211 val1 = aarch64_get_FP_float (cpu, rn);
9212 val2 = aarch64_get_FP_float (cpu, rm);
9213
9214 switch (EUac)
9215 {
9216 case 0: /* 000 */
9217 result = val1 == val2;
9218 break;
9219
9220 case 3: /* 011 */
9221 val1 = fabsf (val1);
9222 val2 = fabsf (val2);
9223 /* Fall through. */
9224 case 2: /* 010 */
9225 result = val1 >= val2;
9226 break;
9227
9228 case 7: /* 111 */
9229 val1 = fabsf (val1);
9230 val2 = fabsf (val2);
9231 /* Fall through. */
9232 case 6: /* 110 */
9233 result = val1 > val2;
9234 break;
9235
9236 default:
9237 HALT_UNALLOC;
9238 }
9239
9240 aarch64_set_vec_u32 (cpu, rd, 0, result ? -1 : 0);
9241 }
9242
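/* Editor's sketch (not simulator code) of the E:U:ac dispatch used by
   do_scalar_FCM above: 000 is FCMEQ (==), 01x is FCMGE/FACGE (>=), 11x is
   FCMGT/FACGT (>), where ac = 1 selects the absolute-value ("FAC") forms
   and the remaining encodings are unallocated.  */
#ifdef AARCH64_SIM_FCM_DEMO
static int
fcm_demo (double a, double b, unsigned euac)
{
  double x = (euac & 1) ? fabs (a) : a;
  double y = (euac & 1) ? fabs (b) : b;

  switch (euac)
    {
    case 0:         return x == y;   /* FCMEQ.  */
    case 2: case 3: return x >= y;   /* FCMGE / FACGE.  */
    case 6: case 7: return x >  y;   /* FCMGT / FACGT.  */
    default:        return 0;        /* Unallocated.  */
    }
}
#endif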
9243 /* An alias of DUP. */
9244 static void
9245 do_scalar_MOV (sim_cpu *cpu)
9246 {
9247 /* instr [31,21] = 0101 1110 000
9248 instr [20,16] = imm5
9249 instr [15,10] = 0000 01
9250 instr [9, 5] = Rn
9251 instr [4, 0] = Rd. */
9252
9253 unsigned rn = INSTR (9, 5);
9254 unsigned rd = INSTR (4, 0);
9255 unsigned index;
9256
9257 NYI_assert (31, 21, 0x2F0);
9258 NYI_assert (15, 10, 0x01);
9259
9260 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9261 if (INSTR (16, 16))
9262 {
9263 /* 8-bit. */
9264 index = INSTR (20, 17);
9265 aarch64_set_vec_u8
9266 (cpu, rd, 0, aarch64_get_vec_u8 (cpu, rn, index));
9267 }
9268 else if (INSTR (17, 17))
9269 {
9270 /* 16-bit. */
9271 index = INSTR (20, 18);
9272 aarch64_set_vec_u16
9273 (cpu, rd, 0, aarch64_get_vec_u16 (cpu, rn, index));
9274 }
9275 else if (INSTR (18, 18))
9276 {
9277 /* 32-bit. */
9278 index = INSTR (20, 19);
9279 aarch64_set_vec_u32
9280 (cpu, rd, 0, aarch64_get_vec_u32 (cpu, rn, index));
9281 }
9282 else if (INSTR (19, 19))
9283 {
9284 /* 64-bit. */
9285 index = INSTR (20, 20);
9286 aarch64_set_vec_u64
9287 (cpu, rd, 0, aarch64_get_vec_u64 (cpu, rn, index));
9288 }
9289 else
9290 HALT_UNALLOC;
9291 }
9292
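/* Editor's sketch (not simulator code) of the imm5 decoding used by
   do_scalar_MOV above: the lowest set bit of imm5 picks the element width
   (bit 0 ==> byte ... bit 3 ==> doubleword) and the bits above it form
   the element index.  */
#ifdef AARCH64_SIM_IMM5_DEMO
static unsigned
imm5_element_bits (unsigned imm5, unsigned *index)
{
  unsigned b;

  for (b = 0; b < 4; b++)
    if (imm5 & (1u << b))
      {
        *index = imm5 >> (b + 1);
        return 8u << b;       /* 8, 16, 32 or 64 bits.  */
      }
  return 0;                   /* imm5 with no low bit set ==> UNALLOC.  */
}
#endif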
9293 static void
9294 do_scalar_NEG (sim_cpu *cpu)
9295 {
9296 /* instr [31,10] = 0111 1110 1110 0000 1011 10
9297 instr [9, 5] = Rn
9298 instr [4, 0] = Rd. */
9299
9300 unsigned rn = INSTR (9, 5);
9301 unsigned rd = INSTR (4, 0);
9302
9303 NYI_assert (31, 10, 0x1FB82E);
9304
9305 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9306 aarch64_set_vec_u64 (cpu, rd, 0, - aarch64_get_vec_u64 (cpu, rn, 0));
9307 }
9308
9309 static void
9310 do_scalar_USHL (sim_cpu *cpu)
9311 {
9312 /* instr [31,21] = 0111 1110 111
9313 instr [20,16] = Rm
9314 instr [15,10] = 0100 01
9315 instr [9, 5] = Rn
9316 instr [4, 0] = Rd. */
9317
9318 unsigned rm = INSTR (20, 16);
9319 unsigned rn = INSTR (9, 5);
9320 unsigned rd = INSTR (4, 0);
9321 signed int shift = aarch64_get_vec_s8 (cpu, rm, 0);
9322
9323 NYI_assert (31, 21, 0x3F7);
9324 NYI_assert (15, 10, 0x11);
9325
9326 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9327 if (shift >= 0)
9328 aarch64_set_vec_u64 (cpu, rd, 0, aarch64_get_vec_u64 (cpu, rn, 0) << shift);
9329 else
9330 aarch64_set_vec_u64 (cpu, rd, 0, aarch64_get_vec_u64 (cpu, rn, 0) >> - shift);
9331 }
9332
9333 static void
9334 do_double_add (sim_cpu *cpu)
9335 {
9336 /* instr [31,21] = 0101 1110 111
9337 instr [20,16] = Fn
9338 instr [15,10] = 1000 01
9339 instr [9,5] = Fm
9340 instr [4,0] = Fd. */
9341 unsigned Fd;
9342 unsigned Fm;
9343 unsigned Fn;
9344 double val1;
9345 double val2;
9346
9347 NYI_assert (31, 21, 0x2F7);
9348 NYI_assert (15, 10, 0x21);
9349
9350 Fd = INSTR (4, 0);
9351 Fm = INSTR (9, 5);
9352 Fn = INSTR (20, 16);
9353
9354 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9355 val1 = aarch64_get_FP_double (cpu, Fm);
9356 val2 = aarch64_get_FP_double (cpu, Fn);
9357
9358 aarch64_set_FP_double (cpu, Fd, val1 + val2);
9359 }
9360
9361 static void
9362 do_scalar_UCVTF (sim_cpu *cpu)
9363 {
9364 /* instr [31,23] = 0111 1110 0
9365 instr [22] = single(0)/double(1)
9366 instr [21,10] = 10 0001 1101 10
9367 instr [9,5] = rn
9368 instr [4,0] = rd. */
9369
9370 unsigned rn = INSTR (9, 5);
9371 unsigned rd = INSTR (4, 0);
9372
9373 NYI_assert (31, 23, 0x0FC);
9374 NYI_assert (21, 10, 0x876);
9375
9376 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9377 if (INSTR (22, 22))
9378 {
9379 uint64_t val = aarch64_get_vec_u64 (cpu, rn, 0);
9380
9381 aarch64_set_vec_double (cpu, rd, 0, (double) val);
9382 }
9383 else
9384 {
9385 uint32_t val = aarch64_get_vec_u32 (cpu, rn, 0);
9386
9387 aarch64_set_vec_float (cpu, rd, 0, (float) val);
9388 }
9389 }
9390
9391 static void
9392 do_scalar_vec (sim_cpu *cpu)
9393 {
9394 /* instr [30] = 1. */
9395 /* instr [28,25] = 1111. */
9396 switch (INSTR (31, 23))
9397 {
9398 case 0xBC:
9399 switch (INSTR (15, 10))
9400 {
9401 case 0x01: do_scalar_MOV (cpu); return;
9402 case 0x39: do_scalar_FCM (cpu); return;
9403 case 0x3B: do_scalar_FCM (cpu); return;
9404 }
9405 break;
9406
9407 case 0xBE: do_scalar_shift (cpu); return;
9408
9409 case 0xFC:
9410 switch (INSTR (15, 10))
9411 {
9412 case 0x36:
9413 switch (INSTR (21, 16))
9414 {
9415 case 0x30: do_scalar_FADDP (cpu); return;
9416 case 0x21: do_scalar_UCVTF (cpu); return;
9417 }
9418 HALT_NYI;
9419 case 0x39: do_scalar_FCM (cpu); return;
9420 case 0x3B: do_scalar_FCM (cpu); return;
9421 }
9422 break;
9423
9424 case 0xFD:
9425 switch (INSTR (15, 10))
9426 {
9427 case 0x0D: do_scalar_CMGT (cpu); return;
9428 case 0x11: do_scalar_USHL (cpu); return;
9429 case 0x2E: do_scalar_NEG (cpu); return;
9430 case 0x32: do_scalar_FCMGE_zero (cpu); return;
9431 case 0x35: do_scalar_FABD (cpu); return;
9432 case 0x36: do_scalar_FCMLE_zero (cpu); return;
9433 case 0x39: do_scalar_FCM (cpu); return;
9434 case 0x3B: do_scalar_FCM (cpu); return;
9435 default:
9436 HALT_NYI;
9437 }
9438
9439 case 0xFE: do_scalar_USHR (cpu); return;
9440
9441 case 0xBD:
9442 switch (INSTR (15, 10))
9443 {
9444 case 0x21: do_double_add (cpu); return;
9445 case 0x11: do_scalar_SSHL (cpu); return;
9446 case 0x32: do_scalar_FCMGT_zero (cpu); return;
9447 case 0x36: do_scalar_FCMEQ_zero (cpu); return;
9448 case 0x3A: do_scalar_FCMLT_zero (cpu); return;
9449 default:
9450 HALT_NYI;
9451 }
9452
9453 default:
9454 HALT_NYI;
9455 }
9456 }
9457
9458 static void
9459 dexAdvSIMD1 (sim_cpu *cpu)
9460 {
9461 /* instr [28,25] = 1 111. */
9462
9463 /* We are currently only interested in the basic
9464 scalar fp routines which all have bit 30 = 0. */
9465 if (INSTR (30, 30))
9466 do_scalar_vec (cpu);
9467
9468 /* instr[24] is set for FP data processing 3-source and clear for
9469 all other basic scalar fp instruction groups. */
9470 else if (INSTR (24, 24))
9471 dexSimpleFPDataProc3Source (cpu);
9472
9473 /* instr[21] is clear for floating <-> fixed conversions and set for
9474 all other basic scalar fp instruction groups. */
9475 else if (!INSTR (21, 21))
9476 dexSimpleFPFixedConvert (cpu);
9477
9478 /* instr[11,10] : 01 ==> cond compare, 10 ==> Data Proc 2 Source
9479 11 ==> cond select, 00 ==> other. */
9480 else
9481 switch (INSTR (11, 10))
9482 {
9483 case 1: dexSimpleFPCondCompare (cpu); return;
9484 case 2: dexSimpleFPDataProc2Source (cpu); return;
9485 case 3: dexSimpleFPCondSelect (cpu); return;
9486
9487 default:
9488 /* Now an ordered cascade of tests.
9489 FP immediate has instr [12] == 1.
9490 FP compare has instr [13] == 1.
9491 FP Data Proc 1 Source has instr [14] == 1.
9492 FP floating <--> integer conversions has instr [15] == 0. */
9493 if (INSTR (12, 12))
9494 dexSimpleFPImmediate (cpu);
9495
9496 else if (INSTR (13, 13))
9497 dexSimpleFPCompare (cpu);
9498
9499 else if (INSTR (14, 14))
9500 dexSimpleFPDataProc1Source (cpu);
9501
9502 else if (!INSTR (15, 15))
9503 dexSimpleFPIntegerConvert (cpu);
9504
9505 else
9506 /* If we get here then instr[15] == 1 which means UNALLOC. */
9507 HALT_UNALLOC;
9508 }
9509 }
9510
9511 /* PC relative addressing. */
9512
9513 static void
9514 pcadr (sim_cpu *cpu)
9515 {
9516 /* instr[31] = op : 0 ==> ADR, 1 ==> ADRP
9517 instr[30,29] = immlo
9518 instr[23,5] = immhi. */
9519 uint64_t address;
9520 unsigned rd = INSTR (4, 0);
9521 uint32_t isPage = INSTR (31, 31);
9522 union { int64_t s64; uint64_t u64; } imm;
9523 uint64_t offset;
9524
9525 imm.s64 = simm64 (aarch64_get_instr (cpu), 23, 5);
9526 offset = imm.u64;
9527 offset = (offset << 2) | INSTR (30, 29);
9528
9529 address = aarch64_get_PC (cpu);
9530
9531 if (isPage)
9532 {
9533 offset <<= 12;
9534 address &= ~0xfff;
9535 }
9536
9537 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9538 aarch64_set_reg_u64 (cpu, rd, NO_SP, address + offset);
9539 }
9540
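/* Editor's sketch (not simulator code) of the two address computations in
   pcadr above, assuming `imm21' is the sign-extended immhi:immlo field:
   ADR adds the byte offset to the PC, ADRP adds a page offset to the PC
   with its low 12 bits cleared.  */
#ifdef AARCH64_SIM_ADR_DEMO
static uint64_t
adr_target (uint64_t pc, int64_t imm21)
{
  return pc + (uint64_t) imm21;
}

static uint64_t
adrp_target (uint64_t pc, int64_t imm21)
{
  return (pc & ~(uint64_t) 0xfff) + ((uint64_t) imm21 << 12);
}
#endif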
9541 /* Specific decode and execute for group Data Processing Immediate. */
9542
9543 static void
9544 dexPCRelAddressing (sim_cpu *cpu)
9545 {
9546 /* assert instr[28,24] = 10000. */
9547 pcadr (cpu);
9548 }
9549
9550 /* Immediate logical.
9551 The bimm32/64 argument is constructed by replicating a 2, 4, 8,
9552 16, 32 or 64 bit sequence pulled out at decode and possibly
9553 inverting it.
9554
9555 N.B. the output register (dest) can normally be Xn or SP;
9556 the exception occurs for flag-setting instructions, which may
9557 only use Xn for the output (dest). The input register can
9558 never be SP. */
9559
9560 /* 32 bit and immediate. */
9561 static void
9562 and32 (sim_cpu *cpu, uint32_t bimm)
9563 {
9564 unsigned rn = INSTR (9, 5);
9565 unsigned rd = INSTR (4, 0);
9566
9567 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9568 aarch64_set_reg_u64 (cpu, rd, SP_OK,
9569 aarch64_get_reg_u32 (cpu, rn, NO_SP) & bimm);
9570 }
9571
9572 /* 64 bit and immediate. */
9573 static void
9574 and64 (sim_cpu *cpu, uint64_t bimm)
9575 {
9576 unsigned rn = INSTR (9, 5);
9577 unsigned rd = INSTR (4, 0);
9578
9579 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9580 aarch64_set_reg_u64 (cpu, rd, SP_OK,
9581 aarch64_get_reg_u64 (cpu, rn, NO_SP) & bimm);
9582 }
9583
9584 /* 32 bit and immediate set flags. */
9585 static void
9586 ands32 (sim_cpu *cpu, uint32_t bimm)
9587 {
9588 unsigned rn = INSTR (9, 5);
9589 unsigned rd = INSTR (4, 0);
9590
9591 uint32_t value1 = aarch64_get_reg_u32 (cpu, rn, NO_SP);
9592 uint32_t value2 = bimm;
9593
9594 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9595 aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 & value2);
9596 set_flags_for_binop32 (cpu, value1 & value2);
9597 }
9598
9599 /* 64 bit and immediate set flags. */
9600 static void
9601 ands64 (sim_cpu *cpu, uint64_t bimm)
9602 {
9603 unsigned rn = INSTR (9, 5);
9604 unsigned rd = INSTR (4, 0);
9605
9606 uint64_t value1 = aarch64_get_reg_u64 (cpu, rn, NO_SP);
9607 uint64_t value2 = bimm;
9608
9609 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9610 aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 & value2);
9611 set_flags_for_binop64 (cpu, value1 & value2);
9612 }
9613
9614 /* 32 bit exclusive or immediate. */
9615 static void
9616 eor32 (sim_cpu *cpu, uint32_t bimm)
9617 {
9618 unsigned rn = INSTR (9, 5);
9619 unsigned rd = INSTR (4, 0);
9620
9621 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9622 aarch64_set_reg_u64 (cpu, rd, SP_OK,
9623 aarch64_get_reg_u32 (cpu, rn, NO_SP) ^ bimm);
9624 }
9625
9626 /* 64 bit exclusive or immediate. */
9627 static void
9628 eor64 (sim_cpu *cpu, uint64_t bimm)
9629 {
9630 unsigned rn = INSTR (9, 5);
9631 unsigned rd = INSTR (4, 0);
9632
9633 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9634 aarch64_set_reg_u64 (cpu, rd, SP_OK,
9635 aarch64_get_reg_u64 (cpu, rn, NO_SP) ^ bimm);
9636 }
9637
9638 /* 32 bit or immediate. */
9639 static void
9640 orr32 (sim_cpu *cpu, uint32_t bimm)
9641 {
9642 unsigned rn = INSTR (9, 5);
9643 unsigned rd = INSTR (4, 0);
9644
9645 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9646 aarch64_set_reg_u64 (cpu, rd, SP_OK,
9647 aarch64_get_reg_u32 (cpu, rn, NO_SP) | bimm);
9648 }
9649
9650 /* 64 bit or immediate. */
9651 static void
9652 orr64 (sim_cpu *cpu, uint64_t bimm)
9653 {
9654 unsigned rn = INSTR (9, 5);
9655 unsigned rd = INSTR (4, 0);
9656
9657 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9658 aarch64_set_reg_u64 (cpu, rd, SP_OK,
9659 aarch64_get_reg_u64 (cpu, rn, NO_SP) | bimm);
9660 }
9661
9662 /* Logical shifted register.
9663 These allow an optional LSL, ASR, LSR or ROR to the second source
9664 register with a count up to the register bit count.
9665 N.B register args may not be SP. */
9666
9667 /* 32 bit AND shifted register. */
9668 static void
9669 and32_shift (sim_cpu *cpu, Shift shift, uint32_t count)
9670 {
9671 unsigned rm = INSTR (20, 16);
9672 unsigned rn = INSTR (9, 5);
9673 unsigned rd = INSTR (4, 0);
9674
9675 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9676 aarch64_set_reg_u64
9677 (cpu, rd, NO_SP, aarch64_get_reg_u32 (cpu, rn, NO_SP)
9678 & shifted32 (aarch64_get_reg_u32 (cpu, rm, NO_SP), shift, count));
9679 }
9680
9681 /* 64 bit AND shifted register. */
9682 static void
9683 and64_shift (sim_cpu *cpu, Shift shift, uint32_t count)
9684 {
9685 unsigned rm = INSTR (20, 16);
9686 unsigned rn = INSTR (9, 5);
9687 unsigned rd = INSTR (4, 0);
9688
9689 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9690 aarch64_set_reg_u64
9691 (cpu, rd, NO_SP, aarch64_get_reg_u64 (cpu, rn, NO_SP)
9692 & shifted64 (aarch64_get_reg_u64 (cpu, rm, NO_SP), shift, count));
9693 }
9694
9695 /* 32 bit AND shifted register setting flags. */
9696 static void
9697 ands32_shift (sim_cpu *cpu, Shift shift, uint32_t count)
9698 {
9699 unsigned rm = INSTR (20, 16);
9700 unsigned rn = INSTR (9, 5);
9701 unsigned rd = INSTR (4, 0);
9702
9703 uint32_t value1 = aarch64_get_reg_u32 (cpu, rn, NO_SP);
9704 uint32_t value2 = shifted32 (aarch64_get_reg_u32 (cpu, rm, NO_SP),
9705 shift, count);
9706
9707 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9708 aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 & value2);
9709 set_flags_for_binop32 (cpu, value1 & value2);
9710 }
9711
9712 /* 64 bit AND shifted register setting flags. */
9713 static void
9714 ands64_shift (sim_cpu *cpu, Shift shift, uint32_t count)
9715 {
9716 unsigned rm = INSTR (20, 16);
9717 unsigned rn = INSTR (9, 5);
9718 unsigned rd = INSTR (4, 0);
9719
9720 uint64_t value1 = aarch64_get_reg_u64 (cpu, rn, NO_SP);
9721 uint64_t value2 = shifted64 (aarch64_get_reg_u64 (cpu, rm, NO_SP),
9722 shift, count);
9723
9724 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9725 aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 & value2);
9726 set_flags_for_binop64 (cpu, value1 & value2);
9727 }
9728
9729 /* 32 bit BIC shifted register. */
9730 static void
9731 bic32_shift (sim_cpu *cpu, Shift shift, uint32_t count)
9732 {
9733 unsigned rm = INSTR (20, 16);
9734 unsigned rn = INSTR (9, 5);
9735 unsigned rd = INSTR (4, 0);
9736
9737 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9738 aarch64_set_reg_u64
9739 (cpu, rd, NO_SP, aarch64_get_reg_u32 (cpu, rn, NO_SP)
9740 & ~ shifted32 (aarch64_get_reg_u32 (cpu, rm, NO_SP), shift, count));
9741 }
9742
9743 /* 64 bit BIC shifted register. */
9744 static void
9745 bic64_shift (sim_cpu *cpu, Shift shift, uint32_t count)
9746 {
9747 unsigned rm = INSTR (20, 16);
9748 unsigned rn = INSTR (9, 5);
9749 unsigned rd = INSTR (4, 0);
9750
9751 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9752 aarch64_set_reg_u64
9753 (cpu, rd, NO_SP, aarch64_get_reg_u64 (cpu, rn, NO_SP)
9754 & ~ shifted64 (aarch64_get_reg_u64 (cpu, rm, NO_SP), shift, count));
9755 }
9756
9757 /* 32 bit BIC shifted register setting flags. */
9758 static void
9759 bics32_shift (sim_cpu *cpu, Shift shift, uint32_t count)
9760 {
9761 unsigned rm = INSTR (20, 16);
9762 unsigned rn = INSTR (9, 5);
9763 unsigned rd = INSTR (4, 0);
9764
9765 uint32_t value1 = aarch64_get_reg_u32 (cpu, rn, NO_SP);
9766 uint32_t value2 = ~ shifted32 (aarch64_get_reg_u32 (cpu, rm, NO_SP),
9767 shift, count);
9768
9769 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9770 aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 & value2);
9771 set_flags_for_binop32 (cpu, value1 & value2);
9772 }
9773
9774 /* 64 bit BIC shifted register setting flags. */
9775 static void
9776 bics64_shift (sim_cpu *cpu, Shift shift, uint32_t count)
9777 {
9778 unsigned rm = INSTR (20, 16);
9779 unsigned rn = INSTR (9, 5);
9780 unsigned rd = INSTR (4, 0);
9781
9782 uint64_t value1 = aarch64_get_reg_u64 (cpu, rn, NO_SP);
9783 uint64_t value2 = ~ shifted64 (aarch64_get_reg_u64 (cpu, rm, NO_SP),
9784 shift, count);
9785
9786 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9787 aarch64_set_reg_u64 (cpu, rd, NO_SP, value1 & value2);
9788 set_flags_for_binop64 (cpu, value1 & value2);
9789 }
9790
9791 /* 32 bit EON shifted register. */
9792 static void
9793 eon32_shift (sim_cpu *cpu, Shift shift, uint32_t count)
9794 {
9795 unsigned rm = INSTR (20, 16);
9796 unsigned rn = INSTR (9, 5);
9797 unsigned rd = INSTR (4, 0);
9798
9799 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9800 aarch64_set_reg_u64
9801 (cpu, rd, NO_SP, aarch64_get_reg_u32 (cpu, rn, NO_SP)
9802 ^ ~ shifted32 (aarch64_get_reg_u32 (cpu, rm, NO_SP), shift, count));
9803 }
9804
9805 /* 64 bit EON shifted register. */
9806 static void
9807 eon64_shift (sim_cpu *cpu, Shift shift, uint32_t count)
9808 {
9809 unsigned rm = INSTR (20, 16);
9810 unsigned rn = INSTR (9, 5);
9811 unsigned rd = INSTR (4, 0);
9812
9813 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9814 aarch64_set_reg_u64
9815 (cpu, rd, NO_SP, aarch64_get_reg_u64 (cpu, rn, NO_SP)
9816 ^ ~ shifted64 (aarch64_get_reg_u64 (cpu, rm, NO_SP), shift, count));
9817 }
9818
9819 /* 32 bit EOR shifted register. */
9820 static void
9821 eor32_shift (sim_cpu *cpu, Shift shift, uint32_t count)
9822 {
9823 unsigned rm = INSTR (20, 16);
9824 unsigned rn = INSTR (9, 5);
9825 unsigned rd = INSTR (4, 0);
9826
9827 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9828 aarch64_set_reg_u64
9829 (cpu, rd, NO_SP, aarch64_get_reg_u32 (cpu, rn, NO_SP)
9830 ^ shifted32 (aarch64_get_reg_u32 (cpu, rm, NO_SP), shift, count));
9831 }
9832
9833 /* 64 bit EOR shifted register. */
9834 static void
9835 eor64_shift (sim_cpu *cpu, Shift shift, uint32_t count)
9836 {
9837 unsigned rm = INSTR (20, 16);
9838 unsigned rn = INSTR (9, 5);
9839 unsigned rd = INSTR (4, 0);
9840
9841 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9842 aarch64_set_reg_u64
9843 (cpu, rd, NO_SP, aarch64_get_reg_u64 (cpu, rn, NO_SP)
9844 ^ shifted64 (aarch64_get_reg_u64 (cpu, rm, NO_SP), shift, count));
9845 }
9846
9847 /* 32 bit ORR shifted register. */
9848 static void
9849 orr32_shift (sim_cpu *cpu, Shift shift, uint32_t count)
9850 {
9851 unsigned rm = INSTR (20, 16);
9852 unsigned rn = INSTR (9, 5);
9853 unsigned rd = INSTR (4, 0);
9854
9855 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9856 aarch64_set_reg_u64
9857 (cpu, rd, NO_SP, aarch64_get_reg_u32 (cpu, rn, NO_SP)
9858 | shifted32 (aarch64_get_reg_u32 (cpu, rm, NO_SP), shift, count));
9859 }
9860
9861 /* 64 bit ORR shifted register. */
9862 static void
9863 orr64_shift (sim_cpu *cpu, Shift shift, uint32_t count)
9864 {
9865 unsigned rm = INSTR (20, 16);
9866 unsigned rn = INSTR (9, 5);
9867 unsigned rd = INSTR (4, 0);
9868
9869 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9870 aarch64_set_reg_u64
9871 (cpu, rd, NO_SP, aarch64_get_reg_u64 (cpu, rn, NO_SP)
9872 | shifted64 (aarch64_get_reg_u64 (cpu, rm, NO_SP), shift, count));
9873 }
9874
9875 /* 32 bit ORN shifted register. */
9876 static void
9877 orn32_shift (sim_cpu *cpu, Shift shift, uint32_t count)
9878 {
9879 unsigned rm = INSTR (20, 16);
9880 unsigned rn = INSTR (9, 5);
9881 unsigned rd = INSTR (4, 0);
9882
9883 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9884 aarch64_set_reg_u64
9885 (cpu, rd, NO_SP, aarch64_get_reg_u32 (cpu, rn, NO_SP)
9886 | ~ shifted32 (aarch64_get_reg_u32 (cpu, rm, NO_SP), shift, count));
9887 }
9888
9889 /* 64 bit ORN shifted register. */
9890 static void
9891 orn64_shift (sim_cpu *cpu, Shift shift, uint32_t count)
9892 {
9893 unsigned rm = INSTR (20, 16);
9894 unsigned rn = INSTR (9, 5);
9895 unsigned rd = INSTR (4, 0);
9896
9897 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9898 aarch64_set_reg_u64
9899 (cpu, rd, NO_SP, aarch64_get_reg_u64 (cpu, rn, NO_SP)
9900 | ~ shifted64 (aarch64_get_reg_u64 (cpu, rm, NO_SP), shift, count));
9901 }
9902
9903 static void
9904 dexLogicalImmediate (sim_cpu *cpu)
9905 {
9906 /* assert instr[28,23] = 100100
9907 instr[31] = size : 0 ==> 32 bit, 1 ==> 64 bit
9908 instr[30,29] = op : 0 ==> AND, 1 ==> ORR, 2 ==> EOR, 3 ==> ANDS
9909 instr[22] = N : used to construct immediate mask
9910 instr[21,16] = immr
9911 instr[15,10] = imms
9912 instr[9,5] = Rn
9913 instr[4,0] = Rd */
9914
9915 /* 32 bit operations must have N = 0 or else we have an UNALLOC. */
9916 uint32_t size = INSTR (31, 31);
9917 uint32_t N = INSTR (22, 22);
9918 /* uint32_t immr = INSTR (21, 16); */
9919 /* uint32_t imms = INSTR (15, 10); */
9920 uint32_t index = INSTR (22, 10);
9921 uint64_t bimm64 = LITable [index];
9922 uint32_t dispatch = INSTR (30, 29);
9923
9924 if (~size & N)
9925 HALT_UNALLOC;
9926
9927 if (!bimm64)
9928 HALT_UNALLOC;
9929
9930 if (size == 0)
9931 {
9932 uint32_t bimm = (uint32_t) bimm64;
9933
9934 switch (dispatch)
9935 {
9936 case 0: and32 (cpu, bimm); return;
9937 case 1: orr32 (cpu, bimm); return;
9938 case 2: eor32 (cpu, bimm); return;
9939 case 3: ands32 (cpu, bimm); return;
9940 }
9941 }
9942 else
9943 {
9944 switch (dispatch)
9945 {
9946 case 0: and64 (cpu, bimm64); return;
9947 case 1: orr64 (cpu, bimm64); return;
9948 case 2: eor64 (cpu, bimm64); return;
9949 case 3: ands64 (cpu, bimm64); return;
9950 }
9951 }
9952 HALT_UNALLOC;
9953 }
9954
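/* Editor's sketch (not simulator code): dexLogicalImmediate above indexes
   LITable with the 13 bits N:immr:imms taken verbatim from instr[22,10];
   entries for invalid encodings are zero, which is why a zero bimm64 is
   treated as UNALLOC.  */
#ifdef AARCH64_SIM_LITABLE_DEMO
static uint32_t
logical_imm_index (uint32_t n, uint32_t immr, uint32_t imms)
{
  return (n << 12) | (immr << 6) | imms;   /* == INSTR (22, 10).  */
}
#endif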
9955 /* Immediate move.
9956 The uimm argument is a 16 bit value to be inserted into the
9957 target register.  The pos argument locates the 16 bit word in the
9958 dest register, i.e. it is in {0, 1} for 32 bit and {0, 1, 2,
9959 3} for 64 bit.
9960 N.B. the register arg may not be SP, so it should be
9961 accessed using the setGZRegisterXXX accessors. */
9962
9963 /* 32 bit move 16 bit immediate zero remaining shorts. */
9964 static void
9965 movz32 (sim_cpu *cpu, uint32_t val, uint32_t pos)
9966 {
9967 unsigned rd = INSTR (4, 0);
9968
9969 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9970 aarch64_set_reg_u64 (cpu, rd, NO_SP, val << (pos * 16));
9971 }
9972
9973 /* 64 bit move 16 bit immediate zero remaining shorts. */
9974 static void
9975 movz64 (sim_cpu *cpu, uint32_t val, uint32_t pos)
9976 {
9977 unsigned rd = INSTR (4, 0);
9978
9979 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9980 aarch64_set_reg_u64 (cpu, rd, NO_SP, ((uint64_t) val) << (pos * 16));
9981 }
9982
9983 /* 32 bit move 16 bit immediate negated. */
9984 static void
9985 movn32 (sim_cpu *cpu, uint32_t val, uint32_t pos)
9986 {
9987 unsigned rd = INSTR (4, 0);
9988
9989 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
9990 aarch64_set_reg_u64 (cpu, rd, NO_SP, ((val << (pos * 16)) ^ 0xffffffffU));
9991 }
9992
9993 /* 64 bit move 16 bit immediate negated. */
9994 static void
9995 movn64 (sim_cpu *cpu, uint32_t val, uint32_t pos)
9996 {
9997 unsigned rd = INSTR (4, 0);
9998
9999 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
10000 aarch64_set_reg_u64
10001 (cpu, rd, NO_SP, ((((uint64_t) val) << (pos * 16))
10002 ^ 0xffffffffffffffffULL));
10003 }
10004
10005 /* 32 bit move 16 bit immediate keep remaining shorts. */
10006 static void
10007 movk32 (sim_cpu *cpu, uint32_t val, uint32_t pos)
10008 {
10009 unsigned rd = INSTR (4, 0);
10010 uint32_t current = aarch64_get_reg_u32 (cpu, rd, NO_SP);
10011 uint32_t value = val << (pos * 16);
10012 uint32_t mask = ~(0xffffU << (pos * 16));
10013
10014 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
10015 aarch64_set_reg_u64 (cpu, rd, NO_SP, (value | (current & mask)));
10016 }
10017
10018 /* 64 bit move 16 bit immediate keep remaining shorts. */
10019 static void
10020 movk64 (sim_cpu *cpu, uint32_t val, uint32_t pos)
10021 {
10022 unsigned rd = INSTR (4, 0);
10023 uint64_t current = aarch64_get_reg_u64 (cpu, rd, NO_SP);
10024 uint64_t value = (uint64_t) val << (pos * 16);
10025 uint64_t mask = ~(0xffffULL << (pos * 16));
10026
10027 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
10028 aarch64_set_reg_u64 (cpu, rd, NO_SP, (value | (current & mask)));
10029 }
10030
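/* Editor's sketch (not simulator code): how a MOVZ/MOVK sequence builds a
   full 64-bit constant using the same insert-at-position logic as
   movz64/movk64 above.  The constant is arbitrary.  */
#ifdef AARCH64_SIM_MOVK_DEMO
static uint64_t
movk_demo (void)
{
  uint64_t x;

  x = (uint64_t) 0xdef0;                              /* MOVZ x, #0xdef0.  */
  x = (x & ~(0xffffULL << 16)) | (0x9abcULL << 16);   /* MOVK, LSL #16.  */
  x = (x & ~(0xffffULL << 32)) | (0x5678ULL << 32);   /* MOVK, LSL #32.  */
  x = (x & ~(0xffffULL << 48)) | (0x1234ULL << 48);   /* MOVK, LSL #48.  */
  return x;                                           /* 0x123456789abcdef0.  */
}
#endif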
10031 static void
10032 dexMoveWideImmediate (sim_cpu *cpu)
10033 {
10034 /* assert instr[28:23] = 100101
10035 instr[31] = size : 0 ==> 32 bit, 1 ==> 64 bit
10036 instr[30,29] = op : 0 ==> MOVN, 1 ==> UNALLOC, 2 ==> MOVZ, 3 ==> MOVK
10037 instr[22,21] = shift : 00 == LSL#0, 01 = LSL#16, 10 = LSL#32, 11 = LSL#48
10038 instr[20,5] = uimm16
10039 instr[4,0] = Rd */
10040
10041 /* N.B. the (multiple of 16) shift is applied by the called routine;
10042 we just pass the multiplier. */
10043
10044 uint32_t imm;
10045 uint32_t size = INSTR (31, 31);
10046 uint32_t op = INSTR (30, 29);
10047 uint32_t shift = INSTR (22, 21);
10048
10049 /* 32 bit can only shift by 0 or 1 lot of 16;
10050 anything else is an unallocated instruction. */
10051 if (size == 0 && (shift > 1))
10052 HALT_UNALLOC;
10053
10054 if (op == 1)
10055 HALT_UNALLOC;
10056
10057 imm = INSTR (20, 5);
10058
10059 if (size == 0)
10060 {
10061 if (op == 0)
10062 movn32 (cpu, imm, shift);
10063 else if (op == 2)
10064 movz32 (cpu, imm, shift);
10065 else
10066 movk32 (cpu, imm, shift);
10067 }
10068 else
10069 {
10070 if (op == 0)
10071 movn64 (cpu, imm, shift);
10072 else if (op == 2)
10073 movz64 (cpu, imm, shift);
10074 else
10075 movk64 (cpu, imm, shift);
10076 }
10077 }
10078
10079 /* Bitfield operations.
10080 These take a pair of bit positions r and s which are in {0..31}
10081 or {0..63} depending on the instruction word size.
10082 N.B register args may not be SP. */
10083
10084 /* OK, we start with ubfm, which just needs to pick
10085 some bits out of the source, zero the rest, and write
10086 the result to dest.  Just needs two logical shifts. */
10087
10088 /* 32 bit bitfield move, left and right of affected zeroed
10089 if r <= s Wd<s-r:0> = Wn<s:r> else Wd<32+s-r,32-r> = Wn<s:0>. */
10090 static void
10091 ubfm32 (sim_cpu *cpu, uint32_t r, uint32_t s)
10092 {
10093 unsigned rd;
10094 unsigned rn = INSTR (9, 5);
10095 uint32_t value = aarch64_get_reg_u32 (cpu, rn, NO_SP);
10096
10097 /* Pick either s+1-r or s+1 consecutive bits out of the original word. */
10098 if (r <= s)
10099 {
10100 /* 31:...:s:xxx:r:...:0 ==> 31:...:s-r:xxx:0.
10101 We want only bits s:xxx:r at the bottom of the word
10102 so we LSL bit s up to bit 31 i.e. by 31 - s
10103 and then we LSR to bring bit 31 down to bit s - r
10104 i.e. by 31 + r - s. */
10105 value <<= 31 - s;
10106 value >>= 31 + r - s;
10107 }
10108 else
10109 {
10110 /* 31:...:s:xxx:0 ==> 31:...:31-(r-1)+s:xxx:31-(r-1):...:0
10111 We want only bits s:xxx:0 starting at bit 31-(r-1)
10112 so we LSL bit s up to bit 31 i.e. by 31 - s
10113 and then we LSL to bring bit 31 down to 31-(r-1)+s
10114 i.e. by r - (s + 1). */
10115 value <<= 31 - s;
10116 value >>= r - (s + 1);
10117 }
10118
10119 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
10120 rd = INSTR (4, 0);
10121 aarch64_set_reg_u64 (cpu, rd, NO_SP, value);
10122 }
10123
10124 /* 64 bit bitfield move, left and right of affected zeroed
10125 if r <= s Wd<s-r:0> = Wn<s:r> else Wd<64+s-r,64-r> = Wn<s:0>. */
10126 static void
10127 ubfm (sim_cpu *cpu, uint32_t r, uint32_t s)
10128 {
10129 unsigned rd;
10130 unsigned rn = INSTR (9, 5);
10131 uint64_t value = aarch64_get_reg_u64 (cpu, rn, NO_SP);
10132
10133 if (r <= s)
10134 {
10135 /* 63:...:s:xxx:r:...:0 ==> 63:...:s-r:xxx:0.
10136 We want only bits s:xxx:r at the bottom of the word.
10137 So we LSL bit s up to bit 63 i.e. by 63 - s
10138 and then we LSR to bring bit 63 down to bit s - r
10139 i.e. by 63 + r - s. */
10140 value <<= 63 - s;
10141 value >>= 63 + r - s;
10142 }
10143 else
10144 {
10145 /* 63:...:s:xxx:0 ==> 63:...:63-(r-1)+s:xxx:63-(r-1):...:0.
10146 We want only bits s:xxx:0 starting at bit 63-(r-1).
10147 So we LSL bit s up to bit 63 i.e. by 63 - s
10148 and then we LSL to bring bit 63 down to 63-(r-1)+s
10149 i.e. by r - (s + 1). */
10150 value <<= 63 - s;
10151 value >>= r - (s + 1);
10152 }
10153
10154 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
10155 rd = INSTR (4, 0);
10156 aarch64_set_reg_u64 (cpu, rd, NO_SP, value);
10157 }
10158
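/* Editor's sketch (not simulator code) of the two-shift trick used by
   ubfm32/ubfm above for the r <= s case: left-shift so bit s becomes the
   top bit, then logical right shift so bit r lands at bit 0, zeroing
   everything else.  */
#ifdef AARCH64_SIM_UBFM_DEMO
static uint32_t
extract_bits_32 (uint32_t w, unsigned r, unsigned s)   /* needs r <= s.  */
{
  w <<= 31 - s;         /* Bit s is now bit 31.  */
  w >>= 31 + r - s;     /* Bit r is now bit 0; high bits are zero.  */
  return w;
}
#endif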
10159 /* The signed versions need to insert sign bits
10160 on the left of the inserted bit field. so we do
10161 much the same as the unsigned version except we
10162 use an arithmetic shift right -- this just means
10163 we need to operate on signed values. */
10164
10165 /* 32 bit bitfield move, left of affected sign-extended, right zeroed. */
10166 /* If r <= s Wd<s-r:0> = Wn<s:r> else Wd<32+s-r,32-r> = Wn<s:0>. */
10167 static void
10168 sbfm32 (sim_cpu *cpu, uint32_t r, uint32_t s)
10169 {
10170 unsigned rd;
10171 unsigned rn = INSTR (9, 5);
10172 /* as per ubfm32 but use an ASR instead of an LSR. */
10173 int32_t value = aarch64_get_reg_s32 (cpu, rn, NO_SP);
10174
10175 if (r <= s)
10176 {
10177 value <<= 31 - s;
10178 value >>= 31 + r - s;
10179 }
10180 else
10181 {
10182 value <<= 31 - s;
10183 value >>= r - (s + 1);
10184 }
10185
10186 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
10187 rd = INSTR (4, 0);
10188 aarch64_set_reg_u64 (cpu, rd, NO_SP, (uint32_t) value);
10189 }
10190
10191 /* 64 bit bitfield move, left of affected sign-extended, right zeroed. */
10192 /* If r <= s Wd<s-r:0> = Wn<s:r> else Wd<64+s-r,64-r> = Wn<s:0>. */
10193 static void
10194 sbfm (sim_cpu *cpu, uint32_t r, uint32_t s)
10195 {
10196 unsigned rd;
10197 unsigned rn = INSTR (9, 5);
10198 /* As per ubfm but use an ASR instead of an LSR. */
10199 int64_t value = aarch64_get_reg_s64 (cpu, rn, NO_SP);
10200
10201 if (r <= s)
10202 {
10203 value <<= 63 - s;
10204 value >>= 63 + r - s;
10205 }
10206 else
10207 {
10208 value <<= 63 - s;
10209 value >>= r - (s + 1);
10210 }
10211
10212 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
10213 rd = INSTR (4, 0);
10214 aarch64_set_reg_s64 (cpu, rd, NO_SP, value);
10215 }
10216
10217 /* Finally, these versions leave non-affected bits
10218 as is. so we need to generate the bits as per
10219 ubfm and also generate a mask to pick the
10220 bits from the original and computed values. */
10221
10222 /* 32 bit bitfield move, non-affected bits left as is.
10223 If r <= s Wd<s-r:0> = Wn<s:r> else Wd<32+s-r,32-r> = Wn<s:0>. */
10224 static void
10225 bfm32 (sim_cpu *cpu, uint32_t r, uint32_t s)
10226 {
10227 unsigned rn = INSTR (9, 5);
10228 uint32_t value = aarch64_get_reg_u32 (cpu, rn, NO_SP);
10229 uint32_t mask = -1;
10230 unsigned rd;
10231 uint32_t value2;
10232
10233 /* Pick either s+1-r or s+1 consecutive bits out of the original word. */
10234 if (r <= s)
10235 {
10236 /* 31:...:s:xxx:r:...:0 ==> 31:...:s-r:xxx:0.
10237 We want only bits s:xxx:r at the bottom of the word
10238 so we LSL bit s up to bit 31 i.e. by 31 - s
10239 and then we LSR to bring bit 31 down to bit s - r
10240 i.e. by 31 + r - s. */
10241 value <<= 31 - s;
10242 value >>= 31 + r - s;
10243 /* the mask must include the same bits. */
10244 mask <<= 31 - s;
10245 mask >>= 31 + r - s;
10246 }
10247 else
10248 {
10249 /* 31:...:s:xxx:0 ==> 31:...:31-(r-1)+s:xxx:31-(r-1):...:0.
10250 We want only bits s:xxx:0 starting at bit 31-(r-1)
10251 so we LSL bit s up to bit 31 i.e. by 31 - s
10252 and then we LSL to bring bit 31 down to 31-(r-1)+s
10253 i.e. by r - (s + 1). */
10254 value <<= 31 - s;
10255 value >>= r - (s + 1);
10256 /* The mask must include the same bits. */
10257 mask <<= 31 - s;
10258 mask >>= r - (s + 1);
10259 }
10260
10261 rd = INSTR (4, 0);
10262 value2 = aarch64_get_reg_u32 (cpu, rd, NO_SP);
10263
10264 value2 &= ~mask;
10265 value2 |= value;
10266
10267 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
10268 aarch64_set_reg_u64 (cpu, rd, NO_SP, value2);
10270 }
10271
10272 /* 64 bit bitfield move, non-affected bits left as is.
10273 If r <= s Wd<s-r:0> = Wn<s:r> else Wd<64+s-r,64-r> = Wn<s:0>. */
10274 static void
10275 bfm (sim_cpu *cpu, uint32_t r, uint32_t s)
10276 {
10277 unsigned rd;
10278 unsigned rn = INSTR (9, 5);
10279 uint64_t value = aarch64_get_reg_u64 (cpu, rn, NO_SP);
10280 uint64_t mask = 0xffffffffffffffffULL;
10281
10282 if (r <= s)
10283 {
10284 /* 63:...:s:xxx:r:...:0 ==> 63:...:s-r:xxx:0.
10285 We want only bits s:xxx:r at the bottom of the word
10286 so we LSL bit s up to bit 63 i.e. by 63 - s
10287 and then we LSR to bring bit 63 down to bit s - r
10288 i.e. by 63 + r - s. */
10289 value <<= 63 - s;
10290 value >>= 63 + r - s;
10291 /* The mask must include the same bits. */
10292 mask <<= 63 - s;
10293 mask >>= 63 + r - s;
10294 }
10295 else
10296 {
10297 /* 63:...:s:xxx:0 ==> 63:...:63-(r-1)+s:xxx:63-(r-1):...:0
10298 We want only bits s:xxx:0 starting at bit 63-(r-1)
10299 so we LSL bit s up to bit 63 i.e. by 63 - s
10300 and then we LSL to bring bit 63 down to 63-(r-1)+s
10301 i.e. by r - (s + 1). */
10302 value <<= 63 - s;
10303 value >>= r - (s + 1);
10304 /* The mask must include the same bits. */
10305 mask <<= 63 - s;
10306 mask >>= r - (s + 1);
10307 }
10308
10309 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
10310 rd = INSTR (4, 0);
10311 aarch64_set_reg_u64
10312 (cpu, rd, NO_SP, (aarch64_get_reg_u64 (cpu, rd, NO_SP) & ~mask) | value);
10313 }
10314
10315 static void
10316 dexBitfieldImmediate (sim_cpu *cpu)
10317 {
10318 /* assert instr[28:23] = 100110
10319 instr[31] = size : 0 ==> 32 bit, 1 ==> 64 bit
10320 instr[30,29] = op : 0 ==> SBFM, 1 ==> BFM, 2 ==> UBFM, 3 ==> UNALLOC
10321 instr[22] = N : must be 0 for 32 bit, 1 for 64 bit ow UNALLOC
10322 instr[21,16] = immr : 0xxxxx for 32 bit, xxxxxx for 64 bit
10323 instr[15,10] = imms : 0xxxxx for 32 bit, xxxxxx for 64 bit
10324 instr[9,5] = Rn
10325 instr[4,0] = Rd */
10326
10327 /* 32 bit operations must have N = 0 or else we have an UNALLOC. */
10328 uint32_t dispatch;
10329 uint32_t imms;
10330 uint32_t size = INSTR (31, 31);
10331 uint32_t N = INSTR (22, 22);
10332 /* 32 bit operations must have immr[5] = 0 and imms[5] = 0. */
10333 /* or else we have an UNALLOC. */
10334 uint32_t immr = INSTR (21, 16);
10335
10336 if (~size & N)
10337 HALT_UNALLOC;
10338
10339 if (!size && uimm (immr, 5, 5))
10340 HALT_UNALLOC;
10341
10342 imms = INSTR (15, 10);
10343 if (!size && uimm (imms, 5, 5))
10344 HALT_UNALLOC;
10345
10346 /* Switch on combined size and op. */
10347 dispatch = INSTR (31, 29);
10348 switch (dispatch)
10349 {
10350 case 0: sbfm32 (cpu, immr, imms); return;
10351 case 1: bfm32 (cpu, immr, imms); return;
10352 case 2: ubfm32 (cpu, immr, imms); return;
10353 case 4: sbfm (cpu, immr, imms); return;
10354 case 5: bfm (cpu, immr, imms); return;
10355 case 6: ubfm (cpu, immr, imms); return;
10356 default: HALT_UNALLOC;
10357 }
10358 }
10359
10360 static void
10361 do_EXTR_32 (sim_cpu *cpu)
10362 {
10363 /* instr[31:21] = 00010011100
10364 instr[20,16] = Rm
10365 instr[15,10] = imms : 0xxxxx for 32 bit
10366 instr[9,5] = Rn
10367 instr[4,0] = Rd */
10368 unsigned rm = INSTR (20, 16);
10369 unsigned imms = INSTR (15, 10) & 31;
10370 unsigned rn = INSTR ( 9, 5);
10371 unsigned rd = INSTR ( 4, 0);
10372 uint64_t val1;
10373 uint64_t val2;
10374
10375 val1 = aarch64_get_reg_u32 (cpu, rm, NO_SP);
10376 val1 >>= imms;
10377 val2 = aarch64_get_reg_u32 (cpu, rn, NO_SP);
10378 val2 <<= (32 - imms);
10379
10380 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
10381 aarch64_set_reg_u64 (cpu, rd, NO_SP, (uint32_t) (val1 | val2)); /* Truncate: val2 carries junk above bit 31. */
10382 }
10383
10384 static void
10385 do_EXTR_64 (sim_cpu *cpu)
10386 {
10387 /* instr[31:21] = 10010011100
10388 instr[20,16] = Rm
10389 instr[15,10] = imms
10390 instr[9,5] = Rn
10391 instr[4,0] = Rd */
10392 unsigned rm = INSTR (20, 16);
10393 unsigned imms = INSTR (15, 10) & 63;
10394 unsigned rn = INSTR ( 9, 5);
10395 unsigned rd = INSTR ( 4, 0);
10396 uint64_t val;
10397
10398 val = aarch64_get_reg_u64 (cpu, rm, NO_SP);
10399 val >>= imms;
10400 if (imms != 0) val |= (aarch64_get_reg_u64 (cpu, rn, NO_SP) << (64 - imms)); /* A shift by 64 would be undefined; imms == 0 just selects Rm. */
10401
10402 aarch64_set_reg_u64 (cpu, rd, NO_SP, val);
10403 }
10404
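/* Editor's sketch (not simulator code): EXTR extracts 64 bits from the
   concatenation Rn:Rm starting at bit `lsb', mirroring do_EXTR_64 above
   (with the imms == 0 case separated out, since a C shift by 64 would be
   undefined); EXTR with Rn == Rm is ROR.  */
#ifdef AARCH64_SIM_EXTR_DEMO
static uint64_t
extr64_demo (uint64_t rn, uint64_t rm, unsigned lsb)   /* lsb in 0..63.  */
{
  if (lsb == 0)
    return rm;
  return (rm >> lsb) | (rn << (64 - lsb));
}

static uint64_t
ror64_demo (uint64_t x, unsigned amount)
{
  return extr64_demo (x, x, amount & 63);
}
#endif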
10405 static void
10406 dexExtractImmediate (sim_cpu *cpu)
10407 {
10408 /* assert instr[28:23] = 100111
10409 instr[31] = size : 0 ==> 32 bit, 1 ==> 64 bit
10410 instr[30,29] = op21 : 0 ==> EXTR, 1,2,3 ==> UNALLOC
10411 instr[22] = N : must be 0 for 32 bit, 1 for 64 bit or UNALLOC
10412 instr[21] = op0 : must be 0 or UNALLOC
10413 instr[20,16] = Rm
10414 instr[15,10] = imms : 0xxxxx for 32 bit, xxxxxx for 64 bit
10415 instr[9,5] = Rn
10416 instr[4,0] = Rd */
10417
10418 /* 32 bit operations must have N = 0 or else we have an UNALLOC. */
10419 /* 64 bit operations must have N = 1 or else we have an UNALLOC. */
10420 uint32_t dispatch;
10421 uint32_t size = INSTR (31, 31);
10422 uint32_t N = INSTR (22, 22);
10423 /* 32 bit operations must have imms[5] = 0
10424 or else we have an UNALLOC. */
10425 uint32_t imms = INSTR (15, 10);
10426
10427 if (size ^ N)
10428 HALT_UNALLOC;
10429
10430 if (!size && uimm (imms, 5, 5))
10431 HALT_UNALLOC;
10432
10433 /* Switch on combined size and op. */
10434 dispatch = INSTR (31, 29);
10435
10436 if (dispatch == 0)
10437 do_EXTR_32 (cpu);
10438
10439 else if (dispatch == 4)
10440 do_EXTR_64 (cpu);
10441
10442 else if (dispatch == 1)
10443 HALT_NYI;
10444 else
10445 HALT_UNALLOC;
10446 }
10447
10448 static void
10449 dexDPImm (sim_cpu *cpu)
10450 {
10451 /* uint32_t group = dispatchGroup (aarch64_get_instr (cpu));
10452 assert group == GROUP_DPIMM_1000 || group == GROUP_DPIMM_1001
10453 bits [25,23] of a DPImm are the secondary dispatch vector. */
10454 uint32_t group2 = dispatchDPImm (aarch64_get_instr (cpu));
10455
10456 switch (group2)
10457 {
10458 case DPIMM_PCADR_000:
10459 case DPIMM_PCADR_001:
10460 dexPCRelAddressing (cpu);
10461 return;
10462
10463 case DPIMM_ADDSUB_010:
10464 case DPIMM_ADDSUB_011:
10465 dexAddSubtractImmediate (cpu);
10466 return;
10467
10468 case DPIMM_LOG_100:
10469 dexLogicalImmediate (cpu);
10470 return;
10471
10472 case DPIMM_MOV_101:
10473 dexMoveWideImmediate (cpu);
10474 return;
10475
10476 case DPIMM_BITF_110:
10477 dexBitfieldImmediate (cpu);
10478 return;
10479
10480 case DPIMM_EXTR_111:
10481 dexExtractImmediate (cpu);
10482 return;
10483
10484 default:
10485 /* Should never reach here. */
10486 HALT_NYI;
10487 }
10488 }
10489
10490 static void
10491 dexLoadUnscaledImmediate (sim_cpu *cpu)
10492 {
10493 /* instr[29,24] == 111_00
10494 instr[21] == 0
10495 instr[11,10] == 00
10496 instr[31,30] = size
10497 instr[26] = V
10498 instr[23,22] = opc
10499 instr[20,12] = simm9
10500 instr[9,5] = rn may be SP. */
10501 /* unsigned rt = INSTR (4, 0); */
10502 uint32_t V = INSTR (26, 26);
10503 uint32_t dispatch = ((INSTR (31, 30) << 2) | INSTR (23, 22));
10504 int32_t imm = simm32 (aarch64_get_instr (cpu), 20, 12);
10505
10506 if (!V)
10507 {
10508 /* GReg operations. */
10509 switch (dispatch)
10510 {
10511 case 0: sturb (cpu, imm); return;
10512 case 1: ldurb32 (cpu, imm); return;
10513 case 2: ldursb64 (cpu, imm); return;
10514 case 3: ldursb32 (cpu, imm); return;
10515 case 4: sturh (cpu, imm); return;
10516 case 5: ldurh32 (cpu, imm); return;
10517 case 6: ldursh64 (cpu, imm); return;
10518 case 7: ldursh32 (cpu, imm); return;
10519 case 8: stur32 (cpu, imm); return;
10520 case 9: ldur32 (cpu, imm); return;
10521 case 10: ldursw (cpu, imm); return;
10522 case 12: stur64 (cpu, imm); return;
10523 case 13: ldur64 (cpu, imm); return;
10524
10525 case 14:
10526 /* PRFUM NYI. */
10527 HALT_NYI;
10528
10529 default:
10530 case 11:
10531 case 15:
10532 HALT_UNALLOC;
10533 }
10534 }
10535
10536 /* FReg operations. */
10537 switch (dispatch)
10538 {
10539 case 2: fsturq (cpu, imm); return;
10540 case 3: fldurq (cpu, imm); return;
10541 case 8: fsturs (cpu, imm); return;
10542 case 9: fldurs (cpu, imm); return;
10543 case 12: fsturd (cpu, imm); return;
10544 case 13: fldurd (cpu, imm); return;
10545
10546 case 0: /* STUR 8 bit FP. */
10547 case 1: /* LDUR 8 bit FP. */
10548 case 4: /* STUR 16 bit FP. */
10549 case 5: /* LDUR 16 bit FP. */
10550 HALT_NYI;
10551
10552 default:
10553 case 6:
10554 case 7:
10555 case 10:
10556 case 11:
10557 case 14:
10558 case 15:
10559 HALT_UNALLOC;
10560 }
10561 }
10562
10563 /* N.B. A preliminary note regarding all the ldrs<x>32
10564 instructions
10565
10566 The signed value loaded by these instructions is cast to unsigned
10567 before being assigned to aarch64_get_reg_u64 (cpu, N) i.e. to the
10568 64 bit element of the GReg union.  This performs a 32 bit sign extension
10569 (as required) but avoids 64 bit sign extension, thus ensuring that the
10570 top half of the register word is zero.  This is what the spec demands
10571 when a 32 bit load occurs.  */
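/* Example: a 32-bit LDRSB of the memory byte 0x80 goes through the
   (uint32_t) cast as 0xFFFFFF80, so the full 64-bit register reads
   0x00000000FFFFFF80 -- sign-extended to 32 bits, zero top half.  */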
10572
10573 /* 32 bit load sign-extended byte scaled unsigned 12 bit. */
10574 static void
10575 ldrsb32_abs (sim_cpu *cpu, uint32_t offset)
10576 {
10577 unsigned int rn = INSTR (9, 5);
10578 unsigned int rt = INSTR (4, 0);
10579
10580 /* The target register may not be SP but the source register may be;
10581 there is no scaling required for a byte load.  */
10582 uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK) + offset;
10583 aarch64_set_reg_u64 (cpu, rt, NO_SP,
10584 (uint32_t) aarch64_get_mem_s8 (cpu, address));
10585 }
10586
10587 /* 32 bit load sign-extended byte scaled or unscaled zero-
10588 or sign-extended 32-bit register offset. */
10589 static void
10590 ldrsb32_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
10591 {
10592 unsigned int rm = INSTR (20, 16);
10593 unsigned int rn = INSTR (9, 5);
10594 unsigned int rt = INSTR (4, 0);
10595
10596 /* rn may reference SP, rm and rt must reference ZR. */
10597
10598 uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
10599 int64_t displacement = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP),
10600 extension);
10601
10602 /* There is no scaling required for a byte load. */
10603 aarch64_set_reg_u64
10604 (cpu, rt, NO_SP, (uint32_t) aarch64_get_mem_s8 (cpu, address
10605 + displacement));
10606 }
10607
10608 /* 32 bit load sign-extended byte unscaled signed 9 bit with
10609 pre- or post-writeback. */
10610 static void
10611 ldrsb32_wb (sim_cpu *cpu, int32_t offset, WriteBack wb)
10612 {
10613 uint64_t address;
10614 unsigned int rn = INSTR (9, 5);
10615 unsigned int rt = INSTR (4, 0);
10616
10617 if (rn == rt && wb != NoWriteBack)
10618 HALT_UNALLOC;
10619
10620 address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
10621
10622 if (wb == Pre)
10623 address += offset;
10624
10625 aarch64_set_reg_u64 (cpu, rt, NO_SP,
10626 (uint32_t) aarch64_get_mem_s8 (cpu, address));
10627
10628 if (wb == Post)
10629 address += offset;
10630
10631 if (wb != NoWriteBack)
10632 aarch64_set_reg_u64 (cpu, rn, NO_SP, address);
10633 }
10634
10635 /* 8 bit store scaled. */
10636 static void
10637 fstrb_abs (sim_cpu *cpu, uint32_t offset)
10638 {
10639 unsigned st = INSTR (4, 0);
10640 unsigned rn = INSTR (9, 5);
10641
10642 aarch64_set_mem_u8 (cpu,
10643 aarch64_get_reg_u64 (cpu, rn, SP_OK) + offset,
10644 aarch64_get_vec_u8 (cpu, st, 0));
10645 }
10646
10647 /* 8 bit store scaled or unscaled zero- or
10648 sign-extended 8-bit register offset. */
10649 static void
10650 fstrb_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
10651 {
10652 unsigned rm = INSTR (20, 16);
10653 unsigned rn = INSTR (9, 5);
10654 unsigned st = INSTR (4, 0);
10655
10656 uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
10657 int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP),
10658 extension);
10659 uint64_t displacement = extended; /* No scaling for a byte access; the register offset is always applied. */
10660
10661 aarch64_set_mem_u8
10662 (cpu, address + displacement, aarch64_get_vec_u8 (cpu, st, 0));
10663 }
10664
10665 /* 16 bit store scaled. */
10666 static void
10667 fstrh_abs (sim_cpu *cpu, uint32_t offset)
10668 {
10669 unsigned st = INSTR (4, 0);
10670 unsigned rn = INSTR (9, 5);
10671
10672 aarch64_set_mem_u16
10673 (cpu,
10674 aarch64_get_reg_u64 (cpu, rn, SP_OK) + SCALE (offset, 16),
10675 aarch64_get_vec_u16 (cpu, st, 0));
10676 }
10677
10678 /* 16 bit store scaled or unscaled zero-
10679 or sign-extended 16-bit register offset. */
10680 static void
10681 fstrh_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
10682 {
10683 unsigned rm = INSTR (20, 16);
10684 unsigned rn = INSTR (9, 5);
10685 unsigned st = INSTR (4, 0);
10686
10687 uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
10688 int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP),
10689 extension);
10690 uint64_t displacement = OPT_SCALE (extended, 16, scaling);
10691
10692 aarch64_set_mem_u16
10693 (cpu, address + displacement, aarch64_get_vec_u16 (cpu, st, 0));
10694 }
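/* Illustration (assuming OPT_SCALE, defined earlier in this file, applies
   the log2 of the access size in bytes only when SCALING is Scaled): for
   this halfword store a scaled SXTW offset register holding 6 produces a
   displacement of 6 << 1 = 12, while the unscaled form uses 6 directly.  */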
10695
10696 /* 32 bit store scaled unsigned 12 bit. */
10697 static void
10698 fstrs_abs (sim_cpu *cpu, uint32_t offset)
10699 {
10700 unsigned st = INSTR (4, 0);
10701 unsigned rn = INSTR (9, 5);
10702
10703 aarch64_set_mem_u32
10704 (cpu,
10705 aarch64_get_reg_u64 (cpu, rn, SP_OK) + SCALE (offset, 32),
10706 aarch64_get_vec_u32 (cpu, st, 0));
10707 }
10708
10709 /* 32 bit store unscaled signed 9 bit with pre- or post-writeback. */
10710 static void
10711 fstrs_wb (sim_cpu *cpu, int32_t offset, WriteBack wb)
10712 {
10713 unsigned rn = INSTR (9, 5);
10714 unsigned st = INSTR (4, 0);
10715
10716 uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
10717
10718 if (wb != Post)
10719 address += offset;
10720
10721 aarch64_set_mem_u32 (cpu, address, aarch64_get_vec_u32 (cpu, st, 0));
10722
10723 if (wb == Post)
10724 address += offset;
10725
10726 if (wb != NoWriteBack)
10727 aarch64_set_reg_u64 (cpu, rn, SP_OK, address);
10728 }
10729
10730 /* 32 bit store scaled or unscaled zero-
10731 or sign-extended 32-bit register offset. */
10732 static void
10733 fstrs_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
10734 {
10735 unsigned rm = INSTR (20, 16);
10736 unsigned rn = INSTR (9, 5);
10737 unsigned st = INSTR (4, 0);
10738
10739 uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
10740 int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP),
10741 extension);
10742 uint64_t displacement = OPT_SCALE (extended, 32, scaling);
10743
10744 aarch64_set_mem_u32
10745 (cpu, address + displacement, aarch64_get_vec_u32 (cpu, st, 0));
10746 }
10747
10748 /* 64 bit store scaled unsigned 12 bit. */
10749 static void
10750 fstrd_abs (sim_cpu *cpu, uint32_t offset)
10751 {
10752 unsigned st = INSTR (4, 0);
10753 unsigned rn = INSTR (9, 5);
10754
10755 aarch64_set_mem_u64
10756 (cpu,
10757 aarch64_get_reg_u64 (cpu, rn, SP_OK) + SCALE (offset, 64),
10758 aarch64_get_vec_u64 (cpu, st, 0));
10759 }
10760
10761 /* 64 bit store unscaled signed 9 bit with pre- or post-writeback. */
10762 static void
10763 fstrd_wb (sim_cpu *cpu, int32_t offset, WriteBack wb)
10764 {
10765 unsigned rn = INSTR (9, 5);
10766 unsigned st = INSTR (4, 0);
10767
10768 uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
10769
10770 if (wb != Post)
10771 address += offset;
10772
10773 aarch64_set_mem_u64 (cpu, address, aarch64_get_vec_u64 (cpu, st, 0));
10774
10775 if (wb == Post)
10776 address += offset;
10777
10778 if (wb != NoWriteBack)
10779 aarch64_set_reg_u64 (cpu, rn, SP_OK, address);
10780 }
10781
10782 /* 64 bit store scaled or unscaled zero-
10783 or sign-extended 32-bit register offset. */
10784 static void
10785 fstrd_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
10786 {
10787 unsigned rm = INSTR (20, 16);
10788 unsigned rn = INSTR (9, 5);
10789 unsigned st = INSTR (4, 0);
10790
10791 uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
10792 int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP),
10793 extension);
10794 uint64_t displacement = OPT_SCALE (extended, 64, scaling);
10795
10796 aarch64_set_mem_u64
10797 (cpu, address + displacement, aarch64_get_vec_u64 (cpu, st, 0));
10798 }
10799
10800 /* 128 bit store scaled unsigned 12 bit. */
10801 static void
10802 fstrq_abs (sim_cpu *cpu, uint32_t offset)
10803 {
10804 FRegister a;
10805 unsigned st = INSTR (4, 0);
10806 unsigned rn = INSTR (9, 5);
10807 uint64_t addr;
10808
10809 aarch64_get_FP_long_double (cpu, st, & a);
10810
10811 addr = aarch64_get_reg_u64 (cpu, rn, SP_OK) + SCALE (offset, 128);
10812 aarch64_set_mem_long_double (cpu, addr, a);
10813 }
10814
10815 /* 128 bit store unscaled signed 9 bit with pre- or post-writeback. */
10816 static void
10817 fstrq_wb (sim_cpu *cpu, int32_t offset, WriteBack wb)
10818 {
10819 FRegister a;
10820 unsigned rn = INSTR (9, 5);
10821 unsigned st = INSTR (4, 0);
10822 uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
10823
10824 if (wb != Post)
10825 address += offset;
10826
10827 aarch64_get_FP_long_double (cpu, st, & a);
10828 aarch64_set_mem_long_double (cpu, address, a);
10829
10830 if (wb == Post)
10831 address += offset;
10832
10833 if (wb != NoWriteBack)
10834 aarch64_set_reg_u64 (cpu, rn, SP_OK, address);
10835 }
10836
10837 /* 128 bit store scaled or unscaled zero-
10838 or sign-extended 32-bit register offset. */
10839 static void
10840 fstrq_scale_ext (sim_cpu *cpu, Scaling scaling, Extension extension)
10841 {
10842 unsigned rm = INSTR (20, 16);
10843 unsigned rn = INSTR (9, 5);
10844 unsigned st = INSTR (4, 0);
10845
10846 uint64_t address = aarch64_get_reg_u64 (cpu, rn, SP_OK);
10847 int64_t extended = extend (aarch64_get_reg_u32 (cpu, rm, NO_SP),
10848 extension);
10849 uint64_t displacement = OPT_SCALE (extended, 128, scaling);
10850
10851 FRegister a;
10852
10853 aarch64_get_FP_long_double (cpu, st, & a);
10854 aarch64_set_mem_long_double (cpu, address + displacement, a);
10855 }
10856
10857 static void
10858 dexLoadImmediatePrePost (sim_cpu *cpu)
10859 {
10860 /* instr[31,30] = size
10861 instr[29,27] = 111
10862 instr[26] = V
10863 instr[25,24] = 00
10864 instr[23,22] = opc
10865 instr[21] = 0
10866 instr[20,12] = simm9
10867 instr[11] = wb : 0 ==> Post, 1 ==> Pre
10868 instr[10] = 0
10869 instr[9,5] = Rn may be SP.
10870 instr[4,0] = Rt */
10871
10872 uint32_t V = INSTR (26, 26);
10873 uint32_t dispatch = ((INSTR (31, 30) << 2) | INSTR (23, 22));
10874 int32_t imm = simm32 (aarch64_get_instr (cpu), 20, 12);
10875 WriteBack wb = INSTR (11, 11);
10876
10877 if (!V)
10878 {
10879 /* GReg operations. */
10880 switch (dispatch)
10881 {
10882 case 0: strb_wb (cpu, imm, wb); return;
10883 case 1: ldrb32_wb (cpu, imm, wb); return;
10884 case 2: ldrsb_wb (cpu, imm, wb); return;
10885 case 3: ldrsb32_wb (cpu, imm, wb); return;
10886 case 4: strh_wb (cpu, imm, wb); return;
10887 case 5: ldrh32_wb (cpu, imm, wb); return;
10888 case 6: ldrsh64_wb (cpu, imm, wb); return;
10889 case 7: ldrsh32_wb (cpu, imm, wb); return;
10890 case 8: str32_wb (cpu, imm, wb); return;
10891 case 9: ldr32_wb (cpu, imm, wb); return;
10892 case 10: ldrsw_wb (cpu, imm, wb); return;
10893 case 12: str_wb (cpu, imm, wb); return;
10894 case 13: ldr_wb (cpu, imm, wb); return;
10895
10896 default:
10897 case 11:
10898 case 14:
10899 case 15:
10900 HALT_UNALLOC;
10901 }
10902 }
10903
10904 /* FReg operations. */
10905 switch (dispatch)
10906 {
10907 case 2: fstrq_wb (cpu, imm, wb); return;
10908 case 3: fldrq_wb (cpu, imm, wb); return;
10909 case 8: fstrs_wb (cpu, imm, wb); return;
10910 case 9: fldrs_wb (cpu, imm, wb); return;
10911 case 12: fstrd_wb (cpu, imm, wb); return;
10912 case 13: fldrd_wb (cpu, imm, wb); return;
10913
10914 case 0: /* STUR 8 bit FP. */
10915 case 1: /* LDUR 8 bit FP. */
10916 case 4: /* STUR 16 bit FP. */
10917 case 5: /* LDUR 16 bit FP. */
10918 HALT_NYI;
10919
10920 default:
10921 case 6:
10922 case 7:
10923 case 10:
10924 case 11:
10925 case 14:
10926 case 15:
10927 HALT_UNALLOC;
10928 }
10929 }
10930
10931 static void
10932 dexLoadRegisterOffset (sim_cpu *cpu)
10933 {
10934 /* instr[31,30] = size
10935 instr[29,27] = 111
10936 instr[26] = V
10937 instr[25,24] = 00
10938 instr[23,22] = opc
10939 instr[21] = 1
10940 instr[20,16] = rm
10941 instr[15,13] = option : 010 ==> UXTW, 011 ==> UXTX/LSL,
10942 110 ==> SXTW, 111 ==> SXTX,
10943 ow ==> RESERVED
10944 instr[12] = scaled
10945 instr[11,10] = 10
10946 instr[9,5] = rn
10947 instr[4,0] = rt. */
10948
10949 uint32_t V = INSTR (26, 26);
10950 uint32_t dispatch = ((INSTR (31, 30) << 2) | INSTR (23, 22));
10951 Scaling scale = INSTR (12, 12);
10952 Extension extensionType = INSTR (15, 13);
10953
10954 /* Check for illegal extension types. */
10955 if (uimm (extensionType, 1, 1) == 0)
10956 HALT_UNALLOC;
10957
10958 if (extensionType == UXTX || extensionType == SXTX)
10959 extensionType = NoExtension;
10960
10961 if (!V)
10962 {
10963 /* GReg operations. */
10964 switch (dispatch)
10965 {
10966 case 0: strb_scale_ext (cpu, scale, extensionType); return;
10967 case 1: ldrb32_scale_ext (cpu, scale, extensionType); return;
10968 case 2: ldrsb_scale_ext (cpu, scale, extensionType); return;
10969 case 3: ldrsb32_scale_ext (cpu, scale, extensionType); return;
10970 case 4: strh_scale_ext (cpu, scale, extensionType); return;
10971 case 5: ldrh32_scale_ext (cpu, scale, extensionType); return;
10972 case 6: ldrsh_scale_ext (cpu, scale, extensionType); return;
10973 case 7: ldrsh32_scale_ext (cpu, scale, extensionType); return;
10974 case 8: str32_scale_ext (cpu, scale, extensionType); return;
10975 case 9: ldr32_scale_ext (cpu, scale, extensionType); return;
10976 case 10: ldrsw_scale_ext (cpu, scale, extensionType); return;
10977 case 12: str_scale_ext (cpu, scale, extensionType); return;
10978 case 13: ldr_scale_ext (cpu, scale, extensionType); return;
10979 case 14: prfm_scale_ext (cpu, scale, extensionType); return;
10980
10981 default:
10982 case 11:
10983 case 15:
10984 HALT_UNALLOC;
10985 }
10986 }
10987
10988 /* FReg operations. */
10989 switch (dispatch)
10990 {
10991 case 1: /* LDUR 8 bit FP. */
10992 HALT_NYI;
10993 case 3: fldrq_scale_ext (cpu, scale, extensionType); return;
10994 case 5: /* LDUR 16 bit FP. */
10995 HALT_NYI;
10996 case 9: fldrs_scale_ext (cpu, scale, extensionType); return;
10997 case 13: fldrd_scale_ext (cpu, scale, extensionType); return;
10998
10999 case 0: fstrb_scale_ext (cpu, scale, extensionType); return;
11000 case 2: fstrq_scale_ext (cpu, scale, extensionType); return;
11001 case 4: fstrh_scale_ext (cpu, scale, extensionType); return;
11002 case 8: fstrs_scale_ext (cpu, scale, extensionType); return;
11003 case 12: fstrd_scale_ext (cpu, scale, extensionType); return;
11004
11005 default:
11006 case 6:
11007 case 7:
11008 case 10:
11009 case 11:
11010 case 14:
11011 case 15:
11012 HALT_UNALLOC;
11013 }
11014 }
11015
11016 static void
11017 dexLoadUnsignedImmediate (sim_cpu *cpu)
11018 {
11019 /* instr[29,24] == 111_01
11020 instr[31,30] = size
11021 instr[26] = V
11022 instr[23,22] = opc
11023 instr[21,10] = uimm12 : unsigned immediate offset
11024 instr[9,5] = rn may be SP.
11025 instr[4,0] = rt. */
11026
11027 uint32_t V = INSTR (26,26);
11028 uint32_t dispatch = ((INSTR (31, 30) << 2) | INSTR (23, 22));
11029 uint32_t imm = INSTR (21, 10);
11030
11031 if (!V)
11032 {
11033 /* GReg operations. */
11034 switch (dispatch)
11035 {
11036 case 0: strb_abs (cpu, imm); return;
11037 case 1: ldrb32_abs (cpu, imm); return;
11038 case 2: ldrsb_abs (cpu, imm); return;
11039 case 3: ldrsb32_abs (cpu, imm); return;
11040 case 4: strh_abs (cpu, imm); return;
11041 case 5: ldrh32_abs (cpu, imm); return;
11042 case 6: ldrsh_abs (cpu, imm); return;
11043 case 7: ldrsh32_abs (cpu, imm); return;
11044 case 8: str32_abs (cpu, imm); return;
11045 case 9: ldr32_abs (cpu, imm); return;
11046 case 10: ldrsw_abs (cpu, imm); return;
11047 case 12: str_abs (cpu, imm); return;
11048 case 13: ldr_abs (cpu, imm); return;
11049 case 14: prfm_abs (cpu, imm); return;
11050
11051 default:
11052 case 11:
11053 case 15:
11054 HALT_UNALLOC;
11055 }
11056 }
11057
11058 /* FReg operations. */
11059 switch (dispatch)
11060 {
11061 case 0: fstrb_abs (cpu, imm); return;
11062 case 4: fstrh_abs (cpu, imm); return;
11063 case 8: fstrs_abs (cpu, imm); return;
11064 case 12: fstrd_abs (cpu, imm); return;
11065 case 2: fstrq_abs (cpu, imm); return;
11066
11067 case 1: fldrb_abs (cpu, imm); return;
11068 case 5: fldrh_abs (cpu, imm); return;
11069 case 9: fldrs_abs (cpu, imm); return;
11070 case 13: fldrd_abs (cpu, imm); return;
11071 case 3: fldrq_abs (cpu, imm); return;
11072
11073 default:
11074 case 6:
11075 case 7:
11076 case 10:
11077 case 11:
11078 case 14:
11079 case 15:
11080 HALT_UNALLOC;
11081 }
11082 }
11083
11084 static void
11085 dexLoadExclusive (sim_cpu *cpu)
11086 {
11087 /* assert instr[29:24] = 001000;
11088 instr[31,30] = size
11089 instr[23] = 0 if exclusive
11090 instr[22] = L : 1 if load, 0 if store
11091 instr[21] = 1 if pair
11092 instr[20,16] = Rs
11093 instr[15] = o0 : 1 if ordered
11094 instr[14,10] = Rt2
11095 instr[9,5] = Rn
11096 instr[4,0] = Rt. */
11097
11098 switch (INSTR (22, 21))
11099 {
11100 case 2: ldxr (cpu); return;
11101 case 0: stxr (cpu); return;
11102 default: HALT_NYI;
11103 }
11104 }
11105
11106 static void
11107 dexLoadOther (sim_cpu *cpu)
11108 {
11109 uint32_t dispatch;
11110
11111 /* instr[29,25] = 111_0
11112 instr[24] == 0 ==> dispatch, 1 ==> ldst reg unsigned immediate
11113 instr[21] and instr[11,10] form the secondary dispatch. */
11114 if (INSTR (24, 24))
11115 {
11116 dexLoadUnsignedImmediate (cpu);
11117 return;
11118 }
11119
11120 dispatch = ((INSTR (21, 21) << 2) | INSTR (11, 10));
11121 switch (dispatch)
11122 {
11123 case 0: dexLoadUnscaledImmediate (cpu); return;
11124 case 1: dexLoadImmediatePrePost (cpu); return;
11125 case 3: dexLoadImmediatePrePost (cpu); return;
11126 case 6: dexLoadRegisterOffset (cpu); return;
11127
11128 default:
11129 case 2:
11130 case 4:
11131 case 5:
11132 case 7:
11133 HALT_NYI;
11134 }
11135 }
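/* Secondary dispatch examples: the unscaled-immediate form has
   instr[21] = 0 and instr[11,10] = 00, giving dispatch 0, while the
   register-offset form (instr[21] = 1, instr[11,10] = 10) gives 6.
   Dispatches 1 and 3 differ only in instr[11], the pre/post flag.  */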
11136
11137 static void
11138 store_pair_u32 (sim_cpu *cpu, int32_t offset, WriteBack wb)
11139 {
11140 unsigned rn = INSTR (14, 10);
11141 unsigned rd = INSTR (9, 5);
11142 unsigned rm = INSTR (4, 0);
11143 uint64_t address = aarch64_get_reg_u64 (cpu, rd, SP_OK);
11144
11145 if ((rn == rd || rm == rd) && wb != NoWriteBack)
11146 HALT_UNALLOC; /* Writeback with Rn equal to Rt or Rt2 is UNPREDICTABLE. */
11147
11148 offset <<= 2;
11149
11150 if (wb != Post)
11151 address += offset;
11152
11153 aarch64_set_mem_u32 (cpu, address,
11154 aarch64_get_reg_u32 (cpu, rm, NO_SP));
11155 aarch64_set_mem_u32 (cpu, address + 4,
11156 aarch64_get_reg_u32 (cpu, rn, NO_SP));
11157
11158 if (wb == Post)
11159 address += offset;
11160
11161 if (wb != NoWriteBack)
11162 aarch64_set_reg_u64 (cpu, rd, SP_OK, address);
11163 }
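/* Offset scaling example: the signed imm7 field is scaled by the access
   size, so for this 32-bit pair an offset field of 2 becomes 8 bytes and
   the two words are stored at address and address + 4; the 64-bit
   variant below shifts by 3 instead.  */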
11164
11165 static void
11166 store_pair_u64 (sim_cpu *cpu, int32_t offset, WriteBack wb)
11167 {
11168 unsigned rn = INSTR (14, 10);
11169 unsigned rd = INSTR (9, 5);
11170 unsigned rm = INSTR (4, 0);
11171 uint64_t address = aarch64_get_reg_u64 (cpu, rd, SP_OK);
11172
11173 if ((rn == rd || rm == rd) && wb != NoWriteBack)
11174 HALT_UNALLOC; /* Writeback with Rn equal to Rt or Rt2 is UNPREDICTABLE. */
11175
11176 offset <<= 3;
11177
11178 if (wb != Post)
11179 address += offset;
11180
11181 aarch64_set_mem_u64 (cpu, address,
11182 aarch64_get_reg_u64 (cpu, rm, NO_SP));
11183 aarch64_set_mem_u64 (cpu, address + 8,
11184 aarch64_get_reg_u64 (cpu, rn, NO_SP));
11185
11186 if (wb == Post)
11187 address += offset;
11188
11189 if (wb != NoWriteBack)
11190 aarch64_set_reg_u64 (cpu, rd, SP_OK, address);
11191 }
11192
11193 static void
11194 load_pair_u32 (sim_cpu *cpu, int32_t offset, WriteBack wb)
11195 {
11196 unsigned rn = INSTR (14, 10);
11197 unsigned rd = INSTR (9, 5);
11198 unsigned rm = INSTR (4, 0);
11199 uint64_t address = aarch64_get_reg_u64 (cpu, rd, SP_OK);
11200
11201 /* Rt == Rt2 is UNPREDICTABLE for a load pair; treat it as UNALLOC so it is never executed. */
11202 if (rn == rm)
11203 HALT_UNALLOC;
11204
11205 offset <<= 2;
11206
11207 if (wb != Post)
11208 address += offset;
11209
11210 aarch64_set_reg_u64 (cpu, rm, SP_OK, aarch64_get_mem_u32 (cpu, address));
11211 aarch64_set_reg_u64 (cpu, rn, SP_OK, aarch64_get_mem_u32 (cpu, address + 4));
11212
11213 if (wb == Post)
11214 address += offset;
11215
11216 if (wb != NoWriteBack)
11217 aarch64_set_reg_u64 (cpu, rd, SP_OK, address);
11218 }
11219
11220 static void
11221 load_pair_s32 (sim_cpu *cpu, int32_t offset, WriteBack wb)
11222 {
11223 unsigned rn = INSTR (14, 10);
11224 unsigned rd = INSTR (9, 5);
11225 unsigned rm = INSTR (4, 0);
11226 uint64_t address = aarch64_get_reg_u64 (cpu, rd, SP_OK);
11227
11228 /* Rt == Rt2 is UNPREDICTABLE for a load pair; treat it as UNALLOC so it is never executed. */
11229 if (rn == rm)
11230 HALT_UNALLOC;
11231
11232 offset <<= 2;
11233
11234 if (wb != Post)
11235 address += offset;
11236
11237 aarch64_set_reg_s64 (cpu, rm, SP_OK, aarch64_get_mem_s32 (cpu, address));
11238 aarch64_set_reg_s64 (cpu, rn, SP_OK, aarch64_get_mem_s32 (cpu, address + 4));
11239
11240 if (wb == Post)
11241 address += offset;
11242
11243 if (wb != NoWriteBack)
11244 aarch64_set_reg_u64 (cpu, rd, SP_OK, address);
11245 }
11246
11247 static void
11248 load_pair_u64 (sim_cpu *cpu, int32_t offset, WriteBack wb)
11249 {
11250 unsigned rn = INSTR (14, 10);
11251 unsigned rd = INSTR (9, 5);
11252 unsigned rm = INSTR (4, 0);
11253 uint64_t address = aarch64_get_reg_u64 (cpu, rd, SP_OK);
11254
11255 /* Rt == Rt2 is UNPREDICTABLE for a load pair; treat it as UNALLOC so it is never executed. */
11256 if (rn == rm)
11257 HALT_UNALLOC;
11258
11259 offset <<= 3;
11260
11261 if (wb != Post)
11262 address += offset;
11263
11264 aarch64_set_reg_u64 (cpu, rm, SP_OK, aarch64_get_mem_u64 (cpu, address));
11265 aarch64_set_reg_u64 (cpu, rn, SP_OK, aarch64_get_mem_u64 (cpu, address + 8));
11266
11267 if (wb == Post)
11268 address += offset;
11269
11270 if (wb != NoWriteBack)
11271 aarch64_set_reg_u64 (cpu, rd, SP_OK, address);
11272 }
11273
11274 static void
11275 dex_load_store_pair_gr (sim_cpu *cpu)
11276 {
11277 /* instr[31,30] = size (10=> 64-bit, 01=> signed 32-bit, 00=> 32-bit)
11278 instr[29,25] = instruction encoding: 101_0
11279 instr[26] = V : 1 if fp 0 if gp
11280 instr[24,23] = addressing mode (10=> offset, 01=> post, 11=> pre)
11281 instr[22] = load/store (1=> load)
11282 instr[21,15] = signed, scaled, offset
11283 instr[14,10] = Rn
11284 instr[ 9, 5] = Rd
11285 instr[ 4, 0] = Rm. */
11286
11287 uint32_t dispatch = ((INSTR (31, 30) << 3) | INSTR (24, 22));
11288 int32_t offset = simm32 (aarch64_get_instr (cpu), 21, 15);
11289
11290 switch (dispatch)
11291 {
11292 case 2: store_pair_u32 (cpu, offset, Post); return;
11293 case 3: load_pair_u32 (cpu, offset, Post); return;
11294 case 4: store_pair_u32 (cpu, offset, NoWriteBack); return;
11295 case 5: load_pair_u32 (cpu, offset, NoWriteBack); return;
11296 case 6: store_pair_u32 (cpu, offset, Pre); return;
11297 case 7: load_pair_u32 (cpu, offset, Pre); return;
11298
11299 case 11: load_pair_s32 (cpu, offset, Post); return;
11300 case 13: load_pair_s32 (cpu, offset, NoWriteBack); return;
11301 case 15: load_pair_s32 (cpu, offset, Pre); return;
11302
11303 case 18: store_pair_u64 (cpu, offset, Post); return;
11304 case 19: load_pair_u64 (cpu, offset, Post); return;
11305 case 20: store_pair_u64 (cpu, offset, NoWriteBack); return;
11306 case 21: load_pair_u64 (cpu, offset, NoWriteBack); return;
11307 case 22: store_pair_u64 (cpu, offset, Pre); return;
11308 case 23: load_pair_u64 (cpu, offset, Pre); return;
11309
11310 default:
11311 HALT_UNALLOC;
11312 }
11313 }
11314
11315 static void
11316 store_pair_float (sim_cpu *cpu, int32_t offset, WriteBack wb)
11317 {
11318 unsigned rn = INSTR (14, 10);
11319 unsigned rd = INSTR (9, 5);
11320 unsigned rm = INSTR (4, 0);
11321 uint64_t address = aarch64_get_reg_u64 (cpu, rd, SP_OK);
11322
11323 offset <<= 2;
11324
11325 if (wb != Post)
11326 address += offset;
11327
11328 aarch64_set_mem_u32 (cpu, address, aarch64_get_vec_u32 (cpu, rm, 0));
11329 aarch64_set_mem_u32 (cpu, address + 4, aarch64_get_vec_u32 (cpu, rn, 0));
11330
11331 if (wb == Post)
11332 address += offset;
11333
11334 if (wb != NoWriteBack)
11335 aarch64_set_reg_u64 (cpu, rd, SP_OK, address);
11336 }
11337
11338 static void
11339 store_pair_double (sim_cpu *cpu, int32_t offset, WriteBack wb)
11340 {
11341 unsigned rn = INSTR (14, 10);
11342 unsigned rd = INSTR (9, 5);
11343 unsigned rm = INSTR (4, 0);
11344 uint64_t address = aarch64_get_reg_u64 (cpu, rd, SP_OK);
11345
11346 offset <<= 3;
11347
11348 if (wb != Post)
11349 address += offset;
11350
11351 aarch64_set_mem_u64 (cpu, address, aarch64_get_vec_u64 (cpu, rm, 0));
11352 aarch64_set_mem_u64 (cpu, address + 8, aarch64_get_vec_u64 (cpu, rn, 0));
11353
11354 if (wb == Post)
11355 address += offset;
11356
11357 if (wb != NoWriteBack)
11358 aarch64_set_reg_u64 (cpu, rd, SP_OK, address);
11359 }
11360
11361 static void
11362 store_pair_long_double (sim_cpu *cpu, int32_t offset, WriteBack wb)
11363 {
11364 FRegister a;
11365 unsigned rn = INSTR (14, 10);
11366 unsigned rd = INSTR (9, 5);
11367 unsigned rm = INSTR (4, 0);
11368 uint64_t address = aarch64_get_reg_u64 (cpu, rd, SP_OK);
11369
11370 offset <<= 4;
11371
11372 if (wb != Post)
11373 address += offset;
11374
11375 aarch64_get_FP_long_double (cpu, rm, & a);
11376 aarch64_set_mem_long_double (cpu, address, a);
11377 aarch64_get_FP_long_double (cpu, rn, & a);
11378 aarch64_set_mem_long_double (cpu, address + 16, a);
11379
11380 if (wb == Post)
11381 address += offset;
11382
11383 if (wb != NoWriteBack)
11384 aarch64_set_reg_u64 (cpu, rd, SP_OK, address);
11385 }
11386
11387 static void
11388 load_pair_float (sim_cpu *cpu, int32_t offset, WriteBack wb)
11389 {
11390 unsigned rn = INSTR (14, 10);
11391 unsigned rd = INSTR (9, 5);
11392 unsigned rm = INSTR (4, 0);
11393 uint64_t address = aarch64_get_reg_u64 (cpu, rd, SP_OK);
11394
11395 if (rm == rn)
11396 HALT_UNALLOC;
11397
11398 offset <<= 2;
11399
11400 if (wb != Post)
11401 address += offset;
11402
11403 aarch64_set_vec_u32 (cpu, rm, 0, aarch64_get_mem_u32 (cpu, address));
11404 aarch64_set_vec_u32 (cpu, rn, 0, aarch64_get_mem_u32 (cpu, address + 4));
11405
11406 if (wb == Post)
11407 address += offset;
11408
11409 if (wb != NoWriteBack)
11410 aarch64_set_reg_u64 (cpu, rd, SP_OK, address);
11411 }
11412
11413 static void
11414 load_pair_double (sim_cpu *cpu, int32_t offset, WriteBack wb)
11415 {
11416 unsigned rn = INSTR (14, 10);
11417 unsigned rd = INSTR (9, 5);
11418 unsigned rm = INSTR (4, 0);
11419 uint64_t address = aarch64_get_reg_u64 (cpu, rd, SP_OK);
11420
11421 if (rm == rn)
11422 HALT_UNALLOC;
11423
11424 offset <<= 3;
11425
11426 if (wb != Post)
11427 address += offset;
11428
11429 aarch64_set_vec_u64 (cpu, rm, 0, aarch64_get_mem_u64 (cpu, address));
11430 aarch64_set_vec_u64 (cpu, rn, 0, aarch64_get_mem_u64 (cpu, address + 8));
11431
11432 if (wb == Post)
11433 address += offset;
11434
11435 if (wb != NoWriteBack)
11436 aarch64_set_reg_u64 (cpu, rd, SP_OK, address);
11437 }
11438
11439 static void
11440 load_pair_long_double (sim_cpu *cpu, int32_t offset, WriteBack wb)
11441 {
11442 FRegister a;
11443 unsigned rn = INSTR (14, 10);
11444 unsigned rd = INSTR (9, 5);
11445 unsigned rm = INSTR (4, 0);
11446 uint64_t address = aarch64_get_reg_u64 (cpu, rd, SP_OK);
11447
11448 if (rm == rn)
11449 HALT_UNALLOC;
11450
11451 offset <<= 4;
11452
11453 if (wb != Post)
11454 address += offset;
11455
11456 aarch64_get_mem_long_double (cpu, address, & a);
11457 aarch64_set_FP_long_double (cpu, rm, a);
11458 aarch64_get_mem_long_double (cpu, address + 16, & a);
11459 aarch64_set_FP_long_double (cpu, rn, a);
11460
11461 if (wb == Post)
11462 address += offset;
11463
11464 if (wb != NoWriteBack)
11465 aarch64_set_reg_u64 (cpu, rd, SP_OK, address);
11466 }
11467
11468 static void
11469 dex_load_store_pair_fp (sim_cpu *cpu)
11470 {
11471 /* instr[31,30] = size (10=> 128-bit, 01=> 64-bit, 00=> 32-bit)
11472 instr[29,25] = instruction encoding
11473 instr[24,23] = addressing mode (10=> offset, 01=> post, 11=> pre)
11474 instr[22] = load/store (1=> load)
11475 instr[21,15] = signed, scaled, offset
11476 instr[14,10] = Rn
11477 instr[ 9, 5] = Rd
11478 instr[ 4, 0] = Rm */
11479
11480 uint32_t dispatch = ((INSTR (31, 30) << 3) | INSTR (24, 22));
11481 int32_t offset = simm32 (aarch64_get_instr (cpu), 21, 15);
11482
11483 switch (dispatch)
11484 {
11485 case 2: store_pair_float (cpu, offset, Post); return;
11486 case 3: load_pair_float (cpu, offset, Post); return;
11487 case 4: store_pair_float (cpu, offset, NoWriteBack); return;
11488 case 5: load_pair_float (cpu, offset, NoWriteBack); return;
11489 case 6: store_pair_float (cpu, offset, Pre); return;
11490 case 7: load_pair_float (cpu, offset, Pre); return;
11491
11492 case 10: store_pair_double (cpu, offset, Post); return;
11493 case 11: load_pair_double (cpu, offset, Post); return;
11494 case 12: store_pair_double (cpu, offset, NoWriteBack); return;
11495 case 13: load_pair_double (cpu, offset, NoWriteBack); return;
11496 case 14: store_pair_double (cpu, offset, Pre); return;
11497 case 15: load_pair_double (cpu, offset, Pre); return;
11498
11499 case 18: store_pair_long_double (cpu, offset, Post); return;
11500 case 19: load_pair_long_double (cpu, offset, Post); return;
11501 case 20: store_pair_long_double (cpu, offset, NoWriteBack); return;
11502 case 21: load_pair_long_double (cpu, offset, NoWriteBack); return;
11503 case 22: store_pair_long_double (cpu, offset, Pre); return;
11504 case 23: load_pair_long_double (cpu, offset, Pre); return;
11505
11506 default:
11507 HALT_UNALLOC;
11508 }
11509 }
11510
11511 static inline unsigned
11512 vec_reg (unsigned v, unsigned o)
11513 {
11514 return (v + o) & 0x1F; /* Register numbers wrap modulo the 32 vector registers. */
11515 }
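/* Example: vec_reg (31, 1) == 0, so a multi-register transfer starting
   at V31 wraps round to V0, matching the architected (t + n) MOD 32
   register numbering.  */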
11516
11517 /* Load multiple N-element structures to M consecutive registers. */
11518 static void
11519 vec_load (sim_cpu *cpu, uint64_t address, unsigned N, unsigned M)
11520 {
11521 int all = INSTR (30, 30);
11522 unsigned size = INSTR (11, 10);
11523 unsigned vd = INSTR (4, 0);
11524 unsigned rpt = (N == M) ? 1 : M;
11525 unsigned selem = N;
11526 unsigned i, j, k;
11527
11528 switch (size)
11529 {
11530 case 0: /* 8-bit operations. */
11531 for (i = 0; i < rpt; i++)
11532 for (j = 0; j < (8 + (8 * all)); j++)
11533 for (k = 0; k < selem; k++)
11534 {
11535 aarch64_set_vec_u8 (cpu, vec_reg (vd, i + k), j,
11536 aarch64_get_mem_u8 (cpu, address));
11537 address += 1;
11538 }
11539 return;
11540
11541 case 1: /* 16-bit operations. */
11542 for (i = 0; i < rpt; i++)
11543 for (j = 0; j < (4 + (4 * all)); j++)
11544 for (k = 0; k < selem; k++)
11545 {
11546 aarch64_set_vec_u16 (cpu, vec_reg (vd, i + k), j,
11547 aarch64_get_mem_u16 (cpu, address));
11548 address += 2;
11549 }
11550 return;
11551
11552 case 2: /* 32-bit operations. */
11553 for (i = 0; i < rpt; i++)
11554 for (j = 0; j < (2 + (2 * all)); j++)
11555 for (k = 0; k < selem; k++)
11556 {
11557 aarch64_set_vec_u32 (cpu, vec_reg (vd, i + k), j,
11558 aarch64_get_mem_u32 (cpu, address));
11559 address += 4;
11560 }
11561 return;
11562
11563 case 3: /* 64-bit operations. */
11564 for (i = 0; i < rpt; i++)
11565 for (j = 0; j < (1 + all); j++)
11566 for (k = 0; k < selem; k++)
11567 {
11568 aarch64_set_vec_u64 (cpu, vec_reg (vd, i + k), j,
11569 aarch64_get_mem_u64 (cpu, address));
11570 address += 8;
11571 }
11572 return;
11573 }
11574 }
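/* Layout example: LD2 (selem = 2, rpt = 1) with 8-bit elements and
   all = 0 de-interleaves 16 bytes m[0..15] so Vd.b[j] = m[2*j] and
   V(d+1).b[j] = m[2*j + 1] for j = 0..7.  The LD1 multi-register forms
   instead use rpt = M with selem = 1, filling each register in turn.  */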
11575
11576 /* Load multiple 4-element structures into four consecutive registers. */
11577 static void
11578 LD4 (sim_cpu *cpu, uint64_t address)
11579 {
11580 vec_load (cpu, address, 4, 4);
11581 }
11582
11583 /* Load multiple 3-element structures into three consecutive registers. */
11584 static void
11585 LD3 (sim_cpu *cpu, uint64_t address)
11586 {
11587 vec_load (cpu, address, 3, 3);
11588 }
11589
11590 /* Load multiple 2-element structures into two consecutive registers. */
11591 static void
11592 LD2 (sim_cpu *cpu, uint64_t address)
11593 {
11594 vec_load (cpu, address, 2, 2);
11595 }
11596
11597 /* Load multiple 1-element structures into one register. */
11598 static void
11599 LD1_1 (sim_cpu *cpu, uint64_t address)
11600 {
11601 vec_load (cpu, address, 1, 1);
11602 }
11603
11604 /* Load multiple 1-element structures into two registers. */
11605 static void
11606 LD1_2 (sim_cpu *cpu, uint64_t address)
11607 {
11608 vec_load (cpu, address, 1, 2);
11609 }
11610
11611 /* Load multiple 1-element structures into three registers. */
11612 static void
11613 LD1_3 (sim_cpu *cpu, uint64_t address)
11614 {
11615 vec_load (cpu, address, 1, 3);
11616 }
11617
11618 /* Load multiple 1-element structures into four registers. */
11619 static void
11620 LD1_4 (sim_cpu *cpu, uint64_t address)
11621 {
11622 vec_load (cpu, address, 1, 4);
11623 }
11624
11625 /* Store multiple N-element structures from M consecutive registers. */
11626 static void
11627 vec_store (sim_cpu *cpu, uint64_t address, unsigned N, unsigned M)
11628 {
11629 int all = INSTR (30, 30);
11630 unsigned size = INSTR (11, 10);
11631 unsigned vd = INSTR (4, 0);
11632 unsigned rpt = (N == M) ? 1 : M;
11633 unsigned selem = N;
11634 unsigned i, j, k;
11635
11636 switch (size)
11637 {
11638 case 0: /* 8-bit operations. */
11639 for (i = 0; i < rpt; i++)
11640 for (j = 0; j < (8 + (8 * all)); j++)
11641 for (k = 0; k < selem; k++)
11642 {
11643 aarch64_set_mem_u8
11644 (cpu, address,
11645 aarch64_get_vec_u8 (cpu, vec_reg (vd, i + k), j));
11646 address += 1;
11647 }
11648 return;
11649
11650 case 1: /* 16-bit operations. */
11651 for (i = 0; i < rpt; i++)
11652 for (j = 0; j < (4 + (4 * all)); j++)
11653 for (k = 0; k < selem; k++)
11654 {
11655 aarch64_set_mem_u16
11656 (cpu, address,
11657 aarch64_get_vec_u16 (cpu, vec_reg (vd, i + k), j));
11658 address += 2;
11659 }
11660 return;
11661
11662 case 2: /* 32-bit operations. */
11663 for (i = 0; i < rpt; i++)
11664 for (j = 0; j < (2 + (2 * all)); j++)
11665 for (k = 0; k < selem; k++)
11666 {
11667 aarch64_set_mem_u32
11668 (cpu, address,
11669 aarch64_get_vec_u32 (cpu, vec_reg (vd, i + k), j));
11670 address += 4;
11671 }
11672 return;
11673
11674 case 3: /* 64-bit operations. */
11675 for (i = 0; i < rpt; i++)
11676 for (j = 0; j < (1 + all); j++)
11677 for (k = 0; k < selem; k++)
11678 {
11679 aarch64_set_mem_u64
11680 (cpu, address,
11681 aarch64_get_vec_u64 (cpu, vec_reg (vd, i + k), j));
11682 address += 8;
11683 }
11684 return;
11685 }
11686 }
11687
11688 /* Store multiple 4-element structure from four consecutive registers. */
11689 static void
11690 ST4 (sim_cpu *cpu, uint64_t address)
11691 {
11692 vec_store (cpu, address, 4, 4);
11693 }
11694
11695 /* Store multiple 3-element structures from three consecutive registers. */
11696 static void
11697 ST3 (sim_cpu *cpu, uint64_t address)
11698 {
11699 vec_store (cpu, address, 3, 3);
11700 }
11701
11702 /* Store multiple 2-element structures from two consecutive registers. */
11703 static void
11704 ST2 (sim_cpu *cpu, uint64_t address)
11705 {
11706 vec_store (cpu, address, 2, 2);
11707 }
11708
11709 /* Store multiple 1-element structures from one register. */
11710 static void
11711 ST1_1 (sim_cpu *cpu, uint64_t address)
11712 {
11713 vec_store (cpu, address, 1, 1);
11714 }
11715
11716 /* Store multiple 1-element structures from two registers. */
11717 static void
11718 ST1_2 (sim_cpu *cpu, uint64_t address)
11719 {
11720 vec_store (cpu, address, 1, 2);
11721 }
11722
11723 /* Store multiple 1-element structures from three registers. */
11724 static void
11725 ST1_3 (sim_cpu *cpu, uint64_t address)
11726 {
11727 vec_store (cpu, address, 1, 3);
11728 }
11729
11730 /* Store multiple 1-element structures from four registers. */
11731 static void
11732 ST1_4 (sim_cpu *cpu, uint64_t address)
11733 {
11734 vec_store (cpu, address, 1, 4);
11735 }
11736
11737 #define LDn_STn_SINGLE_LANE_AND_SIZE() \
11738 do \
11739 { \
11740 switch (INSTR (15, 14)) \
11741 { \
11742 case 0: \
11743 lane = (full << 3) | (s << 2) | size; \
11744 size = 0; \
11745 break; \
11746 \
11747 case 1: \
11748 if ((size & 1) == 1) \
11749 HALT_UNALLOC; \
11750 lane = (full << 2) | (s << 1) | (size >> 1); \
11751 size = 1; \
11752 break; \
11753 \
11754 case 2: \
11755 if ((size & 2) == 2) \
11756 HALT_UNALLOC; \
11757 \
11758 if ((size & 1) == 0) \
11759 { \
11760 lane = (full << 1) | s; \
11761 size = 2; \
11762 } \
11763 else \
11764 { \
11765 if (s) \
11766 HALT_UNALLOC; \
11767 lane = full; \
11768 size = 3; \
11769 } \
11770 break; \
11771 \
11772 default: \
11773 HALT_UNALLOC; \
11774 } \
11775 } \
11776 while (0)
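/* Worked example: for the 16-bit case (INSTR (15, 14) == 1) with Q = 1,
   S = 0 and a size field of 0b10, the macro computes
   lane = (1 << 2) | (0 << 1) | (0b10 >> 1) = 5 and rewrites size to 1,
   i.e. lane 5 of a halfword vector; a size field of 0b01 would be
   rejected as UNALLOC.  */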
11777
11778 /* Load single structure into one lane of N registers. */
11779 static void
11780 do_vec_LDn_single (sim_cpu *cpu, uint64_t address)
11781 {
11782 /* instr[31] = 0
11783 instr[30] = element selector 0=>half, 1=>all elements
11784 instr[29,24] = 00 1101
11785 instr[23] = 0=>simple, 1=>post
11786 instr[22] = 1
11787 instr[21] = width: LD1-or-LD3 (0) / LD2-or-LD4 (1)
11788 instr[20,16] = 0 0000 (simple), Vinc (reg-post-inc, no SP),
11789 11111 (immediate post inc)
11790 instr[15,13] = opcode
11791 instr[12] = S, used for lane number
11792 instr[11,10] = size, also used for lane number
11793 instr[9,5] = address
11794 instr[4,0] = Vd */
11795
11796 unsigned full = INSTR (30, 30);
11797 unsigned vd = INSTR (4, 0);
11798 unsigned size = INSTR (11, 10);
11799 unsigned s = INSTR (12, 12);
11800 int nregs = ((INSTR (13, 13) << 1) | INSTR (21, 21)) + 1;
11801 int lane = 0;
11802 int i;
11803
11804 NYI_assert (29, 24, 0x0D);
11805 NYI_assert (22, 22, 1);
11806
11807 /* Compute the lane number first (using size), and then compute size. */
11808 LDn_STn_SINGLE_LANE_AND_SIZE ();
11809
11810 for (i = 0; i < nregs; i++)
11811 switch (size)
11812 {
11813 case 0:
11814 {
11815 uint8_t val = aarch64_get_mem_u8 (cpu, address + i);
11816 aarch64_set_vec_u8 (cpu, vec_reg (vd, i), lane, val);
11817 break;
11818 }
11819
11820 case 1:
11821 {
11822 uint16_t val = aarch64_get_mem_u16 (cpu, address + (i * 2));
11823 aarch64_set_vec_u16 (cpu, vec_reg (vd, i), lane, val);
11824 break;
11825 }
11826
11827 case 2:
11828 {
11829 uint32_t val = aarch64_get_mem_u32 (cpu, address + (i * 4));
11830 aarch64_set_vec_u32 (cpu, vec_reg (vd, i), lane, val);
11831 break;
11832 }
11833
11834 case 3:
11835 {
11836 uint64_t val = aarch64_get_mem_u64 (cpu, address + (i * 8));
11837 aarch64_set_vec_u64 (cpu, vec_reg (vd, i), lane, val);
11838 break;
11839 }
11840 }
11841 }
11842
11843 /* Store single structure from one lane from N registers. */
11844 static void
11845 do_vec_STn_single (sim_cpu *cpu, uint64_t address)
11846 {
11847 /* instr[31] = 0
11848 instr[30] = element selector 0=>half, 1=>all elements
11849 instr[29,24] = 00 1101
11850 instr[23] = 0=>simple, 1=>post
11851 instr[22] = 0
11852 instr[21] = width: LD1-or-LD3 (0) / LD2-or-LD4 (1)
11853 instr[20,16] = 0 0000 (simple), Vinc (reg-post-inc, no SP),
11854 11111 (immediate post inc)
11855 instr[15,13] = opcode
11856 instr[12] = S, used for lane number
11857 instr[11,10] = size, also used for lane number
11858 instr[9,5] = address
11859 instr[4,0] = Vd */
11860
11861 unsigned full = INSTR (30, 30);
11862 unsigned vd = INSTR (4, 0);
11863 unsigned size = INSTR (11, 10);
11864 unsigned s = INSTR (12, 12);
11865 int nregs = ((INSTR (13, 13) << 1) | INSTR (21, 21)) + 1;
11866 int lane = 0;
11867 int i;
11868
11869 NYI_assert (29, 24, 0x0D);
11870 NYI_assert (22, 22, 0);
11871
11872 /* Compute the lane number first (using size), and then compute size. */
11873 LDn_STn_SINGLE_LANE_AND_SIZE ();
11874
11875 for (i = 0; i < nregs; i++)
11876 switch (size)
11877 {
11878 case 0:
11879 {
11880 uint8_t val = aarch64_get_vec_u8 (cpu, vec_reg (vd, i), lane);
11881 aarch64_set_mem_u8 (cpu, address + i, val);
11882 break;
11883 }
11884
11885 case 1:
11886 {
11887 uint16_t val = aarch64_get_vec_u16 (cpu, vec_reg (vd, i), lane);
11888 aarch64_set_mem_u16 (cpu, address + (i * 2), val);
11889 break;
11890 }
11891
11892 case 2:
11893 {
11894 uint32_t val = aarch64_get_vec_u32 (cpu, vec_reg (vd, i), lane);
11895 aarch64_set_mem_u32 (cpu, address + (i * 4), val);
11896 break;
11897 }
11898
11899 case 3:
11900 {
11901 uint64_t val = aarch64_get_vec_u64 (cpu, vec_reg (vd, i), lane);
11902 aarch64_set_mem_u64 (cpu, address + (i * 8), val);
11903 break;
11904 }
11905 }
11906 }
11907
11908 /* Load single structure into all lanes of N registers. */
11909 static void
11910 do_vec_LDnR (sim_cpu *cpu, uint64_t address)
11911 {
11912 /* instr[31] = 0
11913 instr[30] = element selector 0=>half, 1=>all elements
11914 instr[29,24] = 00 1101
11915 instr[23] = 0=>simple, 1=>post
11916 instr[22] = 1
11917 instr[21] = width: LD1R-or-LD3R (0) / LD2R-or-LD4R (1)
11918 instr[20,16] = 0 0000 (simple), Vinc (reg-post-inc, no SP),
11919 11111 (immediate post inc)
11920 instr[15,14] = 11
11921 instr[13] = width: LD1R-or-LD2R (0) / LD3R-or-LD4R (1)
11922 instr[12] = 0
11923 instr[11,10] = element size 00=> byte(b), 01=> half(h),
11924 10=> word(s), 11=> double(d)
11925 instr[9,5] = address
11926 instr[4,0] = Vd */
11927
11928 unsigned full = INSTR (30, 30);
11929 unsigned vd = INSTR (4, 0);
11930 unsigned size = INSTR (11, 10);
11931 int nregs = ((INSTR (13, 13) << 1) | INSTR (21, 21)) + 1;
11932 int i, n;
11933
11934 NYI_assert (29, 24, 0x0D);
11935 NYI_assert (22, 22, 1);
11936 NYI_assert (15, 14, 3);
11937 NYI_assert (12, 12, 0);
11938
11939 for (n = 0; n < nregs; n++)
11940 switch (size)
11941 {
11942 case 0:
11943 {
11944 uint8_t val = aarch64_get_mem_u8 (cpu, address + n);
11945 for (i = 0; i < (full ? 16 : 8); i++)
11946 aarch64_set_vec_u8 (cpu, vec_reg (vd, n), i, val);
11947 break;
11948 }
11949
11950 case 1:
11951 {
11952 uint16_t val = aarch64_get_mem_u16 (cpu, address + (n * 2));
11953 for (i = 0; i < (full ? 8 : 4); i++)
11954 aarch64_set_vec_u16 (cpu, vec_reg (vd, n), i, val);
11955 break;
11956 }
11957
11958 case 2:
11959 {
11960 uint32_t val = aarch64_get_mem_u32 (cpu, address + (n * 4));
11961 for (i = 0; i < (full ? 4 : 2); i++)
11962 aarch64_set_vec_u32 (cpu, vec_reg (vd, n), i, val);
11963 break;
11964 }
11965
11966 case 3:
11967 {
11968 uint64_t val = aarch64_get_mem_u64 (cpu, address + (n * 8));
11969 for (i = 0; i < (full ? 2 : 1); i++)
11970 aarch64_set_vec_u64 (cpu, vec_reg (vd, n), i, val);
11971 break;
11972 }
11973
11974 default:
11975 HALT_UNALLOC;
11976 }
11977 }
11978
11979 static void
11980 do_vec_load_store (sim_cpu *cpu)
11981 {
11982 /* {LD|ST}<N> {Vd..Vd+N}, vaddr
11983
11984 instr[31] = 0
11985 instr[30] = element selector 0=>half, 1=>all elements
11986 instr[29,25] = 00110
11987 instr[24] = 0=>multiple struct, 1=>single struct
11988 instr[23] = 0=>simple, 1=>post
11989 instr[22] = 0=>store, 1=>load
11990 instr[21] = 0 (LDn) / small(0)-large(1) selector (LDnR)
11991 instr[20,16] = 00000 (simple), Vinc (reg-post-inc, no SP),
11992 11111 (immediate post inc)
11993 instr[15,12] = elements and destinations. eg for load:
11994 0000=>LD4 => load multiple 4-element to
11995 four consecutive registers
11996 0100=>LD3 => load multiple 3-element to
11997 three consecutive registers
11998 1000=>LD2 => load multiple 2-element to
11999 two consecutive registers
12000 0010=>LD1 => load multiple 1-element to
12001 four consecutive registers
12002 0110=>LD1 => load multiple 1-element to
12003 three consecutive registers
12004 1010=>LD1 => load multiple 1-element to
12005 two consecutive registers
12006 0111=>LD1 => load multiple 1-element to
12007 one register
12008 1100=>LD1R,LD2R
12009 1110=>LD3R,LD4R
12010 instr[11,10] = element size 00=> byte(b), 01=> half(h),
12011 10=> word(s), 11=> double(d)
12012 instr[9,5] = Vn, can be SP
12013 instr[4,0] = Vd */
12014
12015 int single;
12016 int post;
12017 int load;
12018 unsigned vn;
12019 uint64_t address;
12020 int type;
12021
12022 if (INSTR (31, 31) != 0 || INSTR (29, 25) != 0x06)
12023 HALT_NYI;
12024
12025 single = INSTR (24, 24);
12026 post = INSTR (23, 23);
12027 load = INSTR (22, 22);
12028 type = INSTR (15, 12);
12029 vn = INSTR (9, 5);
12030 address = aarch64_get_reg_u64 (cpu, vn, SP_OK);
12031
12032 if (! single && INSTR (21, 21) != 0)
12033 HALT_UNALLOC;
12034
12035 if (post)
12036 {
12037 unsigned vm = INSTR (20, 16);
12038
12039 if (vm == R31)
12040 {
12041 unsigned sizeof_operation;
12042
12043 if (single)
12044 {
12045 if ((type >= 0) && (type <= 11))
12046 {
12047 int nregs = ((INSTR (13, 13) << 1) | INSTR (21, 21)) + 1;
12048 switch (INSTR (15, 14))
12049 {
12050 case 0:
12051 sizeof_operation = nregs * 1;
12052 break;
12053 case 1:
12054 sizeof_operation = nregs * 2;
12055 break;
12056 case 2:
12057 if (INSTR (10, 10) == 0)
12058 sizeof_operation = nregs * 4;
12059 else
12060 sizeof_operation = nregs * 8;
12061 break;
12062 default:
12063 HALT_UNALLOC;
12064 }
12065 }
12066 else if (type == 0xC)
12067 {
12068 sizeof_operation = INSTR (21, 21) ? 2 : 1;
12069 sizeof_operation <<= INSTR (11, 10);
12070 }
12071 else if (type == 0xE)
12072 {
12073 sizeof_operation = INSTR (21, 21) ? 4 : 3;
12074 sizeof_operation <<= INSTR (11, 10);
12075 }
12076 else
12077 HALT_UNALLOC;
12078 }
12079 else
12080 {
12081 switch (type)
12082 {
12083 case 0: sizeof_operation = 32; break;
12084 case 4: sizeof_operation = 24; break;
12085 case 8: sizeof_operation = 16; break;
12086
12087 case 7:
12088 /* One register, immediate offset variant. */
12089 sizeof_operation = 8;
12090 break;
12091
12092 case 10:
12093 /* Two registers, immediate offset variant. */
12094 sizeof_operation = 16;
12095 break;
12096
12097 case 6:
12098 /* Three registers, immediate offset variant. */
12099 sizeof_operation = 24;
12100 break;
12101
12102 case 2:
12103 /* Four registers, immediate offset variant. */
12104 sizeof_operation = 32;
12105 break;
12106
12107 default:
12108 HALT_UNALLOC;
12109 }
12110
12111 if (INSTR (30, 30))
12112 sizeof_operation *= 2;
12113 }
12114
12115 aarch64_set_reg_u64 (cpu, vn, SP_OK, address + sizeof_operation);
12116 }
12117 else
12118 aarch64_set_reg_u64 (cpu, vn, SP_OK,
12119 address + aarch64_get_reg_u64 (cpu, vm, NO_SP));
12120 }
12121 else
12122 {
12123 NYI_assert (20, 16, 0);
12124 }
12125
12126 if (single)
12127 {
12128 if (load)
12129 {
12130 if ((type >= 0) && (type <= 11))
12131 do_vec_LDn_single (cpu, address);
12132 else if ((type == 0xC) || (type == 0xE))
12133 do_vec_LDnR (cpu, address);
12134 else
12135 HALT_UNALLOC;
12136 return;
12137 }
12138
12139 /* Stores. */
12140 if ((type >= 0) && (type <= 11))
12141 {
12142 do_vec_STn_single (cpu, address);
12143 return;
12144 }
12145
12146 HALT_UNALLOC;
12147 }
12148
12149 if (load)
12150 {
12151 switch (type)
12152 {
12153 case 0: LD4 (cpu, address); return;
12154 case 4: LD3 (cpu, address); return;
12155 case 8: LD2 (cpu, address); return;
12156 case 2: LD1_4 (cpu, address); return;
12157 case 6: LD1_3 (cpu, address); return;
12158 case 10: LD1_2 (cpu, address); return;
12159 case 7: LD1_1 (cpu, address); return;
12160
12161 default:
12162 HALT_UNALLOC;
12163 }
12164 }
12165
12166 /* Stores. */
12167 switch (type)
12168 {
12169 case 0: ST4 (cpu, address); return;
12170 case 4: ST3 (cpu, address); return;
12171 case 8: ST2 (cpu, address); return;
12172 case 2: ST1_4 (cpu, address); return;
12173 case 6: ST1_3 (cpu, address); return;
12174 case 10: ST1_2 (cpu, address); return;
12175 case 7: ST1_1 (cpu, address); return;
12176 default:
12177 HALT_UNALLOC;
12178 }
12179 }
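/* Post-increment example: with Vm == 31 (the immediate variant) the base
   register advances by the bytes transferred, e.g. LD4 of multiple
   structures with Q = 1 (type 0) adds 32 * 2 = 64 -- four 16-byte
   registers.  */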
12180
12181 static void
12182 dexLdSt (sim_cpu *cpu)
12183 {
12184 /* uint32_t group = dispatchGroup (aarch64_get_instr (cpu));
12185 assert group == GROUP_LDST_0100 || group == GROUP_LDST_0110 ||
12186 group == GROUP_LDST_1100 || group == GROUP_LDST_1110
12187 bits [29,28:26] of a LS are the secondary dispatch vector. */
12188 uint32_t group2 = dispatchLS (aarch64_get_instr (cpu));
12189
12190 switch (group2)
12191 {
12192 case LS_EXCL_000:
12193 dexLoadExclusive (cpu); return;
12194
12195 case LS_LIT_010:
12196 case LS_LIT_011:
12197 dexLoadLiteral (cpu); return;
12198
12199 case LS_OTHER_110:
12200 case LS_OTHER_111:
12201 dexLoadOther (cpu); return;
12202
12203 case LS_ADVSIMD_001:
12204 do_vec_load_store (cpu); return;
12205
12206 case LS_PAIR_100:
12207 dex_load_store_pair_gr (cpu); return;
12208
12209 case LS_PAIR_101:
12210 dex_load_store_pair_fp (cpu); return;
12211
12212 default:
12213 /* Should never reach here. */
12214 HALT_NYI;
12215 }
12216 }
12217
12218 /* Specific decode and execute for group Data Processing Register. */
12219
12220 static void
12221 dexLogicalShiftedRegister (sim_cpu *cpu)
12222 {
12223 /* instr[31] = size : 0 ==> 32 bit, 1 ==> 64 bit
12224 instr[30,29] = op
12225 instr[28:24] = 01010
12226 instr[23,22] = shift : 0 ==> LSL, 1 ==> LSR, 2 ==> ASR, 3 ==> ROR
12227 instr[21] = N
12228 instr[20,16] = Rm
12229 instr[15,10] = count : must be 0xxxxx for 32 bit
12230 instr[9,5] = Rn
12231 instr[4,0] = Rd */
12232
12233 uint32_t size = INSTR (31, 31);
12234 Shift shiftType = INSTR (23, 22);
12235 uint32_t count = INSTR (15, 10);
12236
12237 /* 32 bit operations must have count[5] = 0.
12238 or else we have an UNALLOC. */
12239 if (size == 0 && uimm (count, 5, 5))
12240 HALT_UNALLOC;
12241
12242 /* Dispatch on size:op:N. */
12243 switch ((INSTR (31, 29) << 1) | INSTR (21, 21))
12244 {
12245 case 0: and32_shift (cpu, shiftType, count); return;
12246 case 1: bic32_shift (cpu, shiftType, count); return;
12247 case 2: orr32_shift (cpu, shiftType, count); return;
12248 case 3: orn32_shift (cpu, shiftType, count); return;
12249 case 4: eor32_shift (cpu, shiftType, count); return;
12250 case 5: eon32_shift (cpu, shiftType, count); return;
12251 case 6: ands32_shift (cpu, shiftType, count); return;
12252 case 7: bics32_shift (cpu, shiftType, count); return;
12253 case 8: and64_shift (cpu, shiftType, count); return;
12254 case 9: bic64_shift (cpu, shiftType, count); return;
12255 case 10: orr64_shift (cpu, shiftType, count); return;
12256 case 11: orn64_shift (cpu, shiftType, count); return;
12257 case 12: eor64_shift (cpu, shiftType, count); return;
12258 case 13: eon64_shift (cpu, shiftType, count); return;
12259 case 14: ands64_shift (cpu, shiftType, count); return;
12260 case 15: bics64_shift (cpu, shiftType, count); return;
12261 }
12262 }
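/* Dispatch example: 64-bit ORR (shifted register) has size = 1, op = 01,
   N = 0, so (INSTR (31, 29) << 1) | N = (0b101 << 1) | 0 = 10, handled
   by orr64_shift.  MVN is just ORN with Rn == ZR and therefore decodes
   through cases 3 and 11.  */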
12263
12264 /* 32 bit conditional select. */
12265 static void
12266 csel32 (sim_cpu *cpu, CondCode cc)
12267 {
12268 unsigned rm = INSTR (20, 16);
12269 unsigned rn = INSTR (9, 5);
12270 unsigned rd = INSTR (4, 0);
12271
12272 aarch64_set_reg_u64 (cpu, rd, NO_SP,
12273 testConditionCode (cpu, cc)
12274 ? aarch64_get_reg_u32 (cpu, rn, NO_SP)
12275 : aarch64_get_reg_u32 (cpu, rm, NO_SP));
12276 }
12277
12278 /* 64 bit conditional select. */
12279 static void
12280 csel64 (sim_cpu *cpu, CondCode cc)
12281 {
12282 unsigned rm = INSTR (20, 16);
12283 unsigned rn = INSTR (9, 5);
12284 unsigned rd = INSTR (4, 0);
12285
12286 aarch64_set_reg_u64 (cpu, rd, NO_SP,
12287 testConditionCode (cpu, cc)
12288 ? aarch64_get_reg_u64 (cpu, rn, NO_SP)
12289 : aarch64_get_reg_u64 (cpu, rm, NO_SP));
12290 }
12291
12292 /* 32 bit conditional increment. */
12293 static void
12294 csinc32 (sim_cpu *cpu, CondCode cc)
12295 {
12296 unsigned rm = INSTR (20, 16);
12297 unsigned rn = INSTR (9, 5);
12298 unsigned rd = INSTR (4, 0);
12299
12300 aarch64_set_reg_u64 (cpu, rd, NO_SP,
12301 testConditionCode (cpu, cc)
12302 ? aarch64_get_reg_u32 (cpu, rn, NO_SP)
12303 : aarch64_get_reg_u32 (cpu, rm, NO_SP) + 1);
12304 }
12305
12306 /* 64 bit conditional increment. */
12307 static void
12308 csinc64 (sim_cpu *cpu, CondCode cc)
12309 {
12310 unsigned rm = INSTR (20, 16);
12311 unsigned rn = INSTR (9, 5);
12312 unsigned rd = INSTR (4, 0);
12313
12314 aarch64_set_reg_u64 (cpu, rd, NO_SP,
12315 testConditionCode (cpu, cc)
12316 ? aarch64_get_reg_u64 (cpu, rn, NO_SP)
12317 : aarch64_get_reg_u64 (cpu, rm, NO_SP) + 1);
12318 }
12319
12320 /* 32 bit conditional invert. */
12321 static void
12322 csinv32 (sim_cpu *cpu, CondCode cc)
12323 {
12324 unsigned rm = INSTR (20, 16);
12325 unsigned rn = INSTR (9, 5);
12326 unsigned rd = INSTR (4, 0);
12327
12328 aarch64_set_reg_u64 (cpu, rd, NO_SP,
12329 testConditionCode (cpu, cc)
12330 ? aarch64_get_reg_u32 (cpu, rn, NO_SP)
12331 : ~ aarch64_get_reg_u32 (cpu, rm, NO_SP));
12332 }
12333
12334 /* 64 bit conditional invert. */
12335 static void
12336 csinv64 (sim_cpu *cpu, CondCode cc)
12337 {
12338 unsigned rm = INSTR (20, 16);
12339 unsigned rn = INSTR (9, 5);
12340 unsigned rd = INSTR (4, 0);
12341
12342 aarch64_set_reg_u64 (cpu, rd, NO_SP,
12343 testConditionCode (cpu, cc)
12344 ? aarch64_get_reg_u64 (cpu, rn, NO_SP)
12345 : ~ aarch64_get_reg_u64 (cpu, rm, NO_SP));
12346 }
12347
12348 /* 32 bit conditional negate. */
12349 static void
12350 csneg32 (sim_cpu *cpu, CondCode cc)
12351 {
12352 unsigned rm = INSTR (20, 16);
12353 unsigned rn = INSTR (9, 5);
12354 unsigned rd = INSTR (4, 0);
12355
12356 aarch64_set_reg_u64 (cpu, rd, NO_SP,
12357 testConditionCode (cpu, cc)
12358 ? aarch64_get_reg_u32 (cpu, rn, NO_SP)
12359 : - aarch64_get_reg_u32 (cpu, rm, NO_SP));
12360 }
12361
12362 /* 64 bit conditional negate. */
12363 static void
12364 csneg64 (sim_cpu *cpu, CondCode cc)
12365 {
12366 unsigned rm = INSTR (20, 16);
12367 unsigned rn = INSTR (9, 5);
12368 unsigned rd = INSTR (4, 0);
12369
12370 aarch64_set_reg_u64 (cpu, rd, NO_SP,
12371 testConditionCode (cpu, cc)
12372 ? aarch64_get_reg_u64 (cpu, rn, NO_SP)
12373 : - aarch64_get_reg_u64 (cpu, rm, NO_SP));
12374 }
12375
12376 static void
12377 dexCondSelect (sim_cpu *cpu)
12378 {
12379 /* instr[28,21] = 11010100
12380 instr[31] = size : 0 ==> 32 bit, 1 ==> 64 bit
12381 instr[30],instr[11,10] = op : 000 ==> CSEL, 001 ==> CSINC,
12382 100 ==> CSINV, 101 ==> CSNEG,
12383 _1_ ==> UNALLOC
12384 instr[29] = S : 0 ==> ok, 1 ==> UNALLOC
12385 instr[15,12] = cond */
12387
12388 CondCode cc = INSTR (15, 12);
12389 uint32_t S = INSTR (29, 29);
12390 uint32_t op2 = INSTR (11, 10);
12391
12392 if (S == 1)
12393 HALT_UNALLOC;
12394
12395 if (op2 & 0x2)
12396 HALT_UNALLOC;
12397
12398 switch ((INSTR (31, 30) << 1) | op2)
12399 {
12400 case 0: csel32 (cpu, cc); return;
12401 case 1: csinc32 (cpu, cc); return;
12402 case 2: csinv32 (cpu, cc); return;
12403 case 3: csneg32 (cpu, cc); return;
12404 case 4: csel64 (cpu, cc); return;
12405 case 5: csinc64 (cpu, cc); return;
12406 case 6: csinv64 (cpu, cc); return;
12407 case 7: csneg64 (cpu, cc); return;
12408 }
12409 }
12410
12411 /* Some helpers for counting leading 1 or 0 bits. */
12412
12413 /* Counts the number of leading bits which are the same
12414 in a 32 bit value; the result is in the range 1 to 32. */
12415 static uint32_t
12416 leading32 (uint32_t value)
12417 {
12418 int32_t mask = 0xffff0000;
12419 uint32_t count = 16; /* Counts number of bits set in mask. */
12420 uint32_t lo = 1; /* Lower bound for number of sign bits. */
12421 uint32_t hi = 32; /* Upper bound for number of sign bits. */
12422
12423 while (lo + 1 < hi)
12424 {
12425 int32_t test = (value & mask);
12426
12427 if (test == 0 || test == mask)
12428 {
12429 lo = count;
12430 count = (lo + hi) / 2;
12431 mask >>= (count - lo);
12432 }
12433 else
12434 {
12435 hi = count;
12436 count = (lo + hi) / 2;
12437 mask <<= hi - count;
12438 }
12439 }
12440
12441 if (lo != hi)
12442 {
12443 int32_t test;
12444
12445 mask >>= 1;
12446 test = (value & mask);
12447
12448 if (test == 0 || test == mask)
12449 count = hi;
12450 else
12451 count = lo;
12452 }
12453
12454 return count;
12455 }
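
/* Worked example (illustrative): for value == 0x0000ffff the top 16
   bits are all zero and bit 15 is set, so the binary search above
   converges on count == 16; clz32 therefore returns 16 and cls32
   (which excludes the leading bit itself) returns 15. */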
12456
12457 /* Counts the number of leading bits which are the same
12458 in a 64 bit value; the result is in the range 1 to 64. */
12459 static uint64_t
12460 leading64 (uint64_t value)
12461 {
12462 int64_t mask = 0xffffffff00000000ULL;
12463 uint64_t count = 32; /* Counts number of bits set in mask. */
12464 uint64_t lo = 1; /* Lower bound for number of sign bits. */
12465 uint64_t hi = 64; /* Upper bound for number of sign bits. */
12466
12467 while (lo + 1 < hi)
12468 {
12469 int64_t test = (value & mask);
12470
12471 if (test == 0 || test == mask)
12472 {
12473 lo = count;
12474 count = (lo + hi) / 2;
12475 mask >>= (count - lo);
12476 }
12477 else
12478 {
12479 hi = count;
12480 count = (lo + hi) / 2;
12481 mask <<= hi - count;
12482 }
12483 }
12484
12485 if (lo != hi)
12486 {
12487 int64_t test;
12488
12489 mask >>= 1;
12490 test = (value & mask);
12491
12492 if (test == 0 || test == mask)
12493 count = hi;
12494 else
12495 count = lo;
12496 }
12497
12498 return count;
12499 }
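
/* For reference only: a loop-free equivalent, assuming a GCC/Clang
   style __builtin_clzll is available and that >> on a signed value is
   an arithmetic shift (an untested sketch, not part of the original
   simulator).  v ^ (v >> 1) has its highest set bit at the first
   position where adjacent bits differ:

     static uint64_t
     leading64_alt (uint64_t value)
     {
       int64_t v = value;
       // Arithmetic shift replicates the top bit; diff == 0 means all
       // 64 bits are identical, i.e. 64 leading same bits.
       uint64_t diff = v ^ (v >> 1);
       return diff ? __builtin_clzll (diff) : 64;
     }  */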
12500
12501 /* Bit operations. */
12502 /* N.B. register args may not be SP. */
12503
12504 /* 32 bit count leading sign bits. */
12505 static void
12506 cls32 (sim_cpu *cpu)
12507 {
12508 unsigned rn = INSTR (9, 5);
12509 unsigned rd = INSTR (4, 0);
12510
12511 /* N.B. the result needs to exclude the leading bit. */
12512 aarch64_set_reg_u64
12513 (cpu, rd, NO_SP, leading32 (aarch64_get_reg_u32 (cpu, rn, NO_SP)) - 1);
12514 }
12515
12516 /* 64 bit count leading sign bits. */
12517 static void
12518 cls64 (sim_cpu *cpu)
12519 {
12520 unsigned rn = INSTR (9, 5);
12521 unsigned rd = INSTR (4, 0);
12522
12523 /* N.B. the result needs to exclude the leading bit. */
12524 aarch64_set_reg_u64
12525 (cpu, rd, NO_SP, leading64 (aarch64_get_reg_u64 (cpu, rn, NO_SP)) - 1);
12526 }
12527
12528 /* 32 bit count leading zero bits. */
12529 static void
12530 clz32 (sim_cpu *cpu)
12531 {
12532 unsigned rn = INSTR (9, 5);
12533 unsigned rd = INSTR (4, 0);
12534 uint32_t value = aarch64_get_reg_u32 (cpu, rn, NO_SP);
12535
12536 /* if the sign (top) bit is set then the count is 0. */
12537 if (pick32 (value, 31, 31))
12538 aarch64_set_reg_u64 (cpu, rd, NO_SP, 0L);
12539 else
12540 aarch64_set_reg_u64 (cpu, rd, NO_SP, leading32 (value));
12541 }
12542
12543 /* 64 bit count leading zero bits. */
12544 static void
12545 clz64 (sim_cpu *cpu)
12546 {
12547 unsigned rn = INSTR (9, 5);
12548 unsigned rd = INSTR (4, 0);
12549 uint64_t value = aarch64_get_reg_u64 (cpu, rn, NO_SP);
12550
12551 /* if the sign (top) bit is set then the count is 0. */
12552 if (pick64 (value, 63, 63))
12553 aarch64_set_reg_u64 (cpu, rd, NO_SP, 0L);
12554 else
12555 aarch64_set_reg_u64 (cpu, rd, NO_SP, leading64 (value));
12556 }
12557
12558 /* 32 bit reverse bits. */
12559 static void
12560 rbit32 (sim_cpu *cpu)
12561 {
12562 unsigned rn = INSTR (9, 5);
12563 unsigned rd = INSTR (4, 0);
12564 uint32_t value = aarch64_get_reg_u32 (cpu, rn, NO_SP);
12565 uint32_t result = 0;
12566 int i;
12567
12568 for (i = 0; i < 32; i++)
12569 {
12570 result <<= 1;
12571 result |= (value & 1);
12572 value >>= 1;
12573 }
12574 aarch64_set_reg_u64 (cpu, rd, NO_SP, result);
12575 }
12576
12577 /* 64 bit reverse bits. */
12578 static void
12579 rbit64 (sim_cpu *cpu)
12580 {
12581 unsigned rn = INSTR (9, 5);
12582 unsigned rd = INSTR (4, 0);
12583 uint64_t value = aarch64_get_reg_u64 (cpu, rn, NO_SP);
12584 uint64_t result = 0;
12585 int i;
12586
12587 for (i = 0; i < 64; i++)
12588 {
12589 result <<= 1;
12590 result |= (value & 1UL);
12591 value >>= 1;
12592 }
12593 aarch64_set_reg_u64 (cpu, rd, NO_SP, result);
12594 }
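
/* Illustrative check values (not from the original sources):
   rbit32 (0x00000001) == 0x80000000 and
   rbit32 (0x12345678) == 0x1e6a2c48; rbit64 behaves identically
   over all 64 bits. */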
12595
12596 /* 32 bit reverse bytes. */
12597 static void
12598 rev32 (sim_cpu *cpu)
12599 {
12600 unsigned rn = INSTR (9, 5);
12601 unsigned rd = INSTR (4, 0);
12602 uint32_t value = aarch64_get_reg_u32 (cpu, rn, NO_SP);
12603 uint32_t result = 0;
12604 int i;
12605
12606 for (i = 0; i < 4; i++)
12607 {
12608 result <<= 8;
12609 result |= (value & 0xff);
12610 value >>= 8;
12611 }
12612 aarch64_set_reg_u64 (cpu, rd, NO_SP, result);
12613 }
12614
12615 /* 64 bit reverse bytes. */
12616 static void
12617 rev64 (sim_cpu *cpu)
12618 {
12619 unsigned rn = INSTR (9, 5);
12620 unsigned rd = INSTR (4, 0);
12621 uint64_t value = aarch64_get_reg_u64 (cpu, rn, NO_SP);
12622 uint64_t result = 0;
12623 int i;
12624
12625 for (i = 0; i < 8; i++)
12626 {
12627 result <<= 8;
12628 result |= (value & 0xffULL);
12629 value >>= 8;
12630 }
12631 aarch64_set_reg_u64 (cpu, rd, NO_SP, result);
12632 }
12633
12634 /* 32 bit reverse shorts. */
12635 /* N.B. this reverses the order of the bytes in each half word. */
12636 static void
12637 revh32 (sim_cpu *cpu)
12638 {
12639 unsigned rn = INSTR (9, 5);
12640 unsigned rd = INSTR (4, 0);
12641 uint32_t value = aarch64_get_reg_u32 (cpu, rn, NO_SP);
12642 uint32_t result = 0;
12643 int i;
12644
12645 for (i = 0; i < 2; i++)
12646 {
12647 result <<= 8;
12648 result |= (value & 0x00ff00ff);
12649 value >>= 8;
12650 }
12651 aarch64_set_reg_u64 (cpu, rd, NO_SP, result);
12652 }
12653
12654 /* 64 bit reverse shorts. */
12655 /* N.B. this reverses the order of the bytes in each half word. */
12656 static void
12657 revh64 (sim_cpu *cpu)
12658 {
12659 unsigned rn = INSTR (9, 5);
12660 unsigned rd = INSTR (4, 0);
12661 uint64_t value = aarch64_get_reg_u64 (cpu, rn, NO_SP);
12662 uint64_t result = 0;
12663 int i;
12664
12665 for (i = 0; i < 2; i++)
12666 {
12667 result <<= 8;
12668 result |= (value & 0x00ff00ff00ff00ffULL);
12669 value >>= 8;
12670 }
12671 aarch64_set_reg_u64 (cpu, rd, NO_SP, result);
12672 }
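
/* Illustrative check values (not from the original sources),
   contrasting the two byte reverses on 0x11223344:
     rev32  (whole word)     ==> 0x44332211
     revh32 (each half word) ==> 0x22114433  */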
12673
12674 static void
12675 dexDataProc1Source (sim_cpu *cpu)
12676 {
12677 /* instr[30] = 1
12678 instr[28,21] = 11010110
12679 instr[31] = size : 0 ==> 32 bit, 1 ==> 64 bit
12680 instr[29] = S : 0 ==> ok, 1 ==> UNALLOC
12681 instr[20,16] = opcode2 : 00000 ==> ok, ow ==> UNALLOC
12682 instr[15,10] = opcode : 000000 ==> RBIT, 000001 ==> REV16,
12683 000010 ==> REV, 000011 ==> UNALLOC
12684 000100 ==> CLZ, 000101 ==> CLS
12685 ow ==> UNALLOC
12686 instr[9,5] = rn : may not be SP
12687 instr[4,0] = rd : may not be SP. */
12688
12689 uint32_t S = INSTR (29, 29);
12690 uint32_t opcode2 = INSTR (20, 16);
12691 uint32_t opcode = INSTR (15, 10);
12692 uint32_t dispatch = ((INSTR (31, 31) << 3) | opcode);
12693
12694 if (S == 1)
12695 HALT_UNALLOC;
12696
12697 if (opcode2 != 0)
12698 HALT_UNALLOC;
12699
12700 if (opcode & 0x38)
12701 HALT_UNALLOC;
12702
12703 switch (dispatch)
12704 {
12705 case 0: rbit32 (cpu); return;
12706 case 1: revh32 (cpu); return;
12707 case 2: rev32 (cpu); return;
12708 case 4: clz32 (cpu); return;
12709 case 5: cls32 (cpu); return;
12710 case 8: rbit64 (cpu); return;
12711 case 9: revh64 (cpu); return;
12712 case 10: rev32 (cpu); return; /* FIXME: REV32 should also byte-reverse the upper word. */
12713 case 11: rev64 (cpu); return;
12714 case 12: clz64 (cpu); return;
12715 case 13: cls64 (cpu); return;
12716 default: HALT_UNALLOC;
12717 }
12718 }
12719
12720 /* Variable shift.
12721 Shifts by count supplied in register.
12722 N.B. register args may not be SP.
12723 These all use the shifted auxiliary function for
12724 simplicity and clarity. Writing the actual shift
12725 inline would avoid a branch and so be faster but
12726 would also necessitate getting signs right. */
12727
12728 /* 32 bit arithmetic shift right. */
12729 static void
12730 asrv32 (sim_cpu *cpu)
12731 {
12732 unsigned rm = INSTR (20, 16);
12733 unsigned rn = INSTR (9, 5);
12734 unsigned rd = INSTR (4, 0);
12735
12736 aarch64_set_reg_u64
12737 (cpu, rd, NO_SP,
12738 shifted32 (aarch64_get_reg_u32 (cpu, rn, NO_SP), ASR,
12739 (aarch64_get_reg_u32 (cpu, rm, NO_SP) & 0x1f)));
12740 }
12741
12742 /* 64 bit arithmetic shift right. */
12743 static void
12744 asrv64 (sim_cpu *cpu)
12745 {
12746 unsigned rm = INSTR (20, 16);
12747 unsigned rn = INSTR (9, 5);
12748 unsigned rd = INSTR (4, 0);
12749
12750 aarch64_set_reg_u64
12751 (cpu, rd, NO_SP,
12752 shifted64 (aarch64_get_reg_u64 (cpu, rn, NO_SP), ASR,
12753 (aarch64_get_reg_u64 (cpu, rm, NO_SP) & 0x3f)));
12754 }
12755
12756 /* 32 bit logical shift left. */
12757 static void
12758 lslv32 (sim_cpu *cpu)
12759 {
12760 unsigned rm = INSTR (20, 16);
12761 unsigned rn = INSTR (9, 5);
12762 unsigned rd = INSTR (4, 0);
12763
12764 aarch64_set_reg_u64
12765 (cpu, rd, NO_SP,
12766 shifted32 (aarch64_get_reg_u32 (cpu, rn, NO_SP), LSL,
12767 (aarch64_get_reg_u32 (cpu, rm, NO_SP) & 0x1f)));
12768 }
12769
12770 /* 64 bit logical shift left. */
12771 static void
12772 lslv64 (sim_cpu *cpu)
12773 {
12774 unsigned rm = INSTR (20, 16);
12775 unsigned rn = INSTR (9, 5);
12776 unsigned rd = INSTR (4, 0);
12777
12778 aarch64_set_reg_u64
12779 (cpu, rd, NO_SP,
12780 shifted64 (aarch64_get_reg_u64 (cpu, rn, NO_SP), LSL,
12781 (aarch64_get_reg_u64 (cpu, rm, NO_SP) & 0x3f)));
12782 }
12783
12784 /* 32 bit logical shift right. */
12785 static void
12786 lsrv32 (sim_cpu *cpu)
12787 {
12788 unsigned rm = INSTR (20, 16);
12789 unsigned rn = INSTR (9, 5);
12790 unsigned rd = INSTR (4, 0);
12791
12792 aarch64_set_reg_u64
12793 (cpu, rd, NO_SP,
12794 shifted32 (aarch64_get_reg_u32 (cpu, rn, NO_SP), LSR,
12795 (aarch64_get_reg_u32 (cpu, rm, NO_SP) & 0x1f)));
12796 }
12797
12798 /* 64 bit logical shift right. */
12799 static void
12800 lsrv64 (sim_cpu *cpu)
12801 {
12802 unsigned rm = INSTR (20, 16);
12803 unsigned rn = INSTR (9, 5);
12804 unsigned rd = INSTR (4, 0);
12805
12806 aarch64_set_reg_u64
12807 (cpu, rd, NO_SP,
12808 shifted64 (aarch64_get_reg_u64 (cpu, rn, NO_SP), LSR,
12809 (aarch64_get_reg_u64 (cpu, rm, NO_SP) & 0x3f)));
12810 }
12811
12812 /* 32 bit rotate right. */
12813 static void
12814 rorv32 (sim_cpu *cpu)
12815 {
12816 unsigned rm = INSTR (20, 16);
12817 unsigned rn = INSTR (9, 5);
12818 unsigned rd = INSTR (4, 0);
12819
12820 aarch64_set_reg_u64
12821 (cpu, rd, NO_SP,
12822 shifted32 (aarch64_get_reg_u32 (cpu, rn, NO_SP), ROR,
12823 (aarch64_get_reg_u32 (cpu, rm, NO_SP) & 0x1f)));
12824 }
12825
12826 /* 64 bit rotate right. */
12827 static void
12828 rorv64 (sim_cpu *cpu)
12829 {
12830 unsigned rm = INSTR (20, 16);
12831 unsigned rn = INSTR (9, 5);
12832 unsigned rd = INSTR (4, 0);
12833
12834 aarch64_set_reg_u64
12835 (cpu, rd, NO_SP,
12836 shifted64 (aarch64_get_reg_u64 (cpu, rn, NO_SP), ROR,
12837 (aarch64_get_reg_u64 (cpu, rm, NO_SP) & 0x3f)));
12838 }
12839
12840
12841 /* divide. */
12842
12843 /* 32 bit signed divide. */
12844 static void
12845 sdiv32 (sim_cpu *cpu)
12846 {
12847 unsigned rm = INSTR (20, 16);
12848 unsigned rn = INSTR (9, 5);
12849 unsigned rd = INSTR (4, 0);
12850 /* N.B. the pseudo-code does the divide using 64 bit data. */
12851 /* N.B. C99 division truncates towards zero, as required. */
12852 int64_t dividend = aarch64_get_reg_s32 (cpu, rn, NO_SP);
12853 int64_t divisor = aarch64_get_reg_s32 (cpu, rm, NO_SP);
12854
12855 aarch64_set_reg_s64 (cpu, rd, NO_SP,
12856 divisor ? ((int32_t) (dividend / divisor)) : 0);
12857 }
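
/* A note on the 32 bit signed divide above: doing the division in
   64 bit arithmetic, as the ARM pseudo-code does, also sidesteps the
   one overflowing case.  Worked example (illustrative):
   0x80000000 / -1 evaluates to +2^31 in 64 bits, and truncating back
   to 32 bits yields 0x80000000 -- the architected result -- where a
   direct 32 bit C division would overflow. */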
12858
12859 /* 64 bit signed divide. */
12860 static void
12861 sdiv64 (sim_cpu *cpu)
12862 {
12863 unsigned rm = INSTR (20, 16);
12864 unsigned rn = INSTR (9, 5);
12865 unsigned rd = INSTR (4, 0);
12866 int64_t dividend = aarch64_get_reg_s64 (cpu, rn, NO_SP);
12867 int64_t divisor = aarch64_get_reg_s64 (cpu, rm, NO_SP);
12868 /* N.B. C99 division truncates towards zero, as required.  INT64_MIN / -1
12869 overflows (undefined behaviour in C); the architected result is INT64_MIN. */
12870 aarch64_set_reg_s64 (cpu, rd, NO_SP, divisor == 0 ? 0
12871 : (dividend == INT64_MIN && divisor == -1) ? INT64_MIN
12872 : dividend / divisor);
12873 }
12874
12875 /* 32 bit unsigned divide. */
12876 static void
12877 udiv32 (sim_cpu *cpu)
12878 {
12879 unsigned rm = INSTR (20, 16);
12880 unsigned rn = INSTR (9, 5);
12881 unsigned rd = INSTR (4, 0);
12882
12883 /* N.B. the pseudo-code does the divide using 64 bit data. */
12884 uint64_t dividend = aarch64_get_reg_u32 (cpu, rn, NO_SP);
12885 uint64_t divisor = aarch64_get_reg_u32 (cpu, rm, NO_SP);
12886
12887 aarch64_set_reg_u64 (cpu, rd, NO_SP,
12888 divisor ? (uint32_t) (dividend / divisor) : 0);
12889 }
12890
12891 /* 64 bit unsigned divide. */
12892 static void
12893 udiv64 (sim_cpu *cpu)
12894 {
12895 unsigned rm = INSTR (20, 16);
12896 unsigned rn = INSTR (9, 5);
12897 unsigned rd = INSTR (4, 0);
12898
12899 /* N.B. C division of unsigned values truncates, as required. */
12900 uint64_t divisor = aarch64_get_reg_u64 (cpu, rm, NO_SP);
12901
12902 aarch64_set_reg_u64
12903 (cpu, rd, NO_SP,
12904 divisor ? (aarch64_get_reg_u64 (cpu, rn, NO_SP) / divisor) : 0);
12905 }
12906
12907 static void
12908 dexDataProc2Source (sim_cpu *cpu)
12909 {
12910 /* assert instr[30] == 0
12911 instr[28,21] == 11010110
12912 instr[31] = size : 0 ==> 32 bit, 1 ==> 64 bit
12913 instr[29] = S : 0 ==> ok, 1 ==> UNALLOC
12914 instr[15,10] = opcode : 000010 ==> UDIV, 000011 ==> SDIV,
12915 001000 ==> LSLV, 001001 ==> LSRV
12916 001010 ==> ASRV, 001011 ==> RORV
12917 ow ==> UNALLOC. */
12918
12919 uint32_t dispatch;
12920 uint32_t S = INSTR (29, 29);
12921 uint32_t opcode = INSTR (15, 10);
12922
12923 if (S == 1)
12924 HALT_UNALLOC;
12925
12926 if (opcode & 0x34)
12927 HALT_UNALLOC;
12928
12929 dispatch = ( (INSTR (31, 31) << 3)
12930 | (uimm (opcode, 3, 3) << 2)
12931 | uimm (opcode, 1, 0));
12932 switch (dispatch)
12933 {
12934 case 2: udiv32 (cpu); return;
12935 case 3: sdiv32 (cpu); return;
12936 case 4: lslv32 (cpu); return;
12937 case 5: lsrv32 (cpu); return;
12938 case 6: asrv32 (cpu); return;
12939 case 7: rorv32 (cpu); return;
12940 case 10: udiv64 (cpu); return;
12941 case 11: sdiv64 (cpu); return;
12942 case 12: lslv64 (cpu); return;
12943 case 13: lsrv64 (cpu); return;
12944 case 14: asrv64 (cpu); return;
12945 case 15: rorv64 (cpu); return;
12946 default: HALT_UNALLOC;
12947 }
12948 }
12949
12950
12951 /* Multiply. */
12952
12953 /* 32 bit multiply and add. */
12954 static void
12955 madd32 (sim_cpu *cpu)
12956 {
12957 unsigned rm = INSTR (20, 16);
12958 unsigned ra = INSTR (14, 10);
12959 unsigned rn = INSTR (9, 5);
12960 unsigned rd = INSTR (4, 0);
12961
12962 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
12963 aarch64_set_reg_u64 (cpu, rd, NO_SP,
12964 aarch64_get_reg_u32 (cpu, ra, NO_SP)
12965 + aarch64_get_reg_u32 (cpu, rn, NO_SP)
12966 * aarch64_get_reg_u32 (cpu, rm, NO_SP));
12967 }
12968
12969 /* 64 bit multiply and add. */
12970 static void
12971 madd64 (sim_cpu *cpu)
12972 {
12973 unsigned rm = INSTR (20, 16);
12974 unsigned ra = INSTR (14, 10);
12975 unsigned rn = INSTR (9, 5);
12976 unsigned rd = INSTR (4, 0);
12977
12978 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
12979 aarch64_set_reg_u64 (cpu, rd, NO_SP,
12980 aarch64_get_reg_u64 (cpu, ra, NO_SP)
12981 + (aarch64_get_reg_u64 (cpu, rn, NO_SP)
12982 * aarch64_get_reg_u64 (cpu, rm, NO_SP)));
12983 }
12984
12985 /* 32 bit multiply and sub. */
12986 static void
12987 msub32 (sim_cpu *cpu)
12988 {
12989 unsigned rm = INSTR (20, 16);
12990 unsigned ra = INSTR (14, 10);
12991 unsigned rn = INSTR (9, 5);
12992 unsigned rd = INSTR (4, 0);
12993
12994 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
12995 aarch64_set_reg_u64 (cpu, rd, NO_SP,
12996 aarch64_get_reg_u32 (cpu, ra, NO_SP)
12997 - aarch64_get_reg_u32 (cpu, rn, NO_SP)
12998 * aarch64_get_reg_u32 (cpu, rm, NO_SP));
12999 }
13000
13001 /* 64 bit multiply and sub. */
13002 static void
13003 msub64 (sim_cpu *cpu)
13004 {
13005 unsigned rm = INSTR (20, 16);
13006 unsigned ra = INSTR (14, 10);
13007 unsigned rn = INSTR (9, 5);
13008 unsigned rd = INSTR (4, 0);
13009
13010 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
13011 aarch64_set_reg_u64 (cpu, rd, NO_SP,
13012 aarch64_get_reg_u64 (cpu, ra, NO_SP)
13013 - aarch64_get_reg_u64 (cpu, rn, NO_SP)
13014 * aarch64_get_reg_u64 (cpu, rm, NO_SP));
13015 }
13016
13017 /* Signed multiply add long -- source, source2 : 32 bit, source3 : 64 bit. */
13018 static void
13019 smaddl (sim_cpu *cpu)
13020 {
13021 unsigned rm = INSTR (20, 16);
13022 unsigned ra = INSTR (14, 10);
13023 unsigned rn = INSTR (9, 5);
13024 unsigned rd = INSTR (4, 0);
13025
13026 /* N.B. we need to multiply the signed 32 bit values in rn, rm to
13027 obtain a 64 bit product. */
13028 aarch64_set_reg_s64
13029 (cpu, rd, NO_SP,
13030 aarch64_get_reg_s64 (cpu, ra, NO_SP)
13031 + ((int64_t) aarch64_get_reg_s32 (cpu, rn, NO_SP))
13032 * ((int64_t) aarch64_get_reg_s32 (cpu, rm, NO_SP)));
13033 }
13034
13035 /* Signed multiply sub long -- source, source2 : 32 bit, source3 : 64 bit. */
13036 static void
13037 smsubl (sim_cpu *cpu)
13038 {
13039 unsigned rm = INSTR (20, 16);
13040 unsigned ra = INSTR (14, 10);
13041 unsigned rn = INSTR (9, 5);
13042 unsigned rd = INSTR (4, 0);
13043
13044 /* N.B. we need to multiply the signed 32 bit values in rn, rm to
13045 obtain a 64 bit product. */
13046 aarch64_set_reg_s64
13047 (cpu, rd, NO_SP,
13048 aarch64_get_reg_s64 (cpu, ra, NO_SP)
13049 - ((int64_t) aarch64_get_reg_s32 (cpu, rn, NO_SP))
13050 * ((int64_t) aarch64_get_reg_s32 (cpu, rm, NO_SP)));
13051 }
13052
13053 /* Integer Multiply/Divide. */
13054
13055 /* First some macros and a helper function. */
13056 /* Macros to test or access elements of 64 bit words. */
13057
13058 /* Mask used to access lo 32 bits of 64 bit unsigned int. */
13059 #define LOW_WORD_MASK ((1ULL << 32) - 1)
13060 /* Return the lo 32 bit word of a 64 bit unsigned int as a 64 bit unsigned int. */
13061 #define lowWordToU64(_value_u64) ((_value_u64) & LOW_WORD_MASK)
13062 /* Return the hi 32 bit word of a 64 bit unsigned int as a 64 bit unsigned int. */
13063 #define highWordToU64(_value_u64) ((_value_u64) >> 32)
13064
13065 /* Offset of sign bit in 64 bit signed integer. */
13066 #define SIGN_SHIFT_U64 63
13067 /* The sign bit itself -- also identifies the minimum negative int value. */
13068 #define SIGN_BIT_U64 (1ULL << SIGN_SHIFT_U64)
13069 /* Return true if a 64 bit signed int presented as an unsigned int is the
13070 most negative value. */
13071 #define isMinimumU64(_value_u64) ((_value_u64) == SIGN_BIT_U64)
13072 /* Return true (non-zero) if a 64 bit signed int presented as an unsigned
13073 int has its sign bit set. */
13074 #define isSignSetU64(_value_u64) ((_value_u64) & SIGN_BIT_U64)
13075 /* Return -1L or 1L according to whether a 64 bit signed int presented as
13076 an unsigned int has its sign bit set or not. */
13077 #define signOfU64(_value_u64) (1L + (((_value_u64) >> SIGN_SHIFT_U64) * -2L))
13078 /* Clear the sign bit of a 64 bit signed int presented as an unsigned int. */
13079 #define clearSignU64(_value_u64) ((_value_u64) &= ~SIGN_BIT_U64)
13080
13081 /* Multiply two 64 bit ints and return
13082 the hi 64 bits of the 128 bit product. */
13083
13084 static uint64_t
13085 mul64hi (uint64_t value1, uint64_t value2)
13086 {
13087 uint64_t resultmid1;
13088 uint64_t result;
13089 uint64_t value1_lo = lowWordToU64 (value1);
13090 uint64_t value1_hi = highWordToU64 (value1);
13091 uint64_t value2_lo = lowWordToU64 (value2);
13092 uint64_t value2_hi = highWordToU64 (value2);
13093
13094 /* Cross-multiply and collect results. */
13095 uint64_t xproductlo = value1_lo * value2_lo;
13096 uint64_t xproductmid1 = value1_lo * value2_hi;
13097 uint64_t xproductmid2 = value1_hi * value2_lo;
13098 uint64_t xproducthi = value1_hi * value2_hi;
13099 uint64_t carry = 0;
13100 /* Start accumulating 64 bit results. */
13101 /* Drop bottom half of lowest cross-product. */
13102 uint64_t resultmid = xproductlo >> 32;
13103 /* Add in middle products. */
13104 resultmid = resultmid + xproductmid1;
13105
13106 /* Check for overflow. */
13107 if (resultmid < xproductmid1)
13108 /* Carry over 1 into top cross-product. */
13109 carry++;
13110
13111 resultmid1 = resultmid + xproductmid2;
13112
13113 /* Check for overflow. */
13114 if (resultmid1 < xproductmid2)
13115 /* Carry over 1 into top cross-product. */
13116 carry++;
13117
13118 /* Drop lowest 32 bits of middle cross-product. */
13119 result = resultmid1 >> 32;
13120 /* Move carry bit to just above middle cross-product highest bit. */
13121 carry = carry << 32;
13122
13123 /* Add the top cross-product and any carry. */
13124 result += xproducthi + carry;
13125
13126 return result;
13127 }
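
/* Sanity-check values for mul64hi (illustrative, not from the original
   sources): mul64hi (1ULL << 32, 1ULL << 32) == 1, and
   mul64hi (~0ULL, ~0ULL) == ~0ULL - 1.  For reference only, on
   compilers providing a 128 bit type the whole routine reduces to an
   (untested) one-liner:

     static uint64_t
     mul64hi_alt (uint64_t a, uint64_t b)
     {
       // Widen, multiply, keep the high 64 bits of the 128 bit product.
       return (uint64_t) (((unsigned __int128) a * b) >> 64);
     }  */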
13128
13129 /* Signed multiply high, source, source2 :
13130 64 bit, dest <-- high 64-bit of result. */
13131 static void
13132 smulh (sim_cpu *cpu)
13133 {
13134 uint64_t uresult;
13135 int64_t result;
13136 unsigned rm = INSTR (20, 16);
13137 unsigned rn = INSTR (9, 5);
13138 unsigned rd = INSTR (4, 0);
13139 GReg ra = INSTR (14, 10);
13140 int64_t value1 = aarch64_get_reg_u64 (cpu, rn, NO_SP);
13141 int64_t value2 = aarch64_get_reg_u64 (cpu, rm, NO_SP);
13142 uint64_t uvalue1;
13143 uint64_t uvalue2;
13144 int negate = 0;
13145
13146 if (ra != R31)
13147 HALT_UNALLOC;
13148
13149 /* Convert to unsigned and use the unsigned mul64hi routine
13150 then fix the sign up afterwards. */
13151 if (value1 < 0)
13152 {
13153 negate = !negate;
13154 uvalue1 = -value1;
13155 }
13156 else
13157 {
13158 uvalue1 = value1;
13159 }
13160
13161 if (value2 < 0)
13162 {
13163 negate = !negate;
13164 uvalue2 = -value2;
13165 }
13166 else
13167 {
13168 uvalue2 = value2;
13169 }
13170
13171 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
13172
13173 uresult = mul64hi (uvalue1, uvalue2);
13174 result = uresult;
13175
13176 if (negate)
13177 {
13178 /* Multiply the 128-bit result by -1: the high part gets inverted,
13179 with a carry added in only if the low part is 0. */
13180 result = ~result;
13181 if ((uvalue1 * uvalue2) == 0)
13182 result += 1;
13183 }
13184
13185 aarch64_set_reg_s64 (cpu, rd, NO_SP, result);
13186 }
13187
13188 /* Unsigned multiply add long -- source, source2 :
13189 32 bit, source3 : 64 bit. */
13190 static void
13191 umaddl (sim_cpu *cpu)
13192 {
13193 unsigned rm = INSTR (20, 16);
13194 unsigned ra = INSTR (14, 10);
13195 unsigned rn = INSTR (9, 5);
13196 unsigned rd = INSTR (4, 0);
13197
13198 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
13199 /* N.B. we need to multiply the unsigned 32 bit values in rn, rm to
13200 obtain a 64 bit product. */
13201 aarch64_set_reg_u64
13202 (cpu, rd, NO_SP,
13203 aarch64_get_reg_u64 (cpu, ra, NO_SP)
13204 + ((uint64_t) aarch64_get_reg_u32 (cpu, rn, NO_SP))
13205 * ((uint64_t) aarch64_get_reg_u32 (cpu, rm, NO_SP)));
13206 }
13207
13208 /* Unsigned multiply sub long -- source, source2 : 32 bit, source3 : 64 bit. */
13209 static void
13210 umsubl (sim_cpu *cpu)
13211 {
13212 unsigned rm = INSTR (20, 16);
13213 unsigned ra = INSTR (14, 10);
13214 unsigned rn = INSTR (9, 5);
13215 unsigned rd = INSTR (4, 0);
13216
13217 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
13218 /* N.B. we need to multiply the unsigned 32 bit values in rn, rm to
13219 obtain a 64 bit product. */
13220 aarch64_set_reg_u64
13221 (cpu, rd, NO_SP,
13222 aarch64_get_reg_u64 (cpu, ra, NO_SP)
13223 - ((uint64_t) aarch64_get_reg_u32 (cpu, rn, NO_SP))
13224 * ((uint64_t) aarch64_get_reg_u32 (cpu, rm, NO_SP)));
13225 }
13226
13227 /* Unsigned multiply high, source, source2 :
13228 64 bit, dest <-- high 64-bit of result. */
13229 static void
13230 umulh (sim_cpu *cpu)
13231 {
13232 unsigned rm = INSTR (20, 16);
13233 unsigned rn = INSTR (9, 5);
13234 unsigned rd = INSTR (4, 0);
13235 GReg ra = INSTR (14, 10);
13236
13237 if (ra != R31)
13238 HALT_UNALLOC;
13239
13240 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
13241 aarch64_set_reg_u64 (cpu, rd, NO_SP,
13242 mul64hi (aarch64_get_reg_u64 (cpu, rn, NO_SP),
13243 aarch64_get_reg_u64 (cpu, rm, NO_SP)));
13244 }
13245
13246 static void
13247 dexDataProc3Source (sim_cpu *cpu)
13248 {
13249 /* assert instr[28,24] == 11011. */
13250 /* instr[31] = size : 0 ==> 32 bit, 1 ==> 64 bit (for rd at least)
13251 instr[30,29] = op54 : 00 ==> ok, ow ==> UNALLOC
13252 instr[23,21] = op31 : 111 ==> UNALLOC, ow ==> ok
13253 instr[15] = o0 : 0/1 ==> ok
13254 instr[23,21],instr[15] ==> op : 0000 ==> MADD, 0001 ==> MSUB, (32/64 bit)
13255 0010 ==> SMADDL, 0011 ==> SMSUBL, (64 bit only)
13256 0100 ==> SMULH, (64 bit only)
13257 1010 ==> UMADDL, 1011 ==> UMSUBL, (64 bit only)
13258 1100 ==> UMULH (64 bit only)
13259 ow ==> UNALLOC. */
13260
13261 uint32_t dispatch;
13262 uint32_t size = INSTR (31, 31);
13263 uint32_t op54 = INSTR (30, 29);
13264 uint32_t op31 = INSTR (23, 21);
13265 uint32_t o0 = INSTR (15, 15);
13266
13267 if (op54 != 0)
13268 HALT_UNALLOC;
13269
13270 if (size == 0)
13271 {
13272 if (op31 != 0)
13273 HALT_UNALLOC;
13274
13275 if (o0 == 0)
13276 madd32 (cpu);
13277 else
13278 msub32 (cpu);
13279 return;
13280 }
13281
13282 dispatch = (op31 << 1) | o0;
13283
13284 switch (dispatch)
13285 {
13286 case 0: madd64 (cpu); return;
13287 case 1: msub64 (cpu); return;
13288 case 2: smaddl (cpu); return;
13289 case 3: smsubl (cpu); return;
13290 case 4: smulh (cpu); return;
13291 case 10: umaddl (cpu); return;
13292 case 11: umsubl (cpu); return;
13293 case 12: umulh (cpu); return;
13294 default: HALT_UNALLOC;
13295 }
13296 }
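
/* Dispatch example for dexDataProc3Source (illustrative): for
   UMADDL Xd, Wn, Wm, Xa the fields are size == 1, op31 == 101 and
   o0 == 0, giving dispatch == (5 << 1) | 0 == 10, which selects
   umaddl above. */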
13297
13298 static void
13299 dexDPReg (sim_cpu *cpu)
13300 {
13301 /* uint32_t group = dispatchGroup (aarch64_get_instr (cpu));
13302 assert group == GROUP_DPREG_0101 || group == GROUP_DPREG_1101
13303 bits [28], [24] and [21] of a DPReg are the secondary dispatch vector. */
13304 uint32_t group2 = dispatchDPReg (aarch64_get_instr (cpu));
13305
13306 switch (group2)
13307 {
13308 case DPREG_LOG_000:
13309 case DPREG_LOG_001:
13310 dexLogicalShiftedRegister (cpu); return;
13311
13312 case DPREG_ADDSHF_010:
13313 dexAddSubtractShiftedRegister (cpu); return;
13314
13315 case DPREG_ADDEXT_011:
13316 dexAddSubtractExtendedRegister (cpu); return;
13317
13318 case DPREG_ADDCOND_100:
13319 {
13320 /* This set bundles a variety of different operations. */
13321 /* Check for one of : */
13322 /* 1) add/sub w carry. */
13323 uint32_t mask1 = 0x1FE00000U;
13324 uint32_t val1 = 0x1A000000U;
13325 /* 2) cond compare register/immediate. */
13326 uint32_t mask2 = 0x1FE00000U;
13327 uint32_t val2 = 0x1A400000U;
13328 /* 3) cond select. */
13329 uint32_t mask3 = 0x1FE00000U;
13330 uint32_t val3 = 0x1A800000U;
13331 /* 4) data proc 1/2 source. */
13332 uint32_t mask4 = 0x1FE00000U;
13333 uint32_t val4 = 0x1AC00000U;
13334
13335 if ((aarch64_get_instr (cpu) & mask1) == val1)
13336 dexAddSubtractWithCarry (cpu);
13337
13338 else if ((aarch64_get_instr (cpu) & mask2) == val2)
13339 CondCompare (cpu);
13340
13341 else if ((aarch64_get_instr (cpu) & mask3) == val3)
13342 dexCondSelect (cpu);
13343
13344 else if ((aarch64_get_instr (cpu) & mask4) == val4)
13345 {
13346 /* Bit 30 is clear for data proc 2 source
13347 and set for data proc 1 source. */
13348 if (aarch64_get_instr (cpu) & (1U << 30))
13349 dexDataProc1Source (cpu);
13350 else
13351 dexDataProc2Source (cpu);
13352 }
13353
13354 else
13355 /* Should not reach here. */
13356 HALT_NYI;
13357
13358 return;
13359 }
13360
13361 case DPREG_3SRC_110:
13362 dexDataProc3Source (cpu); return;
13363
13364 case DPREG_UNALLOC_101:
13365 HALT_UNALLOC;
13366
13367 case DPREG_3SRC_111:
13368 dexDataProc3Source (cpu); return;
13369
13370 default:
13371 /* Should never reach here. */
13372 HALT_NYI;
13373 }
13374 }
13375
13376 /* Unconditional Branch immediate.
13377 Offset is a PC-relative byte offset in the range +/- 128MiB.
13378 The offset is assumed to be raw from the decode, i.e. the
13379 simulator is expected to scale it from a word offset to a byte offset. */
13380
13381 /* Unconditional branch. */
13382 static void
13383 buc (sim_cpu *cpu, int32_t offset)
13384 {
13385 aarch64_set_next_PC_by_offset (cpu, offset);
13386 }
13387
13388 static unsigned stack_depth = 0;
13389
13390 /* Unconditional branch and link -- writes return PC to LR. */
13391 static void
13392 bl (sim_cpu *cpu, int32_t offset)
13393 {
13394 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
13395 aarch64_save_LR (cpu);
13396 aarch64_set_next_PC_by_offset (cpu, offset);
13397
13398 if (TRACE_BRANCH_P (cpu))
13399 {
13400 ++ stack_depth;
13401 TRACE_BRANCH (cpu,
13402 " %*scall %" PRIx64 " [%s]"
13403 " [args: %" PRIx64 " %" PRIx64 " %" PRIx64 "]",
13404 stack_depth, " ", aarch64_get_next_PC (cpu),
13405 aarch64_get_func (CPU_STATE (cpu),
13406 aarch64_get_next_PC (cpu)),
13407 aarch64_get_reg_u64 (cpu, 0, NO_SP),
13408 aarch64_get_reg_u64 (cpu, 1, NO_SP),
13409 aarch64_get_reg_u64 (cpu, 2, NO_SP)
13410 );
13411 }
13412 }
13413
13414 /* Unconditional Branch register.
13415 Branch/return address is in source register. */
13416
13417 /* Unconditional branch. */
13418 static void
13419 br (sim_cpu *cpu)
13420 {
13421 unsigned rn = INSTR (9, 5);
13422 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
13423 aarch64_set_next_PC (cpu, aarch64_get_reg_u64 (cpu, rn, NO_SP));
13424 }
13425
13426 /* Unconditional branch and link -- writes return PC to LR. */
13427 static void
13428 blr (sim_cpu *cpu)
13429 {
13430 /* Ensure we read the destination before we write LR. */
13431 uint64_t target = aarch64_get_reg_u64 (cpu, INSTR (9, 5), NO_SP);
13432
13433 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
13434 aarch64_save_LR (cpu);
13435 aarch64_set_next_PC (cpu, target);
13436
13437 if (TRACE_BRANCH_P (cpu))
13438 {
13439 ++ stack_depth;
13440 TRACE_BRANCH (cpu,
13441 " %*scall %" PRIx64 " [%s]"
13442 " [args: %" PRIx64 " %" PRIx64 " %" PRIx64 "]",
13443 stack_depth, " ", aarch64_get_next_PC (cpu),
13444 aarch64_get_func (CPU_STATE (cpu),
13445 aarch64_get_next_PC (cpu)),
13446 aarch64_get_reg_u64 (cpu, 0, NO_SP),
13447 aarch64_get_reg_u64 (cpu, 1, NO_SP),
13448 aarch64_get_reg_u64 (cpu, 2, NO_SP)
13449 );
13450 }
13451 }
13452
13453 /* Return -- the assembler will default the source register to LR.  This
13454 is functionally equivalent to br but, presumably, unlike br it side
13455 effects the branch predictor. */
13456 static void
13457 ret (sim_cpu *cpu)
13458 {
13459 unsigned rn = INSTR (9, 5);
13460 aarch64_set_next_PC (cpu, aarch64_get_reg_u64 (cpu, rn, NO_SP));
13461
13462 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
13463 if (TRACE_BRANCH_P (cpu))
13464 {
13465 TRACE_BRANCH (cpu,
13466 " %*sreturn [result: %" PRIx64 "]",
13467 stack_depth, " ", aarch64_get_reg_u64 (cpu, 0, NO_SP));
13468 -- stack_depth;
13469 }
13470 }
13471
13472 /* NOP -- we implement this and call it from the decode in case we
13473 want to intercept it later. */
13474
13475 static void
13476 nop (sim_cpu *cpu)
13477 {
13478 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
13479 }
13480
13481 /* Data synchronization barrier. */
13482
13483 static void
13484 dsb (sim_cpu *cpu)
13485 {
13486 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
13487 }
13488
13489 /* Data memory barrier. */
13490
13491 static void
13492 dmb (sim_cpu *cpu)
13493 {
13494 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
13495 }
13496
13497 /* Instruction synchronization barrier. */
13498
13499 static void
13500 isb (sim_cpu *cpu)
13501 {
13502 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
13503 }
13504
13505 static void
13506 dexBranchImmediate (sim_cpu *cpu)
13507 {
13508 /* assert instr[30,26] == 00101
13509 instr[31] : 0 ==> B, 1 ==> BL
13510 instr[25,0] == imm26 branch offset counted in words. */
13511
13512 uint32_t top = INSTR (31, 31);
13513 /* We have a 26 bit signed word offset which we need to pass to the
13514 execute routine as a signed byte offset. */
13515 int32_t offset = simm32 (aarch64_get_instr (cpu), 25, 0) << 2;
13516
13517 if (top)
13518 bl (cpu, offset);
13519 else
13520 buc (cpu, offset);
13521 }
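
/* Scaling example (illustrative): imm26 == 1 gives offset == 4 (one
   instruction forward), imm26 == 0x3ffffff (i.e. -1) gives offset ==
   -4, and the 26 bit field spans +/- 2^25 words == +/- 128MiB. */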
13522
13523 /* Control Flow. */
13524
13525 /* Conditional branch
13526
13527 Offset is a PC-relative byte offset in the range +/- 1MiB.  pos is
13528 a bit position in the range 0 .. 63 (used only by the test branches).
13529
13530 cc is a CondCode enum value as pulled out of the decode.
13531
13532 N.B. any test register (source) can only be Xn or Wn. */
13533
13534 static void
13535 bcc (sim_cpu *cpu, int32_t offset, CondCode cc)
13536 {
13537 /* The test returns TRUE if CC is met. */
13538 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
13539 if (testConditionCode (cpu, cc))
13540 aarch64_set_next_PC_by_offset (cpu, offset);
13541 }
13542
13543 /* 32 bit branch on register non-zero. */
13544 static void
13545 cbnz32 (sim_cpu *cpu, int32_t offset)
13546 {
13547 unsigned rt = INSTR (4, 0);
13548
13549 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
13550 if (aarch64_get_reg_u32 (cpu, rt, NO_SP) != 0)
13551 aarch64_set_next_PC_by_offset (cpu, offset);
13552 }
13553
13554 /* 64 bit branch on register non-zero. */
13555 static void
13556 cbnz (sim_cpu *cpu, int32_t offset)
13557 {
13558 unsigned rt = INSTR (4, 0);
13559
13560 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
13561 if (aarch64_get_reg_u64 (cpu, rt, NO_SP) != 0)
13562 aarch64_set_next_PC_by_offset (cpu, offset);
13563 }
13564
13565 /* 32 bit branch on register zero. */
13566 static void
13567 cbz32 (sim_cpu *cpu, int32_t offset)
13568 {
13569 unsigned rt = INSTR (4, 0);
13570
13571 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
13572 if (aarch64_get_reg_u32 (cpu, rt, NO_SP) == 0)
13573 aarch64_set_next_PC_by_offset (cpu, offset);
13574 }
13575
13576 /* 64 bit branch on register zero. */
13577 static void
13578 cbz (sim_cpu *cpu, int32_t offset)
13579 {
13580 unsigned rt = INSTR (4, 0);
13581
13582 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
13583 if (aarch64_get_reg_u64 (cpu, rt, NO_SP) == 0)
13584 aarch64_set_next_PC_by_offset (cpu, offset);
13585 }
13586
13587 /* Branch on register bit test non-zero -- one size fits all. */
13588 static void
13589 tbnz (sim_cpu *cpu, uint32_t pos, int32_t offset)
13590 {
13591 unsigned rt = INSTR (4, 0);
13592
13593 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
13594 if (aarch64_get_reg_u64 (cpu, rt, NO_SP) & (((uint64_t) 1) << pos))
13595 aarch64_set_next_PC_by_offset (cpu, offset);
13596 }
13597
13598 /* Branch on register bit test zero -- one size fits all. */
13599 static void
13600 tbz (sim_cpu *cpu, uint32_t pos, int32_t offset)
13601 {
13602 unsigned rt = INSTR (4, 0);
13603
13604 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
13605 if (!(aarch64_get_reg_u64 (cpu, rt, NO_SP) & (((uint64_t) 1) << pos)))
13606 aarch64_set_next_PC_by_offset (cpu, offset);
13607 }
13608
13609 static void
13610 dexCompareBranchImmediate (sim_cpu *cpu)
13611 {
13612 /* instr[30,25] = 01 1010
13613 instr[31] = size : 0 ==> 32, 1 ==> 64
13614 instr[24] = op : 0 ==> CBZ, 1 ==> CBNZ
13615 instr[23,5] = simm19 branch offset counted in words
13616 instr[4,0] = rt */
13617
13618 uint32_t size = INSTR (31, 31);
13619 uint32_t op = INSTR (24, 24);
13620 int32_t offset = simm32 (aarch64_get_instr (cpu), 23, 5) << 2;
13621
13622 if (size == 0)
13623 {
13624 if (op == 0)
13625 cbz32 (cpu, offset);
13626 else
13627 cbnz32 (cpu, offset);
13628 }
13629 else
13630 {
13631 if (op == 0)
13632 cbz (cpu, offset);
13633 else
13634 cbnz (cpu, offset);
13635 }
13636 }
13637
13638 static void
13639 dexTestBranchImmediate (sim_cpu *cpu)
13640 {
13641 /* instr[31] = b5 : bit 5 of test bit idx
13642 instr[30,25] = 01 1011
13643 instr[24] = op : 0 ==> TBZ, 1 ==> TBNZ
13644 instr[23,19] = b40 : bits 4 to 0 of test bit idx
13645 instr[18,5] = simm14 : signed offset counted in words
13646 instr[4,0] = uimm5 */
13647
13648 uint32_t pos = ((INSTR (31, 31) << 5) | INSTR (23, 19));
13649 int32_t offset = simm32 (aarch64_get_instr (cpu), 18, 5) << 2;
13650
13651 NYI_assert (30, 25, 0x1b);
13652
13653 if (INSTR (24, 24) == 0)
13654 tbz (cpu, pos, offset);
13655 else
13656 tbnz (cpu, pos, offset);
13657 }
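
/* Bit-position example (illustrative): TBNZ X5, #63, label encodes
   b5 == 1 and b40 == 0b11111, so pos == (1 << 5) | 31 == 63, i.e. the
   sign bit of X5 is tested. */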
13658
13659 static void
13660 dexCondBranchImmediate (sim_cpu *cpu)
13661 {
13662 /* instr[31,25] = 010 1010
13663 instr[24] = op1 : (op1,op0) == 00 ==> B.cond, ow ==> UNALLOC
13664 instr[23,5] = simm19 : signed offset counted in words
13665 instr[4] = op0
13666 instr[3,0] = cond */
13667
13668 int32_t offset;
13669 uint32_t op = ((INSTR (24, 24) << 1) | INSTR (4, 4));
13670
13671 NYI_assert (31, 25, 0x2a);
13672
13673 if (op != 0)
13674 HALT_UNALLOC;
13675
13676 offset = simm32 (aarch64_get_instr (cpu), 23, 5) << 2;
13677
13678 bcc (cpu, offset, INSTR (3, 0));
13679 }
13680
13681 static void
13682 dexBranchRegister (sim_cpu *cpu)
13683 {
13684 /* instr[31,25] = 110 1011
13685 instr[24,21] = op : 0 ==> BR, 1 ==> BLR, 2 ==> RET, 4 ==> ERET, 5 ==> DRPS
13686 instr[20,16] = op2 : must be 11111
13687 instr[15,10] = op3 : must be 000000
13688 instr[4,0] = op4 : must be 00000. */
13689
13690 uint32_t op = INSTR (24, 21);
13691 uint32_t op2 = INSTR (20, 16);
13692 uint32_t op3 = INSTR (15, 10);
13693 uint32_t op4 = INSTR (4, 0);
13694
13695 NYI_assert (31, 25, 0x6b);
13696
13697 if (op2 != 0x1F || op3 != 0 || op4 != 0)
13698 HALT_UNALLOC;
13699
13700 if (op == 0)
13701 br (cpu);
13702
13703 else if (op == 1)
13704 blr (cpu);
13705
13706 else if (op == 2)
13707 ret (cpu);
13708
13709 else
13710 {
13711 /* ERET and DRPS require Rn (instr [9,5]) == 0b11111;
13712 anything else is unallocated. */
13713 uint32_t rn = INSTR (9, 5);
13714
13715 if (rn != 0x1f)
13716 HALT_UNALLOC;
13717
13718 if (op == 4 || op == 5)
13719 HALT_NYI;
13720
13721 HALT_UNALLOC;
13722 }
13723 }
13724
13725 /* FIXME: We should get the Angel SWI values from ../../libgloss/aarch64/svc.h
13726 but this may not be available. So instead we define the values we need
13727 here. */
13728 #define AngelSVC_Reason_Open 0x01
13729 #define AngelSVC_Reason_Close 0x02
13730 #define AngelSVC_Reason_Write 0x05
13731 #define AngelSVC_Reason_Read 0x06
13732 #define AngelSVC_Reason_IsTTY 0x09
13733 #define AngelSVC_Reason_Seek 0x0A
13734 #define AngelSVC_Reason_FLen 0x0C
13735 #define AngelSVC_Reason_Remove 0x0E
13736 #define AngelSVC_Reason_Rename 0x0F
13737 #define AngelSVC_Reason_Clock 0x10
13738 #define AngelSVC_Reason_Time 0x11
13739 #define AngelSVC_Reason_System 0x12
13740 #define AngelSVC_Reason_Errno 0x13
13741 #define AngelSVC_Reason_GetCmdLine 0x15
13742 #define AngelSVC_Reason_HeapInfo 0x16
13743 #define AngelSVC_Reason_ReportException 0x18
13744 #define AngelSVC_Reason_Elapsed 0x30
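
/* The Angel/semihosting calling convention implemented by handle_halt
   below: the reason code is passed in W0, X1 points at any parameter
   block, and the result is returned in X0.  For example the HeapInfo
   call hands over a pointer to a block of four 64 bit words which the
   host fills in as { heap start, heap end, lowest stack, initial
   stack }. */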
13745
13746
13747 static void
13748 handle_halt (sim_cpu *cpu, uint32_t val)
13749 {
13750 uint64_t result = 0;
13751
13752 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
13753 if (val != 0xf000)
13754 {
13755 TRACE_SYSCALL (cpu, " HLT [0x%x]", val);
13756 sim_engine_halt (CPU_STATE (cpu), cpu, NULL, aarch64_get_PC (cpu),
13757 sim_stopped, SIM_SIGTRAP);
13758 }
13759
13760 /* We have encountered an Angel SVC call. See if we can process it. */
13761 switch (aarch64_get_reg_u32 (cpu, 0, NO_SP))
13762 {
13763 case AngelSVC_Reason_HeapInfo:
13764 {
13765 /* Get the values. */
13766 uint64_t stack_top = aarch64_get_stack_start (cpu);
13767 uint64_t heap_base = aarch64_get_heap_start (cpu);
13768
13769 /* Get the pointer */
13770 uint64_t ptr = aarch64_get_reg_u64 (cpu, 1, SP_OK);
13771 ptr = aarch64_get_mem_u64 (cpu, ptr);
13772
13773 /* Fill in the memory block. */
13774 /* Start addr of heap. */
13775 aarch64_set_mem_u64 (cpu, ptr + 0, heap_base);
13776 /* End addr of heap. */
13777 aarch64_set_mem_u64 (cpu, ptr + 8, stack_top);
13778 /* Lowest stack addr. */
13779 aarch64_set_mem_u64 (cpu, ptr + 16, heap_base);
13780 /* Initial stack addr. */
13781 aarch64_set_mem_u64 (cpu, ptr + 24, stack_top);
13782
13783 TRACE_SYSCALL (cpu, " AngelSVC: Get Heap Info");
13784 }
13785 break;
13786
13787 case AngelSVC_Reason_Open:
13788 {
13789 /* Get the pointer */
13790 /* uint64_t ptr = aarch64_get_reg_u64 (cpu, 1, SP_OK); */
13791 /* FIXME: For now we just assume that we will only be asked
13792 to open the standard file descriptors. */
13793 static int fd = 0;
13794 result = fd ++;
13795
13796 TRACE_SYSCALL (cpu, " AngelSVC: Open file %d", fd - 1);
13797 }
13798 break;
13799
13800 case AngelSVC_Reason_Close:
13801 {
13802 uint64_t fh = aarch64_get_reg_u64 (cpu, 1, SP_OK);
13803 TRACE_SYSCALL (cpu, " AngelSVC: Close file %d", (int) fh);
13804 result = 0;
13805 }
13806 break;
13807
13808 case AngelSVC_Reason_Errno:
13809 result = 0;
13810 TRACE_SYSCALL (cpu, " AngelSVC: Get Errno");
13811 break;
13812
13813 case AngelSVC_Reason_Clock:
13814 result =
13815 #ifdef CLOCKS_PER_SEC
13816 (CLOCKS_PER_SEC >= 100)
13817 ? (clock () / (CLOCKS_PER_SEC / 100))
13818 : ((clock () * 100) / CLOCKS_PER_SEC)
13819 #else
13820 /* Presume unix... clock() returns microseconds. */
13821 (clock () / 10000)
13822 #endif
13823 ;
13824 TRACE_SYSCALL (cpu, " AngelSVC: Get Clock");
13825 break;
13826
13827 case AngelSVC_Reason_GetCmdLine:
13828 {
13829 /* Get the pointer */
13830 uint64_t ptr = aarch64_get_reg_u64 (cpu, 1, SP_OK);
13831 ptr = aarch64_get_mem_u64 (cpu, ptr);
13832
13833 /* FIXME: No command line for now. */
13834 aarch64_set_mem_u64 (cpu, ptr, 0);
13835 TRACE_SYSCALL (cpu, " AngelSVC: Get Command Line");
13836 }
13837 break;
13838
13839 case AngelSVC_Reason_IsTTY:
13840 result = 1;
13841 TRACE_SYSCALL (cpu, " AngelSVC: IsTTY ?");
13842 break;
13843
13844 case AngelSVC_Reason_Write:
13845 {
13846 /* Get the pointer */
13847 uint64_t ptr = aarch64_get_reg_u64 (cpu, 1, SP_OK);
13848 /* Get the write control block. */
13849 uint64_t fd = aarch64_get_mem_u64 (cpu, ptr);
13850 uint64_t buf = aarch64_get_mem_u64 (cpu, ptr + 8);
13851 uint64_t len = aarch64_get_mem_u64 (cpu, ptr + 16);
13852
13853 TRACE_SYSCALL (cpu, "write of %" PRIx64 " bytes from %"
13854 PRIx64 " on descriptor %" PRIx64,
13855 len, buf, fd);
13856
13857 if (len > 1280)
13858 {
13859 TRACE_SYSCALL (cpu,
13860 " AngelSVC: Write: Suspiciously long write: %ld",
13861 (long) len);
13862 sim_engine_halt (CPU_STATE (cpu), cpu, NULL, aarch64_get_PC (cpu),
13863 sim_stopped, SIM_SIGBUS);
13864 }
13865 else if (fd == 1)
13866 {
13867 printf ("%.*s", (int) len, aarch64_get_mem_ptr (cpu, buf));
13868 }
13869 else if (fd == 2)
13870 {
13871 TRACE (cpu, 0, "\n");
13872 sim_io_eprintf (CPU_STATE (cpu), "%.*s",
13873 (int) len, aarch64_get_mem_ptr (cpu, buf));
13874 TRACE (cpu, 0, "\n");
13875 }
13876 else
13877 {
13878 TRACE_SYSCALL (cpu,
13879 " AngelSVC: Write: Unexpected file handle: %d",
13880 (int) fd);
13881 sim_engine_halt (CPU_STATE (cpu), cpu, NULL, aarch64_get_PC (cpu),
13882 sim_stopped, SIM_SIGABRT);
13883 }
13884 }
13885 break;
13886
13887 case AngelSVC_Reason_ReportException:
13888 {
13889 /* Get the pointer */
13890 uint64_t ptr = aarch64_get_reg_u64 (cpu, 1, SP_OK);
13891 /* ptr = aarch64_get_mem_u64 (cpu, ptr); */
13892 uint64_t type = aarch64_get_mem_u64 (cpu, ptr);
13893 uint64_t state = aarch64_get_mem_u64 (cpu, ptr + 8);
13894
13895 TRACE_SYSCALL (cpu,
13896 "Angel Exception: type 0x%" PRIx64 " state %" PRIx64,
13897 type, state);
13898
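/* 0x20026 is the semihosting ADP_Stopped_ApplicationExit reason
   code: the application is exiting normally, with its exit status in
   the second word of the block. */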
13899 if (type == 0x20026)
13900 sim_engine_halt (CPU_STATE (cpu), cpu, NULL, aarch64_get_PC (cpu),
13901 sim_exited, state);
13902 else
13903 sim_engine_halt (CPU_STATE (cpu), cpu, NULL, aarch64_get_PC (cpu),
13904 sim_stopped, SIM_SIGINT);
13905 }
13906 break;
13907
13908 case AngelSVC_Reason_Read:
13909 case AngelSVC_Reason_FLen:
13910 case AngelSVC_Reason_Seek:
13911 case AngelSVC_Reason_Remove:
13912 case AngelSVC_Reason_Time:
13913 case AngelSVC_Reason_System:
13914 case AngelSVC_Reason_Rename:
13915 case AngelSVC_Reason_Elapsed:
13916 default:
13917 TRACE_SYSCALL (cpu, " HLT [Unknown angel %x]",
13918 aarch64_get_reg_u32 (cpu, 0, NO_SP));
13919 sim_engine_halt (CPU_STATE (cpu), cpu, NULL, aarch64_get_PC (cpu),
13920 sim_stopped, SIM_SIGTRAP);
13921 }
13922
13923 aarch64_set_reg_u64 (cpu, 0, NO_SP, result);
13924 }
13925
13926 static void
13927 dexExcpnGen (sim_cpu *cpu)
13928 {
13929 /* instr[31:24] = 11010100
13930 instr[23,21] = opc : 000 ==> GEN EXCPN, 001 ==> BRK
13931 010 ==> HLT, 101 ==> DBG GEN EXCPN
13932 instr[20,5] = imm16
13933 instr[4,2] = opc2 000 ==> OK, ow ==> UNALLOC
13934 instr[1,0] = LL : discriminates opc */
13935
13936 uint32_t opc = INSTR (23, 21);
13937 uint32_t imm16 = INSTR (20, 5);
13938 uint32_t opc2 = INSTR (4, 2);
13939 uint32_t LL;
13940
13941 NYI_assert (31, 24, 0xd4);
13942
13943 if (opc2 != 0)
13944 HALT_UNALLOC;
13945
13946 LL = INSTR (1, 0);
13947
13948 /* We only implement HLT and BRK for now. */
13949 if (opc == 1 && LL == 0)
13950 {
13951 TRACE_EVENTS (cpu, " BRK [0x%x]", imm16);
13952 sim_engine_halt (CPU_STATE (cpu), cpu, NULL, aarch64_get_PC (cpu),
13953 sim_exited, aarch64_get_reg_s32 (cpu, R0, SP_OK));
13954 }
13955
13956 if (opc == 2 && LL == 0)
13957 handle_halt (cpu, imm16);
13958
13959 else if (opc == 0 || opc == 5)
13960 HALT_NYI;
13961
13962 else
13963 HALT_UNALLOC;
13964 }
13965
13966 /* Stub for accessing system registers. */
13967
13968 static uint64_t
13969 system_get (sim_cpu *cpu, unsigned op0, unsigned op1, unsigned crn,
13970 unsigned crm, unsigned op2)
13971 {
13972 if (crn == 0 && op1 == 3 && crm == 0 && op2 == 7)
13973 /* DCZID_EL0 - the Data Cache Zero ID register.
13974 We do not support DC ZVA at the moment, so
13975 we return a value with the disable bit set.
13976 We implement support for the DCZID register since
13977 it is used by the C library's memset function. */
13978 return ((uint64_t) 1) << 4;
13979
13980 if (crn == 0 && op1 == 3 && crm == 0 && op2 == 1)
13981 /* Cache Type Register. */
13982 return 0x80008000UL;
13983
13984 if (crn == 13 && op1 == 3 && crm == 0 && op2 == 2)
13985 /* TPIDR_EL0 - thread pointer id. */
13986 return aarch64_get_thread_id (cpu);
13987
13988 if (op1 == 3 && crm == 4 && op2 == 0)
13989 return aarch64_get_FPCR (cpu);
13990
13991 if (op1 == 3 && crm == 4 && op2 == 1)
13992 return aarch64_get_FPSR (cpu);
13993
13994 else if (op1 == 3 && crm == 2 && op2 == 0)
13995 return aarch64_get_CPSR (cpu);
13996
13997 HALT_NYI;
13998 }
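
/* Encoding examples for system_get (illustrative): "mrs x0, tpidr_el0"
   has op0 == 3, op1 == 3, CRn == 13, CRm == 0, op2 == 2 and so returns
   the thread id above, while "mrs x0, fpcr" (op1 == 3, CRm == 4,
   op2 == 0) returns the FPCR.  N.B. the FPCR/FPSR/CPSR matches above
   do not check CRn. */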
13999
14000 static void
14001 system_set (sim_cpu *cpu, unsigned op0, unsigned op1, unsigned crn,
14002 unsigned crm, unsigned op2, uint64_t val)
14003 {
14004 if (op1 == 3 && crm == 4 && op2 == 0)
14005 aarch64_set_FPCR (cpu, val);
14006
14007 else if (op1 == 3 && crm == 4 && op2 == 1)
14008 aarch64_set_FPSR (cpu, val);
14009
14010 else if (op1 == 3 && crm == 2 && op2 == 0)
14011 aarch64_set_CPSR (cpu, val);
14012
14013 else
14014 HALT_NYI;
14015 }
14016
14017 static void
14018 do_mrs (sim_cpu *cpu)
14019 {
14020 /* instr[31:20] = 1101 0101 0011
14021 instr[19] = op0
14022 instr[18,16] = op1
14023 instr[15,12] = CRn
14024 instr[11,8] = CRm
14025 instr[7,5] = op2
14026 instr[4,0] = Rt */
14027 unsigned sys_op0 = INSTR (19, 19) + 2;
14028 unsigned sys_op1 = INSTR (18, 16);
14029 unsigned sys_crn = INSTR (15, 12);
14030 unsigned sys_crm = INSTR (11, 8);
14031 unsigned sys_op2 = INSTR (7, 5);
14032 unsigned rt = INSTR (4, 0);
14033
14034 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
14035 aarch64_set_reg_u64 (cpu, rt, NO_SP,
14036 system_get (cpu, sys_op0, sys_op1, sys_crn, sys_crm, sys_op2));
14037 }
14038
14039 static void
14040 do_MSR_immediate (sim_cpu *cpu)
14041 {
14042 /* instr[31:19] = 1101 0101 0000 0
14043 instr[18,16] = op1
14044 instr[15,12] = 0100
14045 instr[11,8] = CRm
14046 instr[7,5] = op2
14047 instr[4,0] = 1 1111 */
14048
14049 unsigned op1 = INSTR (18, 16);
14050 /* unsigned crm = INSTR (11, 8); */
14051 unsigned op2 = INSTR (7, 5);
14052
14053 NYI_assert (31, 19, 0x1AA0);
14054 NYI_assert (15, 12, 0x4);
14055 NYI_assert (4, 0, 0x1F);
14056
14057 if (op1 == 0)
14058 {
14059 if (op2 == 5)
14060 HALT_NYI; /* set SPSel. */
14061 else
14062 HALT_UNALLOC;
14063 }
14064 else if (op1 == 3)
14065 {
14066 if (op2 == 6)
14067 HALT_NYI; /* set DAIFset. */
14068 else if (op2 == 7)
14069 HALT_NYI; /* set DAIFclr. */
14070 else
14071 HALT_UNALLOC;
14072 }
14073 else
14074 HALT_UNALLOC;
14075 }
14076
14077 static void
14078 do_MSR_reg (sim_cpu *cpu)
14079 {
14080 /* instr[31:20] = 1101 0101 0001
14081 instr[19] = op0
14082 instr[18,16] = op1
14083 instr[15,12] = CRn
14084 instr[11,8] = CRm
14085 instr[7,5] = op2
14086 instr[4,0] = Rt */
14087
14088 unsigned sys_op0 = INSTR (19, 19) + 2;
14089 unsigned sys_op1 = INSTR (18, 16);
14090 unsigned sys_crn = INSTR (15, 12);
14091 unsigned sys_crm = INSTR (11, 8);
14092 unsigned sys_op2 = INSTR (7, 5);
14093 unsigned rt = INSTR (4, 0);
14094
14095 NYI_assert (31, 20, 0xD51);
14096
14097 TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
14098 system_set (cpu, sys_op0, sys_op1, sys_crn, sys_crm, sys_op2,
14099 aarch64_get_reg_u64 (cpu, rt, NO_SP));
14100 }
14101
14102 static void
14103 do_SYS (sim_cpu *cpu)
14104 {
14105 /* instr[31,19] = 1101 0101 0000 1
14106 instr[18,16] = op1
14107 instr[15,12] = CRn
14108 instr[11,8] = CRm
14109 instr[7,5] = op2
14110 instr[4,0] = Rt */
14111 NYI_assert (31, 19, 0x1AA1);
14112
14113 /* FIXME: For now we just silently accept system ops. */
14114 }
14115
14116 static void
14117 dexSystem (sim_cpu *cpu)
14118 {
14119 /* instr[31:22] = 1101 0101 00
14120 instr[21] = L
14121 instr[20,19] = op0
14122 instr[18,16] = op1
14123 instr[15,12] = CRn
14124 instr[11,8] = CRm
14125 instr[7,5] = op2
14126 instr[4,0] = uimm5 */
14127
14128 /* We are interested in HINT, DSB, DMB and ISB
14129
14130 Hint #0 encodes NOOP (this is the only hint we care about)
14131 L == 0, op0 == 0, op1 = 011, CRn = 0010, Rt = 11111,
14132 CRm:op2 == 0000 000 (CRm == 0000 with op2 == 001..101 encodes other hints, NYI; any other encoding is executed as a NOP)
14133
14134 DSB, DMB, ISB are data synchronization barrier, data memory barrier and
14135 instruction synchronization barrier, respectively, where
14136
14137 L == 0, op0 == 0, op1 = 011, CRn = 0011, Rt = 11111,
14138 op2 : DSB ==> 100, DMB ==> 101, ISB ==> 110
14139 CRm<3:2> ==> domain, CRm<1:0> ==> types,
14140 domain : 00 ==> OuterShareable, 01 ==> Nonshareable,
14141 10 ==> InnerShareable, 11 ==> FullSystem
14142 types : 01 ==> Reads, 10 ==> Writes,
14143 11 ==> All, 00 ==> All (domain == FullSystem). */
14144
14145 unsigned rt = INSTR (4, 0);
14146
14147 NYI_assert (31, 22, 0x354);
14148
14149 switch (INSTR (21, 12))
14150 {
14151 case 0x032:
14152 if (rt == 0x1F)
14153 {
14154 /* NOP has CRm != 0000 OR
14155 (CRm == 0000 AND (op2 == 000 OR op2 > 101)). */
14156 uint32_t crm = INSTR (11, 8);
14157 uint32_t op2 = INSTR (7, 5);
14158
14159 if (crm != 0 || (op2 == 0 || op2 > 5))
14160 {
14161 /* Actually call nop method so we can reimplement it later. */
14162 nop (cpu);
14163 return;
14164 }
14165 }
14166 HALT_NYI;
14167
14168 case 0x033:
14169 {
14170 uint32_t op2 = INSTR (7, 5);
14171
14172 switch (op2)
14173 {
14174 case 2: HALT_NYI;
14175 case 4: dsb (cpu); return;
14176 case 5: dmb (cpu); return;
14177 case 6: isb (cpu); return;
14178 default: HALT_UNALLOC;
14179 }
14180 }
14181
14182 case 0x3B0:
14183 case 0x3B4:
14184 case 0x3BD:
14185 do_mrs (cpu);
14186 return;
14187
14188 case 0x0B7:
14189 do_SYS (cpu); /* DC is an alias of SYS. */
14190 return;
14191
14192 default:
14193 if (INSTR (21, 20) == 0x1)
14194 do_MSR_reg (cpu);
14195 else if (INSTR (21, 19) == 0 && INSTR (15, 12) == 0x4)
14196 do_MSR_immediate (cpu);
14197 else
14198 HALT_NYI;
14199 return;
14200 }
14201 }
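
/* Barrier encoding example (illustrative): "dmb ish" has CRn == 0011,
   CRm == 1011 (inner shareable, all accesses) and op2 == 101, so
   INSTR (21, 12) == 0x033 and op2 selects the dmb handler above; all
   barrier variants are no-ops in this simulator. */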
14202
14203 static void
14204 dexBr (sim_cpu *cpu)
14205 {
14206 /* uint32_t group = dispatchGroup (aarch64_get_instr (cpu));
14207 assert group == GROUP_BREXSYS_1010 || group == GROUP_BREXSYS_1011
14208 bits [31,29] of a BrExSys are the secondary dispatch vector. */
14209 uint32_t group2 = dispatchBrExSys (aarch64_get_instr (cpu));
14210
14211 switch (group2)
14212 {
14213 case BR_IMM_000:
14214 return dexBranchImmediate (cpu);
14215
14216 case BR_IMMCMP_001:
14217 /* Compare has bit 25 clear while test has it set. */
14218 if (!INSTR (25, 25))
14219 dexCompareBranchImmediate (cpu);
14220 else
14221 dexTestBranchImmediate (cpu);
14222 return;
14223
14224 case BR_IMMCOND_010:
14225 /* This is a conditional branch if bit 25 is clear otherwise
14226 unallocated. */
14227 if (!INSTR (25, 25))
14228 dexCondBranchImmediate (cpu);
14229 else
14230 HALT_UNALLOC;
14231 return;
14232
14233 case BR_UNALLOC_011:
14234 HALT_UNALLOC;
14235
14236 case BR_IMM_100:
14237 dexBranchImmediate (cpu);
14238 return;
14239
14240 case BR_IMMCMP_101:
14241 /* Compare has bit 25 clear while test has it set. */
14242 if (!INSTR (25, 25))
14243 dexCompareBranchImmediate (cpu);
14244 else
14245 dexTestBranchImmediate (cpu);
14246 return;
14247
14248 case BR_REG_110:
14249 /* Unconditional branch reg has bit 25 set. */
14250 if (INSTR (25, 25))
14251 dexBranchRegister (cpu);
14252
14253 /* This includes both Excpn Gen, System and unalloc operations.
14254 We need to decode the Excpn Gen operation BRK so we can plant
14255 debugger entry points.
14256 Excpn Gen operations have instr [24] = 0.
14257 we need to decode at least one of the System operations NOP
14258 which is an alias for HINT #0.
14259 System operations have instr [24,22] = 100. */
14260 else if (INSTR (24, 24) == 0)
14261 dexExcpnGen (cpu);
14262
14263 else if (INSTR (24, 22) == 4)
14264 dexSystem (cpu);
14265
14266 else
14267 HALT_UNALLOC;
14268
14269 return;
14270
14271 case BR_UNALLOC_111:
14272 HALT_UNALLOC;
14273
14274 default:
14275 /* Should never reach here. */
14276 HALT_NYI;
14277 }
14278 }
14279
14280 static void
14281 aarch64_decode_and_execute (sim_cpu *cpu, uint64_t pc)
14282 {
14283 /* We need to check if gdb wants a break in here. */
14284 /* checkBreak (cpu); */
14285
14286 uint64_t group = dispatchGroup (aarch64_get_instr (cpu));
14287
14288 switch (group)
14289 {
14290 case GROUP_PSEUDO_0000: dexPseudo (cpu); break;
14291 case GROUP_LDST_0100: dexLdSt (cpu); break;
14292 case GROUP_DPREG_0101: dexDPReg (cpu); break;
14293 case GROUP_LDST_0110: dexLdSt (cpu); break;
14294 case GROUP_ADVSIMD_0111: dexAdvSIMD0 (cpu); break;
14295 case GROUP_DPIMM_1000: dexDPImm (cpu); break;
14296 case GROUP_DPIMM_1001: dexDPImm (cpu); break;
14297 case GROUP_BREXSYS_1010: dexBr (cpu); break;
14298 case GROUP_BREXSYS_1011: dexBr (cpu); break;
14299 case GROUP_LDST_1100: dexLdSt (cpu); break;
14300 case GROUP_DPREG_1101: dexDPReg (cpu); break;
14301 case GROUP_LDST_1110: dexLdSt (cpu); break;
14302 case GROUP_ADVSIMD_1111: dexAdvSIMD1 (cpu); break;
14303
14304 case GROUP_UNALLOC_0001:
14305 case GROUP_UNALLOC_0010:
14306 case GROUP_UNALLOC_0011:
14307 HALT_UNALLOC;
14308
14309 default:
14310 /* Should never reach here. */
14311 HALT_NYI;
14312 }
14313 }
14314
14315 static bfd_boolean
14316 aarch64_step (sim_cpu *cpu)
14317 {
14318 uint64_t pc = aarch64_get_PC (cpu);
14319
14320 if (pc == TOP_LEVEL_RETURN_PC)
14321 return FALSE;
14322
14323 aarch64_set_next_PC (cpu, pc + 4);
14324
14325 /* Code is always little-endian. */
14326 sim_core_read_buffer (CPU_STATE (cpu), cpu, read_map,
14327 & aarch64_get_instr (cpu), pc, 4);
14328 aarch64_get_instr (cpu) = endian_le2h_4 (aarch64_get_instr (cpu));
14329
14330 TRACE_INSN (cpu, " pc = %" PRIx64 " instr = %08x", pc,
14331 aarch64_get_instr (cpu));
14332 TRACE_DISASM (cpu, pc);
14333
14334 aarch64_decode_and_execute (cpu, pc);
14335
14336 return TRUE;
14337 }
14338
14339 void
14340 aarch64_run (SIM_DESC sd)
14341 {
14342 sim_cpu *cpu = STATE_CPU (sd, 0);
14343
14344 while (aarch64_step (cpu))
14345 {
14346 aarch64_update_PC (cpu);
14347
14348 if (sim_events_tick (sd))
14349 sim_events_process (sd);
14350 }
14351
14352 sim_engine_halt (sd, cpu, NULL, aarch64_get_PC (cpu),
14353 sim_exited, aarch64_get_reg_s32 (cpu, R0, NO_SP));
14354 }
14355
14356 void
14357 aarch64_init (sim_cpu *cpu, uint64_t pc)
14358 {
14359 uint64_t sp = aarch64_get_stack_start (cpu);
14360
14361 /* Install SP, FP and PC and set LR to -20
14362 so we can detect a top-level return. */
14363 aarch64_set_reg_u64 (cpu, SP, SP_OK, sp);
14364 aarch64_set_reg_u64 (cpu, FP, SP_OK, sp);
14365 aarch64_set_reg_u64 (cpu, LR, SP_OK, TOP_LEVEL_RETURN_PC);
14366 aarch64_set_next_PC (cpu, pc);
14367 aarch64_update_PC (cpu);
14368 aarch64_init_LIT_table ();
14369 }