vec_store (cpu, address, 4);
}
+#define LDn_STn_SINGLE_LANE_AND_SIZE() \
+ do \
+ { \
+ switch (INSTR (15, 14)) \
+ { \
+ case 0: \
+ lane = (full << 3) | (s << 2) | size; \
+ size = 0; \
+ break; \
+ \
+ case 1: \
+ if ((size & 1) == 1) \
+ HALT_UNALLOC; \
+ lane = (full << 2) | (s << 1) | (size >> 1); \
+ size = 1; \
+ break; \
+ \
+ case 2: \
+ if ((size & 2) == 2) \
+ HALT_UNALLOC; \
+ \
+ if ((size & 1) == 0) \
+ { \
+ lane = (full << 1) | s; \
+ size = 2; \
+ } \
+ else \
+ { \
+ if (s) \
+ HALT_UNALLOC; \
+ lane = full; \
+ size = 3; \
+ } \
+ break; \
+ \
+ default: \
+ HALT_UNALLOC; \
+ } \
+ } \
+ while (0)
+
+/* Load single structure into one lane of N registers. */
static void
-do_vec_LDnR (sim_cpu *cpu, uint64_t address)
+do_vec_LDn_single (sim_cpu *cpu, uint64_t address)
{
/* instr[31] = 0
instr[30] = element selector 0=>half, 1=>all elements
instr[29,24] = 00 1101
instr[23] = 0=>simple, 1=>post
instr[22] = 1
- instr[21] = width: LD1R-or-LD3R (0) / LD2R-or-LD4R (1)
+ instr[21] = width: LD1-or-LD3 (0) / LD2-or-LD4 (1)
instr[20,16] = 0 0000 (simple), Vinc (reg-post-inc, no SP),
11111 (immediate post inc)
- instr[15,14] = 11
- instr[13] = width: LD1R-or-LD2R (0) / LD3R-or-LD4R (1)
- instr[12] = 0
- instr[11,10] = element size 00=> byte(b), 01=> half(h),
- 10=> word(s), 11=> double(d)
+ instr[15,13] = opcode
+ instr[12] = S, used for lane number
+ instr[11,10] = size, also used for lane number
instr[9,5] = address
instr[4,0] = Vd */
unsigned full = INSTR (30, 30);
unsigned vd = INSTR (4, 0);
unsigned size = INSTR (11, 10);
+ unsigned s = INSTR (12, 12);
+ int nregs = ((INSTR (13, 13) << 1) | INSTR (21, 21)) + 1;
+ int lane = 0;
int i;
NYI_assert (29, 24, 0x0D);
NYI_assert (22, 22, 1);
- NYI_assert (15, 14, 3);
- NYI_assert (12, 12, 0);
- switch ((INSTR (13, 13) << 1) | INSTR (21, 21))
- {
- case 0: /* LD1R. */
- switch (size)
- {
- case 0:
- {
- uint8_t val = aarch64_get_mem_u8 (cpu, address);
- for (i = 0; i < (full ? 16 : 8); i++)
- aarch64_set_vec_u8 (cpu, vd, i, val);
- break;
- }
-
- case 1:
- {
- uint16_t val = aarch64_get_mem_u16 (cpu, address);
- for (i = 0; i < (full ? 8 : 4); i++)
- aarch64_set_vec_u16 (cpu, vd, i, val);
- break;
- }
-
- case 2:
- {
- uint32_t val = aarch64_get_mem_u32 (cpu, address);
- for (i = 0; i < (full ? 4 : 2); i++)
- aarch64_set_vec_u32 (cpu, vd, i, val);
- break;
- }
-
- case 3:
- {
- uint64_t val = aarch64_get_mem_u64 (cpu, address);
- for (i = 0; i < (full ? 2 : 1); i++)
- aarch64_set_vec_u64 (cpu, vd, i, val);
- break;
- }
+ /* Compute the lane number first (using size), and then compute size. */
+ LDn_STn_SINGLE_LANE_AND_SIZE ();
- default:
- HALT_UNALLOC;
+ for (i = 0; i < nregs; i++)
+ switch (size)
+ {
+ case 0:
+ {
+ uint8_t val = aarch64_get_mem_u8 (cpu, address + i);
+ aarch64_set_vec_u8 (cpu, vd + i, lane, val);
+ break;
}
- break;
- case 1: /* LD2R. */
- switch (size)
+ case 1:
{
- case 0:
- {
- uint8_t val1 = aarch64_get_mem_u8 (cpu, address);
- uint8_t val2 = aarch64_get_mem_u8 (cpu, address + 1);
-
- for (i = 0; i < (full ? 16 : 8); i++)
- {
- aarch64_set_vec_u8 (cpu, vd, 0, val1);
- aarch64_set_vec_u8 (cpu, vd + 1, 0, val2);
- }
- break;
- }
+ uint16_t val = aarch64_get_mem_u16 (cpu, address + (i * 2));
+ aarch64_set_vec_u16 (cpu, vd + i, lane, val);
+ break;
+ }
- case 1:
- {
- uint16_t val1 = aarch64_get_mem_u16 (cpu, address);
- uint16_t val2 = aarch64_get_mem_u16 (cpu, address + 2);
+ case 2:
+ {
+ uint32_t val = aarch64_get_mem_u32 (cpu, address + (i * 4));
+ aarch64_set_vec_u32 (cpu, vd + i, lane, val);
+ break;
+ }
- for (i = 0; i < (full ? 8 : 4); i++)
- {
- aarch64_set_vec_u16 (cpu, vd, 0, val1);
- aarch64_set_vec_u16 (cpu, vd + 1, 0, val2);
- }
- break;
- }
+ case 3:
+ {
+ uint64_t val = aarch64_get_mem_u64 (cpu, address + (i * 8));
+ aarch64_set_vec_u64 (cpu, vd + i, lane, val);
+ break;
+ }
+ }
+}
- case 2:
- {
- uint32_t val1 = aarch64_get_mem_u32 (cpu, address);
- uint32_t val2 = aarch64_get_mem_u32 (cpu, address + 4);
+/* Store single structure from one lane of N registers.  */
+static void
+do_vec_STn_single (sim_cpu *cpu, uint64_t address)
+{
+ /* instr[31] = 0
+ instr[30] = element selector 0=>half, 1=>all elements
+ instr[29,24] = 00 1101
+ instr[23] = 0=>simple, 1=>post
+ instr[22] = 0
+ instr[21] = width: ST1-or-ST3 (0) / ST2-or-ST4 (1)
+ instr[20,16] = 0 0000 (simple), Vinc (reg-post-inc, no SP),
+ 11111 (immediate post inc)
+ instr[15,13] = opcode
+ instr[12] = S, used for lane number
+ instr[11,10] = size, also used for lane number
+ instr[9,5] = address
+ instr[4,0] = Vd */
- for (i = 0; i < (full ? 4 : 2); i++)
- {
- aarch64_set_vec_u32 (cpu, vd, 0, val1);
- aarch64_set_vec_u32 (cpu, vd + 1, 0, val2);
- }
- break;
- }
+ unsigned full = INSTR (30, 30);
+ unsigned vd = INSTR (4, 0);
+ unsigned size = INSTR (11, 10);
+ unsigned s = INSTR (12, 12);
+ int nregs = ((INSTR (13, 13) << 1) | INSTR (21, 21)) + 1;
+ int lane = 0;
+ int i;
- case 3:
- {
- uint64_t val1 = aarch64_get_mem_u64 (cpu, address);
- uint64_t val2 = aarch64_get_mem_u64 (cpu, address + 8);
+ NYI_assert (29, 24, 0x0D);
+ NYI_assert (22, 22, 0);
- for (i = 0; i < (full ? 2 : 1); i++)
- {
- aarch64_set_vec_u64 (cpu, vd, 0, val1);
- aarch64_set_vec_u64 (cpu, vd + 1, 0, val2);
- }
- break;
- }
+ /* Compute the lane number first (using size), and then compute size. */
+ LDn_STn_SINGLE_LANE_AND_SIZE ();
- default:
- HALT_UNALLOC;
+ for (i = 0; i < nregs; i++)
+ switch (size)
+ {
+ case 0:
+ {
+ uint8_t val = aarch64_get_vec_u8 (cpu, vd + i, lane);
+ aarch64_set_mem_u8 (cpu, address + i, val);
+ break;
}
- break;
- case 2: /* LD3R. */
- switch (size)
+ case 1:
{
- case 0:
- {
- uint8_t val1 = aarch64_get_mem_u8 (cpu, address);
- uint8_t val2 = aarch64_get_mem_u8 (cpu, address + 1);
- uint8_t val3 = aarch64_get_mem_u8 (cpu, address + 2);
-
- for (i = 0; i < (full ? 16 : 8); i++)
- {
- aarch64_set_vec_u8 (cpu, vd, 0, val1);
- aarch64_set_vec_u8 (cpu, vd + 1, 0, val2);
- aarch64_set_vec_u8 (cpu, vd + 2, 0, val3);
- }
- }
+ uint16_t val = aarch64_get_vec_u16 (cpu, vd + i, lane);
+ aarch64_set_mem_u16 (cpu, address + (i * 2), val);
break;
+ }
- case 1:
- {
- uint32_t val1 = aarch64_get_mem_u16 (cpu, address);
- uint32_t val2 = aarch64_get_mem_u16 (cpu, address + 2);
- uint32_t val3 = aarch64_get_mem_u16 (cpu, address + 4);
-
- for (i = 0; i < (full ? 8 : 4); i++)
- {
- aarch64_set_vec_u16 (cpu, vd, 0, val1);
- aarch64_set_vec_u16 (cpu, vd + 1, 0, val2);
- aarch64_set_vec_u16 (cpu, vd + 2, 0, val3);
- }
- }
+ case 2:
+ {
+ uint32_t val = aarch64_get_vec_u32 (cpu, vd + i, lane);
+ aarch64_set_mem_u32 (cpu, address + (i * 4), val);
break;
+ }
- case 2:
- {
- uint32_t val1 = aarch64_get_mem_u32 (cpu, address);
- uint32_t val2 = aarch64_get_mem_u32 (cpu, address + 4);
- uint32_t val3 = aarch64_get_mem_u32 (cpu, address + 8);
-
- for (i = 0; i < (full ? 4 : 2); i++)
- {
- aarch64_set_vec_u32 (cpu, vd, 0, val1);
- aarch64_set_vec_u32 (cpu, vd + 1, 0, val2);
- aarch64_set_vec_u32 (cpu, vd + 2, 0, val3);
- }
- }
+ case 3:
+ {
+ uint64_t val = aarch64_get_vec_u64 (cpu, vd + i, lane);
+ aarch64_set_mem_u64 (cpu, address + (i * 8), val);
break;
+ }
+ }
+}
- case 3:
- {
- uint64_t val1 = aarch64_get_mem_u64 (cpu, address);
- uint64_t val2 = aarch64_get_mem_u64 (cpu, address + 8);
- uint64_t val3 = aarch64_get_mem_u64 (cpu, address + 16);
+/* Load single structure into all lanes of N registers. */
+static void
+do_vec_LDnR (sim_cpu *cpu, uint64_t address)
+{
+ /* instr[31] = 0
+ instr[30] = element selector 0=>half, 1=>all elements
+ instr[29,24] = 00 1101
+ instr[23] = 0=>simple, 1=>post
+ instr[22] = 1
+ instr[21] = width: LD1R-or-LD3R (0) / LD2R-or-LD4R (1)
+ instr[20,16] = 0 0000 (simple), Vinc (reg-post-inc, no SP),
+ 11111 (immediate post inc)
+ instr[15,14] = 11
+ instr[13] = width: LD1R-or-LD2R (0) / LD3R-or-LD4R (1)
+ instr[12] = 0
+ instr[11,10] = element size 00=> byte(b), 01=> half(h),
+ 10=> word(s), 11=> double(d)
+ instr[9,5] = address
+ instr[4,0] = Vd */
- for (i = 0; i < (full ? 2 : 1); i++)
- {
- aarch64_set_vec_u64 (cpu, vd, 0, val1);
- aarch64_set_vec_u64 (cpu, vd + 1, 0, val2);
- aarch64_set_vec_u64 (cpu, vd + 2, 0, val3);
- }
- }
- break;
+ unsigned full = INSTR (30, 30);
+ unsigned vd = INSTR (4, 0);
+ unsigned size = INSTR (11, 10);
+ int nregs = ((INSTR (13, 13) << 1) | INSTR (21, 21)) + 1;
+ int i, n;
- default:
- HALT_UNALLOC;
- }
- break;
+ NYI_assert (29, 24, 0x0D);
+ NYI_assert (22, 22, 1);
+ NYI_assert (15, 14, 3);
+ NYI_assert (12, 12, 0);
- case 3: /* LD4R. */
- switch (size)
+ for (n = 0; n < nregs; n++)
+ switch (size)
+ {
+ case 0:
{
- case 0:
- {
- uint8_t val1 = aarch64_get_mem_u8 (cpu, address);
- uint8_t val2 = aarch64_get_mem_u8 (cpu, address + 1);
- uint8_t val3 = aarch64_get_mem_u8 (cpu, address + 2);
- uint8_t val4 = aarch64_get_mem_u8 (cpu, address + 3);
-
- for (i = 0; i < (full ? 16 : 8); i++)
- {
- aarch64_set_vec_u8 (cpu, vd, 0, val1);
- aarch64_set_vec_u8 (cpu, vd + 1, 0, val2);
- aarch64_set_vec_u8 (cpu, vd + 2, 0, val3);
- aarch64_set_vec_u8 (cpu, vd + 3, 0, val4);
- }
- }
+ uint8_t val = aarch64_get_mem_u8 (cpu, address + n);
+ for (i = 0; i < (full ? 16 : 8); i++)
+ aarch64_set_vec_u8 (cpu, vd + n, i, val);
break;
+ }
- case 1:
- {
- uint32_t val1 = aarch64_get_mem_u16 (cpu, address);
- uint32_t val2 = aarch64_get_mem_u16 (cpu, address + 2);
- uint32_t val3 = aarch64_get_mem_u16 (cpu, address + 4);
- uint32_t val4 = aarch64_get_mem_u16 (cpu, address + 6);
-
- for (i = 0; i < (full ? 8 : 4); i++)
- {
- aarch64_set_vec_u16 (cpu, vd, 0, val1);
- aarch64_set_vec_u16 (cpu, vd + 1, 0, val2);
- aarch64_set_vec_u16 (cpu, vd + 2, 0, val3);
- aarch64_set_vec_u16 (cpu, vd + 3, 0, val4);
- }
- }
+ case 1:
+ {
+ uint16_t val = aarch64_get_mem_u16 (cpu, address + (n * 2));
+ for (i = 0; i < (full ? 8 : 4); i++)
+ aarch64_set_vec_u16 (cpu, vd + n, i, val);
break;
+ }
- case 2:
- {
- uint32_t val1 = aarch64_get_mem_u32 (cpu, address);
- uint32_t val2 = aarch64_get_mem_u32 (cpu, address + 4);
- uint32_t val3 = aarch64_get_mem_u32 (cpu, address + 8);
- uint32_t val4 = aarch64_get_mem_u32 (cpu, address + 12);
-
- for (i = 0; i < (full ? 4 : 2); i++)
- {
- aarch64_set_vec_u32 (cpu, vd, 0, val1);
- aarch64_set_vec_u32 (cpu, vd + 1, 0, val2);
- aarch64_set_vec_u32 (cpu, vd + 2, 0, val3);
- aarch64_set_vec_u32 (cpu, vd + 3, 0, val4);
- }
- }
+ case 2:
+ {
+ uint32_t val = aarch64_get_mem_u32 (cpu, address + (n * 4));
+ for (i = 0; i < (full ? 4 : 2); i++)
+ aarch64_set_vec_u32 (cpu, vd + n, i, val);
break;
+ }
- case 3:
- {
- uint64_t val1 = aarch64_get_mem_u64 (cpu, address);
- uint64_t val2 = aarch64_get_mem_u64 (cpu, address + 8);
- uint64_t val3 = aarch64_get_mem_u64 (cpu, address + 16);
- uint64_t val4 = aarch64_get_mem_u64 (cpu, address + 24);
-
- for (i = 0; i < (full ? 2 : 1); i++)
- {
- aarch64_set_vec_u64 (cpu, vd, 0, val1);
- aarch64_set_vec_u64 (cpu, vd + 1, 0, val2);
- aarch64_set_vec_u64 (cpu, vd + 2, 0, val3);
- aarch64_set_vec_u64 (cpu, vd + 3, 0, val4);
- }
- }
+ case 3:
+ {
+ uint64_t val = aarch64_get_mem_u64 (cpu, address + (n * 8));
+ for (i = 0; i < (full ? 2 : 1); i++)
+ aarch64_set_vec_u64 (cpu, vd + n, i, val);
break;
-
- default:
- HALT_UNALLOC;
}
- break;
- default:
- HALT_UNALLOC;
- }
+ default:
+ HALT_UNALLOC;
+ }
}
static void
instr[31] = 0
instr[30] = element selector 0=>half, 1=>all elements
instr[29,25] = 00110
- instr[24] = ?
+ instr[24] = 0=>multiple struct, 1=>single struct
instr[23] = 0=>simple, 1=>post
instr[22] = 0=>store, 1=>load
instr[21] = 0 (LDn) / small(0)-large(1) selector (LDnR)
instr[9,5] = Vn, can be SP
instr[4,0] = Vd */
+ int single;
int post;
int load;
unsigned vn;
if (INSTR (31, 31) != 0 || INSTR (29, 25) != 0x06)
HALT_NYI;
- type = INSTR (15, 12);
- if (type != 0xE && type != 0xE && INSTR (21, 21) != 0)
- HALT_NYI;
-
+ single = INSTR (24, 24);
post = INSTR (23, 23);
load = INSTR (22, 22);
+ type = INSTR (15, 12);
vn = INSTR (9, 5);
address = aarch64_get_reg_u64 (cpu, vn, SP_OK);
+ if (! single && INSTR (21, 21) != 0)
+ HALT_UNALLOC;
+
if (post)
{
unsigned vm = INSTR (20, 16);
{
unsigned sizeof_operation;
- switch (type)
+ if (single)
{
- case 0: sizeof_operation = 32; break;
- case 4: sizeof_operation = 24; break;
- case 8: sizeof_operation = 16; break;
-
- case 0xC:
- sizeof_operation = INSTR (21, 21) ? 2 : 1;
- sizeof_operation <<= INSTR (11, 10);
- break;
+ if ((type >= 0) && (type <= 11))
+ {
+ int nregs = ((INSTR (13, 13) << 1) | INSTR (21, 21)) + 1;
+ switch (INSTR (15, 14))
+ {
+ case 0:
+ sizeof_operation = nregs * 1;
+ break;
+ case 1:
+ sizeof_operation = nregs * 2;
+ break;
+ case 2:
+ if (INSTR (10, 10) == 0)
+ sizeof_operation = nregs * 4;
+ else
+ sizeof_operation = nregs * 8;
+ break;
+ default:
+ HALT_UNALLOC;
+ }
+ }
+ else if (type == 0xC)
+ {
+ sizeof_operation = INSTR (21, 21) ? 2 : 1;
+ sizeof_operation <<= INSTR (11, 10);
+ }
+ else if (type == 0xE)
+ {
+ sizeof_operation = INSTR (21, 21) ? 4 : 3;
+ sizeof_operation <<= INSTR (11, 10);
+ }
+ else
+ HALT_UNALLOC;
+ }
+ else
+ {
+ switch (type)
+ {
+ case 0: sizeof_operation = 32; break;
+ case 4: sizeof_operation = 24; break;
+ case 8: sizeof_operation = 16; break;
- case 0xE:
- sizeof_operation = INSTR (21, 21) ? 8 : 4;
- sizeof_operation <<= INSTR (11, 10);
- break;
+ case 7:
+ /* One register, immediate offset variant. */
+ sizeof_operation = 8;
+ break;
- case 7:
- /* One register, immediate offset variant. */
- sizeof_operation = 8;
- break;
+ case 10:
+ /* Two registers, immediate offset variant. */
+ sizeof_operation = 16;
+ break;
- case 10:
- /* Two registers, immediate offset variant. */
- sizeof_operation = 16;
- break;
+ case 6:
+ /* Three registers, immediate offset variant. */
+ sizeof_operation = 24;
+ break;
- case 6:
- /* Three registers, immediate offset variant. */
- sizeof_operation = 24;
- break;
+ case 2:
+ /* Four registers, immediate offset variant. */
+ sizeof_operation = 32;
+ break;
- case 2:
- /* Four registers, immediate offset variant. */
- sizeof_operation = 32;
- break;
+ default:
+ HALT_UNALLOC;
+ }
- default:
- HALT_UNALLOC;
+ if (INSTR (30, 30))
+ sizeof_operation *= 2;
}
- if (INSTR (30, 30))
- sizeof_operation *= 2;
-
aarch64_set_reg_u64 (cpu, vn, SP_OK, address + sizeof_operation);
}
else
NYI_assert (20, 16, 0);
}
+ if (single)
+ {
+ if (load)
+ {
+ if ((type >= 0) && (type <= 11))
+ do_vec_LDn_single (cpu, address);
+ else if ((type == 0xC) || (type == 0xE))
+ do_vec_LDnR (cpu, address);
+ else
+ HALT_UNALLOC;
+ return;
+ }
+
+ /* Stores. */
+ if ((type >= 0) && (type <= 11))
+ {
+ do_vec_STn_single (cpu, address);
+ return;
+ }
+
+ HALT_UNALLOC;
+ }
+
if (load)
{
switch (type)
case 10: LD1_2 (cpu, address); return;
case 7: LD1_1 (cpu, address); return;
- case 0xE:
- case 0xC: do_vec_LDnR (cpu, address); return;
-
default:
- HALT_NYI;
+ HALT_UNALLOC;
}
}
case 10: ST1_2 (cpu, address); return;
case 7: ST1_1 (cpu, address); return;
default:
- HALT_NYI;
+ HALT_UNALLOC;
}
}