From 05b3d79d265aa9de2a81ac2d0f5e6f5821161f34 Mon Sep 17 00:00:00 2001 From: Jim Wilson Date: Tue, 17 Jan 2017 16:01:40 -0800 Subject: [PATCH] Fixes for addv and xtn2 instructions. sim/aarch64/ * simulator.c (do_vec_ADDV): Call aarch64_set_vec_u64 instead of aarch64_set_reg_u64. In case 2, call HALT_UNALLOC if not full. In case 3, call HALT_UNALLOC unconditionally. (do_vec_XTN): Delete shifts. In case 2, change index from i + 4 to i + 2. Delete if on bias, change index to i + bias * X. sim/testsuite/sim/aarch64/ * addv.s: New. * xtn.s: New. --- sim/aarch64/ChangeLog | 8 +++ sim/aarch64/simulator.c | 47 ++++++----------- sim/testsuite/sim/aarch64/ChangeLog | 5 ++ sim/testsuite/sim/aarch64/addv.s | 50 ++++++++++++++++++ sim/testsuite/sim/aarch64/xtn.s | 79 +++++++++++++++++++++++++++++ 5 files changed, 158 insertions(+), 31 deletions(-) create mode 100644 sim/testsuite/sim/aarch64/addv.s create mode 100644 sim/testsuite/sim/aarch64/xtn.s diff --git a/sim/aarch64/ChangeLog b/sim/aarch64/ChangeLog index 814365d266e..d9dd4f60f20 100644 --- a/sim/aarch64/ChangeLog +++ b/sim/aarch64/ChangeLog @@ -1,3 +1,11 @@ +2017-01-17 Jim Wilson + + * simulator.c (do_vec_ADDV): Call aarch64_set_vec_u64 instead of + aarch64_set_reg_u64. In case 2, call HALT_UNALLOC if not full. In + case 3, call HALT_UNALLOC unconditionally. + (do_vec_XTN): Delete shifts. In case 2, change index from i + 4 to + i + 2. Delete if on bias, change index to i + bias * X. + 2017-01-09 Jim Wilson * simulator.c (do_vec_UZP): Rewrite. diff --git a/sim/aarch64/simulator.c b/sim/aarch64/simulator.c index 36129e5308f..c8e65c5c416 100644 --- a/sim/aarch64/simulator.c +++ b/sim/aarch64/simulator.c @@ -3445,28 +3445,25 @@ do_vec_ADDV (sim_cpu *cpu) case 0: for (i = 0; i < (full ? 16 : 8); i++) val += aarch64_get_vec_u8 (cpu, vm, i); - aarch64_set_reg_u64 (cpu, rd, NO_SP, val); + aarch64_set_vec_u64 (cpu, rd, 0, val); return; case 1: for (i = 0; i < (full ? 8 : 4); i++) val += aarch64_get_vec_u16 (cpu, vm, i); - aarch64_set_reg_u64 (cpu, rd, NO_SP, val); + aarch64_set_vec_u64 (cpu, rd, 0, val); return; case 2: - for (i = 0; i < (full ? 4 : 2); i++) + if (! full) + HALT_UNALLOC; + for (i = 0; i < 4; i++) val += aarch64_get_vec_u32 (cpu, vm, i); - aarch64_set_reg_u64 (cpu, rd, NO_SP, val); + aarch64_set_vec_u64 (cpu, rd, 0, val); return; case 3: - if (! full) - HALT_UNALLOC; - val = aarch64_get_vec_u64 (cpu, vm, 0); - val += aarch64_get_vec_u64 (cpu, vm, 1); - aarch64_set_reg_u64 (cpu, rd, NO_SP, val); - return; + HALT_UNALLOC; } } @@ -4206,33 +4203,21 @@ do_vec_XTN (sim_cpu *cpu) switch (INSTR (23, 22)) { case 0: - if (bias) - for (i = 0; i < 8; i++) - aarch64_set_vec_u8 (cpu, vd, i + 8, - aarch64_get_vec_u16 (cpu, vs, i) >> 8); - else - for (i = 0; i < 8; i++) - aarch64_set_vec_u8 (cpu, vd, i, aarch64_get_vec_u16 (cpu, vs, i)); + for (i = 0; i < 8; i++) + aarch64_set_vec_u8 (cpu, vd, i + (bias * 8), + aarch64_get_vec_u16 (cpu, vs, i)); return; case 1: - if (bias) - for (i = 0; i < 4; i++) - aarch64_set_vec_u16 (cpu, vd, i + 4, - aarch64_get_vec_u32 (cpu, vs, i) >> 16); - else - for (i = 0; i < 4; i++) - aarch64_set_vec_u16 (cpu, vd, i, aarch64_get_vec_u32 (cpu, vs, i)); + for (i = 0; i < 4; i++) + aarch64_set_vec_u16 (cpu, vd, i + (bias * 4), + aarch64_get_vec_u32 (cpu, vs, i)); return; case 2: - if (bias) - for (i = 0; i < 2; i++) - aarch64_set_vec_u32 (cpu, vd, i + 4, - aarch64_get_vec_u64 (cpu, vs, i) >> 32); - else - for (i = 0; i < 2; i++) - aarch64_set_vec_u32 (cpu, vd, i, aarch64_get_vec_u64 (cpu, vs, i)); + for (i = 0; i < 2; i++) + aarch64_set_vec_u32 (cpu, vd, i + (bias * 2), + aarch64_get_vec_u64 (cpu, vs, i)); return; } } diff --git a/sim/testsuite/sim/aarch64/ChangeLog b/sim/testsuite/sim/aarch64/ChangeLog index b756603e341..7e48dd122a8 100644 --- a/sim/testsuite/sim/aarch64/ChangeLog +++ b/sim/testsuite/sim/aarch64/ChangeLog @@ -1,3 +1,8 @@ +2017-01-17 Jim Wilson + + * addv.s: New. + * xtn.s: New. + 2017-01-09 Jim Wilson * uzp.s: New. diff --git a/sim/testsuite/sim/aarch64/addv.s b/sim/testsuite/sim/aarch64/addv.s new file mode 100644 index 00000000000..4da8935d37f --- /dev/null +++ b/sim/testsuite/sim/aarch64/addv.s @@ -0,0 +1,50 @@ +# mach: aarch64 + +# Check the add across vector instruction: addv. + +.include "testutils.inc" + + .data + .align 4 +input: + .word 0x04030201 + .word 0x08070605 + .word 0x0c0b0a09 + .word 0x100f0e0d + + start + adrp x0, input + ldr q0, [x0, #:lo12:input] + + addv b1, v0.8b + mov x1, v1.d[0] + cmp x1, #36 + bne .Lfailure + + addv b1, v0.16b + mov x1, v1.d[0] + cmp x1, #136 + bne .Lfailure + + addv h1, v0.4h + mov x1, v1.d[0] + mov x2, #5136 + cmp x1, x2 + bne .Lfailure + + addv h1, v0.8h + mov x1, v1.d[0] + mov x2, #18496 + cmp x1, x2 + bne .Lfailure + + addv s1, v0.4s + mov x1, v1.d[0] + mov x2, 8220 + movk x2, 0x2824, lsl 16 + cmp x1, x2 + bne .Lfailure + + pass +.Lfailure: + fail diff --git a/sim/testsuite/sim/aarch64/xtn.s b/sim/testsuite/sim/aarch64/xtn.s new file mode 100644 index 00000000000..de369f7c0f5 --- /dev/null +++ b/sim/testsuite/sim/aarch64/xtn.s @@ -0,0 +1,79 @@ +# mach: aarch64 + +# Check the extract narrow instructions: xtn, xtn2. + +.include "testutils.inc" + + .data + .align 4 +input: + .word 0x04030201 + .word 0x08070605 + .word 0x0c0b0a09 + .word 0x100f0e0d +input2: + .word 0x14131211 + .word 0x18171615 + .word 0x1c1b1a19 + .word 0x201f1e1d +x16b: + .word 0x07050301 + .word 0x0f0d0b09 + .word 0x17151311 + .word 0x1f1d1b19 +x8h: + .word 0x06050201 + .word 0x0e0d0a09 + .word 0x16151211 + .word 0x1e1d1a19 +x4s: + .word 0x04030201 + .word 0x0c0b0a09 + .word 0x14131211 + .word 0x1c1b1a19 + + start + adrp x0, input + ldr q0, [x0, #:lo12:input] + adrp x0, input2 + ldr q1, [x0, #:lo12:input2] + + xtn v2.8b, v0.8h + xtn2 v2.16b, v1.8h + mov x1, v2.d[0] + mov x2, v2.d[1] + adrp x3, x16b + ldr x4, [x3, #:lo12:x16b] + cmp x1, x4 + bne .Lfailure + ldr x5, [x3, #:lo12:x16b+8] + cmp x2, x5 + bne .Lfailure + + xtn v2.4h, v0.4s + xtn2 v2.8h, v1.4s + mov x1, v2.d[0] + mov x2, v2.d[1] + adrp x3, x8h + ldr x4, [x3, #:lo12:x8h] + cmp x1, x4 + bne .Lfailure + ldr x5, [x3, #:lo12:x8h+8] + cmp x2, x5 + bne .Lfailure + + xtn v2.2s, v0.2d + xtn2 v2.4s, v1.2d + mov x1, v2.d[0] + mov x2, v2.d[1] + adrp x3, x4s + ldr x4, [x3, #:lo12:x4s] + cmp x1, x4 + bne .Lfailure + ldr x5, [x3, #:lo12:x4s+8] + cmp x2, x5 + bne .Lfailure + + pass +.Lfailure: + fail -- 2.30.2