--- /dev/null
+This directory contains executable tests for ARM/AArch64 Advanced SIMD
+(Neon) intrinsics.
+
+It is meant to cover execution cases of all the Advanced SIMD
+intrinsics, but does not scan the generated assembler code.
+
+The general framework is composed as follows:
+- advsimd-intrinsics.exp: main dejagnu driver
+- *.c: actual tests, generally one per intrinsinc family
+- arm-neon-ref.h: contains macro definitions to save typing in actual
+ test files
+- compute-ref-data.h: contains input vectors definitions
+- *.inc: generic tests, shared by several families of intrinsics. For
+ instance, unary or binary operators
+
+A typical .c test file starts with the following contents (look at
+vld1.c and vaba.c for sample cases):
+#include <arm_neon.h>
+#include "arm-neon-ref.h"
+#include "compute-ref-data.h"
+
+Then, definitions of expected results, based on common input values,
+as defined in compute-ref-data.h.
+For example:
+VECT_VAR_DECL(expected,int,16,4) [] = { 0x16, 0x17, 0x18, 0x19 };
+defines the expected results of an operator generating int16x4 values.
+
+The common input values defined in compute-ref-data.h have been chosen
+to avoid corner-case values for most operators, yet exposing negative
+values for signed operators. For this reason, their range is also
+limited. For instance, the initialization of buffer_int16x4 will be
+{ -16, -15, -14, -13 }.
+
+The initialization of floating-point values is done via hex notation,
+to avoid potential rounding problems.
+
+To test special values and corner cases, specific initialization
+values should be used in dedicated tests, to ensure proper coverage.
+An example of this is vshl.
+
+When a variant of an intrinsic is not available, its expected result
+should be defined to the value of CLEAN_PATTERN_8 as defined in
+arm-neon-ref.h. For example:
+VECT_VAR_DECL(expected,int,64,1) [] = { 0x3333333333333333 };
+if the given intrinsic has no variant producing an int64x1 result,
+like the vcmp family (eg. vclt).
+
+This is because the helper function (check_results(), defined in
+arm-neon-ref.h), iterates over all the possible variants, to save
+typing in each individual test file. Alternatively, one can directly
+call the CHECK/CHECK_FP macros to check only a few expected results
+(see vabs.c for an example).
+
+Then, define the TEST_MSG string, which will be used when reporting errors.
+
+Next, define the function performing the actual tests, in general
+relying on the helpers provided by arm-neon-ref.h, which means:
+
+* declare necessary vectors of suitable types: using
+ DECL_VARIABLE_ALL_VARIANTS when all variants are supported, or the
+ relevant of subset calls to DECL_VARIABLE.
+
+* call clean_results() to initialize the 'results' buffers.
+
+* initialize the input vectors, using VLOAD, VDUP or VSET_LANE (vld*
+ tests do not need this step, since their actual purpose is to
+ initialize vectors).
+
+* execute the intrinsic on relevant variants, for instance using
+ TEST_MACRO_ALL_VARIANTS_2_5.
+
+* call check_results() to check that the results match the expected
+ values.
+
+A template test file could be:
+=================================================================
+#include <arm_neon.h>
+#include "arm-neon-ref.h"
+#include "compute-ref-data.h"
+
+/* Expected results. */
+VECT_VAR_DECL(expected,int,8,8) [] = { 0xf6, 0xf7, 0xf8, 0xf9,
+ 0xfa, 0xfb, 0xfc, 0xfd };
+/* and as many others as necessary. */
+
+#define TEST_MSG "VMYINTRINSIC"
+void exec_myintrinsic (void)
+{
+ /* my test: v4=vmyintrinsic(v1,v2,v3), then store the result. */
+#define TEST_VMYINTR(Q, T1, T2, W, N) \
+ VECT_VAR(vector_res, T1, W, N) = \
+ vmyintr##Q##_##T2##W(VECT_VAR(vector1, T1, W, N), \
+ VECT_VAR(vector2, T1, W, N), \
+ VECT_VAR(vector3, T1, W, N)); \
+ vst1##Q##_##T2##W(VECT_VAR(result, T1, W, N), VECT_VAR(vector_res, T1, W, N))
+
+#define DECL_VMYINTR_VAR(VAR) \
+ DECL_VARIABLE(VAR, int, 8, 8);
+/* And as many others as necessary. */
+
+ DECL_VMYINTR_VAR(vector1);
+ DECL_VMYINTR_VAR(vector2);
+ DECL_VMYINTR_VAR(vector3);
+ DECL_VMYINTR_VAR(vector_res);
+
+ clean_results ();
+
+ /* Initialize input "vector1" from "buffer". */
+ VLOAD(vector1, buffer, , int, s, 8, 8);
+/* And as many others as necessary. */
+
+ /* Choose init value arbitrarily. */
+ VDUP(vector2, , int, s, 8, 8, 1);
+/* And as many others as necessary. */
+
+ /* Choose init value arbitrarily. */
+ VDUP(vector3, , int, s, 8, 8, -5);
+/* And as many others as necessary. */
+
+ /* Execute the tests. */
+ TEST_VMYINTR(, int, s, 8, 8);
+/* And as many others as necessary. */
+
+ check_results (TEST_MSG, "");
+}
+
+int main (void)
+{
+ exec_vmyintrinsic ();
+ return 0;
+}
+=================================================================
--- /dev/null
+# Copyright (C) 2014 Free Software Foundation, Inc.
+
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with GCC; see the file COPYING3. If not see
+# <http://www.gnu.org/licenses/>.
+
+# GCC testsuite that uses the `dg.exp' driver.
+
+# Exit immediately if this isn't an ARM or AArch64 target.
+if {![istarget arm*-*-*]
+ && ![istarget aarch64*-*-*]} then {
+ return
+}
+
+# Load support procs.
+load_lib gcc-dg.exp
+
+# Initialize `dg'.
+load_lib c-torture.exp
+load_lib target-supports.exp
+load_lib torture-options.exp
+
+dg-init
+
+torture-init
+set-torture-options $C_TORTURE_OPTIONS {{}} $LTO_TORTURE_OPTIONS
+
+# Make sure Neon flags are provided, if necessary.
+set additional_flags [add_options_for_arm_neon ""]
+
+# Main loop.
+foreach src [lsort [glob -nocomplain $srcdir/$subdir/*.c]] {
+ # If we're only testing specific files and this isn't one of them, skip it.
+ if ![runtest_file_p $runtests $src] then {
+ continue
+ }
+
+ # runtest_file_p is already run above, and the code below can run
+ # runtest_file_p again, make sure everything for this test is
+ # performed if the above runtest_file_p decided this runtest
+ # instance should execute the test
+ gcc_parallel_test_enable 0
+ c-torture-execute $src $additional_flags
+ gcc-dg-runtest $src "" $additional_flags
+ gcc_parallel_test_enable 1
+}
+
+# All done.
+torture-finish
+dg-finish
--- /dev/null
+/* This file defines helper operations shared by all the tests. */
+
+#ifndef _ARM_NEON_REF_H_
+#define _ARM_NEON_REF_H_
+
+#include <stdio.h>
+#include <inttypes.h>
+
+/* helper type, to help write floating point results in integer form. */
+typedef uint32_t hfloat32_t;
+
+extern void abort(void);
+extern void *memset(void *, int, size_t);
+extern void *memcpy(void *, const void *, size_t);
+extern size_t strlen(const char *);
+
+/* Various string construction helpers. */
+
+/*
+ The most useful at user-level are VECT_VAR and VECT_VAR_DECL, which
+ construct variable names or declarations, such as:
+ VECT_VAR(expected, int, 16, 4) -> expected_int16x4
+ VECT_VAR_DECL(expected, int, 16, 4) -> int16x4_t expected_int16x4
+*/
+
+#define xSTR(X) #X
+#define STR(X) xSTR(X)
+
+#define xNAME1(V,T) V ## _ ## T
+#define xNAME(V,T) xNAME1(V,T)
+
+/* VAR(foo,int,16) -> foo_int16 */
+#define VAR(V,T,W) xNAME(V,T##W)
+/* VAR_DECL(foo,int,16) -> int16_t foo_int16 */
+#define VAR_DECL(V, T, W) T##W##_t VAR(V,T,W)
+
+/* VECT_NAME(int,16,4) -> int16x4 */
+#define VECT_NAME(T, W, N) T##W##x##N
+/* VECT_ARRAY_NAME(int,16,4,2) -> int16x4x2 */
+#define VECT_ARRAY_NAME(T, W, N, L) T##W##x##N##x##L
+/* VECT_TYPE(int,16,4) -> int16x4_t */
+#define VECT_TYPE(T, W, N) xNAME(VECT_NAME(T,W,N),t)
+/* VECT_ARRAY_TYPE(int,16,4,2) -> int16x4x2_t */
+#define VECT_ARRAY_TYPE(T, W, N, L) xNAME(VECT_ARRAY_NAME(T,W,N,L),t)
+
+/* VECT_VAR(foo,int,16,4) -> foo_int16x4 */
+#define VECT_VAR(V,T,W,N) xNAME(V,VECT_NAME(T,W,N))
+/* VECT_VAR_DECL(foo,int,16,4) -> int16_t foo_int16x4 */
+#define VECT_VAR_DECL(V, T, W, N) T##W##_t VECT_VAR(V,T,W,N)
+
+/* Array declarations. */
+/* ARRAY(foo,int,16,4) -> int16_t foo_int16x4[4] */
+#define ARRAY(V, T, W, N) VECT_VAR_DECL(V,T,W,N)[N]
+
+/* Arrays of vectors. */
+/* VECT_ARRAY_VAR(foo,int,16,4,2) -> foo_int16x4x2 */
+#define VECT_ARRAY_VAR(V,T,W,N,L) xNAME(V,VECT_ARRAY_NAME(T,W,N,L))
+/* VECT_ARRAY(foo,int,16,4,2) -> int16_t foo_int16x4x2[4*2] */
+#define VECT_ARRAY(V, T, W, N, L) T##W##_t VECT_ARRAY_VAR(V,T,W,N,L)[N*L]
+
+/* Check results vs expected values. Operates on one vector. */
+#define CHECK(MSG,T,W,N,FMT,EXPECTED,COMMENT) \
+ { \
+ int i; \
+ for(i=0; i<N ; i++) \
+ { \
+ if (VECT_VAR(result, T, W, N)[i] != \
+ VECT_VAR(EXPECTED, T, W, N)[i]) { \
+ fprintf(stderr, \
+ "ERROR in %s (%s line %d in buffer '%s') at type %s " \
+ "index %d: got 0x%" FMT " != 0x%" FMT " %s\n", \
+ MSG, __FILE__, __LINE__, \
+ STR(EXPECTED), \
+ STR(VECT_NAME(T, W, N)), \
+ i, \
+ VECT_VAR(result, T, W, N)[i], \
+ VECT_VAR(EXPECTED, T, W, N)[i], \
+ strlen(COMMENT) > 0 ? COMMENT : ""); \
+ abort(); \
+ } \
+ } \
+ }
+
+/* Floating-point variant. */
+#define CHECK_FP(MSG,T,W,N,FMT,EXPECTED,COMMENT) \
+ { \
+ int i; \
+ for(i=0; i<N ; i++) \
+ { \
+ union fp_operand { \
+ uint##W##_t i; \
+ float##W##_t f; \
+ } tmp_res, tmp_exp; \
+ tmp_res.f = VECT_VAR(result, T, W, N)[i]; \
+ tmp_exp.i = VECT_VAR(EXPECTED, h##T, W, N)[i]; \
+ if (tmp_res.i != tmp_exp.i) { \
+ fprintf(stderr, \
+ "ERROR in %s (%s line %d in buffer '%s') at type %s " \
+ "index %d: got 0x%" FMT " != 0x%" FMT " %s\n", \
+ MSG, __FILE__, __LINE__, \
+ STR(EXPECTED), \
+ STR(VECT_NAME(T, W, N)), \
+ i, \
+ tmp_res.i, \
+ tmp_exp.i, \
+ strlen(COMMENT) > 0 ? COMMENT : ""); \
+ abort(); \
+ } \
+ } \
+ }
+
+/* Clean buffer with a non-zero pattern to help diagnose buffer
+ overflows. */
+#define CLEAN_PATTERN_8 0x33
+
+#define CLEAN(VAR,T,W,N) \
+ memset(VECT_VAR(VAR, T, W, N), \
+ CLEAN_PATTERN_8, \
+ sizeof(VECT_VAR(VAR, T, W, N)));
+
+/* Define output buffers, one of each size. */
+static ARRAY(result, int, 8, 8);
+static ARRAY(result, int, 16, 4);
+static ARRAY(result, int, 32, 2);
+static ARRAY(result, int, 64, 1);
+static ARRAY(result, uint, 8, 8);
+static ARRAY(result, uint, 16, 4);
+static ARRAY(result, uint, 32, 2);
+static ARRAY(result, uint, 64, 1);
+static ARRAY(result, poly, 8, 8);
+static ARRAY(result, poly, 16, 4);
+static ARRAY(result, float, 32, 2);
+static ARRAY(result, int, 8, 16);
+static ARRAY(result, int, 16, 8);
+static ARRAY(result, int, 32, 4);
+static ARRAY(result, int, 64, 2);
+static ARRAY(result, uint, 8, 16);
+static ARRAY(result, uint, 16, 8);
+static ARRAY(result, uint, 32, 4);
+static ARRAY(result, uint, 64, 2);
+static ARRAY(result, poly, 8, 16);
+static ARRAY(result, poly, 16, 8);
+static ARRAY(result, float, 32, 4);
+
+/* Declare expected results, one of each size. They are defined and
+ initialized in each test file. */
+extern ARRAY(expected, int, 8, 8);
+extern ARRAY(expected, int, 16, 4);
+extern ARRAY(expected, int, 32, 2);
+extern ARRAY(expected, int, 64, 1);
+extern ARRAY(expected, uint, 8, 8);
+extern ARRAY(expected, uint, 16, 4);
+extern ARRAY(expected, uint, 32, 2);
+extern ARRAY(expected, uint, 64, 1);
+extern ARRAY(expected, poly, 8, 8);
+extern ARRAY(expected, poly, 16, 4);
+extern ARRAY(expected, hfloat, 32, 2);
+extern ARRAY(expected, int, 8, 16);
+extern ARRAY(expected, int, 16, 8);
+extern ARRAY(expected, int, 32, 4);
+extern ARRAY(expected, int, 64, 2);
+extern ARRAY(expected, uint, 8, 16);
+extern ARRAY(expected, uint, 16, 8);
+extern ARRAY(expected, uint, 32, 4);
+extern ARRAY(expected, uint, 64, 2);
+extern ARRAY(expected, poly, 8, 16);
+extern ARRAY(expected, poly, 16, 8);
+extern ARRAY(expected, hfloat, 32, 4);
+
+/* Check results. Operates on all possible vector types. */
+#define CHECK_RESULTS(test_name,comment) \
+ { \
+ CHECK(test_name, int, 8, 8, PRIx8, expected, comment); \
+ CHECK(test_name, int, 16, 4, PRIx16, expected, comment); \
+ CHECK(test_name, int, 32, 2, PRIx32, expected, comment); \
+ CHECK(test_name, int, 64, 1, PRIx64, expected, comment); \
+ CHECK(test_name, uint, 8, 8, PRIx8, expected, comment); \
+ CHECK(test_name, uint, 16, 4, PRIx16, expected, comment); \
+ CHECK(test_name, uint, 32, 2, PRIx32, expected, comment); \
+ CHECK(test_name, uint, 64, 1, PRIx64, expected, comment); \
+ CHECK(test_name, poly, 8, 8, PRIx8, expected, comment); \
+ CHECK(test_name, poly, 16, 4, PRIx16, expected, comment); \
+ CHECK_FP(test_name, float, 32, 2, PRIx32, expected, comment); \
+ \
+ CHECK(test_name, int, 8, 16, PRIx8, expected, comment); \
+ CHECK(test_name, int, 16, 8, PRIx16, expected, comment); \
+ CHECK(test_name, int, 32, 4, PRIx32, expected, comment); \
+ CHECK(test_name, int, 64, 2, PRIx64, expected, comment); \
+ CHECK(test_name, uint, 8, 16, PRIx8, expected, comment); \
+ CHECK(test_name, uint, 16, 8, PRIx16, expected, comment); \
+ CHECK(test_name, uint, 32, 4, PRIx32, expected, comment); \
+ CHECK(test_name, uint, 64, 2, PRIx64, expected, comment); \
+ CHECK(test_name, poly, 8, 16, PRIx8, expected, comment); \
+ CHECK(test_name, poly, 16, 8, PRIx16, expected, comment); \
+ CHECK_FP(test_name, float, 32, 4, PRIx32, expected, comment); \
+ } \
+
+#define CHECK_RESULTS_NAMED(test_name,EXPECTED,comment) \
+ { \
+ CHECK(test_name, int, 8, 8, PRIx8, EXPECTED, comment); \
+ CHECK(test_name, int, 16, 4, PRIx16, EXPECTED, comment); \
+ CHECK(test_name, int, 32, 2, PRIx32, EXPECTED, comment); \
+ CHECK(test_name, int, 64, 1, PRIx64, EXPECTED, comment); \
+ CHECK(test_name, uint, 8, 8, PRIx8, EXPECTED, comment); \
+ CHECK(test_name, uint, 16, 4, PRIx16, EXPECTED, comment); \
+ CHECK(test_name, uint, 32, 2, PRIx32, EXPECTED, comment); \
+ CHECK(test_name, uint, 64, 1, PRIx64, EXPECTED, comment); \
+ CHECK(test_name, poly, 8, 8, PRIx8, EXPECTED, comment); \
+ CHECK(test_name, poly, 16, 4, PRIx16, EXPECTED, comment); \
+ CHECK_FP(test_name, float, 32, 2, PRIx32, EXPECTED, comment); \
+ \
+ CHECK(test_name, int, 8, 16, PRIx8, EXPECTED, comment); \
+ CHECK(test_name, int, 16, 8, PRIx16, EXPECTED, comment); \
+ CHECK(test_name, int, 32, 4, PRIx32, EXPECTED, comment); \
+ CHECK(test_name, int, 64, 2, PRIx64, EXPECTED, comment); \
+ CHECK(test_name, uint, 8, 16, PRIx8, EXPECTED, comment); \
+ CHECK(test_name, uint, 16, 8, PRIx16, EXPECTED, comment); \
+ CHECK(test_name, uint, 32, 4, PRIx32, EXPECTED, comment); \
+ CHECK(test_name, uint, 64, 2, PRIx64, EXPECTED, comment); \
+ CHECK(test_name, poly, 8, 16, PRIx8, EXPECTED, comment); \
+ CHECK(test_name, poly, 16, 8, PRIx16, EXPECTED, comment); \
+ CHECK_FP(test_name, float, 32, 4, PRIx32, EXPECTED, comment); \
+ } \
+
+
+
+#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
+
+typedef union {
+ struct {
+ int _xxx:27;
+ unsigned int QC:1;
+ int V:1;
+ int C:1;
+ int Z:1;
+ int N:1;
+ } b;
+ unsigned int word;
+} _ARM_FPSCR;
+
+#else /* __ORDER_BIG_ENDIAN__ */
+
+typedef union {
+ struct {
+ int N:1;
+ int Z:1;
+ int C:1;
+ int V:1;
+ unsigned int QC:1;
+ int _dnm:27;
+ } b;
+ unsigned int word;
+} _ARM_FPSCR;
+
+#endif /* __ORDER_BIG_ENDIAN__ */
+
+#define Neon_Cumulative_Sat __read_neon_cumulative_sat()
+#define Set_Neon_Cumulative_Sat(x) __set_neon_cumulative_sat((x))
+
+#if defined(__aarch64__)
+static volatile int __read_neon_cumulative_sat (void) {
+ _ARM_FPSCR _afpscr_for_qc;
+ asm volatile ("mrs %0,fpsr" : "=r" (_afpscr_for_qc));
+ return _afpscr_for_qc.b.QC;
+}
+static void __set_neon_cumulative_sat (int x) {
+ _ARM_FPSCR _afpscr_for_qc;
+ asm volatile ("mrs %0,fpsr" : "=r" (_afpscr_for_qc));
+ _afpscr_for_qc.b.QC = x;
+ asm volatile ("msr fpsr,%0" : : "r" (_afpscr_for_qc));
+ return;
+}
+#else
+static volatile int __read_neon_cumulative_sat (void) {
+ _ARM_FPSCR _afpscr_for_qc;
+ asm volatile ("vmrs %0,fpscr" : "=r" (_afpscr_for_qc));
+ return _afpscr_for_qc.b.QC;
+}
+
+static void __set_neon_cumulative_sat (int x) {
+ _ARM_FPSCR _afpscr_for_qc;
+ asm volatile ("vmrs %0,fpscr" : "=r" (_afpscr_for_qc));
+ _afpscr_for_qc.b.QC = x;
+ asm volatile ("vmsr fpscr,%0" : : "r" (_afpscr_for_qc));
+ return;
+}
+#endif
+
+/* Declare expected cumulative saturation results, one for each
+ size. They are defined and initialized in relevant test files. */
+extern int VECT_VAR(expected_cumulative_sat, int, 8, 8);
+extern int VECT_VAR(expected_cumulative_sat, int, 16, 4);
+extern int VECT_VAR(expected_cumulative_sat, int, 32, 2);
+extern int VECT_VAR(expected_cumulative_sat, int, 64, 1);
+extern int VECT_VAR(expected_cumulative_sat, uint, 8, 8);
+extern int VECT_VAR(expected_cumulative_sat, uint, 16, 4);
+extern int VECT_VAR(expected_cumulative_sat, uint, 32, 2);
+extern int VECT_VAR(expected_cumulative_sat, uint, 64, 1);
+extern int VECT_VAR(expected_cumulative_sat, int, 8, 16);
+extern int VECT_VAR(expected_cumulative_sat, int, 16, 8);
+extern int VECT_VAR(expected_cumulative_sat, int, 32, 4);
+extern int VECT_VAR(expected_cumulative_sat, int, 64, 2);
+extern int VECT_VAR(expected_cumulative_sat, uint, 8, 16);
+extern int VECT_VAR(expected_cumulative_sat, uint, 16, 8);
+extern int VECT_VAR(expected_cumulative_sat, uint, 32, 4);
+extern int VECT_VAR(expected_cumulative_sat, uint, 64, 2);
+
+/* Check cumulative saturation flag vs expected value. */
+#define CHECK_CUMULATIVE_SAT(MSG,T,W,N,EXPECTED,COMMENT) \
+ { \
+ if (Neon_Cumulative_Sat != \
+ VECT_VAR(EXPECTED, T, W, N)) { \
+ fprintf(stderr, \
+ "ERROR in %s (%s line %d in cumulative_sat '%s') at type %s: " \
+ "got %d expected %d%s\n", \
+ MSG, __FILE__, __LINE__, \
+ STR(EXPECTED), \
+ STR(VECT_NAME(T, W, N)), \
+ Neon_Cumulative_Sat, \
+ VECT_VAR(EXPECTED, T, W, N), \
+ strlen(COMMENT) > 0 ? " " COMMENT : ""); \
+ abort(); \
+ } \
+ }
+
+#define CHECK_CUMULATIVE_SAT_NAMED(test_name,EXPECTED,comment) \
+ { \
+ CHECK_CUMULATIVE_SAT(test_name, int, 8, 8, PRIx8, EXPECTED, comment); \
+ CHECK_CUMULATIVE_SAT(test_name, int, 16, 4, PRIx16, EXPECTED, comment); \
+ CHECK_CUMULATIVE_SAT(test_name, int, 32, 2, PRIx32, EXPECTED, comment); \
+ CHECK_CUMULATIVE_SAT(test_name, int, 64, 1, PRIx64, EXPECTED, comment); \
+ CHECK_CUMULATIVE_SAT(test_name, uint, 8, 8, PRIx8, EXPECTED, comment); \
+ CHECK_CUMULATIVE_SAT(test_name, uint, 16, 4, PRIx16, EXPECTED, comment); \
+ CHECK_CUMULATIVE_SAT(test_name, uint, 32, 2, PRIx32, EXPECTED, comment); \
+ CHECK_CUMULATIVE_SAT(test_name, uint, 64, 1, PRIx64, EXPECTED, comment); \
+ CHECK_CUMULATIVE_SAT(test_name, poly, 8, 8, PRIx8, EXPECTED, comment); \
+ CHECK_CUMULATIVE_SAT(test_name, poly, 16, 4, PRIx16, EXPECTED, comment); \
+ CHECK_CUMULATIVE_SAT_FP(test_name, float, 32, 2, PRIx32, EXPECTED, comment); \
+ \
+ CHECK_CUMULATIVE_SAT(test_name, int, 8, 16, PRIx8, EXPECTED, comment); \
+ CHECK_CUMULATIVE_SAT(test_name, int, 16, 8, PRIx16, EXPECTED, comment); \
+ CHECK_CUMULATIVE_SAT(test_name, int, 32, 4, PRIx32, EXPECTED, comment); \
+ CHECK_CUMULATIVE_SAT(test_name, int, 64, 2, PRIx64, EXPECTED, comment); \
+ CHECK_CUMULATIVE_SAT(test_name, uint, 8, 16, PRIx8, EXPECTED, comment); \
+ CHECK_CUMULATIVE_SAT(test_name, uint, 16, 8, PRIx16, EXPECTED, comment); \
+ CHECK_CUMULATIVE_SAT(test_name, uint, 32, 4, PRIx32, EXPECTED, comment); \
+ CHECK_CUMULATIVE_SAT(test_name, uint, 64, 2, PRIx64, EXPECTED, comment); \
+ CHECK_CUMULATIVE_SAT(test_name, poly, 8, 16, PRIx8, EXPECTED, comment); \
+ CHECK_CUMULATIVE_SAT(test_name, poly, 16, 8, PRIx16, EXPECTED, comment); \
+ CHECK_CUMULATIVE_SAT_FP(test_name, float, 32, 4, PRIx32, EXPECTED, comment); \
+ } \
+
+
+/* Clean output buffers before execution. */
+static void clean_results (void)
+{
+ CLEAN(result, int, 8, 8);
+ CLEAN(result, int, 16, 4);
+ CLEAN(result, int, 32, 2);
+ CLEAN(result, int, 64, 1);
+ CLEAN(result, uint, 8, 8);
+ CLEAN(result, uint, 16, 4);
+ CLEAN(result, uint, 32, 2);
+ CLEAN(result, uint, 64, 1);
+ CLEAN(result, poly, 8, 8);
+ CLEAN(result, poly, 16, 4);
+ CLEAN(result, float, 32, 2);
+
+ CLEAN(result, int, 8, 16);
+ CLEAN(result, int, 16, 8);
+ CLEAN(result, int, 32, 4);
+ CLEAN(result, int, 64, 2);
+ CLEAN(result, uint, 8, 16);
+ CLEAN(result, uint, 16, 8);
+ CLEAN(result, uint, 32, 4);
+ CLEAN(result, uint, 64, 2);
+ CLEAN(result, poly, 8, 16);
+ CLEAN(result, poly, 16, 8);
+ CLEAN(result, float, 32, 4);
+}
+
+
+/* Helpers to declare variables of various types. */
+#define DECL_VARIABLE(VAR, T1, W, N) \
+ VECT_TYPE(T1, W, N) VECT_VAR(VAR, T1, W, N)
+
+/* Declare only 64 bits signed variants. */
+#define DECL_VARIABLE_64BITS_SIGNED_VARIANTS(VAR) \
+ DECL_VARIABLE(VAR, int, 8, 8); \
+ DECL_VARIABLE(VAR, int, 16, 4); \
+ DECL_VARIABLE(VAR, int, 32, 2); \
+ DECL_VARIABLE(VAR, int, 64, 1)
+
+/* Declare only 64 bits unsigned variants. */
+#define DECL_VARIABLE_64BITS_UNSIGNED_VARIANTS(VAR) \
+ DECL_VARIABLE(VAR, uint, 8, 8); \
+ DECL_VARIABLE(VAR, uint, 16, 4); \
+ DECL_VARIABLE(VAR, uint, 32, 2); \
+ DECL_VARIABLE(VAR, uint, 64, 1)
+
+/* Declare only 128 bits signed variants. */
+#define DECL_VARIABLE_128BITS_SIGNED_VARIANTS(VAR) \
+ DECL_VARIABLE(VAR, int, 8, 16); \
+ DECL_VARIABLE(VAR, int, 16, 8); \
+ DECL_VARIABLE(VAR, int, 32, 4); \
+ DECL_VARIABLE(VAR, int, 64, 2)
+
+/* Declare only 128 bits unsigned variants. */
+#define DECL_VARIABLE_128BITS_UNSIGNED_VARIANTS(VAR) \
+ DECL_VARIABLE(VAR, uint, 8, 16); \
+ DECL_VARIABLE(VAR, uint, 16, 8); \
+ DECL_VARIABLE(VAR, uint, 32, 4); \
+ DECL_VARIABLE(VAR, uint, 64, 2)
+
+/* Declare all 64 bits variants. */
+#define DECL_VARIABLE_64BITS_VARIANTS(VAR) \
+ DECL_VARIABLE_64BITS_SIGNED_VARIANTS(VAR); \
+ DECL_VARIABLE_64BITS_UNSIGNED_VARIANTS(VAR); \
+ DECL_VARIABLE(VAR, poly, 8, 8); \
+ DECL_VARIABLE(VAR, poly, 16, 4); \
+ DECL_VARIABLE(VAR, float, 32, 2)
+
+/* Declare all 128 bits variants. */
+#define DECL_VARIABLE_128BITS_VARIANTS(VAR) \
+ DECL_VARIABLE_128BITS_SIGNED_VARIANTS(VAR); \
+ DECL_VARIABLE_128BITS_UNSIGNED_VARIANTS(VAR); \
+ DECL_VARIABLE(VAR, poly, 8, 16); \
+ DECL_VARIABLE(VAR, poly, 16, 8); \
+ DECL_VARIABLE(VAR, float, 32, 4)
+
+/* Declare all variants. */
+#define DECL_VARIABLE_ALL_VARIANTS(VAR) \
+ DECL_VARIABLE_64BITS_VARIANTS(VAR); \
+ DECL_VARIABLE_128BITS_VARIANTS(VAR)
+
+/* Declare all signed variants. */
+#define DECL_VARIABLE_SIGNED_VARIANTS(VAR) \
+ DECL_VARIABLE_64BITS_SIGNED_VARIANTS(VAR); \
+ DECL_VARIABLE_128BITS_SIGNED_VARIANTS(VAR)
+
+/* Declare all unsigned variants. */
+#define DECL_VARIABLE_UNSIGNED_VARIANTS(VAR) \
+ DECL_VARIABLE_64BITS_UNSIGNED_VARIANTS(VAR); \
+ DECL_VARIABLE_128BITS_UNSIGNED_VARIANTS(VAR)
+
+/* Helpers to initialize vectors. */
+#define VDUP(VAR, Q, T1, T2, W, N, V) \
+ VECT_VAR(VAR, T1, W, N) = vdup##Q##_n_##T2##W(V)
+
+#define VSET_LANE(VAR, Q, T1, T2, W, N, L, V) \
+ VECT_VAR(VAR, T1, W, N) = vset##Q##_lane_##T2##W(V, \
+ VECT_VAR(VAR, T1, W, N), \
+ L)
+
+/* We need to load initial values first, so rely on VLD1. */
+#define VLOAD(VAR, BUF, Q, T1, T2, W, N) \
+ VECT_VAR(VAR, T1, W, N) = vld1##Q##_##T2##W(VECT_VAR(BUF, T1, W, N))
+
+/* Helpers to call macros with 1 constant and 5 variable
+ arguments. */
+#define TEST_MACRO_64BITS_SIGNED_VARIANTS_1_5(MACRO, VAR) \
+ MACRO(VAR, , int, s, 8, 8); \
+ MACRO(VAR, , int, s, 16, 4); \
+ MACRO(VAR, , int, s, 32, 2); \
+ MACRO(VAR, , int, s, 64, 1)
+
+#define TEST_MACRO_64BITS_UNSIGNED_VARIANTS_1_5(MACRO, VAR) \
+ MACRO(VAR, , uint, u, 8, 8); \
+ MACRO(VAR, , uint, u, 16, 4); \
+ MACRO(VAR, , uint, u, 32, 2); \
+ MACRO(VAR, , uint, u, 64, 1)
+
+#define TEST_MACRO_128BITS_SIGNED_VARIANTS_1_5(MACRO, VAR) \
+ MACRO(VAR, q, int, s, 8, 16); \
+ MACRO(VAR, q, int, s, 16, 8); \
+ MACRO(VAR, q, int, s, 32, 4); \
+ MACRO(VAR, q, int, s, 64, 2)
+
+#define TEST_MACRO_128BITS_UNSIGNED_VARIANTS_1_5(MACRO,VAR) \
+ MACRO(VAR, q, uint, u, 8, 16); \
+ MACRO(VAR, q, uint, u, 16, 8); \
+ MACRO(VAR, q, uint, u, 32, 4); \
+ MACRO(VAR, q, uint, u, 64, 2)
+
+#define TEST_MACRO_64BITS_VARIANTS_1_5(MACRO, VAR) \
+ TEST_MACRO_64BITS_SIGNED_VARIANTS_1_5(MACRO, VAR); \
+ TEST_MACRO_64BITS_UNSIGNED_VARIANTS_1_5(MACRO, VAR)
+
+#define TEST_MACRO_128BITS_VARIANTS_1_5(MACRO, VAR) \
+ TEST_MACRO_128BITS_SIGNED_VARIANTS_1_5(MACRO, VAR); \
+ TEST_MACRO_128BITS_UNSIGNED_VARIANTS_1_5(MACRO, VAR)
+
+#define TEST_MACRO_ALL_VARIANTS_1_5(MACRO, VAR) \
+ TEST_MACRO_64BITS_VARIANTS_1_5(MACRO, VAR); \
+ TEST_MACRO_128BITS_VARIANTS_1_5(MACRO, VAR)
+
+#define TEST_MACRO_SIGNED_VARIANTS_1_5(MACRO, VAR) \
+ TEST_MACRO_64BITS_SIGNED_VARIANTS_1_5(MACRO, VAR); \
+ TEST_MACRO_128BITS_SIGNED_VARIANTS_1_5(MACRO, VAR)
+
+/* Helpers to call macros with 2 constant and 5 variable
+ arguments. */
+#define TEST_MACRO_64BITS_SIGNED_VARIANTS_2_5(MACRO, VAR1, VAR2) \
+ MACRO(VAR1, VAR2, , int, s, 8, 8); \
+ MACRO(VAR1, VAR2, , int, s, 16, 4); \
+ MACRO(VAR1, VAR2, , int, s, 32, 2); \
+ MACRO(VAR1, VAR2 , , int, s, 64, 1)
+
+#define TEST_MACRO_64BITS_UNSIGNED_VARIANTS_2_5(MACRO, VAR1, VAR2) \
+ MACRO(VAR1, VAR2, , uint, u, 8, 8); \
+ MACRO(VAR1, VAR2, , uint, u, 16, 4); \
+ MACRO(VAR1, VAR2, , uint, u, 32, 2); \
+ MACRO(VAR1, VAR2, , uint, u, 64, 1)
+
+#define TEST_MACRO_128BITS_SIGNED_VARIANTS_2_5(MACRO, VAR1, VAR2) \
+ MACRO(VAR1, VAR2, q, int, s, 8, 16); \
+ MACRO(VAR1, VAR2, q, int, s, 16, 8); \
+ MACRO(VAR1, VAR2, q, int, s, 32, 4); \
+ MACRO(VAR1, VAR2, q, int, s, 64, 2)
+
+#define TEST_MACRO_128BITS_UNSIGNED_VARIANTS_2_5(MACRO, VAR1, VAR2) \
+ MACRO(VAR1, VAR2, q, uint, u, 8, 16); \
+ MACRO(VAR1, VAR2, q, uint, u, 16, 8); \
+ MACRO(VAR1, VAR2, q, uint, u, 32, 4); \
+ MACRO(VAR1, VAR2, q, uint, u, 64, 2)
+
+#define TEST_MACRO_64BITS_VARIANTS_2_5(MACRO, VAR1, VAR2) \
+ TEST_MACRO_64BITS_SIGNED_VARIANTS_2_5(MACRO, VAR1, VAR2); \
+ TEST_MACRO_64BITS_UNSIGNED_VARIANTS_2_5(MACRO, VAR1, VAR2); \
+ MACRO(VAR1, VAR2, , poly, p, 8, 8); \
+ MACRO(VAR1, VAR2, , poly, p, 16, 4)
+
+#define TEST_MACRO_128BITS_VARIANTS_2_5(MACRO, VAR1, VAR2) \
+ TEST_MACRO_128BITS_SIGNED_VARIANTS_2_5(MACRO, VAR1, VAR2); \
+ TEST_MACRO_128BITS_UNSIGNED_VARIANTS_2_5(MACRO, VAR1, VAR2); \
+ MACRO(VAR1, VAR2, q, poly, p, 8, 16); \
+ MACRO(VAR1, VAR2, q, poly, p, 16, 8)
+
+#define TEST_MACRO_ALL_VARIANTS_2_5(MACRO, VAR1, VAR2) \
+ TEST_MACRO_64BITS_VARIANTS_2_5(MACRO, VAR1, VAR2); \
+ TEST_MACRO_128BITS_VARIANTS_2_5(MACRO, VAR1, VAR2)
+
+#define TEST_MACRO_SIGNED_VARIANTS_2_5(MACRO, VAR1, VAR2) \
+ TEST_MACRO_64BITS_SIGNED_VARIANTS_2_5(MACRO, VAR1, VAR2); \
+ TEST_MACRO_128BITS_SIGNED_VARIANTS_2_5(MACRO, VAR1, VAR2)
+
+#endif /* _ARM_NEON_REF_H_ */
--- /dev/null
+/* This file contains input data static definitions, shared by most of
+ the tests. */
+
+#include <arm_neon.h>
+#include "arm-neon-ref.h"
+
+/* Initialization helpers; 4 slices are needed for vld2, vld3 and
+ vld4. */
+#define MY_INIT_TABLE(T,W,N) xNAME(INIT_TABLE,N)(T##W##_t)
+#define MY_INIT_TABLE2(T,W,N) xNAME(INIT_TABLE2,N)(T##W##_t)
+#define MY_INIT_TABLE3(T,W,N) xNAME(INIT_TABLE3,N)(T##W##_t)
+#define MY_INIT_TABLE4(T,W,N) xNAME(INIT_TABLE4,N)(T##W##_t)
+
+/* Initialized input buffers. */
+#define VECT_VAR_DECL_INIT(V, T, W, N) \
+ VECT_VAR_DECL(V,T,W,N) [] = { MY_INIT_TABLE(T,W,N) }
+
+/* Specialized initializer with 4 entries, as used by vldX_dup and
+ vdup tests, which iterate 4 times on input buffers. */
+#define VECT_VAR_DECL_INIT4(V, T, W, N) \
+ VECT_VAR_DECL(V,T,W,N) [] = { MY_INIT_TABLE(T,W,4) };
+
+/* Initializers for arrays of vectors. */
+#define VECT_ARRAY_INIT2(V, T, W, N) \
+ T##W##_t VECT_ARRAY_VAR(V,T,W,N,2)[] = \
+ { MY_INIT_TABLE(T,W,N) \
+ MY_INIT_TABLE2(T,W,N) }
+
+#define VECT_ARRAY_INIT3(V, T, W, N) \
+ T##W##_t VECT_ARRAY_VAR(V,T,W,N,3)[] = \
+ { MY_INIT_TABLE(T,W,N) \
+ MY_INIT_TABLE2(T,W,N) \
+ MY_INIT_TABLE3(T,W,N) }
+
+#define VECT_ARRAY_INIT4(V, T, W, N) \
+ T##W##_t VECT_ARRAY_VAR(V,T,W,N,4)[] = \
+ { MY_INIT_TABLE(T,W,N) \
+ MY_INIT_TABLE2(T,W,N) \
+ MY_INIT_TABLE3(T,W,N) \
+ MY_INIT_TABLE4(T,W,N) }
+
+/* Sample initialization vectors. */
+#define INIT_TABLE_1(T) \
+ (T)-16,
+#define INIT_TABLE2_1(T) \
+ (T)-15,
+#define INIT_TABLE3_1(T) \
+ (T)-14,
+#define INIT_TABLE4_1(T) \
+ (T)-13,
+
+#define INIT_TABLE_2(T) \
+ (T)-16, (T)-15,
+#define INIT_TABLE2_2(T) \
+ (T)-14, (T)-13,
+#define INIT_TABLE3_2(T) \
+ (T)-12, (T)-11,
+#define INIT_TABLE4_2(T) \
+ (T)-10, (T)-9,
+
+/* Initializer for vld3_lane tests. */
+#define INIT_TABLE_3(T) \
+ (T)-16, (T)-15, (T)-14,
+
+#define INIT_TABLE_4(T) \
+ (T)-16, (T)-15, (T)-14, (T)-13,
+#define INIT_TABLE2_4(T) \
+ (T)-12, (T)-11, (T)-10, (T)-9,
+#define INIT_TABLE3_4(T) \
+ (T)-8, (T)-7, (T)-6, (T)-5,
+#define INIT_TABLE4_4(T) \
+ (T)-4, (T)-3, (T)-2, (T)-1,
+
+#define INIT_TABLE_8(T) \
+ (T)-16, (T)-15, (T)-14, (T)-13, (T)-12, (T)-11, (T)-10, (T)-9,
+#define INIT_TABLE2_8(T) \
+ (T)-8, (T)-7, (T)-6, (T)-5, (T)-4, (T)-3, (T)-2, (T)-1,
+#define INIT_TABLE3_8(T) \
+ (T)0, (T)1, (T)2, (T)3, (T)4, (T)5, (T)6, (T)7,
+#define INIT_TABLE4_8(T) \
+ (T)8, (T)9, (T)10, (T)11, (T)12, (T)13, (T)14, (T)15,
+
+#define INIT_TABLE_16(T) \
+ (T)-16, (T)-15, (T)-14, (T)-13, (T)-12, (T)-11, (T)-10, (T)-9, \
+ (T)-8, (T)-7, (T)-6, (T)-5, (T)-4, (T)-3, (T)-2, (T)-1,
+#define INIT_TABLE2_16(T) \
+ (T)0, (T)1, (T)2, (T)3, (T)4, (T)5, (T)6, (T)7, \
+ (T)8, (T)9, (T)10, (T)11, (T)12, (T)13, (T)14, (T)15,
+#define INIT_TABLE3_16(T) \
+ (T)16, (T)17, (T)18, (T)19, (T)20, (T)21, (T)22, (T)23, \
+ (T)24, (T)25, (T)26, (T)27, (T)28, (T)29, (T)30, (T)31,
+#define INIT_TABLE4_16(T) \
+ (T)32, (T)33, (T)34, (T)35, (T)36, (T)37, (T)38, (T)39, \
+ (T)40, (T)41, (T)42, (T)43, (T)44, (T)45, (T)46, (T)47,
+
+/* This one is used for padding between input buffers. */
+#define PAD(V, T, W, N) char VECT_VAR(V,T,W,N)=42
+
+/* Input buffers, one of each size. */
+/* Insert some padding to try to exhibit out of bounds accesses. */
+VECT_VAR_DECL_INIT(buffer, int, 8, 8);
+PAD(buffer_pad, int, 8, 8);
+VECT_VAR_DECL_INIT(buffer, int, 16, 4);
+PAD(buffer_pad, int, 16, 4);
+VECT_VAR_DECL_INIT(buffer, int, 32, 2);
+PAD(buffer_pad, int, 32, 2);
+VECT_VAR_DECL_INIT(buffer, int, 64, 1);
+PAD(buffer_pad, int, 64, 1);
+VECT_VAR_DECL_INIT(buffer, uint, 8, 8);
+PAD(buffer_pad, uint, 8, 8);
+VECT_VAR_DECL_INIT(buffer, poly, 8, 8);
+PAD(buffer_pad, poly, 8, 8);
+VECT_VAR_DECL_INIT(buffer, poly, 16, 4);
+PAD(buffer_pad, poly, 16, 4);
+VECT_VAR_DECL_INIT(buffer, uint, 16, 4);
+PAD(buffer_pad, uint, 16, 4);
+VECT_VAR_DECL_INIT(buffer, uint, 32, 2);
+PAD(buffer_pad, uint, 32, 2);
+VECT_VAR_DECL_INIT(buffer, uint, 64, 1);
+PAD(buffer_pad, uint, 64, 1);
+VECT_VAR_DECL_INIT(buffer, float, 32, 2);
+PAD(buffer_pad, float, 32, 2);
+VECT_VAR_DECL_INIT(buffer, int, 8, 16);
+PAD(buffer_pad, int, 8, 16);
+VECT_VAR_DECL_INIT(buffer, int, 16, 8);
+PAD(buffer_pad, int, 16, 8);
+VECT_VAR_DECL_INIT(buffer, int, 32, 4);
+PAD(buffer_pad, int, 32, 4);
+VECT_VAR_DECL_INIT(buffer, int, 64, 2);
+PAD(buffer_pad, int, 64, 2);
+VECT_VAR_DECL_INIT(buffer, uint, 8, 16);
+PAD(buffer_pad, uint, 8, 16);
+VECT_VAR_DECL_INIT(buffer, uint, 16, 8);
+PAD(buffer_pad, uint, 16, 8);
+VECT_VAR_DECL_INIT(buffer, uint, 32, 4);
+PAD(buffer_pad, uint, 32, 4);
+VECT_VAR_DECL_INIT(buffer, uint, 64, 2);
+PAD(buffer_pad, uint, 64, 2);
+VECT_VAR_DECL_INIT(buffer, poly, 8, 16);
+PAD(buffer_pad, poly, 8, 16);
+VECT_VAR_DECL_INIT(buffer, poly, 16, 8);
+PAD(buffer_pad, poly, 16, 8);
+VECT_VAR_DECL_INIT(buffer, float, 32, 4);
+PAD(buffer_pad, float, 32, 4);
+
+/* The tests for vld1_dup and vdup expect at least 4 entries in the
+ input buffer, so force 1- and 2-elements initializers to have 4
+ entries (using VECT_VAR_DECL_INIT4). */
+VECT_VAR_DECL_INIT(buffer_dup, int, 8, 8);
+VECT_VAR_DECL(buffer_dup_pad, int, 8, 8);
+VECT_VAR_DECL_INIT(buffer_dup, int, 16, 4);
+VECT_VAR_DECL(buffer_dup_pad, int, 16, 4);
+VECT_VAR_DECL_INIT4(buffer_dup, int, 32, 2);
+VECT_VAR_DECL(buffer_dup_pad, int, 32, 2);
+VECT_VAR_DECL_INIT4(buffer_dup, int, 64, 1);
+VECT_VAR_DECL(buffer_dup_pad, int, 64, 1);
+VECT_VAR_DECL_INIT(buffer_dup, uint, 8, 8);
+VECT_VAR_DECL(buffer_dup_pad, uint, 8, 8);
+VECT_VAR_DECL_INIT(buffer_dup, uint, 16, 4);
+VECT_VAR_DECL(buffer_dup_pad, uint, 16, 4);
+VECT_VAR_DECL_INIT4(buffer_dup, uint, 32, 2);
+VECT_VAR_DECL(buffer_dup_pad, uint, 32, 2);
+VECT_VAR_DECL_INIT4(buffer_dup, uint, 64, 1);
+VECT_VAR_DECL(buffer_dup_pad, uint, 64, 1);
+VECT_VAR_DECL_INIT(buffer_dup, poly, 8, 8);
+VECT_VAR_DECL(buffer_dup_pad, poly, 8, 8);
+VECT_VAR_DECL_INIT(buffer_dup, poly, 16, 4);
+VECT_VAR_DECL(buffer_dup_pad, poly, 16, 4);
+VECT_VAR_DECL_INIT4(buffer_dup, float, 32, 2);
+VECT_VAR_DECL(buffer_dup_pad, float, 32, 2);
+
+VECT_VAR_DECL_INIT(buffer_dup, int, 8, 16);
+VECT_VAR_DECL(buffer_dup_pad, int, 8, 16);
+VECT_VAR_DECL_INIT(buffer_dup, int, 16, 8);
+VECT_VAR_DECL(buffer_dup_pad, int, 16, 8);
+VECT_VAR_DECL_INIT(buffer_dup, int, 32, 4);
+VECT_VAR_DECL(buffer_dup_pad, int, 32, 4);
+VECT_VAR_DECL_INIT4(buffer_dup, int, 64, 2);
+VECT_VAR_DECL(buffer_dup_pad, int, 64, 2);
+VECT_VAR_DECL_INIT(buffer_dup, uint, 8, 16);
+VECT_VAR_DECL(buffer_dup_pad, uint, 8, 16);
+VECT_VAR_DECL_INIT(buffer_dup, uint, 16, 8);
+VECT_VAR_DECL(buffer_dup_pad, uint, 16, 8);
+VECT_VAR_DECL_INIT(buffer_dup, uint, 32, 4);
+VECT_VAR_DECL(buffer_dup_pad, uint, 32, 4);
+VECT_VAR_DECL_INIT4(buffer_dup, uint, 64, 2);
+VECT_VAR_DECL(buffer_dup_pad, uint, 64, 2);
+VECT_VAR_DECL_INIT(buffer_dup, poly, 8, 16);
+VECT_VAR_DECL(buffer_dup_pad, poly, 8, 16);
+VECT_VAR_DECL_INIT(buffer_dup, poly, 16, 8);
+VECT_VAR_DECL(buffer_dup_pad, poly, 16, 8);
+VECT_VAR_DECL_INIT(buffer_dup, float, 32, 4);
+VECT_VAR_DECL(buffer_dup_pad, float, 32, 4);
--- /dev/null
+#include <arm_neon.h>
+#include "arm-neon-ref.h"
+#include "compute-ref-data.h"
+
+/* Expected results. */
+VECT_VAR_DECL(expected,int,8,8) [] = { 0xf6, 0xf7, 0xf8, 0xf9,
+ 0xfa, 0xfb, 0xfc, 0xfd };
+VECT_VAR_DECL(expected,int,16,4) [] = { 0x16, 0x17, 0x18, 0x19 };
+VECT_VAR_DECL(expected,int,32,2) [] = { 0x20, 0x21 };
+VECT_VAR_DECL(expected,int,64,1) [] = { 0x3333333333333333 };
+VECT_VAR_DECL(expected,uint,8,8) [] = { 0x53, 0x54, 0x55, 0x56,
+ 0x57, 0x58, 0x59, 0x5a };
+VECT_VAR_DECL(expected,uint,16,4) [] = { 0x907, 0x908, 0x909, 0x90a };
+VECT_VAR_DECL(expected,uint,32,2) [] = { 0xffffffe7, 0xffffffe8 };
+VECT_VAR_DECL(expected,uint,64,1) [] = { 0x3333333333333333 };
+VECT_VAR_DECL(expected,poly,8,8) [] = { 0x33, 0x33, 0x33, 0x33,
+ 0x33, 0x33, 0x33, 0x33 };
+VECT_VAR_DECL(expected,poly,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 };
+VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x33333333, 0x33333333 };
+VECT_VAR_DECL(expected,int,8,16) [] = { 0x5e, 0x5f, 0x60, 0x61,
+ 0x62, 0x63, 0x64, 0x65,
+ 0x66, 0x67, 0x68, 0x69,
+ 0x6a, 0x6b, 0x6c, 0x6d };
+VECT_VAR_DECL(expected,int,16,8) [] = { 0xb9c, 0xb9d, 0xb9e, 0xb9f,
+ 0xba0, 0xba1, 0xba2, 0xba3 };
+VECT_VAR_DECL(expected,int,32,4) [] = { 0x26e0, 0x26e1, 0x26e2, 0x26e3 };
+VECT_VAR_DECL(expected,int,64,2) [] = { 0x3333333333333333,
+ 0x3333333333333333 };
+VECT_VAR_DECL(expected,uint,8,16) [] = { 0xf8, 0xf9, 0xfa, 0xfb,
+ 0xfc, 0xfd, 0xfe, 0xff,
+ 0x0, 0x1, 0x2, 0x3,
+ 0x4, 0x5, 0x6, 0x7 };
+VECT_VAR_DECL(expected,uint,16,8) [] = { 0xfff9, 0xfffa, 0xfffb, 0xfffc,
+ 0xfffd, 0xfffe, 0xffff, 0x0 };
+VECT_VAR_DECL(expected,uint,32,4) [] = { 0xc, 0xd, 0xe, 0xf };
+VECT_VAR_DECL(expected,uint,64,2) [] = { 0x3333333333333333,
+ 0x3333333333333333 };
+VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33,
+ 0x33, 0x33, 0x33, 0x33,
+ 0x33, 0x33, 0x33, 0x33,
+ 0x33, 0x33, 0x33, 0x33 };
+VECT_VAR_DECL(expected,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333,
+ 0x3333, 0x3333, 0x3333, 0x3333 };
+VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x33333333, 0x33333333,
+ 0x33333333, 0x33333333 };
+
+#define TEST_MSG "VABA/VABAQ"
+void exec_vaba (void)
+{
+ /* Basic test: v4=vaba(v1,v2,v3), then store the result. */
+#define TEST_VABA(Q, T1, T2, W, N) \
+ VECT_VAR(vector_res, T1, W, N) = \
+ vaba##Q##_##T2##W(VECT_VAR(vector1, T1, W, N), \
+ VECT_VAR(vector2, T1, W, N), \
+ VECT_VAR(vector3, T1, W, N)); \
+ vst1##Q##_##T2##W(VECT_VAR(result, T1, W, N), VECT_VAR(vector_res, T1, W, N))
+
+#define DECL_VABA_VAR(VAR) \
+ DECL_VARIABLE(VAR, int, 8, 8); \
+ DECL_VARIABLE(VAR, int, 16, 4); \
+ DECL_VARIABLE(VAR, int, 32, 2); \
+ DECL_VARIABLE(VAR, uint, 8, 8); \
+ DECL_VARIABLE(VAR, uint, 16, 4); \
+ DECL_VARIABLE(VAR, uint, 32, 2); \
+ DECL_VARIABLE(VAR, int, 8, 16); \
+ DECL_VARIABLE(VAR, int, 16, 8); \
+ DECL_VARIABLE(VAR, int, 32, 4); \
+ DECL_VARIABLE(VAR, uint, 8, 16); \
+ DECL_VARIABLE(VAR, uint, 16, 8); \
+ DECL_VARIABLE(VAR, uint, 32, 4)
+
+ DECL_VABA_VAR(vector1);
+ DECL_VABA_VAR(vector2);
+ DECL_VABA_VAR(vector3);
+ DECL_VABA_VAR(vector_res);
+
+ clean_results ();
+
+ /* Initialize input "vector1" from "buffer". */
+ VLOAD(vector1, buffer, , int, s, 8, 8);
+ VLOAD(vector1, buffer, , int, s, 16, 4);
+ VLOAD(vector1, buffer, , int, s, 32, 2);
+ VLOAD(vector1, buffer, , uint, u, 8, 8);
+ VLOAD(vector1, buffer, , uint, u, 16, 4);
+ VLOAD(vector1, buffer, , uint, u, 32, 2);
+ VLOAD(vector1, buffer, q, int, s, 8, 16);
+ VLOAD(vector1, buffer, q, int, s, 16, 8);
+ VLOAD(vector1, buffer, q, int, s, 32, 4);
+ VLOAD(vector1, buffer, q, uint, u, 8, 16);
+ VLOAD(vector1, buffer, q, uint, u, 16, 8);
+ VLOAD(vector1, buffer, q, uint, u, 32, 4);
+
+ /* Choose init value arbitrarily. */
+ VDUP(vector2, , int, s, 8, 8, 1);
+ VDUP(vector2, , int, s, 16, 4, -13);
+ VDUP(vector2, , int, s, 32, 2, 8);
+ VDUP(vector2, , uint, u, 8, 8, 1);
+ VDUP(vector2, , uint, u, 16, 4, 13);
+ VDUP(vector2, , uint, u, 32, 2, 8);
+ VDUP(vector2, q, int, s, 8, 16, 10);
+ VDUP(vector2, q, int, s, 16, 8, -12);
+ VDUP(vector2, q, int, s, 32, 4, 32);
+ VDUP(vector2, q, uint, u, 8, 16, 10);
+ VDUP(vector2, q, uint, u, 16, 8, 12);
+ VDUP(vector2, q, uint, u, 32, 4, 32);
+
+ /* Choose init value arbitrarily. */
+ VDUP(vector3, , int, s, 8, 8, -5);
+ VDUP(vector3, , int, s, 16, 4, 25);
+ VDUP(vector3, , int, s, 32, 2, -40);
+ VDUP(vector3, , uint, u, 8, 8, 100);
+ VDUP(vector3, , uint, u, 16, 4, 2340);
+ VDUP(vector3, , uint, u, 32, 2, 0xffffffff);
+ VDUP(vector3, q, int, s, 8, 16, -100);
+ VDUP(vector3, q, int, s, 16, 8, -3000);
+ VDUP(vector3, q, int, s, 32, 4, 10000);
+ VDUP(vector3, q, uint, u, 8, 16, 2);
+ VDUP(vector3, q, uint, u, 16, 8, 3);
+ VDUP(vector3, q, uint, u, 32, 4, 4);
+
+ /* Execute the tests. */
+ TEST_VABA(, int, s, 8, 8);
+ TEST_VABA(, int, s, 16, 4);
+ TEST_VABA(, int, s, 32, 2);
+ TEST_VABA(, uint, u, 8, 8);
+ TEST_VABA(, uint, u, 16, 4);
+ TEST_VABA(, uint, u, 32, 2);
+ TEST_VABA(q, int, s, 8, 16);
+ TEST_VABA(q, int, s, 16, 8);
+ TEST_VABA(q, int, s, 32, 4);
+ TEST_VABA(q, uint, u, 8, 16);
+ TEST_VABA(q, uint, u, 16, 8);
+ TEST_VABA(q, uint, u, 32, 4);
+
+ CHECK_RESULTS (TEST_MSG, "");
+}
+
+int main (void)
+{
+ exec_vaba ();
+ return 0;
+}
--- /dev/null
+#include <arm_neon.h>
+#include "arm-neon-ref.h"
+#include "compute-ref-data.h"
+
+/* Expected results. */
+VECT_VAR_DECL(expected,int,8,8) [] = { 0xf0, 0xf1, 0xf2, 0xf3,
+ 0xf4, 0xf5, 0xf6, 0xf7 };
+VECT_VAR_DECL(expected,int,16,4) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff3 };
+VECT_VAR_DECL(expected,int,32,2) [] = { 0xfffffff0, 0xfffffff1 };
+VECT_VAR_DECL(expected,int,64,1) [] = { 0xfffffffffffffff0 };
+VECT_VAR_DECL(expected,uint,8,8) [] = { 0xf0, 0xf1, 0xf2, 0xf3,
+ 0xf4, 0xf5, 0xf6, 0xf7 };
+VECT_VAR_DECL(expected,uint,16,4) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff3 };
+VECT_VAR_DECL(expected,uint,32,2) [] = { 0xfffffff0, 0xfffffff1 };
+VECT_VAR_DECL(expected,uint,64,1) [] = { 0xfffffffffffffff0 };
+VECT_VAR_DECL(expected,poly,8,8) [] = { 0xf0, 0xf1, 0xf2, 0xf3,
+ 0xf4, 0xf5, 0xf6, 0xf7 };
+VECT_VAR_DECL(expected,poly,16,4) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff3 };
+VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0xc1800000, 0xc1700000 };
+VECT_VAR_DECL(expected,int,8,16) [] = { 0xf0, 0xf1, 0xf2, 0xf3,
+ 0xf4, 0xf5, 0xf6, 0xf7,
+ 0xf8, 0xf9, 0xfa, 0xfb,
+ 0xfc, 0xfd, 0xfe, 0xff };
+VECT_VAR_DECL(expected,int,16,8) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff3,
+ 0xfff4, 0xfff5, 0xfff6, 0xfff7 };
+VECT_VAR_DECL(expected,int,32,4) [] = { 0xfffffff0, 0xfffffff1,
+ 0xfffffff2, 0xfffffff3 };
+VECT_VAR_DECL(expected,int,64,2) [] = { 0xfffffffffffffff0,
+ 0xfffffffffffffff1 };
+VECT_VAR_DECL(expected,uint,8,16) [] = { 0xf0, 0xf1, 0xf2, 0xf3,
+ 0xf4, 0xf5, 0xf6, 0xf7,
+ 0xf8, 0xf9, 0xfa, 0xfb,
+ 0xfc, 0xfd, 0xfe, 0xff };
+VECT_VAR_DECL(expected,uint,16,8) [] = { 0xfff0, 0xfff1, 0xfff2,
+ 0xfff3, 0xfff4, 0xfff5,
+ 0xfff6, 0xfff7 };
+VECT_VAR_DECL(expected,uint,32,4) [] = { 0xfffffff0, 0xfffffff1,
+ 0xfffffff2, 0xfffffff3 };
+VECT_VAR_DECL(expected,uint,64,2) [] = { 0xfffffffffffffff0,
+ 0xfffffffffffffff1 };
+VECT_VAR_DECL(expected,poly,8,16) [] = { 0xf0, 0xf1, 0xf2, 0xf3,
+ 0xf4, 0xf5, 0xf6, 0xf7,
+ 0xf8, 0xf9, 0xfa, 0xfb,
+ 0xfc, 0xfd, 0xfe, 0xff };
+VECT_VAR_DECL(expected,poly,16,8) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff3,
+ 0xfff4, 0xfff5, 0xfff6, 0xfff7 };
+VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0xc1800000, 0xc1700000,
+ 0xc1600000, 0xc1500000 };
+
+#define TEST_MSG "VLD1/VLD1Q"
+void exec_vld1 (void)
+{
+ /* Basic test vec=vld1(buffer); then store vec: vst1(result, vector). */
+ /* This test actually tests vdl1 and vst1 at the same time. */
+#define TEST_VLD1(VAR, BUF, Q, T1, T2, W, N) \
+ VECT_VAR(VAR, T1, W, N) = vld1##Q##_##T2##W(VECT_VAR(BUF, T1, W, N)); \
+ vst1##Q##_##T2##W(VECT_VAR(result, T1, W, N), VECT_VAR(VAR, T1, W, N))
+
+ DECL_VARIABLE_ALL_VARIANTS(vector);
+
+ clean_results ();
+
+ TEST_MACRO_ALL_VARIANTS_2_5(TEST_VLD1, vector, buffer);
+
+ TEST_VLD1(vector, buffer, , float, f, 32, 2);
+ TEST_VLD1(vector, buffer, q, float, f, 32, 4);
+
+ CHECK_RESULTS (TEST_MSG, "");
+}
+
+int main (void)
+{
+ exec_vld1 ();
+ return 0;
+}
--- /dev/null
+#include <arm_neon.h>
+#include "arm-neon-ref.h"
+#include "compute-ref-data.h"
+
+/* Expected results. */
+VECT_VAR_DECL(expected,int,8,8) [] = { 0xe0, 0xe2, 0xe4, 0xe6,
+ 0xe8, 0xea, 0xec, 0xee };
+VECT_VAR_DECL(expected,int,16,4) [] = { 0xff80, 0xff88, 0xff90, 0xff98 };
+VECT_VAR_DECL(expected,int,32,2) [] = { 0xfffff000, 0xfffff100 };
+VECT_VAR_DECL(expected,int,64,1) [] = { 0xffffffffffffff80 };
+VECT_VAR_DECL(expected,uint,8,8) [] = { 0xe0, 0xe2, 0xe4, 0xe6,
+ 0xe8, 0xea, 0xec, 0xee };
+VECT_VAR_DECL(expected,uint,16,4) [] = { 0xff80, 0xff88, 0xff90, 0xff98 };
+VECT_VAR_DECL(expected,uint,32,2) [] = { 0xfffff000, 0xfffff100 };
+VECT_VAR_DECL(expected,uint,64,1) [] = { 0xffffffffffffff80 };
+VECT_VAR_DECL(expected,poly,8,8) [] = { 0x33, 0x33, 0x33, 0x33,
+ 0x33, 0x33, 0x33, 0x33 };
+VECT_VAR_DECL(expected,poly,16,4) [] = { 0x3333, 0x3333,
+ 0x3333, 0x3333 };
+VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x33333333, 0x33333333 };
+VECT_VAR_DECL(expected,int,8,16) [] = { 0x0, 0x20, 0x40, 0x60,
+ 0x80, 0xa0, 0xc0, 0xe0,
+ 0x0, 0x20, 0x40, 0x60,
+ 0x80, 0xa0, 0xc0, 0xe0 };
+VECT_VAR_DECL(expected,int,16,8) [] = { 0x0, 0x1000, 0x2000, 0x3000,
+ 0x4000, 0x5000, 0x6000, 0x7000 };
+VECT_VAR_DECL(expected,int,32,4) [] = { 0x0, 0x40000000,
+ 0x80000000, 0xc0000000 };
+VECT_VAR_DECL(expected,int,64,2) [] = { 0x0, 0x8000000000000000 };
+VECT_VAR_DECL(expected,uint,8,16) [] = { 0x0, 0x20, 0x40, 0x60,
+ 0x80, 0xa0, 0xc0, 0xe0,
+ 0x0, 0x20, 0x40, 0x60,
+ 0x80, 0xa0, 0xc0, 0xe0 };
+VECT_VAR_DECL(expected,uint,16,8) [] = { 0x0, 0x1000, 0x2000, 0x3000,
+ 0x4000, 0x5000, 0x6000, 0x7000 };
+VECT_VAR_DECL(expected,uint,32,4) [] = { 0x0, 0x40000000,
+ 0x80000000, 0xc0000000 };
+VECT_VAR_DECL(expected,uint,64,2) [] = { 0x0, 0x8000000000000000 };
+VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33,
+ 0x33, 0x33, 0x33, 0x33,
+ 0x33, 0x33, 0x33, 0x33,
+ 0x33, 0x33, 0x33, 0x33 };
+VECT_VAR_DECL(expected,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333,
+ 0x3333, 0x3333, 0x3333, 0x3333 };
+VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x33333333, 0x33333333,
+ 0x33333333, 0x33333333 };
+
+/* Expected results with large shift amount. */
+VECT_VAR_DECL(expected_large_shift,int,8,8) [] = { 0x0, 0x0, 0x0, 0x0,
+ 0x0, 0x0, 0x0, 0x0 };
+VECT_VAR_DECL(expected_large_shift,int,16,4) [] = { 0x0, 0x0, 0x0, 0x0 };
+VECT_VAR_DECL(expected_large_shift,int,32,2) [] = { 0x0, 0x0 };
+VECT_VAR_DECL(expected_large_shift,int,64,1) [] = { 0x0 };
+VECT_VAR_DECL(expected_large_shift,uint,8,8) [] = { 0x0, 0x0, 0x0, 0x0,
+ 0x0, 0x0, 0x0, 0x0 };
+VECT_VAR_DECL(expected_large_shift,uint,16,4) [] = { 0x0, 0x0, 0x0, 0x0 };
+VECT_VAR_DECL(expected_large_shift,uint,32,2) [] = { 0x0, 0x0 };
+VECT_VAR_DECL(expected_large_shift,uint,64,1) [] = { 0x0 };
+VECT_VAR_DECL(expected_large_shift,poly,8,8) [] = { 0x33, 0x33, 0x33, 0x33,
+ 0x33, 0x33, 0x33, 0x33 };
+VECT_VAR_DECL(expected_large_shift,poly,16,4) [] = { 0x3333, 0x3333,
+ 0x3333, 0x3333 };
+VECT_VAR_DECL(expected_large_shift,hfloat,32,2) [] = { 0x33333333, 0x33333333 };
+VECT_VAR_DECL(expected_large_shift,int,8,16) [] = { 0x0, 0x0, 0x0, 0x0,
+ 0x0, 0x0, 0x0, 0x0,
+ 0x0, 0x0, 0x0, 0x0,
+ 0x0, 0x0, 0x0, 0x0 };
+VECT_VAR_DECL(expected_large_shift,int,16,8) [] = { 0x0, 0x0, 0x0, 0x0,
+ 0x0, 0x0, 0x0, 0x0 };
+VECT_VAR_DECL(expected_large_shift,int,32,4) [] = { 0x0, 0x0, 0x0, 0x0 };
+VECT_VAR_DECL(expected_large_shift,int,64,2) [] = { 0x0, 0x0 };
+VECT_VAR_DECL(expected_large_shift,uint,8,16) [] = { 0x0, 0x0, 0x0, 0x0,
+ 0x0, 0x0, 0x0, 0x0,
+ 0x0, 0x0, 0x0, 0x0,
+ 0x0, 0x0, 0x0, 0x0 };
+VECT_VAR_DECL(expected_large_shift,uint,16,8) [] = { 0x0, 0x0, 0x0, 0x0,
+ 0x0, 0x0, 0x0, 0x0 };
+VECT_VAR_DECL(expected_large_shift,uint,32,4) [] = { 0x0, 0x0, 0x0, 0x0 };
+VECT_VAR_DECL(expected_large_shift,uint,64,2) [] = { 0x0, 0x0 };
+VECT_VAR_DECL(expected_large_shift,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33,
+ 0x33, 0x33, 0x33, 0x33,
+ 0x33, 0x33, 0x33, 0x33,
+ 0x33, 0x33, 0x33, 0x33 };
+VECT_VAR_DECL(expected_large_shift,poly,16,8) [] = { 0x3333, 0x3333,
+ 0x3333, 0x3333,
+ 0x3333, 0x3333,
+ 0x3333, 0x3333 };
+VECT_VAR_DECL(expected_large_shift,hfloat,32,4) [] = { 0x33333333, 0x33333333,
+ 0x33333333, 0x33333333 };
+
+
+/* Expected results with negative shift amount. */
+VECT_VAR_DECL(expected_negative_shift,int,8,8) [] = { 0xf8, 0xf8, 0xf9, 0xf9,
+ 0xfa, 0xfa, 0xfb, 0xfb };
+VECT_VAR_DECL(expected_negative_shift,int,16,4) [] = { 0xfff8, 0xfff8,
+ 0xfff9, 0xfff9 };
+VECT_VAR_DECL(expected_negative_shift,int,32,2) [] = { 0xfffffffc, 0xfffffffc };
+VECT_VAR_DECL(expected_negative_shift,int,64,1) [] = { 0xffffffffffffffff };
+VECT_VAR_DECL(expected_negative_shift,uint,8,8) [] = { 0x78, 0x78, 0x79, 0x79,
+ 0x7a, 0x7a, 0x7b, 0x7b };
+VECT_VAR_DECL(expected_negative_shift,uint,16,4) [] = { 0x7ff8, 0x7ff8,
+ 0x7ff9, 0x7ff9 };
+VECT_VAR_DECL(expected_negative_shift,uint,32,2) [] = { 0x3ffffffc,
+ 0x3ffffffc };
+VECT_VAR_DECL(expected_negative_shift,uint,64,1) [] = { 0xfffffffffffffff };
+VECT_VAR_DECL(expected_negative_shift,poly,8,8) [] = { 0x33, 0x33, 0x33, 0x33,
+ 0x33, 0x33, 0x33, 0x33 };
+VECT_VAR_DECL(expected_negative_shift,poly,16,4) [] = { 0x3333, 0x3333,
+ 0x3333, 0x3333 };
+VECT_VAR_DECL(expected_negative_shift,hfloat,32,2) [] = { 0x33333333,
+ 0x33333333 };
+VECT_VAR_DECL(expected_negative_shift,int,8,16) [] = { 0xfc, 0xfc, 0xfc, 0xfc,
+ 0xfd, 0xfd, 0xfd, 0xfd,
+ 0xfe, 0xfe, 0xfe, 0xfe,
+ 0xff, 0xff, 0xff, 0xff };
+VECT_VAR_DECL(expected_negative_shift,int,16,8) [] = { 0xffff, 0xffff,
+ 0xffff, 0xffff,
+ 0xffff, 0xffff,
+ 0xffff, 0xffff };
+VECT_VAR_DECL(expected_negative_shift,int,32,4) [] = { 0xfffffffe, 0xfffffffe,
+ 0xfffffffe, 0xfffffffe };
+VECT_VAR_DECL(expected_negative_shift,int,64,2) [] = { 0xffffffffffffffff,
+ 0xffffffffffffffff };
+VECT_VAR_DECL(expected_negative_shift,uint,8,16) [] = { 0x3c, 0x3c, 0x3c, 0x3c,
+ 0x3d, 0x3d, 0x3d, 0x3d,
+ 0x3e, 0x3e, 0x3e, 0x3e,
+ 0x3f, 0x3f, 0x3f, 0x3f };
+VECT_VAR_DECL(expected_negative_shift,uint,16,8) [] = { 0x7ff, 0x7ff,
+ 0x7ff, 0x7ff,
+ 0x7ff, 0x7ff,
+ 0x7ff, 0x7ff };
+VECT_VAR_DECL(expected_negative_shift,uint,32,4) [] = { 0x1ffffffe, 0x1ffffffe,
+ 0x1ffffffe, 0x1ffffffe };
+VECT_VAR_DECL(expected_negative_shift,uint,64,2) [] = { 0x7ffffffffffffff,
+ 0x7ffffffffffffff };
+VECT_VAR_DECL(expected_negative_shift,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33,
+ 0x33, 0x33, 0x33, 0x33,
+ 0x33, 0x33, 0x33, 0x33,
+ 0x33, 0x33, 0x33, 0x33 };
+VECT_VAR_DECL(expected_negative_shift,poly,16,8) [] = { 0x3333, 0x3333,
+ 0x3333, 0x3333,
+ 0x3333, 0x3333,
+ 0x3333, 0x3333 };
+VECT_VAR_DECL(expected_negative_shift,hfloat,32,4) [] = { 0x33333333,
+ 0x33333333,
+ 0x33333333,
+ 0x33333333 };
+
+
+#ifndef INSN_NAME
+#define INSN_NAME vshl
+#define TEST_MSG "VSHL/VSHLQ"
+#endif
+
+#define FNNAME1(NAME) exec_ ## NAME
+#define FNNAME(NAME) FNNAME1(NAME)
+
+void FNNAME (INSN_NAME) (void)
+{
+ /* Basic test: v3=vshl(v1,v2), then store the result. */
+#define TEST_VSHL(T3, Q, T1, T2, W, N) \
+ VECT_VAR(vector_res, T1, W, N) = \
+ vshl##Q##_##T2##W(VECT_VAR(vector, T1, W, N), \
+ VECT_VAR(vector_shift, T3, W, N)); \
+ vst1##Q##_##T2##W(VECT_VAR(result, T1, W, N), VECT_VAR(vector_res, T1, W, N))
+
+ DECL_VARIABLE_ALL_VARIANTS(vector);
+ DECL_VARIABLE_ALL_VARIANTS(vector_res);
+
+ DECL_VARIABLE_SIGNED_VARIANTS(vector_shift);
+
+ clean_results ();
+
+ /* Initialize input "vector" from "buffer". */
+ TEST_MACRO_ALL_VARIANTS_2_5(VLOAD, vector, buffer);
+
+ /* Choose init value arbitrarily, will be used as shift amount. */
+ VDUP(vector_shift, , int, s, 8, 8, 1);
+ VDUP(vector_shift, , int, s, 16, 4, 3);
+ VDUP(vector_shift, , int, s, 32, 2, 8);
+ VDUP(vector_shift, , int, s, 64, 1, 3);
+ VDUP(vector_shift, q, int, s, 8, 16, 5);
+ VDUP(vector_shift, q, int, s, 16, 8, 12);
+ VDUP(vector_shift, q, int, s, 32, 4, 30);
+ VDUP(vector_shift, q, int, s, 64, 2, 63);
+
+ /* Execute the tests. */
+ TEST_MACRO_ALL_VARIANTS_1_5(TEST_VSHL, int);
+
+ CHECK_RESULTS (TEST_MSG, "");
+
+
+ /* Test large shift amount (larger or equal to the type width. */
+ VDUP(vector_shift, , int, s, 8, 8, 8);
+ VDUP(vector_shift, , int, s, 16, 4, 16);
+ VDUP(vector_shift, , int, s, 32, 2, 32);
+ VDUP(vector_shift, , int, s, 64, 1, 64);
+ VDUP(vector_shift, q, int, s, 8, 16, 8);
+ VDUP(vector_shift, q, int, s, 16, 8, 17);
+ VDUP(vector_shift, q, int, s, 32, 4, 33);
+ VDUP(vector_shift, q, int, s, 64, 2, 65);
+
+ /* Execute the tests. */
+ TEST_MACRO_ALL_VARIANTS_1_5(TEST_VSHL, int);
+
+ CHECK_RESULTS_NAMED (TEST_MSG, expected_large_shift, "(large shift amount)");
+
+
+ /* Test negative shift amount. */
+ VDUP(vector_shift, , int, s, 8, 8, -1);
+ VDUP(vector_shift, , int, s, 16, 4, -1);
+ VDUP(vector_shift, , int, s, 32, 2, -2);
+ VDUP(vector_shift, , int, s, 64, 1, -4);
+ VDUP(vector_shift, q, int, s, 8, 16, -2);
+ VDUP(vector_shift, q, int, s, 16, 8, -5);
+ VDUP(vector_shift, q, int, s, 32, 4, -3);
+ VDUP(vector_shift, q, int, s, 64, 2, -5);
+
+ /* Execute the tests. */
+ TEST_MACRO_ALL_VARIANTS_1_5(TEST_VSHL, int);
+
+ CHECK_RESULTS_NAMED (TEST_MSG, expected_negative_shift, "(negative shift amount)");
+}
+
+int main (void)
+{
+ FNNAME (INSN_NAME) ();
+
+ return 0;
+}
--- /dev/null
+Advanced SIMD intrinsics tests are located in gcc.target/aarch64.