/* a3210a94b1dcd00ea8b9f687a7805386ef5c0b08 */
1 /* This file contains tests for all the *p64 intrinsics, except for
2 vreinterpret which have their own testcase. */
4 /* { dg-require-effective-target arm_crypto_ok { target { arm*-*-* } } } */
5 /* { dg-add-options arm_crypto } */
6 /* { dg-additional-options "-march=armv8-a+crypto" { target { aarch64*-*-* } } }*/
9 #include "arm-neon-ref.h"
10 #include "compute-ref-data.h"
12 /* Expected results: vbsl. */
13 VECT_VAR_DECL(vbsl_expected
,poly
,64,1) [] = { 0xfffffff1 };
14 VECT_VAR_DECL(vbsl_expected
,poly
,64,2) [] = { 0xfffffff1,
17 /* Expected results: vceq. */
18 VECT_VAR_DECL(vceq_expected
,uint
,64,1) [] = { 0x0 };
20 /* Expected results: vcombine. */
21 VECT_VAR_DECL(vcombine_expected
,poly
,64,2) [] = { 0xfffffffffffffff0, 0x88 };
23 /* Expected results: vcreate. */
24 VECT_VAR_DECL(vcreate_expected
,poly
,64,1) [] = { 0x123456789abcdef0 };
26 /* Expected results: vdup_lane. */
27 VECT_VAR_DECL(vdup_lane_expected
,poly
,64,1) [] = { 0xfffffffffffffff0 };
28 VECT_VAR_DECL(vdup_lane_expected
,poly
,64,2) [] = { 0xfffffffffffffff0,
31 /* Expected results: vdup_n. */
32 VECT_VAR_DECL(vdup_n_expected0
,poly
,64,1) [] = { 0xfffffffffffffff0 };
33 VECT_VAR_DECL(vdup_n_expected0
,poly
,64,2) [] = { 0xfffffffffffffff0,
35 VECT_VAR_DECL(vdup_n_expected1
,poly
,64,1) [] = { 0xfffffffffffffff1 };
36 VECT_VAR_DECL(vdup_n_expected1
,poly
,64,2) [] = { 0xfffffffffffffff1,
38 VECT_VAR_DECL(vdup_n_expected2
,poly
,64,1) [] = { 0xfffffffffffffff2 };
39 VECT_VAR_DECL(vdup_n_expected2
,poly
,64,2) [] = { 0xfffffffffffffff2,
42 /* Expected results: vext. */
43 VECT_VAR_DECL(vext_expected
,poly
,64,1) [] = { 0xfffffffffffffff0 };
44 VECT_VAR_DECL(vext_expected
,poly
,64,2) [] = { 0xfffffffffffffff1, 0x88 };
46 /* Expected results: vget_low. */
47 VECT_VAR_DECL(vget_low_expected
,poly
,64,1) [] = { 0xfffffffffffffff0 };
49 /* Expected results: vget_high. */
50 VECT_VAR_DECL(vget_high_expected
,poly
,64,1) [] = { 0xfffffffffffffff1 };
52 /* Expected results: vld1. */
53 VECT_VAR_DECL(vld1_expected
,poly
,64,1) [] = { 0xfffffffffffffff0 };
54 VECT_VAR_DECL(vld1_expected
,poly
,64,2) [] = { 0xfffffffffffffff0,
57 /* Expected results: vld1_dup. */
58 VECT_VAR_DECL(vld1_dup_expected0
,poly
,64,1) [] = { 0xfffffffffffffff0 };
59 VECT_VAR_DECL(vld1_dup_expected0
,poly
,64,2) [] = { 0xfffffffffffffff0,
61 VECT_VAR_DECL(vld1_dup_expected1
,poly
,64,1) [] = { 0xfffffffffffffff1 };
62 VECT_VAR_DECL(vld1_dup_expected1
,poly
,64,2) [] = { 0xfffffffffffffff1,
64 VECT_VAR_DECL(vld1_dup_expected2
,poly
,64,1) [] = { 0xfffffffffffffff2 };
65 VECT_VAR_DECL(vld1_dup_expected2
,poly
,64,2) [] = { 0xfffffffffffffff2,
68 /* Expected results: vld1_lane. */
69 VECT_VAR_DECL(vld1_lane_expected
,poly
,64,1) [] = { 0xfffffffffffffff0 };
70 VECT_VAR_DECL(vld1_lane_expected
,poly
,64,2) [] = { 0xfffffffffffffff0,
73 /* Expected results: vldX. */
74 VECT_VAR_DECL(vld2_expected_0
,poly
,64,1) [] = { 0xfffffffffffffff0 };
75 VECT_VAR_DECL(vld2_expected_1
,poly
,64,1) [] = { 0xfffffffffffffff1 };
76 VECT_VAR_DECL(vld3_expected_0
,poly
,64,1) [] = { 0xfffffffffffffff0 };
77 VECT_VAR_DECL(vld3_expected_1
,poly
,64,1) [] = { 0xfffffffffffffff1 };
78 VECT_VAR_DECL(vld3_expected_2
,poly
,64,1) [] = { 0xfffffffffffffff2 };
79 VECT_VAR_DECL(vld4_expected_0
,poly
,64,1) [] = { 0xfffffffffffffff0 };
80 VECT_VAR_DECL(vld4_expected_1
,poly
,64,1) [] = { 0xfffffffffffffff1 };
81 VECT_VAR_DECL(vld4_expected_2
,poly
,64,1) [] = { 0xfffffffffffffff2 };
82 VECT_VAR_DECL(vld4_expected_3
,poly
,64,1) [] = { 0xfffffffffffffff3 };
84 /* Expected results: vldX_dup. */
85 VECT_VAR_DECL(vld2_dup_expected_0
,poly
,64,1) [] = { 0xfffffffffffffff0 };
86 VECT_VAR_DECL(vld2_dup_expected_1
,poly
,64,1) [] = { 0xfffffffffffffff1 };
87 VECT_VAR_DECL(vld3_dup_expected_0
,poly
,64,1) [] = { 0xfffffffffffffff0 };
88 VECT_VAR_DECL(vld3_dup_expected_1
,poly
,64,1) [] = { 0xfffffffffffffff1 };
89 VECT_VAR_DECL(vld3_dup_expected_2
,poly
,64,1) [] = { 0xfffffffffffffff2 };
90 VECT_VAR_DECL(vld4_dup_expected_0
,poly
,64,1) [] = { 0xfffffffffffffff0 };
91 VECT_VAR_DECL(vld4_dup_expected_1
,poly
,64,1) [] = { 0xfffffffffffffff1 };
92 VECT_VAR_DECL(vld4_dup_expected_2
,poly
,64,1) [] = { 0xfffffffffffffff2 };
93 VECT_VAR_DECL(vld4_dup_expected_3
,poly
,64,1) [] = { 0xfffffffffffffff3 };
95 /* Expected results: vsli. */
96 VECT_VAR_DECL(vsli_expected
,poly
,64,1) [] = { 0x10 };
97 VECT_VAR_DECL(vsli_expected
,poly
,64,2) [] = { 0x7ffffffffffff0,
99 VECT_VAR_DECL(vsli_expected_max_shift
,poly
,64,1) [] = { 0x7ffffffffffffff0 };
100 VECT_VAR_DECL(vsli_expected_max_shift
,poly
,64,2) [] = { 0xfffffffffffffff0,
101 0xfffffffffffffff1 };
103 /* Expected results: vsri. */
104 VECT_VAR_DECL(vsri_expected
,poly
,64,1) [] = { 0xe000000000000000 };
105 VECT_VAR_DECL(vsri_expected
,poly
,64,2) [] = { 0xfffffffffffff800,
106 0xfffffffffffff800 };
107 VECT_VAR_DECL(vsri_expected_max_shift
,poly
,64,1) [] = { 0xfffffffffffffff0 };
108 VECT_VAR_DECL(vsri_expected_max_shift
,poly
,64,2) [] = { 0xfffffffffffffff0,
109 0xfffffffffffffff1 };
111 /* Expected results: vst1_lane. */
112 VECT_VAR_DECL(vst1_lane_expected
,poly
,64,1) [] = { 0xfffffffffffffff0 };
113 VECT_VAR_DECL(vst1_lane_expected
,poly
,64,2) [] = { 0xfffffffffffffff0,
114 0x3333333333333333 };
116 /* Expected results: vget_lane. */
117 VECT_VAR_DECL(vget_lane_expected
,poly
,64,1) = 0xfffffffffffffff0;
118 VECT_VAR_DECL(vget_lane_expected
,poly
,64,2) = 0xfffffffffffffff0;
120 /* Expected results: vset_lane. */
121 VECT_VAR_DECL(vset_lane_expected
,poly
,64,1) [] = { 0x88 };
122 VECT_VAR_DECL(vset_lane_expected
,poly
,64,2) [] = { 0xfffffffffffffff0, 0x11 };
124 /* Expected results: vtst. */
125 VECT_VAR_DECL(vtst_expected
,uint
,64,1) [] = { 0x0 };
128 /* Expected results: vmov_n. */
129 VECT_VAR_DECL(vmov_n_expected0
,poly
,64,1) [] = { 0xfffffffffffffff0 };
130 VECT_VAR_DECL(vmov_n_expected0
,poly
,64,2) [] = { 0xfffffffffffffff0,
131 0xfffffffffffffff0 };
132 VECT_VAR_DECL(vmov_n_expected1
,poly
,64,1) [] = { 0xfffffffffffffff1 };
133 VECT_VAR_DECL(vmov_n_expected1
,poly
,64,2) [] = { 0xfffffffffffffff1,
134 0xfffffffffffffff1 };
135 VECT_VAR_DECL(vmov_n_expected2
,poly
,64,1) [] = { 0xfffffffffffffff2 };
136 VECT_VAR_DECL(vmov_n_expected2
,poly
,64,2) [] = { 0xfffffffffffffff2,
137 0xfffffffffffffff2 };
139 /* Expected results: vldX_lane. */
140 VECT_VAR_DECL(expected_vld_st2_0
,poly
,64,1) [] = { 0xfffffffffffffff0 };
141 VECT_VAR_DECL(expected_vld_st2_0
,poly
,64,2) [] = { 0xfffffffffffffff0,
142 0xfffffffffffffff1 };
143 VECT_VAR_DECL(expected_vld_st2_1
,poly
,64,1) [] = { 0xfffffffffffffff1 };
144 VECT_VAR_DECL(expected_vld_st2_1
,poly
,64,2) [] = { 0xaaaaaaaaaaaaaaaa,
145 0xaaaaaaaaaaaaaaaa };
146 VECT_VAR_DECL(expected_vld_st3_0
,poly
,64,1) [] = { 0xfffffffffffffff0 };
147 VECT_VAR_DECL(expected_vld_st3_0
,poly
,64,2) [] = { 0xfffffffffffffff0,
148 0xfffffffffffffff1 };
149 VECT_VAR_DECL(expected_vld_st3_1
,poly
,64,1) [] = { 0xfffffffffffffff1 };
150 VECT_VAR_DECL(expected_vld_st3_1
,poly
,64,2) [] = { 0xfffffffffffffff2,
151 0xaaaaaaaaaaaaaaaa };
152 VECT_VAR_DECL(expected_vld_st3_2
,poly
,64,1) [] = { 0xfffffffffffffff2 };
153 VECT_VAR_DECL(expected_vld_st3_2
,poly
,64,2) [] = { 0xaaaaaaaaaaaaaaaa,
154 0xaaaaaaaaaaaaaaaa };
155 VECT_VAR_DECL(expected_vld_st4_0
,poly
,64,1) [] = { 0xfffffffffffffff0 };
156 VECT_VAR_DECL(expected_vld_st4_0
,poly
,64,2) [] = { 0xfffffffffffffff0,
157 0xfffffffffffffff1 };
158 VECT_VAR_DECL(expected_vld_st4_1
,poly
,64,1) [] = { 0xfffffffffffffff1 };
159 VECT_VAR_DECL(expected_vld_st4_1
,poly
,64,2) [] = { 0xfffffffffffffff2,
160 0xfffffffffffffff3 };
161 VECT_VAR_DECL(expected_vld_st4_2
,poly
,64,1) [] = { 0xfffffffffffffff2 };
162 VECT_VAR_DECL(expected_vld_st4_2
,poly
,64,2) [] = { 0xaaaaaaaaaaaaaaaa,
163 0xaaaaaaaaaaaaaaaa };
164 VECT_VAR_DECL(expected_vld_st4_3
,poly
,64,1) [] = { 0xfffffffffffffff3 };
165 VECT_VAR_DECL(expected_vld_st4_3
,poly
,64,2) [] = { 0xaaaaaaaaaaaaaaaa,
166 0xaaaaaaaaaaaaaaaa };
168 /* Expected results: vtst. */
169 VECT_VAR_DECL(vtst_expected
,uint
,64,2) [] = { 0x0, 0xffffffffffffffff };
/* vbsl_p64 tests.  */
#define TEST_MSG "VBSL/VBSLQ"

/* Apply a bitwise select and store the result for later checking.  */
#define TEST_VBSL(T3, Q, T1, T2, W, N) \
  VECT_VAR(vbsl_vector_res, T1, W, N) = \
    vbsl##Q##_##T2##W(VECT_VAR(vbsl_vector_first, T3, W, N), \
		      VECT_VAR(vbsl_vector, T1, W, N), \
		      VECT_VAR(vbsl_vector2, T1, W, N)); \
  vst1##Q##_##T2##W(VECT_VAR(result, T1, W, N), VECT_VAR(vbsl_vector_res, T1, W, N))
186 DECL_VARIABLE(vbsl_vector
, poly
, 64, 1);
187 DECL_VARIABLE(vbsl_vector
, poly
, 64, 2);
188 DECL_VARIABLE(vbsl_vector2
, poly
, 64, 1);
189 DECL_VARIABLE(vbsl_vector2
, poly
, 64, 2);
190 DECL_VARIABLE(vbsl_vector_res
, poly
, 64, 1);
191 DECL_VARIABLE(vbsl_vector_res
, poly
, 64, 2);
193 DECL_VARIABLE(vbsl_vector_first
, uint
, 64, 1);
194 DECL_VARIABLE(vbsl_vector_first
, uint
, 64, 2);
196 CLEAN(result
, poly
, 64, 1);
197 CLEAN(result
, poly
, 64, 2);
199 VLOAD(vbsl_vector
, buffer
, , poly
, p
, 64, 1);
200 VLOAD(vbsl_vector
, buffer
, q
, poly
, p
, 64, 2);
202 VDUP(vbsl_vector2
, , poly
, p
, 64, 1, 0xFFFFFFF3);
203 VDUP(vbsl_vector2
, q
, poly
, p
, 64, 2, 0xFFFFFFF3);
205 VDUP(vbsl_vector_first
, , uint
, u
, 64, 1, 0xFFFFFFF2);
206 VDUP(vbsl_vector_first
, q
, uint
, u
, 64, 2, 0xFFFFFFF2);
208 TEST_VBSL(uint
, , poly
, p
, 64, 1);
209 TEST_VBSL(uint
, q
, poly
, p
, 64, 2);
211 CHECK_POLY(TEST_MSG
, poly
, 64, 1, PRIx64
, vbsl_expected
, "");
212 CHECK_POLY(TEST_MSG
, poly
, 64, 2, PRIx64
, vbsl_expected
, "");
/* vceq_p64 tests.  */
#undef TEST_MSG
#define TEST_MSG "VCEQ"

/* Apply a comparison intrinsic; the result type (T3) is unsigned.  */
#define TEST_VCOMP1(INSN, Q, T1, T2, T3, W, N) \
  VECT_VAR(vceq_vector_res, T3, W, N) = \
    INSN##Q##_##T2##W(VECT_VAR(vceq_vector, T1, W, N), \
		      VECT_VAR(vceq_vector2, T1, W, N)); \
  vst1##Q##_u##W(VECT_VAR(result, T3, W, N), VECT_VAR(vceq_vector_res, T3, W, N))

#define TEST_VCOMP(INSN, Q, T1, T2, T3, W, N) \
  TEST_VCOMP1(INSN, Q, T1, T2, T3, W, N)
227 DECL_VARIABLE(vceq_vector
, poly
, 64, 1);
228 DECL_VARIABLE(vceq_vector2
, poly
, 64, 1);
229 DECL_VARIABLE(vceq_vector_res
, uint
, 64, 1);
231 CLEAN(result
, uint
, 64, 1);
233 VLOAD(vceq_vector
, buffer
, , poly
, p
, 64, 1);
235 VDUP(vceq_vector2
, , poly
, p
, 64, 1, 0x88);
237 TEST_VCOMP(vceq
, , poly
, p
, uint
, 64, 1);
239 CHECK(TEST_MSG
, uint
, 64, 1, PRIx64
, vceq_expected
, "");
/* vcombine_p64 tests.  */
#undef TEST_MSG
#define TEST_MSG "VCOMBINE"

/* Combine two 64-bit vectors into one 128-bit vector.  */
#define TEST_VCOMBINE(T1, T2, W, N, N2) \
  VECT_VAR(vcombine_vector128, T1, W, N2) = \
    vcombine_##T2##W(VECT_VAR(vcombine_vector64_a, T1, W, N), \
		     VECT_VAR(vcombine_vector64_b, T1, W, N)); \
  vst1q_##T2##W(VECT_VAR(result, T1, W, N2), VECT_VAR(vcombine_vector128, T1, W, N2))
251 DECL_VARIABLE(vcombine_vector64_a
, poly
, 64, 1);
252 DECL_VARIABLE(vcombine_vector64_b
, poly
, 64, 1);
253 DECL_VARIABLE(vcombine_vector128
, poly
, 64, 2);
255 CLEAN(result
, poly
, 64, 2);
257 VLOAD(vcombine_vector64_a
, buffer
, , poly
, p
, 64, 1);
259 VDUP(vcombine_vector64_b
, , poly
, p
, 64, 1, 0x88);
261 TEST_VCOMBINE(poly
, p
, 64, 1, 2);
263 CHECK_POLY(TEST_MSG
, poly
, 64, 2, PRIx64
, vcombine_expected
, "");
/* vcreate_p64 tests.  */
#undef TEST_MSG
#define TEST_MSG "VCREATE"

/* Build a vector from a scalar 64-bit immediate value.  */
#define TEST_VCREATE(T1, T2, W, N) \
  VECT_VAR(vcreate_vector_res, T1, W, N) = \
    vcreate_##T2##W(VECT_VAR(vcreate_val, T1, W, N)); \
  vst1_##T2##W(VECT_VAR(result, T1, W, N), VECT_VAR(vcreate_vector_res, T1, W, N))

/* The input of vcreate is a plain uint64_t scalar.  */
#define DECL_VAL(VAR, T1, W, N) \
  uint64_t VECT_VAR(VAR, T1, W, N)
277 DECL_VAL(vcreate_val
, poly
, 64, 1);
278 DECL_VARIABLE(vcreate_vector_res
, poly
, 64, 1);
280 CLEAN(result
, poly
, 64, 2);
282 VECT_VAR(vcreate_val
, poly
, 64, 1) = 0x123456789abcdef0ULL
;
284 TEST_VCREATE(poly
, p
, 64, 1);
286 CHECK_POLY(TEST_MSG
, poly
, 64, 1, PRIx64
, vcreate_expected
, "");
/* vdup_lane_p64 tests.  */
#undef TEST_MSG
#define TEST_MSG "VDUP_LANE/VDUP_LANEQ"

/* Duplicate lane L of an N2-element vector into an N-element vector.  */
#define TEST_VDUP_LANE(Q, T1, T2, W, N, N2, L) \
  VECT_VAR(vdup_lane_vector_res, T1, W, N) = \
    vdup##Q##_lane_##T2##W(VECT_VAR(vdup_lane_vector, T1, W, N2), L); \
  vst1##Q##_##T2##W(VECT_VAR(result, T1, W, N), VECT_VAR(vdup_lane_vector_res, T1, W, N))
297 DECL_VARIABLE(vdup_lane_vector
, poly
, 64, 1);
298 DECL_VARIABLE(vdup_lane_vector
, poly
, 64, 2);
299 DECL_VARIABLE(vdup_lane_vector_res
, poly
, 64, 1);
300 DECL_VARIABLE(vdup_lane_vector_res
, poly
, 64, 2);
302 CLEAN(result
, poly
, 64, 1);
303 CLEAN(result
, poly
, 64, 2);
305 VLOAD(vdup_lane_vector
, buffer
, , poly
, p
, 64, 1);
307 TEST_VDUP_LANE(, poly
, p
, 64, 1, 1, 0);
308 TEST_VDUP_LANE(q
, poly
, p
, 64, 2, 1, 0);
310 CHECK_POLY(TEST_MSG
, poly
, 64, 1, PRIx64
, vdup_lane_expected
, "");
311 CHECK_POLY(TEST_MSG
, poly
, 64, 2, PRIx64
, vdup_lane_expected
, "");
/* vdup_n_p64 tests.  */
#undef TEST_MSG
#define TEST_MSG "VDUP/VDUPQ"

/* Duplicate element i of buffer_dup into all lanes (i is the loop index).  */
#define TEST_VDUP(Q, T1, T2, W, N) \
  VECT_VAR(vdup_n_vector, T1, W, N) = \
    vdup##Q##_n_##T2##W(VECT_VAR(buffer_dup, T1, W, N)[i]); \
  vst1##Q##_##T2##W(VECT_VAR(result, T1, W, N), VECT_VAR(vdup_n_vector, T1, W, N))
322 DECL_VARIABLE(vdup_n_vector
, poly
, 64, 1);
323 DECL_VARIABLE(vdup_n_vector
, poly
, 64, 2);
325 /* Try to read different places from the input buffer. */
326 for (i
=0; i
< 3; i
++) {
327 CLEAN(result
, poly
, 64, 1);
328 CLEAN(result
, poly
, 64, 2);
330 TEST_VDUP(, poly
, p
, 64, 1);
331 TEST_VDUP(q
, poly
, p
, 64, 2);
335 CHECK_POLY(TEST_MSG
, poly
, 64, 1, PRIx64
, vdup_n_expected0
, "");
336 CHECK_POLY(TEST_MSG
, poly
, 64, 2, PRIx64
, vdup_n_expected0
, "");
339 CHECK_POLY(TEST_MSG
, poly
, 64, 1, PRIx64
, vdup_n_expected1
, "");
340 CHECK_POLY(TEST_MSG
, poly
, 64, 2, PRIx64
, vdup_n_expected1
, "");
343 CHECK_POLY(TEST_MSG
, poly
, 64, 1, PRIx64
, vdup_n_expected2
, "");
344 CHECK_POLY(TEST_MSG
, poly
, 64, 2, PRIx64
, vdup_n_expected2
, "");
/* vext_p64 tests.  */
#undef TEST_MSG
#define TEST_MSG "VEXT/VEXTQ"

/* Extract a vector from a pair, starting at position V.  */
#define TEST_VEXT(Q, T1, T2, W, N, V) \
  VECT_VAR(vext_vector_res, T1, W, N) = \
    vext##Q##_##T2##W(VECT_VAR(vext_vector1, T1, W, N), \
		      VECT_VAR(vext_vector2, T1, W, N), \
		      V); \
  vst1##Q##_##T2##W(VECT_VAR(result, T1, W, N), VECT_VAR(vext_vector_res, T1, W, N))
362 DECL_VARIABLE(vext_vector1
, poly
, 64, 1);
363 DECL_VARIABLE(vext_vector1
, poly
, 64, 2);
364 DECL_VARIABLE(vext_vector2
, poly
, 64, 1);
365 DECL_VARIABLE(vext_vector2
, poly
, 64, 2);
366 DECL_VARIABLE(vext_vector_res
, poly
, 64, 1);
367 DECL_VARIABLE(vext_vector_res
, poly
, 64, 2);
369 CLEAN(result
, poly
, 64, 1);
370 CLEAN(result
, poly
, 64, 2);
372 VLOAD(vext_vector1
, buffer
, , poly
, p
, 64, 1);
373 VLOAD(vext_vector1
, buffer
, q
, poly
, p
, 64, 2);
375 VDUP(vext_vector2
, , poly
, p
, 64, 1, 0x88);
376 VDUP(vext_vector2
, q
, poly
, p
, 64, 2, 0x88);
378 TEST_VEXT(, poly
, p
, 64, 1, 0);
379 TEST_VEXT(q
, poly
, p
, 64, 2, 1);
381 CHECK_POLY(TEST_MSG
, poly
, 64, 1, PRIx64
, vext_expected
, "");
382 CHECK_POLY(TEST_MSG
, poly
, 64, 2, PRIx64
, vext_expected
, "");
/* vget_low_p64 tests.  */
#undef TEST_MSG
#define TEST_MSG "VGET_LOW"

/* Extract the low half of a 128-bit vector.  */
#define TEST_VGET_LOW(T1, T2, W, N, N2) \
  VECT_VAR(vget_low_vector64, T1, W, N) = \
    vget_low_##T2##W(VECT_VAR(vget_low_vector128, T1, W, N2)); \
  vst1_##T2##W(VECT_VAR(result, T1, W, N), VECT_VAR(vget_low_vector64, T1, W, N))
393 DECL_VARIABLE(vget_low_vector64
, poly
, 64, 1);
394 DECL_VARIABLE(vget_low_vector128
, poly
, 64, 2);
396 CLEAN(result
, poly
, 64, 1);
398 VLOAD(vget_low_vector128
, buffer
, q
, poly
, p
, 64, 2);
400 TEST_VGET_LOW(poly
, p
, 64, 1, 2);
402 CHECK_POLY(TEST_MSG
, poly
, 64, 1, PRIx64
, vget_low_expected
, "");
/* vget_high_p64 tests.  */
#undef TEST_MSG
#define TEST_MSG "VGET_HIGH"

/* Extract the high half of a 128-bit vector.  */
#define TEST_VGET_HIGH(T1, T2, W, N, N2) \
  VECT_VAR(vget_high_vector64, T1, W, N) = \
    vget_high_##T2##W(VECT_VAR(vget_high_vector128, T1, W, N2)); \
  vst1_##T2##W(VECT_VAR(result, T1, W, N), VECT_VAR(vget_high_vector64, T1, W, N))
413 DECL_VARIABLE(vget_high_vector64
, poly
, 64, 1);
414 DECL_VARIABLE(vget_high_vector128
, poly
, 64, 2);
416 CLEAN(result
, poly
, 64, 1);
418 VLOAD(vget_high_vector128
, buffer
, q
, poly
, p
, 64, 2);
420 TEST_VGET_HIGH(poly
, p
, 64, 1, 2);
422 CHECK_POLY(TEST_MSG
, poly
, 64, 1, PRIx64
, vget_high_expected
, "");
/* vld1_p64 tests.  */
#undef TEST_MSG
#define TEST_MSG "VLD1/VLD1Q"

/* Load a full vector from BUF, then store it to "result".  */
#define TEST_VLD1(VAR, BUF, Q, T1, T2, W, N) \
  VECT_VAR(VAR, T1, W, N) = vld1##Q##_##T2##W(VECT_VAR(BUF, T1, W, N)); \
  vst1##Q##_##T2##W(VECT_VAR(result, T1, W, N), VECT_VAR(VAR, T1, W, N))
432 DECL_VARIABLE(vld1_vector
, poly
, 64, 1);
433 DECL_VARIABLE(vld1_vector
, poly
, 64, 2);
435 CLEAN(result
, poly
, 64, 1);
436 CLEAN(result
, poly
, 64, 2);
438 VLOAD(vld1_vector
, buffer
, , poly
, p
, 64, 1);
439 VLOAD(vld1_vector
, buffer
, q
, poly
, p
, 64, 2);
441 TEST_VLD1(vld1_vector
, buffer
, , poly
, p
, 64, 1);
442 TEST_VLD1(vld1_vector
, buffer
, q
, poly
, p
, 64, 2);
444 CHECK_POLY(TEST_MSG
, poly
, 64, 1, PRIx64
, vld1_expected
, "");
445 CHECK_POLY(TEST_MSG
, poly
, 64, 2, PRIx64
, vld1_expected
, "");
/* vld1_dup_p64 tests.  */
#undef TEST_MSG
#define TEST_MSG "VLD1_DUP/VLD1_DUPQ"

/* Load element i of BUF into all lanes (i is the loop index).  */
#define TEST_VLD1_DUP(VAR, BUF, Q, T1, T2, W, N) \
  VECT_VAR(VAR, T1, W, N) = \
    vld1##Q##_dup_##T2##W(&VECT_VAR(BUF, T1, W, N)[i]); \
  vst1##Q##_##T2##W(VECT_VAR(result, T1, W, N), VECT_VAR(VAR, T1, W, N))
456 DECL_VARIABLE(vld1_dup_vector
, poly
, 64, 1);
457 DECL_VARIABLE(vld1_dup_vector
, poly
, 64, 2);
459 /* Try to read different places from the input buffer. */
460 for (i
=0; i
<3; i
++) {
461 CLEAN(result
, poly
, 64, 1);
462 CLEAN(result
, poly
, 64, 2);
464 TEST_VLD1_DUP(vld1_dup_vector
, buffer_dup
, , poly
, p
, 64, 1);
465 TEST_VLD1_DUP(vld1_dup_vector
, buffer_dup
, q
, poly
, p
, 64, 2);
469 CHECK_POLY(TEST_MSG
, poly
, 64, 1, PRIx64
, vld1_dup_expected0
, "");
470 CHECK_POLY(TEST_MSG
, poly
, 64, 2, PRIx64
, vld1_dup_expected0
, "");
473 CHECK_POLY(TEST_MSG
, poly
, 64, 1, PRIx64
, vld1_dup_expected1
, "");
474 CHECK_POLY(TEST_MSG
, poly
, 64, 2, PRIx64
, vld1_dup_expected1
, "");
477 CHECK_POLY(TEST_MSG
, poly
, 64, 1, PRIx64
, vld1_dup_expected2
, "");
478 CHECK_POLY(TEST_MSG
, poly
, 64, 2, PRIx64
, vld1_dup_expected2
, "");
/* vld1_lane_p64 tests.  */
#undef TEST_MSG
#define TEST_MSG "VLD1_LANE/VLD1_LANEQ"

/* Fill a source vector with 0xAA, then overwrite lane L from "buffer".  */
#define TEST_VLD1_LANE(Q, T1, T2, W, N, L) \
  memset (VECT_VAR(vld1_lane_buffer_src, T1, W, N), 0xAA, W/8*N); \
  VECT_VAR(vld1_lane_vector_src, T1, W, N) = \
    vld1##Q##_##T2##W(VECT_VAR(vld1_lane_buffer_src, T1, W, N)); \
  VECT_VAR(vld1_lane_vector, T1, W, N) = \
    vld1##Q##_lane_##T2##W(VECT_VAR(buffer, T1, W, N), \
			   VECT_VAR(vld1_lane_vector_src, T1, W, N), L); \
  vst1##Q##_##T2##W(VECT_VAR(result, T1, W, N), VECT_VAR(vld1_lane_vector, T1, W, N))
498 DECL_VARIABLE(vld1_lane_vector
, poly
, 64, 1);
499 DECL_VARIABLE(vld1_lane_vector
, poly
, 64, 2);
500 DECL_VARIABLE(vld1_lane_vector_src
, poly
, 64, 1);
501 DECL_VARIABLE(vld1_lane_vector_src
, poly
, 64, 2);
503 ARRAY(vld1_lane_buffer_src
, poly
, 64, 1);
504 ARRAY(vld1_lane_buffer_src
, poly
, 64, 2);
506 CLEAN(result
, poly
, 64, 1);
507 CLEAN(result
, poly
, 64, 2);
509 TEST_VLD1_LANE(, poly
, p
, 64, 1, 0);
510 TEST_VLD1_LANE(q
, poly
, p
, 64, 2, 0);
512 CHECK_POLY(TEST_MSG
, poly
, 64, 1, PRIx64
, vld1_lane_expected
, "");
513 CHECK_POLY(TEST_MSG
, poly
, 64, 2, PRIx64
, vld1_lane_expected
, "");
/* vldX_p64 tests.  */
#define DECL_VLDX(T1, W, N, X) \
  VECT_ARRAY_TYPE(T1, W, N, X) VECT_ARRAY_VAR(vldX_vector, T1, W, N, X); \
  VECT_VAR_DECL(vldX_result_bis_##X, T1, W, N)[X * N]

#define TEST_VLDX(Q, T1, T2, W, N, X) \
  VECT_ARRAY_VAR(vldX_vector, T1, W, N, X) = \
    /* Use dedicated init buffer, of size X.  */ \
    vld##X##Q##_##T2##W(VECT_ARRAY_VAR(buffer_vld##X, T1, W, N, X)); \
  vst##X##Q##_##T2##W(VECT_VAR(vldX_result_bis_##X, T1, W, N), \
		      VECT_ARRAY_VAR(vldX_vector, T1, W, N, X)); \
  memcpy(VECT_VAR(result, T1, W, N), VECT_VAR(vldX_result_bis_##X, T1, W, N), \
	 sizeof(VECT_VAR(result, T1, W, N)));

/* Overwrite "result" with the contents of "result_bis"[Y].  */
#define TEST_EXTRA_CHUNK(T1, W, N, X,Y) \
  memcpy(VECT_VAR(result, T1, W, N), \
	 &(VECT_VAR(vldX_result_bis_##X, T1, W, N)[Y*N]), \
	 sizeof(VECT_VAR(result, T1, W, N)));
535 DECL_VLDX(poly
, 64, 1, 2);
536 DECL_VLDX(poly
, 64, 1, 3);
537 DECL_VLDX(poly
, 64, 1, 4);
539 VECT_ARRAY_INIT2(buffer_vld2
, poly
, 64, 1);
540 PAD(buffer_vld2_pad
, poly
, 64, 1);
541 VECT_ARRAY_INIT3(buffer_vld3
, poly
, 64, 1);
542 PAD(buffer_vld3_pad
, poly
, 64, 1);
543 VECT_ARRAY_INIT4(buffer_vld4
, poly
, 64, 1);
544 PAD(buffer_vld4_pad
, poly
, 64, 1);
547 #define TEST_MSG "VLD2/VLD2Q"
548 CLEAN(result
, poly
, 64, 1);
549 TEST_VLDX(, poly
, p
, 64, 1, 2);
550 CHECK_POLY(TEST_MSG
, poly
, 64, 1, PRIx64
, vld2_expected_0
, "chunk 0");
551 CLEAN(result
, poly
, 64, 1);
552 TEST_EXTRA_CHUNK(poly
, 64, 1, 2, 1);
553 CHECK_POLY(TEST_MSG
, poly
, 64, 1, PRIx64
, vld2_expected_1
, "chunk 1");
556 #define TEST_MSG "VLD3/VLD3Q"
557 CLEAN(result
, poly
, 64, 1);
558 TEST_VLDX(, poly
, p
, 64, 1, 3);
559 CHECK_POLY(TEST_MSG
, poly
, 64, 1, PRIx64
, vld3_expected_0
, "chunk 0");
560 CLEAN(result
, poly
, 64, 1);
561 TEST_EXTRA_CHUNK(poly
, 64, 1, 3, 1);
562 CHECK_POLY(TEST_MSG
, poly
, 64, 1, PRIx64
, vld3_expected_1
, "chunk 1");
563 CLEAN(result
, poly
, 64, 1);
564 TEST_EXTRA_CHUNK(poly
, 64, 1, 3, 2);
565 CHECK_POLY(TEST_MSG
, poly
, 64, 1, PRIx64
, vld3_expected_2
, "chunk 2");
568 #define TEST_MSG "VLD4/VLD4Q"
569 CLEAN(result
, poly
, 64, 1);
570 TEST_VLDX(, poly
, p
, 64, 1, 4);
571 CHECK_POLY(TEST_MSG
, poly
, 64, 1, PRIx64
, vld4_expected_0
, "chunk 0");
572 CLEAN(result
, poly
, 64, 1);
573 TEST_EXTRA_CHUNK(poly
, 64, 1, 4, 1);
574 CHECK_POLY(TEST_MSG
, poly
, 64, 1, PRIx64
, vld4_expected_1
, "chunk 1");
575 CLEAN(result
, poly
, 64, 1);
576 TEST_EXTRA_CHUNK(poly
, 64, 1, 4, 2);
577 CHECK_POLY(TEST_MSG
, poly
, 64, 1, PRIx64
, vld4_expected_2
, "chunk 2");
578 CLEAN(result
, poly
, 64, 1);
579 TEST_EXTRA_CHUNK(poly
, 64, 1, 4, 3);
580 CHECK_POLY(TEST_MSG
, poly
, 64, 1, PRIx64
, vld4_expected_3
, "chunk 3");
/* vldX_dup_p64 tests.  */
#define DECL_VLDX_DUP(T1, W, N, X) \
  VECT_ARRAY_TYPE(T1, W, N, X) VECT_ARRAY_VAR(vldX_dup_vector, T1, W, N, X); \
  VECT_VAR_DECL(vldX_dup_result_bis_##X, T1, W, N)[X * N]

#define TEST_VLDX_DUP(Q, T1, T2, W, N, X) \
  VECT_ARRAY_VAR(vldX_dup_vector, T1, W, N, X) = \
    vld##X##Q##_dup_##T2##W(&VECT_VAR(buffer_dup, T1, W, N)[0]); \
  vst##X##Q##_##T2##W(VECT_VAR(vldX_dup_result_bis_##X, T1, W, N), \
		      VECT_ARRAY_VAR(vldX_dup_vector, T1, W, N, X)); \
  memcpy(VECT_VAR(result, T1, W, N), VECT_VAR(vldX_dup_result_bis_##X, T1, W, N), \
	 sizeof(VECT_VAR(result, T1, W, N)));

/* Overwrite "result" with the contents of "result_bis"[Y].  */
#define TEST_VLDX_DUP_EXTRA_CHUNK(T1, W, N, X,Y) \
  memcpy(VECT_VAR(result, T1, W, N), \
	 &(VECT_VAR(vldX_dup_result_bis_##X, T1, W, N)[Y*N]), \
	 sizeof(VECT_VAR(result, T1, W, N)));
602 DECL_VLDX_DUP(poly
, 64, 1, 2);
603 DECL_VLDX_DUP(poly
, 64, 1, 3);
604 DECL_VLDX_DUP(poly
, 64, 1, 4);
608 #define TEST_MSG "VLD2_DUP/VLD2Q_DUP"
609 CLEAN(result
, poly
, 64, 1);
610 TEST_VLDX_DUP(, poly
, p
, 64, 1, 2);
611 CHECK_POLY(TEST_MSG
, poly
, 64, 1, PRIx64
, vld2_dup_expected_0
, "chunk 0");
612 CLEAN(result
, poly
, 64, 1);
613 TEST_VLDX_DUP_EXTRA_CHUNK(poly
, 64, 1, 2, 1);
614 CHECK_POLY(TEST_MSG
, poly
, 64, 1, PRIx64
, vld2_dup_expected_1
, "chunk 1");
617 #define TEST_MSG "VLD3_DUP/VLD3Q_DUP"
618 CLEAN(result
, poly
, 64, 1);
619 TEST_VLDX_DUP(, poly
, p
, 64, 1, 3);
620 CHECK_POLY(TEST_MSG
, poly
, 64, 1, PRIx64
, vld3_dup_expected_0
, "chunk 0");
621 CLEAN(result
, poly
, 64, 1);
622 TEST_VLDX_DUP_EXTRA_CHUNK(poly
, 64, 1, 3, 1);
623 CHECK_POLY(TEST_MSG
, poly
, 64, 1, PRIx64
, vld3_dup_expected_1
, "chunk 1");
624 CLEAN(result
, poly
, 64, 1);
625 TEST_VLDX_DUP_EXTRA_CHUNK(poly
, 64, 1, 3, 2);
626 CHECK_POLY(TEST_MSG
, poly
, 64, 1, PRIx64
, vld3_dup_expected_2
, "chunk 2");
629 #define TEST_MSG "VLD4_DUP/VLD4Q_DUP"
630 CLEAN(result
, poly
, 64, 1);
631 TEST_VLDX_DUP(, poly
, p
, 64, 1, 4);
632 CHECK_POLY(TEST_MSG
, poly
, 64, 1, PRIx64
, vld4_dup_expected_0
, "chunk 0");
633 CLEAN(result
, poly
, 64, 1);
634 TEST_VLDX_DUP_EXTRA_CHUNK(poly
, 64, 1, 4, 1);
635 CHECK_POLY(TEST_MSG
, poly
, 64, 1, PRIx64
, vld4_dup_expected_1
, "chunk 1");
636 CLEAN(result
, poly
, 64, 1);
637 TEST_VLDX_DUP_EXTRA_CHUNK(poly
, 64, 1, 4, 2);
638 CHECK_POLY(TEST_MSG
, poly
, 64, 1, PRIx64
, vld4_dup_expected_2
, "chunk 2");
639 CLEAN(result
, poly
, 64, 1);
640 TEST_VLDX_DUP_EXTRA_CHUNK(poly
, 64, 1, 4, 3);
641 CHECK_POLY(TEST_MSG
, poly
, 64, 1, PRIx64
, vld4_dup_expected_3
, "chunk 3");
/* vsli_p64 tests.  */
#undef TEST_MSG
#define TEST_MSG "VSLI"

/* Shift-and-insert by immediate V; shared by the vsli and vsri tests.  */
#define TEST_VSXI1(INSN, Q, T1, T2, W, N, V) \
  VECT_VAR(vsXi_vector_res, T1, W, N) = \
    INSN##Q##_n_##T2##W(VECT_VAR(vsXi_vector, T1, W, N), \
			VECT_VAR(vsXi_vector2, T1, W, N), \
			V); \
  vst1##Q##_##T2##W(VECT_VAR(result, T1, W, N), VECT_VAR(vsXi_vector_res, T1, W, N))

#define TEST_VSXI(INSN, Q, T1, T2, W, N, V) \
  TEST_VSXI1(INSN, Q, T1, T2, W, N, V)
657 DECL_VARIABLE(vsXi_vector
, poly
, 64, 1);
658 DECL_VARIABLE(vsXi_vector
, poly
, 64, 2);
659 DECL_VARIABLE(vsXi_vector2
, poly
, 64, 1);
660 DECL_VARIABLE(vsXi_vector2
, poly
, 64, 2);
661 DECL_VARIABLE(vsXi_vector_res
, poly
, 64, 1);
662 DECL_VARIABLE(vsXi_vector_res
, poly
, 64, 2);
664 CLEAN(result
, poly
, 64, 1);
665 CLEAN(result
, poly
, 64, 2);
667 VLOAD(vsXi_vector
, buffer
, , poly
, p
, 64, 1);
668 VLOAD(vsXi_vector
, buffer
, q
, poly
, p
, 64, 2);
670 VDUP(vsXi_vector2
, , poly
, p
, 64, 1, 2);
671 VDUP(vsXi_vector2
, q
, poly
, p
, 64, 2, 3);
673 TEST_VSXI(vsli
, , poly
, p
, 64, 1, 3);
674 TEST_VSXI(vsli
, q
, poly
, p
, 64, 2, 53);
676 CHECK_POLY(TEST_MSG
, poly
, 64, 1, PRIx64
, vsli_expected
, "");
677 CHECK_POLY(TEST_MSG
, poly
, 64, 2, PRIx64
, vsli_expected
, "");
679 /* Test cases with maximum shift amount. */
680 CLEAN(result
, poly
, 64, 1);
681 CLEAN(result
, poly
, 64, 2);
683 TEST_VSXI(vsli
, , poly
, p
, 64, 1, 63);
684 TEST_VSXI(vsli
, q
, poly
, p
, 64, 2, 63);
686 #define COMMENT "(max shift amount)"
687 CHECK_POLY(TEST_MSG
, poly
, 64, 1, PRIx64
, vsli_expected_max_shift
, COMMENT
);
688 CHECK_POLY(TEST_MSG
, poly
, 64, 2, PRIx64
, vsli_expected_max_shift
, COMMENT
);
690 /* vsri_p64 tests. */
692 #define TEST_MSG "VSRI"
694 CLEAN(result
, poly
, 64, 1);
695 CLEAN(result
, poly
, 64, 2);
697 VLOAD(vsXi_vector
, buffer
, , poly
, p
, 64, 1);
698 VLOAD(vsXi_vector
, buffer
, q
, poly
, p
, 64, 2);
700 VDUP(vsXi_vector2
, , poly
, p
, 64, 1, 2);
701 VDUP(vsXi_vector2
, q
, poly
, p
, 64, 2, 3);
703 TEST_VSXI(vsri
, , poly
, p
, 64, 1, 3);
704 TEST_VSXI(vsri
, q
, poly
, p
, 64, 2, 53);
706 CHECK_POLY(TEST_MSG
, poly
, 64, 1, PRIx64
, vsri_expected
, "");
707 CHECK_POLY(TEST_MSG
, poly
, 64, 2, PRIx64
, vsri_expected
, "");
709 /* Test cases with maximum shift amount. */
710 CLEAN(result
, poly
, 64, 1);
711 CLEAN(result
, poly
, 64, 2);
713 TEST_VSXI(vsri
, , poly
, p
, 64, 1, 64);
714 TEST_VSXI(vsri
, q
, poly
, p
, 64, 2, 64);
716 #define COMMENT "(max shift amount)"
717 CHECK_POLY(TEST_MSG
, poly
, 64, 1, PRIx64
, vsri_expected_max_shift
, COMMENT
);
718 CHECK_POLY(TEST_MSG
, poly
, 64, 2, PRIx64
, vsri_expected_max_shift
, COMMENT
);
/* vst1_lane_p64 tests.  */
#undef TEST_MSG
#define TEST_MSG "VST1_LANE/VST1_LANEQ"

/* Store only lane L of the loaded vector into "result".  */
#define TEST_VST1_LANE(Q, T1, T2, W, N, L) \
  VECT_VAR(vst1_lane_vector, T1, W, N) = \
    vld1##Q##_##T2##W(VECT_VAR(buffer, T1, W, N)); \
  vst1##Q##_lane_##T2##W(VECT_VAR(result, T1, W, N), \
			 VECT_VAR(vst1_lane_vector, T1, W, N), L);
730 DECL_VARIABLE(vst1_lane_vector
, poly
, 64, 1);
731 DECL_VARIABLE(vst1_lane_vector
, poly
, 64, 2);
733 CLEAN(result
, poly
, 64, 1);
734 CLEAN(result
, poly
, 64, 2);
736 TEST_VST1_LANE(, poly
, p
, 64, 1, 0);
737 TEST_VST1_LANE(q
, poly
, p
, 64, 2, 0);
739 CHECK_POLY(TEST_MSG
, poly
, 64, 1, PRIx64
, vst1_lane_expected
, "");
740 CHECK_POLY(TEST_MSG
, poly
, 64, 2, PRIx64
, vst1_lane_expected
, "");
/* vget_lane_p64 tests.  */
#undef TEST_MSG
#define TEST_MSG "VGET_LANE/VGETQ_LANE"

/* Extract lane L as a scalar and compare it directly against the
   expected scalar, reporting a detailed error on mismatch.  */
#define TEST_VGET_LANE(Q, T1, T2, W, N, L) \
  VECT_VAR(vget_lane_vector, T1, W, N) = vget##Q##_lane_##T2##W(VECT_VAR(vget_lane_vector1, T1, W, N), L); \
  if (VECT_VAR(vget_lane_vector, T1, W, N) != VECT_VAR(vget_lane_expected, T1, W, N)) { \
    fprintf(stderr, \
	    "ERROR in %s (%s line %d in result '%s') at type %s " \
	    "got 0x%" PRIx##W " != 0x%" PRIx##W "\n", \
	    TEST_MSG, __FILE__, __LINE__, \
	    STR(VECT_VAR(vget_lane_expected, T1, W, N)), \
	    STR(VECT_NAME(T1, W, N)), \
	    VECT_VAR(vget_lane_vector, T1, W, N), \
	    VECT_VAR(vget_lane_expected, T1, W, N)); \
    abort (); \
  }
760 /* Initialize input values. */
761 DECL_VARIABLE(vget_lane_vector1
, poly
, 64, 1);
762 DECL_VARIABLE(vget_lane_vector1
, poly
, 64, 2);
764 VLOAD(vget_lane_vector1
, buffer
, , poly
, p
, 64, 1);
765 VLOAD(vget_lane_vector1
, buffer
, q
, poly
, p
, 64, 2);
767 VECT_VAR_DECL(vget_lane_vector
, poly
, 64, 1);
768 VECT_VAR_DECL(vget_lane_vector
, poly
, 64, 2);
770 TEST_VGET_LANE( , poly
, p
, 64, 1, 0);
771 TEST_VGET_LANE(q
, poly
, p
, 64, 2, 0);
/* vset_lane_p64 tests.  */
#undef TEST_MSG
#define TEST_MSG "VSET_LANE/VSETQ_LANE"

/* Overwrite lane L with scalar value V.  */
#define TEST_VSET_LANE(Q, T1, T2, W, N, V, L) \
  VECT_VAR(vset_lane_vector, T1, W, N) = \
    vset##Q##_lane_##T2##W(V, \
			   VECT_VAR(vset_lane_vector, T1, W, N), \
			   L); \
  vst1##Q##_##T2##W(VECT_VAR(result, T1, W, N), VECT_VAR(vset_lane_vector, T1, W, N))
785 /* Initialize input values. */
786 DECL_VARIABLE(vset_lane_vector
, poly
, 64, 1);
787 DECL_VARIABLE(vset_lane_vector
, poly
, 64, 2);
789 CLEAN(result
, uint
, 64, 1);
790 CLEAN(result
, uint
, 64, 2);
792 VLOAD(vset_lane_vector
, buffer
, , poly
, p
, 64, 1);
793 VLOAD(vset_lane_vector
, buffer
, q
, poly
, p
, 64, 2);
795 /* Choose value and lane arbitrarily. */
796 TEST_VSET_LANE(, poly
, p
, 64, 1, 0x88, 0);
797 TEST_VSET_LANE(q
, poly
, p
, 64, 2, 0x11, 1);
799 CHECK(TEST_MSG
, poly
, 64, 1, PRIx64
, vset_lane_expected
, "");
800 CHECK(TEST_MSG
, poly
, 64, 2, PRIx64
, vset_lane_expected
, "");
/* vtst_p64 tests.  */
#undef TEST_MSG
#define TEST_MSG "VTST"

/* Bit-test two vectors; the result is always unsigned.  */
#define TEST_VTST1(INSN, Q, T1, T2, W, N) \
  VECT_VAR(vtst_vector_res, uint, W, N) = \
    INSN##Q##_##T2##W(VECT_VAR(vtst_vector, T1, W, N), \
		      VECT_VAR(vtst_vector2, T1, W, N)); \
  vst1##Q##_u##W(VECT_VAR(result, uint, W, N), \
		 VECT_VAR(vtst_vector_res, uint, W, N))

#define TEST_VTST(INSN, Q, T1, T2, W, N) \
  TEST_VTST1(INSN, Q, T1, T2, W, N)
817 /* Initialize input values. */
818 DECL_VARIABLE(vtst_vector
, poly
, 64, 1);
819 DECL_VARIABLE(vtst_vector2
, poly
, 64, 1);
820 DECL_VARIABLE(vtst_vector_res
, uint
, 64, 1);
822 CLEAN(result
, uint
, 64, 1);
824 VLOAD(vtst_vector
, buffer
, , poly
, p
, 64, 1);
825 VDUP(vtst_vector2
, , poly
, p
, 64, 1, 5);
827 TEST_VTST(vtst
, , poly
, p
, 64, 1);
829 CHECK(TEST_MSG
, uint
, 64, 1, PRIx64
, vtst_expected
, "");
831 /* vtstq_p64 is supported by aarch64 only. */
833 DECL_VARIABLE(vtst_vector
, poly
, 64, 2);
834 DECL_VARIABLE(vtst_vector2
, poly
, 64, 2);
835 DECL_VARIABLE(vtst_vector_res
, uint
, 64, 2);
836 CLEAN(result
, uint
, 64, 2);
837 VLOAD(vtst_vector
, buffer
, q
, poly
, p
, 64, 2);
838 VDUP(vtst_vector2
, q
, poly
, p
, 64, 2, 5);
839 TEST_VTST(vtst
, q
, poly
, p
, 64, 2);
840 CHECK(TEST_MSG
, uint
, 64, 2, PRIx64
, vtst_expected
, "");
/* vmov_n_p64 tests.  */
#undef TEST_MSG
#define TEST_MSG "VMOV/VMOVQ"

/* Broadcast element i of buffer_dup into all lanes (i is the loop index).  */
#define TEST_VMOV(Q, T1, T2, W, N) \
  VECT_VAR(vmov_n_vector, T1, W, N) = \
    vmov##Q##_n_##T2##W(VECT_VAR(buffer_dup, T1, W, N)[i]); \
  vst1##Q##_##T2##W(VECT_VAR(result, T1, W, N), VECT_VAR(vmov_n_vector, T1, W, N))
851 DECL_VARIABLE(vmov_n_vector
, poly
, 64, 1);
852 DECL_VARIABLE(vmov_n_vector
, poly
, 64, 2);
854 /* Try to read different places from the input buffer. */
855 for (i
=0; i
< 3; i
++) {
856 CLEAN(result
, poly
, 64, 1);
857 CLEAN(result
, poly
, 64, 2);
859 TEST_VMOV(, poly
, p
, 64, 1);
860 TEST_VMOV(q
, poly
, p
, 64, 2);
864 CHECK_POLY(TEST_MSG
, poly
, 64, 1, PRIx64
, vmov_n_expected0
, "");
865 CHECK_POLY(TEST_MSG
, poly
, 64, 2, PRIx64
, vmov_n_expected0
, "");
868 CHECK_POLY(TEST_MSG
, poly
, 64, 1, PRIx64
, vmov_n_expected1
, "");
869 CHECK_POLY(TEST_MSG
, poly
, 64, 2, PRIx64
, vmov_n_expected1
, "");
872 CHECK_POLY(TEST_MSG
, poly
, 64, 1, PRIx64
, vmov_n_expected2
, "");
873 CHECK_POLY(TEST_MSG
, poly
, 64, 2, PRIx64
, vmov_n_expected2
, "");
880 /* vldx_lane_p64 tests. */
882 #define TEST_MSG "VLDX_LANE/VLDXQ_LANE"
884 VECT_VAR_DECL_INIT(buffer_vld2_lane
, poly
, 64, 2);
885 VECT_VAR_DECL_INIT(buffer_vld3_lane
, poly
, 64, 3);
886 VECT_VAR_DECL_INIT(buffer_vld4_lane
, poly
, 64, 4);
888 /* In this case, input variables are arrays of vectors. */
889 #define DECL_VLD_STX_LANE(T1, W, N, X) \
890 VECT_ARRAY_TYPE(T1, W, N, X) VECT_ARRAY_VAR(vector, T1, W, N, X); \
891 VECT_ARRAY_TYPE(T1, W, N, X) VECT_ARRAY_VAR(vector_src, T1, W, N, X); \
892 VECT_VAR_DECL(result_bis_##X, T1, W, N)[X * N]
894 /* We need to use a temporary result buffer (result_bis), because
895 the one used for other tests is not large enough. A subset of the
896 result data is moved from result_bis to result, and it is this
897 subset which is used to check the actual behavior. The next
898 macro enables to move another chunk of data from result_bis to
900 /* We also use another extra input buffer (buffer_src), which we
901 fill with 0xAA, and which it used to load a vector from which we
902 read a given lane. */
904 #define TEST_VLDX_LANE(Q, T1, T2, W, N, X, L) \
905 memset (VECT_VAR(buffer_src, T1, W, N), 0xAA, \
906 sizeof(VECT_VAR(buffer_src, T1, W, N))); \
908 VECT_ARRAY_VAR(vector_src, T1, W, N, X) = \
909 vld##X##Q##_##T2##W(VECT_VAR(buffer_src, T1, W, N)); \
911 VECT_ARRAY_VAR(vector, T1, W, N, X) = \
912 /* Use dedicated init buffer, of size. X */ \
913 vld##X##Q##_lane_##T2##W(VECT_VAR(buffer_vld##X##_lane, T1, W, X), \
914 VECT_ARRAY_VAR(vector_src, T1, W, N, X), \
916 vst##X##Q##_##T2##W(VECT_VAR(result_bis_##X, T1, W, N), \
917 VECT_ARRAY_VAR(vector, T1, W, N, X)); \
918 memcpy(VECT_VAR(result, T1, W, N), VECT_VAR(result_bis_##X, T1, W, N), \
919 sizeof(VECT_VAR(result, T1, W, N)))
921 /* Overwrite "result" with the contents of "result_bis"[Y]. */
922 #undef TEST_EXTRA_CHUNK
923 #define TEST_EXTRA_CHUNK(T1, W, N, X, Y) \
924 memcpy(VECT_VAR(result, T1, W, N), \
925 &(VECT_VAR(result_bis_##X, T1, W, N)[Y*N]), \
926 sizeof(VECT_VAR(result, T1, W, N)));
928 /* Add some padding to try to catch out of bound accesses. */
929 #define ARRAY1(V, T, W, N) VECT_VAR_DECL(V,T,W,N)[1]={42}
930 #define DUMMY_ARRAY(V, T, W, N, L) \
931 VECT_VAR_DECL(V,T,W,N)[N*L]={0}; \
932 ARRAY1(V##_pad,T,W,N)
934 #define DECL_ALL_VLD_STX_LANE(X) \
935 DECL_VLD_STX_LANE(poly, 64, 1, X); \
936 DECL_VLD_STX_LANE(poly, 64, 2, X);
938 #define TEST_ALL_VLDX_LANE(X) \
939 TEST_VLDX_LANE(, poly, p, 64, 1, X, 0); \
940 TEST_VLDX_LANE(q, poly, p, 64, 2, X, 0);
942 #define TEST_ALL_EXTRA_CHUNKS(X,Y) \
943 TEST_EXTRA_CHUNK(poly, 64, 1, X, Y) \
944 TEST_EXTRA_CHUNK(poly, 64, 2, X, Y)
946 #define CHECK_RESULTS_VLD_STX_LANE(test_name,EXPECTED,comment) \
947 CHECK_POLY(test_name, poly, 64, 1, PRIx64, EXPECTED, comment); \
948 CHECK_POLY(test_name, poly, 64, 2, PRIx64, EXPECTED, comment);
950 /* Declare the temporary buffers / variables. */
951 DECL_ALL_VLD_STX_LANE(2);
952 DECL_ALL_VLD_STX_LANE(3);
953 DECL_ALL_VLD_STX_LANE(4);
955 DUMMY_ARRAY(buffer_src
, poly
, 64, 1, 4);
956 DUMMY_ARRAY(buffer_src
, poly
, 64, 2, 4);
958 /* Check vld2_lane/vld2q_lane. */
961 #define TEST_MSG "VLD2_LANE/VLD2Q_LANE"
962 TEST_ALL_VLDX_LANE(2);
963 CHECK_RESULTS_VLD_STX_LANE (TEST_MSG
, expected_vld_st2_0
, " chunk 0");
965 TEST_ALL_EXTRA_CHUNKS(2, 1);
966 CHECK_RESULTS_VLD_STX_LANE (TEST_MSG
, expected_vld_st2_1
, " chunk 1");
968 /* Check vld3_lane/vld3q_lane. */
971 #define TEST_MSG "VLD3_LANE/VLD3Q_LANE"
972 TEST_ALL_VLDX_LANE(3);
973 CHECK_RESULTS_VLD_STX_LANE (TEST_MSG
, expected_vld_st3_0
, " chunk 0");
975 TEST_ALL_EXTRA_CHUNKS(3, 1);
976 CHECK_RESULTS_VLD_STX_LANE (TEST_MSG
, expected_vld_st3_1
, " chunk 1");
978 TEST_ALL_EXTRA_CHUNKS(3, 2);
979 CHECK_RESULTS_VLD_STX_LANE (TEST_MSG
, expected_vld_st3_2
, " chunk 2");
981 /* Check vld4_lane/vld4q_lane. */
984 #define TEST_MSG "VLD4_LANE/VLD4Q_LANE"
985 TEST_ALL_VLDX_LANE(4);
986 CHECK_RESULTS_VLD_STX_LANE (TEST_MSG
, expected_vld_st4_0
, " chunk 0");
988 TEST_ALL_EXTRA_CHUNKS(4, 1);
989 CHECK_RESULTS_VLD_STX_LANE (TEST_MSG
, expected_vld_st4_1
, " chunk 1");
991 TEST_ALL_EXTRA_CHUNKS(4, 2);
992 CHECK_RESULTS_VLD_STX_LANE (TEST_MSG
, expected_vld_st4_2
, " chunk 2");
994 TEST_ALL_EXTRA_CHUNKS(4, 3);
995 CHECK_RESULTS_VLD_STX_LANE (TEST_MSG
, expected_vld_st4_3
, " chunk 3");
997 /* In this case, input variables are arrays of vectors. */
998 #define DECL_VSTX_LANE(T1, W, N, X) \
999 VECT_ARRAY_TYPE(T1, W, N, X) VECT_ARRAY_VAR(vector, T1, W, N, X); \
1000 VECT_ARRAY_TYPE(T1, W, N, X) VECT_ARRAY_VAR(vector_src, T1, W, N, X); \
1001 VECT_VAR_DECL(result_bis_##X, T1, W, N)[X * N]
1003 /* We need to use a temporary result buffer (result_bis), because
1004 the one used for other tests is not large enough. A subset of the
1005 result data is moved from result_bis to result, and it is this
1006 subset which is used to check the actual behavior. The next
1007 macro enables to move another chunk of data from result_bis to
1009 /* We also use another extra input buffer (buffer_src), which we
1010 fill with 0xAA, and which it used to load a vector from which we
1011 read a given lane. */
1012 #define TEST_VSTX_LANE(Q, T1, T2, W, N, X, L) \
1013 memset (VECT_VAR(buffer_src, T1, W, N), 0xAA, \
1014 sizeof(VECT_VAR(buffer_src, T1, W, N))); \
1015 memset (VECT_VAR(result_bis_##X, T1, W, N), 0, \
1016 sizeof(VECT_VAR(result_bis_##X, T1, W, N))); \
1018 VECT_ARRAY_VAR(vector_src, T1, W, N, X) = \
1019 vld##X##Q##_##T2##W(VECT_VAR(buffer_src, T1, W, N)); \
1021 VECT_ARRAY_VAR(vector, T1, W, N, X) = \
1022 /* Use dedicated init buffer, of size X. */ \
1023 vld##X##Q##_lane_##T2##W(VECT_VAR(buffer_vld##X##_lane, T1, W, X), \
1024 VECT_ARRAY_VAR(vector_src, T1, W, N, X), \
1026 vst##X##Q##_lane_##T2##W(VECT_VAR(result_bis_##X, T1, W, N), \
1027 VECT_ARRAY_VAR(vector, T1, W, N, X), \
1029 memcpy(VECT_VAR(result, T1, W, N), VECT_VAR(result_bis_##X, T1, W, N), \
1030 sizeof(VECT_VAR(result, T1, W, N)));
1032 #define TEST_ALL_VSTX_LANE(X) \
1033 TEST_VSTX_LANE(, poly, p, 64, 1, X, 0); \
1034 TEST_VSTX_LANE(q, poly, p, 64, 2, X, 0);
1036 /* Check vst2_lane/vst2q_lane. */
1039 #define TEST_MSG "VST2_LANE/VST2Q_LANE"
1040 TEST_ALL_VSTX_LANE(2);
1042 #define CMT " (chunk 0)"
1043 CHECK_POLY(TEST_MSG
, poly
, 64, 1, PRIx64
, expected_vld_st2_0
, CMT
);
1045 TEST_ALL_EXTRA_CHUNKS(2, 1);
1047 #define CMT " chunk 1"
1048 CHECK_POLY(TEST_MSG
, poly
, 64, 1, PRIx64
, expected_vld_st2_1
, CMT
);
1050 /* Check vst3_lane/vst3q_lane. */
1053 #define TEST_MSG "VST3_LANE/VST3Q_LANE"
1054 TEST_ALL_VSTX_LANE(3);
1057 #define CMT " (chunk 0)"
1058 CHECK_POLY(TEST_MSG
, poly
, 64, 1, PRIx64
, expected_vld_st3_0
, CMT
);
1060 TEST_ALL_EXTRA_CHUNKS(3, 1);
1063 #define CMT " (chunk 1)"
1064 CHECK_POLY(TEST_MSG
, poly
, 64, 1, PRIx64
, expected_vld_st3_1
, CMT
);
1066 TEST_ALL_EXTRA_CHUNKS(3, 2);
1069 #define CMT " (chunk 2)"
1070 CHECK_POLY(TEST_MSG
, poly
, 64, 1, PRIx64
, expected_vld_st3_2
, CMT
);
1072 /* Check vst4_lane/vst4q_lane. */
1075 #define TEST_MSG "VST4_LANE/VST4Q_LANE"
1076 TEST_ALL_VSTX_LANE(4);
1079 #define CMT " (chunk 0)"
1080 CHECK_POLY(TEST_MSG
, poly
, 64, 1, PRIx64
, expected_vld_st4_0
, CMT
);
1082 TEST_ALL_EXTRA_CHUNKS(4, 1);
1085 #define CMT " (chunk 1)"
1086 CHECK_POLY(TEST_MSG
, poly
, 64, 1, PRIx64
, expected_vld_st4_1
, CMT
);
1088 TEST_ALL_EXTRA_CHUNKS(4, 2);
1091 #define CMT " (chunk 2)"
1092 CHECK_POLY(TEST_MSG
, poly
, 64, 1, PRIx64
, expected_vld_st4_2
, CMT
);
1094 TEST_ALL_EXTRA_CHUNKS(4, 3);
1097 #define CMT " (chunk 3)"
1098 CHECK_POLY(TEST_MSG
, poly
, 64, 1, PRIx64
, expected_vld_st4_3
, CMT
);
1100 #endif /* __aarch64__. */