/* This file contains tests for all the *p64 intrinsics, except for
   vreinterpret, which has its own testcase. */

/* { dg-require-effective-target arm_crypto_ok { target { arm*-*-* } } } */
/* { dg-add-options arm_crypto } */
/* { dg-additional-options "-march=armv8-a+crypto" { target { aarch64*-*-* } } } */

#include <arm_neon.h>
#include "arm-neon-ref.h"
#include "compute-ref-data.h"
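
/* A note on the helper macros (defined in arm-neon-ref.h): they paste
   the type, width and lane count into the variable name.  As a rough
   sketch, not a copy of the actual definitions,
   VECT_VAR_DECL(vbsl_expected, poly, 64, 2) expands to something like
   "poly64_t vbsl_expected_poly64x2", to which the "[] = { ... }"
   initializer below is then appended.  */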

/* Expected results: vbsl. */
VECT_VAR_DECL(vbsl_expected,poly,64,1) [] = { 0xfffffff1 };
VECT_VAR_DECL(vbsl_expected,poly,64,2) [] = { 0xfffffff1,
                                              0xfffffff1 };

/* Expected results: vceq. */
VECT_VAR_DECL(vceq_expected,uint,64,1) [] = { 0x0 };

/* Expected results: vcombine. */
VECT_VAR_DECL(vcombine_expected,poly,64,2) [] = { 0xfffffffffffffff0, 0x88 };

/* Expected results: vcreate. */
VECT_VAR_DECL(vcreate_expected,poly,64,1) [] = { 0x123456789abcdef0 };

/* Expected results: vdup_lane. */
VECT_VAR_DECL(vdup_lane_expected,poly,64,1) [] = { 0xfffffffffffffff0 };
VECT_VAR_DECL(vdup_lane_expected,poly,64,2) [] = { 0xfffffffffffffff0,
                                                   0xfffffffffffffff0 };

/* Expected results: vdup_n. */
VECT_VAR_DECL(vdup_n_expected0,poly,64,1) [] = { 0xfffffffffffffff0 };
VECT_VAR_DECL(vdup_n_expected0,poly,64,2) [] = { 0xfffffffffffffff0,
                                                 0xfffffffffffffff0 };
VECT_VAR_DECL(vdup_n_expected1,poly,64,1) [] = { 0xfffffffffffffff1 };
VECT_VAR_DECL(vdup_n_expected1,poly,64,2) [] = { 0xfffffffffffffff1,
                                                 0xfffffffffffffff1 };
VECT_VAR_DECL(vdup_n_expected2,poly,64,1) [] = { 0xfffffffffffffff2 };
VECT_VAR_DECL(vdup_n_expected2,poly,64,2) [] = { 0xfffffffffffffff2,
                                                 0xfffffffffffffff2 };

/* Expected results: vext. */
VECT_VAR_DECL(vext_expected,poly,64,1) [] = { 0xfffffffffffffff0 };
VECT_VAR_DECL(vext_expected,poly,64,2) [] = { 0xfffffffffffffff1, 0x88 };

/* Expected results: vget_low. */
VECT_VAR_DECL(vget_low_expected,poly,64,1) [] = { 0xfffffffffffffff0 };

/* Expected results: vget_high. */
VECT_VAR_DECL(vget_high_expected,poly,64,1) [] = { 0xfffffffffffffff1 };

/* Expected results: vld1. */
VECT_VAR_DECL(vld1_expected,poly,64,1) [] = { 0xfffffffffffffff0 };
VECT_VAR_DECL(vld1_expected,poly,64,2) [] = { 0xfffffffffffffff0,
                                              0xfffffffffffffff1 };

/* Expected results: vld1_dup. */
VECT_VAR_DECL(vld1_dup_expected0,poly,64,1) [] = { 0xfffffffffffffff0 };
VECT_VAR_DECL(vld1_dup_expected0,poly,64,2) [] = { 0xfffffffffffffff0,
                                                   0xfffffffffffffff0 };
VECT_VAR_DECL(vld1_dup_expected1,poly,64,1) [] = { 0xfffffffffffffff1 };
VECT_VAR_DECL(vld1_dup_expected1,poly,64,2) [] = { 0xfffffffffffffff1,
                                                   0xfffffffffffffff1 };
VECT_VAR_DECL(vld1_dup_expected2,poly,64,1) [] = { 0xfffffffffffffff2 };
VECT_VAR_DECL(vld1_dup_expected2,poly,64,2) [] = { 0xfffffffffffffff2,
                                                   0xfffffffffffffff2 };

/* Expected results: vld1_lane. */
VECT_VAR_DECL(vld1_lane_expected,poly,64,1) [] = { 0xfffffffffffffff0 };
VECT_VAR_DECL(vld1_lane_expected,poly,64,2) [] = { 0xfffffffffffffff0,
                                                   0xaaaaaaaaaaaaaaaa };

/* Expected results: vldX. */
VECT_VAR_DECL(vld2_expected_0,poly,64,1) [] = { 0xfffffffffffffff0 };
VECT_VAR_DECL(vld2_expected_1,poly,64,1) [] = { 0xfffffffffffffff1 };
VECT_VAR_DECL(vld3_expected_0,poly,64,1) [] = { 0xfffffffffffffff0 };
VECT_VAR_DECL(vld3_expected_1,poly,64,1) [] = { 0xfffffffffffffff1 };
VECT_VAR_DECL(vld3_expected_2,poly,64,1) [] = { 0xfffffffffffffff2 };
VECT_VAR_DECL(vld4_expected_0,poly,64,1) [] = { 0xfffffffffffffff0 };
VECT_VAR_DECL(vld4_expected_1,poly,64,1) [] = { 0xfffffffffffffff1 };
VECT_VAR_DECL(vld4_expected_2,poly,64,1) [] = { 0xfffffffffffffff2 };
VECT_VAR_DECL(vld4_expected_3,poly,64,1) [] = { 0xfffffffffffffff3 };

/* Expected results: vldX_dup. */
VECT_VAR_DECL(vld2_dup_expected_0,poly,64,1) [] = { 0xfffffffffffffff0 };
VECT_VAR_DECL(vld2_dup_expected_1,poly,64,1) [] = { 0xfffffffffffffff1 };
VECT_VAR_DECL(vld3_dup_expected_0,poly,64,1) [] = { 0xfffffffffffffff0 };
VECT_VAR_DECL(vld3_dup_expected_1,poly,64,1) [] = { 0xfffffffffffffff1 };
VECT_VAR_DECL(vld3_dup_expected_2,poly,64,1) [] = { 0xfffffffffffffff2 };
VECT_VAR_DECL(vld4_dup_expected_0,poly,64,1) [] = { 0xfffffffffffffff0 };
VECT_VAR_DECL(vld4_dup_expected_1,poly,64,1) [] = { 0xfffffffffffffff1 };
VECT_VAR_DECL(vld4_dup_expected_2,poly,64,1) [] = { 0xfffffffffffffff2 };
VECT_VAR_DECL(vld4_dup_expected_3,poly,64,1) [] = { 0xfffffffffffffff3 };

/* Expected results: vsli. */
VECT_VAR_DECL(vsli_expected,poly,64,1) [] = { 0x10 };
VECT_VAR_DECL(vsli_expected,poly,64,2) [] = { 0x7ffffffffffff0,
                                              0x7ffffffffffff1 };
VECT_VAR_DECL(vsli_expected_max_shift,poly,64,1) [] = { 0x7ffffffffffffff0 };
VECT_VAR_DECL(vsli_expected_max_shift,poly,64,2) [] = { 0xfffffffffffffff0,
                                                        0xfffffffffffffff1 };

/* Expected results: vsri. */
VECT_VAR_DECL(vsri_expected,poly,64,1) [] = { 0xe000000000000000 };
VECT_VAR_DECL(vsri_expected,poly,64,2) [] = { 0xfffffffffffff800,
                                              0xfffffffffffff800 };
VECT_VAR_DECL(vsri_expected_max_shift,poly,64,1) [] = { 0xfffffffffffffff0 };
VECT_VAR_DECL(vsri_expected_max_shift,poly,64,2) [] = { 0xfffffffffffffff0,
                                                        0xfffffffffffffff1 };

/* Expected results: vst1_lane. */
VECT_VAR_DECL(vst1_lane_expected,poly,64,1) [] = { 0xfffffffffffffff0 };
VECT_VAR_DECL(vst1_lane_expected,poly,64,2) [] = { 0xfffffffffffffff0,
                                                   0x3333333333333333 };

/* Expected results: vget_lane. */
VECT_VAR_DECL(vget_lane_expected,poly,64,1) = 0xfffffffffffffff0;
VECT_VAR_DECL(vget_lane_expected,poly,64,2) = 0xfffffffffffffff0;

/* Expected results: vset_lane. */
VECT_VAR_DECL(vset_lane_expected,poly,64,1) [] = { 0x88 };
VECT_VAR_DECL(vset_lane_expected,poly,64,2) [] = { 0xfffffffffffffff0, 0x11 };

/* Expected results: vtst. */
VECT_VAR_DECL(vtst_expected,uint,64,1) [] = { 0x0 };

#ifdef __aarch64__
/* Expected results: vmov_n. */
VECT_VAR_DECL(vmov_n_expected0,poly,64,1) [] = { 0xfffffffffffffff0 };
VECT_VAR_DECL(vmov_n_expected0,poly,64,2) [] = { 0xfffffffffffffff0,
                                                 0xfffffffffffffff0 };
VECT_VAR_DECL(vmov_n_expected1,poly,64,1) [] = { 0xfffffffffffffff1 };
VECT_VAR_DECL(vmov_n_expected1,poly,64,2) [] = { 0xfffffffffffffff1,
                                                 0xfffffffffffffff1 };
VECT_VAR_DECL(vmov_n_expected2,poly,64,1) [] = { 0xfffffffffffffff2 };
VECT_VAR_DECL(vmov_n_expected2,poly,64,2) [] = { 0xfffffffffffffff2,
                                                 0xfffffffffffffff2 };

/* Expected results: vldX_lane. */
VECT_VAR_DECL(expected_vld_st2_0,poly,64,1) [] = { 0xfffffffffffffff0 };
VECT_VAR_DECL(expected_vld_st2_0,poly,64,2) [] = { 0xfffffffffffffff0,
                                                   0xfffffffffffffff1 };
VECT_VAR_DECL(expected_vld_st2_1,poly,64,1) [] = { 0xfffffffffffffff1 };
VECT_VAR_DECL(expected_vld_st2_1,poly,64,2) [] = { 0xaaaaaaaaaaaaaaaa,
                                                   0xaaaaaaaaaaaaaaaa };
VECT_VAR_DECL(expected_vld_st3_0,poly,64,1) [] = { 0xfffffffffffffff0 };
VECT_VAR_DECL(expected_vld_st3_0,poly,64,2) [] = { 0xfffffffffffffff0,
                                                   0xfffffffffffffff1 };
VECT_VAR_DECL(expected_vld_st3_1,poly,64,1) [] = { 0xfffffffffffffff1 };
VECT_VAR_DECL(expected_vld_st3_1,poly,64,2) [] = { 0xfffffffffffffff2,
                                                   0xaaaaaaaaaaaaaaaa };
VECT_VAR_DECL(expected_vld_st3_2,poly,64,1) [] = { 0xfffffffffffffff2 };
VECT_VAR_DECL(expected_vld_st3_2,poly,64,2) [] = { 0xaaaaaaaaaaaaaaaa,
                                                   0xaaaaaaaaaaaaaaaa };
VECT_VAR_DECL(expected_vld_st4_0,poly,64,1) [] = { 0xfffffffffffffff0 };
VECT_VAR_DECL(expected_vld_st4_0,poly,64,2) [] = { 0xfffffffffffffff0,
                                                   0xfffffffffffffff1 };
VECT_VAR_DECL(expected_vld_st4_1,poly,64,1) [] = { 0xfffffffffffffff1 };
VECT_VAR_DECL(expected_vld_st4_1,poly,64,2) [] = { 0xfffffffffffffff2,
                                                   0xfffffffffffffff3 };
VECT_VAR_DECL(expected_vld_st4_2,poly,64,1) [] = { 0xfffffffffffffff2 };
VECT_VAR_DECL(expected_vld_st4_2,poly,64,2) [] = { 0xaaaaaaaaaaaaaaaa,
                                                   0xaaaaaaaaaaaaaaaa };
VECT_VAR_DECL(expected_vld_st4_3,poly,64,1) [] = { 0xfffffffffffffff3 };
VECT_VAR_DECL(expected_vld_st4_3,poly,64,2) [] = { 0xaaaaaaaaaaaaaaaa,
                                                   0xaaaaaaaaaaaaaaaa };

/* Expected results: vtst. */
VECT_VAR_DECL(vtst_expected,uint,64,2) [] = { 0x0, 0xffffffffffffffff };
#endif

int main (void)
{
  int i;

  /* vbsl_p64 tests. */
#define TEST_MSG "VBSL/VBSLQ"

#define TEST_VBSL(T3, Q, T1, T2, W, N) \
  VECT_VAR(vbsl_vector_res, T1, W, N) = \
    vbsl##Q##_##T2##W(VECT_VAR(vbsl_vector_first, T3, W, N), \
                      VECT_VAR(vbsl_vector, T1, W, N), \
                      VECT_VAR(vbsl_vector2, T1, W, N)); \
  vst1##Q##_##T2##W(VECT_VAR(result, T1, W, N), VECT_VAR(vbsl_vector_res, T1, W, N))
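
  /* As an illustration, assuming the VECT_VAR naming sketched near the
     top of this file, TEST_VBSL(uint, , poly, p, 64, 1) expands roughly
     to:
       vbsl_vector_res_poly64x1 =
         vbsl_p64(vbsl_vector_first_uint64x1,
                  vbsl_vector_poly64x1, vbsl_vector2_poly64x1);
       vst1_p64(result_poly64x1, vbsl_vector_res_poly64x1);
     i.e. the uint64x1_t operand selects, bit by bit, between the two
     poly64x1_t operands.  */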

  DECL_VARIABLE(vbsl_vector, poly, 64, 1);
  DECL_VARIABLE(vbsl_vector, poly, 64, 2);
  DECL_VARIABLE(vbsl_vector2, poly, 64, 1);
  DECL_VARIABLE(vbsl_vector2, poly, 64, 2);
  DECL_VARIABLE(vbsl_vector_res, poly, 64, 1);
  DECL_VARIABLE(vbsl_vector_res, poly, 64, 2);

  DECL_VARIABLE(vbsl_vector_first, uint, 64, 1);
  DECL_VARIABLE(vbsl_vector_first, uint, 64, 2);

  CLEAN(result, poly, 64, 1);
  CLEAN(result, poly, 64, 2);

  VLOAD(vbsl_vector, buffer, , poly, p, 64, 1);
  VLOAD(vbsl_vector, buffer, q, poly, p, 64, 2);

  VDUP(vbsl_vector2, , poly, p, 64, 1, 0xFFFFFFF3);
  VDUP(vbsl_vector2, q, poly, p, 64, 2, 0xFFFFFFF3);

  VDUP(vbsl_vector_first, , uint, u, 64, 1, 0xFFFFFFF2);
  VDUP(vbsl_vector_first, q, uint, u, 64, 2, 0xFFFFFFF2);

  TEST_VBSL(uint, , poly, p, 64, 1);
  TEST_VBSL(uint, q, poly, p, 64, 2);

  CHECK_POLY(TEST_MSG, poly, 64, 1, PRIx64, vbsl_expected, "");
  CHECK_POLY(TEST_MSG, poly, 64, 2, PRIx64, vbsl_expected, "");

  /* vceq_p64 tests. */
#undef TEST_MSG
#define TEST_MSG "VCEQ"

#define TEST_VCOMP1(INSN, Q, T1, T2, T3, W, N) \
  VECT_VAR(vceq_vector_res, T3, W, N) = \
    INSN##Q##_##T2##W(VECT_VAR(vceq_vector, T1, W, N), \
                      VECT_VAR(vceq_vector2, T1, W, N)); \
  vst1##Q##_u##W(VECT_VAR(result, T3, W, N), VECT_VAR(vceq_vector_res, T3, W, N))

#define TEST_VCOMP(INSN, Q, T1, T2, T3, W, N) \
  TEST_VCOMP1(INSN, Q, T1, T2, T3, W, N)
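
  /* TEST_VCOMP adds a second expansion level so that INSN is itself
     macro-expanded before the ## pasting in TEST_VCOMP1 takes place;
     this is the usual token-pasting idiom used throughout these
     tests.  */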

  DECL_VARIABLE(vceq_vector, poly, 64, 1);
  DECL_VARIABLE(vceq_vector2, poly, 64, 1);
  DECL_VARIABLE(vceq_vector_res, uint, 64, 1);

  CLEAN(result, uint, 64, 1);

  VLOAD(vceq_vector, buffer, , poly, p, 64, 1);

  VDUP(vceq_vector2, , poly, p, 64, 1, 0x88);

  TEST_VCOMP(vceq, , poly, p, uint, 64, 1);

  CHECK(TEST_MSG, uint, 64, 1, PRIx64, vceq_expected, "");

  /* vcombine_p64 tests. */
#undef TEST_MSG
#define TEST_MSG "VCOMBINE"

#define TEST_VCOMBINE(T1, T2, W, N, N2) \
  VECT_VAR(vcombine_vector128, T1, W, N2) = \
    vcombine_##T2##W(VECT_VAR(vcombine_vector64_a, T1, W, N), \
                     VECT_VAR(vcombine_vector64_b, T1, W, N)); \
  vst1q_##T2##W(VECT_VAR(result, T1, W, N2), VECT_VAR(vcombine_vector128, T1, W, N2))
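
  /* For reference: vcombine_p64 concatenates two poly64x1_t values
     into a poly64x2_t, the first argument becoming the low half.
     Hence vcombine_expected above is { 0xfffffffffffffff0, 0x88 }:
     the low half comes from buffer, the high half from the vdup'ed
     0x88 below.  */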

  DECL_VARIABLE(vcombine_vector64_a, poly, 64, 1);
  DECL_VARIABLE(vcombine_vector64_b, poly, 64, 1);
  DECL_VARIABLE(vcombine_vector128, poly, 64, 2);

  CLEAN(result, poly, 64, 2);

  VLOAD(vcombine_vector64_a, buffer, , poly, p, 64, 1);

  VDUP(vcombine_vector64_b, , poly, p, 64, 1, 0x88);

  TEST_VCOMBINE(poly, p, 64, 1, 2);

  CHECK_POLY(TEST_MSG, poly, 64, 2, PRIx64, vcombine_expected, "");

  /* vcreate_p64 tests. */
#undef TEST_MSG
#define TEST_MSG "VCREATE"

#define TEST_VCREATE(T1, T2, W, N) \
  VECT_VAR(vcreate_vector_res, T1, W, N) = \
    vcreate_##T2##W(VECT_VAR(vcreate_val, T1, W, N)); \
  vst1_##T2##W(VECT_VAR(result, T1, W, N), VECT_VAR(vcreate_vector_res, T1, W, N))

#define DECL_VAL(VAR, T1, W, N) \
  uint64_t VECT_VAR(VAR, T1, W, N)
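
  /* vcreate_p64 builds a poly64x1_t from a uint64_t bit pattern,
     which is why DECL_VAL above declares the input as a plain
     uint64_t rather than a vector.  */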

  DECL_VAL(vcreate_val, poly, 64, 1);
  DECL_VARIABLE(vcreate_vector_res, poly, 64, 1);

  CLEAN(result, poly, 64, 1);

  VECT_VAR(vcreate_val, poly, 64, 1) = 0x123456789abcdef0ULL;

  TEST_VCREATE(poly, p, 64, 1);

  CHECK_POLY(TEST_MSG, poly, 64, 1, PRIx64, vcreate_expected, "");

  /* vdup_lane_p64 tests. */
#undef TEST_MSG
#define TEST_MSG "VDUP_LANE/VDUP_LANEQ"

#define TEST_VDUP_LANE(Q, T1, T2, W, N, N2, L) \
  VECT_VAR(vdup_lane_vector_res, T1, W, N) = \
    vdup##Q##_lane_##T2##W(VECT_VAR(vdup_lane_vector, T1, W, N2), L); \
  vst1##Q##_##T2##W(VECT_VAR(result, T1, W, N), VECT_VAR(vdup_lane_vector_res, T1, W, N))
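
  /* Both forms read lane L of a 64x1 input vector; since poly64x1_t
     has a single lane, only L == 0 is valid here, and the q form
     broadcasts that lane to both lanes of the 128-bit result.  */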

  DECL_VARIABLE(vdup_lane_vector, poly, 64, 1);
  DECL_VARIABLE(vdup_lane_vector, poly, 64, 2);
  DECL_VARIABLE(vdup_lane_vector_res, poly, 64, 1);
  DECL_VARIABLE(vdup_lane_vector_res, poly, 64, 2);

  CLEAN(result, poly, 64, 1);
  CLEAN(result, poly, 64, 2);

  VLOAD(vdup_lane_vector, buffer, , poly, p, 64, 1);

  TEST_VDUP_LANE(, poly, p, 64, 1, 1, 0);
  TEST_VDUP_LANE(q, poly, p, 64, 2, 1, 0);

  CHECK_POLY(TEST_MSG, poly, 64, 1, PRIx64, vdup_lane_expected, "");
  CHECK_POLY(TEST_MSG, poly, 64, 2, PRIx64, vdup_lane_expected, "");

  /* vdup_n_p64 tests. */
#undef TEST_MSG
#define TEST_MSG "VDUP/VDUPQ"

#define TEST_VDUP(Q, T1, T2, W, N) \
  VECT_VAR(vdup_n_vector, T1, W, N) = \
    vdup##Q##_n_##T2##W(VECT_VAR(buffer_dup, T1, W, N)[i]); \
  vst1##Q##_##T2##W(VECT_VAR(result, T1, W, N), VECT_VAR(vdup_n_vector, T1, W, N))

  DECL_VARIABLE(vdup_n_vector, poly, 64, 1);
  DECL_VARIABLE(vdup_n_vector, poly, 64, 2);

  /* Try to read different places from the input buffer. */
  for (i = 0; i < 3; i++) {
    CLEAN(result, poly, 64, 1);
    CLEAN(result, poly, 64, 2);

    TEST_VDUP(, poly, p, 64, 1);
    TEST_VDUP(q, poly, p, 64, 2);

    switch (i) {
    case 0:
      CHECK_POLY(TEST_MSG, poly, 64, 1, PRIx64, vdup_n_expected0, "");
      CHECK_POLY(TEST_MSG, poly, 64, 2, PRIx64, vdup_n_expected0, "");
      break;
    case 1:
      CHECK_POLY(TEST_MSG, poly, 64, 1, PRIx64, vdup_n_expected1, "");
      CHECK_POLY(TEST_MSG, poly, 64, 2, PRIx64, vdup_n_expected1, "");
      break;
    case 2:
      CHECK_POLY(TEST_MSG, poly, 64, 1, PRIx64, vdup_n_expected2, "");
      CHECK_POLY(TEST_MSG, poly, 64, 2, PRIx64, vdup_n_expected2, "");
      break;
    default:
      abort();
    }
  }

  /* vext_p64 tests. */
#undef TEST_MSG
#define TEST_MSG "VEXT/VEXTQ"

#define TEST_VEXT(Q, T1, T2, W, N, V) \
  VECT_VAR(vext_vector_res, T1, W, N) = \
    vext##Q##_##T2##W(VECT_VAR(vext_vector1, T1, W, N), \
                      VECT_VAR(vext_vector2, T1, W, N), \
                      V); \
  vst1##Q##_##T2##W(VECT_VAR(result, T1, W, N), VECT_VAR(vext_vector_res, T1, W, N))
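
  /* vext concatenates its two operands and extracts N consecutive
     elements starting at index V.  For instance, with the inputs
     below, vextq_p64(a, b, 1) yields { a[1], b[0] }, which is why
     vext_expected for the q form is { 0xfffffffffffffff1, 0x88 }.  */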

  DECL_VARIABLE(vext_vector1, poly, 64, 1);
  DECL_VARIABLE(vext_vector1, poly, 64, 2);
  DECL_VARIABLE(vext_vector2, poly, 64, 1);
  DECL_VARIABLE(vext_vector2, poly, 64, 2);
  DECL_VARIABLE(vext_vector_res, poly, 64, 1);
  DECL_VARIABLE(vext_vector_res, poly, 64, 2);

  CLEAN(result, poly, 64, 1);
  CLEAN(result, poly, 64, 2);

  VLOAD(vext_vector1, buffer, , poly, p, 64, 1);
  VLOAD(vext_vector1, buffer, q, poly, p, 64, 2);

  VDUP(vext_vector2, , poly, p, 64, 1, 0x88);
  VDUP(vext_vector2, q, poly, p, 64, 2, 0x88);

  TEST_VEXT(, poly, p, 64, 1, 0);
  TEST_VEXT(q, poly, p, 64, 2, 1);

  CHECK_POLY(TEST_MSG, poly, 64, 1, PRIx64, vext_expected, "");
  CHECK_POLY(TEST_MSG, poly, 64, 2, PRIx64, vext_expected, "");

  /* vget_low_p64 tests. */
#undef TEST_MSG
#define TEST_MSG "VGET_LOW"

#define TEST_VGET_LOW(T1, T2, W, N, N2) \
  VECT_VAR(vget_low_vector64, T1, W, N) = \
    vget_low_##T2##W(VECT_VAR(vget_low_vector128, T1, W, N2)); \
  vst1_##T2##W(VECT_VAR(result, T1, W, N), VECT_VAR(vget_low_vector64, T1, W, N))

  DECL_VARIABLE(vget_low_vector64, poly, 64, 1);
  DECL_VARIABLE(vget_low_vector128, poly, 64, 2);

  CLEAN(result, poly, 64, 1);

  VLOAD(vget_low_vector128, buffer, q, poly, p, 64, 2);

  TEST_VGET_LOW(poly, p, 64, 1, 2);

  CHECK_POLY(TEST_MSG, poly, 64, 1, PRIx64, vget_low_expected, "");

  /* vget_high_p64 tests. */
#undef TEST_MSG
#define TEST_MSG "VGET_HIGH"

#define TEST_VGET_HIGH(T1, T2, W, N, N2) \
  VECT_VAR(vget_high_vector64, T1, W, N) = \
    vget_high_##T2##W(VECT_VAR(vget_high_vector128, T1, W, N2)); \
  vst1_##T2##W(VECT_VAR(result, T1, W, N), VECT_VAR(vget_high_vector64, T1, W, N))

  DECL_VARIABLE(vget_high_vector64, poly, 64, 1);
  DECL_VARIABLE(vget_high_vector128, poly, 64, 2);

  CLEAN(result, poly, 64, 1);

  VLOAD(vget_high_vector128, buffer, q, poly, p, 64, 2);

  TEST_VGET_HIGH(poly, p, 64, 1, 2);

  CHECK_POLY(TEST_MSG, poly, 64, 1, PRIx64, vget_high_expected, "");

  /* vld1_p64 tests. */
#undef TEST_MSG
#define TEST_MSG "VLD1/VLD1Q"

#define TEST_VLD1(VAR, BUF, Q, T1, T2, W, N) \
  VECT_VAR(VAR, T1, W, N) = vld1##Q##_##T2##W(VECT_VAR(BUF, T1, W, N)); \
  vst1##Q##_##T2##W(VECT_VAR(result, T1, W, N), VECT_VAR(VAR, T1, W, N))

  DECL_VARIABLE(vld1_vector, poly, 64, 1);
  DECL_VARIABLE(vld1_vector, poly, 64, 2);

  CLEAN(result, poly, 64, 1);
  CLEAN(result, poly, 64, 2);

  VLOAD(vld1_vector, buffer, , poly, p, 64, 1);
  VLOAD(vld1_vector, buffer, q, poly, p, 64, 2);

  TEST_VLD1(vld1_vector, buffer, , poly, p, 64, 1);
  TEST_VLD1(vld1_vector, buffer, q, poly, p, 64, 2);

  CHECK_POLY(TEST_MSG, poly, 64, 1, PRIx64, vld1_expected, "");
  CHECK_POLY(TEST_MSG, poly, 64, 2, PRIx64, vld1_expected, "");

  /* vld1_dup_p64 tests. */
#undef TEST_MSG
#define TEST_MSG "VLD1_DUP/VLD1_DUPQ"

#define TEST_VLD1_DUP(VAR, BUF, Q, T1, T2, W, N) \
  VECT_VAR(VAR, T1, W, N) = \
    vld1##Q##_dup_##T2##W(&VECT_VAR(BUF, T1, W, N)[i]); \
  vst1##Q##_##T2##W(VECT_VAR(result, T1, W, N), VECT_VAR(VAR, T1, W, N))

  DECL_VARIABLE(vld1_dup_vector, poly, 64, 1);
  DECL_VARIABLE(vld1_dup_vector, poly, 64, 2);

  /* Try to read different places from the input buffer. */
  for (i = 0; i < 3; i++) {
    CLEAN(result, poly, 64, 1);
    CLEAN(result, poly, 64, 2);

    TEST_VLD1_DUP(vld1_dup_vector, buffer_dup, , poly, p, 64, 1);
    TEST_VLD1_DUP(vld1_dup_vector, buffer_dup, q, poly, p, 64, 2);

    switch (i) {
    case 0:
      CHECK_POLY(TEST_MSG, poly, 64, 1, PRIx64, vld1_dup_expected0, "");
      CHECK_POLY(TEST_MSG, poly, 64, 2, PRIx64, vld1_dup_expected0, "");
      break;
    case 1:
      CHECK_POLY(TEST_MSG, poly, 64, 1, PRIx64, vld1_dup_expected1, "");
      CHECK_POLY(TEST_MSG, poly, 64, 2, PRIx64, vld1_dup_expected1, "");
      break;
    case 2:
      CHECK_POLY(TEST_MSG, poly, 64, 1, PRIx64, vld1_dup_expected2, "");
      CHECK_POLY(TEST_MSG, poly, 64, 2, PRIx64, vld1_dup_expected2, "");
      break;
    default:
      abort();
    }
  }

  /* vld1_lane_p64 tests. */
#undef TEST_MSG
#define TEST_MSG "VLD1_LANE/VLD1_LANEQ"

#define TEST_VLD1_LANE(Q, T1, T2, W, N, L) \
  memset (VECT_VAR(vld1_lane_buffer_src, T1, W, N), 0xAA, W/8*N); \
  VECT_VAR(vld1_lane_vector_src, T1, W, N) = \
    vld1##Q##_##T2##W(VECT_VAR(vld1_lane_buffer_src, T1, W, N)); \
  VECT_VAR(vld1_lane_vector, T1, W, N) = \
    vld1##Q##_lane_##T2##W(VECT_VAR(buffer, T1, W, N), \
                           VECT_VAR(vld1_lane_vector_src, T1, W, N), L); \
  vst1##Q##_##T2##W(VECT_VAR(result, T1, W, N), VECT_VAR(vld1_lane_vector, T1, W, N))
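
  /* The source vector is preloaded from a buffer filled with 0xAA, so
     after loading lane L from "buffer", every untouched lane is still
     expected to read back as 0xaaaaaaaaaaaaaaaa (see
     vld1_lane_expected above).  */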

  DECL_VARIABLE(vld1_lane_vector, poly, 64, 1);
  DECL_VARIABLE(vld1_lane_vector, poly, 64, 2);
  DECL_VARIABLE(vld1_lane_vector_src, poly, 64, 1);
  DECL_VARIABLE(vld1_lane_vector_src, poly, 64, 2);

  ARRAY(vld1_lane_buffer_src, poly, 64, 1);
  ARRAY(vld1_lane_buffer_src, poly, 64, 2);

  CLEAN(result, poly, 64, 1);
  CLEAN(result, poly, 64, 2);

  TEST_VLD1_LANE(, poly, p, 64, 1, 0);
  TEST_VLD1_LANE(q, poly, p, 64, 2, 0);

  CHECK_POLY(TEST_MSG, poly, 64, 1, PRIx64, vld1_lane_expected, "");
  CHECK_POLY(TEST_MSG, poly, 64, 2, PRIx64, vld1_lane_expected, "");

  /* vldX_p64 tests. */
#define DECL_VLDX(T1, W, N, X) \
  VECT_ARRAY_TYPE(T1, W, N, X) VECT_ARRAY_VAR(vldX_vector, T1, W, N, X); \
  VECT_VAR_DECL(vldX_result_bis_##X, T1, W, N)[X * N]

#define TEST_VLDX(Q, T1, T2, W, N, X) \
  VECT_ARRAY_VAR(vldX_vector, T1, W, N, X) = \
    /* Use dedicated init buffer, of size X.  */ \
    vld##X##Q##_##T2##W(VECT_ARRAY_VAR(buffer_vld##X, T1, W, N, X)); \
  vst##X##Q##_##T2##W(VECT_VAR(vldX_result_bis_##X, T1, W, N), \
                      VECT_ARRAY_VAR(vldX_vector, T1, W, N, X)); \
  memcpy(VECT_VAR(result, T1, W, N), VECT_VAR(vldX_result_bis_##X, T1, W, N), \
         sizeof(VECT_VAR(result, T1, W, N)));

/* Overwrite "result" with the contents of "result_bis"[Y]. */
#define TEST_EXTRA_CHUNK(T1, W, N, X, Y) \
  memcpy(VECT_VAR(result, T1, W, N), \
         &(VECT_VAR(vldX_result_bis_##X, T1, W, N)[Y*N]), \
         sizeof(VECT_VAR(result, T1, W, N)));
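
/* "result" only holds one vector's worth of data, so each vldX result
   is staged in vldX_result_bis_X and then compared one chunk (one
   vector) at a time, using TEST_EXTRA_CHUNK for chunks after the
   first.  */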

  DECL_VLDX(poly, 64, 1, 2);
  DECL_VLDX(poly, 64, 1, 3);
  DECL_VLDX(poly, 64, 1, 4);

  VECT_ARRAY_INIT2(buffer_vld2, poly, 64, 1);
  PAD(buffer_vld2_pad, poly, 64, 1);
  VECT_ARRAY_INIT3(buffer_vld3, poly, 64, 1);
  PAD(buffer_vld3_pad, poly, 64, 1);
  VECT_ARRAY_INIT4(buffer_vld4, poly, 64, 1);
  PAD(buffer_vld4_pad, poly, 64, 1);

#undef TEST_MSG
#define TEST_MSG "VLD2/VLD2Q"
  CLEAN(result, poly, 64, 1);
  TEST_VLDX(, poly, p, 64, 1, 2);
  CHECK_POLY(TEST_MSG, poly, 64, 1, PRIx64, vld2_expected_0, "chunk 0");
  CLEAN(result, poly, 64, 1);
  TEST_EXTRA_CHUNK(poly, 64, 1, 2, 1);
  CHECK_POLY(TEST_MSG, poly, 64, 1, PRIx64, vld2_expected_1, "chunk 1");

#undef TEST_MSG
#define TEST_MSG "VLD3/VLD3Q"
  CLEAN(result, poly, 64, 1);
  TEST_VLDX(, poly, p, 64, 1, 3);
  CHECK_POLY(TEST_MSG, poly, 64, 1, PRIx64, vld3_expected_0, "chunk 0");
  CLEAN(result, poly, 64, 1);
  TEST_EXTRA_CHUNK(poly, 64, 1, 3, 1);
  CHECK_POLY(TEST_MSG, poly, 64, 1, PRIx64, vld3_expected_1, "chunk 1");
  CLEAN(result, poly, 64, 1);
  TEST_EXTRA_CHUNK(poly, 64, 1, 3, 2);
  CHECK_POLY(TEST_MSG, poly, 64, 1, PRIx64, vld3_expected_2, "chunk 2");

#undef TEST_MSG
#define TEST_MSG "VLD4/VLD4Q"
  CLEAN(result, poly, 64, 1);
  TEST_VLDX(, poly, p, 64, 1, 4);
  CHECK_POLY(TEST_MSG, poly, 64, 1, PRIx64, vld4_expected_0, "chunk 0");
  CLEAN(result, poly, 64, 1);
  TEST_EXTRA_CHUNK(poly, 64, 1, 4, 1);
  CHECK_POLY(TEST_MSG, poly, 64, 1, PRIx64, vld4_expected_1, "chunk 1");
  CLEAN(result, poly, 64, 1);
  TEST_EXTRA_CHUNK(poly, 64, 1, 4, 2);
  CHECK_POLY(TEST_MSG, poly, 64, 1, PRIx64, vld4_expected_2, "chunk 2");
  CLEAN(result, poly, 64, 1);
  TEST_EXTRA_CHUNK(poly, 64, 1, 4, 3);
  CHECK_POLY(TEST_MSG, poly, 64, 1, PRIx64, vld4_expected_3, "chunk 3");

  /* vldX_dup_p64 tests. */
#define DECL_VLDX_DUP(T1, W, N, X) \
  VECT_ARRAY_TYPE(T1, W, N, X) VECT_ARRAY_VAR(vldX_dup_vector, T1, W, N, X); \
  VECT_VAR_DECL(vldX_dup_result_bis_##X, T1, W, N)[X * N]

#define TEST_VLDX_DUP(Q, T1, T2, W, N, X) \
  VECT_ARRAY_VAR(vldX_dup_vector, T1, W, N, X) = \
    vld##X##Q##_dup_##T2##W(&VECT_VAR(buffer_dup, T1, W, N)[0]); \
  \
  vst##X##Q##_##T2##W(VECT_VAR(vldX_dup_result_bis_##X, T1, W, N), \
                      VECT_ARRAY_VAR(vldX_dup_vector, T1, W, N, X)); \
  memcpy(VECT_VAR(result, T1, W, N), VECT_VAR(vldX_dup_result_bis_##X, T1, W, N), \
         sizeof(VECT_VAR(result, T1, W, N)));

/* Overwrite "result" with the contents of "result_bis"[Y]. */
#define TEST_VLDX_DUP_EXTRA_CHUNK(T1, W, N, X, Y) \
  memcpy(VECT_VAR(result, T1, W, N), \
         &(VECT_VAR(vldX_dup_result_bis_##X, T1, W, N)[Y*N]), \
         sizeof(VECT_VAR(result, T1, W, N)));

  DECL_VLDX_DUP(poly, 64, 1, 2);
  DECL_VLDX_DUP(poly, 64, 1, 3);
  DECL_VLDX_DUP(poly, 64, 1, 4);

#undef TEST_MSG
#define TEST_MSG "VLD2_DUP/VLD2Q_DUP"
  CLEAN(result, poly, 64, 1);
  TEST_VLDX_DUP(, poly, p, 64, 1, 2);
  CHECK_POLY(TEST_MSG, poly, 64, 1, PRIx64, vld2_dup_expected_0, "chunk 0");
  CLEAN(result, poly, 64, 1);
  TEST_VLDX_DUP_EXTRA_CHUNK(poly, 64, 1, 2, 1);
  CHECK_POLY(TEST_MSG, poly, 64, 1, PRIx64, vld2_dup_expected_1, "chunk 1");

#undef TEST_MSG
#define TEST_MSG "VLD3_DUP/VLD3Q_DUP"
  CLEAN(result, poly, 64, 1);
  TEST_VLDX_DUP(, poly, p, 64, 1, 3);
  CHECK_POLY(TEST_MSG, poly, 64, 1, PRIx64, vld3_dup_expected_0, "chunk 0");
  CLEAN(result, poly, 64, 1);
  TEST_VLDX_DUP_EXTRA_CHUNK(poly, 64, 1, 3, 1);
  CHECK_POLY(TEST_MSG, poly, 64, 1, PRIx64, vld3_dup_expected_1, "chunk 1");
  CLEAN(result, poly, 64, 1);
  TEST_VLDX_DUP_EXTRA_CHUNK(poly, 64, 1, 3, 2);
  CHECK_POLY(TEST_MSG, poly, 64, 1, PRIx64, vld3_dup_expected_2, "chunk 2");

#undef TEST_MSG
#define TEST_MSG "VLD4_DUP/VLD4Q_DUP"
  CLEAN(result, poly, 64, 1);
  TEST_VLDX_DUP(, poly, p, 64, 1, 4);
  CHECK_POLY(TEST_MSG, poly, 64, 1, PRIx64, vld4_dup_expected_0, "chunk 0");
  CLEAN(result, poly, 64, 1);
  TEST_VLDX_DUP_EXTRA_CHUNK(poly, 64, 1, 4, 1);
  CHECK_POLY(TEST_MSG, poly, 64, 1, PRIx64, vld4_dup_expected_1, "chunk 1");
  CLEAN(result, poly, 64, 1);
  TEST_VLDX_DUP_EXTRA_CHUNK(poly, 64, 1, 4, 2);
  CHECK_POLY(TEST_MSG, poly, 64, 1, PRIx64, vld4_dup_expected_2, "chunk 2");
  CLEAN(result, poly, 64, 1);
  TEST_VLDX_DUP_EXTRA_CHUNK(poly, 64, 1, 4, 3);
  CHECK_POLY(TEST_MSG, poly, 64, 1, PRIx64, vld4_dup_expected_3, "chunk 3");

  /* vsli_p64 tests. */
#undef TEST_MSG
#define TEST_MSG "VSLI"

#define TEST_VSXI1(INSN, Q, T1, T2, W, N, V) \
  VECT_VAR(vsXi_vector_res, T1, W, N) = \
    INSN##Q##_n_##T2##W(VECT_VAR(vsXi_vector, T1, W, N), \
                        VECT_VAR(vsXi_vector2, T1, W, N), \
                        V); \
  vst1##Q##_##T2##W(VECT_VAR(result, T1, W, N), VECT_VAR(vsXi_vector_res, T1, W, N))

#define TEST_VSXI(INSN, Q, T1, T2, W, N, V) \
  TEST_VSXI1(INSN, Q, T1, T2, W, N, V)
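
  /* A sketch of the semantics under test: vsli (shift left and
     insert) computes (b << V) | (a & ((1 << V) - 1)), i.e. the low V
     bits of the destination are preserved.  With a =
     0xfffffffffffffff0, b = 2 and V = 3 this gives 0x10, matching
     vsli_expected above.  vsri is the mirror image, inserting
     (b >> V) into the high bits of a.  */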

  DECL_VARIABLE(vsXi_vector, poly, 64, 1);
  DECL_VARIABLE(vsXi_vector, poly, 64, 2);
  DECL_VARIABLE(vsXi_vector2, poly, 64, 1);
  DECL_VARIABLE(vsXi_vector2, poly, 64, 2);
  DECL_VARIABLE(vsXi_vector_res, poly, 64, 1);
  DECL_VARIABLE(vsXi_vector_res, poly, 64, 2);

  CLEAN(result, poly, 64, 1);
  CLEAN(result, poly, 64, 2);

  VLOAD(vsXi_vector, buffer, , poly, p, 64, 1);
  VLOAD(vsXi_vector, buffer, q, poly, p, 64, 2);

  VDUP(vsXi_vector2, , poly, p, 64, 1, 2);
  VDUP(vsXi_vector2, q, poly, p, 64, 2, 3);

  TEST_VSXI(vsli, , poly, p, 64, 1, 3);
  TEST_VSXI(vsli, q, poly, p, 64, 2, 53);

  CHECK_POLY(TEST_MSG, poly, 64, 1, PRIx64, vsli_expected, "");
  CHECK_POLY(TEST_MSG, poly, 64, 2, PRIx64, vsli_expected, "");

  /* Test cases with maximum shift amount. */
  CLEAN(result, poly, 64, 1);
  CLEAN(result, poly, 64, 2);

  TEST_VSXI(vsli, , poly, p, 64, 1, 63);
  TEST_VSXI(vsli, q, poly, p, 64, 2, 63);

#define COMMENT "(max shift amount)"
  CHECK_POLY(TEST_MSG, poly, 64, 1, PRIx64, vsli_expected_max_shift, COMMENT);
  CHECK_POLY(TEST_MSG, poly, 64, 2, PRIx64, vsli_expected_max_shift, COMMENT);

  /* vsri_p64 tests. */
#undef TEST_MSG
#define TEST_MSG "VSRI"

  CLEAN(result, poly, 64, 1);
  CLEAN(result, poly, 64, 2);

  VLOAD(vsXi_vector, buffer, , poly, p, 64, 1);
  VLOAD(vsXi_vector, buffer, q, poly, p, 64, 2);

  VDUP(vsXi_vector2, , poly, p, 64, 1, 2);
  VDUP(vsXi_vector2, q, poly, p, 64, 2, 3);

  TEST_VSXI(vsri, , poly, p, 64, 1, 3);
  TEST_VSXI(vsri, q, poly, p, 64, 2, 53);

  CHECK_POLY(TEST_MSG, poly, 64, 1, PRIx64, vsri_expected, "");
  CHECK_POLY(TEST_MSG, poly, 64, 2, PRIx64, vsri_expected, "");

  /* Test cases with maximum shift amount. */
  CLEAN(result, poly, 64, 1);
  CLEAN(result, poly, 64, 2);

  TEST_VSXI(vsri, , poly, p, 64, 1, 64);
  TEST_VSXI(vsri, q, poly, p, 64, 2, 64);

#define COMMENT "(max shift amount)"
  CHECK_POLY(TEST_MSG, poly, 64, 1, PRIx64, vsri_expected_max_shift, COMMENT);
  CHECK_POLY(TEST_MSG, poly, 64, 2, PRIx64, vsri_expected_max_shift, COMMENT);

  /* vst1_lane_p64 tests. */
#undef TEST_MSG
#define TEST_MSG "VST1_LANE/VST1_LANEQ"

#define TEST_VST1_LANE(Q, T1, T2, W, N, L) \
  VECT_VAR(vst1_lane_vector, T1, W, N) = \
    vld1##Q##_##T2##W(VECT_VAR(buffer, T1, W, N)); \
  vst1##Q##_lane_##T2##W(VECT_VAR(result, T1, W, N), \
                         VECT_VAR(vst1_lane_vector, T1, W, N), L);

  DECL_VARIABLE(vst1_lane_vector, poly, 64, 1);
  DECL_VARIABLE(vst1_lane_vector, poly, 64, 2);

  CLEAN(result, poly, 64, 1);
  CLEAN(result, poly, 64, 2);

  TEST_VST1_LANE(, poly, p, 64, 1, 0);
  TEST_VST1_LANE(q, poly, p, 64, 2, 0);

  CHECK_POLY(TEST_MSG, poly, 64, 1, PRIx64, vst1_lane_expected, "");
  CHECK_POLY(TEST_MSG, poly, 64, 2, PRIx64, vst1_lane_expected, "");

  /* vget_lane_p64 tests. */
#undef TEST_MSG
#define TEST_MSG "VGET_LANE/VGETQ_LANE"

#define TEST_VGET_LANE(Q, T1, T2, W, N, L) \
  VECT_VAR(vget_lane_vector, T1, W, N) = \
    vget##Q##_lane_##T2##W(VECT_VAR(vget_lane_vector1, T1, W, N), L); \
  if (VECT_VAR(vget_lane_vector, T1, W, N) != VECT_VAR(vget_lane_expected, T1, W, N)) { \
    fprintf(stderr, \
            "ERROR in %s (%s line %d in result '%s') at type %s " \
            "got 0x%" PRIx##W " != 0x%" PRIx##W "\n", \
            TEST_MSG, __FILE__, __LINE__, \
            STR(VECT_VAR(vget_lane_expected, T1, W, N)), \
            STR(VECT_NAME(T1, W, N)), \
            VECT_VAR(vget_lane_vector, T1, W, N), \
            VECT_VAR(vget_lane_expected, T1, W, N)); \
    abort (); \
  }
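
  /* Unlike the other tests, vget##Q##_lane_p64 returns a poly64_t
     scalar, so TEST_VGET_LANE compares it directly against
     vget_lane_expected instead of storing to "result" and using
     CHECK_POLY.  */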

  /* Initialize input values. */
  DECL_VARIABLE(vget_lane_vector1, poly, 64, 1);
  DECL_VARIABLE(vget_lane_vector1, poly, 64, 2);

  VLOAD(vget_lane_vector1, buffer, , poly, p, 64, 1);
  VLOAD(vget_lane_vector1, buffer, q, poly, p, 64, 2);

  VECT_VAR_DECL(vget_lane_vector, poly, 64, 1);
  VECT_VAR_DECL(vget_lane_vector, poly, 64, 2);

  TEST_VGET_LANE( , poly, p, 64, 1, 0);
  TEST_VGET_LANE(q, poly, p, 64, 2, 0);

  /* vset_lane_p64 tests. */
#undef TEST_MSG
#define TEST_MSG "VSET_LANE/VSETQ_LANE"

#define TEST_VSET_LANE(Q, T1, T2, W, N, V, L) \
  VECT_VAR(vset_lane_vector, T1, W, N) = \
    vset##Q##_lane_##T2##W(V, \
                           VECT_VAR(vset_lane_vector, T1, W, N), \
                           L); \
  vst1##Q##_##T2##W(VECT_VAR(result, T1, W, N), VECT_VAR(vset_lane_vector, T1, W, N))

  /* Initialize input values. */
  DECL_VARIABLE(vset_lane_vector, poly, 64, 1);
  DECL_VARIABLE(vset_lane_vector, poly, 64, 2);

  CLEAN(result, poly, 64, 1);
  CLEAN(result, poly, 64, 2);

  VLOAD(vset_lane_vector, buffer, , poly, p, 64, 1);
  VLOAD(vset_lane_vector, buffer, q, poly, p, 64, 2);

  /* Choose value and lane arbitrarily. */
  TEST_VSET_LANE(, poly, p, 64, 1, 0x88, 0);
  TEST_VSET_LANE(q, poly, p, 64, 2, 0x11, 1);

  CHECK_POLY(TEST_MSG, poly, 64, 1, PRIx64, vset_lane_expected, "");
  CHECK_POLY(TEST_MSG, poly, 64, 2, PRIx64, vset_lane_expected, "");

  /* vtst_p64 tests. */
#undef TEST_MSG
#define TEST_MSG "VTST"

#define TEST_VTST1(INSN, Q, T1, T2, W, N) \
  VECT_VAR(vtst_vector_res, uint, W, N) = \
    INSN##Q##_##T2##W(VECT_VAR(vtst_vector, T1, W, N), \
                      VECT_VAR(vtst_vector2, T1, W, N)); \
  vst1##Q##_u##W(VECT_VAR(result, uint, W, N), \
                 VECT_VAR(vtst_vector_res, uint, W, N))

#define TEST_VTST(INSN, Q, T1, T2, W, N) \
  TEST_VTST1(INSN, Q, T1, T2, W, N)

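  /* vtst sets a result lane to all ones when (a & b) is non-zero in
     that lane.  Below, a is loaded from buffer (0xfffffffffffffff0,
     0xfffffffffffffff1) and b is 5: lane 0 gives 0xf0 & 5 == 0, hence
     vtst_expected starts with 0x0; lane 1 of the q form gives
     0xf1 & 5 != 0, hence all ones.  */
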
  /* Initialize input values. */
  DECL_VARIABLE(vtst_vector, poly, 64, 1);
  DECL_VARIABLE(vtst_vector2, poly, 64, 1);
  DECL_VARIABLE(vtst_vector_res, uint, 64, 1);

  CLEAN(result, uint, 64, 1);

  VLOAD(vtst_vector, buffer, , poly, p, 64, 1);
  VDUP(vtst_vector2, , poly, p, 64, 1, 5);

  TEST_VTST(vtst, , poly, p, 64, 1);

  CHECK(TEST_MSG, uint, 64, 1, PRIx64, vtst_expected, "");

  /* vtstq_p64 is supported by aarch64 only. */
#ifdef __aarch64__
  DECL_VARIABLE(vtst_vector, poly, 64, 2);
  DECL_VARIABLE(vtst_vector2, poly, 64, 2);
  DECL_VARIABLE(vtst_vector_res, uint, 64, 2);
  CLEAN(result, uint, 64, 2);
  VLOAD(vtst_vector, buffer, q, poly, p, 64, 2);
  VDUP(vtst_vector2, q, poly, p, 64, 2, 5);
  TEST_VTST(vtst, q, poly, p, 64, 2);
  CHECK(TEST_MSG, uint, 64, 2, PRIx64, vtst_expected, "");

  /* vmov_n_p64 tests. */
#undef TEST_MSG
#define TEST_MSG "VMOV/VMOVQ"

#define TEST_VMOV(Q, T1, T2, W, N) \
  VECT_VAR(vmov_n_vector, T1, W, N) = \
    vmov##Q##_n_##T2##W(VECT_VAR(buffer_dup, T1, W, N)[i]); \
  vst1##Q##_##T2##W(VECT_VAR(result, T1, W, N), VECT_VAR(vmov_n_vector, T1, W, N))

  DECL_VARIABLE(vmov_n_vector, poly, 64, 1);
  DECL_VARIABLE(vmov_n_vector, poly, 64, 2);

  /* Try to read different places from the input buffer. */
  for (i = 0; i < 3; i++) {
    CLEAN(result, poly, 64, 1);
    CLEAN(result, poly, 64, 2);

    TEST_VMOV(, poly, p, 64, 1);
    TEST_VMOV(q, poly, p, 64, 2);

    switch (i) {
    case 0:
      CHECK_POLY(TEST_MSG, poly, 64, 1, PRIx64, vmov_n_expected0, "");
      CHECK_POLY(TEST_MSG, poly, 64, 2, PRIx64, vmov_n_expected0, "");
      break;
    case 1:
      CHECK_POLY(TEST_MSG, poly, 64, 1, PRIx64, vmov_n_expected1, "");
      CHECK_POLY(TEST_MSG, poly, 64, 2, PRIx64, vmov_n_expected1, "");
      break;
    case 2:
      CHECK_POLY(TEST_MSG, poly, 64, 1, PRIx64, vmov_n_expected2, "");
      CHECK_POLY(TEST_MSG, poly, 64, 2, PRIx64, vmov_n_expected2, "");
      break;
    default:
      abort();
    }
  }

  /* vldX_lane_p64 tests. */
#undef TEST_MSG
#define TEST_MSG "VLDX_LANE/VLDXQ_LANE"

  VECT_VAR_DECL_INIT(buffer_vld2_lane, poly, 64, 2);
  VECT_VAR_DECL_INIT(buffer_vld3_lane, poly, 64, 3);
  VECT_VAR_DECL_INIT(buffer_vld4_lane, poly, 64, 4);

  /* In this case, input variables are arrays of vectors. */
#define DECL_VLD_STX_LANE(T1, W, N, X) \
  VECT_ARRAY_TYPE(T1, W, N, X) VECT_ARRAY_VAR(vector, T1, W, N, X); \
  VECT_ARRAY_TYPE(T1, W, N, X) VECT_ARRAY_VAR(vector_src, T1, W, N, X); \
  VECT_VAR_DECL(result_bis_##X, T1, W, N)[X * N]

/* We need to use a temporary result buffer (result_bis), because the
   one used for other tests is not large enough.  A subset of the
   result data is moved from result_bis to result, and it is this
   subset which is used to check the actual behavior.  The next macro
   makes it possible to move another chunk of data from result_bis to
   result.  */
/* We also use an extra input buffer (buffer_src), which we fill with
   0xAA, and which is used to load a vector from which we read a given
   lane.  */

#define TEST_VLDX_LANE(Q, T1, T2, W, N, X, L) \
  memset (VECT_VAR(buffer_src, T1, W, N), 0xAA, \
          sizeof(VECT_VAR(buffer_src, T1, W, N))); \
  \
  VECT_ARRAY_VAR(vector_src, T1, W, N, X) = \
    vld##X##Q##_##T2##W(VECT_VAR(buffer_src, T1, W, N)); \
  \
  VECT_ARRAY_VAR(vector, T1, W, N, X) = \
    /* Use dedicated init buffer, of size X.  */ \
    vld##X##Q##_lane_##T2##W(VECT_VAR(buffer_vld##X##_lane, T1, W, X), \
                             VECT_ARRAY_VAR(vector_src, T1, W, N, X), \
                             L); \
  vst##X##Q##_##T2##W(VECT_VAR(result_bis_##X, T1, W, N), \
                      VECT_ARRAY_VAR(vector, T1, W, N, X)); \
  memcpy(VECT_VAR(result, T1, W, N), VECT_VAR(result_bis_##X, T1, W, N), \
         sizeof(VECT_VAR(result, T1, W, N)))

/* Overwrite "result" with the contents of "result_bis"[Y]. */
#undef TEST_EXTRA_CHUNK
#define TEST_EXTRA_CHUNK(T1, W, N, X, Y) \
  memcpy(VECT_VAR(result, T1, W, N), \
         &(VECT_VAR(result_bis_##X, T1, W, N)[Y*N]), \
         sizeof(VECT_VAR(result, T1, W, N)));

/* Add some padding to try to catch out of bound accesses. */
#define ARRAY1(V, T, W, N) VECT_VAR_DECL(V,T,W,N)[1]={42}
#define DUMMY_ARRAY(V, T, W, N, L) \
  VECT_VAR_DECL(V,T,W,N)[N*L]={0}; \
  ARRAY1(V##_pad,T,W,N)

#define DECL_ALL_VLD_STX_LANE(X) \
  DECL_VLD_STX_LANE(poly, 64, 1, X); \
  DECL_VLD_STX_LANE(poly, 64, 2, X);

#define TEST_ALL_VLDX_LANE(X) \
  TEST_VLDX_LANE(, poly, p, 64, 1, X, 0); \
  TEST_VLDX_LANE(q, poly, p, 64, 2, X, 0);

#define TEST_ALL_EXTRA_CHUNKS(X, Y) \
  TEST_EXTRA_CHUNK(poly, 64, 1, X, Y) \
  TEST_EXTRA_CHUNK(poly, 64, 2, X, Y)

#define CHECK_RESULTS_VLD_STX_LANE(test_name, EXPECTED, comment) \
  CHECK_POLY(test_name, poly, 64, 1, PRIx64, EXPECTED, comment); \
  CHECK_POLY(test_name, poly, 64, 2, PRIx64, EXPECTED, comment);

  /* Declare the temporary buffers / variables. */
  DECL_ALL_VLD_STX_LANE(2);
  DECL_ALL_VLD_STX_LANE(3);
  DECL_ALL_VLD_STX_LANE(4);

  DUMMY_ARRAY(buffer_src, poly, 64, 1, 4);
  DUMMY_ARRAY(buffer_src, poly, 64, 2, 4);

  /* Check vld2_lane/vld2q_lane. */
  clean_results ();
#undef TEST_MSG
#define TEST_MSG "VLD2_LANE/VLD2Q_LANE"
  TEST_ALL_VLDX_LANE(2);
  CHECK_RESULTS_VLD_STX_LANE (TEST_MSG, expected_vld_st2_0, " chunk 0");

  TEST_ALL_EXTRA_CHUNKS(2, 1);
  CHECK_RESULTS_VLD_STX_LANE (TEST_MSG, expected_vld_st2_1, " chunk 1");

  /* Check vld3_lane/vld3q_lane. */
  clean_results ();
#undef TEST_MSG
#define TEST_MSG "VLD3_LANE/VLD3Q_LANE"
  TEST_ALL_VLDX_LANE(3);
  CHECK_RESULTS_VLD_STX_LANE (TEST_MSG, expected_vld_st3_0, " chunk 0");

  TEST_ALL_EXTRA_CHUNKS(3, 1);
  CHECK_RESULTS_VLD_STX_LANE (TEST_MSG, expected_vld_st3_1, " chunk 1");

  TEST_ALL_EXTRA_CHUNKS(3, 2);
  CHECK_RESULTS_VLD_STX_LANE (TEST_MSG, expected_vld_st3_2, " chunk 2");

  /* Check vld4_lane/vld4q_lane. */
  clean_results ();
#undef TEST_MSG
#define TEST_MSG "VLD4_LANE/VLD4Q_LANE"
  TEST_ALL_VLDX_LANE(4);
  CHECK_RESULTS_VLD_STX_LANE (TEST_MSG, expected_vld_st4_0, " chunk 0");

  TEST_ALL_EXTRA_CHUNKS(4, 1);
  CHECK_RESULTS_VLD_STX_LANE (TEST_MSG, expected_vld_st4_1, " chunk 1");

  TEST_ALL_EXTRA_CHUNKS(4, 2);
  CHECK_RESULTS_VLD_STX_LANE (TEST_MSG, expected_vld_st4_2, " chunk 2");

  TEST_ALL_EXTRA_CHUNKS(4, 3);
  CHECK_RESULTS_VLD_STX_LANE (TEST_MSG, expected_vld_st4_3, " chunk 3");

  /* In this case, input variables are arrays of vectors. */
#define DECL_VSTX_LANE(T1, W, N, X) \
  VECT_ARRAY_TYPE(T1, W, N, X) VECT_ARRAY_VAR(vector, T1, W, N, X); \
  VECT_ARRAY_TYPE(T1, W, N, X) VECT_ARRAY_VAR(vector_src, T1, W, N, X); \
  VECT_VAR_DECL(result_bis_##X, T1, W, N)[X * N]

/* We need to use a temporary result buffer (result_bis), because the
   one used for other tests is not large enough.  A subset of the
   result data is moved from result_bis to result, and it is this
   subset which is used to check the actual behavior.  The next macro
   makes it possible to move another chunk of data from result_bis to
   result.  */
/* We also use an extra input buffer (buffer_src), which we fill with
   0xAA, and which is used to load a vector from which we read a given
   lane.  */
#define TEST_VSTX_LANE(Q, T1, T2, W, N, X, L) \
  memset (VECT_VAR(buffer_src, T1, W, N), 0xAA, \
          sizeof(VECT_VAR(buffer_src, T1, W, N))); \
  memset (VECT_VAR(result_bis_##X, T1, W, N), 0, \
          sizeof(VECT_VAR(result_bis_##X, T1, W, N))); \
  \
  VECT_ARRAY_VAR(vector_src, T1, W, N, X) = \
    vld##X##Q##_##T2##W(VECT_VAR(buffer_src, T1, W, N)); \
  \
  VECT_ARRAY_VAR(vector, T1, W, N, X) = \
    /* Use dedicated init buffer, of size X.  */ \
    vld##X##Q##_lane_##T2##W(VECT_VAR(buffer_vld##X##_lane, T1, W, X), \
                             VECT_ARRAY_VAR(vector_src, T1, W, N, X), \
                             L); \
  vst##X##Q##_lane_##T2##W(VECT_VAR(result_bis_##X, T1, W, N), \
                           VECT_ARRAY_VAR(vector, T1, W, N, X), \
                           L); \
  memcpy(VECT_VAR(result, T1, W, N), VECT_VAR(result_bis_##X, T1, W, N), \
         sizeof(VECT_VAR(result, T1, W, N)));

#define TEST_ALL_VSTX_LANE(X) \
  TEST_VSTX_LANE(, poly, p, 64, 1, X, 0); \
  TEST_VSTX_LANE(q, poly, p, 64, 2, X, 0);

  /* Check vst2_lane/vst2q_lane. */
  clean_results ();
#undef TEST_MSG
#define TEST_MSG "VST2_LANE/VST2Q_LANE"
  TEST_ALL_VSTX_LANE(2);

#define CMT " (chunk 0)"
  CHECK_POLY(TEST_MSG, poly, 64, 1, PRIx64, expected_vld_st2_0, CMT);

  TEST_ALL_EXTRA_CHUNKS(2, 1);
#undef CMT
#define CMT " (chunk 1)"
  CHECK_POLY(TEST_MSG, poly, 64, 1, PRIx64, expected_vld_st2_1, CMT);

  /* Check vst3_lane/vst3q_lane. */
  clean_results ();
#undef TEST_MSG
#define TEST_MSG "VST3_LANE/VST3Q_LANE"
  TEST_ALL_VSTX_LANE(3);

#undef CMT
#define CMT " (chunk 0)"
  CHECK_POLY(TEST_MSG, poly, 64, 1, PRIx64, expected_vld_st3_0, CMT);

  TEST_ALL_EXTRA_CHUNKS(3, 1);

#undef CMT
#define CMT " (chunk 1)"
  CHECK_POLY(TEST_MSG, poly, 64, 1, PRIx64, expected_vld_st3_1, CMT);

  TEST_ALL_EXTRA_CHUNKS(3, 2);

#undef CMT
#define CMT " (chunk 2)"
  CHECK_POLY(TEST_MSG, poly, 64, 1, PRIx64, expected_vld_st3_2, CMT);

  /* Check vst4_lane/vst4q_lane. */
  clean_results ();
#undef TEST_MSG
#define TEST_MSG "VST4_LANE/VST4Q_LANE"
  TEST_ALL_VSTX_LANE(4);

#undef CMT
#define CMT " (chunk 0)"
  CHECK_POLY(TEST_MSG, poly, 64, 1, PRIx64, expected_vld_st4_0, CMT);

  TEST_ALL_EXTRA_CHUNKS(4, 1);

#undef CMT
#define CMT " (chunk 1)"
  CHECK_POLY(TEST_MSG, poly, 64, 1, PRIx64, expected_vld_st4_1, CMT);

  TEST_ALL_EXTRA_CHUNKS(4, 2);

#undef CMT
#define CMT " (chunk 2)"
  CHECK_POLY(TEST_MSG, poly, 64, 1, PRIx64, expected_vld_st4_2, CMT);

  TEST_ALL_EXTRA_CHUNKS(4, 3);

#undef CMT
#define CMT " (chunk 3)"
  CHECK_POLY(TEST_MSG, poly, 64, 1, PRIx64, expected_vld_st4_3, CMT);

#endif /* __aarch64__. */

  return 0;
}