libgomp/testsuite/libgomp.oacc-c-c++-common/parallel-dims.c
/* OpenACC parallelism dimensions clauses: num_gangs, num_workers,
   vector_length. */

/* See also '../libgomp.oacc-fortran/parallel-dims.f90'. */

#include <limits.h>
#include <openacc.h>
#include <gomp-constants.h>

/* TODO: "(int) acc_device_*" casts because of the C++ acc_on_device wrapper
   not behaving as expected for -O0. */
#pragma acc routine seq
static unsigned int __attribute__ ((optimize ("O2"))) acc_gang ()
{
  if (acc_on_device ((int) acc_device_host))
    return 0;
  else if (acc_on_device ((int) acc_device_nvidia)
           || acc_on_device ((int) acc_device_radeon))
    return __builtin_goacc_parlevel_id (GOMP_DIM_GANG);
  else
    __builtin_abort ();
}

#pragma acc routine seq
static unsigned int __attribute__ ((optimize ("O2"))) acc_worker ()
{
  if (acc_on_device ((int) acc_device_host))
    return 0;
  else if (acc_on_device ((int) acc_device_nvidia)
           || acc_on_device ((int) acc_device_radeon))
    return __builtin_goacc_parlevel_id (GOMP_DIM_WORKER);
  else
    __builtin_abort ();
}

#pragma acc routine seq
static unsigned int __attribute__ ((optimize ("O2"))) acc_vector ()
{
  if (acc_on_device ((int) acc_device_host))
    return 0;
  else if (acc_on_device ((int) acc_device_nvidia)
           || acc_on_device ((int) acc_device_radeon))
    return __builtin_goacc_parlevel_id (GOMP_DIM_VECTOR);
  else
    __builtin_abort ();
}
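
/* Note: each helper above is an OpenACC 'routine seq' that returns the
   zero-based index of the calling gang/worker/vector via
   __builtin_goacc_parlevel_id, and simply returns 0 on the host device.
   The 'optimize ("O2")' attribute (see the TODO above) is presumably there
   so that the acc_on_device calls can still be resolved at compile time
   when the test itself is built at -O0. */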


int main ()
{
  acc_init (acc_device_default);

  /* OpenACC parallel construct. */

  /* Non-positive value. */

  /* GR, WS, VS. */
  {
#define GANGS 0 /* { dg-warning "'num_gangs' value must be positive" "" { target c } } */
    int gangs_actual = GANGS;
    int gangs_min, gangs_max, workers_min, workers_max, vectors_min, vectors_max;
    gangs_min = workers_min = vectors_min = INT_MAX;
    gangs_max = workers_max = vectors_max = INT_MIN;
#pragma acc parallel copy (gangs_actual) \
  reduction (min: gangs_min, workers_min, vectors_min) reduction (max: gangs_max, workers_max, vectors_max) \
  num_gangs (GANGS) /* { dg-warning "'num_gangs' value must be positive" "" { target c++ } } */
    {
      /* We're actually executing with num_gangs (1). */
      gangs_actual = 1;
      for (int i = 100 * gangs_actual; i > -100 * gangs_actual; --i)
        {
          /* <https://gcc.gnu.org/PR80547>. */
#if 0
          gangs_min = gangs_max = acc_gang ();
          workers_min = workers_max = acc_worker ();
          vectors_min = vectors_max = acc_vector ();
#else
          int gangs = acc_gang ();
          gangs_min = (gangs_min < gangs) ? gangs_min : gangs;
          gangs_max = (gangs_max > gangs) ? gangs_max : gangs;
          int workers = acc_worker ();
          workers_min = (workers_min < workers) ? workers_min : workers;
          workers_max = (workers_max > workers) ? workers_max : workers;
          int vectors = acc_vector ();
          vectors_min = (vectors_min < vectors) ? vectors_min : vectors;
          vectors_max = (vectors_max > vectors) ? vectors_max : vectors;
#endif
        }
    }
    if (gangs_actual != 1)
      __builtin_abort ();
    if (gangs_min != 0 || gangs_max != gangs_actual - 1
        || workers_min != 0 || workers_max != 0
        || vectors_min != 0 || vectors_max != 0)
      __builtin_abort ();
#undef GANGS
  }
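
  /* The blocks below all follow the same pattern: the min/max trackers start
     out at INT_MAX/INT_MIN, every iteration of the offloaded loop clamps them
     against the observed gang/worker/vector indices, and the host code then
     verifies that exactly the indices 0 .. N-1 were seen for the partitioned
     dimension and only index 0 for the other two.  The clamping step, shown
     here for the gang index only, is simply:

       int gangs = acc_gang ();
       gangs_min = (gangs_min < gangs) ? gangs_min : gangs;
       gangs_max = (gangs_max > gangs) ? gangs_max : gangs;

     In most blocks this is expressed more compactly through OpenACC
     'min'/'max' reduction clauses instead of explicit conditionals. */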

  /* GP, WS, VS. */
  {
#define GANGS 0 /* { dg-warning "'num_gangs' value must be positive" "" { target c } } */
    int gangs_actual = GANGS;
    int gangs_min, gangs_max, workers_min, workers_max, vectors_min, vectors_max;
    gangs_min = workers_min = vectors_min = INT_MAX;
    gangs_max = workers_max = vectors_max = INT_MIN;
#pragma acc parallel copy (gangs_actual) \
  num_gangs (GANGS) /* { dg-warning "'num_gangs' value must be positive" "" { target c++ } } */
    {
      /* We're actually executing with num_gangs (1). */
      gangs_actual = 1;
#pragma acc loop gang reduction (min: gangs_min, workers_min, vectors_min) reduction (max: gangs_max, workers_max, vectors_max)
      for (int i = 100 * gangs_actual; i > -100 * gangs_actual; --i)
        {
          gangs_min = gangs_max = acc_gang ();
          workers_min = workers_max = acc_worker ();
          vectors_min = vectors_max = acc_vector ();
        }
    }
    if (gangs_actual != 1)
      __builtin_abort ();
    if (gangs_min != 0 || gangs_max != gangs_actual - 1
        || workers_min != 0 || workers_max != 0
        || vectors_min != 0 || vectors_max != 0)
      __builtin_abort ();
#undef GANGS
  }

  /* GR, WP, VS. */
  {
#define WORKERS 0 /* { dg-warning "'num_workers' value must be positive" "" { target c } } */
    int workers_actual = WORKERS;
    int gangs_min, gangs_max, workers_min, workers_max, vectors_min, vectors_max;
    gangs_min = workers_min = vectors_min = INT_MAX;
    gangs_max = workers_max = vectors_max = INT_MIN;
#pragma acc parallel copy (workers_actual) \
  num_workers (WORKERS) /* { dg-warning "'num_workers' value must be positive" "" { target c++ } } */
    {
      /* We're actually executing with num_workers (1). */
      workers_actual = 1;
#pragma acc loop worker reduction (min: gangs_min, workers_min, vectors_min) reduction (max: gangs_max, workers_max, vectors_max)
      for (int i = 100 * workers_actual; i > -100 * workers_actual; --i)
        {
          gangs_min = gangs_max = acc_gang ();
          workers_min = workers_max = acc_worker ();
          vectors_min = vectors_max = acc_vector ();
        }
    }
    if (workers_actual != 1)
      __builtin_abort ();
    if (gangs_min != 0 || gangs_max != 0
        || workers_min != 0 || workers_max != workers_actual - 1
        || vectors_min != 0 || vectors_max != 0)
      __builtin_abort ();
#undef WORKERS
  }

  /* GR, WS, VP. */
  {
#define VECTORS 0 /* { dg-warning "'vector_length' value must be positive" "" { target c } } */
    int vectors_actual = VECTORS;
    int gangs_min, gangs_max, workers_min, workers_max, vectors_min, vectors_max;
    gangs_min = workers_min = vectors_min = INT_MAX;
    gangs_max = workers_max = vectors_max = INT_MIN;
#pragma acc parallel copy (vectors_actual) /* { dg-warning "using vector_length \\(32\\), ignoring 1" "" { target openacc_nvidia_accel_selected } } */ \
  vector_length (VECTORS) /* { dg-warning "'vector_length' value must be positive" "" { target c++ } } */
    {
      /* We're actually executing with vector_length (1), except that the GCC
         nvptx back end enforces vector_length (32). */
      if (acc_on_device (acc_device_nvidia))
        vectors_actual = 32;
      else
        vectors_actual = 1;
#pragma acc loop vector reduction (min: gangs_min, workers_min, vectors_min) reduction (max: gangs_max, workers_max, vectors_max)
      for (int i = 100 * vectors_actual; i > -100 * vectors_actual; --i)
        {
          gangs_min = gangs_max = acc_gang ();
          workers_min = workers_max = acc_worker ();
          vectors_min = vectors_max = acc_vector ();
        }
    }
    if (acc_get_device_type () == acc_device_nvidia)
      {
        if (vectors_actual != 32)
          __builtin_abort ();
      }
    else
      if (vectors_actual != 1)
        __builtin_abort ();
    if (gangs_min != 0 || gangs_max != 0
        || workers_min != 0 || workers_max != 0
        || vectors_min != 0 || vectors_max != vectors_actual - 1)
      __builtin_abort ();
#undef VECTORS
  }
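
  /* In all four "non-positive value" blocks above, a clause argument of 0 is
     diagnosed at compile time ("value must be positive") and the region is
     then launched with that dimension set to 1.  The duplicated dg-warning
     directives with '{ target c }' vs. '{ target c++ }' are presumably
     needed because the C and C++ front ends report the warning at different
     source lines, which is why one directive sits on the '#define' line and
     the other on the directive itself. */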


  /* High value. */

  /* GR, WS, VS. */
  {
    /* There is no actual limit for the number of gangs, so we try with a
       rather high value. */
    int gangs = 12345;
    int gangs_actual = gangs;
    int gangs_min, gangs_max, workers_min, workers_max, vectors_min, vectors_max;
    gangs_min = workers_min = vectors_min = INT_MAX;
    gangs_max = workers_max = vectors_max = INT_MIN;
#pragma acc parallel copy (gangs_actual) \
  reduction (min: gangs_min, workers_min, vectors_min) reduction (max: gangs_max, workers_max, vectors_max) \
  num_gangs (gangs)
    {
      if (acc_on_device (acc_device_host))
        {
          /* We're actually executing with num_gangs (1). */
          gangs_actual = 1;
        }
      /* As we're executing GR not GP, don't multiply by a "gangs_actual"
         factor. */
      for (int i = 100 /* * gangs_actual */; i > -100 /* * gangs_actual */; --i)
        {
          gangs_min = gangs_max = acc_gang ();
          workers_min = workers_max = acc_worker ();
          vectors_min = vectors_max = acc_vector ();
        }
    }
    if (gangs_actual < 1)
      __builtin_abort ();
    if (gangs_min != 0 || gangs_max != gangs_actual - 1
        || workers_min != 0 || workers_max != 0
        || vectors_min != 0 || vectors_max != 0)
      __builtin_abort ();
  }

  /* GP, WS, VS. */
  {
    /* There is no actual limit for the number of gangs, so we try with a
       rather high value. */
    int gangs = 12345;
    int gangs_actual = gangs;
    int gangs_min, gangs_max, workers_min, workers_max, vectors_min, vectors_max;
    gangs_min = workers_min = vectors_min = INT_MAX;
    gangs_max = workers_max = vectors_max = INT_MIN;
#pragma acc parallel copy (gangs_actual) \
  num_gangs (gangs)
    {
      if (acc_on_device (acc_device_host))
        {
          /* We're actually executing with num_gangs (1). */
          gangs_actual = 1;
        }
#pragma acc loop gang reduction (min: gangs_min, workers_min, vectors_min) reduction (max: gangs_max, workers_max, vectors_max)
      for (int i = 100 * gangs_actual; i > -100 * gangs_actual; --i)
        {
          gangs_min = gangs_max = acc_gang ();
          workers_min = workers_max = acc_worker ();
          vectors_min = vectors_max = acc_vector ();
        }
    }
    if (gangs_actual < 1)
      __builtin_abort ();
    if (gangs_min != 0 || gangs_max != gangs_actual - 1
        || workers_min != 0 || workers_max != 0
        || vectors_min != 0 || vectors_max != 0)
      __builtin_abort ();
  }

  /* GR, WP, VS. */
  {
    /* We try with an outrageously large value. */
#define WORKERS 2 << 20
    int workers_actual = WORKERS;
    int gangs_min, gangs_max, workers_min, workers_max, vectors_min, vectors_max;
    gangs_min = workers_min = vectors_min = INT_MAX;
    gangs_max = workers_max = vectors_max = INT_MIN;
#pragma acc parallel copy (workers_actual) /* { dg-warning "using num_workers \\(32\\), ignoring 2097152" "" { target openacc_nvidia_accel_selected } } */ \
  num_workers (WORKERS)
    {
      if (acc_on_device (acc_device_host))
        {
          /* We're actually executing with num_workers (1). */
          workers_actual = 1;
        }
      else if (acc_on_device (acc_device_nvidia))
        {
          /* The GCC nvptx back end enforces num_workers (32). */
          workers_actual = 32;
        }
      else if (acc_on_device (acc_device_radeon))
        {
          /* The GCC GCN back end is limited to num_workers (16).
             Temporarily set this to 1 until multiple workers are permitted. */
          workers_actual = 1; // 16;
        }
      else
        __builtin_abort ();
#pragma acc loop worker reduction (min: gangs_min, workers_min, vectors_min) reduction (max: gangs_max, workers_max, vectors_max)
      for (int i = 100 * workers_actual; i > -100 * workers_actual; --i)
        {
          gangs_min = gangs_max = acc_gang ();
          workers_min = workers_max = acc_worker ();
          vectors_min = vectors_max = acc_vector ();
        }
    }
    if (workers_actual < 1)
      __builtin_abort ();
    if (gangs_min != 0 || gangs_max != 0
        || workers_min != 0 || workers_max != workers_actual - 1
        || vectors_min != 0 || vectors_max != 0)
      __builtin_abort ();
#undef WORKERS
  }

  /* GR, WP, VS. */
  {
    /* We try with an outrageously large value. */
    int workers = 2 << 20;
    /* For nvptx offloading, this one will not result in "using num_workers
       (32), ignoring runtime setting", and will in fact try to launch with
       "num_workers (workers)", which will run into "libgomp: cuLaunchKernel
       error: invalid argument". So, limit ourselves here. */
    if (acc_get_device_type () == acc_device_nvidia)
      workers = 32;
    int workers_actual = workers;
    int gangs_min, gangs_max, workers_min, workers_max, vectors_min, vectors_max;
    gangs_min = workers_min = vectors_min = INT_MAX;
    gangs_max = workers_max = vectors_max = INT_MIN;
#pragma acc parallel copy (workers_actual) \
  num_workers (workers)
    {
      if (acc_on_device (acc_device_host))
        {
          /* We're actually executing with num_workers (1). */
          workers_actual = 1;
        }
      else if (acc_on_device (acc_device_nvidia))
        {
          /* We're actually executing with num_workers (32). */
          /* workers_actual = 32; */
        }
      else if (acc_on_device (acc_device_radeon))
        {
          /* The GCC GCN back end is limited to num_workers (16). */
          workers_actual = 16;
        }
      else
        __builtin_abort ();
#pragma acc loop worker reduction (min: gangs_min, workers_min, vectors_min) reduction (max: gangs_max, workers_max, vectors_max)
      for (int i = 100 * workers_actual; i > -100 * workers_actual; --i)
        {
          gangs_min = gangs_max = acc_gang ();
          workers_min = workers_max = acc_worker ();
          vectors_min = vectors_max = acc_vector ();
        }
    }
    if (workers_actual < 1)
      __builtin_abort ();
    if (gangs_min != 0 || gangs_max != 0
        || workers_min != 0 || workers_max != workers_actual - 1
        || vectors_min != 0 || vectors_max != 0)
      __builtin_abort ();
  }
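
  /* Note the difference between the two num_workers blocks above: with the
     compile-time constant (2 << 20, i.e. 2097152), the nvptx offload
     compilation itself clamps the request and emits "using num_workers (32),
     ignoring 2097152", whereas with the runtime 'workers' variable no such
     clamping happens at compile time, so the test limits the value to 32 up
     front to avoid the cuLaunchKernel failure described in the comment. */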

  /* GR, WS, VP. */
  {
    /* We try with an outrageously large value. */
#define VECTORS 2 << 20
    int vectors_actual = VECTORS;
    int gangs_min, gangs_max, workers_min, workers_max, vectors_min, vectors_max;
    gangs_min = workers_min = vectors_min = INT_MAX;
    gangs_max = workers_max = vectors_max = INT_MIN;
#pragma acc parallel copy (vectors_actual) /* { dg-warning "using vector_length \\(1024\\), ignoring 2097152" "" { target openacc_nvidia_accel_selected } } */ \
  vector_length (VECTORS)
    {
      if (acc_on_device (acc_device_host))
        {
          /* We're actually executing with vector_length (1). */
          vectors_actual = 1;
        }
      else if (acc_on_device (acc_device_nvidia))
        {
          /* The GCC nvptx back end reduces this to vector_length (1024). */
          vectors_actual = 1024;
        }
      else if (acc_on_device (acc_device_radeon))
        {
          /* The GCC GCN back end enforces vector_length (1): autovectorize. */
          vectors_actual = 1;
        }
      else
        __builtin_abort ();
#pragma acc loop vector reduction (min: gangs_min, workers_min, vectors_min) reduction (max: gangs_max, workers_max, vectors_max)
      for (int i = 100 * vectors_actual; i > -100 * vectors_actual; --i)
        {
          gangs_min = gangs_max = acc_gang ();
          workers_min = workers_max = acc_worker ();
          vectors_min = vectors_max = acc_vector ();
        }
    }
    if (vectors_actual < 1)
      __builtin_abort ();
    if (gangs_min != 0 || gangs_max != 0
        || workers_min != 0 || workers_max != 0
        || vectors_min != 0 || vectors_max != vectors_actual - 1)
      __builtin_abort ();
#undef VECTORS
  }

  /* GR, WS, VP. */
  {
    /* We try with an outrageously large value. */
    int vectors = 2 << 20;
    int vectors_actual = vectors;
    int gangs_min, gangs_max, workers_min, workers_max, vectors_min, vectors_max;
    gangs_min = workers_min = vectors_min = INT_MAX;
    gangs_max = workers_max = vectors_max = INT_MIN;
#pragma acc parallel copy (vectors_actual) /* { dg-warning "using vector_length \\(32\\), ignoring runtime setting" "" { target openacc_nvidia_accel_selected } } */ \
  vector_length (vectors)
    {
      if (acc_on_device (acc_device_host))
        {
          /* We're actually executing with vector_length (1). */
          vectors_actual = 1;
        }
      else if (acc_on_device (acc_device_nvidia))
        {
          /* The GCC nvptx back end enforces vector_length (32). */
          vectors_actual = 32;
        }
      else if (acc_on_device (acc_device_radeon))
        {
          /* Because of the way vectors are implemented for GCN, a vector loop
             containing a seq routine call will not vectorize calls to that
             routine. Hence, we'll only get one "vector". */
          vectors_actual = 1;
        }
      else
        __builtin_abort ();
#pragma acc loop vector reduction (min: gangs_min, workers_min, vectors_min) reduction (max: gangs_max, workers_max, vectors_max)
      for (int i = 100 * vectors_actual; i > -100 * vectors_actual; --i)
        {
          gangs_min = gangs_max = acc_gang ();
          workers_min = workers_max = acc_worker ();
          vectors_min = vectors_max = acc_vector ();
        }
    }
    if (vectors_actual < 1)
      __builtin_abort ();
    if (gangs_min != 0 || gangs_max != 0
        || workers_min != 0 || workers_max != 0
        || vectors_min != 0 || vectors_max != vectors_actual - 1)
      __builtin_abort ();
  }
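
  /* Same contrast as for num_workers above: the compile-time constant
     vector_length (2 << 20) is clamped to 1024 ("ignoring 2097152"), while
     the runtime 'vectors' variable leads to "using vector_length (32),
     ignoring runtime setting", i.e. a fixed fallback of 32 chosen at compile
     time. */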


  /* Composition of GP, WP, VP. */
  {
    int gangs = 12345;
    /* With nvptx offloading, multi-level reductions apparently are very slow
       in the following case. So, limit ourselves here. */
    if (acc_get_device_type () == acc_device_nvidia)
      gangs = 3;
    /* Similar appears to be true for GCN. */
    if (acc_get_device_type () == acc_device_radeon)
      gangs = 3;
    int gangs_actual = gangs;
#define WORKERS 3
    int workers_actual = WORKERS;
#define VECTORS 11
    int vectors_actual = VECTORS;
    int gangs_min, gangs_max, workers_min, workers_max, vectors_min, vectors_max;
    gangs_min = workers_min = vectors_min = INT_MAX;
    gangs_max = workers_max = vectors_max = INT_MIN;
#pragma acc parallel copy (gangs_actual, workers_actual, vectors_actual) /* { dg-warning "using vector_length \\(32\\), ignoring 11" "" { target openacc_nvidia_accel_selected } } */ \
  num_gangs (gangs) \
  num_workers (WORKERS) \
  vector_length (VECTORS)
    {
      if (acc_on_device (acc_device_host))
        {
          /* We're actually executing with num_gangs (1), num_workers (1),
             vector_length (1). */
          gangs_actual = 1;
          workers_actual = 1;
          vectors_actual = 1;
        }
      else if (acc_on_device (acc_device_nvidia))
        {
          /* The GCC nvptx back end enforces vector_length (32). */
          vectors_actual = 32;
        }
      else if (acc_on_device (acc_device_radeon))
        {
          /* Temporary setting, until multiple workers are permitted. */
          workers_actual = 1;
          /* See above comments about GCN vectors_actual. */
          vectors_actual = 1;
        }
      else
        __builtin_abort ();
#pragma acc loop gang reduction (min: gangs_min, workers_min, vectors_min) reduction (max: gangs_max, workers_max, vectors_max)
      for (int i = 100 * gangs_actual; i > -100 * gangs_actual; --i)
#pragma acc loop worker reduction (min: gangs_min, workers_min, vectors_min) reduction (max: gangs_max, workers_max, vectors_max)
        for (int j = 100 * workers_actual; j > -100 * workers_actual; --j)
#pragma acc loop vector reduction (min: gangs_min, workers_min, vectors_min) reduction (max: gangs_max, workers_max, vectors_max)
          for (int k = 100 * vectors_actual; k > -100 * vectors_actual; --k)
            {
              gangs_min = gangs_max = acc_gang ();
              workers_min = workers_max = acc_worker ();
              vectors_min = vectors_max = acc_vector ();
            }
    }
    if (gangs_min != 0 || gangs_max != gangs_actual - 1
        || workers_min != 0 || workers_max != workers_actual - 1
        || vectors_min != 0 || vectors_max != vectors_actual - 1)
      __builtin_abort ();
#undef VECTORS
#undef WORKERS
  }
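
  /* In the composed case the reduction clauses are repeated on every 'loop'
     level; each level only combines the copies private to the dimension it
     partitions, which is presumably why all three levels carry the clauses so
     that the final min/max values are correct across gangs, workers, and
     vectors. */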


  /* OpenACC kernels construct. */

  /* We can't test parallelized OpenACC kernels constructs in this way: use of
     the acc_gang, acc_worker, acc_vector functions will make the construct
     unparallelizable. */


  /* Unparallelized OpenACC kernels constructs must get launched as 1 x 1 x 1
     kernels. */
  {
    int gangs_min, gangs_max, workers_min, workers_max, vectors_min, vectors_max;
    gangs_min = workers_min = vectors_min = INT_MAX;
    gangs_max = workers_max = vectors_max = INT_MIN;
#pragma acc kernels
    {
      /* This is to make the OpenACC kernels construct unparallelizable. */
      asm volatile ("" : : : "memory");

#pragma acc loop reduction (min: gangs_min, workers_min, vectors_min) reduction (max: gangs_max, workers_max, vectors_max)
      for (int i = 100; i > -100; --i)
        {
          gangs_min = gangs_max = acc_gang ();
          workers_min = workers_max = acc_worker ();
          vectors_min = vectors_max = acc_vector ();
        }
    }
    if (gangs_min != 0 || gangs_max != 1 - 1
        || workers_min != 0 || workers_max != 1 - 1
        || vectors_min != 0 || vectors_max != 1 - 1)
      __builtin_abort ();
  }
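
  /* The empty 'asm volatile' with a "memory" clobber above acts as an
     optimization barrier that the 'kernels' parallelizer cannot analyze, so
     the region is treated as unparallelizable and launched as 1 x 1 x 1; the
     checks therefore expect every gang/worker/vector index to be 0, written
     as '1 - 1' to match the expected dimension of 1. */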


  /* Unparallelized OpenACC kernels constructs must get launched as 1 x 1 x 1
     kernels even when there are explicit num_gangs, num_workers, or
     vector_length clauses. */
  {
    int gangs = 5;
#define WORKERS 5
#define VECTORS 13
    int gangs_min, gangs_max, workers_min, workers_max, vectors_min, vectors_max;
    gangs_min = workers_min = vectors_min = INT_MAX;
    gangs_max = workers_max = vectors_max = INT_MIN;
#pragma acc kernels \
  num_gangs (gangs) \
  num_workers (WORKERS) \
  vector_length (VECTORS)
    {
      /* This is to make the OpenACC kernels construct unparallelizable. */
      asm volatile ("" : : : "memory");

#pragma acc loop reduction (min: gangs_min, workers_min, vectors_min) reduction (max: gangs_max, workers_max, vectors_max)
      for (int i = 100; i > -100; --i)
        {
          gangs_min = gangs_max = acc_gang ();
          workers_min = workers_max = acc_worker ();
          vectors_min = vectors_max = acc_vector ();
        }
    }
    if (gangs_min != 0 || gangs_max != 1 - 1
        || workers_min != 0 || workers_max != 1 - 1
        || vectors_min != 0 || vectors_max != 1 - 1)
      __builtin_abort ();
#undef VECTORS
#undef WORKERS
  }


  /* OpenACC serial construct. */

  /* GR, WS, VS. */
  {
    int gangs_min, gangs_max, workers_min, workers_max, vectors_min, vectors_max;
    gangs_min = workers_min = vectors_min = INT_MAX;
    gangs_max = workers_max = vectors_max = INT_MIN;
#pragma acc serial /* { dg-warning "using vector_length \\(32\\), ignoring 1" "" { target openacc_nvidia_accel_selected } } */ \
  reduction (min: gangs_min, workers_min, vectors_min) reduction (max: gangs_max, workers_max, vectors_max)
    {
      for (int i = 100; i > -100; i--)
        {
          gangs_min = gangs_max = acc_gang ();
          workers_min = workers_max = acc_worker ();
          vectors_min = vectors_max = acc_vector ();
        }
    }
    if (gangs_min != 0 || gangs_max != 1 - 1
        || workers_min != 0 || workers_max != 1 - 1
        || vectors_min != 0 || vectors_max != 1 - 1)
      __builtin_abort ();
  }
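
  /* 'serial' corresponds to 'parallel num_gangs (1) num_workers (1)
     vector_length (1)', so all indices are expected to be 0 here, again
     written as '1 - 1'.  As the dg-warning above shows, the nvptx back end
     still forces vector_length (32) for the launch, but since there is no
     'loop vector' in this block the body runs vector-single and the observed
     vector index stays 0. */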

  /* Composition of GP, WP, VP. */
  {
    int vectors_actual = 1; /* Implicit 'vector_length (1)' clause. */
    int gangs_min, gangs_max, workers_min, workers_max, vectors_min, vectors_max;
    gangs_min = workers_min = vectors_min = INT_MAX;
    gangs_max = workers_max = vectors_max = INT_MIN;
#pragma acc serial copy (vectors_actual) /* { dg-warning "using vector_length \\(32\\), ignoring 1" "" { target openacc_nvidia_accel_selected } } */ \
  copy (gangs_min, gangs_max, workers_min, workers_max, vectors_min, vectors_max)
    {
      if (acc_on_device (acc_device_nvidia))
        {
          /* The GCC nvptx back end enforces vector_length (32). */
          /* It's unclear if that's actually permissible here;
             <https://github.com/OpenACC/openacc-spec/issues/238> "OpenACC
             'serial' construct might not actually be serial". */
          vectors_actual = 32;
        }
#pragma acc loop gang reduction (min: gangs_min, workers_min, vectors_min) reduction (max: gangs_max, workers_max, vectors_max)
      for (int i = 100; i > -100; i--)
#pragma acc loop worker reduction (min: gangs_min, workers_min, vectors_min) reduction (max: gangs_max, workers_max, vectors_max)
        for (int j = 100; j > -100; j--)
#pragma acc loop vector reduction (min: gangs_min, workers_min, vectors_min) reduction (max: gangs_max, workers_max, vectors_max)
          for (int k = 100 * vectors_actual; k > -100 * vectors_actual; k--)
            {
              gangs_min = gangs_max = acc_gang ();
              workers_min = workers_max = acc_worker ();
              vectors_min = vectors_max = acc_vector ();
            }
    }
    if (acc_get_device_type () == acc_device_nvidia)
      {
        if (vectors_actual != 32)
          __builtin_abort ();
      }
    else
      if (vectors_actual != 1)
        __builtin_abort ();
    if (gangs_min != 0 || gangs_max != 1 - 1
        || workers_min != 0 || workers_max != 1 - 1
        || vectors_min != 0 || vectors_max != vectors_actual - 1)
      __builtin_abort ();
  }


  return 0;
}