[ARM/AArch64][2/2] Crypto intrinsics tuning for Cortex-A53 - pipeline description
[gcc.git] / gcc / graphite-optimize-isl.c
1 /* A scheduling optimizer for Graphite
2 Copyright (C) 2012-2014 Free Software Foundation, Inc.
3 Contributed by Tobias Grosser <tobias@grosser.es>.
4
5 This file is part of GCC.
6
7 GCC is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 3, or (at your option)
10 any later version.
11
12 GCC is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
16
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
20
21 #include "config.h"
22
23 #ifdef HAVE_cloog
24 #include <isl/set.h>
25 #include <isl/map.h>
26 #include <isl/union_map.h>
27 #include <isl/schedule.h>
28 #include <isl/band.h>
29 #include <isl/aff.h>
30 #include <isl/options.h>
31 #endif
32
33 #include "system.h"
34 #include "coretypes.h"
35 #include "tree.h"
36 #include "basic-block.h"
37 #include "tree-ssa-alias.h"
38 #include "internal-fn.h"
39 #include "gimple-expr.h"
40 #include "is-a.h"
41 #include "gimple.h"
42 #include "gimple-iterator.h"
43 #include "tree-ssa-loop.h"
44 #include "dumpfile.h"
45 #include "cfgloop.h"
46 #include "tree-chrec.h"
47 #include "tree-data-ref.h"
48 #include "tree-scalar-evolution.h"
49 #include "sese.h"
50
51 #ifdef HAVE_cloog
52 #include "graphite-poly.h"
53
54 static isl_union_set *
55 scop_get_domains (scop_p scop ATTRIBUTE_UNUSED)
56 {
57 int i;
58 poly_bb_p pbb;
59 isl_space *space = isl_set_get_space (scop->context);
60 isl_union_set *res = isl_union_set_empty (space);
61
62 FOR_EACH_VEC_ELT (scop->bbs, i, pbb)
63 res = isl_union_set_add_set (res, isl_set_copy (pbb->domain));
64
65 return res;
66 }
67
68 static isl_union_map *
69 scop_get_dependences (scop_p scop)
70 {
71 isl_union_map *dependences;
72
73 if (!scop->must_raw)
74 compute_deps (scop, SCOP_BBS (scop),
75 &scop->must_raw, &scop->may_raw,
76 &scop->must_raw_no_source, &scop->may_raw_no_source,
77 &scop->must_war, &scop->may_war,
78 &scop->must_war_no_source, &scop->may_war_no_source,
79 &scop->must_waw, &scop->may_waw,
80 &scop->must_waw_no_source, &scop->may_waw_no_source);
81
82 dependences = isl_union_map_copy (scop->must_raw);
83 dependences = isl_union_map_union (dependences,
84 isl_union_map_copy (scop->must_war));
85 dependences = isl_union_map_union (dependences,
86 isl_union_map_copy (scop->must_waw));
87 dependences = isl_union_map_union (dependences,
88 isl_union_map_copy (scop->may_raw));
89 dependences = isl_union_map_union (dependences,
90 isl_union_map_copy (scop->may_war));
91 dependences = isl_union_map_union (dependences,
92 isl_union_map_copy (scop->may_waw));
93
94 return dependences;
95 }
96
97 /* getTileMap - Create a map that describes a n-dimensonal tiling.
98
99 getTileMap creates a map from a n-dimensional scattering space into an
100 2*n-dimensional scattering space. The map describes a rectangular tiling.
101
102 Example:
103 scheduleDimensions = 2, parameterDimensions = 1, tileSize = 32
104
105 tileMap := [p0] -> {[s0, s1] -> [t0, t1, s0, s1]:
106 t0 % 32 = 0 and t0 <= s0 < t0 + 32 and
107 t1 % 32 = 0 and t1 <= s1 < t1 + 32}
108
109 Before tiling:
110
111 for (i = 0; i < N; i++)
112 for (j = 0; j < M; j++)
113 S(i,j)
114
115 After tiling:
116
117 for (t_i = 0; t_i < N; i+=32)
118 for (t_j = 0; t_j < M; j+=32)
119 for (i = t_i; i < min(t_i + 32, N); i++) | Unknown that N % 32 = 0
120 for (j = t_j; j < t_j + 32; j++) | Known that M % 32 = 0
121 S(i,j)
122 */
123
124 static isl_basic_map *
125 getTileMap (isl_ctx *ctx, int scheduleDimensions, int tileSize)
126 {
127 int x;
128 /* We construct
129
130 tileMap := [p0] -> {[s0, s1] -> [t0, t1, p0, p1, a0, a1]:
131 s0 = a0 * 32 and s0 = p0 and t0 <= p0 < t0 + 32 and
132 s1 = a1 * 32 and s1 = p1 and t1 <= p1 < t1 + 32}
133
134 and project out the auxilary dimensions a0 and a1. */
135 isl_space *Space = isl_space_alloc (ctx, 0, scheduleDimensions,
136 scheduleDimensions * 3);
137 isl_basic_map *tileMap = isl_basic_map_universe (isl_space_copy (Space));
138
139 isl_local_space *LocalSpace = isl_local_space_from_space (Space);
140
141 for (x = 0; x < scheduleDimensions; x++)
142 {
143 int sX = x;
144 int tX = x;
145 int pX = scheduleDimensions + x;
146 int aX = 2 * scheduleDimensions + x;
147
148 isl_constraint *c;
149
150 /* sX = aX * tileSize; */
151 c = isl_equality_alloc (isl_local_space_copy (LocalSpace));
152 isl_constraint_set_coefficient_si (c, isl_dim_out, sX, 1);
153 isl_constraint_set_coefficient_si (c, isl_dim_out, aX, -tileSize);
154 tileMap = isl_basic_map_add_constraint (tileMap, c);
155
156 /* pX = sX; */
157 c = isl_equality_alloc (isl_local_space_copy (LocalSpace));
158 isl_constraint_set_coefficient_si (c, isl_dim_out, pX, 1);
159 isl_constraint_set_coefficient_si (c, isl_dim_in, sX, -1);
160 tileMap = isl_basic_map_add_constraint (tileMap, c);
161
162 /* tX <= pX */
163 c = isl_inequality_alloc (isl_local_space_copy (LocalSpace));
164 isl_constraint_set_coefficient_si (c, isl_dim_out, pX, 1);
165 isl_constraint_set_coefficient_si (c, isl_dim_out, tX, -1);
166 tileMap = isl_basic_map_add_constraint (tileMap, c);
167
168 /* pX <= tX + (tileSize - 1) */
169 c = isl_inequality_alloc (isl_local_space_copy (LocalSpace));
170 isl_constraint_set_coefficient_si (c, isl_dim_out, tX, 1);
171 isl_constraint_set_coefficient_si (c, isl_dim_out, pX, -1);
172 isl_constraint_set_constant_si (c, tileSize - 1);
173 tileMap = isl_basic_map_add_constraint (tileMap, c);
174 }
175
176 /* Project out auxiliary dimensions.
177
178 The auxiliary dimensions are transformed into existentially quantified ones.
179 This reduces the number of visible scattering dimensions and allows Cloog
180 to produces better code. */
181 tileMap = isl_basic_map_project_out (tileMap, isl_dim_out,
182 2 * scheduleDimensions,
183 scheduleDimensions);
184 isl_local_space_free (LocalSpace);
185 return tileMap;
186 }
187
188 /* getScheduleForBand - Get the schedule for this band.
189
190 Polly applies transformations like tiling on top of the isl calculated value.
191 This can influence the number of scheduling dimension. The number of
192 schedule dimensions is returned in the parameter 'Dimension'. */
193 static bool DisableTiling = false;
194
195 static isl_union_map *
196 getScheduleForBand (isl_band *Band, int *Dimensions)
197 {
198 isl_union_map *PartialSchedule;
199 isl_ctx *ctx;
200 isl_space *Space;
201 isl_basic_map *TileMap;
202 isl_union_map *TileUMap;
203
204 PartialSchedule = isl_band_get_partial_schedule (Band);
205 *Dimensions = isl_band_n_member (Band);
206
207 if (DisableTiling)
208 return PartialSchedule;
209
210 /* It does not make any sense to tile a band with just one dimension. */
211 if (*Dimensions == 1)
212 return PartialSchedule;
213
214 ctx = isl_union_map_get_ctx (PartialSchedule);
215 Space = isl_union_map_get_space (PartialSchedule);
216
217 TileMap = getTileMap (ctx, *Dimensions, 32);
218 TileUMap = isl_union_map_from_map (isl_map_from_basic_map (TileMap));
219 TileUMap = isl_union_map_align_params (TileUMap, Space);
220 *Dimensions = 2 * *Dimensions;
221
222 return isl_union_map_apply_range (PartialSchedule, TileUMap);
223 }
224
225 /* Create a map that pre-vectorizes one scheduling dimension.
226
227 getPrevectorMap creates a map that maps each input dimension to the same
228 output dimension, except for the dimension DimToVectorize. DimToVectorize is
229 strip mined by 'VectorWidth' and the newly created point loop of
230 DimToVectorize is moved to the innermost level.
231
232 Example (DimToVectorize=0, ScheduleDimensions=2, VectorWidth=4):
233
234 | Before transformation
235 |
236 | A[i,j] -> [i,j]
237 |
238 | for (i = 0; i < 128; i++)
239 | for (j = 0; j < 128; j++)
240 | A(i,j);
241
242 Prevector map:
243 [i,j] -> [it,j,ip] : it % 4 = 0 and it <= ip <= it + 3 and i = ip
244
245 | After transformation:
246 |
247 | A[i,j] -> [it,j,ip] : it % 4 = 0 and it <= ip <= it + 3 and i = ip
248 |
249 | for (it = 0; it < 128; it+=4)
250 | for (j = 0; j < 128; j++)
251 | for (ip = max(0,it); ip < min(128, it + 3); ip++)
252 | A(ip,j);
253
254 The goal of this transformation is to create a trivially vectorizable loop.
255 This means a parallel loop at the innermost level that has a constant number
256 of iterations corresponding to the target vector width.
257
258 This transformation creates a loop at the innermost level. The loop has a
259 constant number of iterations, if the number of loop iterations at
260 DimToVectorize can be devided by VectorWidth. The default VectorWidth is
261 currently constant and not yet target specific. This function does not reason
262 about parallelism. */
263 static isl_map *
264 getPrevectorMap (isl_ctx *ctx, int DimToVectorize,
265 int ScheduleDimensions,
266 int VectorWidth)
267 {
268 isl_space *Space;
269 isl_local_space *LocalSpace, *LocalSpaceRange;
270 isl_set *Modulo;
271 isl_map *TilingMap;
272 isl_constraint *c;
273 isl_aff *Aff;
274 int PointDimension; /* ip */
275 int TileDimension; /* it */
276 isl_int VectorWidthMP;
277 int i;
278
279 /* assert (0 <= DimToVectorize && DimToVectorize < ScheduleDimensions);*/
280
281 Space = isl_space_alloc (ctx, 0, ScheduleDimensions, ScheduleDimensions + 1);
282 TilingMap = isl_map_universe (isl_space_copy (Space));
283 LocalSpace = isl_local_space_from_space (Space);
284 PointDimension = ScheduleDimensions;
285 TileDimension = DimToVectorize;
286
287 /* Create an identity map for everything except DimToVectorize and map
288 DimToVectorize to the point loop at the innermost dimension. */
289 for (i = 0; i < ScheduleDimensions; i++)
290 {
291 c = isl_equality_alloc (isl_local_space_copy (LocalSpace));
292 isl_constraint_set_coefficient_si (c, isl_dim_in, i, -1);
293
294 if (i == DimToVectorize)
295 isl_constraint_set_coefficient_si (c, isl_dim_out, PointDimension, 1);
296 else
297 isl_constraint_set_coefficient_si (c, isl_dim_out, i, 1);
298
299 TilingMap = isl_map_add_constraint (TilingMap, c);
300 }
301
302 /* it % 'VectorWidth' = 0 */
303 LocalSpaceRange = isl_local_space_range (isl_local_space_copy (LocalSpace));
304 Aff = isl_aff_zero_on_domain (LocalSpaceRange);
305 Aff = isl_aff_set_constant_si (Aff, VectorWidth);
306 Aff = isl_aff_set_coefficient_si (Aff, isl_dim_in, TileDimension, 1);
307 isl_int_init (VectorWidthMP);
308 isl_int_set_si (VectorWidthMP, VectorWidth);
309 Aff = isl_aff_mod (Aff, VectorWidthMP);
310 isl_int_clear (VectorWidthMP);
311 Modulo = isl_pw_aff_zero_set (isl_pw_aff_from_aff (Aff));
312 TilingMap = isl_map_intersect_range (TilingMap, Modulo);
313
314 /* it <= ip */
315 c = isl_inequality_alloc (isl_local_space_copy (LocalSpace));
316 isl_constraint_set_coefficient_si (c, isl_dim_out, TileDimension, -1);
317 isl_constraint_set_coefficient_si (c, isl_dim_out, PointDimension, 1);
318 TilingMap = isl_map_add_constraint (TilingMap, c);
319
320 /* ip <= it + ('VectorWidth' - 1) */
321 c = isl_inequality_alloc (LocalSpace);
322 isl_constraint_set_coefficient_si (c, isl_dim_out, TileDimension, 1);
323 isl_constraint_set_coefficient_si (c, isl_dim_out, PointDimension, -1);
324 isl_constraint_set_constant_si (c, VectorWidth - 1);
325 TilingMap = isl_map_add_constraint (TilingMap, c);
326
327 isl_map_dump (TilingMap);
328
329 return TilingMap;
330 }
331
332 static bool EnablePollyVector = false;
333
334 /* getScheduleForBandList - Get the scheduling map for a list of bands.
335
336 We walk recursively the forest of bands to combine the schedules of the
337 individual bands to the overall schedule. In case tiling is requested,
338 the individual bands are tiled. */
339 static isl_union_map *
340 getScheduleForBandList (isl_band_list *BandList)
341 {
342 int NumBands, i;
343 isl_union_map *Schedule;
344 isl_ctx *ctx;
345
346 ctx = isl_band_list_get_ctx (BandList);
347 NumBands = isl_band_list_n_band (BandList);
348 Schedule = isl_union_map_empty (isl_space_params_alloc (ctx, 0));
349
350 for (i = 0; i < NumBands; i++)
351 {
352 isl_band *Band;
353 isl_union_map *PartialSchedule;
354 int ScheduleDimensions;
355 isl_space *Space;
356
357 Band = isl_band_list_get_band (BandList, i);
358 PartialSchedule = getScheduleForBand (Band, &ScheduleDimensions);
359 Space = isl_union_map_get_space (PartialSchedule);
360
361 if (isl_band_has_children (Band))
362 {
363 isl_band_list *Children;
364 isl_union_map *SuffixSchedule;
365
366 Children = isl_band_get_children (Band);
367 SuffixSchedule = getScheduleForBandList (Children);
368 PartialSchedule = isl_union_map_flat_range_product (PartialSchedule,
369 SuffixSchedule);
370 isl_band_list_free (Children);
371 }
372 else if (EnablePollyVector)
373 {
374 for (i = ScheduleDimensions - 1 ; i >= 0 ; i--)
375 {
376 if (isl_band_member_is_zero_distance (Band, i))
377 {
378 isl_map *TileMap;
379 isl_union_map *TileUMap;
380
381 TileMap = getPrevectorMap (ctx, i, ScheduleDimensions, 4);
382 TileUMap = isl_union_map_from_map (TileMap);
383 TileUMap = isl_union_map_align_params
384 (TileUMap, isl_space_copy (Space));
385 PartialSchedule = isl_union_map_apply_range
386 (PartialSchedule, TileUMap);
387 break;
388 }
389 }
390 }
391
392 Schedule = isl_union_map_union (Schedule, PartialSchedule);
393
394 isl_band_free (Band);
395 isl_space_free (Space);
396 }
397
398 return Schedule;
399 }
400
401 static isl_union_map *
402 getScheduleMap (isl_schedule *Schedule)
403 {
404 isl_band_list *BandList = isl_schedule_get_band_forest (Schedule);
405 isl_union_map *ScheduleMap = getScheduleForBandList (BandList);
406 isl_band_list_free (BandList);
407 return ScheduleMap;
408 }
409
410 static int
411 getSingleMap (__isl_take isl_map *map, void *user)
412 {
413 isl_map **singleMap = (isl_map **) user;
414 *singleMap = map;
415
416 return 0;
417 }
418
419 static void
420 apply_schedule_map_to_scop (scop_p scop, isl_union_map *schedule_map)
421 {
422 int i;
423 poly_bb_p pbb;
424
425 FOR_EACH_VEC_ELT (scop->bbs, i, pbb)
426 {
427 isl_set *domain = isl_set_copy (pbb->domain);
428 isl_union_map *stmtBand;
429 isl_map *stmtSchedule;
430
431 stmtBand = isl_union_map_intersect_domain
432 (isl_union_map_copy (schedule_map),
433 isl_union_set_from_set (domain));
434 isl_union_map_foreach_map (stmtBand, getSingleMap, &stmtSchedule);
435 isl_map_free (pbb->transformed);
436 pbb->transformed = stmtSchedule;
437 isl_union_map_free (stmtBand);
438 }
439 }
440
441 static const int CONSTANT_BOUND = 20;
442
443 bool
444 optimize_isl (scop_p scop)
445 {
446
447 isl_schedule *schedule;
448 isl_union_set *domain;
449 isl_union_map *validity, *proximity, *dependences;
450 isl_union_map *schedule_map;
451
452 domain = scop_get_domains (scop);
453 dependences = scop_get_dependences (scop);
454 dependences = isl_union_map_gist_domain (dependences,
455 isl_union_set_copy (domain));
456 dependences = isl_union_map_gist_range (dependences,
457 isl_union_set_copy (domain));
458 validity = dependences;
459
460 proximity = isl_union_map_copy (validity);
461
462 isl_options_set_schedule_max_constant_term (scop->ctx, CONSTANT_BOUND);
463 isl_options_set_schedule_maximize_band_depth (scop->ctx, 1);
464 isl_options_set_schedule_fuse (scop->ctx, ISL_SCHEDULE_FUSE_MIN);
465 isl_options_set_on_error (scop->ctx, ISL_ON_ERROR_CONTINUE);
466 schedule = isl_union_set_compute_schedule (domain, validity, proximity);
467 isl_options_set_on_error (scop->ctx, ISL_ON_ERROR_ABORT);
468
469 if (!schedule)
470 return false;
471
472 schedule_map = getScheduleMap (schedule);
473
474 apply_schedule_map_to_scop (scop, schedule_map);
475
476 isl_schedule_free (schedule);
477 isl_union_map_free (schedule_map);
478
479 return true;
480 }
481
482 #endif