gcc/tree-data-ref.c

   1 /* Data references and dependences detectors.
   2    Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011
   3    Free Software Foundation, Inc.
   4    Contributed by Sebastian Pop <pop@cri.ensmp.fr>
   5
   6 This file is part of GCC.
   7
   8 GCC is free software; you can redistribute it and/or modify it under
   9 the terms of the GNU General Public License as published by the Free
  10 Software Foundation; either version 3, or (at your option) any later
  11 version.
  12
  13 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
  14 WARRANTY; without even the implied warranty of MERCHANTABILITY or
  15 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  16 for more details.
  17
  18 You should have received a copy of the GNU General Public License
  19 along with GCC; see the file COPYING3.  If not see
  20 <http://www.gnu.org/licenses/>.  */
  21
  22 /* This pass walks a given loop structure searching for array
  23    references.  The information about the array accesses is recorded
  24    in DATA_REFERENCE structures.
  25
  26    The basic test for determining the dependences is:
  27    given two access functions chrec1 and chrec2 to a same array, and
  28    x and y two vectors from the iteration domain, the same element of
  29    the array is accessed twice at iterations x and y if and only if:
  30    |             chrec1 (x) == chrec2 (y).
  31
  32    The goals of this analysis are:
  33
  34    - to determine the independence: the relation between two
  35      independent accesses is qualified with the chrec_known (this
  36      information allows a loop parallelization),
  37
  38    - when two data references access the same data, to qualify the
  39      dependence relation with classic dependence representations:
  40
  41        - distance vectors
  42        - direction vectors
  43        - loop carried level dependence
  44        - polyhedron dependence
  45      or with the chains of recurrences based representation,
  46
  47    - to define a knowledge base for storing the data dependence
  48      information,
  49
  50    - to define an interface to access this data.
  51
  52
  53    Definitions:
  54
  55    - subscript: given two array accesses a subscript is the tuple
  56    composed of the access functions for a given dimension.  Example:
  57    Given A[f1][f2][f3] and B[g1][g2][g3], there are three subscripts:
  58    (f1, g1), (f2, g2), (f3, g3).
  59
  60    - Diophantine equation: an equation whose coefficients and
  61    solutions are integer constants, for example the equation
  62    |   3*x + 2*y = 1
  63    has an integer solution x = 1 and y = -1.
  64
  65    References:
  66
  67    - "Advanced Compilation for High Performance Computing" by Randy
  68    Allen and Ken Kennedy.
  69    http://citeseer.ist.psu.edu/goff91practical.html
  70
  71    - "Loop Transformations for Restructuring Compilers - The Foundations"
  72    by Utpal Banerjee.
  73
  74
  75 */
  76
  77 #include "config.h"
  78 #include "system.h"
  79 #include "coretypes.h"
  80 #include "gimple-pretty-print.h"
  81 #include "tree-flow.h"
  82 #include "cfgloop.h"
  83 #include "tree-data-ref.h"
  84 #include "tree-scalar-evolution.h"
  85 #include "tree-pass.h"
  86 #include "langhooks.h"
  87 #include "tree-affine.h"
  88
  89 static struct datadep_stats
  90 {
  91   int num_dependence_tests;
  92   int num_dependence_dependent;
  93   int num_dependence_independent;
  94   int num_dependence_undetermined;
  95
  96   int num_subscript_tests;
  97   int num_subscript_undetermined;
  98   int num_same_subscript_function;
  99
 100   int num_ziv;
 101   int num_ziv_independent;
 102   int num_ziv_dependent;
 103   int num_ziv_unimplemented;
 104
 105   int num_siv;
 106   int num_siv_independent;
 107   int num_siv_dependent;
 108   int num_siv_unimplemented;
 109
 110   int num_miv;
 111   int num_miv_independent;
 112   int num_miv_dependent;
 113   int num_miv_unimplemented;
 114 } dependence_stats;
 115
 116 static bool subscript_dependence_tester_1 (struct data_dependence_relation *,
 117                                            struct data_reference *,
 118                                            struct data_reference *,
 119                                            struct loop *);
 120 /* Returns true iff A divides B.  */
 121
 122 static inline bool
 123 tree_fold_divides_p (const_tree a, const_tree b)
 124 {
 125   gcc_assert (TREE_CODE (a) == INTEGER_CST);
 126   gcc_assert (TREE_CODE (b) == INTEGER_CST);
 127   return integer_zerop (int_const_binop (TRUNC_MOD_EXPR, b, a));
 128 }
 129
 130 /* Returns true iff A divides B.  */
 131
 132 static inline bool
 133 int_divides_p (int a, int b)
 134 {
 135   return ((b % a) == 0);
 136 }
 137
 138 \f
 139
 140 /* Dump into FILE all the data references from DATAREFS.  */
 141
 142 void
 143 dump_data_references (FILE *file, VEC (data_reference_p, heap) *datarefs)
 144 {
 145   unsigned int i;
 146   struct data_reference *dr;
 147
 148   FOR_EACH_VEC_ELT (data_reference_p, datarefs, i, dr)
 149     dump_data_reference (file, dr);
 150 }
 151
 152 /* Dump into STDERR all the data references from DATAREFS.  */
 153
 154 DEBUG_FUNCTION void
 155 debug_data_references (VEC (data_reference_p, heap) *datarefs)
 156 {
 157   dump_data_references (stderr, datarefs);
 158 }
 159
 160 /* Dump to STDERR all the dependence relations from DDRS.  */
 161
 162 DEBUG_FUNCTION void
 163 debug_data_dependence_relations (VEC (ddr_p, heap) *ddrs)
 164 {
 165   dump_data_dependence_relations (stderr, ddrs);
 166 }
 167
 168 /* Dump into FILE all the dependence relations from DDRS.  */
 169
 170 void
 171 dump_data_dependence_relations (FILE *file,
 172                                 VEC (ddr_p, heap) *ddrs)
 173 {
 174   unsigned int i;
 175   struct data_dependence_relation *ddr;
 176
 177   FOR_EACH_VEC_ELT (ddr_p, ddrs, i, ddr)
 178     dump_data_dependence_relation (file, ddr);
 179 }
 180
 181 /* Print to STDERR the data_reference DR.  */
 182
 183 DEBUG_FUNCTION void
 184 debug_data_reference (struct data_reference *dr)
 185 {
 186   dump_data_reference (stderr, dr);
 187 }
 188
 189 /* Dump function for a DATA_REFERENCE structure.  */
 190
 191 void
 192 dump_data_reference (FILE *outf,
 193                      struct data_reference *dr)
 194 {
 195   unsigned int i;
 196
 197   fprintf (outf, "#(Data Ref: \n");
 198   fprintf (outf, "#  bb: %d \n", gimple_bb (DR_STMT (dr))->index);
 199   fprintf (outf, "#  stmt: ");
 200   print_gimple_stmt (outf, DR_STMT (dr), 0, 0);
 201   fprintf (outf, "#  ref: ");
 202   print_generic_stmt (outf, DR_REF (dr), 0);
 203   fprintf (outf, "#  base_object: ");
 204   print_generic_stmt (outf, DR_BASE_OBJECT (dr), 0);
 205
 206   for (i = 0; i < DR_NUM_DIMENSIONS (dr); i++)
 207     {
 208       fprintf (outf, "#  Access function %d: ", i);
 209       print_generic_stmt (outf, DR_ACCESS_FN (dr, i), 0);
 210     }
 211   fprintf (outf, "#)\n");
 212 }
 213
 214 /* Dumps the affine function described by FN to the file OUTF.  */
 215
 216 static void
 217 dump_affine_function (FILE *outf, affine_fn fn)
 218 {
 219   unsigned i;
 220   tree coef;
 221
 222   print_generic_expr (outf, VEC_index (tree, fn, 0), TDF_SLIM);
 223   for (i = 1; VEC_iterate (tree, fn, i, coef); i++)
 224     {
 225       fprintf (outf, " + ");
 226       print_generic_expr (outf, coef, TDF_SLIM);
 227       fprintf (outf, " * x_%u", i);
 228     }
 229 }
 230
 231 /* Dumps the conflict function CF to the file OUTF.  */
 232
 233 static void
 234 dump_conflict_function (FILE *outf, conflict_function *cf)
 235 {
 236   unsigned i;
 237
 238   if (cf->n == NO_DEPENDENCE)
 239     fprintf (outf, "no dependence\n");
 240   else if (cf->n == NOT_KNOWN)
 241     fprintf (outf, "not known\n");
 242   else
 243     {
 244       for (i = 0; i < cf->n; i++)
 245         {
 246           fprintf (outf, "[");
 247           dump_affine_function (outf, cf->fns[i]);
 248           fprintf (outf, "]\n");
 249         }
 250     }
 251 }
 252
 253 /* Dump function for a SUBSCRIPT structure.  */
 254
 255 void
 256 dump_subscript (FILE *outf, struct subscript *subscript)
 257 {
 258   conflict_function *cf = SUB_CONFLICTS_IN_A (subscript);
 259
 260   fprintf (outf, "\n (subscript \n");
 261   fprintf (outf, "  iterations_that_access_an_element_twice_in_A: ");
 262   dump_conflict_function (outf, cf);
 263   if (CF_NONTRIVIAL_P (cf))
 264     {
 265       tree last_iteration = SUB_LAST_CONFLICT (subscript);
 266       fprintf (outf, "  last_conflict: ");
 267       print_generic_stmt (outf, last_iteration, 0);
 268     }
 269
 270   cf = SUB_CONFLICTS_IN_B (subscript);
 271   fprintf (outf, "  iterations_that_access_an_element_twice_in_B: ");
 272   dump_conflict_function (outf, cf);
 273   if (CF_NONTRIVIAL_P (cf))
 274     {
 275       tree last_iteration = SUB_LAST_CONFLICT (subscript);
 276       fprintf (outf, "  last_conflict: ");
 277       print_generic_stmt (outf, last_iteration, 0);
 278     }
 279
 280   fprintf (outf, "  (Subscript distance: ");
 281   print_generic_stmt (outf, SUB_DISTANCE (subscript), 0);
 282   fprintf (outf, "  )\n");
 283   fprintf (outf, " )\n");
 284 }
 285
 286 /* Print the classic direction vector DIRV to OUTF.  */
 287
 288 void
 289 print_direction_vector (FILE *outf,
 290                         lambda_vector dirv,
 291                         int length)
 292 {
 293   int eq;
 294
 295   for (eq = 0; eq < length; eq++)
 296     {
 297       enum data_dependence_direction dir = ((enum data_dependence_direction)
 298                                             dirv[eq]);
 299
 300       switch (dir)
 301         {
 302         case dir_positive:
 303           fprintf (outf, "    +");
 304           break;
 305         case dir_negative:
 306           fprintf (outf, "    -");
 307           break;
 308         case dir_equal:
 309           fprintf (outf, "    =");
 310           break;
 311         case dir_positive_or_equal:
 312           fprintf (outf, "   +=");
 313           break;
 314         case dir_positive_or_negative:
 315           fprintf (outf, "   +-");
 316           break;
 317         case dir_negative_or_equal:
 318           fprintf (outf, "   -=");
 319           break;
 320         case dir_star:
 321           fprintf (outf, "    *");
 322           break;
 323         default:
 324           fprintf (outf, "indep");
 325           break;
 326         }
 327     }
 328   fprintf (outf, "\n");
 329 }
 330
 331 /* Print a vector of direction vectors.  */
 332
 333 void
 334 print_dir_vectors (FILE *outf, VEC (lambda_vector, heap) *dir_vects,
 335                    int length)
 336 {
 337   unsigned j;
 338   lambda_vector v;
 339
 340   FOR_EACH_VEC_ELT (lambda_vector, dir_vects, j, v)
 341     print_direction_vector (outf, v, length);
 342 }
 343
 344 /* Print out a vector VEC of length N to OUTFILE.  */
 345
 346 static inline void
 347 print_lambda_vector (FILE * outfile, lambda_vector vector, int n)
 348 {
 349   int i;
 350
 351   for (i = 0; i < n; i++)
 352     fprintf (outfile, "%3d ", vector[i]);
 353   fprintf (outfile, "\n");
 354 }
 355
 356 /* Print a vector of distance vectors.  */
 357
 358 void
 359 print_dist_vectors  (FILE *outf, VEC (lambda_vector, heap) *dist_vects,
 360                      int length)
 361 {
 362   unsigned j;
 363   lambda_vector v;
 364
 365   FOR_EACH_VEC_ELT (lambda_vector, dist_vects, j, v)
 366     print_lambda_vector (outf, v, length);
 367 }
 368
 369 /* Debug version.  */
 370
 371 DEBUG_FUNCTION void
 372 debug_data_dependence_relation (struct data_dependence_relation *ddr)
 373 {
 374   dump_data_dependence_relation (stderr, ddr);
 375 }
 376
 377 /* Dump function for a DATA_DEPENDENCE_RELATION structure.  */
 378
 379 void
 380 dump_data_dependence_relation (FILE *outf,
 381                                struct data_dependence_relation *ddr)
 382 {
 383   struct data_reference *dra, *drb;
 384
 385   fprintf (outf, "(Data Dep: \n");
 386
 387   if (!ddr || DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
 388     {
 389       if (ddr)
 390         {
 391           dra = DDR_A (ddr);
 392           drb = DDR_B (ddr);
 393           if (dra)
 394             dump_data_reference (outf, dra);
 395           else
 396             fprintf (outf, "    (nil)\n");
 397           if (drb)
 398             dump_data_reference (outf, drb);
 399           else
 400             fprintf (outf, "    (nil)\n");
 401         }
 402       fprintf (outf, "    (don't know)\n)\n");
 403       return;
 404     }
 405
 406   dra = DDR_A (ddr);
 407   drb = DDR_B (ddr);
 408   dump_data_reference (outf, dra);
 409   dump_data_reference (outf, drb);
 410
 411   if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
 412     fprintf (outf, "    (no dependence)\n");
 413
 414   else if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE)
 415     {
 416       unsigned int i;
 417       struct loop *loopi;
 418
 419       for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
 420         {
 421           fprintf (outf, "  access_fn_A: ");
 422           print_generic_stmt (outf, DR_ACCESS_FN (dra, i), 0);
 423           fprintf (outf, "  access_fn_B: ");
 424           print_generic_stmt (outf, DR_ACCESS_FN (drb, i), 0);
 425           dump_subscript (outf, DDR_SUBSCRIPT (ddr, i));
 426         }
 427
 428       fprintf (outf, "  inner loop index: %d\n", DDR_INNER_LOOP (ddr));
 429       fprintf (outf, "  loop nest: (");
 430       FOR_EACH_VEC_ELT (loop_p, DDR_LOOP_NEST (ddr), i, loopi)
 431         fprintf (outf, "%d ", loopi->num);
 432       fprintf (outf, ")\n");
 433
 434       for (i = 0; i < DDR_NUM_DIST_VECTS (ddr); i++)
 435         {
 436           fprintf (outf, "  distance_vector: ");
 437           print_lambda_vector (outf, DDR_DIST_VECT (ddr, i),
 438                                DDR_NB_LOOPS (ddr));
 439         }
 440
 441       for (i = 0; i < DDR_NUM_DIR_VECTS (ddr); i++)
 442         {
 443           fprintf (outf, "  direction_vector: ");
 444           print_direction_vector (outf, DDR_DIR_VECT (ddr, i),
 445                                   DDR_NB_LOOPS (ddr));
 446         }
 447     }
 448
 449   fprintf (outf, ")\n");
 450 }
 451
 452 /* Dump function for a DATA_DEPENDENCE_DIRECTION structure.  */
 453
 454 void
 455 dump_data_dependence_direction (FILE *file,
 456                                 enum data_dependence_direction dir)
 457 {
 458   switch (dir)
 459     {
 460     case dir_positive:
 461       fprintf (file, "+");
 462       break;
 463
 464     case dir_negative:
 465       fprintf (file, "-");
 466       break;
 467
 468     case dir_equal:
 469       fprintf (file, "=");
 470       break;
 471
 472     case dir_positive_or_negative:
 473       fprintf (file, "+-");
 474       break;
 475
 476     case dir_positive_or_equal:
 477       fprintf (file, "+=");
 478       break;
 479
 480     case dir_negative_or_equal:
 481       fprintf (file, "-=");
 482       break;
 483
 484     case dir_star:
 485       fprintf (file, "*");
 486       break;
 487
 488     default:
 489       break;
 490     }
 491 }
 492
 493 /* Dumps the distance and direction vectors in FILE.  DDRS contains
 494    the dependence relations, and VECT_SIZE is the size of the
 495    dependence vectors, or in other words the number of loops in the
 496    considered nest.  */
 497
 498 void
 499 dump_dist_dir_vectors (FILE *file, VEC (ddr_p, heap) *ddrs)
 500 {
 501   unsigned int i, j;
 502   struct data_dependence_relation *ddr;
 503   lambda_vector v;
 504
 505   FOR_EACH_VEC_ELT (ddr_p, ddrs, i, ddr)
 506     if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE && DDR_AFFINE_P (ddr))
 507       {
 508         FOR_EACH_VEC_ELT (lambda_vector, DDR_DIST_VECTS (ddr), j, v)
 509           {
 510             fprintf (file, "DISTANCE_V (");
 511             print_lambda_vector (file, v, DDR_NB_LOOPS (ddr));
 512             fprintf (file, ")\n");
 513           }
 514
 515         FOR_EACH_VEC_ELT (lambda_vector, DDR_DIR_VECTS (ddr), j, v)
 516           {
 517             fprintf (file, "DIRECTION_V (");
 518             print_direction_vector (file, v, DDR_NB_LOOPS (ddr));
 519             fprintf (file, ")\n");
 520           }
 521       }
 522
 523   fprintf (file, "\n\n");
 524 }
 525
 526 /* Dumps the data dependence relations DDRS in FILE.  */
 527
 528 void
 529 dump_ddrs (FILE *file, VEC (ddr_p, heap) *ddrs)
 530 {
 531   unsigned int i;
 532   struct data_dependence_relation *ddr;
 533
 534   FOR_EACH_VEC_ELT (ddr_p, ddrs, i, ddr)
 535     dump_data_dependence_relation (file, ddr);
 536
 537   fprintf (file, "\n\n");
 538 }
 539
 540 /* Helper function for split_constant_offset.  Expresses OP0 CODE OP1
 541    (the type of the result is TYPE) as VAR + OFF, where OFF is a nonzero
 542    constant of type ssizetype, and returns true.  If we cannot do this
 543    with OFF nonzero, OFF and VAR are set to NULL_TREE instead and false
 544    is returned.  */
 545
 546 static bool
 547 split_constant_offset_1 (tree type, tree op0, enum tree_code code, tree op1,
 548                          tree *var, tree *off)
 549 {
 550   tree var0, var1;
 551   tree off0, off1;
 552   enum tree_code ocode = code;
 553
 554   *var = NULL_TREE;
 555   *off = NULL_TREE;
 556
 557   switch (code)
 558     {
 559     case INTEGER_CST:
 560       *var = build_int_cst (type, 0);
 561       *off = fold_convert (ssizetype, op0);
 562       return true;
 563
 564     case POINTER_PLUS_EXPR:
 565       ocode = PLUS_EXPR;
 566       /* FALLTHROUGH */
 567     case PLUS_EXPR:
 568     case MINUS_EXPR:
 569       split_constant_offset (op0, &var0, &off0);
 570       split_constant_offset (op1, &var1, &off1);
 571       *var = fold_build2 (code, type, var0, var1);
 572       *off = size_binop (ocode, off0, off1);
 573       return true;
 574
 575     case MULT_EXPR:
 576       if (TREE_CODE (op1) != INTEGER_CST)
 577         return false;
 578
 579       split_constant_offset (op0, &var0, &off0);
 580       *var = fold_build2 (MULT_EXPR, type, var0, op1);
 581       *off = size_binop (MULT_EXPR, off0, fold_convert (ssizetype, op1));
 582       return true;
 583
 584     case ADDR_EXPR:
 585       {
 586         tree base, poffset;
 587         HOST_WIDE_INT pbitsize, pbitpos;
 588         enum machine_mode pmode;
 589         int punsignedp, pvolatilep;
 590
 591         op0 = TREE_OPERAND (op0, 0);
 592         base = get_inner_reference (op0, &pbitsize, &pbitpos, &poffset,
 593                                     &pmode, &punsignedp, &pvolatilep, false);
 594
 595         if (pbitpos % BITS_PER_UNIT != 0)
 596           return false;
 597         base = build_fold_addr_expr (base);
 598         off0 = ssize_int (pbitpos / BITS_PER_UNIT);
 599
 600         if (poffset)
 601           {
 602             split_constant_offset (poffset, &poffset, &off1);
 603             off0 = size_binop (PLUS_EXPR, off0, off1);
 604             if (POINTER_TYPE_P (TREE_TYPE (base)))
 605               base = fold_build_pointer_plus (base, poffset);
 606             else
 607               base = fold_build2 (PLUS_EXPR, TREE_TYPE (base), base,
 608                                   fold_convert (TREE_TYPE (base), poffset));
 609           }
 610
 611         var0 = fold_convert (type, base);
 612
 613         /* If variable length types are involved, punt, otherwise casts
 614            might be converted into ARRAY_REFs in gimplify_conversion.
 615            To compute that ARRAY_REF's element size TYPE_SIZE_UNIT, which
 616            possibly no longer appears in current GIMPLE, might resurface.
 617            This perhaps could run
 618            if (CONVERT_EXPR_P (var0))
 619              {
 620                gimplify_conversion (&var0);
 621                // Attempt to fill in any within var0 found ARRAY_REF's
 622                // element size from corresponding op embedded ARRAY_REF,
 623                // if unsuccessful, just punt.
 624              }  */
 625         while (POINTER_TYPE_P (type))
 626           type = TREE_TYPE (type);
 627         if (int_size_in_bytes (type) < 0)
 628           return false;
 629
 630         *var = var0;
 631         *off = off0;
 632         return true;
 633       }
 634
 635     case SSA_NAME:
 636       {
 637         gimple def_stmt = SSA_NAME_DEF_STMT (op0);
 638         enum tree_code subcode;
 639
 640         if (gimple_code (def_stmt) != GIMPLE_ASSIGN)
 641           return false;
 642
 643         var0 = gimple_assign_rhs1 (def_stmt);
 644         subcode = gimple_assign_rhs_code (def_stmt);
 645         var1 = gimple_assign_rhs2 (def_stmt);
 646
 647         return split_constant_offset_1 (type, var0, subcode, var1, var, off);
 648       }
 649     CASE_CONVERT:
 650       {
 651         /* We must not introduce undefined overflow, and we must not change the value.
 652            Hence we're okay if the inner type doesn't overflow to start with
 653            (pointer or signed), the outer type also is an integer or pointer
 654            and the outer precision is at least as large as the inner.  */
 655         tree itype = TREE_TYPE (op0);
 656         if ((POINTER_TYPE_P (itype)
 657              || (INTEGRAL_TYPE_P (itype) && TYPE_OVERFLOW_UNDEFINED (itype)))
 658             && TYPE_PRECISION (type) >= TYPE_PRECISION (itype)
 659             && (POINTER_TYPE_P (type) || INTEGRAL_TYPE_P (type)))
 660           {
 661             split_constant_offset (op0, &var0, off);
 662             *var = fold_convert (type, var0);
 663             return true;
 664           }
 665         return false;
 666       }
 667
 668     default:
 669       return false;
 670     }
 671 }
 672
 673 /* Expresses EXP as VAR + OFF, where off is a constant.  The type of OFF
 674    will be ssizetype.  */
 675
 676 void
 677 split_constant_offset (tree exp, tree *var, tree *off)
 678 {
 679   tree type = TREE_TYPE (exp), otype, op0, op1, e, o;
 680   enum tree_code code;
 681
 682   *var = exp;
 683   *off = ssize_int (0);
 684   STRIP_NOPS (exp);
 685
 686   if (tree_is_chrec (exp)
 687       || get_gimple_rhs_class (TREE_CODE (exp)) == GIMPLE_TERNARY_RHS)
 688     return;
 689
 690   otype = TREE_TYPE (exp);
 691   code = TREE_CODE (exp);
 692   extract_ops_from_tree (exp, &code, &op0, &op1);
 693   if (split_constant_offset_1 (otype, op0, code, op1, &e, &o))
 694     {
 695       *var = fold_convert (type, e);
 696       *off = o;
 697     }
 698 }
 699
 700 /* Returns the address ADDR of an object in a canonical shape (without nop
 701    casts, and with type of pointer to the object).  */
 702
 703 static tree
 704 canonicalize_base_object_address (tree addr)
 705 {
 706   tree orig = addr;
 707
 708   STRIP_NOPS (addr);
 709
 710   /* The base address may be obtained by casting from integer, in that case
 711      keep the cast.  */
 712   if (!POINTER_TYPE_P (TREE_TYPE (addr)))
 713     return orig;
 714
 715   if (TREE_CODE (addr) != ADDR_EXPR)
 716     return addr;
 717
 718   return build_fold_addr_expr (TREE_OPERAND (addr, 0));
 719 }
 720
 721 /* Analyzes the behavior of the memory reference DR in the innermost loop or
 722    basic block that contains it.  Returns true if analysis succeed or false
 723    otherwise.  */
 724
 725 bool
 726 dr_analyze_innermost (struct data_reference *dr, struct loop *nest)
 727 {
 728   gimple stmt = DR_STMT (dr);
 729   struct loop *loop = loop_containing_stmt (stmt);
 730   tree ref = DR_REF (dr);
 731   HOST_WIDE_INT pbitsize, pbitpos;
 732   tree base, poffset;
 733   enum machine_mode pmode;
 734   int punsignedp, pvolatilep;
 735   affine_iv base_iv, offset_iv;
 736   tree init, dinit, step;
 737   bool in_loop = (loop && loop->num);
 738
 739   if (dump_file && (dump_flags & TDF_DETAILS))
 740     fprintf (dump_file, "analyze_innermost: ");
 741
 742   base = get_inner_reference (ref, &pbitsize, &pbitpos, &poffset,
 743                               &pmode, &punsignedp, &pvolatilep, false);
 744   gcc_assert (base != NULL_TREE);
 745
 746   if (pbitpos % BITS_PER_UNIT != 0)
 747     {
 748       if (dump_file && (dump_flags & TDF_DETAILS))
 749         fprintf (dump_file, "failed: bit offset alignment.\n");
 750       return false;
 751     }
 752
 753   if (TREE_CODE (base) == MEM_REF)
 754     {
 755       if (!integer_zerop (TREE_OPERAND (base, 1)))
 756         {
 757           if (!poffset)
 758             {
 759               double_int moff = mem_ref_offset (base);
 760               poffset = double_int_to_tree (sizetype, moff);
 761             }
 762           else
 763             poffset = size_binop (PLUS_EXPR, poffset, TREE_OPERAND (base, 1));
 764         }
 765       base = TREE_OPERAND (base, 0);
 766     }
 767   else
 768     base = build_fold_addr_expr (base);
 769
 770   if (in_loop)
 771     {
 772       if (!simple_iv (loop, loop_containing_stmt (stmt), base, &base_iv,
 773                       false))
 774         {
 775           if (nest)
 776             {
 777               if (dump_file && (dump_flags & TDF_DETAILS))
 778                 fprintf (dump_file, "failed: evolution of base is not"
 779                                     " affine.\n");
 780               return false;
 781             }
 782           else
 783             {
 784               base_iv.base = base;
 785               base_iv.step = ssize_int (0);
 786               base_iv.no_overflow = true;
 787             }
 788         }
 789     }
 790   else
 791     {
 792       base_iv.base = base;
 793       base_iv.step = ssize_int (0);
 794       base_iv.no_overflow = true;
 795     }
 796
 797   if (!poffset)
 798     {
 799       offset_iv.base = ssize_int (0);
 800       offset_iv.step = ssize_int (0);
 801     }
 802   else
 803     {
 804       if (!in_loop)
 805         {
 806           offset_iv.base = poffset;
 807           offset_iv.step = ssize_int (0);
 808         }
 809       else if (!simple_iv (loop, loop_containing_stmt (stmt),
 810                            poffset, &offset_iv, false))
 811         {
 812           if (nest)
 813             {
 814               if (dump_file && (dump_flags & TDF_DETAILS))
 815                 fprintf (dump_file, "failed: evolution of offset is not"
 816                                     " affine.\n");
 817               return false;
 818             }
 819           else
 820             {
 821               offset_iv.base = poffset;
 822               offset_iv.step = ssize_int (0);
 823             }
 824         }
 825     }
 826
 827   init = ssize_int (pbitpos / BITS_PER_UNIT);
 828   split_constant_offset (base_iv.base, &base_iv.base, &dinit);
 829   init =  size_binop (PLUS_EXPR, init, dinit);
 830   split_constant_offset (offset_iv.base, &offset_iv.base, &dinit);
 831   init =  size_binop (PLUS_EXPR, init, dinit);
 832
 833   step = size_binop (PLUS_EXPR,
 834                      fold_convert (ssizetype, base_iv.step),
 835                      fold_convert (ssizetype, offset_iv.step));
 836
 837   DR_BASE_ADDRESS (dr) = canonicalize_base_object_address (base_iv.base);
 838
 839   DR_OFFSET (dr) = fold_convert (ssizetype, offset_iv.base);
 840   DR_INIT (dr) = init;
 841   DR_STEP (dr) = step;
 842
 843   DR_ALIGNED_TO (dr) = size_int (highest_pow2_factor (offset_iv.base));
 844
 845   if (dump_file && (dump_flags & TDF_DETAILS))
 846     fprintf (dump_file, "success.\n");
 847
 848   return true;
 849 }
 850
 851 /* Determines the base object and the list of indices of memory reference
 852    DR, analyzed in LOOP and instantiated in loop nest NEST.  */
 853
 854 static void
 855 dr_analyze_indices (struct data_reference *dr, loop_p nest, loop_p loop)
 856 {
 857   VEC (tree, heap) *access_fns = NULL;
 858   tree ref, *aref, op;
 859   tree base, off, access_fn;
 860   basic_block before_loop;
 861
 862   /* If analyzing a basic-block there are no indices to analyze
 863      and thus no access functions.  */
 864   if (!nest)
 865     {
 866       DR_BASE_OBJECT (dr) = DR_REF (dr);
 867       DR_ACCESS_FNS (dr) = NULL;
 868       return;
 869     }
 870
 871   ref = unshare_expr (DR_REF (dr));
 872   before_loop = block_before_loop (nest);
 873
 874   /* REALPART_EXPR and IMAGPART_EXPR can be handled like accesses
 875      into a two element array with a constant index.  The base is
 876      then just the immediate underlying object.  */
 877   if (TREE_CODE (ref) == REALPART_EXPR)
 878     {
 879       ref = TREE_OPERAND (ref, 0);
 880       VEC_safe_push (tree, heap, access_fns, integer_zero_node);
 881     }
 882   else if (TREE_CODE (ref) == IMAGPART_EXPR)
 883     {
 884       ref = TREE_OPERAND (ref, 0);
 885       VEC_safe_push (tree, heap, access_fns, integer_one_node);
 886     }
 887
 888   /* Analyze access functions of dimensions we know to be independent.  */
 889   aref = &ref;
 890   while (handled_component_p (*aref))
 891     {
 892       if (TREE_CODE (*aref) == ARRAY_REF)
 893         {
 894           op = TREE_OPERAND (*aref, 1);
 895           access_fn = analyze_scalar_evolution (loop, op);
 896           access_fn = instantiate_scev (before_loop, loop, access_fn);
 897           VEC_safe_push (tree, heap, access_fns, access_fn);
 898           /* For ARRAY_REFs the base is the reference with the index replaced
 899              by zero if we can not strip it as the outermost component.  */
 900           if (*aref == ref)
 901             {
 902               *aref = TREE_OPERAND (*aref, 0);
 903               continue;
 904             }
 905           else
 906             TREE_OPERAND (*aref, 1) = build_int_cst (TREE_TYPE (op), 0);
 907         }
 908
 909       aref = &TREE_OPERAND (*aref, 0);
 910     }
 911
 912   /* If the address operand of a MEM_REF base has an evolution in the
 913      analyzed nest, add it as an additional independent access-function.  */
 914   if (TREE_CODE (*aref) == MEM_REF)
 915     {
 916       op = TREE_OPERAND (*aref, 0);
 917       access_fn = analyze_scalar_evolution (loop, op);
 918       access_fn = instantiate_scev (before_loop, loop, access_fn);
 919       if (TREE_CODE (access_fn) == POLYNOMIAL_CHREC)
 920         {
 921           tree orig_type;
 922           base = initial_condition (access_fn);
 923           orig_type = TREE_TYPE (base);
 924           STRIP_USELESS_TYPE_CONVERSION (base);
 925           split_constant_offset (base, &base, &off);
 926           /* Fold the MEM_REF offset into the evolutions initial
 927              value to make more bases comparable.  */
 928           if (!integer_zerop (TREE_OPERAND (*aref, 1)))
 929             {
 930               off = size_binop (PLUS_EXPR, off,
 931                                 fold_convert (ssizetype,
 932                                               TREE_OPERAND (*aref, 1)));
 933               TREE_OPERAND (*aref, 1)
 934                 = build_int_cst (TREE_TYPE (TREE_OPERAND (*aref, 1)), 0);
 935             }
 936           access_fn = chrec_replace_initial_condition
 937               (access_fn, fold_convert (orig_type, off));
 938           *aref = fold_build2_loc (EXPR_LOCATION (*aref),
 939                                    MEM_REF, TREE_TYPE (*aref),
 940                                    base, TREE_OPERAND (*aref, 1));
 941           VEC_safe_push (tree, heap, access_fns, access_fn);
 942         }
 943     }
 944
 945   DR_BASE_OBJECT (dr) = ref;
 946   DR_ACCESS_FNS (dr) = access_fns;
 947 }
 948
 949 /* Extracts the alias analysis information from the memory reference DR.  */
 950
 951 static void
 952 dr_analyze_alias (struct data_reference *dr)
 953 {
 954   tree ref = DR_REF (dr);
 955   tree base = get_base_address (ref), addr;
 956
 957   if (INDIRECT_REF_P (base)
 958       || TREE_CODE (base) == MEM_REF)
 959     {
 960       addr = TREE_OPERAND (base, 0);
 961       if (TREE_CODE (addr) == SSA_NAME)
 962         DR_PTR_INFO (dr) = SSA_NAME_PTR_INFO (addr);
 963     }
 964 }
 965
 966 /* Frees data reference DR.  */
 967
 968 void
 969 free_data_ref (data_reference_p dr)
 970 {
 971   VEC_free (tree, heap, DR_ACCESS_FNS (dr));
 972   free (dr);
 973 }
 974
 975 /* Analyzes memory reference MEMREF accessed in STMT.  The reference
 976    is read if IS_READ is true, write otherwise.  Returns the
 977    data_reference description of MEMREF.  NEST is the outermost loop
 978    in which the reference should be instantiated, LOOP is the loop in
 979    which the data reference should be analyzed.  */
 980
 981 struct data_reference *
 982 create_data_ref (loop_p nest, loop_p loop, tree memref, gimple stmt,
 983                  bool is_read)
 984 {
 985   struct data_reference *dr;
 986
 987   if (dump_file && (dump_flags & TDF_DETAILS))
 988     {
 989       fprintf (dump_file, "Creating dr for ");
 990       print_generic_expr (dump_file, memref, TDF_SLIM);
 991       fprintf (dump_file, "\n");
 992     }
 993
 994   dr = XCNEW (struct data_reference);
 995   DR_STMT (dr) = stmt;
 996   DR_REF (dr) = memref;
 997   DR_IS_READ (dr) = is_read;
 998
 999   dr_analyze_innermost (dr, nest);
1000   dr_analyze_indices (dr, nest, loop);
1001   dr_analyze_alias (dr);
1002
1003   if (dump_file && (dump_flags & TDF_DETAILS))
1004     {
1005       unsigned i;
1006       fprintf (dump_file, "\tbase_address: ");
1007       print_generic_expr (dump_file, DR_BASE_ADDRESS (dr), TDF_SLIM);
1008       fprintf (dump_file, "\n\toffset from base address: ");
1009       print_generic_expr (dump_file, DR_OFFSET (dr), TDF_SLIM);
1010       fprintf (dump_file, "\n\tconstant offset from base address: ");
1011       print_generic_expr (dump_file, DR_INIT (dr), TDF_SLIM);
1012       fprintf (dump_file, "\n\tstep: ");
1013       print_generic_expr (dump_file, DR_STEP (dr), TDF_SLIM);
1014       fprintf (dump_file, "\n\taligned to: ");
1015       print_generic_expr (dump_file, DR_ALIGNED_TO (dr), TDF_SLIM);
1016       fprintf (dump_file, "\n\tbase_object: ");
1017       print_generic_expr (dump_file, DR_BASE_OBJECT (dr), TDF_SLIM);
1018       fprintf (dump_file, "\n");
1019       for (i = 0; i < DR_NUM_DIMENSIONS (dr); i++)
1020         {
1021           fprintf (dump_file, "\tAccess function %d: ", i);
1022           print_generic_stmt (dump_file, DR_ACCESS_FN (dr, i), TDF_SLIM);
1023         }
1024     }
1025
1026   return dr;
1027 }
1028
1029 /* Check if OFFSET1 and OFFSET2 (DR_OFFSETs of some data-refs) are identical
1030    expressions.  */
1031 static bool
1032 dr_equal_offsets_p1 (tree offset1, tree offset2)
1033 {
1034   bool res;
1035
1036   STRIP_NOPS (offset1);
1037   STRIP_NOPS (offset2);
1038
1039   if (offset1 == offset2)
1040     return true;
1041
1042   if (TREE_CODE (offset1) != TREE_CODE (offset2)
1043       || (!BINARY_CLASS_P (offset1) && !UNARY_CLASS_P (offset1)))
1044     return false;
1045
1046   res = dr_equal_offsets_p1 (TREE_OPERAND (offset1, 0),
1047                              TREE_OPERAND (offset2, 0));
1048
1049   if (!res || !BINARY_CLASS_P (offset1))
1050     return res;
1051
1052   res = dr_equal_offsets_p1 (TREE_OPERAND (offset1, 1),
1053                              TREE_OPERAND (offset2, 1));
1054
1055   return res;
1056 }
1057
1058 /* Check if DRA and DRB have equal offsets.  */
1059 bool
1060 dr_equal_offsets_p (struct data_reference *dra,
1061                     struct data_reference *drb)
1062 {
1063   tree offset1, offset2;
1064
1065   offset1 = DR_OFFSET (dra);
1066   offset2 = DR_OFFSET (drb);
1067
1068   return dr_equal_offsets_p1 (offset1, offset2);
1069 }
1070
1071 /* Returns true if FNA == FNB.  */
1072
1073 static bool
1074 affine_function_equal_p (affine_fn fna, affine_fn fnb)
1075 {
1076   unsigned i, n = VEC_length (tree, fna);
1077
1078   if (n != VEC_length (tree, fnb))
1079     return false;
1080
1081   for (i = 0; i < n; i++)
1082     if (!operand_equal_p (VEC_index (tree, fna, i),
1083                           VEC_index (tree, fnb, i), 0))
1084       return false;
1085
1086   return true;
1087 }
1088
1089 /* If all the functions in CF are the same, returns one of them,
1090    otherwise returns NULL.  */
1091
1092 static affine_fn
1093 common_affine_function (conflict_function *cf)
1094 {
1095   unsigned i;
1096   affine_fn comm;
1097
1098   if (!CF_NONTRIVIAL_P (cf))
1099     return NULL;
1100
1101   comm = cf->fns[0];
1102
1103   for (i = 1; i < cf->n; i++)
1104     if (!affine_function_equal_p (comm, cf->fns[i]))
1105       return NULL;
1106
1107   return comm;
1108 }
1109
1110 /* Returns the base of the affine function FN.  */
1111
1112 static tree
1113 affine_function_base (affine_fn fn)
1114 {
1115   return VEC_index (tree, fn, 0);
1116 }
1117
1118 /* Returns true if FN is a constant.  */
1119
1120 static bool
1121 affine_function_constant_p (affine_fn fn)
1122 {
1123   unsigned i;
1124   tree coef;
1125
1126   for (i = 1; VEC_iterate (tree, fn, i, coef); i++)
1127     if (!integer_zerop (coef))
1128       return false;
1129
1130   return true;
1131 }
1132
1133 /* Returns true if FN is the zero constant function.  */
1134
1135 static bool
1136 affine_function_zero_p (affine_fn fn)
1137 {
1138   return (integer_zerop (affine_function_base (fn))
1139           && affine_function_constant_p (fn));
1140 }
1141
1142 /* Returns a signed integer type with the largest precision from TA
1143    and TB.  */
1144
1145 static tree
1146 signed_type_for_types (tree ta, tree tb)
1147 {
1148   if (TYPE_PRECISION (ta) > TYPE_PRECISION (tb))
1149     return signed_type_for (ta);
1150   else
1151     return signed_type_for (tb);
1152 }
1153
1154 /* Applies operation OP on affine functions FNA and FNB, and returns the
1155    result.  */
1156
1157 static affine_fn
1158 affine_fn_op (enum tree_code op, affine_fn fna, affine_fn fnb)
1159 {
1160   unsigned i, n, m;
1161   affine_fn ret;
1162   tree coef;
1163
1164   if (VEC_length (tree, fnb) > VEC_length (tree, fna))
1165     {
1166       n = VEC_length (tree, fna);
1167       m = VEC_length (tree, fnb);
1168     }
1169   else
1170     {
1171       n = VEC_length (tree, fnb);
1172       m = VEC_length (tree, fna);
1173     }
1174
1175   ret = VEC_alloc (tree, heap, m);
1176   for (i = 0; i < n; i++)
1177     {
1178       tree type = signed_type_for_types (TREE_TYPE (VEC_index (tree, fna, i)),
1179                                          TREE_TYPE (VEC_index (tree, fnb, i)));
1180
1181       VEC_quick_push (tree, ret,
1182                       fold_build2 (op, type,
1183                                    VEC_index (tree, fna, i),
1184                                    VEC_index (tree, fnb, i)));
1185     }
1186
1187   for (; VEC_iterate (tree, fna, i, coef); i++)
1188     VEC_quick_push (tree, ret,
1189                     fold_build2 (op, signed_type_for (TREE_TYPE (coef)),
1190                                  coef, integer_zero_node));
1191   for (; VEC_iterate (tree, fnb, i, coef); i++)
1192     VEC_quick_push (tree, ret,
1193                     fold_build2 (op, signed_type_for (TREE_TYPE (coef)),
1194                                  integer_zero_node, coef));
1195
1196   return ret;
1197 }
1198
1199 /* Returns the sum of affine functions FNA and FNB.  */
1200
1201 static affine_fn
1202 affine_fn_plus (affine_fn fna, affine_fn fnb)
1203 {
1204   return affine_fn_op (PLUS_EXPR, fna, fnb);
1205 }
1206
1207 /* Returns the difference of affine functions FNA and FNB.  */
1208
1209 static affine_fn
1210 affine_fn_minus (affine_fn fna, affine_fn fnb)
1211 {
1212   return affine_fn_op (MINUS_EXPR, fna, fnb);
1213 }
1214
1215 /* Frees affine function FN.  */
1216
1217 static void
1218 affine_fn_free (affine_fn fn)
1219 {
1220   VEC_free (tree, heap, fn);
1221 }
1222
1223 /* Determine for each subscript in the data dependence relation DDR
1224    the distance.  */
1225
1226 static void
1227 compute_subscript_distance (struct data_dependence_relation *ddr)
1228 {
1229   conflict_function *cf_a, *cf_b;
1230   affine_fn fn_a, fn_b, diff;
1231
1232   if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE)
1233     {
1234       unsigned int i;
1235
1236       for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
1237         {
1238           struct subscript *subscript;
1239
1240           subscript = DDR_SUBSCRIPT (ddr, i);
1241           cf_a = SUB_CONFLICTS_IN_A (subscript);
1242           cf_b = SUB_CONFLICTS_IN_B (subscript);
1243
1244           fn_a = common_affine_function (cf_a);
1245           fn_b = common_affine_function (cf_b);
1246           if (!fn_a || !fn_b)
1247             {
1248               SUB_DISTANCE (subscript) = chrec_dont_know;
1249               return;
1250             }
1251           diff = affine_fn_minus (fn_a, fn_b);
1252
1253           if (affine_function_constant_p (diff))
1254             SUB_DISTANCE (subscript) = affine_function_base (diff);
1255           else
1256             SUB_DISTANCE (subscript) = chrec_dont_know;
1257
1258           affine_fn_free (diff);
1259         }
1260     }
1261 }
1262
1263 /* Returns the conflict function for "unknown".  */
1264
1265 static conflict_function *
1266 conflict_fn_not_known (void)
1267 {
1268   conflict_function *fn = XCNEW (conflict_function);
1269   fn->n = NOT_KNOWN;
1270
1271   return fn;
1272 }
1273
1274 /* Returns the conflict function for "independent".  */
1275
1276 static conflict_function *
1277 conflict_fn_no_dependence (void)
1278 {
1279   conflict_function *fn = XCNEW (conflict_function);
1280   fn->n = NO_DEPENDENCE;
1281
1282   return fn;
1283 }
1284
1285 /* Returns true if the address of OBJ is invariant in LOOP.  */
1286
1287 static bool
1288 object_address_invariant_in_loop_p (const struct loop *loop, const_tree obj)
1289 {
1290   while (handled_component_p (obj))
1291     {
1292       if (TREE_CODE (obj) == ARRAY_REF)
1293         {
1294           /* Index of the ARRAY_REF was zeroed in analyze_indices, thus we only
1295              need to check the stride and the lower bound of the reference.  */
1296           if (chrec_contains_symbols_defined_in_loop (TREE_OPERAND (obj, 2),
1297                                                       loop->num)
1298               || chrec_contains_symbols_defined_in_loop (TREE_OPERAND (obj, 3),
1299                                                          loop->num))
1300             return false;
1301         }
1302       else if (TREE_CODE (obj) == COMPONENT_REF)
1303         {
1304           if (chrec_contains_symbols_defined_in_loop (TREE_OPERAND (obj, 2),
1305                                                       loop->num))
1306             return false;
1307         }
1308       obj = TREE_OPERAND (obj, 0);
1309     }
1310
1311   if (!INDIRECT_REF_P (obj)
1312       && TREE_CODE (obj) != MEM_REF)
1313     return true;
1314
1315   return !chrec_contains_symbols_defined_in_loop (TREE_OPERAND (obj, 0),
1316                                                   loop->num);
1317 }
1318
1319 /* Returns false if we can prove that data references A and B do not alias,
1320    true otherwise.  If LOOP_NEST is false no cross-iteration aliases are
1321    considered.  */
1322
1323 bool
1324 dr_may_alias_p (const struct data_reference *a, const struct data_reference *b,
1325                 bool loop_nest)
1326 {
1327   tree addr_a = DR_BASE_OBJECT (a);
1328   tree addr_b = DR_BASE_OBJECT (b);
1329
1330   /* If we are not processing a loop nest but scalar code we
1331      do not need to care about possible cross-iteration dependences
1332      and thus can process the full original reference.  Do so,
1333      similar to how loop invariant motion applies extra offset-based
1334      disambiguation.  */
1335   if (!loop_nest)
1336     {
1337       aff_tree off1, off2;
1338       double_int size1, size2;
1339       get_inner_reference_aff (DR_REF (a), &off1, &size1);
1340       get_inner_reference_aff (DR_REF (b), &off2, &size2);
1341       aff_combination_scale (&off1, double_int_minus_one);
1342       aff_combination_add (&off2, &off1);
1343       if (aff_comb_cannot_overlap_p (&off2, size1, size2))
1344         return false;
1345     }
1346
1347   if (DR_IS_WRITE (a) && DR_IS_WRITE (b))
1348     return refs_output_dependent_p (addr_a, addr_b);
1349   else if (DR_IS_READ (a) && DR_IS_WRITE (b))
1350     return refs_anti_dependent_p (addr_a, addr_b);
1351   return refs_may_alias_p (addr_a, addr_b);
1352 }
1353
1354 /* Initialize a data dependence relation between data accesses A and
1355    B.  NB_LOOPS is the number of loops surrounding the references: the
1356    size of the classic distance/direction vectors.  */
1357
1358 struct data_dependence_relation *
1359 initialize_data_dependence_relation (struct data_reference *a,
1360                                      struct data_reference *b,
1361                                      VEC (loop_p, heap) *loop_nest)
1362 {
1363   struct data_dependence_relation *res;
1364   unsigned int i;
1365
1366   res = XNEW (struct data_dependence_relation);
1367   DDR_A (res) = a;
1368   DDR_B (res) = b;
1369   DDR_LOOP_NEST (res) = NULL;
1370   DDR_REVERSED_P (res) = false;
1371   DDR_SUBSCRIPTS (res) = NULL;
1372   DDR_DIR_VECTS (res) = NULL;
1373   DDR_DIST_VECTS (res) = NULL;
1374
1375   if (a == NULL || b == NULL)
1376     {
1377       DDR_ARE_DEPENDENT (res) = chrec_dont_know;
1378       return res;
1379     }
1380
1381   /* If the data references do not alias, then they are independent.  */
1382   if (!dr_may_alias_p (a, b, loop_nest != NULL))
1383     {
1384       DDR_ARE_DEPENDENT (res) = chrec_known;
1385       return res;
1386     }
1387
1388   /* When the references are exactly the same, don't spend time doing
1389      the data dependence tests, just initialize the ddr and return.  */
1390   if (operand_equal_p (DR_REF (a), DR_REF (b), 0))
1391     {
1392       DDR_AFFINE_P (res) = true;
1393       DDR_ARE_DEPENDENT (res) = NULL_TREE;
1394       DDR_SUBSCRIPTS (res) = VEC_alloc (subscript_p, heap, DR_NUM_DIMENSIONS (a));
1395       DDR_LOOP_NEST (res) = loop_nest;
1396       DDR_INNER_LOOP (res) = 0;
1397       DDR_SELF_REFERENCE (res) = true;
1398       compute_self_dependence (res);
1399       return res;
1400     }
1401
1402   /* If the references do not access the same object, we do not know
1403      whether they alias or not.  */
1404   if (!operand_equal_p (DR_BASE_OBJECT (a), DR_BASE_OBJECT (b), 0))
1405     {
1406       DDR_ARE_DEPENDENT (res) = chrec_dont_know;
1407       return res;
1408     }
1409
1410   /* If the base of the object is not invariant in the loop nest, we cannot
1411      analyze it.  TODO -- in fact, it would suffice to record that there may
1412      be arbitrary dependences in the loops where the base object varies.  */
1413   if (loop_nest
1414       && !object_address_invariant_in_loop_p (VEC_index (loop_p, loop_nest, 0),
1415                                               DR_BASE_OBJECT (a)))
1416     {
1417       DDR_ARE_DEPENDENT (res) = chrec_dont_know;
1418       return res;
1419     }
1420
1421   /* If the number of dimensions of the access to not agree we can have
1422      a pointer access to a component of the array element type and an
1423      array access while the base-objects are still the same.  Punt.  */
1424   if (DR_NUM_DIMENSIONS (a) != DR_NUM_DIMENSIONS (b))
1425     {
1426       DDR_ARE_DEPENDENT (res) = chrec_dont_know;
1427       return res;
1428     }
1429
1430   DDR_AFFINE_P (res) = true;
1431   DDR_ARE_DEPENDENT (res) = NULL_TREE;
1432   DDR_SUBSCRIPTS (res) = VEC_alloc (subscript_p, heap, DR_NUM_DIMENSIONS (a));
1433   DDR_LOOP_NEST (res) = loop_nest;
1434   DDR_INNER_LOOP (res) = 0;
1435   DDR_SELF_REFERENCE (res) = false;
1436
1437   for (i = 0; i < DR_NUM_DIMENSIONS (a); i++)
1438     {
1439       struct subscript *subscript;
1440
1441       subscript = XNEW (struct subscript);
1442       SUB_CONFLICTS_IN_A (subscript) = conflict_fn_not_known ();
1443       SUB_CONFLICTS_IN_B (subscript) = conflict_fn_not_known ();
1444       SUB_LAST_CONFLICT (subscript) = chrec_dont_know;
1445       SUB_DISTANCE (subscript) = chrec_dont_know;
1446       VEC_safe_push (subscript_p, heap, DDR_SUBSCRIPTS (res), subscript);
1447     }
1448
1449   return res;
1450 }
1451
1452 /* Frees memory used by the conflict function F.  */
1453
1454 static void
1455 free_conflict_function (conflict_function *f)
1456 {
1457   unsigned i;
1458
1459   if (CF_NONTRIVIAL_P (f))
1460     {
1461       for (i = 0; i < f->n; i++)
1462         affine_fn_free (f->fns[i]);
1463     }
1464   free (f);
1465 }
1466
1467 /* Frees memory used by SUBSCRIPTS.  */
1468
1469 static void
1470 free_subscripts (VEC (subscript_p, heap) *subscripts)
1471 {
1472   unsigned i;
1473   subscript_p s;
1474
1475   FOR_EACH_VEC_ELT (subscript_p, subscripts, i, s)
1476     {
1477       free_conflict_function (s->conflicting_iterations_in_a);
1478       free_conflict_function (s->conflicting_iterations_in_b);
1479       free (s);
1480     }
1481   VEC_free (subscript_p, heap, subscripts);
1482 }
1483
1484 /* Set DDR_ARE_DEPENDENT to CHREC and finalize the subscript overlap
1485    description.  */
1486
1487 static inline void
1488 finalize_ddr_dependent (struct data_dependence_relation *ddr,
1489                         tree chrec)
1490 {
1491   if (dump_file && (dump_flags & TDF_DETAILS))
1492     {
1493       fprintf (dump_file, "(dependence classified: ");
1494       print_generic_expr (dump_file, chrec, 0);
1495       fprintf (dump_file, ")\n");
1496     }
1497
1498   DDR_ARE_DEPENDENT (ddr) = chrec;
1499   free_subscripts (DDR_SUBSCRIPTS (ddr));
1500   DDR_SUBSCRIPTS (ddr) = NULL;
1501 }
1502
1503 /* The dependence relation DDR cannot be represented by a distance
1504    vector.  */
1505
1506 static inline void
1507 non_affine_dependence_relation (struct data_dependence_relation *ddr)
1508 {
1509   if (dump_file && (dump_flags & TDF_DETAILS))
1510     fprintf (dump_file, "(Dependence relation cannot be represented by distance vector.) \n");
1511
1512   DDR_AFFINE_P (ddr) = false;
1513 }
1514
1515 \f
1516
1517 /* This section contains the classic Banerjee tests.  */
1518
1519 /* Returns true iff CHREC_A and CHREC_B are not dependent on any index
1520    variables, i.e., if the ZIV (Zero Index Variable) test is true.  */
1521
1522 static inline bool
1523 ziv_subscript_p (const_tree chrec_a, const_tree chrec_b)
1524 {
1525   return (evolution_function_is_constant_p (chrec_a)
1526           && evolution_function_is_constant_p (chrec_b));
1527 }
1528
1529 /* Returns true iff CHREC_A and CHREC_B are dependent on an index
1530    variable, i.e., if the SIV (Single Index Variable) test is true.  */
1531
1532 static bool
1533 siv_subscript_p (const_tree chrec_a, const_tree chrec_b)
1534 {
1535   if ((evolution_function_is_constant_p (chrec_a)
1536        && evolution_function_is_univariate_p (chrec_b))
1537       || (evolution_function_is_constant_p (chrec_b)
1538           && evolution_function_is_univariate_p (chrec_a)))
1539     return true;
1540
1541   if (evolution_function_is_univariate_p (chrec_a)
1542       && evolution_function_is_univariate_p (chrec_b))
1543     {
1544       switch (TREE_CODE (chrec_a))
1545         {
1546         case POLYNOMIAL_CHREC:
1547           switch (TREE_CODE (chrec_b))
1548             {
1549             case POLYNOMIAL_CHREC:
1550               if (CHREC_VARIABLE (chrec_a) != CHREC_VARIABLE (chrec_b))
1551                 return false;
1552
1553             default:
1554               return true;
1555             }
1556
1557         default:
1558           return true;
1559         }
1560     }
1561
1562   return false;
1563 }
1564
1565 /* Creates a conflict function with N dimensions.  The affine functions
1566    in each dimension follow.  */
1567
1568 static conflict_function *
1569 conflict_fn (unsigned n, ...)
1570 {
1571   unsigned i;
1572   conflict_function *ret = XCNEW (conflict_function);
1573   va_list ap;
1574
1575   gcc_assert (0 < n && n <= MAX_DIM);
1576   va_start(ap, n);
1577
1578   ret->n = n;
1579   for (i = 0; i < n; i++)
1580     ret->fns[i] = va_arg (ap, affine_fn);
1581   va_end(ap);
1582
1583   return ret;
1584 }
1585
1586 /* Returns constant affine function with value CST.  */
1587
1588 static affine_fn
1589 affine_fn_cst (tree cst)
1590 {
1591   affine_fn fn = VEC_alloc (tree, heap, 1);
1592   VEC_quick_push (tree, fn, cst);
1593   return fn;
1594 }
1595
1596 /* Returns affine function with single variable, CST + COEF * x_DIM.  */
1597
1598 static affine_fn
1599 affine_fn_univar (tree cst, unsigned dim, tree coef)
1600 {
1601   affine_fn fn = VEC_alloc (tree, heap, dim + 1);
1602   unsigned i;
1603
1604   gcc_assert (dim > 0);
1605   VEC_quick_push (tree, fn, cst);
1606   for (i = 1; i < dim; i++)
1607     VEC_quick_push (tree, fn, integer_zero_node);
1608   VEC_quick_push (tree, fn, coef);
1609   return fn;
1610 }
1611
1612 /* Analyze a ZIV (Zero Index Variable) subscript.  *OVERLAPS_A and
1613    *OVERLAPS_B are initialized to the functions that describe the
1614    relation between the elements accessed twice by CHREC_A and
1615    CHREC_B.  For k >= 0, the following property is verified:
1616
1617    CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)).  */
1618
1619 static void
1620 analyze_ziv_subscript (tree chrec_a,
1621                        tree chrec_b,
1622                        conflict_function **overlaps_a,
1623                        conflict_function **overlaps_b,
1624                        tree *last_conflicts)
1625 {
1626   tree type, difference;
1627   dependence_stats.num_ziv++;
1628
1629   if (dump_file && (dump_flags & TDF_DETAILS))
1630     fprintf (dump_file, "(analyze_ziv_subscript \n");
1631
1632   type = signed_type_for_types (TREE_TYPE (chrec_a), TREE_TYPE (chrec_b));
1633   chrec_a = chrec_convert (type, chrec_a, NULL);
1634   chrec_b = chrec_convert (type, chrec_b, NULL);
1635   difference = chrec_fold_minus (type, chrec_a, chrec_b);
1636
1637   switch (TREE_CODE (difference))
1638     {
1639     case INTEGER_CST:
1640       if (integer_zerop (difference))
1641         {
1642           /* The difference is equal to zero: the accessed index
1643              overlaps for each iteration in the loop.  */
1644           *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
1645           *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
1646           *last_conflicts = chrec_dont_know;
1647           dependence_stats.num_ziv_dependent++;
1648         }
1649       else
1650         {
1651           /* The accesses do not overlap.  */
1652           *overlaps_a = conflict_fn_no_dependence ();
1653           *overlaps_b = conflict_fn_no_dependence ();
1654           *last_conflicts = integer_zero_node;
1655           dependence_stats.num_ziv_independent++;
1656         }
1657       break;
1658
1659     default:
1660       /* We're not sure whether the indexes overlap.  For the moment,
1661          conservatively answer "don't know".  */
1662       if (dump_file && (dump_flags & TDF_DETAILS))
1663         fprintf (dump_file, "ziv test failed: difference is non-integer.\n");
1664
1665       *overlaps_a = conflict_fn_not_known ();
1666       *overlaps_b = conflict_fn_not_known ();
1667       *last_conflicts = chrec_dont_know;
1668       dependence_stats.num_ziv_unimplemented++;
1669       break;
1670     }
1671
1672   if (dump_file && (dump_flags & TDF_DETAILS))
1673     fprintf (dump_file, ")\n");
1674 }
1675
1676 /* Similar to max_stmt_executions_int, but returns the bound as a tree,
1677    and only if it fits to the int type.  If this is not the case, or the
1678    bound  on the number of iterations of LOOP could not be derived, returns
1679    chrec_dont_know.  */
1680
1681 static tree
1682 max_stmt_executions_tree (struct loop *loop)
1683 {
1684   double_int nit;
1685
1686   if (!max_stmt_executions (loop, true, &nit))
1687     return chrec_dont_know;
1688
1689   if (!double_int_fits_to_tree_p (unsigned_type_node, nit))
1690     return chrec_dont_know;
1691
1692   return double_int_to_tree (unsigned_type_node, nit);
1693 }
1694
1695 /* Analyze a SIV (Single Index Variable) subscript where CHREC_A is a
1696    constant, and CHREC_B is an affine function.  *OVERLAPS_A and
1697    *OVERLAPS_B are initialized to the functions that describe the
1698    relation between the elements accessed twice by CHREC_A and
1699    CHREC_B.  For k >= 0, the following property is verified:
1700
1701    CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)).  */
1702
1703 static void
1704 analyze_siv_subscript_cst_affine (tree chrec_a,
1705                                   tree chrec_b,
1706                                   conflict_function **overlaps_a,
1707                                   conflict_function **overlaps_b,
1708                                   tree *last_conflicts)
1709 {
1710   bool value0, value1, value2;
1711   tree type, difference, tmp;
1712
1713   type = signed_type_for_types (TREE_TYPE (chrec_a), TREE_TYPE (chrec_b));
1714   chrec_a = chrec_convert (type, chrec_a, NULL);
1715   chrec_b = chrec_convert (type, chrec_b, NULL);
1716   difference = chrec_fold_minus (type, initial_condition (chrec_b), chrec_a);
1717
1718   if (!chrec_is_positive (initial_condition (difference), &value0))
1719     {
1720       if (dump_file && (dump_flags & TDF_DETAILS))
1721         fprintf (dump_file, "siv test failed: chrec is not positive.\n");
1722
1723       dependence_stats.num_siv_unimplemented++;
1724       *overlaps_a = conflict_fn_not_known ();
1725       *overlaps_b = conflict_fn_not_known ();
1726       *last_conflicts = chrec_dont_know;
1727       return;
1728     }
1729   else
1730     {
1731       if (value0 == false)
1732         {
1733           if (!chrec_is_positive (CHREC_RIGHT (chrec_b), &value1))
1734             {
1735               if (dump_file && (dump_flags & TDF_DETAILS))
1736                 fprintf (dump_file, "siv test failed: chrec not positive.\n");
1737
1738               *overlaps_a = conflict_fn_not_known ();
1739               *overlaps_b = conflict_fn_not_known ();
1740               *last_conflicts = chrec_dont_know;
1741               dependence_stats.num_siv_unimplemented++;
1742               return;
1743             }
1744           else
1745             {
1746               if (value1 == true)
1747                 {
1748                   /* Example:
1749                      chrec_a = 12
1750                      chrec_b = {10, +, 1}
1751                   */
1752
1753                   if (tree_fold_divides_p (CHREC_RIGHT (chrec_b), difference))
1754                     {
1755                       HOST_WIDE_INT numiter;
1756                       struct loop *loop = get_chrec_loop (chrec_b);
1757
1758                       *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
1759                       tmp = fold_build2 (EXACT_DIV_EXPR, type,
1760                                          fold_build1 (ABS_EXPR, type, difference),
1761                                          CHREC_RIGHT (chrec_b));
1762                       *overlaps_b = conflict_fn (1, affine_fn_cst (tmp));
1763                       *last_conflicts = integer_one_node;
1764
1765
1766                       /* Perform weak-zero siv test to see if overlap is
1767                          outside the loop bounds.  */
1768                       numiter = max_stmt_executions_int (loop, true);
1769
1770                       if (numiter >= 0
1771                           && compare_tree_int (tmp, numiter) > 0)
1772                         {
1773                           free_conflict_function (*overlaps_a);
1774                           free_conflict_function (*overlaps_b);
1775                           *overlaps_a = conflict_fn_no_dependence ();
1776                           *overlaps_b = conflict_fn_no_dependence ();
1777                           *last_conflicts = integer_zero_node;
1778                           dependence_stats.num_siv_independent++;
1779                           return;
1780                         }
1781                       dependence_stats.num_siv_dependent++;
1782                       return;
1783                     }
1784
1785                   /* When the step does not divide the difference, there are
1786                      no overlaps.  */
1787                   else
1788                     {
1789                       *overlaps_a = conflict_fn_no_dependence ();
1790                       *overlaps_b = conflict_fn_no_dependence ();
1791                       *last_conflicts = integer_zero_node;
1792                       dependence_stats.num_siv_independent++;
1793                       return;
1794                     }
1795                 }
1796
1797               else
1798                 {
1799                   /* Example:
1800                      chrec_a = 12
1801                      chrec_b = {10, +, -1}
1802
1803                      In this case, chrec_a will not overlap with chrec_b.  */
1804                   *overlaps_a = conflict_fn_no_dependence ();
1805                   *overlaps_b = conflict_fn_no_dependence ();
1806                   *last_conflicts = integer_zero_node;
1807                   dependence_stats.num_siv_independent++;
1808                   return;
1809                 }
1810             }
1811         }
1812       else
1813         {
1814           if (!chrec_is_positive (CHREC_RIGHT (chrec_b), &value2))
1815             {
1816               if (dump_file && (dump_flags & TDF_DETAILS))
1817                 fprintf (dump_file, "siv test failed: chrec not positive.\n");
1818
1819               *overlaps_a = conflict_fn_not_known ();
1820               *overlaps_b = conflict_fn_not_known ();
1821               *last_conflicts = chrec_dont_know;
1822               dependence_stats.num_siv_unimplemented++;
1823               return;
1824             }
1825           else
1826             {
1827               if (value2 == false)
1828                 {
1829                   /* Example:
1830                      chrec_a = 3
1831                      chrec_b = {10, +, -1}
1832                   */
1833                   if (tree_fold_divides_p (CHREC_RIGHT (chrec_b), difference))
1834                     {
1835                       HOST_WIDE_INT numiter;
1836                       struct loop *loop = get_chrec_loop (chrec_b);
1837
1838                       *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
1839                       tmp = fold_build2 (EXACT_DIV_EXPR, type, difference,
1840                                          CHREC_RIGHT (chrec_b));
1841                       *overlaps_b = conflict_fn (1, affine_fn_cst (tmp));
1842                       *last_conflicts = integer_one_node;
1843
1844                       /* Perform weak-zero siv test to see if overlap is
1845                          outside the loop bounds.  */
1846                       numiter = max_stmt_executions_int (loop, true);
1847
1848                       if (numiter >= 0
1849                           && compare_tree_int (tmp, numiter) > 0)
1850                         {
1851                           free_conflict_function (*overlaps_a);
1852                           free_conflict_function (*overlaps_b);
1853                           *overlaps_a = conflict_fn_no_dependence ();
1854                           *overlaps_b = conflict_fn_no_dependence ();
1855                           *last_conflicts = integer_zero_node;
1856                           dependence_stats.num_siv_independent++;
1857                           return;
1858                         }
1859                       dependence_stats.num_siv_dependent++;
1860                       return;
1861                     }
1862
1863                   /* When the step does not divide the difference, there
1864                      are no overlaps.  */
1865                   else
1866                     {
1867                       *overlaps_a = conflict_fn_no_dependence ();
1868                       *overlaps_b = conflict_fn_no_dependence ();
1869                       *last_conflicts = integer_zero_node;
1870                       dependence_stats.num_siv_independent++;
1871                       return;
1872                     }
1873                 }
1874               else
1875                 {
1876                   /* Example:
1877                      chrec_a = 3
1878                      chrec_b = {4, +, 1}
1879
1880                      In this case, chrec_a will not overlap with chrec_b.  */
1881                   *overlaps_a = conflict_fn_no_dependence ();
1882                   *overlaps_b = conflict_fn_no_dependence ();
1883                   *last_conflicts = integer_zero_node;
1884                   dependence_stats.num_siv_independent++;
1885                   return;
1886                 }
1887             }
1888         }
1889     }
1890 }
1891
1892 /* Helper recursive function for initializing the matrix A.  Returns
1893    the initial value of CHREC.  */
1894
1895 static tree
1896 initialize_matrix_A (lambda_matrix A, tree chrec, unsigned index, int mult)
1897 {
1898   gcc_assert (chrec);
1899
1900   switch (TREE_CODE (chrec))
1901     {
1902     case POLYNOMIAL_CHREC:
1903       gcc_assert (TREE_CODE (CHREC_RIGHT (chrec)) == INTEGER_CST);
1904
1905       A[index][0] = mult * int_cst_value (CHREC_RIGHT (chrec));
1906       return initialize_matrix_A (A, CHREC_LEFT (chrec), index + 1, mult);
1907
1908     case PLUS_EXPR:
1909     case MULT_EXPR:
1910     case MINUS_EXPR:
1911       {
1912         tree op0 = initialize_matrix_A (A, TREE_OPERAND (chrec, 0), index, mult);
1913         tree op1 = initialize_matrix_A (A, TREE_OPERAND (chrec, 1), index, mult);
1914
1915         return chrec_fold_op (TREE_CODE (chrec), chrec_type (chrec), op0, op1);
1916       }
1917
1918     case NOP_EXPR:
1919       {
1920         tree op = initialize_matrix_A (A, TREE_OPERAND (chrec, 0), index, mult);
1921         return chrec_convert (chrec_type (chrec), op, NULL);
1922       }
1923
1924     case BIT_NOT_EXPR:
1925       {
1926         /* Handle ~X as -1 - X.  */
1927         tree op = initialize_matrix_A (A, TREE_OPERAND (chrec, 0), index, mult);
1928         return chrec_fold_op (MINUS_EXPR, chrec_type (chrec),
1929                               build_int_cst (TREE_TYPE (chrec), -1), op);
1930       }
1931
1932     case INTEGER_CST:
1933       return chrec;
1934
1935     default:
1936       gcc_unreachable ();
1937       return NULL_TREE;
1938     }
1939 }
1940
1941 #define FLOOR_DIV(x,y) ((x) / (y))
1942
1943 /* Solves the special case of the Diophantine equation:
1944    | {0, +, STEP_A}_x (OVERLAPS_A) = {0, +, STEP_B}_y (OVERLAPS_B)
1945
1946    Computes the descriptions OVERLAPS_A and OVERLAPS_B.  NITER is the
1947    number of iterations that loops X and Y run.  The overlaps will be
1948    constructed as evolutions in dimension DIM.  */
1949
1950 static void
1951 compute_overlap_steps_for_affine_univar (int niter, int step_a, int step_b,
1952                                          affine_fn *overlaps_a,
1953                                          affine_fn *overlaps_b,
1954                                          tree *last_conflicts, int dim)
1955 {
1956   if (((step_a > 0 && step_b > 0)
1957        || (step_a < 0 && step_b < 0)))
1958     {
1959       int step_overlaps_a, step_overlaps_b;
1960       int gcd_steps_a_b, last_conflict, tau2;
1961
1962       gcd_steps_a_b = gcd (step_a, step_b);
1963       step_overlaps_a = step_b / gcd_steps_a_b;
1964       step_overlaps_b = step_a / gcd_steps_a_b;
1965
1966       if (niter > 0)
1967         {
1968           tau2 = FLOOR_DIV (niter, step_overlaps_a);
1969           tau2 = MIN (tau2, FLOOR_DIV (niter, step_overlaps_b));
1970           last_conflict = tau2;
1971           *last_conflicts = build_int_cst (NULL_TREE, last_conflict);
1972         }
1973       else
1974         *last_conflicts = chrec_dont_know;
1975
1976       *overlaps_a = affine_fn_univar (integer_zero_node, dim,
1977                                       build_int_cst (NULL_TREE,
1978                                                      step_overlaps_a));
1979       *overlaps_b = affine_fn_univar (integer_zero_node, dim,
1980                                       build_int_cst (NULL_TREE,
1981                                                      step_overlaps_b));
1982     }
1983
1984   else
1985     {
1986       *overlaps_a = affine_fn_cst (integer_zero_node);
1987       *overlaps_b = affine_fn_cst (integer_zero_node);
1988       *last_conflicts = integer_zero_node;
1989     }
1990 }
1991
1992 /* Solves the special case of a Diophantine equation where CHREC_A is
1993    an affine bivariate function, and CHREC_B is an affine univariate
1994    function.  For example,
1995
1996    | {{0, +, 1}_x, +, 1335}_y = {0, +, 1336}_z
1997
1998    has the following overlapping functions:
1999
2000    | x (t, u, v) = {{0, +, 1336}_t, +, 1}_v
2001    | y (t, u, v) = {{0, +, 1336}_u, +, 1}_v
2002    | z (t, u, v) = {{{0, +, 1}_t, +, 1335}_u, +, 1}_v
2003
2004    FORNOW: This is a specialized implementation for a case occurring in
2005    a common benchmark.  Implement the general algorithm.  */
2006
2007 static void
2008 compute_overlap_steps_for_affine_1_2 (tree chrec_a, tree chrec_b,
2009                                       conflict_function **overlaps_a,
2010                                       conflict_function **overlaps_b,
2011                                       tree *last_conflicts)
2012 {
2013   bool xz_p, yz_p, xyz_p;
2014   int step_x, step_y, step_z;
2015   HOST_WIDE_INT niter_x, niter_y, niter_z, niter;
2016   affine_fn overlaps_a_xz, overlaps_b_xz;
2017   affine_fn overlaps_a_yz, overlaps_b_yz;
2018   affine_fn overlaps_a_xyz, overlaps_b_xyz;
2019   affine_fn ova1, ova2, ovb;
2020   tree last_conflicts_xz, last_conflicts_yz, last_conflicts_xyz;
2021
2022   step_x = int_cst_value (CHREC_RIGHT (CHREC_LEFT (chrec_a)));
2023   step_y = int_cst_value (CHREC_RIGHT (chrec_a));
2024   step_z = int_cst_value (CHREC_RIGHT (chrec_b));
2025
2026   niter_x =
2027     max_stmt_executions_int (get_chrec_loop (CHREC_LEFT (chrec_a)), true);
2028   niter_y = max_stmt_executions_int (get_chrec_loop (chrec_a), true);
2029   niter_z = max_stmt_executions_int (get_chrec_loop (chrec_b), true);
2030
2031   if (niter_x < 0 || niter_y < 0 || niter_z < 0)
2032     {
2033       if (dump_file && (dump_flags & TDF_DETAILS))
2034         fprintf (dump_file, "overlap steps test failed: no iteration counts.\n");
2035
2036       *overlaps_a = conflict_fn_not_known ();
2037       *overlaps_b = conflict_fn_not_known ();
2038       *last_conflicts = chrec_dont_know;
2039       return;
2040     }
2041
2042   niter = MIN (niter_x, niter_z);
2043   compute_overlap_steps_for_affine_univar (niter, step_x, step_z,
2044                                            &overlaps_a_xz,
2045                                            &overlaps_b_xz,
2046                                            &last_conflicts_xz, 1);
2047   niter = MIN (niter_y, niter_z);
2048   compute_overlap_steps_for_affine_univar (niter, step_y, step_z,
2049                                            &overlaps_a_yz,
2050                                            &overlaps_b_yz,
2051                                            &last_conflicts_yz, 2);
2052   niter = MIN (niter_x, niter_z);
2053   niter = MIN (niter_y, niter);
2054   compute_overlap_steps_for_affine_univar (niter, step_x + step_y, step_z,
2055                                            &overlaps_a_xyz,
2056                                            &overlaps_b_xyz,
2057                                            &last_conflicts_xyz, 3);
2058
2059   xz_p = !integer_zerop (last_conflicts_xz);
2060   yz_p = !integer_zerop (last_conflicts_yz);
2061   xyz_p = !integer_zerop (last_conflicts_xyz);
2062
2063   if (xz_p || yz_p || xyz_p)
2064     {
2065       ova1 = affine_fn_cst (integer_zero_node);
2066       ova2 = affine_fn_cst (integer_zero_node);
2067       ovb = affine_fn_cst (integer_zero_node);
2068       if (xz_p)
2069         {
2070           affine_fn t0 = ova1;
2071           affine_fn t2 = ovb;
2072
2073           ova1 = affine_fn_plus (ova1, overlaps_a_xz);
2074           ovb = affine_fn_plus (ovb, overlaps_b_xz);
2075           affine_fn_free (t0);
2076           affine_fn_free (t2);
2077           *last_conflicts = last_conflicts_xz;
2078         }
2079       if (yz_p)
2080         {
2081           affine_fn t0 = ova2;
2082           affine_fn t2 = ovb;
2083
2084           ova2 = affine_fn_plus (ova2, overlaps_a_yz);
2085           ovb = affine_fn_plus (ovb, overlaps_b_yz);
2086           affine_fn_free (t0);
2087           affine_fn_free (t2);
2088           *last_conflicts = last_conflicts_yz;
2089         }
2090       if (xyz_p)
2091         {
2092           affine_fn t0 = ova1;
2093           affine_fn t2 = ova2;
2094           affine_fn t4 = ovb;
2095
2096           ova1 = affine_fn_plus (ova1, overlaps_a_xyz);
2097           ova2 = affine_fn_plus (ova2, overlaps_a_xyz);
2098           ovb = affine_fn_plus (ovb, overlaps_b_xyz);
2099           affine_fn_free (t0);
2100           affine_fn_free (t2);
2101           affine_fn_free (t4);
2102           *last_conflicts = last_conflicts_xyz;
2103         }
2104       *overlaps_a = conflict_fn (2, ova1, ova2);
2105       *overlaps_b = conflict_fn (1, ovb);
2106     }
2107   else
2108     {
2109       *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
2110       *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
2111       *last_conflicts = integer_zero_node;
2112     }
2113
2114   affine_fn_free (overlaps_a_xz);
2115   affine_fn_free (overlaps_b_xz);
2116   affine_fn_free (overlaps_a_yz);
2117   affine_fn_free (overlaps_b_yz);
2118   affine_fn_free (overlaps_a_xyz);
2119   affine_fn_free (overlaps_b_xyz);
2120 }
2121
2122 /* Copy the elements of vector VEC1 with length SIZE to VEC2.  */
2123
2124 static void
2125 lambda_vector_copy (lambda_vector vec1, lambda_vector vec2,
2126                     int size)
2127 {
2128   memcpy (vec2, vec1, size * sizeof (*vec1));
2129 }
2130
2131 /* Copy the elements of M x N matrix MAT1 to MAT2.  */
2132
2133 static void
2134 lambda_matrix_copy (lambda_matrix mat1, lambda_matrix mat2,
2135                     int m, int n)
2136 {
2137   int i;
2138
2139   for (i = 0; i < m; i++)
2140     lambda_vector_copy (mat1[i], mat2[i], n);
2141 }
2142
2143 /* Store the N x N identity matrix in MAT.  */
2144
2145 static void
2146 lambda_matrix_id (lambda_matrix mat, int size)
2147 {
2148   int i, j;
2149
2150   for (i = 0; i < size; i++)
2151     for (j = 0; j < size; j++)
2152       mat[i][j] = (i == j) ? 1 : 0;
2153 }
2154
2155 /* Return the first nonzero element of vector VEC1 between START and N.
2156    We must have START <= N.   Returns N if VEC1 is the zero vector.  */
2157
2158 static int
2159 lambda_vector_first_nz (lambda_vector vec1, int n, int start)
2160 {
2161   int j = start;
2162   while (j < n && vec1[j] == 0)
2163     j++;
2164   return j;
2165 }
2166
2167 /* Add a multiple of row R1 of matrix MAT with N columns to row R2:
2168    R2 = R2 + CONST1 * R1.  */
2169
2170 static void
2171 lambda_matrix_row_add (lambda_matrix mat, int n, int r1, int r2, int const1)
2172 {
2173   int i;
2174
2175   if (const1 == 0)
2176     return;
2177
2178   for (i = 0; i < n; i++)
2179     mat[r2][i] += const1 * mat[r1][i];
2180 }
2181
2182 /* Swap rows R1 and R2 in matrix MAT.  */
2183
2184 static void
2185 lambda_matrix_row_exchange (lambda_matrix mat, int r1, int r2)
2186 {
2187   lambda_vector row;
2188
2189   row = mat[r1];
2190   mat[r1] = mat[r2];
2191   mat[r2] = row;
2192 }
2193
2194 /* Multiply vector VEC1 of length SIZE by a constant CONST1,
2195    and store the result in VEC2.  */
2196
2197 static void
2198 lambda_vector_mult_const (lambda_vector vec1, lambda_vector vec2,
2199                           int size, int const1)
2200 {
2201   int i;
2202
2203   if (const1 == 0)
2204     lambda_vector_clear (vec2, size);
2205   else
2206     for (i = 0; i < size; i++)
2207       vec2[i] = const1 * vec1[i];
2208 }
2209
2210 /* Negate vector VEC1 with length SIZE and store it in VEC2.  */
2211
2212 static void
2213 lambda_vector_negate (lambda_vector vec1, lambda_vector vec2,
2214                       int size)
2215 {
2216   lambda_vector_mult_const (vec1, vec2, size, -1);
2217 }
2218
2219 /* Negate row R1 of matrix MAT which has N columns.  */
2220
2221 static void
2222 lambda_matrix_row_negate (lambda_matrix mat, int n, int r1)
2223 {
2224   lambda_vector_negate (mat[r1], mat[r1], n);
2225 }
2226
2227 /* Return true if two vectors are equal.  */
2228
2229 static bool
2230 lambda_vector_equal (lambda_vector vec1, lambda_vector vec2, int size)
2231 {
2232   int i;
2233   for (i = 0; i < size; i++)
2234     if (vec1[i] != vec2[i])
2235       return false;
2236   return true;
2237 }
2238
2239 /* Given an M x N integer matrix A, this function determines an M x
2240    M unimodular matrix U, and an M x N echelon matrix S such that
2241    "U.A = S".  This decomposition is also known as "right Hermite".
2242
2243    Ref: Algorithm 2.1 page 33 in "Loop Transformations for
2244    Restructuring Compilers" Utpal Banerjee.  */
2245
2246 static void
2247 lambda_matrix_right_hermite (lambda_matrix A, int m, int n,
2248                              lambda_matrix S, lambda_matrix U)
2249 {
2250   int i, j, i0 = 0;
2251
2252   lambda_matrix_copy (A, S, m, n);
2253   lambda_matrix_id (U, m);
2254
2255   for (j = 0; j < n; j++)
2256     {
2257       if (lambda_vector_first_nz (S[j], m, i0) < m)
2258         {
2259           ++i0;
2260           for (i = m - 1; i >= i0; i--)
2261             {
2262               while (S[i][j] != 0)
2263                 {
2264                   int sigma, factor, a, b;
2265
2266                   a = S[i-1][j];
2267                   b = S[i][j];
2268                   sigma = (a * b < 0) ? -1: 1;
2269                   a = abs (a);
2270                   b = abs (b);
2271                   factor = sigma * (a / b);
2272
2273                   lambda_matrix_row_add (S, n, i, i-1, -factor);
2274                   lambda_matrix_row_exchange (S, i, i-1);
2275
2276                   lambda_matrix_row_add (U, m, i, i-1, -factor);
2277                   lambda_matrix_row_exchange (U, i, i-1);
2278                 }
2279             }
2280         }
2281     }
2282 }
2283
2284 /* Determines the overlapping elements due to accesses CHREC_A and
2285    CHREC_B, that are affine functions.  This function cannot handle
2286    symbolic evolution functions, ie. when initial conditions are
2287    parameters, because it uses lambda matrices of integers.  */
2288
2289 static void
2290 analyze_subscript_affine_affine (tree chrec_a,
2291                                  tree chrec_b,
2292                                  conflict_function **overlaps_a,
2293                                  conflict_function **overlaps_b,
2294                                  tree *last_conflicts)
2295 {
2296   unsigned nb_vars_a, nb_vars_b, dim;
2297   HOST_WIDE_INT init_a, init_b, gamma, gcd_alpha_beta;
2298   lambda_matrix A, U, S;
2299   struct obstack scratch_obstack;
2300
2301   if (eq_evolutions_p (chrec_a, chrec_b))
2302     {
2303       /* The accessed index overlaps for each iteration in the
2304          loop.  */
2305       *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
2306       *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
2307       *last_conflicts = chrec_dont_know;
2308       return;
2309     }
2310   if (dump_file && (dump_flags & TDF_DETAILS))
2311     fprintf (dump_file, "(analyze_subscript_affine_affine \n");
2312
2313   /* For determining the initial intersection, we have to solve a
2314      Diophantine equation.  This is the most time consuming part.
2315
2316      For answering to the question: "Is there a dependence?" we have
2317      to prove that there exists a solution to the Diophantine
2318      equation, and that the solution is in the iteration domain,
2319      i.e. the solution is positive or zero, and that the solution
2320      happens before the upper bound loop.nb_iterations.  Otherwise
2321      there is no dependence.  This function outputs a description of
2322      the iterations that hold the intersections.  */
2323
2324   nb_vars_a = nb_vars_in_chrec (chrec_a);
2325   nb_vars_b = nb_vars_in_chrec (chrec_b);
2326
2327   gcc_obstack_init (&scratch_obstack);
2328
2329   dim = nb_vars_a + nb_vars_b;
2330   U = lambda_matrix_new (dim, dim, &scratch_obstack);
2331   A = lambda_matrix_new (dim, 1, &scratch_obstack);
2332   S = lambda_matrix_new (dim, 1, &scratch_obstack);
2333
2334   init_a = int_cst_value (initialize_matrix_A (A, chrec_a, 0, 1));
2335   init_b = int_cst_value (initialize_matrix_A (A, chrec_b, nb_vars_a, -1));
2336   gamma = init_b - init_a;
2337
2338   /* Don't do all the hard work of solving the Diophantine equation
2339      when we already know the solution: for example,
2340      | {3, +, 1}_1
2341      | {3, +, 4}_2
2342      | gamma = 3 - 3 = 0.
2343      Then the first overlap occurs during the first iterations:
2344      | {3, +, 1}_1 ({0, +, 4}_x) = {3, +, 4}_2 ({0, +, 1}_x)
2345   */
2346   if (gamma == 0)
2347     {
2348       if (nb_vars_a == 1 && nb_vars_b == 1)
2349         {
2350           HOST_WIDE_INT step_a, step_b;
2351           HOST_WIDE_INT niter, niter_a, niter_b;
2352           affine_fn ova, ovb;
2353
2354           niter_a = max_stmt_executions_int (get_chrec_loop (chrec_a), true);
2355           niter_b = max_stmt_executions_int (get_chrec_loop (chrec_b), true);
2356           niter = MIN (niter_a, niter_b);
2357           step_a = int_cst_value (CHREC_RIGHT (chrec_a));
2358           step_b = int_cst_value (CHREC_RIGHT (chrec_b));
2359
2360           compute_overlap_steps_for_affine_univar (niter, step_a, step_b,
2361                                                    &ova, &ovb,
2362                                                    last_conflicts, 1);
2363           *overlaps_a = conflict_fn (1, ova);
2364           *overlaps_b = conflict_fn (1, ovb);
2365         }
2366
2367       else if (nb_vars_a == 2 && nb_vars_b == 1)
2368         compute_overlap_steps_for_affine_1_2
2369           (chrec_a, chrec_b, overlaps_a, overlaps_b, last_conflicts);
2370
2371       else if (nb_vars_a == 1 && nb_vars_b == 2)
2372         compute_overlap_steps_for_affine_1_2
2373           (chrec_b, chrec_a, overlaps_b, overlaps_a, last_conflicts);
2374
2375       else
2376         {
2377           if (dump_file && (dump_flags & TDF_DETAILS))
2378             fprintf (dump_file, "affine-affine test failed: too many variables.\n");
2379           *overlaps_a = conflict_fn_not_known ();
2380           *overlaps_b = conflict_fn_not_known ();
2381           *last_conflicts = chrec_dont_know;
2382         }
2383       goto end_analyze_subs_aa;
2384     }
2385
2386   /* U.A = S */
2387   lambda_matrix_right_hermite (A, dim, 1, S, U);
2388
2389   if (S[0][0] < 0)
2390     {
2391       S[0][0] *= -1;
2392       lambda_matrix_row_negate (U, dim, 0);
2393     }
2394   gcd_alpha_beta = S[0][0];
2395
2396   /* Something went wrong: for example in {1, +, 0}_5 vs. {0, +, 0}_5,
2397      but that is a quite strange case.  Instead of ICEing, answer
2398      don't know.  */
2399   if (gcd_alpha_beta == 0)
2400     {
2401       *overlaps_a = conflict_fn_not_known ();
2402       *overlaps_b = conflict_fn_not_known ();
2403       *last_conflicts = chrec_dont_know;
2404       goto end_analyze_subs_aa;
2405     }
2406
2407   /* The classic "gcd-test".  */
2408   if (!int_divides_p (gcd_alpha_beta, gamma))
2409     {
2410       /* The "gcd-test" has determined that there is no integer
2411          solution, i.e. there is no dependence.  */
2412       *overlaps_a = conflict_fn_no_dependence ();
2413       *overlaps_b = conflict_fn_no_dependence ();
2414       *last_conflicts = integer_zero_node;
2415     }
2416
2417   /* Both access functions are univariate.  This includes SIV and MIV cases.  */
2418   else if (nb_vars_a == 1 && nb_vars_b == 1)
2419     {
2420       /* Both functions should have the same evolution sign.  */
2421       if (((A[0][0] > 0 && -A[1][0] > 0)
2422            || (A[0][0] < 0 && -A[1][0] < 0)))
2423         {
2424           /* The solutions are given by:
2425              |
2426              | [GAMMA/GCD_ALPHA_BETA  t].[u11 u12]  = [x0]
2427              |                           [u21 u22]    [y0]
2428
2429              For a given integer t.  Using the following variables,
2430
2431              | i0 = u11 * gamma / gcd_alpha_beta
2432              | j0 = u12 * gamma / gcd_alpha_beta
2433              | i1 = u21
2434              | j1 = u22
2435
2436              the solutions are:
2437
2438              | x0 = i0 + i1 * t,
2439              | y0 = j0 + j1 * t.  */
2440           HOST_WIDE_INT i0, j0, i1, j1;
2441
2442           i0 = U[0][0] * gamma / gcd_alpha_beta;
2443           j0 = U[0][1] * gamma / gcd_alpha_beta;
2444           i1 = U[1][0];
2445           j1 = U[1][1];
2446
2447           if ((i1 == 0 && i0 < 0)
2448               || (j1 == 0 && j0 < 0))
2449             {
2450               /* There is no solution.
2451                  FIXME: The case "i0 > nb_iterations, j0 > nb_iterations"
2452                  falls in here, but for the moment we don't look at the
2453                  upper bound of the iteration domain.  */
2454               *overlaps_a = conflict_fn_no_dependence ();
2455               *overlaps_b = conflict_fn_no_dependence ();
2456               *last_conflicts = integer_zero_node;
2457               goto end_analyze_subs_aa;
2458             }
2459
2460           if (i1 > 0 && j1 > 0)
2461             {
2462               HOST_WIDE_INT niter_a = max_stmt_executions_int
2463                 (get_chrec_loop (chrec_a), true);
2464               HOST_WIDE_INT niter_b = max_stmt_executions_int
2465                 (get_chrec_loop (chrec_b), true);
2466               HOST_WIDE_INT niter = MIN (niter_a, niter_b);
2467
2468               /* (X0, Y0) is a solution of the Diophantine equation:
2469                  "chrec_a (X0) = chrec_b (Y0)".  */
2470               HOST_WIDE_INT tau1 = MAX (CEIL (-i0, i1),
2471                                         CEIL (-j0, j1));
2472               HOST_WIDE_INT x0 = i1 * tau1 + i0;
2473               HOST_WIDE_INT y0 = j1 * tau1 + j0;
2474
2475               /* (X1, Y1) is the smallest positive solution of the eq
2476                  "chrec_a (X1) = chrec_b (Y1)", i.e. this is where the
2477                  first conflict occurs.  */
2478               HOST_WIDE_INT min_multiple = MIN (x0 / i1, y0 / j1);
2479               HOST_WIDE_INT x1 = x0 - i1 * min_multiple;
2480               HOST_WIDE_INT y1 = y0 - j1 * min_multiple;
2481
2482               if (niter > 0)
2483                 {
2484                   HOST_WIDE_INT tau2 = MIN (FLOOR_DIV (niter - i0, i1),
2485                                             FLOOR_DIV (niter - j0, j1));
2486                   HOST_WIDE_INT last_conflict = tau2 - (x1 - i0)/i1;
2487
2488                   /* If the overlap occurs outside of the bounds of the
2489                      loop, there is no dependence.  */
2490                   if (x1 >= niter || y1 >= niter)
2491                     {
2492                       *overlaps_a = conflict_fn_no_dependence ();
2493                       *overlaps_b = conflict_fn_no_dependence ();
2494                       *last_conflicts = integer_zero_node;
2495                       goto end_analyze_subs_aa;
2496                     }
2497                   else
2498                     *last_conflicts = build_int_cst (NULL_TREE, last_conflict);
2499                 }
2500               else
2501                 *last_conflicts = chrec_dont_know;
2502
2503               *overlaps_a
2504                 = conflict_fn (1,
2505                                affine_fn_univar (build_int_cst (NULL_TREE, x1),
2506                                                  1,
2507                                                  build_int_cst (NULL_TREE, i1)));
2508               *overlaps_b
2509                 = conflict_fn (1,
2510                                affine_fn_univar (build_int_cst (NULL_TREE, y1),
2511                                                  1,
2512                                                  build_int_cst (NULL_TREE, j1)));
2513             }
2514           else
2515             {
2516               /* FIXME: For the moment, the upper bound of the
2517                  iteration domain for i and j is not checked.  */
2518               if (dump_file && (dump_flags & TDF_DETAILS))
2519                 fprintf (dump_file, "affine-affine test failed: unimplemented.\n");
2520               *overlaps_a = conflict_fn_not_known ();
2521               *overlaps_b = conflict_fn_not_known ();
2522               *last_conflicts = chrec_dont_know;
2523             }
2524         }
2525       else
2526         {
2527           if (dump_file && (dump_flags & TDF_DETAILS))
2528             fprintf (dump_file, "affine-affine test failed: unimplemented.\n");
2529           *overlaps_a = conflict_fn_not_known ();
2530           *overlaps_b = conflict_fn_not_known ();
2531           *last_conflicts = chrec_dont_know;
2532         }
2533     }
2534   else
2535     {
2536       if (dump_file && (dump_flags & TDF_DETAILS))
2537         fprintf (dump_file, "affine-affine test failed: unimplemented.\n");
2538       *overlaps_a = conflict_fn_not_known ();
2539       *overlaps_b = conflict_fn_not_known ();
2540       *last_conflicts = chrec_dont_know;
2541     }
2542
2543 end_analyze_subs_aa:
2544   obstack_free (&scratch_obstack, NULL);
2545   if (dump_file && (dump_flags & TDF_DETAILS))
2546     {
2547       fprintf (dump_file, "  (overlaps_a = ");
2548       dump_conflict_function (dump_file, *overlaps_a);
2549       fprintf (dump_file, ")\n  (overlaps_b = ");
2550       dump_conflict_function (dump_file, *overlaps_b);
2551       fprintf (dump_file, ")\n");
2552       fprintf (dump_file, ")\n");
2553     }
2554 }
2555
2556 /* Returns true when analyze_subscript_affine_affine can be used for
2557    determining the dependence relation between chrec_a and chrec_b,
2558    that contain symbols.  This function modifies chrec_a and chrec_b
2559    such that the analysis result is the same, and such that they don't
2560    contain symbols, and then can safely be passed to the analyzer.
2561
2562    Example: The analysis of the following tuples of evolutions produce
2563    the same results: {x+1, +, 1}_1 vs. {x+3, +, 1}_1, and {-2, +, 1}_1
2564    vs. {0, +, 1}_1
2565
2566    {x+1, +, 1}_1 ({2, +, 1}_1) = {x+3, +, 1}_1 ({0, +, 1}_1)
2567    {-2, +, 1}_1 ({2, +, 1}_1) = {0, +, 1}_1 ({0, +, 1}_1)
2568 */
2569
2570 static bool
2571 can_use_analyze_subscript_affine_affine (tree *chrec_a, tree *chrec_b)
2572 {
2573   tree diff, type, left_a, left_b, right_b;
2574
2575   if (chrec_contains_symbols (CHREC_RIGHT (*chrec_a))
2576       || chrec_contains_symbols (CHREC_RIGHT (*chrec_b)))
2577     /* FIXME: For the moment not handled.  Might be refined later.  */
2578     return false;
2579
2580   type = chrec_type (*chrec_a);
2581   left_a = CHREC_LEFT (*chrec_a);
2582   left_b = chrec_convert (type, CHREC_LEFT (*chrec_b), NULL);
2583   diff = chrec_fold_minus (type, left_a, left_b);
2584
2585   if (!evolution_function_is_constant_p (diff))
2586     return false;
2587
2588   if (dump_file && (dump_flags & TDF_DETAILS))
2589     fprintf (dump_file, "can_use_subscript_aff_aff_for_symbolic \n");
2590
2591   *chrec_a = build_polynomial_chrec (CHREC_VARIABLE (*chrec_a),
2592                                      diff, CHREC_RIGHT (*chrec_a));
2593   right_b = chrec_convert (type, CHREC_RIGHT (*chrec_b), NULL);
2594   *chrec_b = build_polynomial_chrec (CHREC_VARIABLE (*chrec_b),
2595                                      build_int_cst (type, 0),
2596                                      right_b);
2597   return true;
2598 }
2599
2600 /* Analyze a SIV (Single Index Variable) subscript.  *OVERLAPS_A and
2601    *OVERLAPS_B are initialized to the functions that describe the
2602    relation between the elements accessed twice by CHREC_A and
2603    CHREC_B.  For k >= 0, the following property is verified:
2604
2605    CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)).  */
2606
2607 static void
2608 analyze_siv_subscript (tree chrec_a,
2609                        tree chrec_b,
2610                        conflict_function **overlaps_a,
2611                        conflict_function **overlaps_b,
2612                        tree *last_conflicts,
2613                        int loop_nest_num)
2614 {
2615   dependence_stats.num_siv++;
2616
2617   if (dump_file && (dump_flags & TDF_DETAILS))
2618     fprintf (dump_file, "(analyze_siv_subscript \n");
2619
2620   if (evolution_function_is_constant_p (chrec_a)
2621       && evolution_function_is_affine_in_loop (chrec_b, loop_nest_num))
2622     analyze_siv_subscript_cst_affine (chrec_a, chrec_b,
2623                                       overlaps_a, overlaps_b, last_conflicts);
2624
2625   else if (evolution_function_is_affine_in_loop (chrec_a, loop_nest_num)
2626            && evolution_function_is_constant_p (chrec_b))
2627     analyze_siv_subscript_cst_affine (chrec_b, chrec_a,
2628                                       overlaps_b, overlaps_a, last_conflicts);
2629
2630   else if (evolution_function_is_affine_in_loop (chrec_a, loop_nest_num)
2631            && evolution_function_is_affine_in_loop (chrec_b, loop_nest_num))
2632     {
2633       if (!chrec_contains_symbols (chrec_a)
2634           && !chrec_contains_symbols (chrec_b))
2635         {
2636           analyze_subscript_affine_affine (chrec_a, chrec_b,
2637                                            overlaps_a, overlaps_b,
2638                                            last_conflicts);
2639
2640           if (CF_NOT_KNOWN_P (*overlaps_a)
2641               || CF_NOT_KNOWN_P (*overlaps_b))
2642             dependence_stats.num_siv_unimplemented++;
2643           else if (CF_NO_DEPENDENCE_P (*overlaps_a)
2644                    || CF_NO_DEPENDENCE_P (*overlaps_b))
2645             dependence_stats.num_siv_independent++;
2646           else
2647             dependence_stats.num_siv_dependent++;
2648         }
2649       else if (can_use_analyze_subscript_affine_affine (&chrec_a,
2650                                                         &chrec_b))
2651         {
2652           analyze_subscript_affine_affine (chrec_a, chrec_b,
2653                                            overlaps_a, overlaps_b,
2654                                            last_conflicts);
2655
2656           if (CF_NOT_KNOWN_P (*overlaps_a)
2657               || CF_NOT_KNOWN_P (*overlaps_b))
2658             dependence_stats.num_siv_unimplemented++;
2659           else if (CF_NO_DEPENDENCE_P (*overlaps_a)
2660                    || CF_NO_DEPENDENCE_P (*overlaps_b))
2661             dependence_stats.num_siv_independent++;
2662           else
2663             dependence_stats.num_siv_dependent++;
2664         }
2665       else
2666         goto siv_subscript_dontknow;
2667     }
2668
2669   else
2670     {
2671     siv_subscript_dontknow:;
2672       if (dump_file && (dump_flags & TDF_DETAILS))
2673         fprintf (dump_file, "siv test failed: unimplemented.\n");
2674       *overlaps_a = conflict_fn_not_known ();
2675       *overlaps_b = conflict_fn_not_known ();
2676       *last_conflicts = chrec_dont_know;
2677       dependence_stats.num_siv_unimplemented++;
2678     }
2679
2680   if (dump_file && (dump_flags & TDF_DETAILS))
2681     fprintf (dump_file, ")\n");
2682 }
2683
2684 /* Returns false if we can prove that the greatest common divisor of the steps
2685    of CHREC does not divide CST, false otherwise.  */
2686
2687 static bool
2688 gcd_of_steps_may_divide_p (const_tree chrec, const_tree cst)
2689 {
2690   HOST_WIDE_INT cd = 0, val;
2691   tree step;
2692
2693   if (!host_integerp (cst, 0))
2694     return true;
2695   val = tree_low_cst (cst, 0);
2696
2697   while (TREE_CODE (chrec) == POLYNOMIAL_CHREC)
2698     {
2699       step = CHREC_RIGHT (chrec);
2700       if (!host_integerp (step, 0))
2701         return true;
2702       cd = gcd (cd, tree_low_cst (step, 0));
2703       chrec = CHREC_LEFT (chrec);
2704     }
2705
2706   return val % cd == 0;
2707 }
2708
2709 /* Analyze a MIV (Multiple Index Variable) subscript with respect to
2710    LOOP_NEST.  *OVERLAPS_A and *OVERLAPS_B are initialized to the
2711    functions that describe the relation between the elements accessed
2712    twice by CHREC_A and CHREC_B.  For k >= 0, the following property
2713    is verified:
2714
2715    CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)).  */
2716
2717 static void
2718 analyze_miv_subscript (tree chrec_a,
2719                        tree chrec_b,
2720                        conflict_function **overlaps_a,
2721                        conflict_function **overlaps_b,
2722                        tree *last_conflicts,
2723                        struct loop *loop_nest)
2724 {
2725   tree type, difference;
2726
2727   dependence_stats.num_miv++;
2728   if (dump_file && (dump_flags & TDF_DETAILS))
2729     fprintf (dump_file, "(analyze_miv_subscript \n");
2730
2731   type = signed_type_for_types (TREE_TYPE (chrec_a), TREE_TYPE (chrec_b));
2732   chrec_a = chrec_convert (type, chrec_a, NULL);
2733   chrec_b = chrec_convert (type, chrec_b, NULL);
2734   difference = chrec_fold_minus (type, chrec_a, chrec_b);
2735
2736   if (eq_evolutions_p (chrec_a, chrec_b))
2737     {
2738       /* Access functions are the same: all the elements are accessed
2739          in the same order.  */
2740       *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
2741       *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
2742       *last_conflicts = max_stmt_executions_tree (get_chrec_loop (chrec_a));
2743       dependence_stats.num_miv_dependent++;
2744     }
2745
2746   else if (evolution_function_is_constant_p (difference)
2747            /* For the moment, the following is verified:
2748               evolution_function_is_affine_multivariate_p (chrec_a,
2749               loop_nest->num) */
2750            && !gcd_of_steps_may_divide_p (chrec_a, difference))
2751     {
2752       /* testsuite/.../ssa-chrec-33.c
2753          {{21, +, 2}_1, +, -2}_2  vs.  {{20, +, 2}_1, +, -2}_2
2754
2755          The difference is 1, and all the evolution steps are multiples
2756          of 2, consequently there are no overlapping elements.  */
2757       *overlaps_a = conflict_fn_no_dependence ();
2758       *overlaps_b = conflict_fn_no_dependence ();
2759       *last_conflicts = integer_zero_node;
2760       dependence_stats.num_miv_independent++;
2761     }
2762
2763   else if (evolution_function_is_affine_multivariate_p (chrec_a, loop_nest->num)
2764            && !chrec_contains_symbols (chrec_a)
2765            && evolution_function_is_affine_multivariate_p (chrec_b, loop_nest->num)
2766            && !chrec_contains_symbols (chrec_b))
2767     {
2768       /* testsuite/.../ssa-chrec-35.c
2769          {0, +, 1}_2  vs.  {0, +, 1}_3
2770          the overlapping elements are respectively located at iterations:
2771          {0, +, 1}_x and {0, +, 1}_x,
2772          in other words, we have the equality:
2773          {0, +, 1}_2 ({0, +, 1}_x) = {0, +, 1}_3 ({0, +, 1}_x)
2774
2775          Other examples:
2776          {{0, +, 1}_1, +, 2}_2 ({0, +, 1}_x, {0, +, 1}_y) =
2777          {0, +, 1}_1 ({{0, +, 1}_x, +, 2}_y)
2778
2779          {{0, +, 2}_1, +, 3}_2 ({0, +, 1}_y, {0, +, 1}_x) =
2780          {{0, +, 3}_1, +, 2}_2 ({0, +, 1}_x, {0, +, 1}_y)
2781       */
2782       analyze_subscript_affine_affine (chrec_a, chrec_b,
2783                                        overlaps_a, overlaps_b, last_conflicts);
2784
2785       if (CF_NOT_KNOWN_P (*overlaps_a)
2786           || CF_NOT_KNOWN_P (*overlaps_b))
2787         dependence_stats.num_miv_unimplemented++;
2788       else if (CF_NO_DEPENDENCE_P (*overlaps_a)
2789                || CF_NO_DEPENDENCE_P (*overlaps_b))
2790         dependence_stats.num_miv_independent++;
2791       else
2792         dependence_stats.num_miv_dependent++;
2793     }
2794
2795   else
2796     {
2797       /* When the analysis is too difficult, answer "don't know".  */
2798       if (dump_file && (dump_flags & TDF_DETAILS))
2799         fprintf (dump_file, "analyze_miv_subscript test failed: unimplemented.\n");
2800
2801       *overlaps_a = conflict_fn_not_known ();
2802       *overlaps_b = conflict_fn_not_known ();
2803       *last_conflicts = chrec_dont_know;
2804       dependence_stats.num_miv_unimplemented++;
2805     }
2806
2807   if (dump_file && (dump_flags & TDF_DETAILS))
2808     fprintf (dump_file, ")\n");
2809 }
2810
2811 /* Determines the iterations for which CHREC_A is equal to CHREC_B in
2812    with respect to LOOP_NEST.  OVERLAP_ITERATIONS_A and
2813    OVERLAP_ITERATIONS_B are initialized with two functions that
2814    describe the iterations that contain conflicting elements.
2815
2816    Remark: For an integer k >= 0, the following equality is true:
2817
2818    CHREC_A (OVERLAP_ITERATIONS_A (k)) == CHREC_B (OVERLAP_ITERATIONS_B (k)).
2819 */
2820
2821 static void
2822 analyze_overlapping_iterations (tree chrec_a,
2823                                 tree chrec_b,
2824                                 conflict_function **overlap_iterations_a,
2825                                 conflict_function **overlap_iterations_b,
2826                                 tree *last_conflicts, struct loop *loop_nest)
2827 {
2828   unsigned int lnn = loop_nest->num;
2829
2830   dependence_stats.num_subscript_tests++;
2831
2832   if (dump_file && (dump_flags & TDF_DETAILS))
2833     {
2834       fprintf (dump_file, "(analyze_overlapping_iterations \n");
2835       fprintf (dump_file, "  (chrec_a = ");
2836       print_generic_expr (dump_file, chrec_a, 0);
2837       fprintf (dump_file, ")\n  (chrec_b = ");
2838       print_generic_expr (dump_file, chrec_b, 0);
2839       fprintf (dump_file, ")\n");
2840     }
2841
2842   if (chrec_a == NULL_TREE
2843       || chrec_b == NULL_TREE
2844       || chrec_contains_undetermined (chrec_a)
2845       || chrec_contains_undetermined (chrec_b))
2846     {
2847       dependence_stats.num_subscript_undetermined++;
2848
2849       *overlap_iterations_a = conflict_fn_not_known ();
2850       *overlap_iterations_b = conflict_fn_not_known ();
2851     }
2852
2853   /* If they are the same chrec, and are affine, they overlap
2854      on every iteration.  */
2855   else if (eq_evolutions_p (chrec_a, chrec_b)
2856            && (evolution_function_is_affine_multivariate_p (chrec_a, lnn)
2857                || operand_equal_p (chrec_a, chrec_b, 0)))
2858     {
2859       dependence_stats.num_same_subscript_function++;
2860       *overlap_iterations_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
2861       *overlap_iterations_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
2862       *last_conflicts = chrec_dont_know;
2863     }
2864
2865   /* If they aren't the same, and aren't affine, we can't do anything
2866      yet.  */
2867   else if ((chrec_contains_symbols (chrec_a)
2868             || chrec_contains_symbols (chrec_b))
2869            && (!evolution_function_is_affine_multivariate_p (chrec_a, lnn)
2870                || !evolution_function_is_affine_multivariate_p (chrec_b, lnn)))
2871     {
2872       dependence_stats.num_subscript_undetermined++;
2873       *overlap_iterations_a = conflict_fn_not_known ();
2874       *overlap_iterations_b = conflict_fn_not_known ();
2875     }
2876
2877   else if (ziv_subscript_p (chrec_a, chrec_b))
2878     analyze_ziv_subscript (chrec_a, chrec_b,
2879                            overlap_iterations_a, overlap_iterations_b,
2880                            last_conflicts);
2881
2882   else if (siv_subscript_p (chrec_a, chrec_b))
2883     analyze_siv_subscript (chrec_a, chrec_b,
2884                            overlap_iterations_a, overlap_iterations_b,
2885                            last_conflicts, lnn);
2886
2887   else
2888     analyze_miv_subscript (chrec_a, chrec_b,
2889                            overlap_iterations_a, overlap_iterations_b,
2890                            last_conflicts, loop_nest);
2891
2892   if (dump_file && (dump_flags & TDF_DETAILS))
2893     {
2894       fprintf (dump_file, "  (overlap_iterations_a = ");
2895       dump_conflict_function (dump_file, *overlap_iterations_a);
2896       fprintf (dump_file, ")\n  (overlap_iterations_b = ");
2897       dump_conflict_function (dump_file, *overlap_iterations_b);
2898       fprintf (dump_file, ")\n");
2899       fprintf (dump_file, ")\n");
2900     }
2901 }
2902
2903 /* Helper function for uniquely inserting distance vectors.  */
2904
2905 static void
2906 save_dist_v (struct data_dependence_relation *ddr, lambda_vector dist_v)
2907 {
2908   unsigned i;
2909   lambda_vector v;
2910
2911   FOR_EACH_VEC_ELT (lambda_vector, DDR_DIST_VECTS (ddr), i, v)
2912     if (lambda_vector_equal (v, dist_v, DDR_NB_LOOPS (ddr)))
2913       return;
2914
2915   VEC_safe_push (lambda_vector, heap, DDR_DIST_VECTS (ddr), dist_v);
2916 }
2917
2918 /* Helper function for uniquely inserting direction vectors.  */
2919
2920 static void
2921 save_dir_v (struct data_dependence_relation *ddr, lambda_vector dir_v)
2922 {
2923   unsigned i;
2924   lambda_vector v;
2925
2926   FOR_EACH_VEC_ELT (lambda_vector, DDR_DIR_VECTS (ddr), i, v)
2927     if (lambda_vector_equal (v, dir_v, DDR_NB_LOOPS (ddr)))
2928       return;
2929
2930   VEC_safe_push (lambda_vector, heap, DDR_DIR_VECTS (ddr), dir_v);
2931 }
2932
2933 /* Add a distance of 1 on all the loops outer than INDEX.  If we
2934    haven't yet determined a distance for this outer loop, push a new
2935    distance vector composed of the previous distance, and a distance
2936    of 1 for this outer loop.  Example:
2937
2938    | loop_1
2939    |   loop_2
2940    |     A[10]
2941    |   endloop_2
2942    | endloop_1
2943
2944    Saved vectors are of the form (dist_in_1, dist_in_2).  First, we
2945    save (0, 1), then we have to save (1, 0).  */
2946
2947 static void
2948 add_outer_distances (struct data_dependence_relation *ddr,
2949                      lambda_vector dist_v, int index)
2950 {
2951   /* For each outer loop where init_v is not set, the accesses are
2952      in dependence of distance 1 in the loop.  */
2953   while (--index >= 0)
2954     {
2955       lambda_vector save_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
2956       lambda_vector_copy (dist_v, save_v, DDR_NB_LOOPS (ddr));
2957       save_v[index] = 1;
2958       save_dist_v (ddr, save_v);
2959     }
2960 }
2961
2962 /* Return false when fail to represent the data dependence as a
2963    distance vector.  INIT_B is set to true when a component has been
2964    added to the distance vector DIST_V.  INDEX_CARRY is then set to
2965    the index in DIST_V that carries the dependence.  */
2966
2967 static bool
2968 build_classic_dist_vector_1 (struct data_dependence_relation *ddr,
2969                              struct data_reference *ddr_a,
2970                              struct data_reference *ddr_b,
2971                              lambda_vector dist_v, bool *init_b,
2972                              int *index_carry)
2973 {
2974   unsigned i;
2975   lambda_vector init_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
2976
2977   for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
2978     {
2979       tree access_fn_a, access_fn_b;
2980       struct subscript *subscript = DDR_SUBSCRIPT (ddr, i);
2981
2982       if (chrec_contains_undetermined (SUB_DISTANCE (subscript)))
2983         {
2984           non_affine_dependence_relation (ddr);
2985           return false;
2986         }
2987
2988       access_fn_a = DR_ACCESS_FN (ddr_a, i);
2989       access_fn_b = DR_ACCESS_FN (ddr_b, i);
2990
2991       if (TREE_CODE (access_fn_a) == POLYNOMIAL_CHREC
2992           && TREE_CODE (access_fn_b) == POLYNOMIAL_CHREC)
2993         {
2994           int dist, index;
2995           int var_a = CHREC_VARIABLE (access_fn_a);
2996           int var_b = CHREC_VARIABLE (access_fn_b);
2997
2998           if (var_a != var_b
2999               || chrec_contains_undetermined (SUB_DISTANCE (subscript)))
3000             {
3001               non_affine_dependence_relation (ddr);
3002               return false;
3003             }
3004
3005           dist = int_cst_value (SUB_DISTANCE (subscript));
3006           index = index_in_loop_nest (var_a, DDR_LOOP_NEST (ddr));
3007           *index_carry = MIN (index, *index_carry);
3008
3009           /* This is the subscript coupling test.  If we have already
3010              recorded a distance for this loop (a distance coming from
3011              another subscript), it should be the same.  For example,
3012              in the following code, there is no dependence:
3013
3014              | loop i = 0, N, 1
3015              |   T[i+1][i] = ...
3016              |   ... = T[i][i]
3017              | endloop
3018           */
3019           if (init_v[index] != 0 && dist_v[index] != dist)
3020             {
3021               finalize_ddr_dependent (ddr, chrec_known);
3022               return false;
3023             }
3024
3025           dist_v[index] = dist;
3026           init_v[index] = 1;
3027           *init_b = true;
3028         }
3029       else if (!operand_equal_p (access_fn_a, access_fn_b, 0))
3030         {
3031           /* This can be for example an affine vs. constant dependence
3032              (T[i] vs. T[3]) that is not an affine dependence and is
3033              not representable as a distance vector.  */
3034           non_affine_dependence_relation (ddr);
3035           return false;
3036         }
3037     }
3038
3039   return true;
3040 }
3041
3042 /* Return true when the DDR contains only constant access functions.  */
3043
3044 static bool
3045 constant_access_functions (const struct data_dependence_relation *ddr)
3046 {
3047   unsigned i;
3048
3049   for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
3050     if (!evolution_function_is_constant_p (DR_ACCESS_FN (DDR_A (ddr), i))
3051         || !evolution_function_is_constant_p (DR_ACCESS_FN (DDR_B (ddr), i)))
3052       return false;
3053
3054   return true;
3055 }
3056
3057 /* Helper function for the case where DDR_A and DDR_B are the same
3058    multivariate access function with a constant step.  For an example
3059    see pr34635-1.c.  */
3060
3061 static void
3062 add_multivariate_self_dist (struct data_dependence_relation *ddr, tree c_2)
3063 {
3064   int x_1, x_2;
3065   tree c_1 = CHREC_LEFT (c_2);
3066   tree c_0 = CHREC_LEFT (c_1);
3067   lambda_vector dist_v;
3068   int v1, v2, cd;
3069
3070   /* Polynomials with more than 2 variables are not handled yet.  When
3071      the evolution steps are parameters, it is not possible to
3072      represent the dependence using classical distance vectors.  */
3073   if (TREE_CODE (c_0) != INTEGER_CST
3074       || TREE_CODE (CHREC_RIGHT (c_1)) != INTEGER_CST
3075       || TREE_CODE (CHREC_RIGHT (c_2)) != INTEGER_CST)
3076     {
3077       DDR_AFFINE_P (ddr) = false;
3078       return;
3079     }
3080
3081   x_2 = index_in_loop_nest (CHREC_VARIABLE (c_2), DDR_LOOP_NEST (ddr));
3082   x_1 = index_in_loop_nest (CHREC_VARIABLE (c_1), DDR_LOOP_NEST (ddr));
3083
3084   /* For "{{0, +, 2}_1, +, 3}_2" the distance vector is (3, -2).  */
3085   dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3086   v1 = int_cst_value (CHREC_RIGHT (c_1));
3087   v2 = int_cst_value (CHREC_RIGHT (c_2));
3088   cd = gcd (v1, v2);
3089   v1 /= cd;
3090   v2 /= cd;
3091
3092   if (v2 < 0)
3093     {
3094       v2 = -v2;
3095       v1 = -v1;
3096     }
3097
3098   dist_v[x_1] = v2;
3099   dist_v[x_2] = -v1;
3100   save_dist_v (ddr, dist_v);
3101
3102   add_outer_distances (ddr, dist_v, x_1);
3103 }
3104
3105 /* Helper function for the case where DDR_A and DDR_B are the same
3106    access functions.  */
3107
3108 static void
3109 add_other_self_distances (struct data_dependence_relation *ddr)
3110 {
3111   lambda_vector dist_v;
3112   unsigned i;
3113   int index_carry = DDR_NB_LOOPS (ddr);
3114
3115   for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
3116     {
3117       tree access_fun = DR_ACCESS_FN (DDR_A (ddr), i);
3118
3119       if (TREE_CODE (access_fun) == POLYNOMIAL_CHREC)
3120         {
3121           if (!evolution_function_is_univariate_p (access_fun))
3122             {
3123               if (DDR_NUM_SUBSCRIPTS (ddr) != 1)
3124                 {
3125                   DDR_ARE_DEPENDENT (ddr) = chrec_dont_know;
3126                   return;
3127                 }
3128
3129               access_fun = DR_ACCESS_FN (DDR_A (ddr), 0);
3130
3131               if (TREE_CODE (CHREC_LEFT (access_fun)) == POLYNOMIAL_CHREC)
3132                 add_multivariate_self_dist (ddr, access_fun);
3133               else
3134                 /* The evolution step is not constant: it varies in
3135                    the outer loop, so this cannot be represented by a
3136                    distance vector.  For example in pr34635.c the
3137                    evolution is {0, +, {0, +, 4}_1}_2.  */
3138                 DDR_AFFINE_P (ddr) = false;
3139
3140               return;
3141             }
3142
3143           index_carry = MIN (index_carry,
3144                              index_in_loop_nest (CHREC_VARIABLE (access_fun),
3145                                                  DDR_LOOP_NEST (ddr)));
3146         }
3147     }
3148
3149   dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3150   add_outer_distances (ddr, dist_v, index_carry);
3151 }
3152
3153 static void
3154 insert_innermost_unit_dist_vector (struct data_dependence_relation *ddr)
3155 {
3156   lambda_vector dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3157
3158   dist_v[DDR_INNER_LOOP (ddr)] = 1;
3159   save_dist_v (ddr, dist_v);
3160 }
3161
3162 /* Adds a unit distance vector to DDR when there is a 0 overlap.  This
3163    is the case for example when access functions are the same and
3164    equal to a constant, as in:
3165
3166    | loop_1
3167    |   A[3] = ...
3168    |   ... = A[3]
3169    | endloop_1
3170
3171    in which case the distance vectors are (0) and (1).  */
3172
3173 static void
3174 add_distance_for_zero_overlaps (struct data_dependence_relation *ddr)
3175 {
3176   unsigned i, j;
3177
3178   for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
3179     {
3180       subscript_p sub = DDR_SUBSCRIPT (ddr, i);
3181       conflict_function *ca = SUB_CONFLICTS_IN_A (sub);
3182       conflict_function *cb = SUB_CONFLICTS_IN_B (sub);
3183
3184       for (j = 0; j < ca->n; j++)
3185         if (affine_function_zero_p (ca->fns[j]))
3186           {
3187             insert_innermost_unit_dist_vector (ddr);
3188             return;
3189           }
3190
3191       for (j = 0; j < cb->n; j++)
3192         if (affine_function_zero_p (cb->fns[j]))
3193           {
3194             insert_innermost_unit_dist_vector (ddr);
3195             return;
3196           }
3197     }
3198 }
3199
3200 /* Compute the classic per loop distance vector.  DDR is the data
3201    dependence relation to build a vector from.  Return false when fail
3202    to represent the data dependence as a distance vector.  */
3203
3204 static bool
3205 build_classic_dist_vector (struct data_dependence_relation *ddr,
3206                            struct loop *loop_nest)
3207 {
3208   bool init_b = false;
3209   int index_carry = DDR_NB_LOOPS (ddr);
3210   lambda_vector dist_v;
3211
3212   if (DDR_ARE_DEPENDENT (ddr) != NULL_TREE)
3213     return false;
3214
3215   if (same_access_functions (ddr))
3216     {
3217       /* Save the 0 vector.  */
3218       dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3219       save_dist_v (ddr, dist_v);
3220
3221       if (constant_access_functions (ddr))
3222         add_distance_for_zero_overlaps (ddr);
3223
3224       if (DDR_NB_LOOPS (ddr) > 1)
3225         add_other_self_distances (ddr);
3226
3227       return true;
3228     }
3229
3230   dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3231   if (!build_classic_dist_vector_1 (ddr, DDR_A (ddr), DDR_B (ddr),
3232                                     dist_v, &init_b, &index_carry))
3233     return false;
3234
3235   /* Save the distance vector if we initialized one.  */
3236   if (init_b)
3237     {
3238       /* Verify a basic constraint: classic distance vectors should
3239          always be lexicographically positive.
3240
3241          Data references are collected in the order of execution of
3242          the program, thus for the following loop
3243
3244          | for (i = 1; i < 100; i++)
3245          |   for (j = 1; j < 100; j++)
3246          |     {
3247          |       t = T[j+1][i-1];  // A
3248          |       T[j][i] = t + 2;  // B
3249          |     }
3250
3251          references are collected following the direction of the wind:
3252          A then B.  The data dependence tests are performed also
3253          following this order, such that we're looking at the distance
3254          separating the elements accessed by A from the elements later
3255          accessed by B.  But in this example, the distance returned by
3256          test_dep (A, B) is lexicographically negative (-1, 1), that
3257          means that the access A occurs later than B with respect to
3258          the outer loop, ie. we're actually looking upwind.  In this
3259          case we solve test_dep (B, A) looking downwind to the
3260          lexicographically positive solution, that returns the
3261          distance vector (1, -1).  */
3262       if (!lambda_vector_lexico_pos (dist_v, DDR_NB_LOOPS (ddr)))
3263         {
3264           lambda_vector save_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3265           if (!subscript_dependence_tester_1 (ddr, DDR_B (ddr), DDR_A (ddr),
3266                                               loop_nest))
3267             return false;
3268           compute_subscript_distance (ddr);
3269           if (!build_classic_dist_vector_1 (ddr, DDR_B (ddr), DDR_A (ddr),
3270                                             save_v, &init_b, &index_carry))
3271             return false;
3272           save_dist_v (ddr, save_v);
3273           DDR_REVERSED_P (ddr) = true;
3274
3275           /* In this case there is a dependence forward for all the
3276              outer loops:
3277
3278              | for (k = 1; k < 100; k++)
3279              |  for (i = 1; i < 100; i++)
3280              |   for (j = 1; j < 100; j++)
3281              |     {
3282              |       t = T[j+1][i-1];  // A
3283              |       T[j][i] = t + 2;  // B
3284              |     }
3285
3286              the vectors are:
3287              (0,  1, -1)
3288              (1,  1, -1)
3289              (1, -1,  1)
3290           */
3291           if (DDR_NB_LOOPS (ddr) > 1)
3292             {
3293               add_outer_distances (ddr, save_v, index_carry);
3294               add_outer_distances (ddr, dist_v, index_carry);
3295             }
3296         }
3297       else
3298         {
3299           lambda_vector save_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3300           lambda_vector_copy (dist_v, save_v, DDR_NB_LOOPS (ddr));
3301
3302           if (DDR_NB_LOOPS (ddr) > 1)
3303             {
3304               lambda_vector opposite_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3305
3306               if (!subscript_dependence_tester_1 (ddr, DDR_B (ddr),
3307                                                   DDR_A (ddr), loop_nest))
3308                 return false;
3309               compute_subscript_distance (ddr);
3310               if (!build_classic_dist_vector_1 (ddr, DDR_B (ddr), DDR_A (ddr),
3311                                                 opposite_v, &init_b,
3312                                                 &index_carry))
3313                 return false;
3314
3315               save_dist_v (ddr, save_v);
3316               add_outer_distances (ddr, dist_v, index_carry);
3317               add_outer_distances (ddr, opposite_v, index_carry);
3318             }
3319           else
3320             save_dist_v (ddr, save_v);
3321         }
3322     }
3323   else
3324     {
3325       /* There is a distance of 1 on all the outer loops: Example:
3326          there is a dependence of distance 1 on loop_1 for the array A.
3327
3328          | loop_1
3329          |   A[5] = ...
3330          | endloop
3331       */
3332       add_outer_distances (ddr, dist_v,
3333                            lambda_vector_first_nz (dist_v,
3334                                                    DDR_NB_LOOPS (ddr), 0));
3335     }
3336
3337   if (dump_file && (dump_flags & TDF_DETAILS))
3338     {
3339       unsigned i;
3340
3341       fprintf (dump_file, "(build_classic_dist_vector\n");
3342       for (i = 0; i < DDR_NUM_DIST_VECTS (ddr); i++)
3343         {
3344           fprintf (dump_file, "  dist_vector = (");
3345           print_lambda_vector (dump_file, DDR_DIST_VECT (ddr, i),
3346                                DDR_NB_LOOPS (ddr));
3347           fprintf (dump_file, "  )\n");
3348         }
3349       fprintf (dump_file, ")\n");
3350     }
3351
3352   return true;
3353 }
3354
3355 /* Return the direction for a given distance.
3356    FIXME: Computing dir this way is suboptimal, since dir can catch
3357    cases that dist is unable to represent.  */
3358
3359 static inline enum data_dependence_direction
3360 dir_from_dist (int dist)
3361 {
3362   if (dist > 0)
3363     return dir_positive;
3364   else if (dist < 0)
3365     return dir_negative;
3366   else
3367     return dir_equal;
3368 }
3369
3370 /* Compute the classic per loop direction vector.  DDR is the data
3371    dependence relation to build a vector from.  */
3372
3373 static void
3374 build_classic_dir_vector (struct data_dependence_relation *ddr)
3375 {
3376   unsigned i, j;
3377   lambda_vector dist_v;
3378
3379   FOR_EACH_VEC_ELT (lambda_vector, DDR_DIST_VECTS (ddr), i, dist_v)
3380     {
3381       lambda_vector dir_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3382
3383       for (j = 0; j < DDR_NB_LOOPS (ddr); j++)
3384         dir_v[j] = dir_from_dist (dist_v[j]);
3385
3386       save_dir_v (ddr, dir_v);
3387     }
3388 }
3389
3390 /* Helper function.  Returns true when there is a dependence between
3391    data references DRA and DRB.  */
3392
3393 static bool
3394 subscript_dependence_tester_1 (struct data_dependence_relation *ddr,
3395                                struct data_reference *dra,
3396                                struct data_reference *drb,
3397                                struct loop *loop_nest)
3398 {
3399   unsigned int i;
3400   tree last_conflicts;
3401   struct subscript *subscript;
3402
3403   for (i = 0; VEC_iterate (subscript_p, DDR_SUBSCRIPTS (ddr), i, subscript);
3404        i++)
3405     {
3406       conflict_function *overlaps_a, *overlaps_b;
3407
3408       analyze_overlapping_iterations (DR_ACCESS_FN (dra, i),
3409                                       DR_ACCESS_FN (drb, i),
3410                                       &overlaps_a, &overlaps_b,
3411                                       &last_conflicts, loop_nest);
3412
3413       if (CF_NOT_KNOWN_P (overlaps_a)
3414           || CF_NOT_KNOWN_P (overlaps_b))
3415         {
3416           finalize_ddr_dependent (ddr, chrec_dont_know);
3417           dependence_stats.num_dependence_undetermined++;
3418           free_conflict_function (overlaps_a);
3419           free_conflict_function (overlaps_b);
3420           return false;
3421         }
3422
3423       else if (CF_NO_DEPENDENCE_P (overlaps_a)
3424                || CF_NO_DEPENDENCE_P (overlaps_b))
3425         {
3426           finalize_ddr_dependent (ddr, chrec_known);
3427           dependence_stats.num_dependence_independent++;
3428           free_conflict_function (overlaps_a);
3429           free_conflict_function (overlaps_b);
3430           return false;
3431         }
3432
3433       else
3434         {
3435           if (SUB_CONFLICTS_IN_A (subscript))
3436             free_conflict_function (SUB_CONFLICTS_IN_A (subscript));
3437           if (SUB_CONFLICTS_IN_B (subscript))
3438             free_conflict_function (SUB_CONFLICTS_IN_B (subscript));
3439
3440           SUB_CONFLICTS_IN_A (subscript) = overlaps_a;
3441           SUB_CONFLICTS_IN_B (subscript) = overlaps_b;
3442           SUB_LAST_CONFLICT (subscript) = last_conflicts;
3443         }
3444     }
3445
3446   return true;
3447 }
3448
3449 /* Computes the conflicting iterations in LOOP_NEST, and initialize DDR.  */
3450
3451 static void
3452 subscript_dependence_tester (struct data_dependence_relation *ddr,
3453                              struct loop *loop_nest)
3454 {
3455
3456   if (dump_file && (dump_flags & TDF_DETAILS))
3457     fprintf (dump_file, "(subscript_dependence_tester \n");
3458
3459   if (subscript_dependence_tester_1 (ddr, DDR_A (ddr), DDR_B (ddr), loop_nest))
3460     dependence_stats.num_dependence_dependent++;
3461
3462   compute_subscript_distance (ddr);
3463   if (build_classic_dist_vector (ddr, loop_nest))
3464     build_classic_dir_vector (ddr);
3465
3466   if (dump_file && (dump_flags & TDF_DETAILS))
3467     fprintf (dump_file, ")\n");
3468 }
3469
3470 /* Returns true when all the access functions of A are affine or
3471    constant with respect to LOOP_NEST.  */
3472
3473 static bool
3474 access_functions_are_affine_or_constant_p (const struct data_reference *a,
3475                                            const struct loop *loop_nest)
3476 {
3477   unsigned int i;
3478   VEC(tree,heap) *fns = DR_ACCESS_FNS (a);
3479   tree t;
3480
3481   FOR_EACH_VEC_ELT (tree, fns, i, t)
3482     if (!evolution_function_is_invariant_p (t, loop_nest->num)
3483         && !evolution_function_is_affine_multivariate_p (t, loop_nest->num))
3484       return false;
3485
3486   return true;
3487 }
3488
3489 /* Initializes an equation for an OMEGA problem using the information
3490    contained in the ACCESS_FUN.  Returns true when the operation
3491    succeeded.
3492
3493    PB is the omega constraint system.
3494    EQ is the number of the equation to be initialized.
3495    OFFSET is used for shifting the variables names in the constraints:
3496    a constrain is composed of 2 * the number of variables surrounding
3497    dependence accesses.  OFFSET is set either to 0 for the first n variables,
3498    then it is set to n.
3499    ACCESS_FUN is expected to be an affine chrec.  */
3500
3501 static bool
3502 init_omega_eq_with_af (omega_pb pb, unsigned eq,
3503                        unsigned int offset, tree access_fun,
3504                        struct data_dependence_relation *ddr)
3505 {
3506   switch (TREE_CODE (access_fun))
3507     {
3508     case POLYNOMIAL_CHREC:
3509       {
3510         tree left = CHREC_LEFT (access_fun);
3511         tree right = CHREC_RIGHT (access_fun);
3512         int var = CHREC_VARIABLE (access_fun);
3513         unsigned var_idx;
3514
3515         if (TREE_CODE (right) != INTEGER_CST)
3516           return false;
3517
3518         var_idx = index_in_loop_nest (var, DDR_LOOP_NEST (ddr));
3519         pb->eqs[eq].coef[offset + var_idx + 1] = int_cst_value (right);
3520
3521         /* Compute the innermost loop index.  */
3522         DDR_INNER_LOOP (ddr) = MAX (DDR_INNER_LOOP (ddr), var_idx);
3523
3524         if (offset == 0)
3525           pb->eqs[eq].coef[var_idx + DDR_NB_LOOPS (ddr) + 1]
3526             += int_cst_value (right);
3527
3528         switch (TREE_CODE (left))
3529           {
3530           case POLYNOMIAL_CHREC:
3531             return init_omega_eq_with_af (pb, eq, offset, left, ddr);
3532
3533           case INTEGER_CST:
3534             pb->eqs[eq].coef[0] += int_cst_value (left);
3535             return true;
3536
3537           default:
3538             return false;
3539           }
3540       }
3541
3542     case INTEGER_CST:
3543       pb->eqs[eq].coef[0] += int_cst_value (access_fun);
3544       return true;
3545
3546     default:
3547       return false;
3548     }
3549 }
3550
3551 /* As explained in the comments preceding init_omega_for_ddr, we have
3552    to set up a system for each loop level, setting outer loops
3553    variation to zero, and current loop variation to positive or zero.
3554    Save each lexico positive distance vector.  */
3555
3556 static void
3557 omega_extract_distance_vectors (omega_pb pb,
3558                                 struct data_dependence_relation *ddr)
3559 {
3560   int eq, geq;
3561   unsigned i, j;
3562   struct loop *loopi, *loopj;
3563   enum omega_result res;
3564
3565   /* Set a new problem for each loop in the nest.  The basis is the
3566      problem that we have initialized until now.  On top of this we
3567      add new constraints.  */
3568   for (i = 0; i <= DDR_INNER_LOOP (ddr)
3569          && VEC_iterate (loop_p, DDR_LOOP_NEST (ddr), i, loopi); i++)
3570     {
3571       int dist = 0;
3572       omega_pb copy = omega_alloc_problem (2 * DDR_NB_LOOPS (ddr),
3573                                            DDR_NB_LOOPS (ddr));
3574
3575       omega_copy_problem (copy, pb);
3576
3577       /* For all the outer loops "loop_j", add "dj = 0".  */
3578       for (j = 0;
3579            j < i && VEC_iterate (loop_p, DDR_LOOP_NEST (ddr), j, loopj); j++)
3580         {
3581           eq = omega_add_zero_eq (copy, omega_black);
3582           copy->eqs[eq].coef[j + 1] = 1;
3583         }
3584
3585       /* For "loop_i", add "0 <= di".  */
3586       geq = omega_add_zero_geq (copy, omega_black);
3587       copy->geqs[geq].coef[i + 1] = 1;
3588
3589       /* Reduce the constraint system, and test that the current
3590          problem is feasible.  */
3591       res = omega_simplify_problem (copy);
3592       if (res == omega_false
3593           || res == omega_unknown
3594           || copy->num_geqs > (int) DDR_NB_LOOPS (ddr))
3595         goto next_problem;
3596
3597       for (eq = 0; eq < copy->num_subs; eq++)
3598         if (copy->subs[eq].key == (int) i + 1)
3599           {
3600             dist = copy->subs[eq].coef[0];
3601             goto found_dist;
3602           }
3603
3604       if (dist == 0)
3605         {
3606           /* Reinitialize problem...  */
3607           omega_copy_problem (copy, pb);
3608           for (j = 0;
3609                j < i && VEC_iterate (loop_p, DDR_LOOP_NEST (ddr), j, loopj); j++)
3610             {
3611               eq = omega_add_zero_eq (copy, omega_black);
3612               copy->eqs[eq].coef[j + 1] = 1;
3613             }
3614
3615           /* ..., but this time "di = 1".  */
3616           eq = omega_add_zero_eq (copy, omega_black);
3617           copy->eqs[eq].coef[i + 1] = 1;
3618           copy->eqs[eq].coef[0] = -1;
3619
3620           res = omega_simplify_problem (copy);
3621           if (res == omega_false
3622               || res == omega_unknown
3623               || copy->num_geqs > (int) DDR_NB_LOOPS (ddr))
3624             goto next_problem;
3625
3626           for (eq = 0; eq < copy->num_subs; eq++)
3627             if (copy->subs[eq].key == (int) i + 1)
3628               {
3629                 dist = copy->subs[eq].coef[0];
3630                 goto found_dist;
3631               }
3632         }
3633
3634     found_dist:;
3635       /* Save the lexicographically positive distance vector.  */
3636       if (dist >= 0)
3637         {
3638           lambda_vector dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3639           lambda_vector dir_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3640
3641           dist_v[i] = dist;
3642
3643           for (eq = 0; eq < copy->num_subs; eq++)
3644             if (copy->subs[eq].key > 0)
3645               {
3646                 dist = copy->subs[eq].coef[0];
3647                 dist_v[copy->subs[eq].key - 1] = dist;
3648               }
3649
3650           for (j = 0; j < DDR_NB_LOOPS (ddr); j++)
3651             dir_v[j] = dir_from_dist (dist_v[j]);
3652
3653           save_dist_v (ddr, dist_v);
3654           save_dir_v (ddr, dir_v);
3655         }
3656
3657     next_problem:;
3658       omega_free_problem (copy);
3659     }
3660 }
3661
3662 /* This is called for each subscript of a tuple of data references:
3663    insert an equality for representing the conflicts.  */
3664
3665 static bool
3666 omega_setup_subscript (tree access_fun_a, tree access_fun_b,
3667                        struct data_dependence_relation *ddr,
3668                        omega_pb pb, bool *maybe_dependent)
3669 {
3670   int eq;
3671   tree type = signed_type_for_types (TREE_TYPE (access_fun_a),
3672                                      TREE_TYPE (access_fun_b));
3673   tree fun_a = chrec_convert (type, access_fun_a, NULL);
3674   tree fun_b = chrec_convert (type, access_fun_b, NULL);
3675   tree difference = chrec_fold_minus (type, fun_a, fun_b);
3676   tree minus_one;
3677
3678   /* When the fun_a - fun_b is not constant, the dependence is not
3679      captured by the classic distance vector representation.  */
3680   if (TREE_CODE (difference) != INTEGER_CST)
3681     return false;
3682
3683   /* ZIV test.  */
3684   if (ziv_subscript_p (fun_a, fun_b) && !integer_zerop (difference))
3685     {
3686       /* There is no dependence.  */
3687       *maybe_dependent = false;
3688       return true;
3689     }
3690
3691   minus_one = build_int_cst (type, -1);
3692   fun_b = chrec_fold_multiply (type, fun_b, minus_one);
3693
3694   eq = omega_add_zero_eq (pb, omega_black);
3695   if (!init_omega_eq_with_af (pb, eq, DDR_NB_LOOPS (ddr), fun_a, ddr)
3696       || !init_omega_eq_with_af (pb, eq, 0, fun_b, ddr))
3697     /* There is probably a dependence, but the system of
3698        constraints cannot be built: answer "don't know".  */
3699     return false;
3700
3701   /* GCD test.  */
3702   if (DDR_NB_LOOPS (ddr) != 0 && pb->eqs[eq].coef[0]
3703       && !int_divides_p (lambda_vector_gcd
3704                          ((lambda_vector) &(pb->eqs[eq].coef[1]),
3705                           2 * DDR_NB_LOOPS (ddr)),
3706                          pb->eqs[eq].coef[0]))
3707     {
3708       /* There is no dependence.  */
3709       *maybe_dependent = false;
3710       return true;
3711     }
3712
3713   return true;
3714 }
3715
3716 /* Helper function, same as init_omega_for_ddr but specialized for
3717    data references A and B.  */
3718
3719 static bool
3720 init_omega_for_ddr_1 (struct data_reference *dra, struct data_reference *drb,
3721                       struct data_dependence_relation *ddr,
3722                       omega_pb pb, bool *maybe_dependent)
3723 {
3724   unsigned i;
3725   int ineq;
3726   struct loop *loopi;
3727   unsigned nb_loops = DDR_NB_LOOPS (ddr);
3728
3729   /* Insert an equality per subscript.  */
3730   for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
3731     {
3732       if (!omega_setup_subscript (DR_ACCESS_FN (dra, i), DR_ACCESS_FN (drb, i),
3733                                   ddr, pb, maybe_dependent))
3734         return false;
3735       else if (*maybe_dependent == false)
3736         {
3737           /* There is no dependence.  */
3738           DDR_ARE_DEPENDENT (ddr) = chrec_known;
3739           return true;
3740         }
3741     }
3742
3743   /* Insert inequalities: constraints corresponding to the iteration
3744      domain, i.e. the loops surrounding the references "loop_x" and
3745      the distance variables "dx".  The layout of the OMEGA
3746      representation is as follows:
3747      - coef[0] is the constant
3748      - coef[1..nb_loops] are the protected variables that will not be
3749      removed by the solver: the "dx"
3750      - coef[nb_loops + 1, 2*nb_loops] are the loop variables: "loop_x".
3751   */
3752   for (i = 0; i <= DDR_INNER_LOOP (ddr)
3753          && VEC_iterate (loop_p, DDR_LOOP_NEST (ddr), i, loopi); i++)
3754     {
3755       HOST_WIDE_INT nbi = max_stmt_executions_int (loopi, true);
3756
3757       /* 0 <= loop_x */
3758       ineq = omega_add_zero_geq (pb, omega_black);
3759       pb->geqs[ineq].coef[i + nb_loops + 1] = 1;
3760
3761       /* 0 <= loop_x + dx */
3762       ineq = omega_add_zero_geq (pb, omega_black);
3763       pb->geqs[ineq].coef[i + nb_loops + 1] = 1;
3764       pb->geqs[ineq].coef[i + 1] = 1;
3765
3766       if (nbi != -1)
3767         {
3768           /* loop_x <= nb_iters */
3769           ineq = omega_add_zero_geq (pb, omega_black);
3770           pb->geqs[ineq].coef[i + nb_loops + 1] = -1;
3771           pb->geqs[ineq].coef[0] = nbi;
3772
3773           /* loop_x + dx <= nb_iters */
3774           ineq = omega_add_zero_geq (pb, omega_black);
3775           pb->geqs[ineq].coef[i + nb_loops + 1] = -1;
3776           pb->geqs[ineq].coef[i + 1] = -1;
3777           pb->geqs[ineq].coef[0] = nbi;
3778
3779           /* A step "dx" bigger than nb_iters is not feasible, so
3780              add "0 <= nb_iters + dx",  */
3781           ineq = omega_add_zero_geq (pb, omega_black);
3782           pb->geqs[ineq].coef[i + 1] = 1;
3783           pb->geqs[ineq].coef[0] = nbi;
3784           /* and "dx <= nb_iters".  */
3785           ineq = omega_add_zero_geq (pb, omega_black);
3786           pb->geqs[ineq].coef[i + 1] = -1;
3787           pb->geqs[ineq].coef[0] = nbi;
3788         }
3789     }
3790
3791   omega_extract_distance_vectors (pb, ddr);
3792
3793   return true;
3794 }
3795
3796 /* Sets up the Omega dependence problem for the data dependence
3797    relation DDR.  Returns false when the constraint system cannot be
3798    built, ie. when the test answers "don't know".  Returns true
3799    otherwise, and when independence has been proved (using one of the
3800    trivial dependence test), set MAYBE_DEPENDENT to false, otherwise
3801    set MAYBE_DEPENDENT to true.
3802
3803    Example: for setting up the dependence system corresponding to the
3804    conflicting accesses
3805
3806    | loop_i
3807    |   loop_j
3808    |     A[i, i+1] = ...
3809    |     ... A[2*j, 2*(i + j)]
3810    |   endloop_j
3811    | endloop_i
3812
3813    the following constraints come from the iteration domain:
3814
3815    0 <= i <= Ni
3816    0 <= i + di <= Ni
3817    0 <= j <= Nj
3818    0 <= j + dj <= Nj
3819
3820    where di, dj are the distance variables.  The constraints
3821    representing the conflicting elements are:
3822
3823    i = 2 * (j + dj)
3824    i + 1 = 2 * (i + di + j + dj)
3825
3826    For asking that the resulting distance vector (di, dj) be
3827    lexicographically positive, we insert the constraint "di >= 0".  If
3828    "di = 0" in the solution, we fix that component to zero, and we
3829    look at the inner loops: we set a new problem where all the outer
3830    loop distances are zero, and fix this inner component to be
3831    positive.  When one of the components is positive, we save that
3832    distance, and set a new problem where the distance on this loop is
3833    zero, searching for other distances in the inner loops.  Here is
3834    the classic example that illustrates that we have to set for each
3835    inner loop a new problem:
3836
3837    | loop_1
3838    |   loop_2
3839    |     A[10]
3840    |   endloop_2
3841    | endloop_1
3842
3843    we have to save two distances (1, 0) and (0, 1).
3844
3845    Given two array references, refA and refB, we have to set the
3846    dependence problem twice, refA vs. refB and refB vs. refA, and we
3847    cannot do a single test, as refB might occur before refA in the
3848    inner loops, and the contrary when considering outer loops: ex.
3849
3850    | loop_0
3851    |   loop_1
3852    |     loop_2
3853    |       T[{1,+,1}_2][{1,+,1}_1]  // refA
3854    |       T[{2,+,1}_2][{0,+,1}_1]  // refB
3855    |     endloop_2
3856    |   endloop_1
3857    | endloop_0
3858
3859    refB touches the elements in T before refA, and thus for the same
3860    loop_0 refB precedes refA: ie. the distance vector (0, 1, -1)
3861    but for successive loop_0 iterations, we have (1, -1, 1)
3862
3863    The Omega solver expects the distance variables ("di" in the
3864    previous example) to come first in the constraint system (as
3865    variables to be protected, or "safe" variables), the constraint
3866    system is built using the following layout:
3867
3868    "cst | distance vars | index vars".
3869 */
3870
3871 static bool
3872 init_omega_for_ddr (struct data_dependence_relation *ddr,
3873                     bool *maybe_dependent)
3874 {
3875   omega_pb pb;
3876   bool res = false;
3877
3878   *maybe_dependent = true;
3879
3880   if (same_access_functions (ddr))
3881     {
3882       unsigned j;
3883       lambda_vector dir_v;
3884
3885       /* Save the 0 vector.  */
3886       save_dist_v (ddr, lambda_vector_new (DDR_NB_LOOPS (ddr)));
3887       dir_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3888       for (j = 0; j < DDR_NB_LOOPS (ddr); j++)
3889         dir_v[j] = dir_equal;
3890       save_dir_v (ddr, dir_v);
3891
3892       /* Save the dependences carried by outer loops.  */
3893       pb = omega_alloc_problem (2 * DDR_NB_LOOPS (ddr), DDR_NB_LOOPS (ddr));
3894       res = init_omega_for_ddr_1 (DDR_A (ddr), DDR_B (ddr), ddr, pb,
3895                                   maybe_dependent);
3896       omega_free_problem (pb);
3897       return res;
3898     }
3899
3900   /* Omega expects the protected variables (those that have to be kept
3901      after elimination) to appear first in the constraint system.
3902      These variables are the distance variables.  In the following
3903      initialization we declare NB_LOOPS safe variables, and the total
3904      number of variables for the constraint system is 2*NB_LOOPS.  */
3905   pb = omega_alloc_problem (2 * DDR_NB_LOOPS (ddr), DDR_NB_LOOPS (ddr));
3906   res = init_omega_for_ddr_1 (DDR_A (ddr), DDR_B (ddr), ddr, pb,
3907                               maybe_dependent);
3908   omega_free_problem (pb);
3909
3910   /* Stop computation if not decidable, or no dependence.  */
3911   if (res == false || *maybe_dependent == false)
3912     return res;
3913
3914   pb = omega_alloc_problem (2 * DDR_NB_LOOPS (ddr), DDR_NB_LOOPS (ddr));
3915   res = init_omega_for_ddr_1 (DDR_B (ddr), DDR_A (ddr), ddr, pb,
3916                               maybe_dependent);
3917   omega_free_problem (pb);
3918
3919   return res;
3920 }
3921
3922 /* Return true when DDR contains the same information as that stored
3923    in DIR_VECTS and in DIST_VECTS, return false otherwise.   */
3924
3925 static bool
3926 ddr_consistent_p (FILE *file,
3927                   struct data_dependence_relation *ddr,
3928                   VEC (lambda_vector, heap) *dist_vects,
3929                   VEC (lambda_vector, heap) *dir_vects)
3930 {
3931   unsigned int i, j;
3932
3933   /* If dump_file is set, output there.  */
3934   if (dump_file && (dump_flags & TDF_DETAILS))
3935     file = dump_file;
3936
3937   if (VEC_length (lambda_vector, dist_vects) != DDR_NUM_DIST_VECTS (ddr))
3938     {
3939       lambda_vector b_dist_v;
3940       fprintf (file, "\n(Number of distance vectors differ: Banerjee has %d, Omega has %d.\n",
3941                VEC_length (lambda_vector, dist_vects),
3942                DDR_NUM_DIST_VECTS (ddr));
3943
3944       fprintf (file, "Banerjee dist vectors:\n");
3945       FOR_EACH_VEC_ELT (lambda_vector, dist_vects, i, b_dist_v)
3946         print_lambda_vector (file, b_dist_v, DDR_NB_LOOPS (ddr));
3947
3948       fprintf (file, "Omega dist vectors:\n");
3949       for (i = 0; i < DDR_NUM_DIST_VECTS (ddr); i++)
3950         print_lambda_vector (file, DDR_DIST_VECT (ddr, i), DDR_NB_LOOPS (ddr));
3951
3952       fprintf (file, "data dependence relation:\n");
3953       dump_data_dependence_relation (file, ddr);
3954
3955       fprintf (file, ")\n");
3956       return false;
3957     }
3958
3959   if (VEC_length (lambda_vector, dir_vects) != DDR_NUM_DIR_VECTS (ddr))
3960     {
3961       fprintf (file, "\n(Number of direction vectors differ: Banerjee has %d, Omega has %d.)\n",
3962                VEC_length (lambda_vector, dir_vects),
3963                DDR_NUM_DIR_VECTS (ddr));
3964       return false;
3965     }
3966
3967   for (i = 0; i < DDR_NUM_DIST_VECTS (ddr); i++)
3968     {
3969       lambda_vector a_dist_v;
3970       lambda_vector b_dist_v = DDR_DIST_VECT (ddr, i);
3971
3972       /* Distance vectors are not ordered in the same way in the DDR
3973          and in the DIST_VECTS: search for a matching vector.  */
3974       FOR_EACH_VEC_ELT (lambda_vector, dist_vects, j, a_dist_v)
3975         if (lambda_vector_equal (a_dist_v, b_dist_v, DDR_NB_LOOPS (ddr)))
3976           break;
3977
3978       if (j == VEC_length (lambda_vector, dist_vects))
3979         {
3980           fprintf (file, "\n(Dist vectors from the first dependence analyzer:\n");
3981           print_dist_vectors (file, dist_vects, DDR_NB_LOOPS (ddr));
3982           fprintf (file, "not found in Omega dist vectors:\n");
3983           print_dist_vectors (file, DDR_DIST_VECTS (ddr), DDR_NB_LOOPS (ddr));
3984           fprintf (file, "data dependence relation:\n");
3985           dump_data_dependence_relation (file, ddr);
3986           fprintf (file, ")\n");
3987         }
3988     }
3989
3990   for (i = 0; i < DDR_NUM_DIR_VECTS (ddr); i++)
3991     {
3992       lambda_vector a_dir_v;
3993       lambda_vector b_dir_v = DDR_DIR_VECT (ddr, i);
3994
3995       /* Direction vectors are not ordered in the same way in the DDR
3996          and in the DIR_VECTS: search for a matching vector.  */
3997       FOR_EACH_VEC_ELT (lambda_vector, dir_vects, j, a_dir_v)
3998         if (lambda_vector_equal (a_dir_v, b_dir_v, DDR_NB_LOOPS (ddr)))
3999           break;
4000
4001       if (j == VEC_length (lambda_vector, dist_vects))
4002         {
4003           fprintf (file, "\n(Dir vectors from the first dependence analyzer:\n");
4004           print_dir_vectors (file, dir_vects, DDR_NB_LOOPS (ddr));
4005           fprintf (file, "not found in Omega dir vectors:\n");
4006           print_dir_vectors (file, DDR_DIR_VECTS (ddr), DDR_NB_LOOPS (ddr));
4007           fprintf (file, "data dependence relation:\n");
4008           dump_data_dependence_relation (file, ddr);
4009           fprintf (file, ")\n");
4010         }
4011     }
4012
4013   return true;
4014 }
4015
4016 /* This computes the affine dependence relation between A and B with
4017    respect to LOOP_NEST.  CHREC_KNOWN is used for representing the
4018    independence between two accesses, while CHREC_DONT_KNOW is used
4019    for representing the unknown relation.
4020
4021    Note that it is possible to stop the computation of the dependence
4022    relation the first time we detect a CHREC_KNOWN element for a given
4023    subscript.  */
4024
4025 static void
4026 compute_affine_dependence (struct data_dependence_relation *ddr,
4027                            struct loop *loop_nest)
4028 {
4029   struct data_reference *dra = DDR_A (ddr);
4030   struct data_reference *drb = DDR_B (ddr);
4031
4032   if (dump_file && (dump_flags & TDF_DETAILS))
4033     {
4034       fprintf (dump_file, "(compute_affine_dependence\n");
4035       fprintf (dump_file, "  (stmt_a = \n");
4036       print_gimple_stmt (dump_file, DR_STMT (dra), 0, 0);
4037       fprintf (dump_file, ")\n  (stmt_b = \n");
4038       print_gimple_stmt (dump_file, DR_STMT (drb), 0, 0);
4039       fprintf (dump_file, ")\n");
4040     }
4041
4042   /* Analyze only when the dependence relation is not yet known.  */
4043   if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE
4044       && !DDR_SELF_REFERENCE (ddr))
4045     {
4046       dependence_stats.num_dependence_tests++;
4047
4048       if (access_functions_are_affine_or_constant_p (dra, loop_nest)
4049           && access_functions_are_affine_or_constant_p (drb, loop_nest))
4050         {
4051           if (flag_check_data_deps)
4052             {
4053               /* Compute the dependences using the first algorithm.  */
4054               subscript_dependence_tester (ddr, loop_nest);
4055
4056               if (dump_file && (dump_flags & TDF_DETAILS))
4057                 {
4058                   fprintf (dump_file, "\n\nBanerjee Analyzer\n");
4059                   dump_data_dependence_relation (dump_file, ddr);
4060                 }
4061
4062               if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE)
4063                 {
4064                   bool maybe_dependent;
4065                   VEC (lambda_vector, heap) *dir_vects, *dist_vects;
4066
4067                   /* Save the result of the first DD analyzer.  */
4068                   dist_vects = DDR_DIST_VECTS (ddr);
4069                   dir_vects = DDR_DIR_VECTS (ddr);
4070
4071                   /* Reset the information.  */
4072                   DDR_DIST_VECTS (ddr) = NULL;
4073                   DDR_DIR_VECTS (ddr) = NULL;
4074
4075                   /* Compute the same information using Omega.  */
4076                   if (!init_omega_for_ddr (ddr, &maybe_dependent))
4077                     goto csys_dont_know;
4078
4079                   if (dump_file && (dump_flags & TDF_DETAILS))
4080                     {
4081                       fprintf (dump_file, "Omega Analyzer\n");
4082                       dump_data_dependence_relation (dump_file, ddr);
4083                     }
4084
4085                   /* Check that we get the same information.  */
4086                   if (maybe_dependent)
4087                     gcc_assert (ddr_consistent_p (stderr, ddr, dist_vects,
4088                                                   dir_vects));
4089                 }
4090             }
4091           else
4092             subscript_dependence_tester (ddr, loop_nest);
4093         }
4094
4095       /* As a last case, if the dependence cannot be determined, or if
4096          the dependence is considered too difficult to determine, answer
4097          "don't know".  */
4098       else
4099         {
4100         csys_dont_know:;
4101           dependence_stats.num_dependence_undetermined++;
4102
4103           if (dump_file && (dump_flags & TDF_DETAILS))
4104             {
4105               fprintf (dump_file, "Data ref a:\n");
4106               dump_data_reference (dump_file, dra);
4107               fprintf (dump_file, "Data ref b:\n");
4108               dump_data_reference (dump_file, drb);
4109               fprintf (dump_file, "affine dependence test not usable: access function not affine or constant.\n");
4110             }
4111           finalize_ddr_dependent (ddr, chrec_dont_know);
4112         }
4113     }
4114
4115   if (dump_file && (dump_flags & TDF_DETAILS))
4116     fprintf (dump_file, ")\n");
4117 }
4118
4119 /* This computes the dependence relation for the same data
4120    reference into DDR.  */
4121
4122 void
4123 compute_self_dependence (struct data_dependence_relation *ddr)
4124 {
4125   unsigned int i;
4126   struct subscript *subscript;
4127
4128   if (DDR_ARE_DEPENDENT (ddr) != NULL_TREE)
4129     return;
4130
4131   for (i = 0; VEC_iterate (subscript_p, DDR_SUBSCRIPTS (ddr), i, subscript);
4132        i++)
4133     {
4134       if (SUB_CONFLICTS_IN_A (subscript))
4135         free_conflict_function (SUB_CONFLICTS_IN_A (subscript));
4136       if (SUB_CONFLICTS_IN_B (subscript))
4137         free_conflict_function (SUB_CONFLICTS_IN_B (subscript));
4138
4139       /* The accessed index overlaps for each iteration.  */
4140       SUB_CONFLICTS_IN_A (subscript)
4141         = conflict_fn (1, affine_fn_cst (integer_zero_node));
4142       SUB_CONFLICTS_IN_B (subscript)
4143         = conflict_fn (1, affine_fn_cst (integer_zero_node));
4144       SUB_LAST_CONFLICT (subscript) = chrec_dont_know;
4145     }
4146
4147   /* The distance vector is the zero vector.  */
4148   save_dist_v (ddr, lambda_vector_new (DDR_NB_LOOPS (ddr)));
4149   save_dir_v (ddr, lambda_vector_new (DDR_NB_LOOPS (ddr)));
4150 }
4151
4152 /* Compute in DEPENDENCE_RELATIONS the data dependence graph for all
4153    the data references in DATAREFS, in the LOOP_NEST.  When
4154    COMPUTE_SELF_AND_RR is FALSE, don't compute read-read and self
4155    relations.  */
4156
4157 void
4158 compute_all_dependences (VEC (data_reference_p, heap) *datarefs,
4159                          VEC (ddr_p, heap) **dependence_relations,
4160                          VEC (loop_p, heap) *loop_nest,
4161                          bool compute_self_and_rr)
4162 {
4163   struct data_dependence_relation *ddr;
4164   struct data_reference *a, *b;
4165   unsigned int i, j;
4166
4167   FOR_EACH_VEC_ELT (data_reference_p, datarefs, i, a)
4168     for (j = i + 1; VEC_iterate (data_reference_p, datarefs, j, b); j++)
4169       if (DR_IS_WRITE (a) || DR_IS_WRITE (b) || compute_self_and_rr)
4170         {
4171           ddr = initialize_data_dependence_relation (a, b, loop_nest);
4172           VEC_safe_push (ddr_p, heap, *dependence_relations, ddr);
4173           if (loop_nest)
4174             compute_affine_dependence (ddr, VEC_index (loop_p, loop_nest, 0));
4175         }
4176
4177   if (compute_self_and_rr)
4178     FOR_EACH_VEC_ELT (data_reference_p, datarefs, i, a)
4179       {
4180         ddr = initialize_data_dependence_relation (a, a, loop_nest);
4181         VEC_safe_push (ddr_p, heap, *dependence_relations, ddr);
4182         compute_self_dependence (ddr);
4183       }
4184 }
4185
4186 /* Stores the locations of memory references in STMT to REFERENCES.  Returns
4187    true if STMT clobbers memory, false otherwise.  */
4188
4189 bool
4190 get_references_in_stmt (gimple stmt, VEC (data_ref_loc, heap) **references)
4191 {
4192   bool clobbers_memory = false;
4193   data_ref_loc *ref;
4194   tree *op0, *op1;
4195   enum gimple_code stmt_code = gimple_code (stmt);
4196
4197   *references = NULL;
4198
4199   /* ASM_EXPR and CALL_EXPR may embed arbitrary side effects.
4200      Calls have side-effects, except those to const or pure
4201      functions.  */
4202   if ((stmt_code == GIMPLE_CALL
4203        && !(gimple_call_flags (stmt) & (ECF_CONST | ECF_PURE)))
4204       || (stmt_code == GIMPLE_ASM
4205           && gimple_asm_volatile_p (stmt)))
4206     clobbers_memory = true;
4207
4208   if (!gimple_vuse (stmt))
4209     return clobbers_memory;
4210
4211   if (stmt_code == GIMPLE_ASSIGN)
4212     {
4213       tree base;
4214       op0 = gimple_assign_lhs_ptr (stmt);
4215       op1 = gimple_assign_rhs1_ptr (stmt);
4216
4217       if (DECL_P (*op1)
4218           || (REFERENCE_CLASS_P (*op1)
4219               && (base = get_base_address (*op1))
4220               && TREE_CODE (base) != SSA_NAME))
4221         {
4222           ref = VEC_safe_push (data_ref_loc, heap, *references, NULL);
4223           ref->pos = op1;
4224           ref->is_read = true;
4225         }
4226     }
4227   else if (stmt_code == GIMPLE_CALL)
4228     {
4229       unsigned i, n;
4230
4231       op0 = gimple_call_lhs_ptr (stmt);
4232       n = gimple_call_num_args (stmt);
4233       for (i = 0; i < n; i++)
4234         {
4235           op1 = gimple_call_arg_ptr (stmt, i);
4236
4237           if (DECL_P (*op1)
4238               || (REFERENCE_CLASS_P (*op1) && get_base_address (*op1)))
4239             {
4240               ref = VEC_safe_push (data_ref_loc, heap, *references, NULL);
4241               ref->pos = op1;
4242               ref->is_read = true;
4243             }
4244         }
4245     }
4246   else
4247     return clobbers_memory;
4248
4249   if (*op0
4250       && (DECL_P (*op0)
4251           || (REFERENCE_CLASS_P (*op0) && get_base_address (*op0))))
4252     {
4253       ref = VEC_safe_push (data_ref_loc, heap, *references, NULL);
4254       ref->pos = op0;
4255       ref->is_read = false;
4256     }
4257   return clobbers_memory;
4258 }
4259
4260 /* Stores the data references in STMT to DATAREFS.  If there is an unanalyzable
4261    reference, returns false, otherwise returns true.  NEST is the outermost
4262    loop of the loop nest in which the references should be analyzed.  */
4263
4264 bool
4265 find_data_references_in_stmt (struct loop *nest, gimple stmt,
4266                               VEC (data_reference_p, heap) **datarefs)
4267 {
4268   unsigned i;
4269   VEC (data_ref_loc, heap) *references;
4270   data_ref_loc *ref;
4271   bool ret = true;
4272   data_reference_p dr;
4273
4274   if (get_references_in_stmt (stmt, &references))
4275     {
4276       VEC_free (data_ref_loc, heap, references);
4277       return false;
4278     }
4279
4280   FOR_EACH_VEC_ELT (data_ref_loc, references, i, ref)
4281     {
4282       dr = create_data_ref (nest, loop_containing_stmt (stmt),
4283                             *ref->pos, stmt, ref->is_read);
4284       gcc_assert (dr != NULL);
4285       VEC_safe_push (data_reference_p, heap, *datarefs, dr);
4286     }
4287   VEC_free (data_ref_loc, heap, references);
4288   return ret;
4289 }
4290
4291 /* Stores the data references in STMT to DATAREFS.  If there is an
4292    unanalyzable reference, returns false, otherwise returns true.
4293    NEST is the outermost loop of the loop nest in which the references
4294    should be instantiated, LOOP is the loop in which the references
4295    should be analyzed.  */
4296
4297 bool
4298 graphite_find_data_references_in_stmt (loop_p nest, loop_p loop, gimple stmt,
4299                                        VEC (data_reference_p, heap) **datarefs)
4300 {
4301   unsigned i;
4302   VEC (data_ref_loc, heap) *references;
4303   data_ref_loc *ref;
4304   bool ret = true;
4305   data_reference_p dr;
4306
4307   if (get_references_in_stmt (stmt, &references))
4308     {
4309       VEC_free (data_ref_loc, heap, references);
4310       return false;
4311     }
4312
4313   FOR_EACH_VEC_ELT (data_ref_loc, references, i, ref)
4314     {
4315       dr = create_data_ref (nest, loop, *ref->pos, stmt, ref->is_read);
4316       gcc_assert (dr != NULL);
4317       VEC_safe_push (data_reference_p, heap, *datarefs, dr);
4318     }
4319
4320   VEC_free (data_ref_loc, heap, references);
4321   return ret;
4322 }
4323
4324 /* Search the data references in LOOP, and record the information into
4325    DATAREFS.  Returns chrec_dont_know when failing to analyze a
4326    difficult case, returns NULL_TREE otherwise.  */
4327
4328 tree
4329 find_data_references_in_bb (struct loop *loop, basic_block bb,
4330                             VEC (data_reference_p, heap) **datarefs)
4331 {
4332   gimple_stmt_iterator bsi;
4333
4334   for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
4335     {
4336       gimple stmt = gsi_stmt (bsi);
4337
4338       if (!find_data_references_in_stmt (loop, stmt, datarefs))
4339         {
4340           struct data_reference *res;
4341           res = XCNEW (struct data_reference);
4342           VEC_safe_push (data_reference_p, heap, *datarefs, res);
4343
4344           return chrec_dont_know;
4345         }
4346     }
4347
4348   return NULL_TREE;
4349 }
4350
4351 /* Search the data references in LOOP, and record the information into
4352    DATAREFS.  Returns chrec_dont_know when failing to analyze a
4353    difficult case, returns NULL_TREE otherwise.
4354
4355    TODO: This function should be made smarter so that it can handle address
4356    arithmetic as if they were array accesses, etc.  */
4357
4358 tree
4359 find_data_references_in_loop (struct loop *loop,
4360                               VEC (data_reference_p, heap) **datarefs)
4361 {
4362   basic_block bb, *bbs;
4363   unsigned int i;
4364
4365   bbs = get_loop_body_in_dom_order (loop);
4366
4367   for (i = 0; i < loop->num_nodes; i++)
4368     {
4369       bb = bbs[i];
4370
4371       if (find_data_references_in_bb (loop, bb, datarefs) == chrec_dont_know)
4372         {
4373           free (bbs);
4374           return chrec_dont_know;
4375         }
4376     }
4377   free (bbs);
4378
4379   return NULL_TREE;
4380 }
4381
4382 /* Recursive helper function.  */
4383
4384 static bool
4385 find_loop_nest_1 (struct loop *loop, VEC (loop_p, heap) **loop_nest)
4386 {
4387   /* Inner loops of the nest should not contain siblings.  Example:
4388      when there are two consecutive loops,
4389
4390      | loop_0
4391      |   loop_1
4392      |     A[{0, +, 1}_1]
4393      |   endloop_1
4394      |   loop_2
4395      |     A[{0, +, 1}_2]
4396      |   endloop_2
4397      | endloop_0
4398
4399      the dependence relation cannot be captured by the distance
4400      abstraction.  */
4401   if (loop->next)
4402     return false;
4403
4404   VEC_safe_push (loop_p, heap, *loop_nest, loop);
4405   if (loop->inner)
4406     return find_loop_nest_1 (loop->inner, loop_nest);
4407   return true;
4408 }
4409
4410 /* Return false when the LOOP is not well nested.  Otherwise return
4411    true and insert in LOOP_NEST the loops of the nest.  LOOP_NEST will
4412    contain the loops from the outermost to the innermost, as they will
4413    appear in the classic distance vector.  */
4414
4415 bool
4416 find_loop_nest (struct loop *loop, VEC (loop_p, heap) **loop_nest)
4417 {
4418   VEC_safe_push (loop_p, heap, *loop_nest, loop);
4419   if (loop->inner)
4420     return find_loop_nest_1 (loop->inner, loop_nest);
4421   return true;
4422 }
4423
4424 /* Returns true when the data dependences have been computed, false otherwise.
4425    Given a loop nest LOOP, the following vectors are returned:
4426    DATAREFS is initialized to all the array elements contained in this loop,
4427    DEPENDENCE_RELATIONS contains the relations between the data references.
4428    Compute read-read and self relations if
4429    COMPUTE_SELF_AND_READ_READ_DEPENDENCES is TRUE.  */
4430
4431 bool
4432 compute_data_dependences_for_loop (struct loop *loop,
4433                                    bool compute_self_and_read_read_dependences,
4434                                    VEC (loop_p, heap) **loop_nest,
4435                                    VEC (data_reference_p, heap) **datarefs,
4436                                    VEC (ddr_p, heap) **dependence_relations)
4437 {
4438   bool res = true;
4439
4440   memset (&dependence_stats, 0, sizeof (dependence_stats));
4441
4442   /* If the loop nest is not well formed, or one of the data references
4443      is not computable, give up without spending time to compute other
4444      dependences.  */
4445   if (!loop
4446       || !find_loop_nest (loop, loop_nest)
4447       || find_data_references_in_loop (loop, datarefs) == chrec_dont_know)
4448     {
4449       struct data_dependence_relation *ddr;
4450
4451       /* Insert a single relation into dependence_relations:
4452          chrec_dont_know.  */
4453       ddr = initialize_data_dependence_relation (NULL, NULL, *loop_nest);
4454       VEC_safe_push (ddr_p, heap, *dependence_relations, ddr);
4455       res = false;
4456     }
4457   else
4458     compute_all_dependences (*datarefs, dependence_relations, *loop_nest,
4459                              compute_self_and_read_read_dependences);
4460
4461   if (dump_file && (dump_flags & TDF_STATS))
4462     {
4463       fprintf (dump_file, "Dependence tester statistics:\n");
4464
4465       fprintf (dump_file, "Number of dependence tests: %d\n",
4466                dependence_stats.num_dependence_tests);
4467       fprintf (dump_file, "Number of dependence tests classified dependent: %d\n",
4468                dependence_stats.num_dependence_dependent);
4469       fprintf (dump_file, "Number of dependence tests classified independent: %d\n",
4470                dependence_stats.num_dependence_independent);
4471       fprintf (dump_file, "Number of undetermined dependence tests: %d\n",
4472                dependence_stats.num_dependence_undetermined);
4473
4474       fprintf (dump_file, "Number of subscript tests: %d\n",
4475                dependence_stats.num_subscript_tests);
4476       fprintf (dump_file, "Number of undetermined subscript tests: %d\n",
4477                dependence_stats.num_subscript_undetermined);
4478       fprintf (dump_file, "Number of same subscript function: %d\n",
4479                dependence_stats.num_same_subscript_function);
4480
4481       fprintf (dump_file, "Number of ziv tests: %d\n",
4482                dependence_stats.num_ziv);
4483       fprintf (dump_file, "Number of ziv tests returning dependent: %d\n",
4484                dependence_stats.num_ziv_dependent);
4485       fprintf (dump_file, "Number of ziv tests returning independent: %d\n",
4486                dependence_stats.num_ziv_independent);
4487       fprintf (dump_file, "Number of ziv tests unimplemented: %d\n",
4488                dependence_stats.num_ziv_unimplemented);
4489
4490       fprintf (dump_file, "Number of siv tests: %d\n",
4491                dependence_stats.num_siv);
4492       fprintf (dump_file, "Number of siv tests returning dependent: %d\n",
4493                dependence_stats.num_siv_dependent);
4494       fprintf (dump_file, "Number of siv tests returning independent: %d\n",
4495                dependence_stats.num_siv_independent);
4496       fprintf (dump_file, "Number of siv tests unimplemented: %d\n",
4497                dependence_stats.num_siv_unimplemented);
4498
4499       fprintf (dump_file, "Number of miv tests: %d\n",
4500                dependence_stats.num_miv);
4501       fprintf (dump_file, "Number of miv tests returning dependent: %d\n",
4502                dependence_stats.num_miv_dependent);
4503       fprintf (dump_file, "Number of miv tests returning independent: %d\n",
4504                dependence_stats.num_miv_independent);
4505       fprintf (dump_file, "Number of miv tests unimplemented: %d\n",
4506                dependence_stats.num_miv_unimplemented);
4507     }
4508
4509   return res;
4510 }
4511
4512 /* Returns true when the data dependences for the basic block BB have been
4513    computed, false otherwise.
4514    DATAREFS is initialized to all the array elements contained in this basic
4515    block, DEPENDENCE_RELATIONS contains the relations between the data
4516    references. Compute read-read and self relations if
4517    COMPUTE_SELF_AND_READ_READ_DEPENDENCES is TRUE.  */
4518 bool
4519 compute_data_dependences_for_bb (basic_block bb,
4520                                  bool compute_self_and_read_read_dependences,
4521                                  VEC (data_reference_p, heap) **datarefs,
4522                                  VEC (ddr_p, heap) **dependence_relations)
4523 {
4524   if (find_data_references_in_bb (NULL, bb, datarefs) == chrec_dont_know)
4525     return false;
4526
4527   compute_all_dependences (*datarefs, dependence_relations, NULL,
4528                            compute_self_and_read_read_dependences);
4529   return true;
4530 }
4531
4532 /* Entry point (for testing only).  Analyze all the data references
4533    and the dependence relations in LOOP.
4534
4535    The data references are computed first.
4536
4537    A relation on these nodes is represented by a complete graph.  Some
4538    of the relations could be of no interest, thus the relations can be
4539    computed on demand.
4540
4541    In the following function we compute all the relations.  This is
4542    just a first implementation that is here for:
4543    - for showing how to ask for the dependence relations,
4544    - for the debugging the whole dependence graph,
4545    - for the dejagnu testcases and maintenance.
4546
4547    It is possible to ask only for a part of the graph, avoiding to
4548    compute the whole dependence graph.  The computed dependences are
4549    stored in a knowledge base (KB) such that later queries don't
4550    recompute the same information.  The implementation of this KB is
4551    transparent to the optimizer, and thus the KB can be changed with a
4552    more efficient implementation, or the KB could be disabled.  */
4553 static void
4554 analyze_all_data_dependences (struct loop *loop)
4555 {
4556   unsigned int i;
4557   int nb_data_refs = 10;
4558   VEC (data_reference_p, heap) *datarefs =
4559     VEC_alloc (data_reference_p, heap, nb_data_refs);
4560   VEC (ddr_p, heap) *dependence_relations =
4561     VEC_alloc (ddr_p, heap, nb_data_refs * nb_data_refs);
4562   VEC (loop_p, heap) *loop_nest = VEC_alloc (loop_p, heap, 3);
4563
4564   /* Compute DDs on the whole function.  */
4565   compute_data_dependences_for_loop (loop, false, &loop_nest, &datarefs,
4566                                      &dependence_relations);
4567
4568   if (dump_file)
4569     {
4570       dump_data_dependence_relations (dump_file, dependence_relations);
4571       fprintf (dump_file, "\n\n");
4572
4573       if (dump_flags & TDF_DETAILS)
4574         dump_dist_dir_vectors (dump_file, dependence_relations);
4575
4576       if (dump_flags & TDF_STATS)
4577         {
4578           unsigned nb_top_relations = 0;
4579           unsigned nb_bot_relations = 0;
4580           unsigned nb_chrec_relations = 0;
4581           struct data_dependence_relation *ddr;
4582
4583           FOR_EACH_VEC_ELT (ddr_p, dependence_relations, i, ddr)
4584             {
4585               if (chrec_contains_undetermined (DDR_ARE_DEPENDENT (ddr)))
4586                 nb_top_relations++;
4587
4588               else if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
4589                 nb_bot_relations++;
4590
4591               else
4592                 nb_chrec_relations++;
4593             }
4594
4595           gather_stats_on_scev_database ();
4596         }
4597     }
4598
4599   VEC_free (loop_p, heap, loop_nest);
4600   free_dependence_relations (dependence_relations);
4601   free_data_refs (datarefs);
4602 }
4603
4604 /* Computes all the data dependences and check that the results of
4605    several analyzers are the same.  */
4606
4607 void
4608 tree_check_data_deps (void)
4609 {
4610   loop_iterator li;
4611   struct loop *loop_nest;
4612
4613   FOR_EACH_LOOP (li, loop_nest, 0)
4614     analyze_all_data_dependences (loop_nest);
4615 }
4616
4617 /* Free the memory used by a data dependence relation DDR.  */
4618
4619 void
4620 free_dependence_relation (struct data_dependence_relation *ddr)
4621 {
4622   if (ddr == NULL)
4623     return;
4624
4625   if (DDR_SUBSCRIPTS (ddr))
4626     free_subscripts (DDR_SUBSCRIPTS (ddr));
4627   if (DDR_DIST_VECTS (ddr))
4628     VEC_free (lambda_vector, heap, DDR_DIST_VECTS (ddr));
4629   if (DDR_DIR_VECTS (ddr))
4630     VEC_free (lambda_vector, heap, DDR_DIR_VECTS (ddr));
4631
4632   free (ddr);
4633 }
4634
4635 /* Free the memory used by the data dependence relations from
4636    DEPENDENCE_RELATIONS.  */
4637
4638 void
4639 free_dependence_relations (VEC (ddr_p, heap) *dependence_relations)
4640 {
4641   unsigned int i;
4642   struct data_dependence_relation *ddr;
4643
4644   FOR_EACH_VEC_ELT (ddr_p, dependence_relations, i, ddr)
4645     if (ddr)
4646       free_dependence_relation (ddr);
4647
4648   VEC_free (ddr_p, heap, dependence_relations);
4649 }
4650
4651 /* Free the memory used by the data references from DATAREFS.  */
4652
4653 void
4654 free_data_refs (VEC (data_reference_p, heap) *datarefs)
4655 {
4656   unsigned int i;
4657   struct data_reference *dr;
4658
4659   FOR_EACH_VEC_ELT (data_reference_p, datarefs, i, dr)
4660     free_data_ref (dr);
4661   VEC_free (data_reference_p, heap, datarefs);
4662 }
4663
4664 \f
4665
4666 /* Dump vertex I in RDG to FILE.  */
4667
4668 void
4669 dump_rdg_vertex (FILE *file, struct graph *rdg, int i)
4670 {
4671   struct vertex *v = &(rdg->vertices[i]);
4672   struct graph_edge *e;
4673
4674   fprintf (file, "(vertex %d: (%s%s) (in:", i,
4675            RDG_MEM_WRITE_STMT (rdg, i) ? "w" : "",
4676            RDG_MEM_READS_STMT (rdg, i) ? "r" : "");
4677
4678   if (v->pred)
4679     for (e = v->pred; e; e = e->pred_next)
4680       fprintf (file, " %d", e->src);
4681
4682   fprintf (file, ") (out:");
4683
4684   if (v->succ)
4685     for (e = v->succ; e; e = e->succ_next)
4686       fprintf (file, " %d", e->dest);
4687
4688   fprintf (file, ")\n");
4689   print_gimple_stmt (file, RDGV_STMT (v), 0, TDF_VOPS|TDF_MEMSYMS);
4690   fprintf (file, ")\n");
4691 }
4692
4693 /* Call dump_rdg_vertex on stderr.  */
4694
4695 DEBUG_FUNCTION void
4696 debug_rdg_vertex (struct graph *rdg, int i)
4697 {
4698   dump_rdg_vertex (stderr, rdg, i);
4699 }
4700
4701 /* Dump component C of RDG to FILE.  If DUMPED is non-null, set the
4702    dumped vertices to that bitmap.  */
4703
4704 void dump_rdg_component (FILE *file, struct graph *rdg, int c, bitmap dumped)
4705 {
4706   int i;
4707
4708   fprintf (file, "(%d\n", c);
4709
4710   for (i = 0; i < rdg->n_vertices; i++)
4711     if (rdg->vertices[i].component == c)
4712       {
4713         if (dumped)
4714           bitmap_set_bit (dumped, i);
4715
4716         dump_rdg_vertex (file, rdg, i);
4717       }
4718
4719   fprintf (file, ")\n");
4720 }
4721
4722 /* Call dump_rdg_vertex on stderr.  */
4723
4724 DEBUG_FUNCTION void
4725 debug_rdg_component (struct graph *rdg, int c)
4726 {
4727   dump_rdg_component (stderr, rdg, c, NULL);
4728 }
4729
4730 /* Dump the reduced dependence graph RDG to FILE.  */
4731
4732 void
4733 dump_rdg (FILE *file, struct graph *rdg)
4734 {
4735   int i;
4736   bitmap dumped = BITMAP_ALLOC (NULL);
4737
4738   fprintf (file, "(rdg\n");
4739
4740   for (i = 0; i < rdg->n_vertices; i++)
4741     if (!bitmap_bit_p (dumped, i))
4742       dump_rdg_component (file, rdg, rdg->vertices[i].component, dumped);
4743
4744   fprintf (file, ")\n");
4745   BITMAP_FREE (dumped);
4746 }
4747
4748 /* Call dump_rdg on stderr.  */
4749
4750 DEBUG_FUNCTION void
4751 debug_rdg (struct graph *rdg)
4752 {
4753   dump_rdg (stderr, rdg);
4754 }
4755
4756 static void
4757 dot_rdg_1 (FILE *file, struct graph *rdg)
4758 {
4759   int i;
4760
4761   fprintf (file, "digraph RDG {\n");
4762
4763   for (i = 0; i < rdg->n_vertices; i++)
4764     {
4765       struct vertex *v = &(rdg->vertices[i]);
4766       struct graph_edge *e;
4767
4768       /* Highlight reads from memory.  */
4769       if (RDG_MEM_READS_STMT (rdg, i))
4770        fprintf (file, "%d [style=filled, fillcolor=green]\n", i);
4771
4772       /* Highlight stores to memory.  */
4773       if (RDG_MEM_WRITE_STMT (rdg, i))
4774        fprintf (file, "%d [style=filled, fillcolor=red]\n", i);
4775
4776       if (v->succ)
4777        for (e = v->succ; e; e = e->succ_next)
4778          switch (RDGE_TYPE (e))
4779            {
4780            case input_dd:
4781              fprintf (file, "%d -> %d [label=input] \n", i, e->dest);
4782              break;
4783
4784            case output_dd:
4785              fprintf (file, "%d -> %d [label=output] \n", i, e->dest);
4786              break;
4787
4788            case flow_dd:
4789              /* These are the most common dependences: don't print these. */
4790              fprintf (file, "%d -> %d \n", i, e->dest);
4791              break;
4792
4793            case anti_dd:
4794              fprintf (file, "%d -> %d [label=anti] \n", i, e->dest);
4795              break;
4796
4797            default:
4798              gcc_unreachable ();
4799            }
4800     }
4801
4802   fprintf (file, "}\n\n");
4803 }
4804
4805 /* Display the Reduced Dependence Graph using dotty.  */
4806 extern void dot_rdg (struct graph *);
4807
4808 DEBUG_FUNCTION void
4809 dot_rdg (struct graph *rdg)
4810 {
4811   /* When debugging, enable the following code.  This cannot be used
4812      in production compilers because it calls "system".  */
4813 #if 0
4814   FILE *file = fopen ("/tmp/rdg.dot", "w");
4815   gcc_assert (file != NULL);
4816
4817   dot_rdg_1 (file, rdg);
4818   fclose (file);
4819
4820   system ("dotty /tmp/rdg.dot &");
4821 #else
4822   dot_rdg_1 (stderr, rdg);
4823 #endif
4824 }
4825
4826 /* This structure is used for recording the mapping statement index in
4827    the RDG.  */
4828
4829 struct GTY(()) rdg_vertex_info
4830 {
4831   gimple stmt;
4832   int index;
4833 };
4834
4835 /* Returns the index of STMT in RDG.  */
4836
4837 int
4838 rdg_vertex_for_stmt (struct graph *rdg, gimple stmt)
4839 {
4840   struct rdg_vertex_info rvi, *slot;
4841
4842   rvi.stmt = stmt;
4843   slot = (struct rdg_vertex_info *) htab_find (rdg->indices, &rvi);
4844
4845   if (!slot)
4846     return -1;
4847
4848   return slot->index;
4849 }
4850
4851 /* Creates an edge in RDG for each distance vector from DDR.  The
4852    order that we keep track of in the RDG is the order in which
4853    statements have to be executed.  */
4854
4855 static void
4856 create_rdg_edge_for_ddr (struct graph *rdg, ddr_p ddr)
4857 {
4858   struct graph_edge *e;
4859   int va, vb;
4860   data_reference_p dra = DDR_A (ddr);
4861   data_reference_p drb = DDR_B (ddr);
4862   unsigned level = ddr_dependence_level (ddr);
4863
4864   /* For non scalar dependences, when the dependence is REVERSED,
4865      statement B has to be executed before statement A.  */
4866   if (level > 0
4867       && !DDR_REVERSED_P (ddr))
4868     {
4869       data_reference_p tmp = dra;
4870       dra = drb;
4871       drb = tmp;
4872     }
4873
4874   va = rdg_vertex_for_stmt (rdg, DR_STMT (dra));
4875   vb = rdg_vertex_for_stmt (rdg, DR_STMT (drb));
4876
4877   if (va < 0 || vb < 0)
4878     return;
4879
4880   e = add_edge (rdg, va, vb);
4881   e->data = XNEW (struct rdg_edge);
4882
4883   RDGE_LEVEL (e) = level;
4884   RDGE_RELATION (e) = ddr;
4885
4886   /* Determines the type of the data dependence.  */
4887   if (DR_IS_READ (dra) && DR_IS_READ (drb))
4888     RDGE_TYPE (e) = input_dd;
4889   else if (DR_IS_WRITE (dra) && DR_IS_WRITE (drb))
4890     RDGE_TYPE (e) = output_dd;
4891   else if (DR_IS_WRITE (dra) && DR_IS_READ (drb))
4892     RDGE_TYPE (e) = flow_dd;
4893   else if (DR_IS_READ (dra) && DR_IS_WRITE (drb))
4894     RDGE_TYPE (e) = anti_dd;
4895 }
4896
4897 /* Creates dependence edges in RDG for all the uses of DEF.  IDEF is
4898    the index of DEF in RDG.  */
4899
4900 static void
4901 create_rdg_edges_for_scalar (struct graph *rdg, tree def, int idef)
4902 {
4903   use_operand_p imm_use_p;
4904   imm_use_iterator iterator;
4905
4906   FOR_EACH_IMM_USE_FAST (imm_use_p, iterator, def)
4907     {
4908       struct graph_edge *e;
4909       int use = rdg_vertex_for_stmt (rdg, USE_STMT (imm_use_p));
4910
4911       if (use < 0)
4912         continue;
4913
4914       e = add_edge (rdg, idef, use);
4915       e->data = XNEW (struct rdg_edge);
4916       RDGE_TYPE (e) = flow_dd;
4917       RDGE_RELATION (e) = NULL;
4918     }
4919 }
4920
4921 /* Creates the edges of the reduced dependence graph RDG.  */
4922
4923 static void
4924 create_rdg_edges (struct graph *rdg, VEC (ddr_p, heap) *ddrs)
4925 {
4926   int i;
4927   struct data_dependence_relation *ddr;
4928   def_operand_p def_p;
4929   ssa_op_iter iter;
4930
4931   FOR_EACH_VEC_ELT (ddr_p, ddrs, i, ddr)
4932     if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE)
4933       create_rdg_edge_for_ddr (rdg, ddr);
4934
4935   for (i = 0; i < rdg->n_vertices; i++)
4936     FOR_EACH_PHI_OR_STMT_DEF (def_p, RDG_STMT (rdg, i),
4937                               iter, SSA_OP_DEF)
4938       create_rdg_edges_for_scalar (rdg, DEF_FROM_PTR (def_p), i);
4939 }
4940
4941 /* Build the vertices of the reduced dependence graph RDG.  */
4942
4943 void
4944 create_rdg_vertices (struct graph *rdg, VEC (gimple, heap) *stmts)
4945 {
4946   int i, j;
4947   gimple stmt;
4948
4949   FOR_EACH_VEC_ELT (gimple, stmts, i, stmt)
4950     {
4951       VEC (data_ref_loc, heap) *references;
4952       data_ref_loc *ref;
4953       struct vertex *v = &(rdg->vertices[i]);
4954       struct rdg_vertex_info *rvi = XNEW (struct rdg_vertex_info);
4955       struct rdg_vertex_info **slot;
4956
4957       rvi->stmt = stmt;
4958       rvi->index = i;
4959       slot = (struct rdg_vertex_info **) htab_find_slot (rdg->indices, rvi, INSERT);
4960
4961       if (!*slot)
4962         *slot = rvi;
4963       else
4964         free (rvi);
4965
4966       v->data = XNEW (struct rdg_vertex);
4967       RDG_STMT (rdg, i) = stmt;
4968
4969       RDG_MEM_WRITE_STMT (rdg, i) = false;
4970       RDG_MEM_READS_STMT (rdg, i) = false;
4971       if (gimple_code (stmt) == GIMPLE_PHI)
4972         continue;
4973
4974       get_references_in_stmt (stmt, &references);
4975       FOR_EACH_VEC_ELT (data_ref_loc, references, j, ref)
4976         if (!ref->is_read)
4977           RDG_MEM_WRITE_STMT (rdg, i) = true;
4978         else
4979           RDG_MEM_READS_STMT (rdg, i) = true;
4980
4981       VEC_free (data_ref_loc, heap, references);
4982     }
4983 }
4984
4985 /* Initialize STMTS with all the statements of LOOP.  When
4986    INCLUDE_PHIS is true, include also the PHI nodes.  The order in
4987    which we discover statements is important as
4988    generate_loops_for_partition is using the same traversal for
4989    identifying statements. */
4990
4991 static void
4992 stmts_from_loop (struct loop *loop, VEC (gimple, heap) **stmts)
4993 {
4994   unsigned int i;
4995   basic_block *bbs = get_loop_body_in_dom_order (loop);
4996
4997   for (i = 0; i < loop->num_nodes; i++)
4998     {
4999       basic_block bb = bbs[i];
5000       gimple_stmt_iterator bsi;
5001       gimple stmt;
5002
5003       for (bsi = gsi_start_phis (bb); !gsi_end_p (bsi); gsi_next (&bsi))
5004         VEC_safe_push (gimple, heap, *stmts, gsi_stmt (bsi));
5005
5006       for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
5007         {
5008           stmt = gsi_stmt (bsi);
5009           if (gimple_code (stmt) != GIMPLE_LABEL && !is_gimple_debug (stmt))
5010             VEC_safe_push (gimple, heap, *stmts, stmt);
5011         }
5012     }
5013
5014   free (bbs);
5015 }
5016
5017 /* Returns true when all the dependences are computable.  */
5018
5019 static bool
5020 known_dependences_p (VEC (ddr_p, heap) *dependence_relations)
5021 {
5022   ddr_p ddr;
5023   unsigned int i;
5024
5025   FOR_EACH_VEC_ELT (ddr_p, dependence_relations, i, ddr)
5026     if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
5027       return false;
5028
5029   return true;
5030 }
5031
5032 /* Computes a hash function for element ELT.  */
5033
5034 static hashval_t
5035 hash_stmt_vertex_info (const void *elt)
5036 {
5037   const struct rdg_vertex_info *const rvi =
5038     (const struct rdg_vertex_info *) elt;
5039   gimple stmt = rvi->stmt;
5040
5041   return htab_hash_pointer (stmt);
5042 }
5043
5044 /* Compares database elements E1 and E2.  */
5045
5046 static int
5047 eq_stmt_vertex_info (const void *e1, const void *e2)
5048 {
5049   const struct rdg_vertex_info *elt1 = (const struct rdg_vertex_info *) e1;
5050   const struct rdg_vertex_info *elt2 = (const struct rdg_vertex_info *) e2;
5051
5052   return elt1->stmt == elt2->stmt;
5053 }
5054
5055 /* Free the element E.  */
5056
5057 static void
5058 hash_stmt_vertex_del (void *e)
5059 {
5060   free (e);
5061 }
5062
5063 /* Build the Reduced Dependence Graph (RDG) with one vertex per
5064    statement of the loop nest, and one edge per data dependence or
5065    scalar dependence.  */
5066
5067 struct graph *
5068 build_empty_rdg (int n_stmts)
5069 {
5070   int nb_data_refs = 10;
5071   struct graph *rdg = new_graph (n_stmts);
5072
5073   rdg->indices = htab_create (nb_data_refs, hash_stmt_vertex_info,
5074                               eq_stmt_vertex_info, hash_stmt_vertex_del);
5075   return rdg;
5076 }
5077
5078 /* Build the Reduced Dependence Graph (RDG) with one vertex per
5079    statement of the loop nest, and one edge per data dependence or
5080    scalar dependence.  */
5081
5082 struct graph *
5083 build_rdg (struct loop *loop,
5084            VEC (loop_p, heap) **loop_nest,
5085            VEC (ddr_p, heap) **dependence_relations,
5086            VEC (data_reference_p, heap) **datarefs)
5087 {
5088   struct graph *rdg = NULL;
5089   VEC (gimple, heap) *stmts = VEC_alloc (gimple, heap, 10);
5090
5091   compute_data_dependences_for_loop (loop, false, loop_nest, datarefs,
5092                                      dependence_relations);
5093
5094   if (known_dependences_p (*dependence_relations))
5095     {
5096       stmts_from_loop (loop, &stmts);
5097       rdg = build_empty_rdg (VEC_length (gimple, stmts));
5098       create_rdg_vertices (rdg, stmts);
5099       create_rdg_edges (rdg, *dependence_relations);
5100     }
5101
5102   VEC_free (gimple, heap, stmts);
5103   return rdg;
5104 }
5105
5106 /* Free the reduced dependence graph RDG.  */
5107
5108 void
5109 free_rdg (struct graph *rdg)
5110 {
5111   int i;
5112
5113   for (i = 0; i < rdg->n_vertices; i++)
5114     {
5115       struct vertex *v = &(rdg->vertices[i]);
5116       struct graph_edge *e;
5117
5118       for (e = v->succ; e; e = e->succ_next)
5119         free (e->data);
5120
5121       free (v->data);
5122     }
5123
5124   htab_delete (rdg->indices);
5125   free_graph (rdg);
5126 }
5127
5128 /* Initialize STMTS with all the statements of LOOP that contain a
5129    store to memory.  */
5130
5131 void
5132 stores_from_loop (struct loop *loop, VEC (gimple, heap) **stmts)
5133 {
5134   unsigned int i;
5135   basic_block *bbs = get_loop_body_in_dom_order (loop);
5136
5137   for (i = 0; i < loop->num_nodes; i++)
5138     {
5139       basic_block bb = bbs[i];
5140       gimple_stmt_iterator bsi;
5141
5142       for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
5143         if (gimple_vdef (gsi_stmt (bsi)))
5144           VEC_safe_push (gimple, heap, *stmts, gsi_stmt (bsi));
5145     }
5146
5147   free (bbs);
5148 }
5149
5150 /* Returns true when the statement at STMT is of the form "A[i] = 0"
5151    that contains a data reference on its LHS with a stride of the same
5152    size as its unit type.  */
5153
5154 bool
5155 stmt_with_adjacent_zero_store_dr_p (gimple stmt)
5156 {
5157   tree op0, op1;
5158   bool res;
5159   struct data_reference *dr;
5160
5161   if (!stmt
5162       || !gimple_vdef (stmt)
5163       || !is_gimple_assign (stmt)
5164       || !gimple_assign_single_p (stmt)
5165       || !(op1 = gimple_assign_rhs1 (stmt))
5166       || !(integer_zerop (op1) || real_zerop (op1)))
5167     return false;
5168
5169   dr = XCNEW (struct data_reference);
5170   op0 = gimple_assign_lhs (stmt);
5171
5172   DR_STMT (dr) = stmt;
5173   DR_REF (dr) = op0;
5174
5175   res = dr_analyze_innermost (dr, loop_containing_stmt (stmt))
5176     && stride_of_unit_type_p (DR_STEP (dr), TREE_TYPE (op0));
5177
5178   free_data_ref (dr);
5179   return res;
5180 }
5181
5182 /* Initialize STMTS with all the statements of LOOP that contain a
5183    store to memory of the form "A[i] = 0".  */
5184
5185 void
5186 stores_zero_from_loop (struct loop *loop, VEC (gimple, heap) **stmts)
5187 {
5188   unsigned int i;
5189   basic_block bb;
5190   gimple_stmt_iterator si;
5191   gimple stmt;
5192   basic_block *bbs = get_loop_body_in_dom_order (loop);
5193
5194   for (i = 0; i < loop->num_nodes; i++)
5195     for (bb = bbs[i], si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
5196       if ((stmt = gsi_stmt (si))
5197           && stmt_with_adjacent_zero_store_dr_p (stmt))
5198         VEC_safe_push (gimple, heap, *stmts, gsi_stmt (si));
5199
5200   free (bbs);
5201 }
5202
5203 /* For a data reference REF, return the declaration of its base
5204    address or NULL_TREE if the base is not determined.  */
5205
5206 static inline tree
5207 ref_base_address (gimple stmt, data_ref_loc *ref)
5208 {
5209   tree base = NULL_TREE;
5210   tree base_address;
5211   struct data_reference *dr = XCNEW (struct data_reference);
5212
5213   DR_STMT (dr) = stmt;
5214   DR_REF (dr) = *ref->pos;
5215   dr_analyze_innermost (dr, loop_containing_stmt (stmt));
5216   base_address = DR_BASE_ADDRESS (dr);
5217
5218   if (!base_address)
5219     goto end;
5220
5221   switch (TREE_CODE (base_address))
5222     {
5223     case ADDR_EXPR:
5224       base = TREE_OPERAND (base_address, 0);
5225       break;
5226
5227     default:
5228       base = base_address;
5229       break;
5230     }
5231
5232  end:
5233   free_data_ref (dr);
5234   return base;
5235 }
5236
5237 /* Determines whether the statement from vertex V of the RDG has a
5238    definition used outside the loop that contains this statement.  */
5239
5240 bool
5241 rdg_defs_used_in_other_loops_p (struct graph *rdg, int v)
5242 {
5243   gimple stmt = RDG_STMT (rdg, v);
5244   struct loop *loop = loop_containing_stmt (stmt);
5245   use_operand_p imm_use_p;
5246   imm_use_iterator iterator;
5247   ssa_op_iter it;
5248   def_operand_p def_p;
5249
5250   if (!loop)
5251     return true;
5252
5253   FOR_EACH_PHI_OR_STMT_DEF (def_p, stmt, it, SSA_OP_DEF)
5254     {
5255       FOR_EACH_IMM_USE_FAST (imm_use_p, iterator, DEF_FROM_PTR (def_p))
5256         {
5257           if (loop_containing_stmt (USE_STMT (imm_use_p)) != loop)
5258             return true;
5259         }
5260     }
5261
5262   return false;
5263 }
5264
5265 /* Determines whether statements S1 and S2 access to similar memory
5266    locations.  Two memory accesses are considered similar when they
5267    have the same base address declaration, i.e. when their
5268    ref_base_address is the same.  */
5269
5270 bool
5271 have_similar_memory_accesses (gimple s1, gimple s2)
5272 {
5273   bool res = false;
5274   unsigned i, j;
5275   VEC (data_ref_loc, heap) *refs1, *refs2;
5276   data_ref_loc *ref1, *ref2;
5277
5278   get_references_in_stmt (s1, &refs1);
5279   get_references_in_stmt (s2, &refs2);
5280
5281   FOR_EACH_VEC_ELT (data_ref_loc, refs1, i, ref1)
5282     {
5283       tree base1 = ref_base_address (s1, ref1);
5284
5285       if (base1)
5286         FOR_EACH_VEC_ELT (data_ref_loc, refs2, j, ref2)
5287           if (base1 == ref_base_address (s2, ref2))
5288             {
5289               res = true;
5290               goto end;
5291             }
5292     }
5293
5294  end:
5295   VEC_free (data_ref_loc, heap, refs1);
5296   VEC_free (data_ref_loc, heap, refs2);
5297   return res;
5298 }
5299
5300 /* Helper function for the hashtab.  */
5301
5302 static int
5303 have_similar_memory_accesses_1 (const void *s1, const void *s2)
5304 {
5305   return have_similar_memory_accesses (CONST_CAST_GIMPLE ((const_gimple) s1),
5306                                        CONST_CAST_GIMPLE ((const_gimple) s2));
5307 }
5308
5309 /* Helper function for the hashtab.  */
5310
5311 static hashval_t
5312 ref_base_address_1 (const void *s)
5313 {
5314   gimple stmt = CONST_CAST_GIMPLE ((const_gimple) s);
5315   unsigned i;
5316   VEC (data_ref_loc, heap) *refs;
5317   data_ref_loc *ref;
5318   hashval_t res = 0;
5319
5320   get_references_in_stmt (stmt, &refs);
5321
5322   FOR_EACH_VEC_ELT (data_ref_loc, refs, i, ref)
5323     if (!ref->is_read)
5324       {
5325         res = htab_hash_pointer (ref_base_address (stmt, ref));
5326         break;
5327       }
5328
5329   VEC_free (data_ref_loc, heap, refs);
5330   return res;
5331 }
5332
5333 /* Try to remove duplicated write data references from STMTS.  */
5334
5335 void
5336 remove_similar_memory_refs (VEC (gimple, heap) **stmts)
5337 {
5338   unsigned i;
5339   gimple stmt;
5340   htab_t seen = htab_create (VEC_length (gimple, *stmts), ref_base_address_1,
5341                              have_similar_memory_accesses_1, NULL);
5342
5343   for (i = 0; VEC_iterate (gimple, *stmts, i, stmt); )
5344     {
5345       void **slot;
5346
5347       slot = htab_find_slot (seen, stmt, INSERT);
5348
5349       if (*slot)
5350         VEC_ordered_remove (gimple, *stmts, i);
5351       else
5352         {
5353           *slot = (void *) stmt;
5354           i++;
5355         }
5356     }
5357
5358   htab_delete (seen);
5359 }
5360
5361 /* Returns the index of PARAMETER in the parameters vector of the
5362    ACCESS_MATRIX.  If PARAMETER does not exist return -1.  */
5363
5364 int
5365 access_matrix_get_index_for_parameter (tree parameter,
5366                                        struct access_matrix *access_matrix)
5367 {
5368   int i;
5369   VEC (tree,heap) *lambda_parameters = AM_PARAMETERS (access_matrix);
5370   tree lambda_parameter;
5371
5372   FOR_EACH_VEC_ELT (tree, lambda_parameters, i, lambda_parameter)
5373     if (lambda_parameter == parameter)
5374       return i + AM_NB_INDUCTION_VARS (access_matrix);
5375
5376   return -1;
5377 }