gcc/tree-data-ref.c

   1 /* Data references and dependences detectors.
   2    Copyright (C) 2003-2015 Free Software Foundation, Inc.
   3    Contributed by Sebastian Pop <pop@cri.ensmp.fr>
   4
   5 This file is part of GCC.
   6
   7 GCC is free software; you can redistribute it and/or modify it under
   8 the terms of the GNU General Public License as published by the Free
   9 Software Foundation; either version 3, or (at your option) any later
  10 version.
  11
  12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
  13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
  14 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  15 for more details.
  16
  17 You should have received a copy of the GNU General Public License
  18 along with GCC; see the file COPYING3.  If not see
  19 <http://www.gnu.org/licenses/>.  */
  20
  21 /* This pass walks a given loop structure searching for array
  22    references.  The information about the array accesses is recorded
  23    in DATA_REFERENCE structures.
  24
  25    The basic test for determining the dependences is:
  26    given two access functions chrec1 and chrec2 to a same array, and
  27    x and y two vectors from the iteration domain, the same element of
  28    the array is accessed twice at iterations x and y if and only if:
  29    |             chrec1 (x) == chrec2 (y).
  30
  31    The goals of this analysis are:
  32
  33    - to determine the independence: the relation between two
  34      independent accesses is qualified with the chrec_known (this
  35      information allows a loop parallelization),
  36
  37    - when two data references access the same data, to qualify the
  38      dependence relation with classic dependence representations:
  39
  40        - distance vectors
  41        - direction vectors
  42        - loop carried level dependence
  43        - polyhedron dependence
  44      or with the chains of recurrences based representation,
  45
  46    - to define a knowledge base for storing the data dependence
  47      information,
  48
  49    - to define an interface to access this data.
  50
  51
  52    Definitions:
  53
  54    - subscript: given two array accesses a subscript is the tuple
  55    composed of the access functions for a given dimension.  Example:
  56    Given A[f1][f2][f3] and B[g1][g2][g3], there are three subscripts:
  57    (f1, g1), (f2, g2), (f3, g3).
  58
  59    - Diophantine equation: an equation whose coefficients and
  60    solutions are integer constants, for example the equation
  61    |   3*x + 2*y = 1
  62    has an integer solution x = 1 and y = -1.
  63
  64    References:
  65
  66    - "Advanced Compilation for High Performance Computing" by Randy
  67    Allen and Ken Kennedy.
  68    http://citeseer.ist.psu.edu/goff91practical.html
  69
  70    - "Loop Transformations for Restructuring Compilers - The Foundations"
  71    by Utpal Banerjee.
  72
  73
  74 */
  75
  76 #include "config.h"
  77 #include "system.h"
  78 #include "coretypes.h"
  79 #include "hash-set.h"
  80 #include "machmode.h"
  81 #include "vec.h"
  82 #include "double-int.h"
  83 #include "input.h"
  84 #include "alias.h"
  85 #include "symtab.h"
  86 #include "options.h"
  87 #include "wide-int.h"
  88 #include "inchash.h"
  89 #include "tree.h"
  90 #include "fold-const.h"
  91 #include "expr.h"
  92 #include "gimple-pretty-print.h"
  93 #include "predict.h"
  94 #include "tm.h"
  95 #include "hard-reg-set.h"
  96 #include "input.h"
  97 #include "function.h"
  98 #include "dominance.h"
  99 #include "cfg.h"
 100 #include "basic-block.h"
 101 #include "tree-ssa-alias.h"
 102 #include "internal-fn.h"
 103 #include "gimple-expr.h"
 104 #include "is-a.h"
 105 #include "gimple.h"
 106 #include "gimple-iterator.h"
 107 #include "tree-ssa-loop-niter.h"
 108 #include "tree-ssa-loop.h"
 109 #include "tree-ssa.h"
 110 #include "cfgloop.h"
 111 #include "tree-data-ref.h"
 112 #include "tree-scalar-evolution.h"
 113 #include "dumpfile.h"
 114 #include "langhooks.h"
 115 #include "tree-affine.h"
 116 #include "params.h"
 117
 118 static struct datadep_stats
 119 {
 120   int num_dependence_tests;
 121   int num_dependence_dependent;
 122   int num_dependence_independent;
 123   int num_dependence_undetermined;
 124
 125   int num_subscript_tests;
 126   int num_subscript_undetermined;
 127   int num_same_subscript_function;
 128
 129   int num_ziv;
 130   int num_ziv_independent;
 131   int num_ziv_dependent;
 132   int num_ziv_unimplemented;
 133
 134   int num_siv;
 135   int num_siv_independent;
 136   int num_siv_dependent;
 137   int num_siv_unimplemented;
 138
 139   int num_miv;
 140   int num_miv_independent;
 141   int num_miv_dependent;
 142   int num_miv_unimplemented;
 143 } dependence_stats;
 144
 145 static bool subscript_dependence_tester_1 (struct data_dependence_relation *,
 146                                            struct data_reference *,
 147                                            struct data_reference *,
 148                                            struct loop *);
 149 /* Returns true iff A divides B.  */
 150
 151 static inline bool
 152 tree_fold_divides_p (const_tree a, const_tree b)
 153 {
 154   gcc_assert (TREE_CODE (a) == INTEGER_CST);
 155   gcc_assert (TREE_CODE (b) == INTEGER_CST);
 156   return integer_zerop (int_const_binop (TRUNC_MOD_EXPR, b, a));
 157 }
 158
 159 /* Returns true iff A divides B.  */
 160
 161 static inline bool
 162 int_divides_p (int a, int b)
 163 {
 164   return ((b % a) == 0);
 165 }
 166
 167 \f
 168
 169 /* Dump into FILE all the data references from DATAREFS.  */
 170
 171 static void
 172 dump_data_references (FILE *file, vec<data_reference_p> datarefs)
 173 {
 174   unsigned int i;
 175   struct data_reference *dr;
 176
 177   FOR_EACH_VEC_ELT (datarefs, i, dr)
 178     dump_data_reference (file, dr);
 179 }
 180
 181 /* Unified dump into FILE all the data references from DATAREFS.  */
 182
 183 DEBUG_FUNCTION void
 184 debug (vec<data_reference_p> &ref)
 185 {
 186   dump_data_references (stderr, ref);
 187 }
 188
 189 DEBUG_FUNCTION void
 190 debug (vec<data_reference_p> *ptr)
 191 {
 192   if (ptr)
 193     debug (*ptr);
 194   else
 195     fprintf (stderr, "<nil>\n");
 196 }
 197
 198
 199 /* Dump into STDERR all the data references from DATAREFS.  */
 200
 201 DEBUG_FUNCTION void
 202 debug_data_references (vec<data_reference_p> datarefs)
 203 {
 204   dump_data_references (stderr, datarefs);
 205 }
 206
 207 /* Print to STDERR the data_reference DR.  */
 208
 209 DEBUG_FUNCTION void
 210 debug_data_reference (struct data_reference *dr)
 211 {
 212   dump_data_reference (stderr, dr);
 213 }
 214
 215 /* Dump function for a DATA_REFERENCE structure.  */
 216
 217 void
 218 dump_data_reference (FILE *outf,
 219                      struct data_reference *dr)
 220 {
 221   unsigned int i;
 222
 223   fprintf (outf, "#(Data Ref: \n");
 224   fprintf (outf, "#  bb: %d \n", gimple_bb (DR_STMT (dr))->index);
 225   fprintf (outf, "#  stmt: ");
 226   print_gimple_stmt (outf, DR_STMT (dr), 0, 0);
 227   fprintf (outf, "#  ref: ");
 228   print_generic_stmt (outf, DR_REF (dr), 0);
 229   fprintf (outf, "#  base_object: ");
 230   print_generic_stmt (outf, DR_BASE_OBJECT (dr), 0);
 231
 232   for (i = 0; i < DR_NUM_DIMENSIONS (dr); i++)
 233     {
 234       fprintf (outf, "#  Access function %d: ", i);
 235       print_generic_stmt (outf, DR_ACCESS_FN (dr, i), 0);
 236     }
 237   fprintf (outf, "#)\n");
 238 }
 239
 240 /* Unified dump function for a DATA_REFERENCE structure.  */
 241
 242 DEBUG_FUNCTION void
 243 debug (data_reference &ref)
 244 {
 245   dump_data_reference (stderr, &ref);
 246 }
 247
 248 DEBUG_FUNCTION void
 249 debug (data_reference *ptr)
 250 {
 251   if (ptr)
 252     debug (*ptr);
 253   else
 254     fprintf (stderr, "<nil>\n");
 255 }
 256
 257
 258 /* Dumps the affine function described by FN to the file OUTF.  */
 259
 260 static void
 261 dump_affine_function (FILE *outf, affine_fn fn)
 262 {
 263   unsigned i;
 264   tree coef;
 265
 266   print_generic_expr (outf, fn[0], TDF_SLIM);
 267   for (i = 1; fn.iterate (i, &coef); i++)
 268     {
 269       fprintf (outf, " + ");
 270       print_generic_expr (outf, coef, TDF_SLIM);
 271       fprintf (outf, " * x_%u", i);
 272     }
 273 }
 274
 275 /* Dumps the conflict function CF to the file OUTF.  */
 276
 277 static void
 278 dump_conflict_function (FILE *outf, conflict_function *cf)
 279 {
 280   unsigned i;
 281
 282   if (cf->n == NO_DEPENDENCE)
 283     fprintf (outf, "no dependence");
 284   else if (cf->n == NOT_KNOWN)
 285     fprintf (outf, "not known");
 286   else
 287     {
 288       for (i = 0; i < cf->n; i++)
 289         {
 290           if (i != 0)
 291             fprintf (outf, " ");
 292           fprintf (outf, "[");
 293           dump_affine_function (outf, cf->fns[i]);
 294           fprintf (outf, "]");
 295         }
 296     }
 297 }
 298
 299 /* Dump function for a SUBSCRIPT structure.  */
 300
 301 static void
 302 dump_subscript (FILE *outf, struct subscript *subscript)
 303 {
 304   conflict_function *cf = SUB_CONFLICTS_IN_A (subscript);
 305
 306   fprintf (outf, "\n (subscript \n");
 307   fprintf (outf, "  iterations_that_access_an_element_twice_in_A: ");
 308   dump_conflict_function (outf, cf);
 309   if (CF_NONTRIVIAL_P (cf))
 310     {
 311       tree last_iteration = SUB_LAST_CONFLICT (subscript);
 312       fprintf (outf, "\n  last_conflict: ");
 313       print_generic_expr (outf, last_iteration, 0);
 314     }
 315
 316   cf = SUB_CONFLICTS_IN_B (subscript);
 317   fprintf (outf, "\n  iterations_that_access_an_element_twice_in_B: ");
 318   dump_conflict_function (outf, cf);
 319   if (CF_NONTRIVIAL_P (cf))
 320     {
 321       tree last_iteration = SUB_LAST_CONFLICT (subscript);
 322       fprintf (outf, "\n  last_conflict: ");
 323       print_generic_expr (outf, last_iteration, 0);
 324     }
 325
 326   fprintf (outf, "\n  (Subscript distance: ");
 327   print_generic_expr (outf, SUB_DISTANCE (subscript), 0);
 328   fprintf (outf, " ))\n");
 329 }
 330
 331 /* Print the classic direction vector DIRV to OUTF.  */
 332
 333 static void
 334 print_direction_vector (FILE *outf,
 335                         lambda_vector dirv,
 336                         int length)
 337 {
 338   int eq;
 339
 340   for (eq = 0; eq < length; eq++)
 341     {
 342       enum data_dependence_direction dir = ((enum data_dependence_direction)
 343                                             dirv[eq]);
 344
 345       switch (dir)
 346         {
 347         case dir_positive:
 348           fprintf (outf, "    +");
 349           break;
 350         case dir_negative:
 351           fprintf (outf, "    -");
 352           break;
 353         case dir_equal:
 354           fprintf (outf, "    =");
 355           break;
 356         case dir_positive_or_equal:
 357           fprintf (outf, "   +=");
 358           break;
 359         case dir_positive_or_negative:
 360           fprintf (outf, "   +-");
 361           break;
 362         case dir_negative_or_equal:
 363           fprintf (outf, "   -=");
 364           break;
 365         case dir_star:
 366           fprintf (outf, "    *");
 367           break;
 368         default:
 369           fprintf (outf, "indep");
 370           break;
 371         }
 372     }
 373   fprintf (outf, "\n");
 374 }
 375
 376 /* Print a vector of direction vectors.  */
 377
 378 static void
 379 print_dir_vectors (FILE *outf, vec<lambda_vector> dir_vects,
 380                    int length)
 381 {
 382   unsigned j;
 383   lambda_vector v;
 384
 385   FOR_EACH_VEC_ELT (dir_vects, j, v)
 386     print_direction_vector (outf, v, length);
 387 }
 388
 389 /* Print out a vector VEC of length N to OUTFILE.  */
 390
 391 static inline void
 392 print_lambda_vector (FILE * outfile, lambda_vector vector, int n)
 393 {
 394   int i;
 395
 396   for (i = 0; i < n; i++)
 397     fprintf (outfile, "%3d ", vector[i]);
 398   fprintf (outfile, "\n");
 399 }
 400
 401 /* Print a vector of distance vectors.  */
 402
 403 static void
 404 print_dist_vectors (FILE *outf, vec<lambda_vector> dist_vects,
 405                     int length)
 406 {
 407   unsigned j;
 408   lambda_vector v;
 409
 410   FOR_EACH_VEC_ELT (dist_vects, j, v)
 411     print_lambda_vector (outf, v, length);
 412 }
 413
 414 /* Dump function for a DATA_DEPENDENCE_RELATION structure.  */
 415
 416 static void
 417 dump_data_dependence_relation (FILE *outf,
 418                                struct data_dependence_relation *ddr)
 419 {
 420   struct data_reference *dra, *drb;
 421
 422   fprintf (outf, "(Data Dep: \n");
 423
 424   if (!ddr || DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
 425     {
 426       if (ddr)
 427         {
 428           dra = DDR_A (ddr);
 429           drb = DDR_B (ddr);
 430           if (dra)
 431             dump_data_reference (outf, dra);
 432           else
 433             fprintf (outf, "    (nil)\n");
 434           if (drb)
 435             dump_data_reference (outf, drb);
 436           else
 437             fprintf (outf, "    (nil)\n");
 438         }
 439       fprintf (outf, "    (don't know)\n)\n");
 440       return;
 441     }
 442
 443   dra = DDR_A (ddr);
 444   drb = DDR_B (ddr);
 445   dump_data_reference (outf, dra);
 446   dump_data_reference (outf, drb);
 447
 448   if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
 449     fprintf (outf, "    (no dependence)\n");
 450
 451   else if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE)
 452     {
 453       unsigned int i;
 454       struct loop *loopi;
 455
 456       for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
 457         {
 458           fprintf (outf, "  access_fn_A: ");
 459           print_generic_stmt (outf, DR_ACCESS_FN (dra, i), 0);
 460           fprintf (outf, "  access_fn_B: ");
 461           print_generic_stmt (outf, DR_ACCESS_FN (drb, i), 0);
 462           dump_subscript (outf, DDR_SUBSCRIPT (ddr, i));
 463         }
 464
 465       fprintf (outf, "  inner loop index: %d\n", DDR_INNER_LOOP (ddr));
 466       fprintf (outf, "  loop nest: (");
 467       FOR_EACH_VEC_ELT (DDR_LOOP_NEST (ddr), i, loopi)
 468         fprintf (outf, "%d ", loopi->num);
 469       fprintf (outf, ")\n");
 470
 471       for (i = 0; i < DDR_NUM_DIST_VECTS (ddr); i++)
 472         {
 473           fprintf (outf, "  distance_vector: ");
 474           print_lambda_vector (outf, DDR_DIST_VECT (ddr, i),
 475                                DDR_NB_LOOPS (ddr));
 476         }
 477
 478       for (i = 0; i < DDR_NUM_DIR_VECTS (ddr); i++)
 479         {
 480           fprintf (outf, "  direction_vector: ");
 481           print_direction_vector (outf, DDR_DIR_VECT (ddr, i),
 482                                   DDR_NB_LOOPS (ddr));
 483         }
 484     }
 485
 486   fprintf (outf, ")\n");
 487 }
 488
 489 /* Debug version.  */
 490
 491 DEBUG_FUNCTION void
 492 debug_data_dependence_relation (struct data_dependence_relation *ddr)
 493 {
 494   dump_data_dependence_relation (stderr, ddr);
 495 }
 496
 497 /* Dump into FILE all the dependence relations from DDRS.  */
 498
 499 void
 500 dump_data_dependence_relations (FILE *file,
 501                                 vec<ddr_p> ddrs)
 502 {
 503   unsigned int i;
 504   struct data_dependence_relation *ddr;
 505
 506   FOR_EACH_VEC_ELT (ddrs, i, ddr)
 507     dump_data_dependence_relation (file, ddr);
 508 }
 509
 510 DEBUG_FUNCTION void
 511 debug (vec<ddr_p> &ref)
 512 {
 513   dump_data_dependence_relations (stderr, ref);
 514 }
 515
 516 DEBUG_FUNCTION void
 517 debug (vec<ddr_p> *ptr)
 518 {
 519   if (ptr)
 520     debug (*ptr);
 521   else
 522     fprintf (stderr, "<nil>\n");
 523 }
 524
 525
 526 /* Dump to STDERR all the dependence relations from DDRS.  */
 527
 528 DEBUG_FUNCTION void
 529 debug_data_dependence_relations (vec<ddr_p> ddrs)
 530 {
 531   dump_data_dependence_relations (stderr, ddrs);
 532 }
 533
 534 /* Dumps the distance and direction vectors in FILE.  DDRS contains
 535    the dependence relations, and VECT_SIZE is the size of the
 536    dependence vectors, or in other words the number of loops in the
 537    considered nest.  */
 538
 539 static void
 540 dump_dist_dir_vectors (FILE *file, vec<ddr_p> ddrs)
 541 {
 542   unsigned int i, j;
 543   struct data_dependence_relation *ddr;
 544   lambda_vector v;
 545
 546   FOR_EACH_VEC_ELT (ddrs, i, ddr)
 547     if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE && DDR_AFFINE_P (ddr))
 548       {
 549         FOR_EACH_VEC_ELT (DDR_DIST_VECTS (ddr), j, v)
 550           {
 551             fprintf (file, "DISTANCE_V (");
 552             print_lambda_vector (file, v, DDR_NB_LOOPS (ddr));
 553             fprintf (file, ")\n");
 554           }
 555
 556         FOR_EACH_VEC_ELT (DDR_DIR_VECTS (ddr), j, v)
 557           {
 558             fprintf (file, "DIRECTION_V (");
 559             print_direction_vector (file, v, DDR_NB_LOOPS (ddr));
 560             fprintf (file, ")\n");
 561           }
 562       }
 563
 564   fprintf (file, "\n\n");
 565 }
 566
 567 /* Dumps the data dependence relations DDRS in FILE.  */
 568
 569 static void
 570 dump_ddrs (FILE *file, vec<ddr_p> ddrs)
 571 {
 572   unsigned int i;
 573   struct data_dependence_relation *ddr;
 574
 575   FOR_EACH_VEC_ELT (ddrs, i, ddr)
 576     dump_data_dependence_relation (file, ddr);
 577
 578   fprintf (file, "\n\n");
 579 }
 580
 581 DEBUG_FUNCTION void
 582 debug_ddrs (vec<ddr_p> ddrs)
 583 {
 584   dump_ddrs (stderr, ddrs);
 585 }
 586
 587 /* Helper function for split_constant_offset.  Expresses OP0 CODE OP1
 588    (the type of the result is TYPE) as VAR + OFF, where OFF is a nonzero
 589    constant of type ssizetype, and returns true.  If we cannot do this
 590    with OFF nonzero, OFF and VAR are set to NULL_TREE instead and false
 591    is returned.  */
 592
 593 static bool
 594 split_constant_offset_1 (tree type, tree op0, enum tree_code code, tree op1,
 595                          tree *var, tree *off)
 596 {
 597   tree var0, var1;
 598   tree off0, off1;
 599   enum tree_code ocode = code;
 600
 601   *var = NULL_TREE;
 602   *off = NULL_TREE;
 603
 604   switch (code)
 605     {
 606     case INTEGER_CST:
 607       *var = build_int_cst (type, 0);
 608       *off = fold_convert (ssizetype, op0);
 609       return true;
 610
 611     case POINTER_PLUS_EXPR:
 612       ocode = PLUS_EXPR;
 613       /* FALLTHROUGH */
 614     case PLUS_EXPR:
 615     case MINUS_EXPR:
 616       split_constant_offset (op0, &var0, &off0);
 617       split_constant_offset (op1, &var1, &off1);
 618       *var = fold_build2 (code, type, var0, var1);
 619       *off = size_binop (ocode, off0, off1);
 620       return true;
 621
 622     case MULT_EXPR:
 623       if (TREE_CODE (op1) != INTEGER_CST)
 624         return false;
 625
 626       split_constant_offset (op0, &var0, &off0);
 627       *var = fold_build2 (MULT_EXPR, type, var0, op1);
 628       *off = size_binop (MULT_EXPR, off0, fold_convert (ssizetype, op1));
 629       return true;
 630
 631     case ADDR_EXPR:
 632       {
 633         tree base, poffset;
 634         HOST_WIDE_INT pbitsize, pbitpos;
 635         machine_mode pmode;
 636         int punsignedp, pvolatilep;
 637
 638         op0 = TREE_OPERAND (op0, 0);
 639         base = get_inner_reference (op0, &pbitsize, &pbitpos, &poffset,
 640                                     &pmode, &punsignedp, &pvolatilep, false);
 641
 642         if (pbitpos % BITS_PER_UNIT != 0)
 643           return false;
 644         base = build_fold_addr_expr (base);
 645         off0 = ssize_int (pbitpos / BITS_PER_UNIT);
 646
 647         if (poffset)
 648           {
 649             split_constant_offset (poffset, &poffset, &off1);
 650             off0 = size_binop (PLUS_EXPR, off0, off1);
 651             if (POINTER_TYPE_P (TREE_TYPE (base)))
 652               base = fold_build_pointer_plus (base, poffset);
 653             else
 654               base = fold_build2 (PLUS_EXPR, TREE_TYPE (base), base,
 655                                   fold_convert (TREE_TYPE (base), poffset));
 656           }
 657
 658         var0 = fold_convert (type, base);
 659
 660         /* If variable length types are involved, punt, otherwise casts
 661            might be converted into ARRAY_REFs in gimplify_conversion.
 662            To compute that ARRAY_REF's element size TYPE_SIZE_UNIT, which
 663            possibly no longer appears in current GIMPLE, might resurface.
 664            This perhaps could run
 665            if (CONVERT_EXPR_P (var0))
 666              {
 667                gimplify_conversion (&var0);
 668                // Attempt to fill in any within var0 found ARRAY_REF's
 669                // element size from corresponding op embedded ARRAY_REF,
 670                // if unsuccessful, just punt.
 671              }  */
 672         while (POINTER_TYPE_P (type))
 673           type = TREE_TYPE (type);
 674         if (int_size_in_bytes (type) < 0)
 675           return false;
 676
 677         *var = var0;
 678         *off = off0;
 679         return true;
 680       }
 681
 682     case SSA_NAME:
 683       {
 684         if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (op0))
 685           return false;
 686
 687         gimple def_stmt = SSA_NAME_DEF_STMT (op0);
 688         enum tree_code subcode;
 689
 690         if (gimple_code (def_stmt) != GIMPLE_ASSIGN)
 691           return false;
 692
 693         var0 = gimple_assign_rhs1 (def_stmt);
 694         subcode = gimple_assign_rhs_code (def_stmt);
 695         var1 = gimple_assign_rhs2 (def_stmt);
 696
 697         return split_constant_offset_1 (type, var0, subcode, var1, var, off);
 698       }
 699     CASE_CONVERT:
 700       {
 701         /* We must not introduce undefined overflow, and we must not change the value.
 702            Hence we're okay if the inner type doesn't overflow to start with
 703            (pointer or signed), the outer type also is an integer or pointer
 704            and the outer precision is at least as large as the inner.  */
 705         tree itype = TREE_TYPE (op0);
 706         if ((POINTER_TYPE_P (itype)
 707              || (INTEGRAL_TYPE_P (itype) && TYPE_OVERFLOW_UNDEFINED (itype)))
 708             && TYPE_PRECISION (type) >= TYPE_PRECISION (itype)
 709             && (POINTER_TYPE_P (type) || INTEGRAL_TYPE_P (type)))
 710           {
 711             split_constant_offset (op0, &var0, off);
 712             *var = fold_convert (type, var0);
 713             return true;
 714           }
 715         return false;
 716       }
 717
 718     default:
 719       return false;
 720     }
 721 }
 722
 723 /* Expresses EXP as VAR + OFF, where off is a constant.  The type of OFF
 724    will be ssizetype.  */
 725
 726 void
 727 split_constant_offset (tree exp, tree *var, tree *off)
 728 {
 729   tree type = TREE_TYPE (exp), otype, op0, op1, e, o;
 730   enum tree_code code;
 731
 732   *var = exp;
 733   *off = ssize_int (0);
 734   STRIP_NOPS (exp);
 735
 736   if (tree_is_chrec (exp)
 737       || get_gimple_rhs_class (TREE_CODE (exp)) == GIMPLE_TERNARY_RHS)
 738     return;
 739
 740   otype = TREE_TYPE (exp);
 741   code = TREE_CODE (exp);
 742   extract_ops_from_tree (exp, &code, &op0, &op1);
 743   if (split_constant_offset_1 (otype, op0, code, op1, &e, &o))
 744     {
 745       *var = fold_convert (type, e);
 746       *off = o;
 747     }
 748 }
 749
 750 /* Returns the address ADDR of an object in a canonical shape (without nop
 751    casts, and with type of pointer to the object).  */
 752
 753 static tree
 754 canonicalize_base_object_address (tree addr)
 755 {
 756   tree orig = addr;
 757
 758   STRIP_NOPS (addr);
 759
 760   /* The base address may be obtained by casting from integer, in that case
 761      keep the cast.  */
 762   if (!POINTER_TYPE_P (TREE_TYPE (addr)))
 763     return orig;
 764
 765   if (TREE_CODE (addr) != ADDR_EXPR)
 766     return addr;
 767
 768   return build_fold_addr_expr (TREE_OPERAND (addr, 0));
 769 }
 770
 771 /* Analyzes the behavior of the memory reference DR in the innermost loop or
 772    basic block that contains it.  Returns true if analysis succeed or false
 773    otherwise.  */
 774
 775 bool
 776 dr_analyze_innermost (struct data_reference *dr, struct loop *nest)
 777 {
 778   gimple stmt = DR_STMT (dr);
 779   struct loop *loop = loop_containing_stmt (stmt);
 780   tree ref = DR_REF (dr);
 781   HOST_WIDE_INT pbitsize, pbitpos;
 782   tree base, poffset;
 783   machine_mode pmode;
 784   int punsignedp, pvolatilep;
 785   affine_iv base_iv, offset_iv;
 786   tree init, dinit, step;
 787   bool in_loop = (loop && loop->num);
 788
 789   if (dump_file && (dump_flags & TDF_DETAILS))
 790     fprintf (dump_file, "analyze_innermost: ");
 791
 792   base = get_inner_reference (ref, &pbitsize, &pbitpos, &poffset,
 793                               &pmode, &punsignedp, &pvolatilep, false);
 794   gcc_assert (base != NULL_TREE);
 795
 796   if (pbitpos % BITS_PER_UNIT != 0)
 797     {
 798       if (dump_file && (dump_flags & TDF_DETAILS))
 799         fprintf (dump_file, "failed: bit offset alignment.\n");
 800       return false;
 801     }
 802
 803   if (TREE_CODE (base) == MEM_REF)
 804     {
 805       if (!integer_zerop (TREE_OPERAND (base, 1)))
 806         {
 807           offset_int moff = mem_ref_offset (base);
 808           tree mofft = wide_int_to_tree (sizetype, moff);
 809           if (!poffset)
 810             poffset = mofft;
 811           else
 812             poffset = size_binop (PLUS_EXPR, poffset, mofft);
 813         }
 814       base = TREE_OPERAND (base, 0);
 815     }
 816   else
 817     base = build_fold_addr_expr (base);
 818
 819   if (in_loop)
 820     {
 821       if (!simple_iv (loop, loop_containing_stmt (stmt), base, &base_iv,
 822                       nest ? true : false))
 823         {
 824           if (nest)
 825             {
 826               if (dump_file && (dump_flags & TDF_DETAILS))
 827                 fprintf (dump_file, "failed: evolution of base is not"
 828                                     " affine.\n");
 829               return false;
 830             }
 831           else
 832             {
 833               base_iv.base = base;
 834               base_iv.step = ssize_int (0);
 835               base_iv.no_overflow = true;
 836             }
 837         }
 838     }
 839   else
 840     {
 841       base_iv.base = base;
 842       base_iv.step = ssize_int (0);
 843       base_iv.no_overflow = true;
 844     }
 845
 846   if (!poffset)
 847     {
 848       offset_iv.base = ssize_int (0);
 849       offset_iv.step = ssize_int (0);
 850     }
 851   else
 852     {
 853       if (!in_loop)
 854         {
 855           offset_iv.base = poffset;
 856           offset_iv.step = ssize_int (0);
 857         }
 858       else if (!simple_iv (loop, loop_containing_stmt (stmt),
 859                            poffset, &offset_iv,
 860                            nest ? true : false))
 861         {
 862           if (nest)
 863             {
 864               if (dump_file && (dump_flags & TDF_DETAILS))
 865                 fprintf (dump_file, "failed: evolution of offset is not"
 866                                     " affine.\n");
 867               return false;
 868             }
 869           else
 870             {
 871               offset_iv.base = poffset;
 872               offset_iv.step = ssize_int (0);
 873             }
 874         }
 875     }
 876
 877   init = ssize_int (pbitpos / BITS_PER_UNIT);
 878   split_constant_offset (base_iv.base, &base_iv.base, &dinit);
 879   init =  size_binop (PLUS_EXPR, init, dinit);
 880   split_constant_offset (offset_iv.base, &offset_iv.base, &dinit);
 881   init =  size_binop (PLUS_EXPR, init, dinit);
 882
 883   step = size_binop (PLUS_EXPR,
 884                      fold_convert (ssizetype, base_iv.step),
 885                      fold_convert (ssizetype, offset_iv.step));
 886
 887   DR_BASE_ADDRESS (dr) = canonicalize_base_object_address (base_iv.base);
 888
 889   DR_OFFSET (dr) = fold_convert (ssizetype, offset_iv.base);
 890   DR_INIT (dr) = init;
 891   DR_STEP (dr) = step;
 892
 893   DR_ALIGNED_TO (dr) = size_int (highest_pow2_factor (offset_iv.base));
 894
 895   if (dump_file && (dump_flags & TDF_DETAILS))
 896     fprintf (dump_file, "success.\n");
 897
 898   return true;
 899 }
 900
 901 /* Determines the base object and the list of indices of memory reference
 902    DR, analyzed in LOOP and instantiated in loop nest NEST.  */
 903
 904 static void
 905 dr_analyze_indices (struct data_reference *dr, loop_p nest, loop_p loop)
 906 {
 907   vec<tree> access_fns = vNULL;
 908   tree ref, op;
 909   tree base, off, access_fn;
 910   basic_block before_loop;
 911
 912   /* If analyzing a basic-block there are no indices to analyze
 913      and thus no access functions.  */
 914   if (!nest)
 915     {
 916       DR_BASE_OBJECT (dr) = DR_REF (dr);
 917       DR_ACCESS_FNS (dr).create (0);
 918       return;
 919     }
 920
 921   ref = DR_REF (dr);
 922   before_loop = block_before_loop (nest);
 923
 924   /* REALPART_EXPR and IMAGPART_EXPR can be handled like accesses
 925      into a two element array with a constant index.  The base is
 926      then just the immediate underlying object.  */
 927   if (TREE_CODE (ref) == REALPART_EXPR)
 928     {
 929       ref = TREE_OPERAND (ref, 0);
 930       access_fns.safe_push (integer_zero_node);
 931     }
 932   else if (TREE_CODE (ref) == IMAGPART_EXPR)
 933     {
 934       ref = TREE_OPERAND (ref, 0);
 935       access_fns.safe_push (integer_one_node);
 936     }
 937
 938   /* Analyze access functions of dimensions we know to be independent.  */
 939   while (handled_component_p (ref))
 940     {
 941       if (TREE_CODE (ref) == ARRAY_REF)
 942         {
 943           op = TREE_OPERAND (ref, 1);
 944           access_fn = analyze_scalar_evolution (loop, op);
 945           access_fn = instantiate_scev (before_loop, loop, access_fn);
 946           access_fns.safe_push (access_fn);
 947         }
 948       else if (TREE_CODE (ref) == COMPONENT_REF
 949                && TREE_CODE (TREE_TYPE (TREE_OPERAND (ref, 0))) == RECORD_TYPE)
 950         {
 951           /* For COMPONENT_REFs of records (but not unions!) use the
 952              FIELD_DECL offset as constant access function so we can
 953              disambiguate a[i].f1 and a[i].f2.  */
 954           tree off = component_ref_field_offset (ref);
 955           off = size_binop (PLUS_EXPR,
 956                             size_binop (MULT_EXPR,
 957                                         fold_convert (bitsizetype, off),
 958                                         bitsize_int (BITS_PER_UNIT)),
 959                             DECL_FIELD_BIT_OFFSET (TREE_OPERAND (ref, 1)));
 960           access_fns.safe_push (off);
 961         }
 962       else
 963         /* If we have an unhandled component we could not translate
 964            to an access function stop analyzing.  We have determined
 965            our base object in this case.  */
 966         break;
 967
 968       ref = TREE_OPERAND (ref, 0);
 969     }
 970
 971   /* If the address operand of a MEM_REF base has an evolution in the
 972      analyzed nest, add it as an additional independent access-function.  */
 973   if (TREE_CODE (ref) == MEM_REF)
 974     {
 975       op = TREE_OPERAND (ref, 0);
 976       access_fn = analyze_scalar_evolution (loop, op);
 977       access_fn = instantiate_scev (before_loop, loop, access_fn);
 978       if (TREE_CODE (access_fn) == POLYNOMIAL_CHREC)
 979         {
 980           tree orig_type;
 981           tree memoff = TREE_OPERAND (ref, 1);
 982           base = initial_condition (access_fn);
 983           orig_type = TREE_TYPE (base);
 984           STRIP_USELESS_TYPE_CONVERSION (base);
 985           split_constant_offset (base, &base, &off);
 986           STRIP_USELESS_TYPE_CONVERSION (base);
 987           /* Fold the MEM_REF offset into the evolutions initial
 988              value to make more bases comparable.  */
 989           if (!integer_zerop (memoff))
 990             {
 991               off = size_binop (PLUS_EXPR, off,
 992                                 fold_convert (ssizetype, memoff));
 993               memoff = build_int_cst (TREE_TYPE (memoff), 0);
 994             }
 995           access_fn = chrec_replace_initial_condition
 996               (access_fn, fold_convert (orig_type, off));
 997           /* ???  This is still not a suitable base object for
 998              dr_may_alias_p - the base object needs to be an
 999              access that covers the object as whole.  With
1000              an evolution in the pointer this cannot be
1001              guaranteed.
1002              As a band-aid, mark the access so we can special-case
1003              it in dr_may_alias_p.  */
1004           tree old = ref;
1005           ref = fold_build2_loc (EXPR_LOCATION (ref),
1006                                  MEM_REF, TREE_TYPE (ref),
1007                                  base, memoff);
1008           MR_DEPENDENCE_CLIQUE (ref) = MR_DEPENDENCE_CLIQUE (old);
1009           MR_DEPENDENCE_BASE (ref) = MR_DEPENDENCE_BASE (old);
1010           access_fns.safe_push (access_fn);
1011         }
1012     }
1013   else if (DECL_P (ref))
1014     {
1015       /* Canonicalize DR_BASE_OBJECT to MEM_REF form.  */
1016       ref = build2 (MEM_REF, TREE_TYPE (ref),
1017                     build_fold_addr_expr (ref),
1018                     build_int_cst (reference_alias_ptr_type (ref), 0));
1019     }
1020
1021   DR_BASE_OBJECT (dr) = ref;
1022   DR_ACCESS_FNS (dr) = access_fns;
1023 }
1024
1025 /* Extracts the alias analysis information from the memory reference DR.  */
1026
1027 static void
1028 dr_analyze_alias (struct data_reference *dr)
1029 {
1030   tree ref = DR_REF (dr);
1031   tree base = get_base_address (ref), addr;
1032
1033   if (INDIRECT_REF_P (base)
1034       || TREE_CODE (base) == MEM_REF)
1035     {
1036       addr = TREE_OPERAND (base, 0);
1037       if (TREE_CODE (addr) == SSA_NAME)
1038         DR_PTR_INFO (dr) = SSA_NAME_PTR_INFO (addr);
1039     }
1040 }
1041
1042 /* Frees data reference DR.  */
1043
1044 void
1045 free_data_ref (data_reference_p dr)
1046 {
1047   DR_ACCESS_FNS (dr).release ();
1048   free (dr);
1049 }
1050
1051 /* Analyzes memory reference MEMREF accessed in STMT.  The reference
1052    is read if IS_READ is true, write otherwise.  Returns the
1053    data_reference description of MEMREF.  NEST is the outermost loop
1054    in which the reference should be instantiated, LOOP is the loop in
1055    which the data reference should be analyzed.  */
1056
1057 struct data_reference *
1058 create_data_ref (loop_p nest, loop_p loop, tree memref, gimple stmt,
1059                  bool is_read)
1060 {
1061   struct data_reference *dr;
1062
1063   if (dump_file && (dump_flags & TDF_DETAILS))
1064     {
1065       fprintf (dump_file, "Creating dr for ");
1066       print_generic_expr (dump_file, memref, TDF_SLIM);
1067       fprintf (dump_file, "\n");
1068     }
1069
1070   dr = XCNEW (struct data_reference);
1071   DR_STMT (dr) = stmt;
1072   DR_REF (dr) = memref;
1073   DR_IS_READ (dr) = is_read;
1074
1075   dr_analyze_innermost (dr, nest);
1076   dr_analyze_indices (dr, nest, loop);
1077   dr_analyze_alias (dr);
1078
1079   if (dump_file && (dump_flags & TDF_DETAILS))
1080     {
1081       unsigned i;
1082       fprintf (dump_file, "\tbase_address: ");
1083       print_generic_expr (dump_file, DR_BASE_ADDRESS (dr), TDF_SLIM);
1084       fprintf (dump_file, "\n\toffset from base address: ");
1085       print_generic_expr (dump_file, DR_OFFSET (dr), TDF_SLIM);
1086       fprintf (dump_file, "\n\tconstant offset from base address: ");
1087       print_generic_expr (dump_file, DR_INIT (dr), TDF_SLIM);
1088       fprintf (dump_file, "\n\tstep: ");
1089       print_generic_expr (dump_file, DR_STEP (dr), TDF_SLIM);
1090       fprintf (dump_file, "\n\taligned to: ");
1091       print_generic_expr (dump_file, DR_ALIGNED_TO (dr), TDF_SLIM);
1092       fprintf (dump_file, "\n\tbase_object: ");
1093       print_generic_expr (dump_file, DR_BASE_OBJECT (dr), TDF_SLIM);
1094       fprintf (dump_file, "\n");
1095       for (i = 0; i < DR_NUM_DIMENSIONS (dr); i++)
1096         {
1097           fprintf (dump_file, "\tAccess function %d: ", i);
1098           print_generic_stmt (dump_file, DR_ACCESS_FN (dr, i), TDF_SLIM);
1099         }
1100     }
1101
1102   return dr;
1103 }
1104
1105 /* Check if OFFSET1 and OFFSET2 (DR_OFFSETs of some data-refs) are identical
1106    expressions.  */
1107 static bool
1108 dr_equal_offsets_p1 (tree offset1, tree offset2)
1109 {
1110   bool res;
1111
1112   STRIP_NOPS (offset1);
1113   STRIP_NOPS (offset2);
1114
1115   if (offset1 == offset2)
1116     return true;
1117
1118   if (TREE_CODE (offset1) != TREE_CODE (offset2)
1119       || (!BINARY_CLASS_P (offset1) && !UNARY_CLASS_P (offset1)))
1120     return false;
1121
1122   res = dr_equal_offsets_p1 (TREE_OPERAND (offset1, 0),
1123                              TREE_OPERAND (offset2, 0));
1124
1125   if (!res || !BINARY_CLASS_P (offset1))
1126     return res;
1127
1128   res = dr_equal_offsets_p1 (TREE_OPERAND (offset1, 1),
1129                              TREE_OPERAND (offset2, 1));
1130
1131   return res;
1132 }
1133
1134 /* Check if DRA and DRB have equal offsets.  */
1135 bool
1136 dr_equal_offsets_p (struct data_reference *dra,
1137                     struct data_reference *drb)
1138 {
1139   tree offset1, offset2;
1140
1141   offset1 = DR_OFFSET (dra);
1142   offset2 = DR_OFFSET (drb);
1143
1144   return dr_equal_offsets_p1 (offset1, offset2);
1145 }
1146
1147 /* Returns true if FNA == FNB.  */
1148
1149 static bool
1150 affine_function_equal_p (affine_fn fna, affine_fn fnb)
1151 {
1152   unsigned i, n = fna.length ();
1153
1154   if (n != fnb.length ())
1155     return false;
1156
1157   for (i = 0; i < n; i++)
1158     if (!operand_equal_p (fna[i], fnb[i], 0))
1159       return false;
1160
1161   return true;
1162 }
1163
1164 /* If all the functions in CF are the same, returns one of them,
1165    otherwise returns NULL.  */
1166
1167 static affine_fn
1168 common_affine_function (conflict_function *cf)
1169 {
1170   unsigned i;
1171   affine_fn comm;
1172
1173   if (!CF_NONTRIVIAL_P (cf))
1174     return affine_fn ();
1175
1176   comm = cf->fns[0];
1177
1178   for (i = 1; i < cf->n; i++)
1179     if (!affine_function_equal_p (comm, cf->fns[i]))
1180       return affine_fn ();
1181
1182   return comm;
1183 }
1184
1185 /* Returns the base of the affine function FN.  */
1186
1187 static tree
1188 affine_function_base (affine_fn fn)
1189 {
1190   return fn[0];
1191 }
1192
1193 /* Returns true if FN is a constant.  */
1194
1195 static bool
1196 affine_function_constant_p (affine_fn fn)
1197 {
1198   unsigned i;
1199   tree coef;
1200
1201   for (i = 1; fn.iterate (i, &coef); i++)
1202     if (!integer_zerop (coef))
1203       return false;
1204
1205   return true;
1206 }
1207
1208 /* Returns true if FN is the zero constant function.  */
1209
1210 static bool
1211 affine_function_zero_p (affine_fn fn)
1212 {
1213   return (integer_zerop (affine_function_base (fn))
1214           && affine_function_constant_p (fn));
1215 }
1216
1217 /* Returns a signed integer type with the largest precision from TA
1218    and TB.  */
1219
1220 static tree
1221 signed_type_for_types (tree ta, tree tb)
1222 {
1223   if (TYPE_PRECISION (ta) > TYPE_PRECISION (tb))
1224     return signed_type_for (ta);
1225   else
1226     return signed_type_for (tb);
1227 }
1228
1229 /* Applies operation OP on affine functions FNA and FNB, and returns the
1230    result.  */
1231
1232 static affine_fn
1233 affine_fn_op (enum tree_code op, affine_fn fna, affine_fn fnb)
1234 {
1235   unsigned i, n, m;
1236   affine_fn ret;
1237   tree coef;
1238
1239   if (fnb.length () > fna.length ())
1240     {
1241       n = fna.length ();
1242       m = fnb.length ();
1243     }
1244   else
1245     {
1246       n = fnb.length ();
1247       m = fna.length ();
1248     }
1249
1250   ret.create (m);
1251   for (i = 0; i < n; i++)
1252     {
1253       tree type = signed_type_for_types (TREE_TYPE (fna[i]),
1254                                          TREE_TYPE (fnb[i]));
1255       ret.quick_push (fold_build2 (op, type, fna[i], fnb[i]));
1256     }
1257
1258   for (; fna.iterate (i, &coef); i++)
1259     ret.quick_push (fold_build2 (op, signed_type_for (TREE_TYPE (coef)),
1260                                  coef, integer_zero_node));
1261   for (; fnb.iterate (i, &coef); i++)
1262     ret.quick_push (fold_build2 (op, signed_type_for (TREE_TYPE (coef)),
1263                                  integer_zero_node, coef));
1264
1265   return ret;
1266 }
1267
1268 /* Returns the sum of affine functions FNA and FNB.  */
1269
1270 static affine_fn
1271 affine_fn_plus (affine_fn fna, affine_fn fnb)
1272 {
1273   return affine_fn_op (PLUS_EXPR, fna, fnb);
1274 }
1275
1276 /* Returns the difference of affine functions FNA and FNB.  */
1277
1278 static affine_fn
1279 affine_fn_minus (affine_fn fna, affine_fn fnb)
1280 {
1281   return affine_fn_op (MINUS_EXPR, fna, fnb);
1282 }
1283
1284 /* Frees affine function FN.  */
1285
1286 static void
1287 affine_fn_free (affine_fn fn)
1288 {
1289   fn.release ();
1290 }
1291
1292 /* Determine for each subscript in the data dependence relation DDR
1293    the distance.  */
1294
1295 static void
1296 compute_subscript_distance (struct data_dependence_relation *ddr)
1297 {
1298   conflict_function *cf_a, *cf_b;
1299   affine_fn fn_a, fn_b, diff;
1300
1301   if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE)
1302     {
1303       unsigned int i;
1304
1305       for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
1306         {
1307           struct subscript *subscript;
1308
1309           subscript = DDR_SUBSCRIPT (ddr, i);
1310           cf_a = SUB_CONFLICTS_IN_A (subscript);
1311           cf_b = SUB_CONFLICTS_IN_B (subscript);
1312
1313           fn_a = common_affine_function (cf_a);
1314           fn_b = common_affine_function (cf_b);
1315           if (!fn_a.exists () || !fn_b.exists ())
1316             {
1317               SUB_DISTANCE (subscript) = chrec_dont_know;
1318               return;
1319             }
1320           diff = affine_fn_minus (fn_a, fn_b);
1321
1322           if (affine_function_constant_p (diff))
1323             SUB_DISTANCE (subscript) = affine_function_base (diff);
1324           else
1325             SUB_DISTANCE (subscript) = chrec_dont_know;
1326
1327           affine_fn_free (diff);
1328         }
1329     }
1330 }
1331
1332 /* Returns the conflict function for "unknown".  */
1333
1334 static conflict_function *
1335 conflict_fn_not_known (void)
1336 {
1337   conflict_function *fn = XCNEW (conflict_function);
1338   fn->n = NOT_KNOWN;
1339
1340   return fn;
1341 }
1342
1343 /* Returns the conflict function for "independent".  */
1344
1345 static conflict_function *
1346 conflict_fn_no_dependence (void)
1347 {
1348   conflict_function *fn = XCNEW (conflict_function);
1349   fn->n = NO_DEPENDENCE;
1350
1351   return fn;
1352 }
1353
1354 /* Returns true if the address of OBJ is invariant in LOOP.  */
1355
1356 static bool
1357 object_address_invariant_in_loop_p (const struct loop *loop, const_tree obj)
1358 {
1359   while (handled_component_p (obj))
1360     {
1361       if (TREE_CODE (obj) == ARRAY_REF)
1362         {
1363           /* Index of the ARRAY_REF was zeroed in analyze_indices, thus we only
1364              need to check the stride and the lower bound of the reference.  */
1365           if (chrec_contains_symbols_defined_in_loop (TREE_OPERAND (obj, 2),
1366                                                       loop->num)
1367               || chrec_contains_symbols_defined_in_loop (TREE_OPERAND (obj, 3),
1368                                                          loop->num))
1369             return false;
1370         }
1371       else if (TREE_CODE (obj) == COMPONENT_REF)
1372         {
1373           if (chrec_contains_symbols_defined_in_loop (TREE_OPERAND (obj, 2),
1374                                                       loop->num))
1375             return false;
1376         }
1377       obj = TREE_OPERAND (obj, 0);
1378     }
1379
1380   if (!INDIRECT_REF_P (obj)
1381       && TREE_CODE (obj) != MEM_REF)
1382     return true;
1383
1384   return !chrec_contains_symbols_defined_in_loop (TREE_OPERAND (obj, 0),
1385                                                   loop->num);
1386 }
1387
1388 /* Returns false if we can prove that data references A and B do not alias,
1389    true otherwise.  If LOOP_NEST is false no cross-iteration aliases are
1390    considered.  */
1391
1392 bool
1393 dr_may_alias_p (const struct data_reference *a, const struct data_reference *b,
1394                 bool loop_nest)
1395 {
1396   tree addr_a = DR_BASE_OBJECT (a);
1397   tree addr_b = DR_BASE_OBJECT (b);
1398
1399   /* If we are not processing a loop nest but scalar code we
1400      do not need to care about possible cross-iteration dependences
1401      and thus can process the full original reference.  Do so,
1402      similar to how loop invariant motion applies extra offset-based
1403      disambiguation.  */
1404   if (!loop_nest)
1405     {
1406       aff_tree off1, off2;
1407       widest_int size1, size2;
1408       get_inner_reference_aff (DR_REF (a), &off1, &size1);
1409       get_inner_reference_aff (DR_REF (b), &off2, &size2);
1410       aff_combination_scale (&off1, -1);
1411       aff_combination_add (&off2, &off1);
1412       if (aff_comb_cannot_overlap_p (&off2, size1, size2))
1413         return false;
1414     }
1415
1416   if ((TREE_CODE (addr_a) == MEM_REF || TREE_CODE (addr_a) == TARGET_MEM_REF)
1417       && (TREE_CODE (addr_b) == MEM_REF || TREE_CODE (addr_b) == TARGET_MEM_REF)
1418       && MR_DEPENDENCE_CLIQUE (addr_a) == MR_DEPENDENCE_CLIQUE (addr_b)
1419       && MR_DEPENDENCE_BASE (addr_a) != MR_DEPENDENCE_BASE (addr_b))
1420     return false;
1421
1422   /* If we had an evolution in a pointer-based MEM_REF BASE_OBJECT we
1423      do not know the size of the base-object.  So we cannot do any
1424      offset/overlap based analysis but have to rely on points-to
1425      information only.  */
1426   if (TREE_CODE (addr_a) == MEM_REF
1427       && TREE_CODE (TREE_OPERAND (addr_a, 0)) == SSA_NAME)
1428     {
1429       /* For true dependences we can apply TBAA.  */
1430       if (flag_strict_aliasing
1431           && DR_IS_WRITE (a) && DR_IS_READ (b)
1432           && !alias_sets_conflict_p (get_alias_set (DR_REF (a)),
1433                                      get_alias_set (DR_REF (b))))
1434         return false;
1435       if (TREE_CODE (addr_b) == MEM_REF)
1436         return ptr_derefs_may_alias_p (TREE_OPERAND (addr_a, 0),
1437                                        TREE_OPERAND (addr_b, 0));
1438       else
1439         return ptr_derefs_may_alias_p (TREE_OPERAND (addr_a, 0),
1440                                        build_fold_addr_expr (addr_b));
1441     }
1442   else if (TREE_CODE (addr_b) == MEM_REF
1443            && TREE_CODE (TREE_OPERAND (addr_b, 0)) == SSA_NAME)
1444     {
1445       /* For true dependences we can apply TBAA.  */
1446       if (flag_strict_aliasing
1447           && DR_IS_WRITE (a) && DR_IS_READ (b)
1448           && !alias_sets_conflict_p (get_alias_set (DR_REF (a)),
1449                                      get_alias_set (DR_REF (b))))
1450         return false;
1451       if (TREE_CODE (addr_a) == MEM_REF)
1452         return ptr_derefs_may_alias_p (TREE_OPERAND (addr_a, 0),
1453                                        TREE_OPERAND (addr_b, 0));
1454       else
1455         return ptr_derefs_may_alias_p (build_fold_addr_expr (addr_a),
1456                                        TREE_OPERAND (addr_b, 0));
1457     }
1458
1459   /* Otherwise DR_BASE_OBJECT is an access that covers the whole object
1460      that is being subsetted in the loop nest.  */
1461   if (DR_IS_WRITE (a) && DR_IS_WRITE (b))
1462     return refs_output_dependent_p (addr_a, addr_b);
1463   else if (DR_IS_READ (a) && DR_IS_WRITE (b))
1464     return refs_anti_dependent_p (addr_a, addr_b);
1465   return refs_may_alias_p (addr_a, addr_b);
1466 }
1467
1468 /* Initialize a data dependence relation between data accesses A and
1469    B.  NB_LOOPS is the number of loops surrounding the references: the
1470    size of the classic distance/direction vectors.  */
1471
1472 struct data_dependence_relation *
1473 initialize_data_dependence_relation (struct data_reference *a,
1474                                      struct data_reference *b,
1475                                      vec<loop_p> loop_nest)
1476 {
1477   struct data_dependence_relation *res;
1478   unsigned int i;
1479
1480   res = XNEW (struct data_dependence_relation);
1481   DDR_A (res) = a;
1482   DDR_B (res) = b;
1483   DDR_LOOP_NEST (res).create (0);
1484   DDR_REVERSED_P (res) = false;
1485   DDR_SUBSCRIPTS (res).create (0);
1486   DDR_DIR_VECTS (res).create (0);
1487   DDR_DIST_VECTS (res).create (0);
1488
1489   if (a == NULL || b == NULL)
1490     {
1491       DDR_ARE_DEPENDENT (res) = chrec_dont_know;
1492       return res;
1493     }
1494
1495   /* If the data references do not alias, then they are independent.  */
1496   if (!dr_may_alias_p (a, b, loop_nest.exists ()))
1497     {
1498       DDR_ARE_DEPENDENT (res) = chrec_known;
1499       return res;
1500     }
1501
1502   /* The case where the references are exactly the same.  */
1503   if (operand_equal_p (DR_REF (a), DR_REF (b), 0))
1504     {
1505      if (loop_nest.exists ()
1506         && !object_address_invariant_in_loop_p (loop_nest[0],
1507                                                 DR_BASE_OBJECT (a)))
1508       {
1509         DDR_ARE_DEPENDENT (res) = chrec_dont_know;
1510         return res;
1511       }
1512       DDR_AFFINE_P (res) = true;
1513       DDR_ARE_DEPENDENT (res) = NULL_TREE;
1514       DDR_SUBSCRIPTS (res).create (DR_NUM_DIMENSIONS (a));
1515       DDR_LOOP_NEST (res) = loop_nest;
1516       DDR_INNER_LOOP (res) = 0;
1517       DDR_SELF_REFERENCE (res) = true;
1518       for (i = 0; i < DR_NUM_DIMENSIONS (a); i++)
1519        {
1520          struct subscript *subscript;
1521
1522          subscript = XNEW (struct subscript);
1523          SUB_CONFLICTS_IN_A (subscript) = conflict_fn_not_known ();
1524          SUB_CONFLICTS_IN_B (subscript) = conflict_fn_not_known ();
1525          SUB_LAST_CONFLICT (subscript) = chrec_dont_know;
1526          SUB_DISTANCE (subscript) = chrec_dont_know;
1527          DDR_SUBSCRIPTS (res).safe_push (subscript);
1528        }
1529       return res;
1530     }
1531
1532   /* If the references do not access the same object, we do not know
1533      whether they alias or not.  */
1534   if (!operand_equal_p (DR_BASE_OBJECT (a), DR_BASE_OBJECT (b), 0))
1535     {
1536       DDR_ARE_DEPENDENT (res) = chrec_dont_know;
1537       return res;
1538     }
1539
1540   /* If the base of the object is not invariant in the loop nest, we cannot
1541      analyze it.  TODO -- in fact, it would suffice to record that there may
1542      be arbitrary dependences in the loops where the base object varies.  */
1543   if (loop_nest.exists ()
1544       && !object_address_invariant_in_loop_p (loop_nest[0],
1545                                               DR_BASE_OBJECT (a)))
1546     {
1547       DDR_ARE_DEPENDENT (res) = chrec_dont_know;
1548       return res;
1549     }
1550
1551   /* If the number of dimensions of the access to not agree we can have
1552      a pointer access to a component of the array element type and an
1553      array access while the base-objects are still the same.  Punt.  */
1554   if (DR_NUM_DIMENSIONS (a) != DR_NUM_DIMENSIONS (b))
1555     {
1556       DDR_ARE_DEPENDENT (res) = chrec_dont_know;
1557       return res;
1558     }
1559
1560   DDR_AFFINE_P (res) = true;
1561   DDR_ARE_DEPENDENT (res) = NULL_TREE;
1562   DDR_SUBSCRIPTS (res).create (DR_NUM_DIMENSIONS (a));
1563   DDR_LOOP_NEST (res) = loop_nest;
1564   DDR_INNER_LOOP (res) = 0;
1565   DDR_SELF_REFERENCE (res) = false;
1566
1567   for (i = 0; i < DR_NUM_DIMENSIONS (a); i++)
1568     {
1569       struct subscript *subscript;
1570
1571       subscript = XNEW (struct subscript);
1572       SUB_CONFLICTS_IN_A (subscript) = conflict_fn_not_known ();
1573       SUB_CONFLICTS_IN_B (subscript) = conflict_fn_not_known ();
1574       SUB_LAST_CONFLICT (subscript) = chrec_dont_know;
1575       SUB_DISTANCE (subscript) = chrec_dont_know;
1576       DDR_SUBSCRIPTS (res).safe_push (subscript);
1577     }
1578
1579   return res;
1580 }
1581
1582 /* Frees memory used by the conflict function F.  */
1583
1584 static void
1585 free_conflict_function (conflict_function *f)
1586 {
1587   unsigned i;
1588
1589   if (CF_NONTRIVIAL_P (f))
1590     {
1591       for (i = 0; i < f->n; i++)
1592         affine_fn_free (f->fns[i]);
1593     }
1594   free (f);
1595 }
1596
1597 /* Frees memory used by SUBSCRIPTS.  */
1598
1599 static void
1600 free_subscripts (vec<subscript_p> subscripts)
1601 {
1602   unsigned i;
1603   subscript_p s;
1604
1605   FOR_EACH_VEC_ELT (subscripts, i, s)
1606     {
1607       free_conflict_function (s->conflicting_iterations_in_a);
1608       free_conflict_function (s->conflicting_iterations_in_b);
1609       free (s);
1610     }
1611   subscripts.release ();
1612 }
1613
1614 /* Set DDR_ARE_DEPENDENT to CHREC and finalize the subscript overlap
1615    description.  */
1616
1617 static inline void
1618 finalize_ddr_dependent (struct data_dependence_relation *ddr,
1619                         tree chrec)
1620 {
1621   DDR_ARE_DEPENDENT (ddr) = chrec;
1622   free_subscripts (DDR_SUBSCRIPTS (ddr));
1623   DDR_SUBSCRIPTS (ddr).create (0);
1624 }
1625
1626 /* The dependence relation DDR cannot be represented by a distance
1627    vector.  */
1628
1629 static inline void
1630 non_affine_dependence_relation (struct data_dependence_relation *ddr)
1631 {
1632   if (dump_file && (dump_flags & TDF_DETAILS))
1633     fprintf (dump_file, "(Dependence relation cannot be represented by distance vector.) \n");
1634
1635   DDR_AFFINE_P (ddr) = false;
1636 }
1637
1638 \f
1639
1640 /* This section contains the classic Banerjee tests.  */
1641
1642 /* Returns true iff CHREC_A and CHREC_B are not dependent on any index
1643    variables, i.e., if the ZIV (Zero Index Variable) test is true.  */
1644
1645 static inline bool
1646 ziv_subscript_p (const_tree chrec_a, const_tree chrec_b)
1647 {
1648   return (evolution_function_is_constant_p (chrec_a)
1649           && evolution_function_is_constant_p (chrec_b));
1650 }
1651
1652 /* Returns true iff CHREC_A and CHREC_B are dependent on an index
1653    variable, i.e., if the SIV (Single Index Variable) test is true.  */
1654
1655 static bool
1656 siv_subscript_p (const_tree chrec_a, const_tree chrec_b)
1657 {
1658   if ((evolution_function_is_constant_p (chrec_a)
1659        && evolution_function_is_univariate_p (chrec_b))
1660       || (evolution_function_is_constant_p (chrec_b)
1661           && evolution_function_is_univariate_p (chrec_a)))
1662     return true;
1663
1664   if (evolution_function_is_univariate_p (chrec_a)
1665       && evolution_function_is_univariate_p (chrec_b))
1666     {
1667       switch (TREE_CODE (chrec_a))
1668         {
1669         case POLYNOMIAL_CHREC:
1670           switch (TREE_CODE (chrec_b))
1671             {
1672             case POLYNOMIAL_CHREC:
1673               if (CHREC_VARIABLE (chrec_a) != CHREC_VARIABLE (chrec_b))
1674                 return false;
1675
1676             default:
1677               return true;
1678             }
1679
1680         default:
1681           return true;
1682         }
1683     }
1684
1685   return false;
1686 }
1687
1688 /* Creates a conflict function with N dimensions.  The affine functions
1689    in each dimension follow.  */
1690
1691 static conflict_function *
1692 conflict_fn (unsigned n, ...)
1693 {
1694   unsigned i;
1695   conflict_function *ret = XCNEW (conflict_function);
1696   va_list ap;
1697
1698   gcc_assert (0 < n && n <= MAX_DIM);
1699   va_start (ap, n);
1700
1701   ret->n = n;
1702   for (i = 0; i < n; i++)
1703     ret->fns[i] = va_arg (ap, affine_fn);
1704   va_end (ap);
1705
1706   return ret;
1707 }
1708
1709 /* Returns constant affine function with value CST.  */
1710
1711 static affine_fn
1712 affine_fn_cst (tree cst)
1713 {
1714   affine_fn fn;
1715   fn.create (1);
1716   fn.quick_push (cst);
1717   return fn;
1718 }
1719
1720 /* Returns affine function with single variable, CST + COEF * x_DIM.  */
1721
1722 static affine_fn
1723 affine_fn_univar (tree cst, unsigned dim, tree coef)
1724 {
1725   affine_fn fn;
1726   fn.create (dim + 1);
1727   unsigned i;
1728
1729   gcc_assert (dim > 0);
1730   fn.quick_push (cst);
1731   for (i = 1; i < dim; i++)
1732     fn.quick_push (integer_zero_node);
1733   fn.quick_push (coef);
1734   return fn;
1735 }
1736
1737 /* Analyze a ZIV (Zero Index Variable) subscript.  *OVERLAPS_A and
1738    *OVERLAPS_B are initialized to the functions that describe the
1739    relation between the elements accessed twice by CHREC_A and
1740    CHREC_B.  For k >= 0, the following property is verified:
1741
1742    CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)).  */
1743
1744 static void
1745 analyze_ziv_subscript (tree chrec_a,
1746                        tree chrec_b,
1747                        conflict_function **overlaps_a,
1748                        conflict_function **overlaps_b,
1749                        tree *last_conflicts)
1750 {
1751   tree type, difference;
1752   dependence_stats.num_ziv++;
1753
1754   if (dump_file && (dump_flags & TDF_DETAILS))
1755     fprintf (dump_file, "(analyze_ziv_subscript \n");
1756
1757   type = signed_type_for_types (TREE_TYPE (chrec_a), TREE_TYPE (chrec_b));
1758   chrec_a = chrec_convert (type, chrec_a, NULL);
1759   chrec_b = chrec_convert (type, chrec_b, NULL);
1760   difference = chrec_fold_minus (type, chrec_a, chrec_b);
1761
1762   switch (TREE_CODE (difference))
1763     {
1764     case INTEGER_CST:
1765       if (integer_zerop (difference))
1766         {
1767           /* The difference is equal to zero: the accessed index
1768              overlaps for each iteration in the loop.  */
1769           *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
1770           *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
1771           *last_conflicts = chrec_dont_know;
1772           dependence_stats.num_ziv_dependent++;
1773         }
1774       else
1775         {
1776           /* The accesses do not overlap.  */
1777           *overlaps_a = conflict_fn_no_dependence ();
1778           *overlaps_b = conflict_fn_no_dependence ();
1779           *last_conflicts = integer_zero_node;
1780           dependence_stats.num_ziv_independent++;
1781         }
1782       break;
1783
1784     default:
1785       /* We're not sure whether the indexes overlap.  For the moment,
1786          conservatively answer "don't know".  */
1787       if (dump_file && (dump_flags & TDF_DETAILS))
1788         fprintf (dump_file, "ziv test failed: difference is non-integer.\n");
1789
1790       *overlaps_a = conflict_fn_not_known ();
1791       *overlaps_b = conflict_fn_not_known ();
1792       *last_conflicts = chrec_dont_know;
1793       dependence_stats.num_ziv_unimplemented++;
1794       break;
1795     }
1796
1797   if (dump_file && (dump_flags & TDF_DETAILS))
1798     fprintf (dump_file, ")\n");
1799 }
1800
1801 /* Similar to max_stmt_executions_int, but returns the bound as a tree,
1802    and only if it fits to the int type.  If this is not the case, or the
1803    bound  on the number of iterations of LOOP could not be derived, returns
1804    chrec_dont_know.  */
1805
1806 static tree
1807 max_stmt_executions_tree (struct loop *loop)
1808 {
1809   widest_int nit;
1810
1811   if (!max_stmt_executions (loop, &nit))
1812     return chrec_dont_know;
1813
1814   if (!wi::fits_to_tree_p (nit, unsigned_type_node))
1815     return chrec_dont_know;
1816
1817   return wide_int_to_tree (unsigned_type_node, nit);
1818 }
1819
1820 /* Determine whether the CHREC is always positive/negative.  If the expression
1821    cannot be statically analyzed, return false, otherwise set the answer into
1822    VALUE.  */
1823
1824 static bool
1825 chrec_is_positive (tree chrec, bool *value)
1826 {
1827   bool value0, value1, value2;
1828   tree end_value, nb_iter;
1829
1830   switch (TREE_CODE (chrec))
1831     {
1832     case POLYNOMIAL_CHREC:
1833       if (!chrec_is_positive (CHREC_LEFT (chrec), &value0)
1834           || !chrec_is_positive (CHREC_RIGHT (chrec), &value1))
1835         return false;
1836
1837       /* FIXME -- overflows.  */
1838       if (value0 == value1)
1839         {
1840           *value = value0;
1841           return true;
1842         }
1843
1844       /* Otherwise the chrec is under the form: "{-197, +, 2}_1",
1845          and the proof consists in showing that the sign never
1846          changes during the execution of the loop, from 0 to
1847          loop->nb_iterations.  */
1848       if (!evolution_function_is_affine_p (chrec))
1849         return false;
1850
1851       nb_iter = number_of_latch_executions (get_chrec_loop (chrec));
1852       if (chrec_contains_undetermined (nb_iter))
1853         return false;
1854
1855 #if 0
1856       /* TODO -- If the test is after the exit, we may decrease the number of
1857          iterations by one.  */
1858       if (after_exit)
1859         nb_iter = chrec_fold_minus (type, nb_iter, build_int_cst (type, 1));
1860 #endif
1861
1862       end_value = chrec_apply (CHREC_VARIABLE (chrec), chrec, nb_iter);
1863
1864       if (!chrec_is_positive (end_value, &value2))
1865         return false;
1866
1867       *value = value0;
1868       return value0 == value1;
1869
1870     case INTEGER_CST:
1871       switch (tree_int_cst_sgn (chrec))
1872         {
1873         case -1:
1874           *value = false;
1875           break;
1876         case 1:
1877           *value = true;
1878           break;
1879         default:
1880           return false;
1881         }
1882       return true;
1883
1884     default:
1885       return false;
1886     }
1887 }
1888
1889
1890 /* Analyze a SIV (Single Index Variable) subscript where CHREC_A is a
1891    constant, and CHREC_B is an affine function.  *OVERLAPS_A and
1892    *OVERLAPS_B are initialized to the functions that describe the
1893    relation between the elements accessed twice by CHREC_A and
1894    CHREC_B.  For k >= 0, the following property is verified:
1895
1896    CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)).  */
1897
1898 static void
1899 analyze_siv_subscript_cst_affine (tree chrec_a,
1900                                   tree chrec_b,
1901                                   conflict_function **overlaps_a,
1902                                   conflict_function **overlaps_b,
1903                                   tree *last_conflicts)
1904 {
1905   bool value0, value1, value2;
1906   tree type, difference, tmp;
1907
1908   type = signed_type_for_types (TREE_TYPE (chrec_a), TREE_TYPE (chrec_b));
1909   chrec_a = chrec_convert (type, chrec_a, NULL);
1910   chrec_b = chrec_convert (type, chrec_b, NULL);
1911   difference = chrec_fold_minus (type, initial_condition (chrec_b), chrec_a);
1912
1913   /* Special case overlap in the first iteration.  */
1914   if (integer_zerop (difference))
1915     {
1916       *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
1917       *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
1918       *last_conflicts = integer_one_node;
1919       return;
1920     }
1921
1922   if (!chrec_is_positive (initial_condition (difference), &value0))
1923     {
1924       if (dump_file && (dump_flags & TDF_DETAILS))
1925         fprintf (dump_file, "siv test failed: chrec is not positive.\n");
1926
1927       dependence_stats.num_siv_unimplemented++;
1928       *overlaps_a = conflict_fn_not_known ();
1929       *overlaps_b = conflict_fn_not_known ();
1930       *last_conflicts = chrec_dont_know;
1931       return;
1932     }
1933   else
1934     {
1935       if (value0 == false)
1936         {
1937           if (!chrec_is_positive (CHREC_RIGHT (chrec_b), &value1))
1938             {
1939               if (dump_file && (dump_flags & TDF_DETAILS))
1940                 fprintf (dump_file, "siv test failed: chrec not positive.\n");
1941
1942               *overlaps_a = conflict_fn_not_known ();
1943               *overlaps_b = conflict_fn_not_known ();
1944               *last_conflicts = chrec_dont_know;
1945               dependence_stats.num_siv_unimplemented++;
1946               return;
1947             }
1948           else
1949             {
1950               if (value1 == true)
1951                 {
1952                   /* Example:
1953                      chrec_a = 12
1954                      chrec_b = {10, +, 1}
1955                   */
1956
1957                   if (tree_fold_divides_p (CHREC_RIGHT (chrec_b), difference))
1958                     {
1959                       HOST_WIDE_INT numiter;
1960                       struct loop *loop = get_chrec_loop (chrec_b);
1961
1962                       *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
1963                       tmp = fold_build2 (EXACT_DIV_EXPR, type,
1964                                          fold_build1 (ABS_EXPR, type, difference),
1965                                          CHREC_RIGHT (chrec_b));
1966                       *overlaps_b = conflict_fn (1, affine_fn_cst (tmp));
1967                       *last_conflicts = integer_one_node;
1968
1969
1970                       /* Perform weak-zero siv test to see if overlap is
1971                          outside the loop bounds.  */
1972                       numiter = max_stmt_executions_int (loop);
1973
1974                       if (numiter >= 0
1975                           && compare_tree_int (tmp, numiter) > 0)
1976                         {
1977                           free_conflict_function (*overlaps_a);
1978                           free_conflict_function (*overlaps_b);
1979                           *overlaps_a = conflict_fn_no_dependence ();
1980                           *overlaps_b = conflict_fn_no_dependence ();
1981                           *last_conflicts = integer_zero_node;
1982                           dependence_stats.num_siv_independent++;
1983                           return;
1984                         }
1985                       dependence_stats.num_siv_dependent++;
1986                       return;
1987                     }
1988
1989                   /* When the step does not divide the difference, there are
1990                      no overlaps.  */
1991                   else
1992                     {
1993                       *overlaps_a = conflict_fn_no_dependence ();
1994                       *overlaps_b = conflict_fn_no_dependence ();
1995                       *last_conflicts = integer_zero_node;
1996                       dependence_stats.num_siv_independent++;
1997                       return;
1998                     }
1999                 }
2000
2001               else
2002                 {
2003                   /* Example:
2004                      chrec_a = 12
2005                      chrec_b = {10, +, -1}
2006
2007                      In this case, chrec_a will not overlap with chrec_b.  */
2008                   *overlaps_a = conflict_fn_no_dependence ();
2009                   *overlaps_b = conflict_fn_no_dependence ();
2010                   *last_conflicts = integer_zero_node;
2011                   dependence_stats.num_siv_independent++;
2012                   return;
2013                 }
2014             }
2015         }
2016       else
2017         {
2018           if (!chrec_is_positive (CHREC_RIGHT (chrec_b), &value2))
2019             {
2020               if (dump_file && (dump_flags & TDF_DETAILS))
2021                 fprintf (dump_file, "siv test failed: chrec not positive.\n");
2022
2023               *overlaps_a = conflict_fn_not_known ();
2024               *overlaps_b = conflict_fn_not_known ();
2025               *last_conflicts = chrec_dont_know;
2026               dependence_stats.num_siv_unimplemented++;
2027               return;
2028             }
2029           else
2030             {
2031               if (value2 == false)
2032                 {
2033                   /* Example:
2034                      chrec_a = 3
2035                      chrec_b = {10, +, -1}
2036                   */
2037                   if (tree_fold_divides_p (CHREC_RIGHT (chrec_b), difference))
2038                     {
2039                       HOST_WIDE_INT numiter;
2040                       struct loop *loop = get_chrec_loop (chrec_b);
2041
2042                       *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
2043                       tmp = fold_build2 (EXACT_DIV_EXPR, type, difference,
2044                                          CHREC_RIGHT (chrec_b));
2045                       *overlaps_b = conflict_fn (1, affine_fn_cst (tmp));
2046                       *last_conflicts = integer_one_node;
2047
2048                       /* Perform weak-zero siv test to see if overlap is
2049                          outside the loop bounds.  */
2050                       numiter = max_stmt_executions_int (loop);
2051
2052                       if (numiter >= 0
2053                           && compare_tree_int (tmp, numiter) > 0)
2054                         {
2055                           free_conflict_function (*overlaps_a);
2056                           free_conflict_function (*overlaps_b);
2057                           *overlaps_a = conflict_fn_no_dependence ();
2058                           *overlaps_b = conflict_fn_no_dependence ();
2059                           *last_conflicts = integer_zero_node;
2060                           dependence_stats.num_siv_independent++;
2061                           return;
2062                         }
2063                       dependence_stats.num_siv_dependent++;
2064                       return;
2065                     }
2066
2067                   /* When the step does not divide the difference, there
2068                      are no overlaps.  */
2069                   else
2070                     {
2071                       *overlaps_a = conflict_fn_no_dependence ();
2072                       *overlaps_b = conflict_fn_no_dependence ();
2073                       *last_conflicts = integer_zero_node;
2074                       dependence_stats.num_siv_independent++;
2075                       return;
2076                     }
2077                 }
2078               else
2079                 {
2080                   /* Example:
2081                      chrec_a = 3
2082                      chrec_b = {4, +, 1}
2083
2084                      In this case, chrec_a will not overlap with chrec_b.  */
2085                   *overlaps_a = conflict_fn_no_dependence ();
2086                   *overlaps_b = conflict_fn_no_dependence ();
2087                   *last_conflicts = integer_zero_node;
2088                   dependence_stats.num_siv_independent++;
2089                   return;
2090                 }
2091             }
2092         }
2093     }
2094 }
2095
2096 /* Helper recursive function for initializing the matrix A.  Returns
2097    the initial value of CHREC.  */
2098
2099 static tree
2100 initialize_matrix_A (lambda_matrix A, tree chrec, unsigned index, int mult)
2101 {
2102   gcc_assert (chrec);
2103
2104   switch (TREE_CODE (chrec))
2105     {
2106     case POLYNOMIAL_CHREC:
2107       gcc_assert (TREE_CODE (CHREC_RIGHT (chrec)) == INTEGER_CST);
2108
2109       A[index][0] = mult * int_cst_value (CHREC_RIGHT (chrec));
2110       return initialize_matrix_A (A, CHREC_LEFT (chrec), index + 1, mult);
2111
2112     case PLUS_EXPR:
2113     case MULT_EXPR:
2114     case MINUS_EXPR:
2115       {
2116         tree op0 = initialize_matrix_A (A, TREE_OPERAND (chrec, 0), index, mult);
2117         tree op1 = initialize_matrix_A (A, TREE_OPERAND (chrec, 1), index, mult);
2118
2119         return chrec_fold_op (TREE_CODE (chrec), chrec_type (chrec), op0, op1);
2120       }
2121
2122     CASE_CONVERT:
2123       {
2124         tree op = initialize_matrix_A (A, TREE_OPERAND (chrec, 0), index, mult);
2125         return chrec_convert (chrec_type (chrec), op, NULL);
2126       }
2127
2128     case BIT_NOT_EXPR:
2129       {
2130         /* Handle ~X as -1 - X.  */
2131         tree op = initialize_matrix_A (A, TREE_OPERAND (chrec, 0), index, mult);
2132         return chrec_fold_op (MINUS_EXPR, chrec_type (chrec),
2133                               build_int_cst (TREE_TYPE (chrec), -1), op);
2134       }
2135
2136     case INTEGER_CST:
2137       return chrec;
2138
2139     default:
2140       gcc_unreachable ();
2141       return NULL_TREE;
2142     }
2143 }
2144
2145 #define FLOOR_DIV(x,y) ((x) / (y))
2146
2147 /* Solves the special case of the Diophantine equation:
2148    | {0, +, STEP_A}_x (OVERLAPS_A) = {0, +, STEP_B}_y (OVERLAPS_B)
2149
2150    Computes the descriptions OVERLAPS_A and OVERLAPS_B.  NITER is the
2151    number of iterations that loops X and Y run.  The overlaps will be
2152    constructed as evolutions in dimension DIM.  */
2153
2154 static void
2155 compute_overlap_steps_for_affine_univar (int niter, int step_a, int step_b,
2156                                          affine_fn *overlaps_a,
2157                                          affine_fn *overlaps_b,
2158                                          tree *last_conflicts, int dim)
2159 {
2160   if (((step_a > 0 && step_b > 0)
2161        || (step_a < 0 && step_b < 0)))
2162     {
2163       int step_overlaps_a, step_overlaps_b;
2164       int gcd_steps_a_b, last_conflict, tau2;
2165
2166       gcd_steps_a_b = gcd (step_a, step_b);
2167       step_overlaps_a = step_b / gcd_steps_a_b;
2168       step_overlaps_b = step_a / gcd_steps_a_b;
2169
2170       if (niter > 0)
2171         {
2172           tau2 = FLOOR_DIV (niter, step_overlaps_a);
2173           tau2 = MIN (tau2, FLOOR_DIV (niter, step_overlaps_b));
2174           last_conflict = tau2;
2175           *last_conflicts = build_int_cst (NULL_TREE, last_conflict);
2176         }
2177       else
2178         *last_conflicts = chrec_dont_know;
2179
2180       *overlaps_a = affine_fn_univar (integer_zero_node, dim,
2181                                       build_int_cst (NULL_TREE,
2182                                                      step_overlaps_a));
2183       *overlaps_b = affine_fn_univar (integer_zero_node, dim,
2184                                       build_int_cst (NULL_TREE,
2185                                                      step_overlaps_b));
2186     }
2187
2188   else
2189     {
2190       *overlaps_a = affine_fn_cst (integer_zero_node);
2191       *overlaps_b = affine_fn_cst (integer_zero_node);
2192       *last_conflicts = integer_zero_node;
2193     }
2194 }
2195
2196 /* Solves the special case of a Diophantine equation where CHREC_A is
2197    an affine bivariate function, and CHREC_B is an affine univariate
2198    function.  For example,
2199
2200    | {{0, +, 1}_x, +, 1335}_y = {0, +, 1336}_z
2201
2202    has the following overlapping functions:
2203
2204    | x (t, u, v) = {{0, +, 1336}_t, +, 1}_v
2205    | y (t, u, v) = {{0, +, 1336}_u, +, 1}_v
2206    | z (t, u, v) = {{{0, +, 1}_t, +, 1335}_u, +, 1}_v
2207
2208    FORNOW: This is a specialized implementation for a case occurring in
2209    a common benchmark.  Implement the general algorithm.  */
2210
2211 static void
2212 compute_overlap_steps_for_affine_1_2 (tree chrec_a, tree chrec_b,
2213                                       conflict_function **overlaps_a,
2214                                       conflict_function **overlaps_b,
2215                                       tree *last_conflicts)
2216 {
2217   bool xz_p, yz_p, xyz_p;
2218   int step_x, step_y, step_z;
2219   HOST_WIDE_INT niter_x, niter_y, niter_z, niter;
2220   affine_fn overlaps_a_xz, overlaps_b_xz;
2221   affine_fn overlaps_a_yz, overlaps_b_yz;
2222   affine_fn overlaps_a_xyz, overlaps_b_xyz;
2223   affine_fn ova1, ova2, ovb;
2224   tree last_conflicts_xz, last_conflicts_yz, last_conflicts_xyz;
2225
2226   step_x = int_cst_value (CHREC_RIGHT (CHREC_LEFT (chrec_a)));
2227   step_y = int_cst_value (CHREC_RIGHT (chrec_a));
2228   step_z = int_cst_value (CHREC_RIGHT (chrec_b));
2229
2230   niter_x = max_stmt_executions_int (get_chrec_loop (CHREC_LEFT (chrec_a)));
2231   niter_y = max_stmt_executions_int (get_chrec_loop (chrec_a));
2232   niter_z = max_stmt_executions_int (get_chrec_loop (chrec_b));
2233
2234   if (niter_x < 0 || niter_y < 0 || niter_z < 0)
2235     {
2236       if (dump_file && (dump_flags & TDF_DETAILS))
2237         fprintf (dump_file, "overlap steps test failed: no iteration counts.\n");
2238
2239       *overlaps_a = conflict_fn_not_known ();
2240       *overlaps_b = conflict_fn_not_known ();
2241       *last_conflicts = chrec_dont_know;
2242       return;
2243     }
2244
2245   niter = MIN (niter_x, niter_z);
2246   compute_overlap_steps_for_affine_univar (niter, step_x, step_z,
2247                                            &overlaps_a_xz,
2248                                            &overlaps_b_xz,
2249                                            &last_conflicts_xz, 1);
2250   niter = MIN (niter_y, niter_z);
2251   compute_overlap_steps_for_affine_univar (niter, step_y, step_z,
2252                                            &overlaps_a_yz,
2253                                            &overlaps_b_yz,
2254                                            &last_conflicts_yz, 2);
2255   niter = MIN (niter_x, niter_z);
2256   niter = MIN (niter_y, niter);
2257   compute_overlap_steps_for_affine_univar (niter, step_x + step_y, step_z,
2258                                            &overlaps_a_xyz,
2259                                            &overlaps_b_xyz,
2260                                            &last_conflicts_xyz, 3);
2261
2262   xz_p = !integer_zerop (last_conflicts_xz);
2263   yz_p = !integer_zerop (last_conflicts_yz);
2264   xyz_p = !integer_zerop (last_conflicts_xyz);
2265
2266   if (xz_p || yz_p || xyz_p)
2267     {
2268       ova1 = affine_fn_cst (integer_zero_node);
2269       ova2 = affine_fn_cst (integer_zero_node);
2270       ovb = affine_fn_cst (integer_zero_node);
2271       if (xz_p)
2272         {
2273           affine_fn t0 = ova1;
2274           affine_fn t2 = ovb;
2275
2276           ova1 = affine_fn_plus (ova1, overlaps_a_xz);
2277           ovb = affine_fn_plus (ovb, overlaps_b_xz);
2278           affine_fn_free (t0);
2279           affine_fn_free (t2);
2280           *last_conflicts = last_conflicts_xz;
2281         }
2282       if (yz_p)
2283         {
2284           affine_fn t0 = ova2;
2285           affine_fn t2 = ovb;
2286
2287           ova2 = affine_fn_plus (ova2, overlaps_a_yz);
2288           ovb = affine_fn_plus (ovb, overlaps_b_yz);
2289           affine_fn_free (t0);
2290           affine_fn_free (t2);
2291           *last_conflicts = last_conflicts_yz;
2292         }
2293       if (xyz_p)
2294         {
2295           affine_fn t0 = ova1;
2296           affine_fn t2 = ova2;
2297           affine_fn t4 = ovb;
2298
2299           ova1 = affine_fn_plus (ova1, overlaps_a_xyz);
2300           ova2 = affine_fn_plus (ova2, overlaps_a_xyz);
2301           ovb = affine_fn_plus (ovb, overlaps_b_xyz);
2302           affine_fn_free (t0);
2303           affine_fn_free (t2);
2304           affine_fn_free (t4);
2305           *last_conflicts = last_conflicts_xyz;
2306         }
2307       *overlaps_a = conflict_fn (2, ova1, ova2);
2308       *overlaps_b = conflict_fn (1, ovb);
2309     }
2310   else
2311     {
2312       *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
2313       *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
2314       *last_conflicts = integer_zero_node;
2315     }
2316
2317   affine_fn_free (overlaps_a_xz);
2318   affine_fn_free (overlaps_b_xz);
2319   affine_fn_free (overlaps_a_yz);
2320   affine_fn_free (overlaps_b_yz);
2321   affine_fn_free (overlaps_a_xyz);
2322   affine_fn_free (overlaps_b_xyz);
2323 }
2324
2325 /* Copy the elements of vector VEC1 with length SIZE to VEC2.  */
2326
2327 static void
2328 lambda_vector_copy (lambda_vector vec1, lambda_vector vec2,
2329                     int size)
2330 {
2331   memcpy (vec2, vec1, size * sizeof (*vec1));
2332 }
2333
2334 /* Copy the elements of M x N matrix MAT1 to MAT2.  */
2335
2336 static void
2337 lambda_matrix_copy (lambda_matrix mat1, lambda_matrix mat2,
2338                     int m, int n)
2339 {
2340   int i;
2341
2342   for (i = 0; i < m; i++)
2343     lambda_vector_copy (mat1[i], mat2[i], n);
2344 }
2345
2346 /* Store the N x N identity matrix in MAT.  */
2347
2348 static void
2349 lambda_matrix_id (lambda_matrix mat, int size)
2350 {
2351   int i, j;
2352
2353   for (i = 0; i < size; i++)
2354     for (j = 0; j < size; j++)
2355       mat[i][j] = (i == j) ? 1 : 0;
2356 }
2357
2358 /* Return the first nonzero element of vector VEC1 between START and N.
2359    We must have START <= N.   Returns N if VEC1 is the zero vector.  */
2360
2361 static int
2362 lambda_vector_first_nz (lambda_vector vec1, int n, int start)
2363 {
2364   int j = start;
2365   while (j < n && vec1[j] == 0)
2366     j++;
2367   return j;
2368 }
2369
2370 /* Add a multiple of row R1 of matrix MAT with N columns to row R2:
2371    R2 = R2 + CONST1 * R1.  */
2372
2373 static void
2374 lambda_matrix_row_add (lambda_matrix mat, int n, int r1, int r2, int const1)
2375 {
2376   int i;
2377
2378   if (const1 == 0)
2379     return;
2380
2381   for (i = 0; i < n; i++)
2382     mat[r2][i] += const1 * mat[r1][i];
2383 }
2384
2385 /* Swap rows R1 and R2 in matrix MAT.  */
2386
2387 static void
2388 lambda_matrix_row_exchange (lambda_matrix mat, int r1, int r2)
2389 {
2390   lambda_vector row;
2391
2392   row = mat[r1];
2393   mat[r1] = mat[r2];
2394   mat[r2] = row;
2395 }
2396
2397 /* Multiply vector VEC1 of length SIZE by a constant CONST1,
2398    and store the result in VEC2.  */
2399
2400 static void
2401 lambda_vector_mult_const (lambda_vector vec1, lambda_vector vec2,
2402                           int size, int const1)
2403 {
2404   int i;
2405
2406   if (const1 == 0)
2407     lambda_vector_clear (vec2, size);
2408   else
2409     for (i = 0; i < size; i++)
2410       vec2[i] = const1 * vec1[i];
2411 }
2412
2413 /* Negate vector VEC1 with length SIZE and store it in VEC2.  */
2414
2415 static void
2416 lambda_vector_negate (lambda_vector vec1, lambda_vector vec2,
2417                       int size)
2418 {
2419   lambda_vector_mult_const (vec1, vec2, size, -1);
2420 }
2421
2422 /* Negate row R1 of matrix MAT which has N columns.  */
2423
2424 static void
2425 lambda_matrix_row_negate (lambda_matrix mat, int n, int r1)
2426 {
2427   lambda_vector_negate (mat[r1], mat[r1], n);
2428 }
2429
2430 /* Return true if two vectors are equal.  */
2431
2432 static bool
2433 lambda_vector_equal (lambda_vector vec1, lambda_vector vec2, int size)
2434 {
2435   int i;
2436   for (i = 0; i < size; i++)
2437     if (vec1[i] != vec2[i])
2438       return false;
2439   return true;
2440 }
2441
2442 /* Given an M x N integer matrix A, this function determines an M x
2443    M unimodular matrix U, and an M x N echelon matrix S such that
2444    "U.A = S".  This decomposition is also known as "right Hermite".
2445
2446    Ref: Algorithm 2.1 page 33 in "Loop Transformations for
2447    Restructuring Compilers" Utpal Banerjee.  */
2448
2449 static void
2450 lambda_matrix_right_hermite (lambda_matrix A, int m, int n,
2451                              lambda_matrix S, lambda_matrix U)
2452 {
2453   int i, j, i0 = 0;
2454
2455   lambda_matrix_copy (A, S, m, n);
2456   lambda_matrix_id (U, m);
2457
2458   for (j = 0; j < n; j++)
2459     {
2460       if (lambda_vector_first_nz (S[j], m, i0) < m)
2461         {
2462           ++i0;
2463           for (i = m - 1; i >= i0; i--)
2464             {
2465               while (S[i][j] != 0)
2466                 {
2467                   int sigma, factor, a, b;
2468
2469                   a = S[i-1][j];
2470                   b = S[i][j];
2471                   sigma = (a * b < 0) ? -1: 1;
2472                   a = abs (a);
2473                   b = abs (b);
2474                   factor = sigma * (a / b);
2475
2476                   lambda_matrix_row_add (S, n, i, i-1, -factor);
2477                   lambda_matrix_row_exchange (S, i, i-1);
2478
2479                   lambda_matrix_row_add (U, m, i, i-1, -factor);
2480                   lambda_matrix_row_exchange (U, i, i-1);
2481                 }
2482             }
2483         }
2484     }
2485 }
2486
2487 /* Determines the overlapping elements due to accesses CHREC_A and
2488    CHREC_B, that are affine functions.  This function cannot handle
2489    symbolic evolution functions, ie. when initial conditions are
2490    parameters, because it uses lambda matrices of integers.  */
2491
2492 static void
2493 analyze_subscript_affine_affine (tree chrec_a,
2494                                  tree chrec_b,
2495                                  conflict_function **overlaps_a,
2496                                  conflict_function **overlaps_b,
2497                                  tree *last_conflicts)
2498 {
2499   unsigned nb_vars_a, nb_vars_b, dim;
2500   HOST_WIDE_INT init_a, init_b, gamma, gcd_alpha_beta;
2501   lambda_matrix A, U, S;
2502   struct obstack scratch_obstack;
2503
2504   if (eq_evolutions_p (chrec_a, chrec_b))
2505     {
2506       /* The accessed index overlaps for each iteration in the
2507          loop.  */
2508       *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
2509       *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
2510       *last_conflicts = chrec_dont_know;
2511       return;
2512     }
2513   if (dump_file && (dump_flags & TDF_DETAILS))
2514     fprintf (dump_file, "(analyze_subscript_affine_affine \n");
2515
2516   /* For determining the initial intersection, we have to solve a
2517      Diophantine equation.  This is the most time consuming part.
2518
2519      For answering to the question: "Is there a dependence?" we have
2520      to prove that there exists a solution to the Diophantine
2521      equation, and that the solution is in the iteration domain,
2522      i.e. the solution is positive or zero, and that the solution
2523      happens before the upper bound loop.nb_iterations.  Otherwise
2524      there is no dependence.  This function outputs a description of
2525      the iterations that hold the intersections.  */
2526
2527   nb_vars_a = nb_vars_in_chrec (chrec_a);
2528   nb_vars_b = nb_vars_in_chrec (chrec_b);
2529
2530   gcc_obstack_init (&scratch_obstack);
2531
2532   dim = nb_vars_a + nb_vars_b;
2533   U = lambda_matrix_new (dim, dim, &scratch_obstack);
2534   A = lambda_matrix_new (dim, 1, &scratch_obstack);
2535   S = lambda_matrix_new (dim, 1, &scratch_obstack);
2536
2537   init_a = int_cst_value (initialize_matrix_A (A, chrec_a, 0, 1));
2538   init_b = int_cst_value (initialize_matrix_A (A, chrec_b, nb_vars_a, -1));
2539   gamma = init_b - init_a;
2540
2541   /* Don't do all the hard work of solving the Diophantine equation
2542      when we already know the solution: for example,
2543      | {3, +, 1}_1
2544      | {3, +, 4}_2
2545      | gamma = 3 - 3 = 0.
2546      Then the first overlap occurs during the first iterations:
2547      | {3, +, 1}_1 ({0, +, 4}_x) = {3, +, 4}_2 ({0, +, 1}_x)
2548   */
2549   if (gamma == 0)
2550     {
2551       if (nb_vars_a == 1 && nb_vars_b == 1)
2552         {
2553           HOST_WIDE_INT step_a, step_b;
2554           HOST_WIDE_INT niter, niter_a, niter_b;
2555           affine_fn ova, ovb;
2556
2557           niter_a = max_stmt_executions_int (get_chrec_loop (chrec_a));
2558           niter_b = max_stmt_executions_int (get_chrec_loop (chrec_b));
2559           niter = MIN (niter_a, niter_b);
2560           step_a = int_cst_value (CHREC_RIGHT (chrec_a));
2561           step_b = int_cst_value (CHREC_RIGHT (chrec_b));
2562
2563           compute_overlap_steps_for_affine_univar (niter, step_a, step_b,
2564                                                    &ova, &ovb,
2565                                                    last_conflicts, 1);
2566           *overlaps_a = conflict_fn (1, ova);
2567           *overlaps_b = conflict_fn (1, ovb);
2568         }
2569
2570       else if (nb_vars_a == 2 && nb_vars_b == 1)
2571         compute_overlap_steps_for_affine_1_2
2572           (chrec_a, chrec_b, overlaps_a, overlaps_b, last_conflicts);
2573
2574       else if (nb_vars_a == 1 && nb_vars_b == 2)
2575         compute_overlap_steps_for_affine_1_2
2576           (chrec_b, chrec_a, overlaps_b, overlaps_a, last_conflicts);
2577
2578       else
2579         {
2580           if (dump_file && (dump_flags & TDF_DETAILS))
2581             fprintf (dump_file, "affine-affine test failed: too many variables.\n");
2582           *overlaps_a = conflict_fn_not_known ();
2583           *overlaps_b = conflict_fn_not_known ();
2584           *last_conflicts = chrec_dont_know;
2585         }
2586       goto end_analyze_subs_aa;
2587     }
2588
2589   /* U.A = S */
2590   lambda_matrix_right_hermite (A, dim, 1, S, U);
2591
2592   if (S[0][0] < 0)
2593     {
2594       S[0][0] *= -1;
2595       lambda_matrix_row_negate (U, dim, 0);
2596     }
2597   gcd_alpha_beta = S[0][0];
2598
2599   /* Something went wrong: for example in {1, +, 0}_5 vs. {0, +, 0}_5,
2600      but that is a quite strange case.  Instead of ICEing, answer
2601      don't know.  */
2602   if (gcd_alpha_beta == 0)
2603     {
2604       *overlaps_a = conflict_fn_not_known ();
2605       *overlaps_b = conflict_fn_not_known ();
2606       *last_conflicts = chrec_dont_know;
2607       goto end_analyze_subs_aa;
2608     }
2609
2610   /* The classic "gcd-test".  */
2611   if (!int_divides_p (gcd_alpha_beta, gamma))
2612     {
2613       /* The "gcd-test" has determined that there is no integer
2614          solution, i.e. there is no dependence.  */
2615       *overlaps_a = conflict_fn_no_dependence ();
2616       *overlaps_b = conflict_fn_no_dependence ();
2617       *last_conflicts = integer_zero_node;
2618     }
2619
2620   /* Both access functions are univariate.  This includes SIV and MIV cases.  */
2621   else if (nb_vars_a == 1 && nb_vars_b == 1)
2622     {
2623       /* Both functions should have the same evolution sign.  */
2624       if (((A[0][0] > 0 && -A[1][0] > 0)
2625            || (A[0][0] < 0 && -A[1][0] < 0)))
2626         {
2627           /* The solutions are given by:
2628              |
2629              | [GAMMA/GCD_ALPHA_BETA  t].[u11 u12]  = [x0]
2630              |                           [u21 u22]    [y0]
2631
2632              For a given integer t.  Using the following variables,
2633
2634              | i0 = u11 * gamma / gcd_alpha_beta
2635              | j0 = u12 * gamma / gcd_alpha_beta
2636              | i1 = u21
2637              | j1 = u22
2638
2639              the solutions are:
2640
2641              | x0 = i0 + i1 * t,
2642              | y0 = j0 + j1 * t.  */
2643           HOST_WIDE_INT i0, j0, i1, j1;
2644
2645           i0 = U[0][0] * gamma / gcd_alpha_beta;
2646           j0 = U[0][1] * gamma / gcd_alpha_beta;
2647           i1 = U[1][0];
2648           j1 = U[1][1];
2649
2650           if ((i1 == 0 && i0 < 0)
2651               || (j1 == 0 && j0 < 0))
2652             {
2653               /* There is no solution.
2654                  FIXME: The case "i0 > nb_iterations, j0 > nb_iterations"
2655                  falls in here, but for the moment we don't look at the
2656                  upper bound of the iteration domain.  */
2657               *overlaps_a = conflict_fn_no_dependence ();
2658               *overlaps_b = conflict_fn_no_dependence ();
2659               *last_conflicts = integer_zero_node;
2660               goto end_analyze_subs_aa;
2661             }
2662
2663           if (i1 > 0 && j1 > 0)
2664             {
2665               HOST_WIDE_INT niter_a
2666                 = max_stmt_executions_int (get_chrec_loop (chrec_a));
2667               HOST_WIDE_INT niter_b
2668                 = max_stmt_executions_int (get_chrec_loop (chrec_b));
2669               HOST_WIDE_INT niter = MIN (niter_a, niter_b);
2670
2671               /* (X0, Y0) is a solution of the Diophantine equation:
2672                  "chrec_a (X0) = chrec_b (Y0)".  */
2673               HOST_WIDE_INT tau1 = MAX (CEIL (-i0, i1),
2674                                         CEIL (-j0, j1));
2675               HOST_WIDE_INT x0 = i1 * tau1 + i0;
2676               HOST_WIDE_INT y0 = j1 * tau1 + j0;
2677
2678               /* (X1, Y1) is the smallest positive solution of the eq
2679                  "chrec_a (X1) = chrec_b (Y1)", i.e. this is where the
2680                  first conflict occurs.  */
2681               HOST_WIDE_INT min_multiple = MIN (x0 / i1, y0 / j1);
2682               HOST_WIDE_INT x1 = x0 - i1 * min_multiple;
2683               HOST_WIDE_INT y1 = y0 - j1 * min_multiple;
2684
2685               if (niter > 0)
2686                 {
2687                   HOST_WIDE_INT tau2 = MIN (FLOOR_DIV (niter - i0, i1),
2688                                             FLOOR_DIV (niter - j0, j1));
2689                   HOST_WIDE_INT last_conflict = tau2 - (x1 - i0)/i1;
2690
2691                   /* If the overlap occurs outside of the bounds of the
2692                      loop, there is no dependence.  */
2693                   if (x1 >= niter || y1 >= niter)
2694                     {
2695                       *overlaps_a = conflict_fn_no_dependence ();
2696                       *overlaps_b = conflict_fn_no_dependence ();
2697                       *last_conflicts = integer_zero_node;
2698                       goto end_analyze_subs_aa;
2699                     }
2700                   else
2701                     *last_conflicts = build_int_cst (NULL_TREE, last_conflict);
2702                 }
2703               else
2704                 *last_conflicts = chrec_dont_know;
2705
2706               *overlaps_a
2707                 = conflict_fn (1,
2708                                affine_fn_univar (build_int_cst (NULL_TREE, x1),
2709                                                  1,
2710                                                  build_int_cst (NULL_TREE, i1)));
2711               *overlaps_b
2712                 = conflict_fn (1,
2713                                affine_fn_univar (build_int_cst (NULL_TREE, y1),
2714                                                  1,
2715                                                  build_int_cst (NULL_TREE, j1)));
2716             }
2717           else
2718             {
2719               /* FIXME: For the moment, the upper bound of the
2720                  iteration domain for i and j is not checked.  */
2721               if (dump_file && (dump_flags & TDF_DETAILS))
2722                 fprintf (dump_file, "affine-affine test failed: unimplemented.\n");
2723               *overlaps_a = conflict_fn_not_known ();
2724               *overlaps_b = conflict_fn_not_known ();
2725               *last_conflicts = chrec_dont_know;
2726             }
2727         }
2728       else
2729         {
2730           if (dump_file && (dump_flags & TDF_DETAILS))
2731             fprintf (dump_file, "affine-affine test failed: unimplemented.\n");
2732           *overlaps_a = conflict_fn_not_known ();
2733           *overlaps_b = conflict_fn_not_known ();
2734           *last_conflicts = chrec_dont_know;
2735         }
2736     }
2737   else
2738     {
2739       if (dump_file && (dump_flags & TDF_DETAILS))
2740         fprintf (dump_file, "affine-affine test failed: unimplemented.\n");
2741       *overlaps_a = conflict_fn_not_known ();
2742       *overlaps_b = conflict_fn_not_known ();
2743       *last_conflicts = chrec_dont_know;
2744     }
2745
2746 end_analyze_subs_aa:
2747   obstack_free (&scratch_obstack, NULL);
2748   if (dump_file && (dump_flags & TDF_DETAILS))
2749     {
2750       fprintf (dump_file, "  (overlaps_a = ");
2751       dump_conflict_function (dump_file, *overlaps_a);
2752       fprintf (dump_file, ")\n  (overlaps_b = ");
2753       dump_conflict_function (dump_file, *overlaps_b);
2754       fprintf (dump_file, "))\n");
2755     }
2756 }
2757
2758 /* Returns true when analyze_subscript_affine_affine can be used for
2759    determining the dependence relation between chrec_a and chrec_b,
2760    that contain symbols.  This function modifies chrec_a and chrec_b
2761    such that the analysis result is the same, and such that they don't
2762    contain symbols, and then can safely be passed to the analyzer.
2763
2764    Example: The analysis of the following tuples of evolutions produce
2765    the same results: {x+1, +, 1}_1 vs. {x+3, +, 1}_1, and {-2, +, 1}_1
2766    vs. {0, +, 1}_1
2767
2768    {x+1, +, 1}_1 ({2, +, 1}_1) = {x+3, +, 1}_1 ({0, +, 1}_1)
2769    {-2, +, 1}_1 ({2, +, 1}_1) = {0, +, 1}_1 ({0, +, 1}_1)
2770 */
2771
2772 static bool
2773 can_use_analyze_subscript_affine_affine (tree *chrec_a, tree *chrec_b)
2774 {
2775   tree diff, type, left_a, left_b, right_b;
2776
2777   if (chrec_contains_symbols (CHREC_RIGHT (*chrec_a))
2778       || chrec_contains_symbols (CHREC_RIGHT (*chrec_b)))
2779     /* FIXME: For the moment not handled.  Might be refined later.  */
2780     return false;
2781
2782   type = chrec_type (*chrec_a);
2783   left_a = CHREC_LEFT (*chrec_a);
2784   left_b = chrec_convert (type, CHREC_LEFT (*chrec_b), NULL);
2785   diff = chrec_fold_minus (type, left_a, left_b);
2786
2787   if (!evolution_function_is_constant_p (diff))
2788     return false;
2789
2790   if (dump_file && (dump_flags & TDF_DETAILS))
2791     fprintf (dump_file, "can_use_subscript_aff_aff_for_symbolic \n");
2792
2793   *chrec_a = build_polynomial_chrec (CHREC_VARIABLE (*chrec_a),
2794                                      diff, CHREC_RIGHT (*chrec_a));
2795   right_b = chrec_convert (type, CHREC_RIGHT (*chrec_b), NULL);
2796   *chrec_b = build_polynomial_chrec (CHREC_VARIABLE (*chrec_b),
2797                                      build_int_cst (type, 0),
2798                                      right_b);
2799   return true;
2800 }
2801
2802 /* Analyze a SIV (Single Index Variable) subscript.  *OVERLAPS_A and
2803    *OVERLAPS_B are initialized to the functions that describe the
2804    relation between the elements accessed twice by CHREC_A and
2805    CHREC_B.  For k >= 0, the following property is verified:
2806
2807    CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)).  */
2808
2809 static void
2810 analyze_siv_subscript (tree chrec_a,
2811                        tree chrec_b,
2812                        conflict_function **overlaps_a,
2813                        conflict_function **overlaps_b,
2814                        tree *last_conflicts,
2815                        int loop_nest_num)
2816 {
2817   dependence_stats.num_siv++;
2818
2819   if (dump_file && (dump_flags & TDF_DETAILS))
2820     fprintf (dump_file, "(analyze_siv_subscript \n");
2821
2822   if (evolution_function_is_constant_p (chrec_a)
2823       && evolution_function_is_affine_in_loop (chrec_b, loop_nest_num))
2824     analyze_siv_subscript_cst_affine (chrec_a, chrec_b,
2825                                       overlaps_a, overlaps_b, last_conflicts);
2826
2827   else if (evolution_function_is_affine_in_loop (chrec_a, loop_nest_num)
2828            && evolution_function_is_constant_p (chrec_b))
2829     analyze_siv_subscript_cst_affine (chrec_b, chrec_a,
2830                                       overlaps_b, overlaps_a, last_conflicts);
2831
2832   else if (evolution_function_is_affine_in_loop (chrec_a, loop_nest_num)
2833            && evolution_function_is_affine_in_loop (chrec_b, loop_nest_num))
2834     {
2835       if (!chrec_contains_symbols (chrec_a)
2836           && !chrec_contains_symbols (chrec_b))
2837         {
2838           analyze_subscript_affine_affine (chrec_a, chrec_b,
2839                                            overlaps_a, overlaps_b,
2840                                            last_conflicts);
2841
2842           if (CF_NOT_KNOWN_P (*overlaps_a)
2843               || CF_NOT_KNOWN_P (*overlaps_b))
2844             dependence_stats.num_siv_unimplemented++;
2845           else if (CF_NO_DEPENDENCE_P (*overlaps_a)
2846                    || CF_NO_DEPENDENCE_P (*overlaps_b))
2847             dependence_stats.num_siv_independent++;
2848           else
2849             dependence_stats.num_siv_dependent++;
2850         }
2851       else if (can_use_analyze_subscript_affine_affine (&chrec_a,
2852                                                         &chrec_b))
2853         {
2854           analyze_subscript_affine_affine (chrec_a, chrec_b,
2855                                            overlaps_a, overlaps_b,
2856                                            last_conflicts);
2857
2858           if (CF_NOT_KNOWN_P (*overlaps_a)
2859               || CF_NOT_KNOWN_P (*overlaps_b))
2860             dependence_stats.num_siv_unimplemented++;
2861           else if (CF_NO_DEPENDENCE_P (*overlaps_a)
2862                    || CF_NO_DEPENDENCE_P (*overlaps_b))
2863             dependence_stats.num_siv_independent++;
2864           else
2865             dependence_stats.num_siv_dependent++;
2866         }
2867       else
2868         goto siv_subscript_dontknow;
2869     }
2870
2871   else
2872     {
2873     siv_subscript_dontknow:;
2874       if (dump_file && (dump_flags & TDF_DETAILS))
2875         fprintf (dump_file, "  siv test failed: unimplemented");
2876       *overlaps_a = conflict_fn_not_known ();
2877       *overlaps_b = conflict_fn_not_known ();
2878       *last_conflicts = chrec_dont_know;
2879       dependence_stats.num_siv_unimplemented++;
2880     }
2881
2882   if (dump_file && (dump_flags & TDF_DETAILS))
2883     fprintf (dump_file, ")\n");
2884 }
2885
2886 /* Returns false if we can prove that the greatest common divisor of the steps
2887    of CHREC does not divide CST, false otherwise.  */
2888
2889 static bool
2890 gcd_of_steps_may_divide_p (const_tree chrec, const_tree cst)
2891 {
2892   HOST_WIDE_INT cd = 0, val;
2893   tree step;
2894
2895   if (!tree_fits_shwi_p (cst))
2896     return true;
2897   val = tree_to_shwi (cst);
2898
2899   while (TREE_CODE (chrec) == POLYNOMIAL_CHREC)
2900     {
2901       step = CHREC_RIGHT (chrec);
2902       if (!tree_fits_shwi_p (step))
2903         return true;
2904       cd = gcd (cd, tree_to_shwi (step));
2905       chrec = CHREC_LEFT (chrec);
2906     }
2907
2908   return val % cd == 0;
2909 }
2910
2911 /* Analyze a MIV (Multiple Index Variable) subscript with respect to
2912    LOOP_NEST.  *OVERLAPS_A and *OVERLAPS_B are initialized to the
2913    functions that describe the relation between the elements accessed
2914    twice by CHREC_A and CHREC_B.  For k >= 0, the following property
2915    is verified:
2916
2917    CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)).  */
2918
2919 static void
2920 analyze_miv_subscript (tree chrec_a,
2921                        tree chrec_b,
2922                        conflict_function **overlaps_a,
2923                        conflict_function **overlaps_b,
2924                        tree *last_conflicts,
2925                        struct loop *loop_nest)
2926 {
2927   tree type, difference;
2928
2929   dependence_stats.num_miv++;
2930   if (dump_file && (dump_flags & TDF_DETAILS))
2931     fprintf (dump_file, "(analyze_miv_subscript \n");
2932
2933   type = signed_type_for_types (TREE_TYPE (chrec_a), TREE_TYPE (chrec_b));
2934   chrec_a = chrec_convert (type, chrec_a, NULL);
2935   chrec_b = chrec_convert (type, chrec_b, NULL);
2936   difference = chrec_fold_minus (type, chrec_a, chrec_b);
2937
2938   if (eq_evolutions_p (chrec_a, chrec_b))
2939     {
2940       /* Access functions are the same: all the elements are accessed
2941          in the same order.  */
2942       *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
2943       *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
2944       *last_conflicts = max_stmt_executions_tree (get_chrec_loop (chrec_a));
2945       dependence_stats.num_miv_dependent++;
2946     }
2947
2948   else if (evolution_function_is_constant_p (difference)
2949            /* For the moment, the following is verified:
2950               evolution_function_is_affine_multivariate_p (chrec_a,
2951               loop_nest->num) */
2952            && !gcd_of_steps_may_divide_p (chrec_a, difference))
2953     {
2954       /* testsuite/.../ssa-chrec-33.c
2955          {{21, +, 2}_1, +, -2}_2  vs.  {{20, +, 2}_1, +, -2}_2
2956
2957          The difference is 1, and all the evolution steps are multiples
2958          of 2, consequently there are no overlapping elements.  */
2959       *overlaps_a = conflict_fn_no_dependence ();
2960       *overlaps_b = conflict_fn_no_dependence ();
2961       *last_conflicts = integer_zero_node;
2962       dependence_stats.num_miv_independent++;
2963     }
2964
2965   else if (evolution_function_is_affine_multivariate_p (chrec_a, loop_nest->num)
2966            && !chrec_contains_symbols (chrec_a)
2967            && evolution_function_is_affine_multivariate_p (chrec_b, loop_nest->num)
2968            && !chrec_contains_symbols (chrec_b))
2969     {
2970       /* testsuite/.../ssa-chrec-35.c
2971          {0, +, 1}_2  vs.  {0, +, 1}_3
2972          the overlapping elements are respectively located at iterations:
2973          {0, +, 1}_x and {0, +, 1}_x,
2974          in other words, we have the equality:
2975          {0, +, 1}_2 ({0, +, 1}_x) = {0, +, 1}_3 ({0, +, 1}_x)
2976
2977          Other examples:
2978          {{0, +, 1}_1, +, 2}_2 ({0, +, 1}_x, {0, +, 1}_y) =
2979          {0, +, 1}_1 ({{0, +, 1}_x, +, 2}_y)
2980
2981          {{0, +, 2}_1, +, 3}_2 ({0, +, 1}_y, {0, +, 1}_x) =
2982          {{0, +, 3}_1, +, 2}_2 ({0, +, 1}_x, {0, +, 1}_y)
2983       */
2984       analyze_subscript_affine_affine (chrec_a, chrec_b,
2985                                        overlaps_a, overlaps_b, last_conflicts);
2986
2987       if (CF_NOT_KNOWN_P (*overlaps_a)
2988           || CF_NOT_KNOWN_P (*overlaps_b))
2989         dependence_stats.num_miv_unimplemented++;
2990       else if (CF_NO_DEPENDENCE_P (*overlaps_a)
2991                || CF_NO_DEPENDENCE_P (*overlaps_b))
2992         dependence_stats.num_miv_independent++;
2993       else
2994         dependence_stats.num_miv_dependent++;
2995     }
2996
2997   else
2998     {
2999       /* When the analysis is too difficult, answer "don't know".  */
3000       if (dump_file && (dump_flags & TDF_DETAILS))
3001         fprintf (dump_file, "analyze_miv_subscript test failed: unimplemented.\n");
3002
3003       *overlaps_a = conflict_fn_not_known ();
3004       *overlaps_b = conflict_fn_not_known ();
3005       *last_conflicts = chrec_dont_know;
3006       dependence_stats.num_miv_unimplemented++;
3007     }
3008
3009   if (dump_file && (dump_flags & TDF_DETAILS))
3010     fprintf (dump_file, ")\n");
3011 }
3012
3013 /* Determines the iterations for which CHREC_A is equal to CHREC_B in
3014    with respect to LOOP_NEST.  OVERLAP_ITERATIONS_A and
3015    OVERLAP_ITERATIONS_B are initialized with two functions that
3016    describe the iterations that contain conflicting elements.
3017
3018    Remark: For an integer k >= 0, the following equality is true:
3019
3020    CHREC_A (OVERLAP_ITERATIONS_A (k)) == CHREC_B (OVERLAP_ITERATIONS_B (k)).
3021 */
3022
3023 static void
3024 analyze_overlapping_iterations (tree chrec_a,
3025                                 tree chrec_b,
3026                                 conflict_function **overlap_iterations_a,
3027                                 conflict_function **overlap_iterations_b,
3028                                 tree *last_conflicts, struct loop *loop_nest)
3029 {
3030   unsigned int lnn = loop_nest->num;
3031
3032   dependence_stats.num_subscript_tests++;
3033
3034   if (dump_file && (dump_flags & TDF_DETAILS))
3035     {
3036       fprintf (dump_file, "(analyze_overlapping_iterations \n");
3037       fprintf (dump_file, "  (chrec_a = ");
3038       print_generic_expr (dump_file, chrec_a, 0);
3039       fprintf (dump_file, ")\n  (chrec_b = ");
3040       print_generic_expr (dump_file, chrec_b, 0);
3041       fprintf (dump_file, ")\n");
3042     }
3043
3044   if (chrec_a == NULL_TREE
3045       || chrec_b == NULL_TREE
3046       || chrec_contains_undetermined (chrec_a)
3047       || chrec_contains_undetermined (chrec_b))
3048     {
3049       dependence_stats.num_subscript_undetermined++;
3050
3051       *overlap_iterations_a = conflict_fn_not_known ();
3052       *overlap_iterations_b = conflict_fn_not_known ();
3053     }
3054
3055   /* If they are the same chrec, and are affine, they overlap
3056      on every iteration.  */
3057   else if (eq_evolutions_p (chrec_a, chrec_b)
3058            && (evolution_function_is_affine_multivariate_p (chrec_a, lnn)
3059                || operand_equal_p (chrec_a, chrec_b, 0)))
3060     {
3061       dependence_stats.num_same_subscript_function++;
3062       *overlap_iterations_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
3063       *overlap_iterations_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
3064       *last_conflicts = chrec_dont_know;
3065     }
3066
3067   /* If they aren't the same, and aren't affine, we can't do anything
3068      yet.  */
3069   else if ((chrec_contains_symbols (chrec_a)
3070             || chrec_contains_symbols (chrec_b))
3071            && (!evolution_function_is_affine_multivariate_p (chrec_a, lnn)
3072                || !evolution_function_is_affine_multivariate_p (chrec_b, lnn)))
3073     {
3074       dependence_stats.num_subscript_undetermined++;
3075       *overlap_iterations_a = conflict_fn_not_known ();
3076       *overlap_iterations_b = conflict_fn_not_known ();
3077     }
3078
3079   else if (ziv_subscript_p (chrec_a, chrec_b))
3080     analyze_ziv_subscript (chrec_a, chrec_b,
3081                            overlap_iterations_a, overlap_iterations_b,
3082                            last_conflicts);
3083
3084   else if (siv_subscript_p (chrec_a, chrec_b))
3085     analyze_siv_subscript (chrec_a, chrec_b,
3086                            overlap_iterations_a, overlap_iterations_b,
3087                            last_conflicts, lnn);
3088
3089   else
3090     analyze_miv_subscript (chrec_a, chrec_b,
3091                            overlap_iterations_a, overlap_iterations_b,
3092                            last_conflicts, loop_nest);
3093
3094   if (dump_file && (dump_flags & TDF_DETAILS))
3095     {
3096       fprintf (dump_file, "  (overlap_iterations_a = ");
3097       dump_conflict_function (dump_file, *overlap_iterations_a);
3098       fprintf (dump_file, ")\n  (overlap_iterations_b = ");
3099       dump_conflict_function (dump_file, *overlap_iterations_b);
3100       fprintf (dump_file, "))\n");
3101     }
3102 }
3103
3104 /* Helper function for uniquely inserting distance vectors.  */
3105
3106 static void
3107 save_dist_v (struct data_dependence_relation *ddr, lambda_vector dist_v)
3108 {
3109   unsigned i;
3110   lambda_vector v;
3111
3112   FOR_EACH_VEC_ELT (DDR_DIST_VECTS (ddr), i, v)
3113     if (lambda_vector_equal (v, dist_v, DDR_NB_LOOPS (ddr)))
3114       return;
3115
3116   DDR_DIST_VECTS (ddr).safe_push (dist_v);
3117 }
3118
3119 /* Helper function for uniquely inserting direction vectors.  */
3120
3121 static void
3122 save_dir_v (struct data_dependence_relation *ddr, lambda_vector dir_v)
3123 {
3124   unsigned i;
3125   lambda_vector v;
3126
3127   FOR_EACH_VEC_ELT (DDR_DIR_VECTS (ddr), i, v)
3128     if (lambda_vector_equal (v, dir_v, DDR_NB_LOOPS (ddr)))
3129       return;
3130
3131   DDR_DIR_VECTS (ddr).safe_push (dir_v);
3132 }
3133
3134 /* Add a distance of 1 on all the loops outer than INDEX.  If we
3135    haven't yet determined a distance for this outer loop, push a new
3136    distance vector composed of the previous distance, and a distance
3137    of 1 for this outer loop.  Example:
3138
3139    | loop_1
3140    |   loop_2
3141    |     A[10]
3142    |   endloop_2
3143    | endloop_1
3144
3145    Saved vectors are of the form (dist_in_1, dist_in_2).  First, we
3146    save (0, 1), then we have to save (1, 0).  */
3147
3148 static void
3149 add_outer_distances (struct data_dependence_relation *ddr,
3150                      lambda_vector dist_v, int index)
3151 {
3152   /* For each outer loop where init_v is not set, the accesses are
3153      in dependence of distance 1 in the loop.  */
3154   while (--index >= 0)
3155     {
3156       lambda_vector save_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3157       lambda_vector_copy (dist_v, save_v, DDR_NB_LOOPS (ddr));
3158       save_v[index] = 1;
3159       save_dist_v (ddr, save_v);
3160     }
3161 }
3162
3163 /* Return false when fail to represent the data dependence as a
3164    distance vector.  INIT_B is set to true when a component has been
3165    added to the distance vector DIST_V.  INDEX_CARRY is then set to
3166    the index in DIST_V that carries the dependence.  */
3167
3168 static bool
3169 build_classic_dist_vector_1 (struct data_dependence_relation *ddr,
3170                              struct data_reference *ddr_a,
3171                              struct data_reference *ddr_b,
3172                              lambda_vector dist_v, bool *init_b,
3173                              int *index_carry)
3174 {
3175   unsigned i;
3176   lambda_vector init_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3177
3178   for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
3179     {
3180       tree access_fn_a, access_fn_b;
3181       struct subscript *subscript = DDR_SUBSCRIPT (ddr, i);
3182
3183       if (chrec_contains_undetermined (SUB_DISTANCE (subscript)))
3184         {
3185           non_affine_dependence_relation (ddr);
3186           return false;
3187         }
3188
3189       access_fn_a = DR_ACCESS_FN (ddr_a, i);
3190       access_fn_b = DR_ACCESS_FN (ddr_b, i);
3191
3192       if (TREE_CODE (access_fn_a) == POLYNOMIAL_CHREC
3193           && TREE_CODE (access_fn_b) == POLYNOMIAL_CHREC)
3194         {
3195           int dist, index;
3196           int var_a = CHREC_VARIABLE (access_fn_a);
3197           int var_b = CHREC_VARIABLE (access_fn_b);
3198
3199           if (var_a != var_b
3200               || chrec_contains_undetermined (SUB_DISTANCE (subscript)))
3201             {
3202               non_affine_dependence_relation (ddr);
3203               return false;
3204             }
3205
3206           dist = int_cst_value (SUB_DISTANCE (subscript));
3207           index = index_in_loop_nest (var_a, DDR_LOOP_NEST (ddr));
3208           *index_carry = MIN (index, *index_carry);
3209
3210           /* This is the subscript coupling test.  If we have already
3211              recorded a distance for this loop (a distance coming from
3212              another subscript), it should be the same.  For example,
3213              in the following code, there is no dependence:
3214
3215              | loop i = 0, N, 1
3216              |   T[i+1][i] = ...
3217              |   ... = T[i][i]
3218              | endloop
3219           */
3220           if (init_v[index] != 0 && dist_v[index] != dist)
3221             {
3222               finalize_ddr_dependent (ddr, chrec_known);
3223               return false;
3224             }
3225
3226           dist_v[index] = dist;
3227           init_v[index] = 1;
3228           *init_b = true;
3229         }
3230       else if (!operand_equal_p (access_fn_a, access_fn_b, 0))
3231         {
3232           /* This can be for example an affine vs. constant dependence
3233              (T[i] vs. T[3]) that is not an affine dependence and is
3234              not representable as a distance vector.  */
3235           non_affine_dependence_relation (ddr);
3236           return false;
3237         }
3238     }
3239
3240   return true;
3241 }
3242
3243 /* Return true when the DDR contains only constant access functions.  */
3244
3245 static bool
3246 constant_access_functions (const struct data_dependence_relation *ddr)
3247 {
3248   unsigned i;
3249
3250   for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
3251     if (!evolution_function_is_constant_p (DR_ACCESS_FN (DDR_A (ddr), i))
3252         || !evolution_function_is_constant_p (DR_ACCESS_FN (DDR_B (ddr), i)))
3253       return false;
3254
3255   return true;
3256 }
3257
3258 /* Helper function for the case where DDR_A and DDR_B are the same
3259    multivariate access function with a constant step.  For an example
3260    see pr34635-1.c.  */
3261
3262 static void
3263 add_multivariate_self_dist (struct data_dependence_relation *ddr, tree c_2)
3264 {
3265   int x_1, x_2;
3266   tree c_1 = CHREC_LEFT (c_2);
3267   tree c_0 = CHREC_LEFT (c_1);
3268   lambda_vector dist_v;
3269   int v1, v2, cd;
3270
3271   /* Polynomials with more than 2 variables are not handled yet.  When
3272      the evolution steps are parameters, it is not possible to
3273      represent the dependence using classical distance vectors.  */
3274   if (TREE_CODE (c_0) != INTEGER_CST
3275       || TREE_CODE (CHREC_RIGHT (c_1)) != INTEGER_CST
3276       || TREE_CODE (CHREC_RIGHT (c_2)) != INTEGER_CST)
3277     {
3278       DDR_AFFINE_P (ddr) = false;
3279       return;
3280     }
3281
3282   x_2 = index_in_loop_nest (CHREC_VARIABLE (c_2), DDR_LOOP_NEST (ddr));
3283   x_1 = index_in_loop_nest (CHREC_VARIABLE (c_1), DDR_LOOP_NEST (ddr));
3284
3285   /* For "{{0, +, 2}_1, +, 3}_2" the distance vector is (3, -2).  */
3286   dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3287   v1 = int_cst_value (CHREC_RIGHT (c_1));
3288   v2 = int_cst_value (CHREC_RIGHT (c_2));
3289   cd = gcd (v1, v2);
3290   v1 /= cd;
3291   v2 /= cd;
3292
3293   if (v2 < 0)
3294     {
3295       v2 = -v2;
3296       v1 = -v1;
3297     }
3298
3299   dist_v[x_1] = v2;
3300   dist_v[x_2] = -v1;
3301   save_dist_v (ddr, dist_v);
3302
3303   add_outer_distances (ddr, dist_v, x_1);
3304 }
3305
3306 /* Helper function for the case where DDR_A and DDR_B are the same
3307    access functions.  */
3308
3309 static void
3310 add_other_self_distances (struct data_dependence_relation *ddr)
3311 {
3312   lambda_vector dist_v;
3313   unsigned i;
3314   int index_carry = DDR_NB_LOOPS (ddr);
3315
3316   for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
3317     {
3318       tree access_fun = DR_ACCESS_FN (DDR_A (ddr), i);
3319
3320       if (TREE_CODE (access_fun) == POLYNOMIAL_CHREC)
3321         {
3322           if (!evolution_function_is_univariate_p (access_fun))
3323             {
3324               if (DDR_NUM_SUBSCRIPTS (ddr) != 1)
3325                 {
3326                   DDR_ARE_DEPENDENT (ddr) = chrec_dont_know;
3327                   return;
3328                 }
3329
3330               access_fun = DR_ACCESS_FN (DDR_A (ddr), 0);
3331
3332               if (TREE_CODE (CHREC_LEFT (access_fun)) == POLYNOMIAL_CHREC)
3333                 add_multivariate_self_dist (ddr, access_fun);
3334               else
3335                 /* The evolution step is not constant: it varies in
3336                    the outer loop, so this cannot be represented by a
3337                    distance vector.  For example in pr34635.c the
3338                    evolution is {0, +, {0, +, 4}_1}_2.  */
3339                 DDR_AFFINE_P (ddr) = false;
3340
3341               return;
3342             }
3343
3344           index_carry = MIN (index_carry,
3345                              index_in_loop_nest (CHREC_VARIABLE (access_fun),
3346                                                  DDR_LOOP_NEST (ddr)));
3347         }
3348     }
3349
3350   dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3351   add_outer_distances (ddr, dist_v, index_carry);
3352 }
3353
3354 static void
3355 insert_innermost_unit_dist_vector (struct data_dependence_relation *ddr)
3356 {
3357   lambda_vector dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3358
3359   dist_v[DDR_INNER_LOOP (ddr)] = 1;
3360   save_dist_v (ddr, dist_v);
3361 }
3362
3363 /* Adds a unit distance vector to DDR when there is a 0 overlap.  This
3364    is the case for example when access functions are the same and
3365    equal to a constant, as in:
3366
3367    | loop_1
3368    |   A[3] = ...
3369    |   ... = A[3]
3370    | endloop_1
3371
3372    in which case the distance vectors are (0) and (1).  */
3373
3374 static void
3375 add_distance_for_zero_overlaps (struct data_dependence_relation *ddr)
3376 {
3377   unsigned i, j;
3378
3379   for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
3380     {
3381       subscript_p sub = DDR_SUBSCRIPT (ddr, i);
3382       conflict_function *ca = SUB_CONFLICTS_IN_A (sub);
3383       conflict_function *cb = SUB_CONFLICTS_IN_B (sub);
3384
3385       for (j = 0; j < ca->n; j++)
3386         if (affine_function_zero_p (ca->fns[j]))
3387           {
3388             insert_innermost_unit_dist_vector (ddr);
3389             return;
3390           }
3391
3392       for (j = 0; j < cb->n; j++)
3393         if (affine_function_zero_p (cb->fns[j]))
3394           {
3395             insert_innermost_unit_dist_vector (ddr);
3396             return;
3397           }
3398     }
3399 }
3400
3401 /* Compute the classic per loop distance vector.  DDR is the data
3402    dependence relation to build a vector from.  Return false when fail
3403    to represent the data dependence as a distance vector.  */
3404
3405 static bool
3406 build_classic_dist_vector (struct data_dependence_relation *ddr,
3407                            struct loop *loop_nest)
3408 {
3409   bool init_b = false;
3410   int index_carry = DDR_NB_LOOPS (ddr);
3411   lambda_vector dist_v;
3412
3413   if (DDR_ARE_DEPENDENT (ddr) != NULL_TREE)
3414     return false;
3415
3416   if (same_access_functions (ddr))
3417     {
3418       /* Save the 0 vector.  */
3419       dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3420       save_dist_v (ddr, dist_v);
3421
3422       if (constant_access_functions (ddr))
3423         add_distance_for_zero_overlaps (ddr);
3424
3425       if (DDR_NB_LOOPS (ddr) > 1)
3426         add_other_self_distances (ddr);
3427
3428       return true;
3429     }
3430
3431   dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3432   if (!build_classic_dist_vector_1 (ddr, DDR_A (ddr), DDR_B (ddr),
3433                                     dist_v, &init_b, &index_carry))
3434     return false;
3435
3436   /* Save the distance vector if we initialized one.  */
3437   if (init_b)
3438     {
3439       /* Verify a basic constraint: classic distance vectors should
3440          always be lexicographically positive.
3441
3442          Data references are collected in the order of execution of
3443          the program, thus for the following loop
3444
3445          | for (i = 1; i < 100; i++)
3446          |   for (j = 1; j < 100; j++)
3447          |     {
3448          |       t = T[j+1][i-1];  // A
3449          |       T[j][i] = t + 2;  // B
3450          |     }
3451
3452          references are collected following the direction of the wind:
3453          A then B.  The data dependence tests are performed also
3454          following this order, such that we're looking at the distance
3455          separating the elements accessed by A from the elements later
3456          accessed by B.  But in this example, the distance returned by
3457          test_dep (A, B) is lexicographically negative (-1, 1), that
3458          means that the access A occurs later than B with respect to
3459          the outer loop, ie. we're actually looking upwind.  In this
3460          case we solve test_dep (B, A) looking downwind to the
3461          lexicographically positive solution, that returns the
3462          distance vector (1, -1).  */
3463       if (!lambda_vector_lexico_pos (dist_v, DDR_NB_LOOPS (ddr)))
3464         {
3465           lambda_vector save_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3466           if (!subscript_dependence_tester_1 (ddr, DDR_B (ddr), DDR_A (ddr),
3467                                               loop_nest))
3468             return false;
3469           compute_subscript_distance (ddr);
3470           if (!build_classic_dist_vector_1 (ddr, DDR_B (ddr), DDR_A (ddr),
3471                                             save_v, &init_b, &index_carry))
3472             return false;
3473           save_dist_v (ddr, save_v);
3474           DDR_REVERSED_P (ddr) = true;
3475
3476           /* In this case there is a dependence forward for all the
3477              outer loops:
3478
3479              | for (k = 1; k < 100; k++)
3480              |  for (i = 1; i < 100; i++)
3481              |   for (j = 1; j < 100; j++)
3482              |     {
3483              |       t = T[j+1][i-1];  // A
3484              |       T[j][i] = t + 2;  // B
3485              |     }
3486
3487              the vectors are:
3488              (0,  1, -1)
3489              (1,  1, -1)
3490              (1, -1,  1)
3491           */
3492           if (DDR_NB_LOOPS (ddr) > 1)
3493             {
3494               add_outer_distances (ddr, save_v, index_carry);
3495               add_outer_distances (ddr, dist_v, index_carry);
3496             }
3497         }
3498       else
3499         {
3500           lambda_vector save_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3501           lambda_vector_copy (dist_v, save_v, DDR_NB_LOOPS (ddr));
3502
3503           if (DDR_NB_LOOPS (ddr) > 1)
3504             {
3505               lambda_vector opposite_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3506
3507               if (!subscript_dependence_tester_1 (ddr, DDR_B (ddr),
3508                                                   DDR_A (ddr), loop_nest))
3509                 return false;
3510               compute_subscript_distance (ddr);
3511               if (!build_classic_dist_vector_1 (ddr, DDR_B (ddr), DDR_A (ddr),
3512                                                 opposite_v, &init_b,
3513                                                 &index_carry))
3514                 return false;
3515
3516               save_dist_v (ddr, save_v);
3517               add_outer_distances (ddr, dist_v, index_carry);
3518               add_outer_distances (ddr, opposite_v, index_carry);
3519             }
3520           else
3521             save_dist_v (ddr, save_v);
3522         }
3523     }
3524   else
3525     {
3526       /* There is a distance of 1 on all the outer loops: Example:
3527          there is a dependence of distance 1 on loop_1 for the array A.
3528
3529          | loop_1
3530          |   A[5] = ...
3531          | endloop
3532       */
3533       add_outer_distances (ddr, dist_v,
3534                            lambda_vector_first_nz (dist_v,
3535                                                    DDR_NB_LOOPS (ddr), 0));
3536     }
3537
3538   if (dump_file && (dump_flags & TDF_DETAILS))
3539     {
3540       unsigned i;
3541
3542       fprintf (dump_file, "(build_classic_dist_vector\n");
3543       for (i = 0; i < DDR_NUM_DIST_VECTS (ddr); i++)
3544         {
3545           fprintf (dump_file, "  dist_vector = (");
3546           print_lambda_vector (dump_file, DDR_DIST_VECT (ddr, i),
3547                                DDR_NB_LOOPS (ddr));
3548           fprintf (dump_file, "  )\n");
3549         }
3550       fprintf (dump_file, ")\n");
3551     }
3552
3553   return true;
3554 }
3555
3556 /* Return the direction for a given distance.
3557    FIXME: Computing dir this way is suboptimal, since dir can catch
3558    cases that dist is unable to represent.  */
3559
3560 static inline enum data_dependence_direction
3561 dir_from_dist (int dist)
3562 {
3563   if (dist > 0)
3564     return dir_positive;
3565   else if (dist < 0)
3566     return dir_negative;
3567   else
3568     return dir_equal;
3569 }
3570
3571 /* Compute the classic per loop direction vector.  DDR is the data
3572    dependence relation to build a vector from.  */
3573
3574 static void
3575 build_classic_dir_vector (struct data_dependence_relation *ddr)
3576 {
3577   unsigned i, j;
3578   lambda_vector dist_v;
3579
3580   FOR_EACH_VEC_ELT (DDR_DIST_VECTS (ddr), i, dist_v)
3581     {
3582       lambda_vector dir_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3583
3584       for (j = 0; j < DDR_NB_LOOPS (ddr); j++)
3585         dir_v[j] = dir_from_dist (dist_v[j]);
3586
3587       save_dir_v (ddr, dir_v);
3588     }
3589 }
3590
3591 /* Helper function.  Returns true when there is a dependence between
3592    data references DRA and DRB.  */
3593
3594 static bool
3595 subscript_dependence_tester_1 (struct data_dependence_relation *ddr,
3596                                struct data_reference *dra,
3597                                struct data_reference *drb,
3598                                struct loop *loop_nest)
3599 {
3600   unsigned int i;
3601   tree last_conflicts;
3602   struct subscript *subscript;
3603   tree res = NULL_TREE;
3604
3605   for (i = 0; DDR_SUBSCRIPTS (ddr).iterate (i, &subscript); i++)
3606     {
3607       conflict_function *overlaps_a, *overlaps_b;
3608
3609       analyze_overlapping_iterations (DR_ACCESS_FN (dra, i),
3610                                       DR_ACCESS_FN (drb, i),
3611                                       &overlaps_a, &overlaps_b,
3612                                       &last_conflicts, loop_nest);
3613
3614       if (SUB_CONFLICTS_IN_A (subscript))
3615         free_conflict_function (SUB_CONFLICTS_IN_A (subscript));
3616       if (SUB_CONFLICTS_IN_B (subscript))
3617         free_conflict_function (SUB_CONFLICTS_IN_B (subscript));
3618
3619       SUB_CONFLICTS_IN_A (subscript) = overlaps_a;
3620       SUB_CONFLICTS_IN_B (subscript) = overlaps_b;
3621       SUB_LAST_CONFLICT (subscript) = last_conflicts;
3622
3623       /* If there is any undetermined conflict function we have to
3624          give a conservative answer in case we cannot prove that
3625          no dependence exists when analyzing another subscript.  */
3626       if (CF_NOT_KNOWN_P (overlaps_a)
3627           || CF_NOT_KNOWN_P (overlaps_b))
3628         {
3629           res = chrec_dont_know;
3630           continue;
3631         }
3632
3633       /* When there is a subscript with no dependence we can stop.  */
3634       else if (CF_NO_DEPENDENCE_P (overlaps_a)
3635                || CF_NO_DEPENDENCE_P (overlaps_b))
3636         {
3637           res = chrec_known;
3638           break;
3639         }
3640     }
3641
3642   if (res == NULL_TREE)
3643     return true;
3644
3645   if (res == chrec_known)
3646     dependence_stats.num_dependence_independent++;
3647   else
3648     dependence_stats.num_dependence_undetermined++;
3649   finalize_ddr_dependent (ddr, res);
3650   return false;
3651 }
3652
3653 /* Computes the conflicting iterations in LOOP_NEST, and initialize DDR.  */
3654
3655 static void
3656 subscript_dependence_tester (struct data_dependence_relation *ddr,
3657                              struct loop *loop_nest)
3658 {
3659   if (subscript_dependence_tester_1 (ddr, DDR_A (ddr), DDR_B (ddr), loop_nest))
3660     dependence_stats.num_dependence_dependent++;
3661
3662   compute_subscript_distance (ddr);
3663   if (build_classic_dist_vector (ddr, loop_nest))
3664     build_classic_dir_vector (ddr);
3665 }
3666
3667 /* Returns true when all the access functions of A are affine or
3668    constant with respect to LOOP_NEST.  */
3669
3670 static bool
3671 access_functions_are_affine_or_constant_p (const struct data_reference *a,
3672                                            const struct loop *loop_nest)
3673 {
3674   unsigned int i;
3675   vec<tree> fns = DR_ACCESS_FNS (a);
3676   tree t;
3677
3678   FOR_EACH_VEC_ELT (fns, i, t)
3679     if (!evolution_function_is_invariant_p (t, loop_nest->num)
3680         && !evolution_function_is_affine_multivariate_p (t, loop_nest->num))
3681       return false;
3682
3683   return true;
3684 }
3685
3686 /* Initializes an equation for an OMEGA problem using the information
3687    contained in the ACCESS_FUN.  Returns true when the operation
3688    succeeded.
3689
3690    PB is the omega constraint system.
3691    EQ is the number of the equation to be initialized.
3692    OFFSET is used for shifting the variables names in the constraints:
3693    a constrain is composed of 2 * the number of variables surrounding
3694    dependence accesses.  OFFSET is set either to 0 for the first n variables,
3695    then it is set to n.
3696    ACCESS_FUN is expected to be an affine chrec.  */
3697
3698 static bool
3699 init_omega_eq_with_af (omega_pb pb, unsigned eq,
3700                        unsigned int offset, tree access_fun,
3701                        struct data_dependence_relation *ddr)
3702 {
3703   switch (TREE_CODE (access_fun))
3704     {
3705     case POLYNOMIAL_CHREC:
3706       {
3707         tree left = CHREC_LEFT (access_fun);
3708         tree right = CHREC_RIGHT (access_fun);
3709         int var = CHREC_VARIABLE (access_fun);
3710         unsigned var_idx;
3711
3712         if (TREE_CODE (right) != INTEGER_CST)
3713           return false;
3714
3715         var_idx = index_in_loop_nest (var, DDR_LOOP_NEST (ddr));
3716         pb->eqs[eq].coef[offset + var_idx + 1] = int_cst_value (right);
3717
3718         /* Compute the innermost loop index.  */
3719         DDR_INNER_LOOP (ddr) = MAX (DDR_INNER_LOOP (ddr), var_idx);
3720
3721         if (offset == 0)
3722           pb->eqs[eq].coef[var_idx + DDR_NB_LOOPS (ddr) + 1]
3723             += int_cst_value (right);
3724
3725         switch (TREE_CODE (left))
3726           {
3727           case POLYNOMIAL_CHREC:
3728             return init_omega_eq_with_af (pb, eq, offset, left, ddr);
3729
3730           case INTEGER_CST:
3731             pb->eqs[eq].coef[0] += int_cst_value (left);
3732             return true;
3733
3734           default:
3735             return false;
3736           }
3737       }
3738
3739     case INTEGER_CST:
3740       pb->eqs[eq].coef[0] += int_cst_value (access_fun);
3741       return true;
3742
3743     default:
3744       return false;
3745     }
3746 }
3747
3748 /* As explained in the comments preceding init_omega_for_ddr, we have
3749    to set up a system for each loop level, setting outer loops
3750    variation to zero, and current loop variation to positive or zero.
3751    Save each lexico positive distance vector.  */
3752
3753 static void
3754 omega_extract_distance_vectors (omega_pb pb,
3755                                 struct data_dependence_relation *ddr)
3756 {
3757   int eq, geq;
3758   unsigned i, j;
3759   struct loop *loopi, *loopj;
3760   enum omega_result res;
3761
3762   /* Set a new problem for each loop in the nest.  The basis is the
3763      problem that we have initialized until now.  On top of this we
3764      add new constraints.  */
3765   for (i = 0; i <= DDR_INNER_LOOP (ddr)
3766               && DDR_LOOP_NEST (ddr).iterate (i, &loopi); i++)
3767     {
3768       int dist = 0;
3769       omega_pb copy = omega_alloc_problem (2 * DDR_NB_LOOPS (ddr),
3770                                            DDR_NB_LOOPS (ddr));
3771
3772       omega_copy_problem (copy, pb);
3773
3774       /* For all the outer loops "loop_j", add "dj = 0".  */
3775       for (j = 0; j < i && DDR_LOOP_NEST (ddr).iterate (j, &loopj); j++)
3776         {
3777           eq = omega_add_zero_eq (copy, omega_black);
3778           copy->eqs[eq].coef[j + 1] = 1;
3779         }
3780
3781       /* For "loop_i", add "0 <= di".  */
3782       geq = omega_add_zero_geq (copy, omega_black);
3783       copy->geqs[geq].coef[i + 1] = 1;
3784
3785       /* Reduce the constraint system, and test that the current
3786          problem is feasible.  */
3787       res = omega_simplify_problem (copy);
3788       if (res == omega_false
3789           || res == omega_unknown
3790           || copy->num_geqs > (int) DDR_NB_LOOPS (ddr))
3791         goto next_problem;
3792
3793       for (eq = 0; eq < copy->num_subs; eq++)
3794         if (copy->subs[eq].key == (int) i + 1)
3795           {
3796             dist = copy->subs[eq].coef[0];
3797             goto found_dist;
3798           }
3799
3800       if (dist == 0)
3801         {
3802           /* Reinitialize problem...  */
3803           omega_copy_problem (copy, pb);
3804           for (j = 0; j < i && DDR_LOOP_NEST (ddr).iterate (j, &loopj); j++)
3805             {
3806               eq = omega_add_zero_eq (copy, omega_black);
3807               copy->eqs[eq].coef[j + 1] = 1;
3808             }
3809
3810           /* ..., but this time "di = 1".  */
3811           eq = omega_add_zero_eq (copy, omega_black);
3812           copy->eqs[eq].coef[i + 1] = 1;
3813           copy->eqs[eq].coef[0] = -1;
3814
3815           res = omega_simplify_problem (copy);
3816           if (res == omega_false
3817               || res == omega_unknown
3818               || copy->num_geqs > (int) DDR_NB_LOOPS (ddr))
3819             goto next_problem;
3820
3821           for (eq = 0; eq < copy->num_subs; eq++)
3822             if (copy->subs[eq].key == (int) i + 1)
3823               {
3824                 dist = copy->subs[eq].coef[0];
3825                 goto found_dist;
3826               }
3827         }
3828
3829     found_dist:;
3830       /* Save the lexicographically positive distance vector.  */
3831       if (dist >= 0)
3832         {
3833           lambda_vector dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3834           lambda_vector dir_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3835
3836           dist_v[i] = dist;
3837
3838           for (eq = 0; eq < copy->num_subs; eq++)
3839             if (copy->subs[eq].key > 0)
3840               {
3841                 dist = copy->subs[eq].coef[0];
3842                 dist_v[copy->subs[eq].key - 1] = dist;
3843               }
3844
3845           for (j = 0; j < DDR_NB_LOOPS (ddr); j++)
3846             dir_v[j] = dir_from_dist (dist_v[j]);
3847
3848           save_dist_v (ddr, dist_v);
3849           save_dir_v (ddr, dir_v);
3850         }
3851
3852     next_problem:;
3853       omega_free_problem (copy);
3854     }
3855 }
3856
3857 /* This is called for each subscript of a tuple of data references:
3858    insert an equality for representing the conflicts.  */
3859
3860 static bool
3861 omega_setup_subscript (tree access_fun_a, tree access_fun_b,
3862                        struct data_dependence_relation *ddr,
3863                        omega_pb pb, bool *maybe_dependent)
3864 {
3865   int eq;
3866   tree type = signed_type_for_types (TREE_TYPE (access_fun_a),
3867                                      TREE_TYPE (access_fun_b));
3868   tree fun_a = chrec_convert (type, access_fun_a, NULL);
3869   tree fun_b = chrec_convert (type, access_fun_b, NULL);
3870   tree difference = chrec_fold_minus (type, fun_a, fun_b);
3871   tree minus_one;
3872
3873   /* When the fun_a - fun_b is not constant, the dependence is not
3874      captured by the classic distance vector representation.  */
3875   if (TREE_CODE (difference) != INTEGER_CST)
3876     return false;
3877
3878   /* ZIV test.  */
3879   if (ziv_subscript_p (fun_a, fun_b) && !integer_zerop (difference))
3880     {
3881       /* There is no dependence.  */
3882       *maybe_dependent = false;
3883       return true;
3884     }
3885
3886   minus_one = build_int_cst (type, -1);
3887   fun_b = chrec_fold_multiply (type, fun_b, minus_one);
3888
3889   eq = omega_add_zero_eq (pb, omega_black);
3890   if (!init_omega_eq_with_af (pb, eq, DDR_NB_LOOPS (ddr), fun_a, ddr)
3891       || !init_omega_eq_with_af (pb, eq, 0, fun_b, ddr))
3892     /* There is probably a dependence, but the system of
3893        constraints cannot be built: answer "don't know".  */
3894     return false;
3895
3896   /* GCD test.  */
3897   if (DDR_NB_LOOPS (ddr) != 0 && pb->eqs[eq].coef[0]
3898       && !int_divides_p (lambda_vector_gcd
3899                          ((lambda_vector) &(pb->eqs[eq].coef[1]),
3900                           2 * DDR_NB_LOOPS (ddr)),
3901                          pb->eqs[eq].coef[0]))
3902     {
3903       /* There is no dependence.  */
3904       *maybe_dependent = false;
3905       return true;
3906     }
3907
3908   return true;
3909 }
3910
3911 /* Helper function, same as init_omega_for_ddr but specialized for
3912    data references A and B.  */
3913
3914 static bool
3915 init_omega_for_ddr_1 (struct data_reference *dra, struct data_reference *drb,
3916                       struct data_dependence_relation *ddr,
3917                       omega_pb pb, bool *maybe_dependent)
3918 {
3919   unsigned i;
3920   int ineq;
3921   struct loop *loopi;
3922   unsigned nb_loops = DDR_NB_LOOPS (ddr);
3923
3924   /* Insert an equality per subscript.  */
3925   for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
3926     {
3927       if (!omega_setup_subscript (DR_ACCESS_FN (dra, i), DR_ACCESS_FN (drb, i),
3928                                   ddr, pb, maybe_dependent))
3929         return false;
3930       else if (*maybe_dependent == false)
3931         {
3932           /* There is no dependence.  */
3933           DDR_ARE_DEPENDENT (ddr) = chrec_known;
3934           return true;
3935         }
3936     }
3937
3938   /* Insert inequalities: constraints corresponding to the iteration
3939      domain, i.e. the loops surrounding the references "loop_x" and
3940      the distance variables "dx".  The layout of the OMEGA
3941      representation is as follows:
3942      - coef[0] is the constant
3943      - coef[1..nb_loops] are the protected variables that will not be
3944      removed by the solver: the "dx"
3945      - coef[nb_loops + 1, 2*nb_loops] are the loop variables: "loop_x".
3946   */
3947   for (i = 0; i <= DDR_INNER_LOOP (ddr)
3948               && DDR_LOOP_NEST (ddr).iterate (i, &loopi); i++)
3949     {
3950       HOST_WIDE_INT nbi = max_stmt_executions_int (loopi);
3951
3952       /* 0 <= loop_x */
3953       ineq = omega_add_zero_geq (pb, omega_black);
3954       pb->geqs[ineq].coef[i + nb_loops + 1] = 1;
3955
3956       /* 0 <= loop_x + dx */
3957       ineq = omega_add_zero_geq (pb, omega_black);
3958       pb->geqs[ineq].coef[i + nb_loops + 1] = 1;
3959       pb->geqs[ineq].coef[i + 1] = 1;
3960
3961       if (nbi != -1)
3962         {
3963           /* loop_x <= nb_iters */
3964           ineq = omega_add_zero_geq (pb, omega_black);
3965           pb->geqs[ineq].coef[i + nb_loops + 1] = -1;
3966           pb->geqs[ineq].coef[0] = nbi;
3967
3968           /* loop_x + dx <= nb_iters */
3969           ineq = omega_add_zero_geq (pb, omega_black);
3970           pb->geqs[ineq].coef[i + nb_loops + 1] = -1;
3971           pb->geqs[ineq].coef[i + 1] = -1;
3972           pb->geqs[ineq].coef[0] = nbi;
3973
3974           /* A step "dx" bigger than nb_iters is not feasible, so
3975              add "0 <= nb_iters + dx",  */
3976           ineq = omega_add_zero_geq (pb, omega_black);
3977           pb->geqs[ineq].coef[i + 1] = 1;
3978           pb->geqs[ineq].coef[0] = nbi;
3979           /* and "dx <= nb_iters".  */
3980           ineq = omega_add_zero_geq (pb, omega_black);
3981           pb->geqs[ineq].coef[i + 1] = -1;
3982           pb->geqs[ineq].coef[0] = nbi;
3983         }
3984     }
3985
3986   omega_extract_distance_vectors (pb, ddr);
3987
3988   return true;
3989 }
3990
3991 /* Sets up the Omega dependence problem for the data dependence
3992    relation DDR.  Returns false when the constraint system cannot be
3993    built, ie. when the test answers "don't know".  Returns true
3994    otherwise, and when independence has been proved (using one of the
3995    trivial dependence test), set MAYBE_DEPENDENT to false, otherwise
3996    set MAYBE_DEPENDENT to true.
3997
3998    Example: for setting up the dependence system corresponding to the
3999    conflicting accesses
4000
4001    | loop_i
4002    |   loop_j
4003    |     A[i, i+1] = ...
4004    |     ... A[2*j, 2*(i + j)]
4005    |   endloop_j
4006    | endloop_i
4007
4008    the following constraints come from the iteration domain:
4009
4010    0 <= i <= Ni
4011    0 <= i + di <= Ni
4012    0 <= j <= Nj
4013    0 <= j + dj <= Nj
4014
4015    where di, dj are the distance variables.  The constraints
4016    representing the conflicting elements are:
4017
4018    i = 2 * (j + dj)
4019    i + 1 = 2 * (i + di + j + dj)
4020
4021    For asking that the resulting distance vector (di, dj) be
4022    lexicographically positive, we insert the constraint "di >= 0".  If
4023    "di = 0" in the solution, we fix that component to zero, and we
4024    look at the inner loops: we set a new problem where all the outer
4025    loop distances are zero, and fix this inner component to be
4026    positive.  When one of the components is positive, we save that
4027    distance, and set a new problem where the distance on this loop is
4028    zero, searching for other distances in the inner loops.  Here is
4029    the classic example that illustrates that we have to set for each
4030    inner loop a new problem:
4031
4032    | loop_1
4033    |   loop_2
4034    |     A[10]
4035    |   endloop_2
4036    | endloop_1
4037
4038    we have to save two distances (1, 0) and (0, 1).
4039
4040    Given two array references, refA and refB, we have to set the
4041    dependence problem twice, refA vs. refB and refB vs. refA, and we
4042    cannot do a single test, as refB might occur before refA in the
4043    inner loops, and the contrary when considering outer loops: ex.
4044
4045    | loop_0
4046    |   loop_1
4047    |     loop_2
4048    |       T[{1,+,1}_2][{1,+,1}_1]  // refA
4049    |       T[{2,+,1}_2][{0,+,1}_1]  // refB
4050    |     endloop_2
4051    |   endloop_1
4052    | endloop_0
4053
4054    refB touches the elements in T before refA, and thus for the same
4055    loop_0 refB precedes refA: ie. the distance vector (0, 1, -1)
4056    but for successive loop_0 iterations, we have (1, -1, 1)
4057
4058    The Omega solver expects the distance variables ("di" in the
4059    previous example) to come first in the constraint system (as
4060    variables to be protected, or "safe" variables), the constraint
4061    system is built using the following layout:
4062
4063    "cst | distance vars | index vars".
4064 */
4065
4066 static bool
4067 init_omega_for_ddr (struct data_dependence_relation *ddr,
4068                     bool *maybe_dependent)
4069 {
4070   omega_pb pb;
4071   bool res = false;
4072
4073   *maybe_dependent = true;
4074
4075   if (same_access_functions (ddr))
4076     {
4077       unsigned j;
4078       lambda_vector dir_v;
4079
4080       /* Save the 0 vector.  */
4081       save_dist_v (ddr, lambda_vector_new (DDR_NB_LOOPS (ddr)));
4082       dir_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
4083       for (j = 0; j < DDR_NB_LOOPS (ddr); j++)
4084         dir_v[j] = dir_equal;
4085       save_dir_v (ddr, dir_v);
4086
4087       /* Save the dependences carried by outer loops.  */
4088       pb = omega_alloc_problem (2 * DDR_NB_LOOPS (ddr), DDR_NB_LOOPS (ddr));
4089       res = init_omega_for_ddr_1 (DDR_A (ddr), DDR_B (ddr), ddr, pb,
4090                                   maybe_dependent);
4091       omega_free_problem (pb);
4092       return res;
4093     }
4094
4095   /* Omega expects the protected variables (those that have to be kept
4096      after elimination) to appear first in the constraint system.
4097      These variables are the distance variables.  In the following
4098      initialization we declare NB_LOOPS safe variables, and the total
4099      number of variables for the constraint system is 2*NB_LOOPS.  */
4100   pb = omega_alloc_problem (2 * DDR_NB_LOOPS (ddr), DDR_NB_LOOPS (ddr));
4101   res = init_omega_for_ddr_1 (DDR_A (ddr), DDR_B (ddr), ddr, pb,
4102                               maybe_dependent);
4103   omega_free_problem (pb);
4104
4105   /* Stop computation if not decidable, or no dependence.  */
4106   if (res == false || *maybe_dependent == false)
4107     return res;
4108
4109   pb = omega_alloc_problem (2 * DDR_NB_LOOPS (ddr), DDR_NB_LOOPS (ddr));
4110   res = init_omega_for_ddr_1 (DDR_B (ddr), DDR_A (ddr), ddr, pb,
4111                               maybe_dependent);
4112   omega_free_problem (pb);
4113
4114   return res;
4115 }
4116
4117 /* Return true when DDR contains the same information as that stored
4118    in DIR_VECTS and in DIST_VECTS, return false otherwise.   */
4119
4120 static bool
4121 ddr_consistent_p (FILE *file,
4122                   struct data_dependence_relation *ddr,
4123                   vec<lambda_vector> dist_vects,
4124                   vec<lambda_vector> dir_vects)
4125 {
4126   unsigned int i, j;
4127
4128   /* If dump_file is set, output there.  */
4129   if (dump_file && (dump_flags & TDF_DETAILS))
4130     file = dump_file;
4131
4132   if (dist_vects.length () != DDR_NUM_DIST_VECTS (ddr))
4133     {
4134       lambda_vector b_dist_v;
4135       fprintf (file, "\n(Number of distance vectors differ: Banerjee has %d, Omega has %d.\n",
4136                dist_vects.length (),
4137                DDR_NUM_DIST_VECTS (ddr));
4138
4139       fprintf (file, "Banerjee dist vectors:\n");
4140       FOR_EACH_VEC_ELT (dist_vects, i, b_dist_v)
4141         print_lambda_vector (file, b_dist_v, DDR_NB_LOOPS (ddr));
4142
4143       fprintf (file, "Omega dist vectors:\n");
4144       for (i = 0; i < DDR_NUM_DIST_VECTS (ddr); i++)
4145         print_lambda_vector (file, DDR_DIST_VECT (ddr, i), DDR_NB_LOOPS (ddr));
4146
4147       fprintf (file, "data dependence relation:\n");
4148       dump_data_dependence_relation (file, ddr);
4149
4150       fprintf (file, ")\n");
4151       return false;
4152     }
4153
4154   if (dir_vects.length () != DDR_NUM_DIR_VECTS (ddr))
4155     {
4156       fprintf (file, "\n(Number of direction vectors differ: Banerjee has %d, Omega has %d.)\n",
4157                dir_vects.length (),
4158                DDR_NUM_DIR_VECTS (ddr));
4159       return false;
4160     }
4161
4162   for (i = 0; i < DDR_NUM_DIST_VECTS (ddr); i++)
4163     {
4164       lambda_vector a_dist_v;
4165       lambda_vector b_dist_v = DDR_DIST_VECT (ddr, i);
4166
4167       /* Distance vectors are not ordered in the same way in the DDR
4168          and in the DIST_VECTS: search for a matching vector.  */
4169       FOR_EACH_VEC_ELT (dist_vects, j, a_dist_v)
4170         if (lambda_vector_equal (a_dist_v, b_dist_v, DDR_NB_LOOPS (ddr)))
4171           break;
4172
4173       if (j == dist_vects.length ())
4174         {
4175           fprintf (file, "\n(Dist vectors from the first dependence analyzer:\n");
4176           print_dist_vectors (file, dist_vects, DDR_NB_LOOPS (ddr));
4177           fprintf (file, "not found in Omega dist vectors:\n");
4178           print_dist_vectors (file, DDR_DIST_VECTS (ddr), DDR_NB_LOOPS (ddr));
4179           fprintf (file, "data dependence relation:\n");
4180           dump_data_dependence_relation (file, ddr);
4181           fprintf (file, ")\n");
4182         }
4183     }
4184
4185   for (i = 0; i < DDR_NUM_DIR_VECTS (ddr); i++)
4186     {
4187       lambda_vector a_dir_v;
4188       lambda_vector b_dir_v = DDR_DIR_VECT (ddr, i);
4189
4190       /* Direction vectors are not ordered in the same way in the DDR
4191          and in the DIR_VECTS: search for a matching vector.  */
4192       FOR_EACH_VEC_ELT (dir_vects, j, a_dir_v)
4193         if (lambda_vector_equal (a_dir_v, b_dir_v, DDR_NB_LOOPS (ddr)))
4194           break;
4195
4196       if (j == dist_vects.length ())
4197         {
4198           fprintf (file, "\n(Dir vectors from the first dependence analyzer:\n");
4199           print_dir_vectors (file, dir_vects, DDR_NB_LOOPS (ddr));
4200           fprintf (file, "not found in Omega dir vectors:\n");
4201           print_dir_vectors (file, DDR_DIR_VECTS (ddr), DDR_NB_LOOPS (ddr));
4202           fprintf (file, "data dependence relation:\n");
4203           dump_data_dependence_relation (file, ddr);
4204           fprintf (file, ")\n");
4205         }
4206     }
4207
4208   return true;
4209 }
4210
4211 /* This computes the affine dependence relation between A and B with
4212    respect to LOOP_NEST.  CHREC_KNOWN is used for representing the
4213    independence between two accesses, while CHREC_DONT_KNOW is used
4214    for representing the unknown relation.
4215
4216    Note that it is possible to stop the computation of the dependence
4217    relation the first time we detect a CHREC_KNOWN element for a given
4218    subscript.  */
4219
4220 void
4221 compute_affine_dependence (struct data_dependence_relation *ddr,
4222                            struct loop *loop_nest)
4223 {
4224   struct data_reference *dra = DDR_A (ddr);
4225   struct data_reference *drb = DDR_B (ddr);
4226
4227   if (dump_file && (dump_flags & TDF_DETAILS))
4228     {
4229       fprintf (dump_file, "(compute_affine_dependence\n");
4230       fprintf (dump_file, "  stmt_a: ");
4231       print_gimple_stmt (dump_file, DR_STMT (dra), 0, TDF_SLIM);
4232       fprintf (dump_file, "  stmt_b: ");
4233       print_gimple_stmt (dump_file, DR_STMT (drb), 0, TDF_SLIM);
4234     }
4235
4236   /* Analyze only when the dependence relation is not yet known.  */
4237   if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE)
4238     {
4239       dependence_stats.num_dependence_tests++;
4240
4241       if (access_functions_are_affine_or_constant_p (dra, loop_nest)
4242           && access_functions_are_affine_or_constant_p (drb, loop_nest))
4243         {
4244           subscript_dependence_tester (ddr, loop_nest);
4245
4246           if (flag_check_data_deps)
4247             {
4248               /* Dump the dependences from the first algorithm.  */
4249               if (dump_file && (dump_flags & TDF_DETAILS))
4250                 {
4251                   fprintf (dump_file, "\n\nBanerjee Analyzer\n");
4252                   dump_data_dependence_relation (dump_file, ddr);
4253                 }
4254
4255               if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE)
4256                 {
4257                   bool maybe_dependent;
4258                   vec<lambda_vector> dir_vects, dist_vects;
4259
4260                   /* Save the result of the first DD analyzer.  */
4261                   dist_vects = DDR_DIST_VECTS (ddr);
4262                   dir_vects = DDR_DIR_VECTS (ddr);
4263
4264                   /* Reset the information.  */
4265                   DDR_DIST_VECTS (ddr).create (0);
4266                   DDR_DIR_VECTS (ddr).create (0);
4267
4268                   /* Compute the same information using Omega.  */
4269                   if (!init_omega_for_ddr (ddr, &maybe_dependent))
4270                     goto csys_dont_know;
4271
4272                   if (dump_file && (dump_flags & TDF_DETAILS))
4273                     {
4274                       fprintf (dump_file, "Omega Analyzer\n");
4275                       dump_data_dependence_relation (dump_file, ddr);
4276                     }
4277
4278                   /* Check that we get the same information.  */
4279                   if (maybe_dependent)
4280                     gcc_assert (ddr_consistent_p (stderr, ddr, dist_vects,
4281                                                   dir_vects));
4282                 }
4283             }
4284         }
4285
4286       /* As a last case, if the dependence cannot be determined, or if
4287          the dependence is considered too difficult to determine, answer
4288          "don't know".  */
4289       else
4290         {
4291         csys_dont_know:;
4292           dependence_stats.num_dependence_undetermined++;
4293
4294           if (dump_file && (dump_flags & TDF_DETAILS))
4295             {
4296               fprintf (dump_file, "Data ref a:\n");
4297               dump_data_reference (dump_file, dra);
4298               fprintf (dump_file, "Data ref b:\n");
4299               dump_data_reference (dump_file, drb);
4300               fprintf (dump_file, "affine dependence test not usable: access function not affine or constant.\n");
4301             }
4302           finalize_ddr_dependent (ddr, chrec_dont_know);
4303         }
4304     }
4305
4306   if (dump_file && (dump_flags & TDF_DETAILS))
4307     {
4308       if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
4309         fprintf (dump_file, ") -> no dependence\n");
4310       else if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
4311         fprintf (dump_file, ") -> dependence analysis failed\n");
4312       else
4313         fprintf (dump_file, ")\n");
4314     }
4315 }
4316
4317 /* Compute in DEPENDENCE_RELATIONS the data dependence graph for all
4318    the data references in DATAREFS, in the LOOP_NEST.  When
4319    COMPUTE_SELF_AND_RR is FALSE, don't compute read-read and self
4320    relations.  Return true when successful, i.e. data references number
4321    is small enough to be handled.  */
4322
4323 bool
4324 compute_all_dependences (vec<data_reference_p> datarefs,
4325                          vec<ddr_p> *dependence_relations,
4326                          vec<loop_p> loop_nest,
4327                          bool compute_self_and_rr)
4328 {
4329   struct data_dependence_relation *ddr;
4330   struct data_reference *a, *b;
4331   unsigned int i, j;
4332
4333   if ((int) datarefs.length ()
4334       > PARAM_VALUE (PARAM_LOOP_MAX_DATAREFS_FOR_DATADEPS))
4335     {
4336       struct data_dependence_relation *ddr;
4337
4338       /* Insert a single relation into dependence_relations:
4339          chrec_dont_know.  */
4340       ddr = initialize_data_dependence_relation (NULL, NULL, loop_nest);
4341       dependence_relations->safe_push (ddr);
4342       return false;
4343     }
4344
4345   FOR_EACH_VEC_ELT (datarefs, i, a)
4346     for (j = i + 1; datarefs.iterate (j, &b); j++)
4347       if (DR_IS_WRITE (a) || DR_IS_WRITE (b) || compute_self_and_rr)
4348         {
4349           ddr = initialize_data_dependence_relation (a, b, loop_nest);
4350           dependence_relations->safe_push (ddr);
4351           if (loop_nest.exists ())
4352             compute_affine_dependence (ddr, loop_nest[0]);
4353         }
4354
4355   if (compute_self_and_rr)
4356     FOR_EACH_VEC_ELT (datarefs, i, a)
4357       {
4358         ddr = initialize_data_dependence_relation (a, a, loop_nest);
4359         dependence_relations->safe_push (ddr);
4360         if (loop_nest.exists ())
4361           compute_affine_dependence (ddr, loop_nest[0]);
4362       }
4363
4364   return true;
4365 }
4366
4367 /* Describes a location of a memory reference.  */
4368
4369 typedef struct data_ref_loc_d
4370 {
4371   /* The memory reference.  */
4372   tree ref;
4373
4374   /* True if the memory reference is read.  */
4375   bool is_read;
4376 } data_ref_loc;
4377
4378
4379 /* Stores the locations of memory references in STMT to REFERENCES.  Returns
4380    true if STMT clobbers memory, false otherwise.  */
4381
4382 static bool
4383 get_references_in_stmt (gimple stmt, vec<data_ref_loc, va_heap> *references)
4384 {
4385   bool clobbers_memory = false;
4386   data_ref_loc ref;
4387   tree op0, op1;
4388   enum gimple_code stmt_code = gimple_code (stmt);
4389
4390   /* ASM_EXPR and CALL_EXPR may embed arbitrary side effects.
4391      As we cannot model data-references to not spelled out
4392      accesses give up if they may occur.  */
4393   if (stmt_code == GIMPLE_CALL
4394       && !(gimple_call_flags (stmt) & ECF_CONST))
4395     {
4396       /* Allow IFN_GOMP_SIMD_LANE in their own loops.  */
4397       if (gimple_call_internal_p (stmt))
4398         switch (gimple_call_internal_fn (stmt))
4399           {
4400           case IFN_GOMP_SIMD_LANE:
4401             {
4402               struct loop *loop = gimple_bb (stmt)->loop_father;
4403               tree uid = gimple_call_arg (stmt, 0);
4404               gcc_assert (TREE_CODE (uid) == SSA_NAME);
4405               if (loop == NULL
4406                   || loop->simduid != SSA_NAME_VAR (uid))
4407                 clobbers_memory = true;
4408               break;
4409             }
4410           case IFN_MASK_LOAD:
4411           case IFN_MASK_STORE:
4412             break;
4413           default:
4414             clobbers_memory = true;
4415             break;
4416           }
4417       else
4418         clobbers_memory = true;
4419     }
4420   else if (stmt_code == GIMPLE_ASM
4421            && (gimple_asm_volatile_p (as_a <gasm *> (stmt))
4422                || gimple_vuse (stmt)))
4423     clobbers_memory = true;
4424
4425   if (!gimple_vuse (stmt))
4426     return clobbers_memory;
4427
4428   if (stmt_code == GIMPLE_ASSIGN)
4429     {
4430       tree base;
4431       op0 = gimple_assign_lhs (stmt);
4432       op1 = gimple_assign_rhs1 (stmt);
4433
4434       if (DECL_P (op1)
4435           || (REFERENCE_CLASS_P (op1)
4436               && (base = get_base_address (op1))
4437               && TREE_CODE (base) != SSA_NAME))
4438         {
4439           ref.ref = op1;
4440           ref.is_read = true;
4441           references->safe_push (ref);
4442         }
4443     }
4444   else if (stmt_code == GIMPLE_CALL)
4445     {
4446       unsigned i, n;
4447
4448       ref.is_read = false;
4449       if (gimple_call_internal_p (stmt))
4450         switch (gimple_call_internal_fn (stmt))
4451           {
4452           case IFN_MASK_LOAD:
4453             if (gimple_call_lhs (stmt) == NULL_TREE)
4454               break;
4455             ref.is_read = true;
4456           case IFN_MASK_STORE:
4457             ref.ref = fold_build2 (MEM_REF,
4458                                    ref.is_read
4459                                    ? TREE_TYPE (gimple_call_lhs (stmt))
4460                                    : TREE_TYPE (gimple_call_arg (stmt, 3)),
4461                                    gimple_call_arg (stmt, 0),
4462                                    gimple_call_arg (stmt, 1));
4463             references->safe_push (ref);
4464             return false;
4465           default:
4466             break;
4467           }
4468
4469       op0 = gimple_call_lhs (stmt);
4470       n = gimple_call_num_args (stmt);
4471       for (i = 0; i < n; i++)
4472         {
4473           op1 = gimple_call_arg (stmt, i);
4474
4475           if (DECL_P (op1)
4476               || (REFERENCE_CLASS_P (op1) && get_base_address (op1)))
4477             {
4478               ref.ref = op1;
4479               ref.is_read = true;
4480               references->safe_push (ref);
4481             }
4482         }
4483     }
4484   else
4485     return clobbers_memory;
4486
4487   if (op0
4488       && (DECL_P (op0)
4489           || (REFERENCE_CLASS_P (op0) && get_base_address (op0))))
4490     {
4491       ref.ref = op0;
4492       ref.is_read = false;
4493       references->safe_push (ref);
4494     }
4495   return clobbers_memory;
4496 }
4497
4498 /* Stores the data references in STMT to DATAREFS.  If there is an unanalyzable
4499    reference, returns false, otherwise returns true.  NEST is the outermost
4500    loop of the loop nest in which the references should be analyzed.  */
4501
4502 bool
4503 find_data_references_in_stmt (struct loop *nest, gimple stmt,
4504                               vec<data_reference_p> *datarefs)
4505 {
4506   unsigned i;
4507   auto_vec<data_ref_loc, 2> references;
4508   data_ref_loc *ref;
4509   bool ret = true;
4510   data_reference_p dr;
4511
4512   if (get_references_in_stmt (stmt, &references))
4513     return false;
4514
4515   FOR_EACH_VEC_ELT (references, i, ref)
4516     {
4517       dr = create_data_ref (nest, loop_containing_stmt (stmt),
4518                             ref->ref, stmt, ref->is_read);
4519       gcc_assert (dr != NULL);
4520       datarefs->safe_push (dr);
4521     }
4522   references.release ();
4523   return ret;
4524 }
4525
4526 /* Stores the data references in STMT to DATAREFS.  If there is an
4527    unanalyzable reference, returns false, otherwise returns true.
4528    NEST is the outermost loop of the loop nest in which the references
4529    should be instantiated, LOOP is the loop in which the references
4530    should be analyzed.  */
4531
4532 bool
4533 graphite_find_data_references_in_stmt (loop_p nest, loop_p loop, gimple stmt,
4534                                        vec<data_reference_p> *datarefs)
4535 {
4536   unsigned i;
4537   auto_vec<data_ref_loc, 2> references;
4538   data_ref_loc *ref;
4539   bool ret = true;
4540   data_reference_p dr;
4541
4542   if (get_references_in_stmt (stmt, &references))
4543     return false;
4544
4545   FOR_EACH_VEC_ELT (references, i, ref)
4546     {
4547       dr = create_data_ref (nest, loop, ref->ref, stmt, ref->is_read);
4548       gcc_assert (dr != NULL);
4549       datarefs->safe_push (dr);
4550     }
4551
4552   references.release ();
4553   return ret;
4554 }
4555
4556 /* Search the data references in LOOP, and record the information into
4557    DATAREFS.  Returns chrec_dont_know when failing to analyze a
4558    difficult case, returns NULL_TREE otherwise.  */
4559
4560 tree
4561 find_data_references_in_bb (struct loop *loop, basic_block bb,
4562                             vec<data_reference_p> *datarefs)
4563 {
4564   gimple_stmt_iterator bsi;
4565
4566   for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
4567     {
4568       gimple stmt = gsi_stmt (bsi);
4569
4570       if (!find_data_references_in_stmt (loop, stmt, datarefs))
4571         {
4572           struct data_reference *res;
4573           res = XCNEW (struct data_reference);
4574           datarefs->safe_push (res);
4575
4576           return chrec_dont_know;
4577         }
4578     }
4579
4580   return NULL_TREE;
4581 }
4582
4583 /* Search the data references in LOOP, and record the information into
4584    DATAREFS.  Returns chrec_dont_know when failing to analyze a
4585    difficult case, returns NULL_TREE otherwise.
4586
4587    TODO: This function should be made smarter so that it can handle address
4588    arithmetic as if they were array accesses, etc.  */
4589
4590 tree
4591 find_data_references_in_loop (struct loop *loop,
4592                               vec<data_reference_p> *datarefs)
4593 {
4594   basic_block bb, *bbs;
4595   unsigned int i;
4596
4597   bbs = get_loop_body_in_dom_order (loop);
4598
4599   for (i = 0; i < loop->num_nodes; i++)
4600     {
4601       bb = bbs[i];
4602
4603       if (find_data_references_in_bb (loop, bb, datarefs) == chrec_dont_know)
4604         {
4605           free (bbs);
4606           return chrec_dont_know;
4607         }
4608     }
4609   free (bbs);
4610
4611   return NULL_TREE;
4612 }
4613
4614 /* Recursive helper function.  */
4615
4616 static bool
4617 find_loop_nest_1 (struct loop *loop, vec<loop_p> *loop_nest)
4618 {
4619   /* Inner loops of the nest should not contain siblings.  Example:
4620      when there are two consecutive loops,
4621
4622      | loop_0
4623      |   loop_1
4624      |     A[{0, +, 1}_1]
4625      |   endloop_1
4626      |   loop_2
4627      |     A[{0, +, 1}_2]
4628      |   endloop_2
4629      | endloop_0
4630
4631      the dependence relation cannot be captured by the distance
4632      abstraction.  */
4633   if (loop->next)
4634     return false;
4635
4636   loop_nest->safe_push (loop);
4637   if (loop->inner)
4638     return find_loop_nest_1 (loop->inner, loop_nest);
4639   return true;
4640 }
4641
4642 /* Return false when the LOOP is not well nested.  Otherwise return
4643    true and insert in LOOP_NEST the loops of the nest.  LOOP_NEST will
4644    contain the loops from the outermost to the innermost, as they will
4645    appear in the classic distance vector.  */
4646
4647 bool
4648 find_loop_nest (struct loop *loop, vec<loop_p> *loop_nest)
4649 {
4650   loop_nest->safe_push (loop);
4651   if (loop->inner)
4652     return find_loop_nest_1 (loop->inner, loop_nest);
4653   return true;
4654 }
4655
4656 /* Returns true when the data dependences have been computed, false otherwise.
4657    Given a loop nest LOOP, the following vectors are returned:
4658    DATAREFS is initialized to all the array elements contained in this loop,
4659    DEPENDENCE_RELATIONS contains the relations between the data references.
4660    Compute read-read and self relations if
4661    COMPUTE_SELF_AND_READ_READ_DEPENDENCES is TRUE.  */
4662
4663 bool
4664 compute_data_dependences_for_loop (struct loop *loop,
4665                                    bool compute_self_and_read_read_dependences,
4666                                    vec<loop_p> *loop_nest,
4667                                    vec<data_reference_p> *datarefs,
4668                                    vec<ddr_p> *dependence_relations)
4669 {
4670   bool res = true;
4671
4672   memset (&dependence_stats, 0, sizeof (dependence_stats));
4673
4674   /* If the loop nest is not well formed, or one of the data references
4675      is not computable, give up without spending time to compute other
4676      dependences.  */
4677   if (!loop
4678       || !find_loop_nest (loop, loop_nest)
4679       || find_data_references_in_loop (loop, datarefs) == chrec_dont_know
4680       || !compute_all_dependences (*datarefs, dependence_relations, *loop_nest,
4681                                    compute_self_and_read_read_dependences))
4682     res = false;
4683
4684   if (dump_file && (dump_flags & TDF_STATS))
4685     {
4686       fprintf (dump_file, "Dependence tester statistics:\n");
4687
4688       fprintf (dump_file, "Number of dependence tests: %d\n",
4689                dependence_stats.num_dependence_tests);
4690       fprintf (dump_file, "Number of dependence tests classified dependent: %d\n",
4691                dependence_stats.num_dependence_dependent);
4692       fprintf (dump_file, "Number of dependence tests classified independent: %d\n",
4693                dependence_stats.num_dependence_independent);
4694       fprintf (dump_file, "Number of undetermined dependence tests: %d\n",
4695                dependence_stats.num_dependence_undetermined);
4696
4697       fprintf (dump_file, "Number of subscript tests: %d\n",
4698                dependence_stats.num_subscript_tests);
4699       fprintf (dump_file, "Number of undetermined subscript tests: %d\n",
4700                dependence_stats.num_subscript_undetermined);
4701       fprintf (dump_file, "Number of same subscript function: %d\n",
4702                dependence_stats.num_same_subscript_function);
4703
4704       fprintf (dump_file, "Number of ziv tests: %d\n",
4705                dependence_stats.num_ziv);
4706       fprintf (dump_file, "Number of ziv tests returning dependent: %d\n",
4707                dependence_stats.num_ziv_dependent);
4708       fprintf (dump_file, "Number of ziv tests returning independent: %d\n",
4709                dependence_stats.num_ziv_independent);
4710       fprintf (dump_file, "Number of ziv tests unimplemented: %d\n",
4711                dependence_stats.num_ziv_unimplemented);
4712
4713       fprintf (dump_file, "Number of siv tests: %d\n",
4714                dependence_stats.num_siv);
4715       fprintf (dump_file, "Number of siv tests returning dependent: %d\n",
4716                dependence_stats.num_siv_dependent);
4717       fprintf (dump_file, "Number of siv tests returning independent: %d\n",
4718                dependence_stats.num_siv_independent);
4719       fprintf (dump_file, "Number of siv tests unimplemented: %d\n",
4720                dependence_stats.num_siv_unimplemented);
4721
4722       fprintf (dump_file, "Number of miv tests: %d\n",
4723                dependence_stats.num_miv);
4724       fprintf (dump_file, "Number of miv tests returning dependent: %d\n",
4725                dependence_stats.num_miv_dependent);
4726       fprintf (dump_file, "Number of miv tests returning independent: %d\n",
4727                dependence_stats.num_miv_independent);
4728       fprintf (dump_file, "Number of miv tests unimplemented: %d\n",
4729                dependence_stats.num_miv_unimplemented);
4730     }
4731
4732   return res;
4733 }
4734
4735 /* Returns true when the data dependences for the basic block BB have been
4736    computed, false otherwise.
4737    DATAREFS is initialized to all the array elements contained in this basic
4738    block, DEPENDENCE_RELATIONS contains the relations between the data
4739    references. Compute read-read and self relations if
4740    COMPUTE_SELF_AND_READ_READ_DEPENDENCES is TRUE.  */
4741 bool
4742 compute_data_dependences_for_bb (basic_block bb,
4743                                  bool compute_self_and_read_read_dependences,
4744                                  vec<data_reference_p> *datarefs,
4745                                  vec<ddr_p> *dependence_relations)
4746 {
4747   if (find_data_references_in_bb (NULL, bb, datarefs) == chrec_dont_know)
4748     return false;
4749
4750   return compute_all_dependences (*datarefs, dependence_relations, vNULL,
4751                                   compute_self_and_read_read_dependences);
4752 }
4753
4754 /* Entry point (for testing only).  Analyze all the data references
4755    and the dependence relations in LOOP.
4756
4757    The data references are computed first.
4758
4759    A relation on these nodes is represented by a complete graph.  Some
4760    of the relations could be of no interest, thus the relations can be
4761    computed on demand.
4762
4763    In the following function we compute all the relations.  This is
4764    just a first implementation that is here for:
4765    - for showing how to ask for the dependence relations,
4766    - for the debugging the whole dependence graph,
4767    - for the dejagnu testcases and maintenance.
4768
4769    It is possible to ask only for a part of the graph, avoiding to
4770    compute the whole dependence graph.  The computed dependences are
4771    stored in a knowledge base (KB) such that later queries don't
4772    recompute the same information.  The implementation of this KB is
4773    transparent to the optimizer, and thus the KB can be changed with a
4774    more efficient implementation, or the KB could be disabled.  */
4775 static void
4776 analyze_all_data_dependences (struct loop *loop)
4777 {
4778   unsigned int i;
4779   int nb_data_refs = 10;
4780   vec<data_reference_p> datarefs;
4781   datarefs.create (nb_data_refs);
4782   vec<ddr_p> dependence_relations;
4783   dependence_relations.create (nb_data_refs * nb_data_refs);
4784   vec<loop_p> loop_nest;
4785   loop_nest.create (3);
4786
4787   /* Compute DDs on the whole function.  */
4788   compute_data_dependences_for_loop (loop, false, &loop_nest, &datarefs,
4789                                      &dependence_relations);
4790
4791   if (dump_file)
4792     {
4793       dump_data_dependence_relations (dump_file, dependence_relations);
4794       fprintf (dump_file, "\n\n");
4795
4796       if (dump_flags & TDF_DETAILS)
4797         dump_dist_dir_vectors (dump_file, dependence_relations);
4798
4799       if (dump_flags & TDF_STATS)
4800         {
4801           unsigned nb_top_relations = 0;
4802           unsigned nb_bot_relations = 0;
4803           unsigned nb_chrec_relations = 0;
4804           struct data_dependence_relation *ddr;
4805
4806           FOR_EACH_VEC_ELT (dependence_relations, i, ddr)
4807             {
4808               if (chrec_contains_undetermined (DDR_ARE_DEPENDENT (ddr)))
4809                 nb_top_relations++;
4810
4811               else if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
4812                 nb_bot_relations++;
4813
4814               else
4815                 nb_chrec_relations++;
4816             }
4817
4818           gather_stats_on_scev_database ();
4819         }
4820     }
4821
4822   loop_nest.release ();
4823   free_dependence_relations (dependence_relations);
4824   free_data_refs (datarefs);
4825 }
4826
4827 /* Computes all the data dependences and check that the results of
4828    several analyzers are the same.  */
4829
4830 void
4831 tree_check_data_deps (void)
4832 {
4833   struct loop *loop_nest;
4834
4835   FOR_EACH_LOOP (loop_nest, 0)
4836     analyze_all_data_dependences (loop_nest);
4837 }
4838
4839 /* Free the memory used by a data dependence relation DDR.  */
4840
4841 void
4842 free_dependence_relation (struct data_dependence_relation *ddr)
4843 {
4844   if (ddr == NULL)
4845     return;
4846
4847   if (DDR_SUBSCRIPTS (ddr).exists ())
4848     free_subscripts (DDR_SUBSCRIPTS (ddr));
4849   DDR_DIST_VECTS (ddr).release ();
4850   DDR_DIR_VECTS (ddr).release ();
4851
4852   free (ddr);
4853 }
4854
4855 /* Free the memory used by the data dependence relations from
4856    DEPENDENCE_RELATIONS.  */
4857
4858 void
4859 free_dependence_relations (vec<ddr_p> dependence_relations)
4860 {
4861   unsigned int i;
4862   struct data_dependence_relation *ddr;
4863
4864   FOR_EACH_VEC_ELT (dependence_relations, i, ddr)
4865     if (ddr)
4866       free_dependence_relation (ddr);
4867
4868   dependence_relations.release ();
4869 }
4870
4871 /* Free the memory used by the data references from DATAREFS.  */
4872
4873 void
4874 free_data_refs (vec<data_reference_p> datarefs)
4875 {
4876   unsigned int i;
4877   struct data_reference *dr;
4878
4879   FOR_EACH_VEC_ELT (datarefs, i, dr)
4880     free_data_ref (dr);
4881   datarefs.release ();
4882 }