gcc/matrix-reorg.c
/* Matrix layout transformations.
   Copyright (C) 2006, 2007 Free Software Foundation, Inc.
   Contributed by Razya Ladelsky <razya@il.ibm.com>
   Originally written by Revital Eres and Mustafa Hagog.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 2, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING.  If not, write to the Free
Software Foundation, 59 Temple Place - Suite 330, Boston, MA
02111-1307, USA.  */

/*
   Matrix flattening optimization tries to replace an N-dimensional
   matrix with an equivalent M-dimensional matrix, where M < N.
   This first implementation focuses on global matrices defined dynamically.

   When M==1, we actually flatten the whole matrix.
   For instance, consider a two-dimensional array a [dim1] [dim2].
   The code for allocating space for it usually looks like:

     a = (int **) malloc(dim1 * sizeof(int *));
     for (i=0; i<dim1; i++)
        a[i] = (int *) malloc (dim2 * sizeof(int));

   If the array "a" is found suitable for this optimization,
   its allocation is replaced by:

     a = (int *) malloc (dim1 * dim2 * sizeof(int));

   and all the references to a[i][j] are replaced by a[i * dim2 + j].

   The two main phases of the optimization are the analysis
   and the transformation.
   The driver of the optimization is matrix_reorg ().



   Analysis phase:
   ===============

   We'll number the dimensions outside-in, meaning the most external
   is 0, then 1, and so on.
   The analysis part of the optimization determines K, the escape
   level of an N-dimensional matrix (K <= N), that allows flattening of
   the external dimensions 0,1,..., K-1.  Escape level 0 means that the
   whole matrix escapes and no flattening is possible.
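
   For illustration (a sketch; the names foo and p are arbitrary),
   assume a global "int **a" with two dimensions:

     foo (a[i]);    the row pointer a[i] escapes into an unknown
                    function, so K == 1; since flattening merges
                    dimensions 0 .. K-1, a single surviving dimension
                    leaves nothing useful to flatten.
     a = p;         the matrix variable itself is overwritten, so the
                    whole matrix escapes and K == 0.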

   The analysis part is implemented in analyze_matrix_allocation_site()
   and analyze_matrix_accesses().

   Transformation phase:
   =====================
   In this phase we define the new flattened matrices that replace the
   original matrices in the code.
   Implemented in transform_allocation_sites(),
   transform_access_sites().

   Matrix Transposing
   ==================
   The idea of Matrix Transposing is to organize the matrix in a
   different layout such that the dimensions are reordered.
   This could produce better cache behavior in some cases.

   For example, let's look at the matrix accesses in the following loop:

     for (i=0; i<N; i++)
      for (j=0; j<M; j++)
       access to a[i][j]

   This loop can produce good cache behavior because the elements of
   the inner dimension are accessed sequentially.

   However, if the accesses of the matrix were of the following form:

     for (i=0; i<N; i++)
      for (j=0; j<M; j++)
       access to a[j][i]

   In this loop we iterate over the columns and not the rows.
   Therefore, swapping the rows and columns
   would yield an organization with better (cache) locality.
   Swapping the dimensions of the matrix is called matrix transposing.

   This example, of course, could be extended to matrices with more
   than two dimensions as well.
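
   To make the effect concrete (an illustrative sketch): after
   transposing the two dimensions and flattening, the access a[j][i] in
   the second loop would be rewritten along the lines of a[i * M + j],
   so the innermost loop (over j) walks consecutive elements in memory.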

   Since a program could include all kinds of accesses, there is a
   decision mechanism, implemented in analyze_transpose(), which
   implements a heuristic that tries to determine whether to transpose
   the matrix or not, according to the form of the more dominant
   accesses.
   This decision is passed on to the flattening mechanism; whether or
   not the matrix was transposed, the matrix is flattened (if possible).

   This decision making is based on profiling information and loop
   information.  If profiling information is available, the decision
   making mechanism is activated; otherwise the matrix is only
   flattened (if possible).

   Both optimizations are described in the paper "Matrix flattening and
   transposing in GCC" which was presented at the GCC Summit 2006.
   http://www.gccsummit.org/2006/2006-GCC-Summit-Proceedings.pdf
*/

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tm.h"
#include "tree.h"
#include "rtl.h"
#include "c-tree.h"
#include "tree-inline.h"
#include "tree-flow.h"
#include "tree-flow-inline.h"
#include "langhooks.h"
#include "hashtab.h"
#include "toplev.h"
#include "flags.h"
#include "ggc.h"
#include "debug.h"
#include "target.h"
#include "cgraph.h"
#include "diagnostic.h"
#include "timevar.h"
#include "params.h"
#include "fibheap.h"
#include "c-common.h"
#include "intl.h"
#include "function.h"
#include "basic-block.h"
#include "cfgloop.h"
#include "tree-iterator.h"
#include "tree-pass.h"
#include "opts.h"
#include "tree-data-ref.h"
#include "tree-chrec.h"
#include "tree-scalar-evolution.h"

/*
   We need to collect a lot of data from the original malloc,
   particularly as the gimplifier has converted:

   orig_var = (struct_type *) malloc (x * sizeof (struct_type *));

   into

   T3 = <constant> ;  ** <constant> is amount to malloc; precomputed **
   T4 = malloc (T3);
   T5 = (struct_type *) T4;
   orig_var = T5;

   The following struct fields allow us to collect all the necessary data from
   the gimplified program.  The comments in the struct below are all based
   on the gimple example above.  */

struct malloc_call_data
{
  tree call_stmt;    /* Tree for "T4 = malloc (T3);" */
  tree size_var;     /* Var decl for T3.  */
  tree malloc_size;  /* Tree for "<constant>", the rhs assigned to T3.  */
};

/* The front end of the compiler, when parsing statements of the form:

   var = (type_cast) malloc (sizeof (type));

   always converts this single statement into the following statements
   (GIMPLE form):

   T.1 = sizeof (type);
   T.2 = malloc (T.1);
   T.3 = (type_cast) T.2;
   var = T.3;

   Since we need to create new malloc statements and modify the original
   statements somewhat, we need to find all four of the above statements.
   Currently record_call_1 (called for building cgraph edges) finds and
   records the statements containing the actual call to malloc, but we
   need to find the rest of the variables/statements on our own.  That
   is what the following function does.  */
static void
collect_data_for_malloc_call (tree stmt, struct malloc_call_data *m_data)
{
  tree size_var = NULL;
  tree malloc_fn_decl;
  tree tmp;
  tree arg1;

  gcc_assert (TREE_CODE (stmt) == GIMPLE_MODIFY_STMT);

  tmp = get_call_expr_in (stmt);
  malloc_fn_decl = CALL_EXPR_FN (tmp);
  if (TREE_CODE (malloc_fn_decl) != ADDR_EXPR
      || TREE_CODE (TREE_OPERAND (malloc_fn_decl, 0)) != FUNCTION_DECL
      || DECL_FUNCTION_CODE (TREE_OPERAND (malloc_fn_decl, 0)) !=
      BUILT_IN_MALLOC)
    return;

  arg1 = CALL_EXPR_ARG (tmp, 0);
  size_var = arg1;

  m_data->call_stmt = stmt;
  m_data->size_var = size_var;
  if (TREE_CODE (size_var) != VAR_DECL)
    m_data->malloc_size = size_var;
  else
    m_data->malloc_size = NULL_TREE;
}
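
/* An illustrative note (hypothetical values): for the gimple statement
   "T4 = malloc (T3)" above, collect_data_for_malloc_call fills M_DATA
   with call_stmt pointing at the statement itself and size_var == T3;
   malloc_size stays NULL_TREE when T3 is a VAR_DECL, because the
   constant assigned to T3 then still has to be looked up separately.  */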

/* Information about a matrix access site.
   For example: if an access site of matrix arr is arr[i][j]
   the ACCESS_SITE_INFO structure will have the address
   of arr as its stmt.  The INDEX_INFO will hold information about the
   initial address and index of each dimension.  */
struct access_site_info
{
  /* The statement (INDIRECT_REF or POINTER_PLUS_EXPR).  */
  tree stmt;

  /* In case of POINTER_PLUS_EXPR, what is the offset.  */
  tree offset;

  /* The index which created the offset.  */
  tree index;

  /* The indirection level of this statement.  */
  int level;

  /* TRUE for allocation site, FALSE for access site.  */
  bool is_alloc;

  /* The function containing the access site.  */
  tree function_decl;

  /* This access is iterated in the innermost loop.  */
  bool iterated_by_inner_most_loop_p;
};
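
/* As an illustrative sketch (assumed GIMPLE shape, with 64-bit
   pointers and 32-bit ints), an access a[i][j] to an "int **a" is
   typically gimplified to a chain like:

     T1 = a;            copy of the base pointer, indirection level 0
     T2 = T1 + i * 8;   POINTER_PLUS_EXPR: offset i*8, index i, level 0
     T3 = *T2;          INDIRECT_REF: moves from level 0 to level 1
     T4 = T3 + j * 4;   POINTER_PLUS_EXPR: offset j*4, index j, level 1
     T5 = *T4;          INDIRECT_REF at level 1, the element itself

   and the analysis records one access_site_info per POINTER_PLUS_EXPR
   and INDIRECT_REF in the chain.  */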

typedef struct access_site_info *access_site_info_p;
DEF_VEC_P (access_site_info_p);
DEF_VEC_ALLOC_P (access_site_info_p, heap);

/* Information about a matrix to flatten.  */
struct matrix_info
{
  /* Decl tree of this matrix.  */
  tree decl;
  /* Number of dimensions; number
     of "*" in the type declaration.  */
  int num_dims;

  /* Minimum indirection level that escapes, 0 means that
     the whole matrix escapes, k means that dimensions
     k and beyond escape (so dimensions 0 .. k-1 may be flattened).  */
  int min_indirect_level_escape;

  tree min_indirect_level_escape_stmt;

  /* Is the matrix transposed.  */
  bool is_transposed_p;

  /* Hold the allocation site for each level (dimension).
     We can use NUM_DIMS as the upper bound and allocate the array
     once with this number of elements, with no need to use realloc and
     MAX_MALLOCED_LEVEL.  */
  tree *malloc_for_level;

  int max_malloced_level;

  /* The location of the allocation sites (they must be in one
     function).  */
  tree allocation_function_decl;

  /* The calls to free for each level of indirection.  */
  struct free_info
  {
    tree stmt;
    tree func;
  } *free_stmts;

  /* An array which holds for each dimension its size, where
     dimension 0 is the outermost (the one that contains all the
     others).  */
  tree *dimension_size;

  /* An array which holds for each dimension its original size
     (before transposing and flattening take place).  */
  tree *dimension_size_orig;

  /* An array which holds for each dimension the size of the type of
     the elements accessed in that level (in bytes).  */
  HOST_WIDE_INT *dimension_type_size;

  int dimension_type_size_len;

  /* An array collecting the count of accesses for each dimension.  */
  gcov_type *dim_hot_level;

  /* An array of the accesses to be flattened.
     Elements are of type "struct access_site_info *".  */
  VEC (access_site_info_p, heap) *access_l;

  /* A map of how the dimensions will be organized at the end of
     the analyses.  */
  int *dim_map;
};

/* In each phi node we want to record the indirection level we have when
   we reach it.  Phi nodes often have more than two arguments, and we
   must make sure that all of them reach the phi node with the same
   indirection level; otherwise it is not safe to do the flattening.
   So each time we reach a phi node we record in this hash table the
   indirection level we arrived with.  */

struct matrix_access_phi_node
{
  tree phi;
  int indirection_level;
};

/* We use this structure to find if the SSA variable is accessed inside the
   tree and to record the tree containing it.  */

struct ssa_acc_in_tree
{
  /* The variable whose accesses in the tree we are looking for.  */
  tree ssa_var;
  /* The tree in which the ssa_var is accessed, and its code; currently
     it can be an INDIRECT_REF or CALL_EXPR.  */
  enum tree_code t_code;
  tree t_tree;
  /* The place in the containing tree.  */
  tree *tp;
  tree second_op;
  bool var_found;
};

static void analyze_matrix_accesses (struct matrix_info *, tree, int, bool,
                                     sbitmap, bool);
static int transform_allocation_sites (void **, void *);
static int transform_access_sites (void **, void *);
static int analyze_transpose (void **, void *);
static int dump_matrix_reorg_analysis (void **, void *);

static bool check_transpose_p;

/* Hash function used for the phi nodes.  */

static hashval_t
mat_acc_phi_hash (const void *p)
{
  const struct matrix_access_phi_node *ma_phi = p;

  return htab_hash_pointer (ma_phi->phi);
}

/* Equality means phi node pointers are the same.  */

static int
mat_acc_phi_eq (const void *p1, const void *p2)
{
  const struct matrix_access_phi_node *phi1 = p1;
  const struct matrix_access_phi_node *phi2 = p2;

  if (phi1->phi == phi2->phi)
    return 1;

  return 0;
}

/* Hold the PHI nodes we visit during the traversal for escaping
   analysis.  */
static htab_t htab_mat_acc_phi_nodes = NULL;

/* This hash-table holds the information about the matrices we are
   going to handle.  */
static htab_t matrices_to_reorg = NULL;

/* Return a hash for MTT, which is really a "matrix_info *".  */
static hashval_t
mtt_info_hash (const void *mtt)
{
  return htab_hash_pointer (((struct matrix_info *) mtt)->decl);
}

/* Return true if MTT1 and MTT2 (which are really both of type
   "matrix_info *") refer to the same decl.  */
static int
mtt_info_eq (const void *mtt1, const void *mtt2)
{
  const struct matrix_info *i1 = mtt1;
  const struct matrix_info *i2 = mtt2;

  if (i1->decl == i2->decl)
    return true;

  return false;
}

/* Return the innermost tree that is not a cast.  */
static tree
get_inner_of_cast_expr (tree t)
{
  while (TREE_CODE (t) == CONVERT_EXPR || TREE_CODE (t) == NOP_EXPR
         || TREE_CODE (t) == VIEW_CONVERT_EXPR)
    t = TREE_OPERAND (t, 0);

  return t;
}

/* Return false if STMT may contain a vector expression.
   If it may, no matrix should be flattened.  */
static bool
may_flatten_matrices_1 (tree stmt)
{
  tree t;

  switch (TREE_CODE (stmt))
    {
    case GIMPLE_MODIFY_STMT:
      t = GIMPLE_STMT_OPERAND (stmt, 1);
      while (TREE_CODE (t) == CONVERT_EXPR || TREE_CODE (t) == NOP_EXPR)
        {
          if (TREE_TYPE (t) && POINTER_TYPE_P (TREE_TYPE (t)))
            {
              tree pointee;

              pointee = TREE_TYPE (t);
              while (POINTER_TYPE_P (pointee))
                pointee = TREE_TYPE (pointee);
              if (TREE_CODE (pointee) == VECTOR_TYPE)
                {
                  if (dump_file)
                    fprintf (dump_file,
                             "Found vector type, don't flatten matrix\n");
                  return false;
                }
            }
          t = TREE_OPERAND (t, 0);
        }
      break;
    case ASM_EXPR:
      /* Asm code could contain vector operations.  */
      return false;
      break;
    default:
      break;
    }
  return true;
}

/* Return false if there are hand-written vectors in the program.
   We disable the flattening in such a case.  */
static bool
may_flatten_matrices (struct cgraph_node *node)
{
  tree decl;
  struct function *func;
  basic_block bb;
  block_stmt_iterator bsi;

  decl = node->decl;
  if (node->analyzed)
    {
      func = DECL_STRUCT_FUNCTION (decl);
      FOR_EACH_BB_FN (bb, func)
        for (bsi = bsi_start (bb); !bsi_end_p (bsi); bsi_next (&bsi))
          if (!may_flatten_matrices_1 (bsi_stmt (bsi)))
            return false;
    }
  return true;
}

/* Given a VAR_DECL, check its type to determine whether it is
   a definition of a dynamically allocated matrix and therefore is
   a suitable candidate for the matrix flattening optimization.
   Return NULL if VAR_DECL is not such a decl.  Otherwise, allocate
   a MATRIX_INFO structure, fill it with the relevant information
   and return a pointer to it.
   TODO: handle also statically defined arrays.  */
static struct matrix_info *
analyze_matrix_decl (tree var_decl)
{
  struct matrix_info *m_node, tmpmi, *mi;
  tree var_type;
  int dim_num = 0;

  gcc_assert (matrices_to_reorg);

  if (TREE_CODE (var_decl) == PARM_DECL)
    var_type = DECL_ARG_TYPE (var_decl);
  else if (TREE_CODE (var_decl) == VAR_DECL)
    var_type = TREE_TYPE (var_decl);
  else
    return NULL;

  if (!POINTER_TYPE_P (var_type))
    return NULL;

  while (POINTER_TYPE_P (var_type))
    {
      var_type = TREE_TYPE (var_type);
      dim_num++;
    }

  if (dim_num <= 1)
    return NULL;

  if (!COMPLETE_TYPE_P (var_type)
      || TREE_CODE (TYPE_SIZE_UNIT (var_type)) != INTEGER_CST)
    return NULL;

  /* Check to see if this pointer is already in there.  */
  tmpmi.decl = var_decl;
  mi = htab_find (matrices_to_reorg, &tmpmi);

  if (mi)
    return NULL;

  /* Record the matrix.  */

  m_node = (struct matrix_info *) xcalloc (1, sizeof (struct matrix_info));
  m_node->decl = var_decl;
  m_node->num_dims = dim_num;
  m_node->free_stmts
    = (struct free_info *) xcalloc (dim_num, sizeof (struct free_info));

  /* Init min_indirect_level_escape to -1 to indicate that no escape
     analysis has been done yet.  */
  m_node->min_indirect_level_escape = -1;
  m_node->is_transposed_p = false;

  return m_node;
}
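
/* For illustration (a sketch with hypothetical decls): a global
   declared as "int ***a" walks three pointer levels, so
   analyze_matrix_decl records num_dims == 3; "int *v" is rejected
   because dim_num <= 1, and so is any decl whose innermost element
   type has no constant size known at compile time.  */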

/* Free matrix E.  */
static void
mat_free (void *e)
{
  struct matrix_info *mat = (struct matrix_info *) e;

  if (!mat)
    return;

  if (mat->free_stmts)
    free (mat->free_stmts);
  if (mat->dim_hot_level)
    free (mat->dim_hot_level);
  if (mat->malloc_for_level)
    free (mat->malloc_for_level);
}

/* Find all potential matrices.
   TODO: currently we handle only multidimensional
   dynamically allocated arrays.  */
static void
find_matrices_decl (void)
{
  struct matrix_info *tmp;
  PTR *slot;
  struct varpool_node *vnode;

  gcc_assert (matrices_to_reorg);

  /* For every global variable in the program:
     Check to see if it's of a candidate type and record it.  */
  for (vnode = varpool_nodes_queue; vnode; vnode = vnode->next_needed)
    {
      tree var_decl = vnode->decl;

      if (!var_decl || TREE_CODE (var_decl) != VAR_DECL)
        continue;

      if (matrices_to_reorg)
        if ((tmp = analyze_matrix_decl (var_decl)))
          {
            if (!TREE_ADDRESSABLE (var_decl))
              {
                slot = htab_find_slot (matrices_to_reorg, tmp, INSERT);
                *slot = tmp;
              }
          }
    }
  return;
}

/* Mark that the matrix MI escapes at level L.  */
static void
mark_min_matrix_escape_level (struct matrix_info *mi, int l, tree s)
{
  if (mi->min_indirect_level_escape == -1
      || (mi->min_indirect_level_escape > l))
    {
      mi->min_indirect_level_escape = l;
      mi->min_indirect_level_escape_stmt = s;
    }
}

/* Find if the SSA variable is accessed inside the
   tree and record the tree containing it.
   The only relevant uses are the case of SSA_NAME, or SSA inside
   INDIRECT_REF, CALL_EXPR, PLUS_EXPR, POINTER_PLUS_EXPR, MULT_EXPR.  */
static void
ssa_accessed_in_tree (tree t, struct ssa_acc_in_tree *a)
{
  tree call, decl;
  tree arg;
  call_expr_arg_iterator iter;

  a->t_code = TREE_CODE (t);
  switch (a->t_code)
    {
      tree op1, op2;

    case SSA_NAME:
      if (t == a->ssa_var)
        a->var_found = true;
      break;
    case INDIRECT_REF:
      if (SSA_VAR_P (TREE_OPERAND (t, 0))
          && TREE_OPERAND (t, 0) == a->ssa_var)
        a->var_found = true;
      break;
    case CALL_EXPR:
      FOR_EACH_CALL_EXPR_ARG (arg, iter, t)
      {
        if (arg == a->ssa_var)
          {
            a->var_found = true;
            call = get_call_expr_in (t);
            if (call && (decl = get_callee_fndecl (call)))
              a->t_tree = decl;
            break;
          }
      }
      break;
    case POINTER_PLUS_EXPR:
    case PLUS_EXPR:
    case MULT_EXPR:
      op1 = TREE_OPERAND (t, 0);
      op2 = TREE_OPERAND (t, 1);

      if (op1 == a->ssa_var)
        {
          a->var_found = true;
          a->second_op = op2;
        }
      else if (op2 == a->ssa_var)
        {
          a->var_found = true;
          a->second_op = op1;
        }
      break;
    default:
      break;
    }
}

/* Record the access/allocation site information for matrix MI so we can
   handle it later in the transformation.  */
static void
record_access_alloc_site_info (struct matrix_info *mi, tree stmt, tree offset,
                               tree index, int level, bool is_alloc)
{
  struct access_site_info *acc_info;

  if (!mi->access_l)
    mi->access_l = VEC_alloc (access_site_info_p, heap, 100);

  acc_info
    = (struct access_site_info *)
    xcalloc (1, sizeof (struct access_site_info));
  acc_info->stmt = stmt;
  acc_info->offset = offset;
  acc_info->index = index;
  acc_info->function_decl = current_function_decl;
  acc_info->level = level;
  acc_info->is_alloc = is_alloc;

  VEC_safe_push (access_site_info_p, heap, mi->access_l, acc_info);
}

/* Record the malloc as the allocation site of the given LEVEL.  But
   first make sure that all the size parameters passed to malloc in
   all the allocation sites could be pre-calculated before the call to
   the malloc of level 0 (the main malloc call).  */
static void
add_allocation_site (struct matrix_info *mi, tree stmt, int level)
{
  struct malloc_call_data mcd;

  /* Make sure that the allocation sites are in the same function.  */
  if (!mi->allocation_function_decl)
    mi->allocation_function_decl = current_function_decl;
  else if (mi->allocation_function_decl != current_function_decl)
    {
      int min_malloc_level;

      gcc_assert (mi->malloc_for_level);

      /* Find the minimum malloc level that has already been seen; we
         know its allocation function must be
         MI->allocation_function_decl.  Since that is different from
         CURRENT_FUNCTION_DECL, the escaping level should be
         MIN (LEVEL, MIN_MALLOC_LEVEL) - 1, and the allocation function
         must be set accordingly.  */
      for (min_malloc_level = 0;
           min_malloc_level < mi->max_malloced_level
           && mi->malloc_for_level[min_malloc_level]; min_malloc_level++);
      if (level < min_malloc_level)
        {
          mi->allocation_function_decl = current_function_decl;
          mark_min_matrix_escape_level (mi, min_malloc_level, stmt);
        }
      else
        {
          mark_min_matrix_escape_level (mi, level, stmt);
          /* It cannot be that level == min_malloc_level;
             we would have returned earlier.  */
          return;
        }
    }

  /* Find the correct malloc information.  */
  collect_data_for_malloc_call (stmt, &mcd);

  /* We accept only calls to the malloc function; we do not accept
     calls like calloc and realloc.  */
  if (!mi->malloc_for_level)
    {
      mi->malloc_for_level = xcalloc (level + 1, sizeof (tree));
      mi->max_malloced_level = level + 1;
    }
  else if (mi->max_malloced_level <= level)
    {
      mi->malloc_for_level
        = xrealloc (mi->malloc_for_level, (level + 1) * sizeof (tree));

      /* Zero the newly allocated items.  */
      memset (&(mi->malloc_for_level[mi->max_malloced_level]),
              0, (level + 1 - mi->max_malloced_level) * sizeof (tree));

      mi->max_malloced_level = level + 1;
    }
  mi->malloc_for_level[level] = stmt;
}

/* Given an assignment statement STMT whose left-hand side we know to
   be the matrix MI variable, traverse the immediate uses backwards
   until we get to a malloc site.  We make sure that there is one and
   only one malloc site that sets this variable.  When we are
   performing the flattening, we generate a new variable that will hold
   the size for each dimension; each malloc that allocates a dimension
   has the size parameter; we use that parameter to initialize the
   dimension size variable so we can use it later in the address
   calculations.  LEVEL is the dimension we're inspecting.  We return
   without marking an escape when STMT is recognized as part of an
   allocation site.  */

static void
analyze_matrix_allocation_site (struct matrix_info *mi, tree stmt,
                                int level, sbitmap visited)
{
  if (TREE_CODE (stmt) == GIMPLE_MODIFY_STMT)
    {
      tree rhs = GIMPLE_STMT_OPERAND (stmt, 1);

      rhs = get_inner_of_cast_expr (rhs);
      if (TREE_CODE (rhs) == SSA_NAME)
        {
          tree def = SSA_NAME_DEF_STMT (rhs);

          analyze_matrix_allocation_site (mi, def, level, visited);
          return;
        }

      /* A result of a call to malloc.  */
      else if (TREE_CODE (rhs) == CALL_EXPR)
        {
          int call_flags = call_expr_flags (rhs);

          if (!(call_flags & ECF_MALLOC))
            {
              mark_min_matrix_escape_level (mi, level, stmt);
              return;
            }
          else
            {
              tree malloc_fn_decl;
              const char *malloc_fname;

              malloc_fn_decl = CALL_EXPR_FN (rhs);
              if (TREE_CODE (malloc_fn_decl) != ADDR_EXPR
                  || TREE_CODE (TREE_OPERAND (malloc_fn_decl, 0)) !=
                  FUNCTION_DECL)
                {
                  mark_min_matrix_escape_level (mi, level, stmt);
                  return;
                }
              malloc_fn_decl = TREE_OPERAND (malloc_fn_decl, 0);
              malloc_fname = IDENTIFIER_POINTER (DECL_NAME (malloc_fn_decl));
              if (DECL_FUNCTION_CODE (malloc_fn_decl) != BUILT_IN_MALLOC)
                {
                  if (dump_file)
                    fprintf (dump_file,
                             "Matrix %s is an argument to function %s\n",
                             get_name (mi->decl), get_name (malloc_fn_decl));
                  mark_min_matrix_escape_level (mi, level, stmt);
                  return;
                }
            }
          /* This is a call to malloc.  Check to see if this is the first
             call in this indirection level; if so, mark it; if not, mark
             as escaping.  */
          if (mi->malloc_for_level
              && mi->malloc_for_level[level]
              && mi->malloc_for_level[level] != stmt)
            {
              mark_min_matrix_escape_level (mi, level, stmt);
              return;
            }
          else
            add_allocation_site (mi, stmt, level);
          return;
        }
      /* If we are back to the original matrix variable then we
         are sure that this is analyzed as an access site.  */
      else if (rhs == mi->decl)
        return;
    }
  /* It looks like we don't know what is happening in this
     statement, so be on the safe side and mark it as escaping.  */
  mark_min_matrix_escape_level (mi, level, stmt);
}

/* The transposing decision making.
   In order to calculate the profitability of transposing, we collect two
   types of information regarding the accesses:
   1. profiling information used to express the hotness of an access, that
      is how often the matrix is accessed by this access site (count of the
      access site).
   2. which dimension in the access site is iterated by the innermost
      loop containing this access.

   The matrix will have a calculated value of weighted hotness for each
   dimension.
   Intuitively the hotness level of a dimension is a function of how
   many times it was the most frequently accessed dimension in the
   highly executed access sites of this matrix.

   It is computed by the following summation over all access sites j
   (m of them) and all dimensions i (n of them):

     dim_hot_level[i] += acc[j]->dim[i]->iter_by_inner_loop * count(j)

   where acc[j]->dim[i]->iter_by_inner_loop is 1 if acc[j] iterates
   over dim[i] in the innermost loop, and is 0 otherwise.

   The organization of the new matrix should be according to the
   hotness of each dimension.  The hotness of a dimension implies
   the locality of its elements.  */
static int
analyze_transpose (void **slot, void *data ATTRIBUTE_UNUSED)
{
  struct matrix_info *mi = *slot;
  int min_escape_l = mi->min_indirect_level_escape;
  struct loop *loop;
  affine_iv iv;
  struct access_site_info *acc_info;
  int i;

  if (min_escape_l < 2 || !mi->access_l)
    {
      if (mi->access_l)
        {
          for (i = 0;
               VEC_iterate (access_site_info_p, mi->access_l, i, acc_info);
               i++)
            free (acc_info);
          VEC_free (access_site_info_p, heap, mi->access_l);
        }
      return 1;
    }
  if (!mi->dim_hot_level)
    mi->dim_hot_level =
      (gcov_type *) xcalloc (min_escape_l, sizeof (gcov_type));

  for (i = 0; VEC_iterate (access_site_info_p, mi->access_l, i, acc_info);
       i++)
    {
      if (TREE_CODE (GIMPLE_STMT_OPERAND (acc_info->stmt, 1)) == POINTER_PLUS_EXPR
          && acc_info->level < min_escape_l)
        {
          loop = loop_containing_stmt (acc_info->stmt);
          if (!loop || loop->inner)
            {
              free (acc_info);
              continue;
            }
          if (simple_iv (loop, acc_info->stmt, acc_info->offset, &iv, true))
            {
              if (iv.step != NULL)
                {
                  HOST_WIDE_INT istep;

                  istep = int_cst_value (iv.step);
                  if (istep != 0)
                    {
                      acc_info->iterated_by_inner_most_loop_p = 1;
                      mi->dim_hot_level[acc_info->level] +=
                        bb_for_stmt (acc_info->stmt)->count;
                    }
                }
            }
        }
      free (acc_info);
    }
  VEC_free (access_site_info_p, heap, mi->access_l);

  return 1;
}

/* Find the index which defines the OFFSET from base.
   We walk from use to def until we find how the offset was defined.  */
static tree
get_index_from_offset (tree offset, tree def_stmt)
{
  tree op1, op2, expr, index;

  if (TREE_CODE (def_stmt) == PHI_NODE)
    return NULL;
  expr = get_inner_of_cast_expr (GIMPLE_STMT_OPERAND (def_stmt, 1));
  if (TREE_CODE (expr) == SSA_NAME)
    return get_index_from_offset (offset, SSA_NAME_DEF_STMT (expr));
  else if (TREE_CODE (expr) == MULT_EXPR)
    {
      op1 = TREE_OPERAND (expr, 0);
      op2 = TREE_OPERAND (expr, 1);
      if (TREE_CODE (op1) != INTEGER_CST && TREE_CODE (op2) != INTEGER_CST)
        return NULL;
      index = (TREE_CODE (op1) == INTEGER_CST) ? op2 : op1;
      return index;
    }
  else
    return NULL_TREE;
}

/* Update MI->dimension_type_size[CURRENT_INDIRECT_LEVEL] with the size
   of the type related to the SSA_VAR, or the type related to the
   lhs of STMT, in the case that it is an INDIRECT_REF.  */
static void
update_type_size (struct matrix_info *mi, tree stmt, tree ssa_var,
                  int current_indirect_level)
{
  tree lhs;
  HOST_WIDE_INT type_size;

  /* Update type according to the type of the INDIRECT_REF expr.  */
  if (TREE_CODE (stmt) == GIMPLE_MODIFY_STMT
      && TREE_CODE (GIMPLE_STMT_OPERAND (stmt, 0)) == INDIRECT_REF)
    {
      lhs = GIMPLE_STMT_OPERAND (stmt, 0);
      gcc_assert (POINTER_TYPE_P
                  (TREE_TYPE (SSA_NAME_VAR (TREE_OPERAND (lhs, 0)))));
      type_size =
        int_size_in_bytes (TREE_TYPE
                           (TREE_TYPE
                            (SSA_NAME_VAR (TREE_OPERAND (lhs, 0)))));
    }
  else
    type_size = int_size_in_bytes (TREE_TYPE (ssa_var));

  /* Record the size of elements accessed (as a whole)
     in the current indirection level (dimension).  If the size of
     elements is not known at compile time, mark it as escaping.  */
  if (type_size <= 0)
    mark_min_matrix_escape_level (mi, current_indirect_level, stmt);
  else
    {
      int l = current_indirect_level;

      if (!mi->dimension_type_size)
        {
          mi->dimension_type_size
            = (HOST_WIDE_INT *) xcalloc (l + 1, sizeof (HOST_WIDE_INT));
          mi->dimension_type_size_len = l + 1;
        }
      else if (mi->dimension_type_size_len < l + 1)
        {
          mi->dimension_type_size
            = (HOST_WIDE_INT *) xrealloc (mi->dimension_type_size,
                                          (l + 1) * sizeof (HOST_WIDE_INT));
          memset (&mi->dimension_type_size[mi->dimension_type_size_len],
                  0, (l + 1 - mi->dimension_type_size_len)
                  * sizeof (HOST_WIDE_INT));
          mi->dimension_type_size_len = l + 1;
        }
      /* Make sure all the accesses in the same level have the same size
         of the type.  */
      if (!mi->dimension_type_size[l])
        mi->dimension_type_size[l] = type_size;
      else if (mi->dimension_type_size[l] != type_size)
        mark_min_matrix_escape_level (mi, l, stmt);
    }
}

/* USE_STMT represents a call_expr where one of the arguments is the
   ssa var that we want to check because it came from some use of matrix
   MI.  CURRENT_INDIRECT_LEVEL is the indirection level we reached so
   far.  */

static void
analyze_accesses_for_call_expr (struct matrix_info *mi, tree use_stmt,
                                int current_indirect_level)
{
  tree call = get_call_expr_in (use_stmt);
  if (call && get_callee_fndecl (call))
    {
      if (DECL_FUNCTION_CODE (get_callee_fndecl (call)) != BUILT_IN_FREE)
        {
          if (dump_file)
            fprintf (dump_file,
                     "Matrix %s: Function call %s, level %d escapes.\n",
                     get_name (mi->decl), get_name (get_callee_fndecl (call)),
                     current_indirect_level);
          mark_min_matrix_escape_level (mi, current_indirect_level, use_stmt);
        }
      else if (mi->free_stmts[current_indirect_level].stmt != NULL
               && mi->free_stmts[current_indirect_level].stmt != use_stmt)
        mark_min_matrix_escape_level (mi, current_indirect_level, use_stmt);
      else
        {
          /* Record the free statements so we can delete them
             later.  */
          int l = current_indirect_level;

          mi->free_stmts[l].stmt = use_stmt;
          mi->free_stmts[l].func = current_function_decl;
        }
    }
}

/* USE_STMT represents a phi node of the ssa var that we want to
   check because it came from some use of matrix MI.
   We check all the escape levels that reach the PHI node and make
   sure they are all the same; if not (which is rare) we let the
   escape level be the minimum level that gets into that PHI, because
   starting from that level we cannot expect the behavior of the
   indirections.
   CURRENT_INDIRECT_LEVEL is the indirection level we reached so far.  */

static void
analyze_accesses_for_phi_node (struct matrix_info *mi, tree use_stmt,
                               int current_indirect_level, sbitmap visited,
                               bool record_accesses)
{
  struct matrix_access_phi_node tmp_maphi, *maphi, **pmaphi;

  tmp_maphi.phi = use_stmt;
  if ((maphi = htab_find (htab_mat_acc_phi_nodes, &tmp_maphi)))
    {
      if (maphi->indirection_level == current_indirect_level)
        return;
      else
        {
          int level = MIN (maphi->indirection_level,
                           current_indirect_level);
          int j;
          tree t = NULL_TREE;

          maphi->indirection_level = level;
          for (j = 0; j < PHI_NUM_ARGS (use_stmt); j++)
            {
              tree def = PHI_ARG_DEF (use_stmt, j);

              if (TREE_CODE (SSA_NAME_DEF_STMT (def)) != PHI_NODE)
                t = SSA_NAME_DEF_STMT (def);
            }
          mark_min_matrix_escape_level (mi, level, t);
        }
      return;
    }
  maphi = (struct matrix_access_phi_node *)
    xcalloc (1, sizeof (struct matrix_access_phi_node));
  maphi->phi = use_stmt;
  maphi->indirection_level = current_indirect_level;

  /* Insert to hash table.  */
  pmaphi = (struct matrix_access_phi_node **)
    htab_find_slot (htab_mat_acc_phi_nodes, maphi, INSERT);
  gcc_assert (pmaphi);
  *pmaphi = maphi;

  if (!TEST_BIT (visited, SSA_NAME_VERSION (PHI_RESULT (use_stmt))))
    {
      SET_BIT (visited, SSA_NAME_VERSION (PHI_RESULT (use_stmt)));
      analyze_matrix_accesses (mi, PHI_RESULT (use_stmt),
                               current_indirect_level, false, visited,
                               record_accesses);
      RESET_BIT (visited, SSA_NAME_VERSION (PHI_RESULT (use_stmt)));
    }
}

/* USE_STMT represents a modify statement (the rhs or lhs includes
   the ssa var that we want to check because it came from some use of
   matrix MI).
   CURRENT_INDIRECT_LEVEL is the indirection level we reached so far.  */

static int
analyze_accesses_for_modify_stmt (struct matrix_info *mi, tree ssa_var,
                                  tree use_stmt, int current_indirect_level,
                                  bool last_op, sbitmap visited,
                                  bool record_accesses)
{
  tree lhs = GIMPLE_STMT_OPERAND (use_stmt, 0);
  tree rhs = GIMPLE_STMT_OPERAND (use_stmt, 1);
  struct ssa_acc_in_tree lhs_acc, rhs_acc;

  memset (&lhs_acc, 0, sizeof (lhs_acc));
  memset (&rhs_acc, 0, sizeof (rhs_acc));

  lhs_acc.ssa_var = ssa_var;
  lhs_acc.t_code = ERROR_MARK;
  ssa_accessed_in_tree (lhs, &lhs_acc);
  rhs_acc.ssa_var = ssa_var;
  rhs_acc.t_code = ERROR_MARK;
  ssa_accessed_in_tree (get_inner_of_cast_expr (rhs), &rhs_acc);

  /* The SSA must be either on the left side or on the right side
     for us to understand what is happening.
     In case the SSA_NAME is found on both sides we should be escaping
     at this level because in this case we cannot calculate the
     address correctly.  */
  if ((lhs_acc.var_found && rhs_acc.var_found
       && lhs_acc.t_code == INDIRECT_REF)
      || (!rhs_acc.var_found && !lhs_acc.var_found))
    {
      mark_min_matrix_escape_level (mi, current_indirect_level, use_stmt);
      return current_indirect_level;
    }
  gcc_assert (!rhs_acc.var_found || !lhs_acc.var_found);

  /* If we are storing to the matrix at some level, then mark it as
     escaping at that level.  */
  if (lhs_acc.var_found)
    {
      tree def;
      int l = current_indirect_level + 1;

      gcc_assert (lhs_acc.t_code == INDIRECT_REF);
      def = get_inner_of_cast_expr (rhs);
      if (TREE_CODE (def) != SSA_NAME)
        mark_min_matrix_escape_level (mi, l, use_stmt);
      else
        {
          def = SSA_NAME_DEF_STMT (def);
          analyze_matrix_allocation_site (mi, def, l, visited);
          if (record_accesses)
            record_access_alloc_site_info (mi, use_stmt, NULL_TREE,
                                           NULL_TREE, l, true);
          update_type_size (mi, use_stmt, NULL, l);
        }
      return current_indirect_level;
    }
  /* Now, check the right-hand side, to see how the SSA variable
     is used.  */
  if (rhs_acc.var_found)
    {
      /* If we are passing the ssa name to a function call and
         the pointer escapes when passed to the function
         (not the case of free), then we mark the matrix as
         escaping at this level.  */
      if (rhs_acc.t_code == CALL_EXPR)
        {
          analyze_accesses_for_call_expr (mi, use_stmt,
                                          current_indirect_level);

          return current_indirect_level;
        }
      if (rhs_acc.t_code != INDIRECT_REF
          && rhs_acc.t_code != POINTER_PLUS_EXPR && rhs_acc.t_code != SSA_NAME)
        {
          mark_min_matrix_escape_level (mi, current_indirect_level, use_stmt);
          return current_indirect_level;
        }
      /* If the access in the RHS has an indirection, increase the
         indirection level.  */
      if (rhs_acc.t_code == INDIRECT_REF)
        {
          if (record_accesses)
            record_access_alloc_site_info (mi, use_stmt, NULL_TREE,
                                           NULL_TREE,
                                           current_indirect_level, true);
          current_indirect_level += 1;
        }
      else if (rhs_acc.t_code == POINTER_PLUS_EXPR)
        {
          gcc_assert (rhs_acc.second_op);
          if (last_op)
            /* Currently we support only one PLUS expression on the
               SSA_NAME that holds the base address of the current
               indirection level; to support the more general case there
               is a need to hold a stack of expressions and regenerate
               the calculation later.  */
            mark_min_matrix_escape_level (mi, current_indirect_level,
                                          use_stmt);
          else
            {
              tree index;
              tree op1, op2;

              op1 = TREE_OPERAND (rhs, 0);
              op2 = TREE_OPERAND (rhs, 1);

              op2 = (op1 == ssa_var) ? op2 : op1;
              if (TREE_CODE (op2) == INTEGER_CST)
                index =
                  build_int_cst (TREE_TYPE (op1),
                                 TREE_INT_CST_LOW (op2) /
                                 int_size_in_bytes (TREE_TYPE (op1)));
              else
                {
                  index =
                    get_index_from_offset (op2, SSA_NAME_DEF_STMT (op2));
                  if (index == NULL_TREE)
                    {
                      mark_min_matrix_escape_level (mi,
                                                    current_indirect_level,
                                                    use_stmt);
                      return current_indirect_level;
                    }
                }
              if (record_accesses)
                record_access_alloc_site_info (mi, use_stmt, op2,
                                               index,
                                               current_indirect_level, false);
            }
        }
      /* If we are storing this level of indirection, mark it as
         escaping.  */
      if (lhs_acc.t_code == INDIRECT_REF || TREE_CODE (lhs) != SSA_NAME)
        {
          int l = current_indirect_level;

          /* One exception is when we are storing to the matrix
             variable itself; this is the case of malloc, and we must
             make sure that it's the one and only one call to malloc, so
             we call analyze_matrix_allocation_site to check
             this out.  */
          if (TREE_CODE (lhs) != VAR_DECL || lhs != mi->decl)
            mark_min_matrix_escape_level (mi, current_indirect_level,
                                          use_stmt);
          else
            {
              /* Also update the escaping level.  */
              analyze_matrix_allocation_site (mi, use_stmt, l, visited);
              if (record_accesses)
                record_access_alloc_site_info (mi, use_stmt, NULL_TREE,
                                               NULL_TREE, l, true);
            }
        }
      else
        {
          /* We are placing it in an SSA, follow that SSA.  */
          analyze_matrix_accesses (mi, lhs,
                                   current_indirect_level,
                                   rhs_acc.t_code == POINTER_PLUS_EXPR,
                                   visited, record_accesses);
        }
    }
  return current_indirect_level;
}

/* Given an SSA_VAR (coming from a use statement of the matrix MI),
   follow its uses and levels of indirection and find out the minimum
   indirection level it escapes in (the highest dimension) and the
   maximum level it is accessed in (this will be the actual dimension
   of the matrix).  The information is accumulated in MI.
   We look at the immediate uses; if one escapes we finish, and if not
   we make a recursive call for each one of the immediate uses of the
   resulting SSA name.  */
static void
analyze_matrix_accesses (struct matrix_info *mi, tree ssa_var,
                         int current_indirect_level, bool last_op,
                         sbitmap visited, bool record_accesses)
{
  imm_use_iterator imm_iter;
  use_operand_p use_p;

  update_type_size (mi, SSA_NAME_DEF_STMT (ssa_var), ssa_var,
                    current_indirect_level);

  /* We don't go beyond the escaping level when we are performing the
     flattening.  NOTE: we keep the last indirection level that doesn't
     escape.  */
  if (mi->min_indirect_level_escape > -1
      && mi->min_indirect_level_escape <= current_indirect_level)
    return;

  /* Now go over the uses of the SSA_NAME and check how it is used in
     each one of them.  We are mainly looking for the pattern
     INDIRECT_REF, then a POINTER_PLUS_EXPR, then INDIRECT_REF, etc.,
     while in between there could be any number of copies and casts.  */
  gcc_assert (TREE_CODE (ssa_var) == SSA_NAME);

  FOR_EACH_IMM_USE_FAST (use_p, imm_iter, ssa_var)
  {
    tree use_stmt = USE_STMT (use_p);
    if (TREE_CODE (use_stmt) == PHI_NODE)
      /* We check all the escape levels that reach the PHI node and
         make sure they are all the same; if not (which is rare) we
         let the escape level be the minimum level that gets into
         that PHI, because starting from that level we cannot expect
         the behavior of the indirections.  */
      analyze_accesses_for_phi_node (mi, use_stmt, current_indirect_level,
                                     visited, record_accesses);
    else if (TREE_CODE (use_stmt) == CALL_EXPR)
      analyze_accesses_for_call_expr (mi, use_stmt, current_indirect_level);
    else if (TREE_CODE (use_stmt) == GIMPLE_MODIFY_STMT)
      current_indirect_level =
        analyze_accesses_for_modify_stmt (mi, ssa_var, use_stmt,
                                          current_indirect_level, last_op,
                                          visited, record_accesses);
  }
}


/* A walk_tree function to go over the VAR_DECL, PARM_DECL nodes of
   the malloc size expression and check that those aren't changed
   over the function.  */
static tree
check_var_notmodified_p (tree *tp, int *walk_subtrees, void *data)
{
  basic_block bb;
  tree t = *tp;
  tree fn = data;
  block_stmt_iterator bsi;
  tree stmt;

  if (TREE_CODE (t) != VAR_DECL && TREE_CODE (t) != PARM_DECL)
    return NULL_TREE;

  FOR_EACH_BB_FN (bb, DECL_STRUCT_FUNCTION (fn))
    {
      for (bsi = bsi_start (bb); !bsi_end_p (bsi); bsi_next (&bsi))
        {
          stmt = bsi_stmt (bsi);
          if (TREE_CODE (stmt) != GIMPLE_MODIFY_STMT)
            continue;
          if (GIMPLE_STMT_OPERAND (stmt, 0) == t)
            return stmt;
        }
    }
  *walk_subtrees = 1;
  return NULL_TREE;
}

/* Go backwards in the use-def chains and find the expression
   represented by the possible SSA name in EXPR, until it is composed
   of only VAR_DECL, PARM_DECL and INT_CST.  In case of phi nodes
   we make sure that all the arguments represent the same subexpression,
   otherwise we fail.  */
static tree
can_calculate_expr_before_stmt (tree expr, sbitmap visited)
{
  tree def_stmt, op1, op2, res;

  switch (TREE_CODE (expr))
    {
    case SSA_NAME:
      /* Case of a loop: we don't know how to represent this
         expression.  */
      if (TEST_BIT (visited, SSA_NAME_VERSION (expr)))
        return NULL_TREE;

      SET_BIT (visited, SSA_NAME_VERSION (expr));
      def_stmt = SSA_NAME_DEF_STMT (expr);
      res = can_calculate_expr_before_stmt (def_stmt, visited);
      RESET_BIT (visited, SSA_NAME_VERSION (expr));
      return res;
    case VAR_DECL:
    case PARM_DECL:
    case INTEGER_CST:
      return expr;
    case POINTER_PLUS_EXPR:
    case PLUS_EXPR:
    case MINUS_EXPR:
    case MULT_EXPR:
      op1 = TREE_OPERAND (expr, 0);
      op2 = TREE_OPERAND (expr, 1);

      op1 = can_calculate_expr_before_stmt (op1, visited);
      if (!op1)
        return NULL_TREE;
      op2 = can_calculate_expr_before_stmt (op2, visited);
      if (op2)
        return fold_build2 (TREE_CODE (expr), TREE_TYPE (expr), op1, op2);
      return NULL_TREE;
    case GIMPLE_MODIFY_STMT:
      return can_calculate_expr_before_stmt (GIMPLE_STMT_OPERAND (expr, 1),
                                             visited);
    case PHI_NODE:
      {
        int j;

        res = NULL_TREE;
        /* Make sure all the arguments represent the same value.  */
        for (j = 0; j < PHI_NUM_ARGS (expr); j++)
          {
            tree new_res;
            tree def = PHI_ARG_DEF (expr, j);

            new_res = can_calculate_expr_before_stmt (def, visited);
            if (res == NULL_TREE)
              res = new_res;
            else if (!new_res || !expressions_equal_p (res, new_res))
              return NULL_TREE;
          }
        return res;
      }
    case NOP_EXPR:
    case CONVERT_EXPR:
      res = can_calculate_expr_before_stmt (TREE_OPERAND (expr, 0), visited);
      if (res != NULL_TREE)
        return build1 (TREE_CODE (expr), TREE_TYPE (expr), res);
      else
        return NULL_TREE;

    default:
      return NULL_TREE;
    }
}
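
/* For illustration (a hypothetical sketch): a size expression
   gimplified from "n * sizeof (int)", where n is a PARM_DECL, folds
   back to the tree "n * 4", which can then be re-emitted before the
   level-0 malloc; an SSA name whose definition we reach a second time
   (a loop-carried value) yields NULL_TREE instead, and the matrix is
   marked as escaping at that level.  */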

/* There should be only one allocation function for the dimensions
   that don't escape.  Here we check the allocation sites in this
   function.  We must make sure that all the dimensions are allocated
   using malloc and that the malloc size parameter expression could be
   pre-calculated before the call to the malloc of dimension 0.

   Given a candidate matrix for flattening -- MI -- check if it's
   appropriate for flattening -- we analyze the allocation
   sites that we recorded in the previous analysis.  The result of the
   analysis is a level of indirection (matrix dimension) in which the
   flattening is safe.  We check the following conditions:
   1. There is only one allocation site for each dimension.
   2. The allocation sites of all the dimensions are in the same
      function.
   (The above two are taken care of during the analysis when
   we check the allocation site.)
   3. All the dimensions that we flatten are allocated at once; thus
      the total size must be known before the allocation of the
      dimension 0 (top level) -- we must make sure we represent the
      size of the allocation as an expression of global parameters or
      constants and that those don't change over the function.  */

static int
check_allocation_function (void **slot, void *data ATTRIBUTE_UNUSED)
{
  int level;
  block_stmt_iterator bsi;
  basic_block bb_level_0;
  struct matrix_info *mi = *slot;
  sbitmap visited;

  if (!mi->malloc_for_level)
    return 1;

  /* Do nothing if the current function is not the allocation
     function of MI.  */
  if (mi->allocation_function_decl != current_function_decl
      /* We aren't in the main allocation function yet.  */
      || !mi->malloc_for_level[0])
    return 1;

  visited = sbitmap_alloc (num_ssa_names);

  for (level = 1; level < mi->max_malloced_level; level++)
    if (!mi->malloc_for_level[level])
      break;

  mark_min_matrix_escape_level (mi, level, NULL_TREE);

  bsi = bsi_for_stmt (mi->malloc_for_level[0]);
  bb_level_0 = bsi.bb;

  /* Check if the expression of the size passed to malloc could be
     pre-calculated before the malloc of level 0.  */
  for (level = 1; level < mi->min_indirect_level_escape; level++)
    {
      tree call_stmt, size;
      struct malloc_call_data mcd;

      call_stmt = mi->malloc_for_level[level];

      /* Find the correct malloc information.  */
      collect_data_for_malloc_call (call_stmt, &mcd);

      /* No need to check anticipation for constants.  */
      if (TREE_CODE (mcd.size_var) == INTEGER_CST)
        {
          if (!mi->dimension_size)
            {
              mi->dimension_size =
                (tree *) xcalloc (mi->min_indirect_level_escape,
                                  sizeof (tree));
              mi->dimension_size_orig =
                (tree *) xcalloc (mi->min_indirect_level_escape,
                                  sizeof (tree));
            }
          mi->dimension_size[level] = mcd.size_var;
          mi->dimension_size_orig[level] = mcd.size_var;
          continue;
        }
      /* ??? Here we should also add the way to calculate the size
         expression, not only know that it is anticipated.  */
      sbitmap_zero (visited);
      size = can_calculate_expr_before_stmt (mcd.size_var, visited);
      if (size == NULL_TREE)
        {
          mark_min_matrix_escape_level (mi, level, call_stmt);
          if (dump_file)
            fprintf (dump_file,
                     "Matrix %s: Cannot calculate the size of allocation, escaping at level %d\n",
                     get_name (mi->decl), level);
          break;
        }
      if (!mi->dimension_size)
        {
          mi->dimension_size =
            (tree *) xcalloc (mi->min_indirect_level_escape, sizeof (tree));
          mi->dimension_size_orig =
            (tree *) xcalloc (mi->min_indirect_level_escape, sizeof (tree));
        }
      mi->dimension_size[level] = size;
      mi->dimension_size_orig[level] = size;
    }

  /* We don't need those anymore.  */
  for (level = mi->min_indirect_level_escape;
       level < mi->max_malloced_level; level++)
    mi->malloc_for_level[level] = NULL;

  sbitmap_free (visited);
  return 1;
}

/* Track all access and allocation sites.  */
static void
find_sites_in_func (bool record)
{
  sbitmap visited_stmts_1;

  block_stmt_iterator bsi;
  tree stmt;
  basic_block bb;
  struct matrix_info tmpmi, *mi;

  visited_stmts_1 = sbitmap_alloc (num_ssa_names);

  FOR_EACH_BB (bb)
  {
    for (bsi = bsi_start (bb); !bsi_end_p (bsi); bsi_next (&bsi))
      {
        stmt = bsi_stmt (bsi);
        if (TREE_CODE (stmt) == GIMPLE_MODIFY_STMT
            && TREE_CODE (GIMPLE_STMT_OPERAND (stmt, 0)) == VAR_DECL)
          {
            tmpmi.decl = GIMPLE_STMT_OPERAND (stmt, 0);
            if ((mi = htab_find (matrices_to_reorg, &tmpmi)))
              {
                sbitmap_zero (visited_stmts_1);
                analyze_matrix_allocation_site (mi, stmt, 0, visited_stmts_1);
              }
          }
        if (TREE_CODE (stmt) == GIMPLE_MODIFY_STMT
            && TREE_CODE (GIMPLE_STMT_OPERAND (stmt, 0)) == SSA_NAME
            && TREE_CODE (GIMPLE_STMT_OPERAND (stmt, 1)) == VAR_DECL)
          {
            tmpmi.decl = GIMPLE_STMT_OPERAND (stmt, 1);
            if ((mi = htab_find (matrices_to_reorg, &tmpmi)))
              {
                sbitmap_zero (visited_stmts_1);
                analyze_matrix_accesses (mi,
                                         GIMPLE_STMT_OPERAND (stmt, 0), 0,
                                         false, visited_stmts_1, record);
              }
          }
      }
  }
  sbitmap_free (visited_stmts_1);
}

/* Traverse the use-def chains to see if there are matrices that
   are passed through pointers and we cannot know how they are accessed.
   For each SSA name defined by a global variable of our interest,
   we traverse the use-def chains of the SSA and follow the indirections,
   and record in what level of indirection the use of the variable
   escapes.  A use of a pointer escapes when it is passed to a function,
   stored into memory or assigned (except in malloc and free calls).  */

static void
record_all_accesses_in_func (void)
{
  unsigned i;
  sbitmap visited_stmts_1;

  visited_stmts_1 = sbitmap_alloc (num_ssa_names);

  for (i = 0; i < num_ssa_names; i++)
    {
      struct matrix_info tmpmi, *mi;
      tree ssa_var = ssa_name (i);
      tree rhs, lhs;

      if (!ssa_var
          || TREE_CODE (SSA_NAME_DEF_STMT (ssa_var)) != GIMPLE_MODIFY_STMT)
        continue;
      rhs = GIMPLE_STMT_OPERAND (SSA_NAME_DEF_STMT (ssa_var), 1);
      lhs = GIMPLE_STMT_OPERAND (SSA_NAME_DEF_STMT (ssa_var), 0);
      if (TREE_CODE (rhs) != VAR_DECL && TREE_CODE (lhs) != VAR_DECL)
        continue;

      /* If the RHS is a matrix that we want to analyze, follow the def-use
         chain for this SSA_VAR and check for escapes or apply the
         flattening.  */
      tmpmi.decl = rhs;
      if ((mi = htab_find (matrices_to_reorg, &tmpmi)))
        {
          /* This variable will track the visited PHI nodes, so we can limit
             its size to the maximum number of SSA names.  */
          sbitmap_zero (visited_stmts_1);
          analyze_matrix_accesses (mi, ssa_var,
                                   0, false, visited_stmts_1, true);
        }
    }
  sbitmap_free (visited_stmts_1);
}

/* Used when we want to convert the expression
   RESULT = something * ORIG to RESULT = something * NEW.
   If ORIG and NEW are powers of 2, shift operations can be done,
   else division and multiplication.  */
static tree
compute_offset (HOST_WIDE_INT orig, HOST_WIDE_INT new, tree result)
{
  int x, y;
  tree result1, ratio, log, orig_tree, new_tree;

  x = exact_log2 (orig);
  y = exact_log2 (new);

  if (x != -1 && y != -1)
    {
      if (x == y)
        return result;
      else if (x > y)
        {
          log = build_int_cst (TREE_TYPE (result), x - y);
          result1 =
            fold_build2 (LSHIFT_EXPR, TREE_TYPE (result), result, log);
          return result1;
        }
      log = build_int_cst (TREE_TYPE (result), y - x);
      result1 = fold_build2 (RSHIFT_EXPR, TREE_TYPE (result), result, log);

      return result1;
    }
  orig_tree = build_int_cst (TREE_TYPE (result), orig);
  new_tree = build_int_cst (TREE_TYPE (result), new);
  ratio = fold_build2 (TRUNC_DIV_EXPR, TREE_TYPE (result), result, orig_tree);
  result1 = fold_build2 (MULT_EXPR, TREE_TYPE (result), ratio, new_tree);

  return result1;
}
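
/* A worked example of the above (illustrative values only): for
   ORIG == 8 and NEW == 2 both are powers of 2 (x == 3, y == 1), so the
   power-of-two path applies and RESULT is shifted left by
   x - y == 2 bits; for ORIG == 12 and NEW == 4 the general path is
   taken, rebuilding the expression as (RESULT / 12) * 4 with
   TRUNC_DIV_EXPR and MULT_EXPR.  */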


/* We know that we are allowed to perform matrix flattening (according to the
   escape analysis), so we traverse the use-def chains of the SSA vars
   defined by the global variables pointing to the matrices of our interest.
   In each use of the SSA we calculate the offset from the base address
   according to the following equation:

     a[I1][I2]...[Ik], where D1..Dk is the length of each dimension and the
     escaping level is m <= k, and a' is the new allocated matrix,
     will be translated to:

       b[I(m+1)]...[Ik]

     where
       b = a' + I1*D2*...*Dm + I2*D3*...*Dm + ... + Im
 */

1710
1711 static int
1712 transform_access_sites (void **slot, void *data ATTRIBUTE_UNUSED)
1713 {
1714 block_stmt_iterator bsi;
1715 struct matrix_info *mi = *slot;
1716 int min_escape_l = mi->min_indirect_level_escape;
1717 struct access_site_info *acc_info;
1718 int i;
1719
1720 if (min_escape_l < 2 || !mi->access_l)
1721 return 1;
1722 for (i = 0; VEC_iterate (access_site_info_p, mi->access_l, i, acc_info);
1723 i++)
1724 {
1725 tree orig, type;
1726
1727 /* This is possible because we collect the access sites before
1728 we determine the final minimum indirection level. */
1729 if (acc_info->level >= min_escape_l)
1730 {
1731 free (acc_info);
1732 continue;
1733 }
1734 if (acc_info->is_alloc)
1735 {
1736 if (acc_info->level >= 0 && bb_for_stmt (acc_info->stmt))
1737 {
1738 ssa_op_iter iter;
1739 tree def;
1740 tree stmt = acc_info->stmt;
1741
1742 FOR_EACH_SSA_TREE_OPERAND (def, stmt, iter, SSA_OP_DEF)
1743 mark_sym_for_renaming (SSA_NAME_VAR (def));
1744 bsi = bsi_for_stmt (stmt);
1745 gcc_assert (TREE_CODE (acc_info->stmt) == GIMPLE_MODIFY_STMT);
1746 if (TREE_CODE (GIMPLE_STMT_OPERAND (acc_info->stmt, 0)) ==
1747 SSA_NAME && acc_info->level < min_escape_l - 1)
1748 {
1749 imm_use_iterator imm_iter;
1750 use_operand_p use_p;
1751 tree use_stmt;
1752
1753 FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter,
1754 GIMPLE_STMT_OPERAND (acc_info->stmt,
1755 0))
1756 FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
1757 {
1758 tree conv, tmp, stmts;
1759
1760 /* Emit convert statement to convert to type of use. */
1761 conv =
1762 fold_build1 (CONVERT_EXPR,
1763 TREE_TYPE (GIMPLE_STMT_OPERAND
1764 (acc_info->stmt, 0)),
1765 TREE_OPERAND (GIMPLE_STMT_OPERAND
1766 (acc_info->stmt, 1), 0));
1767 tmp =
1768 create_tmp_var (TREE_TYPE
1769 (GIMPLE_STMT_OPERAND
1770 (acc_info->stmt, 0)), "new");
1771 add_referenced_var (tmp);
1772 stmts =
1773 fold_build2 (GIMPLE_MODIFY_STMT,
1774 TREE_TYPE (GIMPLE_STMT_OPERAND
1775 (acc_info->stmt, 0)), tmp,
1776 conv);
1777 tmp = make_ssa_name (tmp, stmts);
1778 GIMPLE_STMT_OPERAND (stmts, 0) = tmp;
1779 bsi = bsi_for_stmt (acc_info->stmt);
1780 bsi_insert_after (&bsi, stmts, BSI_SAME_STMT);
1781 SET_USE (use_p, tmp);
1782 }
1783 }
1784 if (acc_info->level < min_escape_l - 1)
1785 bsi_remove (&bsi, true);
1786 }
1787 free (acc_info);
1788 continue;
1789 }
1790 orig = GIMPLE_STMT_OPERAND (acc_info->stmt, 1);
1791 type = TREE_TYPE (orig);
1792 if (TREE_CODE (orig) == INDIRECT_REF
1793 && acc_info->level < min_escape_l - 1)
1794 {
/* Replace the INDIRECT_REF with a NOP_EXPR (cast); usually we are
casting from "pointer to type" to "type".  */
1797 orig =
1798 build1 (NOP_EXPR, TREE_TYPE (orig),
1799 GIMPLE_STMT_OPERAND (orig, 0));
1800 GIMPLE_STMT_OPERAND (acc_info->stmt, 1) = orig;
1801 }
1802 else if (TREE_CODE (orig) == POINTER_PLUS_EXPR
1803 && acc_info->level < (min_escape_l))
1804 {
1805 imm_use_iterator imm_iter;
1806 use_operand_p use_p;
1807
1808 tree offset;
1809 int k = acc_info->level;
1810 tree num_elements, total_elements;
1811 tree tmp1;
1812 tree d_size = mi->dimension_size[k];
1813
/* We already made sure during the analysis that the first operand
is the base and the second is the offset.  */
1816 offset = acc_info->offset;
1817 if (mi->dim_map[k] == min_escape_l - 1)
1818 {
1819 if (!check_transpose_p || mi->is_transposed_p == false)
1820 tmp1 = offset;
1821 else
1822 {
1823 tree new_offset;
1824 tree d_type_size, d_type_size_k;
1825
1826 d_type_size = size_int (mi->dimension_type_size[min_escape_l]);
1827 d_type_size_k = size_int (mi->dimension_type_size[k + 1]);
1828
1829 new_offset =
1830 compute_offset (mi->dimension_type_size[min_escape_l],
1831 mi->dimension_type_size[k + 1], offset);
1832
1833 total_elements = new_offset;
1834 if (new_offset != offset)
1835 {
1836 bsi = bsi_for_stmt (acc_info->stmt);
1837 tmp1 = force_gimple_operand_bsi (&bsi, total_elements,
1838 true, NULL,
1839 true, BSI_SAME_STMT);
1840 }
1841 else
1842 tmp1 = offset;
1843 }
1844 }
1845 else
1846 {
1847 d_size = mi->dimension_size[mi->dim_map[k] + 1];
1848 num_elements =
1849 fold_build2 (MULT_EXPR, sizetype, fold_convert (sizetype, acc_info->index),
1850 fold_convert (sizetype, d_size));
1851 add_referenced_var (d_size);
1852 bsi = bsi_for_stmt (acc_info->stmt);
1853 tmp1 = force_gimple_operand_bsi (&bsi, num_elements, true,
1854 NULL, true, BSI_SAME_STMT);
1855 }
1856 /* Replace the offset if needed. */
1857 if (tmp1 != offset)
1858 {
1859 if (TREE_CODE (offset) == SSA_NAME)
1860 {
1861 tree use_stmt;
1862
1863 FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, offset)
1864 FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
1865 if (use_stmt == acc_info->stmt)
1866 SET_USE (use_p, tmp1);
1867 }
1868 else
1869 {
1870 gcc_assert (TREE_CODE (offset) == INTEGER_CST);
1871 TREE_OPERAND (orig, 1) = tmp1;
1872 }
1873 }
1874 }
/* ??? Meanwhile this happens because we record the same access
site more than once; we should use a hash table to avoid this
and insert the STMT of the access site only once.
else
gcc_unreachable (); */
1881 free (acc_info);
1882 }
1883 VEC_free (access_site_info_p, heap, mi->access_l);
1884
1885 update_ssa (TODO_update_ssa);
1886 #ifdef ENABLE_CHECKING
1887 verify_ssa (true);
1888 #endif
1889 return 1;
1890 }
1891
/* Sort the array A of counts into ascending order (a simple bubble
sort).  Rearrange DIM_MAP to reflect the new order.  */
1893
1894 static void
1895 sort_dim_hot_level (gcov_type * a, int *dim_map, int n)
1896 {
1897 int i, j, tmp1;
1898 gcov_type tmp;
1899
1900 for (i = 0; i < n - 1; i++)
1901 {
1902 for (j = 0; j < n - 1 - i; j++)
1903 {
1904 if (a[j + 1] < a[j])
1905 {
1906 tmp = a[j]; /* swap a[j] and a[j+1] */
1907 a[j] = a[j + 1];
1908 a[j + 1] = tmp;
1909 tmp1 = dim_map[j];
1910 dim_map[j] = dim_map[j + 1];
1911 dim_map[j + 1] = tmp1;
1912 }
1913 }
1914 }
1915 }
1916
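/* For illustration (editorial example): with counts A = {30, 10, 20}
   and DIM_MAP = {0, 1, 2}, the bubble sort above yields A = {10, 20, 30}
   and DIM_MAP = {1, 2, 0}, so the hottest dimension (original dimension 0)
   ends up in the innermost position.  */
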
/* Replace multiple mallocs (one for each dimension) with one malloc
whose size is DIM1*DIM2*...*DIMN*SIZE_OF_ELEMENT.  Make sure that we
hold the size used at the malloc site in a new global variable; this
way we ensure that the size doesn't change and that it is accessible
from all the other functions that use the matrix.  Also, the original
calls to free are deleted and replaced by a new call that frees the
flattened matrix.  */
1924
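/* As an editorial illustration (extending the flattening idea to three
   levels), a matrix allocated as

     a = (int ***) malloc (dim1 * sizeof (int **));
     for (i = 0; i < dim1; i++)
       {
         a[i] = (int **) malloc (dim2 * sizeof (int *));
         for (j = 0; j < dim2; j++)
           a[i][j] = (int *) malloc (dim3 * sizeof (int));
       }

   becomes, when all three levels may be flattened,

     a = (int *) malloc (dim1 * dim2 * dim3 * sizeof (int));

   and the inner malloc calls, together with their matching free calls,
   are removed.  */
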
1925 static int
1926 transform_allocation_sites (void **slot, void *data ATTRIBUTE_UNUSED)
1927 {
1928 int i;
1929 struct matrix_info *mi;
1930 tree type, call_stmt_0, malloc_stmt, oldfn, prev_dim_size, use_stmt;
1931 struct cgraph_node *c_node;
1932 struct cgraph_edge *e;
1933 block_stmt_iterator bsi;
1934 struct malloc_call_data mcd;
1935 HOST_WIDE_INT element_size;
1936
1937 imm_use_iterator imm_iter;
1938 use_operand_p use_p;
1939 tree old_size_0, tmp;
1940 int min_escape_l;
1941 int id;
1942
1943 mi = *slot;
1944
1945 min_escape_l = mi->min_indirect_level_escape;
1946
1947 if (!mi->malloc_for_level)
1948 mi->min_indirect_level_escape = 0;
1949
1950 if (mi->min_indirect_level_escape < 2)
1951 return 1;
1952
1953 mi->dim_map = (int *) xcalloc (mi->min_indirect_level_escape, sizeof (int));
1954 for (i = 0; i < mi->min_indirect_level_escape; i++)
1955 mi->dim_map[i] = i;
1956 if (check_transpose_p)
1957 {
1958 int i;
1959
1960 if (dump_file)
1961 {
1962 fprintf (dump_file, "Matrix %s:\n", get_name (mi->decl));
1963 for (i = 0; i < min_escape_l; i++)
1964 {
1965 fprintf (dump_file, "dim %d before sort ", i);
1966 if (mi->dim_hot_level)
1967 fprintf (dump_file,
1968 "count is " HOST_WIDEST_INT_PRINT_DEC " \n",
1969 mi->dim_hot_level[i]);
1970 }
1971 }
1972 sort_dim_hot_level (mi->dim_hot_level, mi->dim_map,
1973 mi->min_indirect_level_escape);
1974 if (dump_file)
1975 for (i = 0; i < min_escape_l; i++)
1976 {
1977 fprintf (dump_file, "dim %d after sort\n", i);
1978 if (mi->dim_hot_level)
1979 fprintf (dump_file, "count is " HOST_WIDE_INT_PRINT_DEC
1980 " \n", (HOST_WIDE_INT) mi->dim_hot_level[i]);
1981 }
1982 for (i = 0; i < mi->min_indirect_level_escape; i++)
1983 {
1984 if (dump_file)
1985 fprintf (dump_file, "dim_map[%d] after sort %d\n", i,
1986 mi->dim_map[i]);
1987 if (mi->dim_map[i] != i)
1988 {
1989 if (dump_file)
1990 fprintf (dump_file,
1991 "Transposed dimensions: dim %d is now dim %d\n",
1992 mi->dim_map[i], i);
1993 mi->is_transposed_p = true;
1994 }
1995 }
1996 }
1997 else
1998 {
1999 for (i = 0; i < mi->min_indirect_level_escape; i++)
2000 mi->dim_map[i] = i;
2001 }
2002 /* Call statement of allocation site of level 0. */
2003 call_stmt_0 = mi->malloc_for_level[0];
2004
2005 /* Finds the correct malloc information. */
2006 collect_data_for_malloc_call (call_stmt_0, &mcd);
2007
2008 mi->dimension_size[0] = mcd.size_var;
2009 mi->dimension_size_orig[0] = mcd.size_var;
2010 /* Make sure that the variables in the size expression for
2011 all the dimensions (above level 0) aren't modified in
2012 the allocation function. */
2013 for (i = 1; i < mi->min_indirect_level_escape; i++)
2014 {
2015 tree t;
2016
2017 /* mi->dimension_size must contain the expression of the size calculated
2018 in check_allocation_function. */
2019 gcc_assert (mi->dimension_size[i]);
2020
2021 t = walk_tree_without_duplicates (&(mi->dimension_size[i]),
2022 check_var_notmodified_p,
2023 mi->allocation_function_decl);
2024 if (t != NULL_TREE)
2025 {
2026 mark_min_matrix_escape_level (mi, i, t);
2027 break;
2028 }
2029 }
2030
2031 if (mi->min_indirect_level_escape < 2)
2032 return 1;
2033
/* Make sure that the size expressions are available before the call
to the malloc of level 0: position the iterator at that call.  */
2036 bsi = bsi_for_stmt (call_stmt_0);
2037
2038 /* Find out the size of each dimension by looking at the malloc
2039 sites and create a global variable to hold it.
2040 We add the assignment to the global before the malloc of level 0. */
2041
/* Switch context to the allocation function to be able to produce
gimple temporaries there.  */
2043 oldfn = current_function_decl;
2044 current_function_decl = mi->allocation_function_decl;
2045 cfun = DECL_STRUCT_FUNCTION (mi->allocation_function_decl);
2046
/* Set the dimension sizes as follows:
DIM_SIZE[i] = COUNT[i] * COUNT[i+1] * ... * COUNT[n-1] * ELEMENT_SIZE,
where COUNT[j] is the number of elements in dimension j (after any
transposition) and n is the maximum non-escaping level.  */
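/* For illustration (editorial example, identity DIM_MAP and
   check_transpose_p false): for a two-level int matrix whose malloc
   sizes were dim1 * sizeof (int *) and dim2 * sizeof (int), the loop
   below computes DIM_SIZE[1] = dim2 * sizeof (int) and
   DIM_SIZE[0] = (dim1 * sizeof (int *)) / sizeof (int *) * DIM_SIZE[1]
               = dim1 * dim2 * sizeof (int),
   which then becomes the argument of the single remaining malloc.  */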
2050 element_size = mi->dimension_type_size[mi->min_indirect_level_escape];
2051 prev_dim_size = NULL_TREE;
2052
2053 for (i = mi->min_indirect_level_escape - 1; i >= 0; i--)
2054 {
2055 tree dim_size, dim_var, tmp;
2056 tree d_type_size;
2057
2058 /* Now put the size expression in a global variable and initialize it to
2059 the size expression before the malloc of level 0. */
2060 dim_var =
2061 add_new_static_var (TREE_TYPE
2062 (mi->dimension_size_orig[mi->dim_map[i]]));
2063 type = TREE_TYPE (mi->dimension_size_orig[mi->dim_map[i]]);
2064
2065 /* DIM_SIZE = MALLOC_SIZE_PARAM / TYPE_SIZE. */
2066 /* Find which dim ID becomes dim I. */
2067 for (id = 0; id < mi->min_indirect_level_escape; id++)
2068 if (mi->dim_map[id] == i)
2069 break;
2070 d_type_size =
2071 build_int_cst (type, mi->dimension_type_size[id + 1]);
2072 if (!prev_dim_size)
2073 prev_dim_size = build_int_cst (type, element_size);
2074 if (!check_transpose_p && i == mi->min_indirect_level_escape - 1)
2075 {
2076 dim_size = mi->dimension_size_orig[id];
2077 }
2078 else
2079 {
2080 dim_size =
2081 fold_build2 (TRUNC_DIV_EXPR, type, mi->dimension_size_orig[id],
2082 d_type_size);
2083
2084 dim_size = fold_build2 (MULT_EXPR, type, dim_size, prev_dim_size);
2085 }
2086 dim_size = force_gimple_operand_bsi (&bsi, dim_size, true, NULL,
2087 true, BSI_SAME_STMT);
2088 /* GLOBAL_HOLDING_THE_SIZE = DIM_SIZE. */
2089 tmp = fold_build2 (GIMPLE_MODIFY_STMT, type, dim_var, dim_size);
2090 GIMPLE_STMT_OPERAND (tmp, 0) = dim_var;
2091 mark_symbols_for_renaming (tmp);
2092 bsi_insert_before (&bsi, tmp, BSI_SAME_STMT);
2093
2094 prev_dim_size = mi->dimension_size[i] = dim_var;
2095 }
2096 update_ssa (TODO_update_ssa);
/* Replace the malloc size argument in the malloc of level 0 with the
total size of all the dimensions.  */
2099 malloc_stmt = GIMPLE_STMT_OPERAND (call_stmt_0, 1);
2100 c_node = cgraph_node (mi->allocation_function_decl);
2101 old_size_0 = CALL_EXPR_ARG (malloc_stmt, 0);
2102 tmp = force_gimple_operand_bsi (&bsi, mi->dimension_size[0], true,
2103 NULL, true, BSI_SAME_STMT);
2104 if (TREE_CODE (old_size_0) == SSA_NAME)
2105 {
2106 FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, old_size_0)
2107 FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
2108 if (use_stmt == call_stmt_0)
2109 SET_USE (use_p, tmp);
2110 }
/* When deleting the calls to malloc we also need to remove the edge
from the call graph to keep it consistent.  Notice that cgraph_edge
may create a new node in the call graph if there is no node for the
given declaration; this shouldn't be the case, but currently there is
no way to check it outside of "cgraph.c".  */
2116 for (i = 1; i < mi->min_indirect_level_escape; i++)
2117 {
2118 block_stmt_iterator bsi;
2119 tree use_stmt1 = NULL;
2120 tree call;
2121
2122 tree call_stmt = mi->malloc_for_level[i];
2123 call = GIMPLE_STMT_OPERAND (call_stmt, 1);
2124 gcc_assert (TREE_CODE (call) == CALL_EXPR);
2125 e = cgraph_edge (c_node, call_stmt);
2126 gcc_assert (e);
2127 cgraph_remove_edge (e);
2128 bsi = bsi_for_stmt (call_stmt);
2129 /* Remove the call stmt. */
2130 bsi_remove (&bsi, true);
/* Remove the type-cast stmt.  */
2132 FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter,
2133 GIMPLE_STMT_OPERAND (call_stmt, 0))
2134 {
2135 use_stmt1 = use_stmt;
2136 bsi = bsi_for_stmt (use_stmt);
2137 bsi_remove (&bsi, true);
2138 }
2139 /* Remove the assignment of the allocated area. */
2140 FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter,
2141 GIMPLE_STMT_OPERAND (use_stmt1, 0))
2142 {
2143 bsi = bsi_for_stmt (use_stmt);
2144 bsi_remove (&bsi, true);
2145 }
2146 }
2147 update_ssa (TODO_update_ssa);
2148 #ifdef ENABLE_CHECKING
2149 verify_ssa (true);
2150 #endif
2151 /* Delete the calls to free. */
2152 for (i = 1; i < mi->min_indirect_level_escape; i++)
2153 {
2154 block_stmt_iterator bsi;
2155 tree call;
2156
/* ??? It is unclear why this case is possible, but we have hit it
at least once.  */
2158 if (!mi->free_stmts[i].stmt)
2159 continue;
2160
2161 call = TREE_OPERAND (mi->free_stmts[i].stmt, 1);
2162 c_node = cgraph_node (mi->free_stmts[i].func);
2163
2164 gcc_assert (TREE_CODE (mi->free_stmts[i].stmt) == CALL_EXPR);
2165 e = cgraph_edge (c_node, mi->free_stmts[i].stmt);
2166 gcc_assert (e);
2167 cgraph_remove_edge (e);
2168 current_function_decl = mi->free_stmts[i].func;
2169 cfun = DECL_STRUCT_FUNCTION (mi->free_stmts[i].func);
2170 bsi = bsi_for_stmt (mi->free_stmts[i].stmt);
2171 bsi_remove (&bsi, true);
2172 }
/* Restore the previous function context.  */
2174 current_function_decl = oldfn;
2175 cfun = oldfn ? DECL_STRUCT_FUNCTION (oldfn) : NULL;
return 1;
}
2179
2180
2181 /* Print out the results of the escape analysis. */
2182 static int
2183 dump_matrix_reorg_analysis (void **slot, void *data ATTRIBUTE_UNUSED)
2184 {
2185 struct matrix_info *mi = *slot;
2186
2187 if (!dump_file)
2188 return 1;
2189 fprintf (dump_file, "Matrix \"%s\"; Escaping Level: %d, Num Dims: %d,",
2190 get_name (mi->decl), mi->min_indirect_level_escape, mi->num_dims);
2191 fprintf (dump_file, " Malloc Dims: %d, ", mi->max_malloced_level);
2192 fprintf (dump_file, "\n");
2193 if (mi->min_indirect_level_escape >= 2)
2194 fprintf (dump_file, "Flattened %d dimensions \n",
2195 mi->min_indirect_level_escape);
2196 return 1;
2197 }
2198
2199
2200 /* Perform matrix flattening. */
2201
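/* Editorial overview of the driver below: after checking every function
   with may_flatten_matrices, it records all candidate matrices
   (find_matrices_decl), analyzes each function (find_sites_in_func,
   analyze_transpose when profile information is available, and
   check_allocation_function), then runs the two transformation walks
   (transform_allocation_sites and transform_access_sites) and finally
   dumps the analysis results.  */
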
2202 static unsigned int
2203 matrix_reorg (void)
2204 {
2205 struct cgraph_node *node;
2206
2207 if (profile_info)
2208 check_transpose_p = true;
2209 else
2210 check_transpose_p = false;
/* If there are hand-written vectors, we skip this optimization.  */
2212 for (node = cgraph_nodes; node; node = node->next)
2213 if (!may_flatten_matrices (node))
2214 return 0;
2215 matrices_to_reorg = htab_create (37, mtt_info_hash, mtt_info_eq, mat_free);
2216 /* Find and record all potential matrices in the program. */
2217 find_matrices_decl ();
2218 /* Analyze the accesses of the matrices (escaping analysis). */
2219 for (node = cgraph_nodes; node; node = node->next)
2220 if (node->analyzed)
2221 {
2222 tree temp_fn;
2223
2224 temp_fn = current_function_decl;
2225 current_function_decl = node->decl;
2226 push_cfun (DECL_STRUCT_FUNCTION (node->decl));
2227 bitmap_obstack_initialize (NULL);
2228 tree_register_cfg_hooks ();
2229
2230 if (!gimple_in_ssa_p (cfun))
2231 {
2232 free_dominance_info (CDI_DOMINATORS);
2233 free_dominance_info (CDI_POST_DOMINATORS);
2234 pop_cfun ();
2235 current_function_decl = temp_fn;
2236
2237 return 0;
2238 }
2239
2240 #ifdef ENABLE_CHECKING
2241 verify_flow_info ();
2242 #endif
2243
2244 if (!matrices_to_reorg)
2245 {
2246 free_dominance_info (CDI_DOMINATORS);
2247 free_dominance_info (CDI_POST_DOMINATORS);
2248 pop_cfun ();
2249 current_function_decl = temp_fn;
2250
2251 return 0;
2252 }
2253
/* Create htab for phi nodes.  */
2255 htab_mat_acc_phi_nodes = htab_create (37, mat_acc_phi_hash,
2256 mat_acc_phi_eq, free);
2257 if (!check_transpose_p)
2258 find_sites_in_func (false);
2259 else
2260 {
2261 find_sites_in_func (true);
2262 loop_optimizer_init (LOOPS_NORMAL);
2263 if (current_loops)
2264 scev_initialize ();
2265 htab_traverse (matrices_to_reorg, analyze_transpose, NULL);
2266 if (current_loops)
2267 {
2268 scev_finalize ();
2269 loop_optimizer_finalize ();
2270 current_loops = NULL;
2271 }
2272 }
/* If the current function is the allocation function for any of
the matrices, we check its allocation and the escaping level.  */
2275 htab_traverse (matrices_to_reorg, check_allocation_function, NULL);
2276 free_dominance_info (CDI_DOMINATORS);
2277 free_dominance_info (CDI_POST_DOMINATORS);
2278 pop_cfun ();
2279 current_function_decl = temp_fn;
2280 }
2281 htab_traverse (matrices_to_reorg, transform_allocation_sites, NULL);
2282 /* Now transform the accesses. */
2283 for (node = cgraph_nodes; node; node = node->next)
2284 if (node->analyzed)
2285 {
/* The allocation sites have already been handled by now.  */
2287 tree temp_fn;
2288
2289 temp_fn = current_function_decl;
2290 current_function_decl = node->decl;
2291 push_cfun (DECL_STRUCT_FUNCTION (node->decl));
2292 bitmap_obstack_initialize (NULL);
2293 tree_register_cfg_hooks ();
2294 record_all_accesses_in_func ();
2295 htab_traverse (matrices_to_reorg, transform_access_sites, NULL);
2296 free_dominance_info (CDI_DOMINATORS);
2297 free_dominance_info (CDI_POST_DOMINATORS);
2298 pop_cfun ();
2299 current_function_decl = temp_fn;
2300 }
2301 htab_traverse (matrices_to_reorg, dump_matrix_reorg_analysis, NULL);
2302
2303 current_function_decl = NULL;
2304 cfun = NULL;
2305 matrices_to_reorg = NULL;
2306 return 0;
2307 }
2308
2309
2310 /* The condition for matrix flattening to be performed. */
2311 static bool
2312 gate_matrix_reorg (void)
2313 {
2314 return flag_ipa_matrix_reorg /*&& flag_whole_program */ ;
2315 }
2316
2317 struct tree_opt_pass pass_ipa_matrix_reorg = {
2318 "matrix-reorg", /* name */
2319 gate_matrix_reorg, /* gate */
2320 matrix_reorg, /* execute */
2321 NULL, /* sub */
2322 NULL, /* next */
2323 0, /* static_pass_number */
2324 0, /* tv_id */
2325 0, /* properties_required */
2326 PROP_trees, /* properties_provided */
2327 0, /* properties_destroyed */
2328 0, /* todo_flags_start */
2329 TODO_dump_cgraph | TODO_dump_func, /* todo_flags_finish */
2330 0 /* letter */
2331 };