1 /* Matrix layout transformations.
2 Copyright (C) 2006, 2007 Free Software Foundation, Inc.
3 Contributed by Razya Ladelsky <razya@il.ibm.com>
4 Originally written by Revital Eres and Mustafa Hagog.
5
6 This file is part of GCC.
7
8 GCC is free software; you can redistribute it and/or modify it under
9 the terms of the GNU General Public License as published by the Free
10 Software Foundation; either version 2, or (at your option) any later
11 version.
12
13 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
14 WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16 for more details.
17
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING. If not, write to the Free
20 Software Foundation, 59 Temple Place - Suite 330, Boston, MA
21 02111-1307, USA. */
22
23 /*
24 Matrix flattening optimization tries to replace a N-dimensional
25 matrix with its equivalent M-dimensional matrix, where M < N.
26 This first implementation focuses on global matrices defined dynamically.
27
 28    When M==1, we actually flatten the whole matrix.
29 For instance consider a two-dimensional array a [dim1] [dim2].
30 The code for allocating space for it usually looks like:
31
32 a = (int **) malloc(dim1 * sizeof(int *));
33 for (i=0; i<dim1; i++)
34 a[i] = (int *) malloc (dim2 * sizeof(int));
35
36 If the array "a" is found suitable for this optimization,
37 its allocation is replaced by:
38
 39    a = (int *) malloc (dim1 * dim2 * sizeof(int));
40
41 and all the references to a[i][j] are replaced by a[i * dim2 + j].
42
43 The two main phases of the optimization are the analysis
44 and transformation.
45 The driver of the optimization is matrix_reorg ().
46
47
48
49 Analysis phase:
50 ===============
51
52 We'll number the dimensions outside-in, meaning the most external
53 is 0, then 1, and so on.
54 The analysis part of the optimization determines K, the escape
55 level of a N-dimensional matrix (K <= N), that allows flattening of
56 the external dimensions 0,1,..., K-1. Escape level 0 means that the
57 whole matrix escapes and no flattening is possible.
58
59 The analysis part is implemented in analyze_matrix_allocation_site()
60 and analyze_matrix_accesses().
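
   As an illustration (a made-up case, not taken from any particular
   test): if a row pointer such as a[i] is passed to an unknown
   function, dimension 1 and everything below it escape, so K is 1 and
   only dimension 0 remains, which is not enough for flattening; if the
   only uses are element accesses a[i][j] and the recognized malloc
   calls, nothing escapes, K equals N and all the dimensions can be
   collapsed into one.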
61
62 Transformation phase:
63 =====================
64 In this phase we define the new flattened matrices that replace the
65 original matrices in the code.
66 Implemented in transform_allocation_sites(),
67 transform_access_sites().
68
69 Matrix Transposing
70 ==================
71 The idea of Matrix Transposing is organizing the matrix in a different
72 layout such that the dimensions are reordered.
73 This could produce better cache behavior in some cases.
74
 75    For example, let's look at the matrix accesses in the following loop:
76
77 for (i=0; i<N; i++)
78 for (j=0; j<M; j++)
79 access to a[i][j]
80
81 This loop can produce good cache behavior because the elements of
82 the inner dimension are accessed sequentially.
83
84 However, if the accesses of the matrix were of the following form:
85
86 for (i=0; i<N; i++)
87 for (j=0; j<M; j++)
88 access to a[j][i]
89
 90    In this loop we iterate over the columns and not the rows.
 91    Therefore, swapping the rows and columns
 92    would give an organization with better (cache) locality.
 93    Swapping the dimensions of the matrix is called matrix transposing.
94
 95    This example, of course, extends to matrices with more dimensions
 96    as well.
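
   As a rough sketch of the combined effect (using the loop above, and
   assuming a's first dimension has length M and its second has length
   N, matching the loop bounds): flattening alone would turn a[j][i]
   into a[j * N + i], whose address jumps by N elements on every inner
   iteration, while transposing first and then flattening yields
   a[i * M + j], whose address advances by one element on every inner
   iteration.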
97
 98    Since a program could include all kinds of accesses, there is a decision
 99    mechanism, implemented in analyze_transpose(), which uses a
 100   heuristic to determine whether to transpose the matrix or not,
 101   according to the form of the most dominant accesses.
102 This decision is transferred to the flattening mechanism, and whether
103 the matrix was transposed or not, the matrix is flattened (if possible).
104
105 This decision making is based on profiling information and loop information.
 106   If profiling information is available, the decision-making mechanism will be
 107   used; otherwise the matrix will only be flattened (if possible).
108
109 Both optimizations are described in the paper "Matrix flattening and
110 transposing in GCC" which was presented in GCC summit 2006.
111 http://www.gccsummit.org/2006/2006-GCC-Summit-Proceedings.pdf
112
113 */
114
115 #include "config.h"
116 #include "system.h"
117 #include "coretypes.h"
118 #include "tm.h"
119 #include "tree.h"
120 #include "rtl.h"
121 #include "c-tree.h"
122 #include "tree-inline.h"
123 #include "tree-flow.h"
124 #include "tree-flow-inline.h"
125 #include "langhooks.h"
126 #include "hashtab.h"
127 #include "toplev.h"
128 #include "flags.h"
129 #include "ggc.h"
130 #include "debug.h"
131 #include "target.h"
132 #include "cgraph.h"
133 #include "diagnostic.h"
134 #include "timevar.h"
135 #include "params.h"
136 #include "fibheap.h"
137 #include "c-common.h"
138 #include "intl.h"
139 #include "function.h"
140 #include "basic-block.h"
141 #include "cfgloop.h"
142 #include "tree-iterator.h"
143 #include "tree-pass.h"
144 #include "opts.h"
145 #include "tree-data-ref.h"
146 #include "tree-chrec.h"
147 #include "tree-scalar-evolution.h"
148
149 /*
150 We need to collect a lot of data from the original malloc,
151 particularly as the gimplifier has converted:
152
153 orig_var = (struct_type *) malloc (x * sizeof (struct_type *));
154
155 into
156
157 T3 = <constant> ; ** <constant> is amount to malloc; precomputed **
158 T4 = malloc (T3);
159 T5 = (struct_type *) T4;
160 orig_var = T5;
161
162 The following struct fields allow us to collect all the necessary data from
163 the gimplified program. The comments in the struct below are all based
164 on the gimple example above. */
165
166 struct malloc_call_data
167 {
168 tree call_stmt; /* Tree for "T4 = malloc (T3);" */
169 tree size_var; /* Var decl for T3. */
170 tree malloc_size; /* Tree for "<constant>", the rhs assigned to T3. */
171 };
172
173 /* The front end of the compiler, when parsing statements of the form:
174
175 var = (type_cast) malloc (sizeof (type));
176
177 always converts this single statement into the following statements
178 (GIMPLE form):
179
180 T.1 = sizeof (type);
181 T.2 = malloc (T.1);
182 T.3 = (type_cast) T.2;
183 var = T.3;
184
185 Since we need to create new malloc statements and modify the original
186 statements somewhat, we need to find all four of the above statements.
187 Currently record_call_1 (called for building cgraph edges) finds and
188 records the statements containing the actual call to malloc, but we
189 need to find the rest of the variables/statements on our own. That
190 is what the following function does. */
191 static void
192 collect_data_for_malloc_call (tree stmt, struct malloc_call_data *m_data)
193 {
194 tree size_var = NULL;
195 tree malloc_fn_decl;
196 tree tmp;
197 tree arg1;
198
199 gcc_assert (TREE_CODE (stmt) == GIMPLE_MODIFY_STMT);
200
201 tmp = get_call_expr_in (stmt);
202 malloc_fn_decl = CALL_EXPR_FN (tmp);
203 if (TREE_CODE (malloc_fn_decl) != ADDR_EXPR
204 || TREE_CODE (TREE_OPERAND (malloc_fn_decl, 0)) != FUNCTION_DECL
205 || DECL_FUNCTION_CODE (TREE_OPERAND (malloc_fn_decl, 0)) !=
206 BUILT_IN_MALLOC)
207 return;
208
209 arg1 = CALL_EXPR_ARG (tmp, 0);
210 size_var = arg1;
211
212 m_data->call_stmt = stmt;
213 m_data->size_var = size_var;
214 if (TREE_CODE (size_var) != VAR_DECL)
215 m_data->malloc_size = size_var;
216 else
217 m_data->malloc_size = NULL_TREE;
218 }
219
220 /* Information about matrix access site.
221 For example: if an access site of matrix arr is arr[i][j]
222 the ACCESS_SITE_INFO structure will have the address
223 of arr as its stmt. The INDEX_INFO will hold information about the
224 initial address and index of each dimension. */
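/* As a rough illustration (the temporaries T1..T4 and the constants 8
   and 4 are invented, assuming 8-byte pointers and 4-byte ints), a
   read of a[i][j] is gimplified into something like:

     T1 = a + i*8;     <-- PLUS_EXPR, level 0, offset i*8, index i
     T2 = *T1;         <-- INDIRECT_REF, level 0
     T3 = T2 + j*4;    <-- PLUS_EXPR, level 1, offset j*4, index j
     T4 = *T3;         <-- INDIRECT_REF, level 1

   The PLUS_EXPR and INDIRECT_REF statements are what get recorded in
   the structure below.  */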
225 struct access_site_info
226 {
227 /* The statement (INDIRECT_REF or PLUS_EXPR). */
228 tree stmt;
229
230 /* In case of PLUS_EXPR, what is the offset. */
231 tree offset;
232
233 /* The index which created the offset. */
234 tree index;
235
236 /* The indirection level of this statement. */
237 int level;
238
239 /* TRUE for allocation site FALSE for access site. */
240 bool is_alloc;
241
242 /* The function containing the access site. */
243 tree function_decl;
244
 245   /* TRUE if this access is iterated in the innermost loop containing it.  */
246 bool iterated_by_inner_most_loop_p;
247 };
248
249 typedef struct access_site_info *access_site_info_p;
250 DEF_VEC_P (access_site_info_p);
251 DEF_VEC_ALLOC_P (access_site_info_p, heap);
252
253 /* Information about matrix to flatten. */
254 struct matrix_info
255 {
256 /* Decl tree of this matrix. */
257 tree decl;
258 /* Number of dimensions; number
259 of "*" in the type declaration. */
260 int num_dims;
261
262 /* Minimum indirection level that escapes, 0 means that
263 the whole matrix escapes, k means that dimensions
 264   k and deeper escape (dimensions 0 to k-1 may still be flattened). */
265 int min_indirect_level_escape;
266
267 tree min_indirect_level_escape_stmt;
268
269 /* Is the matrix transposed. */
270 bool is_transposed_p;
271
272 /* Hold the allocation site for each level (dimension).
273 We can use NUM_DIMS as the upper bound and allocate the array
274 once with this number of elements and no need to use realloc and
275 MAX_MALLOCED_LEVEL. */
276 tree *malloc_for_level;
277
278 int max_malloced_level;
279
280 /* The location of the allocation sites (they must be in one
281 function). */
282 tree allocation_function_decl;
283
284 /* The calls to free for each level of indirection. */
285 struct free_info
286 {
287 tree stmt;
288 tree func;
289 } *free_stmts;
290
 291   /* An array which holds for each dimension its size, where
 292   dimension 0 is the outermost (the one that contains all the others).
293 */
294 tree *dimension_size;
295
 296   /* An array which holds for each dimension its original size
297 (before transposing and flattening take place). */
298 tree *dimension_size_orig;
299
 300   /* An array which holds for each dimension the size of the type
 301   of elements accessed in that level (in bytes). */
302 HOST_WIDE_INT *dimension_type_size;
303
304 int dimension_type_size_len;
305
306 /* An array collecting the count of accesses for each dimension. */
307 gcov_type *dim_hot_level;
308
309 /* An array of the accesses to be flattened.
310 elements are of type "struct access_site_info *". */
311 VEC (access_site_info_p, heap) * access_l;
312
313 /* A map of how the dimensions will be organized at the end of
314 the analyses. */
315 int *dim_map;
316 };
317
318 /* In each phi node we want to record the indirection level we have when we
319 get to the phi node. Usually we will have phi nodes with more than two
 320   arguments, so we must make sure that all of them get to the phi node with
 321   the same indirection level; otherwise it's not safe to do the flattening.
322 So we record the information regarding the indirection level each time we
323 get to the phi node in this hash table. */
324
325 struct matrix_access_phi_node
326 {
327 tree phi;
328 int indirection_level;
329 };
330
331 /* We use this structure to find if the SSA variable is accessed inside the
332 tree and record the tree containing it. */
333
334 struct ssa_acc_in_tree
335 {
336 /* The variable whose accesses in the tree we are looking for. */
337 tree ssa_var;
 338   /* The tree, and the code inside it, in which the ssa_var is accessed;
 339   currently it could be an INDIRECT_REF or CALL_EXPR. */
340 enum tree_code t_code;
341 tree t_tree;
342 /* The place in the containing tree. */
343 tree *tp;
344 tree second_op;
345 bool var_found;
346 };
347
348 static void analyze_matrix_accesses (struct matrix_info *, tree, int, bool,
349 sbitmap, bool);
350 static int transform_allocation_sites (void **, void *);
351 static int transform_access_sites (void **, void *);
352 static int analyze_transpose (void **, void *);
353 static int dump_matrix_reorg_analysis (void **, void *);
354
355 static bool check_transpose_p;
356
357 /* Hash function used for the phi nodes. */
358
359 static hashval_t
360 mat_acc_phi_hash (const void *p)
361 {
362 const struct matrix_access_phi_node *ma_phi = p;
363
364 return htab_hash_pointer (ma_phi->phi);
365 }
366
367 /* Equality means phi node pointers are the same. */
368
369 static int
370 mat_acc_phi_eq (const void *p1, const void *p2)
371 {
372 const struct matrix_access_phi_node *phi1 = p1;
373 const struct matrix_access_phi_node *phi2 = p2;
374
375 if (phi1->phi == phi2->phi)
376 return 1;
377
378 return 0;
379 }
380
381 /* Hold the PHI nodes we visit during the traversal for escaping
382 analysis. */
383 static htab_t htab_mat_acc_phi_nodes = NULL;
384
385 /* This hash-table holds the information about the matrices we are
386 going to handle. */
387 static htab_t matrices_to_reorg = NULL;
388
389 /* Return a hash for MTT, which is really a "matrix_info *". */
390 static hashval_t
391 mtt_info_hash (const void *mtt)
392 {
393 return htab_hash_pointer (((struct matrix_info *) mtt)->decl);
394 }
395
396 /* Return true if MTT1 and MTT2 (which are really both of type
397 "matrix_info *") refer to the same decl. */
398 static int
399 mtt_info_eq (const void *mtt1, const void *mtt2)
400 {
401 const struct matrix_info *i1 = mtt1;
402 const struct matrix_info *i2 = mtt2;
403
404 if (i1->decl == i2->decl)
405 return true;
406
407 return false;
408 }
409
 410 /* Return the innermost tree that is not a cast. */
411 static tree
412 get_inner_of_cast_expr (tree t)
413 {
414 while (TREE_CODE (t) == CONVERT_EXPR || TREE_CODE (t) == NOP_EXPR
415 || TREE_CODE (t) == VIEW_CONVERT_EXPR)
416 t = TREE_OPERAND (t, 0);
417
418 return t;
419 }
420
421 /* Return false if STMT may contain a vector expression.
 422   In that situation, no matrices should be flattened. */
423 static bool
424 may_flatten_matrices_1 (tree stmt)
425 {
426 tree t;
427
428 switch (TREE_CODE (stmt))
429 {
430 case GIMPLE_MODIFY_STMT:
431 t = GIMPLE_STMT_OPERAND (stmt, 1);
432 while (TREE_CODE (t) == CONVERT_EXPR || TREE_CODE (t) == NOP_EXPR)
433 {
434 if (TREE_TYPE (t) && POINTER_TYPE_P (TREE_TYPE (t)))
435 {
436 tree pointee;
437
438 pointee = TREE_TYPE (t);
439 while (POINTER_TYPE_P (pointee))
440 pointee = TREE_TYPE (pointee);
441 if (TREE_CODE (pointee) == VECTOR_TYPE)
442 {
443 if (dump_file)
444 fprintf (dump_file,
445 "Found vector type, don't flatten matrix\n");
446 return false;
447 }
448 }
449 t = TREE_OPERAND (t, 0);
450 }
451 break;
452 case ASM_EXPR:
453 /* Asm code could contain vector operations. */
454 return false;
455 break;
456 default:
457 break;
458 }
459 return true;
460 }
461
462 /* Return false if there are hand-written vectors in the program.
463 We disable the flattening in such a case. */
464 static bool
465 may_flatten_matrices (struct cgraph_node *node)
466 {
467 tree decl;
468 struct function *func;
469 basic_block bb;
470 block_stmt_iterator bsi;
471
472 decl = node->decl;
473 if (node->analyzed)
474 {
475 func = DECL_STRUCT_FUNCTION (decl);
476 FOR_EACH_BB_FN (bb, func)
477 for (bsi = bsi_start (bb); !bsi_end_p (bsi); bsi_next (&bsi))
478 if (!may_flatten_matrices_1 (bsi_stmt (bsi)))
479 return false;
480 }
481 return true;
482 }
483
484 /* Given a VAR_DECL, check its type to determine whether it is
 485   a definition of a dynamically allocated matrix and therefore is
 486   a suitable candidate for the matrix flattening optimization.
 487   Return NULL if VAR_DECL is not such a decl. Otherwise, allocate
488 a MATRIX_INFO structure, fill it with the relevant information
489 and return a pointer to it.
490 TODO: handle also statically defined arrays. */
491 static struct matrix_info *
492 analyze_matrix_decl (tree var_decl)
493 {
494 struct matrix_info *m_node, tmpmi, *mi;
495 tree var_type;
496 int dim_num = 0;
497
498 gcc_assert (matrices_to_reorg);
499
500 if (TREE_CODE (var_decl) == PARM_DECL)
501 var_type = DECL_ARG_TYPE (var_decl);
502 else if (TREE_CODE (var_decl) == VAR_DECL)
503 var_type = TREE_TYPE (var_decl);
504 else
505 return NULL;
506
507 if (!POINTER_TYPE_P (var_type))
508 return NULL;
509
510 while (POINTER_TYPE_P (var_type))
511 {
512 var_type = TREE_TYPE (var_type);
513 dim_num++;
514 }
515
516 if (dim_num <= 1)
517 return NULL;
518
519 if (!COMPLETE_TYPE_P (var_type)
520 || TREE_CODE (TYPE_SIZE_UNIT (var_type)) != INTEGER_CST)
521 return NULL;
522
523 /* Check to see if this pointer is already in there. */
524 tmpmi.decl = var_decl;
525 mi = htab_find (matrices_to_reorg, &tmpmi);
526
527 if (mi)
528 return NULL;
529
530 /* Record the matrix. */
531
532 m_node = (struct matrix_info *) xcalloc (1, sizeof (struct matrix_info));
533 m_node->decl = var_decl;
534 m_node->num_dims = dim_num;
535 m_node->free_stmts
536 = (struct free_info *) xcalloc (dim_num, sizeof (struct free_info));
537
538 /* Init min_indirect_level_escape to -1 to indicate that no escape
539 analysis has been done yet. */
540 m_node->min_indirect_level_escape = -1;
541 m_node->is_transposed_p = false;
542
543 return m_node;
544 }
545
546 /* Free matrix E. */
547 static void
548 mat_free (void *e)
549 {
550 struct matrix_info *mat = (struct matrix_info *) e;
551
552 if (!mat)
553 return;
554
555 if (mat->free_stmts)
556 free (mat->free_stmts);
557 if (mat->dim_hot_level)
558 free (mat->dim_hot_level);
559 if (mat->malloc_for_level)
560 free (mat->malloc_for_level);
561 }
562
563 /* Find all potential matrices.
564 TODO: currently we handle only multidimensional
565 dynamically allocated arrays. */
566 static void
567 find_matrices_decl (void)
568 {
569 struct matrix_info *tmp;
570 PTR *slot;
571 struct varpool_node *vnode;
572
573 gcc_assert (matrices_to_reorg);
574
575 /* For every global variable in the program:
576 Check to see if it's of a candidate type and record it. */
577 for (vnode = varpool_nodes_queue; vnode; vnode = vnode->next_needed)
578 {
579 tree var_decl = vnode->decl;
580
581 if (!var_decl || TREE_CODE (var_decl) != VAR_DECL)
582 continue;
583
584 if (matrices_to_reorg)
585 if ((tmp = analyze_matrix_decl (var_decl)))
586 {
587 if (!TREE_ADDRESSABLE (var_decl))
588 {
589 slot = htab_find_slot (matrices_to_reorg, tmp, INSERT);
590 *slot = tmp;
591 }
592 }
593 }
594 return;
595 }
596
597 /* Mark that the matrix MI escapes at level L. */
598 static void
599 mark_min_matrix_escape_level (struct matrix_info *mi, int l, tree s)
600 {
601 if (mi->min_indirect_level_escape == -1
602 || (mi->min_indirect_level_escape > l))
603 {
604 mi->min_indirect_level_escape = l;
605 mi->min_indirect_level_escape_stmt = s;
606 }
607 }
608
609 /* Find if the SSA variable is accessed inside the
610 tree and record the tree containing it.
611 The only relevant uses are the case of SSA_NAME, or SSA inside
612 INDIRECT_REF, CALL_EXPR, PLUS_EXPR, MULT_EXPR. */
613 static void
614 ssa_accessed_in_tree (tree t, struct ssa_acc_in_tree *a)
615 {
616 tree call, decl;
617 tree arg;
618 call_expr_arg_iterator iter;
619
620 a->t_code = TREE_CODE (t);
621 switch (a->t_code)
622 {
623 tree op1, op2;
624
625 case SSA_NAME:
626 if (t == a->ssa_var)
627 a->var_found = true;
628 break;
629 case INDIRECT_REF:
630 if (SSA_VAR_P (TREE_OPERAND (t, 0))
631 && TREE_OPERAND (t, 0) == a->ssa_var)
632 a->var_found = true;
633 break;
634 case CALL_EXPR:
635 FOR_EACH_CALL_EXPR_ARG (arg, iter, t)
636 {
637 if (arg == a->ssa_var)
638 {
639 a->var_found = true;
640 call = get_call_expr_in (t);
641 if (call && (decl = get_callee_fndecl (call)))
642 a->t_tree = decl;
643 break;
644 }
645 }
646 break;
647 case PLUS_EXPR:
648 case MULT_EXPR:
649 op1 = TREE_OPERAND (t, 0);
650 op2 = TREE_OPERAND (t, 1);
651
652 if (op1 == a->ssa_var)
653 {
654 a->var_found = true;
655 a->second_op = op2;
656 }
657 else if (op2 == a->ssa_var)
658 {
659 a->var_found = true;
660 a->second_op = op1;
661 }
662 break;
663 default:
664 break;
665 }
666 }
667
668 /* Record the access/allocation site information for matrix MI so we can
669 handle it later in transformation. */
670 static void
671 record_access_alloc_site_info (struct matrix_info *mi, tree stmt, tree offset,
672 tree index, int level, bool is_alloc)
673 {
674 struct access_site_info *acc_info;
675
676 if (!mi->access_l)
677 mi->access_l = VEC_alloc (access_site_info_p, heap, 100);
678
679 acc_info
680 = (struct access_site_info *)
681 xcalloc (1, sizeof (struct access_site_info));
682 acc_info->stmt = stmt;
683 acc_info->offset = offset;
684 acc_info->index = index;
685 acc_info->function_decl = current_function_decl;
686 acc_info->level = level;
687 acc_info->is_alloc = is_alloc;
688
689 VEC_safe_push (access_site_info_p, heap, mi->access_l, acc_info);
690
691 }
692
693 /* Record the malloc as the allocation site of the given LEVEL. But
 694   first we make sure that all the size parameters passed to malloc in
 695   all the allocation sites can be pre-calculated before the call to
696 the malloc of level 0 (the main malloc call). */
697 static void
698 add_allocation_site (struct matrix_info *mi, tree stmt, int level)
699 {
700 struct malloc_call_data mcd;
701
702 /* Make sure that the allocation sites are in the same function. */
703 if (!mi->allocation_function_decl)
704 mi->allocation_function_decl = current_function_decl;
705 else if (mi->allocation_function_decl != current_function_decl)
706 {
707 int min_malloc_level;
708
709 gcc_assert (mi->malloc_for_level);
710
 711   /* Find the minimum malloc level that has already been seen;
 712   we know its allocation function must be
 713   MI->allocation_function_decl.  Since it's different from
 714   CURRENT_FUNCTION_DECL, the escaping level should be
 715   MIN (LEVEL, MIN_MALLOC_LEVEL) - 1, and the allocation function
 716   must be set accordingly. */
717 for (min_malloc_level = 0;
718 min_malloc_level < mi->max_malloced_level
719 && mi->malloc_for_level[min_malloc_level]; min_malloc_level++);
720 if (level < min_malloc_level)
721 {
722 mi->allocation_function_decl = current_function_decl;
723 mark_min_matrix_escape_level (mi, min_malloc_level, stmt);
724 }
725 else
726 {
727 mark_min_matrix_escape_level (mi, level, stmt);
 728   /* It cannot be that (level == min_malloc_level);
 729   we would have returned earlier. */
730 return;
731 }
732 }
733
734 /* Find the correct malloc information. */
735 collect_data_for_malloc_call (stmt, &mcd);
736
 737   /* We accept only calls to the malloc function; we do not accept
738 calls like calloc and realloc. */
739 if (!mi->malloc_for_level)
740 {
741 mi->malloc_for_level = xcalloc (level + 1, sizeof (tree));
742 mi->max_malloced_level = level + 1;
743 }
744 else if (mi->max_malloced_level <= level)
745 {
746 mi->malloc_for_level
747 = xrealloc (mi->malloc_for_level, (level + 1) * sizeof (tree));
748
749 /* Zero the newly allocated items. */
 750   memset (&(mi->malloc_for_level[mi->max_malloced_level]),
 751   0, (level + 1 - mi->max_malloced_level) * sizeof (tree));
752
753 mi->max_malloced_level = level + 1;
754 }
755 mi->malloc_for_level[level] = stmt;
756 }
757
 758 /* Given an assignment statement STMT whose left-hand side we know
 759   to be the matrix MI variable, we traverse the use-def chains
 760   backwards until we get to a malloc site. We make sure that
761 there is one and only one malloc site that sets this variable. When
762 we are performing the flattening we generate a new variable that
763 will hold the size for each dimension; each malloc that allocates a
764 dimension has the size parameter; we use that parameter to
765 initialize the dimension size variable so we can use it later in
766 the address calculations. LEVEL is the dimension we're inspecting.
767 Return if STMT is related to an allocation site. */
768
769 static void
770 analyze_matrix_allocation_site (struct matrix_info *mi, tree stmt,
771 int level, sbitmap visited)
772 {
773 if (TREE_CODE (stmt) == GIMPLE_MODIFY_STMT)
774 {
775 tree rhs = GIMPLE_STMT_OPERAND (stmt, 1);
776
777 rhs = get_inner_of_cast_expr (rhs);
778 if (TREE_CODE (rhs) == SSA_NAME)
779 {
780 tree def = SSA_NAME_DEF_STMT (rhs);
781
782 analyze_matrix_allocation_site (mi, def, level, visited);
783 return;
784 }
785
786 /* A result of call to malloc. */
787 else if (TREE_CODE (rhs) == CALL_EXPR)
788 {
789 int call_flags = call_expr_flags (rhs);
790
791 if (!(call_flags & ECF_MALLOC))
792 {
793 mark_min_matrix_escape_level (mi, level, stmt);
794 return;
795 }
796 else
797 {
798 tree malloc_fn_decl;
799 const char *malloc_fname;
800
801 malloc_fn_decl = CALL_EXPR_FN (rhs);
802 if (TREE_CODE (malloc_fn_decl) != ADDR_EXPR
803 || TREE_CODE (TREE_OPERAND (malloc_fn_decl, 0)) !=
804 FUNCTION_DECL)
805 {
806 mark_min_matrix_escape_level (mi, level, stmt);
807 return;
808 }
809 malloc_fn_decl = TREE_OPERAND (malloc_fn_decl, 0);
810 malloc_fname = IDENTIFIER_POINTER (DECL_NAME (malloc_fn_decl));
811 if (DECL_FUNCTION_CODE (malloc_fn_decl) != BUILT_IN_MALLOC)
812 {
813 if (dump_file)
814 fprintf (dump_file,
815 "Matrix %s is an argument to function %s\n",
816 get_name (mi->decl), get_name (malloc_fn_decl));
817 mark_min_matrix_escape_level (mi, level, stmt);
818 return;
819 }
820 }
821 /* This is a call to malloc. Check to see if this is the first
822 call in this indirection level; if so, mark it; if not, mark
823 as escaping. */
824 if (mi->malloc_for_level
825 && mi->malloc_for_level[level]
826 && mi->malloc_for_level[level] != stmt)
827 {
828 mark_min_matrix_escape_level (mi, level, stmt);
829 return;
830 }
831 else
832 add_allocation_site (mi, stmt, level);
833 return;
834 }
835 /* If we are back to the original matrix variable then we
836 are sure that this is analyzed as an access site. */
837 else if (rhs == mi->decl)
838 return;
839 }
840 /* Looks like we don't know what is happening in this
 841   statement, so be on the safe side and mark it as escaping. */
842 mark_min_matrix_escape_level (mi, level, stmt);
843 }
844
845 /* The transposing decision making.
 846   In order to calculate the profitability of transposing, we collect two
847 types of information regarding the accesses:
848 1. profiling information used to express the hotness of an access, that
849 is how often the matrix is accessed by this access site (count of the
850 access site).
851 2. which dimension in the access site is iterated by the inner
852 most loop containing this access.
853
854 The matrix will have a calculated value of weighted hotness for each
855 dimension.
856 Intuitively the hotness level of a dimension is a function of how
857 many times it was the most frequently accessed dimension in the
858 highly executed access sites of this matrix.
859
 860   As computed by the following equation: for each access site j
 861   (1 <= j <= m) and each dimension i (1 <= i <= n),
 862
 863      dim_hot_level[i] +=
 864          acc[j]->dim[i]->iter_by_inner_loop * count(j)
 865
867
 868   Where n is the number of dims and m is the number of matrix
 869   access sites. acc[j]->dim[i]->iter_by_inner_loop is 1 if acc[j]
 870   iterates over dim[i] in the innermost loop, and is 0 otherwise.
871
872 The organization of the new matrix should be according to the
873 hotness of each dimension. The hotness of the dimension implies
874 the locality of the elements.*/
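/* A small worked illustration (the counts are invented): for a 2-D
   matrix with one access site whose innermost loop iterates the
   level-1 (inner dimension) index and whose block count is 1000, and
   another whose innermost loop iterates the level-0 (outer dimension)
   index with a count of 10, the accumulation gives
   dim_hot_level[1] = 1000 and dim_hot_level[0] = 10, so the existing
   layout already favors the hot accesses and transposing would not
   pay off.  */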
875 static int
876 analyze_transpose (void **slot, void *data ATTRIBUTE_UNUSED)
877 {
878 struct matrix_info *mi = *slot;
879 int min_escape_l = mi->min_indirect_level_escape;
880 struct loop *loop;
881 affine_iv iv;
882 struct access_site_info *acc_info;
883 int i;
884
885 if (min_escape_l < 2 || !mi->access_l)
886 {
887 if (mi->access_l)
888 {
889 for (i = 0;
890 VEC_iterate (access_site_info_p, mi->access_l, i, acc_info);
891 i++)
892 free (acc_info);
893 VEC_free (access_site_info_p, heap, mi->access_l);
894
895 }
896 return 1;
897 }
898 if (!mi->dim_hot_level)
899 mi->dim_hot_level =
900 (gcov_type *) xcalloc (min_escape_l, sizeof (gcov_type));
901
902
903 for (i = 0; VEC_iterate (access_site_info_p, mi->access_l, i, acc_info);
904 i++)
905 {
906 if (TREE_CODE (GIMPLE_STMT_OPERAND (acc_info->stmt, 1)) == PLUS_EXPR
907 && acc_info->level < min_escape_l)
908 {
909 loop = loop_containing_stmt (acc_info->stmt);
910 if (!loop || loop->inner)
911 {
912 free (acc_info);
913 continue;
914 }
915 if (simple_iv (loop, acc_info->stmt, acc_info->offset, &iv, true))
916 {
917 if (iv.step != NULL)
918 {
919 HOST_WIDE_INT istep;
920
921 istep = int_cst_value (iv.step);
922 if (istep != 0)
923 {
924 acc_info->iterated_by_inner_most_loop_p = 1;
925 mi->dim_hot_level[acc_info->level] +=
926 bb_for_stmt (acc_info->stmt)->count;
927 }
928
929 }
930 }
931 }
932 free (acc_info);
933 }
934 VEC_free (access_site_info_p, heap, mi->access_l);
935
936 return 1;
937 }
938
939 /* Find the index which defines the OFFSET from base.
940 We walk from use to def until we find how the offset was defined. */
941 static tree
942 get_index_from_offset (tree offset, tree def_stmt)
943 {
944 tree op1, op2, expr, index;
945
946 if (TREE_CODE (def_stmt) == PHI_NODE)
947 return NULL;
948 expr = get_inner_of_cast_expr (GIMPLE_STMT_OPERAND (def_stmt, 1));
949 if (TREE_CODE (expr) == SSA_NAME)
950 return get_index_from_offset (offset, SSA_NAME_DEF_STMT (expr));
951 else if (TREE_CODE (expr) == MULT_EXPR)
952 {
953 op1 = TREE_OPERAND (expr, 0);
954 op2 = TREE_OPERAND (expr, 1);
955 if (TREE_CODE (op1) != INTEGER_CST && TREE_CODE (op2) != INTEGER_CST)
956 return NULL;
957 index = (TREE_CODE (op1) == INTEGER_CST) ? op2 : op1;
958 return index;
959 }
960 else
961 return NULL_TREE;
962 }
963
 964 /* Update MI->dimension_type_size[CURRENT_INDIRECT_LEVEL] with the size
965 of the type related to the SSA_VAR, or the type related to the
966 lhs of STMT, in the case that it is an INDIRECT_REF. */
967 static void
968 update_type_size (struct matrix_info *mi, tree stmt, tree ssa_var,
969 int current_indirect_level)
970 {
971 tree lhs;
972 HOST_WIDE_INT type_size;
973
974 /* Update type according to the type of the INDIRECT_REF expr. */
975 if (TREE_CODE (stmt) == GIMPLE_MODIFY_STMT
976 && TREE_CODE (GIMPLE_STMT_OPERAND (stmt, 0)) == INDIRECT_REF)
977 {
978 lhs = GIMPLE_STMT_OPERAND (stmt, 0);
979 gcc_assert (POINTER_TYPE_P
980 (TREE_TYPE (SSA_NAME_VAR (TREE_OPERAND (lhs, 0)))));
981 type_size =
982 int_size_in_bytes (TREE_TYPE
983 (TREE_TYPE
984 (SSA_NAME_VAR (TREE_OPERAND (lhs, 0)))));
985 }
986 else
987 type_size = int_size_in_bytes (TREE_TYPE (ssa_var));
988
989 /* Record the size of elements accessed (as a whole)
990 in the current indirection level (dimension). If the size of
991 elements is not known at compile time, mark it as escaping. */
992 if (type_size <= 0)
993 mark_min_matrix_escape_level (mi, current_indirect_level, stmt);
994 else
995 {
996 int l = current_indirect_level;
997
998 if (!mi->dimension_type_size)
999 {
1000 mi->dimension_type_size
1001 = (HOST_WIDE_INT *) xcalloc (l + 1, sizeof (HOST_WIDE_INT));
1002 mi->dimension_type_size_len = l + 1;
1003 }
1004 else if (mi->dimension_type_size_len < l + 1)
1005 {
1006 mi->dimension_type_size
1007 = (HOST_WIDE_INT *) xrealloc (mi->dimension_type_size,
1008 (l + 1) * sizeof (HOST_WIDE_INT));
1009 memset (&mi->dimension_type_size[mi->dimension_type_size_len],
1010 0, (l + 1 - mi->dimension_type_size_len)
1011 * sizeof (HOST_WIDE_INT));
1012 mi->dimension_type_size_len = l + 1;
1013 }
1014 /* Make sure all the accesses in the same level have the same size
1015 of the type. */
1016 if (!mi->dimension_type_size[l])
1017 mi->dimension_type_size[l] = type_size;
1018 else if (mi->dimension_type_size[l] != type_size)
1019 mark_min_matrix_escape_level (mi, l, stmt);
1020 }
1021 }
1022
 1023 /* USE_STMT represents a CALL_EXPR, where one of the arguments is the
1024 ssa var that we want to check because it came from some use of matrix
1025 MI. CURRENT_INDIRECT_LEVEL is the indirection level we reached so
1026 far. */
1027
1028 static void
1029 analyze_accesses_for_call_expr (struct matrix_info *mi, tree use_stmt,
1030 int current_indirect_level)
1031 {
1032 tree call = get_call_expr_in (use_stmt);
1033 if (call && get_callee_fndecl (call))
1034 {
1035 if (DECL_FUNCTION_CODE (get_callee_fndecl (call)) != BUILT_IN_FREE)
1036 {
1037 if (dump_file)
1038 fprintf (dump_file,
1039 "Matrix %s: Function call %s, level %d escapes.\n",
1040 get_name (mi->decl), get_name (get_callee_fndecl (call)),
1041 current_indirect_level);
1042 mark_min_matrix_escape_level (mi, current_indirect_level, use_stmt);
1043 }
1044 else if (mi->free_stmts[current_indirect_level].stmt != NULL
1045 && mi->free_stmts[current_indirect_level].stmt != use_stmt)
1046 mark_min_matrix_escape_level (mi, current_indirect_level, use_stmt);
1047 else
1048 {
 1049   /* Record the free statements so we can delete them
1050 later. */
1051 int l = current_indirect_level;
1052
1053 mi->free_stmts[l].stmt = use_stmt;
1054 mi->free_stmts[l].func = current_function_decl;
1055 }
1056 }
1057 }
1058
1059 /* USE_STMT represents a phi node of the ssa var that we want to
1060 check because it came from some use of matrix
1061 MI.
1062 We check all the escaping levels that get to the PHI node
 1063   and make sure they are all the same;
 1064   if not (which is rare) we let the escaping level be the
 1065   minimum level that gets into that PHI, because starting from
 1066   that level we cannot predict the behavior of the indirections.
1067 CURRENT_INDIRECT_LEVEL is the indirection level we reached so far. */
1068
1069 static void
1070 analyze_accesses_for_phi_node (struct matrix_info *mi, tree use_stmt,
1071 int current_indirect_level, sbitmap visited,
1072 bool record_accesses)
1073 {
1074
1075 struct matrix_access_phi_node tmp_maphi, *maphi, **pmaphi;
1076
1077 tmp_maphi.phi = use_stmt;
1078 if ((maphi = htab_find (htab_mat_acc_phi_nodes, &tmp_maphi)))
1079 {
1080 if (maphi->indirection_level == current_indirect_level)
1081 return;
1082 else
1083 {
1084 int level = MIN (maphi->indirection_level,
1085 current_indirect_level);
1086 int j;
1087 tree t = NULL_TREE;
1088
1089 maphi->indirection_level = level;
1090 for (j = 0; j < PHI_NUM_ARGS (use_stmt); j++)
1091 {
1092 tree def = PHI_ARG_DEF (use_stmt, j);
1093
1094 if (TREE_CODE (SSA_NAME_DEF_STMT (def)) != PHI_NODE)
1095 t = SSA_NAME_DEF_STMT (def);
1096 }
1097 mark_min_matrix_escape_level (mi, level, t);
1098 }
1099 return;
1100 }
1101 maphi = (struct matrix_access_phi_node *)
1102 xcalloc (1, sizeof (struct matrix_access_phi_node));
1103 maphi->phi = use_stmt;
1104 maphi->indirection_level = current_indirect_level;
1105
1106 /* Insert to hash table. */
1107 pmaphi = (struct matrix_access_phi_node **)
1108 htab_find_slot (htab_mat_acc_phi_nodes, maphi, INSERT);
1109 gcc_assert (pmaphi);
1110 *pmaphi = maphi;
1111
1112 if (!TEST_BIT (visited, SSA_NAME_VERSION (PHI_RESULT (use_stmt))))
1113 {
1114 SET_BIT (visited, SSA_NAME_VERSION (PHI_RESULT (use_stmt)));
1115 analyze_matrix_accesses (mi, PHI_RESULT (use_stmt),
1116 current_indirect_level, false, visited,
1117 record_accesses);
1118 RESET_BIT (visited, SSA_NAME_VERSION (PHI_RESULT (use_stmt)));
1119 }
1120 }
1121
 1122 /* USE_STMT represents a modify statement (the rhs or lhs includes
 1123   the ssa var that we want to check because it came from some use of matrix
 1124   MI).
1125 CURRENT_INDIRECT_LEVEL is the indirection level we reached so far. */
1126
1127 static int
1128 analyze_accesses_for_modify_stmt (struct matrix_info *mi, tree ssa_var,
1129 tree use_stmt, int current_indirect_level,
1130 bool last_op, sbitmap visited,
1131 bool record_accesses)
1132 {
1133
1134 tree lhs = GIMPLE_STMT_OPERAND (use_stmt, 0);
1135 tree rhs = GIMPLE_STMT_OPERAND (use_stmt, 1);
1136 struct ssa_acc_in_tree lhs_acc, rhs_acc;
1137
1138 memset (&lhs_acc, 0, sizeof (lhs_acc));
1139 memset (&rhs_acc, 0, sizeof (rhs_acc));
1140
1141 lhs_acc.ssa_var = ssa_var;
1142 lhs_acc.t_code = ERROR_MARK;
1143 ssa_accessed_in_tree (lhs, &lhs_acc);
1144 rhs_acc.ssa_var = ssa_var;
1145 rhs_acc.t_code = ERROR_MARK;
1146 ssa_accessed_in_tree (get_inner_of_cast_expr (rhs), &rhs_acc);
1147
1148 /* The SSA must be either in the left side or in the right side,
1149 to understand what is happening.
1150 In case the SSA_NAME is found in both sides we should be escaping
1151 at this level because in this case we cannot calculate the
1152 address correctly. */
1153 if ((lhs_acc.var_found && rhs_acc.var_found
1154 && lhs_acc.t_code == INDIRECT_REF)
1155 || (!rhs_acc.var_found && !lhs_acc.var_found))
1156 {
1157 mark_min_matrix_escape_level (mi, current_indirect_level, use_stmt);
1158 return current_indirect_level;
1159 }
1160 gcc_assert (!rhs_acc.var_found || !lhs_acc.var_found);
1161
1162 /* If we are storing to the matrix at some level, then mark it as
1163 escaping at that level. */
1164 if (lhs_acc.var_found)
1165 {
1166 tree def;
1167 int l = current_indirect_level + 1;
1168
1169 gcc_assert (lhs_acc.t_code == INDIRECT_REF);
1170 def = get_inner_of_cast_expr (rhs);
1171 if (TREE_CODE (def) != SSA_NAME)
1172 mark_min_matrix_escape_level (mi, l, use_stmt);
1173 else
1174 {
1175 def = SSA_NAME_DEF_STMT (def);
1176 analyze_matrix_allocation_site (mi, def, l, visited);
1177 if (record_accesses)
1178 record_access_alloc_site_info (mi, use_stmt, NULL_TREE,
1179 NULL_TREE, l, true);
1180 update_type_size (mi, use_stmt, NULL, l);
1181 }
1182 return current_indirect_level;
1183 }
1184 /* Now, check the right-hand-side, to see how the SSA variable
1185 is used. */
1186 if (rhs_acc.var_found)
1187 {
1188 /* If we are passing the ssa name to a function call and
1189 the pointer escapes when passed to the function
1190 (not the case of free), then we mark the matrix as
1191 escaping at this level. */
1192 if (rhs_acc.t_code == CALL_EXPR)
1193 {
1194 analyze_accesses_for_call_expr (mi, use_stmt,
1195 current_indirect_level);
1196
1197 return current_indirect_level;
1198 }
1199 if (rhs_acc.t_code != INDIRECT_REF
1200 && rhs_acc.t_code != PLUS_EXPR && rhs_acc.t_code != SSA_NAME)
1201 {
1202 mark_min_matrix_escape_level (mi, current_indirect_level, use_stmt);
1203 return current_indirect_level;
1204 }
 1205   /* If the access in the RHS has an indirection, increase the
1206 indirection level. */
1207 if (rhs_acc.t_code == INDIRECT_REF)
1208 {
1209 if (record_accesses)
1210 record_access_alloc_site_info (mi, use_stmt, NULL_TREE,
1211 NULL_TREE,
1212 current_indirect_level, true);
1213 current_indirect_level += 1;
1214 }
1215 else if (rhs_acc.t_code == PLUS_EXPR)
1216 {
 1217   /* ??? Maybe we should check
 1218   the type of the PLUS_EXPR and make sure it's
 1219   an integral type. */
1220 gcc_assert (rhs_acc.second_op);
1221 if (last_op)
1222 /* Currently we support only one PLUS expression on the
1223 SSA_NAME that holds the base address of the current
1224 indirection level; to support more general case there
1225 is a need to hold a stack of expressions and regenerate
1226 the calculation later. */
1227 mark_min_matrix_escape_level (mi, current_indirect_level,
1228 use_stmt);
1229 else
1230 {
1231 tree index;
1232 tree op1, op2;
1233
1234 op1 = TREE_OPERAND (rhs, 0);
1235 op2 = TREE_OPERAND (rhs, 1);
1236
1237 op2 = (op1 == ssa_var) ? op2 : op1;
1238 if (TREE_CODE (op2) == INTEGER_CST)
1239 index =
1240 build_int_cst (TREE_TYPE (op1),
1241 TREE_INT_CST_LOW (op2) /
1242 int_size_in_bytes (TREE_TYPE (op1)));
1243 else
1244 {
1245 index =
1246 get_index_from_offset (op2, SSA_NAME_DEF_STMT (op2));
1247 if (index == NULL_TREE)
1248 {
1249 mark_min_matrix_escape_level (mi,
1250 current_indirect_level,
1251 use_stmt);
1252 return current_indirect_level;
1253 }
1254 }
1255 if (record_accesses)
1256 record_access_alloc_site_info (mi, use_stmt, op2,
1257 index,
1258 current_indirect_level, false);
1259 }
1260 }
1261 /* If we are storing this level of indirection mark it as
1262 escaping. */
1263 if (lhs_acc.t_code == INDIRECT_REF || TREE_CODE (lhs) != SSA_NAME)
1264 {
1265 int l = current_indirect_level;
1266
 1267   /* One exception is when we are storing to the matrix
 1268   variable itself; this is the malloc case, and we must make
 1269   sure that it's the one and only call to malloc, so
 1270   we call analyze_matrix_allocation_site to check
 1271   this out. */
1272 if (TREE_CODE (lhs) != VAR_DECL || lhs != mi->decl)
1273 mark_min_matrix_escape_level (mi, current_indirect_level,
1274 use_stmt);
1275 else
1276 {
1277 /* Also update the escaping level. */
1278 analyze_matrix_allocation_site (mi, use_stmt, l, visited);
1279 if (record_accesses)
1280 record_access_alloc_site_info (mi, use_stmt, NULL_TREE,
1281 NULL_TREE, l, true);
1282 }
1283 }
1284 else
1285 {
1286 /* We are placing it in an SSA, follow that SSA. */
1287 analyze_matrix_accesses (mi, lhs,
1288 current_indirect_level,
1289 rhs_acc.t_code == PLUS_EXPR,
1290 visited, record_accesses);
1291 }
1292 }
1293 return current_indirect_level;
1294 }
1295
1296 /* Given a SSA_VAR (coming from a use statement of the matrix MI),
1297 follow its uses and level of indirection and find out the minimum
1298 indirection level it escapes in (the highest dimension) and the maximum
1299 level it is accessed in (this will be the actual dimension of the
1300 matrix). The information is accumulated in MI.
1301 We look at the immediate uses, if one escapes we finish; if not,
1302 we make a recursive call for each one of the immediate uses of the
1303 resulting SSA name. */
1304 static void
1305 analyze_matrix_accesses (struct matrix_info *mi, tree ssa_var,
1306 int current_indirect_level, bool last_op,
1307 sbitmap visited, bool record_accesses)
1308 {
1309 imm_use_iterator imm_iter;
1310 use_operand_p use_p;
1311
1312 update_type_size (mi, SSA_NAME_DEF_STMT (ssa_var), ssa_var,
1313 current_indirect_level);
1314
1315 /* We don't go beyond the escaping level when we are performing the
1316 flattening. NOTE: we keep the last indirection level that doesn't
1317 escape. */
1318 if (mi->min_indirect_level_escape > -1
1319 && mi->min_indirect_level_escape <= current_indirect_level)
1320 return;
1321
1322 /* Now go over the uses of the SSA_NAME and check how it is used in
1323 each one of them. We are mainly looking for the pattern INDIRECT_REF,
1324 then a PLUS_EXPR, then INDIRECT_REF etc. while in between there could
1325 be any number of copies and casts. */
1326 gcc_assert (TREE_CODE (ssa_var) == SSA_NAME);
1327
1328 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, ssa_var)
1329 {
1330 tree use_stmt = USE_STMT (use_p);
1331 if (TREE_CODE (use_stmt) == PHI_NODE)
1332 /* We check all the escaping levels that get to the PHI node
 1333   and make sure they are all the same;
 1334   if not (which is rare) we let the escaping level be the
 1335   minimum level that gets into that PHI, because starting from
 1336   that level we cannot predict the behavior of the indirections. */
1337
1338 analyze_accesses_for_phi_node (mi, use_stmt, current_indirect_level,
1339 visited, record_accesses);
1340
1341 else if (TREE_CODE (use_stmt) == CALL_EXPR)
1342 analyze_accesses_for_call_expr (mi, use_stmt, current_indirect_level);
1343 else if (TREE_CODE (use_stmt) == GIMPLE_MODIFY_STMT)
1344 current_indirect_level =
1345 analyze_accesses_for_modify_stmt (mi, ssa_var, use_stmt,
1346 current_indirect_level, last_op,
1347 visited, record_accesses);
1348 }
1349 }
1350
1351
1352 /* A walk_tree function to go over the VAR_DECL, PARM_DECL nodes of
1353 the malloc size expression and check that those aren't changed
1354 over the function. */
1355 static tree
1356 check_var_notmodified_p (tree * tp, int *walk_subtrees, void *data)
1357 {
1358 basic_block bb;
1359 tree t = *tp;
1360 tree fn = data;
1361 block_stmt_iterator bsi;
1362 tree stmt;
1363
1364 if (TREE_CODE (t) != VAR_DECL && TREE_CODE (t) != PARM_DECL)
1365 return NULL_TREE;
1366
1367 FOR_EACH_BB_FN (bb, DECL_STRUCT_FUNCTION (fn))
1368 {
1369 for (bsi = bsi_start (bb); !bsi_end_p (bsi); bsi_next (&bsi))
1370 {
1371 stmt = bsi_stmt (bsi);
1372 if (TREE_CODE (stmt) != GIMPLE_MODIFY_STMT)
1373 continue;
1374 if (GIMPLE_STMT_OPERAND (stmt, 0) == t)
1375 return stmt;
1376 }
1377 }
1378 *walk_subtrees = 1;
1379 return NULL_TREE;
1380 }
1381
1382 /* Go backwards in the use-def chains and find out the expression
1383 represented by the possible SSA name in EXPR, until it is composed
1384 of only VAR_DECL, PARM_DECL and INT_CST. In case of phi nodes
1385 we make sure that all the arguments represent the same subexpression,
1386 otherwise we fail. */
1387 static tree
1388 can_calculate_expr_before_stmt (tree expr, sbitmap visited)
1389 {
1390 tree def_stmt, op1, op2, res;
1391
1392 switch (TREE_CODE (expr))
1393 {
1394 case SSA_NAME:
 1395   /* In the case of a loop, we don't know how to represent this expression. */
1396 if (TEST_BIT (visited, SSA_NAME_VERSION (expr)))
1397 return NULL_TREE;
1398
1399 SET_BIT (visited, SSA_NAME_VERSION (expr));
1400 def_stmt = SSA_NAME_DEF_STMT (expr);
1401 res = can_calculate_expr_before_stmt (def_stmt, visited);
1402 RESET_BIT (visited, SSA_NAME_VERSION (expr));
1403 return res;
1404 case VAR_DECL:
1405 case PARM_DECL:
1406 case INTEGER_CST:
1407 return expr;
1408 case PLUS_EXPR:
1409 case MINUS_EXPR:
1410 case MULT_EXPR:
1411 op1 = TREE_OPERAND (expr, 0);
1412 op2 = TREE_OPERAND (expr, 1);
1413
1414 op1 = can_calculate_expr_before_stmt (op1, visited);
1415 if (!op1)
1416 return NULL_TREE;
1417 op2 = can_calculate_expr_before_stmt (op2, visited);
1418 if (op2)
1419 return fold_build2 (TREE_CODE (expr), TREE_TYPE (expr), op1, op2);
1420 return NULL_TREE;
1421 case GIMPLE_MODIFY_STMT:
1422 return can_calculate_expr_before_stmt (GIMPLE_STMT_OPERAND (expr, 1),
1423 visited);
1424 case PHI_NODE:
1425 {
1426 int j;
1427
1428 res = NULL_TREE;
1429 /* Make sure all the arguments represent the same value. */
1430 for (j = 0; j < PHI_NUM_ARGS (expr); j++)
1431 {
1432 tree new_res;
1433 tree def = PHI_ARG_DEF (expr, j);
1434
1435 new_res = can_calculate_expr_before_stmt (def, visited);
1436 if (res == NULL_TREE)
1437 res = new_res;
1438 else if (!new_res || !expressions_equal_p (res, new_res))
1439 return NULL_TREE;
1440 }
1441 return res;
1442 }
1443 case NOP_EXPR:
1444 case CONVERT_EXPR:
1445 res = can_calculate_expr_before_stmt (TREE_OPERAND (expr, 0), visited);
1446 if (res != NULL_TREE)
1447 return build1 (TREE_CODE (expr), TREE_TYPE (expr), res);
1448 else
1449 return NULL_TREE;
1450
1451 default:
1452 return NULL_TREE;
1453 }
1454 }
1455
1456 /* There should be only one allocation function for the dimensions
1457 that don't escape. Here we check the allocation sites in this
1458 function. We must make sure that all the dimensions are allocated
1459 using malloc and that the malloc size parameter expression could be
1460 pre-calculated before the call to the malloc of dimension 0.
1461
1462 Given a candidate matrix for flattening -- MI -- check if it's
1463 appropriate for flattening -- we analyze the allocation
1464 sites that we recorded in the previous analysis. The result of the
1465 analysis is a level of indirection (matrix dimension) in which the
1466 flattening is safe. We check the following conditions:
1467 1. There is only one allocation site for each dimension.
1468 2. The allocation sites of all the dimensions are in the same
1469 function.
1470 (The above two are being taken care of during the analysis when
1471 we check the allocation site).
1472 3. All the dimensions that we flatten are allocated at once; thus
1473 the total size must be known before the allocation of the
1474 dimension 0 (top level) -- we must make sure we represent the
1475 size of the allocation as an expression of global parameters or
 1476   constants and that those don't change over the function. */
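/* An illustrative case for condition 3 (D1 and D2 are hypothetical
   global variables):

     a = (int **) malloc (D1 * sizeof (int *));
     for (i = 0; i < D1; i++)
       a[i] = (int *) malloc (D2 * sizeof (int));

   Here every size expression involves only globals that are not
   modified in this function, so the flattened size can be computed
   before the level-0 malloc.  If D2 were instead computed inside the
   loop from data reached through the matrix, the size could not be
   anticipated and that level would be marked as escaping.  */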
1477
1478 static int
1479 check_allocation_function (void **slot, void *data ATTRIBUTE_UNUSED)
1480 {
1481 int level;
1482 block_stmt_iterator bsi;
1483 basic_block bb_level_0;
1484 struct matrix_info *mi = *slot;
1485 sbitmap visited = sbitmap_alloc (num_ssa_names);
1486
1487 if (!mi->malloc_for_level)
1488 return 1;
1489 /* Do nothing if the current function is not the allocation
1490 function of MI. */
1491 if (mi->allocation_function_decl != current_function_decl
1492 /* We aren't in the main allocation function yet. */
1493 || !mi->malloc_for_level[0])
1494 return 1;
1495
1496 for (level = 1; level < mi->max_malloced_level; level++)
1497 if (!mi->malloc_for_level[level])
1498 break;
1499
1500 mark_min_matrix_escape_level (mi, level, NULL_TREE);
1501
1502 bsi = bsi_for_stmt (mi->malloc_for_level[0]);
1503 bb_level_0 = bsi.bb;
1504
1505 /* Check if the expression of the size passed to malloc could be
1506 pre-calculated before the malloc of level 0. */
1507 for (level = 1; level < mi->min_indirect_level_escape; level++)
1508 {
1509 tree call_stmt, size;
1510 struct malloc_call_data mcd;
1511
1512 call_stmt = mi->malloc_for_level[level];
1513
1514 /* Find the correct malloc information. */
1515 collect_data_for_malloc_call (call_stmt, &mcd);
1516
1517 /* No need to check anticipation for constants. */
1518 if (TREE_CODE (mcd.size_var) == INTEGER_CST)
1519 {
1520 if (!mi->dimension_size)
1521 {
1522 mi->dimension_size =
1523 (tree *) xcalloc (mi->min_indirect_level_escape,
1524 sizeof (tree));
1525 mi->dimension_size_orig =
1526 (tree *) xcalloc (mi->min_indirect_level_escape,
1527 sizeof (tree));
1528 }
1529 mi->dimension_size[level] = mcd.size_var;
1530 mi->dimension_size_orig[level] = mcd.size_var;
1531 continue;
1532 }
 1533   /* ??? Here we should also add a way to calculate the size
 1534   expression, not only know that it can be anticipated. */
1535 sbitmap_zero (visited);
1536 size = can_calculate_expr_before_stmt (mcd.size_var, visited);
1537 if (size == NULL_TREE)
1538 {
1539 mark_min_matrix_escape_level (mi, level, call_stmt);
1540 if (dump_file)
1541 fprintf (dump_file,
1542 "Matrix %s: Cannot calculate the size of allocation. escaping at level %d\n",
1543 get_name (mi->decl), level);
1544 break;
1545 }
1546 if (!mi->dimension_size)
1547 {
1548 mi->dimension_size =
1549 (tree *) xcalloc (mi->min_indirect_level_escape, sizeof (tree));
1550 mi->dimension_size_orig =
1551 (tree *) xcalloc (mi->min_indirect_level_escape, sizeof (tree));
1552 }
1553 mi->dimension_size[level] = size;
1554 mi->dimension_size_orig[level] = size;
1555 }
1556
1557 /* We don't need those anymore. */
1558 for (level = mi->min_indirect_level_escape;
1559 level < mi->max_malloced_level; level++)
1560 mi->malloc_for_level[level] = NULL;
1561 return 1;
1562 }
1563
1564 /* Track all access and allocation sites. */
1565 static void
1566 find_sites_in_func (bool record)
1567 {
1568 sbitmap visited_stmts_1;
1569
1570 block_stmt_iterator bsi;
1571 tree stmt;
1572 basic_block bb;
1573 struct matrix_info tmpmi, *mi;
1574
1575 visited_stmts_1 = sbitmap_alloc (num_ssa_names);
1576
1577 FOR_EACH_BB (bb)
1578 {
1579 for (bsi = bsi_start (bb); !bsi_end_p (bsi); bsi_next (&bsi))
1580 {
1581 stmt = bsi_stmt (bsi);
1582 if (TREE_CODE (stmt) == GIMPLE_MODIFY_STMT
1583 && TREE_CODE (GIMPLE_STMT_OPERAND (stmt, 0)) == VAR_DECL)
1584 {
1585 tmpmi.decl = GIMPLE_STMT_OPERAND (stmt, 0);
1586 if ((mi = htab_find (matrices_to_reorg, &tmpmi)))
1587 {
1588 sbitmap_zero (visited_stmts_1);
1589 analyze_matrix_allocation_site (mi, stmt, 0, visited_stmts_1);
1590 }
1591 }
1592 if (TREE_CODE (stmt) == GIMPLE_MODIFY_STMT
1593 && TREE_CODE (GIMPLE_STMT_OPERAND (stmt, 0)) == SSA_NAME
1594 && TREE_CODE (GIMPLE_STMT_OPERAND (stmt, 1)) == VAR_DECL)
1595 {
1596 tmpmi.decl = GIMPLE_STMT_OPERAND (stmt, 1);
1597 if ((mi = htab_find (matrices_to_reorg, &tmpmi)))
1598 {
1599 sbitmap_zero (visited_stmts_1);
1600 analyze_matrix_accesses (mi,
1601 GIMPLE_STMT_OPERAND (stmt, 0), 0,
1602 false, visited_stmts_1, record);
1603 }
1604 }
1605 }
1606 }
1607 sbitmap_free (visited_stmts_1);
1608 }
1609
1610 /* Traverse the use-def chains to see if there are matrices that
1611 are passed through pointers and we cannot know how they are accessed.
1612 For each SSA-name defined by a global variable of our interest,
1613 we traverse the use-def chains of the SSA and follow the indirections,
1614 and record in what level of indirection the use of the variable
1615 escapes. A use of a pointer escapes when it is passed to a function,
1616 stored into memory or assigned (except in malloc and free calls). */
1617
1618 static void
1619 record_all_accesses_in_func (void)
1620 {
1621 unsigned i;
1622 sbitmap visited_stmts_1;
1623
1624 visited_stmts_1 = sbitmap_alloc (num_ssa_names);
1625
1626 for (i = 0; i < num_ssa_names; i++)
1627 {
1628 struct matrix_info tmpmi, *mi;
1629 tree ssa_var = ssa_name (i);
1630 tree rhs, lhs;
1631
1632 if (!ssa_var
1633 || TREE_CODE (SSA_NAME_DEF_STMT (ssa_var)) != GIMPLE_MODIFY_STMT)
1634 continue;
1635 rhs = GIMPLE_STMT_OPERAND (SSA_NAME_DEF_STMT (ssa_var), 1);
1636 lhs = GIMPLE_STMT_OPERAND (SSA_NAME_DEF_STMT (ssa_var), 0);
1637 if (TREE_CODE (rhs) != VAR_DECL && TREE_CODE (lhs) != VAR_DECL)
1638 continue;
1639
1640 /* If the RHS is a matrix that we want to analyze, follow the def-use
1641 chain for this SSA_VAR and check for escapes or apply the
1642 flattening. */
1643 tmpmi.decl = rhs;
1644 if ((mi = htab_find (matrices_to_reorg, &tmpmi)))
1645 {
1646 /* This variable will track the visited PHI nodes, so we can limit
1647 its size to the maximum number of SSA names. */
1648 sbitmap_zero (visited_stmts_1);
1649 analyze_matrix_accesses (mi, ssa_var,
1650 0, false, visited_stmts_1, true);
1651
1652 }
1653 }
1654 sbitmap_free (visited_stmts_1);
1655 }
1656
 1657 /* Used when we want to convert the expression RESULT = something * ORIG into RESULT = something * NEW. If ORIG and NEW are powers of 2, shift operations can be used; otherwise division and multiplication. */
1658 static tree
1659 compute_offset (HOST_WIDE_INT orig, HOST_WIDE_INT new, tree result)
1660 {
1661
1662 int x, y;
1663 tree result1, ratio, log, orig_tree, new_tree;
1664
1665 x = exact_log2 (orig);
1666 y = exact_log2 (new);
1667
1668 if (x != -1 && y != -1)
1669 {
1670 if (x == y)
1671 return result;
1672 else if (x > y)
1673 {
1674 log = build_int_cst (TREE_TYPE (result), x - y);
1675 result1 =
1676 fold_build2 (LSHIFT_EXPR, TREE_TYPE (result), result, log);
1677 return result1;
1678 }
1679 log = build_int_cst (TREE_TYPE (result), y - x);
1680 result1 = fold_build2 (RSHIFT_EXPR, TREE_TYPE (result), result, log);
1681
1682 return result1;
1683 }
1684 orig_tree = build_int_cst (TREE_TYPE (result), orig);
1685 new_tree = build_int_cst (TREE_TYPE (result), new);
1686 ratio = fold_build2 (TRUNC_DIV_EXPR, TREE_TYPE (result), result, orig_tree);
1687 result1 = fold_build2 (MULT_EXPR, TREE_TYPE (result), ratio, new_tree);
1688
1689 return result1;
1690 }
1691
1692
1693 /* We know that we are allowed to perform matrix flattening (according to the
1694 escape analysis), so we traverse the use-def chains of the SSA vars
1695 defined by the global variables pointing to the matrices of our interest.
 1696   In each use of the SSA we calculate the offset from the base address
 1697   according to the following equation:
 1698
 1699   a[I1][I2]...[Ik], where D1..Dk is the length of each dimension and the
 1700   escaping level is m <= k, and a' is the newly allocated matrix,
 1701   will be translated to:
1702
1703 b[I(m+1)]...[Ik]
1704
1705 where
1706 b = a' + I1*D2...*Dm + I2*D3...Dm + ... + Im
1707 */
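/* A concrete instance of the formula above (for illustration): for a
   two-dimensional matrix a[I1][I2] with dimension lengths D1 and D2
   and escape level m = k = 2, it gives b = a' + I1*D2 + I2 with no
   remaining indices, so the access becomes simply a'[I1*D2 + I2],
   matching the a[i * dim2 + j] form shown at the top of this file.  */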
1708
1709 static int
1710 transform_access_sites (void **slot, void *data ATTRIBUTE_UNUSED)
1711 {
1712 tree stmts;
1713 block_stmt_iterator bsi;
1714 struct matrix_info *mi = *slot;
1715 int min_escape_l = mi->min_indirect_level_escape;
1716 struct access_site_info *acc_info;
1717 int i;
1718
1719 if (min_escape_l < 2 || !mi->access_l)
1720 return 1;
1721 for (i = 0; VEC_iterate (access_site_info_p, mi->access_l, i, acc_info);
1722 i++)
1723 {
1724 tree orig, type;
1725
1726 /* This is possible because we collect the access sites before
1727 we determine the final minimum indirection level. */
1728 if (acc_info->level >= min_escape_l)
1729 {
1730 free (acc_info);
1731 continue;
1732 }
1733 if (acc_info->is_alloc)
1734 {
1735 if (acc_info->level >= 0 && bb_for_stmt (acc_info->stmt))
1736 {
1737 ssa_op_iter iter;
1738 tree def;
1739 tree stmt = acc_info->stmt;
1740
1741 FOR_EACH_SSA_TREE_OPERAND (def, stmt, iter, SSA_OP_DEF)
1742 mark_sym_for_renaming (SSA_NAME_VAR (def));
1743 bsi = bsi_for_stmt (stmt);
1744 gcc_assert (TREE_CODE (acc_info->stmt) == GIMPLE_MODIFY_STMT);
1745 if (TREE_CODE (GIMPLE_STMT_OPERAND (acc_info->stmt, 0)) ==
1746 SSA_NAME && acc_info->level < min_escape_l - 1)
1747 {
1748 imm_use_iterator imm_iter;
1749 use_operand_p use_p;
1750 tree use_stmt;
1751
1752 FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter,
1753 GIMPLE_STMT_OPERAND (acc_info->stmt,
1754 0))
1755 FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
1756 {
1757 tree conv, tmp, stmts;
1758
1759 /* Emit convert statement to convert to type of use. */
1760 conv =
1761 fold_build1 (CONVERT_EXPR,
1762 TREE_TYPE (GIMPLE_STMT_OPERAND
1763 (acc_info->stmt, 0)),
1764 TREE_OPERAND (GIMPLE_STMT_OPERAND
1765 (acc_info->stmt, 1), 0));
1766 tmp =
1767 create_tmp_var (TREE_TYPE
1768 (GIMPLE_STMT_OPERAND
1769 (acc_info->stmt, 0)), "new");
1770 add_referenced_var (tmp);
1771 stmts =
1772 fold_build2 (GIMPLE_MODIFY_STMT,
1773 TREE_TYPE (GIMPLE_STMT_OPERAND
1774 (acc_info->stmt, 0)), tmp,
1775 conv);
1776 tmp = make_ssa_name (tmp, stmts);
1777 GIMPLE_STMT_OPERAND (stmts, 0) = tmp;
1778 bsi = bsi_for_stmt (acc_info->stmt);
1779 bsi_insert_after (&bsi, stmts, BSI_SAME_STMT);
1780 SET_USE (use_p, tmp);
1781 }
1782 }
1783 if (acc_info->level < min_escape_l - 1)
1784 bsi_remove (&bsi, true);
1785 }
1786 free (acc_info);
1787 continue;
1788 }
1789 orig = GIMPLE_STMT_OPERAND (acc_info->stmt, 1);
1790 type = TREE_TYPE (orig);
1791 if (TREE_CODE (orig) == INDIRECT_REF
1792 && acc_info->level < min_escape_l - 1)
1793 {
1794 /* Replace the INDIRECT_REF with a NOP_EXPR (cast); usually we are
1795 casting from "pointer to type" to "type".  */
1796 orig =
1797 build1 (NOP_EXPR, TREE_TYPE (orig),
1798 GIMPLE_STMT_OPERAND (orig, 0));
1799 GIMPLE_STMT_OPERAND (acc_info->stmt, 1) = orig;
1800 }
1801 else if (TREE_CODE (orig) == PLUS_EXPR
1802 && acc_info->level < (min_escape_l))
1803 {
1804 imm_use_iterator imm_iter;
1805 use_operand_p use_p;
1806
1807 tree offset;
1808 int k = acc_info->level;
1809 tree num_elements, total_elements;
1810 tree tmp1;
1811 tree d_size = mi->dimension_size[k];
1812
1813 /* We already make sure in the analysis that the first operand
1814 is the base and the second is the offset. */
1815 offset = acc_info->offset;
1816 if (mi->dim_map[k] == min_escape_l - 1)
1817 {
1818 if (!check_transpose_p || mi->is_transposed_p == false)
1819 tmp1 = offset;
1820 else
1821 {
1822 tree new_offset;
1823 tree d_type_size, d_type_size_k;
1824
1825 d_type_size =
1826 build_int_cst (type,
1827 mi->dimension_type_size[min_escape_l]);
1828 d_type_size_k =
1829 build_int_cst (type, mi->dimension_type_size[k + 1]);
1830
1831 new_offset =
1832 compute_offset (mi->dimension_type_size[min_escape_l],
1833 mi->dimension_type_size[k + 1], offset);
1834
1835 total_elements = new_offset;
1836 if (new_offset != offset)
1837 {
1838 tmp1 =
1839 force_gimple_operand (total_elements, &stmts, true,
1840 NULL);
1841 if (stmts)
1842 {
1843 tree_stmt_iterator tsi;
1844
1845 for (tsi = tsi_start (stmts); !tsi_end_p (tsi);
1846 tsi_next (&tsi))
1847 mark_symbols_for_renaming (tsi_stmt (tsi));
1848 bsi = bsi_for_stmt (acc_info->stmt);
1849 bsi_insert_before (&bsi, stmts, BSI_SAME_STMT);
1850 }
1851 }
1852 else
1853 tmp1 = offset;
1854 }
1855 }
1856 else
1857 {
1858 d_size = mi->dimension_size[mi->dim_map[k] + 1];
1859 num_elements =
1860 fold_build2 (MULT_EXPR, type, acc_info->index, d_size);
1861 tmp1 = force_gimple_operand (num_elements, &stmts, true, NULL);
1862 add_referenced_var (d_size);
1863 if (stmts)
1864 {
1865 tree_stmt_iterator tsi;
1866
1867 for (tsi = tsi_start (stmts); !tsi_end_p (tsi);
1868 tsi_next (&tsi))
1869 mark_symbols_for_renaming (tsi_stmt (tsi));
1870 bsi = bsi_for_stmt (acc_info->stmt);
1871 bsi_insert_before (&bsi, stmts, BSI_SAME_STMT);
1872 }
1873 }
1874 /* Replace the offset if needed. */
1875 if (tmp1 != offset)
1876 {
1877 if (TREE_CODE (offset) == SSA_NAME)
1878 {
1879 tree use_stmt;
1880
1881 FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, offset)
1882 FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
1883 if (use_stmt == acc_info->stmt)
1884 SET_USE (use_p, tmp1);
1885 }
1886 else
1887 {
1888 gcc_assert (TREE_CODE (offset) == INTEGER_CST);
1889 TREE_OPERAND (orig, 1) = tmp1;
1890 }
1891 }
1892 }
1893 /* ??? This happens because we currently record the same access
1894 site more than once; we should use a hash table to avoid it
1895 and insert the STMT of each access site only
1896 once.
1897 else
1898 gcc_unreachable (); */
1899 free (acc_info);
1900 }
1901 VEC_free (access_site_info_p, heap, mi->access_l);
1902
1903 update_ssa (TODO_update_ssa);
1904 #ifdef ENABLE_CHECKING
1905 verify_ssa (true);
1906 #endif
1907 return 1;
1908 }
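/* An illustrative sketch (not part of the pass, fenced off with #if 0)
   of what the rewrite in transform_access_sites amounts to at the
   source level for a fully flattened three-dimensional matrix (escape
   level m == k == 3).  The names A_FLAT, D2, D3 and the function
   itself are hypothetical.  */
#if 0
static int
example_flattened_read (int *a_flat, long d2, long d3,
                        long i, long j, long k)
{
  /* b = a' + I1*D2*D3 + I2*D3 + I3, per the offset equation given
     before transform_access_sites.  */
  return *(a_flat + i * d2 * d3 + j * d3 + k);
}
#endif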
1909
1910 /* Sort the array A of N counts into ascending order and rearrange DIM_MAP to reflect the new order.  */
1911
1912 static void
1913 sort_dim_hot_level (gcov_type * a, int *dim_map, int n)
1914 {
1915 int i, j, tmp1;
1916 gcov_type tmp;
1917
1918 for (i = 0; i < n - 1; i++)
1919 {
1920 for (j = 0; j < n - 1 - i; j++)
1921 {
1922 if (a[j + 1] < a[j])
1923 {
1924 tmp = a[j]; /* swap a[j] and a[j+1] */
1925 a[j] = a[j + 1];
1926 a[j + 1] = tmp;
1927 tmp1 = dim_map[j];
1928 dim_map[j] = dim_map[j + 1];
1929 dim_map[j + 1] = tmp1;
1930 }
1931 }
1932 }
1933 }
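/* A small usage sketch (not part of the pass, kept under #if 0),
   assuming three recorded dimensions: starting from counts
   {50, 10, 30} and the identity DIM_MAP {0, 1, 2}, the bubble sort
   above leaves counts {10, 30, 50} and DIM_MAP {1, 2, 0}.  */
#if 0
static void
example_sort_dim_hot_level (void)
{
  gcov_type counts[3] = { 50, 10, 30 };
  int dim_map[3] = { 0, 1, 2 };

  sort_dim_hot_level (counts, dim_map, 3);
  /* counts is now {10, 30, 50} and dim_map is {1, 2, 0}.  */
}
#endif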
1934
1935 /* Replace the multiple mallocs (one for each dimension) with one malloc
1936 of size DIM1*DIM2*...*DIMN*size_of_element.
1937 Make sure that we hold the size used at the malloc site inside a
1938 new global variable; this way we ensure that the size doesn't
1939 change and is accessible from all the other functions that
1940 use the matrix. Also, the original calls to free are deleted,
1941 and replaced by a new call to free the flattened matrix. */
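/* On the free side, the transformation described above amounts to the
   following source-level change for a two-level matrix (a sketch only,
   not part of the pass; names are hypothetical):

     before:                          after:
       for (i = 0; i < dim1; i++)       free (a);
         free (a[i]);
       free (a);

   Only the outermost free survives, and it now releases the single
   flattened allocation.  */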
1942
1943 static int
1944 transform_allocation_sites (void **slot, void *data ATTRIBUTE_UNUSED)
1945 {
1946 int i;
1947 struct matrix_info *mi;
1948 tree type, call_stmt_0, malloc_stmt, oldfn, stmts, prev_dim_size, use_stmt;
1949 struct cgraph_node *c_node;
1950 struct cgraph_edge *e;
1951 block_stmt_iterator bsi;
1952 struct malloc_call_data mcd;
1953 HOST_WIDE_INT element_size;
1954
1955 imm_use_iterator imm_iter;
1956 use_operand_p use_p;
1957 tree old_size_0, tmp;
1958 int min_escape_l;
1959 int id;
1960
1961 mi = *slot;
1962
1963 min_escape_l = mi->min_indirect_level_escape;
1964
1965 if (!mi->malloc_for_level)
1966 mi->min_indirect_level_escape = 0;
1967
1968 if (mi->min_indirect_level_escape < 2)
1969 return 1;
1970
1971 mi->dim_map = (int *) xcalloc (mi->min_indirect_level_escape, sizeof (int));
1972 for (i = 0; i < mi->min_indirect_level_escape; i++)
1973 mi->dim_map[i] = i;
1974 if (check_transpose_p)
1975 {
1976 int i;
1977
1978 if (dump_file)
1979 {
1980 fprintf (dump_file, "Matrix %s:\n", get_name (mi->decl));
1981 for (i = 0; i < min_escape_l; i++)
1982 {
1983 fprintf (dump_file, "dim %d before sort ", i);
1984 if (mi->dim_hot_level)
1985 fprintf (dump_file,
1986 "count is " HOST_WIDEST_INT_PRINT_DEC " \n",
1987 mi->dim_hot_level[i]);
1988 }
1989 }
1990 sort_dim_hot_level (mi->dim_hot_level, mi->dim_map,
1991 mi->min_indirect_level_escape);
1992 if (dump_file)
1993 for (i = 0; i < min_escape_l; i++)
1994 {
1995 fprintf (dump_file, "dim %d after sort\n", i);
1996 if (mi->dim_hot_level)
1997 fprintf (dump_file, "count is " HOST_WIDE_INT_PRINT_DEC
1998 " \n", (HOST_WIDE_INT) mi->dim_hot_level[i]);
1999 }
2000 for (i = 0; i < mi->min_indirect_level_escape; i++)
2001 {
2002 if (dump_file)
2003 fprintf (dump_file, "dim_map[%d] after sort %d\n", i,
2004 mi->dim_map[i]);
2005 if (mi->dim_map[i] != i)
2006 {
2007 if (dump_file)
2008 fprintf (dump_file,
2009 "Transposed dimensions: dim %d is now dim %d\n",
2010 mi->dim_map[i], i);
2011 mi->is_transposed_p = true;
2012 }
2013 }
2014 }
2015 else
2016 {
2017 for (i = 0; i < mi->min_indirect_level_escape; i++)
2018 mi->dim_map[i] = i;
2019 }
2020 /* Call statement of allocation site of level 0. */
2021 call_stmt_0 = mi->malloc_for_level[0];
2022
2023 /* Finds the correct malloc information. */
2024 collect_data_for_malloc_call (call_stmt_0, &mcd);
2025
2026 mi->dimension_size[0] = mcd.size_var;
2027 mi->dimension_size_orig[0] = mcd.size_var;
2028 /* Make sure that the variables in the size expression for
2029 all the dimensions (above level 0) aren't modified in
2030 the allocation function. */
2031 for (i = 1; i < mi->min_indirect_level_escape; i++)
2032 {
2033 tree t;
2034
2035 /* mi->dimension_size must contain the expression of the size calculated
2036 in check_allocation_function. */
2037 gcc_assert (mi->dimension_size[i]);
2038
2039 t = walk_tree_without_duplicates (&(mi->dimension_size[i]),
2040 check_var_notmodified_p,
2041 mi->allocation_function_decl);
2042 if (t != NULL_TREE)
2043 {
2044 mark_min_matrix_escape_level (mi, i, t);
2045 break;
2046 }
2047 }
2048
2049 if (mi->min_indirect_level_escape < 2)
2050 return 1;
2051
2052 /* The size expressions must be available right before the call
2053 to the malloc of level 0.  */
2054 bsi = bsi_for_stmt (call_stmt_0);
2055
2056 /* Find out the size of each dimension by looking at the malloc
2057 sites and create a global variable to hold it.
2058 We add the assignment to the global before the malloc of level 0. */
2059
2060 /* To be able to produce gimple temporaries. */
2061 oldfn = current_function_decl;
2062 current_function_decl = mi->allocation_function_decl;
2063 cfun = DECL_STRUCT_FUNCTION (mi->allocation_function_decl);
2064
2065 /* Set the dimension sizes as follows:
2066 DIM_SIZE[i] = DIM_SIZE[n] * ... * DIM_SIZE[i]
2067 where n is the maximum non-escaping level.  */
2068 element_size = mi->dimension_type_size[mi->min_indirect_level_escape];
2069 prev_dim_size = NULL_TREE;
2070
2071 for (i = mi->min_indirect_level_escape - 1; i >= 0; i--)
2072 {
2073 tree dim_size, dim_var, tmp;
2074 tree d_type_size;
2075 tree_stmt_iterator tsi;
2076
2077 /* Now put the size expression in a global variable and initialize it to
2078 the size expression before the malloc of level 0. */
2079 dim_var =
2080 add_new_static_var (TREE_TYPE
2081 (mi->dimension_size_orig[mi->dim_map[i]]));
2082 type = TREE_TYPE (mi->dimension_size_orig[mi->dim_map[i]]);
2083
2084 /* DIM_SIZE = MALLOC_SIZE_PARAM / TYPE_SIZE. */
2085 /* Find which dim ID becomes dim I. */
2086 for (id = 0; id < mi->min_indirect_level_escape; id++)
2087 if (mi->dim_map[id] == i)
2088 break;
2089 d_type_size =
2090 build_int_cst (type, mi->dimension_type_size[id + 1]);
2091 if (!prev_dim_size)
2092 prev_dim_size = build_int_cst (type, element_size);
2093 if (!check_transpose_p && i == mi->min_indirect_level_escape - 1)
2094 {
2095 dim_size = mi->dimension_size_orig[id];
2096 }
2097 else
2098 {
2099 dim_size =
2100 fold_build2 (TRUNC_DIV_EXPR, type, mi->dimension_size_orig[id],
2101 d_type_size);
2102
2103 dim_size = fold_build2 (MULT_EXPR, type, dim_size, prev_dim_size);
2104 }
2105 dim_size = force_gimple_operand (dim_size, &stmts, true, NULL);
2106 if (stmts)
2107 {
2108 for (tsi = tsi_start (stmts); !tsi_end_p (tsi); tsi_next (&tsi))
2109 mark_symbols_for_renaming (tsi_stmt (tsi));
2110 bsi_insert_before (&bsi, stmts, BSI_SAME_STMT);
2111 bsi = bsi_for_stmt (call_stmt_0);
2112 }
2113 /* GLOBAL_HOLDING_THE_SIZE = DIM_SIZE. */
2114 tmp = fold_build2 (GIMPLE_MODIFY_STMT, type, dim_var, dim_size);
2115 GIMPLE_STMT_OPERAND (tmp, 0) = dim_var;
2116 mark_symbols_for_renaming (tmp);
2117 bsi_insert_before (&bsi, tmp, BSI_NEW_STMT);
2118 bsi = bsi_for_stmt (call_stmt_0);
2119
2120 prev_dim_size = mi->dimension_size[i] = dim_var;
2121 }
2122 update_ssa (TODO_update_ssa);
2123 /* Replace the size argument of the level 0 malloc with the
2124 total size of all the dimensions.  */
2125 malloc_stmt = GIMPLE_STMT_OPERAND (call_stmt_0, 1);
2126 c_node = cgraph_node (mi->allocation_function_decl);
2127 old_size_0 = CALL_EXPR_ARG (malloc_stmt, 0);
2128 bsi = bsi_for_stmt (call_stmt_0);
2129 tmp = force_gimple_operand (mi->dimension_size[0], &stmts, true, NULL);
2130 if (stmts)
2131 {
2132 tree_stmt_iterator tsi;
2133
2134 for (tsi = tsi_start (stmts); !tsi_end_p (tsi); tsi_next (&tsi))
2135 mark_symbols_for_renaming (tsi_stmt (tsi));
2136 bsi_insert_before (&bsi, stmts, BSI_SAME_STMT);
2137 bsi = bsi_for_stmt (call_stmt_0);
2138 }
2139 if (TREE_CODE (old_size_0) == SSA_NAME)
2140 {
2141 FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, old_size_0)
2142 FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
2143 if (use_stmt == call_stmt_0)
2144 SET_USE (use_p, tmp);
2145 }
2146 /* When deleting the calls to malloc we also need to remove the corresponding edge from
2147 the call graph to keep it consistent. Notice that cgraph_edge may
2148 create a new node in the call graph if there is no node for the given
2149 declaration; this shouldn't be the case but currently there is no way to
2150 check this outside of "cgraph.c". */
2151 for (i = 1; i < mi->min_indirect_level_escape; i++)
2152 {
2153 block_stmt_iterator bsi;
2154 tree use_stmt1 = NULL;
2155 tree call;
2156
2157 tree call_stmt = mi->malloc_for_level[i];
2158 call = GIMPLE_STMT_OPERAND (call_stmt, 1);
2159 gcc_assert (TREE_CODE (call) == CALL_EXPR);
2160 e = cgraph_edge (c_node, call_stmt);
2161 gcc_assert (e);
2162 cgraph_remove_edge (e);
2163 bsi = bsi_for_stmt (call_stmt);
2164 /* Remove the call stmt. */
2165 bsi_remove (&bsi, true);
2166 /* Remove the type cast stmt.  */
2167 FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter,
2168 GIMPLE_STMT_OPERAND (call_stmt, 0))
2169 {
2170 use_stmt1 = use_stmt;
2171 bsi = bsi_for_stmt (use_stmt);
2172 bsi_remove (&bsi, true);
2173 }
2174 /* Remove the assignment of the allocated area. */
2175 FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter,
2176 GIMPLE_STMT_OPERAND (use_stmt1, 0))
2177 {
2178 bsi = bsi_for_stmt (use_stmt);
2179 bsi_remove (&bsi, true);
2180 }
2181 }
2182 update_ssa (TODO_update_ssa);
2183 #ifdef ENABLE_CHECKING
2184 verify_ssa (true);
2185 #endif
2186 /* Delete the calls to free. */
2187 for (i = 1; i < mi->min_indirect_level_escape; i++)
2188 {
2189 block_stmt_iterator bsi;
2190 tree call;
2191
2192 /* ??? It is unclear why this case is possible, but we have hit it once.  */
2193 if (!mi->free_stmts[i].stmt)
2194 continue;
2195
2196 call = TREE_OPERAND (mi->free_stmts[i].stmt, 1);
2197 c_node = cgraph_node (mi->free_stmts[i].func);
2198
2199 gcc_assert (TREE_CODE (mi->free_stmts[i].stmt) == CALL_EXPR);
2200 e = cgraph_edge (c_node, mi->free_stmts[i].stmt);
2201 gcc_assert (e);
2202 cgraph_remove_edge (e);
2203 current_function_decl = mi->free_stmts[i].func;
2204 cfun = DECL_STRUCT_FUNCTION (mi->free_stmts[i].func);
2205 bsi = bsi_for_stmt (mi->free_stmts[i].stmt);
2206 bsi_remove (&bsi, true);
2207 }
2208 /* Return to the previous situation. */
2209 current_function_decl = oldfn;
2210 cfun = oldfn ? DECL_STRUCT_FUNCTION (oldfn) : NULL;
2211 return 1;
2212
2213 }
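/* A minimal sketch (not part of the pass, kept under #if 0) of the
   suffix products that the dimension-size loop in
   transform_allocation_sites stores in the per-level size globals,
   computed here on plain integers.  DIMS holds the element count of
   each flattened level, ELEM_SIZE the size of the innermost element,
   and SIZES[0] is what becomes the argument of the single remaining
   malloc.  All names are hypothetical.  */
#if 0
static void
example_dimension_sizes (const long *dims, int n, long elem_size,
                         long *sizes)
{
  long prev = elem_size;
  int i;

  for (i = n - 1; i >= 0; i--)
    {
      /* SIZES[i] = DIMS[i] * DIMS[i+1] * ... * DIMS[n-1] * ELEM_SIZE.  */
      sizes[i] = dims[i] * prev;
      prev = sizes[i];
    }
}
#endif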
2214
2215
2216 /* Print out the results of the escape analysis. */
2217 static int
2218 dump_matrix_reorg_analysis (void **slot, void *data ATTRIBUTE_UNUSED)
2219 {
2220 struct matrix_info *mi = *slot;
2221
2222 if (!dump_file)
2223 return 1;
2224 fprintf (dump_file, "Matrix \"%s\"; Escaping Level: %d, Num Dims: %d,",
2225 get_name (mi->decl), mi->min_indirect_level_escape, mi->num_dims);
2226 fprintf (dump_file, " Malloc Dims: %d, ", mi->max_malloced_level);
2227 fprintf (dump_file, "\n");
2228 if (mi->min_indirect_level_escape >= 2)
2229 fprintf (dump_file, "Flattened %d dimensions \n",
2230 mi->min_indirect_level_escape);
2231 return 1;
2232 }
2233
2234
2235 /* Perform matrix flattening. */
2236
2237 static unsigned int
2238 matrix_reorg (void)
2239 {
2240 struct cgraph_node *node;
2241
2242 if (profile_info)
2243 check_transpose_p = true;
2244 else
2245 check_transpose_p = false;
2246 /* If there are hand-written vectors, we skip this optimization.  */
2247 for (node = cgraph_nodes; node; node = node->next)
2248 if (!may_flatten_matrices (node))
2249 return 0;
2250 matrices_to_reorg = htab_create (37, mtt_info_hash, mtt_info_eq, mat_free);
2251 /* Find and record all potential matrices in the program. */
2252 find_matrices_decl ();
2253 /* Analyze the accesses of the matrices (escaping analysis). */
2254 for (node = cgraph_nodes; node; node = node->next)
2255 if (node->analyzed)
2256 {
2257 tree temp_fn;
2258
2259 temp_fn = current_function_decl;
2260 current_function_decl = node->decl;
2261 push_cfun (DECL_STRUCT_FUNCTION (node->decl));
2262 bitmap_obstack_initialize (NULL);
2263 tree_register_cfg_hooks ();
2264
2265 if (!gimple_in_ssa_p (cfun))
2266 {
2267 free_dominance_info (CDI_DOMINATORS);
2268 free_dominance_info (CDI_POST_DOMINATORS);
2269 pop_cfun ();
2270 current_function_decl = temp_fn;
2271
2272 return 0;
2273 }
2274
2275 #ifdef ENABLE_CHECKING
2276 verify_flow_info ();
2277 #endif
2278
2279 if (!matrices_to_reorg)
2280 {
2281 free_dominance_info (CDI_DOMINATORS);
2282 free_dominance_info (CDI_POST_DOMINATORS);
2283 pop_cfun ();
2284 current_function_decl = temp_fn;
2285
2286 return 0;
2287 }
2288
2289 /* Create a hash table for the PHI nodes.  */
2290 htab_mat_acc_phi_nodes = htab_create (37, mat_acc_phi_hash,
2291 mat_acc_phi_eq, free);
2292 if (!check_transpose_p)
2293 find_sites_in_func (false);
2294 else
2295 {
2296 find_sites_in_func (true);
2297 loop_optimizer_init (LOOPS_NORMAL);
2298 if (current_loops)
2299 scev_initialize ();
2300 htab_traverse (matrices_to_reorg, analyze_transpose, NULL);
2301 if (current_loops)
2302 {
2303 scev_finalize ();
2304 loop_optimizer_finalize ();
2305 current_loops = NULL;
2306 }
2307 }
2308 /* If the current function is the allocation function for any of
2309 the matrices, we check its allocation and the escape level.  */
2310 htab_traverse (matrices_to_reorg, check_allocation_function, NULL);
2311 free_dominance_info (CDI_DOMINATORS);
2312 free_dominance_info (CDI_POST_DOMINATORS);
2313 pop_cfun ();
2314 current_function_decl = temp_fn;
2315 }
2316 htab_traverse (matrices_to_reorg, transform_allocation_sites, NULL);
2317 /* Now transform the accesses. */
2318 for (node = cgraph_nodes; node; node = node->next)
2319 if (node->analyzed)
2320 {
2321 /* Remember that allocation sites have been handled. */
2322 tree temp_fn;
2323
2324 temp_fn = current_function_decl;
2325 current_function_decl = node->decl;
2326 push_cfun (DECL_STRUCT_FUNCTION (node->decl));
2327 bitmap_obstack_initialize (NULL);
2328 tree_register_cfg_hooks ();
2329 record_all_accesses_in_func ();
2330 htab_traverse (matrices_to_reorg, transform_access_sites, NULL);
2331 free_dominance_info (CDI_DOMINATORS);
2332 free_dominance_info (CDI_POST_DOMINATORS);
2333 pop_cfun ();
2334 current_function_decl = temp_fn;
2335 }
2336 htab_traverse (matrices_to_reorg, dump_matrix_reorg_analysis, NULL);
2337
2338 current_function_decl = NULL;
2339 cfun = NULL;
2340 matrices_to_reorg = NULL;
2341 return 0;
2342 }
2343
2344
2345 /* The condition for matrix flattening to be performed. */
2346 static bool
2347 gate_matrix_reorg (void)
2348 {
2349 return flag_ipa_matrix_reorg /*&& flag_whole_program */ ;
2350 }
2351
2352 struct tree_opt_pass pass_ipa_matrix_reorg = {
2353 "matrix-reorg", /* name */
2354 gate_matrix_reorg, /* gate */
2355 matrix_reorg, /* execute */
2356 NULL, /* sub */
2357 NULL, /* next */
2358 0, /* static_pass_number */
2359 0, /* tv_id */
2360 0, /* properties_required */
2361 PROP_trees, /* properties_provided */
2362 0, /* properties_destroyed */
2363 0, /* todo_flags_start */
2364 TODO_dump_cgraph | TODO_dump_func, /* todo_flags_finish */
2365 0 /* letter */
2366 };