/* Analysis Utilities for Loop Vectorization.
   Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011, 2012
   Free Software Foundation, Inc.
   Contributed by Dorit Nuzman <dorit@il.ibm.com>

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tm.h"
#include "ggc.h"
#include "tree.h"
#include "target.h"
#include "basic-block.h"
#include "gimple-pretty-print.h"
#include "tree-flow.h"
#include "tree-dump.h"
#include "cfgloop.h"
#include "expr.h"
#include "optabs.h"
#include "params.h"
#include "tree-data-ref.h"
#include "tree-vectorizer.h"
#include "recog.h"
#include "diagnostic-core.h"

/* Pattern recognition functions.  */
static gimple vect_recog_widen_sum_pattern (VEC (gimple, heap) **, tree *,
                                            tree *);
static gimple vect_recog_widen_mult_pattern (VEC (gimple, heap) **, tree *,
                                             tree *);
static gimple vect_recog_dot_prod_pattern (VEC (gimple, heap) **, tree *,
                                           tree *);
static gimple vect_recog_pow_pattern (VEC (gimple, heap) **, tree *, tree *);
static gimple vect_recog_over_widening_pattern (VEC (gimple, heap) **, tree *,
                                                tree *);
static gimple vect_recog_widen_shift_pattern (VEC (gimple, heap) **,
                                              tree *, tree *);
static gimple vect_recog_vector_vector_shift_pattern (VEC (gimple, heap) **,
                                                      tree *, tree *);
static gimple vect_recog_sdivmod_pow2_pattern (VEC (gimple, heap) **,
                                               tree *, tree *);
static gimple vect_recog_mixed_size_cond_pattern (VEC (gimple, heap) **,
                                                  tree *, tree *);
static gimple vect_recog_bool_pattern (VEC (gimple, heap) **, tree *, tree *);
static vect_recog_func_ptr vect_vect_recog_func_ptrs[NUM_PATTERNS] = {
        vect_recog_widen_mult_pattern,
        vect_recog_widen_sum_pattern,
        vect_recog_dot_prod_pattern,
        vect_recog_pow_pattern,
        vect_recog_widen_shift_pattern,
        vect_recog_over_widening_pattern,
        vect_recog_vector_vector_shift_pattern,
        vect_recog_sdivmod_pow2_pattern,
        vect_recog_mixed_size_cond_pattern,
        vect_recog_bool_pattern};

/* Append STMT to the pattern definition statement sequence of STMT_INFO.  */

static inline void
append_pattern_def_seq (stmt_vec_info stmt_info, gimple stmt)
{
  gimple_seq_add_stmt_without_update (&STMT_VINFO_PATTERN_DEF_SEQ (stmt_info),
                                      stmt);
}

/* Reset the pattern definition statement sequence of STMT_INFO so that
   it contains just STMT.  */

static inline void
new_pattern_def_seq (stmt_vec_info stmt_info, gimple stmt)
{
  STMT_VINFO_PATTERN_DEF_SEQ (stmt_info) = NULL;
  append_pattern_def_seq (stmt_info, stmt);
}

/* Check whether STMT2 is in the same loop or basic block as STMT1.
   Which of the two applies depends on whether we're currently doing
   loop-based or basic-block-based vectorization, as determined by
   the vinfo_for_stmt for STMT1 (which must be defined).

   If this returns true, vinfo_for_stmt for STMT2 is guaranteed
   to be defined as well.  */

static bool
vect_same_loop_or_bb_p (gimple stmt1, gimple stmt2)
{
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt1);
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);

  if (!gimple_bb (stmt2))
    return false;

  if (loop_vinfo)
    {
      struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
      if (!flow_bb_inside_loop_p (loop, gimple_bb (stmt2)))
        return false;
    }
  else
    {
      if (gimple_bb (stmt2) != BB_VINFO_BB (bb_vinfo)
          || gimple_code (stmt2) == GIMPLE_PHI)
        return false;
    }

  gcc_assert (vinfo_for_stmt (stmt2));
  return true;
}

/* If the LHS of DEF_STMT has a single use, and that statement is
   in the same loop or basic block, return it.  */

static gimple
vect_single_imm_use (gimple def_stmt)
{
  tree lhs = gimple_assign_lhs (def_stmt);
  use_operand_p use_p;
  gimple use_stmt;

  if (!single_imm_use (lhs, &use_p, &use_stmt))
    return NULL;

  if (!vect_same_loop_or_bb_p (def_stmt, use_stmt))
    return NULL;

  return use_stmt;
}

/* Check whether NAME, an ssa-name used in USE_STMT,
   is a result of a type promotion or demotion, such that:
     DEF_STMT: NAME = NOP (name0)
   where the type of name0 (ORIG_TYPE) is smaller/bigger than the type of NAME.
   If CHECK_SIGN is TRUE, check that either both types are signed or both are
   unsigned.  */

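/* Illustration (added for exposition; not part of the original file):
   given the source-level promotion below, type_conversion_p called on
   x_T with CHECK_SIGN == true would return true with *ORIG_TYPE == short
   and *PROMOTION == true, since int is at least twice as wide as short
   and both types are signed.  */
#if 0	/* Example only; never compiled.  */
static int
example_promotion (short x_t)
{
  int x_T = (int) x_t;	/* DEF_STMT: NAME = NOP (name0).  */
  return x_T;
}
#endif
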
static bool
type_conversion_p (tree name, gimple use_stmt, bool check_sign,
                   tree *orig_type, gimple *def_stmt, bool *promotion)
{
  tree dummy;
  gimple dummy_gimple;
  loop_vec_info loop_vinfo;
  stmt_vec_info stmt_vinfo;
  tree type = TREE_TYPE (name);
  tree oprnd0;
  enum vect_def_type dt;
  tree def;
  bb_vec_info bb_vinfo;

  stmt_vinfo = vinfo_for_stmt (use_stmt);
  loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
  bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
  if (!vect_is_simple_use (name, use_stmt, loop_vinfo, bb_vinfo, def_stmt,
                           &def, &dt))
    return false;

  if (dt != vect_internal_def
      && dt != vect_external_def && dt != vect_constant_def)
    return false;

  if (!*def_stmt)
    return false;

  if (!is_gimple_assign (*def_stmt))
    return false;

  if (!CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (*def_stmt)))
    return false;

  oprnd0 = gimple_assign_rhs1 (*def_stmt);

  *orig_type = TREE_TYPE (oprnd0);
  if (!INTEGRAL_TYPE_P (type) || !INTEGRAL_TYPE_P (*orig_type)
      || ((TYPE_UNSIGNED (type) != TYPE_UNSIGNED (*orig_type)) && check_sign))
    return false;

  if (TYPE_PRECISION (type) >= (TYPE_PRECISION (*orig_type) * 2))
    *promotion = true;
  else if (TYPE_PRECISION (*orig_type) >= (TYPE_PRECISION (type) * 2))
    *promotion = false;
  else
    return false;

  if (!vect_is_simple_use (oprnd0, *def_stmt, loop_vinfo,
                           bb_vinfo, &dummy_gimple, &dummy, &dt))
    return false;

  return true;
}

/* Helper to return a new temporary for a pattern of TYPE for STMT.  If STMT
   is NULL, the caller must set SSA_NAME_DEF_STMT for the returned SSA var.  */

static tree
vect_recog_temp_ssa_var (tree type, gimple stmt)
{
  tree var = create_tmp_var (type, "patt");

  add_referenced_var (var);
  var = make_ssa_name (var, stmt);
  return var;
}

/* Function vect_recog_dot_prod_pattern

   Try to find the following pattern:

     type x_t, y_t;
     TYPE1 prod;
     TYPE2 sum = init;
   loop:
     sum_0 = phi <init, sum_1>
     S1  x_t = ...
     S2  y_t = ...
     S3  x_T = (TYPE1) x_t;
     S4  y_T = (TYPE1) y_t;
     S5  prod = x_T * y_T;
     [S6  prod = (TYPE2) prod;  #optional]
     S7  sum_1 = prod + sum_0;

   where 'TYPE1' is exactly double the size of type 'type', and 'TYPE2' is
   the same size as 'TYPE1' or bigger.  This is a special case of a reduction
   computation.

   Input:

   * STMTS: Contains a stmt from which the pattern search begins.  In the
   example, when this function is called with S7, the pattern {S3,S4,S5,S6,S7}
   will be detected.

   Output:

   * TYPE_IN: The type of the input arguments to the pattern.

   * TYPE_OUT: The type of the output of this pattern.

   * Return value: A new stmt that will be used to replace the sequence of
   stmts that constitute the pattern.  In this case it will be:
        WIDEN_DOT_PRODUCT <x_t, y_t, sum_0>

   Note: The dot-prod idiom is a widening reduction pattern that is
         vectorized without preserving all the intermediate results.  It
         produces only N/2 (widened) results (by summing up pairs of
         intermediate results) rather than all N results.  Therefore, we
         cannot allow this pattern when we want to get all the results and in
         the correct order (as is the case when this computation is in an
         inner-loop nested in an outer-loop that is being vectorized).  */

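/* Illustration (added for exposition; not part of the original file):
   a source loop matching the pattern above, with 'type' == short and
   'TYPE1' == 'TYPE2' == int; S3-S5 come from the implicit integer
   promotions and S7 is the reduction.  */
#if 0	/* Example only; never compiled.  */
static int
example_dot_prod (const short *x, const short *y, int n)
{
  int i, sum = 0;
  for (i = 0; i < n; i++)
    sum += x[i] * y[i];		/* Becomes DOT_PROD_EXPR <x_t, y_t, sum_0>.  */
  return sum;
}
#endif
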
static gimple
vect_recog_dot_prod_pattern (VEC (gimple, heap) **stmts, tree *type_in,
                             tree *type_out)
{
  gimple stmt, last_stmt = VEC_index (gimple, *stmts, 0);
  tree oprnd0, oprnd1;
  tree oprnd00, oprnd01;
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
  tree type, half_type;
  gimple pattern_stmt;
  tree prod_type;
  loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
  struct loop *loop;
  tree var;
  bool promotion;

  if (!loop_info)
    return NULL;

  loop = LOOP_VINFO_LOOP (loop_info);

  if (!is_gimple_assign (last_stmt))
    return NULL;

  type = gimple_expr_type (last_stmt);

  /* Look for the following pattern
          DX = (TYPE1) X;
          DY = (TYPE1) Y;
          DPROD = DX * DY;
          DDPROD = (TYPE2) DPROD;
          sum_1 = DDPROD + sum_0;
     In which
     - DX is double the size of X
     - DY is double the size of Y
     - DX, DY, DPROD all have the same type
     - sum is the same size as DPROD or bigger
     - sum has been recognized as a reduction variable.

     This is equivalent to:
       DPROD = X w* Y;          #widen mult
       sum_1 = DPROD w+ sum_0;  #widen summation
     or
       DPROD = X w* Y;          #widen mult
       sum_1 = DPROD + sum_0;   #summation
   */

  /* Starting from LAST_STMT, follow the defs of its uses in search
     of the above pattern.  */

  if (gimple_assign_rhs_code (last_stmt) != PLUS_EXPR)
    return NULL;

  if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
    {
      /* Has been detected as widening-summation?  */

      stmt = STMT_VINFO_RELATED_STMT (stmt_vinfo);
      type = gimple_expr_type (stmt);
      if (gimple_assign_rhs_code (stmt) != WIDEN_SUM_EXPR)
        return NULL;
      oprnd0 = gimple_assign_rhs1 (stmt);
      oprnd1 = gimple_assign_rhs2 (stmt);
      half_type = TREE_TYPE (oprnd0);
    }
  else
    {
      gimple def_stmt;

      if (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def)
        return NULL;
      oprnd0 = gimple_assign_rhs1 (last_stmt);
      oprnd1 = gimple_assign_rhs2 (last_stmt);
      if (!types_compatible_p (TREE_TYPE (oprnd0), type)
          || !types_compatible_p (TREE_TYPE (oprnd1), type))
        return NULL;
      stmt = last_stmt;

      if (type_conversion_p (oprnd0, stmt, true, &half_type, &def_stmt,
                             &promotion)
          && promotion)
        {
          stmt = def_stmt;
          oprnd0 = gimple_assign_rhs1 (stmt);
        }
      else
        half_type = type;
    }

  /* So far so good.  Since last_stmt was detected as a (summation) reduction,
     we know that oprnd1 is the reduction variable (defined by a loop-header
     phi), and oprnd0 is an ssa-name defined by a stmt in the loop body.
     Left to check that oprnd0 is defined by a (widen_)mult_expr.  */
  if (TREE_CODE (oprnd0) != SSA_NAME)
    return NULL;

  prod_type = half_type;
  stmt = SSA_NAME_DEF_STMT (oprnd0);

  /* It could not be the dot_prod pattern if the stmt is outside the loop.  */
  if (!gimple_bb (stmt) || !flow_bb_inside_loop_p (loop, gimple_bb (stmt)))
    return NULL;

  /* FORNOW.  Can continue analyzing the def-use chain when this stmt is a phi
     inside the loop (in case we are analyzing an outer-loop).  */
  if (!is_gimple_assign (stmt))
    return NULL;
  stmt_vinfo = vinfo_for_stmt (stmt);
  gcc_assert (stmt_vinfo);
  if (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_internal_def)
    return NULL;
  if (gimple_assign_rhs_code (stmt) != MULT_EXPR)
    return NULL;
  if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
    {
      /* Has been detected as a widening multiplication?  */

      stmt = STMT_VINFO_RELATED_STMT (stmt_vinfo);
      if (gimple_assign_rhs_code (stmt) != WIDEN_MULT_EXPR)
        return NULL;
      stmt_vinfo = vinfo_for_stmt (stmt);
      gcc_assert (stmt_vinfo);
      gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_internal_def);
      oprnd00 = gimple_assign_rhs1 (stmt);
      oprnd01 = gimple_assign_rhs2 (stmt);
    }
  else
    {
      tree half_type0, half_type1;
      gimple def_stmt;
      tree oprnd0, oprnd1;

      oprnd0 = gimple_assign_rhs1 (stmt);
      oprnd1 = gimple_assign_rhs2 (stmt);
      if (!types_compatible_p (TREE_TYPE (oprnd0), prod_type)
          || !types_compatible_p (TREE_TYPE (oprnd1), prod_type))
        return NULL;
      if (!type_conversion_p (oprnd0, stmt, true, &half_type0, &def_stmt,
                              &promotion)
          || !promotion)
        return NULL;
      oprnd00 = gimple_assign_rhs1 (def_stmt);
      if (!type_conversion_p (oprnd1, stmt, true, &half_type1, &def_stmt,
                              &promotion)
          || !promotion)
        return NULL;
      oprnd01 = gimple_assign_rhs1 (def_stmt);
      if (!types_compatible_p (half_type0, half_type1))
        return NULL;
      if (TYPE_PRECISION (prod_type) != TYPE_PRECISION (half_type0) * 2)
        return NULL;
    }

  half_type = TREE_TYPE (oprnd00);
  *type_in = half_type;
  *type_out = type;

  /* Pattern detected.  Create a stmt to be used to replace the pattern: */
  var = vect_recog_temp_ssa_var (type, NULL);
  pattern_stmt = gimple_build_assign_with_ops3 (DOT_PROD_EXPR, var,
                                                oprnd00, oprnd01, oprnd1);

  if (vect_print_dump_info (REPORT_DETAILS))
    {
      fprintf (vect_dump, "vect_recog_dot_prod_pattern: detected: ");
      print_gimple_stmt (vect_dump, pattern_stmt, 0, TDF_SLIM);
    }

  /* We don't allow changing the order of the computation in the inner-loop
     when doing outer-loop vectorization.  */
  gcc_assert (!nested_in_vect_loop_p (loop, last_stmt));

  return pattern_stmt;
}

/* Handle widening operation by a constant.  At the moment we support MULT_EXPR
   and LSHIFT_EXPR.

   For MULT_EXPR we check that CONST_OPRND fits HALF_TYPE, and for LSHIFT_EXPR
   we check that CONST_OPRND is less than or equal to the size of HALF_TYPE.

   Otherwise, if the type of the result (TYPE) is at least 4 times bigger than
   HALF_TYPE, and there is an intermediate type (2 times smaller than TYPE)
   that satisfies the above restrictions, we can perform a widening operation
   from the intermediate type to TYPE and replace a_T = (TYPE) a_t;
   with a_it = (interm_type) a_t;  */

static bool
vect_handle_widen_op_by_const (gimple stmt, enum tree_code code,
                               tree const_oprnd, tree *oprnd,
                               VEC (gimple, heap) **stmts, tree type,
                               tree *half_type, gimple def_stmt)
{
  tree new_type, new_oprnd, tmp;
  gimple new_stmt;

  if (code != MULT_EXPR && code != LSHIFT_EXPR)
    return false;

  if (((code == MULT_EXPR && int_fits_type_p (const_oprnd, *half_type))
        || (code == LSHIFT_EXPR
            && compare_tree_int (const_oprnd, TYPE_PRECISION (*half_type))
               != 1))
      && TYPE_PRECISION (type) == (TYPE_PRECISION (*half_type) * 2))
    {
      /* CONST_OPRND is a constant of HALF_TYPE.  */
      *oprnd = gimple_assign_rhs1 (def_stmt);
      return true;
    }

  if (TYPE_PRECISION (type) < (TYPE_PRECISION (*half_type) * 4))
    return false;

  if (!vect_same_loop_or_bb_p (stmt, def_stmt))
    return false;

  /* TYPE is 4 times bigger than HALF_TYPE, try widening operation for
     a type 2 times bigger than HALF_TYPE.  */
  new_type = build_nonstandard_integer_type (TYPE_PRECISION (type) / 2,
                                             TYPE_UNSIGNED (type));
  if ((code == MULT_EXPR && !int_fits_type_p (const_oprnd, new_type))
      || (code == LSHIFT_EXPR
          && compare_tree_int (const_oprnd, TYPE_PRECISION (new_type)) == 1))
    return false;

  /* Use NEW_TYPE for widening operation.  */
  if (STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt)))
    {
      new_stmt = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt));
      /* Check if the already created pattern stmt is what we need.  */
      if (!is_gimple_assign (new_stmt)
          || gimple_assign_rhs_code (new_stmt) != NOP_EXPR
          || TREE_TYPE (gimple_assign_lhs (new_stmt)) != new_type)
        return false;

      VEC_safe_push (gimple, heap, *stmts, def_stmt);
      *oprnd = gimple_assign_lhs (new_stmt);
    }
  else
    {
      /* Create a_T = (NEW_TYPE) a_t;  */
      *oprnd = gimple_assign_rhs1 (def_stmt);
      tmp = create_tmp_var (new_type, NULL);
      add_referenced_var (tmp);
      new_oprnd = make_ssa_name (tmp, NULL);
      new_stmt = gimple_build_assign_with_ops (NOP_EXPR, new_oprnd, *oprnd,
                                               NULL_TREE);
      STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt)) = new_stmt;
      VEC_safe_push (gimple, heap, *stmts, def_stmt);
      *oprnd = new_oprnd;
    }

  *half_type = new_type;
  return true;
}

/* Function vect_recog_widen_mult_pattern

   Try to find the following pattern:

     type a_t, b_t;
     TYPE a_T, b_T, prod_T;

     S1  a_t = ;
     S2  b_t = ;
     S3  a_T = (TYPE) a_t;
     S4  b_T = (TYPE) b_t;
     S5  prod_T = a_T * b_T;

   where type 'TYPE' is at least double the size of type 'type'.

   Also detect unsigned cases:

     unsigned type a_t, b_t;
     unsigned TYPE u_prod_T;
     TYPE a_T, b_T, prod_T;

     S1  a_t = ;
     S2  b_t = ;
     S3  a_T = (TYPE) a_t;
     S4  b_T = (TYPE) b_t;
     S5  prod_T = a_T * b_T;
     S6  u_prod_T = (unsigned TYPE) prod_T;

   and multiplication by constants:

     type a_t;
     TYPE a_T, prod_T;

     S1  a_t = ;
     S3  a_T = (TYPE) a_t;
     S5  prod_T = a_T * CONST;

   A special case of multiplication by constants is when 'TYPE' is 4 times
   bigger than 'type', but CONST fits an intermediate type 2 times smaller
   than 'TYPE'.  In that case we create an additional pattern stmt for S3
   to create a variable of the intermediate type, and perform widen-mult
   on the intermediate type as well:

     type a_t;
     interm_type a_it;
     TYPE a_T, prod_T, prod_T';

     S1  a_t = ;
     S3  a_T = (TYPE) a_t;
           '--> a_it = (interm_type) a_t;
     S5  prod_T = a_T * CONST;
           '--> prod_T' = a_it w* CONST;

   Input/Output:

   * STMTS: Contains a stmt from which the pattern search begins.  In the
   example, when this function is called with S5, the pattern {S3,S4,S5,(S6)}
   is detected.  In case of unsigned widen-mult, the original stmt (S5) is
   replaced with S6 in STMTS.  In case of multiplication by a constant
   of an intermediate type (the last case above), STMTS also contains S3
   (inserted before S5).

   Output:

   * TYPE_IN: The type of the input arguments to the pattern.

   * TYPE_OUT: The type of the output of this pattern.

   * Return value: A new stmt that will be used to replace the sequence of
   stmts that constitute the pattern.  In this case it will be:
        WIDEN_MULT <a_t, b_t>
*/

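/* Illustration (added for exposition; not part of the original file):
   with 'type' == short and 'TYPE' == int, each iteration below matches
   {S3,S4,S5} and is replaced by a single WIDEN_MULT_EXPR, assuming the
   target advertises a widening multiply for the chosen vector mode.  */
#if 0	/* Example only; never compiled.  */
static void
example_widen_mult (const short *a, const short *b, int *c, int n)
{
  int i;
  for (i = 0; i < n; i++)
    c[i] = a[i] * b[i];		/* Becomes c[i] = a[i] w* b[i].  */
}
#endif
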
static gimple
vect_recog_widen_mult_pattern (VEC (gimple, heap) **stmts,
                               tree *type_in, tree *type_out)
{
  gimple last_stmt = VEC_pop (gimple, *stmts);
  gimple def_stmt0, def_stmt1;
  tree oprnd0, oprnd1;
  tree type, half_type0, half_type1;
  gimple pattern_stmt;
  tree vectype, vectype_out = NULL_TREE;
  tree dummy;
  tree var;
  enum tree_code dummy_code;
  int dummy_int;
  VEC (tree, heap) *dummy_vec;
  bool op1_ok;
  bool promotion;

  if (!is_gimple_assign (last_stmt))
    return NULL;

  type = gimple_expr_type (last_stmt);

  /* Starting from LAST_STMT, follow the defs of its uses in search
     of the above pattern.  */

  if (gimple_assign_rhs_code (last_stmt) != MULT_EXPR)
    return NULL;

  oprnd0 = gimple_assign_rhs1 (last_stmt);
  oprnd1 = gimple_assign_rhs2 (last_stmt);
  if (!types_compatible_p (TREE_TYPE (oprnd0), type)
      || !types_compatible_p (TREE_TYPE (oprnd1), type))
    return NULL;

  /* Check argument 0.  */
  if (!type_conversion_p (oprnd0, last_stmt, false, &half_type0, &def_stmt0,
                          &promotion)
      || !promotion)
    return NULL;
  /* Check argument 1.  */
  op1_ok = type_conversion_p (oprnd1, last_stmt, false, &half_type1,
                              &def_stmt1, &promotion);

  if (op1_ok && promotion)
    {
      oprnd0 = gimple_assign_rhs1 (def_stmt0);
      oprnd1 = gimple_assign_rhs1 (def_stmt1);
    }
  else
    {
      if (TREE_CODE (oprnd1) == INTEGER_CST
          && TREE_CODE (half_type0) == INTEGER_TYPE
          && vect_handle_widen_op_by_const (last_stmt, MULT_EXPR, oprnd1,
                                            &oprnd0, stmts, type,
                                            &half_type0, def_stmt0))
        half_type1 = half_type0;
      else
        return NULL;
    }

  /* Handle unsigned case.  Look for
     S6  u_prod_T = (unsigned TYPE) prod_T;
     Use unsigned TYPE as the type for WIDEN_MULT_EXPR.  */
  if (TYPE_UNSIGNED (type) != TYPE_UNSIGNED (half_type0))
    {
      gimple use_stmt;
      tree use_lhs;
      tree use_type;

      if (TYPE_UNSIGNED (type) == TYPE_UNSIGNED (half_type1))
        return NULL;

      use_stmt = vect_single_imm_use (last_stmt);
      if (!use_stmt || !is_gimple_assign (use_stmt)
          || gimple_assign_rhs_code (use_stmt) != NOP_EXPR)
        return NULL;

      use_lhs = gimple_assign_lhs (use_stmt);
      use_type = TREE_TYPE (use_lhs);
      if (!INTEGRAL_TYPE_P (use_type)
          || (TYPE_UNSIGNED (type) == TYPE_UNSIGNED (use_type))
          || (TYPE_PRECISION (type) != TYPE_PRECISION (use_type)))
        return NULL;

      type = use_type;
      last_stmt = use_stmt;
    }

  if (!types_compatible_p (half_type0, half_type1))
    return NULL;

  /* Pattern detected.  */
  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "vect_recog_widen_mult_pattern: detected: ");

  /* Check target support.  */
  vectype = get_vectype_for_scalar_type (half_type0);
  vectype_out = get_vectype_for_scalar_type (type);
  if (!vectype
      || !vectype_out
      || !supportable_widening_operation (WIDEN_MULT_EXPR, last_stmt,
                                          vectype_out, vectype,
                                          &dummy, &dummy, &dummy_code,
                                          &dummy_code, &dummy_int, &dummy_vec))
    return NULL;

  *type_in = vectype;
  *type_out = vectype_out;

  /* Pattern supported.  Create a stmt to be used to replace the pattern: */
  var = vect_recog_temp_ssa_var (type, NULL);
  pattern_stmt = gimple_build_assign_with_ops (WIDEN_MULT_EXPR, var, oprnd0,
                                               oprnd1);

  if (vect_print_dump_info (REPORT_DETAILS))
    print_gimple_stmt (vect_dump, pattern_stmt, 0, TDF_SLIM);

  VEC_safe_push (gimple, heap, *stmts, last_stmt);
  return pattern_stmt;
}

/* Function vect_recog_pow_pattern

   Try to find the following pattern:

     x = POW (y, N);

   with POW being one of pow, powf, powi, powif and N being
   either 2 or 0.5.

   Input:

   * LAST_STMT: A stmt from which the pattern search begins.

   Output:

   * TYPE_IN: The type of the input arguments to the pattern.

   * TYPE_OUT: The type of the output of this pattern.

   * Return value: A new stmt that will be used to replace the sequence of
   stmts that constitute the pattern.  In this case it will be:
        x = x * x
   or
        x = sqrt (x)
*/

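/* Illustration (added for exposition; not part of the original file):
   both calls below have constant exponents, so they are rewritten to a
   multiply and (if the target can vectorize sqrt) a sqrt call; other
   exponents are left for the regular pow expansion.  */
#if 0	/* Example only; never compiled.  */
#include <math.h>
static double
example_pow (double y)
{
  double a = pow (y, 2.0);	/* Becomes a = y * y.  */
  double b = pow (y, 0.5);	/* Becomes b = sqrt (y).  */
  return a + b;
}
#endif
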
static gimple
vect_recog_pow_pattern (VEC (gimple, heap) **stmts, tree *type_in,
                        tree *type_out)
{
  gimple last_stmt = VEC_index (gimple, *stmts, 0);
  tree fn, base, exp = NULL;
  gimple stmt;
  tree var;

  if (!is_gimple_call (last_stmt) || gimple_call_lhs (last_stmt) == NULL)
    return NULL;

  fn = gimple_call_fndecl (last_stmt);
  if (fn == NULL_TREE || DECL_BUILT_IN_CLASS (fn) != BUILT_IN_NORMAL)
    return NULL;

  switch (DECL_FUNCTION_CODE (fn))
    {
    case BUILT_IN_POWIF:
    case BUILT_IN_POWI:
    case BUILT_IN_POWF:
    case BUILT_IN_POW:
      base = gimple_call_arg (last_stmt, 0);
      exp = gimple_call_arg (last_stmt, 1);
      if (TREE_CODE (exp) != REAL_CST
          && TREE_CODE (exp) != INTEGER_CST)
        return NULL;
      break;

    default:
      return NULL;
    }

  /* We now have a pow or powi builtin function call with a constant
     exponent.  */

  *type_out = NULL_TREE;

  /* Catch squaring.  */
  if ((host_integerp (exp, 0)
       && tree_low_cst (exp, 0) == 2)
      || (TREE_CODE (exp) == REAL_CST
          && REAL_VALUES_EQUAL (TREE_REAL_CST (exp), dconst2)))
    {
      *type_in = TREE_TYPE (base);

      var = vect_recog_temp_ssa_var (TREE_TYPE (base), NULL);
      stmt = gimple_build_assign_with_ops (MULT_EXPR, var, base, base);
      return stmt;
    }

  /* Catch square root.  */
  if (TREE_CODE (exp) == REAL_CST
      && REAL_VALUES_EQUAL (TREE_REAL_CST (exp), dconsthalf))
    {
      tree newfn = mathfn_built_in (TREE_TYPE (base), BUILT_IN_SQRT);
      *type_in = get_vectype_for_scalar_type (TREE_TYPE (base));
      if (*type_in)
        {
          gimple stmt = gimple_build_call (newfn, 1, base);
          if (vectorizable_function (stmt, *type_in, *type_in)
              != NULL_TREE)
            {
              var = vect_recog_temp_ssa_var (TREE_TYPE (base), stmt);
              gimple_call_set_lhs (stmt, var);
              return stmt;
            }
        }
    }

  return NULL;
}

/* Function vect_recog_widen_sum_pattern

   Try to find the following pattern:

     type x_t;
     TYPE x_T, sum = init;
   loop:
     sum_0 = phi <init, sum_1>
     S1  x_t = *p;
     S2  x_T = (TYPE) x_t;
     S3  sum_1 = x_T + sum_0;

   where type 'TYPE' is at least double the size of type 'type', i.e. we're
   summing elements of type 'type' into an accumulator of type 'TYPE'.  This
   is a special case of a reduction computation.

   Input:

   * LAST_STMT: A stmt from which the pattern search begins.  In the example,
   when this function is called with S3, the pattern {S2,S3} will be detected.

   Output:

   * TYPE_IN: The type of the input arguments to the pattern.

   * TYPE_OUT: The type of the output of this pattern.

   * Return value: A new stmt that will be used to replace the sequence of
   stmts that constitute the pattern.  In this case it will be:
        WIDEN_SUM <x_t, sum_0>

   Note: The widening-sum idiom is a widening reduction pattern that is
         vectorized without preserving all the intermediate results.  It
         produces only N/2 (widened) results (by summing up pairs of
         intermediate results) rather than all N results.  Therefore, we
         cannot allow this pattern when we want to get all the results and in
         the correct order (as is the case when this computation is in an
         inner-loop nested in an outer-loop that is being vectorized).  */

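/* Illustration (added for exposition; not part of the original file):
   with 'type' == unsigned char and 'TYPE' == unsigned int, the loop
   matches {S2,S3} and the add is replaced by WIDEN_SUM_EXPR, assuming
   the target provides a widening-sum operation.  */
#if 0	/* Example only; never compiled.  */
static unsigned int
example_widen_sum (const unsigned char *x, int n)
{
  int i;
  unsigned int sum = 0;
  for (i = 0; i < n; i++)
    sum += x[i];		/* Becomes WIDEN_SUM <x_t, sum_0>.  */
  return sum;
}
#endif
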
static gimple
vect_recog_widen_sum_pattern (VEC (gimple, heap) **stmts, tree *type_in,
                              tree *type_out)
{
  gimple stmt, last_stmt = VEC_index (gimple, *stmts, 0);
  tree oprnd0, oprnd1;
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
  tree type, half_type;
  gimple pattern_stmt;
  loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
  struct loop *loop;
  tree var;
  bool promotion;

  if (!loop_info)
    return NULL;

  loop = LOOP_VINFO_LOOP (loop_info);

  if (!is_gimple_assign (last_stmt))
    return NULL;

  type = gimple_expr_type (last_stmt);

  /* Look for the following pattern
          DX = (TYPE) X;
          sum_1 = DX + sum_0;
     In which DX is at least double the size of X, and sum_1 has been
     recognized as a reduction variable.
   */

  /* Starting from LAST_STMT, follow the defs of its uses in search
     of the above pattern.  */

  if (gimple_assign_rhs_code (last_stmt) != PLUS_EXPR)
    return NULL;

  if (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def)
    return NULL;

  oprnd0 = gimple_assign_rhs1 (last_stmt);
  oprnd1 = gimple_assign_rhs2 (last_stmt);
  if (!types_compatible_p (TREE_TYPE (oprnd0), type)
      || !types_compatible_p (TREE_TYPE (oprnd1), type))
    return NULL;

  /* So far so good.  Since last_stmt was detected as a (summation) reduction,
     we know that oprnd1 is the reduction variable (defined by a loop-header
     phi), and oprnd0 is an ssa-name defined by a stmt in the loop body.
     Left to check that oprnd0 is defined by a cast from type 'type' to type
     'TYPE'.  */

  if (!type_conversion_p (oprnd0, last_stmt, true, &half_type, &stmt,
                          &promotion)
      || !promotion)
    return NULL;

  oprnd0 = gimple_assign_rhs1 (stmt);
  *type_in = half_type;
  *type_out = type;

  /* Pattern detected.  Create a stmt to be used to replace the pattern: */
  var = vect_recog_temp_ssa_var (type, NULL);
  pattern_stmt = gimple_build_assign_with_ops (WIDEN_SUM_EXPR, var,
                                               oprnd0, oprnd1);

  if (vect_print_dump_info (REPORT_DETAILS))
    {
      fprintf (vect_dump, "vect_recog_widen_sum_pattern: detected: ");
      print_gimple_stmt (vect_dump, pattern_stmt, 0, TDF_SLIM);
    }

  /* We don't allow changing the order of the computation in the inner-loop
     when doing outer-loop vectorization.  */
  gcc_assert (!nested_in_vect_loop_p (loop, last_stmt));

  return pattern_stmt;
}

/* Return TRUE if the operation in STMT can be performed on a smaller type.

   Input:
   STMT - a statement to check.
   DEF - we support operations with two operands, one of which is constant.
         The other operand can be defined by a demotion operation, or by a
         previous statement in a sequence of over-promoted operations.  In
         the latter case DEF is used to replace that operand.  (It is defined
         by a pattern statement we created for the previous statement in the
         sequence).

   Input/output:
   NEW_TYPE - Output: a smaller type that we are trying to use.  Input: if not
         NULL, it's the type of DEF.
   STMTS - additional pattern statements.  If a pattern statement (type
         conversion) is created in this function, its original statement is
         added to STMTS.

   Output:
   OP0, OP1 - if the operation fits a smaller type, OP0 and OP1 are the new
         operands to use in the new pattern statement for STMT (will be
         created in vect_recog_over_widening_pattern ()).
   NEW_DEF_STMT - in case DEF has to be promoted, we create two pattern
         statements for STMT: the first one is a type promotion and the second
         one is the operation itself.  We return the type promotion statement
         in NEW_DEF_STMT and further store it in STMT_VINFO_PATTERN_DEF_SEQ of
         the second pattern statement.  */

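/* Illustration (added for exposition; not part of the original file):
   in the function below the front end performs both operations in int,
   but the shift and the OR also fit a 16-bit intermediate type: the
   shift amount is within the 8 bits of the original operand and the
   constant 3 fits 'short', so NEW_TYPE can be a 16-bit type here.  */
#if 0	/* Example only; never compiled.  */
static short
example_fits_smaller (unsigned char a)
{
  return (short) ((a << 2) | 3);	/* Both ops can be done in 16 bits.  */
}
#endif
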
static bool
vect_operation_fits_smaller_type (gimple stmt, tree def, tree *new_type,
                                  tree *op0, tree *op1, gimple *new_def_stmt,
                                  VEC (gimple, heap) **stmts)
{
  enum tree_code code;
  tree const_oprnd, oprnd;
  tree interm_type = NULL_TREE, half_type, tmp, new_oprnd, type;
  gimple def_stmt, new_stmt;
  bool first = false;
  bool promotion;

  *op0 = NULL_TREE;
  *op1 = NULL_TREE;
  *new_def_stmt = NULL;

  if (!is_gimple_assign (stmt))
    return false;

  code = gimple_assign_rhs_code (stmt);
  if (code != LSHIFT_EXPR && code != RSHIFT_EXPR
      && code != BIT_IOR_EXPR && code != BIT_XOR_EXPR && code != BIT_AND_EXPR)
    return false;

  oprnd = gimple_assign_rhs1 (stmt);
  const_oprnd = gimple_assign_rhs2 (stmt);
  type = gimple_expr_type (stmt);

  if (TREE_CODE (oprnd) != SSA_NAME
      || TREE_CODE (const_oprnd) != INTEGER_CST)
    return false;

  /* If we are in the middle of a sequence, we use DEF from a previous
     statement.  Otherwise, OPRND has to be a result of type promotion.  */
  if (*new_type)
    {
      half_type = *new_type;
      oprnd = def;
    }
  else
    {
      first = true;
      if (!type_conversion_p (oprnd, stmt, false, &half_type, &def_stmt,
                              &promotion)
          || !promotion
          || !vect_same_loop_or_bb_p (stmt, def_stmt))
        return false;
    }

  /* Can we perform the operation on a smaller type?  */
  switch (code)
    {
    case BIT_IOR_EXPR:
    case BIT_XOR_EXPR:
    case BIT_AND_EXPR:
      if (!int_fits_type_p (const_oprnd, half_type))
        {
          /* HALF_TYPE is not enough.  Try a bigger type if possible.  */
          if (TYPE_PRECISION (type) < (TYPE_PRECISION (half_type) * 4))
            return false;

          interm_type = build_nonstandard_integer_type (
                        TYPE_PRECISION (half_type) * 2, TYPE_UNSIGNED (type));
          if (!int_fits_type_p (const_oprnd, interm_type))
            return false;
        }

      break;

    case LSHIFT_EXPR:
      /* Try intermediate type - HALF_TYPE is not enough for sure.  */
      if (TYPE_PRECISION (type) < (TYPE_PRECISION (half_type) * 4))
        return false;

      /* Check that HALF_TYPE size + shift amount <= INTERM_TYPE size.
         (e.g., if the original value was char, the shift amount is at most 8
         if we want to use short).  */
      if (compare_tree_int (const_oprnd, TYPE_PRECISION (half_type)) == 1)
        return false;

      interm_type = build_nonstandard_integer_type (
                        TYPE_PRECISION (half_type) * 2, TYPE_UNSIGNED (type));

      if (!vect_supportable_shift (code, interm_type))
        return false;

      break;

    case RSHIFT_EXPR:
      if (vect_supportable_shift (code, half_type))
        break;

      /* Try intermediate type - HALF_TYPE is not supported.  */
      if (TYPE_PRECISION (type) < (TYPE_PRECISION (half_type) * 4))
        return false;

      interm_type = build_nonstandard_integer_type (
                        TYPE_PRECISION (half_type) * 2, TYPE_UNSIGNED (type));

      if (!vect_supportable_shift (code, interm_type))
        return false;

      break;

    default:
      gcc_unreachable ();
    }

  /* There are four possible cases:
     1. OPRND is defined by a type promotion (in that case FIRST is TRUE, it's
        the first statement in the sequence)
        a. The original, HALF_TYPE, is not enough - we replace the promotion
           from HALF_TYPE to TYPE with a promotion to INTERM_TYPE.
        b. HALF_TYPE is sufficient, OPRND is set as the RHS of the original
           promotion.
     2. OPRND is defined by a pattern statement we created.
        a. Its type is not sufficient for the operation, we create a new stmt:
           a type conversion for OPRND from HALF_TYPE to INTERM_TYPE.  We store
           this statement in NEW_DEF_STMT, and it is later put in
           STMT_VINFO_PATTERN_DEF_SEQ of the pattern statement for STMT.
        b. OPRND is good to use in the new statement.  */
  if (first)
    {
      if (interm_type)
        {
          /* Replace the original type conversion HALF_TYPE->TYPE with
             HALF_TYPE->INTERM_TYPE.  */
          if (STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt)))
            {
              new_stmt = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt));
              /* Check if the already created pattern stmt is what we need.  */
              if (!is_gimple_assign (new_stmt)
                  || gimple_assign_rhs_code (new_stmt) != NOP_EXPR
                  || TREE_TYPE (gimple_assign_lhs (new_stmt)) != interm_type)
                return false;

              VEC_safe_push (gimple, heap, *stmts, def_stmt);
              oprnd = gimple_assign_lhs (new_stmt);
            }
          else
            {
              /* Create NEW_OPRND = (INTERM_TYPE) OPRND.  */
              oprnd = gimple_assign_rhs1 (def_stmt);
              tmp = create_tmp_reg (interm_type, NULL);
              add_referenced_var (tmp);
              new_oprnd = make_ssa_name (tmp, NULL);
              new_stmt = gimple_build_assign_with_ops (NOP_EXPR, new_oprnd,
                                                       oprnd, NULL_TREE);
              STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt)) = new_stmt;
              VEC_safe_push (gimple, heap, *stmts, def_stmt);
              oprnd = new_oprnd;
            }
        }
      else
        {
          /* Retrieve the operand before the type promotion.  */
          oprnd = gimple_assign_rhs1 (def_stmt);
        }
    }
  else
    {
      if (interm_type)
        {
          /* Create a type conversion HALF_TYPE->INTERM_TYPE.  */
          tmp = create_tmp_reg (interm_type, NULL);
          add_referenced_var (tmp);
          new_oprnd = make_ssa_name (tmp, NULL);
          new_stmt = gimple_build_assign_with_ops (NOP_EXPR, new_oprnd,
                                                   oprnd, NULL_TREE);
          oprnd = new_oprnd;
          *new_def_stmt = new_stmt;
        }

      /* Otherwise, OPRND is already set.  */
    }

  if (interm_type)
    *new_type = interm_type;
  else
    *new_type = half_type;

  *op0 = oprnd;
  *op1 = fold_convert (*new_type, const_oprnd);

  return true;
}

/* Try to find a statement or a sequence of statements that can be performed
   on a smaller type:

     type x_t;
     TYPE x_T, res0_T, res1_T;
   loop:
     S1  x_t = *p;
     S2  x_T = (TYPE) x_t;
     S3  res0_T = op (x_T, C0);
     S4  res1_T = op (res0_T, C1);
     S5  ... = () res1_T;  - type demotion

   where type 'TYPE' is at least double the size of type 'type', C0 and C1
   are constants.
   Check if S3 and S4 can be done on a smaller type than 'TYPE'; it can
   either be 'type' or some intermediate type.  For now, we expect S5 to be
   a type demotion operation.  We also check that S3 and S4 have only one
   use.  */

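/* Illustration (added for exposition; not part of the original file):
   the C integer promotions make the loop below compute in int even
   though the result is stored back into a char; shrinking S3/S4 to
   char (or short) lets a vector hold four times as many elements.  */
#if 0	/* Example only; never compiled.  */
static void
example_over_widening (unsigned char *a, int n)
{
  int i;
  for (i = 0; i < n; i++)
    a[i] = (a[i] << 1) | 3;	/* Promoted to int, demoted on the store.  */
}
#endif
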
static gimple
vect_recog_over_widening_pattern (VEC (gimple, heap) **stmts,
                                  tree *type_in, tree *type_out)
{
  gimple stmt = VEC_pop (gimple, *stmts);
  gimple pattern_stmt = NULL, new_def_stmt, prev_stmt = NULL, use_stmt = NULL;
  tree op0, op1, vectype = NULL_TREE, use_lhs, use_type;
  tree var = NULL_TREE, new_type = NULL_TREE, tmp, new_oprnd;
  bool first;
  tree type = NULL;

  first = true;
  while (1)
    {
      if (!vinfo_for_stmt (stmt)
          || STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (stmt)))
        return NULL;

      new_def_stmt = NULL;
      if (!vect_operation_fits_smaller_type (stmt, var, &new_type,
                                             &op0, &op1, &new_def_stmt,
                                             stmts))
        {
          if (first)
            return NULL;
          else
            break;
        }

      /* STMT can be performed on a smaller type.  Check its uses.  */
      use_stmt = vect_single_imm_use (stmt);
      if (!use_stmt || !is_gimple_assign (use_stmt))
        return NULL;

      /* Create pattern statement for STMT.  */
      vectype = get_vectype_for_scalar_type (new_type);
      if (!vectype)
        return NULL;

      /* We want to collect all the statements for which we create pattern
         statements, except for the case when the last statement in the
         sequence doesn't have a corresponding pattern statement.  In such
         a case we associate the last pattern statement with the last
         statement in the sequence.  Therefore, we only add the original
         statement to the list if we know that it is not the last.  */
      if (prev_stmt)
        VEC_safe_push (gimple, heap, *stmts, prev_stmt);

      var = vect_recog_temp_ssa_var (new_type, NULL);
      pattern_stmt
        = gimple_build_assign_with_ops (gimple_assign_rhs_code (stmt), var,
                                        op0, op1);
      STMT_VINFO_RELATED_STMT (vinfo_for_stmt (stmt)) = pattern_stmt;
      new_pattern_def_seq (vinfo_for_stmt (stmt), new_def_stmt);

      if (vect_print_dump_info (REPORT_DETAILS))
        {
          fprintf (vect_dump, "created pattern stmt: ");
          print_gimple_stmt (vect_dump, pattern_stmt, 0, TDF_SLIM);
        }

      type = gimple_expr_type (stmt);
      prev_stmt = stmt;
      stmt = use_stmt;

      first = false;
    }

  /* We got a sequence.  We expect it to end with a type demotion operation.
     Otherwise, we quit (for now).  There are three possible cases: the
     conversion is to NEW_TYPE (we don't do anything), the conversion is to
     a type bigger than NEW_TYPE and/or the signedness of USE_TYPE and
     NEW_TYPE differs (we create a new conversion statement).  */
  if (CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (use_stmt)))
    {
      use_lhs = gimple_assign_lhs (use_stmt);
      use_type = TREE_TYPE (use_lhs);
      /* Support only type demotion or signedness change.  */
      if (!INTEGRAL_TYPE_P (use_type)
          || TYPE_PRECISION (type) <= TYPE_PRECISION (use_type))
        return NULL;

      /* Check that NEW_TYPE is not bigger than the conversion result.  */
      if (TYPE_PRECISION (new_type) > TYPE_PRECISION (use_type))
        return NULL;

      if (TYPE_UNSIGNED (new_type) != TYPE_UNSIGNED (use_type)
          || TYPE_PRECISION (new_type) != TYPE_PRECISION (use_type))
        {
          /* Create NEW_TYPE->USE_TYPE conversion.  */
          tmp = create_tmp_reg (use_type, NULL);
          add_referenced_var (tmp);
          new_oprnd = make_ssa_name (tmp, NULL);
          pattern_stmt = gimple_build_assign_with_ops (NOP_EXPR, new_oprnd,
                                                       var, NULL_TREE);
          STMT_VINFO_RELATED_STMT (vinfo_for_stmt (use_stmt)) = pattern_stmt;

          *type_in = get_vectype_for_scalar_type (new_type);
          *type_out = get_vectype_for_scalar_type (use_type);

          /* We created a pattern statement for the last statement in the
             sequence, so we don't need to associate it with the pattern
             statement created for PREV_STMT.  Therefore, we add PREV_STMT
             to the list in order to mark it later in vect_pattern_recog_1.  */
          if (prev_stmt)
            VEC_safe_push (gimple, heap, *stmts, prev_stmt);
        }
      else
        {
          if (prev_stmt)
            STMT_VINFO_PATTERN_DEF_SEQ (vinfo_for_stmt (use_stmt))
              = STMT_VINFO_PATTERN_DEF_SEQ (vinfo_for_stmt (prev_stmt));

          *type_in = vectype;
          *type_out = NULL_TREE;
        }

      VEC_safe_push (gimple, heap, *stmts, use_stmt);
    }
  else
    /* TODO: support general case, create a conversion to the correct type.  */
    return NULL;

  /* Pattern detected.  */
  if (vect_print_dump_info (REPORT_DETAILS))
    {
      fprintf (vect_dump, "vect_recog_over_widening_pattern: detected: ");
      print_gimple_stmt (vect_dump, pattern_stmt, 0, TDF_SLIM);
    }

  return pattern_stmt;
}

/* Detect widening shift pattern:

     type a_t;
     TYPE a_T, res_T;

     S1  a_t = ;
     S2  a_T = (TYPE) a_t;
     S3  res_T = a_T << CONST;

   where type 'TYPE' is at least double the size of type 'type'.

   Also detect cases where the shift result is immediately converted
   to another type 'result_type' that is no larger in size than 'TYPE'.
   In those cases we perform a widen-shift that directly results in
   'result_type', to avoid a possible over-widening situation:

     type a_t;
     TYPE a_T, res_T;
     result_type res_result;

     S1  a_t = ;
     S2  a_T = (TYPE) a_t;
     S3  res_T = a_T << CONST;
     S4  res_result = (result_type) res_T;
           '--> res_result' = a_t w<< CONST;

   And a case when 'TYPE' is 4 times bigger than 'type'.  In that case we
   create an additional pattern stmt for S2 to create a variable of an
   intermediate type, and perform widen-shift on the intermediate type:

     type a_t;
     interm_type a_it;
     TYPE a_T, res_T, res_T';

     S1  a_t = ;
     S2  a_T = (TYPE) a_t;
           '--> a_it = (interm_type) a_t;
     S3  res_T = a_T << CONST;
           '--> res_T' = a_it <<* CONST;

   Input/Output:

   * STMTS: Contains a stmt from which the pattern search begins.
   In case of unsigned widen-shift, the original stmt (S3) is replaced with S4
   in STMTS.  When an intermediate type is used and a pattern statement is
   created for S2, we also put S2 here (before S3).

   Output:

   * TYPE_IN: The type of the input arguments to the pattern.

   * TYPE_OUT: The type of the output of this pattern.

   * Return value: A new stmt that will be used to replace the sequence of
   stmts that constitute the pattern.  In this case it will be:
        WIDEN_LSHIFT_EXPR <a_t, CONST>.  */

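/* Illustration (added for exposition; not part of the original file):
   with 'type' == short and 'TYPE' == int, the loop matches {S2,S3} and
   is replaced by WIDEN_LSHIFT_EXPR, assuming the target implements a
   widening shift for the chosen vector mode.  */
#if 0	/* Example only; never compiled.  */
static void
example_widen_shift (const short *a, int *res, int n)
{
  int i;
  for (i = 0; i < n; i++)
    res[i] = a[i] << 8;		/* Becomes res[i] = a[i] w<< 8.  */
}
#endif
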
static gimple
vect_recog_widen_shift_pattern (VEC (gimple, heap) **stmts,
                                tree *type_in, tree *type_out)
{
  gimple last_stmt = VEC_pop (gimple, *stmts);
  gimple def_stmt0;
  tree oprnd0, oprnd1;
  tree type, half_type0;
  gimple pattern_stmt;
  tree vectype, vectype_out = NULL_TREE;
  tree dummy;
  tree var;
  enum tree_code dummy_code;
  int dummy_int;
  VEC (tree, heap) * dummy_vec;
  gimple use_stmt;
  bool promotion;

  if (!is_gimple_assign (last_stmt) || !vinfo_for_stmt (last_stmt))
    return NULL;

  if (STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (last_stmt)))
    return NULL;

  if (gimple_assign_rhs_code (last_stmt) != LSHIFT_EXPR)
    return NULL;

  oprnd0 = gimple_assign_rhs1 (last_stmt);
  oprnd1 = gimple_assign_rhs2 (last_stmt);
  if (TREE_CODE (oprnd0) != SSA_NAME || TREE_CODE (oprnd1) != INTEGER_CST)
    return NULL;

  /* Check operand 0: it has to be defined by a type promotion.  */
  if (!type_conversion_p (oprnd0, last_stmt, false, &half_type0, &def_stmt0,
                          &promotion)
      || !promotion)
    return NULL;

  /* Check operand 1: it has to be positive.  We check that it fits the type
     in vect_handle_widen_op_by_const ().  */
  if (tree_int_cst_compare (oprnd1, size_zero_node) <= 0)
    return NULL;

  oprnd0 = gimple_assign_rhs1 (def_stmt0);
  type = gimple_expr_type (last_stmt);

  /* Check for subsequent conversion to another type.  */
  use_stmt = vect_single_imm_use (last_stmt);
  if (use_stmt && is_gimple_assign (use_stmt)
      && CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (use_stmt))
      && !STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (use_stmt)))
    {
      tree use_lhs = gimple_assign_lhs (use_stmt);
      tree use_type = TREE_TYPE (use_lhs);

      if (INTEGRAL_TYPE_P (use_type)
          && TYPE_PRECISION (use_type) <= TYPE_PRECISION (type))
        {
          last_stmt = use_stmt;
          type = use_type;
        }
    }

  /* Check if this is a widening operation.  */
  if (!vect_handle_widen_op_by_const (last_stmt, LSHIFT_EXPR, oprnd1,
                                      &oprnd0, stmts,
                                      type, &half_type0, def_stmt0))
    return NULL;

  /* Pattern detected.  */
  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "vect_recog_widen_shift_pattern: detected: ");

  /* Check target support.  */
  vectype = get_vectype_for_scalar_type (half_type0);
  vectype_out = get_vectype_for_scalar_type (type);

  if (!vectype
      || !vectype_out
      || !supportable_widening_operation (WIDEN_LSHIFT_EXPR, last_stmt,
                                          vectype_out, vectype,
                                          &dummy, &dummy, &dummy_code,
                                          &dummy_code, &dummy_int,
                                          &dummy_vec))
    return NULL;

  *type_in = vectype;
  *type_out = vectype_out;

  /* Pattern supported.  Create a stmt to be used to replace the pattern.  */
  var = vect_recog_temp_ssa_var (type, NULL);
  pattern_stmt =
    gimple_build_assign_with_ops (WIDEN_LSHIFT_EXPR, var, oprnd0, oprnd1);

  if (vect_print_dump_info (REPORT_DETAILS))
    print_gimple_stmt (vect_dump, pattern_stmt, 0, TDF_SLIM);

  VEC_safe_push (gimple, heap, *stmts, last_stmt);
  return pattern_stmt;
}

/* Detect a vector by vector shift pattern that wouldn't be otherwise
   vectorized:

     type a_t;
     TYPE b_T, res_T;

     S1  a_t = ;
     S2  b_T = ;
     S3  res_T = b_T op a_t;

   where type 'TYPE' is a type with different size than 'type',
   and op is <<, >> or rotate.

   Also detect cases:

     type a_t;
     TYPE b_T, c_T, res_T;

     S0  c_T = ;
     S1  a_t = (type) c_T;
     S2  b_T = ;
     S3  res_T = b_T op a_t;

   Input/Output:

   * STMTS: Contains a stmt from which the pattern search begins,
   i.e. the shift/rotate stmt.  The original stmt (S3) is replaced
   with a shift/rotate which has same type on both operands, in the
   second case just b_T op c_T, in the first case with added cast
   from a_t to c_T in STMT_VINFO_PATTERN_DEF_SEQ.

   Output:

   * TYPE_IN: The type of the input arguments to the pattern.

   * TYPE_OUT: The type of the output of this pattern.

   * Return value: A new stmt that will be used to replace the shift/rotate
   S3 stmt.  */

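/* Illustration (added for exposition; not part of the original file):
   the shift amount below has a narrower type than the shifted value,
   which blocks a straightforward vector shift; the pattern re-uses (or
   creates) a cast so both vector operands have the same element size.  */
#if 0	/* Example only; never compiled.  */
static void
example_vector_vector_shift (unsigned int *out, const unsigned int *in,
                             const unsigned char *amount, int n)
{
  int i;
  for (i = 0; i < n; i++)
    out[i] = in[i] >> amount[i];	/* S3: res_T = b_T op a_t.  */
}
#endif
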
static gimple
vect_recog_vector_vector_shift_pattern (VEC (gimple, heap) **stmts,
                                        tree *type_in, tree *type_out)
{
  gimple last_stmt = VEC_pop (gimple, *stmts);
  tree oprnd0, oprnd1, lhs, var;
  gimple pattern_stmt, def_stmt;
  enum tree_code rhs_code;
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
  enum vect_def_type dt;
  tree def;

  if (!is_gimple_assign (last_stmt))
    return NULL;

  rhs_code = gimple_assign_rhs_code (last_stmt);
  switch (rhs_code)
    {
    case LSHIFT_EXPR:
    case RSHIFT_EXPR:
    case LROTATE_EXPR:
    case RROTATE_EXPR:
      break;
    default:
      return NULL;
    }

  if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
    return NULL;

  lhs = gimple_assign_lhs (last_stmt);
  oprnd0 = gimple_assign_rhs1 (last_stmt);
  oprnd1 = gimple_assign_rhs2 (last_stmt);
  if (TREE_CODE (oprnd0) != SSA_NAME
      || TREE_CODE (oprnd1) != SSA_NAME
      || TYPE_MODE (TREE_TYPE (oprnd0)) == TYPE_MODE (TREE_TYPE (oprnd1))
      || TYPE_PRECISION (TREE_TYPE (oprnd1))
         != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (oprnd1)))
      || TYPE_PRECISION (TREE_TYPE (lhs))
         != TYPE_PRECISION (TREE_TYPE (oprnd0)))
    return NULL;

  if (!vect_is_simple_use (oprnd1, last_stmt, loop_vinfo, bb_vinfo, &def_stmt,
                           &def, &dt))
    return NULL;

  if (dt != vect_internal_def)
    return NULL;

  *type_in = get_vectype_for_scalar_type (TREE_TYPE (oprnd0));
  *type_out = *type_in;
  if (*type_in == NULL_TREE)
    return NULL;

  def = NULL_TREE;
  if (gimple_assign_cast_p (def_stmt))
    {
      tree rhs1 = gimple_assign_rhs1 (def_stmt);
      if (TYPE_MODE (TREE_TYPE (rhs1)) == TYPE_MODE (TREE_TYPE (oprnd0))
          && TYPE_PRECISION (TREE_TYPE (rhs1))
             == TYPE_PRECISION (TREE_TYPE (oprnd0)))
        def = rhs1;
    }

  if (def == NULL_TREE)
    {
      def = vect_recog_temp_ssa_var (TREE_TYPE (oprnd0), NULL);
      def_stmt = gimple_build_assign_with_ops (NOP_EXPR, def, oprnd1,
                                               NULL_TREE);
      new_pattern_def_seq (stmt_vinfo, def_stmt);
    }

  /* Pattern detected.  */
  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "vect_recog_vector_vector_shift_pattern: detected: ");

  /* Pattern supported.  Create a stmt to be used to replace the pattern.  */
  var = vect_recog_temp_ssa_var (TREE_TYPE (oprnd0), NULL);
  pattern_stmt = gimple_build_assign_with_ops (rhs_code, var, oprnd0, def);

  if (vect_print_dump_info (REPORT_DETAILS))
    print_gimple_stmt (vect_dump, pattern_stmt, 0, TDF_SLIM);

  VEC_safe_push (gimple, heap, *stmts, last_stmt);
  return pattern_stmt;
}

/* Detect a signed division by power of two constant that wouldn't be
   otherwise vectorized:

     type a_t, b_t;

     S1  a_t = b_t / N;

   where type 'type' is a signed integral type and N is a constant positive
   power of two.

   Similarly handle signed modulo by power of two constant:

     S4  a_t = b_t % N;

   Input/Output:

   * STMTS: Contains a stmt from which the pattern search begins,
   i.e. the division stmt.  S1 is replaced by:
     S3  y_t = b_t < 0 ? N - 1 : 0;
     S2  x_t = b_t + y_t;
     S1' a_t = x_t >> log2 (N);

   S4 is replaced by (where *_T temporaries have unsigned type):
     S9  y_T = b_t < 0 ? -1U : 0U;
     S8  z_T = y_T >> (sizeof (type_t) * CHAR_BIT - log2 (N));
     S7  z_t = (type) z_T;
     S6  w_t = b_t + z_t;
     S5  x_t = w_t & (N - 1);
     S4' a_t = x_t - z_t;

   Output:

   * TYPE_IN: The type of the input arguments to the pattern.

   * TYPE_OUT: The type of the output of this pattern.

   * Return value: A new stmt that will be used to replace the division
   S1 or modulo S4 stmt.  */

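/* Illustration (added for exposition; not part of the original file):
   for N == 8 the division below is rewritten per S3/S2/S1' above, i.e.

     y = b < 0 ? 7 : 0;
     x = b + y;
     a = x >> 3;

   which implements truncating division by 8 using only a comparison,
   an addition and an arithmetic shift, all of them vectorizable.  */
#if 0	/* Example only; never compiled.  */
static int
example_sdiv_pow2 (int b)
{
  return b / 8;			/* N == 8, log2 (N) == 3.  */
}
#endif
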
1627 static gimple
1628 vect_recog_sdivmod_pow2_pattern (VEC (gimple, heap) **stmts,
1629 tree *type_in, tree *type_out)
1630 {
1631 gimple last_stmt = VEC_pop (gimple, *stmts);
1632 tree oprnd0, oprnd1, vectype, itype, cond;
1633 gimple pattern_stmt, def_stmt;
1634 enum tree_code rhs_code;
1635 stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
1636 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
1637 optab optab;
1638
1639 if (!is_gimple_assign (last_stmt))
1640 return NULL;
1641
1642 rhs_code = gimple_assign_rhs_code (last_stmt);
1643 switch (rhs_code)
1644 {
1645 case TRUNC_DIV_EXPR:
1646 case TRUNC_MOD_EXPR:
1647 break;
1648 default:
1649 return NULL;
1650 }
1651
1652 if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
1653 return NULL;
1654
1655 oprnd0 = gimple_assign_rhs1 (last_stmt);
1656 oprnd1 = gimple_assign_rhs2 (last_stmt);
1657 itype = TREE_TYPE (oprnd0);
1658 if (TREE_CODE (oprnd0) != SSA_NAME
1659 || TREE_CODE (oprnd1) != INTEGER_CST
1660 || TREE_CODE (itype) != INTEGER_TYPE
1661 || TYPE_UNSIGNED (itype)
1662 || TYPE_PRECISION (itype) != GET_MODE_PRECISION (TYPE_MODE (itype))
1663 || !integer_pow2p (oprnd1)
1664 || tree_int_cst_sgn (oprnd1) != 1)
1665 return NULL;
1666
1667 vectype = get_vectype_for_scalar_type (itype);
1668 if (vectype == NULL_TREE)
1669 return NULL;
1670
1671 /* If the target can handle vectorized division or modulo natively,
1672 don't attempt to optimize this. */
1673 optab = optab_for_tree_code (rhs_code, vectype, optab_default);
1674 if (optab != NULL)
1675 {
1676 enum machine_mode vec_mode = TYPE_MODE (vectype);
1677 int icode = (int) optab_handler (optab, vec_mode);
1678 if (icode != CODE_FOR_nothing
1679 || GET_MODE_SIZE (vec_mode) == UNITS_PER_WORD)
1680 return NULL;
1681 }
1682
1683 /* Pattern detected. */
1684 if (vect_print_dump_info (REPORT_DETAILS))
1685 fprintf (vect_dump, "vect_recog_sdivmod_pow2_pattern: detected: ");
1686
1687 cond = build2 (LT_EXPR, boolean_type_node, oprnd0, build_int_cst (itype, 0));
1688 if (rhs_code == TRUNC_DIV_EXPR)
1689 {
1690 tree var = vect_recog_temp_ssa_var (itype, NULL);
1691 def_stmt
1692 = gimple_build_assign_with_ops3 (COND_EXPR, var, cond,
1693 fold_build2 (MINUS_EXPR, itype,
1694 oprnd1,
1695 build_int_cst (itype,
1696 1)),
1697 build_int_cst (itype, 0));
1698 new_pattern_def_seq (stmt_vinfo, def_stmt);
1699 var = vect_recog_temp_ssa_var (itype, NULL);
1700 def_stmt
1701 = gimple_build_assign_with_ops (PLUS_EXPR, var, oprnd0,
1702 gimple_assign_lhs (def_stmt));
1703 append_pattern_def_seq (stmt_vinfo, def_stmt);
1704
1705 pattern_stmt
1706 = gimple_build_assign_with_ops (RSHIFT_EXPR,
1707 vect_recog_temp_ssa_var (itype, NULL),
1708 var,
1709 build_int_cst (itype,
1710 tree_log2 (oprnd1)));
1711 }
1712 else
1713 {
1714 tree signmask;
1715 STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo) = NULL;
1716 if (compare_tree_int (oprnd1, 2) == 0)
1717 {
1718 signmask = vect_recog_temp_ssa_var (itype, NULL);
1719 def_stmt
1720 = gimple_build_assign_with_ops3 (COND_EXPR, signmask, cond,
1721 build_int_cst (itype, 1),
1722 build_int_cst (itype, 0));
1723 append_pattern_def_seq (stmt_vinfo, def_stmt);
1724 }
1725 else
1726 {
1727 tree utype
1728 = build_nonstandard_integer_type (TYPE_PRECISION (itype), 1);
1729 tree vecutype = get_vectype_for_scalar_type (utype);
1730 tree shift
1731 = build_int_cst (utype, GET_MODE_BITSIZE (TYPE_MODE (itype))
1732 - tree_log2 (oprnd1));
1733 tree var = vect_recog_temp_ssa_var (utype, NULL);
1734 stmt_vec_info def_stmt_vinfo;
1735
1736 def_stmt
1737 = gimple_build_assign_with_ops3 (COND_EXPR, var, cond,
1738 build_int_cst (utype, -1),
1739 build_int_cst (utype, 0));
1740 def_stmt_vinfo = new_stmt_vec_info (def_stmt, loop_vinfo, NULL);
1741 set_vinfo_for_stmt (def_stmt, def_stmt_vinfo);
1742 STMT_VINFO_VECTYPE (def_stmt_vinfo) = vecutype;
1743 append_pattern_def_seq (stmt_vinfo, def_stmt);
1744 var = vect_recog_temp_ssa_var (utype, NULL);
1745 def_stmt
1746 = gimple_build_assign_with_ops (RSHIFT_EXPR, var,
1747 gimple_assign_lhs (def_stmt),
1748 shift);
1749 def_stmt_vinfo = new_stmt_vec_info (def_stmt, loop_vinfo, NULL);
1750 set_vinfo_for_stmt (def_stmt, def_stmt_vinfo);
1751 STMT_VINFO_VECTYPE (def_stmt_vinfo) = vecutype;
1752 append_pattern_def_seq (stmt_vinfo, def_stmt);
1753 signmask = vect_recog_temp_ssa_var (itype, NULL);
1754 def_stmt
1755 = gimple_build_assign_with_ops (NOP_EXPR, signmask, var,
1756 NULL_TREE);
1757 append_pattern_def_seq (stmt_vinfo, def_stmt);
1758 }
1759 def_stmt
1760 = gimple_build_assign_with_ops (PLUS_EXPR,
1761 vect_recog_temp_ssa_var (itype, NULL),
1762 oprnd0, signmask);
1763 append_pattern_def_seq (stmt_vinfo, def_stmt);
1764 def_stmt
1765 = gimple_build_assign_with_ops (BIT_AND_EXPR,
1766 vect_recog_temp_ssa_var (itype, NULL),
1767 gimple_assign_lhs (def_stmt),
1768 fold_build2 (MINUS_EXPR, itype,
1769 oprnd1,
1770 build_int_cst (itype,
1771 1)));
1772 append_pattern_def_seq (stmt_vinfo, def_stmt);
1773
1774 pattern_stmt
1775 = gimple_build_assign_with_ops (MINUS_EXPR,
1776 vect_recog_temp_ssa_var (itype, NULL),
1777 gimple_assign_lhs (def_stmt),
1778 signmask);
1779 }
1780
1781 if (vect_print_dump_info (REPORT_DETAILS))
1782 print_gimple_stmt (vect_dump, pattern_stmt, 0, TDF_SLIM);
1783
1784 VEC_safe_push (gimple, heap, *stmts, last_stmt);
1785
1786 *type_in = vectype;
1787 *type_out = vectype;
1788 return pattern_stmt;
1789 }
1790
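/* Editor's sketch (hypothetical, not part of GCC): the scalar shape of
   the rewrite built above, shown for a divisor of 8.  It assumes
   arithmetic right shifts for signed values, which is what the
   vectorized sequence relies on as well; the signmask here is a
   value-equivalent simplification of the unsigned-shift form used in
   the pattern statements.  */

static int
sdiv_pow2_example (int x)
{
  /* x / 8 rewritten as cond/plus/shift, matching the TRUNC_DIV_EXPR
     sequence: t = x < 0 ? 8 - 1 : 0;  (x + t) >> log2 (8).  */
  int t = x < 0 ? 7 : 0;
  return (x + t) >> 3;
}

static int
smod_pow2_example (int x)
{
  /* x % 8 via the signmask sequence built for TRUNC_MOD_EXPR:
     signmask is 7 for negative x and 0 otherwise, so the result is
     ((x + signmask) & 7) - signmask.  */
  int signmask = x < 0 ? 7 : 0;
  return ((x + signmask) & 7) - signmask;
}
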
1791 /* Function vect_recog_mixed_size_cond_pattern
1792
1793 Try to find the following pattern:
1794
1795 type x_t, y_t;
1796 TYPE a_T, b_T, c_T;
1797 loop:
1798 S1 a_T = x_t CMP y_t ? b_T : c_T;
1799
1800 where type 'TYPE' is an integral type which has a different size
1801 from 'type'.  b_T and c_T are either constants (and if 'TYPE' is wider
1802 than 'type', the constants need to fit into an integer type
1803 with the same width as 'type') or results of a conversion from 'type'.
1804
1805 Input:
1806
1807 * LAST_STMT: A stmt from which the pattern search begins.
1808
1809 Output:
1810
1811 * TYPE_IN: The type of the input arguments to the pattern.
1812
1813 * TYPE_OUT: The type of the output of this pattern.
1814
1815 * Return value: A new stmt that will be used to replace the pattern.
1816 Additionally a def_stmt is added.
1817
1818 a_it = x_t CMP y_t ? b_it : c_it;
1819 a_T = (TYPE) a_it; */
1820
1821 static gimple
1822 vect_recog_mixed_size_cond_pattern (VEC (gimple, heap) **stmts, tree *type_in,
1823 tree *type_out)
1824 {
1825 gimple last_stmt = VEC_index (gimple, *stmts, 0);
1826 tree cond_expr, then_clause, else_clause;
1827 stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt), def_stmt_info;
1828 tree type, vectype, comp_vectype, itype = NULL_TREE, vecitype;
1829 enum machine_mode cmpmode;
1830 gimple pattern_stmt, def_stmt;
1831 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
1832 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
1833 tree orig_type0 = NULL_TREE, orig_type1 = NULL_TREE;
1834 gimple def_stmt0 = NULL, def_stmt1 = NULL;
1835 bool promotion;
1836 tree comp_scalar_type;
1837
1838 if (!is_gimple_assign (last_stmt)
1839 || gimple_assign_rhs_code (last_stmt) != COND_EXPR
1840 || STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_internal_def)
1841 return NULL;
1842
1843 cond_expr = gimple_assign_rhs1 (last_stmt);
1844 then_clause = gimple_assign_rhs2 (last_stmt);
1845 else_clause = gimple_assign_rhs3 (last_stmt);
1846
1847 if (!COMPARISON_CLASS_P (cond_expr))
1848 return NULL;
1849
1850 comp_scalar_type = TREE_TYPE (TREE_OPERAND (cond_expr, 0));
1851 comp_vectype = get_vectype_for_scalar_type (comp_scalar_type);
1852 if (comp_vectype == NULL_TREE)
1853 return NULL;
1854
1855 type = gimple_expr_type (last_stmt);
1856 if (types_compatible_p (type, comp_scalar_type)
1857 || ((TREE_CODE (then_clause) != INTEGER_CST
1858 || TREE_CODE (else_clause) != INTEGER_CST)
1859 && !INTEGRAL_TYPE_P (comp_scalar_type))
1860 || !INTEGRAL_TYPE_P (type))
1861 return NULL;
1862
1863 if ((TREE_CODE (then_clause) != INTEGER_CST
1864 && !type_conversion_p (then_clause, last_stmt, false, &orig_type0,
1865 &def_stmt0, &promotion))
1866 || (TREE_CODE (else_clause) != INTEGER_CST
1867 && !type_conversion_p (else_clause, last_stmt, false, &orig_type1,
1868 &def_stmt1, &promotion)))
1869 return NULL;
1870
1871 if (orig_type0 && orig_type1
1872 && !types_compatible_p (orig_type0, orig_type1))
1873 return NULL;
1874
1875 if (orig_type0)
1876 {
1877 if (!types_compatible_p (orig_type0, comp_scalar_type))
1878 return NULL;
1879 then_clause = gimple_assign_rhs1 (def_stmt0);
1880 itype = orig_type0;
1881 }
1882
1883 if (orig_type1)
1884 {
1885 if (!types_compatible_p (orig_type1, comp_scalar_type))
1886 return NULL;
1887 else_clause = gimple_assign_rhs1 (def_stmt1);
1888 itype = orig_type1;
1889 }
1890
1891 cmpmode = GET_MODE_INNER (TYPE_MODE (comp_vectype));
1892
1893 if (GET_MODE_BITSIZE (TYPE_MODE (type)) == GET_MODE_BITSIZE (cmpmode))
1894 return NULL;
1895
1896 vectype = get_vectype_for_scalar_type (type);
1897 if (vectype == NULL_TREE)
1898 return NULL;
1899
1900 if (expand_vec_cond_expr_p (vectype, comp_vectype))
1901 return NULL;
1902
1903 if (itype == NULL_TREE)
1904 itype = build_nonstandard_integer_type (GET_MODE_BITSIZE (cmpmode),
1905 TYPE_UNSIGNED (type));
1906
1907 if (itype == NULL_TREE
1908 || GET_MODE_BITSIZE (TYPE_MODE (itype)) != GET_MODE_BITSIZE (cmpmode))
1909 return NULL;
1910
1911 vecitype = get_vectype_for_scalar_type (itype);
1912 if (vecitype == NULL_TREE)
1913 return NULL;
1914
1915 if (!expand_vec_cond_expr_p (vecitype, comp_vectype))
1916 return NULL;
1917
1918 if (GET_MODE_BITSIZE (TYPE_MODE (type)) > GET_MODE_BITSIZE (cmpmode))
1919 {
1920 if ((TREE_CODE (then_clause) == INTEGER_CST
1921 && !int_fits_type_p (then_clause, itype))
1922 || (TREE_CODE (else_clause) == INTEGER_CST
1923 && !int_fits_type_p (else_clause, itype)))
1924 return NULL;
1925 }
1926
1927 def_stmt
1928 = gimple_build_assign_with_ops3 (COND_EXPR,
1929 vect_recog_temp_ssa_var (itype, NULL),
1930 unshare_expr (cond_expr),
1931 fold_convert (itype, then_clause),
1932 fold_convert (itype, else_clause));
1933 pattern_stmt
1934 = gimple_build_assign_with_ops (NOP_EXPR,
1935 vect_recog_temp_ssa_var (type, NULL),
1936 gimple_assign_lhs (def_stmt), NULL_TREE);
1937
1938 new_pattern_def_seq (stmt_vinfo, def_stmt);
1939 def_stmt_info = new_stmt_vec_info (def_stmt, loop_vinfo, bb_vinfo);
1940 set_vinfo_for_stmt (def_stmt, def_stmt_info);
1941 STMT_VINFO_VECTYPE (def_stmt_info) = vecitype;
1942 *type_in = vecitype;
1943 *type_out = vectype;
1944
1945 if (vect_print_dump_info (REPORT_DETAILS))
1946 fprintf (vect_dump, "vect_recog_mixed_size_cond_pattern: detected: ");
1947
1948 return pattern_stmt;
1949 }
1950
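/* Editor's sketch (hypothetical, not part of GCC): a source-level loop
   in the shape the pattern above matches, with 'type' == short for the
   comparison and 'TYPE' == int for the result.  The COND_EXPR is done
   in the narrow type and then widened:
     a_it = x[i] < 16 ? 32 : 0;  a[i] = (int) a_it;  */

static void
mixed_size_cond_example (short *x, int *a, int n)
{
  int i;
  for (i = 0; i < n; i++)
    a[i] = x[i] < 16 ? 32 : 0;
}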
1951
1952 /* Helper function of vect_recog_bool_pattern.  Called recursively; returns
1953 true if bool VAR can be optimized that way.  */
1954
1955 static bool
1956 check_bool_pattern (tree var, loop_vec_info loop_vinfo, bb_vec_info bb_vinfo)
1957 {
1958 gimple def_stmt;
1959 enum vect_def_type dt;
1960 tree def, rhs1;
1961 enum tree_code rhs_code;
1962
1963 if (!vect_is_simple_use (var, NULL, loop_vinfo, bb_vinfo, &def_stmt, &def,
1964 &dt))
1965 return false;
1966
1967 if (dt != vect_internal_def)
1968 return false;
1969
1970 if (!is_gimple_assign (def_stmt))
1971 return false;
1972
1973 if (!has_single_use (def))
1974 return false;
1975
1976 rhs1 = gimple_assign_rhs1 (def_stmt);
1977 rhs_code = gimple_assign_rhs_code (def_stmt);
1978 switch (rhs_code)
1979 {
1980 case SSA_NAME:
1981 return check_bool_pattern (rhs1, loop_vinfo, bb_vinfo);
1982
1983 CASE_CONVERT:
1984 if ((TYPE_PRECISION (TREE_TYPE (rhs1)) != 1
1985 || !TYPE_UNSIGNED (TREE_TYPE (rhs1)))
1986 && TREE_CODE (TREE_TYPE (rhs1)) != BOOLEAN_TYPE)
1987 return false;
1988 return check_bool_pattern (rhs1, loop_vinfo, bb_vinfo);
1989
1990 case BIT_NOT_EXPR:
1991 return check_bool_pattern (rhs1, loop_vinfo, bb_vinfo);
1992
1993 case BIT_AND_EXPR:
1994 case BIT_IOR_EXPR:
1995 case BIT_XOR_EXPR:
1996 if (!check_bool_pattern (rhs1, loop_vinfo, bb_vinfo))
1997 return false;
1998 return check_bool_pattern (gimple_assign_rhs2 (def_stmt), loop_vinfo,
1999 bb_vinfo);
2000
2001 default:
2002 if (TREE_CODE_CLASS (rhs_code) == tcc_comparison)
2003 {
2004 tree vecitype, comp_vectype;
2005
2006 /* If the comparison can throw, then is_gimple_condexpr will be
2007 false and we can't make a COND_EXPR/VEC_COND_EXPR out of it. */
2008 if (stmt_could_throw_p (def_stmt))
2009 return false;
2010
2011 comp_vectype = get_vectype_for_scalar_type (TREE_TYPE (rhs1));
2012 if (comp_vectype == NULL_TREE)
2013 return false;
2014
2015 if (TREE_CODE (TREE_TYPE (rhs1)) != INTEGER_TYPE)
2016 {
2017 enum machine_mode mode = TYPE_MODE (TREE_TYPE (rhs1));
2018 tree itype
2019 = build_nonstandard_integer_type (GET_MODE_BITSIZE (mode), 1);
2020 vecitype = get_vectype_for_scalar_type (itype);
2021 if (vecitype == NULL_TREE)
2022 return false;
2023 }
2024 else
2025 vecitype = comp_vectype;
2026 return expand_vec_cond_expr_p (vecitype, comp_vectype);
2027 }
2028 return false;
2029 }
2030 }
2031
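/* Editor's note (illustrative, hypothetical names): the checks above
   accept a bool chain such as

     _Bool ok = (p < q) & !(r < s);

   i.e. comparisons combined with &, |, ^ and negation, provided every
   intermediate bool is defined inside the loop or basic block and has
   a single use; anything else, e.g. a bool loaded from memory, makes
   the chain non-optimizable.  */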
2032
2033 /* Helper function of adjust_bool_pattern.  Add a cast to TYPE to the stmt
2034 defining VAR (its SSA_NAME_DEF_STMT) by moving the COND_EXPR from
2035 RELATED_STMT to PATTERN_DEF_SEQ and making the cast the new RELATED_STMT.  */
2036
2037 static tree
2038 adjust_bool_pattern_cast (tree type, tree var)
2039 {
2040 stmt_vec_info stmt_vinfo = vinfo_for_stmt (SSA_NAME_DEF_STMT (var));
2041 gimple cast_stmt, pattern_stmt;
2042
2043 gcc_assert (!STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo));
2044 pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_vinfo);
2045 new_pattern_def_seq (stmt_vinfo, pattern_stmt);
2046 cast_stmt
2047 = gimple_build_assign_with_ops (NOP_EXPR,
2048 vect_recog_temp_ssa_var (type, NULL),
2049 gimple_assign_lhs (pattern_stmt),
2050 NULL_TREE);
2051 STMT_VINFO_RELATED_STMT (stmt_vinfo) = cast_stmt;
2052 return gimple_assign_lhs (cast_stmt);
2053 }
2054
2055
2056 /* Helper function of vect_recog_bool_pattern.  Do the actual transformations
2057 recursively.  VAR is an SSA_NAME that should be transformed from bool
2058 to a wider integer type.  OUT_TYPE is the desired final integer type of
2059 the whole pattern.  TRUEVAL should be NULL unless we are optimizing a
2060 BIT_AND_EXPR into a COND_EXPR with one integer from one of the operands
2061 in the then_clause.  STMTS is where statements with added pattern stmts
2062 should be pushed.  */
2063
2064 static tree
2065 adjust_bool_pattern (tree var, tree out_type, tree trueval,
2066 VEC (gimple, heap) **stmts)
2067 {
2068 gimple stmt = SSA_NAME_DEF_STMT (var);
2069 enum tree_code rhs_code, def_rhs_code;
2070 tree itype, cond_expr, rhs1, rhs2, irhs1, irhs2;
2071 location_t loc;
2072 gimple pattern_stmt, def_stmt;
2073
2074 rhs1 = gimple_assign_rhs1 (stmt);
2075 rhs2 = gimple_assign_rhs2 (stmt);
2076 rhs_code = gimple_assign_rhs_code (stmt);
2077 loc = gimple_location (stmt);
2078 switch (rhs_code)
2079 {
2080 case SSA_NAME:
2081 CASE_CONVERT:
2082 irhs1 = adjust_bool_pattern (rhs1, out_type, NULL_TREE, stmts);
2083 itype = TREE_TYPE (irhs1);
2084 pattern_stmt
2085 = gimple_build_assign_with_ops (SSA_NAME,
2086 vect_recog_temp_ssa_var (itype, NULL),
2087 irhs1, NULL_TREE);
2088 break;
2089
2090 case BIT_NOT_EXPR:
2091 irhs1 = adjust_bool_pattern (rhs1, out_type, NULL_TREE, stmts);
2092 itype = TREE_TYPE (irhs1);
2093 pattern_stmt
2094 = gimple_build_assign_with_ops (BIT_XOR_EXPR,
2095 vect_recog_temp_ssa_var (itype, NULL),
2096 irhs1, build_int_cst (itype, 1));
2097 break;
2098
2099 case BIT_AND_EXPR:
2100 /* Try to optimize x = y & (a < b ? 1 : 0); into
2101 x = (a < b ? y : 0);
2102
2103 E.g. for:
2104 bool a_b, b_b, c_b;
2105 TYPE d_T;
2106
2107 S1 a_b = x1 CMP1 y1;
2108 S2 b_b = x2 CMP2 y2;
2109 S3 c_b = a_b & b_b;
2110 S4 d_T = (TYPE) c_b;
2111
2112 we would normally emit:
2113
2114 S1' a_T = x1 CMP1 y1 ? 1 : 0;
2115 S2' b_T = x2 CMP2 y2 ? 1 : 0;
2116 S3' c_T = a_T & b_T;
2117 S4' d_T = c_T;
2118
2119 but we can save one stmt by using the
2120 result of one of the COND_EXPRs in the other COND_EXPR and leave
2121 BIT_AND_EXPR stmt out:
2122
2123 S1' a_T = x1 CMP1 y1 ? 1 : 0;
2124 S3' c_T = x2 CMP2 y2 ? a_T : 0;
2125 S4' d_T = c_T;
2126 
2127 At least when VEC_COND_EXPR is implemented using masks,
2128 cond ? 1 : 0 is as expensive as cond ? var : 0; in both cases it
2129 computes the comparison mask and ANDs it, in one case with an
2130 all-ones vector, in the other case with a vector register.
2131 Don't do this for BIT_IOR_EXPR, because cond ? 1 : var is
2132 often more expensive.  */
2133 def_stmt = SSA_NAME_DEF_STMT (rhs2);
2134 def_rhs_code = gimple_assign_rhs_code (def_stmt);
2135 if (TREE_CODE_CLASS (def_rhs_code) == tcc_comparison)
2136 {
2137 tree def_rhs1 = gimple_assign_rhs1 (def_stmt);
2138 irhs1 = adjust_bool_pattern (rhs1, out_type, NULL_TREE, stmts);
2139 if (TYPE_PRECISION (TREE_TYPE (irhs1))
2140 == GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (def_rhs1))))
2141 {
2142 gimple tstmt;
2143 stmt_vec_info stmt_def_vinfo = vinfo_for_stmt (def_stmt);
2144 irhs2 = adjust_bool_pattern (rhs2, out_type, irhs1, stmts);
2145 tstmt = VEC_pop (gimple, *stmts);
2146 gcc_assert (tstmt == def_stmt);
2147 VEC_quick_push (gimple, *stmts, stmt);
2148 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (stmt))
2149 = STMT_VINFO_RELATED_STMT (stmt_def_vinfo);
2150 gcc_assert (!STMT_VINFO_PATTERN_DEF_SEQ (stmt_def_vinfo));
2151 STMT_VINFO_RELATED_STMT (stmt_def_vinfo) = NULL;
2152 return irhs2;
2153 }
2154 else
2155 irhs2 = adjust_bool_pattern (rhs2, out_type, NULL_TREE, stmts);
2156 goto and_ior_xor;
2157 }
2158 def_stmt = SSA_NAME_DEF_STMT (rhs1);
2159 def_rhs_code = gimple_assign_rhs_code (def_stmt);
2160 if (TREE_CODE_CLASS (def_rhs_code) == tcc_comparison)
2161 {
2162 tree def_rhs1 = gimple_assign_rhs1 (def_stmt);
2163 irhs2 = adjust_bool_pattern (rhs2, out_type, NULL_TREE, stmts);
2164 if (TYPE_PRECISION (TREE_TYPE (irhs2))
2165 == GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (def_rhs1))))
2166 {
2167 gimple tstmt;
2168 stmt_vec_info stmt_def_vinfo = vinfo_for_stmt (def_stmt);
2169 irhs1 = adjust_bool_pattern (rhs1, out_type, irhs2, stmts);
2170 tstmt = VEC_pop (gimple, *stmts);
2171 gcc_assert (tstmt == def_stmt);
2172 VEC_quick_push (gimple, *stmts, stmt);
2173 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (stmt))
2174 = STMT_VINFO_RELATED_STMT (stmt_def_vinfo);
2175 gcc_assert (!STMT_VINFO_PATTERN_DEF_SEQ (stmt_def_vinfo));
2176 STMT_VINFO_RELATED_STMT (stmt_def_vinfo) = NULL;
2177 return irhs1;
2178 }
2179 else
2180 irhs1 = adjust_bool_pattern (rhs1, out_type, NULL_TREE, stmts);
2181 goto and_ior_xor;
2182 }
2183 /* FALLTHRU */
2184 case BIT_IOR_EXPR:
2185 case BIT_XOR_EXPR:
2186 irhs1 = adjust_bool_pattern (rhs1, out_type, NULL_TREE, stmts);
2187 irhs2 = adjust_bool_pattern (rhs2, out_type, NULL_TREE, stmts);
2188 and_ior_xor:
2189 if (TYPE_PRECISION (TREE_TYPE (irhs1))
2190 != TYPE_PRECISION (TREE_TYPE (irhs2)))
2191 {
2192 int prec1 = TYPE_PRECISION (TREE_TYPE (irhs1));
2193 int prec2 = TYPE_PRECISION (TREE_TYPE (irhs2));
2194 int out_prec = TYPE_PRECISION (out_type);
2195 if (absu_hwi (out_prec - prec1) < absu_hwi (out_prec - prec2))
2196 irhs2 = adjust_bool_pattern_cast (TREE_TYPE (irhs1), rhs2);
2197 else if (absu_hwi (out_prec - prec1) > absu_hwi (out_prec - prec2))
2198 irhs1 = adjust_bool_pattern_cast (TREE_TYPE (irhs2), rhs1);
2199 else
2200 {
2201 irhs1 = adjust_bool_pattern_cast (out_type, rhs1);
2202 irhs2 = adjust_bool_pattern_cast (out_type, rhs2);
2203 }
2204 }
2205 itype = TREE_TYPE (irhs1);
2206 pattern_stmt
2207 = gimple_build_assign_with_ops (rhs_code,
2208 vect_recog_temp_ssa_var (itype, NULL),
2209 irhs1, irhs2);
2210 break;
2211
2212 default:
2213 gcc_assert (TREE_CODE_CLASS (rhs_code) == tcc_comparison);
2214 if (TREE_CODE (TREE_TYPE (rhs1)) != INTEGER_TYPE
2215 || !TYPE_UNSIGNED (TREE_TYPE (rhs1))
2216 || (TYPE_PRECISION (TREE_TYPE (rhs1))
2217 != GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (rhs1)))))
2218 {
2219 enum machine_mode mode = TYPE_MODE (TREE_TYPE (rhs1));
2220 itype
2221 = build_nonstandard_integer_type (GET_MODE_BITSIZE (mode), 1);
2222 }
2223 else
2224 itype = TREE_TYPE (rhs1);
2225 cond_expr = build2_loc (loc, rhs_code, itype, rhs1, rhs2);
2226 if (trueval == NULL_TREE)
2227 trueval = build_int_cst (itype, 1);
2228 else
2229 gcc_checking_assert (useless_type_conversion_p (itype,
2230 TREE_TYPE (trueval)));
2231 pattern_stmt
2232 = gimple_build_assign_with_ops3 (COND_EXPR,
2233 vect_recog_temp_ssa_var (itype, NULL),
2234 cond_expr, trueval,
2235 build_int_cst (itype, 0));
2236 break;
2237 }
2238
2239 VEC_safe_push (gimple, heap, *stmts, stmt);
2240 gimple_set_location (pattern_stmt, loc);
2241 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (stmt)) = pattern_stmt;
2242 return gimple_assign_lhs (pattern_stmt);
2243 }
2244
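/* Editor's sketch (hypothetical, not part of GCC) of the BIT_AND_EXPR
   shortcut in adjust_bool_pattern above: because a_T is known to be
   0 or 1, the AND of two COND_EXPR results can be folded into the
   second COND_EXPR, saving one statement.  */

static int
bit_and_cond_example (int x1, int y1, int x2, int y2)
{
  int a_T = x1 < y1 ? 1 : 0;     /* S1'.  */
  int c_T = x2 < y2 ? a_T : 0;   /* S3': stands in for a_T & b_T.  */
  return c_T;
}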
2245
2246 /* Function vect_recog_bool_pattern
2247
2248 Try to find a pattern like the following:
2249
2250 bool a_b, b_b, c_b, d_b, e_b;
2251 TYPE f_T;
2252 loop:
2253 S1 a_b = x1 CMP1 y1;
2254 S2 b_b = x2 CMP2 y2;
2255 S3 c_b = a_b & b_b;
2256 S4 d_b = x3 CMP3 y3;
2257 S5 e_b = c_b | d_b;
2258 S6 f_T = (TYPE) e_b;
2259
2260 where type 'TYPE' is an integral type.
2261
2262 Input:
2263
2264 * LAST_STMT: The stmt at the end of the pattern, from which the
2265 search begins, i.e. the cast of a bool to
2266 an integer type.
2267
2268 Output:
2269
2270 * TYPE_IN: The type of the input arguments to the pattern.
2271
2272 * TYPE_OUT: The type of the output of this pattern.
2273
2274 * Return value: A new stmt that will be used to replace the pattern.
2275
2276 Assuming the size of TYPE is the same as the size of all comparisons
2277 (otherwise some casts would be added where needed), for the above
2278 sequence we create the following related pattern stmts:
2279 S1' a_T = x1 CMP1 y1 ? 1 : 0;
2280 S3' c_T = x2 CMP2 y2 ? a_T : 0;
2281 S4' d_T = x3 CMP3 y3 ? 1 : 0;
2282 S5' e_T = c_T | d_T;
2283 S6' f_T = e_T;
2284
2285 Instead of the above S3' we could emit:
2286 S2' b_T = x2 CMP2 y2 ? 1 : 0;
2287 S3' c_T = a_T & b_T;
2288 but the above is more efficient. */
2289
2290 static gimple
2291 vect_recog_bool_pattern (VEC (gimple, heap) **stmts, tree *type_in,
2292 tree *type_out)
2293 {
2294 gimple last_stmt = VEC_pop (gimple, *stmts);
2295 enum tree_code rhs_code;
2296 tree var, lhs, rhs, vectype;
2297 stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
2298 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
2299 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
2300 gimple pattern_stmt;
2301
2302 if (!is_gimple_assign (last_stmt))
2303 return NULL;
2304
2305 var = gimple_assign_rhs1 (last_stmt);
2306 lhs = gimple_assign_lhs (last_stmt);
2307
2308 if ((TYPE_PRECISION (TREE_TYPE (var)) != 1
2309 || !TYPE_UNSIGNED (TREE_TYPE (var)))
2310 && TREE_CODE (TREE_TYPE (var)) != BOOLEAN_TYPE)
2311 return NULL;
2312
2313 rhs_code = gimple_assign_rhs_code (last_stmt);
2314 if (CONVERT_EXPR_CODE_P (rhs_code))
2315 {
2316 if (TREE_CODE (TREE_TYPE (lhs)) != INTEGER_TYPE
2317 || TYPE_PRECISION (TREE_TYPE (lhs)) == 1)
2318 return NULL;
2319 vectype = get_vectype_for_scalar_type (TREE_TYPE (lhs));
2320 if (vectype == NULL_TREE)
2321 return NULL;
2322
2323 if (!check_bool_pattern (var, loop_vinfo, bb_vinfo))
2324 return NULL;
2325
2326 rhs = adjust_bool_pattern (var, TREE_TYPE (lhs), NULL_TREE, stmts);
2327 lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
2328 if (useless_type_conversion_p (TREE_TYPE (lhs), TREE_TYPE (rhs)))
2329 pattern_stmt
2330 = gimple_build_assign_with_ops (SSA_NAME, lhs, rhs, NULL_TREE);
2331 else
2332 pattern_stmt
2333 = gimple_build_assign_with_ops (NOP_EXPR, lhs, rhs, NULL_TREE);
2334 *type_out = vectype;
2335 *type_in = vectype;
2336 VEC_safe_push (gimple, heap, *stmts, last_stmt);
2337 if (vect_print_dump_info (REPORT_DETAILS))
2338 fprintf (vect_dump, "vect_recog_bool_pattern: detected: ");
2339
2340 return pattern_stmt;
2341 }
2342 else if (rhs_code == SSA_NAME
2343 && STMT_VINFO_DATA_REF (stmt_vinfo))
2344 {
2345 stmt_vec_info pattern_stmt_info;
2346 vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
2347 gcc_assert (vectype != NULL_TREE);
2348 if (!VECTOR_MODE_P (TYPE_MODE (vectype)))
2349 return NULL;
2350 if (!check_bool_pattern (var, loop_vinfo, bb_vinfo))
2351 return NULL;
2352
2353 rhs = adjust_bool_pattern (var, TREE_TYPE (vectype), NULL_TREE, stmts);
2354 lhs = build1 (VIEW_CONVERT_EXPR, TREE_TYPE (vectype), lhs);
2355 if (!useless_type_conversion_p (TREE_TYPE (lhs), TREE_TYPE (rhs)))
2356 {
2357 tree rhs2 = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
2358 gimple cast_stmt
2359 = gimple_build_assign_with_ops (NOP_EXPR, rhs2, rhs, NULL_TREE);
2360 new_pattern_def_seq (stmt_vinfo, cast_stmt);
2361 rhs = rhs2;
2362 }
2363 pattern_stmt
2364 = gimple_build_assign_with_ops (SSA_NAME, lhs, rhs, NULL_TREE);
2365 pattern_stmt_info = new_stmt_vec_info (pattern_stmt, loop_vinfo,
2366 bb_vinfo);
2367 set_vinfo_for_stmt (pattern_stmt, pattern_stmt_info);
2368 STMT_VINFO_DATA_REF (pattern_stmt_info)
2369 = STMT_VINFO_DATA_REF (stmt_vinfo);
2370 STMT_VINFO_DR_BASE_ADDRESS (pattern_stmt_info)
2371 = STMT_VINFO_DR_BASE_ADDRESS (stmt_vinfo);
2372 STMT_VINFO_DR_INIT (pattern_stmt_info) = STMT_VINFO_DR_INIT (stmt_vinfo);
2373 STMT_VINFO_DR_OFFSET (pattern_stmt_info)
2374 = STMT_VINFO_DR_OFFSET (stmt_vinfo);
2375 STMT_VINFO_DR_STEP (pattern_stmt_info) = STMT_VINFO_DR_STEP (stmt_vinfo);
2376 STMT_VINFO_DR_ALIGNED_TO (pattern_stmt_info)
2377 = STMT_VINFO_DR_ALIGNED_TO (stmt_vinfo);
2378 DR_STMT (STMT_VINFO_DATA_REF (stmt_vinfo)) = pattern_stmt;
2379 *type_out = vectype;
2380 *type_in = vectype;
2381 VEC_safe_push (gimple, heap, *stmts, last_stmt);
2382 if (vect_print_dump_info (REPORT_DETAILS))
2383 fprintf (vect_dump, "vect_recog_bool_pattern: detected: ");
2384 return pattern_stmt;
2385 }
2386 else
2387 return NULL;
2388 }
2389
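/* Editor's sketch (hypothetical, not part of GCC): a loop in exactly
   the shape vect_recog_bool_pattern looks for; comparisons combined
   with bool bitwise operations and finally cast to an integer.  */

static void
bool_pattern_example (int *x1, int *x2, int *x3, int *f, int n)
{
  int i;
  for (i = 0; i < n; i++)
    {
      _Bool a_b = x1[i] < 10;   /* S1 */
      _Bool b_b = x2[i] > 20;   /* S2 */
      _Bool c_b = a_b & b_b;    /* S3 */
      _Bool d_b = x3[i] != 0;   /* S4 */
      _Bool e_b = c_b | d_b;    /* S5 */
      f[i] = e_b;               /* S6: cast of the bool to int.  */
    }
}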
2390
2391 /* Mark statements that are involved in a pattern. */
2392
2393 static inline void
2394 vect_mark_pattern_stmts (gimple orig_stmt, gimple pattern_stmt,
2395 tree pattern_vectype)
2396 {
2397 stmt_vec_info pattern_stmt_info, def_stmt_info;
2398 stmt_vec_info orig_stmt_info = vinfo_for_stmt (orig_stmt);
2399 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (orig_stmt_info);
2400 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (orig_stmt_info);
2401 gimple def_stmt;
2402
2403 pattern_stmt_info = vinfo_for_stmt (pattern_stmt);
2404 if (pattern_stmt_info == NULL)
2405 {
2406 pattern_stmt_info = new_stmt_vec_info (pattern_stmt, loop_vinfo,
2407 bb_vinfo);
2408 set_vinfo_for_stmt (pattern_stmt, pattern_stmt_info);
2409 }
2410 gimple_set_bb (pattern_stmt, gimple_bb (orig_stmt));
2411
2412 STMT_VINFO_RELATED_STMT (pattern_stmt_info) = orig_stmt;
2413 STMT_VINFO_DEF_TYPE (pattern_stmt_info)
2414 = STMT_VINFO_DEF_TYPE (orig_stmt_info);
2415 STMT_VINFO_VECTYPE (pattern_stmt_info) = pattern_vectype;
2416 STMT_VINFO_IN_PATTERN_P (orig_stmt_info) = true;
2417 STMT_VINFO_RELATED_STMT (orig_stmt_info) = pattern_stmt;
2418 STMT_VINFO_PATTERN_DEF_SEQ (pattern_stmt_info)
2419 = STMT_VINFO_PATTERN_DEF_SEQ (orig_stmt_info);
2420 if (STMT_VINFO_PATTERN_DEF_SEQ (pattern_stmt_info))
2421 {
2422 gimple_stmt_iterator si;
2423 for (si = gsi_start (STMT_VINFO_PATTERN_DEF_SEQ (pattern_stmt_info));
2424 !gsi_end_p (si); gsi_next (&si))
2425 {
2426 def_stmt = gsi_stmt (si);
2427 def_stmt_info = vinfo_for_stmt (def_stmt);
2428 if (def_stmt_info == NULL)
2429 {
2430 def_stmt_info = new_stmt_vec_info (def_stmt, loop_vinfo,
2431 bb_vinfo);
2432 set_vinfo_for_stmt (def_stmt, def_stmt_info);
2433 }
2434 gimple_set_bb (def_stmt, gimple_bb (orig_stmt));
2435 STMT_VINFO_RELATED_STMT (def_stmt_info) = orig_stmt;
2436 STMT_VINFO_DEF_TYPE (def_stmt_info)
2437 = STMT_VINFO_DEF_TYPE (orig_stmt_info);
2438 if (STMT_VINFO_VECTYPE (def_stmt_info) == NULL_TREE)
2439 STMT_VINFO_VECTYPE (def_stmt_info) = pattern_vectype;
2440 }
2441 }
2442 }
2443
2444 /* Function vect_pattern_recog_1
2445
2446 Input:
2447 PATTERN_RECOG_FUNC: A pointer to a function that detects a certain
2448 computation pattern.
2449 STMT: A stmt from which the pattern search should start.
2450
2451 If PATTERN_RECOG_FUNC successfully detects the pattern, it creates an
2452 expression that computes the same functionality and can be used to
2453 replace the sequence of stmts that are involved in the pattern.
2454
2455 Output:
2456 This function checks if the expression returned by PATTERN_RECOG_FUNC is
2457 supported in vector form by the target. We use 'TYPE_IN' to obtain the
2458 relevant vector type. If 'TYPE_IN' is already a vector type, then this
2459 indicates that target support had already been checked by PATTERN_RECOG_FUNC.
2460 If 'TYPE_OUT' is also returned by PATTERN_RECOG_FUNC, we check that it
2461 fits the available target pattern.
2462
2463 This function also does some bookkeeping, as explained in the documentation
2464 for vect_pattern_recog.  */
2465
2466 static void
2467 vect_pattern_recog_1 (vect_recog_func_ptr vect_recog_func,
2468 gimple_stmt_iterator si,
2469 VEC (gimple, heap) **stmts_to_replace)
2470 {
2471 gimple stmt = gsi_stmt (si), pattern_stmt;
2472 stmt_vec_info stmt_info;
2473 loop_vec_info loop_vinfo;
2474 tree pattern_vectype;
2475 tree type_in, type_out;
2476 enum tree_code code;
2477 int i;
2478 gimple next;
2479
2480 VEC_truncate (gimple, *stmts_to_replace, 0);
2481 VEC_quick_push (gimple, *stmts_to_replace, stmt);
2482 pattern_stmt = (* vect_recog_func) (stmts_to_replace, &type_in, &type_out);
2483 if (!pattern_stmt)
2484 return;
2485
2486 stmt = VEC_last (gimple, *stmts_to_replace);
2487 stmt_info = vinfo_for_stmt (stmt);
2488 loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2489
2490 if (VECTOR_MODE_P (TYPE_MODE (type_in)))
2491 {
2492 /* No need to check target support (already checked by the pattern
2493 recognition function). */
2494 pattern_vectype = type_out ? type_out : type_in;
2495 }
2496 else
2497 {
2498 enum machine_mode vec_mode;
2499 enum insn_code icode;
2500 optab optab;
2501
2502 /* Check target support */
2503 type_in = get_vectype_for_scalar_type (type_in);
2504 if (!type_in)
2505 return;
2506 if (type_out)
2507 type_out = get_vectype_for_scalar_type (type_out);
2508 else
2509 type_out = type_in;
2510 if (!type_out)
2511 return;
2512 pattern_vectype = type_out;
2513
2514 if (is_gimple_assign (pattern_stmt))
2515 code = gimple_assign_rhs_code (pattern_stmt);
2516 else
2517 {
2518 gcc_assert (is_gimple_call (pattern_stmt));
2519 code = CALL_EXPR;
2520 }
2521
2522 optab = optab_for_tree_code (code, type_in, optab_default);
2523 vec_mode = TYPE_MODE (type_in);
2524 if (!optab
2525 || (icode = optab_handler (optab, vec_mode)) == CODE_FOR_nothing
2526 || (insn_data[icode].operand[0].mode != TYPE_MODE (type_out)))
2527 return;
2528 }
2529
2530 /* Found a vectorizable pattern. */
2531 if (vect_print_dump_info (REPORT_DETAILS))
2532 {
2533 fprintf (vect_dump, "pattern recognized: ");
2534 print_gimple_stmt (vect_dump, pattern_stmt, 0, TDF_SLIM);
2535 }
2536
2537 /* Mark the stmts that are involved in the pattern. */
2538 vect_mark_pattern_stmts (stmt, pattern_stmt, pattern_vectype);
2539
2540 /* Patterns cannot be vectorized using SLP, because they change the order of
2541 computation. */
2542 if (loop_vinfo)
2543 FOR_EACH_VEC_ELT (gimple, LOOP_VINFO_REDUCTIONS (loop_vinfo), i, next)
2544 if (next == stmt)
2545 VEC_ordered_remove (gimple, LOOP_VINFO_REDUCTIONS (loop_vinfo), i);
2546
2547 /* It is possible that additional pattern stmts are created and inserted in
2548 STMTS_TO_REPLACE. We create a stmt_info for each of them, and mark the
2549 relevant statements. */
2550 for (i = 0; VEC_iterate (gimple, *stmts_to_replace, i, stmt)
2551 && (unsigned) i < (VEC_length (gimple, *stmts_to_replace) - 1);
2552 i++)
2553 {
2554 stmt_info = vinfo_for_stmt (stmt);
2555 pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
2556 if (vect_print_dump_info (REPORT_DETAILS))
2557 {
2558 fprintf (vect_dump, "additional pattern stmt: ");
2559 print_gimple_stmt (vect_dump, pattern_stmt, 0, TDF_SLIM);
2560 }
2561
2562 vect_mark_pattern_stmts (stmt, pattern_stmt, NULL_TREE);
2563 }
2564 }
2565
2566
2567 /* Function vect_pattern_recog
2568
2569 Input:
2570 LOOP_VINFO - the loop_vec_info struct of a loop in which we want to look
2571 for computation idioms (alternatively, BB_VINFO for a basic block).
2572 
2573 Output - for each computation idiom that is detected we create a new stmt
2574 that provides the same functionality and that can be vectorized.  We
2575 also record some information in the stmt_vec_info struct of the relevant
2576 stmts, as explained below:
2577
2578 At the entry to this function we have the following stmts, with the
2579 following initial value in the STMT_VINFO fields:
2580
2581 stmt in_pattern_p related_stmt vec_stmt
2582 S1: a_i = .... - - -
2583 S2: a_2 = ..use(a_i).. - - -
2584 S3: a_1 = ..use(a_2).. - - -
2585 S4: a_0 = ..use(a_1).. - - -
2586 S5: ... = ..use(a_0).. - - -
2587
2588 Say the sequence {S1,S2,S3,S4} was detected as a pattern that can be
2589 represented by a single stmt. We then:
2590 - create a new stmt S6 equivalent to the pattern (the stmt is not
2591 inserted into the code)
2592 - fill in the STMT_VINFO fields as follows:
2593
2594 in_pattern_p related_stmt vec_stmt
2595 S1: a_i = .... - - -
2596 S2: a_2 = ..use(a_i).. - - -
2597 S3: a_1 = ..use(a_2).. - - -
2598 S4: a_0 = ..use(a_1).. true S6 -
2599 '---> S6: a_new = .... - S4 -
2600 S5: ... = ..use(a_0).. - - -
2601
2602 (the last stmt in the pattern (S4) and the new pattern stmt (S6) point
2603 to each other through the RELATED_STMT field).
2604
2605 S6 will be marked as relevant in vect_mark_stmts_to_be_vectorized instead
2606 of S4 because it will replace all its uses. Stmts {S1,S2,S3} will
2607 remain irrelevant unless used by stmts other than S4.
2608
2609 If vectorization succeeds, vect_transform_stmt will skip over {S1,S2,S3}
2610 (because they are marked as irrelevant). It will vectorize S6, and record
2611 a pointer to the new vector stmt VS6 from S6 (as usual).
2612 S4 will be skipped, and S5 will be vectorized as usual:
2613
2614 in_pattern_p related_stmt vec_stmt
2615 S1: a_i = .... - - -
2616 S2: a_2 = ..use(a_i).. - - -
2617 S3: a_1 = ..use(a_2).. - - -
2618 > VS6: va_new = .... - - -
2619 S4: a_0 = ..use(a_1).. true S6 VS6
2620 '---> S6: a_new = .... - S4 VS6
2621 > VS5: ... = ..vuse(va_new).. - - -
2622 S5: ... = ..use(a_0).. - - -
2623
2624 DCE could then get rid of {S1,S2,S3,S4,S5} (if their defs are not used
2625 elsewhere), and we'll end up with:
2626
2627 VS6: va_new = ....
2628 VS5: ... = ..vuse(va_new)..
2629
2630 In case of more than one pattern statement, e.g., widen-mult with
2631 intermediate type:
2632
2633 S1 a_t = ;
2634 S2 a_T = (TYPE) a_t;
2635 '--> S3: a_it = (interm_type) a_t;
2636 S4 prod_T = a_T * CONST;
2637 '--> S5: prod_T' = a_it w* CONST;
2638
2639 there may be other users of a_T outside the pattern. In that case S2 will
2640 be marked as relevant (as well as S3), and both S2 and S3 will be analyzed
2641 and vectorized. The vector stmt VS2 will be recorded in S2, and VS3 will
2642 be recorded in S3. */
2643
2644 void
2645 vect_pattern_recog (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo)
2646 {
2647 struct loop *loop;
2648 basic_block *bbs, bb;
2649 unsigned int nbbs;
2650 gimple_stmt_iterator si;
2651 unsigned int i, j;
2652 vect_recog_func_ptr vect_recog_func;
2653 VEC (gimple, heap) *stmts_to_replace = VEC_alloc (gimple, heap, 1);
2654 gimple stmt;
2655
2656 if (vect_print_dump_info (REPORT_DETAILS))
2657 fprintf (vect_dump, "=== vect_pattern_recog ===");
2658
2659 if (loop_vinfo)
2660 {
2661 loop = LOOP_VINFO_LOOP (loop_vinfo);
2662 bbs = LOOP_VINFO_BBS (loop_vinfo);
2663 nbbs = loop->num_nodes;
2664 }
2665 else
2666 {
2667 bb = BB_VINFO_BB (bb_vinfo);
2668 nbbs = 1;
2669 bbs = XNEW (basic_block);
2670 bbs[0] = bb;
2671 }
2672
2673 /* Scan through the loop stmts, applying the pattern recognition
2674 functions starting at each stmt visited: */
2675 for (i = 0; i < nbbs; i++)
2676 {
2677 basic_block bb = bbs[i];
2678 for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
2679 {
2680 if (bb_vinfo && (stmt = gsi_stmt (si))
2681 && vinfo_for_stmt (stmt)
2682 && !STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (stmt)))
2683 continue;
2684
2685 /* Scan over all generic vect_recog_xxx_pattern functions. */
2686 for (j = 0; j < NUM_PATTERNS; j++)
2687 {
2688 vect_recog_func = vect_vect_recog_func_ptrs[j];
2689 vect_pattern_recog_1 (vect_recog_func, si,
2690 &stmts_to_replace);
2691 }
2692 }
2693 }
2694
2695 VEC_free (gimple, heap, stmts_to_replace);
2696 if (bb_vinfo)
2697 free (bbs);
2698 }