re PR c++/60572 (ICE deriving from class with invalid member)
[gcc.git] / gcc / tree-vect-patterns.c
1 /* Analysis Utilities for Loop Vectorization.
2 Copyright (C) 2006-2014 Free Software Foundation, Inc.
3 Contributed by Dorit Nuzman <dorit@il.ibm.com>
4
5 This file is part of GCC.
6
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 3, or (at your option) any later
10 version.
11
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
15 for more details.
16
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
20
21 #include "config.h"
22 #include "system.h"
23 #include "coretypes.h"
24 #include "tm.h"
25 #include "tree.h"
26 #include "stor-layout.h"
27 #include "target.h"
28 #include "basic-block.h"
29 #include "gimple-pretty-print.h"
30 #include "tree-ssa-alias.h"
31 #include "internal-fn.h"
32 #include "tree-eh.h"
33 #include "gimple-expr.h"
34 #include "is-a.h"
35 #include "gimple.h"
36 #include "gimplify.h"
37 #include "gimple-iterator.h"
38 #include "gimple-ssa.h"
39 #include "tree-phinodes.h"
40 #include "ssa-iterators.h"
41 #include "stringpool.h"
42 #include "tree-ssanames.h"
43 #include "cfgloop.h"
44 #include "expr.h"
45 #include "optabs.h"
46 #include "params.h"
47 #include "tree-data-ref.h"
48 #include "tree-vectorizer.h"
49 #include "recog.h" /* FIXME: for insn_data */
50 #include "diagnostic-core.h"
51 #include "dumpfile.h"
52
53 /* Pattern recognition functions */
54 static gimple vect_recog_widen_sum_pattern (vec<gimple> *, tree *,
55 tree *);
56 static gimple vect_recog_widen_mult_pattern (vec<gimple> *, tree *,
57 tree *);
58 static gimple vect_recog_dot_prod_pattern (vec<gimple> *, tree *,
59 tree *);
60 static gimple vect_recog_pow_pattern (vec<gimple> *, tree *, tree *);
61 static gimple vect_recog_over_widening_pattern (vec<gimple> *, tree *,
62 tree *);
63 static gimple vect_recog_widen_shift_pattern (vec<gimple> *,
64 tree *, tree *);
65 static gimple vect_recog_rotate_pattern (vec<gimple> *, tree *, tree *);
66 static gimple vect_recog_vector_vector_shift_pattern (vec<gimple> *,
67 tree *, tree *);
68 static gimple vect_recog_divmod_pattern (vec<gimple> *,
69 tree *, tree *);
70 static gimple vect_recog_mixed_size_cond_pattern (vec<gimple> *,
71 tree *, tree *);
72 static gimple vect_recog_bool_pattern (vec<gimple> *, tree *, tree *);
73 static vect_recog_func_ptr vect_vect_recog_func_ptrs[NUM_PATTERNS] = {
74 vect_recog_widen_mult_pattern,
75 vect_recog_widen_sum_pattern,
76 vect_recog_dot_prod_pattern,
77 vect_recog_pow_pattern,
78 vect_recog_widen_shift_pattern,
79 vect_recog_over_widening_pattern,
80 vect_recog_rotate_pattern,
81 vect_recog_vector_vector_shift_pattern,
82 vect_recog_divmod_pattern,
83 vect_recog_mixed_size_cond_pattern,
84 vect_recog_bool_pattern};
85
86 static inline void
87 append_pattern_def_seq (stmt_vec_info stmt_info, gimple stmt)
88 {
89 gimple_seq_add_stmt_without_update (&STMT_VINFO_PATTERN_DEF_SEQ (stmt_info),
90 stmt);
91 }
92
93 static inline void
94 new_pattern_def_seq (stmt_vec_info stmt_info, gimple stmt)
95 {
96 STMT_VINFO_PATTERN_DEF_SEQ (stmt_info) = NULL;
97 append_pattern_def_seq (stmt_info, stmt);
98 }
99
100 /* Check whether STMT2 is in the same loop or basic block as STMT1.
101 Which of the two applies depends on whether we're currently doing
102 loop-based or basic-block-based vectorization, as determined by
103 the vinfo_for_stmt for STMT1 (which must be defined).
104
105 If this returns true, vinfo_for_stmt for STMT2 is guaranteed
106 to be defined as well. */
107
108 static bool
109 vect_same_loop_or_bb_p (gimple stmt1, gimple stmt2)
110 {
111 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt1);
112 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
113 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
114
115 if (!gimple_bb (stmt2))
116 return false;
117
118 if (loop_vinfo)
119 {
120 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
121 if (!flow_bb_inside_loop_p (loop, gimple_bb (stmt2)))
122 return false;
123 }
124 else
125 {
126 if (gimple_bb (stmt2) != BB_VINFO_BB (bb_vinfo)
127 || gimple_code (stmt2) == GIMPLE_PHI)
128 return false;
129 }
130
131 gcc_assert (vinfo_for_stmt (stmt2));
132 return true;
133 }
134
135 /* If the LHS of DEF_STMT has a single use, and that statement is
136 in the same loop or basic block, return it. */
137
138 static gimple
139 vect_single_imm_use (gimple def_stmt)
140 {
141 tree lhs = gimple_assign_lhs (def_stmt);
142 use_operand_p use_p;
143 gimple use_stmt;
144
145 if (!single_imm_use (lhs, &use_p, &use_stmt))
146 return NULL;
147
148 if (!vect_same_loop_or_bb_p (def_stmt, use_stmt))
149 return NULL;
150
151 return use_stmt;
152 }
153
154 /* Check whether NAME, an ssa-name used in USE_STMT,
155 is a result of a type promotion or demotion, such that:
156 DEF_STMT: NAME = NOP (name0)
157 where the type of name0 (ORIG_TYPE) is smaller/bigger than the type of NAME.
158 If CHECK_SIGN is TRUE, check that either both types are signed or both are
159 unsigned. */
160
161 static bool
162 type_conversion_p (tree name, gimple use_stmt, bool check_sign,
163 tree *orig_type, gimple *def_stmt, bool *promotion)
164 {
165 tree dummy;
166 gimple dummy_gimple;
167 loop_vec_info loop_vinfo;
168 stmt_vec_info stmt_vinfo;
169 tree type = TREE_TYPE (name);
170 tree oprnd0;
171 enum vect_def_type dt;
172 tree def;
173 bb_vec_info bb_vinfo;
174
175 stmt_vinfo = vinfo_for_stmt (use_stmt);
176 loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
177 bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
178 if (!vect_is_simple_use (name, use_stmt, loop_vinfo, bb_vinfo, def_stmt,
179 &def, &dt))
180 return false;
181
182 if (dt != vect_internal_def
183 && dt != vect_external_def && dt != vect_constant_def)
184 return false;
185
186 if (!*def_stmt)
187 return false;
188
189 if (!is_gimple_assign (*def_stmt))
190 return false;
191
192 if (!CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (*def_stmt)))
193 return false;
194
195 oprnd0 = gimple_assign_rhs1 (*def_stmt);
196
197 *orig_type = TREE_TYPE (oprnd0);
198 if (!INTEGRAL_TYPE_P (type) || !INTEGRAL_TYPE_P (*orig_type)
199 || ((TYPE_UNSIGNED (type) != TYPE_UNSIGNED (*orig_type)) && check_sign))
200 return false;
201
202 if (TYPE_PRECISION (type) >= (TYPE_PRECISION (*orig_type) * 2))
203 *promotion = true;
204 else if (TYPE_PRECISION (*orig_type) >= (TYPE_PRECISION (type) * 2))
205 *promotion = false;
206 else
207 return false;
208
209 if (!vect_is_simple_use (oprnd0, *def_stmt, loop_vinfo,
210 bb_vinfo, &dummy_gimple, &dummy, &dt))
211 return false;
212
213 return true;
214 }
215
216 /* Helper to return a new temporary for pattern of TYPE for STMT. If STMT
217 is NULL, the caller must set SSA_NAME_DEF_STMT for the returned SSA var. */
218
219 static tree
220 vect_recog_temp_ssa_var (tree type, gimple stmt)
221 {
222 return make_temp_ssa_name (type, stmt, "patt");
223 }
224
225 /* Function vect_recog_dot_prod_pattern
226
227 Try to find the following pattern:
228
229 type x_t, y_t;
230 TYPE1 prod;
231 TYPE2 sum = init;
232 loop:
233 sum_0 = phi <init, sum_1>
234 S1 x_t = ...
235 S2 y_t = ...
236 S3 x_T = (TYPE1) x_t;
237 S4 y_T = (TYPE1) y_t;
238 S5 prod = x_T * y_T;
239 [S6 prod = (TYPE2) prod; #optional]
240 S7 sum_1 = prod + sum_0;
241
242 where 'TYPE1' is exactly double the size of type 'type', and 'TYPE2' is the
243 same size of 'TYPE1' or bigger. This is a special case of a reduction
244 computation.
245
246 Input:
247
248 * STMTS: Contains a stmt from which the pattern search begins. In the
249 example, when this function is called with S7, the pattern {S3,S4,S5,S6,S7}
250 will be detected.
251
252 Output:
253
254 * TYPE_IN: The type of the input arguments to the pattern.
255
256 * TYPE_OUT: The type of the output of this pattern.
257
258 * Return value: A new stmt that will be used to replace the sequence of
259 stmts that constitute the pattern. In this case it will be:
260 WIDEN_DOT_PRODUCT <x_t, y_t, sum_0>
261
262 Note: The dot-prod idiom is a widening reduction pattern that is
263 vectorized without preserving all the intermediate results. It
264 produces only N/2 (widened) results (by summing up pairs of
265 intermediate results) rather than all N results. Therefore, we
266 cannot allow this pattern when we want to get all the results and in
267 the correct order (as is the case when this computation is in an
268 inner-loop nested in an outer-loop that us being vectorized). */
269
270 static gimple
271 vect_recog_dot_prod_pattern (vec<gimple> *stmts, tree *type_in,
272 tree *type_out)
273 {
274 gimple stmt, last_stmt = (*stmts)[0];
275 tree oprnd0, oprnd1;
276 tree oprnd00, oprnd01;
277 stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
278 tree type, half_type;
279 gimple pattern_stmt;
280 tree prod_type;
281 loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
282 struct loop *loop;
283 tree var;
284 bool promotion;
285
286 if (!loop_info)
287 return NULL;
288
289 loop = LOOP_VINFO_LOOP (loop_info);
290
291 if (!is_gimple_assign (last_stmt))
292 return NULL;
293
294 type = gimple_expr_type (last_stmt);
295
296 /* Look for the following pattern
297 DX = (TYPE1) X;
298 DY = (TYPE1) Y;
299 DPROD = DX * DY;
300 DDPROD = (TYPE2) DPROD;
301 sum_1 = DDPROD + sum_0;
302 In which
303 - DX is double the size of X
304 - DY is double the size of Y
305 - DX, DY, DPROD all have the same type
306 - sum is the same size of DPROD or bigger
307 - sum has been recognized as a reduction variable.
308
309 This is equivalent to:
310 DPROD = X w* Y; #widen mult
311 sum_1 = DPROD w+ sum_0; #widen summation
312 or
313 DPROD = X w* Y; #widen mult
314 sum_1 = DPROD + sum_0; #summation
315 */
316
317 /* Starting from LAST_STMT, follow the defs of its uses in search
318 of the above pattern. */
319
320 if (gimple_assign_rhs_code (last_stmt) != PLUS_EXPR)
321 return NULL;
322
323 if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
324 {
325 /* Has been detected as widening-summation? */
326
327 stmt = STMT_VINFO_RELATED_STMT (stmt_vinfo);
328 type = gimple_expr_type (stmt);
329 if (gimple_assign_rhs_code (stmt) != WIDEN_SUM_EXPR)
330 return NULL;
331 oprnd0 = gimple_assign_rhs1 (stmt);
332 oprnd1 = gimple_assign_rhs2 (stmt);
333 half_type = TREE_TYPE (oprnd0);
334 }
335 else
336 {
337 gimple def_stmt;
338
339 if (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def)
340 return NULL;
341 oprnd0 = gimple_assign_rhs1 (last_stmt);
342 oprnd1 = gimple_assign_rhs2 (last_stmt);
343 if (!types_compatible_p (TREE_TYPE (oprnd0), type)
344 || !types_compatible_p (TREE_TYPE (oprnd1), type))
345 return NULL;
346 stmt = last_stmt;
347
348 if (type_conversion_p (oprnd0, stmt, true, &half_type, &def_stmt,
349 &promotion)
350 && promotion)
351 {
352 stmt = def_stmt;
353 oprnd0 = gimple_assign_rhs1 (stmt);
354 }
355 else
356 half_type = type;
357 }
358
359 /* So far so good. Since last_stmt was detected as a (summation) reduction,
360 we know that oprnd1 is the reduction variable (defined by a loop-header
361 phi), and oprnd0 is an ssa-name defined by a stmt in the loop body.
362 Left to check that oprnd0 is defined by a (widen_)mult_expr */
363 if (TREE_CODE (oprnd0) != SSA_NAME)
364 return NULL;
365
366 prod_type = half_type;
367 stmt = SSA_NAME_DEF_STMT (oprnd0);
368
369 /* It could not be the dot_prod pattern if the stmt is outside the loop. */
370 if (!gimple_bb (stmt) || !flow_bb_inside_loop_p (loop, gimple_bb (stmt)))
371 return NULL;
372
373 /* FORNOW. Can continue analyzing the def-use chain when this stmt in a phi
374 inside the loop (in case we are analyzing an outer-loop). */
375 if (!is_gimple_assign (stmt))
376 return NULL;
377 stmt_vinfo = vinfo_for_stmt (stmt);
378 gcc_assert (stmt_vinfo);
379 if (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_internal_def)
380 return NULL;
381 if (gimple_assign_rhs_code (stmt) != MULT_EXPR)
382 return NULL;
383 if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
384 {
385 /* Has been detected as a widening multiplication? */
386
387 stmt = STMT_VINFO_RELATED_STMT (stmt_vinfo);
388 if (gimple_assign_rhs_code (stmt) != WIDEN_MULT_EXPR)
389 return NULL;
390 stmt_vinfo = vinfo_for_stmt (stmt);
391 gcc_assert (stmt_vinfo);
392 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_internal_def);
393 oprnd00 = gimple_assign_rhs1 (stmt);
394 oprnd01 = gimple_assign_rhs2 (stmt);
395 }
396 else
397 {
398 tree half_type0, half_type1;
399 gimple def_stmt;
400 tree oprnd0, oprnd1;
401
402 oprnd0 = gimple_assign_rhs1 (stmt);
403 oprnd1 = gimple_assign_rhs2 (stmt);
404 if (!types_compatible_p (TREE_TYPE (oprnd0), prod_type)
405 || !types_compatible_p (TREE_TYPE (oprnd1), prod_type))
406 return NULL;
407 if (!type_conversion_p (oprnd0, stmt, true, &half_type0, &def_stmt,
408 &promotion)
409 || !promotion)
410 return NULL;
411 oprnd00 = gimple_assign_rhs1 (def_stmt);
412 if (!type_conversion_p (oprnd1, stmt, true, &half_type1, &def_stmt,
413 &promotion)
414 || !promotion)
415 return NULL;
416 oprnd01 = gimple_assign_rhs1 (def_stmt);
417 if (!types_compatible_p (half_type0, half_type1))
418 return NULL;
419 if (TYPE_PRECISION (prod_type) != TYPE_PRECISION (half_type0) * 2)
420 return NULL;
421 }
422
423 half_type = TREE_TYPE (oprnd00);
424 *type_in = half_type;
425 *type_out = type;
426
427 /* Pattern detected. Create a stmt to be used to replace the pattern: */
428 var = vect_recog_temp_ssa_var (type, NULL);
429 pattern_stmt = gimple_build_assign_with_ops (DOT_PROD_EXPR, var,
430 oprnd00, oprnd01, oprnd1);
431
432 if (dump_enabled_p ())
433 {
434 dump_printf_loc (MSG_NOTE, vect_location,
435 "vect_recog_dot_prod_pattern: detected: ");
436 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_stmt, 0);
437 dump_printf (MSG_NOTE, "\n");
438 }
439
440 /* We don't allow changing the order of the computation in the inner-loop
441 when doing outer-loop vectorization. */
442 gcc_assert (!nested_in_vect_loop_p (loop, last_stmt));
443
444 return pattern_stmt;
445 }
446
447
448 /* Handle widening operation by a constant. At the moment we support MULT_EXPR
449 and LSHIFT_EXPR.
450
451 For MULT_EXPR we check that CONST_OPRND fits HALF_TYPE, and for LSHIFT_EXPR
452 we check that CONST_OPRND is less or equal to the size of HALF_TYPE.
453
454 Otherwise, if the type of the result (TYPE) is at least 4 times bigger than
455 HALF_TYPE, and there is an intermediate type (2 times smaller than TYPE)
456 that satisfies the above restrictions, we can perform a widening opeartion
457 from the intermediate type to TYPE and replace a_T = (TYPE) a_t;
458 with a_it = (interm_type) a_t; */
459
460 static bool
461 vect_handle_widen_op_by_const (gimple stmt, enum tree_code code,
462 tree const_oprnd, tree *oprnd,
463 vec<gimple> *stmts, tree type,
464 tree *half_type, gimple def_stmt)
465 {
466 tree new_type, new_oprnd;
467 gimple new_stmt;
468
469 if (code != MULT_EXPR && code != LSHIFT_EXPR)
470 return false;
471
472 if (((code == MULT_EXPR && int_fits_type_p (const_oprnd, *half_type))
473 || (code == LSHIFT_EXPR
474 && compare_tree_int (const_oprnd, TYPE_PRECISION (*half_type))
475 != 1))
476 && TYPE_PRECISION (type) == (TYPE_PRECISION (*half_type) * 2))
477 {
478 /* CONST_OPRND is a constant of HALF_TYPE. */
479 *oprnd = gimple_assign_rhs1 (def_stmt);
480 return true;
481 }
482
483 if (TYPE_PRECISION (type) < (TYPE_PRECISION (*half_type) * 4))
484 return false;
485
486 if (!vect_same_loop_or_bb_p (stmt, def_stmt))
487 return false;
488
489 /* TYPE is 4 times bigger than HALF_TYPE, try widening operation for
490 a type 2 times bigger than HALF_TYPE. */
491 new_type = build_nonstandard_integer_type (TYPE_PRECISION (type) / 2,
492 TYPE_UNSIGNED (type));
493 if ((code == MULT_EXPR && !int_fits_type_p (const_oprnd, new_type))
494 || (code == LSHIFT_EXPR
495 && compare_tree_int (const_oprnd, TYPE_PRECISION (new_type)) == 1))
496 return false;
497
498 /* Use NEW_TYPE for widening operation. */
499 if (STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt)))
500 {
501 new_stmt = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt));
502 /* Check if the already created pattern stmt is what we need. */
503 if (!is_gimple_assign (new_stmt)
504 || gimple_assign_rhs_code (new_stmt) != NOP_EXPR
505 || TREE_TYPE (gimple_assign_lhs (new_stmt)) != new_type)
506 return false;
507
508 stmts->safe_push (def_stmt);
509 *oprnd = gimple_assign_lhs (new_stmt);
510 }
511 else
512 {
513 /* Create a_T = (NEW_TYPE) a_t; */
514 *oprnd = gimple_assign_rhs1 (def_stmt);
515 new_oprnd = make_ssa_name (new_type, NULL);
516 new_stmt = gimple_build_assign_with_ops (NOP_EXPR, new_oprnd, *oprnd,
517 NULL_TREE);
518 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt)) = new_stmt;
519 stmts->safe_push (def_stmt);
520 *oprnd = new_oprnd;
521 }
522
523 *half_type = new_type;
524 return true;
525 }
526
527
528 /* Function vect_recog_widen_mult_pattern
529
530 Try to find the following pattern:
531
532 type a_t, b_t;
533 TYPE a_T, b_T, prod_T;
534
535 S1 a_t = ;
536 S2 b_t = ;
537 S3 a_T = (TYPE) a_t;
538 S4 b_T = (TYPE) b_t;
539 S5 prod_T = a_T * b_T;
540
541 where type 'TYPE' is at least double the size of type 'type'.
542
543 Also detect unsigned cases:
544
545 unsigned type a_t, b_t;
546 unsigned TYPE u_prod_T;
547 TYPE a_T, b_T, prod_T;
548
549 S1 a_t = ;
550 S2 b_t = ;
551 S3 a_T = (TYPE) a_t;
552 S4 b_T = (TYPE) b_t;
553 S5 prod_T = a_T * b_T;
554 S6 u_prod_T = (unsigned TYPE) prod_T;
555
556 and multiplication by constants:
557
558 type a_t;
559 TYPE a_T, prod_T;
560
561 S1 a_t = ;
562 S3 a_T = (TYPE) a_t;
563 S5 prod_T = a_T * CONST;
564
565 A special case of multiplication by constants is when 'TYPE' is 4 times
566 bigger than 'type', but CONST fits an intermediate type 2 times smaller
567 than 'TYPE'. In that case we create an additional pattern stmt for S3
568 to create a variable of the intermediate type, and perform widen-mult
569 on the intermediate type as well:
570
571 type a_t;
572 interm_type a_it;
573 TYPE a_T, prod_T, prod_T';
574
575 S1 a_t = ;
576 S3 a_T = (TYPE) a_t;
577 '--> a_it = (interm_type) a_t;
578 S5 prod_T = a_T * CONST;
579 '--> prod_T' = a_it w* CONST;
580
581 Input/Output:
582
583 * STMTS: Contains a stmt from which the pattern search begins. In the
584 example, when this function is called with S5, the pattern {S3,S4,S5,(S6)}
585 is detected. In case of unsigned widen-mult, the original stmt (S5) is
586 replaced with S6 in STMTS. In case of multiplication by a constant
587 of an intermediate type (the last case above), STMTS also contains S3
588 (inserted before S5).
589
590 Output:
591
592 * TYPE_IN: The type of the input arguments to the pattern.
593
594 * TYPE_OUT: The type of the output of this pattern.
595
596 * Return value: A new stmt that will be used to replace the sequence of
597 stmts that constitute the pattern. In this case it will be:
598 WIDEN_MULT <a_t, b_t>
599 */
600
601 static gimple
602 vect_recog_widen_mult_pattern (vec<gimple> *stmts,
603 tree *type_in, tree *type_out)
604 {
605 gimple last_stmt = stmts->pop ();
606 gimple def_stmt0, def_stmt1;
607 tree oprnd0, oprnd1;
608 tree type, half_type0, half_type1;
609 gimple pattern_stmt;
610 tree vectype, vectype_out = NULL_TREE;
611 tree var;
612 enum tree_code dummy_code;
613 int dummy_int;
614 vec<tree> dummy_vec;
615 bool op1_ok;
616 bool promotion;
617
618 if (!is_gimple_assign (last_stmt))
619 return NULL;
620
621 type = gimple_expr_type (last_stmt);
622
623 /* Starting from LAST_STMT, follow the defs of its uses in search
624 of the above pattern. */
625
626 if (gimple_assign_rhs_code (last_stmt) != MULT_EXPR)
627 return NULL;
628
629 oprnd0 = gimple_assign_rhs1 (last_stmt);
630 oprnd1 = gimple_assign_rhs2 (last_stmt);
631 if (!types_compatible_p (TREE_TYPE (oprnd0), type)
632 || !types_compatible_p (TREE_TYPE (oprnd1), type))
633 return NULL;
634
635 /* Check argument 0. */
636 if (!type_conversion_p (oprnd0, last_stmt, false, &half_type0, &def_stmt0,
637 &promotion)
638 || !promotion)
639 return NULL;
640 /* Check argument 1. */
641 op1_ok = type_conversion_p (oprnd1, last_stmt, false, &half_type1,
642 &def_stmt1, &promotion);
643
644 if (op1_ok && promotion)
645 {
646 oprnd0 = gimple_assign_rhs1 (def_stmt0);
647 oprnd1 = gimple_assign_rhs1 (def_stmt1);
648 }
649 else
650 {
651 if (TREE_CODE (oprnd1) == INTEGER_CST
652 && TREE_CODE (half_type0) == INTEGER_TYPE
653 && vect_handle_widen_op_by_const (last_stmt, MULT_EXPR, oprnd1,
654 &oprnd0, stmts, type,
655 &half_type0, def_stmt0))
656 {
657 half_type1 = half_type0;
658 oprnd1 = fold_convert (half_type1, oprnd1);
659 }
660 else
661 return NULL;
662 }
663
664 /* Handle unsigned case. Look for
665 S6 u_prod_T = (unsigned TYPE) prod_T;
666 Use unsigned TYPE as the type for WIDEN_MULT_EXPR. */
667 if (TYPE_UNSIGNED (type) != TYPE_UNSIGNED (half_type0))
668 {
669 gimple use_stmt;
670 tree use_lhs;
671 tree use_type;
672
673 if (TYPE_UNSIGNED (type) == TYPE_UNSIGNED (half_type1))
674 return NULL;
675
676 use_stmt = vect_single_imm_use (last_stmt);
677 if (!use_stmt || !is_gimple_assign (use_stmt)
678 || gimple_assign_rhs_code (use_stmt) != NOP_EXPR)
679 return NULL;
680
681 use_lhs = gimple_assign_lhs (use_stmt);
682 use_type = TREE_TYPE (use_lhs);
683 if (!INTEGRAL_TYPE_P (use_type)
684 || (TYPE_UNSIGNED (type) == TYPE_UNSIGNED (use_type))
685 || (TYPE_PRECISION (type) != TYPE_PRECISION (use_type)))
686 return NULL;
687
688 type = use_type;
689 last_stmt = use_stmt;
690 }
691
692 if (!types_compatible_p (half_type0, half_type1))
693 return NULL;
694
695 /* Pattern detected. */
696 if (dump_enabled_p ())
697 dump_printf_loc (MSG_NOTE, vect_location,
698 "vect_recog_widen_mult_pattern: detected:\n");
699
700 /* Check target support */
701 vectype = get_vectype_for_scalar_type (half_type0);
702 vectype_out = get_vectype_for_scalar_type (type);
703 if (!vectype
704 || !vectype_out
705 || !supportable_widening_operation (WIDEN_MULT_EXPR, last_stmt,
706 vectype_out, vectype,
707 &dummy_code, &dummy_code,
708 &dummy_int, &dummy_vec))
709 return NULL;
710
711 *type_in = vectype;
712 *type_out = vectype_out;
713
714 /* Pattern supported. Create a stmt to be used to replace the pattern: */
715 var = vect_recog_temp_ssa_var (type, NULL);
716 pattern_stmt = gimple_build_assign_with_ops (WIDEN_MULT_EXPR, var, oprnd0,
717 oprnd1);
718
719 if (dump_enabled_p ())
720 dump_gimple_stmt_loc (MSG_NOTE, vect_location, TDF_SLIM, pattern_stmt, 0);
721
722 stmts->safe_push (last_stmt);
723 return pattern_stmt;
724 }
725
726
727 /* Function vect_recog_pow_pattern
728
729 Try to find the following pattern:
730
731 x = POW (y, N);
732
733 with POW being one of pow, powf, powi, powif and N being
734 either 2 or 0.5.
735
736 Input:
737
738 * LAST_STMT: A stmt from which the pattern search begins.
739
740 Output:
741
742 * TYPE_IN: The type of the input arguments to the pattern.
743
744 * TYPE_OUT: The type of the output of this pattern.
745
746 * Return value: A new stmt that will be used to replace the sequence of
747 stmts that constitute the pattern. In this case it will be:
748 x = x * x
749 or
750 x = sqrt (x)
751 */
752
753 static gimple
754 vect_recog_pow_pattern (vec<gimple> *stmts, tree *type_in,
755 tree *type_out)
756 {
757 gimple last_stmt = (*stmts)[0];
758 tree fn, base, exp = NULL;
759 gimple stmt;
760 tree var;
761
762 if (!is_gimple_call (last_stmt) || gimple_call_lhs (last_stmt) == NULL)
763 return NULL;
764
765 fn = gimple_call_fndecl (last_stmt);
766 if (fn == NULL_TREE || DECL_BUILT_IN_CLASS (fn) != BUILT_IN_NORMAL)
767 return NULL;
768
769 switch (DECL_FUNCTION_CODE (fn))
770 {
771 case BUILT_IN_POWIF:
772 case BUILT_IN_POWI:
773 case BUILT_IN_POWF:
774 case BUILT_IN_POW:
775 base = gimple_call_arg (last_stmt, 0);
776 exp = gimple_call_arg (last_stmt, 1);
777 if (TREE_CODE (exp) != REAL_CST
778 && TREE_CODE (exp) != INTEGER_CST)
779 return NULL;
780 break;
781
782 default:
783 return NULL;
784 }
785
786 /* We now have a pow or powi builtin function call with a constant
787 exponent. */
788
789 *type_out = NULL_TREE;
790
791 /* Catch squaring. */
792 if ((tree_fits_shwi_p (exp)
793 && tree_to_shwi (exp) == 2)
794 || (TREE_CODE (exp) == REAL_CST
795 && REAL_VALUES_EQUAL (TREE_REAL_CST (exp), dconst2)))
796 {
797 *type_in = TREE_TYPE (base);
798
799 var = vect_recog_temp_ssa_var (TREE_TYPE (base), NULL);
800 stmt = gimple_build_assign_with_ops (MULT_EXPR, var, base, base);
801 return stmt;
802 }
803
804 /* Catch square root. */
805 if (TREE_CODE (exp) == REAL_CST
806 && REAL_VALUES_EQUAL (TREE_REAL_CST (exp), dconsthalf))
807 {
808 tree newfn = mathfn_built_in (TREE_TYPE (base), BUILT_IN_SQRT);
809 *type_in = get_vectype_for_scalar_type (TREE_TYPE (base));
810 if (*type_in)
811 {
812 gimple stmt = gimple_build_call (newfn, 1, base);
813 if (vectorizable_function (stmt, *type_in, *type_in)
814 != NULL_TREE)
815 {
816 var = vect_recog_temp_ssa_var (TREE_TYPE (base), stmt);
817 gimple_call_set_lhs (stmt, var);
818 return stmt;
819 }
820 }
821 }
822
823 return NULL;
824 }
825
826
827 /* Function vect_recog_widen_sum_pattern
828
829 Try to find the following pattern:
830
831 type x_t;
832 TYPE x_T, sum = init;
833 loop:
834 sum_0 = phi <init, sum_1>
835 S1 x_t = *p;
836 S2 x_T = (TYPE) x_t;
837 S3 sum_1 = x_T + sum_0;
838
839 where type 'TYPE' is at least double the size of type 'type', i.e - we're
840 summing elements of type 'type' into an accumulator of type 'TYPE'. This is
841 a special case of a reduction computation.
842
843 Input:
844
845 * LAST_STMT: A stmt from which the pattern search begins. In the example,
846 when this function is called with S3, the pattern {S2,S3} will be detected.
847
848 Output:
849
850 * TYPE_IN: The type of the input arguments to the pattern.
851
852 * TYPE_OUT: The type of the output of this pattern.
853
854 * Return value: A new stmt that will be used to replace the sequence of
855 stmts that constitute the pattern. In this case it will be:
856 WIDEN_SUM <x_t, sum_0>
857
858 Note: The widening-sum idiom is a widening reduction pattern that is
859 vectorized without preserving all the intermediate results. It
860 produces only N/2 (widened) results (by summing up pairs of
861 intermediate results) rather than all N results. Therefore, we
862 cannot allow this pattern when we want to get all the results and in
863 the correct order (as is the case when this computation is in an
864 inner-loop nested in an outer-loop that us being vectorized). */
865
866 static gimple
867 vect_recog_widen_sum_pattern (vec<gimple> *stmts, tree *type_in,
868 tree *type_out)
869 {
870 gimple stmt, last_stmt = (*stmts)[0];
871 tree oprnd0, oprnd1;
872 stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
873 tree type, half_type;
874 gimple pattern_stmt;
875 loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
876 struct loop *loop;
877 tree var;
878 bool promotion;
879
880 if (!loop_info)
881 return NULL;
882
883 loop = LOOP_VINFO_LOOP (loop_info);
884
885 if (!is_gimple_assign (last_stmt))
886 return NULL;
887
888 type = gimple_expr_type (last_stmt);
889
890 /* Look for the following pattern
891 DX = (TYPE) X;
892 sum_1 = DX + sum_0;
893 In which DX is at least double the size of X, and sum_1 has been
894 recognized as a reduction variable.
895 */
896
897 /* Starting from LAST_STMT, follow the defs of its uses in search
898 of the above pattern. */
899
900 if (gimple_assign_rhs_code (last_stmt) != PLUS_EXPR)
901 return NULL;
902
903 if (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def)
904 return NULL;
905
906 oprnd0 = gimple_assign_rhs1 (last_stmt);
907 oprnd1 = gimple_assign_rhs2 (last_stmt);
908 if (!types_compatible_p (TREE_TYPE (oprnd0), type)
909 || !types_compatible_p (TREE_TYPE (oprnd1), type))
910 return NULL;
911
912 /* So far so good. Since last_stmt was detected as a (summation) reduction,
913 we know that oprnd1 is the reduction variable (defined by a loop-header
914 phi), and oprnd0 is an ssa-name defined by a stmt in the loop body.
915 Left to check that oprnd0 is defined by a cast from type 'type' to type
916 'TYPE'. */
917
918 if (!type_conversion_p (oprnd0, last_stmt, true, &half_type, &stmt,
919 &promotion)
920 || !promotion)
921 return NULL;
922
923 oprnd0 = gimple_assign_rhs1 (stmt);
924 *type_in = half_type;
925 *type_out = type;
926
927 /* Pattern detected. Create a stmt to be used to replace the pattern: */
928 var = vect_recog_temp_ssa_var (type, NULL);
929 pattern_stmt = gimple_build_assign_with_ops (WIDEN_SUM_EXPR, var,
930 oprnd0, oprnd1);
931
932 if (dump_enabled_p ())
933 {
934 dump_printf_loc (MSG_NOTE, vect_location,
935 "vect_recog_widen_sum_pattern: detected: ");
936 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_stmt, 0);
937 dump_printf (MSG_NOTE, "\n");
938 }
939
940 /* We don't allow changing the order of the computation in the inner-loop
941 when doing outer-loop vectorization. */
942 gcc_assert (!nested_in_vect_loop_p (loop, last_stmt));
943
944 return pattern_stmt;
945 }
946
947
948 /* Return TRUE if the operation in STMT can be performed on a smaller type.
949
950 Input:
951 STMT - a statement to check.
952 DEF - we support operations with two operands, one of which is constant.
953 The other operand can be defined by a demotion operation, or by a
954 previous statement in a sequence of over-promoted operations. In the
955 later case DEF is used to replace that operand. (It is defined by a
956 pattern statement we created for the previous statement in the
957 sequence).
958
959 Input/output:
960 NEW_TYPE - Output: a smaller type that we are trying to use. Input: if not
961 NULL, it's the type of DEF.
962 STMTS - additional pattern statements. If a pattern statement (type
963 conversion) is created in this function, its original statement is
964 added to STMTS.
965
966 Output:
967 OP0, OP1 - if the operation fits a smaller type, OP0 and OP1 are the new
968 operands to use in the new pattern statement for STMT (will be created
969 in vect_recog_over_widening_pattern ()).
970 NEW_DEF_STMT - in case DEF has to be promoted, we create two pattern
971 statements for STMT: the first one is a type promotion and the second
972 one is the operation itself. We return the type promotion statement
973 in NEW_DEF_STMT and further store it in STMT_VINFO_PATTERN_DEF_SEQ of
974 the second pattern statement. */
975
976 static bool
977 vect_operation_fits_smaller_type (gimple stmt, tree def, tree *new_type,
978 tree *op0, tree *op1, gimple *new_def_stmt,
979 vec<gimple> *stmts)
980 {
981 enum tree_code code;
982 tree const_oprnd, oprnd;
983 tree interm_type = NULL_TREE, half_type, new_oprnd, type;
984 gimple def_stmt, new_stmt;
985 bool first = false;
986 bool promotion;
987
988 *op0 = NULL_TREE;
989 *op1 = NULL_TREE;
990 *new_def_stmt = NULL;
991
992 if (!is_gimple_assign (stmt))
993 return false;
994
995 code = gimple_assign_rhs_code (stmt);
996 if (code != LSHIFT_EXPR && code != RSHIFT_EXPR
997 && code != BIT_IOR_EXPR && code != BIT_XOR_EXPR && code != BIT_AND_EXPR)
998 return false;
999
1000 oprnd = gimple_assign_rhs1 (stmt);
1001 const_oprnd = gimple_assign_rhs2 (stmt);
1002 type = gimple_expr_type (stmt);
1003
1004 if (TREE_CODE (oprnd) != SSA_NAME
1005 || TREE_CODE (const_oprnd) != INTEGER_CST)
1006 return false;
1007
1008 /* If oprnd has other uses besides that in stmt we cannot mark it
1009 as being part of a pattern only. */
1010 if (!has_single_use (oprnd))
1011 return false;
1012
1013 /* If we are in the middle of a sequence, we use DEF from a previous
1014 statement. Otherwise, OPRND has to be a result of type promotion. */
1015 if (*new_type)
1016 {
1017 half_type = *new_type;
1018 oprnd = def;
1019 }
1020 else
1021 {
1022 first = true;
1023 if (!type_conversion_p (oprnd, stmt, false, &half_type, &def_stmt,
1024 &promotion)
1025 || !promotion
1026 || !vect_same_loop_or_bb_p (stmt, def_stmt))
1027 return false;
1028 }
1029
1030 /* Can we perform the operation on a smaller type? */
1031 switch (code)
1032 {
1033 case BIT_IOR_EXPR:
1034 case BIT_XOR_EXPR:
1035 case BIT_AND_EXPR:
1036 if (!int_fits_type_p (const_oprnd, half_type))
1037 {
1038 /* HALF_TYPE is not enough. Try a bigger type if possible. */
1039 if (TYPE_PRECISION (type) < (TYPE_PRECISION (half_type) * 4))
1040 return false;
1041
1042 interm_type = build_nonstandard_integer_type (
1043 TYPE_PRECISION (half_type) * 2, TYPE_UNSIGNED (type));
1044 if (!int_fits_type_p (const_oprnd, interm_type))
1045 return false;
1046 }
1047
1048 break;
1049
1050 case LSHIFT_EXPR:
1051 /* Try intermediate type - HALF_TYPE is not enough for sure. */
1052 if (TYPE_PRECISION (type) < (TYPE_PRECISION (half_type) * 4))
1053 return false;
1054
1055 /* Check that HALF_TYPE size + shift amount <= INTERM_TYPE size.
1056 (e.g., if the original value was char, the shift amount is at most 8
1057 if we want to use short). */
1058 if (compare_tree_int (const_oprnd, TYPE_PRECISION (half_type)) == 1)
1059 return false;
1060
1061 interm_type = build_nonstandard_integer_type (
1062 TYPE_PRECISION (half_type) * 2, TYPE_UNSIGNED (type));
1063
1064 if (!vect_supportable_shift (code, interm_type))
1065 return false;
1066
1067 break;
1068
1069 case RSHIFT_EXPR:
1070 if (vect_supportable_shift (code, half_type))
1071 break;
1072
1073 /* Try intermediate type - HALF_TYPE is not supported. */
1074 if (TYPE_PRECISION (type) < (TYPE_PRECISION (half_type) * 4))
1075 return false;
1076
1077 interm_type = build_nonstandard_integer_type (
1078 TYPE_PRECISION (half_type) * 2, TYPE_UNSIGNED (type));
1079
1080 if (!vect_supportable_shift (code, interm_type))
1081 return false;
1082
1083 break;
1084
1085 default:
1086 gcc_unreachable ();
1087 }
1088
1089 /* There are four possible cases:
1090 1. OPRND is defined by a type promotion (in that case FIRST is TRUE, it's
1091 the first statement in the sequence)
1092 a. The original, HALF_TYPE, is not enough - we replace the promotion
1093 from HALF_TYPE to TYPE with a promotion to INTERM_TYPE.
1094 b. HALF_TYPE is sufficient, OPRND is set as the RHS of the original
1095 promotion.
1096 2. OPRND is defined by a pattern statement we created.
1097 a. Its type is not sufficient for the operation, we create a new stmt:
1098 a type conversion for OPRND from HALF_TYPE to INTERM_TYPE. We store
1099 this statement in NEW_DEF_STMT, and it is later put in
1100 STMT_VINFO_PATTERN_DEF_SEQ of the pattern statement for STMT.
1101 b. OPRND is good to use in the new statement. */
1102 if (first)
1103 {
1104 if (interm_type)
1105 {
1106 /* Replace the original type conversion HALF_TYPE->TYPE with
1107 HALF_TYPE->INTERM_TYPE. */
1108 if (STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt)))
1109 {
1110 new_stmt = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt));
1111 /* Check if the already created pattern stmt is what we need. */
1112 if (!is_gimple_assign (new_stmt)
1113 || gimple_assign_rhs_code (new_stmt) != NOP_EXPR
1114 || TREE_TYPE (gimple_assign_lhs (new_stmt)) != interm_type)
1115 return false;
1116
1117 stmts->safe_push (def_stmt);
1118 oprnd = gimple_assign_lhs (new_stmt);
1119 }
1120 else
1121 {
1122 /* Create NEW_OPRND = (INTERM_TYPE) OPRND. */
1123 oprnd = gimple_assign_rhs1 (def_stmt);
1124 new_oprnd = make_ssa_name (interm_type, NULL);
1125 new_stmt = gimple_build_assign_with_ops (NOP_EXPR, new_oprnd,
1126 oprnd, NULL_TREE);
1127 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt)) = new_stmt;
1128 stmts->safe_push (def_stmt);
1129 oprnd = new_oprnd;
1130 }
1131 }
1132 else
1133 {
1134 /* Retrieve the operand before the type promotion. */
1135 oprnd = gimple_assign_rhs1 (def_stmt);
1136 }
1137 }
1138 else
1139 {
1140 if (interm_type)
1141 {
1142 /* Create a type conversion HALF_TYPE->INTERM_TYPE. */
1143 new_oprnd = make_ssa_name (interm_type, NULL);
1144 new_stmt = gimple_build_assign_with_ops (NOP_EXPR, new_oprnd,
1145 oprnd, NULL_TREE);
1146 oprnd = new_oprnd;
1147 *new_def_stmt = new_stmt;
1148 }
1149
1150 /* Otherwise, OPRND is already set. */
1151 }
1152
1153 if (interm_type)
1154 *new_type = interm_type;
1155 else
1156 *new_type = half_type;
1157
1158 *op0 = oprnd;
1159 *op1 = fold_convert (*new_type, const_oprnd);
1160
1161 return true;
1162 }
1163
1164
1165 /* Try to find a statement or a sequence of statements that can be performed
1166 on a smaller type:
1167
1168 type x_t;
1169 TYPE x_T, res0_T, res1_T;
1170 loop:
1171 S1 x_t = *p;
1172 S2 x_T = (TYPE) x_t;
1173 S3 res0_T = op (x_T, C0);
1174 S4 res1_T = op (res0_T, C1);
1175 S5 ... = () res1_T; - type demotion
1176
1177 where type 'TYPE' is at least double the size of type 'type', C0 and C1 are
1178 constants.
1179 Check if S3 and S4 can be done on a smaller type than 'TYPE', it can either
1180 be 'type' or some intermediate type. For now, we expect S5 to be a type
1181 demotion operation. We also check that S3 and S4 have only one use. */
1182
1183 static gimple
1184 vect_recog_over_widening_pattern (vec<gimple> *stmts,
1185 tree *type_in, tree *type_out)
1186 {
1187 gimple stmt = stmts->pop ();
1188 gimple pattern_stmt = NULL, new_def_stmt, prev_stmt = NULL, use_stmt = NULL;
1189 tree op0, op1, vectype = NULL_TREE, use_lhs, use_type;
1190 tree var = NULL_TREE, new_type = NULL_TREE, new_oprnd;
1191 bool first;
1192 tree type = NULL;
1193
1194 first = true;
1195 while (1)
1196 {
1197 if (!vinfo_for_stmt (stmt)
1198 || STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (stmt)))
1199 return NULL;
1200
1201 new_def_stmt = NULL;
1202 if (!vect_operation_fits_smaller_type (stmt, var, &new_type,
1203 &op0, &op1, &new_def_stmt,
1204 stmts))
1205 {
1206 if (first)
1207 return NULL;
1208 else
1209 break;
1210 }
1211
1212 /* STMT can be performed on a smaller type. Check its uses. */
1213 use_stmt = vect_single_imm_use (stmt);
1214 if (!use_stmt || !is_gimple_assign (use_stmt))
1215 return NULL;
1216
1217 /* Create pattern statement for STMT. */
1218 vectype = get_vectype_for_scalar_type (new_type);
1219 if (!vectype)
1220 return NULL;
1221
1222 /* We want to collect all the statements for which we create pattern
1223 statetments, except for the case when the last statement in the
1224 sequence doesn't have a corresponding pattern statement. In such
1225 case we associate the last pattern statement with the last statement
1226 in the sequence. Therefore, we only add the original statement to
1227 the list if we know that it is not the last. */
1228 if (prev_stmt)
1229 stmts->safe_push (prev_stmt);
1230
1231 var = vect_recog_temp_ssa_var (new_type, NULL);
1232 pattern_stmt
1233 = gimple_build_assign_with_ops (gimple_assign_rhs_code (stmt), var,
1234 op0, op1);
1235 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (stmt)) = pattern_stmt;
1236 new_pattern_def_seq (vinfo_for_stmt (stmt), new_def_stmt);
1237
1238 if (dump_enabled_p ())
1239 {
1240 dump_printf_loc (MSG_NOTE, vect_location,
1241 "created pattern stmt: ");
1242 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_stmt, 0);
1243 dump_printf (MSG_NOTE, "\n");
1244 }
1245
1246 type = gimple_expr_type (stmt);
1247 prev_stmt = stmt;
1248 stmt = use_stmt;
1249
1250 first = false;
1251 }
1252
1253 /* We got a sequence. We expect it to end with a type demotion operation.
1254 Otherwise, we quit (for now). There are three possible cases: the
1255 conversion is to NEW_TYPE (we don't do anything), the conversion is to
1256 a type bigger than NEW_TYPE and/or the signedness of USE_TYPE and
1257 NEW_TYPE differs (we create a new conversion statement). */
1258 if (CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (use_stmt)))
1259 {
1260 use_lhs = gimple_assign_lhs (use_stmt);
1261 use_type = TREE_TYPE (use_lhs);
1262 /* Support only type demotion or signedess change. */
1263 if (!INTEGRAL_TYPE_P (use_type)
1264 || TYPE_PRECISION (type) <= TYPE_PRECISION (use_type))
1265 return NULL;
1266
1267 /* Check that NEW_TYPE is not bigger than the conversion result. */
1268 if (TYPE_PRECISION (new_type) > TYPE_PRECISION (use_type))
1269 return NULL;
1270
1271 if (TYPE_UNSIGNED (new_type) != TYPE_UNSIGNED (use_type)
1272 || TYPE_PRECISION (new_type) != TYPE_PRECISION (use_type))
1273 {
1274 /* Create NEW_TYPE->USE_TYPE conversion. */
1275 new_oprnd = make_ssa_name (use_type, NULL);
1276 pattern_stmt = gimple_build_assign_with_ops (NOP_EXPR, new_oprnd,
1277 var, NULL_TREE);
1278 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (use_stmt)) = pattern_stmt;
1279
1280 *type_in = get_vectype_for_scalar_type (new_type);
1281 *type_out = get_vectype_for_scalar_type (use_type);
1282
1283 /* We created a pattern statement for the last statement in the
1284 sequence, so we don't need to associate it with the pattern
1285 statement created for PREV_STMT. Therefore, we add PREV_STMT
1286 to the list in order to mark it later in vect_pattern_recog_1. */
1287 if (prev_stmt)
1288 stmts->safe_push (prev_stmt);
1289 }
1290 else
1291 {
1292 if (prev_stmt)
1293 STMT_VINFO_PATTERN_DEF_SEQ (vinfo_for_stmt (use_stmt))
1294 = STMT_VINFO_PATTERN_DEF_SEQ (vinfo_for_stmt (prev_stmt));
1295
1296 *type_in = vectype;
1297 *type_out = NULL_TREE;
1298 }
1299
1300 stmts->safe_push (use_stmt);
1301 }
1302 else
1303 /* TODO: support general case, create a conversion to the correct type. */
1304 return NULL;
1305
1306 /* Pattern detected. */
1307 if (dump_enabled_p ())
1308 {
1309 dump_printf_loc (MSG_NOTE, vect_location,
1310 "vect_recog_over_widening_pattern: detected: ");
1311 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_stmt, 0);
1312 dump_printf (MSG_NOTE, "\n");
1313 }
1314
1315 return pattern_stmt;
1316 }
1317
1318 /* Detect widening shift pattern:
1319
1320 type a_t;
1321 TYPE a_T, res_T;
1322
1323 S1 a_t = ;
1324 S2 a_T = (TYPE) a_t;
1325 S3 res_T = a_T << CONST;
1326
1327 where type 'TYPE' is at least double the size of type 'type'.
1328
1329 Also detect cases where the shift result is immediately converted
1330 to another type 'result_type' that is no larger in size than 'TYPE'.
1331 In those cases we perform a widen-shift that directly results in
1332 'result_type', to avoid a possible over-widening situation:
1333
1334 type a_t;
1335 TYPE a_T, res_T;
1336 result_type res_result;
1337
1338 S1 a_t = ;
1339 S2 a_T = (TYPE) a_t;
1340 S3 res_T = a_T << CONST;
1341 S4 res_result = (result_type) res_T;
1342 '--> res_result' = a_t w<< CONST;
1343
1344 And a case when 'TYPE' is 4 times bigger than 'type'. In that case we
1345 create an additional pattern stmt for S2 to create a variable of an
1346 intermediate type, and perform widen-shift on the intermediate type:
1347
1348 type a_t;
1349 interm_type a_it;
1350 TYPE a_T, res_T, res_T';
1351
1352 S1 a_t = ;
1353 S2 a_T = (TYPE) a_t;
1354 '--> a_it = (interm_type) a_t;
1355 S3 res_T = a_T << CONST;
1356 '--> res_T' = a_it <<* CONST;
1357
1358 Input/Output:
1359
1360 * STMTS: Contains a stmt from which the pattern search begins.
1361 In case of unsigned widen-shift, the original stmt (S3) is replaced with S4
1362 in STMTS. When an intermediate type is used and a pattern statement is
1363 created for S2, we also put S2 here (before S3).
1364
1365 Output:
1366
1367 * TYPE_IN: The type of the input arguments to the pattern.
1368
1369 * TYPE_OUT: The type of the output of this pattern.
1370
1371 * Return value: A new stmt that will be used to replace the sequence of
1372 stmts that constitute the pattern. In this case it will be:
1373 WIDEN_LSHIFT_EXPR <a_t, CONST>. */
1374
1375 static gimple
1376 vect_recog_widen_shift_pattern (vec<gimple> *stmts,
1377 tree *type_in, tree *type_out)
1378 {
1379 gimple last_stmt = stmts->pop ();
1380 gimple def_stmt0;
1381 tree oprnd0, oprnd1;
1382 tree type, half_type0;
1383 gimple pattern_stmt;
1384 tree vectype, vectype_out = NULL_TREE;
1385 tree var;
1386 enum tree_code dummy_code;
1387 int dummy_int;
1388 vec<tree> dummy_vec;
1389 gimple use_stmt;
1390 bool promotion;
1391
1392 if (!is_gimple_assign (last_stmt) || !vinfo_for_stmt (last_stmt))
1393 return NULL;
1394
1395 if (STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (last_stmt)))
1396 return NULL;
1397
1398 if (gimple_assign_rhs_code (last_stmt) != LSHIFT_EXPR)
1399 return NULL;
1400
1401 oprnd0 = gimple_assign_rhs1 (last_stmt);
1402 oprnd1 = gimple_assign_rhs2 (last_stmt);
1403 if (TREE_CODE (oprnd0) != SSA_NAME || TREE_CODE (oprnd1) != INTEGER_CST)
1404 return NULL;
1405
1406 /* Check operand 0: it has to be defined by a type promotion. */
1407 if (!type_conversion_p (oprnd0, last_stmt, false, &half_type0, &def_stmt0,
1408 &promotion)
1409 || !promotion)
1410 return NULL;
1411
1412 /* Check operand 1: has to be positive. We check that it fits the type
1413 in vect_handle_widen_op_by_const (). */
1414 if (tree_int_cst_compare (oprnd1, size_zero_node) <= 0)
1415 return NULL;
1416
1417 oprnd0 = gimple_assign_rhs1 (def_stmt0);
1418 type = gimple_expr_type (last_stmt);
1419
1420 /* Check for subsequent conversion to another type. */
1421 use_stmt = vect_single_imm_use (last_stmt);
1422 if (use_stmt && is_gimple_assign (use_stmt)
1423 && CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (use_stmt))
1424 && !STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (use_stmt)))
1425 {
1426 tree use_lhs = gimple_assign_lhs (use_stmt);
1427 tree use_type = TREE_TYPE (use_lhs);
1428
1429 if (INTEGRAL_TYPE_P (use_type)
1430 && TYPE_PRECISION (use_type) <= TYPE_PRECISION (type))
1431 {
1432 last_stmt = use_stmt;
1433 type = use_type;
1434 }
1435 }
1436
1437 /* Check if this a widening operation. */
1438 if (!vect_handle_widen_op_by_const (last_stmt, LSHIFT_EXPR, oprnd1,
1439 &oprnd0, stmts,
1440 type, &half_type0, def_stmt0))
1441 return NULL;
1442
1443 /* Pattern detected. */
1444 if (dump_enabled_p ())
1445 dump_printf_loc (MSG_NOTE, vect_location,
1446 "vect_recog_widen_shift_pattern: detected:\n");
1447
1448 /* Check target support. */
1449 vectype = get_vectype_for_scalar_type (half_type0);
1450 vectype_out = get_vectype_for_scalar_type (type);
1451
1452 if (!vectype
1453 || !vectype_out
1454 || !supportable_widening_operation (WIDEN_LSHIFT_EXPR, last_stmt,
1455 vectype_out, vectype,
1456 &dummy_code, &dummy_code,
1457 &dummy_int, &dummy_vec))
1458 return NULL;
1459
1460 *type_in = vectype;
1461 *type_out = vectype_out;
1462
1463 /* Pattern supported. Create a stmt to be used to replace the pattern. */
1464 var = vect_recog_temp_ssa_var (type, NULL);
1465 pattern_stmt =
1466 gimple_build_assign_with_ops (WIDEN_LSHIFT_EXPR, var, oprnd0, oprnd1);
1467
1468 if (dump_enabled_p ())
1469 dump_gimple_stmt_loc (MSG_NOTE, vect_location, TDF_SLIM, pattern_stmt, 0);
1470
1471 stmts->safe_push (last_stmt);
1472 return pattern_stmt;
1473 }
1474
1475 /* Detect a rotate pattern wouldn't be otherwise vectorized:
1476
1477 type a_t, b_t, c_t;
1478
1479 S0 a_t = b_t r<< c_t;
1480
1481 Input/Output:
1482
1483 * STMTS: Contains a stmt from which the pattern search begins,
1484 i.e. the shift/rotate stmt. The original stmt (S0) is replaced
1485 with a sequence:
1486
1487 S1 d_t = -c_t;
1488 S2 e_t = d_t & (B - 1);
1489 S3 f_t = b_t << c_t;
1490 S4 g_t = b_t >> e_t;
1491 S0 a_t = f_t | g_t;
1492
1493 where B is element bitsize of type.
1494
1495 Output:
1496
1497 * TYPE_IN: The type of the input arguments to the pattern.
1498
1499 * TYPE_OUT: The type of the output of this pattern.
1500
1501 * Return value: A new stmt that will be used to replace the rotate
1502 S0 stmt. */
1503
1504 static gimple
1505 vect_recog_rotate_pattern (vec<gimple> *stmts, tree *type_in, tree *type_out)
1506 {
1507 gimple last_stmt = stmts->pop ();
1508 tree oprnd0, oprnd1, lhs, var, var1, var2, vectype, type, stype, def, def2;
1509 gimple pattern_stmt, def_stmt;
1510 enum tree_code rhs_code;
1511 stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
1512 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
1513 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
1514 enum vect_def_type dt;
1515 optab optab1, optab2;
1516 edge ext_def = NULL;
1517
1518 if (!is_gimple_assign (last_stmt))
1519 return NULL;
1520
1521 rhs_code = gimple_assign_rhs_code (last_stmt);
1522 switch (rhs_code)
1523 {
1524 case LROTATE_EXPR:
1525 case RROTATE_EXPR:
1526 break;
1527 default:
1528 return NULL;
1529 }
1530
1531 if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
1532 return NULL;
1533
1534 lhs = gimple_assign_lhs (last_stmt);
1535 oprnd0 = gimple_assign_rhs1 (last_stmt);
1536 type = TREE_TYPE (oprnd0);
1537 oprnd1 = gimple_assign_rhs2 (last_stmt);
1538 if (TREE_CODE (oprnd0) != SSA_NAME
1539 || TYPE_PRECISION (TREE_TYPE (lhs)) != TYPE_PRECISION (type)
1540 || !INTEGRAL_TYPE_P (type)
1541 || !TYPE_UNSIGNED (type))
1542 return NULL;
1543
1544 if (!vect_is_simple_use (oprnd1, last_stmt, loop_vinfo, bb_vinfo, &def_stmt,
1545 &def, &dt))
1546 return NULL;
1547
1548 if (dt != vect_internal_def
1549 && dt != vect_constant_def
1550 && dt != vect_external_def)
1551 return NULL;
1552
1553 vectype = get_vectype_for_scalar_type (type);
1554 if (vectype == NULL_TREE)
1555 return NULL;
1556
1557 /* If vector/vector or vector/scalar rotate is supported by the target,
1558 don't do anything here. */
1559 optab1 = optab_for_tree_code (rhs_code, vectype, optab_vector);
1560 if (optab1
1561 && optab_handler (optab1, TYPE_MODE (vectype)) != CODE_FOR_nothing)
1562 return NULL;
1563
1564 if (bb_vinfo != NULL || dt != vect_internal_def)
1565 {
1566 optab2 = optab_for_tree_code (rhs_code, vectype, optab_scalar);
1567 if (optab2
1568 && optab_handler (optab2, TYPE_MODE (vectype)) != CODE_FOR_nothing)
1569 return NULL;
1570 }
1571
1572 /* If vector/vector or vector/scalar shifts aren't supported by the target,
1573 don't do anything here either. */
1574 optab1 = optab_for_tree_code (LSHIFT_EXPR, vectype, optab_vector);
1575 optab2 = optab_for_tree_code (RSHIFT_EXPR, vectype, optab_vector);
1576 if (!optab1
1577 || optab_handler (optab1, TYPE_MODE (vectype)) == CODE_FOR_nothing
1578 || !optab2
1579 || optab_handler (optab2, TYPE_MODE (vectype)) == CODE_FOR_nothing)
1580 {
1581 if (bb_vinfo == NULL && dt == vect_internal_def)
1582 return NULL;
1583 optab1 = optab_for_tree_code (LSHIFT_EXPR, vectype, optab_scalar);
1584 optab2 = optab_for_tree_code (RSHIFT_EXPR, vectype, optab_scalar);
1585 if (!optab1
1586 || optab_handler (optab1, TYPE_MODE (vectype)) == CODE_FOR_nothing
1587 || !optab2
1588 || optab_handler (optab2, TYPE_MODE (vectype)) == CODE_FOR_nothing)
1589 return NULL;
1590 }
1591
1592 *type_in = vectype;
1593 *type_out = vectype;
1594 if (*type_in == NULL_TREE)
1595 return NULL;
1596
1597 if (dt == vect_external_def
1598 && TREE_CODE (oprnd1) == SSA_NAME
1599 && loop_vinfo)
1600 {
1601 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1602 ext_def = loop_preheader_edge (loop);
1603 if (!SSA_NAME_IS_DEFAULT_DEF (oprnd1))
1604 {
1605 basic_block bb = gimple_bb (SSA_NAME_DEF_STMT (oprnd1));
1606 if (bb == NULL
1607 || !dominated_by_p (CDI_DOMINATORS, ext_def->dest, bb))
1608 ext_def = NULL;
1609 }
1610 }
1611
1612 def = NULL_TREE;
1613 if (TREE_CODE (oprnd1) == INTEGER_CST
1614 || TYPE_MODE (TREE_TYPE (oprnd1)) == TYPE_MODE (type))
1615 def = oprnd1;
1616 else if (def_stmt && gimple_assign_cast_p (def_stmt))
1617 {
1618 tree rhs1 = gimple_assign_rhs1 (def_stmt);
1619 if (TYPE_MODE (TREE_TYPE (rhs1)) == TYPE_MODE (type)
1620 && TYPE_PRECISION (TREE_TYPE (rhs1))
1621 == TYPE_PRECISION (type))
1622 def = rhs1;
1623 }
1624
1625 STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo) = NULL;
1626 if (def == NULL_TREE)
1627 {
1628 def = vect_recog_temp_ssa_var (type, NULL);
1629 def_stmt = gimple_build_assign_with_ops (NOP_EXPR, def, oprnd1,
1630 NULL_TREE);
1631 if (ext_def)
1632 {
1633 basic_block new_bb
1634 = gsi_insert_on_edge_immediate (ext_def, def_stmt);
1635 gcc_assert (!new_bb);
1636 }
1637 else
1638 append_pattern_def_seq (stmt_vinfo, def_stmt);
1639 }
1640 stype = TREE_TYPE (def);
1641
1642 if (TREE_CODE (def) == INTEGER_CST)
1643 {
1644 if (!tree_fits_uhwi_p (def)
1645 || tree_to_uhwi (def) >= GET_MODE_PRECISION (TYPE_MODE (type))
1646 || integer_zerop (def))
1647 return NULL;
1648 def2 = build_int_cst (stype,
1649 GET_MODE_PRECISION (TYPE_MODE (type))
1650 - tree_to_uhwi (def));
1651 }
1652 else
1653 {
1654 tree vecstype = get_vectype_for_scalar_type (stype);
1655 stmt_vec_info def_stmt_vinfo;
1656
1657 if (vecstype == NULL_TREE)
1658 return NULL;
1659 def2 = vect_recog_temp_ssa_var (stype, NULL);
1660 def_stmt = gimple_build_assign_with_ops (NEGATE_EXPR, def2, def,
1661 NULL_TREE);
1662 if (ext_def)
1663 {
1664 basic_block new_bb
1665 = gsi_insert_on_edge_immediate (ext_def, def_stmt);
1666 gcc_assert (!new_bb);
1667 }
1668 else
1669 {
1670 def_stmt_vinfo = new_stmt_vec_info (def_stmt, loop_vinfo, bb_vinfo);
1671 set_vinfo_for_stmt (def_stmt, def_stmt_vinfo);
1672 STMT_VINFO_VECTYPE (def_stmt_vinfo) = vecstype;
1673 append_pattern_def_seq (stmt_vinfo, def_stmt);
1674 }
1675
1676 def2 = vect_recog_temp_ssa_var (stype, NULL);
1677 tree mask
1678 = build_int_cst (stype, GET_MODE_PRECISION (TYPE_MODE (stype)) - 1);
1679 def_stmt = gimple_build_assign_with_ops (BIT_AND_EXPR, def2,
1680 gimple_assign_lhs (def_stmt),
1681 mask);
1682 if (ext_def)
1683 {
1684 basic_block new_bb
1685 = gsi_insert_on_edge_immediate (ext_def, def_stmt);
1686 gcc_assert (!new_bb);
1687 }
1688 else
1689 {
1690 def_stmt_vinfo = new_stmt_vec_info (def_stmt, loop_vinfo, bb_vinfo);
1691 set_vinfo_for_stmt (def_stmt, def_stmt_vinfo);
1692 STMT_VINFO_VECTYPE (def_stmt_vinfo) = vecstype;
1693 append_pattern_def_seq (stmt_vinfo, def_stmt);
1694 }
1695 }
1696
1697 var1 = vect_recog_temp_ssa_var (type, NULL);
1698 def_stmt = gimple_build_assign_with_ops (rhs_code == LROTATE_EXPR
1699 ? LSHIFT_EXPR : RSHIFT_EXPR,
1700 var1, oprnd0, def);
1701 append_pattern_def_seq (stmt_vinfo, def_stmt);
1702
1703 var2 = vect_recog_temp_ssa_var (type, NULL);
1704 def_stmt = gimple_build_assign_with_ops (rhs_code == LROTATE_EXPR
1705 ? RSHIFT_EXPR : LSHIFT_EXPR,
1706 var2, oprnd0, def2);
1707 append_pattern_def_seq (stmt_vinfo, def_stmt);
1708
1709 /* Pattern detected. */
1710 if (dump_enabled_p ())
1711 dump_printf_loc (MSG_NOTE, vect_location,
1712 "vect_recog_rotate_pattern: detected:\n");
1713
1714 /* Pattern supported. Create a stmt to be used to replace the pattern. */
1715 var = vect_recog_temp_ssa_var (type, NULL);
1716 pattern_stmt = gimple_build_assign_with_ops (BIT_IOR_EXPR, var, var1, var2);
1717
1718 if (dump_enabled_p ())
1719 dump_gimple_stmt_loc (MSG_NOTE, vect_location, TDF_SLIM, pattern_stmt, 0);
1720
1721 stmts->safe_push (last_stmt);
1722 return pattern_stmt;
1723 }
1724
1725 /* Detect a vector by vector shift pattern that wouldn't be otherwise
1726 vectorized:
1727
1728 type a_t;
1729 TYPE b_T, res_T;
1730
1731 S1 a_t = ;
1732 S2 b_T = ;
1733 S3 res_T = b_T op a_t;
1734
1735 where type 'TYPE' is a type with different size than 'type',
1736 and op is <<, >> or rotate.
1737
1738 Also detect cases:
1739
1740 type a_t;
1741 TYPE b_T, c_T, res_T;
1742
1743 S0 c_T = ;
1744 S1 a_t = (type) c_T;
1745 S2 b_T = ;
1746 S3 res_T = b_T op a_t;
1747
1748 Input/Output:
1749
1750 * STMTS: Contains a stmt from which the pattern search begins,
1751 i.e. the shift/rotate stmt. The original stmt (S3) is replaced
1752 with a shift/rotate which has same type on both operands, in the
1753 second case just b_T op c_T, in the first case with added cast
1754 from a_t to c_T in STMT_VINFO_PATTERN_DEF_SEQ.
1755
1756 Output:
1757
1758 * TYPE_IN: The type of the input arguments to the pattern.
1759
1760 * TYPE_OUT: The type of the output of this pattern.
1761
1762 * Return value: A new stmt that will be used to replace the shift/rotate
1763 S3 stmt. */
1764
1765 static gimple
1766 vect_recog_vector_vector_shift_pattern (vec<gimple> *stmts,
1767 tree *type_in, tree *type_out)
1768 {
1769 gimple last_stmt = stmts->pop ();
1770 tree oprnd0, oprnd1, lhs, var;
1771 gimple pattern_stmt, def_stmt;
1772 enum tree_code rhs_code;
1773 stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
1774 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
1775 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
1776 enum vect_def_type dt;
1777 tree def;
1778
1779 if (!is_gimple_assign (last_stmt))
1780 return NULL;
1781
1782 rhs_code = gimple_assign_rhs_code (last_stmt);
1783 switch (rhs_code)
1784 {
1785 case LSHIFT_EXPR:
1786 case RSHIFT_EXPR:
1787 case LROTATE_EXPR:
1788 case RROTATE_EXPR:
1789 break;
1790 default:
1791 return NULL;
1792 }
1793
1794 if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
1795 return NULL;
1796
1797 lhs = gimple_assign_lhs (last_stmt);
1798 oprnd0 = gimple_assign_rhs1 (last_stmt);
1799 oprnd1 = gimple_assign_rhs2 (last_stmt);
1800 if (TREE_CODE (oprnd0) != SSA_NAME
1801 || TREE_CODE (oprnd1) != SSA_NAME
1802 || TYPE_MODE (TREE_TYPE (oprnd0)) == TYPE_MODE (TREE_TYPE (oprnd1))
1803 || TYPE_PRECISION (TREE_TYPE (oprnd1))
1804 != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (oprnd1)))
1805 || TYPE_PRECISION (TREE_TYPE (lhs))
1806 != TYPE_PRECISION (TREE_TYPE (oprnd0)))
1807 return NULL;
1808
1809 if (!vect_is_simple_use (oprnd1, last_stmt, loop_vinfo, bb_vinfo, &def_stmt,
1810 &def, &dt))
1811 return NULL;
1812
1813 if (dt != vect_internal_def)
1814 return NULL;
1815
1816 *type_in = get_vectype_for_scalar_type (TREE_TYPE (oprnd0));
1817 *type_out = *type_in;
1818 if (*type_in == NULL_TREE)
1819 return NULL;
1820
1821 def = NULL_TREE;
1822 if (gimple_assign_cast_p (def_stmt))
1823 {
1824 tree rhs1 = gimple_assign_rhs1 (def_stmt);
1825 if (TYPE_MODE (TREE_TYPE (rhs1)) == TYPE_MODE (TREE_TYPE (oprnd0))
1826 && TYPE_PRECISION (TREE_TYPE (rhs1))
1827 == TYPE_PRECISION (TREE_TYPE (oprnd0)))
1828 def = rhs1;
1829 }
1830
1831 if (def == NULL_TREE)
1832 {
1833 def = vect_recog_temp_ssa_var (TREE_TYPE (oprnd0), NULL);
1834 def_stmt = gimple_build_assign_with_ops (NOP_EXPR, def, oprnd1,
1835 NULL_TREE);
1836 new_pattern_def_seq (stmt_vinfo, def_stmt);
1837 }
1838
1839 /* Pattern detected. */
1840 if (dump_enabled_p ())
1841 dump_printf_loc (MSG_NOTE, vect_location,
1842 "vect_recog_vector_vector_shift_pattern: detected:\n");
1843
1844 /* Pattern supported. Create a stmt to be used to replace the pattern. */
1845 var = vect_recog_temp_ssa_var (TREE_TYPE (oprnd0), NULL);
1846 pattern_stmt = gimple_build_assign_with_ops (rhs_code, var, oprnd0, def);
1847
1848 if (dump_enabled_p ())
1849 dump_gimple_stmt_loc (MSG_NOTE, vect_location, TDF_SLIM, pattern_stmt, 0);
1850
1851 stmts->safe_push (last_stmt);
1852 return pattern_stmt;
1853 }
1854
1855 /* Detect a signed division by a constant that wouldn't be
1856 otherwise vectorized:
1857
1858 type a_t, b_t;
1859
1860 S1 a_t = b_t / N;
1861
1862 where type 'type' is an integral type and N is a constant.
1863
1864 Similarly handle modulo by a constant:
1865
1866 S4 a_t = b_t % N;
1867
1868 Input/Output:
1869
1870 * STMTS: Contains a stmt from which the pattern search begins,
1871 i.e. the division stmt. S1 is replaced by if N is a power
1872 of two constant and type is signed:
1873 S3 y_t = b_t < 0 ? N - 1 : 0;
1874 S2 x_t = b_t + y_t;
1875 S1' a_t = x_t >> log2 (N);
1876
1877 S4 is replaced if N is a power of two constant and
1878 type is signed by (where *_T temporaries have unsigned type):
1879 S9 y_T = b_t < 0 ? -1U : 0U;
1880 S8 z_T = y_T >> (sizeof (type_t) * CHAR_BIT - log2 (N));
1881 S7 z_t = (type) z_T;
1882 S6 w_t = b_t + z_t;
1883 S5 x_t = w_t & (N - 1);
1884 S4' a_t = x_t - z_t;
1885
1886 Output:
1887
1888 * TYPE_IN: The type of the input arguments to the pattern.
1889
1890 * TYPE_OUT: The type of the output of this pattern.
1891
1892 * Return value: A new stmt that will be used to replace the division
1893 S1 or modulo S4 stmt. */
1894
1895 static gimple
1896 vect_recog_divmod_pattern (vec<gimple> *stmts,
1897 tree *type_in, tree *type_out)
1898 {
1899 gimple last_stmt = stmts->pop ();
1900 tree oprnd0, oprnd1, vectype, itype, cond;
1901 gimple pattern_stmt, def_stmt;
1902 enum tree_code rhs_code;
1903 stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
1904 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
1905 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
1906 optab optab;
1907 tree q;
1908 int dummy_int, prec;
1909 stmt_vec_info def_stmt_vinfo;
1910
1911 if (!is_gimple_assign (last_stmt))
1912 return NULL;
1913
1914 rhs_code = gimple_assign_rhs_code (last_stmt);
1915 switch (rhs_code)
1916 {
1917 case TRUNC_DIV_EXPR:
1918 case TRUNC_MOD_EXPR:
1919 break;
1920 default:
1921 return NULL;
1922 }
1923
1924 if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
1925 return NULL;
1926
1927 oprnd0 = gimple_assign_rhs1 (last_stmt);
1928 oprnd1 = gimple_assign_rhs2 (last_stmt);
1929 itype = TREE_TYPE (oprnd0);
1930 if (TREE_CODE (oprnd0) != SSA_NAME
1931 || TREE_CODE (oprnd1) != INTEGER_CST
1932 || TREE_CODE (itype) != INTEGER_TYPE
1933 || TYPE_PRECISION (itype) != GET_MODE_PRECISION (TYPE_MODE (itype)))
1934 return NULL;
1935
1936 vectype = get_vectype_for_scalar_type (itype);
1937 if (vectype == NULL_TREE)
1938 return NULL;
1939
1940 /* If the target can handle vectorized division or modulo natively,
1941 don't attempt to optimize this. */
1942 optab = optab_for_tree_code (rhs_code, vectype, optab_default);
1943 if (optab != unknown_optab)
1944 {
1945 enum machine_mode vec_mode = TYPE_MODE (vectype);
1946 int icode = (int) optab_handler (optab, vec_mode);
1947 if (icode != CODE_FOR_nothing)
1948 return NULL;
1949 }
1950
1951 prec = TYPE_PRECISION (itype);
1952 if (integer_pow2p (oprnd1))
1953 {
1954 if (TYPE_UNSIGNED (itype) || tree_int_cst_sgn (oprnd1) != 1)
1955 return NULL;
1956
1957 /* Pattern detected. */
1958 if (dump_enabled_p ())
1959 dump_printf_loc (MSG_NOTE, vect_location,
1960 "vect_recog_divmod_pattern: detected:\n");
1961
1962 cond = build2 (LT_EXPR, boolean_type_node, oprnd0,
1963 build_int_cst (itype, 0));
1964 if (rhs_code == TRUNC_DIV_EXPR)
1965 {
1966 tree var = vect_recog_temp_ssa_var (itype, NULL);
1967 tree shift;
1968 def_stmt
1969 = gimple_build_assign_with_ops (COND_EXPR, var, cond,
1970 fold_build2 (MINUS_EXPR, itype,
1971 oprnd1,
1972 build_int_cst (itype,
1973 1)),
1974 build_int_cst (itype, 0));
1975 new_pattern_def_seq (stmt_vinfo, def_stmt);
1976 var = vect_recog_temp_ssa_var (itype, NULL);
1977 def_stmt
1978 = gimple_build_assign_with_ops (PLUS_EXPR, var, oprnd0,
1979 gimple_assign_lhs (def_stmt));
1980 append_pattern_def_seq (stmt_vinfo, def_stmt);
1981
1982 shift = build_int_cst (itype, tree_log2 (oprnd1));
1983 pattern_stmt
1984 = gimple_build_assign_with_ops (RSHIFT_EXPR,
1985 vect_recog_temp_ssa_var (itype,
1986 NULL),
1987 var, shift);
1988 }
1989 else
1990 {
1991 tree signmask;
1992 STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo) = NULL;
1993 if (compare_tree_int (oprnd1, 2) == 0)
1994 {
1995 signmask = vect_recog_temp_ssa_var (itype, NULL);
1996 def_stmt
1997 = gimple_build_assign_with_ops (COND_EXPR, signmask, cond,
1998 build_int_cst (itype, 1),
1999 build_int_cst (itype, 0));
2000 append_pattern_def_seq (stmt_vinfo, def_stmt);
2001 }
2002 else
2003 {
2004 tree utype
2005 = build_nonstandard_integer_type (prec, 1);
2006 tree vecutype = get_vectype_for_scalar_type (utype);
2007 tree shift
2008 = build_int_cst (utype, GET_MODE_BITSIZE (TYPE_MODE (itype))
2009 - tree_log2 (oprnd1));
2010 tree var = vect_recog_temp_ssa_var (utype, NULL);
2011
2012 def_stmt
2013 = gimple_build_assign_with_ops (COND_EXPR, var, cond,
2014 build_int_cst (utype, -1),
2015 build_int_cst (utype, 0));
2016 def_stmt_vinfo
2017 = new_stmt_vec_info (def_stmt, loop_vinfo, bb_vinfo);
2018 set_vinfo_for_stmt (def_stmt, def_stmt_vinfo);
2019 STMT_VINFO_VECTYPE (def_stmt_vinfo) = vecutype;
2020 append_pattern_def_seq (stmt_vinfo, def_stmt);
2021 var = vect_recog_temp_ssa_var (utype, NULL);
2022 def_stmt
2023 = gimple_build_assign_with_ops (RSHIFT_EXPR, var,
2024 gimple_assign_lhs (def_stmt),
2025 shift);
2026 def_stmt_vinfo
2027 = new_stmt_vec_info (def_stmt, loop_vinfo, bb_vinfo);
2028 set_vinfo_for_stmt (def_stmt, def_stmt_vinfo);
2029 STMT_VINFO_VECTYPE (def_stmt_vinfo) = vecutype;
2030 append_pattern_def_seq (stmt_vinfo, def_stmt);
2031 signmask = vect_recog_temp_ssa_var (itype, NULL);
2032 def_stmt
2033 = gimple_build_assign_with_ops (NOP_EXPR, signmask, var,
2034 NULL_TREE);
2035 append_pattern_def_seq (stmt_vinfo, def_stmt);
2036 }
2037 def_stmt
2038 = gimple_build_assign_with_ops (PLUS_EXPR,
2039 vect_recog_temp_ssa_var (itype,
2040 NULL),
2041 oprnd0, signmask);
2042 append_pattern_def_seq (stmt_vinfo, def_stmt);
2043 def_stmt
2044 = gimple_build_assign_with_ops (BIT_AND_EXPR,
2045 vect_recog_temp_ssa_var (itype,
2046 NULL),
2047 gimple_assign_lhs (def_stmt),
2048 fold_build2 (MINUS_EXPR, itype,
2049 oprnd1,
2050 build_int_cst (itype,
2051 1)));
2052 append_pattern_def_seq (stmt_vinfo, def_stmt);
2053
2054 pattern_stmt
2055 = gimple_build_assign_with_ops (MINUS_EXPR,
2056 vect_recog_temp_ssa_var (itype,
2057 NULL),
2058 gimple_assign_lhs (def_stmt),
2059 signmask);
2060 }
2061
2062 if (dump_enabled_p ())
2063 dump_gimple_stmt_loc (MSG_NOTE, vect_location, TDF_SLIM, pattern_stmt,
2064 0);
2065
2066 stmts->safe_push (last_stmt);
2067
2068 *type_in = vectype;
2069 *type_out = vectype;
2070 return pattern_stmt;
2071 }
2072
2073 if (prec > HOST_BITS_PER_WIDE_INT
2074 || integer_zerop (oprnd1))
2075 return NULL;
2076
2077 if (!can_mult_highpart_p (TYPE_MODE (vectype), TYPE_UNSIGNED (itype)))
2078 return NULL;
2079
2080 STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo) = NULL;
2081
2082 if (TYPE_UNSIGNED (itype))
2083 {
2084 unsigned HOST_WIDE_INT mh, ml;
2085 int pre_shift, post_shift;
2086 unsigned HOST_WIDE_INT d = (TREE_INT_CST_LOW (oprnd1)
2087 & GET_MODE_MASK (TYPE_MODE (itype)));
2088 tree t1, t2, t3, t4;
2089
2090 if (d >= ((unsigned HOST_WIDE_INT) 1 << (prec - 1)))
2091 /* FIXME: Can transform this into oprnd0 >= oprnd1 ? 1 : 0. */
2092 return NULL;
2093
2094 /* Find a suitable multiplier and right shift count
2095 instead of multiplying with D. */
2096 mh = choose_multiplier (d, prec, prec, &ml, &post_shift, &dummy_int);
2097
2098 /* If the suggested multiplier is more than SIZE bits, we can do better
2099 for even divisors, using an initial right shift. */
2100 if (mh != 0 && (d & 1) == 0)
2101 {
2102 pre_shift = floor_log2 (d & -d);
2103 mh = choose_multiplier (d >> pre_shift, prec, prec - pre_shift,
2104 &ml, &post_shift, &dummy_int);
2105 gcc_assert (!mh);
2106 }
2107 else
2108 pre_shift = 0;
2109
2110 if (mh != 0)
2111 {
2112 if (post_shift - 1 >= prec)
2113 return NULL;
2114
2115 /* t1 = oprnd0 h* ml;
2116 t2 = oprnd0 - t1;
2117 t3 = t2 >> 1;
2118 t4 = t1 + t3;
2119 q = t4 >> (post_shift - 1); */
2120 t1 = vect_recog_temp_ssa_var (itype, NULL);
2121 def_stmt
2122 = gimple_build_assign_with_ops (MULT_HIGHPART_EXPR, t1, oprnd0,
2123 build_int_cst (itype, ml));
2124 append_pattern_def_seq (stmt_vinfo, def_stmt);
2125
2126 t2 = vect_recog_temp_ssa_var (itype, NULL);
2127 def_stmt
2128 = gimple_build_assign_with_ops (MINUS_EXPR, t2, oprnd0, t1);
2129 append_pattern_def_seq (stmt_vinfo, def_stmt);
2130
2131 t3 = vect_recog_temp_ssa_var (itype, NULL);
2132 def_stmt
2133 = gimple_build_assign_with_ops (RSHIFT_EXPR, t3, t2,
2134 integer_one_node);
2135 append_pattern_def_seq (stmt_vinfo, def_stmt);
2136
2137 t4 = vect_recog_temp_ssa_var (itype, NULL);
2138 def_stmt
2139 = gimple_build_assign_with_ops (PLUS_EXPR, t4, t1, t3);
2140
2141 if (post_shift != 1)
2142 {
2143 append_pattern_def_seq (stmt_vinfo, def_stmt);
2144
2145 q = vect_recog_temp_ssa_var (itype, NULL);
2146 pattern_stmt
2147 = gimple_build_assign_with_ops (RSHIFT_EXPR, q, t4,
2148 build_int_cst (itype,
2149 post_shift
2150 - 1));
2151 }
2152 else
2153 {
2154 q = t4;
2155 pattern_stmt = def_stmt;
2156 }
2157 }
2158 else
2159 {
2160 if (pre_shift >= prec || post_shift >= prec)
2161 return NULL;
2162
2163 /* t1 = oprnd0 >> pre_shift;
2164 t2 = t1 h* ml;
2165 q = t2 >> post_shift; */
2166 if (pre_shift)
2167 {
2168 t1 = vect_recog_temp_ssa_var (itype, NULL);
2169 def_stmt
2170 = gimple_build_assign_with_ops (RSHIFT_EXPR, t1, oprnd0,
2171 build_int_cst (NULL,
2172 pre_shift));
2173 append_pattern_def_seq (stmt_vinfo, def_stmt);
2174 }
2175 else
2176 t1 = oprnd0;
2177
2178 t2 = vect_recog_temp_ssa_var (itype, NULL);
2179 def_stmt
2180 = gimple_build_assign_with_ops (MULT_HIGHPART_EXPR, t2, t1,
2181 build_int_cst (itype, ml));
2182
2183 if (post_shift)
2184 {
2185 append_pattern_def_seq (stmt_vinfo, def_stmt);
2186
2187 q = vect_recog_temp_ssa_var (itype, NULL);
2188 def_stmt
2189 = gimple_build_assign_with_ops (RSHIFT_EXPR, q, t2,
2190 build_int_cst (itype,
2191 post_shift));
2192 }
2193 else
2194 q = t2;
2195
2196 pattern_stmt = def_stmt;
2197 }
2198 }
2199 else
2200 {
2201 unsigned HOST_WIDE_INT ml;
2202 int post_shift;
2203 HOST_WIDE_INT d = TREE_INT_CST_LOW (oprnd1);
2204 unsigned HOST_WIDE_INT abs_d;
2205 bool add = false;
2206 tree t1, t2, t3, t4;
2207
2208 /* Give up for -1. */
2209 if (d == -1)
2210 return NULL;
2211
2212 /* Since d might be INT_MIN, we have to cast to
2213 unsigned HOST_WIDE_INT before negating to avoid
2214 undefined signed overflow. */
2215 abs_d = (d >= 0
2216 ? (unsigned HOST_WIDE_INT) d
2217 : - (unsigned HOST_WIDE_INT) d);
2218
2219 /* n rem d = n rem -d */
2220 if (rhs_code == TRUNC_MOD_EXPR && d < 0)
2221 {
2222 d = abs_d;
2223 oprnd1 = build_int_cst (itype, abs_d);
2224 }
2225 else if (HOST_BITS_PER_WIDE_INT >= prec
2226 && abs_d == (unsigned HOST_WIDE_INT) 1 << (prec - 1))
2227 /* This case is not handled correctly below. */
2228 return NULL;
2229
2230 choose_multiplier (abs_d, prec, prec - 1, &ml, &post_shift, &dummy_int);
2231 if (ml >= (unsigned HOST_WIDE_INT) 1 << (prec - 1))
2232 {
2233 add = true;
2234 ml |= (~(unsigned HOST_WIDE_INT) 0) << (prec - 1);
2235 }
2236 if (post_shift >= prec)
2237 return NULL;
2238
2239 /* t1 = oprnd0 h* ml; */
2240 t1 = vect_recog_temp_ssa_var (itype, NULL);
2241 def_stmt
2242 = gimple_build_assign_with_ops (MULT_HIGHPART_EXPR, t1, oprnd0,
2243 build_int_cst (itype, ml));
2244
2245 if (add)
2246 {
2247 /* t2 = t1 + oprnd0; */
2248 append_pattern_def_seq (stmt_vinfo, def_stmt);
2249 t2 = vect_recog_temp_ssa_var (itype, NULL);
2250 def_stmt
2251 = gimple_build_assign_with_ops (PLUS_EXPR, t2, t1, oprnd0);
2252 }
2253 else
2254 t2 = t1;
2255
2256 if (post_shift)
2257 {
2258 /* t3 = t2 >> post_shift; */
2259 append_pattern_def_seq (stmt_vinfo, def_stmt);
2260 t3 = vect_recog_temp_ssa_var (itype, NULL);
2261 def_stmt
2262 = gimple_build_assign_with_ops (RSHIFT_EXPR, t3, t2,
2263 build_int_cst (itype, post_shift));
2264 }
2265 else
2266 t3 = t2;
2267
2268 double_int oprnd0_min, oprnd0_max;
2269 int msb = 1;
2270 if (get_range_info (oprnd0, &oprnd0_min, &oprnd0_max) == VR_RANGE)
2271 {
2272 if (!oprnd0_min.is_negative ())
2273 msb = 0;
2274 else if (oprnd0_max.is_negative ())
2275 msb = -1;
2276 }
2277
2278 if (msb == 0 && d >= 0)
2279 {
2280 /* q = t3; */
2281 q = t3;
2282 pattern_stmt = def_stmt;
2283 }
2284 else
2285 {
2286 /* t4 = oprnd0 >> (prec - 1);
2287 or if we know from VRP that oprnd0 >= 0
2288 t4 = 0;
2289 or if we know from VRP that oprnd0 < 0
2290 t4 = -1; */
2291 append_pattern_def_seq (stmt_vinfo, def_stmt);
2292 t4 = vect_recog_temp_ssa_var (itype, NULL);
2293 if (msb != 1)
2294 def_stmt
2295 = gimple_build_assign_with_ops (INTEGER_CST,
2296 t4, build_int_cst (itype, msb),
2297 NULL_TREE);
2298 else
2299 def_stmt
2300 = gimple_build_assign_with_ops (RSHIFT_EXPR, t4, oprnd0,
2301 build_int_cst (itype, prec - 1));
2302 append_pattern_def_seq (stmt_vinfo, def_stmt);
2303
2304 /* q = t3 - t4; or q = t4 - t3; */
2305 q = vect_recog_temp_ssa_var (itype, NULL);
2306 pattern_stmt
2307 = gimple_build_assign_with_ops (MINUS_EXPR, q, d < 0 ? t4 : t3,
2308 d < 0 ? t3 : t4);
2309 }
2310 }
2311
2312 if (rhs_code == TRUNC_MOD_EXPR)
2313 {
2314 tree r, t1;
2315
2316 /* We divided. Now finish by:
2317 t1 = q * oprnd1;
2318 r = oprnd0 - t1; */
2319 append_pattern_def_seq (stmt_vinfo, pattern_stmt);
2320
2321 t1 = vect_recog_temp_ssa_var (itype, NULL);
2322 def_stmt
2323 = gimple_build_assign_with_ops (MULT_EXPR, t1, q, oprnd1);
2324 append_pattern_def_seq (stmt_vinfo, def_stmt);
2325
2326 r = vect_recog_temp_ssa_var (itype, NULL);
2327 pattern_stmt
2328 = gimple_build_assign_with_ops (MINUS_EXPR, r, oprnd0, t1);
2329 }
2330
2331 /* Pattern detected. */
2332 if (dump_enabled_p ())
2333 {
2334 dump_printf_loc (MSG_NOTE, vect_location,
2335 "vect_recog_divmod_pattern: detected: ");
2336 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_stmt, 0);
2337 dump_printf (MSG_NOTE, "\n");
2338 }
2339
2340 stmts->safe_push (last_stmt);
2341
2342 *type_in = vectype;
2343 *type_out = vectype;
2344 return pattern_stmt;
2345 }
2346
2347 /* Function vect_recog_mixed_size_cond_pattern
2348
2349 Try to find the following pattern:
2350
2351 type x_t, y_t;
2352 TYPE a_T, b_T, c_T;
2353 loop:
2354 S1 a_T = x_t CMP y_t ? b_T : c_T;
2355
2356 where type 'TYPE' is an integral type which has different size
2357 from 'type'. b_T and c_T are either constants (and if 'TYPE' is wider
2358 than 'type', the constants need to fit into an integer type
2359 with the same width as 'type') or results of conversion from 'type'.
2360
2361 Input:
2362
2363 * LAST_STMT: A stmt from which the pattern search begins.
2364
2365 Output:
2366
2367 * TYPE_IN: The type of the input arguments to the pattern.
2368
2369 * TYPE_OUT: The type of the output of this pattern.
2370
2371 * Return value: A new stmt that will be used to replace the pattern.
2372 Additionally a def_stmt is added.
2373
2374 a_it = x_t CMP y_t ? b_it : c_it;
2375 a_T = (TYPE) a_it; */
2376
2377 static gimple
2378 vect_recog_mixed_size_cond_pattern (vec<gimple> *stmts, tree *type_in,
2379 tree *type_out)
2380 {
2381 gimple last_stmt = (*stmts)[0];
2382 tree cond_expr, then_clause, else_clause;
2383 stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt), def_stmt_info;
2384 tree type, vectype, comp_vectype, itype = NULL_TREE, vecitype;
2385 enum machine_mode cmpmode;
2386 gimple pattern_stmt, def_stmt;
2387 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
2388 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
2389 tree orig_type0 = NULL_TREE, orig_type1 = NULL_TREE;
2390 gimple def_stmt0 = NULL, def_stmt1 = NULL;
2391 bool promotion;
2392 tree comp_scalar_type;
2393
2394 if (!is_gimple_assign (last_stmt)
2395 || gimple_assign_rhs_code (last_stmt) != COND_EXPR
2396 || STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_internal_def)
2397 return NULL;
2398
2399 cond_expr = gimple_assign_rhs1 (last_stmt);
2400 then_clause = gimple_assign_rhs2 (last_stmt);
2401 else_clause = gimple_assign_rhs3 (last_stmt);
2402
2403 if (!COMPARISON_CLASS_P (cond_expr))
2404 return NULL;
2405
2406 comp_scalar_type = TREE_TYPE (TREE_OPERAND (cond_expr, 0));
2407 comp_vectype = get_vectype_for_scalar_type (comp_scalar_type);
2408 if (comp_vectype == NULL_TREE)
2409 return NULL;
2410
2411 type = gimple_expr_type (last_stmt);
2412 if (types_compatible_p (type, comp_scalar_type)
2413 || ((TREE_CODE (then_clause) != INTEGER_CST
2414 || TREE_CODE (else_clause) != INTEGER_CST)
2415 && !INTEGRAL_TYPE_P (comp_scalar_type))
2416 || !INTEGRAL_TYPE_P (type))
2417 return NULL;
2418
2419 if ((TREE_CODE (then_clause) != INTEGER_CST
2420 && !type_conversion_p (then_clause, last_stmt, false, &orig_type0,
2421 &def_stmt0, &promotion))
2422 || (TREE_CODE (else_clause) != INTEGER_CST
2423 && !type_conversion_p (else_clause, last_stmt, false, &orig_type1,
2424 &def_stmt1, &promotion)))
2425 return NULL;
2426
2427 if (orig_type0 && orig_type1
2428 && !types_compatible_p (orig_type0, orig_type1))
2429 return NULL;
2430
2431 if (orig_type0)
2432 {
2433 if (!types_compatible_p (orig_type0, comp_scalar_type))
2434 return NULL;
2435 then_clause = gimple_assign_rhs1 (def_stmt0);
2436 itype = orig_type0;
2437 }
2438
2439 if (orig_type1)
2440 {
2441 if (!types_compatible_p (orig_type1, comp_scalar_type))
2442 return NULL;
2443 else_clause = gimple_assign_rhs1 (def_stmt1);
2444 itype = orig_type1;
2445 }
2446
2447 cmpmode = GET_MODE_INNER (TYPE_MODE (comp_vectype));
2448
2449 if (GET_MODE_BITSIZE (TYPE_MODE (type)) == GET_MODE_BITSIZE (cmpmode))
2450 return NULL;
2451
2452 vectype = get_vectype_for_scalar_type (type);
2453 if (vectype == NULL_TREE)
2454 return NULL;
2455
2456 if (expand_vec_cond_expr_p (vectype, comp_vectype))
2457 return NULL;
2458
2459 if (itype == NULL_TREE)
2460 itype = build_nonstandard_integer_type (GET_MODE_BITSIZE (cmpmode),
2461 TYPE_UNSIGNED (type));
2462
2463 if (itype == NULL_TREE
2464 || GET_MODE_BITSIZE (TYPE_MODE (itype)) != GET_MODE_BITSIZE (cmpmode))
2465 return NULL;
2466
2467 vecitype = get_vectype_for_scalar_type (itype);
2468 if (vecitype == NULL_TREE)
2469 return NULL;
2470
2471 if (!expand_vec_cond_expr_p (vecitype, comp_vectype))
2472 return NULL;
2473
2474 if (GET_MODE_BITSIZE (TYPE_MODE (type)) > GET_MODE_BITSIZE (cmpmode))
2475 {
2476 if ((TREE_CODE (then_clause) == INTEGER_CST
2477 && !int_fits_type_p (then_clause, itype))
2478 || (TREE_CODE (else_clause) == INTEGER_CST
2479 && !int_fits_type_p (else_clause, itype)))
2480 return NULL;
2481 }
2482
2483 def_stmt
2484 = gimple_build_assign_with_ops (COND_EXPR,
2485 vect_recog_temp_ssa_var (itype, NULL),
2486 unshare_expr (cond_expr),
2487 fold_convert (itype, then_clause),
2488 fold_convert (itype, else_clause));
2489 pattern_stmt
2490 = gimple_build_assign_with_ops (NOP_EXPR,
2491 vect_recog_temp_ssa_var (type, NULL),
2492 gimple_assign_lhs (def_stmt), NULL_TREE);
2493
2494 new_pattern_def_seq (stmt_vinfo, def_stmt);
2495 def_stmt_info = new_stmt_vec_info (def_stmt, loop_vinfo, bb_vinfo);
2496 set_vinfo_for_stmt (def_stmt, def_stmt_info);
2497 STMT_VINFO_VECTYPE (def_stmt_info) = vecitype;
2498 *type_in = vecitype;
2499 *type_out = vectype;
2500
2501 if (dump_enabled_p ())
2502 dump_printf_loc (MSG_NOTE, vect_location,
2503 "vect_recog_mixed_size_cond_pattern: detected:\n");
2504
2505 return pattern_stmt;
2506 }
2507
2508
2509 /* Helper function of vect_recog_bool_pattern. Called recursively, return
2510 true if bool VAR can be optimized that way. */
2511
2512 static bool
2513 check_bool_pattern (tree var, loop_vec_info loop_vinfo, bb_vec_info bb_vinfo)
2514 {
2515 gimple def_stmt;
2516 enum vect_def_type dt;
2517 tree def, rhs1;
2518 enum tree_code rhs_code;
2519
2520 if (!vect_is_simple_use (var, NULL, loop_vinfo, bb_vinfo, &def_stmt, &def,
2521 &dt))
2522 return false;
2523
2524 if (dt != vect_internal_def)
2525 return false;
2526
2527 if (!is_gimple_assign (def_stmt))
2528 return false;
2529
2530 if (!has_single_use (def))
2531 return false;
2532
2533 rhs1 = gimple_assign_rhs1 (def_stmt);
2534 rhs_code = gimple_assign_rhs_code (def_stmt);
2535 switch (rhs_code)
2536 {
2537 case SSA_NAME:
2538 return check_bool_pattern (rhs1, loop_vinfo, bb_vinfo);
2539
2540 CASE_CONVERT:
2541 if ((TYPE_PRECISION (TREE_TYPE (rhs1)) != 1
2542 || !TYPE_UNSIGNED (TREE_TYPE (rhs1)))
2543 && TREE_CODE (TREE_TYPE (rhs1)) != BOOLEAN_TYPE)
2544 return false;
2545 return check_bool_pattern (rhs1, loop_vinfo, bb_vinfo);
2546
2547 case BIT_NOT_EXPR:
2548 return check_bool_pattern (rhs1, loop_vinfo, bb_vinfo);
2549
2550 case BIT_AND_EXPR:
2551 case BIT_IOR_EXPR:
2552 case BIT_XOR_EXPR:
2553 if (!check_bool_pattern (rhs1, loop_vinfo, bb_vinfo))
2554 return false;
2555 return check_bool_pattern (gimple_assign_rhs2 (def_stmt), loop_vinfo,
2556 bb_vinfo);
2557
2558 default:
2559 if (TREE_CODE_CLASS (rhs_code) == tcc_comparison)
2560 {
2561 tree vecitype, comp_vectype;
2562
2563 /* If the comparison can throw, then is_gimple_condexpr will be
2564 false and we can't make a COND_EXPR/VEC_COND_EXPR out of it. */
2565 if (stmt_could_throw_p (def_stmt))
2566 return false;
2567
2568 comp_vectype = get_vectype_for_scalar_type (TREE_TYPE (rhs1));
2569 if (comp_vectype == NULL_TREE)
2570 return false;
2571
2572 if (TREE_CODE (TREE_TYPE (rhs1)) != INTEGER_TYPE)
2573 {
2574 enum machine_mode mode = TYPE_MODE (TREE_TYPE (rhs1));
2575 tree itype
2576 = build_nonstandard_integer_type (GET_MODE_BITSIZE (mode), 1);
2577 vecitype = get_vectype_for_scalar_type (itype);
2578 if (vecitype == NULL_TREE)
2579 return false;
2580 }
2581 else
2582 vecitype = comp_vectype;
2583 return expand_vec_cond_expr_p (vecitype, comp_vectype);
2584 }
2585 return false;
2586 }
2587 }
2588
2589
2590 /* Helper function of adjust_bool_pattern. Add a cast to TYPE to a previous
2591 stmt (SSA_NAME_DEF_STMT of VAR) by moving the COND_EXPR from RELATED_STMT
2592 to PATTERN_DEF_SEQ and adding a cast as RELATED_STMT. */
2593
2594 static tree
2595 adjust_bool_pattern_cast (tree type, tree var)
2596 {
2597 stmt_vec_info stmt_vinfo = vinfo_for_stmt (SSA_NAME_DEF_STMT (var));
2598 gimple cast_stmt, pattern_stmt;
2599
2600 gcc_assert (!STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo));
2601 pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_vinfo);
2602 new_pattern_def_seq (stmt_vinfo, pattern_stmt);
2603 cast_stmt
2604 = gimple_build_assign_with_ops (NOP_EXPR,
2605 vect_recog_temp_ssa_var (type, NULL),
2606 gimple_assign_lhs (pattern_stmt),
2607 NULL_TREE);
2608 STMT_VINFO_RELATED_STMT (stmt_vinfo) = cast_stmt;
2609 return gimple_assign_lhs (cast_stmt);
2610 }
2611
2612
2613 /* Helper function of vect_recog_bool_pattern. Do the actual transformations,
2614 recursively. VAR is an SSA_NAME that should be transformed from bool
2615 to a wider integer type, OUT_TYPE is the desired final integer type of
2616 the whole pattern, TRUEVAL should be NULL unless optimizing
2617 BIT_AND_EXPR into a COND_EXPR with one integer from one of the operands
2618 in the then_clause, STMTS is where statements with added pattern stmts
2619 should be pushed to. */
2620
2621 static tree
2622 adjust_bool_pattern (tree var, tree out_type, tree trueval,
2623 vec<gimple> *stmts)
2624 {
2625 gimple stmt = SSA_NAME_DEF_STMT (var);
2626 enum tree_code rhs_code, def_rhs_code;
2627 tree itype, cond_expr, rhs1, rhs2, irhs1, irhs2;
2628 location_t loc;
2629 gimple pattern_stmt, def_stmt;
2630
2631 rhs1 = gimple_assign_rhs1 (stmt);
2632 rhs2 = gimple_assign_rhs2 (stmt);
2633 rhs_code = gimple_assign_rhs_code (stmt);
2634 loc = gimple_location (stmt);
2635 switch (rhs_code)
2636 {
2637 case SSA_NAME:
2638 CASE_CONVERT:
2639 irhs1 = adjust_bool_pattern (rhs1, out_type, NULL_TREE, stmts);
2640 itype = TREE_TYPE (irhs1);
2641 pattern_stmt
2642 = gimple_build_assign_with_ops (SSA_NAME,
2643 vect_recog_temp_ssa_var (itype, NULL),
2644 irhs1, NULL_TREE);
2645 break;
2646
2647 case BIT_NOT_EXPR:
2648 irhs1 = adjust_bool_pattern (rhs1, out_type, NULL_TREE, stmts);
2649 itype = TREE_TYPE (irhs1);
2650 pattern_stmt
2651 = gimple_build_assign_with_ops (BIT_XOR_EXPR,
2652 vect_recog_temp_ssa_var (itype, NULL),
2653 irhs1, build_int_cst (itype, 1));
2654 break;
2655
2656 case BIT_AND_EXPR:
2657 /* Try to optimize x = y & (a < b ? 1 : 0); into
2658 x = (a < b ? y : 0);
2659
2660 E.g. for:
2661 bool a_b, b_b, c_b;
2662 TYPE d_T;
2663
2664 S1 a_b = x1 CMP1 y1;
2665 S2 b_b = x2 CMP2 y2;
2666 S3 c_b = a_b & b_b;
2667 S4 d_T = (TYPE) c_b;
2668
2669 we would normally emit:
2670
2671 S1' a_T = x1 CMP1 y1 ? 1 : 0;
2672 S2' b_T = x2 CMP2 y2 ? 1 : 0;
2673 S3' c_T = a_T & b_T;
2674 S4' d_T = c_T;
2675
2676 but we can save one stmt by using the
2677 result of one of the COND_EXPRs in the other COND_EXPR and leave
2678 BIT_AND_EXPR stmt out:
2679
2680 S1' a_T = x1 CMP1 y1 ? 1 : 0;
2681 S3' c_T = x2 CMP2 y2 ? a_T : 0;
2682 S4' f_T = c_T;
2683
2684 At least when VEC_COND_EXPR is implemented using masks
2685 cond ? 1 : 0 is as expensive as cond ? var : 0, in both cases it
2686 computes the comparison masks and ands it, in one case with
2687 all ones vector, in the other case with a vector register.
2688 Don't do this for BIT_IOR_EXPR, because cond ? 1 : var; is
2689 often more expensive. */
2690 def_stmt = SSA_NAME_DEF_STMT (rhs2);
2691 def_rhs_code = gimple_assign_rhs_code (def_stmt);
2692 if (TREE_CODE_CLASS (def_rhs_code) == tcc_comparison)
2693 {
2694 tree def_rhs1 = gimple_assign_rhs1 (def_stmt);
2695 irhs1 = adjust_bool_pattern (rhs1, out_type, NULL_TREE, stmts);
2696 if (TYPE_PRECISION (TREE_TYPE (irhs1))
2697 == GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (def_rhs1))))
2698 {
2699 gimple tstmt;
2700 stmt_vec_info stmt_def_vinfo = vinfo_for_stmt (def_stmt);
2701 irhs2 = adjust_bool_pattern (rhs2, out_type, irhs1, stmts);
2702 tstmt = stmts->pop ();
2703 gcc_assert (tstmt == def_stmt);
2704 stmts->quick_push (stmt);
2705 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (stmt))
2706 = STMT_VINFO_RELATED_STMT (stmt_def_vinfo);
2707 gcc_assert (!STMT_VINFO_PATTERN_DEF_SEQ (stmt_def_vinfo));
2708 STMT_VINFO_RELATED_STMT (stmt_def_vinfo) = NULL;
2709 return irhs2;
2710 }
2711 else
2712 irhs2 = adjust_bool_pattern (rhs2, out_type, NULL_TREE, stmts);
2713 goto and_ior_xor;
2714 }
2715 def_stmt = SSA_NAME_DEF_STMT (rhs1);
2716 def_rhs_code = gimple_assign_rhs_code (def_stmt);
2717 if (TREE_CODE_CLASS (def_rhs_code) == tcc_comparison)
2718 {
2719 tree def_rhs1 = gimple_assign_rhs1 (def_stmt);
2720 irhs2 = adjust_bool_pattern (rhs2, out_type, NULL_TREE, stmts);
2721 if (TYPE_PRECISION (TREE_TYPE (irhs2))
2722 == GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (def_rhs1))))
2723 {
2724 gimple tstmt;
2725 stmt_vec_info stmt_def_vinfo = vinfo_for_stmt (def_stmt);
2726 irhs1 = adjust_bool_pattern (rhs1, out_type, irhs2, stmts);
2727 tstmt = stmts->pop ();
2728 gcc_assert (tstmt == def_stmt);
2729 stmts->quick_push (stmt);
2730 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (stmt))
2731 = STMT_VINFO_RELATED_STMT (stmt_def_vinfo);
2732 gcc_assert (!STMT_VINFO_PATTERN_DEF_SEQ (stmt_def_vinfo));
2733 STMT_VINFO_RELATED_STMT (stmt_def_vinfo) = NULL;
2734 return irhs1;
2735 }
2736 else
2737 irhs1 = adjust_bool_pattern (rhs1, out_type, NULL_TREE, stmts);
2738 goto and_ior_xor;
2739 }
2740 /* FALLTHRU */
2741 case BIT_IOR_EXPR:
2742 case BIT_XOR_EXPR:
2743 irhs1 = adjust_bool_pattern (rhs1, out_type, NULL_TREE, stmts);
2744 irhs2 = adjust_bool_pattern (rhs2, out_type, NULL_TREE, stmts);
2745 and_ior_xor:
2746 if (TYPE_PRECISION (TREE_TYPE (irhs1))
2747 != TYPE_PRECISION (TREE_TYPE (irhs2)))
2748 {
2749 int prec1 = TYPE_PRECISION (TREE_TYPE (irhs1));
2750 int prec2 = TYPE_PRECISION (TREE_TYPE (irhs2));
2751 int out_prec = TYPE_PRECISION (out_type);
2752 if (absu_hwi (out_prec - prec1) < absu_hwi (out_prec - prec2))
2753 irhs2 = adjust_bool_pattern_cast (TREE_TYPE (irhs1), rhs2);
2754 else if (absu_hwi (out_prec - prec1) > absu_hwi (out_prec - prec2))
2755 irhs1 = adjust_bool_pattern_cast (TREE_TYPE (irhs2), rhs1);
2756 else
2757 {
2758 irhs1 = adjust_bool_pattern_cast (out_type, rhs1);
2759 irhs2 = adjust_bool_pattern_cast (out_type, rhs2);
2760 }
2761 }
2762 itype = TREE_TYPE (irhs1);
2763 pattern_stmt
2764 = gimple_build_assign_with_ops (rhs_code,
2765 vect_recog_temp_ssa_var (itype, NULL),
2766 irhs1, irhs2);
2767 break;
2768
2769 default:
2770 gcc_assert (TREE_CODE_CLASS (rhs_code) == tcc_comparison);
2771 if (TREE_CODE (TREE_TYPE (rhs1)) != INTEGER_TYPE
2772 || !TYPE_UNSIGNED (TREE_TYPE (rhs1))
2773 || (TYPE_PRECISION (TREE_TYPE (rhs1))
2774 != GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (rhs1)))))
2775 {
2776 enum machine_mode mode = TYPE_MODE (TREE_TYPE (rhs1));
2777 itype
2778 = build_nonstandard_integer_type (GET_MODE_BITSIZE (mode), 1);
2779 }
2780 else
2781 itype = TREE_TYPE (rhs1);
2782 cond_expr = build2_loc (loc, rhs_code, itype, rhs1, rhs2);
2783 if (trueval == NULL_TREE)
2784 trueval = build_int_cst (itype, 1);
2785 else
2786 gcc_checking_assert (useless_type_conversion_p (itype,
2787 TREE_TYPE (trueval)));
2788 pattern_stmt
2789 = gimple_build_assign_with_ops (COND_EXPR,
2790 vect_recog_temp_ssa_var (itype, NULL),
2791 cond_expr, trueval,
2792 build_int_cst (itype, 0));
2793 break;
2794 }
2795
2796 stmts->safe_push (stmt);
2797 gimple_set_location (pattern_stmt, loc);
2798 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (stmt)) = pattern_stmt;
2799 return gimple_assign_lhs (pattern_stmt);
2800 }
2801
2802
2803 /* Function vect_recog_bool_pattern
2804
2805 Try to find pattern like following:
2806
2807 bool a_b, b_b, c_b, d_b, e_b;
2808 TYPE f_T;
2809 loop:
2810 S1 a_b = x1 CMP1 y1;
2811 S2 b_b = x2 CMP2 y2;
2812 S3 c_b = a_b & b_b;
2813 S4 d_b = x3 CMP3 y3;
2814 S5 e_b = c_b | d_b;
2815 S6 f_T = (TYPE) e_b;
2816
2817 where type 'TYPE' is an integral type.
2818
2819 Input:
2820
2821 * LAST_STMT: A stmt at the end from which the pattern
2822 search begins, i.e. cast of a bool to
2823 an integer type.
2824
2825 Output:
2826
2827 * TYPE_IN: The type of the input arguments to the pattern.
2828
2829 * TYPE_OUT: The type of the output of this pattern.
2830
2831 * Return value: A new stmt that will be used to replace the pattern.
2832
2833 Assuming size of TYPE is the same as size of all comparisons
2834 (otherwise some casts would be added where needed), the above
2835 sequence we create related pattern stmts:
2836 S1' a_T = x1 CMP1 y1 ? 1 : 0;
2837 S3' c_T = x2 CMP2 y2 ? a_T : 0;
2838 S4' d_T = x3 CMP3 y3 ? 1 : 0;
2839 S5' e_T = c_T | d_T;
2840 S6' f_T = e_T;
2841
2842 Instead of the above S3' we could emit:
2843 S2' b_T = x2 CMP2 y2 ? 1 : 0;
2844 S3' c_T = a_T | b_T;
2845 but the above is more efficient. */
2846
2847 static gimple
2848 vect_recog_bool_pattern (vec<gimple> *stmts, tree *type_in,
2849 tree *type_out)
2850 {
2851 gimple last_stmt = stmts->pop ();
2852 enum tree_code rhs_code;
2853 tree var, lhs, rhs, vectype;
2854 stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
2855 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
2856 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
2857 gimple pattern_stmt;
2858
2859 if (!is_gimple_assign (last_stmt))
2860 return NULL;
2861
2862 var = gimple_assign_rhs1 (last_stmt);
2863 lhs = gimple_assign_lhs (last_stmt);
2864
2865 if ((TYPE_PRECISION (TREE_TYPE (var)) != 1
2866 || !TYPE_UNSIGNED (TREE_TYPE (var)))
2867 && TREE_CODE (TREE_TYPE (var)) != BOOLEAN_TYPE)
2868 return NULL;
2869
2870 rhs_code = gimple_assign_rhs_code (last_stmt);
2871 if (CONVERT_EXPR_CODE_P (rhs_code))
2872 {
2873 if (TREE_CODE (TREE_TYPE (lhs)) != INTEGER_TYPE
2874 || TYPE_PRECISION (TREE_TYPE (lhs)) == 1)
2875 return NULL;
2876 vectype = get_vectype_for_scalar_type (TREE_TYPE (lhs));
2877 if (vectype == NULL_TREE)
2878 return NULL;
2879
2880 if (!check_bool_pattern (var, loop_vinfo, bb_vinfo))
2881 return NULL;
2882
2883 rhs = adjust_bool_pattern (var, TREE_TYPE (lhs), NULL_TREE, stmts);
2884 lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
2885 if (useless_type_conversion_p (TREE_TYPE (lhs), TREE_TYPE (rhs)))
2886 pattern_stmt
2887 = gimple_build_assign_with_ops (SSA_NAME, lhs, rhs, NULL_TREE);
2888 else
2889 pattern_stmt
2890 = gimple_build_assign_with_ops (NOP_EXPR, lhs, rhs, NULL_TREE);
2891 *type_out = vectype;
2892 *type_in = vectype;
2893 stmts->safe_push (last_stmt);
2894 if (dump_enabled_p ())
2895 dump_printf_loc (MSG_NOTE, vect_location,
2896 "vect_recog_bool_pattern: detected:\n");
2897
2898 return pattern_stmt;
2899 }
2900 else if (rhs_code == SSA_NAME
2901 && STMT_VINFO_DATA_REF (stmt_vinfo))
2902 {
2903 stmt_vec_info pattern_stmt_info;
2904 vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
2905 gcc_assert (vectype != NULL_TREE);
2906 if (!VECTOR_MODE_P (TYPE_MODE (vectype)))
2907 return NULL;
2908 if (!check_bool_pattern (var, loop_vinfo, bb_vinfo))
2909 return NULL;
2910
2911 rhs = adjust_bool_pattern (var, TREE_TYPE (vectype), NULL_TREE, stmts);
2912 lhs = build1 (VIEW_CONVERT_EXPR, TREE_TYPE (vectype), lhs);
2913 if (!useless_type_conversion_p (TREE_TYPE (lhs), TREE_TYPE (rhs)))
2914 {
2915 tree rhs2 = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
2916 gimple cast_stmt
2917 = gimple_build_assign_with_ops (NOP_EXPR, rhs2, rhs, NULL_TREE);
2918 new_pattern_def_seq (stmt_vinfo, cast_stmt);
2919 rhs = rhs2;
2920 }
2921 pattern_stmt
2922 = gimple_build_assign_with_ops (SSA_NAME, lhs, rhs, NULL_TREE);
2923 pattern_stmt_info = new_stmt_vec_info (pattern_stmt, loop_vinfo,
2924 bb_vinfo);
2925 set_vinfo_for_stmt (pattern_stmt, pattern_stmt_info);
2926 STMT_VINFO_DATA_REF (pattern_stmt_info)
2927 = STMT_VINFO_DATA_REF (stmt_vinfo);
2928 STMT_VINFO_DR_BASE_ADDRESS (pattern_stmt_info)
2929 = STMT_VINFO_DR_BASE_ADDRESS (stmt_vinfo);
2930 STMT_VINFO_DR_INIT (pattern_stmt_info) = STMT_VINFO_DR_INIT (stmt_vinfo);
2931 STMT_VINFO_DR_OFFSET (pattern_stmt_info)
2932 = STMT_VINFO_DR_OFFSET (stmt_vinfo);
2933 STMT_VINFO_DR_STEP (pattern_stmt_info) = STMT_VINFO_DR_STEP (stmt_vinfo);
2934 STMT_VINFO_DR_ALIGNED_TO (pattern_stmt_info)
2935 = STMT_VINFO_DR_ALIGNED_TO (stmt_vinfo);
2936 DR_STMT (STMT_VINFO_DATA_REF (stmt_vinfo)) = pattern_stmt;
2937 *type_out = vectype;
2938 *type_in = vectype;
2939 stmts->safe_push (last_stmt);
2940 if (dump_enabled_p ())
2941 dump_printf_loc (MSG_NOTE, vect_location,
2942 "vect_recog_bool_pattern: detected:\n");
2943 return pattern_stmt;
2944 }
2945 else
2946 return NULL;
2947 }
2948
2949
2950 /* Mark statements that are involved in a pattern. */
2951
2952 static inline void
2953 vect_mark_pattern_stmts (gimple orig_stmt, gimple pattern_stmt,
2954 tree pattern_vectype)
2955 {
2956 stmt_vec_info pattern_stmt_info, def_stmt_info;
2957 stmt_vec_info orig_stmt_info = vinfo_for_stmt (orig_stmt);
2958 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (orig_stmt_info);
2959 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (orig_stmt_info);
2960 gimple def_stmt;
2961
2962 pattern_stmt_info = vinfo_for_stmt (pattern_stmt);
2963 if (pattern_stmt_info == NULL)
2964 {
2965 pattern_stmt_info = new_stmt_vec_info (pattern_stmt, loop_vinfo,
2966 bb_vinfo);
2967 set_vinfo_for_stmt (pattern_stmt, pattern_stmt_info);
2968 }
2969 gimple_set_bb (pattern_stmt, gimple_bb (orig_stmt));
2970
2971 STMT_VINFO_RELATED_STMT (pattern_stmt_info) = orig_stmt;
2972 STMT_VINFO_DEF_TYPE (pattern_stmt_info)
2973 = STMT_VINFO_DEF_TYPE (orig_stmt_info);
2974 STMT_VINFO_VECTYPE (pattern_stmt_info) = pattern_vectype;
2975 STMT_VINFO_IN_PATTERN_P (orig_stmt_info) = true;
2976 STMT_VINFO_RELATED_STMT (orig_stmt_info) = pattern_stmt;
2977 STMT_VINFO_PATTERN_DEF_SEQ (pattern_stmt_info)
2978 = STMT_VINFO_PATTERN_DEF_SEQ (orig_stmt_info);
2979 if (STMT_VINFO_PATTERN_DEF_SEQ (pattern_stmt_info))
2980 {
2981 gimple_stmt_iterator si;
2982 for (si = gsi_start (STMT_VINFO_PATTERN_DEF_SEQ (pattern_stmt_info));
2983 !gsi_end_p (si); gsi_next (&si))
2984 {
2985 def_stmt = gsi_stmt (si);
2986 def_stmt_info = vinfo_for_stmt (def_stmt);
2987 if (def_stmt_info == NULL)
2988 {
2989 def_stmt_info = new_stmt_vec_info (def_stmt, loop_vinfo,
2990 bb_vinfo);
2991 set_vinfo_for_stmt (def_stmt, def_stmt_info);
2992 }
2993 gimple_set_bb (def_stmt, gimple_bb (orig_stmt));
2994 STMT_VINFO_RELATED_STMT (def_stmt_info) = orig_stmt;
2995 STMT_VINFO_DEF_TYPE (def_stmt_info)
2996 = STMT_VINFO_DEF_TYPE (orig_stmt_info);
2997 if (STMT_VINFO_VECTYPE (def_stmt_info) == NULL_TREE)
2998 STMT_VINFO_VECTYPE (def_stmt_info) = pattern_vectype;
2999 }
3000 }
3001 }
3002
3003 /* Function vect_pattern_recog_1
3004
3005 Input:
3006 PATTERN_RECOG_FUNC: A pointer to a function that detects a certain
3007 computation pattern.
3008 STMT: A stmt from which the pattern search should start.
3009
3010 If PATTERN_RECOG_FUNC successfully detected the pattern, it creates an
3011 expression that computes the same functionality and can be used to
3012 replace the sequence of stmts that are involved in the pattern.
3013
3014 Output:
3015 This function checks if the expression returned by PATTERN_RECOG_FUNC is
3016 supported in vector form by the target. We use 'TYPE_IN' to obtain the
3017 relevant vector type. If 'TYPE_IN' is already a vector type, then this
3018 indicates that target support had already been checked by PATTERN_RECOG_FUNC.
3019 If 'TYPE_OUT' is also returned by PATTERN_RECOG_FUNC, we check that it fits
3020 to the available target pattern.
3021
3022 This function also does some bookkeeping, as explained in the documentation
3023 for vect_recog_pattern. */
3024
3025 static void
3026 vect_pattern_recog_1 (vect_recog_func_ptr vect_recog_func,
3027 gimple_stmt_iterator si,
3028 vec<gimple> *stmts_to_replace)
3029 {
3030 gimple stmt = gsi_stmt (si), pattern_stmt;
3031 stmt_vec_info stmt_info;
3032 loop_vec_info loop_vinfo;
3033 tree pattern_vectype;
3034 tree type_in, type_out;
3035 enum tree_code code;
3036 int i;
3037 gimple next;
3038
3039 stmts_to_replace->truncate (0);
3040 stmts_to_replace->quick_push (stmt);
3041 pattern_stmt = (* vect_recog_func) (stmts_to_replace, &type_in, &type_out);
3042 if (!pattern_stmt)
3043 return;
3044
3045 stmt = stmts_to_replace->last ();
3046 stmt_info = vinfo_for_stmt (stmt);
3047 loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
3048
3049 if (VECTOR_MODE_P (TYPE_MODE (type_in)))
3050 {
3051 /* No need to check target support (already checked by the pattern
3052 recognition function). */
3053 pattern_vectype = type_out ? type_out : type_in;
3054 }
3055 else
3056 {
3057 enum machine_mode vec_mode;
3058 enum insn_code icode;
3059 optab optab;
3060
3061 /* Check target support */
3062 type_in = get_vectype_for_scalar_type (type_in);
3063 if (!type_in)
3064 return;
3065 if (type_out)
3066 type_out = get_vectype_for_scalar_type (type_out);
3067 else
3068 type_out = type_in;
3069 if (!type_out)
3070 return;
3071 pattern_vectype = type_out;
3072
3073 if (is_gimple_assign (pattern_stmt))
3074 code = gimple_assign_rhs_code (pattern_stmt);
3075 else
3076 {
3077 gcc_assert (is_gimple_call (pattern_stmt));
3078 code = CALL_EXPR;
3079 }
3080
3081 optab = optab_for_tree_code (code, type_in, optab_default);
3082 vec_mode = TYPE_MODE (type_in);
3083 if (!optab
3084 || (icode = optab_handler (optab, vec_mode)) == CODE_FOR_nothing
3085 || (insn_data[icode].operand[0].mode != TYPE_MODE (type_out)))
3086 return;
3087 }
3088
3089 /* Found a vectorizable pattern. */
3090 if (dump_enabled_p ())
3091 {
3092 dump_printf_loc (MSG_NOTE, vect_location,
3093 "pattern recognized: ");
3094 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_stmt, 0);
3095 dump_printf (MSG_NOTE, "\n");
3096 }
3097
3098 /* Mark the stmts that are involved in the pattern. */
3099 vect_mark_pattern_stmts (stmt, pattern_stmt, pattern_vectype);
3100
3101 /* Patterns cannot be vectorized using SLP, because they change the order of
3102 computation. */
3103 if (loop_vinfo)
3104 FOR_EACH_VEC_ELT (LOOP_VINFO_REDUCTIONS (loop_vinfo), i, next)
3105 if (next == stmt)
3106 LOOP_VINFO_REDUCTIONS (loop_vinfo).ordered_remove (i);
3107
3108 /* It is possible that additional pattern stmts are created and inserted in
3109 STMTS_TO_REPLACE. We create a stmt_info for each of them, and mark the
3110 relevant statements. */
3111 for (i = 0; stmts_to_replace->iterate (i, &stmt)
3112 && (unsigned) i < (stmts_to_replace->length () - 1);
3113 i++)
3114 {
3115 stmt_info = vinfo_for_stmt (stmt);
3116 pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
3117 if (dump_enabled_p ())
3118 {
3119 dump_printf_loc (MSG_NOTE, vect_location,
3120 "additional pattern stmt: ");
3121 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_stmt, 0);
3122 dump_printf (MSG_NOTE, "\n");
3123 }
3124
3125 vect_mark_pattern_stmts (stmt, pattern_stmt, NULL_TREE);
3126 }
3127 }
3128
3129
3130 /* Function vect_pattern_recog
3131
3132 Input:
3133 LOOP_VINFO - a struct_loop_info of a loop in which we want to look for
3134 computation idioms.
3135
3136 Output - for each computation idiom that is detected we create a new stmt
3137 that provides the same functionality and that can be vectorized. We
3138 also record some information in the struct_stmt_info of the relevant
3139 stmts, as explained below:
3140
3141 At the entry to this function we have the following stmts, with the
3142 following initial value in the STMT_VINFO fields:
3143
3144 stmt in_pattern_p related_stmt vec_stmt
3145 S1: a_i = .... - - -
3146 S2: a_2 = ..use(a_i).. - - -
3147 S3: a_1 = ..use(a_2).. - - -
3148 S4: a_0 = ..use(a_1).. - - -
3149 S5: ... = ..use(a_0).. - - -
3150
3151 Say the sequence {S1,S2,S3,S4} was detected as a pattern that can be
3152 represented by a single stmt. We then:
3153 - create a new stmt S6 equivalent to the pattern (the stmt is not
3154 inserted into the code)
3155 - fill in the STMT_VINFO fields as follows:
3156
3157 in_pattern_p related_stmt vec_stmt
3158 S1: a_i = .... - - -
3159 S2: a_2 = ..use(a_i).. - - -
3160 S3: a_1 = ..use(a_2).. - - -
3161 S4: a_0 = ..use(a_1).. true S6 -
3162 '---> S6: a_new = .... - S4 -
3163 S5: ... = ..use(a_0).. - - -
3164
3165 (the last stmt in the pattern (S4) and the new pattern stmt (S6) point
3166 to each other through the RELATED_STMT field).
3167
3168 S6 will be marked as relevant in vect_mark_stmts_to_be_vectorized instead
3169 of S4 because it will replace all its uses. Stmts {S1,S2,S3} will
3170 remain irrelevant unless used by stmts other than S4.
3171
3172 If vectorization succeeds, vect_transform_stmt will skip over {S1,S2,S3}
3173 (because they are marked as irrelevant). It will vectorize S6, and record
3174 a pointer to the new vector stmt VS6 from S6 (as usual).
3175 S4 will be skipped, and S5 will be vectorized as usual:
3176
3177 in_pattern_p related_stmt vec_stmt
3178 S1: a_i = .... - - -
3179 S2: a_2 = ..use(a_i).. - - -
3180 S3: a_1 = ..use(a_2).. - - -
3181 > VS6: va_new = .... - - -
3182 S4: a_0 = ..use(a_1).. true S6 VS6
3183 '---> S6: a_new = .... - S4 VS6
3184 > VS5: ... = ..vuse(va_new).. - - -
3185 S5: ... = ..use(a_0).. - - -
3186
3187 DCE could then get rid of {S1,S2,S3,S4,S5} (if their defs are not used
3188 elsewhere), and we'll end up with:
3189
3190 VS6: va_new = ....
3191 VS5: ... = ..vuse(va_new)..
3192
3193 In case of more than one pattern statements, e.g., widen-mult with
3194 intermediate type:
3195
3196 S1 a_t = ;
3197 S2 a_T = (TYPE) a_t;
3198 '--> S3: a_it = (interm_type) a_t;
3199 S4 prod_T = a_T * CONST;
3200 '--> S5: prod_T' = a_it w* CONST;
3201
3202 there may be other users of a_T outside the pattern. In that case S2 will
3203 be marked as relevant (as well as S3), and both S2 and S3 will be analyzed
3204 and vectorized. The vector stmt VS2 will be recorded in S2, and VS3 will
3205 be recorded in S3. */
3206
3207 void
3208 vect_pattern_recog (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo)
3209 {
3210 struct loop *loop;
3211 basic_block *bbs;
3212 unsigned int nbbs;
3213 gimple_stmt_iterator si;
3214 unsigned int i, j;
3215 vect_recog_func_ptr vect_recog_func;
3216 auto_vec<gimple, 1> stmts_to_replace;
3217 gimple stmt;
3218
3219 if (dump_enabled_p ())
3220 dump_printf_loc (MSG_NOTE, vect_location,
3221 "=== vect_pattern_recog ===\n");
3222
3223 if (loop_vinfo)
3224 {
3225 loop = LOOP_VINFO_LOOP (loop_vinfo);
3226 bbs = LOOP_VINFO_BBS (loop_vinfo);
3227 nbbs = loop->num_nodes;
3228 }
3229 else
3230 {
3231 bbs = &BB_VINFO_BB (bb_vinfo);
3232 nbbs = 1;
3233 }
3234
3235 /* Scan through the loop stmts, applying the pattern recognition
3236 functions starting at each stmt visited: */
3237 for (i = 0; i < nbbs; i++)
3238 {
3239 basic_block bb = bbs[i];
3240 for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
3241 {
3242 if (bb_vinfo && (stmt = gsi_stmt (si))
3243 && vinfo_for_stmt (stmt)
3244 && !STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (stmt)))
3245 continue;
3246
3247 /* Scan over all generic vect_recog_xxx_pattern functions. */
3248 for (j = 0; j < NUM_PATTERNS; j++)
3249 {
3250 vect_recog_func = vect_vect_recog_func_ptrs[j];
3251 vect_pattern_recog_1 (vect_recog_func, si,
3252 &stmts_to_replace);
3253 }
3254 }
3255 }
3256 }