real.h (struct real_format): Split the signbit field into two two fields, signbit_ro...
[gcc.git] / gcc / tree-complex.c
1 /* Lower complex number and vector operations to scalar operations.
2 Copyright (C) 2004, 2005 Free Software Foundation, Inc.
3
4 This file is part of GCC.
5
6 GCC is free software; you can redistribute it and/or modify it
7 under the terms of the GNU General Public License as published by the
8 Free Software Foundation; either version 2, or (at your option) any
9 later version.
10
11 GCC is distributed in the hope that it will be useful, but WITHOUT
12 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14 for more details.
15
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING. If not, write to the Free
18 Software Foundation, 59 Temple Place - Suite 330, Boston, MA
19 02111-1307, USA. */
20
21 #include "config.h"
22 #include "system.h"
23 #include "coretypes.h"
24 #include "tree.h"
25 #include "tm.h"
26 #include "rtl.h"
27 #include "expr.h"
28 #include "insn-codes.h"
29 #include "diagnostic.h"
30 #include "optabs.h"
31 #include "machmode.h"
32 #include "langhooks.h"
33 #include "tree-flow.h"
34 #include "tree-gimple.h"
35 #include "tree-iterator.h"
36 #include "tree-pass.h"
37 #include "flags.h"
38 #include "ggc.h"
39
40
41 /* Extract the real or imaginary part of a complex variable or constant.
42 Make sure that it's a proper gimple_val and gimplify it if not.
43 Emit any new code before BSI. */
44
45 static tree
46 extract_component (block_stmt_iterator *bsi, tree t, bool imagpart_p)
47 {
48 tree ret, inner_type;
49
50 inner_type = TREE_TYPE (TREE_TYPE (t));
51 switch (TREE_CODE (t))
52 {
53 case COMPLEX_CST:
54 ret = (imagpart_p ? TREE_IMAGPART (t) : TREE_REALPART (t));
55 break;
56
57 case COMPLEX_EXPR:
58 ret = TREE_OPERAND (t, imagpart_p);
59 break;
60
61 case VAR_DECL:
62 case PARM_DECL:
63 ret = build1 ((imagpart_p ? IMAGPART_EXPR : REALPART_EXPR),
64 inner_type, t);
65 break;
66
67 default:
68 gcc_unreachable ();
69 }
70
71 return gimplify_val (bsi, inner_type, ret);
72 }
73
74 /* Update an assignment to a complex variable in place. */
75
76 static void
77 update_complex_assignment (block_stmt_iterator *bsi, tree r, tree i)
78 {
79 tree stmt = bsi_stmt (*bsi);
80 tree type;
81
82 if (TREE_CODE (stmt) == RETURN_EXPR)
83 stmt = TREE_OPERAND (stmt, 0);
84
85 type = TREE_TYPE (TREE_OPERAND (stmt, 1));
86 TREE_OPERAND (stmt, 1) = build (COMPLEX_EXPR, type, r, i);
87 modify_stmt (stmt);
88 }
89
/* Expand complex addition to scalars:
	a + b = (ar + br) + i(ai + bi)
	a - b = (ar - br) + i(ai - bi)
   CODE is PLUS_EXPR or MINUS_EXPR; the same scalar operation applies
   componentwise to the real and imaginary parts.  */

static void
expand_complex_addition (block_stmt_iterator *bsi, tree inner_type,
			 tree ar, tree ai, tree br, tree bi,
			 enum tree_code code)
{
  tree rr, ri;

  /* Real part: ar CODE br.  */
  rr = gimplify_build2 (bsi, code, inner_type, ar, br);
  /* Imaginary part: ai CODE bi.  */
  ri = gimplify_build2 (bsi, code, inner_type, ai, bi);

  update_complex_assignment (bsi, rr, ri);
}
107
/* Expand a complex multiplication or division to a libcall to the c99
   compliant routines (__mulXC3 / __divXC3), which handle the special
   cases (infinities, NaNs) that the inline expansions do not.  */

static void
expand_complex_libcall (block_stmt_iterator *bsi, tree ar, tree ai,
			tree br, tree bi, enum tree_code code)
{
  enum machine_mode mode;
  enum built_in_function bcode;
  tree args, fn, stmt, type;

  /* Build the argument list (ar, ai, br, bi) back to front.  */
  args = tree_cons (NULL, bi, NULL);
  args = tree_cons (NULL, br, args);
  args = tree_cons (NULL, ai, args);
  args = tree_cons (NULL, ar, args);

  stmt = bsi_stmt (*bsi);
  type = TREE_TYPE (TREE_OPERAND (stmt, 1));

  /* Select the builtin by offsetting into the enum with the complex
     float mode; this relies on the BUILT_IN_COMPLEX_{MUL,DIV}_* enum
     values being laid out parallel to the MODE_COMPLEX_FLOAT modes.  */
  mode = TYPE_MODE (type);
  gcc_assert (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT);
  if (code == MULT_EXPR)
    bcode = BUILT_IN_COMPLEX_MUL_MIN + mode - MIN_MODE_COMPLEX_FLOAT;
  else if (code == RDIV_EXPR)
    bcode = BUILT_IN_COMPLEX_DIV_MIN + mode - MIN_MODE_COMPLEX_FLOAT;
  else
    gcc_unreachable ();
  fn = built_in_decls[bcode];

  /* Replace the statement's rhs with the call, in place.  */
  TREE_OPERAND (stmt, 1)
    = build3 (CALL_EXPR, type, build_fold_addr_expr (fn), args, NULL);
  modify_stmt (stmt);
}
141
142 /* Expand complex multiplication to scalars:
143 a * b = (ar*br - ai*bi) + i(ar*bi + br*ai)
144 */
145
146 static void
147 expand_complex_multiplication (block_stmt_iterator *bsi, tree inner_type,
148 tree ar, tree ai, tree br, tree bi)
149 {
150 tree t1, t2, t3, t4, rr, ri;
151
152 if (flag_complex_method == 2 && SCALAR_FLOAT_TYPE_P (inner_type))
153 {
154 expand_complex_libcall (bsi, ar, ai, br, bi, MULT_EXPR);
155 return;
156 }
157
158 t1 = gimplify_build2 (bsi, MULT_EXPR, inner_type, ar, br);
159 t2 = gimplify_build2 (bsi, MULT_EXPR, inner_type, ai, bi);
160 t3 = gimplify_build2 (bsi, MULT_EXPR, inner_type, ar, bi);
161
162 /* Avoid expanding redundant multiplication for the common
163 case of squaring a complex number. */
164 if (ar == br && ai == bi)
165 t4 = t3;
166 else
167 t4 = gimplify_build2 (bsi, MULT_EXPR, inner_type, ai, br);
168
169 rr = gimplify_build2 (bsi, MINUS_EXPR, inner_type, t1, t2);
170 ri = gimplify_build2 (bsi, PLUS_EXPR, inner_type, t3, t4);
171
172 update_complex_assignment (bsi, rr, ri);
173 }
174
175 /* Expand complex division to scalars, straightforward algorithm.
176 a / b = ((ar*br + ai*bi)/t) + i((ai*br - ar*bi)/t)
177 t = br*br + bi*bi
178 */
179
180 static void
181 expand_complex_div_straight (block_stmt_iterator *bsi, tree inner_type,
182 tree ar, tree ai, tree br, tree bi,
183 enum tree_code code)
184 {
185 tree rr, ri, div, t1, t2, t3;
186
187 t1 = gimplify_build2 (bsi, MULT_EXPR, inner_type, br, br);
188 t2 = gimplify_build2 (bsi, MULT_EXPR, inner_type, bi, bi);
189 div = gimplify_build2 (bsi, PLUS_EXPR, inner_type, t1, t2);
190
191 t1 = gimplify_build2 (bsi, MULT_EXPR, inner_type, ar, br);
192 t2 = gimplify_build2 (bsi, MULT_EXPR, inner_type, ai, bi);
193 t3 = gimplify_build2 (bsi, PLUS_EXPR, inner_type, t1, t2);
194 rr = gimplify_build2 (bsi, code, inner_type, t3, div);
195
196 t1 = gimplify_build2 (bsi, MULT_EXPR, inner_type, ai, br);
197 t2 = gimplify_build2 (bsi, MULT_EXPR, inner_type, ar, bi);
198 t3 = gimplify_build2 (bsi, MINUS_EXPR, inner_type, t1, t2);
199 ri = gimplify_build2 (bsi, code, inner_type, t3, div);
200
201 update_complex_assignment (bsi, rr, ri);
202 }
203
/* Expand complex division to scalars, modified algorithm to minimize
   overflow with wide input ranges.  This scales the computation by the
   ratio of the smaller-magnitude denominator component to the larger,
   branching at runtime on |br| < |bi| when that comparison cannot be
   folded at compile time.  */

static void
expand_complex_div_wide (block_stmt_iterator *bsi, tree inner_type,
			 tree ar, tree ai, tree br, tree bi,
			 enum tree_code code)
{
  tree rr, ri, ratio, div, t1, t2, tr, ti, cond;
  basic_block bb_cond, bb_true, bb_false, bb_join;

  /* Examine |br| < |bi|, and branch.  */
  t1 = gimplify_build1 (bsi, ABS_EXPR, inner_type, br);
  t2 = gimplify_build1 (bsi, ABS_EXPR, inner_type, bi);
  cond = fold (build (LT_EXPR, boolean_type_node, t1, t2));
  STRIP_NOPS (cond);

  bb_cond = bb_true = bb_false = bb_join = NULL;
  rr = ri = tr = ti = NULL;
  /* If the comparison did not fold to a constant, build a diamond in
     the CFG and compute each arm into shared temporaries RR and RI.  */
  if (!TREE_CONSTANT (cond))
    {
      edge e;

      cond = build (COND_EXPR, void_type_node, cond, NULL, NULL);
      bsi_insert_before (bsi, cond, BSI_SAME_STMT);

      /* Split the original block, and create the TRUE and FALSE blocks.  */
      e = split_block (bsi->bb, cond);
      bb_cond = e->src;
      bb_join = e->dest;
      bb_true = create_empty_bb (bb_cond);
      bb_false = create_empty_bb (bb_true);

      t1 = build (GOTO_EXPR, void_type_node, tree_block_label (bb_true));
      t2 = build (GOTO_EXPR, void_type_node, tree_block_label (bb_false));
      COND_EXPR_THEN (cond) = t1;
      COND_EXPR_ELSE (cond) = t2;

      /* Wire the blocks together.  */
      e->flags = EDGE_TRUE_VALUE;
      redirect_edge_succ (e, bb_true);
      make_edge (bb_cond, bb_false, EDGE_FALSE_VALUE);
      make_edge (bb_true, bb_join, EDGE_FALLTHRU);
      make_edge (bb_false, bb_join, EDGE_FALLTHRU);

      /* Update dominance info.  Note that bb_join's data was
         updated by split_block.  */
      if (dom_info_available_p (CDI_DOMINATORS))
        {
          set_immediate_dominator (CDI_DOMINATORS, bb_true, bb_cond);
          set_immediate_dominator (CDI_DOMINATORS, bb_false, bb_cond);
        }

      /* Temporaries into which both arms store their results.  */
      rr = make_rename_temp (inner_type, NULL);
      ri = make_rename_temp (inner_type, NULL);
    }

  /* In the TRUE branch (|br| < |bi|), we compute
      ratio = br/bi;
      div = (br * ratio) + bi;
      tr = (ar * ratio) + ai;
      ti = (ai * ratio) - ar;
      tr = tr / div;
      ti = ti / div;  */
  if (bb_true || integer_nonzerop (cond))
    {
      if (bb_true)
	{
	  /* Emit the arm's statements into the empty TRUE block.  */
	  *bsi = bsi_last (bb_true);
	  bsi_insert_after (bsi, build_empty_stmt (), BSI_NEW_STMT);
	}

      ratio = gimplify_build2 (bsi, code, inner_type, br, bi);

      t1 = gimplify_build2 (bsi, MULT_EXPR, inner_type, br, ratio);
      div = gimplify_build2 (bsi, PLUS_EXPR, inner_type, t1, bi);

      t1 = gimplify_build2 (bsi, MULT_EXPR, inner_type, ar, ratio);
      tr = gimplify_build2 (bsi, PLUS_EXPR, inner_type, t1, ai);

      t1 = gimplify_build2 (bsi, MULT_EXPR, inner_type, ai, ratio);
      ti = gimplify_build2 (bsi, MINUS_EXPR, inner_type, t1, ar);

      tr = gimplify_build2 (bsi, code, inner_type, tr, div);
      ti = gimplify_build2 (bsi, code, inner_type, ti, div);

      if (bb_true)
	{
	  /* Store into the join temporaries and drop the placeholder.  */
	  t1 = build (MODIFY_EXPR, inner_type, rr, tr);
	  bsi_insert_before (bsi, t1, BSI_SAME_STMT);
	  t1 = build (MODIFY_EXPR, inner_type, ri, ti);
	  bsi_insert_before (bsi, t1, BSI_SAME_STMT);
	  bsi_remove (bsi);
	}
    }

  /* In the FALSE branch (|br| >= |bi|), we compute
      ratio = bi/br;
      div = (bi * ratio) + br;
      tr = (ai * ratio) + ar;
      ti = ai - (ar * ratio);
      tr = tr / div;
      ti = ti / div;  */
  if (bb_false || integer_zerop (cond))
    {
      if (bb_false)
	{
	  /* Emit the arm's statements into the empty FALSE block.  */
	  *bsi = bsi_last (bb_false);
	  bsi_insert_after (bsi, build_empty_stmt (), BSI_NEW_STMT);
	}

      ratio = gimplify_build2 (bsi, code, inner_type, bi, br);

      t1 = gimplify_build2 (bsi, MULT_EXPR, inner_type, bi, ratio);
      div = gimplify_build2 (bsi, PLUS_EXPR, inner_type, t1, br);

      t1 = gimplify_build2 (bsi, MULT_EXPR, inner_type, ai, ratio);
      tr = gimplify_build2 (bsi, PLUS_EXPR, inner_type, t1, ar);

      t1 = gimplify_build2 (bsi, MULT_EXPR, inner_type, ar, ratio);
      ti = gimplify_build2 (bsi, MINUS_EXPR, inner_type, ai, t1);

      tr = gimplify_build2 (bsi, code, inner_type, tr, div);
      ti = gimplify_build2 (bsi, code, inner_type, ti, div);

      if (bb_false)
	{
	  /* Store into the join temporaries and drop the placeholder.  */
	  t1 = build (MODIFY_EXPR, inner_type, rr, tr);
	  bsi_insert_before (bsi, t1, BSI_SAME_STMT);
	  t1 = build (MODIFY_EXPR, inner_type, ri, ti);
	  bsi_insert_before (bsi, t1, BSI_SAME_STMT);
	  bsi_remove (bsi);
	}
    }

  /* Continue in the join block; if the condition folded, only one arm
     was emitted inline and its results are used directly.  */
  if (bb_join)
    *bsi = bsi_start (bb_join);
  else
    rr = tr, ri = ti;

  update_complex_assignment (bsi, rr, ri);
}
346
347 /* Expand complex division to scalars. */
348
349 static void
350 expand_complex_division (block_stmt_iterator *bsi, tree inner_type,
351 tree ar, tree ai, tree br, tree bi,
352 enum tree_code code)
353 {
354 switch (flag_complex_method)
355 {
356 case 0:
357 /* straightforward implementation of complex divide acceptable. */
358 expand_complex_div_straight (bsi, inner_type, ar, ai, br, bi, code);
359 break;
360
361 case 2:
362 if (SCALAR_FLOAT_TYPE_P (inner_type))
363 {
364 expand_complex_libcall (bsi, ar, ai, br, bi, code);
365 return;
366 }
367 /* FALLTHRU */
368
369 case 1:
370 /* wide ranges of inputs must work for complex divide. */
371 expand_complex_div_wide (bsi, inner_type, ar, ai, br, bi, code);
372 break;
373
374 default:
375 gcc_unreachable ();
376 }
377 }
378
379 /* Expand complex negation to scalars:
380 -a = (-ar) + i(-ai)
381 */
382
383 static void
384 expand_complex_negation (block_stmt_iterator *bsi, tree inner_type,
385 tree ar, tree ai)
386 {
387 tree rr, ri;
388
389 rr = gimplify_build1 (bsi, NEGATE_EXPR, inner_type, ar);
390 ri = gimplify_build1 (bsi, NEGATE_EXPR, inner_type, ai);
391
392 update_complex_assignment (bsi, rr, ri);
393 }
394
395 /* Expand complex conjugate to scalars:
396 ~a = (ar) + i(-ai)
397 */
398
399 static void
400 expand_complex_conjugate (block_stmt_iterator *bsi, tree inner_type,
401 tree ar, tree ai)
402 {
403 tree ri;
404
405 ri = gimplify_build1 (bsi, NEGATE_EXPR, inner_type, ai);
406
407 update_complex_assignment (bsi, ar, ri);
408 }
409
/* Expand complex comparison (EQ or NE only).  Two complex values are
   equal iff both component comparisons hold (EQ combines with AND);
   they are unequal if either holds (NE combines with OR).  */

static void
expand_complex_comparison (block_stmt_iterator *bsi, tree ar, tree ai,
			   tree br, tree bi, enum tree_code code)
{
  tree cr, ci, cc, stmt, expr, type;

  /* Compare the parts, then combine the two boolean results.  */
  cr = gimplify_build2 (bsi, code, boolean_type_node, ar, br);
  ci = gimplify_build2 (bsi, code, boolean_type_node, ai, bi);
  cc = gimplify_build2 (bsi,
			(code == EQ_EXPR ? TRUTH_AND_EXPR : TRUTH_OR_EXPR),
			boolean_type_node, cr, ci);

  stmt = expr = bsi_stmt (*bsi);

  /* Store the combined result back into whatever consumed the
     comparison: an assignment rhs (possibly under RETURN_EXPR) or a
     COND_EXPR condition.  */
  switch (TREE_CODE (stmt))
    {
    case RETURN_EXPR:
      /* The comparison lives in the embedded MODIFY_EXPR.  */
      expr = TREE_OPERAND (stmt, 0);
      /* FALLTHRU */
    case MODIFY_EXPR:
      type = TREE_TYPE (TREE_OPERAND (expr, 1));
      TREE_OPERAND (expr, 1) = fold_convert (type, cc);
      break;
    case COND_EXPR:
      TREE_OPERAND (stmt, 0) = cc;
      break;
    default:
      gcc_unreachable ();
    }

  modify_stmt (stmt);
}
444
/* Process one statement.  If we identify a complex operation, expand it.  */

static void
expand_complex_operations_1 (block_stmt_iterator *bsi)
{
  tree stmt = bsi_stmt (*bsi);
  tree rhs, type, inner_type;
  tree ac, ar, ai, bc, br, bi;
  enum tree_code code;

  /* Find the expression that might be a complex operation: the rhs of
     an assignment (possibly wrapped in RETURN_EXPR) or the condition
     of a COND_EXPR.  */
  switch (TREE_CODE (stmt))
    {
    case RETURN_EXPR:
      stmt = TREE_OPERAND (stmt, 0);
      if (!stmt)
	return;
      if (TREE_CODE (stmt) != MODIFY_EXPR)
	return;
      /* FALLTHRU */

    case MODIFY_EXPR:
      rhs = TREE_OPERAND (stmt, 1);
      break;

    case COND_EXPR:
      rhs = TREE_OPERAND (stmt, 0);
      break;

    default:
      return;
    }

  type = TREE_TYPE (rhs);
  code = TREE_CODE (rhs);

  /* Initial filter for operations we handle.  */
  switch (code)
    {
    case PLUS_EXPR:
    case MINUS_EXPR:
    case MULT_EXPR:
    case TRUNC_DIV_EXPR:
    case CEIL_DIV_EXPR:
    case FLOOR_DIV_EXPR:
    case ROUND_DIV_EXPR:
    case RDIV_EXPR:
    case NEGATE_EXPR:
    case CONJ_EXPR:
      /* Arithmetic: the result itself must be complex.  */
      if (TREE_CODE (type) != COMPLEX_TYPE)
	return;
      inner_type = TREE_TYPE (type);
      break;

    case EQ_EXPR:
    case NE_EXPR:
      /* Comparisons: the result is boolean, so test an operand.  */
      inner_type = TREE_TYPE (TREE_OPERAND (rhs, 1));
      if (TREE_CODE (inner_type) != COMPLEX_TYPE)
	return;
      break;

    default:
      return;
    }

  /* Extract the components of the two complex values.  Make sure and
     handle the common case of the same value used twice specially.  */
  ac = TREE_OPERAND (rhs, 0);
  ar = extract_component (bsi, ac, 0);
  ai = extract_component (bsi, ac, 1);

  if (TREE_CODE_CLASS (code) == tcc_unary)
    bc = br = bi = NULL;
  else
    {
      bc = TREE_OPERAND (rhs, 1);
      if (ac == bc)
	br = ar, bi = ai;
      else
	{
	  br = extract_component (bsi, bc, 0);
	  bi = extract_component (bsi, bc, 1);
	}
    }

  /* Dispatch to the operation-specific expander.  */
  switch (code)
    {
    case PLUS_EXPR:
    case MINUS_EXPR:
      expand_complex_addition (bsi, inner_type, ar, ai, br, bi, code);
      break;

    case MULT_EXPR:
      expand_complex_multiplication (bsi, inner_type, ar, ai, br, bi);
      break;

    case TRUNC_DIV_EXPR:
    case CEIL_DIV_EXPR:
    case FLOOR_DIV_EXPR:
    case ROUND_DIV_EXPR:
    case RDIV_EXPR:
      expand_complex_division (bsi, inner_type, ar, ai, br, bi, code);
      break;

    case NEGATE_EXPR:
      expand_complex_negation (bsi, inner_type, ar, ai);
      break;

    case CONJ_EXPR:
      expand_complex_conjugate (bsi, inner_type, ar, ai);
      break;

    case EQ_EXPR:
    case NE_EXPR:
      expand_complex_comparison (bsi, ar, ai, br, bi, code);
      break;

    default:
      gcc_unreachable ();
    }
}
565 \f
566 /* Build a constant of type TYPE, made of VALUE's bits replicated
567 every TYPE_SIZE (INNER_TYPE) bits to fit TYPE's precision. */
568 static tree
569 build_replicated_const (tree type, tree inner_type, HOST_WIDE_INT value)
570 {
571 int width = tree_low_cst (TYPE_SIZE (inner_type), 1);
572 int n = HOST_BITS_PER_WIDE_INT / width;
573 unsigned HOST_WIDE_INT low, high, mask;
574 tree ret;
575
576 gcc_assert (n);
577
578 if (width == HOST_BITS_PER_WIDE_INT)
579 low = value;
580 else
581 {
582 mask = ((HOST_WIDE_INT)1 << width) - 1;
583 low = (unsigned HOST_WIDE_INT) ~0 / mask * (value & mask);
584 }
585
586 if (TYPE_PRECISION (type) < HOST_BITS_PER_WIDE_INT)
587 low &= ((HOST_WIDE_INT)1 << TYPE_PRECISION (type)) - 1, high = 0;
588 else if (TYPE_PRECISION (type) == HOST_BITS_PER_WIDE_INT)
589 high = 0;
590 else if (TYPE_PRECISION (type) == 2 * HOST_BITS_PER_WIDE_INT)
591 high = low;
592 else
593 gcc_unreachable ();
594
595 ret = build_int_cst_wide (type, low, high);
596 return ret;
597 }
598
/* GC-protected cache for build_word_mode_vector_type: the word_mode
   element type, plus the most recently built vector type and its unit
   count.  */
static GTY(()) tree vector_inner_type;
static GTY(()) tree vector_last_type;
static GTY(()) int vector_last_nunits;
602
/* Return a suitable vector types made of SUBPARTS units each of mode
   "word_mode" (the global variable).  Caches the last result, since
   consecutive requests typically ask for the same unit count.  */
static tree
build_word_mode_vector_type (int nunits)
{
  if (!vector_inner_type)
    /* First call: materialize the word_mode element type.  */
    vector_inner_type = lang_hooks.types.type_for_mode (word_mode, 1);
  else if (vector_last_nunits == nunits)
    {
      /* Cache hit from the previous call.  */
      gcc_assert (TREE_CODE (vector_last_type) == VECTOR_TYPE);
      return vector_last_type;
    }

  /* We build a new type, but we canonicalize it nevertheless,
     because it still saves some memory.  */
  vector_last_nunits = nunits;
  vector_last_type = type_hash_canon (nunits,
				      build_vector_type (vector_inner_type,
							 nunits));
  return vector_last_type;
}
624
/* Signature of the per-piece expanders below: iterator, computation
   type, operands A and B, the piece's bit position and size within the
   vector, and the operation's tree code.  */
typedef tree (*elem_op_func) (block_stmt_iterator *,
			      tree, tree, tree, tree, tree, enum tree_code);
627
628 static inline tree
629 tree_vec_extract (block_stmt_iterator *bsi, tree type,
630 tree t, tree bitsize, tree bitpos)
631 {
632 if (bitpos)
633 return gimplify_build3 (bsi, BIT_FIELD_REF, type, t, bitsize, bitpos);
634 else
635 return gimplify_build1 (bsi, VIEW_CONVERT_EXPR, type, t);
636 }
637
638 static tree
639 do_unop (block_stmt_iterator *bsi, tree inner_type, tree a,
640 tree b ATTRIBUTE_UNUSED, tree bitpos, tree bitsize,
641 enum tree_code code)
642 {
643 a = tree_vec_extract (bsi, inner_type, a, bitsize, bitpos);
644 return gimplify_build1 (bsi, code, inner_type, a);
645 }
646
647 static tree
648 do_binop (block_stmt_iterator *bsi, tree inner_type, tree a, tree b,
649 tree bitpos, tree bitsize, enum tree_code code)
650 {
651 a = tree_vec_extract (bsi, inner_type, a, bitsize, bitpos);
652 b = tree_vec_extract (bsi, inner_type, b, bitsize, bitpos);
653 return gimplify_build2 (bsi, code, inner_type, a, b);
654 }
655
656 /* Expand vector addition to scalars. This does bit twiddling
657 in order to increase parallelism:
658
659 a + b = (((int) a & 0x7f7f7f7f) + ((int) b & 0x7f7f7f7f)) ^
660 (a ^ b) & 0x80808080
661
662 a - b = (((int) a | 0x80808080) - ((int) b & 0x7f7f7f7f)) ^
663 (a ^ ~b) & 0x80808080
664
665 -b = (0x80808080 - ((int) b & 0x7f7f7f7f)) ^ (~b & 0x80808080)
666
667 This optimization should be done only if 4 vector items or more
668 fit into a word. */
669 static tree
670 do_plus_minus (block_stmt_iterator *bsi, tree word_type, tree a, tree b,
671 tree bitpos ATTRIBUTE_UNUSED, tree bitsize ATTRIBUTE_UNUSED,
672 enum tree_code code)
673 {
674 tree inner_type = TREE_TYPE (TREE_TYPE (a));
675 unsigned HOST_WIDE_INT max;
676 tree low_bits, high_bits, a_low, b_low, result_low, signs;
677
678 max = GET_MODE_MASK (TYPE_MODE (inner_type));
679 low_bits = build_replicated_const (word_type, inner_type, max >> 1);
680 high_bits = build_replicated_const (word_type, inner_type, max & ~(max >> 1));
681
682 a = tree_vec_extract (bsi, word_type, a, bitsize, bitpos);
683 b = tree_vec_extract (bsi, word_type, b, bitsize, bitpos);
684
685 signs = gimplify_build2 (bsi, BIT_XOR_EXPR, word_type, a, b);
686 b_low = gimplify_build2 (bsi, BIT_AND_EXPR, word_type, b, low_bits);
687 if (code == PLUS_EXPR)
688 a_low = gimplify_build2 (bsi, BIT_AND_EXPR, word_type, a, low_bits);
689 else
690 {
691 a_low = gimplify_build2 (bsi, BIT_IOR_EXPR, word_type, a, high_bits);
692 signs = gimplify_build1 (bsi, BIT_NOT_EXPR, word_type, signs);
693 }
694
695 signs = gimplify_build2 (bsi, BIT_AND_EXPR, word_type, signs, high_bits);
696 result_low = gimplify_build2 (bsi, code, word_type, a_low, b_low);
697 return gimplify_build2 (bsi, BIT_XOR_EXPR, word_type, result_low, signs);
698 }
699
700 static tree
701 do_negate (block_stmt_iterator *bsi, tree word_type, tree b,
702 tree unused ATTRIBUTE_UNUSED, tree bitpos ATTRIBUTE_UNUSED,
703 tree bitsize ATTRIBUTE_UNUSED,
704 enum tree_code code ATTRIBUTE_UNUSED)
705 {
706 tree inner_type = TREE_TYPE (TREE_TYPE (b));
707 HOST_WIDE_INT max;
708 tree low_bits, high_bits, b_low, result_low, signs;
709
710 max = GET_MODE_MASK (TYPE_MODE (inner_type));
711 low_bits = build_replicated_const (word_type, inner_type, max >> 1);
712 high_bits = build_replicated_const (word_type, inner_type, max & ~(max >> 1));
713
714 b = tree_vec_extract (bsi, word_type, b, bitsize, bitpos);
715
716 b_low = gimplify_build2 (bsi, BIT_AND_EXPR, word_type, b, low_bits);
717 signs = gimplify_build1 (bsi, BIT_NOT_EXPR, word_type, b);
718 signs = gimplify_build2 (bsi, BIT_AND_EXPR, word_type, signs, high_bits);
719 result_low = gimplify_build2 (bsi, MINUS_EXPR, word_type, high_bits, b_low);
720 return gimplify_build2 (bsi, BIT_XOR_EXPR, word_type, result_low, signs);
721 }
722
/* Expand a vector operation to scalars, by using many operations
   whose type is the vector type's inner type.  F is called once per
   piece of INNER_TYPE width; the results are collected into a
   CONSTRUCTOR of the original vector TYPE.  */
static tree
expand_vector_piecewise (block_stmt_iterator *bsi, elem_op_func f,
			 tree type, tree inner_type,
			 tree a, tree b, enum tree_code code)
{
  tree head, *chain = &head;
  tree part_width = TYPE_SIZE (inner_type);
  tree index = bitsize_int (0);
  int nunits = TYPE_VECTOR_SUBPARTS (type);
  /* Number of original vector elements per piece (INNER_TYPE may be
     wider than the vector's own element type).  */
  int delta = tree_low_cst (part_width, 1)
	      / tree_low_cst (TYPE_SIZE (TREE_TYPE (type)), 1);
  int i;

  /* Walk the vector piece by piece, advancing the bit index by the
     piece width each iteration.  */
  for (i = 0; i < nunits;
       i += delta, index = int_const_binop (PLUS_EXPR, index, part_width, 0))
    {
      tree result = f (bsi, inner_type, a, b, index, part_width, code);
      *chain = tree_cons (NULL_TREE, result, NULL_TREE);
      chain = &TREE_CHAIN (*chain);
    }

  return build1 (CONSTRUCTOR, type, head);
}
748
/* Expand a vector operation to scalars with the freedom to use
   a scalar integer type, or to use a different size for the items
   in the vector type.  The result is VIEW_CONVERTed back to TYPE.  */
static tree
expand_vector_parallel (block_stmt_iterator *bsi, elem_op_func f, tree type,
			tree a, tree b,
			enum tree_code code)
{
  tree result, compute_type;
  enum machine_mode mode;
  int n_words = tree_low_cst (TYPE_SIZE_UNIT (type), 1) / UNITS_PER_WORD;

  /* We have three strategies.  If the type is already correct, just do
     the operation an element at a time.  Else, if the vector is wider than
     one word, do it a word at a time; finally, if the vector is smaller
     than one word, do it as a scalar.  */
  if (TYPE_MODE (TREE_TYPE (type)) == word_mode)
     return expand_vector_piecewise (bsi, f,
				     type, TREE_TYPE (type),
				     a, b, code);
  else if (n_words > 1)
    {
      /* Word-at-a-time: operate on a vector of word_mode elements.  */
      tree word_type = build_word_mode_vector_type (n_words);
      result = expand_vector_piecewise (bsi, f,
				        word_type, TREE_TYPE (word_type),
				        a, b, code);
      result = gimplify_val (bsi, word_type, result);
    }
  else
    {
      /* Use a single scalar operation with a mode no wider than word_mode.  */
      mode = mode_for_size (tree_low_cst (TYPE_SIZE (type), 1), MODE_INT, 0);
      compute_type = lang_hooks.types.type_for_mode (mode, 1);
      result = f (bsi, compute_type, a, b, NULL_TREE, NULL_TREE, code);
    }

  return build1 (VIEW_CONVERT_EXPR, type, result);
}
787
788 /* Expand a vector operation to scalars; for integer types we can use
789 special bit twiddling tricks to do the sums a word at a time, using
790 function F_PARALLEL instead of F. These tricks are done only if
791 they can process at least four items, that is, only if the vector
792 holds at least four items and if a word can hold four items. */
793 static tree
794 expand_vector_addition (block_stmt_iterator *bsi,
795 elem_op_func f, elem_op_func f_parallel,
796 tree type, tree a, tree b, enum tree_code code)
797 {
798 int parts_per_word = UNITS_PER_WORD
799 / tree_low_cst (TYPE_SIZE_UNIT (TREE_TYPE (type)), 1);
800
801 if (INTEGRAL_TYPE_P (TREE_TYPE (type))
802 && parts_per_word >= 4
803 && TYPE_VECTOR_SUBPARTS (type) >= 4)
804 return expand_vector_parallel (bsi, f_parallel,
805 type, a, b, code);
806 else
807 return expand_vector_piecewise (bsi, f,
808 type, TREE_TYPE (type),
809 a, b, code);
810 }
811
812 /* Return a type for the widest vector mode whose components are of mode
813 INNER_MODE, or NULL_TREE if none is found. */
814 static tree
815 type_for_widest_vector_mode (enum machine_mode inner_mode, optab op)
816 {
817 enum machine_mode best_mode = VOIDmode, mode;
818 int best_nunits = 0;
819
820 if (GET_MODE_CLASS (inner_mode) == MODE_FLOAT)
821 mode = MIN_MODE_VECTOR_FLOAT;
822 else
823 mode = MIN_MODE_VECTOR_INT;
824
825 for (; mode != VOIDmode; mode = GET_MODE_WIDER_MODE (mode))
826 if (GET_MODE_INNER (mode) == inner_mode
827 && GET_MODE_NUNITS (mode) > best_nunits
828 && op->handlers[mode].insn_code != CODE_FOR_nothing)
829 best_mode = mode, best_nunits = GET_MODE_NUNITS (mode);
830
831 if (best_mode == VOIDmode)
832 return NULL_TREE;
833 else
834 return lang_hooks.types.type_for_mode (best_mode, 1);
835 }
836
/* Process one statement.  If we identify a vector operation, expand it.  */

static void
expand_vector_operations_1 (block_stmt_iterator *bsi)
{
  tree stmt = bsi_stmt (*bsi);
  tree *p_rhs, rhs, type, compute_type;
  enum tree_code code;
  enum machine_mode compute_mode;
  optab op;

  /* Find the rhs of an assignment (possibly under RETURN_EXPR); keep a
     pointer to it so the expansion can be stored back in place.  */
  switch (TREE_CODE (stmt))
    {
    case RETURN_EXPR:
      stmt = TREE_OPERAND (stmt, 0);
      if (!stmt || TREE_CODE (stmt) != MODIFY_EXPR)
	return;

      /* FALLTHRU */

    case MODIFY_EXPR:
      p_rhs = &TREE_OPERAND (stmt, 1);
      rhs = *p_rhs;
      break;

    default:
      return;
    }

  type = TREE_TYPE (rhs);
  if (TREE_CODE (type) != VECTOR_TYPE)
    return;

  /* Only plain unary/binary arithmetic is handled; conversions and
     reinterpretations are left alone.  */
  code = TREE_CODE (rhs);
  if (TREE_CODE_CLASS (code) != tcc_unary
      && TREE_CODE_CLASS (code) != tcc_binary)
    return;

  if (code == NOP_EXPR || code == VIEW_CONVERT_EXPR)
    return;

  gcc_assert (code != CONVERT_EXPR);
  op = optab_for_tree_code (code, type);

  /* Optabs will try converting a negation into a subtraction, so
     look for it as well.  TODO: negation of floating-point vectors
     might be turned into an exclusive OR toggling the sign bit.  */
  if (op == NULL
      && code == NEGATE_EXPR
      && INTEGRAL_TYPE_P (TREE_TYPE (type)))
    op = optab_for_tree_code (MINUS_EXPR, type);

  /* For very wide vectors, try using a smaller vector mode.  */
  compute_type = type;
  if (TYPE_MODE (type) == BLKmode && op)
    {
      tree vector_compute_type
        = type_for_widest_vector_mode (TYPE_MODE (TREE_TYPE (type)), op);
      if (vector_compute_type != NULL_TREE)
        compute_type = vector_compute_type;
    }

  compute_mode = TYPE_MODE (compute_type);

  /* If we are breaking a BLKmode vector into smaller pieces,
     type_for_widest_vector_mode has already looked into the optab,
     so skip these checks.  */
  if (compute_type == type)
    {
      if ((GET_MODE_CLASS (compute_mode) == MODE_VECTOR_INT
	   || GET_MODE_CLASS (compute_mode) == MODE_VECTOR_FLOAT)
          && op != NULL
	  && op->handlers[compute_mode].insn_code != CODE_FOR_nothing)
	/* The hardware supports the operation directly; nothing to do.  */
	return;
      else
	{
	  /* There is no operation in hardware, so fall back to scalars.  */
	  compute_type = TREE_TYPE (type);
	  compute_mode = TYPE_MODE (compute_type);
	}
    }

  /* If the compute mode is not a vector mode (hence we are decomposing
     a BLKmode vector to smaller, hardware-supported vectors), we may
     want to expand the operations in parallel.  */
  if (GET_MODE_CLASS (compute_mode) != MODE_VECTOR_INT
      && GET_MODE_CLASS (compute_mode) != MODE_VECTOR_FLOAT)
    switch (code)
      {
      case PLUS_EXPR:
      case MINUS_EXPR:
	/* The word-parallel bit tricks assume wraparound arithmetic,
	   so punt when signed overflow traps.  */
        if (TYPE_TRAP_SIGNED (type))
          break;

	*p_rhs = expand_vector_addition (bsi, do_binop, do_plus_minus, type,
		      		         TREE_OPERAND (rhs, 0),
					 TREE_OPERAND (rhs, 1), code);
	modify_stmt (bsi_stmt (*bsi));
        return;

      case NEGATE_EXPR:
        if (TYPE_TRAP_SIGNED (type))
          break;

	*p_rhs = expand_vector_addition (bsi, do_unop, do_negate, type,
		      		         TREE_OPERAND (rhs, 0),
					 NULL_TREE, code);
	modify_stmt (bsi_stmt (*bsi));
        return;

      case BIT_AND_EXPR:
      case BIT_IOR_EXPR:
      case BIT_XOR_EXPR:
	/* Bitwise operations are element-blind: always word-parallel.  */
	*p_rhs = expand_vector_parallel (bsi, do_binop, type,
		      		         TREE_OPERAND (rhs, 0),
					 TREE_OPERAND (rhs, 1), code);
	modify_stmt (bsi_stmt (*bsi));
        return;

      case BIT_NOT_EXPR:
	*p_rhs = expand_vector_parallel (bsi, do_unop, type,
		      		         TREE_OPERAND (rhs, 0),
					 NULL_TREE, code);
	modify_stmt (bsi_stmt (*bsi));
        return;

      default:
	break;
      }

  /* Fallback: expand piece by piece in COMPUTE_TYPE.  */
  if (TREE_CODE_CLASS (code) == tcc_unary)
    *p_rhs = expand_vector_piecewise (bsi, do_unop, type, compute_type,
				      TREE_OPERAND (rhs, 0),
				      NULL_TREE, code);
  else
    *p_rhs = expand_vector_piecewise (bsi, do_binop, type, compute_type,
				      TREE_OPERAND (rhs, 0),
				      TREE_OPERAND (rhs, 1), code);

  modify_stmt (bsi_stmt (*bsi));
}
978 \f
979 static void
980 expand_vector_operations (void)
981 {
982 block_stmt_iterator bsi;
983 basic_block bb;
984
985 FOR_EACH_BB (bb)
986 {
987 for (bsi = bsi_start (bb); !bsi_end_p (bsi); bsi_next (&bsi))
988 expand_vector_operations_1 (&bsi);
989 }
990 }
991
992 static void
993 tree_lower_operations (void)
994 {
995 int old_last_basic_block = last_basic_block;
996 block_stmt_iterator bsi;
997 basic_block bb;
998
999 FOR_EACH_BB (bb)
1000 {
1001 if (bb->index >= old_last_basic_block)
1002 continue;
1003 for (bsi = bsi_start (bb); !bsi_end_p (bsi); bsi_next (&bsi))
1004 {
1005 expand_complex_operations_1 (&bsi);
1006 expand_vector_operations_1 (&bsi);
1007 }
1008 }
1009 }
1010
1011
/* Pass descriptor for the SSA-form vector lowering pass ("vector"),
   which runs expand_vector_operations over the whole function.  */
struct tree_opt_pass pass_lower_vector_ssa =
{
  "vector",				/* name */
  NULL,					/* gate */
  expand_vector_operations,		/* execute */
  NULL,					/* sub */
  NULL,					/* next */
  0,					/* static_pass_number */
  0,					/* tv_id */
  PROP_cfg,				/* properties_required */
  0,					/* properties_provided */
  0,					/* properties_destroyed */
  0,					/* todo_flags_start */
  TODO_dump_func | TODO_rename_vars	/* todo_flags_finish */
    | TODO_ggc_collect | TODO_verify_ssa
    | TODO_verify_stmts | TODO_verify_flow,
  0					/* letter */
};
1030
1031 struct tree_opt_pass pass_pre_expand =
1032 {
1033 "oplower", /* name */
1034 0, /* gate */
1035 tree_lower_operations, /* execute */
1036 NULL, /* sub */
1037 NULL, /* next */
1038 0, /* static_pass_number */
1039 0, /* tv_id */
1040 PROP_cfg, /* properties_required */
1041 0, /* properties_provided */
1042 0, /* properties_destroyed */
1043 0, /* todo_flags_start */
1044 TODO_dump_func | TODO_ggc_collect
1045 | TODO_verify_stmts, /* todo_flags_finish */
1046 0 /* letter */
1047 };
1048
1049 #include "gt-tree-complex.h"