From 1694907238eb106bf7ac0e4eaedaa77bc7719b6d Mon Sep 17 00:00:00 2001 From: Richard Guenther Date: Thu, 4 Nov 2010 10:56:22 +0000 Subject: [PATCH] tree.def (FMA_EXPR): New tree code. 2010-11-04 Richard Guenther Richard Henderson * tree.def (FMA_EXPR): New tree code. * expr.c (expand_expr_real_2): Add FMA_EXPR expansion code. * gimple.c (gimple_rhs_class_table): FMA_EXPR is a GIMPLE_TERNARY_RHS. * tree-cfg.c (verify_gimple_assign_ternary): Verify FMA_EXPR types. * tree-inline.c (estimate_operator_cost): Handle FMA_EXPR. * gimple-pretty-print.c (dump_ternary_rhs): Likewise. * tree-ssa-math-opts.c (convert_mult_to_fma): New function. (execute_optimize_widening_mul): Call it. Reorganize to allow dead stmt removal. Move TODO flags ... (pass_optimize_widening_mul): ... here. * flag-types.h (enum fp_contract_mode): New enum. * common.opt (flag_fp_contract_mode): New variable. (-ffp-contract): New option. * opts.c (common_handle_option): Handle it. * doc/invoke.texi (-ffp-contract): Document. * tree.h (fold_fma): Declare. * builtins.c (fold_fma): New function. (fold_builtin_fma): Likewise. (fold_builtin_3): Call it for fma. * fold-const.c (fold_ternary_loc): Fold FMA_EXPR. * optabs.c (optab_for_tree_code): Handle FMA_EXPR. * config/i386/sse.md (fms4, fnma, fnms4): New expanders. * doc/md.texi (fms4, fnma, fnms4): Document new named patterns. * genopinit.c (optabs): Initialize fms_optab, fnma_optab and fnms_optab. * optabs.h (enum optab_index): Add OTI_fms, OTI_fnma and OTI_fnms. (fms_optab, fnma_optab, fnms_optab): New defines. * gimplify.c (gimplify_expr): Handle binary truth expressions explicitly. Handle FMA_EXPR. * tree-vect-stmts.c (vectorizable_operation): Handle ternary operations. * gcc.target/i386/fma4-vector-2.c: New testcase. 
Co-Authored-By: Richard Henderson From-SVN: r166304 --- gcc/ChangeLog | 36 ++++ gcc/builtins.c | 39 ++++- gcc/common.opt | 8 + gcc/config/i386/sse.md | 29 +++- gcc/doc/invoke.texi | 14 +- gcc/doc/md.texi | 30 ++++ gcc/expr.c | 45 ++++- gcc/flag-types.h | 7 + gcc/fold-const.c | 21 ++- gcc/genopinit.c | 3 + gcc/gimple-pretty-print.c | 8 + gcc/gimple.c | 3 +- gcc/gimplify.c | 30 +++- gcc/optabs.c | 3 + gcc/optabs.h | 6 + gcc/opts.c | 12 ++ gcc/testsuite/ChangeLog | 5 + gcc/testsuite/gcc.target/i386/fma4-vector-2.c | 21 +++ gcc/tree-cfg.c | 14 ++ gcc/tree-inline.c | 1 + gcc/tree-ssa-math-opts.c | 158 ++++++++++++++++-- gcc/tree-vect-stmts.c | 66 ++++++-- gcc/tree.def | 6 + gcc/tree.h | 1 + 24 files changed, 524 insertions(+), 42 deletions(-) create mode 100644 gcc/testsuite/gcc.target/i386/fma4-vector-2.c diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 7c903436186..7a3cdb0535c 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,39 @@ +2010-11-04 Richard Guenther + Richard Henderson + + * tree.def (FMA_EXPR): New tree code. + * expr.c (expand_expr_real_2): Add FMA_EXPR expansion code. + * gimple.c (gimple_rhs_class_table): FMA_EXPR is a GIMPLE_TERNARY_RHS. + * tree-cfg.c (verify_gimple_assign_ternary): Verify FMA_EXPR types. + * tree-inline.c (estimate_operator_cost): Handle FMA_EXPR. + * gimple-pretty-print.c (dump_ternary_rhs): Likewise. + * tree-ssa-math-opts.c (convert_mult_to_fma): New function. + (execute_optimize_widening_mul): Call it. Reorganize to allow + dead stmt removal. Move TODO flags ... + (pass_optimize_widening_mul): ... here. + * flag-types.h (enum fp_contract_mode): New enum. + * common.opt (flag_fp_contract_mode): New variable. + (-ffp-contract): New option. + * opts.c (common_handle_option): Handle it. + * doc/invoke.texi (-ffp-contract): Document. + * tree.h (fold_fma): Declare. + * builtins.c (fold_fma): New function. + (fold_builtin_fma): Likewise. + (fold_builtin_3): Call it for fma. + * fold-const.c (fold_ternary_loc): Fold FMA_EXPR. 
+ * optabs.c (optab_for_tree_code): Handle FMA_EXPR. + * config/i386/sse.md (fms4, fnma, fnms4): + New expanders. + * doc/md.texi (fms4, fnma, fnms4): Document new + named patterns. + * genopinit.c (optabs): Initialize fms_optab, fnma_optab and fnms_optab. + * optabs.h (enum optab_index): Add OTI_fms, OTI_fnma and OTI_fnms. + (fms_optab, fnma_optab, fnms_optab): New defines. + * gimplify.c (gimplify_expr): Handle binary truth expressions + explicitly. Handle FMA_EXPR. + * tree-vect-stmts.c (vectorizable_operation): Handle ternary + operations. + 2010-11-04 Artjoms Sinkarovs Richard Guenther diff --git a/gcc/builtins.c b/gcc/builtins.c index 31a869bf1f6..e193791ccc7 100644 --- a/gcc/builtins.c +++ b/gcc/builtins.c @@ -9266,6 +9266,40 @@ fold_builtin_abs (location_t loc, tree arg, tree type) return fold_build1_loc (loc, ABS_EXPR, type, arg); } +/* Fold a fma operation with arguments ARG[012]. */ + +tree +fold_fma (location_t loc ATTRIBUTE_UNUSED, + tree type, tree arg0, tree arg1, tree arg2) +{ + if (TREE_CODE (arg0) == REAL_CST + && TREE_CODE (arg1) == REAL_CST + && TREE_CODE (arg2) == REAL_CST) + return do_mpfr_arg3 (arg0, arg1, arg2, type, mpfr_fma); + + return NULL_TREE; +} + +/* Fold a call to fma, fmaf, or fmal with arguments ARG[012]. */ + +static tree +fold_builtin_fma (location_t loc, tree arg0, tree arg1, tree arg2, tree type) +{ + if (validate_arg (arg0, REAL_TYPE) + && validate_arg(arg1, REAL_TYPE) + && validate_arg(arg2, REAL_TYPE)) + { + tree tem = fold_fma (loc, type, arg0, arg1, arg2); + if (tem) + return tem; + + /* ??? Only expand to FMA_EXPR if it's directly supported. */ + if (optab_handler (fma_optab, TYPE_MODE (type)) != CODE_FOR_nothing) + return fold_build3_loc (loc, FMA_EXPR, type, arg0, arg1, arg2); + } + return NULL_TREE; +} + /* Fold a call to builtin fmin or fmax. 
*/ static tree @@ -10540,10 +10574,7 @@ fold_builtin_3 (location_t loc, tree fndecl, return fold_builtin_sincos (loc, arg0, arg1, arg2); CASE_FLT_FN (BUILT_IN_FMA): - if (validate_arg (arg0, REAL_TYPE) - && validate_arg(arg1, REAL_TYPE) - && validate_arg(arg2, REAL_TYPE)) - return do_mpfr_arg3 (arg0, arg1, arg2, type, mpfr_fma); + return fold_builtin_fma (loc, arg0, arg1, arg2, type); break; CASE_FLT_FN (BUILT_IN_REMQUO): diff --git a/gcc/common.opt b/gcc/common.opt index cd8b0adfff1..551c3358f75 100644 --- a/gcc/common.opt +++ b/gcc/common.opt @@ -58,6 +58,10 @@ bool flag_warn_unused_result = false Variable int *param_values +; Floating-point contraction mode, fast by default. +Variable +enum fp_contract_mode flag_fp_contract_mode = FP_CONTRACT_FAST + ### Driver @@ -857,6 +861,10 @@ fforward-propagate Common Report Var(flag_forward_propagate) Optimization Perform a forward propagation pass on RTL +ffp-contract= +Common Joined RejectNegative +-ffp-contract=[off|on|fast] Perform floating-point expression contraction. + ; Nonzero means don't put addresses of constant functions in registers. ; Used for compiling the Unix kernel, where strange substitutions are ; done on the assembly output. diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md index c359aed0791..717f7fe7c5b 100644 --- a/gcc/config/i386/sse.md +++ b/gcc/config/i386/sse.md @@ -1859,7 +1859,7 @@ ;; Intrinsic FMA operations. -;; The standard name for fma is only available with SSE math enabled. +;; The standard names for fma are only available with SSE math enabled. 
(define_expand "fma4" [(set (match_operand:FMAMODE 0 "register_operand") (fma:FMAMODE @@ -1869,6 +1869,33 @@ "(TARGET_FMA || TARGET_FMA4) && TARGET_SSE_MATH" "") +(define_expand "fms4" + [(set (match_operand:FMAMODE 0 "register_operand") + (fma:FMAMODE + (match_operand:FMAMODE 1 "nonimmediate_operand") + (match_operand:FMAMODE 2 "nonimmediate_operand") + (neg:FMAMODE (match_operand:FMAMODE 3 "nonimmediate_operand"))))] + "(TARGET_FMA || TARGET_FMA4) && TARGET_SSE_MATH" + "") + +(define_expand "fnma4" + [(set (match_operand:FMAMODE 0 "register_operand") + (fma:FMAMODE + (neg:FMAMODE (match_operand:FMAMODE 1 "nonimmediate_operand")) + (match_operand:FMAMODE 2 "nonimmediate_operand") + (match_operand:FMAMODE 3 "nonimmediate_operand")))] + "(TARGET_FMA || TARGET_FMA4) && TARGET_SSE_MATH" + "") + +(define_expand "fnms4" + [(set (match_operand:FMAMODE 0 "register_operand") + (fma:FMAMODE + (neg:FMAMODE (match_operand:FMAMODE 1 "nonimmediate_operand")) + (match_operand:FMAMODE 2 "nonimmediate_operand") + (neg:FMAMODE (match_operand:FMAMODE 3 "nonimmediate_operand"))))] + "(TARGET_FMA || TARGET_FMA4) && TARGET_SSE_MATH" + "") + ;; The builtin for fma4intrin.h is not constrained by SSE math enabled. (define_expand "fma4i_fmadd_" [(set (match_operand:FMAMODE 0 "register_operand") diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi index 3aefa09c89f..fda884b76eb 100644 --- a/gcc/doc/invoke.texi +++ b/gcc/doc/invoke.texi @@ -343,7 +343,7 @@ Objective-C and Objective-C++ Dialects}. 
-fdelayed-branch -fdelete-null-pointer-checks -fdse -fdse @gol -fearly-inlining -fipa-sra -fexpensive-optimizations -ffast-math @gol -ffinite-math-only -ffloat-store -fexcess-precision=@var{style} @gol --fforward-propagate -ffunction-sections @gol +-fforward-propagate -ffp-contract=@var{style} -ffunction-sections @gol -fgcse -fgcse-after-reload -fgcse-las -fgcse-lm -fgraphite-identity @gol -fgcse-sm -fif-conversion -fif-conversion2 -findirect-inlining @gol -finline-functions -finline-functions-called-once -finline-limit=@var{n} @gol @@ -5992,6 +5992,18 @@ loop unrolling. This option is enabled by default at optimization levels @option{-O}, @option{-O2}, @option{-O3}, @option{-Os}. +@item -ffp-contract=@var{style} +@opindex ffp-contract +@option{-ffp-contract=off} disables floating-point expression contraction. +@option{-ffp-contract=fast} enables floating-point expression contraction +such as forming of fused multiply-add operations if the target has +native support for them. +@option{-ffp-contract=on} enables floating-point expression contraction +if allowed by the language standard. This is currently not implemented +and is treated the same as @option{-ffp-contract=off}. + +The default is @option{-ffp-contract=fast}. + @item -fomit-frame-pointer @opindex fomit-frame-pointer Don't keep the frame pointer in a register for functions that diff --git a/gcc/doc/md.texi b/gcc/doc/md.texi index 6de4f3658a6..8418564d91c 100644 --- a/gcc/doc/md.texi +++ b/gcc/doc/md.texi @@ -3958,6 +3958,36 @@ pattern is used to implement the @code{fma}, @code{fmaf}, and multiply followed by the add if the machine does not perform a rounding step between the operations. +@cindex @code{fms@var{m}4} instruction pattern +@item @samp{fms@var{m}4} +Like @code{fma@var{m}4}, except operand 3 is subtracted from the +product instead of added to the product. 
This is represented +in the rtl as + +@smallexample +(fma:@var{m} @var{op1} @var{op2} (neg:@var{m} @var{op3})) +@end smallexample + +@cindex @code{fnma@var{m}4} instruction pattern +@item @samp{fnma@var{m}4} +Like @code{fma@var{m}4} except that the intermediate product +is negated before being added to operand 3. This is represented +in the rtl as + +@smallexample +(fma:@var{m} (neg:@var{m} @var{op1}) @var{op2} @var{op3}) +@end smallexample + +@cindex @code{fnms@var{m}4} instruction pattern +@item @samp{fnms@var{m}4} +Like @code{fms@var{m}4} except that the intermediate product +is negated before subtracting operand 3. This is represented +in the rtl as + +@smallexample +(fma:@var{m} (neg:@var{m} @var{op1}) @var{op2} (neg:@var{m} @var{op3})) +@end smallexample + @cindex @code{min@var{m}3} instruction pattern @cindex @code{max@var{m}3} instruction pattern @item @samp{smin@var{m}3}, @samp{smax@var{m}3} diff --git a/gcc/expr.c b/gcc/expr.c index 56f6edaeda2..f29f6dc1244 100644 --- a/gcc/expr.c +++ b/gcc/expr.c @@ -7254,7 +7254,7 @@ expand_expr_real_2 (sepops ops, rtx target, enum machine_mode tmode, int ignore; bool reduce_bit_field; location_t loc = ops->location; - tree treeop0, treeop1; + tree treeop0, treeop1, treeop2; #define REDUCE_BIT_FIELD(expr) (reduce_bit_field \ ? 
reduce_to_bit_field_precision ((expr), \ target, \ @@ -7267,6 +7267,7 @@ expand_expr_real_2 (sepops ops, rtx target, enum machine_mode tmode, treeop0 = ops->op0; treeop1 = ops->op1; + treeop2 = ops->op2; /* We should be called only on simple (binary or unary) expressions, exactly those that are valid in gimple expressions that aren't @@ -7624,7 +7625,7 @@ expand_expr_real_2 (sepops ops, rtx target, enum machine_mode tmode, case WIDEN_MULT_PLUS_EXPR: case WIDEN_MULT_MINUS_EXPR: expand_operands (treeop0, treeop1, NULL_RTX, &op0, &op1, EXPAND_NORMAL); - op2 = expand_normal (ops->op2); + op2 = expand_normal (treeop2); target = expand_widen_pattern_expr (ops, op0, op1, op2, target, unsignedp); return target; @@ -7711,6 +7712,46 @@ expand_expr_real_2 (sepops ops, rtx target, enum machine_mode tmode, expand_operands (treeop0, treeop1, subtarget, &op0, &op1, EXPAND_NORMAL); return REDUCE_BIT_FIELD (expand_mult (mode, op0, op1, target, unsignedp)); + case FMA_EXPR: + { + optab opt = fma_optab; + gimple def0, def2; + + def0 = get_def_for_expr (treeop0, NEGATE_EXPR); + def2 = get_def_for_expr (treeop2, NEGATE_EXPR); + + op0 = op2 = NULL; + + if (def0 && def2 + && optab_handler (fnms_optab, mode) != CODE_FOR_nothing) + { + opt = fnms_optab; + op0 = expand_normal (gimple_assign_rhs1 (def0)); + op2 = expand_normal (gimple_assign_rhs1 (def2)); + } + else if (def0 + && optab_handler (fnma_optab, mode) != CODE_FOR_nothing) + { + opt = fnma_optab; + op0 = expand_normal (gimple_assign_rhs1 (def0)); + } + else if (def2 + && optab_handler (fms_optab, mode) != CODE_FOR_nothing) + { + opt = fms_optab; + op2 = expand_normal (gimple_assign_rhs1 (def2)); + } + + if (op0 == NULL) + op0 = expand_expr (treeop0, subtarget, VOIDmode, EXPAND_NORMAL); + if (op2 == NULL) + op2 = expand_normal (treeop2); + op1 = expand_normal (treeop1); + + return expand_ternary_op (TYPE_MODE (type), opt, + op0, op1, op2, target, 0); + } + case MULT_EXPR: /* If this is a fixed-point operation, then we cannot use the 
code below because "expand_mult" doesn't support sat/no-sat fixed-point diff --git a/gcc/flag-types.h b/gcc/flag-types.h index 1a8edec3a37..4259985b02a 100644 --- a/gcc/flag-types.h +++ b/gcc/flag-types.h @@ -152,4 +152,11 @@ enum warn_strict_overflow_code WARN_STRICT_OVERFLOW_MAGNITUDE = 5 }; +/* Floating-point contraction mode. */ +enum fp_contract_mode { + FP_CONTRACT_OFF = 0, + FP_CONTRACT_ON = 1, + FP_CONTRACT_FAST = 2 +}; + #endif /* ! GCC_FLAG_TYPES_H */ diff --git a/gcc/fold-const.c b/gcc/fold-const.c index da890f14c3d..b6a9814fbe3 100644 --- a/gcc/fold-const.c +++ b/gcc/fold-const.c @@ -13281,10 +13281,10 @@ contains_label_p (tree st) tree fold_ternary_loc (location_t loc, enum tree_code code, tree type, - tree op0, tree op1, tree op2) + tree op0, tree op1, tree op2) { tree tem; - tree arg0 = NULL_TREE, arg1 = NULL_TREE; + tree arg0 = NULL_TREE, arg1 = NULL_TREE, arg2 = NULL_TREE; enum tree_code_class kind = TREE_CODE_CLASS (code); gcc_assert (IS_EXPR_CODE_CLASS (kind) @@ -13312,6 +13312,12 @@ fold_ternary_loc (location_t loc, enum tree_code code, tree type, STRIP_NOPS (arg1); } + if (op2) + { + arg2 = op2; + STRIP_NOPS (arg2); + } + switch (code) { case COMPONENT_REF: @@ -13610,6 +13616,17 @@ fold_ternary_loc (location_t loc, enum tree_code code, tree type, return NULL_TREE; + case FMA_EXPR: + /* For integers we can decompose the FMA if possible. 
*/ + if (TREE_CODE (arg0) == INTEGER_CST + && TREE_CODE (arg1) == INTEGER_CST) + return fold_build2_loc (loc, PLUS_EXPR, type, + const_binop (MULT_EXPR, arg0, arg1), arg2); + if (integer_zerop (arg2)) + return fold_build2_loc (loc, MULT_EXPR, type, arg0, arg1); + + return fold_fma (loc, type, arg0, arg1, arg2); + default: return NULL_TREE; } /* switch (code) */ diff --git a/gcc/genopinit.c b/gcc/genopinit.c index 6e0a714f49e..eee9ef826da 100644 --- a/gcc/genopinit.c +++ b/gcc/genopinit.c @@ -160,6 +160,9 @@ static const char * const optabs[] = "set_optab_handler (floor_optab, $A, CODE_FOR_$(floor$a2$))", "set_convert_optab_handler (lfloor_optab, $B, $A, CODE_FOR_$(lfloor$F$a$I$b2$))", "set_optab_handler (fma_optab, $A, CODE_FOR_$(fma$a4$))", + "set_optab_handler (fms_optab, $A, CODE_FOR_$(fms$a4$))", + "set_optab_handler (fnma_optab, $A, CODE_FOR_$(fnma$a4$))", + "set_optab_handler (fnms_optab, $A, CODE_FOR_$(fnms$a4$))", "set_optab_handler (ceil_optab, $A, CODE_FOR_$(ceil$a2$))", "set_convert_optab_handler (lceil_optab, $B, $A, CODE_FOR_$(lceil$F$a$I$b2$))", "set_optab_handler (round_optab, $A, CODE_FOR_$(round$a2$))", diff --git a/gcc/gimple-pretty-print.c b/gcc/gimple-pretty-print.c index c74dd0ec7a7..057f35b9815 100644 --- a/gcc/gimple-pretty-print.c +++ b/gcc/gimple-pretty-print.c @@ -400,6 +400,14 @@ dump_ternary_rhs (pretty_printer *buffer, gimple gs, int spc, int flags) pp_character (buffer, '>'); break; + case FMA_EXPR: + dump_generic_node (buffer, gimple_assign_rhs1 (gs), spc, flags, false); + pp_string (buffer, " * "); + dump_generic_node (buffer, gimple_assign_rhs2 (gs), spc, flags, false); + pp_string (buffer, " + "); + dump_generic_node (buffer, gimple_assign_rhs3 (gs), spc, flags, false); + break; + default: gcc_unreachable (); } diff --git a/gcc/gimple.c b/gcc/gimple.c index 1bb241a62b3..6704456c0cc 100644 --- a/gcc/gimple.c +++ b/gcc/gimple.c @@ -2529,7 +2529,8 @@ get_gimple_rhs_num_ops (enum tree_code code) || (SYM) == TRUTH_XOR_EXPR) ? 
GIMPLE_BINARY_RHS \ : (SYM) == TRUTH_NOT_EXPR ? GIMPLE_UNARY_RHS \ : ((SYM) == WIDEN_MULT_PLUS_EXPR \ - || (SYM) == WIDEN_MULT_MINUS_EXPR) ? GIMPLE_TERNARY_RHS \ + || (SYM) == WIDEN_MULT_MINUS_EXPR \ + || (SYM) == FMA_EXPR) ? GIMPLE_TERNARY_RHS \ : ((SYM) == COND_EXPR \ || (SYM) == CONSTRUCTOR \ || (SYM) == OBJ_TYPE_REF \ diff --git a/gcc/gimplify.c b/gcc/gimplify.c index 94a6689c61f..d5a633c1b5e 100644 --- a/gcc/gimplify.c +++ b/gcc/gimplify.c @@ -7170,6 +7170,16 @@ gimplify_expr (tree *expr_p, gimple_seq *pre_p, gimple_seq *post_p, ret = gimplify_omp_atomic (expr_p, pre_p); break; + case TRUTH_AND_EXPR: + case TRUTH_OR_EXPR: + case TRUTH_XOR_EXPR: + /* Classified as tcc_expression. */ + goto expr_2; + + case FMA_EXPR: + /* Classified as tcc_expression. */ + goto expr_3; + case POINTER_PLUS_EXPR: /* Convert ((type *)A)+offset into &A->field_of_type_and_offset. The second is gimple immediate saving a need for extra statement. @@ -7249,16 +7259,28 @@ gimplify_expr (tree *expr_p, gimple_seq *pre_p, gimple_seq *post_p, break; } + expr_3: + { + enum gimplify_status r0, r1, r2; + + r0 = gimplify_expr (&TREE_OPERAND (*expr_p, 0), pre_p, + post_p, is_gimple_val, fb_rvalue); + r1 = gimplify_expr (&TREE_OPERAND (*expr_p, 1), pre_p, + post_p, is_gimple_val, fb_rvalue); + r2 = gimplify_expr (&TREE_OPERAND (*expr_p, 2), pre_p, + post_p, is_gimple_val, fb_rvalue); + + ret = MIN (MIN (r0, r1), r2); + break; + } + case tcc_declaration: case tcc_constant: ret = GS_ALL_DONE; goto dont_recalculate; default: - gcc_assert (TREE_CODE (*expr_p) == TRUTH_AND_EXPR - || TREE_CODE (*expr_p) == TRUTH_OR_EXPR - || TREE_CODE (*expr_p) == TRUTH_XOR_EXPR); - goto expr_2; + gcc_unreachable (); } recalculate_side_effects (*expr_p); diff --git a/gcc/optabs.c b/gcc/optabs.c index 5d095c1a9b9..a96eea1cdc8 100644 --- a/gcc/optabs.c +++ b/gcc/optabs.c @@ -374,6 +374,9 @@ optab_for_tree_code (enum tree_code code, const_tree type, : (TYPE_SATURATING (type) ? 
ssmsub_widen_optab : smsub_widen_optab)); + case FMA_EXPR: + return fma_optab; + case REDUC_MAX_EXPR: return TYPE_UNSIGNED (type) ? reduc_umax_optab : reduc_smax_optab; diff --git a/gcc/optabs.h b/gcc/optabs.h index 8b9c9a730e0..c4dfa60b83d 100644 --- a/gcc/optabs.h +++ b/gcc/optabs.h @@ -192,6 +192,9 @@ enum optab_index OTI_atan2, /* Floating multiply/add */ OTI_fma, + OTI_fms, + OTI_fnma, + OTI_fnms, /* Move instruction. */ OTI_mov, @@ -435,6 +438,9 @@ enum optab_index #define pow_optab (&optab_table[OTI_pow]) #define atan2_optab (&optab_table[OTI_atan2]) #define fma_optab (&optab_table[OTI_fma]) +#define fms_optab (&optab_table[OTI_fms]) +#define fnma_optab (&optab_table[OTI_fnma]) +#define fnms_optab (&optab_table[OTI_fnms]) #define mov_optab (&optab_table[OTI_mov]) #define movstrict_optab (&optab_table[OTI_movstrict]) diff --git a/gcc/opts.c b/gcc/opts.c index ce2618e04cc..b2019c67a38 100644 --- a/gcc/opts.c +++ b/gcc/opts.c @@ -1901,6 +1901,18 @@ common_handle_option (struct gcc_options *opts, return false; break; + case OPT_ffp_contract_: + if (!strcmp (arg, "on")) + /* Not implemented, fall back to conservative FP_CONTRACT_OFF. */ + flag_fp_contract_mode = FP_CONTRACT_OFF; + else if (!strcmp (arg, "off")) + flag_fp_contract_mode = FP_CONTRACT_OFF; + else if (!strcmp (arg, "fast")) + flag_fp_contract_mode = FP_CONTRACT_FAST; + else + error ("unknown floating point contraction style \"%s\"", arg); + break; + case OPT_fexcess_precision_: if (!strcmp (arg, "fast")) flag_excess_precision_cmdline = EXCESS_PRECISION_FAST; diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index e76da86e091..a025ce4ba94 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,3 +1,8 @@ +2010-11-04 Richard Guenther + Richard Henderson + + * gcc.target/i386/fma4-vector-2.c: New testcase. 
+ 2010-11-04 Artjoms Sinkarovs Richard Guenther diff --git a/gcc/testsuite/gcc.target/i386/fma4-vector-2.c b/gcc/testsuite/gcc.target/i386/fma4-vector-2.c new file mode 100644 index 00000000000..2f3ec96dc96 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/fma4-vector-2.c @@ -0,0 +1,21 @@ +/* { dg-do compile } */ +/* { dg-require-effective-target lp64 } */ +/* { dg-options "-O2 -mfma4 -ftree-vectorize -mtune=generic" } */ + +float r[256], s[256]; +float x[256]; +float y[256]; +float z[256]; + +void foo (void) +{ + int i; + for (i = 0; i < 256; ++i) + { + r[i] = x[i] * y[i] - z[i]; + s[i] = x[i] * y[i] + z[i]; + } +} + +/* { dg-final { scan-assembler "vfmaddps" } } */ +/* { dg-final { scan-assembler "vfmsubps" } } */ diff --git a/gcc/tree-cfg.c b/gcc/tree-cfg.c index e31a50d9466..3b46283e7e6 100644 --- a/gcc/tree-cfg.c +++ b/gcc/tree-cfg.c @@ -3655,6 +3655,20 @@ verify_gimple_assign_ternary (gimple stmt) } break; + case FMA_EXPR: + if (!useless_type_conversion_p (lhs_type, rhs1_type) + || !useless_type_conversion_p (lhs_type, rhs2_type) + || !useless_type_conversion_p (lhs_type, rhs3_type)) + { + error ("type mismatch in fused multiply-add expression"); + debug_generic_expr (lhs_type); + debug_generic_expr (rhs1_type); + debug_generic_expr (rhs2_type); + debug_generic_expr (rhs3_type); + return true; + } + break; + default: gcc_unreachable (); } diff --git a/gcc/tree-inline.c b/gcc/tree-inline.c index cf8a68e9f93..88806beddd3 100644 --- a/gcc/tree-inline.c +++ b/gcc/tree-inline.c @@ -3283,6 +3283,7 @@ estimate_operator_cost (enum tree_code code, eni_weights *weights, case POINTER_PLUS_EXPR: case MINUS_EXPR: case MULT_EXPR: + case FMA_EXPR: case ADDR_SPACE_CONVERT_EXPR: case FIXED_CONVERT_EXPR: diff --git a/gcc/tree-ssa-math-opts.c b/gcc/tree-ssa-math-opts.c index a814f6f0288..96140f06f63 100644 --- a/gcc/tree-ssa-math-opts.c +++ b/gcc/tree-ssa-math-opts.c @@ -1494,6 +1494,123 @@ convert_plusminus_to_widen (gimple_stmt_iterator *gsi, gimple stmt, return true; } +/* 
Combine the multiplication at MUL_STMT with uses in additions and + subtractions to form fused multiply-add operations. Returns true + if successful and MUL_STMT should be removed. */ + +static bool +convert_mult_to_fma (gimple mul_stmt) +{ + tree mul_result = gimple_assign_lhs (mul_stmt); + tree type = TREE_TYPE (mul_result); + gimple use_stmt, fma_stmt; + use_operand_p use_p; + imm_use_iterator imm_iter; + + if (FLOAT_TYPE_P (type) + && flag_fp_contract_mode == FP_CONTRACT_OFF) + return false; + + /* We don't want to do bitfield reduction ops. */ + if (INTEGRAL_TYPE_P (type) + && (TYPE_PRECISION (type) + != GET_MODE_PRECISION (TYPE_MODE (type)))) + return false; + + /* If the target doesn't support it, don't generate it. We assume that + if fma isn't available then fms, fnma or fnms are not either. */ + if (optab_handler (fma_optab, TYPE_MODE (type)) == CODE_FOR_nothing) + return false; + + /* Make sure that the multiplication statement becomes dead after + the transformation, thus that all uses are transformed to FMAs. + This means we assume that an FMA operation has the same cost + as an addition. */ + FOR_EACH_IMM_USE_FAST (use_p, imm_iter, mul_result) + { + enum tree_code use_code; + + use_stmt = USE_STMT (use_p); + + if (!is_gimple_assign (use_stmt)) + return false; + use_code = gimple_assign_rhs_code (use_stmt); + /* ??? We need to handle NEGATE_EXPR to eventually form fnms. */ + if (use_code != PLUS_EXPR + && use_code != MINUS_EXPR) + return false; + + /* For now restrict this operation to single basic blocks. In theory + we would want to support sinking the multiplication in + m = a*b; + if () + ma = m + c; + else + d = m; + to form a fma in the then block and sink the multiplication to the + else block. */ + if (gimple_bb (use_stmt) != gimple_bb (mul_stmt)) + return false; + + /* We can't handle a * b + a * b. 
*/ + if (gimple_assign_rhs1 (use_stmt) == gimple_assign_rhs2 (use_stmt)) + return false; + + /* If the target doesn't support a * b - c then drop the ball. */ + if (gimple_assign_rhs1 (use_stmt) == mul_result + && use_code == MINUS_EXPR + && optab_handler (fms_optab, TYPE_MODE (type)) == CODE_FOR_nothing) + return false; + + /* If the target doesn't support -a * b + c then drop the ball. */ + if (gimple_assign_rhs2 (use_stmt) == mul_result + && use_code == MINUS_EXPR + && optab_handler (fnma_optab, TYPE_MODE (type)) == CODE_FOR_nothing) + return false; + + /* We don't generate -a * b - c below yet. */ + } + + FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, mul_result) + { + tree addop, mulop1; + gimple_stmt_iterator gsi = gsi_for_stmt (use_stmt); + + mulop1 = gimple_assign_rhs1 (mul_stmt); + if (gimple_assign_rhs1 (use_stmt) == mul_result) + { + addop = gimple_assign_rhs2 (use_stmt); + /* a * b - c -> a * b + (-c) */ + if (gimple_assign_rhs_code (use_stmt) == MINUS_EXPR) + addop = force_gimple_operand_gsi (&gsi, + build1 (NEGATE_EXPR, + type, addop), + true, NULL_TREE, true, + GSI_SAME_STMT); + } + else + { + addop = gimple_assign_rhs1 (use_stmt); + /* a - b * c -> (-b) * c + a */ + if (gimple_assign_rhs_code (use_stmt) == MINUS_EXPR) + mulop1 = force_gimple_operand_gsi (&gsi, + build1 (NEGATE_EXPR, + type, mulop1), + true, NULL_TREE, true, + GSI_SAME_STMT); + } + + fma_stmt = gimple_build_assign_with_ops3 (FMA_EXPR, + gimple_assign_lhs (use_stmt), + mulop1, + gimple_assign_rhs2 (mul_stmt), + addop); + gsi_replace (&gsi, fma_stmt, true); + } + + return true; +} + /* Find integer multiplications where the operands are extended from smaller types, and replace the MULT_EXPR with a WIDEN_MULT_EXPR where appropriate. 
*/ @@ -1501,31 +1618,45 @@ convert_plusminus_to_widen (gimple_stmt_iterator *gsi, gimple stmt, static unsigned int execute_optimize_widening_mul (void) { - bool changed = false; basic_block bb; FOR_EACH_BB (bb) { gimple_stmt_iterator gsi; - for (gsi = gsi_after_labels (bb); !gsi_end_p (gsi); gsi_next (&gsi)) + for (gsi = gsi_after_labels (bb); !gsi_end_p (gsi);) { gimple stmt = gsi_stmt (gsi); enum tree_code code; - if (!is_gimple_assign (stmt)) - continue; + if (is_gimple_assign (stmt)) + { + code = gimple_assign_rhs_code (stmt); + switch (code) + { + case MULT_EXPR: + if (!convert_mult_to_widen (stmt) + && convert_mult_to_fma (stmt)) + { + gsi_remove (&gsi, true); + release_defs (stmt); + continue; + } + break; + + case PLUS_EXPR: + case MINUS_EXPR: + convert_plusminus_to_widen (&gsi, stmt, code); + break; - code = gimple_assign_rhs_code (stmt); - if (code == MULT_EXPR) - changed |= convert_mult_to_widen (stmt); - else if (code == PLUS_EXPR || code == MINUS_EXPR) - changed |= convert_plusminus_to_widen (&gsi, stmt, code); + default:; + } + } + gsi_next (&gsi); } } - return (changed ? TODO_dump_func | TODO_update_ssa | TODO_verify_ssa - | TODO_verify_stmts : 0); + return 0; } static bool @@ -1549,6 +1680,9 @@ struct gimple_opt_pass pass_optimize_widening_mul = 0, /* properties_provided */ 0, /* properties_destroyed */ 0, /* todo_flags_start */ - 0 /* todo_flags_finish */ + TODO_verify_ssa + | TODO_verify_stmts + | TODO_dump_func + | TODO_update_ssa /* todo_flags_finish */ } }; diff --git a/gcc/tree-vect-stmts.c b/gcc/tree-vect-stmts.c index 4961ccbccc7..2dbc0353421 100644 --- a/gcc/tree-vect-stmts.c +++ b/gcc/tree-vect-stmts.c @@ -2343,7 +2343,8 @@ vectorizable_shift (gimple stmt, gimple_stmt_iterator *gsi, /* Function vectorizable_operation. - Check if STMT performs a binary or unary operation that can be vectorized. + Check if STMT performs a binary, unary or ternary operation that can + be vectorized. 
If VEC_STMT is also passed, vectorize the STMT: create a vectorized stmt to replace it, put it in VEC_STMT, and insert it at BSI. Return FALSE if not a vectorizable STMT, TRUE otherwise. */ @@ -2354,7 +2355,7 @@ vectorizable_operation (gimple stmt, gimple_stmt_iterator *gsi, { tree vec_dest; tree scalar_dest; - tree op0, op1 = NULL; + tree op0, op1 = NULL_TREE, op2 = NULL_TREE; stmt_vec_info stmt_info = vinfo_for_stmt (stmt); tree vectype; loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info); @@ -2366,7 +2367,8 @@ vectorizable_operation (gimple stmt, gimple_stmt_iterator *gsi, int icode; tree def; gimple def_stmt; - enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type}; + enum vect_def_type dt[3] + = {vect_unknown_def_type, vect_unknown_def_type, vect_unknown_def_type}; gimple new_stmt = NULL; stmt_vec_info prev_stmt_info; int nunits_in; @@ -2374,8 +2376,8 @@ vectorizable_operation (gimple stmt, gimple_stmt_iterator *gsi, tree vectype_out; int ncopies; int j, i; - VEC(tree,heap) *vec_oprnds0 = NULL, *vec_oprnds1 = NULL; - tree vop0, vop1; + VEC(tree,heap) *vec_oprnds0 = NULL, *vec_oprnds1 = NULL, *vec_oprnds2 = NULL; + tree vop0, vop1, vop2; bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info); int vf; @@ -2401,10 +2403,11 @@ vectorizable_operation (gimple stmt, gimple_stmt_iterator *gsi, /* Support only unary or binary operations. */ op_type = TREE_CODE_LENGTH (code); - if (op_type != unary_op && op_type != binary_op) + if (op_type != unary_op && op_type != binary_op && op_type != ternary_op) { if (vect_print_dump_info (REPORT_DETAILS)) - fprintf (vect_dump, "num. args = %d (not unary/binary op).", op_type); + fprintf (vect_dump, "num. 
args = %d (not unary/binary/ternary op).", + op_type); return false; } @@ -2441,7 +2444,7 @@ vectorizable_operation (gimple stmt, gimple_stmt_iterator *gsi, if (nunits_out != nunits_in) return false; - if (op_type == binary_op) + if (op_type == binary_op || op_type == ternary_op) { op1 = gimple_assign_rhs2 (stmt); if (!vect_is_simple_use (op1, loop_vinfo, bb_vinfo, &def_stmt, &def, @@ -2452,6 +2455,17 @@ vectorizable_operation (gimple stmt, gimple_stmt_iterator *gsi, return false; } } + if (op_type == ternary_op) + { + op2 = gimple_assign_rhs3 (stmt); + if (!vect_is_simple_use (op2, loop_vinfo, bb_vinfo, &def_stmt, &def, + &dt[2])) + { + if (vect_print_dump_info (REPORT_DETAILS)) + fprintf (vect_dump, "use not simple."); + return false; + } + } if (loop_vinfo) vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo); @@ -2473,7 +2487,7 @@ vectorizable_operation (gimple stmt, gimple_stmt_iterator *gsi, || code == RROTATE_EXPR) return false; - optab = optab_for_tree_code (code, vectype, optab_default); + optab = optab_for_tree_code (code, vectype, optab_default); /* Supportable by target? */ if (!optab) @@ -2534,8 +2548,10 @@ vectorizable_operation (gimple stmt, gimple_stmt_iterator *gsi, if (!slp_node) { vec_oprnds0 = VEC_alloc (tree, heap, 1); - if (op_type == binary_op) + if (op_type == binary_op || op_type == ternary_op) vec_oprnds1 = VEC_alloc (tree, heap, 1); + if (op_type == ternary_op) + vec_oprnds2 = VEC_alloc (tree, heap, 1); } /* In case the vectorization factor (VF) is bigger than the number @@ -2597,22 +2613,40 @@ vectorizable_operation (gimple stmt, gimple_stmt_iterator *gsi, /* Handle uses. 
*/ if (j == 0) { - if (op_type == binary_op) + if (op_type == binary_op || op_type == ternary_op) vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0, &vec_oprnds1, slp_node); else vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL, slp_node); + if (op_type == ternary_op) + { + vec_oprnds2 = VEC_alloc (tree, heap, 1); + VEC_quick_push (tree, vec_oprnds2, + vect_get_vec_def_for_operand (op2, stmt, NULL)); + } } else - vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, &vec_oprnds1); + { + vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, &vec_oprnds1); + if (op_type == ternary_op) + { + tree vec_oprnd = VEC_pop (tree, vec_oprnds2); + VEC_quick_push (tree, vec_oprnds2, + vect_get_vec_def_for_stmt_copy (dt[2], + vec_oprnd)); + } + } /* Arguments are ready. Create the new vector stmt. */ FOR_EACH_VEC_ELT (tree, vec_oprnds0, i, vop0) { - vop1 = ((op_type == binary_op) - ? VEC_index (tree, vec_oprnds1, i) : NULL); - new_stmt = gimple_build_assign_with_ops (code, vec_dest, vop0, vop1); + vop1 = ((op_type == binary_op || op_type == ternary_op) + ? VEC_index (tree, vec_oprnds1, i) : NULL_TREE); + vop2 = ((op_type == ternary_op) + ? VEC_index (tree, vec_oprnds2, i) : NULL_TREE); + new_stmt = gimple_build_assign_with_ops3 (code, vec_dest, + vop0, vop1, vop2); new_temp = make_ssa_name (vec_dest, new_stmt); gimple_assign_set_lhs (new_stmt, new_temp); vect_finish_stmt_generation (stmt, new_stmt, gsi); @@ -2633,6 +2667,8 @@ vectorizable_operation (gimple stmt, gimple_stmt_iterator *gsi, VEC_free (tree, heap, vec_oprnds0); if (vec_oprnds1) VEC_free (tree, heap, vec_oprnds1); + if (vec_oprnds2) + VEC_free (tree, heap, vec_oprnds2); return true; } diff --git a/gcc/tree.def b/gcc/tree.def index 24729e8180d..791d699a0c5 100644 --- a/gcc/tree.def +++ b/gcc/tree.def @@ -1092,6 +1092,12 @@ DEFTREECODE (WIDEN_MULT_PLUS_EXPR, "widen_mult_plus_expr", tcc_expression, 3) is subtracted from t3. 
*/ DEFTREECODE (WIDEN_MULT_MINUS_EXPR, "widen_mult_plus_expr", tcc_expression, 3) +/* Fused multiply-add. + All operands and the result are of the same type. No intermediate + rounding is performed after multiplying operand one with operand two + before adding operand three. */ +DEFTREECODE (FMA_EXPR, "fma_expr", tcc_expression, 3) + /* Whole vector left/right shift in bits. Operand 0 is a vector to be shifted. Operand 1 is an integer shift amount in bits. */ diff --git a/gcc/tree.h b/gcc/tree.h index 2de56c78735..2392ada9897 100644 --- a/gcc/tree.h +++ b/gcc/tree.h @@ -4954,6 +4954,7 @@ extern void fold_defer_overflow_warnings (void); extern void fold_undefer_overflow_warnings (bool, const_gimple, int); extern void fold_undefer_and_ignore_overflow_warnings (void); extern bool fold_deferring_overflow_warnings_p (void); +extern tree fold_fma (location_t, tree, tree, tree, tree); enum operand_equal_flag { -- 2.30.2