From bbe996ec7145da4d484bce0368ddd7b3aa5212bd Mon Sep 17 00:00:00 2001 From: Uros Bizjak Date: Thu, 20 Oct 2011 17:13:30 +0200 Subject: [PATCH] re PR target/47989 (-mrecip causes 482.sphinx3, 464.h264ref and 481.wrf to miscompare) PR target/47989 * config/i386/i386.h (RECIP_MASK_DEFAULT): New define. * config/i386/i386.opt (recip_mask): Initialize with RECIP_MASK_DEFAULT. * doc/invoke.texi (ix86 Options, -mrecip): Document that GCC implements vectorized single float division and vectorized sqrtf(x) with reciprocal sequence with additional Newton-Raphson step with -ffast-math. From-SVN: r180256 --- gcc/ChangeLog | 42 +++++++++++++++++++++------------------- gcc/config/i386/i386.h | 1 + gcc/config/i386/i386.opt | 2 +- gcc/doc/invoke.texi | 7 ++++++- 4 files changed, 30 insertions(+), 22 deletions(-) diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 5ec343af7ed..eeed56d67e3 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,13 @@ +2011-10-20 Uros Bizjak + + PR target/47989 + * config/i386/i386.h (RECIP_MASK_DEFAULT): New define. + * config/i386/i386.opt (recip_mask): Initialize with RECIP_MASK_DEFAULT. + * doc/invoke.texi (ix86 Options, -mrecip): Document that GCC + implements vectorized single float division and vectorized sqrtf(x) + with reciprocal sequence with additional Newton-Raphson step with + -ffast-math. + 2011-10-20 Dodji Seketeli PR other/50659 @@ -33,8 +43,7 @@ 2011-10-19 David S. Miller - * config/sparc/sparc.c (sparc_expand_move): Use - can_create_pseudo_p. + * config/sparc/sparc.c (sparc_expand_move): Use can_create_pseudo_p. (sparc_emit_set_const32): Likewise. (sparc_emit_set_const64_longway): Likewise. (sparc_emit_set_const64): Likewise. @@ -279,8 +288,8 @@ 2011-10-19 Jan Hubicka - * cgraphunit.c (handle_alias_pairs): Also handle wekref with destination - declared. + * cgraphunit.c (handle_alias_pairs): Also handle wekref with + destination declared. (output_weakrefs): New function. 
* varpool.c (varpool_create_variable_alias): Handle external aliases. @@ -319,7 +328,6 @@ 2011-10-18 Andrew Stubbs PR tree-optimization/50717 - * tree-ssa-math-opts.c (is_widening_mult_p): Remove the 'type' parameter. Calculate 'type' from stmt. (convert_mult_to_widen): Update call the is_widening_mult_p. @@ -668,8 +676,7 @@ 2011-10-17 Sergio Durigan Junior - * configure.ac: Display `yes' if the SystemTap header has been - found. + * configure.ac: Display `yes' if the SystemTap header has been found. * configure: Regenerate. 2011-10-08 Andi Kleen @@ -685,8 +692,7 @@ 2011-10-17 Richard Guenther PR tree-optimization/50729 - * tree-vrp.c (extract_range_from_unary_expr_1): Remove - redundant test. + * tree-vrp.c (extract_range_from_unary_expr_1): Remove redundant test. (simplify_conversion_using_ranges): Properly test the intermediate result. @@ -709,8 +715,7 @@ 2011-10-15 Tom Tromey Dodji Seketeli - * input.c (ONE_K, ONE_M, SCALE, STAT_LABEL, FORMAT_AMOUNT): New - macros. + * input.c (ONE_K, ONE_M, SCALE, STAT_LABEL, FORMAT_AMOUNT): New macros. (num_expanded_macros_counter, num_macro_tokens_counter): Declare new counters. (dump_line_table_statistics): Define new function. @@ -721,8 +726,7 @@ Dodji Seketeli * doc/cppopts.texi: Document -fdebug-cpp. - * doc/invoke.texi: Add -fdebug-cpp to the list of preprocessor - options. + * doc/invoke.texi: Add -fdebug-cpp to the list of preprocessor options. 2011-10-15 Tom Tromey Dodji Seketeli @@ -759,8 +763,7 @@ (LOCATION_COLUMN): New accessor (in_system_header_at): Use linemap_location_in_system_header_p. * diagnostic.c (diagnostic_report_current_module): Adjust to avoid - touching the internals of struct line_map. Use the public API. - instead. + touching the internals of struct line_map. Use the public API instead. (diagnostic_report_diagnostic): Don't use relational operator '<' on virtual locations. Use linemap_location_before_p instead. 
* input.c (expand_location): Adjust to expand to the tokens' @@ -1280,9 +1283,8 @@ 2011-10-12 Bernd Schmidt * function.c (prepare_shrink_wrap, bb_active_p): New function. - (thread_prologue_and_epilogue_insns): Use bb_active_p. - Call prepare_shrink_wrap, then recompute bb_active_p for the - last block. + (thread_prologue_and_epilogue_insns): Use bb_active_p. Call + prepare_shrink_wrap, then recompute bb_active_p for the last block. 2011-10-12 Joseph Myers @@ -1526,8 +1528,8 @@ 2011-10-10 Georg-Johann Lay - * config/avr/avr.c (avr_option_override): Set - flag_omit_frame_pointer to 0 if frame pointer is needed for unwinding. + * config/avr/avr.c (avr_option_override): Set flag_omit_frame_pointer + to 0 if frame pointer is needed for unwinding. 2011-10-10 Uros Bizjak diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h index bd69ec2b5e3..7721c465832 100644 --- a/gcc/config/i386/i386.h +++ b/gcc/config/i386/i386.h @@ -2322,6 +2322,7 @@ extern void debug_dispatch_window (int); #define RECIP_MASK_VEC_SQRT 0x08 #define RECIP_MASK_ALL (RECIP_MASK_DIV | RECIP_MASK_SQRT \ | RECIP_MASK_VEC_DIV | RECIP_MASK_VEC_SQRT) +#define RECIP_MASK_DEFAULT (RECIP_MASK_VEC_DIV | RECIP_MASK_VEC_SQRT) #define TARGET_RECIP_DIV ((recip_mask & RECIP_MASK_DIV) != 0) #define TARGET_RECIP_SQRT ((recip_mask & RECIP_MASK_SQRT) != 0) diff --git a/gcc/config/i386/i386.opt b/gcc/config/i386/i386.opt index 43009a3c2a6..6c516e7b869 100644 --- a/gcc/config/i386/i386.opt +++ b/gcc/config/i386/i386.opt @@ -32,7 +32,7 @@ Variable HOST_WIDE_INT ix86_isa_flags_explicit TargetVariable -int recip_mask +int recip_mask = RECIP_MASK_DEFAULT Variable int recip_mask_explicit diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi index 0ac9f39b51b..09e115c55c7 100644 --- a/gcc/doc/invoke.texi +++ b/gcc/doc/invoke.texi @@ -12922,7 +12922,12 @@ Note that while the throughput of the sequence is higher than the throughput of the non-reciprocal instruction, the precision of the sequence can be decreased by up to 2 
ulp (i.e. the inverse of 1.0 equals 0.99999994). -Note that GCC implements 1.0f/sqrtf(x) in terms of RSQRTSS (or RSQRTPS) +Note that GCC implements @code{1.0f/sqrtf(@var{x})} in terms of RSQRTSS +(or RSQRTPS) already with @option{-ffast-math} (or the above option +combination), and doesn't need @option{-mrecip}. + +Also note that GCC emits the above sequence with an additional Newton-Raphson step +for vectorized single float division and vectorized @code{sqrtf(@var{x})} already with @option{-ffast-math} (or the above option combination), and doesn't need @option{-mrecip}. -- 2.30.2