From: Tejas Joshi Date: Mon, 26 Aug 2019 12:41:59 +0000 (+0000) Subject: i386: Roundeven expansion for SSE4.1+ X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=d3b92f35d84f44a8599028086286699213b73e7c;p=gcc.git i386: Roundeven expansion for SSE4.1+ gcc/ChangeLog: 2019-08-26 Tejas Joshi Uros Bizjak * builtins.c (mathfn_built_in_2): Change CASE_MATHFN to CASE_MATHFN_FLOATN for roundeven. * config/i386/i386.c (ix86_i387_mode_needed): Add case I387_ROUNDEVEN. (ix86_mode_needed): Likewise. (ix86_mode_after): Likewise. (ix86_mode_entry): Likewise. (ix86_mode_exit): Likewise. (ix86_emit_mode_set): Likewise. (emit_i387_cw_initialization): Add case I387_CW_ROUNDEVEN. * config/i386/i386.h (ix86_stack_slot): Add SLOT_CW_ROUNDEVEN. (ix86_entity): Add I387_ROUNDEVEN. (NUM_MODES_FOR_MODE_SWITCHING): Add I387_CW_ANY. * config/i386/i386.md: Define UNSPEC_FRNDINT_ROUNDEVEN. (define_int_iterator): Likewise. (define_int_attr): Likewise for rounding_insn, rounding and ROUNDING. (define_constant): Define ROUND_ROUNDEVEN mode. (define_attr): Add roundeven mode for i387_cw. (<rounding_insn><mode>2): Add condition for ROUND_ROUNDEVEN. * internal-fn.def (ROUNDEVEN): New builtin function. * optabs.def (roundeven_optab): New optab. gcc/testsuite/ChangeLog: 2019-08-26 Tejas Joshi * gcc.target/i386/sse4_1-round-roundeven-1.c: New test. * gcc.target/i386/sse4_1-round-roundeven-2.c: New test. Co-Authored-By: Uros Bizjak From-SVN: r274928 --- diff --git a/gcc/ChangeLog b/gcc/ChangeLog index c67cbe3b106..aa2bec6169b 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,28 @@ +2019-08-26 Tejas Joshi + Uros Bizjak + + * builtins.c (mathfn_built_in_2): Change CASE_MATHFN to + CASE_MATHFN_FLOATN for roundeven. + * config/i386/i386.c (ix86_i387_mode_needed): Add case + I387_ROUNDEVEN. + (ix86_mode_needed): Likewise. + (ix86_mode_after): Likewise. + (ix86_mode_entry): Likewise. + (ix86_mode_exit): Likewise. + (ix86_emit_mode_set): Likewise. + (emit_i387_cw_initialization): Add case I387_CW_ROUNDEVEN. 
+ * config/i386/i386.h (ix86_stack_slot): Add SLOT_CW_ROUNDEVEN. + (ix86_entity): Add I387_ROUNDEVEN. + (NUM_MODES_FOR_MODE_SWITCHING): Add I387_CW_ANY. + * config/i386/i386.md: Define UNSPEC_FRNDINT_ROUNDEVEN. + (define_int_iterator): Likewise. + (define_int_attr): Likewise for rounding_insn, rounding and ROUNDING. + (define_constant): Define ROUND_ROUNDEVEN mode. + (define_attr): Add roundeven mode for i387_cw. + (<rounding_insn><mode>2): Add condition for ROUND_ROUNDEVEN. + * internal-fn.def (ROUNDEVEN): New builtin function. + * optabs.def (roundeven_optab): New optab. + 2019-08-26 Tejas Joshi * builtins.c (mathfn_built_in_2): Added CASE_MATHFN_FLOATN diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c index 49ab50ea41b..c712c03dbe2 100644 --- a/gcc/config/i386/i386.c +++ b/gcc/config/i386/i386.c @@ -13557,6 +13557,11 @@ ix86_i387_mode_needed (int entity, rtx_insn *insn) switch (entity) { + case I387_ROUNDEVEN: + if (mode == I387_CW_ROUNDEVEN) + return mode; + break; + case I387_TRUNC: if (mode == I387_CW_TRUNC) return mode; @@ -13591,6 +13596,7 @@ ix86_mode_needed (int entity, rtx_insn *insn) return ix86_dirflag_mode_needed (insn); case AVX_U128: return ix86_avx_u128_mode_needed (insn); + case I387_ROUNDEVEN: case I387_TRUNC: case I387_FLOOR: case I387_CEIL: @@ -13651,6 +13657,7 @@ ix86_mode_after (int entity, int mode, rtx_insn *insn) return mode; case AVX_U128: return ix86_avx_u128_mode_after (mode, insn); + case I387_ROUNDEVEN: case I387_TRUNC: case I387_FLOOR: case I387_CEIL: @@ -13703,6 +13710,7 @@ ix86_mode_entry (int entity) return ix86_dirflag_mode_entry (); case AVX_U128: return ix86_avx_u128_mode_entry (); + case I387_ROUNDEVEN: case I387_TRUNC: case I387_FLOOR: case I387_CEIL: @@ -13740,6 +13748,7 @@ ix86_mode_exit (int entity) return X86_DIRFLAG_ANY; case AVX_U128: return ix86_avx_u128_mode_exit (); + case I387_ROUNDEVEN: case I387_TRUNC: case I387_FLOOR: case I387_CEIL: @@ -13774,6 +13783,12 @@ emit_i387_cw_initialization (int mode) switch (mode) { + case I387_CW_ROUNDEVEN: 
+ /* round to nearest (even): clear the RC field, bits 10-11 of the x87 control word */ + emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00))); + slot = SLOT_CW_ROUNDEVEN; + break; + case I387_CW_TRUNC: /* round toward zero (truncate) */ emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0c00))); @@ -13820,6 +13835,7 @@ ix86_emit_mode_set (int entity, int mode, int prev_mode ATTRIBUTE_UNUSED, if (mode == AVX_U128_CLEAN) emit_insn (gen_avx_vzeroupper ()); break; + case I387_ROUNDEVEN: case I387_TRUNC: case I387_FLOOR: case I387_CEIL: diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h index 167b73e85fa..a1d0484d71f 100644 --- a/gcc/config/i386/i386.h +++ b/gcc/config/i386/i386.h @@ -2511,6 +2511,7 @@ enum ix86_stack_slot { SLOT_TEMP = 0, SLOT_CW_STORED, + SLOT_CW_ROUNDEVEN, SLOT_CW_TRUNC, SLOT_CW_FLOOR, SLOT_CW_CEIL, @@ -2522,6 +2523,7 @@ enum ix86_entity { X86_DIRFLAG = 0, AVX_U128, + I387_ROUNDEVEN, I387_TRUNC, I387_FLOOR, I387_CEIL, @@ -2557,7 +2559,7 @@ enum avx_u128_state #define NUM_MODES_FOR_MODE_SWITCHING \ { X86_DIRFLAG_ANY, AVX_U128_ANY, \ - I387_CW_ANY, I387_CW_ANY, I387_CW_ANY } + I387_CW_ANY, I387_CW_ANY, I387_CW_ANY, I387_CW_ANY } /* Avoid renaming of stack registers, as doing so in combination with diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md index 9951d46d8b2..7ad97882419 100644 --- a/gcc/config/i386/i386.md +++ b/gcc/config/i386/i386.md @@ -141,6 +141,7 @@ UNSPEC_FXAM ;; x87 Rounding + UNSPEC_FRNDINT_ROUNDEVEN UNSPEC_FRNDINT_FLOOR UNSPEC_FRNDINT_CEIL UNSPEC_FRNDINT_TRUNC @@ -303,7 +304,8 @@ ;; Constants to represent rounding modes in the ROUND instruction (define_constants - [(ROUND_FLOOR 0x1) + [(ROUND_ROUNDEVEN 0x0) + (ROUND_FLOOR 0x1) (ROUND_CEIL 0x2) (ROUND_TRUNC 0x3) (ROUND_MXCSR 0x4) @@ -779,7 +781,7 @@ ;; Defines rounding mode of an FP operation. -(define_attr "i387_cw" "trunc,floor,ceil,uninitialized,any" +(define_attr "i387_cw" "roundeven,floor,ceil,trunc,uninitialized,any" (const_string "any")) ;; Define attribute to indicate AVX insns with partial XMM register update. 
@@ -16212,7 +16214,8 @@ }) (define_int_iterator FRNDINT_ROUNDING - [UNSPEC_FRNDINT_FLOOR + [UNSPEC_FRNDINT_ROUNDEVEN + UNSPEC_FRNDINT_FLOOR UNSPEC_FRNDINT_CEIL UNSPEC_FRNDINT_TRUNC]) @@ -16222,21 +16225,24 @@ ;; Base name for define_insn (define_int_attr rounding_insn - [(UNSPEC_FRNDINT_FLOOR "floor") + [(UNSPEC_FRNDINT_ROUNDEVEN "roundeven") + (UNSPEC_FRNDINT_FLOOR "floor") (UNSPEC_FRNDINT_CEIL "ceil") (UNSPEC_FRNDINT_TRUNC "btrunc") (UNSPEC_FIST_FLOOR "floor") (UNSPEC_FIST_CEIL "ceil")]) (define_int_attr rounding - [(UNSPEC_FRNDINT_FLOOR "floor") + [(UNSPEC_FRNDINT_ROUNDEVEN "roundeven") + (UNSPEC_FRNDINT_FLOOR "floor") (UNSPEC_FRNDINT_CEIL "ceil") (UNSPEC_FRNDINT_TRUNC "trunc") (UNSPEC_FIST_FLOOR "floor") (UNSPEC_FIST_CEIL "ceil")]) (define_int_attr ROUNDING - [(UNSPEC_FRNDINT_FLOOR "FLOOR") + [(UNSPEC_FRNDINT_ROUNDEVEN "ROUNDEVEN") + (UNSPEC_FRNDINT_FLOOR "FLOOR") (UNSPEC_FRNDINT_CEIL "CEIL") (UNSPEC_FRNDINT_TRUNC "TRUNC") (UNSPEC_FIST_FLOOR "FLOOR") @@ -16299,8 +16305,9 @@ || TARGET_MIX_SSE_I387) && (flag_fp_int_builtin_inexact || !flag_trapping_math)) || (SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH - && (TARGET_SSE4_1 || flag_fp_int_builtin_inexact - || !flag_trapping_math))" + && (TARGET_SSE4_1 + || (ROUND_<ROUNDING> != ROUND_ROUNDEVEN + && (flag_fp_int_builtin_inexact || !flag_trapping_math))))" { if (SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH && (TARGET_SSE4_1 || flag_fp_int_builtin_inexact || !flag_trapping_math)) diff --git a/gcc/internal-fn.def b/gcc/internal-fn.def index 9461693bcd1..b5a6ca33223 100644 --- a/gcc/internal-fn.def +++ b/gcc/internal-fn.def @@ -238,6 +238,7 @@ DEF_INTERNAL_FLT_FLOATN_FN (FLOOR, ECF_CONST, floor, unary) DEF_INTERNAL_FLT_FLOATN_FN (NEARBYINT, ECF_CONST, nearbyint, unary) DEF_INTERNAL_FLT_FLOATN_FN (RINT, ECF_CONST, rint, unary) DEF_INTERNAL_FLT_FLOATN_FN (ROUND, ECF_CONST, round, unary) +DEF_INTERNAL_FLT_FLOATN_FN (ROUNDEVEN, ECF_CONST, roundeven, unary) DEF_INTERNAL_FLT_FLOATN_FN (TRUNC, ECF_CONST, btrunc, unary) /* Binary math 
functions. */ diff --git a/gcc/optabs.def b/gcc/optabs.def index 5283e6753f2..0860b38badb 100644 --- a/gcc/optabs.def +++ b/gcc/optabs.def @@ -271,6 +271,7 @@ OPTAB_D (fnms_optab, "fnms$a4") OPTAB_D (rint_optab, "rint$a2") OPTAB_D (round_optab, "round$a2") +OPTAB_D (roundeven_optab, "roundeven$a2") OPTAB_D (floor_optab, "floor$a2") OPTAB_D (ceil_optab, "ceil$a2") OPTAB_D (btrunc_optab, "btrunc$a2") diff --git a/gcc/reg-stack.c b/gcc/reg-stack.c index 710f14a9544..0f0089acdea 100644 --- a/gcc/reg-stack.c +++ b/gcc/reg-stack.c @@ -1817,6 +1817,7 @@ subst_stack_regs_pat (rtx_insn *insn, stack_ptr regstack, rtx pat) case UNSPEC_FRNDINT: case UNSPEC_F2XM1: + case UNSPEC_FRNDINT_ROUNDEVEN: case UNSPEC_FRNDINT_FLOOR: case UNSPEC_FRNDINT_CEIL: case UNSPEC_FRNDINT_TRUNC: diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index c68c8231570..b5a2d7bb50e 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,3 +1,8 @@ +2019-08-26 Tejas Joshi + + * gcc.target/i386/sse4_1-round-roundeven-1.c: New test. + * gcc.target/i386/sse4_1-round-roundeven-2.c: New test. + 2019-08-26 Tejas Joshi * gcc.dg/torture/builtin-round-roundeven.c: New test. 
diff --git a/gcc/testsuite/gcc.target/i386/sse4_1-round-roundeven-1.c b/gcc/testsuite/gcc.target/i386/sse4_1-round-roundeven-1.c new file mode 100644 index 00000000000..36332630618 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/sse4_1-round-roundeven-1.c @@ -0,0 +1,17 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -msse4.1" } */ + +__attribute__((noinline, noclone)) double +f1 (double x) +{ + return __builtin_roundeven (x); +} + +__attribute__((noinline, noclone)) float +f2 (float x) +{ + return __builtin_roundevenf (x); +} + +/* { dg-final { scan-assembler-times "roundsd\[^\n\r\]*xmm" 1 } } */ +/* { dg-final { scan-assembler-times "roundss\[^\n\r\]*xmm" 1 } } */ diff --git a/gcc/testsuite/gcc.target/i386/sse4_1-round-roundeven-2.c b/gcc/testsuite/gcc.target/i386/sse4_1-round-roundeven-2.c new file mode 100644 index 00000000000..9505796dafb --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/sse4_1-round-roundeven-2.c @@ -0,0 +1,15 @@ +/* { dg-do run } */ +/* { dg-require-effective-target sse4 } */ +/* { dg-options "-O2 -msse4.1" } */ + +#include "sse4_1-check.h" +#include "sse4_1-round-roundeven-1.c" + +static void +sse4_1_test (void) +{ + if (f1 (0.5) != 0.0 || f1 (1.5) != 2.0 || f1 (-0.5) != 0.0 || f1 (-1.5) != -2.0) + abort (); + if (f2 (0.5f) != 0.0f || f2 (1.5f) != 2.0f || f2 (-0.5f) != 0.0f || f2 (-1.5f) != -2.0f) + abort (); +}