From: Uros Bizjak Date: Fri, 10 Aug 2012 22:25:17 +0000 (+0200) Subject: i386.md (isa): Add fma and fma4. X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=3e5804e1a90d0ac9c084117709a3db25011b05b3;p=gcc.git i386.md (isa): Add fma and fma4. * config/i386/i386.md (isa): Add fma and fma4. (enabled): Handle fma and fma4. * config/i386/sse.md (*fma_fmadd_<mode>): Merge *fma4_fmadd_<mode>. (*fma_fmsub_<mode>): Merge *fma4_fmsub_<mode>. (*fma_fnmadd_<mode>): Merge *fma4_fnmadd_<mode>. (*fma_fnmsub_<mode>): Merge *fma4_fnmsub_<mode>. (*fma_fmaddsub_<mode>): Merge *fma4_fmaddsub_<mode>. (*fma_fmsubadd_<mode>): Merge *fma4_fmsubadd_<mode>. From-SVN: r190305 --- diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 1620bc8cf68..fbcaf591ef9 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,14 @@ +2012-08-11 Uros Bizjak + + * config/i386/i386.md (isa): Add fma and fma4. + (enabled): Handle fma and fma4. + * config/i386/sse.md (*fma_fmadd_<mode>): Merge *fma4_fmadd_<mode>. + (*fma_fmsub_<mode>): Merge *fma4_fmsub_<mode>. + (*fma_fnmadd_<mode>): Merge *fma4_fnmadd_<mode>. + (*fma_fnmsub_<mode>): Merge *fma4_fnmsub_<mode>. + (*fma_fmaddsub_<mode>): Merge *fma4_fmaddsub_<mode>. + (*fma_fmsubadd_<mode>): Merge *fma4_fmsubadd_<mode>. + 2012-08-10 Uros Bizjak * config/i386/sse.md (*fma_fmadd_<mode>, *fma_fmsub_<mode>, diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md index 837547e2d8a..8d6f211b52f 100644 --- a/gcc/config/i386/i386.md +++ b/gcc/config/i386/i386.md @@ -641,7 +641,8 @@ (define_attr "movu" "0,1" (const_string "0")) ;; Used to control the "enabled" attribute on a per-instruction basis. 
-(define_attr "isa" "base,sse2,sse2_noavx,sse3,sse4,sse4_noavx,noavx,avx,avx2,noavx2,bmi2" +(define_attr "isa" "base,sse2,sse2_noavx,sse3,sse4,sse4_noavx,noavx,avx, + avx2,noavx2,bmi2,fma,fma4" (const_string "base")) (define_attr "enabled" "" @@ -657,6 +658,9 @@ (eq_attr "isa" "avx2") (symbol_ref "TARGET_AVX2") (eq_attr "isa" "noavx2") (symbol_ref "!TARGET_AVX2") (eq_attr "isa" "bmi2") (symbol_ref "TARGET_BMI2") + (eq_attr "isa" "fma") (symbol_ref "TARGET_FMA") + (eq_attr "isa" "fma4") + (symbol_ref "TARGET_FMA4 && !TARGET_FMA") ] (const_int 1))) diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md index 641a3bab223..0cff3a3417b 100644 --- a/gcc/config/i386/sse.md +++ b/gcc/config/i386/sse.md @@ -1891,21 +1891,6 @@ (define_mode_iterator FMAMODE [SF DF V4SF V2DF V8SF V4DF]) -;; In order to match (*a * *b) + *c, particularly when vectorizing, allow -;; combine to generate a multiply/add with two memory references. We then -;; split this insn, into loading up the destination register with one of the -;; memory operations. If we don't manage to split the insn, reload will -;; generate the appropriate moves. The reason this is needed, is that combine -;; has already folded one of the memory references into both the multiply and -;; add insns, and it can't generate a new pseudo. I.e.: -;; (set (reg1) (mem (addr1))) -;; (set (reg2) (mult (reg1) (mem (addr2)))) -;; (set (reg3) (plus (reg2) (mem (addr3)))) -;; -;; ??? This is historic, pre-dating the gimple fma transformation. -;; We could now properly represent that only one memory operand is -;; allowed and not be penalized during optimization. - ;; The standard names for fma is only available with SSE math enabled. 
(define_expand "fma4" [(set (match_operand:FMAMODE 0 "register_operand") @@ -1948,116 +1933,76 @@ (match_operand:FMAMODE 3 "nonimmediate_operand")))] "TARGET_FMA || TARGET_FMA4") -;; FMA3 version - (define_insn "*fma_fmadd_" - [(set (match_operand:FMAMODE 0 "register_operand" "=x,x,x") + [(set (match_operand:FMAMODE 0 "register_operand" "=x,x,x,x,x") (fma:FMAMODE - (match_operand:FMAMODE 1 "nonimmediate_operand" "%0, 0,x") - (match_operand:FMAMODE 2 "nonimmediate_operand" "xm, x,xm") - (match_operand:FMAMODE 3 "nonimmediate_operand" " x,xm,0")))] - "TARGET_FMA" + (match_operand:FMAMODE 1 "nonimmediate_operand" "%0, 0,x, x,x") + (match_operand:FMAMODE 2 "nonimmediate_operand" "xm, x,xm,x,m") + (match_operand:FMAMODE 3 "nonimmediate_operand" " x,xm,0,xm,x")))] + "TARGET_FMA || TARGET_FMA4" "@ vfmadd132\t{%2, %3, %0|%0, %3, %2} vfmadd213\t{%3, %2, %0|%0, %2, %3} - vfmadd231\t{%2, %1, %0|%0, %1, %2}" - [(set_attr "type" "ssemuladd") + vfmadd231\t{%2, %1, %0|%0, %1, %2} + vfmadd\t{%3, %2, %1, %0|%0, %1, %2, %3} + vfmadd\t{%3, %2, %1, %0|%0, %1, %2, %3}" + [(set_attr "isa" "fma,fma,fma,fma4,fma4") + (set_attr "type" "ssemuladd") (set_attr "mode" "")]) (define_insn "*fma_fmsub_" - [(set (match_operand:FMAMODE 0 "register_operand" "=x,x,x") + [(set (match_operand:FMAMODE 0 "register_operand" "=x,x,x,x,x") (fma:FMAMODE - (match_operand:FMAMODE 1 "nonimmediate_operand" "%0, 0,x") - (match_operand:FMAMODE 2 "nonimmediate_operand" "xm, x,xm") + (match_operand:FMAMODE 1 "nonimmediate_operand" "%0, 0,x, x,x") + (match_operand:FMAMODE 2 "nonimmediate_operand" "xm, x,xm,x,m") (neg:FMAMODE - (match_operand:FMAMODE 3 "nonimmediate_operand" " x,xm,0"))))] - "TARGET_FMA" + (match_operand:FMAMODE 3 "nonimmediate_operand" " x,xm,0,xm,x"))))] + "TARGET_FMA || TARGET_FMA4" "@ vfmsub132\t{%2, %3, %0|%0, %3, %2} vfmsub213\t{%3, %2, %0|%0, %2, %3} - vfmsub231\t{%2, %1, %0|%0, %1, %2}" - [(set_attr "type" "ssemuladd") + vfmsub231\t{%2, %1, %0|%0, %1, %2} + vfmsub\t{%3, %2, %1, %0|%0, %1, %2, 
%3} + vfmsub\t{%3, %2, %1, %0|%0, %1, %2, %3}" + [(set_attr "isa" "fma,fma,fma,fma4,fma4") + (set_attr "type" "ssemuladd") (set_attr "mode" "")]) (define_insn "*fma_fnmadd_" - [(set (match_operand:FMAMODE 0 "register_operand" "=x,x,x") + [(set (match_operand:FMAMODE 0 "register_operand" "=x,x,x,x,x") (fma:FMAMODE (neg:FMAMODE - (match_operand:FMAMODE 1 "nonimmediate_operand" "%0, 0,x")) - (match_operand:FMAMODE 2 "nonimmediate_operand" "xm, x,xm") - (match_operand:FMAMODE 3 "nonimmediate_operand" " x,xm,0")))] - "TARGET_FMA" + (match_operand:FMAMODE 1 "nonimmediate_operand" "%0, 0,x, x,x")) + (match_operand:FMAMODE 2 "nonimmediate_operand" "xm, x,xm,x,m") + (match_operand:FMAMODE 3 "nonimmediate_operand" " x,xm,0,xm,x")))] + "TARGET_FMA || TARGET_FMA4" "@ vfnmadd132\t{%2, %3, %0|%0, %3, %2} vfnmadd213\t{%3, %2, %0|%0, %2, %3} - vfnmadd231\t{%2, %1, %0|%0, %1, %2}" - [(set_attr "type" "ssemuladd") + vfnmadd231\t{%2, %1, %0|%0, %1, %2} + vfnmadd\t{%3, %2, %1, %0|%0, %1, %2, %3} + vfnmadd\t{%3, %2, %1, %0|%0, %1, %2, %3}" + [(set_attr "isa" "fma,fma,fma,fma4,fma4") + (set_attr "type" "ssemuladd") (set_attr "mode" "")]) (define_insn "*fma_fnmsub_" - [(set (match_operand:FMAMODE 0 "register_operand" "=x,x,x") + [(set (match_operand:FMAMODE 0 "register_operand" "=x,x,x,x,x") (fma:FMAMODE (neg:FMAMODE - (match_operand:FMAMODE 1 "nonimmediate_operand" "%0, 0,x")) - (match_operand:FMAMODE 2 "nonimmediate_operand" "xm, x,xm") + (match_operand:FMAMODE 1 "nonimmediate_operand" "%0, 0,x, x,x")) + (match_operand:FMAMODE 2 "nonimmediate_operand" "xm, x,xm,x,m") (neg:FMAMODE - (match_operand:FMAMODE 3 "nonimmediate_operand" " x,xm,0"))))] - "TARGET_FMA" + (match_operand:FMAMODE 3 "nonimmediate_operand" " x,xm,0,xm,x"))))] + "TARGET_FMA || TARGET_FMA4" "@ vfnmsub132\t{%2, %3, %0|%0, %3, %2} vfnmsub213\t{%3, %2, %0|%0, %2, %3} - vfnmsub231\t{%2, %1, %0|%0, %1, %2}" - [(set_attr "type" "ssemuladd") - (set_attr "mode" "")]) - -;; FMA4 version - -(define_insn "*fma4_fmadd_" - [(set 
(match_operand:FMAMODE 0 "register_operand" "=x,x") - (fma:FMAMODE - (match_operand:FMAMODE 1 "nonimmediate_operand" "%x,x") - (match_operand:FMAMODE 2 "nonimmediate_operand" " x,m") - (match_operand:FMAMODE 3 "nonimmediate_operand" "xm,x")))] - "TARGET_FMA4" - "vfmadd\t{%3, %2, %1, %0|%0, %1, %2, %3}" - [(set_attr "type" "ssemuladd") - (set_attr "mode" "")]) - -(define_insn "*fma4_fmsub_" - [(set (match_operand:FMAMODE 0 "register_operand" "=x,x") - (fma:FMAMODE - (match_operand:FMAMODE 1 "nonimmediate_operand" "%x,x") - (match_operand:FMAMODE 2 "nonimmediate_operand" " x,m") - (neg:FMAMODE - (match_operand:FMAMODE 3 "nonimmediate_operand" "xm,x"))))] - "TARGET_FMA4" - "vfmsub\t{%3, %2, %1, %0|%0, %1, %2, %3}" - [(set_attr "type" "ssemuladd") - (set_attr "mode" "")]) - -(define_insn "*fma4_fnmadd_" - [(set (match_operand:FMAMODE 0 "register_operand" "=x,x") - (fma:FMAMODE - (neg:FMAMODE - (match_operand:FMAMODE 1 "nonimmediate_operand" "%x,x")) - (match_operand:FMAMODE 2 "nonimmediate_operand" " x,m") - (match_operand:FMAMODE 3 "nonimmediate_operand" "xm,x")))] - "TARGET_FMA4" - "vfnmadd\t{%3, %2, %1, %0|%0, %1, %2, %3}" - [(set_attr "type" "ssemuladd") - (set_attr "mode" "")]) - -(define_insn "*fma4_fnmsub_" - [(set (match_operand:FMAMODE 0 "register_operand" "=x,x") - (fma:FMAMODE - (neg:FMAMODE - (match_operand:FMAMODE 1 "nonimmediate_operand" "%x,x")) - (match_operand:FMAMODE 2 "nonimmediate_operand" " x,m") - (neg:FMAMODE - (match_operand:FMAMODE 3 "nonimmediate_operand" "xm,x"))))] - "TARGET_FMA4" - "vfnmsub\t{%3, %2, %1, %0|%0, %1, %2, %3}" - [(set_attr "type" "ssemuladd") + vfnmsub231\t{%2, %1, %0|%0, %1, %2} + vfnmsub\t{%3, %2, %1, %0|%0, %1, %2, %3} + vfnmsub\t{%3, %2, %1, %0|%0, %1, %2, %3}" + [(set_attr "isa" "fma,fma,fma,fma4,fma4") + (set_attr "type" "ssemuladd") (set_attr "mode" "")]) ;; FMA parallel floating point multiply addsub and subadd operations. 
@@ -2080,64 +2025,41 @@ UNSPEC_FMADDSUB))] "TARGET_FMA || TARGET_FMA4") -;; FMA3 version - (define_insn "*fma_fmaddsub_" - [(set (match_operand:VF 0 "register_operand" "=x,x,x") + [(set (match_operand:VF 0 "register_operand" "=x,x,x,x,x") (unspec:VF - [(match_operand:VF 1 "nonimmediate_operand" "%0, 0,x") - (match_operand:VF 2 "nonimmediate_operand" "xm, x,xm") - (match_operand:VF 3 "nonimmediate_operand" " x,xm,0")] + [(match_operand:VF 1 "nonimmediate_operand" "%0, 0,x, x,x") + (match_operand:VF 2 "nonimmediate_operand" "xm, x,xm,x,m") + (match_operand:VF 3 "nonimmediate_operand" " x,xm,0,xm,x")] UNSPEC_FMADDSUB))] - "TARGET_FMA" + "TARGET_FMA || TARGET_FMA4" "@ vfmaddsub132\t{%2, %3, %0|%0, %3, %2} vfmaddsub213\t{%3, %2, %0|%0, %2, %3} - vfmaddsub231\t{%2, %1, %0|%0, %1, %2}" - [(set_attr "type" "ssemuladd") + vfmaddsub231\t{%2, %1, %0|%0, %1, %2} + vfmaddsub\t{%3, %2, %1, %0|%0, %1, %2, %3} + vfmaddsub\t{%3, %2, %1, %0|%0, %1, %2, %3}" + [(set_attr "isa" "fma,fma,fma,fma4,fma4") + (set_attr "type" "ssemuladd") (set_attr "mode" "")]) (define_insn "*fma_fmsubadd_" - [(set (match_operand:VF 0 "register_operand" "=x,x,x") + [(set (match_operand:VF 0 "register_operand" "=x,x,x,x,x") (unspec:VF - [(match_operand:VF 1 "nonimmediate_operand" "%0, 0,x") - (match_operand:VF 2 "nonimmediate_operand" "xm, x,xm") + [(match_operand:VF 1 "nonimmediate_operand" "%0, 0,x, x,x") + (match_operand:VF 2 "nonimmediate_operand" "xm, x,xm,x,m") (neg:VF - (match_operand:VF 3 "nonimmediate_operand" " x,xm,0"))] + (match_operand:VF 3 "nonimmediate_operand" " x,xm,0,xm,x"))] UNSPEC_FMADDSUB))] - "TARGET_FMA" + "TARGET_FMA || TARGET_FMA4" "@ vfmsubadd132\t{%2, %3, %0|%0, %3, %2} vfmsubadd213\t{%3, %2, %0|%0, %2, %3} - vfmsubadd231\t{%2, %1, %0|%0, %1, %2}" - [(set_attr "type" "ssemuladd") - (set_attr "mode" "")]) - -;; FMA4 version - -(define_insn "*fma4_fmaddsub_" - [(set (match_operand:VF 0 "register_operand" "=x,x") - (unspec:VF - [(match_operand:VF 1 "nonimmediate_operand" "%x,x") - 
(match_operand:VF 2 "nonimmediate_operand" " x,m") - (match_operand:VF 3 "nonimmediate_operand" "xm,x")] - UNSPEC_FMADDSUB))] - "TARGET_FMA4" - "vfmaddsub\t{%3, %2, %1, %0|%0, %1, %2, %3}" - [(set_attr "type" "ssemuladd") - (set_attr "mode" "")]) - -(define_insn "*fma4_fmsubadd_" - [(set (match_operand:VF 0 "register_operand" "=x,x") - (unspec:VF - [(match_operand:VF 1 "nonimmediate_operand" "%x,x") - (match_operand:VF 2 "nonimmediate_operand" " x,m") - (neg:VF - (match_operand:VF 3 "nonimmediate_operand" "xm,x"))] - UNSPEC_FMADDSUB))] - "TARGET_FMA4" - "vfmsubadd\t{%3, %2, %1, %0|%0, %1, %2, %3}" - [(set_attr "type" "ssemuladd") + vfmsubadd231\t{%2, %1, %0|%0, %1, %2} + vfmsubadd\t{%3, %2, %1, %0|%0, %1, %2, %3} + vfmsubadd\t{%3, %2, %1, %0|%0, %1, %2, %3}" + [(set_attr "isa" "fma,fma,fma,fma4,fma4") + (set_attr "type" "ssemuladd") (set_attr "mode" "")]) ;; FMA3 floating point scalar intrinsics. These merge result with