From b0d5396c7ea5e8ff6952598475244fcbd3d1276e Mon Sep 17 00:00:00 2001 From: Uros Bizjak Date: Fri, 10 Aug 2012 22:46:15 +0200 Subject: [PATCH] * config/i386/sse.md (*fma_fmadd_<mode>, *fma_fmsub_<mode>, *fma_fnmadd_<mode>, *fma_fnmsub_<mode>, *fma_fmaddsub_<mode>, *fma_fmsubadd_<mode>): Move FMA3 insn patterns before FMA4 patterns. From-SVN: r190304 --- gcc/ChangeLog | 6 + gcc/config/i386/sse.md | 340 ++++++++++++++++++++--------------------- 2 files changed, 174 insertions(+), 172 deletions(-) diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 6fe9b3671cc..1620bc8cf68 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,9 @@ +2012-08-10 Uros Bizjak + + * config/i386/sse.md (*fma_fmadd_<mode>, *fma_fmsub_<mode>, + *fma_fnmadd_<mode>, *fma_fnmsub_<mode>, *fma_fmaddsub_<mode>, + *fma_fmsubadd_<mode>): Move FMA3 insn patterns before FMA4 patterns. + 2012-08-10 Uros Bizjak * config/i386/i386.md (simple LEA peephole2s): Add zero-extend diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md index 532ebddd668..641a3bab223 100644 --- a/gcc/config/i386/sse.md +++ b/gcc/config/i386/sse.md @@ -403,8 +403,6 @@ ;; Mix-n-match (define_mode_iterator AVX256MODE2P [V8SI V8SF V4DF]) -(define_mode_iterator FMAMODE [SF DF V4SF V2DF V8SF V4DF]) - ;; Mapping of immediate bits for blend instructions (define_mode_attr blendbits [(V8SF "255") (V4SF "15") (V4DF "15") (V2DF "3")]) @@ -1886,12 +1884,13 @@ ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;; -;; FMA4 floating point multiply/accumulate instructions. This -;; includes the scalar version of the instructions as well as the -;; vector. +;; FMA floating point multiply/accumulate instructions. These include +;; scalar versions of the instructions as well as vector versions. ;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +(define_mode_iterator FMAMODE [SF DF V4SF V2DF V8SF V4DF]) + ;; In order to match (*a * *b) + *c, particularly when vectorizing, allow ;; combine to generate a multiply/add with two memory references. 
We then ;; split this insn, into loading up the destination register with one of the @@ -1907,8 +1906,6 @@ ;; We could now properly represent that only one memory operand is ;; allowed and not be penalized during optimization. -;; Intrinsic FMA operations. - ;; The standard names for fma is only available with SSE math enabled. (define_expand "fma<mode>4" [(set (match_operand:FMAMODE 0 "register_operand") @@ -1942,7 +1939,7 @@ (neg:FMAMODE (match_operand:FMAMODE 3 "nonimmediate_operand"))))] "(TARGET_FMA || TARGET_FMA4) && TARGET_SSE_MATH") -;; The builtin for fma4intrin.h is not constrained by SSE math enabled. +;; The builtin for intrinsics is not constrained by SSE math enabled. (define_expand "fma4i_fmadd_<mode>" [(set (match_operand:FMAMODE 0 "register_operand") (fma:FMAMODE @@ -1951,7 +1948,71 @@ (match_operand:FMAMODE 3 "nonimmediate_operand")))] "TARGET_FMA || TARGET_FMA4") -(define_insn "*fma4i_fmadd_<mode>" +;; FMA3 version + +(define_insn "*fma_fmadd_<mode>" + [(set (match_operand:FMAMODE 0 "register_operand" "=x,x,x") + (fma:FMAMODE + (match_operand:FMAMODE 1 "nonimmediate_operand" "%0, 0,x") + (match_operand:FMAMODE 2 "nonimmediate_operand" "xm, x,xm") + (match_operand:FMAMODE 3 "nonimmediate_operand" " x,xm,0")))] + "TARGET_FMA" + "@ + vfmadd132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2} + vfmadd213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3} + vfmadd231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}" + [(set_attr "type" "ssemuladd") + (set_attr "mode" "<MODE>")]) + +(define_insn "*fma_fmsub_<mode>" + [(set (match_operand:FMAMODE 0 "register_operand" "=x,x,x") + (fma:FMAMODE + (match_operand:FMAMODE 1 "nonimmediate_operand" "%0, 0,x") + (match_operand:FMAMODE 2 "nonimmediate_operand" "xm, x,xm") + (neg:FMAMODE + (match_operand:FMAMODE 3 "nonimmediate_operand" " x,xm,0"))))] + "TARGET_FMA" + "@ + vfmsub132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2} + vfmsub213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3} + vfmsub231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}" + [(set_attr "type" "ssemuladd") + (set_attr "mode" "<MODE>")]) + +(define_insn "*fma_fnmadd_<mode>" + [(set (match_operand:FMAMODE 0 "register_operand" 
"=x,x,x") + (fma:FMAMODE + (neg:FMAMODE + (match_operand:FMAMODE 1 "nonimmediate_operand" "%0, 0,x")) + (match_operand:FMAMODE 2 "nonimmediate_operand" "xm, x,xm") + (match_operand:FMAMODE 3 "nonimmediate_operand" " x,xm,0")))] + "TARGET_FMA" + "@ + vfnmadd132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2} + vfnmadd213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3} + vfnmadd231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}" + [(set_attr "type" "ssemuladd") + (set_attr "mode" "<MODE>")]) + +(define_insn "*fma_fnmsub_<mode>" + [(set (match_operand:FMAMODE 0 "register_operand" "=x,x,x") + (fma:FMAMODE + (neg:FMAMODE + (match_operand:FMAMODE 1 "nonimmediate_operand" "%0, 0,x")) + (match_operand:FMAMODE 2 "nonimmediate_operand" "xm, x,xm") + (neg:FMAMODE + (match_operand:FMAMODE 3 "nonimmediate_operand" " x,xm,0"))))] + "TARGET_FMA" + "@ + vfnmsub132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2} + vfnmsub213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3} + vfnmsub231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}" + [(set_attr "type" "ssemuladd") + (set_attr "mode" "<MODE>")]) + +;; FMA4 version + +(define_insn "*fma4_fmadd_<mode>" [(set (match_operand:FMAMODE 0 "register_operand" "=x,x") (fma:FMAMODE (match_operand:FMAMODE 1 "nonimmediate_operand" "%x,x") @@ -1962,7 +2023,7 @@ [(set_attr "type" "ssemuladd") (set_attr "mode" "<MODE>")]) -(define_insn "*fma4i_fmsub_<mode>" +(define_insn "*fma4_fmsub_<mode>" [(set (match_operand:FMAMODE 0 "register_operand" "=x,x") (fma:FMAMODE (match_operand:FMAMODE 1 "nonimmediate_operand" "%x,x") @@ -1974,7 +2035,7 @@ [(set_attr "type" "ssemuladd") (set_attr "mode" "<MODE>")]) -(define_insn "*fma4i_fnmadd_<mode>" +(define_insn "*fma4_fnmadd_<mode>" [(set (match_operand:FMAMODE 0 "register_operand" "=x,x") (fma:FMAMODE (neg:FMAMODE @@ -1986,7 +2047,7 @@ [(set_attr "type" "ssemuladd") (set_attr "mode" "<MODE>")]) -(define_insn "*fma4i_fnmsub_<mode>" +(define_insn "*fma4_fnmsub_<mode>" [(set (match_operand:FMAMODE 0 "register_operand" "=x,x") (fma:FMAMODE (neg:FMAMODE @@ -1999,22 +2060,88 @@ [(set_attr "type" "ssemuladd") (set_attr "mode" "<MODE>")]) -;; Scalar versions of the above. 
Unlike ADDSS et al, these write the -;; entire destination register, with the high-order elements zeroed. +;; FMA parallel floating point multiply addsub and subadd operations. -(define_expand "fma4i_vmfmadd_<mode>" - [(set (match_operand:VF_128 0 "register_operand") - (vec_merge:VF_128 - (fma:VF_128 - (match_operand:VF_128 1 "nonimmediate_operand") - (match_operand:VF_128 2 "nonimmediate_operand") - (match_operand:VF_128 3 "nonimmediate_operand")) - (match_dup 4) - (const_int 1)))] +;; It would be possible to represent these without the UNSPEC as +;; +;; (vec_merge +;; (fma op1 op2 op3) +;; (fma op1 op2 (neg op3)) +;; (merge-const)) +;; +;; But this doesn't seem useful in practice. + +(define_expand "fmaddsub_<mode>" + [(set (match_operand:VF 0 "register_operand") + (unspec:VF + [(match_operand:VF 1 "nonimmediate_operand") + (match_operand:VF 2 "nonimmediate_operand") + (match_operand:VF 3 "nonimmediate_operand")] + UNSPEC_FMADDSUB))] + "TARGET_FMA || TARGET_FMA4") + +;; FMA3 version + +(define_insn "*fma_fmaddsub_<mode>" + [(set (match_operand:VF 0 "register_operand" "=x,x,x") + (unspec:VF + [(match_operand:VF 1 "nonimmediate_operand" "%0, 0,x") + (match_operand:VF 2 "nonimmediate_operand" "xm, x,xm") + (match_operand:VF 3 "nonimmediate_operand" " x,xm,0")] + UNSPEC_FMADDSUB))] + "TARGET_FMA" + "@ + vfmaddsub132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2} + vfmaddsub213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3} + vfmaddsub231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}" + [(set_attr "type" "ssemuladd") + (set_attr "mode" "<MODE>")]) + +(define_insn "*fma_fmsubadd_<mode>" + [(set (match_operand:VF 0 "register_operand" "=x,x,x") + (unspec:VF + [(match_operand:VF 1 "nonimmediate_operand" "%0, 0,x") + (match_operand:VF 2 "nonimmediate_operand" "xm, x,xm") + (neg:VF + (match_operand:VF 3 "nonimmediate_operand" " x,xm,0"))] + UNSPEC_FMADDSUB))] + "TARGET_FMA" + "@ + vfmsubadd132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2} + vfmsubadd213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3} + vfmsubadd231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}" + [(set_attr "type" "ssemuladd") + (set_attr "mode" "<MODE>")]) + +;; FMA4 
version + +(define_insn "*fma4_fmaddsub_<mode>" + [(set (match_operand:VF 0 "register_operand" "=x,x") + (unspec:VF + [(match_operand:VF 1 "nonimmediate_operand" "%x,x") + (match_operand:VF 2 "nonimmediate_operand" " x,m") + (match_operand:VF 3 "nonimmediate_operand" "xm,x")] + UNSPEC_FMADDSUB))] "TARGET_FMA4" -{ - operands[4] = CONST0_RTX (<MODE>mode); -}) + "vfmaddsub<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}" + [(set_attr "type" "ssemuladd") + (set_attr "mode" "<MODE>")]) + +(define_insn "*fma4_fmsubadd_<mode>" + [(set (match_operand:VF 0 "register_operand" "=x,x") + (unspec:VF + [(match_operand:VF 1 "nonimmediate_operand" "%x,x") + (match_operand:VF 2 "nonimmediate_operand" " x,m") + (neg:VF + (match_operand:VF 3 "nonimmediate_operand" "xm,x"))] + UNSPEC_FMADDSUB))] + "TARGET_FMA4" + "vfmsubadd<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}" + [(set_attr "type" "ssemuladd") + (set_attr "mode" "<MODE>")]) + +;; FMA3 floating point scalar intrinsics. These merge result with +;; high-order elements from the destination register. (define_expand "fmai_vmfmadd_<mode>" [(set (match_operand:VF_128 0 "register_operand") @@ -2099,6 +2226,21 @@ [(set_attr "type" "ssemuladd") (set_attr "mode" "<MODE>")]) +;; FMA4 floating point scalar intrinsics. These write the +;; entire destination register, with the high-order elements zeroed. + +(define_expand "fma4i_vmfmadd_<mode>" + [(set (match_operand:VF_128 0 "register_operand") + (vec_merge:VF_128 + (fma:VF_128 + (match_operand:VF_128 1 "nonimmediate_operand") + (match_operand:VF_128 2 "nonimmediate_operand") + (match_operand:VF_128 3 "nonimmediate_operand")) + (match_dup 4) + (const_int 1)))] + "TARGET_FMA4" + "operands[4] = CONST0_RTX (<MODE>mode);") + (define_insn "*fma4i_vmfmadd_<mode>" [(set (match_operand:VF_128 0 "register_operand" "=x,x") (vec_merge:VF_128 @@ -2159,152 +2301,6 @@ [(set_attr "type" "ssemuladd") (set_attr "mode" "<MODE>")]) -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -;; -;; FMA4 Parallel floating point multiply addsub and subadd operations. 
-;; -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; - -;; It would be possible to represent these without the UNSPEC as -;; -;; (vec_merge -;; (fma op1 op2 op3) -;; (fma op1 op2 (neg op3)) -;; (merge-const)) -;; -;; But this doesn't seem useful in practice. -(define_expand "fmaddsub_<mode>" - [(set (match_operand:VF 0 "register_operand") - (unspec:VF - [(match_operand:VF 1 "nonimmediate_operand") - (match_operand:VF 2 "nonimmediate_operand") - (match_operand:VF 3 "nonimmediate_operand")] - UNSPEC_FMADDSUB))] - "TARGET_FMA || TARGET_FMA4") -(define_insn "*fma4_fmaddsub_<mode>" - [(set (match_operand:VF 0 "register_operand" "=x,x") - (unspec:VF - [(match_operand:VF 1 "nonimmediate_operand" "%x,x") - (match_operand:VF 2 "nonimmediate_operand" " x,m") - (match_operand:VF 3 "nonimmediate_operand" "xm,x")] - UNSPEC_FMADDSUB))] - "TARGET_FMA4" - "vfmaddsub<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}" - [(set_attr "type" "ssemuladd") - (set_attr "mode" "<MODE>")]) - -(define_insn "*fma4_fmsubadd_<mode>" - [(set (match_operand:VF 0 "register_operand" "=x,x") - (unspec:VF - [(match_operand:VF 1 "nonimmediate_operand" "%x,x") - (match_operand:VF 2 "nonimmediate_operand" " x,m") - (neg:VF - (match_operand:VF 3 "nonimmediate_operand" "xm,x"))] - UNSPEC_FMADDSUB))] - "TARGET_FMA4" - "vfmsubadd<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}" - [(set_attr "type" "ssemuladd") - (set_attr "mode" "<MODE>")]) - -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -;; -;; FMA3 floating point multiply/accumulate instructions. 
-;; -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; - -(define_insn "*fma_fmadd_<mode>" - [(set (match_operand:FMAMODE 0 "register_operand" "=x,x,x") - (fma:FMAMODE - (match_operand:FMAMODE 1 "nonimmediate_operand" "%0, 0,x") - (match_operand:FMAMODE 2 "nonimmediate_operand" "xm, x,xm") - (match_operand:FMAMODE 3 "nonimmediate_operand" " x,xm,0")))] - "TARGET_FMA" - "@ - vfmadd132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2} - vfmadd213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3} - vfmadd231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}" - [(set_attr "type" "ssemuladd") - (set_attr "mode" "<MODE>")]) - -(define_insn "*fma_fmsub_<mode>" - [(set (match_operand:FMAMODE 0 "register_operand" "=x,x,x") - (fma:FMAMODE - (match_operand:FMAMODE 1 "nonimmediate_operand" "%0, 0,x") - (match_operand:FMAMODE 2 "nonimmediate_operand" "xm, x,xm") - (neg:FMAMODE - (match_operand:FMAMODE 3 "nonimmediate_operand" " x,xm,0"))))] - "TARGET_FMA" - "@ - vfmsub132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2} - vfmsub213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3} - vfmsub231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}" - [(set_attr "type" "ssemuladd") - (set_attr "mode" "<MODE>")]) - -(define_insn "*fma_fnmadd_<mode>" - [(set (match_operand:FMAMODE 0 "register_operand" "=x,x,x") - (fma:FMAMODE - (neg:FMAMODE - (match_operand:FMAMODE 1 "nonimmediate_operand" "%0, 0,x")) - (match_operand:FMAMODE 2 "nonimmediate_operand" "xm, x,xm") - (match_operand:FMAMODE 3 "nonimmediate_operand" " x,xm,0")))] - "TARGET_FMA" - "@ - vfnmadd132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2} - vfnmadd213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3} - vfnmadd231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}" - [(set_attr "type" "ssemuladd") - (set_attr "mode" "<MODE>")]) - -(define_insn "*fma_fnmsub_<mode>" - [(set (match_operand:FMAMODE 0 "register_operand" "=x,x,x") - (fma:FMAMODE - (neg:FMAMODE - (match_operand:FMAMODE 1 "nonimmediate_operand" "%0, 0,x")) - (match_operand:FMAMODE 2 "nonimmediate_operand" "xm, x,xm") - (neg:FMAMODE - (match_operand:FMAMODE 3 "nonimmediate_operand" " x,xm,0"))))] - "TARGET_FMA" - "@ - vfnmsub132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2} - vfnmsub213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3} - 
vfnmsub231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}" - [(set_attr "type" "ssemuladd") - (set_attr "mode" "<MODE>")]) - -(define_insn "*fma_fmaddsub_<mode>" - [(set (match_operand:VF 0 "register_operand" "=x,x,x") - (unspec:VF - [(match_operand:VF 1 "nonimmediate_operand" "%0, 0,x") - (match_operand:VF 2 "nonimmediate_operand" "xm, x,xm") - (match_operand:VF 3 "nonimmediate_operand" " x,xm,0")] - UNSPEC_FMADDSUB))] - "TARGET_FMA" - "@ - vfmaddsub132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2} - vfmaddsub213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3} - vfmaddsub231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}" - [(set_attr "type" "ssemuladd") - (set_attr "mode" "<MODE>")]) - -(define_insn "*fma_fmsubadd_<mode>" - [(set (match_operand:VF 0 "register_operand" "=x,x,x") - (unspec:VF - [(match_operand:VF 1 "nonimmediate_operand" "%0, 0,x") - (match_operand:VF 2 "nonimmediate_operand" "xm, x,xm") - (neg:VF - (match_operand:VF 3 "nonimmediate_operand" " x,xm,0"))] - UNSPEC_FMADDSUB))] - "TARGET_FMA" - "@ - vfmsubadd132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2} - vfmsubadd213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3} - vfmsubadd231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}" - [(set_attr "type" "ssemuladd") - (set_attr "mode" "<MODE>")]) - ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;; ;; Parallel single-precision floating point conversion operations -- 2.30.2