i386: Fix V2SF horizontal add/subtract insns
author Uros Bizjak <ubizjak@gmail.com>
Wed, 27 May 2020 19:46:49 +0000 (21:46 +0200)
committer Uros Bizjak <ubizjak@gmail.com>
Wed, 27 May 2020 19:46:49 +0000 (21:46 +0200)
The PFPNACC insn is incorrectly modelled as an element-wise addition and
subtraction of its two operands, but in reality it performs a horizontal
subtraction and addition:

Instruction: PFPNACC dest,src

Description:
dest[31:0] <- dest[31:0] - dest[63:32];
dest[63:32] <- src[31:0] + src[63:32];

Also, it is not possible to directly replace PFACC with HADDPS and PFNACC
with HSUBPS, because the second result word is computed from different
operands: PFACC takes it from src, while HADDPS takes it from the upper
half of dest.

PFACC does:

dest[31..0] <- dest[31..0] + dest[63..32];
dest[63..32] <- src[31..0] + src [63..32];

while HADDPS does:

dest[31..0] <-  dest[31..0]  +  dest[63..32];
dest[63..32] <- dest[127..96] + dest[95..64];
dest[95..64] <- src [31..0]  +  src [63..32];
dest[127..96] <- src [127..96] + src [95..64];

2020-05-27  Uroš Bizjak  <ubizjak@gmail.com>

gcc/ChangeLog:
* config/i386/mmx.md (*mmx_haddv2sf3): Remove SSE alternatives.
(mmx_hsubv2sf3): Ditto.
(mmx_haddsubv2sf3): New expander.
(*mmx_haddsubv2sf3): Rename from mmx_addsubv2sf3. Correct
RTL template to model horizontal subtraction and addition.
* config/i386/i386-builtin.def (IX86_BUILTIN_PFPNACC):
Update for rename.

gcc/config/i386/i386-builtin.def
gcc/config/i386/mmx.md

index b873498f3ab976aa1c2629c107171427b1a4a7b7..134981a798f4a1503853516476983900a0f97830 100644 (file)
@@ -555,7 +555,7 @@ BDESC (OPTION_MASK_ISA_3DNOW_A, 0, CODE_FOR_mmx_pi2fw, "__builtin_ia32_pi2fw", I
 BDESC (OPTION_MASK_ISA_3DNOW_A, 0, CODE_FOR_mmx_pswapdv2si2, "__builtin_ia32_pswapdsi", IX86_BUILTIN_PSWAPDSI, UNKNOWN, (int) V2SI_FTYPE_V2SI)
 BDESC (OPTION_MASK_ISA_3DNOW_A, 0, CODE_FOR_mmx_pswapdv2sf2, "__builtin_ia32_pswapdsf", IX86_BUILTIN_PSWAPDSF, UNKNOWN, (int) V2SF_FTYPE_V2SF)
 BDESC (OPTION_MASK_ISA_3DNOW_A, 0, CODE_FOR_mmx_hsubv2sf3, "__builtin_ia32_pfnacc", IX86_BUILTIN_PFNACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF)
-BDESC (OPTION_MASK_ISA_3DNOW_A, 0, CODE_FOR_mmx_addsubv2sf3, "__builtin_ia32_pfpnacc", IX86_BUILTIN_PFPNACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF)
+BDESC (OPTION_MASK_ISA_3DNOW_A, 0, CODE_FOR_mmx_haddsubv2sf3, "__builtin_ia32_pfpnacc", IX86_BUILTIN_PFPNACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF)
 
 /* SSE */
 BDESC (OPTION_MASK_ISA_SSE, 0, CODE_FOR_sse_movmskps, "__builtin_ia32_movmskps", IX86_BUILTIN_MOVMSKPS, UNKNOWN, (int) INT_FTYPE_V4SF)
index 271c1c2e8332c18924f38598dfeeff0b9e6de051..7c9640d4f9f56bd67903f1e8367448e5b562c8d8 100644 (file)
   "TARGET_3DNOW")
 
 (define_insn "*mmx_haddv2sf3"
-  [(set (match_operand:V2SF 0 "register_operand" "=y,x,x")
+  [(set (match_operand:V2SF 0 "register_operand" "=y")
        (vec_concat:V2SF
          (plus:SF
            (vec_select:SF
-             (match_operand:V2SF 1 "register_operand" "0,0,x")
+             (match_operand:V2SF 1 "register_operand" "0")
              (parallel [(match_operand:SI 3 "const_0_to_1_operand")]))
            (vec_select:SF (match_dup 1)
            (parallel [(match_operand:SI 4 "const_0_to_1_operand")])))
          (plus:SF
             (vec_select:SF
-             (match_operand:V2SF 2 "nonimmediate_operand" "ym,x,x")
+             (match_operand:V2SF 2 "nonimmediate_operand" "ym")
              (parallel [(match_operand:SI 5 "const_0_to_1_operand")]))
            (vec_select:SF (match_dup 2)
            (parallel [(match_operand:SI 6 "const_0_to_1_operand")])))))]
   "TARGET_3DNOW
    && INTVAL (operands[3]) != INTVAL (operands[4])
    && INTVAL (operands[5]) != INTVAL (operands[6])"
-  "@
-   pfacc\t{%2, %0|%0, %2}
-   haddps\t{%2, %0|%0, %2}
-   vhaddps\t{%2, %1, %0|%0, %1, %2}"
-  [(set_attr "isa" "*,sse3_noavx,avx")
-   (set_attr "type" "mmxadd,sseadd,sseadd")
-   (set_attr "prefix_extra" "1,*,*")
-   (set_attr "prefix" "*,orig,vex")
-   (set_attr "mode" "V2SF,V4SF,V4SF")])
+  "pfacc\t{%2, %0|%0, %2}"
+  [(set_attr "type" "mmxadd")
+   (set_attr "prefix_extra" "1")
+   (set_attr "mode" "V2SF")])
 
 (define_insn "*mmx_haddv2sf3_low"
   [(set (match_operand:SF 0 "register_operand" "=x,x")
    (set_attr "mode" "V4SF")])
 
 (define_insn "mmx_hsubv2sf3"
-  [(set (match_operand:V2SF 0 "register_operand" "=y,x,x")
+  [(set (match_operand:V2SF 0 "register_operand" "=y")
        (vec_concat:V2SF
          (minus:SF
            (vec_select:SF
-             (match_operand:V2SF 1 "register_operand" "0,0,x")
+             (match_operand:V2SF 1 "register_operand" "0")
              (parallel [(const_int  0)]))
            (vec_select:SF (match_dup 1) (parallel [(const_int 1)])))
          (minus:SF
             (vec_select:SF
-             (match_operand:V2SF 2 "register_mmxmem_operand" "ym,x,x")
+             (match_operand:V2SF 2 "nonimmediate_operand" "ym")
              (parallel [(const_int  0)]))
            (vec_select:SF (match_dup 2) (parallel [(const_int 1)])))))]
   "TARGET_3DNOW_A"
-  "@
-   pfnacc\t{%2, %0|%0, %2}
-   hsubps\t{%2, %0|%0, %2}
-   vhsubps\t{%2, %1, %0|%0, %1, %2}"
-  [(set_attr "isa" "*,sse3_noavx,avx")
-   (set_attr "type" "mmxadd,sseadd,sseadd")
-   (set_attr "prefix_extra" "1,*,*")
-   (set_attr "prefix" "*,orig,vex")
-   (set_attr "mode" "V2SF,V4SF,V4SF")])
+  "pfnacc\t{%2, %0|%0, %2}"
+  [(set_attr "type" "mmxadd")
+   (set_attr "prefix_extra" "1")
+   (set_attr "mode" "V2SF")])
 
 (define_insn "*mmx_hsubv2sf3_low"
   [(set (match_operand:SF 0 "register_operand" "=x,x")
    (set_attr "prefix" "orig,vex")
    (set_attr "mode" "V4SF")])
 
-(define_insn "mmx_addsubv2sf3"
+(define_expand "mmx_haddsubv2sf3"
+  [(set (match_operand:V2SF 0 "register_operand")
+       (vec_concat:V2SF
+         (minus:SF
+           (vec_select:SF
+             (match_operand:V2SF 1 "register_operand")
+             (parallel [(const_int 0)]))
+           (vec_select:SF (match_dup 1) (parallel [(const_int 1)])))
+         (plus:SF
+           (vec_select:SF
+             (match_operand:V2SF 2 "nonimmediate_operand")
+             (parallel [(const_int 0)]))
+           (vec_select:SF (match_dup 2) (parallel [(const_int 1)])))))]
+  "TARGET_3DNOW_A")
+
+(define_insn "*mmx_haddsubv2sf3"
   [(set (match_operand:V2SF 0 "register_operand" "=y")
-        (vec_merge:V2SF
-          (plus:V2SF
-            (match_operand:V2SF 1 "register_operand" "0")
-            (match_operand:V2SF 2 "nonimmediate_operand" "ym"))
-          (minus:V2SF (match_dup 1) (match_dup 2))
-          (const_int 1)))]
-  "TARGET_3DNOW_A"
+       (vec_concat:V2SF
+         (minus:SF
+           (vec_select:SF
+             (match_operand:V2SF 1 "register_operand" "0")
+             (parallel [(const_int  0)]))
+           (vec_select:SF (match_dup 1) (parallel [(const_int 1)])))
+         (plus:SF
+            (vec_select:SF
+             (match_operand:V2SF 2 "nonimmediate_operand" "ym")
+             (parallel [(match_operand:SI 3 "const_0_to_1_operand")]))
+           (vec_select:SF
+             (match_dup 2)
+             (parallel [(match_operand:SI 4 "const_0_to_1_operand")])))))]
+  "TARGET_3DNOW_A
+   && INTVAL (operands[3]) != INTVAL (operands[4])"
   "pfpnacc\t{%2, %0|%0, %2}"
   [(set_attr "type" "mmxadd")
    (set_attr "prefix_extra" "1")