(V8HI "v")
(V4SI "wa")])
+;; Mode iterator for binary floating types other than double to
+;; optimize convert to that floating point type from an extract
+;; of an integer type
+(define_mode_iterator VSX_EXTRACT_FL [SF
+ (IF "FLOAT128_2REG_P (IFmode)")
+ (KF "TARGET_FLOAT128_HW")
+ (TF "FLOAT128_2REG_P (TFmode)
+ || (FLOAT128_IEEE_P (TFmode)
+ && TARGET_FLOAT128_HW)")])
+
;; Iterator for the 2 short vector types to do a splat from an integer
(define_mode_iterator VSX_SPLAT_I [V16QI V8HI])
[(set_attr "type" "vecdouble")])
;; Convert from 32-bit to 64-bit types
+;; Provide both vector and scalar targets
(define_insn "vsx_xvcvsxwdp"
[(set (match_operand:V2DF 0 "vsx_register_operand" "=wd,?wa")
(unspec:V2DF [(match_operand:V4SI 1 "vsx_register_operand" "wf,wa")]
"xvcvsxwdp %x0,%x1"
[(set_attr "type" "vecdouble")])
+(define_insn "vsx_xvcvsxwdp_df"
+ [(set (match_operand:DF 0 "vsx_register_operand" "=ws")
+ (unspec:DF [(match_operand:V4SI 1 "vsx_register_operand" "wa")]
+ UNSPEC_VSX_CVSXWDP))]
+ "TARGET_VSX"
+ "xvcvsxwdp %x0,%x1"
+ [(set_attr "type" "vecdouble")])
+
(define_insn "vsx_xvcvuxwdp"
[(set (match_operand:V2DF 0 "vsx_register_operand" "=wd,?wa")
(unspec:V2DF [(match_operand:V4SI 1 "vsx_register_operand" "wf,wa")]
"xvcvuxwdp %x0,%x1"
[(set_attr "type" "vecdouble")])
+(define_insn "vsx_xvcvuxwdp_df"
+ [(set (match_operand:DF 0 "vsx_register_operand" "=ws")
+ (unspec:DF [(match_operand:V4SI 1 "vsx_register_operand" "wa")]
+ UNSPEC_VSX_CVUXWDP))]
+ "TARGET_VSX"
+ "xvcvuxwdp %x0,%x1"
+ [(set_attr "type" "vecdouble")])
+
(define_insn "vsx_xvcvspsxds"
[(set (match_operand:V2DI 0 "vsx_register_operand" "=v,?wa")
(unspec:V2DI [(match_operand:V4SF 1 "vsx_register_operand" "wd,wa")]
[(set_attr "type" "vecsimple")])
(define_insn_and_split "*vsx_extract_si"
- [(set (match_operand:SI 0 "nonimmediate_operand" "=r,Z,Z,wJwK")
+ [(set (match_operand:SI 0 "nonimmediate_operand" "=r,wHwI,Z")
(vec_select:SI
- (match_operand:V4SI 1 "gpc_reg_operand" "v,wJwK,v,v")
- (parallel [(match_operand:QI 2 "const_0_to_3_operand" "n,n,n,n")])))
- (clobber (match_scratch:V4SI 3 "=v,wJwK,v,v"))]
+ (match_operand:V4SI 1 "gpc_reg_operand" "wJv,wJv,wJv")
+ (parallel [(match_operand:QI 2 "const_0_to_3_operand" "n,n,n")])))
+ (clobber (match_scratch:V4SI 3 "=wJv,wJv,wJv"))]
"VECTOR_MEM_VSX_P (V4SImode) && TARGET_DIRECT_MOVE_64BIT"
"#"
"&& reload_completed"
DONE;
}
- [(set_attr "type" "mftgpr,fpstore,fpstore,vecsimple")
+ [(set_attr "type" "mftgpr,vecperm,fpstore")
(set_attr "length" "8")])
(define_insn_and_split "*vsx_extract_<mode>_p8"
DONE;
})
+;; VSX_EXTRACT optimizations
+;; Optimize double d = (double) vec_extract (vi, <n>)
+;; Get the element into the top position and use XVCVSWDP/XVCVUWDP
+(define_insn_and_split "*vsx_extract_si_<uns>float_df"
+ [(set (match_operand:DF 0 "gpc_reg_operand" "=ws")
+ (any_float:DF
+ (vec_select:SI
+ (match_operand:V4SI 1 "gpc_reg_operand" "v")
+ (parallel [(match_operand:QI 2 "const_0_to_3_operand" "n")]))))
+ (clobber (match_scratch:V4SI 3 "=v"))]
+ "VECTOR_MEM_VSX_P (V4SImode) && TARGET_DIRECT_MOVE_64BIT"
+ "#"
+ "&& 1"
+ [(const_int 0)]
+{
+ rtx dest = operands[0];
+ rtx src = operands[1];
+ rtx element = operands[2];
+ rtx v4si_tmp = operands[3];
+ int value;
+
+ if (!VECTOR_ELT_ORDER_BIG)
+ element = GEN_INT (GET_MODE_NUNITS (V4SImode) - 1 - INTVAL (element));
+
+ /* If the value is in the correct position, we can avoid doing the VSPLT<x>
+ instruction. */
+ value = INTVAL (element);
+ if (value != 0)
+ {
+ if (GET_CODE (v4si_tmp) == SCRATCH)
+ v4si_tmp = gen_reg_rtx (V4SImode);
+ emit_insn (gen_altivec_vspltw_direct (v4si_tmp, src, element));
+ }
+ else
+ v4si_tmp = src;
+
+ emit_insn (gen_vsx_xvcv<su>xwdp_df (dest, v4si_tmp));
+ DONE;
+})
+
+;; Optimize <type> f = (<type>) vec_extract (vi, <n>)
+;; where <type> is a floating point type that supported by the hardware that is
+;; not double. First convert the value to double, and then to the desired
+;; type.
+(define_insn_and_split "*vsx_extract_si_<uns>float_<mode>"
+ [(set (match_operand:VSX_EXTRACT_FL 0 "gpc_reg_operand" "=ww")
+ (any_float:VSX_EXTRACT_FL
+ (vec_select:SI
+ (match_operand:V4SI 1 "gpc_reg_operand" "v")
+ (parallel [(match_operand:QI 2 "const_0_to_3_operand" "n")]))))
+ (clobber (match_scratch:V4SI 3 "=v"))
+ (clobber (match_scratch:DF 4 "=ws"))]
+ "VECTOR_MEM_VSX_P (V4SImode) && TARGET_DIRECT_MOVE_64BIT"
+ "#"
+ "&& 1"
+ [(const_int 0)]
+{
+ rtx dest = operands[0];
+ rtx src = operands[1];
+ rtx element = operands[2];
+ rtx v4si_tmp = operands[3];
+ rtx df_tmp = operands[4];
+ int value;
+
+ if (!VECTOR_ELT_ORDER_BIG)
+ element = GEN_INT (GET_MODE_NUNITS (V4SImode) - 1 - INTVAL (element));
+
+ /* If the value is in the correct position, we can avoid doing the VSPLT<x>
+ instruction. */
+ value = INTVAL (element);
+ if (value != 0)
+ {
+ if (GET_CODE (v4si_tmp) == SCRATCH)
+ v4si_tmp = gen_reg_rtx (V4SImode);
+ emit_insn (gen_altivec_vspltw_direct (v4si_tmp, src, element));
+ }
+ else
+ v4si_tmp = src;
+
+ if (GET_CODE (df_tmp) == SCRATCH)
+ df_tmp = gen_reg_rtx (DFmode);
+
+ emit_insn (gen_vsx_xvcv<su>xwdp_df (df_tmp, v4si_tmp));
+
+ if (<MODE>mode == SFmode)
+ emit_insn (gen_truncdfsf2 (dest, df_tmp));
+ else if (<MODE>mode == TFmode && FLOAT128_IBM_P (TFmode))
+ emit_insn (gen_extenddftf2_vsx (dest, df_tmp));
+ else if (<MODE>mode == TFmode && FLOAT128_IEEE_P (TFmode)
+ && TARGET_FLOAT128_HW)
+ emit_insn (gen_extenddftf2_hw (dest, df_tmp));
+ else if (<MODE>mode == IFmode && FLOAT128_IBM_P (IFmode))
+ emit_insn (gen_extenddfif2 (dest, df_tmp));
+ else if (<MODE>mode == KFmode && TARGET_FLOAT128_HW)
+ emit_insn (gen_extenddfkf2_hw (dest, df_tmp));
+ else
+ gcc_unreachable ();
+
+ DONE;
+})
+
;; Expanders for builtins
(define_expand "vsx_mergel_<mode>"
[(use (match_operand:VSX_D 0 "vsx_register_operand" ""))
--- /dev/null
+/* { dg-do compile { target { powerpc*-*-* && lp64 } } } */
+/* { dg-skip-if "" { powerpc*-*-darwin* } { "*" } { "" } } */
+/* { dg-require-effective-target powerpc_p8vector_ok } */
+/* { dg-skip-if "do not override -mcpu" { powerpc*-*-* } { "-mcpu=*" } { "-mcpu=power8" } } */
+/* { dg-options "-O2 -mcpu=power8" } */
+
+/* { dg-final { scan-assembler-times "vspltw" 6 } } */
+/* { dg-final { scan-assembler-times "xvcvsxwdp" 4 } } */
+/* { dg-final { scan-assembler-times "xvcvuxwdp" 4 } } */
+/* { dg-final { scan-assembler-not "mtvsrd" } } */
+/* { dg-final { scan-assembler-not "mtvsrwa" } } */
+/* { dg-final { scan-assembler-not "mtvsrwz" } } */
+/* { dg-final { scan-assembler-not "mfvsrd" } } */
+/* { dg-final { scan-assembler-not "mfvsrwz" } } */
+
+#include <altivec.h>
+
+#ifndef TYPE
+#define TYPE double
+#endif
+
+TYPE
+foo_0s (vector int v)
+{
+ int i = vec_extract (v, 0);
+ return (TYPE) i;
+}
+
+TYPE
+foo_1s (vector int v)
+{
+ int i = vec_extract (v, 1);
+ return (TYPE) i;
+}
+
+TYPE
+foo_2s (vector int v)
+{
+ int i = vec_extract (v, 2);
+ return (TYPE) i;
+}
+
+TYPE
+foo_3s (vector int v)
+{
+ int i = vec_extract (v, 3);
+ return (TYPE) i;
+}
+
+TYPE
+foo_0u (vector unsigned int v)
+{
+ unsigned int u = vec_extract (v, 0);
+ return (TYPE) u;
+}
+
+TYPE
+foo_1u (vector unsigned int v)
+{
+ unsigned int u = vec_extract (v, 1);
+ return (TYPE) u;
+}
+
+TYPE
+foo_2u (vector unsigned int v)
+{
+ unsigned int u = vec_extract (v, 2);
+ return (TYPE) u;
+}
+
+TYPE
+foo_3u (vector unsigned int v)
+{
+ unsigned int u = vec_extract (v, 3);
+ return (TYPE) u;
+}
--- /dev/null
+/* { dg-do compile { target { powerpc*-*-* && lp64 } } } */
+/* { dg-skip-if "" { powerpc*-*-darwin* } { "*" } { "" } } */
+/* { dg-require-effective-target powerpc_p8vector_ok } */
+/* { dg-skip-if "do not override -mcpu" { powerpc*-*-* } { "-mcpu=*" } { "-mcpu=power8" } } */
+/* { dg-options "-O2 -mcpu=power8" } */
+
+/* { dg-final { scan-assembler-times "vspltw" 6 } } */
+/* { dg-final { scan-assembler-times "xvcvsxwdp" 4 } } */
+/* { dg-final { scan-assembler-times "xvcvuxwdp" 4 } } */
+/* { dg-final { scan-assembler-times "frsp\|xsrsp" 8 } } */
+/* { dg-final { scan-assembler-not "mtvsrd" } } */
+/* { dg-final { scan-assembler-not "mtvsrwa" } } */
+/* { dg-final { scan-assembler-not "mtvsrwz" } } */
+/* { dg-final { scan-assembler-not "mfvsrd" } } */
+/* { dg-final { scan-assembler-not "mfvsrwz" } } */
+
+#include <altivec.h>
+
+#ifndef TYPE
+#define TYPE float
+#endif
+
+TYPE
+foo_0s (vector int v)
+{
+ int i = vec_extract (v, 0);
+ return (TYPE) i;
+}
+
+TYPE
+foo_1s (vector int v)
+{
+ int i = vec_extract (v, 1);
+ return (TYPE) i;
+}
+
+TYPE
+foo_2s (vector int v)
+{
+ int i = vec_extract (v, 2);
+ return (TYPE) i;
+}
+
+TYPE
+foo_3s (vector int v)
+{
+ int i = vec_extract (v, 3);
+ return (TYPE) i;
+}
+
+TYPE
+foo_0u (vector unsigned int v)
+{
+ unsigned int u = vec_extract (v, 0);
+ return (TYPE) u;
+}
+
+TYPE
+foo_1u (vector unsigned int v)
+{
+ unsigned int u = vec_extract (v, 1);
+ return (TYPE) u;
+}
+
+TYPE
+foo_2u (vector unsigned int v)
+{
+ unsigned int u = vec_extract (v, 2);
+ return (TYPE) u;
+}
+
+TYPE
+foo_3u (vector unsigned int v)
+{
+ unsigned int u = vec_extract (v, 3);
+ return (TYPE) u;
+}