From 1c47af84a30650b28b6d1957f1689e2107396a2e Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Wed, 22 Dec 2004 23:49:24 -0800 Subject: [PATCH] emmintrin.h (_mm_loadh_pd): Don't cast pointer arg to __v2si. * config/i386/emmintrin.h (_mm_loadh_pd): Don't cast pointer arg to __v2si. (_mm_storeh_pd, _mm_loadl_pd, _mm_storel_pd): Likewise. * config/i386/i386.c (ix86_init_mmx_sse_builtins): Use double* or const double* for __builtin_ia32_loadhpd, __builtin_ia32_loadlpd, __builtin_ia32_storehpd, __builtin_ia32_storelpd. (ix86_expand_builtin): Update to match. (ix86_expand_vector_init): Use sse2_loadlpd. * config/i386/i386.md (vec_setv2df): Use sse2_loadlpd, sse2_loadhpd. (vec_extractv2df): Use sse2_storelpd, sse2_storehpd. (sse2_storehpd, sse2_loadhpd, sse2_storelpd, sse2_loadlpd): New. (sse2_movhpd): Remove. From-SVN: r92536 --- gcc/ChangeLog | 15 ++++ gcc/config/i386/emmintrin.h | 8 +-- gcc/config/i386/i386.c | 77 ++++++++++++-------- gcc/config/i386/i386.md | 138 +++++++++++++++++++++++++----------- 4 files changed, 165 insertions(+), 73 deletions(-) diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 7ea68150337..fe3acf1ae49 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,18 @@ +2004-12-22 Richard Henderson + + * config/i386/emmintrin.h (_mm_loadh_pd): Don't cast pointer arg + to __v2si. + (_mm_storeh_pd, _mm_loadl_pd, _mm_storel_pd): Likewise. + * config/i386/i386.c (ix86_init_mmx_sse_builtins): Use double* or + const double* for __builtin_ia32_loadhpd, __builtin_ia32_loadlpd, + __builtin_ia32_storehpd, __builtin_ia32_storelpd. + (ix86_expand_builtin): Update to match. + (ix86_expand_vector_init): Use sse2_loadlpd. + * config/i386/i386.md (vec_setv2df): Use sse2_loadlpd, sse2_loadhpd. + (vec_extractv2df): Use sse2_storelpd, sse2_storehpd. + (sse2_storehpd, sse2_loadhpd, sse2_storelpd, sse2_loadlpd): New. + (sse2_movhpd): Remove. 
+ 2004-12-23 Alan Modra PR target/18751 diff --git a/gcc/config/i386/emmintrin.h b/gcc/config/i386/emmintrin.h index 67450e44e28..49c6a7f8147 100644 --- a/gcc/config/i386/emmintrin.h +++ b/gcc/config/i386/emmintrin.h @@ -937,25 +937,25 @@ _mm_unpacklo_pd (__m128d __A, __m128d __B) static __inline __m128d _mm_loadh_pd (__m128d __A, double const *__B) { - return (__m128d)__builtin_ia32_loadhpd ((__v2df)__A, (__v2si *)__B); + return (__m128d)__builtin_ia32_loadhpd ((__v2df)__A, __B); } static __inline void _mm_storeh_pd (double *__A, __m128d __B) { - __builtin_ia32_storehpd ((__v2si *)__A, (__v2df)__B); + __builtin_ia32_storehpd (__A, (__v2df)__B); } static __inline __m128d _mm_loadl_pd (__m128d __A, double const *__B) { - return (__m128d)__builtin_ia32_loadlpd ((__v2df)__A, (__v2si *)__B); + return (__m128d)__builtin_ia32_loadlpd ((__v2df)__A, __B); } static __inline void _mm_storel_pd (double *__A, __m128d __B) { - __builtin_ia32_storelpd ((__v2si *)__A, (__v2df)__B); + __builtin_ia32_storelpd (__A, (__v2df)__B); } static __inline int diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c index ef5f37e13b1..2a9dca24d34 100644 --- a/gcc/config/i386/i386.c +++ b/gcc/config/i386/i386.c @@ -12551,12 +12551,9 @@ ix86_init_mmx_sse_builtins (void) V2DF_type_node, V2DF_type_node, integer_type_node, NULL_TREE); - tree v2df_ftype_v2df_pv2si + tree v2df_ftype_v2df_pcdouble = build_function_type_list (V2DF_type_node, - V2DF_type_node, pv2si_type_node, NULL_TREE); - tree void_ftype_pv2si_v2df - = build_function_type_list (void_type_node, - pv2si_type_node, V2DF_type_node, NULL_TREE); + V2DF_type_node, pcdouble_type_node, NULL_TREE); tree void_ftype_pdouble_v2df = build_function_type_list (void_type_node, pdouble_type_node, V2DF_type_node, NULL_TREE); @@ -12858,10 +12855,10 @@ ix86_init_mmx_sse_builtins (void) def_builtin (MASK_SSE2, "__builtin_ia32_storeupd", void_ftype_pdouble_v2df, IX86_BUILTIN_STOREUPD); def_builtin (MASK_SSE2, "__builtin_ia32_storesd", 
void_ftype_pdouble_v2df, IX86_BUILTIN_STORESD); - def_builtin (MASK_SSE2, "__builtin_ia32_loadhpd", v2df_ftype_v2df_pv2si, IX86_BUILTIN_LOADHPD); - def_builtin (MASK_SSE2, "__builtin_ia32_loadlpd", v2df_ftype_v2df_pv2si, IX86_BUILTIN_LOADLPD); - def_builtin (MASK_SSE2, "__builtin_ia32_storehpd", void_ftype_pv2si_v2df, IX86_BUILTIN_STOREHPD); - def_builtin (MASK_SSE2, "__builtin_ia32_storelpd", void_ftype_pv2si_v2df, IX86_BUILTIN_STORELPD); + def_builtin (MASK_SSE2, "__builtin_ia32_loadhpd", v2df_ftype_v2df_pcdouble, IX86_BUILTIN_LOADHPD); + def_builtin (MASK_SSE2, "__builtin_ia32_loadlpd", v2df_ftype_v2df_pcdouble, IX86_BUILTIN_LOADLPD); + def_builtin (MASK_SSE2, "__builtin_ia32_storehpd", void_ftype_pdouble_v2df, IX86_BUILTIN_STOREHPD); + def_builtin (MASK_SSE2, "__builtin_ia32_storelpd", void_ftype_pdouble_v2df, IX86_BUILTIN_STORELPD); def_builtin (MASK_SSE2, "__builtin_ia32_movmskpd", int_ftype_v2df, IX86_BUILTIN_MOVMSKPD); def_builtin (MASK_SSE2, "__builtin_ia32_pmovmskb128", int_ftype_v16qi, IX86_BUILTIN_PMOVMSKB128); @@ -13405,8 +13402,8 @@ ix86_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED, case IX86_BUILTIN_LOADLPD: icode = (fcode == IX86_BUILTIN_LOADHPS ? CODE_FOR_sse_movhps : fcode == IX86_BUILTIN_LOADLPS ? CODE_FOR_sse_movlps - : fcode == IX86_BUILTIN_LOADHPD ? CODE_FOR_sse2_movhpd - : CODE_FOR_sse2_movsd); + : fcode == IX86_BUILTIN_LOADHPD ? CODE_FOR_sse2_loadhpd + : CODE_FOR_sse2_loadlpd); arg0 = TREE_VALUE (arglist); arg1 = TREE_VALUE (TREE_CHAIN (arglist)); op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0); @@ -13430,12 +13427,8 @@ ix86_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED, case IX86_BUILTIN_STOREHPS: case IX86_BUILTIN_STORELPS: - case IX86_BUILTIN_STOREHPD: - case IX86_BUILTIN_STORELPD: icode = (fcode == IX86_BUILTIN_STOREHPS ? CODE_FOR_sse_movhps - : fcode == IX86_BUILTIN_STORELPS ? CODE_FOR_sse_movlps - : fcode == IX86_BUILTIN_STOREHPD ? 
CODE_FOR_sse2_movhpd - : CODE_FOR_sse2_movsd); + : CODE_FOR_sse_movlps); arg0 = TREE_VALUE (arglist); arg1 = TREE_VALUE (TREE_CHAIN (arglist)); op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0); @@ -13451,7 +13444,28 @@ ix86_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED, if (! pat) return 0; emit_insn (pat); - return 0; + return const0_rtx; + + case IX86_BUILTIN_STOREHPD: + case IX86_BUILTIN_STORELPD: + icode = (fcode == IX86_BUILTIN_STOREHPD ? CODE_FOR_sse2_storehpd + : CODE_FOR_sse2_storelpd); + arg0 = TREE_VALUE (arglist); + arg1 = TREE_VALUE (TREE_CHAIN (arglist)); + op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0); + op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0); + mode0 = insn_data[icode].operand[0].mode; + mode1 = insn_data[icode].operand[1].mode; + + op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0)); + if (! (*insn_data[icode].operand[1].predicate) (op1, mode1)) + op1 = copy_to_mode_reg (mode1, op1); + + pat = GEN_FCN (icode) (op0, op1); + if (! pat) + return 0; + emit_insn (pat); + return const0_rtx; case IX86_BUILTIN_MOVNTPS: return ix86_expand_store_builtin (CODE_FOR_sse_movntv4sf, arglist); @@ -15189,24 +15203,29 @@ ix86_expand_vector_init (rtx target, rtx vals) /* ... values where only first field is non-constant are best loaded from the pool and overwritten via move later. */ - if (!i) + if (i == 0) { - rtx op = simplify_gen_subreg (mode, XVECEXP (vals, 0, 0), - GET_MODE_INNER (mode), 0); - - op = force_reg (mode, op); XVECEXP (vals, 0, 0) = CONST0_RTX (GET_MODE_INNER (mode)); emit_move_insn (target, gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0))); + switch (GET_MODE (target)) { - case V2DFmode: - emit_insn (gen_sse2_movsd (target, target, op)); - break; - case V4SFmode: + case V2DFmode: + emit_insn (gen_sse2_loadlpd (target, target, XVECEXP (vals, 0, 0))); + break; + + case V4SFmode: + { + /* ??? We can represent this better. 
*/ + rtx op = simplify_gen_subreg (mode, XVECEXP (vals, 0, 0), + GET_MODE_INNER (mode), 0); + op = force_reg (mode, op); emit_insn (gen_sse_movss (target, target, op)); - break; - default: - break; + } + break; + + default: + break; } return; } diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md index 648748cadeb..ff0f9f9dd84 100644 --- a/gcc/config/i386/i386.md +++ b/gcc/config/i386/i386.md @@ -4734,16 +4734,10 @@ switch (INTVAL (operands[2])) { case 0: - emit_insn (gen_sse2_movsd (operands[0], operands[0], - simplify_gen_subreg (V2DFmode, operands[1], - DFmode, 0))); + emit_insn (gen_sse2_loadlpd (operands[0], operands[0], operands[1])); break; case 1: - { - rtx op1 = simplify_gen_subreg (V2DFmode, operands[1], DFmode, 0); - - emit_insn (gen_sse2_unpcklpd (operands[0], operands[0], op1)); - } + emit_insn (gen_sse2_loadhpd (operands[0], operands[0], operands[1])); break; default: abort (); @@ -4760,14 +4754,10 @@ switch (INTVAL (operands[2])) { case 0: - emit_move_insn (operands[0], gen_lowpart (DFmode, operands[1])); + emit_insn (gen_sse2_storelpd (operands[0], operands[1])); break; case 1: - { - rtx dest = simplify_gen_subreg (V2DFmode, operands[0], DFmode, 0); - - emit_insn (gen_sse2_unpckhpd (dest, operands[1], operands[1])); - } + emit_insn (gen_sse2_storehpd (operands[0], operands[1])); break; default: abort (); @@ -23731,17 +23721,103 @@ [(set_attr "type" "ssemov") (set_attr "mode" "TI")]) -(define_insn "sse2_movhpd" - [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,m") - (vec_merge:V2DF - (match_operand:V2DF 1 "nonimmediate_operand" "0,0") - (match_operand:V2DF 2 "nonimmediate_operand" "m,x") - (const_int 1)))] - "TARGET_SSE2 && (GET_CODE (operands[1]) == MEM || GET_CODE (operands[2]) == MEM)" - "movhpd\t{%2, %0|%0, %2}" +;; Store the high double of the source vector into the double destination. 
+(define_insn "sse2_storehpd" + [(set (match_operand:DF 0 "nonimmediate_operand" "=m,Y,Y") + (vec_select:DF + (match_operand:V2DF 1 "nonimmediate_operand" " Y,0,o") + (parallel [(const_int 1)])))] + "TARGET_SSE2" + "@ + movhpd\t{%1, %0|%0, %1} + unpckhpd\t%0, %0 + #" [(set_attr "type" "ssecvt") (set_attr "mode" "V2DF")]) +(define_split + [(set (match_operand:DF 0 "register_operand" "") + (vec_select:DF + (match_operand:V2DF 1 "memory_operand" "") + (parallel [(const_int 1)])))] + "TARGET_SSE2 && reload_completed" + [(const_int 0)] +{ + emit_move_insn (operands[0], adjust_address (operands[1], DFmode, 8)); + DONE; +}) + +;; Load the high double of the target vector from the source scalar. +(define_insn "sse2_loadhpd" + [(set (match_operand:V2DF 0 "nonimmediate_operand" "=Y,Y,o") + (vec_concat:V2DF + (vec_select:DF + (match_operand:V2DF 1 "nonimmediate_operand" " 0,0,0") + (parallel [(const_int 0)])) + (match_operand:DF 2 "nonimmediate_operand" " m,Y,Y")))] + "TARGET_SSE2" + "@ + movhpd\t{%2, %0|%0, %2} + unpcklpd\t{%2, %0|%0, %2} + #" + [(set_attr "type" "ssecvt") + (set_attr "mode" "V2DF")]) + +(define_split + [(set (match_operand:V2DF 0 "memory_operand" "") + (vec_concat:V2DF + (vec_select:DF (match_dup 0) (parallel [(const_int 0)])) + (match_operand:DF 1 "register_operand" "")))] + "TARGET_SSE2 && reload_completed" + [(const_int 0)] +{ + emit_move_insn (adjust_address (operands[0], DFmode, 8), operands[1]); + DONE; +}) + +;; Store the low double of the source vector into the double destination. +(define_expand "sse2_storelpd" + [(set (match_operand:DF 0 "nonimmediate_operand" "") + (vec_select:DF + (match_operand:V2DF 1 "nonimmediate_operand" "") + (parallel [(const_int 1)])))] + "TARGET_SSE2" +{ + operands[1] = gen_lowpart (DFmode, operands[1]); + emit_move_insn (operands[0], operands[1]); + DONE; +}) + +;; Load the low double of the target vector from the source scalar. 
+(define_insn "sse2_loadlpd" + [(set (match_operand:V2DF 0 "nonimmediate_operand" "=Y,Y,m") + (vec_concat:V2DF + (match_operand:DF 2 "nonimmediate_operand" " m,Y,Y") + (vec_select:DF + (match_operand:V2DF 1 "nonimmediate_operand" " 0,0,0") + (parallel [(const_int 1)]))))] + "TARGET_SSE2" + "@ + movlpd\t{%2, %0|%0, %2} + movsd\t{%2, %0|%0, %2} + movlpd\t{%2, %0|%0, %2}" + [(set_attr "type" "ssecvt") + (set_attr "mode" "V2DF")]) + +;; Merge the low part of the source vector into the low part of the target. +(define_insn "sse2_movsd" + [(set (match_operand:V2DF 0 "nonimmediate_operand" "=Y,Y,m") + (vec_merge:V2DF + (match_operand:V2DF 1 "nonimmediate_operand" "0,0,0") + (match_operand:V2DF 2 "nonimmediate_operand" "x,m,Y") + (const_int 2)))] + "TARGET_SSE2" + "@movsd\t{%2, %0|%0, %2} + movlpd\t{%2, %0|%0, %2} + movlpd\t{%2, %0|%0, %2}" + [(set_attr "type" "ssecvt") + (set_attr "mode" "DF,V2DF,V2DF")]) + (define_expand "sse2_loadsd" [(match_operand:V2DF 0 "register_operand" "") (match_operand:DF 1 "memory_operand" "")] @@ -23763,24 +23839,6 @@ [(set_attr "type" "ssecvt") (set_attr "mode" "DF")]) -;; ??? We ought to be using ix86_binary_operator_ok on this pattern, so -;; that we enforce the whole matching memory thing through combine et al. -;; But that requires that things be set up properly when invoked via an -;; intrinsic, which we don't do. Which leads to instantiate virtual regs -;; lossage, as seen compiling gcc.dg/i386-sse-2.c for x86_64 at -O0. -(define_insn "sse2_movsd" - [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,m") - (vec_merge:V2DF - (match_operand:V2DF 1 "nonimmediate_operand" "0,0,0") - (match_operand:V2DF 2 "nonimmediate_operand" "x,m,x") - (const_int 2)))] - "TARGET_SSE2" - "@movsd\t{%2, %0|%0, %2} - movlpd\t{%2, %0|%0, %2} - movlpd\t{%2, %0|%0, %2}" - [(set_attr "type" "ssecvt") - (set_attr "mode" "DF,V2DF,V2DF")]) - (define_insn "sse2_storesd" [(set (match_operand:DF 0 "memory_operand" "=m") (vec_select:DF -- 2.30.2