From 1194ca059028a55c9026606e5a73c0e01cd060cb Mon Sep 17 00:00:00 2001 From: Jan Hubicka Date: Sun, 20 Oct 2002 00:34:17 +0200 Subject: [PATCH] i386.c (classify_argument): Pass MMX arguments in memory * i386.c (classify_argument): Pass MMX arguments in memory (ix86_expand_builtin): Expand proper address mode for clflush. * i386.md (movdqa): Fix typo. (sse2_clflush): Accept DImode addresses. * xmmintrin.h (_mm_sqrt_sd): Accept two arguments. (_mm_max_sd): Fix pasto. (_mm_storeh_pd, _mm_storel_pd): Fix. * i386.c (bdesc_comi): Fix to match specification. (ix86_expand_sse_comi): Emit the comparison properly. * i386.md (sse_comi, sse2_comi, sse_ucomi, sse2_ucomi): Do not use comparison operator. (vnmaskcmp): Fix template. * xmmintrin.h (_mm_cvtps_pi16): Fix. From-SVN: r58321 --- gcc/ChangeLog | 19 ++++++++++ gcc/config/i386/i386.c | 60 +++++++++++++++---------------- gcc/config/i386/i386.md | 70 ++++++++++++++++++++----------------- gcc/config/i386/xmmintrin.h | 20 +++++------ 4 files changed, 96 insertions(+), 73 deletions(-) diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 9274dd3bd7e..a73f5dae512 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,22 @@ +Sun Oct 20 00:31:31 CEST 2002 Jan Hubicka + + * i386.c (classify_argument): Pass MMX arguments in memory + (ix86_expand_builtin): Expand proper address mode for clflush. + * i386.md (movdqa): Fix typo. + (sse2_clflush): Accept DImode addresses. + + * xmmintrin.h (_mm_sqrt_sd): Accept two arguments. + (_mm_max_sd): Fix pasto. + (_mm_storeh_pd, _mm_storel_pd): Fix. + + * i386.c (bdesc_comi): Fix to match specification. + (ix86_expand_sse_comi): Emit the comparison properly. + * i386.md (sse_comi, sse2_comi, sse_ucomi, sse2_ucomi): + Do not use comparison operator. + (vnmaskcmp): Fix template. + + * xmmintrin.h (_mm_cvtps_pi16): Fix. + 2002-10-19 Sebastian Pop * dependence.c : Removed. 
diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c index c96270241d1..11602573dd4 100644 --- a/gcc/config/i386/i386.c +++ b/gcc/config/i386/i386.c @@ -1922,8 +1922,7 @@ classify_argument (mode, type, classes, bit_offset) case V2SImode: case V4HImode: case V8QImode: - classes[0] = X86_64_SSE_CLASS; - return 1; + return 0; case BLKmode: case VOIDmode: return 0; @@ -11755,30 +11754,30 @@ struct builtin_description static const struct builtin_description bdesc_comi[] = { - { MASK_SSE1, CODE_FOR_sse_comi, "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS, EQ, 0 }, - { MASK_SSE1, CODE_FOR_sse_comi, "__builtin_ia32_comilt", IX86_BUILTIN_COMILTSS, LT, 0 }, - { MASK_SSE1, CODE_FOR_sse_comi, "__builtin_ia32_comile", IX86_BUILTIN_COMILESS, LE, 0 }, - { MASK_SSE1, CODE_FOR_sse_comi, "__builtin_ia32_comigt", IX86_BUILTIN_COMIGTSS, LT, 1 }, - { MASK_SSE1, CODE_FOR_sse_comi, "__builtin_ia32_comige", IX86_BUILTIN_COMIGESS, LE, 1 }, - { MASK_SSE1, CODE_FOR_sse_comi, "__builtin_ia32_comineq", IX86_BUILTIN_COMINEQSS, NE, 0 }, - { MASK_SSE1, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomieq", IX86_BUILTIN_UCOMIEQSS, EQ, 0 }, - { MASK_SSE1, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomilt", IX86_BUILTIN_UCOMILTSS, LT, 0 }, - { MASK_SSE1, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomile", IX86_BUILTIN_UCOMILESS, LE, 0 }, - { MASK_SSE1, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomigt", IX86_BUILTIN_UCOMIGTSS, LT, 1 }, - { MASK_SSE1, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomige", IX86_BUILTIN_UCOMIGESS, LE, 1 }, - { MASK_SSE1, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS, NE, 0 }, - { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdeq", IX86_BUILTIN_COMIEQSD, EQ, 0 }, - { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdlt", IX86_BUILTIN_COMILTSD, LT, 0 }, - { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdle", IX86_BUILTIN_COMILESD, LE, 0 }, - { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdgt", IX86_BUILTIN_COMIGTSD, LT, 1 }, - { MASK_SSE2, 
CODE_FOR_sse2_comi, "__builtin_ia32_comisdge", IX86_BUILTIN_COMIGESD, LE, 1 }, - { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdneq", IX86_BUILTIN_COMINEQSD, NE, 0 }, - { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdeq", IX86_BUILTIN_UCOMIEQSD, EQ, 0 }, - { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdlt", IX86_BUILTIN_UCOMILTSD, LT, 0 }, - { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdle", IX86_BUILTIN_UCOMILESD, LE, 0 }, - { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdgt", IX86_BUILTIN_UCOMIGTSD, LT, 1 }, - { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdge", IX86_BUILTIN_UCOMIGESD, LE, 1 }, - { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdneq", IX86_BUILTIN_UCOMINEQSD, NE, 0 }, + { MASK_SSE1, CODE_FOR_sse_comi, "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS, UNEQ, 0 }, + { MASK_SSE1, CODE_FOR_sse_comi, "__builtin_ia32_comilt", IX86_BUILTIN_COMILTSS, UNLT, 0 }, + { MASK_SSE1, CODE_FOR_sse_comi, "__builtin_ia32_comile", IX86_BUILTIN_COMILESS, UNLE, 0 }, + { MASK_SSE1, CODE_FOR_sse_comi, "__builtin_ia32_comigt", IX86_BUILTIN_COMIGTSS, GT, 0 }, + { MASK_SSE1, CODE_FOR_sse_comi, "__builtin_ia32_comige", IX86_BUILTIN_COMIGESS, GE, 0 }, + { MASK_SSE1, CODE_FOR_sse_comi, "__builtin_ia32_comineq", IX86_BUILTIN_COMINEQSS, LTGT, 0 }, + { MASK_SSE1, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomieq", IX86_BUILTIN_UCOMIEQSS, UNEQ, 0 }, + { MASK_SSE1, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomilt", IX86_BUILTIN_UCOMILTSS, UNLT, 0 }, + { MASK_SSE1, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomile", IX86_BUILTIN_UCOMILESS, UNLE, 0 }, + { MASK_SSE1, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomigt", IX86_BUILTIN_UCOMIGTSS, GT, 0 }, + { MASK_SSE1, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomige", IX86_BUILTIN_UCOMIGESS, GE, 0 }, + { MASK_SSE1, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS, LTGT, 0 }, + { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdeq", IX86_BUILTIN_COMIEQSD, UNEQ, 0 }, + { MASK_SSE2, 
CODE_FOR_sse2_comi, "__builtin_ia32_comisdlt", IX86_BUILTIN_COMILTSD, UNLT, 0 }, + { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdle", IX86_BUILTIN_COMILESD, UNLE, 0 }, + { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdgt", IX86_BUILTIN_COMIGTSD, GT, 0 }, + { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdge", IX86_BUILTIN_COMIGESD, GE, 0 }, + { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdneq", IX86_BUILTIN_COMINEQSD, LTGT, 0 }, + { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdeq", IX86_BUILTIN_UCOMIEQSD, UNEQ, 0 }, + { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdlt", IX86_BUILTIN_UCOMILTSD, UNLT, 0 }, + { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdle", IX86_BUILTIN_UCOMILESD, UNLE, 0 }, + { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdgt", IX86_BUILTIN_UCOMIGTSD, GT, 0 }, + { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdge", IX86_BUILTIN_UCOMIGESD, GE, 0 }, + { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdneq", IX86_BUILTIN_UCOMINEQSD, LTGT, 0 }, }; static const struct builtin_description bdesc_2arg[] = @@ -12881,14 +12880,14 @@ ix86_expand_sse_comi (d, arglist, target) op1 = copy_to_mode_reg (mode1, op1); op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1); - pat = GEN_FCN (d->icode) (op0, op1, op2); + pat = GEN_FCN (d->icode) (op0, op1); if (! pat) return 0; emit_insn (pat); emit_insn (gen_rtx_SET (VOIDmode, gen_rtx_STRICT_LOW_PART (VOIDmode, target), gen_rtx_fmt_ee (comparison, QImode, - gen_rtx_REG (CCmode, FLAGS_REG), + SET_DEST (pat), const0_rtx))); return SUBREG_REG (target); @@ -13349,9 +13348,8 @@ ix86_expand_builtin (exp, target, subtarget, mode, ignore) arg0 = TREE_VALUE (arglist); op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0); icode = CODE_FOR_sse2_clflush; - mode0 = insn_data[icode].operand[0].mode; - if (! (*insn_data[icode].operand[0].predicate) (op0, mode0)) - op0 = copy_to_mode_reg (mode0, op0); + if (! 
(*insn_data[icode].operand[0].predicate) (op0, Pmode)) + op0 = copy_to_mode_reg (Pmode, op0); emit_insn (gen_sse2_clflush (op0)); return 0; diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md index 095de745e40..6825149b1e6 100644 --- a/gcc/config/i386/i386.md +++ b/gcc/config/i386/i386.md @@ -17828,7 +17828,7 @@ (match_operand:V2DI 1 "nonimmediate_operand" "xm,x"))] "TARGET_SSE" ;; @@@ let's try to use movaps here. - "movdga\t{%1, %0|%0, %1}" + "movdqa\t{%1, %0|%0, %1}" [(set_attr "type" "ssemov") (set_attr "mode" "V4SF")]) @@ -18883,13 +18883,12 @@ (define_insn "sse_comi" [(set (reg:CCFP 17) - (match_operator:CCFP 2 "sse_comparison_operator" - [(vec_select:SF - (match_operand:V4SF 0 "register_operand" "x") - (parallel [(const_int 0)])) - (vec_select:SF - (match_operand:V4SF 1 "register_operand" "x") - (parallel [(const_int 0)]))]))] + (compare:CCFP (vec_select:SF + (match_operand:V4SF 0 "register_operand" "x") + (parallel [(const_int 0)])) + (vec_select:SF + (match_operand:V4SF 1 "register_operand" "x") + (parallel [(const_int 0)]))))] "TARGET_SSE" "comiss\t{%1, %0|%0, %1}" [(set_attr "type" "ssecmp") @@ -18897,13 +18896,12 @@ (define_insn "sse_ucomi" [(set (reg:CCFPU 17) - (match_operator:CCFPU 2 "sse_comparison_operator" - [(vec_select:SF - (match_operand:V4SF 0 "register_operand" "x") - (parallel [(const_int 0)])) - (vec_select:SF - (match_operand:V4SF 1 "register_operand" "x") - (parallel [(const_int 0)]))]))] + (compare:CCFPU (vec_select:SF + (match_operand:V4SF 0 "register_operand" "x") + (parallel [(const_int 0)])) + (vec_select:SF + (match_operand:V4SF 1 "register_operand" "x") + (parallel [(const_int 0)]))))] "TARGET_SSE" "ucomiss\t{%1, %0|%0, %1}" [(set_attr "type" "ssecmp") @@ -20409,7 +20407,12 @@ [(match_operand:V2DF 1 "register_operand" "0") (match_operand:V2DF 2 "nonimmediate_operand" "x")])))] "TARGET_SSE2" - "cmpn%D3pd\t{%2, %0|%0, %2}" +{ + if (GET_CODE (operands[3]) == UNORDERED) + return "cmpordpd\t{%2, %0|%0, %2}"; + else + return 
"cmpn%D3pd\t{%2, %0|%0, %2}"; +} [(set_attr "type" "ssecmp") (set_attr "mode" "V2DF")]) @@ -20436,19 +20439,23 @@ (subreg:V2DI (match_dup 1) 0) (const_int 1)))] "TARGET_SSE2" - "cmp%D3sd\t{%2, %0|%0, %2}" +{ + if (GET_CODE (operands[3]) == UNORDERED) + return "cmpordsd\t{%2, %0|%0, %2}"; + else + return "cmpn%D3sd\t{%2, %0|%0, %2}"; +} [(set_attr "type" "ssecmp") (set_attr "mode" "DF")]) (define_insn "sse2_comi" [(set (reg:CCFP 17) - (match_operator:CCFP 2 "sse_comparison_operator" - [(vec_select:DF - (match_operand:V2DF 0 "register_operand" "x") - (parallel [(const_int 0)])) - (vec_select:DF - (match_operand:V2DF 1 "register_operand" "x") - (parallel [(const_int 0)]))]))] + (compare:CCFP (vec_select:DF + (match_operand:V2DF 0 "register_operand" "x") + (parallel [(const_int 0)])) + (vec_select:DF + (match_operand:V2DF 1 "register_operand" "x") + (parallel [(const_int 0)]))))] "TARGET_SSE2" "comisd\t{%1, %0|%0, %1}" [(set_attr "type" "ssecmp") @@ -20456,13 +20463,12 @@ (define_insn "sse2_ucomi" [(set (reg:CCFPU 17) - (match_operator:CCFPU 2 "sse_comparison_operator" - [(vec_select:DF - (match_operand:V2DF 0 "register_operand" "x") - (parallel [(const_int 0)])) - (vec_select:DF - (match_operand:V2DF 1 "register_operand" "x") - (parallel [(const_int 0)]))]))] + (compare:CCFPU (vec_select:DF + (match_operand:V2DF 0 "register_operand" "x") + (parallel [(const_int 0)])) + (vec_select:DF + (match_operand:V2DF 1 "register_operand" "x") + (parallel [(const_int 0)]))))] "TARGET_SSE2" "ucomisd\t{%1, %0|%0, %1}" [(set_attr "type" "ssecmp") @@ -21630,7 +21636,7 @@ (set_attr "mode" "V2DF")]) (define_insn "sse2_clflush" - [(unspec_volatile [(match_operand:SI 0 "address_operand" "p")] + [(unspec_volatile [(match_operand 0 "address_operand" "p")] UNSPECV_CLFLUSH)] "TARGET_SSE2" "clflush %0" diff --git a/gcc/config/i386/xmmintrin.h b/gcc/config/i386/xmmintrin.h index ed464430c90..42278b7d2e5 100644 --- a/gcc/config/i386/xmmintrin.h +++ b/gcc/config/i386/xmmintrin.h @@ -606,7 +606,7 
@@ _mm_cvtps_pi16(__m128 __A) __v4sf __losf = __builtin_ia32_movhlps (__hisf, __hisf); __v2si __hisi = __builtin_ia32_cvtps2pi (__hisf); __v2si __losi = __builtin_ia32_cvtps2pi (__losf); - return (__m64) __builtin_ia32_packssdw (__losi, __hisi); + return (__m64) __builtin_ia32_packssdw (__hisi, __losi); } /* Convert the four SPFP values in A to four signed 8-bit integers. */ @@ -1293,10 +1293,12 @@ _mm_sqrt_pd (__m128d __A) return (__m128d)__builtin_ia32_sqrtpd ((__v2df)__A); } +/* Return pair {sqrt (B[0]), A[1]}. */ static __inline __m128d -_mm_sqrt_sd (__m128d __A) +_mm_sqrt_sd (__m128d __A, __m128d __B) { - return (__m128d)__builtin_ia32_sqrtsd ((__v2df)__A); + __v2df __tmp = __builtin_ia32_movsd ((__v2df)__A, (__v2df)__B); + return (__m128d)__builtin_ia32_sqrtsd ((__v2df)__tmp); } static __inline __m128d @@ -1320,7 +1322,7 @@ _mm_max_pd (__m128d __A, __m128d __B) static __inline __m128d _mm_max_sd (__m128d __A, __m128d __B) { - return (__m128d)__builtin_ia32_minsd ((__v2df)__A, (__v2df)__B); + return (__m128d)__builtin_ia32_maxsd ((__v2df)__A, (__v2df)__B); } static __inline __m128d @@ -1696,10 +1698,9 @@ _mm_loadh_pd (__m128d __A, double *__B) } static __inline void -_mm_storeh_pd (__m128d *__A, double __B) +_mm_storeh_pd (double *__A, __m128d __B) { - __v2df __tmp = __builtin_ia32_loadsd (&__B); - __builtin_ia32_storehpd ((__v2si *)__A, __tmp); + __builtin_ia32_storehpd ((__v2si *)__A, (__v2df)__B); } static __inline __m128d @@ -1709,10 +1710,9 @@ _mm_loadl_pd (__m128d __A, double *__B) } static __inline void -_mm_storel_pd (__m128d *__A, double __B) +_mm_storel_pd (double *__A, __m128d __B) { - __v2df __tmp = __builtin_ia32_loadsd (&__B); - __builtin_ia32_storelpd ((__v2si *)__A, __tmp); + __builtin_ia32_storelpd ((__v2si *)__A, (__v2df)__B); } static __inline int -- 2.30.2