From f8ca792302bcc455bf7d45f807d71a9f06c4add8 Mon Sep 17 00:00:00 2001 From: Jan Hubicka Date: Tue, 29 Oct 2002 20:41:35 +0100 Subject: [PATCH] re PR target/8322 (SSE2 intrinsics broken?) * i386.h (CONST_DOUBLE_OK_FOR_LETTER_P): Remove 'H' * i386.md (movsf*, movdf*): Use 'C' instead of 'H' * md.texi (machine dependent constraints): Document 'C' * simplify-rtx.c (simplify_subreg): Fix const_int->vector subregging. * i386.c (ix86_expand_vector_move): Fix. * i386.c (ix86_expand_builtin): Use sse2_maskmovdqu_rex64. * i386.md (sse2_maskmovdqu_rex64): New pattern PR target/8322 * xmmintrin.h (_mm_stream_pi, _mm_stream_pd): Fix cast. (ix86_init_mmx_sse_builtins): Fix type. From-SVN: r58631 --- gcc/ChangeLog | 17 +++++++++++++++++ gcc/config/i386/i386.c | 15 ++++++--------- gcc/config/i386/i386.h | 2 +- gcc/config/i386/i386.md | 17 ++++++++++++++--- gcc/config/i386/xmmintrin.h | 4 ++-- gcc/doc/md.texi | 4 ++++ gcc/simplify-rtx.c | 5 ++++- 7 files changed, 48 insertions(+), 16 deletions(-) diff --git a/gcc/ChangeLog b/gcc/ChangeLog index b64d04531e6..7c7837fe89d 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,20 @@ +Tue Oct 29 19:32:16 CET 2002 Jan Hubicka + + * i386.h (CONST_DOUBLE_OK_FOR_LETTER_P): Remove 'H' + * i386.md (movsf*, movdf*): Use 'C' instead of 'H' + * md.texi (machine dependent constraints): Document 'C' + + * simplify-rtx.c (simplify_subreg): Fix const_int->vector subregging. + + * i386.c (ix86_expand_vector_move): Fix. + + * i386.c (ix86_expand_builtin): Use sse2_maskmovdqu_rex64. + * i386.md (sse2_maskmovdqu_rex64): New pattern + + PR target/8322 + * xmmintrin.h (_mm_stream_pi, _mm_stream_pd): Fix cast. + (ix86_init_mmx_sse_builtins): Fix type. + 2002-10-29 Jason Thorpe * gthr-posix.h: Include for feature tests. diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c index f950198f73b..7145e9299f2 100644 --- a/gcc/config/i386/i386.c +++ b/gcc/config/i386/i386.c @@ -7799,14 +7799,10 @@ ix86_expand_vector_move (mode, operands) if ((reload_in_progress | reload_completed) == 0 && register_operand (operands[0], mode) && CONSTANT_P (operands[1])) - { - rtx addr = gen_reg_rtx (Pmode); - emit_move_insn (addr, XEXP (force_const_mem (mode, operands[1]), 0)); - operands[1] = gen_rtx_MEM (mode, addr); - } + operands[1] = force_const_mem (mode, operands[1]); /* Make operand1 a register if it isn't already. */ - if ((reload_in_progress | reload_completed) == 0 + if (!no_new_pseudos && !register_operand (operands[0], mode) && !register_operand (operands[1], mode)) { @@ -12287,10 +12283,10 @@ ix86_init_mmx_sse_builtins () /* @@@ the type is bogus */ tree v4sf_ftype_v4sf_pv2si = build_function_type_list (V4SF_type_node, - V4SF_type_node, pv2di_type_node, NULL_TREE); + V4SF_type_node, pv2si_type_node, NULL_TREE); tree void_ftype_pv2si_v4sf = build_function_type_list (void_type_node, - pv2di_type_node, V4SF_type_node, NULL_TREE); + pv2si_type_node, V4SF_type_node, NULL_TREE); tree void_ftype_pfloat_v4sf = build_function_type_list (void_type_node, pfloat_type_node, V4SF_type_node, NULL_TREE); @@ -13145,7 +13141,8 @@ ix86_expand_builtin (exp, target, subtarget, mode, ignore) case IX86_BUILTIN_MASKMOVDQU: icode = (fcode == IX86_BUILTIN_MASKMOVQ ? (TARGET_64BIT ? CODE_FOR_mmx_maskmovq_rex : CODE_FOR_mmx_maskmovq) - : CODE_FOR_sse2_maskmovdqu); + : (TARGET_64BIT ? CODE_FOR_sse2_maskmovdqu_rex64 + : CODE_FOR_sse2_maskmovdqu)); /* Note the arg order is different from the operand order. */ arg1 = TREE_VALUE (arglist); arg2 = TREE_VALUE (TREE_CHAIN (arglist)); diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h index a679d3d469e..87add77f18c 100644 --- a/gcc/config/i386/i386.h +++ b/gcc/config/i386/i386.h @@ -1411,7 +1411,7 @@ enum reg_class #define CONST_DOUBLE_OK_FOR_LETTER_P(VALUE, C) \ ((C) == 'G' ? standard_80387_constant_p (VALUE) \ - : ((C) == 'H' ? standard_sse_constant_p (VALUE) : 0)) + : 0) /* A C expression that defines the optional machine-dependent constraint letters that can be used to segregate specific types of diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md index a09da46732d..4b1c5d14200 100644 --- a/gcc/config/i386/i386.md +++ b/gcc/config/i386/i386.md @@ -2095,7 +2095,7 @@ (define_insn "*movsf_1" [(set (match_operand:SF 0 "nonimmediate_operand" "=f#xr,m,f#xr,r#xf,m,x#rf,x#rf,x#rf,m,!*y,!rm,!*y") - (match_operand:SF 1 "general_operand" "fm#rx,f#rx,G,rmF#fx,Fr#fx,H,x,xm#rf,x#rf,rm,*y,*y"))] + (match_operand:SF 1 "general_operand" "fm#rx,f#rx,G,rmF#fx,Fr#fx,C,x,xm#rf,x#rf,rm,*y,*y"))] "(GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM) && (reload_in_progress || reload_completed || (ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_LARGE) @@ -2280,7 +2280,7 @@ (define_insn "*movdf_nointeger" [(set (match_operand:DF 0 "nonimmediate_operand" "=f#Y,m,f#Y,*r,o,Y#f,Y#f,Y#f,m") - (match_operand:DF 1 "general_operand" "fm#Y,f#Y,G,*roF,F*r,H,Y#f,YHm#f,Y#f"))] + (match_operand:DF 1 "general_operand" "fm#Y,f#Y,G,*roF,F*r,C,Y#f,YHm#f,Y#f"))] "(GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM) && (optimize_size || !TARGET_INTEGER_DFMODE_MOVES) && (reload_in_progress || reload_completed @@ -2341,7 +2341,7 @@ (define_insn "*movdf_integer" [(set (match_operand:DF 0 "nonimmediate_operand" "=f#Yr,m,f#Yr,r#Yf,o,Y#rf,Y#rf,Y#rf,m") - (match_operand:DF 1 "general_operand" "fm#Yr,f#Yr,G,roF#Yf,Fr#Yf,H,Y#rf,Ym#rf,Y#rf"))] + (match_operand:DF 1 "general_operand" "fm#Yr,f#Yr,G,roF#Yf,Fr#Yf,C,Y#rf,Ym#rf,Y#rf"))] "(GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM) && !optimize_size && TARGET_INTEGER_DFMODE_MOVES && (reload_in_progress || reload_completed @@ -20552,6 +20552,17 @@ [(set_attr "type" "ssecvt") (set_attr "mode" "TI")]) +(define_insn "sse2_maskmovdqu_rex64" + [(set (mem:V16QI (match_operand:DI 0 "register_operand" "D")) + (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "x") + (match_operand:V16QI 2 "register_operand" "x")] + UNSPEC_MASKMOV))] + "TARGET_SSE2" + ;; @@@ check ordering of operands in intel/nonintel syntax + "maskmovdqu\t{%2, %1|%1, %2}" + [(set_attr "type" "ssecvt") + (set_attr "mode" "TI")]) + (define_insn "sse2_movntv2df" [(set (match_operand:V2DF 0 "memory_operand" "=m") (unspec:V2DF [(match_operand:V2DF 1 "register_operand" "x")] diff --git a/gcc/config/i386/xmmintrin.h b/gcc/config/i386/xmmintrin.h index 2213f96f4a5..4136e901795 100644 --- a/gcc/config/i386/xmmintrin.h +++ b/gcc/config/i386/xmmintrin.h @@ -1037,7 +1037,7 @@ _mm_prefetch (void *__P, enum _mm_hint __I) static __inline void _mm_stream_pi (__m64 *__P, __m64 __A) { - __builtin_ia32_movntq (__P, (long long)__A); + __builtin_ia32_movntq ((unsigned long long *)__P, (unsigned long long)__A); } /* Likewise. The address must be 16-byte aligned. */ @@ -2426,7 +2426,7 @@ _mm_stream_si128 (__m128i *__A, __m128i __B) static __inline void _mm_stream_pd (double *__A, __m128d __B) { - __builtin_ia32_movntpd ((__m128d *)__A, (__v2df)__B); + __builtin_ia32_movntpd (__A, (__v2df)__B); } static __inline __m128i diff --git a/gcc/doc/md.texi b/gcc/doc/md.texi index db640753f9d..0dbeb2b120c 100644 --- a/gcc/doc/md.texi +++ b/gcc/doc/md.texi @@ -1606,6 +1606,10 @@ Second floating point register @item c @samp{c} register +@item C +Specifies constant that can be easilly constructed in SSE register without +loading it from memory. + @item d @samp{d} register diff --git a/gcc/simplify-rtx.c b/gcc/simplify-rtx.c index 3293f0b7c6b..52a92504a00 100644 --- a/gcc/simplify-rtx.c +++ b/gcc/simplify-rtx.c @@ -2399,7 +2399,10 @@ simplify_subreg (outermode, op, innermode, byte) /* This might fail, e.g. if taking a subreg from a SYMBOL_REF. */ /* ??? It would be nice if we could actually make such subregs on targets that allow such relocations. */ - elt = simplify_subreg (submode, op, innermode, byte); + if (byte >= GET_MODE_UNIT_SIZE (innermode)) + elt = CONST0_RTX (submode); + else + elt = simplify_subreg (submode, op, innermode, byte); if (! elt) return NULL_RTX; RTVEC_ELT (v, i) = elt; -- 2.30.2