From 0f2698d0b5baf74241d9a1ae4c3087ddeb4c9704 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Thu, 20 Jan 2005 10:34:12 -0800 Subject: [PATCH] re PR target/19530 (MMX load intrinsic produces SSE superfluous instructions (movlps)) PR target/19530 * config/i386/mmintrin.h (_mm_cvtsi32_si64): Use __builtin_ia32_vec_init_v2si. (_mm_cvtsi64_si32): Use __builtin_ia32_vec_ext_v2si. * config/i386/i386.c (IX86_BUILTIN_VEC_EXT_V2SI): New. (ix86_init_mmx_sse_builtins): Create it. (ix86_expand_builtin): Expand it. (ix86_expand_vector_set): Handle V2SFmode and V2SImode. * config/i386/mmx.md (vec_extractv2sf_0, vec_extractv2sf_1): New. (vec_extractv2si_0, vec_extractv2si_1): New. From-SVN: r93972 --- gcc/ChangeLog | 13 ++++++ gcc/config/i386/i386.c | 22 +++++++-- gcc/config/i386/mmintrin.h | 6 +-- gcc/config/i386/mmx.md | 92 ++++++++++++++++++++++++++++++++++++++ 4 files changed, 126 insertions(+), 7 deletions(-) diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 7f2af19f9ad..514bc87e9de 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,16 @@ +2005-01-20 Richard Henderson + + PR target/19530 + * config/i386/mmintrin.h (_mm_cvtsi32_si64): Use + __builtin_ia32_vec_init_v2si. + (_mm_cvtsi64_si32): Use __builtin_ia32_vec_ext_v2si. + * config/i386/i386.c (IX86_BUILTIN_VEC_EXT_V2SI): New. + (ix86_init_mmx_sse_builtins): Create it. + (ix86_expand_builtin): Expand it. + (ix86_expand_vector_set): Handle V2SFmode and V2SImode. + * config/i386/mmx.md (vec_extractv2sf_0, vec_extractv2sf_1): New. + (vec_extractv2si_0, vec_extractv2si_1): New. + 2005-01-20 Richard Henderson * config/i386/i386.c (ix86_expand_push): New. 
diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c index 2c1e9fd4c3d..e0fa5107160 100644 --- a/gcc/config/i386/i386.c +++ b/gcc/config/i386/i386.c @@ -12654,6 +12654,7 @@ enum ix86_builtins IX86_BUILTIN_VEC_EXT_V4SF, IX86_BUILTIN_VEC_EXT_V4SI, IX86_BUILTIN_VEC_EXT_V8HI, + IX86_BUILTIN_VEC_EXT_V2SI, IX86_BUILTIN_VEC_EXT_V4HI, IX86_BUILTIN_VEC_SET_V8HI, IX86_BUILTIN_VEC_SET_V4HI, @@ -13663,6 +13664,11 @@ ix86_init_mmx_sse_builtins (void) def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_vec_ext_v4hi", ftype, IX86_BUILTIN_VEC_EXT_V4HI); + ftype = build_function_type_list (intSI_type_node, V2SI_type_node, + integer_type_node, NULL_TREE); + def_builtin (MASK_MMX, "__builtin_ia32_vec_ext_v2si", + ftype, IX86_BUILTIN_VEC_EXT_V2SI); + /* Access to the vec_set patterns. */ ftype = build_function_type_list (V8HI_type_node, V8HI_type_node, intHI_type_node, @@ -14475,6 +14481,7 @@ ix86_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED, case IX86_BUILTIN_VEC_EXT_V4SF: case IX86_BUILTIN_VEC_EXT_V4SI: case IX86_BUILTIN_VEC_EXT_V8HI: + case IX86_BUILTIN_VEC_EXT_V2SI: case IX86_BUILTIN_VEC_EXT_V4HI: return ix86_expand_vec_ext_builtin (arglist, target); @@ -16276,9 +16283,18 @@ ix86_expand_vector_set (bool mmx_ok, rtx target, rtx val, int elt) { case V2SFmode: case V2SImode: - if (!mmx_ok) - break; - /* FALLTHRU */ + if (mmx_ok) + { + tmp = gen_reg_rtx (GET_MODE_INNER (mode)); + ix86_expand_vector_extract (true, tmp, target, 1 - elt); /* TMP is the element of TARGET that is kept.  */ + if (elt == 0) /* vec_concat places its first operand in element 0.  */ + tmp = gen_rtx_VEC_CONCAT (mode, val, tmp); + else + tmp = gen_rtx_VEC_CONCAT (mode, tmp, val); + emit_insn (gen_rtx_SET (VOIDmode, target, tmp)); + return; + } + break; case V2DFmode: case V2DImode: diff --git a/gcc/config/i386/mmintrin.h b/gcc/config/i386/mmintrin.h index 4f2af6de15a..68c8313f9ae 100644 --- a/gcc/config/i386/mmintrin.h +++ b/gcc/config/i386/mmintrin.h @@ -58,8 +58,7 @@ _m_empty (void) static __inline __m64 _mm_cvtsi32_si64 (int __i) { - long long __tmp = (unsigned int)__i; - return 
(__m64) __tmp; + return (__m64) __builtin_ia32_vec_init_v2si (__i, 0); } static __inline __m64 @@ -88,8 +87,7 @@ _mm_set_pi64x (long long __i) static __inline int _mm_cvtsi64_si32 (__m64 __i) { - long long __tmp = (long long)__i; - return __tmp; + return __builtin_ia32_vec_ext_v2si ((__v2si)__i, 0); } static __inline int diff --git a/gcc/config/i386/mmx.md b/gcc/config/i386/mmx.md index d46fb36fd29..9e374e44ebf 100644 --- a/gcc/config/i386/mmx.md +++ b/gcc/config/i386/mmx.md @@ -482,6 +482,51 @@ DONE; }) +(define_insn_and_split "*vec_extractv2sf_0" + [(set (match_operand:SF 0 "nonimmediate_operand" "=x,y,m,m,frxy") + (vec_select:SF + (match_operand:V2SF 1 "nonimmediate_operand" " x,y,x,y,m") + (parallel [(const_int 0)])))] + "TARGET_MMX && !(MEM_P (operands[0]) && MEM_P (operands[1]))" + "#" + "&& reload_completed" + [(const_int 0)] +{ + rtx op1 = operands[1]; + if (REG_P (op1)) + op1 = gen_rtx_REG (SFmode, REGNO (op1)); + else + op1 = gen_lowpart (SFmode, op1); + emit_move_insn (operands[0], op1); + DONE; +}) + +(define_insn "*vec_extractv2sf_1" + [(set (match_operand:SF 0 "nonimmediate_operand" "=y,x,frxy") + (vec_select:SF + (match_operand:V2SF 1 "nonimmediate_operand" " 0,0,o") + (parallel [(const_int 1)])))] + "TARGET_MMX && !(MEM_P (operands[0]) && MEM_P (operands[1]))" + "@ + punpckhdq\t%0, %0 + unpckhps\t%0, %0 + #" + [(set_attr "type" "mmxcvt,sselog1,*") + (set_attr "mode" "DI,V4SF,SI")]) + +(define_split + [(set (match_operand:SF 0 "register_operand" "") + (vec_select:SF + (match_operand:V2SF 1 "memory_operand" "") + (parallel [(const_int 1)])))] + "TARGET_MMX && reload_completed" + [(const_int 0)] +{ + operands[1] = adjust_address (operands[1], SFmode, 4); + emit_move_insn (operands[0], operands[1]); + DONE; +}) + (define_expand "vec_extractv2sf" [(match_operand:SF 0 "register_operand" "") (match_operand:V2SF 1 "register_operand" "") @@ -1103,6 +1148,53 @@ DONE; }) +(define_insn_and_split "*vec_extractv2si_0" + [(set (match_operand:SI 0 
"nonimmediate_operand" "=x,y,m,m,frxy") + (vec_select:SI + (match_operand:V2SI 1 "nonimmediate_operand" " x,y,x,y,m") + (parallel [(const_int 0)])))] + "TARGET_MMX && !(MEM_P (operands[0]) && MEM_P (operands[1]))" + "#" + "&& reload_completed" + [(const_int 0)] +{ + rtx op1 = operands[1]; + if (REG_P (op1)) + op1 = gen_rtx_REG (SImode, REGNO (op1)); + else + op1 = gen_lowpart (SImode, op1); + emit_move_insn (operands[0], op1); + DONE; +}) + +(define_insn "*vec_extractv2si_1" + [(set (match_operand:SI 0 "nonimmediate_operand" "=y,Y,Y,x,frxy") + (vec_select:SI + (match_operand:V2SI 1 "nonimmediate_operand" " 0,0,Y,0,o") + (parallel [(const_int 1)])))] + "TARGET_MMX && !(MEM_P (operands[0]) && MEM_P (operands[1]))" + "@ + punpckhdq\t%0, %0 + punpckhdq\t%0, %0 + pshufd\t{$85, %1, %0|%0, %1, 85} + unpckhps\t%0, %0 + #" + [(set_attr "type" "mmxcvt,sselog1,sselog1,sselog1,*") + (set_attr "mode" "DI,TI,TI,V4SF,SI")]) + +(define_split + [(set (match_operand:SI 0 "register_operand" "") + (vec_select:SI + (match_operand:V2SI 1 "memory_operand" "") + (parallel [(const_int 1)])))] + "TARGET_MMX && reload_completed" + [(const_int 0)] +{ + operands[1] = adjust_address (operands[1], SImode, 4); + emit_move_insn (operands[0], operands[1]); + DONE; +}) + (define_expand "vec_extractv2si" [(match_operand:SI 0 "register_operand" "") (match_operand:V2SI 1 "register_operand" "") -- 2.30.2