i386.md (rex64suffix): New mode attribute.
author Uros Bizjak <uros@gcc.gnu.org>
Tue, 1 Apr 2008 20:20:09 +0000 (22:20 +0200)
committer Uros Bizjak <uros@gcc.gnu.org>
Tue, 1 Apr 2008 20:20:09 +0000 (22:20 +0200)
        * config/i386/i386.md (rex64suffix): New mode attribute.
        (floathi<mode>2): Disable expander for SSE math.
        (*floathi<mode>2_1): New insn_and_split pattern.
        (*floathi<mode>2_i387_with_temp): New macroized instruction pattern and
        corresponding post-reload splitters.
        (*floathi<mode>2_i387): New macroized insn pattern.
        (float<SSEMODEI24:mode><X87MODEF:mode>2): New macroized expander.
        (*float<SSEMODEI24:mode><X87MODEF:mode>2_1): New macroized
        insn_and_split pattern.
        (*floatsi<mode>2_vector_mixed_with_temp, *floatsi<mode>2_vector_mixed):
        New macroized instruction patterns and corresponding post-reload
        splitters.
        (*floatsi<mode>2_mixed_with_temp): New macroized instruction pattern
        and corresponding post-reload splitters.
        (*floatsi<mode>2_mixed_interunit, *floatsi<mode>2_mixed_nointerunit):
        New macroized instruction patterns.
        (*floatsi<mode>2_vector_sse_with_temp, *floatsi<mode>2_vector_sse): New
        macroized instruction patterns and corresponding post-reload splitters.
        (*floatsi<mode>2_sse_with_temp): New macroized instruction pattern and
        corresponding post-reload splitters.
        (*floatsi<mode>2_sse_interunit, *floatsi<mode>2_sse_nointerunit):
        New macroized instruction patterns.
        (*floatsi<mode>2_i387_with_temp): New macroized instruction pattern and
        corresponding post-reload splitters.
        (*floatsi<mode>2_i387): New macroized instruction patterns.

From-SVN: r133798

gcc/ChangeLog
gcc/config/i386/i386.md

index a2118e950836cffe616953f0361a6e8f209b1b57..693ea09f3e1dd4435da21a86b03c4404240ea9ad 100644 (file)
@@ -1,3 +1,31 @@
+2008-04-01  Uros Bizjak  <ubizjak@gmail.com>
+
+       * config/i386/i386.md (rex64suffix): New mode attribute.
+       (floathi<mode>2): Disable expander for SSE math.
+       (*floathi<mode>2_1): New insn_and_split pattern.
+       (*floathi<mode>2_i387_with_temp): New macroized instruction pattern and
+       corresponding post-reload splitters.
+       (*floathi<mode>2_i387): New macroized insn pattern.
+       (float<SSEMODEI24:mode><X87MODEF:mode>2): New macroized expander.
+       (*float<SSEMODEI24:mode><X87MODEF:mode>2_1): New macroized
+       insn_and_split pattern.
+       (*floatsi<mode>2_vector_mixed_with_temp, *floatsi<mode>2_vector_mixed):
+       New macroized instruction patterns and corresponding post-reload
+       splitters.
+       (*floatsi<mode>2_mixed_with_temp): New macroized instruction pattern
+       and corresponding post-reload splitters.
+       (*floatsi<mode>2_mixed_interunit, *floatsi<mode>2_mixed_nointerunit):
+       New macroized instruction patterns.
+       (*floatsi<mode>2_vector_sse_with_temp, *floatsi<mode>2_vector_sse): New
+       macroized instruction patterns and corresponding post-reload splitters.
+       (*floatsi<mode>2_sse_with_temp): New macroized instruction pattern and
+       corresponding post-reload splitters.
+       (*floatsi<mode>2_sse_interunit, *floatsi<mode>2_sse_nointerunit):
+       New macroized instruction patterns.
+       (*floatsi<mode>2_i387_with_temp): New macroized instruction pattern and
+       corresponding post-reload splitters.
+       (*floatsi<mode>2_i387): New macroized instruction patterns.
+
 2008-04-01  H.J. Lu  <hongjiu.lu@intel.com>
 
        * config/i386/i386.md (smaxmin): New.
        * common.opt (fprofile-dir=, fprofile-use=, fprofile-generate=):
        New options
        (fprofile-use): Add var flag_profile_use
-       * coverage.c (coverage_begin_output): Do not open a gcno file for output
-       only if -ftest-coverage is set.
+       * coverage.c (coverage_begin_output): Do not open a gcno file for
+       output only if -ftest-coverage is set.
        Do not add getpwd() to gcda file path.
-       (build_gcov_info): Check the new flag 
+       (build_gcov_info): Check the new flag
        flag_profile_datafile_relative_path.
        (coverage_init): Use profile_data_prefix.
        Read profile counter only if flag_profile_use is set.
 
 2008-03-31  James E. Wilson  <wilson@tuliptree.org>
 
-       * varasm.c (output_constant_pool_1): In LABEL_REF check, use tmp
-       consistently.
+       * varasm.c (output_constant_pool_1): In LABEL_REF check,
+       use tmp consistently.
 
        PR target/35695
        * config/ia64/div.md (recip_approx_rf): Use UNSPEC not DIV.
 
 2008-03-31  Ian Lance Taylor  <iant@google.com>
 
-       * tlink.c (scan_linker_output): Look for symbol name in single
-       quotes.
+       * tlink.c (scan_linker_output): Look for symbol name in single quotes.
 
 2008-03-31  Jan Hubicka  <jh@suse.cz>
 
        (regno_reg_rtx): ... new global array.
        (reg_rtx_no, seq_stack, REGNO_POINTER_ALIGN): Update accessors.
        (pending_stack_adjust, inhibit_defer_pop, saveregs_value,
-       apply_args_value, forced_labels, stack_pointer_delta): Update accessors.
+       apply_args_value, forced_labels, stack_pointer_delta):
+       Update accessors.
        (struct varasm_status): Move here from varasm.c
        (struct rtl_data): New. Move here some fields from struct function.
        (return_label, naked_return_label, stack_slot_list, parm_birth_insn,
        (get_arg_pointer_save_area): Update prototype.
        * emit-rtl.c (rtl): Declare.
        (regno_reg_rtx): Declare.
-       (first_insn, last_insn, cur_insn_uid, last_location, first_label_num): Update.
+       (first_insn, last_insn, cur_insn_uid, last_location, first_label_num):
+       Update.
        (gen_reg_rtx): Update.
        (init_virtual_regs): Do not take emit_status argument.
        (init_emit): Do not allocate emit.
index 8ccb0b2b9194bc49e6460a259524e0b056451ff4..a72e7725d8339973bd6ad43f11cb2f7c7744baf1 100644 (file)
 ;; SSE vector mode corresponding to a scalar mode
 (define_mode_attr ssevecmode
   [(QI "V16QI") (HI "V8HI") (SI "V4SI") (DI "V2DI") (SF "V4SF") (DF "V2DF")])
+
+;; Instruction suffix for REX 64bit operators: empty for SImode and
+;; "{q}" for DImode.  It is spliced into the cvtsi2s<...> mnemonics
+;; through <SSEMODEI24:rex64suffix>.
+(define_mode_attr rex64suffix [(SI "") (DI "{q}")])
 \f
 ;; Scheduling descriptions
 
 ;; wants to be able to do this between registers.
 
 (define_expand "floathi<mode>2"
-  [(set (match_operand:MODEF 0 "register_operand" "")
-       (float:MODEF (match_operand:HI 1 "nonimmediate_operand" "")))]
-  "TARGET_80387 || (SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)"
-{
-  if (SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
-    {
-      emit_insn
-       (gen_floatsi<mode>2 (operands[0],
-                            convert_to_mode (SImode, operands[1], 0)));
-      DONE;
-    }
-})
+  [(set (match_operand:X87MODEF 0 "register_operand" "")
+       (float:X87MODEF (match_operand:HI 1 "nonimmediate_operand" "")))]
+  "TARGET_80387
+   && (!(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
+       || TARGET_MIX_SSE_I387)"
+  "")
 
-(define_insn "*floathi<mode>2_i387"
-  [(set (match_operand:MODEF 0 "register_operand" "=f,f")
-       (float:MODEF
-         (match_operand:HI 1 "nonimmediate_operand" "m,?r")))]
+;; Pre-reload splitter to add memory clobber to the pattern.
+;; Matches an HImode register converted to an X87MODEF mode while
+;; reload has neither started nor completed, and always splits
+;; ("&& 1") into the same conversion wrapped in a parallel with a
+;; clobber of operand 2, an HImode stack slot obtained from
+;; assign_386_stack_local (HImode, SLOT_TEMP).
+(define_insn_and_split "*floathi<mode>2_1"
+  [(set (match_operand:X87MODEF 0 "register_operand" "")
+       (float:X87MODEF (match_operand:HI 1 "register_operand" "")))]
+  "TARGET_80387
+   && (!(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
+       || TARGET_MIX_SSE_I387)
+   && !(reload_completed || reload_in_progress)"
+  "#"
+  "&& 1"
+  [(parallel [(set (match_dup 0)
+             (float:X87MODEF (match_dup 1)))
+   (clobber (match_dup 2))])]
+  "operands[2] = assign_386_stack_local (HImode, SLOT_TEMP);")
+
+(define_insn "*floathi<mode>2_i387_with_temp"
+  [(set (match_operand:X87MODEF 0 "register_operand" "=f,f")
+       (float:X87MODEF (match_operand:HI 1 "nonimmediate_operand" "m,?r")))
+  (clobber (match_operand:HI 2 "memory_operand" "=m,m"))]
   "TARGET_80387
    && (!(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
        || TARGET_MIX_SSE_I387)"
-  "@
-   fild%z1\t%1
-   #"
+  "#"
   [(set_attr "type" "fmov,multi")
    (set_attr "mode" "<MODE>")
    (set_attr "unit" "*,i387")
    (set_attr "fp_int_src" "true")])
 
-(define_expand "floatsi<mode>2"
-  [(set (match_operand:MODEF 0 "register_operand" "")
-       (float:MODEF (match_operand:SI 1 "nonimmediate_operand" "")))]
-  "TARGET_80387 || (SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)"
-  "
-   /* When we use vector converts, we can't have input in memory.  */
-   if (GET_MODE (operands[0]) == DFmode
-       && TARGET_USE_VECTOR_CONVERTS && !optimize_size && TARGET_SSE_MATH
-       && SSE_FLOAT_MODE_P (DFmode))
-     operands[1] = force_reg (SImode, operands[1]);
-   else if (GET_MODE (operands[0]) == SFmode
-            && !optimize_size && TARGET_USE_VECTOR_CONVERTS && TARGET_SSE_MATH
-            && SSE_FLOAT_MODE_P (SFmode))
-     {
-       /* When !flag_trapping_math, we handle SImode->SFmode vector
-         conversions same way as SImode->DFmode.
-
-         For flat_trapping_math we can't safely use vector conversion without
-         clearing upper half, otherwise precision exception might occur.
-         However we can still generate the common sequence converting value
-         from general register to XMM register as:
-
-           mov         reg32, mem32
-           movd        mem32, xmm
-           cvtdq2pd xmm,xmm
-
-         because we know that movd clears the upper half.
-
-         Sadly in this case we can't rely on reload moving the value to XMM
-         register, since we need to know if upper half is OK, so we need
-         to do reloading by hand.  We force operand to memory unless target
-         supports inter unit moves.  */
-       if (!flag_trapping_math)
-         operands[1] = force_reg (SImode, operands[1]);
-       else if (!MEM_P (operands[1]))
-        {
-          int slot = virtuals_instantiated ? SLOT_TEMP : SLOT_VIRTUAL;
-          rtx tmp = assign_386_stack_local (SImode, slot);
-          emit_move_insn (tmp, operands[1]);
-          operands[1] = tmp;
-        }
-     }
-   /* Offload operand of cvtsi2ss and cvtsi2sd into memory for
-      !TARGET_INTER_UNIT_CONVERSIONS
-      It is necessary for the patterns to not accept nonmemory operands
-      as we would optimize out later.  */
-   else if (!TARGET_INTER_UNIT_CONVERSIONS
-           && TARGET_SSE_MATH && SSE_FLOAT_MODE_P (GET_MODE (operands[0]))
-           && !optimize_size
-           && !MEM_P (operands[1]))
-     {
-       int slot = virtuals_instantiated ? SLOT_TEMP : SLOT_VIRTUAL;
-       rtx tmp = assign_386_stack_local (GET_MODE (operands[1]), slot);
-       emit_move_insn (tmp, operands[1]);
-       operands[1] = tmp;
-     }
-  ")
-
-(define_insn "*floatsisf2_mixed_vector"
-  [(set (match_operand:SF 0 "register_operand" "=x,f,?f")
-       (float:SF (match_operand:SI 1 "nonimmediate_operand" "x,m,r")))]
-  "TARGET_MIX_SSE_I387 && !flag_trapping_math
-   && TARGET_USE_VECTOR_CONVERTS && !optimize_size"
-  "@
-   cvtdq2ps\t{%1, %0|%0, %1}
-   fild%z1\t%1
-   #"
-  [(set_attr "type" "sseicvt,fmov,multi")
-   (set_attr "mode" "SF")
-   (set_attr "unit" "*,i387,*")
-   (set_attr "athlon_decode" "double,*,*")
-   (set_attr "amdfam10_decode" "double,*,*")
-   (set_attr "fp_int_src" "false,true,true")])
-
-(define_insn "*floatsisf2_mixed"
-  [(set (match_operand:SF 0 "register_operand" "=f,?f,x,x")
-       (float:SF (match_operand:SI 1 "nonimmediate_operand" "m,r,r,m")))]
-  "TARGET_MIX_SSE_I387
-   && ((!TARGET_USE_VECTOR_CONVERTS && TARGET_INTER_UNIT_CONVERSIONS)
-       || optimize_size)"
-  "@
-   fild%z1\t%1
-   #
-   cvtsi2ss\t{%1, %0|%0, %1}
-   cvtsi2ss\t{%1, %0|%0, %1}"
-  [(set_attr "type" "fmov,multi,sseicvt,sseicvt")
-   (set_attr "mode" "SF")
-   (set_attr "unit" "*,i387,*,*")
-   (set_attr "athlon_decode" "*,*,vector,double")
-   (set_attr "amdfam10_decode" "*,*,vector,double")
-   (set_attr "fp_int_src" "true")])
-
-(define_insn "*floatsisf2_mixed_memory"
-  [(set (match_operand:SF 0 "register_operand" "=f,x")
-       (float:SF (match_operand:SI 1 "memory_operand" "m,m")))]
-  "TARGET_MIX_SSE_I387
-   && !TARGET_INTER_UNIT_CONVERSIONS && !optimize_size"
-  "@
-   fild%z1\t%1
-   cvtsi2ss\t{%1, %0|%0, %1}"
-  [(set_attr "type" "fmov,sseicvt")
-   (set_attr "mode" "SF")
-   (set_attr "athlon_decode" "*,double")
-   (set_attr "amdfam10_decode" "*,double")
+(define_insn "*floathi<mode>2_i387"
+  [(set (match_operand:X87MODEF 0 "register_operand" "=f")
+       (float:X87MODEF (match_operand:HI 1 "memory_operand" "m")))]
+  "TARGET_80387
+   && (!(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
+       || TARGET_MIX_SSE_I387)"
+  "fild%z1\t%1"
+  [(set_attr "type" "fmov")
+   (set_attr "mode" "<MODE>")
    (set_attr "fp_int_src" "true")])
 
-(define_insn "*floatsisf2_sse_vector_nointernunit"
-  [(set (match_operand:SF 0 "register_operand" "=x")
-       (float:SF (match_operand:SI 1 "memory_operand" "m")))]
-  "TARGET_SSE_MATH && flag_trapping_math
-   && TARGET_USE_VECTOR_CONVERTS && !optimize_size
-   && !TARGET_INTER_UNIT_MOVES"
-  "#"
-  [(set_attr "type" "multi")])
-
-(define_insn "*floatsisf2_sse_vector_internunit"
-  [(set (match_operand:SF 0 "register_operand" "=x,x")
-       (float:SF (match_operand:SI 1 "nonimmediate_operand" "rm,x")))]
-  "TARGET_SSE_MATH && flag_trapping_math
-   && TARGET_USE_VECTOR_CONVERTS && !optimize_size
-   && TARGET_INTER_UNIT_MOVES"
-  "#"
-  [(set_attr "type" "multi")])
-
 (define_split
-  [(set (match_operand:SF 0 "register_operand" "")
-       (float:SF (match_operand:SI 1 "nonimmediate_operand" "")))]
-  "flag_trapping_math
-   && TARGET_USE_VECTOR_CONVERTS && reload_completed
-   && (TARGET_INTER_UNIT_MOVES || MEM_P (operands[1]))
-   && !SSE_REG_P (operands[1]) && SSE_REG_P (operands[0])"
-  [(set (match_dup 0)
-       (float:V4SF (match_dup 2)))]
-{
-  operands[2] = simplify_gen_subreg (V4SImode, operands[0], SFmode, 0);
-  operands[0] = simplify_gen_subreg (V4SFmode, operands[0], SFmode, 0);
-  emit_insn (gen_sse2_loadld (operands[2], CONST0_RTX (V4SImode), operands[1]));
-})
+  [(set (match_operand:X87MODEF 0 "register_operand" "")
+       (float:X87MODEF (match_operand:HI 1 "register_operand" "")))
+   (clobber (match_operand:HI 2 "memory_operand" ""))]
+  "TARGET_80387
+   && (!(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
+       || TARGET_MIX_SSE_I387)
+   && reload_completed"
+  [(set (match_dup 2) (match_dup 1))
+   (set (match_dup 0) (float:X87MODEF (match_dup 2)))]
+  "")
 
 (define_split
-  [(set (match_operand:SF 0 "register_operand" "")
-       (float:SF (match_operand:SI 1 "register_operand" "")))]
-  "flag_trapping_math
-   && TARGET_USE_VECTOR_CONVERTS && reload_completed
-   && SSE_REG_P (operands[1]) && SSE_REG_P (operands[0])"
-  [(set (match_dup 2) (vec_duplicate:V4SI (match_dup 1)))
-   (set (match_dup 0)
-       (float:V4SF (match_dup 2)))]
-{
-  operands[2] = simplify_gen_subreg (V4SImode, operands[0], SFmode, 0);
-  operands[0] = simplify_gen_subreg (V4SFmode, operands[0], SFmode, 0);
-})
-
-(define_insn "*floatsisf2_sse_vector"
-  [(set (match_operand:SF 0 "register_operand" "=x")
-       (float:SF (match_operand:SI 1 "register_operand" "x")))]
-  "TARGET_SSE_MATH && !flag_trapping_math
-   && TARGET_USE_VECTOR_CONVERTS && !optimize_size
-   && !TARGET_INTER_UNIT_MOVES"
-  "cvtdq2ps\t{%1, %0|%0, %1}"
-  [(set_attr "type" "sseicvt")
-   (set_attr "mode" "SF")
-   (set_attr "athlon_decode" "double")
-   (set_attr "amdfam10_decode" "double")
-   (set_attr "fp_int_src" "true")])
+  [(set (match_operand:X87MODEF 0 "register_operand" "")
+       (float:X87MODEF (match_operand:HI 1 "memory_operand" "")))
+   (clobber (match_operand:HI 2 "memory_operand" ""))]
+   "TARGET_80387
+    && (!(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
+        || TARGET_MIX_SSE_I387)
+    && reload_completed"
+  [(set (match_dup 0) (float:X87MODEF (match_dup 1)))]
+  "")
 
-(define_insn "*floatsisf2_sse"
-  [(set (match_operand:SF 0 "register_operand" "=x,x")
-       (float:SF (match_operand:SI 1 "nonimmediate_operand" "r,m")))]
-  "TARGET_SSE_MATH
-   && ((!TARGET_USE_VECTOR_CONVERTS && TARGET_INTER_UNIT_CONVERSIONS)
-       || optimize_size)"
-  "cvtsi2ss\t{%1, %0|%0, %1}"
-  [(set_attr "type" "sseicvt")
-   (set_attr "mode" "SF")
-   (set_attr "athlon_decode" "vector,double")
-   (set_attr "amdfam10_decode" "vector,double")
-   (set_attr "fp_int_src" "true")])
+;; Expander for converting an SSEMODEI24 integer operand to an X87MODEF
+;; float register.  Available whenever the 80387 is, and otherwise when
+;; SSE math handles the destination mode; the SSE path additionally
+;; requires TARGET_64BIT for a DImode source.  The preparation
+;; statement is empty, so the pattern is emitted unchanged.
+(define_expand "float<SSEMODEI24:mode><X87MODEF:mode>2"
+  [(set (match_operand:X87MODEF 0 "register_operand" "")
+       (float:X87MODEF
+         (match_operand:SSEMODEI24 1 "nonimmediate_operand" "")))]
+  "TARGET_80387
+   || ((<SSEMODEI24:MODE>mode != DImode || TARGET_64BIT)
+       && SSE_FLOAT_MODE_P (<X87MODEF:MODE>mode) && TARGET_SSE_MATH)"
+  "")
 
-(define_insn "*floatsisf2_sse_memory"
-  [(set (match_operand:SF 0 "register_operand" "=x")
-       (float:SF (match_operand:SI 1 "memory_operand" "m")))]
-  "TARGET_SSE_MATH
-   && !TARGET_INTER_UNIT_CONVERSIONS && !optimize_size"
-  "cvtsi2ss\t{%1, %0|%0, %1}"
-  [(set_attr "type" "sseicvt")
-   (set_attr "mode" "SF")
-   (set_attr "athlon_decode" "double")
-   (set_attr "amdfam10_decode" "double")
-   (set_attr "fp_int_src" "true")])
+;; Pre-reload splitter to add memory clobber to the pattern.
+;; The condition accepts two cases: the conversion is done on the
+;; 80387 (SSE math does not apply to it, or TARGET_MIX_SSE_I387), or it
+;; is done with SSE math and either the SImode vector-convert path
+;; with trapping math is in use or inter-unit conversions are not
+;; (!(TARGET_INTER_UNIT_CONVERSIONS || optimize_size)).  The split
+;; always fires ("&& 1") and clobbers operand 2, a stack slot in the
+;; source integer mode obtained from assign_386_stack_local.
+(define_insn_and_split "*float<SSEMODEI24:mode><X87MODEF:mode>2_1"
+  [(set (match_operand:X87MODEF 0 "register_operand" "")
+       (float:X87MODEF (match_operand:SSEMODEI24 1 "register_operand" "")))]
+  "((TARGET_80387
+     && (!((<SSEMODEI24:MODE>mode != DImode || TARGET_64BIT)
+          && SSE_FLOAT_MODE_P (<X87MODEF:MODE>mode) && TARGET_SSE_MATH)
+        || TARGET_MIX_SSE_I387))
+    || ((<SSEMODEI24:MODE>mode != DImode || TARGET_64BIT)
+       && SSE_FLOAT_MODE_P (<X87MODEF:MODE>mode) && TARGET_SSE_MATH
+       && ((<SSEMODEI24:MODE>mode == SImode
+            && TARGET_SSE2 && TARGET_USE_VECTOR_CONVERTS && !optimize_size
+            && flag_trapping_math)
+           || !(TARGET_INTER_UNIT_CONVERSIONS || optimize_size))))
+   && !(reload_completed || reload_in_progress)"
+  "#"
+  "&& 1"
+  [(parallel [(set (match_dup 0) (float:X87MODEF (match_dup 1)))
+             (clobber (match_dup 2))])]
+  "operands[2] = assign_386_stack_local (<SSEMODEI24:MODE>mode, SLOT_TEMP);")
 
-(define_insn "*floatsidf2_mixed_vector"
-  [(set (match_operand:DF 0 "register_operand" "=x,f,f")
-       (float:DF (match_operand:SI 1 "nonimmediate_operand" "x,m,r")))]
+(define_insn "*floatsi<mode>2_vector_mixed_with_temp"
+  [(set (match_operand:MODEF 0 "register_operand" "=f,f,x,x,x")
+       (float:MODEF
+         (match_operand:SI 1 "nonimmediate_operand" "m,?r,r,m,!x")))
+   (clobber (match_operand:SI 2 "memory_operand" "=m,m,m,m,m"))]
   "TARGET_SSE2 && TARGET_MIX_SSE_I387
    && TARGET_USE_VECTOR_CONVERTS && !optimize_size"
-  "@
-   cvtdq2pd\t{%1, %0|%0, %1}
-   fild%z1\t%1
-   #"
-  [(set_attr "type" "sseicvt,fmov,multi")
-   (set_attr "mode" "V2DF,DF,DF")
-   (set_attr "unit" "*,*,i387")
-   (set_attr "athlon_decode" "double,*,*")
-   (set_attr "amdfam10_decode" "double,*,*")
-   (set_attr "fp_int_src" "false,true,true")])
-
-(define_insn "*floatsidf2_mixed"
-  [(set (match_operand:DF 0 "register_operand" "=f,?f,x,x,!x")
-       (float:DF (match_operand:SI 1 "nonimmediate_operand" "m,r,r,m,x")))]
-  "TARGET_SSE2 && TARGET_MIX_SSE_I387
-   && ((!TARGET_USE_VECTOR_CONVERTS && TARGET_INTER_UNIT_CONVERSIONS)
-       || optimize_size)"
-  "@
-   fild%z1\t%1
-   #
-   cvtsi2sd\t{%1, %0|%0, %1}
-   cvtsi2sd\t{%1, %0|%0, %1}
-   cvtdq2pd\t{%1, %0|%0, %1}"
+  "#"
   [(set_attr "type" "fmov,multi,sseicvt,sseicvt,sseicvt")
-   (set_attr "mode" "DF,DF,DF,DF,V2DF")
+   (set_attr "mode" "<MODE>,<MODE>,<MODE>,<MODE>,<ssevecmode>")
    (set_attr "unit" "*,i387,*,*,*")
    (set_attr "athlon_decode" "*,*,double,direct,double")
    (set_attr "amdfam10_decode" "*,*,vector,double,double")
-   (set_attr "fp_int_src" "true,true,true,true,false")])
+   (set_attr "fp_int_src" "true")])
 
-(define_insn "*floatsidf2_mixed_memory"
-  [(set (match_operand:DF 0 "register_operand" "=f,x")
-       (float:DF (match_operand:SI 1 "memory_operand" "m,m")))]
+(define_insn "*floatsi<mode>2_vector_mixed"
+  [(set (match_operand:MODEF 0 "register_operand" "=f,x")
+       (float:MODEF (match_operand:SI 1 "memory_operand" "m,m")))]
   "TARGET_SSE2 && TARGET_MIX_SSE_I387
-   && !TARGET_INTER_UNIT_CONVERSIONS && !optimize_size"
+   && TARGET_USE_VECTOR_CONVERTS && !optimize_size"
   "@
    fild%z1\t%1
-   cvtsi2sd\t{%1, %0|%0, %1}"
+   #"
   [(set_attr "type" "fmov,sseicvt")
-   (set_attr "mode" "DF")
+   (set_attr "mode" "<MODE>,<ssevecmode>")
+   (set_attr "unit" "i387,*")
    (set_attr "athlon_decode" "*,direct")
    (set_attr "amdfam10_decode" "*,double")
    (set_attr "fp_int_src" "true")])
 
-(define_insn "*floatsidf2_sse_vector"
-  [(set (match_operand:DF 0 "register_operand" "=x")
-       (float:DF (match_operand:SI 1 "register_operand" "x")))]
-  "TARGET_SSE2 && TARGET_SSE_MATH
-   && TARGET_USE_VECTOR_CONVERTS && !optimize_size"
-  "cvtdq2pd\t{%1, %0|%0, %1}"
-  [(set_attr "type" "sseicvt")
-   (set_attr "mode" "V2DF")
-   (set_attr "athlon_decode" "double")
-   (set_attr "amdfam10_decode" "double")
+(define_insn "*float<SSEMODEI24:mode><MODEF:mode>2_mixed_with_temp"
+  [(set (match_operand:MODEF 0 "register_operand" "=f,f,x,x")
+       (float:MODEF
+         (match_operand:SSEMODEI24 1 "nonimmediate_operand" "m,?r,r,m")))
+  (clobber (match_operand:SSEMODEI24 2 "memory_operand" "=m,m,m,m"))]
+  "(<SSEMODEI24:MODE>mode != DImode || TARGET_64BIT)
+   && SSE_FLOAT_MODE_P (<MODEF:MODE>mode) && TARGET_MIX_SSE_I387"
+  "#"
+  [(set_attr "type" "fmov,multi,sseicvt,sseicvt")
+   (set_attr "mode" "<MODEF:MODE>")
+   (set_attr "unit" "*,i387,*,*")
+   (set_attr "athlon_decode" "*,*,double,direct")
+   (set_attr "amdfam10_decode" "*,*,vector,double")
    (set_attr "fp_int_src" "true")])
 
 (define_split
-  [(set (match_operand:DF 0 "register_operand" "")
-       (float:DF (match_operand:SI 1 "memory_operand" "")))]
-  "TARGET_USE_VECTOR_CONVERTS && reload_completed
-   && SSE_REG_P (operands[0])"
-  [(set (match_dup 0)
-       (float:V2DF
-         (vec_select:V2SI
-           (match_dup 2)
-           (parallel [(const_int 0) (const_int 1)]))))]
-{
-  operands[2] = simplify_gen_subreg (V4SImode, operands[0], DFmode, 0);
-  operands[0] = simplify_gen_subreg (V2DFmode, operands[0], DFmode, 0);
-  emit_insn (gen_sse2_loadld (operands[2], CONST0_RTX (V4SImode), operands[1]));
-})
-
-(define_insn "*floatsidf2_sse"
-  [(set (match_operand:DF 0 "register_operand" "=x,x,!x")
-       (float:DF (match_operand:SI 1 "nonimmediate_operand" "r,m,x")))]
-  "TARGET_SSE2 && TARGET_SSE_MATH
-   && ((!TARGET_USE_VECTOR_CONVERTS && TARGET_INTER_UNIT_CONVERSIONS)
-       || optimize_size)"
-  "@
-   cvtsi2sd\t{%1, %0|%0, %1}
-   cvtsi2sd\t{%1, %0|%0, %1}
-   cvtdq2pd\t{%1, %0|%0, %1}"
-  [(set_attr "type" "sseicvt")
-   (set_attr "mode" "DF,DF,V2DF")
-   (set_attr "athlon_decode" "double,direct,double")
-   (set_attr "amdfam10_decode" "vector,double,double")
-   (set_attr "fp_int_src" "true")])
+  ;; Post-reload split of the *_mixed_with_temp pattern for a register
+  ;; source landing in an SSE register when the direct register form is
+  ;; allowed: drop the memory clobber and convert straight from the
+  ;; integer register.  The enabling test mirrors the *_mixed_interunit
+  ;; insn (TARGET_INTER_UNIT_CONVERSIONS || optimize_size) so that it is
+  ;; the exact complement of the store-through-memory split, and the
+  ;; SUBREG arm inspects the inner register.
+  [(set (match_operand:MODEF 0 "register_operand" "")
+       (float:MODEF (match_operand:SSEMODEI24 1 "register_operand" "")))
+   (clobber (match_operand:SSEMODEI24 2 "memory_operand" ""))]
+  "(<SSEMODEI24:MODE>mode != DImode || TARGET_64BIT)
+   && SSE_FLOAT_MODE_P (<MODEF:MODE>mode) && TARGET_MIX_SSE_I387
+   && (TARGET_INTER_UNIT_CONVERSIONS || optimize_size)
+   && reload_completed
+   && (SSE_REG_P (operands[0])
+       || (GET_CODE (operands[0]) == SUBREG
+          && SSE_REG_P (SUBREG_REG (operands[0]))))"
+  [(set (match_dup 0) (float:MODEF (match_dup 1)))]
+  "")
 
-(define_insn "*floatsidf2_memory"
-  [(set (match_operand:DF 0 "register_operand" "=x")
-       (float:DF (match_operand:SI 1 "memory_operand" "x")))]
-  "TARGET_SSE2 && TARGET_SSE_MATH
-   && ((!TARGET_USE_VECTOR_CONVERTS && TARGET_INTER_UNIT_CONVERSIONS)
-       || optimize_size)"
-  "cvtsi2sd\t{%1, %0|%0, %1}"
-  [(set_attr "type" "sseicvt")
-   (set_attr "mode" "DF")
-   (set_attr "athlon_decode" "direct")
-   (set_attr "amdfam10_decode" "double")
-   (set_attr "fp_int_src" "true")])
+;; Post-reload split of the *_mixed_with_temp pattern for a register
+;; source landing in an SSE register when inter-unit conversions are
+;; not allowed: store the integer register to the clobbered stack slot
+;; (operand 2) and convert from that memory.  The SUBREG arm inspects
+;; the inner register.
+(define_split
+  [(set (match_operand:MODEF 0 "register_operand" "")
+       (float:MODEF (match_operand:SSEMODEI24 1 "register_operand" "")))
+   (clobber (match_operand:SSEMODEI24 2 "memory_operand" ""))]
+  "(<SSEMODEI24:MODE>mode != DImode || TARGET_64BIT)
+   && SSE_FLOAT_MODE_P (<MODEF:MODE>mode) && TARGET_MIX_SSE_I387
+   && !(TARGET_INTER_UNIT_CONVERSIONS || optimize_size)
+   && reload_completed
+   && (SSE_REG_P (operands[0])
+       || (GET_CODE (operands[0]) == SUBREG
+          && SSE_REG_P (SUBREG_REG (operands[0]))))"
+  [(set (match_dup 2) (match_dup 1))
+   (set (match_dup 0) (float:MODEF (match_dup 2)))]
+  "")
 
-(define_insn "*floatsi<mode>2_i387"
-  [(set (match_operand:MODEF 0 "register_operand" "=f,f")
+;; Integer to float conversion for mixed SSE/x87 math when the direct
+;; register form is allowed (TARGET_INTER_UNIT_CONVERSIONS or
+;; optimize_size).  Alternative 0 is fild from memory; alternatives 1
+;; and 2 are cvtsi2s{s,d} from a general register or from memory.
+;; Operand 1 uses nonimmediate_operand so the memory alternatives
+;; ("m") can actually be recognised.
+(define_insn "*float<SSEMODEI24:mode><MODEF:mode>2_mixed_interunit"
+  [(set (match_operand:MODEF 0 "register_operand" "=f,x,x")
        (float:MODEF
-         (match_operand:SI 1 "nonimmediate_operand" "m,?r")))]
-  "TARGET_80387
-   && (!TARGET_SSE_MATH || !SSE_FLOAT_MODE_P (GET_MODE (operands[0])))"
-  "@
-   fild%z1\t%1
-   #"
-  [(set_attr "type" "fmov,multi")
-   (set_attr "mode" "<MODE>")
-   (set_attr "unit" "*,i387")
-   (set_attr "fp_int_src" "true")])
-
-(define_expand "floatdisf2"
-  [(set (match_operand:SF 0 "register_operand" "")
-       (float:SF (match_operand:DI 1 "nonimmediate_operand" "")))]
-  "TARGET_80387 || (TARGET_64BIT && TARGET_SSE_MATH)"
-{
-  if (!TARGET_INTER_UNIT_CONVERSIONS && TARGET_64BIT
-      && TARGET_SSE_MATH && SSE_FLOAT_MODE_P (SFmode)
-      && !optimize_size
-      && !MEM_P (operands[1]))
-    {
-      int slot = virtuals_instantiated ? SLOT_TEMP : SLOT_VIRTUAL;
-      rtx tmp = assign_386_stack_local (GET_MODE (operands[1]), slot);
-      emit_move_insn (tmp, operands[1]);
-      operands[1] = tmp;
-    }
-})
-
-(define_insn "*floatdisf2_mixed"
-  [(set (match_operand:SF 0 "register_operand" "=f,?f,x,x")
-       (float:SF (match_operand:DI 1 "nonimmediate_operand" "m,r,r,m")))]
-  "TARGET_64BIT && TARGET_MIX_SSE_I387
+         (match_operand:SSEMODEI24 1 "nonimmediate_operand" "m,r,m")))]
+  "(<SSEMODEI24:MODE>mode != DImode || TARGET_64BIT)
+   && SSE_FLOAT_MODE_P (<MODEF:MODE>mode) && TARGET_MIX_SSE_I387
    && (TARGET_INTER_UNIT_CONVERSIONS || optimize_size)"
   "@
    fild%z1\t%1
-   #
-   cvtsi2ss{q}\t{%1, %0|%0, %1}
-   cvtsi2ss{q}\t{%1, %0|%0, %1}"
-  [(set_attr "type" "fmov,multi,sseicvt,sseicvt")
-   (set_attr "mode" "SF")
-   (set_attr "unit" "*,i387,*,*")
-   (set_attr "athlon_decode" "*,*,vector,double")
-   (set_attr "amdfam10_decode" "*,*,vector,double")
+   cvtsi2s<MODEF:ssemodefsuffix><SSEMODEI24:rex64suffix>\t{%1, %0|%0, %1}
+   cvtsi2s<MODEF:ssemodefsuffix><SSEMODEI24:rex64suffix>\t{%1, %0|%0, %1}"
+  [(set_attr "type" "fmov,sseicvt,sseicvt")
+   (set_attr "mode" "<MODEF:MODE>")
+   (set_attr "unit" "i387,*,*")
+   (set_attr "athlon_decode" "*,double,direct")
+   (set_attr "amdfam10_decode" "*,vector,double")
    (set_attr "fp_int_src" "true")])
 
-(define_insn "*floatdisf2_mixed"
-  [(set (match_operand:SF 0 "register_operand" "=f,x")
-       (float:SF (match_operand:DI 1 "memory_operand" "m,m")))]
-  "TARGET_64BIT && TARGET_MIX_SSE_I387
-   && !TARGET_INTER_UNIT_CONVERSIONS && !optimize_size"
+(define_insn "*float<SSEMODEI24:mode><MODEF:mode>2_mixed_nointerunit"
+  [(set (match_operand:MODEF 0 "register_operand" "=f,x")
+       (float:MODEF
+         (match_operand:SSEMODEI24 1 "memory_operand" "m,m")))]
+  "(<SSEMODEI24:MODE>mode != DImode || TARGET_64BIT)
+   && SSE_FLOAT_MODE_P (<MODEF:MODE>mode) && TARGET_MIX_SSE_I387
+   && !(TARGET_INTER_UNIT_CONVERSIONS || optimize_size)"
   "@
    fild%z1\t%1
-   cvtsi2ss{q}\t{%1, %0|%0, %1}"
+   cvtsi2s<MODEF:ssemodefsuffix><SSEMODEI24:rex64suffix>\t{%1, %0|%0, %1}"
   [(set_attr "type" "fmov,sseicvt")
-   (set_attr "mode" "SF")
-   (set_attr "athlon_decode" "*,double")
+   (set_attr "mode" "<MODEF:MODE>")
+   (set_attr "athlon_decode" "*,direct")
    (set_attr "amdfam10_decode" "*,double")
    (set_attr "fp_int_src" "true")])
 
-(define_insn "*floatdisf2_sse"
-  [(set (match_operand:SF 0 "register_operand" "=x,x")
-       (float:SF (match_operand:DI 1 "nonimmediate_operand" "r,m")))]
-  "TARGET_64BIT && TARGET_SSE_MATH
-   && (TARGET_INTER_UNIT_CONVERSIONS || optimize_size)"
-  "cvtsi2ss{q}\t{%1, %0|%0, %1}"
+(define_insn "*floatsi<mode>2_vector_sse_with_temp"
+  [(set (match_operand:MODEF 0 "register_operand" "=x,x,x")
+       (float:MODEF
+         (match_operand:SI 1 "nonimmediate_operand" "r,m,!x")))
+   (clobber (match_operand:SI 2 "memory_operand" "=m,m,m"))]
+  "TARGET_SSE2 && TARGET_SSE_MATH
+   && TARGET_USE_VECTOR_CONVERTS && !optimize_size"
+  "#"
   [(set_attr "type" "sseicvt")
-   (set_attr "mode" "SF")
-   (set_attr "athlon_decode" "vector,double")
-   (set_attr "amdfam10_decode" "vector,double")
+   (set_attr "mode" "<MODE>,<MODE>,<ssevecmode>")
+   (set_attr "athlon_decode" "double,direct,double")
+   (set_attr "amdfam10_decode" "vector,double,double")
    (set_attr "fp_int_src" "true")])
 
-(define_insn "*floatdisf2_memory"
-  [(set (match_operand:SF 0 "register_operand" "=x")
-       (float:SF (match_operand:DI 1 "memory_operand" "m")))]
-  "TARGET_64BIT && TARGET_SSE_MATH
-   && !TARGET_INTER_UNIT_CONVERSIONS && !optimize_size"
-  "cvtsi2ss{q}\t{%1, %0|%0, %1}"
+(define_insn "*floatsi<mode>2_vector_sse"
+  [(set (match_operand:MODEF 0 "register_operand" "=x")
+       (float:MODEF (match_operand:SI 1 "memory_operand" "m")))]
+  "TARGET_SSE2 && TARGET_SSE_MATH
+   && TARGET_USE_VECTOR_CONVERTS && !optimize_size"
+  "#"
   [(set_attr "type" "sseicvt")
-   (set_attr "mode" "SF")
-   (set_attr "athlon_decode" "double")
+   (set_attr "mode" "<MODE>")
+   (set_attr "athlon_decode" "direct")
    (set_attr "amdfam10_decode" "double")
    (set_attr "fp_int_src" "true")])
 
-(define_expand "floatdidf2"
-  [(set (match_operand:DF 0 "register_operand" "")
-       (float:DF (match_operand:DI 1 "nonimmediate_operand" "")))]
-  "TARGET_80387 || (TARGET_SSE2 && TARGET_SSE_MATH)"
+;; Post-reload split of the SImode vector-convert pattern for a
+;; register source whose destination is an SSE register.  The integer
+;; is placed in the low element of a V4SI view of a register (directly
+;; from a general register when inter-unit moves are allowed, reusing
+;; an SSE source as-is for non-trapping math, otherwise through the
+;; clobbered stack slot) and then converted with cvtdq2p{s,d}.  The
+;; SUBREG arm of the destination test inspects the inner register.
+(define_split
+  [(set (match_operand:MODEF 0 "register_operand" "")
+       (float:MODEF (match_operand:SI 1 "register_operand" "")))
+   (clobber (match_operand:SI 2 "memory_operand" ""))]
+  "TARGET_SSE2 && TARGET_SSE_MATH
+   && TARGET_USE_VECTOR_CONVERTS && !optimize_size
+   && reload_completed
+   && (SSE_REG_P (operands[0])
+       || (GET_CODE (operands[0]) == SUBREG
+          && SSE_REG_P (SUBREG_REG (operands[0]))))"
+  [(const_int 0)]
 {
-  if (!TARGET_64BIT && TARGET_SSE2 && TARGET_SSE_MATH)
+  rtx op1 = operands[1];
+
+  operands[3] = simplify_gen_subreg (<ssevecmode>mode, operands[0],
+                                    <MODE>mode, 0);
+  if (GET_CODE (op1) == SUBREG)
+    op1 = SUBREG_REG (op1);
+
+  if (GENERAL_REG_P (op1) && TARGET_INTER_UNIT_MOVES)
     {
-      ix86_expand_convert_sign_didf_sse (operands[0], operands[1]);
-      DONE;
+      operands[4] = simplify_gen_subreg (V4SImode, operands[0], <MODE>mode, 0);
+      emit_insn (gen_sse2_loadld (operands[4],
+                                 CONST0_RTX (V4SImode), operands[1]));
     }
-  if (!TARGET_INTER_UNIT_CONVERSIONS && TARGET_64BIT
-      && TARGET_SSE_MATH && SSE_FLOAT_MODE_P (DFmode)
-      && !optimize_size
-      && !MEM_P (operands[1]))
+  /* We can ignore possible trapping value in the
+     high part of SSE register for non-trapping math. */
+  else if (SSE_REG_P (op1) && !flag_trapping_math)
+    operands[4] = simplify_gen_subreg (V4SImode, operands[1], SImode, 0);
+  else
     {
-      int slot = virtuals_instantiated ? SLOT_TEMP : SLOT_VIRTUAL;
-      rtx tmp = assign_386_stack_local (GET_MODE (operands[1]), slot);
-      emit_move_insn (tmp, operands[1]);
-      operands[1] = tmp;
+      operands[4] = simplify_gen_subreg (V4SImode, operands[0], <MODE>mode, 0);
+      emit_move_insn (operands[2], operands[1]);
+      emit_insn (gen_sse2_loadld (operands[4],
+                                 CONST0_RTX (V4SImode), operands[2]));
     }
+  emit_insn
+    (gen_sse2_cvtdq2p<ssemodefsuffix> (operands[3], operands[4]));
+  DONE;
 })
 
-(define_insn "*floatdidf2_mixed"
-  [(set (match_operand:DF 0 "register_operand" "=f,?f,x,x")
-       (float:DF (match_operand:DI 1 "nonimmediate_operand" "m,r,r,m")))]
-  "TARGET_64BIT && TARGET_SSE2 && TARGET_MIX_SSE_I387
-   && (TARGET_INTER_UNIT_CONVERSIONS || optimize_size)"
-  "@
-   fild%z1\t%1
-   #
-   cvtsi2sd{q}\t{%1, %0|%0, %1}
-   cvtsi2sd{q}\t{%1, %0|%0, %1}"
-  [(set_attr "type" "fmov,multi,sseicvt,sseicvt")
-   (set_attr "mode" "DF")
-   (set_attr "unit" "*,i387,*,*")
-   (set_attr "athlon_decode" "*,*,double,direct")
-   (set_attr "amdfam10_decode" "*,*,vector,double")
-   (set_attr "fp_int_src" "true")])
+;; Post-reload split of an SImode memory -> MODEF conversion that carries a
+;; memory temporary (operand 2), for the vector-convert case with an SSE
+;; destination.  The source is loaded straight from operand 1; operand 2 is
+;; not referenced by the emitted code.
+(define_split
+  [(set (match_operand:MODEF 0 "register_operand" "")
+       (float:MODEF (match_operand:SI 1 "memory_operand" "")))
+   (clobber (match_operand:SI 2 "memory_operand" ""))]
+  "TARGET_SSE2 && TARGET_SSE_MATH
+   && TARGET_USE_VECTOR_CONVERTS && !optimize_size
+   && reload_completed
+   && (SSE_REG_P (operands[0])
+       || (GET_CODE (operands[0]) == SUBREG
+          && SSE_REG_P (operands[0])))"
+  [(const_int 0)]
+{
+  /* operands[3] and operands[4] are both views of operand 0, in
+     <ssevecmode>mode and V4SImode respectively.  */
+  operands[3] = simplify_gen_subreg (<ssevecmode>mode, operands[0],
+                                    <MODE>mode, 0);
+  operands[4] = simplify_gen_subreg (V4SImode, operands[0], <MODE>mode, 0);
 
-(define_insn "*floatdidf2_mixed_memory"
-  [(set (match_operand:DF 0 "register_operand" "=f,x")
-       (float:DF (match_operand:DI 1 "memory_operand" "m,m")))]
-  "TARGET_64BIT && TARGET_SSE2 && TARGET_MIX_SSE_I387
-   && !TARGET_INTER_UNIT_CONVERSIONS && !optimize_size"
-  "@
-   fild%z1\t%1
-   cvtsi2sd{q}\t{%1, %0|%0, %1}"
-  [(set_attr "type" "fmov,sseicvt")
-   (set_attr "mode" "DF")
-   (set_attr "athlon_decode" "*,direct")
-   (set_attr "amdfam10_decode" "*,double")
+  /* Load the integer from memory via sse2_loadld (zero vector as the other
+     input), then convert the V4SImode value into operands[3].  */
+  emit_insn (gen_sse2_loadld (operands[4],
+                             CONST0_RTX (V4SImode), operands[1]));
+  emit_insn
+    (gen_sse2_cvtdq2p<ssemodefsuffix> (operands[3], operands[4]));
+  DONE;
+})
+
+;; Post-reload split of an SImode register -> MODEF conversion without a
+;; memory temporary, for the vector-convert case with an SSE destination.
+;; With no temporary available, the source must be either a general register
+;; (with TARGET_INTER_UNIT_MOVES) or an SSE register under non-trapping math;
+;; anything else is a bug in the caller and hits gcc_unreachable.
+;; NOTE(review): the SUBREG arm of the condition re-tests operands[0] itself
+;; rather than SUBREG_REG (operands[0]); if SSE_REG_P only accepts a REG this
+;; arm can never match -- confirm against the SSE_REG_P definition.
+(define_split
+  [(set (match_operand:MODEF 0 "register_operand" "")
+       (float:MODEF (match_operand:SI 1 "register_operand" "")))]
+  "TARGET_SSE2 && TARGET_SSE_MATH
+   && TARGET_USE_VECTOR_CONVERTS && !optimize_size
+   && reload_completed
+   && (SSE_REG_P (operands[0])
+       || (GET_CODE (operands[0]) == SUBREG
+          && SSE_REG_P (operands[0])))"
+  [(const_int 0)]
+{
+  rtx op1 = operands[1];
+
+  /* operands[3] is operand 0 viewed in <ssevecmode>mode.  */
+  operands[3] = simplify_gen_subreg (<ssevecmode>mode, operands[0],
+                                    <MODE>mode, 0);
+  /* Classify the source by the register underneath any SUBREG.  */
+  if (GET_CODE (op1) == SUBREG)
+    op1 = SUBREG_REG (op1);
+
+  if (GENERAL_REG_P (op1) && TARGET_INTER_UNIT_MOVES)
+    {
+      /* General register source: load it into operand 0 (viewed as
+         V4SImode) via sse2_loadld, with a zero vector as the other input.  */
+      operands[4] = simplify_gen_subreg (V4SImode, operands[0], <MODE>mode, 0);
+      emit_insn (gen_sse2_loadld (operands[4],
+                                 CONST0_RTX (V4SImode), operands[1]));
+    }
+  /* We can ignore possible trapping value in the
+     high part of SSE register for non-trapping math. */
+  else if (SSE_REG_P (op1) && !flag_trapping_math)
+    operands[4] = simplify_gen_subreg (V4SImode, operands[1], SImode, 0);
+  else
+    gcc_unreachable ();
+  /* Emit the conversion itself and finish with DONE.  Without these the
+     preparation code would fall through to the (const_int 0) placeholder
+     template and the cvtdq2p<ssemodefsuffix> insn would never be
+     generated.  */
+  emit_insn
+    (gen_sse2_cvtdq2p<ssemodefsuffix> (operands[3], operands[4]));
+  DONE;
+})
+
+;; Post-reload split of an SImode memory -> MODEF conversion without a
+;; memory temporary, for the vector-convert case with an SSE destination.
+(define_split
+  [(set (match_operand:MODEF 0 "register_operand" "")
+       (float:MODEF (match_operand:SI 1 "memory_operand" "")))]
+  "TARGET_SSE2 && TARGET_SSE_MATH
+   && TARGET_USE_VECTOR_CONVERTS && !optimize_size
+   && reload_completed
+   && (SSE_REG_P (operands[0])
+       || (GET_CODE (operands[0]) == SUBREG
+          && SSE_REG_P (operands[0])))"
+  [(const_int 0)]
+{
+  /* operands[3] and operands[4] are both views of operand 0, in
+     <ssevecmode>mode and V4SImode respectively.  */
+  operands[3] = simplify_gen_subreg (<ssevecmode>mode, operands[0],
+                                    <MODE>mode, 0);
+  operands[4] = simplify_gen_subreg (V4SImode, operands[0], <MODE>mode, 0);
+
+  /* Load the integer from memory via sse2_loadld (zero vector as the other
+     input), then convert the V4SImode value into operands[3].  */
+  emit_insn (gen_sse2_loadld (operands[4],
+                             CONST0_RTX (V4SImode), operands[1]));
+  emit_insn
+    (gen_sse2_cvtdq2p<ssemodefsuffix> (operands[3], operands[4]));
+  DONE;
+})
+
+;; Integer (SSEMODEI24) -> MODEF conversion into an SSE register, carrying a
+;; memory temporary as operand 2.  DImode sources are accepted only with
+;; TARGET_64BIT.  The output template is "#", so this insn is never output
+;; directly and has to be split.
+(define_insn "*float<SSEMODEI24:mode><MODEF:mode>2_sse_with_temp"
+  [(set (match_operand:MODEF 0 "register_operand" "=x,x")
+       (float:MODEF
+         (match_operand:SSEMODEI24 1 "nonimmediate_operand" "r,m")))
+  (clobber (match_operand:SSEMODEI24 2 "memory_operand" "=m,m"))]
+  "(<SSEMODEI24:MODE>mode != DImode || TARGET_64BIT)
+   && SSE_FLOAT_MODE_P (<MODEF:MODE>mode) && TARGET_SSE_MATH"
+  "#"
+  [(set_attr "type" "sseicvt")
+   (set_attr "mode" "<MODEF:MODE>")
+   (set_attr "athlon_decode" "double,direct")
+   (set_attr "amdfam10_decode" "vector,double")
    (set_attr "fp_int_src" "true")])
 
-(define_insn "*floatdidf2_sse"
-  [(set (match_operand:DF 0 "register_operand" "=x,x")
-       (float:DF (match_operand:DI 1 "nonimmediate_operand" "r,m")))]
-  "TARGET_64BIT && TARGET_SSE2 && TARGET_SSE_MATH
+;; Integer (SSEMODEI24) -> MODEF conversion into an SSE register with
+;; cvtsi2s<ssemodefsuffix>, taking the source from a general register or
+;; from memory.  Enabled only when TARGET_INTER_UNIT_CONVERSIONS is set or
+;; when optimizing for size; DImode sources additionally need TARGET_64BIT.
+;; Both alternatives use the same output template.
+(define_insn "*float<SSEMODEI24:mode><MODEF:mode>2_sse_interunit"
+  [(set (match_operand:MODEF 0 "register_operand" "=x,x")
+       (float:MODEF
+         (match_operand:SSEMODEI24 1 "nonimmediate_operand" "r,m")))]
+  "(<SSEMODEI24:MODE>mode != DImode || TARGET_64BIT)
+   && SSE_FLOAT_MODE_P (<MODEF:MODE>mode) && TARGET_SSE_MATH
    && (TARGET_INTER_UNIT_CONVERSIONS || optimize_size)"
-  "cvtsi2sd{q}\t{%1, %0|%0, %1}"
+  "@
+   cvtsi2s<MODEF:ssemodefsuffix><SSEMODEI24:rex64suffix>\t{%1, %0|%0, %1}
+   cvtsi2s<MODEF:ssemodefsuffix><SSEMODEI24:rex64suffix>\t{%1, %0|%0, %1}"
   [(set_attr "type" "sseicvt")
-   (set_attr "mode" "DF")
+   (set_attr "mode" "<MODEF:MODE>")
    (set_attr "athlon_decode" "double,direct")
    (set_attr "amdfam10_decode" "vector,double")
    (set_attr "fp_int_src" "true")])
 
-(define_insn "*floatdidf2_sse_memory"
-  [(set (match_operand:DF 0 "register_operand" "=x")
-       (float:DF (match_operand:DI 1 "memory_operand" "m")))]
-  "TARGET_64BIT && TARGET_SSE2 && TARGET_SSE_MATH
-   && !TARGET_INTER_UNIT_CONVERSIONS && !optimize_size"
-  "cvtsi2sd{q}\t{%1, %0|%0, %1}"
+;; Post-reload split of the with-temporary form when inter-unit conversions
+;; are allowed (or when optimizing for size) and operand 0 is an SSE
+;; register: the memory temporary (operand 2) is not needed, so the clobber
+;; is dropped and the plain conversion from operand 1 is generated.
+(define_split
+  [(set (match_operand:MODEF 0 "register_operand" "")
+       (float:MODEF (match_operand:SSEMODEI24 1 "nonimmediate_operand" "")))
+   (clobber (match_operand:SSEMODEI24 2 "memory_operand" ""))]
+  "(<SSEMODEI24:MODE>mode != DImode || TARGET_64BIT)
+   && SSE_FLOAT_MODE_P (<MODEF:MODE>mode) && TARGET_SSE_MATH
+   && (TARGET_INTER_UNIT_CONVERSIONS || optimize_size)
+   && reload_completed
+   && (SSE_REG_P (operands[0])
+       || (GET_CODE (operands[0]) == SUBREG
+          && SSE_REG_P (operands[0])))"
+  [(set (match_dup 0) (float:MODEF (match_dup 1)))]
+  "")
+
+;; Integer (SSEMODEI24) -> MODEF conversion into an SSE register with
+;; cvtsi2s<ssemodefsuffix>, accepting only a memory source.  Enabled when
+;; inter-unit conversions are not allowed and we are not optimizing for
+;; size; DImode sources additionally need TARGET_64BIT.
+(define_insn "*float<SSEMODEI24:mode><MODEF:mode>2_sse_nointerunit"
+  [(set (match_operand:MODEF 0 "register_operand" "=x")
+       (float:MODEF
+         (match_operand:SSEMODEI24 1 "memory_operand" "m")))]
+  "(<SSEMODEI24:MODE>mode != DImode || TARGET_64BIT)
+   && SSE_FLOAT_MODE_P (<MODEF:MODE>mode) && TARGET_SSE_MATH
+   && !(TARGET_INTER_UNIT_CONVERSIONS || optimize_size)"
+  "cvtsi2s<MODEF:ssemodefsuffix><SSEMODEI24:rex64suffix>\t{%1, %0|%0, %1}"
   [(set_attr "type" "sseicvt")
-   (set_attr "mode" "DF")
+   (set_attr "mode" "<MODEF:MODE>")
    (set_attr "athlon_decode" "direct")
    (set_attr "amdfam10_decode" "double")
    (set_attr "fp_int_src" "true")])
 
-(define_insn "*floatdi<mode>2_i387"
-  [(set (match_operand:MODEF 0 "register_operand" "=f,f")
-       (float:MODEF
-         (match_operand:DI 1 "nonimmediate_operand" "m,?r")))]
-  "TARGET_80387
-   && (!TARGET_SSE_MATH || !TARGET_64BIT
-       || !SSE_FLOAT_MODE_P (GET_MODE (operands[0])))"
+;; Post-reload split of the with-temporary form for a register source when
+;; inter-unit conversions are not allowed and operand 0 is an SSE register:
+;; first store the integer to the memory temporary (operand 2), then
+;; convert from that memory location.
+(define_split
+  [(set (match_operand:MODEF 0 "register_operand" "")
+       (float:MODEF (match_operand:SSEMODEI24 1 "register_operand" "")))
+   (clobber (match_operand:SSEMODEI24 2 "memory_operand" ""))]
+  "(<SSEMODEI24:MODE>mode != DImode || TARGET_64BIT)
+   && SSE_FLOAT_MODE_P (<MODEF:MODE>mode) && TARGET_SSE_MATH
+   && !(TARGET_INTER_UNIT_CONVERSIONS || optimize_size)
+   && reload_completed
+   && (SSE_REG_P (operands[0])
+       || (GET_CODE (operands[0]) == SUBREG
+          && SSE_REG_P (operands[0])))"
+  [(set (match_dup 2) (match_dup 1))
+   (set (match_dup 0) (float:MODEF (match_dup 2)))]
+  "")
+
+;; Post-reload split of the with-temporary form for a memory source and an
+;; SSE destination: the source is already in memory, so the clobber of the
+;; temporary (operand 2) is dropped and the plain conversion is generated.
+;; This condition does not test TARGET_INTER_UNIT_CONVERSIONS.
+(define_split
+  [(set (match_operand:MODEF 0 "register_operand" "")
+       (float:MODEF (match_operand:SSEMODEI24 1 "memory_operand" "")))
+   (clobber (match_operand:SSEMODEI24 2 "memory_operand" ""))]
+  "(<SSEMODEI24:MODE>mode != DImode || TARGET_64BIT)
+   && SSE_FLOAT_MODE_P (<MODEF:MODE>mode) && TARGET_SSE_MATH
+   && reload_completed
+   && (SSE_REG_P (operands[0])
+       || (GET_CODE (operands[0]) == SUBREG
+          && SSE_REG_P (operands[0])))"
+  [(set (match_dup 0) (float:MODEF (match_dup 1)))]
+  "")
+
+;; x87 integer (SSEMODEI24) -> X87MODEF conversion carrying a memory
+;; temporary as operand 2.  The memory alternative is output as fild; the
+;; general-register alternative is "#" and has to be split.
+(define_insn "*float<SSEMODEI24:mode><X87MODEF:mode>2_i387_with_temp"
+  [(set (match_operand:X87MODEF 0 "register_operand" "=f,f")
+       (float:X87MODEF
+         (match_operand:SSEMODEI24 1 "nonimmediate_operand" "m,?r")))
+  (clobber (match_operand:SSEMODEI24 2 "memory_operand" "=m,m"))]
+  "TARGET_80387"
   "@
    fild%z1\t%1
    #"
   [(set_attr "type" "fmov,multi")
-   (set_attr "mode" "<MODE>")
+   (set_attr "mode" "<X87MODEF:MODE>")
    (set_attr "unit" "*,i387")
    (set_attr "fp_int_src" "true")])
 
-(define_insn "float<mode>xf2"
-  [(set (match_operand:XF 0 "register_operand" "=f,f")
-       (float:XF (match_operand:X87MODEI 1 "nonimmediate_operand" "m,?r")))]
+;; x87 integer (SSEMODEI24) -> X87MODEF conversion from a memory source,
+;; output as a single fild.  No memory temporary is involved.
+(define_insn "*float<SSEMODEI24:mode><X87MODEF:mode>2_i387"
+  [(set (match_operand:X87MODEF 0 "register_operand" "=f")
+       (float:X87MODEF
+         (match_operand:SSEMODEI24 1 "memory_operand" "m")))]
   "TARGET_80387"
-  "@
-   fild%z1\t%1
-   #"
-  [(set_attr "type" "fmov,multi")
-   (set_attr "mode" "XF")
-   (set_attr "unit" "*,i387")
+  "fild%z1\t%1"
+  [(set_attr "type" "fmov")
+   (set_attr "mode" "<X87MODEF:MODE>")
    (set_attr "fp_int_src" "true")])
 
-;; %%% Kill these when reload knows how to do it.
+;; Post-reload split of the x87 with-temporary form for a register source
+;; and an x87 (FP_REG_P) destination: store the integer to the memory
+;; temporary (operand 2), then convert from that memory location.
 (define_split
-  [(set (match_operand 0 "fp_register_operand" "")
-       (float (match_operand 1 "register_operand" "")))]
-  "reload_completed
-   && X87_FLOAT_MODE_P (GET_MODE (operands[0]))"
-  [(const_int 0)]
-{
-  operands[2] = ix86_force_to_memory (GET_MODE (operands[1]), operands[1]);
-  operands[2] = gen_rtx_FLOAT (GET_MODE (operands[0]), operands[2]);
-  emit_insn (gen_rtx_SET (VOIDmode, operands[0], operands[2]));
-  ix86_free_from_memory (GET_MODE (operands[1]));
-  DONE;
-})
+  [(set (match_operand:X87MODEF 0 "register_operand" "")
+       (float:X87MODEF (match_operand:SSEMODEI24 1 "register_operand" "")))
+   (clobber (match_operand:SSEMODEI24 2 "memory_operand" ""))]
+  "TARGET_80387
+   && reload_completed
+   && FP_REG_P (operands[0])"
+  [(set (match_dup 2) (match_dup 1))
+   (set (match_dup 0) (float:X87MODEF (match_dup 2)))]
+  "")
+
+;; Post-reload split of the x87 with-temporary form for a memory source and
+;; an x87 (FP_REG_P) destination: the source is already in memory, so the
+;; clobber of the temporary (operand 2) is dropped and the plain conversion
+;; is generated.
+(define_split
+  [(set (match_operand:X87MODEF 0 "register_operand" "")
+       (float:X87MODEF (match_operand:SSEMODEI24 1 "memory_operand" "")))
+   (clobber (match_operand:SSEMODEI24 2 "memory_operand" ""))]
+  "TARGET_80387
+   && reload_completed
+   && FP_REG_P (operands[0])"
+  [(set (match_dup 0) (float:X87MODEF (match_dup 1)))]
+  "")
 
 ;; Avoid store forwarding (partial memory) stall penalty by extending
 ;; SImode value to DImode through XMM register instead of pushing two