From: Jan Hubicka Date: Tue, 16 Oct 2001 22:04:20 +0000 (+0200) Subject: i386.c (split_ti): New function. X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=44cf5b6a7186bed435ece527a40b8bcc4adb0998;p=gcc.git i386.c (split_ti): New function. * i386.c (split_ti): New function. (ix86_split_to_parts): Support TImodes. * i386.h (VALID_INT_MODE_P): Add TImode. * i386.md (movdi splitter): Fix. (movti): Support 64bit integer registers. (movti_rex64): New function and splitter. * i386.c (*_cost): Add movsx/movzx cost. * i386.h (struct processor_costs): Add movsx/movzx fields. (RTX_COSTS): Handle zero/sign extend + avoid believing that 64bit operations require split on 64bit machine. (CONST_COST): Make large 64bit constants expensive on 64bit compilation. * i386.c (ix86_setup_incomming_varargs): Fix mode of PLUS. (ix86_expand_move): Avoid pushes of memory if size does not match; move long constants to register. (x86_initialize_trampoline): Fix mode. * i386.h (ASM_OUTPUT_ADDR_VEC_ELT, ASM_OUTPUT_ADDR_DIFF_ELT): Use ASM_QUAD on 64bit. * i386.md (test pattern): Disallow impossible constants. (tablejump PIC expander): Fix emitting of sum. (movdicc_rex64): Rename to movdicc. * linux64.h (LINK_SPEC): Add missing '%'. From-SVN: r46296 --- diff --git a/gcc/ChangeLog b/gcc/ChangeLog index d74ca90ad52..de74d35399e 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,29 @@ +Wed Oct 17 00:01:02 CEST 2001 Jan Hubicka + + * i386.c (split_ti): New function. + (ix86_split_to_parts): Support TImodes. + * i386.h (VALID_INT_MODE_P): Add TImode. + * i386.md (movdi splitter): Fix. + (movti): Support 64bit integer registers. + (movti_rex64): New function and splitter. + + * i386.c (*_cost): Add movsx/movzx cost. + * i386.h (struct processor_costs): Add movsx/movzx fields. + (RTX_COSTS): Handle zero/sign extend + avoid believing that + 64bit operations require split on 64bit machine. + (CONST_COST): Make large 64bit constants expensive on 64bit compilation. + + * i386.c (ix86_setup_incomming_varargs): Fix mode of PLUS. + (ix86_expand_move): Avoid pushes of memory if size does not match; + move long constants to register. + (x86_initialize_trampoline): Fix mode. + * i386.h (ASM_OUTPUT_ADDR_VEC_ELT, ASM_OUTPUT_ADDR_DIFF_ELT): + Use ASM_QUAD on 64bit. + * i386.md (test pattern): Disallow impossible constants. + (tablejump PIC expander): Fix emitting of sum. + (movdicc_rex64): Rename to movdicc. + * linux64.h (LINK_SPEC): Add missing '%'. + 2001-10-16 Alexandre Oliva * tree-inline.c (inlinable_function_p): Leave it up to the diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c index f5110d5e037..00b28a94628 100644 --- a/gcc/config/i386/i386.c +++ b/gcc/config/i386/i386.c @@ -57,6 +57,8 @@ struct processor_costs size_cost = { /* costs for tunning for size */ 3, /* cost of starting a multiply */ 0, /* cost of multiply per each bit set */ 3, /* cost of a divide/mod */ + 3, /* cost of movsx */ + 3, /* cost of movzx */ 0, /* "large" insn */ 2, /* MOVE_RATIO */ 2, /* cost for loading QImode using movzbl */ @@ -90,6 +92,8 @@ struct processor_costs i386_cost = { /* 386 specific costs */ 6, /* cost of starting a multiply */ 1, /* cost of multiply per each bit set */ 23, /* cost of a divide/mod */ + 3, /* cost of movsx */ + 2, /* cost of movzx */ 15, /* "large" insn */ 3, /* MOVE_RATIO */ 4, /* cost for loading QImode using movzbl */ @@ -123,6 +127,8 @@ struct processor_costs i486_cost = { /* 486 specific costs */ 12, /* cost of starting a multiply */ 1, /* cost of multiply per each bit set */ 40, /* cost of a divide/mod */ + 3, /* cost of movsx */ + 2, /* cost of movzx */ 15, /* "large" insn */ 3, /* MOVE_RATIO */ 4, /* cost for loading QImode using movzbl */ @@ -156,6 +162,8 @@ struct processor_costs pentium_cost = { 11, /* cost of starting a multiply */ 0, /* cost of multiply per each bit set */ 25, /* cost of a divide/mod */ + 3, /* cost of movsx */ + 2, /* cost of movzx */ 8, /* "large" insn */ 6, /* MOVE_RATIO */ 6, /* cost for loading QImode using movzbl */ @@ -189,6 +197,8 @@ struct processor_costs pentiumpro_cost = { 4, /* cost of starting a multiply */ 0, /* cost of multiply per each bit set */ 17, /* cost of a divide/mod */ + 1, /* cost of movsx */ + 1, /* cost of movzx */ 8, /* "large" insn */ 6, /* MOVE_RATIO */ 2, /* cost for loading QImode using movzbl */ @@ -222,6 +232,8 @@ struct processor_costs k6_cost = { 3, /* cost of starting a multiply */ 0, /* cost of multiply per each bit set */ 18, /* cost of a divide/mod */ + 2, /* cost of movsx */ + 2, /* cost of movzx */ 8, /* "large" insn */ 4, /* MOVE_RATIO */ 3, /* cost for loading QImode using movzbl */ @@ -255,6 +267,8 @@ struct processor_costs athlon_cost = { 5, /* cost of starting a multiply */ 0, /* cost of multiply per each bit set */ 42, /* cost of a divide/mod */ + 1, /* cost of movsx */ + 1, /* cost of movzx */ 8, /* "large" insn */ 9, /* MOVE_RATIO */ 4, /* cost for loading QImode using movzbl */ @@ -288,6 +302,8 @@ struct processor_costs pentium4_cost = { 30, /* cost of starting a multiply */ 0, /* cost of multiply per each bit set */ 112, /* cost of a divide/mod */ + 1, /* cost of movsx */ + 1, /* cost of movzx */ 16, /* "large" insn */ 6, /* MOVE_RATIO */ 2, /* cost for loading QImode using movzbl */ @@ -2212,7 +2228,7 @@ ix86_setup_incoming_varargs (cum, mode, type, pretend_size, no_rtl) nsse_reg = gen_reg_rtx (Pmode); emit_insn (gen_zero_extendqidi2 (nsse_reg, gen_rtx_REG (QImode, 0))); emit_insn (gen_rtx_SET (VOIDmode, tmp_reg, - gen_rtx_MULT (VOIDmode, nsse_reg, + gen_rtx_MULT (Pmode, nsse_reg, GEN_INT (4)))); if (next_cum.sse_regno) emit_move_insn @@ -5938,6 +5954,57 @@ split_di (operands, num, lo_half, hi_half) abort (); } } +/* Split one or more TImode RTL references into pairs of SImode + references. The RTL can be REG, offsettable MEM, integer constant, or + CONST_DOUBLE. "operands" is a pointer to an array of DImode RTL to + split and "num" is its length. lo_half and hi_half are output arrays + that parallel "operands". */ + +void +split_ti (operands, num, lo_half, hi_half) + rtx operands[]; + int num; + rtx lo_half[], hi_half[]; +{ + while (num--) + { + rtx op = operands[num]; + if (CONSTANT_P (op)) + { + if (GET_CODE (op) == CONST_INT) + { + lo_half[num] = GEN_INT (trunc_int_for_mode (INTVAL (op), SImode)); + hi_half[num] = (1 << (HOST_BITS_PER_WIDE_INT -1)) != 0 ? constm1_rtx : const0_rtx; + } + else if (GET_CODE (op) == CONST_DOUBLE && HOST_BITS_PER_WIDE_INT == 64) + { + lo_half[num] = GEN_INT (trunc_int_for_mode (CONST_DOUBLE_LOW (op), SImode)); + hi_half[num] = GEN_INT (trunc_int_for_mode (CONST_DOUBLE_HIGH (op), SImode)); + } + else + abort (); + } + else if (! reload_completed) + { + lo_half[num] = gen_lowpart (DImode, op); + hi_half[num] = gen_highpart (DImode, op); + } + else if (GET_CODE (op) == REG) + { + if (TARGET_64BIT) + abort(); + lo_half[num] = gen_rtx_REG (DImode, REGNO (op)); + hi_half[num] = gen_rtx_REG (DImode, REGNO (op) + 1); + } + else if (offsettable_memref_p (op)) + { + lo_half[num] = adjust_address (op, DImode, 0); + hi_half[num] = adjust_address (op, DImode, 8); + } + else + abort (); + } +} /* Output code to perform a 387 binary operation in INSN, one of PLUS, MINUS, MULT or DIV. OPERANDS are the insn operands, where operands[3] @@ -6621,7 +6688,7 @@ ix86_expand_move (mode, operands) else { if (GET_CODE (operands[0]) == MEM - && (GET_MODE (operands[0]) == QImode + && (PUSH_ROUNDING (GET_MODE_SIZE (mode)) != GET_MODE_SIZE (mode) || !push_operand (operands[0], mode)) && GET_CODE (operands[1]) == MEM) operands[1] = force_reg (mode, operands[1]); @@ -6630,6 +6697,15 @@ ix86_expand_move (mode, operands) && ! general_no_elim_operand (operands[1], mode)) operands[1] = copy_to_mode_reg (mode, operands[1]); + /* Force large constants in 64bit compilation into register + to get them CSEed. */ + if (TARGET_64BIT && mode == DImode + && immediate_operand (operands[1], mode) + && !x86_64_zero_extended_value (operands[1]) + && !register_operand (operands[0], mode) + && optimize && !reload_completed && !reload_in_progress) + operands[1] = copy_to_mode_reg (mode, operands[1]); + if (FLOAT_MODE_P (mode)) { /* If we are loading a floating point constant to a register, @@ -8490,6 +8566,8 @@ ix86_split_to_parts (operand, parts, mode) } else { + if (mode == TImode) + split_ti (&operand, 1, &parts[0], &parts[1]); if (mode == XFmode || mode == TFmode) { if (REG_P (operand)) @@ -8515,8 +8593,10 @@ ix86_split_to_parts (operand, parts, mode) /* Do not use shift by 32 to avoid warning on 32bit systems. */ if (HOST_BITS_PER_WIDE_INT >= 64) parts[0] - = GEN_INT (trunc_int_for_mode (l[0] + ((l[1] << 31) << 1), - SImode)); + = GEN_INT (trunc_int_for_mode + ((l[0] & (((HOST_WIDE_INT) 2 << 31) - 1)) + + ((((HOST_WIDE_INT)l[1]) << 31) << 1), + DImode)); else parts[0] = immed_double_const (l[0], l[1], DImode); parts[1] = GEN_INT (trunc_int_for_mode (l[2], SImode)); @@ -10701,7 +10781,7 @@ x86_initialize_trampoline (tramp, fnaddr, cxt) emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)), GEN_INT (trunc_int_for_mode (0xff49, HImode))); emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, offset+2)), - GEN_INT (trunc_int_for_mode (0xe3, HImode))); + GEN_INT (trunc_int_for_mode (0xe3, QImode))); offset += 3; if (offset > TRAMPOLINE_SIZE) abort(); diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h index 0bf603a4437..75b72d813e5 100644 --- a/gcc/config/i386/i386.h +++ b/gcc/config/i386/i386.h @@ -58,6 +58,8 @@ struct processor_costs { const int mult_init; /* cost of starting a multiply */ const int mult_bit; /* cost of multiply per each bit set */ const int divide; /* cost of a divide/mod */ + int movsx; /* The cost of movsx operation. */ + int movzx; /* The cost of movzx operation. */ const int large_insn; /* insns larger than this cost more */ const int move_ratio; /* The threshold of number of scalar memory-to-memory move insns. */ @@ -953,7 +955,8 @@ extern int ix86_arch; ((mode) == QImode || (mode) == HImode || (mode) == SImode \ || (mode) == DImode \ || (mode) == CQImode || (mode) == CHImode || (mode) == CSImode \ - || (mode) == CDImode) + || (mode) == CDImode \ + || (TARGET_64BIT && ((mode) == TImode || (mode) == CTImode))) /* Value is 1 if hard register REGNO can hold a value of machine-mode MODE. */ @@ -2427,6 +2430,10 @@ while (0) case CONST: \ case LABEL_REF: \ case SYMBOL_REF: \ + if (TARGET_64BIT && !x86_64_sign_extended_value (RTX)) \ + return 3; \ + if (TARGET_64BIT && !x86_64_zero_extended_value (RTX)) \ + return 2; \ return flag_pic && SYMBOLIC_CONST (RTX) ? 1 : 0; \ \ case CONST_DOUBLE: \ @@ -2456,9 +2463,24 @@ while (0) assumptions are adequate for the target machine. */ #define RTX_COSTS(X,CODE,OUTER_CODE) \ + case ZERO_EXTEND: \ + /* The zero extensions is often completely free on x86_64, so make \ + it as cheap as possible. */ \ + if (TARGET_64BIT && GET_MODE (X) == DImode \ + && GET_MODE (XEXP (X, 0)) == SImode) \ + { \ + total = 1; goto egress_rtx_costs; \ + } \ + else \ + TOPLEVEL_COSTS_N_INSNS (TARGET_ZERO_EXTEND_WITH_AND ? \ + ix86_cost->add : ix86_cost->movzx); \ + break; \ + case SIGN_EXTEND: \ + TOPLEVEL_COSTS_N_INSNS (ix86_cost->movsx); \ + break; \ case ASHIFT: \ if (GET_CODE (XEXP (X, 1)) == CONST_INT \ - && GET_MODE (XEXP (X, 0)) == SImode) \ + && (GET_MODE (XEXP (X, 0)) != DImode || TARGET_64BIT)) \ { \ HOST_WIDE_INT value = INTVAL (XEXP (X, 1)); \ if (value == 1) \ @@ -2472,7 +2494,7 @@ while (0) case ASHIFTRT: \ case LSHIFTRT: \ case ROTATERT: \ - if (GET_MODE (XEXP (X, 0)) == DImode) \ + if (!TARGET_64BIT && GET_MODE (XEXP (X, 0)) == DImode) \ { \ if (GET_CODE (XEXP (X, 1)) == CONST_INT) \ { \ @@ -2562,7 +2584,7 @@ while (0) case IOR: \ case XOR: \ case MINUS: \ - if (GET_MODE (X) == DImode) \ + if (!TARGET_64BIT && GET_MODE (X) == DImode) \ return (COSTS_N_INSNS (ix86_cost->add) * 2 \ + (rtx_cost (XEXP (X, 0), OUTER_CODE) \ << (GET_MODE (XEXP (X, 0)) != DImode)) \ @@ -2572,7 +2594,7 @@ while (0) /* fall through */ \ case NEG: \ case NOT: \ - if (GET_MODE (X) == DImode) \ + if (!TARGET_64BIT && GET_MODE (X) == DImode) \ TOPLEVEL_COSTS_N_INSNS (ix86_cost->add * 2); \ TOPLEVEL_COSTS_N_INSNS (ix86_cost->add); \ \ @@ -2961,7 +2983,7 @@ do { long l; \ */ #define ASM_OUTPUT_ADDR_VEC_ELT(FILE, VALUE) \ - fprintf (FILE, "%s%s%d\n", ASM_LONG, LPREFIX, VALUE) + fprintf (FILE, "%s%s%d\n", TARGET_64BIT ? ASM_QUAD : ASM_LONG, LPREFIX, VALUE) /* This is how to output an element of a case-vector that is relative. We don't use these on the 386 yet, because the ATT assembler can't do diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md index db143969dc6..c02702ab34f 100644 --- a/gcc/config/i386/i386.md +++ b/gcc/config/i386/i386.md @@ -8106,6 +8106,12 @@ (const_int 0)))] "TARGET_64BIT && ix86_match_ccmode (insn, CCNOmode) + /* The code bellow cannot deal with constants outside HOST_WIDE_INT. */ + && INTVAL (operands[1]) + INTVAL (operands[2]) < HOST_BITS_PER_WIDE_INT + /* Ensure that resulting mask is zero or sign extended operand. */ + && (INTVAL (operands[1]) + INTVAL (operands[2]) <= 32 + || (INTVAL (operands[1]) + INTVAL (operands[2]) == 64 + && INTVAL (operands[1]) > 32)) && (GET_MODE (operands[0]) == SImode || GET_MODE (operands[0]) == DImode || GET_MODE (operands[0]) == HImode @@ -13114,7 +13120,8 @@ { if (TARGET_64BIT) operands[0] = expand_simple_binop (Pmode, PLUS, operands[0], - operands[1], NULL_RTX, 0, + gen_rtx_LABEL_REF (Pmode, operands[1]), + NULL_RTX, 0, OPTAB_DIRECT); else { @@ -15638,11 +15645,11 @@ ;; Conditional move instructions. -(define_expand "movdicc_rex64" +(define_expand "movdicc" [(set (match_operand:DI 0 "register_operand" "") (if_then_else:DI (match_operand 1 "comparison_operator" "") - (match_operand:DI 2 "x86_64_general_operand" "") - (match_operand:DI 3 "x86_64_general_operand" "")))] + (match_operand:DI 2 "general_operand" "") + (match_operand:DI 3 "general_operand" "")))] "TARGET_64BIT" "if (!ix86_expand_int_movcc (operands)) FAIL; DONE;") @@ -17653,8 +17660,13 @@ (define_expand "movti" [(set (match_operand:TI 0 "general_operand" "") (match_operand:TI 1 "general_operand" ""))] - "TARGET_SSE" + "TARGET_SSE || TARGET_64BIT" { + if (TARGET_64BIT) + { + ix86_expand_move (TImode, operands); + DONE; + } /* For constants other than zero into memory. We do not know how the instructions used to build constants modify the upper 64 bits of the register, once we have that information we may be able @@ -17950,12 +17962,33 @@ (define_insn "movti_internal" [(set (match_operand:TI 0 "nonimmediate_operand" "=x,m") (match_operand:TI 1 "general_operand" "xm,x"))] - "TARGET_SSE" + "TARGET_SSE && !TARGET_64BIT" "@ movaps\t{%1, %0|%0, %1} movaps\t{%1, %0|%0, %1}" [(set_attr "type" "sse")]) +(define_insn "*movti_rex64" + [(set (match_operand:DI 0 "nonimmediate_operand" "=r,o,mx,x") + (match_operand:DI 1 "general_operand" "riFo,riF,x,m"))] + "TARGET_64BIT + && (GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM)" + "@ + # + # + movaps\\t{%1, %0|%0, %1} + movaps\\t{%1, %0|%0, %1}" + [(set_attr "type" "*,*,sse,sse") + (set_attr "mode" "TI")]) + +(define_split + [(set (match_operand:TI 0 "nonimmediate_operand" "") + (match_operand:TI 1 "general_operand" ""))] + "reload_completed && GENERAL_REG_P (operands[0]) + && GENERAL_REG_P (operands[1])" + [(const_int 0)] + "ix86_split_long_move (operands); DONE;") + ;; These two patterns are useful for specifying exactly whether to use ;; movaps or movups (define_insn "sse_movaps" diff --git a/gcc/config/i386/linux64.h b/gcc/config/i386/linux64.h index 529c3014290..22e461ffbeb 100644 --- a/gcc/config/i386/linux64.h +++ b/gcc/config/i386/linux64.h @@ -39,7 +39,7 @@ Boston, MA 02111-1307, USA. */ done. */ #undef LINK_SPEC -#define LINK_SPEC "%{!m32:-m elf_x86_64} %{m32:-m elf_i386} {shared:-shared} \ +#define LINK_SPEC "%{!m32:-m elf_x86_64} %{m32:-m elf_i386} %{shared:-shared} \ %{!shared: \ %{!static: \ %{rdynamic:-export-dynamic} \