/* Subroutines used for code generation on IA-32.
   Copyright (C) 1988, 1992, 1994, 1995, 1996, 1997, 1998, 1999, 2000
   Free Software Foundation, Inc.

This file is part of GNU CC.

GNU CC is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2, or (at your option)
any later version.

GNU CC is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with GNU CC; see the file COPYING.  If not, write to
the Free Software Foundation, 59 Temple Place - Suite 330,
Boston, MA 02111-1307, USA.  */
#include "hard-reg-set.h"
#include "insn-config.h"
#include "conditions.h"
#include "insn-flags.h"
#include "insn-attr.h"
#include "basic-block.h"
#ifdef EXTRA_CONSTRAINT
/* If EXTRA_CONSTRAINT is defined, then the 'S'
   constraint in REG_CLASS_FROM_LETTER will no longer work, and various
   asm statements that need 'S' for class SIREG will break.  */
error EXTRA_CONSTRAINT conflicts with S constraint letter
/* The previous line used to be #error, but some compilers barf
   even if the conditional was untrue.  */
#endif
#ifndef CHECK_STACK_LIMIT
#define CHECK_STACK_LIMIT -1
#endif
/* Processor costs (relative to an add) */
struct processor_costs i386_cost = {	/* 386 specific costs */
  1,			/* cost of an add instruction */
  1,			/* cost of a lea instruction */
  3,			/* variable shift costs */
  2,			/* constant shift costs */
  6,			/* cost of starting a multiply */
  1,			/* cost of multiply per each bit set */
  23,			/* cost of a divide/mod */
  15,			/* "large" insn */
  4,			/* cost for loading QImode using movzbl */
  {2, 4, 2},		/* cost of loading integer registers
			   in QImode, HImode and SImode.
			   Relative to reg-reg move (2).  */
  {2, 4, 2},		/* cost of storing integer registers */
  2,			/* cost of reg,reg fld/fst */
  {8, 8, 8},		/* cost of loading fp registers
			   in SFmode, DFmode and XFmode */
  {8, 8, 8}		/* cost of storing fp registers
			   in SFmode, DFmode and XFmode */
};
struct processor_costs i486_cost = {	/* 486 specific costs */
  1,			/* cost of an add instruction */
  1,			/* cost of a lea instruction */
  3,			/* variable shift costs */
  2,			/* constant shift costs */
  12,			/* cost of starting a multiply */
  1,			/* cost of multiply per each bit set */
  40,			/* cost of a divide/mod */
  15,			/* "large" insn */
  4,			/* cost for loading QImode using movzbl */
  {2, 4, 2},		/* cost of loading integer registers
			   in QImode, HImode and SImode.
			   Relative to reg-reg move (2).  */
  {2, 4, 2},		/* cost of storing integer registers */
  2,			/* cost of reg,reg fld/fst */
  {8, 8, 8},		/* cost of loading fp registers
			   in SFmode, DFmode and XFmode */
  {8, 8, 8}		/* cost of storing fp registers
			   in SFmode, DFmode and XFmode */
};
struct processor_costs pentium_cost = {
  1,			/* cost of an add instruction */
  1,			/* cost of a lea instruction */
  4,			/* variable shift costs */
  1,			/* constant shift costs */
  11,			/* cost of starting a multiply */
  0,			/* cost of multiply per each bit set */
  25,			/* cost of a divide/mod */
  8,			/* "large" insn */
  6,			/* cost for loading QImode using movzbl */
  {2, 4, 2},		/* cost of loading integer registers
			   in QImode, HImode and SImode.
			   Relative to reg-reg move (2).  */
  {2, 4, 2},		/* cost of storing integer registers */
  2,			/* cost of reg,reg fld/fst */
  {2, 2, 6},		/* cost of loading fp registers
			   in SFmode, DFmode and XFmode */
  {4, 4, 6}		/* cost of storing fp registers
			   in SFmode, DFmode and XFmode */
};
struct processor_costs pentiumpro_cost = {
  1,			/* cost of an add instruction */
  1,			/* cost of a lea instruction */
  1,			/* variable shift costs */
  1,			/* constant shift costs */
  4,			/* cost of starting a multiply */
  0,			/* cost of multiply per each bit set */
  17,			/* cost of a divide/mod */
  8,			/* "large" insn */
  2,			/* cost for loading QImode using movzbl */
  {4, 4, 4},		/* cost of loading integer registers
			   in QImode, HImode and SImode.
			   Relative to reg-reg move (2).  */
  {2, 2, 2},		/* cost of storing integer registers */
  2,			/* cost of reg,reg fld/fst */
  {2, 2, 6},		/* cost of loading fp registers
			   in SFmode, DFmode and XFmode */
  {4, 4, 6}		/* cost of storing fp registers
			   in SFmode, DFmode and XFmode */
};
struct processor_costs k6_cost = {
  1,			/* cost of an add instruction */
  2,			/* cost of a lea instruction */
  1,			/* variable shift costs */
  1,			/* constant shift costs */
  3,			/* cost of starting a multiply */
  0,			/* cost of multiply per each bit set */
  18,			/* cost of a divide/mod */
  8,			/* "large" insn */
  3,			/* cost for loading QImode using movzbl */
  {4, 5, 4},		/* cost of loading integer registers
			   in QImode, HImode and SImode.
			   Relative to reg-reg move (2).  */
  {2, 3, 2},		/* cost of storing integer registers */
  4,			/* cost of reg,reg fld/fst */
  {6, 6, 6},		/* cost of loading fp registers
			   in SFmode, DFmode and XFmode */
  {4, 4, 4}		/* cost of storing fp registers
			   in SFmode, DFmode and XFmode */
};
struct processor_costs athlon_cost = {
  1,			/* cost of an add instruction */
  2,			/* cost of a lea instruction */
  1,			/* variable shift costs */
  1,			/* constant shift costs */
  5,			/* cost of starting a multiply */
  0,			/* cost of multiply per each bit set */
  42,			/* cost of a divide/mod */
  8,			/* "large" insn */
  4,			/* cost for loading QImode using movzbl */
  {4, 5, 4},		/* cost of loading integer registers
			   in QImode, HImode and SImode.
			   Relative to reg-reg move (2).  */
  {2, 3, 2},		/* cost of storing integer registers */
  4,			/* cost of reg,reg fld/fst */
  {6, 6, 20},		/* cost of loading fp registers
			   in SFmode, DFmode and XFmode */
  {4, 4, 16}		/* cost of storing fp registers
			   in SFmode, DFmode and XFmode */
};
struct processor_costs *ix86_cost = &pentium_cost;
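/* Illustrative sketch (not part of this file's interface): the tables
   above are consumed through the ix86_cost pointer by the cost macros
   in i386.h, roughly along the lines of

	total = ix86_cost->mult_init + nbits * ix86_cost->mult_bit;

   for a multiply whose constant operand has NBITS set bits.  The field
   names used here are assumptions for illustration; the canonical
   definition of struct processor_costs lives in i386.h.  */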
/* Processor feature/optimization bitmasks.  */
#define m_386 (1<<PROCESSOR_I386)
#define m_486 (1<<PROCESSOR_I486)
#define m_PENT (1<<PROCESSOR_PENTIUM)
#define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
#define m_K6  (1<<PROCESSOR_K6)
#define m_ATHLON  (1<<PROCESSOR_ATHLON)
const int x86_use_leave = m_386 | m_K6 | m_ATHLON;
const int x86_push_memory = m_386 | m_K6 | m_ATHLON;
const int x86_zero_extend_with_and = m_486 | m_PENT;
const int x86_movx = m_ATHLON | m_PPRO /* m_386 | m_K6 */;
const int x86_double_with_add = ~m_386;
const int x86_use_bit_test = m_386;
const int x86_unroll_strlen = m_486 | m_PENT | m_PPRO | m_ATHLON | m_K6;
const int x86_use_q_reg = m_PENT | m_PPRO | m_K6;
const int x86_use_any_reg = m_486;
const int x86_cmove = m_PPRO | m_ATHLON;
const int x86_deep_branch = m_PPRO | m_K6 | m_ATHLON;
const int x86_use_sahf = m_PPRO | m_K6 | m_ATHLON;
const int x86_partial_reg_stall = m_PPRO;
const int x86_use_loop = m_K6;
const int x86_use_fiop = ~(m_PPRO | m_ATHLON | m_PENT);
const int x86_use_mov0 = m_K6;
const int x86_use_cltd = ~(m_PENT | m_K6);
const int x86_read_modify_write = ~m_PENT;
const int x86_read_modify = ~(m_PENT | m_PPRO);
const int x86_split_long_moves = m_PPRO;
const int x86_promote_QImode = m_K6 | m_PENT | m_386 | m_486;
const int x86_single_stringop = m_386;
const int x86_qimode_math = ~(0);
const int x86_promote_qi_regs = 0;
const int x86_himode_math = ~(m_PPRO);
const int x86_promote_hi_regs = m_PPRO;
const int x86_sub_esp_4 = m_ATHLON | m_PPRO;
const int x86_sub_esp_8 = m_ATHLON | m_PPRO | m_386 | m_486;
const int x86_add_esp_4 = m_ATHLON | m_K6;
const int x86_add_esp_8 = m_ATHLON | m_PPRO | m_K6 | m_386 | m_486;
const int x86_integer_DFmode_moves = ~m_ATHLON;
const int x86_partial_reg_dependency = m_ATHLON;
const int x86_memory_mismatch_stall = m_ATHLON;
#define AT_BP(mode) (gen_rtx_MEM ((mode), hard_frame_pointer_rtx))

const char * const hi_reg_name[] = HI_REGISTER_NAMES;
const char * const qi_reg_name[] = QI_REGISTER_NAMES;
const char * const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES;
/* Array of the smallest class containing reg number REGNO, indexed by
   REGNO.  Used by REGNO_REG_CLASS in i386.h.  */

enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] =
{
  /* ax, dx, cx, bx */
  AREG, DREG, CREG, BREG,
  /* si, di, bp, sp */
  SIREG, DIREG, NON_Q_REGS, NON_Q_REGS,
  /* FP registers */
  FP_TOP_REG, FP_SECOND_REG, FLOAT_REGS, FLOAT_REGS,
  FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, FLOAT_REGS,
  /* arg pointer */
  INDEX_REGS,
  /* flags, fpsr, dirflag, frame */
  NO_REGS, NO_REGS, NO_REGS, NON_Q_REGS,
};
/* The "default" register map.  */

int const dbx_register_map[FIRST_PSEUDO_REGISTER] =
{
  0, 2, 1, 3, 6, 7, 4, 5,		/* general regs */
  12, 13, 14, 15, 16, 17, 18, 19,	/* fp regs */
  -1, -1, -1, -1,			/* arg, flags, fpsr, dir */
};
/* Define the register numbers to be used in Dwarf debugging information.
   The SVR4 reference port C compiler uses the following register numbers
   in its Dwarf output code:
	0 for %eax (gcc regno = 0)
	1 for %ecx (gcc regno = 2)
	2 for %edx (gcc regno = 1)
	3 for %ebx (gcc regno = 3)
	4 for %esp (gcc regno = 7)
	5 for %ebp (gcc regno = 6)
	6 for %esi (gcc regno = 4)
	7 for %edi (gcc regno = 5)
   The following three DWARF register numbers are never generated by
   the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
   believes these numbers have these meanings.
	8 for %eip (no gcc equivalent)
	9 for %eflags (gcc regno = 17)
	10 for %trapno (no gcc equivalent)
   It is not at all clear how we should number the FP stack registers
   for the x86 architecture.  If the version of SDB on x86/svr4 were
   a bit less brain dead with respect to floating-point then we would
   have a precedent to follow with respect to DWARF register numbers
   for x86 FP registers, but the SDB on x86/svr4 is so completely
   broken with respect to FP registers that it is hardly worth thinking
   of it as something to strive for compatibility with.

   The version of x86/svr4 SDB I have at the moment does (partially)
   seem to believe that DWARF register number 11 is associated with
   the x86 register %st(0), but that's about all.  Higher DWARF
   register numbers don't seem to be associated with anything in
   particular, and even for DWARF regno 11, SDB only seems to under-
   stand that it should say that a variable lives in %st(0) (when
   asked via an `=' command) if we said it was in DWARF regno 11,
   but SDB still prints garbage when asked for the value of the
   variable in question (via a `/' command).
   (Also note that the labels SDB prints for various FP stack regs
   when doing an `x' command are all wrong.)

   Note that these problems generally don't affect the native SVR4
   C compiler because it doesn't allow the use of -O with -g and
   because when it is *not* optimizing, it allocates a memory
   location for each floating-point variable, and the memory
   location is what gets described in the DWARF AT_location
   attribute for the variable in question.

   Regardless of the severe mental illness of the x86/svr4 SDB, we
   do something sensible here and we use the following DWARF
   register numbers.  Note that these are all stack-top-relative
   numbers:
	11 for %st(0) (gcc regno = 8)
	12 for %st(1) (gcc regno = 9)
	13 for %st(2) (gcc regno = 10)
	14 for %st(3) (gcc regno = 11)
	15 for %st(4) (gcc regno = 12)
	16 for %st(5) (gcc regno = 13)
	17 for %st(6) (gcc regno = 14)
	18 for %st(7) (gcc regno = 15)  */
int const svr4_dbx_register_map[FIRST_PSEUDO_REGISTER] =
{
  0, 2, 1, 3, 6, 7, 5, 4,		/* general regs */
  11, 12, 13, 14, 15, 16, 17, 18,	/* fp regs */
  -1, 9, -1, -1,			/* arg, flags, fpsr, dir */
};
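/* Example (illustrative): debug output can map a hard register to its
   DWARF number by indexing one of these tables with the gcc register
   number, roughly

	dwarf_regno = svr4_dbx_register_map[REGNO (reg)];

   so %esi (gcc regno 4) is emitted as DWARF regno 6 under the SVR4
   numbering documented above.  */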
/* Test and compare insns in i386.md store the information needed to
   generate branch and scc insns here.  */

struct rtx_def *ix86_compare_op0 = NULL_RTX;
struct rtx_def *ix86_compare_op1 = NULL_RTX;

#define MAX_386_STACK_LOCALS 2

/* Define the structure for the machine field in struct function.  */
struct machine_function
{
  rtx stack_locals[(int) MAX_MACHINE_MODE][MAX_386_STACK_LOCALS];
};

#define ix86_stack_locals (cfun->machine->stack_locals)
/* which cpu are we scheduling for */
enum processor_type ix86_cpu;

/* which instruction set architecture to use.  */
int ix86_arch;

/* Strings to hold which cpu and instruction set architecture to use.  */
const char *ix86_cpu_string;		/* for -mcpu=<xxx> */
const char *ix86_arch_string;		/* for -march=<xxx> */

/* Register allocation order */
const char *ix86_reg_alloc_order;
static char regs_allocated[FIRST_PSEUDO_REGISTER];

/* # of registers to use to pass arguments.  */
const char *ix86_regparm_string;

/* ix86_regparm_string as a number */
int ix86_regparm;

/* Alignment to use for loops and jumps:  */

/* Power of two alignment for loops.  */
const char *ix86_align_loops_string;

/* Power of two alignment for non-loop jumps.  */
const char *ix86_align_jumps_string;

/* Power of two alignment for stack boundary in bytes.  */
const char *ix86_preferred_stack_boundary_string;

/* Preferred alignment for stack boundary in bits.  */
int ix86_preferred_stack_boundary;

/* Values 1-5: see jump.c */
int ix86_branch_cost;
const char *ix86_branch_cost_string;

/* Power of two alignment for functions.  */
int ix86_align_funcs;
const char *ix86_align_funcs_string;

/* Power of two alignment for loops.  */
int ix86_align_loops;

/* Power of two alignment for non-loop jumps.  */
int ix86_align_jumps;
static void output_pic_addr_const PARAMS ((FILE *, rtx, int));
static void put_condition_code PARAMS ((enum rtx_code, enum machine_mode,
					int, FILE *));
static enum rtx_code unsigned_comparison PARAMS ((enum rtx_code code));
static rtx ix86_expand_int_compare PARAMS ((enum rtx_code, rtx, rtx));
static enum machine_mode ix86_fp_compare_mode PARAMS ((enum rtx_code));
static enum rtx_code ix86_prepare_fp_compare_args PARAMS ((enum rtx_code,
							   rtx *, rtx *));
static rtx ix86_expand_compare PARAMS ((enum rtx_code));
static rtx gen_push PARAMS ((rtx));
static int memory_address_length PARAMS ((rtx addr));
static int ix86_flags_dependant PARAMS ((rtx, rtx, enum attr_type));
static int ix86_agi_dependant PARAMS ((rtx, rtx, enum attr_type));
static int ix86_safe_length PARAMS ((rtx));
static enum attr_memory ix86_safe_memory PARAMS ((rtx));
static enum attr_pent_pair ix86_safe_pent_pair PARAMS ((rtx));
static enum attr_ppro_uops ix86_safe_ppro_uops PARAMS ((rtx));
static void ix86_dump_ppro_packet PARAMS ((FILE *));
static void ix86_reorder_insn PARAMS ((rtx *, rtx *));
static rtx * ix86_pent_find_pair PARAMS ((rtx *, rtx *, enum attr_pent_pair,
					  rtx));
static void ix86_init_machine_status PARAMS ((struct function *));
static void ix86_mark_machine_status PARAMS ((struct function *));
static void ix86_split_to_parts PARAMS ((rtx, rtx *, enum machine_mode));
static int ix86_safe_length_prefix PARAMS ((rtx));
static HOST_WIDE_INT ix86_compute_frame_size PARAMS((HOST_WIDE_INT,
						     int *, int *, int *));
static int ix86_nsaved_regs PARAMS((void));
static void ix86_emit_save_regs PARAMS((void));
static void ix86_emit_restore_regs_using_mov PARAMS ((rtx, int));
static void ix86_emit_epilogue_esp_adjustment PARAMS((int));
static void ix86_sched_reorder_pentium PARAMS((rtx *, rtx *));
static void ix86_sched_reorder_ppro PARAMS((rtx *, rtx *));
struct ix86_address
{
  rtx base, index, disp;
  HOST_WIDE_INT scale;
};

static int ix86_decompose_address PARAMS ((rtx, struct ix86_address *));
/* Sometimes certain combinations of command options do not make
   sense on a particular target machine.  You can define a macro
   `OVERRIDE_OPTIONS' to take account of this.  This macro, if
   defined, is executed once just after all the command options have
   been parsed.

   Don't use this macro to turn on various extra optimizations for
   `-O'.  That is what `OPTIMIZATION_OPTIONS' is for.  */
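/* The usual arrangement (assumed here) is for i386.h to forward the
   macro to the function below:

	#define OVERRIDE_OPTIONS override_options ()

   so all of the -m switch validation runs exactly once, after option
   parsing.  */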
void
override_options ()
{
  int i;

  /* Comes from final.c -- no real reason to change it.  */
#define MAX_CODE_ALIGN 16

  static struct ptt
    {
      struct processor_costs *cost;	/* Processor costs */
      int target_enable;		/* Target flags to enable.  */
      int target_disable;		/* Target flags to disable.  */
      int align_loop;			/* Default alignments.  */
      int align_jump;
      int align_func;
      int branch_cost;
    }
  const processor_target_table[PROCESSOR_max] =
    {
      /* Entries must follow the order of enum processor_type.  */
      {&i386_cost, 0, 0, 2, 2, 2, 1},
      {&i486_cost, 0, 0, 4, 4, 4, 1},
      {&pentium_cost, 0, 0, -4, -4, -4, 1},
      {&pentiumpro_cost, 0, 0, 4, -4, 4, 1},
      {&k6_cost, 0, 0, -5, -5, 4, 1},
      {&athlon_cost, 0, 0, 4, -4, 4, 1}
    };
  static struct pta
    {
      const char *name;		/* processor name or nickname.  */
      enum processor_type processor;
    }
  const processor_alias_table[] =
    {
      {"i386", PROCESSOR_I386},
      {"i486", PROCESSOR_I486},
      {"i586", PROCESSOR_PENTIUM},
      {"pentium", PROCESSOR_PENTIUM},
      {"i686", PROCESSOR_PENTIUMPRO},
      {"pentiumpro", PROCESSOR_PENTIUMPRO},
      {"k6", PROCESSOR_K6},
      {"athlon", PROCESSOR_ATHLON},
    };

  int const pta_size = sizeof (processor_alias_table) / sizeof (struct pta);
#ifdef SUBTARGET_OVERRIDE_OPTIONS
  SUBTARGET_OVERRIDE_OPTIONS;
#endif

  ix86_arch = PROCESSOR_I386;
  ix86_cpu = (enum processor_type) TARGET_CPU_DEFAULT;

  if (ix86_arch_string != 0)
    {
      for (i = 0; i < pta_size; i++)
	if (! strcmp (ix86_arch_string, processor_alias_table[i].name))
	  {
	    ix86_arch = processor_alias_table[i].processor;
	    /* Default cpu tuning to the architecture.  */
	    ix86_cpu = ix86_arch;
	    break;
	  }

      if (i == pta_size)
	error ("bad value (%s) for -march= switch", ix86_arch_string);
    }

  if (ix86_cpu_string != 0)
    {
      for (i = 0; i < pta_size; i++)
	if (! strcmp (ix86_cpu_string, processor_alias_table[i].name))
	  {
	    ix86_cpu = processor_alias_table[i].processor;
	    break;
	  }

      if (i == pta_size)
	error ("bad value (%s) for -mcpu= switch", ix86_cpu_string);
    }

  ix86_cost = processor_target_table[ix86_cpu].cost;
  target_flags |= processor_target_table[ix86_cpu].target_enable;
  target_flags &= ~processor_target_table[ix86_cpu].target_disable;
  /* Arrange to set up i386_stack_locals for all functions.  */
  init_machine_status = ix86_init_machine_status;
  mark_machine_status = ix86_mark_machine_status;

  /* Validate registers in register allocation order.  */
  if (ix86_reg_alloc_order)
    {
      int ch, regno;

      for (i = 0; (ch = ix86_reg_alloc_order[i]) != '\0'; i++)
	{
	  switch (ch)
	    {
	    case 'a':	regno = 0;	break;
	    case 'd':	regno = 1;	break;
	    case 'c':	regno = 2;	break;
	    case 'b':	regno = 3;	break;
	    case 'S':	regno = 4;	break;
	    case 'D':	regno = 5;	break;
	    case 'B':	regno = 6;	break;

	    default:	fatal ("Register '%c' is unknown", ch);
	    }

	  if (regs_allocated[regno])
	    fatal ("Register '%c' already specified in allocation order", ch);

	  regs_allocated[regno] = 1;
	}
    }
  /* Validate -mregparm= value.  */
  if (ix86_regparm_string)
    {
      ix86_regparm = atoi (ix86_regparm_string);
      if (ix86_regparm < 0 || ix86_regparm > REGPARM_MAX)
	fatal ("-mregparm=%d is not between 0 and %d",
	       ix86_regparm, REGPARM_MAX);
    }
  /* Validate -malign-loops= value, or provide default.  */
  ix86_align_loops = processor_target_table[ix86_cpu].align_loop;
  if (ix86_align_loops_string)
    {
      ix86_align_loops = atoi (ix86_align_loops_string);
      if (ix86_align_loops < 0 || ix86_align_loops > MAX_CODE_ALIGN)
	fatal ("-malign-loops=%d is not between 0 and %d",
	       ix86_align_loops, MAX_CODE_ALIGN);
    }
  /* Validate -malign-jumps= value, or provide default.  */
  ix86_align_jumps = processor_target_table[ix86_cpu].align_jump;
  if (ix86_align_jumps_string)
    {
      ix86_align_jumps = atoi (ix86_align_jumps_string);
      if (ix86_align_jumps < 0 || ix86_align_jumps > MAX_CODE_ALIGN)
	fatal ("-malign-jumps=%d is not between 0 and %d",
	       ix86_align_jumps, MAX_CODE_ALIGN);
    }
  /* Validate -malign-functions= value, or provide default.  */
  ix86_align_funcs = processor_target_table[ix86_cpu].align_func;
  if (ix86_align_funcs_string)
    {
      ix86_align_funcs = atoi (ix86_align_funcs_string);
      if (ix86_align_funcs < 0 || ix86_align_funcs > MAX_CODE_ALIGN)
	fatal ("-malign-functions=%d is not between 0 and %d",
	       ix86_align_funcs, MAX_CODE_ALIGN);
    }
  /* Validate -mpreferred-stack-boundary= value, or provide default.
     The default of 128 bits is for Pentium III's SSE __m128.  */
  ix86_preferred_stack_boundary = 128;
  if (ix86_preferred_stack_boundary_string)
    {
      int i = atoi (ix86_preferred_stack_boundary_string);
      if (i < 2 || i > 31)
	fatal ("-mpreferred-stack-boundary=%d is not between 2 and 31", i);
      ix86_preferred_stack_boundary = (1 << i) * BITS_PER_UNIT;
    }
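  /* Worked example: -mpreferred-stack-boundary=4 yields
     (1 << 4) * BITS_PER_UNIT = 16 * 8 = 128 bits, i.e. the 16-byte
     alignment wanted for SSE __m128 values.  */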
  /* Validate -mbranch-cost= value, or provide default.  */
  ix86_branch_cost = processor_target_table[ix86_cpu].branch_cost;
  if (ix86_branch_cost_string)
    {
      ix86_branch_cost = atoi (ix86_branch_cost_string);
      if (ix86_branch_cost < 0 || ix86_branch_cost > 5)
	fatal ("-mbranch-cost=%d is not between 0 and 5",
	       ix86_branch_cost);
    }
  /* Keep nonleaf frame pointers.  */
  if (TARGET_OMIT_LEAF_FRAME_POINTER)
    flag_omit_frame_pointer = 1;

  /* If we're doing fast math, we don't care about comparison order
     wrt NaNs.  This lets us use a shorter comparison sequence.  */
  if (flag_fast_math)
    target_flags &= ~MASK_IEEE_FP;

  /* If we're planning on using `loop', use it.  */
  if (TARGET_USE_LOOP && optimize)
    flag_branch_on_count_reg = 1;
}
/* A C statement (sans semicolon) to choose the order in which to
   allocate hard registers for pseudo-registers local to a basic
   block.

   Store the desired register order in the array `reg_alloc_order'.
   Element 0 should be the register to allocate first; element 1, the
   next register; and so on.

   The macro body should not assume anything about the contents of
   `reg_alloc_order' before execution of the macro.

   On most machines, it is not necessary to define this macro.  */

void
order_regs_for_local_alloc ()
{
  int i, ch, order;

  /* User specified the register allocation order.  */

  if (ix86_reg_alloc_order)
    {
      int regno = 0;

      for (i = order = 0; (ch = ix86_reg_alloc_order[i]) != '\0'; i++)
	{
	  switch (ch)
	    {
	    case 'a':	regno = 0;	break;
	    case 'd':	regno = 1;	break;
	    case 'c':	regno = 2;	break;
	    case 'b':	regno = 3;	break;
	    case 'S':	regno = 4;	break;
	    case 'D':	regno = 5;	break;
	    case 'B':	regno = 6;	break;
	    }

	  reg_alloc_order[order++] = regno;
	}

      for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
	{
	  if (! regs_allocated[i])
	    reg_alloc_order[order++] = i;
	}
    }

  /* If user did not specify a register allocation order, use natural order.  */
  else
    {
      for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
	reg_alloc_order[i] = i;
    }
}
void
optimization_options (level, size)
     int level;
     int size ATTRIBUTE_UNUSED;
{
  /* For -O2 and beyond, turn off -fschedule-insns by default.  It tends to
     make the problem with not enough registers even worse.  */
#ifdef INSN_SCHEDULING
  if (level > 1)
    flag_schedule_insns = 0;
#endif
}
/* Return nonzero if IDENTIFIER with arguments ARGS is a valid machine specific
   attribute for DECL.  The attributes in ATTRIBUTES have previously been
   assigned to DECL.  */

int
ix86_valid_decl_attribute_p (decl, attributes, identifier, args)
     tree decl ATTRIBUTE_UNUSED;
     tree attributes ATTRIBUTE_UNUSED;
     tree identifier ATTRIBUTE_UNUSED;
     tree args ATTRIBUTE_UNUSED;
{
  return 0;
}
/* Return nonzero if IDENTIFIER with arguments ARGS is a valid machine specific
   attribute for TYPE.  The attributes in ATTRIBUTES have previously been
   assigned to TYPE.  */

int
ix86_valid_type_attribute_p (type, attributes, identifier, args)
     tree type;
     tree attributes ATTRIBUTE_UNUSED;
     tree identifier;
     tree args;
{
  if (TREE_CODE (type) != FUNCTION_TYPE
      && TREE_CODE (type) != METHOD_TYPE
      && TREE_CODE (type) != FIELD_DECL
      && TREE_CODE (type) != TYPE_DECL)
    return 0;

  /* Stdcall attribute says callee is responsible for popping arguments
     if they are not variable.  */
  if (is_attribute_p ("stdcall", identifier))
    return (args == NULL_TREE);

  /* Cdecl attribute says the callee is a normal C declaration.  */
  if (is_attribute_p ("cdecl", identifier))
    return (args == NULL_TREE);

  /* Regparm attribute specifies how many integer arguments are to be
     passed in registers.  */
  if (is_attribute_p ("regparm", identifier))
    {
      tree cst;

      if (! args || TREE_CODE (args) != TREE_LIST
	  || TREE_CHAIN (args) != NULL_TREE
	  || TREE_VALUE (args) == NULL_TREE)
	return 0;

      cst = TREE_VALUE (args);
      if (TREE_CODE (cst) != INTEGER_CST)
	return 0;

      if (compare_tree_int (cst, REGPARM_MAX) > 0)
	return 0;

      return 1;
    }

  return 0;
}
/* Return 0 if the attributes for two types are incompatible, 1 if they
   are compatible, and 2 if they are nearly compatible (which causes a
   warning to be generated).  */

int
ix86_comp_type_attributes (type1, type2)
     tree type1;
     tree type2;
{
  /* Check for mismatch of non-default calling convention.  */
  const char *rtdstr = TARGET_RTD ? "cdecl" : "stdcall";

  if (TREE_CODE (type1) != FUNCTION_TYPE)
    return 1;

  /* Check for mismatched return types (cdecl vs stdcall).  */
  if (!lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type1))
      != !lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type2)))
    return 0;
  return 1;
}
/* Value is the number of bytes of arguments automatically
   popped when returning from a subroutine call.
   FUNDECL is the declaration node of the function (as a tree),
   FUNTYPE is the data type of the function (as a tree),
   or for a library call it is an identifier node for the subroutine name.
   SIZE is the number of bytes of arguments passed on the stack.

   On the 80386, the RTD insn may be used to pop them if the number
   of args is fixed, but if the number is variable then the caller
   must pop them all.  RTD can't be used for library calls now
   because the library is compiled with the Unix compiler.
   Use of RTD is a selectable option, since it is incompatible with
   standard Unix calling sequences.  If the option is not selected,
   the caller must always pop the args.

   The attribute stdcall is equivalent to RTD on a per module basis.  */
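/* Example: with -mrtd, or for a function declared

	int __attribute__ ((stdcall)) f (int a, int b);

   the callee returns with `ret $8', popping its own 8 bytes of
   arguments, whereas a cdecl or varargs function returns with a plain
   `ret' and leaves the popping to the caller.  */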
int
ix86_return_pops_args (fundecl, funtype, size)
     tree fundecl;
     tree funtype;
     int size;
{
  int rtd = TARGET_RTD && (!fundecl || TREE_CODE (fundecl) != IDENTIFIER_NODE);

  /* Cdecl functions override -mrtd, and never pop the stack.  */
  if (! lookup_attribute ("cdecl", TYPE_ATTRIBUTES (funtype))) {

    /* Stdcall functions will pop the stack if not variable args.  */
    if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (funtype)))
      rtd = 1;

    if (rtd
	&& (TYPE_ARG_TYPES (funtype) == NULL_TREE
	    || (TREE_VALUE (tree_last (TYPE_ARG_TYPES (funtype)))
		== void_type_node)))
      return size;
  }

  /* Lose any fake structure return argument.  */
  if (aggregate_value_p (TREE_TYPE (funtype)))
    return GET_MODE_SIZE (Pmode);

  return 0;
}
/* Argument support functions.  */

/* Initialize a variable CUM of type CUMULATIVE_ARGS
   for a call to a function whose data type is FNTYPE.
   For a library call, FNTYPE is 0.  */

void
init_cumulative_args (cum, fntype, libname)
     CUMULATIVE_ARGS *cum;	/* Argument info to initialize */
     tree fntype;		/* tree ptr for function decl */
     rtx libname;		/* SYMBOL_REF of library name or 0 */
{
  static CUMULATIVE_ARGS zero_cum;
  tree param, next_param;

  if (TARGET_DEBUG_ARG)
    {
      fprintf (stderr, "\ninit_cumulative_args (");
      if (fntype)
	fprintf (stderr, "fntype code = %s, ret code = %s",
		 tree_code_name[(int) TREE_CODE (fntype)],
		 tree_code_name[(int) TREE_CODE (TREE_TYPE (fntype))]);
      else
	fprintf (stderr, "no fntype");

      if (libname)
	fprintf (stderr, ", libname = %s", XSTR (libname, 0));
    }

  *cum = zero_cum;

  /* Set up the number of registers to use for passing arguments.  */
  cum->nregs = ix86_regparm;
  if (fntype)
    {
      tree attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (fntype));

      if (attr)
	cum->nregs = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
    }

  /* Determine if this function has variable arguments.  This is
     indicated by the last argument being 'void_type_node' if there
     are no variable arguments.  If there are variable arguments, then
     we won't pass anything in registers */

  if (cum->nregs)
    {
      for (param = (fntype) ? TYPE_ARG_TYPES (fntype) : 0;
	   param != 0; param = next_param)
	{
	  next_param = TREE_CHAIN (param);
	  if (next_param == 0 && TREE_VALUE (param) != void_type_node)
	    cum->nregs = 0;
	}
    }

  if (TARGET_DEBUG_ARG)
    fprintf (stderr, ", nregs=%d )\n", cum->nregs);

  return;
}
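/* Example: for a prototype such as

	int __attribute__ ((regparm (2))) f (int a, int b, int c);

   cum->nregs starts out as 2, so `a' and `b' travel in registers and
   `c' goes on the stack; for a varargs prototype the loop above clears
   nregs and everything is pushed.  */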
/* Update the data in CUM to advance over an argument
   of mode MODE and data type TYPE.
   (TYPE is null for libcalls where that information may not be available.)  */

void
function_arg_advance (cum, mode, type, named)
     CUMULATIVE_ARGS *cum;	/* current arg information */
     enum machine_mode mode;	/* current arg mode */
     tree type;			/* type of the argument or 0 if lib support */
     int named;			/* whether or not the argument was named */
{
  int bytes
    = (mode == BLKmode) ? int_size_in_bytes (type) : GET_MODE_SIZE (mode);
  int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;

  if (TARGET_DEBUG_ARG)
    fprintf (stderr,
	     "function_adv (sz=%d, wds=%2d, nregs=%d, mode=%s, named=%d)\n\n",
	     words, cum->words, cum->nregs, GET_MODE_NAME (mode), named);

  cum->words += words;
  cum->nregs -= words;
  cum->regno += words;

  if (cum->nregs <= 0)
    {
      cum->nregs = 0;
      cum->regno = 0;
    }

  return;
}
/* Define where to put the arguments to a function.
   Value is zero to push the argument on the stack,
   or a hard register in which to store the argument.

   MODE is the argument's machine mode.
   TYPE is the data type of the argument (as a tree).
    This is null for libcalls where that information may
    not be available.
   CUM is a variable of type CUMULATIVE_ARGS which gives info about
    the preceding args and about the function being called.
   NAMED is nonzero if this argument is a named parameter
    (otherwise it is an extra parameter matching an ellipsis).  */

rtx
function_arg (cum, mode, type, named)
     CUMULATIVE_ARGS *cum;	/* current arg information */
     enum machine_mode mode;	/* current arg mode */
     tree type;			/* type of the argument or 0 if lib support */
     int named;			/* != 0 for normal args, == 0 for ... args */
{
  rtx ret = NULL_RTX;
  int bytes
    = (mode == BLKmode) ? int_size_in_bytes (type) : GET_MODE_SIZE (mode);
  int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;

  switch (mode)
    {
      /* For now, pass fp/complex values on the stack.  */
    default:
      break;

    case BLKmode:
    case DImode:
    case SImode:
    case HImode:
    case QImode:
      if (words <= cum->nregs)
	ret = gen_rtx_REG (mode, cum->regno);
      break;
    }

  if (TARGET_DEBUG_ARG)
    {
      fprintf (stderr,
	       "function_arg (size=%d, wds=%2d, nregs=%d, mode=%4s, named=%d",
	       words, cum->words, cum->nregs, GET_MODE_NAME (mode), named);

      if (ret)
	fprintf (stderr, ", reg=%%e%s", reg_names[ REGNO (ret) ]);
      else
	fprintf (stderr, ", stack");

      fprintf (stderr, " )\n");
    }

  return ret;
}
/* Returns 1 if OP is either a symbol reference or a sum of a symbol
   reference and a constant.  */

int
symbolic_operand (op, mode)
     register rtx op;
     enum machine_mode mode ATTRIBUTE_UNUSED;
{
  switch (GET_CODE (op))
    {
    case SYMBOL_REF:
    case LABEL_REF:
      return 1;

    case CONST:
      op = XEXP (op, 0);
      if (GET_CODE (op) == SYMBOL_REF
	  || GET_CODE (op) == LABEL_REF
	  || (GET_CODE (op) == UNSPEC
	      && XINT (op, 1) >= 6
	      && XINT (op, 1) <= 7))
	return 1;
      if (GET_CODE (op) != PLUS
	  || GET_CODE (XEXP (op, 1)) != CONST_INT)
	return 0;

      op = XEXP (op, 0);
      if (GET_CODE (op) == SYMBOL_REF
	  || GET_CODE (op) == LABEL_REF)
	return 1;
      /* Only @GOTOFF gets offsets.  */
      if (GET_CODE (op) != UNSPEC
	  || XINT (op, 1) != 7)
	return 0;

      op = XVECEXP (op, 0, 0);
      if (GET_CODE (op) == SYMBOL_REF
	  || GET_CODE (op) == LABEL_REF)
	return 1;
      return 0;

    default:
      return 0;
    }
}
/* Return true if the operand contains a @GOT or @GOTOFF reference.  */

int
pic_symbolic_operand (op, mode)
     register rtx op;
     enum machine_mode mode ATTRIBUTE_UNUSED;
{
  if (GET_CODE (op) == CONST)
    {
      op = XEXP (op, 0);
      if (GET_CODE (op) == UNSPEC)
	return 1;
      if (GET_CODE (op) != PLUS
	  || GET_CODE (XEXP (op, 1)) != CONST_INT)
	return 0;
      op = XEXP (op, 0);
      if (GET_CODE (op) == UNSPEC)
	return 1;
    }
  return 0;
}
/* Test for a valid operand for a call instruction.  Don't allow the
   arg pointer register or virtual regs since they may decay into
   reg + const, which the patterns can't handle.  */

int
call_insn_operand (op, mode)
     rtx op;
     enum machine_mode mode ATTRIBUTE_UNUSED;
{
  if (GET_CODE (op) != MEM)
    return 0;
  op = XEXP (op, 0);

  /* Disallow indirect through a virtual register.  This leads to
     compiler aborts when trying to eliminate them.  */
  if (GET_CODE (op) == REG
      && (op == arg_pointer_rtx
	  || op == frame_pointer_rtx
	  || (REGNO (op) >= FIRST_PSEUDO_REGISTER
	      && REGNO (op) <= LAST_VIRTUAL_REGISTER)))
    return 0;

  /* Disallow `call 1234'.  Due to varying assembler lameness this
     gets either rejected or translated to `call .+1234'.  */
  if (GET_CODE (op) == CONST_INT)
    return 0;

  /* Explicitly allow SYMBOL_REF even if pic.  */
  if (GET_CODE (op) == SYMBOL_REF)
    return 1;

  /* Half-pic doesn't allow anything but registers and constants.
     We've just taken care of the latter.  */
  if (HALF_PIC_P ())
    return register_operand (op, Pmode);

  /* Otherwise we can allow any general_operand in the address.  */
  return general_operand (op, Pmode);
}
int
constant_call_address_operand (op, mode)
     rtx op;
     enum machine_mode mode ATTRIBUTE_UNUSED;
{
  return (GET_CODE (op) == MEM
	  && CONSTANT_ADDRESS_P (XEXP (op, 0))
	  && GET_CODE (XEXP (op, 0)) != CONST_INT);
}
/* Match exactly zero and one.  */

int
const0_operand (op, mode)
     register rtx op;
     enum machine_mode mode;
{
  return op == CONST0_RTX (mode);
}

int
const1_operand (op, mode)
     register rtx op;
     enum machine_mode mode ATTRIBUTE_UNUSED;
{
  return op == const1_rtx;
}
/* Match 2, 4, or 8.  Used for leal multiplicands.  */

int
const248_operand (op, mode)
     register rtx op;
     enum machine_mode mode ATTRIBUTE_UNUSED;
{
  return (GET_CODE (op) == CONST_INT
	  && (INTVAL (op) == 2 || INTVAL (op) == 4 || INTVAL (op) == 8));
}
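/* Example: const248_operand accepts exactly the scale factors that the
   hardware addressing modes (and hence `lea') can encode, as in

	leal (%ebx,%ecx,4), %eax

   which computes eax = ebx + ecx*4; the scale must be 2, 4 or 8.  */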
/* True if this is a constant appropriate for an increment or decrement.  */

int
incdec_operand (op, mode)
     register rtx op;
     enum machine_mode mode;
{
  if (op == const1_rtx || op == constm1_rtx)
    return 1;
  if (GET_CODE (op) != CONST_INT)
    return 0;
  if (mode == SImode && INTVAL (op) == (HOST_WIDE_INT) 0xffffffff)
    return 1;
  if (mode == HImode && INTVAL (op) == (HOST_WIDE_INT) 0xffff)
    return 1;
  if (mode == QImode && INTVAL (op) == (HOST_WIDE_INT) 0xff)
    return 1;
  return 0;
}
/* Return false if this is the stack pointer, or any other fake
   register eliminable to the stack pointer.  Otherwise, this is
   a register operand.

   This is used to prevent esp from being used as an index reg,
   which would only happen in pathological cases.  */

int
reg_no_sp_operand (op, mode)
     register rtx op;
     enum machine_mode mode;
{
  rtx t = op;
  if (GET_CODE (t) == SUBREG)
    t = SUBREG_REG (t);
  if (t == stack_pointer_rtx || t == arg_pointer_rtx || t == frame_pointer_rtx)
    return 0;

  return register_operand (op, mode);
}
/* Return false if this is any eliminable register.  Otherwise
   general_operand.  */

int
general_no_elim_operand (op, mode)
     register rtx op;
     enum machine_mode mode;
{
  rtx t = op;
  if (GET_CODE (t) == SUBREG)
    t = SUBREG_REG (t);
  if (t == arg_pointer_rtx || t == frame_pointer_rtx
      || t == virtual_incoming_args_rtx || t == virtual_stack_vars_rtx
      || t == virtual_stack_dynamic_rtx)
    return 0;

  return general_operand (op, mode);
}
/* Return false if this is any eliminable register.  Otherwise
   register_operand or const_int.  */

int
nonmemory_no_elim_operand (op, mode)
     register rtx op;
     enum machine_mode mode;
{
  rtx t = op;
  if (GET_CODE (t) == SUBREG)
    t = SUBREG_REG (t);
  if (t == arg_pointer_rtx || t == frame_pointer_rtx
      || t == virtual_incoming_args_rtx || t == virtual_stack_vars_rtx
      || t == virtual_stack_dynamic_rtx)
    return 0;

  return GET_CODE (op) == CONST_INT || register_operand (op, mode);
}
/* Return true if op is a Q_REGS class register.  */

int
q_regs_operand (op, mode)
     register rtx op;
     enum machine_mode mode;
{
  if (mode != VOIDmode && GET_MODE (op) != mode)
    return 0;
  if (GET_CODE (op) == SUBREG)
    op = SUBREG_REG (op);
  return QI_REG_P (op);
}

/* Return true if op is a NON_Q_REGS class register.  */

int
non_q_regs_operand (op, mode)
     register rtx op;
     enum machine_mode mode;
{
  if (mode != VOIDmode && GET_MODE (op) != mode)
    return 0;
  if (GET_CODE (op) == SUBREG)
    op = SUBREG_REG (op);
  return NON_QI_REG_P (op);
}
/* Return 1 if OP is a comparison operator that can use the condition code
   generated by a logical operation, which characteristically does not set
   overflow or carry.  To be used with CCNOmode.  */

int
no_comparison_operator (op, mode)
     register rtx op;
     enum machine_mode mode;
{
  if (mode != VOIDmode && GET_MODE (op) != mode)
    return 0;

  switch (GET_CODE (op))
    {
    case EQ: case NE:
    case LT: case GE:
    case LEU: case LTU: case GEU: case GTU:
      return 1;

    default:
      return 0;
    }
}
/* Return 1 if OP is a comparison operator that can be issued by fcmov.  */

int
fcmov_comparison_operator (op, mode)
     register rtx op;
     enum machine_mode mode;
{
  if (mode != VOIDmode && GET_MODE (op) != mode)
    return 0;

  switch (GET_CODE (op))
    {
    case EQ: case NE:
    case LEU: case LTU: case GEU: case GTU:
    case UNORDERED: case ORDERED:
      return 1;

    default:
      return 0;
    }
}
/* Return 1 if OP is any normal comparison operator plus {UN}ORDERED.  */

int
uno_comparison_operator (op, mode)
     register rtx op;
     enum machine_mode mode;
{
  if (mode != VOIDmode && GET_MODE (op) != mode)
    return 0;

  switch (GET_CODE (op))
    {
    case EQ: case NE:
    case LE: case LT: case GE: case GT:
    case LEU: case LTU: case GEU: case GTU:
    case UNORDERED: case ORDERED:
      return 1;

    default:
      return 0;
    }
}
/* Return 1 if OP is a binary operator that can be promoted to wider mode.  */

int
promotable_binary_operator (op, mode)
     register rtx op;
     enum machine_mode mode ATTRIBUTE_UNUSED;
{
  switch (GET_CODE (op))
    {
    case MULT:
      /* Modern CPUs have same latency for HImode and SImode multiply,
	 but 386 and 486 do HImode multiply faster.  */
      return ix86_cpu > PROCESSOR_I486;
    case PLUS:
    case AND:
    case IOR:
    case XOR:
    case ASHIFT:
      return 1;
    default:
      return 0;
    }
}
/* Nearly general operand, but accept any const_double, since we wish
   to be able to drop them into memory rather than have them get pulled
   into registers.  */

int
cmp_fp_expander_operand (op, mode)
     register rtx op;
     enum machine_mode mode;
{
  if (mode != VOIDmode && mode != GET_MODE (op))
    return 0;
  if (GET_CODE (op) == CONST_DOUBLE)
    return 1;
  return general_operand (op, mode);
}
/* Match an SI or HImode register for a zero_extract.  */

int
ext_register_operand (op, mode)
     register rtx op;
     enum machine_mode mode ATTRIBUTE_UNUSED;
{
  if (GET_MODE (op) != SImode && GET_MODE (op) != HImode)
    return 0;
  return register_operand (op, VOIDmode);
}
/* Return 1 if this is a valid binary floating-point operation.
   OP is the expression matched, and MODE is its mode.  */

int
binary_fp_operator (op, mode)
     register rtx op;
     enum machine_mode mode;
{
  if (mode != VOIDmode && mode != GET_MODE (op))
    return 0;

  switch (GET_CODE (op))
    {
    case PLUS:
    case MINUS:
    case MULT:
    case DIV:
      return GET_MODE_CLASS (GET_MODE (op)) == MODE_FLOAT;

    default:
      return 0;
    }
}
int
mult_operator (op, mode)
     register rtx op;
     enum machine_mode mode ATTRIBUTE_UNUSED;
{
  return GET_CODE (op) == MULT;
}

int
div_operator (op, mode)
     register rtx op;
     enum machine_mode mode ATTRIBUTE_UNUSED;
{
  return GET_CODE (op) == DIV;
}
int
arith_or_logical_operator (op, mode)
     register rtx op;
     enum machine_mode mode;
{
  return ((mode == VOIDmode || GET_MODE (op) == mode)
	  && (GET_RTX_CLASS (GET_CODE (op)) == 'c'
	      || GET_RTX_CLASS (GET_CODE (op)) == '2'));
}
/* Returns 1 if OP is memory operand with a displacement.  */

int
memory_displacement_operand (op, mode)
     register rtx op;
     enum machine_mode mode;
{
  struct ix86_address parts;

  if (! memory_operand (op, mode))
    return 0;

  if (! ix86_decompose_address (XEXP (op, 0), &parts))
    abort ();

  return parts.disp != NULL_RTX;
}
/* To avoid problems when jump re-emits comparisons like testqi_ext_ccno_0,
   re-recognize the operand to avoid a copy_to_mode_reg that will fail.

   ??? It seems likely that this will only work because cmpsi is an
   expander, and no actual insns use this.  */

int
cmpsi_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  if (general_operand (op, mode))
    return 1;

  if (GET_CODE (op) == AND
      && GET_MODE (op) == SImode
      && GET_CODE (XEXP (op, 0)) == ZERO_EXTRACT
      && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST_INT
      && GET_CODE (XEXP (XEXP (op, 0), 2)) == CONST_INT
      && INTVAL (XEXP (XEXP (op, 0), 1)) == 8
      && INTVAL (XEXP (XEXP (op, 0), 2)) == 8
      && GET_CODE (XEXP (op, 1)) == CONST_INT)
    return 1;

  return 0;
}
/* Returns 1 if OP is memory operand that cannot be represented by the
   modRM array.  */

int
long_memory_operand (op, mode)
     register rtx op;
     enum machine_mode mode;
{
  if (! memory_operand (op, mode))
    return 0;

  return memory_address_length (op) != 0;
}
/* Return nonzero if the rtx is known aligned.  */

int
aligned_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  struct ix86_address parts;

  if (!general_operand (op, mode))
    return 0;

  /* Registers and immediate operands are always "aligned".  */
  if (GET_CODE (op) != MEM)
    return 1;

  /* Don't even try to do any aligned optimizations with volatiles.  */
  if (MEM_VOLATILE_P (op))
    return 0;

  op = XEXP (op, 0);

  /* Pushes and pops are only valid on the stack pointer.  */
  if (GET_CODE (op) == PRE_DEC
      || GET_CODE (op) == POST_INC)
    return 1;

  /* Decode the address.  */
  if (! ix86_decompose_address (op, &parts))
    abort ();

  /* Look for some component that isn't known to be aligned.  */
  if (parts.index)
    {
      if (parts.scale < 4
	  && REGNO_POINTER_ALIGN (REGNO (parts.index)) < 32)
	return 0;
    }
  if (parts.base)
    {
      if (REGNO_POINTER_ALIGN (REGNO (parts.base)) < 32)
	return 0;
    }
  if (parts.disp)
    {
      if (GET_CODE (parts.disp) != CONST_INT
	  || (INTVAL (parts.disp) & 3) != 0)
	return 0;
    }

  /* Didn't find one -- this must be an aligned address.  */
  return 1;
}
/* Return true if the constant is something that can be loaded with
   a special instruction.  Only handle 0.0 and 1.0; others are less
   worthwhile.  */

int
standard_80387_constant_p (x)
     rtx x;
{
  if (GET_CODE (x) != CONST_DOUBLE)
    return -1;

#if ! defined (REAL_IS_NOT_DOUBLE) || defined (REAL_ARITHMETIC)
  {
    REAL_VALUE_TYPE d;
    jmp_buf handler;
    int is0, is1;

    if (setjmp (handler))
      return 0;

    set_float_handler (handler);
    REAL_VALUE_FROM_CONST_DOUBLE (d, x);
    is0 = REAL_VALUES_EQUAL (d, dconst0) && !REAL_VALUE_MINUS_ZERO (d);
    is1 = REAL_VALUES_EQUAL (d, dconst1);
    set_float_handler (NULL_PTR);

    if (is0)
      return 1;

    if (is1)
      return 2;

    /* Note that on the 80387, other constants, such as pi,
       are much slower to load as standard constants
       than to load from doubles in memory!  */
    /* ??? Not true on K6: all constants are equal cost.  */
  }
#endif

  return 0;
}
/* Returns 1 if OP contains a symbol reference */

int
symbolic_reference_mentioned_p (op)
     rtx op;
{
  register const char *fmt;
  register int i;

  if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
    return 1;

  fmt = GET_RTX_FORMAT (GET_CODE (op));
  for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
    {
      if (fmt[i] == 'E')
	{
	  register int j;

	  for (j = XVECLEN (op, i) - 1; j >= 0; j--)
	    if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
	      return 1;
	}

      else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
	return 1;
    }

  return 0;
}
/* Return 1 if it is appropriate to emit `ret' instructions in the
   body of a function.  Do this only if the epilogue is simple, needing a
   couple of insns.  Prior to reloading, we can't tell how many registers
   must be saved, so return 0 then.  Return 0 if there is no frame
   marker to de-allocate.

   If NON_SAVING_SETJMP is defined and true, then it is not possible
   for the epilogue to be simple, so return 0.  This is a special case
   since NON_SAVING_SETJMP will not cause regs_ever_live to change
   until final, but jump_optimize may need to know sooner if a
   `return' is OK.  */

int
ix86_can_use_return_insn_p ()
{
  HOST_WIDE_INT tsize;
  int nregs;

#ifdef NON_SAVING_SETJMP
  if (NON_SAVING_SETJMP && current_function_calls_setjmp)
    return 0;
#endif
#ifdef FUNCTION_BLOCK_PROFILER_EXIT
  if (profile_block_flag == 2)
    return 0;
#endif

  if (! reload_completed || frame_pointer_needed)
    return 0;

  /* Don't allow more than 32 pop, since that's all we can do
     with one instruction.  */
  if (current_function_pops_args
      && current_function_args_size >= 32768)
    return 0;

  tsize = ix86_compute_frame_size (get_frame_size (), &nregs, NULL, NULL);
  return tsize == 0 && nregs == 0;
}
static char *pic_label_name;
static int pic_label_output;
static char *global_offset_table_name;

/* This function generates code for -fpic that loads %ebx with
   the return address of the caller and then returns.  */

void
asm_output_function_prefix (file, name)
     FILE *file;
     const char *name ATTRIBUTE_UNUSED;
{
  rtx xops[2];
  int pic_reg_used = flag_pic && (current_function_uses_pic_offset_table
				  || current_function_uses_const_pool);
  xops[0] = pic_offset_table_rtx;
  xops[1] = stack_pointer_rtx;

  /* Deep branch prediction favors having a return for every call.  */
  if (pic_reg_used && TARGET_DEEP_BRANCH_PREDICTION)
    {
      if (!pic_label_output)
	{
	  /* This used to call ASM_DECLARE_FUNCTION_NAME() but since it's an
	     internal (non-global) label that's being emitted, it didn't make
	     sense to have .type information for local labels.  This caused
	     the SCO OpenServer 5.0.4 ELF assembler grief (why are you giving
	     me debug info for a label that you're declaring non-global?) this
	     was changed to call ASM_OUTPUT_LABEL() instead.  */

	  ASM_OUTPUT_LABEL (file, pic_label_name);

	  xops[1] = gen_rtx_MEM (SImode, xops[1]);
	  output_asm_insn ("mov{l}\t{%1, %0|%0, %1}", xops);
	  output_asm_insn ("ret", xops);

	  pic_label_output = 1;
	}
    }
}
void
load_pic_register ()
{
  rtx gotsym, pclab;

  if (global_offset_table_name == NULL)
    {
      global_offset_table_name =
	ggc_alloc_string ("_GLOBAL_OFFSET_TABLE_", 21);
      ggc_add_string_root (&global_offset_table_name, 1);
    }
  gotsym = gen_rtx_SYMBOL_REF (Pmode, global_offset_table_name);

  if (TARGET_DEEP_BRANCH_PREDICTION)
    {
      if (pic_label_name == NULL)
	{
	  pic_label_name = ggc_alloc_string (NULL, 32);
	  ggc_add_string_root (&pic_label_name, 1);
	  ASM_GENERATE_INTERNAL_LABEL (pic_label_name, "LPR", 0);
	}
      pclab = gen_rtx_MEM (QImode, gen_rtx_SYMBOL_REF (Pmode, pic_label_name));
    }
  else
    {
      pclab = gen_rtx_LABEL_REF (VOIDmode, gen_label_rtx ());
    }

  emit_insn (gen_prologue_get_pc (pic_offset_table_rtx, pclab));

  if (! TARGET_DEEP_BRANCH_PREDICTION)
    emit_insn (gen_popsi1 (pic_offset_table_rtx));

  emit_insn (gen_prologue_set_got (pic_offset_table_rtx, gotsym, pclab));
}
/* Generate an SImode "push" pattern for input ARG.  */

static rtx
gen_push (arg)
     rtx arg;
{
  return gen_rtx_SET (VOIDmode,
		      gen_rtx_MEM (SImode,
				   gen_rtx_PRE_DEC (SImode,
						    stack_pointer_rtx)),
		      arg);
}
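/* For ARG = (reg:SI bx), the pattern built above is

	(set (mem:SI (pre_dec:SI (reg:SI sp)))
	     (reg:SI bx))

   i.e. a single `pushl %ebx' instruction.  */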
/* Return number of registers to be saved on the stack.  */

static int
ix86_nsaved_regs ()
{
  int nregs = 0;
  int regno;
  int pic_reg_used = flag_pic && (current_function_uses_pic_offset_table
				  || current_function_uses_const_pool);
  int limit = (frame_pointer_needed
	       ? HARD_FRAME_POINTER_REGNUM : STACK_POINTER_REGNUM);

  for (regno = limit - 1; regno >= 0; regno--)
    if ((regs_ever_live[regno] && ! call_used_regs[regno])
	|| (regno == PIC_OFFSET_TABLE_REGNUM && pic_reg_used))
      nregs++;
  return nregs;
}
/* Return the offset between two registers, one to be eliminated, and the other
   its replacement, at the start of a routine.  */

HOST_WIDE_INT
ix86_initial_elimination_offset (from, to)
     int from;
     int to;
{
  int padding1;
  int nregs;

  /* Stack grows downward:

     [arguments]
						<- ARG_POINTER
     saved pc

     saved frame pointer if frame_pointer_needed
						<- HARD_FRAME_POINTER
     [saved regs]

     [padding1]

     [frame]

     [padding2]
						<- STACK_POINTER  */

  if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
    /* Skip saved PC and previous frame pointer.
       Executed only when frame_pointer_needed.  */
    return 8;
  else if (from == FRAME_POINTER_REGNUM
	   && to == HARD_FRAME_POINTER_REGNUM)
    {
      ix86_compute_frame_size (get_frame_size (), &nregs, &padding1, (int *) 0);
      padding1 += nregs * UNITS_PER_WORD;
      return -padding1;
    }
  else
    {
      /* ARG_POINTER or FRAME_POINTER to STACK_POINTER elimination.  */
      int frame_size = frame_pointer_needed ? 8 : 4;
      HOST_WIDE_INT tsize = ix86_compute_frame_size (get_frame_size (),
						     &nregs, &padding1,
						     (int *) 0);

      if (to != STACK_POINTER_REGNUM)
	abort ();
      else if (from == ARG_POINTER_REGNUM)
	return tsize + nregs * UNITS_PER_WORD + frame_size;
      else if (from != FRAME_POINTER_REGNUM)
	abort ();
      else
	return tsize - padding1;
    }
}
/* Compute the size of local storage taking into consideration the
   desired stack alignment which is to be maintained.  Also determine
   the number of registers saved below the local storage.

   PADDING1 returns padding before stack frame and PADDING2 returns
   padding after stack frame;  */
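/* Worked example (values chosen for illustration): with 20 bytes of
   locals, two saved registers, no frame pointer and a 16 byte
   stack_alignment_needed, offset = 4 + 2*4 = 12, so PADDING1 rounds the
   12 byte prologue area up to 16, i.e. 4 bytes of padding before the
   frame.  */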
static HOST_WIDE_INT
ix86_compute_frame_size (size, nregs_on_stack, rpadding1, rpadding2)
     HOST_WIDE_INT size;
     int *nregs_on_stack;
     int *rpadding1;
     int *rpadding2;
{
  int nregs;
  int padding1;
  int padding2;
  HOST_WIDE_INT total_size;
  int stack_alignment_needed = cfun->stack_alignment_needed / BITS_PER_UNIT;
  int offset;
  int preferred_alignment = cfun->preferred_stack_boundary / BITS_PER_UNIT;

  nregs = ix86_nsaved_regs ();
  total_size = size;

  offset = frame_pointer_needed ? 8 : 4;

  /* Do some sanity checking of stack_alignment_needed and preferred_alignment,
     since the i386 port is the only one using those features that may break
     easily.  */

  if (size && !stack_alignment_needed)
    abort ();
  if (!size && stack_alignment_needed != STACK_BOUNDARY / BITS_PER_UNIT)
    abort ();
  if (preferred_alignment < STACK_BOUNDARY / BITS_PER_UNIT)
    abort ();
  if (preferred_alignment > PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT)
    abort ();
  if (stack_alignment_needed > PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT)
    abort ();

  if (stack_alignment_needed < 4)
    stack_alignment_needed = 4;

  offset += nregs * UNITS_PER_WORD;

  if (ACCUMULATE_OUTGOING_ARGS)
    total_size += current_function_outgoing_args_size;

  total_size += offset;

  /* Align start of frame for local function.  */
  padding1 = ((offset + stack_alignment_needed - 1)
	      & -stack_alignment_needed) - offset;
  total_size += padding1;

  /* Align stack boundary.  */
  padding2 = ((total_size + preferred_alignment - 1)
	      & -preferred_alignment) - total_size;

  if (ACCUMULATE_OUTGOING_ARGS)
    padding2 += current_function_outgoing_args_size;

  if (nregs_on_stack)
    *nregs_on_stack = nregs;
  if (rpadding1)
    *rpadding1 = padding1;
  if (rpadding2)
    *rpadding2 = padding2;

  return size + padding1 + padding2;
}
/* Emit code to save registers in the prologue.  */

static void
ix86_emit_save_regs ()
{
  int regno;
  int limit;
  rtx insn;
  int pic_reg_used = flag_pic && (current_function_uses_pic_offset_table
				  || current_function_uses_const_pool);
  limit = (frame_pointer_needed
	   ? HARD_FRAME_POINTER_REGNUM : STACK_POINTER_REGNUM);

  for (regno = limit - 1; regno >= 0; regno--)
    if ((regs_ever_live[regno] && !call_used_regs[regno])
	|| (regno == PIC_OFFSET_TABLE_REGNUM && pic_reg_used))
      {
	insn = emit_insn (gen_push (gen_rtx_REG (SImode, regno)));
	RTX_FRAME_RELATED_P (insn) = 1;
      }
}
1903 ix86_expand_prologue ()
1905 HOST_WIDE_INT tsize
= ix86_compute_frame_size (get_frame_size (), (int *)0, (int *)0,
1908 int pic_reg_used
= flag_pic
&& (current_function_uses_pic_offset_table
1909 || current_function_uses_const_pool
);
1911 /* Note: AT&T enter does NOT have reversed args. Enter is probably
1912 slower on all targets. Also sdb doesn't like it. */
1914 if (frame_pointer_needed
)
1916 insn
= emit_insn (gen_push (hard_frame_pointer_rtx
));
1917 RTX_FRAME_RELATED_P (insn
) = 1;
1919 insn
= emit_move_insn (hard_frame_pointer_rtx
, stack_pointer_rtx
);
1920 RTX_FRAME_RELATED_P (insn
) = 1;
1923 ix86_emit_save_regs ();
1927 else if (! TARGET_STACK_PROBE
|| tsize
< CHECK_STACK_LIMIT
)
1929 if (frame_pointer_needed
)
1930 insn
= emit_insn (gen_pro_epilogue_adjust_stack
1931 (stack_pointer_rtx
, stack_pointer_rtx
,
1932 GEN_INT (-tsize
), hard_frame_pointer_rtx
));
1934 insn
= emit_insn (gen_addsi3 (stack_pointer_rtx
, stack_pointer_rtx
,
1936 RTX_FRAME_RELATED_P (insn
) = 1;
1940 /* ??? Is this only valid for Win32? */
1944 arg0
= gen_rtx_REG (SImode
, 0);
1945 emit_move_insn (arg0
, GEN_INT (tsize
));
1947 sym
= gen_rtx_MEM (FUNCTION_MODE
,
1948 gen_rtx_SYMBOL_REF (Pmode
, "_alloca"));
1949 insn
= emit_call_insn (gen_call (sym
, const0_rtx
));
1951 CALL_INSN_FUNCTION_USAGE (insn
)
1952 = gen_rtx_EXPR_LIST (VOIDmode
, gen_rtx_USE (VOIDmode
, arg0
),
1953 CALL_INSN_FUNCTION_USAGE (insn
));
1956 #ifdef SUBTARGET_PROLOGUE
1961 load_pic_register ();
1963 /* If we are profiling, make sure no instructions are scheduled before
1964 the call to mcount. However, if -fpic, the above call will have
1966 if ((profile_flag
|| profile_block_flag
) && ! pic_reg_used
)
1967 emit_insn (gen_blockage ());
/* Emit code to add TSIZE to esp value.  Use POP instruction when
   profitable.  */

static void
ix86_emit_epilogue_esp_adjustment (tsize)
     int tsize;
{
  /* If a frame pointer is present, we must be sure to tie the sp
     to the fp so that we don't mis-schedule.  */
  if (frame_pointer_needed)
    emit_insn (gen_pro_epilogue_adjust_stack (stack_pointer_rtx,
					      stack_pointer_rtx,
					      GEN_INT (tsize),
					      hard_frame_pointer_rtx));
  else
    emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
			   GEN_INT (tsize)));
}
/* Emit code to restore saved registers using MOV insns.  First register
   is restored from POINTER + OFFSET.  */

static void
ix86_emit_restore_regs_using_mov (pointer, offset)
     rtx pointer;
     int offset;
{
  int regno;
  int pic_reg_used = flag_pic && (current_function_uses_pic_offset_table
				  || current_function_uses_const_pool);
  int limit = (frame_pointer_needed
	       ? HARD_FRAME_POINTER_REGNUM : STACK_POINTER_REGNUM);

  for (regno = 0; regno < limit; regno++)
    if ((regs_ever_live[regno] && !call_used_regs[regno])
	|| (regno == PIC_OFFSET_TABLE_REGNUM && pic_reg_used))
      {
	emit_move_insn (gen_rtx_REG (SImode, regno),
			adj_offsettable_operand (gen_rtx_MEM (SImode,
							      pointer),
						 offset));
	offset += 4;
      }
}
/* Restore function stack, frame, and registers.  */

void
ix86_expand_epilogue (emit_return)
     int emit_return;
{
  int nregs;
  int regno;

  int pic_reg_used = flag_pic && (current_function_uses_pic_offset_table
				  || current_function_uses_const_pool);
  int sp_valid = !frame_pointer_needed || current_function_sp_is_unchanging;
  HOST_WIDE_INT offset;
  HOST_WIDE_INT tsize = ix86_compute_frame_size (get_frame_size (), &nregs,
						 (int *) 0, (int *) 0);

  /* Calculate start of saved registers relative to ebp.  */
  offset = -nregs * UNITS_PER_WORD;

#ifdef FUNCTION_BLOCK_PROFILER_EXIT
  if (profile_block_flag == 2)
    {
      FUNCTION_BLOCK_PROFILER_EXIT;
    }
#endif

  /* If we're only restoring one register and sp is not valid then
     use a move instruction to restore the register, since it's
     less work than reloading sp and popping the register.

     The default code results in a stack adjustment using an add/lea
     instruction, while this code results in a LEAVE instruction (or discrete
     equivalent), so it is profitable in some other cases as well.  Especially
     when there are no registers to restore.  We also use this code when
     TARGET_USE_LEAVE and there is exactly one register to pop.  This
     heuristic may need some tuning in future.  */
  if ((!sp_valid && nregs <= 1)
      || (frame_pointer_needed && !nregs && tsize)
      || (frame_pointer_needed && TARGET_USE_LEAVE && !optimize_size
	  && nregs == 1))
    {
      /* Restore registers.  We can use ebp or esp to address the memory
	 locations.  If both are available, default to ebp, since offsets
	 are known to be small.  Only exception is esp pointing directly to the
	 end of block of saved registers, where we may simplify the addressing
	 mode.  */

      if (!frame_pointer_needed || (sp_valid && !tsize))
	ix86_emit_restore_regs_using_mov (stack_pointer_rtx, tsize);
      else
	ix86_emit_restore_regs_using_mov (hard_frame_pointer_rtx, offset);

      if (!frame_pointer_needed)
	ix86_emit_epilogue_esp_adjustment (tsize + nregs * UNITS_PER_WORD);
      /* If not an i386, mov & pop is faster than "leave".  */
      else if (TARGET_USE_LEAVE || optimize_size)
	emit_insn (gen_leave ());
      else
	{
	  emit_insn (gen_pro_epilogue_adjust_stack (stack_pointer_rtx,
						    hard_frame_pointer_rtx,
						    const0_rtx,
						    hard_frame_pointer_rtx));
	  emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
	}
    }
  else
    {
      /* First step is to deallocate the stack frame so that we can
	 pop the registers.  */
      if (!sp_valid)
	{
	  if (!frame_pointer_needed)
	    abort ();
	  emit_insn (gen_pro_epilogue_adjust_stack (stack_pointer_rtx,
						    hard_frame_pointer_rtx,
						    GEN_INT (offset),
						    hard_frame_pointer_rtx));
	}
      else if (tsize)
	ix86_emit_epilogue_esp_adjustment (tsize);

      for (regno = 0; regno < STACK_POINTER_REGNUM; regno++)
	if ((regs_ever_live[regno] && !call_used_regs[regno])
	    || (regno == PIC_OFFSET_TABLE_REGNUM && pic_reg_used))
	  emit_insn (gen_popsi1 (gen_rtx_REG (SImode, regno)));
    }

  /* Sibcall epilogues don't want a return instruction.  */
  if (! emit_return)
    return;

  if (current_function_pops_args && current_function_args_size)
    {
      rtx popc = GEN_INT (current_function_pops_args);

      /* i386 can only pop 64K bytes.  If asked to pop more, pop
	 return address, do explicit add, and jump indirectly to the
	 caller.  */

      if (current_function_pops_args >= 65536)
	{
	  rtx ecx = gen_rtx_REG (SImode, 2);

	  emit_insn (gen_popsi1 (ecx));
	  emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, popc));
	  emit_indirect_jump (ecx);
	}
      else
	emit_jump_insn (gen_return_pop_internal (popc));
    }
  else
    emit_jump_insn (gen_return_internal ());
}
/* Extract the parts of an RTL expression that is a valid memory address
   for an instruction.  Return false if the structure of the address is
   grossly off.  */

static int
ix86_decompose_address (addr, out)
     register rtx addr;
     struct ix86_address *out;
{
  rtx base = NULL_RTX;
  rtx index = NULL_RTX;
  rtx disp = NULL_RTX;
  HOST_WIDE_INT scale = 1;
  rtx scale_rtx = NULL_RTX;

  if (GET_CODE (addr) == REG || GET_CODE (addr) == SUBREG)
    base = addr;
  else if (GET_CODE (addr) == PLUS)
    {
      rtx op0 = XEXP (addr, 0);
      rtx op1 = XEXP (addr, 1);
      enum rtx_code code0 = GET_CODE (op0);
      enum rtx_code code1 = GET_CODE (op1);

      if (code0 == REG || code0 == SUBREG)
        {
          if (code1 == REG || code1 == SUBREG)
            index = op0, base = op1;    /* index + base */
          else
            base = op0, disp = op1;     /* base + displacement */
        }
      else if (code0 == MULT)
        {
          index = XEXP (op0, 0);
          scale_rtx = XEXP (op0, 1);
          if (code1 == REG || code1 == SUBREG)
            base = op1;                 /* index*scale + base */
          else
            disp = op1;                 /* index*scale + disp */
        }
      else if (code0 == PLUS && GET_CODE (XEXP (op0, 0)) == MULT)
        {
          index = XEXP (XEXP (op0, 0), 0);      /* index*scale + base + disp */
          scale_rtx = XEXP (XEXP (op0, 0), 1);
          base = XEXP (op0, 1);
          disp = op1;
        }
      else if (code0 == PLUS)
        {
          index = XEXP (op0, 0);        /* index + base + disp */
          base = XEXP (op0, 1);
          disp = op1;
        }
      else
        return FALSE;
    }
  else if (GET_CODE (addr) == MULT)
    {
      index = XEXP (addr, 0);           /* index*scale */
      scale_rtx = XEXP (addr, 1);
    }
  else if (GET_CODE (addr) == ASHIFT)
    {
      rtx tmp;

      /* We're called for lea too, which implements ashift on occasion.  */
      index = XEXP (addr, 0);
      tmp = XEXP (addr, 1);
      if (GET_CODE (tmp) != CONST_INT)
        return FALSE;
      scale = INTVAL (tmp);
      if ((unsigned HOST_WIDE_INT) scale > 3)
        return FALSE;
      scale = 1 << scale;
    }
  else
    disp = addr;                        /* displacement */

  /* Extract the integral value of scale.  */
  if (scale_rtx)
    {
      if (GET_CODE (scale_rtx) != CONST_INT)
        return FALSE;
      scale = INTVAL (scale_rtx);
    }

  /* Allow arg pointer and stack pointer as index if there is no scaling.  */
  if (base && index && scale == 1
      && (index == arg_pointer_rtx || index == frame_pointer_rtx
          || index == stack_pointer_rtx))
    {
      rtx tmp = base;
      base = index;
      index = tmp;
    }

  /* Special case: %ebp cannot be encoded as a base without a displacement.  */
  if ((base == hard_frame_pointer_rtx
       || base == frame_pointer_rtx
       || base == arg_pointer_rtx) && !disp)
    disp = const0_rtx;

  /* Special case: on K6, [%esi] makes the instruction vector decoded.
     Avoid this by transforming to [%esi+0].  */
  if (ix86_cpu == PROCESSOR_K6 && !optimize_size
      && base && !index && !disp
      && REG_P (base)
      && REGNO_REG_CLASS (REGNO (base)) == SIREG)
    disp = const0_rtx;

  /* Special case: encode reg+reg instead of reg*2.  */
  if (!base && index && scale && scale == 2)
    base = index, scale = 1;

  /* Special case: scaling cannot be encoded without base or displacement.  */
  if (!base && !disp && index && scale != 1)
    disp = const0_rtx;

  out->base = base;
  out->index = index;
  out->disp = disp;
  out->scale = scale;

  return TRUE;
}
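#if 0
/* Illustrative sketch (editorial addition, not from the original
   source): decomposing the address base + index*4.  */
{
  struct ix86_address parts;
  rtx index = gen_rtx_REG (SImode, 0);  /* %eax */
  rtx base = gen_rtx_REG (SImode, 3);   /* %ebx */
  rtx addr = gen_rtx_PLUS (SImode,
                           gen_rtx_MULT (SImode, index, GEN_INT (4)),
                           base);

  if (ix86_decompose_address (addr, &parts))
    {
      /* Here parts.base == base, parts.index == index,
         parts.scale == 4, and parts.disp == NULL_RTX.  */
    }
}
#endif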
/* Return cost of the memory address x.
   For i386, it is better to use a complex address than let gcc copy
   the address into a reg and make a new pseudo.  But not if the address
   requires two regs - that would mean more pseudos with longer
   lifetimes.  */

int
ix86_address_cost (x)
     rtx x;
{
  struct ix86_address parts;
  int cost = 1;

  if (!ix86_decompose_address (x, &parts))
    abort ();

  /* More complex memory references are better.  */
  if (parts.disp && parts.disp != const0_rtx)
    cost--;

  /* Attempt to minimize number of registers in the address.  */
  if ((parts.base
       && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER))
      || (parts.index
          && (!REG_P (parts.index)
              || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)))
    cost++;

  if (parts.base
      && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER)
      && parts.index
      && (!REG_P (parts.index) || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)
      && parts.base != parts.index)
    cost++;

  /* The AMD K6 doesn't like addresses with ModR/M set to 00_xxx_100b,
     since its predecode logic can't detect the length of instructions
     and it degenerates to vector decoded.  Increase cost of such
     addresses here.  The penalty is minimally 2 cycles.  It may be worthwhile
     to split such addresses or even refuse such addresses at all.

     Following addressing modes are affected:
       [base+scale*index]
       [scale*index+disp]
       [base+index]

     The first and last case may be avoidable by explicitly coding the zero in
     memory address, but I don't have AMD-K6 machine handy to check this
     theory.  */

  if (TARGET_K6
      && ((!parts.disp && parts.base && parts.index && parts.scale != 1)
          || (parts.disp && !parts.base && parts.index && parts.scale != 1)
          || (!parts.disp && parts.base && parts.index && parts.scale == 1)))
    cost += 1;

  return cost;
}
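/* Illustrative examples (editorial addition, not from the original
   source) of the cost metric above.  Starting from the base cost of 1:

     (plus (reg) (const_int 4))            -> cost 0  (displacement is "free")
     (reg)                                 -> cost 1
     address using one pseudo register     -> cost 2
     address using two distinct pseudos    -> cost 3

   Pseudo registers (REGNO >= FIRST_PSEUDO_REGISTER) and non-register
   parts are what the two increments above penalize.  */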
/* Determine if a given CONST RTX is a valid memory displacement
   in PIC mode.  */

int
legitimate_pic_address_disp_p (disp)
     register rtx disp;
{
  if (GET_CODE (disp) != CONST)
    return 0;
  disp = XEXP (disp, 0);

  if (GET_CODE (disp) == PLUS)
    {
      if (GET_CODE (XEXP (disp, 1)) != CONST_INT)
        return 0;
      disp = XEXP (disp, 0);
    }

  if (GET_CODE (disp) != UNSPEC
      || XVECLEN (disp, 0) != 1)
    return 0;

  /* Must be @GOT or @GOTOFF.  */
  if (XINT (disp, 1) != 6
      && XINT (disp, 1) != 7)
    return 0;

  if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF
      && GET_CODE (XVECEXP (disp, 0, 0)) != LABEL_REF)
    return 0;

  return 1;
}
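/* Illustrative examples (editorial addition, not from the original
   source).  The two accepted shapes correspond to the @GOT and @GOTOFF
   relocations printed by output_pic_addr_const below:

     (const (unspec [(symbol_ref "foo")] 6))                    foo@GOT
     (const (plus (unspec [(symbol_ref "bar")] 7)
                  (const_int 4)))                               bar@GOTOFF+4

   Note that the optional CONST_INT offset is stripped before the
   UNSPEC itself is inspected.  */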
/* GO_IF_LEGITIMATE_ADDRESS recognizes an RTL expression that is a valid
   memory address for an instruction.  The MODE argument is the machine mode
   for the MEM expression that wants to use this address.

   It only recognizes addresses in canonical form.  LEGITIMIZE_ADDRESS should
   convert common non-canonical forms to canonical form so that they will
   be recognized.  */

int
legitimate_address_p (mode, addr, strict)
     enum machine_mode mode;
     register rtx addr;
     int strict;
{
  struct ix86_address parts;
  rtx base, index, disp;
  HOST_WIDE_INT scale;
  const char *reason = NULL;
  rtx reason_rtx = NULL_RTX;

  if (TARGET_DEBUG_ADDR)
    {
      fprintf (stderr,
               "\n======\nGO_IF_LEGITIMATE_ADDRESS, mode = %s, strict = %d\n",
               GET_MODE_NAME (mode), strict);
      debug_rtx (addr);
    }

  if (! ix86_decompose_address (addr, &parts))
    {
      reason = "decomposition failed";
      goto report_error;
    }

  base = parts.base;
  index = parts.index;
  disp = parts.disp;
  scale = parts.scale;

  /* Validate base register.

     Don't allow SUBREG's here, it can lead to spill failures when the base
     is one word out of a two word structure, which is represented internally
     as a DImode int.  */

  if (base)
    {
      reason_rtx = base;

      if (GET_CODE (base) != REG)
        {
          reason = "base is not a register";
          goto report_error;
        }

      if (GET_MODE (base) != Pmode)
        {
          reason = "base is not in Pmode";
          goto report_error;
        }

      if ((strict && ! REG_OK_FOR_BASE_STRICT_P (base))
          || (! strict && ! REG_OK_FOR_BASE_NONSTRICT_P (base)))
        {
          reason = "base is not valid";
          goto report_error;
        }
    }

  /* Validate index register.

     Don't allow SUBREG's here, it can lead to spill failures when the index
     is one word out of a two word structure, which is represented internally
     as a DImode int.  */

  if (index)
    {
      reason_rtx = index;

      if (GET_CODE (index) != REG)
        {
          reason = "index is not a register";
          goto report_error;
        }

      if (GET_MODE (index) != Pmode)
        {
          reason = "index is not in Pmode";
          goto report_error;
        }

      if ((strict && ! REG_OK_FOR_INDEX_STRICT_P (index))
          || (! strict && ! REG_OK_FOR_INDEX_NONSTRICT_P (index)))
        {
          reason = "index is not valid";
          goto report_error;
        }
    }

  /* Validate scale factor.  */
  if (scale != 1)
    {
      reason_rtx = GEN_INT (scale);
      if (!index)
        {
          reason = "scale without index";
          goto report_error;
        }

      if (scale != 2 && scale != 4 && scale != 8)
        {
          reason = "scale is not a valid multiplier";
          goto report_error;
        }
    }

  /* Validate displacement.  */
  if (disp)
    {
      reason_rtx = disp;

      if (!CONSTANT_ADDRESS_P (disp))
        {
          reason = "displacement is not constant";
          goto report_error;
        }

      if (GET_CODE (disp) == CONST_DOUBLE)
        {
          reason = "displacement is a const_double";
          goto report_error;
        }

      if (flag_pic && SYMBOLIC_CONST (disp))
        {
          if (! legitimate_pic_address_disp_p (disp))
            {
              reason = "displacement is an invalid pic construct";
              goto report_error;
            }

          /* Verify that a symbolic pic displacement includes
             the pic_offset_table_rtx register.  */
          if (base != pic_offset_table_rtx
              && (index != pic_offset_table_rtx || scale != 1))
            {
              reason = "pic displacement against invalid base";
              goto report_error;
            }
        }
      else if (HALF_PIC_P ())
        {
          if (! HALF_PIC_ADDRESS_P (disp)
              || (base != NULL_RTX || index != NULL_RTX))
            {
              reason = "displacement is an invalid half-pic reference";
              goto report_error;
            }
        }
    }

  /* Everything looks valid.  */
  if (TARGET_DEBUG_ADDR)
    fprintf (stderr, "Success.\n");
  return TRUE;

report_error:
  if (TARGET_DEBUG_ADDR)
    {
      fprintf (stderr, "Error: %s\n", reason);
      debug_rtx (reason_rtx);
    }
  return FALSE;
}
/* Return a legitimate reference for ORIG (an address) using the
   register REG.  If REG is 0, a new pseudo is generated.

   There are two types of references that must be handled:

   1. Global data references must load the address from the GOT, via
      the PIC reg.  An insn is emitted to do this load, and the reg is
      returned.

   2. Static data references, constant pool addresses, and code labels
      compute the address as an offset from the GOT, whose base is in
      the PIC reg.  Static data objects have SYMBOL_REF_FLAG set to
      differentiate them from global data objects.  The returned
      address is the PIC reg + an unspec constant.

   GO_IF_LEGITIMATE_ADDRESS rejects symbolic references unless the PIC
   reg also appears in the address.  */

rtx
legitimize_pic_address (orig, reg)
     rtx orig;
     rtx reg;
{
  rtx addr = orig;
  rtx new = orig;
  rtx base;

  if (GET_CODE (addr) == LABEL_REF
      || (GET_CODE (addr) == SYMBOL_REF
          && (CONSTANT_POOL_ADDRESS_P (addr)
              || SYMBOL_REF_FLAG (addr))))
    {
      /* This symbol may be referenced via a displacement from the PIC
         base address (@GOTOFF).  */

      current_function_uses_pic_offset_table = 1;
      new = gen_rtx_UNSPEC (VOIDmode, gen_rtvec (1, addr), 7);
      new = gen_rtx_CONST (VOIDmode, new);
      new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);

      if (reg != 0)
        {
          emit_move_insn (reg, new);
          new = reg;
        }
    }
  else if (GET_CODE (addr) == SYMBOL_REF)
    {
      /* This symbol must be referenced via a load from the
         Global Offset Table (@GOT).  */

      current_function_uses_pic_offset_table = 1;
      new = gen_rtx_UNSPEC (VOIDmode, gen_rtvec (1, addr), 6);
      new = gen_rtx_CONST (VOIDmode, new);
      new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
      new = gen_rtx_MEM (Pmode, new);
      RTX_UNCHANGING_P (new) = 1;

      if (reg == 0)
        reg = gen_reg_rtx (Pmode);
      emit_move_insn (reg, new);
      new = reg;
    }
  else
    {
      if (GET_CODE (addr) == CONST)
        {
          addr = XEXP (addr, 0);
          if (GET_CODE (addr) == UNSPEC)
            {
              /* Check that the unspec is one of the ones we generate?  */
            }
          else if (GET_CODE (addr) != PLUS)
            abort ();
        }
      if (GET_CODE (addr) == PLUS)
        {
          rtx op0 = XEXP (addr, 0), op1 = XEXP (addr, 1);

          /* Check first to see if this is a constant offset from a @GOTOFF
             symbol reference.  */
          if ((GET_CODE (op0) == LABEL_REF
               || (GET_CODE (op0) == SYMBOL_REF
                   && (CONSTANT_POOL_ADDRESS_P (op0)
                       || SYMBOL_REF_FLAG (op0))))
              && GET_CODE (op1) == CONST_INT)
            {
              current_function_uses_pic_offset_table = 1;
              new = gen_rtx_UNSPEC (VOIDmode, gen_rtvec (1, op0), 7);
              new = gen_rtx_PLUS (VOIDmode, new, op1);
              new = gen_rtx_CONST (VOIDmode, new);
              new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);

              if (reg != 0)
                {
                  emit_move_insn (reg, new);
                  new = reg;
                }
            }
          else
            {
              base = legitimize_pic_address (XEXP (addr, 0), reg);
              new  = legitimize_pic_address (XEXP (addr, 1),
                                             base == reg ? NULL_RTX : reg);

              if (GET_CODE (new) == CONST_INT)
                new = plus_constant (base, INTVAL (new));
              else
                {
                  if (GET_CODE (new) == PLUS && CONSTANT_P (XEXP (new, 1)))
                    {
                      base = gen_rtx_PLUS (Pmode, base, XEXP (new, 0));
                      new = XEXP (new, 1);
                    }
                  new = gen_rtx_PLUS (Pmode, base, new);
                }
            }
        }
    }
  return new;
}
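#if 0
/* Illustrative sketch (editorial addition, not from the original
   source): legitimizing a reference to a hypothetical global symbol
   "some_global" under -fpic.  */
{
  rtx sym = gen_rtx_SYMBOL_REF (VOIDmode, "some_global");
  rtx reg = gen_reg_rtx (Pmode);
  rtx addr;

  /* For a global (SYMBOL_REF_FLAG clear, not in the constant pool),
     this emits a load of
       (mem (plus pic_offset_table_rtx (const (unspec [sym] 6))))
     -- i.e. some_global@GOT -- into REG and returns REG.  */
  addr = legitimize_pic_address (sym, reg);
}
#endif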
/* Try machine-dependent ways of modifying an illegitimate address
   to be legitimate.  If we find one, return the new, valid address.
   This macro is used in only one place: `memory_address' in explow.c.

   OLDX is the address as it was before break_out_memory_refs was called.
   In some cases it is useful to look at this to decide what needs to be done.

   MODE and WIN are passed so that this macro can use
   GO_IF_LEGITIMATE_ADDRESS.

   It is always safe for this macro to do nothing.  It exists to recognize
   opportunities to optimize the output.

   For the 80386, we handle X+REG by loading X into a register R and
   using R+REG.  R will go in a general reg and indexing will be used.
   However, if REG is a broken-out memory address or multiplication,
   nothing needs to be done because REG can certainly go in a general reg.

   When -fpic is used, special handling is needed for symbolic references.
   See comments by legitimize_pic_address in i386.c for details.  */

rtx
legitimize_address (x, oldx, mode)
     register rtx x;
     register rtx oldx ATTRIBUTE_UNUSED;
     enum machine_mode mode;
{
  int changed = 0;
  unsigned log;

  if (TARGET_DEBUG_ADDR)
    {
      fprintf (stderr, "\n==========\nLEGITIMIZE_ADDRESS, mode = %s\n",
               GET_MODE_NAME (mode));
      debug_rtx (x);
    }

  if (flag_pic && SYMBOLIC_CONST (x))
    return legitimize_pic_address (x, 0);

  /* Canonicalize shifts by 0, 1, 2, 3 into multiply */
  if (GET_CODE (x) == ASHIFT
      && GET_CODE (XEXP (x, 1)) == CONST_INT
      && (log = (unsigned) exact_log2 (INTVAL (XEXP (x, 1)))) < 4)
    {
      changed = 1;
      x = gen_rtx_MULT (Pmode, force_reg (Pmode, XEXP (x, 0)),
                        GEN_INT (1 << log));
    }

  if (GET_CODE (x) == PLUS)
    {
      /* Canonicalize shifts by 0, 1, 2, 3 into multiply.  */

      if (GET_CODE (XEXP (x, 0)) == ASHIFT
          && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
          && (log = (unsigned) exact_log2 (INTVAL (XEXP (XEXP (x, 0), 1)))) < 4)
        {
          changed = 1;
          XEXP (x, 0) = gen_rtx_MULT (Pmode,
                                      force_reg (Pmode, XEXP (XEXP (x, 0), 0)),
                                      GEN_INT (1 << log));
        }

      if (GET_CODE (XEXP (x, 1)) == ASHIFT
          && GET_CODE (XEXP (XEXP (x, 1), 1)) == CONST_INT
          && (log = (unsigned) exact_log2 (INTVAL (XEXP (XEXP (x, 1), 1)))) < 4)
        {
          changed = 1;
          XEXP (x, 1) = gen_rtx_MULT (Pmode,
                                      force_reg (Pmode, XEXP (XEXP (x, 1), 0)),
                                      GEN_INT (1 << log));
        }

      /* Put multiply first if it isn't already.  */
      if (GET_CODE (XEXP (x, 1)) == MULT)
        {
          rtx tmp = XEXP (x, 0);
          XEXP (x, 0) = XEXP (x, 1);
          XEXP (x, 1) = tmp;
          changed = 1;
        }

      /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
         into (plus (plus (mult (reg) (const)) (reg)) (const)).  This can be
         created by virtual register instantiation, register elimination, and
         similar optimizations.  */
      if (GET_CODE (XEXP (x, 0)) == MULT && GET_CODE (XEXP (x, 1)) == PLUS)
        {
          changed = 1;
          x = gen_rtx_PLUS (Pmode,
                            gen_rtx_PLUS (Pmode, XEXP (x, 0),
                                          XEXP (XEXP (x, 1), 0)),
                            XEXP (XEXP (x, 1), 1));
        }

      /* Canonicalize
         (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
         into (plus (plus (mult (reg) (const)) (reg)) (const)).  */
      else if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS
               && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
               && GET_CODE (XEXP (XEXP (x, 0), 1)) == PLUS
               && CONSTANT_P (XEXP (x, 1)))
        {
          rtx constant;
          rtx other = NULL_RTX;

          if (GET_CODE (XEXP (x, 1)) == CONST_INT)
            {
              constant = XEXP (x, 1);
              other = XEXP (XEXP (XEXP (x, 0), 1), 1);
            }
          else if (GET_CODE (XEXP (XEXP (XEXP (x, 0), 1), 1)) == CONST_INT)
            {
              constant = XEXP (XEXP (XEXP (x, 0), 1), 1);
              other = XEXP (x, 1);
            }
          else
            constant = 0;

          if (constant)
            {
              changed = 1;
              x = gen_rtx_PLUS (Pmode,
                                gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 0),
                                              XEXP (XEXP (XEXP (x, 0), 1), 0)),
                                plus_constant (other, INTVAL (constant)));
            }
        }

      if (changed && legitimate_address_p (mode, x, FALSE))
        return x;

      if (GET_CODE (XEXP (x, 0)) == MULT)
        {
          changed = 1;
          XEXP (x, 0) = force_operand (XEXP (x, 0), 0);
        }

      if (GET_CODE (XEXP (x, 1)) == MULT)
        {
          changed = 1;
          XEXP (x, 1) = force_operand (XEXP (x, 1), 0);
        }

      if (changed
          && GET_CODE (XEXP (x, 1)) == REG
          && GET_CODE (XEXP (x, 0)) == REG)
        return x;

      if (flag_pic && SYMBOLIC_CONST (XEXP (x, 1)))
        {
          changed = 1;
          x = legitimize_pic_address (x, 0);
        }

      if (changed && legitimate_address_p (mode, x, FALSE))
        return x;

      if (GET_CODE (XEXP (x, 0)) == REG)
        {
          register rtx temp = gen_reg_rtx (Pmode);
          register rtx val = force_operand (XEXP (x, 1), temp);
          if (val != temp)
            emit_move_insn (temp, val);

          XEXP (x, 1) = temp;
          return x;
        }

      else if (GET_CODE (XEXP (x, 1)) == REG)
        {
          register rtx temp = gen_reg_rtx (Pmode);
          register rtx val = force_operand (XEXP (x, 0), temp);
          if (val != temp)
            emit_move_insn (temp, val);

          XEXP (x, 0) = temp;
          return x;
        }
    }

  return x;
}
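/* Illustrative sketch (editorial addition, not from the original
   source): one canonicalization performed above.  An address computed
   as (plus (ashift (reg) (const_int 2)) (reg)) is rewritten into
   (plus (mult (reg) (const_int 4)) (reg)), which ix86_decompose_address
   then recognizes as base + index*scale, fitting a single lea or
   memory operand.  */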
/* Print an integer constant expression in assembler syntax.  Addition
   and subtraction are the only arithmetic that may appear in these
   expressions.  FILE is the stdio stream to write to, X is the rtx, and
   CODE is the operand print code from the output string.  */

static void
output_pic_addr_const (file, x, code)
     FILE *file;
     rtx x;
     int code;
{
  char buf[256];

  switch (GET_CODE (x))
    {
    case SYMBOL_REF:
      assemble_name (file, XSTR (x, 0));
      if (code == 'P' && ! SYMBOL_REF_FLAG (x))
        fputs ("@PLT", file);
      break;

    case LABEL_REF:
      x = XEXP (x, 0);
      /* FALLTHRU */
    case CODE_LABEL:
      ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (x));
      assemble_name (asm_out_file, buf);
      break;

    case CONST_INT:
      fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
      break;

    case CONST:
      /* This used to output parentheses around the expression,
         but that does not work on the 386 (either ATT or BSD assembler).  */
      output_pic_addr_const (file, XEXP (x, 0), code);
      break;

    case CONST_DOUBLE:
      if (GET_MODE (x) == VOIDmode)
        {
          /* We can use %d if the number is <32 bits and positive.  */
          if (CONST_DOUBLE_HIGH (x) || CONST_DOUBLE_LOW (x) < 0)
            fprintf (file, "0x%lx%08lx",
                     (unsigned long) CONST_DOUBLE_HIGH (x),
                     (unsigned long) CONST_DOUBLE_LOW (x));
          else
            fprintf (file, HOST_WIDE_INT_PRINT_DEC, CONST_DOUBLE_LOW (x));
        }
      else
        /* We can't handle floating point constants;
           PRINT_OPERAND must handle them.  */
        output_operand_lossage ("floating constant misused");
      break;

    case PLUS:
      /* Some assemblers need integer constants to appear first.  */
      if (GET_CODE (XEXP (x, 0)) == CONST_INT)
        {
          output_pic_addr_const (file, XEXP (x, 0), code);
          putc ('+', file);
          output_pic_addr_const (file, XEXP (x, 1), code);
        }
      else if (GET_CODE (XEXP (x, 1)) == CONST_INT)
        {
          output_pic_addr_const (file, XEXP (x, 1), code);
          putc ('+', file);
          output_pic_addr_const (file, XEXP (x, 0), code);
        }
      else
        abort ();
      break;

    case MINUS:
      putc (ASSEMBLER_DIALECT ? '(' : '[', file);
      output_pic_addr_const (file, XEXP (x, 0), code);
      putc ('-', file);
      output_pic_addr_const (file, XEXP (x, 1), code);
      putc (ASSEMBLER_DIALECT ? ')' : ']', file);
      break;

    case UNSPEC:
      if (XVECLEN (x, 0) != 1)
        abort ();
      output_pic_addr_const (file, XVECEXP (x, 0, 0), code);
      switch (XINT (x, 1))
        {
        case 6:
          fputs ("@GOT", file);
          break;
        case 7:
          fputs ("@GOTOFF", file);
          break;
        case 8:
          fputs ("@PLT", file);
          break;
        default:
          output_operand_lossage ("invalid UNSPEC as operand");
          break;
        }
      break;

    default:
      output_operand_lossage ("invalid expression as operand");
    }
}
/* This is called from dwarfout.c via ASM_OUTPUT_DWARF_ADDR_CONST.
   We need to handle our special PIC relocations.  */

void
i386_dwarf_output_addr_const (file, x)
     FILE *file;
     rtx x;
{
  fprintf (file, "\t%s\t", INT_ASM_OP);
  if (flag_pic)
    output_pic_addr_const (file, x, '\0');
  else
    output_addr_const (file, x);
  fputc ('\n', file);
}
/* In the name of slightly smaller debug output, and to cater to
   general assembler lossage, recognize PIC+GOTOFF and turn it back
   into a direct symbol reference.  */

rtx
i386_simplify_dwarf_addr (orig_x)
     rtx orig_x;
{
  rtx x = orig_x;

  if (GET_CODE (x) != PLUS
      || GET_CODE (XEXP (x, 0)) != REG
      || GET_CODE (XEXP (x, 1)) != CONST)
    return orig_x;

  x = XEXP (XEXP (x, 1), 0);
  if (GET_CODE (x) == UNSPEC
      && XINT (x, 1) == 7)
    return XVECEXP (x, 0, 0);

  if (GET_CODE (x) == PLUS
      && GET_CODE (XEXP (x, 0)) == UNSPEC
      && GET_CODE (XEXP (x, 1)) == CONST_INT
      && XINT (XEXP (x, 0), 1) == 7)
    return gen_rtx_PLUS (VOIDmode, XVECEXP (XEXP (x, 0), 0, 0), XEXP (x, 1));

  return orig_x;
}
static void
put_condition_code (code, mode, reverse, fp, file)
     enum rtx_code code;
     enum machine_mode mode;
     int reverse;
     int fp;
     FILE *file;
{
  const char *suffix;

  if (reverse)
    code = reverse_condition (code);

  switch (code)
    {
    case EQ:
      suffix = "e";
      break;

    case NE:
      suffix = "ne";
      break;

    case GT:
      if (mode == CCNOmode)
        abort ();
      suffix = "g";
      break;

    case GTU:
      /* ??? Use "nbe" instead of "a" for fcmov lossage on some assemblers.
         Those same assemblers have the same but opposite lossage on cmov.  */
      suffix = fp ? "nbe" : "a";
      break;

    case LT:
      if (mode == CCNOmode)
        suffix = "s";
      else
        suffix = "l";
      break;

    case LE:
      if (mode == CCNOmode)
        abort ();
      suffix = "le";
      break;

    case LEU:
      suffix = "be";
      break;

    case GE:
      if (mode == CCNOmode)
        suffix = "ns";
      else
        suffix = "ge";
      break;

    case GEU:
      suffix = fp ? "nb" : "ae";
      break;

    case LTU:
      suffix = "b";
      break;

    default:
      abort ();
    }
  fputs (suffix, file);
}
void
print_reg (x, code, file)
     rtx x;
     int code;
     FILE *file;
{
  if (REGNO (x) == ARG_POINTER_REGNUM
      || REGNO (x) == FRAME_POINTER_REGNUM
      || REGNO (x) == FLAGS_REG
      || REGNO (x) == FPSR_REG)
    abort ();

  if (ASSEMBLER_DIALECT == 0 || USER_LABEL_PREFIX[0] == 0)
    putc ('%', file);

  if (code == 'w')
    code = 2;
  else if (code == 'b')
    code = 1;
  else if (code == 'k')
    code = 4;
  else if (code == 'y')
    code = 3;
  else if (code == 'h')
    code = 0;
  else
    code = GET_MODE_SIZE (GET_MODE (x));

  switch (code)
    {
    case 3:
      if (STACK_TOP_P (x))
        {
          fputs ("st(0)", file);
          break;
        }
      /* FALLTHRU */
    case 4:
    case 8:
    case 12:
      if (! FP_REG_P (x))
        putc ('e', file);
      /* FALLTHRU */
    case 2:
      fputs (hi_reg_name[REGNO (x)], file);
      break;
    case 1:
      fputs (qi_reg_name[REGNO (x)], file);
      break;
    case 0:
      fputs (qi_high_reg_name[REGNO (x)], file);
      break;
    default:
      abort ();
    }
}
/* Meaning of CODE:
   L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
   C -- print opcode suffix for set/cmov insn.
   c -- like C, but print reversed condition
   R -- print the prefix for register names.
   z -- print the opcode suffix for the size of the current operand.
   * -- print a star (in certain assembler syntax)
   w -- print the operand as if it's a "word" (HImode) even if it isn't.
   s -- print a shift double count, followed by the assemblers argument
        delimiter.
   b -- print the QImode name of the register for the indicated operand.
        %b0 would print %al if operands[0] is reg 0.
   w -- likewise, print the HImode name of the register.
   k -- likewise, print the SImode name of the register.
   h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
   y -- print "st(0)" instead of "st" as a register.  */

void
print_operand (file, x, code)
     FILE *file;
     rtx x;
     int code;
{
  if (code)
    {
      switch (code)
        {
        case '*':
          if (ASSEMBLER_DIALECT == 0)
            putc ('*', file);
          return;

        case 'L':
          if (ASSEMBLER_DIALECT == 0)
            putc ('l', file);
          return;

        case 'W':
          if (ASSEMBLER_DIALECT == 0)
            putc ('w', file);
          return;

        case 'B':
          if (ASSEMBLER_DIALECT == 0)
            putc ('b', file);
          return;

        case 'Q':
          if (ASSEMBLER_DIALECT == 0)
            putc ('l', file);
          return;

        case 'S':
          if (ASSEMBLER_DIALECT == 0)
            putc ('s', file);
          return;

        case 'T':
          if (ASSEMBLER_DIALECT == 0)
            putc ('t', file);
          return;

        case 'z':
          /* 387 opcodes don't get size suffixes if the operands are
             registers.  */

          if (STACK_REG_P (x))
            return;

          /* Intel syntax has no truck with instruction suffixes.  */
          if (ASSEMBLER_DIALECT != 0)
            return;

          /* this is the size of op from size of operand */
          switch (GET_MODE_SIZE (GET_MODE (x)))
            {
            case 2:
#ifdef HAVE_GAS_FILDS_FISTS
              putc ('s', file);
#endif
              return;

            case 4:
              if (GET_MODE (x) == SFmode)
                putc ('s', file);
              else
                putc ('l', file);
              return;

            case 12:
              putc ('t', file);
              return;

            case 8:
              if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
                {
#ifdef GAS_MNEMONICS
                  putc ('q', file);
#else
                  putc ('l', file);
                  putc ('l', file);
#endif
                }
              else
                putc ('l', file);
              return;

            default:
              abort ();
            }

        case 'b':
        case 'w':
        case 'k':
        case 'h':
        case 'y':
        case 'X':
        case 'P':
          break;

        case 's':
          if (GET_CODE (x) == CONST_INT || ! SHIFT_DOUBLE_OMITS_COUNT)
            {
              PRINT_OPERAND (file, x, 0);
              putc (',', file);
            }
          return;

        case 'C':
          put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 0, file);
          return;
        case 'F':
          put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 1, file);
          return;

          /* Like above, but reverse condition */
        case 'c':
          put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 0, file);
          return;
        case 'f':
          put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 1, file);
          return;

        default:
          {
            char str[50];

            sprintf (str, "invalid operand code `%c'", code);
            output_operand_lossage (str);
          }
        }
    }

  if (GET_CODE (x) == REG)
    {
      PRINT_REG (x, code, file);
    }

  else if (GET_CODE (x) == MEM)
    {
      /* No `byte ptr' prefix for call instructions.  */
      if (ASSEMBLER_DIALECT != 0 && code != 'X' && code != 'P')
        {
          const char *size;
          switch (GET_MODE_SIZE (GET_MODE (x)))
            {
            case 1: size = "BYTE"; break;
            case 2: size = "WORD"; break;
            case 4: size = "DWORD"; break;
            case 8: size = "QWORD"; break;
            case 12: size = "XWORD"; break;
            default:
              abort ();
            }
          fputs (size, file);
          fputs (" PTR ", file);
        }

      x = XEXP (x, 0);
      if (flag_pic && CONSTANT_ADDRESS_P (x))
        output_pic_addr_const (file, x, code);
      else
        output_address (x);
    }

  else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == SFmode)
    {
      REAL_VALUE_TYPE r;
      long l;

      REAL_VALUE_FROM_CONST_DOUBLE (r, x);
      REAL_VALUE_TO_TARGET_SINGLE (r, l);

      if (ASSEMBLER_DIALECT == 0)
        putc ('$', file);
      fprintf (file, "0x%lx", l);
    }

  /* These float cases don't actually occur as immediate operands.  */
  else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == DFmode)
    {
      REAL_VALUE_TYPE r;
      char dstr[30];

      REAL_VALUE_FROM_CONST_DOUBLE (r, x);
      REAL_VALUE_TO_DECIMAL (r, "%.22e", dstr);
      fprintf (file, "%s", dstr);
    }

  else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == XFmode)
    {
      REAL_VALUE_TYPE r;
      char dstr[30];

      REAL_VALUE_FROM_CONST_DOUBLE (r, x);
      REAL_VALUE_TO_DECIMAL (r, "%.22e", dstr);
      fprintf (file, "%s", dstr);
    }
  else
    {
      if (code != 'P')
        {
          if (GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST_DOUBLE)
            {
              if (ASSEMBLER_DIALECT == 0)
                putc ('$', file);
            }
          else if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF
                   || GET_CODE (x) == LABEL_REF)
            {
              if (ASSEMBLER_DIALECT == 0)
                putc ('$', file);
              else
                fputs ("OFFSET FLAT:", file);
            }
        }
      if (GET_CODE (x) == CONST_INT)
        fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
      else if (flag_pic)
        output_pic_addr_const (file, x, code);
      else
        output_addr_const (file, x);
    }
}
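/* Illustrative example (editorial addition, not from the original
   source): given the template "fistp%z2\t%2" and an operands[2] that is
   a SImode memory reference, the 'z' case above emits the suffix 'l' in
   AT&T dialect, producing "fistpl", and no suffix at all in Intel
   dialect, where the "DWORD PTR" operand prefix carries the size
   instead.  */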
/* Print a memory operand whose address is ADDR.  */

void
print_operand_address (file, addr)
     FILE *file;
     register rtx addr;
{
  struct ix86_address parts;
  rtx base, index, disp;
  int scale;

  if (! ix86_decompose_address (addr, &parts))
    abort ();

  base = parts.base;
  index = parts.index;
  disp = parts.disp;
  scale = parts.scale;

  if (!base && !index)
    {
      /* Displacement only requires special attention.  */

      if (GET_CODE (disp) == CONST_INT)
        {
          if (ASSEMBLER_DIALECT != 0)
            fputs ("ds:", file);
          fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (addr));
        }
      else if (flag_pic)
        output_pic_addr_const (file, addr, 0);
      else
        output_addr_const (file, addr);
    }
  else
    {
      if (ASSEMBLER_DIALECT == 0)
        {
          if (disp)
            {
              if (flag_pic)
                output_pic_addr_const (file, disp, 0);
              else if (GET_CODE (disp) == LABEL_REF)
                output_asm_label (disp);
              else
                output_addr_const (file, disp);
            }

          putc ('(', file);
          if (base)
            PRINT_REG (base, 0, file);
          if (index)
            {
              putc (',', file);
              PRINT_REG (index, 0, file);
              if (scale != 1)
                fprintf (file, ",%d", scale);
            }
          putc (')', file);
        }
      else
        {
          rtx offset = NULL_RTX;

          if (disp)
            {
              /* Pull out the offset of a symbol; print any symbol itself.  */
              if (GET_CODE (disp) == CONST
                  && GET_CODE (XEXP (disp, 0)) == PLUS
                  && GET_CODE (XEXP (XEXP (disp, 0), 1)) == CONST_INT)
                {
                  offset = XEXP (XEXP (disp, 0), 1);
                  disp = gen_rtx_CONST (VOIDmode,
                                        XEXP (XEXP (disp, 0), 0));
                }

              if (flag_pic)
                output_pic_addr_const (file, disp, 0);
              else if (GET_CODE (disp) == LABEL_REF)
                output_asm_label (disp);
              else if (GET_CODE (disp) == CONST_INT)
                offset = disp;
              else
                output_addr_const (file, disp);
            }

          putc ('[', file);
          if (base)
            {
              PRINT_REG (base, 0, file);
              if (offset)
                {
                  if (INTVAL (offset) >= 0)
                    putc ('+', file);
                  fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
                }
            }
          else if (offset)
            fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
          else
            putc ('0', file);

          if (index)
            {
              putc ('+', file);
              PRINT_REG (index, 0, file);
              if (scale != 1)
                fprintf (file, "*%d", scale);
            }
          putc (']', file);
        }
    }
}
/* Split one or more DImode RTL references into pairs of SImode
   references.  The RTL can be REG, offsettable MEM, integer constant, or
   CONST_DOUBLE.  "operands" is a pointer to an array of DImode RTL to
   split and "num" is its length.  lo_half and hi_half are output arrays
   that parallel "operands".  */

void
split_di (operands, num, lo_half, hi_half)
     rtx operands[];
     int num;
     rtx lo_half[], hi_half[];
{
  while (num--)
    {
      rtx op = operands[num];
      if (CONSTANT_P (op))
        split_double (op, &lo_half[num], &hi_half[num]);
      else if (! reload_completed)
        {
          lo_half[num] = gen_lowpart (SImode, op);
          hi_half[num] = gen_highpart (SImode, op);
        }
      else if (GET_CODE (op) == REG)
        {
          lo_half[num] = gen_rtx_REG (SImode, REGNO (op));
          hi_half[num] = gen_rtx_REG (SImode, REGNO (op) + 1);
        }
      else if (offsettable_memref_p (op))
        {
          rtx lo_addr = XEXP (op, 0);
          rtx hi_addr = XEXP (adj_offsettable_operand (op, 4), 0);
          lo_half[num] = change_address (op, SImode, lo_addr);
          hi_half[num] = change_address (op, SImode, hi_addr);
        }
      else
        abort ();
    }
}
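#if 0
/* Illustrative sketch (editorial addition, not from the original
   source): splitting a single DImode operand after reload.  */
{
  rtx lo_half[1], hi_half[1];
  rtx op = gen_rtx_REG (DImode, 0);     /* the %eax/%edx pair */

  split_di (&op, 1, lo_half, hi_half);
  /* After reload this yields lo_half[0] == (reg:SI 0) and
     hi_half[0] == (reg:SI 1), i.e. the two consecutive hard
     registers backing the DImode value.  */
}
#endif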
/* Output code to perform a 387 binary operation in INSN, one of PLUS,
   MINUS, MULT or DIV.  OPERANDS are the insn operands, where operands[3]
   is the expression of the binary operation.  The output may either be
   emitted here, or returned to the caller, like all output_* functions.

   There is no guarantee that the operands are the same mode, as they
   might be within FLOAT or FLOAT_EXTEND expressions.  */

#ifndef SYSV386_COMPAT
/* Set to 1 for compatibility with brain-damaged assemblers.  No-one
   wants to fix the assemblers because that causes incompatibility
   with gcc.  No-one wants to fix gcc because that causes
   incompatibility with assemblers...  You can use the option of
   -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way.  */
#define SYSV386_COMPAT 1
#endif

const char *
output_387_binary_op (insn, operands)
     rtx insn;
     rtx *operands;
{
  static char buf[30];
  const char *p;

#ifdef ENABLE_CHECKING
  /* Even if we do not want to check the inputs, this documents input
     constraints.  Which helps in understanding the following code.  */
  if (STACK_REG_P (operands[0])
      && ((REG_P (operands[1])
           && REGNO (operands[0]) == REGNO (operands[1])
           && (STACK_REG_P (operands[2]) || GET_CODE (operands[2]) == MEM))
          || (REG_P (operands[2])
              && REGNO (operands[0]) == REGNO (operands[2])
              && (STACK_REG_P (operands[1]) || GET_CODE (operands[1]) == MEM)))
      && (STACK_TOP_P (operands[1]) || STACK_TOP_P (operands[2])))
    ;                           /* ok */
  else
    abort ();
#endif

  switch (GET_CODE (operands[3]))
    {
    case PLUS:
      if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
          || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
        p = "fiadd";
      else
        p = "fadd";
      break;

    case MINUS:
      if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
          || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
        p = "fisub";
      else
        p = "fsub";
      break;

    case MULT:
      if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
          || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
        p = "fimul";
      else
        p = "fmul";
      break;

    case DIV:
      if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
          || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
        p = "fidiv";
      else
        p = "fdiv";
      break;

    default:
      abort ();
    }

  strcpy (buf, p);

  switch (GET_CODE (operands[3]))
    {
    case MULT:
    case PLUS:
      if (REG_P (operands[2]) && REGNO (operands[0]) == REGNO (operands[2]))
        {
          rtx temp = operands[2];
          operands[2] = operands[1];
          operands[1] = temp;
        }

      /* know operands[0] == operands[1].  */

      if (GET_CODE (operands[2]) == MEM)
        {
          p = "%z2\t%2";
          break;
        }

      if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
        {
          if (STACK_TOP_P (operands[0]))
            /* How is it that we are storing to a dead operand[2]?
               Well, presumably operands[1] is dead too.  We can't
               store the result to st(0) as st(0) gets popped on this
               instruction.  Instead store to operands[2] (which I
               think has to be st(1)).  st(1) will be popped later.
               gcc <= 2.8.1 didn't have this check and generated
               assembly code that the Unixware assembler rejected.  */
            p = "p\t{%0, %2|%2, %0}";   /* st(1) = st(0) op st(1); pop */
          else
            p = "p\t{%2, %0|%0, %2}";   /* st(r1) = st(r1) op st(0); pop */
          break;
        }

      if (STACK_TOP_P (operands[0]))
        p = "\t{%y2, %0|%0, %y2}";      /* st(0) = st(0) op st(r2) */
      else
        p = "\t{%2, %0|%0, %2}";        /* st(r1) = st(r1) op st(0) */
      break;

    case MINUS:
    case DIV:
      if (GET_CODE (operands[1]) == MEM)
        {
          p = "r%z1\t%1";
          break;
        }

      if (GET_CODE (operands[2]) == MEM)
        {
          p = "%z2\t%2";
          break;
        }

      if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
        {
#if SYSV386_COMPAT
          /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
             derived assemblers, confusingly reverse the direction of
             the operation for fsub{r} and fdiv{r} when the
             destination register is not st(0).  The Intel assembler
             doesn't have this brain damage.  Read !SYSV386_COMPAT to
             figure out what the hardware really does.  */
          if (STACK_TOP_P (operands[0]))
            p = "{p\t%0, %2|rp\t%2, %0}";
          else
            p = "{rp\t%2, %0|p\t%0, %2}";
#else
          if (STACK_TOP_P (operands[0]))
            /* As above for fmul/fadd, we can't store to st(0).  */
            p = "rp\t{%0, %2|%2, %0}";  /* st(1) = st(0) op st(1); pop */
          else
            p = "p\t{%2, %0|%0, %2}";   /* st(r1) = st(r1) op st(0); pop */
#endif
          break;
        }

      if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
        {
#if SYSV386_COMPAT
          if (STACK_TOP_P (operands[0]))
            p = "{rp\t%0, %1|p\t%1, %0}";
          else
            p = "{p\t%1, %0|rp\t%0, %1}";
#else
          if (STACK_TOP_P (operands[0]))
            p = "p\t{%0, %1|%1, %0}";   /* st(1) = st(1) op st(0); pop */
          else
            p = "rp\t{%1, %0|%0, %1}";  /* st(r2) = st(0) op st(r2); pop */
#endif
          break;
        }

      if (STACK_TOP_P (operands[0]))
        {
          if (STACK_TOP_P (operands[1]))
            p = "\t{%y2, %0|%0, %y2}";  /* st(0) = st(0) op st(r2) */
          else
            p = "r\t{%y1, %0|%0, %y1}"; /* st(0) = st(r1) op st(0) */
          break;
        }
      else if (STACK_TOP_P (operands[1]))
        {
#if SYSV386_COMPAT
          p = "{\t%1, %0|r\t%0, %1}";
#else
          p = "r\t{%1, %0|%0, %1}";     /* st(r2) = st(0) op st(r2) */
#endif
          break;
        }
      else
        {
#if SYSV386_COMPAT
          p = "{r\t%2, %0|\t%0, %2}";
#else
          p = "\t{%2, %0|%0, %2}";      /* st(r1) = st(r1) op st(0) */
#endif
          break;
        }

    default:
      abort ();
    }

  strcat (buf, p);
  return buf;
}
/* Output code for INSN to convert a float to a signed int.  OPERANDS
   are the insn operands.  The output may be [HSD]Imode and the input
   operand may be [SDX]Fmode.  */

const char *
output_fix_trunc (insn, operands)
     rtx insn;
     rtx *operands;
{
  int stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
  int dimode_p = GET_MODE (operands[0]) == DImode;
  rtx xops[4];

  /* Jump through a hoop or two for DImode, since the hardware has no
     non-popping instruction.  We used to do this a different way, but
     that was somewhat fragile and broke with post-reload splitters.  */
  if (dimode_p && !stack_top_dies)
    output_asm_insn ("fld\t%y1", operands);

  if (! STACK_TOP_P (operands[1]))
    abort ();

  xops[0] = GEN_INT (12);
  xops[1] = adj_offsettable_operand (operands[2], 1);
  xops[1] = change_address (xops[1], QImode, NULL_RTX);

  xops[2] = operands[0];
  if (GET_CODE (operands[0]) != MEM)
    xops[2] = operands[3];

  output_asm_insn ("fnstcw\t%2", operands);
  output_asm_insn ("mov{l}\t{%2, %4|%4, %2}", operands);
  output_asm_insn ("mov{b}\t{%0, %1|%1, %0}", xops);
  output_asm_insn ("fldcw\t%2", operands);
  output_asm_insn ("mov{l}\t{%4, %2|%2, %4}", operands);

  if (stack_top_dies || dimode_p)
    output_asm_insn ("fistp%z2\t%2", xops);
  else
    output_asm_insn ("fist%z2\t%2", xops);

  output_asm_insn ("fldcw\t%2", operands);

  if (GET_CODE (operands[0]) != MEM)
    {
      if (dimode_p)
        {
          split_di (operands + 0, 1, xops + 0, xops + 1);
          split_di (operands + 3, 1, xops + 2, xops + 3);
          output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);
          output_asm_insn ("mov{l}\t{%3, %1|%1, %3}", xops);
        }
      else if (GET_MODE (operands[0]) == SImode)
        output_asm_insn ("mov{l}\t{%3, %0|%0, %3}", operands);
      else
        output_asm_insn ("mov{w}\t{%3, %0|%0, %3}", operands);
    }

  return "";
}
/* Output code for INSN to compare OPERANDS.  EFLAGS_P is 1 when fcomi
   should be used and 2 when fnstsw should be used.  UNORDERED_P is true
   when fucom should be used.  */

const char *
output_fp_compare (insn, operands, eflags_p, unordered_p)
     rtx insn;
     rtx *operands;
     int eflags_p, unordered_p;
{
  int stack_top_dies;
  rtx cmp_op0 = operands[0];
  rtx cmp_op1 = operands[1];

  if (eflags_p == 2)
    {
      cmp_op0 = cmp_op1;
      cmp_op1 = operands[2];
    }

  if (! STACK_TOP_P (cmp_op0))
    abort ();

  stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;

  if (STACK_REG_P (cmp_op1)
      && stack_top_dies
      && find_regno_note (insn, REG_DEAD, REGNO (cmp_op1))
      && REGNO (cmp_op1) != FIRST_STACK_REG)
    {
      /* If both the top of the 387 stack dies, and the other operand
         is also a stack register that dies, then this must be a
         `fcompp' float compare.  */

      if (eflags_p == 1)
        {
          /* There is no double popping fcomi variant.  Fortunately,
             eflags is immune from the fstp's cc clobbering.  */
          if (unordered_p)
            output_asm_insn ("fucomip\t{%y1, %0|%0, %y1}", operands);
          else
            output_asm_insn ("fcomip\t{%y1, %0|%0, %y1}", operands);
          return "fstp\t%y0";
        }
      else
        {
          if (eflags_p == 2)
            {
              if (unordered_p)
                return "fucompp\n\tfnstsw\t%0";
              else
                return "fcompp\n\tfnstsw\t%0";
            }
          else
            {
              if (unordered_p)
                return "fucompp";
              else
                return "fcompp";
            }
        }
    }
  else
    {
      /* Encoded here as eflags_p | intmode | unordered_p | stack_top_dies.  */

      static const char * const alt[24] =
      {
        "fcom%z2\t%y2",
        "fcomp%z2\t%y2",
        "fucom%z2\t%y2",
        "fucomp%z2\t%y2",

        "ficom%z2\t%y2",
        "ficomp%z2\t%y2",
        NULL,
        NULL,

        "fcomi\t{%y1, %0|%0, %y1}",
        "fcomip\t{%y1, %0|%0, %y1}",
        "fucomi\t{%y1, %0|%0, %y1}",
        "fucomip\t{%y1, %0|%0, %y1}",

        NULL,
        NULL,
        NULL,
        NULL,

        "fcom%z2\t%y2\n\tfnstsw\t%0",
        "fcomp%z2\t%y2\n\tfnstsw\t%0",
        "fucom%z2\t%y2\n\tfnstsw\t%0",
        "fucomp%z2\t%y2\n\tfnstsw\t%0",

        "ficom%z2\t%y2\n\tfnstsw\t%0",
        "ficomp%z2\t%y2\n\tfnstsw\t%0",
        NULL,
        NULL
      };

      int mask;
      const char *ret;

      mask  = eflags_p << 3;
      mask |= (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT) << 2;
      mask |= unordered_p << 1;
      mask |= stack_top_dies;

      if (mask >= 24)
        abort ();
      ret = alt[mask];
      if (ret == NULL)
        abort ();

      return ret;
    }
}
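/* Illustrative example (editorial addition, not from the original
   source) of the mask encoding above: eflags_p = 2 (fnstsw), an SFmode
   operand (not MODE_INT), unordered_p = 1, and a dying stack top give
   mask = (2 << 3) | (0 << 2) | (1 << 1) | 1 = 19, selecting
   "fucomp%z2\t%y2\n\tfnstsw\t%0".  */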
/* Output assembler code to FILE to initialize basic-block profiling.

   If profile_block_flag == 2

        Output code to call the subroutine `__bb_init_trace_func'
        and pass two parameters to it.  The first parameter is
        the address of a block allocated in the object module.
        The second parameter is the number of the first basic block
        of the function.

        The name of the block is a local symbol made with this statement:

            ASM_GENERATE_INTERNAL_LABEL (BUFFER, "LPBX", 0);

        Of course, since you are writing the definition of
        `ASM_GENERATE_INTERNAL_LABEL' as well as that of this macro, you
        can take a short cut in the definition of this macro and use the
        name that you know will result.

        The number of the first basic block of the function is
        passed to the macro in BLOCK_OR_LABEL.

        If described in a virtual assembler language the code to be
        output looks like:

                parameter1 <- LPBX0
                parameter2 <- BLOCK_OR_LABEL
                call __bb_init_trace_func

   else if profile_block_flag != 0

        Output code to call the subroutine `__bb_init_func'
        and pass one single parameter to it, which is the same
        as the first parameter to `__bb_init_trace_func'.

        The first word of this parameter is a flag which will be nonzero if
        the object module has already been initialized.  So test this word
        first, and do not call `__bb_init_func' if the flag is nonzero.
        Note: When profile_block_flag == 2 the test need not be done
        but `__bb_init_trace_func' *must* be called.

        BLOCK_OR_LABEL may be used to generate a label number as a
        branch destination in case `__bb_init_func' will not be called.

        If described in a virtual assembler language the code to be
        output looks like:

                cmp (LPBX0),0
                jne local_label
                parameter1 <- LPBX0
                call __bb_init_func
            local_label:
*/

void
ix86_output_function_block_profiler (file, block_or_label)
     FILE *file;
     int block_or_label;
{
  static int num_func = 0;
  rtx xops[8];
  char block_table[80], false_label[80];

  ASM_GENERATE_INTERNAL_LABEL (block_table, "LPBX", 0);

  xops[1] = gen_rtx_SYMBOL_REF (VOIDmode, block_table);
  xops[5] = stack_pointer_rtx;
  xops[7] = gen_rtx_REG (Pmode, 0);     /* eax */

  CONSTANT_POOL_ADDRESS_P (xops[1]) = TRUE;

  switch (profile_block_flag)
    {
    case 2:
      xops[2] = GEN_INT (block_or_label);
      xops[3] = gen_rtx_MEM (Pmode,
                    gen_rtx_SYMBOL_REF (VOIDmode, "__bb_init_trace_func"));
      xops[6] = GEN_INT (8);

      output_asm_insn ("push{l}\t%2", xops);
      if (!flag_pic)
        output_asm_insn ("push{l}\t%1", xops);
      else
        {
          output_asm_insn ("lea{l}\t{%a1, %7|%7, %a1}", xops);
          output_asm_insn ("push{l}\t%7", xops);
        }
      output_asm_insn ("call\t%P3", xops);
      output_asm_insn ("add{l}\t{%6, %5|%5, %6}", xops);
      break;

    default:
      ASM_GENERATE_INTERNAL_LABEL (false_label, "LPBZ", num_func);

      xops[0] = const0_rtx;
      xops[2] = gen_rtx_MEM (Pmode,
                             gen_rtx_SYMBOL_REF (VOIDmode, false_label));
      xops[3] = gen_rtx_MEM (Pmode,
                             gen_rtx_SYMBOL_REF (VOIDmode, "__bb_init_func"));
      xops[4] = gen_rtx_MEM (Pmode, xops[1]);
      xops[6] = GEN_INT (4);

      CONSTANT_POOL_ADDRESS_P (xops[2]) = TRUE;

      output_asm_insn ("cmp{l}\t{%0, %4|%4, %0}", xops);
      output_asm_insn ("jne\t%2", xops);

      if (!flag_pic)
        output_asm_insn ("push{l}\t%1", xops);
      else
        {
          output_asm_insn ("lea{l}\t{%a1, %7|%7, %a1}", xops);
          output_asm_insn ("push{l}\t%7", xops);
        }
      output_asm_insn ("call\t%P3", xops);
      output_asm_insn ("add{l}\t{%6, %5|%5, %6}", xops);
      ASM_OUTPUT_INTERNAL_LABEL (file, "LPBZ", num_func);
      num_func++;
      break;
    }
}
/* Output assembler code to FILE to increment a counter associated
   with basic block number BLOCKNO.

   If profile_block_flag == 2

        Output code to initialize the global structure `__bb' and
        call the function `__bb_trace_func' which will increment the
        counter.

        `__bb' consists of two words.  In the first word the number
        of the basic block has to be stored.  In the second word
        the address of a block allocated in the object module
        has to be stored.

        The basic block number is given by BLOCKNO.

        The address of the block is given by the label created with

            ASM_GENERATE_INTERNAL_LABEL (BUFFER, "LPBX", 0);

        by FUNCTION_BLOCK_PROFILER.

        Of course, since you are writing the definition of
        `ASM_GENERATE_INTERNAL_LABEL' as well as that of this macro, you
        can take a short cut in the definition of this macro and use the
        name that you know will result.

        If described in a virtual assembler language the code to be
        output looks like:

                move BLOCKNO -> (__bb)
                move LPBX0 -> (__bb+4)
                call __bb_trace_func

        Note that function `__bb_trace_func' must not change the
        machine state, especially the flag register.  To grant
        this, you must output code to save and restore registers
        either in this macro or in the macros MACHINE_STATE_SAVE
        and MACHINE_STATE_RESTORE.  The last two macros will be
        used in the function `__bb_trace_func', so you must make
        sure that the function prologue does not change any
        register prior to saving it with MACHINE_STATE_SAVE.

   else if profile_block_flag != 0

        Output code to increment the counter directly.
        Basic blocks are numbered separately from zero within each
        compiled object module.  The count associated with block number
        BLOCKNO is at index BLOCKNO in an array of words; the name of
        this array is a local symbol made with this statement:

            ASM_GENERATE_INTERNAL_LABEL (BUFFER, "LPBX", 2);

        Of course, since you are writing the definition of
        `ASM_GENERATE_INTERNAL_LABEL' as well as that of this macro, you
        can take a short cut in the definition of this macro and use the
        name that you know will result.

        If described in a virtual assembler language the code to be
        output looks like:

                inc (LPBX2+4*BLOCKNO)
*/

void
ix86_output_block_profiler (file, blockno)
     FILE *file ATTRIBUTE_UNUSED;
     int blockno;
{
  rtx xops[8], cnt_rtx;
  char counts[80];
  char *block_table = counts;

  switch (profile_block_flag)
    {
    case 2:
      ASM_GENERATE_INTERNAL_LABEL (block_table, "LPBX", 0);

      xops[1] = gen_rtx_SYMBOL_REF (VOIDmode, block_table);
      xops[2] = GEN_INT (blockno);
      xops[3] = gen_rtx_MEM (Pmode,
                             gen_rtx_SYMBOL_REF (VOIDmode, "__bb_trace_func"));
      xops[4] = gen_rtx_SYMBOL_REF (VOIDmode, "__bb");
      xops[5] = plus_constant (xops[4], 4);
      xops[0] = gen_rtx_MEM (SImode, xops[4]);
      xops[6] = gen_rtx_MEM (SImode, xops[5]);

      CONSTANT_POOL_ADDRESS_P (xops[1]) = TRUE;

      output_asm_insn ("pushf", xops);
      output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);
      if (flag_pic)
        {
          xops[7] = gen_rtx_REG (Pmode, 0);     /* eax */
          output_asm_insn ("push{l}\t%7", xops);
          output_asm_insn ("lea{l}\t{%a1, %7|%7, %a1}", xops);
          output_asm_insn ("mov{l}\t{%7, %6|%6, %7}", xops);
          output_asm_insn ("pop{l}\t%7", xops);
        }
      else
        output_asm_insn ("mov{l}\t{%1, %6|%6, %1}", xops);
      output_asm_insn ("call\t%P3", xops);
      output_asm_insn ("popf", xops);
      break;

    default:
      ASM_GENERATE_INTERNAL_LABEL (counts, "LPBX", 2);
      cnt_rtx = gen_rtx_SYMBOL_REF (VOIDmode, counts);
      SYMBOL_REF_FLAG (cnt_rtx) = TRUE;

      if (blockno)
        cnt_rtx = plus_constant (cnt_rtx, blockno * 4);

      if (flag_pic)
        cnt_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, cnt_rtx);

      xops[0] = gen_rtx_MEM (SImode, cnt_rtx);
      output_asm_insn ("inc{l}\t%0", xops);
      break;
    }
}
void
ix86_expand_move (mode, operands)
     enum machine_mode mode;
     rtx operands[];
{
  int strict = (reload_in_progress || reload_completed);
  rtx insn;

  if (flag_pic && mode == Pmode && symbolic_operand (operands[1], Pmode))
    {
      /* Emit insns to move operands[1] into operands[0].  */

      if (GET_CODE (operands[0]) == MEM)
        operands[1] = force_reg (Pmode, operands[1]);
      else
        {
          rtx temp = operands[0];
          if (GET_CODE (temp) != REG)
            temp = gen_reg_rtx (Pmode);
          temp = legitimize_pic_address (operands[1], temp);
          if (temp == operands[0])
            return;
          operands[1] = temp;
        }
    }
  else
    {
      if (GET_CODE (operands[0]) == MEM
          && (GET_MODE (operands[0]) == QImode
              || !push_operand (operands[0], mode))
          && GET_CODE (operands[1]) == MEM)
        operands[1] = force_reg (mode, operands[1]);

      if (push_operand (operands[0], mode)
          && ! general_no_elim_operand (operands[1], mode))
        operands[1] = copy_to_mode_reg (mode, operands[1]);

      if (FLOAT_MODE_P (mode))
        {
          /* If we are loading a floating point constant to a register,
             force the value to memory now, since we'll get better code
             out the back end.  */

          if (strict)
            ;
          else if (GET_CODE (operands[1]) == CONST_DOUBLE
                   && register_operand (operands[0], mode))
            operands[1] = validize_mem (force_const_mem (mode, operands[1]));
        }
    }

  insn = gen_rtx_SET (VOIDmode, operands[0], operands[1]);

  emit_insn (insn);
}
/* Attempt to expand a binary operator.  Make the expansion closer to the
   actual machine, then just general_operand, which will allow 3 separate
   memory references (one output, two input) in a single insn.  */

void
ix86_expand_binary_operator (code, mode, operands)
     enum rtx_code code;
     enum machine_mode mode;
     rtx operands[];
{
  int matching_memory;
  rtx src1, src2, dst, op, clob;

  dst = operands[0];
  src1 = operands[1];
  src2 = operands[2];

  /* Recognize <var1> = <value> <op> <var1> for commutative operators */
  if (GET_RTX_CLASS (code) == 'c'
      && (rtx_equal_p (dst, src2)
          || immediate_operand (src1, mode)))
    {
      rtx temp = src1;
      src1 = src2;
      src2 = temp;
    }

  /* If the destination is memory, and we do not have matching source
     operands, do things in registers.  */
  matching_memory = 0;
  if (GET_CODE (dst) == MEM)
    {
      if (rtx_equal_p (dst, src1))
        matching_memory = 1;
      else if (GET_RTX_CLASS (code) == 'c'
               && rtx_equal_p (dst, src2))
        matching_memory = 2;
      else
        dst = gen_reg_rtx (mode);
    }

  /* Both source operands cannot be in memory.  */
  if (GET_CODE (src1) == MEM && GET_CODE (src2) == MEM)
    {
      if (matching_memory != 2)
        src2 = force_reg (mode, src2);
      else
        src1 = force_reg (mode, src1);
    }

  /* If the operation is not commutable, source 1 cannot be a constant
     or non-matching memory.  */
  if ((CONSTANT_P (src1)
       || (!matching_memory && GET_CODE (src1) == MEM))
      && GET_RTX_CLASS (code) != 'c')
    src1 = force_reg (mode, src1);

  /* If optimizing, copy to regs to improve CSE */
  if (optimize && ! no_new_pseudos)
    {
      if (GET_CODE (dst) == MEM)
        dst = gen_reg_rtx (mode);
      if (GET_CODE (src1) == MEM)
        src1 = force_reg (mode, src1);
      if (GET_CODE (src2) == MEM)
        src2 = force_reg (mode, src2);
    }

  /* Emit the instruction.  */

  op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, mode, src1, src2));
  if (reload_in_progress)
    {
      /* Reload doesn't know about the flags register, and doesn't know that
         it doesn't want to clobber it.  We can only do this with PLUS.  */
      if (code != PLUS)
        abort ();
      emit_insn (op);
    }
  else
    {
      clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
      emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
    }

  /* Fix up the destination if needed.  */
  if (dst != operands[0])
    emit_move_insn (operands[0], dst);
}
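#if 0
/* Illustrative sketch (editorial addition, not from the original
   source): how an addsi3-style expander might call the helper above
   to emit a flag-clobbering add.  */
{
  rtx ops[3];

  ops[0] = gen_reg_rtx (SImode);        /* destination */
  ops[1] = gen_rtx_REG (SImode, 0);     /* source 1: %eax */
  ops[2] = GEN_INT (42);                /* source 2: immediate */
  ix86_expand_binary_operator (PLUS, SImode, ops);
  /* Emits (parallel [(set ops[0] (plus ops[1] (const_int 42)))
                      (clobber (reg:CC FLAGS_REG))]).  */
}
#endif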
/* Return TRUE or FALSE depending on whether the binary operator meets the
   appropriate constraints.  */

int
ix86_binary_operator_ok (code, mode, operands)
     enum rtx_code code;
     enum machine_mode mode ATTRIBUTE_UNUSED;
     rtx operands[3];
{
  /* Both source operands cannot be in memory.  */
  if (GET_CODE (operands[1]) == MEM && GET_CODE (operands[2]) == MEM)
    return 0;
  /* If the operation is not commutable, source 1 cannot be a constant.  */
  if (CONSTANT_P (operands[1]) && GET_RTX_CLASS (code) != 'c')
    return 0;
  /* If the destination is memory, we must have a matching source operand.  */
  if (GET_CODE (operands[0]) == MEM
      && ! (rtx_equal_p (operands[0], operands[1])
            || (GET_RTX_CLASS (code) == 'c'
                && rtx_equal_p (operands[0], operands[2]))))
    return 0;
  /* If the operation is not commutable and the source 1 is memory, we must
     have a matching destination.  */
  if (GET_CODE (operands[1]) == MEM
      && GET_RTX_CLASS (code) != 'c'
      && ! rtx_equal_p (operands[0], operands[1]))
    return 0;

  return TRUE;
}
/* Attempt to expand a unary operator.  Make the expansion closer to the
   actual machine, then just general_operand, which will allow 2 separate
   memory references (one output, one input) in a single insn.  */

void
ix86_expand_unary_operator (code, mode, operands)
     enum rtx_code code;
     enum machine_mode mode;
     rtx operands[];
{
  int matching_memory;
  rtx src, dst, op, clob;

  dst = operands[0];
  src = operands[1];

  /* If the destination is memory, and we do not have matching source
     operands, do things in registers.  */
  matching_memory = 0;
  if (GET_CODE (dst) == MEM)
    {
      if (rtx_equal_p (dst, src))
        matching_memory = 1;
      else
        dst = gen_reg_rtx (mode);
    }

  /* When source operand is memory, destination must match.  */
  if (!matching_memory && GET_CODE (src) == MEM)
    src = force_reg (mode, src);

  /* If optimizing, copy to regs to improve CSE */
  if (optimize && ! no_new_pseudos)
    {
      if (GET_CODE (dst) == MEM)
        dst = gen_reg_rtx (mode);
      if (GET_CODE (src) == MEM)
        src = force_reg (mode, src);
    }

  /* Emit the instruction.  */

  op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_e (code, mode, src));
  if (reload_in_progress || code == NOT)
    {
      /* Reload doesn't know about the flags register, and doesn't know that
         it doesn't want to clobber it.  */
      if (code != NOT)
        abort ();
      emit_insn (op);
    }
  else
    {
      clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
      emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
    }

  /* Fix up the destination if needed.  */
  if (dst != operands[0])
    emit_move_insn (operands[0], dst);
}
/* Return TRUE or FALSE depending on whether the unary operator meets the
   appropriate constraints.  */

int
ix86_unary_operator_ok (code, mode, operands)
     enum rtx_code code ATTRIBUTE_UNUSED;
     enum machine_mode mode ATTRIBUTE_UNUSED;
     rtx operands[2] ATTRIBUTE_UNUSED;
{
  /* If one of operands is memory, source and destination must match.  */
  if ((GET_CODE (operands[0]) == MEM
       || GET_CODE (operands[1]) == MEM)
      && ! rtx_equal_p (operands[0], operands[1]))
    return FALSE;
  return TRUE;
}
/* Return TRUE or FALSE depending on whether the first SET in INSN
   has source and destination with matching CC modes, and that the
   CC mode is at least as constrained as REQ_MODE.  */

int
ix86_match_ccmode (insn, req_mode)
     rtx insn;
     enum machine_mode req_mode;
{
  rtx set;
  enum machine_mode set_mode;

  set = PATTERN (insn);
  if (GET_CODE (set) == PARALLEL)
    set = XVECEXP (set, 0, 0);
  if (GET_CODE (set) != SET)
    abort ();

  set_mode = GET_MODE (SET_DEST (set));
  switch (set_mode)
    {
    case CCmode:
      if (req_mode == CCNOmode)
        return 0;
      /* FALLTHRU */
    case CCNOmode:
      if (req_mode == CCZmode)
        return 0;
      /* FALLTHRU */
    case CCZmode:
      break;

    default:
      abort ();
    }

  return (GET_MODE (SET_SRC (set)) == set_mode);
}
/* Produce an unsigned comparison for a given signed comparison.  */

static enum rtx_code
unsigned_comparison (code)
     enum rtx_code code;
{
  switch (code)
    {
    case GT:
      code = GTU;
      break;
    case LT:
      code = LTU;
      break;
    case GE:
      code = GEU;
      break;
    case LE:
      code = LEU;
      break;
    case EQ:
    case NE:
    case LEU:
    case LTU:
    case GEU:
    case GTU:
    case UNORDERED:
    case ORDERED:
      break;
    default:
      abort ();
    }
  return code;
}
/* Generate insn patterns to do an integer compare of OPERANDS.  */

static rtx
ix86_expand_int_compare (code, op0, op1)
     enum rtx_code code;
     rtx op0, op1;
{
  enum machine_mode cmpmode;
  rtx tmp, flags;

  cmpmode = SELECT_CC_MODE (code, op0, op1);
  flags = gen_rtx_REG (cmpmode, FLAGS_REG);

  /* This is very simple, but making the interface the same as in the
     FP case makes the rest of the code easier.  */
  tmp = gen_rtx_COMPARE (cmpmode, op0, op1);
  emit_insn (gen_rtx_SET (VOIDmode, flags, tmp));

  /* Return the test that should be put into the flags user, i.e.
     the bcc, scc, or cmov instruction.  */
  return gen_rtx_fmt_ee (code, VOIDmode, flags, const0_rtx);
}
/* Figure out whether to use ordered or unordered fp comparisons.
   Return the appropriate mode to use.  */

static enum machine_mode
ix86_fp_compare_mode (code)
     enum rtx_code code;
{
  int unordered;

  switch (code)
    {
    case NE: case EQ:
      /* When not doing IEEE compliant compares, fault on NaNs.  */
      unordered = (TARGET_IEEE_FP != 0);
      break;

    case LT: case LE: case GT: case GE:
      unordered = 0;
      break;

    case UNORDERED: case ORDERED:
    case UNEQ: case UNGE: case UNGT: case UNLE: case UNLT: case LTGT:
      unordered = 1;
      break;

    default:
      abort ();
    }

  /* ??? If we knew whether invalid-operand exceptions were masked,
     we could rely on fcom to raise an exception and take care of
     NaNs.  But we don't.  We could know this from c99 math pragmas.  */

  return unordered ? CCFPUmode : CCFPmode;
}
/* Return true if we should use an FCOMI instruction for this fp comparison.  */

int
ix86_use_fcomi_compare (code)
     enum rtx_code code;
{
  return (TARGET_CMOVE
          && (code == ORDERED || code == UNORDERED
              /* All other unordered compares require checking
                 multiple sets of bits.  */
              || ix86_fp_compare_mode (code) == CCFPmode));
}
/* Swap, force into registers, or otherwise massage the two operands
   to a fp comparison.  The operands are updated in place; the new
   comparison code is returned.  */

static enum rtx_code
ix86_prepare_fp_compare_args (code, pop0, pop1)
     enum rtx_code code;
     rtx *pop0, *pop1;
{
  enum machine_mode fpcmp_mode = ix86_fp_compare_mode (code);
  rtx op0 = *pop0, op1 = *pop1;
  enum machine_mode op_mode = GET_MODE (op0);

  /* All of the unordered compare instructions only work on registers.
     The same is true of the XFmode compare instructions.  The same is
     true of the fcomi compare instructions.  */

  if (fpcmp_mode == CCFPUmode
      || op_mode == XFmode
      || ix86_use_fcomi_compare (code))
    {
      op0 = force_reg (op_mode, op0);
      op1 = force_reg (op_mode, op1);
    }
  else
    {
      /* %%% We only allow op1 in memory; op0 must be st(0).  So swap
         things around if they appear profitable, otherwise force op0
         into a register.  */

      if (standard_80387_constant_p (op0) == 0
          || (GET_CODE (op0) == MEM
              && ! (standard_80387_constant_p (op1) == 0
                    || GET_CODE (op1) == MEM)))
        {
          rtx tmp;
          tmp = op0, op0 = op1, op1 = tmp;
          code = swap_condition (code);
        }

      if (GET_CODE (op0) != REG)
        op0 = force_reg (op_mode, op0);

      if (CONSTANT_P (op1))
        {
          if (standard_80387_constant_p (op1))
            op1 = force_reg (op_mode, op1);
          else
            op1 = validize_mem (force_const_mem (op_mode, op1));
        }
    }

  *pop0 = op0;
  *pop1 = op1;
  return code;
}
/* Generate insn patterns to do a floating point compare of OPERANDS.  */

static rtx
ix86_expand_fp_compare (code, op0, op1, scratch)
     enum rtx_code code;
     rtx op0, op1, scratch;
{
  enum machine_mode fpcmp_mode, intcmp_mode;
  rtx tmp;

  fpcmp_mode = ix86_fp_compare_mode (code);
  code = ix86_prepare_fp_compare_args (code, &op0, &op1);

  /* %%% fcomi is probably always faster, even when dealing with memory,
     since compare-and-branch would be three insns instead of four.  */
  if (ix86_use_fcomi_compare (code))
    {
      tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
      tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG), tmp);
      emit_insn (tmp);

      /* The FP codes work out to act like unsigned.  */
      code = unsigned_comparison (code);
      intcmp_mode = CCmode;
    }
  else
    {
      /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first.  */

      rtx tmp2;
      tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
      tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), 9);
      emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));

      if (fpcmp_mode == CCFPmode
          || code == ORDERED
          || code == UNORDERED)
        {
          /* We have two options here -- use sahf, or testing bits of ah
             directly.  On PPRO, they are equivalent, sahf being one byte
             smaller.  On Pentium, sahf is non-pairable while test is UV
             pairable.  */

          if (TARGET_USE_SAHF || optimize_size)
            {
            do_sahf:
              emit_insn (gen_x86_sahf_1 (scratch));

              /* The FP codes work out to act like unsigned.  */
              code = unsigned_comparison (code);
              intcmp_mode = CCmode;
            }
          else
            {
              /*
               * The numbers below correspond to the bits of the FPSW in AH.
               * C3, C2, and C0 are in bits 0x40, 0x4, and 0x01 respectively.
               *
               *    cmp    C3 C2 C0
               *    >      0  0  0
               *    <      0  0  1
               *    =      1  0  0
               *    un     1  1  1
               */

              int mask;

              switch (code)
                {
                case GT:
                  mask = 0x41;
                  code = EQ;
                  break;
                case LT:
                  mask = 0x01;
                  code = NE;
                  break;
                case GE:
                  /* We'd have to use `xorb 1,ah; andb 0x41,ah', so it's
                     faster in all cases to just fall back on sahf.  */
                  goto do_sahf;
                case LE:
                  mask = 0x41;
                  code = NE;
                  break;
                case EQ:
                  mask = 0x40;
                  code = NE;
                  break;
                case NE:
                  mask = 0x40;
                  code = EQ;
                  break;
                case UNORDERED:
                  mask = 0x04;
                  code = NE;
                  break;
                case ORDERED:
                  mask = 0x04;
                  code = EQ;
                  break;

                default:
                  abort ();
                }

              emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (mask)));
              intcmp_mode = CCNOmode;
            }
        }
      else
        {
          /* In the unordered case, we have to check C2 for NaN's, which
             doesn't happen to work out to anything nice combination-wise.
             So do some bit twiddling on the value we've got in AH to come
             up with an appropriate set of condition codes.  */

          intcmp_mode = CCNOmode;
          switch (code)
            {
            case GT:
              emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
              code = EQ;
              break;
            case LT:
              emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
              emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x01)));
              intcmp_mode = CCmode;
              code = EQ;
              break;
            case GE:
              emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x05)));
              code = EQ;
              break;
            case LE:
              emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
              emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
              emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
              intcmp_mode = CCmode;
              code = LTU;
              break;
            case EQ:
              emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
              emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
              intcmp_mode = CCmode;
              code = EQ;
              break;
            case NE:
              emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
              emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch, GEN_INT (0x40)));
              code = NE;
              break;

            case UNORDERED:
              emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
              code = NE;
              break;
            case ORDERED:
              emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
              code = EQ;
              break;
            case UNEQ:
              emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
              code = NE;
              break;
            case UNGE:
              emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
              emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch, GEN_INT (0x01)));
              code = NE;
              break;
            case UNGT:
              emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
              emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
              emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x44)));
              intcmp_mode = CCmode;
              code = GEU;
              break;
            case UNLE:
              emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
              code = NE;
              break;
            case UNLT:
              emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x01)));
              code = NE;
              break;
            case LTGT:
              emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
              code = EQ;
              break;

            default:
              abort ();
            }
        }
    }

  /* Return the test that should be put into the flags user, i.e.
     the bcc, scc, or cmov instruction.  */
  return gen_rtx_fmt_ee (code, VOIDmode,
                         gen_rtx_REG (intcmp_mode, FLAGS_REG),
                         const0_rtx);
}
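/* Worked example (editorial addition, not from the original source):
   for a CCFPmode GT compare handled via fnstsw, the mask switch above
   picks mask = 0x41 (C3|C0) and code = EQ, so the emitted sequence is
   roughly

        fcom   %st(1)
        fnstsw %ax
        testb  $0x41, %ah
        je     <target>

   i.e. "greater" holds exactly when neither C3 nor C0 is set.  */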
static rtx
ix86_expand_compare (code)
     enum rtx_code code;
{
  rtx op0, op1, ret;
  op0 = ix86_compare_op0;
  op1 = ix86_compare_op1;

  if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_FLOAT)
    ret = ix86_expand_fp_compare (code, op0, op1, gen_reg_rtx (HImode));
  else
    ret = ix86_expand_int_compare (code, op0, op1);

  return ret;
}
void
ix86_expand_branch (code, label)
     enum rtx_code code;
     rtx label;
{
  rtx tmp;

  switch (GET_MODE (ix86_compare_op0))
    {
    case QImode:
    case HImode:
    case SImode:
      tmp = ix86_expand_compare (code);
      tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
                                  gen_rtx_LABEL_REF (VOIDmode, label),
                                  pc_rtx);
      emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
      return;

    case SFmode:
    case DFmode:
    case XFmode:
      {
        rtvec vec;
        int use_fcomi;

        /* Don't expand the comparison early, so that we get better code
           when jump or whoever decides to reverse the comparison.  */

        code = ix86_prepare_fp_compare_args (code, &ix86_compare_op0,
                                             &ix86_compare_op1);

        tmp = gen_rtx_fmt_ee (code, ix86_fp_compare_mode (code),
                              ix86_compare_op0, ix86_compare_op1);
        tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
                                    gen_rtx_LABEL_REF (VOIDmode, label),
                                    pc_rtx);
        tmp = gen_rtx_SET (VOIDmode, pc_rtx, tmp);

        use_fcomi = ix86_use_fcomi_compare (code);
        vec = rtvec_alloc (3 + !use_fcomi);
        RTVEC_ELT (vec, 0) = tmp;
        RTVEC_ELT (vec, 1)
          = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, 18));
        RTVEC_ELT (vec, 2)
          = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, 17));
        if (! use_fcomi)
          RTVEC_ELT (vec, 3)
            = gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (HImode));

        emit_jump_insn (gen_rtx_PARALLEL (VOIDmode, vec));
        return;
      }
    case DImode:
      /* Expand DImode branch into multiple compare+branch.  */
      {
        rtx lo[2], hi[2], label2;
        enum rtx_code code1, code2, code3;

        if (CONSTANT_P (ix86_compare_op0) && ! CONSTANT_P (ix86_compare_op1))
          {
            tmp = ix86_compare_op0;
            ix86_compare_op0 = ix86_compare_op1;
            ix86_compare_op1 = tmp;
            code = swap_condition (code);
          }
        split_di (&ix86_compare_op0, 1, lo+0, hi+0);
        split_di (&ix86_compare_op1, 1, lo+1, hi+1);

        /* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to
           avoid two branches.  This costs one extra insn, so disable when
           optimizing for size.  */

        if ((code == EQ || code == NE)
            && (!optimize_size
                || hi[1] == const0_rtx || lo[1] == const0_rtx))
          {
            rtx xor0, xor1;

            xor1 = hi[0];
            if (hi[1] != const0_rtx)
              xor1 = expand_binop (SImode, xor_optab, xor1, hi[1],
                                   NULL_RTX, 0, OPTAB_WIDEN);

            xor0 = lo[0];
            if (lo[1] != const0_rtx)
              xor0 = expand_binop (SImode, xor_optab, xor0, lo[1],
                                   NULL_RTX, 0, OPTAB_WIDEN);

            tmp = expand_binop (SImode, ior_optab, xor1, xor0,
                                NULL_RTX, 0, OPTAB_WIDEN);

            ix86_compare_op0 = tmp;
            ix86_compare_op1 = const0_rtx;
            ix86_expand_branch (code, label);
            return;
          }
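        /* Illustrative sketch, not part of the build: the
           (hi0^hi1)|(lo0^lo1) trick above in plain C.  Two 64-bit values
           split into 32-bit halves are equal exactly when the OR of the
           XORed halves is zero, so one compare against zero replaces two
           compare+branch pairs.  */
#if 0
#include <assert.h>
#include <stdint.h>

static int
di_equal (uint32_t lo0, uint32_t hi0, uint32_t lo1, uint32_t hi1)
{
  return ((hi0 ^ hi1) | (lo0 ^ lo1)) == 0;
}

static void
di_equal_demo (void)
{
  assert (di_equal (0x1234, 0x5678, 0x1234, 0x5678));
  assert (!di_equal (0x1234, 0x5678, 0x1234, 0x5679));
}
#endif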
        /* Otherwise, if we are doing less-than, op1 is a constant and the
           low word is zero, then we can just examine the high word.  */

        if (GET_CODE (hi[1]) == CONST_INT && lo[1] == const0_rtx
            && (code == LT || code == LTU))
          {
            ix86_compare_op0 = hi[0];
            ix86_compare_op1 = hi[1];
            ix86_expand_branch (code, label);
            return;
          }
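        /* Illustrative sketch, not part of the build: why the high words
           decide a < b when b's low word is zero.  With b == (hi1:0), a
           borrow out of the low halves is impossible, so the 64-bit
           unsigned compare equals the 32-bit compare of the high halves.  */
#if 0
#include <assert.h>
#include <stdint.h>

static void
di_lt_hi_only_demo (uint32_t lo0, uint32_t hi0, uint32_t hi1)
{
  uint64_t a = ((uint64_t) hi0 << 32) | lo0;
  uint64_t b = (uint64_t) hi1 << 32;

  assert ((a < b) == (hi0 < hi1));
}
#endif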
        /* Otherwise, we need two or three jumps.  */

        label2 = gen_label_rtx ();

        code1 = code;
        code2 = swap_condition (code);
        code3 = unsigned_condition (code);

        switch (code)
          {
          case LT: case GT: case LTU: case GTU:
            break;

          case LE:   code1 = LT;  code2 = GT;  break;
          case GE:   code1 = GT;  code2 = LT;  break;
          case LEU:  code1 = LTU; code2 = GTU; break;
          case GEU:  code1 = GTU; code2 = LTU; break;

          case EQ:   code1 = NIL; code2 = NE;  break;
          case NE:   code2 = NIL; break;

          default:
            abort ();
          }

        /*
         * a < b =>
         *    if (hi(a) < hi(b)) goto true;
         *    if (hi(a) > hi(b)) goto false;
         *    if (lo(a) < lo(b)) goto true;
         *  false:
         */

        ix86_compare_op0 = hi[0];
        ix86_compare_op1 = hi[1];

        if (code1 != NIL)
          ix86_expand_branch (code1, label);
        if (code2 != NIL)
          ix86_expand_branch (code2, label2);

        ix86_compare_op0 = lo[0];
        ix86_compare_op1 = lo[1];
        ix86_expand_branch (code3, label);

        if (code2 != NIL)
          emit_label (label2);
        return;
      }

    default:
      abort ();
    }
}
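/* Illustrative sketch, not part of the build: the two/three jump sequence
   commented above, written as plain C control flow for an unsigned 64-bit
   a < b built from 32-bit halves.  */
#if 0
#include <stdint.h>

static int
di_ltu (uint32_t lo_a, uint32_t hi_a, uint32_t lo_b, uint32_t hi_b)
{
  if (hi_a < hi_b)
    return 1;			/* goto true */
  if (hi_a > hi_b)
    return 0;			/* goto false */
  return lo_a < lo_b;		/* high words equal: low words decide */
}
#endif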
int
ix86_expand_setcc (code, dest)
     enum rtx_code code;
     rtx dest;
{
  rtx ret, tmp;
  int type;

  if (GET_MODE (ix86_compare_op0) == DImode)
    return 0; /* FAIL */

  /* Three modes of generation:
     0 -- destination does not overlap compare sources:
          clear dest first, emit strict_low_part setcc.
     1 -- destination does overlap compare sources:
          emit subreg setcc, zero extend.
     2 -- destination is in QImode:
          emit setcc only.  */

  type = 0;
  if (GET_MODE (dest) == QImode)
    type = 2;
  else if (reg_overlap_mentioned_p (dest, ix86_compare_op0)
           || reg_overlap_mentioned_p (dest, ix86_compare_op1))
    type = 1;

  if (type == 0)
    emit_move_insn (dest, const0_rtx);

  ret = ix86_expand_compare (code);
  PUT_MODE (ret, QImode);

  tmp = dest;
  if (type == 0)
    {
      tmp = gen_lowpart (QImode, dest);
      tmp = gen_rtx_STRICT_LOW_PART (VOIDmode, tmp);
    }
  else if (type == 1)
    {
      if (!cse_not_expected)
        tmp = gen_reg_rtx (QImode);
      else
        tmp = gen_lowpart (QImode, dest);
    }

  emit_insn (gen_rtx_SET (VOIDmode, tmp, ret));

  if (type == 1)
    {
      rtx clob;

      tmp = gen_rtx_ZERO_EXTEND (GET_MODE (dest), tmp);
      tmp = gen_rtx_SET (VOIDmode, dest, tmp);
      clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
      tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
      emit_insn (tmp);
    }

  return 1; /* DONE */
}
int
ix86_expand_int_movcc (operands)
     rtx operands[];
{
  enum rtx_code code = GET_CODE (operands[1]), compare_code;
  rtx compare_seq, compare_op;

  /* When the compare code is not LTU or GEU, we cannot use the sbbl case.
     If the comparison is done with an immediate, we can convert it to LTU
     or GEU by altering the integer.  */

  if ((code == LEU || code == GTU)
      && GET_CODE (ix86_compare_op1) == CONST_INT
      && GET_MODE (operands[0]) != HImode
      && (unsigned int) INTVAL (ix86_compare_op1) != 0xffffffff
      && GET_CODE (operands[2]) == CONST_INT
      && GET_CODE (operands[3]) == CONST_INT)
    {
      if (code == LEU)
        code = LTU;
      else
        code = GEU;
      ix86_compare_op1 = GEN_INT (INTVAL (ix86_compare_op1) + 1);
    }
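  /* Illustrative sketch, not part of the build: why bumping the immediate
     converts LEU/GTU into LTU/GEU.  For unsigned x and a constant
     c != 0xffffffff, x <= c is the same test as x < c + 1, and x > c the
     same as x >= c + 1; excluding 0xffffffff avoids the wraparound of
     c + 1.  */
#if 0
#include <assert.h>
#include <stdint.h>

static void
leu_to_ltu_demo (uint32_t x, uint32_t c)
{
  assert (c != 0xffffffffu);
  assert ((x <= c) == (x < c + 1));
  assert ((x > c) == (x >= c + 1));
}
#endif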
  start_sequence ();
  compare_op = ix86_expand_compare (code);
  compare_seq = gen_sequence ();
  end_sequence ();

  compare_code = GET_CODE (compare_op);

  /* Don't attempt mode expansion here -- if we had to expand 5 or 6
     HImode insns, we'd be swallowed in word prefix ops.  */

  if (GET_MODE (operands[0]) != HImode
      && GET_CODE (operands[2]) == CONST_INT
      && GET_CODE (operands[3]) == CONST_INT)
    {
      rtx out = operands[0];
      HOST_WIDE_INT ct = INTVAL (operands[2]);
      HOST_WIDE_INT cf = INTVAL (operands[3]);
      HOST_WIDE_INT diff;

      if (compare_code == LTU || compare_code == GEU)
        {
          /* Detect overlap between destination and compare sources.  */
          rtx tmp = out;

          /* To simplify rest of the code, restrict to the GEU case.  */
          if (compare_code == LTU)
            {
              HOST_WIDE_INT tmpval = ct;
              ct = cf, cf = tmpval;
              compare_code = reverse_condition (compare_code);
              code = reverse_condition (code);
            }
          diff = ct - cf;

          if (reg_overlap_mentioned_p (out, ix86_compare_op0)
              || reg_overlap_mentioned_p (out, ix86_compare_op1))
            tmp = gen_reg_rtx (SImode);

          emit_insn (compare_seq);
          emit_insn (gen_x86_movsicc_0_m1 (tmp));

          if (diff == 1)
            {
              if (ct)
                emit_insn (gen_addsi3 (out, out, GEN_INT (ct)));
            }
          else if (cf == -1)
            emit_insn (gen_iorsi3 (out, out, GEN_INT (ct)));
          else if (diff == -1 && ct)
            {
              emit_insn (gen_one_cmplsi2 (tmp, tmp));
              if (cf)
                emit_insn (gen_addsi3 (out, out, GEN_INT (cf)));
            }
          else
            {
              /*
               * andl cf - ct, dest
               * addl ct, dest
               */
              emit_insn (gen_andsi3 (out, out, GEN_INT (cf - ct)));
              if (ct)
                emit_insn (gen_addsi3 (out, out, GEN_INT (ct)));
            }

          if (tmp != out)
            emit_move_insn (out, tmp);

          return 1; /* DONE */
        }
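      /* Illustrative sketch, not part of the build: the sbb idiom driven
         by gen_x86_movsicc_0_m1 above.  After `cmpl; sbbl dest,dest' the
         destination is -1 when the compare was "below" (carry set) and 0
         otherwise; masking and adding then selects between cf and ct
         without a branch.  */
#if 0
#include <assert.h>
#include <stdint.h>

static int32_t
sbb_select (uint32_t op0, uint32_t op1, int32_t ct, int32_t cf)
{
  int32_t mask = (op0 < op1) ? -1 : 0;	/* models sbbl dest,dest */
  return (mask & (cf - ct)) + ct;	/* andl cf-ct; addl ct */
}

static void
sbb_select_demo (void)
{
  assert (sbb_select (1, 2, 10, 20) == 20);	/* below: GEU false -> cf */
  assert (sbb_select (3, 2, 10, 20) == 10);	/* GEU true -> ct */
}
#endif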
      diff = ct - cf;
      if (diff < 0)
        {
          HOST_WIDE_INT tmp;

          tmp = ct, ct = cf, cf = tmp;
          diff = -diff;
          compare_code = reverse_condition (compare_code);
          code = reverse_condition (code);
        }

      if (diff == 1 || diff == 2 || diff == 4 || diff == 8
          || diff == 3 || diff == 5 || diff == 9)
        {
          /*
           * setcc dest
           * lea cf(dest*(ct-cf)),dest
           *
           * This also catches the degenerate setcc-only case.
           */

          rtx tmp;
          int nops;

          out = emit_store_flag (out, code, ix86_compare_op0,
                                 ix86_compare_op1, VOIDmode, 0, 1);

          nops = 0;
          if (diff == 1)
            tmp = out;
          else
            {
              tmp = gen_rtx_MULT (SImode, out, GEN_INT (diff & ~1));
              nops++;
              if (diff & 1)
                {
                  tmp = gen_rtx_PLUS (SImode, tmp, out);
                  nops++;
                }
            }
          if (cf != 0)
            {
              tmp = gen_rtx_PLUS (SImode, tmp, GEN_INT (cf));
              nops++;
            }

          if (tmp != out)
            {
              if (nops == 0)
                emit_move_insn (out, tmp);
              else if (nops == 1)
                {
                  rtx clob;

                  clob = gen_rtx_REG (CCmode, FLAGS_REG);
                  clob = gen_rtx_CLOBBER (VOIDmode, clob);

                  tmp = gen_rtx_SET (VOIDmode, out, tmp);
                  tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
                  emit_insn (tmp);
                }
              else
                emit_insn (gen_rtx_SET (VOIDmode, out, tmp));
            }
          if (out != operands[0])
            emit_move_insn (operands[0], out);

          return 1; /* DONE */
        }
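      /* Illustrative sketch, not part of the build: when ct - cf is one
         of 1,2,4,8,3,5,9 the conditional move collapses to a setcc plus
         one lea, since lea can compute dest*1/2/4/8 (+dest) (+cf) in a
         single flag-preserving instruction.  Plain-C model: */
#if 0
#include <assert.h>
#include <stdint.h>

static int32_t
setcc_lea_select (int cond, int32_t ct, int32_t cf)
{
  int32_t diff = ct - cf;	/* assumed to be 1,2,4,8,3,5 or 9 */
  int32_t bit = cond ? 1 : 0;	/* setcc result */
  return cf + bit * diff;	/* what the single lea computes */
}

static void
setcc_lea_demo (void)
{
  assert (setcc_lea_select (1, 13, 5) == 13);	/* diff == 8 */
  assert (setcc_lea_select (0, 13, 5) == 5);
}
#endif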
      /*
       * General case:			Jumpful:
       *   xorl dest,dest		cmpl op1, op2
       *   cmpl op1, op2		movl ct, dest
       *   setcc dest			jcc 1f
       *   decl dest			movl cf, dest
       *   andl (cf-ct),dest		1:
       *   addl ct,dest
       *
       * This is reasonably steep, but branch mispredict costs are
       * high on modern cpus, so consider failing only if optimizing
       * for space.
       *
       * %%% Parameterize branch_cost on the tuning architecture, then
       * use that.  The 80386 couldn't care less about mispredicts.
       */

      if (!optimize_size && !TARGET_CMOVE)
        {
          if (ct == 0)
            {
              ct = cf;
              cf = 0;
              compare_code = reverse_condition (compare_code);
              code = reverse_condition (code);
            }

          out = emit_store_flag (out, code, ix86_compare_op0,
                                 ix86_compare_op1, VOIDmode, 0, 1);

          emit_insn (gen_addsi3 (out, out, constm1_rtx));
          emit_insn (gen_andsi3 (out, out, GEN_INT (cf - ct)));
          if (ct != 0)
            emit_insn (gen_addsi3 (out, out, GEN_INT (ct)));
          if (out != operands[0])
            emit_move_insn (operands[0], out);

          return 1; /* DONE */
        }
    }
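  /* Illustrative sketch, not part of the build: the branchless sequence
     emitted just above (setcc; decl; andl cf-ct; addl ct) in plain C.
     cond==1 gives (1-1)&(cf-ct)+ct == ct; cond==0 gives -1&(cf-ct)+ct == cf.  */
#if 0
#include <assert.h>
#include <stdint.h>

static int32_t
branchless_select (int cond, int32_t ct, int32_t cf)
{
  int32_t r = cond ? 1 : 0;	/* setcc */
  r -= 1;			/* decl: now 0 or -1 */
  r &= cf - ct;			/* andl */
  return r + ct;		/* addl */
}

static void
branchless_select_demo (void)
{
  assert (branchless_select (1, 7, 42) == 7);
  assert (branchless_select (0, 7, 42) == 42);
}
#endif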
  if (!TARGET_CMOVE)
    {
      /* Try a few things more with specific constants and a variable.  */

      optab op;
      rtx var, orig_out, out, tmp;

      /* If one of the two operands is an interesting constant, load a
         constant with the above and mask it in with a logical operation.  */

      if (GET_CODE (operands[2]) == CONST_INT)
        {
          var = operands[3];
          if (INTVAL (operands[2]) == 0)
            operands[3] = constm1_rtx, op = and_optab;
          else if (INTVAL (operands[2]) == -1)
            operands[3] = const0_rtx, op = ior_optab;
          else
            return 0; /* FAIL */
        }
      else if (GET_CODE (operands[3]) == CONST_INT)
        {
          var = operands[2];
          if (INTVAL (operands[3]) == 0)
            operands[2] = constm1_rtx, op = and_optab;
          else if (INTVAL (operands[3]) == -1)
            operands[2] = const0_rtx, op = ior_optab;
          else
            return 0; /* FAIL */
        }
      else
        return 0; /* FAIL */

      orig_out = operands[0];
      tmp = gen_reg_rtx (GET_MODE (orig_out));
      operands[0] = tmp;

      /* Recurse to get the constant loaded.  */
      if (ix86_expand_int_movcc (operands) == 0)
        return 0; /* FAIL */

      /* Mask in the interesting variable.  */
      out = expand_binop (GET_MODE (orig_out), op, var, tmp, orig_out, 0,
                          OPTAB_WIDEN);
      if (out != orig_out)
        emit_move_insn (orig_out, out);

      return 1; /* DONE */
    }
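  /* Illustrative sketch, not part of the build: the constant-masking
     recursion above.  When one arm is 0 or -1, the conditional move
     reduces to materializing a 0/-1 mask and then ANDing or ORing the
     variable arm into it.  */
#if 0
#include <stdint.h>

static int32_t
movcc_and_mask (int cond, int32_t var)
{
  /* cond ? 0 : var  ==  (cond ? 0 : -1) & var  */
  int32_t mask = cond ? 0 : -1;
  return mask & var;
}

static int32_t
movcc_ior_mask (int cond, int32_t var)
{
  /* cond ? -1 : var  ==  (cond ? -1 : 0) | var  */
  int32_t mask = cond ? -1 : 0;
  return mask | var;
}
#endif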
  /*
   * For comparison with above,
   *
   * movl cf,dest
   * movl ct,tmp
   * cmpl op1,op2
   * cmovcc tmp,dest
   */

  if (! nonimmediate_operand (operands[2], GET_MODE (operands[0])))
    operands[2] = force_reg (GET_MODE (operands[0]), operands[2]);
  if (! nonimmediate_operand (operands[3], GET_MODE (operands[0])))
    operands[3] = force_reg (GET_MODE (operands[0]), operands[3]);

  emit_insn (compare_seq);
  emit_insn (gen_rtx_SET (VOIDmode, operands[0],
                          gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
                                                compare_op, operands[2],
                                                operands[3])));

  return 1; /* DONE */
}
int
ix86_expand_fp_movcc (operands)
     rtx operands[];
{
  enum rtx_code code;
  enum machine_mode mode;
  rtx tmp;

  /* The floating point conditional move instructions don't directly
     support conditions resulting from a signed integer comparison.  */

  code = GET_CODE (operands[1]);
  if (!fcmov_comparison_operator (operands[1], VOIDmode))
    {
      tmp = gen_reg_rtx (QImode);
      ix86_expand_setcc (code, tmp);
      code = NE;
      ix86_compare_op0 = tmp;
      ix86_compare_op1 = const0_rtx;
    }

  mode = SELECT_CC_MODE (code, ix86_compare_op0, ix86_compare_op1);
  emit_insn (gen_rtx_SET (VOIDmode, gen_rtx_REG (mode, FLAGS_REG),
                          gen_rtx_COMPARE (mode,
                                           ix86_compare_op0,
                                           ix86_compare_op1)));
  emit_insn (gen_rtx_SET (VOIDmode, operands[0],
                          gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
                                                gen_rtx_fmt_ee (code, VOIDmode,
                                                                gen_rtx_REG (mode, FLAGS_REG),
                                                                const0_rtx),
                                                operands[2], operands[3])));

  return 1; /* DONE */
}
/* Split operands 0 and 1 into SImode parts.  Similar to split_di, but
   works for floating point parameters and nonoffsettable memories.
   For pushes, it returns just stack offsets; the values will be saved
   in the right order.  Maximally three parts are generated.  */

static int
ix86_split_to_parts (operand, parts, mode)
     rtx operand;
     rtx *parts;
     enum machine_mode mode;
{
  int size = GET_MODE_SIZE (mode) / 4;

  if (size < 2 || size > 3)
    abort ();

  /* Optimize constant pool reference to immediates.  This is used by fp
     moves, that force all constants to memory to allow combining.  */

  if (GET_CODE (operand) == MEM
      && GET_CODE (XEXP (operand, 0)) == SYMBOL_REF
      && CONSTANT_POOL_ADDRESS_P (XEXP (operand, 0)))
    operand = get_pool_constant (XEXP (operand, 0));

  if (GET_CODE (operand) == MEM && !offsettable_memref_p (operand))
    {
      /* The only non-offsettable memories we handle are pushes.  */
      if (! push_operand (operand, VOIDmode))
        abort ();

      PUT_MODE (operand, SImode);
      parts[0] = parts[1] = parts[2] = operand;
    }
  else if (mode == DImode)
    split_di (&operand, 1, &parts[0], &parts[1]);
  else
    {
      if (REG_P (operand))
        {
          if (!reload_completed)
            abort ();
          parts[0] = gen_rtx_REG (SImode, REGNO (operand) + 0);
          parts[1] = gen_rtx_REG (SImode, REGNO (operand) + 1);
          if (size == 3)
            parts[2] = gen_rtx_REG (SImode, REGNO (operand) + 2);
        }
      else if (offsettable_memref_p (operand))
        {
          PUT_MODE (operand, SImode);
          parts[0] = operand;
          parts[1] = adj_offsettable_operand (operand, 4);
          if (size == 3)
            parts[2] = adj_offsettable_operand (operand, 8);
        }
      else if (GET_CODE (operand) == CONST_DOUBLE)
        {
          REAL_VALUE_TYPE r;
          long l[3];

          REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
          if (mode == XFmode)
            {
              REAL_VALUE_TO_TARGET_LONG_DOUBLE (r, l);
              parts[2] = GEN_INT (l[2]);
            }
          else
            REAL_VALUE_TO_TARGET_DOUBLE (r, l);
          parts[1] = GEN_INT (l[1]);
          parts[0] = GEN_INT (l[0]);
        }
      else
        abort ();
    }

  return size;
}
/* Emit insns to perform a move or push of DI, DF, and XF values.
   Return false when normal moves are needed; true when all required
   insns have been emitted.  Operands 2-4 contain the input values
   in the correct order; operands 5-7 contain the output values.  */

int
ix86_split_long_move (operands1)
     rtx operands1[];
{
  rtx part[2][3];
  rtx operands[2];
  int size = GET_MODE_SIZE (GET_MODE (operands1[0])) / 4;
  int push = 0;
  int collisions = 0;

  /* Make our own copy to avoid clobbering the operands.  */
  operands[0] = copy_rtx (operands1[0]);
  operands[1] = copy_rtx (operands1[1]);

  if (size < 2 || size > 3)
    abort ();

  /* The only non-offsettable memory we handle is push.  */
  if (push_operand (operands[0], VOIDmode))
    push = 1;
  else if (GET_CODE (operands[0]) == MEM
           && ! offsettable_memref_p (operands[0]))
    abort ();

  ix86_split_to_parts (operands[0], part[0], GET_MODE (operands1[0]));
  ix86_split_to_parts (operands[1], part[1], GET_MODE (operands1[0]));

  /* When emitting push, take care for source operands on the stack.  */
  if (push && GET_CODE (operands[1]) == MEM
      && reg_overlap_mentioned_p (stack_pointer_rtx, operands[1]))
    {
      if (size == 3)
        part[1][1] = part[1][2];
      part[1][0] = part[1][1];
    }

  /* We need to do copy in the right order in case an address register
     of the source overlaps the destination.  */
  if (REG_P (part[0][0]) && GET_CODE (part[1][0]) == MEM)
    {
      if (reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0)))
        collisions++;
      if (reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
        collisions++;
      if (size == 3
          && reg_overlap_mentioned_p (part[0][2], XEXP (part[1][0], 0)))
        collisions++;

      /* Collision in the middle part can be handled by reordering.  */
      if (collisions == 1 && size == 3
          && reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
        {
          rtx tmp;

          tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
          tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
        }

      /* If there are more collisions, we can't handle it by reordering.
         Do an lea to the last part and use only one colliding move.  */
      else if (collisions > 1)
        {
          collisions = 1;
          emit_insn (gen_rtx_SET (VOIDmode, part[0][size - 1],
                                  XEXP (part[1][0], 0)));
          part[1][0] = change_address (part[1][0], SImode, part[0][size - 1]);
          part[1][1] = adj_offsettable_operand (part[1][0], 4);
          if (size == 3)
            part[1][2] = adj_offsettable_operand (part[1][0], 8);
        }
    }

  if (push)
    {
      if (size == 3)
        emit_insn (gen_push (part[1][2]));
      emit_insn (gen_push (part[1][1]));
      emit_insn (gen_push (part[1][0]));
      return 1;
    }

  /* Choose correct order to not overwrite the source before it is copied.  */
  if ((REG_P (part[0][0])
       && REG_P (part[1][1])
       && (REGNO (part[0][0]) == REGNO (part[1][1])
           || (size == 3
               && REGNO (part[0][0]) == REGNO (part[1][2]))))
      || (collisions > 0
          && reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0))))
    {
      if (size == 3)
        {
          operands1[2] = part[0][2];
          operands1[3] = part[0][1];
          operands1[4] = part[0][0];
          operands1[5] = part[1][2];
          operands1[6] = part[1][1];
          operands1[7] = part[1][0];
        }
      else
        {
          operands1[2] = part[0][1];
          operands1[3] = part[0][0];
          operands1[5] = part[1][1];
          operands1[6] = part[1][0];
        }
    }
  else
    {
      if (size == 3)
        {
          operands1[2] = part[0][0];
          operands1[3] = part[0][1];
          operands1[4] = part[0][2];
          operands1[5] = part[1][0];
          operands1[6] = part[1][1];
          operands1[7] = part[1][2];
        }
      else
        {
          operands1[2] = part[0][0];
          operands1[3] = part[0][1];
          operands1[5] = part[1][0];
          operands1[6] = part[1][1];
        }
    }

  return 0;
}
void
ix86_split_ashldi (operands, scratch)
     rtx *operands, scratch;
{
  rtx low[2], high[2];
  int count;

  if (GET_CODE (operands[2]) == CONST_INT)
    {
      split_di (operands, 2, low, high);
      count = INTVAL (operands[2]) & 63;

      if (count >= 32)
        {
          emit_move_insn (high[0], low[1]);
          emit_move_insn (low[0], const0_rtx);

          if (count > 32)
            emit_insn (gen_ashlsi3 (high[0], high[0], GEN_INT (count - 32)));
        }
      else
        {
          if (!rtx_equal_p (operands[0], operands[1]))
            emit_move_insn (operands[0], operands[1]);
          emit_insn (gen_x86_shld_1 (high[0], low[0], GEN_INT (count)));
          emit_insn (gen_ashlsi3 (low[0], low[0], GEN_INT (count)));
        }
    }
  else
    {
      if (!rtx_equal_p (operands[0], operands[1]))
        emit_move_insn (operands[0], operands[1]);

      split_di (operands, 1, low, high);

      emit_insn (gen_x86_shld_1 (high[0], low[0], operands[2]));
      emit_insn (gen_ashlsi3 (low[0], low[0], operands[2]));

      if (TARGET_CMOVE && (! no_new_pseudos || scratch))
        {
          if (! no_new_pseudos)
            scratch = force_reg (SImode, const0_rtx);
          else
            emit_move_insn (scratch, const0_rtx);

          emit_insn (gen_x86_shift_adj_1 (high[0], low[0], operands[2],
                                          scratch));
        }
      else
        emit_insn (gen_x86_shift_adj_2 (high[0], low[0], operands[2]));
    }
}
void
ix86_split_ashrdi (operands, scratch)
     rtx *operands, scratch;
{
  rtx low[2], high[2];
  int count;

  if (GET_CODE (operands[2]) == CONST_INT)
    {
      split_di (operands, 2, low, high);
      count = INTVAL (operands[2]) & 63;

      if (count >= 32)
        {
          emit_move_insn (low[0], high[1]);

          if (! reload_completed)
            emit_insn (gen_ashrsi3 (high[0], low[0], GEN_INT (31)));
          else
            {
              emit_move_insn (high[0], low[0]);
              emit_insn (gen_ashrsi3 (high[0], high[0], GEN_INT (31)));
            }

          if (count > 32)
            emit_insn (gen_ashrsi3 (low[0], low[0], GEN_INT (count - 32)));
        }
      else
        {
          if (!rtx_equal_p (operands[0], operands[1]))
            emit_move_insn (operands[0], operands[1]);
          emit_insn (gen_x86_shrd_1 (low[0], high[0], GEN_INT (count)));
          emit_insn (gen_ashrsi3 (high[0], high[0], GEN_INT (count)));
        }
    }
  else
    {
      if (!rtx_equal_p (operands[0], operands[1]))
        emit_move_insn (operands[0], operands[1]);

      split_di (operands, 1, low, high);

      emit_insn (gen_x86_shrd_1 (low[0], high[0], operands[2]));
      emit_insn (gen_ashrsi3 (high[0], high[0], operands[2]));

      if (TARGET_CMOVE && (! no_new_pseudos || scratch))
        {
          if (! no_new_pseudos)
            scratch = gen_reg_rtx (SImode);
          emit_move_insn (scratch, high[0]);
          emit_insn (gen_ashrsi3 (scratch, scratch, GEN_INT (31)));
          emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
                                          scratch));
        }
      else
        emit_insn (gen_x86_shift_adj_3 (low[0], high[0], operands[2]));
    }
}
void
ix86_split_lshrdi (operands, scratch)
     rtx *operands, scratch;
{
  rtx low[2], high[2];
  int count;

  if (GET_CODE (operands[2]) == CONST_INT)
    {
      split_di (operands, 2, low, high);
      count = INTVAL (operands[2]) & 63;

      if (count >= 32)
        {
          emit_move_insn (low[0], high[1]);
          emit_move_insn (high[0], const0_rtx);

          if (count > 32)
            emit_insn (gen_lshrsi3 (low[0], low[0], GEN_INT (count - 32)));
        }
      else
        {
          if (!rtx_equal_p (operands[0], operands[1]))
            emit_move_insn (operands[0], operands[1]);
          emit_insn (gen_x86_shrd_1 (low[0], high[0], GEN_INT (count)));
          emit_insn (gen_lshrsi3 (high[0], high[0], GEN_INT (count)));
        }
    }
  else
    {
      if (!rtx_equal_p (operands[0], operands[1]))
        emit_move_insn (operands[0], operands[1]);

      split_di (operands, 1, low, high);

      emit_insn (gen_x86_shrd_1 (low[0], high[0], operands[2]));
      emit_insn (gen_lshrsi3 (high[0], high[0], operands[2]));

      /* Heh.  By reversing the arguments, we can reuse this pattern.  */
      if (TARGET_CMOVE && (! no_new_pseudos || scratch))
        {
          if (! no_new_pseudos)
            scratch = force_reg (SImode, const0_rtx);
          else
            emit_move_insn (scratch, const0_rtx);

          emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
                                          scratch));
        }
      else
        emit_insn (gen_x86_shift_adj_2 (low[0], high[0], operands[2]));
    }
}
/* Expand the appropriate insns for doing strlen if not just doing
   repnz; scasb

   out = result, initialized with the start address
   align_rtx = alignment of the address.
   scratch = scratch register, initialized with the start address when
        not aligned, otherwise undefined

   This is just the body.  It needs the initializations mentioned above
   and some address computing at the end.  These things are done in
   i386.md.  */

void
ix86_expand_strlensi_unroll_1 (out, align_rtx, scratch)
     rtx out, align_rtx, scratch;
{
  int align;
  rtx tmp;
  rtx align_2_label = NULL_RTX;
  rtx align_3_label = NULL_RTX;
  rtx align_4_label = gen_label_rtx ();
  rtx end_0_label = gen_label_rtx ();
  rtx mem;
  rtx no_flags = gen_rtx_REG (CCNOmode, FLAGS_REG);
  rtx z_flags = gen_rtx_REG (CCNOmode, FLAGS_REG);
  rtx tmpreg = gen_reg_rtx (SImode);

  align = 0;
  if (GET_CODE (align_rtx) == CONST_INT)
    align = INTVAL (align_rtx);
5836 /* Is there a known alignment and is it less than 4? */
5839 /* Is there a known alignment and is it not 2? */
5842 align_3_label
= gen_label_rtx (); /* Label when aligned to 3-byte */
5843 align_2_label
= gen_label_rtx (); /* Label when aligned to 2-byte */
5845 /* Leave just the 3 lower bits. */
5846 align_rtx
= expand_binop (SImode
, and_optab
, scratch
, GEN_INT (3),
5847 NULL_RTX
, 0, OPTAB_WIDEN
);
5849 emit_insn (gen_cmpsi_ccz_1 (align_rtx
, const0_rtx
));
5851 tmp
= gen_rtx_EQ (VOIDmode
, z_flags
, const0_rtx
);
5852 tmp
= gen_rtx_IF_THEN_ELSE (VOIDmode
, tmp
,
5853 gen_rtx_LABEL_REF (VOIDmode
,
5856 emit_jump_insn (gen_rtx_SET (VOIDmode
, pc_rtx
, tmp
));
5858 emit_insn (gen_cmpsi_ccno_1 (align_rtx
, GEN_INT (2)));
5860 tmp
= gen_rtx_EQ (VOIDmode
, no_flags
, const0_rtx
);
5861 tmp
= gen_rtx_IF_THEN_ELSE (VOIDmode
, tmp
,
5862 gen_rtx_LABEL_REF (VOIDmode
,
5865 emit_jump_insn (gen_rtx_SET (VOIDmode
, pc_rtx
, tmp
));
5867 tmp
= gen_rtx_GTU (VOIDmode
, no_flags
, const0_rtx
);
5868 tmp
= gen_rtx_IF_THEN_ELSE (VOIDmode
, tmp
,
5869 gen_rtx_LABEL_REF (VOIDmode
,
5872 emit_jump_insn (gen_rtx_SET (VOIDmode
, pc_rtx
, tmp
));
5876 /* Since the alignment is 2, we have to check 2 or 0 bytes;
5877 check if is aligned to 4 - byte. */
5879 align_rtx
= expand_binop (SImode
, and_optab
, scratch
, GEN_INT (2),
5880 NULL_RTX
, 0, OPTAB_WIDEN
);
5882 emit_insn (gen_cmpsi_ccz_1 (align_rtx
, const0_rtx
));
5884 tmp
= gen_rtx_EQ (VOIDmode
, z_flags
, const0_rtx
);
5885 tmp
= gen_rtx_IF_THEN_ELSE (VOIDmode
, tmp
,
5886 gen_rtx_LABEL_REF (VOIDmode
,
5889 emit_jump_insn (gen_rtx_SET (VOIDmode
, pc_rtx
, tmp
));
      mem = gen_rtx_MEM (QImode, out);

      /* Now compare the bytes.  */

      /* Compare the first unaligned bytes on a byte-by-byte basis.  */
      emit_insn (gen_cmpqi_ccz_1 (mem, const0_rtx));

      tmp = gen_rtx_EQ (VOIDmode, z_flags, const0_rtx);
      tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
                                  gen_rtx_LABEL_REF (VOIDmode, end_0_label),
                                  pc_rtx);
      emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));

      /* Increment the address.  */
      emit_insn (gen_addsi3 (out, out, const1_rtx));

      /* Not needed with an alignment of 2.  */
      if (align != 2)
        {
          emit_label (align_2_label);

          emit_insn (gen_cmpqi_ccz_1 (mem, const0_rtx));

          tmp = gen_rtx_EQ (VOIDmode, z_flags, const0_rtx);
          tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
                                      gen_rtx_LABEL_REF (VOIDmode,
                                                         end_0_label),
                                      pc_rtx);
          emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));

          emit_insn (gen_addsi3 (out, out, const1_rtx));

          emit_label (align_3_label);
        }

      emit_insn (gen_cmpqi_ccz_1 (mem, const0_rtx));

      tmp = gen_rtx_EQ (VOIDmode, z_flags, const0_rtx);
      tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
                                  gen_rtx_LABEL_REF (VOIDmode, end_0_label),
                                  pc_rtx);
      emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));

      emit_insn (gen_addsi3 (out, out, const1_rtx));
    }
  /* Generate loop to check 4 bytes at a time.  It is not a good idea to
     align this loop.  It gives only huge programs, but does not help to
     speed up.  */
  emit_label (align_4_label);

  mem = gen_rtx_MEM (SImode, out);
  emit_move_insn (scratch, mem);
  emit_insn (gen_addsi3 (out, out, GEN_INT (4)));

  /* This formula yields a nonzero result iff one of the bytes is zero.
     This saves three branches inside loop and many cycles.  */

  emit_insn (gen_addsi3 (tmpreg, scratch, GEN_INT (-0x01010101)));
  emit_insn (gen_one_cmplsi2 (scratch, scratch));
  emit_insn (gen_andsi3 (tmpreg, tmpreg, scratch));
  emit_insn (gen_andsi3 (tmpreg, tmpreg, GEN_INT (0x80808080)));
  emit_cmp_and_jump_insns (tmpreg, const0_rtx, EQ, 0, SImode, 1, 0,
                           align_4_label);
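  /* Illustrative sketch, not part of the build: the zero-byte detection
     formula emitted above.  (x - 0x01010101) & ~x & 0x80808080 is nonzero
     exactly when some byte of x is zero: subtracting 1 from a zero byte
     borrows into its top bit, and the ~x factor rejects bytes whose top
     bit was already set.  */
#if 0
#include <assert.h>
#include <stdint.h>

static int
has_zero_byte (uint32_t x)
{
  return ((x - 0x01010101u) & ~x & 0x80808080u) != 0;
}

static void
has_zero_byte_demo (void)
{
  assert (has_zero_byte (0x41420043u));		/* a zero in byte 1 */
  assert (!has_zero_byte (0x41424344u));	/* "ABCD": no zero bytes */
  assert (!has_zero_byte (0x80818283u));	/* high bits alone don't trigger */
}
#endif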
  if (TARGET_CMOVE)
    {
      rtx reg = gen_reg_rtx (SImode);

      emit_move_insn (reg, tmpreg);
      emit_insn (gen_lshrsi3 (reg, reg, GEN_INT (16)));

      /* If zero is not in the first two bytes, move two bytes forward.  */
      emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
      tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
      tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
      emit_insn (gen_rtx_SET (VOIDmode, tmpreg,
                              gen_rtx_IF_THEN_ELSE (SImode, tmp,
                                                    reg, tmpreg)));

      /* Emit lea manually to avoid clobbering of flags.  */
      emit_insn (gen_rtx_SET (SImode, reg,
                              gen_rtx_PLUS (SImode, out, GEN_INT (2))));

      tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
      tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
      emit_insn (gen_rtx_SET (VOIDmode, out,
                              gen_rtx_IF_THEN_ELSE (SImode, tmp,
                                                    reg, out)));
    }
  else
    {
      rtx end_2_label = gen_label_rtx ();

      /* Is zero in the first two bytes?  */
      emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
      tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
      tmp = gen_rtx_NE (VOIDmode, tmp, const0_rtx);
      tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
                                  gen_rtx_LABEL_REF (VOIDmode, end_2_label),
                                  pc_rtx);
      tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
      JUMP_LABEL (tmp) = end_2_label;

      /* Not in the first two.  Move two bytes forward.  */
      emit_insn (gen_lshrsi3 (tmpreg, tmpreg, GEN_INT (16)));
      emit_insn (gen_addsi3 (out, out, GEN_INT (2)));

      emit_label (end_2_label);
    }

  /* Avoid branch in fixing the byte.  */
  tmpreg = gen_lowpart (QImode, tmpreg);
  emit_insn (gen_addqi3_cc (tmpreg, tmpreg, tmpreg));
  emit_insn (gen_subsi3_carry (out, out, GEN_INT (3)));

  emit_label (end_0_label);
}
/* Clear stack slot assignments remembered from previous functions.
   This is called from INIT_EXPANDERS once before RTL is emitted for each
   function.  */

static void
ix86_init_machine_status (p)
     struct function *p;
{
  enum machine_mode mode;
  int n;

  p->machine
    = (struct machine_function *) xmalloc (sizeof (struct machine_function));

  for (mode = VOIDmode; (int) mode < (int) MAX_MACHINE_MODE;
       mode = (enum machine_mode) ((int) mode + 1))
    for (n = 0; n < MAX_386_STACK_LOCALS; n++)
      ix86_stack_locals[(int) mode][n] = NULL_RTX;
}
/* Mark machine specific bits of P for GC.  */

static void
ix86_mark_machine_status (p)
     struct function *p;
{
  enum machine_mode mode;
  int n;

  for (mode = VOIDmode; (int) mode < (int) MAX_MACHINE_MODE;
       mode = (enum machine_mode) ((int) mode + 1))
    for (n = 0; n < MAX_386_STACK_LOCALS; n++)
      ggc_mark_rtx (p->machine->stack_locals[(int) mode][n]);
}
/* Return a MEM corresponding to a stack slot with mode MODE.
   Allocate a new slot if necessary.

   The RTL for a function can have several slots available: N is
   which slot to use.  */

rtx
assign_386_stack_local (mode, n)
     enum machine_mode mode;
     int n;
{
  if (n < 0 || n >= MAX_386_STACK_LOCALS)
    abort ();

  if (ix86_stack_locals[(int) mode][n] == NULL_RTX)
    ix86_stack_locals[(int) mode][n]
      = assign_stack_local (mode, GET_MODE_SIZE (mode), 0);

  return ix86_stack_locals[(int) mode][n];
}
/* Calculate the length of the memory address in the instruction
   encoding.  Does not include the one-byte modrm, opcode, or prefix.  */

static int
memory_address_length (addr)
     rtx addr;
{
  struct ix86_address parts;
  rtx base, index, disp;
  int len;

  if (GET_CODE (addr) == PRE_DEC
      || GET_CODE (addr) == POST_INC)
    return 0;

  if (! ix86_decompose_address (addr, &parts))
    abort ();

  base = parts.base;
  index = parts.index;
  disp = parts.disp;
  len = 0;

  /* Register Indirect.  */
  if (base && !index && !disp)
    {
      /* Special cases: ebp and esp need the two-byte modrm form.  */
      if (addr == stack_pointer_rtx
          || addr == arg_pointer_rtx
          || addr == frame_pointer_rtx
          || addr == hard_frame_pointer_rtx)
        len = 1;
    }

  /* Direct Addressing.  */
  else if (disp && !base && !index)
    len = 4;

  else
    {
      /* Find the length of the displacement constant.  */
      if (disp)
        {
          if (GET_CODE (disp) == CONST_INT
              && CONST_OK_FOR_LETTER_P (INTVAL (disp), 'K'))
            len = 1;
          else
            len = 4;
        }

      /* An index requires the two-byte modrm form.  */
      if (index)
        len += 1;
    }

  return len;
}
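/* Illustrative sketch, not part of the build: a hypothetical plain-C
   mirror of the byte-count rules above, with base/index/disp as flags
   and disp8 meaning the displacement fits in a signed byte.  */
#if 0
static int
addr_bytes (int base, int index, int disp, int disp8)
{
  int len = 0;

  if (base && !index && !disp)
    return 0;			/* plain register indirect; esp/ebp need 1 */
  if (disp && !base && !index)
    return 4;			/* direct: 32-bit displacement */

  if (disp)
    len += disp8 ? 1 : 4;	/* displacement constant */
  if (index)
    len += 1;			/* SIB byte for the index */

  return len;
}
#endif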
int
ix86_attr_length_default (insn)
     rtx insn;
{
  enum attr_type type;
  int len = 0, i;

  type = get_attr_type (insn);
  extract_insn (insn);

  switch (type)
    {
    case TYPE_ALU1:
    case TYPE_NEGNOT:
    case TYPE_ALU:
    case TYPE_ICMP:
    case TYPE_ISHIFT:
      for (i = recog_data.n_operands - 1; i >= 0; --i)
        if (CONSTANT_P (recog_data.operand[i]))
          {
            if (GET_CODE (recog_data.operand[i]) == CONST_INT
                && CONST_OK_FOR_LETTER_P (INTVAL (recog_data.operand[i]), 'K'))
              len += 1;
            else
              len += GET_MODE_SIZE (GET_MODE (recog_data.operand[0]));
          }
      break;

    case TYPE_IMOV:
      if (CONSTANT_P (recog_data.operand[1]))
        len += GET_MODE_SIZE (GET_MODE (recog_data.operand[0]));
      break;

    case TYPE_CALL:
      if (constant_call_address_operand (recog_data.operand[0],
                                         GET_MODE (recog_data.operand[0])))
        return 5;
      break;

    case TYPE_CALLV:
      if (constant_call_address_operand (recog_data.operand[1],
                                         GET_MODE (recog_data.operand[1])))
        return 5;
      break;

    case TYPE_LEA:
      {
        /* Irritatingly, single_set doesn't work with REG_UNUSED present,
           as we'll get from running life_analysis during reg-stack when
           not optimizing.  Not that it matters anyway, now that
           pro_epilogue_adjust_stack uses lea, and is by design not
           single_set.  */
        rtx set = PATTERN (insn);

        if (GET_CODE (set) == SET)
          ;
        else if (GET_CODE (set) == PARALLEL
                 && GET_CODE (XVECEXP (set, 0, 0)) == SET)
          set = XVECEXP (set, 0, 0);
        else
          abort ();

        len += memory_address_length (SET_SRC (set));
        goto just_opcode;
      }

    case TYPE_FXCH:
      if (STACK_TOP_P (recog_data.operand[0]))
        return 2 + (REGNO (recog_data.operand[1]) != FIRST_STACK_REG + 1);
      else
        return 2 + (REGNO (recog_data.operand[0]) != FIRST_STACK_REG + 1);

    default:
      break;
    }

  for (i = recog_data.n_operands - 1; i >= 0; --i)
    if (GET_CODE (recog_data.operand[i]) == MEM)
      {
        len += memory_address_length (XEXP (recog_data.operand[i], 0));
        break;
      }

 just_opcode:
  len += get_attr_length_opcode (insn);
  len += get_attr_length_prefix (insn);

  return len;
}
/* Return the maximum number of instructions a cpu can issue.  */

int
ix86_issue_rate ()
{
  switch (ix86_cpu)
    {
    case PROCESSOR_PENTIUM:
      return 2;

    case PROCESSOR_PENTIUMPRO:
      return 3;

    default:
      return 1;
    }
}
/* A subroutine of ix86_adjust_cost -- return true iff INSN reads flags set
   by DEP_INSN and nothing set by DEP_INSN.  */

static int
ix86_flags_dependant (insn, dep_insn, insn_type)
     rtx insn, dep_insn;
     enum attr_type insn_type;
{
  rtx set, set2;

  /* Simplify the test for uninteresting insns.  */
  if (insn_type != TYPE_SETCC
      && insn_type != TYPE_ICMOV
      && insn_type != TYPE_FCMOV
      && insn_type != TYPE_IBR)
    return 0;

  if ((set = single_set (dep_insn)) != 0)
    {
      set = SET_DEST (set);
      set2 = NULL_RTX;
    }
  else if (GET_CODE (PATTERN (dep_insn)) == PARALLEL
           && XVECLEN (PATTERN (dep_insn), 0) == 2
           && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 0)) == SET
           && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 1)) == SET)
    {
      set = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
      set2 = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 1));
    }
  else
    return 0;

  if (GET_CODE (set) != REG || REGNO (set) != FLAGS_REG)
    return 0;

  /* This test is true if the dependent insn reads the flags but
     not any other potentially set register.  */
  if (!reg_overlap_mentioned_p (set, PATTERN (insn)))
    return 0;

  if (set2 && reg_overlap_mentioned_p (set2, PATTERN (insn)))
    return 0;

  return 1;
}
/* A subroutine of ix86_adjust_cost -- return true iff INSN has a memory
   address with operands set by DEP_INSN.  */

static int
ix86_agi_dependant (insn, dep_insn, insn_type)
     rtx insn, dep_insn;
     enum attr_type insn_type;
{
  rtx addr;

  if (insn_type == TYPE_LEA)
    {
      addr = PATTERN (insn);
      if (GET_CODE (addr) == SET)
        ;
      else if (GET_CODE (addr) == PARALLEL
               && GET_CODE (XVECEXP (addr, 0, 0)) == SET)
        addr = XVECEXP (addr, 0, 0);
      else
        abort ();
      addr = SET_SRC (addr);
    }
  else
    {
      int i;

      extract_insn (insn);
      for (i = recog_data.n_operands - 1; i >= 0; --i)
        if (GET_CODE (recog_data.operand[i]) == MEM)
          {
            addr = XEXP (recog_data.operand[i], 0);
            goto found;
          }
      return 0;
    found:;
    }

  return modified_in_p (addr, dep_insn);
}
int
ix86_adjust_cost (insn, link, dep_insn, cost)
     rtx insn, link, dep_insn;
     int cost;
{
  enum attr_type insn_type, dep_insn_type;
  enum attr_memory memory;
  rtx set, set2;
  int dep_insn_code_number;

  /* Anti and output dependencies have zero cost on all CPUs.  */
  if (REG_NOTE_KIND (link) != 0)
    return 0;

  dep_insn_code_number = recog_memoized (dep_insn);

  /* If we can't recognize the insns, we can't really do anything.  */
  if (dep_insn_code_number < 0 || recog_memoized (insn) < 0)
    return cost;

  insn_type = get_attr_type (insn);
  dep_insn_type = get_attr_type (dep_insn);

  /* Prologue and epilogue allocators can have a false dependency on ebp.
     This results in one cycle extra stall on Pentium prologue scheduling,
     so handle this important case manually.  */
  if (dep_insn_code_number == CODE_FOR_pro_epilogue_adjust_stack
      && dep_insn_type == TYPE_ALU
      && !reg_mentioned_p (stack_pointer_rtx, insn))
    return 0;

  switch (ix86_cpu)
    {
    case PROCESSOR_PENTIUM:
      /* Address Generation Interlock adds a cycle of latency.  */
      if (ix86_agi_dependant (insn, dep_insn, insn_type))
        cost += 1;

      /* ??? Compares pair with jump/setcc.  */
      if (ix86_flags_dependant (insn, dep_insn, insn_type))
        cost = 0;

      /* Floating point stores require value to be ready one cycle
         earlier.  */
      if (insn_type == TYPE_FMOV
          && get_attr_memory (insn) == MEMORY_STORE
          && !ix86_agi_dependant (insn, dep_insn, insn_type))
        cost += 1;
      break;

    case PROCESSOR_PENTIUMPRO:
      /* Since we can't represent delayed latencies of load+operation,
         increase the cost here for non-imov insns.  */
      if (dep_insn_type != TYPE_IMOV
          && dep_insn_type != TYPE_FMOV
          && ((memory = get_attr_memory (dep_insn)) == MEMORY_LOAD
              || memory == MEMORY_BOTH))
        cost += 1;

      /* INT->FP conversion is expensive.  */
      if (get_attr_fp_int_src (dep_insn))
        cost += 5;

      /* There is one cycle extra latency between an FP op and a store.  */
      if (insn_type == TYPE_FMOV
          && (set = single_set (dep_insn)) != NULL_RTX
          && (set2 = single_set (insn)) != NULL_RTX
          && rtx_equal_p (SET_DEST (set), SET_SRC (set2))
          && GET_CODE (SET_DEST (set2)) == MEM)
        cost += 1;
      break;

    case PROCESSOR_K6:
      /* The esp dependency is resolved before the instruction is really
         finished.  */
      if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
          && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
        return 1;

      /* Since we can't represent delayed latencies of load+operation,
         increase the cost here for non-imov insns.  */
      if ((memory = get_attr_memory (dep_insn)) == MEMORY_LOAD
          || memory == MEMORY_BOTH)
        cost += (dep_insn_type != TYPE_IMOV) ? 2 : 1;

      /* INT->FP conversion is expensive.  */
      if (get_attr_fp_int_src (dep_insn))
        cost += 5;
      break;

    case PROCESSOR_ATHLON:
      if ((memory = get_attr_memory (dep_insn)) == MEMORY_LOAD
          || memory == MEMORY_BOTH)
        {
          if (dep_insn_type == TYPE_IMOV || dep_insn_type == TYPE_FMOV)
            cost += 2;
          else
            cost += 3;
        }
      break;

    default:
      break;
    }

  return cost;
}
static union
{
  struct ppro_sched_data
  {
    rtx decode[3];
    int issued_this_cycle;
  } ppro;
} ix86_sched_data;
static int
ix86_safe_length (insn)
     rtx insn;
{
  if (recog_memoized (insn) >= 0)
    return get_attr_length (insn);
  else
    return 128;
}

static int
ix86_safe_length_prefix (insn)
     rtx insn;
{
  if (recog_memoized (insn) >= 0)
    return get_attr_length_prefix (insn);
  else
    return 0;
}

static enum attr_memory
ix86_safe_memory (insn)
     rtx insn;
{
  if (recog_memoized (insn) >= 0)
    return get_attr_memory (insn);
  else
    return MEMORY_UNKNOWN;
}

static enum attr_pent_pair
ix86_safe_pent_pair (insn)
     rtx insn;
{
  if (recog_memoized (insn) >= 0)
    return get_attr_pent_pair (insn);
  else
    return PENT_PAIR_NP;
}

static enum attr_ppro_uops
ix86_safe_ppro_uops (insn)
     rtx insn;
{
  if (recog_memoized (insn) >= 0)
    return get_attr_ppro_uops (insn);
  else
    return PPRO_UOPS_MANY;
}
static void
ix86_dump_ppro_packet (dump)
     FILE *dump;
{
  if (ix86_sched_data.ppro.decode[0])
    {
      fprintf (dump, "PPRO packet: %d",
               INSN_UID (ix86_sched_data.ppro.decode[0]));
      if (ix86_sched_data.ppro.decode[1])
        fprintf (dump, " %d", INSN_UID (ix86_sched_data.ppro.decode[1]));
      if (ix86_sched_data.ppro.decode[2])
        fprintf (dump, " %d", INSN_UID (ix86_sched_data.ppro.decode[2]));
      fputc ('\n', dump);
    }
}
/* We're beginning a new block.  Initialize data structures as necessary.  */

void
ix86_sched_init (dump, sched_verbose)
     FILE *dump ATTRIBUTE_UNUSED;
     int sched_verbose ATTRIBUTE_UNUSED;
{
  memset (&ix86_sched_data, 0, sizeof (ix86_sched_data));
}
/* Shift INSN to SLOT, and shift everything else down.  */

static void
ix86_reorder_insn (insnp, slot)
     rtx *insnp, *slot;
{
  if (insnp != slot)
    {
      rtx insn = *insnp;

      do
        insnp[0] = insnp[1];
      while (++insnp != slot);
      *slot = insn;
    }
}
/* Find an instruction with given pairability and minimal amount of cycles
   lost by the fact that the CPU waits for both pipelines to finish before
   reading next instructions.  Also take care that both instructions together
   cannot exceed 7 bytes.  */

static rtx *
ix86_pent_find_pair (e_ready, ready, type, first)
     rtx *e_ready;
     rtx *ready;
     enum attr_pent_pair type;
     rtx first;
{
  int mincycles, cycles;
  enum attr_pent_pair tmp;
  enum attr_memory memory;
  rtx *insnp, *bestinsnp = NULL;

  if (ix86_safe_length (first) > 7 + ix86_safe_length_prefix (first))
    return NULL;

  memory = ix86_safe_memory (first);
  cycles = result_ready_cost (first);
  mincycles = INT_MAX;

  for (insnp = e_ready; insnp >= ready && mincycles; --insnp)
    if ((tmp = ix86_safe_pent_pair (*insnp)) == type
        && ix86_safe_length (*insnp) <= 7 + ix86_safe_length_prefix (*insnp))
      {
        enum attr_memory second_memory;
        int secondcycles, currentcycles;

        second_memory = ix86_safe_memory (*insnp);
        secondcycles = result_ready_cost (*insnp);
        currentcycles = abs (cycles - secondcycles);

        if (secondcycles >= 1 && cycles >= 1)
          {
            /* Two read/modify/write instructions together takes two
               cycles longer.  */
            if (memory == MEMORY_BOTH && second_memory == MEMORY_BOTH)
              currentcycles += 2;

            /* Read modify/write instruction followed by read/modify
               takes one cycle longer.  */
            if (memory == MEMORY_BOTH && second_memory == MEMORY_LOAD
                && tmp != PENT_PAIR_UV
                && ix86_safe_pent_pair (first) != PENT_PAIR_UV)
              currentcycles += 1;
          }
        if (currentcycles < mincycles)
          bestinsnp = insnp, mincycles = currentcycles;
      }

  return bestinsnp;
}
/* Subroutines of ix86_sched_reorder.  */

static void
ix86_sched_reorder_pentium (ready, e_ready)
     rtx *ready;
     rtx *e_ready;
{
  enum attr_pent_pair pair1, pair2;
  rtx *insnp;

  /* This wouldn't be necessary if Haifa knew that static insn ordering
     is important to which pipe an insn is issued to.  So we have to make
     some minor rearrangements.  */

  pair1 = ix86_safe_pent_pair (*e_ready);

  /* If the first insn is non-pairable, let it be.  */
  if (pair1 == PENT_PAIR_NP)
    return;

  pair2 = PENT_PAIR_NP;
  insnp = 0;

  /* If the first insn is UV or PV pairable, search for a PU
     insn to pair it with.  */
  if (pair1 == PENT_PAIR_UV || pair1 == PENT_PAIR_PV)
    {
      insnp = ix86_pent_find_pair (e_ready-1, ready,
                                   PENT_PAIR_PU, *e_ready);
      if (insnp)
        pair2 = PENT_PAIR_PU;
    }

  /* If the first insn is PU or UV pairable, search for a PV
     insn to pair it with.  */
  if (pair2 == PENT_PAIR_NP
      && (pair1 == PENT_PAIR_PU || pair1 == PENT_PAIR_UV))
    {
      insnp = ix86_pent_find_pair (e_ready-1, ready,
                                   PENT_PAIR_PV, *e_ready);
      if (insnp)
        pair2 = PENT_PAIR_PV;
    }

  /* If the first insn is pairable, search for a UV
     insn to pair it with.  */
  if (pair2 == PENT_PAIR_NP)
    {
      insnp = ix86_pent_find_pair (e_ready-1, ready,
                                   PENT_PAIR_UV, *e_ready);
      if (insnp)
        pair2 = PENT_PAIR_UV;
    }

  if (pair2 == PENT_PAIR_NP)
    return;

  /* Found something!  Decide if we need to swap the order.  */
  if (pair1 == PENT_PAIR_PV || pair2 == PENT_PAIR_PU
      || (pair1 == PENT_PAIR_UV && pair2 == PENT_PAIR_UV
          && ix86_safe_memory (*e_ready) == MEMORY_BOTH
          && ix86_safe_memory (*insnp) == MEMORY_LOAD))
    ix86_reorder_insn (insnp, e_ready);
  else
    ix86_reorder_insn (insnp, e_ready - 1);
}
static void
ix86_sched_reorder_ppro (ready, e_ready)
     rtx *ready;
     rtx *e_ready;
{
  rtx decode[3];
  enum attr_ppro_uops cur_uops;
  int issued_this_cycle;
  rtx *insnp;
  int i;

  /* At this point .ppro.decode contains the state of the three
     decoders from last "cycle".  That is, those insns that were
     actually independent.  But here we're scheduling for the
     decoder, and we may find things that are decodable in the
     same cycle.  */

  memcpy (decode, ix86_sched_data.ppro.decode, sizeof (decode));
  issued_this_cycle = 0;

  insnp = e_ready;
  cur_uops = ix86_safe_ppro_uops (*insnp);

  /* If the decoders are empty, and we've a complex insn at the
     head of the priority queue, let it issue without complaint.  */
  if (decode[0] == NULL)
    {
      if (cur_uops == PPRO_UOPS_MANY)
        {
          issued_this_cycle = 1;
          goto ppro_done;
        }

      /* Otherwise, search for a 2-4 uop insn to issue.  */
      while (cur_uops != PPRO_UOPS_FEW)
        {
          if (insnp == ready)
            break;
          cur_uops = ix86_safe_ppro_uops (*--insnp);
        }

      /* If so, move it to the head of the line.  */
      if (cur_uops == PPRO_UOPS_FEW)
        ix86_reorder_insn (insnp, e_ready);

      /* Issue the head of the queue.  */
      issued_this_cycle = 1;
      decode[0] = *e_ready--;
    }

  /* Look for simple insns to fill in the other two slots.  */
  for (i = 1; i < 3; ++i)
    if (decode[i] == NULL)
      {
        if (ready >= e_ready)
          goto ppro_done;

        insnp = e_ready;
        cur_uops = ix86_safe_ppro_uops (*insnp);
        while (cur_uops != PPRO_UOPS_ONE)
          {
            if (insnp == ready)
              break;
            cur_uops = ix86_safe_ppro_uops (*--insnp);
          }

        /* Found one.  Move it to the head of the queue and issue it.  */
        if (cur_uops == PPRO_UOPS_ONE)
          {
            ix86_reorder_insn (insnp, e_ready);
            decode[i] = *e_ready--;
            issued_this_cycle++;
            continue;
          }

        /* ??? Didn't find one.  Ideally, here we would do a lazy split
           of 2-uop insns, issue one and queue the other.  */
      }

 ppro_done:
  if (issued_this_cycle == 0)
    issued_this_cycle = 1;
  ix86_sched_data.ppro.issued_this_cycle = issued_this_cycle;
}
/* We are about to begin issuing insns for this clock cycle.
   Override the default sort algorithm to better slot instructions.  */

int
ix86_sched_reorder (dump, sched_verbose, ready, n_ready, clock_var)
     FILE *dump ATTRIBUTE_UNUSED;
     int sched_verbose ATTRIBUTE_UNUSED;
     rtx *ready;
     int n_ready;
     int clock_var ATTRIBUTE_UNUSED;
{
  rtx *e_ready = ready + n_ready - 1;

  if (n_ready < 2)
    goto out;

  switch (ix86_cpu)
    {
    default:
      break;

    case PROCESSOR_PENTIUM:
      ix86_sched_reorder_pentium (ready, e_ready);
      break;

    case PROCESSOR_PENTIUMPRO:
      ix86_sched_reorder_ppro (ready, e_ready);
      break;
    }

 out:
  return ix86_issue_rate ();
}
/* We are about to issue INSN.  Return the number of insns left on the
   ready queue that can be issued this cycle.  */

int
ix86_variable_issue (dump, sched_verbose, insn, can_issue_more)
     FILE *dump;
     int sched_verbose;
     rtx insn;
     int can_issue_more;
{
  int i;

  switch (ix86_cpu)
    {
    default:
      return can_issue_more - 1;

    case PROCESSOR_PENTIUMPRO:
      {
        enum attr_ppro_uops uops = ix86_safe_ppro_uops (insn);

        if (uops == PPRO_UOPS_MANY)
          {
            if (sched_verbose)
              ix86_dump_ppro_packet (dump);
            ix86_sched_data.ppro.decode[0] = insn;
            ix86_sched_data.ppro.decode[1] = NULL;
            ix86_sched_data.ppro.decode[2] = NULL;
            if (sched_verbose)
              ix86_dump_ppro_packet (dump);
            ix86_sched_data.ppro.decode[0] = NULL;
          }
        else if (uops == PPRO_UOPS_FEW)
          {
            if (sched_verbose)
              ix86_dump_ppro_packet (dump);
            ix86_sched_data.ppro.decode[0] = insn;
            ix86_sched_data.ppro.decode[1] = NULL;
            ix86_sched_data.ppro.decode[2] = NULL;
          }
        else
          {
            for (i = 0; i < 3; ++i)
              if (ix86_sched_data.ppro.decode[i] == NULL)
                {
                  ix86_sched_data.ppro.decode[i] = insn;
                  break;
                }
            if (i == 3)
              {
                if (sched_verbose)
                  ix86_dump_ppro_packet (dump);
                ix86_sched_data.ppro.decode[0] = NULL;
                ix86_sched_data.ppro.decode[1] = NULL;
                ix86_sched_data.ppro.decode[2] = NULL;
              }
          }
      }
      return --ix86_sched_data.ppro.issued_this_cycle;
    }
}