/* Subroutines used for code generation on IA-32.
   Copyright (C) 1988, 1992, 1994, 1995, 1996, 1997, 1998, 1999, 2000
   Free Software Foundation, Inc.

This file is part of GNU CC.

GNU CC is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2, or (at your option)
any later version.

GNU CC is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with GNU CC; see the file COPYING.  If not, write to
the Free Software Foundation, 59 Temple Place - Suite 330,
Boston, MA 02111-1307, USA.  */

#include <setjmp.h>
#include "config.h"
#include "system.h"
#include "rtl.h"
#include "tree.h"
#include "tm_p.h"
#include "regs.h"
#include "hard-reg-set.h"
#include "real.h"
#include "insn-config.h"
#include "conditions.h"
#include "insn-flags.h"
#include "output.h"
#include "insn-attr.h"
#include "flags.h"
#include "except.h"
#include "function.h"
#include "recog.h"
#include "expr.h"
#include "toplev.h"
#include "basic-block.h"
#include "ggc.h"

#ifdef EXTRA_CONSTRAINT
/* If EXTRA_CONSTRAINT is defined, then the 'S'
   constraint in REG_CLASS_FROM_LETTER will no longer work, and various
   asm statements that need 'S' for class SIREG will break.  */
 error EXTRA_CONSTRAINT conflicts with S constraint letter
/* The previous line used to be #error, but some compilers barf
   even if the conditional was untrue.  */
#endif

#ifndef CHECK_STACK_LIMIT
#define CHECK_STACK_LIMIT -1
#endif

/* Processor costs (relative to an add) */
struct processor_costs i386_cost = {  /* 386 specific costs */
  1,            /* cost of an add instruction */
  1,            /* cost of a lea instruction */
  3,            /* variable shift costs */
  2,            /* constant shift costs */
  6,            /* cost of starting a multiply */
  1,            /* cost of multiply per each bit set */
  23,           /* cost of a divide/mod */
  15,           /* "large" insn */
  3,            /* MOVE_RATIO */
  4,            /* cost for loading QImode using movzbl */
  {2, 4, 2},    /* cost of loading integer registers
                   in QImode, HImode and SImode.
                   Relative to reg-reg move (2).  */
  {2, 4, 2},    /* cost of storing integer registers */
  2,            /* cost of reg,reg fld/fst */
  {8, 8, 8},    /* cost of loading fp registers
                   in SFmode, DFmode and XFmode */
  {8, 8, 8}     /* cost of storing fp registers
                   in SFmode, DFmode and XFmode */
};

struct processor_costs i486_cost = {  /* 486 specific costs */
  1,            /* cost of an add instruction */
  1,            /* cost of a lea instruction */
  3,            /* variable shift costs */
  2,            /* constant shift costs */
  12,           /* cost of starting a multiply */
  1,            /* cost of multiply per each bit set */
  40,           /* cost of a divide/mod */
  15,           /* "large" insn */
  3,            /* MOVE_RATIO */
  4,            /* cost for loading QImode using movzbl */
  {2, 4, 2},    /* cost of loading integer registers
                   in QImode, HImode and SImode.
                   Relative to reg-reg move (2).  */
  {2, 4, 2},    /* cost of storing integer registers */
  2,            /* cost of reg,reg fld/fst */
  {8, 8, 8},    /* cost of loading fp registers
                   in SFmode, DFmode and XFmode */
  {8, 8, 8}     /* cost of storing fp registers
                   in SFmode, DFmode and XFmode */
};

struct processor_costs pentium_cost = {
  1,            /* cost of an add instruction */
  1,            /* cost of a lea instruction */
  4,            /* variable shift costs */
  1,            /* constant shift costs */
  11,           /* cost of starting a multiply */
  0,            /* cost of multiply per each bit set */
  25,           /* cost of a divide/mod */
  8,            /* "large" insn */
  6,            /* MOVE_RATIO */
  6,            /* cost for loading QImode using movzbl */
  {2, 4, 2},    /* cost of loading integer registers
                   in QImode, HImode and SImode.
                   Relative to reg-reg move (2).  */
  {2, 4, 2},    /* cost of storing integer registers */
  2,            /* cost of reg,reg fld/fst */
  {2, 2, 6},    /* cost of loading fp registers
                   in SFmode, DFmode and XFmode */
  {4, 4, 6}     /* cost of storing fp registers
                   in SFmode, DFmode and XFmode */
};

struct processor_costs pentiumpro_cost = {
  1,            /* cost of an add instruction */
  1,            /* cost of a lea instruction */
  1,            /* variable shift costs */
  1,            /* constant shift costs */
  4,            /* cost of starting a multiply */
  0,            /* cost of multiply per each bit set */
  17,           /* cost of a divide/mod */
  8,            /* "large" insn */
  6,            /* MOVE_RATIO */
  2,            /* cost for loading QImode using movzbl */
  {4, 4, 4},    /* cost of loading integer registers
                   in QImode, HImode and SImode.
                   Relative to reg-reg move (2).  */
  {2, 2, 2},    /* cost of storing integer registers */
  2,            /* cost of reg,reg fld/fst */
  {2, 2, 6},    /* cost of loading fp registers
                   in SFmode, DFmode and XFmode */
  {4, 4, 6}     /* cost of storing fp registers
                   in SFmode, DFmode and XFmode */
};

struct processor_costs k6_cost = {
  1,            /* cost of an add instruction */
  2,            /* cost of a lea instruction */
  1,            /* variable shift costs */
  1,            /* constant shift costs */
  3,            /* cost of starting a multiply */
  0,            /* cost of multiply per each bit set */
  18,           /* cost of a divide/mod */
  8,            /* "large" insn */
  4,            /* MOVE_RATIO */
  3,            /* cost for loading QImode using movzbl */
  {4, 5, 4},    /* cost of loading integer registers
                   in QImode, HImode and SImode.
                   Relative to reg-reg move (2).  */
  {2, 3, 2},    /* cost of storing integer registers */
  4,            /* cost of reg,reg fld/fst */
  {6, 6, 6},    /* cost of loading fp registers
                   in SFmode, DFmode and XFmode */
  {4, 4, 4}     /* cost of storing fp registers
                   in SFmode, DFmode and XFmode */
};

struct processor_costs athlon_cost = {
  1,            /* cost of an add instruction */
  2,            /* cost of a lea instruction */
  1,            /* variable shift costs */
  1,            /* constant shift costs */
  5,            /* cost of starting a multiply */
  0,            /* cost of multiply per each bit set */
  42,           /* cost of a divide/mod */
  8,            /* "large" insn */
  9,            /* MOVE_RATIO */
  4,            /* cost for loading QImode using movzbl */
  {4, 5, 4},    /* cost of loading integer registers
                   in QImode, HImode and SImode.
                   Relative to reg-reg move (2).  */
  {2, 3, 2},    /* cost of storing integer registers */
  4,            /* cost of reg,reg fld/fst */
  {6, 6, 20},   /* cost of loading fp registers
                   in SFmode, DFmode and XFmode */
  {4, 4, 16}    /* cost of storing fp registers
                   in SFmode, DFmode and XFmode */
};

struct processor_costs *ix86_cost = &pentium_cost;

/* Processor feature/optimization bitmasks.  */
#define m_386 (1<<PROCESSOR_I386)
#define m_486 (1<<PROCESSOR_I486)
#define m_PENT (1<<PROCESSOR_PENTIUM)
#define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
#define m_K6 (1<<PROCESSOR_K6)
#define m_ATHLON (1<<PROCESSOR_ATHLON)

const int x86_use_leave = m_386 | m_K6 | m_ATHLON;
const int x86_push_memory = m_386 | m_K6 | m_ATHLON;
const int x86_zero_extend_with_and = m_486 | m_PENT;
const int x86_movx = m_ATHLON | m_PPRO /* m_386 | m_K6 */;
const int x86_double_with_add = ~m_386;
const int x86_use_bit_test = m_386;
const int x86_unroll_strlen = m_486 | m_PENT | m_PPRO | m_ATHLON | m_K6;
const int x86_use_q_reg = m_PENT | m_PPRO | m_K6;
const int x86_use_any_reg = m_486;
const int x86_cmove = m_PPRO | m_ATHLON;
const int x86_deep_branch = m_PPRO | m_K6 | m_ATHLON;
const int x86_use_sahf = m_PPRO | m_K6 | m_ATHLON;
const int x86_partial_reg_stall = m_PPRO;
const int x86_use_loop = m_K6;
const int x86_use_fiop = ~(m_PPRO | m_ATHLON | m_PENT);
const int x86_use_mov0 = m_K6;
const int x86_use_cltd = ~(m_PENT | m_K6);
const int x86_read_modify_write = ~m_PENT;
const int x86_read_modify = ~(m_PENT | m_PPRO);
const int x86_split_long_moves = m_PPRO;
const int x86_promote_QImode = m_K6 | m_PENT | m_386 | m_486;
const int x86_single_stringop = m_386;
const int x86_qimode_math = ~(0);
const int x86_promote_qi_regs = 0;
const int x86_himode_math = ~(m_PPRO);
const int x86_promote_hi_regs = m_PPRO;
const int x86_sub_esp_4 = m_ATHLON | m_PPRO;
const int x86_sub_esp_8 = m_ATHLON | m_PPRO | m_386 | m_486;
const int x86_add_esp_4 = m_ATHLON | m_K6;
const int x86_add_esp_8 = m_ATHLON | m_PPRO | m_K6 | m_386 | m_486;
const int x86_integer_DFmode_moves = ~m_ATHLON;
const int x86_partial_reg_dependency = m_ATHLON;
const int x86_memory_mismatch_stall = m_ATHLON;
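
/* Illustrative sketch (not part of the original file): i386.h tests
   these masks against the CPU we are tuning for, along the lines of

       #define TARGET_CMOVE (x86_cmove & (1 << ix86_cpu))

   so e.g. with -mcpu=i686 (PROCESSOR_PENTIUMPRO) TARGET_CMOVE is
   nonzero because x86_cmove includes m_PPRO.  The exact macro names
   and spelling in i386.h are assumptions here.  */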

#define AT_BP(mode) (gen_rtx_MEM ((mode), hard_frame_pointer_rtx))

const char * const hi_reg_name[] = HI_REGISTER_NAMES;
const char * const qi_reg_name[] = QI_REGISTER_NAMES;
const char * const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES;

/* Array of the smallest class containing reg number REGNO, indexed by
   REGNO.  Used by REGNO_REG_CLASS in i386.h.  */

enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] =
{
  /* ax, dx, cx, bx */
  AREG, DREG, CREG, BREG,
  /* si, di, bp, sp */
  SIREG, DIREG, NON_Q_REGS, NON_Q_REGS,
  /* FP registers */
  FP_TOP_REG, FP_SECOND_REG, FLOAT_REGS, FLOAT_REGS,
  FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, FLOAT_REGS,
  /* arg pointer */
  NON_Q_REGS,
  /* flags, fpsr, dirflag, frame */
  NO_REGS, NO_REGS, NO_REGS, NON_Q_REGS
};

/* The "default" register map.  */

int const dbx_register_map[FIRST_PSEUDO_REGISTER] =
{
  0, 2, 1, 3, 6, 7, 4, 5,           /* general regs */
  12, 13, 14, 15, 16, 17, 18, 19,   /* fp regs */
  -1, -1, -1, -1,                   /* arg, flags, fpsr, dir */
};

/* Define the register numbers to be used in Dwarf debugging information.
   The SVR4 reference port C compiler uses the following register numbers
   in its Dwarf output code:
	0 for %eax (gcc regno = 0)
	1 for %ecx (gcc regno = 2)
	2 for %edx (gcc regno = 1)
	3 for %ebx (gcc regno = 3)
	4 for %esp (gcc regno = 7)
	5 for %ebp (gcc regno = 6)
	6 for %esi (gcc regno = 4)
	7 for %edi (gcc regno = 5)
   The following three DWARF register numbers are never generated by
   the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
   believes these numbers have these meanings.
	8  for %eip    (no gcc equivalent)
	9  for %eflags (gcc regno = 17)
	10 for %trapno (no gcc equivalent)
   It is not at all clear how we should number the FP stack registers
   for the x86 architecture.  If the version of SDB on x86/svr4 were
   a bit less brain dead with respect to floating-point then we would
   have a precedent to follow with respect to DWARF register numbers
   for x86 FP registers, but the SDB on x86/svr4 is so completely
   broken with respect to FP registers that it is hardly worth thinking
   of it as something to strive for compatibility with.
   The version of x86/svr4 SDB I have at the moment does (partially)
   seem to believe that DWARF register number 11 is associated with
   the x86 register %st(0), but that's about all.  Higher DWARF
   register numbers don't seem to be associated with anything in
   particular, and even for DWARF regno 11, SDB only seems to understand
   that it should say that a variable lives in %st(0) (when asked via
   an `=' command) if we said it was in DWARF regno 11, but SDB still
   prints garbage when asked for the value of the variable in question
   (via a `/' command).
   (Also note that the labels SDB prints for various FP stack regs
   when doing an `x' command are all wrong.)
   Note that these problems generally don't affect the native SVR4
   C compiler because it doesn't allow the use of -O with -g and
   because when it is *not* optimizing, it allocates a memory
   location for each floating-point variable, and the memory
   location is what gets described in the DWARF AT_location
   attribute for the variable in question.
   Regardless of the severe mental illness of the x86/svr4 SDB, we
   do something sensible here and we use the following DWARF
   register numbers.  Note that these are all stack-top-relative
   numbers.
	11 for %st(0) (gcc regno = 8)
	12 for %st(1) (gcc regno = 9)
	13 for %st(2) (gcc regno = 10)
	14 for %st(3) (gcc regno = 11)
	15 for %st(4) (gcc regno = 12)
	16 for %st(5) (gcc regno = 13)
	17 for %st(6) (gcc regno = 14)
	18 for %st(7) (gcc regno = 15)
   */
int const svr4_dbx_register_map[FIRST_PSEUDO_REGISTER] =
{
  0, 2, 1, 3, 6, 7, 5, 4,           /* general regs */
  11, 12, 13, 14, 15, 16, 17, 18,   /* fp regs */
  -1, 9, -1, -1,                    /* arg, flags, fpsr, dir */
};
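
/* Illustrative sketch (an assumption, not from this file): the target
   headers select between the two maps above via a DBX_REGISTER_NUMBER
   style macro, roughly

       #define DBX_REGISTER_NUMBER(n) \
	 (use_svr4_numbering ? svr4_dbx_register_map[(n)] \
			     : dbx_register_map[(n)])

   so GCC regno 2 (%ecx) becomes DWARF register 1 under the SVR4
   numbering.  The selector condition here is hypothetical.  */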

/* Test and compare insns in i386.md store the information needed to
   generate branch and scc insns here.  */

struct rtx_def *ix86_compare_op0 = NULL_RTX;
struct rtx_def *ix86_compare_op1 = NULL_RTX;

#define MAX_386_STACK_LOCALS 2

/* Define the structure for the machine field in struct function.  */
struct machine_function
{
  rtx stack_locals[(int) MAX_MACHINE_MODE][MAX_386_STACK_LOCALS];
};

#define ix86_stack_locals (cfun->machine->stack_locals)

/* which cpu are we scheduling for */
enum processor_type ix86_cpu;

/* which instruction set architecture to use.  */
int ix86_arch;

/* Strings to hold which cpu and instruction set architecture to use.  */
const char *ix86_cpu_string;		/* for -mcpu=<xxx> */
const char *ix86_arch_string;		/* for -march=<xxx> */

/* Register allocation order */
const char *ix86_reg_alloc_order;
static char regs_allocated[FIRST_PSEUDO_REGISTER];

/* # of registers to use to pass arguments.  */
const char *ix86_regparm_string;

/* ix86_regparm_string as a number */
int ix86_regparm;

/* Alignment to use for loops and jumps:  */

/* Power of two alignment for loops.  */
const char *ix86_align_loops_string;

/* Power of two alignment for non-loop jumps.  */
const char *ix86_align_jumps_string;

/* Power of two alignment for stack boundary in bytes.  */
const char *ix86_preferred_stack_boundary_string;

/* Preferred alignment for stack boundary in bits.  */
int ix86_preferred_stack_boundary;

/* Values 1-5: see jump.c */
int ix86_branch_cost;
const char *ix86_branch_cost_string;

/* Power of two alignment for functions.  */
int ix86_align_funcs;
const char *ix86_align_funcs_string;

/* Power of two alignment for loops.  */
int ix86_align_loops;

/* Power of two alignment for non-loop jumps.  */
int ix86_align_jumps;
\f
static void output_pic_addr_const PARAMS ((FILE *, rtx, int));
static void put_condition_code PARAMS ((enum rtx_code, enum machine_mode,
					int, int, FILE *));
static enum rtx_code unsigned_comparison PARAMS ((enum rtx_code code));
static rtx ix86_expand_int_compare PARAMS ((enum rtx_code, rtx, rtx));
static enum machine_mode ix86_fp_compare_mode PARAMS ((enum rtx_code));
static enum rtx_code ix86_prepare_fp_compare_args PARAMS ((enum rtx_code,
							   rtx *, rtx *));
static rtx ix86_expand_compare PARAMS ((enum rtx_code));
static rtx gen_push PARAMS ((rtx));
static int memory_address_length PARAMS ((rtx addr));
static int ix86_flags_dependant PARAMS ((rtx, rtx, enum attr_type));
static int ix86_agi_dependant PARAMS ((rtx, rtx, enum attr_type));
static int ix86_safe_length PARAMS ((rtx));
static enum attr_memory ix86_safe_memory PARAMS ((rtx));
static enum attr_pent_pair ix86_safe_pent_pair PARAMS ((rtx));
static enum attr_ppro_uops ix86_safe_ppro_uops PARAMS ((rtx));
static void ix86_dump_ppro_packet PARAMS ((FILE *));
static void ix86_reorder_insn PARAMS ((rtx *, rtx *));
static rtx * ix86_pent_find_pair PARAMS ((rtx *, rtx *, enum attr_pent_pair,
					  rtx));
static void ix86_init_machine_status PARAMS ((struct function *));
static void ix86_mark_machine_status PARAMS ((struct function *));
static void ix86_split_to_parts PARAMS ((rtx, rtx *, enum machine_mode));
static int ix86_safe_length_prefix PARAMS ((rtx));
static HOST_WIDE_INT ix86_compute_frame_size PARAMS((HOST_WIDE_INT,
						     int *, int *, int *));
static int ix86_nsaved_regs PARAMS((void));
static void ix86_emit_save_regs PARAMS((void));
static void ix86_emit_restore_regs_using_mov PARAMS ((rtx, int));
static void ix86_emit_epilogue_esp_adjustment PARAMS((int));
static void ix86_sched_reorder_pentium PARAMS((rtx *, rtx *));
static void ix86_sched_reorder_ppro PARAMS((rtx *, rtx *));

struct ix86_address
{
  rtx base, index, disp;
  HOST_WIDE_INT scale;
};

static int ix86_decompose_address PARAMS ((rtx, struct ix86_address *));
\f
/* Sometimes certain combinations of command options do not make
   sense on a particular target machine.  You can define a macro
   `OVERRIDE_OPTIONS' to take account of this.  This macro, if
   defined, is executed once just after all the command options have
   been parsed.

   Don't use this macro to turn on various extra optimizations for
   `-O'.  That is what `OPTIMIZATION_OPTIONS' is for.  */

void
override_options ()
{
  /* Comes from final.c -- no real reason to change it.  */
#define MAX_CODE_ALIGN 16

  static struct ptt
    {
      struct processor_costs *cost;	/* Processor costs */
      int target_enable;		/* Target flags to enable.  */
      int target_disable;		/* Target flags to disable.  */
      int align_loop;			/* Default alignments.  */
      int align_jump;
      int align_func;
      int branch_cost;
    }
  const processor_target_table[PROCESSOR_max] =
    {
      {&i386_cost, 0, 0, 2, 2, 2, 1},
      {&i486_cost, 0, 0, 4, 4, 4, 1},
      {&pentium_cost, 0, 0, -4, -4, -4, 1},
      {&pentiumpro_cost, 0, 0, 4, -4, 4, 1},
      {&k6_cost, 0, 0, -5, -5, 4, 1},
      {&athlon_cost, 0, 0, 4, -4, 4, 1}
    };

  static struct pta
    {
      const char *name;		/* processor name or nickname.  */
      enum processor_type processor;
    }
  const processor_alias_table[] =
    {
      {"i386", PROCESSOR_I386},
      {"i486", PROCESSOR_I486},
      {"i586", PROCESSOR_PENTIUM},
      {"pentium", PROCESSOR_PENTIUM},
      {"i686", PROCESSOR_PENTIUMPRO},
      {"pentiumpro", PROCESSOR_PENTIUMPRO},
      {"k6", PROCESSOR_K6},
      {"athlon", PROCESSOR_ATHLON},
    };

  int const pta_size = sizeof (processor_alias_table) / sizeof (struct pta);

#ifdef SUBTARGET_OVERRIDE_OPTIONS
  SUBTARGET_OVERRIDE_OPTIONS;
#endif

  ix86_arch = PROCESSOR_I386;
  ix86_cpu = (enum processor_type) TARGET_CPU_DEFAULT;

  if (ix86_arch_string != 0)
    {
      int i;
      for (i = 0; i < pta_size; i++)
	if (! strcmp (ix86_arch_string, processor_alias_table[i].name))
	  {
	    ix86_arch = processor_alias_table[i].processor;
	    /* Default cpu tuning to the architecture.  */
	    ix86_cpu = ix86_arch;
	    break;
	  }
      if (i == pta_size)
	error ("bad value (%s) for -march= switch", ix86_arch_string);
    }

  if (ix86_cpu_string != 0)
    {
      int i;
      for (i = 0; i < pta_size; i++)
	if (! strcmp (ix86_cpu_string, processor_alias_table[i].name))
	  {
	    ix86_cpu = processor_alias_table[i].processor;
	    break;
	  }
      if (i == pta_size)
	error ("bad value (%s) for -mcpu= switch", ix86_cpu_string);
    }

  ix86_cost = processor_target_table[ix86_cpu].cost;
  target_flags |= processor_target_table[ix86_cpu].target_enable;
  target_flags &= ~processor_target_table[ix86_cpu].target_disable;

  /* Arrange to set up i386_stack_locals for all functions.  */
  init_machine_status = ix86_init_machine_status;
  mark_machine_status = ix86_mark_machine_status;

  /* Validate registers in register allocation order.  */
  if (ix86_reg_alloc_order)
    {
      int i, ch;
      for (i = 0; (ch = ix86_reg_alloc_order[i]) != '\0'; i++)
	{
	  int regno = 0;

	  switch (ch)
	    {
	    case 'a':	regno = 0;	break;
	    case 'd':	regno = 1;	break;
	    case 'c':	regno = 2;	break;
	    case 'b':	regno = 3;	break;
	    case 'S':	regno = 4;	break;
	    case 'D':	regno = 5;	break;
	    case 'B':	regno = 6;	break;

	    default:	fatal ("Register '%c' is unknown", ch);
	    }

	  if (regs_allocated[regno])
	    fatal ("Register '%c' already specified in allocation order", ch);

	  regs_allocated[regno] = 1;
	}
    }

  /* Validate -mregparm= value.  */
  if (ix86_regparm_string)
    {
      ix86_regparm = atoi (ix86_regparm_string);
      if (ix86_regparm < 0 || ix86_regparm > REGPARM_MAX)
	fatal ("-mregparm=%d is not between 0 and %d",
	       ix86_regparm, REGPARM_MAX);
    }

  /* Validate -malign-loops= value, or provide default.  */
  ix86_align_loops = processor_target_table[ix86_cpu].align_loop;
  if (ix86_align_loops_string)
    {
      ix86_align_loops = atoi (ix86_align_loops_string);
      if (ix86_align_loops < 0 || ix86_align_loops > MAX_CODE_ALIGN)
	fatal ("-malign-loops=%d is not between 0 and %d",
	       ix86_align_loops, MAX_CODE_ALIGN);
    }

  /* Validate -malign-jumps= value, or provide default.  */
  ix86_align_jumps = processor_target_table[ix86_cpu].align_jump;
  if (ix86_align_jumps_string)
    {
      ix86_align_jumps = atoi (ix86_align_jumps_string);
      if (ix86_align_jumps < 0 || ix86_align_jumps > MAX_CODE_ALIGN)
	fatal ("-malign-jumps=%d is not between 0 and %d",
	       ix86_align_jumps, MAX_CODE_ALIGN);
    }

  /* Validate -malign-functions= value, or provide default.  */
  ix86_align_funcs = processor_target_table[ix86_cpu].align_func;
  if (ix86_align_funcs_string)
    {
      ix86_align_funcs = atoi (ix86_align_funcs_string);
      if (ix86_align_funcs < 0 || ix86_align_funcs > MAX_CODE_ALIGN)
	fatal ("-malign-functions=%d is not between 0 and %d",
	       ix86_align_funcs, MAX_CODE_ALIGN);
    }

  /* Validate -mpreferred-stack-boundary= value, or provide default.
     The default of 128 bits is for Pentium III's SSE __m128.  */
  ix86_preferred_stack_boundary = 128;
  if (ix86_preferred_stack_boundary_string)
    {
      int i = atoi (ix86_preferred_stack_boundary_string);
      if (i < 2 || i > 31)
	fatal ("-mpreferred-stack-boundary=%d is not between 2 and 31", i);
      ix86_preferred_stack_boundary = (1 << i) * BITS_PER_UNIT;
    }

  /* Validate -mbranch-cost= value, or provide default.  */
  ix86_branch_cost = processor_target_table[ix86_cpu].branch_cost;
  if (ix86_branch_cost_string)
    {
      ix86_branch_cost = atoi (ix86_branch_cost_string);
      if (ix86_branch_cost < 0 || ix86_branch_cost > 5)
	fatal ("-mbranch-cost=%d is not between 0 and 5",
	       ix86_branch_cost);
    }

  /* Keep nonleaf frame pointers.  */
  if (TARGET_OMIT_LEAF_FRAME_POINTER)
    flag_omit_frame_pointer = 1;

  /* If we're doing fast math, we don't care about comparison order
     wrt NaNs.  This lets us use a shorter comparison sequence.  */
  if (flag_fast_math)
    target_flags &= ~MASK_IEEE_FP;

  /* If we're planning on using `loop', use it.  */
  if (TARGET_USE_LOOP && optimize)
    flag_branch_on_count_reg = 1;
}
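
/* Worked example (illustrative): with -march=i586 and no -mcpu option,
   the loop above sets both ix86_arch and ix86_cpu to PROCESSOR_PENTIUM;
   and -mpreferred-stack-boundary=4 sets ix86_preferred_stack_boundary
   to (1 << 4) * 8 = 128 bits, which also happens to be the default.  */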
\f
/* A C statement (sans semicolon) to choose the order in which to
   allocate hard registers for pseudo-registers local to a basic
   block.

   Store the desired register order in the array `reg_alloc_order'.
   Element 0 should be the register to allocate first; element 1, the
   next register; and so on.

   The macro body should not assume anything about the contents of
   `reg_alloc_order' before execution of the macro.

   On most machines, it is not necessary to define this macro.  */

void
order_regs_for_local_alloc ()
{
  int i, ch, order;

  /* User specified the register allocation order.  */

  if (ix86_reg_alloc_order)
    {
      for (i = order = 0; (ch = ix86_reg_alloc_order[i]) != '\0'; i++)
	{
	  int regno = 0;

	  switch (ch)
	    {
	    case 'a':	regno = 0;	break;
	    case 'd':	regno = 1;	break;
	    case 'c':	regno = 2;	break;
	    case 'b':	regno = 3;	break;
	    case 'S':	regno = 4;	break;
	    case 'D':	regno = 5;	break;
	    case 'B':	regno = 6;	break;
	    }

	  reg_alloc_order[order++] = regno;
	}

      for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
	{
	  if (! regs_allocated[i])
	    reg_alloc_order[order++] = i;
	}
    }

  /* If user did not specify a register allocation order, use natural order.  */
  else
    {
      for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
	reg_alloc_order[i] = i;
    }
}
\f
void
optimization_options (level, size)
     int level;
     int size ATTRIBUTE_UNUSED;
{
  /* For -O2 and beyond, turn off -fschedule-insns by default.  It tends to
     make the problem with not enough registers even worse.  */
#ifdef INSN_SCHEDULING
  if (level > 1)
    flag_schedule_insns = 0;
#endif
}
\f
/* Return nonzero if IDENTIFIER with arguments ARGS is a valid machine specific
   attribute for DECL.  The attributes in ATTRIBUTES have previously been
   assigned to DECL.  */

int
ix86_valid_decl_attribute_p (decl, attributes, identifier, args)
     tree decl ATTRIBUTE_UNUSED;
     tree attributes ATTRIBUTE_UNUSED;
     tree identifier ATTRIBUTE_UNUSED;
     tree args ATTRIBUTE_UNUSED;
{
  return 0;
}

/* Return nonzero if IDENTIFIER with arguments ARGS is a valid machine specific
   attribute for TYPE.  The attributes in ATTRIBUTES have previously been
   assigned to TYPE.  */

int
ix86_valid_type_attribute_p (type, attributes, identifier, args)
     tree type;
     tree attributes ATTRIBUTE_UNUSED;
     tree identifier;
     tree args;
{
  if (TREE_CODE (type) != FUNCTION_TYPE
      && TREE_CODE (type) != METHOD_TYPE
      && TREE_CODE (type) != FIELD_DECL
      && TREE_CODE (type) != TYPE_DECL)
    return 0;

  /* Stdcall attribute says callee is responsible for popping arguments
     if they are not variable.  */
  if (is_attribute_p ("stdcall", identifier))
    return (args == NULL_TREE);

  /* Cdecl attribute says the callee is a normal C declaration.  */
  if (is_attribute_p ("cdecl", identifier))
    return (args == NULL_TREE);

  /* Regparm attribute specifies how many integer arguments are to be
     passed in registers.  */
  if (is_attribute_p ("regparm", identifier))
    {
      tree cst;

      if (! args || TREE_CODE (args) != TREE_LIST
	  || TREE_CHAIN (args) != NULL_TREE
	  || TREE_VALUE (args) == NULL_TREE)
	return 0;

      cst = TREE_VALUE (args);
      if (TREE_CODE (cst) != INTEGER_CST)
	return 0;

      if (compare_tree_int (cst, REGPARM_MAX) > 0)
	return 0;

      return 1;
    }

  return 0;
}
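
/* Usage sketch (illustrative, not part of this file): at the source
   level these attributes look like

       int f (int a, int b) __attribute__ ((stdcall));
       int g (int a, int b) __attribute__ ((regparm (2)));

   The regparm argument must be a single INTEGER_CST no larger than
   REGPARM_MAX, which is what the checks above enforce.  */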

/* Return 0 if the attributes for two types are incompatible, 1 if they
   are compatible, and 2 if they are nearly compatible (which causes a
   warning to be generated).  */

int
ix86_comp_type_attributes (type1, type2)
     tree type1;
     tree type2;
{
  /* Check for mismatch of non-default calling convention.  */
  const char *rtdstr = TARGET_RTD ? "cdecl" : "stdcall";

  if (TREE_CODE (type1) != FUNCTION_TYPE)
    return 1;

  /* Check for mismatched return types (cdecl vs stdcall).  */
  if (!lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type1))
      != !lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type2)))
    return 0;
  return 1;
}
\f
/* Value is the number of bytes of arguments automatically
   popped when returning from a subroutine call.
   FUNDECL is the declaration node of the function (as a tree),
   FUNTYPE is the data type of the function (as a tree),
   or for a library call it is an identifier node for the subroutine name.
   SIZE is the number of bytes of arguments passed on the stack.

   On the 80386, the RTD insn may be used to pop them if the number
   of args is fixed, but if the number is variable then the caller
   must pop them all.  RTD can't be used for library calls now
   because the library is compiled with the Unix compiler.
   Use of RTD is a selectable option, since it is incompatible with
   standard Unix calling sequences.  If the option is not selected,
   the caller must always pop the args.

   The attribute stdcall is equivalent to RTD on a per module basis.  */

int
ix86_return_pops_args (fundecl, funtype, size)
     tree fundecl;
     tree funtype;
     int size;
{
  int rtd = TARGET_RTD && (!fundecl || TREE_CODE (fundecl) != IDENTIFIER_NODE);

  /* Cdecl functions override -mrtd, and never pop the stack.  */
  if (! lookup_attribute ("cdecl", TYPE_ATTRIBUTES (funtype)))
    {
      /* Stdcall functions will pop the stack if not variable args.  */
      if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (funtype)))
	rtd = 1;

      if (rtd
	  && (TYPE_ARG_TYPES (funtype) == NULL_TREE
	      || (TREE_VALUE (tree_last (TYPE_ARG_TYPES (funtype)))
		  == void_type_node)))
	return size;
    }

  /* Lose any fake structure return argument.  */
  if (aggregate_value_p (TREE_TYPE (funtype)))
    return GET_MODE_SIZE (Pmode);

  return 0;
}
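
/* Worked example (illustrative): for a stdcall function taking two
   ints, SIZE is 8 and this function returns 8, so the callee's return
   is emitted as `ret $8' and the caller pops nothing; for a cdecl
   function it returns 0 and the caller does `addl $8, %esp'.  */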
\f
/* Argument support functions.  */

/* Initialize a variable CUM of type CUMULATIVE_ARGS
   for a call to a function whose data type is FNTYPE.
   For a library call, FNTYPE is 0.  */

void
init_cumulative_args (cum, fntype, libname)
     CUMULATIVE_ARGS *cum;	/* Argument info to initialize */
     tree fntype;		/* tree ptr for function decl */
     rtx libname;		/* SYMBOL_REF of library name or 0 */
{
  static CUMULATIVE_ARGS zero_cum;
  tree param, next_param;

  if (TARGET_DEBUG_ARG)
    {
      fprintf (stderr, "\ninit_cumulative_args (");
      if (fntype)
	fprintf (stderr, "fntype code = %s, ret code = %s",
		 tree_code_name[(int) TREE_CODE (fntype)],
		 tree_code_name[(int) TREE_CODE (TREE_TYPE (fntype))]);
      else
	fprintf (stderr, "no fntype");

      if (libname)
	fprintf (stderr, ", libname = %s", XSTR (libname, 0));
    }

  *cum = zero_cum;

  /* Set up the number of registers to use for passing arguments.  */
  cum->nregs = ix86_regparm;
  if (fntype)
    {
      tree attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (fntype));

      if (attr)
	cum->nregs = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
    }

  /* Determine if this function has variable arguments.  A function
     has a fixed argument list only if its last argument type is
     `void_type_node'; otherwise it takes variable arguments, and in
     that case we won't pass anything in registers.  */

  if (cum->nregs)
    {
      for (param = (fntype) ? TYPE_ARG_TYPES (fntype) : 0;
	   param != 0; param = next_param)
	{
	  next_param = TREE_CHAIN (param);
	  if (next_param == 0 && TREE_VALUE (param) != void_type_node)
	    cum->nregs = 0;
	}
    }

  if (TARGET_DEBUG_ARG)
    fprintf (stderr, ", nregs=%d )\n", cum->nregs);

  return;
}

/* Update the data in CUM to advance over an argument
   of mode MODE and data type TYPE.
   (TYPE is null for libcalls where that information may not be available.)  */

void
function_arg_advance (cum, mode, type, named)
     CUMULATIVE_ARGS *cum;	/* current arg information */
     enum machine_mode mode;	/* current arg mode */
     tree type;			/* type of the argument or 0 if lib support */
     int named;			/* whether or not the argument was named */
{
  int bytes
    = (mode == BLKmode) ? int_size_in_bytes (type) : GET_MODE_SIZE (mode);
  int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;

  if (TARGET_DEBUG_ARG)
    fprintf (stderr,
	     "function_adv (sz=%d, wds=%2d, nregs=%d, mode=%s, named=%d)\n\n",
	     words, cum->words, cum->nregs, GET_MODE_NAME (mode), named);

  cum->words += words;
  cum->nregs -= words;
  cum->regno += words;

  if (cum->nregs <= 0)
    {
      cum->nregs = 0;
      cum->regno = 0;
    }

  return;
}

/* Define where to put the arguments to a function.
   Value is zero to push the argument on the stack,
   or a hard register in which to store the argument.

   MODE is the argument's machine mode.
   TYPE is the data type of the argument (as a tree).
    This is null for libcalls where that information may
    not be available.
   CUM is a variable of type CUMULATIVE_ARGS which gives info about
    the preceding args and about the function being called.
   NAMED is nonzero if this argument is a named parameter
    (otherwise it is an extra parameter matching an ellipsis).  */

struct rtx_def *
function_arg (cum, mode, type, named)
     CUMULATIVE_ARGS *cum;	/* current arg information */
     enum machine_mode mode;	/* current arg mode */
     tree type;			/* type of the argument or 0 if lib support */
     int named;			/* != 0 for normal args, == 0 for ... args */
{
  rtx ret = NULL_RTX;
  int bytes
    = (mode == BLKmode) ? int_size_in_bytes (type) : GET_MODE_SIZE (mode);
  int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;

  switch (mode)
    {
    /* For now, pass fp/complex values on the stack.  */
    default:
      break;

    case BLKmode:
    case DImode:
    case SImode:
    case HImode:
    case QImode:
      if (words <= cum->nregs)
	ret = gen_rtx_REG (mode, cum->regno);
      break;
    }

  if (TARGET_DEBUG_ARG)
    {
      fprintf (stderr,
	       "function_arg (size=%d, wds=%2d, nregs=%d, mode=%4s, named=%d",
	       words, cum->words, cum->nregs, GET_MODE_NAME (mode), named);

      if (ret)
	fprintf (stderr, ", reg=%%e%s", reg_names[REGNO (ret)]);
      else
	fprintf (stderr, ", stack");

      fprintf (stderr, " )\n");
    }

  return ret;
}
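
/* Worked example (illustrative; the register order follows the GCC
   regno assignment above, where 0 = %eax, 1 = %edx, 2 = %ecx): with
   regparm (3), cum->regno starts at 0 and function_arg_advance bumps
   it by one word per argument, so a call f (1, 2, 3) passes 1 in
   %eax, 2 in %edx and 3 in %ecx.  */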
\f
/* Returns 1 if OP is either a symbol reference or a sum of a symbol
   reference and a constant.  */

int
symbolic_operand (op, mode)
     register rtx op;
     enum machine_mode mode ATTRIBUTE_UNUSED;
{
  switch (GET_CODE (op))
    {
    case SYMBOL_REF:
    case LABEL_REF:
      return 1;

    case CONST:
      op = XEXP (op, 0);
      if (GET_CODE (op) == SYMBOL_REF
	  || GET_CODE (op) == LABEL_REF
	  || (GET_CODE (op) == UNSPEC
	      && XINT (op, 1) >= 6
	      && XINT (op, 1) <= 7))
	return 1;
      if (GET_CODE (op) != PLUS
	  || GET_CODE (XEXP (op, 1)) != CONST_INT)
	return 0;

      op = XEXP (op, 0);
      if (GET_CODE (op) == SYMBOL_REF
	  || GET_CODE (op) == LABEL_REF)
	return 1;
      /* Only @GOTOFF gets offsets.  */
      if (GET_CODE (op) != UNSPEC
	  || XINT (op, 1) != 7)
	return 0;

      op = XVECEXP (op, 0, 0);
      if (GET_CODE (op) == SYMBOL_REF
	  || GET_CODE (op) == LABEL_REF)
	return 1;
      return 0;

    default:
      return 0;
    }
}

/* Return true if the operand contains a @GOT or @GOTOFF reference.  */

int
pic_symbolic_operand (op, mode)
     register rtx op;
     enum machine_mode mode ATTRIBUTE_UNUSED;
{
  if (GET_CODE (op) == CONST)
    {
      op = XEXP (op, 0);
      if (GET_CODE (op) == UNSPEC)
	return 1;
      if (GET_CODE (op) != PLUS
	  || GET_CODE (XEXP (op, 1)) != CONST_INT)
	return 0;
      op = XEXP (op, 0);
      if (GET_CODE (op) == UNSPEC)
	return 1;
    }
  return 0;
}

/* Test for a valid operand for a call instruction.  Don't allow the
   arg pointer register or virtual regs since they may decay into
   reg + const, which the patterns can't handle.  */

int
call_insn_operand (op, mode)
     rtx op;
     enum machine_mode mode ATTRIBUTE_UNUSED;
{
  if (GET_CODE (op) != MEM)
    return 0;
  op = XEXP (op, 0);

  /* Disallow indirect through a virtual register.  This leads to
     compiler aborts when trying to eliminate them.  */
  if (GET_CODE (op) == REG
      && (op == arg_pointer_rtx
	  || op == frame_pointer_rtx
	  || (REGNO (op) >= FIRST_PSEUDO_REGISTER
	      && REGNO (op) <= LAST_VIRTUAL_REGISTER)))
    return 0;

  /* Disallow `call 1234'.  Due to varying assembler lameness this
     gets either rejected or translated to `call .+1234'.  */
  if (GET_CODE (op) == CONST_INT)
    return 0;

  /* Explicitly allow SYMBOL_REF even if pic.  */
  if (GET_CODE (op) == SYMBOL_REF)
    return 1;

  /* Half-pic doesn't allow anything but registers and constants.
     We've just taken care of the latter.  */
  if (HALF_PIC_P ())
    return register_operand (op, Pmode);

  /* Otherwise we can allow any general_operand in the address.  */
  return general_operand (op, Pmode);
}

int
constant_call_address_operand (op, mode)
     rtx op;
     enum machine_mode mode ATTRIBUTE_UNUSED;
{
  return (GET_CODE (op) == MEM
	  && CONSTANT_ADDRESS_P (XEXP (op, 0))
	  && GET_CODE (XEXP (op, 0)) != CONST_INT);
}

/* Match exactly zero and one.  */

int
const0_operand (op, mode)
     register rtx op;
     enum machine_mode mode;
{
  return op == CONST0_RTX (mode);
}

int
const1_operand (op, mode)
     register rtx op;
     enum machine_mode mode ATTRIBUTE_UNUSED;
{
  return op == const1_rtx;
}

/* Match 2, 4, or 8.  Used for leal multiplicands.  */

int
const248_operand (op, mode)
     register rtx op;
     enum machine_mode mode ATTRIBUTE_UNUSED;
{
  return (GET_CODE (op) == CONST_INT
	  && (INTVAL (op) == 2 || INTVAL (op) == 4 || INTVAL (op) == 8));
}
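
/* Example (illustrative): these constants are the legal scale factors
   in a scaled-index address such as `leal (%eax,%ebx,4), %ecx', which
   computes %ecx = %eax + %ebx * 4 in a single instruction.  */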

/* True if this is a constant appropriate for an increment or decrement.  */

int
incdec_operand (op, mode)
     register rtx op;
     enum machine_mode mode;
{
  if (op == const1_rtx || op == constm1_rtx)
    return 1;
  if (GET_CODE (op) != CONST_INT)
    return 0;
  if (mode == SImode && INTVAL (op) == (HOST_WIDE_INT) 0xffffffff)
    return 1;
  if (mode == HImode && INTVAL (op) == (HOST_WIDE_INT) 0xffff)
    return 1;
  if (mode == QImode && INTVAL (op) == (HOST_WIDE_INT) 0xff)
    return 1;
  return 0;
}

/* Return false if this is the stack pointer, or any other fake
   register eliminable to the stack pointer.  Otherwise, this is
   a register operand.

   This is used to prevent esp from being used as an index reg.
   Which would only happen in pathological cases.  */

int
reg_no_sp_operand (op, mode)
     register rtx op;
     enum machine_mode mode;
{
  rtx t = op;
  if (GET_CODE (t) == SUBREG)
    t = SUBREG_REG (t);
  if (t == stack_pointer_rtx || t == arg_pointer_rtx || t == frame_pointer_rtx)
    return 0;

  return register_operand (op, mode);
}

/* Return false if this is any eliminable register.  Otherwise
   general_operand.  */

int
general_no_elim_operand (op, mode)
     register rtx op;
     enum machine_mode mode;
{
  rtx t = op;
  if (GET_CODE (t) == SUBREG)
    t = SUBREG_REG (t);
  if (t == arg_pointer_rtx || t == frame_pointer_rtx
      || t == virtual_incoming_args_rtx || t == virtual_stack_vars_rtx
      || t == virtual_stack_dynamic_rtx)
    return 0;

  return general_operand (op, mode);
}

/* Return false if this is any eliminable register.  Otherwise
   register_operand or const_int.  */

int
nonmemory_no_elim_operand (op, mode)
     register rtx op;
     enum machine_mode mode;
{
  rtx t = op;
  if (GET_CODE (t) == SUBREG)
    t = SUBREG_REG (t);
  if (t == arg_pointer_rtx || t == frame_pointer_rtx
      || t == virtual_incoming_args_rtx || t == virtual_stack_vars_rtx
      || t == virtual_stack_dynamic_rtx)
    return 0;

  return GET_CODE (op) == CONST_INT || register_operand (op, mode);
}

/* Return true if op is a Q_REGS class register.  */

int
q_regs_operand (op, mode)
     register rtx op;
     enum machine_mode mode;
{
  if (mode != VOIDmode && GET_MODE (op) != mode)
    return 0;
  if (GET_CODE (op) == SUBREG)
    op = SUBREG_REG (op);
  return QI_REG_P (op);
}

/* Return true if op is a NON_Q_REGS class register.  */

int
non_q_regs_operand (op, mode)
     register rtx op;
     enum machine_mode mode;
{
  if (mode != VOIDmode && GET_MODE (op) != mode)
    return 0;
  if (GET_CODE (op) == SUBREG)
    op = SUBREG_REG (op);
  return NON_QI_REG_P (op);
}

/* Return 1 if OP is a comparison operator that can use the condition code
   generated by a logical operation, which characteristically does not set
   overflow or carry.  To be used with CCNOmode.  */

int
no_comparison_operator (op, mode)
     register rtx op;
     enum machine_mode mode;
{
  if (mode != VOIDmode && GET_MODE (op) != mode)
    return 0;

  switch (GET_CODE (op))
    {
    case EQ: case NE:
    case LT: case GE:
    case LEU: case LTU: case GEU: case GTU:
      return 1;

    default:
      return 0;
    }
}

/* Return 1 if OP is a comparison operator that can be issued by fcmov.  */

int
fcmov_comparison_operator (op, mode)
     register rtx op;
     enum machine_mode mode;
{
  if (mode != VOIDmode && GET_MODE (op) != mode)
    return 0;

  switch (GET_CODE (op))
    {
    case EQ: case NE:
    case LEU: case LTU: case GEU: case GTU:
    case UNORDERED: case ORDERED:
      return 1;

    default:
      return 0;
    }
}

/* Return 1 if OP is any normal comparison operator plus {UN}ORDERED.  */

int
uno_comparison_operator (op, mode)
     register rtx op;
     enum machine_mode mode;
{
  if (mode != VOIDmode && GET_MODE (op) != mode)
    return 0;

  switch (GET_CODE (op))
    {
    case EQ: case NE:
    case LE: case LT: case GE: case GT:
    case LEU: case LTU: case GEU: case GTU:
    case UNORDERED: case ORDERED:
      return 1;

    default:
      return 0;
    }
}

/* Return 1 if OP is a binary operator that can be promoted to wider mode.  */

int
promotable_binary_operator (op, mode)
     register rtx op;
     enum machine_mode mode ATTRIBUTE_UNUSED;
{
  switch (GET_CODE (op))
    {
    case MULT:
      /* Modern CPUs have the same latency for HImode and SImode
	 multiplies, but the 386 and 486 do HImode multiplies faster.  */
      return ix86_cpu > PROCESSOR_I486;
    case PLUS:
    case AND:
    case IOR:
    case XOR:
    case ASHIFT:
      return 1;
    default:
      return 0;
    }
}

/* Nearly general operand, but accept any const_double, since we wish
   to be able to drop them into memory rather than have them get pulled
   into registers.  */

int
cmp_fp_expander_operand (op, mode)
     register rtx op;
     enum machine_mode mode;
{
  if (mode != VOIDmode && mode != GET_MODE (op))
    return 0;
  if (GET_CODE (op) == CONST_DOUBLE)
    return 1;
  return general_operand (op, mode);
}

/* Match an SI or HImode register for a zero_extract.  */

int
ext_register_operand (op, mode)
     register rtx op;
     enum machine_mode mode ATTRIBUTE_UNUSED;
{
  if (GET_MODE (op) != SImode && GET_MODE (op) != HImode)
    return 0;
  return register_operand (op, VOIDmode);
}

/* Return 1 if this is a valid binary floating-point operation.
   OP is the expression matched, and MODE is its mode.  */

int
binary_fp_operator (op, mode)
     register rtx op;
     enum machine_mode mode;
{
  if (mode != VOIDmode && mode != GET_MODE (op))
    return 0;

  switch (GET_CODE (op))
    {
    case PLUS:
    case MINUS:
    case MULT:
    case DIV:
      return GET_MODE_CLASS (GET_MODE (op)) == MODE_FLOAT;

    default:
      return 0;
    }
}

int
mult_operator (op, mode)
     register rtx op;
     enum machine_mode mode ATTRIBUTE_UNUSED;
{
  return GET_CODE (op) == MULT;
}

int
div_operator (op, mode)
     register rtx op;
     enum machine_mode mode ATTRIBUTE_UNUSED;
{
  return GET_CODE (op) == DIV;
}

int
arith_or_logical_operator (op, mode)
     rtx op;
     enum machine_mode mode;
{
  return ((mode == VOIDmode || GET_MODE (op) == mode)
	  && (GET_RTX_CLASS (GET_CODE (op)) == 'c'
	      || GET_RTX_CLASS (GET_CODE (op)) == '2'));
}

/* Returns 1 if OP is a memory operand with a displacement.  */

int
memory_displacement_operand (op, mode)
     register rtx op;
     enum machine_mode mode;
{
  struct ix86_address parts;

  if (! memory_operand (op, mode))
    return 0;

  if (! ix86_decompose_address (XEXP (op, 0), &parts))
    abort ();

  return parts.disp != NULL_RTX;
}

/* To avoid problems when jump re-emits comparisons like testqi_ext_ccno_0,
   re-recognize the operand to avoid a copy_to_mode_reg that will fail.

   ??? It seems likely that this will only work because cmpsi is an
   expander, and no actual insns use this.  */

int
cmpsi_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  if (general_operand (op, mode))
    return 1;

  if (GET_CODE (op) == AND
      && GET_MODE (op) == SImode
      && GET_CODE (XEXP (op, 0)) == ZERO_EXTRACT
      && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST_INT
      && GET_CODE (XEXP (XEXP (op, 0), 2)) == CONST_INT
      && INTVAL (XEXP (XEXP (op, 0), 1)) == 8
      && INTVAL (XEXP (XEXP (op, 0), 2)) == 8
      && GET_CODE (XEXP (op, 1)) == CONST_INT)
    return 1;

  return 0;
}

/* Returns 1 if OP is a memory operand that cannot be represented by the
   modRM array.  */

int
long_memory_operand (op, mode)
     register rtx op;
     enum machine_mode mode;
{
  if (! memory_operand (op, mode))
    return 0;

  return memory_address_length (op) != 0;
}

/* Return nonzero if the rtx is known aligned.  */

int
aligned_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  struct ix86_address parts;

  if (!general_operand (op, mode))
    return 0;

  /* Registers and immediate operands are always "aligned".  */
  if (GET_CODE (op) != MEM)
    return 1;

  /* Don't even try to do any aligned optimizations with volatiles.  */
  if (MEM_VOLATILE_P (op))
    return 0;

  op = XEXP (op, 0);

  /* Pushes and pops are only valid on the stack pointer.  */
  if (GET_CODE (op) == PRE_DEC
      || GET_CODE (op) == POST_INC)
    return 1;

  /* Decode the address.  */
  if (! ix86_decompose_address (op, &parts))
    abort ();

  /* Look for some component that isn't known to be aligned.  */
  if (parts.index)
    {
      if (parts.scale < 4
	  && REGNO_POINTER_ALIGN (REGNO (parts.index)) < 32)
	return 0;
    }
  if (parts.base)
    {
      if (REGNO_POINTER_ALIGN (REGNO (parts.base)) < 32)
	return 0;
    }
  if (parts.disp)
    {
      if (GET_CODE (parts.disp) != CONST_INT
	  || (INTVAL (parts.disp) & 3) != 0)
	return 0;
    }

  /* Didn't find one -- this must be an aligned address.  */
  return 1;
}
\f
/* Return true if the constant is something that can be loaded with
   a special instruction.  Only handle 0.0 and 1.0; others are less
   worthwhile.  */

int
standard_80387_constant_p (x)
     rtx x;
{
  if (GET_CODE (x) != CONST_DOUBLE)
    return -1;

#if ! defined (REAL_IS_NOT_DOUBLE) || defined (REAL_ARITHMETIC)
  {
    REAL_VALUE_TYPE d;
    jmp_buf handler;
    int is0, is1;

    if (setjmp (handler))
      return 0;

    set_float_handler (handler);
    REAL_VALUE_FROM_CONST_DOUBLE (d, x);
    is0 = REAL_VALUES_EQUAL (d, dconst0) && !REAL_VALUE_MINUS_ZERO (d);
    is1 = REAL_VALUES_EQUAL (d, dconst1);
    set_float_handler (NULL_PTR);

    if (is0)
      return 1;

    if (is1)
      return 2;

    /* Note that on the 80387, other constants, such as pi,
       are much slower to load as standard constants
       than to load from doubles in memory!  */
    /* ??? Not true on K6: all constants are equal cost.  */
  }
#endif

  return 0;
}
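
/* Illustrative note (an assumption about how the .md patterns use the
   return value): a return of 1 means the constant can be loaded with
   `fldz' (push +0.0) and 2 with `fld1' (push +1.0); -0.0 is excluded
   above because fldz produces +0.0.  */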

/* Returns 1 if OP contains a symbol reference */

int
symbolic_reference_mentioned_p (op)
     rtx op;
{
  register const char *fmt;
  register int i;

  if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
    return 1;

  fmt = GET_RTX_FORMAT (GET_CODE (op));
  for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
    {
      if (fmt[i] == 'E')
	{
	  register int j;

	  for (j = XVECLEN (op, i) - 1; j >= 0; j--)
	    if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
	      return 1;
	}

      else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
	return 1;
    }

  return 0;
}

/* Return 1 if it is appropriate to emit `ret' instructions in the
   body of a function.  Do this only if the epilogue is simple, needing a
   couple of insns.  Prior to reloading, we can't tell how many registers
   must be saved, so return 0 then.  Return 0 if there is no frame
   marker to de-allocate.

   If NON_SAVING_SETJMP is defined and true, then it is not possible
   for the epilogue to be simple, so return 0.  This is a special case
   since NON_SAVING_SETJMP will not cause regs_ever_live to change
   until final, but jump_optimize may need to know sooner if a
   `return' is OK.  */

int
ix86_can_use_return_insn_p ()
{
  HOST_WIDE_INT tsize;
  int nregs;

#ifdef NON_SAVING_SETJMP
  if (NON_SAVING_SETJMP && current_function_calls_setjmp)
    return 0;
#endif
#ifdef FUNCTION_BLOCK_PROFILER_EXIT
  if (profile_block_flag == 2)
    return 0;
#endif

  if (! reload_completed || frame_pointer_needed)
    return 0;

  /* Don't allow more than 32k bytes of arguments to be popped, since
     that's all we can do with one instruction.  */
  if (current_function_pops_args
      && current_function_args_size >= 32768)
    return 0;

  tsize = ix86_compute_frame_size (get_frame_size (), &nregs, NULL, NULL);
  return tsize == 0 && nregs == 0;
}
\f
static char *pic_label_name;
static int pic_label_output;
static char *global_offset_table_name;

/* This function generates code for -fpic that loads %ebx with
   the return address of the caller and then returns.  */

void
asm_output_function_prefix (file, name)
     FILE *file;
     const char *name ATTRIBUTE_UNUSED;
{
  rtx xops[2];
  int pic_reg_used = flag_pic && (current_function_uses_pic_offset_table
				  || current_function_uses_const_pool);
  xops[0] = pic_offset_table_rtx;
  xops[1] = stack_pointer_rtx;

  /* Deep branch prediction favors having a return for every call.  */
  if (pic_reg_used && TARGET_DEEP_BRANCH_PREDICTION)
    {
      if (!pic_label_output)
	{
	  /* This used to call ASM_DECLARE_FUNCTION_NAME() but since it's an
	     internal (non-global) label that's being emitted, it didn't make
	     sense to have .type information for local labels.  This caused
	     the SCO OpenServer 5.0.4 ELF assembler grief (why are you giving
	     me debug info for a label that you're declaring non-global?) so
	     this was changed to call ASM_OUTPUT_LABEL() instead.  */

	  ASM_OUTPUT_LABEL (file, pic_label_name);

	  xops[1] = gen_rtx_MEM (SImode, xops[1]);
	  output_asm_insn ("mov{l}\t{%1, %0|%0, %1}", xops);
	  output_asm_insn ("ret", xops);

	  pic_label_output = 1;
	}
    }
}
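
/* For reference (derived from the output templates above), the emitted
   thunk looks like

       .LPR0:
	       movl (%esp), %ebx	# load return address into %ebx
	       ret

   so a `call .LPR0' leaves the caller's PC in %ebx.  The exact label
   spelling depends on ASM_GENERATE_INTERNAL_LABEL.  */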

void
load_pic_register ()
{
  rtx gotsym, pclab;

  if (global_offset_table_name == NULL)
    {
      global_offset_table_name =
	ggc_alloc_string ("_GLOBAL_OFFSET_TABLE_", 21);
      ggc_add_string_root (&global_offset_table_name, 1);
    }
  gotsym = gen_rtx_SYMBOL_REF (Pmode, global_offset_table_name);

  if (TARGET_DEEP_BRANCH_PREDICTION)
    {
      if (pic_label_name == NULL)
	{
	  pic_label_name = ggc_alloc_string (NULL, 32);
	  ggc_add_string_root (&pic_label_name, 1);
	  ASM_GENERATE_INTERNAL_LABEL (pic_label_name, "LPR", 0);
	}
      pclab = gen_rtx_MEM (QImode, gen_rtx_SYMBOL_REF (Pmode, pic_label_name));
    }
  else
    {
      pclab = gen_rtx_LABEL_REF (VOIDmode, gen_label_rtx ());
    }

  emit_insn (gen_prologue_get_pc (pic_offset_table_rtx, pclab));

  if (! TARGET_DEEP_BRANCH_PREDICTION)
    emit_insn (gen_popsi1 (pic_offset_table_rtx));

  emit_insn (gen_prologue_set_got (pic_offset_table_rtx, gotsym, pclab));
}
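
/* Illustrative sketch of the resulting assembly (assumed, based on the
   two strategies above): without deep branch prediction the PC is
   obtained by calling the next instruction and popping it,

	call 1f
     1:	popl %ebx
	addl $_GLOBAL_OFFSET_TABLE_, %ebx

   while with deep branch prediction the call targets the .LPR thunk
   emitted by asm_output_function_prefix, which returns with the PC
   already in %ebx.  The precise addl operand is generated by the
   prologue_set_got pattern in i386.md.  */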

/* Generate an SImode "push" pattern for input ARG.  */

static rtx
gen_push (arg)
     rtx arg;
{
  return gen_rtx_SET (VOIDmode,
		      gen_rtx_MEM (SImode,
				   gen_rtx_PRE_DEC (SImode,
						    stack_pointer_rtx)),
		      arg);
}
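
/* Example (illustrative): for ARG = (reg:SI 6), i.e. %ebp, this builds

       (set (mem:SI (pre_dec:SI (reg:SI esp))) (reg:SI ebp))

   which matches the push pattern in i386.md and is emitted as
   `pushl %ebp'.  */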

/* Return number of registers to be saved on the stack.  */

static int
ix86_nsaved_regs ()
{
  int nregs = 0;
  int pic_reg_used = flag_pic && (current_function_uses_pic_offset_table
				  || current_function_uses_const_pool);
  int limit = (frame_pointer_needed
	       ? HARD_FRAME_POINTER_REGNUM : STACK_POINTER_REGNUM);
  int regno;

  for (regno = limit - 1; regno >= 0; regno--)
    if ((regs_ever_live[regno] && ! call_used_regs[regno])
	|| (regno == PIC_OFFSET_TABLE_REGNUM && pic_reg_used))
      {
	nregs++;
      }
  return nregs;
}

/* Return the offset between two registers, one to be eliminated, and the other
   its replacement, at the start of a routine.  */

HOST_WIDE_INT
ix86_initial_elimination_offset (from, to)
     int from;
     int to;
{
  int padding1;
  int nregs;

  /* Stack grows downward:

     [arguments]
					<- ARG_POINTER
     saved pc

     saved frame pointer if frame_pointer_needed
					<- HARD_FRAME_POINTER
     [saved regs]

     [padding1]	  \
		   |	<- FRAME_POINTER
     [frame]	   > tsize
		   |
     [padding2]	  /
   */

  if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
    /* Skip saved PC and previous frame pointer.
       Executed only when frame_pointer_needed.  */
    return 8;
  else if (from == FRAME_POINTER_REGNUM
	   && to == HARD_FRAME_POINTER_REGNUM)
    {
      ix86_compute_frame_size (get_frame_size (), &nregs, &padding1, (int *) 0);
      padding1 += nregs * UNITS_PER_WORD;
      return -padding1;
    }
  else
    {
      /* ARG_POINTER or FRAME_POINTER to STACK_POINTER elimination.  */
      int frame_size = frame_pointer_needed ? 8 : 4;
      HOST_WIDE_INT tsize = ix86_compute_frame_size (get_frame_size (),
						     &nregs, &padding1,
						     (int *) 0);

      if (to != STACK_POINTER_REGNUM)
	abort ();
      else if (from == ARG_POINTER_REGNUM)
	return tsize + nregs * UNITS_PER_WORD + frame_size;
      else if (from != FRAME_POINTER_REGNUM)
	abort ();
      else
	return tsize - padding1;
    }
}
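
/* Worked example (illustrative): with frame_pointer_needed, two saved
   registers, tsize = 24 and padding1 = 0, eliminating ARG_POINTER to
   HARD_FRAME_POINTER yields 8 (saved pc plus saved %ebp), while
   eliminating ARG_POINTER to STACK_POINTER yields
   24 + 2 * 4 + 8 = 40.  */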

/* Compute the size of local storage, taking into consideration the
   desired stack alignment which is to be maintained.  Also determine
   the number of registers saved below the local storage.

   PADDING1 returns the padding before the stack frame and PADDING2
   returns the padding after the stack frame.  */

static HOST_WIDE_INT
ix86_compute_frame_size (size, nregs_on_stack, rpadding1, rpadding2)
     HOST_WIDE_INT size;
     int *nregs_on_stack;
     int *rpadding1;
     int *rpadding2;
{
  int nregs;
  int padding1 = 0;
  int padding2 = 0;
  HOST_WIDE_INT total_size;
  int stack_alignment_needed = cfun->stack_alignment_needed / BITS_PER_UNIT;
  int offset;
  int preferred_alignment = cfun->preferred_stack_boundary / BITS_PER_UNIT;

  nregs = ix86_nsaved_regs ();
  total_size = size;

  offset = frame_pointer_needed ? 8 : 4;

  /* Do some sanity checking of stack_alignment_needed and
     preferred_alignment, since the i386 port is the only one using
     these features, and they may break easily.  */
1834
1835 if (size && !stack_alignment_needed)
1836 abort ();
1837 if (!size && stack_alignment_needed != STACK_BOUNDARY / BITS_PER_UNIT)
1838 abort ();
1839 if (preferred_alignment < STACK_BOUNDARY / BITS_PER_UNIT)
1840 abort ();
1841 if (preferred_alignment > PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT)
1842 abort ();
1843 if (stack_alignment_needed > PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT)
1844 abort ();
1845
1846 if (stack_alignment_needed < 4)
1847 stack_alignment_needed = 4;
1848
1849 offset += nregs * UNITS_PER_WORD;
1850
1851 if (ACCUMULATE_OUTGOING_ARGS)
1852 total_size += current_function_outgoing_args_size;
1853
1854 total_size += offset;
1855
1856 /* Align start of frame for local function. */
1857 padding1 = ((offset + stack_alignment_needed - 1)
1858 & -stack_alignment_needed) - offset;
1859 total_size += padding1;
1860
1861 /* Align stack boundary. */
1862 padding2 = ((total_size + preferred_alignment - 1)
1863 & -preferred_alignment) - total_size;
1864
1865 if (ACCUMULATE_OUTGOING_ARGS)
1866 padding2 += current_function_outgoing_args_size;
1867
1868 if (nregs_on_stack)
1869 *nregs_on_stack = nregs;
1870 if (rpadding1)
1871 *rpadding1 = padding1;
1872 if (rpadding2)
1873 *rpadding2 = padding2;
1874
1875 return size + padding1 + padding2;
1876 }
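
/* A minimal standalone sketch of the alignment arithmetic above (kept out
   of the build with #if 0; all concrete values are assumptions chosen only
   for illustration, with UNITS_PER_WORD taken to be 4).  */
#if 0
#include <stdio.h>

int
main ()
{
  int size = 20;			/* bytes of locals requested */
  int nregs = 2;			/* call-saved registers pushed */
  int stack_alignment_needed = 16;	/* bytes, from cfun */
  int preferred_alignment = 16;		/* bytes, preferred stack boundary */
  int offset, total_size, padding1, padding2;

  offset = 8 + nregs * 4;		/* saved pc + saved ebp + saved regs */
  total_size = size + offset;

  /* Align start of frame for the local function.  */
  padding1 = ((offset + stack_alignment_needed - 1)
	      & -stack_alignment_needed) - offset;
  total_size += padding1;

  /* Align the outgoing stack boundary.  */
  padding2 = ((total_size + preferred_alignment - 1)
	      & -preferred_alignment) - total_size;

  printf ("padding1 = %d, padding2 = %d, frame = %d\n",
	  padding1, padding2, size + padding1 + padding2);
  /* Prints: padding1 = 0, padding2 = 12, frame = 32.  */
  return 0;
}
#endif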
1877
1878 /* Emit code to save registers in the prologue. */
1879
1880 static void
1881 ix86_emit_save_regs ()
1882 {
1883 register int regno;
1884 int limit;
1885 rtx insn;
1886 int pic_reg_used = flag_pic && (current_function_uses_pic_offset_table
1887 || current_function_uses_const_pool);
1888 limit = (frame_pointer_needed
1889 ? HARD_FRAME_POINTER_REGNUM : STACK_POINTER_REGNUM);
1890
1891 for (regno = limit - 1; regno >= 0; regno--)
1892 if ((regs_ever_live[regno] && !call_used_regs[regno])
1893 || (regno == PIC_OFFSET_TABLE_REGNUM && pic_reg_used))
1894 {
1895 insn = emit_insn (gen_push (gen_rtx_REG (SImode, regno)));
1896 RTX_FRAME_RELATED_P (insn) = 1;
1897 }
1898 }
1899
1900 /* Expand the prologue into a bunch of separate insns. */
1901
1902 void
1903 ix86_expand_prologue ()
1904 {
1905 HOST_WIDE_INT tsize = ix86_compute_frame_size (get_frame_size (), (int *)0, (int *)0,
1906 (int *)0);
1907 rtx insn;
1908 int pic_reg_used = flag_pic && (current_function_uses_pic_offset_table
1909 || current_function_uses_const_pool);
1910
1911 /* Note: AT&T enter does NOT have reversed args. Enter is probably
1912 slower on all targets. Also sdb doesn't like it. */
1913
1914 if (frame_pointer_needed)
1915 {
1916 insn = emit_insn (gen_push (hard_frame_pointer_rtx));
1917 RTX_FRAME_RELATED_P (insn) = 1;
1918
1919 insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
1920 RTX_FRAME_RELATED_P (insn) = 1;
1921 }
1922
1923 ix86_emit_save_regs ();
1924
1925 if (tsize == 0)
1926 ;
1927 else if (! TARGET_STACK_PROBE || tsize < CHECK_STACK_LIMIT)
1928 {
1929 if (frame_pointer_needed)
1930 insn = emit_insn (gen_pro_epilogue_adjust_stack
1931 (stack_pointer_rtx, stack_pointer_rtx,
1932 GEN_INT (-tsize), hard_frame_pointer_rtx));
1933 else
1934 insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
1935 GEN_INT (-tsize)));
1936 RTX_FRAME_RELATED_P (insn) = 1;
1937 }
1938 else
1939 {
1940 /* ??? Is this only valid for Win32? */
1941
1942 rtx arg0, sym;
1943
1944 arg0 = gen_rtx_REG (SImode, 0);
1945 emit_move_insn (arg0, GEN_INT (tsize));
1946
1947 sym = gen_rtx_MEM (FUNCTION_MODE,
1948 gen_rtx_SYMBOL_REF (Pmode, "_alloca"));
1949 insn = emit_call_insn (gen_call (sym, const0_rtx));
1950
1951 CALL_INSN_FUNCTION_USAGE (insn)
1952 = gen_rtx_EXPR_LIST (VOIDmode, gen_rtx_USE (VOIDmode, arg0),
1953 CALL_INSN_FUNCTION_USAGE (insn));
1954 }
1955
1956 #ifdef SUBTARGET_PROLOGUE
1957 SUBTARGET_PROLOGUE;
1958 #endif
1959
1960 if (pic_reg_used)
1961 load_pic_register ();
1962
1963 /* If we are profiling, make sure no instructions are scheduled before
1964 the call to mcount. However, if -fpic, the above call will have
1965 done that. */
1966 if ((profile_flag || profile_block_flag) && ! pic_reg_used)
1967 emit_insn (gen_blockage ());
1968 }
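
/* For reference, a typical frame-pointer prologue emitted by the code
   above (illustrative only; the saved register and tsize vary per
   function, and large frames on stack-probing targets call _alloca):

	pushl	%ebp
	movl	%esp, %ebp
	pushl	%ebx
	subl	$tsize, %esp  */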
1969
1970 /* Emit code to add TSIZE to esp value. Use POP instruction when
1971 profitable. */
1972
1973 static void
1974 ix86_emit_epilogue_esp_adjustment (tsize)
1975 int tsize;
1976 {
1977 /* If a frame pointer is present, we must be sure to tie the sp
1978 to the fp so that we don't mis-schedule. */
1979 if (frame_pointer_needed)
1980 emit_insn (gen_pro_epilogue_adjust_stack (stack_pointer_rtx,
1981 stack_pointer_rtx,
1982 GEN_INT (tsize),
1983 hard_frame_pointer_rtx));
1984 else
1985 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
1986 GEN_INT (tsize)));
1987 }
1988
1989 /* Emit code to restore saved registers using MOV insns. First register
1990 is restored from POINTER + OFFSET. */
1991 static void
1992 ix86_emit_restore_regs_using_mov (pointer, offset)
1993 rtx pointer;
1994 int offset;
1995 {
1996 int regno;
1997 int pic_reg_used = flag_pic && (current_function_uses_pic_offset_table
1998 || current_function_uses_const_pool);
1999 int limit = (frame_pointer_needed
2000 ? HARD_FRAME_POINTER_REGNUM : STACK_POINTER_REGNUM);
2001
2002 for (regno = 0; regno < limit; regno++)
2003 if ((regs_ever_live[regno] && !call_used_regs[regno])
2004 || (regno == PIC_OFFSET_TABLE_REGNUM && pic_reg_used))
2005 {
2006 emit_move_insn (gen_rtx_REG (SImode, regno),
2007 adj_offsettable_operand (gen_rtx_MEM (SImode,
2008 pointer),
2009 offset));
2010 offset += 4;
2011 }
2012 }
2013
2014 /* Restore function stack, frame, and registers. */
2015
2016 void
2017 ix86_expand_epilogue (emit_return)
2018 int emit_return;
2019 {
2020 int nregs;
2021 int regno;
2022
2023 int pic_reg_used = flag_pic && (current_function_uses_pic_offset_table
2024 || current_function_uses_const_pool);
2025 int sp_valid = !frame_pointer_needed || current_function_sp_is_unchanging;
2026 HOST_WIDE_INT offset;
2027 HOST_WIDE_INT tsize = ix86_compute_frame_size (get_frame_size (), &nregs,
2028 (int *)0, (int *)0);
2029
2030
2031 /* Calculate start of saved registers relative to ebp. */
2032 offset = -nregs * UNITS_PER_WORD;
2033
2034 #ifdef FUNCTION_BLOCK_PROFILER_EXIT
2035 if (profile_block_flag == 2)
2036 {
2037 FUNCTION_BLOCK_PROFILER_EXIT;
2038 }
2039 #endif
2040
2041   /* If we're only restoring one register and sp is not valid, then
2042      use a move instruction to restore the register, since it's
2043      less work than reloading sp and popping the register.
2044
2045      The default code results in a stack adjustment using an add/lea
2046      instruction, while this code results in a LEAVE instruction (or its
2047      discrete equivalent), so it is profitable in some other cases as
2048      well, especially when there are no registers to restore.  We also
2049      use this code when TARGET_USE_LEAVE and there is exactly one register
2050      to pop.  This heuristic may need some tuning in the future.  */
2051 if ((!sp_valid && nregs <= 1)
2052 || (frame_pointer_needed && !nregs && tsize)
2053 || (frame_pointer_needed && TARGET_USE_LEAVE && !optimize_size
2054 && nregs == 1))
2055 {
2056       /* Restore registers.  We can use ebp or esp to address the memory
2057 	 locations.  If both are available, default to ebp, since offsets
2058 	 are known to be small.  The only exception is esp pointing directly
2059 	 to the end of the block of saved registers, where we can simplify
2060 	 the addressing mode.  */
2061
2062 if (!frame_pointer_needed || (sp_valid && !tsize))
2063 ix86_emit_restore_regs_using_mov (stack_pointer_rtx, tsize);
2064 else
2065 ix86_emit_restore_regs_using_mov (hard_frame_pointer_rtx, offset);
2066
2067 if (!frame_pointer_needed)
2068 ix86_emit_epilogue_esp_adjustment (tsize + nregs * UNITS_PER_WORD);
2069 /* If not an i386, mov & pop is faster than "leave". */
2070 else if (TARGET_USE_LEAVE || optimize_size)
2071 emit_insn (gen_leave ());
2072 else
2073 {
2074 emit_insn (gen_pro_epilogue_adjust_stack (stack_pointer_rtx,
2075 hard_frame_pointer_rtx,
2076 const0_rtx,
2077 hard_frame_pointer_rtx));
2078 emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
2079 }
2080 }
2081 else
2082 {
2083 /* First step is to deallocate the stack frame so that we can
2084 pop the registers. */
2085 if (!sp_valid)
2086 {
2087 if (!frame_pointer_needed)
2088 abort ();
2089 emit_insn (gen_pro_epilogue_adjust_stack (stack_pointer_rtx,
2090 hard_frame_pointer_rtx,
2091 GEN_INT (offset),
2092 hard_frame_pointer_rtx));
2093 }
2094 else if (tsize)
2095 ix86_emit_epilogue_esp_adjustment (tsize);
2096
2097 for (regno = 0; regno < STACK_POINTER_REGNUM; regno++)
2098 if ((regs_ever_live[regno] && !call_used_regs[regno])
2099 || (regno == PIC_OFFSET_TABLE_REGNUM && pic_reg_used))
2100 emit_insn (gen_popsi1 (gen_rtx_REG (SImode, regno)));
2101 }
2102
2103 /* Sibcall epilogues don't want a return instruction. */
2104 if (! emit_return)
2105 return;
2106
2107 if (current_function_pops_args && current_function_args_size)
2108 {
2109 rtx popc = GEN_INT (current_function_pops_args);
2110
2111 /* i386 can only pop 64K bytes. If asked to pop more, pop
2112 return address, do explicit add, and jump indirectly to the
2113 caller. */
2114
2115 if (current_function_pops_args >= 65536)
2116 {
2117 rtx ecx = gen_rtx_REG (SImode, 2);
2118
2119 emit_insn (gen_popsi1 (ecx));
2120 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, popc));
2121 emit_indirect_jump (ecx);
2122 }
2123 else
2124 emit_jump_insn (gen_return_pop_internal (popc));
2125 }
2126 else
2127 emit_jump_insn (gen_return_internal ());
2128 }
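
/* For reference (illustrative; registers and tsize vary per function),
   the leave-style epilogue chosen above looks like

	movl	-4(%ebp), %ebx
	leave
	ret

   while the pop-style epilogue looks like

	addl	$tsize, %esp
	popl	%ebx
	ret  */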
2129 \f
2130 /* Extract the parts of an RTL expression that is a valid memory address
2131 for an instruction. Return false if the structure of the address is
2132 grossly off. */
2133
2134 static int
2135 ix86_decompose_address (addr, out)
2136 register rtx addr;
2137 struct ix86_address *out;
2138 {
2139 rtx base = NULL_RTX;
2140 rtx index = NULL_RTX;
2141 rtx disp = NULL_RTX;
2142 HOST_WIDE_INT scale = 1;
2143 rtx scale_rtx = NULL_RTX;
2144
2145 if (GET_CODE (addr) == REG || GET_CODE (addr) == SUBREG)
2146 base = addr;
2147 else if (GET_CODE (addr) == PLUS)
2148 {
2149 rtx op0 = XEXP (addr, 0);
2150 rtx op1 = XEXP (addr, 1);
2151 enum rtx_code code0 = GET_CODE (op0);
2152 enum rtx_code code1 = GET_CODE (op1);
2153
2154 if (code0 == REG || code0 == SUBREG)
2155 {
2156 if (code1 == REG || code1 == SUBREG)
2157 index = op0, base = op1; /* index + base */
2158 else
2159 base = op0, disp = op1; /* base + displacement */
2160 }
2161 else if (code0 == MULT)
2162 {
2163 index = XEXP (op0, 0);
2164 scale_rtx = XEXP (op0, 1);
2165 if (code1 == REG || code1 == SUBREG)
2166 base = op1; /* index*scale + base */
2167 else
2168 disp = op1; /* index*scale + disp */
2169 }
2170 else if (code0 == PLUS && GET_CODE (XEXP (op0, 0)) == MULT)
2171 {
2172 index = XEXP (XEXP (op0, 0), 0); /* index*scale + base + disp */
2173 scale_rtx = XEXP (XEXP (op0, 0), 1);
2174 base = XEXP (op0, 1);
2175 disp = op1;
2176 }
2177 else if (code0 == PLUS)
2178 {
2179 index = XEXP (op0, 0); /* index + base + disp */
2180 base = XEXP (op0, 1);
2181 disp = op1;
2182 }
2183 else
2184 return FALSE;
2185 }
2186 else if (GET_CODE (addr) == MULT)
2187 {
2188 index = XEXP (addr, 0); /* index*scale */
2189 scale_rtx = XEXP (addr, 1);
2190 }
2191 else if (GET_CODE (addr) == ASHIFT)
2192 {
2193 rtx tmp;
2194
2195 /* We're called for lea too, which implements ashift on occasion. */
2196 index = XEXP (addr, 0);
2197 tmp = XEXP (addr, 1);
2198 if (GET_CODE (tmp) != CONST_INT)
2199 return FALSE;
2200 scale = INTVAL (tmp);
2201 if ((unsigned HOST_WIDE_INT) scale > 3)
2202 return FALSE;
2203 scale = 1 << scale;
2204 }
2205 else
2206 disp = addr; /* displacement */
2207
2208 /* Extract the integral value of scale. */
2209 if (scale_rtx)
2210 {
2211 if (GET_CODE (scale_rtx) != CONST_INT)
2212 return FALSE;
2213 scale = INTVAL (scale_rtx);
2214 }
2215
2216   /* Allow the arg pointer and stack pointer as an index if there is no scaling.  */
2217 if (base && index && scale == 1
2218 && (index == arg_pointer_rtx || index == frame_pointer_rtx
2219 || index == stack_pointer_rtx))
2220 {
2221 rtx tmp = base;
2222 base = index;
2223 index = tmp;
2224 }
2225
2226 /* Special case: %ebp cannot be encoded as a base without a displacement. */
2227 if ((base == hard_frame_pointer_rtx
2228 || base == frame_pointer_rtx
2229 || base == arg_pointer_rtx) && !disp)
2230 disp = const0_rtx;
2231
2232   /* Special case: on the K6, [%esi] forces the instruction to be vector
2233      decoded.  Avoid this by transforming it to [%esi+0].  */
2234 if (ix86_cpu == PROCESSOR_K6 && !optimize_size
2235 && base && !index && !disp
2236 && REG_P (base)
2237 && REGNO_REG_CLASS (REGNO (base)) == SIREG)
2238 disp = const0_rtx;
2239
2240 /* Special case: encode reg+reg instead of reg*2. */
2241 if (!base && index && scale && scale == 2)
2242 base = index, scale = 1;
2243
2244 /* Special case: scaling cannot be encoded without base or displacement. */
2245 if (!base && !disp && index && scale != 1)
2246 disp = const0_rtx;
2247
2248 out->base = base;
2249 out->index = index;
2250 out->disp = disp;
2251 out->scale = scale;
2252
2253 return TRUE;
2254 }
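
/* Some illustrative decompositions (address on the left, resulting parts
   on the right; scale defaults to 1 and absent parts to NULL):

     (reg %eax)				   base = %eax
     (plus (reg %ebp) (const_int -4))	   base = %ebp, disp = -4
     (plus (mult (reg %eax) (const_int 4))
	   (reg %ebx))			   base = %ebx, index = %eax, scale = 4
     (mult (reg %eax) (const_int 2))	   base = index = %eax, scale = 1
					   (reg*2 is re-encoded as reg+reg)  */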
2255 \f
2256 /* Return the cost of the memory address X.
2257    For i386, it is better to use a complex address than let gcc copy
2258    the address into a reg and make a new pseudo.  But not if the address
2259    requires two regs - that would mean more pseudos with longer
2260    lifetimes.  */
2261 int
2262 ix86_address_cost (x)
2263 rtx x;
2264 {
2265 struct ix86_address parts;
2266 int cost = 1;
2267
2268 if (!ix86_decompose_address (x, &parts))
2269 abort ();
2270
2271 /* More complex memory references are better. */
2272 if (parts.disp && parts.disp != const0_rtx)
2273 cost--;
2274
2275 /* Attempt to minimize number of registers in the address. */
2276 if ((parts.base
2277 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER))
2278 || (parts.index
2279 && (!REG_P (parts.index)
2280 || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)))
2281 cost++;
2282
2283 if (parts.base
2284 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER)
2285 && parts.index
2286 && (!REG_P (parts.index) || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)
2287 && parts.base != parts.index)
2288 cost++;
2289
2290   /* The AMD-K6 doesn't like addresses with ModR/M set to 00_xxx_100b,
2291      since its predecode logic can't detect the length of such instructions
2292      and they degenerate to vector decoded.  Increase the cost of such
2293      addresses here.  The penalty is at least 2 cycles.  It may be worthwhile
2294      to split such addresses or even to refuse them altogether.
2295
2296      The following addressing modes are affected:
2297       [base+scale*index]
2298       [scale*index+disp]
2299       [base+index]
2300
2301      The first and last cases may be avoidable by explicitly coding a zero
2302      displacement in the memory address, but I don't have an AMD-K6 machine
2303      handy to check this theory.  */
2304
2305 if (TARGET_K6
2306 && ((!parts.disp && parts.base && parts.index && parts.scale != 1)
2307 || (parts.disp && !parts.base && parts.index && parts.scale != 1)
2308 || (!parts.disp && parts.base && parts.index && parts.scale == 1)))
2309 cost += 10;
2310
2311 return cost;
2312 }
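
/* Worked examples of the cost computed above (illustrative):
     (reg %eax)				-> cost 1
     (plus (reg %ebp) (const_int -4))	-> cost 0 (nonzero disp is a bonus)
     [%ebx+%esi] on the K6		-> cost 1 + 10 (base+index, scale 1,
					   no displacement)  */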
2313 \f
2314 /* Determine if a given CONST RTX is a valid memory displacement
2315 in PIC mode. */
2316
2317 int
2318 legitimate_pic_address_disp_p (disp)
2319 register rtx disp;
2320 {
2321 if (GET_CODE (disp) != CONST)
2322 return 0;
2323 disp = XEXP (disp, 0);
2324
2325 if (GET_CODE (disp) == PLUS)
2326 {
2327 if (GET_CODE (XEXP (disp, 1)) != CONST_INT)
2328 return 0;
2329 disp = XEXP (disp, 0);
2330 }
2331
2332 if (GET_CODE (disp) != UNSPEC
2333 || XVECLEN (disp, 0) != 1)
2334 return 0;
2335
2336 /* Must be @GOT or @GOTOFF. */
2337 if (XINT (disp, 1) != 6
2338 && XINT (disp, 1) != 7)
2339 return 0;
2340
2341 if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF
2342 && GET_CODE (XVECEXP (disp, 0, 0)) != LABEL_REF)
2343 return 0;
2344
2345 return 1;
2346 }
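
/* The accepted forms are, illustratively:
     (const (unspec [(symbol_ref "x")] 7))			       x@GOTOFF
     (const (plus (unspec [(symbol_ref "x")] 7) (const_int 4)))        x@GOTOFF+4
     (const (unspec [(symbol_ref "f")] 6))			       f@GOT  */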
2347
2348 /* GO_IF_LEGITIMATE_ADDRESS recognizes an RTL expression that is a valid
2349 memory address for an instruction. The MODE argument is the machine mode
2350 for the MEM expression that wants to use this address.
2351
2352    It only recognizes addresses in canonical form.  LEGITIMIZE_ADDRESS should
2353 convert common non-canonical forms to canonical form so that they will
2354 be recognized. */
2355
2356 int
2357 legitimate_address_p (mode, addr, strict)
2358 enum machine_mode mode;
2359 register rtx addr;
2360 int strict;
2361 {
2362 struct ix86_address parts;
2363 rtx base, index, disp;
2364 HOST_WIDE_INT scale;
2365 const char *reason = NULL;
2366 rtx reason_rtx = NULL_RTX;
2367
2368 if (TARGET_DEBUG_ADDR)
2369 {
2370 fprintf (stderr,
2371 "\n======\nGO_IF_LEGITIMATE_ADDRESS, mode = %s, strict = %d\n",
2372 GET_MODE_NAME (mode), strict);
2373 debug_rtx (addr);
2374 }
2375
2376 if (! ix86_decompose_address (addr, &parts))
2377 {
2378 reason = "decomposition failed";
2379 goto error;
2380 }
2381
2382 base = parts.base;
2383 index = parts.index;
2384 disp = parts.disp;
2385 scale = parts.scale;
2386
2387 /* Validate base register.
2388
2389      Don't allow SUBREGs here; they can lead to spill failures when the base
2390      is one word out of a two-word structure, which is represented internally
2391      as a DImode int.  */
2392
2393 if (base)
2394 {
2395 reason_rtx = base;
2396
2397 if (GET_CODE (base) != REG)
2398 {
2399 reason = "base is not a register";
2400 goto error;
2401 }
2402
2403 if (GET_MODE (base) != Pmode)
2404 {
2405 reason = "base is not in Pmode";
2406 goto error;
2407 }
2408
2409 if ((strict && ! REG_OK_FOR_BASE_STRICT_P (base))
2410 || (! strict && ! REG_OK_FOR_BASE_NONSTRICT_P (base)))
2411 {
2412 reason = "base is not valid";
2413 goto error;
2414 }
2415 }
2416
2417 /* Validate index register.
2418
2419      Don't allow SUBREGs here; they can lead to spill failures when the index
2420      is one word out of a two-word structure, which is represented internally
2421      as a DImode int.  */
2422
2423 if (index)
2424 {
2425 reason_rtx = index;
2426
2427 if (GET_CODE (index) != REG)
2428 {
2429 reason = "index is not a register";
2430 goto error;
2431 }
2432
2433 if (GET_MODE (index) != Pmode)
2434 {
2435 reason = "index is not in Pmode";
2436 goto error;
2437 }
2438
2439 if ((strict && ! REG_OK_FOR_INDEX_STRICT_P (index))
2440 || (! strict && ! REG_OK_FOR_INDEX_NONSTRICT_P (index)))
2441 {
2442 reason = "index is not valid";
2443 goto error;
2444 }
2445 }
2446
2447 /* Validate scale factor. */
2448 if (scale != 1)
2449 {
2450 reason_rtx = GEN_INT (scale);
2451 if (!index)
2452 {
2453 reason = "scale without index";
2454 goto error;
2455 }
2456
2457 if (scale != 2 && scale != 4 && scale != 8)
2458 {
2459 reason = "scale is not a valid multiplier";
2460 goto error;
2461 }
2462 }
2463
2464 /* Validate displacement. */
2465 if (disp)
2466 {
2467 reason_rtx = disp;
2468
2469 if (!CONSTANT_ADDRESS_P (disp))
2470 {
2471 reason = "displacement is not constant";
2472 goto error;
2473 }
2474
2475 if (GET_CODE (disp) == CONST_DOUBLE)
2476 {
2477 reason = "displacement is a const_double";
2478 goto error;
2479 }
2480
2481 if (flag_pic && SYMBOLIC_CONST (disp))
2482 {
2483 if (! legitimate_pic_address_disp_p (disp))
2484 {
2485 reason = "displacement is an invalid pic construct";
2486 goto error;
2487 }
2488
2489 /* Verify that a symbolic pic displacement includes
2490 the pic_offset_table_rtx register. */
2491 if (base != pic_offset_table_rtx
2492 && (index != pic_offset_table_rtx || scale != 1))
2493 {
2494 reason = "pic displacement against invalid base";
2495 goto error;
2496 }
2497 }
2498 else if (HALF_PIC_P ())
2499 {
2500 if (! HALF_PIC_ADDRESS_P (disp)
2501 || (base != NULL_RTX || index != NULL_RTX))
2502 {
2503 reason = "displacement is an invalid half-pic reference";
2504 goto error;
2505 }
2506 }
2507 }
2508
2509 /* Everything looks valid. */
2510 if (TARGET_DEBUG_ADDR)
2511 fprintf (stderr, "Success.\n");
2512 return TRUE;
2513
2514 error:
2515 if (TARGET_DEBUG_ADDR)
2516 {
2517 fprintf (stderr, "Error: %s\n", reason);
2518 debug_rtx (reason_rtx);
2519 }
2520 return FALSE;
2521 }
2522 \f
2523 /* Return a legitimate reference for ORIG (an address) using the
2524 register REG. If REG is 0, a new pseudo is generated.
2525
2526 There are two types of references that must be handled:
2527
2528 1. Global data references must load the address from the GOT, via
2529 the PIC reg. An insn is emitted to do this load, and the reg is
2530 returned.
2531
2532 2. Static data references, constant pool addresses, and code labels
2533 compute the address as an offset from the GOT, whose base is in
2534 the PIC reg. Static data objects have SYMBOL_REF_FLAG set to
2535 differentiate them from global data objects. The returned
2536 address is the PIC reg + an unspec constant.
2537
2538 GO_IF_LEGITIMATE_ADDRESS rejects symbolic references unless the PIC
2539 reg also appears in the address. */
2540
2541 rtx
2542 legitimize_pic_address (orig, reg)
2543 rtx orig;
2544 rtx reg;
2545 {
2546 rtx addr = orig;
2547 rtx new = orig;
2548 rtx base;
2549
2550 if (GET_CODE (addr) == LABEL_REF
2551 || (GET_CODE (addr) == SYMBOL_REF
2552 && (CONSTANT_POOL_ADDRESS_P (addr)
2553 || SYMBOL_REF_FLAG (addr))))
2554 {
2555 /* This symbol may be referenced via a displacement from the PIC
2556 base address (@GOTOFF). */
2557
2558 current_function_uses_pic_offset_table = 1;
2559 new = gen_rtx_UNSPEC (VOIDmode, gen_rtvec (1, addr), 7);
2560 new = gen_rtx_CONST (VOIDmode, new);
2561 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
2562
2563 if (reg != 0)
2564 {
2565 emit_move_insn (reg, new);
2566 new = reg;
2567 }
2568 }
2569 else if (GET_CODE (addr) == SYMBOL_REF)
2570 {
2571 /* This symbol must be referenced via a load from the
2572 Global Offset Table (@GOT). */
2573
2574 current_function_uses_pic_offset_table = 1;
2575 new = gen_rtx_UNSPEC (VOIDmode, gen_rtvec (1, addr), 6);
2576 new = gen_rtx_CONST (VOIDmode, new);
2577 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
2578 new = gen_rtx_MEM (Pmode, new);
2579 RTX_UNCHANGING_P (new) = 1;
2580
2581 if (reg == 0)
2582 reg = gen_reg_rtx (Pmode);
2583 emit_move_insn (reg, new);
2584 new = reg;
2585 }
2586 else
2587 {
2588 if (GET_CODE (addr) == CONST)
2589 {
2590 addr = XEXP (addr, 0);
2591 if (GET_CODE (addr) == UNSPEC)
2592 {
2593 /* Check that the unspec is one of the ones we generate? */
2594 }
2595 else if (GET_CODE (addr) != PLUS)
2596 abort ();
2597 }
2598 if (GET_CODE (addr) == PLUS)
2599 {
2600 rtx op0 = XEXP (addr, 0), op1 = XEXP (addr, 1);
2601
2602 /* Check first to see if this is a constant offset from a @GOTOFF
2603 symbol reference. */
2604 if ((GET_CODE (op0) == LABEL_REF
2605 || (GET_CODE (op0) == SYMBOL_REF
2606 && (CONSTANT_POOL_ADDRESS_P (op0)
2607 || SYMBOL_REF_FLAG (op0))))
2608 && GET_CODE (op1) == CONST_INT)
2609 {
2610 current_function_uses_pic_offset_table = 1;
2611 new = gen_rtx_UNSPEC (VOIDmode, gen_rtvec (1, op0), 7);
2612 new = gen_rtx_PLUS (VOIDmode, new, op1);
2613 new = gen_rtx_CONST (VOIDmode, new);
2614 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
2615
2616 if (reg != 0)
2617 {
2618 emit_move_insn (reg, new);
2619 new = reg;
2620 }
2621 }
2622 else
2623 {
2624 base = legitimize_pic_address (XEXP (addr, 0), reg);
2625 new = legitimize_pic_address (XEXP (addr, 1),
2626 base == reg ? NULL_RTX : reg);
2627
2628 if (GET_CODE (new) == CONST_INT)
2629 new = plus_constant (base, INTVAL (new));
2630 else
2631 {
2632 if (GET_CODE (new) == PLUS && CONSTANT_P (XEXP (new, 1)))
2633 {
2634 base = gen_rtx_PLUS (Pmode, base, XEXP (new, 0));
2635 new = XEXP (new, 1);
2636 }
2637 new = gen_rtx_PLUS (Pmode, base, new);
2638 }
2639 }
2640 }
2641 }
2642 return new;
2643 }
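
/* Illustrative transformations performed above, with the PIC register
   shown as %ebx:

     local symbol l:	(symbol_ref "l")
		     -> (plus %ebx (const (unspec [(symbol_ref "l")] 7)))
			i.e. %ebx + l@GOTOFF

     global symbol g:	(symbol_ref "g")
		     -> (mem (plus %ebx (const (unspec [(symbol_ref "g")] 6))))
			i.e. a load from g@GOT(%ebx)  */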
2644 \f
2645 /* Try machine-dependent ways of modifying an illegitimate address
2646 to be legitimate. If we find one, return the new, valid address.
2647 This macro is used in only one place: `memory_address' in explow.c.
2648
2649 OLDX is the address as it was before break_out_memory_refs was called.
2650 In some cases it is useful to look at this to decide what needs to be done.
2651
2652 MODE and WIN are passed so that this macro can use
2653 GO_IF_LEGITIMATE_ADDRESS.
2654
2655 It is always safe for this macro to do nothing. It exists to recognize
2656 opportunities to optimize the output.
2657
2658 For the 80386, we handle X+REG by loading X into a register R and
2659 using R+REG. R will go in a general reg and indexing will be used.
2660 However, if REG is a broken-out memory address or multiplication,
2661 nothing needs to be done because REG can certainly go in a general reg.
2662
2663 When -fpic is used, special handling is needed for symbolic references.
2664 See comments by legitimize_pic_address in i386.c for details. */
2665
2666 rtx
2667 legitimize_address (x, oldx, mode)
2668 register rtx x;
2669 register rtx oldx ATTRIBUTE_UNUSED;
2670 enum machine_mode mode;
2671 {
2672 int changed = 0;
2673 unsigned log;
2674
2675 if (TARGET_DEBUG_ADDR)
2676 {
2677 fprintf (stderr, "\n==========\nLEGITIMIZE_ADDRESS, mode = %s\n",
2678 GET_MODE_NAME (mode));
2679 debug_rtx (x);
2680 }
2681
2682 if (flag_pic && SYMBOLIC_CONST (x))
2683 return legitimize_pic_address (x, 0);
2684
2685   /* Canonicalize shifts by 0, 1, 2, 3 into multiply.  */
2686 if (GET_CODE (x) == ASHIFT
2687 && GET_CODE (XEXP (x, 1)) == CONST_INT
2688 && (log = (unsigned)exact_log2 (INTVAL (XEXP (x, 1)))) < 4)
2689 {
2690 changed = 1;
2691 x = gen_rtx_MULT (Pmode, force_reg (Pmode, XEXP (x, 0)),
2692 GEN_INT (1 << log));
2693 }
2694
2695 if (GET_CODE (x) == PLUS)
2696 {
2697 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
2698
2699 if (GET_CODE (XEXP (x, 0)) == ASHIFT
2700 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
2701 && (log = (unsigned)exact_log2 (INTVAL (XEXP (XEXP (x, 0), 1)))) < 4)
2702 {
2703 changed = 1;
2704 XEXP (x, 0) = gen_rtx_MULT (Pmode,
2705 force_reg (Pmode, XEXP (XEXP (x, 0), 0)),
2706 GEN_INT (1 << log));
2707 }
2708
2709 if (GET_CODE (XEXP (x, 1)) == ASHIFT
2710 && GET_CODE (XEXP (XEXP (x, 1), 1)) == CONST_INT
2711 && (log = (unsigned)exact_log2 (INTVAL (XEXP (XEXP (x, 1), 1)))) < 4)
2712 {
2713 changed = 1;
2714 XEXP (x, 1) = gen_rtx_MULT (Pmode,
2715 force_reg (Pmode, XEXP (XEXP (x, 1), 0)),
2716 GEN_INT (1 << log));
2717 }
2718
2719 /* Put multiply first if it isn't already. */
2720 if (GET_CODE (XEXP (x, 1)) == MULT)
2721 {
2722 rtx tmp = XEXP (x, 0);
2723 XEXP (x, 0) = XEXP (x, 1);
2724 XEXP (x, 1) = tmp;
2725 changed = 1;
2726 }
2727
2728 /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
2729 into (plus (plus (mult (reg) (const)) (reg)) (const)). This can be
2730 created by virtual register instantiation, register elimination, and
2731 similar optimizations. */
2732 if (GET_CODE (XEXP (x, 0)) == MULT && GET_CODE (XEXP (x, 1)) == PLUS)
2733 {
2734 changed = 1;
2735 x = gen_rtx_PLUS (Pmode,
2736 gen_rtx_PLUS (Pmode, XEXP (x, 0),
2737 XEXP (XEXP (x, 1), 0)),
2738 XEXP (XEXP (x, 1), 1));
2739 }
2740
2741 /* Canonicalize
2742 (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
2743 into (plus (plus (mult (reg) (const)) (reg)) (const)). */
2744 else if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS
2745 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
2746 && GET_CODE (XEXP (XEXP (x, 0), 1)) == PLUS
2747 && CONSTANT_P (XEXP (x, 1)))
2748 {
2749 rtx constant;
2750 rtx other = NULL_RTX;
2751
2752 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
2753 {
2754 constant = XEXP (x, 1);
2755 other = XEXP (XEXP (XEXP (x, 0), 1), 1);
2756 }
2757 else if (GET_CODE (XEXP (XEXP (XEXP (x, 0), 1), 1)) == CONST_INT)
2758 {
2759 constant = XEXP (XEXP (XEXP (x, 0), 1), 1);
2760 other = XEXP (x, 1);
2761 }
2762 else
2763 constant = 0;
2764
2765 if (constant)
2766 {
2767 changed = 1;
2768 x = gen_rtx_PLUS (Pmode,
2769 gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 0),
2770 XEXP (XEXP (XEXP (x, 0), 1), 0)),
2771 plus_constant (other, INTVAL (constant)));
2772 }
2773 }
2774
2775 if (changed && legitimate_address_p (mode, x, FALSE))
2776 return x;
2777
2778 if (GET_CODE (XEXP (x, 0)) == MULT)
2779 {
2780 changed = 1;
2781 XEXP (x, 0) = force_operand (XEXP (x, 0), 0);
2782 }
2783
2784 if (GET_CODE (XEXP (x, 1)) == MULT)
2785 {
2786 changed = 1;
2787 XEXP (x, 1) = force_operand (XEXP (x, 1), 0);
2788 }
2789
2790 if (changed
2791 && GET_CODE (XEXP (x, 1)) == REG
2792 && GET_CODE (XEXP (x, 0)) == REG)
2793 return x;
2794
2795 if (flag_pic && SYMBOLIC_CONST (XEXP (x, 1)))
2796 {
2797 changed = 1;
2798 x = legitimize_pic_address (x, 0);
2799 }
2800
2801 if (changed && legitimate_address_p (mode, x, FALSE))
2802 return x;
2803
2804 if (GET_CODE (XEXP (x, 0)) == REG)
2805 {
2806 register rtx temp = gen_reg_rtx (Pmode);
2807 register rtx val = force_operand (XEXP (x, 1), temp);
2808 if (val != temp)
2809 emit_move_insn (temp, val);
2810
2811 XEXP (x, 1) = temp;
2812 return x;
2813 }
2814
2815 else if (GET_CODE (XEXP (x, 1)) == REG)
2816 {
2817 register rtx temp = gen_reg_rtx (Pmode);
2818 register rtx val = force_operand (XEXP (x, 0), temp);
2819 if (val != temp)
2820 emit_move_insn (temp, val);
2821
2822 XEXP (x, 0) = temp;
2823 return x;
2824 }
2825 }
2826
2827 return x;
2828 }
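
/* For instance (illustrative), the ashift canonicalization above turns

     (plus (ashift (reg A) (const_int 2)) (reg B))
   into
     (plus (mult (reg A) (const_int 4)) (reg B))

   which ix86_decompose_address then accepts as base + index*4.  */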
2829 \f
2830 /* Print an integer constant expression in assembler syntax. Addition
2831 and subtraction are the only arithmetic that may appear in these
2832 expressions. FILE is the stdio stream to write to, X is the rtx, and
2833 CODE is the operand print code from the output string. */
2834
2835 static void
2836 output_pic_addr_const (file, x, code)
2837 FILE *file;
2838 rtx x;
2839 int code;
2840 {
2841 char buf[256];
2842
2843 switch (GET_CODE (x))
2844 {
2845 case PC:
2846 if (flag_pic)
2847 putc ('.', file);
2848 else
2849 abort ();
2850 break;
2851
2852 case SYMBOL_REF:
2853 assemble_name (file, XSTR (x, 0));
2854 if (code == 'P' && ! SYMBOL_REF_FLAG (x))
2855 fputs ("@PLT", file);
2856 break;
2857
2858 case LABEL_REF:
2859 x = XEXP (x, 0);
2860 /* FALLTHRU */
2861 case CODE_LABEL:
2862 ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (x));
2863 assemble_name (asm_out_file, buf);
2864 break;
2865
2866 case CONST_INT:
2867 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
2868 break;
2869
2870 case CONST:
2871 /* This used to output parentheses around the expression,
2872 but that does not work on the 386 (either ATT or BSD assembler). */
2873 output_pic_addr_const (file, XEXP (x, 0), code);
2874 break;
2875
2876 case CONST_DOUBLE:
2877 if (GET_MODE (x) == VOIDmode)
2878 {
2879 /* We can use %d if the number is <32 bits and positive. */
2880 if (CONST_DOUBLE_HIGH (x) || CONST_DOUBLE_LOW (x) < 0)
2881 fprintf (file, "0x%lx%08lx",
2882 (unsigned long) CONST_DOUBLE_HIGH (x),
2883 (unsigned long) CONST_DOUBLE_LOW (x));
2884 else
2885 fprintf (file, HOST_WIDE_INT_PRINT_DEC, CONST_DOUBLE_LOW (x));
2886 }
2887 else
2888 /* We can't handle floating point constants;
2889 PRINT_OPERAND must handle them. */
2890 output_operand_lossage ("floating constant misused");
2891 break;
2892
2893 case PLUS:
2894 /* Some assemblers need integer constants to appear first. */
2895 if (GET_CODE (XEXP (x, 0)) == CONST_INT)
2896 {
2897 output_pic_addr_const (file, XEXP (x, 0), code);
2898 putc ('+', file);
2899 output_pic_addr_const (file, XEXP (x, 1), code);
2900 }
2901 else if (GET_CODE (XEXP (x, 1)) == CONST_INT)
2902 {
2903 output_pic_addr_const (file, XEXP (x, 1), code);
2904 putc ('+', file);
2905 output_pic_addr_const (file, XEXP (x, 0), code);
2906 }
2907 else
2908 abort ();
2909 break;
2910
2911 case MINUS:
2912 putc (ASSEMBLER_DIALECT ? '(' : '[', file);
2913 output_pic_addr_const (file, XEXP (x, 0), code);
2914 putc ('-', file);
2915 output_pic_addr_const (file, XEXP (x, 1), code);
2916 putc (ASSEMBLER_DIALECT ? ')' : ']', file);
2917 break;
2918
2919 case UNSPEC:
2920 if (XVECLEN (x, 0) != 1)
2921 abort ();
2922 output_pic_addr_const (file, XVECEXP (x, 0, 0), code);
2923 switch (XINT (x, 1))
2924 {
2925 case 6:
2926 fputs ("@GOT", file);
2927 break;
2928 case 7:
2929 fputs ("@GOTOFF", file);
2930 break;
2931 case 8:
2932 fputs ("@PLT", file);
2933 break;
2934 default:
2935 output_operand_lossage ("invalid UNSPEC as operand");
2936 break;
2937 }
2938 break;
2939
2940 default:
2941 output_operand_lossage ("invalid expression as operand");
2942 }
2943 }
2944
2945 /* This is called from dwarfout.c via ASM_OUTPUT_DWARF_ADDR_CONST.
2946 We need to handle our special PIC relocations. */
2947
2948 void
2949 i386_dwarf_output_addr_const (file, x)
2950 FILE *file;
2951 rtx x;
2952 {
2953 fprintf (file, "\t%s\t", INT_ASM_OP);
2954 if (flag_pic)
2955 output_pic_addr_const (file, x, '\0');
2956 else
2957 output_addr_const (file, x);
2958 fputc ('\n', file);
2959 }
2960
2961 /* In the name of slightly smaller debug output, and to cater to
2962    general assembler lossage, recognize PIC+GOTOFF and turn it back
2963    into a direct symbol reference.  */
2964
2965 rtx
2966 i386_simplify_dwarf_addr (orig_x)
2967 rtx orig_x;
2968 {
2969 rtx x = orig_x;
2970
2971 if (GET_CODE (x) != PLUS
2972 || GET_CODE (XEXP (x, 0)) != REG
2973 || GET_CODE (XEXP (x, 1)) != CONST)
2974 return orig_x;
2975
2976 x = XEXP (XEXP (x, 1), 0);
2977 if (GET_CODE (x) == UNSPEC
2978 && XINT (x, 1) == 7)
2979 return XVECEXP (x, 0, 0);
2980
2981 if (GET_CODE (x) == PLUS
2982 && GET_CODE (XEXP (x, 0)) == UNSPEC
2983 && GET_CODE (XEXP (x, 1)) == CONST_INT
2984 && XINT (XEXP (x, 0), 1) == 7)
2985 return gen_rtx_PLUS (VOIDmode, XVECEXP (XEXP (x, 0), 0, 0), XEXP (x, 1));
2986
2987 return orig_x;
2988 }
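
/* E.g. (illustrative):

     (plus (reg %ebx) (const (unspec [(symbol_ref "x")] 7)))

   simplifies back to plain (symbol_ref "x") in the debug output.  */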
2989 \f
2990 static void
2991 put_condition_code (code, mode, reverse, fp, file)
2992 enum rtx_code code;
2993 enum machine_mode mode;
2994 int reverse, fp;
2995 FILE *file;
2996 {
2997 const char *suffix;
2998
2999 if (reverse)
3000 code = reverse_condition (code);
3001
3002 switch (code)
3003 {
3004 case EQ:
3005 suffix = "e";
3006 break;
3007 case NE:
3008 suffix = "ne";
3009 break;
3010 case GT:
3011 if (mode == CCNOmode)
3012 abort ();
3013 suffix = "g";
3014 break;
3015 case GTU:
3016       /* ??? Use "nbe" instead of "a" for fcmov lossage on some assemblers.
3017 	 Those same assemblers have the same but opposite lossage on cmov.  */
3018 suffix = fp ? "nbe" : "a";
3019 break;
3020 case LT:
3021 if (mode == CCNOmode)
3022 suffix = "s";
3023 else
3024 suffix = "l";
3025 break;
3026 case LTU:
3027 suffix = "b";
3028 break;
3029 case GE:
3030 if (mode == CCNOmode)
3031 suffix = "ns";
3032 else
3033 suffix = "ge";
3034 break;
3035 case GEU:
3036 /* ??? As above. */
3037 suffix = fp ? "nb" : "ae";
3038 break;
3039 case LE:
3040 if (mode == CCNOmode)
3041 abort ();
3042 suffix = "le";
3043 break;
3044 case LEU:
3045 suffix = "be";
3046 break;
3047 case UNORDERED:
3048 suffix = "p";
3049 break;
3050 case ORDERED:
3051 suffix = "np";
3052 break;
3053 default:
3054 abort ();
3055 }
3056 fputs (suffix, file);
3057 }
3058
3059 void
3060 print_reg (x, code, file)
3061 rtx x;
3062 int code;
3063 FILE *file;
3064 {
3065 if (REGNO (x) == ARG_POINTER_REGNUM
3066 || REGNO (x) == FRAME_POINTER_REGNUM
3067 || REGNO (x) == FLAGS_REG
3068 || REGNO (x) == FPSR_REG)
3069 abort ();
3070
3071 if (ASSEMBLER_DIALECT == 0 || USER_LABEL_PREFIX[0] == 0)
3072 putc ('%', file);
3073
3074 if (code == 'w')
3075 code = 2;
3076 else if (code == 'b')
3077 code = 1;
3078 else if (code == 'k')
3079 code = 4;
3080 else if (code == 'y')
3081 code = 3;
3082 else if (code == 'h')
3083 code = 0;
3084 else
3085 code = GET_MODE_SIZE (GET_MODE (x));
3086
3087 switch (code)
3088 {
3089 case 3:
3090 if (STACK_TOP_P (x))
3091 {
3092 fputs ("st(0)", file);
3093 break;
3094 }
3095 /* FALLTHRU */
3096 case 4:
3097 case 8:
3098 case 12:
3099 if (! FP_REG_P (x))
3100 putc ('e', file);
3101 /* FALLTHRU */
3102 case 2:
3103 fputs (hi_reg_name[REGNO (x)], file);
3104 break;
3105 case 1:
3106 fputs (qi_reg_name[REGNO (x)], file);
3107 break;
3108 case 0:
3109 fputs (qi_high_reg_name[REGNO (x)], file);
3110 break;
3111 default:
3112 abort ();
3113 }
3114 }
3115
3116 /* Meaning of CODE:
3117    L,W,B,Q,S,T -- print the opcode suffix for the specified size of operand.
3118 C -- print opcode suffix for set/cmov insn.
3119 c -- like C, but print reversed condition
3120 R -- print the prefix for register names.
3121 z -- print the opcode suffix for the size of the current operand.
3122 * -- print a star (in certain assembler syntax)
3123 w -- print the operand as if it's a "word" (HImode) even if it isn't.
3124    s -- print a shift double count, followed by the assembler's argument
3125         delimiter.
3126 b -- print the QImode name of the register for the indicated operand.
3127 %b0 would print %al if operands[0] is reg 0.
3128 w -- likewise, print the HImode name of the register.
3129 k -- likewise, print the SImode name of the register.
3130 h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
3131 y -- print "st(0)" instead of "st" as a register. */
3132
3133 void
3134 print_operand (file, x, code)
3135 FILE *file;
3136 rtx x;
3137 int code;
3138 {
3139 if (code)
3140 {
3141 switch (code)
3142 {
3143 case '*':
3144 if (ASSEMBLER_DIALECT == 0)
3145 putc ('*', file);
3146 return;
3147
3148 case 'L':
3149 if (ASSEMBLER_DIALECT == 0)
3150 putc ('l', file);
3151 return;
3152
3153 case 'W':
3154 if (ASSEMBLER_DIALECT == 0)
3155 putc ('w', file);
3156 return;
3157
3158 case 'B':
3159 if (ASSEMBLER_DIALECT == 0)
3160 putc ('b', file);
3161 return;
3162
3163 case 'Q':
3164 if (ASSEMBLER_DIALECT == 0)
3165 putc ('l', file);
3166 return;
3167
3168 case 'S':
3169 if (ASSEMBLER_DIALECT == 0)
3170 putc ('s', file);
3171 return;
3172
3173 case 'T':
3174 if (ASSEMBLER_DIALECT == 0)
3175 putc ('t', file);
3176 return;
3177
3178 case 'z':
3179 /* 387 opcodes don't get size suffixes if the operands are
3180 registers. */
3181
3182 if (STACK_REG_P (x))
3183 return;
3184
3185 /* Intel syntax has no truck with instruction suffixes. */
3186 if (ASSEMBLER_DIALECT != 0)
3187 return;
3188
3189 	  /* Derive the opcode suffix from the size of the operand.  */
3190 switch (GET_MODE_SIZE (GET_MODE (x)))
3191 {
3192 case 2:
3193 #ifdef HAVE_GAS_FILDS_FISTS
3194 putc ('s', file);
3195 #endif
3196 return;
3197
3198 case 4:
3199 if (GET_MODE (x) == SFmode)
3200 {
3201 putc ('s', file);
3202 return;
3203 }
3204 else
3205 putc ('l', file);
3206 return;
3207
3208 case 12:
3209 putc ('t', file);
3210 return;
3211
3212 case 8:
3213 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
3214 {
3215 #ifdef GAS_MNEMONICS
3216 putc ('q', file);
3217 #else
3218 putc ('l', file);
3219 putc ('l', file);
3220 #endif
3221 }
3222 else
3223 putc ('l', file);
3224 return;
3225
3226 default:
3227 abort ();
3228 }
3229
3230 case 'b':
3231 case 'w':
3232 case 'k':
3233 case 'h':
3234 case 'y':
3235 case 'X':
3236 case 'P':
3237 break;
3238
3239 case 's':
3240 if (GET_CODE (x) == CONST_INT || ! SHIFT_DOUBLE_OMITS_COUNT)
3241 {
3242 PRINT_OPERAND (file, x, 0);
3243 putc (',', file);
3244 }
3245 return;
3246
3247 case 'C':
3248 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 0, file);
3249 return;
3250 case 'F':
3251 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 1, file);
3252 return;
3253
3254 /* Like above, but reverse condition */
3255 case 'c':
3256 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 0, file);
3257 return;
3258 case 'f':
3259 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 1, file);
3260 return;
3261
3262 default:
3263 {
3264 char str[50];
3265 sprintf (str, "invalid operand code `%c'", code);
3266 output_operand_lossage (str);
3267 }
3268 }
3269 }
3270
3271 if (GET_CODE (x) == REG)
3272 {
3273 PRINT_REG (x, code, file);
3274 }
3275
3276 else if (GET_CODE (x) == MEM)
3277 {
3278 /* No `byte ptr' prefix for call instructions. */
3279 if (ASSEMBLER_DIALECT != 0 && code != 'X' && code != 'P')
3280 {
3281 const char * size;
3282 switch (GET_MODE_SIZE (GET_MODE (x)))
3283 {
3284 case 1: size = "BYTE"; break;
3285 case 2: size = "WORD"; break;
3286 case 4: size = "DWORD"; break;
3287 case 8: size = "QWORD"; break;
3288 case 12: size = "XWORD"; break;
3289 default:
3290 abort ();
3291 }
3292 fputs (size, file);
3293 fputs (" PTR ", file);
3294 }
3295
3296 x = XEXP (x, 0);
3297 if (flag_pic && CONSTANT_ADDRESS_P (x))
3298 output_pic_addr_const (file, x, code);
3299 else
3300 output_address (x);
3301 }
3302
3303 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == SFmode)
3304 {
3305 REAL_VALUE_TYPE r;
3306 long l;
3307
3308 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
3309 REAL_VALUE_TO_TARGET_SINGLE (r, l);
3310
3311 if (ASSEMBLER_DIALECT == 0)
3312 putc ('$', file);
3313 fprintf (file, "0x%lx", l);
3314 }
3315
3316 /* These float cases don't actually occur as immediate operands. */
3317 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == DFmode)
3318 {
3319 REAL_VALUE_TYPE r;
3320 char dstr[30];
3321
3322 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
3323 REAL_VALUE_TO_DECIMAL (r, "%.22e", dstr);
3324 fprintf (file, "%s", dstr);
3325 }
3326
3327 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == XFmode)
3328 {
3329 REAL_VALUE_TYPE r;
3330 char dstr[30];
3331
3332 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
3333 REAL_VALUE_TO_DECIMAL (r, "%.22e", dstr);
3334 fprintf (file, "%s", dstr);
3335 }
3336 else
3337 {
3338 if (code != 'P')
3339 {
3340 if (GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST_DOUBLE)
3341 {
3342 if (ASSEMBLER_DIALECT == 0)
3343 putc ('$', file);
3344 }
3345 else if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF
3346 || GET_CODE (x) == LABEL_REF)
3347 {
3348 if (ASSEMBLER_DIALECT == 0)
3349 putc ('$', file);
3350 else
3351 fputs ("OFFSET FLAT:", file);
3352 }
3353 }
3354 if (GET_CODE (x) == CONST_INT)
3355 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
3356 else if (flag_pic)
3357 output_pic_addr_const (file, x, code);
3358 else
3359 output_addr_const (file, x);
3360 }
3361 }
3362 \f
3363 /* Print a memory operand whose address is ADDR. */
3364
3365 void
3366 print_operand_address (file, addr)
3367 FILE *file;
3368 register rtx addr;
3369 {
3370 struct ix86_address parts;
3371 rtx base, index, disp;
3372 int scale;
3373
3374 if (! ix86_decompose_address (addr, &parts))
3375 abort ();
3376
3377 base = parts.base;
3378 index = parts.index;
3379 disp = parts.disp;
3380 scale = parts.scale;
3381
3382 if (!base && !index)
3383 {
3384       /* A displacement-only address requires special attention.  */
3385
3386 if (GET_CODE (disp) == CONST_INT)
3387 {
3388 if (ASSEMBLER_DIALECT != 0)
3389 fputs ("ds:", file);
3390 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (addr));
3391 }
3392 else if (flag_pic)
3393 output_pic_addr_const (file, addr, 0);
3394 else
3395 output_addr_const (file, addr);
3396 }
3397 else
3398 {
3399 if (ASSEMBLER_DIALECT == 0)
3400 {
3401 if (disp)
3402 {
3403 if (flag_pic)
3404 output_pic_addr_const (file, disp, 0);
3405 else if (GET_CODE (disp) == LABEL_REF)
3406 output_asm_label (disp);
3407 else
3408 output_addr_const (file, disp);
3409 }
3410
3411 putc ('(', file);
3412 if (base)
3413 PRINT_REG (base, 0, file);
3414 if (index)
3415 {
3416 putc (',', file);
3417 PRINT_REG (index, 0, file);
3418 if (scale != 1)
3419 fprintf (file, ",%d", scale);
3420 }
3421 putc (')', file);
3422 }
3423 else
3424 {
3425 rtx offset = NULL_RTX;
3426
3427 if (disp)
3428 {
3429 /* Pull out the offset of a symbol; print any symbol itself. */
3430 if (GET_CODE (disp) == CONST
3431 && GET_CODE (XEXP (disp, 0)) == PLUS
3432 && GET_CODE (XEXP (XEXP (disp, 0), 1)) == CONST_INT)
3433 {
3434 offset = XEXP (XEXP (disp, 0), 1);
3435 disp = gen_rtx_CONST (VOIDmode,
3436 XEXP (XEXP (disp, 0), 0));
3437 }
3438
3439 if (flag_pic)
3440 output_pic_addr_const (file, disp, 0);
3441 else if (GET_CODE (disp) == LABEL_REF)
3442 output_asm_label (disp);
3443 else if (GET_CODE (disp) == CONST_INT)
3444 offset = disp;
3445 else
3446 output_addr_const (file, disp);
3447 }
3448
3449 putc ('[', file);
3450 if (base)
3451 {
3452 PRINT_REG (base, 0, file);
3453 if (offset)
3454 {
3455 if (INTVAL (offset) >= 0)
3456 putc ('+', file);
3457 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
3458 }
3459 }
3460 else if (offset)
3461 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
3462 else
3463 putc ('0', file);
3464
3465 if (index)
3466 {
3467 putc ('+', file);
3468 PRINT_REG (index, 0, file);
3469 if (scale != 1)
3470 fprintf (file, "*%d", scale);
3471 }
3472 putc (']', file);
3473 }
3474 }
3475 }
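
/* The same decomposed address in the two dialects (illustrative), for
   base = %ebp, index = %eax, scale = 4, disp = -8:

     AT&T  (ASSEMBLER_DIALECT == 0):  -8(%ebp,%eax,4)
     Intel (ASSEMBLER_DIALECT != 0):  [ebp-8+eax*4]  */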
3476 \f
3477 /* Split one or more DImode RTL references into pairs of SImode
3478 references. The RTL can be REG, offsettable MEM, integer constant, or
3479 CONST_DOUBLE. "operands" is a pointer to an array of DImode RTL to
3480 split and "num" is its length. lo_half and hi_half are output arrays
3481 that parallel "operands". */
3482
3483 void
3484 split_di (operands, num, lo_half, hi_half)
3485 rtx operands[];
3486 int num;
3487 rtx lo_half[], hi_half[];
3488 {
3489 while (num--)
3490 {
3491 rtx op = operands[num];
3492 if (CONSTANT_P (op))
3493 split_double (op, &lo_half[num], &hi_half[num]);
3494 else if (! reload_completed)
3495 {
3496 lo_half[num] = gen_lowpart (SImode, op);
3497 hi_half[num] = gen_highpart (SImode, op);
3498 }
3499 else if (GET_CODE (op) == REG)
3500 {
3501 lo_half[num] = gen_rtx_REG (SImode, REGNO (op));
3502 hi_half[num] = gen_rtx_REG (SImode, REGNO (op) + 1);
3503 }
3504 else if (offsettable_memref_p (op))
3505 {
3506 rtx lo_addr = XEXP (op, 0);
3507 rtx hi_addr = XEXP (adj_offsettable_operand (op, 4), 0);
3508 lo_half[num] = change_address (op, SImode, lo_addr);
3509 hi_half[num] = change_address (op, SImode, hi_addr);
3510 }
3511 else
3512 abort ();
3513 }
3514 }
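
/* A minimal standalone sketch of the DImode -> 2 x SImode split for a
   constant (kept out of the build with #if 0; assumes 32-bit words and
   the little-endian layout the SImode halves above correspond to).  */
#if 0
#include <stdio.h>

int
main ()
{
  unsigned long long v = 0x0123456789abcdefULL;
  unsigned int lo = (unsigned int) v;		/* low half  */
  unsigned int hi = (unsigned int) (v >> 32);	/* high half */

  printf ("lo = 0x%08x, hi = 0x%08x\n", lo, hi);
  /* Prints: lo = 0x89abcdef, hi = 0x01234567.  */
  return 0;
}
#endif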
3515 \f
3516 /* Output code to perform a 387 binary operation in INSN, one of PLUS,
3517 MINUS, MULT or DIV. OPERANDS are the insn operands, where operands[3]
3518 is the expression of the binary operation. The output may either be
3519 emitted here, or returned to the caller, like all output_* functions.
3520
3521 There is no guarantee that the operands are the same mode, as they
3522 might be within FLOAT or FLOAT_EXTEND expressions. */
3523
3524 #ifndef SYSV386_COMPAT
3525 /* Set to 1 for compatibility with brain-damaged assemblers. No-one
3526 wants to fix the assemblers because that causes incompatibility
3527 with gcc. No-one wants to fix gcc because that causes
3528 incompatibility with assemblers... You can use the option of
3529 -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way. */
3530 #define SYSV386_COMPAT 1
3531 #endif
3532
3533 const char *
3534 output_387_binary_op (insn, operands)
3535 rtx insn;
3536 rtx *operands;
3537 {
3538 static char buf[30];
3539 const char *p;
3540
3541 #ifdef ENABLE_CHECKING
3542   /* Even if we do not want to check the inputs, this documents the input
3543      constraints, which helps in understanding the following code.  */
3544 if (STACK_REG_P (operands[0])
3545 && ((REG_P (operands[1])
3546 && REGNO (operands[0]) == REGNO (operands[1])
3547 && (STACK_REG_P (operands[2]) || GET_CODE (operands[2]) == MEM))
3548 || (REG_P (operands[2])
3549 && REGNO (operands[0]) == REGNO (operands[2])
3550 && (STACK_REG_P (operands[1]) || GET_CODE (operands[1]) == MEM)))
3551 && (STACK_TOP_P (operands[1]) || STACK_TOP_P (operands[2])))
3552 ; /* ok */
3553 else
3554 abort ();
3555 #endif
3556
3557 switch (GET_CODE (operands[3]))
3558 {
3559 case PLUS:
3560 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
3561 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
3562 p = "fiadd";
3563 else
3564 p = "fadd";
3565 break;
3566
3567 case MINUS:
3568 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
3569 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
3570 p = "fisub";
3571 else
3572 p = "fsub";
3573 break;
3574
3575 case MULT:
3576 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
3577 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
3578 p = "fimul";
3579 else
3580 p = "fmul";
3581 break;
3582
3583 case DIV:
3584 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
3585 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
3586 p = "fidiv";
3587 else
3588 p = "fdiv";
3589 break;
3590
3591 default:
3592 abort ();
3593 }
3594
3595 strcpy (buf, p);
3596
3597 switch (GET_CODE (operands[3]))
3598 {
3599 case MULT:
3600 case PLUS:
3601 if (REG_P (operands[2]) && REGNO (operands[0]) == REGNO (operands[2]))
3602 {
3603 rtx temp = operands[2];
3604 operands[2] = operands[1];
3605 operands[1] = temp;
3606 }
3607
3608       /* We now know operands[0] == operands[1].  */
3609
3610 if (GET_CODE (operands[2]) == MEM)
3611 {
3612 p = "%z2\t%2";
3613 break;
3614 }
3615
3616 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
3617 {
3618 if (STACK_TOP_P (operands[0]))
3619 /* How is it that we are storing to a dead operand[2]?
3620 Well, presumably operands[1] is dead too. We can't
3621 store the result to st(0) as st(0) gets popped on this
3622 instruction. Instead store to operands[2] (which I
3623 think has to be st(1)). st(1) will be popped later.
3624 gcc <= 2.8.1 didn't have this check and generated
3625 assembly code that the Unixware assembler rejected. */
3626 p = "p\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
3627 else
3628 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
3629 break;
3630 }
3631
3632 if (STACK_TOP_P (operands[0]))
3633 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
3634 else
3635 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
3636 break;
3637
3638 case MINUS:
3639 case DIV:
3640 if (GET_CODE (operands[1]) == MEM)
3641 {
3642 p = "r%z1\t%1";
3643 break;
3644 }
3645
3646 if (GET_CODE (operands[2]) == MEM)
3647 {
3648 p = "%z2\t%2";
3649 break;
3650 }
3651
3652 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
3653 {
3654 #if SYSV386_COMPAT
3655 /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
3656 derived assemblers, confusingly reverse the direction of
3657 the operation for fsub{r} and fdiv{r} when the
3658 destination register is not st(0). The Intel assembler
3659 doesn't have this brain damage. Read !SYSV386_COMPAT to
3660 figure out what the hardware really does. */
3661 if (STACK_TOP_P (operands[0]))
3662 p = "{p\t%0, %2|rp\t%2, %0}";
3663 else
3664 p = "{rp\t%2, %0|p\t%0, %2}";
3665 #else
3666 if (STACK_TOP_P (operands[0]))
3667 /* As above for fmul/fadd, we can't store to st(0). */
3668 p = "rp\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
3669 else
3670 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
3671 #endif
3672 break;
3673 }
3674
3675 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
3676 {
3677 #if SYSV386_COMPAT
3678 if (STACK_TOP_P (operands[0]))
3679 p = "{rp\t%0, %1|p\t%1, %0}";
3680 else
3681 p = "{p\t%1, %0|rp\t%0, %1}";
3682 #else
3683 if (STACK_TOP_P (operands[0]))
3684 p = "p\t{%0, %1|%1, %0}"; /* st(1) = st(1) op st(0); pop */
3685 else
3686 p = "rp\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2); pop */
3687 #endif
3688 break;
3689 }
3690
3691 if (STACK_TOP_P (operands[0]))
3692 {
3693 if (STACK_TOP_P (operands[1]))
3694 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
3695 else
3696 p = "r\t{%y1, %0|%0, %y1}"; /* st(0) = st(r1) op st(0) */
3697 break;
3698 }
3699 else if (STACK_TOP_P (operands[1]))
3700 {
3701 #if SYSV386_COMPAT
3702 p = "{\t%1, %0|r\t%0, %1}";
3703 #else
3704 p = "r\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2) */
3705 #endif
3706 }
3707 else
3708 {
3709 #if SYSV386_COMPAT
3710 p = "{r\t%2, %0|\t%0, %2}";
3711 #else
3712 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
3713 #endif
3714 }
3715 break;
3716
3717 default:
3718 abort ();
3719 }
3720
3721 strcat (buf, p);
3722 return buf;
3723 }
3724
3725 /* Output code for INSN to convert a float to a signed int. OPERANDS
3726 are the insn operands. The output may be [HSD]Imode and the input
3727 operand may be [SDX]Fmode. */
3728
3729 const char *
3730 output_fix_trunc (insn, operands)
3731 rtx insn;
3732 rtx *operands;
3733 {
3734 int stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
3735 int dimode_p = GET_MODE (operands[0]) == DImode;
3736 rtx xops[4];
3737
3738 /* Jump through a hoop or two for DImode, since the hardware has no
3739 non-popping instruction. We used to do this a different way, but
3740 that was somewhat fragile and broke with post-reload splitters. */
3741 if (dimode_p && !stack_top_dies)
3742 output_asm_insn ("fld\t%y1", operands);
3743
3744 if (! STACK_TOP_P (operands[1]))
3745 abort ();
3746
3747 xops[0] = GEN_INT (12);
3748 xops[1] = adj_offsettable_operand (operands[2], 1);
3749 xops[1] = change_address (xops[1], QImode, NULL_RTX);
3750
3751 xops[2] = operands[0];
3752 if (GET_CODE (operands[0]) != MEM)
3753 xops[2] = operands[3];
3754
3755 output_asm_insn ("fnstcw\t%2", operands);
3756 output_asm_insn ("mov{l}\t{%2, %4|%4, %2}", operands);
3757 output_asm_insn ("mov{b}\t{%0, %1|%1, %0}", xops);
3758 output_asm_insn ("fldcw\t%2", operands);
3759 output_asm_insn ("mov{l}\t{%4, %2|%2, %4}", operands);
3760
3761 if (stack_top_dies || dimode_p)
3762 output_asm_insn ("fistp%z2\t%2", xops);
3763 else
3764 output_asm_insn ("fist%z2\t%2", xops);
3765
3766 output_asm_insn ("fldcw\t%2", operands);
3767
3768 if (GET_CODE (operands[0]) != MEM)
3769 {
3770 if (dimode_p)
3771 {
3772 split_di (operands+0, 1, xops+0, xops+1);
3773 split_di (operands+3, 1, xops+2, xops+3);
3774 output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);
3775 output_asm_insn ("mov{l}\t{%3, %1|%1, %3}", xops);
3776 }
3777 else if (GET_MODE (operands[0]) == SImode)
3778 output_asm_insn ("mov{l}\t{%3, %0|%0, %3}", operands);
3779 else
3780 output_asm_insn ("mov{w}\t{%3, %0|%0, %3}", operands);
3781 }
3782
3783 return "";
3784 }
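
/* Why the control word dance above works (illustrative): storing 12 (0x0c)
   into the high byte of the saved control word sets the rounding control
   field (bits 10-11) to 11b, i.e. truncate toward zero, which is the
   behavior C requires for float-to-int conversion; the original control
   word is reloaded once the fist(p) has been issued.  */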
3785
3786 /* Output code for INSN to compare OPERANDS. EFLAGS_P is 1 when fcomi
3787 should be used and 2 when fnstsw should be used. UNORDERED_P is true
3788 when fucom should be used. */
3789
3790 const char *
3791 output_fp_compare (insn, operands, eflags_p, unordered_p)
3792 rtx insn;
3793 rtx *operands;
3794 int eflags_p, unordered_p;
3795 {
3796 int stack_top_dies;
3797 rtx cmp_op0 = operands[0];
3798 rtx cmp_op1 = operands[1];
3799
3800 if (eflags_p == 2)
3801 {
3802 cmp_op0 = cmp_op1;
3803 cmp_op1 = operands[2];
3804 }
3805
3806 if (! STACK_TOP_P (cmp_op0))
3807 abort ();
3808
3809 stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
3810
3811 if (STACK_REG_P (cmp_op1)
3812 && stack_top_dies
3813 && find_regno_note (insn, REG_DEAD, REGNO (cmp_op1))
3814 && REGNO (cmp_op1) != FIRST_STACK_REG)
3815 {
3816       /* If the top of the 387 stack dies, and the other operand
3817 	 is also a stack register that dies, then this must be an
3818 	 `fcompp' float compare.  */
3819
3820 if (eflags_p == 1)
3821 {
3822 /* There is no double popping fcomi variant. Fortunately,
3823 eflags is immune from the fstp's cc clobbering. */
3824 if (unordered_p)
3825 output_asm_insn ("fucomip\t{%y1, %0|%0, %y1}", operands);
3826 else
3827 output_asm_insn ("fcomip\t{%y1, %0|%0, %y1}", operands);
3828 return "fstp\t%y0";
3829 }
3830 else
3831 {
3832 if (eflags_p == 2)
3833 {
3834 if (unordered_p)
3835 return "fucompp\n\tfnstsw\t%0";
3836 else
3837 return "fcompp\n\tfnstsw\t%0";
3838 }
3839 else
3840 {
3841 if (unordered_p)
3842 return "fucompp";
3843 else
3844 return "fcompp";
3845 }
3846 }
3847 }
3848 else
3849 {
3850 /* Encoded here as eflags_p | intmode | unordered_p | stack_top_dies. */
3851
3852 static const char * const alt[24] =
3853 {
3854 "fcom%z1\t%y1",
3855 "fcomp%z1\t%y1",
3856 "fucom%z1\t%y1",
3857 "fucomp%z1\t%y1",
3858
3859 "ficom%z1\t%y1",
3860 "ficomp%z1\t%y1",
3861 NULL,
3862 NULL,
3863
3864 "fcomi\t{%y1, %0|%0, %y1}",
3865 "fcomip\t{%y1, %0|%0, %y1}",
3866 "fucomi\t{%y1, %0|%0, %y1}",
3867 "fucomip\t{%y1, %0|%0, %y1}",
3868
3869 NULL,
3870 NULL,
3871 NULL,
3872 NULL,
3873
3874 "fcom%z2\t%y2\n\tfnstsw\t%0",
3875 "fcomp%z2\t%y2\n\tfnstsw\t%0",
3876 "fucom%z2\t%y2\n\tfnstsw\t%0",
3877 "fucomp%z2\t%y2\n\tfnstsw\t%0",
3878
3879 "ficom%z2\t%y2\n\tfnstsw\t%0",
3880 "ficomp%z2\t%y2\n\tfnstsw\t%0",
3881 NULL,
3882 NULL
3883 };
3884
3885 int mask;
3886 const char *ret;
3887
3888 mask = eflags_p << 3;
3889 mask |= (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT) << 2;
3890 mask |= unordered_p << 1;
3891 mask |= stack_top_dies;
3892
3893 if (mask >= 24)
3894 abort ();
3895 ret = alt[mask];
3896 if (ret == NULL)
3897 abort ();
3898
3899 return ret;
3900 }
3901 }
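
/* A worked example of the table index above (illustrative): emitting fcomi
   output (eflags_p == 1) for an unordered compare whose stack top dies
   gives mask = (1 << 3) | (0 << 2) | (1 << 1) | 1 = 11, selecting
   "fucomip\t{%y1, %0|%0, %y1}".  */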
3902
3903 /* Output assembler code to FILE to initialize basic-block profiling.
3904
3905 If profile_block_flag == 2
3906
3907 Output code to call the subroutine `__bb_init_trace_func'
3908 and pass two parameters to it. The first parameter is
3909 the address of a block allocated in the object module.
3910 The second parameter is the number of the first basic block
3911 of the function.
3912
3913 The name of the block is a local symbol made with this statement:
3914
3915 ASM_GENERATE_INTERNAL_LABEL (BUFFER, "LPBX", 0);
3916
3917 Of course, since you are writing the definition of
3918 `ASM_GENERATE_INTERNAL_LABEL' as well as that of this macro, you
3919 can take a short cut in the definition of this macro and use the
3920 name that you know will result.
3921
3922 The number of the first basic block of the function is
3923 passed to the macro in BLOCK_OR_LABEL.
3924
3925 If described in a virtual assembler language the code to be
3926 output looks like:
3927
3928 parameter1 <- LPBX0
3929 parameter2 <- BLOCK_OR_LABEL
3930 call __bb_init_trace_func
3931
3932 else if profile_block_flag != 0
3933
3934 Output code to call the subroutine `__bb_init_func'
3935 and pass one single parameter to it, which is the same
3936 as the first parameter to `__bb_init_trace_func'.
3937
3938 The first word of this parameter is a flag which will be nonzero if
3939 the object module has already been initialized. So test this word
3940 first, and do not call `__bb_init_func' if the flag is nonzero.
3941 Note: When profile_block_flag == 2 the test need not be done
3942 but `__bb_init_trace_func' *must* be called.
3943
3944 BLOCK_OR_LABEL may be used to generate a label number as a
3945 branch destination in case `__bb_init_func' will not be called.
3946
3947 If described in a virtual assembler language the code to be
3948 output looks like:
3949
3950 cmp (LPBX0),0
3951 jne local_label
3952 parameter1 <- LPBX0
3953 call __bb_init_func
3954 local_label:
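
 In concrete i386 assembly, the profile_block_flag == 2 non-PIC case
 below comes out as (internal label spelling is target-dependent):

 pushl $BLOCK_OR_LABEL
 pushl $LPBX0
 call __bb_init_trace_func
 addl $8, %esp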
3955 */
3956
3957 void
3958 ix86_output_function_block_profiler (file, block_or_label)
3959 FILE *file;
3960 int block_or_label;
3961 {
3962 static int num_func = 0;
3963 rtx xops[8];
3964 char block_table[80], false_label[80];
3965
3966 ASM_GENERATE_INTERNAL_LABEL (block_table, "LPBX", 0);
3967
3968 xops[1] = gen_rtx_SYMBOL_REF (VOIDmode, block_table);
3969 xops[5] = stack_pointer_rtx;
3970 xops[7] = gen_rtx_REG (Pmode, 0); /* eax */
3971
3972 CONSTANT_POOL_ADDRESS_P (xops[1]) = TRUE;
3973
3974 switch (profile_block_flag)
3975 {
3976 case 2:
3977 xops[2] = GEN_INT (block_or_label);
3978 xops[3] = gen_rtx_MEM (Pmode,
3979 gen_rtx_SYMBOL_REF (VOIDmode, "__bb_init_trace_func"));
3980 xops[6] = GEN_INT (8);
3981
3982 output_asm_insn ("push{l}\t%2", xops);
3983 if (!flag_pic)
3984 output_asm_insn ("push{l}\t%1", xops);
3985 else
3986 {
3987 output_asm_insn ("lea{l}\t{%a1, %7|%7, %a1}", xops);
3988 output_asm_insn ("push{l}\t%7", xops);
3989 }
3990 output_asm_insn ("call\t%P3", xops);
3991 output_asm_insn ("add{l}\t{%6, %5|%5, %6}", xops);
3992 break;
3993
3994 default:
3995 ASM_GENERATE_INTERNAL_LABEL (false_label, "LPBZ", num_func);
3996
3997 xops[0] = const0_rtx;
3998 xops[2] = gen_rtx_MEM (Pmode,
3999 gen_rtx_SYMBOL_REF (VOIDmode, false_label));
4000 xops[3] = gen_rtx_MEM (Pmode,
4001 gen_rtx_SYMBOL_REF (VOIDmode, "__bb_init_func"));
4002 xops[4] = gen_rtx_MEM (Pmode, xops[1]);
4003 xops[6] = GEN_INT (4);
4004
4005 CONSTANT_POOL_ADDRESS_P (xops[2]) = TRUE;
4006
4007 output_asm_insn ("cmp{l}\t{%0, %4|%4, %0}", xops);
4008 output_asm_insn ("jne\t%2", xops);
4009
4010 if (!flag_pic)
4011 output_asm_insn ("push{l}\t%1", xops);
4012 else
4013 {
4014 output_asm_insn ("lea{l}\t{%a1, %7|%7, %a1}", xops);
4015 output_asm_insn ("push{l}\t%7", xops);
4016 }
4017 output_asm_insn ("call\t%P3", xops);
4018 output_asm_insn ("add{l}\t{%6, %5|%5, %6}", xops);
4019 ASM_OUTPUT_INTERNAL_LABEL (file, "LPBZ", num_func);
4020 num_func++;
4021 break;
4022 }
4023 }
4024
4025 /* Output assembler code to FILE to increment a counter associated
4026 with basic block number BLOCKNO.
4027
4028 If profile_block_flag == 2
4029
4030 Output code to initialize the global structure `__bb' and
4031 call the function `__bb_trace_func' which will increment the
4032 counter.
4033
4034 `__bb' consists of two words. In the first word the number
4035 of the basic block has to be stored. In the second word
4036 the address of a block allocated in the object module
4037 has to be stored.
4038
4039 The basic block number is given by BLOCKNO.
4040
4041 The address of the block is given by the label created with
4042
4043 ASM_GENERATE_INTERNAL_LABEL (BUFFER, "LPBX", 0);
4044
4045 by FUNCTION_BLOCK_PROFILER.
4046
4047 Of course, since you are writing the definition of
4048 `ASM_GENERATE_INTERNAL_LABEL' as well as that of this macro, you
4049 can take a short cut in the definition of this macro and use the
4050 name that you know will result.
4051
4052 If described in a virtual assembler language the code to be
4053 output looks like:
4054
4055 move BLOCKNO -> (__bb)
4056 move LPBX0 -> (__bb+4)
4057 call __bb_trace_func
4058
4059 Note that function `__bb_trace_func' must not change the
4060 machine state, especially the flag register. To guarantee
4061 this, you must output code to save and restore registers
4062 either in this macro or in the macros MACHINE_STATE_SAVE
4063 and MACHINE_STATE_RESTORE. The last two macros will be
4064 used in the function `__bb_trace_func', so you must make
4065 sure that the function prologue does not change any
4066 register prior to saving it with MACHINE_STATE_SAVE.
4067
4068 else if profile_block_flag != 0
4069
4070 Output code to increment the counter directly.
4071 Basic blocks are numbered separately from zero within each
4072 compiled object module. The count associated with block number
4073 BLOCKNO is at index BLOCKNO in an array of words; the name of
4074 this array is a local symbol made with this statement:
4075
4076 ASM_GENERATE_INTERNAL_LABEL (BUFFER, "LPBX", 2);
4077
4078 Of course, since you are writing the definition of
4079 `ASM_GENERATE_INTERNAL_LABEL' as well as that of this macro, you
4080 can take a short cut in the definition of this macro and use the
4081 name that you know will result.
4082
4083 If described in a virtual assembler language the code to be
4084 output looks like:
4085
4086 inc (LPBX2+4*BLOCKNO)
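
 For instance, with profile_block_flag == 1, BLOCKNO == 3 and no PIC,
 the code below boils down to a single

 incl LPBX2+12

 (modulo the target's internal label spelling).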
4087 */
4088
4089 void
4090 ix86_output_block_profiler (file, blockno)
4091 FILE *file ATTRIBUTE_UNUSED;
4092 int blockno;
4093 {
4094 rtx xops[8], cnt_rtx;
4095 char counts[80];
4096 char *block_table = counts;
4097
4098 switch (profile_block_flag)
4099 {
4100 case 2:
4101 ASM_GENERATE_INTERNAL_LABEL (block_table, "LPBX", 0);
4102
4103 xops[1] = gen_rtx_SYMBOL_REF (VOIDmode, block_table);
4104 xops[2] = GEN_INT (blockno);
4105 xops[3] = gen_rtx_MEM (Pmode,
4106 gen_rtx_SYMBOL_REF (VOIDmode, "__bb_trace_func"));
4107 xops[4] = gen_rtx_SYMBOL_REF (VOIDmode, "__bb");
4108 xops[5] = plus_constant (xops[4], 4);
4109 xops[0] = gen_rtx_MEM (SImode, xops[4]);
4110 xops[6] = gen_rtx_MEM (SImode, xops[5]);
4111
4112 CONSTANT_POOL_ADDRESS_P (xops[1]) = TRUE;
4113
4114 output_asm_insn ("pushf", xops);
4115 output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);
4116 if (flag_pic)
4117 {
4118 xops[7] = gen_rtx_REG (Pmode, 0); /* eax */
4119 output_asm_insn ("push{l}\t%7", xops);
4120 output_asm_insn ("lea{l}\t{%a1, %7|%7, %a1}", xops);
4121 output_asm_insn ("mov{l}\t{%7, %6|%6, %7}", xops);
4122 output_asm_insn ("pop{l}\t%7", xops);
4123 }
4124 else
4125 output_asm_insn ("mov{l}\t{%1, %6|%6, %1}", xops);
4126 output_asm_insn ("call\t%P3", xops);
4127 output_asm_insn ("popf", xops);
4128
4129 break;
4130
4131 default:
4132 ASM_GENERATE_INTERNAL_LABEL (counts, "LPBX", 2);
4133 cnt_rtx = gen_rtx_SYMBOL_REF (VOIDmode, counts);
4134 SYMBOL_REF_FLAG (cnt_rtx) = TRUE;
4135
4136 if (blockno)
4137 cnt_rtx = plus_constant (cnt_rtx, blockno * 4);
4138
4139 if (flag_pic)
4140 cnt_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, cnt_rtx);
4141
4142 xops[0] = gen_rtx_MEM (SImode, cnt_rtx);
4143 output_asm_insn ("inc{l}\t%0", xops);
4144
4145 break;
4146 }
4147 }
4148 \f
4149 void
4150 ix86_expand_move (mode, operands)
4151 enum machine_mode mode;
4152 rtx operands[];
4153 {
4154 int strict = (reload_in_progress || reload_completed);
4155 rtx insn;
4156
4157 if (flag_pic && mode == Pmode && symbolic_operand (operands[1], Pmode))
4158 {
4159 /* Emit insns to move operands[1] into operands[0]. */
4160
4161 if (GET_CODE (operands[0]) == MEM)
4162 operands[1] = force_reg (Pmode, operands[1]);
4163 else
4164 {
4165 rtx temp = operands[0];
4166 if (GET_CODE (temp) != REG)
4167 temp = gen_reg_rtx (Pmode);
4168 temp = legitimize_pic_address (operands[1], temp);
4169 if (temp == operands[0])
4170 return;
4171 operands[1] = temp;
4172 }
4173 }
4174 else
4175 {
4176 if (GET_CODE (operands[0]) == MEM
4177 && (GET_MODE (operands[0]) == QImode
4178 || !push_operand (operands[0], mode))
4179 && GET_CODE (operands[1]) == MEM)
4180 operands[1] = force_reg (mode, operands[1]);
4181
4182 if (push_operand (operands[0], mode)
4183 && ! general_no_elim_operand (operands[1], mode))
4184 operands[1] = copy_to_mode_reg (mode, operands[1]);
4185
4186 if (FLOAT_MODE_P (mode))
4187 {
4188 /* If we are loading a floating point constant to a register,
4189 force the value to memory now, since we'll get better code
4190 out of the back end. */
4191
4192 if (strict)
4193 ;
4194 else if (GET_CODE (operands[1]) == CONST_DOUBLE
4195 && register_operand (operands[0], mode))
4196 operands[1] = validize_mem (force_const_mem (mode, operands[1]));
4197 }
4198 }
4199
4200 insn = gen_rtx_SET (VOIDmode, operands[0], operands[1]);
4201
4202 emit_insn (insn);
4203 }
4204
4205 /* Attempt to expand a binary operator. Make the expansion closer to the
4206 actual machine, than just general_operand, which will allow 3 separate
4207 memory references (one output, two input) in a single insn. */
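
 /* For example, naively expanding `a = b + c' with B and C both in
 memory would give (set (mem A) (plus (mem B) (mem C))), which no
 ix86 add pattern accepts; the fixups below load one source into a
 register first. */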
4208
4209 void
4210 ix86_expand_binary_operator (code, mode, operands)
4211 enum rtx_code code;
4212 enum machine_mode mode;
4213 rtx operands[];
4214 {
4215 int matching_memory;
4216 rtx src1, src2, dst, op, clob;
4217
4218 dst = operands[0];
4219 src1 = operands[1];
4220 src2 = operands[2];
4221
4222 /* Recognize <var1> = <value> <op> <var1> for commutative operators */
4223 if (GET_RTX_CLASS (code) == 'c'
4224 && (rtx_equal_p (dst, src2)
4225 || immediate_operand (src1, mode)))
4226 {
4227 rtx temp = src1;
4228 src1 = src2;
4229 src2 = temp;
4230 }
4231
4232 /* If the destination is memory, and we do not have matching source
4233 operands, do things in registers. */
4234 matching_memory = 0;
4235 if (GET_CODE (dst) == MEM)
4236 {
4237 if (rtx_equal_p (dst, src1))
4238 matching_memory = 1;
4239 else if (GET_RTX_CLASS (code) == 'c'
4240 && rtx_equal_p (dst, src2))
4241 matching_memory = 2;
4242 else
4243 dst = gen_reg_rtx (mode);
4244 }
4245
4246 /* Both source operands cannot be in memory. */
4247 if (GET_CODE (src1) == MEM && GET_CODE (src2) == MEM)
4248 {
4249 if (matching_memory != 2)
4250 src2 = force_reg (mode, src2);
4251 else
4252 src1 = force_reg (mode, src1);
4253 }
4254
4255 /* If the operation is not commutative, source 1 cannot be a constant
4256 or non-matching memory. */
4257 if ((CONSTANT_P (src1)
4258 || (!matching_memory && GET_CODE (src1) == MEM))
4259 && GET_RTX_CLASS (code) != 'c')
4260 src1 = force_reg (mode, src1);
4261
4262 /* If optimizing, copy to regs to improve CSE */
4263 if (optimize && ! no_new_pseudos)
4264 {
4265 if (GET_CODE (dst) == MEM)
4266 dst = gen_reg_rtx (mode);
4267 if (GET_CODE (src1) == MEM)
4268 src1 = force_reg (mode, src1);
4269 if (GET_CODE (src2) == MEM)
4270 src2 = force_reg (mode, src2);
4271 }
4272
4273 /* Emit the instruction. */
4274
4275 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, mode, src1, src2));
4276 if (reload_in_progress)
4277 {
4278 /* Reload doesn't know about the flags register, and doesn't know that
4279 it doesn't want to clobber it. We can only do this with PLUS. */
4280 if (code != PLUS)
4281 abort ();
4282 emit_insn (op);
4283 }
4284 else
4285 {
4286 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
4287 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
4288 }
4289
4290 /* Fix up the destination if needed. */
4291 if (dst != operands[0])
4292 emit_move_insn (operands[0], dst);
4293 }
4294
4295 /* Return TRUE or FALSE depending on whether the binary operator meets the
4296 appropriate constraints. */
4297
4298 int
4299 ix86_binary_operator_ok (code, mode, operands)
4300 enum rtx_code code;
4301 enum machine_mode mode ATTRIBUTE_UNUSED;
4302 rtx operands[3];
4303 {
4304 /* Both source operands cannot be in memory. */
4305 if (GET_CODE (operands[1]) == MEM && GET_CODE (operands[2]) == MEM)
4306 return 0;
4307 /* If the operation is not commutative, source 1 cannot be a constant. */
4308 if (CONSTANT_P (operands[1]) && GET_RTX_CLASS (code) != 'c')
4309 return 0;
4310 /* If the destination is memory, we must have a matching source operand. */
4311 if (GET_CODE (operands[0]) == MEM
4312 && ! (rtx_equal_p (operands[0], operands[1])
4313 || (GET_RTX_CLASS (code) == 'c'
4314 && rtx_equal_p (operands[0], operands[2]))))
4315 return 0;
4316 /* If the operation is not commutative and source 1 is memory, we must
4317 have a matching destination. */
4318 if (GET_CODE (operands[1]) == MEM
4319 && GET_RTX_CLASS (code) != 'c'
4320 && ! rtx_equal_p (operands[0], operands[1]))
4321 return 0;
4322 return 1;
4323 }
4324
4325 /* Attempt to expand a unary operator. Make the expansion closer to the
4326 actual machine, than just general_operand, which will allow 2 separate
4327 memory references (one output, one input) in a single insn. */
4328
4329 void
4330 ix86_expand_unary_operator (code, mode, operands)
4331 enum rtx_code code;
4332 enum machine_mode mode;
4333 rtx operands[];
4334 {
4335 int matching_memory;
4336 rtx src, dst, op, clob;
4337
4338 dst = operands[0];
4339 src = operands[1];
4340
4341 /* If the destination is memory, and we do not have matching source
4342 operands, do things in registers. */
4343 matching_memory = 0;
4344 if (GET_CODE (dst) == MEM)
4345 {
4346 if (rtx_equal_p (dst, src))
4347 matching_memory = 1;
4348 else
4349 dst = gen_reg_rtx (mode);
4350 }
4351
4352 /* When source operand is memory, destination must match. */
4353 if (!matching_memory && GET_CODE (src) == MEM)
4354 src = force_reg (mode, src);
4355
4356 /* If optimizing, copy to regs to improve CSE */
4357 if (optimize && ! no_new_pseudos)
4358 {
4359 if (GET_CODE (dst) == MEM)
4360 dst = gen_reg_rtx (mode);
4361 if (GET_CODE (src) == MEM)
4362 src = force_reg (mode, src);
4363 }
4364
4365 /* Emit the instruction. */
4366
4367 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_e (code, mode, src));
4368 if (reload_in_progress || code == NOT)
4369 {
4370 /* Reload doesn't know about the flags register, and doesn't know that
4371 it doesn't want to clobber it. */
4372 if (code != NOT)
4373 abort ();
4374 emit_insn (op);
4375 }
4376 else
4377 {
4378 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
4379 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
4380 }
4381
4382 /* Fix up the destination if needed. */
4383 if (dst != operands[0])
4384 emit_move_insn (operands[0], dst);
4385 }
4386
4387 /* Return TRUE or FALSE depending on whether the unary operator meets the
4388 appropriate constraints. */
4389
4390 int
4391 ix86_unary_operator_ok (code, mode, operands)
4392 enum rtx_code code ATTRIBUTE_UNUSED;
4393 enum machine_mode mode ATTRIBUTE_UNUSED;
4394 rtx operands[2] ATTRIBUTE_UNUSED;
4395 {
4396 /* If one of operands is memory, source and destination must match. */
4397 if ((GET_CODE (operands[0]) == MEM
4398 || GET_CODE (operands[1]) == MEM)
4399 && ! rtx_equal_p (operands[0], operands[1]))
4400 return FALSE;
4401 return TRUE;
4402 }
4403
4404 /* Return TRUE or FALSE depending on whether the first SET in INSN
4405 has source and destination with matching CC modes, and that the
4406 CC mode is at least as constrained as REQ_MODE. */
4407
4408 int
4409 ix86_match_ccmode (insn, req_mode)
4410 rtx insn;
4411 enum machine_mode req_mode;
4412 {
4413 rtx set;
4414 enum machine_mode set_mode;
4415
4416 set = PATTERN (insn);
4417 if (GET_CODE (set) == PARALLEL)
4418 set = XVECEXP (set, 0, 0);
4419 if (GET_CODE (set) != SET)
4420 abort ();
4421
4422 set_mode = GET_MODE (SET_DEST (set));
4423 switch (set_mode)
4424 {
4425 case CCmode:
4426 if (req_mode == CCNOmode)
4427 return 0;
4428 /* FALLTHRU */
4429 case CCNOmode:
4430 if (req_mode == CCZmode)
4431 return 0;
4432 /* FALLTHRU */
4433 case CCZmode:
4434 break;
4435
4436 default:
4437 abort ();
4438 }
4439
4440 return (GET_MODE (SET_SRC (set)) == set_mode);
4441 }
4442
4443 /* Produce an unsigned comparison for a given signed comparison. */
4444
4445 static enum rtx_code
4446 unsigned_comparison (code)
4447 enum rtx_code code;
4448 {
4449 switch (code)
4450 {
4451 case GT:
4452 code = GTU;
4453 break;
4454 case LT:
4455 code = LTU;
4456 break;
4457 case GE:
4458 code = GEU;
4459 break;
4460 case LE:
4461 code = LEU;
4462 break;
4463 case EQ:
4464 case NE:
4465 case LEU:
4466 case LTU:
4467 case GEU:
4468 case GTU:
4469 case UNORDERED:
4470 case ORDERED:
4471 break;
4472 default:
4473 abort ();
4474 }
4475 return code;
4476 }
4477
4478 /* Generate insn patterns to do an integer compare of OPERANDS. */
4479
4480 static rtx
4481 ix86_expand_int_compare (code, op0, op1)
4482 enum rtx_code code;
4483 rtx op0, op1;
4484 {
4485 enum machine_mode cmpmode;
4486 rtx tmp, flags;
4487
4488 cmpmode = SELECT_CC_MODE (code, op0, op1);
4489 flags = gen_rtx_REG (cmpmode, FLAGS_REG);
4490
4491 /* This is very simple, but making the interface the same as in the
4492 FP case makes the rest of the code easier. */
4493 tmp = gen_rtx_COMPARE (cmpmode, op0, op1);
4494 emit_insn (gen_rtx_SET (VOIDmode, flags, tmp));
4495
4496 /* Return the test that should be put into the flags user, i.e.
4497 the bcc, scc, or cmov instruction. */
4498 return gen_rtx_fmt_ee (code, VOIDmode, flags, const0_rtx);
4499 }
4500
4501 /* Figure out whether to use ordered or unordered fp comparisons.
4502 Return the appropriate mode to use. */
4503
4504 static enum machine_mode
4505 ix86_fp_compare_mode (code)
4506 enum rtx_code code;
4507 {
4508 int unordered;
4509
4510 switch (code)
4511 {
4512 case NE: case EQ:
4513 /* When not doing IEEE compliant compares, fault on NaNs. */
4514 unordered = (TARGET_IEEE_FP != 0);
4515 break;
4516
4517 case LT: case LE: case GT: case GE:
4518 unordered = 0;
4519 break;
4520
4521 case UNORDERED: case ORDERED:
4522 case UNEQ: case UNGE: case UNGT: case UNLE: case UNLT: case LTGT:
4523 unordered = 1;
4524 break;
4525
4526 default:
4527 abort ();
4528 }
4529
4530 /* ??? If we knew whether invalid-operand exceptions were masked,
4531 we could rely on fcom to raise an exception and take care of
4532 NaNs. But we don't. We could know this from c99 math pragmas. */
4533 if (TARGET_IEEE_FP)
4534 unordered = 1;
4535
4536 return unordered ? CCFPUmode : CCFPmode;
4537 }
4538
4539 /* Return true if we should use an FCOMI instruction for this fp comparison. */
4540
4541 int
4542 ix86_use_fcomi_compare (code)
4543 enum rtx_code code;
4544 {
4545 return (TARGET_CMOVE
4546 && (code == ORDERED || code == UNORDERED
4547 /* All other unordered compares require checking
4548 multiple sets of bits. */
4549 || ix86_fp_compare_mode (code) == CCFPmode));
4550 }
4551
4552 /* Swap, force into registers, or otherwise massage the two operands
4553 to a fp comparison. The operands are updated in place; the new
4554 comparison code is returned. */
4555
4556 static enum rtx_code
4557 ix86_prepare_fp_compare_args (code, pop0, pop1)
4558 enum rtx_code code;
4559 rtx *pop0, *pop1;
4560 {
4561 enum machine_mode fpcmp_mode = ix86_fp_compare_mode (code);
4562 rtx op0 = *pop0, op1 = *pop1;
4563 enum machine_mode op_mode = GET_MODE (op0);
4564
4565 /* All of the unordered compare instructions only work on registers.
4566 The same is true of the XFmode compare instructions. The same is
4567 true of the fcomi compare instructions. */
4568
4569 if (fpcmp_mode == CCFPUmode
4570 || op_mode == XFmode
4571 || ix86_use_fcomi_compare (code))
4572 {
4573 op0 = force_reg (op_mode, op0);
4574 op1 = force_reg (op_mode, op1);
4575 }
4576 else
4577 {
4578 /* %%% We only allow op1 in memory; op0 must be st(0). So swap
4579 things around if they appear profitable, otherwise force op0
4580 into a register. */
4581
4582 if (standard_80387_constant_p (op0) == 0
4583 || (GET_CODE (op0) == MEM
4584 && ! (standard_80387_constant_p (op1) == 0
4585 || GET_CODE (op1) == MEM)))
4586 {
4587 rtx tmp;
4588 tmp = op0, op0 = op1, op1 = tmp;
4589 code = swap_condition (code);
4590 }
4591
4592 if (GET_CODE (op0) != REG)
4593 op0 = force_reg (op_mode, op0);
4594
4595 if (CONSTANT_P (op1))
4596 {
4597 if (standard_80387_constant_p (op1))
4598 op1 = force_reg (op_mode, op1);
4599 else
4600 op1 = validize_mem (force_const_mem (op_mode, op1));
4601 }
4602 }
4603
4604 *pop0 = op0;
4605 *pop1 = op1;
4606 return code;
4607 }
4608
4609 /* Generate insn patterns to do a floating point compare of OPERANDS. */
4610
4611 rtx
4612 ix86_expand_fp_compare (code, op0, op1, scratch)
4613 enum rtx_code code;
4614 rtx op0, op1, scratch;
4615 {
4616 enum machine_mode fpcmp_mode, intcmp_mode;
4617 rtx tmp;
4618
4619 fpcmp_mode = ix86_fp_compare_mode (code);
4620 code = ix86_prepare_fp_compare_args (code, &op0, &op1);
4621
4622 /* %%% fcomi is probably always faster, even when dealing with memory,
4623 since compare-and-branch would be three insns instead of four. */
4624 if (ix86_use_fcomi_compare (code))
4625 {
4626 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
4627 tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG), tmp);
4628 emit_insn (tmp);
4629
4630 /* The FP codes work out to act like unsigned. */
4631 code = unsigned_comparison (code);
4632 intcmp_mode = CCmode;
4633 }
4634 else
4635 {
4636 /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first. */
4637
4638 rtx tmp2;
4639 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
4640 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), 9);
4641 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
4642
4643 if (fpcmp_mode == CCFPmode
4644 || code == ORDERED
4645 || code == UNORDERED)
4646 {
4647 /* We have two options here -- use sahf, or testing bits of ah
4648 directly. On PPRO, they are equivalent, sahf being one byte
4649 smaller. On Pentium, sahf is non-pairable while test is UV
4650 pairable. */
4651
4652 if (TARGET_USE_SAHF || optimize_size)
4653 {
4654 do_sahf:
4655 emit_insn (gen_x86_sahf_1 (scratch));
4656
4657 /* The FP codes work out to act like unsigned. */
4658 code = unsigned_comparison (code);
4659 intcmp_mode = CCmode;
4660 }
4661 else
4662 {
4663 /*
4664 * The numbers below correspond to the bits of the FPSW in AH.
4665 * C3, C2, and C0 are in bits 0x40, 0x4, and 0x01 respectively.
4666 *
4667 * cmp C3 C2 C0
4668 * > 0 0 0
4669 * < 0 0 1
4670 * = 1 0 0
4671 * un 1 1 1
4672 */
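
 /* Reading the table: `>' holds exactly when C3 == C2 == C0 == 0, so
 the GT case below tests ah & 0x41 (C3 and C0) for zero; an
 unordered result sets C0 and therefore fails the test as well. */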
4673
4674 int mask;
4675
4676 switch (code)
4677 {
4678 case GT:
4679 mask = 0x41;
4680 code = EQ;
4681 break;
4682 case LT:
4683 mask = 0x01;
4684 code = NE;
4685 break;
4686 case GE:
4687 /* We'd have to use `xorb 1,ah; andb 0x41,ah', so it's
4688 faster in all cases to just fall back on sahf. */
4689 goto do_sahf;
4690 case LE:
4691 mask = 0x41;
4692 code = NE;
4693 break;
4694 case EQ:
4695 mask = 0x40;
4696 code = NE;
4697 break;
4698 case NE:
4699 mask = 0x40;
4700 code = EQ;
4701 break;
4702 case UNORDERED:
4703 mask = 0x04;
4704 code = NE;
4705 break;
4706 case ORDERED:
4707 mask = 0x04;
4708 code = EQ;
4709 break;
4710
4711 default:
4712 abort ();
4713 }
4714
4715 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (mask)));
4716 intcmp_mode = CCNOmode;
4717 }
4718 }
4719 else
4720 {
4721 /* In the unordered case, we have to check C2 for NaN's, which
4722 doesn't happen to work out to anything nice combination-wise.
4723 So do some bit twiddling on the value we've got in AH to come
4724 up with an appropriate set of condition codes. */
4725
4726 intcmp_mode = CCNOmode;
4727 switch (code)
4728 {
4729 case GT:
4730 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
4731 code = EQ;
4732 break;
4733 case LT:
4734 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
4735 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x01)));
4736 intcmp_mode = CCmode;
4737 code = EQ;
4738 break;
4739 case GE:
4740 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x05)));
4741 code = EQ;
4742 break;
4743 case LE:
4744 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
4745 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
4746 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
4747 intcmp_mode = CCmode;
4748 code = LTU;
4749 break;
4750 case EQ:
4751 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
4752 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
4753 intcmp_mode = CCmode;
4754 code = EQ;
4755 break;
4756 case NE:
4757 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
4758 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch, GEN_INT (0x40)));
4759 code = NE;
4760 break;
4761
4762 case UNORDERED:
4763 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
4764 code = NE;
4765 break;
4766 case ORDERED:
4767 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
4768 code = EQ;
4769 break;
4770 case UNEQ:
4771 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
4772 code = NE;
4773 break;
4774 case UNGE:
4775 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
4776 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch, GEN_INT (0x01)));
4777 code = NE;
4778 break;
4779 case UNGT:
4780 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
4781 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
4782 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x44)));
4783 code = GEU;
4784 break;
4785 case UNLE:
4786 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
4787 code = NE;
4788 break;
4789 case UNLT:
4790 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x01)));
4791 code = NE;
4792 break;
4793 case LTGT:
4794 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
4795 code = EQ;
4796 break;
4797
4798 default:
4799 abort ();
4800 }
4801 }
4802 }
4803
4804 /* Return the test that should be put into the flags user, i.e.
4805 the bcc, scc, or cmov instruction. */
4806 return gen_rtx_fmt_ee (code, VOIDmode,
4807 gen_rtx_REG (intcmp_mode, FLAGS_REG),
4808 const0_rtx);
4809 }
4810
4811 static rtx
4812 ix86_expand_compare (code)
4813 enum rtx_code code;
4814 {
4815 rtx op0, op1, ret;
4816 op0 = ix86_compare_op0;
4817 op1 = ix86_compare_op1;
4818
4819 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_FLOAT)
4820 ret = ix86_expand_fp_compare (code, op0, op1, gen_reg_rtx (HImode));
4821 else
4822 ret = ix86_expand_int_compare (code, op0, op1);
4823
4824 return ret;
4825 }
4826
4827 void
4828 ix86_expand_branch (code, label)
4829 enum rtx_code code;
4830 rtx label;
4831 {
4832 rtx tmp;
4833
4834 switch (GET_MODE (ix86_compare_op0))
4835 {
4836 case QImode:
4837 case HImode:
4838 case SImode:
4839 tmp = ix86_expand_compare (code);
4840 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
4841 gen_rtx_LABEL_REF (VOIDmode, label),
4842 pc_rtx);
4843 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
4844 return;
4845
4846 case SFmode:
4847 case DFmode:
4848 case XFmode:
4849 /* Don't expand the comparison early, so that we get better code
4850 when jump or whoever decides to reverse the comparison. */
4851 {
4852 rtvec vec;
4853 int use_fcomi;
4854
4855 code = ix86_prepare_fp_compare_args (code, &ix86_compare_op0,
4856 &ix86_compare_op1);
4857
4858 tmp = gen_rtx_fmt_ee (code, ix86_fp_compare_mode (code),
4859 ix86_compare_op0, ix86_compare_op1);
4860 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
4861 gen_rtx_LABEL_REF (VOIDmode, label),
4862 pc_rtx);
4863 tmp = gen_rtx_SET (VOIDmode, pc_rtx, tmp);
4864
4865 use_fcomi = ix86_use_fcomi_compare (code);
4866 vec = rtvec_alloc (3 + !use_fcomi);
4867 RTVEC_ELT (vec, 0) = tmp;
4868 RTVEC_ELT (vec, 1)
4869 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, 18));
4870 RTVEC_ELT (vec, 2)
4871 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, 17));
4872 if (! use_fcomi)
4873 RTVEC_ELT (vec, 3)
4874 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (HImode));
4875
4876 emit_jump_insn (gen_rtx_PARALLEL (VOIDmode, vec));
4877 return;
4878 }
4879
4880 case DImode:
4881 /* Expand DImode branch into multiple compare+branch. */
4882 {
4883 rtx lo[2], hi[2], label2;
4884 enum rtx_code code1, code2, code3;
4885
4886 if (CONSTANT_P (ix86_compare_op0) && ! CONSTANT_P (ix86_compare_op1))
4887 {
4888 tmp = ix86_compare_op0;
4889 ix86_compare_op0 = ix86_compare_op1;
4890 ix86_compare_op1 = tmp;
4891 code = swap_condition (code);
4892 }
4893 split_di (&ix86_compare_op0, 1, lo+0, hi+0);
4894 split_di (&ix86_compare_op1, 1, lo+1, hi+1);
4895
4896 /* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to
4897 avoid two branches. This costs one extra insn, so disable when
4898 optimizing for size. */
4899
4900 if ((code == EQ || code == NE)
4901 && (!optimize_size
4902 || hi[1] == const0_rtx || lo[1] == const0_rtx))
4903 {
4904 rtx xor0, xor1;
4905
4906 xor1 = hi[0];
4907 if (hi[1] != const0_rtx)
4908 xor1 = expand_binop (SImode, xor_optab, xor1, hi[1],
4909 NULL_RTX, 0, OPTAB_WIDEN);
4910
4911 xor0 = lo[0];
4912 if (lo[1] != const0_rtx)
4913 xor0 = expand_binop (SImode, xor_optab, xor0, lo[1],
4914 NULL_RTX, 0, OPTAB_WIDEN);
4915
4916 tmp = expand_binop (SImode, ior_optab, xor1, xor0,
4917 NULL_RTX, 0, OPTAB_WIDEN);
4918
4919 ix86_compare_op0 = tmp;
4920 ix86_compare_op1 = const0_rtx;
4921 ix86_expand_branch (code, label);
4922 return;
4923 }
4924
4925 /* Otherwise, if we are doing less-than and op1 is a constant whose
4926 low word is zero, then we can just examine the high word. */
4927
4928 if (GET_CODE (hi[1]) == CONST_INT && lo[1] == const0_rtx
4929 && (code == LT || code == LTU))
4930 {
4931 ix86_compare_op0 = hi[0];
4932 ix86_compare_op1 = hi[1];
4933 ix86_expand_branch (code, label);
4934 return;
4935 }
4936
4937 /* Otherwise, we need two or three jumps. */
4938
4939 label2 = gen_label_rtx ();
4940
4941 code1 = code;
4942 code2 = swap_condition (code);
4943 code3 = unsigned_condition (code);
4944
4945 switch (code)
4946 {
4947 case LT: case GT: case LTU: case GTU:
4948 break;
4949
4950 case LE: code1 = LT; code2 = GT; break;
4951 case GE: code1 = GT; code2 = LT; break;
4952 case LEU: code1 = LTU; code2 = GTU; break;
4953 case GEU: code1 = GTU; code2 = LTU; break;
4954
4955 case EQ: code1 = NIL; code2 = NE; break;
4956 case NE: code2 = NIL; break;
4957
4958 default:
4959 abort ();
4960 }
4961
4962 /*
4963 * a < b =>
4964 * if (hi(a) < hi(b)) goto true;
4965 * if (hi(a) > hi(b)) goto false;
4966 * if (lo(a) < lo(b)) goto true;
4967 * false:
4968 */
4969
4970 ix86_compare_op0 = hi[0];
4971 ix86_compare_op1 = hi[1];
4972
4973 if (code1 != NIL)
4974 ix86_expand_branch (code1, label);
4975 if (code2 != NIL)
4976 ix86_expand_branch (code2, label2);
4977
4978 ix86_compare_op0 = lo[0];
4979 ix86_compare_op1 = lo[1];
4980 ix86_expand_branch (code3, label);
4981
4982 if (code2 != NIL)
4983 emit_label (label2);
4984 return;
4985 }
4986
4987 default:
4988 abort ();
4989 }
4990 }
4991
4992 int
4993 ix86_expand_setcc (code, dest)
4994 enum rtx_code code;
4995 rtx dest;
4996 {
4997 rtx ret, tmp;
4998 int type;
4999
5000 if (GET_MODE (ix86_compare_op0) == DImode)
5001 return 0; /* FAIL */
5002
5003 /* Three modes of generation:
5004 0 -- destination does not overlap compare sources:
5005 clear dest first, emit strict_low_part setcc.
5006 1 -- destination does overlap compare sources:
5007 emit subreg setcc, zero extend.
5008 2 -- destination is in QImode:
5009 emit setcc only.
5010 */
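
 /* Mode 0 yields the familiar `xorl %eax,%eax; cmpl ...; setcc %al'
 sequence; the clear must precede the compare so that it does not
 clobber the freshly computed flags. */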
5011
5012 type = 0;
5013
5014 if (GET_MODE (dest) == QImode)
5015 type = 2;
5016 else if (reg_overlap_mentioned_p (dest, ix86_compare_op0)
5017 || reg_overlap_mentioned_p (dest, ix86_compare_op1))
5018 type = 1;
5019
5020 if (type == 0)
5021 emit_move_insn (dest, const0_rtx);
5022
5023 ret = ix86_expand_compare (code);
5024 PUT_MODE (ret, QImode);
5025
5026 tmp = dest;
5027 if (type == 0)
5028 {
5029 tmp = gen_lowpart (QImode, dest);
5030 tmp = gen_rtx_STRICT_LOW_PART (VOIDmode, tmp);
5031 }
5032 else if (type == 1)
5033 {
5034 if (!cse_not_expected)
5035 tmp = gen_reg_rtx (QImode);
5036 else
5037 tmp = gen_lowpart (QImode, dest);
5038 }
5039
5040 emit_insn (gen_rtx_SET (VOIDmode, tmp, ret));
5041
5042 if (type == 1)
5043 {
5044 rtx clob;
5045
5046 tmp = gen_rtx_ZERO_EXTEND (GET_MODE (dest), tmp);
5047 tmp = gen_rtx_SET (VOIDmode, dest, tmp);
5048 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
5049 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
5050 emit_insn (tmp);
5051 }
5052
5053 return 1; /* DONE */
5054 }
5055
5056 int
5057 ix86_expand_int_movcc (operands)
5058 rtx operands[];
5059 {
5060 enum rtx_code code = GET_CODE (operands[1]), compare_code;
5061 rtx compare_seq, compare_op;
5062
5063 /* When the compare code is not LTU or GEU, we cannot use the sbbl case.
5064 When the comparison is done with an immediate, we can convert it to LTU
5065 or GEU by adjusting the constant (e.g. unsigned `x <= 4' becomes `x < 5'). */
5066
5067 if ((code == LEU || code == GTU)
5068 && GET_CODE (ix86_compare_op1) == CONST_INT
5069 && GET_MODE (operands[0]) != HImode
5070 && (unsigned int)INTVAL (ix86_compare_op1) != 0xffffffff
5071 && GET_CODE (operands[2]) == CONST_INT
5072 && GET_CODE (operands[3]) == CONST_INT)
5073 {
5074 if (code == LEU)
5075 code = LTU;
5076 else
5077 code = GEU;
5078 ix86_compare_op1 = GEN_INT (INTVAL (ix86_compare_op1) + 1);
5079 }
5080
5081 start_sequence ();
5082 compare_op = ix86_expand_compare (code);
5083 compare_seq = gen_sequence ();
5084 end_sequence ();
5085
5086 compare_code = GET_CODE (compare_op);
5087
5088 /* Don't attempt mode expansion here -- if we had to expand 5 or 6
5089 HImode insns, we'd be swallowed in word prefix ops. */
5090
5091 if (GET_MODE (operands[0]) != HImode
5092 && GET_CODE (operands[2]) == CONST_INT
5093 && GET_CODE (operands[3]) == CONST_INT)
5094 {
5095 rtx out = operands[0];
5096 HOST_WIDE_INT ct = INTVAL (operands[2]);
5097 HOST_WIDE_INT cf = INTVAL (operands[3]);
5098 HOST_WIDE_INT diff;
5099
5100 if (compare_code == LTU || compare_code == GEU)
5101 {
5102
5103 /* Detect overlap between destination and compare sources. */
5104 rtx tmp = out;
5105
5106 /* To simplify rest of code, restrict to the GEU case. */
5107 if (compare_code == LTU)
5108 {
5109 HOST_WIDE_INT tmp = ct;
5110 ct = cf;
5111 cf = tmp;
5112 compare_code = reverse_condition (compare_code);
5113 code = reverse_condition (code);
5114 }
5115 diff = ct - cf;
5116
5117 if (reg_overlap_mentioned_p (out, ix86_compare_op0)
5118 || reg_overlap_mentioned_p (out, ix86_compare_op1))
5119 tmp = gen_reg_rtx (SImode);
5120
5121 emit_insn (compare_seq);
5122 emit_insn (gen_x86_movsicc_0_m1 (tmp));
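
 /* TMP is now 0 when the (GEU) condition holds and -1 when it fails;
 e.g. with ct == 5 and cf == 4 (diff == 1) the single addl of 5
 below yields 5 or 4 directly. */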
5123
5124 if (diff == 1)
5125 {
5126 /*
5127 * cmpl op0,op1
5128 * sbbl dest,dest
5129 * [addl dest, ct]
5130 *
5131 * Size 5 - 8.
5132 */
5133 if (ct)
5134 emit_insn (gen_addsi3 (tmp, tmp, GEN_INT (ct)));
5135 }
5136 else if (cf == -1)
5137 {
5138 /*
5139 * cmpl op0,op1
5140 * sbbl dest,dest
5141 * orl $ct, dest
5142 *
5143 * Size 8.
5144 */
5145 emit_insn (gen_iorsi3 (tmp, tmp, GEN_INT (ct)));
5146 }
5147 else if (diff == -1 && ct)
5148 {
5149 /*
5150 * cmpl op0,op1
5151 * sbbl dest,dest
5152 * xorl $-1, dest
5153 * [addl dest, cf]
5154 *
5155 * Size 8 - 11.
5156 */
5157 emit_insn (gen_one_cmplsi2 (tmp, tmp));
5158 if (cf)
5159 emit_insn (gen_addsi3 (tmp, tmp, GEN_INT (cf)));
5160 }
5161 else
5162 {
5163 /*
5164 * cmpl op0,op1
5165 * sbbl dest,dest
5166 * andl cf - ct, dest
5167 * [addl dest, ct]
5168 *
5169 * Size 8 - 11.
5170 */
5171 emit_insn (gen_andsi3 (tmp, tmp, GEN_INT (cf - ct)));
5172 if (ct)
5173 emit_insn (gen_addsi3 (tmp, tmp, GEN_INT (ct)));
5174 }
5175
5176 if (tmp != out)
5177 emit_move_insn (out, tmp);
5178
5179 return 1; /* DONE */
5180 }
5181
5182 diff = ct - cf;
5183 if (diff < 0)
5184 {
5185 HOST_WIDE_INT tmp;
5186 tmp = ct, ct = cf, cf = tmp;
5187 diff = -diff;
5188 compare_code = reverse_condition (compare_code);
5189 code = reverse_condition (code);
5190 }
5191 if (diff == 1 || diff == 2 || diff == 4 || diff == 8
5192 || diff == 3 || diff == 5 || diff == 9)
5193 {
5194 /*
5195 * xorl dest,dest
5196 * cmpl op1,op2
5197 * setcc dest
5198 * lea cf(dest*(ct-cf)),dest
5199 *
5200 * Size 14.
5201 *
5202 * This also catches the degenerate setcc-only case.
5203 */
5204
5205 rtx tmp;
5206 int nops;
5207
5208 out = emit_store_flag (out, code, ix86_compare_op0,
5209 ix86_compare_op1, VOIDmode, 0, 1);
5210
5211 nops = 0;
5212 if (diff == 1)
5213 tmp = out;
5214 else
5215 {
5216 tmp = gen_rtx_MULT (SImode, out, GEN_INT (diff & ~1));
5217 nops++;
5218 if (diff & 1)
5219 {
5220 tmp = gen_rtx_PLUS (SImode, tmp, out);
5221 nops++;
5222 }
5223 }
5224 if (cf != 0)
5225 {
5226 tmp = gen_rtx_PLUS (SImode, tmp, GEN_INT (cf));
5227 nops++;
5228 }
5229 if (tmp != out)
5230 {
5231 if (nops == 0)
5232 emit_move_insn (out, tmp);
5233 else if (nops == 1)
5234 {
5235 rtx clob;
5236
5237 clob = gen_rtx_REG (CCmode, FLAGS_REG);
5238 clob = gen_rtx_CLOBBER (VOIDmode, clob);
5239
5240 tmp = gen_rtx_SET (VOIDmode, out, tmp);
5241 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
5242 emit_insn (tmp);
5243 }
5244 else
5245 emit_insn (gen_rtx_SET (VOIDmode, out, tmp));
5246 }
5247 if (out != operands[0])
5248 emit_move_insn (operands[0], out);
5249
5250 return 1; /* DONE */
5251 }
5252
5253 /*
5254 * General case: Jumpful:
5255 * xorl dest,dest cmpl op1, op2
5256 * cmpl op1, op2 movl ct, dest
5257 * setcc dest jcc 1f
5258 * decl dest movl cf, dest
5259 * andl (cf-ct),dest 1:
5260 * addl ct,dest
5261 *
5262 * Size 20. Size 14.
5263 *
5264 * This is reasonably steep, but branch mispredict costs are
5265 * high on modern cpus, so consider failing only if optimizing
5266 * for space.
5267 *
5268 * %%% Parameterize branch_cost on the tuning architecture, then
5269 * use that. The 80386 couldn't care less about mispredicts.
5270 */
5271
5272 if (!optimize_size && !TARGET_CMOVE)
5273 {
5274 if (ct == 0)
5275 {
5276 ct = cf;
5277 cf = 0;
5278 compare_code = reverse_condition (compare_code);
5279 code = reverse_condition (code);
5280 }
5281
5282 out = emit_store_flag (out, code, ix86_compare_op0,
5283 ix86_compare_op1, VOIDmode, 0, 1);
5284
5285 emit_insn (gen_addsi3 (out, out, constm1_rtx));
5286 emit_insn (gen_andsi3 (out, out, GEN_INT (cf - ct)));
5287 if (ct != 0)
5288 emit_insn (gen_addsi3 (out, out, GEN_INT (ct)));
5289 if (out != operands[0])
5290 emit_move_insn (operands[0], out);
5291
5292 return 1; /* DONE */
5293 }
5294 }
5295
5296 if (!TARGET_CMOVE)
5297 {
5298 /* Try a few things more with specific constants and a variable. */
5299
5300 optab op;
5301 rtx var, orig_out, out, tmp;
5302
5303 if (optimize_size)
5304 return 0; /* FAIL */
5305
5306 /* If one of the two operands is an interesting constant, load a
5307 constant with the above and mask it in with a logical operation. */
5308
5309 if (GET_CODE (operands[2]) == CONST_INT)
5310 {
5311 var = operands[3];
5312 if (INTVAL (operands[2]) == 0)
5313 operands[3] = constm1_rtx, op = and_optab;
5314 else if (INTVAL (operands[2]) == -1)
5315 operands[3] = const0_rtx, op = ior_optab;
5316 else
5317 return 0; /* FAIL */
5318 }
5319 else if (GET_CODE (operands[3]) == CONST_INT)
5320 {
5321 var = operands[2];
5322 if (INTVAL (operands[3]) == 0)
5323 operands[2] = constm1_rtx, op = and_optab;
5324 else if (INTVAL (operands[3]) == -1)
5325 operands[2] = const0_rtx, op = ior_optab;
5326 else
5327 return 0; /* FAIL */
5328 }
5329 else
5330 return 0; /* FAIL */
5331
5332 orig_out = operands[0];
5333 tmp = gen_reg_rtx (GET_MODE (orig_out));
5334 operands[0] = tmp;
5335
5336 /* Recurse to get the constant loaded. */
5337 if (ix86_expand_int_movcc (operands) == 0)
5338 return 0; /* FAIL */
5339
5340 /* Mask in the interesting variable. */
5341 out = expand_binop (GET_MODE (orig_out), op, var, tmp, orig_out, 0,
5342 OPTAB_WIDEN);
5343 if (out != orig_out)
5344 emit_move_insn (orig_out, out);
5345
5346 return 1; /* DONE */
5347 }
5348
5349 /*
5350 * For comparison with above,
5351 *
5352 * movl cf,dest
5353 * movl ct,tmp
5354 * cmpl op1,op2
5355 * cmovcc tmp,dest
5356 *
5357 * Size 15.
5358 */
5359
5360 if (! nonimmediate_operand (operands[2], GET_MODE (operands[0])))
5361 operands[2] = force_reg (GET_MODE (operands[0]), operands[2]);
5362 if (! nonimmediate_operand (operands[3], GET_MODE (operands[0])))
5363 operands[3] = force_reg (GET_MODE (operands[0]), operands[3]);
5364
5365 emit_insn (compare_seq);
5366 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
5367 gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
5368 compare_op, operands[2],
5369 operands[3])));
5370
5371 return 1; /* DONE */
5372 }
5373
5374 int
5375 ix86_expand_fp_movcc (operands)
5376 rtx operands[];
5377 {
5378 enum rtx_code code;
5379 enum machine_mode mode;
5380 rtx tmp;
5381
5382 /* The floating point conditional move instructions don't directly
5383 support conditions resulting from a signed integer comparison. */
5384
5385 code = GET_CODE (operands[1]);
5386 switch (code)
5387 {
5388 case LT:
5389 case LE:
5390 case GE:
5391 case GT:
5392 tmp = gen_reg_rtx (QImode);
5393 ix86_expand_setcc (code, tmp);
5394 code = NE;
5395 ix86_compare_op0 = tmp;
5396 ix86_compare_op1 = const0_rtx;
5397 break;
5398
5399 default:
5400 break;
5401 }
5402
5403 mode = SELECT_CC_MODE (code, ix86_compare_op0, ix86_compare_op1);
5404 emit_insn (gen_rtx_SET (VOIDmode, gen_rtx_REG (mode, FLAGS_REG),
5405 gen_rtx_COMPARE (mode,
5406 ix86_compare_op0,
5407 ix86_compare_op1)));
5408 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
5409 gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
5410 gen_rtx_fmt_ee (code, VOIDmode,
5411 gen_rtx_REG (mode, FLAGS_REG),
5412 const0_rtx),
5413 operands[2],
5414 operands[3])));
5415
5416 return 1;
5417 }
5418
5419 /* Split operands 0 and 1 into SImode parts. Similar to split_di, but
5420 works for floating point parameters and non-offsettable memories.
5421 For pushes, it returns just stack offsets; the values will be saved
5422 in the right order. At most three parts are generated. */
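
 /* For example, a DFmode value in (reg:DF 0) splits into (reg:SI 0)
 and (reg:SI 1), while an XFmode CONST_DOUBLE splits into three
 CONST_INTs holding its target representation. */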
5423
5424 static void
5425 ix86_split_to_parts (operand, parts, mode)
5426 rtx operand;
5427 rtx *parts;
5428 enum machine_mode mode;
5429 {
5430 int size = GET_MODE_SIZE (mode) / 4;
5431
5432 if (size < 2 || size > 3)
5433 abort ();
5434
5435 /* Optimize constant pool references to immediates. This is used by fp moves,
5436 which force all constants to memory to allow combining. */
5437
5438 if (GET_CODE (operand) == MEM
5439 && GET_CODE (XEXP (operand, 0)) == SYMBOL_REF
5440 && CONSTANT_POOL_ADDRESS_P (XEXP (operand, 0)))
5441 operand = get_pool_constant (XEXP (operand, 0));
5442
5443 if (GET_CODE (operand) == MEM && !offsettable_memref_p (operand))
5444 {
5445 /* The only non-offsettable memories we handle are pushes. */
5446 if (! push_operand (operand, VOIDmode))
5447 abort ();
5448
5449 PUT_MODE (operand, SImode);
5450 parts[0] = parts[1] = parts[2] = operand;
5451 }
5452 else
5453 {
5454 if (mode == DImode)
5455 split_di (&operand, 1, &parts[0], &parts[1]);
5456 else
5457 {
5458 if (REG_P (operand))
5459 {
5460 if (!reload_completed)
5461 abort ();
5462 parts[0] = gen_rtx_REG (SImode, REGNO (operand) + 0);
5463 parts[1] = gen_rtx_REG (SImode, REGNO (operand) + 1);
5464 if (size == 3)
5465 parts[2] = gen_rtx_REG (SImode, REGNO (operand) + 2);
5466 }
5467 else if (offsettable_memref_p (operand))
5468 {
5469 PUT_MODE (operand, SImode);
5470 parts[0] = operand;
5471 parts[1] = adj_offsettable_operand (operand, 4);
5472 if (size == 3)
5473 parts[2] = adj_offsettable_operand (operand, 8);
5474 }
5475 else if (GET_CODE (operand) == CONST_DOUBLE)
5476 {
5477 REAL_VALUE_TYPE r;
5478 long l[3];
5479
5480 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
5481 switch (mode)
5482 {
5483 case XFmode:
5484 REAL_VALUE_TO_TARGET_LONG_DOUBLE (r, l);
5485 parts[2] = GEN_INT (l[2]);
5486 break;
5487 case DFmode:
5488 REAL_VALUE_TO_TARGET_DOUBLE (r, l);
5489 break;
5490 default:
5491 abort ();
5492 }
5493 parts[1] = GEN_INT (l[1]);
5494 parts[0] = GEN_INT (l[0]);
5495 }
5496 else
5497 abort ();
5498 }
5499 }
5500
5501 return;
5502 }
5503
5504 /* Emit insns to perform a move or push of DI, DF, and XF values.
5505 Return false when normal moves are needed; true when all required
5506 insns have been emitted. Operands 2-4 contain the input values
5507 in the correct order; operands 5-7 contain the output values. */
5508
5509 int
5510 ix86_split_long_move (operands1)
5511 rtx operands1[];
5512 {
5513 rtx part[2][3];
5514 rtx operands[2];
5515 int size = GET_MODE_SIZE (GET_MODE (operands1[0])) / 4;
5516 int push = 0;
5517 int collisions = 0;
5518
5519 /* Make our own copy to avoid clobbering the operands. */
5520 operands[0] = copy_rtx (operands1[0]);
5521 operands[1] = copy_rtx (operands1[1]);
5522
5523 if (size < 2 || size > 3)
5524 abort ();
5525
5526 /* The only non-offsettable memory we handle is push. */
5527 if (push_operand (operands[0], VOIDmode))
5528 push = 1;
5529 else if (GET_CODE (operands[0]) == MEM
5530 && ! offsettable_memref_p (operands[0]))
5531 abort ();
5532
5533 ix86_split_to_parts (operands[0], part[0], GET_MODE (operands1[0]));
5534 ix86_split_to_parts (operands[1], part[1], GET_MODE (operands1[0]));
5535
5536 /* When emitting a push, take care with source operands on the stack. */
5537 if (push && GET_CODE (operands[1]) == MEM
5538 && reg_overlap_mentioned_p (stack_pointer_rtx, operands[1]))
5539 {
5540 if (size == 3)
5541 part[1][1] = part[1][2];
5542 part[1][0] = part[1][1];
5543 }
5544
5545 /* We need to do the copy in the right order in case an address register
5546 of the source overlaps the destination. */
5547 if (REG_P (part[0][0]) && GET_CODE (part[1][0]) == MEM)
5548 {
5549 if (reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0)))
5550 collisions++;
5551 if (reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
5552 collisions++;
5553 if (size == 3
5554 && reg_overlap_mentioned_p (part[0][2], XEXP (part[1][0], 0)))
5555 collisions++;
5556
5557 /* Collision in the middle part can be handled by reordering. */
5558 if (collisions == 1 && size == 3
5559 && reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
5560 {
5561 rtx tmp;
5562 tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
5563 tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
5564 }
5565
5566 /* If there are more collisions, we can't handle it by reordering.
5567 Do an lea to the last part and use only one colliding move. */
5568 else if (collisions > 1)
5569 {
5570 collisions = 1;
5571 emit_insn (gen_rtx_SET (VOIDmode, part[0][size - 1],
5572 XEXP (part[1][0], 0)));
5573 part[1][0] = change_address (part[1][0], SImode, part[0][size - 1]);
5574 part[1][1] = adj_offsettable_operand (part[1][0], 4);
5575 if (size == 3)
5576 part[1][2] = adj_offsettable_operand (part[1][0], 8);
5577 }
5578 }
5579
5580 if (push)
5581 {
5582 if (size == 3)
5583 emit_insn (gen_push (part[1][2]));
5584 emit_insn (gen_push (part[1][1]));
5585 emit_insn (gen_push (part[1][0]));
5586 return 1;
5587 }
5588
5589 /* Choose the correct order so we do not overwrite the source before it is copied. */
5590 if ((REG_P (part[0][0])
5591 && REG_P (part[1][1])
5592 && (REGNO (part[0][0]) == REGNO (part[1][1])
5593 || (size == 3
5594 && REGNO (part[0][0]) == REGNO (part[1][2]))))
5595 || (collisions > 0
5596 && reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0))))
5597 {
5598 if (size == 3)
5599 {
5600 operands1[2] = part[0][2];
5601 operands1[3] = part[0][1];
5602 operands1[4] = part[0][0];
5603 operands1[5] = part[1][2];
5604 operands1[6] = part[1][1];
5605 operands1[7] = part[1][0];
5606 }
5607 else
5608 {
5609 operands1[2] = part[0][1];
5610 operands1[3] = part[0][0];
5611 operands1[5] = part[1][1];
5612 operands1[6] = part[1][0];
5613 }
5614 }
5615 else
5616 {
5617 if (size == 3)
5618 {
5619 operands1[2] = part[0][0];
5620 operands1[3] = part[0][1];
5621 operands1[4] = part[0][2];
5622 operands1[5] = part[1][0];
5623 operands1[6] = part[1][1];
5624 operands1[7] = part[1][2];
5625 }
5626 else
5627 {
5628 operands1[2] = part[0][0];
5629 operands1[3] = part[0][1];
5630 operands1[5] = part[1][0];
5631 operands1[6] = part[1][1];
5632 }
5633 }
5634
5635 return 0;
5636 }
5637
5638 void
5639 ix86_split_ashldi (operands, scratch)
5640 rtx *operands, scratch;
5641 {
5642 rtx low[2], high[2];
5643 int count;
5644
5645 if (GET_CODE (operands[2]) == CONST_INT)
5646 {
5647 split_di (operands, 2, low, high);
5648 count = INTVAL (operands[2]) & 63;
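
 /* E.g. a constant shift left by 40: the low word moves into the
 high word, the high word is then shifted left by a further 8,
 and the low word is cleared. */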
5649
5650 if (count >= 32)
5651 {
5652 emit_move_insn (high[0], low[1]);
5653 emit_move_insn (low[0], const0_rtx);
5654
5655 if (count > 32)
5656 emit_insn (gen_ashlsi3 (high[0], high[0], GEN_INT (count - 32)));
5657 }
5658 else
5659 {
5660 if (!rtx_equal_p (operands[0], operands[1]))
5661 emit_move_insn (operands[0], operands[1]);
5662 emit_insn (gen_x86_shld_1 (high[0], low[0], GEN_INT (count)));
5663 emit_insn (gen_ashlsi3 (low[0], low[0], GEN_INT (count)));
5664 }
5665 }
5666 else
5667 {
5668 if (!rtx_equal_p (operands[0], operands[1]))
5669 emit_move_insn (operands[0], operands[1]);
5670
5671 split_di (operands, 1, low, high);
5672
5673 emit_insn (gen_x86_shld_1 (high[0], low[0], operands[2]));
5674 emit_insn (gen_ashlsi3 (low[0], low[0], operands[2]));
5675
5676 if (TARGET_CMOVE && (! no_new_pseudos || scratch))
5677 {
5678 if (! no_new_pseudos)
5679 scratch = force_reg (SImode, const0_rtx);
5680 else
5681 emit_move_insn (scratch, const0_rtx);
5682
5683 emit_insn (gen_x86_shift_adj_1 (high[0], low[0], operands[2],
5684 scratch));
5685 }
5686 else
5687 emit_insn (gen_x86_shift_adj_2 (high[0], low[0], operands[2]));
5688 }
5689 }
5690
5691 void
5692 ix86_split_ashrdi (operands, scratch)
5693 rtx *operands, scratch;
5694 {
5695 rtx low[2], high[2];
5696 int count;
5697
5698 if (GET_CODE (operands[2]) == CONST_INT)
5699 {
5700 split_di (operands, 2, low, high);
5701 count = INTVAL (operands[2]) & 63;
5702
5703 if (count >= 32)
5704 {
5705 emit_move_insn (low[0], high[1]);
5706
5707 if (! reload_completed)
5708 emit_insn (gen_ashrsi3 (high[0], low[0], GEN_INT (31)));
5709 else
5710 {
5711 emit_move_insn (high[0], low[0]);
5712 emit_insn (gen_ashrsi3 (high[0], high[0], GEN_INT (31)));
5713 }
5714
5715 if (count > 32)
5716 emit_insn (gen_ashrsi3 (low[0], low[0], GEN_INT (count - 32)));
5717 }
5718 else
5719 {
5720 if (!rtx_equal_p (operands[0], operands[1]))
5721 emit_move_insn (operands[0], operands[1]);
5722 emit_insn (gen_x86_shrd_1 (low[0], high[0], GEN_INT (count)));
5723 emit_insn (gen_ashrsi3 (high[0], high[0], GEN_INT (count)));
5724 }
5725 }
5726 else
5727 {
5728 if (!rtx_equal_p (operands[0], operands[1]))
5729 emit_move_insn (operands[0], operands[1]);
5730
5731 split_di (operands, 1, low, high);
5732
5733 emit_insn (gen_x86_shrd_1 (low[0], high[0], operands[2]));
5734 emit_insn (gen_ashrsi3 (high[0], high[0], operands[2]));
5735
5736 if (TARGET_CMOVE && (! no_new_pseudos || scratch))
5737 {
5738 if (! no_new_pseudos)
5739 scratch = gen_reg_rtx (SImode);
5740 emit_move_insn (scratch, high[0]);
5741 emit_insn (gen_ashrsi3 (scratch, scratch, GEN_INT (31)));
5742 emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
5743 scratch));
5744 }
5745 else
5746 emit_insn (gen_x86_shift_adj_3 (low[0], high[0], operands[2]));
5747 }
5748 }
5749
5750 void
5751 ix86_split_lshrdi (operands, scratch)
5752 rtx *operands, scratch;
5753 {
5754 rtx low[2], high[2];
5755 int count;
5756
5757 if (GET_CODE (operands[2]) == CONST_INT)
5758 {
5759 split_di (operands, 2, low, high);
5760 count = INTVAL (operands[2]) & 63;
5761
5762 if (count >= 32)
5763 {
5764 emit_move_insn (low[0], high[1]);
5765 emit_move_insn (high[0], const0_rtx);
5766
5767 if (count > 32)
5768 emit_insn (gen_lshrsi3 (low[0], low[0], GEN_INT (count - 32)));
5769 }
5770 else
5771 {
5772 if (!rtx_equal_p (operands[0], operands[1]))
5773 emit_move_insn (operands[0], operands[1]);
5774 emit_insn (gen_x86_shrd_1 (low[0], high[0], GEN_INT (count)));
5775 emit_insn (gen_lshrsi3 (high[0], high[0], GEN_INT (count)));
5776 }
5777 }
5778 else
5779 {
5780 if (!rtx_equal_p (operands[0], operands[1]))
5781 emit_move_insn (operands[0], operands[1]);
5782
5783 split_di (operands, 1, low, high);
5784
5785 emit_insn (gen_x86_shrd_1 (low[0], high[0], operands[2]));
5786 emit_insn (gen_lshrsi3 (high[0], high[0], operands[2]));
5787
5788 /* Heh. By reversing the arguments, we can reuse this pattern. */
5789 if (TARGET_CMOVE && (! no_new_pseudos || scratch))
5790 {
5791 if (! no_new_pseudos)
5792 scratch = force_reg (SImode, const0_rtx);
5793 else
5794 emit_move_insn (scratch, const0_rtx);
5795
5796 emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
5797 scratch));
5798 }
5799 else
5800 emit_insn (gen_x86_shift_adj_2 (low[0], high[0], operands[2]));
5801 }
5802 }
5803
5804 /* Expand the appropriate insns for doing strlen if not just doing
5805 repnz; scasb
5806
5807 out = result, initialized with the start address
5808 align_rtx = alignment of the address.
5809 scratch = scratch register, initialized with the start address when
5810 not aligned, otherwise undefined
5811
5812 This is just the body. It needs the initializations mentioned above and
5813 some address computation at the end. These things are done in i386.md. */
5814
5815 void
5816 ix86_expand_strlensi_unroll_1 (out, align_rtx, scratch)
5817 rtx out, align_rtx, scratch;
5818 {
5819 int align;
5820 rtx tmp;
5821 rtx align_2_label = NULL_RTX;
5822 rtx align_3_label = NULL_RTX;
5823 rtx align_4_label = gen_label_rtx ();
5824 rtx end_0_label = gen_label_rtx ();
5825 rtx mem;
5826 rtx no_flags = gen_rtx_REG (CCNOmode, FLAGS_REG);
5827 rtx z_flags = gen_rtx_REG (CCZmode, FLAGS_REG);
5828 rtx tmpreg = gen_reg_rtx (SImode);
5829
5830 align = 0;
5831 if (GET_CODE (align_rtx) == CONST_INT)
5832 align = INTVAL (align_rtx);
5833
5834 /* Loop to check 1..3 bytes for null to get an aligned pointer. */
5835
5836 /* Is there a known alignment and is it less than 4? */
5837 if (align < 4)
5838 {
5839 /* Is there a known alignment and is it not 2? */
5840 if (align != 2)
5841 {
5842 align_3_label = gen_label_rtx (); /* Label when aligned to 3-byte */
5843 align_2_label = gen_label_rtx (); /* Label when aligned to 2-byte */
5844
5845 /* Leave just the 3 lower bits. */
5846 align_rtx = expand_binop (SImode, and_optab, scratch, GEN_INT (3),
5847 NULL_RTX, 0, OPTAB_WIDEN);
5848
5849 emit_insn (gen_cmpsi_ccz_1 (align_rtx, const0_rtx));
5850
5851 tmp = gen_rtx_EQ (VOIDmode, z_flags, const0_rtx);
5852 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
5853 gen_rtx_LABEL_REF (VOIDmode,
5854 align_4_label),
5855 pc_rtx);
5856 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
5857
5858 emit_insn (gen_cmpsi_ccno_1 (align_rtx, GEN_INT (2)));
5859
5860 tmp = gen_rtx_EQ (VOIDmode, no_flags, const0_rtx);
5861 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
5862 gen_rtx_LABEL_REF (VOIDmode,
5863 align_2_label),
5864 pc_rtx);
5865 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
5866
5867 tmp = gen_rtx_GTU (VOIDmode, no_flags, const0_rtx);
5868 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
5869 gen_rtx_LABEL_REF (VOIDmode,
5870 align_3_label),
5871 pc_rtx);
5872 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
5873 }
5874 else
5875 {
5876 /* Since the alignment is 2, we have to check either 2 or 0 bytes;
5877 check whether the pointer is already 4-byte aligned. */
5878
5879 align_rtx = expand_binop (SImode, and_optab, scratch, GEN_INT (2),
5880 NULL_RTX, 0, OPTAB_WIDEN);
5881
5882 emit_insn (gen_cmpsi_ccz_1 (align_rtx, const0_rtx));
5883
5884 tmp = gen_rtx_EQ (VOIDmode, z_flags, const0_rtx);
5885 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
5886 gen_rtx_LABEL_REF (VOIDmode,
5887 align_4_label),
5888 pc_rtx);
5889 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
5890 }
5891
5892 mem = gen_rtx_MEM (QImode, out);
5893
5894 /* Now compare the bytes. */
5895
5896 /* Compare the leading unaligned bytes on a byte-by-byte basis. */
5897 emit_insn (gen_cmpqi_ccz_1 (mem, const0_rtx));
5898
5899 tmp = gen_rtx_EQ (VOIDmode, z_flags, const0_rtx);
5900 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
5901 gen_rtx_LABEL_REF (VOIDmode, end_0_label),
5902 pc_rtx);
5903 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
5904
5905 /* Increment the address. */
5906 emit_insn (gen_addsi3 (out, out, const1_rtx));
5907
5908 /* Not needed with an alignment of 2 */
5909 if (align != 2)
5910 {
5911 emit_label (align_2_label);
5912
5913 emit_insn (gen_cmpqi_ccz_1 (mem, const0_rtx));
5914
5915 tmp = gen_rtx_EQ (VOIDmode, z_flags, const0_rtx);
5916 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
5917 gen_rtx_LABEL_REF (VOIDmode,
5918 end_0_label),
5919 pc_rtx);
5920 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
5921
5922 emit_insn (gen_addsi3 (out, out, const1_rtx));
5923
5924 emit_label (align_3_label);
5925 }
5926
5927 emit_insn (gen_cmpqi_ccz_1 (mem, const0_rtx));
5928
5929 tmp = gen_rtx_EQ (VOIDmode, z_flags, const0_rtx);
5930 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
5931 gen_rtx_LABEL_REF (VOIDmode, end_0_label),
5932 pc_rtx);
5933 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
5934
5935 emit_insn (gen_addsi3 (out, out, const1_rtx));
5936 }
5937
5938 /* Generate a loop to check 4 bytes at a time. It is not a good idea
5939 to align this loop: doing so only bloats the code without making
5940 it any faster. */
5941 emit_label (align_4_label);
5942
5943 mem = gen_rtx_MEM (SImode, out);
5944 emit_move_insn (scratch, mem);
5945 emit_insn (gen_addsi3 (out, out, GEN_INT (4)));
5946
5947 /* This formula yields a nonzero result iff one of the bytes is zero.
5948 This saves three branches inside the loop and many cycles. */
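
 /* Worked example, with scratch == 0x41004242 (a zero in byte 2):
 scratch + 0xfefefeff == 0x3fff4141 and ~scratch == 0xbeffbdbd;
 anding those together with 0x80808080 leaves 0x00800000 --
 nonzero, with the set bit marking the zero byte. A word with no
 zero byte cannot carry any 0x80 bit through both ands. */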
5949
5950 emit_insn (gen_addsi3 (tmpreg, scratch, GEN_INT (-0x01010101)));
5951 emit_insn (gen_one_cmplsi2 (scratch, scratch));
5952 emit_insn (gen_andsi3 (tmpreg, tmpreg, scratch));
5953 emit_insn (gen_andsi3 (tmpreg, tmpreg, GEN_INT (0x80808080)));
5954 emit_cmp_and_jump_insns (tmpreg, const0_rtx, EQ, 0, SImode, 1, 0, align_4_label);
5955
5956 if (TARGET_CMOVE)
5957 {
5958 rtx reg = gen_reg_rtx (SImode);
5959 emit_move_insn (reg, tmpreg);
5960 emit_insn (gen_lshrsi3 (reg, reg, GEN_INT (16)));
5961
5962 /* If zero is not in the first two bytes, move two bytes forward. */
5963 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
5964 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
5965 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
5966 emit_insn (gen_rtx_SET (VOIDmode, tmpreg,
5967 gen_rtx_IF_THEN_ELSE (SImode, tmp,
5968 reg,
5969 tmpreg)));
5970 /* Emit lea manually to avoid clobbering of flags. */
5971 emit_insn (gen_rtx_SET (SImode, reg,
5972 gen_rtx_PLUS (SImode, out, GEN_INT (2))));
5973
5974 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
5975 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
5976 emit_insn (gen_rtx_SET (VOIDmode, out,
5977 gen_rtx_IF_THEN_ELSE (SImode, tmp,
5978 reg,
5979 out)));
5980
5981 }
5982 else
5983 {
5984 rtx end_2_label = gen_label_rtx ();
5985 /* Is zero in the first two bytes? */
5986
5987 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
5988 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
5989 tmp = gen_rtx_NE (VOIDmode, tmp, const0_rtx);
5990 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
5991 gen_rtx_LABEL_REF (VOIDmode, end_2_label),
5992 pc_rtx);
5993 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
5994 JUMP_LABEL (tmp) = end_2_label;
5995
5996 /* Not in the first two. Move two bytes forward. */
5997 emit_insn (gen_lshrsi3 (tmpreg, tmpreg, GEN_INT (16)));
5998 emit_insn (gen_addsi3 (out, out, GEN_INT (2)));
5999
6000 emit_label (end_2_label);
6001
6002 }
6003
6004 /* Avoid branch in fixing the byte. */
6005 tmpreg = gen_lowpart (QImode, tmpreg);
6006 emit_insn (gen_addqi3_cc (tmpreg, tmpreg, tmpreg));
6007 emit_insn (gen_subsi3_carry (out, out, GEN_INT (3)));
6008
6009 emit_label (end_0_label);
6010 }
6011 \f
6012 /* Clear stack slot assignments remembered from previous functions.
6013 This is called from INIT_EXPANDERS once before RTL is emitted for each
6014 function. */
6015
6016 static void
6017 ix86_init_machine_status (p)
6018 struct function *p;
6019 {
6020 enum machine_mode mode;
6021 int n;
6022 p->machine
6023 = (struct machine_function *) xmalloc (sizeof (struct machine_function));
6024
6025 for (mode = VOIDmode; (int) mode < (int) MAX_MACHINE_MODE;
6026 mode = (enum machine_mode) ((int) mode + 1))
6027 for (n = 0; n < MAX_386_STACK_LOCALS; n++)
6028 ix86_stack_locals[(int) mode][n] = NULL_RTX;
6029 }
6030
6031 /* Mark machine specific bits of P for GC. */
6032 static void
6033 ix86_mark_machine_status (p)
6034 struct function *p;
6035 {
6036 enum machine_mode mode;
6037 int n;
6038
6039 for (mode = VOIDmode; (int) mode < (int) MAX_MACHINE_MODE;
6040 mode = (enum machine_mode) ((int) mode + 1))
6041 for (n = 0; n < MAX_386_STACK_LOCALS; n++)
6042 ggc_mark_rtx (p->machine->stack_locals[(int) mode][n]);
6043 }
6044
6045 /* Return a MEM corresponding to a stack slot with mode MODE.
6046 Allocate a new slot if necessary.
6047
6048 The RTL for a function can have several slots available: N is
6049 which slot to use. */
6050
6051 rtx
6052 assign_386_stack_local (mode, n)
6053 enum machine_mode mode;
6054 int n;
6055 {
6056 if (n < 0 || n >= MAX_386_STACK_LOCALS)
6057 abort ();
6058
6059 if (ix86_stack_locals[(int) mode][n] == NULL_RTX)
6060 ix86_stack_locals[(int) mode][n]
6061 = assign_stack_local (mode, GET_MODE_SIZE (mode), 0);
6062
6063 return ix86_stack_locals[(int) mode][n];
6064 }
6065 \f
6066 /* Calculate the length of the memory address in the instruction
6067 encoding. Does not include the one-byte modrm, opcode, or prefix. */
6068
6069 static int
6070 memory_address_length (addr)
6071 rtx addr;
6072 {
6073 struct ix86_address parts;
6074 rtx base, index, disp;
6075 int len;
6076
6077 if (GET_CODE (addr) == PRE_DEC
6078 || GET_CODE (addr) == POST_INC)
6079 return 0;
6080
6081 if (! ix86_decompose_address (addr, &parts))
6082 abort ();
6083
6084 base = parts.base;
6085 index = parts.index;
6086 disp = parts.disp;
6087 len = 0;
6088
6089 /* Register Indirect. */
6090 if (base && !index && !disp)
6091 {
6092 /* Special cases: ebp and esp need the two-byte form -- esp takes a SIB byte, ebp a disp8. */
6093 if (addr == stack_pointer_rtx
6094 || addr == arg_pointer_rtx
6095 || addr == frame_pointer_rtx
6096 || addr == hard_frame_pointer_rtx)
6097 len = 1;
6098 }
6099
6100 /* Direct Addressing. */
6101 else if (disp && !base && !index)
6102 len = 4;
6103
6104 else
6105 {
6106 /* Find the length of the displacement constant. */
6107 if (disp)
6108 {
6109 if (GET_CODE (disp) == CONST_INT
6110 && CONST_OK_FOR_LETTER_P (INTVAL (disp), 'K'))
6111 len = 1;
6112 else
6113 len = 4;
6114 }
6115
6116 /* An index requires the two-byte modrm form. */
6117 if (index)
6118 len += 1;
6119 }
6120
6121 return len;
6122 }
6123
6124 int
6125 ix86_attr_length_default (insn)
6126 rtx insn;
6127 {
6128 enum attr_type type;
6129 int len = 0, i;
6130
6131 type = get_attr_type (insn);
6132 extract_insn (insn);
6133 switch (type)
6134 {
6135 case TYPE_INCDEC:
6136 case TYPE_SETCC:
6137 case TYPE_ICMOV:
6138 case TYPE_FMOV:
6139 case TYPE_FOP:
6140 case TYPE_FCMP:
6141 case TYPE_FOP1:
6142 case TYPE_FMUL:
6143 case TYPE_FDIV:
6144 case TYPE_FSGN:
6145 case TYPE_FPSPC:
6146 case TYPE_FCMOV:
6147 case TYPE_IBR:
6148 break;
6149 case TYPE_STR:
6150 case TYPE_CLD:
6151 break;
6152
6153 case TYPE_ALU1:
6154 case TYPE_NEGNOT:
6155 case TYPE_ALU:
6156 case TYPE_ICMP:
6157 case TYPE_IMOVX:
6158 case TYPE_ISHIFT:
6159 case TYPE_IMUL:
6160 case TYPE_IDIV:
6161 case TYPE_PUSH:
6162 case TYPE_POP:
6163 for (i = recog_data.n_operands - 1; i >= 0; --i)
6164 if (CONSTANT_P (recog_data.operand[i]))
6165 {
6166 if (GET_CODE (recog_data.operand[i]) == CONST_INT
6167 && CONST_OK_FOR_LETTER_P (INTVAL (recog_data.operand[i]), 'K'))
6168 len += 1;
6169 else
6170 len += GET_MODE_SIZE (GET_MODE (recog_data.operand[0]));
6171 }
6172 break;
6173
6174 case TYPE_IMOV:
6175 if (CONSTANT_P (recog_data.operand[1]))
6176 len += GET_MODE_SIZE (GET_MODE (recog_data.operand[0]));
6177 break;
6178
6179 case TYPE_CALL:
6180 if (constant_call_address_operand (recog_data.operand[0],
6181 GET_MODE (recog_data.operand[0])))
6182 return 5;
6183 break;
6184
6185 case TYPE_CALLV:
6186 if (constant_call_address_operand (recog_data.operand[1],
6187 GET_MODE (recog_data.operand[1])))
6188 return 5;
6189 break;
6190
6191 case TYPE_LEA:
6192 {
6193 /* Irritatingly, single_set doesn't work with REG_UNUSED present,
6194 as we'll get from running life_analysis during reg-stack when
6195 not optimizing. Not that it matters anyway, now that
6196 pro_epilogue_adjust_stack uses lea, and is by design not
6197 single_set. */
6198 rtx set = PATTERN (insn);
6199 if (GET_CODE (set) == SET)
6200 ;
6201 else if (GET_CODE (set) == PARALLEL
6202 && GET_CODE (XVECEXP (set, 0, 0)) == SET)
6203 set = XVECEXP (set, 0, 0);
6204 else
6205 abort ();
6206
6207 len += memory_address_length (SET_SRC (set));
6208 goto just_opcode;
6209 }
6210
6211 case TYPE_OTHER:
6212 case TYPE_MULTI:
6213 return 15;
6214
6215 case TYPE_FXCH:
6216 if (STACK_TOP_P (recog_data.operand[0]))
6217 return 2 + (REGNO (recog_data.operand[1]) != FIRST_STACK_REG + 1);
6218 else
6219 return 2 + (REGNO (recog_data.operand[0]) != FIRST_STACK_REG + 1);
6220
6221 default:
6222 abort ();
6223 }
6224
6225 for (i = recog_data.n_operands - 1; i >= 0; --i)
6226 if (GET_CODE (recog_data.operand[i]) == MEM)
6227 {
6228 len += memory_address_length (XEXP (recog_data.operand[i], 0));
6229 break;
6230 }
6231
6232 just_opcode:
6233 len += get_attr_length_opcode (insn);
6234 len += get_attr_length_prefix (insn);
6235
6236 return len;
6237 }
6238 \f
6239 /* Return the maximum number of instructions a cpu can issue. */
6240
6241 int
6242 ix86_issue_rate ()
6243 {
6244 switch (ix86_cpu)
6245 {
6246 case PROCESSOR_PENTIUM:
6247 case PROCESSOR_K6:
6248 return 2;
6249
6250 case PROCESSOR_PENTIUMPRO:
6251 return 3;
6252
6253 default:
6254 return 1;
6255 }
6256 }
6257
6258 /* A subroutine of ix86_adjust_cost -- return true iff INSN reads flags set
6259 by DEP_INSN and nothing else set by DEP_INSN. */
6260
6261 static int
6262 ix86_flags_dependant (insn, dep_insn, insn_type)
6263 rtx insn, dep_insn;
6264 enum attr_type insn_type;
6265 {
6266 rtx set, set2;
6267
6268 /* Simplify the test for uninteresting insns. */
6269 if (insn_type != TYPE_SETCC
6270 && insn_type != TYPE_ICMOV
6271 && insn_type != TYPE_FCMOV
6272 && insn_type != TYPE_IBR)
6273 return 0;
6274
6275 if ((set = single_set (dep_insn)) != 0)
6276 {
6277 set = SET_DEST (set);
6278 set2 = NULL_RTX;
6279 }
6280 else if (GET_CODE (PATTERN (dep_insn)) == PARALLEL
6281 && XVECLEN (PATTERN (dep_insn), 0) == 2
6282 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 0)) == SET
6283 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 1)) == SET)
6284 {
6285 set = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
6286 set2 = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 1));
6287 }
6288 else
6289 return 0;
6290
6291 if (GET_CODE (set) != REG || REGNO (set) != FLAGS_REG)
6292 return 0;
6293
6294 /* This test is true if the dependent insn reads the flags but
6295 not any other potentially set register. */
6296 if (!reg_overlap_mentioned_p (set, PATTERN (insn)))
6297 return 0;
6298
6299 if (set2 && reg_overlap_mentioned_p (set2, PATTERN (insn)))
6300 return 0;
6301
6302 return 1;
6303 }
6304
6305 /* A subroutine of ix86_adjust_cost -- return true iff INSN has a memory
6306 address with operands set by DEP_INSN. */
6307
6308 static int
6309 ix86_agi_dependant (insn, dep_insn, insn_type)
6310 rtx insn, dep_insn;
6311 enum attr_type insn_type;
6312 {
6313 rtx addr;
6314
6315 if (insn_type == TYPE_LEA)
6316 {
6317 addr = PATTERN (insn);
6318 if (GET_CODE (addr) == SET)
6319 ;
6320 else if (GET_CODE (addr) == PARALLEL
6321 && GET_CODE (XVECEXP (addr, 0, 0)) == SET)
6322 addr = XVECEXP (addr, 0, 0);
6323 else
6324 abort ();
6325 addr = SET_SRC (addr);
6326 }
6327 else
6328 {
6329 int i;
6330 extract_insn (insn);
6331 for (i = recog_data.n_operands - 1; i >= 0; --i)
6332 if (GET_CODE (recog_data.operand[i]) == MEM)
6333 {
6334 addr = XEXP (recog_data.operand[i], 0);
6335 goto found;
6336 }
6337 return 0;
6338 found:;
6339 }
6340
6341 return modified_in_p (addr, dep_insn);
6342 }
6343
6344 int
6345 ix86_adjust_cost (insn, link, dep_insn, cost)
6346 rtx insn, link, dep_insn;
6347 int cost;
6348 {
6349 enum attr_type insn_type, dep_insn_type;
6350 enum attr_memory memory;
6351 rtx set, set2;
6352 int dep_insn_code_number;
6353
6354 /* Anti and output dependencies have zero cost on all CPUs. */
6355 if (REG_NOTE_KIND (link) != 0)
6356 return 0;
6357
6358 dep_insn_code_number = recog_memoized (dep_insn);
6359
6360 /* If we can't recognize the insns, we can't really do anything. */
6361 if (dep_insn_code_number < 0 || recog_memoized (insn) < 0)
6362 return cost;
6363
6364 insn_type = get_attr_type (insn);
6365 dep_insn_type = get_attr_type (dep_insn);
6366
6367 /* Prologue and epilogue allocators can have a false dependency on ebp.
6368 This results in one cycle extra stall on Pentium prologue scheduling,
6369 so handle this important case manually. */
6370 if (dep_insn_code_number == CODE_FOR_pro_epilogue_adjust_stack
6371 && dep_insn_type == TYPE_ALU
6372 && !reg_mentioned_p (stack_pointer_rtx, insn))
6373 return 0;
6374
6375 switch (ix86_cpu)
6376 {
6377 case PROCESSOR_PENTIUM:
6378 /* Address Generation Interlock adds a cycle of latency. */
6379 if (ix86_agi_dependant (insn, dep_insn, insn_type))
6380 cost += 1;
6381
6382 /* ??? Compares pair with jump/setcc. */
6383 if (ix86_flags_dependant (insn, dep_insn, insn_type))
6384 cost = 0;
6385
6386 /* Floating point stores require the value to be ready one cycle earlier. */
6387 if (insn_type == TYPE_FMOV
6388 && get_attr_memory (insn) == MEMORY_STORE
6389 && !ix86_agi_dependant (insn, dep_insn, insn_type))
6390 cost += 1;
6391 break;
6392
6393 case PROCESSOR_PENTIUMPRO:
6394 /* Since we can't represent delayed latencies of load+operation,
6395 increase the cost here for non-imov insns. */
6396 if (dep_insn_type != TYPE_IMOV
6397 && dep_insn_type != TYPE_FMOV
6398 && ((memory = get_attr_memory (dep_insn)) == MEMORY_LOAD
6399 || memory == MEMORY_BOTH))
6400 cost += 1;
6401
6402 /* INT->FP conversion is expensive. */
6403 if (get_attr_fp_int_src (dep_insn))
6404 cost += 5;
6405
6406 /* There is one cycle extra latency between an FP op and a store. */
6407 if (insn_type == TYPE_FMOV
6408 && (set = single_set (dep_insn)) != NULL_RTX
6409 && (set2 = single_set (insn)) != NULL_RTX
6410 && rtx_equal_p (SET_DEST (set), SET_SRC (set2))
6411 && GET_CODE (SET_DEST (set2)) == MEM)
6412 cost += 1;
6413 break;
6414
6415 case PROCESSOR_K6:
6416 /* The esp dependency is resolved before the instruction is really
6417 finished. */
6418 if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
6419 && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
6420 return 1;
6421
6422 /* Since we can't represent delayed latencies of load+operation,
6423 increase the cost here for non-imov insns. */
6424 if ((memory = get_attr_memory (dep_insn)) == MEMORY_LOAD
6425 || memory == MEMORY_BOTH)
6426 cost += (dep_insn_type != TYPE_IMOV) ? 2 : 1;
6427
6428 /* INT->FP conversion is expensive. */
6429 if (get_attr_fp_int_src (dep_insn))
6430 cost += 5;
6431 break;
6432
6433 case PROCESSOR_ATHLON:
6434 if ((memory = get_attr_memory (dep_insn)) == MEMORY_LOAD
6435 || memory == MEMORY_BOTH)
6436 {
6437 if (dep_insn_type == TYPE_IMOV || dep_insn_type == TYPE_FMOV)
6438 cost += 2;
6439 else
6440 cost += 3;
6441 }
6442       break;
6443 default:
6444 break;
6445 }
6446
6447 return cost;
6448 }
6449
6450 static union
6451 {
6452 struct ppro_sched_data
6453 {
6454 rtx decode[3];
6455 int issued_this_cycle;
6456 } ppro;
6457 } ix86_sched_data;
6458
6459 static int
6460 ix86_safe_length (insn)
6461 rtx insn;
6462 {
6463 if (recog_memoized (insn) >= 0)
6464 return get_attr_length (insn);
6465 else
6466 return 128;
6467 }
6468
6469 static int
6470 ix86_safe_length_prefix (insn)
6471 rtx insn;
6472 {
6473 if (recog_memoized (insn) >= 0)
6474 return get_attr_length_prefix (insn);
6475 else
6476 return 0;
6477 }
6478
6479 static enum attr_memory
6480 ix86_safe_memory (insn)
6481 rtx insn;
6482 {
6483 if (recog_memoized (insn) >= 0)
6484 return get_attr_memory (insn);
6485 else
6486 return MEMORY_UNKNOWN;
6487 }
6488
6489 static enum attr_pent_pair
6490 ix86_safe_pent_pair (insn)
6491 rtx insn;
6492 {
6493 if (recog_memoized (insn) >= 0)
6494 return get_attr_pent_pair (insn);
6495 else
6496 return PENT_PAIR_NP;
6497 }
6498
6499 static enum attr_ppro_uops
6500 ix86_safe_ppro_uops (insn)
6501 rtx insn;
6502 {
6503 if (recog_memoized (insn) >= 0)
6504 return get_attr_ppro_uops (insn);
6505 else
6506 return PPRO_UOPS_MANY;
6507 }
6508
6509 static void
6510 ix86_dump_ppro_packet (dump)
6511 FILE *dump;
6512 {
6513 if (ix86_sched_data.ppro.decode[0])
6514 {
6515 fprintf (dump, "PPRO packet: %d",
6516 INSN_UID (ix86_sched_data.ppro.decode[0]));
6517 if (ix86_sched_data.ppro.decode[1])
6518 fprintf (dump, " %d", INSN_UID (ix86_sched_data.ppro.decode[1]));
6519 if (ix86_sched_data.ppro.decode[2])
6520 fprintf (dump, " %d", INSN_UID (ix86_sched_data.ppro.decode[2]));
6521 fputc ('\n', dump);
6522 }
6523 }
6524
6525 /* We're beginning a new block. Initialize data structures as necessary. */
6526
6527 void
6528 ix86_sched_init (dump, sched_verbose)
6529 FILE *dump ATTRIBUTE_UNUSED;
6530 int sched_verbose ATTRIBUTE_UNUSED;
6531 {
6532 memset (&ix86_sched_data, 0, sizeof (ix86_sched_data));
6533 }
6534
6535 /* Shift INSN to SLOT, and shift everything else down. */
6536
6537 static void
6538 ix86_reorder_insn (insnp, slot)
6539 rtx *insnp, *slot;
6540 {
6541 if (insnp != slot)
6542 {
6543 rtx insn = *insnp;
6544 do
6545 insnp[0] = insnp[1];
6546 while (++insnp != slot);
6547 *insnp = insn;
6548 }
6549 }
6550
6551 /* Find an instruction with the given pairability that loses the fewest
6552 cycles to the fact that the CPU waits for both pipelines to finish before
6553 reading the next instructions.  Also take care that the two instructions
6554 together cannot exceed 7 bytes. */
6555
6556 static rtx *
6557 ix86_pent_find_pair (e_ready, ready, type, first)
6558 rtx *e_ready;
6559 rtx *ready;
6560 enum attr_pent_pair type;
6561 rtx first;
6562 {
6563 int mincycles, cycles;
6564 enum attr_pent_pair tmp;
6565 enum attr_memory memory;
6566 rtx *insnp, *bestinsnp = NULL;
6567
6568 if (ix86_safe_length (first) > 7 + ix86_safe_length_prefix (first))
6569 return NULL;
6570
6571 memory = ix86_safe_memory (first);
6572 cycles = result_ready_cost (first);
6573 mincycles = INT_MAX;
6574
6575 for (insnp = e_ready; insnp >= ready && mincycles; --insnp)
6576 if ((tmp = ix86_safe_pent_pair (*insnp)) == type
6577 && ix86_safe_length (*insnp) <= 7 + ix86_safe_length_prefix (*insnp))
6578 {
6579 enum attr_memory second_memory;
6580 int secondcycles, currentcycles;
6581
6582 second_memory = ix86_safe_memory (*insnp);
6583 secondcycles = result_ready_cost (*insnp);
6584 currentcycles = abs (cycles - secondcycles);
6585
6586 if (secondcycles >= 1 && cycles >= 1)
6587 {
6588 /* Two read/modify/write instructions together take two
6589 cycles longer. */
6590 if (memory == MEMORY_BOTH && second_memory == MEMORY_BOTH)
6591 currentcycles += 2;
6592
6593 /* A read/modify/write instruction followed by a read/modify
6594 instruction takes one cycle longer. */
6595 if (memory == MEMORY_BOTH && second_memory == MEMORY_LOAD
6596 && tmp != PENT_PAIR_UV
6597 && ix86_safe_pent_pair (first) != PENT_PAIR_UV)
6598 currentcycles += 1;
6599 }
6600 if (currentcycles < mincycles)
6601 bestinsnp = insnp, mincycles = currentcycles;
6602 }
6603
6604 return bestinsnp;
6605 }
6606
6607 /* Subroutines of ix86_sched_reorder. */
6608
6609 static void
6610 ix86_sched_reorder_pentium (ready, e_ready)
6611 rtx *ready;
6612 rtx *e_ready;
6613 {
6614 enum attr_pent_pair pair1, pair2;
6615 rtx *insnp;
6616
6617 /* This wouldn't be necessary if Haifa knew that static insn ordering
6618 determines which pipe an insn is issued to.  Since it doesn't, we
6619 have to make some minor rearrangements ourselves. */
6620
6621 pair1 = ix86_safe_pent_pair (*e_ready);
6622
6623 /* If the first insn is non-pairable, let it be. */
6624 if (pair1 == PENT_PAIR_NP)
6625 return;
6626
6627 pair2 = PENT_PAIR_NP;
6628 insnp = 0;
6629
6630 /* If the first insn is UV or PV pairable, search for a PU
6631 insn to go with. */
6632 if (pair1 == PENT_PAIR_UV || pair1 == PENT_PAIR_PV)
6633 {
6634 insnp = ix86_pent_find_pair (e_ready-1, ready,
6635 PENT_PAIR_PU, *e_ready);
6636 if (insnp)
6637 pair2 = PENT_PAIR_PU;
6638 }
6639
6640 /* If the first insn is PU or UV pairable, search for a PV
6641 insn to go with. */
6642 if (pair2 == PENT_PAIR_NP
6643 && (pair1 == PENT_PAIR_PU || pair1 == PENT_PAIR_UV))
6644 {
6645 insnp = ix86_pent_find_pair (e_ready-1, ready,
6646 PENT_PAIR_PV, *e_ready);
6647 if (insnp)
6648 pair2 = PENT_PAIR_PV;
6649 }
6650
6651 /* If the first insn is pairable, search for a UV
6652 insn to go with. */
6653 if (pair2 == PENT_PAIR_NP)
6654 {
6655 insnp = ix86_pent_find_pair (e_ready-1, ready,
6656 PENT_PAIR_UV, *e_ready);
6657 if (insnp)
6658 pair2 = PENT_PAIR_UV;
6659 }
6660
6661 if (pair2 == PENT_PAIR_NP)
6662 return;
6663
6664 /* Found something! Decide if we need to swap the order. */
6665 if (pair1 == PENT_PAIR_PV || pair2 == PENT_PAIR_PU
6666 || (pair1 == PENT_PAIR_UV && pair2 == PENT_PAIR_UV
6667 && ix86_safe_memory (*e_ready) == MEMORY_BOTH
6668 && ix86_safe_memory (*insnp) == MEMORY_LOAD))
6669 ix86_reorder_insn (insnp, e_ready);
6670 else
6671 ix86_reorder_insn (insnp, e_ready - 1);
6672 }
6673
6674 static void
6675 ix86_sched_reorder_ppro (ready, e_ready)
6676 rtx *ready;
6677 rtx *e_ready;
6678 {
6679 rtx decode[3];
6680 enum attr_ppro_uops cur_uops;
6681 int issued_this_cycle;
6682 rtx *insnp;
6683 int i;
6684
6685 /* At this point .ppro.decode contains the state of the three
6686 decoders from last "cycle". That is, those insns that were
6687 actually independent. But here we're scheduling for the
6688 decoder, and we may find things that are decodable in the
6689 same cycle. */
6690
6691 memcpy (decode, ix86_sched_data.ppro.decode, sizeof(decode));
6692 issued_this_cycle = 0;
6693
6694 insnp = e_ready;
6695 cur_uops = ix86_safe_ppro_uops (*insnp);
6696
6697 /* If the decoders are empty, and we've a complex insn at the
6698 head of the priority queue, let it issue without complaint. */
6699 if (decode[0] == NULL)
6700 {
6701 if (cur_uops == PPRO_UOPS_MANY)
6702 {
6703 decode[0] = *insnp;
6704 goto ppro_done;
6705 }
6706
6707 /* Otherwise, search for a 2-4 uop insn to issue. */
6708 while (cur_uops != PPRO_UOPS_FEW)
6709 {
6710 if (insnp == ready)
6711 break;
6712 cur_uops = ix86_safe_ppro_uops (*--insnp);
6713 }
6714
6715 /* If so, move it to the head of the line. */
6716 if (cur_uops == PPRO_UOPS_FEW)
6717 ix86_reorder_insn (insnp, e_ready);
6718
6719 /* Issue the head of the queue. */
6720 issued_this_cycle = 1;
6721 decode[0] = *e_ready--;
6722 }
6723
6724 /* Look for simple insns to fill in the other two slots. */
6725 for (i = 1; i < 3; ++i)
6726 if (decode[i] == NULL)
6727 {
6728 if (ready >= e_ready)
6729 goto ppro_done;
6730
6731 insnp = e_ready;
6732 cur_uops = ix86_safe_ppro_uops (*insnp);
6733 while (cur_uops != PPRO_UOPS_ONE)
6734 {
6735 if (insnp == ready)
6736 break;
6737 cur_uops = ix86_safe_ppro_uops (*--insnp);
6738 }
6739
6740 /* Found one. Move it to the head of the queue and issue it. */
6741 if (cur_uops == PPRO_UOPS_ONE)
6742 {
6743 ix86_reorder_insn (insnp, e_ready);
6744 decode[i] = *e_ready--;
6745 issued_this_cycle++;
6746 continue;
6747 }
6748
6749 /* ??? Didn't find one. Ideally, here we would do a lazy split
6750 of 2-uop insns, issue one and queue the other. */
6751 }
6752
6753 ppro_done:
6754 if (issued_this_cycle == 0)
6755 issued_this_cycle = 1;
6756 ix86_sched_data.ppro.issued_this_cycle = issued_this_cycle;
6757 }
6758
6759
6760 /* We are about to begin issuing insns for this clock cycle.
6761 Override the default sort algorithm to better slot instructions. */
6762 int
6763 ix86_sched_reorder (dump, sched_verbose, ready, n_ready, clock_var)
6764 FILE *dump ATTRIBUTE_UNUSED;
6765 int sched_verbose ATTRIBUTE_UNUSED;
6766 rtx *ready;
6767 int n_ready;
6768 int clock_var ATTRIBUTE_UNUSED;
6769 {
6770 rtx *e_ready = ready + n_ready - 1;
6771
6772 if (n_ready < 2)
6773 goto out;
6774
6775 switch (ix86_cpu)
6776 {
6777 default:
6778 break;
6779
6780 case PROCESSOR_PENTIUM:
6781 ix86_sched_reorder_pentium (ready, e_ready);
6782 break;
6783
6784 case PROCESSOR_PENTIUMPRO:
6785 ix86_sched_reorder_ppro (ready, e_ready);
6786 break;
6787 }
6788
6789 out:
6790 return ix86_issue_rate ();
6791 }
6792
6793 /* We are about to issue INSN. Return the number of insns left on the
6794 ready queue that can be issued this cycle. */
6795
6796 int
6797 ix86_variable_issue (dump, sched_verbose, insn, can_issue_more)
6798 FILE *dump;
6799 int sched_verbose;
6800 rtx insn;
6801 int can_issue_more;
6802 {
6803 int i;
6804 switch (ix86_cpu)
6805 {
6806 default:
6807 return can_issue_more - 1;
6808
6809 case PROCESSOR_PENTIUMPRO:
6810 {
6811 enum attr_ppro_uops uops = ix86_safe_ppro_uops (insn);
6812
6813 if (uops == PPRO_UOPS_MANY)
6814 {
6815 if (sched_verbose)
6816 ix86_dump_ppro_packet (dump);
6817 ix86_sched_data.ppro.decode[0] = insn;
6818 ix86_sched_data.ppro.decode[1] = NULL;
6819 ix86_sched_data.ppro.decode[2] = NULL;
6820 if (sched_verbose)
6821 ix86_dump_ppro_packet (dump);
6822 ix86_sched_data.ppro.decode[0] = NULL;
6823 }
6824 else if (uops == PPRO_UOPS_FEW)
6825 {
6826 if (sched_verbose)
6827 ix86_dump_ppro_packet (dump);
6828 ix86_sched_data.ppro.decode[0] = insn;
6829 ix86_sched_data.ppro.decode[1] = NULL;
6830 ix86_sched_data.ppro.decode[2] = NULL;
6831 }
6832 else
6833 {
6834 for (i = 0; i < 3; ++i)
6835 if (ix86_sched_data.ppro.decode[i] == NULL)
6836 {
6837 ix86_sched_data.ppro.decode[i] = insn;
6838 break;
6839 }
6840 if (i == 3)
6841 abort ();
6842 if (i == 2)
6843 {
6844 if (sched_verbose)
6845 ix86_dump_ppro_packet (dump);
6846 ix86_sched_data.ppro.decode[0] = NULL;
6847 ix86_sched_data.ppro.decode[1] = NULL;
6848 ix86_sched_data.ppro.decode[2] = NULL;
6849 }
6850 }
6851 }
6852 return --ix86_sched_data.ppro.issued_this_cycle;
6853 }
6854 }