/* Subroutines used for code generation on IA-32.
   Copyright (C) 1988, 1992, 1994, 1995, 1996, 1997, 1998, 1999, 2000
   Free Software Foundation, Inc.

This file is part of GNU CC.

GNU CC is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2, or (at your option)
any later version.

GNU CC is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with GNU CC; see the file COPYING.  If not, write to
the Free Software Foundation, 59 Temple Place - Suite 330,
Boston, MA 02111-1307, USA.  */

#include <setjmp.h>
#include "config.h"
#include "system.h"
#include "rtl.h"
#include "tree.h"
#include "tm_p.h"
#include "regs.h"
#include "hard-reg-set.h"
#include "real.h"
#include "insn-config.h"
#include "conditions.h"
#include "insn-flags.h"
#include "output.h"
#include "insn-attr.h"
#include "flags.h"
#include "except.h"
#include "function.h"
#include "recog.h"
#include "expr.h"
#include "toplev.h"
#include "basic-block.h"
#include "ggc.h"

#ifdef EXTRA_CONSTRAINT
/* If EXTRA_CONSTRAINT is defined, then the 'S'
   constraint in REG_CLASS_FROM_LETTER will no longer work, and various
   asm statements that need 'S' for class SIREG will break.  */
 error EXTRA_CONSTRAINT conflicts with S constraint letter
/* The previous line used to be #error, but some compilers barf
   even if the conditional was untrue.  */
#endif

#ifndef CHECK_STACK_LIMIT
#define CHECK_STACK_LIMIT -1
#endif

/* Processor costs (relative to an add) */
struct processor_costs i386_cost = {	/* 386 specific costs */
  1,				/* cost of an add instruction */
  1,				/* cost of a lea instruction */
  3,				/* variable shift costs */
  2,				/* constant shift costs */
  6,				/* cost of starting a multiply */
  1,				/* cost of multiply per each bit set */
  23,				/* cost of a divide/mod */
  15,				/* "large" insn */
  3,				/* MOVE_RATIO */
  4,				/* cost for loading QImode using movzbl */
  {2, 4, 2},			/* cost of loading integer registers
				   in QImode, HImode and SImode.
				   Relative to reg-reg move (2).  */
  {2, 4, 2},			/* cost of storing integer registers */
  2,				/* cost of reg,reg fld/fst */
  {8, 8, 8},			/* cost of loading fp registers
				   in SFmode, DFmode and XFmode */
  {8, 8, 8}			/* cost of storing fp registers */
};
struct processor_costs i486_cost = {	/* 486 specific costs */
  1,				/* cost of an add instruction */
  1,				/* cost of a lea instruction */
  3,				/* variable shift costs */
  2,				/* constant shift costs */
  12,				/* cost of starting a multiply */
  1,				/* cost of multiply per each bit set */
  40,				/* cost of a divide/mod */
  15,				/* "large" insn */
  3,				/* MOVE_RATIO */
  4,				/* cost for loading QImode using movzbl */
  {2, 4, 2},			/* cost of loading integer registers
				   in QImode, HImode and SImode.
				   Relative to reg-reg move (2).  */
  {2, 4, 2},			/* cost of storing integer registers */
  2,				/* cost of reg,reg fld/fst */
  {8, 8, 8},			/* cost of loading fp registers
				   in SFmode, DFmode and XFmode */
  {8, 8, 8}			/* cost of storing fp registers */
};
struct processor_costs pentium_cost = {
  1,				/* cost of an add instruction */
  1,				/* cost of a lea instruction */
  4,				/* variable shift costs */
  1,				/* constant shift costs */
  11,				/* cost of starting a multiply */
  0,				/* cost of multiply per each bit set */
  25,				/* cost of a divide/mod */
  8,				/* "large" insn */
  6,				/* MOVE_RATIO */
  6,				/* cost for loading QImode using movzbl */
  {2, 4, 2},			/* cost of loading integer registers
				   in QImode, HImode and SImode.
				   Relative to reg-reg move (2).  */
  {2, 4, 2},			/* cost of storing integer registers */
  2,				/* cost of reg,reg fld/fst */
  {2, 2, 6},			/* cost of loading fp registers
				   in SFmode, DFmode and XFmode */
  {4, 4, 6}			/* cost of storing fp registers */
};
struct processor_costs pentiumpro_cost = {
  1,				/* cost of an add instruction */
  1,				/* cost of a lea instruction */
  1,				/* variable shift costs */
  1,				/* constant shift costs */
  4,				/* cost of starting a multiply */
  0,				/* cost of multiply per each bit set */
  17,				/* cost of a divide/mod */
  8,				/* "large" insn */
  6,				/* MOVE_RATIO */
  2,				/* cost for loading QImode using movzbl */
  {4, 4, 4},			/* cost of loading integer registers
				   in QImode, HImode and SImode.
				   Relative to reg-reg move (2).  */
  {2, 2, 2},			/* cost of storing integer registers */
  2,				/* cost of reg,reg fld/fst */
  {2, 2, 6},			/* cost of loading fp registers
				   in SFmode, DFmode and XFmode */
  {4, 4, 6}			/* cost of storing fp registers */
};
struct processor_costs k6_cost = {
  1,				/* cost of an add instruction */
  2,				/* cost of a lea instruction */
  1,				/* variable shift costs */
  1,				/* constant shift costs */
  3,				/* cost of starting a multiply */
  0,				/* cost of multiply per each bit set */
  18,				/* cost of a divide/mod */
  8,				/* "large" insn */
  4,				/* MOVE_RATIO */
  3,				/* cost for loading QImode using movzbl */
  {4, 5, 4},			/* cost of loading integer registers
				   in QImode, HImode and SImode.
				   Relative to reg-reg move (2).  */
  {2, 3, 2},			/* cost of storing integer registers */
  4,				/* cost of reg,reg fld/fst */
  {6, 6, 6},			/* cost of loading fp registers
				   in SFmode, DFmode and XFmode */
  {4, 4, 4}			/* cost of storing fp registers */
};
struct processor_costs athlon_cost = {
  1,				/* cost of an add instruction */
  2,				/* cost of a lea instruction */
  1,				/* variable shift costs */
  1,				/* constant shift costs */
  5,				/* cost of starting a multiply */
  0,				/* cost of multiply per each bit set */
  42,				/* cost of a divide/mod */
  8,				/* "large" insn */
  9,				/* MOVE_RATIO */
  4,				/* cost for loading QImode using movzbl */
  {4, 5, 4},			/* cost of loading integer registers
				   in QImode, HImode and SImode.
				   Relative to reg-reg move (2).  */
  {2, 3, 2},			/* cost of storing integer registers */
  4,				/* cost of reg,reg fld/fst */
  {6, 6, 20},			/* cost of loading fp registers
				   in SFmode, DFmode and XFmode */
  {4, 4, 16}			/* cost of storing fp registers */
};
struct processor_costs *ix86_cost = &pentium_cost;

/* Processor feature/optimization bitmasks.  */
#define m_386 (1<<PROCESSOR_I386)
#define m_486 (1<<PROCESSOR_I486)
#define m_PENT (1<<PROCESSOR_PENTIUM)
#define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
#define m_K6 (1<<PROCESSOR_K6)
#define m_ATHLON (1<<PROCESSOR_ATHLON)

const int x86_use_leave = m_386 | m_K6 | m_ATHLON;
const int x86_push_memory = m_386 | m_K6 | m_ATHLON;
const int x86_zero_extend_with_and = m_486 | m_PENT;
const int x86_movx = m_ATHLON | m_PPRO /* m_386 | m_K6 */;
const int x86_double_with_add = ~m_386;
const int x86_use_bit_test = m_386;
const int x86_unroll_strlen = m_486 | m_PENT | m_PPRO | m_ATHLON | m_K6;
const int x86_use_q_reg = m_PENT | m_PPRO | m_K6;
const int x86_use_any_reg = m_486;
const int x86_cmove = m_PPRO | m_ATHLON;
const int x86_deep_branch = m_PPRO | m_K6 | m_ATHLON;
const int x86_use_sahf = m_PPRO | m_K6 | m_ATHLON;
const int x86_partial_reg_stall = m_PPRO;
const int x86_use_loop = m_K6;
const int x86_use_fiop = ~(m_PPRO | m_ATHLON | m_PENT);
const int x86_use_mov0 = m_K6;
const int x86_use_cltd = ~(m_PENT | m_K6);
const int x86_read_modify_write = ~m_PENT;
const int x86_read_modify = ~(m_PENT | m_PPRO);
const int x86_split_long_moves = m_PPRO;
const int x86_promote_QImode = m_K6 | m_PENT | m_386 | m_486;
const int x86_single_stringop = m_386;
const int x86_qimode_math = ~(0);
const int x86_promote_qi_regs = 0;
const int x86_himode_math = ~(m_PPRO);
const int x86_promote_hi_regs = m_PPRO;
const int x86_sub_esp_4 = m_ATHLON | m_PPRO;
const int x86_sub_esp_8 = m_ATHLON | m_PPRO | m_386 | m_486;
const int x86_add_esp_4 = m_ATHLON | m_K6;
const int x86_add_esp_8 = m_ATHLON | m_PPRO | m_K6 | m_386 | m_486;
const int x86_integer_DFmode_moves = ~m_ATHLON;
const int x86_partial_reg_dependency = m_ATHLON;
const int x86_memory_mismatch_stall = m_ATHLON;
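
/* Illustrative sketch (not part of the original file): each TARGET_*
   tuning macro in i386.h tests one of the bitmasks above against the
   currently scheduled-for CPU, roughly like

     #define TARGET_CMOVE (x86_cmove & (1 << (int) ix86_cpu))

   so with -mcpu=pentiumpro, TARGET_CMOVE is nonzero because x86_cmove
   includes m_PPRO.  The exact macro spellings live in i386.h; the form
   shown here is an approximation.  */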

#define AT_BP(mode) (gen_rtx_MEM ((mode), hard_frame_pointer_rtx))

const char * const hi_reg_name[] = HI_REGISTER_NAMES;
const char * const qi_reg_name[] = QI_REGISTER_NAMES;
const char * const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES;

/* Array of the smallest class containing reg number REGNO, indexed by
   REGNO.  Used by REGNO_REG_CLASS in i386.h.  */

enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] =
{
  /* ax, dx, cx, bx */
  AREG, DREG, CREG, BREG,
  /* si, di, bp, sp */
  SIREG, DIREG, NON_Q_REGS, NON_Q_REGS,
  /* FP registers */
  FP_TOP_REG, FP_SECOND_REG, FLOAT_REGS, FLOAT_REGS,
  FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, FLOAT_REGS,
  /* arg pointer */
  NON_Q_REGS,
  /* flags, fpsr, dirflag, frame */
  NO_REGS, NO_REGS, NO_REGS, NON_Q_REGS,
  SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
  SSE_REGS, SSE_REGS,
  MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS,
  MMX_REGS, MMX_REGS
};
/* The "default" register map.  */

int const dbx_register_map[FIRST_PSEUDO_REGISTER] =
{
  0, 2, 1, 3, 6, 7, 4, 5,		/* general regs */
  12, 13, 14, 15, 16, 17, 18, 19,	/* fp regs */
  -1, -1, -1, -1,			/* arg, flags, fpsr, dir */
  21, 22, 23, 24, 25, 26, 27, 28,	/* SSE */
  29, 30, 31, 32, 33, 34, 35, 36,	/* MMX */
};

/* Define the register numbers to be used in Dwarf debugging information.
   The SVR4 reference port C compiler uses the following register numbers
   in its Dwarf output code:
	0 for %eax (gcc regno = 0)
	1 for %ecx (gcc regno = 2)
	2 for %edx (gcc regno = 1)
	3 for %ebx (gcc regno = 3)
	4 for %esp (gcc regno = 7)
	5 for %ebp (gcc regno = 6)
	6 for %esi (gcc regno = 4)
	7 for %edi (gcc regno = 5)

   The following three DWARF register numbers are never generated by
   the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
   believes these numbers have these meanings.
	8 for %eip (no gcc equivalent)
	9 for %eflags (gcc regno = 17)
	10 for %trapno (no gcc equivalent)

   It is not at all clear how we should number the FP stack registers
   for the x86 architecture.  If the version of SDB on x86/svr4 were
   a bit less brain dead with respect to floating-point then we would
   have a precedent to follow with respect to DWARF register numbers
   for x86 FP registers, but the SDB on x86/svr4 is so completely
   broken with respect to FP registers that it is hardly worth thinking
   of it as something to strive for compatibility with.

   The version of x86/svr4 SDB I have at the moment does (partially)
   seem to believe that DWARF register number 11 is associated with
   the x86 register %st(0), but that's about all.  Higher DWARF
   register numbers don't seem to be associated with anything in
   particular, and even for DWARF regno 11, SDB only seems to under-
   stand that it should say that a variable lives in %st(0) (when
   asked via an `=' command) if we said it was in DWARF regno 11,
   but SDB still prints garbage when asked for the value of the
   variable in question (via a `/' command).
   (Also note that the labels SDB prints for various FP stack regs
   when doing an `x' command are all wrong.)

   Note that these problems generally don't affect the native SVR4
   C compiler because it doesn't allow the use of -O with -g and
   because when it is *not* optimizing, it allocates a memory
   location for each floating-point variable, and the memory
   location is what gets described in the DWARF AT_location
   attribute for the variable in question.

   Regardless of the severe mental illness of the x86/svr4 SDB, we
   do something sensible here and we use the following DWARF
   register numbers.  Note that these are all stack-top-relative
   numbers.
	11 for %st(0) (gcc regno = 8)
	12 for %st(1) (gcc regno = 9)
	13 for %st(2) (gcc regno = 10)
	14 for %st(3) (gcc regno = 11)
	15 for %st(4) (gcc regno = 12)
	16 for %st(5) (gcc regno = 13)
	17 for %st(6) (gcc regno = 14)
	18 for %st(7) (gcc regno = 15)  */

int const svr4_dbx_register_map[FIRST_PSEUDO_REGISTER] =
{
  0, 2, 1, 3, 6, 7, 5, 4,		/* general regs */
  11, 12, 13, 14, 15, 16, 17, 18,	/* fp regs */
  -1, 9, -1, -1,			/* arg, flags, fpsr, dir */
  21, 22, 23, 24, 25, 26, 27, 28,	/* SSE registers */
  29, 30, 31, 32, 33, 34, 35, 36,	/* MMX registers */
};
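
/* Worked example (added for illustration): debug output maps gcc
   register numbers through whichever of these arrays the target
   selects as its DBX_REGISTER_NUMBER table.  Under the SVR4 numbering
   above, %esi is gcc regno 4 and svr4_dbx_register_map[4] == 6,
   matching the SVR4 reference compiler; %st(0) is gcc regno 8 and
   maps to DWARF regno 11.  */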


/* Test and compare insns in i386.md store the information needed to
   generate branch and scc insns here.  */

struct rtx_def *ix86_compare_op0 = NULL_RTX;
struct rtx_def *ix86_compare_op1 = NULL_RTX;

#define MAX_386_STACK_LOCALS 2

/* Define the structure for the machine field in struct function.  */
struct machine_function
{
  rtx stack_locals[(int) MAX_MACHINE_MODE][MAX_386_STACK_LOCALS];
};

#define ix86_stack_locals (cfun->machine->stack_locals)

/* Which cpu are we scheduling for.  */
enum processor_type ix86_cpu;

/* Which instruction set architecture to use.  */
int ix86_arch;

/* Strings to hold which cpu and instruction set architecture to use.  */
const char *ix86_cpu_string;		/* for -mcpu=<xxx> */
const char *ix86_arch_string;		/* for -march=<xxx> */

/* Register allocation order.  */
const char *ix86_reg_alloc_order;
static char regs_allocated[FIRST_PSEUDO_REGISTER];

/* # of registers to use to pass arguments.  */
const char *ix86_regparm_string;

/* ix86_regparm_string as a number.  */
int ix86_regparm;

/* Alignment to use for loops and jumps:  */

/* Power of two alignment for loops.  */
const char *ix86_align_loops_string;

/* Power of two alignment for non-loop jumps.  */
const char *ix86_align_jumps_string;

/* Power of two alignment for stack boundary in bytes.  */
const char *ix86_preferred_stack_boundary_string;

/* Preferred alignment for stack boundary in bits.  */
int ix86_preferred_stack_boundary;

/* Values 1-5: see jump.c.  */
int ix86_branch_cost;
const char *ix86_branch_cost_string;

/* Power of two alignment for functions.  */
int ix86_align_funcs;
const char *ix86_align_funcs_string;

/* Power of two alignment for loops.  */
int ix86_align_loops;

/* Power of two alignment for non-loop jumps.  */
int ix86_align_jumps;
\f
static void output_pic_addr_const PARAMS ((FILE *, rtx, int));
static void put_condition_code PARAMS ((enum rtx_code, enum machine_mode,
					int, int, FILE *));
static enum rtx_code unsigned_comparison PARAMS ((enum rtx_code code));
static rtx ix86_expand_int_compare PARAMS ((enum rtx_code, rtx, rtx));
static enum machine_mode ix86_fp_compare_mode PARAMS ((enum rtx_code));
static enum rtx_code ix86_prepare_fp_compare_args PARAMS ((enum rtx_code,
							   rtx *, rtx *));
static rtx ix86_expand_compare PARAMS ((enum rtx_code));
static rtx gen_push PARAMS ((rtx));
static int memory_address_length PARAMS ((rtx addr));
static int ix86_flags_dependant PARAMS ((rtx, rtx, enum attr_type));
static int ix86_agi_dependant PARAMS ((rtx, rtx, enum attr_type));
static int ix86_safe_length PARAMS ((rtx));
static enum attr_memory ix86_safe_memory PARAMS ((rtx));
static enum attr_pent_pair ix86_safe_pent_pair PARAMS ((rtx));
static enum attr_ppro_uops ix86_safe_ppro_uops PARAMS ((rtx));
static void ix86_dump_ppro_packet PARAMS ((FILE *));
static void ix86_reorder_insn PARAMS ((rtx *, rtx *));
static rtx * ix86_pent_find_pair PARAMS ((rtx *, rtx *, enum attr_pent_pair,
					  rtx));
static void ix86_init_machine_status PARAMS ((struct function *));
static void ix86_mark_machine_status PARAMS ((struct function *));
static void ix86_split_to_parts PARAMS ((rtx, rtx *, enum machine_mode));
static int ix86_safe_length_prefix PARAMS ((rtx));
static HOST_WIDE_INT ix86_compute_frame_size PARAMS ((HOST_WIDE_INT,
						      int *, int *, int *));
static int ix86_nsaved_regs PARAMS ((void));
static void ix86_emit_save_regs PARAMS ((void));
static void ix86_emit_restore_regs_using_mov PARAMS ((rtx, int));
static void ix86_emit_epilogue_esp_adjustment PARAMS ((int));
static void ix86_sched_reorder_pentium PARAMS ((rtx *, rtx *));
static void ix86_sched_reorder_ppro PARAMS ((rtx *, rtx *));

struct ix86_address
{
  rtx base, index, disp;
  HOST_WIDE_INT scale;
};

static int ix86_decompose_address PARAMS ((rtx, struct ix86_address *));
\f
/* Sometimes certain combinations of command options do not make
   sense on a particular target machine.  You can define a macro
   `OVERRIDE_OPTIONS' to take account of this.  This macro, if
   defined, is executed once just after all the command options have
   been parsed.

   Don't use this macro to turn on various extra optimizations for
   `-O'.  That is what `OPTIMIZATION_OPTIONS' is for.  */

void
override_options ()
{
  /* Comes from final.c -- no real reason to change it.  */
#define MAX_CODE_ALIGN 16

  static struct ptt
    {
      struct processor_costs *cost;	/* Processor costs */
      int target_enable;		/* Target flags to enable.  */
      int target_disable;		/* Target flags to disable.  */
      int align_loop;			/* Default alignments.  */
      int align_jump;
      int align_func;
      int branch_cost;
    }
  const processor_target_table[PROCESSOR_max] =
    {
      {&i386_cost, 0, 0, 2, 2, 2, 1},
      {&i486_cost, 0, 0, 4, 4, 4, 1},
      {&pentium_cost, 0, 0, -4, -4, -4, 1},
      {&pentiumpro_cost, 0, 0, 4, -4, 4, 1},
      {&k6_cost, 0, 0, -5, -5, 4, 1},
      {&athlon_cost, 0, 0, 4, -4, 4, 1}
    };

  static struct pta
    {
      const char *name;		/* processor name or nickname.  */
      enum processor_type processor;
    }
  const processor_alias_table[] =
    {
      {"i386", PROCESSOR_I386},
      {"i486", PROCESSOR_I486},
      {"i586", PROCESSOR_PENTIUM},
      {"pentium", PROCESSOR_PENTIUM},
      {"i686", PROCESSOR_PENTIUMPRO},
      {"pentiumpro", PROCESSOR_PENTIUMPRO},
      {"k6", PROCESSOR_K6},
      {"athlon", PROCESSOR_ATHLON},
    };

  int const pta_size = sizeof (processor_alias_table) / sizeof (struct pta);

#ifdef SUBTARGET_OVERRIDE_OPTIONS
  SUBTARGET_OVERRIDE_OPTIONS;
#endif

  ix86_arch = PROCESSOR_I386;
  ix86_cpu = (enum processor_type) TARGET_CPU_DEFAULT;

  if (ix86_arch_string != 0)
    {
      int i;
      for (i = 0; i < pta_size; i++)
	if (! strcmp (ix86_arch_string, processor_alias_table[i].name))
	  {
	    ix86_arch = processor_alias_table[i].processor;
	    /* Default cpu tuning to the architecture.  */
	    ix86_cpu = ix86_arch;
	    break;
	  }
      if (i == pta_size)
	error ("bad value (%s) for -march= switch", ix86_arch_string);
    }

  if (ix86_cpu_string != 0)
    {
      int i;
      for (i = 0; i < pta_size; i++)
	if (! strcmp (ix86_cpu_string, processor_alias_table[i].name))
	  {
	    ix86_cpu = processor_alias_table[i].processor;
	    break;
	  }
      if (i == pta_size)
	error ("bad value (%s) for -mcpu= switch", ix86_cpu_string);
    }

  ix86_cost = processor_target_table[ix86_cpu].cost;
  target_flags |= processor_target_table[ix86_cpu].target_enable;
  target_flags &= ~processor_target_table[ix86_cpu].target_disable;

  /* Arrange to set up i386_stack_locals for all functions.  */
  init_machine_status = ix86_init_machine_status;
  mark_machine_status = ix86_mark_machine_status;

  /* Validate registers in register allocation order.  */
  if (ix86_reg_alloc_order)
    {
      int i, ch;
      for (i = 0; (ch = ix86_reg_alloc_order[i]) != '\0'; i++)
	{
	  int regno = 0;

	  switch (ch)
	    {
	    case 'a':	regno = 0;	break;
	    case 'd':	regno = 1;	break;
	    case 'c':	regno = 2;	break;
	    case 'b':	regno = 3;	break;
	    case 'S':	regno = 4;	break;
	    case 'D':	regno = 5;	break;
	    case 'B':	regno = 6;	break;

	    default:	fatal ("Register '%c' is unknown", ch);
	    }

	  if (regs_allocated[regno])
	    fatal ("Register '%c' already specified in allocation order", ch);

	  regs_allocated[regno] = 1;
	}
    }

  /* Validate -mregparm= value.  */
  if (ix86_regparm_string)
    {
      ix86_regparm = atoi (ix86_regparm_string);
      if (ix86_regparm < 0 || ix86_regparm > REGPARM_MAX)
	fatal ("-mregparm=%d is not between 0 and %d",
	       ix86_regparm, REGPARM_MAX);
    }

  /* Validate -malign-loops= value, or provide default.  */
  ix86_align_loops = processor_target_table[ix86_cpu].align_loop;
  if (ix86_align_loops_string)
    {
      ix86_align_loops = atoi (ix86_align_loops_string);
      if (ix86_align_loops < 0 || ix86_align_loops > MAX_CODE_ALIGN)
	fatal ("-malign-loops=%d is not between 0 and %d",
	       ix86_align_loops, MAX_CODE_ALIGN);
    }

  /* Validate -malign-jumps= value, or provide default.  */
  ix86_align_jumps = processor_target_table[ix86_cpu].align_jump;
  if (ix86_align_jumps_string)
    {
      ix86_align_jumps = atoi (ix86_align_jumps_string);
      if (ix86_align_jumps < 0 || ix86_align_jumps > MAX_CODE_ALIGN)
	fatal ("-malign-jumps=%d is not between 0 and %d",
	       ix86_align_jumps, MAX_CODE_ALIGN);
    }

  /* Validate -malign-functions= value, or provide default.  */
  ix86_align_funcs = processor_target_table[ix86_cpu].align_func;
  if (ix86_align_funcs_string)
    {
      ix86_align_funcs = atoi (ix86_align_funcs_string);
      if (ix86_align_funcs < 0 || ix86_align_funcs > MAX_CODE_ALIGN)
	fatal ("-malign-functions=%d is not between 0 and %d",
	       ix86_align_funcs, MAX_CODE_ALIGN);
    }

  /* Validate -mpreferred-stack-boundary= value, or provide default.
     The default of 128 bits is for Pentium III's SSE __m128.  */
  ix86_preferred_stack_boundary = 128;
  if (ix86_preferred_stack_boundary_string)
    {
      int i = atoi (ix86_preferred_stack_boundary_string);
      if (i < 2 || i > 31)
	fatal ("-mpreferred-stack-boundary=%d is not between 2 and 31", i);
      ix86_preferred_stack_boundary = (1 << i) * BITS_PER_UNIT;
    }

  /* Validate -mbranch-cost= value, or provide default.  */
  ix86_branch_cost = processor_target_table[ix86_cpu].branch_cost;
  if (ix86_branch_cost_string)
    {
      ix86_branch_cost = atoi (ix86_branch_cost_string);
      if (ix86_branch_cost < 0 || ix86_branch_cost > 5)
	fatal ("-mbranch-cost=%d is not between 0 and 5",
	       ix86_branch_cost);
    }

  /* Keep nonleaf frame pointers.  */
  if (TARGET_OMIT_LEAF_FRAME_POINTER)
    flag_omit_frame_pointer = 1;

  /* If we're doing fast math, we don't care about comparison order
     wrt NaNs.  This lets us use a shorter comparison sequence.  */
  if (flag_fast_math)
    target_flags &= ~MASK_IEEE_FP;

  /* It makes no sense to ask for just SSE builtins, so MMX is also turned
     on by -msse.  */
  if (TARGET_SSE)
    target_flags |= MASK_MMX;
}
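
/* Worked example (illustration only): `-march=k6' alone sets both
   ix86_arch and ix86_cpu to PROCESSOR_K6, since -march defaults the
   tuning to the architecture.  `-march=pentium -mcpu=k6' instead
   generates Pentium-compatible code (ix86_arch == PROCESSOR_PENTIUM)
   while taking costs and alignments from k6_cost and the
   processor_target_table entry for PROCESSOR_K6.  */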
\f
/* A C statement (sans semicolon) to choose the order in which to
   allocate hard registers for pseudo-registers local to a basic
   block.

   Store the desired register order in the array `reg_alloc_order'.
   Element 0 should be the register to allocate first; element 1, the
   next register; and so on.

   The macro body should not assume anything about the contents of
   `reg_alloc_order' before execution of the macro.

   On most machines, it is not necessary to define this macro.  */

void
order_regs_for_local_alloc ()
{
  int i, ch, order;

  /* User specified the register allocation order.  */

  if (ix86_reg_alloc_order)
    {
      for (i = order = 0; (ch = ix86_reg_alloc_order[i]) != '\0'; i++)
	{
	  int regno = 0;

	  switch (ch)
	    {
	    case 'a':	regno = 0;	break;
	    case 'd':	regno = 1;	break;
	    case 'c':	regno = 2;	break;
	    case 'b':	regno = 3;	break;
	    case 'S':	regno = 4;	break;
	    case 'D':	regno = 5;	break;
	    case 'B':	regno = 6;	break;
	    }

	  reg_alloc_order[order++] = regno;
	}

      for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
	{
	  if (! regs_allocated[i])
	    reg_alloc_order[order++] = i;
	}
    }

  /* If user did not specify a register allocation order, use natural order.  */
  else
    {
      for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
	reg_alloc_order[i] = i;
    }
}
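
/* Worked example (illustration only): with `-mreg-alloc=dS', the loop
   above produces reg_alloc_order = {1, 4, ...}, i.e. %edx and %esi
   first, followed by every register not named in the string, in
   natural order.  */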
\f
void
optimization_options (level, size)
     int level;
     int size ATTRIBUTE_UNUSED;
{
  /* For -O2 and beyond, turn off -fschedule-insns by default.  It tends to
     make the problem with not enough registers even worse.  */
#ifdef INSN_SCHEDULING
  if (level > 1)
    flag_schedule_insns = 0;
#endif
}
\f
/* Return nonzero if IDENTIFIER with arguments ARGS is a valid machine specific
   attribute for DECL.  The attributes in ATTRIBUTES have previously been
   assigned to DECL.  */

int
ix86_valid_decl_attribute_p (decl, attributes, identifier, args)
     tree decl ATTRIBUTE_UNUSED;
     tree attributes ATTRIBUTE_UNUSED;
     tree identifier ATTRIBUTE_UNUSED;
     tree args ATTRIBUTE_UNUSED;
{
  return 0;
}

/* Return nonzero if IDENTIFIER with arguments ARGS is a valid machine specific
   attribute for TYPE.  The attributes in ATTRIBUTES have previously been
   assigned to TYPE.  */

int
ix86_valid_type_attribute_p (type, attributes, identifier, args)
     tree type;
     tree attributes ATTRIBUTE_UNUSED;
     tree identifier;
     tree args;
{
  if (TREE_CODE (type) != FUNCTION_TYPE
      && TREE_CODE (type) != METHOD_TYPE
      && TREE_CODE (type) != FIELD_DECL
      && TREE_CODE (type) != TYPE_DECL)
    return 0;

  /* Stdcall attribute says callee is responsible for popping arguments
     if they are not variable.  */
  if (is_attribute_p ("stdcall", identifier))
    return (args == NULL_TREE);

  /* Cdecl attribute says the callee is a normal C declaration.  */
  if (is_attribute_p ("cdecl", identifier))
    return (args == NULL_TREE);

  /* Regparm attribute specifies how many integer arguments are to be
     passed in registers.  */
  if (is_attribute_p ("regparm", identifier))
    {
      tree cst;

      if (! args || TREE_CODE (args) != TREE_LIST
	  || TREE_CHAIN (args) != NULL_TREE
	  || TREE_VALUE (args) == NULL_TREE)
	return 0;

      cst = TREE_VALUE (args);
      if (TREE_CODE (cst) != INTEGER_CST)
	return 0;

      if (compare_tree_int (cst, REGPARM_MAX) > 0)
	return 0;

      return 1;
    }

  return 0;
}
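
/* Usage sketch (not from the original source): the attributes
   validated above appear in user code as, e.g.,

     extern int f (int, int) __attribute__ ((regparm (2)));
     extern int g (int) __attribute__ ((stdcall));

   regparm takes a single integer constant no larger than REGPARM_MAX;
   stdcall and cdecl take no arguments.  */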

/* Return 0 if the attributes for two types are incompatible, 1 if they
   are compatible, and 2 if they are nearly compatible (which causes a
   warning to be generated).  */

int
ix86_comp_type_attributes (type1, type2)
     tree type1;
     tree type2;
{
  /* Check for mismatch of non-default calling convention.  */
  const char *rtdstr = TARGET_RTD ? "cdecl" : "stdcall";

  if (TREE_CODE (type1) != FUNCTION_TYPE)
    return 1;

  /* Check for mismatched return types (cdecl vs stdcall).  */
  if (!lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type1))
      != !lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type2)))
    return 0;
  return 1;
}
\f
/* Value is the number of bytes of arguments automatically
   popped when returning from a subroutine call.
   FUNDECL is the declaration node of the function (as a tree),
   FUNTYPE is the data type of the function (as a tree),
   or for a library call it is an identifier node for the subroutine name.
   SIZE is the number of bytes of arguments passed on the stack.

   On the 80386, the RTD insn may be used to pop them if the number
   of args is fixed, but if the number is variable then the caller
   must pop them all.  RTD can't be used for library calls now
   because the library is compiled with the Unix compiler.
   Use of RTD is a selectable option, since it is incompatible with
   standard Unix calling sequences.  If the option is not selected,
   the caller must always pop the args.

   The attribute stdcall is equivalent to RTD on a per module basis.  */

int
ix86_return_pops_args (fundecl, funtype, size)
     tree fundecl;
     tree funtype;
     int size;
{
  int rtd = TARGET_RTD && (!fundecl || TREE_CODE (fundecl) != IDENTIFIER_NODE);

  /* Cdecl functions override -mrtd, and never pop the stack.  */
  if (! lookup_attribute ("cdecl", TYPE_ATTRIBUTES (funtype)))
    {
      /* Stdcall functions will pop the stack if not variable args.  */
      if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (funtype)))
	rtd = 1;

      if (rtd
	  && (TYPE_ARG_TYPES (funtype) == NULL_TREE
	      || (TREE_VALUE (tree_last (TYPE_ARG_TYPES (funtype)))
		  == void_type_node)))
	return size;
    }

  /* Lose any fake structure return argument.  */
  if (aggregate_value_p (TREE_TYPE (funtype)))
    return GET_MODE_SIZE (Pmode);

  return 0;
}
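
/* Worked example (illustration only): for

     int h (int a, int b) __attribute__ ((stdcall));

   the argument list is fixed, so ix86_return_pops_args returns 8 and
   the callee's return becomes `ret $8'.  A cdecl (or varargs)
   function returns 0 here and the caller pops the arguments.  */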
\f
/* Argument support functions.  */

/* Initialize a variable CUM of type CUMULATIVE_ARGS
   for a call to a function whose data type is FNTYPE.
   For a library call, FNTYPE is 0.  */

void
init_cumulative_args (cum, fntype, libname)
     CUMULATIVE_ARGS *cum;	/* Argument info to initialize */
     tree fntype;		/* tree ptr for function decl */
     rtx libname;		/* SYMBOL_REF of library name or 0 */
{
  static CUMULATIVE_ARGS zero_cum;
  tree param, next_param;

  if (TARGET_DEBUG_ARG)
    {
      fprintf (stderr, "\ninit_cumulative_args (");
      if (fntype)
	fprintf (stderr, "fntype code = %s, ret code = %s",
		 tree_code_name[(int) TREE_CODE (fntype)],
		 tree_code_name[(int) TREE_CODE (TREE_TYPE (fntype))]);
      else
	fprintf (stderr, "no fntype");

      if (libname)
	fprintf (stderr, ", libname = %s", XSTR (libname, 0));
    }

  *cum = zero_cum;

  /* Set up the number of registers to use for passing arguments.  */
  cum->nregs = ix86_regparm;
  if (fntype)
    {
      tree attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (fntype));

      if (attr)
	cum->nregs = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
    }

  /* Determine if this function has variable arguments.  A function has
     a fixed argument list only if its last argument type is
     `void_type_node'.  If there are variable arguments, then we won't
     pass anything in registers.  */

  if (cum->nregs)
    {
      for (param = (fntype) ? TYPE_ARG_TYPES (fntype) : 0;
	   param != 0; param = next_param)
	{
	  next_param = TREE_CHAIN (param);
	  if (next_param == 0 && TREE_VALUE (param) != void_type_node)
	    cum->nregs = 0;
	}
    }

  if (TARGET_DEBUG_ARG)
    fprintf (stderr, ", nregs=%d )\n", cum->nregs);

  return;
}

/* Update the data in CUM to advance over an argument
   of mode MODE and data type TYPE.
   (TYPE is null for libcalls where that information may not be available.)  */

void
function_arg_advance (cum, mode, type, named)
     CUMULATIVE_ARGS *cum;	/* current arg information */
     enum machine_mode mode;	/* current arg mode */
     tree type;			/* type of the argument or 0 if lib support */
     int named;			/* whether or not the argument was named */
{
  int bytes =
    (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
  int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;

  if (TARGET_DEBUG_ARG)
    fprintf (stderr,
	     "function_adv (sz=%d, wds=%2d, nregs=%d, mode=%s, named=%d)\n\n",
	     words, cum->words, cum->nregs, GET_MODE_NAME (mode), named);

  cum->words += words;
  cum->nregs -= words;
  cum->regno += words;

  if (cum->nregs <= 0)
    {
      cum->nregs = 0;
      cum->regno = 0;
    }

  return;
}
/* Define where to put the arguments to a function.
   Value is zero to push the argument on the stack,
   or a hard register in which to store the argument.

   MODE is the argument's machine mode.
   TYPE is the data type of the argument (as a tree).
    This is null for libcalls where that information may
    not be available.
   CUM is a variable of type CUMULATIVE_ARGS which gives info about
    the preceding args and about the function being called.
   NAMED is nonzero if this argument is a named parameter
    (otherwise it is an extra parameter matching an ellipsis).  */

struct rtx_def *
function_arg (cum, mode, type, named)
     CUMULATIVE_ARGS *cum;	/* current arg information */
     enum machine_mode mode;	/* current arg mode */
     tree type;			/* type of the argument or 0 if lib support */
     int named;			/* != 0 for normal args, == 0 for ... args */
{
  rtx ret = NULL_RTX;
  int bytes =
    (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
  int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;

  switch (mode)
    {
      /* For now, pass fp/complex values on the stack.  */
    default:
      break;

    case BLKmode:
    case DImode:
    case SImode:
    case HImode:
    case QImode:
      if (words <= cum->nregs)
	ret = gen_rtx_REG (mode, cum->regno);
      break;
    }

  if (TARGET_DEBUG_ARG)
    {
      fprintf (stderr,
	       "function_arg (size=%d, wds=%2d, nregs=%d, mode=%4s, named=%d",
	       words, cum->words, cum->nregs, GET_MODE_NAME (mode), named);

      if (ret)
	fprintf (stderr, ", reg=%%e%s", reg_names[REGNO (ret)]);
      else
	fprintf (stderr, ", stack");

      fprintf (stderr, " )\n");
    }

  return ret;
}
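
/* Worked example (illustration only): with regparm (3) in effect,
   init_cumulative_args sets cum->nregs = 3 and cum->regno = 0, so
   three int arguments are assigned by function_arg to hard registers
   0, 1 and 2 -- %eax, %edx and %ecx -- with function_arg_advance
   stepping regno by one word per argument.  A fourth int argument
   finds cum->nregs exhausted and is pushed on the stack.  */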
\f

/* Return nonzero if OP is (const_int 1), else return zero.  */

int
const_int_1_operand (op, mode)
     rtx op;
     enum machine_mode mode ATTRIBUTE_UNUSED;
{
  return (GET_CODE (op) == CONST_INT && INTVAL (op) == 1);
}

/* Returns 1 if OP is either a symbol reference or a sum of a symbol
   reference and a constant.  */

int
symbolic_operand (op, mode)
     register rtx op;
     enum machine_mode mode ATTRIBUTE_UNUSED;
{
  switch (GET_CODE (op))
    {
    case SYMBOL_REF:
    case LABEL_REF:
      return 1;

    case CONST:
      op = XEXP (op, 0);
      if (GET_CODE (op) == SYMBOL_REF
	  || GET_CODE (op) == LABEL_REF
	  || (GET_CODE (op) == UNSPEC
	      && XINT (op, 1) >= 6
	      && XINT (op, 1) <= 7))
	return 1;
      if (GET_CODE (op) != PLUS
	  || GET_CODE (XEXP (op, 1)) != CONST_INT)
	return 0;

      op = XEXP (op, 0);
      if (GET_CODE (op) == SYMBOL_REF
	  || GET_CODE (op) == LABEL_REF)
	return 1;
      /* Only @GOTOFF gets offsets.  */
      if (GET_CODE (op) != UNSPEC
	  || XINT (op, 1) != 7)
	return 0;

      op = XVECEXP (op, 0, 0);
      if (GET_CODE (op) == SYMBOL_REF
	  || GET_CODE (op) == LABEL_REF)
	return 1;
      return 0;

    default:
      return 0;
    }
}
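
/* Examples of operands accepted above (added for illustration):
   a plain SYMBOL_REF or LABEL_REF; (const (plus (symbol_ref "x")
   (const_int 4))); and, in PIC code, the unspec wrappers this port
   uses for @GOT/@GOTOFF references (XINT values 6 and 7), where only
   the @GOTOFF form (unspec 7) may carry an offset.  */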

/* Return true if the operand contains a @GOT or @GOTOFF reference.  */

int
pic_symbolic_operand (op, mode)
     register rtx op;
     enum machine_mode mode ATTRIBUTE_UNUSED;
{
  if (GET_CODE (op) == CONST)
    {
      op = XEXP (op, 0);
      if (GET_CODE (op) == UNSPEC)
	return 1;
      if (GET_CODE (op) != PLUS
	  || GET_CODE (XEXP (op, 1)) != CONST_INT)
	return 0;
      op = XEXP (op, 0);
      if (GET_CODE (op) == UNSPEC)
	return 1;
    }
  return 0;
}

/* Test for a valid operand for a call instruction.  Don't allow the
   arg pointer register or virtual regs since they may decay into
   reg + const, which the patterns can't handle.  */

int
call_insn_operand (op, mode)
     rtx op;
     enum machine_mode mode ATTRIBUTE_UNUSED;
{
  if (GET_CODE (op) != MEM)
    return 0;
  op = XEXP (op, 0);

  /* Disallow indirect through a virtual register.  This leads to
     compiler aborts when trying to eliminate them.  */
  if (GET_CODE (op) == REG
      && (op == arg_pointer_rtx
	  || op == frame_pointer_rtx
	  || (REGNO (op) >= FIRST_PSEUDO_REGISTER
	      && REGNO (op) <= LAST_VIRTUAL_REGISTER)))
    return 0;

  /* Disallow `call 1234'.  Due to varying assembler lameness this
     gets either rejected or translated to `call .+1234'.  */
  if (GET_CODE (op) == CONST_INT)
    return 0;

  /* Explicitly allow SYMBOL_REF even if pic.  */
  if (GET_CODE (op) == SYMBOL_REF)
    return 1;

  /* Half-pic doesn't allow anything but registers and constants.
     We've just taken care of the latter.  */
  if (HALF_PIC_P ())
    return register_operand (op, Pmode);

  /* Otherwise we can allow any general_operand in the address.  */
  return general_operand (op, Pmode);
}

int
constant_call_address_operand (op, mode)
     rtx op;
     enum machine_mode mode ATTRIBUTE_UNUSED;
{
  return (GET_CODE (op) == MEM
	  && CONSTANT_ADDRESS_P (XEXP (op, 0))
	  && GET_CODE (XEXP (op, 0)) != CONST_INT);
}

/* Match exactly zero and one.  */

int
const0_operand (op, mode)
     register rtx op;
     enum machine_mode mode;
{
  return op == CONST0_RTX (mode);
}

int
const1_operand (op, mode)
     register rtx op;
     enum machine_mode mode ATTRIBUTE_UNUSED;
{
  return op == const1_rtx;
}

/* Match 2, 4, or 8.  Used for leal multiplicands.  */

int
const248_operand (op, mode)
     register rtx op;
     enum machine_mode mode ATTRIBUTE_UNUSED;
{
  return (GET_CODE (op) == CONST_INT
	  && (INTVAL (op) == 2 || INTVAL (op) == 4 || INTVAL (op) == 8));
}

/* True if this is a constant appropriate for an increment or decrement.  */

int
incdec_operand (op, mode)
     register rtx op;
     enum machine_mode mode;
{
  if (op == const1_rtx || op == constm1_rtx)
    return 1;
  if (GET_CODE (op) != CONST_INT)
    return 0;
  if (mode == SImode && INTVAL (op) == (HOST_WIDE_INT) 0xffffffff)
    return 1;
  if (mode == HImode && INTVAL (op) == (HOST_WIDE_INT) 0xffff)
    return 1;
  if (mode == QImode && INTVAL (op) == (HOST_WIDE_INT) 0xff)
    return 1;
  return 0;
}

/* Return false if this is the stack pointer, or any other fake
   register eliminable to the stack pointer.  Otherwise, this is
   a register operand.

   This is used to prevent esp from being used as an index reg,
   which would only happen in pathological cases.  */

int
reg_no_sp_operand (op, mode)
     register rtx op;
     enum machine_mode mode;
{
  rtx t = op;
  if (GET_CODE (t) == SUBREG)
    t = SUBREG_REG (t);
  if (t == stack_pointer_rtx || t == arg_pointer_rtx || t == frame_pointer_rtx)
    return 0;

  return register_operand (op, mode);
}

/* Return false if this is any eliminable register.  Otherwise
   general_operand.  */

int
general_no_elim_operand (op, mode)
     register rtx op;
     enum machine_mode mode;
{
  rtx t = op;
  if (GET_CODE (t) == SUBREG)
    t = SUBREG_REG (t);
  if (t == arg_pointer_rtx || t == frame_pointer_rtx
      || t == virtual_incoming_args_rtx || t == virtual_stack_vars_rtx
      || t == virtual_stack_dynamic_rtx)
    return 0;

  return general_operand (op, mode);
}

/* Return false if this is any eliminable register.  Otherwise
   register_operand or const_int.  */

int
nonmemory_no_elim_operand (op, mode)
     register rtx op;
     enum machine_mode mode;
{
  rtx t = op;
  if (GET_CODE (t) == SUBREG)
    t = SUBREG_REG (t);
  if (t == arg_pointer_rtx || t == frame_pointer_rtx
      || t == virtual_incoming_args_rtx || t == virtual_stack_vars_rtx
      || t == virtual_stack_dynamic_rtx)
    return 0;

  return GET_CODE (op) == CONST_INT || register_operand (op, mode);
}

/* Return true if op is a Q_REGS class register.  */

int
q_regs_operand (op, mode)
     register rtx op;
     enum machine_mode mode;
{
  if (mode != VOIDmode && GET_MODE (op) != mode)
    return 0;
  if (GET_CODE (op) == SUBREG)
    op = SUBREG_REG (op);
  return QI_REG_P (op);
}

/* Return true if op is a NON_Q_REGS class register.  */

int
non_q_regs_operand (op, mode)
     register rtx op;
     enum machine_mode mode;
{
  if (mode != VOIDmode && GET_MODE (op) != mode)
    return 0;
  if (GET_CODE (op) == SUBREG)
    op = SUBREG_REG (op);
  return NON_QI_REG_P (op);
}

/* Return 1 if OP is a comparison operator that can use the condition code
   generated by a logical operation, which characteristically does not set
   overflow or carry.  To be used with CCNOmode.  */

int
no_comparison_operator (op, mode)
     register rtx op;
     enum machine_mode mode;
{
  if (mode != VOIDmode && GET_MODE (op) != mode)
    return 0;

  switch (GET_CODE (op))
    {
    case EQ: case NE:
    case LT: case GE:
    case LEU: case LTU: case GEU: case GTU:
      return 1;

    default:
      return 0;
    }
}

/* Return 1 if OP is a comparison operator that can be issued by fcmov.  */

int
fcmov_comparison_operator (op, mode)
     register rtx op;
     enum machine_mode mode;
{
  if (mode != VOIDmode && GET_MODE (op) != mode)
    return 0;

  switch (GET_CODE (op))
    {
    case EQ: case NE:
    case LEU: case LTU: case GEU: case GTU:
    case UNORDERED: case ORDERED:
      return 1;

    default:
      return 0;
    }
}

/* Return 1 if OP is any normal comparison operator plus {UN}ORDERED.  */

int
uno_comparison_operator (op, mode)
     register rtx op;
     enum machine_mode mode;
{
  if (mode != VOIDmode && GET_MODE (op) != mode)
    return 0;

  switch (GET_CODE (op))
    {
    case EQ: case NE:
    case LE: case LT: case GE: case GT:
    case LEU: case LTU: case GEU: case GTU:
    case UNORDERED: case ORDERED:
      return 1;

    default:
      return 0;
    }
}

/* Return 1 if OP is a binary operator that can be promoted to wider mode.  */

int
promotable_binary_operator (op, mode)
     register rtx op;
     enum machine_mode mode ATTRIBUTE_UNUSED;
{
  switch (GET_CODE (op))
    {
    case MULT:
      /* Modern CPUs have the same latency for HImode and SImode
	 multiply, but the 386 and 486 do HImode multiply faster.  */
      return ix86_cpu > PROCESSOR_I486;
    case PLUS:
    case AND:
    case IOR:
    case XOR:
    case ASHIFT:
      return 1;
    default:
      return 0;
    }
}

/* Nearly general operand, but accept any const_double, since we wish
   to be able to drop them into memory rather than have them get pulled
   into registers.  */

int
cmp_fp_expander_operand (op, mode)
     register rtx op;
     enum machine_mode mode;
{
  if (mode != VOIDmode && mode != GET_MODE (op))
    return 0;
  if (GET_CODE (op) == CONST_DOUBLE)
    return 1;
  return general_operand (op, mode);
}

/* Match an SI or HImode register for a zero_extract.  */

int
ext_register_operand (op, mode)
     register rtx op;
     enum machine_mode mode ATTRIBUTE_UNUSED;
{
  if (GET_MODE (op) != SImode && GET_MODE (op) != HImode)
    return 0;
  return register_operand (op, VOIDmode);
}

/* Return 1 if this is a valid binary floating-point operation.
   OP is the expression matched, and MODE is its mode.  */

int
binary_fp_operator (op, mode)
     register rtx op;
     enum machine_mode mode;
{
  if (mode != VOIDmode && mode != GET_MODE (op))
    return 0;

  switch (GET_CODE (op))
    {
    case PLUS:
    case MINUS:
    case MULT:
    case DIV:
      return GET_MODE_CLASS (GET_MODE (op)) == MODE_FLOAT;

    default:
      return 0;
    }
}

int
mult_operator (op, mode)
     register rtx op;
     enum machine_mode mode ATTRIBUTE_UNUSED;
{
  return GET_CODE (op) == MULT;
}

int
div_operator (op, mode)
     register rtx op;
     enum machine_mode mode ATTRIBUTE_UNUSED;
{
  return GET_CODE (op) == DIV;
}

int
arith_or_logical_operator (op, mode)
     rtx op;
     enum machine_mode mode;
{
  return ((mode == VOIDmode || GET_MODE (op) == mode)
	  && (GET_RTX_CLASS (GET_CODE (op)) == 'c'
	      || GET_RTX_CLASS (GET_CODE (op)) == '2'));
}

/* Returns 1 if OP is a memory operand with a displacement.  */

int
memory_displacement_operand (op, mode)
     register rtx op;
     enum machine_mode mode;
{
  struct ix86_address parts;

  if (! memory_operand (op, mode))
    return 0;

  if (! ix86_decompose_address (XEXP (op, 0), &parts))
    abort ();

  return parts.disp != NULL_RTX;
}

/* To avoid problems when jump re-emits comparisons like testqi_ext_ccno_0,
   re-recognize the operand to avoid a copy_to_mode_reg that will fail.

   ??? It seems likely that this will only work because cmpsi is an
   expander, and no actual insns use this.  */

int
cmpsi_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  if (general_operand (op, mode))
    return 1;

  if (GET_CODE (op) == AND
      && GET_MODE (op) == SImode
      && GET_CODE (XEXP (op, 0)) == ZERO_EXTRACT
      && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST_INT
      && GET_CODE (XEXP (XEXP (op, 0), 2)) == CONST_INT
      && INTVAL (XEXP (XEXP (op, 0), 1)) == 8
      && INTVAL (XEXP (XEXP (op, 0), 2)) == 8
      && GET_CODE (XEXP (op, 1)) == CONST_INT)
    return 1;

  return 0;
}

/* Returns 1 if OP is a memory operand that cannot be represented by the
   modRM array.  */

int
long_memory_operand (op, mode)
     register rtx op;
     enum machine_mode mode;
{
  if (! memory_operand (op, mode))
    return 0;

  return memory_address_length (op) != 0;
}

/* Return nonzero if the rtx is known aligned.  */

int
aligned_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  struct ix86_address parts;

  if (!general_operand (op, mode))
    return 0;

  /* Registers and immediate operands are always "aligned".  */
  if (GET_CODE (op) != MEM)
    return 1;

  /* Don't even try to do any aligned optimizations with volatiles.  */
  if (MEM_VOLATILE_P (op))
    return 0;

  op = XEXP (op, 0);

  /* Pushes and pops are only valid on the stack pointer.  */
  if (GET_CODE (op) == PRE_DEC
      || GET_CODE (op) == POST_INC)
    return 1;

  /* Decode the address.  */
  if (! ix86_decompose_address (op, &parts))
    abort ();

  /* Look for some component that isn't known to be aligned.  */
  if (parts.index)
    {
      if (parts.scale < 4
	  && REGNO_POINTER_ALIGN (REGNO (parts.index)) < 32)
	return 0;
    }
  if (parts.base)
    {
      if (REGNO_POINTER_ALIGN (REGNO (parts.base)) < 32)
	return 0;
    }
  if (parts.disp)
    {
      if (GET_CODE (parts.disp) != CONST_INT
	  || (INTVAL (parts.disp) & 3) != 0)
	return 0;
    }

  /* Didn't find one -- this must be an aligned address.  */
  return 1;
}
\f
/* Return true if the constant is something that can be loaded with
   a special instruction.  Only handle 0.0 and 1.0; others are less
   worthwhile.  */

int
standard_80387_constant_p (x)
     rtx x;
{
  if (GET_CODE (x) != CONST_DOUBLE)
    return -1;

#if ! defined (REAL_IS_NOT_DOUBLE) || defined (REAL_ARITHMETIC)
  {
    REAL_VALUE_TYPE d;
    jmp_buf handler;
    int is0, is1;

    if (setjmp (handler))
      return 0;

    set_float_handler (handler);
    REAL_VALUE_FROM_CONST_DOUBLE (d, x);
    is0 = REAL_VALUES_EQUAL (d, dconst0) && !REAL_VALUE_MINUS_ZERO (d);
    is1 = REAL_VALUES_EQUAL (d, dconst1);
    set_float_handler (NULL_PTR);

    if (is0)
      return 1;

    if (is1)
      return 2;

    /* Note that on the 80387, other constants, such as pi,
       are much slower to load as standard constants
       than to load from doubles in memory!  */
    /* ??? Not true on K6: all constants are equal cost.  */
  }
#endif

  return 0;
}
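
/* Usage note (added for illustration): the return value distinguishes
   which special instruction applies -- 1 selects `fldz' for 0.0 and 2
   selects `fld1' for 1.0 -- and the floating-point move patterns in
   i386.md consult this function when deciding whether a constant can
   be materialized without a memory load.  */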

/* Returns 1 if OP contains a symbol reference.  */

int
symbolic_reference_mentioned_p (op)
     rtx op;
{
  register const char *fmt;
  register int i;

  if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
    return 1;

  fmt = GET_RTX_FORMAT (GET_CODE (op));
  for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
    {
      if (fmt[i] == 'E')
	{
	  register int j;

	  for (j = XVECLEN (op, i) - 1; j >= 0; j--)
	    if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
	      return 1;
	}

      else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
	return 1;
    }

  return 0;
}

/* Return 1 if it is appropriate to emit `ret' instructions in the
   body of a function.  Do this only if the epilogue is simple, needing a
   couple of insns.  Prior to reloading, we can't tell how many registers
   must be saved, so return 0 then.  Return 0 if there is no frame
   marker to de-allocate.

   If NON_SAVING_SETJMP is defined and true, then it is not possible
   for the epilogue to be simple, so return 0.  This is a special case
   since NON_SAVING_SETJMP will not cause regs_ever_live to change
   until final, but jump_optimize may need to know sooner if a
   `return' is OK.  */

int
ix86_can_use_return_insn_p ()
{
  HOST_WIDE_INT tsize;
  int nregs;

#ifdef NON_SAVING_SETJMP
  if (NON_SAVING_SETJMP && current_function_calls_setjmp)
    return 0;
#endif
#ifdef FUNCTION_BLOCK_PROFILER_EXIT
  if (profile_block_flag == 2)
    return 0;
#endif

  if (! reload_completed || frame_pointer_needed)
    return 0;

  /* Don't allow more than 32k of pop, since that's all we can do
     with one instruction.  */
  if (current_function_pops_args
      && current_function_args_size >= 32768)
    return 0;

  tsize = ix86_compute_frame_size (get_frame_size (), &nregs, NULL, NULL);
  return tsize == 0 && nregs == 0;
}
\f
static char *pic_label_name;
static int pic_label_output;
static char *global_offset_table_name;

/* This function generates code for -fpic that loads %ebx with
   the return address of the caller and then returns.  */

void
asm_output_function_prefix (file, name)
     FILE *file;
     const char *name ATTRIBUTE_UNUSED;
{
  rtx xops[2];
  int pic_reg_used = flag_pic && (current_function_uses_pic_offset_table
				  || current_function_uses_const_pool);
  xops[0] = pic_offset_table_rtx;
  xops[1] = stack_pointer_rtx;

  /* Deep branch prediction favors having a return for every call.  */
  if (pic_reg_used && TARGET_DEEP_BRANCH_PREDICTION)
    {
      if (!pic_label_output)
	{
	  /* This used to call ASM_DECLARE_FUNCTION_NAME(), but since
	     the label being emitted is an internal (non-global) one,
	     emitting .type information for a local label made no sense
	     and gave the SCO OpenServer 5.0.4 ELF assembler grief (why
	     give debug info for a label declared non-global?), so it
	     was changed to call ASM_OUTPUT_LABEL() instead.  */

	  ASM_OUTPUT_LABEL (file, pic_label_name);

	  xops[1] = gen_rtx_MEM (SImode, xops[1]);
	  output_asm_insn ("mov{l}\t{%1, %0|%0, %1}", xops);
	  output_asm_insn ("ret", xops);

	  pic_label_output = 1;
	}
    }
}

void
load_pic_register ()
{
  rtx gotsym, pclab;

  if (global_offset_table_name == NULL)
    {
      global_offset_table_name =
	ggc_alloc_string ("_GLOBAL_OFFSET_TABLE_", 21);
      ggc_add_string_root (&global_offset_table_name, 1);
    }
  gotsym = gen_rtx_SYMBOL_REF (Pmode, global_offset_table_name);

  if (TARGET_DEEP_BRANCH_PREDICTION)
    {
      if (pic_label_name == NULL)
	{
	  pic_label_name = ggc_alloc_string (NULL, 32);
	  ggc_add_string_root (&pic_label_name, 1);
	  ASM_GENERATE_INTERNAL_LABEL (pic_label_name, "LPR", 0);
	}
      pclab = gen_rtx_MEM (QImode, gen_rtx_SYMBOL_REF (Pmode, pic_label_name));
    }
  else
    {
      pclab = gen_rtx_LABEL_REF (VOIDmode, gen_label_rtx ());
    }

  emit_insn (gen_prologue_get_pc (pic_offset_table_rtx, pclab));

  if (! TARGET_DEEP_BRANCH_PREDICTION)
    emit_insn (gen_popsi1 (pic_offset_table_rtx));

  emit_insn (gen_prologue_set_got (pic_offset_table_rtx, gotsym, pclab));
}
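
/* Rough sketch of the code emitted above (illustration only; the
   exact assembly comes from the i386.md patterns).  Without deep
   branch prediction, something like

     call .L1
   .L1: popl %ebx
     addl $_GLOBAL_OFFSET_TABLE_+[.-.L1], %ebx

   and with TARGET_DEEP_BRANCH_PREDICTION, a call to the per-file
   return-address label emitted by asm_output_function_prefix, so that
   every call is matched by a ret for the return-stack predictor.  */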

/* Generate an SImode "push" pattern for input ARG.  */

static rtx
gen_push (arg)
     rtx arg;
{
  return gen_rtx_SET (VOIDmode,
		      gen_rtx_MEM (SImode,
				   gen_rtx_PRE_DEC (SImode,
						    stack_pointer_rtx)),
		      arg);
}
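
/* The RTL produced above is

     (set (mem:SI (pre_dec:SI (reg:SI 7)))
	  arg)

   which the move patterns recognize and emit as a single `pushl'.  */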

/* Return number of registers to be saved on the stack.  */

static int
ix86_nsaved_regs ()
{
  int nregs = 0;
  int pic_reg_used = flag_pic && (current_function_uses_pic_offset_table
				  || current_function_uses_const_pool);
  int limit = (frame_pointer_needed
	       ? HARD_FRAME_POINTER_REGNUM : STACK_POINTER_REGNUM);
  int regno;

  for (regno = limit - 1; regno >= 0; regno--)
    if ((regs_ever_live[regno] && ! call_used_regs[regno])
	|| (regno == PIC_OFFSET_TABLE_REGNUM && pic_reg_used))
      {
	nregs++;
      }
  return nregs;
}

/* Return the offset between two registers, one to be eliminated, and the other
   its replacement, at the start of a routine.  */

HOST_WIDE_INT
ix86_initial_elimination_offset (from, to)
     int from;
     int to;
{
  int padding1;
  int nregs;

  /* Stack grows downward:

     [arguments]
					<- ARG_POINTER
     saved pc

     saved frame pointer if frame_pointer_needed
					<- HARD_FRAME_POINTER
     [saved regs]

     [padding1]	  \
		   |			<- FRAME_POINTER
     [frame]	    > tsize
		   |
     [padding2]	  /
   */

  if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
    /* Skip saved PC and previous frame pointer.
       Executed only when frame_pointer_needed.  */
    return 8;
  else if (from == FRAME_POINTER_REGNUM
	   && to == HARD_FRAME_POINTER_REGNUM)
    {
      ix86_compute_frame_size (get_frame_size (), &nregs, &padding1, (int *) 0);
      padding1 += nregs * UNITS_PER_WORD;
      return -padding1;
    }
  else
    {
      /* ARG_POINTER or FRAME_POINTER to STACK_POINTER elimination.  */
      int frame_size = frame_pointer_needed ? 8 : 4;
      HOST_WIDE_INT tsize = ix86_compute_frame_size (get_frame_size (),
						     &nregs, &padding1,
						     (int *) 0);

      if (to != STACK_POINTER_REGNUM)
	abort ();
      else if (from == ARG_POINTER_REGNUM)
	return tsize + nregs * UNITS_PER_WORD + frame_size;
      else if (from != FRAME_POINTER_REGNUM)
	abort ();
      else
	return tsize - padding1;
    }
}
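
/* Worked example (illustration only): with frame_pointer_needed and
   two call-saved registers pushed, ARG_POINTER to HARD_FRAME_POINTER
   is always 8 (saved return address plus saved %ebp), while
   FRAME_POINTER to HARD_FRAME_POINTER is
   -(padding1 + 2 * UNITS_PER_WORD), i.e. -8 when no frame padding is
   needed.  */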
1823
1824 /* Compute the size of local storage taking into consideration the
1825 desired stack alignment which is to be maintained. Also determine
1826 the number of registers saved below the local storage.
1827
1828 PADDING1 returns padding before stack frame and PADDING2 returns
1829 padding after stack frame;
1830 */
1831
1832 static HOST_WIDE_INT
1833 ix86_compute_frame_size (size, nregs_on_stack, rpadding1, rpadding2)
1834 HOST_WIDE_INT size;
1835 int *nregs_on_stack;
1836 int *rpadding1;
1837 int *rpadding2;
1838 {
1839 int nregs;
1840 int padding1 = 0;
1841 int padding2 = 0;
1842 HOST_WIDE_INT total_size;
1843 int stack_alignment_needed = cfun->stack_alignment_needed / BITS_PER_UNIT;
1844 int offset;
1845 int preferred_alignment = cfun->preferred_stack_boundary / BITS_PER_UNIT;
1846
1847 nregs = ix86_nsaved_regs ();
1848 total_size = size;
1849
1850 offset = frame_pointer_needed ? 8 : 4;
1851
1852 /* Do some sanity checking of stack_alignment_needed and preferred_alignment,
1853    since the i386 port is the only one using these features and they may break easily. */
1854
1855 if (size && !stack_alignment_needed)
1856 abort ();
1857 if (!size && stack_alignment_needed != STACK_BOUNDARY / BITS_PER_UNIT)
1858 abort ();
1859 if (preferred_alignment < STACK_BOUNDARY / BITS_PER_UNIT)
1860 abort ();
1861 if (preferred_alignment > PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT)
1862 abort ();
1863 if (stack_alignment_needed > PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT)
1864 abort ();
1865
1866 if (stack_alignment_needed < 4)
1867 stack_alignment_needed = 4;
1868
1869 offset += nregs * UNITS_PER_WORD;
1870
1871 if (ACCUMULATE_OUTGOING_ARGS)
1872 total_size += current_function_outgoing_args_size;
1873
1874 total_size += offset;
1875
1876 /* Align start of frame for local function. */
1877 padding1 = ((offset + stack_alignment_needed - 1)
1878 & -stack_alignment_needed) - offset;
1879 total_size += padding1;
1880
1881 /* Align stack boundary. */
1882 padding2 = ((total_size + preferred_alignment - 1)
1883 & -preferred_alignment) - total_size;
1884
1885 if (ACCUMULATE_OUTGOING_ARGS)
1886 padding2 += current_function_outgoing_args_size;
1887
1888 if (nregs_on_stack)
1889 *nregs_on_stack = nregs;
1890 if (rpadding1)
1891 *rpadding1 = padding1;
1892 if (rpadding2)
1893 *rpadding2 = padding2;
1894
1895 return size + padding1 + padding2;
1896 }
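
/* A worked numeric example with assumed inputs (not taken from any
   particular configuration): size == 20, frame_pointer_needed, one
   saved register, stack_alignment_needed == 8, preferred_alignment
   == 16, and no accumulated outgoing args:

     offset   = 8 + 1*4                  = 12
     padding1 = ((12 + 7) & -8) - 12     = 4
     padding2 = ((36 + 15) & -16) - 36   = 12    (36 = 20 + 12 + 4)
     returned   20 + 4 + 12              = 36  */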
1897
1898 /* Emit code to save registers in the prologue. */
1899
1900 static void
1901 ix86_emit_save_regs ()
1902 {
1903 register int regno;
1904 int limit;
1905 rtx insn;
1906 int pic_reg_used = flag_pic && (current_function_uses_pic_offset_table
1907 || current_function_uses_const_pool);
1908 limit = (frame_pointer_needed
1909 ? HARD_FRAME_POINTER_REGNUM : STACK_POINTER_REGNUM);
1910
1911 for (regno = limit - 1; regno >= 0; regno--)
1912 if ((regs_ever_live[regno] && !call_used_regs[regno])
1913 || (regno == PIC_OFFSET_TABLE_REGNUM && pic_reg_used))
1914 {
1915 insn = emit_insn (gen_push (gen_rtx_REG (SImode, regno)));
1916 RTX_FRAME_RELATED_P (insn) = 1;
1917 }
1918 }
1919
1920 /* Expand the prologue into a bunch of separate insns. */
1921
1922 void
1923 ix86_expand_prologue ()
1924 {
1925 HOST_WIDE_INT tsize = ix86_compute_frame_size (get_frame_size (), (int *)0, (int *)0,
1926 (int *)0);
1927 rtx insn;
1928 int pic_reg_used = flag_pic && (current_function_uses_pic_offset_table
1929 || current_function_uses_const_pool);
1930
1931 /* Note: AT&T enter does NOT have reversed args. Enter is probably
1932 slower on all targets. Also sdb doesn't like it. */
1933
1934 if (frame_pointer_needed)
1935 {
1936 insn = emit_insn (gen_push (hard_frame_pointer_rtx));
1937 RTX_FRAME_RELATED_P (insn) = 1;
1938
1939 insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
1940 RTX_FRAME_RELATED_P (insn) = 1;
1941 }
1942
1943 ix86_emit_save_regs ();
1944
1945 if (tsize == 0)
1946 ;
1947 else if (! TARGET_STACK_PROBE || tsize < CHECK_STACK_LIMIT)
1948 {
1949 if (frame_pointer_needed)
1950 insn = emit_insn (gen_pro_epilogue_adjust_stack
1951 (stack_pointer_rtx, stack_pointer_rtx,
1952 GEN_INT (-tsize), hard_frame_pointer_rtx));
1953 else
1954 insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
1955 GEN_INT (-tsize)));
1956 RTX_FRAME_RELATED_P (insn) = 1;
1957 }
1958 else
1959 {
1960 /* ??? Is this only valid for Win32? */
1961
1962 rtx arg0, sym;
1963
1964 arg0 = gen_rtx_REG (SImode, 0);
1965 emit_move_insn (arg0, GEN_INT (tsize));
1966
1967 sym = gen_rtx_MEM (FUNCTION_MODE,
1968 gen_rtx_SYMBOL_REF (Pmode, "_alloca"));
1969 insn = emit_call_insn (gen_call (sym, const0_rtx));
1970
1971 CALL_INSN_FUNCTION_USAGE (insn)
1972 = gen_rtx_EXPR_LIST (VOIDmode, gen_rtx_USE (VOIDmode, arg0),
1973 CALL_INSN_FUNCTION_USAGE (insn));
1974 }
1975
1976 #ifdef SUBTARGET_PROLOGUE
1977 SUBTARGET_PROLOGUE;
1978 #endif
1979
1980 if (pic_reg_used)
1981 load_pic_register ();
1982
1983 /* If we are profiling, make sure no instructions are scheduled before
1984 the call to mcount. However, if -fpic, the above call will have
1985 done that. */
1986 if ((profile_flag || profile_block_flag) && ! pic_reg_used)
1987 emit_insn (gen_blockage ());
1988 }
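
/* A sketch (not literal compiler output) of the prologue emitted for a
   function that needs a frame pointer, saves %ebx, and has a 20 byte
   frame; the exact instructions depend on the target flags:

     pushl %ebp
     movl  %esp, %ebp
     pushl %ebx
     subl  $20, %esp  */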
1989
1990 /* Emit code to add TSIZE to the esp value. Use a POP instruction when
1991    profitable. */
1992
1993 static void
1994 ix86_emit_epilogue_esp_adjustment (tsize)
1995 int tsize;
1996 {
1997 /* If a frame pointer is present, we must be sure to tie the sp
1998 to the fp so that we don't mis-schedule. */
1999 if (frame_pointer_needed)
2000 emit_insn (gen_pro_epilogue_adjust_stack (stack_pointer_rtx,
2001 stack_pointer_rtx,
2002 GEN_INT (tsize),
2003 hard_frame_pointer_rtx));
2004 else
2005 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
2006 GEN_INT (tsize)));
2007 }
2008
2009 /* Emit code to restore saved registers using MOV insns. The first
2010    register is restored from POINTER + OFFSET. */
2011 static void
2012 ix86_emit_restore_regs_using_mov (pointer, offset)
2013 rtx pointer;
2014 int offset;
2015 {
2016 int regno;
2017 int pic_reg_used = flag_pic && (current_function_uses_pic_offset_table
2018 || current_function_uses_const_pool);
2019 int limit = (frame_pointer_needed
2020 ? HARD_FRAME_POINTER_REGNUM : STACK_POINTER_REGNUM);
2021
2022 for (regno = 0; regno < limit; regno++)
2023 if ((regs_ever_live[regno] && !call_used_regs[regno])
2024 || (regno == PIC_OFFSET_TABLE_REGNUM && pic_reg_used))
2025 {
2026 emit_move_insn (gen_rtx_REG (SImode, regno),
2027 adj_offsettable_operand (gen_rtx_MEM (SImode,
2028 pointer),
2029 offset));
2030 offset += 4;
2031 }
2032 }
2033
2034 /* Restore function stack, frame, and registers. */
2035
2036 void
2037 ix86_expand_epilogue (emit_return)
2038 int emit_return;
2039 {
2040 int nregs;
2041 int regno;
2042
2043 int pic_reg_used = flag_pic && (current_function_uses_pic_offset_table
2044 || current_function_uses_const_pool);
2045 int sp_valid = !frame_pointer_needed || current_function_sp_is_unchanging;
2046 HOST_WIDE_INT offset;
2047 HOST_WIDE_INT tsize = ix86_compute_frame_size (get_frame_size (), &nregs,
2048 (int *)0, (int *)0);
2049
2050
2051 /* Calculate start of saved registers relative to ebp. */
2052 offset = -nregs * UNITS_PER_WORD;
2053
2054 #ifdef FUNCTION_BLOCK_PROFILER_EXIT
2055 if (profile_block_flag == 2)
2056 {
2057 FUNCTION_BLOCK_PROFILER_EXIT;
2058 }
2059 #endif
2060
2061 /* If we're only restoring one register and sp is not valid, then
2062    use a move instruction to restore the register, since it's
2063    less work than reloading sp and popping the register.
2064 
2065    The default code results in a stack adjustment using an add/lea instruction,
2066    while this code results in a LEAVE instruction (or its discrete equivalent),
2067    so it is profitable in some other cases as well, especially when there
2068    are no registers to restore. We also use this code when TARGET_USE_LEAVE
2069    is set and there is exactly one register to pop. This heuristic may need
2070    some tuning in the future. */
2071 if ((!sp_valid && nregs <= 1)
2072 || (frame_pointer_needed && !nregs && tsize)
2073 || (frame_pointer_needed && TARGET_USE_LEAVE && !optimize_size
2074 && nregs == 1))
2075 {
2076 /* Restore registers. We can use ebp or esp to address the memory
2077    locations. If both are available, default to ebp, since offsets
2078    are known to be small. The only exception is when esp points directly
2079    to the end of the block of saved registers, where we may simplify the
2080    addressing mode. */
2081
2082 if (!frame_pointer_needed || (sp_valid && !tsize))
2083 ix86_emit_restore_regs_using_mov (stack_pointer_rtx, tsize);
2084 else
2085 ix86_emit_restore_regs_using_mov (hard_frame_pointer_rtx, offset);
2086
2087 if (!frame_pointer_needed)
2088 ix86_emit_epilogue_esp_adjustment (tsize + nregs * UNITS_PER_WORD);
2089 /* If not an i386, mov & pop is faster than "leave". */
2090 else if (TARGET_USE_LEAVE || optimize_size)
2091 emit_insn (gen_leave ());
2092 else
2093 {
2094 emit_insn (gen_pro_epilogue_adjust_stack (stack_pointer_rtx,
2095 hard_frame_pointer_rtx,
2096 const0_rtx,
2097 hard_frame_pointer_rtx));
2098 emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
2099 }
2100 }
2101 else
2102 {
2103 /* First step is to deallocate the stack frame so that we can
2104 pop the registers. */
2105 if (!sp_valid)
2106 {
2107 if (!frame_pointer_needed)
2108 abort ();
2109 emit_insn (gen_pro_epilogue_adjust_stack (stack_pointer_rtx,
2110 hard_frame_pointer_rtx,
2111 GEN_INT (offset),
2112 hard_frame_pointer_rtx));
2113 }
2114 else if (tsize)
2115 ix86_emit_epilogue_esp_adjustment (tsize);
2116
2117 for (regno = 0; regno < STACK_POINTER_REGNUM; regno++)
2118 if ((regs_ever_live[regno] && !call_used_regs[regno])
2119 || (regno == PIC_OFFSET_TABLE_REGNUM && pic_reg_used))
2120 emit_insn (gen_popsi1 (gen_rtx_REG (SImode, regno)));
2121 }
2122
2123 /* Sibcall epilogues don't want a return instruction. */
2124 if (! emit_return)
2125 return;
2126
2127 if (current_function_pops_args && current_function_args_size)
2128 {
2129 rtx popc = GEN_INT (current_function_pops_args);
2130
2131 /* i386 can only pop 64K bytes. If asked to pop more, pop
2132 return address, do explicit add, and jump indirectly to the
2133 caller. */
2134
2135 if (current_function_pops_args >= 65536)
2136 {
2137 rtx ecx = gen_rtx_REG (SImode, 2);
2138
2139 emit_insn (gen_popsi1 (ecx));
2140 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, popc));
2141 emit_jump_insn (gen_return_indirect_internal (ecx));
2142 }
2143 else
2144 emit_jump_insn (gen_return_pop_internal (popc));
2145 }
2146 else
2147 emit_jump_insn (gen_return_internal ());
2148 }
2149 \f
2150 /* Extract the parts of an RTL expression that is a valid memory address
2151 for an instruction. Return false if the structure of the address is
2152 grossly off. */
2153
2154 static int
2155 ix86_decompose_address (addr, out)
2156 register rtx addr;
2157 struct ix86_address *out;
2158 {
2159 rtx base = NULL_RTX;
2160 rtx index = NULL_RTX;
2161 rtx disp = NULL_RTX;
2162 HOST_WIDE_INT scale = 1;
2163 rtx scale_rtx = NULL_RTX;
2164
2165 if (GET_CODE (addr) == REG || GET_CODE (addr) == SUBREG)
2166 base = addr;
2167 else if (GET_CODE (addr) == PLUS)
2168 {
2169 rtx op0 = XEXP (addr, 0);
2170 rtx op1 = XEXP (addr, 1);
2171 enum rtx_code code0 = GET_CODE (op0);
2172 enum rtx_code code1 = GET_CODE (op1);
2173
2174 if (code0 == REG || code0 == SUBREG)
2175 {
2176 if (code1 == REG || code1 == SUBREG)
2177 index = op0, base = op1; /* index + base */
2178 else
2179 base = op0, disp = op1; /* base + displacement */
2180 }
2181 else if (code0 == MULT)
2182 {
2183 index = XEXP (op0, 0);
2184 scale_rtx = XEXP (op0, 1);
2185 if (code1 == REG || code1 == SUBREG)
2186 base = op1; /* index*scale + base */
2187 else
2188 disp = op1; /* index*scale + disp */
2189 }
2190 else if (code0 == PLUS && GET_CODE (XEXP (op0, 0)) == MULT)
2191 {
2192 index = XEXP (XEXP (op0, 0), 0); /* index*scale + base + disp */
2193 scale_rtx = XEXP (XEXP (op0, 0), 1);
2194 base = XEXP (op0, 1);
2195 disp = op1;
2196 }
2197 else if (code0 == PLUS)
2198 {
2199 index = XEXP (op0, 0); /* index + base + disp */
2200 base = XEXP (op0, 1);
2201 disp = op1;
2202 }
2203 else
2204 return FALSE;
2205 }
2206 else if (GET_CODE (addr) == MULT)
2207 {
2208 index = XEXP (addr, 0); /* index*scale */
2209 scale_rtx = XEXP (addr, 1);
2210 }
2211 else if (GET_CODE (addr) == ASHIFT)
2212 {
2213 rtx tmp;
2214
2215 /* We're called for lea too, which implements ashift on occasion. */
2216 index = XEXP (addr, 0);
2217 tmp = XEXP (addr, 1);
2218 if (GET_CODE (tmp) != CONST_INT)
2219 return FALSE;
2220 scale = INTVAL (tmp);
2221 if ((unsigned HOST_WIDE_INT) scale > 3)
2222 return FALSE;
2223 scale = 1 << scale;
2224 }
2225 else
2226 disp = addr; /* displacement */
2227
2228 /* Extract the integral value of scale. */
2229 if (scale_rtx)
2230 {
2231 if (GET_CODE (scale_rtx) != CONST_INT)
2232 return FALSE;
2233 scale = INTVAL (scale_rtx);
2234 }
2235
2236 /* Allow the arg pointer and stack pointer as an index if there is no scaling. */
2237 if (base && index && scale == 1
2238 && (index == arg_pointer_rtx || index == frame_pointer_rtx
2239 || index == stack_pointer_rtx))
2240 {
2241 rtx tmp = base;
2242 base = index;
2243 index = tmp;
2244 }
2245
2246 /* Special case: %ebp cannot be encoded as a base without a displacement. */
2247 if ((base == hard_frame_pointer_rtx
2248 || base == frame_pointer_rtx
2249 || base == arg_pointer_rtx) && !disp)
2250 disp = const0_rtx;
2251
2252 /* Special case: on the K6, [%esi] makes the instruction vector decoded.
2253    Avoid this by transforming it to [%esi+0]. */
2254 if (ix86_cpu == PROCESSOR_K6 && !optimize_size
2255 && base && !index && !disp
2256 && REG_P (base)
2257 && REGNO_REG_CLASS (REGNO (base)) == SIREG)
2258 disp = const0_rtx;
2259
2260 /* Special case: encode reg+reg instead of reg*2. */
2261 if (!base && index && scale && scale == 2)
2262 base = index, scale = 1;
2263
2264 /* Special case: scaling cannot be encoded without base or displacement. */
2265 if (!base && !disp && index && scale != 1)
2266 disp = const0_rtx;
2267
2268 out->base = base;
2269 out->index = index;
2270 out->disp = disp;
2271 out->scale = scale;
2272
2273 return TRUE;
2274 }
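
/* For illustration: the address RTL for `12(%ebx,%eax,4)',

     (plus:SI (plus:SI (mult:SI (reg:SI eax) (const_int 4))
                       (reg:SI ebx))
              (const_int 12))

   decomposes to base == %ebx, index == %eax, scale == 4, disp == 12. */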
2275 \f
2276 /* Return the cost of the memory address X.
2277    For the i386, it is better to use a complex address than to let gcc copy
2278    the address into a reg and make a new pseudo. But not if the address
2279    requires two regs - that would mean more pseudos with longer
2280    lifetimes. */
2281 int
2282 ix86_address_cost (x)
2283 rtx x;
2284 {
2285 struct ix86_address parts;
2286 int cost = 1;
2287
2288 if (!ix86_decompose_address (x, &parts))
2289 abort ();
2290
2291 /* More complex memory references are better. */
2292 if (parts.disp && parts.disp != const0_rtx)
2293 cost--;
2294
2295 /* Attempt to minimize number of registers in the address. */
2296 if ((parts.base
2297 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER))
2298 || (parts.index
2299 && (!REG_P (parts.index)
2300 || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)))
2301 cost++;
2302
2303 if (parts.base
2304 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER)
2305 && parts.index
2306 && (!REG_P (parts.index) || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)
2307 && parts.base != parts.index)
2308 cost++;
2309
2310 /* The AMD-K6 doesn't like addresses with the ModR/M byte set to 00_xxx_100b,
2311    since its predecode logic can't detect the length of such instructions
2312    and decoding degenerates to vector decoding. Increase the cost of such
2313    addresses here. The penalty is at least 2 cycles. It may be worthwhile
2314    to split such addresses or even to refuse them entirely.
2315 
2316    The following addressing modes are affected:
2317    [base+scale*index]
2318    [scale*index+disp]
2319    [base+index]
2320 
2321    The first and last cases may be avoidable by explicitly coding a zero
2322    displacement into the memory address, but I don't have an AMD-K6 machine
2323    handy to check this theory. */
2324
2325 if (TARGET_K6
2326 && ((!parts.disp && parts.base && parts.index && parts.scale != 1)
2327 || (parts.disp && !parts.base && parts.index && parts.scale != 1)
2328 || (!parts.disp && parts.base && parts.index && parts.scale == 1)))
2329 cost += 10;
2330
2331 return cost;
2332 }
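
/* A few sample costs, assuming hard registers and !TARGET_K6:
   `4(%ebp)' has a hard base plus a nonzero displacement and costs 0;
   `(%ebx,%eax)' has no displacement and keeps the base cost of 1.
   The same base+index address built from two distinct pseudos costs 3,
   since each of the two conditions above adds one. */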
2333 \f
2334 /* Determine if a given CONST RTX is a valid memory displacement
2335 in PIC mode. */
2336
2337 int
2338 legitimate_pic_address_disp_p (disp)
2339 register rtx disp;
2340 {
2341 if (GET_CODE (disp) != CONST)
2342 return 0;
2343 disp = XEXP (disp, 0);
2344
2345 if (GET_CODE (disp) == PLUS)
2346 {
2347 if (GET_CODE (XEXP (disp, 1)) != CONST_INT)
2348 return 0;
2349 disp = XEXP (disp, 0);
2350 }
2351
2352 if (GET_CODE (disp) != UNSPEC
2353 || XVECLEN (disp, 0) != 1)
2354 return 0;
2355
2356 /* Must be @GOT or @GOTOFF. */
2357 if (XINT (disp, 1) != 6
2358 && XINT (disp, 1) != 7)
2359 return 0;
2360
2361 if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF
2362 && GET_CODE (XVECEXP (disp, 0, 0)) != LABEL_REF)
2363 return 0;
2364
2365 return 1;
2366 }
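
/* For illustration, a displacement this function accepts (UNSPEC 7 is
   @GOTOFF, 6 is @GOT):

     (const (plus (unspec [(symbol_ref "x")] 7)
                  (const_int 4)))  */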
2367
2368 /* GO_IF_LEGITIMATE_ADDRESS recognizes an RTL expression that is a valid
2369 memory address for an instruction. The MODE argument is the machine mode
2370 for the MEM expression that wants to use this address.
2371
2372    It only recognizes addresses in canonical form. LEGITIMIZE_ADDRESS should
2373 convert common non-canonical forms to canonical form so that they will
2374 be recognized. */
2375
2376 int
2377 legitimate_address_p (mode, addr, strict)
2378 enum machine_mode mode;
2379 register rtx addr;
2380 int strict;
2381 {
2382 struct ix86_address parts;
2383 rtx base, index, disp;
2384 HOST_WIDE_INT scale;
2385 const char *reason = NULL;
2386 rtx reason_rtx = NULL_RTX;
2387
2388 if (TARGET_DEBUG_ADDR)
2389 {
2390 fprintf (stderr,
2391 "\n======\nGO_IF_LEGITIMATE_ADDRESS, mode = %s, strict = %d\n",
2392 GET_MODE_NAME (mode), strict);
2393 debug_rtx (addr);
2394 }
2395
2396 if (! ix86_decompose_address (addr, &parts))
2397 {
2398 reason = "decomposition failed";
2399 goto report_error;
2400 }
2401
2402 base = parts.base;
2403 index = parts.index;
2404 disp = parts.disp;
2405 scale = parts.scale;
2406
2407 /* Validate base register.
2408
2409    Don't allow SUBREGs here; they can lead to spill failures when the base
2410    is one word out of a two-word structure, which is represented internally
2411    as a DImode int. */
2412
2413 if (base)
2414 {
2415 reason_rtx = base;
2416
2417 if (GET_CODE (base) != REG)
2418 {
2419 reason = "base is not a register";
2420 goto report_error;
2421 }
2422
2423 if (GET_MODE (base) != Pmode)
2424 {
2425 reason = "base is not in Pmode";
2426 goto report_error;
2427 }
2428
2429 if ((strict && ! REG_OK_FOR_BASE_STRICT_P (base))
2430 || (! strict && ! REG_OK_FOR_BASE_NONSTRICT_P (base)))
2431 {
2432 reason = "base is not valid";
2433 goto report_error;
2434 }
2435 }
2436
2437 /* Validate index register.
2438
2439    Don't allow SUBREGs here; they can lead to spill failures when the index
2440    is one word out of a two-word structure, which is represented internally
2441    as a DImode int. */
2442
2443 if (index)
2444 {
2445 reason_rtx = index;
2446
2447 if (GET_CODE (index) != REG)
2448 {
2449 reason = "index is not a register";
2450 goto report_error;
2451 }
2452
2453 if (GET_MODE (index) != Pmode)
2454 {
2455 reason = "index is not in Pmode";
2456 goto report_error;
2457 }
2458
2459 if ((strict && ! REG_OK_FOR_INDEX_STRICT_P (index))
2460 || (! strict && ! REG_OK_FOR_INDEX_NONSTRICT_P (index)))
2461 {
2462 reason = "index is not valid";
2463 goto report_error;
2464 }
2465 }
2466
2467 /* Validate scale factor. */
2468 if (scale != 1)
2469 {
2470 reason_rtx = GEN_INT (scale);
2471 if (!index)
2472 {
2473 reason = "scale without index";
2474 goto report_error;
2475 }
2476
2477 if (scale != 2 && scale != 4 && scale != 8)
2478 {
2479 reason = "scale is not a valid multiplier";
2480 goto report_error;
2481 }
2482 }
2483
2484 /* Validate displacement. */
2485 if (disp)
2486 {
2487 reason_rtx = disp;
2488
2489 if (!CONSTANT_ADDRESS_P (disp))
2490 {
2491 reason = "displacement is not constant";
2492 goto report_error;
2493 }
2494
2495 if (GET_CODE (disp) == CONST_DOUBLE)
2496 {
2497 reason = "displacement is a const_double";
2498 goto report_error;
2499 }
2500
2501 if (flag_pic && SYMBOLIC_CONST (disp))
2502 {
2503 if (! legitimate_pic_address_disp_p (disp))
2504 {
2505 reason = "displacement is an invalid pic construct";
2506 goto report_error;
2507 }
2508
2509 /* This code used to verify that a symbolic pic displacement
2510 includes the pic_offset_table_rtx register.
2511
2512    While this is a good idea, unfortunately these constructs may
2513    be created by the "adds using lea" optimization for incorrect
2514    code like:
2515
2516 int a;
2517 int foo(int i)
2518 {
2519 return *(&a+i);
2520 }
2521
2522    This code is nonsensical, but results in addressing the
2523    GOT table with a pic_offset_table_rtx base. We can't
2524    just refuse it easily, since it gets matched by the
2525    "addsi3" pattern, which later gets split to lea when the
2526    output register differs from the input. While this
2527    could be handled by a separate addsi pattern for this case
2528    that never results in lea, disabling this test seems to be
2529    the easier and correct fix for the crash. */
2530 }
2531 else if (HALF_PIC_P ())
2532 {
2533 if (! HALF_PIC_ADDRESS_P (disp)
2534 || (base != NULL_RTX || index != NULL_RTX))
2535 {
2536 reason = "displacement is an invalid half-pic reference";
2537 goto report_error;
2538 }
2539 }
2540 }
2541
2542 /* Everything looks valid. */
2543 if (TARGET_DEBUG_ADDR)
2544 fprintf (stderr, "Success.\n");
2545 return TRUE;
2546
2547 report_error:
2548 if (TARGET_DEBUG_ADDR)
2549 {
2550 fprintf (stderr, "Error: %s\n", reason);
2551 debug_rtx (reason_rtx);
2552 }
2553 return FALSE;
2554 }
2555 \f
2556 /* Return a legitimate reference for ORIG (an address) using the
2557 register REG. If REG is 0, a new pseudo is generated.
2558
2559 There are two types of references that must be handled:
2560
2561 1. Global data references must load the address from the GOT, via
2562 the PIC reg. An insn is emitted to do this load, and the reg is
2563 returned.
2564
2565 2. Static data references, constant pool addresses, and code labels
2566 compute the address as an offset from the GOT, whose base is in
2567 the PIC reg. Static data objects have SYMBOL_REF_FLAG set to
2568 differentiate them from global data objects. The returned
2569 address is the PIC reg + an unspec constant.
2570
2571 GO_IF_LEGITIMATE_ADDRESS rejects symbolic references unless the PIC
2572 reg also appears in the address. */
2573
2574 rtx
2575 legitimize_pic_address (orig, reg)
2576 rtx orig;
2577 rtx reg;
2578 {
2579 rtx addr = orig;
2580 rtx new = orig;
2581 rtx base;
2582
2583 if (GET_CODE (addr) == LABEL_REF
2584 || (GET_CODE (addr) == SYMBOL_REF
2585 && (CONSTANT_POOL_ADDRESS_P (addr)
2586 || SYMBOL_REF_FLAG (addr))))
2587 {
2588 /* This symbol may be referenced via a displacement from the PIC
2589 base address (@GOTOFF). */
2590
2591 current_function_uses_pic_offset_table = 1;
2592 new = gen_rtx_UNSPEC (VOIDmode, gen_rtvec (1, addr), 7);
2593 new = gen_rtx_CONST (VOIDmode, new);
2594 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
2595
2596 if (reg != 0)
2597 {
2598 emit_move_insn (reg, new);
2599 new = reg;
2600 }
2601 }
2602 else if (GET_CODE (addr) == SYMBOL_REF)
2603 {
2604 /* This symbol must be referenced via a load from the
2605 Global Offset Table (@GOT). */
2606
2607 current_function_uses_pic_offset_table = 1;
2608 new = gen_rtx_UNSPEC (VOIDmode, gen_rtvec (1, addr), 6);
2609 new = gen_rtx_CONST (VOIDmode, new);
2610 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
2611 new = gen_rtx_MEM (Pmode, new);
2612 RTX_UNCHANGING_P (new) = 1;
2613
2614 if (reg == 0)
2615 reg = gen_reg_rtx (Pmode);
2616 emit_move_insn (reg, new);
2617 new = reg;
2618 }
2619 else
2620 {
2621 if (GET_CODE (addr) == CONST)
2622 {
2623 addr = XEXP (addr, 0);
2624 if (GET_CODE (addr) == UNSPEC)
2625 {
2626 /* Check that the unspec is one of the ones we generate? */
2627 }
2628 else if (GET_CODE (addr) != PLUS)
2629 abort ();
2630 }
2631 if (GET_CODE (addr) == PLUS)
2632 {
2633 rtx op0 = XEXP (addr, 0), op1 = XEXP (addr, 1);
2634
2635 /* Check first to see if this is a constant offset from a @GOTOFF
2636 symbol reference. */
2637 if ((GET_CODE (op0) == LABEL_REF
2638 || (GET_CODE (op0) == SYMBOL_REF
2639 && (CONSTANT_POOL_ADDRESS_P (op0)
2640 || SYMBOL_REF_FLAG (op0))))
2641 && GET_CODE (op1) == CONST_INT)
2642 {
2643 current_function_uses_pic_offset_table = 1;
2644 new = gen_rtx_UNSPEC (VOIDmode, gen_rtvec (1, op0), 7);
2645 new = gen_rtx_PLUS (VOIDmode, new, op1);
2646 new = gen_rtx_CONST (VOIDmode, new);
2647 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
2648
2649 if (reg != 0)
2650 {
2651 emit_move_insn (reg, new);
2652 new = reg;
2653 }
2654 }
2655 else
2656 {
2657 base = legitimize_pic_address (XEXP (addr, 0), reg);
2658 new = legitimize_pic_address (XEXP (addr, 1),
2659 base == reg ? NULL_RTX : reg);
2660
2661 if (GET_CODE (new) == CONST_INT)
2662 new = plus_constant (base, INTVAL (new));
2663 else
2664 {
2665 if (GET_CODE (new) == PLUS && CONSTANT_P (XEXP (new, 1)))
2666 {
2667 base = gen_rtx_PLUS (Pmode, base, XEXP (new, 0));
2668 new = XEXP (new, 1);
2669 }
2670 new = gen_rtx_PLUS (Pmode, base, new);
2671 }
2672 }
2673 }
2674 }
2675 return new;
2676 }
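
/* For illustration: a global `(symbol_ref "foo")' legitimizes to a
   load from the GOT,

     (mem:SI (plus:SI (reg:SI ebx)
                      (const (unspec [(symbol_ref "foo")] 6))))

   copied into a register, while a static symbol becomes the PIC
   register plus an @GOTOFF (unspec 7) constant, with no memory load. */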
2677 \f
2678 /* Try machine-dependent ways of modifying an illegitimate address
2679 to be legitimate. If we find one, return the new, valid address.
2680 This macro is used in only one place: `memory_address' in explow.c.
2681
2682 OLDX is the address as it was before break_out_memory_refs was called.
2683 In some cases it is useful to look at this to decide what needs to be done.
2684
2685 MODE and WIN are passed so that this macro can use
2686 GO_IF_LEGITIMATE_ADDRESS.
2687
2688 It is always safe for this macro to do nothing. It exists to recognize
2689 opportunities to optimize the output.
2690
2691 For the 80386, we handle X+REG by loading X into a register R and
2692 using R+REG. R will go in a general reg and indexing will be used.
2693 However, if REG is a broken-out memory address or multiplication,
2694 nothing needs to be done because REG can certainly go in a general reg.
2695
2696 When -fpic is used, special handling is needed for symbolic references.
2697 See comments by legitimize_pic_address in i386.c for details. */
2698
2699 rtx
2700 legitimize_address (x, oldx, mode)
2701 register rtx x;
2702 register rtx oldx ATTRIBUTE_UNUSED;
2703 enum machine_mode mode;
2704 {
2705 int changed = 0;
2706 unsigned log;
2707
2708 if (TARGET_DEBUG_ADDR)
2709 {
2710 fprintf (stderr, "\n==========\nLEGITIMIZE_ADDRESS, mode = %s\n",
2711 GET_MODE_NAME (mode));
2712 debug_rtx (x);
2713 }
2714
2715 if (flag_pic && SYMBOLIC_CONST (x))
2716 return legitimize_pic_address (x, 0);
2717
2718 /* Canonicalize shifts by 0, 1, 2, 3 into multiply */
2719 if (GET_CODE (x) == ASHIFT
2720 && GET_CODE (XEXP (x, 1)) == CONST_INT
2721 && (log = (unsigned)exact_log2 (INTVAL (XEXP (x, 1)))) < 4)
2722 {
2723 changed = 1;
2724 x = gen_rtx_MULT (Pmode, force_reg (Pmode, XEXP (x, 0)),
2725 GEN_INT (1 << log));
2726 }
2727
2728 if (GET_CODE (x) == PLUS)
2729 {
2730 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
2731
2732 if (GET_CODE (XEXP (x, 0)) == ASHIFT
2733 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
2734 && (log = (unsigned)exact_log2 (INTVAL (XEXP (XEXP (x, 0), 1)))) < 4)
2735 {
2736 changed = 1;
2737 XEXP (x, 0) = gen_rtx_MULT (Pmode,
2738 force_reg (Pmode, XEXP (XEXP (x, 0), 0)),
2739 GEN_INT (1 << log));
2740 }
2741
2742 if (GET_CODE (XEXP (x, 1)) == ASHIFT
2743 && GET_CODE (XEXP (XEXP (x, 1), 1)) == CONST_INT
2744 && (log = (unsigned)exact_log2 (INTVAL (XEXP (XEXP (x, 1), 1)))) < 4)
2745 {
2746 changed = 1;
2747 XEXP (x, 1) = gen_rtx_MULT (Pmode,
2748 force_reg (Pmode, XEXP (XEXP (x, 1), 0)),
2749 GEN_INT (1 << log));
2750 }
2751
2752 /* Put multiply first if it isn't already. */
2753 if (GET_CODE (XEXP (x, 1)) == MULT)
2754 {
2755 rtx tmp = XEXP (x, 0);
2756 XEXP (x, 0) = XEXP (x, 1);
2757 XEXP (x, 1) = tmp;
2758 changed = 1;
2759 }
2760
2761 /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
2762 into (plus (plus (mult (reg) (const)) (reg)) (const)). This can be
2763 created by virtual register instantiation, register elimination, and
2764 similar optimizations. */
2765 if (GET_CODE (XEXP (x, 0)) == MULT && GET_CODE (XEXP (x, 1)) == PLUS)
2766 {
2767 changed = 1;
2768 x = gen_rtx_PLUS (Pmode,
2769 gen_rtx_PLUS (Pmode, XEXP (x, 0),
2770 XEXP (XEXP (x, 1), 0)),
2771 XEXP (XEXP (x, 1), 1));
2772 }
2773
2774 /* Canonicalize
2775 (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
2776 into (plus (plus (mult (reg) (const)) (reg)) (const)). */
2777 else if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS
2778 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
2779 && GET_CODE (XEXP (XEXP (x, 0), 1)) == PLUS
2780 && CONSTANT_P (XEXP (x, 1)))
2781 {
2782 rtx constant;
2783 rtx other = NULL_RTX;
2784
2785 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
2786 {
2787 constant = XEXP (x, 1);
2788 other = XEXP (XEXP (XEXP (x, 0), 1), 1);
2789 }
2790 else if (GET_CODE (XEXP (XEXP (XEXP (x, 0), 1), 1)) == CONST_INT)
2791 {
2792 constant = XEXP (XEXP (XEXP (x, 0), 1), 1);
2793 other = XEXP (x, 1);
2794 }
2795 else
2796 constant = 0;
2797
2798 if (constant)
2799 {
2800 changed = 1;
2801 x = gen_rtx_PLUS (Pmode,
2802 gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 0),
2803 XEXP (XEXP (XEXP (x, 0), 1), 0)),
2804 plus_constant (other, INTVAL (constant)));
2805 }
2806 }
2807
2808 if (changed && legitimate_address_p (mode, x, FALSE))
2809 return x;
2810
2811 if (GET_CODE (XEXP (x, 0)) == MULT)
2812 {
2813 changed = 1;
2814 XEXP (x, 0) = force_operand (XEXP (x, 0), 0);
2815 }
2816
2817 if (GET_CODE (XEXP (x, 1)) == MULT)
2818 {
2819 changed = 1;
2820 XEXP (x, 1) = force_operand (XEXP (x, 1), 0);
2821 }
2822
2823 if (changed
2824 && GET_CODE (XEXP (x, 1)) == REG
2825 && GET_CODE (XEXP (x, 0)) == REG)
2826 return x;
2827
2828 if (flag_pic && SYMBOLIC_CONST (XEXP (x, 1)))
2829 {
2830 changed = 1;
2831 x = legitimize_pic_address (x, 0);
2832 }
2833
2834 if (changed && legitimate_address_p (mode, x, FALSE))
2835 return x;
2836
2837 if (GET_CODE (XEXP (x, 0)) == REG)
2838 {
2839 register rtx temp = gen_reg_rtx (Pmode);
2840 register rtx val = force_operand (XEXP (x, 1), temp);
2841 if (val != temp)
2842 emit_move_insn (temp, val);
2843
2844 XEXP (x, 1) = temp;
2845 return x;
2846 }
2847
2848 else if (GET_CODE (XEXP (x, 1)) == REG)
2849 {
2850 register rtx temp = gen_reg_rtx (Pmode);
2851 register rtx val = force_operand (XEXP (x, 0), temp);
2852 if (val != temp)
2853 emit_move_insn (temp, val);
2854
2855 XEXP (x, 0) = temp;
2856 return x;
2857 }
2858 }
2859
2860 return x;
2861 }
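
/* For illustration: `(plus (ashift (reg) (const_int 2)) (reg))' is
   first canonicalized into `(plus (mult (reg) (const_int 4)) (reg))',
   which GO_IF_LEGITIMATE_ADDRESS then accepts as a scaled-index
   address. */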
2862 \f
2863 /* Print an integer constant expression in assembler syntax. Addition
2864 and subtraction are the only arithmetic that may appear in these
2865 expressions. FILE is the stdio stream to write to, X is the rtx, and
2866 CODE is the operand print code from the output string. */
2867
2868 static void
2869 output_pic_addr_const (file, x, code)
2870 FILE *file;
2871 rtx x;
2872 int code;
2873 {
2874 char buf[256];
2875
2876 switch (GET_CODE (x))
2877 {
2878 case PC:
2879 if (flag_pic)
2880 putc ('.', file);
2881 else
2882 abort ();
2883 break;
2884
2885 case SYMBOL_REF:
2886 assemble_name (file, XSTR (x, 0));
2887 if (code == 'P' && ! SYMBOL_REF_FLAG (x))
2888 fputs ("@PLT", file);
2889 break;
2890
2891 case LABEL_REF:
2892 x = XEXP (x, 0);
2893 /* FALLTHRU */
2894 case CODE_LABEL:
2895 ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (x));
2896 assemble_name (asm_out_file, buf);
2897 break;
2898
2899 case CONST_INT:
2900 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
2901 break;
2902
2903 case CONST:
2904 /* This used to output parentheses around the expression,
2905 but that does not work on the 386 (either ATT or BSD assembler). */
2906 output_pic_addr_const (file, XEXP (x, 0), code);
2907 break;
2908
2909 case CONST_DOUBLE:
2910 if (GET_MODE (x) == VOIDmode)
2911 {
2912 /* We can use %d if the number is <32 bits and positive. */
2913 if (CONST_DOUBLE_HIGH (x) || CONST_DOUBLE_LOW (x) < 0)
2914 fprintf (file, "0x%lx%08lx",
2915 (unsigned long) CONST_DOUBLE_HIGH (x),
2916 (unsigned long) CONST_DOUBLE_LOW (x));
2917 else
2918 fprintf (file, HOST_WIDE_INT_PRINT_DEC, CONST_DOUBLE_LOW (x));
2919 }
2920 else
2921 /* We can't handle floating point constants;
2922 PRINT_OPERAND must handle them. */
2923 output_operand_lossage ("floating constant misused");
2924 break;
2925
2926 case PLUS:
2927 /* Some assemblers need integer constants to appear first. */
2928 if (GET_CODE (XEXP (x, 0)) == CONST_INT)
2929 {
2930 output_pic_addr_const (file, XEXP (x, 0), code);
2931 putc ('+', file);
2932 output_pic_addr_const (file, XEXP (x, 1), code);
2933 }
2934 else if (GET_CODE (XEXP (x, 1)) == CONST_INT)
2935 {
2936 output_pic_addr_const (file, XEXP (x, 1), code);
2937 putc ('+', file);
2938 output_pic_addr_const (file, XEXP (x, 0), code);
2939 }
2940 else
2941 abort ();
2942 break;
2943
2944 case MINUS:
2945 putc (ASSEMBLER_DIALECT ? '(' : '[', file);
2946 output_pic_addr_const (file, XEXP (x, 0), code);
2947 putc ('-', file);
2948 output_pic_addr_const (file, XEXP (x, 1), code);
2949 putc (ASSEMBLER_DIALECT ? ')' : ']', file);
2950 break;
2951
2952 case UNSPEC:
2953 if (XVECLEN (x, 0) != 1)
2954 abort ();
2955 output_pic_addr_const (file, XVECEXP (x, 0, 0), code);
2956 switch (XINT (x, 1))
2957 {
2958 case 6:
2959 fputs ("@GOT", file);
2960 break;
2961 case 7:
2962 fputs ("@GOTOFF", file);
2963 break;
2964 case 8:
2965 fputs ("@PLT", file);
2966 break;
2967 default:
2968 output_operand_lossage ("invalid UNSPEC as operand");
2969 break;
2970 }
2971 break;
2972
2973 default:
2974 output_operand_lossage ("invalid expression as operand");
2975 }
2976 }
2977
2978 /* This is called from dwarfout.c via ASM_OUTPUT_DWARF_ADDR_CONST.
2979 We need to handle our special PIC relocations. */
2980
2981 void
2982 i386_dwarf_output_addr_const (file, x)
2983 FILE *file;
2984 rtx x;
2985 {
2986 fprintf (file, "\t%s\t", INT_ASM_OP);
2987 if (flag_pic)
2988 output_pic_addr_const (file, x, '\0');
2989 else
2990 output_addr_const (file, x);
2991 fputc ('\n', file);
2992 }
2993
2994 /* In the name of slightly smaller debug output, and to cater to
2995    general assembler lossage, recognize PIC+GOTOFF and turn it back
2996 into a direct symbol reference. */
2997
2998 rtx
2999 i386_simplify_dwarf_addr (orig_x)
3000 rtx orig_x;
3001 {
3002 rtx x = orig_x;
3003
3004 if (GET_CODE (x) != PLUS
3005 || GET_CODE (XEXP (x, 0)) != REG
3006 || GET_CODE (XEXP (x, 1)) != CONST)
3007 return orig_x;
3008
3009 x = XEXP (XEXP (x, 1), 0);
3010 if (GET_CODE (x) == UNSPEC
3011 && XINT (x, 1) == 7)
3012 return XVECEXP (x, 0, 0);
3013
3014 if (GET_CODE (x) == PLUS
3015 && GET_CODE (XEXP (x, 0)) == UNSPEC
3016 && GET_CODE (XEXP (x, 1)) == CONST_INT
3017 && XINT (XEXP (x, 0), 1) == 7)
3018 return gen_rtx_PLUS (VOIDmode, XVECEXP (XEXP (x, 0), 0, 0), XEXP (x, 1));
3019
3020 return orig_x;
3021 }
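
/* For illustration:

     (plus (reg:SI ebx) (const (unspec [(symbol_ref "x")] 7)))

   simplifies back to `(symbol_ref "x")' for the debug output. */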
3022 \f
3023 static void
3024 put_condition_code (code, mode, reverse, fp, file)
3025 enum rtx_code code;
3026 enum machine_mode mode;
3027 int reverse, fp;
3028 FILE *file;
3029 {
3030 const char *suffix;
3031
3032 if (reverse)
3033 code = reverse_condition (code);
3034
3035 switch (code)
3036 {
3037 case EQ:
3038 suffix = "e";
3039 break;
3040 case NE:
3041 suffix = "ne";
3042 break;
3043 case GT:
3044 if (mode == CCNOmode)
3045 abort ();
3046 suffix = "g";
3047 break;
3048 case GTU:
3049 /* ??? Use "nbe" instead of "a" for fcmov lossage on some assemblers.
3050    Those same assemblers have the same but opposite lossage on cmov. */
3051 suffix = fp ? "nbe" : "a";
3052 break;
3053 case LT:
3054 if (mode == CCNOmode)
3055 suffix = "s";
3056 else
3057 suffix = "l";
3058 break;
3059 case LTU:
3060 suffix = "b";
3061 break;
3062 case GE:
3063 if (mode == CCNOmode)
3064 suffix = "ns";
3065 else
3066 suffix = "ge";
3067 break;
3068 case GEU:
3069 /* ??? As above. */
3070 suffix = fp ? "nb" : "ae";
3071 break;
3072 case LE:
3073 if (mode == CCNOmode)
3074 abort ();
3075 suffix = "le";
3076 break;
3077 case LEU:
3078 suffix = "be";
3079 break;
3080 case UNORDERED:
3081 suffix = "p";
3082 break;
3083 case ORDERED:
3084 suffix = "np";
3085 break;
3086 default:
3087 abort ();
3088 }
3089 fputs (suffix, file);
3090 }
3091
3092 void
3093 print_reg (x, code, file)
3094 rtx x;
3095 int code;
3096 FILE *file;
3097 {
3098 if (REGNO (x) == ARG_POINTER_REGNUM
3099 || REGNO (x) == FRAME_POINTER_REGNUM
3100 || REGNO (x) == FLAGS_REG
3101 || REGNO (x) == FPSR_REG)
3102 abort ();
3103
3104 if (ASSEMBLER_DIALECT == 0 || USER_LABEL_PREFIX[0] == 0)
3105 putc ('%', file);
3106
3107 if (code == 'w')
3108 code = 2;
3109 else if (code == 'b')
3110 code = 1;
3111 else if (code == 'k')
3112 code = 4;
3113 else if (code == 'y')
3114 code = 3;
3115 else if (code == 'h')
3116 code = 0;
3117 else if (code == 'm' || MMX_REG_P (x))
3118 code = 5;
3119 else
3120 code = GET_MODE_SIZE (GET_MODE (x));
3121
3122 switch (code)
3123 {
3124 case 5:
3125 fputs (hi_reg_name[REGNO (x)], file);
3126 break;
3127 case 3:
3128 if (STACK_TOP_P (x))
3129 {
3130 fputs ("st(0)", file);
3131 break;
3132 }
3133 /* FALLTHRU */
3134 case 4:
3135 case 8:
3136 case 12:
3137 if (! FP_REG_P (x))
3138 putc ('e', file);
3139 /* FALLTHRU */
3140 case 16:
3141 case 2:
3142 fputs (hi_reg_name[REGNO (x)], file);
3143 break;
3144 case 1:
3145 fputs (qi_reg_name[REGNO (x)], file);
3146 break;
3147 case 0:
3148 fputs (qi_high_reg_name[REGNO (x)], file);
3149 break;
3150 default:
3151 abort ();
3152 }
3153 }
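
/* For illustration: given reg 0 (the `a' register), code 'b' prints
   "al", 'h' prints "ah", 'w' prints "ax", and 'k' prints "eax", each
   preceded by '%' in AT&T dialect. */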
3154
3155 /* Meaning of CODE:
3156 L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
3157 C -- print opcode suffix for set/cmov insn.
3158 c -- like C, but print reversed condition
3159 R -- print the prefix for register names.
3160 z -- print the opcode suffix for the size of the current operand.
3161 * -- print a star (in certain assembler syntax)
3162 w -- print the operand as if it's a "word" (HImode) even if it isn't.
3163    s -- print a shift double count, followed by the assembler's argument
3164 delimiter.
3165 b -- print the QImode name of the register for the indicated operand.
3166 %b0 would print %al if operands[0] is reg 0.
3167 w -- likewise, print the HImode name of the register.
3168 k -- likewise, print the SImode name of the register.
3169 h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
3170 y -- print "st(0)" instead of "st" as a register.
3171 m -- print "st(n)" as an mmx register. */
3172
3173 void
3174 print_operand (file, x, code)
3175 FILE *file;
3176 rtx x;
3177 int code;
3178 {
3179 if (code)
3180 {
3181 switch (code)
3182 {
3183 case '*':
3184 if (ASSEMBLER_DIALECT == 0)
3185 putc ('*', file);
3186 return;
3187
3188 case 'L':
3189 if (ASSEMBLER_DIALECT == 0)
3190 putc ('l', file);
3191 return;
3192
3193 case 'W':
3194 if (ASSEMBLER_DIALECT == 0)
3195 putc ('w', file);
3196 return;
3197
3198 case 'B':
3199 if (ASSEMBLER_DIALECT == 0)
3200 putc ('b', file);
3201 return;
3202
3203 case 'Q':
3204 if (ASSEMBLER_DIALECT == 0)
3205 putc ('l', file);
3206 return;
3207
3208 case 'S':
3209 if (ASSEMBLER_DIALECT == 0)
3210 putc ('s', file);
3211 return;
3212
3213 case 'T':
3214 if (ASSEMBLER_DIALECT == 0)
3215 putc ('t', file);
3216 return;
3217
3218 case 'z':
3219 /* 387 opcodes don't get size suffixes if the operands are
3220 registers. */
3221
3222 if (STACK_REG_P (x))
3223 return;
3224
3225 /* Intel syntax has no truck with instruction suffixes. */
3226 if (ASSEMBLER_DIALECT != 0)
3227 return;
3228
3229 /* This is the size of the op taken from the size of the operand. */
3230 switch (GET_MODE_SIZE (GET_MODE (x)))
3231 {
3232 case 2:
3233 #ifdef HAVE_GAS_FILDS_FISTS
3234 putc ('s', file);
3235 #endif
3236 return;
3237
3238 case 4:
3239 if (GET_MODE (x) == SFmode)
3240 {
3241 putc ('s', file);
3242 return;
3243 }
3244 else
3245 putc ('l', file);
3246 return;
3247
3248 case 12:
3249 putc ('t', file);
3250 return;
3251
3252 case 8:
3253 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
3254 {
3255 #ifdef GAS_MNEMONICS
3256 putc ('q', file);
3257 #else
3258 putc ('l', file);
3259 putc ('l', file);
3260 #endif
3261 }
3262 else
3263 putc ('l', file);
3264 return;
3265
3266 default:
3267 abort ();
3268 }
3269
3270 case 'b':
3271 case 'w':
3272 case 'k':
3273 case 'h':
3274 case 'y':
3275 case 'm':
3276 case 'X':
3277 case 'P':
3278 break;
3279
3280 case 's':
3281 if (GET_CODE (x) == CONST_INT || ! SHIFT_DOUBLE_OMITS_COUNT)
3282 {
3283 PRINT_OPERAND (file, x, 0);
3284 putc (',', file);
3285 }
3286 return;
3287
3288 case 'C':
3289 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 0, file);
3290 return;
3291 case 'F':
3292 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 1, file);
3293 return;
3294
3295 /* Like above, but reverse condition */
3296 case 'c':
3297 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 0, file);
3298 return;
3299 case 'f':
3300 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 1, file);
3301 return;
3302
3303 default:
3304 {
3305 char str[50];
3306 sprintf (str, "invalid operand code `%c'", code);
3307 output_operand_lossage (str);
3308 }
3309 }
3310 }
3311
3312 if (GET_CODE (x) == REG)
3313 {
3314 PRINT_REG (x, code, file);
3315 }
3316
3317 else if (GET_CODE (x) == MEM)
3318 {
3319 /* No `byte ptr' prefix for call instructions. */
3320 if (ASSEMBLER_DIALECT != 0 && code != 'X' && code != 'P')
3321 {
3322 const char * size;
3323 switch (GET_MODE_SIZE (GET_MODE (x)))
3324 {
3325 case 1: size = "BYTE"; break;
3326 case 2: size = "WORD"; break;
3327 case 4: size = "DWORD"; break;
3328 case 8: size = "QWORD"; break;
3329 case 12: size = "XWORD"; break;
3330 case 16: size = "XMMWORD"; break;
3331 default:
3332 abort ();
3333 }
3334 fputs (size, file);
3335 fputs (" PTR ", file);
3336 }
3337
3338 x = XEXP (x, 0);
3339 if (flag_pic && CONSTANT_ADDRESS_P (x))
3340 output_pic_addr_const (file, x, code);
3341 else
3342 output_address (x);
3343 }
3344
3345 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == SFmode)
3346 {
3347 REAL_VALUE_TYPE r;
3348 long l;
3349
3350 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
3351 REAL_VALUE_TO_TARGET_SINGLE (r, l);
3352
3353 if (ASSEMBLER_DIALECT == 0)
3354 putc ('$', file);
3355 fprintf (file, "0x%lx", l);
3356 }
3357
3358 /* These float cases don't actually occur as immediate operands. */
3359 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == DFmode)
3360 {
3361 REAL_VALUE_TYPE r;
3362 char dstr[30];
3363
3364 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
3365 REAL_VALUE_TO_DECIMAL (r, "%.22e", dstr);
3366 fprintf (file, "%s", dstr);
3367 }
3368
3369 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == XFmode)
3370 {
3371 REAL_VALUE_TYPE r;
3372 char dstr[30];
3373
3374 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
3375 REAL_VALUE_TO_DECIMAL (r, "%.22e", dstr);
3376 fprintf (file, "%s", dstr);
3377 }
3378 else
3379 {
3380 if (code != 'P')
3381 {
3382 if (GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST_DOUBLE)
3383 {
3384 if (ASSEMBLER_DIALECT == 0)
3385 putc ('$', file);
3386 }
3387 else if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF
3388 || GET_CODE (x) == LABEL_REF)
3389 {
3390 if (ASSEMBLER_DIALECT == 0)
3391 putc ('$', file);
3392 else
3393 fputs ("OFFSET FLAT:", file);
3394 }
3395 }
3396 if (GET_CODE (x) == CONST_INT)
3397 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
3398 else if (flag_pic)
3399 output_pic_addr_const (file, x, code);
3400 else
3401 output_addr_const (file, x);
3402 }
3403 }
3404 \f
3405 /* Print a memory operand whose address is ADDR. */
3406
3407 void
3408 print_operand_address (file, addr)
3409 FILE *file;
3410 register rtx addr;
3411 {
3412 struct ix86_address parts;
3413 rtx base, index, disp;
3414 int scale;
3415
3416 if (! ix86_decompose_address (addr, &parts))
3417 abort ();
3418
3419 base = parts.base;
3420 index = parts.index;
3421 disp = parts.disp;
3422 scale = parts.scale;
3423
3424 if (!base && !index)
3425 {
3426 /* A displacement-only address requires special attention. */
3427
3428 if (GET_CODE (disp) == CONST_INT)
3429 {
3430 if (ASSEMBLER_DIALECT != 0)
3431 fputs ("ds:", file);
3432 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (addr));
3433 }
3434 else if (flag_pic)
3435 output_pic_addr_const (file, addr, 0);
3436 else
3437 output_addr_const (file, addr);
3438 }
3439 else
3440 {
3441 if (ASSEMBLER_DIALECT == 0)
3442 {
3443 if (disp)
3444 {
3445 if (flag_pic)
3446 output_pic_addr_const (file, disp, 0);
3447 else if (GET_CODE (disp) == LABEL_REF)
3448 output_asm_label (disp);
3449 else
3450 output_addr_const (file, disp);
3451 }
3452
3453 putc ('(', file);
3454 if (base)
3455 PRINT_REG (base, 0, file);
3456 if (index)
3457 {
3458 putc (',', file);
3459 PRINT_REG (index, 0, file);
3460 if (scale != 1)
3461 fprintf (file, ",%d", scale);
3462 }
3463 putc (')', file);
3464 }
3465 else
3466 {
3467 rtx offset = NULL_RTX;
3468
3469 if (disp)
3470 {
3471 /* Pull out the offset of a symbol; print any symbol itself. */
3472 if (GET_CODE (disp) == CONST
3473 && GET_CODE (XEXP (disp, 0)) == PLUS
3474 && GET_CODE (XEXP (XEXP (disp, 0), 1)) == CONST_INT)
3475 {
3476 offset = XEXP (XEXP (disp, 0), 1);
3477 disp = gen_rtx_CONST (VOIDmode,
3478 XEXP (XEXP (disp, 0), 0));
3479 }
3480
3481 if (flag_pic)
3482 output_pic_addr_const (file, disp, 0);
3483 else if (GET_CODE (disp) == LABEL_REF)
3484 output_asm_label (disp);
3485 else if (GET_CODE (disp) == CONST_INT)
3486 offset = disp;
3487 else
3488 output_addr_const (file, disp);
3489 }
3490
3491 putc ('[', file);
3492 if (base)
3493 {
3494 PRINT_REG (base, 0, file);
3495 if (offset)
3496 {
3497 if (INTVAL (offset) >= 0)
3498 putc ('+', file);
3499 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
3500 }
3501 }
3502 else if (offset)
3503 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
3504 else
3505 putc ('0', file);
3506
3507 if (index)
3508 {
3509 putc ('+', file);
3510 PRINT_REG (index, 0, file);
3511 if (scale != 1)
3512 fprintf (file, "*%d", scale);
3513 }
3514 putc (']', file);
3515 }
3516 }
3517 }
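
/* For illustration, the same address in both dialects: base %ebx,
   index %eax, scale 4, disp 12 prints as `12(%ebx,%eax,4)' in AT&T
   syntax and (modulo register prefixing) as `[ebx+12+eax*4]' in
   Intel syntax. */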
3518 \f
3519 /* Split one or more DImode RTL references into pairs of SImode
3520 references. The RTL can be REG, offsettable MEM, integer constant, or
3521 CONST_DOUBLE. "operands" is a pointer to an array of DImode RTL to
3522 split and "num" is its length. lo_half and hi_half are output arrays
3523 that parallel "operands". */
3524
3525 void
3526 split_di (operands, num, lo_half, hi_half)
3527 rtx operands[];
3528 int num;
3529 rtx lo_half[], hi_half[];
3530 {
3531 while (num--)
3532 {
3533 rtx op = operands[num];
3534 if (CONSTANT_P (op))
3535 split_double (op, &lo_half[num], &hi_half[num]);
3536 else if (! reload_completed)
3537 {
3538 lo_half[num] = gen_lowpart (SImode, op);
3539 hi_half[num] = gen_highpart (SImode, op);
3540 }
3541 else if (GET_CODE (op) == REG)
3542 {
3543 lo_half[num] = gen_rtx_REG (SImode, REGNO (op));
3544 hi_half[num] = gen_rtx_REG (SImode, REGNO (op) + 1);
3545 }
3546 else if (offsettable_memref_p (op))
3547 {
3548 rtx lo_addr = XEXP (op, 0);
3549 rtx hi_addr = XEXP (adj_offsettable_operand (op, 4), 0);
3550 lo_half[num] = change_address (op, SImode, lo_addr);
3551 hi_half[num] = change_address (op, SImode, hi_addr);
3552 }
3553 else
3554 abort ();
3555 }
3556 }
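
/* For illustration: after reload, a DImode value in hard register N
   splits into `(reg:SI N)' and `(reg:SI N+1)', and an offsettable
   DImode memory operand at ADDR splits into SImode references at ADDR
   and ADDR+4, low word first, since the i386 is little-endian. */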
3557 \f
3558 /* Output code to perform a 387 binary operation in INSN, one of PLUS,
3559 MINUS, MULT or DIV. OPERANDS are the insn operands, where operands[3]
3560 is the expression of the binary operation. The output may either be
3561 emitted here, or returned to the caller, like all output_* functions.
3562
3563 There is no guarantee that the operands are the same mode, as they
3564 might be within FLOAT or FLOAT_EXTEND expressions. */
3565
3566 #ifndef SYSV386_COMPAT
3567 /* Set to 1 for compatibility with brain-damaged assemblers. No-one
3568 wants to fix the assemblers because that causes incompatibility
3569 with gcc. No-one wants to fix gcc because that causes
3570 incompatibility with assemblers... You can use the option of
3571 -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way. */
3572 #define SYSV386_COMPAT 1
3573 #endif
3574
3575 const char *
3576 output_387_binary_op (insn, operands)
3577 rtx insn;
3578 rtx *operands;
3579 {
3580 static char buf[30];
3581 const char *p;
3582
3583 #ifdef ENABLE_CHECKING
3584 /* Even if we do not want to check the inputs, this documents the input
3585    constraints, which helps in understanding the following code. */
3586 if (STACK_REG_P (operands[0])
3587 && ((REG_P (operands[1])
3588 && REGNO (operands[0]) == REGNO (operands[1])
3589 && (STACK_REG_P (operands[2]) || GET_CODE (operands[2]) == MEM))
3590 || (REG_P (operands[2])
3591 && REGNO (operands[0]) == REGNO (operands[2])
3592 && (STACK_REG_P (operands[1]) || GET_CODE (operands[1]) == MEM)))
3593 && (STACK_TOP_P (operands[1]) || STACK_TOP_P (operands[2])))
3594 ; /* ok */
3595 else
3596 abort ();
3597 #endif
3598
3599 switch (GET_CODE (operands[3]))
3600 {
3601 case PLUS:
3602 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
3603 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
3604 p = "fiadd";
3605 else
3606 p = "fadd";
3607 break;
3608
3609 case MINUS:
3610 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
3611 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
3612 p = "fisub";
3613 else
3614 p = "fsub";
3615 break;
3616
3617 case MULT:
3618 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
3619 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
3620 p = "fimul";
3621 else
3622 p = "fmul";
3623 break;
3624
3625 case DIV:
3626 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
3627 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
3628 p = "fidiv";
3629 else
3630 p = "fdiv";
3631 break;
3632
3633 default:
3634 abort ();
3635 }
3636
3637 strcpy (buf, p);
3638
3639 switch (GET_CODE (operands[3]))
3640 {
3641 case MULT:
3642 case PLUS:
3643 if (REG_P (operands[2]) && REGNO (operands[0]) == REGNO (operands[2]))
3644 {
3645 rtx temp = operands[2];
3646 operands[2] = operands[1];
3647 operands[1] = temp;
3648 }
3649
3650 /* We now know that operands[0] == operands[1]. */
3651
3652 if (GET_CODE (operands[2]) == MEM)
3653 {
3654 p = "%z2\t%2";
3655 break;
3656 }
3657
3658 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
3659 {
3660 if (STACK_TOP_P (operands[0]))
3661 /* How is it that we are storing to a dead operand[2]?
3662 Well, presumably operands[1] is dead too. We can't
3663 store the result to st(0) as st(0) gets popped on this
3664 instruction. Instead store to operands[2] (which I
3665 think has to be st(1)). st(1) will be popped later.
3666 gcc <= 2.8.1 didn't have this check and generated
3667 assembly code that the Unixware assembler rejected. */
3668 p = "p\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
3669 else
3670 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
3671 break;
3672 }
3673
3674 if (STACK_TOP_P (operands[0]))
3675 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
3676 else
3677 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
3678 break;
3679
3680 case MINUS:
3681 case DIV:
3682 if (GET_CODE (operands[1]) == MEM)
3683 {
3684 p = "r%z1\t%1";
3685 break;
3686 }
3687
3688 if (GET_CODE (operands[2]) == MEM)
3689 {
3690 p = "%z2\t%2";
3691 break;
3692 }
3693
3694 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
3695 {
3696 #if SYSV386_COMPAT
3697 /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
3698 derived assemblers, confusingly reverse the direction of
3699 the operation for fsub{r} and fdiv{r} when the
3700 destination register is not st(0). The Intel assembler
3701 doesn't have this brain damage. Read !SYSV386_COMPAT to
3702 figure out what the hardware really does. */
3703 if (STACK_TOP_P (operands[0]))
3704 p = "{p\t%0, %2|rp\t%2, %0}";
3705 else
3706 p = "{rp\t%2, %0|p\t%0, %2}";
3707 #else
3708 if (STACK_TOP_P (operands[0]))
3709 /* As above for fmul/fadd, we can't store to st(0). */
3710 p = "rp\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
3711 else
3712 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
3713 #endif
3714 break;
3715 }
3716
3717 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
3718 {
3719 #if SYSV386_COMPAT
3720 if (STACK_TOP_P (operands[0]))
3721 p = "{rp\t%0, %1|p\t%1, %0}";
3722 else
3723 p = "{p\t%1, %0|rp\t%0, %1}";
3724 #else
3725 if (STACK_TOP_P (operands[0]))
3726 p = "p\t{%0, %1|%1, %0}"; /* st(1) = st(1) op st(0); pop */
3727 else
3728 p = "rp\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2); pop */
3729 #endif
3730 break;
3731 }
3732
3733 if (STACK_TOP_P (operands[0]))
3734 {
3735 if (STACK_TOP_P (operands[1]))
3736 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
3737 else
3738 p = "r\t{%y1, %0|%0, %y1}"; /* st(0) = st(r1) op st(0) */
3739 break;
3740 }
3741 else if (STACK_TOP_P (operands[1]))
3742 {
3743 #if SYSV386_COMPAT
3744 p = "{\t%1, %0|r\t%0, %1}";
3745 #else
3746 p = "r\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2) */
3747 #endif
3748 }
3749 else
3750 {
3751 #if SYSV386_COMPAT
3752 p = "{r\t%2, %0|\t%0, %2}";
3753 #else
3754 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
3755 #endif
3756 }
3757 break;
3758
3759 default:
3760 abort ();
3761 }
3762
3763 strcat (buf, p);
3764 return buf;
3765 }
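
/* For illustration: for `(set (reg st0) (plus (reg st0) (reg st2)))'
   with no REG_DEAD note on st(2), the code above returns the template
   "fadd\t{%y2, %0|%0, %y2}", which assembles to `fadd %st(2), %st' in
   AT&T syntax. */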
3766
3767 /* Output code for INSN to convert a float to a signed int. OPERANDS
3768 are the insn operands. The output may be [HSD]Imode and the input
3769 operand may be [SDX]Fmode. */
3770
3771 const char *
3772 output_fix_trunc (insn, operands)
3773 rtx insn;
3774 rtx *operands;
3775 {
3776 int stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
3777 int dimode_p = GET_MODE (operands[0]) == DImode;
3778 rtx xops[4];
3779
3780 /* Jump through a hoop or two for DImode, since the hardware has no
3781 non-popping instruction. We used to do this a different way, but
3782 that was somewhat fragile and broke with post-reload splitters. */
3783 if (dimode_p && !stack_top_dies)
3784 output_asm_insn ("fld\t%y1", operands);
3785
3786 if (! STACK_TOP_P (operands[1]))
3787 abort ();
3788
3789 xops[0] = GEN_INT (12);
3790 xops[1] = adj_offsettable_operand (operands[2], 1);
3791 xops[1] = change_address (xops[1], QImode, NULL_RTX);
3792
3793 xops[2] = operands[0];
3794 if (GET_CODE (operands[0]) != MEM)
3795 xops[2] = operands[3];
3796
3797 output_asm_insn ("fnstcw\t%2", operands);
3798 output_asm_insn ("mov{l}\t{%2, %4|%4, %2}", operands);
3799 output_asm_insn ("mov{b}\t{%0, %1|%1, %0}", xops);
3800 output_asm_insn ("fldcw\t%2", operands);
3801 output_asm_insn ("mov{l}\t{%4, %2|%2, %4}", operands);
3802
3803 if (stack_top_dies || dimode_p)
3804 output_asm_insn ("fistp%z2\t%2", xops);
3805 else
3806 output_asm_insn ("fist%z2\t%2", xops);
3807
3808 output_asm_insn ("fldcw\t%2", operands);
3809
3810 if (GET_CODE (operands[0]) != MEM)
3811 {
3812 if (dimode_p)
3813 {
3814 split_di (operands+0, 1, xops+0, xops+1);
3815 split_di (operands+3, 1, xops+2, xops+3);
3816 output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);
3817 output_asm_insn ("mov{l}\t{%3, %1|%1, %3}", xops);
3818 }
3819 else if (GET_MODE (operands[0]) == SImode)
3820 output_asm_insn ("mov{l}\t{%3, %0|%0, %3}", operands);
3821 else
3822 output_asm_insn ("mov{w}\t{%3, %0|%0, %3}", operands);
3823 }
3824
3825 return "";
3826 }
3827
3828 /* Output code for INSN to compare OPERANDS. EFLAGS_P is 1 when fcomi
3829 should be used and 2 when fnstsw should be used. UNORDERED_P is true
3830 when fucom should be used. */
3831
3832 const char *
3833 output_fp_compare (insn, operands, eflags_p, unordered_p)
3834 rtx insn;
3835 rtx *operands;
3836 int eflags_p, unordered_p;
3837 {
3838 int stack_top_dies;
3839 rtx cmp_op0 = operands[0];
3840 rtx cmp_op1 = operands[1];
3841
3842 if (eflags_p == 2)
3843 {
3844 cmp_op0 = cmp_op1;
3845 cmp_op1 = operands[2];
3846 }
3847
3848 if (! STACK_TOP_P (cmp_op0))
3849 abort ();
3850
3851 stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
3852
3853 if (STACK_REG_P (cmp_op1)
3854 && stack_top_dies
3855 && find_regno_note (insn, REG_DEAD, REGNO (cmp_op1))
3856 && REGNO (cmp_op1) != FIRST_STACK_REG)
3857 {
3858 /* If the top of the 387 stack dies, and the other operand
3859    is also a stack register that dies, then this must be an
3860    `fcompp' float compare. */
3861
3862 if (eflags_p == 1)
3863 {
3864 /* There is no double popping fcomi variant. Fortunately,
3865 eflags is immune from the fstp's cc clobbering. */
3866 if (unordered_p)
3867 output_asm_insn ("fucomip\t{%y1, %0|%0, %y1}", operands);
3868 else
3869 output_asm_insn ("fcomip\t{%y1, %0|%0, %y1}", operands);
3870 return "fstp\t%y0";
3871 }
3872 else
3873 {
3874 if (eflags_p == 2)
3875 {
3876 if (unordered_p)
3877 return "fucompp\n\tfnstsw\t%0";
3878 else
3879 return "fcompp\n\tfnstsw\t%0";
3880 }
3881 else
3882 {
3883 if (unordered_p)
3884 return "fucompp";
3885 else
3886 return "fcompp";
3887 }
3888 }
3889 }
3890 else
3891 {
3892 /* Encoded here as eflags_p | intmode | unordered_p | stack_top_dies. */
3893
3894 static const char * const alt[24] =
3895 {
3896 "fcom%z1\t%y1",
3897 "fcomp%z1\t%y1",
3898 "fucom%z1\t%y1",
3899 "fucomp%z1\t%y1",
3900
3901 "ficom%z1\t%y1",
3902 "ficomp%z1\t%y1",
3903 NULL,
3904 NULL,
3905
3906 "fcomi\t{%y1, %0|%0, %y1}",
3907 "fcomip\t{%y1, %0|%0, %y1}",
3908 "fucomi\t{%y1, %0|%0, %y1}",
3909 "fucomip\t{%y1, %0|%0, %y1}",
3910
3911 NULL,
3912 NULL,
3913 NULL,
3914 NULL,
3915
3916 "fcom%z2\t%y2\n\tfnstsw\t%0",
3917 "fcomp%z2\t%y2\n\tfnstsw\t%0",
3918 "fucom%z2\t%y2\n\tfnstsw\t%0",
3919 "fucomp%z2\t%y2\n\tfnstsw\t%0",
3920
3921 "ficom%z2\t%y2\n\tfnstsw\t%0",
3922 "ficomp%z2\t%y2\n\tfnstsw\t%0",
3923 NULL,
3924 NULL
3925 };
3926
3927 int mask;
3928 const char *ret;
3929
3930 mask = eflags_p << 3;
3931 mask |= (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT) << 2;
3932 mask |= unordered_p << 1;
3933 mask |= stack_top_dies;
3934
3935 if (mask >= 24)
3936 abort ();
3937 ret = alt[mask];
3938 if (ret == NULL)
3939 abort ();
3940
3941 return ret;
3942 }
3943 }
3944
3945 /* Output assembler code to FILE to initialize basic-block profiling.
3946
3947 If profile_block_flag == 2
3948
3949 Output code to call the subroutine `__bb_init_trace_func'
3950 and pass two parameters to it. The first parameter is
3951 the address of a block allocated in the object module.
3952 The second parameter is the number of the first basic block
3953 of the function.
3954
3955 The name of the block is a local symbol made with this statement:
3956
3957 ASM_GENERATE_INTERNAL_LABEL (BUFFER, "LPBX", 0);
3958
3959 Of course, since you are writing the definition of
3960 `ASM_GENERATE_INTERNAL_LABEL' as well as that of this macro, you
3961 can take a short cut in the definition of this macro and use the
3962 name that you know will result.
3963
3964 The number of the first basic block of the function is
3965 passed to the macro in BLOCK_OR_LABEL.
3966
3967 If described in a virtual assembler language the code to be
3968 output looks like:
3969
3970 parameter1 <- LPBX0
3971 parameter2 <- BLOCK_OR_LABEL
3972 call __bb_init_trace_func
3973
3974 else if profile_block_flag != 0
3975
3976 Output code to call the subroutine `__bb_init_func'
3977 and pass one single parameter to it, which is the same
3978 as the first parameter to `__bb_init_trace_func'.
3979
3980 The first word of this parameter is a flag which will be nonzero if
3981 the object module has already been initialized. So test this word
3982 first, and do not call `__bb_init_func' if the flag is nonzero.
3983 Note: When profile_block_flag == 2 the test need not be done
3984 but `__bb_init_trace_func' *must* be called.
3985
3986 BLOCK_OR_LABEL may be used to generate a label number as a
3987 branch destination in case `__bb_init_func' will not be called.
3988
3989 If described in a virtual assembler language the code to be
3990 output looks like:
3991
3992 cmp (LPBX0),0
3993 jne local_label
3994 parameter1 <- LPBX0
3995 call __bb_init_func
3996 local_label:
3997 */
3998
3999 void
4000 ix86_output_function_block_profiler (file, block_or_label)
4001 FILE *file;
4002 int block_or_label;
4003 {
4004 static int num_func = 0;
4005 rtx xops[8];
4006 char block_table[80], false_label[80];
4007
4008 ASM_GENERATE_INTERNAL_LABEL (block_table, "LPBX", 0);
4009
4010 xops[1] = gen_rtx_SYMBOL_REF (VOIDmode, block_table);
4011 xops[5] = stack_pointer_rtx;
4012 xops[7] = gen_rtx_REG (Pmode, 0); /* eax */
4013
4014 CONSTANT_POOL_ADDRESS_P (xops[1]) = TRUE;
4015
4016 switch (profile_block_flag)
4017 {
4018 case 2:
4019 xops[2] = GEN_INT (block_or_label);
4020 xops[3] = gen_rtx_MEM (Pmode,
4021 gen_rtx_SYMBOL_REF (VOIDmode, "__bb_init_trace_func"));
4022 xops[6] = GEN_INT (8);
4023
4024 output_asm_insn ("push{l}\t%2", xops);
4025 if (!flag_pic)
4026 output_asm_insn ("push{l}\t%1", xops);
4027 else
4028 {
4029 output_asm_insn ("lea{l}\t{%a1, %7|%7, %a1}", xops);
4030 output_asm_insn ("push{l}\t%7", xops);
4031 }
4032 output_asm_insn ("call\t%P3", xops);
4033 output_asm_insn ("add{l}\t{%6, %5|%5, %6}", xops);
4034 break;
4035
4036 default:
4037 ASM_GENERATE_INTERNAL_LABEL (false_label, "LPBZ", num_func);
4038
4039 xops[0] = const0_rtx;
4040 xops[2] = gen_rtx_MEM (Pmode,
4041 gen_rtx_SYMBOL_REF (VOIDmode, false_label));
4042 xops[3] = gen_rtx_MEM (Pmode,
4043 gen_rtx_SYMBOL_REF (VOIDmode, "__bb_init_func"));
4044 xops[4] = gen_rtx_MEM (Pmode, xops[1]);
4045 xops[6] = GEN_INT (4);
4046
4047 CONSTANT_POOL_ADDRESS_P (xops[2]) = TRUE;
4048
4049 output_asm_insn ("cmp{l}\t{%0, %4|%4, %0}", xops);
4050 output_asm_insn ("jne\t%2", xops);
4051
4052 if (!flag_pic)
4053 output_asm_insn ("push{l}\t%1", xops);
4054 else
4055 {
4056 output_asm_insn ("lea{l}\t{%a1, %7|%7, %a1}", xops);
4057 output_asm_insn ("push{l}\t%7", xops);
4058 }
4059 output_asm_insn ("call\t%P3", xops);
4060 output_asm_insn ("add{l}\t{%6, %5|%5, %6}", xops);
4061 ASM_OUTPUT_INTERNAL_LABEL (file, "LPBZ", num_func);
4062 num_func++;
4063 break;
4064 }
4065 }
4066
4067 /* Output assembler code to FILE to increment a counter associated
4068 with basic block number BLOCKNO.
4069
4070 If profile_block_flag == 2
4071
4072 Output code to initialize the global structure `__bb' and
4073 call the function `__bb_trace_func' which will increment the
4074 counter.
4075
4076 `__bb' consists of two words. In the first word the number
4077 of the basic block has to be stored. In the second word
4078 the address of a block allocated in the object module
4079 has to be stored.
4080
4081 The basic block number is given by BLOCKNO.
4082
4083 The address of the block is given by the label created with
4084
4085 ASM_GENERATE_INTERNAL_LABEL (BUFFER, "LPBX", 0);
4086
4087 by FUNCTION_BLOCK_PROFILER.
4088
4089 Of course, since you are writing the definition of
4090 `ASM_GENERATE_INTERNAL_LABEL' as well as that of this macro, you
4091 can take a short cut in the definition of this macro and use the
4092 name that you know will result.
4093
4094 If described in a virtual assembler language the code to be
4095 output looks like:
4096
4097 move BLOCKNO -> (__bb)
4098 move LPBX0 -> (__bb+4)
4099 call __bb_trace_func
4100
4101 Note that function `__bb_trace_func' must not change the
4102 machine state, especially the flag register. To guarantee
4103 this, you must output code to save and restore registers
4104 either in this macro or in the macros MACHINE_STATE_SAVE
4105 and MACHINE_STATE_RESTORE. The last two macros will be
4106 used in the function `__bb_trace_func', so you must make
4107 sure that the function prologue does not change any
4108 register prior to saving it with MACHINE_STATE_SAVE.
4109
4110 else if profile_block_flag != 0
4111
4112 Output code to increment the counter directly.
4113 Basic blocks are numbered separately from zero within each
4114 compiled object module. The count associated with block number
4115 BLOCKNO is at index BLOCKNO in an array of words; the name of
4116 this array is a local symbol made with this statement:
4117
4118 ASM_GENERATE_INTERNAL_LABEL (BUFFER, "LPBX", 2);
4119
4120 Of course, since you are writing the definition of
4121 `ASM_GENERATE_INTERNAL_LABEL' as well as that of this macro, you
4122 can take a short cut in the definition of this macro and use the
4123 name that you know will result.
4124
4125 If described in a virtual assembler language the code to be
4126 output looks like:
4127
4128 inc (LPBX2+4*BLOCKNO)
4129 */
4130
4131 void
4132 ix86_output_block_profiler (file, blockno)
4133 FILE *file ATTRIBUTE_UNUSED;
4134 int blockno;
4135 {
4136 rtx xops[8], cnt_rtx;
4137 char counts[80];
4138 char *block_table = counts;
4139
4140 switch (profile_block_flag)
4141 {
4142 case 2:
4143 ASM_GENERATE_INTERNAL_LABEL (block_table, "LPBX", 0);
4144
4145 xops[1] = gen_rtx_SYMBOL_REF (VOIDmode, block_table);
4146 xops[2] = GEN_INT (blockno);
4147 xops[3] = gen_rtx_MEM (Pmode,
4148 gen_rtx_SYMBOL_REF (VOIDmode, "__bb_trace_func"));
4149 xops[4] = gen_rtx_SYMBOL_REF (VOIDmode, "__bb");
4150 xops[5] = plus_constant (xops[4], 4);
4151 xops[0] = gen_rtx_MEM (SImode, xops[4]);
4152 xops[6] = gen_rtx_MEM (SImode, xops[5]);
4153
4154 CONSTANT_POOL_ADDRESS_P (xops[1]) = TRUE;
4155
4156 output_asm_insn ("pushf", xops);
4157 output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);
4158 if (flag_pic)
4159 {
4160 xops[7] = gen_rtx_REG (Pmode, 0); /* eax */
4161 output_asm_insn ("push{l}\t%7", xops);
4162 output_asm_insn ("lea{l}\t{%a1, %7|%7, %a1}", xops);
4163 output_asm_insn ("mov{l}\t{%7, %6|%6, %7}", xops);
4164 output_asm_insn ("pop{l}\t%7", xops);
4165 }
4166 else
4167 output_asm_insn ("mov{l}\t{%1, %6|%6, %1}", xops);
4168 output_asm_insn ("call\t%P3", xops);
4169 output_asm_insn ("popf", xops);
4170
4171 break;
4172
4173 default:
4174 ASM_GENERATE_INTERNAL_LABEL (counts, "LPBX", 2);
4175 cnt_rtx = gen_rtx_SYMBOL_REF (VOIDmode, counts);
4176 SYMBOL_REF_FLAG (cnt_rtx) = TRUE;
4177
4178 if (blockno)
4179 cnt_rtx = plus_constant (cnt_rtx, blockno*4);
4180
4181 if (flag_pic)
4182 cnt_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, cnt_rtx);
4183
4184 xops[0] = gen_rtx_MEM (SImode, cnt_rtx);
4185 output_asm_insn ("inc{l}\t%0", xops);
4186
4187 break;
4188 }
4189 }
4190 \f
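/* Expand a move of MODE between OPERANDS[0] and OPERANDS[1], forcing
   operands into registers or the constant pool where PIC addressing or
   the machine's operand constraints require it.  */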
4191 void
4192 ix86_expand_move (mode, operands)
4193 enum machine_mode mode;
4194 rtx operands[];
4195 {
4196 int strict = (reload_in_progress || reload_completed);
4197 rtx insn;
4198
4199 if (flag_pic && mode == Pmode && symbolic_operand (operands[1], Pmode))
4200 {
4201 /* Emit insns to move operands[1] into operands[0]. */
4202
4203 if (GET_CODE (operands[0]) == MEM)
4204 operands[1] = force_reg (Pmode, operands[1]);
4205 else
4206 {
4207 rtx temp = operands[0];
4208 if (GET_CODE (temp) != REG)
4209 temp = gen_reg_rtx (Pmode);
4210 temp = legitimize_pic_address (operands[1], temp);
4211 if (temp == operands[0])
4212 return;
4213 operands[1] = temp;
4214 }
4215 }
4216 else
4217 {
4218 if (GET_CODE (operands[0]) == MEM
4219 && (GET_MODE (operands[0]) == QImode
4220 || !push_operand (operands[0], mode))
4221 && GET_CODE (operands[1]) == MEM)
4222 operands[1] = force_reg (mode, operands[1]);
4223
4224 if (push_operand (operands[0], mode)
4225 && ! general_no_elim_operand (operands[1], mode))
4226 operands[1] = copy_to_mode_reg (mode, operands[1]);
4227
4228 if (FLOAT_MODE_P (mode))
4229 {
4230 /* If we are loading a floating point constant to a register,
4231 force the value to memory now, since we'll get better code
4232 out the back end. */
4233
4234 if (strict)
4235 ;
4236 else if (GET_CODE (operands[1]) == CONST_DOUBLE
4237 && register_operand (operands[0], mode))
4238 operands[1] = validize_mem (force_const_mem (mode, operands[1]));
4239 }
4240 }
4241
4242 insn = gen_rtx_SET (VOIDmode, operands[0], operands[1]);
4243
4244 emit_insn (insn);
4245 }
4246
4247 /* Attempt to expand a binary operator. Make the expansion closer to the
4248 actual machine than just using general_operand, which would allow 3
4249 separate memory references (one output, two inputs) in a single insn. */
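/* For example, "mem0 = mem1 + mem2" cannot be a single x86 add, which
   encodes at most one memory operand; one source is forced into a
   register, and a non-matching memory destination is computed in a
   register and stored afterwards.  */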
4250
4251 void
4252 ix86_expand_binary_operator (code, mode, operands)
4253 enum rtx_code code;
4254 enum machine_mode mode;
4255 rtx operands[];
4256 {
4257 int matching_memory;
4258 rtx src1, src2, dst, op, clob;
4259
4260 dst = operands[0];
4261 src1 = operands[1];
4262 src2 = operands[2];
4263
4264 /* Recognize <var1> = <value> <op> <var1> for commutative operators */
4265 if (GET_RTX_CLASS (code) == 'c'
4266 && (rtx_equal_p (dst, src2)
4267 || immediate_operand (src1, mode)))
4268 {
4269 rtx temp = src1;
4270 src1 = src2;
4271 src2 = temp;
4272 }
4273
4274 /* If the destination is memory, and we do not have matching source
4275 operands, do things in registers. */
4276 matching_memory = 0;
4277 if (GET_CODE (dst) == MEM)
4278 {
4279 if (rtx_equal_p (dst, src1))
4280 matching_memory = 1;
4281 else if (GET_RTX_CLASS (code) == 'c'
4282 && rtx_equal_p (dst, src2))
4283 matching_memory = 2;
4284 else
4285 dst = gen_reg_rtx (mode);
4286 }
4287
4288 /* Both source operands cannot be in memory. */
4289 if (GET_CODE (src1) == MEM && GET_CODE (src2) == MEM)
4290 {
4291 if (matching_memory != 2)
4292 src2 = force_reg (mode, src2);
4293 else
4294 src1 = force_reg (mode, src1);
4295 }
4296
4297 /* If the operation is not commutative, source 1 cannot be a constant
4298 or non-matching memory. */
4299 if ((CONSTANT_P (src1)
4300 || (!matching_memory && GET_CODE (src1) == MEM))
4301 && GET_RTX_CLASS (code) != 'c')
4302 src1 = force_reg (mode, src1);
4303
4304 /* If optimizing, copy to regs to improve CSE */
4305 if (optimize && ! no_new_pseudos)
4306 {
4307 if (GET_CODE (dst) == MEM)
4308 dst = gen_reg_rtx (mode);
4309 if (GET_CODE (src1) == MEM)
4310 src1 = force_reg (mode, src1);
4311 if (GET_CODE (src2) == MEM)
4312 src2 = force_reg (mode, src2);
4313 }
4314
4315 /* Emit the instruction. */
4316
4317 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, mode, src1, src2));
4318 if (reload_in_progress)
4319 {
4320 /* Reload doesn't know about the flags register, and doesn't know that
4321 it doesn't want to clobber it. We can only do this with PLUS. */
4322 if (code != PLUS)
4323 abort ();
4324 emit_insn (op);
4325 }
4326 else
4327 {
4328 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
4329 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
4330 }
4331
4332 /* Fix up the destination if needed. */
4333 if (dst != operands[0])
4334 emit_move_insn (operands[0], dst);
4335 }
4336
4337 /* Return TRUE or FALSE depending on whether the binary operator meets the
4338 appropriate constraints. */
4339
4340 int
4341 ix86_binary_operator_ok (code, mode, operands)
4342 enum rtx_code code;
4343 enum machine_mode mode ATTRIBUTE_UNUSED;
4344 rtx operands[3];
4345 {
4346 /* Both source operands cannot be in memory. */
4347 if (GET_CODE (operands[1]) == MEM && GET_CODE (operands[2]) == MEM)
4348 return 0;
4349 /* If the operation is not commutative, source 1 cannot be a constant. */
4350 if (CONSTANT_P (operands[1]) && GET_RTX_CLASS (code) != 'c')
4351 return 0;
4352 /* If the destination is memory, we must have a matching source operand. */
4353 if (GET_CODE (operands[0]) == MEM
4354 && ! (rtx_equal_p (operands[0], operands[1])
4355 || (GET_RTX_CLASS (code) == 'c'
4356 && rtx_equal_p (operands[0], operands[2]))))
4357 return 0;
4358 /* If the operation is not commutative and source 1 is memory, we must
4359 have a matching destination. */
4360 if (GET_CODE (operands[1]) == MEM
4361 && GET_RTX_CLASS (code) != 'c'
4362 && ! rtx_equal_p (operands[0], operands[1]))
4363 return 0;
4364 return 1;
4365 }
4366
4367 /* Attempt to expand a unary operator. Make the expansion closer to the
4368 actual machine than just using general_operand, which would allow 2
4369 separate memory references (one output, one input) in a single insn. */
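/* For example, "mem0 = -mem1" with mem0 != mem1 cannot be a single x86
   neg; the source is loaded into a register first and the result is
   stored back.  */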
4370
4371 void
4372 ix86_expand_unary_operator (code, mode, operands)
4373 enum rtx_code code;
4374 enum machine_mode mode;
4375 rtx operands[];
4376 {
4377 int matching_memory;
4378 rtx src, dst, op, clob;
4379
4380 dst = operands[0];
4381 src = operands[1];
4382
4383 /* If the destination is memory, and we do not have matching source
4384 operands, do things in registers. */
4385 matching_memory = 0;
4386 if (GET_CODE (dst) == MEM)
4387 {
4388 if (rtx_equal_p (dst, src))
4389 matching_memory = 1;
4390 else
4391 dst = gen_reg_rtx (mode);
4392 }
4393
4394 /* When source operand is memory, destination must match. */
4395 if (!matching_memory && GET_CODE (src) == MEM)
4396 src = force_reg (mode, src);
4397
4398 /* If optimizing, copy to regs to improve CSE */
4399 if (optimize && ! no_new_pseudos)
4400 {
4401 if (GET_CODE (dst) == MEM)
4402 dst = gen_reg_rtx (mode);
4403 if (GET_CODE (src) == MEM)
4404 src = force_reg (mode, src);
4405 }
4406
4407 /* Emit the instruction. */
4408
4409 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_e (code, mode, src));
4410 if (reload_in_progress || code == NOT)
4411 {
4412 /* Reload doesn't know about the flags register, and doesn't know that
4413 it doesn't want to clobber it. */
4414 if (code != NOT)
4415 abort ();
4416 emit_insn (op);
4417 }
4418 else
4419 {
4420 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
4421 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
4422 }
4423
4424 /* Fix up the destination if needed. */
4425 if (dst != operands[0])
4426 emit_move_insn (operands[0], dst);
4427 }
4428
4429 /* Return TRUE or FALSE depending on whether the unary operator meets the
4430 appropriate constraints. */
4431
4432 int
4433 ix86_unary_operator_ok (code, mode, operands)
4434 enum rtx_code code ATTRIBUTE_UNUSED;
4435 enum machine_mode mode ATTRIBUTE_UNUSED;
4436 rtx operands[2] ATTRIBUTE_UNUSED;
4437 {
4438 /* If one of operands is memory, source and destination must match. */
4439 if ((GET_CODE (operands[0]) == MEM
4440 || GET_CODE (operands[1]) == MEM)
4441 && ! rtx_equal_p (operands[0], operands[1]))
4442 return FALSE;
4443 return TRUE;
4444 }
4445
4446 /* Return TRUE or FALSE depending on whether the first SET in INSN
4447 has source and destination with matching CC modes, and that the
4448 CC mode is at least as constrained as REQ_MODE. */
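/* Concretely: a CCZmode setter (only ZF meaningful) is accepted for any
   REQ_MODE handled here, a CCNOmode setter for CCNOmode and CCmode
   requests, and a plain CCmode setter only for a CCmode request.  */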
4449
4450 int
4451 ix86_match_ccmode (insn, req_mode)
4452 rtx insn;
4453 enum machine_mode req_mode;
4454 {
4455 rtx set;
4456 enum machine_mode set_mode;
4457
4458 set = PATTERN (insn);
4459 if (GET_CODE (set) == PARALLEL)
4460 set = XVECEXP (set, 0, 0);
4461 if (GET_CODE (set) != SET)
4462 abort ();
4463
4464 set_mode = GET_MODE (SET_DEST (set));
4465 switch (set_mode)
4466 {
4467 case CCmode:
4468 if (req_mode == CCNOmode)
4469 return 0;
4470 /* FALLTHRU */
4471 case CCNOmode:
4472 if (req_mode == CCZmode)
4473 return 0;
4474 /* FALLTHRU */
4475 case CCZmode:
4476 break;
4477
4478 default:
4479 abort ();
4480 }
4481
4482 return (GET_MODE (SET_SRC (set)) == set_mode);
4483 }
4484
4485 /* Produce an unsigned comparison for a given signed comparison. */
4486
4487 static enum rtx_code
4488 unsigned_comparison (code)
4489 enum rtx_code code;
4490 {
4491 switch (code)
4492 {
4493 case GT:
4494 code = GTU;
4495 break;
4496 case LT:
4497 code = LTU;
4498 break;
4499 case GE:
4500 code = GEU;
4501 break;
4502 case LE:
4503 code = LEU;
4504 break;
4505 case EQ:
4506 case NE:
4507 case LEU:
4508 case LTU:
4509 case GEU:
4510 case GTU:
4511 case UNORDERED:
4512 case ORDERED:
4513 break;
4514 default:
4515 abort ();
4516 }
4517 return code;
4518 }
4519
4520 /* Generate insn patterns to do an integer compare of OPERANDS. */
4521
4522 static rtx
4523 ix86_expand_int_compare (code, op0, op1)
4524 enum rtx_code code;
4525 rtx op0, op1;
4526 {
4527 enum machine_mode cmpmode;
4528 rtx tmp, flags;
4529
4530 cmpmode = SELECT_CC_MODE (code, op0, op1);
4531 flags = gen_rtx_REG (cmpmode, FLAGS_REG);
4532
4533 /* This is very simple, but making the interface the same as in the
4534 FP case makes the rest of the code easier. */
4535 tmp = gen_rtx_COMPARE (cmpmode, op0, op1);
4536 emit_insn (gen_rtx_SET (VOIDmode, flags, tmp));
4537
4538 /* Return the test that should be put into the flags user, i.e.
4539 the bcc, scc, or cmov instruction. */
4540 return gen_rtx_fmt_ee (code, VOIDmode, flags, const0_rtx);
4541 }
4542
4543 /* Figure out whether to use ordered or unordered fp comparisons.
4544 Return the appropriate mode to use. */
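/* Roughly, CCFPmode corresponds to the fcom-style compares, which raise
   an invalid-operand exception on NaNs, while CCFPUmode corresponds to
   the quiet fucom-style compares needed by the unordered codes.  */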
4545
4546 static enum machine_mode
4547 ix86_fp_compare_mode (code)
4548 enum rtx_code code;
4549 {
4550 int unordered;
4551
4552 switch (code)
4553 {
4554 case NE: case EQ:
4555 /* When not doing IEEE compliant compares, fault on NaNs. */
4556 unordered = (TARGET_IEEE_FP != 0);
4557 break;
4558
4559 case LT: case LE: case GT: case GE:
4560 unordered = 0;
4561 break;
4562
4563 case UNORDERED: case ORDERED:
4564 case UNEQ: case UNGE: case UNGT: case UNLE: case UNLT: case LTGT:
4565 unordered = 1;
4566 break;
4567
4568 default:
4569 abort ();
4570 }
4571
4572 /* ??? If we knew whether invalid-operand exceptions were masked,
4573 we could rely on fcom to raise an exception and take care of
4574 NaNs. But we don't. We could know this from c99 math pragmas. */
4575 if (TARGET_IEEE_FP)
4576 unordered = 1;
4577
4578 return unordered ? CCFPUmode : CCFPmode;
4579 }
4580
4581 /* Return true if we should use an FCOMI instruction for this fp comparison. */
4582
4583 int
4584 ix86_use_fcomi_compare (code)
4585 enum rtx_code code;
4586 {
4587 return (TARGET_CMOVE
4588 && (code == ORDERED || code == UNORDERED
4589 /* All other unordered compares require checking
4590 multiple sets of bits. */
4591 || ix86_fp_compare_mode (code) == CCFPmode));
4592 }
4593
4594 /* Swap, force into registers, or otherwise massage the two operands
4595 to a fp comparison. The operands are updated in place; the new
4596 comparison code is returned. */
4597
4598 static enum rtx_code
4599 ix86_prepare_fp_compare_args (code, pop0, pop1)
4600 enum rtx_code code;
4601 rtx *pop0, *pop1;
4602 {
4603 enum machine_mode fpcmp_mode = ix86_fp_compare_mode (code);
4604 rtx op0 = *pop0, op1 = *pop1;
4605 enum machine_mode op_mode = GET_MODE (op0);
4606
4607 /* All of the unordered compare instructions only work on registers.
4608 The same is true of the XFmode compare instructions. The same is
4609 true of the fcomi compare instructions. */
4610
4611 if (fpcmp_mode == CCFPUmode
4612 || op_mode == XFmode
4613 || ix86_use_fcomi_compare (code))
4614 {
4615 op0 = force_reg (op_mode, op0);
4616 op1 = force_reg (op_mode, op1);
4617 }
4618 else
4619 {
4620 /* %%% We only allow op1 in memory; op0 must be st(0). So swap
4621 things around if they appear profitable, otherwise force op0
4622 into a register. */
4623
4624 if (standard_80387_constant_p (op0) == 0
4625 || (GET_CODE (op0) == MEM
4626 && ! (standard_80387_constant_p (op1) == 0
4627 || GET_CODE (op1) == MEM)))
4628 {
4629 rtx tmp;
4630 tmp = op0, op0 = op1, op1 = tmp;
4631 code = swap_condition (code);
4632 }
4633
4634 if (GET_CODE (op0) != REG)
4635 op0 = force_reg (op_mode, op0);
4636
4637 if (CONSTANT_P (op1))
4638 {
4639 if (standard_80387_constant_p (op1))
4640 op1 = force_reg (op_mode, op1);
4641 else
4642 op1 = validize_mem (force_const_mem (op_mode, op1));
4643 }
4644 }
4645
4646 *pop0 = op0;
4647 *pop1 = op1;
4648 return code;
4649 }
4650
4651 /* Generate insn patterns to do a floating point compare of OPERANDS. */
4652
4653 rtx
4654 ix86_expand_fp_compare (code, op0, op1, scratch)
4655 enum rtx_code code;
4656 rtx op0, op1, scratch;
4657 {
4658 enum machine_mode fpcmp_mode, intcmp_mode;
4659 rtx tmp;
4660
4661 fpcmp_mode = ix86_fp_compare_mode (code);
4662 code = ix86_prepare_fp_compare_args (code, &op0, &op1);
4663
4664 /* %%% fcomi is probably always faster, even when dealing with memory,
4665 since compare-and-branch would be three insns instead of four. */
4666 if (ix86_use_fcomi_compare (code))
4667 {
4668 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
4669 tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG), tmp);
4670 emit_insn (tmp);
4671
4672 /* The FP codes work out to act like unsigned. */
4673 code = unsigned_comparison (code);
4674 intcmp_mode = CCmode;
4675 }
4676 else
4677 {
4678 /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first. */
4679
4680 rtx tmp2;
4681 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
4682 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), 9);
4683 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
4684
4685 if (fpcmp_mode == CCFPmode
4686 || code == ORDERED
4687 || code == UNORDERED)
4688 {
4689 /* We have two options here -- use sahf, or testing bits of ah
4690 directly. On PPRO, they are equivalent, sahf being one byte
4691 smaller. On Pentium, sahf is non-pairable while test is UV
4692 pairable. */
4693
4694 if (TARGET_USE_SAHF || optimize_size)
4695 {
4696 do_sahf:
4697 emit_insn (gen_x86_sahf_1 (scratch));
4698
4699 /* The FP codes work out to act like unsigned. */
4700 code = unsigned_comparison (code);
4701 intcmp_mode = CCmode;
4702 }
4703 else
4704 {
4705 /*
4706 * The numbers below correspond to the bits of the FPSW in AH.
4707 * C3, C2, and C0 are in bits 0x40, 0x4, and 0x01 respectively.
4708 *
4709 * cmp C3 C2 C0
4710 * > 0 0 0
4711 * < 0 0 1
4712 * = 1 0 0
4713 * un 1 1 1
4714 */
4715
4716 int mask;
4717
4718 switch (code)
4719 {
4720 case GT:
4721 mask = 0x41;
4722 code = EQ;
4723 break;
4724 case LT:
4725 mask = 0x01;
4726 code = NE;
4727 break;
4728 case GE:
4729 /* We'd have to use `xorb 1,ah; andb 0x41,ah', so it's
4730 faster in all cases to just fall back on sahf. */
4731 goto do_sahf;
4732 case LE:
4733 mask = 0x41;
4734 code = NE;
4735 break;
4736 case EQ:
4737 mask = 0x40;
4738 code = NE;
4739 break;
4740 case NE:
4741 mask = 0x40;
4742 code = EQ;
4743 break;
4744 case UNORDERED:
4745 mask = 0x04;
4746 code = NE;
4747 break;
4748 case ORDERED:
4749 mask = 0x04;
4750 code = EQ;
4751 break;
4752
4753 default:
4754 abort ();
4755 }
4756
4757 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (mask)));
4758 intcmp_mode = CCNOmode;
4759 }
4760 }
4761 else
4762 {
4763 /* In the unordered case, we have to check C2 for NaN's, which
4764 doesn't happen to work out to anything nice combination-wise.
4765 So do some bit twiddling on the value we've got in AH to come
4766 up with an appropriate set of condition codes. */
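/* Worked example: for GT we test ah against 0x45 (C0|C2|C3); "greater"
   is the only row in the table above with all three clear, so the
   branch condition becomes EQ on the resulting zero flag.  */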
4767
4768 intcmp_mode = CCNOmode;
4769 switch (code)
4770 {
4771 case GT:
4772 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
4773 code = EQ;
4774 break;
4775 case LT:
4776 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
4777 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x01)));
4778 intcmp_mode = CCmode;
4779 code = EQ;
4780 break;
4781 case GE:
4782 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x05)));
4783 code = EQ;
4784 break;
4785 case LE:
4786 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
4787 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
4788 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
4789 intcmp_mode = CCmode;
4790 code = LTU;
4791 break;
4792 case EQ:
4793 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
4794 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
4795 intcmp_mode = CCmode;
4796 code = EQ;
4797 break;
4798 case NE:
4799 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
4800 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch, GEN_INT (0x40)));
4801 code = NE;
4802 break;
4803
4804 case UNORDERED:
4805 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
4806 code = NE;
4807 break;
4808 case ORDERED:
4809 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
4810 code = EQ;
4811 break;
4812 case UNEQ:
4813 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
4814 code = NE;
4815 break;
4816 case UNGE:
4817 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
4818 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch, GEN_INT (0x01)));
4819 code = NE;
4820 break;
4821 case UNGT:
4822 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
4823 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
4824 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x44)));
4825 code = GEU;
4826 break;
4827 case UNLE:
4828 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
4829 code = NE;
4830 break;
4831 case UNLT:
4832 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x01)));
4833 code = NE;
4834 break;
4835 case LTGT:
4836 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
4837 code = EQ;
4838 break;
4839
4840 default:
4841 abort ();
4842 }
4843 }
4844 }
4845
4846 /* Return the test that should be put into the flags user, i.e.
4847 the bcc, scc, or cmov instruction. */
4848 return gen_rtx_fmt_ee (code, VOIDmode,
4849 gen_rtx_REG (intcmp_mode, FLAGS_REG),
4850 const0_rtx);
4851 }
4852
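/* Expand a comparison of ix86_compare_op0 and ix86_compare_op1 with
   CODE, dispatching to the FP or integer path, and return the flags
   test that the consumer (bcc, scc, or cmov) should use.  */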
4853 static rtx
4854 ix86_expand_compare (code)
4855 enum rtx_code code;
4856 {
4857 rtx op0, op1, ret;
4858 op0 = ix86_compare_op0;
4859 op1 = ix86_compare_op1;
4860
4861 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_FLOAT)
4862 ret = ix86_expand_fp_compare (code, op0, op1, gen_reg_rtx (HImode));
4863 else
4864 ret = ix86_expand_int_compare (code, op0, op1);
4865
4866 return ret;
4867 }
4868
4869 void
4870 ix86_expand_branch (code, label)
4871 enum rtx_code code;
4872 rtx label;
4873 {
4874 rtx tmp;
4875
4876 switch (GET_MODE (ix86_compare_op0))
4877 {
4878 case QImode:
4879 case HImode:
4880 case SImode:
4881 tmp = ix86_expand_compare (code);
4882 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
4883 gen_rtx_LABEL_REF (VOIDmode, label),
4884 pc_rtx);
4885 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
4886 return;
4887
4888 case SFmode:
4889 case DFmode:
4890 case XFmode:
4891 /* Don't expand the comparison early, so that we get better code
4892 when jump or whoever decides to reverse the comparison. */
4893 {
4894 rtvec vec;
4895 int use_fcomi;
4896
4897 code = ix86_prepare_fp_compare_args (code, &ix86_compare_op0,
4898 &ix86_compare_op1);
4899
4900 tmp = gen_rtx_fmt_ee (code, VOIDmode,
4901 ix86_compare_op0, ix86_compare_op1);
4902 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
4903 gen_rtx_LABEL_REF (VOIDmode, label),
4904 pc_rtx);
4905 tmp = gen_rtx_SET (VOIDmode, pc_rtx, tmp);
4906
4907 use_fcomi = ix86_use_fcomi_compare (code);
4908 vec = rtvec_alloc (3 + !use_fcomi);
4909 RTVEC_ELT (vec, 0) = tmp;
4910 RTVEC_ELT (vec, 1)
4911 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, 18));
4912 RTVEC_ELT (vec, 2)
4913 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, 17));
4914 if (! use_fcomi)
4915 RTVEC_ELT (vec, 3)
4916 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (HImode));
4917
4918 emit_jump_insn (gen_rtx_PARALLEL (VOIDmode, vec));
4919 return;
4920 }
4921
4922 case DImode:
4923 /* Expand DImode branch into multiple compare+branch. */
4924 {
4925 rtx lo[2], hi[2], label2;
4926 enum rtx_code code1, code2, code3;
4927
4928 if (CONSTANT_P (ix86_compare_op0) && ! CONSTANT_P (ix86_compare_op1))
4929 {
4930 tmp = ix86_compare_op0;
4931 ix86_compare_op0 = ix86_compare_op1;
4932 ix86_compare_op1 = tmp;
4933 code = swap_condition (code);
4934 }
4935 split_di (&ix86_compare_op0, 1, lo+0, hi+0);
4936 split_di (&ix86_compare_op1, 1, lo+1, hi+1);
4937
4938 /* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to
4939 avoid two branches. This costs one extra insn, so disable when
4940 optimizing for size. */
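/* The emitted shape for the general subword case is
      xorl  hi1, hi0
      xorl  lo1, lo0
      orl   lo0, hi0
   followed by a single je/jne on the result.  */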
4941
4942 if ((code == EQ || code == NE)
4943 && (!optimize_size
4944 || hi[1] == const0_rtx || lo[1] == const0_rtx))
4945 {
4946 rtx xor0, xor1;
4947
4948 xor1 = hi[0];
4949 if (hi[1] != const0_rtx)
4950 xor1 = expand_binop (SImode, xor_optab, xor1, hi[1],
4951 NULL_RTX, 0, OPTAB_WIDEN);
4952
4953 xor0 = lo[0];
4954 if (lo[1] != const0_rtx)
4955 xor0 = expand_binop (SImode, xor_optab, xor0, lo[1],
4956 NULL_RTX, 0, OPTAB_WIDEN);
4957
4958 tmp = expand_binop (SImode, ior_optab, xor1, xor0,
4959 NULL_RTX, 0, OPTAB_WIDEN);
4960
4961 ix86_compare_op0 = tmp;
4962 ix86_compare_op1 = const0_rtx;
4963 ix86_expand_branch (code, label);
4964 return;
4965 }
4966
4967 /* Otherwise, if we are doing less-than, op1 is a constant, and the
4968 low word is zero, then we can just examine the high word. */
4969
4970 if (GET_CODE (hi[1]) == CONST_INT && lo[1] == const0_rtx
4971 && (code == LT || code == LTU))
4972 {
4973 ix86_compare_op0 = hi[0];
4974 ix86_compare_op1 = hi[1];
4975 ix86_expand_branch (code, label);
4976 return;
4977 }
4978
4979 /* Otherwise, we need two or three jumps. */
4980
4981 label2 = gen_label_rtx ();
4982
4983 code1 = code;
4984 code2 = swap_condition (code);
4985 code3 = unsigned_condition (code);
4986
4987 switch (code)
4988 {
4989 case LT: case GT: case LTU: case GTU:
4990 break;
4991
4992 case LE: code1 = LT; code2 = GT; break;
4993 case GE: code1 = GT; code2 = LT; break;
4994 case LEU: code1 = LTU; code2 = GTU; break;
4995 case GEU: code1 = GTU; code2 = LTU; break;
4996
4997 case EQ: code1 = NIL; code2 = NE; break;
4998 case NE: code2 = NIL; break;
4999
5000 default:
5001 abort ();
5002 }
5003
5004 /*
5005 * a < b =>
5006 * if (hi(a) < hi(b)) goto true;
5007 * if (hi(a) > hi(b)) goto false;
5008 * if (lo(a) < lo(b)) goto true;
5009 * false:
5010 */
5011
5012 ix86_compare_op0 = hi[0];
5013 ix86_compare_op1 = hi[1];
5014
5015 if (code1 != NIL)
5016 ix86_expand_branch (code1, label);
5017 if (code2 != NIL)
5018 ix86_expand_branch (code2, label2);
5019
5020 ix86_compare_op0 = lo[0];
5021 ix86_compare_op1 = lo[1];
5022 ix86_expand_branch (code3, label);
5023
5024 if (code2 != NIL)
5025 emit_label (label2);
5026 return;
5027 }
5028
5029 default:
5030 abort ();
5031 }
5032 }
5033
5034 int
5035 ix86_expand_setcc (code, dest)
5036 enum rtx_code code;
5037 rtx dest;
5038 {
5039 rtx ret, tmp;
5040 int type;
5041
5042 if (GET_MODE (ix86_compare_op0) == DImode)
5043 return 0; /* FAIL */
5044
5045 /* Three modes of generation:
5046 0 -- destination does not overlap compare sources:
5047 clear dest first, emit strict_low_part setcc.
5048 1 -- destination does overlap compare sources:
5049 emit subreg setcc, zero extend.
5050 2 -- destination is in QImode:
5051 emit setcc only.
5052 */
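/* For illustration, type 0 on a 32-bit destination emits roughly
      xorl  %eax, %eax
      setcc %al
   with the setcc wrapped in a strict_low_part so the clear of the full
   register is known to survive.  */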
5053
5054 type = 0;
5055
5056 if (GET_MODE (dest) == QImode)
5057 type = 2;
5058 else if (reg_overlap_mentioned_p (dest, ix86_compare_op0)
5059 || reg_overlap_mentioned_p (dest, ix86_compare_op1))
5060 type = 1;
5061
5062 if (type == 0)
5063 emit_move_insn (dest, const0_rtx);
5064
5065 ret = ix86_expand_compare (code);
5066 PUT_MODE (ret, QImode);
5067
5068 tmp = dest;
5069 if (type == 0)
5070 {
5071 tmp = gen_lowpart (QImode, dest);
5072 tmp = gen_rtx_STRICT_LOW_PART (VOIDmode, tmp);
5073 }
5074 else if (type == 1)
5075 {
5076 if (!cse_not_expected)
5077 tmp = gen_reg_rtx (QImode);
5078 else
5079 tmp = gen_lowpart (QImode, dest);
5080 }
5081
5082 emit_insn (gen_rtx_SET (VOIDmode, tmp, ret));
5083
5084 if (type == 1)
5085 {
5086 rtx clob;
5087
5088 tmp = gen_rtx_ZERO_EXTEND (GET_MODE (dest), tmp);
5089 tmp = gen_rtx_SET (VOIDmode, dest, tmp);
5090 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
5091 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
5092 emit_insn (tmp);
5093 }
5094
5095 return 1; /* DONE */
5096 }
5097
5098 int
5099 ix86_expand_int_movcc (operands)
5100 rtx operands[];
5101 {
5102 enum rtx_code code = GET_CODE (operands[1]), compare_code;
5103 rtx compare_seq, compare_op;
5104
5105 /* When the compare code is not LTU or GEU, we cannot use the sbbl case.
5106 When the comparison is done with an immediate, we can convert it to LTU
5107 or GEU by adjusting the constant. */
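/* For example, "x <= 5" (LEU) becomes "x < 6" (LTU); the 0xffffffff
   immediate is excluded below because adding 1 to it would wrap.  */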
5108
5109 if ((code == LEU || code == GTU)
5110 && GET_CODE (ix86_compare_op1) == CONST_INT
5111 && GET_MODE (operands[0]) != HImode
5112 && (unsigned int)INTVAL (ix86_compare_op1) != 0xffffffff
5113 && GET_CODE (operands[2]) == CONST_INT
5114 && GET_CODE (operands[3]) == CONST_INT)
5115 {
5116 if (code == LEU)
5117 code = LTU;
5118 else
5119 code = GEU;
5120 ix86_compare_op1 = GEN_INT (INTVAL (ix86_compare_op1) + 1);
5121 }
5122
5123 start_sequence ();
5124 compare_op = ix86_expand_compare (code);
5125 compare_seq = gen_sequence ();
5126 end_sequence ();
5127
5128 compare_code = GET_CODE (compare_op);
5129
5130 /* Don't attempt mode expansion here -- if we had to expand 5 or 6
5131 HImode insns, we'd be swallowed in word prefix ops. */
5132
5133 if (GET_MODE (operands[0]) != HImode
5134 && GET_CODE (operands[2]) == CONST_INT
5135 && GET_CODE (operands[3]) == CONST_INT)
5136 {
5137 rtx out = operands[0];
5138 HOST_WIDE_INT ct = INTVAL (operands[2]);
5139 HOST_WIDE_INT cf = INTVAL (operands[3]);
5140 HOST_WIDE_INT diff;
5141
5142 if (compare_code == LTU || compare_code == GEU)
5143 {
5144
5145 /* Detect overlap between destination and compare sources; if so, work in a temporary. */
5146 rtx tmp = out;
5147
5148 /* To simplify the rest of the code, restrict to the GEU case. */
5149 if (compare_code == LTU)
5150 {
5151 HOST_WIDE_INT t = ct;
5152 ct = cf;
5153 cf = t;
5154 compare_code = reverse_condition (compare_code);
5155 code = reverse_condition (code);
5156 }
5157 diff = ct - cf;
5158
5159 if (reg_overlap_mentioned_p (out, ix86_compare_op0)
5160 || reg_overlap_mentioned_p (out, ix86_compare_op1))
5161 tmp = gen_reg_rtx (SImode);
5162
5163 emit_insn (compare_seq);
5164 emit_insn (gen_x86_movsicc_0_m1 (tmp));
5165
5166 if (diff == 1)
5167 {
5168 /*
5169 * cmpl op0,op1
5170 * sbbl dest,dest
5171 * [addl dest, ct]
5172 *
5173 * Size 5 - 8.
5174 */
5175 if (ct)
5176 emit_insn (gen_addsi3 (tmp, tmp, GEN_INT (ct)));
5177 }
5178 else if (cf == -1)
5179 {
5180 /*
5181 * cmpl op0,op1
5182 * sbbl dest,dest
5183 * orl $ct, dest
5184 *
5185 * Size 8.
5186 */
5187 emit_insn (gen_iorsi3 (tmp, tmp, GEN_INT (ct)));
5188 }
5189 else if (diff == -1 && ct)
5190 {
5191 /*
5192 * cmpl op0,op1
5193 * sbbl dest,dest
5194 * xorl $-1, dest
5195 * [addl dest, cf]
5196 *
5197 * Size 8 - 11.
5198 */
5199 emit_insn (gen_one_cmplsi2 (tmp, tmp));
5200 if (cf)
5201 emit_insn (gen_addsi3 (tmp, tmp, GEN_INT (cf)));
5202 }
5203 else
5204 {
5205 /*
5206 * cmpl op0,op1
5207 * sbbl dest,dest
5208 * andl cf - ct, dest
5209 * [addl dest, ct]
5210 *
5211 * Size 8 - 11.
5212 */
5213 emit_insn (gen_andsi3 (tmp, tmp, GEN_INT (cf - ct)));
5214 if (ct)
5215 emit_insn (gen_addsi3 (tmp, tmp, GEN_INT (ct)));
5216 }
5217
5218 if (tmp != out)
5219 emit_move_insn (out, tmp);
5220
5221 return 1; /* DONE */
5222 }
5223
5224 diff = ct - cf;
5225 if (diff < 0)
5226 {
5227 HOST_WIDE_INT tmp;
5228 tmp = ct, ct = cf, cf = tmp;
5229 diff = -diff;
5230 compare_code = reverse_condition (compare_code);
5231 code = reverse_condition (code);
5232 }
5233 if (diff == 1 || diff == 2 || diff == 4 || diff == 8
5234 || diff == 3 || diff == 5 || diff == 9)
5235 {
5236 /*
5237 * xorl dest,dest
5238 * cmpl op1,op2
5239 * setcc dest
5240 * lea cf(dest*(ct-cf)),dest
5241 *
5242 * Size 14.
5243 *
5244 * This also catches the degenerate setcc-only case.
5245 */
5246
5247 rtx tmp;
5248 int nops;
5249
5250 out = emit_store_flag (out, code, ix86_compare_op0,
5251 ix86_compare_op1, VOIDmode, 0, 1);
5252
5253 nops = 0;
5254 if (diff == 1)
5255 tmp = out;
5256 else
5257 {
5258 tmp = gen_rtx_MULT (SImode, out, GEN_INT (diff & ~1));
5259 nops++;
5260 if (diff & 1)
5261 {
5262 tmp = gen_rtx_PLUS (SImode, tmp, out);
5263 nops++;
5264 }
5265 }
5266 if (cf != 0)
5267 {
5268 tmp = gen_rtx_PLUS (SImode, tmp, GEN_INT (cf));
5269 nops++;
5270 }
5271 if (tmp != out)
5272 {
5273 if (nops == 0)
5274 emit_move_insn (out, tmp);
5275 else if (nops == 1)
5276 {
5277 rtx clob;
5278
5279 clob = gen_rtx_REG (CCmode, FLAGS_REG);
5280 clob = gen_rtx_CLOBBER (VOIDmode, clob);
5281
5282 tmp = gen_rtx_SET (VOIDmode, out, tmp);
5283 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
5284 emit_insn (tmp);
5285 }
5286 else
5287 emit_insn (gen_rtx_SET (VOIDmode, out, tmp));
5288 }
5289 if (out != operands[0])
5290 emit_move_insn (operands[0], out);
5291
5292 return 1; /* DONE */
5293 }
5294
5295 /*
5296 * General case: Jumpful:
5297 * xorl dest,dest cmpl op1, op2
5298 * cmpl op1, op2 movl ct, dest
5299 * setcc dest jcc 1f
5300 * decl dest movl cf, dest
5301 * andl (cf-ct),dest 1:
5302 * addl ct,dest
5303 *
5304 * Size 20. Size 14.
5305 *
5306 * This is reasonably steep, but branch mispredict costs are
5307 * high on modern cpus, so consider failing only if optimizing
5308 * for space.
5309 *
5310 * %%% Parameterize branch_cost on the tuning architecture, then
5311 * use that. The 80386 couldn't care less about mispredicts.
5312 */
5313
5314 if (!optimize_size && !TARGET_CMOVE)
5315 {
5316 if (ct == 0)
5317 {
5318 ct = cf;
5319 cf = 0;
5320 compare_code = reverse_condition (compare_code);
5321 code = reverse_condition (code);
5322 }
5323
5324 out = emit_store_flag (out, code, ix86_compare_op0,
5325 ix86_compare_op1, VOIDmode, 0, 1);
5326
5327 emit_insn (gen_addsi3 (out, out, constm1_rtx));
5328 emit_insn (gen_andsi3 (out, out, GEN_INT (cf-ct)));
5329 if (ct != 0)
5330 emit_insn (gen_addsi3 (out, out, GEN_INT (ct)));
5331 if (out != operands[0])
5332 emit_move_insn (operands[0], out);
5333
5334 return 1; /* DONE */
5335 }
5336 }
5337
5338 if (!TARGET_CMOVE)
5339 {
5340 /* Try a few things more with specific constants and a variable. */
5341
5342 optab op;
5343 rtx var, orig_out, out, tmp;
5344
5345 if (optimize_size)
5346 return 0; /* FAIL */
5347
5348 /* If one of the two operands is an interesting constant, load a
5349 constant with the above and mask it in with a logical operation. */
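/* For instance, "x = c ? y : 0" becomes "t = c ? -1 : 0; x = t & y",
   and "x = c ? y : -1" becomes "t = c ? 0 : -1; x = t | y".  */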
5350
5351 if (GET_CODE (operands[2]) == CONST_INT)
5352 {
5353 var = operands[3];
5354 if (INTVAL (operands[2]) == 0)
5355 operands[3] = constm1_rtx, op = and_optab;
5356 else if (INTVAL (operands[2]) == -1)
5357 operands[3] = const0_rtx, op = ior_optab;
5358 else
5359 return 0; /* FAIL */
5360 }
5361 else if (GET_CODE (operands[3]) == CONST_INT)
5362 {
5363 var = operands[2];
5364 if (INTVAL (operands[3]) == 0)
5365 operands[2] = constm1_rtx, op = and_optab;
5366 else if (INTVAL (operands[3]) == -1)
5367 operands[2] = const0_rtx, op = ior_optab;
5368 else
5369 return 0; /* FAIL */
5370 }
5371 else
5372 return 0; /* FAIL */
5373
5374 orig_out = operands[0];
5375 tmp = gen_reg_rtx (GET_MODE (orig_out));
5376 operands[0] = tmp;
5377
5378 /* Recurse to get the constant loaded. */
5379 if (ix86_expand_int_movcc (operands) == 0)
5380 return 0; /* FAIL */
5381
5382 /* Mask in the interesting variable. */
5383 out = expand_binop (GET_MODE (orig_out), op, var, tmp, orig_out, 0,
5384 OPTAB_WIDEN);
5385 if (out != orig_out)
5386 emit_move_insn (orig_out, out);
5387
5388 return 1; /* DONE */
5389 }
5390
5391 /*
5392 * For comparison with above,
5393 *
5394 * movl cf,dest
5395 * movl ct,tmp
5396 * cmpl op1,op2
5397 * cmovcc tmp,dest
5398 *
5399 * Size 15.
5400 */
5401
5402 if (! nonimmediate_operand (operands[2], GET_MODE (operands[0])))
5403 operands[2] = force_reg (GET_MODE (operands[0]), operands[2]);
5404 if (! nonimmediate_operand (operands[3], GET_MODE (operands[0])))
5405 operands[3] = force_reg (GET_MODE (operands[0]), operands[3]);
5406
5407 emit_insn (compare_seq);
5408 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
5409 gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
5410 compare_op, operands[2],
5411 operands[3])));
5412
5413 return 1; /* DONE */
5414 }
5415
5416 int
5417 ix86_expand_fp_movcc (operands)
5418 rtx operands[];
5419 {
5420 enum rtx_code code;
5421 enum machine_mode mode;
5422 rtx tmp;
5423
5424 /* The floating point conditional move instructions don't directly
5425 support conditions resulting from a signed integer comparison. */
5426
5427 code = GET_CODE (operands[1]);
5428 switch (code)
5429 {
5430 case LT:
5431 case LE:
5432 case GE:
5433 case GT:
5434 tmp = gen_reg_rtx (QImode);
5435 ix86_expand_setcc (code, tmp);
5436 code = NE;
5437 ix86_compare_op0 = tmp;
5438 ix86_compare_op1 = const0_rtx;
5439 break;
5440
5441 default:
5442 break;
5443 }
5444
5445 mode = SELECT_CC_MODE (code, ix86_compare_op0, ix86_compare_op1);
5446 emit_insn (gen_rtx_SET (VOIDmode, gen_rtx_REG (mode, FLAGS_REG),
5447 gen_rtx_COMPARE (mode,
5448 ix86_compare_op0,
5449 ix86_compare_op1)));
5450 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
5451 gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
5452 gen_rtx_fmt_ee (code, VOIDmode,
5453 gen_rtx_REG (mode, FLAGS_REG),
5454 const0_rtx),
5455 operands[2],
5456 operands[3])));
5457
5458 return 1;
5459 }
5460
5461 /* Split operands 0 and 1 into SImode parts. Similar to split_di, but
5462 works for floating point parameters and non-offsettable memories.
5463 For pushes, it returns just stack offsets; the values will be saved
5464 in the right order. At most three parts are generated. */
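/* For example, a DFmode operand yields two SImode parts and an XFmode
   operand three; a constant pool reference is split back into its
   immediate words.  */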
5465
5466 static void
5467 ix86_split_to_parts (operand, parts, mode)
5468 rtx operand;
5469 rtx *parts;
5470 enum machine_mode mode;
5471 {
5472 int size = GET_MODE_SIZE (mode) / 4;
5473
5474 if (GET_CODE (operand) == REG && MMX_REGNO_P (REGNO (operand)))
5475 abort ();
5476 if (size < 2 || size > 3)
5477 abort ();
5478
5479 /* Optimize constant pool reference to immediates. This is used by fp moves,
5480 that force all constants to memory to allow combining. */
5481
5482 if (GET_CODE (operand) == MEM
5483 && GET_CODE (XEXP (operand, 0)) == SYMBOL_REF
5484 && CONSTANT_POOL_ADDRESS_P (XEXP (operand, 0)))
5485 operand = get_pool_constant (XEXP (operand, 0));
5486
5487 if (GET_CODE (operand) == MEM && !offsettable_memref_p (operand))
5488 {
5489 /* The only non-offsettable memories we handle are pushes. */
5490 if (! push_operand (operand, VOIDmode))
5491 abort ();
5492
5493 PUT_MODE (operand, SImode);
5494 parts[0] = parts[1] = parts[2] = operand;
5495 }
5496 else
5497 {
5498 if (mode == DImode)
5499 split_di (&operand, 1, &parts[0], &parts[1]);
5500 else
5501 {
5502 if (REG_P (operand))
5503 {
5504 if (!reload_completed)
5505 abort ();
5506 parts[0] = gen_rtx_REG (SImode, REGNO (operand) + 0);
5507 parts[1] = gen_rtx_REG (SImode, REGNO (operand) + 1);
5508 if (size == 3)
5509 parts[2] = gen_rtx_REG (SImode, REGNO (operand) + 2);
5510 }
5511 else if (offsettable_memref_p (operand))
5512 {
5513 PUT_MODE (operand, SImode);
5514 parts[0] = operand;
5515 parts[1] = adj_offsettable_operand (operand, 4);
5516 if (size == 3)
5517 parts[2] = adj_offsettable_operand (operand, 8);
5518 }
5519 else if (GET_CODE (operand) == CONST_DOUBLE)
5520 {
5521 REAL_VALUE_TYPE r;
5522 long l[3];
5523
5524 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
5525 switch (mode)
5526 {
5527 case XFmode:
5528 REAL_VALUE_TO_TARGET_LONG_DOUBLE (r, l);
5529 parts[2] = GEN_INT (l[2]);
5530 break;
5531 case DFmode:
5532 REAL_VALUE_TO_TARGET_DOUBLE (r, l);
5533 break;
5534 default:
5535 abort ();
5536 }
5537 parts[1] = GEN_INT (l[1]);
5538 parts[0] = GEN_INT (l[0]);
5539 }
5540 else
5541 abort ();
5542 }
5543 }
5544
5545 return;
5546 }
5547
5548 /* Emit insns to perform a move or push of DI, DF, and XF values.
5549 Return false when normal moves are needed; true when all required
5550 insns have been emitted. Operands 2-4 contain the input values
5551 in the correct order; operands 5-7 contain the output values. */
5552
5553 int
5554 ix86_split_long_move (operands1)
5555 rtx operands1[];
5556 {
5557 rtx part[2][3];
5558 rtx operands[2];
5559 int size = GET_MODE_SIZE (GET_MODE (operands1[0])) / 4;
5560 int push = 0;
5561 int collisions = 0;
5562
5563 /* Make our own copy to avoid clobbering the operands. */
5564 operands[0] = copy_rtx (operands1[0]);
5565 operands[1] = copy_rtx (operands1[1]);
5566
5567 if (size < 2 || size > 3)
5568 abort ();
5569
5570 /* The only non-offsettable memory we handle is push. */
5571 if (push_operand (operands[0], VOIDmode))
5572 push = 1;
5573 else if (GET_CODE (operands[0]) == MEM
5574 && ! offsettable_memref_p (operands[0]))
5575 abort ();
5576
5577 ix86_split_to_parts (operands[0], part[0], GET_MODE (operands1[0]));
5578 ix86_split_to_parts (operands[1], part[1], GET_MODE (operands1[0]));
5579
5580 /* When emitting a push, take care with source operands on the stack. */
5581 if (push && GET_CODE (operands[1]) == MEM
5582 && reg_overlap_mentioned_p (stack_pointer_rtx, operands[1]))
5583 {
5584 if (size == 3)
5585 part[1][1] = part[1][2];
5586 part[1][0] = part[1][1];
5587 }
5588
5589 /* We need to do the copy in the right order in case an address register
5590 of the source overlaps the destination. */
5591 if (REG_P (part[0][0]) && GET_CODE (part[1][0]) == MEM)
5592 {
5593 if (reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0)))
5594 collisions++;
5595 if (reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
5596 collisions++;
5597 if (size == 3
5598 && reg_overlap_mentioned_p (part[0][2], XEXP (part[1][0], 0)))
5599 collisions++;
5600
5601 /* Collision in the middle part can be handled by reordering. */
5602 if (collisions == 1 && size == 3
5603 && reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
5604 {
5605 rtx tmp;
5606 tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
5607 tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
5608 }
5609
5610 /* If there are more collisions, we can't handle it by reordering.
5611 Do an lea to the last part and use only one colliding move. */
5612 else if (collisions > 1)
5613 {
5614 collisions = 1;
5615 emit_insn (gen_rtx_SET (VOIDmode, part[0][size - 1],
5616 XEXP (part[1][0], 0)));
5617 part[1][0] = change_address (part[1][0], SImode, part[0][size - 1]);
5618 part[1][1] = adj_offsettable_operand (part[1][0], 4);
5619 if (size == 3)
5620 part[1][2] = adj_offsettable_operand (part[1][0], 8);
5621 }
5622 }
5623
5624 if (push)
5625 {
5626 if (size == 3)
5627 emit_insn (gen_push (part[1][2]));
5628 emit_insn (gen_push (part[1][1]));
5629 emit_insn (gen_push (part[1][0]));
5630 return 1;
5631 }
5632
5633 /* Choose the correct order so we do not overwrite the source before it is copied. */
5634 if ((REG_P (part[0][0])
5635 && REG_P (part[1][1])
5636 && (REGNO (part[0][0]) == REGNO (part[1][1])
5637 || (size == 3
5638 && REGNO (part[0][0]) == REGNO (part[1][2]))))
5639 || (collisions > 0
5640 && reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0))))
5641 {
5642 if (size == 3)
5643 {
5644 operands1[2] = part[0][2];
5645 operands1[3] = part[0][1];
5646 operands1[4] = part[0][0];
5647 operands1[5] = part[1][2];
5648 operands1[6] = part[1][1];
5649 operands1[7] = part[1][0];
5650 }
5651 else
5652 {
5653 operands1[2] = part[0][1];
5654 operands1[3] = part[0][0];
5655 operands1[5] = part[1][1];
5656 operands1[6] = part[1][0];
5657 }
5658 }
5659 else
5660 {
5661 if (size == 3)
5662 {
5663 operands1[2] = part[0][0];
5664 operands1[3] = part[0][1];
5665 operands1[4] = part[0][2];
5666 operands1[5] = part[1][0];
5667 operands1[6] = part[1][1];
5668 operands1[7] = part[1][2];
5669 }
5670 else
5671 {
5672 operands1[2] = part[0][0];
5673 operands1[3] = part[0][1];
5674 operands1[5] = part[1][0];
5675 operands1[6] = part[1][1];
5676 }
5677 }
5678
5679 return 0;
5680 }
5681
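/* Split a DImode left shift into SImode operations.  For a constant
   count >= 32 this is simply high = low << (count - 32), low = 0; for
   smaller constants an shld/shll pair is used; for variable counts the
   32-bit boundary is fixed up after the shift.  */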
5682 void
5683 ix86_split_ashldi (operands, scratch)
5684 rtx *operands, scratch;
5685 {
5686 rtx low[2], high[2];
5687 int count;
5688
5689 if (GET_CODE (operands[2]) == CONST_INT)
5690 {
5691 split_di (operands, 2, low, high);
5692 count = INTVAL (operands[2]) & 63;
5693
5694 if (count >= 32)
5695 {
5696 emit_move_insn (high[0], low[1]);
5697 emit_move_insn (low[0], const0_rtx);
5698
5699 if (count > 32)
5700 emit_insn (gen_ashlsi3 (high[0], high[0], GEN_INT (count - 32)));
5701 }
5702 else
5703 {
5704 if (!rtx_equal_p (operands[0], operands[1]))
5705 emit_move_insn (operands[0], operands[1]);
5706 emit_insn (gen_x86_shld_1 (high[0], low[0], GEN_INT (count)));
5707 emit_insn (gen_ashlsi3 (low[0], low[0], GEN_INT (count)));
5708 }
5709 }
5710 else
5711 {
5712 if (!rtx_equal_p (operands[0], operands[1]))
5713 emit_move_insn (operands[0], operands[1]);
5714
5715 split_di (operands, 1, low, high);
5716
5717 emit_insn (gen_x86_shld_1 (high[0], low[0], operands[2]));
5718 emit_insn (gen_ashlsi3 (low[0], low[0], operands[2]));
5719
5720 if (TARGET_CMOVE && (! no_new_pseudos || scratch))
5721 {
5722 if (! no_new_pseudos)
5723 scratch = force_reg (SImode, const0_rtx);
5724 else
5725 emit_move_insn (scratch, const0_rtx);
5726
5727 emit_insn (gen_x86_shift_adj_1 (high[0], low[0], operands[2],
5728 scratch));
5729 }
5730 else
5731 emit_insn (gen_x86_shift_adj_2 (high[0], low[0], operands[2]));
5732 }
5733 }
5734
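/* Split a DImode arithmetic right shift into SImode operations.  For a
   constant count >= 32 this is low = high >> (count - 32) with high
   sign-filled via a 31-bit sar; smaller constants use an shrd/sar pair;
   variable counts fix up the 32-bit boundary afterwards.  */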
5735 void
5736 ix86_split_ashrdi (operands, scratch)
5737 rtx *operands, scratch;
5738 {
5739 rtx low[2], high[2];
5740 int count;
5741
5742 if (GET_CODE (operands[2]) == CONST_INT)
5743 {
5744 split_di (operands, 2, low, high);
5745 count = INTVAL (operands[2]) & 63;
5746
5747 if (count >= 32)
5748 {
5749 emit_move_insn (low[0], high[1]);
5750
5751 if (! reload_completed)
5752 emit_insn (gen_ashrsi3 (high[0], low[0], GEN_INT (31)));
5753 else
5754 {
5755 emit_move_insn (high[0], low[0]);
5756 emit_insn (gen_ashrsi3 (high[0], high[0], GEN_INT (31)));
5757 }
5758
5759 if (count > 32)
5760 emit_insn (gen_ashrsi3 (low[0], low[0], GEN_INT (count - 32)));
5761 }
5762 else
5763 {
5764 if (!rtx_equal_p (operands[0], operands[1]))
5765 emit_move_insn (operands[0], operands[1]);
5766 emit_insn (gen_x86_shrd_1 (low[0], high[0], GEN_INT (count)));
5767 emit_insn (gen_ashrsi3 (high[0], high[0], GEN_INT (count)));
5768 }
5769 }
5770 else
5771 {
5772 if (!rtx_equal_p (operands[0], operands[1]))
5773 emit_move_insn (operands[0], operands[1]);
5774
5775 split_di (operands, 1, low, high);
5776
5777 emit_insn (gen_x86_shrd_1 (low[0], high[0], operands[2]));
5778 emit_insn (gen_ashrsi3 (high[0], high[0], operands[2]));
5779
5780 if (TARGET_CMOVE && (! no_new_pseudos || scratch))
5781 {
5782 if (! no_new_pseudos)
5783 scratch = gen_reg_rtx (SImode);
5784 emit_move_insn (scratch, high[0]);
5785 emit_insn (gen_ashrsi3 (scratch, scratch, GEN_INT (31)));
5786 emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
5787 scratch));
5788 }
5789 else
5790 emit_insn (gen_x86_shift_adj_3 (low[0], high[0], operands[2]));
5791 }
5792 }
5793
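/* Split a DImode logical right shift into SImode operations.  For a
   constant count >= 32 this is low = high >> (count - 32), high = 0;
   smaller constants use an shrd/shrl pair; variable counts reuse the
   shift-adjust patterns with the arguments reversed.  */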
5794 void
5795 ix86_split_lshrdi (operands, scratch)
5796 rtx *operands, scratch;
5797 {
5798 rtx low[2], high[2];
5799 int count;
5800
5801 if (GET_CODE (operands[2]) == CONST_INT)
5802 {
5803 split_di (operands, 2, low, high);
5804 count = INTVAL (operands[2]) & 63;
5805
5806 if (count >= 32)
5807 {
5808 emit_move_insn (low[0], high[1]);
5809 emit_move_insn (high[0], const0_rtx);
5810
5811 if (count > 32)
5812 emit_insn (gen_lshrsi3 (low[0], low[0], GEN_INT (count - 32)));
5813 }
5814 else
5815 {
5816 if (!rtx_equal_p (operands[0], operands[1]))
5817 emit_move_insn (operands[0], operands[1]);
5818 emit_insn (gen_x86_shrd_1 (low[0], high[0], GEN_INT (count)));
5819 emit_insn (gen_lshrsi3 (high[0], high[0], GEN_INT (count)));
5820 }
5821 }
5822 else
5823 {
5824 if (!rtx_equal_p (operands[0], operands[1]))
5825 emit_move_insn (operands[0], operands[1]);
5826
5827 split_di (operands, 1, low, high);
5828
5829 emit_insn (gen_x86_shrd_1 (low[0], high[0], operands[2]));
5830 emit_insn (gen_lshrsi3 (high[0], high[0], operands[2]));
5831
5832 /* Heh. By reversing the arguments, we can reuse this pattern. */
5833 if (TARGET_CMOVE && (! no_new_pseudos || scratch))
5834 {
5835 if (! no_new_pseudos)
5836 scratch = force_reg (SImode, const0_rtx);
5837 else
5838 emit_move_insn (scratch, const0_rtx);
5839
5840 emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
5841 scratch));
5842 }
5843 else
5844 emit_insn (gen_x86_shift_adj_2 (low[0], high[0], operands[2]));
5845 }
5846 }
5847
5848 /* Expand the appropriate insns for doing strlen if not just doing
5849 repnz; scasb
5850
5851 out = result, initialized with the start address
5852 align_rtx = alignment of the address.
5853 scratch = scratch register, initialized with the start address when
5854 not aligned, otherwise undefined
5855
5856 This is just the body. It needs the initialisations mentioned above and
5857 some address computation at the end. These things are done in i386.md. */
5858
5859 void
5860 ix86_expand_strlensi_unroll_1 (out, align_rtx, scratch)
5861 rtx out, align_rtx, scratch;
5862 {
5863 int align;
5864 rtx tmp;
5865 rtx align_2_label = NULL_RTX;
5866 rtx align_3_label = NULL_RTX;
5867 rtx align_4_label = gen_label_rtx ();
5868 rtx end_0_label = gen_label_rtx ();
5869 rtx mem;
5870 rtx no_flags = gen_rtx_REG (CCNOmode, FLAGS_REG);
5871 rtx z_flags = gen_rtx_REG (CCNOmode, FLAGS_REG);
5872 rtx tmpreg = gen_reg_rtx (SImode);
5873
5874 align = 0;
5875 if (GET_CODE (align_rtx) == CONST_INT)
5876 align = INTVAL (align_rtx);
5877
5878 /* Loop to check 1..3 bytes for null to get an aligned pointer. */
5879
5880 /* Is there a known alignment and is it less than 4? */
5881 if (align < 4)
5882 {
5883 /* Is there a known alignment and is it not 2? */
5884 if (align != 2)
5885 {
5886 align_3_label = gen_label_rtx (); /* Label when aligned to 3-byte */
5887 align_2_label = gen_label_rtx (); /* Label when aligned to 2-byte */
5888
5889 /* Leave just the 3 lower bits. */
5890 align_rtx = expand_binop (SImode, and_optab, scratch, GEN_INT (3),
5891 NULL_RTX, 0, OPTAB_WIDEN);
5892
5893 emit_insn (gen_cmpsi_ccz_1 (align_rtx, const0_rtx));
5894
5895 tmp = gen_rtx_EQ (VOIDmode, z_flags, const0_rtx);
5896 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
5897 gen_rtx_LABEL_REF (VOIDmode,
5898 align_4_label),
5899 pc_rtx);
5900 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
5901
5902 emit_insn (gen_cmpsi_ccno_1 (align_rtx, GEN_INT (2)));
5903
5904 tmp = gen_rtx_EQ (VOIDmode, no_flags, const0_rtx);
5905 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
5906 gen_rtx_LABEL_REF (VOIDmode,
5907 align_2_label),
5908 pc_rtx);
5909 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
5910
5911 tmp = gen_rtx_GTU (VOIDmode, no_flags, const0_rtx);
5912 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
5913 gen_rtx_LABEL_REF (VOIDmode,
5914 align_3_label),
5915 pc_rtx);
5916 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
5917 }
5918 else
5919 {
5920 /* Since the alignment is 2, we have to check 2 or 0 bytes;
5921 check whether the pointer is already 4-byte aligned. */
5922
5923 align_rtx = expand_binop (SImode, and_optab, scratch, GEN_INT (2),
5924 NULL_RTX, 0, OPTAB_WIDEN);
5925
5926 emit_insn (gen_cmpsi_ccz_1 (align_rtx, const0_rtx));
5927
5928 tmp = gen_rtx_EQ (VOIDmode, z_flags, const0_rtx);
5929 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
5930 gen_rtx_LABEL_REF (VOIDmode,
5931 align_4_label),
5932 pc_rtx);
5933 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
5934 }
5935
5936 mem = gen_rtx_MEM (QImode, out);
5937
5938 /* Now compare the bytes. */
5939
5940 /* Compare the first few unaligned bytes on a byte-by-byte basis. */
5941 emit_insn (gen_cmpqi_ccz_1 (mem, const0_rtx));
5942
5943 tmp = gen_rtx_EQ (VOIDmode, z_flags, const0_rtx);
5944 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
5945 gen_rtx_LABEL_REF (VOIDmode, end_0_label),
5946 pc_rtx);
5947 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
5948
5949 /* Increment the address. */
5950 emit_insn (gen_addsi3 (out, out, const1_rtx));
5951
5952   /* Not needed with an alignment of 2.  */
5953 if (align != 2)
5954 {
5955 emit_label (align_2_label);
5956
5957 emit_insn (gen_cmpqi_ccz_1 (mem, const0_rtx));
5958
5959 tmp = gen_rtx_EQ (VOIDmode, z_flags, const0_rtx);
5960 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
5961 gen_rtx_LABEL_REF (VOIDmode,
5962 end_0_label),
5963 pc_rtx);
5964 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
5965
5966 emit_insn (gen_addsi3 (out, out, const1_rtx));
5967
5968 emit_label (align_3_label);
5969 }
5970
5971 emit_insn (gen_cmpqi_ccz_1 (mem, const0_rtx));
5972
5973 tmp = gen_rtx_EQ (VOIDmode, z_flags, const0_rtx);
5974 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
5975 gen_rtx_LABEL_REF (VOIDmode, end_0_label),
5976 pc_rtx);
5977 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
5978
5979 emit_insn (gen_addsi3 (out, out, const1_rtx));
5980 }
5981
5982   /* Generate a loop to check 4 bytes at a time.  It is not a good idea
5983      to align this loop: doing so only bloats the code without making it
5984      any faster.  */
5985 emit_label (align_4_label);
5986
5987 mem = gen_rtx_MEM (SImode, out);
5988 emit_move_insn (scratch, mem);
5989 emit_insn (gen_addsi3 (out, out, GEN_INT (4)));
5990
5991   /* This formula yields a nonzero result iff one of the bytes is zero.
5992      This saves three branches inside the loop and many cycles.  */
5993
5994 emit_insn (gen_addsi3 (tmpreg, scratch, GEN_INT (-0x01010101)));
5995 emit_insn (gen_one_cmplsi2 (scratch, scratch));
5996 emit_insn (gen_andsi3 (tmpreg, tmpreg, scratch));
5997 emit_insn (gen_andsi3 (tmpreg, tmpreg, GEN_INT (0x80808080)));
5998 emit_cmp_and_jump_insns (tmpreg, const0_rtx, EQ, 0, SImode, 1, 0, align_4_label);
5999
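  /* An illustrative C rendering of the test built above (a sketch, not
     part of the compiler):

	static int
	has_zero_byte (unsigned int w)
	{
	  return ((w - 0x01010101) & ~w & 0x80808080) != 0;
	}

     For a byte B, B - 1 has bit 7 set only when B is zero or B >= 0x81,
     while ~B has bit 7 set only when B < 0x80, so ANDing the two flags
     exactly the zero bytes.  A borrow can spill into a higher byte only
     past a byte that is itself zero, so the whole-word test is exact.  */
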
6000 if (TARGET_CMOVE)
6001 {
6002 rtx reg = gen_reg_rtx (SImode);
6003 emit_move_insn (reg, tmpreg);
6004 emit_insn (gen_lshrsi3 (reg, reg, GEN_INT (16)));
6005
6006 /* If zero is not in the first two bytes, move two bytes forward. */
6007 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
6008 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
6009 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
6010 emit_insn (gen_rtx_SET (VOIDmode, tmpreg,
6011 gen_rtx_IF_THEN_ELSE (SImode, tmp,
6012 reg,
6013 tmpreg)));
6014 /* Emit lea manually to avoid clobbering of flags. */
6015 emit_insn (gen_rtx_SET (SImode, reg,
6016 gen_rtx_PLUS (SImode, out, GEN_INT (2))));
6017
6018 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
6019 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
6020 emit_insn (gen_rtx_SET (VOIDmode, out,
6021 gen_rtx_IF_THEN_ELSE (SImode, tmp,
6022 reg,
6023 out)));
6024
6025 }
6026 else
6027 {
6028 rtx end_2_label = gen_label_rtx ();
6029 /* Is zero in the first two bytes? */
6030
6031 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
6032 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
6033 tmp = gen_rtx_NE (VOIDmode, tmp, const0_rtx);
6034 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
6035 gen_rtx_LABEL_REF (VOIDmode, end_2_label),
6036 pc_rtx);
6037 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
6038 JUMP_LABEL (tmp) = end_2_label;
6039
6040 /* Not in the first two. Move two bytes forward. */
6041 emit_insn (gen_lshrsi3 (tmpreg, tmpreg, GEN_INT (16)));
6042 emit_insn (gen_addsi3 (out, out, GEN_INT (2)));
6043
6044 emit_label (end_2_label);
6045
6046 }
6047
6048   /* Avoid a branch in the final byte adjustment.  */
6049 tmpreg = gen_lowpart (QImode, tmpreg);
6050 emit_insn (gen_addqi3_cc (tmpreg, tmpreg, tmpreg));
6051 emit_insn (gen_subsi3_carry (out, out, GEN_INT (3)));
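
  /* Sketch of the branchless fix-up above (illustrative): bit 7 of the
     low byte of tmpreg is set exactly when the zero was the first byte
     of the remaining pair.  Doubling that byte with addqi3_cc moves bit
     7 into the carry flag, and subsi3_carry then performs

	out = out - 3 - carry;

     (a single sbb), landing OUT on the zero byte with no jump.  */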
6052
6053 emit_label (end_0_label);
6054 }
6055 \f
6056 /* Clear stack slot assignments remembered from previous functions.
6057 This is called from INIT_EXPANDERS once before RTL is emitted for each
6058 function. */
6059
6060 static void
6061 ix86_init_machine_status (p)
6062 struct function *p;
6063 {
6064 enum machine_mode mode;
6065 int n;
6066 p->machine
6067 = (struct machine_function *) xmalloc (sizeof (struct machine_function));
6068
6069 for (mode = VOIDmode; (int) mode < (int) MAX_MACHINE_MODE;
6070 mode = (enum machine_mode) ((int) mode + 1))
6071 for (n = 0; n < MAX_386_STACK_LOCALS; n++)
6072 ix86_stack_locals[(int) mode][n] = NULL_RTX;
6073 }
6074
6075 /* Mark machine specific bits of P for GC. */
6076 static void
6077 ix86_mark_machine_status (p)
6078 struct function *p;
6079 {
6080 enum machine_mode mode;
6081 int n;
6082
6083 for (mode = VOIDmode; (int) mode < (int) MAX_MACHINE_MODE;
6084 mode = (enum machine_mode) ((int) mode + 1))
6085 for (n = 0; n < MAX_386_STACK_LOCALS; n++)
6086 ggc_mark_rtx (p->machine->stack_locals[(int) mode][n]);
6087 }
6088
6089 /* Return a MEM corresponding to a stack slot with mode MODE.
6090 Allocate a new slot if necessary.
6091
6092 The RTL for a function can have several slots available: N is
6093 which slot to use. */
6094
6095 rtx
6096 assign_386_stack_local (mode, n)
6097 enum machine_mode mode;
6098 int n;
6099 {
6100 if (n < 0 || n >= MAX_386_STACK_LOCALS)
6101 abort ();
6102
6103 if (ix86_stack_locals[(int) mode][n] == NULL_RTX)
6104 ix86_stack_locals[(int) mode][n]
6105 = assign_stack_local (mode, GET_MODE_SIZE (mode), 0);
6106
6107 return ix86_stack_locals[(int) mode][n];
6108 }
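
/* Illustrative use (an assumed example, not a quote from this file): the
   floating point patterns grab slots to spill the FPU control word,
   roughly

	rtx cw_slot = assign_386_stack_local (HImode, 0);

   and repeated calls with the same MODE and N share one slot per
   function.  */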
6109 \f
6110 /* Calculate the length of the memory address in the instruction
6111 encoding. Does not include the one-byte modrm, opcode, or prefix. */
6112
6113 static int
6114 memory_address_length (addr)
6115 rtx addr;
6116 {
6117 struct ix86_address parts;
6118 rtx base, index, disp;
6119 int len;
6120
6121 if (GET_CODE (addr) == PRE_DEC
6122 || GET_CODE (addr) == POST_INC)
6123 return 0;
6124
6125 if (! ix86_decompose_address (addr, &parts))
6126 abort ();
6127
6128 base = parts.base;
6129 index = parts.index;
6130 disp = parts.disp;
6131 len = 0;
6132
6133 /* Register Indirect. */
6134 if (base && !index && !disp)
6135 {
6136 /* Special cases: ebp and esp need the two-byte modrm form. */
6137 if (addr == stack_pointer_rtx
6138 || addr == arg_pointer_rtx
6139 || addr == frame_pointer_rtx
6140 || addr == hard_frame_pointer_rtx)
6141 len = 1;
6142 }
6143
6144 /* Direct Addressing. */
6145 else if (disp && !base && !index)
6146 len = 4;
6147
6148 else
6149 {
6150 /* Find the length of the displacement constant. */
6151 if (disp)
6152 {
6153 if (GET_CODE (disp) == CONST_INT
6154 && CONST_OK_FOR_LETTER_P (INTVAL (disp), 'K'))
6155 len = 1;
6156 else
6157 len = 4;
6158 }
6159
6160 /* An index requires the two-byte modrm form. */
6161 if (index)
6162 len += 1;
6163 }
6164
6165 return len;
6166 }
6167
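/* Worked examples for the cases above (illustrative, AT&T syntax): the
   address in "movl (%eax), %ecx" needs no displacement and no SIB, so
   0; (%esp) and (%ebp) need the longer form, so 1; an absolute address
   is a four-byte disp32, so 4; 8(%eax) fits a disp8, so 1; and
   8(%eax,%ebx,2) adds a SIB byte for the index, so 1 + 1 = 2.  */
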
6168 /* Compute the default value for the "length_immediate" attribute.  When
6169    SHORTFORM is set, expect that the insn has an 8-bit immediate alternative.  */
6170 int
6171 ix86_attr_length_immediate_default (insn, shortform)
6172 rtx insn;
6173 int shortform;
6174 {
6175 int len = 0;
6176 int i;
6177 extract_insn (insn);
6178 for (i = recog_data.n_operands - 1; i >= 0; --i)
6179 if (CONSTANT_P (recog_data.operand[i]))
6180 {
6181 if (len)
6182 abort ();
6183 if (shortform
6184 && GET_CODE (recog_data.operand[i]) == CONST_INT
6185 && CONST_OK_FOR_LETTER_P (INTVAL (recog_data.operand[i]), 'K'))
6186 len = 1;
6187 else
6188 {
6189 switch (get_attr_mode (insn))
6190 {
6191 case MODE_QI:
6192 	      len += 1;
6193 	      break;
6194 	    case MODE_HI:
6195 	      len += 2;
6196 	      break;
6197 	    case MODE_SI:
6198 	      len += 4;
6199 break;
6200 default:
6201 fatal_insn ("Unknown insn mode", insn);
6202 }
6203 }
6204 }
6205 return len;
6206 }
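
/* Examples (illustrative): "addl $12, %eax" has an imm8 alternative, so
   with SHORTFORM set this returns 1; "addl $100000, %eax" needs a full
   SImode immediate, so 4; an HImode immediate insn contributes 2.  */
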
6207 /* Compute default value for "length_address" attribute. */
6208 int
6209 ix86_attr_length_address_default (insn)
6210 rtx insn;
6211 {
6212 int i;
6213 extract_insn (insn);
6214 for (i = recog_data.n_operands - 1; i >= 0; --i)
6215 if (GET_CODE (recog_data.operand[i]) == MEM)
6216 {
6218 	return memory_address_length (XEXP (recog_data.operand[i], 0));
6221 }
6222 \f
6223 /* Return the maximum number of instructions a cpu can issue. */
6224
6225 int
6226 ix86_issue_rate ()
6227 {
6228 switch (ix86_cpu)
6229 {
6230 case PROCESSOR_PENTIUM:
6231 case PROCESSOR_K6:
6232 return 2;
6233
6234 case PROCESSOR_PENTIUMPRO:
6235 return 3;
6236
6237 default:
6238 return 1;
6239 }
6240 }
6241
6242 /* A subroutine of ix86_adjust_cost -- return true iff INSN reads flags set
6243    by DEP_INSN and nothing else set by DEP_INSN.  */
6244
6245 static int
6246 ix86_flags_dependant (insn, dep_insn, insn_type)
6247 rtx insn, dep_insn;
6248 enum attr_type insn_type;
6249 {
6250 rtx set, set2;
6251
6252 /* Simplify the test for uninteresting insns. */
6253 if (insn_type != TYPE_SETCC
6254 && insn_type != TYPE_ICMOV
6255 && insn_type != TYPE_FCMOV
6256 && insn_type != TYPE_IBR)
6257 return 0;
6258
6259 if ((set = single_set (dep_insn)) != 0)
6260 {
6261 set = SET_DEST (set);
6262 set2 = NULL_RTX;
6263 }
6264 else if (GET_CODE (PATTERN (dep_insn)) == PARALLEL
6265 && XVECLEN (PATTERN (dep_insn), 0) == 2
6266 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 0)) == SET
6267 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 1)) == SET)
6268 {
6269 set = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
6270       set2 = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 1));
6271 }
6272 else
6273 return 0;
6274
6275 if (GET_CODE (set) != REG || REGNO (set) != FLAGS_REG)
6276 return 0;
6277
6278   /* This test is true if the dependent insn reads the flags but
6279      not any other potentially set register.  */
6280 if (!reg_overlap_mentioned_p (set, PATTERN (insn)))
6281 return 0;
6282
6283 if (set2 && reg_overlap_mentioned_p (set2, PATTERN (insn)))
6284 return 0;
6285
6286 return 1;
6287 }
6288
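/* Example (illustrative): for "cmpl %eax, %ebx" followed by "je .L1",
   the jump reads only the flags the compare sets, so the function above
   returns nonzero and ix86_adjust_cost treats the pair as free on the
   Pentium.  */
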
6289 /* A subroutine of ix86_adjust_cost -- return true iff INSN has a memory
6290 address with operands set by DEP_INSN. */
6291
6292 static int
6293 ix86_agi_dependant (insn, dep_insn, insn_type)
6294 rtx insn, dep_insn;
6295 enum attr_type insn_type;
6296 {
6297 rtx addr;
6298
6299 if (insn_type == TYPE_LEA)
6300 {
6301 addr = PATTERN (insn);
6302 if (GET_CODE (addr) == SET)
6303 ;
6304 else if (GET_CODE (addr) == PARALLEL
6305 && GET_CODE (XVECEXP (addr, 0, 0)) == SET)
6306 addr = XVECEXP (addr, 0, 0);
6307 else
6308 abort ();
6309 addr = SET_SRC (addr);
6310 }
6311 else
6312 {
6313 int i;
6314 extract_insn (insn);
6315 for (i = recog_data.n_operands - 1; i >= 0; --i)
6316 if (GET_CODE (recog_data.operand[i]) == MEM)
6317 {
6318 addr = XEXP (recog_data.operand[i], 0);
6319 goto found;
6320 }
6321 return 0;
6322 found:;
6323 }
6324
6325 return modified_in_p (addr, dep_insn);
6326 }
6327
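/* Example of an AGI (illustrative): on the Pentium, in

	addl $4, %ebx
	movl (%ebx), %eax

   the load must wait for the address in %ebx to be generated; this is
   the extra cycle ix86_adjust_cost adds below for PROCESSOR_PENTIUM.  */
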
6328 int
6329 ix86_adjust_cost (insn, link, dep_insn, cost)
6330 rtx insn, link, dep_insn;
6331 int cost;
6332 {
6333 enum attr_type insn_type, dep_insn_type;
6334 enum attr_memory memory;
6335 rtx set, set2;
6336 int dep_insn_code_number;
6337
6338   /* Anti- and output dependencies have zero cost on all CPUs.  */
6339 if (REG_NOTE_KIND (link) != 0)
6340 return 0;
6341
6342 dep_insn_code_number = recog_memoized (dep_insn);
6343
6344 /* If we can't recognize the insns, we can't really do anything. */
6345 if (dep_insn_code_number < 0 || recog_memoized (insn) < 0)
6346 return cost;
6347
6348 insn_type = get_attr_type (insn);
6349 dep_insn_type = get_attr_type (dep_insn);
6350
6351 /* Prologue and epilogue allocators can have a false dependency on ebp.
6352 This results in one cycle extra stall on Pentium prologue scheduling,
6353 so handle this important case manually. */
6354 if (dep_insn_code_number == CODE_FOR_pro_epilogue_adjust_stack
6355 && dep_insn_type == TYPE_ALU
6356 && !reg_mentioned_p (stack_pointer_rtx, insn))
6357 return 0;
6358
6359 switch (ix86_cpu)
6360 {
6361 case PROCESSOR_PENTIUM:
6362 /* Address Generation Interlock adds a cycle of latency. */
6363 if (ix86_agi_dependant (insn, dep_insn, insn_type))
6364 cost += 1;
6365
6366 /* ??? Compares pair with jump/setcc. */
6367 if (ix86_flags_dependant (insn, dep_insn, insn_type))
6368 cost = 0;
6369
6370       /* Floating point stores require the value to be ready one cycle earlier.  */
6371 if (insn_type == TYPE_FMOV
6372 && get_attr_memory (insn) == MEMORY_STORE
6373 && !ix86_agi_dependant (insn, dep_insn, insn_type))
6374 cost += 1;
6375 break;
6376
6377 case PROCESSOR_PENTIUMPRO:
6378 /* Since we can't represent delayed latencies of load+operation,
6379 increase the cost here for non-imov insns. */
6380 if (dep_insn_type != TYPE_IMOV
6381 && dep_insn_type != TYPE_FMOV
6382 	  && ((memory = get_attr_memory (dep_insn)) == MEMORY_LOAD
6383 	      || memory == MEMORY_BOTH))
6384 cost += 1;
6385
6386 /* INT->FP conversion is expensive. */
6387 if (get_attr_fp_int_src (dep_insn))
6388 cost += 5;
6389
6390 /* There is one cycle extra latency between an FP op and a store. */
6391 if (insn_type == TYPE_FMOV
6392 && (set = single_set (dep_insn)) != NULL_RTX
6393 && (set2 = single_set (insn)) != NULL_RTX
6394 && rtx_equal_p (SET_DEST (set), SET_SRC (set2))
6395 && GET_CODE (SET_DEST (set2)) == MEM)
6396 cost += 1;
6397 break;
6398
6399 case PROCESSOR_K6:
6400 /* The esp dependency is resolved before the instruction is really
6401 finished. */
6402 if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
6403 && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
6404 return 1;
6405
6406 /* Since we can't represent delayed latencies of load+operation,
6407 increase the cost here for non-imov insns. */
6408       if ((memory = get_attr_memory (dep_insn)) == MEMORY_LOAD
6409 	  || memory == MEMORY_BOTH)
6410 cost += (dep_insn_type != TYPE_IMOV) ? 2 : 1;
6411
6412 /* INT->FP conversion is expensive. */
6413 if (get_attr_fp_int_src (dep_insn))
6414 cost += 5;
6415 break;
6416
6417 case PROCESSOR_ATHLON:
6418 if ((memory = get_attr_memory (dep_insn)) == MEMORY_LOAD
6419 || memory == MEMORY_BOTH)
6420 {
6421 if (dep_insn_type == TYPE_IMOV || dep_insn_type == TYPE_FMOV)
6422 cost += 2;
6423 else
6424 cost += 3;
6425 	}
      break;
6426
6427 default:
6428 break;
6429 }
6430
6431 return cost;
6432 }
6433
6434 static union
6435 {
6436 struct ppro_sched_data
6437 {
6438 rtx decode[3];
6439 int issued_this_cycle;
6440 } ppro;
6441 } ix86_sched_data;
6442
6443 static int
6444 ix86_safe_length (insn)
6445 rtx insn;
6446 {
6447 if (recog_memoized (insn) >= 0)
6448     return get_attr_length (insn);
6449 else
6450 return 128;
6451 }
6452
6453 static int
6454 ix86_safe_length_prefix (insn)
6455 rtx insn;
6456 {
6457 if (recog_memoized (insn) >= 0)
6458     return get_attr_length (insn);
6459 else
6460 return 0;
6461 }
6462
6463 static enum attr_memory
6464 ix86_safe_memory (insn)
6465 rtx insn;
6466 {
6467 if (recog_memoized (insn) >= 0)
6468     return get_attr_memory (insn);
6469 else
6470 return MEMORY_UNKNOWN;
6471 }
6472
6473 static enum attr_pent_pair
6474 ix86_safe_pent_pair (insn)
6475 rtx insn;
6476 {
6477 if (recog_memoized (insn) >= 0)
6478     return get_attr_pent_pair (insn);
6479 else
6480 return PENT_PAIR_NP;
6481 }
6482
6483 static enum attr_ppro_uops
6484 ix86_safe_ppro_uops (insn)
6485 rtx insn;
6486 {
6487 if (recog_memoized (insn) >= 0)
6488 return get_attr_ppro_uops (insn);
6489 else
6490 return PPRO_UOPS_MANY;
6491 }
6492
6493 static void
6494 ix86_dump_ppro_packet (dump)
6495 FILE *dump;
6496 {
6497 if (ix86_sched_data.ppro.decode[0])
6498 {
6499 fprintf (dump, "PPRO packet: %d",
6500 INSN_UID (ix86_sched_data.ppro.decode[0]));
6501 if (ix86_sched_data.ppro.decode[1])
6502 fprintf (dump, " %d", INSN_UID (ix86_sched_data.ppro.decode[1]));
6503 if (ix86_sched_data.ppro.decode[2])
6504 fprintf (dump, " %d", INSN_UID (ix86_sched_data.ppro.decode[2]));
6505 fputc ('\n', dump);
6506 }
6507 }
6508
6509 /* We're beginning a new block. Initialize data structures as necessary. */
6510
6511 void
6512 ix86_sched_init (dump, sched_verbose)
6513 FILE *dump ATTRIBUTE_UNUSED;
6514 int sched_verbose ATTRIBUTE_UNUSED;
6515 {
6516 memset (&ix86_sched_data, 0, sizeof (ix86_sched_data));
6517 }
6518
6519 /* Shift INSN to SLOT, and shift everything else down. */
6520
6521 static void
6522 ix86_reorder_insn (insnp, slot)
6523 rtx *insnp, *slot;
6524 {
6525 if (insnp != slot)
6526 {
6527 rtx insn = *insnp;
6528 do
6529 insnp[0] = insnp[1];
6530 while (++insnp != slot);
6531 *insnp = insn;
6532 }
6533 }
6534
6535 /* Find an instruction with the given pairability and the minimal number of
6536    cycles lost by the fact that the CPU waits for both pipelines to finish
6537    before reading the next instructions.  Also take care that neither
6538    instruction exceeds 7 bytes, not counting prefixes, or it cannot pair.  */
6539
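/* Background (a summary assumed from the Pentium pairing rules, not from
   the original): the Pentium issues to a U pipe and a V pipe.
   PENT_PAIR_UV insns can issue in either pipe, PENT_PAIR_PU only in the
   U pipe, PENT_PAIR_PV only in the V pipe, and PENT_PAIR_NP insns do
   not pair at all.  */
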
6540 static rtx *
6541 ix86_pent_find_pair (e_ready, ready, type, first)
6542 rtx *e_ready;
6543 rtx *ready;
6544 enum attr_pent_pair type;
6545 rtx first;
6546 {
6547 int mincycles, cycles;
6548 enum attr_pent_pair tmp;
6549 enum attr_memory memory;
6550 rtx *insnp, *bestinsnp = NULL;
6551
6552 if (ix86_safe_length (first) > 7 + ix86_safe_length_prefix (first))
6553 return NULL;
6554
6555 memory = ix86_safe_memory (first);
6556 cycles = result_ready_cost (first);
6557 mincycles = INT_MAX;
6558
6559 for (insnp = e_ready; insnp >= ready && mincycles; --insnp)
6560 if ((tmp = ix86_safe_pent_pair (*insnp)) == type
6561 && ix86_safe_length (*insnp) <= 7 + ix86_safe_length_prefix (*insnp))
6562 {
6563 enum attr_memory second_memory;
6564 int secondcycles, currentcycles;
6565
6566 second_memory = ix86_safe_memory (*insnp);
6567 secondcycles = result_ready_cost (*insnp);
6568 currentcycles = abs (cycles - secondcycles);
6569
6570 if (secondcycles >= 1 && cycles >= 1)
6571 {
6572 	    /* Two read/modify/write instructions together take two
6573 	       cycles longer.  */
6574 if (memory == MEMORY_BOTH && second_memory == MEMORY_BOTH)
6575 currentcycles += 2;
6576
6577 	  /* A read/modify/write instruction followed by a read/modify
6578 	     one takes one cycle longer.  */
6579 if (memory == MEMORY_BOTH && second_memory == MEMORY_LOAD
6580 && tmp != PENT_PAIR_UV
6581 && ix86_safe_pent_pair (first) != PENT_PAIR_UV)
6582 currentcycles += 1;
6583 }
6584 if (currentcycles < mincycles)
6585 bestinsnp = insnp, mincycles = currentcycles;
6586 }
6587
6588 return bestinsnp;
6589 }
6590
6591 /* Subroutines of ix86_sched_reorder. */
6592
6593 static void
6594 ix86_sched_reorder_pentium (ready, e_ready)
6595 rtx *ready;
6596 rtx *e_ready;
6597 {
6598 enum attr_pent_pair pair1, pair2;
6599 rtx *insnp;
6600
6601   /* This wouldn't be necessary if Haifa knew that static insn ordering
6602      is important for deciding which pipe an insn is issued to.  So we
6603      have to make some minor rearrangements.  */
6604
6605 pair1 = ix86_safe_pent_pair (*e_ready);
6606
6607 /* If the first insn is non-pairable, let it be. */
6608 if (pair1 == PENT_PAIR_NP)
6609 return;
6610
6611 pair2 = PENT_PAIR_NP;
6612 insnp = 0;
6613
6614 /* If the first insn is UV or PV pairable, search for a PU
6615 insn to go with. */
6616 if (pair1 == PENT_PAIR_UV || pair1 == PENT_PAIR_PV)
6617 {
6618 insnp = ix86_pent_find_pair (e_ready-1, ready,
6619 PENT_PAIR_PU, *e_ready);
6620 if (insnp)
6621 pair2 = PENT_PAIR_PU;
6622 }
6623
6624 /* If the first insn is PU or UV pairable, search for a PV
6625 insn to go with. */
6626 if (pair2 == PENT_PAIR_NP
6627 && (pair1 == PENT_PAIR_PU || pair1 == PENT_PAIR_UV))
6628 {
6629 insnp = ix86_pent_find_pair (e_ready-1, ready,
6630 PENT_PAIR_PV, *e_ready);
6631 if (insnp)
6632 pair2 = PENT_PAIR_PV;
6633 }
6634
6635 /* If the first insn is pairable, search for a UV
6636 insn to go with. */
6637 if (pair2 == PENT_PAIR_NP)
6638 {
6639 insnp = ix86_pent_find_pair (e_ready-1, ready,
6640 PENT_PAIR_UV, *e_ready);
6641 if (insnp)
6642 pair2 = PENT_PAIR_UV;
6643 }
6644
6645 if (pair2 == PENT_PAIR_NP)
6646 return;
6647
6648 /* Found something! Decide if we need to swap the order. */
6649 if (pair1 == PENT_PAIR_PV || pair2 == PENT_PAIR_PU
6650 || (pair1 == PENT_PAIR_UV && pair2 == PENT_PAIR_UV
6651 && ix86_safe_memory (*e_ready) == MEMORY_BOTH
6652 && ix86_safe_memory (*insnp) == MEMORY_LOAD))
6653 ix86_reorder_insn (insnp, e_ready);
6654 else
6655 ix86_reorder_insn (insnp, e_ready - 1);
6656 }
6657
6658 static void
6659 ix86_sched_reorder_ppro (ready, e_ready)
6660 rtx *ready;
6661 rtx *e_ready;
6662 {
6663 rtx decode[3];
6664 enum attr_ppro_uops cur_uops;
6665 int issued_this_cycle;
6666 rtx *insnp;
6667 int i;
6668
6669 /* At this point .ppro.decode contains the state of the three
6670 decoders from last "cycle". That is, those insns that were
6671 actually independent. But here we're scheduling for the
6672 decoder, and we may find things that are decodable in the
6673 same cycle. */
6674
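  /* Background (a summary assumed from the PPro's "4-1-1" decode rule,
     not from the original): decoder 0 accepts insns of up to four uops,
     while decoders 1 and 2 accept single-uop insns only.  Hence a
     PPRO_UOPS_FEW or PPRO_UOPS_MANY insn must go to slot 0, and only
     PPRO_UOPS_ONE insns can fill slots 1 and 2 below.  */
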
6675   memcpy (decode, ix86_sched_data.ppro.decode, sizeof (decode));
6676 issued_this_cycle = 0;
6677
6678 insnp = e_ready;
6679 cur_uops = ix86_safe_ppro_uops (*insnp);
6680
6681 /* If the decoders are empty, and we've a complex insn at the
6682 head of the priority queue, let it issue without complaint. */
6683 if (decode[0] == NULL)
6684 {
6685 if (cur_uops == PPRO_UOPS_MANY)
6686 {
6687 decode[0] = *insnp;
6688 goto ppro_done;
6689 }
6690
6691       /* Otherwise, search for a 2-4 uop insn to issue.  */
6692 while (cur_uops != PPRO_UOPS_FEW)
6693 {
6694 if (insnp == ready)
6695 break;
6696 cur_uops = ix86_safe_ppro_uops (*--insnp);
6697 }
6698
6699 /* If so, move it to the head of the line. */
6700 if (cur_uops == PPRO_UOPS_FEW)
6701 ix86_reorder_insn (insnp, e_ready);
6702
6703 /* Issue the head of the queue. */
6704 issued_this_cycle = 1;
6705 decode[0] = *e_ready--;
6706 }
6707
6708 /* Look for simple insns to fill in the other two slots. */
6709 for (i = 1; i < 3; ++i)
6710 if (decode[i] == NULL)
6711 {
6712 if (ready >= e_ready)
6713 goto ppro_done;
6714
6715 insnp = e_ready;
6716 cur_uops = ix86_safe_ppro_uops (*insnp);
6717 while (cur_uops != PPRO_UOPS_ONE)
6718 {
6719 if (insnp == ready)
6720 break;
6721 cur_uops = ix86_safe_ppro_uops (*--insnp);
6722 }
6723
6724 /* Found one. Move it to the head of the queue and issue it. */
6725 if (cur_uops == PPRO_UOPS_ONE)
6726 {
6727 ix86_reorder_insn (insnp, e_ready);
6728 decode[i] = *e_ready--;
6729 issued_this_cycle++;
6730 continue;
6731 }
6732
6733 /* ??? Didn't find one. Ideally, here we would do a lazy split
6734 of 2-uop insns, issue one and queue the other. */
6735 }
6736
6737 ppro_done:
6738 if (issued_this_cycle == 0)
6739 issued_this_cycle = 1;
6740 ix86_sched_data.ppro.issued_this_cycle = issued_this_cycle;
6741 }
6742
6744 /* We are about to begin issuing insns for this clock cycle.
6745 Override the default sort algorithm to better slot instructions. */
6746 int
6747 ix86_sched_reorder (dump, sched_verbose, ready, n_ready, clock_var)
6748 FILE *dump ATTRIBUTE_UNUSED;
6749 int sched_verbose ATTRIBUTE_UNUSED;
6750 rtx *ready;
6751 int n_ready;
6752 int clock_var ATTRIBUTE_UNUSED;
6753 {
6754 rtx *e_ready = ready + n_ready - 1;
6755
6756 if (n_ready < 2)
6757 goto out;
6758
6759 switch (ix86_cpu)
6760 {
6761 default:
6762 break;
6763
6764 case PROCESSOR_PENTIUM:
6765 ix86_sched_reorder_pentium (ready, e_ready);
6766 break;
6767
6768 case PROCESSOR_PENTIUMPRO:
6769 ix86_sched_reorder_ppro (ready, e_ready);
6770 break;
6771 }
6772
6773 out:
6774 return ix86_issue_rate ();
6775 }
6776
6777 /* We are about to issue INSN. Return the number of insns left on the
6778 ready queue that can be issued this cycle. */
6779
6780 int
6781 ix86_variable_issue (dump, sched_verbose, insn, can_issue_more)
6782 FILE *dump;
6783 int sched_verbose;
6784 rtx insn;
6785 int can_issue_more;
6786 {
6787 int i;
6788 switch (ix86_cpu)
6789 {
6790 default:
6791 return can_issue_more - 1;
6792
6793 case PROCESSOR_PENTIUMPRO:
6794 {
6795 enum attr_ppro_uops uops = ix86_safe_ppro_uops (insn);
6796
6797 if (uops == PPRO_UOPS_MANY)
6798 {
6799 if (sched_verbose)
6800 ix86_dump_ppro_packet (dump);
6801 ix86_sched_data.ppro.decode[0] = insn;
6802 ix86_sched_data.ppro.decode[1] = NULL;
6803 ix86_sched_data.ppro.decode[2] = NULL;
6804 if (sched_verbose)
6805 ix86_dump_ppro_packet (dump);
6806 ix86_sched_data.ppro.decode[0] = NULL;
6807 }
6808 else if (uops == PPRO_UOPS_FEW)
6809 {
6810 if (sched_verbose)
6811 ix86_dump_ppro_packet (dump);
6812 ix86_sched_data.ppro.decode[0] = insn;
6813 ix86_sched_data.ppro.decode[1] = NULL;
6814 ix86_sched_data.ppro.decode[2] = NULL;
6815 }
6816 else
6817 {
6818 for (i = 0; i < 3; ++i)
6819 if (ix86_sched_data.ppro.decode[i] == NULL)
6820 {
6821 ix86_sched_data.ppro.decode[i] = insn;
6822 break;
6823 }
6824 if (i == 3)
6825 abort ();
6826 if (i == 2)
6827 {
6828 if (sched_verbose)
6829 ix86_dump_ppro_packet (dump);
6830 ix86_sched_data.ppro.decode[0] = NULL;
6831 ix86_sched_data.ppro.decode[1] = NULL;
6832 ix86_sched_data.ppro.decode[2] = NULL;
6833 }
6834 }
6835 }
6836 return --ix86_sched_data.ppro.issued_this_cycle;
6837 }
6838 }
6839 \f
6840 /* Compute the alignment given to a constant that is being placed in memory.
6841 EXP is the constant and ALIGN is the alignment that the object would
6842 ordinarily have.
6843 The value of this function is used instead of that alignment to align
6844 the object. */
6845
6846 int
6847 ix86_constant_alignment (exp, align)
6848 tree exp;
6849 int align;
6850 {
6851 if (TREE_CODE (exp) == REAL_CST)
6852 {
6853 if (TYPE_MODE (TREE_TYPE (exp)) == DFmode && align < 64)
6854 return 64;
6855 else if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (exp))) && align < 128)
6856 return 128;
6857 }
6858 else if (TREE_CODE (exp) == STRING_CST && TREE_STRING_LENGTH (exp) >= 31
6859 && align < 256)
6860 return 256;
6861
6862 return align;
6863 }
6864
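/* Example (illustrative): on ia32 a double only has 32-bit alignment by
   default, so a DFmode constant falls through to the first case above
   and is given 64-bit alignment, keeping FP loads from straddling cache
   lines.  */
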
6865 /* Compute the alignment for a static variable.
6866 TYPE is the data type, and ALIGN is the alignment that
6867 the object would ordinarily have. The value of this function is used
6868 instead of that alignment to align the object. */
6869
6870 int
6871 ix86_data_alignment (type, align)
6872 tree type;
6873 int align;
6874 {
6875 if (AGGREGATE_TYPE_P (type)
6876 && TYPE_SIZE (type)
6877 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
6878 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 256
6879 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 256)
6880 return 256;
6881
6882 if (TREE_CODE (type) == ARRAY_TYPE)
6883 {
6884 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
6885 return 64;
6886 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
6887 return 128;
6888 }
6889 else if (TREE_CODE (type) == COMPLEX_TYPE)
6890 {
6892 if (TYPE_MODE (type) == DCmode && align < 64)
6893 return 64;
6894 if (TYPE_MODE (type) == XCmode && align < 128)
6895 return 128;
6896 }
6897 else if ((TREE_CODE (type) == RECORD_TYPE
6898 || TREE_CODE (type) == UNION_TYPE
6899 || TREE_CODE (type) == QUAL_UNION_TYPE)
6900 && TYPE_FIELDS (type))
6901 {
6902 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
6903 return 64;
6904 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
6905 return 128;
6906 }
6907 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
6908 || TREE_CODE (type) == INTEGER_TYPE)
6909 {
6910 if (TYPE_MODE (type) == DFmode && align < 64)
6911 return 64;
6912 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
6913 return 128;
6914 }
6915
6916 return align;
6917 }
6918
6919 /* Compute the alignment for a local variable.
6920 TYPE is the data type, and ALIGN is the alignment that
6921 the object would ordinarily have. The value of this macro is used
6922 instead of that alignment to align the object. */
6923
6924 int
6925 ix86_local_alignment (type, align)
6926 tree type;
6927 int align;
6928 {
6929 if (TREE_CODE (type) == ARRAY_TYPE)
6930 {
6931 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
6932 return 64;
6933 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
6934 return 128;
6935 }
6936 else if (TREE_CODE (type) == COMPLEX_TYPE)
6937 {
6938 if (TYPE_MODE (type) == DCmode && align < 64)
6939 return 64;
6940 if (TYPE_MODE (type) == XCmode && align < 128)
6941 return 128;
6942 }
6943 else if ((TREE_CODE (type) == RECORD_TYPE
6944 || TREE_CODE (type) == UNION_TYPE
6945 || TREE_CODE (type) == QUAL_UNION_TYPE)
6946 && TYPE_FIELDS (type))
6947 {
6948 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
6949 return 64;
6950 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
6951 return 128;
6952 }
6953 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
6954 || TREE_CODE (type) == INTEGER_TYPE)
6955 {
6957 if (TYPE_MODE (type) == DFmode && align < 64)
6958 return 64;
6959 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
6960 return 128;
6961 }
6962 return align;
6963 }