* i386.c (classify_argument): Handle variable sized objects.
[gcc.git] / gcc / config / i386 / i386.c
1 /* Subroutines used for code generation on IA-32.
2 Copyright (C) 1988, 1992, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
3 2002 Free Software Foundation, Inc.
4
5 This file is part of GNU CC.
6
7 GNU CC is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 2, or (at your option)
10 any later version.
11
12 GNU CC is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
16
17 You should have received a copy of the GNU General Public License
18 along with GNU CC; see the file COPYING. If not, write to
19 the Free Software Foundation, 59 Temple Place - Suite 330,
20 Boston, MA 02111-1307, USA. */
21
22 #include "config.h"
23 #include "system.h"
24 #include "rtl.h"
25 #include "tree.h"
26 #include "tm_p.h"
27 #include "regs.h"
28 #include "hard-reg-set.h"
29 #include "real.h"
30 #include "insn-config.h"
31 #include "conditions.h"
32 #include "output.h"
33 #include "insn-attr.h"
34 #include "flags.h"
35 #include "except.h"
36 #include "function.h"
37 #include "recog.h"
38 #include "expr.h"
39 #include "optabs.h"
40 #include "toplev.h"
41 #include "basic-block.h"
42 #include "ggc.h"
43 #include "target.h"
44 #include "target-def.h"
45 #include "langhooks.h"
46
47 #ifndef CHECK_STACK_LIMIT
48 #define CHECK_STACK_LIMIT (-1)
49 #endif
50
51 /* Processor costs (relative to an add) */
52 static const
53 struct processor_costs size_cost = {	/* costs for tuning for size */
54 2, /* cost of an add instruction */
55 3, /* cost of a lea instruction */
56 2, /* variable shift costs */
57 3, /* constant shift costs */
58 3, /* cost of starting a multiply */
59 0, /* cost of multiply per each bit set */
60 3, /* cost of a divide/mod */
61 3, /* cost of movsx */
62 3, /* cost of movzx */
63 0, /* "large" insn */
64 2, /* MOVE_RATIO */
65 2, /* cost for loading QImode using movzbl */
66 {2, 2, 2}, /* cost of loading integer registers
67 in QImode, HImode and SImode.
68 Relative to reg-reg move (2). */
69 {2, 2, 2}, /* cost of storing integer registers */
70 2, /* cost of reg,reg fld/fst */
71 {2, 2, 2}, /* cost of loading fp registers
72 in SFmode, DFmode and XFmode */
73 {2, 2, 2}, /* cost of loading integer registers */
74 3, /* cost of moving MMX register */
75 {3, 3}, /* cost of loading MMX registers
76 in SImode and DImode */
77 {3, 3}, /* cost of storing MMX registers
78 in SImode and DImode */
79 3, /* cost of moving SSE register */
80 {3, 3, 3}, /* cost of loading SSE registers
81 in SImode, DImode and TImode */
82 {3, 3, 3}, /* cost of storing SSE registers
83 in SImode, DImode and TImode */
84 3, /* MMX or SSE register to integer */
85 0, /* size of prefetch block */
86 0, /* number of parallel prefetches */
87 };
88 /* Processor costs (relative to an add) */
89 static const
90 struct processor_costs i386_cost = { /* 386 specific costs */
91 1, /* cost of an add instruction */
92 1, /* cost of a lea instruction */
93 3, /* variable shift costs */
94 2, /* constant shift costs */
95 6, /* cost of starting a multiply */
96 1, /* cost of multiply per each bit set */
97 23, /* cost of a divide/mod */
98 3, /* cost of movsx */
99 2, /* cost of movzx */
100 15, /* "large" insn */
101 3, /* MOVE_RATIO */
102 4, /* cost for loading QImode using movzbl */
103 {2, 4, 2}, /* cost of loading integer registers
104 in QImode, HImode and SImode.
105 Relative to reg-reg move (2). */
106 {2, 4, 2}, /* cost of storing integer registers */
107 2, /* cost of reg,reg fld/fst */
108 {8, 8, 8}, /* cost of loading fp registers
109 in SFmode, DFmode and XFmode */
110 {8, 8, 8}, /* cost of loading integer registers */
111 2, /* cost of moving MMX register */
112 {4, 8}, /* cost of loading MMX registers
113 in SImode and DImode */
114 {4, 8}, /* cost of storing MMX registers
115 in SImode and DImode */
116 2, /* cost of moving SSE register */
117 {4, 8, 16}, /* cost of loading SSE registers
118 in SImode, DImode and TImode */
119 {4, 8, 16}, /* cost of storing SSE registers
120 in SImode, DImode and TImode */
121 3, /* MMX or SSE register to integer */
122 0, /* size of prefetch block */
123 0, /* number of parallel prefetches */
124 };
125
126 static const
127 struct processor_costs i486_cost = { /* 486 specific costs */
128 1, /* cost of an add instruction */
129 1, /* cost of a lea instruction */
130 3, /* variable shift costs */
131 2, /* constant shift costs */
132 12, /* cost of starting a multiply */
133 1, /* cost of multiply per each bit set */
134 40, /* cost of a divide/mod */
135 3, /* cost of movsx */
136 2, /* cost of movzx */
137 15, /* "large" insn */
138 3, /* MOVE_RATIO */
139 4, /* cost for loading QImode using movzbl */
140 {2, 4, 2}, /* cost of loading integer registers
141 in QImode, HImode and SImode.
142 Relative to reg-reg move (2). */
143 {2, 4, 2}, /* cost of storing integer registers */
144 2, /* cost of reg,reg fld/fst */
145 {8, 8, 8}, /* cost of loading fp registers
146 in SFmode, DFmode and XFmode */
147 {8, 8, 8}, /* cost of loading integer registers */
148 2, /* cost of moving MMX register */
149 {4, 8}, /* cost of loading MMX registers
150 in SImode and DImode */
151 {4, 8}, /* cost of storing MMX registers
152 in SImode and DImode */
153 2, /* cost of moving SSE register */
154 {4, 8, 16}, /* cost of loading SSE registers
155 in SImode, DImode and TImode */
156 {4, 8, 16}, /* cost of storing SSE registers
157 in SImode, DImode and TImode */
158 3, /* MMX or SSE register to integer */
159 0, /* size of prefetch block */
160 0, /* number of parallel prefetches */
161 };
162
163 static const
164 struct processor_costs pentium_cost = {
165 1, /* cost of an add instruction */
166 1, /* cost of a lea instruction */
167 4, /* variable shift costs */
168 1, /* constant shift costs */
169 11, /* cost of starting a multiply */
170 0, /* cost of multiply per each bit set */
171 25, /* cost of a divide/mod */
172 3, /* cost of movsx */
173 2, /* cost of movzx */
174 8, /* "large" insn */
175 6, /* MOVE_RATIO */
176 6, /* cost for loading QImode using movzbl */
177 {2, 4, 2}, /* cost of loading integer registers
178 in QImode, HImode and SImode.
179 Relative to reg-reg move (2). */
180 {2, 4, 2}, /* cost of storing integer registers */
181 2, /* cost of reg,reg fld/fst */
182 {2, 2, 6}, /* cost of loading fp registers
183 in SFmode, DFmode and XFmode */
184 {4, 4, 6}, /* cost of loading integer registers */
185 8, /* cost of moving MMX register */
186 {8, 8}, /* cost of loading MMX registers
187 in SImode and DImode */
188 {8, 8}, /* cost of storing MMX registers
189 in SImode and DImode */
190 2, /* cost of moving SSE register */
191 {4, 8, 16}, /* cost of loading SSE registers
192 in SImode, DImode and TImode */
193 {4, 8, 16}, /* cost of storing SSE registers
194 in SImode, DImode and TImode */
195 3, /* MMX or SSE register to integer */
196 0, /* size of prefetch block */
197 0, /* number of parallel prefetches */
198 };
199
200 static const
201 struct processor_costs pentiumpro_cost = {
202 1, /* cost of an add instruction */
203 1, /* cost of a lea instruction */
204 1, /* variable shift costs */
205 1, /* constant shift costs */
206 4, /* cost of starting a multiply */
207 0, /* cost of multiply per each bit set */
208 17, /* cost of a divide/mod */
209 1, /* cost of movsx */
210 1, /* cost of movzx */
211 8, /* "large" insn */
212 6, /* MOVE_RATIO */
213 2, /* cost for loading QImode using movzbl */
214 {4, 4, 4}, /* cost of loading integer registers
215 in QImode, HImode and SImode.
216 Relative to reg-reg move (2). */
217 {2, 2, 2}, /* cost of storing integer registers */
218 2, /* cost of reg,reg fld/fst */
219 {2, 2, 6}, /* cost of loading fp registers
220 in SFmode, DFmode and XFmode */
221 {4, 4, 6}, /* cost of loading integer registers */
222 2, /* cost of moving MMX register */
223 {2, 2}, /* cost of loading MMX registers
224 in SImode and DImode */
225 {2, 2}, /* cost of storing MMX registers
226 in SImode and DImode */
227 2, /* cost of moving SSE register */
228 {2, 2, 8}, /* cost of loading SSE registers
229 in SImode, DImode and TImode */
230 {2, 2, 8}, /* cost of storing SSE registers
231 in SImode, DImode and TImode */
232 3, /* MMX or SSE register to integer */
233 32, /* size of prefetch block */
234 6, /* number of parallel prefetches */
235 };
236
237 static const
238 struct processor_costs k6_cost = {
239 1, /* cost of an add instruction */
240 2, /* cost of a lea instruction */
241 1, /* variable shift costs */
242 1, /* constant shift costs */
243 3, /* cost of starting a multiply */
244 0, /* cost of multiply per each bit set */
245 18, /* cost of a divide/mod */
246 2, /* cost of movsx */
247 2, /* cost of movzx */
248 8, /* "large" insn */
249 4, /* MOVE_RATIO */
250 3, /* cost for loading QImode using movzbl */
251 {4, 5, 4}, /* cost of loading integer registers
252 in QImode, HImode and SImode.
253 Relative to reg-reg move (2). */
254 {2, 3, 2}, /* cost of storing integer registers */
255 4, /* cost of reg,reg fld/fst */
256 {6, 6, 6}, /* cost of loading fp registers
257 in SFmode, DFmode and XFmode */
258 {4, 4, 4}, /* cost of loading integer registers */
259 2, /* cost of moving MMX register */
260 {2, 2}, /* cost of loading MMX registers
261 in SImode and DImode */
262 {2, 2}, /* cost of storing MMX registers
263 in SImode and DImode */
264 2, /* cost of moving SSE register */
265 {2, 2, 8}, /* cost of loading SSE registers
266 in SImode, DImode and TImode */
267 {2, 2, 8}, /* cost of storing SSE registers
268 in SImode, DImode and TImode */
269 6, /* MMX or SSE register to integer */
270 32, /* size of prefetch block */
271 1, /* number of parallel prefetches */
272 };
273
274 static const
275 struct processor_costs athlon_cost = {
276 1, /* cost of an add instruction */
277 2, /* cost of a lea instruction */
278 1, /* variable shift costs */
279 1, /* constant shift costs */
280 5, /* cost of starting a multiply */
281 0, /* cost of multiply per each bit set */
282 42, /* cost of a divide/mod */
283 1, /* cost of movsx */
284 1, /* cost of movzx */
285 8, /* "large" insn */
286 9, /* MOVE_RATIO */
287 4, /* cost for loading QImode using movzbl */
288 {4, 5, 4}, /* cost of loading integer registers
289 in QImode, HImode and SImode.
290 Relative to reg-reg move (2). */
291 {2, 3, 2}, /* cost of storing integer registers */
292 4, /* cost of reg,reg fld/fst */
293 {6, 6, 20}, /* cost of loading fp registers
294 in SFmode, DFmode and XFmode */
295 {4, 4, 16}, /* cost of loading integer registers */
296 2, /* cost of moving MMX register */
297 {2, 2}, /* cost of loading MMX registers
298 in SImode and DImode */
299 {2, 2}, /* cost of storing MMX registers
300 in SImode and DImode */
301 2, /* cost of moving SSE register */
302 {2, 2, 8}, /* cost of loading SSE registers
303 in SImode, DImode and TImode */
304 {2, 2, 8}, /* cost of storing SSE registers
305 in SImode, DImode and TImode */
306 6, /* MMX or SSE register to integer */
307 64, /* size of prefetch block */
308 6, /* number of parallel prefetches */
309 };
310
311 static const
312 struct processor_costs pentium4_cost = {
313 1, /* cost of an add instruction */
314 1, /* cost of a lea instruction */
315 8, /* variable shift costs */
316 8, /* constant shift costs */
317 30, /* cost of starting a multiply */
318 0, /* cost of multiply per each bit set */
319 112, /* cost of a divide/mod */
320 1, /* cost of movsx */
321 1, /* cost of movzx */
322 16, /* "large" insn */
323 6, /* MOVE_RATIO */
324 2, /* cost for loading QImode using movzbl */
325 {4, 5, 4}, /* cost of loading integer registers
326 in QImode, HImode and SImode.
327 Relative to reg-reg move (2). */
328 {2, 3, 2}, /* cost of storing integer registers */
329 2, /* cost of reg,reg fld/fst */
330 {2, 2, 6}, /* cost of loading fp registers
331 in SFmode, DFmode and XFmode */
332 {4, 4, 6}, /* cost of loading integer registers */
333 2, /* cost of moving MMX register */
334 {2, 2}, /* cost of loading MMX registers
335 in SImode and DImode */
336 {2, 2}, /* cost of storing MMX registers
337 in SImode and DImode */
338 12, /* cost of moving SSE register */
339 {12, 12, 12}, /* cost of loading SSE registers
340 in SImode, DImode and TImode */
341 {2, 2, 8}, /* cost of storing SSE registers
342 in SImode, DImode and TImode */
343 10, /* MMX or SSE register to integer */
344 64, /* size of prefetch block */
345 6, /* number of parallel prefetches */
346 };
347
348 const struct processor_costs *ix86_cost = &pentium_cost;
349
350 /* Processor feature/optimization bitmasks. */
351 #define m_386 (1<<PROCESSOR_I386)
352 #define m_486 (1<<PROCESSOR_I486)
353 #define m_PENT (1<<PROCESSOR_PENTIUM)
354 #define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
355 #define m_K6 (1<<PROCESSOR_K6)
356 #define m_ATHLON (1<<PROCESSOR_ATHLON)
357 #define m_PENT4 (1<<PROCESSOR_PENTIUM4)
358
359 const int x86_use_leave = m_386 | m_K6 | m_ATHLON;
360 const int x86_push_memory = m_386 | m_K6 | m_ATHLON | m_PENT4;
361 const int x86_zero_extend_with_and = m_486 | m_PENT;
362 const int x86_movx = m_ATHLON | m_PPRO | m_PENT4 /* m_386 | m_K6 */;
363 const int x86_double_with_add = ~m_386;
364 const int x86_use_bit_test = m_386;
365 const int x86_unroll_strlen = m_486 | m_PENT | m_PPRO | m_ATHLON | m_K6;
366 const int x86_cmove = m_PPRO | m_ATHLON | m_PENT4;
367 const int x86_3dnow_a = m_ATHLON;
368 const int x86_deep_branch = m_PPRO | m_K6 | m_ATHLON | m_PENT4;
369 const int x86_branch_hints = m_PENT4;
370 const int x86_use_sahf = m_PPRO | m_K6 | m_PENT4;
371 const int x86_partial_reg_stall = m_PPRO;
372 const int x86_use_loop = m_K6;
373 const int x86_use_fiop = ~(m_PPRO | m_ATHLON | m_PENT);
374 const int x86_use_mov0 = m_K6;
375 const int x86_use_cltd = ~(m_PENT | m_K6);
376 const int x86_read_modify_write = ~m_PENT;
377 const int x86_read_modify = ~(m_PENT | m_PPRO);
378 const int x86_split_long_moves = m_PPRO;
379 const int x86_promote_QImode = m_K6 | m_PENT | m_386 | m_486 | m_ATHLON;
380 const int x86_fast_prefix = ~(m_PENT | m_486 | m_386);
381 const int x86_single_stringop = m_386 | m_PENT4;
382 const int x86_qimode_math = ~(0);
383 const int x86_promote_qi_regs = 0;
384 const int x86_himode_math = ~(m_PPRO);
385 const int x86_promote_hi_regs = m_PPRO;
386 const int x86_sub_esp_4 = m_ATHLON | m_PPRO | m_PENT4;
387 const int x86_sub_esp_8 = m_ATHLON | m_PPRO | m_386 | m_486 | m_PENT4;
388 const int x86_add_esp_4 = m_ATHLON | m_K6 | m_PENT4;
389 const int x86_add_esp_8 = m_ATHLON | m_PPRO | m_K6 | m_386 | m_486 | m_PENT4;
390 const int x86_integer_DFmode_moves = ~(m_ATHLON | m_PENT4 | m_PPRO);
391 const int x86_partial_reg_dependency = m_ATHLON | m_PENT4;
392 const int x86_memory_mismatch_stall = m_ATHLON | m_PENT4;
393 const int x86_accumulate_outgoing_args = m_ATHLON | m_PENT4 | m_PPRO;
394 const int x86_prologue_using_move = m_ATHLON | m_PENT4 | m_PPRO;
395 const int x86_epilogue_using_move = m_ATHLON | m_PENT4 | m_PPRO;
396 const int x86_decompose_lea = m_PENT4;
397 const int x86_shift1 = ~m_486;
398 const int x86_arch_always_fancy_math_387 = m_PENT | m_PPRO | m_ATHLON | m_PENT4;
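/* Illustrative note: each of the x86_* tuning masks above is tested
   against the current CPU or architecture with an expression of the form

     if (x86_arch_always_fancy_math_387 & (1 << ix86_arch))
       ...

   as is done later in override_options for x86_arch_always_fancy_math_387
   and x86_3dnow_a.  */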
399
400 /* In case the average insn count for a single function invocation is
401    lower than this constant, emit fast (but longer) prologue and
402    epilogue code.  */
403 #define FAST_PROLOGUE_INSN_COUNT 30
404
405 /* Set by prologue expander and used by epilogue expander to determine
406 the style used. */
407 static int use_fast_prologue_epilogue;
408
409 /* Names for the 8-bit (low), 8-bit (high), and 16-bit registers, respectively. */
410 static const char *const qi_reg_name[] = QI_REGISTER_NAMES;
411 static const char *const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES;
412 static const char *const hi_reg_name[] = HI_REGISTER_NAMES;
413
414 /* Array of the smallest class containing reg number REGNO, indexed by
415 REGNO. Used by REGNO_REG_CLASS in i386.h. */
416
417 enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] =
418 {
419 /* ax, dx, cx, bx */
420 AREG, DREG, CREG, BREG,
421 /* si, di, bp, sp */
422 SIREG, DIREG, NON_Q_REGS, NON_Q_REGS,
423 /* FP registers */
424 FP_TOP_REG, FP_SECOND_REG, FLOAT_REGS, FLOAT_REGS,
425 FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, FLOAT_REGS,
426 /* arg pointer */
427 NON_Q_REGS,
428 /* flags, fpsr, dirflag, frame */
429 NO_REGS, NO_REGS, NO_REGS, NON_Q_REGS,
430 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
431 SSE_REGS, SSE_REGS,
432 MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS,
433 MMX_REGS, MMX_REGS,
434 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
435 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
436 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
437 SSE_REGS, SSE_REGS,
438 };
439
440 /* The "default" register map used in 32bit mode. */
441
442 int const dbx_register_map[FIRST_PSEUDO_REGISTER] =
443 {
444 0, 2, 1, 3, 6, 7, 4, 5, /* general regs */
445 12, 13, 14, 15, 16, 17, 18, 19, /* fp regs */
446 -1, -1, -1, -1, -1, /* arg, flags, fpsr, dir, frame */
447 21, 22, 23, 24, 25, 26, 27, 28, /* SSE */
448 29, 30, 31, 32, 33, 34, 35, 36, /* MMX */
449 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
450 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
451 };
452
453 static int const x86_64_int_parameter_registers[6] =
454 {
455 5 /*RDI*/, 4 /*RSI*/, 1 /*RDX*/, 2 /*RCX*/,
456 FIRST_REX_INT_REG /*R8 */, FIRST_REX_INT_REG + 1 /*R9 */
457 };
458
459 static int const x86_64_int_return_registers[4] =
460 {
461 	0 /*RAX*/, 1 /*RDX*/, 5 /*RDI*/, 4 /*RSI*/
462 };
463
464 /* The "default" register map used in 64bit mode. */
465 int const dbx64_register_map[FIRST_PSEUDO_REGISTER] =
466 {
467 0, 1, 2, 3, 4, 5, 6, 7, /* general regs */
468 33, 34, 35, 36, 37, 38, 39, 40, /* fp regs */
469 -1, -1, -1, -1, -1, /* arg, flags, fpsr, dir, frame */
470 17, 18, 19, 20, 21, 22, 23, 24, /* SSE */
471 41, 42, 43, 44, 45, 46, 47, 48, /* MMX */
472 8,9,10,11,12,13,14,15, /* extended integer registers */
473 25, 26, 27, 28, 29, 30, 31, 32, /* extended SSE registers */
474 };
475
476 /* Define the register numbers to be used in Dwarf debugging information.
477 The SVR4 reference port C compiler uses the following register numbers
478 in its Dwarf output code:
479 0 for %eax (gcc regno = 0)
480 1 for %ecx (gcc regno = 2)
481 2 for %edx (gcc regno = 1)
482 3 for %ebx (gcc regno = 3)
483 4 for %esp (gcc regno = 7)
484 5 for %ebp (gcc regno = 6)
485 6 for %esi (gcc regno = 4)
486 7 for %edi (gcc regno = 5)
487 The following three DWARF register numbers are never generated by
488 the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
489 believes these numbers have these meanings.
490 8 for %eip (no gcc equivalent)
491 9 for %eflags (gcc regno = 17)
492 10 for %trapno (no gcc equivalent)
493 It is not at all clear how we should number the FP stack registers
494 for the x86 architecture. If the version of SDB on x86/svr4 were
495 a bit less brain dead with respect to floating-point then we would
496 have a precedent to follow with respect to DWARF register numbers
497 for x86 FP registers, but the SDB on x86/svr4 is so completely
498 broken with respect to FP registers that it is hardly worth thinking
499 of it as something to strive for compatibility with.
500 The version of x86/svr4 SDB I have at the moment does (partially)
501 seem to believe that DWARF register number 11 is associated with
502 the x86 register %st(0), but that's about all. Higher DWARF
503 register numbers don't seem to be associated with anything in
504 particular, and even for DWARF regno 11, SDB only seems to under-
505 stand that it should say that a variable lives in %st(0) (when
506 asked via an `=' command) if we said it was in DWARF regno 11,
507 but SDB still prints garbage when asked for the value of the
508 variable in question (via a `/' command).
509 (Also note that the labels SDB prints for various FP stack regs
510 when doing an `x' command are all wrong.)
511 Note that these problems generally don't affect the native SVR4
512 C compiler because it doesn't allow the use of -O with -g and
513 because when it is *not* optimizing, it allocates a memory
514 location for each floating-point variable, and the memory
515 location is what gets described in the DWARF AT_location
516 attribute for the variable in question.
517 Regardless of the severe mental illness of the x86/svr4 SDB, we
518 do something sensible here and we use the following DWARF
519 register numbers. Note that these are all stack-top-relative
520 numbers.
521 11 for %st(0) (gcc regno = 8)
522 12 for %st(1) (gcc regno = 9)
523 13 for %st(2) (gcc regno = 10)
524 14 for %st(3) (gcc regno = 11)
525 15 for %st(4) (gcc regno = 12)
526 16 for %st(5) (gcc regno = 13)
527 17 for %st(6) (gcc regno = 14)
528 18 for %st(7) (gcc regno = 15)
529 */
530 int const svr4_dbx_register_map[FIRST_PSEUDO_REGISTER] =
531 {
532 0, 2, 1, 3, 6, 7, 5, 4, /* general regs */
533 11, 12, 13, 14, 15, 16, 17, 18, /* fp regs */
534 -1, 9, -1, -1, -1, /* arg, flags, fpsr, dir, frame */
535 21, 22, 23, 24, 25, 26, 27, 28, /* SSE registers */
536 29, 30, 31, 32, 33, 34, 35, 36, /* MMX registers */
537 -1, -1, -1, -1, -1, -1, -1, -1,	/* extended integer registers */
538 -1, -1, -1, -1, -1, -1, -1, -1,	/* extended SSE registers */
539 };
540
541 /* Test and compare insns in i386.md store the information needed to
542 generate branch and scc insns here. */
543
544 rtx ix86_compare_op0 = NULL_RTX;
545 rtx ix86_compare_op1 = NULL_RTX;
546
547 /* The encoding characters for the four TLS models present in ELF. */
548
549 static char const tls_model_chars[] = " GLil";
550
551 #define MAX_386_STACK_LOCALS 3
552 /* Size of the register save area. */
553 #define X86_64_VARARGS_SIZE (REGPARM_MAX * UNITS_PER_WORD + SSE_REGPARM_MAX * 16)
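/* Illustrative example, assuming the x86-64 values REGPARM_MAX == 6,
   UNITS_PER_WORD == 8 and SSE_REGPARM_MAX == 8: the register save area is
   6*8 + 8*16 = 176 bytes, i.e. six integer-register slots followed by
   eight 16-byte SSE slots.  */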
554
555 /* Define the structure for the machine field in struct function. */
556 struct machine_function GTY(())
557 {
558 rtx stack_locals[(int) MAX_MACHINE_MODE][MAX_386_STACK_LOCALS];
559 const char *some_ld_name;
560 int save_varrargs_registers;
561 int accesses_prev_frame;
562 };
563
564 #define ix86_stack_locals (cfun->machine->stack_locals)
565 #define ix86_save_varrargs_registers (cfun->machine->save_varrargs_registers)
566
567 /* Structure describing stack frame layout.
568 Stack grows downward:
569
570 [arguments]
571 <- ARG_POINTER
572 saved pc
573
574 saved frame pointer if frame_pointer_needed
575 <- HARD_FRAME_POINTER
576 [saved regs]
577
578 [padding1] \
579 )
580 [va_arg registers] (
581 > to_allocate <- FRAME_POINTER
582 [frame] (
583 )
584 [padding2] /
585 */
586 struct ix86_frame
587 {
588 int nregs;
589 int padding1;
590 int va_arg_size;
591 HOST_WIDE_INT frame;
592 int padding2;
593 int outgoing_arguments_size;
594 int red_zone_size;
595
596 HOST_WIDE_INT to_allocate;
597 /* The offsets relative to ARG_POINTER. */
598 HOST_WIDE_INT frame_pointer_offset;
599 HOST_WIDE_INT hard_frame_pointer_offset;
600 HOST_WIDE_INT stack_pointer_offset;
601 };
602
603 /* Used to enable/disable debugging features. */
604 const char *ix86_debug_arg_string, *ix86_debug_addr_string;
605 /* Code model option as passed by user. */
606 const char *ix86_cmodel_string;
607 /* Parsed value. */
608 enum cmodel ix86_cmodel;
609 /* Asm dialect. */
610 const char *ix86_asm_string;
611 enum asm_dialect ix86_asm_dialect = ASM_ATT;
612 /* TLS dialect. */
613 const char *ix86_tls_dialect_string;
614 enum tls_dialect ix86_tls_dialect = TLS_DIALECT_GNU;
615
616 /* Which unit we are generating floating point math for. */
617 enum fpmath_unit ix86_fpmath;
618
619 /* Which cpu are we scheduling for. */
620 enum processor_type ix86_cpu;
621 /* Which instruction set architecture to use. */
622 enum processor_type ix86_arch;
623
624 /* Strings to hold which cpu and instruction set architecture to use. */
625 const char *ix86_cpu_string; /* for -mcpu=<xxx> */
626 const char *ix86_arch_string; /* for -march=<xxx> */
627 const char *ix86_fpmath_string; /* for -mfpmath=<xxx> */
628
629 /* # of registers to use to pass arguments. */
630 const char *ix86_regparm_string;
631
632 /* True if the SSE prefetch instruction is not a NOP. */
633 int x86_prefetch_sse;
634
635 /* ix86_regparm_string as a number */
636 int ix86_regparm;
637
638 /* Alignment to use for loops and jumps: */
639
640 /* Power of two alignment for loops. */
641 const char *ix86_align_loops_string;
642
643 /* Power of two alignment for non-loop jumps. */
644 const char *ix86_align_jumps_string;
645
646 /* Power of two alignment for stack boundary in bytes. */
647 const char *ix86_preferred_stack_boundary_string;
648
649 /* Preferred alignment for stack boundary in bits. */
650 int ix86_preferred_stack_boundary;
651
652 /* Values 1-5: see jump.c */
653 int ix86_branch_cost;
654 const char *ix86_branch_cost_string;
655
656 /* Power of two alignment for functions. */
657 const char *ix86_align_funcs_string;
658
659 /* Prefix built by ASM_GENERATE_INTERNAL_LABEL. */
660 static char internal_label_prefix[16];
661 static int internal_label_prefix_len;
662 \f
663 static int local_symbolic_operand PARAMS ((rtx, enum machine_mode));
664 static int tls_symbolic_operand_1 PARAMS ((rtx, enum tls_model));
665 static void output_pic_addr_const PARAMS ((FILE *, rtx, int));
666 static void put_condition_code PARAMS ((enum rtx_code, enum machine_mode,
667 int, int, FILE *));
668 static const char *get_some_local_dynamic_name PARAMS ((void));
669 static int get_some_local_dynamic_name_1 PARAMS ((rtx *, void *));
670 static rtx maybe_get_pool_constant PARAMS ((rtx));
671 static rtx ix86_expand_int_compare PARAMS ((enum rtx_code, rtx, rtx));
672 static enum rtx_code ix86_prepare_fp_compare_args PARAMS ((enum rtx_code,
673 rtx *, rtx *));
674 static rtx get_thread_pointer PARAMS ((void));
675 static void get_pc_thunk_name PARAMS ((char [32], unsigned int));
676 static rtx gen_push PARAMS ((rtx));
677 static int memory_address_length PARAMS ((rtx addr));
678 static int ix86_flags_dependant PARAMS ((rtx, rtx, enum attr_type));
679 static int ix86_agi_dependant PARAMS ((rtx, rtx, enum attr_type));
680 static enum attr_ppro_uops ix86_safe_ppro_uops PARAMS ((rtx));
681 static void ix86_dump_ppro_packet PARAMS ((FILE *));
682 static void ix86_reorder_insn PARAMS ((rtx *, rtx *));
683 static struct machine_function * ix86_init_machine_status PARAMS ((void));
684 static int ix86_split_to_parts PARAMS ((rtx, rtx *, enum machine_mode));
685 static int ix86_nsaved_regs PARAMS ((void));
686 static void ix86_emit_save_regs PARAMS ((void));
687 static void ix86_emit_save_regs_using_mov PARAMS ((rtx, HOST_WIDE_INT));
688 static void ix86_emit_restore_regs_using_mov PARAMS ((rtx, int, int));
689 static void ix86_output_function_epilogue PARAMS ((FILE *, HOST_WIDE_INT));
690 static void ix86_set_move_mem_attrs_1 PARAMS ((rtx, rtx, rtx, rtx, rtx));
691 static void ix86_sched_reorder_ppro PARAMS ((rtx *, rtx *));
692 static HOST_WIDE_INT ix86_GOT_alias_set PARAMS ((void));
693 static void ix86_adjust_counter PARAMS ((rtx, HOST_WIDE_INT));
694 static rtx ix86_expand_aligntest PARAMS ((rtx, int));
695 static void ix86_expand_strlensi_unroll_1 PARAMS ((rtx, rtx));
696 static int ix86_issue_rate PARAMS ((void));
697 static int ix86_adjust_cost PARAMS ((rtx, rtx, rtx, int));
698 static void ix86_sched_init PARAMS ((FILE *, int, int));
699 static int ix86_sched_reorder PARAMS ((FILE *, int, rtx *, int *, int));
700 static int ix86_variable_issue PARAMS ((FILE *, int, rtx, int));
701 static int ia32_use_dfa_pipeline_interface PARAMS ((void));
702 static int ia32_multipass_dfa_lookahead PARAMS ((void));
703 static void ix86_init_mmx_sse_builtins PARAMS ((void));
704
705 struct ix86_address
706 {
707 rtx base, index, disp;
708 HOST_WIDE_INT scale;
709 };
710
711 static int ix86_decompose_address PARAMS ((rtx, struct ix86_address *));
712
713 static void ix86_encode_section_info PARAMS ((tree, int)) ATTRIBUTE_UNUSED;
714 static const char *ix86_strip_name_encoding PARAMS ((const char *))
715 ATTRIBUTE_UNUSED;
716
717 struct builtin_description;
718 static rtx ix86_expand_sse_comi PARAMS ((const struct builtin_description *,
719 tree, rtx));
720 static rtx ix86_expand_sse_compare PARAMS ((const struct builtin_description *,
721 tree, rtx));
722 static rtx ix86_expand_unop1_builtin PARAMS ((enum insn_code, tree, rtx));
723 static rtx ix86_expand_unop_builtin PARAMS ((enum insn_code, tree, rtx, int));
724 static rtx ix86_expand_binop_builtin PARAMS ((enum insn_code, tree, rtx));
725 static rtx ix86_expand_timode_binop_builtin PARAMS ((enum insn_code,
726 tree, rtx));
727 static rtx ix86_expand_store_builtin PARAMS ((enum insn_code, tree));
728 static rtx safe_vector_operand PARAMS ((rtx, enum machine_mode));
729 static enum rtx_code ix86_fp_compare_code_to_integer PARAMS ((enum rtx_code));
730 static void ix86_fp_comparison_codes PARAMS ((enum rtx_code code,
731 enum rtx_code *,
732 enum rtx_code *,
733 enum rtx_code *));
734 static rtx ix86_expand_fp_compare PARAMS ((enum rtx_code, rtx, rtx, rtx,
735 rtx *, rtx *));
736 static int ix86_fp_comparison_arithmetics_cost PARAMS ((enum rtx_code code));
737 static int ix86_fp_comparison_fcomi_cost PARAMS ((enum rtx_code code));
738 static int ix86_fp_comparison_sahf_cost PARAMS ((enum rtx_code code));
739 static int ix86_fp_comparison_cost PARAMS ((enum rtx_code code));
740 static unsigned int ix86_select_alt_pic_regnum PARAMS ((void));
741 static int ix86_save_reg PARAMS ((unsigned int, int));
742 static void ix86_compute_frame_layout PARAMS ((struct ix86_frame *));
743 static int ix86_comp_type_attributes PARAMS ((tree, tree));
744 const struct attribute_spec ix86_attribute_table[];
745 static tree ix86_handle_cdecl_attribute PARAMS ((tree *, tree, tree, int, bool *));
746 static tree ix86_handle_regparm_attribute PARAMS ((tree *, tree, tree, int, bool *));
747 static int ix86_value_regno PARAMS ((enum machine_mode));
748
749 #if defined (DO_GLOBAL_CTORS_BODY) && defined (HAS_INIT_SECTION)
750 static void ix86_svr3_asm_out_constructor PARAMS ((rtx, int));
751 #endif
752
753 /* Register class used for passing a given 64-bit part of the argument.
754    These represent classes as documented by the PS ABI, with the exception
755    of the SSESF and SSEDF classes, which are basically the SSE class except
756    that gcc will use SFmode or DFmode moves instead of DImode to avoid
757    reformatting penalties.
758
759    Similarly we play games with INTEGERSI_CLASS to use cheaper SImode moves
760    whenever possible (the upper half contains only padding).
761 */
761 enum x86_64_reg_class
762 {
763 X86_64_NO_CLASS,
764 X86_64_INTEGER_CLASS,
765 X86_64_INTEGERSI_CLASS,
766 X86_64_SSE_CLASS,
767 X86_64_SSESF_CLASS,
768 X86_64_SSEDF_CLASS,
769 X86_64_SSEUP_CLASS,
770 X86_64_X87_CLASS,
771 X86_64_X87UP_CLASS,
772 X86_64_MEMORY_CLASS
773 };
774 static const char * const x86_64_reg_class_name[] =
775 {"no", "integer", "integerSI", "sse", "sseSF", "sseDF", "sseup", "x87", "x87up", "no"};
776
777 #define MAX_CLASSES 4
778 static int classify_argument PARAMS ((enum machine_mode, tree,
779 enum x86_64_reg_class [MAX_CLASSES],
780 int));
781 static int examine_argument PARAMS ((enum machine_mode, tree, int, int *,
782 int *));
783 static rtx construct_container PARAMS ((enum machine_mode, tree, int, int, int,
784 const int *, int));
785 static enum x86_64_reg_class merge_classes PARAMS ((enum x86_64_reg_class,
786 enum x86_64_reg_class));
787 \f
788 /* Initialize the GCC target structure. */
789 #undef TARGET_ATTRIBUTE_TABLE
790 #define TARGET_ATTRIBUTE_TABLE ix86_attribute_table
791 #ifdef TARGET_DLLIMPORT_DECL_ATTRIBUTES
792 # undef TARGET_MERGE_DECL_ATTRIBUTES
793 # define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
794 #endif
795
796 #undef TARGET_COMP_TYPE_ATTRIBUTES
797 #define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes
798
799 #undef TARGET_INIT_BUILTINS
800 #define TARGET_INIT_BUILTINS ix86_init_builtins
801
802 #undef TARGET_EXPAND_BUILTIN
803 #define TARGET_EXPAND_BUILTIN ix86_expand_builtin
804
805 #undef TARGET_ASM_FUNCTION_EPILOGUE
806 #define TARGET_ASM_FUNCTION_EPILOGUE ix86_output_function_epilogue
807
808 #undef TARGET_ASM_OPEN_PAREN
809 #define TARGET_ASM_OPEN_PAREN ""
810 #undef TARGET_ASM_CLOSE_PAREN
811 #define TARGET_ASM_CLOSE_PAREN ""
812
813 #undef TARGET_ASM_ALIGNED_HI_OP
814 #define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT
815 #undef TARGET_ASM_ALIGNED_SI_OP
816 #define TARGET_ASM_ALIGNED_SI_OP ASM_LONG
817 #ifdef ASM_QUAD
818 #undef TARGET_ASM_ALIGNED_DI_OP
819 #define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD
820 #endif
821
822 #undef TARGET_ASM_UNALIGNED_HI_OP
823 #define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
824 #undef TARGET_ASM_UNALIGNED_SI_OP
825 #define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
826 #undef TARGET_ASM_UNALIGNED_DI_OP
827 #define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP
828
829 #undef TARGET_SCHED_ADJUST_COST
830 #define TARGET_SCHED_ADJUST_COST ix86_adjust_cost
831 #undef TARGET_SCHED_ISSUE_RATE
832 #define TARGET_SCHED_ISSUE_RATE ix86_issue_rate
833 #undef TARGET_SCHED_VARIABLE_ISSUE
834 #define TARGET_SCHED_VARIABLE_ISSUE ix86_variable_issue
835 #undef TARGET_SCHED_INIT
836 #define TARGET_SCHED_INIT ix86_sched_init
837 #undef TARGET_SCHED_REORDER
838 #define TARGET_SCHED_REORDER ix86_sched_reorder
839 #undef TARGET_SCHED_USE_DFA_PIPELINE_INTERFACE
840 #define TARGET_SCHED_USE_DFA_PIPELINE_INTERFACE \
841 ia32_use_dfa_pipeline_interface
842 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
843 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
844 ia32_multipass_dfa_lookahead
845
846 #ifdef HAVE_AS_TLS
847 #undef TARGET_HAVE_TLS
848 #define TARGET_HAVE_TLS true
849 #endif
850
851 struct gcc_target targetm = TARGET_INITIALIZER;
852 \f
853 /* Sometimes certain combinations of command options do not make
854 sense on a particular target machine. You can define a macro
855 `OVERRIDE_OPTIONS' to take account of this. This macro, if
856 defined, is executed once just after all the command options have
857 been parsed.
858
859 Don't use this macro to turn on various extra optimizations for
860 `-O'. That is what `OPTIMIZATION_OPTIONS' is for. */
861
862 void
863 override_options ()
864 {
865 int i;
866 /* Comes from final.c -- no real reason to change it. */
867 #define MAX_CODE_ALIGN 16
868
869 static struct ptt
870 {
871 const struct processor_costs *cost; /* Processor costs */
872 const int target_enable; /* Target flags to enable. */
873 const int target_disable; /* Target flags to disable. */
874 const int align_loop; /* Default alignments. */
875 const int align_loop_max_skip;
876 const int align_jump;
877 const int align_jump_max_skip;
878 const int align_func;
879 const int branch_cost;
880 }
881 const processor_target_table[PROCESSOR_max] =
882 {
883 {&i386_cost, 0, 0, 4, 3, 4, 3, 4, 1},
884 {&i486_cost, 0, 0, 16, 15, 16, 15, 16, 1},
885 {&pentium_cost, 0, 0, 16, 7, 16, 7, 16, 1},
886 {&pentiumpro_cost, 0, 0, 16, 15, 16, 7, 16, 1},
887 {&k6_cost, 0, 0, 32, 7, 32, 7, 32, 1},
888 {&athlon_cost, 0, 0, 16, 7, 64, 7, 16, 1},
889 {&pentium4_cost, 0, 0, 0, 0, 0, 0, 0, 1}
890 };
891
892 static const char * const cpu_names[] = TARGET_CPU_DEFAULT_NAMES;
893 static struct pta
894 {
895 const char *const name; /* processor name or nickname. */
896 const enum processor_type processor;
897 const enum pta_flags
898 {
899 PTA_SSE = 1,
900 PTA_SSE2 = 2,
901 PTA_MMX = 4,
902 PTA_PREFETCH_SSE = 8,
903 PTA_3DNOW = 16,
904 PTA_3DNOW_A = 64
905 } flags;
906 }
907 const processor_alias_table[] =
908 {
909 {"i386", PROCESSOR_I386, 0},
910 {"i486", PROCESSOR_I486, 0},
911 {"i586", PROCESSOR_PENTIUM, 0},
912 {"pentium", PROCESSOR_PENTIUM, 0},
913 {"pentium-mmx", PROCESSOR_PENTIUM, PTA_MMX},
914 {"i686", PROCESSOR_PENTIUMPRO, 0},
915 {"pentiumpro", PROCESSOR_PENTIUMPRO, 0},
916 {"pentium2", PROCESSOR_PENTIUMPRO, PTA_MMX},
917 {"pentium3", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_PREFETCH_SSE},
918 {"pentium4", PROCESSOR_PENTIUM4, PTA_SSE | PTA_SSE2 |
919 PTA_MMX | PTA_PREFETCH_SSE},
920 {"k6", PROCESSOR_K6, PTA_MMX},
921 {"k6-2", PROCESSOR_K6, PTA_MMX | PTA_3DNOW},
922 {"k6-3", PROCESSOR_K6, PTA_MMX | PTA_3DNOW},
923 {"athlon", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
924 | PTA_3DNOW_A},
925 {"athlon-tbird", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE
926 | PTA_3DNOW | PTA_3DNOW_A},
927 {"athlon-4", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
928 | PTA_3DNOW_A | PTA_SSE},
929 {"athlon-xp", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
930 | PTA_3DNOW_A | PTA_SSE},
931 {"athlon-mp", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
932 | PTA_3DNOW_A | PTA_SSE},
933 };
934
935 int const pta_size = ARRAY_SIZE (processor_alias_table);
936
937 #ifdef SUBTARGET_OVERRIDE_OPTIONS
938 SUBTARGET_OVERRIDE_OPTIONS;
939 #endif
940
941 if (!ix86_cpu_string && ix86_arch_string)
942 ix86_cpu_string = ix86_arch_string;
943 if (!ix86_cpu_string)
944 ix86_cpu_string = cpu_names [TARGET_CPU_DEFAULT];
945 if (!ix86_arch_string)
946 ix86_arch_string = TARGET_64BIT ? "athlon-4" : "i386";
947
948 if (ix86_cmodel_string != 0)
949 {
950 if (!strcmp (ix86_cmodel_string, "small"))
951 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
952 else if (flag_pic)
953 sorry ("code model %s not supported in PIC mode", ix86_cmodel_string);
954 else if (!strcmp (ix86_cmodel_string, "32"))
955 ix86_cmodel = CM_32;
956 else if (!strcmp (ix86_cmodel_string, "kernel") && !flag_pic)
957 ix86_cmodel = CM_KERNEL;
958 else if (!strcmp (ix86_cmodel_string, "medium") && !flag_pic)
959 ix86_cmodel = CM_MEDIUM;
960 else if (!strcmp (ix86_cmodel_string, "large") && !flag_pic)
961 ix86_cmodel = CM_LARGE;
962 else
963 error ("bad value (%s) for -mcmodel= switch", ix86_cmodel_string);
964 }
965 else
966 {
967 ix86_cmodel = CM_32;
968 if (TARGET_64BIT)
969 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
970 }
971 if (ix86_asm_string != 0)
972 {
973 if (!strcmp (ix86_asm_string, "intel"))
974 ix86_asm_dialect = ASM_INTEL;
975 else if (!strcmp (ix86_asm_string, "att"))
976 ix86_asm_dialect = ASM_ATT;
977 else
978 error ("bad value (%s) for -masm= switch", ix86_asm_string);
979 }
980 if ((TARGET_64BIT == 0) != (ix86_cmodel == CM_32))
981 error ("code model `%s' not supported in the %s bit mode",
982 ix86_cmodel_string, TARGET_64BIT ? "64" : "32");
983 if (ix86_cmodel == CM_LARGE)
984 sorry ("code model `large' not supported yet");
985 if ((TARGET_64BIT != 0) != ((target_flags & MASK_64BIT) != 0))
986 sorry ("%i-bit mode not compiled in",
987 (target_flags & MASK_64BIT) ? 64 : 32);
988
989 for (i = 0; i < pta_size; i++)
990 if (! strcmp (ix86_arch_string, processor_alias_table[i].name))
991 {
992 ix86_arch = processor_alias_table[i].processor;
993 /* Default cpu tuning to the architecture. */
994 ix86_cpu = ix86_arch;
995 if (processor_alias_table[i].flags & PTA_MMX
996 && !(target_flags & MASK_MMX_SET))
997 target_flags |= MASK_MMX;
998 if (processor_alias_table[i].flags & PTA_3DNOW
999 && !(target_flags & MASK_3DNOW_SET))
1000 target_flags |= MASK_3DNOW;
1001 if (processor_alias_table[i].flags & PTA_3DNOW_A
1002 && !(target_flags & MASK_3DNOW_A_SET))
1003 target_flags |= MASK_3DNOW_A;
1004 if (processor_alias_table[i].flags & PTA_SSE
1005 && !(target_flags & MASK_SSE_SET))
1006 target_flags |= MASK_SSE;
1007 if (processor_alias_table[i].flags & PTA_SSE2
1008 && !(target_flags & MASK_SSE2_SET))
1009 target_flags |= MASK_SSE2;
1010 if (processor_alias_table[i].flags & PTA_PREFETCH_SSE)
1011 x86_prefetch_sse = true;
1012 break;
1013 }
1014
1015 if (i == pta_size)
1016 error ("bad value (%s) for -march= switch", ix86_arch_string);
1017
1018 for (i = 0; i < pta_size; i++)
1019 if (! strcmp (ix86_cpu_string, processor_alias_table[i].name))
1020 {
1021 ix86_cpu = processor_alias_table[i].processor;
1022 break;
1023 }
1024 if (processor_alias_table[i].flags & PTA_PREFETCH_SSE)
1025 x86_prefetch_sse = true;
1026 if (i == pta_size)
1027 error ("bad value (%s) for -mcpu= switch", ix86_cpu_string);
1028
1029 if (optimize_size)
1030 ix86_cost = &size_cost;
1031 else
1032 ix86_cost = processor_target_table[ix86_cpu].cost;
1033 target_flags |= processor_target_table[ix86_cpu].target_enable;
1034 target_flags &= ~processor_target_table[ix86_cpu].target_disable;
1035
1036 /* Arrange to set up i386_stack_locals for all functions. */
1037 init_machine_status = ix86_init_machine_status;
1038
1039 /* Validate -mregparm= value. */
1040 if (ix86_regparm_string)
1041 {
1042 i = atoi (ix86_regparm_string);
1043 if (i < 0 || i > REGPARM_MAX)
1044 error ("-mregparm=%d is not between 0 and %d", i, REGPARM_MAX);
1045 else
1046 ix86_regparm = i;
1047 }
1048 else
1049 if (TARGET_64BIT)
1050 ix86_regparm = REGPARM_MAX;
1051
1052 /* If the user has provided any of the -malign-* options,
1053 warn and use that value only if -falign-* is not set.
1054 Remove this code in GCC 3.2 or later. */
1055 if (ix86_align_loops_string)
1056 {
1057 warning ("-malign-loops is obsolete, use -falign-loops");
1058 if (align_loops == 0)
1059 {
1060 i = atoi (ix86_align_loops_string);
1061 if (i < 0 || i > MAX_CODE_ALIGN)
1062 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
1063 else
1064 align_loops = 1 << i;
1065 }
1066 }
1067
1068 if (ix86_align_jumps_string)
1069 {
1070 warning ("-malign-jumps is obsolete, use -falign-jumps");
1071 if (align_jumps == 0)
1072 {
1073 i = atoi (ix86_align_jumps_string);
1074 if (i < 0 || i > MAX_CODE_ALIGN)
1075 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
1076 else
1077 align_jumps = 1 << i;
1078 }
1079 }
1080
1081 if (ix86_align_funcs_string)
1082 {
1083 warning ("-malign-functions is obsolete, use -falign-functions");
1084 if (align_functions == 0)
1085 {
1086 i = atoi (ix86_align_funcs_string);
1087 if (i < 0 || i > MAX_CODE_ALIGN)
1088 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
1089 else
1090 align_functions = 1 << i;
1091 }
1092 }
1093
1094 /* Default align_* from the processor table. */
1095 if (align_loops == 0)
1096 {
1097 align_loops = processor_target_table[ix86_cpu].align_loop;
1098 align_loops_max_skip = processor_target_table[ix86_cpu].align_loop_max_skip;
1099 }
1100 if (align_jumps == 0)
1101 {
1102 align_jumps = processor_target_table[ix86_cpu].align_jump;
1103 align_jumps_max_skip = processor_target_table[ix86_cpu].align_jump_max_skip;
1104 }
1105 if (align_functions == 0)
1106 {
1107 align_functions = processor_target_table[ix86_cpu].align_func;
1108 }
1109
1110 /* Validate -mpreferred-stack-boundary= value, or provide default.
1111 The default of 128 bits is for Pentium III's SSE __m128, but we
1112 don't want additional code to keep the stack aligned when
1113 optimizing for code size. */
1114 ix86_preferred_stack_boundary = (optimize_size
1115 ? TARGET_64BIT ? 64 : 32
1116 : 128);
1117 if (ix86_preferred_stack_boundary_string)
1118 {
1119 i = atoi (ix86_preferred_stack_boundary_string);
1120 if (i < (TARGET_64BIT ? 3 : 2) || i > 12)
1121 error ("-mpreferred-stack-boundary=%d is not between %d and 12", i,
1122 TARGET_64BIT ? 3 : 2);
1123 else
1124 ix86_preferred_stack_boundary = (1 << i) * BITS_PER_UNIT;
1125 }
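      /* Illustrative example: with -mpreferred-stack-boundary=4 the code
	 above yields (1 << 4) * BITS_PER_UNIT = 128 bits, i.e. 16-byte
	 alignment, matching the 128-bit default chosen above when not
	 optimizing for size.  */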
1126
1127 /* Validate -mbranch-cost= value, or provide default. */
1128 ix86_branch_cost = processor_target_table[ix86_cpu].branch_cost;
1129 if (ix86_branch_cost_string)
1130 {
1131 i = atoi (ix86_branch_cost_string);
1132 if (i < 0 || i > 5)
1133 error ("-mbranch-cost=%d is not between 0 and 5", i);
1134 else
1135 ix86_branch_cost = i;
1136 }
1137
1138 if (ix86_tls_dialect_string)
1139 {
1140 if (strcmp (ix86_tls_dialect_string, "gnu") == 0)
1141 ix86_tls_dialect = TLS_DIALECT_GNU;
1142 else if (strcmp (ix86_tls_dialect_string, "sun") == 0)
1143 ix86_tls_dialect = TLS_DIALECT_SUN;
1144 else
1145 error ("bad value (%s) for -mtls-dialect= switch",
1146 ix86_tls_dialect_string);
1147 }
1148
1149 if (profile_flag)
1150 target_flags &= ~MASK_OMIT_LEAF_FRAME_POINTER;
1151
1152 /* Keep nonleaf frame pointers. */
1153 if (TARGET_OMIT_LEAF_FRAME_POINTER)
1154 flag_omit_frame_pointer = 1;
1155
1156 /* If we're doing fast math, we don't care about comparison order
1157 wrt NaNs. This lets us use a shorter comparison sequence. */
1158 if (flag_unsafe_math_optimizations)
1159 target_flags &= ~MASK_IEEE_FP;
1160
1161 /* If the architecture always has an FPU, turn off NO_FANCY_MATH_387,
1162 since the insns won't need emulation. */
1163 if (x86_arch_always_fancy_math_387 & (1 << ix86_arch))
1164 target_flags &= ~MASK_NO_FANCY_MATH_387;
1165
1166 if (TARGET_64BIT)
1167 {
1168 if (TARGET_ALIGN_DOUBLE)
1169 error ("-malign-double makes no sense in the 64bit mode");
1170 if (TARGET_RTD)
1171 error ("-mrtd calling convention not supported in the 64bit mode");
1172 /* Enable by default the SSE and MMX builtins. */
1173 target_flags |= (MASK_SSE2 | MASK_SSE | MASK_MMX | MASK_128BIT_LONG_DOUBLE);
1174 ix86_fpmath = FPMATH_SSE;
1175 }
1176 else
1177 ix86_fpmath = FPMATH_387;
1178
1179 if (ix86_fpmath_string != 0)
1180 {
1181 if (! strcmp (ix86_fpmath_string, "387"))
1182 ix86_fpmath = FPMATH_387;
1183 else if (! strcmp (ix86_fpmath_string, "sse"))
1184 {
1185 if (!TARGET_SSE)
1186 {
1187 warning ("SSE instruction set disabled, using 387 arithmetics");
1188 ix86_fpmath = FPMATH_387;
1189 }
1190 else
1191 ix86_fpmath = FPMATH_SSE;
1192 }
1193 else if (! strcmp (ix86_fpmath_string, "387,sse")
1194 || ! strcmp (ix86_fpmath_string, "sse,387"))
1195 {
1196 if (!TARGET_SSE)
1197 {
1198 warning ("SSE instruction set disabled, using 387 arithmetics");
1199 ix86_fpmath = FPMATH_387;
1200 }
1201 else if (!TARGET_80387)
1202 {
1203 warning ("387 instruction set disabled, using SSE arithmetics");
1204 ix86_fpmath = FPMATH_SSE;
1205 }
1206 else
1207 ix86_fpmath = FPMATH_SSE | FPMATH_387;
1208 }
1209 else
1210 error ("bad value (%s) for -mfpmath= switch", ix86_fpmath_string);
1211 }
1212
1213 /* It makes no sense to ask for just SSE builtins, so MMX is also turned
1214 on by -msse. */
1215 if (TARGET_SSE)
1216 {
1217 target_flags |= MASK_MMX;
1218 x86_prefetch_sse = true;
1219 }
1220
1221   /* If it has 3DNow! it also has MMX, so MMX is also turned on by -m3dnow.  */
1222 if (TARGET_3DNOW)
1223 {
1224 target_flags |= MASK_MMX;
1225       /* If we are targeting the Athlon architecture, enable the 3DNow!/MMX
1226 	 extensions it adds. */
1227 if (x86_3dnow_a & (1 << ix86_arch))
1228 target_flags |= MASK_3DNOW_A;
1229 }
1230 if ((x86_accumulate_outgoing_args & CPUMASK)
1231 && !(target_flags & MASK_ACCUMULATE_OUTGOING_ARGS_SET)
1232 && !optimize_size)
1233 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
1234
1235 /* Figure out what ASM_GENERATE_INTERNAL_LABEL builds as a prefix. */
1236 {
1237 char *p;
1238 ASM_GENERATE_INTERNAL_LABEL (internal_label_prefix, "LX", 0);
1239 p = strchr (internal_label_prefix, 'X');
1240 internal_label_prefix_len = p - internal_label_prefix;
1241 *p = '\0';
1242 }
1243 }
1244 \f
1245 void
1246 optimization_options (level, size)
1247 int level;
1248 int size ATTRIBUTE_UNUSED;
1249 {
1250   /* For -O2 and beyond, turn off -fschedule-insns by default.  It tends
1251      to make the problem of too few registers even worse. */
1252 #ifdef INSN_SCHEDULING
1253 if (level > 1)
1254 flag_schedule_insns = 0;
1255 #endif
1256 if (TARGET_64BIT && optimize >= 1)
1257 flag_omit_frame_pointer = 1;
1258 if (TARGET_64BIT)
1259 {
1260 flag_pcc_struct_return = 0;
1261 flag_asynchronous_unwind_tables = 1;
1262 }
1263 if (profile_flag)
1264 flag_omit_frame_pointer = 0;
1265 }
1266 \f
1267 /* Table of valid machine attributes. */
1268 const struct attribute_spec ix86_attribute_table[] =
1269 {
1270 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
1271 /* Stdcall attribute says callee is responsible for popping arguments
1272 if they are not variable. */
1273 { "stdcall", 0, 0, false, true, true, ix86_handle_cdecl_attribute },
1274 /* Cdecl attribute says the callee is a normal C declaration */
1275 { "cdecl", 0, 0, false, true, true, ix86_handle_cdecl_attribute },
1276 /* Regparm attribute specifies how many integer arguments are to be
1277 passed in registers. */
1278 { "regparm", 1, 1, false, true, true, ix86_handle_regparm_attribute },
1279 #ifdef TARGET_DLLIMPORT_DECL_ATTRIBUTES
1280 { "dllimport", 0, 0, false, false, false, ix86_handle_dll_attribute },
1281 { "dllexport", 0, 0, false, false, false, ix86_handle_dll_attribute },
1282 { "shared", 0, 0, true, false, false, ix86_handle_shared_attribute },
1283 #endif
1284 { NULL, 0, 0, false, false, false, NULL }
1285 };
1286
1287 /* Handle a "cdecl" or "stdcall" attribute;
1288 arguments as in struct attribute_spec.handler. */
1289 static tree
1290 ix86_handle_cdecl_attribute (node, name, args, flags, no_add_attrs)
1291 tree *node;
1292 tree name;
1293 tree args ATTRIBUTE_UNUSED;
1294 int flags ATTRIBUTE_UNUSED;
1295 bool *no_add_attrs;
1296 {
1297 if (TREE_CODE (*node) != FUNCTION_TYPE
1298 && TREE_CODE (*node) != METHOD_TYPE
1299 && TREE_CODE (*node) != FIELD_DECL
1300 && TREE_CODE (*node) != TYPE_DECL)
1301 {
1302 warning ("`%s' attribute only applies to functions",
1303 IDENTIFIER_POINTER (name));
1304 *no_add_attrs = true;
1305 }
1306
1307 if (TARGET_64BIT)
1308 {
1309 warning ("`%s' attribute ignored", IDENTIFIER_POINTER (name));
1310 *no_add_attrs = true;
1311 }
1312
1313 return NULL_TREE;
1314 }
1315
1316 /* Handle a "regparm" attribute;
1317 arguments as in struct attribute_spec.handler. */
1318 static tree
1319 ix86_handle_regparm_attribute (node, name, args, flags, no_add_attrs)
1320 tree *node;
1321 tree name;
1322 tree args;
1323 int flags ATTRIBUTE_UNUSED;
1324 bool *no_add_attrs;
1325 {
1326 if (TREE_CODE (*node) != FUNCTION_TYPE
1327 && TREE_CODE (*node) != METHOD_TYPE
1328 && TREE_CODE (*node) != FIELD_DECL
1329 && TREE_CODE (*node) != TYPE_DECL)
1330 {
1331 warning ("`%s' attribute only applies to functions",
1332 IDENTIFIER_POINTER (name));
1333 *no_add_attrs = true;
1334 }
1335 else
1336 {
1337 tree cst;
1338
1339 cst = TREE_VALUE (args);
1340 if (TREE_CODE (cst) != INTEGER_CST)
1341 {
1342 warning ("`%s' attribute requires an integer constant argument",
1343 IDENTIFIER_POINTER (name));
1344 *no_add_attrs = true;
1345 }
1346 else if (compare_tree_int (cst, REGPARM_MAX) > 0)
1347 {
1348 warning ("argument to `%s' attribute larger than %d",
1349 IDENTIFIER_POINTER (name), REGPARM_MAX);
1350 *no_add_attrs = true;
1351 }
1352 }
1353
1354 return NULL_TREE;
1355 }
1356
1357 /* Return 0 if the attributes for two types are incompatible, 1 if they
1358 are compatible, and 2 if they are nearly compatible (which causes a
1359 warning to be generated). */
1360
1361 static int
1362 ix86_comp_type_attributes (type1, type2)
1363 tree type1;
1364 tree type2;
1365 {
1366 /* Check for mismatch of non-default calling convention. */
1367 const char *const rtdstr = TARGET_RTD ? "cdecl" : "stdcall";
1368
1369 if (TREE_CODE (type1) != FUNCTION_TYPE)
1370 return 1;
1371
1372 /* Check for mismatched return types (cdecl vs stdcall). */
1373 if (!lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type1))
1374 != !lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type2)))
1375 return 0;
1376 return 1;
1377 }
1378 \f
1379 /* Value is the number of bytes of arguments automatically
1380 popped when returning from a subroutine call.
1381 FUNDECL is the declaration node of the function (as a tree),
1382 FUNTYPE is the data type of the function (as a tree),
1383 or for a library call it is an identifier node for the subroutine name.
1384 SIZE is the number of bytes of arguments passed on the stack.
1385
1386 On the 80386, the RTD insn may be used to pop them if the number
1387 of args is fixed, but if the number is variable then the caller
1388 must pop them all. RTD can't be used for library calls now
1389 because the library is compiled with the Unix compiler.
1390 Use of RTD is a selectable option, since it is incompatible with
1391 standard Unix calling sequences. If the option is not selected,
1392 the caller must always pop the args.
1393
1394 The attribute stdcall is equivalent to RTD on a per module basis. */
1395
1396 int
1397 ix86_return_pops_args (fundecl, funtype, size)
1398 tree fundecl;
1399 tree funtype;
1400 int size;
1401 {
1402 int rtd = TARGET_RTD && (!fundecl || TREE_CODE (fundecl) != IDENTIFIER_NODE);
1403
1404 /* Cdecl functions override -mrtd, and never pop the stack. */
1405 if (! lookup_attribute ("cdecl", TYPE_ATTRIBUTES (funtype))) {
1406
1407 /* Stdcall functions will pop the stack if not variable args. */
1408 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (funtype)))
1409 rtd = 1;
1410
1411 if (rtd
1412 && (TYPE_ARG_TYPES (funtype) == NULL_TREE
1413 || (TREE_VALUE (tree_last (TYPE_ARG_TYPES (funtype)))
1414 == void_type_node)))
1415 return size;
1416 }
1417
1418 /* Lose any fake structure return argument if it is passed on the stack. */
1419 if (aggregate_value_p (TREE_TYPE (funtype))
1420 && !TARGET_64BIT)
1421 {
1422 int nregs = ix86_regparm;
1423
1424 if (funtype)
1425 {
1426 tree attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (funtype));
1427
1428 if (attr)
1429 nregs = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
1430 }
1431
1432 if (!nregs)
1433 return GET_MODE_SIZE (Pmode);
1434 }
1435
1436 return 0;
1437 }
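/* Illustrative example (the prototype below is hypothetical): for

     void __attribute__ ((stdcall)) f (int a, int b, int c);

   the argument list is fixed, so ix86_return_pops_args returns SIZE
   (12 bytes here) and the callee pops its own arguments; a varargs or
   cdecl prototype instead falls through and 0 is returned, leaving the
   popping to the caller.  */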
1438 \f
1439 /* Argument support functions. */
1440
1441 /* Return true when register may be used to pass function parameters. */
1442 bool
1443 ix86_function_arg_regno_p (regno)
1444 int regno;
1445 {
1446 int i;
1447 if (!TARGET_64BIT)
1448 return (regno < REGPARM_MAX
1449 || (TARGET_SSE && SSE_REGNO_P (regno) && !fixed_regs[regno]));
1450 if (SSE_REGNO_P (regno) && TARGET_SSE)
1451 return true;
1452 /* RAX is used as hidden argument to va_arg functions. */
1453 if (!regno)
1454 return true;
1455 for (i = 0; i < REGPARM_MAX; i++)
1456 if (regno == x86_64_int_parameter_registers[i])
1457 return true;
1458 return false;
1459 }
1460
1461 /* Initialize a variable CUM of type CUMULATIVE_ARGS
1462 for a call to a function whose data type is FNTYPE.
1463 For a library call, FNTYPE is 0. */
1464
1465 void
1466 init_cumulative_args (cum, fntype, libname)
1467 CUMULATIVE_ARGS *cum; /* Argument info to initialize */
1468 tree fntype; /* tree ptr for function decl */
1469 rtx libname; /* SYMBOL_REF of library name or 0 */
1470 {
1471 static CUMULATIVE_ARGS zero_cum;
1472 tree param, next_param;
1473
1474 if (TARGET_DEBUG_ARG)
1475 {
1476 fprintf (stderr, "\ninit_cumulative_args (");
1477 if (fntype)
1478 fprintf (stderr, "fntype code = %s, ret code = %s",
1479 tree_code_name[(int) TREE_CODE (fntype)],
1480 tree_code_name[(int) TREE_CODE (TREE_TYPE (fntype))]);
1481 else
1482 fprintf (stderr, "no fntype");
1483
1484 if (libname)
1485 fprintf (stderr, ", libname = %s", XSTR (libname, 0));
1486 }
1487
1488 *cum = zero_cum;
1489
1490 /* Set up the number of registers to use for passing arguments. */
1491 cum->nregs = ix86_regparm;
1492 cum->sse_nregs = SSE_REGPARM_MAX;
1493 if (fntype && !TARGET_64BIT)
1494 {
1495 tree attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (fntype));
1496
1497 if (attr)
1498 cum->nregs = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
1499 }
1500 cum->maybe_vaarg = false;
1501
1502   /* Determine if this function has variable arguments.  This is
1503      indicated by the last argument being 'void_type_node' if there
1504      are no variable arguments.  If there are variable arguments, then
1505      we won't pass anything in registers.  */
1506
1507 if (cum->nregs)
1508 {
1509 for (param = (fntype) ? TYPE_ARG_TYPES (fntype) : 0;
1510 param != 0; param = next_param)
1511 {
1512 next_param = TREE_CHAIN (param);
1513 if (next_param == 0 && TREE_VALUE (param) != void_type_node)
1514 {
1515 if (!TARGET_64BIT)
1516 cum->nregs = 0;
1517 cum->maybe_vaarg = true;
1518 }
1519 }
1520 }
1521 if ((!fntype && !libname)
1522 || (fntype && !TYPE_ARG_TYPES (fntype)))
1523 cum->maybe_vaarg = 1;
1524
1525 if (TARGET_DEBUG_ARG)
1526 fprintf (stderr, ", nregs=%d )\n", cum->nregs);
1527
1528 return;
1529 }
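/* Illustrative example (the declaration below is hypothetical): on a
   32-bit target,

     void __attribute__ ((regparm (3))) f (int a, int b);

   makes init_cumulative_args set cum->nregs to 3 from the regparm
   attribute, while a prototype ending in `...' resets nregs to 0 and
   sets cum->maybe_vaarg.  */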
1530
1531 /* x86-64 register passing implementation.  See the x86-64 ABI for details.
1532    The goal of this code is to classify each eightbyte (8-byte chunk) of the
1533    incoming argument by register class and assign registers accordingly.  */
1534
1535 /* Return the union class of CLASS1 and CLASS2.
1536 See the x86-64 PS ABI for details. */
1537
1538 static enum x86_64_reg_class
1539 merge_classes (class1, class2)
1540 enum x86_64_reg_class class1, class2;
1541 {
1542 /* Rule #1: If both classes are equal, this is the resulting class. */
1543 if (class1 == class2)
1544 return class1;
1545
1546 /* Rule #2: If one of the classes is NO_CLASS, the resulting class is
1547 the other class. */
1548 if (class1 == X86_64_NO_CLASS)
1549 return class2;
1550 if (class2 == X86_64_NO_CLASS)
1551 return class1;
1552
1553 /* Rule #3: If one of the classes is MEMORY, the result is MEMORY. */
1554 if (class1 == X86_64_MEMORY_CLASS || class2 == X86_64_MEMORY_CLASS)
1555 return X86_64_MEMORY_CLASS;
1556
1557 /* Rule #4: If one of the classes is INTEGER, the result is INTEGER. */
1558 if ((class1 == X86_64_INTEGERSI_CLASS && class2 == X86_64_SSESF_CLASS)
1559 || (class2 == X86_64_INTEGERSI_CLASS && class1 == X86_64_SSESF_CLASS))
1560 return X86_64_INTEGERSI_CLASS;
1561 if (class1 == X86_64_INTEGER_CLASS || class1 == X86_64_INTEGERSI_CLASS
1562 || class2 == X86_64_INTEGER_CLASS || class2 == X86_64_INTEGERSI_CLASS)
1563 return X86_64_INTEGER_CLASS;
1564
1565 /* Rule #5: If one of the classes is X87 or X87UP class, MEMORY is used. */
1566 if (class1 == X86_64_X87_CLASS || class1 == X86_64_X87UP_CLASS
1567 || class2 == X86_64_X87_CLASS || class2 == X86_64_X87UP_CLASS)
1568 return X86_64_MEMORY_CLASS;
1569
1570 /* Rule #6: Otherwise class SSE is used. */
1571 return X86_64_SSE_CLASS;
1572 }
1573
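/* Sketch of the rules above: for a hypothetical
     union u { int i; float f; };
   the int member yields X86_64_INTEGERSI_CLASS and the float member
   X86_64_SSESF_CLASS for the same eightbyte; rule #4 merges them into
   X86_64_INTEGERSI_CLASS, so the union is passed in a general purpose
   register.  */
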
1574 /* Classify the argument of type TYPE and mode MODE.
1575 CLASSES will be filled by the register class used to pass each word
1576 of the operand. The number of words is returned. In case the parameter
1577 should be passed in memory, 0 is returned. As a special case for zero
1578 sized containers, classes[0] will be NO_CLASS and 1 is returned.
1579
1580 BIT_OFFSET is used internally for handling records and specifies the
1581 offset in bits modulo 256 to avoid overflow cases.
1582
1583 See the x86-64 PS ABI for details.
1584 */
1585
1586 static int
1587 classify_argument (mode, type, classes, bit_offset)
1588 enum machine_mode mode;
1589 tree type;
1590 enum x86_64_reg_class classes[MAX_CLASSES];
1591 int bit_offset;
1592 {
1593 int bytes =
1594 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
1595 int words = (bytes + (bit_offset % 64) / 8 + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
1596
1597 /* Variable sized entities are always passed/returned in memory. */
1598 if (bytes < 0)
1599 return 0;
1600
1601 if (type && AGGREGATE_TYPE_P (type))
1602 {
1603 int i;
1604 tree field;
1605 enum x86_64_reg_class subclasses[MAX_CLASSES];
1606
1607 /* On x86-64 we pass structures larger than 16 bytes on the stack. */
1608 if (bytes > 16)
1609 return 0;
1610
1611 for (i = 0; i < words; i++)
1612 classes[i] = X86_64_NO_CLASS;
1613
1614 /* Zero sized arrays or structures are NO_CLASS. We return 0 to
1615 signal the memory class, so handle this as a special case. */
1616 if (!words)
1617 {
1618 classes[0] = X86_64_NO_CLASS;
1619 return 1;
1620 }
1621
1622 /* Classify each field of record and merge classes. */
1623 if (TREE_CODE (type) == RECORD_TYPE)
1624 {
1625 /* For classes, first merge in the fields of the base classes. */
1626 if (TYPE_BINFO (type) != NULL && TYPE_BINFO_BASETYPES (type) != NULL)
1627 {
1628 tree bases = TYPE_BINFO_BASETYPES (type);
1629 int n_bases = TREE_VEC_LENGTH (bases);
1630 int i;
1631
1632 for (i = 0; i < n_bases; ++i)
1633 {
1634 tree binfo = TREE_VEC_ELT (bases, i);
1635 int num;
1636 int offset = tree_low_cst (BINFO_OFFSET (binfo), 0) * 8;
1637 tree type = BINFO_TYPE (binfo);
1638
1639 num = classify_argument (TYPE_MODE (type),
1640 type, subclasses,
1641 (offset + bit_offset) % 256);
1642 if (!num)
1643 return 0;
1644 for (i = 0; i < num; i++)
1645 {
1646 int pos = (offset + (bit_offset % 64)) / 8 / 8;
1647 classes[i + pos] =
1648 merge_classes (subclasses[i], classes[i + pos]);
1649 }
1650 }
1651 }
1652 /* And now merge the fields of structure. */
1653 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
1654 {
1655 if (TREE_CODE (field) == FIELD_DECL)
1656 {
1657 int num;
1658
1659 /* Bitfields are always classified as integer. Handle them
1660 early, since later code would consider them to be
1661 misaligned integers. */
1662 if (DECL_BIT_FIELD (field))
1663 {
1664 for (i = int_bit_position (field) / 8 / 8;
1665 i < (int_bit_position (field)
1666 + tree_low_cst (DECL_SIZE (field), 0)
1667 + 63) / 8 / 8; i++)
1668 classes[i] =
1669 merge_classes (X86_64_INTEGER_CLASS,
1670 classes[i]);
1671 }
1672 else
1673 {
1674 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
1675 TREE_TYPE (field), subclasses,
1676 (int_bit_position (field)
1677 + bit_offset) % 256);
1678 if (!num)
1679 return 0;
1680 for (i = 0; i < num; i++)
1681 {
1682 int pos =
1683 (int_bit_position (field) + (bit_offset % 64)) / 8 / 8;
1684 classes[i + pos] =
1685 merge_classes (subclasses[i], classes[i + pos]);
1686 }
1687 }
1688 }
1689 }
1690 }
1691 /* Arrays are handled as small records. */
1692 else if (TREE_CODE (type) == ARRAY_TYPE)
1693 {
1694 int num;
1695 num = classify_argument (TYPE_MODE (TREE_TYPE (type)),
1696 TREE_TYPE (type), subclasses, bit_offset);
1697 if (!num)
1698 return 0;
1699
1700 /* The partial classes are now full classes. */
1701 if (subclasses[0] == X86_64_SSESF_CLASS && bytes != 4)
1702 subclasses[0] = X86_64_SSE_CLASS;
1703 if (subclasses[0] == X86_64_INTEGERSI_CLASS && bytes != 4)
1704 subclasses[0] = X86_64_INTEGER_CLASS;
1705
1706 for (i = 0; i < words; i++)
1707 classes[i] = subclasses[i % num];
1708 }
1709 /* Unions are similar to RECORD_TYPE but offset is always 0. */
1710 else if (TREE_CODE (type) == UNION_TYPE
1711 || TREE_CODE (type) == QUAL_UNION_TYPE)
1712 {
1713 /* For classes, first merge in the fields of the base classes. */
1714 if (TYPE_BINFO (type) != NULL && TYPE_BINFO_BASETYPES (type) != NULL)
1715 {
1716 tree bases = TYPE_BINFO_BASETYPES (type);
1717 int n_bases = TREE_VEC_LENGTH (bases);
1718 int i;
1719
1720 for (i = 0; i < n_bases; ++i)
1721 {
1722 tree binfo = TREE_VEC_ELT (bases, i);
1723 int num;
1724 int offset = tree_low_cst (BINFO_OFFSET (binfo), 0) * 8;
1725 tree type = BINFO_TYPE (binfo);
1726
1727 num = classify_argument (TYPE_MODE (type),
1728 type, subclasses,
1729 (offset + (bit_offset % 64)) % 256);
1730 if (!num)
1731 return 0;
1732 for (i = 0; i < num; i++)
1733 {
1734 int pos = (offset + (bit_offset % 64)) / 8 / 8;
1735 classes[i + pos] =
1736 merge_classes (subclasses[i], classes[i + pos]);
1737 }
1738 }
1739 }
1740 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
1741 {
1742 if (TREE_CODE (field) == FIELD_DECL)
1743 {
1744 int num;
1745 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
1746 TREE_TYPE (field), subclasses,
1747 bit_offset);
1748 if (!num)
1749 return 0;
1750 for (i = 0; i < num; i++)
1751 classes[i] = merge_classes (subclasses[i], classes[i]);
1752 }
1753 }
1754 }
1755 else
1756 abort ();
1757
1758 /* Final merger cleanup. */
1759 for (i = 0; i < words; i++)
1760 {
1761 /* If one class is MEMORY, everything should be passed in
1762 memory. */
1763 if (classes[i] == X86_64_MEMORY_CLASS)
1764 return 0;
1765
1766 /* The X86_64_SSEUP_CLASS should always be preceded by
1767 X86_64_SSE_CLASS. */
1768 if (classes[i] == X86_64_SSEUP_CLASS
1769 && (i == 0 || classes[i - 1] != X86_64_SSE_CLASS))
1770 classes[i] = X86_64_SSE_CLASS;
1771
1772 /* X86_64_X87UP_CLASS should be preceded by X86_64_X87_CLASS. */
1773 if (classes[i] == X86_64_X87UP_CLASS
1774 && (i == 0 || classes[i - 1] != X86_64_X87_CLASS))
1775 classes[i] = X86_64_SSE_CLASS;
1776 }
1777 return words;
1778 }
1779
1780 /* Compute alignment needed. We align all types to natural boundaries with
1781 the exception of XFmode, which is aligned to 128 bits. */
1782 if (mode != VOIDmode && mode != BLKmode)
1783 {
1784 int mode_alignment = GET_MODE_BITSIZE (mode);
1785
1786 if (mode == XFmode)
1787 mode_alignment = 128;
1788 else if (mode == XCmode)
1789 mode_alignment = 256;
1790 /* Misaligned fields are always returned in memory. */
1791 if (bit_offset % mode_alignment)
1792 return 0;
1793 }
1794
1795 /* Classification of atomic types. */
1796 switch (mode)
1797 {
1798 case DImode:
1799 case SImode:
1800 case HImode:
1801 case QImode:
1802 case CSImode:
1803 case CHImode:
1804 case CQImode:
1805 if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
1806 classes[0] = X86_64_INTEGERSI_CLASS;
1807 else
1808 classes[0] = X86_64_INTEGER_CLASS;
1809 return 1;
1810 case CDImode:
1811 case TImode:
1812 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
1813 return 2;
1814 case CTImode:
1815 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
1816 classes[2] = classes[3] = X86_64_INTEGER_CLASS;
1817 return 4;
1818 case SFmode:
1819 if (!(bit_offset % 64))
1820 classes[0] = X86_64_SSESF_CLASS;
1821 else
1822 classes[0] = X86_64_SSE_CLASS;
1823 return 1;
1824 case DFmode:
1825 classes[0] = X86_64_SSEDF_CLASS;
1826 return 1;
1827 case TFmode:
1828 classes[0] = X86_64_X87_CLASS;
1829 classes[1] = X86_64_X87UP_CLASS;
1830 return 2;
1831 case TCmode:
1832 classes[0] = X86_64_X87_CLASS;
1833 classes[1] = X86_64_X87UP_CLASS;
1834 classes[2] = X86_64_X87_CLASS;
1835 classes[3] = X86_64_X87UP_CLASS;
1836 return 4;
1837 case DCmode:
1838 classes[0] = X86_64_SSEDF_CLASS;
1839 classes[1] = X86_64_SSEDF_CLASS;
1840 return 2;
1841 case SCmode:
1842 classes[0] = X86_64_SSE_CLASS;
1843 return 1;
1844 case V4SFmode:
1845 case V4SImode:
1846 case V16QImode:
1847 case V8HImode:
1848 case V2DFmode:
1849 case V2DImode:
1850 classes[0] = X86_64_SSE_CLASS;
1851 classes[1] = X86_64_SSEUP_CLASS;
1852 return 2;
1853 case V2SFmode:
1854 case V2SImode:
1855 case V4HImode:
1856 case V8QImode:
1857 classes[0] = X86_64_SSE_CLASS;
1858 return 1;
1859 case BLKmode:
1860 case VOIDmode:
1861 return 0;
1862 default:
1863 abort ();
1864 }
1865 }
1866
1867 /* Examine the argument and set the number of registers required in each
1868 class. Return 0 iff the parameter should be passed in memory. */
1869 static int
1870 examine_argument (mode, type, in_return, int_nregs, sse_nregs)
1871 enum machine_mode mode;
1872 tree type;
1873 int *int_nregs, *sse_nregs;
1874 int in_return;
1875 {
1876 enum x86_64_reg_class class[MAX_CLASSES];
1877 int n = classify_argument (mode, type, class, 0);
1878
1879 *int_nregs = 0;
1880 *sse_nregs = 0;
1881 if (!n)
1882 return 0;
1883 for (n--; n >= 0; n--)
1884 switch (class[n])
1885 {
1886 case X86_64_INTEGER_CLASS:
1887 case X86_64_INTEGERSI_CLASS:
1888 (*int_nregs)++;
1889 break;
1890 case X86_64_SSE_CLASS:
1891 case X86_64_SSESF_CLASS:
1892 case X86_64_SSEDF_CLASS:
1893 (*sse_nregs)++;
1894 break;
1895 case X86_64_NO_CLASS:
1896 case X86_64_SSEUP_CLASS:
1897 break;
1898 case X86_64_X87_CLASS:
1899 case X86_64_X87UP_CLASS:
1900 if (!in_return)
1901 return 0;
1902 break;
1903 case X86_64_MEMORY_CLASS:
1904 abort ();
1905 }
1906 return 1;
1907 }
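
/* For example, a TImode argument classifies as two X86_64_INTEGER_CLASS
   eightbytes, so examine_argument sets *int_nregs to 2, *sse_nregs to 0
   and returns 1.  */
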
1908 /* Construct a container for the argument as used by the GCC interface. See
1909 FUNCTION_ARG for the detailed description. */
1910 static rtx
1911 construct_container (mode, type, in_return, nintregs, nsseregs, intreg, sse_regno)
1912 enum machine_mode mode;
1913 tree type;
1914 int in_return;
1915 int nintregs, nsseregs;
1916 const int * intreg;
1917 int sse_regno;
1918 {
1919 enum machine_mode tmpmode;
1920 int bytes =
1921 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
1922 enum x86_64_reg_class class[MAX_CLASSES];
1923 int n;
1924 int i;
1925 int nexps = 0;
1926 int needed_sseregs, needed_intregs;
1927 rtx exp[MAX_CLASSES];
1928 rtx ret;
1929
1930 n = classify_argument (mode, type, class, 0);
1931 if (TARGET_DEBUG_ARG)
1932 {
1933 if (!n)
1934 fprintf (stderr, "Memory class\n");
1935 else
1936 {
1937 fprintf (stderr, "Classes:");
1938 for (i = 0; i < n; i++)
1939 {
1940 fprintf (stderr, " %s", x86_64_reg_class_name[class[i]]);
1941 }
1942 fprintf (stderr, "\n");
1943 }
1944 }
1945 if (!n)
1946 return NULL;
1947 if (!examine_argument (mode, type, in_return, &needed_intregs, &needed_sseregs))
1948 return NULL;
1949 if (needed_intregs > nintregs || needed_sseregs > nsseregs)
1950 return NULL;
1951
1952 /* First construct simple cases. Avoid SCmode, since we want to use
1953 a single register to pass this type. */
1954 if (n == 1 && mode != SCmode)
1955 switch (class[0])
1956 {
1957 case X86_64_INTEGER_CLASS:
1958 case X86_64_INTEGERSI_CLASS:
1959 return gen_rtx_REG (mode, intreg[0]);
1960 case X86_64_SSE_CLASS:
1961 case X86_64_SSESF_CLASS:
1962 case X86_64_SSEDF_CLASS:
1963 return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
1964 case X86_64_X87_CLASS:
1965 return gen_rtx_REG (mode, FIRST_STACK_REG);
1966 case X86_64_NO_CLASS:
1967 /* Zero sized array, struct or class. */
1968 return NULL;
1969 default:
1970 abort ();
1971 }
1972 if (n == 2 && class[0] == X86_64_SSE_CLASS && class[1] == X86_64_SSEUP_CLASS)
1973 return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
1974 if (n == 2
1975 && class[0] == X86_64_X87_CLASS && class[1] == X86_64_X87UP_CLASS)
1976 return gen_rtx_REG (TFmode, FIRST_STACK_REG);
1977 if (n == 2 && class[0] == X86_64_INTEGER_CLASS
1978 && class[1] == X86_64_INTEGER_CLASS
1979 && (mode == CDImode || mode == TImode)
1980 && intreg[0] + 1 == intreg[1])
1981 return gen_rtx_REG (mode, intreg[0]);
1982 if (n == 4
1983 && class[0] == X86_64_X87_CLASS && class[1] == X86_64_X87UP_CLASS
1984 && class[2] == X86_64_X87_CLASS && class[3] == X86_64_X87UP_CLASS)
1985 return gen_rtx_REG (TCmode, FIRST_STACK_REG);
1986
1987 /* Otherwise figure out the entries of the PARALLEL. */
1988 for (i = 0; i < n; i++)
1989 {
1990 switch (class[i])
1991 {
1992 case X86_64_NO_CLASS:
1993 break;
1994 case X86_64_INTEGER_CLASS:
1995 case X86_64_INTEGERSI_CLASS:
1996 /* Merge TImodes on aligned occasions here too. */
1997 if (i * 8 + 8 > bytes)
1998 tmpmode = mode_for_size ((bytes - i * 8) * BITS_PER_UNIT, MODE_INT, 0);
1999 else if (class[i] == X86_64_INTEGERSI_CLASS)
2000 tmpmode = SImode;
2001 else
2002 tmpmode = DImode;
2003 /* We've requested 24 bytes that we don't have a mode for. Use DImode. */
2004 if (tmpmode == BLKmode)
2005 tmpmode = DImode;
2006 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
2007 gen_rtx_REG (tmpmode, *intreg),
2008 GEN_INT (i*8));
2009 intreg++;
2010 break;
2011 case X86_64_SSESF_CLASS:
2012 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
2013 gen_rtx_REG (SFmode,
2014 SSE_REGNO (sse_regno)),
2015 GEN_INT (i*8));
2016 sse_regno++;
2017 break;
2018 case X86_64_SSEDF_CLASS:
2019 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
2020 gen_rtx_REG (DFmode,
2021 SSE_REGNO (sse_regno)),
2022 GEN_INT (i*8));
2023 sse_regno++;
2024 break;
2025 case X86_64_SSE_CLASS:
2026 if (i < n - 1 && class[i + 1] == X86_64_SSEUP_CLASS)
2027 tmpmode = TImode, i++;
2028 else
2029 tmpmode = DImode;
2030 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
2031 gen_rtx_REG (tmpmode,
2032 SSE_REGNO (sse_regno)),
2033 GEN_INT (i*8));
2034 sse_regno++;
2035 break;
2036 default:
2037 abort ();
2038 }
2039 }
2040 ret = gen_rtx_PARALLEL (mode, rtvec_alloc (nexps));
2041 for (i = 0; i < nexps; i++)
2042 XVECEXP (ret, 0, i) = exp [i];
2043 return ret;
2044 }
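
/* As a rough sketch of the result: for the hypothetical
     struct example { double d; long l; };
   passed as the first argument, the classes are { SSEDF, INTEGER } and
   the container built above is a PARALLEL of the form
     (parallel [(expr_list (reg:DF xmm0) (const_int 0))
                (expr_list (reg:DI di) (const_int 8))])
   assuming the first SSE and integer argument registers are still
   available, i.e. the double travels in xmm0 and the long in rdi.  */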
2045
2046 /* Update the data in CUM to advance over an argument
2047 of mode MODE and data type TYPE.
2048 (TYPE is null for libcalls where that information may not be available.) */
2049
2050 void
2051 function_arg_advance (cum, mode, type, named)
2052 CUMULATIVE_ARGS *cum; /* current arg information */
2053 enum machine_mode mode; /* current arg mode */
2054 tree type; /* type of the argument or 0 if lib support */
2055 int named; /* whether or not the argument was named */
2056 {
2057 int bytes =
2058 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
2059 int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
2060
2061 if (TARGET_DEBUG_ARG)
2062 fprintf (stderr,
2063 "function_adv (sz=%d, wds=%2d, nregs=%d, mode=%s, named=%d)\n\n",
2064 words, cum->words, cum->nregs, GET_MODE_NAME (mode), named);
2065 if (TARGET_64BIT)
2066 {
2067 int int_nregs, sse_nregs;
2068 if (!examine_argument (mode, type, 0, &int_nregs, &sse_nregs))
2069 cum->words += words;
2070 else if (sse_nregs <= cum->sse_nregs && int_nregs <= cum->nregs)
2071 {
2072 cum->nregs -= int_nregs;
2073 cum->sse_nregs -= sse_nregs;
2074 cum->regno += int_nregs;
2075 cum->sse_regno += sse_nregs;
2076 }
2077 else
2078 cum->words += words;
2079 }
2080 else
2081 {
2082 if (TARGET_SSE && mode == TImode)
2083 {
2084 cum->sse_words += words;
2085 cum->sse_nregs -= 1;
2086 cum->sse_regno += 1;
2087 if (cum->sse_nregs <= 0)
2088 {
2089 cum->sse_nregs = 0;
2090 cum->sse_regno = 0;
2091 }
2092 }
2093 else
2094 {
2095 cum->words += words;
2096 cum->nregs -= words;
2097 cum->regno += words;
2098
2099 if (cum->nregs <= 0)
2100 {
2101 cum->nregs = 0;
2102 cum->regno = 0;
2103 }
2104 }
2105 }
2106 return;
2107 }
2108
2109 /* Define where to put the arguments to a function.
2110 Value is zero to push the argument on the stack,
2111 or a hard register in which to store the argument.
2112
2113 MODE is the argument's machine mode.
2114 TYPE is the data type of the argument (as a tree).
2115 This is null for libcalls where that information may
2116 not be available.
2117 CUM is a variable of type CUMULATIVE_ARGS which gives info about
2118 the preceding args and about the function being called.
2119 NAMED is nonzero if this argument is a named parameter
2120 (otherwise it is an extra parameter matching an ellipsis). */
2121
2122 rtx
2123 function_arg (cum, mode, type, named)
2124 CUMULATIVE_ARGS *cum; /* current arg information */
2125 enum machine_mode mode; /* current arg mode */
2126 tree type; /* type of the argument or 0 if lib support */
2127 int named; /* != 0 for normal args, == 0 for ... args */
2128 {
2129 rtx ret = NULL_RTX;
2130 int bytes =
2131 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
2132 int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
2133
2134 /* Handle a hidden AL argument containing the number of registers for
2135 varargs x86-64 functions. For the i386 ABI just return constm1_rtx to
2136 avoid any AL settings. */
2137 if (mode == VOIDmode)
2138 {
2139 if (TARGET_64BIT)
2140 return GEN_INT (cum->maybe_vaarg
2141 ? (cum->sse_nregs < 0
2142 ? SSE_REGPARM_MAX
2143 : cum->sse_regno)
2144 : -1);
2145 else
2146 return constm1_rtx;
2147 }
2148 if (TARGET_64BIT)
2149 ret = construct_container (mode, type, 0, cum->nregs, cum->sse_nregs,
2150 &x86_64_int_parameter_registers [cum->regno],
2151 cum->sse_regno);
2152 else
2153 switch (mode)
2154 {
2155 /* For now, pass fp/complex values on the stack. */
2156 default:
2157 break;
2158
2159 case BLKmode:
2160 case DImode:
2161 case SImode:
2162 case HImode:
2163 case QImode:
2164 if (words <= cum->nregs)
2165 ret = gen_rtx_REG (mode, cum->regno);
2166 break;
2167 case TImode:
2168 if (cum->sse_nregs)
2169 ret = gen_rtx_REG (mode, cum->sse_regno);
2170 break;
2171 }
2172
2173 if (TARGET_DEBUG_ARG)
2174 {
2175 fprintf (stderr,
2176 "function_arg (size=%d, wds=%2d, nregs=%d, mode=%4s, named=%d, ",
2177 words, cum->words, cum->nregs, GET_MODE_NAME (mode), named);
2178
2179 if (ret)
2180 print_simple_rtl (stderr, ret);
2181 else
2182 fprintf (stderr, ", stack");
2183
2184 fprintf (stderr, " )\n");
2185 }
2186
2187 return ret;
2188 }
2189
2190 /* Gives the alignment boundary, in bits, of an argument with the specified mode
2191 and type. */
2192
2193 int
2194 ix86_function_arg_boundary (mode, type)
2195 enum machine_mode mode;
2196 tree type;
2197 {
2198 int align;
2199 if (!TARGET_64BIT)
2200 return PARM_BOUNDARY;
2201 if (type)
2202 align = TYPE_ALIGN (type);
2203 else
2204 align = GET_MODE_ALIGNMENT (mode);
2205 if (align < PARM_BOUNDARY)
2206 align = PARM_BOUNDARY;
2207 if (align > 128)
2208 align = 128;
2209 return align;
2210 }
2211
2212 /* Return true if N is a possible register number of function value. */
2213 bool
2214 ix86_function_value_regno_p (regno)
2215 int regno;
2216 {
2217 if (!TARGET_64BIT)
2218 {
2219 return ((regno) == 0
2220 || ((regno) == FIRST_FLOAT_REG && TARGET_FLOAT_RETURNS_IN_80387)
2221 || ((regno) == FIRST_SSE_REG && TARGET_SSE));
2222 }
2223 return ((regno) == 0 || (regno) == FIRST_FLOAT_REG
2224 || ((regno) == FIRST_SSE_REG && TARGET_SSE)
2225 || ((regno) == FIRST_FLOAT_REG && TARGET_FLOAT_RETURNS_IN_80387));
2226 }
2227
2228 /* Define how to find the value returned by a function.
2229 VALTYPE is the data type of the value (as a tree).
2230 If the precise function being called is known, FUNC is its FUNCTION_DECL;
2231 otherwise, FUNC is 0. */
2232 rtx
2233 ix86_function_value (valtype)
2234 tree valtype;
2235 {
2236 if (TARGET_64BIT)
2237 {
2238 rtx ret = construct_container (TYPE_MODE (valtype), valtype, 1,
2239 REGPARM_MAX, SSE_REGPARM_MAX,
2240 x86_64_int_return_registers, 0);
2241 /* For zero sized structures, construct_container returns NULL, but we need
2242 to keep the rest of the compiler happy by returning a meaningful value. */
2243 if (!ret)
2244 ret = gen_rtx_REG (TYPE_MODE (valtype), 0);
2245 return ret;
2246 }
2247 else
2248 return gen_rtx_REG (TYPE_MODE (valtype),
2249 ix86_value_regno (TYPE_MODE (valtype)));
2250 }
2251
2252 /* Return nonzero iff the type is returned in memory. */
2253 int
2254 ix86_return_in_memory (type)
2255 tree type;
2256 {
2257 int needed_intregs, needed_sseregs;
2258 if (TARGET_64BIT)
2259 {
2260 return !examine_argument (TYPE_MODE (type), type, 1,
2261 &needed_intregs, &needed_sseregs);
2262 }
2263 else
2264 {
2265 if (TYPE_MODE (type) == BLKmode
2266 || (VECTOR_MODE_P (TYPE_MODE (type))
2267 && int_size_in_bytes (type) == 8)
2268 || (int_size_in_bytes (type) > 12 && TYPE_MODE (type) != TImode
2269 && TYPE_MODE (type) != TFmode
2270 && !VECTOR_MODE_P (TYPE_MODE (type))))
2271 return 1;
2272 return 0;
2273 }
2274 }
2275
2276 /* Define how to find the value returned by a library function
2277 assuming the value has mode MODE. */
2278 rtx
2279 ix86_libcall_value (mode)
2280 enum machine_mode mode;
2281 {
2282 if (TARGET_64BIT)
2283 {
2284 switch (mode)
2285 {
2286 case SFmode:
2287 case SCmode:
2288 case DFmode:
2289 case DCmode:
2290 return gen_rtx_REG (mode, FIRST_SSE_REG);
2291 case TFmode:
2292 case TCmode:
2293 return gen_rtx_REG (mode, FIRST_FLOAT_REG);
2294 default:
2295 return gen_rtx_REG (mode, 0);
2296 }
2297 }
2298 else
2299 return gen_rtx_REG (mode, ix86_value_regno (mode));
2300 }
2301
2302 /* Given a mode, return the register to use for a return value. */
2303
2304 static int
2305 ix86_value_regno (mode)
2306 enum machine_mode mode;
2307 {
2308 if (GET_MODE_CLASS (mode) == MODE_FLOAT && TARGET_FLOAT_RETURNS_IN_80387)
2309 return FIRST_FLOAT_REG;
2310 if (mode == TImode || VECTOR_MODE_P (mode))
2311 return FIRST_SSE_REG;
2312 return 0;
2313 }
2314 \f
2315 /* Create the va_list data type. */
2316
2317 tree
2318 ix86_build_va_list ()
2319 {
2320 tree f_gpr, f_fpr, f_ovf, f_sav, record, type_decl;
2321
2322 /* For i386 we use plain pointer to argument area. */
2323 if (!TARGET_64BIT)
2324 return build_pointer_type (char_type_node);
2325
2326 record = (*lang_hooks.types.make_type) (RECORD_TYPE);
2327 type_decl = build_decl (TYPE_DECL, get_identifier ("__va_list_tag"), record);
2328
2329 f_gpr = build_decl (FIELD_DECL, get_identifier ("gp_offset"),
2330 unsigned_type_node);
2331 f_fpr = build_decl (FIELD_DECL, get_identifier ("fp_offset"),
2332 unsigned_type_node);
2333 f_ovf = build_decl (FIELD_DECL, get_identifier ("overflow_arg_area"),
2334 ptr_type_node);
2335 f_sav = build_decl (FIELD_DECL, get_identifier ("reg_save_area"),
2336 ptr_type_node);
2337
2338 DECL_FIELD_CONTEXT (f_gpr) = record;
2339 DECL_FIELD_CONTEXT (f_fpr) = record;
2340 DECL_FIELD_CONTEXT (f_ovf) = record;
2341 DECL_FIELD_CONTEXT (f_sav) = record;
2342
2343 TREE_CHAIN (record) = type_decl;
2344 TYPE_NAME (record) = type_decl;
2345 TYPE_FIELDS (record) = f_gpr;
2346 TREE_CHAIN (f_gpr) = f_fpr;
2347 TREE_CHAIN (f_fpr) = f_ovf;
2348 TREE_CHAIN (f_ovf) = f_sav;
2349
2350 layout_type (record);
2351
2352 /* The correct type is an array type of one element. */
2353 return build_array_type (record, build_index_type (size_zero_node));
2354 }
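
/* In C terms, the record built above corresponds to the layout required
   by the x86-64 ABI:

     typedef struct __va_list_tag
     {
       unsigned int gp_offset;
       unsigned int fp_offset;
       void *overflow_arg_area;
       void *reg_save_area;
     } __va_list_tag;
     typedef __va_list_tag va_list[1];  */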
2355
2356 /* Perform any actions needed for a function that is receiving a
2357 variable number of arguments.
2358
2359 CUM is as above.
2360
2361 MODE and TYPE are the mode and type of the current parameter.
2362
2363 PRETEND_SIZE is a variable that should be set to the amount of stack
2364 that must be pushed by the prolog to pretend that our caller pushed
2365 it.
2366
2367 Normally, this macro will push all remaining incoming registers on the
2368 stack and set PRETEND_SIZE to the length of the registers pushed. */
2369
2370 void
2371 ix86_setup_incoming_varargs (cum, mode, type, pretend_size, no_rtl)
2372 CUMULATIVE_ARGS *cum;
2373 enum machine_mode mode;
2374 tree type;
2375 int *pretend_size ATTRIBUTE_UNUSED;
2376 int no_rtl;
2377
2378 {
2379 CUMULATIVE_ARGS next_cum;
2380 rtx save_area = NULL_RTX, mem;
2381 rtx label;
2382 rtx label_ref;
2383 rtx tmp_reg;
2384 rtx nsse_reg;
2385 int set;
2386 tree fntype;
2387 int stdarg_p;
2388 int i;
2389
2390 if (!TARGET_64BIT)
2391 return;
2392
2393 /* Indicate to allocate space on the stack for varargs save area. */
2394 ix86_save_varrargs_registers = 1;
2395
2396 fntype = TREE_TYPE (current_function_decl);
2397 stdarg_p = (TYPE_ARG_TYPES (fntype) != 0
2398 && (TREE_VALUE (tree_last (TYPE_ARG_TYPES (fntype)))
2399 != void_type_node));
2400
2401 /* For varargs, we do not want to skip the dummy va_dcl argument.
2402 For stdargs, we do want to skip the last named argument. */
2403 next_cum = *cum;
2404 if (stdarg_p)
2405 function_arg_advance (&next_cum, mode, type, 1);
2406
2407 if (!no_rtl)
2408 save_area = frame_pointer_rtx;
2409
2410 set = get_varargs_alias_set ();
2411
2412 for (i = next_cum.regno; i < ix86_regparm; i++)
2413 {
2414 mem = gen_rtx_MEM (Pmode,
2415 plus_constant (save_area, i * UNITS_PER_WORD));
2416 set_mem_alias_set (mem, set);
2417 emit_move_insn (mem, gen_rtx_REG (Pmode,
2418 x86_64_int_parameter_registers[i]));
2419 }
2420
2421 if (next_cum.sse_nregs)
2422 {
2423 /* Now emit code to save SSE registers. The AX parameter contains the
2424 number of SSE parameter registers used to call this function. We use
2425 the sse_prologue_save insn template that produces a computed jump across
2426 the SSE saves. We need some preparation work to get this working. */
2427
2428 label = gen_label_rtx ();
2429 label_ref = gen_rtx_LABEL_REF (Pmode, label);
2430
2431 /* Compute address to jump to :
2432 label - 5*eax + nnamed_sse_arguments*5 */
2433 tmp_reg = gen_reg_rtx (Pmode);
2434 nsse_reg = gen_reg_rtx (Pmode);
2435 emit_insn (gen_zero_extendqidi2 (nsse_reg, gen_rtx_REG (QImode, 0)));
2436 emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
2437 gen_rtx_MULT (Pmode, nsse_reg,
2438 GEN_INT (4))));
2439 if (next_cum.sse_regno)
2440 emit_move_insn
2441 (nsse_reg,
2442 gen_rtx_CONST (DImode,
2443 gen_rtx_PLUS (DImode,
2444 label_ref,
2445 GEN_INT (next_cum.sse_regno * 4))));
2446 else
2447 emit_move_insn (nsse_reg, label_ref);
2448 emit_insn (gen_subdi3 (nsse_reg, nsse_reg, tmp_reg));
2449
2450 /* Compute the address of the memory block we save into. We always use a
2451 pointer pointing 127 bytes after the first byte to store - this is
2452 needed to keep the instruction size limited to 4 bytes. */
2453 tmp_reg = gen_reg_rtx (Pmode);
2454 emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
2455 plus_constant (save_area,
2456 8 * REGPARM_MAX + 127)));
2457 mem = gen_rtx_MEM (BLKmode, plus_constant (tmp_reg, -127));
2458 set_mem_alias_set (mem, set);
2459 set_mem_align (mem, BITS_PER_WORD);
2460
2461 /* And finally do the dirty job! */
2462 emit_insn (gen_sse_prologue_save (mem, nsse_reg,
2463 GEN_INT (next_cum.sse_regno), label));
2464 }
2465
2466 }
2467
2468 /* Implement va_start. */
2469
2470 void
2471 ix86_va_start (valist, nextarg)
2472 tree valist;
2473 rtx nextarg;
2474 {
2475 HOST_WIDE_INT words, n_gpr, n_fpr;
2476 tree f_gpr, f_fpr, f_ovf, f_sav;
2477 tree gpr, fpr, ovf, sav, t;
2478
2479 /* Only 64bit target needs something special. */
2480 if (!TARGET_64BIT)
2481 {
2482 std_expand_builtin_va_start (valist, nextarg);
2483 return;
2484 }
2485
2486 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
2487 f_fpr = TREE_CHAIN (f_gpr);
2488 f_ovf = TREE_CHAIN (f_fpr);
2489 f_sav = TREE_CHAIN (f_ovf);
2490
2491 valist = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (valist)), valist);
2492 gpr = build (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr);
2493 fpr = build (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr);
2494 ovf = build (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf);
2495 sav = build (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav);
2496
2497 /* Count number of gp and fp argument registers used. */
2498 words = current_function_args_info.words;
2499 n_gpr = current_function_args_info.regno;
2500 n_fpr = current_function_args_info.sse_regno;
2501
2502 if (TARGET_DEBUG_ARG)
2503 fprintf (stderr, "va_start: words = %d, n_gpr = %d, n_fpr = %d\n",
2504 (int) words, (int) n_gpr, (int) n_fpr);
2505
2506 t = build (MODIFY_EXPR, TREE_TYPE (gpr), gpr,
2507 build_int_2 (n_gpr * 8, 0));
2508 TREE_SIDE_EFFECTS (t) = 1;
2509 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
2510
2511 t = build (MODIFY_EXPR, TREE_TYPE (fpr), fpr,
2512 build_int_2 (n_fpr * 16 + 8*REGPARM_MAX, 0));
2513 TREE_SIDE_EFFECTS (t) = 1;
2514 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
2515
2516 /* Find the overflow area. */
2517 t = make_tree (TREE_TYPE (ovf), virtual_incoming_args_rtx);
2518 if (words != 0)
2519 t = build (PLUS_EXPR, TREE_TYPE (ovf), t,
2520 build_int_2 (words * UNITS_PER_WORD, 0));
2521 t = build (MODIFY_EXPR, TREE_TYPE (ovf), ovf, t);
2522 TREE_SIDE_EFFECTS (t) = 1;
2523 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
2524
2525 /* Find the register save area.
2526 The prologue of the function saves it right above the stack frame. */
2527 t = make_tree (TREE_TYPE (sav), frame_pointer_rtx);
2528 t = build (MODIFY_EXPR, TREE_TYPE (sav), sav, t);
2529 TREE_SIDE_EFFECTS (t) = 1;
2530 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
2531 }
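
/* The expansion above amounts to roughly the following pseudo-C, where
   `ap' denotes the __va_list_tag object being initialized:

     ap->gp_offset = n_gpr * 8;
     ap->fp_offset = REGPARM_MAX * 8 + n_fpr * 16;
     ap->overflow_arg_area = incoming_args + words * UNITS_PER_WORD;
     ap->reg_save_area = frame_pointer;  */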
2532
2533 /* Implement va_arg. */
2534 rtx
2535 ix86_va_arg (valist, type)
2536 tree valist, type;
2537 {
2538 static const int intreg[6] = { 0, 1, 2, 3, 4, 5 };
2539 tree f_gpr, f_fpr, f_ovf, f_sav;
2540 tree gpr, fpr, ovf, sav, t;
2541 int size, rsize;
2542 rtx lab_false, lab_over = NULL_RTX;
2543 rtx addr_rtx, r;
2544 rtx container;
2545
2546 /* Only 64bit target needs something special. */
2547 if (!TARGET_64BIT)
2548 {
2549 return std_expand_builtin_va_arg (valist, type);
2550 }
2551
2552 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
2553 f_fpr = TREE_CHAIN (f_gpr);
2554 f_ovf = TREE_CHAIN (f_fpr);
2555 f_sav = TREE_CHAIN (f_ovf);
2556
2557 valist = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (valist)), valist);
2558 gpr = build (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr);
2559 fpr = build (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr);
2560 ovf = build (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf);
2561 sav = build (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav);
2562
2563 size = int_size_in_bytes (type);
2564 rsize = (size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
2565
2566 container = construct_container (TYPE_MODE (type), type, 0,
2567 REGPARM_MAX, SSE_REGPARM_MAX, intreg, 0);
2568 /*
2569 * Pull the value out of the saved registers ...
2570 */
2571
2572 addr_rtx = gen_reg_rtx (Pmode);
2573
2574 if (container)
2575 {
2576 rtx int_addr_rtx, sse_addr_rtx;
2577 int needed_intregs, needed_sseregs;
2578 int need_temp;
2579
2580 lab_over = gen_label_rtx ();
2581 lab_false = gen_label_rtx ();
2582
2583 examine_argument (TYPE_MODE (type), type, 0,
2584 &needed_intregs, &needed_sseregs);
2585
2586
2587 need_temp = ((needed_intregs && TYPE_ALIGN (type) > 64)
2588 || TYPE_ALIGN (type) > 128);
2589
2590 /* In case we are passing a structure, verify that it is a consecutive block
2591 on the register save area. If not, we need to do moves. */
2592 if (!need_temp && !REG_P (container))
2593 {
2594 /* Verify that all registers are strictly consecutive. */
2595 if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container, 0, 0), 0))))
2596 {
2597 int i;
2598
2599 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
2600 {
2601 rtx slot = XVECEXP (container, 0, i);
2602 if (REGNO (XEXP (slot, 0)) != FIRST_SSE_REG + (unsigned int) i
2603 || INTVAL (XEXP (slot, 1)) != i * 16)
2604 need_temp = 1;
2605 }
2606 }
2607 else
2608 {
2609 int i;
2610
2611 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
2612 {
2613 rtx slot = XVECEXP (container, 0, i);
2614 if (REGNO (XEXP (slot, 0)) != (unsigned int) i
2615 || INTVAL (XEXP (slot, 1)) != i * 8)
2616 need_temp = 1;
2617 }
2618 }
2619 }
2620 if (!need_temp)
2621 {
2622 int_addr_rtx = addr_rtx;
2623 sse_addr_rtx = addr_rtx;
2624 }
2625 else
2626 {
2627 int_addr_rtx = gen_reg_rtx (Pmode);
2628 sse_addr_rtx = gen_reg_rtx (Pmode);
2629 }
2630 /* First ensure that we fit completely in registers. */
2631 if (needed_intregs)
2632 {
2633 emit_cmp_and_jump_insns (expand_expr
2634 (gpr, NULL_RTX, SImode, EXPAND_NORMAL),
2635 GEN_INT ((REGPARM_MAX - needed_intregs +
2636 1) * 8), GE, const1_rtx, SImode,
2637 1, lab_false);
2638 }
2639 if (needed_sseregs)
2640 {
2641 emit_cmp_and_jump_insns (expand_expr
2642 (fpr, NULL_RTX, SImode, EXPAND_NORMAL),
2643 GEN_INT ((SSE_REGPARM_MAX -
2644 needed_sseregs + 1) * 16 +
2645 REGPARM_MAX * 8), GE, const1_rtx,
2646 SImode, 1, lab_false);
2647 }
2648
2649 /* Compute index to start of area used for integer regs. */
2650 if (needed_intregs)
2651 {
2652 t = build (PLUS_EXPR, ptr_type_node, sav, gpr);
2653 r = expand_expr (t, int_addr_rtx, Pmode, EXPAND_NORMAL);
2654 if (r != int_addr_rtx)
2655 emit_move_insn (int_addr_rtx, r);
2656 }
2657 if (needed_sseregs)
2658 {
2659 t = build (PLUS_EXPR, ptr_type_node, sav, fpr);
2660 r = expand_expr (t, sse_addr_rtx, Pmode, EXPAND_NORMAL);
2661 if (r != sse_addr_rtx)
2662 emit_move_insn (sse_addr_rtx, r);
2663 }
2664 if (need_temp)
2665 {
2666 int i;
2667 rtx mem;
2668
2669 /* Never use the memory itself, as it has the alias set. */
2670 addr_rtx = XEXP (assign_temp (type, 0, 1, 0), 0);
2671 mem = gen_rtx_MEM (BLKmode, addr_rtx);
2672 set_mem_alias_set (mem, get_varargs_alias_set ());
2673 set_mem_align (mem, BITS_PER_UNIT);
2674
2675 for (i = 0; i < XVECLEN (container, 0); i++)
2676 {
2677 rtx slot = XVECEXP (container, 0, i);
2678 rtx reg = XEXP (slot, 0);
2679 enum machine_mode mode = GET_MODE (reg);
2680 rtx src_addr;
2681 rtx src_mem;
2682 int src_offset;
2683 rtx dest_mem;
2684
2685 if (SSE_REGNO_P (REGNO (reg)))
2686 {
2687 src_addr = sse_addr_rtx;
2688 src_offset = (REGNO (reg) - FIRST_SSE_REG) * 16;
2689 }
2690 else
2691 {
2692 src_addr = int_addr_rtx;
2693 src_offset = REGNO (reg) * 8;
2694 }
2695 src_mem = gen_rtx_MEM (mode, src_addr);
2696 set_mem_alias_set (src_mem, get_varargs_alias_set ());
2697 src_mem = adjust_address (src_mem, mode, src_offset);
2698 dest_mem = adjust_address (mem, mode, INTVAL (XEXP (slot, 1)));
2699 emit_move_insn (dest_mem, src_mem);
2700 }
2701 }
2702
2703 if (needed_intregs)
2704 {
2705 t =
2706 build (PLUS_EXPR, TREE_TYPE (gpr), gpr,
2707 build_int_2 (needed_intregs * 8, 0));
2708 t = build (MODIFY_EXPR, TREE_TYPE (gpr), gpr, t);
2709 TREE_SIDE_EFFECTS (t) = 1;
2710 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
2711 }
2712 if (needed_sseregs)
2713 {
2714 t =
2715 build (PLUS_EXPR, TREE_TYPE (fpr), fpr,
2716 build_int_2 (needed_sseregs * 16, 0));
2717 t = build (MODIFY_EXPR, TREE_TYPE (fpr), fpr, t);
2718 TREE_SIDE_EFFECTS (t) = 1;
2719 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
2720 }
2721
2722 emit_jump_insn (gen_jump (lab_over));
2723 emit_barrier ();
2724 emit_label (lab_false);
2725 }
2726
2727 /* ... otherwise out of the overflow area. */
2728
2729 /* Care for on-stack alignment if needed. */
2730 if (FUNCTION_ARG_BOUNDARY (VOIDmode, type) <= 64)
2731 t = ovf;
2732 else
2733 {
2734 HOST_WIDE_INT align = FUNCTION_ARG_BOUNDARY (VOIDmode, type) / 8;
2735 t = build (PLUS_EXPR, TREE_TYPE (ovf), ovf, build_int_2 (align - 1, 0));
2736 t = build (BIT_AND_EXPR, TREE_TYPE (t), t, build_int_2 (-align, -1));
2737 }
2738 t = save_expr (t);
2739
2740 r = expand_expr (t, addr_rtx, Pmode, EXPAND_NORMAL);
2741 if (r != addr_rtx)
2742 emit_move_insn (addr_rtx, r);
2743
2744 t =
2745 build (PLUS_EXPR, TREE_TYPE (t), t,
2746 build_int_2 (rsize * UNITS_PER_WORD, 0));
2747 t = build (MODIFY_EXPR, TREE_TYPE (ovf), ovf, t);
2748 TREE_SIDE_EFFECTS (t) = 1;
2749 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
2750
2751 if (container)
2752 emit_label (lab_over);
2753
2754 return addr_rtx;
2755 }
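
/* For an argument needing N integer registers, the code emitted above
   behaves roughly like the following pseudo-C sketch:

     if (ap->gp_offset >= (REGPARM_MAX - N + 1) * 8)
       goto stack;
     addr = ap->reg_save_area + ap->gp_offset;
     ap->gp_offset += N * 8;
     goto done;
   stack:
     addr = align (ap->overflow_arg_area, FUNCTION_ARG_BOUNDARY (...));
     ap->overflow_arg_area = addr + rsize * UNITS_PER_WORD;
   done:
     ...use addr...

   The SSE path is analogous, using fp_offset and 16 byte save slots.  */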
2756 \f
2757 /* Return nonzero if OP is general operand representable on x86_64. */
2758
2759 int
2760 x86_64_general_operand (op, mode)
2761 rtx op;
2762 enum machine_mode mode;
2763 {
2764 if (!TARGET_64BIT)
2765 return general_operand (op, mode);
2766 if (nonimmediate_operand (op, mode))
2767 return 1;
2768 return x86_64_sign_extended_value (op);
2769 }
2770
2771 /* Return nonzero if OP is general operand representable on x86_64
2772 as either sign extended or zero extended constant. */
2773
2774 int
2775 x86_64_szext_general_operand (op, mode)
2776 rtx op;
2777 enum machine_mode mode;
2778 {
2779 if (!TARGET_64BIT)
2780 return general_operand (op, mode);
2781 if (nonimmediate_operand (op, mode))
2782 return 1;
2783 return x86_64_sign_extended_value (op) || x86_64_zero_extended_value (op);
2784 }
2785
2786 /* Return nonzero if OP is nonmemory operand representable on x86_64. */
2787
2788 int
2789 x86_64_nonmemory_operand (op, mode)
2790 rtx op;
2791 enum machine_mode mode;
2792 {
2793 if (!TARGET_64BIT)
2794 return nonmemory_operand (op, mode);
2795 if (register_operand (op, mode))
2796 return 1;
2797 return x86_64_sign_extended_value (op);
2798 }
2799
2800 /* Return nonzero if OP is nonmemory operand acceptable by movabs patterns. */
2801
2802 int
2803 x86_64_movabs_operand (op, mode)
2804 rtx op;
2805 enum machine_mode mode;
2806 {
2807 if (!TARGET_64BIT || !flag_pic)
2808 return nonmemory_operand (op, mode);
2809 if (register_operand (op, mode) || x86_64_sign_extended_value (op))
2810 return 1;
2811 if (CONSTANT_P (op) && !symbolic_reference_mentioned_p (op))
2812 return 1;
2813 return 0;
2814 }
2815
2816 /* Return nonzero if OP is nonmemory operand representable on x86_64. */
2817
2818 int
2819 x86_64_szext_nonmemory_operand (op, mode)
2820 rtx op;
2821 enum machine_mode mode;
2822 {
2823 if (!TARGET_64BIT)
2824 return nonmemory_operand (op, mode);
2825 if (register_operand (op, mode))
2826 return 1;
2827 return x86_64_sign_extended_value (op) || x86_64_zero_extended_value (op);
2828 }
2829
2830 /* Return nonzero if OP is immediate operand representable on x86_64. */
2831
2832 int
2833 x86_64_immediate_operand (op, mode)
2834 rtx op;
2835 enum machine_mode mode;
2836 {
2837 if (!TARGET_64BIT)
2838 return immediate_operand (op, mode);
2839 return x86_64_sign_extended_value (op);
2840 }
2841
2842 /* Return nonzero if OP is immediate operand representable on x86_64. */
2843
2844 int
2845 x86_64_zext_immediate_operand (op, mode)
2846 rtx op;
2847 enum machine_mode mode ATTRIBUTE_UNUSED;
2848 {
2849 return x86_64_zero_extended_value (op);
2850 }
2851
2852 /* Return nonzero if OP is (const_int 1), else return zero. */
2853
2854 int
2855 const_int_1_operand (op, mode)
2856 rtx op;
2857 enum machine_mode mode ATTRIBUTE_UNUSED;
2858 {
2859 return (GET_CODE (op) == CONST_INT && INTVAL (op) == 1);
2860 }
2861
2862 /* Return nonzero if OP is CONST_INT >= 1 and <= 31 (a valid operand
2863 for shift & compare patterns, as shifting by 0 does not change flags),
2864 else return zero. */
2865
2866 int
2867 const_int_1_31_operand (op, mode)
2868 rtx op;
2869 enum machine_mode mode ATTRIBUTE_UNUSED;
2870 {
2871 return (GET_CODE (op) == CONST_INT && INTVAL (op) >= 1 && INTVAL (op) <= 31);
2872 }
2873
2874 /* Returns 1 if OP is either a symbol reference or a sum of a symbol
2875 reference and a constant. */
2876
2877 int
2878 symbolic_operand (op, mode)
2879 register rtx op;
2880 enum machine_mode mode ATTRIBUTE_UNUSED;
2881 {
2882 switch (GET_CODE (op))
2883 {
2884 case SYMBOL_REF:
2885 case LABEL_REF:
2886 return 1;
2887
2888 case CONST:
2889 op = XEXP (op, 0);
2890 if (GET_CODE (op) == SYMBOL_REF
2891 || GET_CODE (op) == LABEL_REF
2892 || (GET_CODE (op) == UNSPEC
2893 && (XINT (op, 1) == UNSPEC_GOT
2894 || XINT (op, 1) == UNSPEC_GOTOFF
2895 || XINT (op, 1) == UNSPEC_GOTPCREL)))
2896 return 1;
2897 if (GET_CODE (op) != PLUS
2898 || GET_CODE (XEXP (op, 1)) != CONST_INT)
2899 return 0;
2900
2901 op = XEXP (op, 0);
2902 if (GET_CODE (op) == SYMBOL_REF
2903 || GET_CODE (op) == LABEL_REF)
2904 return 1;
2905 /* Only @GOTOFF gets offsets. */
2906 if (GET_CODE (op) != UNSPEC
2907 || XINT (op, 1) != UNSPEC_GOTOFF)
2908 return 0;
2909
2910 op = XVECEXP (op, 0, 0);
2911 if (GET_CODE (op) == SYMBOL_REF
2912 || GET_CODE (op) == LABEL_REF)
2913 return 1;
2914 return 0;
2915
2916 default:
2917 return 0;
2918 }
2919 }
2920
2921 /* Return true if the operand contains a @GOT or @GOTOFF reference. */
2922
2923 int
2924 pic_symbolic_operand (op, mode)
2925 register rtx op;
2926 enum machine_mode mode ATTRIBUTE_UNUSED;
2927 {
2928 if (GET_CODE (op) != CONST)
2929 return 0;
2930 op = XEXP (op, 0);
2931 if (TARGET_64BIT)
2932 {
2933 if (GET_CODE (XEXP (op, 0)) == UNSPEC)
2934 return 1;
2935 }
2936 else
2937 {
2938 if (GET_CODE (op) == UNSPEC)
2939 return 1;
2940 if (GET_CODE (op) != PLUS
2941 || GET_CODE (XEXP (op, 1)) != CONST_INT)
2942 return 0;
2943 op = XEXP (op, 0);
2944 if (GET_CODE (op) == UNSPEC)
2945 return 1;
2946 }
2947 return 0;
2948 }
2949
2950 /* Return true if OP is a symbolic operand that resolves locally. */
2951
2952 static int
2953 local_symbolic_operand (op, mode)
2954 rtx op;
2955 enum machine_mode mode ATTRIBUTE_UNUSED;
2956 {
2957 if (GET_CODE (op) == LABEL_REF)
2958 return 1;
2959
2960 if (GET_CODE (op) == CONST
2961 && GET_CODE (XEXP (op, 0)) == PLUS
2962 && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST_INT)
2963 op = XEXP (XEXP (op, 0), 0);
2964
2965 if (GET_CODE (op) != SYMBOL_REF)
2966 return 0;
2967
2968 /* These we've been told are local by varasm and encode_section_info
2969 respectively. */
2970 if (CONSTANT_POOL_ADDRESS_P (op) || SYMBOL_REF_FLAG (op))
2971 return 1;
2972
2973 /* There is, however, a not insubstantial body of code in the rest of
2974 the compiler that assumes it can just stick the results of
2975 ASM_GENERATE_INTERNAL_LABEL in a symbol_ref and have done. */
2976 /* ??? This is a hack. Should update the body of the compiler to
2977 always create a DECL and invoke targetm.encode_section_info. */
2978 if (strncmp (XSTR (op, 0), internal_label_prefix,
2979 internal_label_prefix_len) == 0)
2980 return 1;
2981
2982 return 0;
2983 }
2984
2985 /* Test for various thread-local symbols. See ix86_encode_section_info. */
2986
2987 int
2988 tls_symbolic_operand (op, mode)
2989 register rtx op;
2990 enum machine_mode mode ATTRIBUTE_UNUSED;
2991 {
2992 const char *symbol_str;
2993
2994 if (GET_CODE (op) != SYMBOL_REF)
2995 return 0;
2996 symbol_str = XSTR (op, 0);
2997
2998 if (symbol_str[0] != '%')
2999 return 0;
3000 return strchr (tls_model_chars, symbol_str[1]) - tls_model_chars;
3001 }
3002
3003 static int
3004 tls_symbolic_operand_1 (op, kind)
3005 rtx op;
3006 enum tls_model kind;
3007 {
3008 const char *symbol_str;
3009
3010 if (GET_CODE (op) != SYMBOL_REF)
3011 return 0;
3012 symbol_str = XSTR (op, 0);
3013
3014 return symbol_str[0] == '%' && symbol_str[1] == tls_model_chars[kind];
3015 }
3016
3017 int
3018 global_dynamic_symbolic_operand (op, mode)
3019 register rtx op;
3020 enum machine_mode mode ATTRIBUTE_UNUSED;
3021 {
3022 return tls_symbolic_operand_1 (op, TLS_MODEL_GLOBAL_DYNAMIC);
3023 }
3024
3025 int
3026 local_dynamic_symbolic_operand (op, mode)
3027 register rtx op;
3028 enum machine_mode mode ATTRIBUTE_UNUSED;
3029 {
3030 return tls_symbolic_operand_1 (op, TLS_MODEL_LOCAL_DYNAMIC);
3031 }
3032
3033 int
3034 initial_exec_symbolic_operand (op, mode)
3035 register rtx op;
3036 enum machine_mode mode ATTRIBUTE_UNUSED;
3037 {
3038 return tls_symbolic_operand_1 (op, TLS_MODEL_INITIAL_EXEC);
3039 }
3040
3041 int
3042 local_exec_symbolic_operand (op, mode)
3043 register rtx op;
3044 enum machine_mode mode ATTRIBUTE_UNUSED;
3045 {
3046 return tls_symbolic_operand_1 (op, TLS_MODEL_LOCAL_EXEC);
3047 }
3048
3049 /* Test for a valid operand for a call instruction. Don't allow the
3050 arg pointer register or virtual regs since they may decay into
3051 reg + const, which the patterns can't handle. */
3052
3053 int
3054 call_insn_operand (op, mode)
3055 rtx op;
3056 enum machine_mode mode ATTRIBUTE_UNUSED;
3057 {
3058 /* Disallow indirect through a virtual register. This leads to
3059 compiler aborts when trying to eliminate them. */
3060 if (GET_CODE (op) == REG
3061 && (op == arg_pointer_rtx
3062 || op == frame_pointer_rtx
3063 || (REGNO (op) >= FIRST_PSEUDO_REGISTER
3064 && REGNO (op) <= LAST_VIRTUAL_REGISTER)))
3065 return 0;
3066
3067 /* Disallow `call 1234'. Due to varying assembler lameness this
3068 gets either rejected or translated to `call .+1234'. */
3069 if (GET_CODE (op) == CONST_INT)
3070 return 0;
3071
3072 /* Explicitly allow SYMBOL_REF even if pic. */
3073 if (GET_CODE (op) == SYMBOL_REF)
3074 return 1;
3075
3076 /* Otherwise we can allow any general_operand in the address. */
3077 return general_operand (op, Pmode);
3078 }
3079
3080 int
3081 constant_call_address_operand (op, mode)
3082 rtx op;
3083 enum machine_mode mode ATTRIBUTE_UNUSED;
3084 {
3085 if (GET_CODE (op) == CONST
3086 && GET_CODE (XEXP (op, 0)) == PLUS
3087 && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST_INT)
3088 op = XEXP (XEXP (op, 0), 0);
3089 return GET_CODE (op) == SYMBOL_REF;
3090 }
3091
3092 /* Match exactly zero and one. */
3093
3094 int
3095 const0_operand (op, mode)
3096 register rtx op;
3097 enum machine_mode mode;
3098 {
3099 return op == CONST0_RTX (mode);
3100 }
3101
3102 int
3103 const1_operand (op, mode)
3104 register rtx op;
3105 enum machine_mode mode ATTRIBUTE_UNUSED;
3106 {
3107 return op == const1_rtx;
3108 }
3109
3110 /* Match 2, 4, or 8. Used for leal multiplicands. */
3111
3112 int
3113 const248_operand (op, mode)
3114 register rtx op;
3115 enum machine_mode mode ATTRIBUTE_UNUSED;
3116 {
3117 return (GET_CODE (op) == CONST_INT
3118 && (INTVAL (op) == 2 || INTVAL (op) == 4 || INTVAL (op) == 8));
3119 }
3120
3121 /* True if this is a constant appropriate for an increment or decrement. */
3122
3123 int
3124 incdec_operand (op, mode)
3125 register rtx op;
3126 enum machine_mode mode ATTRIBUTE_UNUSED;
3127 {
3128 /* On Pentium4, the inc and dec operations cause an extra dependency on the
3129 flags register, since the carry flag is not set. */
3130 if (TARGET_PENTIUM4 && !optimize_size)
3131 return 0;
3132 return op == const1_rtx || op == constm1_rtx;
3133 }
3134
3135 /* Return nonzero if OP is acceptable as operand of DImode shift
3136 expander. */
3137
3138 int
3139 shiftdi_operand (op, mode)
3140 rtx op;
3141 enum machine_mode mode ATTRIBUTE_UNUSED;
3142 {
3143 if (TARGET_64BIT)
3144 return nonimmediate_operand (op, mode);
3145 else
3146 return register_operand (op, mode);
3147 }
3148
3149 /* Return false if this is the stack pointer, or any other fake
3150 register eliminable to the stack pointer. Otherwise, this is
3151 a register operand.
3152
3153 This is used to prevent esp from being used as an index reg,
3154 which would only happen in pathological cases. */
3155
3156 int
3157 reg_no_sp_operand (op, mode)
3158 register rtx op;
3159 enum machine_mode mode;
3160 {
3161 rtx t = op;
3162 if (GET_CODE (t) == SUBREG)
3163 t = SUBREG_REG (t);
3164 if (t == stack_pointer_rtx || t == arg_pointer_rtx || t == frame_pointer_rtx)
3165 return 0;
3166
3167 return register_operand (op, mode);
3168 }
3169
3170 int
3171 mmx_reg_operand (op, mode)
3172 register rtx op;
3173 enum machine_mode mode ATTRIBUTE_UNUSED;
3174 {
3175 return MMX_REG_P (op);
3176 }
3177
3178 /* Return false if this is any eliminable register. Otherwise
3179 general_operand. */
3180
3181 int
3182 general_no_elim_operand (op, mode)
3183 register rtx op;
3184 enum machine_mode mode;
3185 {
3186 rtx t = op;
3187 if (GET_CODE (t) == SUBREG)
3188 t = SUBREG_REG (t);
3189 if (t == arg_pointer_rtx || t == frame_pointer_rtx
3190 || t == virtual_incoming_args_rtx || t == virtual_stack_vars_rtx
3191 || t == virtual_stack_dynamic_rtx)
3192 return 0;
3193 if (REG_P (t)
3194 && REGNO (t) >= FIRST_VIRTUAL_REGISTER
3195 && REGNO (t) <= LAST_VIRTUAL_REGISTER)
3196 return 0;
3197
3198 return general_operand (op, mode);
3199 }
3200
3201 /* Return false if this is any eliminable register. Otherwise
3202 register_operand or const_int. */
3203
3204 int
3205 nonmemory_no_elim_operand (op, mode)
3206 register rtx op;
3207 enum machine_mode mode;
3208 {
3209 rtx t = op;
3210 if (GET_CODE (t) == SUBREG)
3211 t = SUBREG_REG (t);
3212 if (t == arg_pointer_rtx || t == frame_pointer_rtx
3213 || t == virtual_incoming_args_rtx || t == virtual_stack_vars_rtx
3214 || t == virtual_stack_dynamic_rtx)
3215 return 0;
3216
3217 return GET_CODE (op) == CONST_INT || register_operand (op, mode);
3218 }
3219
3220 /* Return true if op is a Q_REGS class register. */
3221
3222 int
3223 q_regs_operand (op, mode)
3224 register rtx op;
3225 enum machine_mode mode;
3226 {
3227 if (mode != VOIDmode && GET_MODE (op) != mode)
3228 return 0;
3229 if (GET_CODE (op) == SUBREG)
3230 op = SUBREG_REG (op);
3231 return ANY_QI_REG_P (op);
3232 }
3233
3234 /* Return true if op is a NON_Q_REGS class register. */
3235
3236 int
3237 non_q_regs_operand (op, mode)
3238 register rtx op;
3239 enum machine_mode mode;
3240 {
3241 if (mode != VOIDmode && GET_MODE (op) != mode)
3242 return 0;
3243 if (GET_CODE (op) == SUBREG)
3244 op = SUBREG_REG (op);
3245 return NON_QI_REG_P (op);
3246 }
3247
3248 /* Return 1 if OP is a comparison that can be used in the CMPSS/CMPPS
3249 insns. */
3250 int
3251 sse_comparison_operator (op, mode)
3252 rtx op;
3253 enum machine_mode mode ATTRIBUTE_UNUSED;
3254 {
3255 enum rtx_code code = GET_CODE (op);
3256 switch (code)
3257 {
3258 /* Operations supported directly. */
3259 case EQ:
3260 case LT:
3261 case LE:
3262 case UNORDERED:
3263 case NE:
3264 case UNGE:
3265 case UNGT:
3266 case ORDERED:
3267 return 1;
3268 /* These are equivalent to ones above in non-IEEE comparisons. */
3269 case UNEQ:
3270 case UNLT:
3271 case UNLE:
3272 case LTGT:
3273 case GE:
3274 case GT:
3275 return !TARGET_IEEE_FP;
3276 default:
3277 return 0;
3278 }
3279 }
3280 /* Return 1 if OP is a valid comparison operator in valid mode. */
3281 int
3282 ix86_comparison_operator (op, mode)
3283 register rtx op;
3284 enum machine_mode mode;
3285 {
3286 enum machine_mode inmode;
3287 enum rtx_code code = GET_CODE (op);
3288 if (mode != VOIDmode && GET_MODE (op) != mode)
3289 return 0;
3290 if (GET_RTX_CLASS (code) != '<')
3291 return 0;
3292 inmode = GET_MODE (XEXP (op, 0));
3293
3294 if (inmode == CCFPmode || inmode == CCFPUmode)
3295 {
3296 enum rtx_code second_code, bypass_code;
3297 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
3298 return (bypass_code == NIL && second_code == NIL);
3299 }
3300 switch (code)
3301 {
3302 case EQ: case NE:
3303 return 1;
3304 case LT: case GE:
3305 if (inmode == CCmode || inmode == CCGCmode
3306 || inmode == CCGOCmode || inmode == CCNOmode)
3307 return 1;
3308 return 0;
3309 case LTU: case GTU: case LEU: case ORDERED: case UNORDERED: case GEU:
3310 if (inmode == CCmode)
3311 return 1;
3312 return 0;
3313 case GT: case LE:
3314 if (inmode == CCmode || inmode == CCGCmode || inmode == CCNOmode)
3315 return 1;
3316 return 0;
3317 default:
3318 return 0;
3319 }
3320 }
3321
3322 /* Return 1 if OP is a comparison operator that can be issued by fcmov. */
3323
3324 int
3325 fcmov_comparison_operator (op, mode)
3326 register rtx op;
3327 enum machine_mode mode;
3328 {
3329 enum machine_mode inmode;
3330 enum rtx_code code = GET_CODE (op);
3331 if (mode != VOIDmode && GET_MODE (op) != mode)
3332 return 0;
3333 if (GET_RTX_CLASS (code) != '<')
3334 return 0;
3335 inmode = GET_MODE (XEXP (op, 0));
3336 if (inmode == CCFPmode || inmode == CCFPUmode)
3337 {
3338 enum rtx_code second_code, bypass_code;
3339 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
3340 if (bypass_code != NIL || second_code != NIL)
3341 return 0;
3342 code = ix86_fp_compare_code_to_integer (code);
3343 }
3344 /* The i387 supports only a limited set of condition codes. */
3345 switch (code)
3346 {
3347 case LTU: case GTU: case LEU: case GEU:
3348 if (inmode == CCmode || inmode == CCFPmode || inmode == CCFPUmode)
3349 return 1;
3350 return 0;
3351 case ORDERED: case UNORDERED:
3352 case EQ: case NE:
3353 return 1;
3354 default:
3355 return 0;
3356 }
3357 }
3358
3359 /* Return 1 if OP is a binary operator that can be promoted to wider mode. */
3360
3361 int
3362 promotable_binary_operator (op, mode)
3363 register rtx op;
3364 enum machine_mode mode ATTRIBUTE_UNUSED;
3365 {
3366 switch (GET_CODE (op))
3367 {
3368 case MULT:
3369 /* Modern CPUs have the same latency for HImode and SImode multiply,
3370 but the 386 and 486 do the HImode multiply faster. */
3371 return ix86_cpu > PROCESSOR_I486;
3372 case PLUS:
3373 case AND:
3374 case IOR:
3375 case XOR:
3376 case ASHIFT:
3377 return 1;
3378 default:
3379 return 0;
3380 }
3381 }
3382
3383 /* Nearly general operand, but accept any const_double, since we wish
3384 to be able to drop them into memory rather than have them get pulled
3385 into registers. */
3386
3387 int
3388 cmp_fp_expander_operand (op, mode)
3389 register rtx op;
3390 enum machine_mode mode;
3391 {
3392 if (mode != VOIDmode && mode != GET_MODE (op))
3393 return 0;
3394 if (GET_CODE (op) == CONST_DOUBLE)
3395 return 1;
3396 return general_operand (op, mode);
3397 }
3398
3399 /* Match an SI or HImode register for a zero_extract. */
3400
3401 int
3402 ext_register_operand (op, mode)
3403 register rtx op;
3404 enum machine_mode mode ATTRIBUTE_UNUSED;
3405 {
3406 int regno;
3407 if ((!TARGET_64BIT || GET_MODE (op) != DImode)
3408 && GET_MODE (op) != SImode && GET_MODE (op) != HImode)
3409 return 0;
3410
3411 if (!register_operand (op, VOIDmode))
3412 return 0;
3413
3414 /* Be careful to accept only registers having upper parts. */
3415 regno = REG_P (op) ? REGNO (op) : REGNO (SUBREG_REG (op));
3416 return (regno > LAST_VIRTUAL_REGISTER || regno < 4);
3417 }
3418
3419 /* Return 1 if this is a valid binary floating-point operation.
3420 OP is the expression matched, and MODE is its mode. */
3421
3422 int
3423 binary_fp_operator (op, mode)
3424 register rtx op;
3425 enum machine_mode mode;
3426 {
3427 if (mode != VOIDmode && mode != GET_MODE (op))
3428 return 0;
3429
3430 switch (GET_CODE (op))
3431 {
3432 case PLUS:
3433 case MINUS:
3434 case MULT:
3435 case DIV:
3436 return GET_MODE_CLASS (GET_MODE (op)) == MODE_FLOAT;
3437
3438 default:
3439 return 0;
3440 }
3441 }
3442
3443 int
3444 mult_operator (op, mode)
3445 register rtx op;
3446 enum machine_mode mode ATTRIBUTE_UNUSED;
3447 {
3448 return GET_CODE (op) == MULT;
3449 }
3450
3451 int
3452 div_operator (op, mode)
3453 register rtx op;
3454 enum machine_mode mode ATTRIBUTE_UNUSED;
3455 {
3456 return GET_CODE (op) == DIV;
3457 }
3458
3459 int
3460 arith_or_logical_operator (op, mode)
3461 rtx op;
3462 enum machine_mode mode;
3463 {
3464 return ((mode == VOIDmode || GET_MODE (op) == mode)
3465 && (GET_RTX_CLASS (GET_CODE (op)) == 'c'
3466 || GET_RTX_CLASS (GET_CODE (op)) == '2'));
3467 }
3468
3469 /* Returns 1 if OP is memory operand with a displacement. */
3470
3471 int
3472 memory_displacement_operand (op, mode)
3473 register rtx op;
3474 enum machine_mode mode;
3475 {
3476 struct ix86_address parts;
3477
3478 if (! memory_operand (op, mode))
3479 return 0;
3480
3481 if (! ix86_decompose_address (XEXP (op, 0), &parts))
3482 abort ();
3483
3484 return parts.disp != NULL_RTX;
3485 }
3486
3487 /* To avoid problems when jump re-emits comparisons like testqi_ext_ccno_0,
3488 re-recognize the operand to avoid a copy_to_mode_reg that will fail.
3489
3490 ??? It seems likely that this will only work because cmpsi is an
3491 expander, and no actual insns use this. */
3492
3493 int
3494 cmpsi_operand (op, mode)
3495 rtx op;
3496 enum machine_mode mode;
3497 {
3498 if (nonimmediate_operand (op, mode))
3499 return 1;
3500
3501 if (GET_CODE (op) == AND
3502 && GET_MODE (op) == SImode
3503 && GET_CODE (XEXP (op, 0)) == ZERO_EXTRACT
3504 && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST_INT
3505 && GET_CODE (XEXP (XEXP (op, 0), 2)) == CONST_INT
3506 && INTVAL (XEXP (XEXP (op, 0), 1)) == 8
3507 && INTVAL (XEXP (XEXP (op, 0), 2)) == 8
3508 && GET_CODE (XEXP (op, 1)) == CONST_INT)
3509 return 1;
3510
3511 return 0;
3512 }
3513
3514 /* Returns 1 if OP is a memory operand that cannot be represented by the
3515 modRM array. */
3516
3517 int
3518 long_memory_operand (op, mode)
3519 register rtx op;
3520 enum machine_mode mode;
3521 {
3522 if (! memory_operand (op, mode))
3523 return 0;
3524
3525 return memory_address_length (op) != 0;
3526 }
3527
3528 /* Return nonzero if the rtx is known aligned. */
3529
3530 int
3531 aligned_operand (op, mode)
3532 rtx op;
3533 enum machine_mode mode;
3534 {
3535 struct ix86_address parts;
3536
3537 if (!general_operand (op, mode))
3538 return 0;
3539
3540 /* Registers and immediate operands are always "aligned". */
3541 if (GET_CODE (op) != MEM)
3542 return 1;
3543
3544 /* Don't even try to do any aligned optimizations with volatiles. */
3545 if (MEM_VOLATILE_P (op))
3546 return 0;
3547
3548 op = XEXP (op, 0);
3549
3550 /* Pushes and pops are only valid on the stack pointer. */
3551 if (GET_CODE (op) == PRE_DEC
3552 || GET_CODE (op) == POST_INC)
3553 return 1;
3554
3555 /* Decode the address. */
3556 if (! ix86_decompose_address (op, &parts))
3557 abort ();
3558
3559 if (parts.base && GET_CODE (parts.base) == SUBREG)
3560 parts.base = SUBREG_REG (parts.base);
3561 if (parts.index && GET_CODE (parts.index) == SUBREG)
3562 parts.index = SUBREG_REG (parts.index);
3563
3564 /* Look for some component that isn't known to be aligned. */
3565 if (parts.index)
3566 {
3567 if (parts.scale < 4
3568 && REGNO_POINTER_ALIGN (REGNO (parts.index)) < 32)
3569 return 0;
3570 }
3571 if (parts.base)
3572 {
3573 if (REGNO_POINTER_ALIGN (REGNO (parts.base)) < 32)
3574 return 0;
3575 }
3576 if (parts.disp)
3577 {
3578 if (GET_CODE (parts.disp) != CONST_INT
3579 || (INTVAL (parts.disp) & 3) != 0)
3580 return 0;
3581 }
3582
3583 /* Didn't find one -- this must be an aligned address. */
3584 return 1;
3585 }
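/* A small worked example for the checks above (an illustrative sketch):
   (mem:SI (plus (reg %ebp) (const_int 8))) is treated as aligned provided
   REGNO_POINTER_ALIGN records at least 32-bit alignment for %ebp, while a
   displacement such as (const_int 6) fails the (INTVAL (parts.disp) & 3)
   test and the address is rejected.  */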
3586 \f
3587 /* Return true if the constant is something that can be loaded with
3588 a special instruction. Only handle 0.0 and 1.0; others are less
3589 worthwhile. */
3590
3591 int
3592 standard_80387_constant_p (x)
3593 rtx x;
3594 {
3595 if (GET_CODE (x) != CONST_DOUBLE || !FLOAT_MODE_P (GET_MODE (x)))
3596 return -1;
3597 /* Note that on the 80387 there are other constants, such as pi, that we should
3598 support too. On some machines, these are much slower to load as a standard
3599 constant than to load from doubles in memory. */
3600 if (x == CONST0_RTX (GET_MODE (x)))
3601 return 1;
3602 if (x == CONST1_RTX (GET_MODE (x)))
3603 return 2;
3604 return 0;
3605 }
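/* An illustrative sketch of how the return value above is typically used,
   assuming the usual x87 load-constant opcodes:

     standard_80387_constant_p (CONST0_RTX (DFmode)) == 1   ->  fldz
     standard_80387_constant_p (CONST1_RTX (DFmode)) == 2   ->  fld1

   Any other floating-point CONST_DOUBLE yields 0 and must be loaded from
   memory instead.  */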
3606
3607 /* Return 1 if X is an FP constant that we can load into an SSE register
3608 without using memory. */
3609 int
3610 standard_sse_constant_p (x)
3611 rtx x;
3612 {
3613 if (GET_CODE (x) != CONST_DOUBLE)
3614 return -1;
3615 return (x == CONST0_RTX (GET_MODE (x)));
3616 }
3617
3618 /* Returns 1 if OP contains a symbol reference */
3619
3620 int
3621 symbolic_reference_mentioned_p (op)
3622 rtx op;
3623 {
3624 register const char *fmt;
3625 register int i;
3626
3627 if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
3628 return 1;
3629
3630 fmt = GET_RTX_FORMAT (GET_CODE (op));
3631 for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
3632 {
3633 if (fmt[i] == 'E')
3634 {
3635 register int j;
3636
3637 for (j = XVECLEN (op, i) - 1; j >= 0; j--)
3638 if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
3639 return 1;
3640 }
3641
3642 else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
3643 return 1;
3644 }
3645
3646 return 0;
3647 }
3648
3649 /* Return 1 if it is appropriate to emit `ret' instructions in the
3650 body of a function. Do this only if the epilogue is simple, needing a
3651 couple of insns. Prior to reloading, we can't tell how many registers
3652 must be saved, so return 0 then. Return 0 if there is no frame
3653 marker to de-allocate.
3654
3655 If NON_SAVING_SETJMP is defined and true, then it is not possible
3656 for the epilogue to be simple, so return 0. This is a special case
3657 since NON_SAVING_SETJMP will not cause regs_ever_live to change
3658 until final, but jump_optimize may need to know sooner if a
3659 `return' is OK. */
3660
3661 int
3662 ix86_can_use_return_insn_p ()
3663 {
3664 struct ix86_frame frame;
3665
3666 #ifdef NON_SAVING_SETJMP
3667 if (NON_SAVING_SETJMP && current_function_calls_setjmp)
3668 return 0;
3669 #endif
3670
3671 if (! reload_completed || frame_pointer_needed)
3672 return 0;
3673
3674 /* Don't allow more than 32k bytes of popped arguments, since that's
3675 all we can do with one instruction. */
3676 if (current_function_pops_args
3677 && current_function_args_size >= 32768)
3678 return 0;
3679
3680 ix86_compute_frame_layout (&frame);
3681 return frame.to_allocate == 0 && frame.nregs == 0;
3682 }
3683 \f
3684 /* Return 1 if VALUE can be stored in the sign extended immediate field. */
3685 int
3686 x86_64_sign_extended_value (value)
3687 rtx value;
3688 {
3689 switch (GET_CODE (value))
3690 {
3691 /* CONST_DOUBLEs never match, since HOST_BITS_PER_WIDE_INT is known
3692 to be at least 32 and thus all acceptable constants are
3693 represented as CONST_INT. */
3694 case CONST_INT:
3695 if (HOST_BITS_PER_WIDE_INT == 32)
3696 return 1;
3697 else
3698 {
3699 HOST_WIDE_INT val = trunc_int_for_mode (INTVAL (value), DImode);
3700 return trunc_int_for_mode (val, SImode) == val;
3701 }
3702 break;
3703
3704 /* For certain code models, the symbolic references are known to fit. */
3705 case SYMBOL_REF:
3706 return ix86_cmodel == CM_SMALL || ix86_cmodel == CM_KERNEL;
3707
3708 /* For certain code models, the code is near as well. */
3709 case LABEL_REF:
3710 return ix86_cmodel != CM_LARGE && ix86_cmodel != CM_SMALL_PIC;
3711
3712 /* We also may accept the offsetted memory references in certain special
3713 cases. */
3714 case CONST:
3715 if (GET_CODE (XEXP (value, 0)) == UNSPEC
3716 && XINT (XEXP (value, 0), 1) == UNSPEC_GOTPCREL)
3717 return 1;
3718 else if (GET_CODE (XEXP (value, 0)) == PLUS)
3719 {
3720 rtx op1 = XEXP (XEXP (value, 0), 0);
3721 rtx op2 = XEXP (XEXP (value, 0), 1);
3722 HOST_WIDE_INT offset;
3723
3724 if (ix86_cmodel == CM_LARGE)
3725 return 0;
3726 if (GET_CODE (op2) != CONST_INT)
3727 return 0;
3728 offset = trunc_int_for_mode (INTVAL (op2), DImode);
3729 switch (GET_CODE (op1))
3730 {
3731 case SYMBOL_REF:
3732 /* For CM_SMALL assume that the latest object is 1MB before the
3733 end of the 31-bit boundary. We may also accept pretty
3734 large negative constants, knowing that all objects are
3735 in the positive half of the address space. */
3736 if (ix86_cmodel == CM_SMALL
3737 && offset < 1024*1024*1024
3738 && trunc_int_for_mode (offset, SImode) == offset)
3739 return 1;
3740 /* For CM_KERNEL we know that all objects reside in the
3741 negative half of the 32-bit address space. We may not
3742 accept negative offsets, since they may be just off,
3743 while we may accept pretty large positive ones. */
3744 if (ix86_cmodel == CM_KERNEL
3745 && offset > 0
3746 && trunc_int_for_mode (offset, SImode) == offset)
3747 return 1;
3748 break;
3749 case LABEL_REF:
3750 /* These conditions are similar to SYMBOL_REF ones, just the
3751 constraints for code models differ. */
3752 if ((ix86_cmodel == CM_SMALL || ix86_cmodel == CM_MEDIUM)
3753 && offset < 1024*1024*1024
3754 && trunc_int_for_mode (offset, SImode) == offset)
3755 return 1;
3756 if (ix86_cmodel == CM_KERNEL
3757 && offset > 0
3758 && trunc_int_for_mode (offset, SImode) == offset)
3759 return 1;
3760 break;
3761 default:
3762 return 0;
3763 }
3764 }
3765 return 0;
3766 default:
3767 return 0;
3768 }
3769 }
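/* A few worked examples for the CONST_INT case above, with a 64-bit
   HOST_WIDE_INT (values are illustrative):

     0x000000007fffffff   accepted: sign-extending the low 32 bits
                          reproduces the value.
     0xffffffff80000000   accepted: this is already the DImode sign
                          extension of -0x80000000.
     0x0000000080000000   rejected: sign-extending the low 32 bits
                          yields 0xffffffff80000000, not the value.  */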
3770
3771 /* Return 1 if VALUE can be stored in the zero extended immediate field. */
3772 int
3773 x86_64_zero_extended_value (value)
3774 rtx value;
3775 {
3776 switch (GET_CODE (value))
3777 {
3778 case CONST_DOUBLE:
3779 if (HOST_BITS_PER_WIDE_INT == 32)
3780 return (GET_MODE (value) == VOIDmode
3781 && !CONST_DOUBLE_HIGH (value));
3782 else
3783 return 0;
3784 case CONST_INT:
3785 if (HOST_BITS_PER_WIDE_INT == 32)
3786 return INTVAL (value) >= 0;
3787 else
3788 return !(INTVAL (value) & ~(HOST_WIDE_INT) 0xffffffff);
3789 break;
3790
3791 /* For certain code models, the symbolic references are known to fit. */
3792 case SYMBOL_REF:
3793 return ix86_cmodel == CM_SMALL;
3794
3795 /* For certain code models, the code is near as well. */
3796 case LABEL_REF:
3797 return ix86_cmodel == CM_SMALL || ix86_cmodel == CM_MEDIUM;
3798
3799 /* We also may accept the offsetted memory references in certain special
3800 cases. */
3801 case CONST:
3802 if (GET_CODE (XEXP (value, 0)) == PLUS)
3803 {
3804 rtx op1 = XEXP (XEXP (value, 0), 0);
3805 rtx op2 = XEXP (XEXP (value, 0), 1);
3806
3807 if (ix86_cmodel == CM_LARGE)
3808 return 0;
3809 switch (GET_CODE (op1))
3810 {
3811 case SYMBOL_REF:
3812 return 0;
3813 /* For the small code model we may accept pretty large positive
3814 offsets, since one bit is available for free. Negative
3815 offsets are limited by the size of the NULL pointer area
3816 specified by the ABI. */
3817 if (ix86_cmodel == CM_SMALL
3818 && GET_CODE (op2) == CONST_INT
3819 && trunc_int_for_mode (INTVAL (op2), DImode) > -0x10000
3820 && (trunc_int_for_mode (INTVAL (op2), SImode)
3821 == INTVAL (op2)))
3822 return 1;
3823 /* ??? For the kernel, we may accept adjustment of
3824 -0x10000000, since we know that it will just convert
3825 negative address space to positive, but perhaps this
3826 is not worthwhile. */
3827 break;
3828 case LABEL_REF:
3829 /* These conditions are similar to SYMBOL_REF ones, just the
3830 constraints for code models differ. */
3831 if ((ix86_cmodel == CM_SMALL || ix86_cmodel == CM_MEDIUM)
3832 && GET_CODE (op2) == CONST_INT
3833 && trunc_int_for_mode (INTVAL (op2), DImode) > -0x10000
3834 && (trunc_int_for_mode (INTVAL (op2), SImode)
3835 == INTVAL (op2)))
3836 return 1;
3837 break;
3838 default:
3839 return 0;
3840 }
3841 }
3842 return 0;
3843 default:
3844 return 0;
3845 }
3846 }
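/* Corresponding examples for the zero-extended test above, with a 64-bit
   HOST_WIDE_INT (values are illustrative):

     0x000000007fffffff   accepted: no bits above bit 31 are set.
     0x00000000ffffffff   accepted: still a 32-bit zero-extended value.
     0x0000000100000000   rejected: bit 32 is set.  */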
3847
3848 /* Value should be nonzero if functions must have frame pointers.
3849 Zero means the frame pointer need not be set up (and parms may
3850 be accessed via the stack pointer) in functions that seem suitable. */
3851
3852 int
3853 ix86_frame_pointer_required ()
3854 {
3855 /* If we accessed previous frames, then the generated code expects
3856 to be able to access the saved ebp value in our frame. */
3857 if (cfun->machine->accesses_prev_frame)
3858 return 1;
3859
3860 /* Several x86 OSes need a frame pointer for other reasons,
3861 usually pertaining to setjmp. */
3862 if (SUBTARGET_FRAME_POINTER_REQUIRED)
3863 return 1;
3864
3865 /* In override_options, TARGET_OMIT_LEAF_FRAME_POINTER turns off
3866 the frame pointer by default. Turn it back on now if we've not
3867 got a leaf function. */
3868 if (TARGET_OMIT_LEAF_FRAME_POINTER
3869 && (!current_function_is_leaf || current_function_profile))
3870 return 1;
3871
3872 return 0;
3873 }
3874
3875 /* Record that the current function accesses previous call frames. */
3876
3877 void
3878 ix86_setup_frame_addresses ()
3879 {
3880 cfun->machine->accesses_prev_frame = 1;
3881 }
3882 \f
3883 #if defined(HAVE_GAS_HIDDEN) && defined(SUPPORTS_ONE_ONLY)
3884 # define USE_HIDDEN_LINKONCE 1
3885 #else
3886 # define USE_HIDDEN_LINKONCE 0
3887 #endif
3888
3889 static int pic_labels_used;
3890
3891 /* Fills in the label name that should be used for a pc thunk for
3892 the given register. */
3893
3894 static void
3895 get_pc_thunk_name (name, regno)
3896 char name[32];
3897 unsigned int regno;
3898 {
3899 if (USE_HIDDEN_LINKONCE)
3900 sprintf (name, "__i686.get_pc_thunk.%s", reg_names[regno]);
3901 else
3902 ASM_GENERATE_INTERNAL_LABEL (name, "LPR", regno);
3903 }
3904
3905
3906 /* At the end of the assembly file, emit the pc thunks used for -fpic; each
3907 thunk loads its register with the return address of the caller and returns. */
3908
3909 void
3910 ix86_asm_file_end (file)
3911 FILE *file;
3912 {
3913 rtx xops[2];
3914 int regno;
3915
3916 for (regno = 0; regno < 8; ++regno)
3917 {
3918 char name[32];
3919
3920 if (! ((pic_labels_used >> regno) & 1))
3921 continue;
3922
3923 get_pc_thunk_name (name, regno);
3924
3925 if (USE_HIDDEN_LINKONCE)
3926 {
3927 tree decl;
3928
3929 decl = build_decl (FUNCTION_DECL, get_identifier (name),
3930 error_mark_node);
3931 TREE_PUBLIC (decl) = 1;
3932 TREE_STATIC (decl) = 1;
3933 DECL_ONE_ONLY (decl) = 1;
3934
3935 (*targetm.asm_out.unique_section) (decl, 0);
3936 named_section (decl, NULL, 0);
3937
3938 (*targetm.asm_out.globalize_label) (file, name);
3939 fputs ("\t.hidden\t", file);
3940 assemble_name (file, name);
3941 fputc ('\n', file);
3942 ASM_DECLARE_FUNCTION_NAME (file, name, decl);
3943 }
3944 else
3945 {
3946 text_section ();
3947 ASM_OUTPUT_LABEL (file, name);
3948 }
3949
3950 xops[0] = gen_rtx_REG (SImode, regno);
3951 xops[1] = gen_rtx_MEM (SImode, stack_pointer_rtx);
3952 output_asm_insn ("mov{l}\t{%1, %0|%0, %1}", xops);
3953 output_asm_insn ("ret", xops);
3954 }
3955 }
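/* For reference, the thunk emitted above for %ebx looks roughly like this
   (a sketch of the expected output; the exact section and label directives
   depend on USE_HIDDEN_LINKONCE):

     __i686.get_pc_thunk.bx:
             movl    (%esp), %ebx
             ret                                                         */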
3956
3957 /* Emit code for the SET_GOT patterns. */
3958
3959 const char *
3960 output_set_got (dest)
3961 rtx dest;
3962 {
3963 rtx xops[3];
3964
3965 xops[0] = dest;
3966 xops[1] = gen_rtx_SYMBOL_REF (Pmode, "_GLOBAL_OFFSET_TABLE_");
3967
3968 if (! TARGET_DEEP_BRANCH_PREDICTION || !flag_pic)
3969 {
3970 xops[2] = gen_rtx_LABEL_REF (Pmode, gen_label_rtx ());
3971
3972 if (!flag_pic)
3973 output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);
3974 else
3975 output_asm_insn ("call\t%a2", xops);
3976
3977 #if TARGET_MACHO
3978 /* Output the "canonical" label name ("Lxx$pb") here too. This
3979 is what will be referred to by the Mach-O PIC subsystem. */
3980 ASM_OUTPUT_LABEL (asm_out_file, machopic_function_base_name ());
3981 #endif
3982 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, "L",
3983 CODE_LABEL_NUMBER (XEXP (xops[2], 0)));
3984
3985 if (flag_pic)
3986 output_asm_insn ("pop{l}\t%0", xops);
3987 }
3988 else
3989 {
3990 char name[32];
3991 get_pc_thunk_name (name, REGNO (dest));
3992 pic_labels_used |= 1 << REGNO (dest);
3993
3994 xops[2] = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
3995 xops[2] = gen_rtx_MEM (QImode, xops[2]);
3996 output_asm_insn ("call\t%X2", xops);
3997 }
3998
3999 if (!flag_pic || TARGET_DEEP_BRANCH_PREDICTION)
4000 output_asm_insn ("add{l}\t{%1, %0|%0, %1}", xops);
4001 else if (!TARGET_MACHO)
4002 output_asm_insn ("add{l}\t{%1+[.-%a2], %0|%0, %a1+(.-%a2)}", xops);
4003
4004 return "";
4005 }
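/* A sketch of the two sequences produced above (AT&T syntax, label names
   illustrative):

   Without deep branch prediction (call/pop form):
             call    .L2
     .L2:    popl    %ebx
             addl    $_GLOBAL_OFFSET_TABLE_+[.-.L2], %ebx

   With deep branch prediction (thunk form):
             call    __i686.get_pc_thunk.bx
             addl    $_GLOBAL_OFFSET_TABLE_, %ebx                        */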
4006
4007 /* Generate a "push" pattern for input ARG. */
4008
4009 static rtx
4010 gen_push (arg)
4011 rtx arg;
4012 {
4013 return gen_rtx_SET (VOIDmode,
4014 gen_rtx_MEM (Pmode,
4015 gen_rtx_PRE_DEC (Pmode,
4016 stack_pointer_rtx)),
4017 arg);
4018 }
4019
4020 /* Return >= 0 if there is an unused call-clobbered register available
4021 for the entire function. */
4022
4023 static unsigned int
4024 ix86_select_alt_pic_regnum ()
4025 {
4026 if (current_function_is_leaf && !current_function_profile)
4027 {
4028 int i;
4029 for (i = 2; i >= 0; --i)
4030 if (!regs_ever_live[i])
4031 return i;
4032 }
4033
4034 return INVALID_REGNUM;
4035 }
4036
4037 /* Return 1 if we need to save REGNO. */
4038 static int
4039 ix86_save_reg (regno, maybe_eh_return)
4040 unsigned int regno;
4041 int maybe_eh_return;
4042 {
4043 if (pic_offset_table_rtx
4044 && regno == REAL_PIC_OFFSET_TABLE_REGNUM
4045 && (regs_ever_live[REAL_PIC_OFFSET_TABLE_REGNUM]
4046 || current_function_profile
4047 || current_function_calls_eh_return))
4048 {
4049 if (ix86_select_alt_pic_regnum () != INVALID_REGNUM)
4050 return 0;
4051 return 1;
4052 }
4053
4054 if (current_function_calls_eh_return && maybe_eh_return)
4055 {
4056 unsigned i;
4057 for (i = 0; ; i++)
4058 {
4059 unsigned test = EH_RETURN_DATA_REGNO (i);
4060 if (test == INVALID_REGNUM)
4061 break;
4062 if (test == regno)
4063 return 1;
4064 }
4065 }
4066
4067 return (regs_ever_live[regno]
4068 && !call_used_regs[regno]
4069 && !fixed_regs[regno]
4070 && (regno != HARD_FRAME_POINTER_REGNUM || !frame_pointer_needed));
4071 }
4072
4073 /* Return number of registers to be saved on the stack. */
4074
4075 static int
4076 ix86_nsaved_regs ()
4077 {
4078 int nregs = 0;
4079 int regno;
4080
4081 for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; regno--)
4082 if (ix86_save_reg (regno, true))
4083 nregs++;
4084 return nregs;
4085 }
4086
4087 /* Return the offset between two registers, one to be eliminated, and the other
4088 its replacement, at the start of a routine. */
4089
4090 HOST_WIDE_INT
4091 ix86_initial_elimination_offset (from, to)
4092 int from;
4093 int to;
4094 {
4095 struct ix86_frame frame;
4096 ix86_compute_frame_layout (&frame);
4097
4098 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
4099 return frame.hard_frame_pointer_offset;
4100 else if (from == FRAME_POINTER_REGNUM
4101 && to == HARD_FRAME_POINTER_REGNUM)
4102 return frame.hard_frame_pointer_offset - frame.frame_pointer_offset;
4103 else
4104 {
4105 if (to != STACK_POINTER_REGNUM)
4106 abort ();
4107 else if (from == ARG_POINTER_REGNUM)
4108 return frame.stack_pointer_offset;
4109 else if (from != FRAME_POINTER_REGNUM)
4110 abort ();
4111 else
4112 return frame.stack_pointer_offset - frame.frame_pointer_offset;
4113 }
4114 }
4115
4116 /* Fill in the ix86_frame structure describing the frame of the function being compiled. */
4117
4118 static void
4119 ix86_compute_frame_layout (frame)
4120 struct ix86_frame *frame;
4121 {
4122 HOST_WIDE_INT total_size;
4123 int stack_alignment_needed = cfun->stack_alignment_needed / BITS_PER_UNIT;
4124 int offset;
4125 int preferred_alignment = cfun->preferred_stack_boundary / BITS_PER_UNIT;
4126 HOST_WIDE_INT size = get_frame_size ();
4127
4128 frame->nregs = ix86_nsaved_regs ();
4129 total_size = size;
4130
4131 /* Skip return address and saved base pointer. */
4132 offset = frame_pointer_needed ? UNITS_PER_WORD * 2 : UNITS_PER_WORD;
4133
4134 frame->hard_frame_pointer_offset = offset;
4135
4136 /* Do some sanity checking of stack_alignment_needed and
4137 preferred_alignment, since the i386 port is the only one using these
4138 features, and they may break easily. */
4139
4140 if (size && !stack_alignment_needed)
4141 abort ();
4142 if (preferred_alignment < STACK_BOUNDARY / BITS_PER_UNIT)
4143 abort ();
4144 if (preferred_alignment > PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT)
4145 abort ();
4146 if (stack_alignment_needed > PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT)
4147 abort ();
4148
4149 if (stack_alignment_needed < STACK_BOUNDARY / BITS_PER_UNIT)
4150 stack_alignment_needed = STACK_BOUNDARY / BITS_PER_UNIT;
4151
4152 /* Register save area */
4153 offset += frame->nregs * UNITS_PER_WORD;
4154
4155 /* Va-arg area */
4156 if (ix86_save_varrargs_registers)
4157 {
4158 offset += X86_64_VARARGS_SIZE;
4159 frame->va_arg_size = X86_64_VARARGS_SIZE;
4160 }
4161 else
4162 frame->va_arg_size = 0;
4163
4164 /* Align start of frame for local function. */
4165 frame->padding1 = ((offset + stack_alignment_needed - 1)
4166 & -stack_alignment_needed) - offset;
4167
4168 offset += frame->padding1;
4169
4170 /* Frame pointer points here. */
4171 frame->frame_pointer_offset = offset;
4172
4173 offset += size;
4174
4175 /* Add outgoing arguments area. Can be skipped if we eliminated
4176 all the function calls as dead code. */
4177 if (ACCUMULATE_OUTGOING_ARGS && !current_function_is_leaf)
4178 {
4179 offset += current_function_outgoing_args_size;
4180 frame->outgoing_arguments_size = current_function_outgoing_args_size;
4181 }
4182 else
4183 frame->outgoing_arguments_size = 0;
4184
4185 /* Align stack boundary. Only needed if we're calling another function
4186 or using alloca. */
4187 if (!current_function_is_leaf || current_function_calls_alloca)
4188 frame->padding2 = ((offset + preferred_alignment - 1)
4189 & -preferred_alignment) - offset;
4190 else
4191 frame->padding2 = 0;
4192
4193 offset += frame->padding2;
4194
4195 /* We've reached end of stack frame. */
4196 frame->stack_pointer_offset = offset;
4197
4198 /* Size prologue needs to allocate. */
4199 frame->to_allocate =
4200 (size + frame->padding1 + frame->padding2
4201 + frame->outgoing_arguments_size + frame->va_arg_size);
4202
4203 if (TARGET_64BIT && TARGET_RED_ZONE && current_function_sp_is_unchanging
4204 && current_function_is_leaf)
4205 {
4206 frame->red_zone_size = frame->to_allocate;
4207 if (frame->red_zone_size > RED_ZONE_SIZE - RED_ZONE_RESERVE)
4208 frame->red_zone_size = RED_ZONE_SIZE - RED_ZONE_RESERVE;
4209 }
4210 else
4211 frame->red_zone_size = 0;
4212 frame->to_allocate -= frame->red_zone_size;
4213 frame->stack_pointer_offset -= frame->red_zone_size;
4214 #if 0
4215 fprintf (stderr, "nregs: %i\n", frame->nregs);
4216 fprintf (stderr, "size: %i\n", size);
4217 fprintf (stderr, "alignment1: %i\n", stack_alignment_needed);
4218 fprintf (stderr, "padding1: %i\n", frame->padding1);
4219 fprintf (stderr, "va_arg: %i\n", frame->va_arg_size);
4220 fprintf (stderr, "padding2: %i\n", frame->padding2);
4221 fprintf (stderr, "to_allocate: %i\n", frame->to_allocate);
4222 fprintf (stderr, "red_zone_size: %i\n", frame->red_zone_size);
4223 fprintf (stderr, "frame_pointer_offset: %i\n", frame->frame_pointer_offset);
4224 fprintf (stderr, "hard_frame_pointer_offset: %i\n",
4225 frame->hard_frame_pointer_offset);
4226 fprintf (stderr, "stack_pointer_offset: %i\n", frame->stack_pointer_offset);
4227 #endif
4228 }
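/* A rough sketch of the frame regions laid out above, from higher to lower
   addresses (sizes as computed in this function):

       return address
       saved frame pointer            (if frame_pointer_needed)
       saved registers                (frame->nregs words)
       va_arg save area               (frame->va_arg_size)
       padding1
       local variables                (get_frame_size ())
       outgoing argument area
       padding2

   frame->to_allocate is everything below the register save area, less
   whatever fits in the red zone on 64-bit targets.  */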
4229
4230 /* Emit code to save registers in the prologue. */
4231
4232 static void
4233 ix86_emit_save_regs ()
4234 {
4235 register int regno;
4236 rtx insn;
4237
4238 for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; regno--)
4239 if (ix86_save_reg (regno, true))
4240 {
4241 insn = emit_insn (gen_push (gen_rtx_REG (Pmode, regno)));
4242 RTX_FRAME_RELATED_P (insn) = 1;
4243 }
4244 }
4245
4246 /* Emit code to save registers using MOV insns. The first register
4247 is stored at POINTER + OFFSET. */
4248 static void
4249 ix86_emit_save_regs_using_mov (pointer, offset)
4250 rtx pointer;
4251 HOST_WIDE_INT offset;
4252 {
4253 int regno;
4254 rtx insn;
4255
4256 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
4257 if (ix86_save_reg (regno, true))
4258 {
4259 insn = emit_move_insn (adjust_address (gen_rtx_MEM (Pmode, pointer),
4260 Pmode, offset),
4261 gen_rtx_REG (Pmode, regno));
4262 RTX_FRAME_RELATED_P (insn) = 1;
4263 offset += UNITS_PER_WORD;
4264 }
4265 }
4266
4267 /* Expand the prologue into a bunch of separate insns. */
4268
4269 void
4270 ix86_expand_prologue ()
4271 {
4272 rtx insn;
4273 bool pic_reg_used;
4274 struct ix86_frame frame;
4275 int use_mov = 0;
4276 HOST_WIDE_INT allocate;
4277
4278 if (!optimize_size)
4279 {
4280 use_fast_prologue_epilogue
4281 = !expensive_function_p (FAST_PROLOGUE_INSN_COUNT);
4282 if (TARGET_PROLOGUE_USING_MOVE)
4283 use_mov = use_fast_prologue_epilogue;
4284 }
4285 ix86_compute_frame_layout (&frame);
4286
4287 /* Note: AT&T enter does NOT have reversed args. Enter is probably
4288 slower on all targets. Also sdb doesn't like it. */
4289
4290 if (frame_pointer_needed)
4291 {
4292 insn = emit_insn (gen_push (hard_frame_pointer_rtx));
4293 RTX_FRAME_RELATED_P (insn) = 1;
4294
4295 insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
4296 RTX_FRAME_RELATED_P (insn) = 1;
4297 }
4298
4299 allocate = frame.to_allocate;
4300 /* In case we are dealing with only a single register and an empty frame,
4301 push is equivalent to the mov+add sequence. */
4302 if (allocate == 0 && frame.nregs <= 1)
4303 use_mov = 0;
4304
4305 if (!use_mov)
4306 ix86_emit_save_regs ();
4307 else
4308 allocate += frame.nregs * UNITS_PER_WORD;
4309
4310 if (allocate == 0)
4311 ;
4312 else if (! TARGET_STACK_PROBE || allocate < CHECK_STACK_LIMIT)
4313 {
4314 insn = emit_insn (gen_pro_epilogue_adjust_stack
4315 (stack_pointer_rtx, stack_pointer_rtx,
4316 GEN_INT (-allocate)));
4317 RTX_FRAME_RELATED_P (insn) = 1;
4318 }
4319 else
4320 {
4321 /* ??? Is this only valid for Win32? */
4322
4323 rtx arg0, sym;
4324
4325 if (TARGET_64BIT)
4326 abort ();
4327
4328 arg0 = gen_rtx_REG (SImode, 0);
4329 emit_move_insn (arg0, GEN_INT (allocate));
4330
4331 sym = gen_rtx_MEM (FUNCTION_MODE,
4332 gen_rtx_SYMBOL_REF (Pmode, "_alloca"));
4333 insn = emit_call_insn (gen_call (sym, const0_rtx, constm1_rtx));
4334
4335 CALL_INSN_FUNCTION_USAGE (insn)
4336 = gen_rtx_EXPR_LIST (VOIDmode, gen_rtx_USE (VOIDmode, arg0),
4337 CALL_INSN_FUNCTION_USAGE (insn));
4338 }
4339 if (use_mov)
4340 {
4341 if (!frame_pointer_needed || !frame.to_allocate)
4342 ix86_emit_save_regs_using_mov (stack_pointer_rtx, frame.to_allocate);
4343 else
4344 ix86_emit_save_regs_using_mov (hard_frame_pointer_rtx,
4345 -frame.nregs * UNITS_PER_WORD);
4346 }
4347
4348 #ifdef SUBTARGET_PROLOGUE
4349 SUBTARGET_PROLOGUE;
4350 #endif
4351
4352 pic_reg_used = false;
4353 if (pic_offset_table_rtx
4354 && (regs_ever_live[REAL_PIC_OFFSET_TABLE_REGNUM]
4355 || current_function_profile))
4356 {
4357 unsigned int alt_pic_reg_used = ix86_select_alt_pic_regnum ();
4358
4359 if (alt_pic_reg_used != INVALID_REGNUM)
4360 REGNO (pic_offset_table_rtx) = alt_pic_reg_used;
4361
4362 pic_reg_used = true;
4363 }
4364
4365 if (pic_reg_used)
4366 {
4367 insn = emit_insn (gen_set_got (pic_offset_table_rtx));
4368
4369 /* Even with accurate pre-reload life analysis, we can wind up
4370 deleting all references to the pic register after reload.
4371 Consider if cross-jumping unifies two sides of a branch
4372 controlled by a comparison vs the only read from a global.
4373 In which case, allow the set_got to be deleted, though we're
4374 too late to do anything about the ebx save in the prologue. */
4375 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD, const0_rtx, NULL);
4376 }
4377
4378 /* Prevent function calls from being scheduled before the call to mcount.
4379 In the pic_reg_used case, make sure that the got load isn't deleted. */
4380 if (current_function_profile)
4381 emit_insn (gen_blockage (pic_reg_used ? pic_offset_table_rtx : const0_rtx));
4382 }
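/* For illustration, a typical 32-bit prologue expanded by the code above
   for a function that needs a frame pointer, saves %ebx and allocates
   24 bytes of locals (assembly is a sketch only):

             pushl   %ebp
             movl    %esp, %ebp
             pushl   %ebx
             subl    $24, %esp                                           */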
4383
4384 /* Emit code to restore saved registers using MOV insns. First register
4385 is restored from POINTER + OFFSET. */
4386 static void
4387 ix86_emit_restore_regs_using_mov (pointer, offset, maybe_eh_return)
4388 rtx pointer;
4389 int offset;
4390 int maybe_eh_return;
4391 {
4392 int regno;
4393
4394 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
4395 if (ix86_save_reg (regno, maybe_eh_return))
4396 {
4397 emit_move_insn (gen_rtx_REG (Pmode, regno),
4398 adjust_address (gen_rtx_MEM (Pmode, pointer),
4399 Pmode, offset));
4400 offset += UNITS_PER_WORD;
4401 }
4402 }
4403
4404 /* Restore function stack, frame, and registers. */
4405
4406 void
4407 ix86_expand_epilogue (style)
4408 int style;
4409 {
4410 int regno;
4411 int sp_valid = !frame_pointer_needed || current_function_sp_is_unchanging;
4412 struct ix86_frame frame;
4413 HOST_WIDE_INT offset;
4414
4415 ix86_compute_frame_layout (&frame);
4416
4417 /* Calculate start of saved registers relative to ebp. Special care
4418 must be taken for the normal return case of a function using
4419 eh_return: the eax and edx registers are marked as saved, but not
4420 restored along this path. */
4421 offset = frame.nregs;
4422 if (current_function_calls_eh_return && style != 2)
4423 offset -= 2;
4424 offset *= -UNITS_PER_WORD;
4425
4426 /* If we're only restoring one register and sp is not valid then
4427 use a move instruction to restore the register, since it's
4428 less work than reloading sp and popping the register.
4429
4430 The default code results in a stack adjustment using an add/lea instruction,
4431 while this code results in a LEAVE instruction (or discrete equivalent),
4432 so it is profitable in some other cases as well. Especially when there
4433 are no registers to restore. We also use this code when TARGET_USE_LEAVE
4434 and there is exactly one register to pop. This heuristic may need some
4435 tuning in the future. */
4436 if ((!sp_valid && frame.nregs <= 1)
4437 || (TARGET_EPILOGUE_USING_MOVE
4438 && use_fast_prologue_epilogue
4439 && (frame.nregs > 1 || frame.to_allocate))
4440 || (frame_pointer_needed && !frame.nregs && frame.to_allocate)
4441 || (frame_pointer_needed && TARGET_USE_LEAVE
4442 && use_fast_prologue_epilogue && frame.nregs == 1)
4443 || current_function_calls_eh_return)
4444 {
4445 /* Restore registers. We can use ebp or esp to address the memory
4446 locations. If both are available, default to ebp, since offsets
4447 are known to be small. The only exception is esp pointing directly to the
4448 end of the block of saved registers, where we may simplify the addressing
4449 mode. */
4450
4451 if (!frame_pointer_needed || (sp_valid && !frame.to_allocate))
4452 ix86_emit_restore_regs_using_mov (stack_pointer_rtx,
4453 frame.to_allocate, style == 2);
4454 else
4455 ix86_emit_restore_regs_using_mov (hard_frame_pointer_rtx,
4456 offset, style == 2);
4457
4458 /* eh_return epilogues need %ecx added to the stack pointer. */
4459 if (style == 2)
4460 {
4461 rtx tmp, sa = EH_RETURN_STACKADJ_RTX;
4462
4463 if (frame_pointer_needed)
4464 {
4465 tmp = gen_rtx_PLUS (Pmode, hard_frame_pointer_rtx, sa);
4466 tmp = plus_constant (tmp, UNITS_PER_WORD);
4467 emit_insn (gen_rtx_SET (VOIDmode, sa, tmp));
4468
4469 tmp = gen_rtx_MEM (Pmode, hard_frame_pointer_rtx);
4470 emit_move_insn (hard_frame_pointer_rtx, tmp);
4471
4472 emit_insn (gen_pro_epilogue_adjust_stack
4473 (stack_pointer_rtx, sa, const0_rtx));
4474 }
4475 else
4476 {
4477 tmp = gen_rtx_PLUS (Pmode, stack_pointer_rtx, sa);
4478 tmp = plus_constant (tmp, (frame.to_allocate
4479 + frame.nregs * UNITS_PER_WORD));
4480 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx, tmp));
4481 }
4482 }
4483 else if (!frame_pointer_needed)
4484 emit_insn (gen_pro_epilogue_adjust_stack
4485 (stack_pointer_rtx, stack_pointer_rtx,
4486 GEN_INT (frame.to_allocate
4487 + frame.nregs * UNITS_PER_WORD)));
4488 /* If not an i386, mov & pop is faster than "leave". */
4489 else if (TARGET_USE_LEAVE || optimize_size || !use_fast_prologue_epilogue)
4490 emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ());
4491 else
4492 {
4493 emit_insn (gen_pro_epilogue_adjust_stack (stack_pointer_rtx,
4494 hard_frame_pointer_rtx,
4495 const0_rtx));
4496 if (TARGET_64BIT)
4497 emit_insn (gen_popdi1 (hard_frame_pointer_rtx));
4498 else
4499 emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
4500 }
4501 }
4502 else
4503 {
4504 /* First step is to deallocate the stack frame so that we can
4505 pop the registers. */
4506 if (!sp_valid)
4507 {
4508 if (!frame_pointer_needed)
4509 abort ();
4510 emit_insn (gen_pro_epilogue_adjust_stack (stack_pointer_rtx,
4511 hard_frame_pointer_rtx,
4512 GEN_INT (offset)));
4513 }
4514 else if (frame.to_allocate)
4515 emit_insn (gen_pro_epilogue_adjust_stack
4516 (stack_pointer_rtx, stack_pointer_rtx,
4517 GEN_INT (frame.to_allocate)));
4518
4519 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
4520 if (ix86_save_reg (regno, false))
4521 {
4522 if (TARGET_64BIT)
4523 emit_insn (gen_popdi1 (gen_rtx_REG (Pmode, regno)));
4524 else
4525 emit_insn (gen_popsi1 (gen_rtx_REG (Pmode, regno)));
4526 }
4527 if (frame_pointer_needed)
4528 {
4529 /* Leave results in shorter dependency chains on CPUs that are
4530 able to grok it fast. */
4531 if (TARGET_USE_LEAVE)
4532 emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ());
4533 else if (TARGET_64BIT)
4534 emit_insn (gen_popdi1 (hard_frame_pointer_rtx));
4535 else
4536 emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
4537 }
4538 }
4539
4540 /* Sibcall epilogues don't want a return instruction. */
4541 if (style == 0)
4542 return;
4543
4544 if (current_function_pops_args && current_function_args_size)
4545 {
4546 rtx popc = GEN_INT (current_function_pops_args);
4547
4548 /* i386 can only pop 64K bytes. If asked to pop more, pop the
4549 return address, do an explicit add, and jump indirectly to the
4550 caller. */
4551
4552 if (current_function_pops_args >= 65536)
4553 {
4554 rtx ecx = gen_rtx_REG (SImode, 2);
4555
4556 /* There is no "pascal" calling convention in the 64-bit ABI. */
4557 if (TARGET_64BIT)
4558 abort ();
4559
4560 emit_insn (gen_popsi1 (ecx));
4561 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, popc));
4562 emit_jump_insn (gen_return_indirect_internal (ecx));
4563 }
4564 else
4565 emit_jump_insn (gen_return_pop_internal (popc));
4566 }
4567 else
4568 emit_jump_insn (gen_return_internal ());
4569 }
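/* The matching epilogue shapes, again as sketches; the exact form depends
   on TARGET_USE_LEAVE and the heuristic above:

   Frame pointer, registers restored with moves, then "leave":
             movl    -4(%ebp), %ebx
             leave
             ret

   No frame pointer, stack deallocated and registers popped:
             addl    $24, %esp
             popl    %ebx
             ret                                                         */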
4570
4571 /* Reset state that expanding the function may have modified; in particular, restore the PIC register number. */
4572
4573 static void
4574 ix86_output_function_epilogue (file, size)
4575 FILE *file ATTRIBUTE_UNUSED;
4576 HOST_WIDE_INT size ATTRIBUTE_UNUSED;
4577 {
4578 if (pic_offset_table_rtx)
4579 REGNO (pic_offset_table_rtx) = REAL_PIC_OFFSET_TABLE_REGNUM;
4580 }
4581 \f
4582 /* Extract the parts of an RTL expression that is a valid memory address
4583 for an instruction. Return 0 if the structure of the address is
4584 grossly off. Return -1 if the address contains ASHIFT, so it is not
4585 strictly valid, but is still used for computing the length of an lea
4586 instruction. */
4587
4588 static int
4589 ix86_decompose_address (addr, out)
4590 register rtx addr;
4591 struct ix86_address *out;
4592 {
4593 rtx base = NULL_RTX;
4594 rtx index = NULL_RTX;
4595 rtx disp = NULL_RTX;
4596 HOST_WIDE_INT scale = 1;
4597 rtx scale_rtx = NULL_RTX;
4598 int retval = 1;
4599
4600 if (REG_P (addr) || GET_CODE (addr) == SUBREG)
4601 base = addr;
4602 else if (GET_CODE (addr) == PLUS)
4603 {
4604 rtx op0 = XEXP (addr, 0);
4605 rtx op1 = XEXP (addr, 1);
4606 enum rtx_code code0 = GET_CODE (op0);
4607 enum rtx_code code1 = GET_CODE (op1);
4608
4609 if (code0 == REG || code0 == SUBREG)
4610 {
4611 if (code1 == REG || code1 == SUBREG)
4612 index = op0, base = op1; /* index + base */
4613 else
4614 base = op0, disp = op1; /* base + displacement */
4615 }
4616 else if (code0 == MULT)
4617 {
4618 index = XEXP (op0, 0);
4619 scale_rtx = XEXP (op0, 1);
4620 if (code1 == REG || code1 == SUBREG)
4621 base = op1; /* index*scale + base */
4622 else
4623 disp = op1; /* index*scale + disp */
4624 }
4625 else if (code0 == PLUS && GET_CODE (XEXP (op0, 0)) == MULT)
4626 {
4627 index = XEXP (XEXP (op0, 0), 0); /* index*scale + base + disp */
4628 scale_rtx = XEXP (XEXP (op0, 0), 1);
4629 base = XEXP (op0, 1);
4630 disp = op1;
4631 }
4632 else if (code0 == PLUS)
4633 {
4634 index = XEXP (op0, 0); /* index + base + disp */
4635 base = XEXP (op0, 1);
4636 disp = op1;
4637 }
4638 else
4639 return 0;
4640 }
4641 else if (GET_CODE (addr) == MULT)
4642 {
4643 index = XEXP (addr, 0); /* index*scale */
4644 scale_rtx = XEXP (addr, 1);
4645 }
4646 else if (GET_CODE (addr) == ASHIFT)
4647 {
4648 rtx tmp;
4649
4650 /* We're called for lea too, which implements ashift on occasion. */
4651 index = XEXP (addr, 0);
4652 tmp = XEXP (addr, 1);
4653 if (GET_CODE (tmp) != CONST_INT)
4654 return 0;
4655 scale = INTVAL (tmp);
4656 if ((unsigned HOST_WIDE_INT) scale > 3)
4657 return 0;
4658 scale = 1 << scale;
4659 retval = -1;
4660 }
4661 else
4662 disp = addr; /* displacement */
4663
4664 /* Extract the integral value of scale. */
4665 if (scale_rtx)
4666 {
4667 if (GET_CODE (scale_rtx) != CONST_INT)
4668 return 0;
4669 scale = INTVAL (scale_rtx);
4670 }
4671
4672 /* Allow arg pointer and stack pointer as index if there is no scaling. */
4673 if (base && index && scale == 1
4674 && (index == arg_pointer_rtx || index == frame_pointer_rtx
4675 || index == stack_pointer_rtx))
4676 {
4677 rtx tmp = base;
4678 base = index;
4679 index = tmp;
4680 }
4681
4682 /* Special case: %ebp cannot be encoded as a base without a displacement. */
4683 if ((base == hard_frame_pointer_rtx
4684 || base == frame_pointer_rtx
4685 || base == arg_pointer_rtx) && !disp)
4686 disp = const0_rtx;
4687
4688 /* Special case: on K6, [%esi] causes the instruction to be vector decoded.
4689 Avoid this by transforming it to [%esi+0]. */
4690 if (ix86_cpu == PROCESSOR_K6 && !optimize_size
4691 && base && !index && !disp
4692 && REG_P (base)
4693 && REGNO_REG_CLASS (REGNO (base)) == SIREG)
4694 disp = const0_rtx;
4695
4696 /* Special case: encode reg+reg instead of reg*2. */
4697 if (!base && index && scale && scale == 2)
4698 base = index, scale = 1;
4699
4700 /* Special case: scaling cannot be encoded without base or displacement. */
4701 if (!base && !disp && index && scale != 1)
4702 disp = const0_rtx;
4703
4704 out->base = base;
4705 out->index = index;
4706 out->disp = disp;
4707 out->scale = scale;
4708
4709 return retval;
4710 }
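/* A sketch of the decomposition performed above.  For the address

       (plus (plus (mult (reg B) (const_int 4)) (reg A)) (const_int 12))

   which corresponds to the operand 12(%eax,%ebx,4) when A and B are %eax
   and %ebx, the resulting parts are

       out->base  = (reg A)
       out->index = (reg B)
       out->scale = 4
       out->disp  = (const_int 12)                                       */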
4711 \f
4712 /* Return the cost of the memory address X.
4713 For i386, it is better to use a complex address than let gcc copy
4714 the address into a reg and make a new pseudo. But not if the address
4715 requires two regs - that would mean more pseudos with longer
4716 lifetimes. */
4717 int
4718 ix86_address_cost (x)
4719 rtx x;
4720 {
4721 struct ix86_address parts;
4722 int cost = 1;
4723
4724 if (!ix86_decompose_address (x, &parts))
4725 abort ();
4726
4727 if (parts.base && GET_CODE (parts.base) == SUBREG)
4728 parts.base = SUBREG_REG (parts.base);
4729 if (parts.index && GET_CODE (parts.index) == SUBREG)
4730 parts.index = SUBREG_REG (parts.index);
4731
4732 /* More complex memory references are better. */
4733 if (parts.disp && parts.disp != const0_rtx)
4734 cost--;
4735
4736 /* Attempt to minimize number of registers in the address. */
4737 if ((parts.base
4738 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER))
4739 || (parts.index
4740 && (!REG_P (parts.index)
4741 || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)))
4742 cost++;
4743
4744 if (parts.base
4745 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER)
4746 && parts.index
4747 && (!REG_P (parts.index) || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)
4748 && parts.base != parts.index)
4749 cost++;
4750
4751 /* The AMD-K6 doesn't like addresses with ModR/M set to 00_xxx_100b,
4752 since its predecode logic can't detect the length of such instructions
4753 and decoding degenerates to vector decoding. Increase the cost of such
4754 addresses here. The penalty is at least 2 cycles. It may be worthwhile
4755 to split such addresses or even refuse them altogether.
4756
4757 The following addressing modes are affected:
4758 [base+scale*index]
4759 [scale*index+disp]
4760 [base+index]
4761
4762 The first and last case may be avoidable by explicitly coding the zero in
4763 the memory address, but I don't have an AMD-K6 machine handy to check this
4764 theory. */
4765
4766 if (TARGET_K6
4767 && ((!parts.disp && parts.base && parts.index && parts.scale != 1)
4768 || (parts.disp && !parts.base && parts.index && parts.scale != 1)
4769 || (!parts.disp && parts.base && parts.index && parts.scale == 1)))
4770 cost += 10;
4771
4772 return cost;
4773 }
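/* A few illustrative values of the cost computed above, assuming hard
   registers and non-K6 tuning:

       12(%eax)         -> 0   (a displacement makes the address "richer")
       (%eax)           -> 1
       (%eax,%ebx,4)    -> 1

   Addresses built from not-yet-allocated pseudos cost one or two units
   more, discouraging the use of extra address registers.  */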
4774 \f
4775 /* If X is a machine specific address (i.e. a symbol or label being
4776 referenced as a displacement from the GOT implemented using an
4777 UNSPEC), then return the base term. Otherwise return X. */
4778
4779 rtx
4780 ix86_find_base_term (x)
4781 rtx x;
4782 {
4783 rtx term;
4784
4785 if (TARGET_64BIT)
4786 {
4787 if (GET_CODE (x) != CONST)
4788 return x;
4789 term = XEXP (x, 0);
4790 if (GET_CODE (term) == PLUS
4791 && (GET_CODE (XEXP (term, 1)) == CONST_INT
4792 || GET_CODE (XEXP (term, 1)) == CONST_DOUBLE))
4793 term = XEXP (term, 0);
4794 if (GET_CODE (term) != UNSPEC
4795 || XINT (term, 1) != UNSPEC_GOTPCREL)
4796 return x;
4797
4798 term = XVECEXP (term, 0, 0);
4799
4800 if (GET_CODE (term) != SYMBOL_REF
4801 && GET_CODE (term) != LABEL_REF)
4802 return x;
4803
4804 return term;
4805 }
4806
4807 if (GET_CODE (x) != PLUS
4808 || XEXP (x, 0) != pic_offset_table_rtx
4809 || GET_CODE (XEXP (x, 1)) != CONST)
4810 return x;
4811
4812 term = XEXP (XEXP (x, 1), 0);
4813
4814 if (GET_CODE (term) == PLUS && GET_CODE (XEXP (term, 1)) == CONST_INT)
4815 term = XEXP (term, 0);
4816
4817 if (GET_CODE (term) != UNSPEC
4818 || XINT (term, 1) != UNSPEC_GOTOFF)
4819 return x;
4820
4821 term = XVECEXP (term, 0, 0);
4822
4823 if (GET_CODE (term) != SYMBOL_REF
4824 && GET_CODE (term) != LABEL_REF)
4825 return x;
4826
4827 return term;
4828 }
4829 \f
4830 /* Determine if a given RTX is a valid constant. We already know this
4831 satisfies CONSTANT_P. */
4832
4833 bool
4834 legitimate_constant_p (x)
4835 rtx x;
4836 {
4837 rtx inner;
4838
4839 switch (GET_CODE (x))
4840 {
4841 case SYMBOL_REF:
4842 /* TLS symbols are not constant. */
4843 if (tls_symbolic_operand (x, Pmode))
4844 return false;
4845 break;
4846
4847 case CONST:
4848 inner = XEXP (x, 0);
4849
4850 /* Offsets of TLS symbols are never valid.
4851 Discourage CSE from creating them. */
4852 if (GET_CODE (inner) == PLUS
4853 && tls_symbolic_operand (XEXP (inner, 0), Pmode))
4854 return false;
4855
4856 /* Only some unspecs are valid as "constants". */
4857 if (GET_CODE (inner) == UNSPEC)
4858 switch (XINT (inner, 1))
4859 {
4860 case UNSPEC_TPOFF:
4861 return local_exec_symbolic_operand (XVECEXP (inner, 0, 0), Pmode);
4862 default:
4863 return false;
4864 }
4865 break;
4866
4867 default:
4868 break;
4869 }
4870
4871 /* Otherwise we handle everything else in the move patterns. */
4872 return true;
4873 }
4874
4875 /* Determine if a given RTX is a valid constant address. */
4876
4877 bool
4878 constant_address_p (x)
4879 rtx x;
4880 {
4881 switch (GET_CODE (x))
4882 {
4883 case LABEL_REF:
4884 case CONST_INT:
4885 return true;
4886
4887 case CONST_DOUBLE:
4888 return TARGET_64BIT;
4889
4890 case CONST:
4891 /* For Mach-O, really believe the CONST. */
4892 if (TARGET_MACHO)
4893 return true;
4894 /* Otherwise fall through. */
4895 case SYMBOL_REF:
4896 return !flag_pic && legitimate_constant_p (x);
4897
4898 default:
4899 return false;
4900 }
4901 }
4902
4903 /* Nonzero if the constant value X is a legitimate general operand
4904 when generating PIC code. It is given that flag_pic is on and
4905 that X satisfies CONSTANT_P or is a CONST_DOUBLE. */
4906
4907 bool
4908 legitimate_pic_operand_p (x)
4909 rtx x;
4910 {
4911 rtx inner;
4912
4913 switch (GET_CODE (x))
4914 {
4915 case CONST:
4916 inner = XEXP (x, 0);
4917
4918 /* Only some unspecs are valid as "constants". */
4919 if (GET_CODE (inner) == UNSPEC)
4920 switch (XINT (inner, 1))
4921 {
4922 case UNSPEC_TPOFF:
4923 return local_exec_symbolic_operand (XVECEXP (inner, 0, 0), Pmode);
4924 default:
4925 return false;
4926 }
4927 /* FALLTHRU */
4928
4929 case SYMBOL_REF:
4930 case LABEL_REF:
4931 return legitimate_pic_address_disp_p (x);
4932
4933 default:
4934 return true;
4935 }
4936 }
4937
4938 /* Determine if a given CONST RTX is a valid memory displacement
4939 in PIC mode. */
4940
4941 int
4942 legitimate_pic_address_disp_p (disp)
4943 register rtx disp;
4944 {
4945 bool saw_plus;
4946
4947 /* In 64bit mode we can allow direct addresses of symbols and labels
4948 when they are not dynamic symbols. */
4949 if (TARGET_64BIT)
4950 {
4951 rtx x = disp;
4952 if (GET_CODE (disp) == CONST)
4953 x = XEXP (disp, 0);
4954 /* ??? Handle PIC code models */
4955 if (GET_CODE (x) == PLUS
4956 && (GET_CODE (XEXP (x, 1)) == CONST_INT
4957 && ix86_cmodel == CM_SMALL_PIC
4958 && INTVAL (XEXP (x, 1)) < 1024*1024*1024
4959 && INTVAL (XEXP (x, 1)) > -1024*1024*1024))
4960 x = XEXP (x, 0);
4961 if (local_symbolic_operand (x, Pmode))
4962 return 1;
4963 }
4964 if (GET_CODE (disp) != CONST)
4965 return 0;
4966 disp = XEXP (disp, 0);
4967
4968 if (TARGET_64BIT)
4969 {
4970 /* It is not safe to allow PLUS expressions here, as that would limit the
4971 allowed distance of GOT tables. We should not need these anyway. */
4972 if (GET_CODE (disp) != UNSPEC
4973 || XINT (disp, 1) != UNSPEC_GOTPCREL)
4974 return 0;
4975
4976 if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF
4977 && GET_CODE (XVECEXP (disp, 0, 0)) != LABEL_REF)
4978 return 0;
4979 return 1;
4980 }
4981
4982 saw_plus = false;
4983 if (GET_CODE (disp) == PLUS)
4984 {
4985 if (GET_CODE (XEXP (disp, 1)) != CONST_INT)
4986 return 0;
4987 disp = XEXP (disp, 0);
4988 saw_plus = true;
4989 }
4990
4991 /* Allow {LABEL | SYMBOL}_REF - SYMBOL_REF-FOR-PICBASE for Mach-O. */
4992 if (TARGET_MACHO && GET_CODE (disp) == MINUS)
4993 {
4994 if (GET_CODE (XEXP (disp, 0)) == LABEL_REF
4995 || GET_CODE (XEXP (disp, 0)) == SYMBOL_REF)
4996 if (GET_CODE (XEXP (disp, 1)) == SYMBOL_REF)
4997 {
4998 const char *sym_name = XSTR (XEXP (disp, 1), 0);
4999 if (strstr (sym_name, "$pb") != 0)
5000 return 1;
5001 }
5002 }
5003
5004 if (GET_CODE (disp) != UNSPEC)
5005 return 0;
5006
5007 switch (XINT (disp, 1))
5008 {
5009 case UNSPEC_GOT:
5010 if (saw_plus)
5011 return false;
5012 return GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF;
5013 case UNSPEC_GOTOFF:
5014 return local_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
5015 case UNSPEC_GOTTPOFF:
5016 if (saw_plus)
5017 return false;
5018 return initial_exec_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
5019 case UNSPEC_NTPOFF:
5020 /* ??? Could support offset here. */
5021 if (saw_plus)
5022 return false;
5023 return local_exec_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
5024 case UNSPEC_DTPOFF:
5025 /* ??? Could support offset here. */
5026 if (saw_plus)
5027 return false;
5028 return local_dynamic_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
5029 }
5030
5031 return 0;
5032 }
5033
5034 /* GO_IF_LEGITIMATE_ADDRESS recognizes an RTL expression that is a valid
5035 memory address for an instruction. The MODE argument is the machine mode
5036 for the MEM expression that wants to use this address.
5037
5038 It only recognizes addresses in canonical form. LEGITIMIZE_ADDRESS should
5039 convert common non-canonical forms to canonical form so that they will
5040 be recognized. */
5041
5042 int
5043 legitimate_address_p (mode, addr, strict)
5044 enum machine_mode mode;
5045 register rtx addr;
5046 int strict;
5047 {
5048 struct ix86_address parts;
5049 rtx base, index, disp;
5050 HOST_WIDE_INT scale;
5051 const char *reason = NULL;
5052 rtx reason_rtx = NULL_RTX;
5053
5054 if (TARGET_DEBUG_ADDR)
5055 {
5056 fprintf (stderr,
5057 "\n======\nGO_IF_LEGITIMATE_ADDRESS, mode = %s, strict = %d\n",
5058 GET_MODE_NAME (mode), strict);
5059 debug_rtx (addr);
5060 }
5061
5062 if (GET_CODE (addr) == UNSPEC && XINT (addr, 1) == UNSPEC_TP)
5063 {
5064 if (TARGET_DEBUG_ADDR)
5065 fprintf (stderr, "Success.\n");
5066 return TRUE;
5067 }
5068
5069 if (ix86_decompose_address (addr, &parts) <= 0)
5070 {
5071 reason = "decomposition failed";
5072 goto report_error;
5073 }
5074
5075 base = parts.base;
5076 index = parts.index;
5077 disp = parts.disp;
5078 scale = parts.scale;
5079
5080 /* Validate base register.
5081
5082 Don't allow SUBREGs here; they can lead to spill failures when the base
5083 is one word out of a two-word structure, which is represented internally
5084 as a DImode int. */
5085
5086 if (base)
5087 {
5088 rtx reg;
5089 reason_rtx = base;
5090
5091 if (GET_CODE (base) == SUBREG)
5092 reg = SUBREG_REG (base);
5093 else
5094 reg = base;
5095
5096 if (GET_CODE (reg) != REG)
5097 {
5098 reason = "base is not a register";
5099 goto report_error;
5100 }
5101
5102 if (GET_MODE (base) != Pmode)
5103 {
5104 reason = "base is not in Pmode";
5105 goto report_error;
5106 }
5107
5108 if ((strict && ! REG_OK_FOR_BASE_STRICT_P (reg))
5109 || (! strict && ! REG_OK_FOR_BASE_NONSTRICT_P (reg)))
5110 {
5111 reason = "base is not valid";
5112 goto report_error;
5113 }
5114 }
5115
5116 /* Validate index register.
5117
5118 Don't allow SUBREGs here; they can lead to spill failures when the index
5119 is one word out of a two-word structure, which is represented internally
5120 as a DImode int. */
5121
5122 if (index)
5123 {
5124 rtx reg;
5125 reason_rtx = index;
5126
5127 if (GET_CODE (index) == SUBREG)
5128 reg = SUBREG_REG (index);
5129 else
5130 reg = index;
5131
5132 if (GET_CODE (reg) != REG)
5133 {
5134 reason = "index is not a register";
5135 goto report_error;
5136 }
5137
5138 if (GET_MODE (index) != Pmode)
5139 {
5140 reason = "index is not in Pmode";
5141 goto report_error;
5142 }
5143
5144 if ((strict && ! REG_OK_FOR_INDEX_STRICT_P (reg))
5145 || (! strict && ! REG_OK_FOR_INDEX_NONSTRICT_P (reg)))
5146 {
5147 reason = "index is not valid";
5148 goto report_error;
5149 }
5150 }
5151
5152 /* Validate scale factor. */
5153 if (scale != 1)
5154 {
5155 reason_rtx = GEN_INT (scale);
5156 if (!index)
5157 {
5158 reason = "scale without index";
5159 goto report_error;
5160 }
5161
5162 if (scale != 2 && scale != 4 && scale != 8)
5163 {
5164 reason = "scale is not a valid multiplier";
5165 goto report_error;
5166 }
5167 }
5168
5169 /* Validate displacement. */
5170 if (disp)
5171 {
5172 reason_rtx = disp;
5173
5174 if (TARGET_64BIT)
5175 {
5176 if (!x86_64_sign_extended_value (disp))
5177 {
5178 reason = "displacement is out of range";
5179 goto report_error;
5180 }
5181 }
5182 else
5183 {
5184 if (GET_CODE (disp) == CONST_DOUBLE)
5185 {
5186 reason = "displacement is a const_double";
5187 goto report_error;
5188 }
5189 }
5190
5191 if (GET_CODE (disp) == CONST
5192 && GET_CODE (XEXP (disp, 0)) == UNSPEC)
5193 switch (XINT (XEXP (disp, 0), 1))
5194 {
5195 case UNSPEC_GOT:
5196 case UNSPEC_GOTOFF:
5197 case UNSPEC_GOTPCREL:
5198 if (!flag_pic)
5199 abort ();
5200 goto is_legitimate_pic;
5201
5202 case UNSPEC_GOTTPOFF:
5203 case UNSPEC_NTPOFF:
5204 case UNSPEC_DTPOFF:
5205 break;
5206
5207 default:
5208 reason = "invalid address unspec";
5209 goto report_error;
5210 }
5211
5212 else if (flag_pic && (SYMBOLIC_CONST (disp)
5213 #if TARGET_MACHO
5214 && !machopic_operand_p (disp)
5215 #endif
5216 ))
5217 {
5218 is_legitimate_pic:
5219 if (TARGET_64BIT && (index || base))
5220 {
5221 reason = "non-constant pic memory reference";
5222 goto report_error;
5223 }
5224 if (! legitimate_pic_address_disp_p (disp))
5225 {
5226 reason = "displacement is an invalid pic construct";
5227 goto report_error;
5228 }
5229
5230 /* This code used to verify that a symbolic pic displacement
5231 includes the pic_offset_table_rtx register.
5232
5233 While this is a good idea, unfortunately these constructs may
5234 be created by the "adds using lea" optimization for incorrect
5235 code like:
5236
5237 int a;
5238 int foo(int i)
5239 {
5240 return *(&a+i);
5241 }
5242
5243 This code is nonsensical, but results in addressing the
5244 GOT table with a pic_offset_table_rtx base. We can't
5245 just refuse it easily, since it gets matched by the
5246 "addsi3" pattern, which later gets split to lea when the
5247 output register differs from the input. While this
5248 could be handled by a separate addsi pattern for this case
5249 that never results in lea, disabling this test seems to be
5250 the easier and correct fix for the crash. */
5251 }
5252 else if (!CONSTANT_ADDRESS_P (disp))
5253 {
5254 reason = "displacement is not constant";
5255 goto report_error;
5256 }
5257 }
5258
5259 /* Everything looks valid. */
5260 if (TARGET_DEBUG_ADDR)
5261 fprintf (stderr, "Success.\n");
5262 return TRUE;
5263
5264 report_error:
5265 if (TARGET_DEBUG_ADDR)
5266 {
5267 fprintf (stderr, "Error: %s\n", reason);
5268 debug_rtx (reason_rtx);
5269 }
5270 return FALSE;
5271 }
5272 \f
5273 /* Return a unique alias set for the GOT. */
5274
5275 static HOST_WIDE_INT
5276 ix86_GOT_alias_set ()
5277 {
5278 static HOST_WIDE_INT set = -1;
5279 if (set == -1)
5280 set = new_alias_set ();
5281 return set;
5282 }
5283
5284 /* Return a legitimate reference for ORIG (an address) using the
5285 register REG. If REG is 0, a new pseudo is generated.
5286
5287 There are two types of references that must be handled:
5288
5289 1. Global data references must load the address from the GOT, via
5290 the PIC reg. An insn is emitted to do this load, and the reg is
5291 returned.
5292
5293 2. Static data references, constant pool addresses, and code labels
5294 compute the address as an offset from the GOT, whose base is in
5295 the PIC reg. Static data objects have SYMBOL_REF_FLAG set to
5296 differentiate them from global data objects. The returned
5297 address is the PIC reg + an unspec constant.
5298
5299 GO_IF_LEGITIMATE_ADDRESS rejects symbolic references unless the PIC
5300 reg also appears in the address. */
5301
5302 rtx
5303 legitimize_pic_address (orig, reg)
5304 rtx orig;
5305 rtx reg;
5306 {
5307 rtx addr = orig;
5308 rtx new = orig;
5309 rtx base;
5310
5311 #if TARGET_MACHO
5312 if (reg == 0)
5313 reg = gen_reg_rtx (Pmode);
5314 /* Use the generic Mach-O PIC machinery. */
5315 return machopic_legitimize_pic_address (orig, GET_MODE (orig), reg);
5316 #endif
5317
5318 if (local_symbolic_operand (addr, Pmode))
5319 {
5320 /* In 64bit mode we can address such objects directly. */
5321 if (TARGET_64BIT)
5322 new = addr;
5323 else
5324 {
5325 /* This symbol may be referenced via a displacement from the PIC
5326 base address (@GOTOFF). */
5327
5328 if (reload_in_progress)
5329 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
5330 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
5331 new = gen_rtx_CONST (Pmode, new);
5332 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
5333
5334 if (reg != 0)
5335 {
5336 emit_move_insn (reg, new);
5337 new = reg;
5338 }
5339 }
5340 }
5341 else if (GET_CODE (addr) == SYMBOL_REF)
5342 {
5343 if (TARGET_64BIT)
5344 {
5345 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTPCREL);
5346 new = gen_rtx_CONST (Pmode, new);
5347 new = gen_rtx_MEM (Pmode, new);
5348 RTX_UNCHANGING_P (new) = 1;
5349 set_mem_alias_set (new, ix86_GOT_alias_set ());
5350
5351 if (reg == 0)
5352 reg = gen_reg_rtx (Pmode);
5353 /* Use gen_movsi directly; otherwise the address is loaded
5354 into a register for CSE. We don't want to CSE these addresses;
5355 instead we CSE addresses from the GOT table, so skip this. */
5356 emit_insn (gen_movsi (reg, new));
5357 new = reg;
5358 }
5359 else
5360 {
5361 /* This symbol must be referenced via a load from the
5362 Global Offset Table (@GOT). */
5363
5364 if (reload_in_progress)
5365 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
5366 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT);
5367 new = gen_rtx_CONST (Pmode, new);
5368 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
5369 new = gen_rtx_MEM (Pmode, new);
5370 RTX_UNCHANGING_P (new) = 1;
5371 set_mem_alias_set (new, ix86_GOT_alias_set ());
5372
5373 if (reg == 0)
5374 reg = gen_reg_rtx (Pmode);
5375 emit_move_insn (reg, new);
5376 new = reg;
5377 }
5378 }
5379 else
5380 {
5381 if (GET_CODE (addr) == CONST)
5382 {
5383 addr = XEXP (addr, 0);
5384
5385 /* We must match stuff we generate before. Assume the only
5386 unspecs that can get here are ours. Not that we could do
5387 anything with them anyway... */
5388 if (GET_CODE (addr) == UNSPEC
5389 || (GET_CODE (addr) == PLUS
5390 && GET_CODE (XEXP (addr, 0)) == UNSPEC))
5391 return orig;
5392 if (GET_CODE (addr) != PLUS)
5393 abort ();
5394 }
5395 if (GET_CODE (addr) == PLUS)
5396 {
5397 rtx op0 = XEXP (addr, 0), op1 = XEXP (addr, 1);
5398
5399 /* Check first to see if this is a constant offset from a @GOTOFF
5400 symbol reference. */
5401 if (local_symbolic_operand (op0, Pmode)
5402 && GET_CODE (op1) == CONST_INT)
5403 {
5404 if (!TARGET_64BIT)
5405 {
5406 if (reload_in_progress)
5407 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
5408 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op0),
5409 UNSPEC_GOTOFF);
5410 new = gen_rtx_PLUS (Pmode, new, op1);
5411 new = gen_rtx_CONST (Pmode, new);
5412 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
5413
5414 if (reg != 0)
5415 {
5416 emit_move_insn (reg, new);
5417 new = reg;
5418 }
5419 }
5420 else
5421 {
5422 /* ??? We need to limit offsets here. */
5423 }
5424 }
5425 else
5426 {
5427 base = legitimize_pic_address (XEXP (addr, 0), reg);
5428 new = legitimize_pic_address (XEXP (addr, 1),
5429 base == reg ? NULL_RTX : reg);
5430
5431 if (GET_CODE (new) == CONST_INT)
5432 new = plus_constant (base, INTVAL (new));
5433 else
5434 {
5435 if (GET_CODE (new) == PLUS && CONSTANT_P (XEXP (new, 1)))
5436 {
5437 base = gen_rtx_PLUS (Pmode, base, XEXP (new, 0));
5438 new = XEXP (new, 1);
5439 }
5440 new = gen_rtx_PLUS (Pmode, base, new);
5441 }
5442 }
5443 }
5444 }
5445 return new;
5446 }
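/* A sketch of the 32-bit PIC forms produced above, where SYM is the
   SYMBOL_REF being legitimized:

   Local symbol, addressed via @GOTOFF:
       (plus (reg pic) (const (unspec [SYM] UNSPEC_GOTOFF)))

   Global symbol, loaded through the GOT via @GOT:
       (mem (plus (reg pic) (const (unspec [SYM] UNSPEC_GOT))))          */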
5447
5448 static void
5449 ix86_encode_section_info (decl, first)
5450 tree decl;
5451 int first ATTRIBUTE_UNUSED;
5452 {
5453 bool local_p = (*targetm.binds_local_p) (decl);
5454 rtx rtl, symbol;
5455
5456 rtl = DECL_P (decl) ? DECL_RTL (decl) : TREE_CST_RTL (decl);
5457 if (GET_CODE (rtl) != MEM)
5458 return;
5459 symbol = XEXP (rtl, 0);
5460 if (GET_CODE (symbol) != SYMBOL_REF)
5461 return;
5462
5463 /* For basic x86, if using PIC, mark a SYMBOL_REF for a non-global
5464 symbol so that we may access it directly in the GOT. */
5465
5466 if (flag_pic)
5467 SYMBOL_REF_FLAG (symbol) = local_p;
5468
5469 /* For ELF, encode thread-local data with %[GLil] for "global dynamic",
5470 "local dynamic", "initial exec" or "local exec" TLS models
5471 respectively. */
5472
5473 if (TREE_CODE (decl) == VAR_DECL && DECL_THREAD_LOCAL (decl))
5474 {
5475 const char *symbol_str;
5476 char *newstr;
5477 size_t len;
5478 enum tls_model kind;
5479
5480 if (!flag_pic)
5481 {
5482 if (local_p)
5483 kind = TLS_MODEL_LOCAL_EXEC;
5484 else
5485 kind = TLS_MODEL_INITIAL_EXEC;
5486 }
5487 /* Local dynamic is inefficient when we're not combining the
5488 parts of the address. */
5489 else if (optimize && local_p)
5490 kind = TLS_MODEL_LOCAL_DYNAMIC;
5491 else
5492 kind = TLS_MODEL_GLOBAL_DYNAMIC;
5493 if (kind < flag_tls_default)
5494 kind = flag_tls_default;
5495
5496 symbol_str = XSTR (symbol, 0);
5497
5498 if (symbol_str[0] == '%')
5499 {
5500 if (symbol_str[1] == tls_model_chars[kind])
5501 return;
5502 symbol_str += 2;
5503 }
5504 len = strlen (symbol_str) + 1;
5505 newstr = alloca (len + 2);
5506
5507 newstr[0] = '%';
5508 newstr[1] = tls_model_chars[kind];
5509 memcpy (newstr + 2, symbol_str, len);
5510
5511 XSTR (symbol, 0) = ggc_alloc_string (newstr, len + 2 - 1);
5512 }
5513 }
5514
5515 /* Undo the above when printing symbol names. */
5516
5517 static const char *
5518 ix86_strip_name_encoding (str)
5519 const char *str;
5520 {
5521 if (str[0] == '%')
5522 str += 2;
5523 if (str [0] == '*')
5524 str += 1;
5525 return str;
5526 }
5527 \f
5528 /* Load the thread pointer into a register. */
5529
5530 static rtx
5531 get_thread_pointer ()
5532 {
5533 rtx tp;
5534
5535 tp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), UNSPEC_TP);
5536 tp = gen_rtx_MEM (Pmode, tp);
5537 RTX_UNCHANGING_P (tp) = 1;
5538 set_mem_alias_set (tp, ix86_GOT_alias_set ());
5539 tp = force_reg (Pmode, tp);
5540
5541 return tp;
5542 }
5543
5544 /* Try machine-dependent ways of modifying an illegitimate address
5545 to be legitimate. If we find one, return the new, valid address.
5546 This macro is used in only one place: `memory_address' in explow.c.
5547
5548 OLDX is the address as it was before break_out_memory_refs was called.
5549 In some cases it is useful to look at this to decide what needs to be done.
5550
5551 MODE and WIN are passed so that this macro can use
5552 GO_IF_LEGITIMATE_ADDRESS.
5553
5554 It is always safe for this macro to do nothing. It exists to recognize
5555 opportunities to optimize the output.
5556
5557 For the 80386, we handle X+REG by loading X into a register R and
5558 using R+REG. R will go in a general reg and indexing will be used.
5559 However, if REG is a broken-out memory address or multiplication,
5560 nothing needs to be done because REG can certainly go in a general reg.
5561
5562 When -fpic is used, special handling is needed for symbolic references.
5563 See comments by legitimize_pic_address in i386.c for details. */
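/* A minimal sketch of the non-PIC, non-TLS canonicalization performed below
   (register numbers and constants are hypothetical):
     (plus (mult (reg A) (const_int 4)) (plus (reg B) (const_int 8)))
   is rewritten as
     (plus (plus (mult (reg A) (const_int 4)) (reg B)) (const_int 8))
   which matches the base + index*scale + displacement addressing form.  */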
5564
5565 rtx
5566 legitimize_address (x, oldx, mode)
5567 register rtx x;
5568 register rtx oldx ATTRIBUTE_UNUSED;
5569 enum machine_mode mode;
5570 {
5571 int changed = 0;
5572 unsigned log;
5573
5574 if (TARGET_DEBUG_ADDR)
5575 {
5576 fprintf (stderr, "\n==========\nLEGITIMIZE_ADDRESS, mode = %s\n",
5577 GET_MODE_NAME (mode));
5578 debug_rtx (x);
5579 }
5580
5581 log = tls_symbolic_operand (x, mode);
5582 if (log)
5583 {
5584 rtx dest, base, off, pic;
5585
5586 switch (log)
5587 {
5588 case TLS_MODEL_GLOBAL_DYNAMIC:
5589 dest = gen_reg_rtx (Pmode);
5590 emit_insn (gen_tls_global_dynamic (dest, x));
5591 break;
5592
5593 case TLS_MODEL_LOCAL_DYNAMIC:
5594 base = gen_reg_rtx (Pmode);
5595 emit_insn (gen_tls_local_dynamic_base (base));
5596
5597 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_DTPOFF);
5598 off = gen_rtx_CONST (Pmode, off);
5599
5600 return gen_rtx_PLUS (Pmode, base, off);
5601
5602 case TLS_MODEL_INITIAL_EXEC:
5603 if (flag_pic)
5604 {
5605 if (reload_in_progress)
5606 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
5607 pic = pic_offset_table_rtx;
5608 }
5609 else
5610 {
5611 pic = gen_reg_rtx (Pmode);
5612 emit_insn (gen_set_got (pic));
5613 }
5614
5615 base = get_thread_pointer ();
5616
5617 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_GOTTPOFF);
5618 off = gen_rtx_CONST (Pmode, off);
5619 off = gen_rtx_PLUS (Pmode, pic, off);
5620 off = gen_rtx_MEM (Pmode, off);
5621 RTX_UNCHANGING_P (off) = 1;
5622 set_mem_alias_set (off, ix86_GOT_alias_set ());
5623
5624 /* Damn Sun for specifying a set of dynamic relocations without
5625 considering the two-operand nature of the architecture!
5626 We'd be much better off with a "GOTNTPOFF" relocation that
5627 already contained the negated constant. */
5628 /* ??? Using negl and reg+reg addressing appears to be a loss
5629 size-wise. The negl is two bytes, just like the extra movl
5630 incurred by the two-operand subl, but reg+reg addressing
5631 uses the two-byte modrm form, unlike plain reg. */
5632
5633 dest = gen_reg_rtx (Pmode);
5634 emit_insn (gen_subsi3 (dest, base, off));
5635 break;
5636
5637 case TLS_MODEL_LOCAL_EXEC:
5638 base = get_thread_pointer ();
5639
5640 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x),
5641 TARGET_GNU_TLS ? UNSPEC_NTPOFF : UNSPEC_TPOFF);
5642 off = gen_rtx_CONST (Pmode, off);
5643
5644 if (TARGET_GNU_TLS)
5645 return gen_rtx_PLUS (Pmode, base, off);
5646 else
5647 {
5648 dest = gen_reg_rtx (Pmode);
5649 emit_insn (gen_subsi3 (dest, base, off));
5650 }
5651 break;
5652
5653 default:
5654 abort ();
5655 }
5656
5657 return dest;
5658 }
5659
5660 if (flag_pic && SYMBOLIC_CONST (x))
5661 return legitimize_pic_address (x, 0);
5662
5663 /* Canonicalize shifts by 0, 1, 2, 3 into multiply */
5664 if (GET_CODE (x) == ASHIFT
5665 && GET_CODE (XEXP (x, 1)) == CONST_INT
5666 && (log = (unsigned) exact_log2 (INTVAL (XEXP (x, 1)))) < 4)
5667 {
5668 changed = 1;
5669 x = gen_rtx_MULT (Pmode, force_reg (Pmode, XEXP (x, 0)),
5670 GEN_INT (1 << log));
5671 }
5672
5673 if (GET_CODE (x) == PLUS)
5674 {
5675 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
5676
5677 if (GET_CODE (XEXP (x, 0)) == ASHIFT
5678 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
5679 && (log = (unsigned) exact_log2 (INTVAL (XEXP (XEXP (x, 0), 1)))) < 4)
5680 {
5681 changed = 1;
5682 XEXP (x, 0) = gen_rtx_MULT (Pmode,
5683 force_reg (Pmode, XEXP (XEXP (x, 0), 0)),
5684 GEN_INT (1 << log));
5685 }
5686
5687 if (GET_CODE (XEXP (x, 1)) == ASHIFT
5688 && GET_CODE (XEXP (XEXP (x, 1), 1)) == CONST_INT
5689 && (log = (unsigned) exact_log2 (INTVAL (XEXP (XEXP (x, 1), 1)))) < 4)
5690 {
5691 changed = 1;
5692 XEXP (x, 1) = gen_rtx_MULT (Pmode,
5693 force_reg (Pmode, XEXP (XEXP (x, 1), 0)),
5694 GEN_INT (1 << log));
5695 }
5696
5697 /* Put multiply first if it isn't already. */
5698 if (GET_CODE (XEXP (x, 1)) == MULT)
5699 {
5700 rtx tmp = XEXP (x, 0);
5701 XEXP (x, 0) = XEXP (x, 1);
5702 XEXP (x, 1) = tmp;
5703 changed = 1;
5704 }
5705
5706 /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
5707 into (plus (plus (mult (reg) (const)) (reg)) (const)). This can be
5708 created by virtual register instantiation, register elimination, and
5709 similar optimizations. */
5710 if (GET_CODE (XEXP (x, 0)) == MULT && GET_CODE (XEXP (x, 1)) == PLUS)
5711 {
5712 changed = 1;
5713 x = gen_rtx_PLUS (Pmode,
5714 gen_rtx_PLUS (Pmode, XEXP (x, 0),
5715 XEXP (XEXP (x, 1), 0)),
5716 XEXP (XEXP (x, 1), 1));
5717 }
5718
5719 /* Canonicalize
5720 (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
5721 into (plus (plus (mult (reg) (const)) (reg)) (const)). */
5722 else if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS
5723 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
5724 && GET_CODE (XEXP (XEXP (x, 0), 1)) == PLUS
5725 && CONSTANT_P (XEXP (x, 1)))
5726 {
5727 rtx constant;
5728 rtx other = NULL_RTX;
5729
5730 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
5731 {
5732 constant = XEXP (x, 1);
5733 other = XEXP (XEXP (XEXP (x, 0), 1), 1);
5734 }
5735 else if (GET_CODE (XEXP (XEXP (XEXP (x, 0), 1), 1)) == CONST_INT)
5736 {
5737 constant = XEXP (XEXP (XEXP (x, 0), 1), 1);
5738 other = XEXP (x, 1);
5739 }
5740 else
5741 constant = 0;
5742
5743 if (constant)
5744 {
5745 changed = 1;
5746 x = gen_rtx_PLUS (Pmode,
5747 gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 0),
5748 XEXP (XEXP (XEXP (x, 0), 1), 0)),
5749 plus_constant (other, INTVAL (constant)));
5750 }
5751 }
5752
5753 if (changed && legitimate_address_p (mode, x, FALSE))
5754 return x;
5755
5756 if (GET_CODE (XEXP (x, 0)) == MULT)
5757 {
5758 changed = 1;
5759 XEXP (x, 0) = force_operand (XEXP (x, 0), 0);
5760 }
5761
5762 if (GET_CODE (XEXP (x, 1)) == MULT)
5763 {
5764 changed = 1;
5765 XEXP (x, 1) = force_operand (XEXP (x, 1), 0);
5766 }
5767
5768 if (changed
5769 && GET_CODE (XEXP (x, 1)) == REG
5770 && GET_CODE (XEXP (x, 0)) == REG)
5771 return x;
5772
5773 if (flag_pic && SYMBOLIC_CONST (XEXP (x, 1)))
5774 {
5775 changed = 1;
5776 x = legitimize_pic_address (x, 0);
5777 }
5778
5779 if (changed && legitimate_address_p (mode, x, FALSE))
5780 return x;
5781
5782 if (GET_CODE (XEXP (x, 0)) == REG)
5783 {
5784 register rtx temp = gen_reg_rtx (Pmode);
5785 register rtx val = force_operand (XEXP (x, 1), temp);
5786 if (val != temp)
5787 emit_move_insn (temp, val);
5788
5789 XEXP (x, 1) = temp;
5790 return x;
5791 }
5792
5793 else if (GET_CODE (XEXP (x, 1)) == REG)
5794 {
5795 register rtx temp = gen_reg_rtx (Pmode);
5796 register rtx val = force_operand (XEXP (x, 0), temp);
5797 if (val != temp)
5798 emit_move_insn (temp, val);
5799
5800 XEXP (x, 0) = temp;
5801 return x;
5802 }
5803 }
5804
5805 return x;
5806 }
5807 \f
5808 /* Print an integer constant expression in assembler syntax. Addition
5809 and subtraction are the only arithmetic that may appear in these
5810 expressions. FILE is the stdio stream to write to, X is the rtx, and
5811 CODE is the operand print code from the output string. */
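/* For example (hypothetical symbol): an UNSPEC_GOTOFF reference to a symbol
   foo is printed as "foo@GOTOFF", and an UNSPEC_GOT reference as "foo@GOT",
   per the UNSPEC cases handled below.  */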
5812
5813 static void
5814 output_pic_addr_const (file, x, code)
5815 FILE *file;
5816 rtx x;
5817 int code;
5818 {
5819 char buf[256];
5820
5821 switch (GET_CODE (x))
5822 {
5823 case PC:
5824 if (flag_pic)
5825 putc ('.', file);
5826 else
5827 abort ();
5828 break;
5829
5830 case SYMBOL_REF:
5831 assemble_name (file, XSTR (x, 0));
5832 if (!TARGET_MACHO && code == 'P' && ! SYMBOL_REF_FLAG (x))
5833 fputs ("@PLT", file);
5834 break;
5835
5836 case LABEL_REF:
5837 x = XEXP (x, 0);
5838 /* FALLTHRU */
5839 case CODE_LABEL:
5840 ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (x));
5841 assemble_name (asm_out_file, buf);
5842 break;
5843
5844 case CONST_INT:
5845 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
5846 break;
5847
5848 case CONST:
5849 /* This used to output parentheses around the expression,
5850 but that does not work on the 386 (either ATT or BSD assembler). */
5851 output_pic_addr_const (file, XEXP (x, 0), code);
5852 break;
5853
5854 case CONST_DOUBLE:
5855 if (GET_MODE (x) == VOIDmode)
5856 {
5857 /* We can use %d if the number is <32 bits and positive. */
5858 if (CONST_DOUBLE_HIGH (x) || CONST_DOUBLE_LOW (x) < 0)
5859 fprintf (file, "0x%lx%08lx",
5860 (unsigned long) CONST_DOUBLE_HIGH (x),
5861 (unsigned long) CONST_DOUBLE_LOW (x));
5862 else
5863 fprintf (file, HOST_WIDE_INT_PRINT_DEC, CONST_DOUBLE_LOW (x));
5864 }
5865 else
5866 /* We can't handle floating point constants;
5867 PRINT_OPERAND must handle them. */
5868 output_operand_lossage ("floating constant misused");
5869 break;
5870
5871 case PLUS:
5872 /* Some assemblers need integer constants to appear first. */
5873 if (GET_CODE (XEXP (x, 0)) == CONST_INT)
5874 {
5875 output_pic_addr_const (file, XEXP (x, 0), code);
5876 putc ('+', file);
5877 output_pic_addr_const (file, XEXP (x, 1), code);
5878 }
5879 else if (GET_CODE (XEXP (x, 1)) == CONST_INT)
5880 {
5881 output_pic_addr_const (file, XEXP (x, 1), code);
5882 putc ('+', file);
5883 output_pic_addr_const (file, XEXP (x, 0), code);
5884 }
5885 else
5886 abort ();
5887 break;
5888
5889 case MINUS:
5890 if (!TARGET_MACHO)
5891 putc (ASSEMBLER_DIALECT == ASM_INTEL ? '(' : '[', file);
5892 output_pic_addr_const (file, XEXP (x, 0), code);
5893 putc ('-', file);
5894 output_pic_addr_const (file, XEXP (x, 1), code);
5895 if (!TARGET_MACHO)
5896 putc (ASSEMBLER_DIALECT == ASM_INTEL ? ')' : ']', file);
5897 break;
5898
5899 case UNSPEC:
5900 if (XVECLEN (x, 0) != 1)
5901 abort ();
5902 output_pic_addr_const (file, XVECEXP (x, 0, 0), code);
5903 switch (XINT (x, 1))
5904 {
5905 case UNSPEC_GOT:
5906 fputs ("@GOT", file);
5907 break;
5908 case UNSPEC_GOTOFF:
5909 fputs ("@GOTOFF", file);
5910 break;
5911 case UNSPEC_GOTPCREL:
5912 fputs ("@GOTPCREL(%rip)", file);
5913 break;
5914 case UNSPEC_GOTTPOFF:
5915 fputs ("@GOTTPOFF", file);
5916 break;
5917 case UNSPEC_TPOFF:
5918 fputs ("@TPOFF", file);
5919 break;
5920 case UNSPEC_NTPOFF:
5921 fputs ("@NTPOFF", file);
5922 break;
5923 case UNSPEC_DTPOFF:
5924 fputs ("@DTPOFF", file);
5925 break;
5926 default:
5927 output_operand_lossage ("invalid UNSPEC as operand");
5928 break;
5929 }
5930 break;
5931
5932 default:
5933 output_operand_lossage ("invalid expression as operand");
5934 }
5935 }
5936
5937 /* This is called from dwarfout.c via ASM_OUTPUT_DWARF_ADDR_CONST.
5938 We need to handle our special PIC relocations. */
5939
5940 void
5941 i386_dwarf_output_addr_const (file, x)
5942 FILE *file;
5943 rtx x;
5944 {
5945 #ifdef ASM_QUAD
5946 fprintf (file, "%s", TARGET_64BIT ? ASM_QUAD : ASM_LONG);
5947 #else
5948 if (TARGET_64BIT)
5949 abort ();
5950 fprintf (file, "%s", ASM_LONG);
5951 #endif
5952 if (flag_pic)
5953 output_pic_addr_const (file, x, '\0');
5954 else
5955 output_addr_const (file, x);
5956 fputc ('\n', file);
5957 }
5958
5959 /* In the name of slightly smaller debug output, and to cater to
5960 general assembler lossage, recognize PIC+GOTOFF and turn it back
5961 into a direct symbol reference. */
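/* For example (hypothetical symbol): on 32-bit targets,
     (plus (reg ebx) (const (unspec [foo] UNSPEC_GOTOFF)))
   is turned back into the plain symbol_ref foo; when an index term is also
   present, only the PIC register is dropped and the index is kept.  */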
5962
5963 rtx
5964 i386_simplify_dwarf_addr (orig_x)
5965 rtx orig_x;
5966 {
5967 rtx x = orig_x, y;
5968
5969 if (GET_CODE (x) == MEM)
5970 x = XEXP (x, 0);
5971
5972 if (TARGET_64BIT)
5973 {
5974 if (GET_CODE (x) != CONST
5975 || GET_CODE (XEXP (x, 0)) != UNSPEC
5976 || XINT (XEXP (x, 0), 1) != UNSPEC_GOTPCREL
5977 || GET_CODE (orig_x) != MEM)
5978 return orig_x;
5979 return XVECEXP (XEXP (x, 0), 0, 0);
5980 }
5981
5982 if (GET_CODE (x) != PLUS
5983 || GET_CODE (XEXP (x, 1)) != CONST)
5984 return orig_x;
5985
5986 if (GET_CODE (XEXP (x, 0)) == REG
5987 && REGNO (XEXP (x, 0)) == PIC_OFFSET_TABLE_REGNUM)
5988 /* %ebx + GOT/GOTOFF */
5989 y = NULL;
5990 else if (GET_CODE (XEXP (x, 0)) == PLUS)
5991 {
5992 /* %ebx + %reg * scale + GOT/GOTOFF */
5993 y = XEXP (x, 0);
5994 if (GET_CODE (XEXP (y, 0)) == REG
5995 && REGNO (XEXP (y, 0)) == PIC_OFFSET_TABLE_REGNUM)
5996 y = XEXP (y, 1);
5997 else if (GET_CODE (XEXP (y, 1)) == REG
5998 && REGNO (XEXP (y, 1)) == PIC_OFFSET_TABLE_REGNUM)
5999 y = XEXP (y, 0);
6000 else
6001 return orig_x;
6002 if (GET_CODE (y) != REG
6003 && GET_CODE (y) != MULT
6004 && GET_CODE (y) != ASHIFT)
6005 return orig_x;
6006 }
6007 else
6008 return orig_x;
6009
6010 x = XEXP (XEXP (x, 1), 0);
6011 if (GET_CODE (x) == UNSPEC
6012 && ((XINT (x, 1) == UNSPEC_GOT && GET_CODE (orig_x) == MEM)
6013 || (XINT (x, 1) == UNSPEC_GOTOFF && GET_CODE (orig_x) != MEM)))
6014 {
6015 if (y)
6016 return gen_rtx_PLUS (Pmode, y, XVECEXP (x, 0, 0));
6017 return XVECEXP (x, 0, 0);
6018 }
6019
6020 if (GET_CODE (x) == PLUS
6021 && GET_CODE (XEXP (x, 0)) == UNSPEC
6022 && GET_CODE (XEXP (x, 1)) == CONST_INT
6023 && ((XINT (XEXP (x, 0), 1) == UNSPEC_GOT && GET_CODE (orig_x) == MEM)
6024 || (XINT (XEXP (x, 0), 1) == UNSPEC_GOTOFF
6025 && GET_CODE (orig_x) != MEM)))
6026 {
6027 x = gen_rtx_PLUS (VOIDmode, XVECEXP (XEXP (x, 0), 0, 0), XEXP (x, 1));
6028 if (y)
6029 return gen_rtx_PLUS (Pmode, y, x);
6030 return x;
6031 }
6032
6033 return orig_x;
6034 }
6035 \f
6036 static void
6037 put_condition_code (code, mode, reverse, fp, file)
6038 enum rtx_code code;
6039 enum machine_mode mode;
6040 int reverse, fp;
6041 FILE *file;
6042 {
6043 const char *suffix;
6044
6045 if (mode == CCFPmode || mode == CCFPUmode)
6046 {
6047 enum rtx_code second_code, bypass_code;
6048 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
6049 if (bypass_code != NIL || second_code != NIL)
6050 abort ();
6051 code = ix86_fp_compare_code_to_integer (code);
6052 mode = CCmode;
6053 }
6054 if (reverse)
6055 code = reverse_condition (code);
6056
6057 switch (code)
6058 {
6059 case EQ:
6060 suffix = "e";
6061 break;
6062 case NE:
6063 suffix = "ne";
6064 break;
6065 case GT:
6066 if (mode != CCmode && mode != CCNOmode && mode != CCGCmode)
6067 abort ();
6068 suffix = "g";
6069 break;
6070 case GTU:
6071 /* ??? Use "nbe" instead of "a" for fcmov losage on some assemblers.
6072 Those same assemblers have the same but opposite losage on cmov. */
6073 if (mode != CCmode)
6074 abort ();
6075 suffix = fp ? "nbe" : "a";
6076 break;
6077 case LT:
6078 if (mode == CCNOmode || mode == CCGOCmode)
6079 suffix = "s";
6080 else if (mode == CCmode || mode == CCGCmode)
6081 suffix = "l";
6082 else
6083 abort ();
6084 break;
6085 case LTU:
6086 if (mode != CCmode)
6087 abort ();
6088 suffix = "b";
6089 break;
6090 case GE:
6091 if (mode == CCNOmode || mode == CCGOCmode)
6092 suffix = "ns";
6093 else if (mode == CCmode || mode == CCGCmode)
6094 suffix = "ge";
6095 else
6096 abort ();
6097 break;
6098 case GEU:
6099 /* ??? As above. */
6100 if (mode != CCmode)
6101 abort ();
6102 suffix = fp ? "nb" : "ae";
6103 break;
6104 case LE:
6105 if (mode != CCmode && mode != CCGCmode && mode != CCNOmode)
6106 abort ();
6107 suffix = "le";
6108 break;
6109 case LEU:
6110 if (mode != CCmode)
6111 abort ();
6112 suffix = "be";
6113 break;
6114 case UNORDERED:
6115 suffix = fp ? "u" : "p";
6116 break;
6117 case ORDERED:
6118 suffix = fp ? "nu" : "np";
6119 break;
6120 default:
6121 abort ();
6122 }
6123 fputs (suffix, file);
6124 }
6125
6126 void
6127 print_reg (x, code, file)
6128 rtx x;
6129 int code;
6130 FILE *file;
6131 {
6132 if (REGNO (x) == ARG_POINTER_REGNUM
6133 || REGNO (x) == FRAME_POINTER_REGNUM
6134 || REGNO (x) == FLAGS_REG
6135 || REGNO (x) == FPSR_REG)
6136 abort ();
6137
6138 if (ASSEMBLER_DIALECT == ASM_ATT || USER_LABEL_PREFIX[0] == 0)
6139 putc ('%', file);
6140
6141 if (code == 'w' || MMX_REG_P (x))
6142 code = 2;
6143 else if (code == 'b')
6144 code = 1;
6145 else if (code == 'k')
6146 code = 4;
6147 else if (code == 'q')
6148 code = 8;
6149 else if (code == 'y')
6150 code = 3;
6151 else if (code == 'h')
6152 code = 0;
6153 else
6154 code = GET_MODE_SIZE (GET_MODE (x));
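/* At this point CODE holds the operand size in bytes, with 0 selecting the
   high QImode half and 3 selecting st(N)-style naming.  */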
6155
6156 /* Irritatingly, AMD extended registers use a different naming convention
6157 from the normal registers. */
6158 if (REX_INT_REG_P (x))
6159 {
6160 if (!TARGET_64BIT)
6161 abort ();
6162 switch (code)
6163 {
6164 case 0:
6165 error ("extended registers have no high halves");
6166 break;
6167 case 1:
6168 fprintf (file, "r%ib", REGNO (x) - FIRST_REX_INT_REG + 8);
6169 break;
6170 case 2:
6171 fprintf (file, "r%iw", REGNO (x) - FIRST_REX_INT_REG + 8);
6172 break;
6173 case 4:
6174 fprintf (file, "r%id", REGNO (x) - FIRST_REX_INT_REG + 8);
6175 break;
6176 case 8:
6177 fprintf (file, "r%i", REGNO (x) - FIRST_REX_INT_REG + 8);
6178 break;
6179 default:
6180 error ("unsupported operand size for extended register");
6181 break;
6182 }
6183 return;
6184 }
6185 switch (code)
6186 {
6187 case 3:
6188 if (STACK_TOP_P (x))
6189 {
6190 fputs ("st(0)", file);
6191 break;
6192 }
6193 /* FALLTHRU */
6194 case 8:
6195 case 4:
6196 case 12:
6197 if (! ANY_FP_REG_P (x))
6198 putc (code == 8 && TARGET_64BIT ? 'r' : 'e', file);
6199 /* FALLTHRU */
6200 case 16:
6201 case 2:
6202 fputs (hi_reg_name[REGNO (x)], file);
6203 break;
6204 case 1:
6205 fputs (qi_reg_name[REGNO (x)], file);
6206 break;
6207 case 0:
6208 fputs (qi_high_reg_name[REGNO (x)], file);
6209 break;
6210 default:
6211 abort ();
6212 }
6213 }
6214
6215 /* Locate some local-dynamic symbol still in use by this function
6216 so that we can print its name in some tls_local_dynamic_base
6217 pattern. */
6218
6219 static const char *
6220 get_some_local_dynamic_name ()
6221 {
6222 rtx insn;
6223
6224 if (cfun->machine->some_ld_name)
6225 return cfun->machine->some_ld_name;
6226
6227 for (insn = get_insns (); insn ; insn = NEXT_INSN (insn))
6228 if (INSN_P (insn)
6229 && for_each_rtx (&PATTERN (insn), get_some_local_dynamic_name_1, 0))
6230 return cfun->machine->some_ld_name;
6231
6232 abort ();
6233 }
6234
6235 static int
6236 get_some_local_dynamic_name_1 (px, data)
6237 rtx *px;
6238 void *data ATTRIBUTE_UNUSED;
6239 {
6240 rtx x = *px;
6241
6242 if (GET_CODE (x) == SYMBOL_REF
6243 && local_dynamic_symbolic_operand (x, Pmode))
6244 {
6245 cfun->machine->some_ld_name = XSTR (x, 0);
6246 return 1;
6247 }
6248
6249 return 0;
6250 }
6251
6252 /* Meaning of CODE:
6253 L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
6254 C -- print opcode suffix for set/cmov insn.
6255 c -- like C, but print reversed condition
6256 F,f -- likewise, but for floating-point.
6257 O -- if CMOV_SUN_AS_SYNTAX, expand to "w.", "l." or "q.", otherwise
6258 nothing
6259 R -- print the prefix for register names.
6260 z -- print the opcode suffix for the size of the current operand.
6261 * -- print a star (in certain assembler syntax)
6262 A -- print an absolute memory reference.
6263 w -- print the operand as if it's a "word" (HImode) even if it isn't.
6264 s -- print a shift double count, followed by the assembler's argument
6265 delimiter.
6266 b -- print the QImode name of the register for the indicated operand.
6267 %b0 would print %al if operands[0] is reg 0.
6268 w -- likewise, print the HImode name of the register.
6269 k -- likewise, print the SImode name of the register.
6270 q -- likewise, print the DImode name of the register.
6271 h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
6272 y -- print "st(0)" instead of "st" as a register.
6273 D -- print condition for SSE cmp instruction.
6274 P -- if PIC, print an @PLT suffix.
6275 X -- don't print any sort of PIC '@' suffix for a symbol.
6276 & -- print some in-use local-dynamic symbol name.
6277 */
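/* A couple of worked examples (operand rtxes are hypothetical): with
   operands[0] = (reg:SI 0) and operands[1] = (mem:SI ...), "%k0" prints
   "%eax" and "%b0" prints "%al" in AT&T syntax, while "%z1" prints the "l"
   size suffix for the 4-byte memory operand.  */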
6278
6279 void
6280 print_operand (file, x, code)
6281 FILE *file;
6282 rtx x;
6283 int code;
6284 {
6285 if (code)
6286 {
6287 switch (code)
6288 {
6289 case '*':
6290 if (ASSEMBLER_DIALECT == ASM_ATT)
6291 putc ('*', file);
6292 return;
6293
6294 case '&':
6295 assemble_name (file, get_some_local_dynamic_name ());
6296 return;
6297
6298 case 'A':
6299 if (ASSEMBLER_DIALECT == ASM_ATT)
6300 putc ('*', file);
6301 else if (ASSEMBLER_DIALECT == ASM_INTEL)
6302 {
6303 /* Intel syntax. For absolute addresses, registers should not
6304 be surrounded by brackets. */
6305 if (GET_CODE (x) != REG)
6306 {
6307 putc ('[', file);
6308 PRINT_OPERAND (file, x, 0);
6309 putc (']', file);
6310 return;
6311 }
6312 }
6313 else
6314 abort ();
6315
6316 PRINT_OPERAND (file, x, 0);
6317 return;
6318
6319
6320 case 'L':
6321 if (ASSEMBLER_DIALECT == ASM_ATT)
6322 putc ('l', file);
6323 return;
6324
6325 case 'W':
6326 if (ASSEMBLER_DIALECT == ASM_ATT)
6327 putc ('w', file);
6328 return;
6329
6330 case 'B':
6331 if (ASSEMBLER_DIALECT == ASM_ATT)
6332 putc ('b', file);
6333 return;
6334
6335 case 'Q':
6336 if (ASSEMBLER_DIALECT == ASM_ATT)
6337 putc ('l', file);
6338 return;
6339
6340 case 'S':
6341 if (ASSEMBLER_DIALECT == ASM_ATT)
6342 putc ('s', file);
6343 return;
6344
6345 case 'T':
6346 if (ASSEMBLER_DIALECT == ASM_ATT)
6347 putc ('t', file);
6348 return;
6349
6350 case 'z':
6351 /* 387 opcodes don't get size suffixes if the operands are
6352 registers. */
6353 if (STACK_REG_P (x))
6354 return;
6355
6356 /* Likewise if using Intel opcodes. */
6357 if (ASSEMBLER_DIALECT == ASM_INTEL)
6358 return;
6359
6360 /* This is the size of op from size of operand. */
6361 switch (GET_MODE_SIZE (GET_MODE (x)))
6362 {
6363 case 2:
6364 #ifdef HAVE_GAS_FILDS_FISTS
6365 putc ('s', file);
6366 #endif
6367 return;
6368
6369 case 4:
6370 if (GET_MODE (x) == SFmode)
6371 {
6372 putc ('s', file);
6373 return;
6374 }
6375 else
6376 putc ('l', file);
6377 return;
6378
6379 case 12:
6380 case 16:
6381 putc ('t', file);
6382 return;
6383
6384 case 8:
6385 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
6386 {
6387 #ifdef GAS_MNEMONICS
6388 putc ('q', file);
6389 #else
6390 putc ('l', file);
6391 putc ('l', file);
6392 #endif
6393 }
6394 else
6395 putc ('l', file);
6396 return;
6397
6398 default:
6399 abort ();
6400 }
6401
6402 case 'b':
6403 case 'w':
6404 case 'k':
6405 case 'q':
6406 case 'h':
6407 case 'y':
6408 case 'X':
6409 case 'P':
6410 break;
6411
6412 case 's':
6413 if (GET_CODE (x) == CONST_INT || ! SHIFT_DOUBLE_OMITS_COUNT)
6414 {
6415 PRINT_OPERAND (file, x, 0);
6416 putc (',', file);
6417 }
6418 return;
6419
6420 case 'D':
6421 /* Little bit of braindamage here. The SSE compare instructions
6422 use completely different names for the comparisons than the
6423 fp conditional moves do. */
6424 switch (GET_CODE (x))
6425 {
6426 case EQ:
6427 case UNEQ:
6428 fputs ("eq", file);
6429 break;
6430 case LT:
6431 case UNLT:
6432 fputs ("lt", file);
6433 break;
6434 case LE:
6435 case UNLE:
6436 fputs ("le", file);
6437 break;
6438 case UNORDERED:
6439 fputs ("unord", file);
6440 break;
6441 case NE:
6442 case LTGT:
6443 fputs ("neq", file);
6444 break;
6445 case UNGE:
6446 case GE:
6447 fputs ("nlt", file);
6448 break;
6449 case UNGT:
6450 case GT:
6451 fputs ("nle", file);
6452 break;
6453 case ORDERED:
6454 fputs ("ord", file);
6455 break;
6456 default:
6457 abort ();
6458 break;
6459 }
6460 return;
6461 case 'O':
6462 #ifdef CMOV_SUN_AS_SYNTAX
6463 if (ASSEMBLER_DIALECT == ASM_ATT)
6464 {
6465 switch (GET_MODE (x))
6466 {
6467 case HImode: putc ('w', file); break;
6468 case SImode:
6469 case SFmode: putc ('l', file); break;
6470 case DImode:
6471 case DFmode: putc ('q', file); break;
6472 default: abort ();
6473 }
6474 putc ('.', file);
6475 }
6476 #endif
6477 return;
6478 case 'C':
6479 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 0, file);
6480 return;
6481 case 'F':
6482 #ifdef CMOV_SUN_AS_SYNTAX
6483 if (ASSEMBLER_DIALECT == ASM_ATT)
6484 putc ('.', file);
6485 #endif
6486 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 1, file);
6487 return;
6488
6489 /* Like above, but reverse condition */
6490 case 'c':
6491 /* Check to see if argument to %c is really a constant
6492 and not a condition code which needs to be reversed. */
6493 if (GET_RTX_CLASS (GET_CODE (x)) != '<')
6494 {
6495 output_operand_lossage ("operand is neither a constant nor a condition code, invalid operand code 'c'");
6496 return;
6497 }
6498 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 0, file);
6499 return;
6500 case 'f':
6501 #ifdef CMOV_SUN_AS_SYNTAX
6502 if (ASSEMBLER_DIALECT == ASM_ATT)
6503 putc ('.', file);
6504 #endif
6505 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 1, file);
6506 return;
6507 case '+':
6508 {
6509 rtx x;
6510
6511 if (!optimize || optimize_size || !TARGET_BRANCH_PREDICTION_HINTS)
6512 return;
6513
6514 x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
6515 if (x)
6516 {
6517 int pred_val = INTVAL (XEXP (x, 0));
6518
6519 if (pred_val < REG_BR_PROB_BASE * 45 / 100
6520 || pred_val > REG_BR_PROB_BASE * 55 / 100)
6521 {
6522 int taken = pred_val > REG_BR_PROB_BASE / 2;
6523 int cputaken = final_forward_branch_p (current_output_insn) == 0;
6524
6525 /* Emit hints only in the case where the default branch prediction
6526 heuristics would fail. */
6527 if (taken != cputaken)
6528 {
6529 /* We use 3e (DS) prefix for taken branches and
6530 2e (CS) prefix for not taken branches. */
6531 if (taken)
6532 fputs ("ds ; ", file);
6533 else
6534 fputs ("cs ; ", file);
6535 }
6536 }
6537 }
6538 return;
6539 }
6540 default:
6541 output_operand_lossage ("invalid operand code `%c'", code);
6542 }
6543 }
6544
6545 if (GET_CODE (x) == REG)
6546 {
6547 PRINT_REG (x, code, file);
6548 }
6549
6550 else if (GET_CODE (x) == MEM)
6551 {
6552 /* No `byte ptr' prefix for call instructions. */
6553 if (ASSEMBLER_DIALECT == ASM_INTEL && code != 'X' && code != 'P')
6554 {
6555 const char * size;
6556 switch (GET_MODE_SIZE (GET_MODE (x)))
6557 {
6558 case 1: size = "BYTE"; break;
6559 case 2: size = "WORD"; break;
6560 case 4: size = "DWORD"; break;
6561 case 8: size = "QWORD"; break;
6562 case 12: size = "XWORD"; break;
6563 case 16: size = "XMMWORD"; break;
6564 default:
6565 abort ();
6566 }
6567
6568 /* Check for explicit size override (codes 'b', 'w' and 'k') */
6569 if (code == 'b')
6570 size = "BYTE";
6571 else if (code == 'w')
6572 size = "WORD";
6573 else if (code == 'k')
6574 size = "DWORD";
6575
6576 fputs (size, file);
6577 fputs (" PTR ", file);
6578 }
6579
6580 x = XEXP (x, 0);
6581 if (flag_pic && CONSTANT_ADDRESS_P (x))
6582 output_pic_addr_const (file, x, code);
6583 /* Avoid (%rip) for call operands. */
6584 else if (CONSTANT_ADDRESS_P (x) && code == 'P'
6585 && GET_CODE (x) != CONST_INT)
6586 output_addr_const (file, x);
6587 else if (this_is_asm_operands && ! address_operand (x, VOIDmode))
6588 output_operand_lossage ("invalid constraints for operand");
6589 else
6590 output_address (x);
6591 }
6592
6593 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == SFmode)
6594 {
6595 REAL_VALUE_TYPE r;
6596 long l;
6597
6598 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
6599 REAL_VALUE_TO_TARGET_SINGLE (r, l);
6600
6601 if (ASSEMBLER_DIALECT == ASM_ATT)
6602 putc ('$', file);
6603 fprintf (file, "0x%lx", l);
6604 }
6605
6606 /* These float cases don't actually occur as immediate operands. */
6607 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == DFmode)
6608 {
6609 REAL_VALUE_TYPE r;
6610 char dstr[30];
6611
6612 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
6613 REAL_VALUE_TO_DECIMAL (r, "%.22e", dstr);
6614 fprintf (file, "%s", dstr);
6615 }
6616
6617 else if (GET_CODE (x) == CONST_DOUBLE
6618 && (GET_MODE (x) == XFmode || GET_MODE (x) == TFmode))
6619 {
6620 REAL_VALUE_TYPE r;
6621 char dstr[30];
6622
6623 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
6624 REAL_VALUE_TO_DECIMAL (r, "%.22e", dstr);
6625 fprintf (file, "%s", dstr);
6626 }
6627
6628 else
6629 {
6630 if (code != 'P')
6631 {
6632 if (GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST_DOUBLE)
6633 {
6634 if (ASSEMBLER_DIALECT == ASM_ATT)
6635 putc ('$', file);
6636 }
6637 else if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF
6638 || GET_CODE (x) == LABEL_REF)
6639 {
6640 if (ASSEMBLER_DIALECT == ASM_ATT)
6641 putc ('$', file);
6642 else
6643 fputs ("OFFSET FLAT:", file);
6644 }
6645 }
6646 if (GET_CODE (x) == CONST_INT)
6647 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
6648 else if (flag_pic)
6649 output_pic_addr_const (file, x, code);
6650 else
6651 output_addr_const (file, x);
6652 }
6653 }
6654 \f
6655 /* Print a memory operand whose address is ADDR. */
6656
6657 void
6658 print_operand_address (file, addr)
6659 FILE *file;
6660 register rtx addr;
6661 {
6662 struct ix86_address parts;
6663 rtx base, index, disp;
6664 int scale;
6665
6666 if (GET_CODE (addr) == UNSPEC && XINT (addr, 1) == UNSPEC_TP)
6667 {
6668 if (ASSEMBLER_DIALECT == ASM_INTEL)
6669 fputs ("DWORD PTR ", file);
6670 if (ASSEMBLER_DIALECT == ASM_ATT || USER_LABEL_PREFIX[0] == 0)
6671 putc ('%', file);
6672 fputs ("gs:0", file);
6673 return;
6674 }
6675
6676 if (! ix86_decompose_address (addr, &parts))
6677 abort ();
6678
6679 base = parts.base;
6680 index = parts.index;
6681 disp = parts.disp;
6682 scale = parts.scale;
6683
6684 if (!base && !index)
6685 {
6686 /* A displacement-only address requires special attention. */
6687
6688 if (GET_CODE (disp) == CONST_INT)
6689 {
6690 if (ASSEMBLER_DIALECT == ASM_INTEL)
6691 {
6692 if (USER_LABEL_PREFIX[0] == 0)
6693 putc ('%', file);
6694 fputs ("ds:", file);
6695 }
6696 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (addr));
6697 }
6698 else if (flag_pic)
6699 output_pic_addr_const (file, addr, 0);
6700 else
6701 output_addr_const (file, addr);
6702
6703 /* Use the one byte shorter RIP-relative addressing for 64-bit mode. */
6704 if (TARGET_64BIT
6705 && (GET_CODE (addr) == SYMBOL_REF
6706 || GET_CODE (addr) == LABEL_REF
6707 || (GET_CODE (addr) == CONST
6708 && GET_CODE (XEXP (addr, 0)) == PLUS
6709 && GET_CODE (XEXP (XEXP (addr, 0), 0)) == SYMBOL_REF
6710 && GET_CODE (XEXP (XEXP (addr, 0), 1)) == CONST_INT)))
6711 fputs ("(%rip)", file);
6712 }
6713 else
6714 {
6715 if (ASSEMBLER_DIALECT == ASM_ATT)
6716 {
6717 if (disp)
6718 {
6719 if (flag_pic)
6720 output_pic_addr_const (file, disp, 0);
6721 else if (GET_CODE (disp) == LABEL_REF)
6722 output_asm_label (disp);
6723 else
6724 output_addr_const (file, disp);
6725 }
6726
6727 putc ('(', file);
6728 if (base)
6729 PRINT_REG (base, 0, file);
6730 if (index)
6731 {
6732 putc (',', file);
6733 PRINT_REG (index, 0, file);
6734 if (scale != 1)
6735 fprintf (file, ",%d", scale);
6736 }
6737 putc (')', file);
6738 }
6739 else
6740 {
6741 rtx offset = NULL_RTX;
6742
6743 if (disp)
6744 {
6745 /* Pull out the offset of a symbol; print any symbol itself. */
6746 if (GET_CODE (disp) == CONST
6747 && GET_CODE (XEXP (disp, 0)) == PLUS
6748 && GET_CODE (XEXP (XEXP (disp, 0), 1)) == CONST_INT)
6749 {
6750 offset = XEXP (XEXP (disp, 0), 1);
6751 disp = gen_rtx_CONST (VOIDmode,
6752 XEXP (XEXP (disp, 0), 0));
6753 }
6754
6755 if (flag_pic)
6756 output_pic_addr_const (file, disp, 0);
6757 else if (GET_CODE (disp) == LABEL_REF)
6758 output_asm_label (disp);
6759 else if (GET_CODE (disp) == CONST_INT)
6760 offset = disp;
6761 else
6762 output_addr_const (file, disp);
6763 }
6764
6765 putc ('[', file);
6766 if (base)
6767 {
6768 PRINT_REG (base, 0, file);
6769 if (offset)
6770 {
6771 if (INTVAL (offset) >= 0)
6772 putc ('+', file);
6773 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
6774 }
6775 }
6776 else if (offset)
6777 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
6778 else
6779 putc ('0', file);
6780
6781 if (index)
6782 {
6783 putc ('+', file);
6784 PRINT_REG (index, 0, file);
6785 if (scale != 1)
6786 fprintf (file, "*%d", scale);
6787 }
6788 putc (']', file);
6789 }
6790 }
6791 }
6792
6793 bool
6794 output_addr_const_extra (file, x)
6795 FILE *file;
6796 rtx x;
6797 {
6798 rtx op;
6799
6800 if (GET_CODE (x) != UNSPEC)
6801 return false;
6802
6803 op = XVECEXP (x, 0, 0);
6804 switch (XINT (x, 1))
6805 {
6806 case UNSPEC_GOTTPOFF:
6807 output_addr_const (file, op);
6808 fputs ("@GOTTPOFF", file);
6809 break;
6810 case UNSPEC_TPOFF:
6811 output_addr_const (file, op);
6812 fputs ("@TPOFF", file);
6813 break;
6814 case UNSPEC_NTPOFF:
6815 output_addr_const (file, op);
6816 fputs ("@NTPOFF", file);
6817 break;
6818 case UNSPEC_DTPOFF:
6819 output_addr_const (file, op);
6820 fputs ("@DTPOFF", file);
6821 break;
6822
6823 default:
6824 return false;
6825 }
6826
6827 return true;
6828 }
6829 \f
6830 /* Split one or more DImode RTL references into pairs of SImode
6831 references. The RTL can be REG, offsettable MEM, integer constant, or
6832 CONST_DOUBLE. "operands" is a pointer to an array of DImode RTL to
6833 split and "num" is its length. lo_half and hi_half are output arrays
6834 that parallel "operands". */
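/* Minimal usage sketch (the operand array here is hypothetical):
     rtx lo[1], hi[1];
     split_di (operands, 1, lo, hi);
   For operands[0] = (mem:DI addr), lo[0] becomes (mem:SI addr) and hi[0]
   becomes the same memory at addr+4.  */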
6835
6836 void
6837 split_di (operands, num, lo_half, hi_half)
6838 rtx operands[];
6839 int num;
6840 rtx lo_half[], hi_half[];
6841 {
6842 while (num--)
6843 {
6844 rtx op = operands[num];
6845
6846 /* simplify_subreg refuses to split volatile memory addresses,
6847 but we still have to handle them. */
6848 if (GET_CODE (op) == MEM)
6849 {
6850 lo_half[num] = adjust_address (op, SImode, 0);
6851 hi_half[num] = adjust_address (op, SImode, 4);
6852 }
6853 else
6854 {
6855 lo_half[num] = simplify_gen_subreg (SImode, op,
6856 GET_MODE (op) == VOIDmode
6857 ? DImode : GET_MODE (op), 0);
6858 hi_half[num] = simplify_gen_subreg (SImode, op,
6859 GET_MODE (op) == VOIDmode
6860 ? DImode : GET_MODE (op), 4);
6861 }
6862 }
6863 }
6864 /* Split one or more TImode RTL references into pairs of DImode
6865 references. The RTL can be REG, offsettable MEM, integer constant, or
6866 CONST_DOUBLE. "operands" is a pointer to an array of TImode RTL to
6867 split and "num" is its length. lo_half and hi_half are output arrays
6868 that parallel "operands". */
6869
6870 void
6871 split_ti (operands, num, lo_half, hi_half)
6872 rtx operands[];
6873 int num;
6874 rtx lo_half[], hi_half[];
6875 {
6876 while (num--)
6877 {
6878 rtx op = operands[num];
6879
6880 /* simplify_subreg refuses to split volatile memory addresses, but we
6881 still have to handle them. */
6882 if (GET_CODE (op) == MEM)
6883 {
6884 lo_half[num] = adjust_address (op, DImode, 0);
6885 hi_half[num] = adjust_address (op, DImode, 8);
6886 }
6887 else
6888 {
6889 lo_half[num] = simplify_gen_subreg (DImode, op, TImode, 0);
6890 hi_half[num] = simplify_gen_subreg (DImode, op, TImode, 8);
6891 }
6892 }
6893 }
6894 \f
6895 /* Output code to perform a 387 binary operation in INSN, one of PLUS,
6896 MINUS, MULT or DIV. OPERANDS are the insn operands, where operands[3]
6897 is the expression of the binary operation. The output may either be
6898 emitted here, or returned to the caller, like all output_* functions.
6899
6900 There is no guarantee that the operands are the same mode, as they
6901 might be within FLOAT or FLOAT_EXTEND expressions. */
6902
6903 #ifndef SYSV386_COMPAT
6904 /* Set to 1 for compatibility with brain-damaged assemblers. No-one
6905 wants to fix the assemblers because that causes incompatibility
6906 with gcc. No-one wants to fix gcc because that causes
6907 incompatibility with assemblers... You can use the option of
6908 -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way. */
6909 #define SYSV386_COMPAT 1
6910 #endif
6911
6912 const char *
6913 output_387_binary_op (insn, operands)
6914 rtx insn;
6915 rtx *operands;
6916 {
6917 static char buf[30];
6918 const char *p;
6919 const char *ssep;
6920 int is_sse = SSE_REG_P (operands[0]) | SSE_REG_P (operands[1]) | SSE_REG_P (operands[2]);
6921
6922 #ifdef ENABLE_CHECKING
6923 /* Even if we do not want to check the inputs, this documents the input
6924 constraints, which helps in understanding the following code. */
6925 if (STACK_REG_P (operands[0])
6926 && ((REG_P (operands[1])
6927 && REGNO (operands[0]) == REGNO (operands[1])
6928 && (STACK_REG_P (operands[2]) || GET_CODE (operands[2]) == MEM))
6929 || (REG_P (operands[2])
6930 && REGNO (operands[0]) == REGNO (operands[2])
6931 && (STACK_REG_P (operands[1]) || GET_CODE (operands[1]) == MEM)))
6932 && (STACK_TOP_P (operands[1]) || STACK_TOP_P (operands[2])))
6933 ; /* ok */
6934 else if (!is_sse)
6935 abort ();
6936 #endif
6937
6938 switch (GET_CODE (operands[3]))
6939 {
6940 case PLUS:
6941 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
6942 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
6943 p = "fiadd";
6944 else
6945 p = "fadd";
6946 ssep = "add";
6947 break;
6948
6949 case MINUS:
6950 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
6951 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
6952 p = "fisub";
6953 else
6954 p = "fsub";
6955 ssep = "sub";
6956 break;
6957
6958 case MULT:
6959 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
6960 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
6961 p = "fimul";
6962 else
6963 p = "fmul";
6964 ssep = "mul";
6965 break;
6966
6967 case DIV:
6968 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
6969 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
6970 p = "fidiv";
6971 else
6972 p = "fdiv";
6973 ssep = "div";
6974 break;
6975
6976 default:
6977 abort ();
6978 }
6979
6980 if (is_sse)
6981 {
6982 strcpy (buf, ssep);
6983 if (GET_MODE (operands[0]) == SFmode)
6984 strcat (buf, "ss\t{%2, %0|%0, %2}");
6985 else
6986 strcat (buf, "sd\t{%2, %0|%0, %2}");
6987 return buf;
6988 }
6989 strcpy (buf, p);
6990
6991 switch (GET_CODE (operands[3]))
6992 {
6993 case MULT:
6994 case PLUS:
6995 if (REG_P (operands[2]) && REGNO (operands[0]) == REGNO (operands[2]))
6996 {
6997 rtx temp = operands[2];
6998 operands[2] = operands[1];
6999 operands[1] = temp;
7000 }
7001
7002 /* We know operands[0] == operands[1]. */
7003
7004 if (GET_CODE (operands[2]) == MEM)
7005 {
7006 p = "%z2\t%2";
7007 break;
7008 }
7009
7010 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
7011 {
7012 if (STACK_TOP_P (operands[0]))
7013 /* How is it that we are storing to a dead operand[2]?
7014 Well, presumably operands[1] is dead too. We can't
7015 store the result to st(0) as st(0) gets popped on this
7016 instruction. Instead store to operands[2] (which I
7017 think has to be st(1)). st(1) will be popped later.
7018 gcc <= 2.8.1 didn't have this check and generated
7019 assembly code that the Unixware assembler rejected. */
7020 p = "p\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
7021 else
7022 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
7023 break;
7024 }
7025
7026 if (STACK_TOP_P (operands[0]))
7027 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
7028 else
7029 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
7030 break;
7031
7032 case MINUS:
7033 case DIV:
7034 if (GET_CODE (operands[1]) == MEM)
7035 {
7036 p = "r%z1\t%1";
7037 break;
7038 }
7039
7040 if (GET_CODE (operands[2]) == MEM)
7041 {
7042 p = "%z2\t%2";
7043 break;
7044 }
7045
7046 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
7047 {
7048 #if SYSV386_COMPAT
7049 /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
7050 derived assemblers, confusingly reverse the direction of
7051 the operation for fsub{r} and fdiv{r} when the
7052 destination register is not st(0). The Intel assembler
7053 doesn't have this brain damage. Read !SYSV386_COMPAT to
7054 figure out what the hardware really does. */
7055 if (STACK_TOP_P (operands[0]))
7056 p = "{p\t%0, %2|rp\t%2, %0}";
7057 else
7058 p = "{rp\t%2, %0|p\t%0, %2}";
7059 #else
7060 if (STACK_TOP_P (operands[0]))
7061 /* As above for fmul/fadd, we can't store to st(0). */
7062 p = "rp\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
7063 else
7064 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
7065 #endif
7066 break;
7067 }
7068
7069 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
7070 {
7071 #if SYSV386_COMPAT
7072 if (STACK_TOP_P (operands[0]))
7073 p = "{rp\t%0, %1|p\t%1, %0}";
7074 else
7075 p = "{p\t%1, %0|rp\t%0, %1}";
7076 #else
7077 if (STACK_TOP_P (operands[0]))
7078 p = "p\t{%0, %1|%1, %0}"; /* st(1) = st(1) op st(0); pop */
7079 else
7080 p = "rp\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2); pop */
7081 #endif
7082 break;
7083 }
7084
7085 if (STACK_TOP_P (operands[0]))
7086 {
7087 if (STACK_TOP_P (operands[1]))
7088 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
7089 else
7090 p = "r\t{%y1, %0|%0, %y1}"; /* st(0) = st(r1) op st(0) */
7091 break;
7092 }
7093 else if (STACK_TOP_P (operands[1]))
7094 {
7095 #if SYSV386_COMPAT
7096 p = "{\t%1, %0|r\t%0, %1}";
7097 #else
7098 p = "r\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2) */
7099 #endif
7100 }
7101 else
7102 {
7103 #if SYSV386_COMPAT
7104 p = "{r\t%2, %0|\t%0, %2}";
7105 #else
7106 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
7107 #endif
7108 }
7109 break;
7110
7111 default:
7112 abort ();
7113 }
7114
7115 strcat (buf, p);
7116 return buf;
7117 }
7118
7119 /* Output code to initialize control word copies used by
7120 trunc?f?i patterns. NORMAL is set to the current control word, while
7121 ROUND_DOWN is set to a control word that rounds downwards. */
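/* Note on the constants used below: ORing 0xc00 into the copied control word
   sets the x87 rounding-control field (bits 10 and 11) to 11b, i.e.
   truncation toward zero, which is what the trunc?f?i patterns rely on; the
   movsi_insv_1 branch appears to set the same bits while avoiding a partial
   register stall (an assumption, not verified here).  */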
7122 void
7123 emit_i387_cw_initialization (normal, round_down)
7124 rtx normal, round_down;
7125 {
7126 rtx reg = gen_reg_rtx (HImode);
7127
7128 emit_insn (gen_x86_fnstcw_1 (normal));
7129 emit_move_insn (reg, normal);
7130 if (!TARGET_PARTIAL_REG_STALL && !optimize_size
7131 && !TARGET_64BIT)
7132 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0xc)));
7133 else
7134 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0xc00)));
7135 emit_move_insn (round_down, reg);
7136 }
7137
7138 /* Output code for INSN to convert a float to a signed int. OPERANDS
7139 are the insn operands. The output may be [HSD]Imode and the input
7140 operand may be [SDX]Fmode. */
7141
7142 const char *
7143 output_fix_trunc (insn, operands)
7144 rtx insn;
7145 rtx *operands;
7146 {
7147 int stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
7148 int dimode_p = GET_MODE (operands[0]) == DImode;
7149
7150 /* Jump through a hoop or two for DImode, since the hardware has no
7151 non-popping instruction. We used to do this a different way, but
7152 that was somewhat fragile and broke with post-reload splitters. */
7153 if (dimode_p && !stack_top_dies)
7154 output_asm_insn ("fld\t%y1", operands);
7155
7156 if (!STACK_TOP_P (operands[1]))
7157 abort ();
7158
7159 if (GET_CODE (operands[0]) != MEM)
7160 abort ();
7161
7162 output_asm_insn ("fldcw\t%3", operands);
7163 if (stack_top_dies || dimode_p)
7164 output_asm_insn ("fistp%z0\t%0", operands);
7165 else
7166 output_asm_insn ("fist%z0\t%0", operands);
7167 output_asm_insn ("fldcw\t%2", operands);
7168
7169 return "";
7170 }
7171
7172 /* Output code for INSN to compare OPERANDS. EFLAGS_P is 1 when fcomi
7173 should be used and 2 when fnstsw should be used. UNORDERED_P is true
7174 when fucom should be used. */
7175
7176 const char *
7177 output_fp_compare (insn, operands, eflags_p, unordered_p)
7178 rtx insn;
7179 rtx *operands;
7180 int eflags_p, unordered_p;
7181 {
7182 int stack_top_dies;
7183 rtx cmp_op0 = operands[0];
7184 rtx cmp_op1 = operands[1];
7185 int is_sse = SSE_REG_P (operands[0]) | SSE_REG_P (operands[1]);
7186
7187 if (eflags_p == 2)
7188 {
7189 cmp_op0 = cmp_op1;
7190 cmp_op1 = operands[2];
7191 }
7192 if (is_sse)
7193 {
7194 if (GET_MODE (operands[0]) == SFmode)
7195 if (unordered_p)
7196 return "ucomiss\t{%1, %0|%0, %1}";
7197 else
7198 return "comiss\t{%1, %0|%0, %y}";
7199 else
7200 if (unordered_p)
7201 return "ucomisd\t{%1, %0|%0, %1}";
7202 else
7203 return "comisd\t{%1, %0|%0, %y}";
7204 }
7205
7206 if (! STACK_TOP_P (cmp_op0))
7207 abort ();
7208
7209 stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
7210
7211 if (STACK_REG_P (cmp_op1)
7212 && stack_top_dies
7213 && find_regno_note (insn, REG_DEAD, REGNO (cmp_op1))
7214 && REGNO (cmp_op1) != FIRST_STACK_REG)
7215 {
7216 /* If the top of the 387 stack dies, and the other operand
7217 is also a stack register that dies, then this must be an
7218 `fcompp' float compare. */
7219
7220 if (eflags_p == 1)
7221 {
7222 /* There is no double popping fcomi variant. Fortunately,
7223 eflags is immune from the fstp's cc clobbering. */
7224 if (unordered_p)
7225 output_asm_insn ("fucomip\t{%y1, %0|%0, %y1}", operands);
7226 else
7227 output_asm_insn ("fcomip\t{%y1, %0|%0, %y1}", operands);
7228 return "fstp\t%y0";
7229 }
7230 else
7231 {
7232 if (eflags_p == 2)
7233 {
7234 if (unordered_p)
7235 return "fucompp\n\tfnstsw\t%0";
7236 else
7237 return "fcompp\n\tfnstsw\t%0";
7238 }
7239 else
7240 {
7241 if (unordered_p)
7242 return "fucompp";
7243 else
7244 return "fcompp";
7245 }
7246 }
7247 }
7248 else
7249 {
7250 /* Encoded here as eflags_p | intmode | unordered_p | stack_top_dies. */
7251
7252 static const char * const alt[24] =
7253 {
7254 "fcom%z1\t%y1",
7255 "fcomp%z1\t%y1",
7256 "fucom%z1\t%y1",
7257 "fucomp%z1\t%y1",
7258
7259 "ficom%z1\t%y1",
7260 "ficomp%z1\t%y1",
7261 NULL,
7262 NULL,
7263
7264 "fcomi\t{%y1, %0|%0, %y1}",
7265 "fcomip\t{%y1, %0|%0, %y1}",
7266 "fucomi\t{%y1, %0|%0, %y1}",
7267 "fucomip\t{%y1, %0|%0, %y1}",
7268
7269 NULL,
7270 NULL,
7271 NULL,
7272 NULL,
7273
7274 "fcom%z2\t%y2\n\tfnstsw\t%0",
7275 "fcomp%z2\t%y2\n\tfnstsw\t%0",
7276 "fucom%z2\t%y2\n\tfnstsw\t%0",
7277 "fucomp%z2\t%y2\n\tfnstsw\t%0",
7278
7279 "ficom%z2\t%y2\n\tfnstsw\t%0",
7280 "ficomp%z2\t%y2\n\tfnstsw\t%0",
7281 NULL,
7282 NULL
7283 };
7284
7285 int mask;
7286 const char *ret;
7287
7288 mask = eflags_p << 3;
7289 mask |= (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT) << 2;
7290 mask |= unordered_p << 1;
7291 mask |= stack_top_dies;
7292
7293 if (mask >= 24)
7294 abort ();
7295 ret = alt[mask];
7296 if (ret == NULL)
7297 abort ();
7298
7299 return ret;
7300 }
7301 }
7302
7303 void
7304 ix86_output_addr_vec_elt (file, value)
7305 FILE *file;
7306 int value;
7307 {
7308 const char *directive = ASM_LONG;
7309
7310 if (TARGET_64BIT)
7311 {
7312 #ifdef ASM_QUAD
7313 directive = ASM_QUAD;
7314 #else
7315 abort ();
7316 #endif
7317 }
7318
7319 fprintf (file, "%s%s%d\n", directive, LPREFIX, value);
7320 }
7321
7322 void
7323 ix86_output_addr_diff_elt (file, value, rel)
7324 FILE *file;
7325 int value, rel;
7326 {
7327 if (TARGET_64BIT)
7328 fprintf (file, "%s%s%d-%s%d\n",
7329 ASM_LONG, LPREFIX, value, LPREFIX, rel);
7330 else if (HAVE_AS_GOTOFF_IN_DATA)
7331 fprintf (file, "%s%s%d@GOTOFF\n", ASM_LONG, LPREFIX, value);
7332 #if TARGET_MACHO
7333 else if (TARGET_MACHO)
7334 fprintf (file, "%s%s%d-%s\n", ASM_LONG, LPREFIX, value,
7335 machopic_function_base_name () + 1);
7336 #endif
7337 else
7338 asm_fprintf (file, "%s%U_GLOBAL_OFFSET_TABLE_+[.-%s%d]\n",
7339 ASM_LONG, LPREFIX, value);
7340 }
7341 \f
7342 /* Generate either "mov $0, reg" or "xor reg, reg", as appropriate
7343 for the target. */
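/* Illustration of the expansion below (register chosen arbitrarily): after
   reload, clearing %eax becomes either "movl $0, %eax" or, when xor is
   preferred (!TARGET_USE_MOV0 or -Os), a
     (parallel [(set (reg) (const_int 0)) (clobber (reg:CC 17))])
   so that xor's flags clobber is visible to the rest of the compiler.  */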
7344
7345 void
7346 ix86_expand_clear (dest)
7347 rtx dest;
7348 {
7349 rtx tmp;
7350
7351 /* We play register width games, which are only valid after reload. */
7352 if (!reload_completed)
7353 abort ();
7354
7355 /* Avoid HImode and its attendant prefix byte. */
7356 if (GET_MODE_SIZE (GET_MODE (dest)) < 4)
7357 dest = gen_rtx_REG (SImode, REGNO (dest));
7358
7359 tmp = gen_rtx_SET (VOIDmode, dest, const0_rtx);
7360
7361 /* This predicate should match that for movsi_xor and movdi_xor_rex64. */
7362 if (reload_completed && (!TARGET_USE_MOV0 || optimize_size))
7363 {
7364 rtx clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, 17));
7365 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
7366 }
7367
7368 emit_insn (tmp);
7369 }
7370
7371 /* X is an unchanging MEM. If it is a constant pool reference, return
7372 the constant pool rtx, else NULL. */
7373
7374 static rtx
7375 maybe_get_pool_constant (x)
7376 rtx x;
7377 {
7378 x = XEXP (x, 0);
7379
7380 if (flag_pic)
7381 {
7382 if (GET_CODE (x) != PLUS)
7383 return NULL_RTX;
7384 if (XEXP (x, 0) != pic_offset_table_rtx)
7385 return NULL_RTX;
7386 x = XEXP (x, 1);
7387 if (GET_CODE (x) != CONST)
7388 return NULL_RTX;
7389 x = XEXP (x, 0);
7390 if (GET_CODE (x) != UNSPEC)
7391 return NULL_RTX;
7392 if (XINT (x, 1) != UNSPEC_GOTOFF)
7393 return NULL_RTX;
7394 x = XVECEXP (x, 0, 0);
7395 }
7396
7397 if (GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
7398 return get_pool_constant (x);
7399
7400 return NULL_RTX;
7401 }
7402
7403 void
7404 ix86_expand_move (mode, operands)
7405 enum machine_mode mode;
7406 rtx operands[];
7407 {
7408 int strict = (reload_in_progress || reload_completed);
7409 rtx insn, op0, op1, tmp;
7410
7411 op0 = operands[0];
7412 op1 = operands[1];
7413
7414 /* ??? We have a slight problem. We need to say that tls symbols are
7415 not legitimate constants so that reload does not helpfully reload
7416 these constants from a REG_EQUIV, which we cannot handle. (Recall
7417 that general- and local-dynamic address resolution requires a
7418 function call.)
7419
7420 However, if we say that tls symbols are not legitimate constants,
7421 then emit_move_insn will helpfully drop them into the constant pool.
7422
7423 It is far easier to work around emit_move_insn than reload. Recognize
7424 the MEM that we would have created and extract the symbol_ref. */
7425
7426 if (mode == Pmode
7427 && GET_CODE (op1) == MEM
7428 && RTX_UNCHANGING_P (op1))
7429 {
7430 tmp = maybe_get_pool_constant (op1);
7431 /* Note that we only care about symbolic constants here, which
7432 unlike CONST_INT will always have a proper mode. */
7433 if (tmp && GET_MODE (tmp) == Pmode)
7434 op1 = tmp;
7435 }
7436
7437 if (tls_symbolic_operand (op1, Pmode))
7438 {
7439 op1 = legitimize_address (op1, op1, VOIDmode);
7440 if (GET_CODE (op0) == MEM)
7441 {
7442 tmp = gen_reg_rtx (mode);
7443 emit_insn (gen_rtx_SET (VOIDmode, tmp, op1));
7444 op1 = tmp;
7445 }
7446 }
7447 else if (flag_pic && mode == Pmode && symbolic_operand (op1, Pmode))
7448 {
7449 #if TARGET_MACHO
7450 if (MACHOPIC_PURE)
7451 {
7452 rtx temp = ((reload_in_progress
7453 || ((op0 && GET_CODE (op0) == REG)
7454 && mode == Pmode))
7455 ? op0 : gen_reg_rtx (Pmode));
7456 op1 = machopic_indirect_data_reference (op1, temp);
7457 op1 = machopic_legitimize_pic_address (op1, mode,
7458 temp == op1 ? 0 : temp);
7459 }
7460 else
7461 {
7462 if (MACHOPIC_INDIRECT)
7463 op1 = machopic_indirect_data_reference (op1, 0);
7464 }
7465 if (op0 != op1)
7466 {
7467 insn = gen_rtx_SET (VOIDmode, op0, op1);
7468 emit_insn (insn);
7469 }
7470 return;
7471 #endif /* TARGET_MACHO */
7472 if (GET_CODE (op0) == MEM)
7473 op1 = force_reg (Pmode, op1);
7474 else
7475 {
7476 rtx temp = op0;
7477 if (GET_CODE (temp) != REG)
7478 temp = gen_reg_rtx (Pmode);
7479 temp = legitimize_pic_address (op1, temp);
7480 if (temp == op0)
7481 return;
7482 op1 = temp;
7483 }
7484 }
7485 else
7486 {
7487 if (GET_CODE (op0) == MEM
7488 && (PUSH_ROUNDING (GET_MODE_SIZE (mode)) != GET_MODE_SIZE (mode)
7489 || !push_operand (op0, mode))
7490 && GET_CODE (op1) == MEM)
7491 op1 = force_reg (mode, op1);
7492
7493 if (push_operand (op0, mode)
7494 && ! general_no_elim_operand (op1, mode))
7495 op1 = copy_to_mode_reg (mode, op1);
7496
7497 /* Force large constants in 64-bit compilation into a register
7498 to get them CSEd. */
7499 if (TARGET_64BIT && mode == DImode
7500 && immediate_operand (op1, mode)
7501 && !x86_64_zero_extended_value (op1)
7502 && !register_operand (op0, mode)
7503 && optimize && !reload_completed && !reload_in_progress)
7504 op1 = copy_to_mode_reg (mode, op1);
7505
7506 if (FLOAT_MODE_P (mode))
7507 {
7508 /* If we are loading a floating point constant to a register,
7509 force the value to memory now, since we'll get better code
7510 out the back end. */
7511
7512 if (strict)
7513 ;
7514 else if (GET_CODE (op1) == CONST_DOUBLE
7515 && register_operand (op0, mode))
7516 op1 = validize_mem (force_const_mem (mode, op1));
7517 }
7518 }
7519
7520 insn = gen_rtx_SET (VOIDmode, op0, op1);
7521
7522 emit_insn (insn);
7523 }
7524
7525 void
7526 ix86_expand_vector_move (mode, operands)
7527 enum machine_mode mode;
7528 rtx operands[];
7529 {
7530 /* Force constants other than zero into memory. We do not know how
7531 the instructions used to build constants modify the upper 64 bits
7532 of the register; once we have that information, we may be able
7533 to handle some of them more efficiently. */
7534 if ((reload_in_progress | reload_completed) == 0
7535 && register_operand (operands[0], mode)
7536 && CONSTANT_P (operands[1]))
7537 {
7538 rtx addr = gen_reg_rtx (Pmode);
7539 emit_move_insn (addr, XEXP (force_const_mem (mode, operands[1]), 0));
7540 operands[1] = gen_rtx_MEM (mode, addr);
7541 }
7542
7543 /* Make operand1 a register if it isn't already. */
7544 if ((reload_in_progress | reload_completed) == 0
7545 && !register_operand (operands[0], mode)
7546 && !register_operand (operands[1], mode)
7547 && operands[1] != CONST0_RTX (mode))
7548 {
7549 rtx temp = force_reg (GET_MODE (operands[1]), operands[1]);
7550 emit_move_insn (operands[0], temp);
7551 return;
7552 }
7553
7554 emit_insn (gen_rtx_SET (VOIDmode, operands[0], operands[1]));
7555 }
7556
7557 /* Attempt to expand a binary operator. Make the expansion closer to the
7558 actual machine than just general_operand, which would allow 3 separate
7559 memory references (one output, two input) in a single insn. */
7560
7561 void
7562 ix86_expand_binary_operator (code, mode, operands)
7563 enum rtx_code code;
7564 enum machine_mode mode;
7565 rtx operands[];
7566 {
7567 int matching_memory;
7568 rtx src1, src2, dst, op, clob;
7569
7570 dst = operands[0];
7571 src1 = operands[1];
7572 src2 = operands[2];
7573
7574 /* Recognize <var1> = <value> <op> <var1> for commutative operators */
7575 if (GET_RTX_CLASS (code) == 'c'
7576 && (rtx_equal_p (dst, src2)
7577 || immediate_operand (src1, mode)))
7578 {
7579 rtx temp = src1;
7580 src1 = src2;
7581 src2 = temp;
7582 }
7583
7584 /* If the destination is memory, and we do not have matching source
7585 operands, do things in registers. */
7586 matching_memory = 0;
7587 if (GET_CODE (dst) == MEM)
7588 {
7589 if (rtx_equal_p (dst, src1))
7590 matching_memory = 1;
7591 else if (GET_RTX_CLASS (code) == 'c'
7592 && rtx_equal_p (dst, src2))
7593 matching_memory = 2;
7594 else
7595 dst = gen_reg_rtx (mode);
7596 }
7597
7598 /* Both source operands cannot be in memory. */
7599 if (GET_CODE (src1) == MEM && GET_CODE (src2) == MEM)
7600 {
7601 if (matching_memory != 2)
7602 src2 = force_reg (mode, src2);
7603 else
7604 src1 = force_reg (mode, src1);
7605 }
7606
7607 /* If the operation is not commutative, source 1 cannot be a constant
7608 or non-matching memory. */
7609 if ((CONSTANT_P (src1)
7610 || (!matching_memory && GET_CODE (src1) == MEM))
7611 && GET_RTX_CLASS (code) != 'c')
7612 src1 = force_reg (mode, src1);
7613
7614 /* If optimizing, copy to regs to improve CSE */
7615 if (optimize && ! no_new_pseudos)
7616 {
7617 if (GET_CODE (dst) == MEM)
7618 dst = gen_reg_rtx (mode);
7619 if (GET_CODE (src1) == MEM)
7620 src1 = force_reg (mode, src1);
7621 if (GET_CODE (src2) == MEM)
7622 src2 = force_reg (mode, src2);
7623 }
7624
7625 /* Emit the instruction. */
7626
7627 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, mode, src1, src2));
7628 if (reload_in_progress)
7629 {
7630 /* Reload doesn't know about the flags register, and doesn't know that
7631 it doesn't want to clobber it. We can only do this with PLUS. */
7632 if (code != PLUS)
7633 abort ();
7634 emit_insn (op);
7635 }
7636 else
7637 {
7638 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
7639 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
7640 }
7641
7642 /* Fix up the destination if needed. */
7643 if (dst != operands[0])
7644 emit_move_insn (operands[0], dst);
7645 }
7646
7647 /* Return TRUE or FALSE depending on whether the binary operator meets the
7648 appropriate constraints. */
7649
7650 int
7651 ix86_binary_operator_ok (code, mode, operands)
7652 enum rtx_code code;
7653 enum machine_mode mode ATTRIBUTE_UNUSED;
7654 rtx operands[3];
7655 {
7656   /* The two source operands cannot both be in memory.  */
7657 if (GET_CODE (operands[1]) == MEM && GET_CODE (operands[2]) == MEM)
7658 return 0;
7659   /* If the operation is not commutative, source 1 cannot be a constant.  */
7660 if (CONSTANT_P (operands[1]) && GET_RTX_CLASS (code) != 'c')
7661 return 0;
7662 /* If the destination is memory, we must have a matching source operand. */
7663 if (GET_CODE (operands[0]) == MEM
7664 && ! (rtx_equal_p (operands[0], operands[1])
7665 || (GET_RTX_CLASS (code) == 'c'
7666 && rtx_equal_p (operands[0], operands[2]))))
7667 return 0;
7668   /* If the operation is not commutative and source 1 is memory, we must
7669      have a matching destination.  */
7670 if (GET_CODE (operands[1]) == MEM
7671 && GET_RTX_CLASS (code) != 'c'
7672 && ! rtx_equal_p (operands[0], operands[1]))
7673 return 0;
7674 return 1;
7675 }
7676
7677 /* Attempt to expand a unary operator.  Make the expansion closer to the
7678    actual machine than just general_operand, which would allow 2 separate
7679    memory references (one output, one input) in a single insn.  */
7680
7681 void
7682 ix86_expand_unary_operator (code, mode, operands)
7683 enum rtx_code code;
7684 enum machine_mode mode;
7685 rtx operands[];
7686 {
7687 int matching_memory;
7688 rtx src, dst, op, clob;
7689
7690 dst = operands[0];
7691 src = operands[1];
7692
7693 /* If the destination is memory, and we do not have matching source
7694 operands, do things in registers. */
7695 matching_memory = 0;
7696 if (GET_CODE (dst) == MEM)
7697 {
7698 if (rtx_equal_p (dst, src))
7699 matching_memory = 1;
7700 else
7701 dst = gen_reg_rtx (mode);
7702 }
7703
7704   /* When the source operand is memory, the destination must match.  */
7705 if (!matching_memory && GET_CODE (src) == MEM)
7706 src = force_reg (mode, src);
7707
7708   /* If optimizing, copy to regs to improve CSE.  */
7709 if (optimize && ! no_new_pseudos)
7710 {
7711 if (GET_CODE (dst) == MEM)
7712 dst = gen_reg_rtx (mode);
7713 if (GET_CODE (src) == MEM)
7714 src = force_reg (mode, src);
7715 }
7716
7717 /* Emit the instruction. */
7718
7719 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_e (code, mode, src));
7720 if (reload_in_progress || code == NOT)
7721 {
7722 /* Reload doesn't know about the flags register, and doesn't know that
7723 it doesn't want to clobber it. */
7724 if (code != NOT)
7725 abort ();
7726 emit_insn (op);
7727 }
7728 else
7729 {
7730 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
7731 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
7732 }
7733
7734 /* Fix up the destination if needed. */
7735 if (dst != operands[0])
7736 emit_move_insn (operands[0], dst);
7737 }
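
/* An illustrative sketch (not verbatim compiler output): NEG:SI of a
   register is emitted as
     (parallel [(set (reg r0) (neg:SI (reg r1)))
                (clobber (reg:CC FLAGS_REG))])
   whereas NOT is emitted as a bare SET, since one's complement does not
   alter the flags (which is also why it is safe during reload).  */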
7738
7739 /* Return TRUE or FALSE depending on whether the unary operator meets the
7740 appropriate constraints. */
7741
7742 int
7743 ix86_unary_operator_ok (code, mode, operands)
7744 enum rtx_code code ATTRIBUTE_UNUSED;
7745 enum machine_mode mode ATTRIBUTE_UNUSED;
7746 rtx operands[2] ATTRIBUTE_UNUSED;
7747 {
7748 /* If one of operands is memory, source and destination must match. */
7749 if ((GET_CODE (operands[0]) == MEM
7750 || GET_CODE (operands[1]) == MEM)
7751 && ! rtx_equal_p (operands[0], operands[1]))
7752 return FALSE;
7753 return TRUE;
7754 }
7755
7756 /* Return TRUE or FALSE depending on whether the first SET in INSN
7757    has source and destination with matching CC modes, and whether the
7758    CC mode is at least as constrained as REQ_MODE.  */
7759
7760 int
7761 ix86_match_ccmode (insn, req_mode)
7762 rtx insn;
7763 enum machine_mode req_mode;
7764 {
7765 rtx set;
7766 enum machine_mode set_mode;
7767
7768 set = PATTERN (insn);
7769 if (GET_CODE (set) == PARALLEL)
7770 set = XVECEXP (set, 0, 0);
7771 if (GET_CODE (set) != SET)
7772 abort ();
7773 if (GET_CODE (SET_SRC (set)) != COMPARE)
7774 abort ();
7775
7776 set_mode = GET_MODE (SET_DEST (set));
7777 switch (set_mode)
7778 {
7779 case CCNOmode:
7780 if (req_mode != CCNOmode
7781 && (req_mode != CCmode
7782 || XEXP (SET_SRC (set), 1) != const0_rtx))
7783 return 0;
7784 break;
7785 case CCmode:
7786 if (req_mode == CCGCmode)
7787 return 0;
7788 /* FALLTHRU */
7789 case CCGCmode:
7790 if (req_mode == CCGOCmode || req_mode == CCNOmode)
7791 return 0;
7792 /* FALLTHRU */
7793 case CCGOCmode:
7794 if (req_mode == CCZmode)
7795 return 0;
7796 /* FALLTHRU */
7797 case CCZmode:
7798 break;
7799
7800 default:
7801 abort ();
7802 }
7803
7804 return (GET_MODE (SET_SRC (set)) == set_mode);
7805 }
7806
7807 /* Generate insn patterns to do an integer compare of OPERANDS. */
7808
7809 static rtx
7810 ix86_expand_int_compare (code, op0, op1)
7811 enum rtx_code code;
7812 rtx op0, op1;
7813 {
7814 enum machine_mode cmpmode;
7815 rtx tmp, flags;
7816
7817 cmpmode = SELECT_CC_MODE (code, op0, op1);
7818 flags = gen_rtx_REG (cmpmode, FLAGS_REG);
7819
7820 /* This is very simple, but making the interface the same as in the
7821 FP case makes the rest of the code easier. */
7822 tmp = gen_rtx_COMPARE (cmpmode, op0, op1);
7823 emit_insn (gen_rtx_SET (VOIDmode, flags, tmp));
7824
7825 /* Return the test that should be put into the flags user, i.e.
7826 the bcc, scc, or cmov instruction. */
7827 return gen_rtx_fmt_ee (code, VOIDmode, flags, const0_rtx);
7828 }
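
/* For example (a sketch, assuming SELECT_CC_MODE resolves to the port's
   ix86_cc_mode): comparing (reg:SI a) against const0_rtx with code GT
   selects CCNOmode, so the sequence emitted is
     (set (reg:CCNO FLAGS_REG) (compare:CCNO (reg:SI a) (const_int 0)))
   and the rtx handed back to the flags user is
     (gt (reg:CCNO FLAGS_REG) (const_int 0)).  */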
7829
7830 /* Figure out whether to use ordered or unordered fp comparisons.
7831 Return the appropriate mode to use. */
7832
7833 enum machine_mode
7834 ix86_fp_compare_mode (code)
7835 enum rtx_code code ATTRIBUTE_UNUSED;
7836 {
7837   /* ??? In order to make all comparisons reversible, we do all comparisons
7838      non-trapping when compiling for IEEE.  Once gcc is able to distinguish
7839      all forms of trapping and nontrapping comparisons, we can make inequality
7840      comparisons trapping again, since it results in better code when using
7841      FCOM based compares.  */
7842 return TARGET_IEEE_FP ? CCFPUmode : CCFPmode;
7843 }
7844
7845 enum machine_mode
7846 ix86_cc_mode (code, op0, op1)
7847 enum rtx_code code;
7848 rtx op0, op1;
7849 {
7850 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_FLOAT)
7851 return ix86_fp_compare_mode (code);
7852 switch (code)
7853 {
7854 /* Only zero flag is needed. */
7855 case EQ: /* ZF=0 */
7856 case NE: /* ZF!=0 */
7857 return CCZmode;
7858 /* Codes needing carry flag. */
7859 case GEU: /* CF=0 */
7860 case GTU: /* CF=0 & ZF=0 */
7861 case LTU: /* CF=1 */
7862 case LEU: /* CF=1 | ZF=1 */
7863 return CCmode;
7864 /* Codes possibly doable only with sign flag when
7865 comparing against zero. */
7866 case GE: /* SF=OF or SF=0 */
7867 case LT: /* SF<>OF or SF=1 */
7868 if (op1 == const0_rtx)
7869 return CCGOCmode;
7870 else
7871 	/* For other cases the carry flag is not required.  */
7872 return CCGCmode;
7873       /* Codes doable only with the sign flag when comparing against
7874          zero, but we lack a jump instruction for that, so we have to
7875          use the relational tests, which reference the overflow flag;
7876          it therefore has to be zero.  */
7877 case GT: /* ZF=0 & SF=OF */
7878 case LE: /* ZF=1 | SF<>OF */
7879 if (op1 == const0_rtx)
7880 return CCNOmode;
7881 else
7882 return CCGCmode;
7883       /* The strcmp pattern does (use flags), and combine may ask us for the
7884          proper mode.  */
7885 case USE:
7886 return CCmode;
7887 default:
7888 abort ();
7889 }
7890 }
7891
7892 /* Return true if we should use an FCOMI instruction for this fp comparison. */
7893
7894 int
7895 ix86_use_fcomi_compare (code)
7896 enum rtx_code code ATTRIBUTE_UNUSED;
7897 {
7898 enum rtx_code swapped_code = swap_condition (code);
7899 return ((ix86_fp_comparison_cost (code) == ix86_fp_comparison_fcomi_cost (code))
7900 || (ix86_fp_comparison_cost (swapped_code)
7901 == ix86_fp_comparison_fcomi_cost (swapped_code)));
7902 }
7903
7904 /* Swap, force into registers, or otherwise massage the two operands
7905    of an fp comparison.  The operands are updated in place; the new
7906    comparison code is returned.  */
7907
7908 static enum rtx_code
7909 ix86_prepare_fp_compare_args (code, pop0, pop1)
7910 enum rtx_code code;
7911 rtx *pop0, *pop1;
7912 {
7913 enum machine_mode fpcmp_mode = ix86_fp_compare_mode (code);
7914 rtx op0 = *pop0, op1 = *pop1;
7915 enum machine_mode op_mode = GET_MODE (op0);
7916 int is_sse = SSE_REG_P (op0) | SSE_REG_P (op1);
7917
7918   /* All of the unordered compare instructions work only on registers.
7919      The same is true of the XFmode compare instructions and of the
7920      fcomi compare instructions.  */
7921
7922 if (!is_sse
7923 && (fpcmp_mode == CCFPUmode
7924 || op_mode == XFmode
7925 || op_mode == TFmode
7926 || ix86_use_fcomi_compare (code)))
7927 {
7928 op0 = force_reg (op_mode, op0);
7929 op1 = force_reg (op_mode, op1);
7930 }
7931 else
7932 {
7933 /* %%% We only allow op1 in memory; op0 must be st(0). So swap
7934 things around if they appear profitable, otherwise force op0
7935 into a register. */
7936
7937 if (standard_80387_constant_p (op0) == 0
7938 || (GET_CODE (op0) == MEM
7939 && ! (standard_80387_constant_p (op1) == 0
7940 || GET_CODE (op1) == MEM)))
7941 {
7942 rtx tmp;
7943 tmp = op0, op0 = op1, op1 = tmp;
7944 code = swap_condition (code);
7945 }
7946
7947 if (GET_CODE (op0) != REG)
7948 op0 = force_reg (op_mode, op0);
7949
7950 if (CONSTANT_P (op1))
7951 {
7952 if (standard_80387_constant_p (op1))
7953 op1 = force_reg (op_mode, op1);
7954 else
7955 op1 = validize_mem (force_const_mem (op_mode, op1));
7956 }
7957 }
7958
7959 /* Try to rearrange the comparison to make it cheaper. */
7960 if (ix86_fp_comparison_cost (code)
7961 > ix86_fp_comparison_cost (swap_condition (code))
7962 && (GET_CODE (op1) == REG || !no_new_pseudos))
7963 {
7964 rtx tmp;
7965 tmp = op0, op0 = op1, op1 = tmp;
7966 code = swap_condition (code);
7967 if (GET_CODE (op0) != REG)
7968 op0 = force_reg (op_mode, op0);
7969 }
7970
7971 *pop0 = op0;
7972 *pop1 = op1;
7973 return code;
7974 }
7975
7976 /* Convert the comparison codes we use to represent an FP comparison into
7977    the integer codes that will result in a proper branch.  Return UNKNOWN
7978    if no such code is available.  */
7979 static enum rtx_code
7980 ix86_fp_compare_code_to_integer (code)
7981 enum rtx_code code;
7982 {
7983 switch (code)
7984 {
7985 case GT:
7986 return GTU;
7987 case GE:
7988 return GEU;
7989 case ORDERED:
7990 case UNORDERED:
7991 return code;
7992 break;
7993 case UNEQ:
7994 return EQ;
7995 break;
7996 case UNLT:
7997 return LTU;
7998 break;
7999 case UNLE:
8000 return LEU;
8001 break;
8002 case LTGT:
8003 return NE;
8004 break;
8005 default:
8006 return UNKNOWN;
8007 }
8008 }
8009
8010 /* Split comparison code CODE into comparisons we can do using branch
8011    instructions.  BYPASS_CODE is the comparison code for the branch that
8012    will branch around FIRST_CODE and SECOND_CODE.  If one of the branches
8013    is not required, its value is set to NIL.
8014    We never require more than two branches.  */
8015 static void
8016 ix86_fp_comparison_codes (code, bypass_code, first_code, second_code)
8017 enum rtx_code code, *bypass_code, *first_code, *second_code;
8018 {
8019 *first_code = code;
8020 *bypass_code = NIL;
8021 *second_code = NIL;
8022
8023 /* The fcomi comparison sets flags as follows:
8024
8025 cmp ZF PF CF
8026 > 0 0 0
8027 < 0 0 1
8028 = 1 0 0
8029 un 1 1 1 */
8030
8031 switch (code)
8032 {
8033 case GT: /* GTU - CF=0 & ZF=0 */
8034 case GE: /* GEU - CF=0 */
8035 case ORDERED: /* PF=0 */
8036 case UNORDERED: /* PF=1 */
8037 case UNEQ: /* EQ - ZF=1 */
8038 case UNLT: /* LTU - CF=1 */
8039 case UNLE: /* LEU - CF=1 | ZF=1 */
8040 case LTGT: /* EQ - ZF=0 */
8041 break;
8042 case LT: /* LTU - CF=1 - fails on unordered */
8043 *first_code = UNLT;
8044 *bypass_code = UNORDERED;
8045 break;
8046 case LE: /* LEU - CF=1 | ZF=1 - fails on unordered */
8047 *first_code = UNLE;
8048 *bypass_code = UNORDERED;
8049 break;
8050 case EQ: /* EQ - ZF=1 - fails on unordered */
8051 *first_code = UNEQ;
8052 *bypass_code = UNORDERED;
8053 break;
8054 case NE: /* NE - ZF=0 - fails on unordered */
8055 *first_code = LTGT;
8056 *second_code = UNORDERED;
8057 break;
8058 case UNGE: /* GEU - CF=0 - fails on unordered */
8059 *first_code = GE;
8060 *second_code = UNORDERED;
8061 break;
8062 case UNGT: /* GTU - CF=0 & ZF=0 - fails on unordered */
8063 *first_code = GT;
8064 *second_code = UNORDERED;
8065 break;
8066 default:
8067 abort ();
8068 }
8069 if (!TARGET_IEEE_FP)
8070 {
8071 *second_code = NIL;
8072 *bypass_code = NIL;
8073 }
8074 }
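
/* An illustrative sketch of the branch sequences implied by the table
   above (assuming TARGET_IEEE_FP and fcomi-style flags):
     LT:  first = UNLT, bypass = UNORDERED  ->  jp .Lskip; jb target; .Lskip:
     NE:  first = LTGT, second = UNORDERED  ->  jne target; jp target
   Without TARGET_IEEE_FP the bypass/second branches are dropped and a
   single conditional jump on FIRST_CODE remains.  */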
8075
8076 /* Return the cost of a comparison done with fcom + arithmetic operations on
8077    AX.  All of the following functions use the number of instructions as a
8078    cost metric.  In the future this should be tweaked to compute bytes for
8079    optimize_size and to take into account instruction performance on various CPUs.  */
8080 static int
8081 ix86_fp_comparison_arithmetics_cost (code)
8082 enum rtx_code code;
8083 {
8084 if (!TARGET_IEEE_FP)
8085 return 4;
8086 /* The cost of code output by ix86_expand_fp_compare. */
8087 switch (code)
8088 {
8089 case UNLE:
8090 case UNLT:
8091 case LTGT:
8092 case GT:
8093 case GE:
8094 case UNORDERED:
8095 case ORDERED:
8096 case UNEQ:
8097 return 4;
8098 break;
8099 case LT:
8100 case NE:
8101 case EQ:
8102 case UNGE:
8103 return 5;
8104 break;
8105 case LE:
8106 case UNGT:
8107 return 6;
8108 break;
8109 default:
8110 abort ();
8111 }
8112 }
8113
8114 /* Return cost of comparison done using fcomi operation.
8115 See ix86_fp_comparison_arithmetics_cost for the metrics. */
8116 static int
8117 ix86_fp_comparison_fcomi_cost (code)
8118 enum rtx_code code;
8119 {
8120 enum rtx_code bypass_code, first_code, second_code;
8121   /* Return an arbitrarily high cost when the instruction is not supported -
8122      this prevents gcc from using it.  */
8123 if (!TARGET_CMOVE)
8124 return 1024;
8125 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
8126 return (bypass_code != NIL || second_code != NIL) + 2;
8127 }
8128
8129 /* Return cost of comparison done using sahf operation.
8130 See ix86_fp_comparison_arithmetics_cost for the metrics. */
8131 static int
8132 ix86_fp_comparison_sahf_cost (code)
8133 enum rtx_code code;
8134 {
8135 enum rtx_code bypass_code, first_code, second_code;
8136   /* Return an arbitrarily high cost when the instruction is not preferred -
8137      this keeps gcc from using it.  */
8138 if (!TARGET_USE_SAHF && !optimize_size)
8139 return 1024;
8140 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
8141 return (bypass_code != NIL || second_code != NIL) + 3;
8142 }
8143
8144 /* Compute the cost of the comparison, using whichever method is cheapest.
8145    See ix86_fp_comparison_arithmetics_cost for the metrics.  */
8146 static int
8147 ix86_fp_comparison_cost (code)
8148 enum rtx_code code;
8149 {
8150 int fcomi_cost, sahf_cost, arithmetics_cost = 1024;
8151 int min;
8152
8153 fcomi_cost = ix86_fp_comparison_fcomi_cost (code);
8154 sahf_cost = ix86_fp_comparison_sahf_cost (code);
8155
8156 min = arithmetics_cost = ix86_fp_comparison_arithmetics_cost (code);
8157 if (min > sahf_cost)
8158 min = sahf_cost;
8159 if (min > fcomi_cost)
8160 min = fcomi_cost;
8161 return min;
8162 }
8163
8164 /* Generate insn patterns to do a floating point compare of OPERANDS. */
8165
8166 static rtx
8167 ix86_expand_fp_compare (code, op0, op1, scratch, second_test, bypass_test)
8168 enum rtx_code code;
8169 rtx op0, op1, scratch;
8170 rtx *second_test;
8171 rtx *bypass_test;
8172 {
8173 enum machine_mode fpcmp_mode, intcmp_mode;
8174 rtx tmp, tmp2;
8175 int cost = ix86_fp_comparison_cost (code);
8176 enum rtx_code bypass_code, first_code, second_code;
8177
8178 fpcmp_mode = ix86_fp_compare_mode (code);
8179 code = ix86_prepare_fp_compare_args (code, &op0, &op1);
8180
8181 if (second_test)
8182 *second_test = NULL_RTX;
8183 if (bypass_test)
8184 *bypass_test = NULL_RTX;
8185
8186 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
8187
8188 /* Do fcomi/sahf based test when profitable. */
8189 if ((bypass_code == NIL || bypass_test)
8190 && (second_code == NIL || second_test)
8191 && ix86_fp_comparison_arithmetics_cost (code) > cost)
8192 {
8193 if (TARGET_CMOVE)
8194 {
8195 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
8196 tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
8197 tmp);
8198 emit_insn (tmp);
8199 }
8200 else
8201 {
8202 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
8203 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
8204 if (!scratch)
8205 scratch = gen_reg_rtx (HImode);
8206 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
8207 emit_insn (gen_x86_sahf_1 (scratch));
8208 }
8209
8210 /* The FP codes work out to act like unsigned. */
8211 intcmp_mode = fpcmp_mode;
8212 code = first_code;
8213 if (bypass_code != NIL)
8214 *bypass_test = gen_rtx_fmt_ee (bypass_code, VOIDmode,
8215 gen_rtx_REG (intcmp_mode, FLAGS_REG),
8216 const0_rtx);
8217 if (second_code != NIL)
8218 *second_test = gen_rtx_fmt_ee (second_code, VOIDmode,
8219 gen_rtx_REG (intcmp_mode, FLAGS_REG),
8220 const0_rtx);
8221 }
8222 else
8223 {
8224 /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first. */
8225 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
8226 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
8227 if (!scratch)
8228 scratch = gen_reg_rtx (HImode);
8229 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
8230
8231 /* In the unordered case, we have to check C2 for NaN's, which
8232 doesn't happen to work out to anything nice combination-wise.
8233 So do some bit twiddling on the value we've got in AH to come
8234 up with an appropriate set of condition codes. */
8235
8236 intcmp_mode = CCNOmode;
8237 switch (code)
8238 {
8239 case GT:
8240 case UNGT:
8241 if (code == GT || !TARGET_IEEE_FP)
8242 {
8243 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
8244 code = EQ;
8245 }
8246 else
8247 {
8248 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
8249 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
8250 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x44)));
8251 intcmp_mode = CCmode;
8252 code = GEU;
8253 }
8254 break;
8255 case LT:
8256 case UNLT:
8257 if (code == LT && TARGET_IEEE_FP)
8258 {
8259 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
8260 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x01)));
8261 intcmp_mode = CCmode;
8262 code = EQ;
8263 }
8264 else
8265 {
8266 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x01)));
8267 code = NE;
8268 }
8269 break;
8270 case GE:
8271 case UNGE:
8272 if (code == GE || !TARGET_IEEE_FP)
8273 {
8274 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x05)));
8275 code = EQ;
8276 }
8277 else
8278 {
8279 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
8280 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
8281 GEN_INT (0x01)));
8282 code = NE;
8283 }
8284 break;
8285 case LE:
8286 case UNLE:
8287 if (code == LE && TARGET_IEEE_FP)
8288 {
8289 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
8290 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
8291 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
8292 intcmp_mode = CCmode;
8293 code = LTU;
8294 }
8295 else
8296 {
8297 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
8298 code = NE;
8299 }
8300 break;
8301 case EQ:
8302 case UNEQ:
8303 if (code == EQ && TARGET_IEEE_FP)
8304 {
8305 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
8306 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
8307 intcmp_mode = CCmode;
8308 code = EQ;
8309 }
8310 else
8311 {
8312 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
8313 code = NE;
8314 break;
8315 }
8316 break;
8317 case NE:
8318 case LTGT:
8319 if (code == NE && TARGET_IEEE_FP)
8320 {
8321 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
8322 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
8323 GEN_INT (0x40)));
8324 code = NE;
8325 }
8326 else
8327 {
8328 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
8329 code = EQ;
8330 }
8331 break;
8332
8333 case UNORDERED:
8334 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
8335 code = NE;
8336 break;
8337 case ORDERED:
8338 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
8339 code = EQ;
8340 break;
8341
8342 default:
8343 abort ();
8344 }
8345 }
8346
8347 /* Return the test that should be put into the flags user, i.e.
8348 the bcc, scc, or cmov instruction. */
8349 return gen_rtx_fmt_ee (code, VOIDmode,
8350 gen_rtx_REG (intcmp_mode, FLAGS_REG),
8351 const0_rtx);
8352 }
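
/* Roughly, the strategies above map onto these instruction shapes (a
   sketch; the exact insns come from the patterns in i386.md):
     fcomi path (TARGET_CMOVE):  fcomi                   flags set directly
     sahf path:                  fcom ; fnstsw %ax ; sahf
     arithmetic path:            fcom ; fnstsw %ax ; test/and/cmp on %ah
   followed in each case by the conditional jump, setcc or cmov that
   consumes the returned flags test.  */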
8353
8354 rtx
8355 ix86_expand_compare (code, second_test, bypass_test)
8356 enum rtx_code code;
8357 rtx *second_test, *bypass_test;
8358 {
8359 rtx op0, op1, ret;
8360 op0 = ix86_compare_op0;
8361 op1 = ix86_compare_op1;
8362
8363 if (second_test)
8364 *second_test = NULL_RTX;
8365 if (bypass_test)
8366 *bypass_test = NULL_RTX;
8367
8368 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_FLOAT)
8369 ret = ix86_expand_fp_compare (code, op0, op1, NULL_RTX,
8370 second_test, bypass_test);
8371 else
8372 ret = ix86_expand_int_compare (code, op0, op1);
8373
8374 return ret;
8375 }
8376
8377 /* Return true if CODE will result in a nontrivial jump sequence.  */
8378 bool
8379 ix86_fp_jump_nontrivial_p (code)
8380 enum rtx_code code;
8381 {
8382 enum rtx_code bypass_code, first_code, second_code;
8383 if (!TARGET_CMOVE)
8384 return true;
8385 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
8386 return bypass_code != NIL || second_code != NIL;
8387 }
8388
8389 void
8390 ix86_expand_branch (code, label)
8391 enum rtx_code code;
8392 rtx label;
8393 {
8394 rtx tmp;
8395
8396 switch (GET_MODE (ix86_compare_op0))
8397 {
8398 case QImode:
8399 case HImode:
8400 case SImode:
8401 simple:
8402 tmp = ix86_expand_compare (code, NULL, NULL);
8403 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
8404 gen_rtx_LABEL_REF (VOIDmode, label),
8405 pc_rtx);
8406 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
8407 return;
8408
8409 case SFmode:
8410 case DFmode:
8411 case XFmode:
8412 case TFmode:
8413 {
8414 rtvec vec;
8415 int use_fcomi;
8416 enum rtx_code bypass_code, first_code, second_code;
8417
8418 code = ix86_prepare_fp_compare_args (code, &ix86_compare_op0,
8419 &ix86_compare_op1);
8420
8421 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
8422
8423 	/* Check whether we will use the natural sequence with one jump.  If
8424 	   so, we can expand the jump early.  Otherwise delay the expansion by
8425 	   creating a compound insn so as not to confuse the optimizers.  */
8426 if (bypass_code == NIL && second_code == NIL
8427 && TARGET_CMOVE)
8428 {
8429 ix86_split_fp_branch (code, ix86_compare_op0, ix86_compare_op1,
8430 gen_rtx_LABEL_REF (VOIDmode, label),
8431 pc_rtx, NULL_RTX);
8432 }
8433 else
8434 {
8435 tmp = gen_rtx_fmt_ee (code, VOIDmode,
8436 ix86_compare_op0, ix86_compare_op1);
8437 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
8438 gen_rtx_LABEL_REF (VOIDmode, label),
8439 pc_rtx);
8440 tmp = gen_rtx_SET (VOIDmode, pc_rtx, tmp);
8441
8442 use_fcomi = ix86_use_fcomi_compare (code);
8443 vec = rtvec_alloc (3 + !use_fcomi);
8444 RTVEC_ELT (vec, 0) = tmp;
8445 RTVEC_ELT (vec, 1)
8446 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, 18));
8447 RTVEC_ELT (vec, 2)
8448 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, 17));
8449 if (! use_fcomi)
8450 RTVEC_ELT (vec, 3)
8451 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (HImode));
8452
8453 emit_jump_insn (gen_rtx_PARALLEL (VOIDmode, vec));
8454 }
8455 return;
8456 }
8457
8458 case DImode:
8459 if (TARGET_64BIT)
8460 goto simple;
8461 /* Expand DImode branch into multiple compare+branch. */
8462 {
8463 rtx lo[2], hi[2], label2;
8464 enum rtx_code code1, code2, code3;
8465
8466 if (CONSTANT_P (ix86_compare_op0) && ! CONSTANT_P (ix86_compare_op1))
8467 {
8468 tmp = ix86_compare_op0;
8469 ix86_compare_op0 = ix86_compare_op1;
8470 ix86_compare_op1 = tmp;
8471 code = swap_condition (code);
8472 }
8473 split_di (&ix86_compare_op0, 1, lo+0, hi+0);
8474 split_di (&ix86_compare_op1, 1, lo+1, hi+1);
8475
8476 /* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to
8477 avoid two branches. This costs one extra insn, so disable when
8478 optimizing for size. */
8479
8480 if ((code == EQ || code == NE)
8481 && (!optimize_size
8482 || hi[1] == const0_rtx || lo[1] == const0_rtx))
8483 {
8484 rtx xor0, xor1;
8485
8486 xor1 = hi[0];
8487 if (hi[1] != const0_rtx)
8488 xor1 = expand_binop (SImode, xor_optab, xor1, hi[1],
8489 NULL_RTX, 0, OPTAB_WIDEN);
8490
8491 xor0 = lo[0];
8492 if (lo[1] != const0_rtx)
8493 xor0 = expand_binop (SImode, xor_optab, xor0, lo[1],
8494 NULL_RTX, 0, OPTAB_WIDEN);
8495
8496 tmp = expand_binop (SImode, ior_optab, xor1, xor0,
8497 NULL_RTX, 0, OPTAB_WIDEN);
8498
8499 ix86_compare_op0 = tmp;
8500 ix86_compare_op1 = const0_rtx;
8501 ix86_expand_branch (code, label);
8502 return;
8503 }
8504
8505 	/* Otherwise, if we are doing a less-than or greater-or-equal-than
8506 	   comparison, op1 is a constant and the low word is zero, then we can
8507 	   just examine the high word.  */
8508
8509 if (GET_CODE (hi[1]) == CONST_INT && lo[1] == const0_rtx)
8510 switch (code)
8511 {
8512 case LT: case LTU: case GE: case GEU:
8513 ix86_compare_op0 = hi[0];
8514 ix86_compare_op1 = hi[1];
8515 ix86_expand_branch (code, label);
8516 return;
8517 default:
8518 break;
8519 }
8520
8521 /* Otherwise, we need two or three jumps. */
8522
8523 label2 = gen_label_rtx ();
8524
8525 code1 = code;
8526 code2 = swap_condition (code);
8527 code3 = unsigned_condition (code);
8528
8529 switch (code)
8530 {
8531 case LT: case GT: case LTU: case GTU:
8532 break;
8533
8534 case LE: code1 = LT; code2 = GT; break;
8535 case GE: code1 = GT; code2 = LT; break;
8536 case LEU: code1 = LTU; code2 = GTU; break;
8537 case GEU: code1 = GTU; code2 = LTU; break;
8538
8539 case EQ: code1 = NIL; code2 = NE; break;
8540 case NE: code2 = NIL; break;
8541
8542 default:
8543 abort ();
8544 }
8545
8546 /*
8547 * a < b =>
8548 * if (hi(a) < hi(b)) goto true;
8549 * if (hi(a) > hi(b)) goto false;
8550 * if (lo(a) < lo(b)) goto true;
8551 * false:
8552 */
8553
8554 ix86_compare_op0 = hi[0];
8555 ix86_compare_op1 = hi[1];
8556
8557 if (code1 != NIL)
8558 ix86_expand_branch (code1, label);
8559 if (code2 != NIL)
8560 ix86_expand_branch (code2, label2);
8561
8562 ix86_compare_op0 = lo[0];
8563 ix86_compare_op1 = lo[1];
8564 ix86_expand_branch (code3, label);
8565
8566 if (code2 != NIL)
8567 emit_label (label2);
8568 return;
8569 }
8570
8571 default:
8572 abort ();
8573 }
8574 }
8575
8576 /* Split branch based on floating point condition. */
8577 void
8578 ix86_split_fp_branch (code, op1, op2, target1, target2, tmp)
8579 enum rtx_code code;
8580 rtx op1, op2, target1, target2, tmp;
8581 {
8582 rtx second, bypass;
8583 rtx label = NULL_RTX;
8584 rtx condition;
8585 int bypass_probability = -1, second_probability = -1, probability = -1;
8586 rtx i;
8587
8588 if (target2 != pc_rtx)
8589 {
8590 rtx tmp = target2;
8591 code = reverse_condition_maybe_unordered (code);
8592 target2 = target1;
8593 target1 = tmp;
8594 }
8595
8596 condition = ix86_expand_fp_compare (code, op1, op2,
8597 tmp, &second, &bypass);
8598
8599 if (split_branch_probability >= 0)
8600 {
8601       /* Distribute the probabilities across the jumps.
8602 	 Assume that BYPASS and SECOND always test
8603 	 for UNORDERED.  */
8604 probability = split_branch_probability;
8605
8606       /* A value of 1 is low enough that there is no need for the probability
8607 	 to be updated.  Later we may run some experiments and see
8608 	 whether unordered values are more frequent in practice.  */
8609 if (bypass)
8610 bypass_probability = 1;
8611 if (second)
8612 second_probability = 1;
8613 }
8614 if (bypass != NULL_RTX)
8615 {
8616 label = gen_label_rtx ();
8617 i = emit_jump_insn (gen_rtx_SET
8618 (VOIDmode, pc_rtx,
8619 gen_rtx_IF_THEN_ELSE (VOIDmode,
8620 bypass,
8621 gen_rtx_LABEL_REF (VOIDmode,
8622 label),
8623 pc_rtx)));
8624 if (bypass_probability >= 0)
8625 REG_NOTES (i)
8626 = gen_rtx_EXPR_LIST (REG_BR_PROB,
8627 GEN_INT (bypass_probability),
8628 REG_NOTES (i));
8629 }
8630 i = emit_jump_insn (gen_rtx_SET
8631 (VOIDmode, pc_rtx,
8632 gen_rtx_IF_THEN_ELSE (VOIDmode,
8633 condition, target1, target2)));
8634 if (probability >= 0)
8635 REG_NOTES (i)
8636 = gen_rtx_EXPR_LIST (REG_BR_PROB,
8637 GEN_INT (probability),
8638 REG_NOTES (i));
8639 if (second != NULL_RTX)
8640 {
8641 i = emit_jump_insn (gen_rtx_SET
8642 (VOIDmode, pc_rtx,
8643 gen_rtx_IF_THEN_ELSE (VOIDmode, second, target1,
8644 target2)));
8645 if (second_probability >= 0)
8646 REG_NOTES (i)
8647 = gen_rtx_EXPR_LIST (REG_BR_PROB,
8648 GEN_INT (second_probability),
8649 REG_NOTES (i));
8650 }
8651 if (label != NULL_RTX)
8652 emit_label (label);
8653 }
8654
8655 int
8656 ix86_expand_setcc (code, dest)
8657 enum rtx_code code;
8658 rtx dest;
8659 {
8660 rtx ret, tmp, tmpreg;
8661 rtx second_test, bypass_test;
8662
8663 if (GET_MODE (ix86_compare_op0) == DImode
8664 && !TARGET_64BIT)
8665 return 0; /* FAIL */
8666
8667 if (GET_MODE (dest) != QImode)
8668 abort ();
8669
8670 ret = ix86_expand_compare (code, &second_test, &bypass_test);
8671 PUT_MODE (ret, QImode);
8672
8673 tmp = dest;
8674 tmpreg = dest;
8675
8676 emit_insn (gen_rtx_SET (VOIDmode, tmp, ret));
8677 if (bypass_test || second_test)
8678 {
8679 rtx test = second_test;
8680 int bypass = 0;
8681 rtx tmp2 = gen_reg_rtx (QImode);
8682 if (bypass_test)
8683 {
8684 if (second_test)
8685 abort ();
8686 test = bypass_test;
8687 bypass = 1;
8688 PUT_CODE (test, reverse_condition_maybe_unordered (GET_CODE (test)));
8689 }
8690 PUT_MODE (test, QImode);
8691 emit_insn (gen_rtx_SET (VOIDmode, tmp2, test));
8692
8693 if (bypass)
8694 emit_insn (gen_andqi3 (tmp, tmpreg, tmp2));
8695 else
8696 emit_insn (gen_iorqi3 (tmp, tmpreg, tmp2));
8697 }
8698
8699 return 1; /* DONE */
8700 }
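
/* An illustrative sketch (assuming the fcomi strategy; register choices
   are hypothetical): an IEEE LT comparison is split into UNLT plus an
   UNORDERED bypass, so the expansion is roughly
     setb  %al        ; the UNLT part
     setnp %cl        ; the reversed bypass test (ORDERED)
     andb  %cl, %al
   whereas a second test, as for NE, is combined with orb instead.  */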
8701
8702 int
8703 ix86_expand_int_movcc (operands)
8704 rtx operands[];
8705 {
8706 enum rtx_code code = GET_CODE (operands[1]), compare_code;
8707 rtx compare_seq, compare_op;
8708 rtx second_test, bypass_test;
8709 enum machine_mode mode = GET_MODE (operands[0]);
8710
8711   /* When the compare code is not LTU or GEU, we can not use the sbbl case.
8712      When the comparison is done with an immediate, we can convert it to LTU
8713      or GEU by altering the integer: e.g. unsigned x <= 5 becomes x < 6.  */
8714
8715 if ((code == LEU || code == GTU)
8716 && GET_CODE (ix86_compare_op1) == CONST_INT
8717 && mode != HImode
8718 && INTVAL (ix86_compare_op1) != -1
8719 /* For x86-64, the immediate field in the instruction is 32-bit
8720 signed, so we can't increment a DImode value above 0x7fffffff. */
8721 && (!TARGET_64BIT
8722 || GET_MODE (ix86_compare_op0) != DImode
8723 || INTVAL (ix86_compare_op1) != 0x7fffffff)
8724 && GET_CODE (operands[2]) == CONST_INT
8725 && GET_CODE (operands[3]) == CONST_INT)
8726 {
8727 if (code == LEU)
8728 code = LTU;
8729 else
8730 code = GEU;
8731 ix86_compare_op1 = gen_int_mode (INTVAL (ix86_compare_op1) + 1,
8732 GET_MODE (ix86_compare_op0));
8733 }
8734
8735 start_sequence ();
8736 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
8737 compare_seq = get_insns ();
8738 end_sequence ();
8739
8740 compare_code = GET_CODE (compare_op);
8741
8742 /* Don't attempt mode expansion here -- if we had to expand 5 or 6
8743 HImode insns, we'd be swallowed in word prefix ops. */
8744
8745 if (mode != HImode
8746 && (mode != DImode || TARGET_64BIT)
8747 && GET_CODE (operands[2]) == CONST_INT
8748 && GET_CODE (operands[3]) == CONST_INT)
8749 {
8750 rtx out = operands[0];
8751 HOST_WIDE_INT ct = INTVAL (operands[2]);
8752 HOST_WIDE_INT cf = INTVAL (operands[3]);
8753 HOST_WIDE_INT diff;
8754
8755 if ((compare_code == LTU || compare_code == GEU)
8756 && !second_test && !bypass_test)
8757 {
8758 /* Detect overlap between destination and compare sources. */
8759 rtx tmp = out;
8760
8761 	  /* To simplify the rest of the code, restrict to the GEU case.  */
8762 if (compare_code == LTU)
8763 {
8764 int tmp = ct;
8765 ct = cf;
8766 cf = tmp;
8767 compare_code = reverse_condition (compare_code);
8768 code = reverse_condition (code);
8769 }
8770 diff = ct - cf;
8771
8772 if (reg_overlap_mentioned_p (out, ix86_compare_op0)
8773 || reg_overlap_mentioned_p (out, ix86_compare_op1))
8774 tmp = gen_reg_rtx (mode);
8775
8776 emit_insn (compare_seq);
8777 if (mode == DImode)
8778 emit_insn (gen_x86_movdicc_0_m1_rex64 (tmp));
8779 else
8780 emit_insn (gen_x86_movsicc_0_m1 (tmp));
8781
8782 if (diff == 1)
8783 {
8784 /*
8785 * cmpl op0,op1
8786 * sbbl dest,dest
8787 * [addl dest, ct]
8788 *
8789 * Size 5 - 8.
8790 */
8791 if (ct)
8792 tmp = expand_simple_binop (mode, PLUS,
8793 tmp, GEN_INT (ct),
8794 tmp, 1, OPTAB_DIRECT);
8795 }
8796 else if (cf == -1)
8797 {
8798 /*
8799 * cmpl op0,op1
8800 * sbbl dest,dest
8801 * orl $ct, dest
8802 *
8803 * Size 8.
8804 */
8805 tmp = expand_simple_binop (mode, IOR,
8806 tmp, GEN_INT (ct),
8807 tmp, 1, OPTAB_DIRECT);
8808 }
8809 else if (diff == -1 && ct)
8810 {
8811 /*
8812 * cmpl op0,op1
8813 * sbbl dest,dest
8814 * notl dest
8815 * [addl dest, cf]
8816 *
8817 * Size 8 - 11.
8818 */
8819 tmp = expand_simple_unop (mode, NOT, tmp, tmp, 1);
8820 if (cf)
8821 tmp = expand_simple_binop (mode, PLUS,
8822 tmp, GEN_INT (cf),
8823 tmp, 1, OPTAB_DIRECT);
8824 }
8825 else
8826 {
8827 /*
8828 * cmpl op0,op1
8829 * sbbl dest,dest
8830 * [notl dest]
8831 * andl cf - ct, dest
8832 * [addl dest, ct]
8833 *
8834 * Size 8 - 11.
8835 */
8836
8837 if (cf == 0)
8838 {
8839 cf = ct;
8840 ct = 0;
8841 tmp = expand_simple_unop (mode, NOT, tmp, tmp, 1);
8842 }
8843
8844 tmp = expand_simple_binop (mode, AND,
8845 tmp,
8846 gen_int_mode (cf - ct, mode),
8847 tmp, 1, OPTAB_DIRECT);
8848 if (ct)
8849 tmp = expand_simple_binop (mode, PLUS,
8850 tmp, GEN_INT (ct),
8851 tmp, 1, OPTAB_DIRECT);
8852 }
8853
8854 if (tmp != out)
8855 emit_move_insn (out, tmp);
8856
8857 return 1; /* DONE */
8858 }
8859
8860 diff = ct - cf;
8861 if (diff < 0)
8862 {
8863 HOST_WIDE_INT tmp;
8864 tmp = ct, ct = cf, cf = tmp;
8865 diff = -diff;
8866 if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0)))
8867 {
8868 	      /* We may be reversing an unordered compare to a normal compare, which
8869 		 is not valid in general (we may convert a non-trapping condition
8870 		 into a trapping one); however, on i386 we currently emit all
8871 		 comparisons unordered.  */
8872 compare_code = reverse_condition_maybe_unordered (compare_code);
8873 code = reverse_condition_maybe_unordered (code);
8874 }
8875 else
8876 {
8877 compare_code = reverse_condition (compare_code);
8878 code = reverse_condition (code);
8879 }
8880 }
8881
8882 compare_code = NIL;
8883 if (GET_MODE_CLASS (GET_MODE (ix86_compare_op0)) == MODE_INT
8884 && GET_CODE (ix86_compare_op1) == CONST_INT)
8885 {
8886 if (ix86_compare_op1 == const0_rtx
8887 && (code == LT || code == GE))
8888 compare_code = code;
8889 else if (ix86_compare_op1 == constm1_rtx)
8890 {
8891 if (code == LE)
8892 compare_code = LT;
8893 else if (code == GT)
8894 compare_code = GE;
8895 }
8896 }
8897
8898 /* Optimize dest = (op0 < 0) ? -1 : cf. */
8899 if (compare_code != NIL
8900 && GET_MODE (ix86_compare_op0) == GET_MODE (out)
8901 && (cf == -1 || ct == -1))
8902 {
8903 	  /* If the lea code below could be used, only optimize
8904 	     if it results in a 2-insn sequence.  */
8905
8906 if (! (diff == 1 || diff == 2 || diff == 4 || diff == 8
8907 || diff == 3 || diff == 5 || diff == 9)
8908 || (compare_code == LT && ct == -1)
8909 || (compare_code == GE && cf == -1))
8910 {
8911 /*
8912 * notl op1 (if necessary)
8913 * sarl $31, op1
8914 * orl cf, op1
8915 */
8916 if (ct != -1)
8917 {
8918 cf = ct;
8919 ct = -1;
8920 code = reverse_condition (code);
8921 }
8922
8923 out = emit_store_flag (out, code, ix86_compare_op0,
8924 ix86_compare_op1, VOIDmode, 0, -1);
8925
8926 out = expand_simple_binop (mode, IOR,
8927 out, GEN_INT (cf),
8928 out, 1, OPTAB_DIRECT);
8929 if (out != operands[0])
8930 emit_move_insn (operands[0], out);
8931
8932 return 1; /* DONE */
8933 }
8934 }
8935
8936 if ((diff == 1 || diff == 2 || diff == 4 || diff == 8
8937 || diff == 3 || diff == 5 || diff == 9)
8938 && (mode != DImode || x86_64_sign_extended_value (GEN_INT (cf))))
8939 {
8940 /*
8941 * xorl dest,dest
8942 * cmpl op1,op2
8943 * setcc dest
8944 * lea cf(dest*(ct-cf)),dest
8945 *
8946 * Size 14.
8947 *
8948 * This also catches the degenerate setcc-only case.
8949 */
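      /* For instance (hypothetical values): ct = 7, cf = 2 gives diff = 5,
	 so after the setcc produces 0/1 in dest the result is formed as
	 roughly  lea 2(dest,dest,4), dest.  */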
8950
8951 rtx tmp;
8952 int nops;
8953
8954 out = emit_store_flag (out, code, ix86_compare_op0,
8955 ix86_compare_op1, VOIDmode, 0, 1);
8956
8957 nops = 0;
8958 	  /* On x86_64 the lea instruction operates on Pmode, so we need
8959 	     to get the arithmetic done in the proper mode to match.  */
8960 if (diff == 1)
8961 tmp = out;
8962 else
8963 {
8964 rtx out1;
8965 out1 = out;
8966 tmp = gen_rtx_MULT (mode, out1, GEN_INT (diff & ~1));
8967 nops++;
8968 if (diff & 1)
8969 {
8970 tmp = gen_rtx_PLUS (mode, tmp, out1);
8971 nops++;
8972 }
8973 }
8974 if (cf != 0)
8975 {
8976 tmp = gen_rtx_PLUS (mode, tmp, GEN_INT (cf));
8977 nops++;
8978 }
8979 if (tmp != out
8980 && (GET_CODE (tmp) != SUBREG || SUBREG_REG (tmp) != out))
8981 {
8982 if (nops == 1)
8983 {
8984 rtx clob;
8985
8986 clob = gen_rtx_REG (CCmode, FLAGS_REG);
8987 clob = gen_rtx_CLOBBER (VOIDmode, clob);
8988
8989 tmp = gen_rtx_SET (VOIDmode, out, tmp);
8990 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
8991 emit_insn (tmp);
8992 }
8993 else
8994 emit_insn (gen_rtx_SET (VOIDmode, out, tmp));
8995 }
8996 if (out != operands[0])
8997 emit_move_insn (operands[0], copy_rtx (out));
8998
8999 return 1; /* DONE */
9000 }
9001
9002 /*
9003 * General case: Jumpful:
9004 * xorl dest,dest cmpl op1, op2
9005 * cmpl op1, op2 movl ct, dest
9006 * setcc dest jcc 1f
9007 * decl dest movl cf, dest
9008 * andl (cf-ct),dest 1:
9009 * addl ct,dest
9010 *
9011 * Size 20. Size 14.
9012 *
9013 * This is reasonably steep, but branch mispredict costs are
9014 * high on modern cpus, so consider failing only if optimizing
9015 * for space.
9016 *
9017 * %%% Parameterize branch_cost on the tuning architecture, then
9018 * use that. The 80386 couldn't care less about mispredicts.
9019 */
9020
9021 if (!optimize_size && !TARGET_CMOVE)
9022 {
9023 if (cf == 0)
9024 {
9025 cf = ct;
9026 ct = 0;
9027 if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0)))
9028 	    /* We may be reversing an unordered compare to a normal compare,
9029 	       which is not valid in general (we may convert a non-trapping
9030 	       condition into a trapping one); however, on i386 we currently
9031 	       emit all comparisons unordered.  */
9032 code = reverse_condition_maybe_unordered (code);
9033 else
9034 {
9035 code = reverse_condition (code);
9036 if (compare_code != NIL)
9037 compare_code = reverse_condition (compare_code);
9038 }
9039 }
9040
9041 if (compare_code != NIL)
9042 {
9043 /* notl op1 (if needed)
9044 sarl $31, op1
9045 andl (cf-ct), op1
9046 addl ct, op1
9047
9048 For x < 0 (resp. x <= -1) there will be no notl,
9049 so if possible swap the constants to get rid of the
9050 complement.
9051 True/false will be -1/0 while code below (store flag
9052 followed by decrement) is 0/-1, so the constants need
9053 to be exchanged once more. */
9054
9055 if (compare_code == GE || !cf)
9056 {
9057 code = reverse_condition (code);
9058 compare_code = LT;
9059 }
9060 else
9061 {
9062 HOST_WIDE_INT tmp = cf;
9063 cf = ct;
9064 ct = tmp;
9065 }
9066
9067 out = emit_store_flag (out, code, ix86_compare_op0,
9068 ix86_compare_op1, VOIDmode, 0, -1);
9069 }
9070 else
9071 {
9072 out = emit_store_flag (out, code, ix86_compare_op0,
9073 ix86_compare_op1, VOIDmode, 0, 1);
9074
9075 out = expand_simple_binop (mode, PLUS, out, constm1_rtx,
9076 out, 1, OPTAB_DIRECT);
9077 }
9078
9079 out = expand_simple_binop (mode, AND, out,
9080 gen_int_mode (cf - ct, mode),
9081 out, 1, OPTAB_DIRECT);
9082 if (ct)
9083 out = expand_simple_binop (mode, PLUS, out, GEN_INT (ct),
9084 out, 1, OPTAB_DIRECT);
9085 if (out != operands[0])
9086 emit_move_insn (operands[0], out);
9087
9088 return 1; /* DONE */
9089 }
9090 }
9091
9092 if (!TARGET_CMOVE)
9093 {
9094       /* Try a few more things with specific constants and a variable.  */
9095
9096 optab op;
9097 rtx var, orig_out, out, tmp;
9098
9099 if (optimize_size)
9100 return 0; /* FAIL */
9101
9102       /* If one of the two operands is an interesting constant, conditionally load
9103 	 a 0/-1 constant (via recursion below) and mask the variable in with a logical op.  */
9104
9105 if (GET_CODE (operands[2]) == CONST_INT)
9106 {
9107 var = operands[3];
9108 if (INTVAL (operands[2]) == 0)
9109 operands[3] = constm1_rtx, op = and_optab;
9110 else if (INTVAL (operands[2]) == -1)
9111 operands[3] = const0_rtx, op = ior_optab;
9112 else
9113 return 0; /* FAIL */
9114 }
9115 else if (GET_CODE (operands[3]) == CONST_INT)
9116 {
9117 var = operands[2];
9118 if (INTVAL (operands[3]) == 0)
9119 operands[2] = constm1_rtx, op = and_optab;
9120 else if (INTVAL (operands[3]) == -1)
9121 operands[2] = const0_rtx, op = ior_optab;
9122 else
9123 return 0; /* FAIL */
9124 }
9125 else
9126 return 0; /* FAIL */
9127
9128 orig_out = operands[0];
9129 tmp = gen_reg_rtx (mode);
9130 operands[0] = tmp;
9131
9132 /* Recurse to get the constant loaded. */
9133 if (ix86_expand_int_movcc (operands) == 0)
9134 return 0; /* FAIL */
9135
9136 /* Mask in the interesting variable. */
9137 out = expand_binop (mode, op, var, tmp, orig_out, 0,
9138 OPTAB_WIDEN);
9139 if (out != orig_out)
9140 emit_move_insn (orig_out, out);
9141
9142 return 1; /* DONE */
9143 }
9144
9145 /*
9146 * For comparison with above,
9147 *
9148 * movl cf,dest
9149 * movl ct,tmp
9150 * cmpl op1,op2
9151 * cmovcc tmp,dest
9152 *
9153 * Size 15.
9154 */
9155
9156 if (! nonimmediate_operand (operands[2], mode))
9157 operands[2] = force_reg (mode, operands[2]);
9158 if (! nonimmediate_operand (operands[3], mode))
9159 operands[3] = force_reg (mode, operands[3]);
9160
9161 if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
9162 {
9163 rtx tmp = gen_reg_rtx (mode);
9164 emit_move_insn (tmp, operands[3]);
9165 operands[3] = tmp;
9166 }
9167 if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
9168 {
9169 rtx tmp = gen_reg_rtx (mode);
9170 emit_move_insn (tmp, operands[2]);
9171 operands[2] = tmp;
9172 }
9173 if (! register_operand (operands[2], VOIDmode)
9174 && ! register_operand (operands[3], VOIDmode))
9175 operands[2] = force_reg (mode, operands[2]);
9176
9177 emit_insn (compare_seq);
9178 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
9179 gen_rtx_IF_THEN_ELSE (mode,
9180 compare_op, operands[2],
9181 operands[3])));
9182 if (bypass_test)
9183 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
9184 gen_rtx_IF_THEN_ELSE (mode,
9185 bypass_test,
9186 operands[3],
9187 operands[0])));
9188 if (second_test)
9189 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
9190 gen_rtx_IF_THEN_ELSE (mode,
9191 second_test,
9192 operands[2],
9193 operands[0])));
9194
9195 return 1; /* DONE */
9196 }
9197
9198 int
9199 ix86_expand_fp_movcc (operands)
9200 rtx operands[];
9201 {
9202 enum rtx_code code;
9203 rtx tmp;
9204 rtx compare_op, second_test, bypass_test;
9205
9206   /* For SF/DFmode conditional moves based on comparisons
9207      in the same mode, we may want to use SSE min/max instructions.  */
9208 if (((TARGET_SSE_MATH && GET_MODE (operands[0]) == SFmode)
9209 || (TARGET_SSE2 && TARGET_SSE_MATH && GET_MODE (operands[0]) == DFmode))
9210 && GET_MODE (ix86_compare_op0) == GET_MODE (operands[0])
9211       /* The SSE comparisons do not support the LTGT/UNEQ pair.  */
9212 && (!TARGET_IEEE_FP
9213 || (GET_CODE (operands[1]) != LTGT && GET_CODE (operands[1]) != UNEQ))
9214 /* We may be called from the post-reload splitter. */
9215 && (!REG_P (operands[0])
9216 || SSE_REG_P (operands[0])
9217 || REGNO (operands[0]) >= FIRST_PSEUDO_REGISTER))
9218 {
9219 rtx op0 = ix86_compare_op0, op1 = ix86_compare_op1;
9220 code = GET_CODE (operands[1]);
9221
9222       /* See if we have a (cross) match between the comparison operands and
9223 	 the conditional move operands.  */
9224 if (rtx_equal_p (operands[2], op1))
9225 {
9226 rtx tmp = op0;
9227 op0 = op1;
9228 op1 = tmp;
9229 code = reverse_condition_maybe_unordered (code);
9230 }
9231 if (rtx_equal_p (operands[2], op0) && rtx_equal_p (operands[3], op1))
9232 {
9233 /* Check for min operation. */
9234 if (code == LT)
9235 {
9236 operands[0] = force_reg (GET_MODE (operands[0]), operands[0]);
9237 if (memory_operand (op0, VOIDmode))
9238 op0 = force_reg (GET_MODE (operands[0]), op0);
9239 if (GET_MODE (operands[0]) == SFmode)
9240 emit_insn (gen_minsf3 (operands[0], op0, op1));
9241 else
9242 emit_insn (gen_mindf3 (operands[0], op0, op1));
9243 return 1;
9244 }
9245 /* Check for max operation. */
9246 if (code == GT)
9247 {
9248 operands[0] = force_reg (GET_MODE (operands[0]), operands[0]);
9249 if (memory_operand (op0, VOIDmode))
9250 op0 = force_reg (GET_MODE (operands[0]), op0);
9251 if (GET_MODE (operands[0]) == SFmode)
9252 emit_insn (gen_maxsf3 (operands[0], op0, op1));
9253 else
9254 emit_insn (gen_maxdf3 (operands[0], op0, op1));
9255 return 1;
9256 }
9257 }
9258       /* Arrange for the condition to be an sse_comparison_operator.  In
9259 	 non-IEEE mode, try to canonicalize the destination operand
9260 	 to be first in the comparison - this helps reload avoid extra
9261 	 moves.  */
9262 if (!sse_comparison_operator (operands[1], VOIDmode)
9263 || (rtx_equal_p (operands[0], ix86_compare_op1) && !TARGET_IEEE_FP))
9264 {
9265 rtx tmp = ix86_compare_op0;
9266 ix86_compare_op0 = ix86_compare_op1;
9267 ix86_compare_op1 = tmp;
9268 operands[1] = gen_rtx_fmt_ee (swap_condition (GET_CODE (operands[1])),
9269 VOIDmode, ix86_compare_op0,
9270 ix86_compare_op1);
9271 }
9272       /* Similarly, try to arrange for the result to be the first operand of the
9273 	 conditional move.  We also don't support the NE comparison on SSE, so
9274 	 try to avoid it.  */
9275 if ((rtx_equal_p (operands[0], operands[3])
9276 && (!TARGET_IEEE_FP || GET_CODE (operands[1]) != EQ))
9277 || (GET_CODE (operands[1]) == NE && TARGET_IEEE_FP))
9278 {
9279 rtx tmp = operands[2];
9280 operands[2] = operands[3];
9281 operands[3] = tmp;
9282 operands[1] = gen_rtx_fmt_ee (reverse_condition_maybe_unordered
9283 (GET_CODE (operands[1])),
9284 VOIDmode, ix86_compare_op0,
9285 ix86_compare_op1);
9286 }
9287 if (GET_MODE (operands[0]) == SFmode)
9288 emit_insn (gen_sse_movsfcc (operands[0], operands[1],
9289 operands[2], operands[3],
9290 ix86_compare_op0, ix86_compare_op1));
9291 else
9292 emit_insn (gen_sse_movdfcc (operands[0], operands[1],
9293 operands[2], operands[3],
9294 ix86_compare_op0, ix86_compare_op1));
9295 return 1;
9296 }
9297
9298 /* The floating point conditional move instructions don't directly
9299 support conditions resulting from a signed integer comparison. */
9300
9301 code = GET_CODE (operands[1]);
9302 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
9303
9307 if (!fcmov_comparison_operator (compare_op, VOIDmode))
9308 {
9309 if (second_test != NULL || bypass_test != NULL)
9310 abort ();
9311 tmp = gen_reg_rtx (QImode);
9312 ix86_expand_setcc (code, tmp);
9313 code = NE;
9314 ix86_compare_op0 = tmp;
9315 ix86_compare_op1 = const0_rtx;
9316 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
9317 }
9318 if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
9319 {
9320 tmp = gen_reg_rtx (GET_MODE (operands[0]));
9321 emit_move_insn (tmp, operands[3]);
9322 operands[3] = tmp;
9323 }
9324 if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
9325 {
9326 tmp = gen_reg_rtx (GET_MODE (operands[0]));
9327 emit_move_insn (tmp, operands[2]);
9328 operands[2] = tmp;
9329 }
9330
9331 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
9332 gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
9333 compare_op,
9334 operands[2],
9335 operands[3])));
9336 if (bypass_test)
9337 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
9338 gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
9339 bypass_test,
9340 operands[3],
9341 operands[0])));
9342 if (second_test)
9343 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
9344 gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
9345 second_test,
9346 operands[2],
9347 operands[0])));
9348
9349 return 1;
9350 }
9351
9352 /* Split operands 0 and 1 into SImode parts.  Similar to split_di, but
9353    works for floating point parameters and non-offsettable memories.
9354    For pushes, it returns just stack offsets; the values will be saved
9355    in the right order.  At most three parts are generated.  */
9356
9357 static int
9358 ix86_split_to_parts (operand, parts, mode)
9359 rtx operand;
9360 rtx *parts;
9361 enum machine_mode mode;
9362 {
9363 int size;
9364
9365 if (!TARGET_64BIT)
9366 size = mode == TFmode ? 3 : (GET_MODE_SIZE (mode) / 4);
9367 else
9368 size = (GET_MODE_SIZE (mode) + 4) / 8;
9369
9370 if (GET_CODE (operand) == REG && MMX_REGNO_P (REGNO (operand)))
9371 abort ();
9372 if (size < 2 || size > 3)
9373 abort ();
9374
9375   /* Optimize constant pool references into immediates.  This is used by fp
9376      moves, which force all constants to memory to allow combining.  */
9377 if (GET_CODE (operand) == MEM && RTX_UNCHANGING_P (operand))
9378 {
9379 rtx tmp = maybe_get_pool_constant (operand);
9380 if (tmp)
9381 operand = tmp;
9382 }
9383
9384 if (GET_CODE (operand) == MEM && !offsettable_memref_p (operand))
9385 {
9386       /* The only non-offsettable memories we handle are pushes.  */
9387 if (! push_operand (operand, VOIDmode))
9388 abort ();
9389
9390 operand = copy_rtx (operand);
9391 PUT_MODE (operand, Pmode);
9392 parts[0] = parts[1] = parts[2] = operand;
9393 }
9394 else if (!TARGET_64BIT)
9395 {
9396 if (mode == DImode)
9397 split_di (&operand, 1, &parts[0], &parts[1]);
9398 else
9399 {
9400 if (REG_P (operand))
9401 {
9402 if (!reload_completed)
9403 abort ();
9404 parts[0] = gen_rtx_REG (SImode, REGNO (operand) + 0);
9405 parts[1] = gen_rtx_REG (SImode, REGNO (operand) + 1);
9406 if (size == 3)
9407 parts[2] = gen_rtx_REG (SImode, REGNO (operand) + 2);
9408 }
9409 else if (offsettable_memref_p (operand))
9410 {
9411 operand = adjust_address (operand, SImode, 0);
9412 parts[0] = operand;
9413 parts[1] = adjust_address (operand, SImode, 4);
9414 if (size == 3)
9415 parts[2] = adjust_address (operand, SImode, 8);
9416 }
9417 else if (GET_CODE (operand) == CONST_DOUBLE)
9418 {
9419 REAL_VALUE_TYPE r;
9420 long l[4];
9421
9422 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
9423 switch (mode)
9424 {
9425 case XFmode:
9426 case TFmode:
9427 REAL_VALUE_TO_TARGET_LONG_DOUBLE (r, l);
9428 parts[2] = gen_int_mode (l[2], SImode);
9429 break;
9430 case DFmode:
9431 REAL_VALUE_TO_TARGET_DOUBLE (r, l);
9432 break;
9433 default:
9434 abort ();
9435 }
9436 parts[1] = gen_int_mode (l[1], SImode);
9437 parts[0] = gen_int_mode (l[0], SImode);
9438 }
9439 else
9440 abort ();
9441 }
9442 }
9443 else
9444 {
9445 if (mode == TImode)
9446 split_ti (&operand, 1, &parts[0], &parts[1]);
9447 if (mode == XFmode || mode == TFmode)
9448 {
9449 if (REG_P (operand))
9450 {
9451 if (!reload_completed)
9452 abort ();
9453 parts[0] = gen_rtx_REG (DImode, REGNO (operand) + 0);
9454 parts[1] = gen_rtx_REG (SImode, REGNO (operand) + 1);
9455 }
9456 else if (offsettable_memref_p (operand))
9457 {
9458 operand = adjust_address (operand, DImode, 0);
9459 parts[0] = operand;
9460 parts[1] = adjust_address (operand, SImode, 8);
9461 }
9462 else if (GET_CODE (operand) == CONST_DOUBLE)
9463 {
9464 REAL_VALUE_TYPE r;
9465 long l[3];
9466
9467 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
9468 REAL_VALUE_TO_TARGET_LONG_DOUBLE (r, l);
9469 	      /* Do not use a shift by 32, to avoid the warning on 32-bit systems.  */
9470 if (HOST_BITS_PER_WIDE_INT >= 64)
9471 parts[0]
9472 = gen_int_mode
9473 ((l[0] & (((HOST_WIDE_INT) 2 << 31) - 1))
9474 + ((((HOST_WIDE_INT) l[1]) << 31) << 1),
9475 DImode);
9476 else
9477 parts[0] = immed_double_const (l[0], l[1], DImode);
9478 parts[1] = gen_int_mode (l[2], SImode);
9479 }
9480 else
9481 abort ();
9482 }
9483 }
9484
9485 return size;
9486 }
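
/* An illustrative sketch: on !TARGET_64BIT a DFmode hard register splits
   into parts[0] = (reg:SI n) and parts[1] = (reg:SI n+1), while a DFmode
   CONST_DOUBLE splits into two SImode immediates taken from
   REAL_VALUE_TO_TARGET_DOUBLE.  A push splits into the same operand
   repeated, since only stack offsets are meaningful there.  */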
9487
9488 /* Emit insns to perform a move or push of DI, DF, and XF values.
9489    All the required insns are emitted here.  Operands 2-4 are filled with
9490    the destination parts in the correct order; operands 5-7 receive the
9491    corresponding source parts.  */
9492
9493 void
9494 ix86_split_long_move (operands)
9495 rtx operands[];
9496 {
9497 rtx part[2][3];
9498 int nparts;
9499 int push = 0;
9500 int collisions = 0;
9501 enum machine_mode mode = GET_MODE (operands[0]);
9502
9503   /* The DFmode expanders may ask us to move a double.
9504      For a 64-bit target this is a single move.  By hiding that fact
9505      here we simplify the i386.md splitters.  */
9506 if (GET_MODE_SIZE (GET_MODE (operands[0])) == 8 && TARGET_64BIT)
9507 {
9508       /* Optimize constant pool references into immediates.  This is used by
9509 	 fp moves, which force all constants to memory to allow combining.  */
9510
9511 if (GET_CODE (operands[1]) == MEM
9512 && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF
9513 && CONSTANT_POOL_ADDRESS_P (XEXP (operands[1], 0)))
9514 operands[1] = get_pool_constant (XEXP (operands[1], 0));
9515 if (push_operand (operands[0], VOIDmode))
9516 {
9517 operands[0] = copy_rtx (operands[0]);
9518 PUT_MODE (operands[0], Pmode);
9519 }
9520 else
9521 operands[0] = gen_lowpart (DImode, operands[0]);
9522 operands[1] = gen_lowpart (DImode, operands[1]);
9523 emit_move_insn (operands[0], operands[1]);
9524 return;
9525 }
9526
9527   /* The only non-offsettable memory we handle is a push.  */
9528 if (push_operand (operands[0], VOIDmode))
9529 push = 1;
9530 else if (GET_CODE (operands[0]) == MEM
9531 && ! offsettable_memref_p (operands[0]))
9532 abort ();
9533
9534 nparts = ix86_split_to_parts (operands[1], part[1], GET_MODE (operands[0]));
9535 ix86_split_to_parts (operands[0], part[0], GET_MODE (operands[0]));
9536
9537   /* When emitting a push, take care of source operands living on the stack.  */
9538 if (push && GET_CODE (operands[1]) == MEM
9539 && reg_overlap_mentioned_p (stack_pointer_rtx, operands[1]))
9540 {
9541 if (nparts == 3)
9542 part[1][1] = change_address (part[1][1], GET_MODE (part[1][1]),
9543 XEXP (part[1][2], 0));
9544 part[1][0] = change_address (part[1][0], GET_MODE (part[1][0]),
9545 XEXP (part[1][1], 0));
9546 }
9547
9548   /* We need to do the copy in the right order in case an address register
9549      of the source overlaps the destination.  */
9550 if (REG_P (part[0][0]) && GET_CODE (part[1][0]) == MEM)
9551 {
9552 if (reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0)))
9553 collisions++;
9554 if (reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
9555 collisions++;
9556 if (nparts == 3
9557 && reg_overlap_mentioned_p (part[0][2], XEXP (part[1][0], 0)))
9558 collisions++;
9559
9560       /* A collision in the middle part can be handled by reordering.  */
9561 if (collisions == 1 && nparts == 3
9562 && reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
9563 {
9564 rtx tmp;
9565 tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
9566 tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
9567 }
9568
9569       /* If there are more collisions, we can't handle them by reordering.
9570 	 Do an lea into the last part and use only one colliding move.  */
9571 else if (collisions > 1)
9572 {
9573 collisions = 1;
9574 emit_insn (gen_rtx_SET (VOIDmode, part[0][nparts - 1],
9575 XEXP (part[1][0], 0)));
9576 part[1][0] = change_address (part[1][0],
9577 TARGET_64BIT ? DImode : SImode,
9578 part[0][nparts - 1]);
9579 part[1][1] = adjust_address (part[1][0], VOIDmode, UNITS_PER_WORD);
9580 if (nparts == 3)
9581 part[1][2] = adjust_address (part[1][0], VOIDmode, 8);
9582 }
9583 }
9584
9585 if (push)
9586 {
9587 if (!TARGET_64BIT)
9588 {
9589 if (nparts == 3)
9590 {
9591 	    /* We use only the first 12 bytes of a TFmode value, but for pushing
9592 	       we are required to adjust the stack as if we were pushing a real
9593 	       16-byte value.  */
9594 if (mode == TFmode && !TARGET_64BIT)
9595 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
9596 GEN_INT (-4)));
9597 emit_move_insn (part[0][2], part[1][2]);
9598 }
9599 }
9600 else
9601 {
9602 	  /* In 64-bit mode we don't have a 32-bit push available.  If this is a
9603 	     register, that is OK - we will just use the larger counterpart.  We
9604 	     also retype the memory - this comes from an attempt to avoid the REX
9605 	     prefix when moving the second half of a TFmode value.  */
9606 if (GET_MODE (part[1][1]) == SImode)
9607 {
9608 if (GET_CODE (part[1][1]) == MEM)
9609 part[1][1] = adjust_address (part[1][1], DImode, 0);
9610 else if (REG_P (part[1][1]))
9611 part[1][1] = gen_rtx_REG (DImode, REGNO (part[1][1]));
9612 else
9613 abort ();
9614 if (GET_MODE (part[1][0]) == SImode)
9615 part[1][0] = part[1][1];
9616 }
9617 }
9618 emit_move_insn (part[0][1], part[1][1]);
9619 emit_move_insn (part[0][0], part[1][0]);
9620 return;
9621 }
9622
9623   /* Choose the correct order so that we do not overwrite the source before it is copied.  */
9624 if ((REG_P (part[0][0])
9625 && REG_P (part[1][1])
9626 && (REGNO (part[0][0]) == REGNO (part[1][1])
9627 || (nparts == 3
9628 && REGNO (part[0][0]) == REGNO (part[1][2]))))
9629 || (collisions > 0
9630 && reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0))))
9631 {
9632 if (nparts == 3)
9633 {
9634 operands[2] = part[0][2];
9635 operands[3] = part[0][1];
9636 operands[4] = part[0][0];
9637 operands[5] = part[1][2];
9638 operands[6] = part[1][1];
9639 operands[7] = part[1][0];
9640 }
9641 else
9642 {
9643 operands[2] = part[0][1];
9644 operands[3] = part[0][0];
9645 operands[5] = part[1][1];
9646 operands[6] = part[1][0];
9647 }
9648 }
9649 else
9650 {
9651 if (nparts == 3)
9652 {
9653 operands[2] = part[0][0];
9654 operands[3] = part[0][1];
9655 operands[4] = part[0][2];
9656 operands[5] = part[1][0];
9657 operands[6] = part[1][1];
9658 operands[7] = part[1][2];
9659 }
9660 else
9661 {
9662 operands[2] = part[0][0];
9663 operands[3] = part[0][1];
9664 operands[5] = part[1][0];
9665 operands[6] = part[1][1];
9666 }
9667 }
9668 emit_move_insn (operands[2], operands[5]);
9669 emit_move_insn (operands[3], operands[6]);
9670 if (nparts == 3)
9671 emit_move_insn (operands[4], operands[7]);
9672
9673 return;
9674 }
9675
9676 void
9677 ix86_split_ashldi (operands, scratch)
9678 rtx *operands, scratch;
9679 {
9680 rtx low[2], high[2];
9681 int count;
9682
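/* For a constant count the shift is open-coded on the two halves.  A count
   of 32 or more moves the low word into the high word, clears the low word
   and shifts the high word by (count - 32); e.g. a shift by 40 becomes
   "high = low; low = 0; high <<= 8".  A smaller count uses shld to feed the
   top bits of the low word into the high word, followed by an ordinary
   shift of the low word.  */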
9683 if (GET_CODE (operands[2]) == CONST_INT)
9684 {
9685 split_di (operands, 2, low, high);
9686 count = INTVAL (operands[2]) & 63;
9687
9688 if (count >= 32)
9689 {
9690 emit_move_insn (high[0], low[1]);
9691 emit_move_insn (low[0], const0_rtx);
9692
9693 if (count > 32)
9694 emit_insn (gen_ashlsi3 (high[0], high[0], GEN_INT (count - 32)));
9695 }
9696 else
9697 {
9698 if (!rtx_equal_p (operands[0], operands[1]))
9699 emit_move_insn (operands[0], operands[1]);
9700 emit_insn (gen_x86_shld_1 (high[0], low[0], GEN_INT (count)));
9701 emit_insn (gen_ashlsi3 (low[0], low[0], GEN_INT (count)));
9702 }
9703 }
9704 else
9705 {
9706 if (!rtx_equal_p (operands[0], operands[1]))
9707 emit_move_insn (operands[0], operands[1]);
9708
9709 split_di (operands, 1, low, high);
9710
9711 emit_insn (gen_x86_shld_1 (high[0], low[0], operands[2]));
9712 emit_insn (gen_ashlsi3 (low[0], low[0], operands[2]));
9713
9714 if (TARGET_CMOVE && (! no_new_pseudos || scratch))
9715 {
9716 if (! no_new_pseudos)
9717 scratch = force_reg (SImode, const0_rtx);
9718 else
9719 emit_move_insn (scratch, const0_rtx);
9720
9721 emit_insn (gen_x86_shift_adj_1 (high[0], low[0], operands[2],
9722 scratch));
9723 }
9724 else
9725 emit_insn (gen_x86_shift_adj_2 (high[0], low[0], operands[2]));
9726 }
9727 }
9728
9729 void
9730 ix86_split_ashrdi (operands, scratch)
9731 rtx *operands, scratch;
9732 {
9733 rtx low[2], high[2];
9734 int count;
9735
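/* Same idea as ix86_split_ashldi, but shifting right: for a constant count
   of 32 or more the high word is copied into the low word and the high word
   is then refilled with copies of the sign bit (an arithmetic shift by 31),
   so the result stays sign-extended.  */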
9736 if (GET_CODE (operands[2]) == CONST_INT)
9737 {
9738 split_di (operands, 2, low, high);
9739 count = INTVAL (operands[2]) & 63;
9740
9741 if (count >= 32)
9742 {
9743 emit_move_insn (low[0], high[1]);
9744
9745 if (! reload_completed)
9746 emit_insn (gen_ashrsi3 (high[0], low[0], GEN_INT (31)));
9747 else
9748 {
9749 emit_move_insn (high[0], low[0]);
9750 emit_insn (gen_ashrsi3 (high[0], high[0], GEN_INT (31)));
9751 }
9752
9753 if (count > 32)
9754 emit_insn (gen_ashrsi3 (low[0], low[0], GEN_INT (count - 32)));
9755 }
9756 else
9757 {
9758 if (!rtx_equal_p (operands[0], operands[1]))
9759 emit_move_insn (operands[0], operands[1]);
9760 emit_insn (gen_x86_shrd_1 (low[0], high[0], GEN_INT (count)));
9761 emit_insn (gen_ashrsi3 (high[0], high[0], GEN_INT (count)));
9762 }
9763 }
9764 else
9765 {
9766 if (!rtx_equal_p (operands[0], operands[1]))
9767 emit_move_insn (operands[0], operands[1]);
9768
9769 split_di (operands, 1, low, high);
9770
9771 emit_insn (gen_x86_shrd_1 (low[0], high[0], operands[2]));
9772 emit_insn (gen_ashrsi3 (high[0], high[0], operands[2]));
9773
9774 if (TARGET_CMOVE && (! no_new_pseudos || scratch))
9775 {
9776 if (! no_new_pseudos)
9777 scratch = gen_reg_rtx (SImode);
9778 emit_move_insn (scratch, high[0]);
9779 emit_insn (gen_ashrsi3 (scratch, scratch, GEN_INT (31)));
9780 emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
9781 scratch));
9782 }
9783 else
9784 emit_insn (gen_x86_shift_adj_3 (low[0], high[0], operands[2]));
9785 }
9786 }
9787
9788 void
9789 ix86_split_lshrdi (operands, scratch)
9790 rtx *operands, scratch;
9791 {
9792 rtx low[2], high[2];
9793 int count;
9794
9795 if (GET_CODE (operands[2]) == CONST_INT)
9796 {
9797 split_di (operands, 2, low, high);
9798 count = INTVAL (operands[2]) & 63;
9799
9800 if (count >= 32)
9801 {
9802 emit_move_insn (low[0], high[1]);
9803 emit_move_insn (high[0], const0_rtx);
9804
9805 if (count > 32)
9806 emit_insn (gen_lshrsi3 (low[0], low[0], GEN_INT (count - 32)));
9807 }
9808 else
9809 {
9810 if (!rtx_equal_p (operands[0], operands[1]))
9811 emit_move_insn (operands[0], operands[1]);
9812 emit_insn (gen_x86_shrd_1 (low[0], high[0], GEN_INT (count)));
9813 emit_insn (gen_lshrsi3 (high[0], high[0], GEN_INT (count)));
9814 }
9815 }
9816 else
9817 {
9818 if (!rtx_equal_p (operands[0], operands[1]))
9819 emit_move_insn (operands[0], operands[1]);
9820
9821 split_di (operands, 1, low, high);
9822
9823 emit_insn (gen_x86_shrd_1 (low[0], high[0], operands[2]));
9824 emit_insn (gen_lshrsi3 (high[0], high[0], operands[2]));
9825
9826 /* Heh. By reversing the arguments, we can reuse this pattern. */
9827 if (TARGET_CMOVE && (! no_new_pseudos || scratch))
9828 {
9829 if (! no_new_pseudos)
9830 scratch = force_reg (SImode, const0_rtx);
9831 else
9832 emit_move_insn (scratch, const0_rtx);
9833
9834 emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
9835 scratch));
9836 }
9837 else
9838 emit_insn (gen_x86_shift_adj_2 (low[0], high[0], operands[2]));
9839 }
9840 }
9841
9842 /* Helper function for the string operations below. Test the bits of
9843 VARIABLE selected by VALUE; if they are all clear, jump to the returned label. */
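/* Typical use in the expanders below: emit this test, emit the code that
   handles the unaligned case (e.g. a single byte move plus a counter
   adjustment), then emit the returned label so that already-aligned
   addresses skip the fix-up.  */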
9844 static rtx
9845 ix86_expand_aligntest (variable, value)
9846 rtx variable;
9847 int value;
9848 {
9849 rtx label = gen_label_rtx ();
9850 rtx tmpcount = gen_reg_rtx (GET_MODE (variable));
9851 if (GET_MODE (variable) == DImode)
9852 emit_insn (gen_anddi3 (tmpcount, variable, GEN_INT (value)));
9853 else
9854 emit_insn (gen_andsi3 (tmpcount, variable, GEN_INT (value)));
9855 emit_cmp_and_jump_insns (tmpcount, const0_rtx, EQ, 0, GET_MODE (variable),
9856 1, label);
9857 return label;
9858 }
9859
9860 /* Decrease COUNTREG by VALUE. */
9861 static void
9862 ix86_adjust_counter (countreg, value)
9863 rtx countreg;
9864 HOST_WIDE_INT value;
9865 {
9866 if (GET_MODE (countreg) == DImode)
9867 emit_insn (gen_adddi3 (countreg, countreg, GEN_INT (-value)));
9868 else
9869 emit_insn (gen_addsi3 (countreg, countreg, GEN_INT (-value)));
9870 }
9871
9872 /* Zero-extend EXP, which may be in SImode, into a Pmode register. */
9873 rtx
9874 ix86_zero_extend_to_Pmode (exp)
9875 rtx exp;
9876 {
9877 rtx r;
9878 if (GET_MODE (exp) == VOIDmode)
9879 return force_reg (Pmode, exp);
9880 if (GET_MODE (exp) == Pmode)
9881 return copy_to_mode_reg (Pmode, exp);
9882 r = gen_reg_rtx (Pmode);
9883 emit_insn (gen_zero_extendsidi2 (r, exp));
9884 return r;
9885 }
9886
9887 /* Expand string move (memcpy) operation. Use i386 string operations when
9888 profitable. expand_clrstr contains similar code. */
9889 int
9890 ix86_expand_movstr (dst, src, count_exp, align_exp)
9891 rtx dst, src, count_exp, align_exp;
9892 {
9893 rtx srcreg, destreg, countreg;
9894 enum machine_mode counter_mode;
9895 HOST_WIDE_INT align = 0;
9896 unsigned HOST_WIDE_INT count = 0;
9897 rtx insns;
9898
9899 start_sequence ();
9900
9901 if (GET_CODE (align_exp) == CONST_INT)
9902 align = INTVAL (align_exp);
9903
9904 /* This simple hack avoids all the alignment code and simplifies the code below. */
9905 if (!TARGET_ALIGN_STRINGOPS)
9906 align = 64;
9907
9908 if (GET_CODE (count_exp) == CONST_INT)
9909 count = INTVAL (count_exp);
9910
9911 /* Figure out the proper mode for the counter. For 32-bit targets it is
9912 always SImode; for 64-bit targets use SImode when possible, otherwise DImode.
9913 Set count to the number of bytes copied when known at compile time. */
9914 if (!TARGET_64BIT || GET_MODE (count_exp) == SImode
9915 || x86_64_zero_extended_value (count_exp))
9916 counter_mode = SImode;
9917 else
9918 counter_mode = DImode;
9919
9920 if (counter_mode != SImode && counter_mode != DImode)
9921 abort ();
9922
9923 destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
9924 srcreg = copy_to_mode_reg (Pmode, XEXP (src, 0));
9925
9926 emit_insn (gen_cld ());
9927
9928 /* When optimizing for size, emit a simple rep ; movsb instruction for
9929 counts not divisible by 4. */
9930
9931 if ((!optimize || optimize_size) && (count == 0 || (count & 0x03)))
9932 {
9933 countreg = ix86_zero_extend_to_Pmode (count_exp);
9934 if (TARGET_64BIT)
9935 emit_insn (gen_rep_movqi_rex64 (destreg, srcreg, countreg,
9936 destreg, srcreg, countreg));
9937 else
9938 emit_insn (gen_rep_movqi (destreg, srcreg, countreg,
9939 destreg, srcreg, countreg));
9940 }
9941
9942 /* For constant aligned (or small unaligned) copies use rep movsl
9943 followed by code copying the rest. For PentiumPro ensure 8 byte
9944 alignment to allow rep movsl acceleration. */
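/* For illustration: with count == 23 and size == 4 the branch below emits a
   rep movsl copying 5 longwords (20 bytes), then a strmovhi for the next
   2 bytes and a strmovqi for the final byte, so the tail tests together
   cover any remainder modulo the word size.  */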
9945
9946 else if (count != 0
9947 && (align >= 8
9948 || (!TARGET_PENTIUMPRO && !TARGET_64BIT && align >= 4)
9949 || optimize_size || count < (unsigned int) 64))
9950 {
9951 int size = TARGET_64BIT && !optimize_size ? 8 : 4;
9952 if (count & ~(size - 1))
9953 {
9954 countreg = copy_to_mode_reg (counter_mode,
9955 GEN_INT ((count >> (size == 4 ? 2 : 3))
9956 & (TARGET_64BIT ? -1 : 0x3fffffff)));
9957 countreg = ix86_zero_extend_to_Pmode (countreg);
9958 if (size == 4)
9959 {
9960 if (TARGET_64BIT)
9961 emit_insn (gen_rep_movsi_rex64 (destreg, srcreg, countreg,
9962 destreg, srcreg, countreg));
9963 else
9964 emit_insn (gen_rep_movsi (destreg, srcreg, countreg,
9965 destreg, srcreg, countreg));
9966 }
9967 else
9968 emit_insn (gen_rep_movdi_rex64 (destreg, srcreg, countreg,
9969 destreg, srcreg, countreg));
9970 }
9971 if (size == 8 && (count & 0x04))
9972 emit_insn (gen_strmovsi (destreg, srcreg));
9973 if (count & 0x02)
9974 emit_insn (gen_strmovhi (destreg, srcreg));
9975 if (count & 0x01)
9976 emit_insn (gen_strmovqi (destreg, srcreg));
9977 }
9978 /* The generic code based on the glibc implementation:
9979 - align destination to 4 bytes (8 byte alignment is used for PentiumPro
9980 allowing accelerated copying there)
9981 - copy the data using rep movsl
9982 - copy the rest. */
9983 else
9984 {
9985 rtx countreg2;
9986 rtx label = NULL;
9987 int desired_alignment = (TARGET_PENTIUMPRO
9988 && (count == 0 || count >= (unsigned int) 260)
9989 ? 8 : UNITS_PER_WORD);
9990
9991 /* In case we don't know anything about the alignment, default to the
9992 library version, since it is usually equally fast and results in
9993 shorter code. */
9994 if (!TARGET_INLINE_ALL_STRINGOPS && align < UNITS_PER_WORD)
9995 {
9996 end_sequence ();
9997 return 0;
9998 }
9999
10000 if (TARGET_SINGLE_STRINGOP)
10001 emit_insn (gen_cld ());
10002
10003 countreg2 = gen_reg_rtx (Pmode);
10004 countreg = copy_to_mode_reg (counter_mode, count_exp);
10005
10006 /* We don't use loops to align destination and to copy parts smaller
10007 than 4 bytes, because gcc is able to optimize such code better (in
10008 the case the destination or the count really is aligned, gcc is often
10009 able to predict the branches) and also it is friendlier to the
10010 hardware branch prediction.
10011
10012 Using loops would be beneficial for the generic case, because we could
10013 handle small counts using them. Many CPUs (such as the Athlon)
10014 have large REP prefix setup costs.
10015
10016 This is quite costly. Maybe we can revisit this decision later or
10017 add some customizability to this code. */
10018
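/* The code below first aligns the destination when the alignment is not
   known to be sufficient: small counts branch past this prologue entirely,
   otherwise one address bit is tested at a time and a byte, a halfword and
   (when 8-byte alignment is wanted) a longword are copied as needed while
   the count is decreased accordingly.  For example, a destination address
   congruent to 3 mod 8 gets a byte copy followed by a longword copy before
   the rep move takes over.  */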
10019 if (count == 0 && align < desired_alignment)
10020 {
10021 label = gen_label_rtx ();
10022 emit_cmp_and_jump_insns (countreg, GEN_INT (desired_alignment - 1),
10023 LEU, 0, counter_mode, 1, label);
10024 }
10025 if (align <= 1)
10026 {
10027 rtx label = ix86_expand_aligntest (destreg, 1);
10028 emit_insn (gen_strmovqi (destreg, srcreg));
10029 ix86_adjust_counter (countreg, 1);
10030 emit_label (label);
10031 LABEL_NUSES (label) = 1;
10032 }
10033 if (align <= 2)
10034 {
10035 rtx label = ix86_expand_aligntest (destreg, 2);
10036 emit_insn (gen_strmovhi (destreg, srcreg));
10037 ix86_adjust_counter (countreg, 2);
10038 emit_label (label);
10039 LABEL_NUSES (label) = 1;
10040 }
10041 if (align <= 4 && desired_alignment > 4)
10042 {
10043 rtx label = ix86_expand_aligntest (destreg, 4);
10044 emit_insn (gen_strmovsi (destreg, srcreg));
10045 ix86_adjust_counter (countreg, 4);
10046 emit_label (label);
10047 LABEL_NUSES (label) = 1;
10048 }
10049
10050 if (label && desired_alignment > 4 && !TARGET_64BIT)
10051 {
10052 emit_label (label);
10053 LABEL_NUSES (label) = 1;
10054 label = NULL_RTX;
10055 }
10056 if (!TARGET_SINGLE_STRINGOP)
10057 emit_insn (gen_cld ());
10058 if (TARGET_64BIT)
10059 {
10060 emit_insn (gen_lshrdi3 (countreg2, ix86_zero_extend_to_Pmode (countreg),
10061 GEN_INT (3)));
10062 emit_insn (gen_rep_movdi_rex64 (destreg, srcreg, countreg2,
10063 destreg, srcreg, countreg2));
10064 }
10065 else
10066 {
10067 emit_insn (gen_lshrsi3 (countreg2, countreg, GEN_INT (2)));
10068 emit_insn (gen_rep_movsi (destreg, srcreg, countreg2,
10069 destreg, srcreg, countreg2));
10070 }
10071
10072 if (label)
10073 {
10074 emit_label (label);
10075 LABEL_NUSES (label) = 1;
10076 }
10077 if (TARGET_64BIT && align > 4 && count != 0 && (count & 4))
10078 emit_insn (gen_strmovsi (destreg, srcreg));
10079 if ((align <= 4 || count == 0) && TARGET_64BIT)
10080 {
10081 rtx label = ix86_expand_aligntest (countreg, 4);
10082 emit_insn (gen_strmovsi (destreg, srcreg));
10083 emit_label (label);
10084 LABEL_NUSES (label) = 1;
10085 }
10086 if (align > 2 && count != 0 && (count & 2))
10087 emit_insn (gen_strmovhi (destreg, srcreg));
10088 if (align <= 2 || count == 0)
10089 {
10090 rtx label = ix86_expand_aligntest (countreg, 2);
10091 emit_insn (gen_strmovhi (destreg, srcreg));
10092 emit_label (label);
10093 LABEL_NUSES (label) = 1;
10094 }
10095 if (align > 1 && count != 0 && (count & 1))
10096 emit_insn (gen_strmovqi (destreg, srcreg));
10097 if (align <= 1 || count == 0)
10098 {
10099 rtx label = ix86_expand_aligntest (countreg, 1);
10100 emit_insn (gen_strmovqi (destreg, srcreg));
10101 emit_label (label);
10102 LABEL_NUSES (label) = 1;
10103 }
10104 }
10105
10106 insns = get_insns ();
10107 end_sequence ();
10108
10109 ix86_set_move_mem_attrs (insns, dst, src, destreg, srcreg);
10110 emit_insn (insns);
10111 return 1;
10112 }
10113
10114 /* Expand string clear operation (bzero). Use i386 string operations when
10115 profitable. expand_movstr contains similar code. */
10116 int
10117 ix86_expand_clrstr (src, count_exp, align_exp)
10118 rtx src, count_exp, align_exp;
10119 {
10120 rtx destreg, zeroreg, countreg;
10121 enum machine_mode counter_mode;
10122 HOST_WIDE_INT align = 0;
10123 unsigned HOST_WIDE_INT count = 0;
10124
10125 if (GET_CODE (align_exp) == CONST_INT)
10126 align = INTVAL (align_exp);
10127
10128 /* This simple hack avoids all the alignment code and simplifies the code below. */
10129 if (!TARGET_ALIGN_STRINGOPS)
10130 align = 32;
10131
10132 if (GET_CODE (count_exp) == CONST_INT)
10133 count = INTVAL (count_exp);
10134 /* Figure out the proper mode for the counter. For 32-bit targets it is
10135 always SImode; for 64-bit targets use SImode when possible, otherwise DImode.
10136 Set count to the number of bytes cleared when known at compile time. */
10137 if (!TARGET_64BIT || GET_MODE (count_exp) == SImode
10138 || x86_64_zero_extended_value (count_exp))
10139 counter_mode = SImode;
10140 else
10141 counter_mode = DImode;
10142
10143 destreg = copy_to_mode_reg (Pmode, XEXP (src, 0));
10144
10145 emit_insn (gen_cld ());
10146
10147 /* When optimizing for size, emit a simple rep ; stosb instruction for
10148 counts not divisible by 4. */
10149
10150 if ((!optimize || optimize_size) && (count == 0 || (count & 0x03)))
10151 {
10152 countreg = ix86_zero_extend_to_Pmode (count_exp);
10153 zeroreg = copy_to_mode_reg (QImode, const0_rtx);
10154 if (TARGET_64BIT)
10155 emit_insn (gen_rep_stosqi_rex64 (destreg, countreg, zeroreg,
10156 destreg, countreg));
10157 else
10158 emit_insn (gen_rep_stosqi (destreg, countreg, zeroreg,
10159 destreg, countreg));
10160 }
10161 else if (count != 0
10162 && (align >= 8
10163 || (!TARGET_PENTIUMPRO && !TARGET_64BIT && align >= 4)
10164 || optimize_size || count < (unsigned int) 64))
10165 {
10166 int size = TARGET_64BIT && !optimize_size ? 8 : 4;
10167 zeroreg = copy_to_mode_reg (size == 4 ? SImode : DImode, const0_rtx);
10168 if (count & ~(size - 1))
10169 {
10170 countreg = copy_to_mode_reg (counter_mode,
10171 GEN_INT ((count >> (size == 4 ? 2 : 3))
10172 & (TARGET_64BIT ? -1 : 0x3fffffff)));
10173 countreg = ix86_zero_extend_to_Pmode (countreg);
10174 if (size == 4)
10175 {
10176 if (TARGET_64BIT)
10177 emit_insn (gen_rep_stossi_rex64 (destreg, countreg, zeroreg,
10178 destreg, countreg));
10179 else
10180 emit_insn (gen_rep_stossi (destreg, countreg, zeroreg,
10181 destreg, countreg));
10182 }
10183 else
10184 emit_insn (gen_rep_stosdi_rex64 (destreg, countreg, zeroreg,
10185 destreg, countreg));
10186 }
10187 if (size == 8 && (count & 0x04))
10188 emit_insn (gen_strsetsi (destreg,
10189 gen_rtx_SUBREG (SImode, zeroreg, 0)));
10190 if (count & 0x02)
10191 emit_insn (gen_strsethi (destreg,
10192 gen_rtx_SUBREG (HImode, zeroreg, 0)));
10193 if (count & 0x01)
10194 emit_insn (gen_strsetqi (destreg,
10195 gen_rtx_SUBREG (QImode, zeroreg, 0)));
10196 }
10197 else
10198 {
10199 rtx countreg2;
10200 rtx label = NULL;
10201 /* Compute desired alignment of the string operation. */
10202 int desired_alignment = (TARGET_PENTIUMPRO
10203 && (count == 0 || count >= (unsigned int) 260)
10204 ? 8 : UNITS_PER_WORD);
10205
10206 /* In case we don't know anything about the alignment, default to the
10207 library version, since it is usually equally fast and results in
10208 shorter code. */
10209 if (!TARGET_INLINE_ALL_STRINGOPS && align < UNITS_PER_WORD)
10210 return 0;
10211
10212 if (TARGET_SINGLE_STRINGOP)
10213 emit_insn (gen_cld ());
10214
10215 countreg2 = gen_reg_rtx (Pmode);
10216 countreg = copy_to_mode_reg (counter_mode, count_exp);
10217 zeroreg = copy_to_mode_reg (Pmode, const0_rtx);
10218
10219 if (count == 0 && align < desired_alignment)
10220 {
10221 label = gen_label_rtx ();
10222 emit_cmp_and_jump_insns (countreg, GEN_INT (desired_alignment - 1),
10223 LEU, 0, counter_mode, 1, label);
10224 }
10225 if (align <= 1)
10226 {
10227 rtx label = ix86_expand_aligntest (destreg, 1);
10228 emit_insn (gen_strsetqi (destreg,
10229 gen_rtx_SUBREG (QImode, zeroreg, 0)));
10230 ix86_adjust_counter (countreg, 1);
10231 emit_label (label);
10232 LABEL_NUSES (label) = 1;
10233 }
10234 if (align <= 2)
10235 {
10236 rtx label = ix86_expand_aligntest (destreg, 2);
10237 emit_insn (gen_strsethi (destreg,
10238 gen_rtx_SUBREG (HImode, zeroreg, 0)));
10239 ix86_adjust_counter (countreg, 2);
10240 emit_label (label);
10241 LABEL_NUSES (label) = 1;
10242 }
10243 if (align <= 4 && desired_alignment > 4)
10244 {
10245 rtx label = ix86_expand_aligntest (destreg, 4);
10246 emit_insn (gen_strsetsi (destreg, (TARGET_64BIT
10247 ? gen_rtx_SUBREG (SImode, zeroreg, 0)
10248 : zeroreg)));
10249 ix86_adjust_counter (countreg, 4);
10250 emit_label (label);
10251 LABEL_NUSES (label) = 1;
10252 }
10253
10254 if (label && desired_alignment > 4 && !TARGET_64BIT)
10255 {
10256 emit_label (label);
10257 LABEL_NUSES (label) = 1;
10258 label = NULL_RTX;
10259 }
10260
10261 if (!TARGET_SINGLE_STRINGOP)
10262 emit_insn (gen_cld ());
10263 if (TARGET_64BIT)
10264 {
10265 emit_insn (gen_lshrdi3 (countreg2, ix86_zero_extend_to_Pmode (countreg),
10266 GEN_INT (3)));
10267 emit_insn (gen_rep_stosdi_rex64 (destreg, countreg2, zeroreg,
10268 destreg, countreg2));
10269 }
10270 else
10271 {
10272 emit_insn (gen_lshrsi3 (countreg2, countreg, GEN_INT (2)));
10273 emit_insn (gen_rep_stossi (destreg, countreg2, zeroreg,
10274 destreg, countreg2));
10275 }
10276 if (label)
10277 {
10278 emit_label (label);
10279 LABEL_NUSES (label) = 1;
10280 }
10281
10282 if (TARGET_64BIT && align > 4 && count != 0 && (count & 4))
10283 emit_insn (gen_strsetsi (destreg,
10284 gen_rtx_SUBREG (SImode, zeroreg, 0)));
10285 if (TARGET_64BIT && (align <= 4 || count == 0))
10286 {
10287 rtx label = ix86_expand_aligntest (countreg, 4);
10288 emit_insn (gen_strsetsi (destreg,
10289 gen_rtx_SUBREG (SImode, zeroreg, 0)));
10290 emit_label (label);
10291 LABEL_NUSES (label) = 1;
10292 }
10293 if (align > 2 && count != 0 && (count & 2))
10294 emit_insn (gen_strsethi (destreg,
10295 gen_rtx_SUBREG (HImode, zeroreg, 0)));
10296 if (align <= 2 || count == 0)
10297 {
10298 rtx label = ix86_expand_aligntest (countreg, 2);
10299 emit_insn (gen_strsethi (destreg,
10300 gen_rtx_SUBREG (HImode, zeroreg, 0)));
10301 emit_label (label);
10302 LABEL_NUSES (label) = 1;
10303 }
10304 if (align > 1 && count != 0 && (count & 1))
10305 emit_insn (gen_strsetqi (destreg,
10306 gen_rtx_SUBREG (QImode, zeroreg, 0)));
10307 if (align <= 1 || count == 0)
10308 {
10309 rtx label = ix86_expand_aligntest (countreg, 1);
10310 emit_insn (gen_strsetqi (destreg,
10311 gen_rtx_SUBREG (QImode, zeroreg, 0)));
10312 emit_label (label);
10313 LABEL_NUSES (label) = 1;
10314 }
10315 }
10316 return 1;
10317 }
10318 /* Expand strlen. */
10319 int
10320 ix86_expand_strlen (out, src, eoschar, align)
10321 rtx out, src, eoschar, align;
10322 {
10323 rtx addr, scratch1, scratch2, scratch3, scratch4;
10324
10325 /* The generic case of the strlen expander is long. Avoid its
10326 expansion unless TARGET_INLINE_ALL_STRINGOPS. */
10327
10328 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
10329 && !TARGET_INLINE_ALL_STRINGOPS
10330 && !optimize_size
10331 && (GET_CODE (align) != CONST_INT || INTVAL (align) < 4))
10332 return 0;
10333
10334 addr = force_reg (Pmode, XEXP (src, 0));
10335 scratch1 = gen_reg_rtx (Pmode);
10336
10337 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
10338 && !optimize_size)
10339 {
10340 /* Well, it seems that some optimizer does not combine a call like
10341 foo(strlen(bar), strlen(bar));
10342 when the move and the subtraction are done here. It does calculate
10343 the length just once when these instructions are done inside of
10344 output_strlen_unroll(). But I think that since &bar[strlen(bar)] is
10345 often used and I use one fewer register for the lifetime of
10346 output_strlen_unroll(), this is better. */
10347
10348 emit_move_insn (out, addr);
10349
10350 ix86_expand_strlensi_unroll_1 (out, align);
10351
10352 /* strlensi_unroll_1 returns the address of the zero at the end of
10353 the string, like memchr(), so compute the length by subtracting
10354 the start address. */
10355 if (TARGET_64BIT)
10356 emit_insn (gen_subdi3 (out, out, addr));
10357 else
10358 emit_insn (gen_subsi3 (out, out, addr));
10359 }
10360 else
10361 {
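/* The inline strlen below uses repnz scasb: the count register starts at -1
   and is decremented once for every byte examined, including the
   terminating zero.  If it ends up holding C, the number of bytes examined
   is -1 - C, so the string length is -2 - C, which is what the
   one's-complement followed by the add of -1 computes.  */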
10362 scratch2 = gen_reg_rtx (Pmode);
10363 scratch3 = gen_reg_rtx (Pmode);
10364 scratch4 = force_reg (Pmode, constm1_rtx);
10365
10366 emit_move_insn (scratch3, addr);
10367 eoschar = force_reg (QImode, eoschar);
10368
10369 emit_insn (gen_cld ());
10370 if (TARGET_64BIT)
10371 {
10372 emit_insn (gen_strlenqi_rex_1 (scratch1, scratch3, eoschar,
10373 align, scratch4, scratch3));
10374 emit_insn (gen_one_cmpldi2 (scratch2, scratch1));
10375 emit_insn (gen_adddi3 (out, scratch2, constm1_rtx));
10376 }
10377 else
10378 {
10379 emit_insn (gen_strlenqi_1 (scratch1, scratch3, eoschar,
10380 align, scratch4, scratch3));
10381 emit_insn (gen_one_cmplsi2 (scratch2, scratch1));
10382 emit_insn (gen_addsi3 (out, scratch2, constm1_rtx));
10383 }
10384 }
10385 return 1;
10386 }
10387
10388 /* Expand the appropriate insns for doing strlen if not just doing
10389 repnz; scasb
10390
10391 out = result, initialized with the start address
10392 align_rtx = alignment of the address.
10393 scratch = scratch register, initialized with the start address when
10394 not aligned, otherwise undefined
10395
10396 This is just the body. It needs the initialisations mentioned above and
10397 some address computing at the end. These things are done in i386.md. */
10398
10399 static void
10400 ix86_expand_strlensi_unroll_1 (out, align_rtx)
10401 rtx out, align_rtx;
10402 {
10403 int align;
10404 rtx tmp;
10405 rtx align_2_label = NULL_RTX;
10406 rtx align_3_label = NULL_RTX;
10407 rtx align_4_label = gen_label_rtx ();
10408 rtx end_0_label = gen_label_rtx ();
10409 rtx mem;
10410 rtx tmpreg = gen_reg_rtx (SImode);
10411 rtx scratch = gen_reg_rtx (SImode);
10412
10413 align = 0;
10414 if (GET_CODE (align_rtx) == CONST_INT)
10415 align = INTVAL (align_rtx);
10416
10417 /* Loop to check 1..3 bytes for null to get an aligned pointer. */
10418
10419 /* Is there a known alignment and is it less than 4? */
10420 if (align < 4)
10421 {
10422 rtx scratch1 = gen_reg_rtx (Pmode);
10423 emit_move_insn (scratch1, out);
10424 /* Is there a known alignment and is it not 2? */
10425 if (align != 2)
10426 {
10427 align_3_label = gen_label_rtx (); /* Label when aligned to 3-byte */
10428 align_2_label = gen_label_rtx (); /* Label when aligned to 2-byte */
10429
10430 /* Leave just the 3 lower bits. */
10431 align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (3),
10432 NULL_RTX, 0, OPTAB_WIDEN);
10433
10434 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
10435 Pmode, 1, align_4_label);
10436 emit_cmp_and_jump_insns (align_rtx, GEN_INT (2), EQ, NULL,
10437 Pmode, 1, align_2_label);
10438 emit_cmp_and_jump_insns (align_rtx, GEN_INT (2), GTU, NULL,
10439 Pmode, 1, align_3_label);
10440 }
10441 else
10442 {
10443 /* Since the alignment is 2, we have to check 2 or 0 bytes;
10444 check whether the address is aligned to a 4-byte boundary. */
10445
10446 align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (2),
10447 NULL_RTX, 0, OPTAB_WIDEN);
10448
10449 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
10450 Pmode, 1, align_4_label);
10451 }
10452
10453 mem = gen_rtx_MEM (QImode, out);
10454
10455 /* Now compare the bytes. */
10456
10457 /* Compare the first n unaligned bytes on a byte-by-byte basis. */
10458 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL,
10459 QImode, 1, end_0_label);
10460
10461 /* Increment the address. */
10462 if (TARGET_64BIT)
10463 emit_insn (gen_adddi3 (out, out, const1_rtx));
10464 else
10465 emit_insn (gen_addsi3 (out, out, const1_rtx));
10466
10467 /* Not needed with an alignment of 2 */
10468 if (align != 2)
10469 {
10470 emit_label (align_2_label);
10471
10472 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
10473 end_0_label);
10474
10475 if (TARGET_64BIT)
10476 emit_insn (gen_adddi3 (out, out, const1_rtx));
10477 else
10478 emit_insn (gen_addsi3 (out, out, const1_rtx));
10479
10480 emit_label (align_3_label);
10481 }
10482
10483 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
10484 end_0_label);
10485
10486 if (TARGET_64BIT)
10487 emit_insn (gen_adddi3 (out, out, const1_rtx));
10488 else
10489 emit_insn (gen_addsi3 (out, out, const1_rtx));
10490 }
10491
10492 /* Generate a loop to check 4 bytes at a time. It is not a good idea to
10493 align this loop; it only makes the program larger and does not help
10494 the speed. */
10495 emit_label (align_4_label);
10496
10497 mem = gen_rtx_MEM (SImode, out);
10498 emit_move_insn (scratch, mem);
10499 if (TARGET_64BIT)
10500 emit_insn (gen_adddi3 (out, out, GEN_INT (4)));
10501 else
10502 emit_insn (gen_addsi3 (out, out, GEN_INT (4)));
10503
10504 /* This formula yields a nonzero result iff one of the bytes is zero.
10505 This saves three branches inside the loop and many cycles. */
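/* Spelled out, the test below computes (x - 0x01010101) & ~x & 0x80808080.
   For example, with x = 0x11003344 the subtraction borrows through the zero
   byte giving 0x0fff3243, ~x is 0xeeffccbb, and the two ANDs leave
   0x00800000, which is nonzero; when no byte of x is zero, no 0x80 bit
   survives both masks and the whole expression is zero.  */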
10506
10507 emit_insn (gen_addsi3 (tmpreg, scratch, GEN_INT (-0x01010101)));
10508 emit_insn (gen_one_cmplsi2 (scratch, scratch));
10509 emit_insn (gen_andsi3 (tmpreg, tmpreg, scratch));
10510 emit_insn (gen_andsi3 (tmpreg, tmpreg,
10511 gen_int_mode (0x80808080, SImode)));
10512 emit_cmp_and_jump_insns (tmpreg, const0_rtx, EQ, 0, SImode, 1,
10513 align_4_label);
10514
10515 if (TARGET_CMOVE)
10516 {
10517 rtx reg = gen_reg_rtx (SImode);
10518 rtx reg2 = gen_reg_rtx (Pmode);
10519 emit_move_insn (reg, tmpreg);
10520 emit_insn (gen_lshrsi3 (reg, reg, GEN_INT (16)));
10521
10522 /* If zero is not in the first two bytes, move two bytes forward. */
10523 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
10524 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
10525 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
10526 emit_insn (gen_rtx_SET (VOIDmode, tmpreg,
10527 gen_rtx_IF_THEN_ELSE (SImode, tmp,
10528 reg,
10529 tmpreg)));
10530 /* Emit the lea manually to avoid clobbering the flags. */
10531 emit_insn (gen_rtx_SET (SImode, reg2,
10532 gen_rtx_PLUS (Pmode, out, GEN_INT (2))));
10533
10534 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
10535 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
10536 emit_insn (gen_rtx_SET (VOIDmode, out,
10537 gen_rtx_IF_THEN_ELSE (Pmode, tmp,
10538 reg2,
10539 out)));
10540
10541 }
10542 else
10543 {
10544 rtx end_2_label = gen_label_rtx ();
10545 /* Is zero in the first two bytes? */
10546
10547 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
10548 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
10549 tmp = gen_rtx_NE (VOIDmode, tmp, const0_rtx);
10550 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
10551 gen_rtx_LABEL_REF (VOIDmode, end_2_label),
10552 pc_rtx);
10553 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
10554 JUMP_LABEL (tmp) = end_2_label;
10555
10556 /* Not in the first two. Move two bytes forward. */
10557 emit_insn (gen_lshrsi3 (tmpreg, tmpreg, GEN_INT (16)));
10558 if (TARGET_64BIT)
10559 emit_insn (gen_adddi3 (out, out, GEN_INT (2)));
10560 else
10561 emit_insn (gen_addsi3 (out, out, GEN_INT (2)));
10562
10563 emit_label (end_2_label);
10564
10565 }
10566
10567 /* Avoid branch in fixing the byte. */
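/* At this point tmpreg has a 0x80 bit exactly where the zero byte was found
   within the halfword selected above.  Adding the low byte of tmpreg to
   itself sets the carry flag iff that bit is bit 7 (zero in the lower byte
   of the pair), and the subtract-with-borrow then backs OUT up by 4 or 3,
   leaving it pointing exactly at the terminating zero.  */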
10568 tmpreg = gen_lowpart (QImode, tmpreg);
10569 emit_insn (gen_addqi3_cc (tmpreg, tmpreg, tmpreg));
10570 if (TARGET_64BIT)
10571 emit_insn (gen_subdi3_carry_rex64 (out, out, GEN_INT (3)));
10572 else
10573 emit_insn (gen_subsi3_carry (out, out, GEN_INT (3)));
10574
10575 emit_label (end_0_label);
10576 }
10577
10578 void
10579 ix86_expand_call (retval, fnaddr, callarg1, callarg2, pop)
10580 rtx retval, fnaddr, callarg1, callarg2, pop;
10581 {
10582 rtx use = NULL, call;
10583
10584 if (pop == const0_rtx)
10585 pop = NULL;
10586 if (TARGET_64BIT && pop)
10587 abort ();
10588
10589 #if TARGET_MACHO
10590 if (flag_pic && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF)
10591 fnaddr = machopic_indirect_call_target (fnaddr);
10592 #else
10593 /* Static functions and indirect calls don't need the pic register. */
10594 if (! TARGET_64BIT && flag_pic
10595 && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
10596 && ! SYMBOL_REF_FLAG (XEXP (fnaddr, 0)))
10597 use_reg (&use, pic_offset_table_rtx);
10598
10599 if (TARGET_64BIT && INTVAL (callarg2) >= 0)
10600 {
10601 rtx al = gen_rtx_REG (QImode, 0);
10602 emit_move_insn (al, callarg2);
10603 use_reg (&use, al);
10604 }
10605 #endif /* TARGET_MACHO */
10606
10607 if (! call_insn_operand (XEXP (fnaddr, 0), Pmode))
10608 {
10609 fnaddr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0));
10610 fnaddr = gen_rtx_MEM (QImode, fnaddr);
10611 }
10612
10613 call = gen_rtx_CALL (VOIDmode, fnaddr, callarg1);
10614 if (retval)
10615 call = gen_rtx_SET (VOIDmode, retval, call);
10616 if (pop)
10617 {
10618 pop = gen_rtx_PLUS (Pmode, stack_pointer_rtx, pop);
10619 pop = gen_rtx_SET (VOIDmode, stack_pointer_rtx, pop);
10620 call = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, call, pop));
10621 }
10622
10623 call = emit_call_insn (call);
10624 if (use)
10625 CALL_INSN_FUNCTION_USAGE (call) = use;
10626 }
10627
10628 \f
10629 /* Clear stack slot assignments remembered from previous functions.
10630 This is called from INIT_EXPANDERS once before RTL is emitted for each
10631 function. */
10632
10633 static struct machine_function *
10634 ix86_init_machine_status ()
10635 {
10636 return ggc_alloc_cleared (sizeof (struct machine_function));
10637 }
10638
10639 /* Return a MEM corresponding to a stack slot with mode MODE.
10640 Allocate a new slot if necessary.
10641
10642 The RTL for a function can have several slots available: N is
10643 which slot to use. */
10644
10645 rtx
10646 assign_386_stack_local (mode, n)
10647 enum machine_mode mode;
10648 int n;
10649 {
10650 if (n < 0 || n >= MAX_386_STACK_LOCALS)
10651 abort ();
10652
10653 if (ix86_stack_locals[(int) mode][n] == NULL_RTX)
10654 ix86_stack_locals[(int) mode][n]
10655 = assign_stack_local (mode, GET_MODE_SIZE (mode), 0);
10656
10657 return ix86_stack_locals[(int) mode][n];
10658 }
10659
10660 /* Construct the SYMBOL_REF for the tls_get_addr function. */
10661
10662 static GTY(()) rtx ix86_tls_symbol;
10663 rtx
10664 ix86_tls_get_addr ()
10665 {
10666
10667 if (!ix86_tls_symbol)
10668 {
10669 ix86_tls_symbol = gen_rtx_SYMBOL_REF (Pmode, (TARGET_GNU_TLS
10670 ? "___tls_get_addr"
10671 : "__tls_get_addr"));
10672 }
10673
10674 return ix86_tls_symbol;
10675 }
10676 \f
10677 /* Calculate the length of the memory address in the instruction
10678 encoding. Does not include the one-byte modrm, opcode, or prefix. */
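/* For example, (plus (reg) (const_int 4)) needs only an 8-bit displacement,
   so the result is 1, while a base+index address with a displacement that
   does not fit in 8 bits needs a SIB byte plus a 32-bit displacement and
   yields 5.  */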
10679
10680 static int
10681 memory_address_length (addr)
10682 rtx addr;
10683 {
10684 struct ix86_address parts;
10685 rtx base, index, disp;
10686 int len;
10687
10688 if (GET_CODE (addr) == PRE_DEC
10689 || GET_CODE (addr) == POST_INC
10690 || GET_CODE (addr) == PRE_MODIFY
10691 || GET_CODE (addr) == POST_MODIFY)
10692 return 0;
10693
10694 if (! ix86_decompose_address (addr, &parts))
10695 abort ();
10696
10697 base = parts.base;
10698 index = parts.index;
10699 disp = parts.disp;
10700 len = 0;
10701
10702 /* Register Indirect. */
10703 if (base && !index && !disp)
10704 {
10705 /* Special cases: ebp and esp need the two-byte modrm form. */
10706 if (addr == stack_pointer_rtx
10707 || addr == arg_pointer_rtx
10708 || addr == frame_pointer_rtx
10709 || addr == hard_frame_pointer_rtx)
10710 len = 1;
10711 }
10712
10713 /* Direct Addressing. */
10714 else if (disp && !base && !index)
10715 len = 4;
10716
10717 else
10718 {
10719 /* Find the length of the displacement constant. */
10720 if (disp)
10721 {
10722 if (GET_CODE (disp) == CONST_INT
10723 && CONST_OK_FOR_LETTER_P (INTVAL (disp), 'K'))
10724 len = 1;
10725 else
10726 len = 4;
10727 }
10728
10729 /* An index requires the two-byte modrm form. */
10730 if (index)
10731 len += 1;
10732 }
10733
10734 return len;
10735 }
10736
10737 /* Compute default value for "length_immediate" attribute. When SHORTFORM
10738 is set, expect that the insn has an 8-bit immediate alternative. */
10739 int
10740 ix86_attr_length_immediate_default (insn, shortform)
10741 rtx insn;
10742 int shortform;
10743 {
10744 int len = 0;
10745 int i;
10746 extract_insn_cached (insn);
10747 for (i = recog_data.n_operands - 1; i >= 0; --i)
10748 if (CONSTANT_P (recog_data.operand[i]))
10749 {
10750 if (len)
10751 abort ();
10752 if (shortform
10753 && GET_CODE (recog_data.operand[i]) == CONST_INT
10754 && CONST_OK_FOR_LETTER_P (INTVAL (recog_data.operand[i]), 'K'))
10755 len = 1;
10756 else
10757 {
10758 switch (get_attr_mode (insn))
10759 {
10760 case MODE_QI:
10761 len+=1;
10762 break;
10763 case MODE_HI:
10764 len+=2;
10765 break;
10766 case MODE_SI:
10767 len+=4;
10768 break;
10769 /* Immediates for DImode instructions are encoded as 32-bit sign-extended values. */
10770 case MODE_DI:
10771 len+=4;
10772 break;
10773 default:
10774 fatal_insn ("unknown insn mode", insn);
10775 }
10776 }
10777 }
10778 return len;
10779 }
10780 /* Compute default value for "length_address" attribute. */
10781 int
10782 ix86_attr_length_address_default (insn)
10783 rtx insn;
10784 {
10785 int i;
10786 extract_insn_cached (insn);
10787 for (i = recog_data.n_operands - 1; i >= 0; --i)
10788 if (GET_CODE (recog_data.operand[i]) == MEM)
10789 {
10790 return memory_address_length (XEXP (recog_data.operand[i], 0));
10792 }
10793 return 0;
10794 }
10795 \f
10796 /* Return the maximum number of instructions a cpu can issue. */
10797
10798 static int
10799 ix86_issue_rate ()
10800 {
10801 switch (ix86_cpu)
10802 {
10803 case PROCESSOR_PENTIUM:
10804 case PROCESSOR_K6:
10805 return 2;
10806
10807 case PROCESSOR_PENTIUMPRO:
10808 case PROCESSOR_PENTIUM4:
10809 case PROCESSOR_ATHLON:
10810 return 3;
10811
10812 default:
10813 return 1;
10814 }
10815 }
10816
10817 /* A subroutine of ix86_adjust_cost -- return true iff INSN reads flags set
10818 by DEP_INSN and nothing else set by DEP_INSN. */
10819
10820 static int
10821 ix86_flags_dependant (insn, dep_insn, insn_type)
10822 rtx insn, dep_insn;
10823 enum attr_type insn_type;
10824 {
10825 rtx set, set2;
10826
10827 /* Simplify the test for uninteresting insns. */
10828 if (insn_type != TYPE_SETCC
10829 && insn_type != TYPE_ICMOV
10830 && insn_type != TYPE_FCMOV
10831 && insn_type != TYPE_IBR)
10832 return 0;
10833
10834 if ((set = single_set (dep_insn)) != 0)
10835 {
10836 set = SET_DEST (set);
10837 set2 = NULL_RTX;
10838 }
10839 else if (GET_CODE (PATTERN (dep_insn)) == PARALLEL
10840 && XVECLEN (PATTERN (dep_insn), 0) == 2
10841 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 0)) == SET
10842 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 1)) == SET)
10843 {
10844 set = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
10845 set2 = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 1));
10846 }
10847 else
10848 return 0;
10849
10850 if (GET_CODE (set) != REG || REGNO (set) != FLAGS_REG)
10851 return 0;
10852
10853 /* This test is true if the dependent insn reads the flags but
10854 not any other potentially set register. */
10855 if (!reg_overlap_mentioned_p (set, PATTERN (insn)))
10856 return 0;
10857
10858 if (set2 && reg_overlap_mentioned_p (set2, PATTERN (insn)))
10859 return 0;
10860
10861 return 1;
10862 }
10863
10864 /* A subroutine of ix86_adjust_cost -- return true iff INSN has a memory
10865 address with operands set by DEP_INSN. */
10866
10867 static int
10868 ix86_agi_dependant (insn, dep_insn, insn_type)
10869 rtx insn, dep_insn;
10870 enum attr_type insn_type;
10871 {
10872 rtx addr;
10873
10874 if (insn_type == TYPE_LEA
10875 && TARGET_PENTIUM)
10876 {
10877 addr = PATTERN (insn);
10878 if (GET_CODE (addr) == SET)
10879 ;
10880 else if (GET_CODE (addr) == PARALLEL
10881 && GET_CODE (XVECEXP (addr, 0, 0)) == SET)
10882 addr = XVECEXP (addr, 0, 0);
10883 else
10884 abort ();
10885 addr = SET_SRC (addr);
10886 }
10887 else
10888 {
10889 int i;
10890 extract_insn_cached (insn);
10891 for (i = recog_data.n_operands - 1; i >= 0; --i)
10892 if (GET_CODE (recog_data.operand[i]) == MEM)
10893 {
10894 addr = XEXP (recog_data.operand[i], 0);
10895 goto found;
10896 }
10897 return 0;
10898 found:;
10899 }
10900
10901 return modified_in_p (addr, dep_insn);
10902 }
10903
10904 static int
10905 ix86_adjust_cost (insn, link, dep_insn, cost)
10906 rtx insn, link, dep_insn;
10907 int cost;
10908 {
10909 enum attr_type insn_type, dep_insn_type;
10910 enum attr_memory memory, dep_memory;
10911 rtx set, set2;
10912 int dep_insn_code_number;
10913
10914 /* Anti and output dependencies have zero cost on all CPUs. */
10915 if (REG_NOTE_KIND (link) != 0)
10916 return 0;
10917
10918 dep_insn_code_number = recog_memoized (dep_insn);
10919
10920 /* If we can't recognize the insns, we can't really do anything. */
10921 if (dep_insn_code_number < 0 || recog_memoized (insn) < 0)
10922 return cost;
10923
10924 insn_type = get_attr_type (insn);
10925 dep_insn_type = get_attr_type (dep_insn);
10926
10927 switch (ix86_cpu)
10928 {
10929 case PROCESSOR_PENTIUM:
10930 /* Address Generation Interlock adds a cycle of latency. */
10931 if (ix86_agi_dependant (insn, dep_insn, insn_type))
10932 cost += 1;
10933
10934 /* ??? Compares pair with jump/setcc. */
10935 if (ix86_flags_dependant (insn, dep_insn, insn_type))
10936 cost = 0;
10937
10938 /* Floating point stores require the value to be ready one cycle earlier. */
10939 if (insn_type == TYPE_FMOV
10940 && get_attr_memory (insn) == MEMORY_STORE
10941 && !ix86_agi_dependant (insn, dep_insn, insn_type))
10942 cost += 1;
10943 break;
10944
10945 case PROCESSOR_PENTIUMPRO:
10946 memory = get_attr_memory (insn);
10947 dep_memory = get_attr_memory (dep_insn);
10948
10949 /* Since we can't represent delayed latencies of load+operation,
10950 increase the cost here for non-imov insns. */
10951 if (dep_insn_type != TYPE_IMOV
10952 && dep_insn_type != TYPE_FMOV
10953 && (dep_memory == MEMORY_LOAD || dep_memory == MEMORY_BOTH))
10954 cost += 1;
10955
10956 /* INT->FP conversion is expensive. */
10957 if (get_attr_fp_int_src (dep_insn))
10958 cost += 5;
10959
10960 /* There is one cycle extra latency between an FP op and a store. */
10961 if (insn_type == TYPE_FMOV
10962 && (set = single_set (dep_insn)) != NULL_RTX
10963 && (set2 = single_set (insn)) != NULL_RTX
10964 && rtx_equal_p (SET_DEST (set), SET_SRC (set2))
10965 && GET_CODE (SET_DEST (set2)) == MEM)
10966 cost += 1;
10967
10968 /* Show the ability of the reorder buffer to hide the latency of a load
10969 by executing it in parallel with the previous instruction when the
10970 previous instruction is not needed to compute the address. */
10971 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
10972 && !ix86_agi_dependant (insn, dep_insn, insn_type))
10973 {
10974 /* Claim moves to take one cycle, as the core can issue one load
10975 at a time and the next load can start a cycle later. */
10976 if (dep_insn_type == TYPE_IMOV
10977 || dep_insn_type == TYPE_FMOV)
10978 cost = 1;
10979 else if (cost > 1)
10980 cost--;
10981 }
10982 break;
10983
10984 case PROCESSOR_K6:
10985 memory = get_attr_memory (insn);
10986 dep_memory = get_attr_memory (dep_insn);
10987 /* The esp dependency is resolved before the instruction is really
10988 finished. */
10989 if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
10990 && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
10991 return 1;
10992
10993 /* Since we can't represent delayed latencies of load+operation,
10994 increase the cost here for non-imov insns. */
10995 if (dep_memory == MEMORY_LOAD || dep_memory == MEMORY_BOTH)
10996 cost += (dep_insn_type != TYPE_IMOV) ? 2 : 1;
10997
10998 /* INT->FP conversion is expensive. */
10999 if (get_attr_fp_int_src (dep_insn))
11000 cost += 5;
11001
11002 /* Show the ability of the reorder buffer to hide the latency of a load
11003 by executing it in parallel with the previous instruction when the
11004 previous instruction is not needed to compute the address. */
11005 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
11006 && !ix86_agi_dependant (insn, dep_insn, insn_type))
11007 {
11008 /* Claim moves to take one cycle, as the core can issue one load
11009 at a time and the next load can start a cycle later. */
11010 if (dep_insn_type == TYPE_IMOV
11011 || dep_insn_type == TYPE_FMOV)
11012 cost = 1;
11013 else if (cost > 2)
11014 cost -= 2;
11015 else
11016 cost = 1;
11017 }
11018 break;
11019
11020 case PROCESSOR_ATHLON:
11021 memory = get_attr_memory (insn);
11022 dep_memory = get_attr_memory (dep_insn);
11023
11024 if (dep_memory == MEMORY_LOAD || dep_memory == MEMORY_BOTH)
11025 {
11026 if (dep_insn_type == TYPE_IMOV || dep_insn_type == TYPE_FMOV)
11027 cost += 2;
11028 else
11029 cost += 3;
11030 }
11031 /* Show the ability of the reorder buffer to hide the latency of a load
11032 by executing it in parallel with the previous instruction when the
11033 previous instruction is not needed to compute the address. */
11034 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
11035 && !ix86_agi_dependant (insn, dep_insn, insn_type))
11036 {
11037 /* Claim moves to take one cycle, as the core can issue one load
11038 at a time and the next load can start a cycle later. */
11039 if (dep_insn_type == TYPE_IMOV
11040 || dep_insn_type == TYPE_FMOV)
11041 cost = 0;
11042 else if (cost >= 3)
11043 cost -= 3;
11044 else
11045 cost = 0;
11046 }
11047
11048 default:
11049 break;
11050 }
11051
11052 return cost;
11053 }
11054
11055 static union
11056 {
11057 struct ppro_sched_data
11058 {
11059 rtx decode[3];
11060 int issued_this_cycle;
11061 } ppro;
11062 } ix86_sched_data;
11063
11064 static enum attr_ppro_uops
11065 ix86_safe_ppro_uops (insn)
11066 rtx insn;
11067 {
11068 if (recog_memoized (insn) >= 0)
11069 return get_attr_ppro_uops (insn);
11070 else
11071 return PPRO_UOPS_MANY;
11072 }
11073
11074 static void
11075 ix86_dump_ppro_packet (dump)
11076 FILE *dump;
11077 {
11078 if (ix86_sched_data.ppro.decode[0])
11079 {
11080 fprintf (dump, "PPRO packet: %d",
11081 INSN_UID (ix86_sched_data.ppro.decode[0]));
11082 if (ix86_sched_data.ppro.decode[1])
11083 fprintf (dump, " %d", INSN_UID (ix86_sched_data.ppro.decode[1]));
11084 if (ix86_sched_data.ppro.decode[2])
11085 fprintf (dump, " %d", INSN_UID (ix86_sched_data.ppro.decode[2]));
11086 fputc ('\n', dump);
11087 }
11088 }
11089
11090 /* We're beginning a new block. Initialize data structures as necessary. */
11091
11092 static void
11093 ix86_sched_init (dump, sched_verbose, veclen)
11094 FILE *dump ATTRIBUTE_UNUSED;
11095 int sched_verbose ATTRIBUTE_UNUSED;
11096 int veclen ATTRIBUTE_UNUSED;
11097 {
11098 memset (&ix86_sched_data, 0, sizeof (ix86_sched_data));
11099 }
11100
11101 /* Shift INSN to SLOT, and shift everything else down. */
11102
11103 static void
11104 ix86_reorder_insn (insnp, slot)
11105 rtx *insnp, *slot;
11106 {
11107 if (insnp != slot)
11108 {
11109 rtx insn = *insnp;
11110 do
11111 insnp[0] = insnp[1];
11112 while (++insnp != slot);
11113 *insnp = insn;
11114 }
11115 }
11116
11117 static void
11118 ix86_sched_reorder_ppro (ready, e_ready)
11119 rtx *ready;
11120 rtx *e_ready;
11121 {
11122 rtx decode[3];
11123 enum attr_ppro_uops cur_uops;
11124 int issued_this_cycle;
11125 rtx *insnp;
11126 int i;
11127
11128 /* At this point .ppro.decode contains the state of the three
11129 decoders from last "cycle". That is, those insns that were
11130 actually independent. But here we're scheduling for the
11131 decoder, and we may find things that are decodable in the
11132 same cycle. */
11133
11134 memcpy (decode, ix86_sched_data.ppro.decode, sizeof (decode));
11135 issued_this_cycle = 0;
11136
11137 insnp = e_ready;
11138 cur_uops = ix86_safe_ppro_uops (*insnp);
11139
11140 /* If the decoders are empty, and we have a complex insn at the
11141 head of the priority queue, let it issue without complaint. */
11142 if (decode[0] == NULL)
11143 {
11144 if (cur_uops == PPRO_UOPS_MANY)
11145 {
11146 decode[0] = *insnp;
11147 goto ppro_done;
11148 }
11149
11150 /* Otherwise, search for a 2-4 uop insn to issue. */
11151 while (cur_uops != PPRO_UOPS_FEW)
11152 {
11153 if (insnp == ready)
11154 break;
11155 cur_uops = ix86_safe_ppro_uops (*--insnp);
11156 }
11157
11158 /* If so, move it to the head of the line. */
11159 if (cur_uops == PPRO_UOPS_FEW)
11160 ix86_reorder_insn (insnp, e_ready);
11161
11162 /* Issue the head of the queue. */
11163 issued_this_cycle = 1;
11164 decode[0] = *e_ready--;
11165 }
11166
11167 /* Look for simple insns to fill in the other two slots. */
11168 for (i = 1; i < 3; ++i)
11169 if (decode[i] == NULL)
11170 {
11171 if (ready > e_ready)
11172 goto ppro_done;
11173
11174 insnp = e_ready;
11175 cur_uops = ix86_safe_ppro_uops (*insnp);
11176 while (cur_uops != PPRO_UOPS_ONE)
11177 {
11178 if (insnp == ready)
11179 break;
11180 cur_uops = ix86_safe_ppro_uops (*--insnp);
11181 }
11182
11183 /* Found one. Move it to the head of the queue and issue it. */
11184 if (cur_uops == PPRO_UOPS_ONE)
11185 {
11186 ix86_reorder_insn (insnp, e_ready);
11187 decode[i] = *e_ready--;
11188 issued_this_cycle++;
11189 continue;
11190 }
11191
11192 /* ??? Didn't find one. Ideally, here we would do a lazy split
11193 of 2-uop insns, issue one and queue the other. */
11194 }
11195
11196 ppro_done:
11197 if (issued_this_cycle == 0)
11198 issued_this_cycle = 1;
11199 ix86_sched_data.ppro.issued_this_cycle = issued_this_cycle;
11200 }
11201
11202 /* We are about to begin issuing insns for this clock cycle.
11203 Override the default sort algorithm to better slot instructions. */
11204 static int
11205 ix86_sched_reorder (dump, sched_verbose, ready, n_readyp, clock_var)
11206 FILE *dump ATTRIBUTE_UNUSED;
11207 int sched_verbose ATTRIBUTE_UNUSED;
11208 rtx *ready;
11209 int *n_readyp;
11210 int clock_var ATTRIBUTE_UNUSED;
11211 {
11212 int n_ready = *n_readyp;
11213 rtx *e_ready = ready + n_ready - 1;
11214
11215 /* Make sure to go ahead and initialize key items in
11216 ix86_sched_data if we are not going to bother trying to
11217 reorder the ready queue. */
11218 if (n_ready < 2)
11219 {
11220 ix86_sched_data.ppro.issued_this_cycle = 1;
11221 goto out;
11222 }
11223
11224 switch (ix86_cpu)
11225 {
11226 default:
11227 break;
11228
11229 case PROCESSOR_PENTIUMPRO:
11230 ix86_sched_reorder_ppro (ready, e_ready);
11231 break;
11232 }
11233
11234 out:
11235 return ix86_issue_rate ();
11236 }
11237
11238 /* We are about to issue INSN. Return the number of insns left on the
11239 ready queue that can be issued this cycle. */
11240
11241 static int
11242 ix86_variable_issue (dump, sched_verbose, insn, can_issue_more)
11243 FILE *dump;
11244 int sched_verbose;
11245 rtx insn;
11246 int can_issue_more;
11247 {
11248 int i;
11249 switch (ix86_cpu)
11250 {
11251 default:
11252 return can_issue_more - 1;
11253
11254 case PROCESSOR_PENTIUMPRO:
11255 {
11256 enum attr_ppro_uops uops = ix86_safe_ppro_uops (insn);
11257
11258 if (uops == PPRO_UOPS_MANY)
11259 {
11260 if (sched_verbose)
11261 ix86_dump_ppro_packet (dump);
11262 ix86_sched_data.ppro.decode[0] = insn;
11263 ix86_sched_data.ppro.decode[1] = NULL;
11264 ix86_sched_data.ppro.decode[2] = NULL;
11265 if (sched_verbose)
11266 ix86_dump_ppro_packet (dump);
11267 ix86_sched_data.ppro.decode[0] = NULL;
11268 }
11269 else if (uops == PPRO_UOPS_FEW)
11270 {
11271 if (sched_verbose)
11272 ix86_dump_ppro_packet (dump);
11273 ix86_sched_data.ppro.decode[0] = insn;
11274 ix86_sched_data.ppro.decode[1] = NULL;
11275 ix86_sched_data.ppro.decode[2] = NULL;
11276 }
11277 else
11278 {
11279 for (i = 0; i < 3; ++i)
11280 if (ix86_sched_data.ppro.decode[i] == NULL)
11281 {
11282 ix86_sched_data.ppro.decode[i] = insn;
11283 break;
11284 }
11285 if (i == 3)
11286 abort ();
11287 if (i == 2)
11288 {
11289 if (sched_verbose)
11290 ix86_dump_ppro_packet (dump);
11291 ix86_sched_data.ppro.decode[0] = NULL;
11292 ix86_sched_data.ppro.decode[1] = NULL;
11293 ix86_sched_data.ppro.decode[2] = NULL;
11294 }
11295 }
11296 }
11297 return --ix86_sched_data.ppro.issued_this_cycle;
11298 }
11299 }
11300
11301 static int
11302 ia32_use_dfa_pipeline_interface ()
11303 {
11304 if (ix86_cpu == PROCESSOR_PENTIUM)
11305 return 1;
11306 return 0;
11307 }
11308
11309 /* How many alternative schedules to try. This should be as wide as the
11310 scheduling freedom in the DFA, but no wider. Making this value too
11311 large results in extra work for the scheduler. */
11312
11313 static int
11314 ia32_multipass_dfa_lookahead ()
11315 {
11316 if (ix86_cpu == PROCESSOR_PENTIUM)
11317 return 2;
11318 else
11319 return 0;
11320 }
11321
11322 \f
11323 /* Walk through INSNS and look for MEM references whose address is DSTREG or
11324 SRCREG and set the memory attribute to those of DSTREF and SRCREF, as
11325 appropriate. */
11326
11327 void
11328 ix86_set_move_mem_attrs (insns, dstref, srcref, dstreg, srcreg)
11329 rtx insns;
11330 rtx dstref, srcref, dstreg, srcreg;
11331 {
11332 rtx insn;
11333
11334 for (insn = insns; insn != 0 ; insn = NEXT_INSN (insn))
11335 if (INSN_P (insn))
11336 ix86_set_move_mem_attrs_1 (PATTERN (insn), dstref, srcref,
11337 dstreg, srcreg);
11338 }
11339
11340 /* Subroutine of above to actually do the updating by recursively walking
11341 the rtx. */
11342
11343 static void
11344 ix86_set_move_mem_attrs_1 (x, dstref, srcref, dstreg, srcreg)
11345 rtx x;
11346 rtx dstref, srcref, dstreg, srcreg;
11347 {
11348 enum rtx_code code = GET_CODE (x);
11349 const char *format_ptr = GET_RTX_FORMAT (code);
11350 int i, j;
11351
11352 if (code == MEM && XEXP (x, 0) == dstreg)
11353 MEM_COPY_ATTRIBUTES (x, dstref);
11354 else if (code == MEM && XEXP (x, 0) == srcreg)
11355 MEM_COPY_ATTRIBUTES (x, srcref);
11356
11357 for (i = 0; i < GET_RTX_LENGTH (code); i++, format_ptr++)
11358 {
11359 if (*format_ptr == 'e')
11360 ix86_set_move_mem_attrs_1 (XEXP (x, i), dstref, srcref,
11361 dstreg, srcreg);
11362 else if (*format_ptr == 'E')
11363 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
11364 ix86_set_move_mem_attrs_1 (XVECEXP (x, i, j), dstref, srcref,
11365 dstreg, srcreg);
11366 }
11367 }
11368 \f
11369 /* Compute the alignment given to a constant that is being placed in memory.
11370 EXP is the constant and ALIGN is the alignment that the object would
11371 ordinarily have.
11372 The value of this function is used instead of that alignment to align
11373 the object. */
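/* For example, a DFmode constant that would otherwise get only 32-bit
   alignment is bumped to 64 bits so it can be accessed without penalty,
   and string constants of length 31 or more are raised to 256 bits.  */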
11374
11375 int
11376 ix86_constant_alignment (exp, align)
11377 tree exp;
11378 int align;
11379 {
11380 if (TREE_CODE (exp) == REAL_CST)
11381 {
11382 if (TYPE_MODE (TREE_TYPE (exp)) == DFmode && align < 64)
11383 return 64;
11384 else if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (exp))) && align < 128)
11385 return 128;
11386 }
11387 else if (TREE_CODE (exp) == STRING_CST && TREE_STRING_LENGTH (exp) >= 31
11388 && align < 256)
11389 return 256;
11390
11391 return align;
11392 }
11393
11394 /* Compute the alignment for a static variable.
11395 TYPE is the data type, and ALIGN is the alignment that
11396 the object would ordinarily have. The value of this function is used
11397 instead of that alignment to align the object. */
11398
11399 int
11400 ix86_data_alignment (type, align)
11401 tree type;
11402 int align;
11403 {
11404 if (AGGREGATE_TYPE_P (type)
11405 && TYPE_SIZE (type)
11406 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
11407 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 256
11408 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 256)
11409 return 256;
11410
11411 /* The x86-64 ABI requires arrays greater than 16 bytes to be aligned
11412 to a 16-byte boundary. */
11413 if (TARGET_64BIT)
11414 {
11415 if (AGGREGATE_TYPE_P (type)
11416 && TYPE_SIZE (type)
11417 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
11418 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 128
11419 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
11420 return 128;
11421 }
11422
11423 if (TREE_CODE (type) == ARRAY_TYPE)
11424 {
11425 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
11426 return 64;
11427 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
11428 return 128;
11429 }
11430 else if (TREE_CODE (type) == COMPLEX_TYPE)
11431 {
11432
11433 if (TYPE_MODE (type) == DCmode && align < 64)
11434 return 64;
11435 if (TYPE_MODE (type) == XCmode && align < 128)
11436 return 128;
11437 }
11438 else if ((TREE_CODE (type) == RECORD_TYPE
11439 || TREE_CODE (type) == UNION_TYPE
11440 || TREE_CODE (type) == QUAL_UNION_TYPE)
11441 && TYPE_FIELDS (type))
11442 {
11443 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
11444 return 64;
11445 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
11446 return 128;
11447 }
11448 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
11449 || TREE_CODE (type) == INTEGER_TYPE)
11450 {
11451 if (TYPE_MODE (type) == DFmode && align < 64)
11452 return 64;
11453 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
11454 return 128;
11455 }
11456
11457 return align;
11458 }
11459
11460 /* Compute the alignment for a local variable.
11461 TYPE is the data type, and ALIGN is the alignment that
11462 the object would ordinarily have. The value of this macro is used
11463 instead of that alignment to align the object. */
11464
11465 int
11466 ix86_local_alignment (type, align)
11467 tree type;
11468 int align;
11469 {
11470 /* The x86-64 ABI requires arrays greater than 16 bytes to be aligned
11471 to a 16-byte boundary. */
11472 if (TARGET_64BIT)
11473 {
11474 if (AGGREGATE_TYPE_P (type)
11475 && TYPE_SIZE (type)
11476 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
11477 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 16
11478 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
11479 return 128;
11480 }
11481 if (TREE_CODE (type) == ARRAY_TYPE)
11482 {
11483 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
11484 return 64;
11485 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
11486 return 128;
11487 }
11488 else if (TREE_CODE (type) == COMPLEX_TYPE)
11489 {
11490 if (TYPE_MODE (type) == DCmode && align < 64)
11491 return 64;
11492 if (TYPE_MODE (type) == XCmode && align < 128)
11493 return 128;
11494 }
11495 else if ((TREE_CODE (type) == RECORD_TYPE
11496 || TREE_CODE (type) == UNION_TYPE
11497 || TREE_CODE (type) == QUAL_UNION_TYPE)
11498 && TYPE_FIELDS (type))
11499 {
11500 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
11501 return 64;
11502 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
11503 return 128;
11504 }
11505 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
11506 || TREE_CODE (type) == INTEGER_TYPE)
11507 {
11508
11509 if (TYPE_MODE (type) == DFmode && align < 64)
11510 return 64;
11511 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
11512 return 128;
11513 }
11514 return align;
11515 }
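/* Illustrative sketch, not part of the original source: an automatic
   variable such as a hypothetical

       double a[2];

   inside a function receives at least 64-bit alignment from the DFmode
   array rule, and 128-bit alignment on x86-64, so doubles and SSE data
   on the stack stay naturally aligned.  */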
11516 \f
11517 /* Emit RTL insns to initialize the variable parts of a trampoline.
11518 FNADDR is an RTX for the address of the function's pure code.
11519 CXT is an RTX for the static chain value for the function. */
11520 void
11521 x86_initialize_trampoline (tramp, fnaddr, cxt)
11522 rtx tramp, fnaddr, cxt;
11523 {
11524 if (!TARGET_64BIT)
11525 {
11526 /* Compute offset from the end of the jmp to the target function. */
11527 rtx disp = expand_binop (SImode, sub_optab, fnaddr,
11528 plus_constant (tramp, 10),
11529 NULL_RTX, 1, OPTAB_DIRECT);
11530 emit_move_insn (gen_rtx_MEM (QImode, tramp),
11531 gen_int_mode (0xb9, QImode));
11532 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 1)), cxt);
11533 emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, 5)),
11534 gen_int_mode (0xe9, QImode));
11535 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 6)), disp);
11536 }
11537 else
11538 {
11539 int offset = 0;
11540 /* Try to load the address using the shorter movl instead of movabs.
11541 We may want to support movq for kernel mode, but the kernel does not
11542 use trampolines at the moment.  */
11543 if (x86_64_zero_extended_value (fnaddr))
11544 {
11545 fnaddr = copy_to_mode_reg (DImode, fnaddr);
11546 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
11547 gen_int_mode (0xbb41, HImode));
11548 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, offset + 2)),
11549 gen_lowpart (SImode, fnaddr));
11550 offset += 6;
11551 }
11552 else
11553 {
11554 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
11555 gen_int_mode (0xbb49, HImode));
11556 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
11557 fnaddr);
11558 offset += 10;
11559 }
11560 /* Load static chain using movabs to r10. */
11561 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
11562 gen_int_mode (0xba49, HImode));
11563 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
11564 cxt);
11565 offset += 10;
11566 /* Jump to r11.  */
11567 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
11568 gen_int_mode (0xff49, HImode));
11569 emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, offset+2)),
11570 gen_int_mode (0xe3, QImode));
11571 offset += 3;
11572 if (offset > TRAMPOLINE_SIZE)
11573 abort ();
11574 }
11575 }
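/* Byte layout of the trampolines emitted above (annotation only; the
   mnemonics are simply the decoding of the opcode bytes stored by the
   emit_move_insn calls):

     32-bit (10 bytes):
       b9 <cxt:4>         movl   $cxt, %ecx
       e9 <disp:4>        jmp    fnaddr      (disp relative to tramp+10)

     64-bit, fnaddr zero-extendable (19 bytes):
       41 bb <fnaddr:4>   movl   $fnaddr, %r11d
       49 ba <cxt:8>      movabs $cxt, %r10
       49 ff e3           jmp    *%r11

     64-bit, general case (23 bytes):
       49 bb <fnaddr:8>   movabs $fnaddr, %r11
       49 ba <cxt:8>      movabs $cxt, %r10
       49 ff e3           jmp    *%r11  */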
11576 \f
11577 #define def_builtin(MASK, NAME, TYPE, CODE) \
11578 do { \
11579 if ((MASK) & target_flags) \
11580 builtin_function ((NAME), (TYPE), (CODE), BUILT_IN_MD, \
11581 NULL, NULL_TREE); \
11582 } while (0)
11583
11584 struct builtin_description
11585 {
11586 const unsigned int mask;
11587 const enum insn_code icode;
11588 const char *const name;
11589 const enum ix86_builtins code;
11590 const enum rtx_code comparison;
11591 const unsigned int flag;
11592 };
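/* Reading the tables below (annotation only): each entry gives the
   target_flags mask that must be enabled, the insn pattern used to expand
   the builtin, its user-visible name (0 when the builtin is registered by
   hand with a more specific type later), the IX86_BUILTIN_* code, and for
   comparison builtins the rtx comparison code plus a flag for comparisons
   expressed through their swapped counterpart (e.g. __builtin_ia32_comigt
   is the LT entry with the flag set).  def_builtin itself only registers
   a builtin when the mask bits are present in target_flags, as in

       def_builtin (MASK_MMX, "__builtin_ia32_emms", void_ftype_void,
                    IX86_BUILTIN_EMMS);  */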
11593
11594 /* Used for builtins that are enabled both by -msse and -msse2. */
11595 #define MASK_SSE1 (MASK_SSE | MASK_SSE2)
11596
11597 static const struct builtin_description bdesc_comi[] =
11598 {
11599 { MASK_SSE1, CODE_FOR_sse_comi, "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS, EQ, 0 },
11600 { MASK_SSE1, CODE_FOR_sse_comi, "__builtin_ia32_comilt", IX86_BUILTIN_COMILTSS, LT, 0 },
11601 { MASK_SSE1, CODE_FOR_sse_comi, "__builtin_ia32_comile", IX86_BUILTIN_COMILESS, LE, 0 },
11602 { MASK_SSE1, CODE_FOR_sse_comi, "__builtin_ia32_comigt", IX86_BUILTIN_COMIGTSS, LT, 1 },
11603 { MASK_SSE1, CODE_FOR_sse_comi, "__builtin_ia32_comige", IX86_BUILTIN_COMIGESS, LE, 1 },
11604 { MASK_SSE1, CODE_FOR_sse_comi, "__builtin_ia32_comineq", IX86_BUILTIN_COMINEQSS, NE, 0 },
11605 { MASK_SSE1, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomieq", IX86_BUILTIN_UCOMIEQSS, EQ, 0 },
11606 { MASK_SSE1, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomilt", IX86_BUILTIN_UCOMILTSS, LT, 0 },
11607 { MASK_SSE1, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomile", IX86_BUILTIN_UCOMILESS, LE, 0 },
11608 { MASK_SSE1, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomigt", IX86_BUILTIN_UCOMIGTSS, LT, 1 },
11609 { MASK_SSE1, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomige", IX86_BUILTIN_UCOMIGESS, LE, 1 },
11610 { MASK_SSE1, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS, NE, 0 },
11611 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdeq", IX86_BUILTIN_COMIEQSD, EQ, 0 },
11612 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdlt", IX86_BUILTIN_COMILTSD, LT, 0 },
11613 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdle", IX86_BUILTIN_COMILESD, LE, 0 },
11614 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdgt", IX86_BUILTIN_COMIGTSD, LT, 1 },
11615 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdge", IX86_BUILTIN_COMIGESD, LE, 1 },
11616 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdneq", IX86_BUILTIN_COMINEQSD, NE, 0 },
11617 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdeq", IX86_BUILTIN_UCOMIEQSD, EQ, 0 },
11618 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdlt", IX86_BUILTIN_UCOMILTSD, LT, 0 },
11619 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdle", IX86_BUILTIN_UCOMILESD, LE, 0 },
11620 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdgt", IX86_BUILTIN_UCOMIGTSD, LT, 1 },
11621 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdge", IX86_BUILTIN_UCOMIGESD, LE, 1 },
11622 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdneq", IX86_BUILTIN_UCOMINEQSD, NE, 0 },
11623 };
11624
11625 static const struct builtin_description bdesc_2arg[] =
11626 {
11627 /* SSE */
11628 { MASK_SSE1, CODE_FOR_addv4sf3, "__builtin_ia32_addps", IX86_BUILTIN_ADDPS, 0, 0 },
11629 { MASK_SSE1, CODE_FOR_subv4sf3, "__builtin_ia32_subps", IX86_BUILTIN_SUBPS, 0, 0 },
11630 { MASK_SSE1, CODE_FOR_mulv4sf3, "__builtin_ia32_mulps", IX86_BUILTIN_MULPS, 0, 0 },
11631 { MASK_SSE1, CODE_FOR_divv4sf3, "__builtin_ia32_divps", IX86_BUILTIN_DIVPS, 0, 0 },
11632 { MASK_SSE1, CODE_FOR_vmaddv4sf3, "__builtin_ia32_addss", IX86_BUILTIN_ADDSS, 0, 0 },
11633 { MASK_SSE1, CODE_FOR_vmsubv4sf3, "__builtin_ia32_subss", IX86_BUILTIN_SUBSS, 0, 0 },
11634 { MASK_SSE1, CODE_FOR_vmmulv4sf3, "__builtin_ia32_mulss", IX86_BUILTIN_MULSS, 0, 0 },
11635 { MASK_SSE1, CODE_FOR_vmdivv4sf3, "__builtin_ia32_divss", IX86_BUILTIN_DIVSS, 0, 0 },
11636
11637 { MASK_SSE1, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpeqps", IX86_BUILTIN_CMPEQPS, EQ, 0 },
11638 { MASK_SSE1, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpltps", IX86_BUILTIN_CMPLTPS, LT, 0 },
11639 { MASK_SSE1, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpleps", IX86_BUILTIN_CMPLEPS, LE, 0 },
11640 { MASK_SSE1, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS, LT, 1 },
11641 { MASK_SSE1, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpgeps", IX86_BUILTIN_CMPGEPS, LE, 1 },
11642 { MASK_SSE1, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpunordps", IX86_BUILTIN_CMPUNORDPS, UNORDERED, 0 },
11643 { MASK_SSE1, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpneqps", IX86_BUILTIN_CMPNEQPS, EQ, 0 },
11644 { MASK_SSE1, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpnltps", IX86_BUILTIN_CMPNLTPS, LT, 0 },
11645 { MASK_SSE1, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpnleps", IX86_BUILTIN_CMPNLEPS, LE, 0 },
11646 { MASK_SSE1, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpngtps", IX86_BUILTIN_CMPNGTPS, LT, 1 },
11647 { MASK_SSE1, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpngeps", IX86_BUILTIN_CMPNGEPS, LE, 1 },
11648 { MASK_SSE1, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpordps", IX86_BUILTIN_CMPORDPS, UNORDERED, 0 },
11649 { MASK_SSE1, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpeqss", IX86_BUILTIN_CMPEQSS, EQ, 0 },
11650 { MASK_SSE1, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpltss", IX86_BUILTIN_CMPLTSS, LT, 0 },
11651 { MASK_SSE1, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpless", IX86_BUILTIN_CMPLESS, LE, 0 },
11652 { MASK_SSE1, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpgtss", IX86_BUILTIN_CMPGTSS, LT, 1 },
11653 { MASK_SSE1, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpgess", IX86_BUILTIN_CMPGESS, LE, 1 },
11654 { MASK_SSE1, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpunordss", IX86_BUILTIN_CMPUNORDSS, UNORDERED, 0 },
11655 { MASK_SSE1, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpneqss", IX86_BUILTIN_CMPNEQSS, EQ, 0 },
11656 { MASK_SSE1, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpnltss", IX86_BUILTIN_CMPNLTSS, LT, 0 },
11657 { MASK_SSE1, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS, LE, 0 },
11658 { MASK_SSE1, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpngtss", IX86_BUILTIN_CMPNGTSS, LT, 1 },
11659 { MASK_SSE1, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpngess", IX86_BUILTIN_CMPNGESS, LE, 1 },
11660 { MASK_SSE1, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS, UNORDERED, 0 },
11661
11662 { MASK_SSE1, CODE_FOR_sminv4sf3, "__builtin_ia32_minps", IX86_BUILTIN_MINPS, 0, 0 },
11663 { MASK_SSE1, CODE_FOR_smaxv4sf3, "__builtin_ia32_maxps", IX86_BUILTIN_MAXPS, 0, 0 },
11664 { MASK_SSE1, CODE_FOR_vmsminv4sf3, "__builtin_ia32_minss", IX86_BUILTIN_MINSS, 0, 0 },
11665 { MASK_SSE1, CODE_FOR_vmsmaxv4sf3, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS, 0, 0 },
11666
11667 { MASK_SSE1, CODE_FOR_sse_movss, "__builtin_ia32_movss", IX86_BUILTIN_MOVSS, 0, 0 },
11668 { MASK_SSE1, CODE_FOR_sse_movhlps, "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS, 0, 0 },
11669 { MASK_SSE1, CODE_FOR_sse_movlhps, "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS, 0, 0 },
11670 { MASK_SSE1, CODE_FOR_sse_unpckhps, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS, 0, 0 },
11671 { MASK_SSE1, CODE_FOR_sse_unpcklps, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS, 0, 0 },
11672
11673 /* MMX */
11674 { MASK_MMX, CODE_FOR_addv8qi3, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB, 0, 0 },
11675 { MASK_MMX, CODE_FOR_addv4hi3, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW, 0, 0 },
11676 { MASK_MMX, CODE_FOR_addv2si3, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD, 0, 0 },
11677 { MASK_MMX, CODE_FOR_subv8qi3, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB, 0, 0 },
11678 { MASK_MMX, CODE_FOR_subv4hi3, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW, 0, 0 },
11679 { MASK_MMX, CODE_FOR_subv2si3, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD, 0, 0 },
11680
11681 { MASK_MMX, CODE_FOR_ssaddv8qi3, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB, 0, 0 },
11682 { MASK_MMX, CODE_FOR_ssaddv4hi3, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW, 0, 0 },
11683 { MASK_MMX, CODE_FOR_sssubv8qi3, "__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB, 0, 0 },
11684 { MASK_MMX, CODE_FOR_sssubv4hi3, "__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW, 0, 0 },
11685 { MASK_MMX, CODE_FOR_usaddv8qi3, "__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB, 0, 0 },
11686 { MASK_MMX, CODE_FOR_usaddv4hi3, "__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW, 0, 0 },
11687 { MASK_MMX, CODE_FOR_ussubv8qi3, "__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB, 0, 0 },
11688 { MASK_MMX, CODE_FOR_ussubv4hi3, "__builtin_ia32_psubusw", IX86_BUILTIN_PSUBUSW, 0, 0 },
11689
11690 { MASK_MMX, CODE_FOR_mulv4hi3, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW, 0, 0 },
11691 { MASK_MMX, CODE_FOR_smulv4hi3_highpart, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW, 0, 0 },
11692 { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_umulv4hi3_highpart, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW, 0, 0 },
11693
11694 { MASK_MMX, CODE_FOR_mmx_anddi3, "__builtin_ia32_pand", IX86_BUILTIN_PAND, 0, 0 },
11695 { MASK_MMX, CODE_FOR_mmx_nanddi3, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN, 0, 0 },
11696 { MASK_MMX, CODE_FOR_mmx_iordi3, "__builtin_ia32_por", IX86_BUILTIN_POR, 0, 0 },
11697 { MASK_MMX, CODE_FOR_mmx_xordi3, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR, 0, 0 },
11698
11699 { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB, 0, 0 },
11700 { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_mmx_uavgv4hi3, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW, 0, 0 },
11701
11702 { MASK_MMX, CODE_FOR_eqv8qi3, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB, 0, 0 },
11703 { MASK_MMX, CODE_FOR_eqv4hi3, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW, 0, 0 },
11704 { MASK_MMX, CODE_FOR_eqv2si3, "__builtin_ia32_pcmpeqd", IX86_BUILTIN_PCMPEQD, 0, 0 },
11705 { MASK_MMX, CODE_FOR_gtv8qi3, "__builtin_ia32_pcmpgtb", IX86_BUILTIN_PCMPGTB, 0, 0 },
11706 { MASK_MMX, CODE_FOR_gtv4hi3, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW, 0, 0 },
11707 { MASK_MMX, CODE_FOR_gtv2si3, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD, 0, 0 },
11708
11709 { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_umaxv8qi3, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB, 0, 0 },
11710 { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_smaxv4hi3, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW, 0, 0 },
11711 { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_uminv8qi3, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB, 0, 0 },
11712 { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_sminv4hi3, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW, 0, 0 },
11713
11714 { MASK_MMX, CODE_FOR_mmx_punpckhbw, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW, 0, 0 },
11715 { MASK_MMX, CODE_FOR_mmx_punpckhwd, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD, 0, 0 },
11716 { MASK_MMX, CODE_FOR_mmx_punpckhdq, "__builtin_ia32_punpckhdq", IX86_BUILTIN_PUNPCKHDQ, 0, 0 },
11717 { MASK_MMX, CODE_FOR_mmx_punpcklbw, "__builtin_ia32_punpcklbw", IX86_BUILTIN_PUNPCKLBW, 0, 0 },
11718 { MASK_MMX, CODE_FOR_mmx_punpcklwd, "__builtin_ia32_punpcklwd", IX86_BUILTIN_PUNPCKLWD, 0, 0 },
11719 { MASK_MMX, CODE_FOR_mmx_punpckldq, "__builtin_ia32_punpckldq", IX86_BUILTIN_PUNPCKLDQ, 0, 0 },
11720
11721 /* Special. */
11722 { MASK_MMX, CODE_FOR_mmx_packsswb, 0, IX86_BUILTIN_PACKSSWB, 0, 0 },
11723 { MASK_MMX, CODE_FOR_mmx_packssdw, 0, IX86_BUILTIN_PACKSSDW, 0, 0 },
11724 { MASK_MMX, CODE_FOR_mmx_packuswb, 0, IX86_BUILTIN_PACKUSWB, 0, 0 },
11725
11726 { MASK_SSE1, CODE_FOR_cvtpi2ps, 0, IX86_BUILTIN_CVTPI2PS, 0, 0 },
11727 { MASK_SSE1, CODE_FOR_cvtsi2ss, 0, IX86_BUILTIN_CVTSI2SS, 0, 0 },
11728
11729 { MASK_MMX, CODE_FOR_ashlv4hi3, 0, IX86_BUILTIN_PSLLW, 0, 0 },
11730 { MASK_MMX, CODE_FOR_ashlv4hi3, 0, IX86_BUILTIN_PSLLWI, 0, 0 },
11731 { MASK_MMX, CODE_FOR_ashlv2si3, 0, IX86_BUILTIN_PSLLD, 0, 0 },
11732 { MASK_MMX, CODE_FOR_ashlv2si3, 0, IX86_BUILTIN_PSLLDI, 0, 0 },
11733 { MASK_MMX, CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQ, 0, 0 },
11734 { MASK_MMX, CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQI, 0, 0 },
11735
11736 { MASK_MMX, CODE_FOR_lshrv4hi3, 0, IX86_BUILTIN_PSRLW, 0, 0 },
11737 { MASK_MMX, CODE_FOR_lshrv4hi3, 0, IX86_BUILTIN_PSRLWI, 0, 0 },
11738 { MASK_MMX, CODE_FOR_lshrv2si3, 0, IX86_BUILTIN_PSRLD, 0, 0 },
11739 { MASK_MMX, CODE_FOR_lshrv2si3, 0, IX86_BUILTIN_PSRLDI, 0, 0 },
11740 { MASK_MMX, CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQ, 0, 0 },
11741 { MASK_MMX, CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQI, 0, 0 },
11742
11743 { MASK_MMX, CODE_FOR_ashrv4hi3, 0, IX86_BUILTIN_PSRAW, 0, 0 },
11744 { MASK_MMX, CODE_FOR_ashrv4hi3, 0, IX86_BUILTIN_PSRAWI, 0, 0 },
11745 { MASK_MMX, CODE_FOR_ashrv2si3, 0, IX86_BUILTIN_PSRAD, 0, 0 },
11746 { MASK_MMX, CODE_FOR_ashrv2si3, 0, IX86_BUILTIN_PSRADI, 0, 0 },
11747
11748 { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_mmx_psadbw, 0, IX86_BUILTIN_PSADBW, 0, 0 },
11749 { MASK_MMX, CODE_FOR_mmx_pmaddwd, 0, IX86_BUILTIN_PMADDWD, 0, 0 },
11750
11751 /* SSE2 */
11752 { MASK_SSE2, CODE_FOR_addv2df3, "__builtin_ia32_addpd", IX86_BUILTIN_ADDPD, 0, 0 },
11753 { MASK_SSE2, CODE_FOR_subv2df3, "__builtin_ia32_subpd", IX86_BUILTIN_SUBPD, 0, 0 },
11754 { MASK_SSE2, CODE_FOR_mulv2df3, "__builtin_ia32_mulpd", IX86_BUILTIN_MULPD, 0, 0 },
11755 { MASK_SSE2, CODE_FOR_divv2df3, "__builtin_ia32_divpd", IX86_BUILTIN_DIVPD, 0, 0 },
11756 { MASK_SSE2, CODE_FOR_vmaddv2df3, "__builtin_ia32_addsd", IX86_BUILTIN_ADDSD, 0, 0 },
11757 { MASK_SSE2, CODE_FOR_vmsubv2df3, "__builtin_ia32_subsd", IX86_BUILTIN_SUBSD, 0, 0 },
11758 { MASK_SSE2, CODE_FOR_vmmulv2df3, "__builtin_ia32_mulsd", IX86_BUILTIN_MULSD, 0, 0 },
11759 { MASK_SSE2, CODE_FOR_vmdivv2df3, "__builtin_ia32_divsd", IX86_BUILTIN_DIVSD, 0, 0 },
11760
11761 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpeqpd", IX86_BUILTIN_CMPEQPD, EQ, 0 },
11762 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpltpd", IX86_BUILTIN_CMPLTPD, LT, 0 },
11763 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmplepd", IX86_BUILTIN_CMPLEPD, LE, 0 },
11764 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpgtpd", IX86_BUILTIN_CMPGTPD, LT, 1 },
11765 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpgepd", IX86_BUILTIN_CMPGEPD, LE, 1 },
11766 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpunordpd", IX86_BUILTIN_CMPUNORDPD, UNORDERED, 0 },
11767 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpneqpd", IX86_BUILTIN_CMPNEQPD, EQ, 0 },
11768 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpnltpd", IX86_BUILTIN_CMPNLTPD, LT, 0 },
11769 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpnlepd", IX86_BUILTIN_CMPNLEPD, LE, 0 },
11770 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpngtpd", IX86_BUILTIN_CMPNGTPD, LT, 1 },
11771 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpngepd", IX86_BUILTIN_CMPNGEPD, LE, 1 },
11772 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpordpd", IX86_BUILTIN_CMPORDPD, UNORDERED, 0 },
11773 { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmpeqsd", IX86_BUILTIN_CMPEQSD, EQ, 0 },
11774 { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmpltsd", IX86_BUILTIN_CMPLTSD, LT, 0 },
11775 { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmplesd", IX86_BUILTIN_CMPLESD, LE, 0 },
11776 { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmpgtsd", IX86_BUILTIN_CMPGTSD, LT, 1 },
11777 { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmpgesd", IX86_BUILTIN_CMPGESD, LE, 1 },
11778 { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmpunordsd", IX86_BUILTIN_CMPUNORDSD, UNORDERED, 0 },
11779 { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpneqsd", IX86_BUILTIN_CMPNEQSD, EQ, 0 },
11780 { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpnltsd", IX86_BUILTIN_CMPNLTSD, LT, 0 },
11781 { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpnlesd", IX86_BUILTIN_CMPNLESD, LE, 0 },
11782 { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpngtsd", IX86_BUILTIN_CMPNGTSD, LT, 1 },
11783 { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpngesd", IX86_BUILTIN_CMPNGESD, LE, 1 },
11784 { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpordsd", IX86_BUILTIN_CMPORDSD, UNORDERED, 0 },
11785
11786 { MASK_SSE2, CODE_FOR_sminv2df3, "__builtin_ia32_minpd", IX86_BUILTIN_MINPD, 0, 0 },
11787 { MASK_SSE2, CODE_FOR_smaxv2df3, "__builtin_ia32_maxpd", IX86_BUILTIN_MAXPD, 0, 0 },
11788 { MASK_SSE2, CODE_FOR_vmsminv2df3, "__builtin_ia32_minsd", IX86_BUILTIN_MINSD, 0, 0 },
11789 { MASK_SSE2, CODE_FOR_vmsmaxv2df3, "__builtin_ia32_maxsd", IX86_BUILTIN_MAXSD, 0, 0 },
11790
11791 { MASK_SSE2, CODE_FOR_sse2_anddf3, "__builtin_ia32_andpd", IX86_BUILTIN_ANDPD, 0, 0 },
11792 { MASK_SSE2, CODE_FOR_sse2_nanddf3, "__builtin_ia32_andnpd", IX86_BUILTIN_ANDNPD, 0, 0 },
11793 { MASK_SSE2, CODE_FOR_sse2_iordf3, "__builtin_ia32_orpd", IX86_BUILTIN_ORPD, 0, 0 },
11794 { MASK_SSE2, CODE_FOR_sse2_xordf3, "__builtin_ia32_xorpd", IX86_BUILTIN_XORPD, 0, 0 },
11795
11796 { MASK_SSE2, CODE_FOR_sse2_movsd, "__builtin_ia32_movsd", IX86_BUILTIN_MOVSD, 0, 0 },
11797 { MASK_SSE2, CODE_FOR_sse2_unpckhpd, "__builtin_ia32_unpckhpd", IX86_BUILTIN_UNPCKHPD, 0, 0 },
11798 { MASK_SSE2, CODE_FOR_sse2_unpcklpd, "__builtin_ia32_unpcklpd", IX86_BUILTIN_UNPCKLPD, 0, 0 },
11799
11800 /* SSE2 MMX */
11801 { MASK_SSE2, CODE_FOR_addv16qi3, "__builtin_ia32_paddb128", IX86_BUILTIN_PADDB128, 0, 0 },
11802 { MASK_SSE2, CODE_FOR_addv8hi3, "__builtin_ia32_paddw128", IX86_BUILTIN_PADDW128, 0, 0 },
11803 { MASK_SSE2, CODE_FOR_addv4si3, "__builtin_ia32_paddd128", IX86_BUILTIN_PADDD128, 0, 0 },
11804 { MASK_SSE2, CODE_FOR_addv4si3, "__builtin_ia32_paddq128", IX86_BUILTIN_PADDQ128, 0, 0 },
11805 { MASK_SSE2, CODE_FOR_subv16qi3, "__builtin_ia32_psubb128", IX86_BUILTIN_PSUBB128, 0, 0 },
11806 { MASK_SSE2, CODE_FOR_subv8hi3, "__builtin_ia32_psubw128", IX86_BUILTIN_PSUBW128, 0, 0 },
11807 { MASK_SSE2, CODE_FOR_subv4si3, "__builtin_ia32_psubd128", IX86_BUILTIN_PSUBD128, 0, 0 },
11808 { MASK_SSE2, CODE_FOR_subv4si3, "__builtin_ia32_psubq128", IX86_BUILTIN_PSUBQ128, 0, 0 },
11809
11810 { MASK_SSE2, CODE_FOR_ssaddv16qi3, "__builtin_ia32_paddsb128", IX86_BUILTIN_PADDSB128, 0, 0 },
11811 { MASK_SSE2, CODE_FOR_ssaddv8hi3, "__builtin_ia32_paddsw128", IX86_BUILTIN_PADDSW128, 0, 0 },
11812 { MASK_SSE2, CODE_FOR_sssubv16qi3, "__builtin_ia32_psubsb128", IX86_BUILTIN_PSUBSB128, 0, 0 },
11813 { MASK_SSE2, CODE_FOR_sssubv8hi3, "__builtin_ia32_psubsw128", IX86_BUILTIN_PSUBSW128, 0, 0 },
11814 { MASK_SSE2, CODE_FOR_usaddv16qi3, "__builtin_ia32_paddusb128", IX86_BUILTIN_PADDUSB128, 0, 0 },
11815 { MASK_SSE2, CODE_FOR_usaddv8hi3, "__builtin_ia32_paddusw128", IX86_BUILTIN_PADDUSW128, 0, 0 },
11816 { MASK_SSE2, CODE_FOR_ussubv16qi3, "__builtin_ia32_psubusb128", IX86_BUILTIN_PSUBUSB128, 0, 0 },
11817 { MASK_SSE2, CODE_FOR_ussubv8hi3, "__builtin_ia32_psubusw128", IX86_BUILTIN_PSUBUSW128, 0, 0 },
11818
11819 { MASK_SSE2, CODE_FOR_mulv8hi3, "__builtin_ia32_pmullw128", IX86_BUILTIN_PMULLW128, 0, 0 },
11820 { MASK_SSE2, CODE_FOR_smulv8hi3_highpart, "__builtin_ia32_pmulhw128", IX86_BUILTIN_PMULHW128, 0, 0 },
11821 { MASK_SSE2, CODE_FOR_sse2_umulsidi3, "__builtin_ia32_pmuludq", IX86_BUILTIN_PMULUDQ, 0, 0 },
11822 { MASK_SSE2, CODE_FOR_sse2_umulv2siv2di3, "__builtin_ia32_pmuludq128", IX86_BUILTIN_PMULUDQ128, 0, 0 },
11823
11824 { MASK_SSE2, CODE_FOR_sse2_andv2di3, "__builtin_ia32_pand128", IX86_BUILTIN_PAND128, 0, 0 },
11825 { MASK_SSE2, CODE_FOR_sse2_nandv2di3, "__builtin_ia32_pandn128", IX86_BUILTIN_PANDN128, 0, 0 },
11826 { MASK_SSE2, CODE_FOR_sse2_iorv2di3, "__builtin_ia32_por128", IX86_BUILTIN_POR128, 0, 0 },
11827 { MASK_SSE2, CODE_FOR_sse2_xorv2di3, "__builtin_ia32_pxor128", IX86_BUILTIN_PXOR128, 0, 0 },
11828
11829 { MASK_SSE2, CODE_FOR_sse2_uavgv16qi3, "__builtin_ia32_pavgb128", IX86_BUILTIN_PAVGB128, 0, 0 },
11830 { MASK_SSE2, CODE_FOR_sse2_uavgv8hi3, "__builtin_ia32_pavgw128", IX86_BUILTIN_PAVGW128, 0, 0 },
11831
11832 { MASK_SSE2, CODE_FOR_eqv16qi3, "__builtin_ia32_pcmpeqb128", IX86_BUILTIN_PCMPEQB128, 0, 0 },
11833 { MASK_SSE2, CODE_FOR_eqv8hi3, "__builtin_ia32_pcmpeqw128", IX86_BUILTIN_PCMPEQW128, 0, 0 },
11834 { MASK_SSE2, CODE_FOR_eqv4si3, "__builtin_ia32_pcmpeqd128", IX86_BUILTIN_PCMPEQD128, 0, 0 },
11835 { MASK_SSE2, CODE_FOR_gtv16qi3, "__builtin_ia32_pcmpgtb128", IX86_BUILTIN_PCMPGTB128, 0, 0 },
11836 { MASK_SSE2, CODE_FOR_gtv8hi3, "__builtin_ia32_pcmpgtw128", IX86_BUILTIN_PCMPGTW128, 0, 0 },
11837 { MASK_SSE2, CODE_FOR_gtv4si3, "__builtin_ia32_pcmpgtd128", IX86_BUILTIN_PCMPGTD128, 0, 0 },
11838
11839 { MASK_SSE2, CODE_FOR_umaxv16qi3, "__builtin_ia32_pmaxub128", IX86_BUILTIN_PMAXUB128, 0, 0 },
11840 { MASK_SSE2, CODE_FOR_smaxv8hi3, "__builtin_ia32_pmaxsw128", IX86_BUILTIN_PMAXSW128, 0, 0 },
11841 { MASK_SSE2, CODE_FOR_uminv16qi3, "__builtin_ia32_pminub128", IX86_BUILTIN_PMINUB128, 0, 0 },
11842 { MASK_SSE2, CODE_FOR_sminv8hi3, "__builtin_ia32_pminsw128", IX86_BUILTIN_PMINSW128, 0, 0 },
11843
11844 { MASK_SSE2, CODE_FOR_sse2_punpckhbw, "__builtin_ia32_punpckhbw128", IX86_BUILTIN_PUNPCKHBW128, 0, 0 },
11845 { MASK_SSE2, CODE_FOR_sse2_punpckhwd, "__builtin_ia32_punpckhwd128", IX86_BUILTIN_PUNPCKHWD128, 0, 0 },
11846 { MASK_SSE2, CODE_FOR_sse2_punpckhdq, "__builtin_ia32_punpckhdq128", IX86_BUILTIN_PUNPCKHDQ128, 0, 0 },
11847 { MASK_SSE2, CODE_FOR_sse2_punpcklbw, "__builtin_ia32_punpcklbw128", IX86_BUILTIN_PUNPCKLBW128, 0, 0 },
11848 { MASK_SSE2, CODE_FOR_sse2_punpcklwd, "__builtin_ia32_punpcklwd128", IX86_BUILTIN_PUNPCKLWD128, 0, 0 },
11849 { MASK_SSE2, CODE_FOR_sse2_punpckldq, "__builtin_ia32_punpckldq128", IX86_BUILTIN_PUNPCKLDQ128, 0, 0 },
11850
11851 { MASK_SSE2, CODE_FOR_sse2_packsswb, "__builtin_ia32_packsswb128", IX86_BUILTIN_PACKSSWB128, 0, 0 },
11852 { MASK_SSE2, CODE_FOR_sse2_packssdw, "__builtin_ia32_packssdw128", IX86_BUILTIN_PACKSSDW128, 0, 0 },
11853 { MASK_SSE2, CODE_FOR_sse2_packuswb, "__builtin_ia32_packuswb128", IX86_BUILTIN_PACKUSWB128, 0, 0 },
11854
11855 { MASK_SSE2, CODE_FOR_umulv8hi3_highpart, "__builtin_ia32_pmulhuw128", IX86_BUILTIN_PMULHUW128, 0, 0 },
11856 { MASK_SSE2, CODE_FOR_sse2_psadbw, 0, IX86_BUILTIN_PSADBW128, 0, 0 },
11857
11858 { MASK_SSE2, CODE_FOR_ashlv8hi3_ti, 0, IX86_BUILTIN_PSLLW128, 0, 0 },
11859 { MASK_SSE2, CODE_FOR_ashlv8hi3, 0, IX86_BUILTIN_PSLLWI128, 0, 0 },
11860 { MASK_SSE2, CODE_FOR_ashlv4si3_ti, 0, IX86_BUILTIN_PSLLD128, 0, 0 },
11861 { MASK_SSE2, CODE_FOR_ashlv4si3, 0, IX86_BUILTIN_PSLLDI128, 0, 0 },
11862 { MASK_SSE2, CODE_FOR_ashlv2di3_ti, 0, IX86_BUILTIN_PSLLQ128, 0, 0 },
11863 { MASK_SSE2, CODE_FOR_ashlv2di3, 0, IX86_BUILTIN_PSLLQI128, 0, 0 },
11864
11865 { MASK_SSE2, CODE_FOR_lshrv8hi3_ti, 0, IX86_BUILTIN_PSRLW128, 0, 0 },
11866 { MASK_SSE2, CODE_FOR_lshrv8hi3, 0, IX86_BUILTIN_PSRLWI128, 0, 0 },
11867 { MASK_SSE2, CODE_FOR_lshrv4si3_ti, 0, IX86_BUILTIN_PSRLD128, 0, 0 },
11868 { MASK_SSE2, CODE_FOR_lshrv4si3, 0, IX86_BUILTIN_PSRLDI128, 0, 0 },
11869 { MASK_SSE2, CODE_FOR_lshrv2di3_ti, 0, IX86_BUILTIN_PSRLQ128, 0, 0 },
11870 { MASK_SSE2, CODE_FOR_lshrv2di3, 0, IX86_BUILTIN_PSRLQI128, 0, 0 },
11871
11872 { MASK_SSE2, CODE_FOR_ashrv8hi3_ti, 0, IX86_BUILTIN_PSRAW128, 0, 0 },
11873 { MASK_SSE2, CODE_FOR_ashrv8hi3, 0, IX86_BUILTIN_PSRAWI128, 0, 0 },
11874 { MASK_SSE2, CODE_FOR_ashrv4si3_ti, 0, IX86_BUILTIN_PSRAD128, 0, 0 },
11875 { MASK_SSE2, CODE_FOR_ashrv4si3, 0, IX86_BUILTIN_PSRADI128, 0, 0 },
11876
11877 { MASK_SSE2, CODE_FOR_sse2_pmaddwd, 0, IX86_BUILTIN_PMADDWD128, 0, 0 },
11878
11879 { MASK_SSE2, CODE_FOR_cvtsi2sd, 0, IX86_BUILTIN_CVTSI2SD, 0, 0 },
11880 { MASK_SSE2, CODE_FOR_cvtsd2ss, 0, IX86_BUILTIN_CVTSD2SS, 0, 0 },
11881 { MASK_SSE2, CODE_FOR_cvtss2sd, 0, IX86_BUILTIN_CVTSS2SD, 0, 0 }
11882 };
11883
11884 static const struct builtin_description bdesc_1arg[] =
11885 {
11886 { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_mmx_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB, 0, 0 },
11887 { MASK_SSE1, CODE_FOR_sse_movmskps, 0, IX86_BUILTIN_MOVMSKPS, 0, 0 },
11888
11889 { MASK_SSE1, CODE_FOR_sqrtv4sf2, 0, IX86_BUILTIN_SQRTPS, 0, 0 },
11890 { MASK_SSE1, CODE_FOR_rsqrtv4sf2, 0, IX86_BUILTIN_RSQRTPS, 0, 0 },
11891 { MASK_SSE1, CODE_FOR_rcpv4sf2, 0, IX86_BUILTIN_RCPPS, 0, 0 },
11892
11893 { MASK_SSE1, CODE_FOR_cvtps2pi, 0, IX86_BUILTIN_CVTPS2PI, 0, 0 },
11894 { MASK_SSE1, CODE_FOR_cvtss2si, 0, IX86_BUILTIN_CVTSS2SI, 0, 0 },
11895 { MASK_SSE1, CODE_FOR_cvttps2pi, 0, IX86_BUILTIN_CVTTPS2PI, 0, 0 },
11896 { MASK_SSE1, CODE_FOR_cvttss2si, 0, IX86_BUILTIN_CVTTSS2SI, 0, 0 },
11897
11898 { MASK_SSE2, CODE_FOR_sse2_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB128, 0, 0 },
11899 { MASK_SSE2, CODE_FOR_sse2_movmskpd, 0, IX86_BUILTIN_MOVMSKPD, 0, 0 },
11900 { MASK_SSE2, CODE_FOR_sse2_movq2dq, 0, IX86_BUILTIN_MOVQ2DQ, 0, 0 },
11901
11902 { MASK_SSE2, CODE_FOR_sqrtv2df2, 0, IX86_BUILTIN_SQRTPD, 0, 0 },
11903
11904 { MASK_SSE2, CODE_FOR_cvtdq2pd, 0, IX86_BUILTIN_CVTDQ2PD, 0, 0 },
11905 { MASK_SSE2, CODE_FOR_cvtdq2ps, 0, IX86_BUILTIN_CVTDQ2PS, 0, 0 },
11906
11907 { MASK_SSE2, CODE_FOR_cvtpd2dq, 0, IX86_BUILTIN_CVTPD2DQ, 0, 0 },
11908 { MASK_SSE2, CODE_FOR_cvtpd2pi, 0, IX86_BUILTIN_CVTPD2PI, 0, 0 },
11909 { MASK_SSE2, CODE_FOR_cvtpd2ps, 0, IX86_BUILTIN_CVTPD2PS, 0, 0 },
11910 { MASK_SSE2, CODE_FOR_cvttpd2dq, 0, IX86_BUILTIN_CVTTPD2DQ, 0, 0 },
11911 { MASK_SSE2, CODE_FOR_cvttpd2pi, 0, IX86_BUILTIN_CVTTPD2PI, 0, 0 },
11912
11913 { MASK_SSE2, CODE_FOR_cvtpi2pd, 0, IX86_BUILTIN_CVTPI2PD, 0, 0 },
11914
11915 { MASK_SSE2, CODE_FOR_cvtsd2si, 0, IX86_BUILTIN_CVTSD2SI, 0, 0 },
11916 { MASK_SSE2, CODE_FOR_cvttsd2si, 0, IX86_BUILTIN_CVTTSD2SI, 0, 0 },
11917
11918 { MASK_SSE2, CODE_FOR_cvtps2dq, 0, IX86_BUILTIN_CVTPS2DQ, 0, 0 },
11919 { MASK_SSE2, CODE_FOR_cvtps2pd, 0, IX86_BUILTIN_CVTPS2PD, 0, 0 },
11920 { MASK_SSE2, CODE_FOR_cvttps2dq, 0, IX86_BUILTIN_CVTTPS2DQ, 0, 0 }
11921 };
11922
11923 void
11924 ix86_init_builtins ()
11925 {
11926 if (TARGET_MMX)
11927 ix86_init_mmx_sse_builtins ();
11928 }
11929
11930 /* Set up all the MMX/SSE builtins. This is not called if TARGET_MMX
11931 is zero. Otherwise, if TARGET_SSE is not set, only define the MMX
11932 builtins. */
11933 static void
11934 ix86_init_mmx_sse_builtins ()
11935 {
11936 const struct builtin_description * d;
11937 size_t i;
11938
11939 tree pchar_type_node = build_pointer_type (char_type_node);
11940 tree pfloat_type_node = build_pointer_type (float_type_node);
11941 tree pv2si_type_node = build_pointer_type (V2SI_type_node);
11942 tree pv2di_type_node = build_pointer_type (V2DI_type_node);
11943 tree pdi_type_node = build_pointer_type (long_long_unsigned_type_node);
11944
11945 /* Comparisons. */
11946 tree int_ftype_v4sf_v4sf
11947 = build_function_type_list (integer_type_node,
11948 V4SF_type_node, V4SF_type_node, NULL_TREE);
11949 tree v4si_ftype_v4sf_v4sf
11950 = build_function_type_list (V4SI_type_node,
11951 V4SF_type_node, V4SF_type_node, NULL_TREE);
11952 /* MMX/SSE/integer conversions. */
11953 tree int_ftype_v4sf
11954 = build_function_type_list (integer_type_node,
11955 V4SF_type_node, NULL_TREE);
11956 tree int_ftype_v8qi
11957 = build_function_type_list (integer_type_node, V8QI_type_node, NULL_TREE);
11958 tree v4sf_ftype_v4sf_int
11959 = build_function_type_list (V4SF_type_node,
11960 V4SF_type_node, integer_type_node, NULL_TREE);
11961 tree v4sf_ftype_v4sf_v2si
11962 = build_function_type_list (V4SF_type_node,
11963 V4SF_type_node, V2SI_type_node, NULL_TREE);
11964 tree int_ftype_v4hi_int
11965 = build_function_type_list (integer_type_node,
11966 V4HI_type_node, integer_type_node, NULL_TREE);
11967 tree v4hi_ftype_v4hi_int_int
11968 = build_function_type_list (V4HI_type_node, V4HI_type_node,
11969 integer_type_node, integer_type_node,
11970 NULL_TREE);
11971 /* Miscellaneous. */
11972 tree v8qi_ftype_v4hi_v4hi
11973 = build_function_type_list (V8QI_type_node,
11974 V4HI_type_node, V4HI_type_node, NULL_TREE);
11975 tree v4hi_ftype_v2si_v2si
11976 = build_function_type_list (V4HI_type_node,
11977 V2SI_type_node, V2SI_type_node, NULL_TREE);
11978 tree v4sf_ftype_v4sf_v4sf_int
11979 = build_function_type_list (V4SF_type_node,
11980 V4SF_type_node, V4SF_type_node,
11981 integer_type_node, NULL_TREE);
11982 tree v2si_ftype_v4hi_v4hi
11983 = build_function_type_list (V2SI_type_node,
11984 V4HI_type_node, V4HI_type_node, NULL_TREE);
11985 tree v4hi_ftype_v4hi_int
11986 = build_function_type_list (V4HI_type_node,
11987 V4HI_type_node, integer_type_node, NULL_TREE);
11988 tree v4hi_ftype_v4hi_di
11989 = build_function_type_list (V4HI_type_node,
11990 V4HI_type_node, long_long_unsigned_type_node,
11991 NULL_TREE);
11992 tree v2si_ftype_v2si_di
11993 = build_function_type_list (V2SI_type_node,
11994 V2SI_type_node, long_long_unsigned_type_node,
11995 NULL_TREE);
11996 tree void_ftype_void
11997 = build_function_type (void_type_node, void_list_node);
11998 tree void_ftype_unsigned
11999 = build_function_type_list (void_type_node, unsigned_type_node, NULL_TREE);
12000 tree unsigned_ftype_void
12001 = build_function_type (unsigned_type_node, void_list_node);
12002 tree di_ftype_void
12003 = build_function_type (long_long_unsigned_type_node, void_list_node);
12004 tree v4sf_ftype_void
12005 = build_function_type (V4SF_type_node, void_list_node);
12006 tree v2si_ftype_v4sf
12007 = build_function_type_list (V2SI_type_node, V4SF_type_node, NULL_TREE);
12008 /* Loads/stores. */
12009 tree void_ftype_v8qi_v8qi_pchar
12010 = build_function_type_list (void_type_node,
12011 V8QI_type_node, V8QI_type_node,
12012 pchar_type_node, NULL_TREE);
12013 tree v4sf_ftype_pfloat
12014 = build_function_type_list (V4SF_type_node, pfloat_type_node, NULL_TREE);
12015 /* @@@ the type is bogus */
12016 tree v4sf_ftype_v4sf_pv2si
12017 = build_function_type_list (V4SF_type_node,
12018 V4SF_type_node, pv2si_type_node, NULL_TREE);
12019 tree void_ftype_pv2si_v4sf
12020 = build_function_type_list (void_type_node,
12021 pv2si_type_node, V4SF_type_node, NULL_TREE);
12022 tree void_ftype_pfloat_v4sf
12023 = build_function_type_list (void_type_node,
12024 pfloat_type_node, V4SF_type_node, NULL_TREE);
12025 tree void_ftype_pdi_di
12026 = build_function_type_list (void_type_node,
12027 pdi_type_node, long_long_unsigned_type_node,
12028 NULL_TREE);
12029 tree void_ftype_pv2di_v2di
12030 = build_function_type_list (void_type_node,
12031 pv2di_type_node, V2DI_type_node, NULL_TREE);
12032 /* Normal vector unops. */
12033 tree v4sf_ftype_v4sf
12034 = build_function_type_list (V4SF_type_node, V4SF_type_node, NULL_TREE);
12035
12036 /* Normal vector binops. */
12037 tree v4sf_ftype_v4sf_v4sf
12038 = build_function_type_list (V4SF_type_node,
12039 V4SF_type_node, V4SF_type_node, NULL_TREE);
12040 tree v8qi_ftype_v8qi_v8qi
12041 = build_function_type_list (V8QI_type_node,
12042 V8QI_type_node, V8QI_type_node, NULL_TREE);
12043 tree v4hi_ftype_v4hi_v4hi
12044 = build_function_type_list (V4HI_type_node,
12045 V4HI_type_node, V4HI_type_node, NULL_TREE);
12046 tree v2si_ftype_v2si_v2si
12047 = build_function_type_list (V2SI_type_node,
12048 V2SI_type_node, V2SI_type_node, NULL_TREE);
12049 tree di_ftype_di_di
12050 = build_function_type_list (long_long_unsigned_type_node,
12051 long_long_unsigned_type_node,
12052 long_long_unsigned_type_node, NULL_TREE);
12053
12054 tree v2si_ftype_v2sf
12055 = build_function_type_list (V2SI_type_node, V2SF_type_node, NULL_TREE);
12056 tree v2sf_ftype_v2si
12057 = build_function_type_list (V2SF_type_node, V2SI_type_node, NULL_TREE);
12058 tree v2si_ftype_v2si
12059 = build_function_type_list (V2SI_type_node, V2SI_type_node, NULL_TREE);
12060 tree v2sf_ftype_v2sf
12061 = build_function_type_list (V2SF_type_node, V2SF_type_node, NULL_TREE);
12062 tree v2sf_ftype_v2sf_v2sf
12063 = build_function_type_list (V2SF_type_node,
12064 V2SF_type_node, V2SF_type_node, NULL_TREE);
12065 tree v2si_ftype_v2sf_v2sf
12066 = build_function_type_list (V2SI_type_node,
12067 V2SF_type_node, V2SF_type_node, NULL_TREE);
12068 tree pint_type_node = build_pointer_type (integer_type_node);
12069 tree pdouble_type_node = build_pointer_type (double_type_node);
12070 tree int_ftype_v2df_v2df
12071 = build_function_type_list (integer_type_node,
12072 V2DF_type_node, V2DF_type_node, NULL_TREE);
12073
12074 tree ti_ftype_void
12075 = build_function_type (intTI_type_node, void_list_node);
12076 tree ti_ftype_ti_ti
12077 = build_function_type_list (intTI_type_node,
12078 intTI_type_node, intTI_type_node, NULL_TREE);
12079 tree void_ftype_pvoid
12080 = build_function_type_list (void_type_node, ptr_type_node, NULL_TREE);
12081 tree v2di_ftype_di
12082 = build_function_type_list (V2DI_type_node,
12083 long_long_unsigned_type_node, NULL_TREE);
12084 tree v4sf_ftype_v4si
12085 = build_function_type_list (V4SF_type_node, V4SI_type_node, NULL_TREE);
12086 tree v4si_ftype_v4sf
12087 = build_function_type_list (V4SI_type_node, V4SF_type_node, NULL_TREE);
12088 tree v2df_ftype_v4si
12089 = build_function_type_list (V2DF_type_node, V4SI_type_node, NULL_TREE);
12090 tree v4si_ftype_v2df
12091 = build_function_type_list (V4SI_type_node, V2DF_type_node, NULL_TREE);
12092 tree v2si_ftype_v2df
12093 = build_function_type_list (V2SI_type_node, V2DF_type_node, NULL_TREE);
12094 tree v4sf_ftype_v2df
12095 = build_function_type_list (V4SF_type_node, V2DF_type_node, NULL_TREE);
12096 tree v2df_ftype_v2si
12097 = build_function_type_list (V2DF_type_node, V2SI_type_node, NULL_TREE);
12098 tree v2df_ftype_v4sf
12099 = build_function_type_list (V2DF_type_node, V4SF_type_node, NULL_TREE);
12100 tree int_ftype_v2df
12101 = build_function_type_list (integer_type_node, V2DF_type_node, NULL_TREE);
12102 tree v2df_ftype_v2df_int
12103 = build_function_type_list (V2DF_type_node,
12104 V2DF_type_node, integer_type_node, NULL_TREE);
12105 tree v4sf_ftype_v4sf_v2df
12106 = build_function_type_list (V4SF_type_node,
12107 V4SF_type_node, V2DF_type_node, NULL_TREE);
12108 tree v2df_ftype_v2df_v4sf
12109 = build_function_type_list (V2DF_type_node,
12110 V2DF_type_node, V4SF_type_node, NULL_TREE);
12111 tree v2df_ftype_v2df_v2df_int
12112 = build_function_type_list (V2DF_type_node,
12113 V2DF_type_node, V2DF_type_node,
12114 integer_type_node,
12115 NULL_TREE);
12116 tree v2df_ftype_v2df_pv2si
12117 = build_function_type_list (V2DF_type_node,
12118 V2DF_type_node, pv2si_type_node, NULL_TREE);
12119 tree void_ftype_pv2si_v2df
12120 = build_function_type_list (void_type_node,
12121 pv2si_type_node, V2DF_type_node, NULL_TREE);
12122 tree void_ftype_pdouble_v2df
12123 = build_function_type_list (void_type_node,
12124 pdouble_type_node, V2DF_type_node, NULL_TREE);
12125 tree void_ftype_pint_int
12126 = build_function_type_list (void_type_node,
12127 pint_type_node, integer_type_node, NULL_TREE);
12128 tree void_ftype_v16qi_v16qi_pchar
12129 = build_function_type_list (void_type_node,
12130 V16QI_type_node, V16QI_type_node,
12131 pchar_type_node, NULL_TREE);
12132 tree v2df_ftype_pdouble
12133 = build_function_type_list (V2DF_type_node, pdouble_type_node, NULL_TREE);
12134 tree v2df_ftype_v2df_v2df
12135 = build_function_type_list (V2DF_type_node,
12136 V2DF_type_node, V2DF_type_node, NULL_TREE);
12137 tree v16qi_ftype_v16qi_v16qi
12138 = build_function_type_list (V16QI_type_node,
12139 V16QI_type_node, V16QI_type_node, NULL_TREE);
12140 tree v8hi_ftype_v8hi_v8hi
12141 = build_function_type_list (V8HI_type_node,
12142 V8HI_type_node, V8HI_type_node, NULL_TREE);
12143 tree v4si_ftype_v4si_v4si
12144 = build_function_type_list (V4SI_type_node,
12145 V4SI_type_node, V4SI_type_node, NULL_TREE);
12146 tree v2di_ftype_v2di_v2di
12147 = build_function_type_list (V2DI_type_node,
12148 V2DI_type_node, V2DI_type_node, NULL_TREE);
12149 tree v2di_ftype_v2df_v2df
12150 = build_function_type_list (V2DI_type_node,
12151 V2DF_type_node, V2DF_type_node, NULL_TREE);
12152 tree v2df_ftype_v2df
12153 = build_function_type_list (V2DF_type_node, V2DF_type_node, NULL_TREE);
12154 tree v2df_ftype_double
12155 = build_function_type_list (V2DF_type_node, double_type_node, NULL_TREE);
12156 tree v2df_ftype_double_double
12157 = build_function_type_list (V2DF_type_node,
12158 double_type_node, double_type_node, NULL_TREE);
12159 tree int_ftype_v8hi_int
12160 = build_function_type_list (integer_type_node,
12161 V8HI_type_node, integer_type_node, NULL_TREE);
12162 tree v8hi_ftype_v8hi_int_int
12163 = build_function_type_list (V8HI_type_node,
12164 V8HI_type_node, integer_type_node,
12165 integer_type_node, NULL_TREE);
12166 tree v2di_ftype_v2di_int
12167 = build_function_type_list (V2DI_type_node,
12168 V2DI_type_node, integer_type_node, NULL_TREE);
12169 tree v4si_ftype_v4si_int
12170 = build_function_type_list (V4SI_type_node,
12171 V4SI_type_node, integer_type_node, NULL_TREE);
12172 tree v8hi_ftype_v8hi_int
12173 = build_function_type_list (V8HI_type_node,
12174 V8HI_type_node, integer_type_node, NULL_TREE);
12175 tree v8hi_ftype_v8hi_v2di
12176 = build_function_type_list (V8HI_type_node,
12177 V8HI_type_node, V2DI_type_node, NULL_TREE);
12178 tree v4si_ftype_v4si_v2di
12179 = build_function_type_list (V4SI_type_node,
12180 V4SI_type_node, V2DI_type_node, NULL_TREE);
12181 tree v4si_ftype_v8hi_v8hi
12182 = build_function_type_list (V4SI_type_node,
12183 V8HI_type_node, V8HI_type_node, NULL_TREE);
12184 tree di_ftype_v8qi_v8qi
12185 = build_function_type_list (long_long_unsigned_type_node,
12186 V8QI_type_node, V8QI_type_node, NULL_TREE);
12187 tree v2di_ftype_v16qi_v16qi
12188 = build_function_type_list (V2DI_type_node,
12189 V16QI_type_node, V16QI_type_node, NULL_TREE);
12190 tree int_ftype_v16qi
12191 = build_function_type_list (integer_type_node, V16QI_type_node, NULL_TREE);
12192
12193 /* Add all builtins that are more or less simple operations on two
12194 operands. */
12195 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
12196 {
12197 /* Use one of the operands; the target can have a different mode for
12198 mask-generating compares. */
12199 enum machine_mode mode;
12200 tree type;
12201
12202 if (d->name == 0)
12203 continue;
12204 mode = insn_data[d->icode].operand[1].mode;
12205
12206 switch (mode)
12207 {
12208 case V16QImode:
12209 type = v16qi_ftype_v16qi_v16qi;
12210 break;
12211 case V8HImode:
12212 type = v8hi_ftype_v8hi_v8hi;
12213 break;
12214 case V4SImode:
12215 type = v4si_ftype_v4si_v4si;
12216 break;
12217 case V2DImode:
12218 type = v2di_ftype_v2di_v2di;
12219 break;
12220 case V2DFmode:
12221 type = v2df_ftype_v2df_v2df;
12222 break;
12223 case TImode:
12224 type = ti_ftype_ti_ti;
12225 break;
12226 case V4SFmode:
12227 type = v4sf_ftype_v4sf_v4sf;
12228 break;
12229 case V8QImode:
12230 type = v8qi_ftype_v8qi_v8qi;
12231 break;
12232 case V4HImode:
12233 type = v4hi_ftype_v4hi_v4hi;
12234 break;
12235 case V2SImode:
12236 type = v2si_ftype_v2si_v2si;
12237 break;
12238 case DImode:
12239 type = di_ftype_di_di;
12240 break;
12241
12242 default:
12243 abort ();
12244 }
12245
12246 /* Override for comparisons. */
12247 if (d->icode == CODE_FOR_maskcmpv4sf3
12248 || d->icode == CODE_FOR_maskncmpv4sf3
12249 || d->icode == CODE_FOR_vmmaskcmpv4sf3
12250 || d->icode == CODE_FOR_vmmaskncmpv4sf3)
12251 type = v4si_ftype_v4sf_v4sf;
12252
12253 if (d->icode == CODE_FOR_maskcmpv2df3
12254 || d->icode == CODE_FOR_maskncmpv2df3
12255 || d->icode == CODE_FOR_vmmaskcmpv2df3
12256 || d->icode == CODE_FOR_vmmaskncmpv2df3)
12257 type = v2di_ftype_v2df_v2df;
12258
12259 def_builtin (d->mask, d->name, type, d->code);
12260 }
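  /* One iteration of the loop above, spelled out (annotation only): the
     bdesc_2arg entry for "__builtin_ia32_addps" names CODE_FOR_addv4sf3,
     whose operand 1 is V4SFmode, so the switch picks v4sf_ftype_v4sf_v4sf
     and the builtin is registered under MASK_SSE1.  */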
12261
12262 /* Add the remaining MMX insns with somewhat more complicated types. */
12263 def_builtin (MASK_MMX, "__builtin_ia32_mmx_zero", di_ftype_void, IX86_BUILTIN_MMX_ZERO);
12264 def_builtin (MASK_MMX, "__builtin_ia32_emms", void_ftype_void, IX86_BUILTIN_EMMS);
12265 def_builtin (MASK_MMX, "__builtin_ia32_ldmxcsr", void_ftype_unsigned, IX86_BUILTIN_LDMXCSR);
12266 def_builtin (MASK_MMX, "__builtin_ia32_stmxcsr", unsigned_ftype_void, IX86_BUILTIN_STMXCSR);
12267 def_builtin (MASK_MMX, "__builtin_ia32_psllw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSLLW);
12268 def_builtin (MASK_MMX, "__builtin_ia32_pslld", v2si_ftype_v2si_di, IX86_BUILTIN_PSLLD);
12269 def_builtin (MASK_MMX, "__builtin_ia32_psllq", di_ftype_di_di, IX86_BUILTIN_PSLLQ);
12270
12271 def_builtin (MASK_MMX, "__builtin_ia32_psrlw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRLW);
12272 def_builtin (MASK_MMX, "__builtin_ia32_psrld", v2si_ftype_v2si_di, IX86_BUILTIN_PSRLD);
12273 def_builtin (MASK_MMX, "__builtin_ia32_psrlq", di_ftype_di_di, IX86_BUILTIN_PSRLQ);
12274
12275 def_builtin (MASK_MMX, "__builtin_ia32_psraw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRAW);
12276 def_builtin (MASK_MMX, "__builtin_ia32_psrad", v2si_ftype_v2si_di, IX86_BUILTIN_PSRAD);
12277
12278 def_builtin (MASK_MMX, "__builtin_ia32_pshufw", v4hi_ftype_v4hi_int, IX86_BUILTIN_PSHUFW);
12279 def_builtin (MASK_MMX, "__builtin_ia32_pmaddwd", v2si_ftype_v4hi_v4hi, IX86_BUILTIN_PMADDWD);
12280
12281 /* comi/ucomi insns. */
12282 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
12283 if (d->mask == MASK_SSE2)
12284 def_builtin (d->mask, d->name, int_ftype_v2df_v2df, d->code);
12285 else
12286 def_builtin (d->mask, d->name, int_ftype_v4sf_v4sf, d->code);
12287
12288 def_builtin (MASK_MMX, "__builtin_ia32_packsswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKSSWB);
12289 def_builtin (MASK_MMX, "__builtin_ia32_packssdw", v4hi_ftype_v2si_v2si, IX86_BUILTIN_PACKSSDW);
12290 def_builtin (MASK_MMX, "__builtin_ia32_packuswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKUSWB);
12291
12292 def_builtin (MASK_SSE1, "__builtin_ia32_cvtpi2ps", v4sf_ftype_v4sf_v2si, IX86_BUILTIN_CVTPI2PS);
12293 def_builtin (MASK_SSE1, "__builtin_ia32_cvtps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTPS2PI);
12294 def_builtin (MASK_SSE1, "__builtin_ia32_cvtsi2ss", v4sf_ftype_v4sf_int, IX86_BUILTIN_CVTSI2SS);
12295 def_builtin (MASK_SSE1, "__builtin_ia32_cvtss2si", int_ftype_v4sf, IX86_BUILTIN_CVTSS2SI);
12296 def_builtin (MASK_SSE1, "__builtin_ia32_cvttps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTTPS2PI);
12297 def_builtin (MASK_SSE1, "__builtin_ia32_cvttss2si", int_ftype_v4sf, IX86_BUILTIN_CVTTSS2SI);
12298
12299 def_builtin (MASK_SSE1, "__builtin_ia32_andps", v4sf_ftype_v4sf_v4sf, IX86_BUILTIN_ANDPS);
12300 def_builtin (MASK_SSE1, "__builtin_ia32_andnps", v4sf_ftype_v4sf_v4sf, IX86_BUILTIN_ANDNPS);
12301 def_builtin (MASK_SSE1, "__builtin_ia32_orps", v4sf_ftype_v4sf_v4sf, IX86_BUILTIN_ORPS);
12302 def_builtin (MASK_SSE1, "__builtin_ia32_xorps", v4sf_ftype_v4sf_v4sf, IX86_BUILTIN_XORPS);
12303
12304 def_builtin (MASK_SSE1 | MASK_3DNOW_A, "__builtin_ia32_pextrw", int_ftype_v4hi_int, IX86_BUILTIN_PEXTRW);
12305 def_builtin (MASK_SSE1 | MASK_3DNOW_A, "__builtin_ia32_pinsrw", v4hi_ftype_v4hi_int_int, IX86_BUILTIN_PINSRW);
12306
12307 def_builtin (MASK_SSE1 | MASK_3DNOW_A, "__builtin_ia32_maskmovq", void_ftype_v8qi_v8qi_pchar, IX86_BUILTIN_MASKMOVQ);
12308
12309 def_builtin (MASK_SSE1, "__builtin_ia32_loadaps", v4sf_ftype_pfloat, IX86_BUILTIN_LOADAPS);
12310 def_builtin (MASK_SSE1, "__builtin_ia32_loadups", v4sf_ftype_pfloat, IX86_BUILTIN_LOADUPS);
12311 def_builtin (MASK_SSE1, "__builtin_ia32_loadss", v4sf_ftype_pfloat, IX86_BUILTIN_LOADSS);
12312 def_builtin (MASK_SSE1, "__builtin_ia32_storeaps", void_ftype_pfloat_v4sf, IX86_BUILTIN_STOREAPS);
12313 def_builtin (MASK_SSE1, "__builtin_ia32_storeups", void_ftype_pfloat_v4sf, IX86_BUILTIN_STOREUPS);
12314 def_builtin (MASK_SSE1, "__builtin_ia32_storess", void_ftype_pfloat_v4sf, IX86_BUILTIN_STORESS);
12315
12316 def_builtin (MASK_SSE1, "__builtin_ia32_loadhps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADHPS);
12317 def_builtin (MASK_SSE1, "__builtin_ia32_loadlps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADLPS);
12318 def_builtin (MASK_SSE1, "__builtin_ia32_storehps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STOREHPS);
12319 def_builtin (MASK_SSE1, "__builtin_ia32_storelps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STORELPS);
12320
12321 def_builtin (MASK_SSE1, "__builtin_ia32_movmskps", int_ftype_v4sf, IX86_BUILTIN_MOVMSKPS);
12322 def_builtin (MASK_SSE1 | MASK_3DNOW_A, "__builtin_ia32_pmovmskb", int_ftype_v8qi, IX86_BUILTIN_PMOVMSKB);
12323 def_builtin (MASK_SSE1, "__builtin_ia32_movntps", void_ftype_pfloat_v4sf, IX86_BUILTIN_MOVNTPS);
12324 def_builtin (MASK_SSE1 | MASK_3DNOW_A, "__builtin_ia32_movntq", void_ftype_pdi_di, IX86_BUILTIN_MOVNTQ);
12325
12326 def_builtin (MASK_SSE1 | MASK_3DNOW_A, "__builtin_ia32_sfence", void_ftype_void, IX86_BUILTIN_SFENCE);
12327
12328 def_builtin (MASK_SSE1 | MASK_3DNOW_A, "__builtin_ia32_psadbw", di_ftype_v8qi_v8qi, IX86_BUILTIN_PSADBW);
12329
12330 def_builtin (MASK_SSE1, "__builtin_ia32_rcpps", v4sf_ftype_v4sf, IX86_BUILTIN_RCPPS);
12331 def_builtin (MASK_SSE1, "__builtin_ia32_rcpss", v4sf_ftype_v4sf, IX86_BUILTIN_RCPSS);
12332 def_builtin (MASK_SSE1, "__builtin_ia32_rsqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTPS);
12333 def_builtin (MASK_SSE1, "__builtin_ia32_rsqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTSS);
12334 def_builtin (MASK_SSE1, "__builtin_ia32_sqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTPS);
12335 def_builtin (MASK_SSE1, "__builtin_ia32_sqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTSS);
12336
12337 def_builtin (MASK_SSE1, "__builtin_ia32_shufps", v4sf_ftype_v4sf_v4sf_int, IX86_BUILTIN_SHUFPS);
12338
12339 /* Original 3DNow! */
12340 def_builtin (MASK_3DNOW, "__builtin_ia32_femms", void_ftype_void, IX86_BUILTIN_FEMMS);
12341 def_builtin (MASK_3DNOW, "__builtin_ia32_pavgusb", v8qi_ftype_v8qi_v8qi, IX86_BUILTIN_PAVGUSB);
12342 def_builtin (MASK_3DNOW, "__builtin_ia32_pf2id", v2si_ftype_v2sf, IX86_BUILTIN_PF2ID);
12343 def_builtin (MASK_3DNOW, "__builtin_ia32_pfacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFACC);
12344 def_builtin (MASK_3DNOW, "__builtin_ia32_pfadd", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFADD);
12345 def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpeq", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPEQ);
12346 def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpge", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPGE);
12347 def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpgt", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPGT);
12348 def_builtin (MASK_3DNOW, "__builtin_ia32_pfmax", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMAX);
12349 def_builtin (MASK_3DNOW, "__builtin_ia32_pfmin", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMIN);
12350 def_builtin (MASK_3DNOW, "__builtin_ia32_pfmul", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMUL);
12351 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcp", v2sf_ftype_v2sf, IX86_BUILTIN_PFRCP);
12352 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcpit1", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRCPIT1);
12353 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcpit2", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRCPIT2);
12354 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrsqrt", v2sf_ftype_v2sf, IX86_BUILTIN_PFRSQRT);
12355 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrsqit1", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRSQIT1);
12356 def_builtin (MASK_3DNOW, "__builtin_ia32_pfsub", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFSUB);
12357 def_builtin (MASK_3DNOW, "__builtin_ia32_pfsubr", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFSUBR);
12358 def_builtin (MASK_3DNOW, "__builtin_ia32_pi2fd", v2sf_ftype_v2si, IX86_BUILTIN_PI2FD);
12359 def_builtin (MASK_3DNOW, "__builtin_ia32_pmulhrw", v4hi_ftype_v4hi_v4hi, IX86_BUILTIN_PMULHRW);
12360
12361 /* 3DNow! extension as used in the Athlon CPU. */
12362 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pf2iw", v2si_ftype_v2sf, IX86_BUILTIN_PF2IW);
12363 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pfnacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFNACC);
12364 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pfpnacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFPNACC);
12365 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pi2fw", v2sf_ftype_v2si, IX86_BUILTIN_PI2FW);
12366 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pswapdsf", v2sf_ftype_v2sf, IX86_BUILTIN_PSWAPDSF);
12367 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pswapdsi", v2si_ftype_v2si, IX86_BUILTIN_PSWAPDSI);
12368
12369 def_builtin (MASK_SSE1, "__builtin_ia32_setzerops", v4sf_ftype_void, IX86_BUILTIN_SSE_ZERO);
12370
12371 /* SSE2 */
12372 def_builtin (MASK_SSE2, "__builtin_ia32_pextrw128", int_ftype_v8hi_int, IX86_BUILTIN_PEXTRW128);
12373 def_builtin (MASK_SSE2, "__builtin_ia32_pinsrw128", v8hi_ftype_v8hi_int_int, IX86_BUILTIN_PINSRW128);
12374
12375 def_builtin (MASK_SSE2, "__builtin_ia32_maskmovdqu", void_ftype_v16qi_v16qi_pchar, IX86_BUILTIN_MASKMOVDQU);
12376 def_builtin (MASK_SSE2, "__builtin_ia32_movq2dq", v2di_ftype_di, IX86_BUILTIN_MOVQ2DQ);
12377
12378 def_builtin (MASK_SSE2, "__builtin_ia32_loadapd", v2df_ftype_pdouble, IX86_BUILTIN_LOADAPD);
12379 def_builtin (MASK_SSE2, "__builtin_ia32_loadupd", v2df_ftype_pdouble, IX86_BUILTIN_LOADUPD);
12380 def_builtin (MASK_SSE2, "__builtin_ia32_loadsd", v2df_ftype_pdouble, IX86_BUILTIN_LOADSD);
12381 def_builtin (MASK_SSE2, "__builtin_ia32_storeapd", void_ftype_pdouble_v2df, IX86_BUILTIN_STOREAPD);
12382 def_builtin (MASK_SSE2, "__builtin_ia32_storeupd", void_ftype_pdouble_v2df, IX86_BUILTIN_STOREUPD);
12383 def_builtin (MASK_SSE2, "__builtin_ia32_storesd", void_ftype_pdouble_v2df, IX86_BUILTIN_STORESD);
12384
12385 def_builtin (MASK_SSE2, "__builtin_ia32_loadhpd", v2df_ftype_v2df_pv2si, IX86_BUILTIN_LOADHPD);
12386 def_builtin (MASK_SSE2, "__builtin_ia32_loadlpd", v2df_ftype_v2df_pv2si, IX86_BUILTIN_LOADLPD);
12387 def_builtin (MASK_SSE2, "__builtin_ia32_storehpd", void_ftype_pv2si_v2df, IX86_BUILTIN_STOREHPD);
12388 def_builtin (MASK_SSE2, "__builtin_ia32_storelpd", void_ftype_pv2si_v2df, IX86_BUILTIN_STORELPD);
12389
12390 def_builtin (MASK_SSE2, "__builtin_ia32_movmskpd", int_ftype_v2df, IX86_BUILTIN_MOVMSKPD);
12391 def_builtin (MASK_SSE2, "__builtin_ia32_pmovmskb128", int_ftype_v16qi, IX86_BUILTIN_PMOVMSKB128);
12392 def_builtin (MASK_SSE2, "__builtin_ia32_movnti", void_ftype_pint_int, IX86_BUILTIN_MOVNTI);
12393 def_builtin (MASK_SSE2, "__builtin_ia32_movntpd", void_ftype_pdouble_v2df, IX86_BUILTIN_MOVNTPD);
12394 def_builtin (MASK_SSE2, "__builtin_ia32_movntdq", void_ftype_pv2di_v2di, IX86_BUILTIN_MOVNTDQ);
12395
12396 def_builtin (MASK_SSE2, "__builtin_ia32_pshufd", v4si_ftype_v4si_int, IX86_BUILTIN_PSHUFD);
12397 def_builtin (MASK_SSE2, "__builtin_ia32_pshuflw", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSHUFLW);
12398 def_builtin (MASK_SSE2, "__builtin_ia32_pshufhw", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSHUFHW);
12399 def_builtin (MASK_SSE2, "__builtin_ia32_psadbw128", v2di_ftype_v16qi_v16qi, IX86_BUILTIN_PSADBW128);
12400
12401 def_builtin (MASK_SSE2, "__builtin_ia32_sqrtpd", v2df_ftype_v2df, IX86_BUILTIN_SQRTPD);
12402 def_builtin (MASK_SSE2, "__builtin_ia32_sqrtsd", v2df_ftype_v2df, IX86_BUILTIN_SQRTSD);
12403
12404 def_builtin (MASK_SSE2, "__builtin_ia32_shufpd", v2df_ftype_v2df_v2df_int, IX86_BUILTIN_SHUFPD);
12405
12406 def_builtin (MASK_SSE2, "__builtin_ia32_cvtdq2pd", v2df_ftype_v4si, IX86_BUILTIN_CVTDQ2PD);
12407 def_builtin (MASK_SSE2, "__builtin_ia32_cvtdq2ps", v4sf_ftype_v4si, IX86_BUILTIN_CVTDQ2PS);
12408
12409 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2dq", v4si_ftype_v2df, IX86_BUILTIN_CVTPD2DQ);
12410 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2pi", v2si_ftype_v2df, IX86_BUILTIN_CVTPD2PI);
12411 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2ps", v4sf_ftype_v2df, IX86_BUILTIN_CVTPD2PS);
12412 def_builtin (MASK_SSE2, "__builtin_ia32_cvttpd2dq", v4si_ftype_v2df, IX86_BUILTIN_CVTTPD2DQ);
12413 def_builtin (MASK_SSE2, "__builtin_ia32_cvttpd2pi", v2si_ftype_v2df, IX86_BUILTIN_CVTTPD2PI);
12414
12415 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpi2pd", v2df_ftype_v2si, IX86_BUILTIN_CVTPI2PD);
12416
12417 def_builtin (MASK_SSE2, "__builtin_ia32_cvtsd2si", int_ftype_v2df, IX86_BUILTIN_CVTSD2SI);
12418 def_builtin (MASK_SSE2, "__builtin_ia32_cvttsd2si", int_ftype_v2df, IX86_BUILTIN_CVTTSD2SI);
12419
12420 def_builtin (MASK_SSE2, "__builtin_ia32_cvtps2dq", v4si_ftype_v4sf, IX86_BUILTIN_CVTPS2DQ);
12421 def_builtin (MASK_SSE2, "__builtin_ia32_cvtps2pd", v2df_ftype_v4sf, IX86_BUILTIN_CVTPS2PD);
12422 def_builtin (MASK_SSE2, "__builtin_ia32_cvttps2dq", v4si_ftype_v4sf, IX86_BUILTIN_CVTTPS2DQ);
12423
12424 def_builtin (MASK_SSE2, "__builtin_ia32_cvtsi2sd", v2df_ftype_v2df_int, IX86_BUILTIN_CVTSI2SD);
12425 def_builtin (MASK_SSE2, "__builtin_ia32_cvtsd2ss", v4sf_ftype_v4sf_v2df, IX86_BUILTIN_CVTSD2SS);
12426 def_builtin (MASK_SSE2, "__builtin_ia32_cvtss2sd", v2df_ftype_v2df_v4sf, IX86_BUILTIN_CVTSS2SD);
12427
12428 def_builtin (MASK_SSE2, "__builtin_ia32_setpd1", v2df_ftype_double, IX86_BUILTIN_SETPD1);
12429 def_builtin (MASK_SSE2, "__builtin_ia32_setpd", v2df_ftype_double_double, IX86_BUILTIN_SETPD);
12430 def_builtin (MASK_SSE2, "__builtin_ia32_setzeropd", ti_ftype_void, IX86_BUILTIN_CLRPD);
12431 def_builtin (MASK_SSE2, "__builtin_ia32_loadpd1", v2df_ftype_pdouble, IX86_BUILTIN_LOADPD1);
12432 def_builtin (MASK_SSE2, "__builtin_ia32_loadrpd", v2df_ftype_pdouble, IX86_BUILTIN_LOADRPD);
12433 def_builtin (MASK_SSE2, "__builtin_ia32_storepd1", void_ftype_pdouble_v2df, IX86_BUILTIN_STOREPD1);
12434 def_builtin (MASK_SSE2, "__builtin_ia32_storerpd", void_ftype_pdouble_v2df, IX86_BUILTIN_STORERPD);
12435
12436 def_builtin (MASK_SSE2, "__builtin_ia32_clflush", void_ftype_pvoid, IX86_BUILTIN_CLFLUSH);
12437 def_builtin (MASK_SSE2, "__builtin_ia32_lfence", void_ftype_void, IX86_BUILTIN_LFENCE);
12438 def_builtin (MASK_SSE2, "__builtin_ia32_mfence", void_ftype_void, IX86_BUILTIN_MFENCE);
12439
12440 def_builtin (MASK_SSE2, "__builtin_ia32_psllw128", v8hi_ftype_v8hi_v2di, IX86_BUILTIN_PSLLW128);
12441 def_builtin (MASK_SSE2, "__builtin_ia32_pslld128", v4si_ftype_v4si_v2di, IX86_BUILTIN_PSLLD128);
12442 def_builtin (MASK_SSE2, "__builtin_ia32_psllq128", v2di_ftype_v2di_v2di, IX86_BUILTIN_PSLLQ128);
12443
12444 def_builtin (MASK_SSE2, "__builtin_ia32_psrlw128", v8hi_ftype_v8hi_v2di, IX86_BUILTIN_PSRLW128);
12445 def_builtin (MASK_SSE2, "__builtin_ia32_psrld128", v4si_ftype_v4si_v2di, IX86_BUILTIN_PSRLD128);
12446 def_builtin (MASK_SSE2, "__builtin_ia32_psrlq128", v2di_ftype_v2di_v2di, IX86_BUILTIN_PSRLQ128);
12447
12448 def_builtin (MASK_SSE2, "__builtin_ia32_psraw128", v8hi_ftype_v8hi_v2di, IX86_BUILTIN_PSRAW128);
12449 def_builtin (MASK_SSE2, "__builtin_ia32_psrad128", v4si_ftype_v4si_v2di, IX86_BUILTIN_PSRAD128);
12450
12451 def_builtin (MASK_SSE2, "__builtin_ia32_psllwi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSLLWI128);
12452 def_builtin (MASK_SSE2, "__builtin_ia32_pslldi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSLLDI128);
12453 def_builtin (MASK_SSE2, "__builtin_ia32_psllqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSLLQI128);
12454
12455 def_builtin (MASK_SSE2, "__builtin_ia32_psrlwi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSRLWI128);
12456 def_builtin (MASK_SSE2, "__builtin_ia32_psrldi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSRLDI128);
12457 def_builtin (MASK_SSE2, "__builtin_ia32_psrlqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSRLQI128);
12458
12459 def_builtin (MASK_SSE2, "__builtin_ia32_psrawi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSRAWI128);
12460 def_builtin (MASK_SSE2, "__builtin_ia32_psradi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSRADI128);
12461
12462 def_builtin (MASK_SSE2, "__builtin_ia32_pmaddwd128", v4si_ftype_v8hi_v8hi, IX86_BUILTIN_PMADDWD128);
12463 }
12464
12465 /* Errors in the source file can cause expand_expr to return const0_rtx
12466 where we expect a vector. To avoid crashing, use one of the vector
12467 clear instructions. */
12468 static rtx
12469 safe_vector_operand (x, mode)
12470 rtx x;
12471 enum machine_mode mode;
12472 {
12473 if (x != const0_rtx)
12474 return x;
12475 x = gen_reg_rtx (mode);
12476
12477 if (VALID_MMX_REG_MODE (mode) || VALID_MMX_REG_MODE_3DNOW (mode))
12478 emit_insn (gen_mmx_clrdi (mode == DImode ? x
12479 : gen_rtx_SUBREG (DImode, x, 0)));
12480 else
12481 emit_insn (gen_sse_clrv4sf (mode == V4SFmode ? x
12482 : gen_rtx_SUBREG (V4SFmode, x, 0)));
12483 return x;
12484 }
12485
12486 /* Subroutine of ix86_expand_builtin to take care of binop insns. */
12487
12488 static rtx
12489 ix86_expand_binop_builtin (icode, arglist, target)
12490 enum insn_code icode;
12491 tree arglist;
12492 rtx target;
12493 {
12494 rtx pat;
12495 tree arg0 = TREE_VALUE (arglist);
12496 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
12497 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
12498 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
12499 enum machine_mode tmode = insn_data[icode].operand[0].mode;
12500 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
12501 enum machine_mode mode1 = insn_data[icode].operand[2].mode;
12502
12503 if (VECTOR_MODE_P (mode0))
12504 op0 = safe_vector_operand (op0, mode0);
12505 if (VECTOR_MODE_P (mode1))
12506 op1 = safe_vector_operand (op1, mode1);
12507
12508 if (! target
12509 || GET_MODE (target) != tmode
12510 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
12511 target = gen_reg_rtx (tmode);
12512
12513 /* In case the insn wants input operands in modes different from
12514 the result, abort. */
12515 if (GET_MODE (op0) != mode0 || GET_MODE (op1) != mode1)
12516 abort ();
12517
12518 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
12519 op0 = copy_to_mode_reg (mode0, op0);
12520 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
12521 op1 = copy_to_mode_reg (mode1, op1);
12522
12523 /* In the commutative cases, both op0 and op1 are nonimmediate_operand,
12524 yet one of the two must not be a memory. This is normally enforced
12525 by expanders, but we didn't bother to create one here. */
12526 if (GET_CODE (op0) == MEM && GET_CODE (op1) == MEM)
12527 op0 = copy_to_mode_reg (mode0, op0);
12528
12529 pat = GEN_FCN (icode) (target, op0, op1);
12530 if (! pat)
12531 return 0;
12532 emit_insn (pat);
12533 return target;
12534 }
12535
12536 /* In type_for_mode we restrict the ability to create TImode types
12537 to hosts with 64-bit H_W_I. So we've defined the SSE logicals
12538 to have a V4SFmode signature. Convert them in-place to TImode. */
12539
12540 static rtx
12541 ix86_expand_timode_binop_builtin (icode, arglist, target)
12542 enum insn_code icode;
12543 tree arglist;
12544 rtx target;
12545 {
12546 rtx pat;
12547 tree arg0 = TREE_VALUE (arglist);
12548 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
12549 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
12550 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
12551
12552 op0 = gen_lowpart (TImode, op0);
12553 op1 = gen_lowpart (TImode, op1);
12554 target = gen_reg_rtx (TImode);
12555
12556 if (! (*insn_data[icode].operand[1].predicate) (op0, TImode))
12557 op0 = copy_to_mode_reg (TImode, op0);
12558 if (! (*insn_data[icode].operand[2].predicate) (op1, TImode))
12559 op1 = copy_to_mode_reg (TImode, op1);
12560
12561 /* In the commutative cases, both op0 and op1 are nonimmediate_operand,
12562 yet one of the two must not be a memory. This is normally enforced
12563 by expanders, but we didn't bother to create one here. */
12564 if (GET_CODE (op0) == MEM && GET_CODE (op1) == MEM)
12565 op0 = copy_to_mode_reg (TImode, op0);
12566
12567 pat = GEN_FCN (icode) (target, op0, op1);
12568 if (! pat)
12569 return 0;
12570 emit_insn (pat);
12571
12572 return gen_lowpart (V4SFmode, target);
12573 }
12574
12575 /* Subroutine of ix86_expand_builtin to take care of stores. */
12576
12577 static rtx
12578 ix86_expand_store_builtin (icode, arglist)
12579 enum insn_code icode;
12580 tree arglist;
12581 {
12582 rtx pat;
12583 tree arg0 = TREE_VALUE (arglist);
12584 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
12585 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
12586 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
12587 enum machine_mode mode0 = insn_data[icode].operand[0].mode;
12588 enum machine_mode mode1 = insn_data[icode].operand[1].mode;
12589
12590 if (VECTOR_MODE_P (mode1))
12591 op1 = safe_vector_operand (op1, mode1);
12592
12593 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
12594
12595 if (! (*insn_data[icode].operand[1].predicate) (op1, mode1))
12596 op1 = copy_to_mode_reg (mode1, op1);
12597
12598 pat = GEN_FCN (icode) (op0, op1);
12599 if (pat)
12600 emit_insn (pat);
12601 return 0;
12602 }
12603
12604 /* Subroutine of ix86_expand_builtin to take care of unop insns. */
12605
12606 static rtx
12607 ix86_expand_unop_builtin (icode, arglist, target, do_load)
12608 enum insn_code icode;
12609 tree arglist;
12610 rtx target;
12611 int do_load;
12612 {
12613 rtx pat;
12614 tree arg0 = TREE_VALUE (arglist);
12615 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
12616 enum machine_mode tmode = insn_data[icode].operand[0].mode;
12617 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
12618
12619 if (! target
12620 || GET_MODE (target) != tmode
12621 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
12622 target = gen_reg_rtx (tmode);
12623 if (do_load)
12624 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
12625 else
12626 {
12627 if (VECTOR_MODE_P (mode0))
12628 op0 = safe_vector_operand (op0, mode0);
12629
12630 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
12631 op0 = copy_to_mode_reg (mode0, op0);
12632 }
12633
12634 pat = GEN_FCN (icode) (target, op0);
12635 if (! pat)
12636 return 0;
12637 emit_insn (pat);
12638 return target;
12639 }
12640
12641 /* Subroutine of ix86_expand_builtin to take care of three special unop insns:
12642 sqrtss, rsqrtss, rcpss. */
12643
12644 static rtx
12645 ix86_expand_unop1_builtin (icode, arglist, target)
12646 enum insn_code icode;
12647 tree arglist;
12648 rtx target;
12649 {
12650 rtx pat;
12651 tree arg0 = TREE_VALUE (arglist);
12652 rtx op1, op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
12653 enum machine_mode tmode = insn_data[icode].operand[0].mode;
12654 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
12655
12656 if (! target
12657 || GET_MODE (target) != tmode
12658 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
12659 target = gen_reg_rtx (tmode);
12660
12661 if (VECTOR_MODE_P (mode0))
12662 op0 = safe_vector_operand (op0, mode0);
12663
12664 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
12665 op0 = copy_to_mode_reg (mode0, op0);
12666
12667 op1 = op0;
12668 if (! (*insn_data[icode].operand[2].predicate) (op1, mode0))
12669 op1 = copy_to_mode_reg (mode0, op1);
12670
12671 pat = GEN_FCN (icode) (target, op0, op1);
12672 if (! pat)
12673 return 0;
12674 emit_insn (pat);
12675 return target;
12676 }
12677
12678 /* Subroutine of ix86_expand_builtin to take care of comparison insns. */
12679
12680 static rtx
12681 ix86_expand_sse_compare (d, arglist, target)
12682 const struct builtin_description *d;
12683 tree arglist;
12684 rtx target;
12685 {
12686 rtx pat;
12687 tree arg0 = TREE_VALUE (arglist);
12688 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
12689 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
12690 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
12691 rtx op2;
12692 enum machine_mode tmode = insn_data[d->icode].operand[0].mode;
12693 enum machine_mode mode0 = insn_data[d->icode].operand[1].mode;
12694 enum machine_mode mode1 = insn_data[d->icode].operand[2].mode;
12695 enum rtx_code comparison = d->comparison;
12696
12697 if (VECTOR_MODE_P (mode0))
12698 op0 = safe_vector_operand (op0, mode0);
12699 if (VECTOR_MODE_P (mode1))
12700 op1 = safe_vector_operand (op1, mode1);
12701
12702 /* Swap operands if we have a comparison that isn't available in
12703 hardware. */
12704 if (d->flag)
12705 {
12706 rtx tmp = gen_reg_rtx (mode1);
12707 emit_move_insn (tmp, op1);
12708 op1 = op0;
12709 op0 = tmp;
12710 }
12711
12712 if (! target
12713 || GET_MODE (target) != tmode
12714 || ! (*insn_data[d->icode].operand[0].predicate) (target, tmode))
12715 target = gen_reg_rtx (tmode);
12716
12717 if (! (*insn_data[d->icode].operand[1].predicate) (op0, mode0))
12718 op0 = copy_to_mode_reg (mode0, op0);
12719 if (! (*insn_data[d->icode].operand[2].predicate) (op1, mode1))
12720 op1 = copy_to_mode_reg (mode1, op1);
12721
12722 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
12723 pat = GEN_FCN (d->icode) (target, op0, op1, op2);
12724 if (! pat)
12725 return 0;
12726 emit_insn (pat);
12727 return target;
12728 }
12729
12730 /* Subroutine of ix86_expand_builtin to take care of comi insns. */
12731
12732 static rtx
12733 ix86_expand_sse_comi (d, arglist, target)
12734 const struct builtin_description *d;
12735 tree arglist;
12736 rtx target;
12737 {
12738 rtx pat;
12739 tree arg0 = TREE_VALUE (arglist);
12740 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
12741 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
12742 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
12743 rtx op2;
12744 enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
12745 enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
12746 enum rtx_code comparison = d->comparison;
12747
12748 if (VECTOR_MODE_P (mode0))
12749 op0 = safe_vector_operand (op0, mode0);
12750 if (VECTOR_MODE_P (mode1))
12751 op1 = safe_vector_operand (op1, mode1);
12752
12753 /* Swap operands if we have a comparison that isn't available in
12754 hardware. */
12755 if (d->flag)
12756 {
12757 rtx tmp = op1;
12758 op1 = op0;
12759 op0 = tmp;
12760 }
12761
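/* The comi/ucomi result is materialized by zeroing an SImode register and
   then setting its low byte from the flags register via the setcc pattern
   emitted below.  */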
12762 target = gen_reg_rtx (SImode);
12763 emit_move_insn (target, const0_rtx);
12764 target = gen_rtx_SUBREG (QImode, target, 0);
12765
12766 if (! (*insn_data[d->icode].operand[0].predicate) (op0, mode0))
12767 op0 = copy_to_mode_reg (mode0, op0);
12768 if (! (*insn_data[d->icode].operand[1].predicate) (op1, mode1))
12769 op1 = copy_to_mode_reg (mode1, op1);
12770
12771 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
12772 pat = GEN_FCN (d->icode) (op0, op1, op2);
12773 if (! pat)
12774 return 0;
12775 emit_insn (pat);
12776 emit_insn (gen_rtx_SET (VOIDmode,
12777 gen_rtx_STRICT_LOW_PART (VOIDmode, target),
12778 gen_rtx_fmt_ee (comparison, QImode,
12779 gen_rtx_REG (CCmode, FLAGS_REG),
12780 const0_rtx)));
12781
12782 return SUBREG_REG (target);
12783 }
12784
12785 /* Expand an expression EXP that calls a built-in function,
12786 with result going to TARGET if that's convenient
12787 (and in mode MODE if that's convenient).
12788 SUBTARGET may be used as the target for computing one of EXP's operands.
12789 IGNORE is nonzero if the value is to be ignored. */
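/* As an illustrative sketch (the vector typedef is assumed to come from
   xmmintrin.h and is not defined in this file), a source-level call such as

       __v4sf c = __builtin_ia32_andps (a, b);

   reaches this function with FCODE == IX86_BUILTIN_ANDPS and is routed to
   ix86_expand_timode_binop_builtin by the switch below.  */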
12790
12791 rtx
12792 ix86_expand_builtin (exp, target, subtarget, mode, ignore)
12793 tree exp;
12794 rtx target;
12795 rtx subtarget ATTRIBUTE_UNUSED;
12796 enum machine_mode mode ATTRIBUTE_UNUSED;
12797 int ignore ATTRIBUTE_UNUSED;
12798 {
12799 const struct builtin_description *d;
12800 size_t i;
12801 enum insn_code icode;
12802 tree fndecl = TREE_OPERAND (TREE_OPERAND (exp, 0), 0);
12803 tree arglist = TREE_OPERAND (exp, 1);
12804 tree arg0, arg1, arg2;
12805 rtx op0, op1, op2, pat;
12806 enum machine_mode tmode, mode0, mode1, mode2;
12807 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
12808
12809 switch (fcode)
12810 {
12811 case IX86_BUILTIN_EMMS:
12812 emit_insn (gen_emms ());
12813 return 0;
12814
12815 case IX86_BUILTIN_SFENCE:
12816 emit_insn (gen_sfence ());
12817 return 0;
12818
12819 case IX86_BUILTIN_PEXTRW:
12820 case IX86_BUILTIN_PEXTRW128:
12821 icode = (fcode == IX86_BUILTIN_PEXTRW
12822 ? CODE_FOR_mmx_pextrw
12823 : CODE_FOR_sse2_pextrw);
12824 arg0 = TREE_VALUE (arglist);
12825 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
12826 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
12827 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
12828 tmode = insn_data[icode].operand[0].mode;
12829 mode0 = insn_data[icode].operand[1].mode;
12830 mode1 = insn_data[icode].operand[2].mode;
12831
12832 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
12833 op0 = copy_to_mode_reg (mode0, op0);
12834 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
12835 {
12836 /* @@@ better error message */
12837 error ("selector must be an immediate");
12838 return gen_reg_rtx (tmode);
12839 }
12840 if (target == 0
12841 || GET_MODE (target) != tmode
12842 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
12843 target = gen_reg_rtx (tmode);
12844 pat = GEN_FCN (icode) (target, op0, op1);
12845 if (! pat)
12846 return 0;
12847 emit_insn (pat);
12848 return target;
12849
12850 case IX86_BUILTIN_PINSRW:
12851 case IX86_BUILTIN_PINSRW128:
12852 icode = (fcode == IX86_BUILTIN_PINSRW
12853 ? CODE_FOR_mmx_pinsrw
12854 : CODE_FOR_sse2_pinsrw);
12855 arg0 = TREE_VALUE (arglist);
12856 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
12857 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
12858 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
12859 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
12860 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
12861 tmode = insn_data[icode].operand[0].mode;
12862 mode0 = insn_data[icode].operand[1].mode;
12863 mode1 = insn_data[icode].operand[2].mode;
12864 mode2 = insn_data[icode].operand[3].mode;
12865
12866 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
12867 op0 = copy_to_mode_reg (mode0, op0);
12868 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
12869 op1 = copy_to_mode_reg (mode1, op1);
12870 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
12871 {
12872 /* @@@ better error message */
12873 error ("selector must be an immediate");
12874 return const0_rtx;
12875 }
12876 if (target == 0
12877 || GET_MODE (target) != tmode
12878 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
12879 target = gen_reg_rtx (tmode);
12880 pat = GEN_FCN (icode) (target, op0, op1, op2);
12881 if (! pat)
12882 return 0;
12883 emit_insn (pat);
12884 return target;
12885
12886 case IX86_BUILTIN_MASKMOVQ:
12887 icode = (fcode == IX86_BUILTIN_MASKMOVQ
12888 ? (TARGET_64BIT ? CODE_FOR_mmx_maskmovq_rex : CODE_FOR_mmx_maskmovq)
12889 : CODE_FOR_sse2_maskmovdqu);
12890 /* Note the arg order is different from the operand order. */
12891 arg1 = TREE_VALUE (arglist);
12892 arg2 = TREE_VALUE (TREE_CHAIN (arglist));
12893 arg0 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
12894 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
12895 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
12896 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
12897 mode0 = insn_data[icode].operand[0].mode;
12898 mode1 = insn_data[icode].operand[1].mode;
12899 mode2 = insn_data[icode].operand[2].mode;
12900
12901 if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
12902 op0 = copy_to_mode_reg (mode0, op0);
12903 if (! (*insn_data[icode].operand[1].predicate) (op1, mode1))
12904 op1 = copy_to_mode_reg (mode1, op1);
12905 if (! (*insn_data[icode].operand[2].predicate) (op2, mode2))
12906 op2 = copy_to_mode_reg (mode2, op2);
12907 pat = GEN_FCN (icode) (op0, op1, op2);
12908 if (! pat)
12909 return 0;
12910 emit_insn (pat);
12911 return 0;
12912
12913 case IX86_BUILTIN_SQRTSS:
12914 return ix86_expand_unop1_builtin (CODE_FOR_vmsqrtv4sf2, arglist, target);
12915 case IX86_BUILTIN_RSQRTSS:
12916 return ix86_expand_unop1_builtin (CODE_FOR_vmrsqrtv4sf2, arglist, target);
12917 case IX86_BUILTIN_RCPSS:
12918 return ix86_expand_unop1_builtin (CODE_FOR_vmrcpv4sf2, arglist, target);
12919
12920 case IX86_BUILTIN_ANDPS:
12921 return ix86_expand_timode_binop_builtin (CODE_FOR_sse_andti3,
12922 arglist, target);
12923 case IX86_BUILTIN_ANDNPS:
12924 return ix86_expand_timode_binop_builtin (CODE_FOR_sse_nandti3,
12925 arglist, target);
12926 case IX86_BUILTIN_ORPS:
12927 return ix86_expand_timode_binop_builtin (CODE_FOR_sse_iorti3,
12928 arglist, target);
12929 case IX86_BUILTIN_XORPS:
12930 return ix86_expand_timode_binop_builtin (CODE_FOR_sse_xorti3,
12931 arglist, target);
12932
12933 case IX86_BUILTIN_LOADAPS:
12934 return ix86_expand_unop_builtin (CODE_FOR_sse_movaps, arglist, target, 1);
12935
12936 case IX86_BUILTIN_LOADUPS:
12937 return ix86_expand_unop_builtin (CODE_FOR_sse_movups, arglist, target, 1);
12938
12939 case IX86_BUILTIN_STOREAPS:
12940 return ix86_expand_store_builtin (CODE_FOR_sse_movaps, arglist);
12941 case IX86_BUILTIN_STOREUPS:
12942 return ix86_expand_store_builtin (CODE_FOR_sse_movups, arglist);
12943
12944 case IX86_BUILTIN_LOADSS:
12945 return ix86_expand_unop_builtin (CODE_FOR_sse_loadss, arglist, target, 1);
12946
12947 case IX86_BUILTIN_STORESS:
12948 return ix86_expand_store_builtin (CODE_FOR_sse_storess, arglist);
12949
12950 case IX86_BUILTIN_LOADHPS:
12951 case IX86_BUILTIN_LOADLPS:
12952 case IX86_BUILTIN_LOADHPD:
12953 case IX86_BUILTIN_LOADLPD:
12954 icode = (fcode == IX86_BUILTIN_LOADHPS ? CODE_FOR_sse_movhps
12955 : fcode == IX86_BUILTIN_LOADLPS ? CODE_FOR_sse_movlps
12956 : fcode == IX86_BUILTIN_LOADHPD ? CODE_FOR_sse2_movhpd
12957 : CODE_FOR_sse2_movlpd);
12958 arg0 = TREE_VALUE (arglist);
12959 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
12960 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
12961 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
12962 tmode = insn_data[icode].operand[0].mode;
12963 mode0 = insn_data[icode].operand[1].mode;
12964 mode1 = insn_data[icode].operand[2].mode;
12965
12966 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
12967 op0 = copy_to_mode_reg (mode0, op0);
12968 op1 = gen_rtx_MEM (mode1, copy_to_mode_reg (Pmode, op1));
12969 if (target == 0
12970 || GET_MODE (target) != tmode
12971 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
12972 target = gen_reg_rtx (tmode);
12973 pat = GEN_FCN (icode) (target, op0, op1);
12974 if (! pat)
12975 return 0;
12976 emit_insn (pat);
12977 return target;
12978
12979 case IX86_BUILTIN_STOREHPS:
12980 case IX86_BUILTIN_STORELPS:
12981 case IX86_BUILTIN_STOREHPD:
12982 case IX86_BUILTIN_STORELPD:
12983 icode = (fcode == IX86_BUILTIN_STOREHPS ? CODE_FOR_sse_movhps
12984 : fcode == IX86_BUILTIN_STORELPS ? CODE_FOR_sse_movlps
12985 : fcode == IX86_BUILTIN_STOREHPD ? CODE_FOR_sse2_movhpd
12986 : CODE_FOR_sse2_movlpd);
12987 arg0 = TREE_VALUE (arglist);
12988 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
12989 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
12990 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
12991 mode0 = insn_data[icode].operand[1].mode;
12992 mode1 = insn_data[icode].operand[2].mode;
12993
12994 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
12995 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
12996 op1 = copy_to_mode_reg (mode1, op1);
12997
12998 pat = GEN_FCN (icode) (op0, op0, op1);
12999 if (! pat)
13000 return 0;
13001 emit_insn (pat);
13002 return 0;
13003
13004 case IX86_BUILTIN_MOVNTPS:
13005 return ix86_expand_store_builtin (CODE_FOR_sse_movntv4sf, arglist);
13006 case IX86_BUILTIN_MOVNTQ:
13007 return ix86_expand_store_builtin (CODE_FOR_sse_movntdi, arglist);
13008
13009 case IX86_BUILTIN_LDMXCSR:
13010 op0 = expand_expr (TREE_VALUE (arglist), NULL_RTX, VOIDmode, 0);
13011 target = assign_386_stack_local (SImode, 0);
13012 emit_move_insn (target, op0);
13013 emit_insn (gen_ldmxcsr (target));
13014 return 0;
13015
13016 case IX86_BUILTIN_STMXCSR:
13017 target = assign_386_stack_local (SImode, 0);
13018 emit_insn (gen_stmxcsr (target));
13019 return copy_to_mode_reg (SImode, target);
13020
13021 case IX86_BUILTIN_SHUFPS:
13022 case IX86_BUILTIN_SHUFPD:
13023 icode = (fcode == IX86_BUILTIN_SHUFPS
13024 ? CODE_FOR_sse_shufps
13025 : CODE_FOR_sse2_shufpd);
13026 arg0 = TREE_VALUE (arglist);
13027 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13028 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
13029 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13030 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13031 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
13032 tmode = insn_data[icode].operand[0].mode;
13033 mode0 = insn_data[icode].operand[1].mode;
13034 mode1 = insn_data[icode].operand[2].mode;
13035 mode2 = insn_data[icode].operand[3].mode;
13036
13037 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
13038 op0 = copy_to_mode_reg (mode0, op0);
13039 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
13040 op1 = copy_to_mode_reg (mode1, op1);
13041 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
13042 {
13043 /* @@@ better error message */
13044 error ("mask must be an immediate");
13045 return gen_reg_rtx (tmode);
13046 }
13047 if (target == 0
13048 || GET_MODE (target) != tmode
13049 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13050 target = gen_reg_rtx (tmode);
13051 pat = GEN_FCN (icode) (target, op0, op1, op2);
13052 if (! pat)
13053 return 0;
13054 emit_insn (pat);
13055 return target;
13056
13057 case IX86_BUILTIN_PSHUFW:
13058 case IX86_BUILTIN_PSHUFD:
13059 case IX86_BUILTIN_PSHUFHW:
13060 case IX86_BUILTIN_PSHUFLW:
13061 icode = ( fcode == IX86_BUILTIN_PSHUFHW ? CODE_FOR_sse2_pshufhw
13062 : fcode == IX86_BUILTIN_PSHUFLW ? CODE_FOR_sse2_pshuflw
13063 : fcode == IX86_BUILTIN_PSHUFD ? CODE_FOR_sse2_pshufd
13064 : CODE_FOR_mmx_pshufw);
13065 arg0 = TREE_VALUE (arglist);
13066 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13067 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13068 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13069 tmode = insn_data[icode].operand[0].mode;
13070 mode1 = insn_data[icode].operand[1].mode;
13071 mode2 = insn_data[icode].operand[2].mode;
13072
13073 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
13074 op0 = copy_to_mode_reg (mode1, op0);
13075 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
13076 {
13077 /* @@@ better error message */
13078 error ("mask must be an immediate");
13079 return const0_rtx;
13080 }
13081 if (target == 0
13082 || GET_MODE (target) != tmode
13083 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13084 target = gen_reg_rtx (tmode);
13085 pat = GEN_FCN (icode) (target, op0, op1);
13086 if (! pat)
13087 return 0;
13088 emit_insn (pat);
13089 return target;
13090
13091 case IX86_BUILTIN_FEMMS:
13092 emit_insn (gen_femms ());
13093 return NULL_RTX;
13094
13095 case IX86_BUILTIN_PAVGUSB:
13096 return ix86_expand_binop_builtin (CODE_FOR_pavgusb, arglist, target);
13097
13098 case IX86_BUILTIN_PF2ID:
13099 return ix86_expand_unop_builtin (CODE_FOR_pf2id, arglist, target, 0);
13100
13101 case IX86_BUILTIN_PFACC:
13102 return ix86_expand_binop_builtin (CODE_FOR_pfacc, arglist, target);
13103
13104 case IX86_BUILTIN_PFADD:
13105 return ix86_expand_binop_builtin (CODE_FOR_addv2sf3, arglist, target);
13106
13107 case IX86_BUILTIN_PFCMPEQ:
13108 return ix86_expand_binop_builtin (CODE_FOR_eqv2sf3, arglist, target);
13109
13110 case IX86_BUILTIN_PFCMPGE:
13111 return ix86_expand_binop_builtin (CODE_FOR_gev2sf3, arglist, target);
13112
13113 case IX86_BUILTIN_PFCMPGT:
13114 return ix86_expand_binop_builtin (CODE_FOR_gtv2sf3, arglist, target);
13115
13116 case IX86_BUILTIN_PFMAX:
13117 return ix86_expand_binop_builtin (CODE_FOR_pfmaxv2sf3, arglist, target);
13118
13119 case IX86_BUILTIN_PFMIN:
13120 return ix86_expand_binop_builtin (CODE_FOR_pfminv2sf3, arglist, target);
13121
13122 case IX86_BUILTIN_PFMUL:
13123 return ix86_expand_binop_builtin (CODE_FOR_mulv2sf3, arglist, target);
13124
13125 case IX86_BUILTIN_PFRCP:
13126 return ix86_expand_unop_builtin (CODE_FOR_pfrcpv2sf2, arglist, target, 0);
13127
13128 case IX86_BUILTIN_PFRCPIT1:
13129 return ix86_expand_binop_builtin (CODE_FOR_pfrcpit1v2sf3, arglist, target);
13130
13131 case IX86_BUILTIN_PFRCPIT2:
13132 return ix86_expand_binop_builtin (CODE_FOR_pfrcpit2v2sf3, arglist, target);
13133
13134 case IX86_BUILTIN_PFRSQIT1:
13135 return ix86_expand_binop_builtin (CODE_FOR_pfrsqit1v2sf3, arglist, target);
13136
13137 case IX86_BUILTIN_PFRSQRT:
13138 return ix86_expand_unop_builtin (CODE_FOR_pfrsqrtv2sf2, arglist, target, 0);
13139
13140 case IX86_BUILTIN_PFSUB:
13141 return ix86_expand_binop_builtin (CODE_FOR_subv2sf3, arglist, target);
13142
13143 case IX86_BUILTIN_PFSUBR:
13144 return ix86_expand_binop_builtin (CODE_FOR_subrv2sf3, arglist, target);
13145
13146 case IX86_BUILTIN_PI2FD:
13147 return ix86_expand_unop_builtin (CODE_FOR_floatv2si2, arglist, target, 0);
13148
13149 case IX86_BUILTIN_PMULHRW:
13150 return ix86_expand_binop_builtin (CODE_FOR_pmulhrwv4hi3, arglist, target);
13151
13152 case IX86_BUILTIN_PF2IW:
13153 return ix86_expand_unop_builtin (CODE_FOR_pf2iw, arglist, target, 0);
13154
13155 case IX86_BUILTIN_PFNACC:
13156 return ix86_expand_binop_builtin (CODE_FOR_pfnacc, arglist, target);
13157
13158 case IX86_BUILTIN_PFPNACC:
13159 return ix86_expand_binop_builtin (CODE_FOR_pfpnacc, arglist, target);
13160
13161 case IX86_BUILTIN_PI2FW:
13162 return ix86_expand_unop_builtin (CODE_FOR_pi2fw, arglist, target, 0);
13163
13164 case IX86_BUILTIN_PSWAPDSI:
13165 return ix86_expand_unop_builtin (CODE_FOR_pswapdv2si2, arglist, target, 0);
13166
13167 case IX86_BUILTIN_PSWAPDSF:
13168 return ix86_expand_unop_builtin (CODE_FOR_pswapdv2sf2, arglist, target, 0);
13169
13170 case IX86_BUILTIN_SSE_ZERO:
13171 target = gen_reg_rtx (V4SFmode);
13172 emit_insn (gen_sse_clrv4sf (target));
13173 return target;
13174
13175 case IX86_BUILTIN_MMX_ZERO:
13176 target = gen_reg_rtx (DImode);
13177 emit_insn (gen_mmx_clrdi (target));
13178 return target;
13179
13180 case IX86_BUILTIN_SQRTSD:
13181 return ix86_expand_unop1_builtin (CODE_FOR_vmsqrtv2df2, arglist, target);
13182 case IX86_BUILTIN_LOADAPD:
13183 return ix86_expand_unop_builtin (CODE_FOR_sse2_movapd, arglist, target, 1);
13184 case IX86_BUILTIN_LOADUPD:
13185 return ix86_expand_unop_builtin (CODE_FOR_sse2_movupd, arglist, target, 1);
13186
13187 case IX86_BUILTIN_STOREAPD:
13188 return ix86_expand_store_builtin (CODE_FOR_sse2_movapd, arglist);
13189 case IX86_BUILTIN_STOREUPD:
13190 return ix86_expand_store_builtin (CODE_FOR_sse2_movupd, arglist);
13191
13192 case IX86_BUILTIN_LOADSD:
13193 return ix86_expand_unop_builtin (CODE_FOR_sse2_loadsd, arglist, target, 1);
13194
13195 case IX86_BUILTIN_STORESD:
13196 return ix86_expand_store_builtin (CODE_FOR_sse2_storesd, arglist);
13197
13198 case IX86_BUILTIN_SETPD1:
13199 target = assign_386_stack_local (DFmode, 0);
13200 arg0 = TREE_VALUE (arglist);
13201 emit_move_insn (adjust_address (target, DFmode, 0),
13202 expand_expr (arg0, NULL_RTX, VOIDmode, 0));
13203 op0 = gen_reg_rtx (V2DFmode);
13204 emit_insn (gen_sse2_loadsd (op0, adjust_address (target, V2DFmode, 0)));
13205 emit_insn (gen_sse2_shufpd (op0, op0, op0, GEN_INT (0)));
13206 return op0;
13207
13208 case IX86_BUILTIN_SETPD:
13209 target = assign_386_stack_local (V2DFmode, 0);
13210 arg0 = TREE_VALUE (arglist);
13211 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13212 emit_move_insn (adjust_address (target, DFmode, 0),
13213 expand_expr (arg0, NULL_RTX, VOIDmode, 0));
13214 emit_move_insn (adjust_address (target, DFmode, 8),
13215 expand_expr (arg1, NULL_RTX, VOIDmode, 0));
13216 op0 = gen_reg_rtx (V2DFmode);
13217 emit_insn (gen_sse2_movapd (op0, target));
13218 return op0;
13219
13220 case IX86_BUILTIN_LOADRPD:
13221 target = ix86_expand_unop_builtin (CODE_FOR_sse2_movapd, arglist,
13222 gen_reg_rtx (V2DFmode), 1);
13223 emit_insn (gen_sse2_shufpd (target, target, target, GEN_INT (1)));
13224 return target;
13225
13226 case IX86_BUILTIN_LOADPD1:
13227 target = ix86_expand_unop_builtin (CODE_FOR_sse2_loadsd, arglist,
13228 gen_reg_rtx (V2DFmode), 1);
13229 emit_insn (gen_sse2_shufpd (target, target, target, const0_rtx));
13230 return target;
13231
13232 case IX86_BUILTIN_STOREPD1:
13233 return ix86_expand_store_builtin (CODE_FOR_sse2_movapd, arglist);
13234 case IX86_BUILTIN_STORERPD:
13235 return ix86_expand_store_builtin (CODE_FOR_sse2_movapd, arglist);
13236
13237 case IX86_BUILTIN_MFENCE:
13238 emit_insn (gen_sse2_mfence ());
13239 return 0;
13240 case IX86_BUILTIN_LFENCE:
13241 emit_insn (gen_sse2_lfence ());
13242 return 0;
13243
13244 case IX86_BUILTIN_CLFLUSH:
13245 arg0 = TREE_VALUE (arglist);
13246 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13247 icode = CODE_FOR_sse2_clflush;
13248 mode0 = insn_data[icode].operand[0].mode;
13249 if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
13250 op0 = copy_to_mode_reg (mode0, op0);
13251
13252 emit_insn (gen_sse2_clflush (op0));
13253 return 0;
13254
13255 case IX86_BUILTIN_MOVNTPD:
13256 return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2df, arglist);
13257 case IX86_BUILTIN_MOVNTDQ:
13258 return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2di, arglist);
13259 case IX86_BUILTIN_MOVNTI:
13260 return ix86_expand_store_builtin (CODE_FOR_sse2_movntsi, arglist);
13261
13262 default:
13263 break;
13264 }
13265
13266 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
13267 if (d->code == fcode)
13268 {
13269 /* Compares are treated specially. */
13270 if (d->icode == CODE_FOR_maskcmpv4sf3
13271 || d->icode == CODE_FOR_vmmaskcmpv4sf3
13272 || d->icode == CODE_FOR_maskncmpv4sf3
13273 || d->icode == CODE_FOR_vmmaskncmpv4sf3
13274 || d->icode == CODE_FOR_maskcmpv2df3
13275 || d->icode == CODE_FOR_vmmaskcmpv2df3
13276 || d->icode == CODE_FOR_maskncmpv2df3
13277 || d->icode == CODE_FOR_vmmaskncmpv2df3)
13278 return ix86_expand_sse_compare (d, arglist, target);
13279
13280 return ix86_expand_binop_builtin (d->icode, arglist, target);
13281 }
13282
13283 for (i = 0, d = bdesc_1arg; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
13284 if (d->code == fcode)
13285 return ix86_expand_unop_builtin (d->icode, arglist, target, 0);
13286
13287 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
13288 if (d->code == fcode)
13289 return ix86_expand_sse_comi (d, arglist, target);
13290
13291 /* @@@ Should really do something sensible here. */
13292 return 0;
13293 }
13294
13295 /* Store OPERAND to memory after reload is completed. This means
13296 that we can't easily use assign_stack_local. */
13297 rtx
13298 ix86_force_to_memory (mode, operand)
13299 enum machine_mode mode;
13300 rtx operand;
13301 {
13302 rtx result;
13303 if (!reload_completed)
13304 abort ();
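/* With the x86-64 red zone we can simply store below the stack pointer
   without adjusting it; otherwise push the operand and return a MEM at
   the new stack pointer.  */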
13305 if (TARGET_64BIT && TARGET_RED_ZONE)
13306 {
13307 result = gen_rtx_MEM (mode,
13308 gen_rtx_PLUS (Pmode,
13309 stack_pointer_rtx,
13310 GEN_INT (-RED_ZONE_SIZE)));
13311 emit_move_insn (result, operand);
13312 }
13313 else if (TARGET_64BIT && !TARGET_RED_ZONE)
13314 {
13315 switch (mode)
13316 {
13317 case HImode:
13318 case SImode:
13319 operand = gen_lowpart (DImode, operand);
13320 /* FALLTHRU */
13321 case DImode:
13322 emit_insn (
13323 gen_rtx_SET (VOIDmode,
13324 gen_rtx_MEM (DImode,
13325 gen_rtx_PRE_DEC (DImode,
13326 stack_pointer_rtx)),
13327 operand));
13328 break;
13329 default:
13330 abort ();
13331 }
13332 result = gen_rtx_MEM (mode, stack_pointer_rtx);
13333 }
13334 else
13335 {
13336 switch (mode)
13337 {
13338 case DImode:
13339 {
13340 rtx operands[2];
13341 split_di (&operand, 1, operands, operands + 1);
13342 emit_insn (
13343 gen_rtx_SET (VOIDmode,
13344 gen_rtx_MEM (SImode,
13345 gen_rtx_PRE_DEC (Pmode,
13346 stack_pointer_rtx)),
13347 operands[1]));
13348 emit_insn (
13349 gen_rtx_SET (VOIDmode,
13350 gen_rtx_MEM (SImode,
13351 gen_rtx_PRE_DEC (Pmode,
13352 stack_pointer_rtx)),
13353 operands[0]));
13354 }
13355 break;
13356 case HImode:
13357 /* It is better to store HImodes as SImodes. */
13358 if (!TARGET_PARTIAL_REG_STALL)
13359 operand = gen_lowpart (SImode, operand);
13360 /* FALLTHRU */
13361 case SImode:
13362 emit_insn (
13363 gen_rtx_SET (VOIDmode,
13364 gen_rtx_MEM (GET_MODE (operand),
13365 gen_rtx_PRE_DEC (SImode,
13366 stack_pointer_rtx)),
13367 operand));
13368 break;
13369 default:
13370 abort ();
13371 }
13372 result = gen_rtx_MEM (mode, stack_pointer_rtx);
13373 }
13374 return result;
13375 }
13376
13377 /* Release the stack space allocated for the operand by ix86_force_to_memory. */
13378 void
13379 ix86_free_from_memory (mode)
13380 enum machine_mode mode;
13381 {
13382 if (!TARGET_64BIT || !TARGET_RED_ZONE)
13383 {
13384 int size;
13385
13386 if (mode == DImode || TARGET_64BIT)
13387 size = 8;
13388 else if (mode == HImode && TARGET_PARTIAL_REG_STALL)
13389 size = 2;
13390 else
13391 size = 4;
13392 /* Use LEA to deallocate stack space. In peephole2 it will be converted
13393 to a pop or add instruction if registers are available. */
13394 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
13395 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
13396 GEN_INT (size))));
13397 }
13398 }
13399
13400 /* Put float CONST_DOUBLE in the constant pool instead of fp regs.
13401 QImode must go into class Q_REGS.
13402 Narrow ALL_REGS to GENERAL_REGS. This supports allowing movsf and
13403 movdf to do mem-to-mem moves through integer regs. */
13404 enum reg_class
13405 ix86_preferred_reload_class (x, class)
13406 rtx x;
13407 enum reg_class class;
13408 {
13409 if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) != VOIDmode)
13410 {
13411 /* SSE can't load any constant directly yet. */
13412 if (SSE_CLASS_P (class))
13413 return NO_REGS;
13414 /* Floats can load 0 and 1. */
13415 if (MAYBE_FLOAT_CLASS_P (class) && standard_80387_constant_p (x))
13416 {
13417 /* Limit class to non-SSE. Use GENERAL_REGS if possible. */
13418 if (MAYBE_SSE_CLASS_P (class))
13419 return (reg_class_subset_p (class, GENERAL_REGS)
13420 ? GENERAL_REGS : FLOAT_REGS);
13421 else
13422 return class;
13423 }
13424 /* General regs can load everything. */
13425 if (reg_class_subset_p (class, GENERAL_REGS))
13426 return GENERAL_REGS;
13427 /* In case we haven't resolved FLOAT or SSE yet, give up. */
13428 if (MAYBE_FLOAT_CLASS_P (class) || MAYBE_SSE_CLASS_P (class))
13429 return NO_REGS;
13430 }
13431 if (MAYBE_MMX_CLASS_P (class) && CONSTANT_P (x))
13432 return NO_REGS;
13433 if (GET_MODE (x) == QImode && ! reg_class_subset_p (class, Q_REGS))
13434 return Q_REGS;
13435 return class;
13436 }
13437
13438 /* If we are copying between general and FP registers, we need a memory
13439 location. The same is true for SSE and MMX registers.
13440
13441 The macro can't work reliably when one of the CLASSES is a class containing
13442 registers from multiple units (SSE, MMX, integer). We avoid this by never
13443 combining those units in a single alternative in the machine description.
13444 Ensure that this constraint holds to avoid unexpected surprises.
13445
13446 When STRICT is false, we are being called from REGISTER_MOVE_COST, so do not
13447 enforce these sanity checks. */
13448 int
13449 ix86_secondary_memory_needed (class1, class2, mode, strict)
13450 enum reg_class class1, class2;
13451 enum machine_mode mode;
13452 int strict;
13453 {
13454 if (MAYBE_FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class1)
13455 || MAYBE_FLOAT_CLASS_P (class2) != FLOAT_CLASS_P (class2)
13456 || MAYBE_SSE_CLASS_P (class1) != SSE_CLASS_P (class1)
13457 || MAYBE_SSE_CLASS_P (class2) != SSE_CLASS_P (class2)
13458 || MAYBE_MMX_CLASS_P (class1) != MMX_CLASS_P (class1)
13459 || MAYBE_MMX_CLASS_P (class2) != MMX_CLASS_P (class2))
13460 {
13461 if (strict)
13462 abort ();
13463 else
13464 return 1;
13465 }
13466 return (FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class2)
13467 || (SSE_CLASS_P (class1) != SSE_CLASS_P (class2)
13468 && (mode) != SImode)
13469 || (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
13470 && (mode) != SImode));
13471 }
13472 /* Return the cost of moving data from a register in class CLASS1 to
13473 one in class CLASS2.
13474
13475 It is not required that the cost always equal 2 when FROM is the same as TO;
13476 on some machines it is expensive to move between registers if they are not
13477 general registers. */
13478 int
13479 ix86_register_move_cost (mode, class1, class2)
13480 enum machine_mode mode;
13481 enum reg_class class1, class2;
13482 {
13483 /* In case we require secondary memory, compute the cost of the store
13484 followed by the load. When copying from a general purpose register we
13485 may emit multiple stores followed by a single load, causing a memory
13486 size mismatch stall. Count this as an arbitrarily high cost of 20. */
13487 if (ix86_secondary_memory_needed (class1, class2, mode, 0))
13488 {
13489 int add_cost = 0;
13490 if (CLASS_MAX_NREGS (class1, mode) > CLASS_MAX_NREGS (class2, mode))
13491 add_cost = 20;
13492 return (MEMORY_MOVE_COST (mode, class1, 0)
13493 + MEMORY_MOVE_COST (mode, class2, 1) + add_cost);
13494 }
13495 /* Moves between SSE/MMX and integer unit are expensive. */
13496 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
13497 || SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
13498 return ix86_cost->mmxsse_to_integer;
13499 if (MAYBE_FLOAT_CLASS_P (class1))
13500 return ix86_cost->fp_move;
13501 if (MAYBE_SSE_CLASS_P (class1))
13502 return ix86_cost->sse_move;
13503 if (MAYBE_MMX_CLASS_P (class1))
13504 return ix86_cost->mmx_move;
13505 return 2;
13506 }
13507
13508 /* Return 1 if hard register REGNO can hold a value of machine-mode MODE. */
13509 int
13510 ix86_hard_regno_mode_ok (regno, mode)
13511 int regno;
13512 enum machine_mode mode;
13513 {
13514 /* Flags, and only flags, can hold CCmode values. */
13515 if (CC_REGNO_P (regno))
13516 return GET_MODE_CLASS (mode) == MODE_CC;
13517 if (GET_MODE_CLASS (mode) == MODE_CC
13518 || GET_MODE_CLASS (mode) == MODE_RANDOM
13519 || GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
13520 return 0;
13521 if (FP_REGNO_P (regno))
13522 return VALID_FP_MODE_P (mode);
13523 if (SSE_REGNO_P (regno))
13524 return VALID_SSE_REG_MODE (mode);
13525 if (MMX_REGNO_P (regno))
13526 return VALID_MMX_REG_MODE (mode) || VALID_MMX_REG_MODE_3DNOW (mode);
13527 /* We handle both integers and floats in the general purpose registers.
13528 In the future we should be able to handle vector modes as well. */
13529 if (!VALID_INT_MODE_P (mode) && !VALID_FP_MODE_P (mode))
13530 return 0;
13531 /* Take care with QImode values - they can live in non-QI regs, but then
13532 they do cause partial register stalls. */
13533 if (regno < 4 || mode != QImode || TARGET_64BIT)
13534 return 1;
13535 return reload_in_progress || reload_completed || !TARGET_PARTIAL_REG_STALL;
13536 }
13537
13538 /* Return the cost of moving data of mode M between a
13539 register and memory. A value of 2 is the default; this cost is
13540 relative to those in `REGISTER_MOVE_COST'.
13541
13542 If moving between registers and memory is more expensive than
13543 between two registers, you should define this macro to express the
13544 relative cost.
13545
13546 Also model the increased cost of moving QImode registers in
13547 non-Q_REGS classes.
13548 */
13549 int
13550 ix86_memory_move_cost (mode, class, in)
13551 enum machine_mode mode;
13552 enum reg_class class;
13553 int in;
13554 {
13555 if (FLOAT_CLASS_P (class))
13556 {
13557 int index;
13558 switch (mode)
13559 {
13560 case SFmode:
13561 index = 0;
13562 break;
13563 case DFmode:
13564 index = 1;
13565 break;
13566 case XFmode:
13567 case TFmode:
13568 index = 2;
13569 break;
13570 default:
13571 return 100;
13572 }
13573 return in ? ix86_cost->fp_load [index] : ix86_cost->fp_store [index];
13574 }
13575 if (SSE_CLASS_P (class))
13576 {
13577 int index;
13578 switch (GET_MODE_SIZE (mode))
13579 {
13580 case 4:
13581 index = 0;
13582 break;
13583 case 8:
13584 index = 1;
13585 break;
13586 case 16:
13587 index = 2;
13588 break;
13589 default:
13590 return 100;
13591 }
13592 return in ? ix86_cost->sse_load [index] : ix86_cost->sse_store [index];
13593 }
13594 if (MMX_CLASS_P (class))
13595 {
13596 int index;
13597 switch (GET_MODE_SIZE (mode))
13598 {
13599 case 4:
13600 index = 0;
13601 break;
13602 case 8:
13603 index = 1;
13604 break;
13605 default:
13606 return 100;
13607 }
13608 return in ? ix86_cost->mmx_load [index] : ix86_cost->mmx_store [index];
13609 }
13610 switch (GET_MODE_SIZE (mode))
13611 {
13612 case 1:
13613 if (in)
13614 return (Q_CLASS_P (class) ? ix86_cost->int_load[0]
13615 : ix86_cost->movzbl_load);
13616 else
13617 return (Q_CLASS_P (class) ? ix86_cost->int_store[0]
13618 : ix86_cost->int_store[0] + 4);
13619 break;
13620 case 2:
13621 return in ? ix86_cost->int_load[1] : ix86_cost->int_store[1];
13622 default:
13623 /* Compute the number of 32-bit moves needed. TFmode is moved as XFmode. */
13624 if (mode == TFmode)
13625 mode = XFmode;
13626 return ((in ? ix86_cost->int_load[2] : ix86_cost->int_store[2])
13627 * (int) GET_MODE_SIZE (mode) / 4);
13628 }
13629 }
13630
13631 #if defined (DO_GLOBAL_CTORS_BODY) && defined (HAS_INIT_SECTION)
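/* Output code into the .init section that pushes the address of constructor
   SYMBOL; PRIORITY is ignored.  */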
13632 static void
13633 ix86_svr3_asm_out_constructor (symbol, priority)
13634 rtx symbol;
13635 int priority ATTRIBUTE_UNUSED;
13636 {
13637 init_section ();
13638 fputs ("\tpushl $", asm_out_file);
13639 assemble_name (asm_out_file, XSTR (symbol, 0));
13640 fputc ('\n', asm_out_file);
13641 }
13642 #endif
13643
13644 #if TARGET_MACHO
13645
13646 static int current_machopic_label_num;
13647
13648 /* Given a symbol name and its associated stub, write out the
13649 definition of the stub. */
13650
13651 void
13652 machopic_output_stub (file, symb, stub)
13653 FILE *file;
13654 const char *symb, *stub;
13655 {
13656 unsigned int length;
13657 char *binder_name, *symbol_name, lazy_ptr_name[32];
13658 int label = ++current_machopic_label_num;
13659
13660 /* Lose our funky encoding stuff so it doesn't contaminate the stub. */
13661 symb = (*targetm.strip_name_encoding) (symb);
13662
13663 length = strlen (stub);
13664 binder_name = alloca (length + 32);
13665 GEN_BINDER_NAME_FOR_STUB (binder_name, stub, length);
13666
13667 length = strlen (symb);
13668 symbol_name = alloca (length + 32);
13669 GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name, symb, length);
13670
13671 sprintf (lazy_ptr_name, "L%d$lz", label);
13672
13673 if (MACHOPIC_PURE)
13674 machopic_picsymbol_stub_section ();
13675 else
13676 machopic_symbol_stub_section ();
13677
13678 fprintf (file, "%s:\n", stub);
13679 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
13680
13681 if (MACHOPIC_PURE)
13682 {
13683 fprintf (file, "\tcall LPC$%d\nLPC$%d:\tpopl %%eax\n", label, label);
13684 fprintf (file, "\tmovl %s-LPC$%d(%%eax),%%edx\n", lazy_ptr_name, label);
13685 fprintf (file, "\tjmp %%edx\n");
13686 }
13687 else
13688 fprintf (file, "\tjmp *%s\n", lazy_ptr_name);
13689
13690 fprintf (file, "%s:\n", binder_name);
13691
13692 if (MACHOPIC_PURE)
13693 {
13694 fprintf (file, "\tlea %s-LPC$%d(%%eax),%%eax\n", lazy_ptr_name, label);
13695 fprintf (file, "\tpushl %%eax\n");
13696 }
13697 else
13698 fprintf (file, "\t pushl $%s\n", lazy_ptr_name);
13699
13700 fprintf (file, "\tjmp dyld_stub_binding_helper\n");
13701
13702 machopic_lazy_symbol_ptr_section ();
13703 fprintf (file, "%s:\n", lazy_ptr_name);
13704 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
13705 fprintf (file, "\t.long %s\n", binder_name);
13706 }
13707 #endif /* TARGET_MACHO */
13708
13709 /* Order the registers for the register allocator. */
13710
13711 void
13712 x86_order_regs_for_local_alloc ()
13713 {
13714 int pos = 0;
13715 int i;
13716
13717 /* First allocate the local general purpose registers. */
13718 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
13719 if (GENERAL_REGNO_P (i) && call_used_regs[i])
13720 reg_alloc_order [pos++] = i;
13721
13722 /* Global general purpose registers. */
13723 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
13724 if (GENERAL_REGNO_P (i) && !call_used_regs[i])
13725 reg_alloc_order [pos++] = i;
13726
13727 /* x87 registers come first in case we are doing FP math
13728 using them. */
13729 if (!TARGET_SSE_MATH)
13730 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
13731 reg_alloc_order [pos++] = i;
13732
13733 /* SSE registers. */
13734 for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++)
13735 reg_alloc_order [pos++] = i;
13736 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
13737 reg_alloc_order [pos++] = i;
13738
13739 /* x87 registers, when SSE math is in use. */
13740 if (TARGET_SSE_MATH)
13741 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
13742 reg_alloc_order [pos++] = i;
13743
13744 for (i = FIRST_MMX_REG; i <= LAST_MMX_REG; i++)
13745 reg_alloc_order [pos++] = i;
13746
13747 /* Initialize the rest of the array, as we do not allocate some registers
13748 at all. */
13749 while (pos < FIRST_PSEUDO_REGISTER)
13750 reg_alloc_order [pos++] = 0;
13751 }
13752
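/* Output assembly for an MI thunk: add DELTA to the incoming `this'
   pointer and then tail-jump to FUNCTION.  */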
13753 void
13754 x86_output_mi_thunk (file, delta, function)
13755 FILE *file;
13756 int delta;
13757 tree function;
13758 {
13759 tree parm;
13760 rtx xops[3];
13761
13762 if (ix86_regparm > 0)
13763 parm = TYPE_ARG_TYPES (TREE_TYPE (function));
13764 else
13765 parm = NULL_TREE;
13766 for (; parm; parm = TREE_CHAIN (parm))
13767 if (TREE_VALUE (parm) == void_type_node)
13768 break;
13769
13770 xops[0] = GEN_INT (delta);
13771 if (TARGET_64BIT)
13772 {
13773 int n = aggregate_value_p (TREE_TYPE (TREE_TYPE (function))) != 0;
13774 xops[1] = gen_rtx_REG (DImode, x86_64_int_parameter_registers[n]);
13775 output_asm_insn ("add{q} {%0, %1|%1, %0}", xops);
13776 if (flag_pic)
13777 {
13778 fprintf (file, "\tjmp *");
13779 assemble_name (file, XSTR (XEXP (DECL_RTL (function), 0), 0));
13780 fprintf (file, "@GOTPCREL(%%rip)\n");
13781 }
13782 else
13783 {
13784 fprintf (file, "\tjmp ");
13785 assemble_name (file, XSTR (XEXP (DECL_RTL (function), 0), 0));
13786 fprintf (file, "\n");
13787 }
13788 }
13789 else
13790 {
13791 if (parm)
13792 xops[1] = gen_rtx_REG (SImode, 0);
13793 else if (aggregate_value_p (TREE_TYPE (TREE_TYPE (function))))
13794 xops[1] = gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, 8));
13795 else
13796 xops[1] = gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, 4));
13797 output_asm_insn ("add{l} {%0, %1|%1, %0}", xops);
13798
13799 if (flag_pic)
13800 {
13801 xops[0] = pic_offset_table_rtx;
13802 xops[1] = gen_label_rtx ();
13803 xops[2] = gen_rtx_SYMBOL_REF (Pmode, "_GLOBAL_OFFSET_TABLE_");
13804
13805 if (ix86_regparm > 2)
13806 abort ();
13807 output_asm_insn ("push{l}\t%0", xops);
13808 output_asm_insn ("call\t%P1", xops);
13809 ASM_OUTPUT_INTERNAL_LABEL (file, "L", CODE_LABEL_NUMBER (xops[1]));
13810 output_asm_insn ("pop{l}\t%0", xops);
13811 output_asm_insn
13812 ("add{l}\t{%2+[.-%P1], %0|%0, OFFSET FLAT: %2+[.-%P1]}", xops);
13813 xops[0] = gen_rtx_MEM (SImode, XEXP (DECL_RTL (function), 0));
13814 output_asm_insn
13815 ("mov{l}\t{%0@GOT(%%ebx), %%ecx|%%ecx, %0@GOT[%%ebx]}", xops);
13816 asm_fprintf (file, "\tpop{l\t%%ebx|\t%%ebx}\n");
13817 asm_fprintf (file, "\tjmp\t{*%%ecx|%%ecx}\n");
13818 }
13819 else
13820 {
13821 fprintf (file, "\tjmp ");
13822 assemble_name (file, XSTR (XEXP (DECL_RTL (function), 0), 0));
13823 fprintf (file, "\n");
13824 }
13825 }
13826 }
13827
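/* Return the alignment to use for FIELD, given the alignment COMPUTED so
   far.  Without TARGET_ALIGN_DOUBLE (-malign-double) on ia32, DFmode/DCmode
   and integer fields are capped at 32-bit alignment.  */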
13828 int
13829 x86_field_alignment (field, computed)
13830 tree field;
13831 int computed;
13832 {
13833 enum machine_mode mode;
13834 tree type = TREE_TYPE (field);
13835
13836 if (TARGET_64BIT || TARGET_ALIGN_DOUBLE)
13837 return computed;
13838 mode = TYPE_MODE (TREE_CODE (type) == ARRAY_TYPE
13839 ? get_inner_array_type (type) : type);
13840 if (mode == DFmode || mode == DCmode
13841 || GET_MODE_CLASS (mode) == MODE_INT
13842 || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
13843 return MIN (32, computed);
13844 return computed;
13845 }
13846
13847 /* Implement machine-specific optimizations.
13848 At the moment we implement a single transformation: AMD Athlon works faster
13849 when RET is not the destination of a conditional jump or directly preceded
13850 by another jump instruction. We avoid the penalty by inserting a NOP just
13851 before the RET instruction in such cases. */
13852 void
13853 x86_machine_dependent_reorg (first)
13854 rtx first ATTRIBUTE_UNUSED;
13855 {
13856 edge e;
13857
13858 if (!TARGET_ATHLON || !optimize || optimize_size)
13859 return;
13860 for (e = EXIT_BLOCK_PTR->pred; e; e = e->pred_next)
13861 {
13862 basic_block bb = e->src;
13863 rtx ret = bb->end;
13864 rtx prev;
13865 bool insert = false;
13866
13867 if (!returnjump_p (ret) || !maybe_hot_bb_p (bb))
13868 continue;
13869 prev = prev_nonnote_insn (ret);
13870 if (prev && GET_CODE (prev) == CODE_LABEL)
13871 {
13872 edge e;
13873 for (e = bb->pred; e; e = e->pred_next)
13874 if (EDGE_FREQUENCY (e) && e->src->index > 0
13875 && !(e->flags & EDGE_FALLTHRU))
13876 insert = 1;
13877 }
13878 if (!insert)
13879 {
13880 prev = prev_real_insn (ret);
13881 if (prev && GET_CODE (prev) == JUMP_INSN
13882 && any_condjump_p (prev))
13883 insert = 1;
13884 }
13885 if (insert)
13886 emit_insn_before (gen_nop (), ret);
13887 }
13888 }
13889
13890 #include "gt-i386.h"