1 /* Subroutines used for code generation on IA-32.
2 Copyright (C) 1988, 1992, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
3 2002 Free Software Foundation, Inc.
5 This file is part of GNU CC.
7 GNU CC is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 2, or (at your option)
12 GNU CC is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with GNU CC; see the file COPYING. If not, write to
19 the Free Software Foundation, 59 Temple Place - Suite 330,
20 Boston, MA 02111-1307, USA. */
28 #include "hard-reg-set.h"
30 #include "insn-config.h"
31 #include "conditions.h"
33 #include "insn-attr.h"
41 #include "basic-block.h"
44 #include "target-def.h"
45 #include "langhooks.h"
47 #ifndef CHECK_STACK_LIMIT
48 #define CHECK_STACK_LIMIT (-1)
51 /* Processor costs (relative to an add) */
53 struct processor_costs size_cost
= { /* costs for tuning for size */
54 2, /* cost of an add instruction */
55 3, /* cost of a lea instruction */
56 2, /* variable shift costs */
57 3, /* constant shift costs */
58 3, /* cost of starting a multiply */
59 0, /* cost of multiply per each bit set */
60 3, /* cost of a divide/mod */
61 3, /* cost of movsx */
62 3, /* cost of movzx */
65 2, /* cost for loading QImode using movzbl */
66 {2, 2, 2}, /* cost of loading integer registers
67 in QImode, HImode and SImode.
68 Relative to reg-reg move (2). */
69 {2, 2, 2}, /* cost of storing integer registers */
70 2, /* cost of reg,reg fld/fst */
71 {2, 2, 2}, /* cost of loading fp registers
72 in SFmode, DFmode and XFmode */
73 {2, 2, 2}, /* cost of loading integer registers */
74 3, /* cost of moving MMX register */
75 {3, 3}, /* cost of loading MMX registers
76 in SImode and DImode */
77 {3, 3}, /* cost of storing MMX registers
78 in SImode and DImode */
79 3, /* cost of moving SSE register */
80 {3, 3, 3}, /* cost of loading SSE registers
81 in SImode, DImode and TImode */
82 {3, 3, 3}, /* cost of storing SSE registers
83 in SImode, DImode and TImode */
84 3, /* MMX or SSE register to integer */
85 0, /* size of prefetch block */
86 0, /* number of parallel prefetches */
87 2, /* cost of FADD and FSUB insns. */
88 2, /* cost of FMUL instruction. */
89 2, /* cost of FDIV instruction. */
90 2, /* cost of FABS instruction. */
91 2, /* cost of FCHS instruction. */
92 2, /* cost of FSQRT instruction. */
95 /* Processor costs (relative to an add) */
97 struct processor_costs i386_cost
= { /* 386 specific costs */
98 1, /* cost of an add instruction */
99 1, /* cost of a lea instruction */
100 3, /* variable shift costs */
101 2, /* constant shift costs */
102 6, /* cost of starting a multiply */
103 1, /* cost of multiply per each bit set */
104 23, /* cost of a divide/mod */
105 3, /* cost of movsx */
106 2, /* cost of movzx */
107 15, /* "large" insn */
109 4, /* cost for loading QImode using movzbl */
110 {2, 4, 2}, /* cost of loading integer registers
111 in QImode, HImode and SImode.
112 Relative to reg-reg move (2). */
113 {2, 4, 2}, /* cost of storing integer registers */
114 2, /* cost of reg,reg fld/fst */
115 {8, 8, 8}, /* cost of loading fp registers
116 in SFmode, DFmode and XFmode */
117 {8, 8, 8}, /* cost of loading integer registers */
118 2, /* cost of moving MMX register */
119 {4, 8}, /* cost of loading MMX registers
120 in SImode and DImode */
121 {4, 8}, /* cost of storing MMX registers
122 in SImode and DImode */
123 2, /* cost of moving SSE register */
124 {4, 8, 16}, /* cost of loading SSE registers
125 in SImode, DImode and TImode */
126 {4, 8, 16}, /* cost of storing SSE registers
127 in SImode, DImode and TImode */
128 3, /* MMX or SSE register to integer */
129 0, /* size of prefetch block */
130 0, /* number of parallel prefetches */
131 23, /* cost of FADD and FSUB insns. */
132 27, /* cost of FMUL instruction. */
133 88, /* cost of FDIV instruction. */
134 22, /* cost of FABS instruction. */
135 24, /* cost of FCHS instruction. */
136 122, /* cost of FSQRT instruction. */
140 struct processor_costs i486_cost
= { /* 486 specific costs */
141 1, /* cost of an add instruction */
142 1, /* cost of a lea instruction */
143 3, /* variable shift costs */
144 2, /* constant shift costs */
145 12, /* cost of starting a multiply */
146 1, /* cost of multiply per each bit set */
147 40, /* cost of a divide/mod */
148 3, /* cost of movsx */
149 2, /* cost of movzx */
150 15, /* "large" insn */
152 4, /* cost for loading QImode using movzbl */
153 {2, 4, 2}, /* cost of loading integer registers
154 in QImode, HImode and SImode.
155 Relative to reg-reg move (2). */
156 {2, 4, 2}, /* cost of storing integer registers */
157 2, /* cost of reg,reg fld/fst */
158 {8, 8, 8}, /* cost of loading fp registers
159 in SFmode, DFmode and XFmode */
160 {8, 8, 8}, /* cost of loading integer registers */
161 2, /* cost of moving MMX register */
162 {4, 8}, /* cost of loading MMX registers
163 in SImode and DImode */
164 {4, 8}, /* cost of storing MMX registers
165 in SImode and DImode */
166 2, /* cost of moving SSE register */
167 {4, 8, 16}, /* cost of loading SSE registers
168 in SImode, DImode and TImode */
169 {4, 8, 16}, /* cost of storing SSE registers
170 in SImode, DImode and TImode */
171 3, /* MMX or SSE register to integer */
172 0, /* size of prefetch block */
173 0, /* number of parallel prefetches */
174 8, /* cost of FADD and FSUB insns. */
175 16, /* cost of FMUL instruction. */
176 73, /* cost of FDIV instruction. */
177 3, /* cost of FABS instruction. */
178 3, /* cost of FCHS instruction. */
179 83, /* cost of FSQRT instruction. */
183 struct processor_costs pentium_cost
= {
184 1, /* cost of an add instruction */
185 1, /* cost of a lea instruction */
186 4, /* variable shift costs */
187 1, /* constant shift costs */
188 11, /* cost of starting a multiply */
189 0, /* cost of multiply per each bit set */
190 25, /* cost of a divide/mod */
191 3, /* cost of movsx */
192 2, /* cost of movzx */
193 8, /* "large" insn */
195 6, /* cost for loading QImode using movzbl */
196 {2, 4, 2}, /* cost of loading integer registers
197 in QImode, HImode and SImode.
198 Relative to reg-reg move (2). */
199 {2, 4, 2}, /* cost of storing integer registers */
200 2, /* cost of reg,reg fld/fst */
201 {2, 2, 6}, /* cost of loading fp registers
202 in SFmode, DFmode and XFmode */
203 {4, 4, 6}, /* cost of loading integer registers */
204 8, /* cost of moving MMX register */
205 {8, 8}, /* cost of loading MMX registers
206 in SImode and DImode */
207 {8, 8}, /* cost of storing MMX registers
208 in SImode and DImode */
209 2, /* cost of moving SSE register */
210 {4, 8, 16}, /* cost of loading SSE registers
211 in SImode, DImode and TImode */
212 {4, 8, 16}, /* cost of storing SSE registers
213 in SImode, DImode and TImode */
214 3, /* MMX or SSE register to integer */
215 0, /* size of prefetch block */
216 0, /* number of parallel prefetches */
217 3, /* cost of FADD and FSUB insns. */
218 3, /* cost of FMUL instruction. */
219 39, /* cost of FDIV instruction. */
220 1, /* cost of FABS instruction. */
221 1, /* cost of FCHS instruction. */
222 70, /* cost of FSQRT instruction. */
226 struct processor_costs pentiumpro_cost
= {
227 1, /* cost of an add instruction */
228 1, /* cost of a lea instruction */
229 1, /* variable shift costs */
230 1, /* constant shift costs */
231 4, /* cost of starting a multiply */
232 0, /* cost of multiply per each bit set */
233 17, /* cost of a divide/mod */
234 1, /* cost of movsx */
235 1, /* cost of movzx */
236 8, /* "large" insn */
238 2, /* cost for loading QImode using movzbl */
239 {4, 4, 4}, /* cost of loading integer registers
240 in QImode, HImode and SImode.
241 Relative to reg-reg move (2). */
242 {2, 2, 2}, /* cost of storing integer registers */
243 2, /* cost of reg,reg fld/fst */
244 {2, 2, 6}, /* cost of loading fp registers
245 in SFmode, DFmode and XFmode */
246 {4, 4, 6}, /* cost of loading integer registers */
247 2, /* cost of moving MMX register */
248 {2, 2}, /* cost of loading MMX registers
249 in SImode and DImode */
250 {2, 2}, /* cost of storing MMX registers
251 in SImode and DImode */
252 2, /* cost of moving SSE register */
253 {2, 2, 8}, /* cost of loading SSE registers
254 in SImode, DImode and TImode */
255 {2, 2, 8}, /* cost of storing SSE registers
256 in SImode, DImode and TImode */
257 3, /* MMX or SSE register to integer */
258 32, /* size of prefetch block */
259 6, /* number of parallel prefetches */
260 3, /* cost of FADD and FSUB insns. */
261 5, /* cost of FMUL instruction. */
262 56, /* cost of FDIV instruction. */
263 2, /* cost of FABS instruction. */
264 2, /* cost of FCHS instruction. */
265 56, /* cost of FSQRT instruction. */
269 struct processor_costs k6_cost
= {
270 1, /* cost of an add instruction */
271 2, /* cost of a lea instruction */
272 1, /* variable shift costs */
273 1, /* constant shift costs */
274 3, /* cost of starting a multiply */
275 0, /* cost of multiply per each bit set */
276 18, /* cost of a divide/mod */
277 2, /* cost of movsx */
278 2, /* cost of movzx */
279 8, /* "large" insn */
281 3, /* cost for loading QImode using movzbl */
282 {4, 5, 4}, /* cost of loading integer registers
283 in QImode, HImode and SImode.
284 Relative to reg-reg move (2). */
285 {2, 3, 2}, /* cost of storing integer registers */
286 4, /* cost of reg,reg fld/fst */
287 {6, 6, 6}, /* cost of loading fp registers
288 in SFmode, DFmode and XFmode */
289 {4, 4, 4}, /* cost of loading integer registers */
290 2, /* cost of moving MMX register */
291 {2, 2}, /* cost of loading MMX registers
292 in SImode and DImode */
293 {2, 2}, /* cost of storing MMX registers
294 in SImode and DImode */
295 2, /* cost of moving SSE register */
296 {2, 2, 8}, /* cost of loading SSE registers
297 in SImode, DImode and TImode */
298 {2, 2, 8}, /* cost of storing SSE registers
299 in SImode, DImode and TImode */
300 6, /* MMX or SSE register to integer */
301 32, /* size of prefetch block */
302 1, /* number of parallel prefetches */
303 2, /* cost of FADD and FSUB insns. */
304 2, /* cost of FMUL instruction. */
305 56, /* cost of FDIV instruction. */
306 2, /* cost of FABS instruction. */
307 2, /* cost of FCHS instruction. */
308 56, /* cost of FSQRT instruction. */
312 struct processor_costs athlon_cost
= {
313 1, /* cost of an add instruction */
314 2, /* cost of a lea instruction */
315 1, /* variable shift costs */
316 1, /* constant shift costs */
317 5, /* cost of starting a multiply */
318 0, /* cost of multiply per each bit set */
319 42, /* cost of a divide/mod */
320 1, /* cost of movsx */
321 1, /* cost of movzx */
322 8, /* "large" insn */
324 4, /* cost for loading QImode using movzbl */
325 {3, 4, 3}, /* cost of loading integer registers
326 in QImode, HImode and SImode.
327 Relative to reg-reg move (2). */
328 {3, 4, 3}, /* cost of storing integer registers */
329 4, /* cost of reg,reg fld/fst */
330 {4, 4, 12}, /* cost of loading fp registers
331 in SFmode, DFmode and XFmode */
332 {6, 6, 8}, /* cost of loading integer registers */
333 2, /* cost of moving MMX register */
334 {4, 4}, /* cost of loading MMX registers
335 in SImode and DImode */
336 {4, 4}, /* cost of storing MMX registers
337 in SImode and DImode */
338 2, /* cost of moving SSE register */
339 {4, 4, 6}, /* cost of loading SSE registers
340 in SImode, DImode and TImode */
341 {4, 4, 5}, /* cost of storing SSE registers
342 in SImode, DImode and TImode */
343 5, /* MMX or SSE register to integer */
344 64, /* size of prefetch block */
345 6, /* number of parallel prefetches */
346 4, /* cost of FADD and FSUB insns. */
347 4, /* cost of FMUL instruction. */
348 24, /* cost of FDIV instruction. */
349 2, /* cost of FABS instruction. */
350 2, /* cost of FCHS instruction. */
351 35, /* cost of FSQRT instruction. */
355 struct processor_costs pentium4_cost
= {
356 1, /* cost of an add instruction */
357 1, /* cost of a lea instruction */
358 8, /* variable shift costs */
359 8, /* constant shift costs */
360 30, /* cost of starting a multiply */
361 0, /* cost of multiply per each bit set */
362 112, /* cost of a divide/mod */
363 1, /* cost of movsx */
364 1, /* cost of movzx */
365 16, /* "large" insn */
367 2, /* cost for loading QImode using movzbl */
368 {4, 5, 4}, /* cost of loading integer registers
369 in QImode, HImode and SImode.
370 Relative to reg-reg move (2). */
371 {2, 3, 2}, /* cost of storing integer registers */
372 2, /* cost of reg,reg fld/fst */
373 {2, 2, 6}, /* cost of loading fp registers
374 in SFmode, DFmode and XFmode */
375 {4, 4, 6}, /* cost of loading integer registers */
376 2, /* cost of moving MMX register */
377 {2, 2}, /* cost of loading MMX registers
378 in SImode and DImode */
379 {2, 2}, /* cost of storing MMX registers
380 in SImode and DImode */
381 12, /* cost of moving SSE register */
382 {12, 12, 12}, /* cost of loading SSE registers
383 in SImode, DImode and TImode */
384 {2, 2, 8}, /* cost of storing SSE registers
385 in SImode, DImode and TImode */
386 10, /* MMX or SSE register to integer */
387 64, /* size of prefetch block */
388 6, /* number of parallel prefetches */
389 5, /* cost of FADD and FSUB insns. */
390 7, /* cost of FMUL instruction. */
391 43, /* cost of FDIV instruction. */
392 2, /* cost of FABS instruction. */
393 2, /* cost of FCHS instruction. */
394 43, /* cost of FSQRT instruction. */
397 const struct processor_costs
*ix86_cost
= &pentium_cost
;
/* Processor feature/optimization bitmasks.  Each m_* constant is the
   bit corresponding to one PROCESSOR_* value; the x86_* tuning masks
   below are built by OR-ing these together.  */
#define m_386 (1<<PROCESSOR_I386)
#define m_486 (1<<PROCESSOR_I486)
#define m_PENT (1<<PROCESSOR_PENTIUM)
#define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
#define m_K6 (1<<PROCESSOR_K6)
#define m_ATHLON (1<<PROCESSOR_ATHLON)
#define m_PENT4 (1<<PROCESSOR_PENTIUM4)
408 const int x86_use_leave
= m_386
| m_K6
| m_ATHLON
;
409 const int x86_push_memory
= m_386
| m_K6
| m_ATHLON
| m_PENT4
;
410 const int x86_zero_extend_with_and
= m_486
| m_PENT
;
411 const int x86_movx
= m_ATHLON
| m_PPRO
| m_PENT4
/* m_386 | m_K6 */;
412 const int x86_double_with_add
= ~m_386
;
413 const int x86_use_bit_test
= m_386
;
414 const int x86_unroll_strlen
= m_486
| m_PENT
| m_PPRO
| m_ATHLON
| m_K6
;
415 const int x86_cmove
= m_PPRO
| m_ATHLON
| m_PENT4
;
416 const int x86_3dnow_a
= m_ATHLON
;
417 const int x86_deep_branch
= m_PPRO
| m_K6
| m_ATHLON
| m_PENT4
;
418 const int x86_branch_hints
= m_PENT4
;
419 const int x86_use_sahf
= m_PPRO
| m_K6
| m_PENT4
;
420 const int x86_partial_reg_stall
= m_PPRO
;
421 const int x86_use_loop
= m_K6
;
422 const int x86_use_fiop
= ~(m_PPRO
| m_ATHLON
| m_PENT
);
423 const int x86_use_mov0
= m_K6
;
424 const int x86_use_cltd
= ~(m_PENT
| m_K6
);
425 const int x86_read_modify_write
= ~m_PENT
;
426 const int x86_read_modify
= ~(m_PENT
| m_PPRO
);
427 const int x86_split_long_moves
= m_PPRO
;
428 const int x86_promote_QImode
= m_K6
| m_PENT
| m_386
| m_486
| m_ATHLON
;
429 const int x86_fast_prefix
= ~(m_PENT
| m_486
| m_386
);
430 const int x86_single_stringop
= m_386
| m_PENT4
;
431 const int x86_qimode_math
= ~(0);
432 const int x86_promote_qi_regs
= 0;
433 const int x86_himode_math
= ~(m_PPRO
);
434 const int x86_promote_hi_regs
= m_PPRO
;
435 const int x86_sub_esp_4
= m_ATHLON
| m_PPRO
| m_PENT4
;
436 const int x86_sub_esp_8
= m_ATHLON
| m_PPRO
| m_386
| m_486
| m_PENT4
;
437 const int x86_add_esp_4
= m_ATHLON
| m_K6
| m_PENT4
;
438 const int x86_add_esp_8
= m_ATHLON
| m_PPRO
| m_K6
| m_386
| m_486
| m_PENT4
;
439 const int x86_integer_DFmode_moves
= ~(m_ATHLON
| m_PENT4
| m_PPRO
);
440 const int x86_partial_reg_dependency
= m_ATHLON
| m_PENT4
;
441 const int x86_memory_mismatch_stall
= m_ATHLON
| m_PENT4
;
442 const int x86_accumulate_outgoing_args
= m_ATHLON
| m_PENT4
| m_PPRO
;
443 const int x86_prologue_using_move
= m_ATHLON
| m_PENT4
| m_PPRO
;
444 const int x86_epilogue_using_move
= m_ATHLON
| m_PENT4
| m_PPRO
;
445 const int x86_decompose_lea
= m_PENT4
;
446 const int x86_shift1
= ~m_486
;
447 const int x86_arch_always_fancy_math_387
= m_PENT
| m_PPRO
| m_ATHLON
| m_PENT4
;
/* In case the average insn count for single function invocation is
   lower than this constant, emit fast (but longer) prologue and
   epilogue code.  NOTE(review): the original comment was truncated in
   extraction; wording of the tail reconstructed — confirm against the
   full source.  */
#define FAST_PROLOGUE_INSN_COUNT 30

/* Set by prologue expander and used by epilogue expander to determine
   which of the two prologue/epilogue styles was chosen.  NOTE(review):
   comment tail reconstructed from context — confirm.  */
static int use_fast_prologue_epilogue;
458 /* Names for 8 (low), 8 (high), and 16-bit registers, respectively. */
459 static const char *const qi_reg_name
[] = QI_REGISTER_NAMES
;
460 static const char *const qi_high_reg_name
[] = QI_HIGH_REGISTER_NAMES
;
461 static const char *const hi_reg_name
[] = HI_REGISTER_NAMES
;
463 /* Array of the smallest class containing reg number REGNO, indexed by
464 REGNO. Used by REGNO_REG_CLASS in i386.h. */
466 enum reg_class
const regclass_map
[FIRST_PSEUDO_REGISTER
] =
469 AREG
, DREG
, CREG
, BREG
,
471 SIREG
, DIREG
, NON_Q_REGS
, NON_Q_REGS
,
473 FP_TOP_REG
, FP_SECOND_REG
, FLOAT_REGS
, FLOAT_REGS
,
474 FLOAT_REGS
, FLOAT_REGS
, FLOAT_REGS
, FLOAT_REGS
,
477 /* flags, fpsr, dirflag, frame */
478 NO_REGS
, NO_REGS
, NO_REGS
, NON_Q_REGS
,
479 SSE_REGS
, SSE_REGS
, SSE_REGS
, SSE_REGS
, SSE_REGS
, SSE_REGS
,
481 MMX_REGS
, MMX_REGS
, MMX_REGS
, MMX_REGS
, MMX_REGS
, MMX_REGS
,
483 NON_Q_REGS
, NON_Q_REGS
, NON_Q_REGS
, NON_Q_REGS
,
484 NON_Q_REGS
, NON_Q_REGS
, NON_Q_REGS
, NON_Q_REGS
,
485 SSE_REGS
, SSE_REGS
, SSE_REGS
, SSE_REGS
, SSE_REGS
, SSE_REGS
,
489 /* The "default" register map used in 32bit mode. */
491 int const dbx_register_map
[FIRST_PSEUDO_REGISTER
] =
493 0, 2, 1, 3, 6, 7, 4, 5, /* general regs */
494 12, 13, 14, 15, 16, 17, 18, 19, /* fp regs */
495 -1, -1, -1, -1, -1, /* arg, flags, fpsr, dir, frame */
496 21, 22, 23, 24, 25, 26, 27, 28, /* SSE */
497 29, 30, 31, 32, 33, 34, 35, 36, /* MMX */
498 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
499 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
502 static int const x86_64_int_parameter_registers
[6] =
504 5 /*RDI*/, 4 /*RSI*/, 1 /*RDX*/, 2 /*RCX*/,
505 FIRST_REX_INT_REG
/*R8 */, FIRST_REX_INT_REG
+ 1 /*R9 */
508 static int const x86_64_int_return_registers
[4] =
510 0 /*RAX*/, 1 /*RDI*/, 5 /*RDI*/, 4 /*RSI*/
513 /* The "default" register map used in 64bit mode. */
514 int const dbx64_register_map
[FIRST_PSEUDO_REGISTER
] =
516 0, 1, 2, 3, 4, 5, 6, 7, /* general regs */
517 33, 34, 35, 36, 37, 38, 39, 40, /* fp regs */
518 -1, -1, -1, -1, -1, /* arg, flags, fpsr, dir, frame */
519 17, 18, 19, 20, 21, 22, 23, 24, /* SSE */
520 41, 42, 43, 44, 45, 46, 47, 48, /* MMX */
521 8,9,10,11,12,13,14,15, /* extended integer registers */
522 25, 26, 27, 28, 29, 30, 31, 32, /* extended SSE registers */
525 /* Define the register numbers to be used in Dwarf debugging information.
526 The SVR4 reference port C compiler uses the following register numbers
527 in its Dwarf output code:
528 0 for %eax (gcc regno = 0)
529 1 for %ecx (gcc regno = 2)
530 2 for %edx (gcc regno = 1)
531 3 for %ebx (gcc regno = 3)
532 4 for %esp (gcc regno = 7)
533 5 for %ebp (gcc regno = 6)
534 6 for %esi (gcc regno = 4)
535 7 for %edi (gcc regno = 5)
536 The following three DWARF register numbers are never generated by
537 the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
538 believes these numbers have these meanings.
539 8 for %eip (no gcc equivalent)
540 9 for %eflags (gcc regno = 17)
541 10 for %trapno (no gcc equivalent)
542 It is not at all clear how we should number the FP stack registers
543 for the x86 architecture. If the version of SDB on x86/svr4 were
544 a bit less brain dead with respect to floating-point then we would
545 have a precedent to follow with respect to DWARF register numbers
546 for x86 FP registers, but the SDB on x86/svr4 is so completely
547 broken with respect to FP registers that it is hardly worth thinking
548 of it as something to strive for compatibility with.
549 The version of x86/svr4 SDB I have at the moment does (partially)
550 seem to believe that DWARF register number 11 is associated with
551 the x86 register %st(0), but that's about all. Higher DWARF
552 register numbers don't seem to be associated with anything in
553 particular, and even for DWARF regno 11, SDB only seems to under-
554 stand that it should say that a variable lives in %st(0) (when
555 asked via an `=' command) if we said it was in DWARF regno 11,
556 but SDB still prints garbage when asked for the value of the
557 variable in question (via a `/' command).
558 (Also note that the labels SDB prints for various FP stack regs
559 when doing an `x' command are all wrong.)
560 Note that these problems generally don't affect the native SVR4
561 C compiler because it doesn't allow the use of -O with -g and
562 because when it is *not* optimizing, it allocates a memory
563 location for each floating-point variable, and the memory
564 location is what gets described in the DWARF AT_location
565 attribute for the variable in question.
566 Regardless of the severe mental illness of the x86/svr4 SDB, we
567 do something sensible here and we use the following DWARF
568 register numbers. Note that these are all stack-top-relative
570 11 for %st(0) (gcc regno = 8)
571 12 for %st(1) (gcc regno = 9)
572 13 for %st(2) (gcc regno = 10)
573 14 for %st(3) (gcc regno = 11)
574 15 for %st(4) (gcc regno = 12)
575 16 for %st(5) (gcc regno = 13)
576 17 for %st(6) (gcc regno = 14)
577 18 for %st(7) (gcc regno = 15)
579 int const svr4_dbx_register_map
[FIRST_PSEUDO_REGISTER
] =
581 0, 2, 1, 3, 6, 7, 5, 4, /* general regs */
582 11, 12, 13, 14, 15, 16, 17, 18, /* fp regs */
583 -1, 9, -1, -1, -1, /* arg, flags, fpsr, dir, frame */
584 21, 22, 23, 24, 25, 26, 27, 28, /* SSE registers */
585 29, 30, 31, 32, 33, 34, 35, 36, /* MMX registers */
-1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
-1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
590 /* Test and compare insns in i386.md store the information needed to
591 generate branch and scc insns here. */
593 rtx ix86_compare_op0
= NULL_RTX
;
594 rtx ix86_compare_op1
= NULL_RTX
;
/* The encoding characters for the four TLS models present in ELF;
   index 0 is a placeholder for "no TLS model".  */
static const char tls_model_chars[] = " GLil";
/* Number of per-mode stack temporaries tracked per function (this is
   the second dimension of machine_function::stack_locals).  */
#define MAX_386_STACK_LOCALS 3

/* Size of the register save area.  */
#define X86_64_VARARGS_SIZE (REGPARM_MAX * UNITS_PER_WORD + SSE_REGPARM_MAX * 16)
604 /* Define the structure for the machine field in struct function. */
605 struct machine_function
GTY(())
607 rtx stack_locals
[(int) MAX_MACHINE_MODE
][MAX_386_STACK_LOCALS
];
608 const char *some_ld_name
;
609 int save_varrargs_registers
;
610 int accesses_prev_frame
;
/* Accessors for the machine-specific data of the current function
   (cfun->machine).  */
#define ix86_stack_locals (cfun->machine->stack_locals)
#define ix86_save_varrargs_registers (cfun->machine->save_varrargs_registers)
616 /* Structure describing stack frame layout.
617 Stack grows downward:
623 saved frame pointer if frame_pointer_needed
624 <- HARD_FRAME_POINTER
630 > to_allocate <- FRAME_POINTER
642 int outgoing_arguments_size
;
645 HOST_WIDE_INT to_allocate
;
646 /* The offsets relative to ARG_POINTER. */
647 HOST_WIDE_INT frame_pointer_offset
;
648 HOST_WIDE_INT hard_frame_pointer_offset
;
649 HOST_WIDE_INT stack_pointer_offset
;
652 /* Used to enable/disable debugging features. */
653 const char *ix86_debug_arg_string
, *ix86_debug_addr_string
;
654 /* Code model option as passed by user. */
655 const char *ix86_cmodel_string
;
657 enum cmodel ix86_cmodel
;
659 const char *ix86_asm_string
;
660 enum asm_dialect ix86_asm_dialect
= ASM_ATT
;
662 const char *ix86_tls_dialect_string
;
663 enum tls_dialect ix86_tls_dialect
= TLS_DIALECT_GNU
;
665 /* Which unit we are generating floating point math for. */
666 enum fpmath_unit ix86_fpmath
;
668 /* Which cpu are we scheduling for. */
669 enum processor_type ix86_cpu
;
670 /* Which instruction set architecture to use. */
671 enum processor_type ix86_arch
;
673 /* Strings to hold which cpu and instruction set architecture to use. */
674 const char *ix86_cpu_string
; /* for -mcpu=<xxx> */
675 const char *ix86_arch_string
; /* for -march=<xxx> */
676 const char *ix86_fpmath_string
; /* for -mfpmath=<xxx> */
678 /* # of registers to use to pass arguments. */
679 const char *ix86_regparm_string
;
681 /* true if sse prefetch instruction is not NOOP. */
682 int x86_prefetch_sse
;
684 /* ix86_regparm_string as a number */
/* Alignment to use for loops and jumps:  */

/* Power of two alignment for loops.  */
const char *ix86_align_loops_string;

/* Power of two alignment for non-loop jumps.  */
const char *ix86_align_jumps_string;

/* Power of two alignment for stack boundary in bytes.  */
const char *ix86_preferred_stack_boundary_string;

/* Preferred alignment for stack boundary in bits.  */
int ix86_preferred_stack_boundary;

/* Values 1-5: see jump.c  */
int ix86_branch_cost;
const char *ix86_branch_cost_string;

/* Power of two alignment for functions.  */
const char *ix86_align_funcs_string;

/* Prefix built by ASM_GENERATE_INTERNAL_LABEL, plus its cached length.  */
static char internal_label_prefix[16];
static int internal_label_prefix_len;
712 static int local_symbolic_operand
PARAMS ((rtx
, enum machine_mode
));
713 static int tls_symbolic_operand_1
PARAMS ((rtx
, enum tls_model
));
714 static void output_pic_addr_const
PARAMS ((FILE *, rtx
, int));
715 static void put_condition_code
PARAMS ((enum rtx_code
, enum machine_mode
,
717 static const char *get_some_local_dynamic_name
PARAMS ((void));
718 static int get_some_local_dynamic_name_1
PARAMS ((rtx
*, void *));
719 static rtx maybe_get_pool_constant
PARAMS ((rtx
));
720 static rtx ix86_expand_int_compare
PARAMS ((enum rtx_code
, rtx
, rtx
));
721 static enum rtx_code ix86_prepare_fp_compare_args
PARAMS ((enum rtx_code
,
723 static rtx get_thread_pointer
PARAMS ((void));
724 static void get_pc_thunk_name
PARAMS ((char [32], unsigned int));
725 static rtx gen_push
PARAMS ((rtx
));
726 static int memory_address_length
PARAMS ((rtx addr
));
727 static int ix86_flags_dependant
PARAMS ((rtx
, rtx
, enum attr_type
));
728 static int ix86_agi_dependant
PARAMS ((rtx
, rtx
, enum attr_type
));
729 static enum attr_ppro_uops ix86_safe_ppro_uops
PARAMS ((rtx
));
730 static void ix86_dump_ppro_packet
PARAMS ((FILE *));
731 static void ix86_reorder_insn
PARAMS ((rtx
*, rtx
*));
732 static struct machine_function
* ix86_init_machine_status
PARAMS ((void));
733 static int ix86_split_to_parts
PARAMS ((rtx
, rtx
*, enum machine_mode
));
734 static int ix86_nsaved_regs
PARAMS ((void));
735 static void ix86_emit_save_regs
PARAMS ((void));
736 static void ix86_emit_save_regs_using_mov
PARAMS ((rtx
, HOST_WIDE_INT
));
737 static void ix86_emit_restore_regs_using_mov
PARAMS ((rtx
, int, int));
738 static void ix86_output_function_epilogue
PARAMS ((FILE *, HOST_WIDE_INT
));
739 static void ix86_set_move_mem_attrs_1
PARAMS ((rtx
, rtx
, rtx
, rtx
, rtx
));
740 static void ix86_sched_reorder_ppro
PARAMS ((rtx
*, rtx
*));
741 static HOST_WIDE_INT ix86_GOT_alias_set
PARAMS ((void));
742 static void ix86_adjust_counter
PARAMS ((rtx
, HOST_WIDE_INT
));
743 static rtx ix86_expand_aligntest
PARAMS ((rtx
, int));
744 static void ix86_expand_strlensi_unroll_1
PARAMS ((rtx
, rtx
));
745 static int ix86_issue_rate
PARAMS ((void));
746 static int ix86_adjust_cost
PARAMS ((rtx
, rtx
, rtx
, int));
747 static void ix86_sched_init
PARAMS ((FILE *, int, int));
748 static int ix86_sched_reorder
PARAMS ((FILE *, int, rtx
*, int *, int));
749 static int ix86_variable_issue
PARAMS ((FILE *, int, rtx
, int));
750 static int ia32_use_dfa_pipeline_interface
PARAMS ((void));
751 static int ia32_multipass_dfa_lookahead
PARAMS ((void));
752 static void ix86_init_mmx_sse_builtins
PARAMS ((void));
753 static rtx x86_this_parameter
PARAMS ((tree
));
754 static void x86_output_mi_thunk
PARAMS ((FILE *, tree
, HOST_WIDE_INT
,
755 HOST_WIDE_INT
, tree
));
756 static bool x86_can_output_mi_thunk
PARAMS ((tree
, HOST_WIDE_INT
,
757 HOST_WIDE_INT
, tree
));
761 rtx base
, index
, disp
;
765 static int ix86_decompose_address
PARAMS ((rtx
, struct ix86_address
*));
767 static void ix86_encode_section_info
PARAMS ((tree
, int)) ATTRIBUTE_UNUSED
;
768 static const char *ix86_strip_name_encoding
PARAMS ((const char *))
771 struct builtin_description
;
772 static rtx ix86_expand_sse_comi
PARAMS ((const struct builtin_description
*,
774 static rtx ix86_expand_sse_compare
PARAMS ((const struct builtin_description
*,
776 static rtx ix86_expand_unop1_builtin
PARAMS ((enum insn_code
, tree
, rtx
));
777 static rtx ix86_expand_unop_builtin
PARAMS ((enum insn_code
, tree
, rtx
, int));
778 static rtx ix86_expand_binop_builtin
PARAMS ((enum insn_code
, tree
, rtx
));
779 static rtx ix86_expand_store_builtin
PARAMS ((enum insn_code
, tree
));
780 static rtx safe_vector_operand
PARAMS ((rtx
, enum machine_mode
));
781 static enum rtx_code ix86_fp_compare_code_to_integer
PARAMS ((enum rtx_code
));
782 static void ix86_fp_comparison_codes
PARAMS ((enum rtx_code code
,
786 static rtx ix86_expand_fp_compare
PARAMS ((enum rtx_code
, rtx
, rtx
, rtx
,
788 static int ix86_fp_comparison_arithmetics_cost
PARAMS ((enum rtx_code code
));
789 static int ix86_fp_comparison_fcomi_cost
PARAMS ((enum rtx_code code
));
790 static int ix86_fp_comparison_sahf_cost
PARAMS ((enum rtx_code code
));
791 static int ix86_fp_comparison_cost
PARAMS ((enum rtx_code code
));
792 static unsigned int ix86_select_alt_pic_regnum
PARAMS ((void));
793 static int ix86_save_reg
PARAMS ((unsigned int, int));
794 static void ix86_compute_frame_layout
PARAMS ((struct ix86_frame
*));
795 static int ix86_comp_type_attributes
PARAMS ((tree
, tree
));
796 static int ix86_fntype_regparm
PARAMS ((tree
));
797 const struct attribute_spec ix86_attribute_table
[];
798 static tree ix86_handle_cdecl_attribute
PARAMS ((tree
*, tree
, tree
, int, bool *));
799 static tree ix86_handle_regparm_attribute
PARAMS ((tree
*, tree
, tree
, int, bool *));
800 static int ix86_value_regno
PARAMS ((enum machine_mode
));
802 #if defined (DO_GLOBAL_CTORS_BODY) && defined (HAS_INIT_SECTION)
803 static void ix86_svr3_asm_out_constructor
PARAMS ((rtx
, int));
806 /* Register class used for passing given 64bit part of the argument.
807 These represent classes as documented by the PS ABI, with the exception
808 of SSESF, SSEDF classes, that are basically SSE class, just gcc will
use SF or DFmode move instead of DImode to avoid reformatting penalties.
Similarly we play games with INTEGERSI_CLASS to use cheaper SImode moves
812 whenever possible (upper half does contain padding).
814 enum x86_64_reg_class
817 X86_64_INTEGER_CLASS
,
818 X86_64_INTEGERSI_CLASS
,
/* Printable names for the x86_64_reg_class values, in enumeration
   order.  */
static const char * const x86_64_reg_class_name[] =
  {"no", "integer", "integerSI", "sse", "sseSF", "sseDF", "sseup", "x87", "x87up", "no"};
830 #define MAX_CLASSES 4
831 static int classify_argument
PARAMS ((enum machine_mode
, tree
,
832 enum x86_64_reg_class
[MAX_CLASSES
],
834 static int examine_argument
PARAMS ((enum machine_mode
, tree
, int, int *,
836 static rtx construct_container
PARAMS ((enum machine_mode
, tree
, int, int, int,
838 static enum x86_64_reg_class merge_classes
PARAMS ((enum x86_64_reg_class
,
839 enum x86_64_reg_class
));
841 /* Initialize the GCC target structure. */
842 #undef TARGET_ATTRIBUTE_TABLE
843 #define TARGET_ATTRIBUTE_TABLE ix86_attribute_table
844 #ifdef TARGET_DLLIMPORT_DECL_ATTRIBUTES
845 # undef TARGET_MERGE_DECL_ATTRIBUTES
846 # define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
849 #undef TARGET_COMP_TYPE_ATTRIBUTES
850 #define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes
852 #undef TARGET_INIT_BUILTINS
853 #define TARGET_INIT_BUILTINS ix86_init_builtins
855 #undef TARGET_EXPAND_BUILTIN
856 #define TARGET_EXPAND_BUILTIN ix86_expand_builtin
858 #undef TARGET_ASM_FUNCTION_EPILOGUE
859 #define TARGET_ASM_FUNCTION_EPILOGUE ix86_output_function_epilogue
861 #undef TARGET_ASM_OPEN_PAREN
862 #define TARGET_ASM_OPEN_PAREN ""
863 #undef TARGET_ASM_CLOSE_PAREN
864 #define TARGET_ASM_CLOSE_PAREN ""
866 #undef TARGET_ASM_ALIGNED_HI_OP
867 #define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT
868 #undef TARGET_ASM_ALIGNED_SI_OP
869 #define TARGET_ASM_ALIGNED_SI_OP ASM_LONG
871 #undef TARGET_ASM_ALIGNED_DI_OP
872 #define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD
875 #undef TARGET_ASM_UNALIGNED_HI_OP
876 #define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
877 #undef TARGET_ASM_UNALIGNED_SI_OP
878 #define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
879 #undef TARGET_ASM_UNALIGNED_DI_OP
880 #define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP
882 #undef TARGET_SCHED_ADJUST_COST
883 #define TARGET_SCHED_ADJUST_COST ix86_adjust_cost
884 #undef TARGET_SCHED_ISSUE_RATE
885 #define TARGET_SCHED_ISSUE_RATE ix86_issue_rate
886 #undef TARGET_SCHED_VARIABLE_ISSUE
887 #define TARGET_SCHED_VARIABLE_ISSUE ix86_variable_issue
888 #undef TARGET_SCHED_INIT
889 #define TARGET_SCHED_INIT ix86_sched_init
890 #undef TARGET_SCHED_REORDER
891 #define TARGET_SCHED_REORDER ix86_sched_reorder
892 #undef TARGET_SCHED_USE_DFA_PIPELINE_INTERFACE
893 #define TARGET_SCHED_USE_DFA_PIPELINE_INTERFACE \
894 ia32_use_dfa_pipeline_interface
895 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
896 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
897 ia32_multipass_dfa_lookahead
900 #undef TARGET_HAVE_TLS
901 #define TARGET_HAVE_TLS true
904 #undef TARGET_ASM_OUTPUT_MI_THUNK
905 #define TARGET_ASM_OUTPUT_MI_THUNK x86_output_mi_thunk
906 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
907 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK x86_can_output_mi_thunk
909 struct gcc_target targetm
= TARGET_INITIALIZER
;
911 /* Sometimes certain combinations of command options do not make
912 sense on a particular target machine. You can define a macro
913 `OVERRIDE_OPTIONS' to take account of this. This macro, if
914 defined, is executed once just after all the command options have
917 Don't use this macro to turn on various extra optimizations for
918 `-O'. That is what `OPTIMIZATION_OPTIONS' is for. */
924 /* Comes from final.c -- no real reason to change it. */
925 #define MAX_CODE_ALIGN 16
929 const struct processor_costs
*cost
; /* Processor costs */
930 const int target_enable
; /* Target flags to enable. */
931 const int target_disable
; /* Target flags to disable. */
932 const int align_loop
; /* Default alignments. */
933 const int align_loop_max_skip
;
934 const int align_jump
;
935 const int align_jump_max_skip
;
936 const int align_func
;
937 const int branch_cost
;
939 const processor_target_table
[PROCESSOR_max
] =
941 {&i386_cost
, 0, 0, 4, 3, 4, 3, 4, 1},
942 {&i486_cost
, 0, 0, 16, 15, 16, 15, 16, 1},
943 {&pentium_cost
, 0, 0, 16, 7, 16, 7, 16, 1},
944 {&pentiumpro_cost
, 0, 0, 16, 15, 16, 7, 16, 1},
945 {&k6_cost
, 0, 0, 32, 7, 32, 7, 32, 1},
946 {&athlon_cost
, 0, 0, 16, 7, 64, 7, 16, 1},
947 {&pentium4_cost
, 0, 0, 0, 0, 0, 0, 0, 1}
950 static const char * const cpu_names
[] = TARGET_CPU_DEFAULT_NAMES
;
953 const char *const name
; /* processor name or nickname. */
954 const enum processor_type processor
;
960 PTA_PREFETCH_SSE
= 8,
965 const processor_alias_table
[] =
967 {"i386", PROCESSOR_I386
, 0},
968 {"i486", PROCESSOR_I486
, 0},
969 {"i586", PROCESSOR_PENTIUM
, 0},
970 {"pentium", PROCESSOR_PENTIUM
, 0},
971 {"pentium-mmx", PROCESSOR_PENTIUM
, PTA_MMX
},
972 {"winchip-c6", PROCESSOR_I486
, PTA_MMX
},
973 {"winchip2", PROCESSOR_I486
, PTA_MMX
| PTA_3DNOW
},
974 {"c3", PROCESSOR_I486
, PTA_MMX
| PTA_3DNOW
},
975 {"i686", PROCESSOR_PENTIUMPRO
, 0},
976 {"pentiumpro", PROCESSOR_PENTIUMPRO
, 0},
977 {"pentium2", PROCESSOR_PENTIUMPRO
, PTA_MMX
},
978 {"pentium3", PROCESSOR_PENTIUMPRO
, PTA_MMX
| PTA_SSE
| PTA_PREFETCH_SSE
},
979 {"pentium4", PROCESSOR_PENTIUM4
, PTA_SSE
| PTA_SSE2
|
980 PTA_MMX
| PTA_PREFETCH_SSE
},
981 {"k6", PROCESSOR_K6
, PTA_MMX
},
982 {"k6-2", PROCESSOR_K6
, PTA_MMX
| PTA_3DNOW
},
983 {"k6-3", PROCESSOR_K6
, PTA_MMX
| PTA_3DNOW
},
984 {"athlon", PROCESSOR_ATHLON
, PTA_MMX
| PTA_PREFETCH_SSE
| PTA_3DNOW
986 {"athlon-tbird", PROCESSOR_ATHLON
, PTA_MMX
| PTA_PREFETCH_SSE
987 | PTA_3DNOW
| PTA_3DNOW_A
},
988 {"athlon-4", PROCESSOR_ATHLON
, PTA_MMX
| PTA_PREFETCH_SSE
| PTA_3DNOW
989 | PTA_3DNOW_A
| PTA_SSE
},
990 {"athlon-xp", PROCESSOR_ATHLON
, PTA_MMX
| PTA_PREFETCH_SSE
| PTA_3DNOW
991 | PTA_3DNOW_A
| PTA_SSE
},
992 {"athlon-mp", PROCESSOR_ATHLON
, PTA_MMX
| PTA_PREFETCH_SSE
| PTA_3DNOW
993 | PTA_3DNOW_A
| PTA_SSE
},
996 int const pta_size
= ARRAY_SIZE (processor_alias_table
);
998 /* By default our XFmode is the 80-bit extended format. If we have
999 use TFmode instead, it's also the 80-bit format, but with padding. */
1000 real_format_for_mode
[XFmode
- QFmode
] = &ieee_extended_intel_96_format
;
1001 real_format_for_mode
[TFmode
- QFmode
] = &ieee_extended_intel_128_format
;
1003 /* Set the default values for switches whose default depends on TARGET_64BIT
1004 in case they weren't overwriten by command line options. */
1007 if (flag_omit_frame_pointer
== 2)
1008 flag_omit_frame_pointer
= 1;
1009 if (flag_asynchronous_unwind_tables
== 2)
1010 flag_asynchronous_unwind_tables
= 1;
1011 if (flag_pcc_struct_return
== 2)
1012 flag_pcc_struct_return
= 0;
1016 if (flag_omit_frame_pointer
== 2)
1017 flag_omit_frame_pointer
= 0;
1018 if (flag_asynchronous_unwind_tables
== 2)
1019 flag_asynchronous_unwind_tables
= 0;
1020 if (flag_pcc_struct_return
== 2)
1021 flag_pcc_struct_return
= 1;
1024 #ifdef SUBTARGET_OVERRIDE_OPTIONS
1025 SUBTARGET_OVERRIDE_OPTIONS
;
1028 if (!ix86_cpu_string
&& ix86_arch_string
)
1029 ix86_cpu_string
= ix86_arch_string
;
1030 if (!ix86_cpu_string
)
1031 ix86_cpu_string
= cpu_names
[TARGET_CPU_DEFAULT
];
1032 if (!ix86_arch_string
)
1033 ix86_arch_string
= TARGET_64BIT
? "athlon-4" : "i386";
1035 if (ix86_cmodel_string
!= 0)
1037 if (!strcmp (ix86_cmodel_string
, "small"))
1038 ix86_cmodel
= flag_pic
? CM_SMALL_PIC
: CM_SMALL
;
1040 sorry ("code model %s not supported in PIC mode", ix86_cmodel_string
);
1041 else if (!strcmp (ix86_cmodel_string
, "32"))
1042 ix86_cmodel
= CM_32
;
1043 else if (!strcmp (ix86_cmodel_string
, "kernel") && !flag_pic
)
1044 ix86_cmodel
= CM_KERNEL
;
1045 else if (!strcmp (ix86_cmodel_string
, "medium") && !flag_pic
)
1046 ix86_cmodel
= CM_MEDIUM
;
1047 else if (!strcmp (ix86_cmodel_string
, "large") && !flag_pic
)
1048 ix86_cmodel
= CM_LARGE
;
1050 error ("bad value (%s) for -mcmodel= switch", ix86_cmodel_string
);
1054 ix86_cmodel
= CM_32
;
1056 ix86_cmodel
= flag_pic
? CM_SMALL_PIC
: CM_SMALL
;
1058 if (ix86_asm_string
!= 0)
1060 if (!strcmp (ix86_asm_string
, "intel"))
1061 ix86_asm_dialect
= ASM_INTEL
;
1062 else if (!strcmp (ix86_asm_string
, "att"))
1063 ix86_asm_dialect
= ASM_ATT
;
1065 error ("bad value (%s) for -masm= switch", ix86_asm_string
);
1067 if ((TARGET_64BIT
== 0) != (ix86_cmodel
== CM_32
))
1068 error ("code model `%s' not supported in the %s bit mode",
1069 ix86_cmodel_string
, TARGET_64BIT
? "64" : "32");
1070 if (ix86_cmodel
== CM_LARGE
)
1071 sorry ("code model `large' not supported yet");
1072 if ((TARGET_64BIT
!= 0) != ((target_flags
& MASK_64BIT
) != 0))
1073 sorry ("%i-bit mode not compiled in",
1074 (target_flags
& MASK_64BIT
) ? 64 : 32);
1076 for (i
= 0; i
< pta_size
; i
++)
1077 if (! strcmp (ix86_arch_string
, processor_alias_table
[i
].name
))
1079 ix86_arch
= processor_alias_table
[i
].processor
;
1080 /* Default cpu tuning to the architecture. */
1081 ix86_cpu
= ix86_arch
;
1082 if (processor_alias_table
[i
].flags
& PTA_MMX
1083 && !(target_flags_explicit
& MASK_MMX
))
1084 target_flags
|= MASK_MMX
;
1085 if (processor_alias_table
[i
].flags
& PTA_3DNOW
1086 && !(target_flags_explicit
& MASK_3DNOW
))
1087 target_flags
|= MASK_3DNOW
;
1088 if (processor_alias_table
[i
].flags
& PTA_3DNOW_A
1089 && !(target_flags_explicit
& MASK_3DNOW_A
))
1090 target_flags
|= MASK_3DNOW_A
;
1091 if (processor_alias_table
[i
].flags
& PTA_SSE
1092 && !(target_flags_explicit
& MASK_SSE
))
1093 target_flags
|= MASK_SSE
;
1094 if (processor_alias_table
[i
].flags
& PTA_SSE2
1095 && !(target_flags_explicit
& MASK_SSE2
))
1096 target_flags
|= MASK_SSE2
;
1097 if (processor_alias_table
[i
].flags
& PTA_PREFETCH_SSE
)
1098 x86_prefetch_sse
= true;
1103 error ("bad value (%s) for -march= switch", ix86_arch_string
);
1105 for (i
= 0; i
< pta_size
; i
++)
1106 if (! strcmp (ix86_cpu_string
, processor_alias_table
[i
].name
))
1108 ix86_cpu
= processor_alias_table
[i
].processor
;
1111 if (processor_alias_table
[i
].flags
& PTA_PREFETCH_SSE
)
1112 x86_prefetch_sse
= true;
1114 error ("bad value (%s) for -mcpu= switch", ix86_cpu_string
);
1117 ix86_cost
= &size_cost
;
1119 ix86_cost
= processor_target_table
[ix86_cpu
].cost
;
1120 target_flags
|= processor_target_table
[ix86_cpu
].target_enable
;
1121 target_flags
&= ~processor_target_table
[ix86_cpu
].target_disable
;
1123 /* Arrange to set up i386_stack_locals for all functions. */
1124 init_machine_status
= ix86_init_machine_status
;
1126 /* Validate -mregparm= value. */
1127 if (ix86_regparm_string
)
1129 i
= atoi (ix86_regparm_string
);
1130 if (i
< 0 || i
> REGPARM_MAX
)
1131 error ("-mregparm=%d is not between 0 and %d", i
, REGPARM_MAX
);
1137 ix86_regparm
= REGPARM_MAX
;
1139 /* If the user has provided any of the -malign-* options,
1140 warn and use that value only if -falign-* is not set.
1141 Remove this code in GCC 3.2 or later. */
1142 if (ix86_align_loops_string
)
1144 warning ("-malign-loops is obsolete, use -falign-loops");
1145 if (align_loops
== 0)
1147 i
= atoi (ix86_align_loops_string
);
1148 if (i
< 0 || i
> MAX_CODE_ALIGN
)
1149 error ("-malign-loops=%d is not between 0 and %d", i
, MAX_CODE_ALIGN
);
1151 align_loops
= 1 << i
;
1155 if (ix86_align_jumps_string
)
1157 warning ("-malign-jumps is obsolete, use -falign-jumps");
1158 if (align_jumps
== 0)
1160 i
= atoi (ix86_align_jumps_string
);
1161 if (i
< 0 || i
> MAX_CODE_ALIGN
)
1162 error ("-malign-loops=%d is not between 0 and %d", i
, MAX_CODE_ALIGN
);
1164 align_jumps
= 1 << i
;
1168 if (ix86_align_funcs_string
)
1170 warning ("-malign-functions is obsolete, use -falign-functions");
1171 if (align_functions
== 0)
1173 i
= atoi (ix86_align_funcs_string
);
1174 if (i
< 0 || i
> MAX_CODE_ALIGN
)
1175 error ("-malign-loops=%d is not between 0 and %d", i
, MAX_CODE_ALIGN
);
1177 align_functions
= 1 << i
;
1181 /* Default align_* from the processor table. */
1182 if (align_loops
== 0)
1184 align_loops
= processor_target_table
[ix86_cpu
].align_loop
;
1185 align_loops_max_skip
= processor_target_table
[ix86_cpu
].align_loop_max_skip
;
1187 if (align_jumps
== 0)
1189 align_jumps
= processor_target_table
[ix86_cpu
].align_jump
;
1190 align_jumps_max_skip
= processor_target_table
[ix86_cpu
].align_jump_max_skip
;
1192 if (align_functions
== 0)
1194 align_functions
= processor_target_table
[ix86_cpu
].align_func
;
1197 /* Validate -mpreferred-stack-boundary= value, or provide default.
1198 The default of 128 bits is for Pentium III's SSE __m128, but we
1199 don't want additional code to keep the stack aligned when
1200 optimizing for code size. */
1201 ix86_preferred_stack_boundary
= (optimize_size
1202 ? TARGET_64BIT
? 128 : 32
1204 if (ix86_preferred_stack_boundary_string
)
1206 i
= atoi (ix86_preferred_stack_boundary_string
);
1207 if (i
< (TARGET_64BIT
? 4 : 2) || i
> 12)
1208 error ("-mpreferred-stack-boundary=%d is not between %d and 12", i
,
1209 TARGET_64BIT
? 4 : 2);
1211 ix86_preferred_stack_boundary
= (1 << i
) * BITS_PER_UNIT
;
1214 /* Validate -mbranch-cost= value, or provide default. */
1215 ix86_branch_cost
= processor_target_table
[ix86_cpu
].branch_cost
;
1216 if (ix86_branch_cost_string
)
1218 i
= atoi (ix86_branch_cost_string
);
1220 error ("-mbranch-cost=%d is not between 0 and 5", i
);
1222 ix86_branch_cost
= i
;
1225 if (ix86_tls_dialect_string
)
1227 if (strcmp (ix86_tls_dialect_string
, "gnu") == 0)
1228 ix86_tls_dialect
= TLS_DIALECT_GNU
;
1229 else if (strcmp (ix86_tls_dialect_string
, "sun") == 0)
1230 ix86_tls_dialect
= TLS_DIALECT_SUN
;
1232 error ("bad value (%s) for -mtls-dialect= switch",
1233 ix86_tls_dialect_string
);
1236 /* Keep nonleaf frame pointers. */
1237 if (TARGET_OMIT_LEAF_FRAME_POINTER
)
1238 flag_omit_frame_pointer
= 1;
1240 /* If we're doing fast math, we don't care about comparison order
1241 wrt NaNs. This lets us use a shorter comparison sequence. */
1242 if (flag_unsafe_math_optimizations
)
1243 target_flags
&= ~MASK_IEEE_FP
;
1245 /* If the architecture always has an FPU, turn off NO_FANCY_MATH_387,
1246 since the insns won't need emulation. */
1247 if (x86_arch_always_fancy_math_387
& (1 << ix86_arch
))
1248 target_flags
&= ~MASK_NO_FANCY_MATH_387
;
1252 if (TARGET_ALIGN_DOUBLE
)
1253 error ("-malign-double makes no sense in the 64bit mode");
1255 error ("-mrtd calling convention not supported in the 64bit mode");
1256 /* Enable by default the SSE and MMX builtins. */
1257 target_flags
|= (MASK_SSE2
| MASK_SSE
| MASK_MMX
| MASK_128BIT_LONG_DOUBLE
);
1258 ix86_fpmath
= FPMATH_SSE
;
1261 ix86_fpmath
= FPMATH_387
;
1263 if (ix86_fpmath_string
!= 0)
1265 if (! strcmp (ix86_fpmath_string
, "387"))
1266 ix86_fpmath
= FPMATH_387
;
1267 else if (! strcmp (ix86_fpmath_string
, "sse"))
1271 warning ("SSE instruction set disabled, using 387 arithmetics");
1272 ix86_fpmath
= FPMATH_387
;
1275 ix86_fpmath
= FPMATH_SSE
;
1277 else if (! strcmp (ix86_fpmath_string
, "387,sse")
1278 || ! strcmp (ix86_fpmath_string
, "sse,387"))
1282 warning ("SSE instruction set disabled, using 387 arithmetics");
1283 ix86_fpmath
= FPMATH_387
;
1285 else if (!TARGET_80387
)
1287 warning ("387 instruction set disabled, using SSE arithmetics");
1288 ix86_fpmath
= FPMATH_SSE
;
1291 ix86_fpmath
= FPMATH_SSE
| FPMATH_387
;
1294 error ("bad value (%s) for -mfpmath= switch", ix86_fpmath_string
);
1297 /* It makes no sense to ask for just SSE builtins, so MMX is also turned
1301 target_flags
|= MASK_MMX
;
1302 x86_prefetch_sse
= true;
1305 /* If it has 3DNow! it also has MMX so MMX is also turned on by -m3dnow */
1308 target_flags
|= MASK_MMX
;
1309 /* If we are targetting the Athlon architecture, enable the 3Dnow/MMX
1310 extensions it adds. */
1311 if (x86_3dnow_a
& (1 << ix86_arch
))
1312 target_flags
|= MASK_3DNOW_A
;
1314 if ((x86_accumulate_outgoing_args
& CPUMASK
)
1315 && !(target_flags_explicit
& MASK_ACCUMULATE_OUTGOING_ARGS
)
1317 target_flags
|= MASK_ACCUMULATE_OUTGOING_ARGS
;
1319 /* Figure out what ASM_GENERATE_INTERNAL_LABEL builds as a prefix. */
1322 ASM_GENERATE_INTERNAL_LABEL (internal_label_prefix
, "LX", 0);
1323 p
= strchr (internal_label_prefix
, 'X');
1324 internal_label_prefix_len
= p
- internal_label_prefix
;
1330 optimization_options (level
, size
)
1332 int size ATTRIBUTE_UNUSED
;
1334 /* For -O2 and beyond, turn off -fschedule-insns by default. It tends to
1335 make the problem with not enough registers even worse. */
1336 #ifdef INSN_SCHEDULING
1338 flag_schedule_insns
= 0;
1341 /* The default values of these switches depend on the TARGET_64BIT
1342 that is not known at this moment. Mark these values with 2 and
1343 let user the to override these. In case there is no command line option
1344 specifying them, we will set the defaults in override_options. */
1346 flag_omit_frame_pointer
= 2;
1347 flag_pcc_struct_return
= 2;
1348 flag_asynchronous_unwind_tables
= 2;
1351 /* Table of valid machine attributes. */
1352 const struct attribute_spec ix86_attribute_table
[] =
1354 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
1355 /* Stdcall attribute says callee is responsible for popping arguments
1356 if they are not variable. */
1357 { "stdcall", 0, 0, false, true, true, ix86_handle_cdecl_attribute
},
1358 /* Cdecl attribute says the callee is a normal C declaration */
1359 { "cdecl", 0, 0, false, true, true, ix86_handle_cdecl_attribute
},
1360 /* Regparm attribute specifies how many integer arguments are to be
1361 passed in registers. */
1362 { "regparm", 1, 1, false, true, true, ix86_handle_regparm_attribute
},
1363 #ifdef TARGET_DLLIMPORT_DECL_ATTRIBUTES
1364 { "dllimport", 0, 0, false, false, false, ix86_handle_dll_attribute
},
1365 { "dllexport", 0, 0, false, false, false, ix86_handle_dll_attribute
},
1366 { "shared", 0, 0, true, false, false, ix86_handle_shared_attribute
},
1368 { NULL
, 0, 0, false, false, false, NULL
}
1371 /* Handle a "cdecl" or "stdcall" attribute;
1372 arguments as in struct attribute_spec.handler. */
1374 ix86_handle_cdecl_attribute (node
, name
, args
, flags
, no_add_attrs
)
1377 tree args ATTRIBUTE_UNUSED
;
1378 int flags ATTRIBUTE_UNUSED
;
1381 if (TREE_CODE (*node
) != FUNCTION_TYPE
1382 && TREE_CODE (*node
) != METHOD_TYPE
1383 && TREE_CODE (*node
) != FIELD_DECL
1384 && TREE_CODE (*node
) != TYPE_DECL
)
1386 warning ("`%s' attribute only applies to functions",
1387 IDENTIFIER_POINTER (name
));
1388 *no_add_attrs
= true;
1393 warning ("`%s' attribute ignored", IDENTIFIER_POINTER (name
));
1394 *no_add_attrs
= true;
1400 /* Handle a "regparm" attribute;
1401 arguments as in struct attribute_spec.handler. */
1403 ix86_handle_regparm_attribute (node
, name
, args
, flags
, no_add_attrs
)
1407 int flags ATTRIBUTE_UNUSED
;
1410 if (TREE_CODE (*node
) != FUNCTION_TYPE
1411 && TREE_CODE (*node
) != METHOD_TYPE
1412 && TREE_CODE (*node
) != FIELD_DECL
1413 && TREE_CODE (*node
) != TYPE_DECL
)
1415 warning ("`%s' attribute only applies to functions",
1416 IDENTIFIER_POINTER (name
));
1417 *no_add_attrs
= true;
1423 cst
= TREE_VALUE (args
);
1424 if (TREE_CODE (cst
) != INTEGER_CST
)
1426 warning ("`%s' attribute requires an integer constant argument",
1427 IDENTIFIER_POINTER (name
));
1428 *no_add_attrs
= true;
1430 else if (compare_tree_int (cst
, REGPARM_MAX
) > 0)
1432 warning ("argument to `%s' attribute larger than %d",
1433 IDENTIFIER_POINTER (name
), REGPARM_MAX
);
1434 *no_add_attrs
= true;
1441 /* Return 0 if the attributes for two types are incompatible, 1 if they
1442 are compatible, and 2 if they are nearly compatible (which causes a
1443 warning to be generated). */
1446 ix86_comp_type_attributes (type1
, type2
)
1450 /* Check for mismatch of non-default calling convention. */
1451 const char *const rtdstr
= TARGET_RTD
? "cdecl" : "stdcall";
1453 if (TREE_CODE (type1
) != FUNCTION_TYPE
)
1456 /* Check for mismatched return types (cdecl vs stdcall). */
1457 if (!lookup_attribute (rtdstr
, TYPE_ATTRIBUTES (type1
))
1458 != !lookup_attribute (rtdstr
, TYPE_ATTRIBUTES (type2
)))
1463 /* Return the regparm value for a fuctio with the indicated TYPE. */
1466 ix86_fntype_regparm (type
)
1471 attr
= lookup_attribute ("regparm", TYPE_ATTRIBUTES (type
));
1473 return TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr
)));
1475 return ix86_regparm
;
1478 /* Value is the number of bytes of arguments automatically
1479 popped when returning from a subroutine call.
1480 FUNDECL is the declaration node of the function (as a tree),
1481 FUNTYPE is the data type of the function (as a tree),
1482 or for a library call it is an identifier node for the subroutine name.
1483 SIZE is the number of bytes of arguments passed on the stack.
1485 On the 80386, the RTD insn may be used to pop them if the number
1486 of args is fixed, but if the number is variable then the caller
1487 must pop them all. RTD can't be used for library calls now
1488 because the library is compiled with the Unix compiler.
1489 Use of RTD is a selectable option, since it is incompatible with
1490 standard Unix calling sequences. If the option is not selected,
1491 the caller must always pop the args.
1493 The attribute stdcall is equivalent to RTD on a per module basis. */
1496 ix86_return_pops_args (fundecl
, funtype
, size
)
1501 int rtd
= TARGET_RTD
&& (!fundecl
|| TREE_CODE (fundecl
) != IDENTIFIER_NODE
);
1503 /* Cdecl functions override -mrtd, and never pop the stack. */
1504 if (! lookup_attribute ("cdecl", TYPE_ATTRIBUTES (funtype
))) {
1506 /* Stdcall functions will pop the stack if not variable args. */
1507 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (funtype
)))
1511 && (TYPE_ARG_TYPES (funtype
) == NULL_TREE
1512 || (TREE_VALUE (tree_last (TYPE_ARG_TYPES (funtype
)))
1513 == void_type_node
)))
1517 /* Lose any fake structure return argument if it is passed on the stack. */
1518 if (aggregate_value_p (TREE_TYPE (funtype
))
1521 int nregs
= ix86_fntype_regparm (funtype
);
1524 return GET_MODE_SIZE (Pmode
);
1530 /* Argument support functions. */
1532 /* Return true when register may be used to pass function parameters. */
1534 ix86_function_arg_regno_p (regno
)
1539 return (regno
< REGPARM_MAX
1540 || (TARGET_SSE
&& SSE_REGNO_P (regno
) && !fixed_regs
[regno
]));
1541 if (SSE_REGNO_P (regno
) && TARGET_SSE
)
1543 /* RAX is used as hidden argument to va_arg functions. */
1546 for (i
= 0; i
< REGPARM_MAX
; i
++)
1547 if (regno
== x86_64_int_parameter_registers
[i
])
1552 /* Initialize a variable CUM of type CUMULATIVE_ARGS
1553 for a call to a function whose data type is FNTYPE.
1554 For a library call, FNTYPE is 0. */
1557 init_cumulative_args (cum
, fntype
, libname
)
1558 CUMULATIVE_ARGS
*cum
; /* Argument info to initialize */
1559 tree fntype
; /* tree ptr for function decl */
1560 rtx libname
; /* SYMBOL_REF of library name or 0 */
1562 static CUMULATIVE_ARGS zero_cum
;
1563 tree param
, next_param
;
1565 if (TARGET_DEBUG_ARG
)
1567 fprintf (stderr
, "\ninit_cumulative_args (");
1569 fprintf (stderr
, "fntype code = %s, ret code = %s",
1570 tree_code_name
[(int) TREE_CODE (fntype
)],
1571 tree_code_name
[(int) TREE_CODE (TREE_TYPE (fntype
))]);
1573 fprintf (stderr
, "no fntype");
1576 fprintf (stderr
, ", libname = %s", XSTR (libname
, 0));
1581 /* Set up the number of registers to use for passing arguments. */
1582 cum
->nregs
= ix86_regparm
;
1583 cum
->sse_nregs
= SSE_REGPARM_MAX
;
1584 if (fntype
&& !TARGET_64BIT
)
1586 tree attr
= lookup_attribute ("regparm", TYPE_ATTRIBUTES (fntype
));
1589 cum
->nregs
= TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr
)));
1591 cum
->maybe_vaarg
= false;
1593 /* Determine if this function has variable arguments. This is
1594 indicated by the last argument being 'void_type_mode' if there
1595 are no variable arguments. If there are variable arguments, then
1596 we won't pass anything in registers */
1600 for (param
= (fntype
) ? TYPE_ARG_TYPES (fntype
) : 0;
1601 param
!= 0; param
= next_param
)
1603 next_param
= TREE_CHAIN (param
);
1604 if (next_param
== 0 && TREE_VALUE (param
) != void_type_node
)
1608 cum
->maybe_vaarg
= true;
1612 if ((!fntype
&& !libname
)
1613 || (fntype
&& !TYPE_ARG_TYPES (fntype
)))
1614 cum
->maybe_vaarg
= 1;
1616 if (TARGET_DEBUG_ARG
)
1617 fprintf (stderr
, ", nregs=%d )\n", cum
->nregs
);
1622 /* x86-64 register passing impleemntation. See x86-64 ABI for details. Goal
1623 of this code is to classify each 8bytes of incoming argument by the register
1624 class and assign registers accordingly. */
1626 /* Return the union class of CLASS1 and CLASS2.
1627 See the x86-64 PS ABI for details. */
1629 static enum x86_64_reg_class
1630 merge_classes (class1
, class2
)
1631 enum x86_64_reg_class class1
, class2
;
1633 /* Rule #1: If both classes are equal, this is the resulting class. */
1634 if (class1
== class2
)
1637 /* Rule #2: If one of the classes is NO_CLASS, the resulting class is
1639 if (class1
== X86_64_NO_CLASS
)
1641 if (class2
== X86_64_NO_CLASS
)
1644 /* Rule #3: If one of the classes is MEMORY, the result is MEMORY. */
1645 if (class1
== X86_64_MEMORY_CLASS
|| class2
== X86_64_MEMORY_CLASS
)
1646 return X86_64_MEMORY_CLASS
;
1648 /* Rule #4: If one of the classes is INTEGER, the result is INTEGER. */
1649 if ((class1
== X86_64_INTEGERSI_CLASS
&& class2
== X86_64_SSESF_CLASS
)
1650 || (class2
== X86_64_INTEGERSI_CLASS
&& class1
== X86_64_SSESF_CLASS
))
1651 return X86_64_INTEGERSI_CLASS
;
1652 if (class1
== X86_64_INTEGER_CLASS
|| class1
== X86_64_INTEGERSI_CLASS
1653 || class2
== X86_64_INTEGER_CLASS
|| class2
== X86_64_INTEGERSI_CLASS
)
1654 return X86_64_INTEGER_CLASS
;
1656 /* Rule #5: If one of the classes is X87 or X87UP class, MEMORY is used. */
1657 if (class1
== X86_64_X87_CLASS
|| class1
== X86_64_X87UP_CLASS
1658 || class2
== X86_64_X87_CLASS
|| class2
== X86_64_X87UP_CLASS
)
1659 return X86_64_MEMORY_CLASS
;
1661 /* Rule #6: Otherwise class SSE is used. */
1662 return X86_64_SSE_CLASS
;
1665 /* Classify the argument of type TYPE and mode MODE.
1666 CLASSES will be filled by the register class used to pass each word
1667 of the operand. The number of words is returned. In case the parameter
1668 should be passed in memory, 0 is returned. As a special case for zero
1669 sized containers, classes[0] will be NO_CLASS and 1 is returned.
1671 BIT_OFFSET is used internally for handling records and specifies offset
1672 of the offset in bits modulo 256 to avoid overflow cases.
1674 See the x86-64 PS ABI for details.
1678 classify_argument (mode
, type
, classes
, bit_offset
)
1679 enum machine_mode mode
;
1681 enum x86_64_reg_class classes
[MAX_CLASSES
];
1685 (mode
== BLKmode
) ? int_size_in_bytes (type
) : (int) GET_MODE_SIZE (mode
);
1686 int words
= (bytes
+ (bit_offset
% 64) / 8 + UNITS_PER_WORD
- 1) / UNITS_PER_WORD
;
1688 /* Variable sized entities are always passed/returned in memory. */
1692 if (type
&& AGGREGATE_TYPE_P (type
))
1696 enum x86_64_reg_class subclasses
[MAX_CLASSES
];
1698 /* On x86-64 we pass structures larger than 16 bytes on the stack. */
1702 for (i
= 0; i
< words
; i
++)
1703 classes
[i
] = X86_64_NO_CLASS
;
1705 /* Zero sized arrays or structures are NO_CLASS. We return 0 to
1706 signalize memory class, so handle it as special case. */
1709 classes
[0] = X86_64_NO_CLASS
;
1713 /* Classify each field of record and merge classes. */
1714 if (TREE_CODE (type
) == RECORD_TYPE
)
1716 /* For classes first merge in the field of the subclasses. */
1717 if (TYPE_BINFO (type
) != NULL
&& TYPE_BINFO_BASETYPES (type
) != NULL
)
1719 tree bases
= TYPE_BINFO_BASETYPES (type
);
1720 int n_bases
= TREE_VEC_LENGTH (bases
);
1723 for (i
= 0; i
< n_bases
; ++i
)
1725 tree binfo
= TREE_VEC_ELT (bases
, i
);
1727 int offset
= tree_low_cst (BINFO_OFFSET (binfo
), 0) * 8;
1728 tree type
= BINFO_TYPE (binfo
);
1730 num
= classify_argument (TYPE_MODE (type
),
1732 (offset
+ bit_offset
) % 256);
1735 for (i
= 0; i
< num
; i
++)
1737 int pos
= (offset
+ (bit_offset
% 64)) / 8 / 8;
1739 merge_classes (subclasses
[i
], classes
[i
+ pos
]);
1743 /* And now merge the fields of structure. */
1744 for (field
= TYPE_FIELDS (type
); field
; field
= TREE_CHAIN (field
))
1746 if (TREE_CODE (field
) == FIELD_DECL
)
1750 /* Bitfields are always classified as integer. Handle them
1751 early, since later code would consider them to be
1752 misaligned integers. */
1753 if (DECL_BIT_FIELD (field
))
1755 for (i
= int_bit_position (field
) / 8 / 8;
1756 i
< (int_bit_position (field
)
1757 + tree_low_cst (DECL_SIZE (field
), 0)
1760 merge_classes (X86_64_INTEGER_CLASS
,
1765 num
= classify_argument (TYPE_MODE (TREE_TYPE (field
)),
1766 TREE_TYPE (field
), subclasses
,
1767 (int_bit_position (field
)
1768 + bit_offset
) % 256);
1771 for (i
= 0; i
< num
; i
++)
1774 (int_bit_position (field
) + (bit_offset
% 64)) / 8 / 8;
1776 merge_classes (subclasses
[i
], classes
[i
+ pos
]);
1782 /* Arrays are handled as small records. */
1783 else if (TREE_CODE (type
) == ARRAY_TYPE
)
1786 num
= classify_argument (TYPE_MODE (TREE_TYPE (type
)),
1787 TREE_TYPE (type
), subclasses
, bit_offset
);
1791 /* The partial classes are now full classes. */
1792 if (subclasses
[0] == X86_64_SSESF_CLASS
&& bytes
!= 4)
1793 subclasses
[0] = X86_64_SSE_CLASS
;
1794 if (subclasses
[0] == X86_64_INTEGERSI_CLASS
&& bytes
!= 4)
1795 subclasses
[0] = X86_64_INTEGER_CLASS
;
1797 for (i
= 0; i
< words
; i
++)
1798 classes
[i
] = subclasses
[i
% num
];
1800 /* Unions are similar to RECORD_TYPE but offset is always 0. */
1801 else if (TREE_CODE (type
) == UNION_TYPE
1802 || TREE_CODE (type
) == QUAL_UNION_TYPE
)
1804 /* For classes first merge in the field of the subclasses. */
1805 if (TYPE_BINFO (type
) != NULL
&& TYPE_BINFO_BASETYPES (type
) != NULL
)
1807 tree bases
= TYPE_BINFO_BASETYPES (type
);
1808 int n_bases
= TREE_VEC_LENGTH (bases
);
1811 for (i
= 0; i
< n_bases
; ++i
)
1813 tree binfo
= TREE_VEC_ELT (bases
, i
);
1815 int offset
= tree_low_cst (BINFO_OFFSET (binfo
), 0) * 8;
1816 tree type
= BINFO_TYPE (binfo
);
1818 num
= classify_argument (TYPE_MODE (type
),
1820 (offset
+ (bit_offset
% 64)) % 256);
1823 for (i
= 0; i
< num
; i
++)
1825 int pos
= (offset
+ (bit_offset
% 64)) / 8 / 8;
1827 merge_classes (subclasses
[i
], classes
[i
+ pos
]);
1831 for (field
= TYPE_FIELDS (type
); field
; field
= TREE_CHAIN (field
))
1833 if (TREE_CODE (field
) == FIELD_DECL
)
1836 num
= classify_argument (TYPE_MODE (TREE_TYPE (field
)),
1837 TREE_TYPE (field
), subclasses
,
1841 for (i
= 0; i
< num
; i
++)
1842 classes
[i
] = merge_classes (subclasses
[i
], classes
[i
]);
1849 /* Final merger cleanup. */
1850 for (i
= 0; i
< words
; i
++)
1852 /* If one class is MEMORY, everything should be passed in
1854 if (classes
[i
] == X86_64_MEMORY_CLASS
)
1857 /* The X86_64_SSEUP_CLASS should be always preceded by
1858 X86_64_SSE_CLASS. */
1859 if (classes
[i
] == X86_64_SSEUP_CLASS
1860 && (i
== 0 || classes
[i
- 1] != X86_64_SSE_CLASS
))
1861 classes
[i
] = X86_64_SSE_CLASS
;
1863 /* X86_64_X87UP_CLASS should be preceded by X86_64_X87_CLASS. */
1864 if (classes
[i
] == X86_64_X87UP_CLASS
1865 && (i
== 0 || classes
[i
- 1] != X86_64_X87_CLASS
))
1866 classes
[i
] = X86_64_SSE_CLASS
;
1871 /* Compute alignment needed. We align all types to natural boundaries with
1872 exception of XFmode that is aligned to 64bits. */
1873 if (mode
!= VOIDmode
&& mode
!= BLKmode
)
1875 int mode_alignment
= GET_MODE_BITSIZE (mode
);
1878 mode_alignment
= 128;
1879 else if (mode
== XCmode
)
1880 mode_alignment
= 256;
1881 /* Misaligned fields are always returned in memory. */
1882 if (bit_offset
% mode_alignment
)
1886 /* Classification of atomic types. */
1896 if (bit_offset
+ GET_MODE_BITSIZE (mode
) <= 32)
1897 classes
[0] = X86_64_INTEGERSI_CLASS
;
1899 classes
[0] = X86_64_INTEGER_CLASS
;
1903 classes
[0] = classes
[1] = X86_64_INTEGER_CLASS
;
1906 classes
[0] = classes
[1] = X86_64_INTEGER_CLASS
;
1907 classes
[2] = classes
[3] = X86_64_INTEGER_CLASS
;
1910 if (!(bit_offset
% 64))
1911 classes
[0] = X86_64_SSESF_CLASS
;
1913 classes
[0] = X86_64_SSE_CLASS
;
1916 classes
[0] = X86_64_SSEDF_CLASS
;
1919 classes
[0] = X86_64_X87_CLASS
;
1920 classes
[1] = X86_64_X87UP_CLASS
;
1923 classes
[0] = X86_64_X87_CLASS
;
1924 classes
[1] = X86_64_X87UP_CLASS
;
1925 classes
[2] = X86_64_X87_CLASS
;
1926 classes
[3] = X86_64_X87UP_CLASS
;
1929 classes
[0] = X86_64_SSEDF_CLASS
;
1930 classes
[1] = X86_64_SSEDF_CLASS
;
1933 classes
[0] = X86_64_SSE_CLASS
;
1941 classes
[0] = X86_64_SSE_CLASS
;
1942 classes
[1] = X86_64_SSEUP_CLASS
;
1957 /* Examine the argument and return set number of register required in each
1958 class. Return 0 iff parameter should be passed in memory. */
1960 examine_argument (mode
, type
, in_return
, int_nregs
, sse_nregs
)
1961 enum machine_mode mode
;
1963 int *int_nregs
, *sse_nregs
;
1966 enum x86_64_reg_class
class[MAX_CLASSES
];
1967 int n
= classify_argument (mode
, type
, class, 0);
1973 for (n
--; n
>= 0; n
--)
1976 case X86_64_INTEGER_CLASS
:
1977 case X86_64_INTEGERSI_CLASS
:
1980 case X86_64_SSE_CLASS
:
1981 case X86_64_SSESF_CLASS
:
1982 case X86_64_SSEDF_CLASS
:
1985 case X86_64_NO_CLASS
:
1986 case X86_64_SSEUP_CLASS
:
1988 case X86_64_X87_CLASS
:
1989 case X86_64_X87UP_CLASS
:
1993 case X86_64_MEMORY_CLASS
:
1998 /* Construct container for the argument used by GCC interface. See
1999 FUNCTION_ARG for the detailed description. */
2001 construct_container (mode
, type
, in_return
, nintregs
, nsseregs
, intreg
, sse_regno
)
2002 enum machine_mode mode
;
2005 int nintregs
, nsseregs
;
2009 enum machine_mode tmpmode
;
2011 (mode
== BLKmode
) ? int_size_in_bytes (type
) : (int) GET_MODE_SIZE (mode
);
2012 enum x86_64_reg_class
class[MAX_CLASSES
];
2016 int needed_sseregs
, needed_intregs
;
2017 rtx exp
[MAX_CLASSES
];
2020 n
= classify_argument (mode
, type
, class, 0);
2021 if (TARGET_DEBUG_ARG
)
2024 fprintf (stderr
, "Memory class\n");
2027 fprintf (stderr
, "Classes:");
2028 for (i
= 0; i
< n
; i
++)
2030 fprintf (stderr
, " %s", x86_64_reg_class_name
[class[i
]]);
2032 fprintf (stderr
, "\n");
2037 if (!examine_argument (mode
, type
, in_return
, &needed_intregs
, &needed_sseregs
))
2039 if (needed_intregs
> nintregs
|| needed_sseregs
> nsseregs
)
2042 /* First construct simple cases. Avoid SCmode, since we want to use
2043 single register to pass this type. */
2044 if (n
== 1 && mode
!= SCmode
)
2047 case X86_64_INTEGER_CLASS
:
2048 case X86_64_INTEGERSI_CLASS
:
2049 return gen_rtx_REG (mode
, intreg
[0]);
2050 case X86_64_SSE_CLASS
:
2051 case X86_64_SSESF_CLASS
:
2052 case X86_64_SSEDF_CLASS
:
2053 return gen_rtx_REG (mode
, SSE_REGNO (sse_regno
));
2054 case X86_64_X87_CLASS
:
2055 return gen_rtx_REG (mode
, FIRST_STACK_REG
);
2056 case X86_64_NO_CLASS
:
2057 /* Zero sized array, struct or class. */
2062 if (n
== 2 && class[0] == X86_64_SSE_CLASS
&& class[1] == X86_64_SSEUP_CLASS
)
2063 return gen_rtx_REG (mode
, SSE_REGNO (sse_regno
));
2065 && class[0] == X86_64_X87_CLASS
&& class[1] == X86_64_X87UP_CLASS
)
2066 return gen_rtx_REG (TFmode
, FIRST_STACK_REG
);
2067 if (n
== 2 && class[0] == X86_64_INTEGER_CLASS
2068 && class[1] == X86_64_INTEGER_CLASS
2069 && (mode
== CDImode
|| mode
== TImode
)
2070 && intreg
[0] + 1 == intreg
[1])
2071 return gen_rtx_REG (mode
, intreg
[0]);
2073 && class[0] == X86_64_X87_CLASS
&& class[1] == X86_64_X87UP_CLASS
2074 && class[2] == X86_64_X87_CLASS
&& class[3] == X86_64_X87UP_CLASS
)
2075 return gen_rtx_REG (TCmode
, FIRST_STACK_REG
);
2077 /* Otherwise figure out the entries of the PARALLEL. */
2078 for (i
= 0; i
< n
; i
++)
2082 case X86_64_NO_CLASS
:
2084 case X86_64_INTEGER_CLASS
:
2085 case X86_64_INTEGERSI_CLASS
:
2086 /* Merge TImodes on aligned occassions here too. */
2087 if (i
* 8 + 8 > bytes
)
2088 tmpmode
= mode_for_size ((bytes
- i
* 8) * BITS_PER_UNIT
, MODE_INT
, 0);
2089 else if (class[i
] == X86_64_INTEGERSI_CLASS
)
2093 /* We've requested 24 bytes we don't have mode for. Use DImode. */
2094 if (tmpmode
== BLKmode
)
2096 exp
[nexps
++] = gen_rtx_EXPR_LIST (VOIDmode
,
2097 gen_rtx_REG (tmpmode
, *intreg
),
2101 case X86_64_SSESF_CLASS
:
2102 exp
[nexps
++] = gen_rtx_EXPR_LIST (VOIDmode
,
2103 gen_rtx_REG (SFmode
,
2104 SSE_REGNO (sse_regno
)),
2108 case X86_64_SSEDF_CLASS
:
2109 exp
[nexps
++] = gen_rtx_EXPR_LIST (VOIDmode
,
2110 gen_rtx_REG (DFmode
,
2111 SSE_REGNO (sse_regno
)),
2115 case X86_64_SSE_CLASS
:
2116 if (i
< n
- 1 && class[i
+ 1] == X86_64_SSEUP_CLASS
)
2120 exp
[nexps
++] = gen_rtx_EXPR_LIST (VOIDmode
,
2121 gen_rtx_REG (tmpmode
,
2122 SSE_REGNO (sse_regno
)),
2124 if (tmpmode
== TImode
)
2132 ret
= gen_rtx_PARALLEL (mode
, rtvec_alloc (nexps
));
2133 for (i
= 0; i
< nexps
; i
++)
2134 XVECEXP (ret
, 0, i
) = exp
[i
];
2138 /* Update the data in CUM to advance over an argument
2139 of mode MODE and data type TYPE.
2140 (TYPE is null for libcalls where that information may not be available.) */
2143 function_arg_advance (cum
, mode
, type
, named
)
2144 CUMULATIVE_ARGS
*cum
; /* current arg information */
2145 enum machine_mode mode
; /* current arg mode */
2146 tree type
; /* type of the argument or 0 if lib support */
2147 int named
; /* whether or not the argument was named */
2150 (mode
== BLKmode
) ? int_size_in_bytes (type
) : (int) GET_MODE_SIZE (mode
);
2151 int words
= (bytes
+ UNITS_PER_WORD
- 1) / UNITS_PER_WORD
;
2153 if (TARGET_DEBUG_ARG
)
2155 "function_adv (sz=%d, wds=%2d, nregs=%d, mode=%s, named=%d)\n\n",
2156 words
, cum
->words
, cum
->nregs
, GET_MODE_NAME (mode
), named
);
2159 int int_nregs
, sse_nregs
;
2160 if (!examine_argument (mode
, type
, 0, &int_nregs
, &sse_nregs
))
2161 cum
->words
+= words
;
2162 else if (sse_nregs
<= cum
->sse_nregs
&& int_nregs
<= cum
->nregs
)
2164 cum
->nregs
-= int_nregs
;
2165 cum
->sse_nregs
-= sse_nregs
;
2166 cum
->regno
+= int_nregs
;
2167 cum
->sse_regno
+= sse_nregs
;
2170 cum
->words
+= words
;
2174 if (TARGET_SSE
&& mode
== TImode
)
2176 cum
->sse_words
+= words
;
2177 cum
->sse_nregs
-= 1;
2178 cum
->sse_regno
+= 1;
2179 if (cum
->sse_nregs
<= 0)
2187 cum
->words
+= words
;
2188 cum
->nregs
-= words
;
2189 cum
->regno
+= words
;
2191 if (cum
->nregs
<= 0)
2201 /* Define where to put the arguments to a function.
2202 Value is zero to push the argument on the stack,
2203 or a hard register in which to store the argument.
2205 MODE is the argument's machine mode.
2206 TYPE is the data type of the argument (as a tree).
2207 This is null for libcalls where that information may
2209 CUM is a variable of type CUMULATIVE_ARGS which gives info about
2210 the preceding args and about the function being called.
2211 NAMED is nonzero if this argument is a named parameter
2212 (otherwise it is an extra parameter matching an ellipsis). */
2215 function_arg (cum
, mode
, type
, named
)
2216 CUMULATIVE_ARGS
*cum
; /* current arg information */
2217 enum machine_mode mode
; /* current arg mode */
2218 tree type
; /* type of the argument or 0 if lib support */
2219 int named
; /* != 0 for normal args, == 0 for ... args */
2223 (mode
== BLKmode
) ? int_size_in_bytes (type
) : (int) GET_MODE_SIZE (mode
);
2224 int words
= (bytes
+ UNITS_PER_WORD
- 1) / UNITS_PER_WORD
;
2226 /* Handle an hidden AL argument containing number of registers for varargs
2227 x86-64 functions. For i386 ABI just return constm1_rtx to avoid
2229 if (mode
== VOIDmode
)
2232 return GEN_INT (cum
->maybe_vaarg
2233 ? (cum
->sse_nregs
< 0
2241 ret
= construct_container (mode
, type
, 0, cum
->nregs
, cum
->sse_nregs
,
2242 &x86_64_int_parameter_registers
[cum
->regno
],
2247 /* For now, pass fp/complex values on the stack. */
2256 if (words
<= cum
->nregs
)
2257 ret
= gen_rtx_REG (mode
, cum
->regno
);
2261 ret
= gen_rtx_REG (mode
, cum
->sse_regno
);
2265 if (TARGET_DEBUG_ARG
)
2268 "function_arg (size=%d, wds=%2d, nregs=%d, mode=%4s, named=%d, ",
2269 words
, cum
->words
, cum
->nregs
, GET_MODE_NAME (mode
), named
);
2272 print_simple_rtl (stderr
, ret
);
2274 fprintf (stderr
, ", stack");
2276 fprintf (stderr
, " )\n");
2282 /* Gives the alignment boundary, in bits, of an argument with the specified mode
2286 ix86_function_arg_boundary (mode
, type
)
2287 enum machine_mode mode
;
2292 return PARM_BOUNDARY
;
2294 align
= TYPE_ALIGN (type
);
2296 align
= GET_MODE_ALIGNMENT (mode
);
2297 if (align
< PARM_BOUNDARY
)
2298 align
= PARM_BOUNDARY
;
2304 /* Return true if N is a possible register number of function value. */
2306 ix86_function_value_regno_p (regno
)
2311 return ((regno
) == 0
2312 || ((regno
) == FIRST_FLOAT_REG
&& TARGET_FLOAT_RETURNS_IN_80387
)
2313 || ((regno
) == FIRST_SSE_REG
&& TARGET_SSE
));
2315 return ((regno
) == 0 || (regno
) == FIRST_FLOAT_REG
2316 || ((regno
) == FIRST_SSE_REG
&& TARGET_SSE
)
2317 || ((regno
) == FIRST_FLOAT_REG
&& TARGET_FLOAT_RETURNS_IN_80387
));
2320 /* Define how to find the value returned by a function.
2321 VALTYPE is the data type of the value (as a tree).
2322 If the precise function being called is known, FUNC is its FUNCTION_DECL;
2323 otherwise, FUNC is 0. */
2325 ix86_function_value (valtype
)
2330 rtx ret
= construct_container (TYPE_MODE (valtype
), valtype
, 1,
2331 REGPARM_MAX
, SSE_REGPARM_MAX
,
2332 x86_64_int_return_registers
, 0);
2333 /* For zero sized structures, construct_continer return NULL, but we need
2334 to keep rest of compiler happy by returning meaningfull value. */
2336 ret
= gen_rtx_REG (TYPE_MODE (valtype
), 0);
2340 return gen_rtx_REG (TYPE_MODE (valtype
),
2341 ix86_value_regno (TYPE_MODE (valtype
)));
2344 /* Return false iff type is returned in memory. */
2346 ix86_return_in_memory (type
)
2349 int needed_intregs
, needed_sseregs
;
2352 return !examine_argument (TYPE_MODE (type
), type
, 1,
2353 &needed_intregs
, &needed_sseregs
);
2357 if (TYPE_MODE (type
) == BLKmode
2358 || (VECTOR_MODE_P (TYPE_MODE (type
))
2359 && int_size_in_bytes (type
) == 8)
2360 || (int_size_in_bytes (type
) > 12 && TYPE_MODE (type
) != TImode
2361 && TYPE_MODE (type
) != TFmode
2362 && !VECTOR_MODE_P (TYPE_MODE (type
))))
2368 /* Define how to find the value returned by a library function
2369 assuming the value has mode MODE. */
2371 ix86_libcall_value (mode
)
2372 enum machine_mode mode
;
2382 return gen_rtx_REG (mode
, FIRST_SSE_REG
);
2385 return gen_rtx_REG (mode
, FIRST_FLOAT_REG
);
2387 return gen_rtx_REG (mode
, 0);
2391 return gen_rtx_REG (mode
, ix86_value_regno (mode
));
2394 /* Given a mode, return the register to use for a return value. */
2397 ix86_value_regno (mode
)
2398 enum machine_mode mode
;
2400 if (GET_MODE_CLASS (mode
) == MODE_FLOAT
&& TARGET_FLOAT_RETURNS_IN_80387
)
2401 return FIRST_FLOAT_REG
;
2402 if (mode
== TImode
|| VECTOR_MODE_P (mode
))
2403 return FIRST_SSE_REG
;
2407 /* Create the va_list data type. */
2410 ix86_build_va_list ()
2412 tree f_gpr
, f_fpr
, f_ovf
, f_sav
, record
, type_decl
;
2414 /* For i386 we use plain pointer to argument area. */
2416 return build_pointer_type (char_type_node
);
2418 record
= (*lang_hooks
.types
.make_type
) (RECORD_TYPE
);
2419 type_decl
= build_decl (TYPE_DECL
, get_identifier ("__va_list_tag"), record
);
2421 f_gpr
= build_decl (FIELD_DECL
, get_identifier ("gp_offset"),
2422 unsigned_type_node
);
2423 f_fpr
= build_decl (FIELD_DECL
, get_identifier ("fp_offset"),
2424 unsigned_type_node
);
2425 f_ovf
= build_decl (FIELD_DECL
, get_identifier ("overflow_arg_area"),
2427 f_sav
= build_decl (FIELD_DECL
, get_identifier ("reg_save_area"),
2430 DECL_FIELD_CONTEXT (f_gpr
) = record
;
2431 DECL_FIELD_CONTEXT (f_fpr
) = record
;
2432 DECL_FIELD_CONTEXT (f_ovf
) = record
;
2433 DECL_FIELD_CONTEXT (f_sav
) = record
;
2435 TREE_CHAIN (record
) = type_decl
;
2436 TYPE_NAME (record
) = type_decl
;
2437 TYPE_FIELDS (record
) = f_gpr
;
2438 TREE_CHAIN (f_gpr
) = f_fpr
;
2439 TREE_CHAIN (f_fpr
) = f_ovf
;
2440 TREE_CHAIN (f_ovf
) = f_sav
;
2442 layout_type (record
);
2444 /* The correct type is an array type of one element. */
2445 return build_array_type (record
, build_index_type (size_zero_node
));
2448 /* Perform any needed actions needed for a function that is receiving a
2449 variable number of arguments.
2453 MODE and TYPE are the mode and type of the current parameter.
2455 PRETEND_SIZE is a variable that should be set to the amount of stack
2456 that must be pushed by the prolog to pretend that our caller pushed
2459 Normally, this macro will push all remaining incoming registers on the
2460 stack and set PRETEND_SIZE to the length of the registers pushed. */
2463 ix86_setup_incoming_varargs (cum
, mode
, type
, pretend_size
, no_rtl
)
2464 CUMULATIVE_ARGS
*cum
;
2465 enum machine_mode mode
;
2467 int *pretend_size ATTRIBUTE_UNUSED
;
2471 CUMULATIVE_ARGS next_cum
;
2472 rtx save_area
= NULL_RTX
, mem
;
2485 /* Indicate to allocate space on the stack for varargs save area. */
2486 ix86_save_varrargs_registers
= 1;
2488 fntype
= TREE_TYPE (current_function_decl
);
2489 stdarg_p
= (TYPE_ARG_TYPES (fntype
) != 0
2490 && (TREE_VALUE (tree_last (TYPE_ARG_TYPES (fntype
)))
2491 != void_type_node
));
2493 /* For varargs, we do not want to skip the dummy va_dcl argument.
2494 For stdargs, we do want to skip the last named argument. */
2497 function_arg_advance (&next_cum
, mode
, type
, 1);
2500 save_area
= frame_pointer_rtx
;
2502 set
= get_varargs_alias_set ();
2504 for (i
= next_cum
.regno
; i
< ix86_regparm
; i
++)
2506 mem
= gen_rtx_MEM (Pmode
,
2507 plus_constant (save_area
, i
* UNITS_PER_WORD
));
2508 set_mem_alias_set (mem
, set
);
2509 emit_move_insn (mem
, gen_rtx_REG (Pmode
,
2510 x86_64_int_parameter_registers
[i
]));
2513 if (next_cum
.sse_nregs
)
2515 /* Now emit code to save SSE registers. The AX parameter contains number
2516 of SSE parameter regsiters used to call this function. We use
2517 sse_prologue_save insn template that produces computed jump across
2518 SSE saves. We need some preparation work to get this working. */
2520 label
= gen_label_rtx ();
2521 label_ref
= gen_rtx_LABEL_REF (Pmode
, label
);
2523 /* Compute address to jump to :
2524 label - 5*eax + nnamed_sse_arguments*5 */
2525 tmp_reg
= gen_reg_rtx (Pmode
);
2526 nsse_reg
= gen_reg_rtx (Pmode
);
2527 emit_insn (gen_zero_extendqidi2 (nsse_reg
, gen_rtx_REG (QImode
, 0)));
2528 emit_insn (gen_rtx_SET (VOIDmode
, tmp_reg
,
2529 gen_rtx_MULT (Pmode
, nsse_reg
,
2531 if (next_cum
.sse_regno
)
2534 gen_rtx_CONST (DImode
,
2535 gen_rtx_PLUS (DImode
,
2537 GEN_INT (next_cum
.sse_regno
* 4))));
2539 emit_move_insn (nsse_reg
, label_ref
);
2540 emit_insn (gen_subdi3 (nsse_reg
, nsse_reg
, tmp_reg
));
2542 /* Compute address of memory block we save into. We always use pointer
2543 pointing 127 bytes after first byte to store - this is needed to keep
2544 instruction size limited by 4 bytes. */
2545 tmp_reg
= gen_reg_rtx (Pmode
);
2546 emit_insn (gen_rtx_SET (VOIDmode
, tmp_reg
,
2547 plus_constant (save_area
,
2548 8 * REGPARM_MAX
+ 127)));
2549 mem
= gen_rtx_MEM (BLKmode
, plus_constant (tmp_reg
, -127));
2550 set_mem_alias_set (mem
, set
);
2551 set_mem_align (mem
, BITS_PER_WORD
);
2553 /* And finally do the dirty job! */
2554 emit_insn (gen_sse_prologue_save (mem
, nsse_reg
,
2555 GEN_INT (next_cum
.sse_regno
), label
));
2560 /* Implement va_start. */
2563 ix86_va_start (valist
, nextarg
)
2567 HOST_WIDE_INT words
, n_gpr
, n_fpr
;
2568 tree f_gpr
, f_fpr
, f_ovf
, f_sav
;
2569 tree gpr
, fpr
, ovf
, sav
, t
;
2571 /* Only 64bit target needs something special. */
2574 std_expand_builtin_va_start (valist
, nextarg
);
2578 f_gpr
= TYPE_FIELDS (TREE_TYPE (va_list_type_node
));
2579 f_fpr
= TREE_CHAIN (f_gpr
);
2580 f_ovf
= TREE_CHAIN (f_fpr
);
2581 f_sav
= TREE_CHAIN (f_ovf
);
2583 valist
= build1 (INDIRECT_REF
, TREE_TYPE (TREE_TYPE (valist
)), valist
);
2584 gpr
= build (COMPONENT_REF
, TREE_TYPE (f_gpr
), valist
, f_gpr
);
2585 fpr
= build (COMPONENT_REF
, TREE_TYPE (f_fpr
), valist
, f_fpr
);
2586 ovf
= build (COMPONENT_REF
, TREE_TYPE (f_ovf
), valist
, f_ovf
);
2587 sav
= build (COMPONENT_REF
, TREE_TYPE (f_sav
), valist
, f_sav
);
2589 /* Count number of gp and fp argument registers used. */
2590 words
= current_function_args_info
.words
;
2591 n_gpr
= current_function_args_info
.regno
;
2592 n_fpr
= current_function_args_info
.sse_regno
;
2594 if (TARGET_DEBUG_ARG
)
2595 fprintf (stderr
, "va_start: words = %d, n_gpr = %d, n_fpr = %d\n",
2596 (int) words
, (int) n_gpr
, (int) n_fpr
);
2598 t
= build (MODIFY_EXPR
, TREE_TYPE (gpr
), gpr
,
2599 build_int_2 (n_gpr
* 8, 0));
2600 TREE_SIDE_EFFECTS (t
) = 1;
2601 expand_expr (t
, const0_rtx
, VOIDmode
, EXPAND_NORMAL
);
2603 t
= build (MODIFY_EXPR
, TREE_TYPE (fpr
), fpr
,
2604 build_int_2 (n_fpr
* 16 + 8*REGPARM_MAX
, 0));
2605 TREE_SIDE_EFFECTS (t
) = 1;
2606 expand_expr (t
, const0_rtx
, VOIDmode
, EXPAND_NORMAL
);
2608 /* Find the overflow area. */
2609 t
= make_tree (TREE_TYPE (ovf
), virtual_incoming_args_rtx
);
2611 t
= build (PLUS_EXPR
, TREE_TYPE (ovf
), t
,
2612 build_int_2 (words
* UNITS_PER_WORD
, 0));
2613 t
= build (MODIFY_EXPR
, TREE_TYPE (ovf
), ovf
, t
);
2614 TREE_SIDE_EFFECTS (t
) = 1;
2615 expand_expr (t
, const0_rtx
, VOIDmode
, EXPAND_NORMAL
);
2617 /* Find the register save area.
2618 Prologue of the function save it right above stack frame. */
2619 t
= make_tree (TREE_TYPE (sav
), frame_pointer_rtx
);
2620 t
= build (MODIFY_EXPR
, TREE_TYPE (sav
), sav
, t
);
2621 TREE_SIDE_EFFECTS (t
) = 1;
2622 expand_expr (t
, const0_rtx
, VOIDmode
, EXPAND_NORMAL
);
2625 /* Implement va_arg. */
2627 ix86_va_arg (valist
, type
)
2630 static const int intreg
[6] = { 0, 1, 2, 3, 4, 5 };
2631 tree f_gpr
, f_fpr
, f_ovf
, f_sav
;
2632 tree gpr
, fpr
, ovf
, sav
, t
;
2634 rtx lab_false
, lab_over
= NULL_RTX
;
2638 /* Only 64bit target needs something special. */
2641 return std_expand_builtin_va_arg (valist
, type
);
2644 f_gpr
= TYPE_FIELDS (TREE_TYPE (va_list_type_node
));
2645 f_fpr
= TREE_CHAIN (f_gpr
);
2646 f_ovf
= TREE_CHAIN (f_fpr
);
2647 f_sav
= TREE_CHAIN (f_ovf
);
2649 valist
= build1 (INDIRECT_REF
, TREE_TYPE (TREE_TYPE (valist
)), valist
);
2650 gpr
= build (COMPONENT_REF
, TREE_TYPE (f_gpr
), valist
, f_gpr
);
2651 fpr
= build (COMPONENT_REF
, TREE_TYPE (f_fpr
), valist
, f_fpr
);
2652 ovf
= build (COMPONENT_REF
, TREE_TYPE (f_ovf
), valist
, f_ovf
);
2653 sav
= build (COMPONENT_REF
, TREE_TYPE (f_sav
), valist
, f_sav
);
2655 size
= int_size_in_bytes (type
);
2656 rsize
= (size
+ UNITS_PER_WORD
- 1) / UNITS_PER_WORD
;
2658 container
= construct_container (TYPE_MODE (type
), type
, 0,
2659 REGPARM_MAX
, SSE_REGPARM_MAX
, intreg
, 0);
2661 * Pull the value out of the saved registers ...
2664 addr_rtx
= gen_reg_rtx (Pmode
);
2668 rtx int_addr_rtx
, sse_addr_rtx
;
2669 int needed_intregs
, needed_sseregs
;
2672 lab_over
= gen_label_rtx ();
2673 lab_false
= gen_label_rtx ();
2675 examine_argument (TYPE_MODE (type
), type
, 0,
2676 &needed_intregs
, &needed_sseregs
);
2679 need_temp
= ((needed_intregs
&& TYPE_ALIGN (type
) > 64)
2680 || TYPE_ALIGN (type
) > 128);
2682 /* In case we are passing structure, verify that it is consetuctive block
2683 on the register save area. If not we need to do moves. */
2684 if (!need_temp
&& !REG_P (container
))
2686 /* Verify that all registers are strictly consetuctive */
2687 if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container
, 0, 0), 0))))
2691 for (i
= 0; i
< XVECLEN (container
, 0) && !need_temp
; i
++)
2693 rtx slot
= XVECEXP (container
, 0, i
);
2694 if (REGNO (XEXP (slot
, 0)) != FIRST_SSE_REG
+ (unsigned int) i
2695 || INTVAL (XEXP (slot
, 1)) != i
* 16)
2703 for (i
= 0; i
< XVECLEN (container
, 0) && !need_temp
; i
++)
2705 rtx slot
= XVECEXP (container
, 0, i
);
2706 if (REGNO (XEXP (slot
, 0)) != (unsigned int) i
2707 || INTVAL (XEXP (slot
, 1)) != i
* 8)
2714 int_addr_rtx
= addr_rtx
;
2715 sse_addr_rtx
= addr_rtx
;
2719 int_addr_rtx
= gen_reg_rtx (Pmode
);
2720 sse_addr_rtx
= gen_reg_rtx (Pmode
);
2722 /* First ensure that we fit completely in registers. */
2725 emit_cmp_and_jump_insns (expand_expr
2726 (gpr
, NULL_RTX
, SImode
, EXPAND_NORMAL
),
2727 GEN_INT ((REGPARM_MAX
- needed_intregs
+
2728 1) * 8), GE
, const1_rtx
, SImode
,
2733 emit_cmp_and_jump_insns (expand_expr
2734 (fpr
, NULL_RTX
, SImode
, EXPAND_NORMAL
),
2735 GEN_INT ((SSE_REGPARM_MAX
-
2736 needed_sseregs
+ 1) * 16 +
2737 REGPARM_MAX
* 8), GE
, const1_rtx
,
2738 SImode
, 1, lab_false
);
2741 /* Compute index to start of area used for integer regs. */
2744 t
= build (PLUS_EXPR
, ptr_type_node
, sav
, gpr
);
2745 r
= expand_expr (t
, int_addr_rtx
, Pmode
, EXPAND_NORMAL
);
2746 if (r
!= int_addr_rtx
)
2747 emit_move_insn (int_addr_rtx
, r
);
2751 t
= build (PLUS_EXPR
, ptr_type_node
, sav
, fpr
);
2752 r
= expand_expr (t
, sse_addr_rtx
, Pmode
, EXPAND_NORMAL
);
2753 if (r
!= sse_addr_rtx
)
2754 emit_move_insn (sse_addr_rtx
, r
);
2761 /* Never use the memory itself, as it has the alias set. */
2762 addr_rtx
= XEXP (assign_temp (type
, 0, 1, 0), 0);
2763 mem
= gen_rtx_MEM (BLKmode
, addr_rtx
);
2764 set_mem_alias_set (mem
, get_varargs_alias_set ());
2765 set_mem_align (mem
, BITS_PER_UNIT
);
2767 for (i
= 0; i
< XVECLEN (container
, 0); i
++)
2769 rtx slot
= XVECEXP (container
, 0, i
);
2770 rtx reg
= XEXP (slot
, 0);
2771 enum machine_mode mode
= GET_MODE (reg
);
2777 if (SSE_REGNO_P (REGNO (reg
)))
2779 src_addr
= sse_addr_rtx
;
2780 src_offset
= (REGNO (reg
) - FIRST_SSE_REG
) * 16;
2784 src_addr
= int_addr_rtx
;
2785 src_offset
= REGNO (reg
) * 8;
2787 src_mem
= gen_rtx_MEM (mode
, src_addr
);
2788 set_mem_alias_set (src_mem
, get_varargs_alias_set ());
2789 src_mem
= adjust_address (src_mem
, mode
, src_offset
);
2790 dest_mem
= adjust_address (mem
, mode
, INTVAL (XEXP (slot
, 1)));
2791 emit_move_insn (dest_mem
, src_mem
);
2798 build (PLUS_EXPR
, TREE_TYPE (gpr
), gpr
,
2799 build_int_2 (needed_intregs
* 8, 0));
2800 t
= build (MODIFY_EXPR
, TREE_TYPE (gpr
), gpr
, t
);
2801 TREE_SIDE_EFFECTS (t
) = 1;
2802 expand_expr (t
, const0_rtx
, VOIDmode
, EXPAND_NORMAL
);
2807 build (PLUS_EXPR
, TREE_TYPE (fpr
), fpr
,
2808 build_int_2 (needed_sseregs
* 16, 0));
2809 t
= build (MODIFY_EXPR
, TREE_TYPE (fpr
), fpr
, t
);
2810 TREE_SIDE_EFFECTS (t
) = 1;
2811 expand_expr (t
, const0_rtx
, VOIDmode
, EXPAND_NORMAL
);
2814 emit_jump_insn (gen_jump (lab_over
));
2816 emit_label (lab_false
);
2819 /* ... otherwise out of the overflow area. */
2821 /* Care for on-stack alignment if needed. */
2822 if (FUNCTION_ARG_BOUNDARY (VOIDmode
, type
) <= 64)
2826 HOST_WIDE_INT align
= FUNCTION_ARG_BOUNDARY (VOIDmode
, type
) / 8;
2827 t
= build (PLUS_EXPR
, TREE_TYPE (ovf
), ovf
, build_int_2 (align
- 1, 0));
2828 t
= build (BIT_AND_EXPR
, TREE_TYPE (t
), t
, build_int_2 (-align
, -1));
2832 r
= expand_expr (t
, addr_rtx
, Pmode
, EXPAND_NORMAL
);
2834 emit_move_insn (addr_rtx
, r
);
2837 build (PLUS_EXPR
, TREE_TYPE (t
), t
,
2838 build_int_2 (rsize
* UNITS_PER_WORD
, 0));
2839 t
= build (MODIFY_EXPR
, TREE_TYPE (ovf
), ovf
, t
);
2840 TREE_SIDE_EFFECTS (t
) = 1;
2841 expand_expr (t
, const0_rtx
, VOIDmode
, EXPAND_NORMAL
);
2844 emit_label (lab_over
);
2849 /* Return nonzero if OP is either a i387 or SSE fp register. */
2851 any_fp_register_operand (op
, mode
)
2853 enum machine_mode mode ATTRIBUTE_UNUSED
;
2855 return ANY_FP_REG_P (op
);
2858 /* Return nonzero if OP is an i387 fp register. */
2860 fp_register_operand (op
, mode
)
2862 enum machine_mode mode ATTRIBUTE_UNUSED
;
2864 return FP_REG_P (op
);
2867 /* Return nonzero if OP is a non-fp register_operand. */
2869 register_and_not_any_fp_reg_operand (op
, mode
)
2871 enum machine_mode mode
;
2873 return register_operand (op
, mode
) && !ANY_FP_REG_P (op
);
2876 /* Return nonzero of OP is a register operand other than an
2877 i387 fp register. */
2879 register_and_not_fp_reg_operand (op
, mode
)
2881 enum machine_mode mode
;
2883 return register_operand (op
, mode
) && !FP_REG_P (op
);
2886 /* Return nonzero if OP is general operand representable on x86_64. */
2889 x86_64_general_operand (op
, mode
)
2891 enum machine_mode mode
;
2894 return general_operand (op
, mode
);
2895 if (nonimmediate_operand (op
, mode
))
2897 return x86_64_sign_extended_value (op
);
2900 /* Return nonzero if OP is general operand representable on x86_64
2901 as either sign extended or zero extended constant. */
2904 x86_64_szext_general_operand (op
, mode
)
2906 enum machine_mode mode
;
2909 return general_operand (op
, mode
);
2910 if (nonimmediate_operand (op
, mode
))
2912 return x86_64_sign_extended_value (op
) || x86_64_zero_extended_value (op
);
2915 /* Return nonzero if OP is nonmemory operand representable on x86_64. */
2918 x86_64_nonmemory_operand (op
, mode
)
2920 enum machine_mode mode
;
2923 return nonmemory_operand (op
, mode
);
2924 if (register_operand (op
, mode
))
2926 return x86_64_sign_extended_value (op
);
2929 /* Return nonzero if OP is nonmemory operand acceptable by movabs patterns. */
2932 x86_64_movabs_operand (op
, mode
)
2934 enum machine_mode mode
;
2936 if (!TARGET_64BIT
|| !flag_pic
)
2937 return nonmemory_operand (op
, mode
);
2938 if (register_operand (op
, mode
) || x86_64_sign_extended_value (op
))
2940 if (CONSTANT_P (op
) && !symbolic_reference_mentioned_p (op
))
2945 /* Return nonzero if OP is nonmemory operand representable on x86_64. */
2948 x86_64_szext_nonmemory_operand (op
, mode
)
2950 enum machine_mode mode
;
2953 return nonmemory_operand (op
, mode
);
2954 if (register_operand (op
, mode
))
2956 return x86_64_sign_extended_value (op
) || x86_64_zero_extended_value (op
);
2959 /* Return nonzero if OP is immediate operand representable on x86_64. */
2962 x86_64_immediate_operand (op
, mode
)
2964 enum machine_mode mode
;
2967 return immediate_operand (op
, mode
);
2968 return x86_64_sign_extended_value (op
);
2971 /* Return nonzero if OP is immediate operand representable on x86_64. */
2974 x86_64_zext_immediate_operand (op
, mode
)
2976 enum machine_mode mode ATTRIBUTE_UNUSED
;
2978 return x86_64_zero_extended_value (op
);
2981 /* Return nonzero if OP is (const_int 1), else return zero. */
2984 const_int_1_operand (op
, mode
)
2986 enum machine_mode mode ATTRIBUTE_UNUSED
;
2988 return (GET_CODE (op
) == CONST_INT
&& INTVAL (op
) == 1);
2991 /* Return nonzero if OP is CONST_INT >= 1 and <= 31 (a valid operand
2992 for shift & compare patterns, as shifting by 0 does not change flags),
2993 else return zero. */
2996 const_int_1_31_operand (op
, mode
)
2998 enum machine_mode mode ATTRIBUTE_UNUSED
;
3000 return (GET_CODE (op
) == CONST_INT
&& INTVAL (op
) >= 1 && INTVAL (op
) <= 31);
3003 /* Returns 1 if OP is either a symbol reference or a sum of a symbol
3004 reference and a constant. */
3007 symbolic_operand (op
, mode
)
3009 enum machine_mode mode ATTRIBUTE_UNUSED
;
3011 switch (GET_CODE (op
))
3019 if (GET_CODE (op
) == SYMBOL_REF
3020 || GET_CODE (op
) == LABEL_REF
3021 || (GET_CODE (op
) == UNSPEC
3022 && (XINT (op
, 1) == UNSPEC_GOT
3023 || XINT (op
, 1) == UNSPEC_GOTOFF
3024 || XINT (op
, 1) == UNSPEC_GOTPCREL
)))
3026 if (GET_CODE (op
) != PLUS
3027 || GET_CODE (XEXP (op
, 1)) != CONST_INT
)
3031 if (GET_CODE (op
) == SYMBOL_REF
3032 || GET_CODE (op
) == LABEL_REF
)
3034 /* Only @GOTOFF gets offsets. */
3035 if (GET_CODE (op
) != UNSPEC
3036 || XINT (op
, 1) != UNSPEC_GOTOFF
)
3039 op
= XVECEXP (op
, 0, 0);
3040 if (GET_CODE (op
) == SYMBOL_REF
3041 || GET_CODE (op
) == LABEL_REF
)
3050 /* Return true if the operand contains a @GOT or @GOTOFF reference. */
3053 pic_symbolic_operand (op
, mode
)
3055 enum machine_mode mode ATTRIBUTE_UNUSED
;
3057 if (GET_CODE (op
) != CONST
)
3062 if (GET_CODE (XEXP (op
, 0)) == UNSPEC
)
3067 if (GET_CODE (op
) == UNSPEC
)
3069 if (GET_CODE (op
) != PLUS
3070 || GET_CODE (XEXP (op
, 1)) != CONST_INT
)
3073 if (GET_CODE (op
) == UNSPEC
)
3079 /* Return true if OP is a symbolic operand that resolves locally. */
3082 local_symbolic_operand (op
, mode
)
3084 enum machine_mode mode ATTRIBUTE_UNUSED
;
3086 if (GET_CODE (op
) == CONST
3087 && GET_CODE (XEXP (op
, 0)) == PLUS
3088 && GET_CODE (XEXP (XEXP (op
, 0), 1)) == CONST_INT
)
3089 op
= XEXP (XEXP (op
, 0), 0);
3091 if (GET_CODE (op
) == LABEL_REF
)
3094 if (GET_CODE (op
) != SYMBOL_REF
)
3097 /* These we've been told are local by varasm and encode_section_info
3099 if (CONSTANT_POOL_ADDRESS_P (op
) || SYMBOL_REF_FLAG (op
))
3102 /* There is, however, a not insubstantial body of code in the rest of
3103 the compiler that assumes it can just stick the results of
3104 ASM_GENERATE_INTERNAL_LABEL in a symbol_ref and have done. */
3105 /* ??? This is a hack. Should update the body of the compiler to
3106 always create a DECL an invoke targetm.encode_section_info. */
3107 if (strncmp (XSTR (op
, 0), internal_label_prefix
,
3108 internal_label_prefix_len
) == 0)
3114 /* Test for various thread-local symbols. See ix86_encode_section_info. */
3117 tls_symbolic_operand (op
, mode
)
3119 enum machine_mode mode ATTRIBUTE_UNUSED
;
3121 const char *symbol_str
;
3123 if (GET_CODE (op
) != SYMBOL_REF
)
3125 symbol_str
= XSTR (op
, 0);
3127 if (symbol_str
[0] != '%')
3129 return strchr (tls_model_chars
, symbol_str
[1]) - tls_model_chars
;
3133 tls_symbolic_operand_1 (op
, kind
)
3135 enum tls_model kind
;
3137 const char *symbol_str
;
3139 if (GET_CODE (op
) != SYMBOL_REF
)
3141 symbol_str
= XSTR (op
, 0);
3143 return symbol_str
[0] == '%' && symbol_str
[1] == tls_model_chars
[kind
];
3147 global_dynamic_symbolic_operand (op
, mode
)
3149 enum machine_mode mode ATTRIBUTE_UNUSED
;
3151 return tls_symbolic_operand_1 (op
, TLS_MODEL_GLOBAL_DYNAMIC
);
3155 local_dynamic_symbolic_operand (op
, mode
)
3157 enum machine_mode mode ATTRIBUTE_UNUSED
;
3159 return tls_symbolic_operand_1 (op
, TLS_MODEL_LOCAL_DYNAMIC
);
3163 initial_exec_symbolic_operand (op
, mode
)
3165 enum machine_mode mode ATTRIBUTE_UNUSED
;
3167 return tls_symbolic_operand_1 (op
, TLS_MODEL_INITIAL_EXEC
);
3171 local_exec_symbolic_operand (op
, mode
)
3173 enum machine_mode mode ATTRIBUTE_UNUSED
;
3175 return tls_symbolic_operand_1 (op
, TLS_MODEL_LOCAL_EXEC
);
3178 /* Test for a valid operand for a call instruction. Don't allow the
3179 arg pointer register or virtual regs since they may decay into
3180 reg + const, which the patterns can't handle. */
3183 call_insn_operand (op
, mode
)
3185 enum machine_mode mode ATTRIBUTE_UNUSED
;
3187 /* Disallow indirect through a virtual register. This leads to
3188 compiler aborts when trying to eliminate them. */
3189 if (GET_CODE (op
) == REG
3190 && (op
== arg_pointer_rtx
3191 || op
== frame_pointer_rtx
3192 || (REGNO (op
) >= FIRST_PSEUDO_REGISTER
3193 && REGNO (op
) <= LAST_VIRTUAL_REGISTER
)))
3196 /* Disallow `call 1234'. Due to varying assembler lameness this
3197 gets either rejected or translated to `call .+1234'. */
3198 if (GET_CODE (op
) == CONST_INT
)
3201 /* Explicitly allow SYMBOL_REF even if pic. */
3202 if (GET_CODE (op
) == SYMBOL_REF
)
3205 /* Otherwise we can allow any general_operand in the address. */
3206 return general_operand (op
, Pmode
);
3210 constant_call_address_operand (op
, mode
)
3212 enum machine_mode mode ATTRIBUTE_UNUSED
;
3214 if (GET_CODE (op
) == CONST
3215 && GET_CODE (XEXP (op
, 0)) == PLUS
3216 && GET_CODE (XEXP (XEXP (op
, 0), 1)) == CONST_INT
)
3217 op
= XEXP (XEXP (op
, 0), 0);
3218 return GET_CODE (op
) == SYMBOL_REF
;
3221 /* Match exactly zero and one. */
3224 const0_operand (op
, mode
)
3226 enum machine_mode mode
;
3228 return op
== CONST0_RTX (mode
);
3232 const1_operand (op
, mode
)
3234 enum machine_mode mode ATTRIBUTE_UNUSED
;
3236 return op
== const1_rtx
;
3239 /* Match 2, 4, or 8. Used for leal multiplicands. */
3242 const248_operand (op
, mode
)
3244 enum machine_mode mode ATTRIBUTE_UNUSED
;
3246 return (GET_CODE (op
) == CONST_INT
3247 && (INTVAL (op
) == 2 || INTVAL (op
) == 4 || INTVAL (op
) == 8));
3250 /* True if this is a constant appropriate for an increment or decremenmt. */
3253 incdec_operand (op
, mode
)
3255 enum machine_mode mode ATTRIBUTE_UNUSED
;
3257 /* On Pentium4, the inc and dec operations causes extra dependency on flag
3258 registers, since carry flag is not set. */
3259 if (TARGET_PENTIUM4
&& !optimize_size
)
3261 return op
== const1_rtx
|| op
== constm1_rtx
;
3264 /* Return nonzero if OP is acceptable as operand of DImode shift
3268 shiftdi_operand (op
, mode
)
3270 enum machine_mode mode ATTRIBUTE_UNUSED
;
3273 return nonimmediate_operand (op
, mode
);
3275 return register_operand (op
, mode
);
3278 /* Return false if this is the stack pointer, or any other fake
3279 register eliminable to the stack pointer. Otherwise, this is
3282 This is used to prevent esp from being used as an index reg.
3283 Which would only happen in pathological cases. */
3286 reg_no_sp_operand (op
, mode
)
3288 enum machine_mode mode
;
3291 if (GET_CODE (t
) == SUBREG
)
3293 if (t
== stack_pointer_rtx
|| t
== arg_pointer_rtx
|| t
== frame_pointer_rtx
)
3296 return register_operand (op
, mode
);
3300 mmx_reg_operand (op
, mode
)
3302 enum machine_mode mode ATTRIBUTE_UNUSED
;
3304 return MMX_REG_P (op
);
3307 /* Return false if this is any eliminable register. Otherwise
3311 general_no_elim_operand (op
, mode
)
3313 enum machine_mode mode
;
3316 if (GET_CODE (t
) == SUBREG
)
3318 if (t
== arg_pointer_rtx
|| t
== frame_pointer_rtx
3319 || t
== virtual_incoming_args_rtx
|| t
== virtual_stack_vars_rtx
3320 || t
== virtual_stack_dynamic_rtx
)
3323 && REGNO (t
) >= FIRST_VIRTUAL_REGISTER
3324 && REGNO (t
) <= LAST_VIRTUAL_REGISTER
)
3327 return general_operand (op
, mode
);
3330 /* Return false if this is any eliminable register. Otherwise
3331 register_operand or const_int. */
3334 nonmemory_no_elim_operand (op
, mode
)
3336 enum machine_mode mode
;
3339 if (GET_CODE (t
) == SUBREG
)
3341 if (t
== arg_pointer_rtx
|| t
== frame_pointer_rtx
3342 || t
== virtual_incoming_args_rtx
|| t
== virtual_stack_vars_rtx
3343 || t
== virtual_stack_dynamic_rtx
)
3346 return GET_CODE (op
) == CONST_INT
|| register_operand (op
, mode
);
3349 /* Return false if this is any eliminable register or stack register,
3350 otherwise work like register_operand. */
3353 index_register_operand (op
, mode
)
3355 enum machine_mode mode
;
3358 if (GET_CODE (t
) == SUBREG
)
3362 if (t
== arg_pointer_rtx
3363 || t
== frame_pointer_rtx
3364 || t
== virtual_incoming_args_rtx
3365 || t
== virtual_stack_vars_rtx
3366 || t
== virtual_stack_dynamic_rtx
3367 || REGNO (t
) == STACK_POINTER_REGNUM
)
3370 return general_operand (op
, mode
);
3373 /* Return true if op is a Q_REGS class register. */
3376 q_regs_operand (op
, mode
)
3378 enum machine_mode mode
;
3380 if (mode
!= VOIDmode
&& GET_MODE (op
) != mode
)
3382 if (GET_CODE (op
) == SUBREG
)
3383 op
= SUBREG_REG (op
);
3384 return ANY_QI_REG_P (op
);
3387 /* Return true if op is a NON_Q_REGS class register. */
3390 non_q_regs_operand (op
, mode
)
3392 enum machine_mode mode
;
3394 if (mode
!= VOIDmode
&& GET_MODE (op
) != mode
)
3396 if (GET_CODE (op
) == SUBREG
)
3397 op
= SUBREG_REG (op
);
3398 return NON_QI_REG_P (op
);
3401 /* Return 1 if OP is a comparison that can be used in the CMPSS/CMPPS
3404 sse_comparison_operator (op
, mode
)
3406 enum machine_mode mode ATTRIBUTE_UNUSED
;
3408 enum rtx_code code
= GET_CODE (op
);
3411 /* Operations supported directly. */
3421 /* These are equivalent to ones above in non-IEEE comparisons. */
3428 return !TARGET_IEEE_FP
;
3433 /* Return 1 if OP is a valid comparison operator in valid mode. */
3435 ix86_comparison_operator (op
, mode
)
3437 enum machine_mode mode
;
3439 enum machine_mode inmode
;
3440 enum rtx_code code
= GET_CODE (op
);
3441 if (mode
!= VOIDmode
&& GET_MODE (op
) != mode
)
3443 if (GET_RTX_CLASS (code
) != '<')
3445 inmode
= GET_MODE (XEXP (op
, 0));
3447 if (inmode
== CCFPmode
|| inmode
== CCFPUmode
)
3449 enum rtx_code second_code
, bypass_code
;
3450 ix86_fp_comparison_codes (code
, &bypass_code
, &code
, &second_code
);
3451 return (bypass_code
== NIL
&& second_code
== NIL
);
3458 if (inmode
== CCmode
|| inmode
== CCGCmode
3459 || inmode
== CCGOCmode
|| inmode
== CCNOmode
)
3462 case LTU
: case GTU
: case LEU
: case ORDERED
: case UNORDERED
: case GEU
:
3463 if (inmode
== CCmode
)
3467 if (inmode
== CCmode
|| inmode
== CCGCmode
|| inmode
== CCNOmode
)
3475 /* Return 1 if OP is a comparison operator that can be issued by fcmov. */
3478 fcmov_comparison_operator (op
, mode
)
3480 enum machine_mode mode
;
3482 enum machine_mode inmode
;
3483 enum rtx_code code
= GET_CODE (op
);
3484 if (mode
!= VOIDmode
&& GET_MODE (op
) != mode
)
3486 if (GET_RTX_CLASS (code
) != '<')
3488 inmode
= GET_MODE (XEXP (op
, 0));
3489 if (inmode
== CCFPmode
|| inmode
== CCFPUmode
)
3491 enum rtx_code second_code
, bypass_code
;
3492 ix86_fp_comparison_codes (code
, &bypass_code
, &code
, &second_code
);
3493 if (bypass_code
!= NIL
|| second_code
!= NIL
)
3495 code
= ix86_fp_compare_code_to_integer (code
);
3497 /* i387 supports just limited amount of conditional codes. */
3500 case LTU
: case GTU
: case LEU
: case GEU
:
3501 if (inmode
== CCmode
|| inmode
== CCFPmode
|| inmode
== CCFPUmode
)
3504 case ORDERED
: case UNORDERED
:
3512 /* Return 1 if OP is a binary operator that can be promoted to wider mode. */
3515 promotable_binary_operator (op
, mode
)
3517 enum machine_mode mode ATTRIBUTE_UNUSED
;
3519 switch (GET_CODE (op
))
3522 /* Modern CPUs have same latency for HImode and SImode multiply,
3523 but 386 and 486 do HImode multiply faster. */
3524 return ix86_cpu
> PROCESSOR_I486
;
3536 /* Nearly general operand, but accept any const_double, since we wish
3537 to be able to drop them into memory rather than have them get pulled
3541 cmp_fp_expander_operand (op
, mode
)
3543 enum machine_mode mode
;
3545 if (mode
!= VOIDmode
&& mode
!= GET_MODE (op
))
3547 if (GET_CODE (op
) == CONST_DOUBLE
)
3549 return general_operand (op
, mode
);
3552 /* Match an SI or HImode register for a zero_extract. */
3555 ext_register_operand (op
, mode
)
3557 enum machine_mode mode ATTRIBUTE_UNUSED
;
3560 if ((!TARGET_64BIT
|| GET_MODE (op
) != DImode
)
3561 && GET_MODE (op
) != SImode
&& GET_MODE (op
) != HImode
)
3564 if (!register_operand (op
, VOIDmode
))
3567 /* Be curefull to accept only registers having upper parts. */
3568 regno
= REG_P (op
) ? REGNO (op
) : REGNO (SUBREG_REG (op
));
3569 return (regno
> LAST_VIRTUAL_REGISTER
|| regno
< 4);
3572 /* Return 1 if this is a valid binary floating-point operation.
3573 OP is the expression matched, and MODE is its mode. */
3576 binary_fp_operator (op
, mode
)
3578 enum machine_mode mode
;
3580 if (mode
!= VOIDmode
&& mode
!= GET_MODE (op
))
3583 switch (GET_CODE (op
))
3589 return GET_MODE_CLASS (GET_MODE (op
)) == MODE_FLOAT
;
3597 mult_operator (op
, mode
)
3599 enum machine_mode mode ATTRIBUTE_UNUSED
;
3601 return GET_CODE (op
) == MULT
;
3605 div_operator (op
, mode
)
3607 enum machine_mode mode ATTRIBUTE_UNUSED
;
3609 return GET_CODE (op
) == DIV
;
3613 arith_or_logical_operator (op
, mode
)
3615 enum machine_mode mode
;
3617 return ((mode
== VOIDmode
|| GET_MODE (op
) == mode
)
3618 && (GET_RTX_CLASS (GET_CODE (op
)) == 'c'
3619 || GET_RTX_CLASS (GET_CODE (op
)) == '2'));
3622 /* Returns 1 if OP is memory operand with a displacement. */
3625 memory_displacement_operand (op
, mode
)
3627 enum machine_mode mode
;
3629 struct ix86_address parts
;
3631 if (! memory_operand (op
, mode
))
3634 if (! ix86_decompose_address (XEXP (op
, 0), &parts
))
3637 return parts
.disp
!= NULL_RTX
;
3640 /* To avoid problems when jump re-emits comparisons like testqi_ext_ccno_0,
3641 re-recognize the operand to avoid a copy_to_mode_reg that will fail.
3643 ??? It seems likely that this will only work because cmpsi is an
3644 expander, and no actual insns use this. */
3647 cmpsi_operand (op
, mode
)
3649 enum machine_mode mode
;
3651 if (nonimmediate_operand (op
, mode
))
3654 if (GET_CODE (op
) == AND
3655 && GET_MODE (op
) == SImode
3656 && GET_CODE (XEXP (op
, 0)) == ZERO_EXTRACT
3657 && GET_CODE (XEXP (XEXP (op
, 0), 1)) == CONST_INT
3658 && GET_CODE (XEXP (XEXP (op
, 0), 2)) == CONST_INT
3659 && INTVAL (XEXP (XEXP (op
, 0), 1)) == 8
3660 && INTVAL (XEXP (XEXP (op
, 0), 2)) == 8
3661 && GET_CODE (XEXP (op
, 1)) == CONST_INT
)
3667 /* Returns 1 if OP is memory operand that can not be represented by the
3671 long_memory_operand (op
, mode
)
3673 enum machine_mode mode
;
3675 if (! memory_operand (op
, mode
))
3678 return memory_address_length (op
) != 0;
3681 /* Return nonzero if the rtx is known aligned. */
3684 aligned_operand (op
, mode
)
3686 enum machine_mode mode
;
3688 struct ix86_address parts
;
3690 if (!general_operand (op
, mode
))
3693 /* Registers and immediate operands are always "aligned". */
3694 if (GET_CODE (op
) != MEM
)
3697 /* Don't even try to do any aligned optimizations with volatiles. */
3698 if (MEM_VOLATILE_P (op
))
3703 /* Pushes and pops are only valid on the stack pointer. */
3704 if (GET_CODE (op
) == PRE_DEC
3705 || GET_CODE (op
) == POST_INC
)
3708 /* Decode the address. */
3709 if (! ix86_decompose_address (op
, &parts
))
3712 if (parts
.base
&& GET_CODE (parts
.base
) == SUBREG
)
3713 parts
.base
= SUBREG_REG (parts
.base
);
3714 if (parts
.index
&& GET_CODE (parts
.index
) == SUBREG
)
3715 parts
.index
= SUBREG_REG (parts
.index
);
3717 /* Look for some component that isn't known to be aligned. */
3721 && REGNO_POINTER_ALIGN (REGNO (parts
.index
)) < 32)
3726 if (REGNO_POINTER_ALIGN (REGNO (parts
.base
)) < 32)
3731 if (GET_CODE (parts
.disp
) != CONST_INT
3732 || (INTVAL (parts
.disp
) & 3) != 0)
3736 /* Didn't find one -- this must be an aligned address. */
3740 /* Return true if the constant is something that can be loaded with
3741 a special instruction. Only handle 0.0 and 1.0; others are less
3745 standard_80387_constant_p (x
)
3748 if (GET_CODE (x
) != CONST_DOUBLE
|| !FLOAT_MODE_P (GET_MODE (x
)))
3750 /* Note that on the 80387, other constants, such as pi, that we should support
3751 too. On some machines, these are much slower to load as standard constant,
3752 than to load from doubles in memory. */
3753 if (x
== CONST0_RTX (GET_MODE (x
)))
3755 if (x
== CONST1_RTX (GET_MODE (x
)))
3760 /* Return 1 if X is FP constant we can load to SSE register w/o using memory.
3763 standard_sse_constant_p (x
)
3766 if (x
== const0_rtx
)
3768 return (x
== CONST0_RTX (GET_MODE (x
)));
3771 /* Returns 1 if OP contains a symbol reference */
3774 symbolic_reference_mentioned_p (op
)
3777 register const char *fmt
;
3780 if (GET_CODE (op
) == SYMBOL_REF
|| GET_CODE (op
) == LABEL_REF
)
3783 fmt
= GET_RTX_FORMAT (GET_CODE (op
));
3784 for (i
= GET_RTX_LENGTH (GET_CODE (op
)) - 1; i
>= 0; i
--)
3790 for (j
= XVECLEN (op
, i
) - 1; j
>= 0; j
--)
3791 if (symbolic_reference_mentioned_p (XVECEXP (op
, i
, j
)))
3795 else if (fmt
[i
] == 'e' && symbolic_reference_mentioned_p (XEXP (op
, i
)))
3802 /* Return 1 if it is appropriate to emit `ret' instructions in the
3803 body of a function. Do this only if the epilogue is simple, needing a
3804 couple of insns. Prior to reloading, we can't tell how many registers
3805 must be saved, so return 0 then. Return 0 if there is no frame
3806 marker to de-allocate.
3808 If NON_SAVING_SETJMP is defined and true, then it is not possible
3809 for the epilogue to be simple, so return 0. This is a special case
3810 since NON_SAVING_SETJMP will not cause regs_ever_live to change
3811 until final, but jump_optimize may need to know sooner if a
3815 ix86_can_use_return_insn_p ()
3817 struct ix86_frame frame
;
3819 #ifdef NON_SAVING_SETJMP
3820 if (NON_SAVING_SETJMP
&& current_function_calls_setjmp
)
3824 if (! reload_completed
|| frame_pointer_needed
)
3827 /* Don't allow more than 32 pop, since that's all we can do
3828 with one instruction. */
3829 if (current_function_pops_args
3830 && current_function_args_size
>= 32768)
3833 ix86_compute_frame_layout (&frame
);
3834 return frame
.to_allocate
== 0 && frame
.nregs
== 0;
3837 /* Return 1 if VALUE can be stored in the sign extended immediate field. */
3839 x86_64_sign_extended_value (value
)
3842 switch (GET_CODE (value
))
3844 /* CONST_DOUBLES never match, since HOST_BITS_PER_WIDE_INT is known
3845 to be at least 32 and this all acceptable constants are
3846 represented as CONST_INT. */
3848 if (HOST_BITS_PER_WIDE_INT
== 32)
3852 HOST_WIDE_INT val
= trunc_int_for_mode (INTVAL (value
), DImode
);
3853 return trunc_int_for_mode (val
, SImode
) == val
;
3857 /* For certain code models, the symbolic references are known to fit.
3858 in CM_SMALL_PIC model we know it fits if it is local to the shared
3859 library. Don't count TLS SYMBOL_REFs here, since they should fit
3860 only if inside of UNSPEC handled below. */
3862 return (ix86_cmodel
== CM_SMALL
|| ix86_cmodel
== CM_KERNEL
);
3864 /* For certain code models, the code is near as well. */
3866 return (ix86_cmodel
== CM_SMALL
|| ix86_cmodel
== CM_MEDIUM
3867 || ix86_cmodel
== CM_KERNEL
);
3869 /* We also may accept the offsetted memory references in certain special
3872 if (GET_CODE (XEXP (value
, 0)) == UNSPEC
)
3873 switch (XINT (XEXP (value
, 0), 1))
3875 case UNSPEC_GOTPCREL
:
3877 case UNSPEC_GOTNTPOFF
:
3883 if (GET_CODE (XEXP (value
, 0)) == PLUS
)
3885 rtx op1
= XEXP (XEXP (value
, 0), 0);
3886 rtx op2
= XEXP (XEXP (value
, 0), 1);
3887 HOST_WIDE_INT offset
;
3889 if (ix86_cmodel
== CM_LARGE
)
3891 if (GET_CODE (op2
) != CONST_INT
)
3893 offset
= trunc_int_for_mode (INTVAL (op2
), DImode
);
3894 switch (GET_CODE (op1
))
3897 /* For CM_SMALL assume that latest object is 16MB before
3898 end of 31bits boundary. We may also accept pretty
3899 large negative constants knowing that all objects are
3900 in the positive half of address space. */
3901 if (ix86_cmodel
== CM_SMALL
3902 && offset
< 16*1024*1024
3903 && trunc_int_for_mode (offset
, SImode
) == offset
)
3905 /* For CM_KERNEL we know that all object resist in the
3906 negative half of 32bits address space. We may not
3907 accept negative offsets, since they may be just off
3908 and we may accept pretty large positive ones. */
3909 if (ix86_cmodel
== CM_KERNEL
3911 && trunc_int_for_mode (offset
, SImode
) == offset
)
3915 /* These conditions are similar to SYMBOL_REF ones, just the
3916 constraints for code models differ. */
3917 if ((ix86_cmodel
== CM_SMALL
|| ix86_cmodel
== CM_MEDIUM
)
3918 && offset
< 16*1024*1024
3919 && trunc_int_for_mode (offset
, SImode
) == offset
)
3921 if (ix86_cmodel
== CM_KERNEL
3923 && trunc_int_for_mode (offset
, SImode
) == offset
)
3927 switch (XINT (op1
, 1))
3932 && trunc_int_for_mode (offset
, SImode
) == offset
)
3946 /* Return 1 if VALUE can be stored in the zero extended immediate field. */
3948 x86_64_zero_extended_value (value
)
3951 switch (GET_CODE (value
))
3954 if (HOST_BITS_PER_WIDE_INT
== 32)
3955 return (GET_MODE (value
) == VOIDmode
3956 && !CONST_DOUBLE_HIGH (value
));
3960 if (HOST_BITS_PER_WIDE_INT
== 32)
3961 return INTVAL (value
) >= 0;
3963 return !(INTVAL (value
) & ~(HOST_WIDE_INT
) 0xffffffff);
3966 /* For certain code models, the symbolic references are known to fit. */
3968 return ix86_cmodel
== CM_SMALL
;
3970 /* For certain code models, the code is near as well. */
3972 return ix86_cmodel
== CM_SMALL
|| ix86_cmodel
== CM_MEDIUM
;
3974 /* We also may accept the offsetted memory references in certain special
3977 if (GET_CODE (XEXP (value
, 0)) == PLUS
)
3979 rtx op1
= XEXP (XEXP (value
, 0), 0);
3980 rtx op2
= XEXP (XEXP (value
, 0), 1);
3982 if (ix86_cmodel
== CM_LARGE
)
3984 switch (GET_CODE (op1
))
3988 /* For small code model we may accept pretty large positive
3989 offsets, since one bit is available for free. Negative
3990 offsets are limited by the size of NULL pointer area
3991 specified by the ABI. */
3992 if (ix86_cmodel
== CM_SMALL
3993 && GET_CODE (op2
) == CONST_INT
3994 && trunc_int_for_mode (INTVAL (op2
), DImode
) > -0x10000
3995 && (trunc_int_for_mode (INTVAL (op2
), SImode
)
3998 /* ??? For the kernel, we may accept adjustment of
3999 -0x10000000, since we know that it will just convert
4000 negative address space to positive, but perhaps this
4001 is not worthwhile. */
4004 /* These conditions are similar to SYMBOL_REF ones, just the
4005 constraints for code models differ. */
4006 if ((ix86_cmodel
== CM_SMALL
|| ix86_cmodel
== CM_MEDIUM
)
4007 && GET_CODE (op2
) == CONST_INT
4008 && trunc_int_for_mode (INTVAL (op2
), DImode
) > -0x10000
4009 && (trunc_int_for_mode (INTVAL (op2
), SImode
)
4023 /* Value should be nonzero if functions must have frame pointers.
4024 Zero means the frame pointer need not be set up (and parms may
4025 be accessed via the stack pointer) in functions that seem suitable. */
4028 ix86_frame_pointer_required ()
4030 /* If we accessed previous frames, then the generated code expects
4031 to be able to access the saved ebp value in our frame. */
4032 if (cfun
->machine
->accesses_prev_frame
)
4035 /* Several x86 os'es need a frame pointer for other reasons,
4036 usually pertaining to setjmp. */
4037 if (SUBTARGET_FRAME_POINTER_REQUIRED
)
4040 /* In override_options, TARGET_OMIT_LEAF_FRAME_POINTER turns off
4041 the frame pointer by default. Turn it back on now if we've not
4042 got a leaf function. */
4043 if (TARGET_OMIT_LEAF_FRAME_POINTER
4044 && (!current_function_is_leaf
))
4047 if (current_function_profile
)
4053 /* Record that the current function accesses previous call frames. */
4056 ix86_setup_frame_addresses ()
4058 cfun
->machine
->accesses_prev_frame
= 1;
/* Use hidden linkonce sections for the pc thunks when the assembler and
   linker support them; otherwise fall back to local labels.  */
#if defined(HAVE_GAS_HIDDEN) && defined(SUPPORTS_ONE_ONLY)
# define USE_HIDDEN_LINKONCE 1
#else
# define USE_HIDDEN_LINKONCE 0
#endif

/* Bitmask: bit N is set when the pc thunk for register N has been
   referenced and must be emitted by ix86_asm_file_end.  */
static int pic_labels_used;
4069 /* Fills in the label name that should be used for a pc thunk for
4070 the given register. */
4073 get_pc_thunk_name (name
, regno
)
4077 if (USE_HIDDEN_LINKONCE
)
4078 sprintf (name
, "__i686.get_pc_thunk.%s", reg_names
[regno
]);
4080 ASM_GENERATE_INTERNAL_LABEL (name
, "LPR", regno
);
4084 /* This function generates code for -fpic that loads %ebx with
4085 the return address of the caller and then returns. */
4088 ix86_asm_file_end (file
)
4094 for (regno
= 0; regno
< 8; ++regno
)
4098 if (! ((pic_labels_used
>> regno
) & 1))
4101 get_pc_thunk_name (name
, regno
);
4103 if (USE_HIDDEN_LINKONCE
)
4107 decl
= build_decl (FUNCTION_DECL
, get_identifier (name
),
4109 TREE_PUBLIC (decl
) = 1;
4110 TREE_STATIC (decl
) = 1;
4111 DECL_ONE_ONLY (decl
) = 1;
4113 (*targetm
.asm_out
.unique_section
) (decl
, 0);
4114 named_section (decl
, NULL
, 0);
4116 (*targetm
.asm_out
.globalize_label
) (file
, name
);
4117 fputs ("\t.hidden\t", file
);
4118 assemble_name (file
, name
);
4120 ASM_DECLARE_FUNCTION_NAME (file
, name
, decl
);
4125 ASM_OUTPUT_LABEL (file
, name
);
4128 xops
[0] = gen_rtx_REG (SImode
, regno
);
4129 xops
[1] = gen_rtx_MEM (SImode
, stack_pointer_rtx
);
4130 output_asm_insn ("mov{l}\t{%1, %0|%0, %1}", xops
);
4131 output_asm_insn ("ret", xops
);
4135 /* Emit code for the SET_GOT patterns. */
4138 output_set_got (dest
)
4144 xops
[1] = gen_rtx_SYMBOL_REF (Pmode
, GOT_SYMBOL_NAME
);
4146 if (! TARGET_DEEP_BRANCH_PREDICTION
|| !flag_pic
)
4148 xops
[2] = gen_rtx_LABEL_REF (Pmode
, gen_label_rtx ());
4151 output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops
);
4153 output_asm_insn ("call\t%a2", xops
);
4156 /* Output the "canonical" label name ("Lxx$pb") here too. This
4157 is what will be referred to by the Mach-O PIC subsystem. */
4158 ASM_OUTPUT_LABEL (asm_out_file
, machopic_function_base_name ());
4160 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file
, "L",
4161 CODE_LABEL_NUMBER (XEXP (xops
[2], 0)));
4164 output_asm_insn ("pop{l}\t%0", xops
);
4169 get_pc_thunk_name (name
, REGNO (dest
));
4170 pic_labels_used
|= 1 << REGNO (dest
);
4172 xops
[2] = gen_rtx_SYMBOL_REF (Pmode
, ggc_strdup (name
));
4173 xops
[2] = gen_rtx_MEM (QImode
, xops
[2]);
4174 output_asm_insn ("call\t%X2", xops
);
4177 if (!flag_pic
|| TARGET_DEEP_BRANCH_PREDICTION
)
4178 output_asm_insn ("add{l}\t{%1, %0|%0, %1}", xops
);
4179 else if (!TARGET_MACHO
)
4180 output_asm_insn ("add{l}\t{%1+[.-%a2], %0|%0, %a1+(.-%a2)}", xops
);
4185 /* Generate an "push" pattern for input ARG. */
4191 return gen_rtx_SET (VOIDmode
,
4193 gen_rtx_PRE_DEC (Pmode
,
4194 stack_pointer_rtx
)),
4198 /* Return >= 0 if there is an unused call-clobbered register available
4199 for the entire function. */
4202 ix86_select_alt_pic_regnum ()
4204 if (current_function_is_leaf
&& !current_function_profile
)
4207 for (i
= 2; i
>= 0; --i
)
4208 if (!regs_ever_live
[i
])
4212 return INVALID_REGNUM
;
4215 /* Return 1 if we need to save REGNO. */
4217 ix86_save_reg (regno
, maybe_eh_return
)
4219 int maybe_eh_return
;
4221 if (pic_offset_table_rtx
4222 && regno
== REAL_PIC_OFFSET_TABLE_REGNUM
4223 && (regs_ever_live
[REAL_PIC_OFFSET_TABLE_REGNUM
]
4224 || current_function_profile
4225 || current_function_calls_eh_return
))
4227 if (ix86_select_alt_pic_regnum () != INVALID_REGNUM
)
4232 if (current_function_calls_eh_return
&& maybe_eh_return
)
4237 unsigned test
= EH_RETURN_DATA_REGNO (i
);
4238 if (test
== INVALID_REGNUM
)
4245 return (regs_ever_live
[regno
]
4246 && !call_used_regs
[regno
]
4247 && !fixed_regs
[regno
]
4248 && (regno
!= HARD_FRAME_POINTER_REGNUM
|| !frame_pointer_needed
));
4251 /* Return number of registers to be saved on the stack. */
4259 for (regno
= FIRST_PSEUDO_REGISTER
- 1; regno
>= 0; regno
--)
4260 if (ix86_save_reg (regno
, true))
4265 /* Return the offset between two registers, one to be eliminated, and the other
4266 its replacement, at the start of a routine. */
4269 ix86_initial_elimination_offset (from
, to
)
4273 struct ix86_frame frame
;
4274 ix86_compute_frame_layout (&frame
);
4276 if (from
== ARG_POINTER_REGNUM
&& to
== HARD_FRAME_POINTER_REGNUM
)
4277 return frame
.hard_frame_pointer_offset
;
4278 else if (from
== FRAME_POINTER_REGNUM
4279 && to
== HARD_FRAME_POINTER_REGNUM
)
4280 return frame
.hard_frame_pointer_offset
- frame
.frame_pointer_offset
;
4283 if (to
!= STACK_POINTER_REGNUM
)
4285 else if (from
== ARG_POINTER_REGNUM
)
4286 return frame
.stack_pointer_offset
;
4287 else if (from
!= FRAME_POINTER_REGNUM
)
4290 return frame
.stack_pointer_offset
- frame
.frame_pointer_offset
;
4294 /* Fill structure ix86_frame about frame of currently computed function. */
4297 ix86_compute_frame_layout (frame
)
4298 struct ix86_frame
*frame
;
4300 HOST_WIDE_INT total_size
;
4301 int stack_alignment_needed
= cfun
->stack_alignment_needed
/ BITS_PER_UNIT
;
4303 int preferred_alignment
= cfun
->preferred_stack_boundary
/ BITS_PER_UNIT
;
4304 HOST_WIDE_INT size
= get_frame_size ();
4306 frame
->nregs
= ix86_nsaved_regs ();
4309 /* Skip return address and saved base pointer. */
4310 offset
= frame_pointer_needed
? UNITS_PER_WORD
* 2 : UNITS_PER_WORD
;
4312 frame
->hard_frame_pointer_offset
= offset
;
4314 /* Do some sanity checking of stack_alignment_needed and
4315 preferred_alignment, since i386 port is the only using those features
4316 that may break easily. */
4318 if (size
&& !stack_alignment_needed
)
4320 if (preferred_alignment
< STACK_BOUNDARY
/ BITS_PER_UNIT
)
4322 if (preferred_alignment
> PREFERRED_STACK_BOUNDARY
/ BITS_PER_UNIT
)
4324 if (stack_alignment_needed
> PREFERRED_STACK_BOUNDARY
/ BITS_PER_UNIT
)
4327 if (stack_alignment_needed
< STACK_BOUNDARY
/ BITS_PER_UNIT
)
4328 stack_alignment_needed
= STACK_BOUNDARY
/ BITS_PER_UNIT
;
4330 /* Register save area */
4331 offset
+= frame
->nregs
* UNITS_PER_WORD
;
4334 if (ix86_save_varrargs_registers
)
4336 offset
+= X86_64_VARARGS_SIZE
;
4337 frame
->va_arg_size
= X86_64_VARARGS_SIZE
;
4340 frame
->va_arg_size
= 0;
4342 /* Align start of frame for local function. */
4343 frame
->padding1
= ((offset
+ stack_alignment_needed
- 1)
4344 & -stack_alignment_needed
) - offset
;
4346 offset
+= frame
->padding1
;
4348 /* Frame pointer points here. */
4349 frame
->frame_pointer_offset
= offset
;
4353 /* Add outgoing arguments area. Can be skipped if we eliminated
4354 all the function calls as dead code. */
4355 if (ACCUMULATE_OUTGOING_ARGS
&& !current_function_is_leaf
)
4357 offset
+= current_function_outgoing_args_size
;
4358 frame
->outgoing_arguments_size
= current_function_outgoing_args_size
;
4361 frame
->outgoing_arguments_size
= 0;
4363 /* Align stack boundary. Only needed if we're calling another function
4365 if (!current_function_is_leaf
|| current_function_calls_alloca
)
4366 frame
->padding2
= ((offset
+ preferred_alignment
- 1)
4367 & -preferred_alignment
) - offset
;
4369 frame
->padding2
= 0;
4371 offset
+= frame
->padding2
;
4373 /* We've reached end of stack frame. */
4374 frame
->stack_pointer_offset
= offset
;
4376 /* Size prologue needs to allocate. */
4377 frame
->to_allocate
=
4378 (size
+ frame
->padding1
+ frame
->padding2
4379 + frame
->outgoing_arguments_size
+ frame
->va_arg_size
);
4381 if (TARGET_64BIT
&& TARGET_RED_ZONE
&& current_function_sp_is_unchanging
4382 && current_function_is_leaf
)
4384 frame
->red_zone_size
= frame
->to_allocate
;
4385 if (frame
->red_zone_size
> RED_ZONE_SIZE
- RED_ZONE_RESERVE
)
4386 frame
->red_zone_size
= RED_ZONE_SIZE
- RED_ZONE_RESERVE
;
4389 frame
->red_zone_size
= 0;
4390 frame
->to_allocate
-= frame
->red_zone_size
;
4391 frame
->stack_pointer_offset
-= frame
->red_zone_size
;
4393 fprintf (stderr
, "nregs: %i\n", frame
->nregs
);
4394 fprintf (stderr
, "size: %i\n", size
);
4395 fprintf (stderr
, "alignment1: %i\n", stack_alignment_needed
);
4396 fprintf (stderr
, "padding1: %i\n", frame
->padding1
);
4397 fprintf (stderr
, "va_arg: %i\n", frame
->va_arg_size
);
4398 fprintf (stderr
, "padding2: %i\n", frame
->padding2
);
4399 fprintf (stderr
, "to_allocate: %i\n", frame
->to_allocate
);
4400 fprintf (stderr
, "red_zone_size: %i\n", frame
->red_zone_size
);
4401 fprintf (stderr
, "frame_pointer_offset: %i\n", frame
->frame_pointer_offset
);
4402 fprintf (stderr
, "hard_frame_pointer_offset: %i\n",
4403 frame
->hard_frame_pointer_offset
);
4404 fprintf (stderr
, "stack_pointer_offset: %i\n", frame
->stack_pointer_offset
);
4408 /* Emit code to save registers in the prologue. */
4411 ix86_emit_save_regs ()
4416 for (regno
= FIRST_PSEUDO_REGISTER
- 1; regno
>= 0; regno
--)
4417 if (ix86_save_reg (regno
, true))
4419 insn
= emit_insn (gen_push (gen_rtx_REG (Pmode
, regno
)));
4420 RTX_FRAME_RELATED_P (insn
) = 1;
4424 /* Emit code to save registers using MOV insns. First register
4425 is restored from POINTER + OFFSET. */
4427 ix86_emit_save_regs_using_mov (pointer
, offset
)
4429 HOST_WIDE_INT offset
;
4434 for (regno
= 0; regno
< FIRST_PSEUDO_REGISTER
; regno
++)
4435 if (ix86_save_reg (regno
, true))
4437 insn
= emit_move_insn (adjust_address (gen_rtx_MEM (Pmode
, pointer
),
4439 gen_rtx_REG (Pmode
, regno
));
4440 RTX_FRAME_RELATED_P (insn
) = 1;
4441 offset
+= UNITS_PER_WORD
;
4445 /* Expand the prologue into a bunch of separate insns. */
4448 ix86_expand_prologue ()
4452 struct ix86_frame frame
;
4454 HOST_WIDE_INT allocate
;
4458 use_fast_prologue_epilogue
4459 = !expensive_function_p (FAST_PROLOGUE_INSN_COUNT
);
4460 if (TARGET_PROLOGUE_USING_MOVE
)
4461 use_mov
= use_fast_prologue_epilogue
;
4463 ix86_compute_frame_layout (&frame
);
4465 /* Note: AT&T enter does NOT have reversed args. Enter is probably
4466 slower on all targets. Also sdb doesn't like it. */
4468 if (frame_pointer_needed
)
4470 insn
= emit_insn (gen_push (hard_frame_pointer_rtx
));
4471 RTX_FRAME_RELATED_P (insn
) = 1;
4473 insn
= emit_move_insn (hard_frame_pointer_rtx
, stack_pointer_rtx
);
4474 RTX_FRAME_RELATED_P (insn
) = 1;
4477 allocate
= frame
.to_allocate
;
4478 /* In case we are dealing only with single register and empty frame,
4479 push is equivalent of the mov+add sequence. */
4480 if (allocate
== 0 && frame
.nregs
<= 1)
4484 ix86_emit_save_regs ();
4486 allocate
+= frame
.nregs
* UNITS_PER_WORD
;
4490 else if (! TARGET_STACK_PROBE
|| allocate
< CHECK_STACK_LIMIT
)
4492 insn
= emit_insn (gen_pro_epilogue_adjust_stack
4493 (stack_pointer_rtx
, stack_pointer_rtx
,
4494 GEN_INT (-allocate
)));
4495 RTX_FRAME_RELATED_P (insn
) = 1;
4499 /* ??? Is this only valid for Win32? */
4506 arg0
= gen_rtx_REG (SImode
, 0);
4507 emit_move_insn (arg0
, GEN_INT (allocate
));
4509 sym
= gen_rtx_MEM (FUNCTION_MODE
,
4510 gen_rtx_SYMBOL_REF (Pmode
, "_alloca"));
4511 insn
= emit_call_insn (gen_call (sym
, const0_rtx
, constm1_rtx
));
4513 CALL_INSN_FUNCTION_USAGE (insn
)
4514 = gen_rtx_EXPR_LIST (VOIDmode
, gen_rtx_USE (VOIDmode
, arg0
),
4515 CALL_INSN_FUNCTION_USAGE (insn
));
4519 if (!frame_pointer_needed
|| !frame
.to_allocate
)
4520 ix86_emit_save_regs_using_mov (stack_pointer_rtx
, frame
.to_allocate
);
4522 ix86_emit_save_regs_using_mov (hard_frame_pointer_rtx
,
4523 -frame
.nregs
* UNITS_PER_WORD
);
4526 #ifdef SUBTARGET_PROLOGUE
4530 pic_reg_used
= false;
4531 if (pic_offset_table_rtx
4532 && (regs_ever_live
[REAL_PIC_OFFSET_TABLE_REGNUM
]
4533 || current_function_profile
))
4535 unsigned int alt_pic_reg_used
= ix86_select_alt_pic_regnum ();
4537 if (alt_pic_reg_used
!= INVALID_REGNUM
)
4538 REGNO (pic_offset_table_rtx
) = alt_pic_reg_used
;
4540 pic_reg_used
= true;
4545 insn
= emit_insn (gen_set_got (pic_offset_table_rtx
));
4547 /* Even with accurate pre-reload life analysis, we can wind up
4548 deleting all references to the pic register after reload.
4549 Consider if cross-jumping unifies two sides of a branch
4550 controled by a comparison vs the only read from a global.
4551 In which case, allow the set_got to be deleted, though we're
4552 too late to do anything about the ebx save in the prologue. */
4553 REG_NOTES (insn
) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD
, const0_rtx
, NULL
);
4556 /* Prevent function calls from be scheduled before the call to mcount.
4557 In the pic_reg_used case, make sure that the got load isn't deleted. */
4558 if (current_function_profile
)
4559 emit_insn (gen_blockage (pic_reg_used
? pic_offset_table_rtx
: const0_rtx
));
4562 /* Emit code to restore saved registers using MOV insns. First register
4563 is restored from POINTER + OFFSET. */
4565 ix86_emit_restore_regs_using_mov (pointer
, offset
, maybe_eh_return
)
4568 int maybe_eh_return
;
4572 for (regno
= 0; regno
< FIRST_PSEUDO_REGISTER
; regno
++)
4573 if (ix86_save_reg (regno
, maybe_eh_return
))
4575 emit_move_insn (gen_rtx_REG (Pmode
, regno
),
4576 adjust_address (gen_rtx_MEM (Pmode
, pointer
),
4578 offset
+= UNITS_PER_WORD
;
4582 /* Restore function stack, frame, and registers. */
4585 ix86_expand_epilogue (style
)
4589 int sp_valid
= !frame_pointer_needed
|| current_function_sp_is_unchanging
;
4590 struct ix86_frame frame
;
4591 HOST_WIDE_INT offset
;
4593 ix86_compute_frame_layout (&frame
);
4595 /* Calculate start of saved registers relative to ebp. Special care
4596 must be taken for the normal return case of a function using
4597 eh_return: the eax and edx registers are marked as saved, but not
4598 restored along this path. */
4599 offset
= frame
.nregs
;
4600 if (current_function_calls_eh_return
&& style
!= 2)
4602 offset
*= -UNITS_PER_WORD
;
4604 /* If we're only restoring one register and sp is not valid then
4605 using a move instruction to restore the register since it's
4606 less work than reloading sp and popping the register.
4608 The default code result in stack adjustment using add/lea instruction,
4609 while this code results in LEAVE instruction (or discrete equivalent),
4610 so it is profitable in some other cases as well. Especially when there
4611 are no registers to restore. We also use this code when TARGET_USE_LEAVE
4612 and there is exactly one register to pop. This heruistic may need some
4613 tuning in future. */
4614 if ((!sp_valid
&& frame
.nregs
<= 1)
4615 || (TARGET_EPILOGUE_USING_MOVE
4616 && use_fast_prologue_epilogue
4617 && (frame
.nregs
> 1 || frame
.to_allocate
))
4618 || (frame_pointer_needed
&& !frame
.nregs
&& frame
.to_allocate
)
4619 || (frame_pointer_needed
&& TARGET_USE_LEAVE
4620 && use_fast_prologue_epilogue
&& frame
.nregs
== 1)
4621 || current_function_calls_eh_return
)
4623 /* Restore registers. We can use ebp or esp to address the memory
4624 locations. If both are available, default to ebp, since offsets
4625 are known to be small. Only exception is esp pointing directly to the
4626 end of block of saved registers, where we may simplify addressing
4629 if (!frame_pointer_needed
|| (sp_valid
&& !frame
.to_allocate
))
4630 ix86_emit_restore_regs_using_mov (stack_pointer_rtx
,
4631 frame
.to_allocate
, style
== 2);
4633 ix86_emit_restore_regs_using_mov (hard_frame_pointer_rtx
,
4634 offset
, style
== 2);
4636 /* eh_return epilogues need %ecx added to the stack pointer. */
4639 rtx tmp
, sa
= EH_RETURN_STACKADJ_RTX
;
4641 if (frame_pointer_needed
)
4643 tmp
= gen_rtx_PLUS (Pmode
, hard_frame_pointer_rtx
, sa
);
4644 tmp
= plus_constant (tmp
, UNITS_PER_WORD
);
4645 emit_insn (gen_rtx_SET (VOIDmode
, sa
, tmp
));
4647 tmp
= gen_rtx_MEM (Pmode
, hard_frame_pointer_rtx
);
4648 emit_move_insn (hard_frame_pointer_rtx
, tmp
);
4650 emit_insn (gen_pro_epilogue_adjust_stack
4651 (stack_pointer_rtx
, sa
, const0_rtx
));
4655 tmp
= gen_rtx_PLUS (Pmode
, stack_pointer_rtx
, sa
);
4656 tmp
= plus_constant (tmp
, (frame
.to_allocate
4657 + frame
.nregs
* UNITS_PER_WORD
));
4658 emit_insn (gen_rtx_SET (VOIDmode
, stack_pointer_rtx
, tmp
));
4661 else if (!frame_pointer_needed
)
4662 emit_insn (gen_pro_epilogue_adjust_stack
4663 (stack_pointer_rtx
, stack_pointer_rtx
,
4664 GEN_INT (frame
.to_allocate
4665 + frame
.nregs
* UNITS_PER_WORD
)));
4666 /* If not an i386, mov & pop is faster than "leave". */
4667 else if (TARGET_USE_LEAVE
|| optimize_size
|| !use_fast_prologue_epilogue
)
4668 emit_insn (TARGET_64BIT
? gen_leave_rex64 () : gen_leave ());
4671 emit_insn (gen_pro_epilogue_adjust_stack (stack_pointer_rtx
,
4672 hard_frame_pointer_rtx
,
4675 emit_insn (gen_popdi1 (hard_frame_pointer_rtx
));
4677 emit_insn (gen_popsi1 (hard_frame_pointer_rtx
));
4682 /* First step is to deallocate the stack frame so that we can
4683 pop the registers. */
4686 if (!frame_pointer_needed
)
4688 emit_insn (gen_pro_epilogue_adjust_stack (stack_pointer_rtx
,
4689 hard_frame_pointer_rtx
,
4692 else if (frame
.to_allocate
)
4693 emit_insn (gen_pro_epilogue_adjust_stack
4694 (stack_pointer_rtx
, stack_pointer_rtx
,
4695 GEN_INT (frame
.to_allocate
)));
4697 for (regno
= 0; regno
< FIRST_PSEUDO_REGISTER
; regno
++)
4698 if (ix86_save_reg (regno
, false))
4701 emit_insn (gen_popdi1 (gen_rtx_REG (Pmode
, regno
)));
4703 emit_insn (gen_popsi1 (gen_rtx_REG (Pmode
, regno
)));
4705 if (frame_pointer_needed
)
4707 /* Leave results in shorter dependency chains on CPUs that are
4708 able to grok it fast. */
4709 if (TARGET_USE_LEAVE
)
4710 emit_insn (TARGET_64BIT
? gen_leave_rex64 () : gen_leave ());
4711 else if (TARGET_64BIT
)
4712 emit_insn (gen_popdi1 (hard_frame_pointer_rtx
));
4714 emit_insn (gen_popsi1 (hard_frame_pointer_rtx
));
4718 /* Sibcall epilogues don't want a return instruction. */
4722 if (current_function_pops_args
&& current_function_args_size
)
4724 rtx popc
= GEN_INT (current_function_pops_args
);
4726 /* i386 can only pop 64K bytes. If asked to pop more, pop
4727 return address, do explicit add, and jump indirectly to the
4730 if (current_function_pops_args
>= 65536)
4732 rtx ecx
= gen_rtx_REG (SImode
, 2);
4734 /* There are is no "pascal" calling convention in 64bit ABI. */
4738 emit_insn (gen_popsi1 (ecx
));
4739 emit_insn (gen_addsi3 (stack_pointer_rtx
, stack_pointer_rtx
, popc
));
4740 emit_jump_insn (gen_return_indirect_internal (ecx
));
4743 emit_jump_insn (gen_return_pop_internal (popc
));
4746 emit_jump_insn (gen_return_internal ());
4749 /* Reset from the function's potential modifications. */
/* Target hook run when emitting the function epilogue text.  FILE and
   SIZE are both unused here; the only action is undoing any renumbering
   of the PIC register rtx done while compiling the function body.  */
4752 ix86_output_function_epilogue (file
, size
)
4753 FILE *file ATTRIBUTE_UNUSED
;
4754 HOST_WIDE_INT size ATTRIBUTE_UNUSED
;
/* Restore the PIC pseudo's hard register number to the real PIC
   register.  NOTE(review): presumably the prologue/reload code may have
   assigned a different regno -- confirm against the prologue code.  */
4756 if (pic_offset_table_rtx
)
4757 REGNO (pic_offset_table_rtx
) = REAL_PIC_OFFSET_TABLE_REGNUM
;
4760 /* Extract the parts of an RTL expression that is a valid memory address
4761 for an instruction. Return 0 if the structure of the address is
4762 grossly off. Return -1 if the address contains ASHIFT, so it is not
4763 strictly valid, but still used for computing length of lea instruction.
4767 ix86_decompose_address (addr
, out
)
4769 struct ix86_address
*out
;
/* Working copies of the four x86 address components: base register,
   index register, displacement, and scale factor (default scale 1).  */
4771 rtx base
= NULL_RTX
;
4772 rtx index
= NULL_RTX
;
4773 rtx disp
= NULL_RTX
;
4774 HOST_WIDE_INT scale
= 1;
4775 rtx scale_rtx
= NULL_RTX
;
/* Classify ADDR by its top-level rtx code: a bare (sub)register is a
   base; a PLUS is split into base/index/disp combinations below.  */
4778 if (REG_P (addr
) || GET_CODE (addr
) == SUBREG
)
4780 else if (GET_CODE (addr
) == PLUS
)
4782 rtx op0
= XEXP (addr
, 0);
4783 rtx op1
= XEXP (addr
, 1);
4784 enum rtx_code code0
= GET_CODE (op0
);
4785 enum rtx_code code1
= GET_CODE (op1
);
4787 if (code0
== REG
|| code0
== SUBREG
)
4789 if (code1
== REG
|| code1
== SUBREG
)
4790 index
= op0
, base
= op1
; /* index + base */
4792 base
= op0
, disp
= op1
; /* base + displacement */
4794 else if (code0
== MULT
)
4796 index
= XEXP (op0
, 0);
4797 scale_rtx
= XEXP (op0
, 1);
4798 if (code1
== REG
|| code1
== SUBREG
)
4799 base
= op1
; /* index*scale + base */
4801 disp
= op1
; /* index*scale + disp */
4803 else if (code0
== PLUS
&& GET_CODE (XEXP (op0
, 0)) == MULT
)
4805 index
= XEXP (XEXP (op0
, 0), 0); /* index*scale + base + disp */
4806 scale_rtx
= XEXP (XEXP (op0
, 0), 1);
4807 base
= XEXP (op0
, 1);
4810 else if (code0
== PLUS
)
4812 index
= XEXP (op0
, 0); /* index + base + disp */
4813 base
= XEXP (op0
, 1);
4819 else if (GET_CODE (addr
) == MULT
)
4821 index
= XEXP (addr
, 0); /* index*scale */
4822 scale_rtx
= XEXP (addr
, 1);
4824 else if (GET_CODE (addr
) == ASHIFT
)
4828 /* We're called for lea too, which implements ashift on occasion. */
4829 index
= XEXP (addr
, 0);
4830 tmp
= XEXP (addr
, 1);
/* An ashift shift count must be a small constant (0..3) to be
   representable as a scale of 1/2/4/8.  */
4831 if (GET_CODE (tmp
) != CONST_INT
)
4833 scale
= INTVAL (tmp
);
4834 if ((unsigned HOST_WIDE_INT
) scale
> 3)
4840 disp
= addr
; /* displacement */
4842 /* Extract the integral value of scale. */
4845 if (GET_CODE (scale_rtx
) != CONST_INT
)
4847 scale
= INTVAL (scale_rtx
);
4850 /* Allow arg pointer and stack pointer as index if there is not scaling */
4851 if (base
&& index
&& scale
== 1
4852 && (index
== arg_pointer_rtx
|| index
== frame_pointer_rtx
4853 || index
== stack_pointer_rtx
))
4860 /* Special case: %ebp cannot be encoded as a base without a displacement. */
4861 if ((base
== hard_frame_pointer_rtx
4862 || base
== frame_pointer_rtx
4863 || base
== arg_pointer_rtx
) && !disp
)
4866 /* Special case: on K6, [%esi] makes the instruction vector decoded.
4867 Avoid this by transforming to [%esi+0]. */
4868 if (ix86_cpu
== PROCESSOR_K6
&& !optimize_size
4869 && base
&& !index
&& !disp
4871 && REGNO_REG_CLASS (REGNO (base
)) == SIREG
)
4874 /* Special case: encode reg+reg instead of reg*2. */
4875 if (!base
&& index
&& scale
&& scale
== 2)
4876 base
= index
, scale
= 1;
4878 /* Special case: scaling cannot be encoded without base or displacement. */
4879 if (!base
&& !disp
&& index
&& scale
!= 1)
4890 /* Return cost of the memory address x.
4891 For i386, it is better to use a complex address than let gcc copy
4892 the address into a reg and make a new pseudo. But not if the address
4893 requires to two regs - that would mean more pseudos with longer
4896 ix86_address_cost (x
)
4899 struct ix86_address parts
;
/* Decompose X into base/index/disp/scale; decomposition failure is
   handled before costing.  */
4902 if (!ix86_decompose_address (x
, &parts
))
/* Strip SUBREGs so the register checks below see the inner REG.  */
4905 if (parts
.base
&& GET_CODE (parts
.base
) == SUBREG
)
4906 parts
.base
= SUBREG_REG (parts
.base
);
4907 if (parts
.index
&& GET_CODE (parts
.index
) == SUBREG
)
4908 parts
.index
= SUBREG_REG (parts
.index
);
4910 /* More complex memory references are better. */
4911 if (parts
.disp
&& parts
.disp
!= const0_rtx
)
4914 /* Attempt to minimize number of registers in the address. */
4916 && (!REG_P (parts
.base
) || REGNO (parts
.base
) >= FIRST_PSEUDO_REGISTER
))
4918 && (!REG_P (parts
.index
)
4919 || REGNO (parts
.index
) >= FIRST_PSEUDO_REGISTER
)))
4923 && (!REG_P (parts
.base
) || REGNO (parts
.base
) >= FIRST_PSEUDO_REGISTER
)
4925 && (!REG_P (parts
.index
) || REGNO (parts
.index
) >= FIRST_PSEUDO_REGISTER
)
4926 && parts
.base
!= parts
.index
)
4929 /* AMD-K6 don't like addresses with ModR/M set to 00_xxx_100b,
4930 since it's predecode logic can't detect the length of instructions
4931 and it degenerates to vector decoded. Increase cost of such
4932 addresses here. The penalty is minimally 2 cycles. It may be worthwhile
4933 to split such addresses or even refuse such addresses at all.
4935 Following addressing modes are affected:
4940 The first and last case may be avoidable by explicitly coding the zero in
4941 memory address, but I don't have AMD-K6 machine handy to check this
4945 && ((!parts
.disp
&& parts
.base
&& parts
.index
&& parts
.scale
!= 1)
4946 || (parts
.disp
&& !parts
.base
&& parts
.index
&& parts
.scale
!= 1)
4947 || (!parts
.disp
&& parts
.base
&& parts
.index
&& parts
.scale
== 1)))
4953 /* If X is a machine specific address (i.e. a symbol or label being
4954 referenced as a displacement from the GOT implemented using an
4955 UNSPEC), then return the base term. Otherwise return X. */
4958 ix86_find_base_term (x
)
/* 64-bit path: peel a CONST wrapper, an optional constant offset, and
   an UNSPEC_GOTPCREL to expose the underlying SYMBOL_REF/LABEL_REF.  */
4965 if (GET_CODE (x
) != CONST
)
4968 if (GET_CODE (term
) == PLUS
4969 && (GET_CODE (XEXP (term
, 1)) == CONST_INT
4970 || GET_CODE (XEXP (term
, 1)) == CONST_DOUBLE
))
4971 term
= XEXP (term
, 0);
4972 if (GET_CODE (term
) != UNSPEC
4973 || XINT (term
, 1) != UNSPEC_GOTPCREL
)
4976 term
= XVECEXP (term
, 0, 0);
4978 if (GET_CODE (term
) != SYMBOL_REF
4979 && GET_CODE (term
) != LABEL_REF
)
/* 32-bit PIC path: recognize (plus pic_reg (const ...)) and strip the
   UNSPEC_GOTOFF wrapper the same way.  */
4985 if (GET_CODE (x
) != PLUS
4986 || XEXP (x
, 0) != pic_offset_table_rtx
4987 || GET_CODE (XEXP (x
, 1)) != CONST
)
4990 term
= XEXP (XEXP (x
, 1), 0);
4992 if (GET_CODE (term
) == PLUS
&& GET_CODE (XEXP (term
, 1)) == CONST_INT
)
4993 term
= XEXP (term
, 0);
4995 if (GET_CODE (term
) != UNSPEC
4996 || XINT (term
, 1) != UNSPEC_GOTOFF
)
4999 term
= XVECEXP (term
, 0, 0);
5001 if (GET_CODE (term
) != SYMBOL_REF
5002 && GET_CODE (term
) != LABEL_REF
)
5008 /* Determine if a given RTX is a valid constant. We already know this
5009 satisfies CONSTANT_P. */
5012 legitimate_constant_p (x
)
5017 switch (GET_CODE (x
))
5020 /* TLS symbols are not constant. */
5021 if (tls_symbolic_operand (x
, Pmode
))
/* CONST case: inspect what is wrapped inside the CONST.  */
5026 inner
= XEXP (x
, 0);
5028 /* Offsets of TLS symbols are never valid.
5029 Discourage CSE from creating them. */
5030 if (GET_CODE (inner
) == PLUS
5031 && tls_symbolic_operand (XEXP (inner
, 0), Pmode
))
5034 /* Only some unspecs are valid as "constants". */
5035 if (GET_CODE (inner
) == UNSPEC
)
5036 switch (XINT (inner
, 1))
5039 return local_exec_symbolic_operand (XVECEXP (inner
, 0, 0), Pmode
);
5049 /* Otherwise we handle everything else in the move patterns. */
5053 /* Determine if a given RTX is a valid constant address. */
5056 constant_address_p (x
)
5059 switch (GET_CODE (x
))
/* NOTE(review): this case appears to accept the code only on x86-64;
   elided case labels above make the exact codes unverifiable here.  */
5066 return TARGET_64BIT
;
5069 /* For Mach-O, really believe the CONST. */
5072 /* Otherwise fall through. */
/* Default: constant addresses are legitimate only in non-PIC code and
   only if the rtx is otherwise a legitimate constant.  */
5074 return !flag_pic
&& legitimate_constant_p (x
);
5081 /* Nonzero if the constant value X is a legitimate general operand
5082 when generating PIC code. It is given that flag_pic is on and
5083 that X satisfies CONSTANT_P or is a CONST_DOUBLE. */
5086 legitimate_pic_operand_p (x
)
5091 switch (GET_CODE (x
))
/* CONST case: only certain UNSPEC wrappers qualify as PIC-safe
   constants (e.g. local-exec TLS offsets).  */
5094 inner
= XEXP (x
, 0);
5096 /* Only some unspecs are valid as "constants". */
5097 if (GET_CODE (inner
) == UNSPEC
)
5098 switch (XINT (inner
, 1))
5101 return local_exec_symbolic_operand (XVECEXP (inner
, 0, 0), Pmode
);
/* Symbolic operands are legitimate only if they form a valid PIC
   displacement.  */
5109 return legitimate_pic_address_disp_p (x
);
5116 /* Determine if a given CONST RTX is a valid memory displacement
5120 legitimate_pic_address_disp_p (disp
)
5125 /* In 64bit mode we can allow direct addresses of symbols and labels
5126 when they are not dynamic symbols. */
5129 /* TLS references should always be enclosed in UNSPEC. */
5130 if (tls_symbolic_operand (disp
, GET_MODE (disp
)))
/* Small-PIC model: local symbols (constant pool entries or symbols
   marked local via SYMBOL_REF_FLAG) may be addressed directly.  */
5132 if (GET_CODE (disp
) == SYMBOL_REF
5133 && ix86_cmodel
== CM_SMALL_PIC
5134 && (CONSTANT_POOL_ADDRESS_P (disp
)
5135 || SYMBOL_REF_FLAG (disp
)))
5137 if (GET_CODE (disp
) == LABEL_REF
)
/* Symbol/label plus a constant offset within +/-16MB is also OK.  */
5139 if (GET_CODE (disp
) == CONST
5140 && GET_CODE (XEXP (disp
, 0)) == PLUS
5141 && ((GET_CODE (XEXP (XEXP (disp
, 0), 0)) == SYMBOL_REF
5142 && ix86_cmodel
== CM_SMALL_PIC
5143 && (CONSTANT_POOL_ADDRESS_P (XEXP (XEXP (disp
, 0), 0))
5144 || SYMBOL_REF_FLAG (XEXP (XEXP (disp
, 0), 0))))
5145 || GET_CODE (XEXP (XEXP (disp
, 0), 0)) == LABEL_REF
)
5146 && GET_CODE (XEXP (XEXP (disp
, 0), 1)) == CONST_INT
5147 && INTVAL (XEXP (XEXP (disp
, 0), 1)) < 16*1024*1024
5148 && INTVAL (XEXP (XEXP (disp
, 0), 1)) >= -16*1024*1024)
5151 if (GET_CODE (disp
) != CONST
)
5153 disp
= XEXP (disp
, 0);
5157 /* We are unsafe to allow PLUS expressions. This limit allowed distance
5158 of GOT tables. We should not need these anyway. */
5159 if (GET_CODE (disp
) != UNSPEC
5160 || XINT (disp
, 1) != UNSPEC_GOTPCREL
)
5163 if (GET_CODE (XVECEXP (disp
, 0, 0)) != SYMBOL_REF
5164 && GET_CODE (XVECEXP (disp
, 0, 0)) != LABEL_REF
)
/* 32-bit path: allow an UNSPEC plus a constant integer offset.  */
5170 if (GET_CODE (disp
) == PLUS
)
5172 if (GET_CODE (XEXP (disp
, 1)) != CONST_INT
)
5174 disp
= XEXP (disp
, 0);
5178 /* Allow {LABEL | SYMBOL}_REF - SYMBOL_REF-FOR-PICBASE for Mach-O. */
5179 if (TARGET_MACHO
&& GET_CODE (disp
) == MINUS
)
5181 if (GET_CODE (XEXP (disp
, 0)) == LABEL_REF
5182 || GET_CODE (XEXP (disp
, 0)) == SYMBOL_REF
)
5183 if (GET_CODE (XEXP (disp
, 1)) == SYMBOL_REF
)
5185 const char *sym_name
= XSTR (XEXP (disp
, 1), 0);
5186 if (strstr (sym_name
, "$pb") != 0)
5191 if (GET_CODE (disp
) != UNSPEC
)
/* Validate the specific UNSPEC kind (@GOT, @GOTOFF, TLS variants).  */
5194 switch (XINT (disp
, 1))
5199 return GET_CODE (XVECEXP (disp
, 0, 0)) == SYMBOL_REF
;
5201 return local_symbolic_operand (XVECEXP (disp
, 0, 0), Pmode
);
5202 case UNSPEC_GOTTPOFF
:
5203 case UNSPEC_GOTNTPOFF
:
5204 case UNSPEC_INDNTPOFF
:
5207 return initial_exec_symbolic_operand (XVECEXP (disp
, 0, 0), Pmode
);
5209 return local_exec_symbolic_operand (XVECEXP (disp
, 0, 0), Pmode
);
5211 return local_dynamic_symbolic_operand (XVECEXP (disp
, 0, 0), Pmode
);
5217 /* GO_IF_LEGITIMATE_ADDRESS recognizes an RTL expression that is a valid
5218 memory address for an instruction. The MODE argument is the machine mode
5219 for the MEM expression that wants to use this address.
5221 It only recognizes address in canonical form. LEGITIMIZE_ADDRESS should
5222 convert common non-canonical forms to canonical form so that they will
5226 legitimate_address_p (mode
, addr
, strict
)
5227 enum machine_mode mode
;
/* Result of decomposition plus a human-readable failure reason used by
   the debug-trace path at the bottom.  */
5231 struct ix86_address parts
;
5232 rtx base
, index
, disp
;
5233 HOST_WIDE_INT scale
;
5234 const char *reason
= NULL
;
5235 rtx reason_rtx
= NULL_RTX
;
5237 if (TARGET_DEBUG_ADDR
)
5240 "\n======\nGO_IF_LEGITIMATE_ADDRESS, mode = %s, strict = %d\n",
5241 GET_MODE_NAME (mode
), strict
);
/* A bare thread-pointer UNSPEC is accepted immediately.  */
5245 if (GET_CODE (addr
) == UNSPEC
&& XINT (addr
, 1) == UNSPEC_TP
)
5247 if (TARGET_DEBUG_ADDR
)
5248 fprintf (stderr
, "Success.\n");
5252 if (ix86_decompose_address (addr
, &parts
) <= 0)
5254 reason
= "decomposition failed";
5259 index
= parts
.index
;
5261 scale
= parts
.scale
;
5263 /* Validate base register.
5265 Don't allow SUBREG's here, it can lead to spill failures when the base
5266 is one word out of a two word structure, which is represented internally
5274 if (GET_CODE (base
) == SUBREG
)
5275 reg
= SUBREG_REG (base
);
5279 if (GET_CODE (reg
) != REG
)
5281 reason
= "base is not a register";
5285 if (GET_MODE (base
) != Pmode
)
5287 reason
= "base is not in Pmode";
5291 if ((strict
&& ! REG_OK_FOR_BASE_STRICT_P (reg
))
5292 || (! strict
&& ! REG_OK_FOR_BASE_NONSTRICT_P (reg
)))
5294 reason
= "base is not valid";
5299 /* Validate index register.
5301 Don't allow SUBREG's here, it can lead to spill failures when the index
5302 is one word out of a two word structure, which is represented internally
5310 if (GET_CODE (index
) == SUBREG
)
5311 reg
= SUBREG_REG (index
);
5315 if (GET_CODE (reg
) != REG
)
5317 reason
= "index is not a register";
5321 if (GET_MODE (index
) != Pmode
)
5323 reason
= "index is not in Pmode";
5327 if ((strict
&& ! REG_OK_FOR_INDEX_STRICT_P (reg
))
5328 || (! strict
&& ! REG_OK_FOR_INDEX_NONSTRICT_P (reg
)))
5330 reason
= "index is not valid";
5335 /* Validate scale factor. */
5338 reason_rtx
= GEN_INT (scale
);
5341 reason
= "scale without index";
5345 if (scale
!= 2 && scale
!= 4 && scale
!= 8)
5347 reason
= "scale is not a valid multiplier";
5352 /* Validate displacement. */
5357 if (GET_CODE (disp
) == CONST
5358 && GET_CODE (XEXP (disp
, 0)) == UNSPEC
)
5359 switch (XINT (XEXP (disp
, 0), 1))
5363 case UNSPEC_GOTPCREL
:
5366 goto is_legitimate_pic
;
5368 case UNSPEC_GOTTPOFF
:
5369 case UNSPEC_GOTNTPOFF
:
5370 case UNSPEC_INDNTPOFF
:
5376 reason
= "invalid address unspec";
5380 else if (flag_pic
&& (SYMBOLIC_CONST (disp
)
5382 && !machopic_operand_p (disp
)
/* PIC displacement checks.  On x86-64 with a base or index register
   present, only @dtpoff/@ntpoff constant offsets are permitted.  */
5387 if (TARGET_64BIT
&& (index
|| base
))
5389 /* foo@dtpoff(%rX) is ok. */
5390 if (GET_CODE (disp
) != CONST
5391 || GET_CODE (XEXP (disp
, 0)) != PLUS
5392 || GET_CODE (XEXP (XEXP (disp
, 0), 0)) != UNSPEC
5393 || GET_CODE (XEXP (XEXP (disp
, 0), 1)) != CONST_INT
5394 || (XINT (XEXP (XEXP (disp
, 0), 0), 1) != UNSPEC_DTPOFF
5395 && XINT (XEXP (XEXP (disp
, 0), 0), 1) != UNSPEC_NTPOFF
))
5397 reason
= "non-constant pic memory reference";
5401 else if (! legitimate_pic_address_disp_p (disp
))
5403 reason
= "displacement is an invalid pic construct";
5407 /* This code used to verify that a symbolic pic displacement
5408 includes the pic_offset_table_rtx register.
5410 While this is good idea, unfortunately these constructs may
5411 be created by "adds using lea" optimization for incorrect
5420 This code is nonsensical, but results in addressing
5421 GOT table with pic_offset_table_rtx base. We can't
5422 just refuse it easily, since it gets matched by
5423 "addsi3" pattern, that later gets split to lea in the
5424 case output register differs from input. While this
5425 can be handled by separate addsi pattern for this case
5426 that never results in lea, this seems to be easier and
5427 correct fix for crash to disable this test. */
5429 else if (!CONSTANT_ADDRESS_P (disp
))
5431 reason
= "displacement is not constant";
5434 else if (TARGET_64BIT
&& !x86_64_sign_extended_value (disp
))
5436 reason
= "displacement is out of range";
5439 else if (!TARGET_64BIT
&& GET_CODE (disp
) == CONST_DOUBLE
)
5441 reason
= "displacement is a const_double";
5446 /* Everything looks valid. */
5447 if (TARGET_DEBUG_ADDR
)
5448 fprintf (stderr
, "Success.\n");
/* Failure path: print the diagnostic reason when address debugging is
   enabled.  */
5452 if (TARGET_DEBUG_ADDR
)
5454 fprintf (stderr
, "Error: %s\n", reason
);
5455 debug_rtx (reason_rtx
);
5460 /* Return an unique alias set for the GOT. */
5462 static HOST_WIDE_INT
5463 ix86_GOT_alias_set ()
/* The alias set is created lazily on first use and cached in a
   function-local static (-1 means "not yet allocated").  */
5465 static HOST_WIDE_INT set
= -1;
5467 set
= new_alias_set ();
5471 /* Return a legitimate reference for ORIG (an address) using the
5472 register REG. If REG is 0, a new pseudo is generated.
5474 There are two types of references that must be handled:
5476 1. Global data references must load the address from the GOT, via
5477 the PIC reg. An insn is emitted to do this load, and the reg is
5480 2. Static data references, constant pool addresses, and code labels
5481 compute the address as an offset from the GOT, whose base is in
5482 the PIC reg. Static data objects have SYMBOL_REF_FLAG set to
5483 differentiate them from global data objects. The returned
5484 address is the PIC reg + an unspec constant.
5486 GO_IF_LEGITIMATE_ADDRESS rejects symbolic references unless the PIC
5487 reg also appears in the address. */
5490 legitimize_pic_address (orig
, reg
)
/* Mach-O target: delegate entirely to the generic Darwin PIC code.  */
5500 reg
= gen_reg_rtx (Pmode
);
5501 /* Use the generic Mach-O PIC machinery. */
5502 return machopic_legitimize_pic_address (orig
, GET_MODE (orig
), reg
);
5505 if (TARGET_64BIT
&& legitimate_pic_address_disp_p (addr
))
5507 else if (!TARGET_64BIT
&& local_symbolic_operand (addr
, Pmode
))
5509 /* This symbol may be referenced via a displacement from the PIC
5510 base address (@GOTOFF). */
5512 if (reload_in_progress
)
5513 regs_ever_live
[PIC_OFFSET_TABLE_REGNUM
] = 1;
5514 new = gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, addr
), UNSPEC_GOTOFF
);
5515 new = gen_rtx_CONST (Pmode
, new);
5516 new = gen_rtx_PLUS (Pmode
, pic_offset_table_rtx
, new);
5520 emit_move_insn (reg
, new);
5524 else if (GET_CODE (addr
) == SYMBOL_REF
)
/* x86-64 path: address the symbol through a RIP-relative GOT slot
   (@GOTPCREL) and load it with gen_movsi to avoid CSE of the address.  */
5528 new = gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, addr
), UNSPEC_GOTPCREL
);
5529 new = gen_rtx_CONST (Pmode
, new);
5530 new = gen_rtx_MEM (Pmode
, new);
5531 RTX_UNCHANGING_P (new) = 1;
5532 set_mem_alias_set (new, ix86_GOT_alias_set ());
5535 reg
= gen_reg_rtx (Pmode
);
5536 /* Use directly gen_movsi, otherwise the address is loaded
5537 into register for CSE. We don't want to CSE this addresses,
5538 instead we CSE addresses from the GOT table, so skip this. */
5539 emit_insn (gen_movsi (reg
, new));
5544 /* This symbol must be referenced via a load from the
5545 Global Offset Table (@GOT). */
5547 if (reload_in_progress
)
5548 regs_ever_live
[PIC_OFFSET_TABLE_REGNUM
] = 1;
5549 new = gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, addr
), UNSPEC_GOT
);
5550 new = gen_rtx_CONST (Pmode
, new);
5551 new = gen_rtx_PLUS (Pmode
, pic_offset_table_rtx
, new);
5552 new = gen_rtx_MEM (Pmode
, new);
5553 RTX_UNCHANGING_P (new) = 1;
5554 set_mem_alias_set (new, ix86_GOT_alias_set ());
5557 reg
= gen_reg_rtx (Pmode
);
5558 emit_move_insn (reg
, new);
/* Remaining cases: CONST wrappers and PLUS expressions built from
   symbolic operands.  */
5564 if (GET_CODE (addr
) == CONST
)
5566 addr
= XEXP (addr
, 0);
5568 /* We must match stuff we generate before. Assume the only
5569 unspecs that can get here are ours. Not that we could do
5570 anything with them anyway... */
5571 if (GET_CODE (addr
) == UNSPEC
5572 || (GET_CODE (addr
) == PLUS
5573 && GET_CODE (XEXP (addr
, 0)) == UNSPEC
))
5575 if (GET_CODE (addr
) != PLUS
)
5578 if (GET_CODE (addr
) == PLUS
)
5580 rtx op0
= XEXP (addr
, 0), op1
= XEXP (addr
, 1);
5582 /* Check first to see if this is a constant offset from a @GOTOFF
5583 symbol reference. */
5584 if (local_symbolic_operand (op0
, Pmode
)
5585 && GET_CODE (op1
) == CONST_INT
)
5589 if (reload_in_progress
)
5590 regs_ever_live
[PIC_OFFSET_TABLE_REGNUM
] = 1;
5591 new = gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, op0
),
5593 new = gen_rtx_PLUS (Pmode
, new, op1
);
5594 new = gen_rtx_CONST (Pmode
, new);
5595 new = gen_rtx_PLUS (Pmode
, pic_offset_table_rtx
, new);
5599 emit_move_insn (reg
, new);
/* Offsets outside +/-16MB must be carried in a register.  */
5605 if (INTVAL (op1
) < -16*1024*1024
5606 || INTVAL (op1
) >= 16*1024*1024)
5607 new = gen_rtx_PLUS (Pmode
, op0
, force_reg (Pmode
, op1
));
/* General PLUS: legitimize both halves recursively, then recombine,
   folding constant parts where possible.  */
5612 base
= legitimize_pic_address (XEXP (addr
, 0), reg
);
5613 new = legitimize_pic_address (XEXP (addr
, 1),
5614 base
== reg
? NULL_RTX
: reg
);
5616 if (GET_CODE (new) == CONST_INT
)
5617 new = plus_constant (base
, INTVAL (new));
5620 if (GET_CODE (new) == PLUS
&& CONSTANT_P (XEXP (new, 1)))
5622 base
= gen_rtx_PLUS (Pmode
, base
, XEXP (new, 0));
5623 new = XEXP (new, 1);
5625 new = gen_rtx_PLUS (Pmode
, base
, new);
/* Target hook: record section/visibility information in a decl's
   SYMBOL_REF, including the '%<model-char>' TLS name prefix used by
   this port to encode the TLS access model.  */
5634 ix86_encode_section_info (decl
, first
)
5636 int first ATTRIBUTE_UNUSED
;
5638 bool local_p
= (*targetm
.binds_local_p
) (decl
);
5641 rtl
= DECL_P (decl
) ? DECL_RTL (decl
) : TREE_CST_RTL (decl
);
5642 if (GET_CODE (rtl
) != MEM
)
5644 symbol
= XEXP (rtl
, 0);
5645 if (GET_CODE (symbol
) != SYMBOL_REF
)
5648 /* For basic x86, if using PIC, mark a SYMBOL_REF for a non-global
5649 symbol so that we may access it directly in the GOT. */
5652 SYMBOL_REF_FLAG (symbol
) = local_p
;
5654 /* For ELF, encode thread-local data with %[GLil] for "global dynamic",
5655 "local dynamic", "initial exec" or "local exec" TLS models
5658 if (TREE_CODE (decl
) == VAR_DECL
&& DECL_THREAD_LOCAL (decl
))
5660 const char *symbol_str
;
5663 enum tls_model kind
= decl_tls_model (decl
);
5665 if (TARGET_64BIT
&& ! flag_pic
)
5667 /* x86-64 doesn't allow non-pic code for shared libraries,
5668 so don't generate GD/LD TLS models for non-pic code. */
5671 case TLS_MODEL_GLOBAL_DYNAMIC
:
5672 kind
= TLS_MODEL_INITIAL_EXEC
; break;
5673 case TLS_MODEL_LOCAL_DYNAMIC
:
5674 kind
= TLS_MODEL_LOCAL_EXEC
; break;
/* If the name is already prefixed with the right model char, nothing
   to do; otherwise build "%<char>" + old name and install it.  */
5680 symbol_str
= XSTR (symbol
, 0);
5682 if (symbol_str
[0] == '%')
5684 if (symbol_str
[1] == tls_model_chars
[kind
])
5688 len
= strlen (symbol_str
) + 1;
5689 newstr
= alloca (len
+ 2);
5692 newstr
[1] = tls_model_chars
[kind
];
5693 memcpy (newstr
+ 2, symbol_str
, len
);
5695 XSTR (symbol
, 0) = ggc_alloc_string (newstr
, len
+ 2 - 1);
5699 /* Undo the above when printing symbol names. */
/* NOTE(review): body elided from this view; presumably strips the
   '%<model>' TLS prefix added by ix86_encode_section_info -- confirm.  */
5702 ix86_strip_name_encoding (str
)
5712 /* Load the thread pointer into a register. */
5715 get_thread_pointer ()
/* The thread pointer is represented as a MEM of an UNSPEC_TP; it is
   marked unchanging and given the GOT alias set, then forced into a
   register for use as a TLS base address.  */
5719 tp
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, const0_rtx
), UNSPEC_TP
);
5720 tp
= gen_rtx_MEM (Pmode
, tp
);
5721 RTX_UNCHANGING_P (tp
) = 1;
5722 set_mem_alias_set (tp
, ix86_GOT_alias_set ());
5723 tp
= force_reg (Pmode
, tp
);
5728 /* Try machine-dependent ways of modifying an illegitimate address
5729 to be legitimate. If we find one, return the new, valid address.
5730 This macro is used in only one place: `memory_address' in explow.c.
5732 OLDX is the address as it was before break_out_memory_refs was called.
5733 In some cases it is useful to look at this to decide what needs to be done.
5735 MODE and WIN are passed so that this macro can use
5736 GO_IF_LEGITIMATE_ADDRESS.
5738 It is always safe for this macro to do nothing. It exists to recognize
5739 opportunities to optimize the output.
5741 For the 80386, we handle X+REG by loading X into a register R and
5742 using R+REG. R will go in a general reg and indexing will be used.
5743 However, if REG is a broken-out memory address or multiplication,
5744 nothing needs to be done because REG can certainly go in a general reg.
5746 When -fpic is used, special handling is needed for symbolic references.
5747 See comments by legitimize_pic_address in i386.c for details. */
5750 legitimize_address (x
, oldx
, mode
)
5752 register rtx oldx ATTRIBUTE_UNUSED
;
5753 enum machine_mode mode
;
5758 if (TARGET_DEBUG_ADDR
)
5760 fprintf (stderr
, "\n==========\nLEGITIMIZE_ADDRESS, mode = %s\n",
5761 GET_MODE_NAME (mode
));
/* TLS symbols get model-specific expansion before anything else.  */
5765 log
= tls_symbolic_operand (x
, mode
);
5768 rtx dest
, base
, off
, pic
;
5773 case TLS_MODEL_GLOBAL_DYNAMIC
:
5774 dest
= gen_reg_rtx (Pmode
);
/* 64-bit GD: call __tls_get_addr via a libcall block; result in %rax.  */
5777 rtx rax
= gen_rtx_REG (Pmode
, 0), insns
;
5780 emit_call_insn (gen_tls_global_dynamic_64 (rax
, x
));
5781 insns
= get_insns ();
5784 emit_libcall_block (insns
, dest
, rax
, x
);
5787 emit_insn (gen_tls_global_dynamic_32 (dest
, x
));
5790 case TLS_MODEL_LOCAL_DYNAMIC
:
5791 base
= gen_reg_rtx (Pmode
);
5794 rtx rax
= gen_rtx_REG (Pmode
, 0), insns
, note
;
5797 emit_call_insn (gen_tls_local_dynamic_base_64 (rax
));
5798 insns
= get_insns ();
5801 note
= gen_rtx_EXPR_LIST (VOIDmode
, const0_rtx
, NULL
);
5802 note
= gen_rtx_EXPR_LIST (VOIDmode
, ix86_tls_get_addr (), note
);
5803 emit_libcall_block (insns
, base
, rax
, note
);
5806 emit_insn (gen_tls_local_dynamic_base_32 (base
));
/* LD result = module base + @dtpoff offset of the symbol.  */
5808 off
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, x
), UNSPEC_DTPOFF
);
5809 off
= gen_rtx_CONST (Pmode
, off
);
5811 return gen_rtx_PLUS (Pmode
, base
, off
);
5813 case TLS_MODEL_INITIAL_EXEC
:
5817 type
= UNSPEC_GOTNTPOFF
;
5821 if (reload_in_progress
)
5822 regs_ever_live
[PIC_OFFSET_TABLE_REGNUM
] = 1;
5823 pic
= pic_offset_table_rtx
;
5824 type
= TARGET_GNU_TLS
? UNSPEC_GOTNTPOFF
: UNSPEC_GOTTPOFF
;
5826 else if (!TARGET_GNU_TLS
)
5828 pic
= gen_reg_rtx (Pmode
);
5829 emit_insn (gen_set_got (pic
));
5830 type
= UNSPEC_GOTTPOFF
;
5835 type
= UNSPEC_INDNTPOFF
;
5838 base
= get_thread_pointer ();
/* Load the symbol's TP offset from the GOT, then add (GNU TLS) or
   subtract (Sun TLS) it relative to the thread pointer.  */
5840 off
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, x
), type
);
5841 off
= gen_rtx_CONST (Pmode
, off
);
5843 off
= gen_rtx_PLUS (Pmode
, pic
, off
);
5844 off
= gen_rtx_MEM (Pmode
, off
);
5845 RTX_UNCHANGING_P (off
) = 1;
5846 set_mem_alias_set (off
, ix86_GOT_alias_set ());
5847 dest
= gen_reg_rtx (Pmode
);
5849 if (TARGET_64BIT
|| TARGET_GNU_TLS
)
5851 emit_move_insn (dest
, off
);
5852 return gen_rtx_PLUS (Pmode
, base
, dest
);
5855 emit_insn (gen_subsi3 (dest
, base
, off
));
5858 case TLS_MODEL_LOCAL_EXEC
:
5859 base
= get_thread_pointer ();
5861 off
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, x
),
5862 (TARGET_64BIT
|| TARGET_GNU_TLS
)
5863 ? UNSPEC_NTPOFF
: UNSPEC_TPOFF
);
5864 off
= gen_rtx_CONST (Pmode
, off
);
5866 if (TARGET_64BIT
|| TARGET_GNU_TLS
)
5867 return gen_rtx_PLUS (Pmode
, base
, off
);
5870 dest
= gen_reg_rtx (Pmode
);
5871 emit_insn (gen_subsi3 (dest
, base
, off
));
/* Non-TLS symbolic addresses under PIC go through the PIC
   legitimizer.  */
5882 if (flag_pic
&& SYMBOLIC_CONST (x
))
5883 return legitimize_pic_address (x
, 0);
5885 /* Canonicalize shifts by 0, 1, 2, 3 into multiply */
5886 if (GET_CODE (x
) == ASHIFT
5887 && GET_CODE (XEXP (x
, 1)) == CONST_INT
5888 && (log
= (unsigned) exact_log2 (INTVAL (XEXP (x
, 1)))) < 4)
5891 x
= gen_rtx_MULT (Pmode
, force_reg (Pmode
, XEXP (x
, 0)),
5892 GEN_INT (1 << log
));
5895 if (GET_CODE (x
) == PLUS
)
5897 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
5899 if (GET_CODE (XEXP (x
, 0)) == ASHIFT
5900 && GET_CODE (XEXP (XEXP (x
, 0), 1)) == CONST_INT
5901 && (log
= (unsigned) exact_log2 (INTVAL (XEXP (XEXP (x
, 0), 1)))) < 4)
5904 XEXP (x
, 0) = gen_rtx_MULT (Pmode
,
5905 force_reg (Pmode
, XEXP (XEXP (x
, 0), 0)),
5906 GEN_INT (1 << log
));
5909 if (GET_CODE (XEXP (x
, 1)) == ASHIFT
5910 && GET_CODE (XEXP (XEXP (x
, 1), 1)) == CONST_INT
5911 && (log
= (unsigned) exact_log2 (INTVAL (XEXP (XEXP (x
, 1), 1)))) < 4)
5914 XEXP (x
, 1) = gen_rtx_MULT (Pmode
,
5915 force_reg (Pmode
, XEXP (XEXP (x
, 1), 0)),
5916 GEN_INT (1 << log
));
5919 /* Put multiply first if it isn't already. */
5920 if (GET_CODE (XEXP (x
, 1)) == MULT
)
5922 rtx tmp
= XEXP (x
, 0);
5923 XEXP (x
, 0) = XEXP (x
, 1);
5928 /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
5929 into (plus (plus (mult (reg) (const)) (reg)) (const)). This can be
5930 created by virtual register instantiation, register elimination, and
5931 similar optimizations. */
5932 if (GET_CODE (XEXP (x
, 0)) == MULT
&& GET_CODE (XEXP (x
, 1)) == PLUS
)
5935 x
= gen_rtx_PLUS (Pmode
,
5936 gen_rtx_PLUS (Pmode
, XEXP (x
, 0),
5937 XEXP (XEXP (x
, 1), 0)),
5938 XEXP (XEXP (x
, 1), 1));
5942 (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
5943 into (plus (plus (mult (reg) (const)) (reg)) (const)). */
5944 else if (GET_CODE (x
) == PLUS
&& GET_CODE (XEXP (x
, 0)) == PLUS
5945 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == MULT
5946 && GET_CODE (XEXP (XEXP (x
, 0), 1)) == PLUS
5947 && CONSTANT_P (XEXP (x
, 1)))
5950 rtx other
= NULL_RTX
;
5952 if (GET_CODE (XEXP (x
, 1)) == CONST_INT
)
5954 constant
= XEXP (x
, 1);
5955 other
= XEXP (XEXP (XEXP (x
, 0), 1), 1);
5957 else if (GET_CODE (XEXP (XEXP (XEXP (x
, 0), 1), 1)) == CONST_INT
)
5959 constant
= XEXP (XEXP (XEXP (x
, 0), 1), 1);
5960 other
= XEXP (x
, 1);
5968 x
= gen_rtx_PLUS (Pmode
,
5969 gen_rtx_PLUS (Pmode
, XEXP (XEXP (x
, 0), 0),
5970 XEXP (XEXP (XEXP (x
, 0), 1), 0)),
5971 plus_constant (other
, INTVAL (constant
)));
/* After each canonicalization, stop early if the address has become
   legitimate.  */
5975 if (changed
&& legitimate_address_p (mode
, x
, FALSE
))
5978 if (GET_CODE (XEXP (x
, 0)) == MULT
)
5981 XEXP (x
, 0) = force_operand (XEXP (x
, 0), 0);
5984 if (GET_CODE (XEXP (x
, 1)) == MULT
)
5987 XEXP (x
, 1) = force_operand (XEXP (x
, 1), 0);
5991 && GET_CODE (XEXP (x
, 1)) == REG
5992 && GET_CODE (XEXP (x
, 0)) == REG
)
5995 if (flag_pic
&& SYMBOLIC_CONST (XEXP (x
, 1)))
5998 x
= legitimize_pic_address (x
, 0);
6001 if (changed
&& legitimate_address_p (mode
, x
, FALSE
))
/* Fall back: force the non-register half of reg+X into a register.  */
6004 if (GET_CODE (XEXP (x
, 0)) == REG
)
6006 register rtx temp
= gen_reg_rtx (Pmode
);
6007 register rtx val
= force_operand (XEXP (x
, 1), temp
);
6009 emit_move_insn (temp
, val
);
6015 else if (GET_CODE (XEXP (x
, 1)) == REG
)
6017 register rtx temp
= gen_reg_rtx (Pmode
);
6018 register rtx val
= force_operand (XEXP (x
, 0), temp
);
6020 emit_move_insn (temp
, val
);
6030 /* Print an integer constant expression in assembler syntax. Addition
6031 and subtraction are the only arithmetic that may appear in these
6032 expressions. FILE is the stdio stream to write to, X is the rtx, and
6033 CODE is the operand print code from the output string. */
6036 output_pic_addr_const (file
, x
, code
)
6043 switch (GET_CODE (x
))
/* SYMBOL_REF: emit the name, with @PLT for calls to non-local
   symbols under the 'P' print code (non-Mach-O only).  */
6053 assemble_name (file
, XSTR (x
, 0));
6054 if (!TARGET_MACHO
&& code
== 'P' && ! SYMBOL_REF_FLAG (x
))
6055 fputs ("@PLT", file
);
6062 ASM_GENERATE_INTERNAL_LABEL (buf
, "L", CODE_LABEL_NUMBER (x
));
6063 assemble_name (asm_out_file
, buf
);
6067 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, INTVAL (x
));
6071 /* This used to output parentheses around the expression,
6072 but that does not work on the 386 (either ATT or BSD assembler). */
6073 output_pic_addr_const (file
, XEXP (x
, 0), code
);
6077 if (GET_MODE (x
) == VOIDmode
)
6079 /* We can use %d if the number is <32 bits and positive. */
6080 if (CONST_DOUBLE_HIGH (x
) || CONST_DOUBLE_LOW (x
) < 0)
6081 fprintf (file
, "0x%lx%08lx",
6082 (unsigned long) CONST_DOUBLE_HIGH (x
),
6083 (unsigned long) CONST_DOUBLE_LOW (x
));
6085 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, CONST_DOUBLE_LOW (x
));
6088 /* We can't handle floating point constants;
6089 PRINT_OPERAND must handle them. */
6090 output_operand_lossage ("floating constant misused");
6094 /* Some assemblers need integer constants to appear first. */
6095 if (GET_CODE (XEXP (x
, 0)) == CONST_INT
)
6097 output_pic_addr_const (file
, XEXP (x
, 0), code
);
6099 output_pic_addr_const (file
, XEXP (x
, 1), code
);
6101 else if (GET_CODE (XEXP (x
, 1)) == CONST_INT
)
6103 output_pic_addr_const (file
, XEXP (x
, 1), code
);
6105 output_pic_addr_const (file
, XEXP (x
, 0), code
);
/* MINUS: bracket the difference; Intel dialect uses parentheses,
   AT&T dialect uses square brackets.  */
6113 putc (ASSEMBLER_DIALECT
== ASM_INTEL
? '(' : '[', file
);
6114 output_pic_addr_const (file
, XEXP (x
, 0), code
);
6116 output_pic_addr_const (file
, XEXP (x
, 1), code
);
6118 putc (ASSEMBLER_DIALECT
== ASM_INTEL
? ')' : ']', file
);
/* UNSPEC: print the wrapped operand followed by the relocation
   suffix selecting the GOT/TLS flavor.  */
6122 if (XVECLEN (x
, 0) != 1)
6124 output_pic_addr_const (file
, XVECEXP (x
, 0, 0), code
);
6125 switch (XINT (x
, 1))
6128 fputs ("@GOT", file
);
6131 fputs ("@GOTOFF", file
);
6133 case UNSPEC_GOTPCREL
:
6134 fputs ("@GOTPCREL(%rip)", file
);
6136 case UNSPEC_GOTTPOFF
:
6137 /* FIXME: This might be @TPOFF in Sun ld too. */
6138 fputs ("@GOTTPOFF", file
);
6141 fputs ("@TPOFF", file
);
6145 fputs ("@TPOFF", file
);
6147 fputs ("@NTPOFF", file
);
6150 fputs ("@DTPOFF", file
);
6152 case UNSPEC_GOTNTPOFF
:
6154 fputs ("@GOTTPOFF(%rip)", file
);
6156 fputs ("@GOTNTPOFF", file
);
6158 case UNSPEC_INDNTPOFF
:
6159 fputs ("@INDNTPOFF", file
);
6162 output_operand_lossage ("invalid UNSPEC as operand");
6168 output_operand_lossage ("invalid expression as operand");
6172 /* This is called from dwarfout.c via ASM_OUTPUT_DWARF_ADDR_CONST.
6173 We need to handle our special PIC relocations. */
6176 i386_dwarf_output_addr_const (file
, x
)
6181 fprintf (file
, "%s", TARGET_64BIT
? ASM_QUAD
: ASM_LONG
);
6185 fprintf (file
, "%s", ASM_LONG
);
6188 output_pic_addr_const (file
, x
, '\0');
6190 output_addr_const (file
, x
);
6194 /* This is called from dwarf2out.c via ASM_OUTPUT_DWARF_DTPREL.
6195 We need to emit DTP-relative relocations. */
6198 i386_output_dwarf_dtprel (file
, size
, x
)
6203 fputs (ASM_LONG
, file
);
6204 output_addr_const (file
, x
);
6205 fputs ("@DTPOFF", file
);
6211 fputs (", 0", file
);
6218 /* In the name of slightly smaller debug output, and to cater to
6219 general assembler losage, recognize PIC+GOTOFF and turn it back
6220 into a direct symbol reference. */
6223 i386_simplify_dwarf_addr (orig_x
)
6228 if (GET_CODE (x
) == MEM
)
6233 if (GET_CODE (x
) != CONST
6234 || GET_CODE (XEXP (x
, 0)) != UNSPEC
6235 || XINT (XEXP (x
, 0), 1) != UNSPEC_GOTPCREL
6236 || GET_CODE (orig_x
) != MEM
)
6238 return XVECEXP (XEXP (x
, 0), 0, 0);
6241 if (GET_CODE (x
) != PLUS
6242 || GET_CODE (XEXP (x
, 1)) != CONST
)
6245 if (GET_CODE (XEXP (x
, 0)) == REG
6246 && REGNO (XEXP (x
, 0)) == PIC_OFFSET_TABLE_REGNUM
)
6247 /* %ebx + GOT/GOTOFF */
6249 else if (GET_CODE (XEXP (x
, 0)) == PLUS
)
6251 /* %ebx + %reg * scale + GOT/GOTOFF */
6253 if (GET_CODE (XEXP (y
, 0)) == REG
6254 && REGNO (XEXP (y
, 0)) == PIC_OFFSET_TABLE_REGNUM
)
6256 else if (GET_CODE (XEXP (y
, 1)) == REG
6257 && REGNO (XEXP (y
, 1)) == PIC_OFFSET_TABLE_REGNUM
)
6261 if (GET_CODE (y
) != REG
6262 && GET_CODE (y
) != MULT
6263 && GET_CODE (y
) != ASHIFT
)
6269 x
= XEXP (XEXP (x
, 1), 0);
6270 if (GET_CODE (x
) == UNSPEC
6271 && ((XINT (x
, 1) == UNSPEC_GOT
&& GET_CODE (orig_x
) == MEM
)
6272 || (XINT (x
, 1) == UNSPEC_GOTOFF
&& GET_CODE (orig_x
) != MEM
)))
6275 return gen_rtx_PLUS (Pmode
, y
, XVECEXP (x
, 0, 0));
6276 return XVECEXP (x
, 0, 0);
6279 if (GET_CODE (x
) == PLUS
6280 && GET_CODE (XEXP (x
, 0)) == UNSPEC
6281 && GET_CODE (XEXP (x
, 1)) == CONST_INT
6282 && ((XINT (XEXP (x
, 0), 1) == UNSPEC_GOT
&& GET_CODE (orig_x
) == MEM
)
6283 || (XINT (XEXP (x
, 0), 1) == UNSPEC_GOTOFF
6284 && GET_CODE (orig_x
) != MEM
)))
6286 x
= gen_rtx_PLUS (VOIDmode
, XVECEXP (XEXP (x
, 0), 0, 0), XEXP (x
, 1));
6288 return gen_rtx_PLUS (Pmode
, y
, x
);
6296 put_condition_code (code
, mode
, reverse
, fp
, file
)
6298 enum machine_mode mode
;
6304 if (mode
== CCFPmode
|| mode
== CCFPUmode
)
6306 enum rtx_code second_code
, bypass_code
;
6307 ix86_fp_comparison_codes (code
, &bypass_code
, &code
, &second_code
);
6308 if (bypass_code
!= NIL
|| second_code
!= NIL
)
6310 code
= ix86_fp_compare_code_to_integer (code
);
6314 code
= reverse_condition (code
);
6325 if (mode
!= CCmode
&& mode
!= CCNOmode
&& mode
!= CCGCmode
)
6330 /* ??? Use "nbe" instead of "a" for fcmov losage on some assemblers.
6331 Those same assemblers have the same but opposite losage on cmov. */
6334 suffix
= fp
? "nbe" : "a";
6337 if (mode
== CCNOmode
|| mode
== CCGOCmode
)
6339 else if (mode
== CCmode
|| mode
== CCGCmode
)
6350 if (mode
== CCNOmode
|| mode
== CCGOCmode
)
6352 else if (mode
== CCmode
|| mode
== CCGCmode
)
6361 suffix
= fp
? "nb" : "ae";
6364 if (mode
!= CCmode
&& mode
!= CCGCmode
&& mode
!= CCNOmode
)
6374 suffix
= fp
? "u" : "p";
6377 suffix
= fp
? "nu" : "np";
6382 fputs (suffix
, file
);
6386 print_reg (x
, code
, file
)
6391 if (REGNO (x
) == ARG_POINTER_REGNUM
6392 || REGNO (x
) == FRAME_POINTER_REGNUM
6393 || REGNO (x
) == FLAGS_REG
6394 || REGNO (x
) == FPSR_REG
)
6397 if (ASSEMBLER_DIALECT
== ASM_ATT
|| USER_LABEL_PREFIX
[0] == 0)
6400 if (code
== 'w' || MMX_REG_P (x
))
6402 else if (code
== 'b')
6404 else if (code
== 'k')
6406 else if (code
== 'q')
6408 else if (code
== 'y')
6410 else if (code
== 'h')
6413 code
= GET_MODE_SIZE (GET_MODE (x
));
6415 /* Irritatingly, AMD extended registers use different naming convention
6416 from the normal registers. */
6417 if (REX_INT_REG_P (x
))
6424 error ("extended registers have no high halves");
6427 fprintf (file
, "r%ib", REGNO (x
) - FIRST_REX_INT_REG
+ 8);
6430 fprintf (file
, "r%iw", REGNO (x
) - FIRST_REX_INT_REG
+ 8);
6433 fprintf (file
, "r%id", REGNO (x
) - FIRST_REX_INT_REG
+ 8);
6436 fprintf (file
, "r%i", REGNO (x
) - FIRST_REX_INT_REG
+ 8);
6439 error ("unsupported operand size for extended register");
6447 if (STACK_TOP_P (x
))
6449 fputs ("st(0)", file
);
6456 if (! ANY_FP_REG_P (x
))
6457 putc (code
== 8 && TARGET_64BIT
? 'r' : 'e', file
);
6461 fputs (hi_reg_name
[REGNO (x
)], file
);
6464 fputs (qi_reg_name
[REGNO (x
)], file
);
6467 fputs (qi_high_reg_name
[REGNO (x
)], file
);
6474 /* Locate some local-dynamic symbol still in use by this function
6475 so that we can print its name in some tls_local_dynamic_base
6479 get_some_local_dynamic_name ()
6483 if (cfun
->machine
->some_ld_name
)
6484 return cfun
->machine
->some_ld_name
;
6486 for (insn
= get_insns (); insn
; insn
= NEXT_INSN (insn
))
6488 && for_each_rtx (&PATTERN (insn
), get_some_local_dynamic_name_1
, 0))
6489 return cfun
->machine
->some_ld_name
;
6495 get_some_local_dynamic_name_1 (px
, data
)
6497 void *data ATTRIBUTE_UNUSED
;
6501 if (GET_CODE (x
) == SYMBOL_REF
6502 && local_dynamic_symbolic_operand (x
, Pmode
))
6504 cfun
->machine
->some_ld_name
= XSTR (x
, 0);
6512 L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
6513 C -- print opcode suffix for set/cmov insn.
6514 c -- like C, but print reversed condition
6515 F,f -- likewise, but for floating-point.
6516 O -- if CMOV_SUN_AS_SYNTAX, expand to "w.", "l." or "q.", otherwise
6518 R -- print the prefix for register names.
6519 z -- print the opcode suffix for the size of the current operand.
6520 * -- print a star (in certain assembler syntax)
6521 A -- print an absolute memory reference.
6522 w -- print the operand as if it's a "word" (HImode) even if it isn't.
6523 s -- print a shift double count, followed by the assemblers argument
6525 b -- print the QImode name of the register for the indicated operand.
6526 %b0 would print %al if operands[0] is reg 0.
6527 w -- likewise, print the HImode name of the register.
6528 k -- likewise, print the SImode name of the register.
6529 q -- likewise, print the DImode name of the register.
6530 h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
6531 y -- print "st(0)" instead of "st" as a register.
6532 D -- print condition for SSE cmp instruction.
6533 P -- if PIC, print an @PLT suffix.
6534 X -- don't print any sort of PIC '@' suffix for a symbol.
6535 & -- print some in-use local-dynamic symbol name.
6539 print_operand (file
, x
, code
)
6549 if (ASSEMBLER_DIALECT
== ASM_ATT
)
6554 assemble_name (file
, get_some_local_dynamic_name ());
6558 if (ASSEMBLER_DIALECT
== ASM_ATT
)
6560 else if (ASSEMBLER_DIALECT
== ASM_INTEL
)
6562 /* Intel syntax. For absolute addresses, registers should not
6563 be surrounded by braces. */
6564 if (GET_CODE (x
) != REG
)
6567 PRINT_OPERAND (file
, x
, 0);
6575 PRINT_OPERAND (file
, x
, 0);
6580 if (ASSEMBLER_DIALECT
== ASM_ATT
)
6585 if (ASSEMBLER_DIALECT
== ASM_ATT
)
6590 if (ASSEMBLER_DIALECT
== ASM_ATT
)
6595 if (ASSEMBLER_DIALECT
== ASM_ATT
)
6600 if (ASSEMBLER_DIALECT
== ASM_ATT
)
6605 if (ASSEMBLER_DIALECT
== ASM_ATT
)
6610 /* 387 opcodes don't get size suffixes if the operands are
6612 if (STACK_REG_P (x
))
6615 /* Likewise if using Intel opcodes. */
6616 if (ASSEMBLER_DIALECT
== ASM_INTEL
)
6619 /* This is the size of op from size of operand. */
6620 switch (GET_MODE_SIZE (GET_MODE (x
)))
6623 #ifdef HAVE_GAS_FILDS_FISTS
6629 if (GET_MODE (x
) == SFmode
)
6644 if (GET_MODE_CLASS (GET_MODE (x
)) == MODE_INT
)
6646 #ifdef GAS_MNEMONICS
6672 if (GET_CODE (x
) == CONST_INT
|| ! SHIFT_DOUBLE_OMITS_COUNT
)
6674 PRINT_OPERAND (file
, x
, 0);
6680 /* Little bit of braindamage here. The SSE compare instructions
6681 does use completely different names for the comparisons that the
6682 fp conditional moves. */
6683 switch (GET_CODE (x
))
6698 fputs ("unord", file
);
6702 fputs ("neq", file
);
6706 fputs ("nlt", file
);
6710 fputs ("nle", file
);
6713 fputs ("ord", file
);
6721 #ifdef CMOV_SUN_AS_SYNTAX
6722 if (ASSEMBLER_DIALECT
== ASM_ATT
)
6724 switch (GET_MODE (x
))
6726 case HImode
: putc ('w', file
); break;
6728 case SFmode
: putc ('l', file
); break;
6730 case DFmode
: putc ('q', file
); break;
6738 put_condition_code (GET_CODE (x
), GET_MODE (XEXP (x
, 0)), 0, 0, file
);
6741 #ifdef CMOV_SUN_AS_SYNTAX
6742 if (ASSEMBLER_DIALECT
== ASM_ATT
)
6745 put_condition_code (GET_CODE (x
), GET_MODE (XEXP (x
, 0)), 0, 1, file
);
6748 /* Like above, but reverse condition */
6750 /* Check to see if argument to %c is really a constant
6751 and not a condition code which needs to be reversed. */
6752 if (GET_RTX_CLASS (GET_CODE (x
)) != '<')
6754 output_operand_lossage ("operand is neither a constant nor a condition code, invalid operand code 'c'");
6757 put_condition_code (GET_CODE (x
), GET_MODE (XEXP (x
, 0)), 1, 0, file
);
6760 #ifdef CMOV_SUN_AS_SYNTAX
6761 if (ASSEMBLER_DIALECT
== ASM_ATT
)
6764 put_condition_code (GET_CODE (x
), GET_MODE (XEXP (x
, 0)), 1, 1, file
);
6770 if (!optimize
|| optimize_size
|| !TARGET_BRANCH_PREDICTION_HINTS
)
6773 x
= find_reg_note (current_output_insn
, REG_BR_PROB
, 0);
6776 int pred_val
= INTVAL (XEXP (x
, 0));
6778 if (pred_val
< REG_BR_PROB_BASE
* 45 / 100
6779 || pred_val
> REG_BR_PROB_BASE
* 55 / 100)
6781 int taken
= pred_val
> REG_BR_PROB_BASE
/ 2;
6782 int cputaken
= final_forward_branch_p (current_output_insn
) == 0;
6784 /* Emit hints only in the case default branch prediction
6785 heruistics would fail. */
6786 if (taken
!= cputaken
)
6788 /* We use 3e (DS) prefix for taken branches and
6789 2e (CS) prefix for not taken branches. */
6791 fputs ("ds ; ", file
);
6793 fputs ("cs ; ", file
);
6800 output_operand_lossage ("invalid operand code `%c'", code
);
6804 if (GET_CODE (x
) == REG
)
6806 PRINT_REG (x
, code
, file
);
6809 else if (GET_CODE (x
) == MEM
)
6811 /* No `byte ptr' prefix for call instructions. */
6812 if (ASSEMBLER_DIALECT
== ASM_INTEL
&& code
!= 'X' && code
!= 'P')
6815 switch (GET_MODE_SIZE (GET_MODE (x
)))
6817 case 1: size
= "BYTE"; break;
6818 case 2: size
= "WORD"; break;
6819 case 4: size
= "DWORD"; break;
6820 case 8: size
= "QWORD"; break;
6821 case 12: size
= "XWORD"; break;
6822 case 16: size
= "XMMWORD"; break;
6827 /* Check for explicit size override (codes 'b', 'w' and 'k') */
6830 else if (code
== 'w')
6832 else if (code
== 'k')
6836 fputs (" PTR ", file
);
6840 if (flag_pic
&& CONSTANT_ADDRESS_P (x
))
6841 output_pic_addr_const (file
, x
, code
);
6842 /* Avoid (%rip) for call operands. */
6843 else if (CONSTANT_ADDRESS_P (x
) && code
== 'P'
6844 && GET_CODE (x
) != CONST_INT
)
6845 output_addr_const (file
, x
);
6846 else if (this_is_asm_operands
&& ! address_operand (x
, VOIDmode
))
6847 output_operand_lossage ("invalid constraints for operand");
6852 else if (GET_CODE (x
) == CONST_DOUBLE
&& GET_MODE (x
) == SFmode
)
6857 REAL_VALUE_FROM_CONST_DOUBLE (r
, x
);
6858 REAL_VALUE_TO_TARGET_SINGLE (r
, l
);
6860 if (ASSEMBLER_DIALECT
== ASM_ATT
)
6862 fprintf (file
, "0x%lx", l
);
6865 /* These float cases don't actually occur as immediate operands. */
6866 else if (GET_CODE (x
) == CONST_DOUBLE
&& GET_MODE (x
) == DFmode
)
6870 real_to_decimal (dstr
, CONST_DOUBLE_REAL_VALUE (x
), sizeof (dstr
), 0, 1);
6871 fprintf (file
, "%s", dstr
);
6874 else if (GET_CODE (x
) == CONST_DOUBLE
6875 && (GET_MODE (x
) == XFmode
|| GET_MODE (x
) == TFmode
))
6879 real_to_decimal (dstr
, CONST_DOUBLE_REAL_VALUE (x
), sizeof (dstr
), 0, 1);
6880 fprintf (file
, "%s", dstr
);
6887 if (GET_CODE (x
) == CONST_INT
|| GET_CODE (x
) == CONST_DOUBLE
)
6889 if (ASSEMBLER_DIALECT
== ASM_ATT
)
6892 else if (GET_CODE (x
) == CONST
|| GET_CODE (x
) == SYMBOL_REF
6893 || GET_CODE (x
) == LABEL_REF
)
6895 if (ASSEMBLER_DIALECT
== ASM_ATT
)
6898 fputs ("OFFSET FLAT:", file
);
6901 if (GET_CODE (x
) == CONST_INT
)
6902 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, INTVAL (x
));
6904 output_pic_addr_const (file
, x
, code
);
6906 output_addr_const (file
, x
);
6910 /* Print a memory operand whose address is ADDR. */
6913 print_operand_address (file
, addr
)
6917 struct ix86_address parts
;
6918 rtx base
, index
, disp
;
6921 if (GET_CODE (addr
) == UNSPEC
&& XINT (addr
, 1) == UNSPEC_TP
)
6923 if (ASSEMBLER_DIALECT
== ASM_INTEL
)
6924 fputs ("DWORD PTR ", file
);
6925 if (ASSEMBLER_DIALECT
== ASM_ATT
|| USER_LABEL_PREFIX
[0] == 0)
6928 fputs ("fs:0", file
);
6930 fputs ("gs:0", file
);
6934 if (! ix86_decompose_address (addr
, &parts
))
6938 index
= parts
.index
;
6940 scale
= parts
.scale
;
6942 if (!base
&& !index
)
6944 /* Displacement only requires special attention. */
6946 if (GET_CODE (disp
) == CONST_INT
)
6948 if (ASSEMBLER_DIALECT
== ASM_INTEL
)
6950 if (USER_LABEL_PREFIX
[0] == 0)
6952 fputs ("ds:", file
);
6954 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, INTVAL (addr
));
6957 output_pic_addr_const (file
, addr
, 0);
6959 output_addr_const (file
, addr
);
6961 /* Use one byte shorter RIP relative addressing for 64bit mode. */
6963 && ((GET_CODE (addr
) == SYMBOL_REF
6964 && ! tls_symbolic_operand (addr
, GET_MODE (addr
)))
6965 || GET_CODE (addr
) == LABEL_REF
6966 || (GET_CODE (addr
) == CONST
6967 && GET_CODE (XEXP (addr
, 0)) == PLUS
6968 && (GET_CODE (XEXP (XEXP (addr
, 0), 0)) == SYMBOL_REF
6969 || GET_CODE (XEXP (XEXP (addr
, 0), 0)) == LABEL_REF
)
6970 && GET_CODE (XEXP (XEXP (addr
, 0), 1)) == CONST_INT
)))
6971 fputs ("(%rip)", file
);
6975 if (ASSEMBLER_DIALECT
== ASM_ATT
)
6980 output_pic_addr_const (file
, disp
, 0);
6981 else if (GET_CODE (disp
) == LABEL_REF
)
6982 output_asm_label (disp
);
6984 output_addr_const (file
, disp
);
6989 PRINT_REG (base
, 0, file
);
6993 PRINT_REG (index
, 0, file
);
6995 fprintf (file
, ",%d", scale
);
7001 rtx offset
= NULL_RTX
;
7005 /* Pull out the offset of a symbol; print any symbol itself. */
7006 if (GET_CODE (disp
) == CONST
7007 && GET_CODE (XEXP (disp
, 0)) == PLUS
7008 && GET_CODE (XEXP (XEXP (disp
, 0), 1)) == CONST_INT
)
7010 offset
= XEXP (XEXP (disp
, 0), 1);
7011 disp
= gen_rtx_CONST (VOIDmode
,
7012 XEXP (XEXP (disp
, 0), 0));
7016 output_pic_addr_const (file
, disp
, 0);
7017 else if (GET_CODE (disp
) == LABEL_REF
)
7018 output_asm_label (disp
);
7019 else if (GET_CODE (disp
) == CONST_INT
)
7022 output_addr_const (file
, disp
);
7028 PRINT_REG (base
, 0, file
);
7031 if (INTVAL (offset
) >= 0)
7033 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, INTVAL (offset
));
7037 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, INTVAL (offset
));
7044 PRINT_REG (index
, 0, file
);
7046 fprintf (file
, "*%d", scale
);
7054 output_addr_const_extra (file
, x
)
7060 if (GET_CODE (x
) != UNSPEC
)
7063 op
= XVECEXP (x
, 0, 0);
7064 switch (XINT (x
, 1))
7066 case UNSPEC_GOTTPOFF
:
7067 output_addr_const (file
, op
);
7068 /* FIXME: This might be @TPOFF in Sun ld. */
7069 fputs ("@GOTTPOFF", file
);
7072 output_addr_const (file
, op
);
7073 fputs ("@TPOFF", file
);
7076 output_addr_const (file
, op
);
7078 fputs ("@TPOFF", file
);
7080 fputs ("@NTPOFF", file
);
7083 output_addr_const (file
, op
);
7084 fputs ("@DTPOFF", file
);
7086 case UNSPEC_GOTNTPOFF
:
7087 output_addr_const (file
, op
);
7089 fputs ("@GOTTPOFF(%rip)", file
);
7091 fputs ("@GOTNTPOFF", file
);
7093 case UNSPEC_INDNTPOFF
:
7094 output_addr_const (file
, op
);
7095 fputs ("@INDNTPOFF", file
);
7105 /* Split one or more DImode RTL references into pairs of SImode
7106 references. The RTL can be REG, offsettable MEM, integer constant, or
7107 CONST_DOUBLE. "operands" is a pointer to an array of DImode RTL to
7108 split and "num" is its length. lo_half and hi_half are output arrays
7109 that parallel "operands". */
7112 split_di (operands
, num
, lo_half
, hi_half
)
7115 rtx lo_half
[], hi_half
[];
7119 rtx op
= operands
[num
];
7121 /* simplify_subreg refuse to split volatile memory addresses,
7122 but we still have to handle it. */
7123 if (GET_CODE (op
) == MEM
)
7125 lo_half
[num
] = adjust_address (op
, SImode
, 0);
7126 hi_half
[num
] = adjust_address (op
, SImode
, 4);
7130 lo_half
[num
] = simplify_gen_subreg (SImode
, op
,
7131 GET_MODE (op
) == VOIDmode
7132 ? DImode
: GET_MODE (op
), 0);
7133 hi_half
[num
] = simplify_gen_subreg (SImode
, op
,
7134 GET_MODE (op
) == VOIDmode
7135 ? DImode
: GET_MODE (op
), 4);
7139 /* Split one or more TImode RTL references into pairs of SImode
7140 references. The RTL can be REG, offsettable MEM, integer constant, or
7141 CONST_DOUBLE. "operands" is a pointer to an array of DImode RTL to
7142 split and "num" is its length. lo_half and hi_half are output arrays
7143 that parallel "operands". */
7146 split_ti (operands
, num
, lo_half
, hi_half
)
7149 rtx lo_half
[], hi_half
[];
7153 rtx op
= operands
[num
];
7155 /* simplify_subreg refuse to split volatile memory addresses, but we
7156 still have to handle it. */
7157 if (GET_CODE (op
) == MEM
)
7159 lo_half
[num
] = adjust_address (op
, DImode
, 0);
7160 hi_half
[num
] = adjust_address (op
, DImode
, 8);
7164 lo_half
[num
] = simplify_gen_subreg (DImode
, op
, TImode
, 0);
7165 hi_half
[num
] = simplify_gen_subreg (DImode
, op
, TImode
, 8);
7170 /* Output code to perform a 387 binary operation in INSN, one of PLUS,
7171 MINUS, MULT or DIV. OPERANDS are the insn operands, where operands[3]
7172 is the expression of the binary operation. The output may either be
7173 emitted here, or returned to the caller, like all output_* functions.
7175 There is no guarantee that the operands are the same mode, as they
7176 might be within FLOAT or FLOAT_EXTEND expressions. */
7178 #ifndef SYSV386_COMPAT
7179 /* Set to 1 for compatibility with brain-damaged assemblers. No-one
7180 wants to fix the assemblers because that causes incompatibility
7181 with gcc. No-one wants to fix gcc because that causes
7182 incompatibility with assemblers... You can use the option of
7183 -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way. */
7184 #define SYSV386_COMPAT 1
7188 output_387_binary_op (insn
, operands
)
7192 static char buf
[30];
7195 int is_sse
= SSE_REG_P (operands
[0]) | SSE_REG_P (operands
[1]) | SSE_REG_P (operands
[2]);
7197 #ifdef ENABLE_CHECKING
7198 /* Even if we do not want to check the inputs, this documents input
7199 constraints. Which helps in understanding the following code. */
7200 if (STACK_REG_P (operands
[0])
7201 && ((REG_P (operands
[1])
7202 && REGNO (operands
[0]) == REGNO (operands
[1])
7203 && (STACK_REG_P (operands
[2]) || GET_CODE (operands
[2]) == MEM
))
7204 || (REG_P (operands
[2])
7205 && REGNO (operands
[0]) == REGNO (operands
[2])
7206 && (STACK_REG_P (operands
[1]) || GET_CODE (operands
[1]) == MEM
)))
7207 && (STACK_TOP_P (operands
[1]) || STACK_TOP_P (operands
[2])))
7213 switch (GET_CODE (operands
[3]))
7216 if (GET_MODE_CLASS (GET_MODE (operands
[1])) == MODE_INT
7217 || GET_MODE_CLASS (GET_MODE (operands
[2])) == MODE_INT
)
7225 if (GET_MODE_CLASS (GET_MODE (operands
[1])) == MODE_INT
7226 || GET_MODE_CLASS (GET_MODE (operands
[2])) == MODE_INT
)
7234 if (GET_MODE_CLASS (GET_MODE (operands
[1])) == MODE_INT
7235 || GET_MODE_CLASS (GET_MODE (operands
[2])) == MODE_INT
)
7243 if (GET_MODE_CLASS (GET_MODE (operands
[1])) == MODE_INT
7244 || GET_MODE_CLASS (GET_MODE (operands
[2])) == MODE_INT
)
7258 if (GET_MODE (operands
[0]) == SFmode
)
7259 strcat (buf
, "ss\t{%2, %0|%0, %2}");
7261 strcat (buf
, "sd\t{%2, %0|%0, %2}");
7266 switch (GET_CODE (operands
[3]))
7270 if (REG_P (operands
[2]) && REGNO (operands
[0]) == REGNO (operands
[2]))
7272 rtx temp
= operands
[2];
7273 operands
[2] = operands
[1];
7277 /* know operands[0] == operands[1]. */
7279 if (GET_CODE (operands
[2]) == MEM
)
7285 if (find_regno_note (insn
, REG_DEAD
, REGNO (operands
[2])))
7287 if (STACK_TOP_P (operands
[0]))
7288 /* How is it that we are storing to a dead operand[2]?
7289 Well, presumably operands[1] is dead too. We can't
7290 store the result to st(0) as st(0) gets popped on this
7291 instruction. Instead store to operands[2] (which I
7292 think has to be st(1)). st(1) will be popped later.
7293 gcc <= 2.8.1 didn't have this check and generated
7294 assembly code that the Unixware assembler rejected. */
7295 p
= "p\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
7297 p
= "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
7301 if (STACK_TOP_P (operands
[0]))
7302 p
= "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
7304 p
= "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
7309 if (GET_CODE (operands
[1]) == MEM
)
7315 if (GET_CODE (operands
[2]) == MEM
)
7321 if (find_regno_note (insn
, REG_DEAD
, REGNO (operands
[2])))
7324 /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
7325 derived assemblers, confusingly reverse the direction of
7326 the operation for fsub{r} and fdiv{r} when the
7327 destination register is not st(0). The Intel assembler
7328 doesn't have this brain damage. Read !SYSV386_COMPAT to
7329 figure out what the hardware really does. */
7330 if (STACK_TOP_P (operands
[0]))
7331 p
= "{p\t%0, %2|rp\t%2, %0}";
7333 p
= "{rp\t%2, %0|p\t%0, %2}";
7335 if (STACK_TOP_P (operands
[0]))
7336 /* As above for fmul/fadd, we can't store to st(0). */
7337 p
= "rp\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
7339 p
= "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
7344 if (find_regno_note (insn
, REG_DEAD
, REGNO (operands
[1])))
7347 if (STACK_TOP_P (operands
[0]))
7348 p
= "{rp\t%0, %1|p\t%1, %0}";
7350 p
= "{p\t%1, %0|rp\t%0, %1}";
7352 if (STACK_TOP_P (operands
[0]))
7353 p
= "p\t{%0, %1|%1, %0}"; /* st(1) = st(1) op st(0); pop */
7355 p
= "rp\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2); pop */
7360 if (STACK_TOP_P (operands
[0]))
7362 if (STACK_TOP_P (operands
[1]))
7363 p
= "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
7365 p
= "r\t{%y1, %0|%0, %y1}"; /* st(0) = st(r1) op st(0) */
7368 else if (STACK_TOP_P (operands
[1]))
7371 p
= "{\t%1, %0|r\t%0, %1}";
7373 p
= "r\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2) */
7379 p
= "{r\t%2, %0|\t%0, %2}";
7381 p
= "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
7394 /* Output code to initialize control word copies used by
7395 trunc?f?i patterns. NORMAL is set to current control word, while ROUND_DOWN
7396 is set to control word rounding downwards. */
7398 emit_i387_cw_initialization (normal
, round_down
)
7399 rtx normal
, round_down
;
7401 rtx reg
= gen_reg_rtx (HImode
);
7403 emit_insn (gen_x86_fnstcw_1 (normal
));
7404 emit_move_insn (reg
, normal
);
7405 if (!TARGET_PARTIAL_REG_STALL
&& !optimize_size
7407 emit_insn (gen_movsi_insv_1 (reg
, GEN_INT (0xc)));
7409 emit_insn (gen_iorhi3 (reg
, reg
, GEN_INT (0xc00)));
7410 emit_move_insn (round_down
, reg
);
7413 /* Output code for INSN to convert a float to a signed int. OPERANDS
7414 are the insn operands. The output may be [HSD]Imode and the input
7415 operand may be [SDX]Fmode. */
7418 output_fix_trunc (insn
, operands
)
7422 int stack_top_dies
= find_regno_note (insn
, REG_DEAD
, FIRST_STACK_REG
) != 0;
7423 int dimode_p
= GET_MODE (operands
[0]) == DImode
;
7425 /* Jump through a hoop or two for DImode, since the hardware has no
7426 non-popping instruction. We used to do this a different way, but
7427 that was somewhat fragile and broke with post-reload splitters. */
7428 if (dimode_p
&& !stack_top_dies
)
7429 output_asm_insn ("fld\t%y1", operands
);
7431 if (!STACK_TOP_P (operands
[1]))
7434 if (GET_CODE (operands
[0]) != MEM
)
7437 output_asm_insn ("fldcw\t%3", operands
);
7438 if (stack_top_dies
|| dimode_p
)
7439 output_asm_insn ("fistp%z0\t%0", operands
);
7441 output_asm_insn ("fist%z0\t%0", operands
);
7442 output_asm_insn ("fldcw\t%2", operands
);
7447 /* Output code for INSN to compare OPERANDS. EFLAGS_P is 1 when fcomi
7448 should be used and 2 when fnstsw should be used. UNORDERED_P is true
7449 when fucom should be used. */
7452 output_fp_compare (insn
, operands
, eflags_p
, unordered_p
)
7455 int eflags_p
, unordered_p
;
7458 rtx cmp_op0
= operands
[0];
7459 rtx cmp_op1
= operands
[1];
7460 int is_sse
= SSE_REG_P (operands
[0]) | SSE_REG_P (operands
[1]);
7465 cmp_op1
= operands
[2];
7469 if (GET_MODE (operands
[0]) == SFmode
)
7471 return "ucomiss\t{%1, %0|%0, %1}";
7473 return "comiss\t{%1, %0|%0, %y}";
7476 return "ucomisd\t{%1, %0|%0, %1}";
7478 return "comisd\t{%1, %0|%0, %y}";
7481 if (! STACK_TOP_P (cmp_op0
))
7484 stack_top_dies
= find_regno_note (insn
, REG_DEAD
, FIRST_STACK_REG
) != 0;
7486 if (STACK_REG_P (cmp_op1
)
7488 && find_regno_note (insn
, REG_DEAD
, REGNO (cmp_op1
))
7489 && REGNO (cmp_op1
) != FIRST_STACK_REG
)
7491 /* If both the top of the 387 stack dies, and the other operand
7492 is also a stack register that dies, then this must be a
7493 `fcompp' float compare */
7497 /* There is no double popping fcomi variant. Fortunately,
7498 eflags is immune from the fstp's cc clobbering. */
7500 output_asm_insn ("fucomip\t{%y1, %0|%0, %y1}", operands
);
7502 output_asm_insn ("fcomip\t{%y1, %0|%0, %y1}", operands
);
7510 return "fucompp\n\tfnstsw\t%0";
7512 return "fcompp\n\tfnstsw\t%0";
7525 /* Encoded here as eflags_p | intmode | unordered_p | stack_top_dies. */
7527 static const char * const alt
[24] =
7539 "fcomi\t{%y1, %0|%0, %y1}",
7540 "fcomip\t{%y1, %0|%0, %y1}",
7541 "fucomi\t{%y1, %0|%0, %y1}",
7542 "fucomip\t{%y1, %0|%0, %y1}",
7549 "fcom%z2\t%y2\n\tfnstsw\t%0",
7550 "fcomp%z2\t%y2\n\tfnstsw\t%0",
7551 "fucom%z2\t%y2\n\tfnstsw\t%0",
7552 "fucomp%z2\t%y2\n\tfnstsw\t%0",
7554 "ficom%z2\t%y2\n\tfnstsw\t%0",
7555 "ficomp%z2\t%y2\n\tfnstsw\t%0",
7563 mask
= eflags_p
<< 3;
7564 mask
|= (GET_MODE_CLASS (GET_MODE (operands
[1])) == MODE_INT
) << 2;
7565 mask
|= unordered_p
<< 1;
7566 mask
|= stack_top_dies
;
7579 ix86_output_addr_vec_elt (file
, value
)
7583 const char *directive
= ASM_LONG
;
7588 directive
= ASM_QUAD
;
7594 fprintf (file
, "%s%s%d\n", directive
, LPREFIX
, value
);
7598 ix86_output_addr_diff_elt (file
, value
, rel
)
7603 fprintf (file
, "%s%s%d-%s%d\n",
7604 ASM_LONG
, LPREFIX
, value
, LPREFIX
, rel
);
7605 else if (HAVE_AS_GOTOFF_IN_DATA
)
7606 fprintf (file
, "%s%s%d@GOTOFF\n", ASM_LONG
, LPREFIX
, value
);
7608 else if (TARGET_MACHO
)
7609 fprintf (file
, "%s%s%d-%s\n", ASM_LONG
, LPREFIX
, value
,
7610 machopic_function_base_name () + 1);
7613 asm_fprintf (file
, "%s%U%s+[.-%s%d]\n",
7614 ASM_LONG
, GOT_SYMBOL_NAME
, LPREFIX
, value
);
7617 /* Generate either "mov $0, reg" or "xor reg, reg", as appropriate
7621 ix86_expand_clear (dest
)
7626 /* We play register width games, which are only valid after reload. */
7627 if (!reload_completed
)
7630 /* Avoid HImode and its attendant prefix byte. */
7631 if (GET_MODE_SIZE (GET_MODE (dest
)) < 4)
7632 dest
= gen_rtx_REG (SImode
, REGNO (dest
));
7634 tmp
= gen_rtx_SET (VOIDmode
, dest
, const0_rtx
);
7636 /* This predicate should match that for movsi_xor and movdi_xor_rex64. */
7637 if (reload_completed
&& (!TARGET_USE_MOV0
|| optimize_size
))
7639 rtx clob
= gen_rtx_CLOBBER (VOIDmode
, gen_rtx_REG (CCmode
, 17));
7640 tmp
= gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (2, tmp
, clob
));
7646 /* X is an unchanging MEM. If it is a constant pool reference, return
7647 the constant pool rtx, else NULL. */
7650 maybe_get_pool_constant (x
)
7655 if (flag_pic
&& ! TARGET_64BIT
)
7657 if (GET_CODE (x
) != PLUS
)
7659 if (XEXP (x
, 0) != pic_offset_table_rtx
)
7662 if (GET_CODE (x
) != CONST
)
7665 if (GET_CODE (x
) != UNSPEC
)
7667 if (XINT (x
, 1) != UNSPEC_GOTOFF
)
7669 x
= XVECEXP (x
, 0, 0);
7672 if (GET_CODE (x
) == SYMBOL_REF
&& CONSTANT_POOL_ADDRESS_P (x
))
7673 return get_pool_constant (x
);
7679 ix86_expand_move (mode
, operands
)
7680 enum machine_mode mode
;
7683 int strict
= (reload_in_progress
|| reload_completed
);
7684 rtx insn
, op0
, op1
, tmp
;
7689 /* ??? We have a slight problem. We need to say that tls symbols are
7690 not legitimate constants so that reload does not helpfully reload
7691 these constants from a REG_EQUIV, which we cannot handle. (Recall
7692 that general- and local-dynamic address resolution requires a
7695 However, if we say that tls symbols are not legitimate constants,
7696 then emit_move_insn helpfully drop them into the constant pool.
7698 It is far easier to work around emit_move_insn than reload. Recognize
7699 the MEM that we would have created and extract the symbol_ref. */
7702 && GET_CODE (op1
) == MEM
7703 && RTX_UNCHANGING_P (op1
))
7705 tmp
= maybe_get_pool_constant (op1
);
7706 /* Note that we only care about symbolic constants here, which
7707 unlike CONST_INT will always have a proper mode. */
7708 if (tmp
&& GET_MODE (tmp
) == Pmode
)
7712 if (tls_symbolic_operand (op1
, Pmode
))
7714 op1
= legitimize_address (op1
, op1
, VOIDmode
);
7715 if (GET_CODE (op0
) == MEM
)
7717 tmp
= gen_reg_rtx (mode
);
7718 emit_insn (gen_rtx_SET (VOIDmode
, tmp
, op1
));
7722 else if (flag_pic
&& mode
== Pmode
&& symbolic_operand (op1
, Pmode
))
7727 rtx temp
= ((reload_in_progress
7728 || ((op0
&& GET_CODE (op0
) == REG
)
7730 ? op0
: gen_reg_rtx (Pmode
));
7731 op1
= machopic_indirect_data_reference (op1
, temp
);
7732 op1
= machopic_legitimize_pic_address (op1
, mode
,
7733 temp
== op1
? 0 : temp
);
7737 if (MACHOPIC_INDIRECT
)
7738 op1
= machopic_indirect_data_reference (op1
, 0);
7742 insn
= gen_rtx_SET (VOIDmode
, op0
, op1
);
7746 #endif /* TARGET_MACHO */
7747 if (GET_CODE (op0
) == MEM
)
7748 op1
= force_reg (Pmode
, op1
);
7752 if (GET_CODE (temp
) != REG
)
7753 temp
= gen_reg_rtx (Pmode
);
7754 temp
= legitimize_pic_address (op1
, temp
);
7762 if (GET_CODE (op0
) == MEM
7763 && (PUSH_ROUNDING (GET_MODE_SIZE (mode
)) != GET_MODE_SIZE (mode
)
7764 || !push_operand (op0
, mode
))
7765 && GET_CODE (op1
) == MEM
)
7766 op1
= force_reg (mode
, op1
);
7768 if (push_operand (op0
, mode
)
7769 && ! general_no_elim_operand (op1
, mode
))
7770 op1
= copy_to_mode_reg (mode
, op1
);
7772 /* Force large constants in 64bit compilation into register
7773 to get them CSEed. */
7774 if (TARGET_64BIT
&& mode
== DImode
7775 && immediate_operand (op1
, mode
)
7776 && !x86_64_zero_extended_value (op1
)
7777 && !register_operand (op0
, mode
)
7778 && optimize
&& !reload_completed
&& !reload_in_progress
)
7779 op1
= copy_to_mode_reg (mode
, op1
);
7781 if (FLOAT_MODE_P (mode
))
7783 /* If we are loading a floating point constant to a register,
7784 force the value to memory now, since we'll get better code
7785 out the back end. */
7789 else if (GET_CODE (op1
) == CONST_DOUBLE
7790 && register_operand (op0
, mode
))
7791 op1
= validize_mem (force_const_mem (mode
, op1
));
7795 insn
= gen_rtx_SET (VOIDmode
, op0
, op1
);
7801 ix86_expand_vector_move (mode
, operands
)
7802 enum machine_mode mode
;
7805 /* Force constants other than zero into memory. We do not know how
7806 the instructions used to build constants modify the upper 64 bits
7807 of the register, once we have that information we may be able
7808 to handle some of them more efficiently. */
7809 if ((reload_in_progress
| reload_completed
) == 0
7810 && register_operand (operands
[0], mode
)
7811 && CONSTANT_P (operands
[1]))
7812 operands
[1] = force_const_mem (mode
, operands
[1]);
7814 /* Make operand1 a register if it isn't already. */
7816 && !register_operand (operands
[0], mode
)
7817 && !register_operand (operands
[1], mode
))
7819 rtx temp
= force_reg (GET_MODE (operands
[1]), operands
[1]);
7820 emit_move_insn (operands
[0], temp
);
7824 emit_insn (gen_rtx_SET (VOIDmode
, operands
[0], operands
[1]));
7827 /* Attempt to expand a binary operator. Make the expansion closer to the
7828 actual machine, then just general_operand, which will allow 3 separate
7829 memory references (one output, two input) in a single insn. */
7832 ix86_expand_binary_operator (code
, mode
, operands
)
7834 enum machine_mode mode
;
7837 int matching_memory
;
7838 rtx src1
, src2
, dst
, op
, clob
;
7844 /* Recognize <var1> = <value> <op> <var1> for commutative operators */
7845 if (GET_RTX_CLASS (code
) == 'c'
7846 && (rtx_equal_p (dst
, src2
)
7847 || immediate_operand (src1
, mode
)))
7854 /* If the destination is memory, and we do not have matching source
7855 operands, do things in registers. */
7856 matching_memory
= 0;
7857 if (GET_CODE (dst
) == MEM
)
7859 if (rtx_equal_p (dst
, src1
))
7860 matching_memory
= 1;
7861 else if (GET_RTX_CLASS (code
) == 'c'
7862 && rtx_equal_p (dst
, src2
))
7863 matching_memory
= 2;
7865 dst
= gen_reg_rtx (mode
);
7868 /* Both source operands cannot be in memory. */
7869 if (GET_CODE (src1
) == MEM
&& GET_CODE (src2
) == MEM
)
7871 if (matching_memory
!= 2)
7872 src2
= force_reg (mode
, src2
);
7874 src1
= force_reg (mode
, src1
);
7877 /* If the operation is not commutable, source 1 cannot be a constant
7878 or non-matching memory. */
7879 if ((CONSTANT_P (src1
)
7880 || (!matching_memory
&& GET_CODE (src1
) == MEM
))
7881 && GET_RTX_CLASS (code
) != 'c')
7882 src1
= force_reg (mode
, src1
);
7884 /* If optimizing, copy to regs to improve CSE */
7885 if (optimize
&& ! no_new_pseudos
)
7887 if (GET_CODE (dst
) == MEM
)
7888 dst
= gen_reg_rtx (mode
);
7889 if (GET_CODE (src1
) == MEM
)
7890 src1
= force_reg (mode
, src1
);
7891 if (GET_CODE (src2
) == MEM
)
7892 src2
= force_reg (mode
, src2
);
7895 /* Emit the instruction. */
7897 op
= gen_rtx_SET (VOIDmode
, dst
, gen_rtx_fmt_ee (code
, mode
, src1
, src2
));
7898 if (reload_in_progress
)
7900 /* Reload doesn't know about the flags register, and doesn't know that
7901 it doesn't want to clobber it. We can only do this with PLUS. */
7908 clob
= gen_rtx_CLOBBER (VOIDmode
, gen_rtx_REG (CCmode
, FLAGS_REG
));
7909 emit_insn (gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (2, op
, clob
)));
7912 /* Fix up the destination if needed. */
7913 if (dst
!= operands
[0])
7914 emit_move_insn (operands
[0], dst
);
7917 /* Return TRUE or FALSE depending on whether the binary operator meets the
7918 appropriate constraints. */
7921 ix86_binary_operator_ok (code
, mode
, operands
)
7923 enum machine_mode mode ATTRIBUTE_UNUSED
;
7926 /* Both source operands cannot be in memory. */
7927 if (GET_CODE (operands
[1]) == MEM
&& GET_CODE (operands
[2]) == MEM
)
7929 /* If the operation is not commutable, source 1 cannot be a constant. */
7930 if (CONSTANT_P (operands
[1]) && GET_RTX_CLASS (code
) != 'c')
7932 /* If the destination is memory, we must have a matching source operand. */
7933 if (GET_CODE (operands
[0]) == MEM
7934 && ! (rtx_equal_p (operands
[0], operands
[1])
7935 || (GET_RTX_CLASS (code
) == 'c'
7936 && rtx_equal_p (operands
[0], operands
[2]))))
7938 /* If the operation is not commutable and the source 1 is memory, we must
7939 have a matching destination. */
7940 if (GET_CODE (operands
[1]) == MEM
7941 && GET_RTX_CLASS (code
) != 'c'
7942 && ! rtx_equal_p (operands
[0], operands
[1]))
7947 /* Attempt to expand a unary operator. Make the expansion closer to the
7948 actual machine, then just general_operand, which will allow 2 separate
7949 memory references (one output, one input) in a single insn. */
7952 ix86_expand_unary_operator (code
, mode
, operands
)
7954 enum machine_mode mode
;
7957 int matching_memory
;
7958 rtx src
, dst
, op
, clob
;
7963 /* If the destination is memory, and we do not have matching source
7964 operands, do things in registers. */
7965 matching_memory
= 0;
7966 if (GET_CODE (dst
) == MEM
)
7968 if (rtx_equal_p (dst
, src
))
7969 matching_memory
= 1;
7971 dst
= gen_reg_rtx (mode
);
7974 /* When source operand is memory, destination must match. */
7975 if (!matching_memory
&& GET_CODE (src
) == MEM
)
7976 src
= force_reg (mode
, src
);
7978 /* If optimizing, copy to regs to improve CSE */
7979 if (optimize
&& ! no_new_pseudos
)
7981 if (GET_CODE (dst
) == MEM
)
7982 dst
= gen_reg_rtx (mode
);
7983 if (GET_CODE (src
) == MEM
)
7984 src
= force_reg (mode
, src
);
7987 /* Emit the instruction. */
7989 op
= gen_rtx_SET (VOIDmode
, dst
, gen_rtx_fmt_e (code
, mode
, src
));
7990 if (reload_in_progress
|| code
== NOT
)
7992 /* Reload doesn't know about the flags register, and doesn't know that
7993 it doesn't want to clobber it. */
8000 clob
= gen_rtx_CLOBBER (VOIDmode
, gen_rtx_REG (CCmode
, FLAGS_REG
));
8001 emit_insn (gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (2, op
, clob
)));
8004 /* Fix up the destination if needed. */
8005 if (dst
!= operands
[0])
8006 emit_move_insn (operands
[0], dst
);
8009 /* Return TRUE or FALSE depending on whether the unary operator meets the
8010 appropriate constraints. */
8013 ix86_unary_operator_ok (code
, mode
, operands
)
8014 enum rtx_code code ATTRIBUTE_UNUSED
;
8015 enum machine_mode mode ATTRIBUTE_UNUSED
;
8016 rtx operands
[2] ATTRIBUTE_UNUSED
;
8018 /* If one of operands is memory, source and destination must match. */
8019 if ((GET_CODE (operands
[0]) == MEM
8020 || GET_CODE (operands
[1]) == MEM
)
8021 && ! rtx_equal_p (operands
[0], operands
[1]))
8026 /* Return TRUE or FALSE depending on whether the first SET in INSN
8027 has source and destination with matching CC modes, and that the
8028 CC mode is at least as constrained as REQ_MODE. */
8031 ix86_match_ccmode (insn
, req_mode
)
8033 enum machine_mode req_mode
;
8036 enum machine_mode set_mode
;
8038 set
= PATTERN (insn
);
8039 if (GET_CODE (set
) == PARALLEL
)
8040 set
= XVECEXP (set
, 0, 0);
8041 if (GET_CODE (set
) != SET
)
8043 if (GET_CODE (SET_SRC (set
)) != COMPARE
)
8046 set_mode
= GET_MODE (SET_DEST (set
));
8050 if (req_mode
!= CCNOmode
8051 && (req_mode
!= CCmode
8052 || XEXP (SET_SRC (set
), 1) != const0_rtx
))
8056 if (req_mode
== CCGCmode
)
8060 if (req_mode
== CCGOCmode
|| req_mode
== CCNOmode
)
8064 if (req_mode
== CCZmode
)
8074 return (GET_MODE (SET_SRC (set
)) == set_mode
);
8077 /* Generate insn patterns to do an integer compare of OPERANDS. */
8080 ix86_expand_int_compare (code
, op0
, op1
)
8084 enum machine_mode cmpmode
;
8087 cmpmode
= SELECT_CC_MODE (code
, op0
, op1
);
8088 flags
= gen_rtx_REG (cmpmode
, FLAGS_REG
);
8090 /* This is very simple, but making the interface the same as in the
8091 FP case makes the rest of the code easier. */
8092 tmp
= gen_rtx_COMPARE (cmpmode
, op0
, op1
);
8093 emit_insn (gen_rtx_SET (VOIDmode
, flags
, tmp
));
8095 /* Return the test that should be put into the flags user, i.e.
8096 the bcc, scc, or cmov instruction. */
8097 return gen_rtx_fmt_ee (code
, VOIDmode
, flags
, const0_rtx
);
8100 /* Figure out whether to use ordered or unordered fp comparisons.
8101 Return the appropriate mode to use. */
8104 ix86_fp_compare_mode (code
)
8105 enum rtx_code code ATTRIBUTE_UNUSED
;
8107 /* ??? In order to make all comparisons reversible, we do all comparisons
8108 non-trapping when compiling for IEEE. Once gcc is able to distinguish
8109 all forms trapping and nontrapping comparisons, we can make inequality
8110 comparisons trapping again, since it results in better code when using
8111 FCOM based compares. */
8112 return TARGET_IEEE_FP
? CCFPUmode
: CCFPmode
;
8116 ix86_cc_mode (code
, op0
, op1
)
8120 if (GET_MODE_CLASS (GET_MODE (op0
)) == MODE_FLOAT
)
8121 return ix86_fp_compare_mode (code
);
8124 /* Only zero flag is needed. */
8126 case NE
: /* ZF!=0 */
8128 /* Codes needing carry flag. */
8129 case GEU
: /* CF=0 */
8130 case GTU
: /* CF=0 & ZF=0 */
8131 case LTU
: /* CF=1 */
8132 case LEU
: /* CF=1 | ZF=1 */
8134 /* Codes possibly doable only with sign flag when
8135 comparing against zero. */
8136 case GE
: /* SF=OF or SF=0 */
8137 case LT
: /* SF<>OF or SF=1 */
8138 if (op1
== const0_rtx
)
8141 /* For other cases Carry flag is not required. */
8143 /* Codes doable only with sign flag when comparing
8144 against zero, but we miss jump instruction for it
8145 so we need to use relational tests agains overflow
8146 that thus needs to be zero. */
8147 case GT
: /* ZF=0 & SF=OF */
8148 case LE
: /* ZF=1 | SF<>OF */
8149 if (op1
== const0_rtx
)
8153 /* strcmp pattern do (use flags) and combine may ask us for proper
8162 /* Return true if we should use an FCOMI instruction for this fp comparison. */
8165 ix86_use_fcomi_compare (code
)
8166 enum rtx_code code ATTRIBUTE_UNUSED
;
8168 enum rtx_code swapped_code
= swap_condition (code
);
8169 return ((ix86_fp_comparison_cost (code
) == ix86_fp_comparison_fcomi_cost (code
))
8170 || (ix86_fp_comparison_cost (swapped_code
)
8171 == ix86_fp_comparison_fcomi_cost (swapped_code
)));
8174 /* Swap, force into registers, or otherwise massage the two operands
8175 to a fp comparison. The operands are updated in place; the new
8176 comparsion code is returned. */
8178 static enum rtx_code
8179 ix86_prepare_fp_compare_args (code
, pop0
, pop1
)
8183 enum machine_mode fpcmp_mode
= ix86_fp_compare_mode (code
);
8184 rtx op0
= *pop0
, op1
= *pop1
;
8185 enum machine_mode op_mode
= GET_MODE (op0
);
8186 int is_sse
= SSE_REG_P (op0
) | SSE_REG_P (op1
);
8188 /* All of the unordered compare instructions only work on registers.
8189 The same is true of the XFmode compare instructions. The same is
8190 true of the fcomi compare instructions. */
8193 && (fpcmp_mode
== CCFPUmode
8194 || op_mode
== XFmode
8195 || op_mode
== TFmode
8196 || ix86_use_fcomi_compare (code
)))
8198 op0
= force_reg (op_mode
, op0
);
8199 op1
= force_reg (op_mode
, op1
);
8203 /* %%% We only allow op1 in memory; op0 must be st(0). So swap
8204 things around if they appear profitable, otherwise force op0
8207 if (standard_80387_constant_p (op0
) == 0
8208 || (GET_CODE (op0
) == MEM
8209 && ! (standard_80387_constant_p (op1
) == 0
8210 || GET_CODE (op1
) == MEM
)))
8213 tmp
= op0
, op0
= op1
, op1
= tmp
;
8214 code
= swap_condition (code
);
8217 if (GET_CODE (op0
) != REG
)
8218 op0
= force_reg (op_mode
, op0
);
8220 if (CONSTANT_P (op1
))
8222 if (standard_80387_constant_p (op1
))
8223 op1
= force_reg (op_mode
, op1
);
8225 op1
= validize_mem (force_const_mem (op_mode
, op1
));
8229 /* Try to rearrange the comparison to make it cheaper. */
8230 if (ix86_fp_comparison_cost (code
)
8231 > ix86_fp_comparison_cost (swap_condition (code
))
8232 && (GET_CODE (op1
) == REG
|| !no_new_pseudos
))
8235 tmp
= op0
, op0
= op1
, op1
= tmp
;
8236 code
= swap_condition (code
);
8237 if (GET_CODE (op0
) != REG
)
8238 op0
= force_reg (op_mode
, op0
);
8246 /* Convert comparison codes we use to represent FP comparison to integer
8247 code that will result in proper branch. Return UNKNOWN if no such code
8249 static enum rtx_code
8250 ix86_fp_compare_code_to_integer (code
)
8280 /* Split comparison code CODE into comparisons we can do using branch
8281 instructions. BYPASS_CODE is comparison code for branch that will
8282 branch around FIRST_CODE and SECOND_CODE. If some of branches
8283 is not required, set value to NIL.
8284 We never require more than two branches. */
8286 ix86_fp_comparison_codes (code
, bypass_code
, first_code
, second_code
)
8287 enum rtx_code code
, *bypass_code
, *first_code
, *second_code
;
8293 /* The fcomi comparison sets flags as follows:
8303 case GT
: /* GTU - CF=0 & ZF=0 */
8304 case GE
: /* GEU - CF=0 */
8305 case ORDERED
: /* PF=0 */
8306 case UNORDERED
: /* PF=1 */
8307 case UNEQ
: /* EQ - ZF=1 */
8308 case UNLT
: /* LTU - CF=1 */
8309 case UNLE
: /* LEU - CF=1 | ZF=1 */
8310 case LTGT
: /* EQ - ZF=0 */
8312 case LT
: /* LTU - CF=1 - fails on unordered */
8314 *bypass_code
= UNORDERED
;
8316 case LE
: /* LEU - CF=1 | ZF=1 - fails on unordered */
8318 *bypass_code
= UNORDERED
;
8320 case EQ
: /* EQ - ZF=1 - fails on unordered */
8322 *bypass_code
= UNORDERED
;
8324 case NE
: /* NE - ZF=0 - fails on unordered */
8326 *second_code
= UNORDERED
;
8328 case UNGE
: /* GEU - CF=0 - fails on unordered */
8330 *second_code
= UNORDERED
;
8332 case UNGT
: /* GTU - CF=0 & ZF=0 - fails on unordered */
8334 *second_code
= UNORDERED
;
8339 if (!TARGET_IEEE_FP
)
8346 /* Return cost of comparison done fcom + arithmetics operations on AX.
8347 All following functions do use number of instructions as an cost metrics.
8348 In future this should be tweaked to compute bytes for optimize_size and
8349 take into account performance of various instructions on various CPUs. */
8351 ix86_fp_comparison_arithmetics_cost (code
)
8354 if (!TARGET_IEEE_FP
)
8356 /* The cost of code output by ix86_expand_fp_compare. */
8384 /* Return cost of comparison done using fcomi operation.
8385 See ix86_fp_comparison_arithmetics_cost for the metrics. */
8387 ix86_fp_comparison_fcomi_cost (code
)
8390 enum rtx_code bypass_code
, first_code
, second_code
;
8391 /* Return arbitarily high cost when instruction is not supported - this
8392 prevents gcc from using it. */
8395 ix86_fp_comparison_codes (code
, &bypass_code
, &first_code
, &second_code
);
8396 return (bypass_code
!= NIL
|| second_code
!= NIL
) + 2;
8399 /* Return cost of comparison done using sahf operation.
8400 See ix86_fp_comparison_arithmetics_cost for the metrics. */
8402 ix86_fp_comparison_sahf_cost (code
)
8405 enum rtx_code bypass_code
, first_code
, second_code
;
8406 /* Return arbitarily high cost when instruction is not preferred - this
8407 avoids gcc from using it. */
8408 if (!TARGET_USE_SAHF
&& !optimize_size
)
8410 ix86_fp_comparison_codes (code
, &bypass_code
, &first_code
, &second_code
);
8411 return (bypass_code
!= NIL
|| second_code
!= NIL
) + 3;
8414 /* Compute cost of the comparison done using any method.
8415 See ix86_fp_comparison_arithmetics_cost for the metrics. */
8417 ix86_fp_comparison_cost (code
)
8420 int fcomi_cost
, sahf_cost
, arithmetics_cost
= 1024;
8423 fcomi_cost
= ix86_fp_comparison_fcomi_cost (code
);
8424 sahf_cost
= ix86_fp_comparison_sahf_cost (code
);
8426 min
= arithmetics_cost
= ix86_fp_comparison_arithmetics_cost (code
);
8427 if (min
> sahf_cost
)
8429 if (min
> fcomi_cost
)
8434 /* Generate insn patterns to do a floating point compare of OPERANDS. */
8437 ix86_expand_fp_compare (code
, op0
, op1
, scratch
, second_test
, bypass_test
)
8439 rtx op0
, op1
, scratch
;
8443 enum machine_mode fpcmp_mode
, intcmp_mode
;
8445 int cost
= ix86_fp_comparison_cost (code
);
8446 enum rtx_code bypass_code
, first_code
, second_code
;
8448 fpcmp_mode
= ix86_fp_compare_mode (code
);
8449 code
= ix86_prepare_fp_compare_args (code
, &op0
, &op1
);
8452 *second_test
= NULL_RTX
;
8454 *bypass_test
= NULL_RTX
;
8456 ix86_fp_comparison_codes (code
, &bypass_code
, &first_code
, &second_code
);
8458 /* Do fcomi/sahf based test when profitable. */
8459 if ((bypass_code
== NIL
|| bypass_test
)
8460 && (second_code
== NIL
|| second_test
)
8461 && ix86_fp_comparison_arithmetics_cost (code
) > cost
)
8465 tmp
= gen_rtx_COMPARE (fpcmp_mode
, op0
, op1
);
8466 tmp
= gen_rtx_SET (VOIDmode
, gen_rtx_REG (fpcmp_mode
, FLAGS_REG
),
8472 tmp
= gen_rtx_COMPARE (fpcmp_mode
, op0
, op1
);
8473 tmp2
= gen_rtx_UNSPEC (HImode
, gen_rtvec (1, tmp
), UNSPEC_FNSTSW
);
8475 scratch
= gen_reg_rtx (HImode
);
8476 emit_insn (gen_rtx_SET (VOIDmode
, scratch
, tmp2
));
8477 emit_insn (gen_x86_sahf_1 (scratch
));
8480 /* The FP codes work out to act like unsigned. */
8481 intcmp_mode
= fpcmp_mode
;
8483 if (bypass_code
!= NIL
)
8484 *bypass_test
= gen_rtx_fmt_ee (bypass_code
, VOIDmode
,
8485 gen_rtx_REG (intcmp_mode
, FLAGS_REG
),
8487 if (second_code
!= NIL
)
8488 *second_test
= gen_rtx_fmt_ee (second_code
, VOIDmode
,
8489 gen_rtx_REG (intcmp_mode
, FLAGS_REG
),
8494 /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first. */
8495 tmp
= gen_rtx_COMPARE (fpcmp_mode
, op0
, op1
);
8496 tmp2
= gen_rtx_UNSPEC (HImode
, gen_rtvec (1, tmp
), UNSPEC_FNSTSW
);
8498 scratch
= gen_reg_rtx (HImode
);
8499 emit_insn (gen_rtx_SET (VOIDmode
, scratch
, tmp2
));
8501 /* In the unordered case, we have to check C2 for NaN's, which
8502 doesn't happen to work out to anything nice combination-wise.
8503 So do some bit twiddling on the value we've got in AH to come
8504 up with an appropriate set of condition codes. */
8506 intcmp_mode
= CCNOmode
;
8511 if (code
== GT
|| !TARGET_IEEE_FP
)
8513 emit_insn (gen_testqi_ext_ccno_0 (scratch
, GEN_INT (0x45)));
8518 emit_insn (gen_andqi_ext_0 (scratch
, scratch
, GEN_INT (0x45)));
8519 emit_insn (gen_addqi_ext_1 (scratch
, scratch
, constm1_rtx
));
8520 emit_insn (gen_cmpqi_ext_3 (scratch
, GEN_INT (0x44)));
8521 intcmp_mode
= CCmode
;
8527 if (code
== LT
&& TARGET_IEEE_FP
)
8529 emit_insn (gen_andqi_ext_0 (scratch
, scratch
, GEN_INT (0x45)));
8530 emit_insn (gen_cmpqi_ext_3 (scratch
, GEN_INT (0x01)));
8531 intcmp_mode
= CCmode
;
8536 emit_insn (gen_testqi_ext_ccno_0 (scratch
, GEN_INT (0x01)));
8542 if (code
== GE
|| !TARGET_IEEE_FP
)
8544 emit_insn (gen_testqi_ext_ccno_0 (scratch
, GEN_INT (0x05)));
8549 emit_insn (gen_andqi_ext_0 (scratch
, scratch
, GEN_INT (0x45)));
8550 emit_insn (gen_xorqi_cc_ext_1 (scratch
, scratch
,
8557 if (code
== LE
&& TARGET_IEEE_FP
)
8559 emit_insn (gen_andqi_ext_0 (scratch
, scratch
, GEN_INT (0x45)));
8560 emit_insn (gen_addqi_ext_1 (scratch
, scratch
, constm1_rtx
));
8561 emit_insn (gen_cmpqi_ext_3 (scratch
, GEN_INT (0x40)));
8562 intcmp_mode
= CCmode
;
8567 emit_insn (gen_testqi_ext_ccno_0 (scratch
, GEN_INT (0x45)));
8573 if (code
== EQ
&& TARGET_IEEE_FP
)
8575 emit_insn (gen_andqi_ext_0 (scratch
, scratch
, GEN_INT (0x45)));
8576 emit_insn (gen_cmpqi_ext_3 (scratch
, GEN_INT (0x40)));
8577 intcmp_mode
= CCmode
;
8582 emit_insn (gen_testqi_ext_ccno_0 (scratch
, GEN_INT (0x40)));
8589 if (code
== NE
&& TARGET_IEEE_FP
)
8591 emit_insn (gen_andqi_ext_0 (scratch
, scratch
, GEN_INT (0x45)));
8592 emit_insn (gen_xorqi_cc_ext_1 (scratch
, scratch
,
8598 emit_insn (gen_testqi_ext_ccno_0 (scratch
, GEN_INT (0x40)));
8604 emit_insn (gen_testqi_ext_ccno_0 (scratch
, GEN_INT (0x04)));
8608 emit_insn (gen_testqi_ext_ccno_0 (scratch
, GEN_INT (0x04)));
8617 /* Return the test that should be put into the flags user, i.e.
8618 the bcc, scc, or cmov instruction. */
8619 return gen_rtx_fmt_ee (code
, VOIDmode
,
8620 gen_rtx_REG (intcmp_mode
, FLAGS_REG
),
8625 ix86_expand_compare (code
, second_test
, bypass_test
)
8627 rtx
*second_test
, *bypass_test
;
8630 op0
= ix86_compare_op0
;
8631 op1
= ix86_compare_op1
;
8634 *second_test
= NULL_RTX
;
8636 *bypass_test
= NULL_RTX
;
8638 if (GET_MODE_CLASS (GET_MODE (op0
)) == MODE_FLOAT
)
8639 ret
= ix86_expand_fp_compare (code
, op0
, op1
, NULL_RTX
,
8640 second_test
, bypass_test
);
8642 ret
= ix86_expand_int_compare (code
, op0
, op1
);
8647 /* Return true if the CODE will result in nontrivial jump sequence. */
8649 ix86_fp_jump_nontrivial_p (code
)
8652 enum rtx_code bypass_code
, first_code
, second_code
;
8655 ix86_fp_comparison_codes (code
, &bypass_code
, &first_code
, &second_code
);
8656 return bypass_code
!= NIL
|| second_code
!= NIL
;
8660 ix86_expand_branch (code
, label
)
8666 switch (GET_MODE (ix86_compare_op0
))
8672 tmp
= ix86_expand_compare (code
, NULL
, NULL
);
8673 tmp
= gen_rtx_IF_THEN_ELSE (VOIDmode
, tmp
,
8674 gen_rtx_LABEL_REF (VOIDmode
, label
),
8676 emit_jump_insn (gen_rtx_SET (VOIDmode
, pc_rtx
, tmp
));
8686 enum rtx_code bypass_code
, first_code
, second_code
;
8688 code
= ix86_prepare_fp_compare_args (code
, &ix86_compare_op0
,
8691 ix86_fp_comparison_codes (code
, &bypass_code
, &first_code
, &second_code
);
8693 /* Check whether we will use the natural sequence with one jump. If
8694 so, we can expand jump early. Otherwise delay expansion by
8695 creating compound insn to not confuse optimizers. */
8696 if (bypass_code
== NIL
&& second_code
== NIL
8699 ix86_split_fp_branch (code
, ix86_compare_op0
, ix86_compare_op1
,
8700 gen_rtx_LABEL_REF (VOIDmode
, label
),
8705 tmp
= gen_rtx_fmt_ee (code
, VOIDmode
,
8706 ix86_compare_op0
, ix86_compare_op1
);
8707 tmp
= gen_rtx_IF_THEN_ELSE (VOIDmode
, tmp
,
8708 gen_rtx_LABEL_REF (VOIDmode
, label
),
8710 tmp
= gen_rtx_SET (VOIDmode
, pc_rtx
, tmp
);
8712 use_fcomi
= ix86_use_fcomi_compare (code
);
8713 vec
= rtvec_alloc (3 + !use_fcomi
);
8714 RTVEC_ELT (vec
, 0) = tmp
;
8716 = gen_rtx_CLOBBER (VOIDmode
, gen_rtx_REG (CCFPmode
, 18));
8718 = gen_rtx_CLOBBER (VOIDmode
, gen_rtx_REG (CCFPmode
, 17));
8721 = gen_rtx_CLOBBER (VOIDmode
, gen_rtx_SCRATCH (HImode
));
8723 emit_jump_insn (gen_rtx_PARALLEL (VOIDmode
, vec
));
8731 /* Expand DImode branch into multiple compare+branch. */
8733 rtx lo
[2], hi
[2], label2
;
8734 enum rtx_code code1
, code2
, code3
;
8736 if (CONSTANT_P (ix86_compare_op0
) && ! CONSTANT_P (ix86_compare_op1
))
8738 tmp
= ix86_compare_op0
;
8739 ix86_compare_op0
= ix86_compare_op1
;
8740 ix86_compare_op1
= tmp
;
8741 code
= swap_condition (code
);
8743 split_di (&ix86_compare_op0
, 1, lo
+0, hi
+0);
8744 split_di (&ix86_compare_op1
, 1, lo
+1, hi
+1);
8746 /* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to
8747 avoid two branches. This costs one extra insn, so disable when
8748 optimizing for size. */
8750 if ((code
== EQ
|| code
== NE
)
8752 || hi
[1] == const0_rtx
|| lo
[1] == const0_rtx
))
8757 if (hi
[1] != const0_rtx
)
8758 xor1
= expand_binop (SImode
, xor_optab
, xor1
, hi
[1],
8759 NULL_RTX
, 0, OPTAB_WIDEN
);
8762 if (lo
[1] != const0_rtx
)
8763 xor0
= expand_binop (SImode
, xor_optab
, xor0
, lo
[1],
8764 NULL_RTX
, 0, OPTAB_WIDEN
);
8766 tmp
= expand_binop (SImode
, ior_optab
, xor1
, xor0
,
8767 NULL_RTX
, 0, OPTAB_WIDEN
);
8769 ix86_compare_op0
= tmp
;
8770 ix86_compare_op1
= const0_rtx
;
8771 ix86_expand_branch (code
, label
);
8775 /* Otherwise, if we are doing less-than or greater-or-equal-than,
8776 op1 is a constant and the low word is zero, then we can just
8777 examine the high word. */
8779 if (GET_CODE (hi
[1]) == CONST_INT
&& lo
[1] == const0_rtx
)
8782 case LT
: case LTU
: case GE
: case GEU
:
8783 ix86_compare_op0
= hi
[0];
8784 ix86_compare_op1
= hi
[1];
8785 ix86_expand_branch (code
, label
);
8791 /* Otherwise, we need two or three jumps. */
8793 label2
= gen_label_rtx ();
8796 code2
= swap_condition (code
);
8797 code3
= unsigned_condition (code
);
8801 case LT
: case GT
: case LTU
: case GTU
:
8804 case LE
: code1
= LT
; code2
= GT
; break;
8805 case GE
: code1
= GT
; code2
= LT
; break;
8806 case LEU
: code1
= LTU
; code2
= GTU
; break;
8807 case GEU
: code1
= GTU
; code2
= LTU
; break;
8809 case EQ
: code1
= NIL
; code2
= NE
; break;
8810 case NE
: code2
= NIL
; break;
8818 * if (hi(a) < hi(b)) goto true;
8819 * if (hi(a) > hi(b)) goto false;
8820 * if (lo(a) < lo(b)) goto true;
8824 ix86_compare_op0
= hi
[0];
8825 ix86_compare_op1
= hi
[1];
8828 ix86_expand_branch (code1
, label
);
8830 ix86_expand_branch (code2
, label2
);
8832 ix86_compare_op0
= lo
[0];
8833 ix86_compare_op1
= lo
[1];
8834 ix86_expand_branch (code3
, label
);
8837 emit_label (label2
);
8846 /* Split branch based on floating point condition. */
8848 ix86_split_fp_branch (code
, op1
, op2
, target1
, target2
, tmp
)
8850 rtx op1
, op2
, target1
, target2
, tmp
;
8853 rtx label
= NULL_RTX
;
8855 int bypass_probability
= -1, second_probability
= -1, probability
= -1;
8858 if (target2
!= pc_rtx
)
8861 code
= reverse_condition_maybe_unordered (code
);
8866 condition
= ix86_expand_fp_compare (code
, op1
, op2
,
8867 tmp
, &second
, &bypass
);
8869 if (split_branch_probability
>= 0)
8871 /* Distribute the probabilities across the jumps.
8872 Assume the BYPASS and SECOND to be always test
8874 probability
= split_branch_probability
;
8876 /* Value of 1 is low enough to make no need for probability
8877 to be updated. Later we may run some experiments and see
8878 if unordered values are more frequent in practice. */
8880 bypass_probability
= 1;
8882 second_probability
= 1;
8884 if (bypass
!= NULL_RTX
)
8886 label
= gen_label_rtx ();
8887 i
= emit_jump_insn (gen_rtx_SET
8889 gen_rtx_IF_THEN_ELSE (VOIDmode
,
8891 gen_rtx_LABEL_REF (VOIDmode
,
8894 if (bypass_probability
>= 0)
8896 = gen_rtx_EXPR_LIST (REG_BR_PROB
,
8897 GEN_INT (bypass_probability
),
8900 i
= emit_jump_insn (gen_rtx_SET
8902 gen_rtx_IF_THEN_ELSE (VOIDmode
,
8903 condition
, target1
, target2
)));
8904 if (probability
>= 0)
8906 = gen_rtx_EXPR_LIST (REG_BR_PROB
,
8907 GEN_INT (probability
),
8909 if (second
!= NULL_RTX
)
8911 i
= emit_jump_insn (gen_rtx_SET
8913 gen_rtx_IF_THEN_ELSE (VOIDmode
, second
, target1
,
8915 if (second_probability
>= 0)
8917 = gen_rtx_EXPR_LIST (REG_BR_PROB
,
8918 GEN_INT (second_probability
),
8921 if (label
!= NULL_RTX
)
8926 ix86_expand_setcc (code
, dest
)
8930 rtx ret
, tmp
, tmpreg
;
8931 rtx second_test
, bypass_test
;
8933 if (GET_MODE (ix86_compare_op0
) == DImode
8935 return 0; /* FAIL */
8937 if (GET_MODE (dest
) != QImode
)
8940 ret
= ix86_expand_compare (code
, &second_test
, &bypass_test
);
8941 PUT_MODE (ret
, QImode
);
8946 emit_insn (gen_rtx_SET (VOIDmode
, tmp
, ret
));
8947 if (bypass_test
|| second_test
)
8949 rtx test
= second_test
;
8951 rtx tmp2
= gen_reg_rtx (QImode
);
8958 PUT_CODE (test
, reverse_condition_maybe_unordered (GET_CODE (test
)));
8960 PUT_MODE (test
, QImode
);
8961 emit_insn (gen_rtx_SET (VOIDmode
, tmp2
, test
));
8964 emit_insn (gen_andqi3 (tmp
, tmpreg
, tmp2
));
8966 emit_insn (gen_iorqi3 (tmp
, tmpreg
, tmp2
));
8969 return 1; /* DONE */
8973 ix86_expand_int_movcc (operands
)
8976 enum rtx_code code
= GET_CODE (operands
[1]), compare_code
;
8977 rtx compare_seq
, compare_op
;
8978 rtx second_test
, bypass_test
;
8979 enum machine_mode mode
= GET_MODE (operands
[0]);
8981 /* When the compare code is not LTU or GEU, we can not use sbbl case.
8982 In case comparsion is done with immediate, we can convert it to LTU or
8983 GEU by altering the integer. */
8985 if ((code
== LEU
|| code
== GTU
)
8986 && GET_CODE (ix86_compare_op1
) == CONST_INT
8988 && INTVAL (ix86_compare_op1
) != -1
8989 /* For x86-64, the immediate field in the instruction is 32-bit
8990 signed, so we can't increment a DImode value above 0x7fffffff. */
8992 || GET_MODE (ix86_compare_op0
) != DImode
8993 || INTVAL (ix86_compare_op1
) != 0x7fffffff)
8994 && GET_CODE (operands
[2]) == CONST_INT
8995 && GET_CODE (operands
[3]) == CONST_INT
)
9001 ix86_compare_op1
= gen_int_mode (INTVAL (ix86_compare_op1
) + 1,
9002 GET_MODE (ix86_compare_op0
));
9006 compare_op
= ix86_expand_compare (code
, &second_test
, &bypass_test
);
9007 compare_seq
= get_insns ();
9010 compare_code
= GET_CODE (compare_op
);
9012 /* Don't attempt mode expansion here -- if we had to expand 5 or 6
9013 HImode insns, we'd be swallowed in word prefix ops. */
9016 && (mode
!= DImode
|| TARGET_64BIT
)
9017 && GET_CODE (operands
[2]) == CONST_INT
9018 && GET_CODE (operands
[3]) == CONST_INT
)
9020 rtx out
= operands
[0];
9021 HOST_WIDE_INT ct
= INTVAL (operands
[2]);
9022 HOST_WIDE_INT cf
= INTVAL (operands
[3]);
9025 if ((compare_code
== LTU
|| compare_code
== GEU
)
9026 && !second_test
&& !bypass_test
)
9028 /* Detect overlap between destination and compare sources. */
9031 /* To simplify rest of code, restrict to the GEU case. */
9032 if (compare_code
== LTU
)
9037 compare_code
= reverse_condition (compare_code
);
9038 code
= reverse_condition (code
);
9042 if (reg_overlap_mentioned_p (out
, ix86_compare_op0
)
9043 || reg_overlap_mentioned_p (out
, ix86_compare_op1
))
9044 tmp
= gen_reg_rtx (mode
);
9046 emit_insn (compare_seq
);
9048 emit_insn (gen_x86_movdicc_0_m1_rex64 (tmp
));
9050 emit_insn (gen_x86_movsicc_0_m1 (tmp
));
9062 tmp
= expand_simple_binop (mode
, PLUS
,
9064 tmp
, 1, OPTAB_DIRECT
);
9075 tmp
= expand_simple_binop (mode
, IOR
,
9077 tmp
, 1, OPTAB_DIRECT
);
9079 else if (diff
== -1 && ct
)
9089 tmp
= expand_simple_unop (mode
, NOT
, tmp
, tmp
, 1);
9091 tmp
= expand_simple_binop (mode
, PLUS
,
9093 tmp
, 1, OPTAB_DIRECT
);
9101 * andl cf - ct, dest
9111 tmp
= expand_simple_unop (mode
, NOT
, tmp
, tmp
, 1);
9114 tmp
= expand_simple_binop (mode
, AND
,
9116 gen_int_mode (cf
- ct
, mode
),
9117 tmp
, 1, OPTAB_DIRECT
);
9119 tmp
= expand_simple_binop (mode
, PLUS
,
9121 tmp
, 1, OPTAB_DIRECT
);
9125 emit_move_insn (out
, tmp
);
9127 return 1; /* DONE */
9134 tmp
= ct
, ct
= cf
, cf
= tmp
;
9136 if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0
)))
9138 /* We may be reversing unordered compare to normal compare, that
9139 is not valid in general (we may convert non-trapping condition
9140 to trapping one), however on i386 we currently emit all
9141 comparisons unordered. */
9142 compare_code
= reverse_condition_maybe_unordered (compare_code
);
9143 code
= reverse_condition_maybe_unordered (code
);
9147 compare_code
= reverse_condition (compare_code
);
9148 code
= reverse_condition (code
);
9153 if (GET_MODE_CLASS (GET_MODE (ix86_compare_op0
)) == MODE_INT
9154 && GET_CODE (ix86_compare_op1
) == CONST_INT
)
9156 if (ix86_compare_op1
== const0_rtx
9157 && (code
== LT
|| code
== GE
))
9158 compare_code
= code
;
9159 else if (ix86_compare_op1
== constm1_rtx
)
9163 else if (code
== GT
)
9168 /* Optimize dest = (op0 < 0) ? -1 : cf. */
9169 if (compare_code
!= NIL
9170 && GET_MODE (ix86_compare_op0
) == GET_MODE (out
)
9171 && (cf
== -1 || ct
== -1))
9173 /* If lea code below could be used, only optimize
9174 if it results in a 2 insn sequence. */
9176 if (! (diff
== 1 || diff
== 2 || diff
== 4 || diff
== 8
9177 || diff
== 3 || diff
== 5 || diff
== 9)
9178 || (compare_code
== LT
&& ct
== -1)
9179 || (compare_code
== GE
&& cf
== -1))
9182 * notl op1 (if necessary)
9190 code
= reverse_condition (code
);
9193 out
= emit_store_flag (out
, code
, ix86_compare_op0
,
9194 ix86_compare_op1
, VOIDmode
, 0, -1);
9196 out
= expand_simple_binop (mode
, IOR
,
9198 out
, 1, OPTAB_DIRECT
);
9199 if (out
!= operands
[0])
9200 emit_move_insn (operands
[0], out
);
9202 return 1; /* DONE */
9206 if ((diff
== 1 || diff
== 2 || diff
== 4 || diff
== 8
9207 || diff
== 3 || diff
== 5 || diff
== 9)
9208 && (mode
!= DImode
|| x86_64_sign_extended_value (GEN_INT (cf
))))
9214 * lea cf(dest*(ct-cf)),dest
9218 * This also catches the degenerate setcc-only case.
9224 out
= emit_store_flag (out
, code
, ix86_compare_op0
,
9225 ix86_compare_op1
, VOIDmode
, 0, 1);
9228 /* On x86_64 the lea instruction operates on Pmode, so we need
9229 to get arithmetics done in proper mode to match. */
9236 tmp
= gen_rtx_MULT (mode
, out1
, GEN_INT (diff
& ~1));
9240 tmp
= gen_rtx_PLUS (mode
, tmp
, out1
);
9246 tmp
= gen_rtx_PLUS (mode
, tmp
, GEN_INT (cf
));
9250 && (GET_CODE (tmp
) != SUBREG
|| SUBREG_REG (tmp
) != out
))
9256 clob
= gen_rtx_REG (CCmode
, FLAGS_REG
);
9257 clob
= gen_rtx_CLOBBER (VOIDmode
, clob
);
9259 tmp
= gen_rtx_SET (VOIDmode
, out
, tmp
);
9260 tmp
= gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (2, tmp
, clob
));
9264 emit_insn (gen_rtx_SET (VOIDmode
, out
, tmp
));
9266 if (out
!= operands
[0])
9267 emit_move_insn (operands
[0], copy_rtx (out
));
9269 return 1; /* DONE */
9273 * General case: Jumpful:
9274 * xorl dest,dest cmpl op1, op2
9275 * cmpl op1, op2 movl ct, dest
9277 * decl dest movl cf, dest
9278 * andl (cf-ct),dest 1:
9283 * This is reasonably steep, but branch mispredict costs are
9284 * high on modern cpus, so consider failing only if optimizing
9287 * %%% Parameterize branch_cost on the tuning architecture, then
9288 * use that. The 80386 couldn't care less about mispredicts.
9291 if (!optimize_size
&& !TARGET_CMOVE
)
9297 if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0
)))
9298 /* We may be reversing unordered compare to normal compare,
9299 that is not valid in general (we may convert non-trapping
9300 condition to trapping one), however on i386 we currently
9301 emit all comparisons unordered. */
9302 code
= reverse_condition_maybe_unordered (code
);
9305 code
= reverse_condition (code
);
9306 if (compare_code
!= NIL
)
9307 compare_code
= reverse_condition (compare_code
);
9311 if (compare_code
!= NIL
)
9313 /* notl op1 (if needed)
9318 For x < 0 (resp. x <= -1) there will be no notl,
9319 so if possible swap the constants to get rid of the
9321 True/false will be -1/0 while code below (store flag
9322 followed by decrement) is 0/-1, so the constants need
9323 to be exchanged once more. */
9325 if (compare_code
== GE
|| !cf
)
9327 code
= reverse_condition (code
);
9332 HOST_WIDE_INT tmp
= cf
;
9337 out
= emit_store_flag (out
, code
, ix86_compare_op0
,
9338 ix86_compare_op1
, VOIDmode
, 0, -1);
9342 out
= emit_store_flag (out
, code
, ix86_compare_op0
,
9343 ix86_compare_op1
, VOIDmode
, 0, 1);
9345 out
= expand_simple_binop (mode
, PLUS
, out
, constm1_rtx
,
9346 out
, 1, OPTAB_DIRECT
);
9349 out
= expand_simple_binop (mode
, AND
, out
,
9350 gen_int_mode (cf
- ct
, mode
),
9351 out
, 1, OPTAB_DIRECT
);
9353 out
= expand_simple_binop (mode
, PLUS
, out
, GEN_INT (ct
),
9354 out
, 1, OPTAB_DIRECT
);
9355 if (out
!= operands
[0])
9356 emit_move_insn (operands
[0], out
);
9358 return 1; /* DONE */
9364 /* Try a few things more with specific constants and a variable. */
9367 rtx var
, orig_out
, out
, tmp
;
9370 return 0; /* FAIL */
9372 /* If one of the two operands is an interesting constant, load a
9373 constant with the above and mask it in with a logical operation. */
9375 if (GET_CODE (operands
[2]) == CONST_INT
)
9378 if (INTVAL (operands
[2]) == 0)
9379 operands
[3] = constm1_rtx
, op
= and_optab
;
9380 else if (INTVAL (operands
[2]) == -1)
9381 operands
[3] = const0_rtx
, op
= ior_optab
;
9383 return 0; /* FAIL */
9385 else if (GET_CODE (operands
[3]) == CONST_INT
)
9388 if (INTVAL (operands
[3]) == 0)
9389 operands
[2] = constm1_rtx
, op
= and_optab
;
9390 else if (INTVAL (operands
[3]) == -1)
9391 operands
[2] = const0_rtx
, op
= ior_optab
;
9393 return 0; /* FAIL */
9396 return 0; /* FAIL */
9398 orig_out
= operands
[0];
9399 tmp
= gen_reg_rtx (mode
);
9402 /* Recurse to get the constant loaded. */
9403 if (ix86_expand_int_movcc (operands
) == 0)
9404 return 0; /* FAIL */
9406 /* Mask in the interesting variable. */
9407 out
= expand_binop (mode
, op
, var
, tmp
, orig_out
, 0,
9409 if (out
!= orig_out
)
9410 emit_move_insn (orig_out
, out
);
9412 return 1; /* DONE */
9416 * For comparison with above,
9426 if (! nonimmediate_operand (operands
[2], mode
))
9427 operands
[2] = force_reg (mode
, operands
[2]);
9428 if (! nonimmediate_operand (operands
[3], mode
))
9429 operands
[3] = force_reg (mode
, operands
[3]);
9431 if (bypass_test
&& reg_overlap_mentioned_p (operands
[0], operands
[3]))
9433 rtx tmp
= gen_reg_rtx (mode
);
9434 emit_move_insn (tmp
, operands
[3]);
9437 if (second_test
&& reg_overlap_mentioned_p (operands
[0], operands
[2]))
9439 rtx tmp
= gen_reg_rtx (mode
);
9440 emit_move_insn (tmp
, operands
[2]);
9443 if (! register_operand (operands
[2], VOIDmode
)
9444 && ! register_operand (operands
[3], VOIDmode
))
9445 operands
[2] = force_reg (mode
, operands
[2]);
9447 emit_insn (compare_seq
);
9448 emit_insn (gen_rtx_SET (VOIDmode
, operands
[0],
9449 gen_rtx_IF_THEN_ELSE (mode
,
9450 compare_op
, operands
[2],
9453 emit_insn (gen_rtx_SET (VOIDmode
, operands
[0],
9454 gen_rtx_IF_THEN_ELSE (mode
,
9459 emit_insn (gen_rtx_SET (VOIDmode
, operands
[0],
9460 gen_rtx_IF_THEN_ELSE (mode
,
9465 return 1; /* DONE */
9469 ix86_expand_fp_movcc (operands
)
9474 rtx compare_op
, second_test
, bypass_test
;
9476 /* For SF/DFmode conditional moves based on comparisons
9477 in same mode, we may want to use SSE min/max instructions. */
9478 if (((TARGET_SSE_MATH
&& GET_MODE (operands
[0]) == SFmode
)
9479 || (TARGET_SSE2
&& TARGET_SSE_MATH
&& GET_MODE (operands
[0]) == DFmode
))
9480 && GET_MODE (ix86_compare_op0
) == GET_MODE (operands
[0])
9481 /* The SSE comparisons does not support the LTGT/UNEQ pair. */
9483 || (GET_CODE (operands
[1]) != LTGT
&& GET_CODE (operands
[1]) != UNEQ
))
9484 /* We may be called from the post-reload splitter. */
9485 && (!REG_P (operands
[0])
9486 || SSE_REG_P (operands
[0])
9487 || REGNO (operands
[0]) >= FIRST_PSEUDO_REGISTER
))
9489 rtx op0
= ix86_compare_op0
, op1
= ix86_compare_op1
;
9490 code
= GET_CODE (operands
[1]);
9492 /* See if we have (cross) match between comparison operands and
9493 conditional move operands. */
9494 if (rtx_equal_p (operands
[2], op1
))
9499 code
= reverse_condition_maybe_unordered (code
);
9501 if (rtx_equal_p (operands
[2], op0
) && rtx_equal_p (operands
[3], op1
))
9503 /* Check for min operation. */
9506 operands
[0] = force_reg (GET_MODE (operands
[0]), operands
[0]);
9507 if (memory_operand (op0
, VOIDmode
))
9508 op0
= force_reg (GET_MODE (operands
[0]), op0
);
9509 if (GET_MODE (operands
[0]) == SFmode
)
9510 emit_insn (gen_minsf3 (operands
[0], op0
, op1
));
9512 emit_insn (gen_mindf3 (operands
[0], op0
, op1
));
9515 /* Check for max operation. */
9518 operands
[0] = force_reg (GET_MODE (operands
[0]), operands
[0]);
9519 if (memory_operand (op0
, VOIDmode
))
9520 op0
= force_reg (GET_MODE (operands
[0]), op0
);
9521 if (GET_MODE (operands
[0]) == SFmode
)
9522 emit_insn (gen_maxsf3 (operands
[0], op0
, op1
));
9524 emit_insn (gen_maxdf3 (operands
[0], op0
, op1
));
9528 /* Manage condition to be sse_comparison_operator. In case we are
9529 in non-ieee mode, try to canonicalize the destination operand
9530 to be first in the comparison - this helps reload to avoid extra
9532 if (!sse_comparison_operator (operands
[1], VOIDmode
)
9533 || (rtx_equal_p (operands
[0], ix86_compare_op1
) && !TARGET_IEEE_FP
))
9535 rtx tmp
= ix86_compare_op0
;
9536 ix86_compare_op0
= ix86_compare_op1
;
9537 ix86_compare_op1
= tmp
;
9538 operands
[1] = gen_rtx_fmt_ee (swap_condition (GET_CODE (operands
[1])),
9539 VOIDmode
, ix86_compare_op0
,
9542 /* Similary try to manage result to be first operand of conditional
9543 move. We also don't support the NE comparison on SSE, so try to
9545 if ((rtx_equal_p (operands
[0], operands
[3])
9546 && (!TARGET_IEEE_FP
|| GET_CODE (operands
[1]) != EQ
))
9547 || (GET_CODE (operands
[1]) == NE
&& TARGET_IEEE_FP
))
9549 rtx tmp
= operands
[2];
9550 operands
[2] = operands
[3];
9552 operands
[1] = gen_rtx_fmt_ee (reverse_condition_maybe_unordered
9553 (GET_CODE (operands
[1])),
9554 VOIDmode
, ix86_compare_op0
,
9557 if (GET_MODE (operands
[0]) == SFmode
)
9558 emit_insn (gen_sse_movsfcc (operands
[0], operands
[1],
9559 operands
[2], operands
[3],
9560 ix86_compare_op0
, ix86_compare_op1
));
9562 emit_insn (gen_sse_movdfcc (operands
[0], operands
[1],
9563 operands
[2], operands
[3],
9564 ix86_compare_op0
, ix86_compare_op1
));
9568 /* The floating point conditional move instructions don't directly
9569 support conditions resulting from a signed integer comparison. */
9571 code
= GET_CODE (operands
[1]);
9572 compare_op
= ix86_expand_compare (code
, &second_test
, &bypass_test
);
9574 /* The floating point conditional move instructions don't directly
9575 support signed integer comparisons. */
9577 if (!fcmov_comparison_operator (compare_op
, VOIDmode
))
9579 if (second_test
!= NULL
|| bypass_test
!= NULL
)
9581 tmp
= gen_reg_rtx (QImode
);
9582 ix86_expand_setcc (code
, tmp
);
9584 ix86_compare_op0
= tmp
;
9585 ix86_compare_op1
= const0_rtx
;
9586 compare_op
= ix86_expand_compare (code
, &second_test
, &bypass_test
);
9588 if (bypass_test
&& reg_overlap_mentioned_p (operands
[0], operands
[3]))
9590 tmp
= gen_reg_rtx (GET_MODE (operands
[0]));
9591 emit_move_insn (tmp
, operands
[3]);
9594 if (second_test
&& reg_overlap_mentioned_p (operands
[0], operands
[2]))
9596 tmp
= gen_reg_rtx (GET_MODE (operands
[0]));
9597 emit_move_insn (tmp
, operands
[2]);
9601 emit_insn (gen_rtx_SET (VOIDmode
, operands
[0],
9602 gen_rtx_IF_THEN_ELSE (GET_MODE (operands
[0]),
9607 emit_insn (gen_rtx_SET (VOIDmode
, operands
[0],
9608 gen_rtx_IF_THEN_ELSE (GET_MODE (operands
[0]),
9613 emit_insn (gen_rtx_SET (VOIDmode
, operands
[0],
9614 gen_rtx_IF_THEN_ELSE (GET_MODE (operands
[0]),
9622 /* Split operands 0 and 1 into SImode parts. Similar to split_di, but
9623 works for floating pointer parameters and nonoffsetable memories.
9624 For pushes, it returns just stack offsets; the values will be saved
9625 in the right order. Maximally three parts are generated. */
9628 ix86_split_to_parts (operand
, parts
, mode
)
9631 enum machine_mode mode
;
9636 size
= mode
== TFmode
? 3 : (GET_MODE_SIZE (mode
) / 4);
9638 size
= (GET_MODE_SIZE (mode
) + 4) / 8;
9640 if (GET_CODE (operand
) == REG
&& MMX_REGNO_P (REGNO (operand
)))
9642 if (size
< 2 || size
> 3)
9645 /* Optimize constant pool reference to immediates. This is used by fp
9646 moves, that force all constants to memory to allow combining. */
9647 if (GET_CODE (operand
) == MEM
&& RTX_UNCHANGING_P (operand
))
9649 rtx tmp
= maybe_get_pool_constant (operand
);
9654 if (GET_CODE (operand
) == MEM
&& !offsettable_memref_p (operand
))
9656 /* The only non-offsetable memories we handle are pushes. */
9657 if (! push_operand (operand
, VOIDmode
))
9660 operand
= copy_rtx (operand
);
9661 PUT_MODE (operand
, Pmode
);
9662 parts
[0] = parts
[1] = parts
[2] = operand
;
9664 else if (!TARGET_64BIT
)
9667 split_di (&operand
, 1, &parts
[0], &parts
[1]);
9670 if (REG_P (operand
))
9672 if (!reload_completed
)
9674 parts
[0] = gen_rtx_REG (SImode
, REGNO (operand
) + 0);
9675 parts
[1] = gen_rtx_REG (SImode
, REGNO (operand
) + 1);
9677 parts
[2] = gen_rtx_REG (SImode
, REGNO (operand
) + 2);
9679 else if (offsettable_memref_p (operand
))
9681 operand
= adjust_address (operand
, SImode
, 0);
9683 parts
[1] = adjust_address (operand
, SImode
, 4);
9685 parts
[2] = adjust_address (operand
, SImode
, 8);
9687 else if (GET_CODE (operand
) == CONST_DOUBLE
)
9692 REAL_VALUE_FROM_CONST_DOUBLE (r
, operand
);
9697 REAL_VALUE_TO_TARGET_LONG_DOUBLE (r
, l
);
9698 parts
[2] = gen_int_mode (l
[2], SImode
);
9701 REAL_VALUE_TO_TARGET_DOUBLE (r
, l
);
9706 parts
[1] = gen_int_mode (l
[1], SImode
);
9707 parts
[0] = gen_int_mode (l
[0], SImode
);
9716 split_ti (&operand
, 1, &parts
[0], &parts
[1]);
9717 if (mode
== XFmode
|| mode
== TFmode
)
9719 if (REG_P (operand
))
9721 if (!reload_completed
)
9723 parts
[0] = gen_rtx_REG (DImode
, REGNO (operand
) + 0);
9724 parts
[1] = gen_rtx_REG (SImode
, REGNO (operand
) + 1);
9726 else if (offsettable_memref_p (operand
))
9728 operand
= adjust_address (operand
, DImode
, 0);
9730 parts
[1] = adjust_address (operand
, SImode
, 8);
9732 else if (GET_CODE (operand
) == CONST_DOUBLE
)
9737 REAL_VALUE_FROM_CONST_DOUBLE (r
, operand
);
9738 REAL_VALUE_TO_TARGET_LONG_DOUBLE (r
, l
);
9739 /* Do not use shift by 32 to avoid warning on 32bit systems. */
9740 if (HOST_BITS_PER_WIDE_INT
>= 64)
9743 ((l
[0] & (((HOST_WIDE_INT
) 2 << 31) - 1))
9744 + ((((HOST_WIDE_INT
) l
[1]) << 31) << 1),
9747 parts
[0] = immed_double_const (l
[0], l
[1], DImode
);
9748 parts
[1] = gen_int_mode (l
[2], SImode
);
9758 /* Emit insns to perform a move or push of DI, DF, and XF values.
9759 Return false when normal moves are needed; true when all required
9760 insns have been emitted. Operands 2-4 contain the input values
9761 int the correct order; operands 5-7 contain the output values. */
9764 ix86_split_long_move (operands
)
9771 enum machine_mode mode
= GET_MODE (operands
[0]);
9773 /* The DFmode expanders may ask us to move double.
9774 For 64bit target this is single move. By hiding the fact
9775 here we simplify i386.md splitters. */
9776 if (GET_MODE_SIZE (GET_MODE (operands
[0])) == 8 && TARGET_64BIT
)
9778 /* Optimize constant pool reference to immediates. This is used by
9779 fp moves, that force all constants to memory to allow combining. */
9781 if (GET_CODE (operands
[1]) == MEM
9782 && GET_CODE (XEXP (operands
[1], 0)) == SYMBOL_REF
9783 && CONSTANT_POOL_ADDRESS_P (XEXP (operands
[1], 0)))
9784 operands
[1] = get_pool_constant (XEXP (operands
[1], 0));
9785 if (push_operand (operands
[0], VOIDmode
))
9787 operands
[0] = copy_rtx (operands
[0]);
9788 PUT_MODE (operands
[0], Pmode
);
9791 operands
[0] = gen_lowpart (DImode
, operands
[0]);
9792 operands
[1] = gen_lowpart (DImode
, operands
[1]);
9793 emit_move_insn (operands
[0], operands
[1]);
9797 /* The only non-offsettable memory we handle is push. */
9798 if (push_operand (operands
[0], VOIDmode
))
9800 else if (GET_CODE (operands
[0]) == MEM
9801 && ! offsettable_memref_p (operands
[0]))
9804 nparts
= ix86_split_to_parts (operands
[1], part
[1], GET_MODE (operands
[0]));
9805 ix86_split_to_parts (operands
[0], part
[0], GET_MODE (operands
[0]));
9807 /* When emitting push, take care for source operands on the stack. */
9808 if (push
&& GET_CODE (operands
[1]) == MEM
9809 && reg_overlap_mentioned_p (stack_pointer_rtx
, operands
[1]))
9812 part
[1][1] = change_address (part
[1][1], GET_MODE (part
[1][1]),
9813 XEXP (part
[1][2], 0));
9814 part
[1][0] = change_address (part
[1][0], GET_MODE (part
[1][0]),
9815 XEXP (part
[1][1], 0));
9818 /* We need to do copy in the right order in case an address register
9819 of the source overlaps the destination. */
9820 if (REG_P (part
[0][0]) && GET_CODE (part
[1][0]) == MEM
)
9822 if (reg_overlap_mentioned_p (part
[0][0], XEXP (part
[1][0], 0)))
9824 if (reg_overlap_mentioned_p (part
[0][1], XEXP (part
[1][0], 0)))
9827 && reg_overlap_mentioned_p (part
[0][2], XEXP (part
[1][0], 0)))
9830 /* Collision in the middle part can be handled by reordering. */
9831 if (collisions
== 1 && nparts
== 3
9832 && reg_overlap_mentioned_p (part
[0][1], XEXP (part
[1][0], 0)))
9835 tmp
= part
[0][1]; part
[0][1] = part
[0][2]; part
[0][2] = tmp
;
9836 tmp
= part
[1][1]; part
[1][1] = part
[1][2]; part
[1][2] = tmp
;
9839 /* If there are more collisions, we can't handle it by reordering.
9840 Do an lea to the last part and use only one colliding move. */
9841 else if (collisions
> 1)
9844 emit_insn (gen_rtx_SET (VOIDmode
, part
[0][nparts
- 1],
9845 XEXP (part
[1][0], 0)));
9846 part
[1][0] = change_address (part
[1][0],
9847 TARGET_64BIT
? DImode
: SImode
,
9848 part
[0][nparts
- 1]);
9849 part
[1][1] = adjust_address (part
[1][0], VOIDmode
, UNITS_PER_WORD
);
9851 part
[1][2] = adjust_address (part
[1][0], VOIDmode
, 8);
9861 /* We use only first 12 bytes of TFmode value, but for pushing we
9862 are required to adjust stack as if we were pushing real 16byte
9864 if (mode
== TFmode
&& !TARGET_64BIT
)
9865 emit_insn (gen_addsi3 (stack_pointer_rtx
, stack_pointer_rtx
,
9867 emit_move_insn (part
[0][2], part
[1][2]);
9872 /* In 64bit mode we don't have 32bit push available. In case this is
9873 register, it is OK - we will just use larger counterpart. We also
9874 retype memory - these comes from attempt to avoid REX prefix on
9875 moving of second half of TFmode value. */
9876 if (GET_MODE (part
[1][1]) == SImode
)
9878 if (GET_CODE (part
[1][1]) == MEM
)
9879 part
[1][1] = adjust_address (part
[1][1], DImode
, 0);
9880 else if (REG_P (part
[1][1]))
9881 part
[1][1] = gen_rtx_REG (DImode
, REGNO (part
[1][1]));
9884 if (GET_MODE (part
[1][0]) == SImode
)
9885 part
[1][0] = part
[1][1];
9888 emit_move_insn (part
[0][1], part
[1][1]);
9889 emit_move_insn (part
[0][0], part
[1][0]);
9893 /* Choose correct order to not overwrite the source before it is copied. */
9894 if ((REG_P (part
[0][0])
9895 && REG_P (part
[1][1])
9896 && (REGNO (part
[0][0]) == REGNO (part
[1][1])
9898 && REGNO (part
[0][0]) == REGNO (part
[1][2]))))
9900 && reg_overlap_mentioned_p (part
[0][0], XEXP (part
[1][0], 0))))
9904 operands
[2] = part
[0][2];
9905 operands
[3] = part
[0][1];
9906 operands
[4] = part
[0][0];
9907 operands
[5] = part
[1][2];
9908 operands
[6] = part
[1][1];
9909 operands
[7] = part
[1][0];
9913 operands
[2] = part
[0][1];
9914 operands
[3] = part
[0][0];
9915 operands
[5] = part
[1][1];
9916 operands
[6] = part
[1][0];
9923 operands
[2] = part
[0][0];
9924 operands
[3] = part
[0][1];
9925 operands
[4] = part
[0][2];
9926 operands
[5] = part
[1][0];
9927 operands
[6] = part
[1][1];
9928 operands
[7] = part
[1][2];
9932 operands
[2] = part
[0][0];
9933 operands
[3] = part
[0][1];
9934 operands
[5] = part
[1][0];
9935 operands
[6] = part
[1][1];
9938 emit_move_insn (operands
[2], operands
[5]);
9939 emit_move_insn (operands
[3], operands
[6]);
9941 emit_move_insn (operands
[4], operands
[7]);
9947 ix86_split_ashldi (operands
, scratch
)
9948 rtx
*operands
, scratch
;
9950 rtx low
[2], high
[2];
9953 if (GET_CODE (operands
[2]) == CONST_INT
)
9955 split_di (operands
, 2, low
, high
);
9956 count
= INTVAL (operands
[2]) & 63;
9960 emit_move_insn (high
[0], low
[1]);
9961 emit_move_insn (low
[0], const0_rtx
);
9964 emit_insn (gen_ashlsi3 (high
[0], high
[0], GEN_INT (count
- 32)));
9968 if (!rtx_equal_p (operands
[0], operands
[1]))
9969 emit_move_insn (operands
[0], operands
[1]);
9970 emit_insn (gen_x86_shld_1 (high
[0], low
[0], GEN_INT (count
)));
9971 emit_insn (gen_ashlsi3 (low
[0], low
[0], GEN_INT (count
)));
9976 if (!rtx_equal_p (operands
[0], operands
[1]))
9977 emit_move_insn (operands
[0], operands
[1]);
9979 split_di (operands
, 1, low
, high
);
9981 emit_insn (gen_x86_shld_1 (high
[0], low
[0], operands
[2]));
9982 emit_insn (gen_ashlsi3 (low
[0], low
[0], operands
[2]));
9984 if (TARGET_CMOVE
&& (! no_new_pseudos
|| scratch
))
9986 if (! no_new_pseudos
)
9987 scratch
= force_reg (SImode
, const0_rtx
);
9989 emit_move_insn (scratch
, const0_rtx
);
9991 emit_insn (gen_x86_shift_adj_1 (high
[0], low
[0], operands
[2],
9995 emit_insn (gen_x86_shift_adj_2 (high
[0], low
[0], operands
[2]));
10000 ix86_split_ashrdi (operands
, scratch
)
10001 rtx
*operands
, scratch
;
10003 rtx low
[2], high
[2];
10006 if (GET_CODE (operands
[2]) == CONST_INT
)
10008 split_di (operands
, 2, low
, high
);
10009 count
= INTVAL (operands
[2]) & 63;
10013 emit_move_insn (low
[0], high
[1]);
10015 if (! reload_completed
)
10016 emit_insn (gen_ashrsi3 (high
[0], low
[0], GEN_INT (31)));
10019 emit_move_insn (high
[0], low
[0]);
10020 emit_insn (gen_ashrsi3 (high
[0], high
[0], GEN_INT (31)));
10024 emit_insn (gen_ashrsi3 (low
[0], low
[0], GEN_INT (count
- 32)));
10028 if (!rtx_equal_p (operands
[0], operands
[1]))
10029 emit_move_insn (operands
[0], operands
[1]);
10030 emit_insn (gen_x86_shrd_1 (low
[0], high
[0], GEN_INT (count
)));
10031 emit_insn (gen_ashrsi3 (high
[0], high
[0], GEN_INT (count
)));
10036 if (!rtx_equal_p (operands
[0], operands
[1]))
10037 emit_move_insn (operands
[0], operands
[1]);
10039 split_di (operands
, 1, low
, high
);
10041 emit_insn (gen_x86_shrd_1 (low
[0], high
[0], operands
[2]));
10042 emit_insn (gen_ashrsi3 (high
[0], high
[0], operands
[2]));
10044 if (TARGET_CMOVE
&& (! no_new_pseudos
|| scratch
))
10046 if (! no_new_pseudos
)
10047 scratch
= gen_reg_rtx (SImode
);
10048 emit_move_insn (scratch
, high
[0]);
10049 emit_insn (gen_ashrsi3 (scratch
, scratch
, GEN_INT (31)));
10050 emit_insn (gen_x86_shift_adj_1 (low
[0], high
[0], operands
[2],
10054 emit_insn (gen_x86_shift_adj_3 (low
[0], high
[0], operands
[2]));
10059 ix86_split_lshrdi (operands
, scratch
)
10060 rtx
*operands
, scratch
;
10062 rtx low
[2], high
[2];
10065 if (GET_CODE (operands
[2]) == CONST_INT
)
10067 split_di (operands
, 2, low
, high
);
10068 count
= INTVAL (operands
[2]) & 63;
10072 emit_move_insn (low
[0], high
[1]);
10073 emit_move_insn (high
[0], const0_rtx
);
10076 emit_insn (gen_lshrsi3 (low
[0], low
[0], GEN_INT (count
- 32)));
10080 if (!rtx_equal_p (operands
[0], operands
[1]))
10081 emit_move_insn (operands
[0], operands
[1]);
10082 emit_insn (gen_x86_shrd_1 (low
[0], high
[0], GEN_INT (count
)));
10083 emit_insn (gen_lshrsi3 (high
[0], high
[0], GEN_INT (count
)));
10088 if (!rtx_equal_p (operands
[0], operands
[1]))
10089 emit_move_insn (operands
[0], operands
[1]);
10091 split_di (operands
, 1, low
, high
);
10093 emit_insn (gen_x86_shrd_1 (low
[0], high
[0], operands
[2]));
10094 emit_insn (gen_lshrsi3 (high
[0], high
[0], operands
[2]));
10096 /* Heh. By reversing the arguments, we can reuse this pattern. */
10097 if (TARGET_CMOVE
&& (! no_new_pseudos
|| scratch
))
10099 if (! no_new_pseudos
)
10100 scratch
= force_reg (SImode
, const0_rtx
);
10102 emit_move_insn (scratch
, const0_rtx
);
10104 emit_insn (gen_x86_shift_adj_1 (low
[0], high
[0], operands
[2],
10108 emit_insn (gen_x86_shift_adj_2 (low
[0], high
[0], operands
[2]));
10112 /* Helper function for the string operations below. Dest VARIABLE whether
10113 it is aligned to VALUE bytes. If true, jump to the label. */
10115 ix86_expand_aligntest (variable
, value
)
10119 rtx label
= gen_label_rtx ();
10120 rtx tmpcount
= gen_reg_rtx (GET_MODE (variable
));
10121 if (GET_MODE (variable
) == DImode
)
10122 emit_insn (gen_anddi3 (tmpcount
, variable
, GEN_INT (value
)));
10124 emit_insn (gen_andsi3 (tmpcount
, variable
, GEN_INT (value
)));
10125 emit_cmp_and_jump_insns (tmpcount
, const0_rtx
, EQ
, 0, GET_MODE (variable
),
10130 /* Adjust COUNTER by the VALUE. */
10132 ix86_adjust_counter (countreg
, value
)
10134 HOST_WIDE_INT value
;
10136 if (GET_MODE (countreg
) == DImode
)
10137 emit_insn (gen_adddi3 (countreg
, countreg
, GEN_INT (-value
)));
10139 emit_insn (gen_addsi3 (countreg
, countreg
, GEN_INT (-value
)));
10142 /* Zero extend possibly SImode EXP to Pmode register. */
10144 ix86_zero_extend_to_Pmode (exp
)
10148 if (GET_MODE (exp
) == VOIDmode
)
10149 return force_reg (Pmode
, exp
);
10150 if (GET_MODE (exp
) == Pmode
)
10151 return copy_to_mode_reg (Pmode
, exp
);
10152 r
= gen_reg_rtx (Pmode
);
10153 emit_insn (gen_zero_extendsidi2 (r
, exp
));
10157 /* Expand string move (memcpy) operation. Use i386 string operations when
10158 profitable. expand_clrstr contains similar code. */
10160 ix86_expand_movstr (dst
, src
, count_exp
, align_exp
)
10161 rtx dst
, src
, count_exp
, align_exp
;
10163 rtx srcreg
, destreg
, countreg
;
10164 enum machine_mode counter_mode
;
10165 HOST_WIDE_INT align
= 0;
10166 unsigned HOST_WIDE_INT count
= 0;
10171 if (GET_CODE (align_exp
) == CONST_INT
)
10172 align
= INTVAL (align_exp
);
10174 /* This simple hack avoids all inlining code and simplifies code below. */
10175 if (!TARGET_ALIGN_STRINGOPS
)
10178 if (GET_CODE (count_exp
) == CONST_INT
)
10179 count
= INTVAL (count_exp
);
10181 /* Figure out proper mode for counter. For 32bits it is always SImode,
10182 for 64bits use SImode when possible, otherwise DImode.
10183 Set count to number of bytes copied when known at compile time. */
10184 if (!TARGET_64BIT
|| GET_MODE (count_exp
) == SImode
10185 || x86_64_zero_extended_value (count_exp
))
10186 counter_mode
= SImode
;
10188 counter_mode
= DImode
;
10190 if (counter_mode
!= SImode
&& counter_mode
!= DImode
)
10193 destreg
= copy_to_mode_reg (Pmode
, XEXP (dst
, 0));
10194 srcreg
= copy_to_mode_reg (Pmode
, XEXP (src
, 0));
10196 emit_insn (gen_cld ());
10198 /* When optimizing for size emit simple rep ; movsb instruction for
10199 counts not divisible by 4. */
10201 if ((!optimize
|| optimize_size
) && (count
== 0 || (count
& 0x03)))
10203 countreg
= ix86_zero_extend_to_Pmode (count_exp
);
10205 emit_insn (gen_rep_movqi_rex64 (destreg
, srcreg
, countreg
,
10206 destreg
, srcreg
, countreg
));
10208 emit_insn (gen_rep_movqi (destreg
, srcreg
, countreg
,
10209 destreg
, srcreg
, countreg
));
10212 /* For constant aligned (or small unaligned) copies use rep movsl
10213 followed by code copying the rest. For PentiumPro ensure 8 byte
10214 alignment to allow rep movsl acceleration. */
10216 else if (count
!= 0
10218 || (!TARGET_PENTIUMPRO
&& !TARGET_64BIT
&& align
>= 4)
10219 || optimize_size
|| count
< (unsigned int) 64))
10221 int size
= TARGET_64BIT
&& !optimize_size
? 8 : 4;
10222 if (count
& ~(size
- 1))
10224 countreg
= copy_to_mode_reg (counter_mode
,
10225 GEN_INT ((count
>> (size
== 4 ? 2 : 3))
10226 & (TARGET_64BIT
? -1 : 0x3fffffff)));
10227 countreg
= ix86_zero_extend_to_Pmode (countreg
);
10231 emit_insn (gen_rep_movsi_rex64 (destreg
, srcreg
, countreg
,
10232 destreg
, srcreg
, countreg
));
10234 emit_insn (gen_rep_movsi (destreg
, srcreg
, countreg
,
10235 destreg
, srcreg
, countreg
));
10238 emit_insn (gen_rep_movdi_rex64 (destreg
, srcreg
, countreg
,
10239 destreg
, srcreg
, countreg
));
10241 if (size
== 8 && (count
& 0x04))
10242 emit_insn (gen_strmovsi (destreg
, srcreg
));
10244 emit_insn (gen_strmovhi (destreg
, srcreg
));
10246 emit_insn (gen_strmovqi (destreg
, srcreg
));
10248 /* The generic code based on the glibc implementation:
10249 - align destination to 4 bytes (8 byte alignment is used for PentiumPro
10250 allowing accelerated copying there)
10251 - copy the data using rep movsl
10252 - copy the rest. */
10257 int desired_alignment
= (TARGET_PENTIUMPRO
10258 && (count
== 0 || count
>= (unsigned int) 260)
10259 ? 8 : UNITS_PER_WORD
);
10261 /* In case we don't know anything about the alignment, default to
10262 library version, since it is usually equally fast and result in
10264 if (!TARGET_INLINE_ALL_STRINGOPS
&& align
< UNITS_PER_WORD
)
10270 if (TARGET_SINGLE_STRINGOP
)
10271 emit_insn (gen_cld ());
10273 countreg2
= gen_reg_rtx (Pmode
);
10274 countreg
= copy_to_mode_reg (counter_mode
, count_exp
);
10276 /* We don't use loops to align destination and to copy parts smaller
10277 than 4 bytes, because gcc is able to optimize such code better (in
10278 the case the destination or the count really is aligned, gcc is often
10279 able to predict the branches) and also it is friendlier to the
10280 hardware branch prediction.
10282 Using loops is benefical for generic case, because we can
10283 handle small counts using the loops. Many CPUs (such as Athlon)
10284 have large REP prefix setup costs.
10286 This is quite costy. Maybe we can revisit this decision later or
10287 add some customizability to this code. */
10289 if (count
== 0 && align
< desired_alignment
)
10291 label
= gen_label_rtx ();
10292 emit_cmp_and_jump_insns (countreg
, GEN_INT (desired_alignment
- 1),
10293 LEU
, 0, counter_mode
, 1, label
);
10297 rtx label
= ix86_expand_aligntest (destreg
, 1);
10298 emit_insn (gen_strmovqi (destreg
, srcreg
));
10299 ix86_adjust_counter (countreg
, 1);
10300 emit_label (label
);
10301 LABEL_NUSES (label
) = 1;
10305 rtx label
= ix86_expand_aligntest (destreg
, 2);
10306 emit_insn (gen_strmovhi (destreg
, srcreg
));
10307 ix86_adjust_counter (countreg
, 2);
10308 emit_label (label
);
10309 LABEL_NUSES (label
) = 1;
10311 if (align
<= 4 && desired_alignment
> 4)
10313 rtx label
= ix86_expand_aligntest (destreg
, 4);
10314 emit_insn (gen_strmovsi (destreg
, srcreg
));
10315 ix86_adjust_counter (countreg
, 4);
10316 emit_label (label
);
10317 LABEL_NUSES (label
) = 1;
10320 if (label
&& desired_alignment
> 4 && !TARGET_64BIT
)
10322 emit_label (label
);
10323 LABEL_NUSES (label
) = 1;
10326 if (!TARGET_SINGLE_STRINGOP
)
10327 emit_insn (gen_cld ());
10330 emit_insn (gen_lshrdi3 (countreg2
, ix86_zero_extend_to_Pmode (countreg
),
10332 emit_insn (gen_rep_movdi_rex64 (destreg
, srcreg
, countreg2
,
10333 destreg
, srcreg
, countreg2
));
10337 emit_insn (gen_lshrsi3 (countreg2
, countreg
, GEN_INT (2)));
10338 emit_insn (gen_rep_movsi (destreg
, srcreg
, countreg2
,
10339 destreg
, srcreg
, countreg2
));
10344 emit_label (label
);
10345 LABEL_NUSES (label
) = 1;
10347 if (TARGET_64BIT
&& align
> 4 && count
!= 0 && (count
& 4))
10348 emit_insn (gen_strmovsi (destreg
, srcreg
));
10349 if ((align
<= 4 || count
== 0) && TARGET_64BIT
)
10351 rtx label
= ix86_expand_aligntest (countreg
, 4);
10352 emit_insn (gen_strmovsi (destreg
, srcreg
));
10353 emit_label (label
);
10354 LABEL_NUSES (label
) = 1;
10356 if (align
> 2 && count
!= 0 && (count
& 2))
10357 emit_insn (gen_strmovhi (destreg
, srcreg
));
10358 if (align
<= 2 || count
== 0)
10360 rtx label
= ix86_expand_aligntest (countreg
, 2);
10361 emit_insn (gen_strmovhi (destreg
, srcreg
));
10362 emit_label (label
);
10363 LABEL_NUSES (label
) = 1;
10365 if (align
> 1 && count
!= 0 && (count
& 1))
10366 emit_insn (gen_strmovqi (destreg
, srcreg
));
10367 if (align
<= 1 || count
== 0)
10369 rtx label
= ix86_expand_aligntest (countreg
, 1);
10370 emit_insn (gen_strmovqi (destreg
, srcreg
));
10371 emit_label (label
);
10372 LABEL_NUSES (label
) = 1;
10376 insns
= get_insns ();
10379 ix86_set_move_mem_attrs (insns
, dst
, src
, destreg
, srcreg
);
10384 /* Expand string clear operation (bzero). Use i386 string operations when
10385 profitable. expand_movstr contains similar code. */
10387 ix86_expand_clrstr (src
, count_exp
, align_exp
)
10388 rtx src
, count_exp
, align_exp
;
10390 rtx destreg
, zeroreg
, countreg
;
10391 enum machine_mode counter_mode
;
10392 HOST_WIDE_INT align
= 0;
10393 unsigned HOST_WIDE_INT count
= 0;
10395 if (GET_CODE (align_exp
) == CONST_INT
)
10396 align
= INTVAL (align_exp
);
10398 /* This simple hack avoids all inlining code and simplifies code below. */
10399 if (!TARGET_ALIGN_STRINGOPS
)
10402 if (GET_CODE (count_exp
) == CONST_INT
)
10403 count
= INTVAL (count_exp
);
10404 /* Figure out proper mode for counter. For 32bits it is always SImode,
10405 for 64bits use SImode when possible, otherwise DImode.
10406 Set count to number of bytes copied when known at compile time. */
10407 if (!TARGET_64BIT
|| GET_MODE (count_exp
) == SImode
10408 || x86_64_zero_extended_value (count_exp
))
10409 counter_mode
= SImode
;
10411 counter_mode
= DImode
;
10413 destreg
= copy_to_mode_reg (Pmode
, XEXP (src
, 0));
10415 emit_insn (gen_cld ());
10417 /* When optimizing for size emit simple rep ; movsb instruction for
10418 counts not divisible by 4. */
10420 if ((!optimize
|| optimize_size
) && (count
== 0 || (count
& 0x03)))
10422 countreg
= ix86_zero_extend_to_Pmode (count_exp
);
10423 zeroreg
= copy_to_mode_reg (QImode
, const0_rtx
);
10425 emit_insn (gen_rep_stosqi_rex64 (destreg
, countreg
, zeroreg
,
10426 destreg
, countreg
));
10428 emit_insn (gen_rep_stosqi (destreg
, countreg
, zeroreg
,
10429 destreg
, countreg
));
10431 else if (count
!= 0
10433 || (!TARGET_PENTIUMPRO
&& !TARGET_64BIT
&& align
>= 4)
10434 || optimize_size
|| count
< (unsigned int) 64))
10436 int size
= TARGET_64BIT
&& !optimize_size
? 8 : 4;
10437 zeroreg
= copy_to_mode_reg (size
== 4 ? SImode
: DImode
, const0_rtx
);
10438 if (count
& ~(size
- 1))
10440 countreg
= copy_to_mode_reg (counter_mode
,
10441 GEN_INT ((count
>> (size
== 4 ? 2 : 3))
10442 & (TARGET_64BIT
? -1 : 0x3fffffff)));
10443 countreg
= ix86_zero_extend_to_Pmode (countreg
);
10447 emit_insn (gen_rep_stossi_rex64 (destreg
, countreg
, zeroreg
,
10448 destreg
, countreg
));
10450 emit_insn (gen_rep_stossi (destreg
, countreg
, zeroreg
,
10451 destreg
, countreg
));
10454 emit_insn (gen_rep_stosdi_rex64 (destreg
, countreg
, zeroreg
,
10455 destreg
, countreg
));
10457 if (size
== 8 && (count
& 0x04))
10458 emit_insn (gen_strsetsi (destreg
,
10459 gen_rtx_SUBREG (SImode
, zeroreg
, 0)));
10461 emit_insn (gen_strsethi (destreg
,
10462 gen_rtx_SUBREG (HImode
, zeroreg
, 0)));
10464 emit_insn (gen_strsetqi (destreg
,
10465 gen_rtx_SUBREG (QImode
, zeroreg
, 0)));
10471 /* Compute desired alignment of the string operation. */
10472 int desired_alignment
= (TARGET_PENTIUMPRO
10473 && (count
== 0 || count
>= (unsigned int) 260)
10474 ? 8 : UNITS_PER_WORD
);
10476 /* In case we don't know anything about the alignment, default to
10477 library version, since it is usually equally fast and result in
10479 if (!TARGET_INLINE_ALL_STRINGOPS
&& align
< UNITS_PER_WORD
)
10482 if (TARGET_SINGLE_STRINGOP
)
10483 emit_insn (gen_cld ());
10485 countreg2
= gen_reg_rtx (Pmode
);
10486 countreg
= copy_to_mode_reg (counter_mode
, count_exp
);
10487 zeroreg
= copy_to_mode_reg (Pmode
, const0_rtx
);
10489 if (count
== 0 && align
< desired_alignment
)
10491 label
= gen_label_rtx ();
10492 emit_cmp_and_jump_insns (countreg
, GEN_INT (desired_alignment
- 1),
10493 LEU
, 0, counter_mode
, 1, label
);
10497 rtx label
= ix86_expand_aligntest (destreg
, 1);
10498 emit_insn (gen_strsetqi (destreg
,
10499 gen_rtx_SUBREG (QImode
, zeroreg
, 0)));
10500 ix86_adjust_counter (countreg
, 1);
10501 emit_label (label
);
10502 LABEL_NUSES (label
) = 1;
10506 rtx label
= ix86_expand_aligntest (destreg
, 2);
10507 emit_insn (gen_strsethi (destreg
,
10508 gen_rtx_SUBREG (HImode
, zeroreg
, 0)));
10509 ix86_adjust_counter (countreg
, 2);
10510 emit_label (label
);
10511 LABEL_NUSES (label
) = 1;
10513 if (align
<= 4 && desired_alignment
> 4)
10515 rtx label
= ix86_expand_aligntest (destreg
, 4);
10516 emit_insn (gen_strsetsi (destreg
, (TARGET_64BIT
10517 ? gen_rtx_SUBREG (SImode
, zeroreg
, 0)
10519 ix86_adjust_counter (countreg
, 4);
10520 emit_label (label
);
10521 LABEL_NUSES (label
) = 1;
10524 if (label
&& desired_alignment
> 4 && !TARGET_64BIT
)
10526 emit_label (label
);
10527 LABEL_NUSES (label
) = 1;
10531 if (!TARGET_SINGLE_STRINGOP
)
10532 emit_insn (gen_cld ());
10535 emit_insn (gen_lshrdi3 (countreg2
, ix86_zero_extend_to_Pmode (countreg
),
10537 emit_insn (gen_rep_stosdi_rex64 (destreg
, countreg2
, zeroreg
,
10538 destreg
, countreg2
));
10542 emit_insn (gen_lshrsi3 (countreg2
, countreg
, GEN_INT (2)));
10543 emit_insn (gen_rep_stossi (destreg
, countreg2
, zeroreg
,
10544 destreg
, countreg2
));
10548 emit_label (label
);
10549 LABEL_NUSES (label
) = 1;
10552 if (TARGET_64BIT
&& align
> 4 && count
!= 0 && (count
& 4))
10553 emit_insn (gen_strsetsi (destreg
,
10554 gen_rtx_SUBREG (SImode
, zeroreg
, 0)));
10555 if (TARGET_64BIT
&& (align
<= 4 || count
== 0))
10557 rtx label
= ix86_expand_aligntest (countreg
, 4);
10558 emit_insn (gen_strsetsi (destreg
,
10559 gen_rtx_SUBREG (SImode
, zeroreg
, 0)));
10560 emit_label (label
);
10561 LABEL_NUSES (label
) = 1;
10563 if (align
> 2 && count
!= 0 && (count
& 2))
10564 emit_insn (gen_strsethi (destreg
,
10565 gen_rtx_SUBREG (HImode
, zeroreg
, 0)));
10566 if (align
<= 2 || count
== 0)
10568 rtx label
= ix86_expand_aligntest (countreg
, 2);
10569 emit_insn (gen_strsethi (destreg
,
10570 gen_rtx_SUBREG (HImode
, zeroreg
, 0)));
10571 emit_label (label
);
10572 LABEL_NUSES (label
) = 1;
10574 if (align
> 1 && count
!= 0 && (count
& 1))
10575 emit_insn (gen_strsetqi (destreg
,
10576 gen_rtx_SUBREG (QImode
, zeroreg
, 0)));
10577 if (align
<= 1 || count
== 0)
10579 rtx label
= ix86_expand_aligntest (countreg
, 1);
10580 emit_insn (gen_strsetqi (destreg
,
10581 gen_rtx_SUBREG (QImode
, zeroreg
, 0)));
10582 emit_label (label
);
10583 LABEL_NUSES (label
) = 1;
10588 /* Expand strlen. */
10590 ix86_expand_strlen (out
, src
, eoschar
, align
)
10591 rtx out
, src
, eoschar
, align
;
10593 rtx addr
, scratch1
, scratch2
, scratch3
, scratch4
;
10595 /* The generic case of strlen expander is long. Avoid it's
10596 expanding unless TARGET_INLINE_ALL_STRINGOPS. */
10598 if (TARGET_UNROLL_STRLEN
&& eoschar
== const0_rtx
&& optimize
> 1
10599 && !TARGET_INLINE_ALL_STRINGOPS
10601 && (GET_CODE (align
) != CONST_INT
|| INTVAL (align
) < 4))
10604 addr
= force_reg (Pmode
, XEXP (src
, 0));
10605 scratch1
= gen_reg_rtx (Pmode
);
10607 if (TARGET_UNROLL_STRLEN
&& eoschar
== const0_rtx
&& optimize
> 1
10610 /* Well it seems that some optimizer does not combine a call like
10611 foo(strlen(bar), strlen(bar));
10612 when the move and the subtraction is done here. It does calculate
10613 the length just once when these instructions are done inside of
10614 output_strlen_unroll(). But I think since &bar[strlen(bar)] is
10615 often used and I use one fewer register for the lifetime of
10616 output_strlen_unroll() this is better. */
10618 emit_move_insn (out
, addr
);
10620 ix86_expand_strlensi_unroll_1 (out
, align
);
10622 /* strlensi_unroll_1 returns the address of the zero at the end of
10623 the string, like memchr(), so compute the length by subtracting
10624 the start address. */
10626 emit_insn (gen_subdi3 (out
, out
, addr
));
10628 emit_insn (gen_subsi3 (out
, out
, addr
));
10632 scratch2
= gen_reg_rtx (Pmode
);
10633 scratch3
= gen_reg_rtx (Pmode
);
10634 scratch4
= force_reg (Pmode
, constm1_rtx
);
10636 emit_move_insn (scratch3
, addr
);
10637 eoschar
= force_reg (QImode
, eoschar
);
10639 emit_insn (gen_cld ());
10642 emit_insn (gen_strlenqi_rex_1 (scratch1
, scratch3
, eoschar
,
10643 align
, scratch4
, scratch3
));
10644 emit_insn (gen_one_cmpldi2 (scratch2
, scratch1
));
10645 emit_insn (gen_adddi3 (out
, scratch2
, constm1_rtx
));
10649 emit_insn (gen_strlenqi_1 (scratch1
, scratch3
, eoschar
,
10650 align
, scratch4
, scratch3
));
10651 emit_insn (gen_one_cmplsi2 (scratch2
, scratch1
));
10652 emit_insn (gen_addsi3 (out
, scratch2
, constm1_rtx
));
10658 /* Expand the appropriate insns for doing strlen if not just doing
10661 out = result, initialized with the start address
10662 align_rtx = alignment of the address.
10663 scratch = scratch register, initialized with the startaddress when
10664 not aligned, otherwise undefined
10666 This is just the body. It needs the initialisations mentioned above and
10667 some address computing at the end. These things are done in i386.md. */
10670 ix86_expand_strlensi_unroll_1 (out
, align_rtx
)
10671 rtx out
, align_rtx
;
10675 rtx align_2_label
= NULL_RTX
;
10676 rtx align_3_label
= NULL_RTX
;
10677 rtx align_4_label
= gen_label_rtx ();
10678 rtx end_0_label
= gen_label_rtx ();
10680 rtx tmpreg
= gen_reg_rtx (SImode
);
10681 rtx scratch
= gen_reg_rtx (SImode
);
10684 if (GET_CODE (align_rtx
) == CONST_INT
)
10685 align
= INTVAL (align_rtx
);
10687 /* Loop to check 1..3 bytes for null to get an aligned pointer. */
10689 /* Is there a known alignment and is it less than 4? */
10692 rtx scratch1
= gen_reg_rtx (Pmode
);
10693 emit_move_insn (scratch1
, out
);
10694 /* Is there a known alignment and is it not 2? */
10697 align_3_label
= gen_label_rtx (); /* Label when aligned to 3-byte */
10698 align_2_label
= gen_label_rtx (); /* Label when aligned to 2-byte */
10700 /* Leave just the 3 lower bits. */
10701 align_rtx
= expand_binop (Pmode
, and_optab
, scratch1
, GEN_INT (3),
10702 NULL_RTX
, 0, OPTAB_WIDEN
);
10704 emit_cmp_and_jump_insns (align_rtx
, const0_rtx
, EQ
, NULL
,
10705 Pmode
, 1, align_4_label
);
10706 emit_cmp_and_jump_insns (align_rtx
, GEN_INT (2), EQ
, NULL
,
10707 Pmode
, 1, align_2_label
);
10708 emit_cmp_and_jump_insns (align_rtx
, GEN_INT (2), GTU
, NULL
,
10709 Pmode
, 1, align_3_label
);
10713 /* Since the alignment is 2, we have to check 2 or 0 bytes;
10714 check if is aligned to 4 - byte. */
10716 align_rtx
= expand_binop (Pmode
, and_optab
, scratch1
, GEN_INT (2),
10717 NULL_RTX
, 0, OPTAB_WIDEN
);
10719 emit_cmp_and_jump_insns (align_rtx
, const0_rtx
, EQ
, NULL
,
10720 Pmode
, 1, align_4_label
);
10723 mem
= gen_rtx_MEM (QImode
, out
);
10725 /* Now compare the bytes. */
10727 /* Compare the first n unaligned byte on a byte per byte basis. */
10728 emit_cmp_and_jump_insns (mem
, const0_rtx
, EQ
, NULL
,
10729 QImode
, 1, end_0_label
);
10731 /* Increment the address. */
10733 emit_insn (gen_adddi3 (out
, out
, const1_rtx
));
10735 emit_insn (gen_addsi3 (out
, out
, const1_rtx
));
10737 /* Not needed with an alignment of 2 */
10740 emit_label (align_2_label
);
10742 emit_cmp_and_jump_insns (mem
, const0_rtx
, EQ
, NULL
, QImode
, 1,
10746 emit_insn (gen_adddi3 (out
, out
, const1_rtx
));
10748 emit_insn (gen_addsi3 (out
, out
, const1_rtx
));
10750 emit_label (align_3_label
);
10753 emit_cmp_and_jump_insns (mem
, const0_rtx
, EQ
, NULL
, QImode
, 1,
10757 emit_insn (gen_adddi3 (out
, out
, const1_rtx
));
10759 emit_insn (gen_addsi3 (out
, out
, const1_rtx
));
10762 /* Generate loop to check 4 bytes at a time. It is not a good idea to
10763 align this loop. It gives only huge programs, but does not help to
10765 emit_label (align_4_label
);
10767 mem
= gen_rtx_MEM (SImode
, out
);
10768 emit_move_insn (scratch
, mem
);
10770 emit_insn (gen_adddi3 (out
, out
, GEN_INT (4)));
10772 emit_insn (gen_addsi3 (out
, out
, GEN_INT (4)));
10774 /* This formula yields a nonzero result iff one of the bytes is zero.
10775 This saves three branches inside loop and many cycles. */
10777 emit_insn (gen_addsi3 (tmpreg
, scratch
, GEN_INT (-0x01010101)));
10778 emit_insn (gen_one_cmplsi2 (scratch
, scratch
));
10779 emit_insn (gen_andsi3 (tmpreg
, tmpreg
, scratch
));
10780 emit_insn (gen_andsi3 (tmpreg
, tmpreg
,
10781 gen_int_mode (0x80808080, SImode
)));
10782 emit_cmp_and_jump_insns (tmpreg
, const0_rtx
, EQ
, 0, SImode
, 1,
10787 rtx reg
= gen_reg_rtx (SImode
);
10788 rtx reg2
= gen_reg_rtx (Pmode
);
10789 emit_move_insn (reg
, tmpreg
);
10790 emit_insn (gen_lshrsi3 (reg
, reg
, GEN_INT (16)));
10792 /* If zero is not in the first two bytes, move two bytes forward. */
10793 emit_insn (gen_testsi_ccno_1 (tmpreg
, GEN_INT (0x8080)));
10794 tmp
= gen_rtx_REG (CCNOmode
, FLAGS_REG
);
10795 tmp
= gen_rtx_EQ (VOIDmode
, tmp
, const0_rtx
);
10796 emit_insn (gen_rtx_SET (VOIDmode
, tmpreg
,
10797 gen_rtx_IF_THEN_ELSE (SImode
, tmp
,
10800 /* Emit lea manually to avoid clobbering of flags. */
10801 emit_insn (gen_rtx_SET (SImode
, reg2
,
10802 gen_rtx_PLUS (Pmode
, out
, GEN_INT (2))));
10804 tmp
= gen_rtx_REG (CCNOmode
, FLAGS_REG
);
10805 tmp
= gen_rtx_EQ (VOIDmode
, tmp
, const0_rtx
);
10806 emit_insn (gen_rtx_SET (VOIDmode
, out
,
10807 gen_rtx_IF_THEN_ELSE (Pmode
, tmp
,
10814 rtx end_2_label
= gen_label_rtx ();
10815 /* Is zero in the first two bytes? */
10817 emit_insn (gen_testsi_ccno_1 (tmpreg
, GEN_INT (0x8080)));
10818 tmp
= gen_rtx_REG (CCNOmode
, FLAGS_REG
);
10819 tmp
= gen_rtx_NE (VOIDmode
, tmp
, const0_rtx
);
10820 tmp
= gen_rtx_IF_THEN_ELSE (VOIDmode
, tmp
,
10821 gen_rtx_LABEL_REF (VOIDmode
, end_2_label
),
10823 tmp
= emit_jump_insn (gen_rtx_SET (VOIDmode
, pc_rtx
, tmp
));
10824 JUMP_LABEL (tmp
) = end_2_label
;
10826 /* Not in the first two. Move two bytes forward. */
10827 emit_insn (gen_lshrsi3 (tmpreg
, tmpreg
, GEN_INT (16)));
10829 emit_insn (gen_adddi3 (out
, out
, GEN_INT (2)));
10831 emit_insn (gen_addsi3 (out
, out
, GEN_INT (2)));
10833 emit_label (end_2_label
);
10837 /* Avoid branch in fixing the byte. */
10838 tmpreg
= gen_lowpart (QImode
, tmpreg
);
10839 emit_insn (gen_addqi3_cc (tmpreg
, tmpreg
, tmpreg
));
10841 emit_insn (gen_subdi3_carry_rex64 (out
, out
, GEN_INT (3)));
10843 emit_insn (gen_subsi3_carry (out
, out
, GEN_INT (3)));
10845 emit_label (end_0_label
);
10849 ix86_expand_call (retval
, fnaddr
, callarg1
, callarg2
, pop
)
10850 rtx retval
, fnaddr
, callarg1
, callarg2
, pop
;
10852 rtx use
= NULL
, call
;
10854 if (pop
== const0_rtx
)
10856 if (TARGET_64BIT
&& pop
)
10860 if (flag_pic
&& GET_CODE (XEXP (fnaddr
, 0)) == SYMBOL_REF
)
10861 fnaddr
= machopic_indirect_call_target (fnaddr
);
10863 /* Static functions and indirect calls don't need the pic register. */
10864 if (! TARGET_64BIT
&& flag_pic
10865 && GET_CODE (XEXP (fnaddr
, 0)) == SYMBOL_REF
10866 && ! SYMBOL_REF_FLAG (XEXP (fnaddr
, 0)))
10867 use_reg (&use
, pic_offset_table_rtx
);
10869 if (TARGET_64BIT
&& INTVAL (callarg2
) >= 0)
10871 rtx al
= gen_rtx_REG (QImode
, 0);
10872 emit_move_insn (al
, callarg2
);
10873 use_reg (&use
, al
);
10875 #endif /* TARGET_MACHO */
10877 if (! call_insn_operand (XEXP (fnaddr
, 0), Pmode
))
10879 fnaddr
= copy_to_mode_reg (Pmode
, XEXP (fnaddr
, 0));
10880 fnaddr
= gen_rtx_MEM (QImode
, fnaddr
);
10883 call
= gen_rtx_CALL (VOIDmode
, fnaddr
, callarg1
);
10885 call
= gen_rtx_SET (VOIDmode
, retval
, call
);
10888 pop
= gen_rtx_PLUS (Pmode
, stack_pointer_rtx
, pop
);
10889 pop
= gen_rtx_SET (VOIDmode
, stack_pointer_rtx
, pop
);
10890 call
= gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (2, call
, pop
));
10893 call
= emit_call_insn (call
);
10895 CALL_INSN_FUNCTION_USAGE (call
) = use
;
10899 /* Clear stack slot assignments remembered from previous functions.
10900 This is called from INIT_EXPANDERS once before RTL is emitted for each
10903 static struct machine_function
*
10904 ix86_init_machine_status ()
10906 return ggc_alloc_cleared (sizeof (struct machine_function
));
10909 /* Return a MEM corresponding to a stack slot with mode MODE.
10910 Allocate a new slot if necessary.
10912 The RTL for a function can have several slots available: N is
10913 which slot to use. */
10916 assign_386_stack_local (mode
, n
)
10917 enum machine_mode mode
;
10920 if (n
< 0 || n
>= MAX_386_STACK_LOCALS
)
10923 if (ix86_stack_locals
[(int) mode
][n
] == NULL_RTX
)
10924 ix86_stack_locals
[(int) mode
][n
]
10925 = assign_stack_local (mode
, GET_MODE_SIZE (mode
), 0);
10927 return ix86_stack_locals
[(int) mode
][n
];
10930 /* Construct the SYMBOL_REF for the tls_get_addr function. */
10932 static GTY(()) rtx ix86_tls_symbol
;
10934 ix86_tls_get_addr ()
10937 if (!ix86_tls_symbol
)
10939 ix86_tls_symbol
= gen_rtx_SYMBOL_REF (Pmode
,
10940 (TARGET_GNU_TLS
&& !TARGET_64BIT
)
10941 ? "___tls_get_addr"
10942 : "__tls_get_addr");
10945 return ix86_tls_symbol
;
10948 /* Calculate the length of the memory address in the instruction
10949 encoding. Does not include the one-byte modrm, opcode, or prefix. */
10952 memory_address_length (addr
)
10955 struct ix86_address parts
;
10956 rtx base
, index
, disp
;
10959 if (GET_CODE (addr
) == PRE_DEC
10960 || GET_CODE (addr
) == POST_INC
10961 || GET_CODE (addr
) == PRE_MODIFY
10962 || GET_CODE (addr
) == POST_MODIFY
)
10965 if (! ix86_decompose_address (addr
, &parts
))
10969 index
= parts
.index
;
10973 /* Register Indirect. */
10974 if (base
&& !index
&& !disp
)
10976 /* Special cases: ebp and esp need the two-byte modrm form. */
10977 if (addr
== stack_pointer_rtx
10978 || addr
== arg_pointer_rtx
10979 || addr
== frame_pointer_rtx
10980 || addr
== hard_frame_pointer_rtx
)
10984 /* Direct Addressing. */
10985 else if (disp
&& !base
&& !index
)
10990 /* Find the length of the displacement constant. */
10993 if (GET_CODE (disp
) == CONST_INT
10994 && CONST_OK_FOR_LETTER_P (INTVAL (disp
), 'K'))
11000 /* An index requires the two-byte modrm form. */
11008 /* Compute default value for "length_immediate" attribute. When SHORTFORM
11009 is set, expect that insn have 8bit immediate alternative. */
11011 ix86_attr_length_immediate_default (insn
, shortform
)
11017 extract_insn_cached (insn
);
11018 for (i
= recog_data
.n_operands
- 1; i
>= 0; --i
)
11019 if (CONSTANT_P (recog_data
.operand
[i
]))
11024 && GET_CODE (recog_data
.operand
[i
]) == CONST_INT
11025 && CONST_OK_FOR_LETTER_P (INTVAL (recog_data
.operand
[i
]), 'K'))
11029 switch (get_attr_mode (insn
))
11040 /* Immediates for DImode instructions are encoded as 32bit sign extended values. */
11045 fatal_insn ("unknown insn mode", insn
);
11051 /* Compute default value for "length_address" attribute. */
11053 ix86_attr_length_address_default (insn
)
11057 extract_insn_cached (insn
);
11058 for (i
= recog_data
.n_operands
- 1; i
>= 0; --i
)
11059 if (GET_CODE (recog_data
.operand
[i
]) == MEM
)
11061 return memory_address_length (XEXP (recog_data
.operand
[i
], 0));
11067 /* Return the maximum number of instructions a cpu can issue. */
11074 case PROCESSOR_PENTIUM
:
11078 case PROCESSOR_PENTIUMPRO
:
11079 case PROCESSOR_PENTIUM4
:
11080 case PROCESSOR_ATHLON
:
11088 /* A subroutine of ix86_adjust_cost -- return true iff INSN reads flags set
11089 by DEP_INSN and nothing set by DEP_INSN. */
11092 ix86_flags_dependant (insn
, dep_insn
, insn_type
)
11093 rtx insn
, dep_insn
;
11094 enum attr_type insn_type
;
11098 /* Simplify the test for uninteresting insns. */
11099 if (insn_type
!= TYPE_SETCC
11100 && insn_type
!= TYPE_ICMOV
11101 && insn_type
!= TYPE_FCMOV
11102 && insn_type
!= TYPE_IBR
)
11105 if ((set
= single_set (dep_insn
)) != 0)
11107 set
= SET_DEST (set
);
11110 else if (GET_CODE (PATTERN (dep_insn
)) == PARALLEL
11111 && XVECLEN (PATTERN (dep_insn
), 0) == 2
11112 && GET_CODE (XVECEXP (PATTERN (dep_insn
), 0, 0)) == SET
11113 && GET_CODE (XVECEXP (PATTERN (dep_insn
), 0, 1)) == SET
)
11115 set
= SET_DEST (XVECEXP (PATTERN (dep_insn
), 0, 0));
11116 set2
= SET_DEST (XVECEXP (PATTERN (dep_insn
), 0, 0));
11121 if (GET_CODE (set
) != REG
|| REGNO (set
) != FLAGS_REG
)
11124 /* This test is true if the dependent insn reads the flags but
11125 not any other potentially set register. */
11126 if (!reg_overlap_mentioned_p (set
, PATTERN (insn
)))
11129 if (set2
&& reg_overlap_mentioned_p (set2
, PATTERN (insn
)))
11135 /* A subroutine of ix86_adjust_cost -- return true iff INSN has a memory
11136 address with operands set by DEP_INSN. */
11139 ix86_agi_dependant (insn
, dep_insn
, insn_type
)
11140 rtx insn
, dep_insn
;
11141 enum attr_type insn_type
;
11145 if (insn_type
== TYPE_LEA
11148 addr
= PATTERN (insn
);
11149 if (GET_CODE (addr
) == SET
)
11151 else if (GET_CODE (addr
) == PARALLEL
11152 && GET_CODE (XVECEXP (addr
, 0, 0)) == SET
)
11153 addr
= XVECEXP (addr
, 0, 0);
11156 addr
= SET_SRC (addr
);
11161 extract_insn_cached (insn
);
11162 for (i
= recog_data
.n_operands
- 1; i
>= 0; --i
)
11163 if (GET_CODE (recog_data
.operand
[i
]) == MEM
)
11165 addr
= XEXP (recog_data
.operand
[i
], 0);
11172 return modified_in_p (addr
, dep_insn
);
11176 ix86_adjust_cost (insn
, link
, dep_insn
, cost
)
11177 rtx insn
, link
, dep_insn
;
11180 enum attr_type insn_type
, dep_insn_type
;
11181 enum attr_memory memory
, dep_memory
;
11183 int dep_insn_code_number
;
11185 /* Anti and output depenancies have zero cost on all CPUs. */
11186 if (REG_NOTE_KIND (link
) != 0)
11189 dep_insn_code_number
= recog_memoized (dep_insn
);
11191 /* If we can't recognize the insns, we can't really do anything. */
11192 if (dep_insn_code_number
< 0 || recog_memoized (insn
) < 0)
11195 insn_type
= get_attr_type (insn
);
11196 dep_insn_type
= get_attr_type (dep_insn
);
11200 case PROCESSOR_PENTIUM
:
11201 /* Address Generation Interlock adds a cycle of latency. */
11202 if (ix86_agi_dependant (insn
, dep_insn
, insn_type
))
11205 /* ??? Compares pair with jump/setcc. */
11206 if (ix86_flags_dependant (insn
, dep_insn
, insn_type
))
11209 /* Floating point stores require value to be ready one cycle ealier. */
11210 if (insn_type
== TYPE_FMOV
11211 && get_attr_memory (insn
) == MEMORY_STORE
11212 && !ix86_agi_dependant (insn
, dep_insn
, insn_type
))
11216 case PROCESSOR_PENTIUMPRO
:
11217 memory
= get_attr_memory (insn
);
11218 dep_memory
= get_attr_memory (dep_insn
);
11220 /* Since we can't represent delayed latencies of load+operation,
11221 increase the cost here for non-imov insns. */
11222 if (dep_insn_type
!= TYPE_IMOV
11223 && dep_insn_type
!= TYPE_FMOV
11224 && (dep_memory
== MEMORY_LOAD
|| dep_memory
== MEMORY_BOTH
))
11227 /* INT->FP conversion is expensive. */
11228 if (get_attr_fp_int_src (dep_insn
))
11231 /* There is one cycle extra latency between an FP op and a store. */
11232 if (insn_type
== TYPE_FMOV
11233 && (set
= single_set (dep_insn
)) != NULL_RTX
11234 && (set2
= single_set (insn
)) != NULL_RTX
11235 && rtx_equal_p (SET_DEST (set
), SET_SRC (set2
))
11236 && GET_CODE (SET_DEST (set2
)) == MEM
)
11239 /* Show ability of reorder buffer to hide latency of load by executing
11240 in parallel with previous instruction in case
11241 previous instruction is not needed to compute the address. */
11242 if ((memory
== MEMORY_LOAD
|| memory
== MEMORY_BOTH
)
11243 && !ix86_agi_dependant (insn
, dep_insn
, insn_type
))
11245 /* Claim moves to take one cycle, as core can issue one load
11246 at time and the next load can start cycle later. */
11247 if (dep_insn_type
== TYPE_IMOV
11248 || dep_insn_type
== TYPE_FMOV
)
11256 memory
= get_attr_memory (insn
);
11257 dep_memory
= get_attr_memory (dep_insn
);
11258 /* The esp dependency is resolved before the instruction is really
11260 if ((insn_type
== TYPE_PUSH
|| insn_type
== TYPE_POP
)
11261 && (dep_insn_type
== TYPE_PUSH
|| dep_insn_type
== TYPE_POP
))
11264 /* Since we can't represent delayed latencies of load+operation,
11265 increase the cost here for non-imov insns. */
11266 if (dep_memory
== MEMORY_LOAD
|| dep_memory
== MEMORY_BOTH
)
11267 cost
+= (dep_insn_type
!= TYPE_IMOV
) ? 2 : 1;
11269 /* INT->FP conversion is expensive. */
11270 if (get_attr_fp_int_src (dep_insn
))
11273 /* Show ability of reorder buffer to hide latency of load by executing
11274 in parallel with previous instruction in case
11275 previous instruction is not needed to compute the address. */
11276 if ((memory
== MEMORY_LOAD
|| memory
== MEMORY_BOTH
)
11277 && !ix86_agi_dependant (insn
, dep_insn
, insn_type
))
11279 /* Claim moves to take one cycle, as core can issue one load
11280 at time and the next load can start cycle later. */
11281 if (dep_insn_type
== TYPE_IMOV
11282 || dep_insn_type
== TYPE_FMOV
)
11291 case PROCESSOR_ATHLON
:
11292 memory
= get_attr_memory (insn
);
11293 dep_memory
= get_attr_memory (dep_insn
);
11295 if (dep_memory
== MEMORY_LOAD
|| dep_memory
== MEMORY_BOTH
)
11297 if (dep_insn_type
== TYPE_IMOV
|| dep_insn_type
== TYPE_FMOV
)
11302 /* Show ability of reorder buffer to hide latency of load by executing
11303 in parallel with previous instruction in case
11304 previous instruction is not needed to compute the address. */
11305 if ((memory
== MEMORY_LOAD
|| memory
== MEMORY_BOTH
)
11306 && !ix86_agi_dependant (insn
, dep_insn
, insn_type
))
11308 /* Claim moves to take one cycle, as core can issue one load
11309 at time and the next load can start cycle later. */
11310 if (dep_insn_type
== TYPE_IMOV
11311 || dep_insn_type
== TYPE_FMOV
)
11313 else if (cost
>= 3)
11328 struct ppro_sched_data
11331 int issued_this_cycle
;
11335 static enum attr_ppro_uops
11336 ix86_safe_ppro_uops (insn
)
11339 if (recog_memoized (insn
) >= 0)
11340 return get_attr_ppro_uops (insn
);
11342 return PPRO_UOPS_MANY
;
11346 ix86_dump_ppro_packet (dump
)
11349 if (ix86_sched_data
.ppro
.decode
[0])
11351 fprintf (dump
, "PPRO packet: %d",
11352 INSN_UID (ix86_sched_data
.ppro
.decode
[0]));
11353 if (ix86_sched_data
.ppro
.decode
[1])
11354 fprintf (dump
, " %d", INSN_UID (ix86_sched_data
.ppro
.decode
[1]));
11355 if (ix86_sched_data
.ppro
.decode
[2])
11356 fprintf (dump
, " %d", INSN_UID (ix86_sched_data
.ppro
.decode
[2]));
11357 fputc ('\n', dump
);
11361 /* We're beginning a new block. Initialize data structures as necessary. */
11364 ix86_sched_init (dump
, sched_verbose
, veclen
)
11365 FILE *dump ATTRIBUTE_UNUSED
;
11366 int sched_verbose ATTRIBUTE_UNUSED
;
11367 int veclen ATTRIBUTE_UNUSED
;
11369 memset (&ix86_sched_data
, 0, sizeof (ix86_sched_data
));
11372 /* Shift INSN to SLOT, and shift everything else down. */
11375 ix86_reorder_insn (insnp
, slot
)
11382 insnp
[0] = insnp
[1];
11383 while (++insnp
!= slot
);
11389 ix86_sched_reorder_ppro (ready
, e_ready
)
11394 enum attr_ppro_uops cur_uops
;
11395 int issued_this_cycle
;
11399 /* At this point .ppro.decode contains the state of the three
11400 decoders from last "cycle". That is, those insns that were
11401 actually independent. But here we're scheduling for the
11402 decoder, and we may find things that are decodable in the
11405 memcpy (decode
, ix86_sched_data
.ppro
.decode
, sizeof (decode
));
11406 issued_this_cycle
= 0;
11409 cur_uops
= ix86_safe_ppro_uops (*insnp
);
11411 /* If the decoders are empty, and we've a complex insn at the
11412 head of the priority queue, let it issue without complaint. */
11413 if (decode
[0] == NULL
)
11415 if (cur_uops
== PPRO_UOPS_MANY
)
11417 decode
[0] = *insnp
;
11421 /* Otherwise, search for a 2-4 uop unsn to issue. */
11422 while (cur_uops
!= PPRO_UOPS_FEW
)
11424 if (insnp
== ready
)
11426 cur_uops
= ix86_safe_ppro_uops (*--insnp
);
11429 /* If so, move it to the head of the line. */
11430 if (cur_uops
== PPRO_UOPS_FEW
)
11431 ix86_reorder_insn (insnp
, e_ready
);
11433 /* Issue the head of the queue. */
11434 issued_this_cycle
= 1;
11435 decode
[0] = *e_ready
--;
11438 /* Look for simple insns to fill in the other two slots. */
11439 for (i
= 1; i
< 3; ++i
)
11440 if (decode
[i
] == NULL
)
11442 if (ready
> e_ready
)
11446 cur_uops
= ix86_safe_ppro_uops (*insnp
);
11447 while (cur_uops
!= PPRO_UOPS_ONE
)
11449 if (insnp
== ready
)
11451 cur_uops
= ix86_safe_ppro_uops (*--insnp
);
11454 /* Found one. Move it to the head of the queue and issue it. */
11455 if (cur_uops
== PPRO_UOPS_ONE
)
11457 ix86_reorder_insn (insnp
, e_ready
);
11458 decode
[i
] = *e_ready
--;
11459 issued_this_cycle
++;
11463 /* ??? Didn't find one. Ideally, here we would do a lazy split
11464 of 2-uop insns, issue one and queue the other. */
11468 if (issued_this_cycle
== 0)
11469 issued_this_cycle
= 1;
11470 ix86_sched_data
.ppro
.issued_this_cycle
= issued_this_cycle
;
11473 /* We are about to being issuing insns for this clock cycle.
11474 Override the default sort algorithm to better slot instructions. */
11476 ix86_sched_reorder (dump
, sched_verbose
, ready
, n_readyp
, clock_var
)
11477 FILE *dump ATTRIBUTE_UNUSED
;
11478 int sched_verbose ATTRIBUTE_UNUSED
;
11481 int clock_var ATTRIBUTE_UNUSED
;
11483 int n_ready
= *n_readyp
;
11484 rtx
*e_ready
= ready
+ n_ready
- 1;
11486 /* Make sure to go ahead and initialize key items in
11487 ix86_sched_data if we are not going to bother trying to
11488 reorder the ready queue. */
11491 ix86_sched_data
.ppro
.issued_this_cycle
= 1;
11500 case PROCESSOR_PENTIUMPRO
:
11501 ix86_sched_reorder_ppro (ready
, e_ready
);
11506 return ix86_issue_rate ();
11509 /* We are about to issue INSN. Return the number of insns left on the
11510 ready queue that can be issued this cycle. */
11513 ix86_variable_issue (dump
, sched_verbose
, insn
, can_issue_more
)
11517 int can_issue_more
;
11523 return can_issue_more
- 1;
11525 case PROCESSOR_PENTIUMPRO
:
11527 enum attr_ppro_uops uops
= ix86_safe_ppro_uops (insn
);
11529 if (uops
== PPRO_UOPS_MANY
)
11532 ix86_dump_ppro_packet (dump
);
11533 ix86_sched_data
.ppro
.decode
[0] = insn
;
11534 ix86_sched_data
.ppro
.decode
[1] = NULL
;
11535 ix86_sched_data
.ppro
.decode
[2] = NULL
;
11537 ix86_dump_ppro_packet (dump
);
11538 ix86_sched_data
.ppro
.decode
[0] = NULL
;
11540 else if (uops
== PPRO_UOPS_FEW
)
11543 ix86_dump_ppro_packet (dump
);
11544 ix86_sched_data
.ppro
.decode
[0] = insn
;
11545 ix86_sched_data
.ppro
.decode
[1] = NULL
;
11546 ix86_sched_data
.ppro
.decode
[2] = NULL
;
11550 for (i
= 0; i
< 3; ++i
)
11551 if (ix86_sched_data
.ppro
.decode
[i
] == NULL
)
11553 ix86_sched_data
.ppro
.decode
[i
] = insn
;
11561 ix86_dump_ppro_packet (dump
);
11562 ix86_sched_data
.ppro
.decode
[0] = NULL
;
11563 ix86_sched_data
.ppro
.decode
[1] = NULL
;
11564 ix86_sched_data
.ppro
.decode
[2] = NULL
;
11568 return --ix86_sched_data
.ppro
.issued_this_cycle
;
11573 ia32_use_dfa_pipeline_interface ()
11575 if (ix86_cpu
== PROCESSOR_PENTIUM
)
11580 /* How many alternative schedules to try. This should be as wide as the
11581 scheduling freedom in the DFA, but no wider. Making this value too
11582 large results extra work for the scheduler. */
11585 ia32_multipass_dfa_lookahead ()
11587 if (ix86_cpu
== PROCESSOR_PENTIUM
)
11594 /* Walk through INSNS and look for MEM references whose address is DSTREG or
11595 SRCREG and set the memory attribute to those of DSTREF and SRCREF, as
11599 ix86_set_move_mem_attrs (insns
, dstref
, srcref
, dstreg
, srcreg
)
11601 rtx dstref
, srcref
, dstreg
, srcreg
;
11605 for (insn
= insns
; insn
!= 0 ; insn
= NEXT_INSN (insn
))
11607 ix86_set_move_mem_attrs_1 (PATTERN (insn
), dstref
, srcref
,
11611 /* Subroutine of above to actually do the updating by recursively walking
11615 ix86_set_move_mem_attrs_1 (x
, dstref
, srcref
, dstreg
, srcreg
)
11617 rtx dstref
, srcref
, dstreg
, srcreg
;
11619 enum rtx_code code
= GET_CODE (x
);
11620 const char *format_ptr
= GET_RTX_FORMAT (code
);
11623 if (code
== MEM
&& XEXP (x
, 0) == dstreg
)
11624 MEM_COPY_ATTRIBUTES (x
, dstref
);
11625 else if (code
== MEM
&& XEXP (x
, 0) == srcreg
)
11626 MEM_COPY_ATTRIBUTES (x
, srcref
);
11628 for (i
= 0; i
< GET_RTX_LENGTH (code
); i
++, format_ptr
++)
11630 if (*format_ptr
== 'e')
11631 ix86_set_move_mem_attrs_1 (XEXP (x
, i
), dstref
, srcref
,
11633 else if (*format_ptr
== 'E')
11634 for (j
= XVECLEN (x
, i
) - 1; j
>= 0; j
--)
11635 ix86_set_move_mem_attrs_1 (XVECEXP (x
, i
, j
), dstref
, srcref
,
11640 /* Compute the alignment given to a constant that is being placed in memory.
11641 EXP is the constant and ALIGN is the alignment that the object would
11643 The value of this function is used instead of that alignment to align
11647 ix86_constant_alignment (exp
, align
)
11651 if (TREE_CODE (exp
) == REAL_CST
)
11653 if (TYPE_MODE (TREE_TYPE (exp
)) == DFmode
&& align
< 64)
11655 else if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (exp
))) && align
< 128)
11658 else if (TREE_CODE (exp
) == STRING_CST
&& TREE_STRING_LENGTH (exp
) >= 31
11665 /* Compute the alignment for a static variable.
11666 TYPE is the data type, and ALIGN is the alignment that
11667 the object would ordinarily have. The value of this function is used
11668 instead of that alignment to align the object. */
11671 ix86_data_alignment (type
, align
)
11675 if (AGGREGATE_TYPE_P (type
)
11676 && TYPE_SIZE (type
)
11677 && TREE_CODE (TYPE_SIZE (type
)) == INTEGER_CST
11678 && (TREE_INT_CST_LOW (TYPE_SIZE (type
)) >= 256
11679 || TREE_INT_CST_HIGH (TYPE_SIZE (type
))) && align
< 256)
11682 /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
11683 to 16byte boundary. */
11686 if (AGGREGATE_TYPE_P (type
)
11687 && TYPE_SIZE (type
)
11688 && TREE_CODE (TYPE_SIZE (type
)) == INTEGER_CST
11689 && (TREE_INT_CST_LOW (TYPE_SIZE (type
)) >= 128
11690 || TREE_INT_CST_HIGH (TYPE_SIZE (type
))) && align
< 128)
11694 if (TREE_CODE (type
) == ARRAY_TYPE
)
11696 if (TYPE_MODE (TREE_TYPE (type
)) == DFmode
&& align
< 64)
11698 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type
))) && align
< 128)
11701 else if (TREE_CODE (type
) == COMPLEX_TYPE
)
11704 if (TYPE_MODE (type
) == DCmode
&& align
< 64)
11706 if (TYPE_MODE (type
) == XCmode
&& align
< 128)
11709 else if ((TREE_CODE (type
) == RECORD_TYPE
11710 || TREE_CODE (type
) == UNION_TYPE
11711 || TREE_CODE (type
) == QUAL_UNION_TYPE
)
11712 && TYPE_FIELDS (type
))
11714 if (DECL_MODE (TYPE_FIELDS (type
)) == DFmode
&& align
< 64)
11716 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type
))) && align
< 128)
11719 else if (TREE_CODE (type
) == REAL_TYPE
|| TREE_CODE (type
) == VECTOR_TYPE
11720 || TREE_CODE (type
) == INTEGER_TYPE
)
11722 if (TYPE_MODE (type
) == DFmode
&& align
< 64)
11724 if (ALIGN_MODE_128 (TYPE_MODE (type
)) && align
< 128)
11731 /* Compute the alignment for a local variable.
11732 TYPE is the data type, and ALIGN is the alignment that
11733 the object would ordinarily have. The value of this macro is used
11734 instead of that alignment to align the object. */
11737 ix86_local_alignment (type
, align
)
11741 /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
11742 to 16byte boundary. */
11745 if (AGGREGATE_TYPE_P (type
)
11746 && TYPE_SIZE (type
)
11747 && TREE_CODE (TYPE_SIZE (type
)) == INTEGER_CST
11748 && (TREE_INT_CST_LOW (TYPE_SIZE (type
)) >= 16
11749 || TREE_INT_CST_HIGH (TYPE_SIZE (type
))) && align
< 128)
11752 if (TREE_CODE (type
) == ARRAY_TYPE
)
11754 if (TYPE_MODE (TREE_TYPE (type
)) == DFmode
&& align
< 64)
11756 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type
))) && align
< 128)
11759 else if (TREE_CODE (type
) == COMPLEX_TYPE
)
11761 if (TYPE_MODE (type
) == DCmode
&& align
< 64)
11763 if (TYPE_MODE (type
) == XCmode
&& align
< 128)
11766 else if ((TREE_CODE (type
) == RECORD_TYPE
11767 || TREE_CODE (type
) == UNION_TYPE
11768 || TREE_CODE (type
) == QUAL_UNION_TYPE
)
11769 && TYPE_FIELDS (type
))
11771 if (DECL_MODE (TYPE_FIELDS (type
)) == DFmode
&& align
< 64)
11773 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type
))) && align
< 128)
11776 else if (TREE_CODE (type
) == REAL_TYPE
|| TREE_CODE (type
) == VECTOR_TYPE
11777 || TREE_CODE (type
) == INTEGER_TYPE
)
11780 if (TYPE_MODE (type
) == DFmode
&& align
< 64)
11782 if (ALIGN_MODE_128 (TYPE_MODE (type
)) && align
< 128)
11788 /* Emit RTL insns to initialize the variable parts of a trampoline.
11789 FNADDR is an RTX for the address of the function's pure code.
11790 CXT is an RTX for the static chain value for the function. */
11792 x86_initialize_trampoline (tramp
, fnaddr
, cxt
)
11793 rtx tramp
, fnaddr
, cxt
;
11797 /* Compute offset from the end of the jmp to the target function. */
11798 rtx disp
= expand_binop (SImode
, sub_optab
, fnaddr
,
11799 plus_constant (tramp
, 10),
11800 NULL_RTX
, 1, OPTAB_DIRECT
);
11801 emit_move_insn (gen_rtx_MEM (QImode
, tramp
),
11802 gen_int_mode (0xb9, QImode
));
11803 emit_move_insn (gen_rtx_MEM (SImode
, plus_constant (tramp
, 1)), cxt
);
11804 emit_move_insn (gen_rtx_MEM (QImode
, plus_constant (tramp
, 5)),
11805 gen_int_mode (0xe9, QImode
));
11806 emit_move_insn (gen_rtx_MEM (SImode
, plus_constant (tramp
, 6)), disp
);
11811 /* Try to load address using shorter movl instead of movabs.
11812 We may want to support movq for kernel mode, but kernel does not use
11813 trampolines at the moment. */
11814 if (x86_64_zero_extended_value (fnaddr
))
11816 fnaddr
= copy_to_mode_reg (DImode
, fnaddr
);
11817 emit_move_insn (gen_rtx_MEM (HImode
, plus_constant (tramp
, offset
)),
11818 gen_int_mode (0xbb41, HImode
));
11819 emit_move_insn (gen_rtx_MEM (SImode
, plus_constant (tramp
, offset
+ 2)),
11820 gen_lowpart (SImode
, fnaddr
));
11825 emit_move_insn (gen_rtx_MEM (HImode
, plus_constant (tramp
, offset
)),
11826 gen_int_mode (0xbb49, HImode
));
11827 emit_move_insn (gen_rtx_MEM (DImode
, plus_constant (tramp
, offset
+ 2)),
11831 /* Load static chain using movabs to r10. */
11832 emit_move_insn (gen_rtx_MEM (HImode
, plus_constant (tramp
, offset
)),
11833 gen_int_mode (0xba49, HImode
));
11834 emit_move_insn (gen_rtx_MEM (DImode
, plus_constant (tramp
, offset
+ 2)),
11837 /* Jump to the r11 */
11838 emit_move_insn (gen_rtx_MEM (HImode
, plus_constant (tramp
, offset
)),
11839 gen_int_mode (0xff49, HImode
));
11840 emit_move_insn (gen_rtx_MEM (QImode
, plus_constant (tramp
, offset
+2)),
11841 gen_int_mode (0xe3, QImode
));
11843 if (offset
> TRAMPOLINE_SIZE
)
11847 #ifdef TRANSFER_FROM_TRAMPOLINE
11848 emit_library_call (gen_rtx (SYMBOL_REF
, Pmode
, "__enable_execute_stack"),
11849 LCT_NORMAL
, VOIDmode
, 1, tramp
, Pmode
);
11853 #define def_builtin(MASK, NAME, TYPE, CODE) \
11855 if ((MASK) & target_flags) \
11856 builtin_function ((NAME), (TYPE), (CODE), BUILT_IN_MD, \
11857 NULL, NULL_TREE); \
11860 struct builtin_description
11862 const unsigned int mask
;
11863 const enum insn_code icode
;
11864 const char *const name
;
11865 const enum ix86_builtins code
;
11866 const enum rtx_code comparison
;
11867 const unsigned int flag
;
11870 /* Used for builtins that are enabled both by -msse and -msse2. */
11871 #define MASK_SSE1 (MASK_SSE | MASK_SSE2)
11873 static const struct builtin_description bdesc_comi
[] =
11875 { MASK_SSE1
, CODE_FOR_sse_comi
, "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS
, UNEQ
, 0 },
11876 { MASK_SSE1
, CODE_FOR_sse_comi
, "__builtin_ia32_comilt", IX86_BUILTIN_COMILTSS
, UNLT
, 0 },
11877 { MASK_SSE1
, CODE_FOR_sse_comi
, "__builtin_ia32_comile", IX86_BUILTIN_COMILESS
, UNLE
, 0 },
11878 { MASK_SSE1
, CODE_FOR_sse_comi
, "__builtin_ia32_comigt", IX86_BUILTIN_COMIGTSS
, GT
, 0 },
11879 { MASK_SSE1
, CODE_FOR_sse_comi
, "__builtin_ia32_comige", IX86_BUILTIN_COMIGESS
, GE
, 0 },
11880 { MASK_SSE1
, CODE_FOR_sse_comi
, "__builtin_ia32_comineq", IX86_BUILTIN_COMINEQSS
, LTGT
, 0 },
11881 { MASK_SSE1
, CODE_FOR_sse_ucomi
, "__builtin_ia32_ucomieq", IX86_BUILTIN_UCOMIEQSS
, UNEQ
, 0 },
11882 { MASK_SSE1
, CODE_FOR_sse_ucomi
, "__builtin_ia32_ucomilt", IX86_BUILTIN_UCOMILTSS
, UNLT
, 0 },
11883 { MASK_SSE1
, CODE_FOR_sse_ucomi
, "__builtin_ia32_ucomile", IX86_BUILTIN_UCOMILESS
, UNLE
, 0 },
11884 { MASK_SSE1
, CODE_FOR_sse_ucomi
, "__builtin_ia32_ucomigt", IX86_BUILTIN_UCOMIGTSS
, GT
, 0 },
11885 { MASK_SSE1
, CODE_FOR_sse_ucomi
, "__builtin_ia32_ucomige", IX86_BUILTIN_UCOMIGESS
, GE
, 0 },
11886 { MASK_SSE1
, CODE_FOR_sse_ucomi
, "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS
, LTGT
, 0 },
11887 { MASK_SSE2
, CODE_FOR_sse2_comi
, "__builtin_ia32_comisdeq", IX86_BUILTIN_COMIEQSD
, UNEQ
, 0 },
11888 { MASK_SSE2
, CODE_FOR_sse2_comi
, "__builtin_ia32_comisdlt", IX86_BUILTIN_COMILTSD
, UNLT
, 0 },
11889 { MASK_SSE2
, CODE_FOR_sse2_comi
, "__builtin_ia32_comisdle", IX86_BUILTIN_COMILESD
, UNLE
, 0 },
11890 { MASK_SSE2
, CODE_FOR_sse2_comi
, "__builtin_ia32_comisdgt", IX86_BUILTIN_COMIGTSD
, GT
, 0 },
11891 { MASK_SSE2
, CODE_FOR_sse2_comi
, "__builtin_ia32_comisdge", IX86_BUILTIN_COMIGESD
, GE
, 0 },
11892 { MASK_SSE2
, CODE_FOR_sse2_comi
, "__builtin_ia32_comisdneq", IX86_BUILTIN_COMINEQSD
, LTGT
, 0 },
11893 { MASK_SSE2
, CODE_FOR_sse2_ucomi
, "__builtin_ia32_ucomisdeq", IX86_BUILTIN_UCOMIEQSD
, UNEQ
, 0 },
11894 { MASK_SSE2
, CODE_FOR_sse2_ucomi
, "__builtin_ia32_ucomisdlt", IX86_BUILTIN_UCOMILTSD
, UNLT
, 0 },
11895 { MASK_SSE2
, CODE_FOR_sse2_ucomi
, "__builtin_ia32_ucomisdle", IX86_BUILTIN_UCOMILESD
, UNLE
, 0 },
11896 { MASK_SSE2
, CODE_FOR_sse2_ucomi
, "__builtin_ia32_ucomisdgt", IX86_BUILTIN_UCOMIGTSD
, GT
, 0 },
11897 { MASK_SSE2
, CODE_FOR_sse2_ucomi
, "__builtin_ia32_ucomisdge", IX86_BUILTIN_UCOMIGESD
, GE
, 0 },
11898 { MASK_SSE2
, CODE_FOR_sse2_ucomi
, "__builtin_ia32_ucomisdneq", IX86_BUILTIN_UCOMINEQSD
, LTGT
, 0 },
11901 static const struct builtin_description bdesc_2arg
[] =
11904 { MASK_SSE1
, CODE_FOR_addv4sf3
, "__builtin_ia32_addps", IX86_BUILTIN_ADDPS
, 0, 0 },
11905 { MASK_SSE1
, CODE_FOR_subv4sf3
, "__builtin_ia32_subps", IX86_BUILTIN_SUBPS
, 0, 0 },
11906 { MASK_SSE1
, CODE_FOR_mulv4sf3
, "__builtin_ia32_mulps", IX86_BUILTIN_MULPS
, 0, 0 },
11907 { MASK_SSE1
, CODE_FOR_divv4sf3
, "__builtin_ia32_divps", IX86_BUILTIN_DIVPS
, 0, 0 },
11908 { MASK_SSE1
, CODE_FOR_vmaddv4sf3
, "__builtin_ia32_addss", IX86_BUILTIN_ADDSS
, 0, 0 },
11909 { MASK_SSE1
, CODE_FOR_vmsubv4sf3
, "__builtin_ia32_subss", IX86_BUILTIN_SUBSS
, 0, 0 },
11910 { MASK_SSE1
, CODE_FOR_vmmulv4sf3
, "__builtin_ia32_mulss", IX86_BUILTIN_MULSS
, 0, 0 },
11911 { MASK_SSE1
, CODE_FOR_vmdivv4sf3
, "__builtin_ia32_divss", IX86_BUILTIN_DIVSS
, 0, 0 },
11913 { MASK_SSE1
, CODE_FOR_maskcmpv4sf3
, "__builtin_ia32_cmpeqps", IX86_BUILTIN_CMPEQPS
, EQ
, 0 },
11914 { MASK_SSE1
, CODE_FOR_maskcmpv4sf3
, "__builtin_ia32_cmpltps", IX86_BUILTIN_CMPLTPS
, LT
, 0 },
11915 { MASK_SSE1
, CODE_FOR_maskcmpv4sf3
, "__builtin_ia32_cmpleps", IX86_BUILTIN_CMPLEPS
, LE
, 0 },
11916 { MASK_SSE1
, CODE_FOR_maskcmpv4sf3
, "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS
, LT
, 1 },
11917 { MASK_SSE1
, CODE_FOR_maskcmpv4sf3
, "__builtin_ia32_cmpgeps", IX86_BUILTIN_CMPGEPS
, LE
, 1 },
11918 { MASK_SSE1
, CODE_FOR_maskcmpv4sf3
, "__builtin_ia32_cmpunordps", IX86_BUILTIN_CMPUNORDPS
, UNORDERED
, 0 },
11919 { MASK_SSE1
, CODE_FOR_maskncmpv4sf3
, "__builtin_ia32_cmpneqps", IX86_BUILTIN_CMPNEQPS
, EQ
, 0 },
11920 { MASK_SSE1
, CODE_FOR_maskncmpv4sf3
, "__builtin_ia32_cmpnltps", IX86_BUILTIN_CMPNLTPS
, LT
, 0 },
11921 { MASK_SSE1
, CODE_FOR_maskncmpv4sf3
, "__builtin_ia32_cmpnleps", IX86_BUILTIN_CMPNLEPS
, LE
, 0 },
11922 { MASK_SSE1
, CODE_FOR_maskncmpv4sf3
, "__builtin_ia32_cmpngtps", IX86_BUILTIN_CMPNGTPS
, LT
, 1 },
11923 { MASK_SSE1
, CODE_FOR_maskncmpv4sf3
, "__builtin_ia32_cmpngeps", IX86_BUILTIN_CMPNGEPS
, LE
, 1 },
11924 { MASK_SSE1
, CODE_FOR_maskncmpv4sf3
, "__builtin_ia32_cmpordps", IX86_BUILTIN_CMPORDPS
, UNORDERED
, 0 },
11925 { MASK_SSE1
, CODE_FOR_vmmaskcmpv4sf3
, "__builtin_ia32_cmpeqss", IX86_BUILTIN_CMPEQSS
, EQ
, 0 },
11926 { MASK_SSE1
, CODE_FOR_vmmaskcmpv4sf3
, "__builtin_ia32_cmpltss", IX86_BUILTIN_CMPLTSS
, LT
, 0 },
11927 { MASK_SSE1
, CODE_FOR_vmmaskcmpv4sf3
, "__builtin_ia32_cmpless", IX86_BUILTIN_CMPLESS
, LE
, 0 },
11928 { MASK_SSE1
, CODE_FOR_vmmaskcmpv4sf3
, "__builtin_ia32_cmpunordss", IX86_BUILTIN_CMPUNORDSS
, UNORDERED
, 0 },
11929 { MASK_SSE1
, CODE_FOR_vmmaskncmpv4sf3
, "__builtin_ia32_cmpneqss", IX86_BUILTIN_CMPNEQSS
, EQ
, 0 },
11930 { MASK_SSE1
, CODE_FOR_vmmaskncmpv4sf3
, "__builtin_ia32_cmpnltss", IX86_BUILTIN_CMPNLTSS
, LT
, 0 },
11931 { MASK_SSE1
, CODE_FOR_vmmaskncmpv4sf3
, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS
, LE
, 0 },
11932 { MASK_SSE1
, CODE_FOR_vmmaskncmpv4sf3
, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS
, UNORDERED
, 0 },
11934 { MASK_SSE1
, CODE_FOR_sminv4sf3
, "__builtin_ia32_minps", IX86_BUILTIN_MINPS
, 0, 0 },
11935 { MASK_SSE1
, CODE_FOR_smaxv4sf3
, "__builtin_ia32_maxps", IX86_BUILTIN_MAXPS
, 0, 0 },
11936 { MASK_SSE1
, CODE_FOR_vmsminv4sf3
, "__builtin_ia32_minss", IX86_BUILTIN_MINSS
, 0, 0 },
11937 { MASK_SSE1
, CODE_FOR_vmsmaxv4sf3
, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS
, 0, 0 },
11939 { MASK_SSE1
, CODE_FOR_sse_andv4sf3
, "__builtin_ia32_andps", IX86_BUILTIN_ANDPS
, 0, 0 },
11940 { MASK_SSE1
, CODE_FOR_sse_nandv4sf3
, "__builtin_ia32_andnps", IX86_BUILTIN_ANDNPS
, 0, 0 },
11941 { MASK_SSE1
, CODE_FOR_sse_iorv4sf3
, "__builtin_ia32_orps", IX86_BUILTIN_ORPS
, 0, 0 },
11942 { MASK_SSE1
, CODE_FOR_sse_xorv4sf3
, "__builtin_ia32_xorps", IX86_BUILTIN_XORPS
, 0, 0 },
11944 { MASK_SSE1
, CODE_FOR_sse_movss
, "__builtin_ia32_movss", IX86_BUILTIN_MOVSS
, 0, 0 },
11945 { MASK_SSE1
, CODE_FOR_sse_movhlps
, "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS
, 0, 0 },
11946 { MASK_SSE1
, CODE_FOR_sse_movlhps
, "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS
, 0, 0 },
11947 { MASK_SSE1
, CODE_FOR_sse_unpckhps
, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS
, 0, 0 },
11948 { MASK_SSE1
, CODE_FOR_sse_unpcklps
, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS
, 0, 0 },
11951 { MASK_MMX
, CODE_FOR_addv8qi3
, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB
, 0, 0 },
11952 { MASK_MMX
, CODE_FOR_addv4hi3
, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW
, 0, 0 },
11953 { MASK_MMX
, CODE_FOR_addv2si3
, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD
, 0, 0 },
11954 { MASK_MMX
, CODE_FOR_subv8qi3
, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB
, 0, 0 },
11955 { MASK_MMX
, CODE_FOR_subv4hi3
, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW
, 0, 0 },
11956 { MASK_MMX
, CODE_FOR_subv2si3
, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD
, 0, 0 },
11958 { MASK_MMX
, CODE_FOR_ssaddv8qi3
, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB
, 0, 0 },
11959 { MASK_MMX
, CODE_FOR_ssaddv4hi3
, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW
, 0, 0 },
11960 { MASK_MMX
, CODE_FOR_sssubv8qi3
, "__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB
, 0, 0 },
11961 { MASK_MMX
, CODE_FOR_sssubv4hi3
, "__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW
, 0, 0 },
11962 { MASK_MMX
, CODE_FOR_usaddv8qi3
, "__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB
, 0, 0 },
11963 { MASK_MMX
, CODE_FOR_usaddv4hi3
, "__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW
, 0, 0 },
11964 { MASK_MMX
, CODE_FOR_ussubv8qi3
, "__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB
, 0, 0 },
11965 { MASK_MMX
, CODE_FOR_ussubv4hi3
, "__builtin_ia32_psubusw", IX86_BUILTIN_PSUBUSW
, 0, 0 },
11967 { MASK_MMX
, CODE_FOR_mulv4hi3
, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW
, 0, 0 },
11968 { MASK_MMX
, CODE_FOR_smulv4hi3_highpart
, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW
, 0, 0 },
11969 { MASK_SSE1
| MASK_3DNOW_A
, CODE_FOR_umulv4hi3_highpart
, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW
, 0, 0 },
11971 { MASK_MMX
, CODE_FOR_mmx_anddi3
, "__builtin_ia32_pand", IX86_BUILTIN_PAND
, 0, 0 },
11972 { MASK_MMX
, CODE_FOR_mmx_nanddi3
, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN
, 0, 0 },
11973 { MASK_MMX
, CODE_FOR_mmx_iordi3
, "__builtin_ia32_por", IX86_BUILTIN_POR
, 0, 0 },
11974 { MASK_MMX
, CODE_FOR_mmx_xordi3
, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR
, 0, 0 },
11976 { MASK_SSE1
| MASK_3DNOW_A
, CODE_FOR_mmx_uavgv8qi3
, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB
, 0, 0 },
11977 { MASK_SSE1
| MASK_3DNOW_A
, CODE_FOR_mmx_uavgv4hi3
, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW
, 0, 0 },
11979 { MASK_MMX
, CODE_FOR_eqv8qi3
, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB
, 0, 0 },
11980 { MASK_MMX
, CODE_FOR_eqv4hi3
, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW
, 0, 0 },
11981 { MASK_MMX
, CODE_FOR_eqv2si3
, "__builtin_ia32_pcmpeqd", IX86_BUILTIN_PCMPEQD
, 0, 0 },
11982 { MASK_MMX
, CODE_FOR_gtv8qi3
, "__builtin_ia32_pcmpgtb", IX86_BUILTIN_PCMPGTB
, 0, 0 },
11983 { MASK_MMX
, CODE_FOR_gtv4hi3
, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW
, 0, 0 },
11984 { MASK_MMX
, CODE_FOR_gtv2si3
, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD
, 0, 0 },
11986 { MASK_SSE1
| MASK_3DNOW_A
, CODE_FOR_umaxv8qi3
, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB
, 0, 0 },
11987 { MASK_SSE1
| MASK_3DNOW_A
, CODE_FOR_smaxv4hi3
, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW
, 0, 0 },
11988 { MASK_SSE1
| MASK_3DNOW_A
, CODE_FOR_uminv8qi3
, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB
, 0, 0 },
11989 { MASK_SSE1
| MASK_3DNOW_A
, CODE_FOR_sminv4hi3
, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW
, 0, 0 },
11991 { MASK_MMX
, CODE_FOR_mmx_punpckhbw
, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW
, 0, 0 },
11992 { MASK_MMX
, CODE_FOR_mmx_punpckhwd
, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD
, 0, 0 },
11993 { MASK_MMX
, CODE_FOR_mmx_punpckhdq
, "__builtin_ia32_punpckhdq", IX86_BUILTIN_PUNPCKHDQ
, 0, 0 },
11994 { MASK_MMX
, CODE_FOR_mmx_punpcklbw
, "__builtin_ia32_punpcklbw", IX86_BUILTIN_PUNPCKLBW
, 0, 0 },
11995 { MASK_MMX
, CODE_FOR_mmx_punpcklwd
, "__builtin_ia32_punpcklwd", IX86_BUILTIN_PUNPCKLWD
, 0, 0 },
11996 { MASK_MMX
, CODE_FOR_mmx_punpckldq
, "__builtin_ia32_punpckldq", IX86_BUILTIN_PUNPCKLDQ
, 0, 0 },
11999 { MASK_MMX
, CODE_FOR_mmx_packsswb
, 0, IX86_BUILTIN_PACKSSWB
, 0, 0 },
12000 { MASK_MMX
, CODE_FOR_mmx_packssdw
, 0, IX86_BUILTIN_PACKSSDW
, 0, 0 },
12001 { MASK_MMX
, CODE_FOR_mmx_packuswb
, 0, IX86_BUILTIN_PACKUSWB
, 0, 0 },
12003 { MASK_SSE1
, CODE_FOR_cvtpi2ps
, 0, IX86_BUILTIN_CVTPI2PS
, 0, 0 },
12004 { MASK_SSE1
, CODE_FOR_cvtsi2ss
, 0, IX86_BUILTIN_CVTSI2SS
, 0, 0 },
12006 { MASK_MMX
, CODE_FOR_ashlv4hi3
, 0, IX86_BUILTIN_PSLLW
, 0, 0 },
12007 { MASK_MMX
, CODE_FOR_ashlv4hi3
, 0, IX86_BUILTIN_PSLLWI
, 0, 0 },
12008 { MASK_MMX
, CODE_FOR_ashlv2si3
, 0, IX86_BUILTIN_PSLLD
, 0, 0 },
12009 { MASK_MMX
, CODE_FOR_ashlv2si3
, 0, IX86_BUILTIN_PSLLDI
, 0, 0 },
12010 { MASK_MMX
, CODE_FOR_mmx_ashldi3
, 0, IX86_BUILTIN_PSLLQ
, 0, 0 },
12011 { MASK_MMX
, CODE_FOR_mmx_ashldi3
, 0, IX86_BUILTIN_PSLLQI
, 0, 0 },
12013 { MASK_MMX
, CODE_FOR_lshrv4hi3
, 0, IX86_BUILTIN_PSRLW
, 0, 0 },
12014 { MASK_MMX
, CODE_FOR_lshrv4hi3
, 0, IX86_BUILTIN_PSRLWI
, 0, 0 },
12015 { MASK_MMX
, CODE_FOR_lshrv2si3
, 0, IX86_BUILTIN_PSRLD
, 0, 0 },
12016 { MASK_MMX
, CODE_FOR_lshrv2si3
, 0, IX86_BUILTIN_PSRLDI
, 0, 0 },
12017 { MASK_MMX
, CODE_FOR_mmx_lshrdi3
, 0, IX86_BUILTIN_PSRLQ
, 0, 0 },
12018 { MASK_MMX
, CODE_FOR_mmx_lshrdi3
, 0, IX86_BUILTIN_PSRLQI
, 0, 0 },
12020 { MASK_MMX
, CODE_FOR_ashrv4hi3
, 0, IX86_BUILTIN_PSRAW
, 0, 0 },
12021 { MASK_MMX
, CODE_FOR_ashrv4hi3
, 0, IX86_BUILTIN_PSRAWI
, 0, 0 },
12022 { MASK_MMX
, CODE_FOR_ashrv2si3
, 0, IX86_BUILTIN_PSRAD
, 0, 0 },
12023 { MASK_MMX
, CODE_FOR_ashrv2si3
, 0, IX86_BUILTIN_PSRADI
, 0, 0 },
12025 { MASK_SSE1
| MASK_3DNOW_A
, CODE_FOR_mmx_psadbw
, 0, IX86_BUILTIN_PSADBW
, 0, 0 },
12026 { MASK_MMX
, CODE_FOR_mmx_pmaddwd
, 0, IX86_BUILTIN_PMADDWD
, 0, 0 },
12029 { MASK_SSE2
, CODE_FOR_addv2df3
, "__builtin_ia32_addpd", IX86_BUILTIN_ADDPD
, 0, 0 },
12030 { MASK_SSE2
, CODE_FOR_subv2df3
, "__builtin_ia32_subpd", IX86_BUILTIN_SUBPD
, 0, 0 },
12031 { MASK_SSE2
, CODE_FOR_mulv2df3
, "__builtin_ia32_mulpd", IX86_BUILTIN_MULPD
, 0, 0 },
12032 { MASK_SSE2
, CODE_FOR_divv2df3
, "__builtin_ia32_divpd", IX86_BUILTIN_DIVPD
, 0, 0 },
12033 { MASK_SSE2
, CODE_FOR_vmaddv2df3
, "__builtin_ia32_addsd", IX86_BUILTIN_ADDSD
, 0, 0 },
12034 { MASK_SSE2
, CODE_FOR_vmsubv2df3
, "__builtin_ia32_subsd", IX86_BUILTIN_SUBSD
, 0, 0 },
12035 { MASK_SSE2
, CODE_FOR_vmmulv2df3
, "__builtin_ia32_mulsd", IX86_BUILTIN_MULSD
, 0, 0 },
12036 { MASK_SSE2
, CODE_FOR_vmdivv2df3
, "__builtin_ia32_divsd", IX86_BUILTIN_DIVSD
, 0, 0 },
12038 { MASK_SSE2
, CODE_FOR_maskcmpv2df3
, "__builtin_ia32_cmpeqpd", IX86_BUILTIN_CMPEQPD
, EQ
, 0 },
12039 { MASK_SSE2
, CODE_FOR_maskcmpv2df3
, "__builtin_ia32_cmpltpd", IX86_BUILTIN_CMPLTPD
, LT
, 0 },
12040 { MASK_SSE2
, CODE_FOR_maskcmpv2df3
, "__builtin_ia32_cmplepd", IX86_BUILTIN_CMPLEPD
, LE
, 0 },
12041 { MASK_SSE2
, CODE_FOR_maskcmpv2df3
, "__builtin_ia32_cmpgtpd", IX86_BUILTIN_CMPGTPD
, LT
, 1 },
12042 { MASK_SSE2
, CODE_FOR_maskcmpv2df3
, "__builtin_ia32_cmpgepd", IX86_BUILTIN_CMPGEPD
, LE
, 1 },
12043 { MASK_SSE2
, CODE_FOR_maskcmpv2df3
, "__builtin_ia32_cmpunordpd", IX86_BUILTIN_CMPUNORDPD
, UNORDERED
, 0 },
12044 { MASK_SSE2
, CODE_FOR_maskncmpv2df3
, "__builtin_ia32_cmpneqpd", IX86_BUILTIN_CMPNEQPD
, EQ
, 0 },
12045 { MASK_SSE2
, CODE_FOR_maskncmpv2df3
, "__builtin_ia32_cmpnltpd", IX86_BUILTIN_CMPNLTPD
, LT
, 0 },
12046 { MASK_SSE2
, CODE_FOR_maskncmpv2df3
, "__builtin_ia32_cmpnlepd", IX86_BUILTIN_CMPNLEPD
, LE
, 0 },
12047 { MASK_SSE2
, CODE_FOR_maskncmpv2df3
, "__builtin_ia32_cmpngtpd", IX86_BUILTIN_CMPNGTPD
, LT
, 1 },
12048 { MASK_SSE2
, CODE_FOR_maskncmpv2df3
, "__builtin_ia32_cmpngepd", IX86_BUILTIN_CMPNGEPD
, LE
, 1 },
12049 { MASK_SSE2
, CODE_FOR_maskncmpv2df3
, "__builtin_ia32_cmpordpd", IX86_BUILTIN_CMPORDPD
, UNORDERED
, 0 },
12050 { MASK_SSE2
, CODE_FOR_vmmaskcmpv2df3
, "__builtin_ia32_cmpeqsd", IX86_BUILTIN_CMPEQSD
, EQ
, 0 },
12051 { MASK_SSE2
, CODE_FOR_vmmaskcmpv2df3
, "__builtin_ia32_cmpltsd", IX86_BUILTIN_CMPLTSD
, LT
, 0 },
12052 { MASK_SSE2
, CODE_FOR_vmmaskcmpv2df3
, "__builtin_ia32_cmplesd", IX86_BUILTIN_CMPLESD
, LE
, 0 },
12053 { MASK_SSE2
, CODE_FOR_vmmaskcmpv2df3
, "__builtin_ia32_cmpunordsd", IX86_BUILTIN_CMPUNORDSD
, UNORDERED
, 0 },
12054 { MASK_SSE2
, CODE_FOR_vmmaskncmpv2df3
, "__builtin_ia32_cmpneqsd", IX86_BUILTIN_CMPNEQSD
, EQ
, 0 },
12055 { MASK_SSE2
, CODE_FOR_vmmaskncmpv2df3
, "__builtin_ia32_cmpnltsd", IX86_BUILTIN_CMPNLTSD
, LT
, 0 },
12056 { MASK_SSE2
, CODE_FOR_vmmaskncmpv2df3
, "__builtin_ia32_cmpnlesd", IX86_BUILTIN_CMPNLESD
, LE
, 0 },
12057 { MASK_SSE2
, CODE_FOR_vmmaskncmpv2df3
, "__builtin_ia32_cmpordsd", IX86_BUILTIN_CMPORDSD
, UNORDERED
, 0 },
12059 { MASK_SSE2
, CODE_FOR_sminv2df3
, "__builtin_ia32_minpd", IX86_BUILTIN_MINPD
, 0, 0 },
12060 { MASK_SSE2
, CODE_FOR_smaxv2df3
, "__builtin_ia32_maxpd", IX86_BUILTIN_MAXPD
, 0, 0 },
12061 { MASK_SSE2
, CODE_FOR_vmsminv2df3
, "__builtin_ia32_minsd", IX86_BUILTIN_MINSD
, 0, 0 },
12062 { MASK_SSE2
, CODE_FOR_vmsmaxv2df3
, "__builtin_ia32_maxsd", IX86_BUILTIN_MAXSD
, 0, 0 },
12064 { MASK_SSE2
, CODE_FOR_sse2_andv2df3
, "__builtin_ia32_andpd", IX86_BUILTIN_ANDPD
, 0, 0 },
12065 { MASK_SSE2
, CODE_FOR_sse2_nandv2df3
, "__builtin_ia32_andnpd", IX86_BUILTIN_ANDNPD
, 0, 0 },
12066 { MASK_SSE2
, CODE_FOR_sse2_iorv2df3
, "__builtin_ia32_orpd", IX86_BUILTIN_ORPD
, 0, 0 },
12067 { MASK_SSE2
, CODE_FOR_sse2_xorv2df3
, "__builtin_ia32_xorpd", IX86_BUILTIN_XORPD
, 0, 0 },
12069 { MASK_SSE2
, CODE_FOR_sse2_movsd
, "__builtin_ia32_movsd", IX86_BUILTIN_MOVSD
, 0, 0 },
12070 { MASK_SSE2
, CODE_FOR_sse2_unpckhpd
, "__builtin_ia32_unpckhpd", IX86_BUILTIN_UNPCKHPD
, 0, 0 },
12071 { MASK_SSE2
, CODE_FOR_sse2_unpcklpd
, "__builtin_ia32_unpcklpd", IX86_BUILTIN_UNPCKLPD
, 0, 0 },
12074 { MASK_SSE2
, CODE_FOR_addv16qi3
, "__builtin_ia32_paddb128", IX86_BUILTIN_PADDB128
, 0, 0 },
12075 { MASK_SSE2
, CODE_FOR_addv8hi3
, "__builtin_ia32_paddw128", IX86_BUILTIN_PADDW128
, 0, 0 },
12076 { MASK_SSE2
, CODE_FOR_addv4si3
, "__builtin_ia32_paddd128", IX86_BUILTIN_PADDD128
, 0, 0 },
12077 { MASK_SSE2
, CODE_FOR_addv4si3
, "__builtin_ia32_paddq128", IX86_BUILTIN_PADDQ128
, 0, 0 },
12078 { MASK_SSE2
, CODE_FOR_subv16qi3
, "__builtin_ia32_psubb128", IX86_BUILTIN_PSUBB128
, 0, 0 },
12079 { MASK_SSE2
, CODE_FOR_subv8hi3
, "__builtin_ia32_psubw128", IX86_BUILTIN_PSUBW128
, 0, 0 },
12080 { MASK_SSE2
, CODE_FOR_subv4si3
, "__builtin_ia32_psubd128", IX86_BUILTIN_PSUBD128
, 0, 0 },
12081 { MASK_SSE2
, CODE_FOR_subv4si3
, "__builtin_ia32_psubq128", IX86_BUILTIN_PSUBQ128
, 0, 0 },
12083 { MASK_MMX
, CODE_FOR_ssaddv16qi3
, "__builtin_ia32_paddsb128", IX86_BUILTIN_PADDSB128
, 0, 0 },
12084 { MASK_MMX
, CODE_FOR_ssaddv8hi3
, "__builtin_ia32_paddsw128", IX86_BUILTIN_PADDSW128
, 0, 0 },
12085 { MASK_MMX
, CODE_FOR_sssubv16qi3
, "__builtin_ia32_psubsb128", IX86_BUILTIN_PSUBSB128
, 0, 0 },
12086 { MASK_MMX
, CODE_FOR_sssubv8hi3
, "__builtin_ia32_psubsw128", IX86_BUILTIN_PSUBSW128
, 0, 0 },
12087 { MASK_MMX
, CODE_FOR_usaddv16qi3
, "__builtin_ia32_paddusb128", IX86_BUILTIN_PADDUSB128
, 0, 0 },
12088 { MASK_MMX
, CODE_FOR_usaddv8hi3
, "__builtin_ia32_paddusw128", IX86_BUILTIN_PADDUSW128
, 0, 0 },
12089 { MASK_MMX
, CODE_FOR_ussubv16qi3
, "__builtin_ia32_psubusb128", IX86_BUILTIN_PSUBUSB128
, 0, 0 },
12090 { MASK_MMX
, CODE_FOR_ussubv8hi3
, "__builtin_ia32_psubusw128", IX86_BUILTIN_PSUBUSW128
, 0, 0 },
12092 { MASK_SSE2
, CODE_FOR_mulv8hi3
, "__builtin_ia32_pmullw128", IX86_BUILTIN_PMULLW128
, 0, 0 },
12093 { MASK_SSE2
, CODE_FOR_smulv8hi3_highpart
, "__builtin_ia32_pmulhw128", IX86_BUILTIN_PMULHW128
, 0, 0 },
12094 { MASK_SSE2
, CODE_FOR_sse2_umulsidi3
, "__builtin_ia32_pmuludq", IX86_BUILTIN_PMULUDQ
, 0, 0 },
12095 { MASK_SSE2
, CODE_FOR_sse2_umulv2siv2di3
, "__builtin_ia32_pmuludq128", IX86_BUILTIN_PMULUDQ128
, 0, 0 },
12097 { MASK_SSE2
, CODE_FOR_sse2_andv2di3
, "__builtin_ia32_pand128", IX86_BUILTIN_PAND128
, 0, 0 },
12098 { MASK_SSE2
, CODE_FOR_sse2_nandv2di3
, "__builtin_ia32_pandn128", IX86_BUILTIN_PANDN128
, 0, 0 },
12099 { MASK_SSE2
, CODE_FOR_sse2_iorv2di3
, "__builtin_ia32_por128", IX86_BUILTIN_POR128
, 0, 0 },
12100 { MASK_SSE2
, CODE_FOR_sse2_xorv2di3
, "__builtin_ia32_pxor128", IX86_BUILTIN_PXOR128
, 0, 0 },
12102 { MASK_SSE2
, CODE_FOR_sse2_uavgv16qi3
, "__builtin_ia32_pavgb128", IX86_BUILTIN_PAVGB128
, 0, 0 },
12103 { MASK_SSE2
, CODE_FOR_sse2_uavgv8hi3
, "__builtin_ia32_pavgw128", IX86_BUILTIN_PAVGW128
, 0, 0 },
12105 { MASK_SSE2
, CODE_FOR_eqv16qi3
, "__builtin_ia32_pcmpeqb128", IX86_BUILTIN_PCMPEQB128
, 0, 0 },
12106 { MASK_SSE2
, CODE_FOR_eqv8hi3
, "__builtin_ia32_pcmpeqw128", IX86_BUILTIN_PCMPEQW128
, 0, 0 },
12107 { MASK_SSE2
, CODE_FOR_eqv4si3
, "__builtin_ia32_pcmpeqd128", IX86_BUILTIN_PCMPEQD128
, 0, 0 },
12108 { MASK_SSE2
, CODE_FOR_gtv16qi3
, "__builtin_ia32_pcmpgtb128", IX86_BUILTIN_PCMPGTB128
, 0, 0 },
12109 { MASK_SSE2
, CODE_FOR_gtv8hi3
, "__builtin_ia32_pcmpgtw128", IX86_BUILTIN_PCMPGTW128
, 0, 0 },
12110 { MASK_SSE2
, CODE_FOR_gtv4si3
, "__builtin_ia32_pcmpgtd128", IX86_BUILTIN_PCMPGTD128
, 0, 0 },
12112 { MASK_SSE2
, CODE_FOR_umaxv16qi3
, "__builtin_ia32_pmaxub128", IX86_BUILTIN_PMAXUB128
, 0, 0 },
12113 { MASK_SSE2
, CODE_FOR_smaxv8hi3
, "__builtin_ia32_pmaxsw128", IX86_BUILTIN_PMAXSW128
, 0, 0 },
12114 { MASK_SSE2
, CODE_FOR_uminv16qi3
, "__builtin_ia32_pminub128", IX86_BUILTIN_PMINUB128
, 0, 0 },
12115 { MASK_SSE2
, CODE_FOR_sminv8hi3
, "__builtin_ia32_pminsw128", IX86_BUILTIN_PMINSW128
, 0, 0 },
12117 { MASK_SSE2
, CODE_FOR_sse2_punpckhbw
, "__builtin_ia32_punpckhbw128", IX86_BUILTIN_PUNPCKHBW128
, 0, 0 },
12118 { MASK_SSE2
, CODE_FOR_sse2_punpckhwd
, "__builtin_ia32_punpckhwd128", IX86_BUILTIN_PUNPCKHWD128
, 0, 0 },
12119 { MASK_SSE2
, CODE_FOR_sse2_punpckhdq
, "__builtin_ia32_punpckhdq128", IX86_BUILTIN_PUNPCKHDQ128
, 0, 0 },
12120 { MASK_SSE2
, CODE_FOR_sse2_punpckhqdq
, "__builtin_ia32_punpckhqdq128", IX86_BUILTIN_PUNPCKHQDQ128
, 0, 0 },
12121 { MASK_SSE2
, CODE_FOR_sse2_punpcklbw
, "__builtin_ia32_punpcklbw128", IX86_BUILTIN_PUNPCKLBW128
, 0, 0 },
12122 { MASK_SSE2
, CODE_FOR_sse2_punpcklwd
, "__builtin_ia32_punpcklwd128", IX86_BUILTIN_PUNPCKLWD128
, 0, 0 },
12123 { MASK_SSE2
, CODE_FOR_sse2_punpckldq
, "__builtin_ia32_punpckldq128", IX86_BUILTIN_PUNPCKLDQ128
, 0, 0 },
12124 { MASK_SSE2
, CODE_FOR_sse2_punpcklqdq
, "__builtin_ia32_punpcklqdq128", IX86_BUILTIN_PUNPCKLQDQ128
, 0, 0 },
12126 { MASK_SSE2
, CODE_FOR_sse2_packsswb
, "__builtin_ia32_packsswb128", IX86_BUILTIN_PACKSSWB128
, 0, 0 },
12127 { MASK_SSE2
, CODE_FOR_sse2_packssdw
, "__builtin_ia32_packssdw128", IX86_BUILTIN_PACKSSDW128
, 0, 0 },
12128 { MASK_SSE2
, CODE_FOR_sse2_packuswb
, "__builtin_ia32_packuswb128", IX86_BUILTIN_PACKUSWB128
, 0, 0 },
12130 { MASK_SSE2
, CODE_FOR_umulv8hi3_highpart
, "__builtin_ia32_pmulhuw128", IX86_BUILTIN_PMULHUW128
, 0, 0 },
12131 { MASK_SSE2
, CODE_FOR_sse2_psadbw
, 0, IX86_BUILTIN_PSADBW128
, 0, 0 },
12133 { MASK_SSE2
, CODE_FOR_ashlv8hi3_ti
, 0, IX86_BUILTIN_PSLLW128
, 0, 0 },
12134 { MASK_SSE2
, CODE_FOR_ashlv8hi3
, 0, IX86_BUILTIN_PSLLWI128
, 0, 0 },
12135 { MASK_SSE2
, CODE_FOR_ashlv4si3_ti
, 0, IX86_BUILTIN_PSLLD128
, 0, 0 },
12136 { MASK_SSE2
, CODE_FOR_ashlv4si3
, 0, IX86_BUILTIN_PSLLDI128
, 0, 0 },
12137 { MASK_SSE2
, CODE_FOR_ashlv2di3_ti
, 0, IX86_BUILTIN_PSLLQ128
, 0, 0 },
12138 { MASK_SSE2
, CODE_FOR_ashlv2di3
, 0, IX86_BUILTIN_PSLLQI128
, 0, 0 },
12140 { MASK_SSE2
, CODE_FOR_lshrv8hi3_ti
, 0, IX86_BUILTIN_PSRLW128
, 0, 0 },
12141 { MASK_SSE2
, CODE_FOR_lshrv8hi3
, 0, IX86_BUILTIN_PSRLWI128
, 0, 0 },
12142 { MASK_SSE2
, CODE_FOR_lshrv4si3_ti
, 0, IX86_BUILTIN_PSRLD128
, 0, 0 },
12143 { MASK_SSE2
, CODE_FOR_lshrv4si3
, 0, IX86_BUILTIN_PSRLDI128
, 0, 0 },
12144 { MASK_SSE2
, CODE_FOR_lshrv2di3_ti
, 0, IX86_BUILTIN_PSRLQ128
, 0, 0 },
12145 { MASK_SSE2
, CODE_FOR_lshrv2di3
, 0, IX86_BUILTIN_PSRLQI128
, 0, 0 },
12147 { MASK_SSE2
, CODE_FOR_ashrv8hi3_ti
, 0, IX86_BUILTIN_PSRAW128
, 0, 0 },
12148 { MASK_SSE2
, CODE_FOR_ashrv8hi3
, 0, IX86_BUILTIN_PSRAWI128
, 0, 0 },
12149 { MASK_SSE2
, CODE_FOR_ashrv4si3_ti
, 0, IX86_BUILTIN_PSRAD128
, 0, 0 },
12150 { MASK_SSE2
, CODE_FOR_ashrv4si3
, 0, IX86_BUILTIN_PSRADI128
, 0, 0 },
12152 { MASK_SSE2
, CODE_FOR_sse2_pmaddwd
, 0, IX86_BUILTIN_PMADDWD128
, 0, 0 },
12154 { MASK_SSE2
, CODE_FOR_cvtsi2sd
, 0, IX86_BUILTIN_CVTSI2SD
, 0, 0 },
12155 { MASK_SSE2
, CODE_FOR_cvtsd2ss
, 0, IX86_BUILTIN_CVTSD2SS
, 0, 0 },
12156 { MASK_SSE2
, CODE_FOR_cvtss2sd
, 0, IX86_BUILTIN_CVTSS2SD
, 0, 0 }
12159 static const struct builtin_description bdesc_1arg
[] =
12161 { MASK_SSE1
| MASK_3DNOW_A
, CODE_FOR_mmx_pmovmskb
, 0, IX86_BUILTIN_PMOVMSKB
, 0, 0 },
12162 { MASK_SSE1
, CODE_FOR_sse_movmskps
, 0, IX86_BUILTIN_MOVMSKPS
, 0, 0 },
12164 { MASK_SSE1
, CODE_FOR_sqrtv4sf2
, 0, IX86_BUILTIN_SQRTPS
, 0, 0 },
12165 { MASK_SSE1
, CODE_FOR_rsqrtv4sf2
, 0, IX86_BUILTIN_RSQRTPS
, 0, 0 },
12166 { MASK_SSE1
, CODE_FOR_rcpv4sf2
, 0, IX86_BUILTIN_RCPPS
, 0, 0 },
12168 { MASK_SSE1
, CODE_FOR_cvtps2pi
, 0, IX86_BUILTIN_CVTPS2PI
, 0, 0 },
12169 { MASK_SSE1
, CODE_FOR_cvtss2si
, 0, IX86_BUILTIN_CVTSS2SI
, 0, 0 },
12170 { MASK_SSE1
, CODE_FOR_cvttps2pi
, 0, IX86_BUILTIN_CVTTPS2PI
, 0, 0 },
12171 { MASK_SSE1
, CODE_FOR_cvttss2si
, 0, IX86_BUILTIN_CVTTSS2SI
, 0, 0 },
12173 { MASK_SSE2
, CODE_FOR_sse2_pmovmskb
, 0, IX86_BUILTIN_PMOVMSKB128
, 0, 0 },
12174 { MASK_SSE2
, CODE_FOR_sse2_movmskpd
, 0, IX86_BUILTIN_MOVMSKPD
, 0, 0 },
12175 { MASK_SSE2
, CODE_FOR_sse2_movq2dq
, 0, IX86_BUILTIN_MOVQ2DQ
, 0, 0 },
12176 { MASK_SSE2
, CODE_FOR_sse2_movdq2q
, 0, IX86_BUILTIN_MOVDQ2Q
, 0, 0 },
12178 { MASK_SSE2
, CODE_FOR_sqrtv2df2
, 0, IX86_BUILTIN_SQRTPD
, 0, 0 },
12180 { MASK_SSE2
, CODE_FOR_cvtdq2pd
, 0, IX86_BUILTIN_CVTDQ2PD
, 0, 0 },
12181 { MASK_SSE2
, CODE_FOR_cvtdq2ps
, 0, IX86_BUILTIN_CVTDQ2PS
, 0, 0 },
12183 { MASK_SSE2
, CODE_FOR_cvtpd2dq
, 0, IX86_BUILTIN_CVTPD2DQ
, 0, 0 },
12184 { MASK_SSE2
, CODE_FOR_cvtpd2pi
, 0, IX86_BUILTIN_CVTPD2PI
, 0, 0 },
12185 { MASK_SSE2
, CODE_FOR_cvtpd2ps
, 0, IX86_BUILTIN_CVTPD2PS
, 0, 0 },
12186 { MASK_SSE2
, CODE_FOR_cvttpd2dq
, 0, IX86_BUILTIN_CVTTPD2DQ
, 0, 0 },
12187 { MASK_SSE2
, CODE_FOR_cvttpd2pi
, 0, IX86_BUILTIN_CVTTPD2PI
, 0, 0 },
12189 { MASK_SSE2
, CODE_FOR_cvtpi2pd
, 0, IX86_BUILTIN_CVTPI2PD
, 0, 0 },
12191 { MASK_SSE2
, CODE_FOR_cvtsd2si
, 0, IX86_BUILTIN_CVTSD2SI
, 0, 0 },
12192 { MASK_SSE2
, CODE_FOR_cvttsd2si
, 0, IX86_BUILTIN_CVTTSD2SI
, 0, 0 },
12194 { MASK_SSE2
, CODE_FOR_cvtps2dq
, 0, IX86_BUILTIN_CVTPS2DQ
, 0, 0 },
12195 { MASK_SSE2
, CODE_FOR_cvtps2pd
, 0, IX86_BUILTIN_CVTPS2PD
, 0, 0 },
12196 { MASK_SSE2
, CODE_FOR_cvttps2dq
, 0, IX86_BUILTIN_CVTTPS2DQ
, 0, 0 },
12198 { MASK_SSE2
, CODE_FOR_sse2_movq
, 0, IX86_BUILTIN_MOVQ
, 0, 0 }
12202 ix86_init_builtins ()
12205 ix86_init_mmx_sse_builtins ();
12208 /* Set up all the MMX/SSE builtins. This is not called if TARGET_MMX
12209 is zero. Otherwise, if TARGET_SSE is not set, only expand the MMX
12212 ix86_init_mmx_sse_builtins ()
12214 const struct builtin_description
* d
;
12217 tree pchar_type_node
= build_pointer_type (char_type_node
);
12218 tree pfloat_type_node
= build_pointer_type (float_type_node
);
12219 tree pv2si_type_node
= build_pointer_type (V2SI_type_node
);
12220 tree pv2di_type_node
= build_pointer_type (V2DI_type_node
);
12221 tree pdi_type_node
= build_pointer_type (long_long_unsigned_type_node
);
12224 tree int_ftype_v4sf_v4sf
12225 = build_function_type_list (integer_type_node
,
12226 V4SF_type_node
, V4SF_type_node
, NULL_TREE
);
12227 tree v4si_ftype_v4sf_v4sf
12228 = build_function_type_list (V4SI_type_node
,
12229 V4SF_type_node
, V4SF_type_node
, NULL_TREE
);
12230 /* MMX/SSE/integer conversions. */
12231 tree int_ftype_v4sf
12232 = build_function_type_list (integer_type_node
,
12233 V4SF_type_node
, NULL_TREE
);
12234 tree int_ftype_v8qi
12235 = build_function_type_list (integer_type_node
, V8QI_type_node
, NULL_TREE
);
12236 tree v4sf_ftype_v4sf_int
12237 = build_function_type_list (V4SF_type_node
,
12238 V4SF_type_node
, integer_type_node
, NULL_TREE
);
12239 tree v4sf_ftype_v4sf_v2si
12240 = build_function_type_list (V4SF_type_node
,
12241 V4SF_type_node
, V2SI_type_node
, NULL_TREE
);
12242 tree int_ftype_v4hi_int
12243 = build_function_type_list (integer_type_node
,
12244 V4HI_type_node
, integer_type_node
, NULL_TREE
);
12245 tree v4hi_ftype_v4hi_int_int
12246 = build_function_type_list (V4HI_type_node
, V4HI_type_node
,
12247 integer_type_node
, integer_type_node
,
12249 /* Miscellaneous. */
12250 tree v8qi_ftype_v4hi_v4hi
12251 = build_function_type_list (V8QI_type_node
,
12252 V4HI_type_node
, V4HI_type_node
, NULL_TREE
);
12253 tree v4hi_ftype_v2si_v2si
12254 = build_function_type_list (V4HI_type_node
,
12255 V2SI_type_node
, V2SI_type_node
, NULL_TREE
);
12256 tree v4sf_ftype_v4sf_v4sf_int
12257 = build_function_type_list (V4SF_type_node
,
12258 V4SF_type_node
, V4SF_type_node
,
12259 integer_type_node
, NULL_TREE
);
12260 tree v2si_ftype_v4hi_v4hi
12261 = build_function_type_list (V2SI_type_node
,
12262 V4HI_type_node
, V4HI_type_node
, NULL_TREE
);
12263 tree v4hi_ftype_v4hi_int
12264 = build_function_type_list (V4HI_type_node
,
12265 V4HI_type_node
, integer_type_node
, NULL_TREE
);
12266 tree v4hi_ftype_v4hi_di
12267 = build_function_type_list (V4HI_type_node
,
12268 V4HI_type_node
, long_long_unsigned_type_node
,
12270 tree v2si_ftype_v2si_di
12271 = build_function_type_list (V2SI_type_node
,
12272 V2SI_type_node
, long_long_unsigned_type_node
,
12274 tree void_ftype_void
12275 = build_function_type (void_type_node
, void_list_node
);
12276 tree void_ftype_unsigned
12277 = build_function_type_list (void_type_node
, unsigned_type_node
, NULL_TREE
);
12278 tree unsigned_ftype_void
12279 = build_function_type (unsigned_type_node
, void_list_node
);
12281 = build_function_type (long_long_unsigned_type_node
, void_list_node
);
12282 tree v4sf_ftype_void
12283 = build_function_type (V4SF_type_node
, void_list_node
);
12284 tree v2si_ftype_v4sf
12285 = build_function_type_list (V2SI_type_node
, V4SF_type_node
, NULL_TREE
);
12286 /* Loads/stores. */
12287 tree void_ftype_v8qi_v8qi_pchar
12288 = build_function_type_list (void_type_node
,
12289 V8QI_type_node
, V8QI_type_node
,
12290 pchar_type_node
, NULL_TREE
);
12291 tree v4sf_ftype_pfloat
12292 = build_function_type_list (V4SF_type_node
, pfloat_type_node
, NULL_TREE
);
12293 /* @@@ the type is bogus */
12294 tree v4sf_ftype_v4sf_pv2si
12295 = build_function_type_list (V4SF_type_node
,
12296 V4SF_type_node
, pv2si_type_node
, NULL_TREE
);
12297 tree void_ftype_pv2si_v4sf
12298 = build_function_type_list (void_type_node
,
12299 pv2si_type_node
, V4SF_type_node
, NULL_TREE
);
12300 tree void_ftype_pfloat_v4sf
12301 = build_function_type_list (void_type_node
,
12302 pfloat_type_node
, V4SF_type_node
, NULL_TREE
);
12303 tree void_ftype_pdi_di
12304 = build_function_type_list (void_type_node
,
12305 pdi_type_node
, long_long_unsigned_type_node
,
12307 tree void_ftype_pv2di_v2di
12308 = build_function_type_list (void_type_node
,
12309 pv2di_type_node
, V2DI_type_node
, NULL_TREE
);
12310 /* Normal vector unops. */
12311 tree v4sf_ftype_v4sf
12312 = build_function_type_list (V4SF_type_node
, V4SF_type_node
, NULL_TREE
);
12314 /* Normal vector binops. */
12315 tree v4sf_ftype_v4sf_v4sf
12316 = build_function_type_list (V4SF_type_node
,
12317 V4SF_type_node
, V4SF_type_node
, NULL_TREE
);
12318 tree v8qi_ftype_v8qi_v8qi
12319 = build_function_type_list (V8QI_type_node
,
12320 V8QI_type_node
, V8QI_type_node
, NULL_TREE
);
12321 tree v4hi_ftype_v4hi_v4hi
12322 = build_function_type_list (V4HI_type_node
,
12323 V4HI_type_node
, V4HI_type_node
, NULL_TREE
);
12324 tree v2si_ftype_v2si_v2si
12325 = build_function_type_list (V2SI_type_node
,
12326 V2SI_type_node
, V2SI_type_node
, NULL_TREE
);
12327 tree di_ftype_di_di
12328 = build_function_type_list (long_long_unsigned_type_node
,
12329 long_long_unsigned_type_node
,
12330 long_long_unsigned_type_node
, NULL_TREE
);
12332 tree v2si_ftype_v2sf
12333 = build_function_type_list (V2SI_type_node
, V2SF_type_node
, NULL_TREE
);
12334 tree v2sf_ftype_v2si
12335 = build_function_type_list (V2SF_type_node
, V2SI_type_node
, NULL_TREE
);
12336 tree v2si_ftype_v2si
12337 = build_function_type_list (V2SI_type_node
, V2SI_type_node
, NULL_TREE
);
12338 tree v2sf_ftype_v2sf
12339 = build_function_type_list (V2SF_type_node
, V2SF_type_node
, NULL_TREE
);
12340 tree v2sf_ftype_v2sf_v2sf
12341 = build_function_type_list (V2SF_type_node
,
12342 V2SF_type_node
, V2SF_type_node
, NULL_TREE
);
12343 tree v2si_ftype_v2sf_v2sf
12344 = build_function_type_list (V2SI_type_node
,
12345 V2SF_type_node
, V2SF_type_node
, NULL_TREE
);
12346 tree pint_type_node
= build_pointer_type (integer_type_node
);
12347 tree pdouble_type_node
= build_pointer_type (double_type_node
);
12348 tree int_ftype_v2df_v2df
12349 = build_function_type_list (integer_type_node
,
12350 V2DF_type_node
, V2DF_type_node
, NULL_TREE
);
12353 = build_function_type (intTI_type_node
, void_list_node
);
12354 tree v2di_ftype_void
12355 = build_function_type (V2DI_type_node
, void_list_node
);
12356 tree ti_ftype_ti_ti
12357 = build_function_type_list (intTI_type_node
,
12358 intTI_type_node
, intTI_type_node
, NULL_TREE
);
12359 tree void_ftype_pvoid
12360 = build_function_type_list (void_type_node
, ptr_type_node
, NULL_TREE
);
12362 = build_function_type_list (V2DI_type_node
,
12363 long_long_unsigned_type_node
, NULL_TREE
);
12365 = build_function_type_list (long_long_unsigned_type_node
,
12366 V2DI_type_node
, NULL_TREE
);
12367 tree v4sf_ftype_v4si
12368 = build_function_type_list (V4SF_type_node
, V4SI_type_node
, NULL_TREE
);
12369 tree v4si_ftype_v4sf
12370 = build_function_type_list (V4SI_type_node
, V4SF_type_node
, NULL_TREE
);
12371 tree v2df_ftype_v4si
12372 = build_function_type_list (V2DF_type_node
, V4SI_type_node
, NULL_TREE
);
12373 tree v4si_ftype_v2df
12374 = build_function_type_list (V4SI_type_node
, V2DF_type_node
, NULL_TREE
);
12375 tree v2si_ftype_v2df
12376 = build_function_type_list (V2SI_type_node
, V2DF_type_node
, NULL_TREE
);
12377 tree v4sf_ftype_v2df
12378 = build_function_type_list (V4SF_type_node
, V2DF_type_node
, NULL_TREE
);
12379 tree v2df_ftype_v2si
12380 = build_function_type_list (V2DF_type_node
, V2SI_type_node
, NULL_TREE
);
12381 tree v2df_ftype_v4sf
12382 = build_function_type_list (V2DF_type_node
, V4SF_type_node
, NULL_TREE
);
12383 tree int_ftype_v2df
12384 = build_function_type_list (integer_type_node
, V2DF_type_node
, NULL_TREE
);
12385 tree v2df_ftype_v2df_int
12386 = build_function_type_list (V2DF_type_node
,
12387 V2DF_type_node
, integer_type_node
, NULL_TREE
);
12388 tree v4sf_ftype_v4sf_v2df
12389 = build_function_type_list (V4SF_type_node
,
12390 V4SF_type_node
, V2DF_type_node
, NULL_TREE
);
12391 tree v2df_ftype_v2df_v4sf
12392 = build_function_type_list (V2DF_type_node
,
12393 V2DF_type_node
, V4SF_type_node
, NULL_TREE
);
12394 tree v2df_ftype_v2df_v2df_int
12395 = build_function_type_list (V2DF_type_node
,
12396 V2DF_type_node
, V2DF_type_node
,
12399 tree v2df_ftype_v2df_pv2si
12400 = build_function_type_list (V2DF_type_node
,
12401 V2DF_type_node
, pv2si_type_node
, NULL_TREE
);
12402 tree void_ftype_pv2si_v2df
12403 = build_function_type_list (void_type_node
,
12404 pv2si_type_node
, V2DF_type_node
, NULL_TREE
);
12405 tree void_ftype_pdouble_v2df
12406 = build_function_type_list (void_type_node
,
12407 pdouble_type_node
, V2DF_type_node
, NULL_TREE
);
12408 tree void_ftype_pint_int
12409 = build_function_type_list (void_type_node
,
12410 pint_type_node
, integer_type_node
, NULL_TREE
);
12411 tree void_ftype_v16qi_v16qi_pchar
12412 = build_function_type_list (void_type_node
,
12413 V16QI_type_node
, V16QI_type_node
,
12414 pchar_type_node
, NULL_TREE
);
12415 tree v2df_ftype_pdouble
12416 = build_function_type_list (V2DF_type_node
, pdouble_type_node
, NULL_TREE
);
12417 tree v2df_ftype_v2df_v2df
12418 = build_function_type_list (V2DF_type_node
,
12419 V2DF_type_node
, V2DF_type_node
, NULL_TREE
);
12420 tree v16qi_ftype_v16qi_v16qi
12421 = build_function_type_list (V16QI_type_node
,
12422 V16QI_type_node
, V16QI_type_node
, NULL_TREE
);
12423 tree v8hi_ftype_v8hi_v8hi
12424 = build_function_type_list (V8HI_type_node
,
12425 V8HI_type_node
, V8HI_type_node
, NULL_TREE
);
12426 tree v4si_ftype_v4si_v4si
12427 = build_function_type_list (V4SI_type_node
,
12428 V4SI_type_node
, V4SI_type_node
, NULL_TREE
);
12429 tree v2di_ftype_v2di_v2di
12430 = build_function_type_list (V2DI_type_node
,
12431 V2DI_type_node
, V2DI_type_node
, NULL_TREE
);
12432 tree v2di_ftype_v2df_v2df
12433 = build_function_type_list (V2DI_type_node
,
12434 V2DF_type_node
, V2DF_type_node
, NULL_TREE
);
12435 tree v2df_ftype_v2df
12436 = build_function_type_list (V2DF_type_node
, V2DF_type_node
, NULL_TREE
);
12437 tree v2df_ftype_double
12438 = build_function_type_list (V2DF_type_node
, double_type_node
, NULL_TREE
);
12439 tree v2df_ftype_double_double
12440 = build_function_type_list (V2DF_type_node
,
12441 double_type_node
, double_type_node
, NULL_TREE
);
12442 tree int_ftype_v8hi_int
12443 = build_function_type_list (integer_type_node
,
12444 V8HI_type_node
, integer_type_node
, NULL_TREE
);
12445 tree v8hi_ftype_v8hi_int_int
12446 = build_function_type_list (V8HI_type_node
,
12447 V8HI_type_node
, integer_type_node
,
12448 integer_type_node
, NULL_TREE
);
12449 tree v2di_ftype_v2di_int
12450 = build_function_type_list (V2DI_type_node
,
12451 V2DI_type_node
, integer_type_node
, NULL_TREE
);
12452 tree v4si_ftype_v4si_int
12453 = build_function_type_list (V4SI_type_node
,
12454 V4SI_type_node
, integer_type_node
, NULL_TREE
);
12455 tree v8hi_ftype_v8hi_int
12456 = build_function_type_list (V8HI_type_node
,
12457 V8HI_type_node
, integer_type_node
, NULL_TREE
);
12458 tree v8hi_ftype_v8hi_v2di
12459 = build_function_type_list (V8HI_type_node
,
12460 V8HI_type_node
, V2DI_type_node
, NULL_TREE
);
12461 tree v4si_ftype_v4si_v2di
12462 = build_function_type_list (V4SI_type_node
,
12463 V4SI_type_node
, V2DI_type_node
, NULL_TREE
);
12464 tree v4si_ftype_v8hi_v8hi
12465 = build_function_type_list (V4SI_type_node
,
12466 V8HI_type_node
, V8HI_type_node
, NULL_TREE
);
12467 tree di_ftype_v8qi_v8qi
12468 = build_function_type_list (long_long_unsigned_type_node
,
12469 V8QI_type_node
, V8QI_type_node
, NULL_TREE
);
12470 tree v2di_ftype_v16qi_v16qi
12471 = build_function_type_list (V2DI_type_node
,
12472 V16QI_type_node
, V16QI_type_node
, NULL_TREE
);
12473 tree int_ftype_v16qi
12474 = build_function_type_list (integer_type_node
, V16QI_type_node
, NULL_TREE
);
12475 tree v16qi_ftype_pchar
12476 = build_function_type_list (V16QI_type_node
, pchar_type_node
, NULL_TREE
);
12477 tree void_ftype_pchar_v16qi
12478 = build_function_type_list (void_type_node
,
12479 pchar_type_node
, V16QI_type_node
, NULL_TREE
);
12480 tree v4si_ftype_pchar
12481 = build_function_type_list (V4SI_type_node
, pchar_type_node
, NULL_TREE
);
12482 tree void_ftype_pchar_v4si
12483 = build_function_type_list (void_type_node
,
12484 pchar_type_node
, V4SI_type_node
, NULL_TREE
);
12485 tree v2di_ftype_v2di
12486 = build_function_type_list (V2DI_type_node
, V2DI_type_node
, NULL_TREE
);
12488 /* Add all builtins that are more or less simple operations on two
12490 for (i
= 0, d
= bdesc_2arg
; i
< ARRAY_SIZE (bdesc_2arg
); i
++, d
++)
12492 /* Use one of the operands; the target can have a different mode for
12493 mask-generating compares. */
12494 enum machine_mode mode
;
12499 mode
= insn_data
[d
->icode
].operand
[1].mode
;
12504 type
= v16qi_ftype_v16qi_v16qi
;
12507 type
= v8hi_ftype_v8hi_v8hi
;
12510 type
= v4si_ftype_v4si_v4si
;
12513 type
= v2di_ftype_v2di_v2di
;
12516 type
= v2df_ftype_v2df_v2df
;
12519 type
= ti_ftype_ti_ti
;
12522 type
= v4sf_ftype_v4sf_v4sf
;
12525 type
= v8qi_ftype_v8qi_v8qi
;
12528 type
= v4hi_ftype_v4hi_v4hi
;
12531 type
= v2si_ftype_v2si_v2si
;
12534 type
= di_ftype_di_di
;
12541 /* Override for comparisons. */
12542 if (d
->icode
== CODE_FOR_maskcmpv4sf3
12543 || d
->icode
== CODE_FOR_maskncmpv4sf3
12544 || d
->icode
== CODE_FOR_vmmaskcmpv4sf3
12545 || d
->icode
== CODE_FOR_vmmaskncmpv4sf3
)
12546 type
= v4si_ftype_v4sf_v4sf
;
12548 if (d
->icode
== CODE_FOR_maskcmpv2df3
12549 || d
->icode
== CODE_FOR_maskncmpv2df3
12550 || d
->icode
== CODE_FOR_vmmaskcmpv2df3
12551 || d
->icode
== CODE_FOR_vmmaskncmpv2df3
)
12552 type
= v2di_ftype_v2df_v2df
;
12554 def_builtin (d
->mask
, d
->name
, type
, d
->code
);
12557 /* Add the remaining MMX insns with somewhat more complicated types. */
12558 def_builtin (MASK_MMX
, "__builtin_ia32_mmx_zero", di_ftype_void
, IX86_BUILTIN_MMX_ZERO
);
12559 def_builtin (MASK_MMX
, "__builtin_ia32_emms", void_ftype_void
, IX86_BUILTIN_EMMS
);
12560 def_builtin (MASK_MMX
, "__builtin_ia32_ldmxcsr", void_ftype_unsigned
, IX86_BUILTIN_LDMXCSR
);
12561 def_builtin (MASK_MMX
, "__builtin_ia32_stmxcsr", unsigned_ftype_void
, IX86_BUILTIN_STMXCSR
);
12562 def_builtin (MASK_MMX
, "__builtin_ia32_psllw", v4hi_ftype_v4hi_di
, IX86_BUILTIN_PSLLW
);
12563 def_builtin (MASK_MMX
, "__builtin_ia32_pslld", v2si_ftype_v2si_di
, IX86_BUILTIN_PSLLD
);
12564 def_builtin (MASK_MMX
, "__builtin_ia32_psllq", di_ftype_di_di
, IX86_BUILTIN_PSLLQ
);
12566 def_builtin (MASK_MMX
, "__builtin_ia32_psrlw", v4hi_ftype_v4hi_di
, IX86_BUILTIN_PSRLW
);
12567 def_builtin (MASK_MMX
, "__builtin_ia32_psrld", v2si_ftype_v2si_di
, IX86_BUILTIN_PSRLD
);
12568 def_builtin (MASK_MMX
, "__builtin_ia32_psrlq", di_ftype_di_di
, IX86_BUILTIN_PSRLQ
);
12570 def_builtin (MASK_MMX
, "__builtin_ia32_psraw", v4hi_ftype_v4hi_di
, IX86_BUILTIN_PSRAW
);
12571 def_builtin (MASK_MMX
, "__builtin_ia32_psrad", v2si_ftype_v2si_di
, IX86_BUILTIN_PSRAD
);
12573 def_builtin (MASK_MMX
, "__builtin_ia32_pshufw", v4hi_ftype_v4hi_int
, IX86_BUILTIN_PSHUFW
);
12574 def_builtin (MASK_MMX
, "__builtin_ia32_pmaddwd", v2si_ftype_v4hi_v4hi
, IX86_BUILTIN_PMADDWD
);
12576 /* comi/ucomi insns. */
12577 for (i
= 0, d
= bdesc_comi
; i
< ARRAY_SIZE (bdesc_comi
); i
++, d
++)
12578 if (d
->mask
== MASK_SSE2
)
12579 def_builtin (d
->mask
, d
->name
, int_ftype_v2df_v2df
, d
->code
);
12581 def_builtin (d
->mask
, d
->name
, int_ftype_v4sf_v4sf
, d
->code
);
12583 def_builtin (MASK_MMX
, "__builtin_ia32_packsswb", v8qi_ftype_v4hi_v4hi
, IX86_BUILTIN_PACKSSWB
);
12584 def_builtin (MASK_MMX
, "__builtin_ia32_packssdw", v4hi_ftype_v2si_v2si
, IX86_BUILTIN_PACKSSDW
);
12585 def_builtin (MASK_MMX
, "__builtin_ia32_packuswb", v8qi_ftype_v4hi_v4hi
, IX86_BUILTIN_PACKUSWB
);
12587 def_builtin (MASK_SSE1
, "__builtin_ia32_cvtpi2ps", v4sf_ftype_v4sf_v2si
, IX86_BUILTIN_CVTPI2PS
);
12588 def_builtin (MASK_SSE1
, "__builtin_ia32_cvtps2pi", v2si_ftype_v4sf
, IX86_BUILTIN_CVTPS2PI
);
12589 def_builtin (MASK_SSE1
, "__builtin_ia32_cvtsi2ss", v4sf_ftype_v4sf_int
, IX86_BUILTIN_CVTSI2SS
);
12590 def_builtin (MASK_SSE1
, "__builtin_ia32_cvtss2si", int_ftype_v4sf
, IX86_BUILTIN_CVTSS2SI
);
12591 def_builtin (MASK_SSE1
, "__builtin_ia32_cvttps2pi", v2si_ftype_v4sf
, IX86_BUILTIN_CVTTPS2PI
);
12592 def_builtin (MASK_SSE1
, "__builtin_ia32_cvttss2si", int_ftype_v4sf
, IX86_BUILTIN_CVTTSS2SI
);
12594 def_builtin (MASK_SSE1
| MASK_3DNOW_A
, "__builtin_ia32_pextrw", int_ftype_v4hi_int
, IX86_BUILTIN_PEXTRW
);
12595 def_builtin (MASK_SSE1
| MASK_3DNOW_A
, "__builtin_ia32_pinsrw", v4hi_ftype_v4hi_int_int
, IX86_BUILTIN_PINSRW
);
12597 def_builtin (MASK_SSE1
| MASK_3DNOW_A
, "__builtin_ia32_maskmovq", void_ftype_v8qi_v8qi_pchar
, IX86_BUILTIN_MASKMOVQ
);
12599 def_builtin (MASK_SSE1
, "__builtin_ia32_loadaps", v4sf_ftype_pfloat
, IX86_BUILTIN_LOADAPS
);
12600 def_builtin (MASK_SSE1
, "__builtin_ia32_loadups", v4sf_ftype_pfloat
, IX86_BUILTIN_LOADUPS
);
12601 def_builtin (MASK_SSE1
, "__builtin_ia32_loadss", v4sf_ftype_pfloat
, IX86_BUILTIN_LOADSS
);
12602 def_builtin (MASK_SSE1
, "__builtin_ia32_storeaps", void_ftype_pfloat_v4sf
, IX86_BUILTIN_STOREAPS
);
12603 def_builtin (MASK_SSE1
, "__builtin_ia32_storeups", void_ftype_pfloat_v4sf
, IX86_BUILTIN_STOREUPS
);
12604 def_builtin (MASK_SSE1
, "__builtin_ia32_storess", void_ftype_pfloat_v4sf
, IX86_BUILTIN_STORESS
);
12606 def_builtin (MASK_SSE1
, "__builtin_ia32_loadhps", v4sf_ftype_v4sf_pv2si
, IX86_BUILTIN_LOADHPS
);
12607 def_builtin (MASK_SSE1
, "__builtin_ia32_loadlps", v4sf_ftype_v4sf_pv2si
, IX86_BUILTIN_LOADLPS
);
12608 def_builtin (MASK_SSE1
, "__builtin_ia32_storehps", void_ftype_pv2si_v4sf
, IX86_BUILTIN_STOREHPS
);
12609 def_builtin (MASK_SSE1
, "__builtin_ia32_storelps", void_ftype_pv2si_v4sf
, IX86_BUILTIN_STORELPS
);
12611 def_builtin (MASK_SSE1
, "__builtin_ia32_movmskps", int_ftype_v4sf
, IX86_BUILTIN_MOVMSKPS
);
12612 def_builtin (MASK_SSE1
| MASK_3DNOW_A
, "__builtin_ia32_pmovmskb", int_ftype_v8qi
, IX86_BUILTIN_PMOVMSKB
);
12613 def_builtin (MASK_SSE1
, "__builtin_ia32_movntps", void_ftype_pfloat_v4sf
, IX86_BUILTIN_MOVNTPS
);
12614 def_builtin (MASK_SSE1
| MASK_3DNOW_A
, "__builtin_ia32_movntq", void_ftype_pdi_di
, IX86_BUILTIN_MOVNTQ
);
12616 def_builtin (MASK_SSE1
| MASK_3DNOW_A
, "__builtin_ia32_sfence", void_ftype_void
, IX86_BUILTIN_SFENCE
);
12618 def_builtin (MASK_SSE1
| MASK_3DNOW_A
, "__builtin_ia32_psadbw", di_ftype_v8qi_v8qi
, IX86_BUILTIN_PSADBW
);
12620 def_builtin (MASK_SSE1
, "__builtin_ia32_rcpps", v4sf_ftype_v4sf
, IX86_BUILTIN_RCPPS
);
12621 def_builtin (MASK_SSE1
, "__builtin_ia32_rcpss", v4sf_ftype_v4sf
, IX86_BUILTIN_RCPSS
);
12622 def_builtin (MASK_SSE1
, "__builtin_ia32_rsqrtps", v4sf_ftype_v4sf
, IX86_BUILTIN_RSQRTPS
);
12623 def_builtin (MASK_SSE1
, "__builtin_ia32_rsqrtss", v4sf_ftype_v4sf
, IX86_BUILTIN_RSQRTSS
);
12624 def_builtin (MASK_SSE1
, "__builtin_ia32_sqrtps", v4sf_ftype_v4sf
, IX86_BUILTIN_SQRTPS
);
12625 def_builtin (MASK_SSE1
, "__builtin_ia32_sqrtss", v4sf_ftype_v4sf
, IX86_BUILTIN_SQRTSS
);
12627 def_builtin (MASK_SSE1
, "__builtin_ia32_shufps", v4sf_ftype_v4sf_v4sf_int
, IX86_BUILTIN_SHUFPS
);
12629 /* Original 3DNow! */
12630 def_builtin (MASK_3DNOW
, "__builtin_ia32_femms", void_ftype_void
, IX86_BUILTIN_FEMMS
);
12631 def_builtin (MASK_3DNOW
, "__builtin_ia32_pavgusb", v8qi_ftype_v8qi_v8qi
, IX86_BUILTIN_PAVGUSB
);
12632 def_builtin (MASK_3DNOW
, "__builtin_ia32_pf2id", v2si_ftype_v2sf
, IX86_BUILTIN_PF2ID
);
12633 def_builtin (MASK_3DNOW
, "__builtin_ia32_pfacc", v2sf_ftype_v2sf_v2sf
, IX86_BUILTIN_PFACC
);
12634 def_builtin (MASK_3DNOW
, "__builtin_ia32_pfadd", v2sf_ftype_v2sf_v2sf
, IX86_BUILTIN_PFADD
);
12635 def_builtin (MASK_3DNOW
, "__builtin_ia32_pfcmpeq", v2si_ftype_v2sf_v2sf
, IX86_BUILTIN_PFCMPEQ
);
12636 def_builtin (MASK_3DNOW
, "__builtin_ia32_pfcmpge", v2si_ftype_v2sf_v2sf
, IX86_BUILTIN_PFCMPGE
);
12637 def_builtin (MASK_3DNOW
, "__builtin_ia32_pfcmpgt", v2si_ftype_v2sf_v2sf
, IX86_BUILTIN_PFCMPGT
);
12638 def_builtin (MASK_3DNOW
, "__builtin_ia32_pfmax", v2sf_ftype_v2sf_v2sf
, IX86_BUILTIN_PFMAX
);
12639 def_builtin (MASK_3DNOW
, "__builtin_ia32_pfmin", v2sf_ftype_v2sf_v2sf
, IX86_BUILTIN_PFMIN
);
12640 def_builtin (MASK_3DNOW
, "__builtin_ia32_pfmul", v2sf_ftype_v2sf_v2sf
, IX86_BUILTIN_PFMUL
);
12641 def_builtin (MASK_3DNOW
, "__builtin_ia32_pfrcp", v2sf_ftype_v2sf
, IX86_BUILTIN_PFRCP
);
12642 def_builtin (MASK_3DNOW
, "__builtin_ia32_pfrcpit1", v2sf_ftype_v2sf_v2sf
, IX86_BUILTIN_PFRCPIT1
);
12643 def_builtin (MASK_3DNOW
, "__builtin_ia32_pfrcpit2", v2sf_ftype_v2sf_v2sf
, IX86_BUILTIN_PFRCPIT2
);
12644 def_builtin (MASK_3DNOW
, "__builtin_ia32_pfrsqrt", v2sf_ftype_v2sf
, IX86_BUILTIN_PFRSQRT
);
12645 def_builtin (MASK_3DNOW
, "__builtin_ia32_pfrsqit1", v2sf_ftype_v2sf_v2sf
, IX86_BUILTIN_PFRSQIT1
);
12646 def_builtin (MASK_3DNOW
, "__builtin_ia32_pfsub", v2sf_ftype_v2sf_v2sf
, IX86_BUILTIN_PFSUB
);
12647 def_builtin (MASK_3DNOW
, "__builtin_ia32_pfsubr", v2sf_ftype_v2sf_v2sf
, IX86_BUILTIN_PFSUBR
);
12648 def_builtin (MASK_3DNOW
, "__builtin_ia32_pi2fd", v2sf_ftype_v2si
, IX86_BUILTIN_PI2FD
);
12649 def_builtin (MASK_3DNOW
, "__builtin_ia32_pmulhrw", v4hi_ftype_v4hi_v4hi
, IX86_BUILTIN_PMULHRW
);
12651 /* 3DNow! extension as used in the Athlon CPU. */
12652 def_builtin (MASK_3DNOW_A
, "__builtin_ia32_pf2iw", v2si_ftype_v2sf
, IX86_BUILTIN_PF2IW
);
12653 def_builtin (MASK_3DNOW_A
, "__builtin_ia32_pfnacc", v2sf_ftype_v2sf_v2sf
, IX86_BUILTIN_PFNACC
);
12654 def_builtin (MASK_3DNOW_A
, "__builtin_ia32_pfpnacc", v2sf_ftype_v2sf_v2sf
, IX86_BUILTIN_PFPNACC
);
12655 def_builtin (MASK_3DNOW_A
, "__builtin_ia32_pi2fw", v2sf_ftype_v2si
, IX86_BUILTIN_PI2FW
);
12656 def_builtin (MASK_3DNOW_A
, "__builtin_ia32_pswapdsf", v2sf_ftype_v2sf
, IX86_BUILTIN_PSWAPDSF
);
12657 def_builtin (MASK_3DNOW_A
, "__builtin_ia32_pswapdsi", v2si_ftype_v2si
, IX86_BUILTIN_PSWAPDSI
);
12659 def_builtin (MASK_SSE1
, "__builtin_ia32_setzerops", v4sf_ftype_void
, IX86_BUILTIN_SSE_ZERO
);
12662 def_builtin (MASK_SSE2
, "__builtin_ia32_pextrw128", int_ftype_v8hi_int
, IX86_BUILTIN_PEXTRW128
);
12663 def_builtin (MASK_SSE2
, "__builtin_ia32_pinsrw128", v8hi_ftype_v8hi_int_int
, IX86_BUILTIN_PINSRW128
);
12665 def_builtin (MASK_SSE2
, "__builtin_ia32_maskmovdqu", void_ftype_v16qi_v16qi_pchar
, IX86_BUILTIN_MASKMOVDQU
);
12666 def_builtin (MASK_SSE2
, "__builtin_ia32_movq2dq", v2di_ftype_di
, IX86_BUILTIN_MOVQ2DQ
);
12667 def_builtin (MASK_SSE2
, "__builtin_ia32_movdq2q", di_ftype_v2di
, IX86_BUILTIN_MOVDQ2Q
);
12669 def_builtin (MASK_SSE2
, "__builtin_ia32_loadapd", v2df_ftype_pdouble
, IX86_BUILTIN_LOADAPD
);
12670 def_builtin (MASK_SSE2
, "__builtin_ia32_loadupd", v2df_ftype_pdouble
, IX86_BUILTIN_LOADUPD
);
12671 def_builtin (MASK_SSE2
, "__builtin_ia32_loadsd", v2df_ftype_pdouble
, IX86_BUILTIN_LOADSD
);
12672 def_builtin (MASK_SSE2
, "__builtin_ia32_storeapd", void_ftype_pdouble_v2df
, IX86_BUILTIN_STOREAPD
);
12673 def_builtin (MASK_SSE2
, "__builtin_ia32_storeupd", void_ftype_pdouble_v2df
, IX86_BUILTIN_STOREUPD
);
12674 def_builtin (MASK_SSE2
, "__builtin_ia32_storesd", void_ftype_pdouble_v2df
, IX86_BUILTIN_STORESD
);
12676 def_builtin (MASK_SSE2
, "__builtin_ia32_loadhpd", v2df_ftype_v2df_pv2si
, IX86_BUILTIN_LOADHPD
);
12677 def_builtin (MASK_SSE2
, "__builtin_ia32_loadlpd", v2df_ftype_v2df_pv2si
, IX86_BUILTIN_LOADLPD
);
12678 def_builtin (MASK_SSE2
, "__builtin_ia32_storehpd", void_ftype_pv2si_v2df
, IX86_BUILTIN_STOREHPD
);
12679 def_builtin (MASK_SSE2
, "__builtin_ia32_storelpd", void_ftype_pv2si_v2df
, IX86_BUILTIN_STORELPD
);
12681 def_builtin (MASK_SSE2
, "__builtin_ia32_movmskpd", int_ftype_v2df
, IX86_BUILTIN_MOVMSKPD
);
12682 def_builtin (MASK_SSE2
, "__builtin_ia32_pmovmskb128", int_ftype_v16qi
, IX86_BUILTIN_PMOVMSKB128
);
12683 def_builtin (MASK_SSE2
, "__builtin_ia32_movnti", void_ftype_pint_int
, IX86_BUILTIN_MOVNTI
);
12684 def_builtin (MASK_SSE2
, "__builtin_ia32_movntpd", void_ftype_pdouble_v2df
, IX86_BUILTIN_MOVNTPD
);
12685 def_builtin (MASK_SSE2
, "__builtin_ia32_movntdq", void_ftype_pv2di_v2di
, IX86_BUILTIN_MOVNTDQ
);
12687 def_builtin (MASK_SSE2
, "__builtin_ia32_pshufd", v4si_ftype_v4si_int
, IX86_BUILTIN_PSHUFD
);
12688 def_builtin (MASK_SSE2
, "__builtin_ia32_pshuflw", v8hi_ftype_v8hi_int
, IX86_BUILTIN_PSHUFLW
);
12689 def_builtin (MASK_SSE2
, "__builtin_ia32_pshufhw", v8hi_ftype_v8hi_int
, IX86_BUILTIN_PSHUFHW
);
12690 def_builtin (MASK_SSE2
, "__builtin_ia32_psadbw128", v2di_ftype_v16qi_v16qi
, IX86_BUILTIN_PSADBW128
);
12692 def_builtin (MASK_SSE2
, "__builtin_ia32_sqrtpd", v2df_ftype_v2df
, IX86_BUILTIN_SQRTPD
);
12693 def_builtin (MASK_SSE2
, "__builtin_ia32_sqrtsd", v2df_ftype_v2df
, IX86_BUILTIN_SQRTSD
);
12695 def_builtin (MASK_SSE2
, "__builtin_ia32_shufpd", v2df_ftype_v2df_v2df_int
, IX86_BUILTIN_SHUFPD
);
12697 def_builtin (MASK_SSE2
, "__builtin_ia32_cvtdq2pd", v2df_ftype_v4si
, IX86_BUILTIN_CVTDQ2PD
);
12698 def_builtin (MASK_SSE2
, "__builtin_ia32_cvtdq2ps", v4sf_ftype_v4si
, IX86_BUILTIN_CVTDQ2PS
);
12700 def_builtin (MASK_SSE2
, "__builtin_ia32_cvtpd2dq", v4si_ftype_v2df
, IX86_BUILTIN_CVTPD2DQ
);
12701 def_builtin (MASK_SSE2
, "__builtin_ia32_cvtpd2pi", v2si_ftype_v2df
, IX86_BUILTIN_CVTPD2PI
);
12702 def_builtin (MASK_SSE2
, "__builtin_ia32_cvtpd2ps", v4sf_ftype_v2df
, IX86_BUILTIN_CVTPD2PS
);
12703 def_builtin (MASK_SSE2
, "__builtin_ia32_cvttpd2dq", v4si_ftype_v2df
, IX86_BUILTIN_CVTTPD2DQ
);
12704 def_builtin (MASK_SSE2
, "__builtin_ia32_cvttpd2pi", v2si_ftype_v2df
, IX86_BUILTIN_CVTTPD2PI
);
12706 def_builtin (MASK_SSE2
, "__builtin_ia32_cvtpi2pd", v2df_ftype_v2si
, IX86_BUILTIN_CVTPI2PD
);
12708 def_builtin (MASK_SSE2
, "__builtin_ia32_cvtsd2si", int_ftype_v2df
, IX86_BUILTIN_CVTSD2SI
);
12709 def_builtin (MASK_SSE2
, "__builtin_ia32_cvttsd2si", int_ftype_v2df
, IX86_BUILTIN_CVTTSD2SI
);
12711 def_builtin (MASK_SSE2
, "__builtin_ia32_cvtps2dq", v4si_ftype_v4sf
, IX86_BUILTIN_CVTPS2DQ
);
12712 def_builtin (MASK_SSE2
, "__builtin_ia32_cvtps2pd", v2df_ftype_v4sf
, IX86_BUILTIN_CVTPS2PD
);
12713 def_builtin (MASK_SSE2
, "__builtin_ia32_cvttps2dq", v4si_ftype_v4sf
, IX86_BUILTIN_CVTTPS2DQ
);
12715 def_builtin (MASK_SSE2
, "__builtin_ia32_cvtsi2sd", v2df_ftype_v2df_int
, IX86_BUILTIN_CVTSI2SD
);
12716 def_builtin (MASK_SSE2
, "__builtin_ia32_cvtsd2ss", v4sf_ftype_v4sf_v2df
, IX86_BUILTIN_CVTSD2SS
);
12717 def_builtin (MASK_SSE2
, "__builtin_ia32_cvtss2sd", v2df_ftype_v2df_v4sf
, IX86_BUILTIN_CVTSS2SD
);
12719 def_builtin (MASK_SSE2
, "__builtin_ia32_setpd1", v2df_ftype_double
, IX86_BUILTIN_SETPD1
);
12720 def_builtin (MASK_SSE2
, "__builtin_ia32_setpd", v2df_ftype_double_double
, IX86_BUILTIN_SETPD
);
12721 def_builtin (MASK_SSE2
, "__builtin_ia32_setzeropd", ti_ftype_void
, IX86_BUILTIN_CLRPD
);
12722 def_builtin (MASK_SSE2
, "__builtin_ia32_loadpd1", v2df_ftype_pdouble
, IX86_BUILTIN_LOADPD1
);
12723 def_builtin (MASK_SSE2
, "__builtin_ia32_loadrpd", v2df_ftype_pdouble
, IX86_BUILTIN_LOADRPD
);
12724 def_builtin (MASK_SSE2
, "__builtin_ia32_storepd1", void_ftype_pdouble_v2df
, IX86_BUILTIN_STOREPD1
);
12725 def_builtin (MASK_SSE2
, "__builtin_ia32_storerpd", void_ftype_pdouble_v2df
, IX86_BUILTIN_STORERPD
);
12727 def_builtin (MASK_SSE2
, "__builtin_ia32_clflush", void_ftype_pvoid
, IX86_BUILTIN_CLFLUSH
);
12728 def_builtin (MASK_SSE2
, "__builtin_ia32_lfence", void_ftype_void
, IX86_BUILTIN_LFENCE
);
12729 def_builtin (MASK_SSE2
, "__builtin_ia32_mfence", void_ftype_void
, IX86_BUILTIN_MFENCE
);
12731 def_builtin (MASK_SSE2
, "__builtin_ia32_loaddqa", v16qi_ftype_pchar
, IX86_BUILTIN_LOADDQA
);
12732 def_builtin (MASK_SSE2
, "__builtin_ia32_loaddqu", v16qi_ftype_pchar
, IX86_BUILTIN_LOADDQU
);
12733 def_builtin (MASK_SSE2
, "__builtin_ia32_loadd", v4si_ftype_pchar
, IX86_BUILTIN_LOADD
);
12734 def_builtin (MASK_SSE2
, "__builtin_ia32_storedqa", void_ftype_pchar_v16qi
, IX86_BUILTIN_STOREDQA
);
12735 def_builtin (MASK_SSE2
, "__builtin_ia32_storedqu", void_ftype_pchar_v16qi
, IX86_BUILTIN_STOREDQU
);
12736 def_builtin (MASK_SSE2
, "__builtin_ia32_stored", void_ftype_pchar_v4si
, IX86_BUILTIN_STORED
);
12737 def_builtin (MASK_SSE2
, "__builtin_ia32_movq", v2di_ftype_v2di
, IX86_BUILTIN_MOVQ
);
12739 def_builtin (MASK_SSE1
, "__builtin_ia32_setzero128", v2di_ftype_void
, IX86_BUILTIN_CLRTI
);
12741 def_builtin (MASK_SSE2
, "__builtin_ia32_psllw128", v8hi_ftype_v8hi_v2di
, IX86_BUILTIN_PSLLW128
);
12742 def_builtin (MASK_SSE2
, "__builtin_ia32_pslld128", v4si_ftype_v4si_v2di
, IX86_BUILTIN_PSLLD128
);
12743 def_builtin (MASK_SSE2
, "__builtin_ia32_psllq128", v2di_ftype_v2di_v2di
, IX86_BUILTIN_PSLLQ128
);
12745 def_builtin (MASK_SSE2
, "__builtin_ia32_psrlw128", v8hi_ftype_v8hi_v2di
, IX86_BUILTIN_PSRLW128
);
12746 def_builtin (MASK_SSE2
, "__builtin_ia32_psrld128", v4si_ftype_v4si_v2di
, IX86_BUILTIN_PSRLD128
);
12747 def_builtin (MASK_SSE2
, "__builtin_ia32_psrlq128", v2di_ftype_v2di_v2di
, IX86_BUILTIN_PSRLQ128
);
12749 def_builtin (MASK_SSE2
, "__builtin_ia32_psraw128", v8hi_ftype_v8hi_v2di
, IX86_BUILTIN_PSRAW128
);
12750 def_builtin (MASK_SSE2
, "__builtin_ia32_psrad128", v4si_ftype_v4si_v2di
, IX86_BUILTIN_PSRAD128
);
12752 def_builtin (MASK_SSE2
, "__builtin_ia32_pslldqi128", v2di_ftype_v2di_int
, IX86_BUILTIN_PSLLDQI128
);
12753 def_builtin (MASK_SSE2
, "__builtin_ia32_psllwi128", v8hi_ftype_v8hi_int
, IX86_BUILTIN_PSLLWI128
);
12754 def_builtin (MASK_SSE2
, "__builtin_ia32_pslldi128", v4si_ftype_v4si_int
, IX86_BUILTIN_PSLLDI128
);
12755 def_builtin (MASK_SSE2
, "__builtin_ia32_psllqi128", v2di_ftype_v2di_int
, IX86_BUILTIN_PSLLQI128
);
12757 def_builtin (MASK_SSE2
, "__builtin_ia32_psrldqi128", v2di_ftype_v2di_int
, IX86_BUILTIN_PSRLDQI128
);
12758 def_builtin (MASK_SSE2
, "__builtin_ia32_psrlwi128", v8hi_ftype_v8hi_int
, IX86_BUILTIN_PSRLWI128
);
12759 def_builtin (MASK_SSE2
, "__builtin_ia32_psrldi128", v4si_ftype_v4si_int
, IX86_BUILTIN_PSRLDI128
);
12760 def_builtin (MASK_SSE2
, "__builtin_ia32_psrlqi128", v2di_ftype_v2di_int
, IX86_BUILTIN_PSRLQI128
);
12762 def_builtin (MASK_SSE2
, "__builtin_ia32_psrawi128", v8hi_ftype_v8hi_int
, IX86_BUILTIN_PSRAWI128
);
12763 def_builtin (MASK_SSE2
, "__builtin_ia32_psradi128", v4si_ftype_v4si_int
, IX86_BUILTIN_PSRADI128
);
12765 def_builtin (MASK_SSE2
, "__builtin_ia32_pmaddwd128", v4si_ftype_v8hi_v8hi
, IX86_BUILTIN_PMADDWD128
);
12768 /* Errors in the source file can cause expand_expr to return const0_rtx
12769 where we expect a vector. To avoid crashing, use one of the vector
12770 clear instructions. */
12772 safe_vector_operand (x
, mode
)
12774 enum machine_mode mode
;
12776 if (x
!= const0_rtx
)
12778 x
= gen_reg_rtx (mode
);
12780 if (VALID_MMX_REG_MODE (mode
) || VALID_MMX_REG_MODE_3DNOW (mode
))
12781 emit_insn (gen_mmx_clrdi (mode
== DImode
? x
12782 : gen_rtx_SUBREG (DImode
, x
, 0)));
12784 emit_insn (gen_sse_clrv4sf (mode
== V4SFmode
? x
12785 : gen_rtx_SUBREG (V4SFmode
, x
, 0)));
12789 /* Subroutine of ix86_expand_builtin to take care of binop insns. */
12792 ix86_expand_binop_builtin (icode
, arglist
, target
)
12793 enum insn_code icode
;
12798 tree arg0
= TREE_VALUE (arglist
);
12799 tree arg1
= TREE_VALUE (TREE_CHAIN (arglist
));
12800 rtx op0
= expand_expr (arg0
, NULL_RTX
, VOIDmode
, 0);
12801 rtx op1
= expand_expr (arg1
, NULL_RTX
, VOIDmode
, 0);
12802 enum machine_mode tmode
= insn_data
[icode
].operand
[0].mode
;
12803 enum machine_mode mode0
= insn_data
[icode
].operand
[1].mode
;
12804 enum machine_mode mode1
= insn_data
[icode
].operand
[2].mode
;
12806 if (VECTOR_MODE_P (mode0
))
12807 op0
= safe_vector_operand (op0
, mode0
);
12808 if (VECTOR_MODE_P (mode1
))
12809 op1
= safe_vector_operand (op1
, mode1
);
12812 || GET_MODE (target
) != tmode
12813 || ! (*insn_data
[icode
].operand
[0].predicate
) (target
, tmode
))
12814 target
= gen_reg_rtx (tmode
);
12816 /* In case the insn wants input operands in modes different from
12817 the result, abort. */
12818 if (GET_MODE (op0
) != mode0
|| GET_MODE (op1
) != mode1
)
12821 if (! (*insn_data
[icode
].operand
[1].predicate
) (op0
, mode0
))
12822 op0
= copy_to_mode_reg (mode0
, op0
);
12823 if (! (*insn_data
[icode
].operand
[2].predicate
) (op1
, mode1
))
12824 op1
= copy_to_mode_reg (mode1
, op1
);
12826 /* In the commutative cases, both op0 and op1 are nonimmediate_operand,
12827 yet one of the two must not be a memory. This is normally enforced
12828 by expanders, but we didn't bother to create one here. */
12829 if (GET_CODE (op0
) == MEM
&& GET_CODE (op1
) == MEM
)
12830 op0
= copy_to_mode_reg (mode0
, op0
);
12832 pat
= GEN_FCN (icode
) (target
, op0
, op1
);
12839 /* Subroutine of ix86_expand_builtin to take care of stores. */
12842 ix86_expand_store_builtin (icode
, arglist
)
12843 enum insn_code icode
;
12847 tree arg0
= TREE_VALUE (arglist
);
12848 tree arg1
= TREE_VALUE (TREE_CHAIN (arglist
));
12849 rtx op0
= expand_expr (arg0
, NULL_RTX
, VOIDmode
, 0);
12850 rtx op1
= expand_expr (arg1
, NULL_RTX
, VOIDmode
, 0);
12851 enum machine_mode mode0
= insn_data
[icode
].operand
[0].mode
;
12852 enum machine_mode mode1
= insn_data
[icode
].operand
[1].mode
;
12854 if (VECTOR_MODE_P (mode1
))
12855 op1
= safe_vector_operand (op1
, mode1
);
12857 op0
= gen_rtx_MEM (mode0
, copy_to_mode_reg (Pmode
, op0
));
12859 if (! (*insn_data
[icode
].operand
[1].predicate
) (op1
, mode1
))
12860 op1
= copy_to_mode_reg (mode1
, op1
);
12862 pat
= GEN_FCN (icode
) (op0
, op1
);
12868 /* Subroutine of ix86_expand_builtin to take care of unop insns. */
12871 ix86_expand_unop_builtin (icode
, arglist
, target
, do_load
)
12872 enum insn_code icode
;
12878 tree arg0
= TREE_VALUE (arglist
);
12879 rtx op0
= expand_expr (arg0
, NULL_RTX
, VOIDmode
, 0);
12880 enum machine_mode tmode
= insn_data
[icode
].operand
[0].mode
;
12881 enum machine_mode mode0
= insn_data
[icode
].operand
[1].mode
;
12884 || GET_MODE (target
) != tmode
12885 || ! (*insn_data
[icode
].operand
[0].predicate
) (target
, tmode
))
12886 target
= gen_reg_rtx (tmode
);
12888 op0
= gen_rtx_MEM (mode0
, copy_to_mode_reg (Pmode
, op0
));
12891 if (VECTOR_MODE_P (mode0
))
12892 op0
= safe_vector_operand (op0
, mode0
);
12894 if (! (*insn_data
[icode
].operand
[1].predicate
) (op0
, mode0
))
12895 op0
= copy_to_mode_reg (mode0
, op0
);
12898 pat
= GEN_FCN (icode
) (target
, op0
);
12905 /* Subroutine of ix86_expand_builtin to take care of three special unop insns:
12906 sqrtss, rsqrtss, rcpss. */
12909 ix86_expand_unop1_builtin (icode
, arglist
, target
)
12910 enum insn_code icode
;
12915 tree arg0
= TREE_VALUE (arglist
);
12916 rtx op1
, op0
= expand_expr (arg0
, NULL_RTX
, VOIDmode
, 0);
12917 enum machine_mode tmode
= insn_data
[icode
].operand
[0].mode
;
12918 enum machine_mode mode0
= insn_data
[icode
].operand
[1].mode
;
12921 || GET_MODE (target
) != tmode
12922 || ! (*insn_data
[icode
].operand
[0].predicate
) (target
, tmode
))
12923 target
= gen_reg_rtx (tmode
);
12925 if (VECTOR_MODE_P (mode0
))
12926 op0
= safe_vector_operand (op0
, mode0
);
12928 if (! (*insn_data
[icode
].operand
[1].predicate
) (op0
, mode0
))
12929 op0
= copy_to_mode_reg (mode0
, op0
);
12932 if (! (*insn_data
[icode
].operand
[2].predicate
) (op1
, mode0
))
12933 op1
= copy_to_mode_reg (mode0
, op1
);
12935 pat
= GEN_FCN (icode
) (target
, op0
, op1
);
12942 /* Subroutine of ix86_expand_builtin to take care of comparison insns. */
12945 ix86_expand_sse_compare (d
, arglist
, target
)
12946 const struct builtin_description
*d
;
12951 tree arg0
= TREE_VALUE (arglist
);
12952 tree arg1
= TREE_VALUE (TREE_CHAIN (arglist
));
12953 rtx op0
= expand_expr (arg0
, NULL_RTX
, VOIDmode
, 0);
12954 rtx op1
= expand_expr (arg1
, NULL_RTX
, VOIDmode
, 0);
12956 enum machine_mode tmode
= insn_data
[d
->icode
].operand
[0].mode
;
12957 enum machine_mode mode0
= insn_data
[d
->icode
].operand
[1].mode
;
12958 enum machine_mode mode1
= insn_data
[d
->icode
].operand
[2].mode
;
12959 enum rtx_code comparison
= d
->comparison
;
12961 if (VECTOR_MODE_P (mode0
))
12962 op0
= safe_vector_operand (op0
, mode0
);
12963 if (VECTOR_MODE_P (mode1
))
12964 op1
= safe_vector_operand (op1
, mode1
);
12966 /* Swap operands if we have a comparison that isn't available in
12970 rtx tmp
= gen_reg_rtx (mode1
);
12971 emit_move_insn (tmp
, op1
);
12977 || GET_MODE (target
) != tmode
12978 || ! (*insn_data
[d
->icode
].operand
[0].predicate
) (target
, tmode
))
12979 target
= gen_reg_rtx (tmode
);
12981 if (! (*insn_data
[d
->icode
].operand
[1].predicate
) (op0
, mode0
))
12982 op0
= copy_to_mode_reg (mode0
, op0
);
12983 if (! (*insn_data
[d
->icode
].operand
[2].predicate
) (op1
, mode1
))
12984 op1
= copy_to_mode_reg (mode1
, op1
);
12986 op2
= gen_rtx_fmt_ee (comparison
, mode0
, op0
, op1
);
12987 pat
= GEN_FCN (d
->icode
) (target
, op0
, op1
, op2
);
12994 /* Subroutine of ix86_expand_builtin to take care of comi insns. */
12997 ix86_expand_sse_comi (d
, arglist
, target
)
12998 const struct builtin_description
*d
;
13003 tree arg0
= TREE_VALUE (arglist
);
13004 tree arg1
= TREE_VALUE (TREE_CHAIN (arglist
));
13005 rtx op0
= expand_expr (arg0
, NULL_RTX
, VOIDmode
, 0);
13006 rtx op1
= expand_expr (arg1
, NULL_RTX
, VOIDmode
, 0);
13008 enum machine_mode mode0
= insn_data
[d
->icode
].operand
[0].mode
;
13009 enum machine_mode mode1
= insn_data
[d
->icode
].operand
[1].mode
;
13010 enum rtx_code comparison
= d
->comparison
;
13012 if (VECTOR_MODE_P (mode0
))
13013 op0
= safe_vector_operand (op0
, mode0
);
13014 if (VECTOR_MODE_P (mode1
))
13015 op1
= safe_vector_operand (op1
, mode1
);
13017 /* Swap operands if we have a comparison that isn't available in
13026 target
= gen_reg_rtx (SImode
);
13027 emit_move_insn (target
, const0_rtx
);
13028 target
= gen_rtx_SUBREG (QImode
, target
, 0);
13030 if (! (*insn_data
[d
->icode
].operand
[0].predicate
) (op0
, mode0
))
13031 op0
= copy_to_mode_reg (mode0
, op0
);
13032 if (! (*insn_data
[d
->icode
].operand
[1].predicate
) (op1
, mode1
))
13033 op1
= copy_to_mode_reg (mode1
, op1
);
13035 op2
= gen_rtx_fmt_ee (comparison
, mode0
, op0
, op1
);
13036 pat
= GEN_FCN (d
->icode
) (op0
, op1
);
13040 emit_insn (gen_rtx_SET (VOIDmode
,
13041 gen_rtx_STRICT_LOW_PART (VOIDmode
, target
),
13042 gen_rtx_fmt_ee (comparison
, QImode
,
13046 return SUBREG_REG (target
);
13049 /* Expand an expression EXP that calls a built-in function,
13050 with result going to TARGET if that's convenient
13051 (and in mode MODE if that's convenient).
13052 SUBTARGET may be used as the target for computing one of EXP's operands.
13053 IGNORE is nonzero if the value is to be ignored. */
13056 ix86_expand_builtin (exp
, target
, subtarget
, mode
, ignore
)
13059 rtx subtarget ATTRIBUTE_UNUSED
;
13060 enum machine_mode mode ATTRIBUTE_UNUSED
;
13061 int ignore ATTRIBUTE_UNUSED
;
13063 const struct builtin_description
*d
;
13065 enum insn_code icode
;
13066 tree fndecl
= TREE_OPERAND (TREE_OPERAND (exp
, 0), 0);
13067 tree arglist
= TREE_OPERAND (exp
, 1);
13068 tree arg0
, arg1
, arg2
;
13069 rtx op0
, op1
, op2
, pat
;
13070 enum machine_mode tmode
, mode0
, mode1
, mode2
;
13071 unsigned int fcode
= DECL_FUNCTION_CODE (fndecl
);
13075 case IX86_BUILTIN_EMMS
:
13076 emit_insn (gen_emms ());
13079 case IX86_BUILTIN_SFENCE
:
13080 emit_insn (gen_sfence ());
13083 case IX86_BUILTIN_PEXTRW
:
13084 case IX86_BUILTIN_PEXTRW128
:
13085 icode
= (fcode
== IX86_BUILTIN_PEXTRW
13086 ? CODE_FOR_mmx_pextrw
13087 : CODE_FOR_sse2_pextrw
);
13088 arg0
= TREE_VALUE (arglist
);
13089 arg1
= TREE_VALUE (TREE_CHAIN (arglist
));
13090 op0
= expand_expr (arg0
, NULL_RTX
, VOIDmode
, 0);
13091 op1
= expand_expr (arg1
, NULL_RTX
, VOIDmode
, 0);
13092 tmode
= insn_data
[icode
].operand
[0].mode
;
13093 mode0
= insn_data
[icode
].operand
[1].mode
;
13094 mode1
= insn_data
[icode
].operand
[2].mode
;
13096 if (! (*insn_data
[icode
].operand
[1].predicate
) (op0
, mode0
))
13097 op0
= copy_to_mode_reg (mode0
, op0
);
13098 if (! (*insn_data
[icode
].operand
[2].predicate
) (op1
, mode1
))
13100 /* @@@ better error message */
13101 error ("selector must be an immediate");
13102 return gen_reg_rtx (tmode
);
13105 || GET_MODE (target
) != tmode
13106 || ! (*insn_data
[icode
].operand
[0].predicate
) (target
, tmode
))
13107 target
= gen_reg_rtx (tmode
);
13108 pat
= GEN_FCN (icode
) (target
, op0
, op1
);
13114 case IX86_BUILTIN_PINSRW
:
13115 case IX86_BUILTIN_PINSRW128
:
13116 icode
= (fcode
== IX86_BUILTIN_PINSRW
13117 ? CODE_FOR_mmx_pinsrw
13118 : CODE_FOR_sse2_pinsrw
);
13119 arg0
= TREE_VALUE (arglist
);
13120 arg1
= TREE_VALUE (TREE_CHAIN (arglist
));
13121 arg2
= TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist
)));
13122 op0
= expand_expr (arg0
, NULL_RTX
, VOIDmode
, 0);
13123 op1
= expand_expr (arg1
, NULL_RTX
, VOIDmode
, 0);
13124 op2
= expand_expr (arg2
, NULL_RTX
, VOIDmode
, 0);
13125 tmode
= insn_data
[icode
].operand
[0].mode
;
13126 mode0
= insn_data
[icode
].operand
[1].mode
;
13127 mode1
= insn_data
[icode
].operand
[2].mode
;
13128 mode2
= insn_data
[icode
].operand
[3].mode
;
13130 if (! (*insn_data
[icode
].operand
[1].predicate
) (op0
, mode0
))
13131 op0
= copy_to_mode_reg (mode0
, op0
);
13132 if (! (*insn_data
[icode
].operand
[2].predicate
) (op1
, mode1
))
13133 op1
= copy_to_mode_reg (mode1
, op1
);
13134 if (! (*insn_data
[icode
].operand
[3].predicate
) (op2
, mode2
))
13136 /* @@@ better error message */
13137 error ("selector must be an immediate");
13141 || GET_MODE (target
) != tmode
13142 || ! (*insn_data
[icode
].operand
[0].predicate
) (target
, tmode
))
13143 target
= gen_reg_rtx (tmode
);
13144 pat
= GEN_FCN (icode
) (target
, op0
, op1
, op2
);
13150 case IX86_BUILTIN_MASKMOVQ
:
13151 case IX86_BUILTIN_MASKMOVDQU
:
13152 icode
= (fcode
== IX86_BUILTIN_MASKMOVQ
13153 ? (TARGET_64BIT
? CODE_FOR_mmx_maskmovq_rex
: CODE_FOR_mmx_maskmovq
)
13154 : (TARGET_64BIT
? CODE_FOR_sse2_maskmovdqu_rex64
13155 : CODE_FOR_sse2_maskmovdqu
));
13156 /* Note the arg order is different from the operand order. */
13157 arg1
= TREE_VALUE (arglist
);
13158 arg2
= TREE_VALUE (TREE_CHAIN (arglist
));
13159 arg0
= TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist
)));
13160 op0
= expand_expr (arg0
, NULL_RTX
, VOIDmode
, 0);
13161 op1
= expand_expr (arg1
, NULL_RTX
, VOIDmode
, 0);
13162 op2
= expand_expr (arg2
, NULL_RTX
, VOIDmode
, 0);
13163 mode0
= insn_data
[icode
].operand
[0].mode
;
13164 mode1
= insn_data
[icode
].operand
[1].mode
;
13165 mode2
= insn_data
[icode
].operand
[2].mode
;
13167 if (! (*insn_data
[icode
].operand
[0].predicate
) (op0
, mode0
))
13168 op0
= copy_to_mode_reg (mode0
, op0
);
13169 if (! (*insn_data
[icode
].operand
[1].predicate
) (op1
, mode1
))
13170 op1
= copy_to_mode_reg (mode1
, op1
);
13171 if (! (*insn_data
[icode
].operand
[2].predicate
) (op2
, mode2
))
13172 op2
= copy_to_mode_reg (mode2
, op2
);
13173 pat
= GEN_FCN (icode
) (op0
, op1
, op2
);
13179 case IX86_BUILTIN_SQRTSS
:
13180 return ix86_expand_unop1_builtin (CODE_FOR_vmsqrtv4sf2
, arglist
, target
);
13181 case IX86_BUILTIN_RSQRTSS
:
13182 return ix86_expand_unop1_builtin (CODE_FOR_vmrsqrtv4sf2
, arglist
, target
);
13183 case IX86_BUILTIN_RCPSS
:
13184 return ix86_expand_unop1_builtin (CODE_FOR_vmrcpv4sf2
, arglist
, target
);
13186 case IX86_BUILTIN_LOADAPS
:
13187 return ix86_expand_unop_builtin (CODE_FOR_sse_movaps
, arglist
, target
, 1);
13189 case IX86_BUILTIN_LOADUPS
:
13190 return ix86_expand_unop_builtin (CODE_FOR_sse_movups
, arglist
, target
, 1);
13192 case IX86_BUILTIN_STOREAPS
:
13193 return ix86_expand_store_builtin (CODE_FOR_sse_movaps
, arglist
);
13195 case IX86_BUILTIN_STOREUPS
:
13196 return ix86_expand_store_builtin (CODE_FOR_sse_movups
, arglist
);
13198 case IX86_BUILTIN_LOADSS
:
13199 return ix86_expand_unop_builtin (CODE_FOR_sse_loadss
, arglist
, target
, 1);
13201 case IX86_BUILTIN_STORESS
:
13202 return ix86_expand_store_builtin (CODE_FOR_sse_storess
, arglist
);
13204 case IX86_BUILTIN_LOADHPS
:
13205 case IX86_BUILTIN_LOADLPS
:
13206 case IX86_BUILTIN_LOADHPD
:
13207 case IX86_BUILTIN_LOADLPD
:
13208 icode
= (fcode
== IX86_BUILTIN_LOADHPS
? CODE_FOR_sse_movhps
13209 : fcode
== IX86_BUILTIN_LOADLPS
? CODE_FOR_sse_movlps
13210 : fcode
== IX86_BUILTIN_LOADHPD
? CODE_FOR_sse2_movhpd
13211 : CODE_FOR_sse2_movlpd
);
13212 arg0
= TREE_VALUE (arglist
);
13213 arg1
= TREE_VALUE (TREE_CHAIN (arglist
));
13214 op0
= expand_expr (arg0
, NULL_RTX
, VOIDmode
, 0);
13215 op1
= expand_expr (arg1
, NULL_RTX
, VOIDmode
, 0);
13216 tmode
= insn_data
[icode
].operand
[0].mode
;
13217 mode0
= insn_data
[icode
].operand
[1].mode
;
13218 mode1
= insn_data
[icode
].operand
[2].mode
;
13220 if (! (*insn_data
[icode
].operand
[1].predicate
) (op0
, mode0
))
13221 op0
= copy_to_mode_reg (mode0
, op0
);
13222 op1
= gen_rtx_MEM (mode1
, copy_to_mode_reg (Pmode
, op1
));
13224 || GET_MODE (target
) != tmode
13225 || ! (*insn_data
[icode
].operand
[0].predicate
) (target
, tmode
))
13226 target
= gen_reg_rtx (tmode
);
13227 pat
= GEN_FCN (icode
) (target
, op0
, op1
);
13233 case IX86_BUILTIN_STOREHPS
:
13234 case IX86_BUILTIN_STORELPS
:
13235 case IX86_BUILTIN_STOREHPD
:
13236 case IX86_BUILTIN_STORELPD
:
13237 icode
= (fcode
== IX86_BUILTIN_STOREHPS
? CODE_FOR_sse_movhps
13238 : fcode
== IX86_BUILTIN_STORELPS
? CODE_FOR_sse_movlps
13239 : fcode
== IX86_BUILTIN_STOREHPD
? CODE_FOR_sse2_movhpd
13240 : CODE_FOR_sse2_movlpd
);
13241 arg0
= TREE_VALUE (arglist
);
13242 arg1
= TREE_VALUE (TREE_CHAIN (arglist
));
13243 op0
= expand_expr (arg0
, NULL_RTX
, VOIDmode
, 0);
13244 op1
= expand_expr (arg1
, NULL_RTX
, VOIDmode
, 0);
13245 mode0
= insn_data
[icode
].operand
[1].mode
;
13246 mode1
= insn_data
[icode
].operand
[2].mode
;
13248 op0
= gen_rtx_MEM (mode0
, copy_to_mode_reg (Pmode
, op0
));
13249 if (! (*insn_data
[icode
].operand
[2].predicate
) (op1
, mode1
))
13250 op1
= copy_to_mode_reg (mode1
, op1
);
13252 pat
= GEN_FCN (icode
) (op0
, op0
, op1
);
13258 case IX86_BUILTIN_MOVNTPS
:
13259 return ix86_expand_store_builtin (CODE_FOR_sse_movntv4sf
, arglist
);
13260 case IX86_BUILTIN_MOVNTQ
:
13261 return ix86_expand_store_builtin (CODE_FOR_sse_movntdi
, arglist
);
13263 case IX86_BUILTIN_LDMXCSR
:
13264 op0
= expand_expr (TREE_VALUE (arglist
), NULL_RTX
, VOIDmode
, 0);
13265 target
= assign_386_stack_local (SImode
, 0);
13266 emit_move_insn (target
, op0
);
13267 emit_insn (gen_ldmxcsr (target
));
13270 case IX86_BUILTIN_STMXCSR
:
13271 target
= assign_386_stack_local (SImode
, 0);
13272 emit_insn (gen_stmxcsr (target
));
13273 return copy_to_mode_reg (SImode
, target
);
13275 case IX86_BUILTIN_SHUFPS
:
13276 case IX86_BUILTIN_SHUFPD
:
13277 icode
= (fcode
== IX86_BUILTIN_SHUFPS
13278 ? CODE_FOR_sse_shufps
13279 : CODE_FOR_sse2_shufpd
);
13280 arg0
= TREE_VALUE (arglist
);
13281 arg1
= TREE_VALUE (TREE_CHAIN (arglist
));
13282 arg2
= TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist
)));
13283 op0
= expand_expr (arg0
, NULL_RTX
, VOIDmode
, 0);
13284 op1
= expand_expr (arg1
, NULL_RTX
, VOIDmode
, 0);
13285 op2
= expand_expr (arg2
, NULL_RTX
, VOIDmode
, 0);
13286 tmode
= insn_data
[icode
].operand
[0].mode
;
13287 mode0
= insn_data
[icode
].operand
[1].mode
;
13288 mode1
= insn_data
[icode
].operand
[2].mode
;
13289 mode2
= insn_data
[icode
].operand
[3].mode
;
13291 if (! (*insn_data
[icode
].operand
[1].predicate
) (op0
, mode0
))
13292 op0
= copy_to_mode_reg (mode0
, op0
);
13293 if (! (*insn_data
[icode
].operand
[2].predicate
) (op1
, mode1
))
13294 op1
= copy_to_mode_reg (mode1
, op1
);
13295 if (! (*insn_data
[icode
].operand
[3].predicate
) (op2
, mode2
))
13297 /* @@@ better error message */
13298 error ("mask must be an immediate");
13299 return gen_reg_rtx (tmode
);
13302 || GET_MODE (target
) != tmode
13303 || ! (*insn_data
[icode
].operand
[0].predicate
) (target
, tmode
))
13304 target
= gen_reg_rtx (tmode
);
13305 pat
= GEN_FCN (icode
) (target
, op0
, op1
, op2
);
13311 case IX86_BUILTIN_PSHUFW
:
13312 case IX86_BUILTIN_PSHUFD
:
13313 case IX86_BUILTIN_PSHUFHW
:
13314 case IX86_BUILTIN_PSHUFLW
:
13315 icode
= ( fcode
== IX86_BUILTIN_PSHUFHW
? CODE_FOR_sse2_pshufhw
13316 : fcode
== IX86_BUILTIN_PSHUFLW
? CODE_FOR_sse2_pshuflw
13317 : fcode
== IX86_BUILTIN_PSHUFD
? CODE_FOR_sse2_pshufd
13318 : CODE_FOR_mmx_pshufw
);
13319 arg0
= TREE_VALUE (arglist
);
13320 arg1
= TREE_VALUE (TREE_CHAIN (arglist
));
13321 op0
= expand_expr (arg0
, NULL_RTX
, VOIDmode
, 0);
13322 op1
= expand_expr (arg1
, NULL_RTX
, VOIDmode
, 0);
13323 tmode
= insn_data
[icode
].operand
[0].mode
;
13324 mode1
= insn_data
[icode
].operand
[1].mode
;
13325 mode2
= insn_data
[icode
].operand
[2].mode
;
13327 if (! (*insn_data
[icode
].operand
[1].predicate
) (op0
, mode1
))
13328 op0
= copy_to_mode_reg (mode1
, op0
);
13329 if (! (*insn_data
[icode
].operand
[2].predicate
) (op1
, mode2
))
13331 /* @@@ better error message */
13332 error ("mask must be an immediate");
13336 || GET_MODE (target
) != tmode
13337 || ! (*insn_data
[icode
].operand
[0].predicate
) (target
, tmode
))
13338 target
= gen_reg_rtx (tmode
);
13339 pat
= GEN_FCN (icode
) (target
, op0
, op1
);
13345 case IX86_BUILTIN_PSLLDQI128
:
13346 case IX86_BUILTIN_PSRLDQI128
:
13347 icode
= ( fcode
== IX86_BUILTIN_PSLLDQI128
? CODE_FOR_sse2_ashlti3
13348 : CODE_FOR_sse2_lshrti3
);
13349 arg0
= TREE_VALUE (arglist
);
13350 arg1
= TREE_VALUE (TREE_CHAIN (arglist
));
13351 op0
= expand_expr (arg0
, NULL_RTX
, VOIDmode
, 0);
13352 op1
= expand_expr (arg1
, NULL_RTX
, VOIDmode
, 0);
13353 tmode
= insn_data
[icode
].operand
[0].mode
;
13354 mode1
= insn_data
[icode
].operand
[1].mode
;
13355 mode2
= insn_data
[icode
].operand
[2].mode
;
13357 if (! (*insn_data
[icode
].operand
[1].predicate
) (op0
, mode1
))
13359 op0
= copy_to_reg (op0
);
13360 op0
= simplify_gen_subreg (mode1
, op0
, GET_MODE (op0
), 0);
13362 if (! (*insn_data
[icode
].operand
[2].predicate
) (op1
, mode2
))
13364 error ("shift must be an immediate");
13367 target
= gen_reg_rtx (V2DImode
);
13368 pat
= GEN_FCN (icode
) (simplify_gen_subreg (tmode
, target
, V2DImode
, 0), op0
, op1
);
13374 case IX86_BUILTIN_FEMMS
:
13375 emit_insn (gen_femms ());
13378 case IX86_BUILTIN_PAVGUSB
:
13379 return ix86_expand_binop_builtin (CODE_FOR_pavgusb
, arglist
, target
);
13381 case IX86_BUILTIN_PF2ID
:
13382 return ix86_expand_unop_builtin (CODE_FOR_pf2id
, arglist
, target
, 0);
13384 case IX86_BUILTIN_PFACC
:
13385 return ix86_expand_binop_builtin (CODE_FOR_pfacc
, arglist
, target
);
13387 case IX86_BUILTIN_PFADD
:
13388 return ix86_expand_binop_builtin (CODE_FOR_addv2sf3
, arglist
, target
);
13390 case IX86_BUILTIN_PFCMPEQ
:
13391 return ix86_expand_binop_builtin (CODE_FOR_eqv2sf3
, arglist
, target
);
13393 case IX86_BUILTIN_PFCMPGE
:
13394 return ix86_expand_binop_builtin (CODE_FOR_gev2sf3
, arglist
, target
);
13396 case IX86_BUILTIN_PFCMPGT
:
13397 return ix86_expand_binop_builtin (CODE_FOR_gtv2sf3
, arglist
, target
);
13399 case IX86_BUILTIN_PFMAX
:
13400 return ix86_expand_binop_builtin (CODE_FOR_pfmaxv2sf3
, arglist
, target
);
13402 case IX86_BUILTIN_PFMIN
:
13403 return ix86_expand_binop_builtin (CODE_FOR_pfminv2sf3
, arglist
, target
);
13405 case IX86_BUILTIN_PFMUL
:
13406 return ix86_expand_binop_builtin (CODE_FOR_mulv2sf3
, arglist
, target
);
13408 case IX86_BUILTIN_PFRCP
:
13409 return ix86_expand_unop_builtin (CODE_FOR_pfrcpv2sf2
, arglist
, target
, 0);
13411 case IX86_BUILTIN_PFRCPIT1
:
13412 return ix86_expand_binop_builtin (CODE_FOR_pfrcpit1v2sf3
, arglist
, target
);
13414 case IX86_BUILTIN_PFRCPIT2
:
13415 return ix86_expand_binop_builtin (CODE_FOR_pfrcpit2v2sf3
, arglist
, target
);
13417 case IX86_BUILTIN_PFRSQIT1
:
13418 return ix86_expand_binop_builtin (CODE_FOR_pfrsqit1v2sf3
, arglist
, target
);
13420 case IX86_BUILTIN_PFRSQRT
:
13421 return ix86_expand_unop_builtin (CODE_FOR_pfrsqrtv2sf2
, arglist
, target
, 0);
13423 case IX86_BUILTIN_PFSUB
:
13424 return ix86_expand_binop_builtin (CODE_FOR_subv2sf3
, arglist
, target
);
13426 case IX86_BUILTIN_PFSUBR
:
13427 return ix86_expand_binop_builtin (CODE_FOR_subrv2sf3
, arglist
, target
);
13429 case IX86_BUILTIN_PI2FD
:
13430 return ix86_expand_unop_builtin (CODE_FOR_floatv2si2
, arglist
, target
, 0);
13432 case IX86_BUILTIN_PMULHRW
:
13433 return ix86_expand_binop_builtin (CODE_FOR_pmulhrwv4hi3
, arglist
, target
);
13435 case IX86_BUILTIN_PF2IW
:
13436 return ix86_expand_unop_builtin (CODE_FOR_pf2iw
, arglist
, target
, 0);
13438 case IX86_BUILTIN_PFNACC
:
13439 return ix86_expand_binop_builtin (CODE_FOR_pfnacc
, arglist
, target
);
13441 case IX86_BUILTIN_PFPNACC
:
13442 return ix86_expand_binop_builtin (CODE_FOR_pfpnacc
, arglist
, target
);
13444 case IX86_BUILTIN_PI2FW
:
13445 return ix86_expand_unop_builtin (CODE_FOR_pi2fw
, arglist
, target
, 0);
13447 case IX86_BUILTIN_PSWAPDSI
:
13448 return ix86_expand_unop_builtin (CODE_FOR_pswapdv2si2
, arglist
, target
, 0);
13450 case IX86_BUILTIN_PSWAPDSF
:
13451 return ix86_expand_unop_builtin (CODE_FOR_pswapdv2sf2
, arglist
, target
, 0);
13453 case IX86_BUILTIN_SSE_ZERO
:
13454 target
= gen_reg_rtx (V4SFmode
);
13455 emit_insn (gen_sse_clrv4sf (target
));
13458 case IX86_BUILTIN_MMX_ZERO
:
13459 target
= gen_reg_rtx (DImode
);
13460 emit_insn (gen_mmx_clrdi (target
));
13463 case IX86_BUILTIN_CLRTI
:
13464 target
= gen_reg_rtx (V2DImode
);
13465 emit_insn (gen_sse2_clrti (simplify_gen_subreg (TImode
, target
, V2DImode
, 0)));
13469 case IX86_BUILTIN_SQRTSD
:
13470 return ix86_expand_unop1_builtin (CODE_FOR_vmsqrtv2df2
, arglist
, target
);
13471 case IX86_BUILTIN_LOADAPD
:
13472 return ix86_expand_unop_builtin (CODE_FOR_sse2_movapd
, arglist
, target
, 1);
13473 case IX86_BUILTIN_LOADUPD
:
13474 return ix86_expand_unop_builtin (CODE_FOR_sse2_movupd
, arglist
, target
, 1);
13476 case IX86_BUILTIN_STOREAPD
:
13477 return ix86_expand_store_builtin (CODE_FOR_sse2_movapd
, arglist
);
13478 case IX86_BUILTIN_STOREUPD
:
13479 return ix86_expand_store_builtin (CODE_FOR_sse2_movupd
, arglist
);
13481 case IX86_BUILTIN_LOADSD
:
13482 return ix86_expand_unop_builtin (CODE_FOR_sse2_loadsd
, arglist
, target
, 1);
13484 case IX86_BUILTIN_STORESD
:
13485 return ix86_expand_store_builtin (CODE_FOR_sse2_storesd
, arglist
);
13487 case IX86_BUILTIN_SETPD1
:
13488 target
= assign_386_stack_local (DFmode
, 0);
13489 arg0
= TREE_VALUE (arglist
);
13490 emit_move_insn (adjust_address (target
, DFmode
, 0),
13491 expand_expr (arg0
, NULL_RTX
, VOIDmode
, 0));
13492 op0
= gen_reg_rtx (V2DFmode
);
13493 emit_insn (gen_sse2_loadsd (op0
, adjust_address (target
, V2DFmode
, 0)));
13494 emit_insn (gen_sse2_shufpd (op0
, op0
, op0
, GEN_INT (0)));
13497 case IX86_BUILTIN_SETPD
:
13498 target
= assign_386_stack_local (V2DFmode
, 0);
13499 arg0
= TREE_VALUE (arglist
);
13500 arg1
= TREE_VALUE (TREE_CHAIN (arglist
));
13501 emit_move_insn (adjust_address (target
, DFmode
, 0),
13502 expand_expr (arg0
, NULL_RTX
, VOIDmode
, 0));
13503 emit_move_insn (adjust_address (target
, DFmode
, 8),
13504 expand_expr (arg1
, NULL_RTX
, VOIDmode
, 0));
13505 op0
= gen_reg_rtx (V2DFmode
);
13506 emit_insn (gen_sse2_movapd (op0
, target
));
13509 case IX86_BUILTIN_LOADRPD
:
13510 target
= ix86_expand_unop_builtin (CODE_FOR_sse2_movapd
, arglist
,
13511 gen_reg_rtx (V2DFmode
), 1);
13512 emit_insn (gen_sse2_shufpd (target
, target
, target
, GEN_INT (1)));
13515 case IX86_BUILTIN_LOADPD1
:
13516 target
= ix86_expand_unop_builtin (CODE_FOR_sse2_loadsd
, arglist
,
13517 gen_reg_rtx (V2DFmode
), 1);
13518 emit_insn (gen_sse2_shufpd (target
, target
, target
, const0_rtx
));
13521 case IX86_BUILTIN_STOREPD1
:
13522 return ix86_expand_store_builtin (CODE_FOR_sse2_movapd
, arglist
);
13523 case IX86_BUILTIN_STORERPD
:
13524 return ix86_expand_store_builtin (CODE_FOR_sse2_movapd
, arglist
);
13526 case IX86_BUILTIN_CLRPD
:
13527 target
= gen_reg_rtx (V2DFmode
);
13528 emit_insn (gen_sse_clrv2df (target
));
13531 case IX86_BUILTIN_MFENCE
:
13532 emit_insn (gen_sse2_mfence ());
13534 case IX86_BUILTIN_LFENCE
:
13535 emit_insn (gen_sse2_lfence ());
13538 case IX86_BUILTIN_CLFLUSH
:
13539 arg0
= TREE_VALUE (arglist
);
13540 op0
= expand_expr (arg0
, NULL_RTX
, VOIDmode
, 0);
13541 icode
= CODE_FOR_sse2_clflush
;
13542 if (! (*insn_data
[icode
].operand
[0].predicate
) (op0
, Pmode
))
13543 op0
= copy_to_mode_reg (Pmode
, op0
);
13545 emit_insn (gen_sse2_clflush (op0
));
13548 case IX86_BUILTIN_MOVNTPD
:
13549 return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2df
, arglist
);
13550 case IX86_BUILTIN_MOVNTDQ
:
13551 return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2di
, arglist
);
13552 case IX86_BUILTIN_MOVNTI
:
13553 return ix86_expand_store_builtin (CODE_FOR_sse2_movntsi
, arglist
);
13555 case IX86_BUILTIN_LOADDQA
:
13556 return ix86_expand_unop_builtin (CODE_FOR_sse2_movdqa
, arglist
, target
, 1);
13557 case IX86_BUILTIN_LOADDQU
:
13558 return ix86_expand_unop_builtin (CODE_FOR_sse2_movdqu
, arglist
, target
, 1);
13559 case IX86_BUILTIN_LOADD
:
13560 return ix86_expand_unop_builtin (CODE_FOR_sse2_loadd
, arglist
, target
, 1);
13562 case IX86_BUILTIN_STOREDQA
:
13563 return ix86_expand_store_builtin (CODE_FOR_sse2_movdqa
, arglist
);
13564 case IX86_BUILTIN_STOREDQU
:
13565 return ix86_expand_store_builtin (CODE_FOR_sse2_movdqu
, arglist
);
13566 case IX86_BUILTIN_STORED
:
13567 return ix86_expand_store_builtin (CODE_FOR_sse2_stored
, arglist
);
13573 for (i
= 0, d
= bdesc_2arg
; i
< ARRAY_SIZE (bdesc_2arg
); i
++, d
++)
13574 if (d
->code
== fcode
)
13576 /* Compares are treated specially. */
13577 if (d
->icode
== CODE_FOR_maskcmpv4sf3
13578 || d
->icode
== CODE_FOR_vmmaskcmpv4sf3
13579 || d
->icode
== CODE_FOR_maskncmpv4sf3
13580 || d
->icode
== CODE_FOR_vmmaskncmpv4sf3
13581 || d
->icode
== CODE_FOR_maskcmpv2df3
13582 || d
->icode
== CODE_FOR_vmmaskcmpv2df3
13583 || d
->icode
== CODE_FOR_maskncmpv2df3
13584 || d
->icode
== CODE_FOR_vmmaskncmpv2df3
)
13585 return ix86_expand_sse_compare (d
, arglist
, target
);
13587 return ix86_expand_binop_builtin (d
->icode
, arglist
, target
);
13590 for (i
= 0, d
= bdesc_1arg
; i
< ARRAY_SIZE (bdesc_1arg
); i
++, d
++)
13591 if (d
->code
== fcode
)
13592 return ix86_expand_unop_builtin (d
->icode
, arglist
, target
, 0);
13594 for (i
= 0, d
= bdesc_comi
; i
< ARRAY_SIZE (bdesc_comi
); i
++, d
++)
13595 if (d
->code
== fcode
)
13596 return ix86_expand_sse_comi (d
, arglist
, target
);
13598 /* @@@ Should really do something sensible here. */
13602 /* Store OPERAND to the memory after reload is completed. This means
13603 that we can't easily use assign_stack_local. */
13605 ix86_force_to_memory (mode
, operand
)
13606 enum machine_mode mode
;
13610 if (!reload_completed
)
13612 if (TARGET_64BIT
&& TARGET_RED_ZONE
)
13614 result
= gen_rtx_MEM (mode
,
13615 gen_rtx_PLUS (Pmode
,
13617 GEN_INT (-RED_ZONE_SIZE
)));
13618 emit_move_insn (result
, operand
);
13620 else if (TARGET_64BIT
&& !TARGET_RED_ZONE
)
13626 operand
= gen_lowpart (DImode
, operand
);
13630 gen_rtx_SET (VOIDmode
,
13631 gen_rtx_MEM (DImode
,
13632 gen_rtx_PRE_DEC (DImode
,
13633 stack_pointer_rtx
)),
13639 result
= gen_rtx_MEM (mode
, stack_pointer_rtx
);
13648 split_di (&operand
, 1, operands
, operands
+ 1);
13650 gen_rtx_SET (VOIDmode
,
13651 gen_rtx_MEM (SImode
,
13652 gen_rtx_PRE_DEC (Pmode
,
13653 stack_pointer_rtx
)),
13656 gen_rtx_SET (VOIDmode
,
13657 gen_rtx_MEM (SImode
,
13658 gen_rtx_PRE_DEC (Pmode
,
13659 stack_pointer_rtx
)),
13664 /* It is better to store HImodes as SImodes. */
13665 if (!TARGET_PARTIAL_REG_STALL
)
13666 operand
= gen_lowpart (SImode
, operand
);
13670 gen_rtx_SET (VOIDmode
,
13671 gen_rtx_MEM (GET_MODE (operand
),
13672 gen_rtx_PRE_DEC (SImode
,
13673 stack_pointer_rtx
)),
13679 result
= gen_rtx_MEM (mode
, stack_pointer_rtx
);
13684 /* Free operand from the memory. */
13686 ix86_free_from_memory (mode
)
13687 enum machine_mode mode
;
13689 if (!TARGET_64BIT
|| !TARGET_RED_ZONE
)
13693 if (mode
== DImode
|| TARGET_64BIT
)
13695 else if (mode
== HImode
&& TARGET_PARTIAL_REG_STALL
)
13699 /* Use LEA to deallocate stack space. In peephole2 it will be converted
13700 to pop or add instruction if registers are available. */
13701 emit_insn (gen_rtx_SET (VOIDmode
, stack_pointer_rtx
,
13702 gen_rtx_PLUS (Pmode
, stack_pointer_rtx
,
13707 /* Put float CONST_DOUBLE in the constant pool instead of fp regs.
13708 QImode must go into class Q_REGS.
13709 Narrow ALL_REGS to GENERAL_REGS. This supports allowing movsf and
13710 movdf to do mem-to-mem moves through integer regs. */
13712 ix86_preferred_reload_class (x
, class)
13714 enum reg_class
class;
13716 if (GET_CODE (x
) == CONST_VECTOR
&& x
!= CONST0_RTX (GET_MODE (x
)))
13718 if (GET_CODE (x
) == CONST_DOUBLE
&& GET_MODE (x
) != VOIDmode
)
13720 /* SSE can't load any constant directly yet. */
13721 if (SSE_CLASS_P (class))
13723 /* Floats can load 0 and 1. */
13724 if (MAYBE_FLOAT_CLASS_P (class) && standard_80387_constant_p (x
))
13726 /* Limit class to non-SSE. Use GENERAL_REGS if possible. */
13727 if (MAYBE_SSE_CLASS_P (class))
13728 return (reg_class_subset_p (class, GENERAL_REGS
)
13729 ? GENERAL_REGS
: FLOAT_REGS
);
13733 /* General regs can load everything. */
13734 if (reg_class_subset_p (class, GENERAL_REGS
))
13735 return GENERAL_REGS
;
13736 /* In case we haven't resolved FLOAT or SSE yet, give up. */
13737 if (MAYBE_FLOAT_CLASS_P (class) || MAYBE_SSE_CLASS_P (class))
13740 if (MAYBE_MMX_CLASS_P (class) && CONSTANT_P (x
))
13742 if (GET_MODE (x
) == QImode
&& ! reg_class_subset_p (class, Q_REGS
))
13747 /* If we are copying between general and FP registers, we need a memory
13748 location. The same is true for SSE and MMX registers.
13750 The macro can't work reliably when one of the CLASSES is class containing
13751 registers from multiple units (SSE, MMX, integer). We avoid this by never
13752 combining those units in single alternative in the machine description.
13753 Ensure that this constraint holds to avoid unexpected surprises.
13755 When STRICT is false, we are being called from REGISTER_MOVE_COST, so do not
13756 enforce these sanity checks. */
13758 ix86_secondary_memory_needed (class1
, class2
, mode
, strict
)
13759 enum reg_class class1
, class2
;
13760 enum machine_mode mode
;
13763 if (MAYBE_FLOAT_CLASS_P (class1
) != FLOAT_CLASS_P (class1
)
13764 || MAYBE_FLOAT_CLASS_P (class2
) != FLOAT_CLASS_P (class2
)
13765 || MAYBE_SSE_CLASS_P (class1
) != SSE_CLASS_P (class1
)
13766 || MAYBE_SSE_CLASS_P (class2
) != SSE_CLASS_P (class2
)
13767 || MAYBE_MMX_CLASS_P (class1
) != MMX_CLASS_P (class1
)
13768 || MAYBE_MMX_CLASS_P (class2
) != MMX_CLASS_P (class2
))
13775 return (FLOAT_CLASS_P (class1
) != FLOAT_CLASS_P (class2
)
13776 || (SSE_CLASS_P (class1
) != SSE_CLASS_P (class2
)
13777 && (mode
) != SImode
)
13778 || (MMX_CLASS_P (class1
) != MMX_CLASS_P (class2
)
13779 && (mode
) != SImode
));
13781 /* Return the cost of moving data from a register in class CLASS1 to
13782 one in class CLASS2.
13784 It is not required that the cost always equal 2 when FROM is the same as TO;
13785 on some machines it is expensive to move between registers if they are not
13786 general registers. */
13788 ix86_register_move_cost (mode
, class1
, class2
)
13789 enum machine_mode mode
;
13790 enum reg_class class1
, class2
;
13792 /* In case we require secondary memory, compute cost of the store followed
13793 by load. In order to avoid bad register allocation choices, we need
13794 for this to be *at least* as high as the symmetric MEMORY_MOVE_COST. */
13796 if (ix86_secondary_memory_needed (class1
, class2
, mode
, 0))
13800 cost
+= MAX (MEMORY_MOVE_COST (mode
, class1
, 0),
13801 MEMORY_MOVE_COST (mode
, class1
, 1));
13802 cost
+= MAX (MEMORY_MOVE_COST (mode
, class2
, 0),
13803 MEMORY_MOVE_COST (mode
, class2
, 1));
13805 /* In case of copying from general_purpose_register we may emit multiple
13806 stores followed by single load causing memory size mismatch stall.
13807 Count this as arbitarily high cost of 20. */
13808 if (CLASS_MAX_NREGS (class1
, mode
) > CLASS_MAX_NREGS (class2
, mode
))
13811 /* In the case of FP/MMX moves, the registers actually overlap, and we
13812 have to switch modes in order to treat them differently. */
13813 if ((MMX_CLASS_P (class1
) && MAYBE_FLOAT_CLASS_P (class2
))
13814 || (MMX_CLASS_P (class2
) && MAYBE_FLOAT_CLASS_P (class1
)))
13820 /* Moves between SSE/MMX and integer unit are expensive. */
13821 if (MMX_CLASS_P (class1
) != MMX_CLASS_P (class2
)
13822 || SSE_CLASS_P (class1
) != SSE_CLASS_P (class2
))
13823 return ix86_cost
->mmxsse_to_integer
;
13824 if (MAYBE_FLOAT_CLASS_P (class1
))
13825 return ix86_cost
->fp_move
;
13826 if (MAYBE_SSE_CLASS_P (class1
))
13827 return ix86_cost
->sse_move
;
13828 if (MAYBE_MMX_CLASS_P (class1
))
13829 return ix86_cost
->mmx_move
;
13833 /* Return 1 if hard register REGNO can hold a value of machine-mode MODE. */
13835 ix86_hard_regno_mode_ok (regno
, mode
)
13837 enum machine_mode mode
;
13839 /* Flags and only flags can only hold CCmode values. */
13840 if (CC_REGNO_P (regno
))
13841 return GET_MODE_CLASS (mode
) == MODE_CC
;
13842 if (GET_MODE_CLASS (mode
) == MODE_CC
13843 || GET_MODE_CLASS (mode
) == MODE_RANDOM
13844 || GET_MODE_CLASS (mode
) == MODE_PARTIAL_INT
)
13846 if (FP_REGNO_P (regno
))
13847 return VALID_FP_MODE_P (mode
);
13848 if (SSE_REGNO_P (regno
))
13849 return VALID_SSE_REG_MODE (mode
);
13850 if (MMX_REGNO_P (regno
))
13851 return VALID_MMX_REG_MODE (mode
) || VALID_MMX_REG_MODE_3DNOW (mode
);
13852 /* We handle both integer and floats in the general purpose registers.
13853 In future we should be able to handle vector modes as well. */
13854 if (!VALID_INT_MODE_P (mode
) && !VALID_FP_MODE_P (mode
))
13856 /* Take care for QImode values - they can be in non-QI regs, but then
13857 they do cause partial register stalls. */
13858 if (regno
< 4 || mode
!= QImode
|| TARGET_64BIT
)
13860 return reload_in_progress
|| reload_completed
|| !TARGET_PARTIAL_REG_STALL
;
13863 /* Return the cost of moving data of mode M between a
13864 register and memory. A value of 2 is the default; this cost is
13865 relative to those in `REGISTER_MOVE_COST'.
13867 If moving between registers and memory is more expensive than
13868 between two registers, you should define this macro to express the
13871 Model also increased moving costs of QImode registers in non
13875 ix86_memory_move_cost (mode
, class, in
)
13876 enum machine_mode mode
;
13877 enum reg_class
class;
13880 if (FLOAT_CLASS_P (class))
13898 return in
? ix86_cost
->fp_load
[index
] : ix86_cost
->fp_store
[index
];
13900 if (SSE_CLASS_P (class))
13903 switch (GET_MODE_SIZE (mode
))
13917 return in
? ix86_cost
->sse_load
[index
] : ix86_cost
->sse_store
[index
];
13919 if (MMX_CLASS_P (class))
13922 switch (GET_MODE_SIZE (mode
))
13933 return in
? ix86_cost
->mmx_load
[index
] : ix86_cost
->mmx_store
[index
];
13935 switch (GET_MODE_SIZE (mode
))
13939 return (Q_CLASS_P (class) ? ix86_cost
->int_load
[0]
13940 : ix86_cost
->movzbl_load
);
13942 return (Q_CLASS_P (class) ? ix86_cost
->int_store
[0]
13943 : ix86_cost
->int_store
[0] + 4);
13946 return in
? ix86_cost
->int_load
[1] : ix86_cost
->int_store
[1];
13948 /* Compute number of 32bit moves needed. TFmode is moved as XFmode. */
13949 if (mode
== TFmode
)
13951 return ((in
? ix86_cost
->int_load
[2] : ix86_cost
->int_store
[2])
13952 * ((int) GET_MODE_SIZE (mode
)
13953 + UNITS_PER_WORD
-1 ) / UNITS_PER_WORD
);
#if defined (DO_GLOBAL_CTORS_BODY) && defined (HAS_INIT_SECTION)
/* Emit the SVR3 constructor-table entry for SYMBOL: push its address
   so the init-section body can collect it.  PRIORITY is ignored.  */
static void
ix86_svr3_asm_out_constructor (symbol, priority)
     rtx symbol;
     int priority ATTRIBUTE_UNUSED;
{
  fputs ("\tpushl $", asm_out_file);
  assemble_name (asm_out_file, XSTR (symbol, 0));
  fputc ('\n', asm_out_file);
}
#endif
#ifdef TARGET_MACHO

/* Counter used to generate unique lazy-pointer / PC labels.  */
static int current_machopic_label_num;

/* Given a symbol name and its associated stub, write out the
   definition of the stub.  */

void
machopic_output_stub (file, symb, stub)
     FILE *file;
     const char *symb, *stub;
{
  unsigned int length;
  char *binder_name, *symbol_name, lazy_ptr_name[32];
  int label = ++current_machopic_label_num;

  /* Lose our funky encoding stuff so it doesn't contaminate the stub.  */
  symb = (*targetm.strip_name_encoding) (symb);

  length = strlen (stub);
  binder_name = alloca (length + 32);
  GEN_BINDER_NAME_FOR_STUB (binder_name, stub, length);

  length = strlen (symb);
  symbol_name = alloca (length + 32);
  GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name, symb, length);

  sprintf (lazy_ptr_name, "L%d$lz", label);

  /* PIC stubs must compute the lazy pointer's address relative to a
     fetched PC; non-PIC stubs can reference it absolutely.  */
  if (MACHOPIC_PURE)
    machopic_picsymbol_stub_section ();
  else
    machopic_symbol_stub_section ();

  fprintf (file, "%s:\n", stub);
  fprintf (file, "\t.indirect_symbol %s\n", symbol_name);

  if (MACHOPIC_PURE)
    {
      fprintf (file, "\tcall LPC$%d\nLPC$%d:\tpopl %%eax\n", label, label);
      fprintf (file, "\tmovl %s-LPC$%d(%%eax),%%edx\n", lazy_ptr_name, label);
      fprintf (file, "\tjmp %%edx\n");
    }
  else
    fprintf (file, "\tjmp *%s\n", lazy_ptr_name);

  fprintf (file, "%s:\n", binder_name);

  if (MACHOPIC_PURE)
    {
      fprintf (file, "\tlea %s-LPC$%d(%%eax),%%eax\n", lazy_ptr_name, label);
      fprintf (file, "\tpushl %%eax\n");
    }
  else
    fprintf (file, "\t pushl $%s\n", lazy_ptr_name);

  fprintf (file, "\tjmp dyld_stub_binding_helper\n");

  /* Emit the lazy pointer itself, initially bound to the binder.  */
  machopic_lazy_symbol_ptr_section ();
  fprintf (file, "%s:\n", lazy_ptr_name);
  fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
  fprintf (file, "\t.long %s\n", binder_name);
}
#endif /* TARGET_MACHO */
14035 /* Order the registers for register allocator. */
14038 x86_order_regs_for_local_alloc ()
14043 /* First allocate the local general purpose registers. */
14044 for (i
= 0; i
< FIRST_PSEUDO_REGISTER
; i
++)
14045 if (GENERAL_REGNO_P (i
) && call_used_regs
[i
])
14046 reg_alloc_order
[pos
++] = i
;
14048 /* Global general purpose registers. */
14049 for (i
= 0; i
< FIRST_PSEUDO_REGISTER
; i
++)
14050 if (GENERAL_REGNO_P (i
) && !call_used_regs
[i
])
14051 reg_alloc_order
[pos
++] = i
;
14053 /* x87 registers come first in case we are doing FP math
14055 if (!TARGET_SSE_MATH
)
14056 for (i
= FIRST_STACK_REG
; i
<= LAST_STACK_REG
; i
++)
14057 reg_alloc_order
[pos
++] = i
;
14059 /* SSE registers. */
14060 for (i
= FIRST_SSE_REG
; i
<= LAST_SSE_REG
; i
++)
14061 reg_alloc_order
[pos
++] = i
;
14062 for (i
= FIRST_REX_SSE_REG
; i
<= LAST_REX_SSE_REG
; i
++)
14063 reg_alloc_order
[pos
++] = i
;
14065 /* x87 registerts. */
14066 if (TARGET_SSE_MATH
)
14067 for (i
= FIRST_STACK_REG
; i
<= LAST_STACK_REG
; i
++)
14068 reg_alloc_order
[pos
++] = i
;
14070 for (i
= FIRST_MMX_REG
; i
<= LAST_MMX_REG
; i
++)
14071 reg_alloc_order
[pos
++] = i
;
14073 /* Initialize the rest of array as we do not allocate some registers
14075 while (pos
< FIRST_PSEUDO_REGISTER
)
14076 reg_alloc_order
[pos
++] = 0;
14079 /* Returns an expression indicating where the this parameter is
14080 located on entry to the FUNCTION. */
14083 x86_this_parameter (function
)
14086 tree type
= TREE_TYPE (function
);
14090 int n
= aggregate_value_p (TREE_TYPE (type
)) != 0;
14091 return gen_rtx_REG (DImode
, x86_64_int_parameter_registers
[n
]);
14094 if (ix86_fntype_regparm (type
) > 0)
14098 parm
= TYPE_ARG_TYPES (type
);
14099 /* Figure out whether or not the function has a variable number of
14101 for (; parm
; parm
= TREE_CHAIN (parm
))
14102 if (TREE_VALUE (parm
) == void_type_node
)
14104 /* If not, the this parameter is in %eax. */
14106 return gen_rtx_REG (SImode
, 0);
14109 if (aggregate_value_p (TREE_TYPE (type
)))
14110 return gen_rtx_MEM (SImode
, plus_constant (stack_pointer_rtx
, 8));
14112 return gen_rtx_MEM (SImode
, plus_constant (stack_pointer_rtx
, 4));
14115 /* Determine whether x86_output_mi_thunk can succeed. */
14118 x86_can_output_mi_thunk (thunk
, delta
, vcall_offset
, function
)
14119 tree thunk ATTRIBUTE_UNUSED
;
14120 HOST_WIDE_INT delta ATTRIBUTE_UNUSED
;
14121 HOST_WIDE_INT vcall_offset
;
14124 /* 64-bit can handle anything. */
14128 /* For 32-bit, everything's fine if we have one free register. */
14129 if (ix86_fntype_regparm (TREE_TYPE (function
)) < 3)
14132 /* Need a free register for vcall_offset. */
14136 /* Need a free register for GOT references. */
14137 if (flag_pic
&& !(*targetm
.binds_local_p
) (function
))
14140 /* Otherwise ok. */
14144 /* Output the assembler code for a thunk function. THUNK_DECL is the
14145 declaration for the thunk function itself, FUNCTION is the decl for
14146 the target function. DELTA is an immediate constant offset to be
14147 added to THIS. If VCALL_OFFSET is non-zero, the word at
14148 *(*this + vcall_offset) should be added to THIS. */
14151 x86_output_mi_thunk (file
, thunk
, delta
, vcall_offset
, function
)
14152 FILE *file ATTRIBUTE_UNUSED
;
14153 tree thunk ATTRIBUTE_UNUSED
;
14154 HOST_WIDE_INT delta
;
14155 HOST_WIDE_INT vcall_offset
;
14159 rtx
this = x86_this_parameter (function
);
14162 /* If VCALL_OFFSET, we'll need THIS in a register. Might as well
14163 pull it in now and let DELTA benefit. */
14166 else if (vcall_offset
)
14168 /* Put the this parameter into %eax. */
14170 xops
[1] = this_reg
= gen_rtx_REG (Pmode
, 0);
14171 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops
);
14174 this_reg
= NULL_RTX
;
14176 /* Adjust the this parameter by a fixed constant. */
14179 xops
[0] = GEN_INT (delta
);
14180 xops
[1] = this_reg
? this_reg
: this;
14183 if (!x86_64_general_operand (xops
[0], DImode
))
14185 tmp
= gen_rtx_REG (DImode
, FIRST_REX_INT_REG
+ 2 /* R10 */);
14187 output_asm_insn ("mov{q}\t{%1, %0|%0, %1}", xops
);
14191 output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops
);
14194 output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops
);
14197 /* Adjust the this parameter by a value stored in the vtable. */
14201 tmp
= gen_rtx_REG (DImode
, FIRST_REX_INT_REG
+ 2 /* R10 */);
14203 tmp
= gen_rtx_REG (SImode
, 2 /* ECX */);
14205 xops
[0] = gen_rtx_MEM (Pmode
, this_reg
);
14208 output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops
);
14210 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops
);
14212 /* Adjust the this parameter. */
14213 xops
[0] = gen_rtx_MEM (Pmode
, plus_constant (tmp
, vcall_offset
));
14214 if (TARGET_64BIT
&& !memory_operand (xops
[0], Pmode
))
14216 rtx tmp2
= gen_rtx_REG (DImode
, FIRST_REX_INT_REG
+ 3 /* R11 */);
14217 xops
[0] = GEN_INT (vcall_offset
);
14219 output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops
);
14220 xops
[0] = gen_rtx_MEM (Pmode
, gen_rtx_PLUS (Pmode
, tmp
, tmp2
));
14222 xops
[1] = this_reg
;
14224 output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops
);
14226 output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops
);
14229 /* If necessary, drop THIS back to its stack slot. */
14230 if (this_reg
&& this_reg
!= this)
14232 xops
[0] = this_reg
;
14234 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops
);
14237 xops
[0] = DECL_RTL (function
);
14240 if (!flag_pic
|| (*targetm
.binds_local_p
) (function
))
14241 output_asm_insn ("jmp\t%P0", xops
);
14244 tmp
= XEXP (xops
[0], 0);
14245 tmp
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, tmp
), UNSPEC_GOTPCREL
);
14246 tmp
= gen_rtx_CONST (Pmode
, tmp
);
14247 tmp
= gen_rtx_MEM (QImode
, tmp
);
14249 output_asm_insn ("jmp\t%A0", xops
);
14254 if (!flag_pic
|| (*targetm
.binds_local_p
) (function
))
14255 output_asm_insn ("jmp\t%P0", xops
);
14258 tmp
= gen_rtx_REG (SImode
, 2 /* ECX */);
14259 output_set_got (tmp
);
14262 output_asm_insn ("mov{l}\t{%0@GOT(%1), %1|%1, %0@GOT[%1]}", xops
);
14263 output_asm_insn ("jmp\t{*}%1", xops
);
14269 x86_field_alignment (field
, computed
)
14273 enum machine_mode mode
;
14274 tree type
= TREE_TYPE (field
);
14276 if (TARGET_64BIT
|| TARGET_ALIGN_DOUBLE
)
14278 mode
= TYPE_MODE (TREE_CODE (type
) == ARRAY_TYPE
14279 ? get_inner_array_type (type
) : type
);
14280 if (mode
== DFmode
|| mode
== DCmode
14281 || GET_MODE_CLASS (mode
) == MODE_INT
14282 || GET_MODE_CLASS (mode
) == MODE_COMPLEX_INT
)
14283 return MIN (32, computed
);
14287 /* Output assembler code to FILE to increment profiler label # LABELNO
14288 for profiling a function entry. */
14290 x86_function_profiler (file
, labelno
)
14297 #ifndef NO_PROFILE_COUNTERS
14298 fprintf (file
, "\tleaq\t%sP%d@(%%rip),%%r11\n", LPREFIX
, labelno
);
14300 fprintf (file
, "\tcall\t*%s@GOTPCREL(%%rip)\n", MCOUNT_NAME
);
14304 #ifndef NO_PROFILE_COUNTERS
14305 fprintf (file
, "\tmovq\t$%sP%d,%%r11\n", LPREFIX
, labelno
);
14307 fprintf (file
, "\tcall\t%s\n", MCOUNT_NAME
);
14311 #ifndef NO_PROFILE_COUNTERS
14312 fprintf (file
, "\tleal\t%sP%d@GOTOFF(%%ebx),%%%s\n",
14313 LPREFIX
, labelno
, PROFILE_COUNT_REGISTER
);
14315 fprintf (file
, "\tcall\t*%s@GOT(%%ebx)\n", MCOUNT_NAME
);
14319 #ifndef NO_PROFILE_COUNTERS
14320 fprintf (file
, "\tmovl\t$%sP%d,%%$s\n", LPREFIX
, labelno
,
14321 PROFILE_COUNT_REGISTER
);
14323 fprintf (file
, "\tcall\t%s\n", MCOUNT_NAME
);
14327 /* Implement machine specific optimizations.
14328 At the moment we implement single transformation: AMD Athlon works faster
14329 when RET is not destination of conditional jump or directly preceeded
14330 by other jump instruction. We avoid the penalty by inserting NOP just
14331 before the RET instructions in such cases. */
14333 x86_machine_dependent_reorg (first
)
14334 rtx first ATTRIBUTE_UNUSED
;
14338 if (!TARGET_ATHLON
|| !optimize
|| optimize_size
)
14340 for (e
= EXIT_BLOCK_PTR
->pred
; e
; e
= e
->pred_next
)
14342 basic_block bb
= e
->src
;
14345 bool insert
= false;
14347 if (!returnjump_p (ret
) || !maybe_hot_bb_p (bb
))
14349 prev
= prev_nonnote_insn (ret
);
14350 if (prev
&& GET_CODE (prev
) == CODE_LABEL
)
14353 for (e
= bb
->pred
; e
; e
= e
->pred_next
)
14354 if (EDGE_FREQUENCY (e
) && e
->src
->index
> 0
14355 && !(e
->flags
& EDGE_FALLTHRU
))
14360 prev
= prev_real_insn (ret
);
14361 if (prev
&& GET_CODE (prev
) == JUMP_INSN
14362 && any_condjump_p (prev
))
14366 emit_insn_before (gen_nop (), ret
);
14370 #include "gt-i386.h"