1 /* Subroutines used for code generation on IA-32.
2 Copyright (C) 1988, 1992, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
3 2002 Free Software Foundation, Inc.
4
5 This file is part of GNU CC.
6
7 GNU CC is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 2, or (at your option)
10 any later version.
11
12 GNU CC is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
16
17 You should have received a copy of the GNU General Public License
18 along with GNU CC; see the file COPYING. If not, write to
19 the Free Software Foundation, 59 Temple Place - Suite 330,
20 Boston, MA 02111-1307, USA. */
21
22 #include "config.h"
23 #include "system.h"
24 #include "rtl.h"
25 #include "tree.h"
26 #include "tm_p.h"
27 #include "regs.h"
28 #include "hard-reg-set.h"
29 #include "real.h"
30 #include "insn-config.h"
31 #include "conditions.h"
32 #include "output.h"
33 #include "insn-attr.h"
34 #include "flags.h"
35 #include "except.h"
36 #include "function.h"
37 #include "recog.h"
38 #include "expr.h"
39 #include "optabs.h"
40 #include "toplev.h"
41 #include "basic-block.h"
42 #include "ggc.h"
43 #include "target.h"
44 #include "target-def.h"
45 #include "langhooks.h"
46
47 #ifndef CHECK_STACK_LIMIT
48 #define CHECK_STACK_LIMIT (-1)
49 #endif
50
51 /* Processor costs (relative to an add) */
52 static const
53 struct processor_costs size_cost = { /* costs for tuning for size */
54 2, /* cost of an add instruction */
55 3, /* cost of a lea instruction */
56 2, /* variable shift costs */
57 3, /* constant shift costs */
58 3, /* cost of starting a multiply */
59 0, /* cost of multiply per each bit set */
60 3, /* cost of a divide/mod */
61 3, /* cost of movsx */
62 3, /* cost of movzx */
63 0, /* "large" insn */
64 2, /* MOVE_RATIO */
65 2, /* cost for loading QImode using movzbl */
66 {2, 2, 2}, /* cost of loading integer registers
67 in QImode, HImode and SImode.
68 Relative to reg-reg move (2). */
69 {2, 2, 2}, /* cost of storing integer registers */
70 2, /* cost of reg,reg fld/fst */
71 {2, 2, 2}, /* cost of loading fp registers
72 in SFmode, DFmode and XFmode */
73 {2, 2, 2}, /* cost of storing fp registers */
74 3, /* cost of moving MMX register */
75 {3, 3}, /* cost of loading MMX registers
76 in SImode and DImode */
77 {3, 3}, /* cost of storing MMX registers
78 in SImode and DImode */
79 3, /* cost of moving SSE register */
80 {3, 3, 3}, /* cost of loading SSE registers
81 in SImode, DImode and TImode */
82 {3, 3, 3}, /* cost of storing SSE registers
83 in SImode, DImode and TImode */
84 3, /* MMX or SSE register to integer */
85 0, /* size of prefetch block */
86 0, /* number of parallel prefetches */
87 2, /* cost of FADD and FSUB insns. */
88 2, /* cost of FMUL instruction. */
89 2, /* cost of FDIV instruction. */
90 2, /* cost of FABS instruction. */
91 2, /* cost of FCHS instruction. */
92 2, /* cost of FSQRT instruction. */
93 };
94
95 /* Processor costs (relative to an add) */
96 static const
97 struct processor_costs i386_cost = { /* 386 specific costs */
98 1, /* cost of an add instruction */
99 1, /* cost of a lea instruction */
100 3, /* variable shift costs */
101 2, /* constant shift costs */
102 6, /* cost of starting a multiply */
103 1, /* cost of multiply per each bit set */
104 23, /* cost of a divide/mod */
105 3, /* cost of movsx */
106 2, /* cost of movzx */
107 15, /* "large" insn */
108 3, /* MOVE_RATIO */
109 4, /* cost for loading QImode using movzbl */
110 {2, 4, 2}, /* cost of loading integer registers
111 in QImode, HImode and SImode.
112 Relative to reg-reg move (2). */
113 {2, 4, 2}, /* cost of storing integer registers */
114 2, /* cost of reg,reg fld/fst */
115 {8, 8, 8}, /* cost of loading fp registers
116 in SFmode, DFmode and XFmode */
117 {8, 8, 8}, /* cost of storing fp registers */
118 2, /* cost of moving MMX register */
119 {4, 8}, /* cost of loading MMX registers
120 in SImode and DImode */
121 {4, 8}, /* cost of storing MMX registers
122 in SImode and DImode */
123 2, /* cost of moving SSE register */
124 {4, 8, 16}, /* cost of loading SSE registers
125 in SImode, DImode and TImode */
126 {4, 8, 16}, /* cost of storing SSE registers
127 in SImode, DImode and TImode */
128 3, /* MMX or SSE register to integer */
129 0, /* size of prefetch block */
130 0, /* number of parallel prefetches */
131 23, /* cost of FADD and FSUB insns. */
132 27, /* cost of FMUL instruction. */
133 88, /* cost of FDIV instruction. */
134 22, /* cost of FABS instruction. */
135 24, /* cost of FCHS instruction. */
136 122, /* cost of FSQRT instruction. */
137 };
138
139 static const
140 struct processor_costs i486_cost = { /* 486 specific costs */
141 1, /* cost of an add instruction */
142 1, /* cost of a lea instruction */
143 3, /* variable shift costs */
144 2, /* constant shift costs */
145 12, /* cost of starting a multiply */
146 1, /* cost of multiply per each bit set */
147 40, /* cost of a divide/mod */
148 3, /* cost of movsx */
149 2, /* cost of movzx */
150 15, /* "large" insn */
151 3, /* MOVE_RATIO */
152 4, /* cost for loading QImode using movzbl */
153 {2, 4, 2}, /* cost of loading integer registers
154 in QImode, HImode and SImode.
155 Relative to reg-reg move (2). */
156 {2, 4, 2}, /* cost of storing integer registers */
157 2, /* cost of reg,reg fld/fst */
158 {8, 8, 8}, /* cost of loading fp registers
159 in SFmode, DFmode and XFmode */
160 {8, 8, 8}, /* cost of storing fp registers */
161 2, /* cost of moving MMX register */
162 {4, 8}, /* cost of loading MMX registers
163 in SImode and DImode */
164 {4, 8}, /* cost of storing MMX registers
165 in SImode and DImode */
166 2, /* cost of moving SSE register */
167 {4, 8, 16}, /* cost of loading SSE registers
168 in SImode, DImode and TImode */
169 {4, 8, 16}, /* cost of storing SSE registers
170 in SImode, DImode and TImode */
171 3, /* MMX or SSE register to integer */
172 0, /* size of prefetch block */
173 0, /* number of parallel prefetches */
174 8, /* cost of FADD and FSUB insns. */
175 16, /* cost of FMUL instruction. */
176 73, /* cost of FDIV instruction. */
177 3, /* cost of FABS instruction. */
178 3, /* cost of FCHS instruction. */
179 83, /* cost of FSQRT instruction. */
180 };
181
182 static const
183 struct processor_costs pentium_cost = {
184 1, /* cost of an add instruction */
185 1, /* cost of a lea instruction */
186 4, /* variable shift costs */
187 1, /* constant shift costs */
188 11, /* cost of starting a multiply */
189 0, /* cost of multiply per each bit set */
190 25, /* cost of a divide/mod */
191 3, /* cost of movsx */
192 2, /* cost of movzx */
193 8, /* "large" insn */
194 6, /* MOVE_RATIO */
195 6, /* cost for loading QImode using movzbl */
196 {2, 4, 2}, /* cost of loading integer registers
197 in QImode, HImode and SImode.
198 Relative to reg-reg move (2). */
199 {2, 4, 2}, /* cost of storing integer registers */
200 2, /* cost of reg,reg fld/fst */
201 {2, 2, 6}, /* cost of loading fp registers
202 in SFmode, DFmode and XFmode */
203 {4, 4, 6}, /* cost of storing fp registers */
204 8, /* cost of moving MMX register */
205 {8, 8}, /* cost of loading MMX registers
206 in SImode and DImode */
207 {8, 8}, /* cost of storing MMX registers
208 in SImode and DImode */
209 2, /* cost of moving SSE register */
210 {4, 8, 16}, /* cost of loading SSE registers
211 in SImode, DImode and TImode */
212 {4, 8, 16}, /* cost of storing SSE registers
213 in SImode, DImode and TImode */
214 3, /* MMX or SSE register to integer */
215 0, /* size of prefetch block */
216 0, /* number of parallel prefetches */
217 3, /* cost of FADD and FSUB insns. */
218 3, /* cost of FMUL instruction. */
219 39, /* cost of FDIV instruction. */
220 1, /* cost of FABS instruction. */
221 1, /* cost of FCHS instruction. */
222 70, /* cost of FSQRT instruction. */
223 };
224
225 static const
226 struct processor_costs pentiumpro_cost = {
227 1, /* cost of an add instruction */
228 1, /* cost of a lea instruction */
229 1, /* variable shift costs */
230 1, /* constant shift costs */
231 4, /* cost of starting a multiply */
232 0, /* cost of multiply per each bit set */
233 17, /* cost of a divide/mod */
234 1, /* cost of movsx */
235 1, /* cost of movzx */
236 8, /* "large" insn */
237 6, /* MOVE_RATIO */
238 2, /* cost for loading QImode using movzbl */
239 {4, 4, 4}, /* cost of loading integer registers
240 in QImode, HImode and SImode.
241 Relative to reg-reg move (2). */
242 {2, 2, 2}, /* cost of storing integer registers */
243 2, /* cost of reg,reg fld/fst */
244 {2, 2, 6}, /* cost of loading fp registers
245 in SFmode, DFmode and XFmode */
246 {4, 4, 6}, /* cost of storing fp registers */
247 2, /* cost of moving MMX register */
248 {2, 2}, /* cost of loading MMX registers
249 in SImode and DImode */
250 {2, 2}, /* cost of storing MMX registers
251 in SImode and DImode */
252 2, /* cost of moving SSE register */
253 {2, 2, 8}, /* cost of loading SSE registers
254 in SImode, DImode and TImode */
255 {2, 2, 8}, /* cost of storing SSE registers
256 in SImode, DImode and TImode */
257 3, /* MMX or SSE register to integer */
258 32, /* size of prefetch block */
259 6, /* number of parallel prefetches */
260 3, /* cost of FADD and FSUB insns. */
261 5, /* cost of FMUL instruction. */
262 56, /* cost of FDIV instruction. */
263 2, /* cost of FABS instruction. */
264 2, /* cost of FCHS instruction. */
265 56, /* cost of FSQRT instruction. */
266 };
267
268 static const
269 struct processor_costs k6_cost = {
270 1, /* cost of an add instruction */
271 2, /* cost of a lea instruction */
272 1, /* variable shift costs */
273 1, /* constant shift costs */
274 3, /* cost of starting a multiply */
275 0, /* cost of multiply per each bit set */
276 18, /* cost of a divide/mod */
277 2, /* cost of movsx */
278 2, /* cost of movzx */
279 8, /* "large" insn */
280 4, /* MOVE_RATIO */
281 3, /* cost for loading QImode using movzbl */
282 {4, 5, 4}, /* cost of loading integer registers
283 in QImode, HImode and SImode.
284 Relative to reg-reg move (2). */
285 {2, 3, 2}, /* cost of storing integer registers */
286 4, /* cost of reg,reg fld/fst */
287 {6, 6, 6}, /* cost of loading fp registers
288 in SFmode, DFmode and XFmode */
289 {4, 4, 4}, /* cost of storing fp registers */
290 2, /* cost of moving MMX register */
291 {2, 2}, /* cost of loading MMX registers
292 in SImode and DImode */
293 {2, 2}, /* cost of storing MMX registers
294 in SImode and DImode */
295 2, /* cost of moving SSE register */
296 {2, 2, 8}, /* cost of loading SSE registers
297 in SImode, DImode and TImode */
298 {2, 2, 8}, /* cost of storing SSE registers
299 in SImode, DImode and TImode */
300 6, /* MMX or SSE register to integer */
301 32, /* size of prefetch block */
302 1, /* number of parallel prefetches */
303 2, /* cost of FADD and FSUB insns. */
304 2, /* cost of FMUL instruction. */
305 56, /* cost of FDIV instruction. */
306 2, /* cost of FABS instruction. */
307 2, /* cost of FCHS instruction. */
308 56, /* cost of FSQRT instruction. */
309 };
310
311 static const
312 struct processor_costs athlon_cost = {
313 1, /* cost of an add instruction */
314 2, /* cost of a lea instruction */
315 1, /* variable shift costs */
316 1, /* constant shift costs */
317 5, /* cost of starting a multiply */
318 0, /* cost of multiply per each bit set */
319 42, /* cost of a divide/mod */
320 1, /* cost of movsx */
321 1, /* cost of movzx */
322 8, /* "large" insn */
323 9, /* MOVE_RATIO */
324 4, /* cost for loading QImode using movzbl */
325 {3, 4, 3}, /* cost of loading integer registers
326 in QImode, HImode and SImode.
327 Relative to reg-reg move (2). */
328 {3, 4, 3}, /* cost of storing integer registers */
329 4, /* cost of reg,reg fld/fst */
330 {4, 4, 12}, /* cost of loading fp registers
331 in SFmode, DFmode and XFmode */
332 {6, 6, 8}, /* cost of storing fp registers */
333 2, /* cost of moving MMX register */
334 {4, 4}, /* cost of loading MMX registers
335 in SImode and DImode */
336 {4, 4}, /* cost of storing MMX registers
337 in SImode and DImode */
338 2, /* cost of moving SSE register */
339 {4, 4, 6}, /* cost of loading SSE registers
340 in SImode, DImode and TImode */
341 {4, 4, 5}, /* cost of storing SSE registers
342 in SImode, DImode and TImode */
343 5, /* MMX or SSE register to integer */
344 64, /* size of prefetch block */
345 6, /* number of parallel prefetches */
346 4, /* cost of FADD and FSUB insns. */
347 4, /* cost of FMUL instruction. */
348 24, /* cost of FDIV instruction. */
349 2, /* cost of FABS instruction. */
350 2, /* cost of FCHS instruction. */
351 35, /* cost of FSQRT instruction. */
352 };
353
354 static const
355 struct processor_costs pentium4_cost = {
356 1, /* cost of an add instruction */
357 1, /* cost of a lea instruction */
358 8, /* variable shift costs */
359 8, /* constant shift costs */
360 30, /* cost of starting a multiply */
361 0, /* cost of multiply per each bit set */
362 112, /* cost of a divide/mod */
363 1, /* cost of movsx */
364 1, /* cost of movzx */
365 16, /* "large" insn */
366 6, /* MOVE_RATIO */
367 2, /* cost for loading QImode using movzbl */
368 {4, 5, 4}, /* cost of loading integer registers
369 in QImode, HImode and SImode.
370 Relative to reg-reg move (2). */
371 {2, 3, 2}, /* cost of storing integer registers */
372 2, /* cost of reg,reg fld/fst */
373 {2, 2, 6}, /* cost of loading fp registers
374 in SFmode, DFmode and XFmode */
375 {4, 4, 6}, /* cost of storing fp registers */
376 2, /* cost of moving MMX register */
377 {2, 2}, /* cost of loading MMX registers
378 in SImode and DImode */
379 {2, 2}, /* cost of storing MMX registers
380 in SImode and DImode */
381 12, /* cost of moving SSE register */
382 {12, 12, 12}, /* cost of loading SSE registers
383 in SImode, DImode and TImode */
384 {2, 2, 8}, /* cost of storing SSE registers
385 in SImode, DImode and TImode */
386 10, /* MMX or SSE register to integer */
387 64, /* size of prefetch block */
388 6, /* number of parallel prefetches */
389 5, /* cost of FADD and FSUB insns. */
390 7, /* cost of FMUL instruction. */
391 43, /* cost of FDIV instruction. */
392 2, /* cost of FABS instruction. */
393 2, /* cost of FCHS instruction. */
394 43, /* cost of FSQRT instruction. */
395 };
396
397 const struct processor_costs *ix86_cost = &pentium_cost;
398
399 /* Processor feature/optimization bitmasks. */
400 #define m_386 (1<<PROCESSOR_I386)
401 #define m_486 (1<<PROCESSOR_I486)
402 #define m_PENT (1<<PROCESSOR_PENTIUM)
403 #define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
404 #define m_K6 (1<<PROCESSOR_K6)
405 #define m_ATHLON (1<<PROCESSOR_ATHLON)
406 #define m_PENT4 (1<<PROCESSOR_PENTIUM4)
407
408 const int x86_use_leave = m_386 | m_K6 | m_ATHLON;
409 const int x86_push_memory = m_386 | m_K6 | m_ATHLON | m_PENT4;
410 const int x86_zero_extend_with_and = m_486 | m_PENT;
411 const int x86_movx = m_ATHLON | m_PPRO | m_PENT4 /* m_386 | m_K6 */;
412 const int x86_double_with_add = ~m_386;
413 const int x86_use_bit_test = m_386;
414 const int x86_unroll_strlen = m_486 | m_PENT | m_PPRO | m_ATHLON | m_K6;
415 const int x86_cmove = m_PPRO | m_ATHLON | m_PENT4;
416 const int x86_3dnow_a = m_ATHLON;
417 const int x86_deep_branch = m_PPRO | m_K6 | m_ATHLON | m_PENT4;
418 const int x86_branch_hints = m_PENT4;
419 const int x86_use_sahf = m_PPRO | m_K6 | m_PENT4;
420 const int x86_partial_reg_stall = m_PPRO;
421 const int x86_use_loop = m_K6;
422 const int x86_use_fiop = ~(m_PPRO | m_ATHLON | m_PENT);
423 const int x86_use_mov0 = m_K6;
424 const int x86_use_cltd = ~(m_PENT | m_K6);
425 const int x86_read_modify_write = ~m_PENT;
426 const int x86_read_modify = ~(m_PENT | m_PPRO);
427 const int x86_split_long_moves = m_PPRO;
428 const int x86_promote_QImode = m_K6 | m_PENT | m_386 | m_486 | m_ATHLON;
429 const int x86_fast_prefix = ~(m_PENT | m_486 | m_386);
430 const int x86_single_stringop = m_386 | m_PENT4;
431 const int x86_qimode_math = ~(0);
432 const int x86_promote_qi_regs = 0;
433 const int x86_himode_math = ~(m_PPRO);
434 const int x86_promote_hi_regs = m_PPRO;
435 const int x86_sub_esp_4 = m_ATHLON | m_PPRO | m_PENT4;
436 const int x86_sub_esp_8 = m_ATHLON | m_PPRO | m_386 | m_486 | m_PENT4;
437 const int x86_add_esp_4 = m_ATHLON | m_K6 | m_PENT4;
438 const int x86_add_esp_8 = m_ATHLON | m_PPRO | m_K6 | m_386 | m_486 | m_PENT4;
439 const int x86_integer_DFmode_moves = ~(m_ATHLON | m_PENT4 | m_PPRO);
440 const int x86_partial_reg_dependency = m_ATHLON | m_PENT4;
441 const int x86_memory_mismatch_stall = m_ATHLON | m_PENT4;
442 const int x86_accumulate_outgoing_args = m_ATHLON | m_PENT4 | m_PPRO;
443 const int x86_prologue_using_move = m_ATHLON | m_PENT4 | m_PPRO;
444 const int x86_epilogue_using_move = m_ATHLON | m_PENT4 | m_PPRO;
445 const int x86_decompose_lea = m_PENT4;
446 const int x86_shift1 = ~m_486;
447 const int x86_arch_always_fancy_math_387 = m_PENT | m_PPRO | m_ATHLON | m_PENT4;
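/* A sketch of how these masks are typically consumed (an illustration, not
   a definition from this file): i386.h tests each mask against the bit of
   the CPU being tuned for, e.g. something like
     #define TARGET_USE_LEAVE (x86_use_leave & CPUMASK)
   with CPUMASK being (1 << ix86_cpu), the same idiom used later in
   override_options for x86_accumulate_outgoing_args.  */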
448
449 /* In case the average insn count for a single function invocation is
450 lower than this constant, emit fast (but longer) prologue and
451 epilogue code. */
452 #define FAST_PROLOGUE_INSN_COUNT 30
453
454 /* Set by prologue expander and used by epilogue expander to determine
455 the style used. */
456 static int use_fast_prologue_epilogue;
457
458 /* Names for the 8-bit (low), 8-bit (high), and 16-bit registers, respectively. */
459 static const char *const qi_reg_name[] = QI_REGISTER_NAMES;
460 static const char *const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES;
461 static const char *const hi_reg_name[] = HI_REGISTER_NAMES;
462
463 /* Array of the smallest class containing reg number REGNO, indexed by
464 REGNO. Used by REGNO_REG_CLASS in i386.h. */
465
466 enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] =
467 {
468 /* ax, dx, cx, bx */
469 AREG, DREG, CREG, BREG,
470 /* si, di, bp, sp */
471 SIREG, DIREG, NON_Q_REGS, NON_Q_REGS,
472 /* FP registers */
473 FP_TOP_REG, FP_SECOND_REG, FLOAT_REGS, FLOAT_REGS,
474 FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, FLOAT_REGS,
475 /* arg pointer */
476 NON_Q_REGS,
477 /* flags, fpsr, dirflag, frame */
478 NO_REGS, NO_REGS, NO_REGS, NON_Q_REGS,
479 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
480 SSE_REGS, SSE_REGS,
481 MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS,
482 MMX_REGS, MMX_REGS,
483 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
484 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
485 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
486 SSE_REGS, SSE_REGS,
487 };
488
489 /* The "default" register map used in 32bit mode. */
490
491 int const dbx_register_map[FIRST_PSEUDO_REGISTER] =
492 {
493 0, 2, 1, 3, 6, 7, 4, 5, /* general regs */
494 12, 13, 14, 15, 16, 17, 18, 19, /* fp regs */
495 -1, -1, -1, -1, -1, /* arg, flags, fpsr, dir, frame */
496 21, 22, 23, 24, 25, 26, 27, 28, /* SSE */
497 29, 30, 31, 32, 33, 34, 35, 36, /* MMX */
498 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
499 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
500 };
501
502 static int const x86_64_int_parameter_registers[6] =
503 {
504 5 /*RDI*/, 4 /*RSI*/, 1 /*RDX*/, 2 /*RCX*/,
505 FIRST_REX_INT_REG /*R8 */, FIRST_REX_INT_REG + 1 /*R9 */
506 };
507
508 static int const x86_64_int_return_registers[4] =
509 {
510 0 /*RAX*/, 1 /*RDX*/, 5 /*RDI*/, 4 /*RSI*/
511 };
512
513 /* The "default" register map used in 64bit mode. */
514 int const dbx64_register_map[FIRST_PSEUDO_REGISTER] =
515 {
516 0, 1, 2, 3, 4, 5, 6, 7, /* general regs */
517 33, 34, 35, 36, 37, 38, 39, 40, /* fp regs */
518 -1, -1, -1, -1, -1, /* arg, flags, fpsr, dir, frame */
519 17, 18, 19, 20, 21, 22, 23, 24, /* SSE */
520 41, 42, 43, 44, 45, 46, 47, 48, /* MMX */
521 8,9,10,11,12,13,14,15, /* extended integer registers */
522 25, 26, 27, 28, 29, 30, 31, 32, /* extended SSE registers */
523 };
524
525 /* Define the register numbers to be used in Dwarf debugging information.
526 The SVR4 reference port C compiler uses the following register numbers
527 in its Dwarf output code:
528 0 for %eax (gcc regno = 0)
529 1 for %ecx (gcc regno = 2)
530 2 for %edx (gcc regno = 1)
531 3 for %ebx (gcc regno = 3)
532 4 for %esp (gcc regno = 7)
533 5 for %ebp (gcc regno = 6)
534 6 for %esi (gcc regno = 4)
535 7 for %edi (gcc regno = 5)
536 The following three DWARF register numbers are never generated by
537 the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
538 believes these numbers have these meanings.
539 8 for %eip (no gcc equivalent)
540 9 for %eflags (gcc regno = 17)
541 10 for %trapno (no gcc equivalent)
542 It is not at all clear how we should number the FP stack registers
543 for the x86 architecture. If the version of SDB on x86/svr4 were
544 a bit less brain dead with respect to floating-point then we would
545 have a precedent to follow with respect to DWARF register numbers
546 for x86 FP registers, but the SDB on x86/svr4 is so completely
547 broken with respect to FP registers that it is hardly worth thinking
548 of it as something to strive for compatibility with.
549 The version of x86/svr4 SDB I have at the moment does (partially)
550 seem to believe that DWARF register number 11 is associated with
551 the x86 register %st(0), but that's about all. Higher DWARF
552 register numbers don't seem to be associated with anything in
553 particular, and even for DWARF regno 11, SDB only seems to under-
554 stand that it should say that a variable lives in %st(0) (when
555 asked via an `=' command) if we said it was in DWARF regno 11,
556 but SDB still prints garbage when asked for the value of the
557 variable in question (via a `/' command).
558 (Also note that the labels SDB prints for various FP stack regs
559 when doing an `x' command are all wrong.)
560 Note that these problems generally don't affect the native SVR4
561 C compiler because it doesn't allow the use of -O with -g and
562 because when it is *not* optimizing, it allocates a memory
563 location for each floating-point variable, and the memory
564 location is what gets described in the DWARF AT_location
565 attribute for the variable in question.
566 Regardless of the severe mental illness of the x86/svr4 SDB, we
567 do something sensible here and we use the following DWARF
568 register numbers. Note that these are all stack-top-relative
569 numbers.
570 11 for %st(0) (gcc regno = 8)
571 12 for %st(1) (gcc regno = 9)
572 13 for %st(2) (gcc regno = 10)
573 14 for %st(3) (gcc regno = 11)
574 15 for %st(4) (gcc regno = 12)
575 16 for %st(5) (gcc regno = 13)
576 17 for %st(6) (gcc regno = 14)
577 18 for %st(7) (gcc regno = 15)
578 */
579 int const svr4_dbx_register_map[FIRST_PSEUDO_REGISTER] =
580 {
581 0, 2, 1, 3, 6, 7, 5, 4, /* general regs */
582 11, 12, 13, 14, 15, 16, 17, 18, /* fp regs */
583 -1, 9, -1, -1, -1, /* arg, flags, fpsr, dir, frame */
584 21, 22, 23, 24, 25, 26, 27, 28, /* SSE registers */
585 29, 30, 31, 32, 33, 34, 35, 36, /* MMX registers */
586 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
587 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
588 };
589
590 /* Test and compare insns in i386.md store the information needed to
591 generate branch and scc insns here. */
592
593 rtx ix86_compare_op0 = NULL_RTX;
594 rtx ix86_compare_op1 = NULL_RTX;
595
596 /* The encoding characters for the four TLS models present in ELF. */
597
598 static char const tls_model_chars[] = " GLil";
599
600 #define MAX_386_STACK_LOCALS 3
601 /* Size of the register save area. */
602 #define X86_64_VARARGS_SIZE (REGPARM_MAX * UNITS_PER_WORD + SSE_REGPARM_MAX * 16)
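/* As a quick sanity check of that arithmetic, assuming the usual x86-64
   values REGPARM_MAX == 6, UNITS_PER_WORD == 8 and SSE_REGPARM_MAX == 8,
   this works out to 6 * 8 + 8 * 16 = 176 bytes, the size of the ABI
   register save area used for varargs.  */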
603
604 /* Define the structure for the machine field in struct function. */
605 struct machine_function GTY(())
606 {
607 rtx stack_locals[(int) MAX_MACHINE_MODE][MAX_386_STACK_LOCALS];
608 const char *some_ld_name;
609 int save_varrargs_registers;
610 int accesses_prev_frame;
611 };
612
613 #define ix86_stack_locals (cfun->machine->stack_locals)
614 #define ix86_save_varrargs_registers (cfun->machine->save_varrargs_registers)
615
616 /* Structure describing stack frame layout.
617 Stack grows downward:
618
619 [arguments]
620 <- ARG_POINTER
621 saved pc
622
623 saved frame pointer if frame_pointer_needed
624 <- HARD_FRAME_POINTER
625 [saved regs]
626
627 [padding1] \
628 )
629 [va_arg registers] (
630 > to_allocate <- FRAME_POINTER
631 [frame] (
632 )
633 [padding2] /
634 */
635 struct ix86_frame
636 {
637 int nregs;
638 int padding1;
639 int va_arg_size;
640 HOST_WIDE_INT frame;
641 int padding2;
642 int outgoing_arguments_size;
643 int red_zone_size;
644
645 HOST_WIDE_INT to_allocate;
646 /* The offsets relative to ARG_POINTER. */
647 HOST_WIDE_INT frame_pointer_offset;
648 HOST_WIDE_INT hard_frame_pointer_offset;
649 HOST_WIDE_INT stack_pointer_offset;
650 };
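/* Roughly speaking, to_allocate is the amount the prologue subtracts from
   the stack pointer once the saved registers have been pushed, and the
   three *_offset fields are measured from ARG_POINTER as noted above; the
   precise values are computed in ix86_compute_frame_layout below.  */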
651
652 /* Used to enable/disable debugging features. */
653 const char *ix86_debug_arg_string, *ix86_debug_addr_string;
654 /* Code model option as passed by user. */
655 const char *ix86_cmodel_string;
656 /* Parsed value. */
657 enum cmodel ix86_cmodel;
658 /* Asm dialect. */
659 const char *ix86_asm_string;
660 enum asm_dialect ix86_asm_dialect = ASM_ATT;
661 /* TLS dialect. */
662 const char *ix86_tls_dialect_string;
663 enum tls_dialect ix86_tls_dialect = TLS_DIALECT_GNU;
664
665 /* Which unit we are generating floating point math for. */
666 enum fpmath_unit ix86_fpmath;
667
668 /* Which cpu are we scheduling for. */
669 enum processor_type ix86_cpu;
670 /* Which instruction set architecture to use. */
671 enum processor_type ix86_arch;
672
673 /* Strings to hold which cpu and instruction set architecture to use. */
674 const char *ix86_cpu_string; /* for -mcpu=<xxx> */
675 const char *ix86_arch_string; /* for -march=<xxx> */
676 const char *ix86_fpmath_string; /* for -mfpmath=<xxx> */
677
678 /* # of registers to use to pass arguments. */
679 const char *ix86_regparm_string;
680
681 /* True if the SSE prefetch instruction is not a NOOP. */
682 int x86_prefetch_sse;
683
684 /* ix86_regparm_string as a number */
685 int ix86_regparm;
686
687 /* Alignment to use for loops and jumps: */
688
689 /* Power of two alignment for loops. */
690 const char *ix86_align_loops_string;
691
692 /* Power of two alignment for non-loop jumps. */
693 const char *ix86_align_jumps_string;
694
695 /* Power of two alignment for stack boundary in bytes. */
696 const char *ix86_preferred_stack_boundary_string;
697
698 /* Preferred alignment for stack boundary in bits. */
699 int ix86_preferred_stack_boundary;
700
701 /* Values 1-5: see jump.c */
702 int ix86_branch_cost;
703 const char *ix86_branch_cost_string;
704
705 /* Power of two alignment for functions. */
706 const char *ix86_align_funcs_string;
707
708 /* Prefix built by ASM_GENERATE_INTERNAL_LABEL. */
709 static char internal_label_prefix[16];
710 static int internal_label_prefix_len;
711 \f
712 static int local_symbolic_operand PARAMS ((rtx, enum machine_mode));
713 static int tls_symbolic_operand_1 PARAMS ((rtx, enum tls_model));
714 static void output_pic_addr_const PARAMS ((FILE *, rtx, int));
715 static void put_condition_code PARAMS ((enum rtx_code, enum machine_mode,
716 int, int, FILE *));
717 static const char *get_some_local_dynamic_name PARAMS ((void));
718 static int get_some_local_dynamic_name_1 PARAMS ((rtx *, void *));
719 static rtx maybe_get_pool_constant PARAMS ((rtx));
720 static rtx ix86_expand_int_compare PARAMS ((enum rtx_code, rtx, rtx));
721 static enum rtx_code ix86_prepare_fp_compare_args PARAMS ((enum rtx_code,
722 rtx *, rtx *));
723 static rtx get_thread_pointer PARAMS ((void));
724 static void get_pc_thunk_name PARAMS ((char [32], unsigned int));
725 static rtx gen_push PARAMS ((rtx));
726 static int memory_address_length PARAMS ((rtx addr));
727 static int ix86_flags_dependant PARAMS ((rtx, rtx, enum attr_type));
728 static int ix86_agi_dependant PARAMS ((rtx, rtx, enum attr_type));
729 static enum attr_ppro_uops ix86_safe_ppro_uops PARAMS ((rtx));
730 static void ix86_dump_ppro_packet PARAMS ((FILE *));
731 static void ix86_reorder_insn PARAMS ((rtx *, rtx *));
732 static struct machine_function * ix86_init_machine_status PARAMS ((void));
733 static int ix86_split_to_parts PARAMS ((rtx, rtx *, enum machine_mode));
734 static int ix86_nsaved_regs PARAMS ((void));
735 static void ix86_emit_save_regs PARAMS ((void));
736 static void ix86_emit_save_regs_using_mov PARAMS ((rtx, HOST_WIDE_INT));
737 static void ix86_emit_restore_regs_using_mov PARAMS ((rtx, int, int));
738 static void ix86_output_function_epilogue PARAMS ((FILE *, HOST_WIDE_INT));
739 static void ix86_set_move_mem_attrs_1 PARAMS ((rtx, rtx, rtx, rtx, rtx));
740 static void ix86_sched_reorder_ppro PARAMS ((rtx *, rtx *));
741 static HOST_WIDE_INT ix86_GOT_alias_set PARAMS ((void));
742 static void ix86_adjust_counter PARAMS ((rtx, HOST_WIDE_INT));
743 static rtx ix86_expand_aligntest PARAMS ((rtx, int));
744 static void ix86_expand_strlensi_unroll_1 PARAMS ((rtx, rtx));
745 static int ix86_issue_rate PARAMS ((void));
746 static int ix86_adjust_cost PARAMS ((rtx, rtx, rtx, int));
747 static void ix86_sched_init PARAMS ((FILE *, int, int));
748 static int ix86_sched_reorder PARAMS ((FILE *, int, rtx *, int *, int));
749 static int ix86_variable_issue PARAMS ((FILE *, int, rtx, int));
750 static int ia32_use_dfa_pipeline_interface PARAMS ((void));
751 static int ia32_multipass_dfa_lookahead PARAMS ((void));
752 static void ix86_init_mmx_sse_builtins PARAMS ((void));
753 static rtx x86_this_parameter PARAMS ((tree));
754 static void x86_output_mi_thunk PARAMS ((FILE *, tree, HOST_WIDE_INT,
755 HOST_WIDE_INT, tree));
756 static bool x86_can_output_mi_thunk PARAMS ((tree, HOST_WIDE_INT,
757 HOST_WIDE_INT, tree));
758
759 struct ix86_address
760 {
761 rtx base, index, disp;
762 HOST_WIDE_INT scale;
763 };
764
765 static int ix86_decompose_address PARAMS ((rtx, struct ix86_address *));
766
767 static void ix86_encode_section_info PARAMS ((tree, int)) ATTRIBUTE_UNUSED;
768 static const char *ix86_strip_name_encoding PARAMS ((const char *))
769 ATTRIBUTE_UNUSED;
770
771 struct builtin_description;
772 static rtx ix86_expand_sse_comi PARAMS ((const struct builtin_description *,
773 tree, rtx));
774 static rtx ix86_expand_sse_compare PARAMS ((const struct builtin_description *,
775 tree, rtx));
776 static rtx ix86_expand_unop1_builtin PARAMS ((enum insn_code, tree, rtx));
777 static rtx ix86_expand_unop_builtin PARAMS ((enum insn_code, tree, rtx, int));
778 static rtx ix86_expand_binop_builtin PARAMS ((enum insn_code, tree, rtx));
779 static rtx ix86_expand_store_builtin PARAMS ((enum insn_code, tree));
780 static rtx safe_vector_operand PARAMS ((rtx, enum machine_mode));
781 static enum rtx_code ix86_fp_compare_code_to_integer PARAMS ((enum rtx_code));
782 static void ix86_fp_comparison_codes PARAMS ((enum rtx_code code,
783 enum rtx_code *,
784 enum rtx_code *,
785 enum rtx_code *));
786 static rtx ix86_expand_fp_compare PARAMS ((enum rtx_code, rtx, rtx, rtx,
787 rtx *, rtx *));
788 static int ix86_fp_comparison_arithmetics_cost PARAMS ((enum rtx_code code));
789 static int ix86_fp_comparison_fcomi_cost PARAMS ((enum rtx_code code));
790 static int ix86_fp_comparison_sahf_cost PARAMS ((enum rtx_code code));
791 static int ix86_fp_comparison_cost PARAMS ((enum rtx_code code));
792 static unsigned int ix86_select_alt_pic_regnum PARAMS ((void));
793 static int ix86_save_reg PARAMS ((unsigned int, int));
794 static void ix86_compute_frame_layout PARAMS ((struct ix86_frame *));
795 static int ix86_comp_type_attributes PARAMS ((tree, tree));
796 static int ix86_fntype_regparm PARAMS ((tree));
797 const struct attribute_spec ix86_attribute_table[];
798 static tree ix86_handle_cdecl_attribute PARAMS ((tree *, tree, tree, int, bool *));
799 static tree ix86_handle_regparm_attribute PARAMS ((tree *, tree, tree, int, bool *));
800 static int ix86_value_regno PARAMS ((enum machine_mode));
801
802 #if defined (DO_GLOBAL_CTORS_BODY) && defined (HAS_INIT_SECTION)
803 static void ix86_svr3_asm_out_constructor PARAMS ((rtx, int));
804 #endif
805
806 /* Register class used for passing a given 64-bit part of the argument.
807 These represent the classes documented by the psABI, with the exception
808 of the SSESF and SSEDF classes, which are basically the SSE class except
809 that gcc will use SFmode or DFmode moves instead of DImode to avoid
810 reformatting penalties.
811 
812 Similarly we play games with INTEGERSI_CLASS to use cheaper SImode moves
813 whenever possible (i.e. when the upper half contains only padding). */
814 enum x86_64_reg_class
815 {
816 X86_64_NO_CLASS,
817 X86_64_INTEGER_CLASS,
818 X86_64_INTEGERSI_CLASS,
819 X86_64_SSE_CLASS,
820 X86_64_SSESF_CLASS,
821 X86_64_SSEDF_CLASS,
822 X86_64_SSEUP_CLASS,
823 X86_64_X87_CLASS,
824 X86_64_X87UP_CLASS,
825 X86_64_MEMORY_CLASS
826 };
827 static const char * const x86_64_reg_class_name[] =
828 {"no", "integer", "integerSI", "sse", "sseSF", "sseDF", "sseup", "x87", "x87up", "no"};
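/* An illustrative, non-normative example of the classification these
   classes describe: under the psABI a structure such as
     struct { double d; int i; };
   occupies two eightbytes; the first classifies as an SSE class (SSEDF
   here, so a DFmode move can be used) and the second as INTEGERSI, since
   only its low 32 bits carry data.  */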
829
830 #define MAX_CLASSES 4
831 static int classify_argument PARAMS ((enum machine_mode, tree,
832 enum x86_64_reg_class [MAX_CLASSES],
833 int));
834 static int examine_argument PARAMS ((enum machine_mode, tree, int, int *,
835 int *));
836 static rtx construct_container PARAMS ((enum machine_mode, tree, int, int, int,
837 const int *, int));
838 static enum x86_64_reg_class merge_classes PARAMS ((enum x86_64_reg_class,
839 enum x86_64_reg_class));
840 \f
841 /* Initialize the GCC target structure. */
842 #undef TARGET_ATTRIBUTE_TABLE
843 #define TARGET_ATTRIBUTE_TABLE ix86_attribute_table
844 #ifdef TARGET_DLLIMPORT_DECL_ATTRIBUTES
845 # undef TARGET_MERGE_DECL_ATTRIBUTES
846 # define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
847 #endif
848
849 #undef TARGET_COMP_TYPE_ATTRIBUTES
850 #define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes
851
852 #undef TARGET_INIT_BUILTINS
853 #define TARGET_INIT_BUILTINS ix86_init_builtins
854
855 #undef TARGET_EXPAND_BUILTIN
856 #define TARGET_EXPAND_BUILTIN ix86_expand_builtin
857
858 #undef TARGET_ASM_FUNCTION_EPILOGUE
859 #define TARGET_ASM_FUNCTION_EPILOGUE ix86_output_function_epilogue
860
861 #undef TARGET_ASM_OPEN_PAREN
862 #define TARGET_ASM_OPEN_PAREN ""
863 #undef TARGET_ASM_CLOSE_PAREN
864 #define TARGET_ASM_CLOSE_PAREN ""
865
866 #undef TARGET_ASM_ALIGNED_HI_OP
867 #define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT
868 #undef TARGET_ASM_ALIGNED_SI_OP
869 #define TARGET_ASM_ALIGNED_SI_OP ASM_LONG
870 #ifdef ASM_QUAD
871 #undef TARGET_ASM_ALIGNED_DI_OP
872 #define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD
873 #endif
874
875 #undef TARGET_ASM_UNALIGNED_HI_OP
876 #define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
877 #undef TARGET_ASM_UNALIGNED_SI_OP
878 #define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
879 #undef TARGET_ASM_UNALIGNED_DI_OP
880 #define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP
881
882 #undef TARGET_SCHED_ADJUST_COST
883 #define TARGET_SCHED_ADJUST_COST ix86_adjust_cost
884 #undef TARGET_SCHED_ISSUE_RATE
885 #define TARGET_SCHED_ISSUE_RATE ix86_issue_rate
886 #undef TARGET_SCHED_VARIABLE_ISSUE
887 #define TARGET_SCHED_VARIABLE_ISSUE ix86_variable_issue
888 #undef TARGET_SCHED_INIT
889 #define TARGET_SCHED_INIT ix86_sched_init
890 #undef TARGET_SCHED_REORDER
891 #define TARGET_SCHED_REORDER ix86_sched_reorder
892 #undef TARGET_SCHED_USE_DFA_PIPELINE_INTERFACE
893 #define TARGET_SCHED_USE_DFA_PIPELINE_INTERFACE \
894 ia32_use_dfa_pipeline_interface
895 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
896 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
897 ia32_multipass_dfa_lookahead
898
899 #ifdef HAVE_AS_TLS
900 #undef TARGET_HAVE_TLS
901 #define TARGET_HAVE_TLS true
902 #endif
903
904 #undef TARGET_ASM_OUTPUT_MI_THUNK
905 #define TARGET_ASM_OUTPUT_MI_THUNK x86_output_mi_thunk
906 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
907 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK x86_can_output_mi_thunk
908
909 struct gcc_target targetm = TARGET_INITIALIZER;
910 \f
911 /* Sometimes certain combinations of command options do not make
912 sense on a particular target machine. You can define a macro
913 `OVERRIDE_OPTIONS' to take account of this. This macro, if
914 defined, is executed once just after all the command options have
915 been parsed.
916
917 Don't use this macro to turn on various extra optimizations for
918 `-O'. That is what `OPTIMIZATION_OPTIONS' is for. */
919
920 void
921 override_options ()
922 {
923 int i;
924 /* Comes from final.c -- no real reason to change it. */
925 #define MAX_CODE_ALIGN 16
926
927 static struct ptt
928 {
929 const struct processor_costs *cost; /* Processor costs */
930 const int target_enable; /* Target flags to enable. */
931 const int target_disable; /* Target flags to disable. */
932 const int align_loop; /* Default alignments. */
933 const int align_loop_max_skip;
934 const int align_jump;
935 const int align_jump_max_skip;
936 const int align_func;
937 const int branch_cost;
938 }
939 const processor_target_table[PROCESSOR_max] =
940 {
941 {&i386_cost, 0, 0, 4, 3, 4, 3, 4, 1},
942 {&i486_cost, 0, 0, 16, 15, 16, 15, 16, 1},
943 {&pentium_cost, 0, 0, 16, 7, 16, 7, 16, 1},
944 {&pentiumpro_cost, 0, 0, 16, 15, 16, 7, 16, 1},
945 {&k6_cost, 0, 0, 32, 7, 32, 7, 32, 1},
946 {&athlon_cost, 0, 0, 16, 7, 64, 7, 16, 1},
947 {&pentium4_cost, 0, 0, 0, 0, 0, 0, 0, 1}
948 };
949
950 static const char * const cpu_names[] = TARGET_CPU_DEFAULT_NAMES;
951 static struct pta
952 {
953 const char *const name; /* processor name or nickname. */
954 const enum processor_type processor;
955 const enum pta_flags
956 {
957 PTA_SSE = 1,
958 PTA_SSE2 = 2,
959 PTA_MMX = 4,
960 PTA_PREFETCH_SSE = 8,
961 PTA_3DNOW = 16,
962 PTA_3DNOW_A = 64
963 } flags;
964 }
965 const processor_alias_table[] =
966 {
967 {"i386", PROCESSOR_I386, 0},
968 {"i486", PROCESSOR_I486, 0},
969 {"i586", PROCESSOR_PENTIUM, 0},
970 {"pentium", PROCESSOR_PENTIUM, 0},
971 {"pentium-mmx", PROCESSOR_PENTIUM, PTA_MMX},
972 {"winchip-c6", PROCESSOR_I486, PTA_MMX},
973 {"winchip2", PROCESSOR_I486, PTA_MMX | PTA_3DNOW},
974 {"c3", PROCESSOR_I486, PTA_MMX | PTA_3DNOW},
975 {"i686", PROCESSOR_PENTIUMPRO, 0},
976 {"pentiumpro", PROCESSOR_PENTIUMPRO, 0},
977 {"pentium2", PROCESSOR_PENTIUMPRO, PTA_MMX},
978 {"pentium3", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_PREFETCH_SSE},
979 {"pentium4", PROCESSOR_PENTIUM4, PTA_SSE | PTA_SSE2 |
980 PTA_MMX | PTA_PREFETCH_SSE},
981 {"k6", PROCESSOR_K6, PTA_MMX},
982 {"k6-2", PROCESSOR_K6, PTA_MMX | PTA_3DNOW},
983 {"k6-3", PROCESSOR_K6, PTA_MMX | PTA_3DNOW},
984 {"athlon", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
985 | PTA_3DNOW_A},
986 {"athlon-tbird", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE
987 | PTA_3DNOW | PTA_3DNOW_A},
988 {"athlon-4", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
989 | PTA_3DNOW_A | PTA_SSE},
990 {"athlon-xp", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
991 | PTA_3DNOW_A | PTA_SSE},
992 {"athlon-mp", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
993 | PTA_3DNOW_A | PTA_SSE},
994 };
995
996 int const pta_size = ARRAY_SIZE (processor_alias_table);
997
998 /* By default our XFmode is the 80-bit extended format. If we use
999 TFmode instead, it's also the 80-bit format, but with padding. */
1000 real_format_for_mode[XFmode - QFmode] = &ieee_extended_intel_96_format;
1001 real_format_for_mode[TFmode - QFmode] = &ieee_extended_intel_128_format;
1002
1003 /* Set the default values for switches whose default depends on TARGET_64BIT
1004 in case they weren't overwritten by command line options. */
1005 if (TARGET_64BIT)
1006 {
1007 if (flag_omit_frame_pointer == 2)
1008 flag_omit_frame_pointer = 1;
1009 if (flag_asynchronous_unwind_tables == 2)
1010 flag_asynchronous_unwind_tables = 1;
1011 if (flag_pcc_struct_return == 2)
1012 flag_pcc_struct_return = 0;
1013 }
1014 else
1015 {
1016 if (flag_omit_frame_pointer == 2)
1017 flag_omit_frame_pointer = 0;
1018 if (flag_asynchronous_unwind_tables == 2)
1019 flag_asynchronous_unwind_tables = 0;
1020 if (flag_pcc_struct_return == 2)
1021 flag_pcc_struct_return = 1;
1022 }
1023
1024 #ifdef SUBTARGET_OVERRIDE_OPTIONS
1025 SUBTARGET_OVERRIDE_OPTIONS;
1026 #endif
1027
1028 if (!ix86_cpu_string && ix86_arch_string)
1029 ix86_cpu_string = ix86_arch_string;
1030 if (!ix86_cpu_string)
1031 ix86_cpu_string = cpu_names [TARGET_CPU_DEFAULT];
1032 if (!ix86_arch_string)
1033 ix86_arch_string = TARGET_64BIT ? "athlon-4" : "i386";
1034
1035 if (ix86_cmodel_string != 0)
1036 {
1037 if (!strcmp (ix86_cmodel_string, "small"))
1038 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
1039 else if (flag_pic)
1040 sorry ("code model %s not supported in PIC mode", ix86_cmodel_string);
1041 else if (!strcmp (ix86_cmodel_string, "32"))
1042 ix86_cmodel = CM_32;
1043 else if (!strcmp (ix86_cmodel_string, "kernel") && !flag_pic)
1044 ix86_cmodel = CM_KERNEL;
1045 else if (!strcmp (ix86_cmodel_string, "medium") && !flag_pic)
1046 ix86_cmodel = CM_MEDIUM;
1047 else if (!strcmp (ix86_cmodel_string, "large") && !flag_pic)
1048 ix86_cmodel = CM_LARGE;
1049 else
1050 error ("bad value (%s) for -mcmodel= switch", ix86_cmodel_string);
1051 }
1052 else
1053 {
1054 ix86_cmodel = CM_32;
1055 if (TARGET_64BIT)
1056 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
1057 }
1058 if (ix86_asm_string != 0)
1059 {
1060 if (!strcmp (ix86_asm_string, "intel"))
1061 ix86_asm_dialect = ASM_INTEL;
1062 else if (!strcmp (ix86_asm_string, "att"))
1063 ix86_asm_dialect = ASM_ATT;
1064 else
1065 error ("bad value (%s) for -masm= switch", ix86_asm_string);
1066 }
1067 if ((TARGET_64BIT == 0) != (ix86_cmodel == CM_32))
1068 error ("code model `%s' not supported in the %s bit mode",
1069 ix86_cmodel_string, TARGET_64BIT ? "64" : "32");
1070 if (ix86_cmodel == CM_LARGE)
1071 sorry ("code model `large' not supported yet");
1072 if ((TARGET_64BIT != 0) != ((target_flags & MASK_64BIT) != 0))
1073 sorry ("%i-bit mode not compiled in",
1074 (target_flags & MASK_64BIT) ? 64 : 32);
1075
1076 for (i = 0; i < pta_size; i++)
1077 if (! strcmp (ix86_arch_string, processor_alias_table[i].name))
1078 {
1079 ix86_arch = processor_alias_table[i].processor;
1080 /* Default cpu tuning to the architecture. */
1081 ix86_cpu = ix86_arch;
1082 if (processor_alias_table[i].flags & PTA_MMX
1083 && !(target_flags_explicit & MASK_MMX))
1084 target_flags |= MASK_MMX;
1085 if (processor_alias_table[i].flags & PTA_3DNOW
1086 && !(target_flags_explicit & MASK_3DNOW))
1087 target_flags |= MASK_3DNOW;
1088 if (processor_alias_table[i].flags & PTA_3DNOW_A
1089 && !(target_flags_explicit & MASK_3DNOW_A))
1090 target_flags |= MASK_3DNOW_A;
1091 if (processor_alias_table[i].flags & PTA_SSE
1092 && !(target_flags_explicit & MASK_SSE))
1093 target_flags |= MASK_SSE;
1094 if (processor_alias_table[i].flags & PTA_SSE2
1095 && !(target_flags_explicit & MASK_SSE2))
1096 target_flags |= MASK_SSE2;
1097 if (processor_alias_table[i].flags & PTA_PREFETCH_SSE)
1098 x86_prefetch_sse = true;
1099 break;
1100 }
1101
1102 if (i == pta_size)
1103 error ("bad value (%s) for -march= switch", ix86_arch_string);
1104
1105 for (i = 0; i < pta_size; i++)
1106 if (! strcmp (ix86_cpu_string, processor_alias_table[i].name))
1107 {
1108 ix86_cpu = processor_alias_table[i].processor;
1109 break;
1110 }
1111 if (processor_alias_table[i].flags & PTA_PREFETCH_SSE)
1112 x86_prefetch_sse = true;
1113 if (i == pta_size)
1114 error ("bad value (%s) for -mcpu= switch", ix86_cpu_string);
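/* As an example of the two loops above: "-march=pentium3" selects
   PROCESSOR_PENTIUMPRO and, via PTA_MMX | PTA_SSE, turns on MASK_MMX and
   MASK_SSE, whereas "-mcpu=pentium3" only changes ix86_cpu, i.e. the
   scheduling and cost tuning, without enabling any new instructions.  */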
1115
1116 if (optimize_size)
1117 ix86_cost = &size_cost;
1118 else
1119 ix86_cost = processor_target_table[ix86_cpu].cost;
1120 target_flags |= processor_target_table[ix86_cpu].target_enable;
1121 target_flags &= ~processor_target_table[ix86_cpu].target_disable;
1122
1123 /* Arrange to set up i386_stack_locals for all functions. */
1124 init_machine_status = ix86_init_machine_status;
1125
1126 /* Validate -mregparm= value. */
1127 if (ix86_regparm_string)
1128 {
1129 i = atoi (ix86_regparm_string);
1130 if (i < 0 || i > REGPARM_MAX)
1131 error ("-mregparm=%d is not between 0 and %d", i, REGPARM_MAX);
1132 else
1133 ix86_regparm = i;
1134 }
1135 else
1136 if (TARGET_64BIT)
1137 ix86_regparm = REGPARM_MAX;
1138
1139 /* If the user has provided any of the -malign-* options,
1140 warn and use that value only if -falign-* is not set.
1141 Remove this code in GCC 3.2 or later. */
1142 if (ix86_align_loops_string)
1143 {
1144 warning ("-malign-loops is obsolete, use -falign-loops");
1145 if (align_loops == 0)
1146 {
1147 i = atoi (ix86_align_loops_string);
1148 if (i < 0 || i > MAX_CODE_ALIGN)
1149 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
1150 else
1151 align_loops = 1 << i;
1152 }
1153 }
1154
1155 if (ix86_align_jumps_string)
1156 {
1157 warning ("-malign-jumps is obsolete, use -falign-jumps");
1158 if (align_jumps == 0)
1159 {
1160 i = atoi (ix86_align_jumps_string);
1161 if (i < 0 || i > MAX_CODE_ALIGN)
1162 error ("-malign-jumps=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
1163 else
1164 align_jumps = 1 << i;
1165 }
1166 }
1167
1168 if (ix86_align_funcs_string)
1169 {
1170 warning ("-malign-functions is obsolete, use -falign-functions");
1171 if (align_functions == 0)
1172 {
1173 i = atoi (ix86_align_funcs_string);
1174 if (i < 0 || i > MAX_CODE_ALIGN)
1175 error ("-malign-functions=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
1176 else
1177 align_functions = 1 << i;
1178 }
1179 }
1180
1181 /* Default align_* from the processor table. */
1182 if (align_loops == 0)
1183 {
1184 align_loops = processor_target_table[ix86_cpu].align_loop;
1185 align_loops_max_skip = processor_target_table[ix86_cpu].align_loop_max_skip;
1186 }
1187 if (align_jumps == 0)
1188 {
1189 align_jumps = processor_target_table[ix86_cpu].align_jump;
1190 align_jumps_max_skip = processor_target_table[ix86_cpu].align_jump_max_skip;
1191 }
1192 if (align_functions == 0)
1193 {
1194 align_functions = processor_target_table[ix86_cpu].align_func;
1195 }
1196
1197 /* Validate -mpreferred-stack-boundary= value, or provide default.
1198 The default of 128 bits is for Pentium III's SSE __m128, but we
1199 don't want additional code to keep the stack aligned when
1200 optimizing for code size. */
1201 ix86_preferred_stack_boundary = (optimize_size
1202 ? TARGET_64BIT ? 128 : 32
1203 : 128);
1204 if (ix86_preferred_stack_boundary_string)
1205 {
1206 i = atoi (ix86_preferred_stack_boundary_string);
1207 if (i < (TARGET_64BIT ? 4 : 2) || i > 12)
1208 error ("-mpreferred-stack-boundary=%d is not between %d and 12", i,
1209 TARGET_64BIT ? 4 : 2);
1210 else
1211 ix86_preferred_stack_boundary = (1 << i) * BITS_PER_UNIT;
1212 }
1213
1214 /* Validate -mbranch-cost= value, or provide default. */
1215 ix86_branch_cost = processor_target_table[ix86_cpu].branch_cost;
1216 if (ix86_branch_cost_string)
1217 {
1218 i = atoi (ix86_branch_cost_string);
1219 if (i < 0 || i > 5)
1220 error ("-mbranch-cost=%d is not between 0 and 5", i);
1221 else
1222 ix86_branch_cost = i;
1223 }
1224
1225 if (ix86_tls_dialect_string)
1226 {
1227 if (strcmp (ix86_tls_dialect_string, "gnu") == 0)
1228 ix86_tls_dialect = TLS_DIALECT_GNU;
1229 else if (strcmp (ix86_tls_dialect_string, "sun") == 0)
1230 ix86_tls_dialect = TLS_DIALECT_SUN;
1231 else
1232 error ("bad value (%s) for -mtls-dialect= switch",
1233 ix86_tls_dialect_string);
1234 }
1235
1236 /* Keep nonleaf frame pointers. */
1237 if (TARGET_OMIT_LEAF_FRAME_POINTER)
1238 flag_omit_frame_pointer = 1;
1239
1240 /* If we're doing fast math, we don't care about comparison order
1241 wrt NaNs. This lets us use a shorter comparison sequence. */
1242 if (flag_unsafe_math_optimizations)
1243 target_flags &= ~MASK_IEEE_FP;
1244
1245 /* If the architecture always has an FPU, turn off NO_FANCY_MATH_387,
1246 since the insns won't need emulation. */
1247 if (x86_arch_always_fancy_math_387 & (1 << ix86_arch))
1248 target_flags &= ~MASK_NO_FANCY_MATH_387;
1249
1250 if (TARGET_64BIT)
1251 {
1252 if (TARGET_ALIGN_DOUBLE)
1253 error ("-malign-double makes no sense in the 64bit mode");
1254 if (TARGET_RTD)
1255 error ("-mrtd calling convention not supported in the 64bit mode");
1256 /* Enable by default the SSE and MMX builtins. */
1257 target_flags |= (MASK_SSE2 | MASK_SSE | MASK_MMX | MASK_128BIT_LONG_DOUBLE);
1258 ix86_fpmath = FPMATH_SSE;
1259 }
1260 else
1261 ix86_fpmath = FPMATH_387;
1262
1263 if (ix86_fpmath_string != 0)
1264 {
1265 if (! strcmp (ix86_fpmath_string, "387"))
1266 ix86_fpmath = FPMATH_387;
1267 else if (! strcmp (ix86_fpmath_string, "sse"))
1268 {
1269 if (!TARGET_SSE)
1270 {
1271 warning ("SSE instruction set disabled, using 387 arithmetics");
1272 ix86_fpmath = FPMATH_387;
1273 }
1274 else
1275 ix86_fpmath = FPMATH_SSE;
1276 }
1277 else if (! strcmp (ix86_fpmath_string, "387,sse")
1278 || ! strcmp (ix86_fpmath_string, "sse,387"))
1279 {
1280 if (!TARGET_SSE)
1281 {
1282 warning ("SSE instruction set disabled, using 387 arithmetics");
1283 ix86_fpmath = FPMATH_387;
1284 }
1285 else if (!TARGET_80387)
1286 {
1287 warning ("387 instruction set disabled, using SSE arithmetics");
1288 ix86_fpmath = FPMATH_SSE;
1289 }
1290 else
1291 ix86_fpmath = FPMATH_SSE | FPMATH_387;
1292 }
1293 else
1294 error ("bad value (%s) for -mfpmath= switch", ix86_fpmath_string);
1295 }
1296
1297 /* It makes no sense to ask for just SSE builtins, so MMX is also turned
1298 on by -msse. */
1299 if (TARGET_SSE)
1300 {
1301 target_flags |= MASK_MMX;
1302 x86_prefetch_sse = true;
1303 }
1304
1305 /* If it has 3DNow! it also has MMX, so MMX is also turned on by -m3dnow. */
1306 if (TARGET_3DNOW)
1307 {
1308 target_flags |= MASK_MMX;
1309 /* If we are targeting the Athlon architecture, enable the 3DNow!/MMX
1310 extensions it adds. */
1311 if (x86_3dnow_a & (1 << ix86_arch))
1312 target_flags |= MASK_3DNOW_A;
1313 }
1314 if ((x86_accumulate_outgoing_args & CPUMASK)
1315 && !(target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
1316 && !optimize_size)
1317 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
1318
1319 /* Figure out what ASM_GENERATE_INTERNAL_LABEL builds as a prefix. */
1320 {
1321 char *p;
1322 ASM_GENERATE_INTERNAL_LABEL (internal_label_prefix, "LX", 0);
1323 p = strchr (internal_label_prefix, 'X');
1324 internal_label_prefix_len = p - internal_label_prefix;
1325 *p = '\0';
1326 }
1327 }
1328 \f
1329 void
1330 optimization_options (level, size)
1331 int level;
1332 int size ATTRIBUTE_UNUSED;
1333 {
1334 /* For -O2 and beyond, turn off -fschedule-insns by default. It tends to
1335 make the problem with not enough registers even worse. */
1336 #ifdef INSN_SCHEDULING
1337 if (level > 1)
1338 flag_schedule_insns = 0;
1339 #endif
1340
1341 /* The default values of these switches depend on TARGET_64BIT,
1342 which is not known at this moment. Mark these values with 2 and
1343 let the user override them. If there is no command line option
1344 specifying them, we will set the defaults in override_options. */
1345 if (optimize >= 1)
1346 flag_omit_frame_pointer = 2;
1347 flag_pcc_struct_return = 2;
1348 flag_asynchronous_unwind_tables = 2;
1349 }
1350 \f
1351 /* Table of valid machine attributes. */
1352 const struct attribute_spec ix86_attribute_table[] =
1353 {
1354 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
1355 /* Stdcall attribute says callee is responsible for popping arguments
1356 if they are not variable. */
1357 { "stdcall", 0, 0, false, true, true, ix86_handle_cdecl_attribute },
1358 /* Cdecl attribute says the callee is a normal C declaration */
1359 { "cdecl", 0, 0, false, true, true, ix86_handle_cdecl_attribute },
1360 /* Regparm attribute specifies how many integer arguments are to be
1361 passed in registers. */
1362 { "regparm", 1, 1, false, true, true, ix86_handle_regparm_attribute },
1363 #ifdef TARGET_DLLIMPORT_DECL_ATTRIBUTES
1364 { "dllimport", 0, 0, false, false, false, ix86_handle_dll_attribute },
1365 { "dllexport", 0, 0, false, false, false, ix86_handle_dll_attribute },
1366 { "shared", 0, 0, true, false, false, ix86_handle_shared_attribute },
1367 #endif
1368 { NULL, 0, 0, false, false, false, NULL }
1369 };
1370
1371 /* Handle a "cdecl" or "stdcall" attribute;
1372 arguments as in struct attribute_spec.handler. */
1373 static tree
1374 ix86_handle_cdecl_attribute (node, name, args, flags, no_add_attrs)
1375 tree *node;
1376 tree name;
1377 tree args ATTRIBUTE_UNUSED;
1378 int flags ATTRIBUTE_UNUSED;
1379 bool *no_add_attrs;
1380 {
1381 if (TREE_CODE (*node) != FUNCTION_TYPE
1382 && TREE_CODE (*node) != METHOD_TYPE
1383 && TREE_CODE (*node) != FIELD_DECL
1384 && TREE_CODE (*node) != TYPE_DECL)
1385 {
1386 warning ("`%s' attribute only applies to functions",
1387 IDENTIFIER_POINTER (name));
1388 *no_add_attrs = true;
1389 }
1390
1391 if (TARGET_64BIT)
1392 {
1393 warning ("`%s' attribute ignored", IDENTIFIER_POINTER (name));
1394 *no_add_attrs = true;
1395 }
1396
1397 return NULL_TREE;
1398 }
1399
1400 /* Handle a "regparm" attribute;
1401 arguments as in struct attribute_spec.handler. */
1402 static tree
1403 ix86_handle_regparm_attribute (node, name, args, flags, no_add_attrs)
1404 tree *node;
1405 tree name;
1406 tree args;
1407 int flags ATTRIBUTE_UNUSED;
1408 bool *no_add_attrs;
1409 {
1410 if (TREE_CODE (*node) != FUNCTION_TYPE
1411 && TREE_CODE (*node) != METHOD_TYPE
1412 && TREE_CODE (*node) != FIELD_DECL
1413 && TREE_CODE (*node) != TYPE_DECL)
1414 {
1415 warning ("`%s' attribute only applies to functions",
1416 IDENTIFIER_POINTER (name));
1417 *no_add_attrs = true;
1418 }
1419 else
1420 {
1421 tree cst;
1422
1423 cst = TREE_VALUE (args);
1424 if (TREE_CODE (cst) != INTEGER_CST)
1425 {
1426 warning ("`%s' attribute requires an integer constant argument",
1427 IDENTIFIER_POINTER (name));
1428 *no_add_attrs = true;
1429 }
1430 else if (compare_tree_int (cst, REGPARM_MAX) > 0)
1431 {
1432 warning ("argument to `%s' attribute larger than %d",
1433 IDENTIFIER_POINTER (name), REGPARM_MAX);
1434 *no_add_attrs = true;
1435 }
1436 }
1437
1438 return NULL_TREE;
1439 }
1440
1441 /* Return 0 if the attributes for two types are incompatible, 1 if they
1442 are compatible, and 2 if they are nearly compatible (which causes a
1443 warning to be generated). */
1444
1445 static int
1446 ix86_comp_type_attributes (type1, type2)
1447 tree type1;
1448 tree type2;
1449 {
1450 /* Check for mismatch of non-default calling convention. */
1451 const char *const rtdstr = TARGET_RTD ? "cdecl" : "stdcall";
1452
1453 if (TREE_CODE (type1) != FUNCTION_TYPE)
1454 return 1;
1455
1456 /* Check for mismatched return types (cdecl vs stdcall). */
1457 if (!lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type1))
1458 != !lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type2)))
1459 return 0;
1460 return 1;
1461 }
1462 \f
1463 /* Return the regparm value for a function with the indicated TYPE. */
1464
1465 static int
1466 ix86_fntype_regparm (type)
1467 tree type;
1468 {
1469 tree attr;
1470
1471 attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (type));
1472 if (attr)
1473 return TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
1474 else
1475 return ix86_regparm;
1476 }
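/* For example, a function type carrying __attribute__ ((regparm (3)))
   yields 3 here, while a type without the attribute falls back to the
   -mregparm / ix86_regparm default.  */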
1477
1478 /* Value is the number of bytes of arguments automatically
1479 popped when returning from a subroutine call.
1480 FUNDECL is the declaration node of the function (as a tree),
1481 FUNTYPE is the data type of the function (as a tree),
1482 or for a library call it is an identifier node for the subroutine name.
1483 SIZE is the number of bytes of arguments passed on the stack.
1484
1485 On the 80386, the RTD insn may be used to pop them if the number
1486 of args is fixed, but if the number is variable then the caller
1487 must pop them all. RTD can't be used for library calls now
1488 because the library is compiled with the Unix compiler.
1489 Use of RTD is a selectable option, since it is incompatible with
1490 standard Unix calling sequences. If the option is not selected,
1491 the caller must always pop the args.
1492
1493 The attribute stdcall is equivalent to RTD on a per module basis. */
1494
1495 int
1496 ix86_return_pops_args (fundecl, funtype, size)
1497 tree fundecl;
1498 tree funtype;
1499 int size;
1500 {
1501 int rtd = TARGET_RTD && (!fundecl || TREE_CODE (fundecl) != IDENTIFIER_NODE);
1502
1503 /* Cdecl functions override -mrtd, and never pop the stack. */
1504 if (! lookup_attribute ("cdecl", TYPE_ATTRIBUTES (funtype))) {
1505
1506 /* Stdcall functions will pop the stack if not variable args. */
1507 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (funtype)))
1508 rtd = 1;
1509
1510 if (rtd
1511 && (TYPE_ARG_TYPES (funtype) == NULL_TREE
1512 || (TREE_VALUE (tree_last (TYPE_ARG_TYPES (funtype)))
1513 == void_type_node)))
1514 return size;
1515 }
1516
1517 /* Lose any fake structure return argument if it is passed on the stack. */
1518 if (aggregate_value_p (TREE_TYPE (funtype))
1519 && !TARGET_64BIT)
1520 {
1521 int nregs = ix86_fntype_regparm (funtype);
1522
1523 if (!nregs)
1524 return GET_MODE_SIZE (Pmode);
1525 }
1526
1527 return 0;
1528 }
1529 \f
1530 /* Argument support functions. */
1531
1532 /* Return true when a register may be used to pass function parameters. */
1533 bool
1534 ix86_function_arg_regno_p (regno)
1535 int regno;
1536 {
1537 int i;
1538 if (!TARGET_64BIT)
1539 return (regno < REGPARM_MAX
1540 || (TARGET_SSE && SSE_REGNO_P (regno) && !fixed_regs[regno]));
1541 if (SSE_REGNO_P (regno) && TARGET_SSE)
1542 return true;
1543 /* RAX is used as a hidden argument to va_arg functions. */
1544 if (!regno)
1545 return true;
1546 for (i = 0; i < REGPARM_MAX; i++)
1547 if (regno == x86_64_int_parameter_registers[i])
1548 return true;
1549 return false;
1550 }
1551
1552 /* Initialize a variable CUM of type CUMULATIVE_ARGS
1553 for a call to a function whose data type is FNTYPE.
1554 For a library call, FNTYPE is 0. */
1555
1556 void
1557 init_cumulative_args (cum, fntype, libname)
1558 CUMULATIVE_ARGS *cum; /* Argument info to initialize */
1559 tree fntype; /* tree ptr for function decl */
1560 rtx libname; /* SYMBOL_REF of library name or 0 */
1561 {
1562 static CUMULATIVE_ARGS zero_cum;
1563 tree param, next_param;
1564
1565 if (TARGET_DEBUG_ARG)
1566 {
1567 fprintf (stderr, "\ninit_cumulative_args (");
1568 if (fntype)
1569 fprintf (stderr, "fntype code = %s, ret code = %s",
1570 tree_code_name[(int) TREE_CODE (fntype)],
1571 tree_code_name[(int) TREE_CODE (TREE_TYPE (fntype))]);
1572 else
1573 fprintf (stderr, "no fntype");
1574
1575 if (libname)
1576 fprintf (stderr, ", libname = %s", XSTR (libname, 0));
1577 }
1578
1579 *cum = zero_cum;
1580
1581 /* Set up the number of registers to use for passing arguments. */
1582 cum->nregs = ix86_regparm;
1583 cum->sse_nregs = SSE_REGPARM_MAX;
1584 if (fntype && !TARGET_64BIT)
1585 {
1586 tree attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (fntype));
1587
1588 if (attr)
1589 cum->nregs = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
1590 }
1591 cum->maybe_vaarg = false;
1592
1593 /* Determine if this function has variable arguments. This is
1594 indicated by the last argument being 'void_type_node' if there
1595 are no variable arguments. If there are variable arguments, then
1596 we won't pass anything in registers. */
1597
1598 if (cum->nregs)
1599 {
1600 for (param = (fntype) ? TYPE_ARG_TYPES (fntype) : 0;
1601 param != 0; param = next_param)
1602 {
1603 next_param = TREE_CHAIN (param);
1604 if (next_param == 0 && TREE_VALUE (param) != void_type_node)
1605 {
1606 if (!TARGET_64BIT)
1607 cum->nregs = 0;
1608 cum->maybe_vaarg = true;
1609 }
1610 }
1611 }
1612 if ((!fntype && !libname)
1613 || (fntype && !TYPE_ARG_TYPES (fntype)))
1614 cum->maybe_vaarg = 1;
1615
1616 if (TARGET_DEBUG_ARG)
1617 fprintf (stderr, ", nregs=%d )\n", cum->nregs);
1618
1619 return;
1620 }
1621
1622 /* x86-64 register passing implementation. See the x86-64 ABI for details. The goal
1623 of this code is to classify each 8 bytes of an incoming argument by register
1624 class and assign registers accordingly. */
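/* A worked example, stated informally and assuming the usual psABI rules:

     struct s { long l; double d; };

   is 16 bytes, so it spans two eightbytes; classify_argument below fills
   classes[] with { X86_64_INTEGER_CLASS, X86_64_SSEDF_CLASS }, and the
   argument ends up split between a general purpose register and an SSE
   register.  */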
1625
1626 /* Return the union class of CLASS1 and CLASS2.
1627 See the x86-64 PS ABI for details. */
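/* For example, by the rules spelled out below, merging
   X86_64_INTEGERSI_CLASS with X86_64_SSESF_CLASS (say an int and a float
   overlaid in a union) yields X86_64_INTEGERSI_CLASS, i.e. the integer
   class wins.  */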
1628
1629 static enum x86_64_reg_class
1630 merge_classes (class1, class2)
1631 enum x86_64_reg_class class1, class2;
1632 {
1633 /* Rule #1: If both classes are equal, this is the resulting class. */
1634 if (class1 == class2)
1635 return class1;
1636
1637 /* Rule #2: If one of the classes is NO_CLASS, the resulting class is
1638 the other class. */
1639 if (class1 == X86_64_NO_CLASS)
1640 return class2;
1641 if (class2 == X86_64_NO_CLASS)
1642 return class1;
1643
1644 /* Rule #3: If one of the classes is MEMORY, the result is MEMORY. */
1645 if (class1 == X86_64_MEMORY_CLASS || class2 == X86_64_MEMORY_CLASS)
1646 return X86_64_MEMORY_CLASS;
1647
1648 /* Rule #4: If one of the classes is INTEGER, the result is INTEGER. */
1649 if ((class1 == X86_64_INTEGERSI_CLASS && class2 == X86_64_SSESF_CLASS)
1650 || (class2 == X86_64_INTEGERSI_CLASS && class1 == X86_64_SSESF_CLASS))
1651 return X86_64_INTEGERSI_CLASS;
1652 if (class1 == X86_64_INTEGER_CLASS || class1 == X86_64_INTEGERSI_CLASS
1653 || class2 == X86_64_INTEGER_CLASS || class2 == X86_64_INTEGERSI_CLASS)
1654 return X86_64_INTEGER_CLASS;
1655
1656 /* Rule #5: If one of the classes is X87 or X87UP class, MEMORY is used. */
1657 if (class1 == X86_64_X87_CLASS || class1 == X86_64_X87UP_CLASS
1658 || class2 == X86_64_X87_CLASS || class2 == X86_64_X87UP_CLASS)
1659 return X86_64_MEMORY_CLASS;
1660
1661 /* Rule #6: Otherwise class SSE is used. */
1662 return X86_64_SSE_CLASS;
1663 }
1664
1665 /* Classify the argument of type TYPE and mode MODE.
1666 CLASSES will be filled by the register class used to pass each word
1667 of the operand. The number of words is returned. In case the parameter
1668 should be passed in memory, 0 is returned. As a special case for zero
1669 sized containers, classes[0] will be NO_CLASS and 1 is returned.
1670
1671 BIT_OFFSET is used internally for handling records and specifies the
1672 offset in bits modulo 256 to avoid overflow cases.
1673
1674 See the x86-64 PS ABI for details.
1675 */
1676
1677 static int
1678 classify_argument (mode, type, classes, bit_offset)
1679 enum machine_mode mode;
1680 tree type;
1681 enum x86_64_reg_class classes[MAX_CLASSES];
1682 int bit_offset;
1683 {
1684 int bytes =
1685 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
1686 int words = (bytes + (bit_offset % 64) / 8 + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
1687
1688 /* Variable sized entities are always passed/returned in memory. */
1689 if (bytes < 0)
1690 return 0;
1691
1692 if (type && AGGREGATE_TYPE_P (type))
1693 {
1694 int i;
1695 tree field;
1696 enum x86_64_reg_class subclasses[MAX_CLASSES];
1697
1698 /* On x86-64 we pass structures larger than 16 bytes on the stack. */
1699 if (bytes > 16)
1700 return 0;
1701
1702 for (i = 0; i < words; i++)
1703 classes[i] = X86_64_NO_CLASS;
1704
1705 /* Zero sized arrays or structures are NO_CLASS. We return 0 to
1706 signal the memory class, so handle this as a special case. */
1707 if (!words)
1708 {
1709 classes[0] = X86_64_NO_CLASS;
1710 return 1;
1711 }
1712
1713 /* Classify each field of record and merge classes. */
1714 if (TREE_CODE (type) == RECORD_TYPE)
1715 {
1716 /* For classes first merge in the field of the subclasses. */
1717 if (TYPE_BINFO (type) != NULL && TYPE_BINFO_BASETYPES (type) != NULL)
1718 {
1719 tree bases = TYPE_BINFO_BASETYPES (type);
1720 int n_bases = TREE_VEC_LENGTH (bases);
1721 int i;
1722
1723 for (i = 0; i < n_bases; ++i)
1724 {
1725 tree binfo = TREE_VEC_ELT (bases, i);
1726 int num;
1727 int offset = tree_low_cst (BINFO_OFFSET (binfo), 0) * 8;
1728 tree type = BINFO_TYPE (binfo);
1729
1730 num = classify_argument (TYPE_MODE (type),
1731 type, subclasses,
1732 (offset + bit_offset) % 256);
1733 if (!num)
1734 return 0;
1735 for (i = 0; i < num; i++)
1736 {
1737 int pos = (offset + (bit_offset % 64)) / 8 / 8;
1738 classes[i + pos] =
1739 merge_classes (subclasses[i], classes[i + pos]);
1740 }
1741 }
1742 }
1743 /* And now merge the fields of structure. */
1744 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
1745 {
1746 if (TREE_CODE (field) == FIELD_DECL)
1747 {
1748 int num;
1749
1750 /* Bitfields are always classified as integer. Handle them
1751 early, since later code would consider them to be
1752 misaligned integers. */
1753 if (DECL_BIT_FIELD (field))
1754 {
1755 for (i = int_bit_position (field) / 8 / 8;
1756 i < (int_bit_position (field)
1757 + tree_low_cst (DECL_SIZE (field), 0)
1758 + 63) / 8 / 8; i++)
1759 classes[i] =
1760 merge_classes (X86_64_INTEGER_CLASS,
1761 classes[i]);
1762 }
1763 else
1764 {
1765 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
1766 TREE_TYPE (field), subclasses,
1767 (int_bit_position (field)
1768 + bit_offset) % 256);
1769 if (!num)
1770 return 0;
1771 for (i = 0; i < num; i++)
1772 {
1773 int pos =
1774 (int_bit_position (field) + (bit_offset % 64)) / 8 / 8;
1775 classes[i + pos] =
1776 merge_classes (subclasses[i], classes[i + pos]);
1777 }
1778 }
1779 }
1780 }
1781 }
1782 /* Arrays are handled as small records. */
1783 else if (TREE_CODE (type) == ARRAY_TYPE)
1784 {
1785 int num;
1786 num = classify_argument (TYPE_MODE (TREE_TYPE (type)),
1787 TREE_TYPE (type), subclasses, bit_offset);
1788 if (!num)
1789 return 0;
1790
1791 /* The partial classes are now full classes. */
1792 if (subclasses[0] == X86_64_SSESF_CLASS && bytes != 4)
1793 subclasses[0] = X86_64_SSE_CLASS;
1794 if (subclasses[0] == X86_64_INTEGERSI_CLASS && bytes != 4)
1795 subclasses[0] = X86_64_INTEGER_CLASS;
1796
1797 for (i = 0; i < words; i++)
1798 classes[i] = subclasses[i % num];
1799 }
1800 /* Unions are similar to RECORD_TYPE but offset is always 0. */
1801 else if (TREE_CODE (type) == UNION_TYPE
1802 || TREE_CODE (type) == QUAL_UNION_TYPE)
1803 {
1804 /* For classes first merge in the field of the subclasses. */
1805 if (TYPE_BINFO (type) != NULL && TYPE_BINFO_BASETYPES (type) != NULL)
1806 {
1807 tree bases = TYPE_BINFO_BASETYPES (type);
1808 int n_bases = TREE_VEC_LENGTH (bases);
1809 int i;
1810
1811 for (i = 0; i < n_bases; ++i)
1812 {
1813 tree binfo = TREE_VEC_ELT (bases, i);
1814 int num;
1815 int offset = tree_low_cst (BINFO_OFFSET (binfo), 0) * 8;
1816 tree type = BINFO_TYPE (binfo);
1817
1818 num = classify_argument (TYPE_MODE (type),
1819 type, subclasses,
1820 (offset + (bit_offset % 64)) % 256);
1821 if (!num)
1822 return 0;
1823 for (i = 0; i < num; i++)
1824 {
1825 int pos = (offset + (bit_offset % 64)) / 8 / 8;
1826 classes[i + pos] =
1827 merge_classes (subclasses[i], classes[i + pos]);
1828 }
1829 }
1830 }
1831 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
1832 {
1833 if (TREE_CODE (field) == FIELD_DECL)
1834 {
1835 int num;
1836 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
1837 TREE_TYPE (field), subclasses,
1838 bit_offset);
1839 if (!num)
1840 return 0;
1841 for (i = 0; i < num; i++)
1842 classes[i] = merge_classes (subclasses[i], classes[i]);
1843 }
1844 }
1845 }
1846 else
1847 abort ();
1848
1849 /* Final merger cleanup. */
1850 for (i = 0; i < words; i++)
1851 {
1852 /* If one class is MEMORY, everything should be passed in
1853 memory. */
1854 if (classes[i] == X86_64_MEMORY_CLASS)
1855 return 0;
1856
1857 /* The X86_64_SSEUP_CLASS should always be preceded by
1858 X86_64_SSE_CLASS. */
1859 if (classes[i] == X86_64_SSEUP_CLASS
1860 && (i == 0 || classes[i - 1] != X86_64_SSE_CLASS))
1861 classes[i] = X86_64_SSE_CLASS;
1862
1863 /* X86_64_X87UP_CLASS should be preceded by X86_64_X87_CLASS. */
1864 if (classes[i] == X86_64_X87UP_CLASS
1865 && (i == 0 || classes[i - 1] != X86_64_X87_CLASS))
1866 classes[i] = X86_64_SSE_CLASS;
1867 }
1868 return words;
1869 }
1870
1871 /* Compute alignment needed. We align all types to natural boundaries with
1872 exception of XFmode that is aligned to 64bits. */
1873 if (mode != VOIDmode && mode != BLKmode)
1874 {
1875 int mode_alignment = GET_MODE_BITSIZE (mode);
1876
1877 if (mode == XFmode)
1878 mode_alignment = 128;
1879 else if (mode == XCmode)
1880 mode_alignment = 256;
1881 /* Misaligned fields are always returned in memory. */
1882 if (bit_offset % mode_alignment)
1883 return 0;
1884 }
1885
1886 /* Classification of atomic types. */
1887 switch (mode)
1888 {
1889 case DImode:
1890 case SImode:
1891 case HImode:
1892 case QImode:
1893 case CSImode:
1894 case CHImode:
1895 case CQImode:
1896 if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
1897 classes[0] = X86_64_INTEGERSI_CLASS;
1898 else
1899 classes[0] = X86_64_INTEGER_CLASS;
1900 return 1;
1901 case CDImode:
1902 case TImode:
1903 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
1904 return 2;
1905 case CTImode:
1906 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
1907 classes[2] = classes[3] = X86_64_INTEGER_CLASS;
1908 return 4;
1909 case SFmode:
1910 if (!(bit_offset % 64))
1911 classes[0] = X86_64_SSESF_CLASS;
1912 else
1913 classes[0] = X86_64_SSE_CLASS;
1914 return 1;
1915 case DFmode:
1916 classes[0] = X86_64_SSEDF_CLASS;
1917 return 1;
1918 case TFmode:
1919 classes[0] = X86_64_X87_CLASS;
1920 classes[1] = X86_64_X87UP_CLASS;
1921 return 2;
1922 case TCmode:
1923 classes[0] = X86_64_X87_CLASS;
1924 classes[1] = X86_64_X87UP_CLASS;
1925 classes[2] = X86_64_X87_CLASS;
1926 classes[3] = X86_64_X87UP_CLASS;
1927 return 4;
1928 case DCmode:
1929 classes[0] = X86_64_SSEDF_CLASS;
1930 classes[1] = X86_64_SSEDF_CLASS;
1931 return 2;
1932 case SCmode:
1933 classes[0] = X86_64_SSE_CLASS;
1934 return 1;
1935 case V4SFmode:
1936 case V4SImode:
1937 case V16QImode:
1938 case V8HImode:
1939 case V2DFmode:
1940 case V2DImode:
1941 classes[0] = X86_64_SSE_CLASS;
1942 classes[1] = X86_64_SSEUP_CLASS;
1943 return 2;
1944 case V2SFmode:
1945 case V2SImode:
1946 case V4HImode:
1947 case V8QImode:
1948 return 0;
1949 case BLKmode:
1950 case VOIDmode:
1951 return 0;
1952 default:
1953 abort ();
1954 }
1955 }
1956
1957 /* Examine the argument and set the number of registers required in each
1958 class. Return 0 iff the parameter should be passed in memory. */
1959 static int
1960 examine_argument (mode, type, in_return, int_nregs, sse_nregs)
1961 enum machine_mode mode;
1962 tree type;
1963 int *int_nregs, *sse_nregs;
1964 int in_return;
1965 {
1966 enum x86_64_reg_class class[MAX_CLASSES];
1967 int n = classify_argument (mode, type, class, 0);
1968
1969 *int_nregs = 0;
1970 *sse_nregs = 0;
1971 if (!n)
1972 return 0;
1973 for (n--; n >= 0; n--)
1974 switch (class[n])
1975 {
1976 case X86_64_INTEGER_CLASS:
1977 case X86_64_INTEGERSI_CLASS:
1978 (*int_nregs)++;
1979 break;
1980 case X86_64_SSE_CLASS:
1981 case X86_64_SSESF_CLASS:
1982 case X86_64_SSEDF_CLASS:
1983 (*sse_nregs)++;
1984 break;
1985 case X86_64_NO_CLASS:
1986 case X86_64_SSEUP_CLASS:
1987 break;
1988 case X86_64_X87_CLASS:
1989 case X86_64_X87UP_CLASS:
1990 if (!in_return)
1991 return 0;
1992 break;
1993 case X86_64_MEMORY_CLASS:
1994 abort ();
1995 }
1996 return 1;
1997 }
1998 /* Construct a container for the argument used by the GCC interface. See
1999 FUNCTION_ARG for the detailed description. */
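/* As an informal illustration, a 16 byte struct classified as
   { INTEGER, SSEDF } (see the example near classify_argument) comes back
   from this function as roughly

     (parallel [(expr_list (reg:DI di)   (const_int 0))
                (expr_list (reg:DF xmm0) (const_int 8))])

   one EXPR_LIST per eightbyte, pairing the register with the byte offset
   of that eightbyte within the argument.  */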
2000 static rtx
2001 construct_container (mode, type, in_return, nintregs, nsseregs, intreg, sse_regno)
2002 enum machine_mode mode;
2003 tree type;
2004 int in_return;
2005 int nintregs, nsseregs;
2006 const int * intreg;
2007 int sse_regno;
2008 {
2009 enum machine_mode tmpmode;
2010 int bytes =
2011 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
2012 enum x86_64_reg_class class[MAX_CLASSES];
2013 int n;
2014 int i;
2015 int nexps = 0;
2016 int needed_sseregs, needed_intregs;
2017 rtx exp[MAX_CLASSES];
2018 rtx ret;
2019
2020 n = classify_argument (mode, type, class, 0);
2021 if (TARGET_DEBUG_ARG)
2022 {
2023 if (!n)
2024 fprintf (stderr, "Memory class\n");
2025 else
2026 {
2027 fprintf (stderr, "Classes:");
2028 for (i = 0; i < n; i++)
2029 {
2030 fprintf (stderr, " %s", x86_64_reg_class_name[class[i]]);
2031 }
2032 fprintf (stderr, "\n");
2033 }
2034 }
2035 if (!n)
2036 return NULL;
2037 if (!examine_argument (mode, type, in_return, &needed_intregs, &needed_sseregs))
2038 return NULL;
2039 if (needed_intregs > nintregs || needed_sseregs > nsseregs)
2040 return NULL;
2041
2042 /* First construct simple cases. Avoid SCmode, since we want to use
2043 a single register to pass this type. */
2044 if (n == 1 && mode != SCmode)
2045 switch (class[0])
2046 {
2047 case X86_64_INTEGER_CLASS:
2048 case X86_64_INTEGERSI_CLASS:
2049 return gen_rtx_REG (mode, intreg[0]);
2050 case X86_64_SSE_CLASS:
2051 case X86_64_SSESF_CLASS:
2052 case X86_64_SSEDF_CLASS:
2053 return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
2054 case X86_64_X87_CLASS:
2055 return gen_rtx_REG (mode, FIRST_STACK_REG);
2056 case X86_64_NO_CLASS:
2057 /* Zero sized array, struct or class. */
2058 return NULL;
2059 default:
2060 abort ();
2061 }
2062 if (n == 2 && class[0] == X86_64_SSE_CLASS && class[1] == X86_64_SSEUP_CLASS)
2063 return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
2064 if (n == 2
2065 && class[0] == X86_64_X87_CLASS && class[1] == X86_64_X87UP_CLASS)
2066 return gen_rtx_REG (TFmode, FIRST_STACK_REG);
2067 if (n == 2 && class[0] == X86_64_INTEGER_CLASS
2068 && class[1] == X86_64_INTEGER_CLASS
2069 && (mode == CDImode || mode == TImode)
2070 && intreg[0] + 1 == intreg[1])
2071 return gen_rtx_REG (mode, intreg[0]);
2072 if (n == 4
2073 && class[0] == X86_64_X87_CLASS && class[1] == X86_64_X87UP_CLASS
2074 && class[2] == X86_64_X87_CLASS && class[3] == X86_64_X87UP_CLASS)
2075 return gen_rtx_REG (TCmode, FIRST_STACK_REG);
2076
2077 /* Otherwise figure out the entries of the PARALLEL. */
2078 for (i = 0; i < n; i++)
2079 {
2080 switch (class[i])
2081 {
2082 case X86_64_NO_CLASS:
2083 break;
2084 case X86_64_INTEGER_CLASS:
2085 case X86_64_INTEGERSI_CLASS:
2086 /* Merge TImodes on aligned occasions here too. */
2087 if (i * 8 + 8 > bytes)
2088 tmpmode = mode_for_size ((bytes - i * 8) * BITS_PER_UNIT, MODE_INT, 0);
2089 else if (class[i] == X86_64_INTEGERSI_CLASS)
2090 tmpmode = SImode;
2091 else
2092 tmpmode = DImode;
2093 /* We've requested 24 bytes we don't have mode for. Use DImode. */
2094 if (tmpmode == BLKmode)
2095 tmpmode = DImode;
2096 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
2097 gen_rtx_REG (tmpmode, *intreg),
2098 GEN_INT (i*8));
2099 intreg++;
2100 break;
2101 case X86_64_SSESF_CLASS:
2102 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
2103 gen_rtx_REG (SFmode,
2104 SSE_REGNO (sse_regno)),
2105 GEN_INT (i*8));
2106 sse_regno++;
2107 break;
2108 case X86_64_SSEDF_CLASS:
2109 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
2110 gen_rtx_REG (DFmode,
2111 SSE_REGNO (sse_regno)),
2112 GEN_INT (i*8));
2113 sse_regno++;
2114 break;
2115 case X86_64_SSE_CLASS:
2116 if (i < n - 1 && class[i + 1] == X86_64_SSEUP_CLASS)
2117 tmpmode = TImode;
2118 else
2119 tmpmode = DImode;
2120 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
2121 gen_rtx_REG (tmpmode,
2122 SSE_REGNO (sse_regno)),
2123 GEN_INT (i*8));
2124 if (tmpmode == TImode)
2125 i++;
2126 sse_regno++;
2127 break;
2128 default:
2129 abort ();
2130 }
2131 }
2132 ret = gen_rtx_PARALLEL (mode, rtvec_alloc (nexps));
2133 for (i = 0; i < nexps; i++)
2134 XVECEXP (ret, 0, i) = exp [i];
2135 return ret;
2136 }
2137
2138 /* Update the data in CUM to advance over an argument
2139 of mode MODE and data type TYPE.
2140 (TYPE is null for libcalls where that information may not be available.) */
2141
2142 void
2143 function_arg_advance (cum, mode, type, named)
2144 CUMULATIVE_ARGS *cum; /* current arg information */
2145 enum machine_mode mode; /* current arg mode */
2146 tree type; /* type of the argument or 0 if lib support */
2147 int named; /* whether or not the argument was named */
2148 {
2149 int bytes =
2150 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
2151 int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
2152
2153 if (TARGET_DEBUG_ARG)
2154 fprintf (stderr,
2155 "function_adv (sz=%d, wds=%2d, nregs=%d, mode=%s, named=%d)\n\n",
2156 words, cum->words, cum->nregs, GET_MODE_NAME (mode), named);
2157 if (TARGET_64BIT)
2158 {
2159 int int_nregs, sse_nregs;
2160 if (!examine_argument (mode, type, 0, &int_nregs, &sse_nregs))
2161 cum->words += words;
2162 else if (sse_nregs <= cum->sse_nregs && int_nregs <= cum->nregs)
2163 {
2164 cum->nregs -= int_nregs;
2165 cum->sse_nregs -= sse_nregs;
2166 cum->regno += int_nregs;
2167 cum->sse_regno += sse_nregs;
2168 }
2169 else
2170 cum->words += words;
2171 }
2172 else
2173 {
2174 if (TARGET_SSE && mode == TImode)
2175 {
2176 cum->sse_words += words;
2177 cum->sse_nregs -= 1;
2178 cum->sse_regno += 1;
2179 if (cum->sse_nregs <= 0)
2180 {
2181 cum->sse_nregs = 0;
2182 cum->sse_regno = 0;
2183 }
2184 }
2185 else
2186 {
2187 cum->words += words;
2188 cum->nregs -= words;
2189 cum->regno += words;
2190
2191 if (cum->nregs <= 0)
2192 {
2193 cum->nregs = 0;
2194 cum->regno = 0;
2195 }
2196 }
2197 }
2198 return;
2199 }
2200
2201 /* Define where to put the arguments to a function.
2202 Value is zero to push the argument on the stack,
2203 or a hard register in which to store the argument.
2204
2205 MODE is the argument's machine mode.
2206 TYPE is the data type of the argument (as a tree).
2207 This is null for libcalls where that information may
2208 not be available.
2209 CUM is a variable of type CUMULATIVE_ARGS which gives info about
2210 the preceding args and about the function being called.
2211 NAMED is nonzero if this argument is a named parameter
2212 (otherwise it is an extra parameter matching an ellipsis). */
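/* Hypothetical illustration (assuming this port's usual register
   numbering, where regno 0/1/2 are %eax/%edx/%ecx): for

     int __attribute__((regparm(3))) f (int a, int b, int c);

   the 32-bit branch below hands out a in %eax, b in %edx and c in %ecx,
   while any further argument falls through to the stack.  */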
2213
2214 rtx
2215 function_arg (cum, mode, type, named)
2216 CUMULATIVE_ARGS *cum; /* current arg information */
2217 enum machine_mode mode; /* current arg mode */
2218 tree type; /* type of the argument or 0 if lib support */
2219 int named; /* != 0 for normal args, == 0 for ... args */
2220 {
2221 rtx ret = NULL_RTX;
2222 int bytes =
2223 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
2224 int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
2225
2226 /* Handle a hidden AL argument containing the number of registers for varargs
2227 x86-64 functions. For the i386 ABI just return constm1_rtx to avoid
2228 any AL settings. */
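/* Informal background note: on x86-64 the caller of a varargs function
   is expected to put an upper bound on the number of vector registers
   actually used into %al, e.g.

     movl $1, %eax   # one SSE register carries a double argument
     call printf

   and the value returned below is what the call expanders use to set
   that up.  */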
2229 if (mode == VOIDmode)
2230 {
2231 if (TARGET_64BIT)
2232 return GEN_INT (cum->maybe_vaarg
2233 ? (cum->sse_nregs < 0
2234 ? SSE_REGPARM_MAX
2235 : cum->sse_regno)
2236 : -1);
2237 else
2238 return constm1_rtx;
2239 }
2240 if (TARGET_64BIT)
2241 ret = construct_container (mode, type, 0, cum->nregs, cum->sse_nregs,
2242 &x86_64_int_parameter_registers [cum->regno],
2243 cum->sse_regno);
2244 else
2245 switch (mode)
2246 {
2247 /* For now, pass fp/complex values on the stack. */
2248 default:
2249 break;
2250
2251 case BLKmode:
2252 case DImode:
2253 case SImode:
2254 case HImode:
2255 case QImode:
2256 if (words <= cum->nregs)
2257 ret = gen_rtx_REG (mode, cum->regno);
2258 break;
2259 case TImode:
2260 if (cum->sse_nregs)
2261 ret = gen_rtx_REG (mode, cum->sse_regno);
2262 break;
2263 }
2264
2265 if (TARGET_DEBUG_ARG)
2266 {
2267 fprintf (stderr,
2268 "function_arg (size=%d, wds=%2d, nregs=%d, mode=%4s, named=%d, ",
2269 words, cum->words, cum->nregs, GET_MODE_NAME (mode), named);
2270
2271 if (ret)
2272 print_simple_rtl (stderr, ret);
2273 else
2274 fprintf (stderr, ", stack");
2275
2276 fprintf (stderr, " )\n");
2277 }
2278
2279 return ret;
2280 }
2281
2282 /* Gives the alignment boundary, in bits, of an argument with the specified mode
2283 and type. */
2284
2285 int
2286 ix86_function_arg_boundary (mode, type)
2287 enum machine_mode mode;
2288 tree type;
2289 {
2290 int align;
2291 if (!TARGET_64BIT)
2292 return PARM_BOUNDARY;
2293 if (type)
2294 align = TYPE_ALIGN (type);
2295 else
2296 align = GET_MODE_ALIGNMENT (mode);
2297 if (align < PARM_BOUNDARY)
2298 align = PARM_BOUNDARY;
2299 if (align > 128)
2300 align = 128;
2301 return align;
2302 }
2303
2304 /* Return true if N is a possible register number of function value. */
2305 bool
2306 ix86_function_value_regno_p (regno)
2307 int regno;
2308 {
2309 if (!TARGET_64BIT)
2310 {
2311 return ((regno) == 0
2312 || ((regno) == FIRST_FLOAT_REG && TARGET_FLOAT_RETURNS_IN_80387)
2313 || ((regno) == FIRST_SSE_REG && TARGET_SSE));
2314 }
2315 return ((regno) == 0 || (regno) == FIRST_FLOAT_REG
2316 || ((regno) == FIRST_SSE_REG && TARGET_SSE)
2317 || ((regno) == FIRST_FLOAT_REG && TARGET_FLOAT_RETURNS_IN_80387));
2318 }
2319
2320 /* Define how to find the value returned by a function.
2321 VALTYPE is the data type of the value (as a tree).
2322 If the precise function being called is known, FUNC is its FUNCTION_DECL;
2323 otherwise, FUNC is 0. */
2324 rtx
2325 ix86_function_value (valtype)
2326 tree valtype;
2327 {
2328 if (TARGET_64BIT)
2329 {
2330 rtx ret = construct_container (TYPE_MODE (valtype), valtype, 1,
2331 REGPARM_MAX, SSE_REGPARM_MAX,
2332 x86_64_int_return_registers, 0);
2333 /* For zero sized structures, construct_container returns NULL, but we need
2334 to keep the rest of the compiler happy by returning a meaningful value. */
2335 if (!ret)
2336 ret = gen_rtx_REG (TYPE_MODE (valtype), 0);
2337 return ret;
2338 }
2339 else
2340 return gen_rtx_REG (TYPE_MODE (valtype),
2341 ix86_value_regno (TYPE_MODE (valtype)));
2342 }
2343
2344 /* Return nonzero iff TYPE is returned in memory. */
2345 int
2346 ix86_return_in_memory (type)
2347 tree type;
2348 {
2349 int needed_intregs, needed_sseregs;
2350 if (TARGET_64BIT)
2351 {
2352 return !examine_argument (TYPE_MODE (type), type, 1,
2353 &needed_intregs, &needed_sseregs);
2354 }
2355 else
2356 {
2357 if (TYPE_MODE (type) == BLKmode
2358 || (VECTOR_MODE_P (TYPE_MODE (type))
2359 && int_size_in_bytes (type) == 8)
2360 || (int_size_in_bytes (type) > 12 && TYPE_MODE (type) != TImode
2361 && TYPE_MODE (type) != TFmode
2362 && !VECTOR_MODE_P (TYPE_MODE (type))))
2363 return 1;
2364 return 0;
2365 }
2366 }
2367
2368 /* Define how to find the value returned by a library function
2369 assuming the value has mode MODE. */
2370 rtx
2371 ix86_libcall_value (mode)
2372 enum machine_mode mode;
2373 {
2374 if (TARGET_64BIT)
2375 {
2376 switch (mode)
2377 {
2378 case SFmode:
2379 case SCmode:
2380 case DFmode:
2381 case DCmode:
2382 return gen_rtx_REG (mode, FIRST_SSE_REG);
2383 case TFmode:
2384 case TCmode:
2385 return gen_rtx_REG (mode, FIRST_FLOAT_REG);
2386 default:
2387 return gen_rtx_REG (mode, 0);
2388 }
2389 }
2390 else
2391 return gen_rtx_REG (mode, ix86_value_regno (mode));
2392 }
2393
2394 /* Given a mode, return the register to use for a return value. */
2395
2396 static int
2397 ix86_value_regno (mode)
2398 enum machine_mode mode;
2399 {
2400 if (GET_MODE_CLASS (mode) == MODE_FLOAT && TARGET_FLOAT_RETURNS_IN_80387)
2401 return FIRST_FLOAT_REG;
2402 if (mode == TImode || VECTOR_MODE_P (mode))
2403 return FIRST_SSE_REG;
2404 return 0;
2405 }
2406 \f
2407 /* Create the va_list data type. */
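/* Roughly, the 64-bit record built below corresponds to the familiar
   declaration (shown purely as an illustration):

     struct __va_list_tag {
       unsigned int gp_offset;
       unsigned int fp_offset;
       void *overflow_arg_area;
       void *reg_save_area;
     };
     typedef struct __va_list_tag va_list[1];

   while the 32-bit va_list remains a plain `char *'.  */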
2408
2409 tree
2410 ix86_build_va_list ()
2411 {
2412 tree f_gpr, f_fpr, f_ovf, f_sav, record, type_decl;
2413
2414 /* For i386 we use a plain pointer to the argument area. */
2415 if (!TARGET_64BIT)
2416 return build_pointer_type (char_type_node);
2417
2418 record = (*lang_hooks.types.make_type) (RECORD_TYPE);
2419 type_decl = build_decl (TYPE_DECL, get_identifier ("__va_list_tag"), record);
2420
2421 f_gpr = build_decl (FIELD_DECL, get_identifier ("gp_offset"),
2422 unsigned_type_node);
2423 f_fpr = build_decl (FIELD_DECL, get_identifier ("fp_offset"),
2424 unsigned_type_node);
2425 f_ovf = build_decl (FIELD_DECL, get_identifier ("overflow_arg_area"),
2426 ptr_type_node);
2427 f_sav = build_decl (FIELD_DECL, get_identifier ("reg_save_area"),
2428 ptr_type_node);
2429
2430 DECL_FIELD_CONTEXT (f_gpr) = record;
2431 DECL_FIELD_CONTEXT (f_fpr) = record;
2432 DECL_FIELD_CONTEXT (f_ovf) = record;
2433 DECL_FIELD_CONTEXT (f_sav) = record;
2434
2435 TREE_CHAIN (record) = type_decl;
2436 TYPE_NAME (record) = type_decl;
2437 TYPE_FIELDS (record) = f_gpr;
2438 TREE_CHAIN (f_gpr) = f_fpr;
2439 TREE_CHAIN (f_fpr) = f_ovf;
2440 TREE_CHAIN (f_ovf) = f_sav;
2441
2442 layout_type (record);
2443
2444 /* The correct type is an array type of one element. */
2445 return build_array_type (record, build_index_type (size_zero_node));
2446 }
2447
2448 /* Perform any actions needed for a function that is receiving a
2449 variable number of arguments.
2450
2451 CUM is as above.
2452
2453 MODE and TYPE are the mode and type of the current parameter.
2454
2455 PRETEND_SIZE is a variable that should be set to the amount of stack
2456 that must be pushed by the prolog to pretend that our caller pushed
2457 it.
2458
2459 Normally, this macro will push all remaining incoming registers on the
2460 stack and set PRETEND_SIZE to the length of the registers pushed. */
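/* Informal sketch of the save area layout assumed here and in
   ix86_va_start: first REGPARM_MAX eightbytes holding the integer
   argument registers, then SSE_REGPARM_MAX 16-byte slots holding the
   SSE argument registers, which is why the SSE block starts at offset
   8 * REGPARM_MAX.  */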
2461
2462 void
2463 ix86_setup_incoming_varargs (cum, mode, type, pretend_size, no_rtl)
2464 CUMULATIVE_ARGS *cum;
2465 enum machine_mode mode;
2466 tree type;
2467 int *pretend_size ATTRIBUTE_UNUSED;
2468 int no_rtl;
2469
2470 {
2471 CUMULATIVE_ARGS next_cum;
2472 rtx save_area = NULL_RTX, mem;
2473 rtx label;
2474 rtx label_ref;
2475 rtx tmp_reg;
2476 rtx nsse_reg;
2477 int set;
2478 tree fntype;
2479 int stdarg_p;
2480 int i;
2481
2482 if (!TARGET_64BIT)
2483 return;
2484
2485 /* Indicate to allocate space on the stack for varargs save area. */
2486 ix86_save_varrargs_registers = 1;
2487
2488 fntype = TREE_TYPE (current_function_decl);
2489 stdarg_p = (TYPE_ARG_TYPES (fntype) != 0
2490 && (TREE_VALUE (tree_last (TYPE_ARG_TYPES (fntype)))
2491 != void_type_node));
2492
2493 /* For varargs, we do not want to skip the dummy va_dcl argument.
2494 For stdargs, we do want to skip the last named argument. */
2495 next_cum = *cum;
2496 if (stdarg_p)
2497 function_arg_advance (&next_cum, mode, type, 1);
2498
2499 if (!no_rtl)
2500 save_area = frame_pointer_rtx;
2501
2502 set = get_varargs_alias_set ();
2503
2504 for (i = next_cum.regno; i < ix86_regparm; i++)
2505 {
2506 mem = gen_rtx_MEM (Pmode,
2507 plus_constant (save_area, i * UNITS_PER_WORD));
2508 set_mem_alias_set (mem, set);
2509 emit_move_insn (mem, gen_rtx_REG (Pmode,
2510 x86_64_int_parameter_registers[i]));
2511 }
2512
2513 if (next_cum.sse_nregs)
2514 {
2515 /* Now emit code to save SSE registers. The AX parameter contains the number
2516 of SSE parameter registers used to call this function. We use the
2517 sse_prologue_save insn template that produces a computed jump across
2518 the SSE saves. We need some preparation work to get this working. */
2519
2520 label = gen_label_rtx ();
2521 label_ref = gen_rtx_LABEL_REF (Pmode, label);
2522
2523 /* Compute the address to jump to:
2524 label - eax*4 + nnamed_sse_arguments*4 */
2525 tmp_reg = gen_reg_rtx (Pmode);
2526 nsse_reg = gen_reg_rtx (Pmode);
2527 emit_insn (gen_zero_extendqidi2 (nsse_reg, gen_rtx_REG (QImode, 0)));
2528 emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
2529 gen_rtx_MULT (Pmode, nsse_reg,
2530 GEN_INT (4))));
2531 if (next_cum.sse_regno)
2532 emit_move_insn
2533 (nsse_reg,
2534 gen_rtx_CONST (DImode,
2535 gen_rtx_PLUS (DImode,
2536 label_ref,
2537 GEN_INT (next_cum.sse_regno * 4))));
2538 else
2539 emit_move_insn (nsse_reg, label_ref);
2540 emit_insn (gen_subdi3 (nsse_reg, nsse_reg, tmp_reg));
2541
2542 /* Compute the address of the memory block we save into. We always use a pointer
2543 pointing 127 bytes past the first byte to store - this is needed to keep
2544 the instruction size limited to 4 bytes. */
2545 tmp_reg = gen_reg_rtx (Pmode);
2546 emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
2547 plus_constant (save_area,
2548 8 * REGPARM_MAX + 127)));
2549 mem = gen_rtx_MEM (BLKmode, plus_constant (tmp_reg, -127));
2550 set_mem_alias_set (mem, set);
2551 set_mem_align (mem, BITS_PER_WORD);
2552
2553 /* And finally do the dirty job! */
2554 emit_insn (gen_sse_prologue_save (mem, nsse_reg,
2555 GEN_INT (next_cum.sse_regno), label));
2556 }
2557
2558 }
2559
2560 /* Implement va_start. */
2561
2562 void
2563 ix86_va_start (valist, nextarg)
2564 tree valist;
2565 rtx nextarg;
2566 {
2567 HOST_WIDE_INT words, n_gpr, n_fpr;
2568 tree f_gpr, f_fpr, f_ovf, f_sav;
2569 tree gpr, fpr, ovf, sav, t;
2570
2571 /* Only 64bit target needs something special. */
2572 if (!TARGET_64BIT)
2573 {
2574 std_expand_builtin_va_start (valist, nextarg);
2575 return;
2576 }
2577
2578 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
2579 f_fpr = TREE_CHAIN (f_gpr);
2580 f_ovf = TREE_CHAIN (f_fpr);
2581 f_sav = TREE_CHAIN (f_ovf);
2582
2583 valist = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (valist)), valist);
2584 gpr = build (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr);
2585 fpr = build (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr);
2586 ovf = build (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf);
2587 sav = build (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav);
2588
2589 /* Count number of gp and fp argument registers used. */
2590 words = current_function_args_info.words;
2591 n_gpr = current_function_args_info.regno;
2592 n_fpr = current_function_args_info.sse_regno;
2593
2594 if (TARGET_DEBUG_ARG)
2595 fprintf (stderr, "va_start: words = %d, n_gpr = %d, n_fpr = %d\n",
2596 (int) words, (int) n_gpr, (int) n_fpr);
2597
2598 t = build (MODIFY_EXPR, TREE_TYPE (gpr), gpr,
2599 build_int_2 (n_gpr * 8, 0));
2600 TREE_SIDE_EFFECTS (t) = 1;
2601 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
2602
2603 t = build (MODIFY_EXPR, TREE_TYPE (fpr), fpr,
2604 build_int_2 (n_fpr * 16 + 8*REGPARM_MAX, 0));
2605 TREE_SIDE_EFFECTS (t) = 1;
2606 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
2607
2608 /* Find the overflow area. */
2609 t = make_tree (TREE_TYPE (ovf), virtual_incoming_args_rtx);
2610 if (words != 0)
2611 t = build (PLUS_EXPR, TREE_TYPE (ovf), t,
2612 build_int_2 (words * UNITS_PER_WORD, 0));
2613 t = build (MODIFY_EXPR, TREE_TYPE (ovf), ovf, t);
2614 TREE_SIDE_EFFECTS (t) = 1;
2615 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
2616
2617 /* Find the register save area.
2618 The prologue of the function saves it right above the stack frame. */
2619 t = make_tree (TREE_TYPE (sav), frame_pointer_rtx);
2620 t = build (MODIFY_EXPR, TREE_TYPE (sav), sav, t);
2621 TREE_SIDE_EFFECTS (t) = 1;
2622 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
2623 }
2624
2625 /* Implement va_arg. */
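/* A short sketch of the strategy used below: classify TYPE via
   construct_container; if the value can live in registers, compare
   gp_offset / fp_offset against the size of the register save area and,
   while they still fit, fetch the value from reg_save_area (going
   through a temporary when the register pieces are not contiguous);
   otherwise fall back to the overflow_arg_area path at the end.  */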
2626 rtx
2627 ix86_va_arg (valist, type)
2628 tree valist, type;
2629 {
2630 static const int intreg[6] = { 0, 1, 2, 3, 4, 5 };
2631 tree f_gpr, f_fpr, f_ovf, f_sav;
2632 tree gpr, fpr, ovf, sav, t;
2633 int size, rsize;
2634 rtx lab_false, lab_over = NULL_RTX;
2635 rtx addr_rtx, r;
2636 rtx container;
2637
2638 /* Only 64bit target needs something special. */
2639 if (!TARGET_64BIT)
2640 {
2641 return std_expand_builtin_va_arg (valist, type);
2642 }
2643
2644 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
2645 f_fpr = TREE_CHAIN (f_gpr);
2646 f_ovf = TREE_CHAIN (f_fpr);
2647 f_sav = TREE_CHAIN (f_ovf);
2648
2649 valist = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (valist)), valist);
2650 gpr = build (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr);
2651 fpr = build (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr);
2652 ovf = build (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf);
2653 sav = build (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav);
2654
2655 size = int_size_in_bytes (type);
2656 rsize = (size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
2657
2658 container = construct_container (TYPE_MODE (type), type, 0,
2659 REGPARM_MAX, SSE_REGPARM_MAX, intreg, 0);
2660 /* Pull the value out of the saved registers ... */
2663
2664 addr_rtx = gen_reg_rtx (Pmode);
2665
2666 if (container)
2667 {
2668 rtx int_addr_rtx, sse_addr_rtx;
2669 int needed_intregs, needed_sseregs;
2670 int need_temp;
2671
2672 lab_over = gen_label_rtx ();
2673 lab_false = gen_label_rtx ();
2674
2675 examine_argument (TYPE_MODE (type), type, 0,
2676 &needed_intregs, &needed_sseregs);
2677
2678
2679 need_temp = ((needed_intregs && TYPE_ALIGN (type) > 64)
2680 || TYPE_ALIGN (type) > 128);
2681
2682 /* In case we are passing a structure, verify that it is a consecutive block
2683 in the register save area. If not we need to do moves. */
2684 if (!need_temp && !REG_P (container))
2685 {
2686 /* Verify that all registers are strictly consecutive. */
2687 if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container, 0, 0), 0))))
2688 {
2689 int i;
2690
2691 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
2692 {
2693 rtx slot = XVECEXP (container, 0, i);
2694 if (REGNO (XEXP (slot, 0)) != FIRST_SSE_REG + (unsigned int) i
2695 || INTVAL (XEXP (slot, 1)) != i * 16)
2696 need_temp = 1;
2697 }
2698 }
2699 else
2700 {
2701 int i;
2702
2703 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
2704 {
2705 rtx slot = XVECEXP (container, 0, i);
2706 if (REGNO (XEXP (slot, 0)) != (unsigned int) i
2707 || INTVAL (XEXP (slot, 1)) != i * 8)
2708 need_temp = 1;
2709 }
2710 }
2711 }
2712 if (!need_temp)
2713 {
2714 int_addr_rtx = addr_rtx;
2715 sse_addr_rtx = addr_rtx;
2716 }
2717 else
2718 {
2719 int_addr_rtx = gen_reg_rtx (Pmode);
2720 sse_addr_rtx = gen_reg_rtx (Pmode);
2721 }
2722 /* First ensure that we fit completely in registers. */
2723 if (needed_intregs)
2724 {
2725 emit_cmp_and_jump_insns (expand_expr
2726 (gpr, NULL_RTX, SImode, EXPAND_NORMAL),
2727 GEN_INT ((REGPARM_MAX - needed_intregs +
2728 1) * 8), GE, const1_rtx, SImode,
2729 1, lab_false);
2730 }
2731 if (needed_sseregs)
2732 {
2733 emit_cmp_and_jump_insns (expand_expr
2734 (fpr, NULL_RTX, SImode, EXPAND_NORMAL),
2735 GEN_INT ((SSE_REGPARM_MAX -
2736 needed_sseregs + 1) * 16 +
2737 REGPARM_MAX * 8), GE, const1_rtx,
2738 SImode, 1, lab_false);
2739 }
2740
2741 /* Compute index to start of area used for integer regs. */
2742 if (needed_intregs)
2743 {
2744 t = build (PLUS_EXPR, ptr_type_node, sav, gpr);
2745 r = expand_expr (t, int_addr_rtx, Pmode, EXPAND_NORMAL);
2746 if (r != int_addr_rtx)
2747 emit_move_insn (int_addr_rtx, r);
2748 }
2749 if (needed_sseregs)
2750 {
2751 t = build (PLUS_EXPR, ptr_type_node, sav, fpr);
2752 r = expand_expr (t, sse_addr_rtx, Pmode, EXPAND_NORMAL);
2753 if (r != sse_addr_rtx)
2754 emit_move_insn (sse_addr_rtx, r);
2755 }
2756 if (need_temp)
2757 {
2758 int i;
2759 rtx mem;
2760
2761 /* Never use the memory itself, as it has the alias set. */
2762 addr_rtx = XEXP (assign_temp (type, 0, 1, 0), 0);
2763 mem = gen_rtx_MEM (BLKmode, addr_rtx);
2764 set_mem_alias_set (mem, get_varargs_alias_set ());
2765 set_mem_align (mem, BITS_PER_UNIT);
2766
2767 for (i = 0; i < XVECLEN (container, 0); i++)
2768 {
2769 rtx slot = XVECEXP (container, 0, i);
2770 rtx reg = XEXP (slot, 0);
2771 enum machine_mode mode = GET_MODE (reg);
2772 rtx src_addr;
2773 rtx src_mem;
2774 int src_offset;
2775 rtx dest_mem;
2776
2777 if (SSE_REGNO_P (REGNO (reg)))
2778 {
2779 src_addr = sse_addr_rtx;
2780 src_offset = (REGNO (reg) - FIRST_SSE_REG) * 16;
2781 }
2782 else
2783 {
2784 src_addr = int_addr_rtx;
2785 src_offset = REGNO (reg) * 8;
2786 }
2787 src_mem = gen_rtx_MEM (mode, src_addr);
2788 set_mem_alias_set (src_mem, get_varargs_alias_set ());
2789 src_mem = adjust_address (src_mem, mode, src_offset);
2790 dest_mem = adjust_address (mem, mode, INTVAL (XEXP (slot, 1)));
2791 emit_move_insn (dest_mem, src_mem);
2792 }
2793 }
2794
2795 if (needed_intregs)
2796 {
2797 t =
2798 build (PLUS_EXPR, TREE_TYPE (gpr), gpr,
2799 build_int_2 (needed_intregs * 8, 0));
2800 t = build (MODIFY_EXPR, TREE_TYPE (gpr), gpr, t);
2801 TREE_SIDE_EFFECTS (t) = 1;
2802 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
2803 }
2804 if (needed_sseregs)
2805 {
2806 t =
2807 build (PLUS_EXPR, TREE_TYPE (fpr), fpr,
2808 build_int_2 (needed_sseregs * 16, 0));
2809 t = build (MODIFY_EXPR, TREE_TYPE (fpr), fpr, t);
2810 TREE_SIDE_EFFECTS (t) = 1;
2811 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
2812 }
2813
2814 emit_jump_insn (gen_jump (lab_over));
2815 emit_barrier ();
2816 emit_label (lab_false);
2817 }
2818
2819 /* ... otherwise out of the overflow area. */
2820
2821 /* Care for on-stack alignment if needed. */
2822 if (FUNCTION_ARG_BOUNDARY (VOIDmode, type) <= 64)
2823 t = ovf;
2824 else
2825 {
2826 HOST_WIDE_INT align = FUNCTION_ARG_BOUNDARY (VOIDmode, type) / 8;
2827 t = build (PLUS_EXPR, TREE_TYPE (ovf), ovf, build_int_2 (align - 1, 0));
2828 t = build (BIT_AND_EXPR, TREE_TYPE (t), t, build_int_2 (-align, -1));
2829 }
2830 t = save_expr (t);
2831
2832 r = expand_expr (t, addr_rtx, Pmode, EXPAND_NORMAL);
2833 if (r != addr_rtx)
2834 emit_move_insn (addr_rtx, r);
2835
2836 t =
2837 build (PLUS_EXPR, TREE_TYPE (t), t,
2838 build_int_2 (rsize * UNITS_PER_WORD, 0));
2839 t = build (MODIFY_EXPR, TREE_TYPE (ovf), ovf, t);
2840 TREE_SIDE_EFFECTS (t) = 1;
2841 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
2842
2843 if (container)
2844 emit_label (lab_over);
2845
2846 return addr_rtx;
2847 }
2848 \f
2849 /* Return nonzero if OP is either an i387 or SSE fp register. */
2850 int
2851 any_fp_register_operand (op, mode)
2852 rtx op;
2853 enum machine_mode mode ATTRIBUTE_UNUSED;
2854 {
2855 return ANY_FP_REG_P (op);
2856 }
2857
2858 /* Return nonzero if OP is an i387 fp register. */
2859 int
2860 fp_register_operand (op, mode)
2861 rtx op;
2862 enum machine_mode mode ATTRIBUTE_UNUSED;
2863 {
2864 return FP_REG_P (op);
2865 }
2866
2867 /* Return nonzero if OP is a non-fp register_operand. */
2868 int
2869 register_and_not_any_fp_reg_operand (op, mode)
2870 rtx op;
2871 enum machine_mode mode;
2872 {
2873 return register_operand (op, mode) && !ANY_FP_REG_P (op);
2874 }
2875
2876 /* Return nonzero if OP is a register operand other than an
2877 i387 fp register. */
2878 int
2879 register_and_not_fp_reg_operand (op, mode)
2880 rtx op;
2881 enum machine_mode mode;
2882 {
2883 return register_operand (op, mode) && !FP_REG_P (op);
2884 }
2885
2886 /* Return nonzero if OP is a general operand representable on x86_64. */
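/* Informal background: apart from movabs, x86-64 instructions can only
   encode immediates that fit in a sign-extended 32-bit field, so e.g.
   -2147483648 qualifies while 2147483648 does not; that is the property
   x86_64_sign_extended_value tests.  */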
2887
2888 int
2889 x86_64_general_operand (op, mode)
2890 rtx op;
2891 enum machine_mode mode;
2892 {
2893 if (!TARGET_64BIT)
2894 return general_operand (op, mode);
2895 if (nonimmediate_operand (op, mode))
2896 return 1;
2897 return x86_64_sign_extended_value (op);
2898 }
2899
2900 /* Return nonzero if OP is a general operand representable on x86_64
2901 as either a sign extended or zero extended constant. */
2902
2903 int
2904 x86_64_szext_general_operand (op, mode)
2905 rtx op;
2906 enum machine_mode mode;
2907 {
2908 if (!TARGET_64BIT)
2909 return general_operand (op, mode);
2910 if (nonimmediate_operand (op, mode))
2911 return 1;
2912 return x86_64_sign_extended_value (op) || x86_64_zero_extended_value (op);
2913 }
2914
2915 /* Return nonzero if OP is a nonmemory operand representable on x86_64. */
2916
2917 int
2918 x86_64_nonmemory_operand (op, mode)
2919 rtx op;
2920 enum machine_mode mode;
2921 {
2922 if (!TARGET_64BIT)
2923 return nonmemory_operand (op, mode);
2924 if (register_operand (op, mode))
2925 return 1;
2926 return x86_64_sign_extended_value (op);
2927 }
2928
2929 /* Return nonzero if OP is a nonmemory operand acceptable to the movabs patterns. */
2930
2931 int
2932 x86_64_movabs_operand (op, mode)
2933 rtx op;
2934 enum machine_mode mode;
2935 {
2936 if (!TARGET_64BIT || !flag_pic)
2937 return nonmemory_operand (op, mode);
2938 if (register_operand (op, mode) || x86_64_sign_extended_value (op))
2939 return 1;
2940 if (CONSTANT_P (op) && !symbolic_reference_mentioned_p (op))
2941 return 1;
2942 return 0;
2943 }
2944
2945 /* Return nonzero if OP is a nonmemory operand representable on x86_64
2946 as either a sign extended or zero extended constant. */
2946
2947 int
2948 x86_64_szext_nonmemory_operand (op, mode)
2949 rtx op;
2950 enum machine_mode mode;
2951 {
2952 if (!TARGET_64BIT)
2953 return nonmemory_operand (op, mode);
2954 if (register_operand (op, mode))
2955 return 1;
2956 return x86_64_sign_extended_value (op) || x86_64_zero_extended_value (op);
2957 }
2958
2959 /* Return nonzero if OP is an immediate operand representable on x86_64. */
2960
2961 int
2962 x86_64_immediate_operand (op, mode)
2963 rtx op;
2964 enum machine_mode mode;
2965 {
2966 if (!TARGET_64BIT)
2967 return immediate_operand (op, mode);
2968 return x86_64_sign_extended_value (op);
2969 }
2970
2971 /* Return nonzero if OP is an immediate operand representable on x86_64
2972 as a zero extended constant. */
2972
2973 int
2974 x86_64_zext_immediate_operand (op, mode)
2975 rtx op;
2976 enum machine_mode mode ATTRIBUTE_UNUSED;
2977 {
2978 return x86_64_zero_extended_value (op);
2979 }
2980
2981 /* Return nonzero if OP is (const_int 1), else return zero. */
2982
2983 int
2984 const_int_1_operand (op, mode)
2985 rtx op;
2986 enum machine_mode mode ATTRIBUTE_UNUSED;
2987 {
2988 return (GET_CODE (op) == CONST_INT && INTVAL (op) == 1);
2989 }
2990
2991 /* Return nonzero if OP is CONST_INT >= 1 and <= 31 (a valid operand
2992 for shift & compare patterns, as shifting by 0 does not change flags),
2993 else return zero. */
2994
2995 int
2996 const_int_1_31_operand (op, mode)
2997 rtx op;
2998 enum machine_mode mode ATTRIBUTE_UNUSED;
2999 {
3000 return (GET_CODE (op) == CONST_INT && INTVAL (op) >= 1 && INTVAL (op) <= 31);
3001 }
3002
3003 /* Returns 1 if OP is either a symbol reference or a sum of a symbol
3004 reference and a constant. */
3005
3006 int
3007 symbolic_operand (op, mode)
3008 register rtx op;
3009 enum machine_mode mode ATTRIBUTE_UNUSED;
3010 {
3011 switch (GET_CODE (op))
3012 {
3013 case SYMBOL_REF:
3014 case LABEL_REF:
3015 return 1;
3016
3017 case CONST:
3018 op = XEXP (op, 0);
3019 if (GET_CODE (op) == SYMBOL_REF
3020 || GET_CODE (op) == LABEL_REF
3021 || (GET_CODE (op) == UNSPEC
3022 && (XINT (op, 1) == UNSPEC_GOT
3023 || XINT (op, 1) == UNSPEC_GOTOFF
3024 || XINT (op, 1) == UNSPEC_GOTPCREL)))
3025 return 1;
3026 if (GET_CODE (op) != PLUS
3027 || GET_CODE (XEXP (op, 1)) != CONST_INT)
3028 return 0;
3029
3030 op = XEXP (op, 0);
3031 if (GET_CODE (op) == SYMBOL_REF
3032 || GET_CODE (op) == LABEL_REF)
3033 return 1;
3034 /* Only @GOTOFF gets offsets. */
3035 if (GET_CODE (op) != UNSPEC
3036 || XINT (op, 1) != UNSPEC_GOTOFF)
3037 return 0;
3038
3039 op = XVECEXP (op, 0, 0);
3040 if (GET_CODE (op) == SYMBOL_REF
3041 || GET_CODE (op) == LABEL_REF)
3042 return 1;
3043 return 0;
3044
3045 default:
3046 return 0;
3047 }
3048 }
3049
3050 /* Return true if the operand contains a @GOT or @GOTOFF reference. */
3051
3052 int
3053 pic_symbolic_operand (op, mode)
3054 register rtx op;
3055 enum machine_mode mode ATTRIBUTE_UNUSED;
3056 {
3057 if (GET_CODE (op) != CONST)
3058 return 0;
3059 op = XEXP (op, 0);
3060 if (TARGET_64BIT)
3061 {
3062 if (GET_CODE (XEXP (op, 0)) == UNSPEC)
3063 return 1;
3064 }
3065 else
3066 {
3067 if (GET_CODE (op) == UNSPEC)
3068 return 1;
3069 if (GET_CODE (op) != PLUS
3070 || GET_CODE (XEXP (op, 1)) != CONST_INT)
3071 return 0;
3072 op = XEXP (op, 0);
3073 if (GET_CODE (op) == UNSPEC)
3074 return 1;
3075 }
3076 return 0;
3077 }
3078
3079 /* Return true if OP is a symbolic operand that resolves locally. */
3080
3081 static int
3082 local_symbolic_operand (op, mode)
3083 rtx op;
3084 enum machine_mode mode ATTRIBUTE_UNUSED;
3085 {
3086 if (GET_CODE (op) == CONST
3087 && GET_CODE (XEXP (op, 0)) == PLUS
3088 && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST_INT)
3089 op = XEXP (XEXP (op, 0), 0);
3090
3091 if (GET_CODE (op) == LABEL_REF)
3092 return 1;
3093
3094 if (GET_CODE (op) != SYMBOL_REF)
3095 return 0;
3096
3097 /* These we've been told are local by varasm and encode_section_info
3098 respectively. */
3099 if (CONSTANT_POOL_ADDRESS_P (op) || SYMBOL_REF_FLAG (op))
3100 return 1;
3101
3102 /* There is, however, a not insubstantial body of code in the rest of
3103 the compiler that assumes it can just stick the results of
3104 ASM_GENERATE_INTERNAL_LABEL in a symbol_ref and have done. */
3105 /* ??? This is a hack. Should update the body of the compiler to
3106 always create a DECL and invoke targetm.encode_section_info. */
3107 if (strncmp (XSTR (op, 0), internal_label_prefix,
3108 internal_label_prefix_len) == 0)
3109 return 1;
3110
3111 return 0;
3112 }
3113
3114 /* Test for various thread-local symbols. See ix86_encode_section_info. */
3115
3116 int
3117 tls_symbolic_operand (op, mode)
3118 register rtx op;
3119 enum machine_mode mode ATTRIBUTE_UNUSED;
3120 {
3121 const char *symbol_str;
3122
3123 if (GET_CODE (op) != SYMBOL_REF)
3124 return 0;
3125 symbol_str = XSTR (op, 0);
3126
3127 if (symbol_str[0] != '%')
3128 return 0;
3129 return strchr (tls_model_chars, symbol_str[1]) - tls_model_chars;
3130 }
3131
3132 static int
3133 tls_symbolic_operand_1 (op, kind)
3134 rtx op;
3135 enum tls_model kind;
3136 {
3137 const char *symbol_str;
3138
3139 if (GET_CODE (op) != SYMBOL_REF)
3140 return 0;
3141 symbol_str = XSTR (op, 0);
3142
3143 return symbol_str[0] == '%' && symbol_str[1] == tls_model_chars[kind];
3144 }
3145
3146 int
3147 global_dynamic_symbolic_operand (op, mode)
3148 register rtx op;
3149 enum machine_mode mode ATTRIBUTE_UNUSED;
3150 {
3151 return tls_symbolic_operand_1 (op, TLS_MODEL_GLOBAL_DYNAMIC);
3152 }
3153
3154 int
3155 local_dynamic_symbolic_operand (op, mode)
3156 register rtx op;
3157 enum machine_mode mode ATTRIBUTE_UNUSED;
3158 {
3159 return tls_symbolic_operand_1 (op, TLS_MODEL_LOCAL_DYNAMIC);
3160 }
3161
3162 int
3163 initial_exec_symbolic_operand (op, mode)
3164 register rtx op;
3165 enum machine_mode mode ATTRIBUTE_UNUSED;
3166 {
3167 return tls_symbolic_operand_1 (op, TLS_MODEL_INITIAL_EXEC);
3168 }
3169
3170 int
3171 local_exec_symbolic_operand (op, mode)
3172 register rtx op;
3173 enum machine_mode mode ATTRIBUTE_UNUSED;
3174 {
3175 return tls_symbolic_operand_1 (op, TLS_MODEL_LOCAL_EXEC);
3176 }
3177
3178 /* Test for a valid operand for a call instruction. Don't allow the
3179 arg pointer register or virtual regs since they may decay into
3180 reg + const, which the patterns can't handle. */
3181
3182 int
3183 call_insn_operand (op, mode)
3184 rtx op;
3185 enum machine_mode mode ATTRIBUTE_UNUSED;
3186 {
3187 /* Disallow indirect through a virtual register. This leads to
3188 compiler aborts when trying to eliminate them. */
3189 if (GET_CODE (op) == REG
3190 && (op == arg_pointer_rtx
3191 || op == frame_pointer_rtx
3192 || (REGNO (op) >= FIRST_PSEUDO_REGISTER
3193 && REGNO (op) <= LAST_VIRTUAL_REGISTER)))
3194 return 0;
3195
3196 /* Disallow `call 1234'. Due to varying assembler lameness this
3197 gets either rejected or translated to `call .+1234'. */
3198 if (GET_CODE (op) == CONST_INT)
3199 return 0;
3200
3201 /* Explicitly allow SYMBOL_REF even if pic. */
3202 if (GET_CODE (op) == SYMBOL_REF)
3203 return 1;
3204
3205 /* Otherwise we can allow any general_operand in the address. */
3206 return general_operand (op, Pmode);
3207 }
3208
3209 int
3210 constant_call_address_operand (op, mode)
3211 rtx op;
3212 enum machine_mode mode ATTRIBUTE_UNUSED;
3213 {
3214 if (GET_CODE (op) == CONST
3215 && GET_CODE (XEXP (op, 0)) == PLUS
3216 && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST_INT)
3217 op = XEXP (XEXP (op, 0), 0);
3218 return GET_CODE (op) == SYMBOL_REF;
3219 }
3220
3221 /* Match exactly zero and one. */
3222
3223 int
3224 const0_operand (op, mode)
3225 register rtx op;
3226 enum machine_mode mode;
3227 {
3228 return op == CONST0_RTX (mode);
3229 }
3230
3231 int
3232 const1_operand (op, mode)
3233 register rtx op;
3234 enum machine_mode mode ATTRIBUTE_UNUSED;
3235 {
3236 return op == const1_rtx;
3237 }
3238
3239 /* Match 2, 4, or 8. Used for leal multiplicands. */
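/* E.g. `leal (%eax,%ebx,4), %ecx' - the hardware only encodes scale
   factors of 1, 2, 4 or 8, and a scale of 1 needs no explicit
   multiplicand, hence 2, 4 and 8 here.  */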
3240
3241 int
3242 const248_operand (op, mode)
3243 register rtx op;
3244 enum machine_mode mode ATTRIBUTE_UNUSED;
3245 {
3246 return (GET_CODE (op) == CONST_INT
3247 && (INTVAL (op) == 2 || INTVAL (op) == 4 || INTVAL (op) == 8));
3248 }
3249
3250 /* True if this is a constant appropriate for an increment or decrement. */
3251
3252 int
3253 incdec_operand (op, mode)
3254 register rtx op;
3255 enum machine_mode mode ATTRIBUTE_UNUSED;
3256 {
3257 /* On Pentium4, the inc and dec operations cause an extra dependency on the flags
3258 register, since the carry flag is not set. */
3259 if (TARGET_PENTIUM4 && !optimize_size)
3260 return 0;
3261 return op == const1_rtx || op == constm1_rtx;
3262 }
3263
3264 /* Return nonzero if OP is acceptable as an operand of the DImode shift
3265 expander. */
3266
3267 int
3268 shiftdi_operand (op, mode)
3269 rtx op;
3270 enum machine_mode mode ATTRIBUTE_UNUSED;
3271 {
3272 if (TARGET_64BIT)
3273 return nonimmediate_operand (op, mode);
3274 else
3275 return register_operand (op, mode);
3276 }
3277
3278 /* Return false if this is the stack pointer, or any other fake
3279 register eliminable to the stack pointer. Otherwise, this is
3280 a register operand.
3281
3282 This is used to prevent esp from being used as an index reg,
3283 which would only happen in pathological cases. */
3284
3285 int
3286 reg_no_sp_operand (op, mode)
3287 register rtx op;
3288 enum machine_mode mode;
3289 {
3290 rtx t = op;
3291 if (GET_CODE (t) == SUBREG)
3292 t = SUBREG_REG (t);
3293 if (t == stack_pointer_rtx || t == arg_pointer_rtx || t == frame_pointer_rtx)
3294 return 0;
3295
3296 return register_operand (op, mode);
3297 }
3298
3299 int
3300 mmx_reg_operand (op, mode)
3301 register rtx op;
3302 enum machine_mode mode ATTRIBUTE_UNUSED;
3303 {
3304 return MMX_REG_P (op);
3305 }
3306
3307 /* Return false if this is any eliminable register. Otherwise
3308 general_operand. */
3309
3310 int
3311 general_no_elim_operand (op, mode)
3312 register rtx op;
3313 enum machine_mode mode;
3314 {
3315 rtx t = op;
3316 if (GET_CODE (t) == SUBREG)
3317 t = SUBREG_REG (t);
3318 if (t == arg_pointer_rtx || t == frame_pointer_rtx
3319 || t == virtual_incoming_args_rtx || t == virtual_stack_vars_rtx
3320 || t == virtual_stack_dynamic_rtx)
3321 return 0;
3322 if (REG_P (t)
3323 && REGNO (t) >= FIRST_VIRTUAL_REGISTER
3324 && REGNO (t) <= LAST_VIRTUAL_REGISTER)
3325 return 0;
3326
3327 return general_operand (op, mode);
3328 }
3329
3330 /* Return false if this is any eliminable register. Otherwise
3331 register_operand or const_int. */
3332
3333 int
3334 nonmemory_no_elim_operand (op, mode)
3335 register rtx op;
3336 enum machine_mode mode;
3337 {
3338 rtx t = op;
3339 if (GET_CODE (t) == SUBREG)
3340 t = SUBREG_REG (t);
3341 if (t == arg_pointer_rtx || t == frame_pointer_rtx
3342 || t == virtual_incoming_args_rtx || t == virtual_stack_vars_rtx
3343 || t == virtual_stack_dynamic_rtx)
3344 return 0;
3345
3346 return GET_CODE (op) == CONST_INT || register_operand (op, mode);
3347 }
3348
3349 /* Return false if this is any eliminable register or stack register,
3350 otherwise work like register_operand. */
3351
3352 int
3353 index_register_operand (op, mode)
3354 register rtx op;
3355 enum machine_mode mode;
3356 {
3357 rtx t = op;
3358 if (GET_CODE (t) == SUBREG)
3359 t = SUBREG_REG (t);
3360 if (!REG_P (t))
3361 return 0;
3362 if (t == arg_pointer_rtx
3363 || t == frame_pointer_rtx
3364 || t == virtual_incoming_args_rtx
3365 || t == virtual_stack_vars_rtx
3366 || t == virtual_stack_dynamic_rtx
3367 || REGNO (t) == STACK_POINTER_REGNUM)
3368 return 0;
3369
3370 return general_operand (op, mode);
3371 }
3372
3373 /* Return true if op is a Q_REGS class register. */
3374
3375 int
3376 q_regs_operand (op, mode)
3377 register rtx op;
3378 enum machine_mode mode;
3379 {
3380 if (mode != VOIDmode && GET_MODE (op) != mode)
3381 return 0;
3382 if (GET_CODE (op) == SUBREG)
3383 op = SUBREG_REG (op);
3384 return ANY_QI_REG_P (op);
3385 }
3386
3387 /* Return true if op is a NON_Q_REGS class register. */
3388
3389 int
3390 non_q_regs_operand (op, mode)
3391 register rtx op;
3392 enum machine_mode mode;
3393 {
3394 if (mode != VOIDmode && GET_MODE (op) != mode)
3395 return 0;
3396 if (GET_CODE (op) == SUBREG)
3397 op = SUBREG_REG (op);
3398 return NON_QI_REG_P (op);
3399 }
3400
3401 /* Return 1 if OP is a comparison that can be used in the CMPSS/CMPPS
3402 insns. */
3403 int
3404 sse_comparison_operator (op, mode)
3405 rtx op;
3406 enum machine_mode mode ATTRIBUTE_UNUSED;
3407 {
3408 enum rtx_code code = GET_CODE (op);
3409 switch (code)
3410 {
3411 /* Operations supported directly. */
3412 case EQ:
3413 case LT:
3414 case LE:
3415 case UNORDERED:
3416 case NE:
3417 case UNGE:
3418 case UNGT:
3419 case ORDERED:
3420 return 1;
3421 /* These are equivalent to ones above in non-IEEE comparisons. */
3422 case UNEQ:
3423 case UNLT:
3424 case UNLE:
3425 case LTGT:
3426 case GE:
3427 case GT:
3428 return !TARGET_IEEE_FP;
3429 default:
3430 return 0;
3431 }
3432 }
3433 /* Return 1 if OP is a valid comparison operator in valid mode. */
3434 int
3435 ix86_comparison_operator (op, mode)
3436 register rtx op;
3437 enum machine_mode mode;
3438 {
3439 enum machine_mode inmode;
3440 enum rtx_code code = GET_CODE (op);
3441 if (mode != VOIDmode && GET_MODE (op) != mode)
3442 return 0;
3443 if (GET_RTX_CLASS (code) != '<')
3444 return 0;
3445 inmode = GET_MODE (XEXP (op, 0));
3446
3447 if (inmode == CCFPmode || inmode == CCFPUmode)
3448 {
3449 enum rtx_code second_code, bypass_code;
3450 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
3451 return (bypass_code == NIL && second_code == NIL);
3452 }
3453 switch (code)
3454 {
3455 case EQ: case NE:
3456 return 1;
3457 case LT: case GE:
3458 if (inmode == CCmode || inmode == CCGCmode
3459 || inmode == CCGOCmode || inmode == CCNOmode)
3460 return 1;
3461 return 0;
3462 case LTU: case GTU: case LEU: case ORDERED: case UNORDERED: case GEU:
3463 if (inmode == CCmode)
3464 return 1;
3465 return 0;
3466 case GT: case LE:
3467 if (inmode == CCmode || inmode == CCGCmode || inmode == CCNOmode)
3468 return 1;
3469 return 0;
3470 default:
3471 return 0;
3472 }
3473 }
3474
3475 /* Return 1 if OP is a comparison operator that can be issued by fcmov. */
3476
3477 int
3478 fcmov_comparison_operator (op, mode)
3479 register rtx op;
3480 enum machine_mode mode;
3481 {
3482 enum machine_mode inmode;
3483 enum rtx_code code = GET_CODE (op);
3484 if (mode != VOIDmode && GET_MODE (op) != mode)
3485 return 0;
3486 if (GET_RTX_CLASS (code) != '<')
3487 return 0;
3488 inmode = GET_MODE (XEXP (op, 0));
3489 if (inmode == CCFPmode || inmode == CCFPUmode)
3490 {
3491 enum rtx_code second_code, bypass_code;
3492 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
3493 if (bypass_code != NIL || second_code != NIL)
3494 return 0;
3495 code = ix86_fp_compare_code_to_integer (code);
3496 }
3497 /* The i387 supports only a limited set of condition codes. */
3498 switch (code)
3499 {
3500 case LTU: case GTU: case LEU: case GEU:
3501 if (inmode == CCmode || inmode == CCFPmode || inmode == CCFPUmode)
3502 return 1;
3503 return 0;
3504 case ORDERED: case UNORDERED:
3505 case EQ: case NE:
3506 return 1;
3507 default:
3508 return 0;
3509 }
3510 }
3511
3512 /* Return 1 if OP is a binary operator that can be promoted to wider mode. */
3513
3514 int
3515 promotable_binary_operator (op, mode)
3516 register rtx op;
3517 enum machine_mode mode ATTRIBUTE_UNUSED;
3518 {
3519 switch (GET_CODE (op))
3520 {
3521 case MULT:
3522 /* Modern CPUs have the same latency for HImode and SImode multiplies,
3523 but the 386 and 486 do HImode multiplies faster. */
3524 return ix86_cpu > PROCESSOR_I486;
3525 case PLUS:
3526 case AND:
3527 case IOR:
3528 case XOR:
3529 case ASHIFT:
3530 return 1;
3531 default:
3532 return 0;
3533 }
3534 }
3535
3536 /* Nearly general operand, but accept any const_double, since we wish
3537 to be able to drop them into memory rather than have them get pulled
3538 into registers. */
3539
3540 int
3541 cmp_fp_expander_operand (op, mode)
3542 register rtx op;
3543 enum machine_mode mode;
3544 {
3545 if (mode != VOIDmode && mode != GET_MODE (op))
3546 return 0;
3547 if (GET_CODE (op) == CONST_DOUBLE)
3548 return 1;
3549 return general_operand (op, mode);
3550 }
3551
3552 /* Match an SImode or HImode register (or, on 64-bit targets, a DImode register) for a zero_extract. */
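/* Only %eax, %edx, %ecx and %ebx (hard registers 0 through 3) have
   addressable high-byte parts, hence the "regno < 4" test below; pseudo
   registers, whose numbers are above LAST_VIRTUAL_REGISTER, are accepted
   as well. */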
3553
3554 int
3555 ext_register_operand (op, mode)
3556 register rtx op;
3557 enum machine_mode mode ATTRIBUTE_UNUSED;
3558 {
3559 int regno;
3560 if ((!TARGET_64BIT || GET_MODE (op) != DImode)
3561 && GET_MODE (op) != SImode && GET_MODE (op) != HImode)
3562 return 0;
3563
3564 if (!register_operand (op, VOIDmode))
3565 return 0;
3566
3567 /* Be careful to accept only registers having upper parts. */
3568 regno = REG_P (op) ? REGNO (op) : REGNO (SUBREG_REG (op));
3569 return (regno > LAST_VIRTUAL_REGISTER || regno < 4);
3570 }
3571
3572 /* Return 1 if this is a valid binary floating-point operation.
3573 OP is the expression matched, and MODE is its mode. */
3574
3575 int
3576 binary_fp_operator (op, mode)
3577 register rtx op;
3578 enum machine_mode mode;
3579 {
3580 if (mode != VOIDmode && mode != GET_MODE (op))
3581 return 0;
3582
3583 switch (GET_CODE (op))
3584 {
3585 case PLUS:
3586 case MINUS:
3587 case MULT:
3588 case DIV:
3589 return GET_MODE_CLASS (GET_MODE (op)) == MODE_FLOAT;
3590
3591 default:
3592 return 0;
3593 }
3594 }
3595
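/* Return 1 if OP is a MULT expression. */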
3596 int
3597 mult_operator (op, mode)
3598 register rtx op;
3599 enum machine_mode mode ATTRIBUTE_UNUSED;
3600 {
3601 return GET_CODE (op) == MULT;
3602 }
3603
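/* Return 1 if OP is a DIV expression. */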
3604 int
3605 div_operator (op, mode)
3606 register rtx op;
3607 enum machine_mode mode ATTRIBUTE_UNUSED;
3608 {
3609 return GET_CODE (op) == DIV;
3610 }
3611
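/* Return 1 if OP is a binary arithmetic or logical operator in MODE,
   i.e. an rtx whose code class is either commutative ('c') or plain
   two-operand arithmetic ('2'). */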
3612 int
3613 arith_or_logical_operator (op, mode)
3614 rtx op;
3615 enum machine_mode mode;
3616 {
3617 return ((mode == VOIDmode || GET_MODE (op) == mode)
3618 && (GET_RTX_CLASS (GET_CODE (op)) == 'c'
3619 || GET_RTX_CLASS (GET_CODE (op)) == '2'));
3620 }
3621
3622 /* Returns 1 if OP is a memory operand with a displacement. */
3623
3624 int
3625 memory_displacement_operand (op, mode)
3626 register rtx op;
3627 enum machine_mode mode;
3628 {
3629 struct ix86_address parts;
3630
3631 if (! memory_operand (op, mode))
3632 return 0;
3633
3634 if (! ix86_decompose_address (XEXP (op, 0), &parts))
3635 abort ();
3636
3637 return parts.disp != NULL_RTX;
3638 }
3639
3640 /* To avoid problems when jump re-emits comparisons like testqi_ext_ccno_0,
3641 re-recognize the operand to avoid a copy_to_mode_reg that will fail.
3642
3643 ??? It seems likely that this will only work because cmpsi is an
3644 expander, and no actual insns use this. */
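/* Besides nonimmediate operands, this also accepts RTL of the form
   (and:SI (zero_extract:SI (...) (const_int 8) (const_int 8)) (const_int N)),
   i.e. a masked test of the second-lowest byte of an operand, matching the
   shape produced by the testqi_ext_ccno_0 pattern mentioned above. */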
3645
3646 int
3647 cmpsi_operand (op, mode)
3648 rtx op;
3649 enum machine_mode mode;
3650 {
3651 if (nonimmediate_operand (op, mode))
3652 return 1;
3653
3654 if (GET_CODE (op) == AND
3655 && GET_MODE (op) == SImode
3656 && GET_CODE (XEXP (op, 0)) == ZERO_EXTRACT
3657 && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST_INT
3658 && GET_CODE (XEXP (XEXP (op, 0), 2)) == CONST_INT
3659 && INTVAL (XEXP (XEXP (op, 0), 1)) == 8
3660 && INTVAL (XEXP (XEXP (op, 0), 2)) == 8
3661 && GET_CODE (XEXP (op, 1)) == CONST_INT)
3662 return 1;
3663
3664 return 0;
3665 }
3666
3667 /* Returns 1 if OP is a memory operand that cannot be represented by the
3668 modRM array. */
3669
3670 int
3671 long_memory_operand (op, mode)
3672 register rtx op;
3673 enum machine_mode mode;
3674 {
3675 if (! memory_operand (op, mode))
3676 return 0;
3677
3678 return memory_address_length (op) != 0;
3679 }
3680
3681 /* Return nonzero if the rtx is known to be aligned. */
3682
3683 int
3684 aligned_operand (op, mode)
3685 rtx op;
3686 enum machine_mode mode;
3687 {
3688 struct ix86_address parts;
3689
3690 if (!general_operand (op, mode))
3691 return 0;
3692
3693 /* Registers and immediate operands are always "aligned". */
3694 if (GET_CODE (op) != MEM)
3695 return 1;
3696
3697 /* Don't even try to do any aligned optimizations with volatiles. */
3698 if (MEM_VOLATILE_P (op))
3699 return 0;
3700
3701 op = XEXP (op, 0);
3702
3703 /* Pushes and pops are only valid on the stack pointer. */
3704 if (GET_CODE (op) == PRE_DEC
3705 || GET_CODE (op) == POST_INC)
3706 return 1;
3707
3708 /* Decode the address. */
3709 if (! ix86_decompose_address (op, &parts))
3710 abort ();
3711
3712 if (parts.base && GET_CODE (parts.base) == SUBREG)
3713 parts.base = SUBREG_REG (parts.base);
3714 if (parts.index && GET_CODE (parts.index) == SUBREG)
3715 parts.index = SUBREG_REG (parts.index);
3716
3717 /* Look for some component that isn't known to be aligned. */
3718 if (parts.index)
3719 {
3720 if (parts.scale < 4
3721 && REGNO_POINTER_ALIGN (REGNO (parts.index)) < 32)
3722 return 0;
3723 }
3724 if (parts.base)
3725 {
3726 if (REGNO_POINTER_ALIGN (REGNO (parts.base)) < 32)
3727 return 0;
3728 }
3729 if (parts.disp)
3730 {
3731 if (GET_CODE (parts.disp) != CONST_INT
3732 || (INTVAL (parts.disp) & 3) != 0)
3733 return 0;
3734 }
3735
3736 /* Didn't find one -- this must be an aligned address. */
3737 return 1;
3738 }
3739 \f
3740 /* Return true if the constant is something that can be loaded with
3741 a special instruction. Only handle 0.0 and 1.0; others are less
3742 worthwhile. */
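/* The return value encodes which constant was recognized: 1 for 0.0,
   2 for 1.0, 0 for any other FP constant, and -1 if X is not a
   floating-point CONST_DOUBLE at all. */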
3743
3744 int
3745 standard_80387_constant_p (x)
3746 rtx x;
3747 {
3748 if (GET_CODE (x) != CONST_DOUBLE || !FLOAT_MODE_P (GET_MODE (x)))
3749 return -1;
3750 /* Note that the 80387 has other constants, such as pi, that we could
3751 support too. On some machines, however, these are much slower to load
3752 as standard constants than to load from doubles in memory. */
3753 if (x == CONST0_RTX (GET_MODE (x)))
3754 return 1;
3755 if (x == CONST1_RTX (GET_MODE (x)))
3756 return 2;
3757 return 0;
3758 }
3759
3760 /* Return 1 if X is an FP constant we can load into an SSE register
3761 without using memory. */
3762 int
3763 standard_sse_constant_p (x)
3764 rtx x;
3765 {
3766 if (x == const0_rtx)
3767 return 1;
3768 return (x == CONST0_RTX (GET_MODE (x)));
3769 }
3770
3771 /* Returns 1 if OP contains a symbol reference. */
3772
3773 int
3774 symbolic_reference_mentioned_p (op)
3775 rtx op;
3776 {
3777 register const char *fmt;
3778 register int i;
3779
3780 if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
3781 return 1;
3782
3783 fmt = GET_RTX_FORMAT (GET_CODE (op));
3784 for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
3785 {
3786 if (fmt[i] == 'E')
3787 {
3788 register int j;
3789
3790 for (j = XVECLEN (op, i) - 1; j >= 0; j--)
3791 if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
3792 return 1;
3793 }
3794
3795 else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
3796 return 1;
3797 }
3798
3799 return 0;
3800 }
3801
3802 /* Return 1 if it is appropriate to emit `ret' instructions in the
3803 body of a function. Do this only if the epilogue is simple, needing a
3804 couple of insns. Prior to reloading, we can't tell how many registers
3805 must be saved, so return 0 then. Return 0 if there is no frame
3806 marker to de-allocate.
3807
3808 If NON_SAVING_SETJMP is defined and true, then it is not possible
3809 for the epilogue to be simple, so return 0. This is a special case
3810 since NON_SAVING_SETJMP will not cause regs_ever_live to change
3811 until final, but jump_optimize may need to know sooner if a
3812 `return' is OK. */
3813
3814 int
3815 ix86_can_use_return_insn_p ()
3816 {
3817 struct ix86_frame frame;
3818
3819 #ifdef NON_SAVING_SETJMP
3820 if (NON_SAVING_SETJMP && current_function_calls_setjmp)
3821 return 0;
3822 #endif
3823
3824 if (! reload_completed || frame_pointer_needed)
3825 return 0;
3826
3827 /* Don't allow functions that pop more than 32768 bytes of arguments,
3828 since that's all we can handle with a single return instruction. */
3829 if (current_function_pops_args
3830 && current_function_args_size >= 32768)
3831 return 0;
3832
3833 ix86_compute_frame_layout (&frame);
3834 return frame.to_allocate == 0 && frame.nregs == 0;
3835 }
3836 \f
3837 /* Return 1 if VALUE can be stored in the sign-extended 32-bit immediate field. */
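/* For example, the CONST_INT 0x7fffffff is accepted, while 0x80000000 is
   rejected, because sign-extending its low 32 bits would change its value. */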
3838 int
3839 x86_64_sign_extended_value (value)
3840 rtx value;
3841 {
3842 switch (GET_CODE (value))
3843 {
3844 /* CONST_DOUBLEs never match, since HOST_BITS_PER_WIDE_INT is known
3845 to be at least 32, and thus all acceptable constants are
3846 represented as CONST_INTs. */
3847 case CONST_INT:
3848 if (HOST_BITS_PER_WIDE_INT == 32)
3849 return 1;
3850 else
3851 {
3852 HOST_WIDE_INT val = trunc_int_for_mode (INTVAL (value), DImode);
3853 return trunc_int_for_mode (val, SImode) == val;
3854 }
3855 break;
3856
3857 /* For certain code models, the symbolic references are known to fit.
3858 In the CM_SMALL_PIC model we know a reference fits if it is local to
3859 the shared library. Don't count TLS SYMBOL_REFs here, since they
3860 should fit only when wrapped in an UNSPEC, handled below. */
3861 case SYMBOL_REF:
3862 return (ix86_cmodel == CM_SMALL || ix86_cmodel == CM_KERNEL);
3863
3864 /* For certain code models, the code is near as well. */
3865 case LABEL_REF:
3866 return (ix86_cmodel == CM_SMALL || ix86_cmodel == CM_MEDIUM
3867 || ix86_cmodel == CM_KERNEL);
3868
3869 /* We also may accept the offsetted memory references in certain special
3870 cases. */
3871 case CONST:
3872 if (GET_CODE (XEXP (value, 0)) == UNSPEC)
3873 switch (XINT (XEXP (value, 0), 1))
3874 {
3875 case UNSPEC_GOTPCREL:
3876 case UNSPEC_DTPOFF:
3877 case UNSPEC_GOTNTPOFF:
3878 case UNSPEC_NTPOFF:
3879 return 1;
3880 default:
3881 break;
3882 }
3883 if (GET_CODE (XEXP (value, 0)) == PLUS)
3884 {
3885 rtx op1 = XEXP (XEXP (value, 0), 0);
3886 rtx op2 = XEXP (XEXP (value, 0), 1);
3887 HOST_WIDE_INT offset;
3888
3889 if (ix86_cmodel == CM_LARGE)
3890 return 0;
3891 if (GET_CODE (op2) != CONST_INT)
3892 return 0;
3893 offset = trunc_int_for_mode (INTVAL (op2), DImode);
3894 switch (GET_CODE (op1))
3895 {
3896 case SYMBOL_REF:
3897 /* For CM_SMALL assume that the latest object is 16MB below the
3898 end of the 31-bit boundary. We may also accept pretty
3899 large negative constants, knowing that all objects are
3900 in the positive half of the address space. */
3901 if (ix86_cmodel == CM_SMALL
3902 && offset < 16*1024*1024
3903 && trunc_int_for_mode (offset, SImode) == offset)
3904 return 1;
3905 /* For CM_KERNEL we know that all objects reside in the
3906 negative half of the 32-bit address space. We must not
3907 accept negative offsets, since they may push the address
3908 out of range, but we may accept pretty large positive ones. */
3909 if (ix86_cmodel == CM_KERNEL
3910 && offset > 0
3911 && trunc_int_for_mode (offset, SImode) == offset)
3912 return 1;
3913 break;
3914 case LABEL_REF:
3915 /* These conditions are similar to SYMBOL_REF ones, just the
3916 constraints for code models differ. */
3917 if ((ix86_cmodel == CM_SMALL || ix86_cmodel == CM_MEDIUM)
3918 && offset < 16*1024*1024
3919 && trunc_int_for_mode (offset, SImode) == offset)
3920 return 1;
3921 if (ix86_cmodel == CM_KERNEL
3922 && offset > 0
3923 && trunc_int_for_mode (offset, SImode) == offset)
3924 return 1;
3925 break;
3926 case UNSPEC:
3927 switch (XINT (op1, 1))
3928 {
3929 case UNSPEC_DTPOFF:
3930 case UNSPEC_NTPOFF:
3931 if (offset > 0
3932 && trunc_int_for_mode (offset, SImode) == offset)
3933 return 1;
3934 }
3935 break;
3936 default:
3937 return 0;
3938 }
3939 }
3940 return 0;
3941 default:
3942 return 0;
3943 }
3944 }
3945
3946 /* Return 1 if VALUE can be stored in the zero-extended 32-bit immediate field. */
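/* For example, the CONST_INT 0x1234 is accepted, while -1 is rejected
   because its upper 32 bits are set. */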
3947 int
3948 x86_64_zero_extended_value (value)
3949 rtx value;
3950 {
3951 switch (GET_CODE (value))
3952 {
3953 case CONST_DOUBLE:
3954 if (HOST_BITS_PER_WIDE_INT == 32)
3955 return (GET_MODE (value) == VOIDmode
3956 && !CONST_DOUBLE_HIGH (value));
3957 else
3958 return 0;
3959 case CONST_INT:
3960 if (HOST_BITS_PER_WIDE_INT == 32)
3961 return INTVAL (value) >= 0;
3962 else
3963 return !(INTVAL (value) & ~(HOST_WIDE_INT) 0xffffffff);
3964 break;
3965
3966 /* For certain code models, the symbolic references are known to fit. */
3967 case SYMBOL_REF:
3968 return ix86_cmodel == CM_SMALL;
3969
3970 /* For certain code models, the code is near as well. */
3971 case LABEL_REF:
3972 return ix86_cmodel == CM_SMALL || ix86_cmodel == CM_MEDIUM;
3973
3974 /* We also may accept the offsetted memory references in certain special
3975 cases. */
3976 case CONST:
3977 if (GET_CODE (XEXP (value, 0)) == PLUS)
3978 {
3979 rtx op1 = XEXP (XEXP (value, 0), 0);
3980 rtx op2 = XEXP (XEXP (value, 0), 1);
3981
3982 if (ix86_cmodel == CM_LARGE)
3983 return 0;
3984 switch (GET_CODE (op1))
3985 {
3986 case SYMBOL_REF:
3987 return 0;
3988 /* For the small code model we may accept pretty large positive
3989 offsets, since one bit is available for free. Negative
3990 offsets are limited by the size of the NULL pointer area
3991 specified by the ABI. */
3992 if (ix86_cmodel == CM_SMALL
3993 && GET_CODE (op2) == CONST_INT
3994 && trunc_int_for_mode (INTVAL (op2), DImode) > -0x10000
3995 && (trunc_int_for_mode (INTVAL (op2), SImode)
3996 == INTVAL (op2)))
3997 return 1;
3998 /* ??? For the kernel, we may accept adjustment of
3999 -0x10000000, since we know that it will just convert
4000 negative address space to positive, but perhaps this
4001 is not worthwhile. */
4002 break;
4003 case LABEL_REF:
4004 /* These conditions are similar to SYMBOL_REF ones, just the
4005 constraints for code models differ. */
4006 if ((ix86_cmodel == CM_SMALL || ix86_cmodel == CM_MEDIUM)
4007 && GET_CODE (op2) == CONST_INT
4008 && trunc_int_for_mode (INTVAL (op2), DImode) > -0x10000
4009 && (trunc_int_for_mode (INTVAL (op2), SImode)
4010 == INTVAL (op2)))
4011 return 1;
4012 break;
4013 default:
4014 return 0;
4015 }
4016 }
4017 return 0;
4018 default:
4019 return 0;
4020 }
4021 }
4022
4023 /* Value should be nonzero if functions must have frame pointers.
4024 Zero means the frame pointer need not be set up (and parms may
4025 be accessed via the stack pointer) in functions that seem suitable. */
4026
4027 int
4028 ix86_frame_pointer_required ()
4029 {
4030 /* If we accessed previous frames, then the generated code expects
4031 to be able to access the saved ebp value in our frame. */
4032 if (cfun->machine->accesses_prev_frame)
4033 return 1;
4034
4035 /* Several x86 OSes need a frame pointer for other reasons,
4036 usually pertaining to setjmp. */
4037 if (SUBTARGET_FRAME_POINTER_REQUIRED)
4038 return 1;
4039
4040 /* In override_options, TARGET_OMIT_LEAF_FRAME_POINTER turns off
4041 the frame pointer by default. Turn it back on now if we've not
4042 got a leaf function. */
4043 if (TARGET_OMIT_LEAF_FRAME_POINTER
4044 && (!current_function_is_leaf))
4045 return 1;
4046
4047 if (current_function_profile)
4048 return 1;
4049
4050 return 0;
4051 }
4052
4053 /* Record that the current function accesses previous call frames. */
4054
4055 void
4056 ix86_setup_frame_addresses ()
4057 {
4058 cfun->machine->accesses_prev_frame = 1;
4059 }
4060 \f
4061 #if defined(HAVE_GAS_HIDDEN) && defined(SUPPORTS_ONE_ONLY)
4062 # define USE_HIDDEN_LINKONCE 1
4063 #else
4064 # define USE_HIDDEN_LINKONCE 0
4065 #endif
4066
4067 static int pic_labels_used;
4068
4069 /* Fills in the label name that should be used for a pc thunk for
4070 the given register. */
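/* For example, with hidden linkonce support the thunk for %ebx is given
   the conventional name "__i686.get_pc_thunk.bx". */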
4071
4072 static void
4073 get_pc_thunk_name (name, regno)
4074 char name[32];
4075 unsigned int regno;
4076 {
4077 if (USE_HIDDEN_LINKONCE)
4078 sprintf (name, "__i686.get_pc_thunk.%s", reg_names[regno]);
4079 else
4080 ASM_GENERATE_INTERNAL_LABEL (name, "LPR", regno);
4081 }
4082
4083
4084 /* This function outputs, for -fpic, the pc thunks that were used: each
4085 thunk loads its register with the caller's return address and then returns. */
4086
4087 void
4088 ix86_asm_file_end (file)
4089 FILE *file;
4090 {
4091 rtx xops[2];
4092 int regno;
4093
4094 for (regno = 0; regno < 8; ++regno)
4095 {
4096 char name[32];
4097
4098 if (! ((pic_labels_used >> regno) & 1))
4099 continue;
4100
4101 get_pc_thunk_name (name, regno);
4102
4103 if (USE_HIDDEN_LINKONCE)
4104 {
4105 tree decl;
4106
4107 decl = build_decl (FUNCTION_DECL, get_identifier (name),
4108 error_mark_node);
4109 TREE_PUBLIC (decl) = 1;
4110 TREE_STATIC (decl) = 1;
4111 DECL_ONE_ONLY (decl) = 1;
4112
4113 (*targetm.asm_out.unique_section) (decl, 0);
4114 named_section (decl, NULL, 0);
4115
4116 (*targetm.asm_out.globalize_label) (file, name);
4117 fputs ("\t.hidden\t", file);
4118 assemble_name (file, name);
4119 fputc ('\n', file);
4120 ASM_DECLARE_FUNCTION_NAME (file, name, decl);
4121 }
4122 else
4123 {
4124 text_section ();
4125 ASM_OUTPUT_LABEL (file, name);
4126 }
4127
4128 xops[0] = gen_rtx_REG (SImode, regno);
4129 xops[1] = gen_rtx_MEM (SImode, stack_pointer_rtx);
4130 output_asm_insn ("mov{l}\t{%1, %0|%0, %1}", xops);
4131 output_asm_insn ("ret", xops);
4132 }
4133 }
4134
4135 /* Emit code for the SET_GOT patterns. */
4136
4137 const char *
4138 output_set_got (dest)
4139 rtx dest;
4140 {
4141 rtx xops[3];
4142
4143 xops[0] = dest;
4144 xops[1] = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME);
4145
4146 if (! TARGET_DEEP_BRANCH_PREDICTION || !flag_pic)
4147 {
4148 xops[2] = gen_rtx_LABEL_REF (Pmode, gen_label_rtx ());
4149
4150 if (!flag_pic)
4151 output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);
4152 else
4153 output_asm_insn ("call\t%a2", xops);
4154
4155 #if TARGET_MACHO
4156 /* Output the "canonical" label name ("Lxx$pb") here too. This
4157 is what will be referred to by the Mach-O PIC subsystem. */
4158 ASM_OUTPUT_LABEL (asm_out_file, machopic_function_base_name ());
4159 #endif
4160 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, "L",
4161 CODE_LABEL_NUMBER (XEXP (xops[2], 0)));
4162
4163 if (flag_pic)
4164 output_asm_insn ("pop{l}\t%0", xops);
4165 }
4166 else
4167 {
4168 char name[32];
4169 get_pc_thunk_name (name, REGNO (dest));
4170 pic_labels_used |= 1 << REGNO (dest);
4171
4172 xops[2] = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
4173 xops[2] = gen_rtx_MEM (QImode, xops[2]);
4174 output_asm_insn ("call\t%X2", xops);
4175 }
4176
4177 if (!flag_pic || TARGET_DEEP_BRANCH_PREDICTION)
4178 output_asm_insn ("add{l}\t{%1, %0|%0, %1}", xops);
4179 else if (!TARGET_MACHO)
4180 output_asm_insn ("add{l}\t{%1+[.-%a2], %0|%0, %a1+(.-%a2)}", xops);
4181
4182 return "";
4183 }
4184
4185 /* Generate a "push" pattern for input ARG. */
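/* The generated RTL has the form (set (mem (pre_dec (reg sp))) arg),
   i.e. a store through a pre-decremented stack pointer. */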
4186
4187 static rtx
4188 gen_push (arg)
4189 rtx arg;
4190 {
4191 return gen_rtx_SET (VOIDmode,
4192 gen_rtx_MEM (Pmode,
4193 gen_rtx_PRE_DEC (Pmode,
4194 stack_pointer_rtx)),
4195 arg);
4196 }
4197
4198 /* Return the number of an unused call-clobbered register that is available
4199 for the entire function, or INVALID_REGNUM if there is none. */
4200
4201 static unsigned int
4202 ix86_select_alt_pic_regnum ()
4203 {
4204 if (current_function_is_leaf && !current_function_profile)
4205 {
4206 int i;
4207 for (i = 2; i >= 0; --i)
4208 if (!regs_ever_live[i])
4209 return i;
4210 }
4211
4212 return INVALID_REGNUM;
4213 }
4214
4215 /* Return 1 if we need to save REGNO. */
4216 static int
4217 ix86_save_reg (regno, maybe_eh_return)
4218 unsigned int regno;
4219 int maybe_eh_return;
4220 {
4221 if (pic_offset_table_rtx
4222 && regno == REAL_PIC_OFFSET_TABLE_REGNUM
4223 && (regs_ever_live[REAL_PIC_OFFSET_TABLE_REGNUM]
4224 || current_function_profile
4225 || current_function_calls_eh_return))
4226 {
4227 if (ix86_select_alt_pic_regnum () != INVALID_REGNUM)
4228 return 0;
4229 return 1;
4230 }
4231
4232 if (current_function_calls_eh_return && maybe_eh_return)
4233 {
4234 unsigned i;
4235 for (i = 0; ; i++)
4236 {
4237 unsigned test = EH_RETURN_DATA_REGNO (i);
4238 if (test == INVALID_REGNUM)
4239 break;
4240 if (test == regno)
4241 return 1;
4242 }
4243 }
4244
4245 return (regs_ever_live[regno]
4246 && !call_used_regs[regno]
4247 && !fixed_regs[regno]
4248 && (regno != HARD_FRAME_POINTER_REGNUM || !frame_pointer_needed));
4249 }
4250
4251 /* Return number of registers to be saved on the stack. */
4252
4253 static int
4254 ix86_nsaved_regs ()
4255 {
4256 int nregs = 0;
4257 int regno;
4258
4259 for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; regno--)
4260 if (ix86_save_reg (regno, true))
4261 nregs++;
4262 return nregs;
4263 }
4264
4265 /* Return the offset between two registers, one to be eliminated, and the other
4266 its replacement, at the start of a routine. */
4267
4268 HOST_WIDE_INT
4269 ix86_initial_elimination_offset (from, to)
4270 int from;
4271 int to;
4272 {
4273 struct ix86_frame frame;
4274 ix86_compute_frame_layout (&frame);
4275
4276 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
4277 return frame.hard_frame_pointer_offset;
4278 else if (from == FRAME_POINTER_REGNUM
4279 && to == HARD_FRAME_POINTER_REGNUM)
4280 return frame.hard_frame_pointer_offset - frame.frame_pointer_offset;
4281 else
4282 {
4283 if (to != STACK_POINTER_REGNUM)
4284 abort ();
4285 else if (from == ARG_POINTER_REGNUM)
4286 return frame.stack_pointer_offset;
4287 else if (from != FRAME_POINTER_REGNUM)
4288 abort ();
4289 else
4290 return frame.stack_pointer_offset - frame.frame_pointer_offset;
4291 }
4292 }
4293
4294 /* Fill the structure ix86_frame describing the frame of the function currently being compiled. */
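/* Roughly, going from the incoming stack pointer downward, the layout
   computed below is: the return address and (if needed) the saved frame
   pointer, the general register save area, the va_arg register save area,
   padding1, the local variables (frame_pointer_offset marks their top),
   the outgoing argument area and padding2 (stack_pointer_offset marks the
   end of the frame). */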
4295
4296 static void
4297 ix86_compute_frame_layout (frame)
4298 struct ix86_frame *frame;
4299 {
4300 HOST_WIDE_INT total_size;
4301 int stack_alignment_needed = cfun->stack_alignment_needed / BITS_PER_UNIT;
4302 int offset;
4303 int preferred_alignment = cfun->preferred_stack_boundary / BITS_PER_UNIT;
4304 HOST_WIDE_INT size = get_frame_size ();
4305
4306 frame->nregs = ix86_nsaved_regs ();
4307 total_size = size;
4308
4309 /* Skip return address and saved base pointer. */
4310 offset = frame_pointer_needed ? UNITS_PER_WORD * 2 : UNITS_PER_WORD;
4311
4312 frame->hard_frame_pointer_offset = offset;
4313
4314 /* Do some sanity checking of stack_alignment_needed and
4315 preferred_alignment, since the i386 port is the only one using these
4316 features, and they may break easily. */
4317
4318 if (size && !stack_alignment_needed)
4319 abort ();
4320 if (preferred_alignment < STACK_BOUNDARY / BITS_PER_UNIT)
4321 abort ();
4322 if (preferred_alignment > PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT)
4323 abort ();
4324 if (stack_alignment_needed > PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT)
4325 abort ();
4326
4327 if (stack_alignment_needed < STACK_BOUNDARY / BITS_PER_UNIT)
4328 stack_alignment_needed = STACK_BOUNDARY / BITS_PER_UNIT;
4329
4330 /* Register save area */
4331 offset += frame->nregs * UNITS_PER_WORD;
4332
4333 /* Va-arg area */
4334 if (ix86_save_varrargs_registers)
4335 {
4336 offset += X86_64_VARARGS_SIZE;
4337 frame->va_arg_size = X86_64_VARARGS_SIZE;
4338 }
4339 else
4340 frame->va_arg_size = 0;
4341
4342 /* Align start of frame for local function. */
4343 frame->padding1 = ((offset + stack_alignment_needed - 1)
4344 & -stack_alignment_needed) - offset;
4345
4346 offset += frame->padding1;
4347
4348 /* Frame pointer points here. */
4349 frame->frame_pointer_offset = offset;
4350
4351 offset += size;
4352
4353 /* Add outgoing arguments area. Can be skipped if we eliminated
4354 all the function calls as dead code. */
4355 if (ACCUMULATE_OUTGOING_ARGS && !current_function_is_leaf)
4356 {
4357 offset += current_function_outgoing_args_size;
4358 frame->outgoing_arguments_size = current_function_outgoing_args_size;
4359 }
4360 else
4361 frame->outgoing_arguments_size = 0;
4362
4363 /* Align stack boundary. Only needed if we're calling another function
4364 or using alloca. */
4365 if (!current_function_is_leaf || current_function_calls_alloca)
4366 frame->padding2 = ((offset + preferred_alignment - 1)
4367 & -preferred_alignment) - offset;
4368 else
4369 frame->padding2 = 0;
4370
4371 offset += frame->padding2;
4372
4373 /* We've reached end of stack frame. */
4374 frame->stack_pointer_offset = offset;
4375
4376 /* Size prologue needs to allocate. */
4377 frame->to_allocate =
4378 (size + frame->padding1 + frame->padding2
4379 + frame->outgoing_arguments_size + frame->va_arg_size);
4380
4381 if (TARGET_64BIT && TARGET_RED_ZONE && current_function_sp_is_unchanging
4382 && current_function_is_leaf)
4383 {
4384 frame->red_zone_size = frame->to_allocate;
4385 if (frame->red_zone_size > RED_ZONE_SIZE - RED_ZONE_RESERVE)
4386 frame->red_zone_size = RED_ZONE_SIZE - RED_ZONE_RESERVE;
4387 }
4388 else
4389 frame->red_zone_size = 0;
4390 frame->to_allocate -= frame->red_zone_size;
4391 frame->stack_pointer_offset -= frame->red_zone_size;
4392 #if 0
4393 fprintf (stderr, "nregs: %i\n", frame->nregs);
4394 fprintf (stderr, "size: %i\n", size);
4395 fprintf (stderr, "alignment1: %i\n", stack_alignment_needed);
4396 fprintf (stderr, "padding1: %i\n", frame->padding1);
4397 fprintf (stderr, "va_arg: %i\n", frame->va_arg_size);
4398 fprintf (stderr, "padding2: %i\n", frame->padding2);
4399 fprintf (stderr, "to_allocate: %i\n", frame->to_allocate);
4400 fprintf (stderr, "red_zone_size: %i\n", frame->red_zone_size);
4401 fprintf (stderr, "frame_pointer_offset: %i\n", frame->frame_pointer_offset);
4402 fprintf (stderr, "hard_frame_pointer_offset: %i\n",
4403 frame->hard_frame_pointer_offset);
4404 fprintf (stderr, "stack_pointer_offset: %i\n", frame->stack_pointer_offset);
4405 #endif
4406 }
4407
4408 /* Emit code to save registers in the prologue. */
4409
4410 static void
4411 ix86_emit_save_regs ()
4412 {
4413 register int regno;
4414 rtx insn;
4415
4416 for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; regno--)
4417 if (ix86_save_reg (regno, true))
4418 {
4419 insn = emit_insn (gen_push (gen_rtx_REG (Pmode, regno)));
4420 RTX_FRAME_RELATED_P (insn) = 1;
4421 }
4422 }
4423
4424 /* Emit code to save registers using MOV insns. The first register
4425 is stored at POINTER + OFFSET. */
4426 static void
4427 ix86_emit_save_regs_using_mov (pointer, offset)
4428 rtx pointer;
4429 HOST_WIDE_INT offset;
4430 {
4431 int regno;
4432 rtx insn;
4433
4434 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
4435 if (ix86_save_reg (regno, true))
4436 {
4437 insn = emit_move_insn (adjust_address (gen_rtx_MEM (Pmode, pointer),
4438 Pmode, offset),
4439 gen_rtx_REG (Pmode, regno));
4440 RTX_FRAME_RELATED_P (insn) = 1;
4441 offset += UNITS_PER_WORD;
4442 }
4443 }
4444
4445 /* Expand the prologue into a bunch of separate insns. */
4446
4447 void
4448 ix86_expand_prologue ()
4449 {
4450 rtx insn;
4451 bool pic_reg_used;
4452 struct ix86_frame frame;
4453 int use_mov = 0;
4454 HOST_WIDE_INT allocate;
4455
4456 if (!optimize_size)
4457 {
4458 use_fast_prologue_epilogue
4459 = !expensive_function_p (FAST_PROLOGUE_INSN_COUNT);
4460 if (TARGET_PROLOGUE_USING_MOVE)
4461 use_mov = use_fast_prologue_epilogue;
4462 }
4463 ix86_compute_frame_layout (&frame);
4464
4465 /* Note: AT&T enter does NOT have reversed args. Enter is probably
4466 slower on all targets. Also sdb doesn't like it. */
4467
4468 if (frame_pointer_needed)
4469 {
4470 insn = emit_insn (gen_push (hard_frame_pointer_rtx));
4471 RTX_FRAME_RELATED_P (insn) = 1;
4472
4473 insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
4474 RTX_FRAME_RELATED_P (insn) = 1;
4475 }
4476
4477 allocate = frame.to_allocate;
4478 /* In the case where we are dealing with only a single register and an
4479 empty frame, a push is equivalent to the mov+add sequence. */
4480 if (allocate == 0 && frame.nregs <= 1)
4481 use_mov = 0;
4482
4483 if (!use_mov)
4484 ix86_emit_save_regs ();
4485 else
4486 allocate += frame.nregs * UNITS_PER_WORD;
4487
4488 if (allocate == 0)
4489 ;
4490 else if (! TARGET_STACK_PROBE || allocate < CHECK_STACK_LIMIT)
4491 {
4492 insn = emit_insn (gen_pro_epilogue_adjust_stack
4493 (stack_pointer_rtx, stack_pointer_rtx,
4494 GEN_INT (-allocate)));
4495 RTX_FRAME_RELATED_P (insn) = 1;
4496 }
4497 else
4498 {
4499 /* ??? Is this only valid for Win32? */
4500
4501 rtx arg0, sym;
4502
4503 if (TARGET_64BIT)
4504 abort ();
4505
4506 arg0 = gen_rtx_REG (SImode, 0);
4507 emit_move_insn (arg0, GEN_INT (allocate));
4508
4509 sym = gen_rtx_MEM (FUNCTION_MODE,
4510 gen_rtx_SYMBOL_REF (Pmode, "_alloca"));
4511 insn = emit_call_insn (gen_call (sym, const0_rtx, constm1_rtx));
4512
4513 CALL_INSN_FUNCTION_USAGE (insn)
4514 = gen_rtx_EXPR_LIST (VOIDmode, gen_rtx_USE (VOIDmode, arg0),
4515 CALL_INSN_FUNCTION_USAGE (insn));
4516 }
4517 if (use_mov)
4518 {
4519 if (!frame_pointer_needed || !frame.to_allocate)
4520 ix86_emit_save_regs_using_mov (stack_pointer_rtx, frame.to_allocate);
4521 else
4522 ix86_emit_save_regs_using_mov (hard_frame_pointer_rtx,
4523 -frame.nregs * UNITS_PER_WORD);
4524 }
4525
4526 #ifdef SUBTARGET_PROLOGUE
4527 SUBTARGET_PROLOGUE;
4528 #endif
4529
4530 pic_reg_used = false;
4531 if (pic_offset_table_rtx
4532 && (regs_ever_live[REAL_PIC_OFFSET_TABLE_REGNUM]
4533 || current_function_profile))
4534 {
4535 unsigned int alt_pic_reg_used = ix86_select_alt_pic_regnum ();
4536
4537 if (alt_pic_reg_used != INVALID_REGNUM)
4538 REGNO (pic_offset_table_rtx) = alt_pic_reg_used;
4539
4540 pic_reg_used = true;
4541 }
4542
4543 if (pic_reg_used)
4544 {
4545 insn = emit_insn (gen_set_got (pic_offset_table_rtx));
4546
4547 /* Even with accurate pre-reload life analysis, we can wind up
4548 deleting all references to the pic register after reload.
4549 Consider if cross-jumping unifies two sides of a branch
4550 controlled by a comparison vs the only read from a global.
4551 In that case, allow the set_got to be deleted, though we're
4552 too late to do anything about the ebx save in the prologue. */
4553 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD, const0_rtx, NULL);
4554 }
4555
4556 /* Prevent function calls from being scheduled before the call to mcount.
4557 In the pic_reg_used case, make sure that the got load isn't deleted. */
4558 if (current_function_profile)
4559 emit_insn (gen_blockage (pic_reg_used ? pic_offset_table_rtx : const0_rtx));
4560 }
4561
4562 /* Emit code to restore saved registers using MOV insns. First register
4563 is restored from POINTER + OFFSET. */
4564 static void
4565 ix86_emit_restore_regs_using_mov (pointer, offset, maybe_eh_return)
4566 rtx pointer;
4567 int offset;
4568 int maybe_eh_return;
4569 {
4570 int regno;
4571
4572 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
4573 if (ix86_save_reg (regno, maybe_eh_return))
4574 {
4575 emit_move_insn (gen_rtx_REG (Pmode, regno),
4576 adjust_address (gen_rtx_MEM (Pmode, pointer),
4577 Pmode, offset));
4578 offset += UNITS_PER_WORD;
4579 }
4580 }
4581
4582 /* Restore function stack, frame, and registers. */
4583
4584 void
4585 ix86_expand_epilogue (style)
4586 int style;
4587 {
4588 int regno;
4589 int sp_valid = !frame_pointer_needed || current_function_sp_is_unchanging;
4590 struct ix86_frame frame;
4591 HOST_WIDE_INT offset;
4592
4593 ix86_compute_frame_layout (&frame);
4594
4595 /* Calculate start of saved registers relative to ebp. Special care
4596 must be taken for the normal return case of a function using
4597 eh_return: the eax and edx registers are marked as saved, but not
4598 restored along this path. */
4599 offset = frame.nregs;
4600 if (current_function_calls_eh_return && style != 2)
4601 offset -= 2;
4602 offset *= -UNITS_PER_WORD;
4603
4604 /* If we're only restoring one register and sp is not valid then
4605 use a move instruction to restore the register, since it's
4606 less work than reloading sp and popping the register.
4607
4608 The default code results in a stack adjustment using an add/lea
4609 instruction, while this code results in a LEAVE instruction (or its
4610 discrete equivalent), so it is profitable in some other cases as well,
4611 especially when there are no registers to restore. We also use this code
4612 when TARGET_USE_LEAVE is set and there is exactly one register to pop.
4613 This heuristic may need some tuning in the future. */
4614 if ((!sp_valid && frame.nregs <= 1)
4615 || (TARGET_EPILOGUE_USING_MOVE
4616 && use_fast_prologue_epilogue
4617 && (frame.nregs > 1 || frame.to_allocate))
4618 || (frame_pointer_needed && !frame.nregs && frame.to_allocate)
4619 || (frame_pointer_needed && TARGET_USE_LEAVE
4620 && use_fast_prologue_epilogue && frame.nregs == 1)
4621 || current_function_calls_eh_return)
4622 {
4623 /* Restore registers. We can use ebp or esp to address the memory
4624 locations. If both are available, default to ebp, since offsets
4625 are known to be small. The only exception is esp pointing directly
4626 to the end of the block of saved registers, where we may simplify
4627 the addressing mode. */
4628
4629 if (!frame_pointer_needed || (sp_valid && !frame.to_allocate))
4630 ix86_emit_restore_regs_using_mov (stack_pointer_rtx,
4631 frame.to_allocate, style == 2);
4632 else
4633 ix86_emit_restore_regs_using_mov (hard_frame_pointer_rtx,
4634 offset, style == 2);
4635
4636 /* eh_return epilogues need %ecx added to the stack pointer. */
4637 if (style == 2)
4638 {
4639 rtx tmp, sa = EH_RETURN_STACKADJ_RTX;
4640
4641 if (frame_pointer_needed)
4642 {
4643 tmp = gen_rtx_PLUS (Pmode, hard_frame_pointer_rtx, sa);
4644 tmp = plus_constant (tmp, UNITS_PER_WORD);
4645 emit_insn (gen_rtx_SET (VOIDmode, sa, tmp));
4646
4647 tmp = gen_rtx_MEM (Pmode, hard_frame_pointer_rtx);
4648 emit_move_insn (hard_frame_pointer_rtx, tmp);
4649
4650 emit_insn (gen_pro_epilogue_adjust_stack
4651 (stack_pointer_rtx, sa, const0_rtx));
4652 }
4653 else
4654 {
4655 tmp = gen_rtx_PLUS (Pmode, stack_pointer_rtx, sa);
4656 tmp = plus_constant (tmp, (frame.to_allocate
4657 + frame.nregs * UNITS_PER_WORD));
4658 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx, tmp));
4659 }
4660 }
4661 else if (!frame_pointer_needed)
4662 emit_insn (gen_pro_epilogue_adjust_stack
4663 (stack_pointer_rtx, stack_pointer_rtx,
4664 GEN_INT (frame.to_allocate
4665 + frame.nregs * UNITS_PER_WORD)));
4666 /* If not an i386, mov & pop is faster than "leave". */
4667 else if (TARGET_USE_LEAVE || optimize_size || !use_fast_prologue_epilogue)
4668 emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ());
4669 else
4670 {
4671 emit_insn (gen_pro_epilogue_adjust_stack (stack_pointer_rtx,
4672 hard_frame_pointer_rtx,
4673 const0_rtx));
4674 if (TARGET_64BIT)
4675 emit_insn (gen_popdi1 (hard_frame_pointer_rtx));
4676 else
4677 emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
4678 }
4679 }
4680 else
4681 {
4682 /* First step is to deallocate the stack frame so that we can
4683 pop the registers. */
4684 if (!sp_valid)
4685 {
4686 if (!frame_pointer_needed)
4687 abort ();
4688 emit_insn (gen_pro_epilogue_adjust_stack (stack_pointer_rtx,
4689 hard_frame_pointer_rtx,
4690 GEN_INT (offset)));
4691 }
4692 else if (frame.to_allocate)
4693 emit_insn (gen_pro_epilogue_adjust_stack
4694 (stack_pointer_rtx, stack_pointer_rtx,
4695 GEN_INT (frame.to_allocate)));
4696
4697 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
4698 if (ix86_save_reg (regno, false))
4699 {
4700 if (TARGET_64BIT)
4701 emit_insn (gen_popdi1 (gen_rtx_REG (Pmode, regno)));
4702 else
4703 emit_insn (gen_popsi1 (gen_rtx_REG (Pmode, regno)));
4704 }
4705 if (frame_pointer_needed)
4706 {
4707 /* Leave results in shorter dependency chains on CPUs that are
4708 able to grok it fast. */
4709 if (TARGET_USE_LEAVE)
4710 emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ());
4711 else if (TARGET_64BIT)
4712 emit_insn (gen_popdi1 (hard_frame_pointer_rtx));
4713 else
4714 emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
4715 }
4716 }
4717
4718 /* Sibcall epilogues don't want a return instruction. */
4719 if (style == 0)
4720 return;
4721
4722 if (current_function_pops_args && current_function_args_size)
4723 {
4724 rtx popc = GEN_INT (current_function_pops_args);
4725
4726 /* i386 can only pop 64K bytes. If asked to pop more, pop
4727 return address, do explicit add, and jump indirectly to the
4728 caller. */
4729
4730 if (current_function_pops_args >= 65536)
4731 {
4732 rtx ecx = gen_rtx_REG (SImode, 2);
4733
4734 /* There is no "pascal" calling convention in the 64-bit ABI. */
4735 if (TARGET_64BIT)
4736 abort ();
4737
4738 emit_insn (gen_popsi1 (ecx));
4739 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, popc));
4740 emit_jump_insn (gen_return_indirect_internal (ecx));
4741 }
4742 else
4743 emit_jump_insn (gen_return_pop_internal (popc));
4744 }
4745 else
4746 emit_jump_insn (gen_return_internal ());
4747 }
4748
4749 /* Reset the PIC register from the function's potential modifications (the prologue may have redirected it to an alternate register). */
4750
4751 static void
4752 ix86_output_function_epilogue (file, size)
4753 FILE *file ATTRIBUTE_UNUSED;
4754 HOST_WIDE_INT size ATTRIBUTE_UNUSED;
4755 {
4756 if (pic_offset_table_rtx)
4757 REGNO (pic_offset_table_rtx) = REAL_PIC_OFFSET_TABLE_REGNUM;
4758 }
4759 \f
4760 /* Extract the parts of an RTL expression that is a valid memory address
4761 for an instruction. Return 0 if the structure of the address is
4762 grossly off. Return -1 if the address contains ASHIFT, so it is not
4763 strictly valid, but is still used for computing the length of an lea
4764 instruction. */
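/* For example, (plus (plus (mult (reg A) (const_int 4)) (reg B))
   (const_int 8)) decomposes into index A, scale 4, base B and
   displacement 8. */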
4765
4766 static int
4767 ix86_decompose_address (addr, out)
4768 register rtx addr;
4769 struct ix86_address *out;
4770 {
4771 rtx base = NULL_RTX;
4772 rtx index = NULL_RTX;
4773 rtx disp = NULL_RTX;
4774 HOST_WIDE_INT scale = 1;
4775 rtx scale_rtx = NULL_RTX;
4776 int retval = 1;
4777
4778 if (REG_P (addr) || GET_CODE (addr) == SUBREG)
4779 base = addr;
4780 else if (GET_CODE (addr) == PLUS)
4781 {
4782 rtx op0 = XEXP (addr, 0);
4783 rtx op1 = XEXP (addr, 1);
4784 enum rtx_code code0 = GET_CODE (op0);
4785 enum rtx_code code1 = GET_CODE (op1);
4786
4787 if (code0 == REG || code0 == SUBREG)
4788 {
4789 if (code1 == REG || code1 == SUBREG)
4790 index = op0, base = op1; /* index + base */
4791 else
4792 base = op0, disp = op1; /* base + displacement */
4793 }
4794 else if (code0 == MULT)
4795 {
4796 index = XEXP (op0, 0);
4797 scale_rtx = XEXP (op0, 1);
4798 if (code1 == REG || code1 == SUBREG)
4799 base = op1; /* index*scale + base */
4800 else
4801 disp = op1; /* index*scale + disp */
4802 }
4803 else if (code0 == PLUS && GET_CODE (XEXP (op0, 0)) == MULT)
4804 {
4805 index = XEXP (XEXP (op0, 0), 0); /* index*scale + base + disp */
4806 scale_rtx = XEXP (XEXP (op0, 0), 1);
4807 base = XEXP (op0, 1);
4808 disp = op1;
4809 }
4810 else if (code0 == PLUS)
4811 {
4812 index = XEXP (op0, 0); /* index + base + disp */
4813 base = XEXP (op0, 1);
4814 disp = op1;
4815 }
4816 else
4817 return 0;
4818 }
4819 else if (GET_CODE (addr) == MULT)
4820 {
4821 index = XEXP (addr, 0); /* index*scale */
4822 scale_rtx = XEXP (addr, 1);
4823 }
4824 else if (GET_CODE (addr) == ASHIFT)
4825 {
4826 rtx tmp;
4827
4828 /* We're called for lea too, which implements ashift on occasion. */
4829 index = XEXP (addr, 0);
4830 tmp = XEXP (addr, 1);
4831 if (GET_CODE (tmp) != CONST_INT)
4832 return 0;
4833 scale = INTVAL (tmp);
4834 if ((unsigned HOST_WIDE_INT) scale > 3)
4835 return 0;
4836 scale = 1 << scale;
4837 retval = -1;
4838 }
4839 else
4840 disp = addr; /* displacement */
4841
4842 /* Extract the integral value of scale. */
4843 if (scale_rtx)
4844 {
4845 if (GET_CODE (scale_rtx) != CONST_INT)
4846 return 0;
4847 scale = INTVAL (scale_rtx);
4848 }
4849
4850 /* Allow the arg pointer and stack pointer as the index if there is no scaling. */
4851 if (base && index && scale == 1
4852 && (index == arg_pointer_rtx || index == frame_pointer_rtx
4853 || index == stack_pointer_rtx))
4854 {
4855 rtx tmp = base;
4856 base = index;
4857 index = tmp;
4858 }
4859
4860 /* Special case: %ebp cannot be encoded as a base without a displacement. */
4861 if ((base == hard_frame_pointer_rtx
4862 || base == frame_pointer_rtx
4863 || base == arg_pointer_rtx) && !disp)
4864 disp = const0_rtx;
4865
4866 /* Special case: on K6, [%esi] causes the instruction to be vector decoded.
4867 Avoid this by transforming it to [%esi+0]. */
4868 if (ix86_cpu == PROCESSOR_K6 && !optimize_size
4869 && base && !index && !disp
4870 && REG_P (base)
4871 && REGNO_REG_CLASS (REGNO (base)) == SIREG)
4872 disp = const0_rtx;
4873
4874 /* Special case: encode reg+reg instead of reg*2. */
4875 if (!base && index && scale && scale == 2)
4876 base = index, scale = 1;
4877
4878 /* Special case: scaling cannot be encoded without base or displacement. */
4879 if (!base && !disp && index && scale != 1)
4880 disp = const0_rtx;
4881
4882 out->base = base;
4883 out->index = index;
4884 out->disp = disp;
4885 out->scale = scale;
4886
4887 return retval;
4888 }
4889 \f
4890 /* Return the cost of the memory address X.
4891 For i386, it is better to use a complex address than to let gcc copy
4892 the address into a reg and make a new pseudo. But not if the address
4893 requires two regs - that would mean more pseudos with longer
4894 lifetimes. */
4895 int
4896 ix86_address_cost (x)
4897 rtx x;
4898 {
4899 struct ix86_address parts;
4900 int cost = 1;
4901
4902 if (!ix86_decompose_address (x, &parts))
4903 abort ();
4904
4905 if (parts.base && GET_CODE (parts.base) == SUBREG)
4906 parts.base = SUBREG_REG (parts.base);
4907 if (parts.index && GET_CODE (parts.index) == SUBREG)
4908 parts.index = SUBREG_REG (parts.index);
4909
4910 /* More complex memory references are better. */
4911 if (parts.disp && parts.disp != const0_rtx)
4912 cost--;
4913
4914 /* Attempt to minimize number of registers in the address. */
4915 if ((parts.base
4916 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER))
4917 || (parts.index
4918 && (!REG_P (parts.index)
4919 || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)))
4920 cost++;
4921
4922 if (parts.base
4923 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER)
4924 && parts.index
4925 && (!REG_P (parts.index) || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)
4926 && parts.base != parts.index)
4927 cost++;
4928
4929 /* The AMD-K6 doesn't like addresses with the ModR/M byte set to 00_xxx_100b,
4930 since its predecode logic can't detect the length of such instructions
4931 and they degenerate to vector decoded. Increase the cost of such
4932 addresses here. The penalty is minimally 2 cycles. It may be worthwhile
4933 to split such addresses or even to refuse them entirely.
4934
4935 The following addressing modes are affected:
4936 [base+scale*index]
4937 [scale*index+disp]
4938 [base+index]
4939
4940 The first and last cases may be avoidable by explicitly coding a zero
4941 displacement into the memory address, but I don't have an AMD-K6 machine
4942 handy to check this theory. */
4943
4944 if (TARGET_K6
4945 && ((!parts.disp && parts.base && parts.index && parts.scale != 1)
4946 || (parts.disp && !parts.base && parts.index && parts.scale != 1)
4947 || (!parts.disp && parts.base && parts.index && parts.scale == 1)))
4948 cost += 10;
4949
4950 return cost;
4951 }
4952 \f
4953 /* If X is a machine specific address (i.e. a symbol or label being
4954 referenced as a displacement from the GOT implemented using an
4955 UNSPEC), then return the base term. Otherwise return X. */
4956
4957 rtx
4958 ix86_find_base_term (x)
4959 rtx x;
4960 {
4961 rtx term;
4962
4963 if (TARGET_64BIT)
4964 {
4965 if (GET_CODE (x) != CONST)
4966 return x;
4967 term = XEXP (x, 0);
4968 if (GET_CODE (term) == PLUS
4969 && (GET_CODE (XEXP (term, 1)) == CONST_INT
4970 || GET_CODE (XEXP (term, 1)) == CONST_DOUBLE))
4971 term = XEXP (term, 0);
4972 if (GET_CODE (term) != UNSPEC
4973 || XINT (term, 1) != UNSPEC_GOTPCREL)
4974 return x;
4975
4976 term = XVECEXP (term, 0, 0);
4977
4978 if (GET_CODE (term) != SYMBOL_REF
4979 && GET_CODE (term) != LABEL_REF)
4980 return x;
4981
4982 return term;
4983 }
4984
4985 if (GET_CODE (x) != PLUS
4986 || XEXP (x, 0) != pic_offset_table_rtx
4987 || GET_CODE (XEXP (x, 1)) != CONST)
4988 return x;
4989
4990 term = XEXP (XEXP (x, 1), 0);
4991
4992 if (GET_CODE (term) == PLUS && GET_CODE (XEXP (term, 1)) == CONST_INT)
4993 term = XEXP (term, 0);
4994
4995 if (GET_CODE (term) != UNSPEC
4996 || XINT (term, 1) != UNSPEC_GOTOFF)
4997 return x;
4998
4999 term = XVECEXP (term, 0, 0);
5000
5001 if (GET_CODE (term) != SYMBOL_REF
5002 && GET_CODE (term) != LABEL_REF)
5003 return x;
5004
5005 return term;
5006 }
5007 \f
5008 /* Determine if a given RTX is a valid constant. We already know this
5009 satisfies CONSTANT_P. */
5010
5011 bool
5012 legitimate_constant_p (x)
5013 rtx x;
5014 {
5015 rtx inner;
5016
5017 switch (GET_CODE (x))
5018 {
5019 case SYMBOL_REF:
5020 /* TLS symbols are not constant. */
5021 if (tls_symbolic_operand (x, Pmode))
5022 return false;
5023 break;
5024
5025 case CONST:
5026 inner = XEXP (x, 0);
5027
5028 /* Offsets of TLS symbols are never valid.
5029 Discourage CSE from creating them. */
5030 if (GET_CODE (inner) == PLUS
5031 && tls_symbolic_operand (XEXP (inner, 0), Pmode))
5032 return false;
5033
5034 /* Only some unspecs are valid as "constants". */
5035 if (GET_CODE (inner) == UNSPEC)
5036 switch (XINT (inner, 1))
5037 {
5038 case UNSPEC_TPOFF:
5039 return local_exec_symbolic_operand (XVECEXP (inner, 0, 0), Pmode);
5040 default:
5041 return false;
5042 }
5043 break;
5044
5045 default:
5046 break;
5047 }
5048
5049 /* Otherwise we handle everything else in the move patterns. */
5050 return true;
5051 }
5052
5053 /* Determine if a given RTX is a valid constant address. */
5054
5055 bool
5056 constant_address_p (x)
5057 rtx x;
5058 {
5059 switch (GET_CODE (x))
5060 {
5061 case LABEL_REF:
5062 case CONST_INT:
5063 return true;
5064
5065 case CONST_DOUBLE:
5066 return TARGET_64BIT;
5067
5068 case CONST:
5069 /* For Mach-O, really believe the CONST. */
5070 if (TARGET_MACHO)
5071 return true;
5072 /* Otherwise fall through. */
5073 case SYMBOL_REF:
5074 return !flag_pic && legitimate_constant_p (x);
5075
5076 default:
5077 return false;
5078 }
5079 }
5080
5081 /* Nonzero if the constant value X is a legitimate general operand
5082 when generating PIC code. It is given that flag_pic is on and
5083 that X satisfies CONSTANT_P or is a CONST_DOUBLE. */
5084
5085 bool
5086 legitimate_pic_operand_p (x)
5087 rtx x;
5088 {
5089 rtx inner;
5090
5091 switch (GET_CODE (x))
5092 {
5093 case CONST:
5094 inner = XEXP (x, 0);
5095
5096 /* Only some unspecs are valid as "constants". */
5097 if (GET_CODE (inner) == UNSPEC)
5098 switch (XINT (inner, 1))
5099 {
5100 case UNSPEC_TPOFF:
5101 return local_exec_symbolic_operand (XVECEXP (inner, 0, 0), Pmode);
5102 default:
5103 return false;
5104 }
5105 /* FALLTHRU */
5106
5107 case SYMBOL_REF:
5108 case LABEL_REF:
5109 return legitimate_pic_address_disp_p (x);
5110
5111 default:
5112 return true;
5113 }
5114 }
5115
5116 /* Determine if a given CONST RTX is a valid memory displacement
5117 in PIC mode. */
5118
5119 int
5120 legitimate_pic_address_disp_p (disp)
5121 register rtx disp;
5122 {
5123 bool saw_plus;
5124
5125 /* In 64bit mode we can allow direct addresses of symbols and labels
5126 when they are not dynamic symbols. */
5127 if (TARGET_64BIT)
5128 {
5129 /* TLS references should always be enclosed in UNSPEC. */
5130 if (tls_symbolic_operand (disp, GET_MODE (disp)))
5131 return 0;
5132 if (GET_CODE (disp) == SYMBOL_REF
5133 && ix86_cmodel == CM_SMALL_PIC
5134 && (CONSTANT_POOL_ADDRESS_P (disp)
5135 || SYMBOL_REF_FLAG (disp)))
5136 return 1;
5137 if (GET_CODE (disp) == LABEL_REF)
5138 return 1;
5139 if (GET_CODE (disp) == CONST
5140 && GET_CODE (XEXP (disp, 0)) == PLUS
5141 && ((GET_CODE (XEXP (XEXP (disp, 0), 0)) == SYMBOL_REF
5142 && ix86_cmodel == CM_SMALL_PIC
5143 && (CONSTANT_POOL_ADDRESS_P (XEXP (XEXP (disp, 0), 0))
5144 || SYMBOL_REF_FLAG (XEXP (XEXP (disp, 0), 0))))
5145 || GET_CODE (XEXP (XEXP (disp, 0), 0)) == LABEL_REF)
5146 && GET_CODE (XEXP (XEXP (disp, 0), 1)) == CONST_INT
5147 && INTVAL (XEXP (XEXP (disp, 0), 1)) < 16*1024*1024
5148 && INTVAL (XEXP (XEXP (disp, 0), 1)) >= -16*1024*1024)
5149 return 1;
5150 }
5151 if (GET_CODE (disp) != CONST)
5152 return 0;
5153 disp = XEXP (disp, 0);
5154
5155 if (TARGET_64BIT)
5156 {
5157 /* It is unsafe to allow PLUS expressions here, since that would lift the
5158 limit on the allowed distance of GOT table references. We should not need these anyway. */
5159 if (GET_CODE (disp) != UNSPEC
5160 || XINT (disp, 1) != UNSPEC_GOTPCREL)
5161 return 0;
5162
5163 if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF
5164 && GET_CODE (XVECEXP (disp, 0, 0)) != LABEL_REF)
5165 return 0;
5166 return 1;
5167 }
5168
5169 saw_plus = false;
5170 if (GET_CODE (disp) == PLUS)
5171 {
5172 if (GET_CODE (XEXP (disp, 1)) != CONST_INT)
5173 return 0;
5174 disp = XEXP (disp, 0);
5175 saw_plus = true;
5176 }
5177
5178 /* Allow {LABEL | SYMBOL}_REF - SYMBOL_REF-FOR-PICBASE for Mach-O. */
5179 if (TARGET_MACHO && GET_CODE (disp) == MINUS)
5180 {
5181 if (GET_CODE (XEXP (disp, 0)) == LABEL_REF
5182 || GET_CODE (XEXP (disp, 0)) == SYMBOL_REF)
5183 if (GET_CODE (XEXP (disp, 1)) == SYMBOL_REF)
5184 {
5185 const char *sym_name = XSTR (XEXP (disp, 1), 0);
5186 if (strstr (sym_name, "$pb") != 0)
5187 return 1;
5188 }
5189 }
5190
5191 if (GET_CODE (disp) != UNSPEC)
5192 return 0;
5193
5194 switch (XINT (disp, 1))
5195 {
5196 case UNSPEC_GOT:
5197 if (saw_plus)
5198 return false;
5199 return GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF;
5200 case UNSPEC_GOTOFF:
5201 return local_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
5202 case UNSPEC_GOTTPOFF:
5203 case UNSPEC_GOTNTPOFF:
5204 case UNSPEC_INDNTPOFF:
5205 if (saw_plus)
5206 return false;
5207 return initial_exec_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
5208 case UNSPEC_NTPOFF:
5209 return local_exec_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
5210 case UNSPEC_DTPOFF:
5211 return local_dynamic_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
5212 }
5213
5214 return 0;
5215 }
5216
5217 /* GO_IF_LEGITIMATE_ADDRESS recognizes an RTL expression that is a valid
5218 memory address for an instruction. The MODE argument is the machine mode
5219 for the MEM expression that wants to use this address.
5220
5221 It only recognizes addresses in canonical form. LEGITIMIZE_ADDRESS should
5222 convert common non-canonical forms to canonical form so that they will
5223 be recognized. */
5224
5225 int
5226 legitimate_address_p (mode, addr, strict)
5227 enum machine_mode mode;
5228 register rtx addr;
5229 int strict;
5230 {
5231 struct ix86_address parts;
5232 rtx base, index, disp;
5233 HOST_WIDE_INT scale;
5234 const char *reason = NULL;
5235 rtx reason_rtx = NULL_RTX;
5236
5237 if (TARGET_DEBUG_ADDR)
5238 {
5239 fprintf (stderr,
5240 "\n======\nGO_IF_LEGITIMATE_ADDRESS, mode = %s, strict = %d\n",
5241 GET_MODE_NAME (mode), strict);
5242 debug_rtx (addr);
5243 }
5244
5245 if (GET_CODE (addr) == UNSPEC && XINT (addr, 1) == UNSPEC_TP)
5246 {
5247 if (TARGET_DEBUG_ADDR)
5248 fprintf (stderr, "Success.\n");
5249 return TRUE;
5250 }
5251
5252 if (ix86_decompose_address (addr, &parts) <= 0)
5253 {
5254 reason = "decomposition failed";
5255 goto report_error;
5256 }
5257
5258 base = parts.base;
5259 index = parts.index;
5260 disp = parts.disp;
5261 scale = parts.scale;
5262
5263 /* Validate base register.
5264
5265 Don't allow SUBREGs here; they can lead to spill failures when the base
5266 is one word out of a two word structure, which is represented internally
5267 as a DImode int. */
5268
5269 if (base)
5270 {
5271 rtx reg;
5272 reason_rtx = base;
5273
5274 if (GET_CODE (base) == SUBREG)
5275 reg = SUBREG_REG (base);
5276 else
5277 reg = base;
5278
5279 if (GET_CODE (reg) != REG)
5280 {
5281 reason = "base is not a register";
5282 goto report_error;
5283 }
5284
5285 if (GET_MODE (base) != Pmode)
5286 {
5287 reason = "base is not in Pmode";
5288 goto report_error;
5289 }
5290
5291 if ((strict && ! REG_OK_FOR_BASE_STRICT_P (reg))
5292 || (! strict && ! REG_OK_FOR_BASE_NONSTRICT_P (reg)))
5293 {
5294 reason = "base is not valid";
5295 goto report_error;
5296 }
5297 }
5298
5299 /* Validate index register.
5300
5301 Don't allow SUBREGs here; they can lead to spill failures when the index
5302 is one word out of a two word structure, which is represented internally
5303 as a DImode int. */
5304
5305 if (index)
5306 {
5307 rtx reg;
5308 reason_rtx = index;
5309
5310 if (GET_CODE (index) == SUBREG)
5311 reg = SUBREG_REG (index);
5312 else
5313 reg = index;
5314
5315 if (GET_CODE (reg) != REG)
5316 {
5317 reason = "index is not a register";
5318 goto report_error;
5319 }
5320
5321 if (GET_MODE (index) != Pmode)
5322 {
5323 reason = "index is not in Pmode";
5324 goto report_error;
5325 }
5326
5327 if ((strict && ! REG_OK_FOR_INDEX_STRICT_P (reg))
5328 || (! strict && ! REG_OK_FOR_INDEX_NONSTRICT_P (reg)))
5329 {
5330 reason = "index is not valid";
5331 goto report_error;
5332 }
5333 }
5334
5335 /* Validate scale factor. */
5336 if (scale != 1)
5337 {
5338 reason_rtx = GEN_INT (scale);
5339 if (!index)
5340 {
5341 reason = "scale without index";
5342 goto report_error;
5343 }
5344
5345 if (scale != 2 && scale != 4 && scale != 8)
5346 {
5347 reason = "scale is not a valid multiplier";
5348 goto report_error;
5349 }
5350 }
5351
5352 /* Validate displacement. */
5353 if (disp)
5354 {
5355 reason_rtx = disp;
5356
5357 if (GET_CODE (disp) == CONST
5358 && GET_CODE (XEXP (disp, 0)) == UNSPEC)
5359 switch (XINT (XEXP (disp, 0), 1))
5360 {
5361 case UNSPEC_GOT:
5362 case UNSPEC_GOTOFF:
5363 case UNSPEC_GOTPCREL:
5364 if (!flag_pic)
5365 abort ();
5366 goto is_legitimate_pic;
5367
5368 case UNSPEC_GOTTPOFF:
5369 case UNSPEC_GOTNTPOFF:
5370 case UNSPEC_INDNTPOFF:
5371 case UNSPEC_NTPOFF:
5372 case UNSPEC_DTPOFF:
5373 break;
5374
5375 default:
5376 reason = "invalid address unspec";
5377 goto report_error;
5378 }
5379
5380 else if (flag_pic && (SYMBOLIC_CONST (disp)
5381 #if TARGET_MACHO
5382 && !machopic_operand_p (disp)
5383 #endif
5384 ))
5385 {
5386 is_legitimate_pic:
5387 if (TARGET_64BIT && (index || base))
5388 {
5389 /* foo@dtpoff(%rX) is ok. */
5390 if (GET_CODE (disp) != CONST
5391 || GET_CODE (XEXP (disp, 0)) != PLUS
5392 || GET_CODE (XEXP (XEXP (disp, 0), 0)) != UNSPEC
5393 || GET_CODE (XEXP (XEXP (disp, 0), 1)) != CONST_INT
5394 || (XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_DTPOFF
5395 && XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_NTPOFF))
5396 {
5397 reason = "non-constant pic memory reference";
5398 goto report_error;
5399 }
5400 }
5401 else if (! legitimate_pic_address_disp_p (disp))
5402 {
5403 reason = "displacement is an invalid pic construct";
5404 goto report_error;
5405 }
5406
5407 /* This code used to verify that a symbolic pic displacement
5408 includes the pic_offset_table_rtx register.
5409
5410 While this is a good idea, unfortunately these constructs may
5411 be created by the "adds using lea" optimization for incorrect
5412 code like:
5413
5414 int a;
5415 int foo(int i)
5416 {
5417 return *(&a+i);
5418 }
5419
5420 This code is nonsensical, but results in addressing the
5421 GOT table with a pic_offset_table_rtx base. We can't
5422 just refuse it easily, since it gets matched by the
5423 "addsi3" pattern, which later gets split to lea when the
5424 output register differs from the input. While this
5425 could be handled by a separate addsi pattern for this case
5426 that never results in lea, disabling this test seems to be
5427 the easier and correct fix for the crash. */
5428 }
5429 else if (!CONSTANT_ADDRESS_P (disp))
5430 {
5431 reason = "displacement is not constant";
5432 goto report_error;
5433 }
5434 else if (TARGET_64BIT && !x86_64_sign_extended_value (disp))
5435 {
5436 reason = "displacement is out of range";
5437 goto report_error;
5438 }
5439 else if (!TARGET_64BIT && GET_CODE (disp) == CONST_DOUBLE)
5440 {
5441 reason = "displacement is a const_double";
5442 goto report_error;
5443 }
5444 }
5445
5446 /* Everything looks valid. */
5447 if (TARGET_DEBUG_ADDR)
5448 fprintf (stderr, "Success.\n");
5449 return TRUE;
5450
5451 report_error:
5452 if (TARGET_DEBUG_ADDR)
5453 {
5454 fprintf (stderr, "Error: %s\n", reason);
5455 debug_rtx (reason_rtx);
5456 }
5457 return FALSE;
5458 }
5459 \f
5460 /* Return a unique alias set for the GOT. */
5461
5462 static HOST_WIDE_INT
5463 ix86_GOT_alias_set ()
5464 {
5465 static HOST_WIDE_INT set = -1;
5466 if (set == -1)
5467 set = new_alias_set ();
5468 return set;
5469 }
5470
5471 /* Return a legitimate reference for ORIG (an address) using the
5472 register REG. If REG is 0, a new pseudo is generated.
5473
5474 There are two types of references that must be handled:
5475
5476 1. Global data references must load the address from the GOT, via
5477 the PIC reg. An insn is emitted to do this load, and the reg is
5478 returned.
5479
5480 2. Static data references, constant pool addresses, and code labels
5481 compute the address as an offset from the GOT, whose base is in
5482 the PIC reg. Static data objects have SYMBOL_REF_FLAG set to
5483 differentiate them from global data objects. The returned
5484 address is the PIC reg + an unspec constant.
5485
5486 GO_IF_LEGITIMATE_ADDRESS rejects symbolic references unless the PIC
5487 reg also appears in the address. */
5488
5489 rtx
5490 legitimize_pic_address (orig, reg)
5491 rtx orig;
5492 rtx reg;
5493 {
5494 rtx addr = orig;
5495 rtx new = orig;
5496 rtx base;
5497
5498 #if TARGET_MACHO
5499 if (reg == 0)
5500 reg = gen_reg_rtx (Pmode);
5501 /* Use the generic Mach-O PIC machinery. */
5502 return machopic_legitimize_pic_address (orig, GET_MODE (orig), reg);
5503 #endif
5504
5505 if (TARGET_64BIT && legitimate_pic_address_disp_p (addr))
5506 new = addr;
5507 else if (!TARGET_64BIT && local_symbolic_operand (addr, Pmode))
5508 {
5509 /* This symbol may be referenced via a displacement from the PIC
5510 base address (@GOTOFF). */
5511
5512 if (reload_in_progress)
5513 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
5514 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
5515 new = gen_rtx_CONST (Pmode, new);
5516 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
5517
5518 if (reg != 0)
5519 {
5520 emit_move_insn (reg, new);
5521 new = reg;
5522 }
5523 }
5524 else if (GET_CODE (addr) == SYMBOL_REF)
5525 {
5526 if (TARGET_64BIT)
5527 {
5528 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTPCREL);
5529 new = gen_rtx_CONST (Pmode, new);
5530 new = gen_rtx_MEM (Pmode, new);
5531 RTX_UNCHANGING_P (new) = 1;
5532 set_mem_alias_set (new, ix86_GOT_alias_set ());
5533
5534 if (reg == 0)
5535 reg = gen_reg_rtx (Pmode);
5536 /* Use gen_movsi directly, otherwise the address is loaded
5537 into a register for CSE. We don't want to CSE these addresses;
5538 instead we CSE addresses from the GOT table, so skip this. */
5539 emit_insn (gen_movsi (reg, new));
5540 new = reg;
5541 }
5542 else
5543 {
5544 /* This symbol must be referenced via a load from the
5545 Global Offset Table (@GOT). */
5546
5547 if (reload_in_progress)
5548 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
5549 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT);
5550 new = gen_rtx_CONST (Pmode, new);
5551 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
5552 new = gen_rtx_MEM (Pmode, new);
5553 RTX_UNCHANGING_P (new) = 1;
5554 set_mem_alias_set (new, ix86_GOT_alias_set ());
5555
5556 if (reg == 0)
5557 reg = gen_reg_rtx (Pmode);
5558 emit_move_insn (reg, new);
5559 new = reg;
5560 }
5561 }
5562 else
5563 {
5564 if (GET_CODE (addr) == CONST)
5565 {
5566 addr = XEXP (addr, 0);
5567
5568 /* We must match stuff we generated before. Assume the only
5569 unspecs that can get here are ours. Not that we could do
5570 anything with them anyway... */
5571 if (GET_CODE (addr) == UNSPEC
5572 || (GET_CODE (addr) == PLUS
5573 && GET_CODE (XEXP (addr, 0)) == UNSPEC))
5574 return orig;
5575 if (GET_CODE (addr) != PLUS)
5576 abort ();
5577 }
5578 if (GET_CODE (addr) == PLUS)
5579 {
5580 rtx op0 = XEXP (addr, 0), op1 = XEXP (addr, 1);
5581
5582 /* Check first to see if this is a constant offset from a @GOTOFF
5583 symbol reference. */
5584 if (local_symbolic_operand (op0, Pmode)
5585 && GET_CODE (op1) == CONST_INT)
5586 {
5587 if (!TARGET_64BIT)
5588 {
5589 if (reload_in_progress)
5590 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
5591 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op0),
5592 UNSPEC_GOTOFF);
5593 new = gen_rtx_PLUS (Pmode, new, op1);
5594 new = gen_rtx_CONST (Pmode, new);
5595 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
5596
5597 if (reg != 0)
5598 {
5599 emit_move_insn (reg, new);
5600 new = reg;
5601 }
5602 }
5603 else
5604 {
5605 if (INTVAL (op1) < -16*1024*1024
5606 || INTVAL (op1) >= 16*1024*1024)
5607 new = gen_rtx_PLUS (Pmode, op0, force_reg (Pmode, op1));
5608 }
5609 }
5610 else
5611 {
5612 base = legitimize_pic_address (XEXP (addr, 0), reg);
5613 new = legitimize_pic_address (XEXP (addr, 1),
5614 base == reg ? NULL_RTX : reg);
5615
5616 if (GET_CODE (new) == CONST_INT)
5617 new = plus_constant (base, INTVAL (new));
5618 else
5619 {
5620 if (GET_CODE (new) == PLUS && CONSTANT_P (XEXP (new, 1)))
5621 {
5622 base = gen_rtx_PLUS (Pmode, base, XEXP (new, 0));
5623 new = XEXP (new, 1);
5624 }
5625 new = gen_rtx_PLUS (Pmode, base, new);
5626 }
5627 }
5628 }
5629 }
5630 return new;
5631 }
5632
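/* As a rough illustration of the cases above (a sketch, not taken from a
   real dump): with -fpic on ia32, a reference to a global symbol `foo'
   becomes a load through the GOT,
	(mem (plus (reg %ebx) (const (unspec [(symbol_ref "foo")] UNSPEC_GOT))))
   printed as `foo@GOT(%ebx)', while a local symbol is addressed directly
   relative to the PIC base,
	(plus (reg %ebx) (const (unspec [(symbol_ref "foo")] UNSPEC_GOTOFF)))
   printed as `foo@GOTOFF(%ebx)'.  On x86-64 the GOT slot is instead
   reached RIP-relatively through UNSPEC_GOTPCREL, printed as
   `foo@GOTPCREL(%rip)'.  */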
5633 static void
5634 ix86_encode_section_info (decl, first)
5635 tree decl;
5636 int first ATTRIBUTE_UNUSED;
5637 {
5638 bool local_p = (*targetm.binds_local_p) (decl);
5639 rtx rtl, symbol;
5640
5641 rtl = DECL_P (decl) ? DECL_RTL (decl) : TREE_CST_RTL (decl);
5642 if (GET_CODE (rtl) != MEM)
5643 return;
5644 symbol = XEXP (rtl, 0);
5645 if (GET_CODE (symbol) != SYMBOL_REF)
5646 return;
5647
5648 /* For basic x86, if using PIC, mark a SYMBOL_REF for a non-global
5649 symbol so that we may access it directly in the GOT. */
5650
5651 if (flag_pic)
5652 SYMBOL_REF_FLAG (symbol) = local_p;
5653
5654 /* For ELF, encode thread-local data with %[GLil] for "global dynamic",
5655 "local dynamic", "initial exec" or "local exec" TLS models
5656 respectively. */
5657
5658 if (TREE_CODE (decl) == VAR_DECL && DECL_THREAD_LOCAL (decl))
5659 {
5660 const char *symbol_str;
5661 char *newstr;
5662 size_t len;
5663 enum tls_model kind = decl_tls_model (decl);
5664
5665 if (TARGET_64BIT && ! flag_pic)
5666 {
5667 /* x86-64 doesn't allow non-pic code for shared libraries,
5668 so don't generate GD/LD TLS models for non-pic code. */
5669 switch (kind)
5670 {
5671 case TLS_MODEL_GLOBAL_DYNAMIC:
5672 kind = TLS_MODEL_INITIAL_EXEC; break;
5673 case TLS_MODEL_LOCAL_DYNAMIC:
5674 kind = TLS_MODEL_LOCAL_EXEC; break;
5675 default:
5676 break;
5677 }
5678 }
5679
5680 symbol_str = XSTR (symbol, 0);
5681
5682 if (symbol_str[0] == '%')
5683 {
5684 if (symbol_str[1] == tls_model_chars[kind])
5685 return;
5686 symbol_str += 2;
5687 }
5688 len = strlen (symbol_str) + 1;
5689 newstr = alloca (len + 2);
5690
5691 newstr[0] = '%';
5692 newstr[1] = tls_model_chars[kind];
5693 memcpy (newstr + 2, symbol_str, len);
5694
5695 XSTR (symbol, 0) = ggc_alloc_string (newstr, len + 2 - 1);
5696 }
5697 }
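/* For illustration (an assumed example following the %[GLil] convention
   documented above): a thread-local variable `foo' using the global
   dynamic model would have its SYMBOL_REF name rewritten from "foo" to
   "%Gfoo"; ix86_strip_name_encoding below strips the two-character
   prefix again whenever the name is actually printed.  */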
5698
5699 /* Undo the above when printing symbol names. */
5700
5701 static const char *
5702 ix86_strip_name_encoding (str)
5703 const char *str;
5704 {
5705 if (str[0] == '%')
5706 str += 2;
5707 if (str [0] == '*')
5708 str += 1;
5709 return str;
5710 }
5711 \f
5712 /* Load the thread pointer into a register. */
5713
5714 static rtx
5715 get_thread_pointer ()
5716 {
5717 rtx tp;
5718
5719 tp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), UNSPEC_TP);
5720 tp = gen_rtx_MEM (Pmode, tp);
5721 RTX_UNCHANGING_P (tp) = 1;
5722 set_mem_alias_set (tp, ix86_GOT_alias_set ());
5723 tp = force_reg (Pmode, tp);
5724
5725 return tp;
5726 }
5727
5728 /* Try machine-dependent ways of modifying an illegitimate address
5729 to be legitimate. If we find one, return the new, valid address.
5730 This macro is used in only one place: `memory_address' in explow.c.
5731
5732 OLDX is the address as it was before break_out_memory_refs was called.
5733 In some cases it is useful to look at this to decide what needs to be done.
5734
5735 MODE and WIN are passed so that this macro can use
5736 GO_IF_LEGITIMATE_ADDRESS.
5737
5738 It is always safe for this macro to do nothing. It exists to recognize
5739 opportunities to optimize the output.
5740
5741 For the 80386, we handle X+REG by loading X into a register R and
5742 using R+REG. R will go in a general reg and indexing will be used.
5743 However, if REG is a broken-out memory address or multiplication,
5744 nothing needs to be done because REG can certainly go in a general reg.
5745
5746 When -fpic is used, special handling is needed for symbolic references.
5747 See comments by legitimize_pic_address in i386.c for details. */
5748
5749 rtx
5750 legitimize_address (x, oldx, mode)
5751 register rtx x;
5752 register rtx oldx ATTRIBUTE_UNUSED;
5753 enum machine_mode mode;
5754 {
5755 int changed = 0;
5756 unsigned log;
5757
5758 if (TARGET_DEBUG_ADDR)
5759 {
5760 fprintf (stderr, "\n==========\nLEGITIMIZE_ADDRESS, mode = %s\n",
5761 GET_MODE_NAME (mode));
5762 debug_rtx (x);
5763 }
5764
5765 log = tls_symbolic_operand (x, mode);
5766 if (log)
5767 {
5768 rtx dest, base, off, pic;
5769 int type;
5770
5771 switch (log)
5772 {
5773 case TLS_MODEL_GLOBAL_DYNAMIC:
5774 dest = gen_reg_rtx (Pmode);
5775 if (TARGET_64BIT)
5776 {
5777 rtx rax = gen_rtx_REG (Pmode, 0), insns;
5778
5779 start_sequence ();
5780 emit_call_insn (gen_tls_global_dynamic_64 (rax, x));
5781 insns = get_insns ();
5782 end_sequence ();
5783
5784 emit_libcall_block (insns, dest, rax, x);
5785 }
5786 else
5787 emit_insn (gen_tls_global_dynamic_32 (dest, x));
5788 break;
5789
5790 case TLS_MODEL_LOCAL_DYNAMIC:
5791 base = gen_reg_rtx (Pmode);
5792 if (TARGET_64BIT)
5793 {
5794 rtx rax = gen_rtx_REG (Pmode, 0), insns, note;
5795
5796 start_sequence ();
5797 emit_call_insn (gen_tls_local_dynamic_base_64 (rax));
5798 insns = get_insns ();
5799 end_sequence ();
5800
5801 note = gen_rtx_EXPR_LIST (VOIDmode, const0_rtx, NULL);
5802 note = gen_rtx_EXPR_LIST (VOIDmode, ix86_tls_get_addr (), note);
5803 emit_libcall_block (insns, base, rax, note);
5804 }
5805 else
5806 emit_insn (gen_tls_local_dynamic_base_32 (base));
5807
5808 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_DTPOFF);
5809 off = gen_rtx_CONST (Pmode, off);
5810
5811 return gen_rtx_PLUS (Pmode, base, off);
5812
5813 case TLS_MODEL_INITIAL_EXEC:
5814 if (TARGET_64BIT)
5815 {
5816 pic = NULL;
5817 type = UNSPEC_GOTNTPOFF;
5818 }
5819 else if (flag_pic)
5820 {
5821 if (reload_in_progress)
5822 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
5823 pic = pic_offset_table_rtx;
5824 type = TARGET_GNU_TLS ? UNSPEC_GOTNTPOFF : UNSPEC_GOTTPOFF;
5825 }
5826 else if (!TARGET_GNU_TLS)
5827 {
5828 pic = gen_reg_rtx (Pmode);
5829 emit_insn (gen_set_got (pic));
5830 type = UNSPEC_GOTTPOFF;
5831 }
5832 else
5833 {
5834 pic = NULL;
5835 type = UNSPEC_INDNTPOFF;
5836 }
5837
5838 base = get_thread_pointer ();
5839
5840 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), type);
5841 off = gen_rtx_CONST (Pmode, off);
5842 if (pic)
5843 off = gen_rtx_PLUS (Pmode, pic, off);
5844 off = gen_rtx_MEM (Pmode, off);
5845 RTX_UNCHANGING_P (off) = 1;
5846 set_mem_alias_set (off, ix86_GOT_alias_set ());
5847 dest = gen_reg_rtx (Pmode);
5848
5849 if (TARGET_64BIT || TARGET_GNU_TLS)
5850 {
5851 emit_move_insn (dest, off);
5852 return gen_rtx_PLUS (Pmode, base, dest);
5853 }
5854 else
5855 emit_insn (gen_subsi3 (dest, base, off));
5856 break;
5857
5858 case TLS_MODEL_LOCAL_EXEC:
5859 base = get_thread_pointer ();
5860
5861 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x),
5862 (TARGET_64BIT || TARGET_GNU_TLS)
5863 ? UNSPEC_NTPOFF : UNSPEC_TPOFF);
5864 off = gen_rtx_CONST (Pmode, off);
5865
5866 if (TARGET_64BIT || TARGET_GNU_TLS)
5867 return gen_rtx_PLUS (Pmode, base, off);
5868 else
5869 {
5870 dest = gen_reg_rtx (Pmode);
5871 emit_insn (gen_subsi3 (dest, base, off));
5872 }
5873 break;
5874
5875 default:
5876 abort ();
5877 }
5878
5879 return dest;
5880 }
5881
5882 if (flag_pic && SYMBOLIC_CONST (x))
5883 return legitimize_pic_address (x, 0);
5884
5885 /* Canonicalize shifts by 0, 1, 2, 3 into multiply */
5886 if (GET_CODE (x) == ASHIFT
5887 && GET_CODE (XEXP (x, 1)) == CONST_INT
5888 && (log = (unsigned) exact_log2 (INTVAL (XEXP (x, 1)))) < 4)
5889 {
5890 changed = 1;
5891 x = gen_rtx_MULT (Pmode, force_reg (Pmode, XEXP (x, 0)),
5892 GEN_INT (1 << log));
5893 }
5894
5895 if (GET_CODE (x) == PLUS)
5896 {
5897 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
5898
5899 if (GET_CODE (XEXP (x, 0)) == ASHIFT
5900 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
5901 && (log = (unsigned) exact_log2 (INTVAL (XEXP (XEXP (x, 0), 1)))) < 4)
5902 {
5903 changed = 1;
5904 XEXP (x, 0) = gen_rtx_MULT (Pmode,
5905 force_reg (Pmode, XEXP (XEXP (x, 0), 0)),
5906 GEN_INT (1 << log));
5907 }
5908
5909 if (GET_CODE (XEXP (x, 1)) == ASHIFT
5910 && GET_CODE (XEXP (XEXP (x, 1), 1)) == CONST_INT
5911 && (log = (unsigned) exact_log2 (INTVAL (XEXP (XEXP (x, 1), 1)))) < 4)
5912 {
5913 changed = 1;
5914 XEXP (x, 1) = gen_rtx_MULT (Pmode,
5915 force_reg (Pmode, XEXP (XEXP (x, 1), 0)),
5916 GEN_INT (1 << log));
5917 }
5918
5919 /* Put multiply first if it isn't already. */
5920 if (GET_CODE (XEXP (x, 1)) == MULT)
5921 {
5922 rtx tmp = XEXP (x, 0);
5923 XEXP (x, 0) = XEXP (x, 1);
5924 XEXP (x, 1) = tmp;
5925 changed = 1;
5926 }
5927
5928 /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
5929 into (plus (plus (mult (reg) (const)) (reg)) (const)). This can be
5930 created by virtual register instantiation, register elimination, and
5931 similar optimizations. */
5932 if (GET_CODE (XEXP (x, 0)) == MULT && GET_CODE (XEXP (x, 1)) == PLUS)
5933 {
5934 changed = 1;
5935 x = gen_rtx_PLUS (Pmode,
5936 gen_rtx_PLUS (Pmode, XEXP (x, 0),
5937 XEXP (XEXP (x, 1), 0)),
5938 XEXP (XEXP (x, 1), 1));
5939 }
5940
5941 /* Canonicalize
5942 (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
5943 into (plus (plus (mult (reg) (const)) (reg)) (const)). */
5944 else if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS
5945 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
5946 && GET_CODE (XEXP (XEXP (x, 0), 1)) == PLUS
5947 && CONSTANT_P (XEXP (x, 1)))
5948 {
5949 rtx constant;
5950 rtx other = NULL_RTX;
5951
5952 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
5953 {
5954 constant = XEXP (x, 1);
5955 other = XEXP (XEXP (XEXP (x, 0), 1), 1);
5956 }
5957 else if (GET_CODE (XEXP (XEXP (XEXP (x, 0), 1), 1)) == CONST_INT)
5958 {
5959 constant = XEXP (XEXP (XEXP (x, 0), 1), 1);
5960 other = XEXP (x, 1);
5961 }
5962 else
5963 constant = 0;
5964
5965 if (constant)
5966 {
5967 changed = 1;
5968 x = gen_rtx_PLUS (Pmode,
5969 gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 0),
5970 XEXP (XEXP (XEXP (x, 0), 1), 0)),
5971 plus_constant (other, INTVAL (constant)));
5972 }
5973 }
5974
5975 if (changed && legitimate_address_p (mode, x, FALSE))
5976 return x;
5977
5978 if (GET_CODE (XEXP (x, 0)) == MULT)
5979 {
5980 changed = 1;
5981 XEXP (x, 0) = force_operand (XEXP (x, 0), 0);
5982 }
5983
5984 if (GET_CODE (XEXP (x, 1)) == MULT)
5985 {
5986 changed = 1;
5987 XEXP (x, 1) = force_operand (XEXP (x, 1), 0);
5988 }
5989
5990 if (changed
5991 && GET_CODE (XEXP (x, 1)) == REG
5992 && GET_CODE (XEXP (x, 0)) == REG)
5993 return x;
5994
5995 if (flag_pic && SYMBOLIC_CONST (XEXP (x, 1)))
5996 {
5997 changed = 1;
5998 x = legitimize_pic_address (x, 0);
5999 }
6000
6001 if (changed && legitimate_address_p (mode, x, FALSE))
6002 return x;
6003
6004 if (GET_CODE (XEXP (x, 0)) == REG)
6005 {
6006 register rtx temp = gen_reg_rtx (Pmode);
6007 register rtx val = force_operand (XEXP (x, 1), temp);
6008 if (val != temp)
6009 emit_move_insn (temp, val);
6010
6011 XEXP (x, 1) = temp;
6012 return x;
6013 }
6014
6015 else if (GET_CODE (XEXP (x, 1)) == REG)
6016 {
6017 register rtx temp = gen_reg_rtx (Pmode);
6018 register rtx val = force_operand (XEXP (x, 0), temp);
6019 if (val != temp)
6020 emit_move_insn (temp, val);
6021
6022 XEXP (x, 0) = temp;
6023 return x;
6024 }
6025 }
6026
6027 return x;
6028 }
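/* A rough example of the PLUS canonicalization above (a sketch, not from
   a real dump): an address of the form
	(plus (mult (reg) (const_int 4)) (plus (reg) (const_int 8)))
   is rewritten into
	(plus (plus (mult (reg) (const_int 4)) (reg)) (const_int 8))
   which matches the base + index*scale + displacement shape that
   legitimate_address_p accepts.  */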
6029 \f
6030 /* Print an integer constant expression in assembler syntax. Addition
6031 and subtraction are the only arithmetic that may appear in these
6032 expressions. FILE is the stdio stream to write to, X is the rtx, and
6033 CODE is the operand print code from the output string. */
6034
6035 static void
6036 output_pic_addr_const (file, x, code)
6037 FILE *file;
6038 rtx x;
6039 int code;
6040 {
6041 char buf[256];
6042
6043 switch (GET_CODE (x))
6044 {
6045 case PC:
6046 if (flag_pic)
6047 putc ('.', file);
6048 else
6049 abort ();
6050 break;
6051
6052 case SYMBOL_REF:
6053 assemble_name (file, XSTR (x, 0));
6054 if (!TARGET_MACHO && code == 'P' && ! SYMBOL_REF_FLAG (x))
6055 fputs ("@PLT", file);
6056 break;
6057
6058 case LABEL_REF:
6059 x = XEXP (x, 0);
6060 /* FALLTHRU */
6061 case CODE_LABEL:
6062 ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (x));
6063 assemble_name (asm_out_file, buf);
6064 break;
6065
6066 case CONST_INT:
6067 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
6068 break;
6069
6070 case CONST:
6071 /* This used to output parentheses around the expression,
6072 but that does not work on the 386 (either ATT or BSD assembler). */
6073 output_pic_addr_const (file, XEXP (x, 0), code);
6074 break;
6075
6076 case CONST_DOUBLE:
6077 if (GET_MODE (x) == VOIDmode)
6078 {
6079 /* We can use %d if the number is <32 bits and positive. */
6080 if (CONST_DOUBLE_HIGH (x) || CONST_DOUBLE_LOW (x) < 0)
6081 fprintf (file, "0x%lx%08lx",
6082 (unsigned long) CONST_DOUBLE_HIGH (x),
6083 (unsigned long) CONST_DOUBLE_LOW (x));
6084 else
6085 fprintf (file, HOST_WIDE_INT_PRINT_DEC, CONST_DOUBLE_LOW (x));
6086 }
6087 else
6088 /* We can't handle floating point constants;
6089 PRINT_OPERAND must handle them. */
6090 output_operand_lossage ("floating constant misused");
6091 break;
6092
6093 case PLUS:
6094 /* Some assemblers need integer constants to appear first. */
6095 if (GET_CODE (XEXP (x, 0)) == CONST_INT)
6096 {
6097 output_pic_addr_const (file, XEXP (x, 0), code);
6098 putc ('+', file);
6099 output_pic_addr_const (file, XEXP (x, 1), code);
6100 }
6101 else if (GET_CODE (XEXP (x, 1)) == CONST_INT)
6102 {
6103 output_pic_addr_const (file, XEXP (x, 1), code);
6104 putc ('+', file);
6105 output_pic_addr_const (file, XEXP (x, 0), code);
6106 }
6107 else
6108 abort ();
6109 break;
6110
6111 case MINUS:
6112 if (!TARGET_MACHO)
6113 putc (ASSEMBLER_DIALECT == ASM_INTEL ? '(' : '[', file);
6114 output_pic_addr_const (file, XEXP (x, 0), code);
6115 putc ('-', file);
6116 output_pic_addr_const (file, XEXP (x, 1), code);
6117 if (!TARGET_MACHO)
6118 putc (ASSEMBLER_DIALECT == ASM_INTEL ? ')' : ']', file);
6119 break;
6120
6121 case UNSPEC:
6122 if (XVECLEN (x, 0) != 1)
6123 abort ();
6124 output_pic_addr_const (file, XVECEXP (x, 0, 0), code);
6125 switch (XINT (x, 1))
6126 {
6127 case UNSPEC_GOT:
6128 fputs ("@GOT", file);
6129 break;
6130 case UNSPEC_GOTOFF:
6131 fputs ("@GOTOFF", file);
6132 break;
6133 case UNSPEC_GOTPCREL:
6134 fputs ("@GOTPCREL(%rip)", file);
6135 break;
6136 case UNSPEC_GOTTPOFF:
6137 /* FIXME: This might be @TPOFF in Sun ld too. */
6138 fputs ("@GOTTPOFF", file);
6139 break;
6140 case UNSPEC_TPOFF:
6141 fputs ("@TPOFF", file);
6142 break;
6143 case UNSPEC_NTPOFF:
6144 if (TARGET_64BIT)
6145 fputs ("@TPOFF", file);
6146 else
6147 fputs ("@NTPOFF", file);
6148 break;
6149 case UNSPEC_DTPOFF:
6150 fputs ("@DTPOFF", file);
6151 break;
6152 case UNSPEC_GOTNTPOFF:
6153 if (TARGET_64BIT)
6154 fputs ("@GOTTPOFF(%rip)", file);
6155 else
6156 fputs ("@GOTNTPOFF", file);
6157 break;
6158 case UNSPEC_INDNTPOFF:
6159 fputs ("@INDNTPOFF", file);
6160 break;
6161 default:
6162 output_operand_lossage ("invalid UNSPEC as operand");
6163 break;
6164 }
6165 break;
6166
6167 default:
6168 output_operand_lossage ("invalid expression as operand");
6169 }
6170 }
6171
6172 /* This is called from dwarfout.c via ASM_OUTPUT_DWARF_ADDR_CONST.
6173 We need to handle our special PIC relocations. */
6174
6175 void
6176 i386_dwarf_output_addr_const (file, x)
6177 FILE *file;
6178 rtx x;
6179 {
6180 #ifdef ASM_QUAD
6181 fprintf (file, "%s", TARGET_64BIT ? ASM_QUAD : ASM_LONG);
6182 #else
6183 if (TARGET_64BIT)
6184 abort ();
6185 fprintf (file, "%s", ASM_LONG);
6186 #endif
6187 if (flag_pic)
6188 output_pic_addr_const (file, x, '\0');
6189 else
6190 output_addr_const (file, x);
6191 fputc ('\n', file);
6192 }
6193
6194 /* This is called from dwarf2out.c via ASM_OUTPUT_DWARF_DTPREL.
6195 We need to emit DTP-relative relocations. */
6196
6197 void
6198 i386_output_dwarf_dtprel (file, size, x)
6199 FILE *file;
6200 int size;
6201 rtx x;
6202 {
6203 fputs (ASM_LONG, file);
6204 output_addr_const (file, x);
6205 fputs ("@DTPOFF", file);
6206 switch (size)
6207 {
6208 case 4:
6209 break;
6210 case 8:
6211 fputs (", 0", file);
6212 break;
6213 default:
6214 abort ();
6215 }
6216 }
6217
6218 /* In the name of slightly smaller debug output, and to cater to
6219 general assembler lossage, recognize PIC+GOTOFF and turn it back
6220 into a direct symbol reference. */
6221
6222 rtx
6223 i386_simplify_dwarf_addr (orig_x)
6224 rtx orig_x;
6225 {
6226 rtx x = orig_x, y;
6227
6228 if (GET_CODE (x) == MEM)
6229 x = XEXP (x, 0);
6230
6231 if (TARGET_64BIT)
6232 {
6233 if (GET_CODE (x) != CONST
6234 || GET_CODE (XEXP (x, 0)) != UNSPEC
6235 || XINT (XEXP (x, 0), 1) != UNSPEC_GOTPCREL
6236 || GET_CODE (orig_x) != MEM)
6237 return orig_x;
6238 return XVECEXP (XEXP (x, 0), 0, 0);
6239 }
6240
6241 if (GET_CODE (x) != PLUS
6242 || GET_CODE (XEXP (x, 1)) != CONST)
6243 return orig_x;
6244
6245 if (GET_CODE (XEXP (x, 0)) == REG
6246 && REGNO (XEXP (x, 0)) == PIC_OFFSET_TABLE_REGNUM)
6247 /* %ebx + GOT/GOTOFF */
6248 y = NULL;
6249 else if (GET_CODE (XEXP (x, 0)) == PLUS)
6250 {
6251 /* %ebx + %reg * scale + GOT/GOTOFF */
6252 y = XEXP (x, 0);
6253 if (GET_CODE (XEXP (y, 0)) == REG
6254 && REGNO (XEXP (y, 0)) == PIC_OFFSET_TABLE_REGNUM)
6255 y = XEXP (y, 1);
6256 else if (GET_CODE (XEXP (y, 1)) == REG
6257 && REGNO (XEXP (y, 1)) == PIC_OFFSET_TABLE_REGNUM)
6258 y = XEXP (y, 0);
6259 else
6260 return orig_x;
6261 if (GET_CODE (y) != REG
6262 && GET_CODE (y) != MULT
6263 && GET_CODE (y) != ASHIFT)
6264 return orig_x;
6265 }
6266 else
6267 return orig_x;
6268
6269 x = XEXP (XEXP (x, 1), 0);
6270 if (GET_CODE (x) == UNSPEC
6271 && ((XINT (x, 1) == UNSPEC_GOT && GET_CODE (orig_x) == MEM)
6272 || (XINT (x, 1) == UNSPEC_GOTOFF && GET_CODE (orig_x) != MEM)))
6273 {
6274 if (y)
6275 return gen_rtx_PLUS (Pmode, y, XVECEXP (x, 0, 0));
6276 return XVECEXP (x, 0, 0);
6277 }
6278
6279 if (GET_CODE (x) == PLUS
6280 && GET_CODE (XEXP (x, 0)) == UNSPEC
6281 && GET_CODE (XEXP (x, 1)) == CONST_INT
6282 && ((XINT (XEXP (x, 0), 1) == UNSPEC_GOT && GET_CODE (orig_x) == MEM)
6283 || (XINT (XEXP (x, 0), 1) == UNSPEC_GOTOFF
6284 && GET_CODE (orig_x) != MEM)))
6285 {
6286 x = gen_rtx_PLUS (VOIDmode, XVECEXP (XEXP (x, 0), 0, 0), XEXP (x, 1));
6287 if (y)
6288 return gen_rtx_PLUS (Pmode, y, x);
6289 return x;
6290 }
6291
6292 return orig_x;
6293 }
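/* For example (a sketch): given the 32-bit PIC form
	(plus (reg %ebx) (const (unspec [(symbol_ref "foo")] UNSPEC_GOTOFF)))
   appearing outside a MEM, the function above returns just
   (symbol_ref "foo"), so the debug info refers to the symbol directly
   rather than to the PIC construct.  */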
6294 \f
6295 static void
6296 put_condition_code (code, mode, reverse, fp, file)
6297 enum rtx_code code;
6298 enum machine_mode mode;
6299 int reverse, fp;
6300 FILE *file;
6301 {
6302 const char *suffix;
6303
6304 if (mode == CCFPmode || mode == CCFPUmode)
6305 {
6306 enum rtx_code second_code, bypass_code;
6307 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
6308 if (bypass_code != NIL || second_code != NIL)
6309 abort ();
6310 code = ix86_fp_compare_code_to_integer (code);
6311 mode = CCmode;
6312 }
6313 if (reverse)
6314 code = reverse_condition (code);
6315
6316 switch (code)
6317 {
6318 case EQ:
6319 suffix = "e";
6320 break;
6321 case NE:
6322 suffix = "ne";
6323 break;
6324 case GT:
6325 if (mode != CCmode && mode != CCNOmode && mode != CCGCmode)
6326 abort ();
6327 suffix = "g";
6328 break;
6329 case GTU:
6330 /* ??? Use "nbe" instead of "a" for fcmov lossage on some assemblers.
6331 Those same assemblers have the same but opposite lossage on cmov. */
6332 if (mode != CCmode)
6333 abort ();
6334 suffix = fp ? "nbe" : "a";
6335 break;
6336 case LT:
6337 if (mode == CCNOmode || mode == CCGOCmode)
6338 suffix = "s";
6339 else if (mode == CCmode || mode == CCGCmode)
6340 suffix = "l";
6341 else
6342 abort ();
6343 break;
6344 case LTU:
6345 if (mode != CCmode)
6346 abort ();
6347 suffix = "b";
6348 break;
6349 case GE:
6350 if (mode == CCNOmode || mode == CCGOCmode)
6351 suffix = "ns";
6352 else if (mode == CCmode || mode == CCGCmode)
6353 suffix = "ge";
6354 else
6355 abort ();
6356 break;
6357 case GEU:
6358 /* ??? As above. */
6359 if (mode != CCmode)
6360 abort ();
6361 suffix = fp ? "nb" : "ae";
6362 break;
6363 case LE:
6364 if (mode != CCmode && mode != CCGCmode && mode != CCNOmode)
6365 abort ();
6366 suffix = "le";
6367 break;
6368 case LEU:
6369 if (mode != CCmode)
6370 abort ();
6371 suffix = "be";
6372 break;
6373 case UNORDERED:
6374 suffix = fp ? "u" : "p";
6375 break;
6376 case ORDERED:
6377 suffix = fp ? "nu" : "np";
6378 break;
6379 default:
6380 abort ();
6381 }
6382 fputs (suffix, file);
6383 }
6384
6385 void
6386 print_reg (x, code, file)
6387 rtx x;
6388 int code;
6389 FILE *file;
6390 {
6391 if (REGNO (x) == ARG_POINTER_REGNUM
6392 || REGNO (x) == FRAME_POINTER_REGNUM
6393 || REGNO (x) == FLAGS_REG
6394 || REGNO (x) == FPSR_REG)
6395 abort ();
6396
6397 if (ASSEMBLER_DIALECT == ASM_ATT || USER_LABEL_PREFIX[0] == 0)
6398 putc ('%', file);
6399
6400 if (code == 'w' || MMX_REG_P (x))
6401 code = 2;
6402 else if (code == 'b')
6403 code = 1;
6404 else if (code == 'k')
6405 code = 4;
6406 else if (code == 'q')
6407 code = 8;
6408 else if (code == 'y')
6409 code = 3;
6410 else if (code == 'h')
6411 code = 0;
6412 else
6413 code = GET_MODE_SIZE (GET_MODE (x));
6414
6415 /* Irritatingly, AMD extended registers use a different naming convention
6416 from the normal registers. */
6417 if (REX_INT_REG_P (x))
6418 {
6419 if (!TARGET_64BIT)
6420 abort ();
6421 switch (code)
6422 {
6423 case 0:
6424 error ("extended registers have no high halves");
6425 break;
6426 case 1:
6427 fprintf (file, "r%ib", REGNO (x) - FIRST_REX_INT_REG + 8);
6428 break;
6429 case 2:
6430 fprintf (file, "r%iw", REGNO (x) - FIRST_REX_INT_REG + 8);
6431 break;
6432 case 4:
6433 fprintf (file, "r%id", REGNO (x) - FIRST_REX_INT_REG + 8);
6434 break;
6435 case 8:
6436 fprintf (file, "r%i", REGNO (x) - FIRST_REX_INT_REG + 8);
6437 break;
6438 default:
6439 error ("unsupported operand size for extended register");
6440 break;
6441 }
6442 return;
6443 }
6444 switch (code)
6445 {
6446 case 3:
6447 if (STACK_TOP_P (x))
6448 {
6449 fputs ("st(0)", file);
6450 break;
6451 }
6452 /* FALLTHRU */
6453 case 8:
6454 case 4:
6455 case 12:
6456 if (! ANY_FP_REG_P (x))
6457 putc (code == 8 && TARGET_64BIT ? 'r' : 'e', file);
6458 /* FALLTHRU */
6459 case 16:
6460 case 2:
6461 fputs (hi_reg_name[REGNO (x)], file);
6462 break;
6463 case 1:
6464 fputs (qi_reg_name[REGNO (x)], file);
6465 break;
6466 case 0:
6467 fputs (qi_high_reg_name[REGNO (x)], file);
6468 break;
6469 default:
6470 abort ();
6471 }
6472 }
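/* For instance, the first REX integer register is printed as "r8b",
   "r8w", "r8d" or "r8" for operand sizes 1, 2, 4 and 8 respectively
   (with the usual "%" prefix in AT&T syntax).  */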
6473
6474 /* Locate some local-dynamic symbol still in use by this function
6475 so that we can print its name in some tls_local_dynamic_base
6476 pattern. */
6477
6478 static const char *
6479 get_some_local_dynamic_name ()
6480 {
6481 rtx insn;
6482
6483 if (cfun->machine->some_ld_name)
6484 return cfun->machine->some_ld_name;
6485
6486 for (insn = get_insns (); insn ; insn = NEXT_INSN (insn))
6487 if (INSN_P (insn)
6488 && for_each_rtx (&PATTERN (insn), get_some_local_dynamic_name_1, 0))
6489 return cfun->machine->some_ld_name;
6490
6491 abort ();
6492 }
6493
6494 static int
6495 get_some_local_dynamic_name_1 (px, data)
6496 rtx *px;
6497 void *data ATTRIBUTE_UNUSED;
6498 {
6499 rtx x = *px;
6500
6501 if (GET_CODE (x) == SYMBOL_REF
6502 && local_dynamic_symbolic_operand (x, Pmode))
6503 {
6504 cfun->machine->some_ld_name = XSTR (x, 0);
6505 return 1;
6506 }
6507
6508 return 0;
6509 }
6510
6511 /* Meaning of CODE:
6512 L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
6513 C -- print opcode suffix for set/cmov insn.
6514 c -- like C, but print reversed condition
6515 F,f -- likewise, but for floating-point.
6516 O -- if CMOV_SUN_AS_SYNTAX, expand to "w.", "l." or "q.", otherwise
6517 nothing
6518 R -- print the prefix for register names.
6519 z -- print the opcode suffix for the size of the current operand.
6520 * -- print a star (in certain assembler syntax)
6521 A -- print an absolute memory reference.
6522 w -- print the operand as if it's a "word" (HImode) even if it isn't.
6523 s -- print a shift double count, followed by the assembler's argument
6524 delimiter.
6525 b -- print the QImode name of the register for the indicated operand.
6526 %b0 would print %al if operands[0] is reg 0.
6527 w -- likewise, print the HImode name of the register.
6528 k -- likewise, print the SImode name of the register.
6529 q -- likewise, print the DImode name of the register.
6530 h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
6531 y -- print "st(0)" instead of "st" as a register.
6532 D -- print condition for SSE cmp instruction.
6533 P -- if PIC, print an @PLT suffix.
6534 X -- don't print any sort of PIC '@' suffix for a symbol.
6535 & -- print some in-use local-dynamic symbol name.
6536 */
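/* A small worked example of the 'z' code (illustrative only): for an
   SImode memory operand, "%z0" prints "l", so a template such as
   "fild%z0\t%0" comes out as "fildl" followed by the address, whereas an
   SFmode operand would get "s" instead.  */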
6537
6538 void
6539 print_operand (file, x, code)
6540 FILE *file;
6541 rtx x;
6542 int code;
6543 {
6544 if (code)
6545 {
6546 switch (code)
6547 {
6548 case '*':
6549 if (ASSEMBLER_DIALECT == ASM_ATT)
6550 putc ('*', file);
6551 return;
6552
6553 case '&':
6554 assemble_name (file, get_some_local_dynamic_name ());
6555 return;
6556
6557 case 'A':
6558 if (ASSEMBLER_DIALECT == ASM_ATT)
6559 putc ('*', file);
6560 else if (ASSEMBLER_DIALECT == ASM_INTEL)
6561 {
6562 /* Intel syntax. For absolute addresses, registers should not
6563 be surrounded by brackets. */
6564 if (GET_CODE (x) != REG)
6565 {
6566 putc ('[', file);
6567 PRINT_OPERAND (file, x, 0);
6568 putc (']', file);
6569 return;
6570 }
6571 }
6572 else
6573 abort ();
6574
6575 PRINT_OPERAND (file, x, 0);
6576 return;
6577
6578
6579 case 'L':
6580 if (ASSEMBLER_DIALECT == ASM_ATT)
6581 putc ('l', file);
6582 return;
6583
6584 case 'W':
6585 if (ASSEMBLER_DIALECT == ASM_ATT)
6586 putc ('w', file);
6587 return;
6588
6589 case 'B':
6590 if (ASSEMBLER_DIALECT == ASM_ATT)
6591 putc ('b', file);
6592 return;
6593
6594 case 'Q':
6595 if (ASSEMBLER_DIALECT == ASM_ATT)
6596 putc ('l', file);
6597 return;
6598
6599 case 'S':
6600 if (ASSEMBLER_DIALECT == ASM_ATT)
6601 putc ('s', file);
6602 return;
6603
6604 case 'T':
6605 if (ASSEMBLER_DIALECT == ASM_ATT)
6606 putc ('t', file);
6607 return;
6608
6609 case 'z':
6610 /* 387 opcodes don't get size suffixes if the operands are
6611 registers. */
6612 if (STACK_REG_P (x))
6613 return;
6614
6615 /* Likewise if using Intel opcodes. */
6616 if (ASSEMBLER_DIALECT == ASM_INTEL)
6617 return;
6618
6619 /* This is the size of op from size of operand. */
6620 switch (GET_MODE_SIZE (GET_MODE (x)))
6621 {
6622 case 2:
6623 #ifdef HAVE_GAS_FILDS_FISTS
6624 putc ('s', file);
6625 #endif
6626 return;
6627
6628 case 4:
6629 if (GET_MODE (x) == SFmode)
6630 {
6631 putc ('s', file);
6632 return;
6633 }
6634 else
6635 putc ('l', file);
6636 return;
6637
6638 case 12:
6639 case 16:
6640 putc ('t', file);
6641 return;
6642
6643 case 8:
6644 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
6645 {
6646 #ifdef GAS_MNEMONICS
6647 putc ('q', file);
6648 #else
6649 putc ('l', file);
6650 putc ('l', file);
6651 #endif
6652 }
6653 else
6654 putc ('l', file);
6655 return;
6656
6657 default:
6658 abort ();
6659 }
6660
6661 case 'b':
6662 case 'w':
6663 case 'k':
6664 case 'q':
6665 case 'h':
6666 case 'y':
6667 case 'X':
6668 case 'P':
6669 break;
6670
6671 case 's':
6672 if (GET_CODE (x) == CONST_INT || ! SHIFT_DOUBLE_OMITS_COUNT)
6673 {
6674 PRINT_OPERAND (file, x, 0);
6675 putc (',', file);
6676 }
6677 return;
6678
6679 case 'D':
6680 /* Little bit of brain damage here. The SSE compare instructions
6681 use completely different names for the comparisons than the
6682 fp conditional moves do. */
6683 switch (GET_CODE (x))
6684 {
6685 case EQ:
6686 case UNEQ:
6687 fputs ("eq", file);
6688 break;
6689 case LT:
6690 case UNLT:
6691 fputs ("lt", file);
6692 break;
6693 case LE:
6694 case UNLE:
6695 fputs ("le", file);
6696 break;
6697 case UNORDERED:
6698 fputs ("unord", file);
6699 break;
6700 case NE:
6701 case LTGT:
6702 fputs ("neq", file);
6703 break;
6704 case UNGE:
6705 case GE:
6706 fputs ("nlt", file);
6707 break;
6708 case UNGT:
6709 case GT:
6710 fputs ("nle", file);
6711 break;
6712 case ORDERED:
6713 fputs ("ord", file);
6714 break;
6715 default:
6716 abort ();
6717 break;
6718 }
6719 return;
6720 case 'O':
6721 #ifdef CMOV_SUN_AS_SYNTAX
6722 if (ASSEMBLER_DIALECT == ASM_ATT)
6723 {
6724 switch (GET_MODE (x))
6725 {
6726 case HImode: putc ('w', file); break;
6727 case SImode:
6728 case SFmode: putc ('l', file); break;
6729 case DImode:
6730 case DFmode: putc ('q', file); break;
6731 default: abort ();
6732 }
6733 putc ('.', file);
6734 }
6735 #endif
6736 return;
6737 case 'C':
6738 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 0, file);
6739 return;
6740 case 'F':
6741 #ifdef CMOV_SUN_AS_SYNTAX
6742 if (ASSEMBLER_DIALECT == ASM_ATT)
6743 putc ('.', file);
6744 #endif
6745 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 1, file);
6746 return;
6747
6748 /* Like above, but reverse condition */
6749 case 'c':
6750 /* Check to see if argument to %c is really a constant
6751 and not a condition code which needs to be reversed. */
6752 if (GET_RTX_CLASS (GET_CODE (x)) != '<')
6753 {
6754 output_operand_lossage ("operand is neither a constant nor a condition code, invalid operand code 'c'");
6755 return;
6756 }
6757 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 0, file);
6758 return;
6759 case 'f':
6760 #ifdef CMOV_SUN_AS_SYNTAX
6761 if (ASSEMBLER_DIALECT == ASM_ATT)
6762 putc ('.', file);
6763 #endif
6764 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 1, file);
6765 return;
6766 case '+':
6767 {
6768 rtx x;
6769
6770 if (!optimize || optimize_size || !TARGET_BRANCH_PREDICTION_HINTS)
6771 return;
6772
6773 x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
6774 if (x)
6775 {
6776 int pred_val = INTVAL (XEXP (x, 0));
6777
6778 if (pred_val < REG_BR_PROB_BASE * 45 / 100
6779 || pred_val > REG_BR_PROB_BASE * 55 / 100)
6780 {
6781 int taken = pred_val > REG_BR_PROB_BASE / 2;
6782 int cputaken = final_forward_branch_p (current_output_insn) == 0;
6783
6784 /* Emit hints only when the default branch prediction
6785 heuristics would fail. */
6786 if (taken != cputaken)
6787 {
6788 /* We use 3e (DS) prefix for taken branches and
6789 2e (CS) prefix for not taken branches. */
6790 if (taken)
6791 fputs ("ds ; ", file);
6792 else
6793 fputs ("cs ; ", file);
6794 }
6795 }
6796 }
6797 return;
6798 }
6799 default:
6800 output_operand_lossage ("invalid operand code `%c'", code);
6801 }
6802 }
6803
6804 if (GET_CODE (x) == REG)
6805 {
6806 PRINT_REG (x, code, file);
6807 }
6808
6809 else if (GET_CODE (x) == MEM)
6810 {
6811 /* No `byte ptr' prefix for call instructions. */
6812 if (ASSEMBLER_DIALECT == ASM_INTEL && code != 'X' && code != 'P')
6813 {
6814 const char * size;
6815 switch (GET_MODE_SIZE (GET_MODE (x)))
6816 {
6817 case 1: size = "BYTE"; break;
6818 case 2: size = "WORD"; break;
6819 case 4: size = "DWORD"; break;
6820 case 8: size = "QWORD"; break;
6821 case 12: size = "XWORD"; break;
6822 case 16: size = "XMMWORD"; break;
6823 default:
6824 abort ();
6825 }
6826
6827 /* Check for explicit size override (codes 'b', 'w' and 'k') */
6828 if (code == 'b')
6829 size = "BYTE";
6830 else if (code == 'w')
6831 size = "WORD";
6832 else if (code == 'k')
6833 size = "DWORD";
6834
6835 fputs (size, file);
6836 fputs (" PTR ", file);
6837 }
6838
6839 x = XEXP (x, 0);
6840 if (flag_pic && CONSTANT_ADDRESS_P (x))
6841 output_pic_addr_const (file, x, code);
6842 /* Avoid (%rip) for call operands. */
6843 else if (CONSTANT_ADDRESS_P (x) && code == 'P'
6844 && GET_CODE (x) != CONST_INT)
6845 output_addr_const (file, x);
6846 else if (this_is_asm_operands && ! address_operand (x, VOIDmode))
6847 output_operand_lossage ("invalid constraints for operand");
6848 else
6849 output_address (x);
6850 }
6851
6852 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == SFmode)
6853 {
6854 REAL_VALUE_TYPE r;
6855 long l;
6856
6857 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
6858 REAL_VALUE_TO_TARGET_SINGLE (r, l);
6859
6860 if (ASSEMBLER_DIALECT == ASM_ATT)
6861 putc ('$', file);
6862 fprintf (file, "0x%lx", l);
6863 }
6864
6865 /* These float cases don't actually occur as immediate operands. */
6866 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == DFmode)
6867 {
6868 char dstr[30];
6869
6870 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
6871 fprintf (file, "%s", dstr);
6872 }
6873
6874 else if (GET_CODE (x) == CONST_DOUBLE
6875 && (GET_MODE (x) == XFmode || GET_MODE (x) == TFmode))
6876 {
6877 char dstr[30];
6878
6879 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
6880 fprintf (file, "%s", dstr);
6881 }
6882
6883 else
6884 {
6885 if (code != 'P')
6886 {
6887 if (GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST_DOUBLE)
6888 {
6889 if (ASSEMBLER_DIALECT == ASM_ATT)
6890 putc ('$', file);
6891 }
6892 else if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF
6893 || GET_CODE (x) == LABEL_REF)
6894 {
6895 if (ASSEMBLER_DIALECT == ASM_ATT)
6896 putc ('$', file);
6897 else
6898 fputs ("OFFSET FLAT:", file);
6899 }
6900 }
6901 if (GET_CODE (x) == CONST_INT)
6902 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
6903 else if (flag_pic)
6904 output_pic_addr_const (file, x, code);
6905 else
6906 output_addr_const (file, x);
6907 }
6908 }
6909 \f
6910 /* Print a memory operand whose address is ADDR. */
6911
6912 void
6913 print_operand_address (file, addr)
6914 FILE *file;
6915 register rtx addr;
6916 {
6917 struct ix86_address parts;
6918 rtx base, index, disp;
6919 int scale;
6920
6921 if (GET_CODE (addr) == UNSPEC && XINT (addr, 1) == UNSPEC_TP)
6922 {
6923 if (ASSEMBLER_DIALECT == ASM_INTEL)
6924 fputs ("DWORD PTR ", file);
6925 if (ASSEMBLER_DIALECT == ASM_ATT || USER_LABEL_PREFIX[0] == 0)
6926 putc ('%', file);
6927 if (TARGET_64BIT)
6928 fputs ("fs:0", file);
6929 else
6930 fputs ("gs:0", file);
6931 return;
6932 }
6933
6934 if (! ix86_decompose_address (addr, &parts))
6935 abort ();
6936
6937 base = parts.base;
6938 index = parts.index;
6939 disp = parts.disp;
6940 scale = parts.scale;
6941
6942 if (!base && !index)
6943 {
6944 /* A displacement-only address requires special attention. */
6945
6946 if (GET_CODE (disp) == CONST_INT)
6947 {
6948 if (ASSEMBLER_DIALECT == ASM_INTEL)
6949 {
6950 if (USER_LABEL_PREFIX[0] == 0)
6951 putc ('%', file);
6952 fputs ("ds:", file);
6953 }
6954 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (addr));
6955 }
6956 else if (flag_pic)
6957 output_pic_addr_const (file, addr, 0);
6958 else
6959 output_addr_const (file, addr);
6960
6961 /* Use the one-byte-shorter RIP-relative addressing for 64-bit mode. */
6962 if (TARGET_64BIT
6963 && ((GET_CODE (addr) == SYMBOL_REF
6964 && ! tls_symbolic_operand (addr, GET_MODE (addr)))
6965 || GET_CODE (addr) == LABEL_REF
6966 || (GET_CODE (addr) == CONST
6967 && GET_CODE (XEXP (addr, 0)) == PLUS
6968 && (GET_CODE (XEXP (XEXP (addr, 0), 0)) == SYMBOL_REF
6969 || GET_CODE (XEXP (XEXP (addr, 0), 0)) == LABEL_REF)
6970 && GET_CODE (XEXP (XEXP (addr, 0), 1)) == CONST_INT)))
6971 fputs ("(%rip)", file);
6972 }
6973 else
6974 {
6975 if (ASSEMBLER_DIALECT == ASM_ATT)
6976 {
6977 if (disp)
6978 {
6979 if (flag_pic)
6980 output_pic_addr_const (file, disp, 0);
6981 else if (GET_CODE (disp) == LABEL_REF)
6982 output_asm_label (disp);
6983 else
6984 output_addr_const (file, disp);
6985 }
6986
6987 putc ('(', file);
6988 if (base)
6989 PRINT_REG (base, 0, file);
6990 if (index)
6991 {
6992 putc (',', file);
6993 PRINT_REG (index, 0, file);
6994 if (scale != 1)
6995 fprintf (file, ",%d", scale);
6996 }
6997 putc (')', file);
6998 }
6999 else
7000 {
7001 rtx offset = NULL_RTX;
7002
7003 if (disp)
7004 {
7005 /* Pull out the offset of a symbol; print any symbol itself. */
7006 if (GET_CODE (disp) == CONST
7007 && GET_CODE (XEXP (disp, 0)) == PLUS
7008 && GET_CODE (XEXP (XEXP (disp, 0), 1)) == CONST_INT)
7009 {
7010 offset = XEXP (XEXP (disp, 0), 1);
7011 disp = gen_rtx_CONST (VOIDmode,
7012 XEXP (XEXP (disp, 0), 0));
7013 }
7014
7015 if (flag_pic)
7016 output_pic_addr_const (file, disp, 0);
7017 else if (GET_CODE (disp) == LABEL_REF)
7018 output_asm_label (disp);
7019 else if (GET_CODE (disp) == CONST_INT)
7020 offset = disp;
7021 else
7022 output_addr_const (file, disp);
7023 }
7024
7025 putc ('[', file);
7026 if (base)
7027 {
7028 PRINT_REG (base, 0, file);
7029 if (offset)
7030 {
7031 if (INTVAL (offset) >= 0)
7032 putc ('+', file);
7033 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
7034 }
7035 }
7036 else if (offset)
7037 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
7038 else
7039 putc ('0', file);
7040
7041 if (index)
7042 {
7043 putc ('+', file);
7044 PRINT_REG (index, 0, file);
7045 if (scale != 1)
7046 fprintf (file, "*%d", scale);
7047 }
7048 putc (']', file);
7049 }
7050 }
7051 }
7052
7053 bool
7054 output_addr_const_extra (file, x)
7055 FILE *file;
7056 rtx x;
7057 {
7058 rtx op;
7059
7060 if (GET_CODE (x) != UNSPEC)
7061 return false;
7062
7063 op = XVECEXP (x, 0, 0);
7064 switch (XINT (x, 1))
7065 {
7066 case UNSPEC_GOTTPOFF:
7067 output_addr_const (file, op);
7068 /* FIXME: This might be @TPOFF in Sun ld. */
7069 fputs ("@GOTTPOFF", file);
7070 break;
7071 case UNSPEC_TPOFF:
7072 output_addr_const (file, op);
7073 fputs ("@TPOFF", file);
7074 break;
7075 case UNSPEC_NTPOFF:
7076 output_addr_const (file, op);
7077 if (TARGET_64BIT)
7078 fputs ("@TPOFF", file);
7079 else
7080 fputs ("@NTPOFF", file);
7081 break;
7082 case UNSPEC_DTPOFF:
7083 output_addr_const (file, op);
7084 fputs ("@DTPOFF", file);
7085 break;
7086 case UNSPEC_GOTNTPOFF:
7087 output_addr_const (file, op);
7088 if (TARGET_64BIT)
7089 fputs ("@GOTTPOFF(%rip)", file);
7090 else
7091 fputs ("@GOTNTPOFF", file);
7092 break;
7093 case UNSPEC_INDNTPOFF:
7094 output_addr_const (file, op);
7095 fputs ("@INDNTPOFF", file);
7096 break;
7097
7098 default:
7099 return false;
7100 }
7101
7102 return true;
7103 }
7104 \f
7105 /* Split one or more DImode RTL references into pairs of SImode
7106 references. The RTL can be REG, offsettable MEM, integer constant, or
7107 CONST_DOUBLE. "operands" is a pointer to an array of DImode RTL to
7108 split and "num" is its length. lo_half and hi_half are output arrays
7109 that parallel "operands". */
7110
7111 void
7112 split_di (operands, num, lo_half, hi_half)
7113 rtx operands[];
7114 int num;
7115 rtx lo_half[], hi_half[];
7116 {
7117 while (num--)
7118 {
7119 rtx op = operands[num];
7120
7121 /* simplify_subreg refuses to split volatile memory addresses,
7122 but we still have to handle them. */
7123 if (GET_CODE (op) == MEM)
7124 {
7125 lo_half[num] = adjust_address (op, SImode, 0);
7126 hi_half[num] = adjust_address (op, SImode, 4);
7127 }
7128 else
7129 {
7130 lo_half[num] = simplify_gen_subreg (SImode, op,
7131 GET_MODE (op) == VOIDmode
7132 ? DImode : GET_MODE (op), 0);
7133 hi_half[num] = simplify_gen_subreg (SImode, op,
7134 GET_MODE (op) == VOIDmode
7135 ? DImode : GET_MODE (op), 4);
7136 }
7137 }
7138 }
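/* For example (illustrative): splitting a DImode register operand yields
   its SImode low and high words (byte offsets 0 and 4) via
   simplify_gen_subreg, while a DImode MEM is split with adjust_address
   into two SImode MEMs at offsets 0 and 4.  */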
7139 /* Split one or more TImode RTL references into pairs of DImode
7140 references. The RTL can be REG, offsettable MEM, integer constant, or
7141 CONST_DOUBLE. "operands" is a pointer to an array of TImode RTL to
7142 split and "num" is its length. lo_half and hi_half are output arrays
7143 that parallel "operands". */
7144
7145 void
7146 split_ti (operands, num, lo_half, hi_half)
7147 rtx operands[];
7148 int num;
7149 rtx lo_half[], hi_half[];
7150 {
7151 while (num--)
7152 {
7153 rtx op = operands[num];
7154
7155 /* simplify_subreg refuses to split volatile memory addresses, but we
7156 still have to handle them. */
7157 if (GET_CODE (op) == MEM)
7158 {
7159 lo_half[num] = adjust_address (op, DImode, 0);
7160 hi_half[num] = adjust_address (op, DImode, 8);
7161 }
7162 else
7163 {
7164 lo_half[num] = simplify_gen_subreg (DImode, op, TImode, 0);
7165 hi_half[num] = simplify_gen_subreg (DImode, op, TImode, 8);
7166 }
7167 }
7168 }
7169 \f
7170 /* Output code to perform a 387 binary operation in INSN, one of PLUS,
7171 MINUS, MULT or DIV. OPERANDS are the insn operands, where operands[3]
7172 is the expression of the binary operation. The output may either be
7173 emitted here, or returned to the caller, like all output_* functions.
7174
7175 There is no guarantee that the operands are the same mode, as they
7176 might be within FLOAT or FLOAT_EXTEND expressions. */
7177
7178 #ifndef SYSV386_COMPAT
7179 /* Set to 1 for compatibility with brain-damaged assemblers. No-one
7180 wants to fix the assemblers because that causes incompatibility
7181 with gcc. No-one wants to fix gcc because that causes
7182 incompatibility with assemblers... You can use the option of
7183 -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way. */
7184 #define SYSV386_COMPAT 1
7185 #endif
7186
7187 const char *
7188 output_387_binary_op (insn, operands)
7189 rtx insn;
7190 rtx *operands;
7191 {
7192 static char buf[30];
7193 const char *p;
7194 const char *ssep;
7195 int is_sse = SSE_REG_P (operands[0]) | SSE_REG_P (operands[1]) | SSE_REG_P (operands[2]);
7196
7197 #ifdef ENABLE_CHECKING
7198 /* Even if we do not want to check the inputs, this documents the input
7199 constraints, which helps in understanding the following code. */
7200 if (STACK_REG_P (operands[0])
7201 && ((REG_P (operands[1])
7202 && REGNO (operands[0]) == REGNO (operands[1])
7203 && (STACK_REG_P (operands[2]) || GET_CODE (operands[2]) == MEM))
7204 || (REG_P (operands[2])
7205 && REGNO (operands[0]) == REGNO (operands[2])
7206 && (STACK_REG_P (operands[1]) || GET_CODE (operands[1]) == MEM)))
7207 && (STACK_TOP_P (operands[1]) || STACK_TOP_P (operands[2])))
7208 ; /* ok */
7209 else if (!is_sse)
7210 abort ();
7211 #endif
7212
7213 switch (GET_CODE (operands[3]))
7214 {
7215 case PLUS:
7216 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
7217 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
7218 p = "fiadd";
7219 else
7220 p = "fadd";
7221 ssep = "add";
7222 break;
7223
7224 case MINUS:
7225 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
7226 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
7227 p = "fisub";
7228 else
7229 p = "fsub";
7230 ssep = "sub";
7231 break;
7232
7233 case MULT:
7234 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
7235 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
7236 p = "fimul";
7237 else
7238 p = "fmul";
7239 ssep = "mul";
7240 break;
7241
7242 case DIV:
7243 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
7244 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
7245 p = "fidiv";
7246 else
7247 p = "fdiv";
7248 ssep = "div";
7249 break;
7250
7251 default:
7252 abort ();
7253 }
7254
7255 if (is_sse)
7256 {
7257 strcpy (buf, ssep);
7258 if (GET_MODE (operands[0]) == SFmode)
7259 strcat (buf, "ss\t{%2, %0|%0, %2}");
7260 else
7261 strcat (buf, "sd\t{%2, %0|%0, %2}");
7262 return buf;
7263 }
7264 strcpy (buf, p);
7265
7266 switch (GET_CODE (operands[3]))
7267 {
7268 case MULT:
7269 case PLUS:
7270 if (REG_P (operands[2]) && REGNO (operands[0]) == REGNO (operands[2]))
7271 {
7272 rtx temp = operands[2];
7273 operands[2] = operands[1];
7274 operands[1] = temp;
7275 }
7276
7277 /* We know operands[0] == operands[1]. */
7278
7279 if (GET_CODE (operands[2]) == MEM)
7280 {
7281 p = "%z2\t%2";
7282 break;
7283 }
7284
7285 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
7286 {
7287 if (STACK_TOP_P (operands[0]))
7288 /* How is it that we are storing to a dead operand[2]?
7289 Well, presumably operands[1] is dead too. We can't
7290 store the result to st(0) as st(0) gets popped on this
7291 instruction. Instead store to operands[2] (which I
7292 think has to be st(1)). st(1) will be popped later.
7293 gcc <= 2.8.1 didn't have this check and generated
7294 assembly code that the Unixware assembler rejected. */
7295 p = "p\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
7296 else
7297 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
7298 break;
7299 }
7300
7301 if (STACK_TOP_P (operands[0]))
7302 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
7303 else
7304 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
7305 break;
7306
7307 case MINUS:
7308 case DIV:
7309 if (GET_CODE (operands[1]) == MEM)
7310 {
7311 p = "r%z1\t%1";
7312 break;
7313 }
7314
7315 if (GET_CODE (operands[2]) == MEM)
7316 {
7317 p = "%z2\t%2";
7318 break;
7319 }
7320
7321 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
7322 {
7323 #if SYSV386_COMPAT
7324 /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
7325 derived assemblers, confusingly reverse the direction of
7326 the operation for fsub{r} and fdiv{r} when the
7327 destination register is not st(0). The Intel assembler
7328 doesn't have this brain damage. Read !SYSV386_COMPAT to
7329 figure out what the hardware really does. */
7330 if (STACK_TOP_P (operands[0]))
7331 p = "{p\t%0, %2|rp\t%2, %0}";
7332 else
7333 p = "{rp\t%2, %0|p\t%0, %2}";
7334 #else
7335 if (STACK_TOP_P (operands[0]))
7336 /* As above for fmul/fadd, we can't store to st(0). */
7337 p = "rp\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
7338 else
7339 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
7340 #endif
7341 break;
7342 }
7343
7344 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
7345 {
7346 #if SYSV386_COMPAT
7347 if (STACK_TOP_P (operands[0]))
7348 p = "{rp\t%0, %1|p\t%1, %0}";
7349 else
7350 p = "{p\t%1, %0|rp\t%0, %1}";
7351 #else
7352 if (STACK_TOP_P (operands[0]))
7353 p = "p\t{%0, %1|%1, %0}"; /* st(1) = st(1) op st(0); pop */
7354 else
7355 p = "rp\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2); pop */
7356 #endif
7357 break;
7358 }
7359
7360 if (STACK_TOP_P (operands[0]))
7361 {
7362 if (STACK_TOP_P (operands[1]))
7363 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
7364 else
7365 p = "r\t{%y1, %0|%0, %y1}"; /* st(0) = st(r1) op st(0) */
7366 break;
7367 }
7368 else if (STACK_TOP_P (operands[1]))
7369 {
7370 #if SYSV386_COMPAT
7371 p = "{\t%1, %0|r\t%0, %1}";
7372 #else
7373 p = "r\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2) */
7374 #endif
7375 }
7376 else
7377 {
7378 #if SYSV386_COMPAT
7379 p = "{r\t%2, %0|\t%0, %2}";
7380 #else
7381 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
7382 #endif
7383 }
7384 break;
7385
7386 default:
7387 abort ();
7388 }
7389
7390 strcat (buf, p);
7391 return buf;
7392 }
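/* A simple case for reference (illustrative): for an SSE addition in
   DFmode the function returns "addsd\t{%2, %0|%0, %2}", built from the
   "add" ssep prefix plus the "sd" suffix and the two-operand template.  */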
7393
7394 /* Output code to initialize control word copies used by
7395 trunc?f?i patterns. NORMAL is set to the current control word, while
7396 ROUND_DOWN is set to a control word that rounds downwards. */
7397 void
7398 emit_i387_cw_initialization (normal, round_down)
7399 rtx normal, round_down;
7400 {
7401 rtx reg = gen_reg_rtx (HImode);
7402
7403 emit_insn (gen_x86_fnstcw_1 (normal));
7404 emit_move_insn (reg, normal);
7405 if (!TARGET_PARTIAL_REG_STALL && !optimize_size
7406 && !TARGET_64BIT)
7407 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0xc)));
7408 else
7409 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0xc00)));
7410 emit_move_insn (round_down, reg);
7411 }
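/* For reference: ORing in 0xc00 sets both bits of the x87 rounding
   control field (bits 10 and 11 of the control word); the movsi_insv_1
   path presumably writes the same bits via the 0xc byte insertion.  */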
7412
7413 /* Output code for INSN to convert a float to a signed int. OPERANDS
7414 are the insn operands. The output may be [HSD]Imode and the input
7415 operand may be [SDX]Fmode. */
7416
7417 const char *
7418 output_fix_trunc (insn, operands)
7419 rtx insn;
7420 rtx *operands;
7421 {
7422 int stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
7423 int dimode_p = GET_MODE (operands[0]) == DImode;
7424
7425 /* Jump through a hoop or two for DImode, since the hardware has no
7426 non-popping instruction. We used to do this a different way, but
7427 that was somewhat fragile and broke with post-reload splitters. */
7428 if (dimode_p && !stack_top_dies)
7429 output_asm_insn ("fld\t%y1", operands);
7430
7431 if (!STACK_TOP_P (operands[1]))
7432 abort ();
7433
7434 if (GET_CODE (operands[0]) != MEM)
7435 abort ();
7436
7437 output_asm_insn ("fldcw\t%3", operands);
7438 if (stack_top_dies || dimode_p)
7439 output_asm_insn ("fistp%z0\t%0", operands);
7440 else
7441 output_asm_insn ("fist%z0\t%0", operands);
7442 output_asm_insn ("fldcw\t%2", operands);
7443
7444 return "";
7445 }
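/* An example (illustrative): for an SImode memory destination whose
   stack top dies, the emitted sequence is
	fldcw	%3
	fistpl	%0
	fldcw	%2
   i.e. load the modified control word, store-and-pop, then restore the
   original.  */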
7446
7447 /* Output code for INSN to compare OPERANDS. EFLAGS_P is 1 when fcomi
7448 should be used and 2 when fnstsw should be used. UNORDERED_P is true
7449 when fucom should be used. */
7450
7451 const char *
7452 output_fp_compare (insn, operands, eflags_p, unordered_p)
7453 rtx insn;
7454 rtx *operands;
7455 int eflags_p, unordered_p;
7456 {
7457 int stack_top_dies;
7458 rtx cmp_op0 = operands[0];
7459 rtx cmp_op1 = operands[1];
7460 int is_sse = SSE_REG_P (operands[0]) | SSE_REG_P (operands[1]);
7461
7462 if (eflags_p == 2)
7463 {
7464 cmp_op0 = cmp_op1;
7465 cmp_op1 = operands[2];
7466 }
7467 if (is_sse)
7468 {
7469 if (GET_MODE (operands[0]) == SFmode)
7470 if (unordered_p)
7471 return "ucomiss\t{%1, %0|%0, %1}";
7472 else
7473 return "comiss\t{%1, %0|%0, %y}";
7474 else
7475 if (unordered_p)
7476 return "ucomisd\t{%1, %0|%0, %1}";
7477 else
7478 return "comisd\t{%1, %0|%0, %y}";
7479 }
7480
7481 if (! STACK_TOP_P (cmp_op0))
7482 abort ();
7483
7484 stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
7485
7486 if (STACK_REG_P (cmp_op1)
7487 && stack_top_dies
7488 && find_regno_note (insn, REG_DEAD, REGNO (cmp_op1))
7489 && REGNO (cmp_op1) != FIRST_STACK_REG)
7490 {
7491 /* If the top of the 387 stack dies, and the other operand
7492 is also a stack register that dies, then this must be an
7493 `fcompp' float compare. */
7494
7495 if (eflags_p == 1)
7496 {
7497 /* There is no double popping fcomi variant. Fortunately,
7498 eflags is immune from the fstp's cc clobbering. */
7499 if (unordered_p)
7500 output_asm_insn ("fucomip\t{%y1, %0|%0, %y1}", operands);
7501 else
7502 output_asm_insn ("fcomip\t{%y1, %0|%0, %y1}", operands);
7503 return "fstp\t%y0";
7504 }
7505 else
7506 {
7507 if (eflags_p == 2)
7508 {
7509 if (unordered_p)
7510 return "fucompp\n\tfnstsw\t%0";
7511 else
7512 return "fcompp\n\tfnstsw\t%0";
7513 }
7514 else
7515 {
7516 if (unordered_p)
7517 return "fucompp";
7518 else
7519 return "fcompp";
7520 }
7521 }
7522 }
7523 else
7524 {
7525 /* Encoded here as eflags_p | intmode | unordered_p | stack_top_dies. */
7526
7527 static const char * const alt[24] =
7528 {
7529 "fcom%z1\t%y1",
7530 "fcomp%z1\t%y1",
7531 "fucom%z1\t%y1",
7532 "fucomp%z1\t%y1",
7533
7534 "ficom%z1\t%y1",
7535 "ficomp%z1\t%y1",
7536 NULL,
7537 NULL,
7538
7539 "fcomi\t{%y1, %0|%0, %y1}",
7540 "fcomip\t{%y1, %0|%0, %y1}",
7541 "fucomi\t{%y1, %0|%0, %y1}",
7542 "fucomip\t{%y1, %0|%0, %y1}",
7543
7544 NULL,
7545 NULL,
7546 NULL,
7547 NULL,
7548
7549 "fcom%z2\t%y2\n\tfnstsw\t%0",
7550 "fcomp%z2\t%y2\n\tfnstsw\t%0",
7551 "fucom%z2\t%y2\n\tfnstsw\t%0",
7552 "fucomp%z2\t%y2\n\tfnstsw\t%0",
7553
7554 "ficom%z2\t%y2\n\tfnstsw\t%0",
7555 "ficomp%z2\t%y2\n\tfnstsw\t%0",
7556 NULL,
7557 NULL
7558 };
7559
7560 int mask;
7561 const char *ret;
7562
7563 mask = eflags_p << 3;
7564 mask |= (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT) << 2;
7565 mask |= unordered_p << 1;
7566 mask |= stack_top_dies;
7567
7568 if (mask >= 24)
7569 abort ();
7570 ret = alt[mask];
7571 if (ret == NULL)
7572 abort ();
7573
7574 return ret;
7575 }
7576 }
7577
7578 void
7579 ix86_output_addr_vec_elt (file, value)
7580 FILE *file;
7581 int value;
7582 {
7583 const char *directive = ASM_LONG;
7584
7585 if (TARGET_64BIT)
7586 {
7587 #ifdef ASM_QUAD
7588 directive = ASM_QUAD;
7589 #else
7590 abort ();
7591 #endif
7592 }
7593
7594 fprintf (file, "%s%s%d\n", directive, LPREFIX, value);
7595 }
7596
7597 void
7598 ix86_output_addr_diff_elt (file, value, rel)
7599 FILE *file;
7600 int value, rel;
7601 {
7602 if (TARGET_64BIT)
7603 fprintf (file, "%s%s%d-%s%d\n",
7604 ASM_LONG, LPREFIX, value, LPREFIX, rel);
7605 else if (HAVE_AS_GOTOFF_IN_DATA)
7606 fprintf (file, "%s%s%d@GOTOFF\n", ASM_LONG, LPREFIX, value);
7607 #if TARGET_MACHO
7608 else if (TARGET_MACHO)
7609 fprintf (file, "%s%s%d-%s\n", ASM_LONG, LPREFIX, value,
7610 machopic_function_base_name () + 1);
7611 #endif
7612 else
7613 asm_fprintf (file, "%s%U%s+[.-%s%d]\n",
7614 ASM_LONG, GOT_SYMBOL_NAME, LPREFIX, value);
7615 }
7616 \f
7617 /* Generate either "mov $0, reg" or "xor reg, reg", as appropriate
7618 for the target. */
7619
7620 void
7621 ix86_expand_clear (dest)
7622 rtx dest;
7623 {
7624 rtx tmp;
7625
7626 /* We play register width games, which are only valid after reload. */
7627 if (!reload_completed)
7628 abort ();
7629
7630 /* Avoid HImode and its attendant prefix byte. */
7631 if (GET_MODE_SIZE (GET_MODE (dest)) < 4)
7632 dest = gen_rtx_REG (SImode, REGNO (dest));
7633
7634 tmp = gen_rtx_SET (VOIDmode, dest, const0_rtx);
7635
7636 /* This predicate should match that for movsi_xor and movdi_xor_rex64. */
7637 if (reload_completed && (!TARGET_USE_MOV0 || optimize_size))
7638 {
7639 rtx clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, 17));
7640 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
7641 }
7642
7643 emit_insn (tmp);
7644 }
7645
7646 /* X is an unchanging MEM. If it is a constant pool reference, return
7647 the constant pool rtx, else NULL. */
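/* For -fpic 32-bit code such a reference typically has the form
   (mem (plus pic_offset_table_rtx
              (const (unspec [(symbol_ref ...)] UNSPEC_GOTOFF))));
   the code below unwraps that to the bare SYMBOL_REF before checking
   CONSTANT_POOL_ADDRESS_P.  */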
7648
7649 static rtx
7650 maybe_get_pool_constant (x)
7651 rtx x;
7652 {
7653 x = XEXP (x, 0);
7654
7655 if (flag_pic && ! TARGET_64BIT)
7656 {
7657 if (GET_CODE (x) != PLUS)
7658 return NULL_RTX;
7659 if (XEXP (x, 0) != pic_offset_table_rtx)
7660 return NULL_RTX;
7661 x = XEXP (x, 1);
7662 if (GET_CODE (x) != CONST)
7663 return NULL_RTX;
7664 x = XEXP (x, 0);
7665 if (GET_CODE (x) != UNSPEC)
7666 return NULL_RTX;
7667 if (XINT (x, 1) != UNSPEC_GOTOFF)
7668 return NULL_RTX;
7669 x = XVECEXP (x, 0, 0);
7670 }
7671
7672 if (GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
7673 return get_pool_constant (x);
7674
7675 return NULL_RTX;
7676 }
7677
7678 void
7679 ix86_expand_move (mode, operands)
7680 enum machine_mode mode;
7681 rtx operands[];
7682 {
7683 int strict = (reload_in_progress || reload_completed);
7684 rtx insn, op0, op1, tmp;
7685
7686 op0 = operands[0];
7687 op1 = operands[1];
7688
7689 /* ??? We have a slight problem. We need to say that tls symbols are
7690 not legitimate constants so that reload does not helpfully reload
7691 these constants from a REG_EQUIV, which we cannot handle. (Recall
7692 that general- and local-dynamic address resolution requires a
7693 function call.)
7694
7695 However, if we say that tls symbols are not legitimate constants,
7696 then emit_move_insn helpfully drops them into the constant pool.
7697
7698 It is far easier to work around emit_move_insn than reload. Recognize
7699 the MEM that we would have created and extract the symbol_ref. */
7700
7701 if (mode == Pmode
7702 && GET_CODE (op1) == MEM
7703 && RTX_UNCHANGING_P (op1))
7704 {
7705 tmp = maybe_get_pool_constant (op1);
7706 /* Note that we only care about symbolic constants here, which
7707 unlike CONST_INT will always have a proper mode. */
7708 if (tmp && GET_MODE (tmp) == Pmode)
7709 op1 = tmp;
7710 }
7711
7712 if (tls_symbolic_operand (op1, Pmode))
7713 {
7714 op1 = legitimize_address (op1, op1, VOIDmode);
7715 if (GET_CODE (op0) == MEM)
7716 {
7717 tmp = gen_reg_rtx (mode);
7718 emit_insn (gen_rtx_SET (VOIDmode, tmp, op1));
7719 op1 = tmp;
7720 }
7721 }
7722 else if (flag_pic && mode == Pmode && symbolic_operand (op1, Pmode))
7723 {
7724 #if TARGET_MACHO
7725 if (MACHOPIC_PURE)
7726 {
7727 rtx temp = ((reload_in_progress
7728 || ((op0 && GET_CODE (op0) == REG)
7729 && mode == Pmode))
7730 ? op0 : gen_reg_rtx (Pmode));
7731 op1 = machopic_indirect_data_reference (op1, temp);
7732 op1 = machopic_legitimize_pic_address (op1, mode,
7733 temp == op1 ? 0 : temp);
7734 }
7735 else
7736 {
7737 if (MACHOPIC_INDIRECT)
7738 op1 = machopic_indirect_data_reference (op1, 0);
7739 }
7740 if (op0 != op1)
7741 {
7742 insn = gen_rtx_SET (VOIDmode, op0, op1);
7743 emit_insn (insn);
7744 }
7745 return;
7746 #endif /* TARGET_MACHO */
7747 if (GET_CODE (op0) == MEM)
7748 op1 = force_reg (Pmode, op1);
7749 else
7750 {
7751 rtx temp = op0;
7752 if (GET_CODE (temp) != REG)
7753 temp = gen_reg_rtx (Pmode);
7754 temp = legitimize_pic_address (op1, temp);
7755 if (temp == op0)
7756 return;
7757 op1 = temp;
7758 }
7759 }
7760 else
7761 {
7762 if (GET_CODE (op0) == MEM
7763 && (PUSH_ROUNDING (GET_MODE_SIZE (mode)) != GET_MODE_SIZE (mode)
7764 || !push_operand (op0, mode))
7765 && GET_CODE (op1) == MEM)
7766 op1 = force_reg (mode, op1);
7767
7768 if (push_operand (op0, mode)
7769 && ! general_no_elim_operand (op1, mode))
7770 op1 = copy_to_mode_reg (mode, op1);
7771
7772 /* Force large constants in 64-bit compilation into a register
7773 to get them CSEd. */
7774 if (TARGET_64BIT && mode == DImode
7775 && immediate_operand (op1, mode)
7776 && !x86_64_zero_extended_value (op1)
7777 && !register_operand (op0, mode)
7778 && optimize && !reload_completed && !reload_in_progress)
7779 op1 = copy_to_mode_reg (mode, op1);
7780
7781 if (FLOAT_MODE_P (mode))
7782 {
7783 /* If we are loading a floating point constant to a register,
7784 force the value to memory now, since we'll get better code
7785 out the back end. */
7786
7787 if (strict)
7788 ;
7789 else if (GET_CODE (op1) == CONST_DOUBLE
7790 && register_operand (op0, mode))
7791 op1 = validize_mem (force_const_mem (mode, op1));
7792 }
7793 }
7794
7795 insn = gen_rtx_SET (VOIDmode, op0, op1);
7796
7797 emit_insn (insn);
7798 }
7799
7800 void
7801 ix86_expand_vector_move (mode, operands)
7802 enum machine_mode mode;
7803 rtx operands[];
7804 {
7805 /* Force constants other than zero into memory. We do not know how
7806 the instructions used to build constants modify the upper 64 bits
7807 of the register; once we have that information we may be able
7808 to handle some of them more efficiently. */
7809 if ((reload_in_progress | reload_completed) == 0
7810 && register_operand (operands[0], mode)
7811 && CONSTANT_P (operands[1]))
7812 operands[1] = force_const_mem (mode, operands[1]);
7813
7814 /* Make operand1 a register if it isn't already. */
7815 if (!no_new_pseudos
7816 && !register_operand (operands[0], mode)
7817 && !register_operand (operands[1], mode))
7818 {
7819 rtx temp = force_reg (GET_MODE (operands[1]), operands[1]);
7820 emit_move_insn (operands[0], temp);
7821 return;
7822 }
7823
7824 emit_insn (gen_rtx_SET (VOIDmode, operands[0], operands[1]));
7825 }
7826
7827 /* Attempt to expand a binary operator. Make the expansion closer to the
7828 actual machine than just general_operand, which would allow 3 separate
7829 memory references (one output, two input) in a single insn. */
7830
7831 void
7832 ix86_expand_binary_operator (code, mode, operands)
7833 enum rtx_code code;
7834 enum machine_mode mode;
7835 rtx operands[];
7836 {
7837 int matching_memory;
7838 rtx src1, src2, dst, op, clob;
7839
7840 dst = operands[0];
7841 src1 = operands[1];
7842 src2 = operands[2];
7843
7844 /* Recognize <var1> = <value> <op> <var1> for commutative operators */
7845 if (GET_RTX_CLASS (code) == 'c'
7846 && (rtx_equal_p (dst, src2)
7847 || immediate_operand (src1, mode)))
7848 {
7849 rtx temp = src1;
7850 src1 = src2;
7851 src2 = temp;
7852 }
7853
7854 /* If the destination is memory, and we do not have matching source
7855 operands, do things in registers. */
7856 matching_memory = 0;
7857 if (GET_CODE (dst) == MEM)
7858 {
7859 if (rtx_equal_p (dst, src1))
7860 matching_memory = 1;
7861 else if (GET_RTX_CLASS (code) == 'c'
7862 && rtx_equal_p (dst, src2))
7863 matching_memory = 2;
7864 else
7865 dst = gen_reg_rtx (mode);
7866 }
7867
7868 /* The two source operands cannot both be in memory. */
7869 if (GET_CODE (src1) == MEM && GET_CODE (src2) == MEM)
7870 {
7871 if (matching_memory != 2)
7872 src2 = force_reg (mode, src2);
7873 else
7874 src1 = force_reg (mode, src1);
7875 }
7876
7877 /* If the operation is not commutative, source 1 cannot be a constant
7878 or non-matching memory. */
7879 if ((CONSTANT_P (src1)
7880 || (!matching_memory && GET_CODE (src1) == MEM))
7881 && GET_RTX_CLASS (code) != 'c')
7882 src1 = force_reg (mode, src1);
7883
7884 /* If optimizing, copy to regs to improve CSE */
7885 if (optimize && ! no_new_pseudos)
7886 {
7887 if (GET_CODE (dst) == MEM)
7888 dst = gen_reg_rtx (mode);
7889 if (GET_CODE (src1) == MEM)
7890 src1 = force_reg (mode, src1);
7891 if (GET_CODE (src2) == MEM)
7892 src2 = force_reg (mode, src2);
7893 }
7894
7895 /* Emit the instruction. */
7896
7897 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, mode, src1, src2));
7898 if (reload_in_progress)
7899 {
7900 /* Reload doesn't know about the flags register, and doesn't know that
7901 it doesn't want to clobber it. We can only do this with PLUS. */
7902 if (code != PLUS)
7903 abort ();
7904 emit_insn (op);
7905 }
7906 else
7907 {
7908 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
7909 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
7910 }
7911
7912 /* Fix up the destination if needed. */
7913 if (dst != operands[0])
7914 emit_move_insn (operands[0], dst);
7915 }
7916
7917 /* Return TRUE or FALSE depending on whether the binary operator meets the
7918 appropriate constraints. */
7919
7920 int
7921 ix86_binary_operator_ok (code, mode, operands)
7922 enum rtx_code code;
7923 enum machine_mode mode ATTRIBUTE_UNUSED;
7924 rtx operands[3];
7925 {
7926 /* Both source operands cannot be in memory. */
7927 if (GET_CODE (operands[1]) == MEM && GET_CODE (operands[2]) == MEM)
7928 return 0;
7929 /* If the operation is not commutative, source 1 cannot be a constant. */
7930 if (CONSTANT_P (operands[1]) && GET_RTX_CLASS (code) != 'c')
7931 return 0;
7932 /* If the destination is memory, we must have a matching source operand. */
7933 if (GET_CODE (operands[0]) == MEM
7934 && ! (rtx_equal_p (operands[0], operands[1])
7935 || (GET_RTX_CLASS (code) == 'c'
7936 && rtx_equal_p (operands[0], operands[2]))))
7937 return 0;
7938 /* If the operation is not commutative and source 1 is memory, we must
7939 have a matching destination. */
7940 if (GET_CODE (operands[1]) == MEM
7941 && GET_RTX_CLASS (code) != 'c'
7942 && ! rtx_equal_p (operands[0], operands[1]))
7943 return 0;
7944 return 1;
7945 }
7946
7947 /* Attempt to expand a unary operator. Make the expansion closer to the
7948 actual machine than just general_operand, which would allow 2 separate
7949 memory references (one output, one input) in a single insn. */
7950
7951 void
7952 ix86_expand_unary_operator (code, mode, operands)
7953 enum rtx_code code;
7954 enum machine_mode mode;
7955 rtx operands[];
7956 {
7957 int matching_memory;
7958 rtx src, dst, op, clob;
7959
7960 dst = operands[0];
7961 src = operands[1];
7962
7963 /* If the destination is memory, and we do not have matching source
7964 operands, do things in registers. */
7965 matching_memory = 0;
7966 if (GET_CODE (dst) == MEM)
7967 {
7968 if (rtx_equal_p (dst, src))
7969 matching_memory = 1;
7970 else
7971 dst = gen_reg_rtx (mode);
7972 }
7973
7974 /* When source operand is memory, destination must match. */
7975 if (!matching_memory && GET_CODE (src) == MEM)
7976 src = force_reg (mode, src);
7977
7978 /* If optimizing, copy to regs to improve CSE */
7979 if (optimize && ! no_new_pseudos)
7980 {
7981 if (GET_CODE (dst) == MEM)
7982 dst = gen_reg_rtx (mode);
7983 if (GET_CODE (src) == MEM)
7984 src = force_reg (mode, src);
7985 }
7986
7987 /* Emit the instruction. */
7988
7989 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_e (code, mode, src));
7990 if (reload_in_progress || code == NOT)
7991 {
7992 /* Reload doesn't know about the flags register, and doesn't know that
7993 it doesn't want to clobber it. */
7994 if (code != NOT)
7995 abort ();
7996 emit_insn (op);
7997 }
7998 else
7999 {
8000 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
8001 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
8002 }
8003
8004 /* Fix up the destination if needed. */
8005 if (dst != operands[0])
8006 emit_move_insn (operands[0], dst);
8007 }
8008
8009 /* Return TRUE or FALSE depending on whether the unary operator meets the
8010 appropriate constraints. */
8011
8012 int
8013 ix86_unary_operator_ok (code, mode, operands)
8014 enum rtx_code code ATTRIBUTE_UNUSED;
8015 enum machine_mode mode ATTRIBUTE_UNUSED;
8016 rtx operands[2] ATTRIBUTE_UNUSED;
8017 {
8018 /* If one of operands is memory, source and destination must match. */
8019 if ((GET_CODE (operands[0]) == MEM
8020 || GET_CODE (operands[1]) == MEM)
8021 && ! rtx_equal_p (operands[0], operands[1]))
8022 return FALSE;
8023 return TRUE;
8024 }
8025
8026 /* Return TRUE or FALSE depending on whether the first SET in INSN
8027 has source and destination with matching CC modes, and that the
8028 CC mode is at least as constrained as REQ_MODE. */
8029
8030 int
8031 ix86_match_ccmode (insn, req_mode)
8032 rtx insn;
8033 enum machine_mode req_mode;
8034 {
8035 rtx set;
8036 enum machine_mode set_mode;
8037
8038 set = PATTERN (insn);
8039 if (GET_CODE (set) == PARALLEL)
8040 set = XVECEXP (set, 0, 0);
8041 if (GET_CODE (set) != SET)
8042 abort ();
8043 if (GET_CODE (SET_SRC (set)) != COMPARE)
8044 abort ();
8045
8046 set_mode = GET_MODE (SET_DEST (set));
8047 switch (set_mode)
8048 {
8049 case CCNOmode:
8050 if (req_mode != CCNOmode
8051 && (req_mode != CCmode
8052 || XEXP (SET_SRC (set), 1) != const0_rtx))
8053 return 0;
8054 break;
8055 case CCmode:
8056 if (req_mode == CCGCmode)
8057 return 0;
8058 /* FALLTHRU */
8059 case CCGCmode:
8060 if (req_mode == CCGOCmode || req_mode == CCNOmode)
8061 return 0;
8062 /* FALLTHRU */
8063 case CCGOCmode:
8064 if (req_mode == CCZmode)
8065 return 0;
8066 /* FALLTHRU */
8067 case CCZmode:
8068 break;
8069
8070 default:
8071 abort ();
8072 }
8073
8074 return (GET_MODE (SET_SRC (set)) == set_mode);
8075 }
8076
8077 /* Generate insn patterns to do an integer compare of OPERANDS. */
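/* For example, for (gt op0 op1) with a nonzero op1 this emits
   (set (reg:CCGC flags) (compare:CCGC op0 op1)) and returns
   (gt (reg:CCGC flags) (const_int 0)), ready to be plugged into a
   bcc, scc or cmov pattern.  */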
8078
8079 static rtx
8080 ix86_expand_int_compare (code, op0, op1)
8081 enum rtx_code code;
8082 rtx op0, op1;
8083 {
8084 enum machine_mode cmpmode;
8085 rtx tmp, flags;
8086
8087 cmpmode = SELECT_CC_MODE (code, op0, op1);
8088 flags = gen_rtx_REG (cmpmode, FLAGS_REG);
8089
8090 /* This is very simple, but making the interface the same as in the
8091 FP case makes the rest of the code easier. */
8092 tmp = gen_rtx_COMPARE (cmpmode, op0, op1);
8093 emit_insn (gen_rtx_SET (VOIDmode, flags, tmp));
8094
8095 /* Return the test that should be put into the flags user, i.e.
8096 the bcc, scc, or cmov instruction. */
8097 return gen_rtx_fmt_ee (code, VOIDmode, flags, const0_rtx);
8098 }
8099
8100 /* Figure out whether to use ordered or unordered fp comparisons.
8101 Return the appropriate mode to use. */
8102
8103 enum machine_mode
8104 ix86_fp_compare_mode (code)
8105 enum rtx_code code ATTRIBUTE_UNUSED;
8106 {
8107 /* ??? In order to make all comparisons reversible, we do all comparisons
8108 non-trapping when compiling for IEEE. Once gcc is able to distinguish
8109 all forms of trapping and nontrapping comparisons, we can make inequality
8110 comparisons trapping again, since that results in better code when using
8111 FCOM based compares. */
8112 return TARGET_IEEE_FP ? CCFPUmode : CCFPmode;
8113 }
8114
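/* Return the CC mode needed for a comparison CODE of OP0 and OP1.
   For example, an EQ or NE test only needs the zero flag and gets
   CCZmode, a signed GT against the constant zero gets CCNOmode,
   while the same test against a nonzero value needs CCGCmode.  */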
8115 enum machine_mode
8116 ix86_cc_mode (code, op0, op1)
8117 enum rtx_code code;
8118 rtx op0, op1;
8119 {
8120 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_FLOAT)
8121 return ix86_fp_compare_mode (code);
8122 switch (code)
8123 {
8124 /* Only zero flag is needed. */
8125 case EQ: /* ZF=0 */
8126 case NE: /* ZF!=0 */
8127 return CCZmode;
8128 /* Codes needing carry flag. */
8129 case GEU: /* CF=0 */
8130 case GTU: /* CF=0 & ZF=0 */
8131 case LTU: /* CF=1 */
8132 case LEU: /* CF=1 | ZF=1 */
8133 return CCmode;
8134 /* Codes possibly doable only with sign flag when
8135 comparing against zero. */
8136 case GE: /* SF=OF or SF=0 */
8137 case LT: /* SF<>OF or SF=1 */
8138 if (op1 == const0_rtx)
8139 return CCGOCmode;
8140 else
8141 /* For the other cases the carry flag is not required. */
8142 return CCGCmode;
8143 /* Codes doable only with the sign flag when comparing
8144 against zero, but we lack a jump instruction for it,
8145 so we need to use relational tests against overflow,
8146 which thus needs to be zero. */
8147 case GT: /* ZF=0 & SF=OF */
8148 case LE: /* ZF=1 | SF<>OF */
8149 if (op1 == const0_rtx)
8150 return CCNOmode;
8151 else
8152 return CCGCmode;
8153 /* The strcmp pattern does (use flags), and combine may ask us for the
8154 proper mode. */
8155 case USE:
8156 return CCmode;
8157 default:
8158 abort ();
8159 }
8160 }
8161
8162 /* Return true if we should use an FCOMI instruction for this fp comparison. */
8163
8164 int
8165 ix86_use_fcomi_compare (code)
8166 enum rtx_code code ATTRIBUTE_UNUSED;
8167 {
8168 enum rtx_code swapped_code = swap_condition (code);
8169 return ((ix86_fp_comparison_cost (code) == ix86_fp_comparison_fcomi_cost (code))
8170 || (ix86_fp_comparison_cost (swapped_code)
8171 == ix86_fp_comparison_fcomi_cost (swapped_code)));
8172 }
8173
8174 /* Swap, force into registers, or otherwise massage the two operands
8175 to a fp comparison. The operands are updated in place; the new
8176 comparison code is returned. */
8177
8178 static enum rtx_code
8179 ix86_prepare_fp_compare_args (code, pop0, pop1)
8180 enum rtx_code code;
8181 rtx *pop0, *pop1;
8182 {
8183 enum machine_mode fpcmp_mode = ix86_fp_compare_mode (code);
8184 rtx op0 = *pop0, op1 = *pop1;
8185 enum machine_mode op_mode = GET_MODE (op0);
8186 int is_sse = SSE_REG_P (op0) | SSE_REG_P (op1);
8187
8188 /* All of the unordered compare instructions only work on registers.
8189 The same is true of the XFmode compare instructions and of the
8190 fcomi compare instructions. */
8191
8192 if (!is_sse
8193 && (fpcmp_mode == CCFPUmode
8194 || op_mode == XFmode
8195 || op_mode == TFmode
8196 || ix86_use_fcomi_compare (code)))
8197 {
8198 op0 = force_reg (op_mode, op0);
8199 op1 = force_reg (op_mode, op1);
8200 }
8201 else
8202 {
8203 /* %%% We only allow op1 in memory; op0 must be st(0). So swap
8204 things around if they appear profitable, otherwise force op0
8205 into a register. */
8206
8207 if (standard_80387_constant_p (op0) == 0
8208 || (GET_CODE (op0) == MEM
8209 && ! (standard_80387_constant_p (op1) == 0
8210 || GET_CODE (op1) == MEM)))
8211 {
8212 rtx tmp;
8213 tmp = op0, op0 = op1, op1 = tmp;
8214 code = swap_condition (code);
8215 }
8216
8217 if (GET_CODE (op0) != REG)
8218 op0 = force_reg (op_mode, op0);
8219
8220 if (CONSTANT_P (op1))
8221 {
8222 if (standard_80387_constant_p (op1))
8223 op1 = force_reg (op_mode, op1);
8224 else
8225 op1 = validize_mem (force_const_mem (op_mode, op1));
8226 }
8227 }
8228
8229 /* Try to rearrange the comparison to make it cheaper. */
8230 if (ix86_fp_comparison_cost (code)
8231 > ix86_fp_comparison_cost (swap_condition (code))
8232 && (GET_CODE (op1) == REG || !no_new_pseudos))
8233 {
8234 rtx tmp;
8235 tmp = op0, op0 = op1, op1 = tmp;
8236 code = swap_condition (code);
8237 if (GET_CODE (op0) != REG)
8238 op0 = force_reg (op_mode, op0);
8239 }
8240
8241 *pop0 = op0;
8242 *pop1 = op1;
8243 return code;
8244 }
8245
8246 /* Convert the comparison codes we use to represent an FP comparison to the
8247 integer code that will result in a proper branch. Return UNKNOWN if no
8248 such code is available. */
8249 static enum rtx_code
8250 ix86_fp_compare_code_to_integer (code)
8251 enum rtx_code code;
8252 {
8253 switch (code)
8254 {
8255 case GT:
8256 return GTU;
8257 case GE:
8258 return GEU;
8259 case ORDERED:
8260 case UNORDERED:
8261 return code;
8262 break;
8263 case UNEQ:
8264 return EQ;
8265 break;
8266 case UNLT:
8267 return LTU;
8268 break;
8269 case UNLE:
8270 return LEU;
8271 break;
8272 case LTGT:
8273 return NE;
8274 break;
8275 default:
8276 return UNKNOWN;
8277 }
8278 }
8279
8280 /* Split comparison code CODE into comparisons we can do using branch
8281 instructions. BYPASS_CODE is the comparison code for a branch that will
8282 branch around FIRST_CODE and SECOND_CODE. If one of the branches
8283 is not required, its code is set to NIL.
8284 We never require more than two branches. */
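/* For example, under TARGET_IEEE_FP a LT comparison becomes
   FIRST_CODE == UNLT with BYPASS_CODE == UNORDERED, i.e. branch around
   the UNLT test when the operands are unordered; without IEEE math the
   bypass is dropped and a plain UNLT test is used.  */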
8285 static void
8286 ix86_fp_comparison_codes (code, bypass_code, first_code, second_code)
8287 enum rtx_code code, *bypass_code, *first_code, *second_code;
8288 {
8289 *first_code = code;
8290 *bypass_code = NIL;
8291 *second_code = NIL;
8292
8293 /* The fcomi comparison sets flags as follows:
8294
8295 cmp ZF PF CF
8296 > 0 0 0
8297 < 0 0 1
8298 = 1 0 0
8299 un 1 1 1 */
8300
8301 switch (code)
8302 {
8303 case GT: /* GTU - CF=0 & ZF=0 */
8304 case GE: /* GEU - CF=0 */
8305 case ORDERED: /* PF=0 */
8306 case UNORDERED: /* PF=1 */
8307 case UNEQ: /* EQ - ZF=1 */
8308 case UNLT: /* LTU - CF=1 */
8309 case UNLE: /* LEU - CF=1 | ZF=1 */
8310 case LTGT: /* EQ - ZF=0 */
8311 break;
8312 case LT: /* LTU - CF=1 - fails on unordered */
8313 *first_code = UNLT;
8314 *bypass_code = UNORDERED;
8315 break;
8316 case LE: /* LEU - CF=1 | ZF=1 - fails on unordered */
8317 *first_code = UNLE;
8318 *bypass_code = UNORDERED;
8319 break;
8320 case EQ: /* EQ - ZF=1 - fails on unordered */
8321 *first_code = UNEQ;
8322 *bypass_code = UNORDERED;
8323 break;
8324 case NE: /* NE - ZF=0 - fails on unordered */
8325 *first_code = LTGT;
8326 *second_code = UNORDERED;
8327 break;
8328 case UNGE: /* GEU - CF=0 - fails on unordered */
8329 *first_code = GE;
8330 *second_code = UNORDERED;
8331 break;
8332 case UNGT: /* GTU - CF=0 & ZF=0 - fails on unordered */
8333 *first_code = GT;
8334 *second_code = UNORDERED;
8335 break;
8336 default:
8337 abort ();
8338 }
8339 if (!TARGET_IEEE_FP)
8340 {
8341 *second_code = NIL;
8342 *bypass_code = NIL;
8343 }
8344 }
8345
8346 /* Return the cost of a comparison done using fcom + arithmetic operations on AX.
8347 All of the following functions use the number of instructions as the cost metric.
8348 In the future this should be tweaked to compute bytes for optimize_size and
8349 take into account the performance of various instructions on various CPUs. */
8350 static int
8351 ix86_fp_comparison_arithmetics_cost (code)
8352 enum rtx_code code;
8353 {
8354 if (!TARGET_IEEE_FP)
8355 return 4;
8356 /* The cost of code output by ix86_expand_fp_compare. */
8357 switch (code)
8358 {
8359 case UNLE:
8360 case UNLT:
8361 case LTGT:
8362 case GT:
8363 case GE:
8364 case UNORDERED:
8365 case ORDERED:
8366 case UNEQ:
8367 return 4;
8368 break;
8369 case LT:
8370 case NE:
8371 case EQ:
8372 case UNGE:
8373 return 5;
8374 break;
8375 case LE:
8376 case UNGT:
8377 return 6;
8378 break;
8379 default:
8380 abort ();
8381 }
8382 }
8383
8384 /* Return cost of comparison done using fcomi operation.
8385 See ix86_fp_comparison_arithmetics_cost for the metrics. */
8386 static int
8387 ix86_fp_comparison_fcomi_cost (code)
8388 enum rtx_code code;
8389 {
8390 enum rtx_code bypass_code, first_code, second_code;
8391 /* Return an arbitrarily high cost when the instruction is not supported -
8392 this prevents gcc from using it. */
8393 if (!TARGET_CMOVE)
8394 return 1024;
8395 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
8396 return (bypass_code != NIL || second_code != NIL) + 2;
8397 }
8398
8399 /* Return cost of comparison done using sahf operation.
8400 See ix86_fp_comparison_arithmetics_cost for the metrics. */
8401 static int
8402 ix86_fp_comparison_sahf_cost (code)
8403 enum rtx_code code;
8404 {
8405 enum rtx_code bypass_code, first_code, second_code;
8406 /* Return an arbitrarily high cost when the instruction is not preferred -
8407 this keeps gcc from using it. */
8408 if (!TARGET_USE_SAHF && !optimize_size)
8409 return 1024;
8410 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
8411 return (bypass_code != NIL || second_code != NIL) + 3;
8412 }
8413
8414 /* Compute the cost of the comparison using the cheapest available method.
8415 See ix86_fp_comparison_arithmetics_cost for the metrics. */
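/* For instance, assuming TARGET_CMOVE and TARGET_IEEE_FP, an EQ compare
   costs 5 via the fcom + arithmetic path but only 3 via fcomi (the base
   cost of 2 plus 1 for the UNORDERED bypass branch), so the fcomi
   sequence comes out cheapest.  */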
8416 static int
8417 ix86_fp_comparison_cost (code)
8418 enum rtx_code code;
8419 {
8420 int fcomi_cost, sahf_cost, arithmetics_cost = 1024;
8421 int min;
8422
8423 fcomi_cost = ix86_fp_comparison_fcomi_cost (code);
8424 sahf_cost = ix86_fp_comparison_sahf_cost (code);
8425
8426 min = arithmetics_cost = ix86_fp_comparison_arithmetics_cost (code);
8427 if (min > sahf_cost)
8428 min = sahf_cost;
8429 if (min > fcomi_cost)
8430 min = fcomi_cost;
8431 return min;
8432 }
8433
8434 /* Generate insn patterns to do a floating point compare of OPERANDS. */
8435
8436 static rtx
8437 ix86_expand_fp_compare (code, op0, op1, scratch, second_test, bypass_test)
8438 enum rtx_code code;
8439 rtx op0, op1, scratch;
8440 rtx *second_test;
8441 rtx *bypass_test;
8442 {
8443 enum machine_mode fpcmp_mode, intcmp_mode;
8444 rtx tmp, tmp2;
8445 int cost = ix86_fp_comparison_cost (code);
8446 enum rtx_code bypass_code, first_code, second_code;
8447
8448 fpcmp_mode = ix86_fp_compare_mode (code);
8449 code = ix86_prepare_fp_compare_args (code, &op0, &op1);
8450
8451 if (second_test)
8452 *second_test = NULL_RTX;
8453 if (bypass_test)
8454 *bypass_test = NULL_RTX;
8455
8456 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
8457
8458 /* Do fcomi/sahf based test when profitable. */
8459 if ((bypass_code == NIL || bypass_test)
8460 && (second_code == NIL || second_test)
8461 && ix86_fp_comparison_arithmetics_cost (code) > cost)
8462 {
8463 if (TARGET_CMOVE)
8464 {
8465 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
8466 tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
8467 tmp);
8468 emit_insn (tmp);
8469 }
8470 else
8471 {
8472 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
8473 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
8474 if (!scratch)
8475 scratch = gen_reg_rtx (HImode);
8476 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
8477 emit_insn (gen_x86_sahf_1 (scratch));
8478 }
8479
8480 /* The FP codes work out to act like unsigned. */
8481 intcmp_mode = fpcmp_mode;
8482 code = first_code;
8483 if (bypass_code != NIL)
8484 *bypass_test = gen_rtx_fmt_ee (bypass_code, VOIDmode,
8485 gen_rtx_REG (intcmp_mode, FLAGS_REG),
8486 const0_rtx);
8487 if (second_code != NIL)
8488 *second_test = gen_rtx_fmt_ee (second_code, VOIDmode,
8489 gen_rtx_REG (intcmp_mode, FLAGS_REG),
8490 const0_rtx);
8491 }
8492 else
8493 {
8494 /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first. */
8495 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
8496 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
8497 if (!scratch)
8498 scratch = gen_reg_rtx (HImode);
8499 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
8500
8501 /* In the unordered case, we have to check C2 for NaN's, which
8502 doesn't happen to work out to anything nice combination-wise.
8503 So do some bit twiddling on the value we've got in AH to come
8504 up with an appropriate set of condition codes. */
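/* In the high byte of the status word (AH after an fnstsw %ax), C0 is
   0x01, C2 is 0x04 and C3 is 0x40; the mask 0x45 therefore tests
   C0|C2|C3 at once, which is why it shows up repeatedly below.  */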
8505
8506 intcmp_mode = CCNOmode;
8507 switch (code)
8508 {
8509 case GT:
8510 case UNGT:
8511 if (code == GT || !TARGET_IEEE_FP)
8512 {
8513 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
8514 code = EQ;
8515 }
8516 else
8517 {
8518 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
8519 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
8520 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x44)));
8521 intcmp_mode = CCmode;
8522 code = GEU;
8523 }
8524 break;
8525 case LT:
8526 case UNLT:
8527 if (code == LT && TARGET_IEEE_FP)
8528 {
8529 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
8530 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x01)));
8531 intcmp_mode = CCmode;
8532 code = EQ;
8533 }
8534 else
8535 {
8536 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x01)));
8537 code = NE;
8538 }
8539 break;
8540 case GE:
8541 case UNGE:
8542 if (code == GE || !TARGET_IEEE_FP)
8543 {
8544 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x05)));
8545 code = EQ;
8546 }
8547 else
8548 {
8549 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
8550 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
8551 GEN_INT (0x01)));
8552 code = NE;
8553 }
8554 break;
8555 case LE:
8556 case UNLE:
8557 if (code == LE && TARGET_IEEE_FP)
8558 {
8559 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
8560 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
8561 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
8562 intcmp_mode = CCmode;
8563 code = LTU;
8564 }
8565 else
8566 {
8567 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
8568 code = NE;
8569 }
8570 break;
8571 case EQ:
8572 case UNEQ:
8573 if (code == EQ && TARGET_IEEE_FP)
8574 {
8575 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
8576 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
8577 intcmp_mode = CCmode;
8578 code = EQ;
8579 }
8580 else
8581 {
8582 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
8583 code = NE;
8584 break;
8585 }
8586 break;
8587 case NE:
8588 case LTGT:
8589 if (code == NE && TARGET_IEEE_FP)
8590 {
8591 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
8592 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
8593 GEN_INT (0x40)));
8594 code = NE;
8595 }
8596 else
8597 {
8598 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
8599 code = EQ;
8600 }
8601 break;
8602
8603 case UNORDERED:
8604 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
8605 code = NE;
8606 break;
8607 case ORDERED:
8608 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
8609 code = EQ;
8610 break;
8611
8612 default:
8613 abort ();
8614 }
8615 }
8616
8617 /* Return the test that should be put into the flags user, i.e.
8618 the bcc, scc, or cmov instruction. */
8619 return gen_rtx_fmt_ee (code, VOIDmode,
8620 gen_rtx_REG (intcmp_mode, FLAGS_REG),
8621 const0_rtx);
8622 }
8623
8624 rtx
8625 ix86_expand_compare (code, second_test, bypass_test)
8626 enum rtx_code code;
8627 rtx *second_test, *bypass_test;
8628 {
8629 rtx op0, op1, ret;
8630 op0 = ix86_compare_op0;
8631 op1 = ix86_compare_op1;
8632
8633 if (second_test)
8634 *second_test = NULL_RTX;
8635 if (bypass_test)
8636 *bypass_test = NULL_RTX;
8637
8638 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_FLOAT)
8639 ret = ix86_expand_fp_compare (code, op0, op1, NULL_RTX,
8640 second_test, bypass_test);
8641 else
8642 ret = ix86_expand_int_compare (code, op0, op1);
8643
8644 return ret;
8645 }
8646
8647 /* Return true if the CODE will result in a nontrivial jump sequence. */
8648 bool
8649 ix86_fp_jump_nontrivial_p (code)
8650 enum rtx_code code;
8651 {
8652 enum rtx_code bypass_code, first_code, second_code;
8653 if (!TARGET_CMOVE)
8654 return true;
8655 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
8656 return bypass_code != NIL || second_code != NIL;
8657 }
8658
8659 void
8660 ix86_expand_branch (code, label)
8661 enum rtx_code code;
8662 rtx label;
8663 {
8664 rtx tmp;
8665
8666 switch (GET_MODE (ix86_compare_op0))
8667 {
8668 case QImode:
8669 case HImode:
8670 case SImode:
8671 simple:
8672 tmp = ix86_expand_compare (code, NULL, NULL);
8673 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
8674 gen_rtx_LABEL_REF (VOIDmode, label),
8675 pc_rtx);
8676 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
8677 return;
8678
8679 case SFmode:
8680 case DFmode:
8681 case XFmode:
8682 case TFmode:
8683 {
8684 rtvec vec;
8685 int use_fcomi;
8686 enum rtx_code bypass_code, first_code, second_code;
8687
8688 code = ix86_prepare_fp_compare_args (code, &ix86_compare_op0,
8689 &ix86_compare_op1);
8690
8691 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
8692
8693 /* Check whether we will use the natural sequence with one jump. If
8694 so, we can expand the jump early. Otherwise delay expansion by
8695 creating a compound insn so as not to confuse the optimizers. */
8696 if (bypass_code == NIL && second_code == NIL
8697 && TARGET_CMOVE)
8698 {
8699 ix86_split_fp_branch (code, ix86_compare_op0, ix86_compare_op1,
8700 gen_rtx_LABEL_REF (VOIDmode, label),
8701 pc_rtx, NULL_RTX);
8702 }
8703 else
8704 {
8705 tmp = gen_rtx_fmt_ee (code, VOIDmode,
8706 ix86_compare_op0, ix86_compare_op1);
8707 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
8708 gen_rtx_LABEL_REF (VOIDmode, label),
8709 pc_rtx);
8710 tmp = gen_rtx_SET (VOIDmode, pc_rtx, tmp);
8711
8712 use_fcomi = ix86_use_fcomi_compare (code);
8713 vec = rtvec_alloc (3 + !use_fcomi);
8714 RTVEC_ELT (vec, 0) = tmp;
8715 RTVEC_ELT (vec, 1)
8716 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, 18));
8717 RTVEC_ELT (vec, 2)
8718 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, 17));
8719 if (! use_fcomi)
8720 RTVEC_ELT (vec, 3)
8721 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (HImode));
8722
8723 emit_jump_insn (gen_rtx_PARALLEL (VOIDmode, vec));
8724 }
8725 return;
8726 }
8727
8728 case DImode:
8729 if (TARGET_64BIT)
8730 goto simple;
8731 /* Expand DImode branch into multiple compare+branch. */
8732 {
8733 rtx lo[2], hi[2], label2;
8734 enum rtx_code code1, code2, code3;
8735
8736 if (CONSTANT_P (ix86_compare_op0) && ! CONSTANT_P (ix86_compare_op1))
8737 {
8738 tmp = ix86_compare_op0;
8739 ix86_compare_op0 = ix86_compare_op1;
8740 ix86_compare_op1 = tmp;
8741 code = swap_condition (code);
8742 }
8743 split_di (&ix86_compare_op0, 1, lo+0, hi+0);
8744 split_di (&ix86_compare_op1, 1, lo+1, hi+1);
8745
8746 /* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to
8747 avoid two branches. This costs one extra insn, so disable when
8748 optimizing for size. */
8749
8750 if ((code == EQ || code == NE)
8751 && (!optimize_size
8752 || hi[1] == const0_rtx || lo[1] == const0_rtx))
8753 {
8754 rtx xor0, xor1;
8755
8756 xor1 = hi[0];
8757 if (hi[1] != const0_rtx)
8758 xor1 = expand_binop (SImode, xor_optab, xor1, hi[1],
8759 NULL_RTX, 0, OPTAB_WIDEN);
8760
8761 xor0 = lo[0];
8762 if (lo[1] != const0_rtx)
8763 xor0 = expand_binop (SImode, xor_optab, xor0, lo[1],
8764 NULL_RTX, 0, OPTAB_WIDEN);
8765
8766 tmp = expand_binop (SImode, ior_optab, xor1, xor0,
8767 NULL_RTX, 0, OPTAB_WIDEN);
8768
8769 ix86_compare_op0 = tmp;
8770 ix86_compare_op1 = const0_rtx;
8771 ix86_expand_branch (code, label);
8772 return;
8773 }
8774
8775 /* Otherwise, if we are doing less-than or greater-or-equal-than,
8776 op1 is a constant and the low word is zero, then we can just
8777 examine the high word. */
8778
8779 if (GET_CODE (hi[1]) == CONST_INT && lo[1] == const0_rtx)
8780 switch (code)
8781 {
8782 case LT: case LTU: case GE: case GEU:
8783 ix86_compare_op0 = hi[0];
8784 ix86_compare_op1 = hi[1];
8785 ix86_expand_branch (code, label);
8786 return;
8787 default:
8788 break;
8789 }
8790
8791 /* Otherwise, we need two or three jumps. */
8792
8793 label2 = gen_label_rtx ();
8794
8795 code1 = code;
8796 code2 = swap_condition (code);
8797 code3 = unsigned_condition (code);
8798
8799 switch (code)
8800 {
8801 case LT: case GT: case LTU: case GTU:
8802 break;
8803
8804 case LE: code1 = LT; code2 = GT; break;
8805 case GE: code1 = GT; code2 = LT; break;
8806 case LEU: code1 = LTU; code2 = GTU; break;
8807 case GEU: code1 = GTU; code2 = LTU; break;
8808
8809 case EQ: code1 = NIL; code2 = NE; break;
8810 case NE: code2 = NIL; break;
8811
8812 default:
8813 abort ();
8814 }
8815
8816 /*
8817 * a < b =>
8818 * if (hi(a) < hi(b)) goto true;
8819 * if (hi(a) > hi(b)) goto false;
8820 * if (lo(a) < lo(b)) goto true;
8821 * false:
8822 */
8823
8824 ix86_compare_op0 = hi[0];
8825 ix86_compare_op1 = hi[1];
8826
8827 if (code1 != NIL)
8828 ix86_expand_branch (code1, label);
8829 if (code2 != NIL)
8830 ix86_expand_branch (code2, label2);
8831
8832 ix86_compare_op0 = lo[0];
8833 ix86_compare_op1 = lo[1];
8834 ix86_expand_branch (code3, label);
8835
8836 if (code2 != NIL)
8837 emit_label (label2);
8838 return;
8839 }
8840
8841 default:
8842 abort ();
8843 }
8844 }
8845
8846 /* Split branch based on floating point condition. */
8847 void
8848 ix86_split_fp_branch (code, op1, op2, target1, target2, tmp)
8849 enum rtx_code code;
8850 rtx op1, op2, target1, target2, tmp;
8851 {
8852 rtx second, bypass;
8853 rtx label = NULL_RTX;
8854 rtx condition;
8855 int bypass_probability = -1, second_probability = -1, probability = -1;
8856 rtx i;
8857
8858 if (target2 != pc_rtx)
8859 {
8860 rtx tmp = target2;
8861 code = reverse_condition_maybe_unordered (code);
8862 target2 = target1;
8863 target1 = tmp;
8864 }
8865
8866 condition = ix86_expand_fp_compare (code, op1, op2,
8867 tmp, &second, &bypass);
8868
8869 if (split_branch_probability >= 0)
8870 {
8871 /* Distribute the probabilities across the jumps.
8872 Assume that BYPASS and SECOND always test
8873 for UNORDERED. */
8874 probability = split_branch_probability;
8875
8876 /* A value of 1 is low enough that there is no need to update the
8877 probability. Later we may run some experiments and see
8878 whether unordered values are more frequent in practice. */
8879 if (bypass)
8880 bypass_probability = 1;
8881 if (second)
8882 second_probability = 1;
8883 }
8884 if (bypass != NULL_RTX)
8885 {
8886 label = gen_label_rtx ();
8887 i = emit_jump_insn (gen_rtx_SET
8888 (VOIDmode, pc_rtx,
8889 gen_rtx_IF_THEN_ELSE (VOIDmode,
8890 bypass,
8891 gen_rtx_LABEL_REF (VOIDmode,
8892 label),
8893 pc_rtx)));
8894 if (bypass_probability >= 0)
8895 REG_NOTES (i)
8896 = gen_rtx_EXPR_LIST (REG_BR_PROB,
8897 GEN_INT (bypass_probability),
8898 REG_NOTES (i));
8899 }
8900 i = emit_jump_insn (gen_rtx_SET
8901 (VOIDmode, pc_rtx,
8902 gen_rtx_IF_THEN_ELSE (VOIDmode,
8903 condition, target1, target2)));
8904 if (probability >= 0)
8905 REG_NOTES (i)
8906 = gen_rtx_EXPR_LIST (REG_BR_PROB,
8907 GEN_INT (probability),
8908 REG_NOTES (i));
8909 if (second != NULL_RTX)
8910 {
8911 i = emit_jump_insn (gen_rtx_SET
8912 (VOIDmode, pc_rtx,
8913 gen_rtx_IF_THEN_ELSE (VOIDmode, second, target1,
8914 target2)));
8915 if (second_probability >= 0)
8916 REG_NOTES (i)
8917 = gen_rtx_EXPR_LIST (REG_BR_PROB,
8918 GEN_INT (second_probability),
8919 REG_NOTES (i));
8920 }
8921 if (label != NULL_RTX)
8922 emit_label (label);
8923 }
8924
8925 int
8926 ix86_expand_setcc (code, dest)
8927 enum rtx_code code;
8928 rtx dest;
8929 {
8930 rtx ret, tmp, tmpreg;
8931 rtx second_test, bypass_test;
8932
8933 if (GET_MODE (ix86_compare_op0) == DImode
8934 && !TARGET_64BIT)
8935 return 0; /* FAIL */
8936
8937 if (GET_MODE (dest) != QImode)
8938 abort ();
8939
8940 ret = ix86_expand_compare (code, &second_test, &bypass_test);
8941 PUT_MODE (ret, QImode);
8942
8943 tmp = dest;
8944 tmpreg = dest;
8945
8946 emit_insn (gen_rtx_SET (VOIDmode, tmp, ret));
8947 if (bypass_test || second_test)
8948 {
8949 rtx test = second_test;
8950 int bypass = 0;
8951 rtx tmp2 = gen_reg_rtx (QImode);
8952 if (bypass_test)
8953 {
8954 if (second_test)
8955 abort ();
8956 test = bypass_test;
8957 bypass = 1;
8958 PUT_CODE (test, reverse_condition_maybe_unordered (GET_CODE (test)));
8959 }
8960 PUT_MODE (test, QImode);
8961 emit_insn (gen_rtx_SET (VOIDmode, tmp2, test));
8962
8963 if (bypass)
8964 emit_insn (gen_andqi3 (tmp, tmpreg, tmp2));
8965 else
8966 emit_insn (gen_iorqi3 (tmp, tmpreg, tmp2));
8967 }
8968
8969 return 1; /* DONE */
8970 }
8971
8972 int
8973 ix86_expand_int_movcc (operands)
8974 rtx operands[];
8975 {
8976 enum rtx_code code = GET_CODE (operands[1]), compare_code;
8977 rtx compare_seq, compare_op;
8978 rtx second_test, bypass_test;
8979 enum machine_mode mode = GET_MODE (operands[0]);
8980
8981 /* When the compare code is not LTU or GEU, we can not use the sbbl case.
8982 In case the comparison is done with an immediate, we can convert it to LTU
8983 or GEU by altering the integer. */
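/* For example, an unsigned "x <= 5" (LEU) is rewritten as "x < 6"
   (LTU), and "x > 5" (GTU) as "x >= 6" (GEU), which the sbb-based
   sequences below can handle directly.  */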
8984
8985 if ((code == LEU || code == GTU)
8986 && GET_CODE (ix86_compare_op1) == CONST_INT
8987 && mode != HImode
8988 && INTVAL (ix86_compare_op1) != -1
8989 /* For x86-64, the immediate field in the instruction is 32-bit
8990 signed, so we can't increment a DImode value above 0x7fffffff. */
8991 && (!TARGET_64BIT
8992 || GET_MODE (ix86_compare_op0) != DImode
8993 || INTVAL (ix86_compare_op1) != 0x7fffffff)
8994 && GET_CODE (operands[2]) == CONST_INT
8995 && GET_CODE (operands[3]) == CONST_INT)
8996 {
8997 if (code == LEU)
8998 code = LTU;
8999 else
9000 code = GEU;
9001 ix86_compare_op1 = gen_int_mode (INTVAL (ix86_compare_op1) + 1,
9002 GET_MODE (ix86_compare_op0));
9003 }
9004
9005 start_sequence ();
9006 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
9007 compare_seq = get_insns ();
9008 end_sequence ();
9009
9010 compare_code = GET_CODE (compare_op);
9011
9012 /* Don't attempt mode expansion here -- if we had to expand 5 or 6
9013 HImode insns, we'd be swallowed in word prefix ops. */
9014
9015 if (mode != HImode
9016 && (mode != DImode || TARGET_64BIT)
9017 && GET_CODE (operands[2]) == CONST_INT
9018 && GET_CODE (operands[3]) == CONST_INT)
9019 {
9020 rtx out = operands[0];
9021 HOST_WIDE_INT ct = INTVAL (operands[2]);
9022 HOST_WIDE_INT cf = INTVAL (operands[3]);
9023 HOST_WIDE_INT diff;
9024
9025 if ((compare_code == LTU || compare_code == GEU)
9026 && !second_test && !bypass_test)
9027 {
9028 /* Detect overlap between destination and compare sources. */
9029 rtx tmp = out;
9030
9031 /* To simplify rest of code, restrict to the GEU case. */
9032 if (compare_code == LTU)
9033 {
9034 HOST_WIDE_INT tmp = ct;
9035 ct = cf;
9036 cf = tmp;
9037 compare_code = reverse_condition (compare_code);
9038 code = reverse_condition (code);
9039 }
9040 diff = ct - cf;
9041
9042 if (reg_overlap_mentioned_p (out, ix86_compare_op0)
9043 || reg_overlap_mentioned_p (out, ix86_compare_op1))
9044 tmp = gen_reg_rtx (mode);
9045
9046 emit_insn (compare_seq);
9047 if (mode == DImode)
9048 emit_insn (gen_x86_movdicc_0_m1_rex64 (tmp));
9049 else
9050 emit_insn (gen_x86_movsicc_0_m1 (tmp));
9051
9052 if (diff == 1)
9053 {
9054 /*
9055 * cmpl op0,op1
9056 * sbbl dest,dest
9057 * [addl dest, ct]
9058 *
9059 * Size 5 - 8.
9060 */
9061 if (ct)
9062 tmp = expand_simple_binop (mode, PLUS,
9063 tmp, GEN_INT (ct),
9064 tmp, 1, OPTAB_DIRECT);
9065 }
9066 else if (cf == -1)
9067 {
9068 /*
9069 * cmpl op0,op1
9070 * sbbl dest,dest
9071 * orl $ct, dest
9072 *
9073 * Size 8.
9074 */
9075 tmp = expand_simple_binop (mode, IOR,
9076 tmp, GEN_INT (ct),
9077 tmp, 1, OPTAB_DIRECT);
9078 }
9079 else if (diff == -1 && ct)
9080 {
9081 /*
9082 * cmpl op0,op1
9083 * sbbl dest,dest
9084 * notl dest
9085 * [addl dest, cf]
9086 *
9087 * Size 8 - 11.
9088 */
9089 tmp = expand_simple_unop (mode, NOT, tmp, tmp, 1);
9090 if (cf)
9091 tmp = expand_simple_binop (mode, PLUS,
9092 tmp, GEN_INT (cf),
9093 tmp, 1, OPTAB_DIRECT);
9094 }
9095 else
9096 {
9097 /*
9098 * cmpl op0,op1
9099 * sbbl dest,dest
9100 * [notl dest]
9101 * andl cf - ct, dest
9102 * [addl dest, ct]
9103 *
9104 * Size 8 - 11.
9105 */
9106
9107 if (cf == 0)
9108 {
9109 cf = ct;
9110 ct = 0;
9111 tmp = expand_simple_unop (mode, NOT, tmp, tmp, 1);
9112 }
9113
9114 tmp = expand_simple_binop (mode, AND,
9115 tmp,
9116 gen_int_mode (cf - ct, mode),
9117 tmp, 1, OPTAB_DIRECT);
9118 if (ct)
9119 tmp = expand_simple_binop (mode, PLUS,
9120 tmp, GEN_INT (ct),
9121 tmp, 1, OPTAB_DIRECT);
9122 }
9123
9124 if (tmp != out)
9125 emit_move_insn (out, tmp);
9126
9127 return 1; /* DONE */
9128 }
9129
9130 diff = ct - cf;
9131 if (diff < 0)
9132 {
9133 HOST_WIDE_INT tmp;
9134 tmp = ct, ct = cf, cf = tmp;
9135 diff = -diff;
9136 if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0)))
9137 {
9138 /* We may be reversing an unordered compare to a normal compare, which
9139 is not valid in general (we may convert a non-trapping condition
9140 to a trapping one); however, on i386 we currently emit all
9141 comparisons unordered. */
9142 compare_code = reverse_condition_maybe_unordered (compare_code);
9143 code = reverse_condition_maybe_unordered (code);
9144 }
9145 else
9146 {
9147 compare_code = reverse_condition (compare_code);
9148 code = reverse_condition (code);
9149 }
9150 }
9151
9152 compare_code = NIL;
9153 if (GET_MODE_CLASS (GET_MODE (ix86_compare_op0)) == MODE_INT
9154 && GET_CODE (ix86_compare_op1) == CONST_INT)
9155 {
9156 if (ix86_compare_op1 == const0_rtx
9157 && (code == LT || code == GE))
9158 compare_code = code;
9159 else if (ix86_compare_op1 == constm1_rtx)
9160 {
9161 if (code == LE)
9162 compare_code = LT;
9163 else if (code == GT)
9164 compare_code = GE;
9165 }
9166 }
9167
9168 /* Optimize dest = (op0 < 0) ? -1 : cf. */
9169 if (compare_code != NIL
9170 && GET_MODE (ix86_compare_op0) == GET_MODE (out)
9171 && (cf == -1 || ct == -1))
9172 {
9173 /* If lea code below could be used, only optimize
9174 if it results in a 2 insn sequence. */
9175
9176 if (! (diff == 1 || diff == 2 || diff == 4 || diff == 8
9177 || diff == 3 || diff == 5 || diff == 9)
9178 || (compare_code == LT && ct == -1)
9179 || (compare_code == GE && cf == -1))
9180 {
9181 /*
9182 * notl op1 (if necessary)
9183 * sarl $31, op1
9184 * orl cf, op1
9185 */
9186 if (ct != -1)
9187 {
9188 cf = ct;
9189 ct = -1;
9190 code = reverse_condition (code);
9191 }
9192
9193 out = emit_store_flag (out, code, ix86_compare_op0,
9194 ix86_compare_op1, VOIDmode, 0, -1);
9195
9196 out = expand_simple_binop (mode, IOR,
9197 out, GEN_INT (cf),
9198 out, 1, OPTAB_DIRECT);
9199 if (out != operands[0])
9200 emit_move_insn (operands[0], out);
9201
9202 return 1; /* DONE */
9203 }
9204 }
9205
9206 if ((diff == 1 || diff == 2 || diff == 4 || diff == 8
9207 || diff == 3 || diff == 5 || diff == 9)
9208 && (mode != DImode || x86_64_sign_extended_value (GEN_INT (cf))))
9209 {
9210 /*
9211 * xorl dest,dest
9212 * cmpl op1,op2
9213 * setcc dest
9214 * lea cf(dest*(ct-cf)),dest
9215 *
9216 * Size 14.
9217 *
9218 * This also catches the degenerate setcc-only case.
9219 */
9220
9221 rtx tmp;
9222 int nops;
9223
9224 out = emit_store_flag (out, code, ix86_compare_op0,
9225 ix86_compare_op1, VOIDmode, 0, 1);
9226
9227 nops = 0;
9228 /* On x86_64 the lea instruction operates on Pmode, so we need
9229 to get the arithmetic done in the proper mode to match. */
9230 if (diff == 1)
9231 tmp = out;
9232 else
9233 {
9234 rtx out1;
9235 out1 = out;
9236 tmp = gen_rtx_MULT (mode, out1, GEN_INT (diff & ~1));
9237 nops++;
9238 if (diff & 1)
9239 {
9240 tmp = gen_rtx_PLUS (mode, tmp, out1);
9241 nops++;
9242 }
9243 }
9244 if (cf != 0)
9245 {
9246 tmp = gen_rtx_PLUS (mode, tmp, GEN_INT (cf));
9247 nops++;
9248 }
9249 if (tmp != out
9250 && (GET_CODE (tmp) != SUBREG || SUBREG_REG (tmp) != out))
9251 {
9252 if (nops == 1)
9253 {
9254 rtx clob;
9255
9256 clob = gen_rtx_REG (CCmode, FLAGS_REG);
9257 clob = gen_rtx_CLOBBER (VOIDmode, clob);
9258
9259 tmp = gen_rtx_SET (VOIDmode, out, tmp);
9260 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
9261 emit_insn (tmp);
9262 }
9263 else
9264 emit_insn (gen_rtx_SET (VOIDmode, out, tmp));
9265 }
9266 if (out != operands[0])
9267 emit_move_insn (operands[0], copy_rtx (out));
9268
9269 return 1; /* DONE */
9270 }
9271
9272 /*
9273 * General case: Jumpful:
9274 * xorl dest,dest cmpl op1, op2
9275 * cmpl op1, op2 movl ct, dest
9276 * setcc dest jcc 1f
9277 * decl dest movl cf, dest
9278 * andl (cf-ct),dest 1:
9279 * addl ct,dest
9280 *
9281 * Size 20. Size 14.
9282 *
9283 * This is reasonably steep, but branch mispredict costs are
9284 * high on modern cpus, so consider failing only if optimizing
9285 * for space.
9286 *
9287 * %%% Parameterize branch_cost on the tuning architecture, then
9288 * use that. The 80386 couldn't care less about mispredicts.
9289 */
9290
9291 if (!optimize_size && !TARGET_CMOVE)
9292 {
9293 if (cf == 0)
9294 {
9295 cf = ct;
9296 ct = 0;
9297 if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0)))
9298 /* We may be reversing an unordered compare to a normal compare,
9299 which is not valid in general (we may convert a non-trapping
9300 condition to a trapping one); however, on i386 we currently
9301 emit all comparisons unordered. */
9302 code = reverse_condition_maybe_unordered (code);
9303 else
9304 {
9305 code = reverse_condition (code);
9306 if (compare_code != NIL)
9307 compare_code = reverse_condition (compare_code);
9308 }
9309 }
9310
9311 if (compare_code != NIL)
9312 {
9313 /* notl op1 (if needed)
9314 sarl $31, op1
9315 andl (cf-ct), op1
9316 addl ct, op1
9317
9318 For x < 0 (resp. x <= -1) there will be no notl,
9319 so if possible swap the constants to get rid of the
9320 complement.
9321 True/false will be -1/0 while code below (store flag
9322 followed by decrement) is 0/-1, so the constants need
9323 to be exchanged once more. */
9324
9325 if (compare_code == GE || !cf)
9326 {
9327 code = reverse_condition (code);
9328 compare_code = LT;
9329 }
9330 else
9331 {
9332 HOST_WIDE_INT tmp = cf;
9333 cf = ct;
9334 ct = tmp;
9335 }
9336
9337 out = emit_store_flag (out, code, ix86_compare_op0,
9338 ix86_compare_op1, VOIDmode, 0, -1);
9339 }
9340 else
9341 {
9342 out = emit_store_flag (out, code, ix86_compare_op0,
9343 ix86_compare_op1, VOIDmode, 0, 1);
9344
9345 out = expand_simple_binop (mode, PLUS, out, constm1_rtx,
9346 out, 1, OPTAB_DIRECT);
9347 }
9348
9349 out = expand_simple_binop (mode, AND, out,
9350 gen_int_mode (cf - ct, mode),
9351 out, 1, OPTAB_DIRECT);
9352 if (ct)
9353 out = expand_simple_binop (mode, PLUS, out, GEN_INT (ct),
9354 out, 1, OPTAB_DIRECT);
9355 if (out != operands[0])
9356 emit_move_insn (operands[0], out);
9357
9358 return 1; /* DONE */
9359 }
9360 }
9361
9362 if (!TARGET_CMOVE)
9363 {
9364 /* Try a few more things with specific constants and a variable. */
9365
9366 optab op;
9367 rtx var, orig_out, out, tmp;
9368
9369 if (optimize_size)
9370 return 0; /* FAIL */
9371
9372 /* If one of the two operands is an interesting constant, load a
9373 constant with the above and mask it in with a logical operation. */
9374
9375 if (GET_CODE (operands[2]) == CONST_INT)
9376 {
9377 var = operands[3];
9378 if (INTVAL (operands[2]) == 0)
9379 operands[3] = constm1_rtx, op = and_optab;
9380 else if (INTVAL (operands[2]) == -1)
9381 operands[3] = const0_rtx, op = ior_optab;
9382 else
9383 return 0; /* FAIL */
9384 }
9385 else if (GET_CODE (operands[3]) == CONST_INT)
9386 {
9387 var = operands[2];
9388 if (INTVAL (operands[3]) == 0)
9389 operands[2] = constm1_rtx, op = and_optab;
9390 else if (INTVAL (operands[3]) == -1)
9391 operands[2] = const0_rtx, op = ior_optab;
9392 else
9393 return 0; /* FAIL */
9394 }
9395 else
9396 return 0; /* FAIL */
9397
9398 orig_out = operands[0];
9399 tmp = gen_reg_rtx (mode);
9400 operands[0] = tmp;
9401
9402 /* Recurse to get the constant loaded. */
9403 if (ix86_expand_int_movcc (operands) == 0)
9404 return 0; /* FAIL */
9405
9406 /* Mask in the interesting variable. */
9407 out = expand_binop (mode, op, var, tmp, orig_out, 0,
9408 OPTAB_WIDEN);
9409 if (out != orig_out)
9410 emit_move_insn (orig_out, out);
9411
9412 return 1; /* DONE */
9413 }
9414
9415 /*
9416 * For comparison with above,
9417 *
9418 * movl cf,dest
9419 * movl ct,tmp
9420 * cmpl op1,op2
9421 * cmovcc tmp,dest
9422 *
9423 * Size 15.
9424 */
9425
9426 if (! nonimmediate_operand (operands[2], mode))
9427 operands[2] = force_reg (mode, operands[2]);
9428 if (! nonimmediate_operand (operands[3], mode))
9429 operands[3] = force_reg (mode, operands[3]);
9430
9431 if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
9432 {
9433 rtx tmp = gen_reg_rtx (mode);
9434 emit_move_insn (tmp, operands[3]);
9435 operands[3] = tmp;
9436 }
9437 if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
9438 {
9439 rtx tmp = gen_reg_rtx (mode);
9440 emit_move_insn (tmp, operands[2]);
9441 operands[2] = tmp;
9442 }
9443 if (! register_operand (operands[2], VOIDmode)
9444 && ! register_operand (operands[3], VOIDmode))
9445 operands[2] = force_reg (mode, operands[2]);
9446
9447 emit_insn (compare_seq);
9448 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
9449 gen_rtx_IF_THEN_ELSE (mode,
9450 compare_op, operands[2],
9451 operands[3])));
9452 if (bypass_test)
9453 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
9454 gen_rtx_IF_THEN_ELSE (mode,
9455 bypass_test,
9456 operands[3],
9457 operands[0])));
9458 if (second_test)
9459 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
9460 gen_rtx_IF_THEN_ELSE (mode,
9461 second_test,
9462 operands[2],
9463 operands[0])));
9464
9465 return 1; /* DONE */
9466 }
9467
9468 int
9469 ix86_expand_fp_movcc (operands)
9470 rtx operands[];
9471 {
9472 enum rtx_code code;
9473 rtx tmp;
9474 rtx compare_op, second_test, bypass_test;
9475
9476 /* For SF/DFmode conditional moves based on comparisons
9477 in the same mode, we may want to use SSE min/max instructions. */
9478 if (((TARGET_SSE_MATH && GET_MODE (operands[0]) == SFmode)
9479 || (TARGET_SSE2 && TARGET_SSE_MATH && GET_MODE (operands[0]) == DFmode))
9480 && GET_MODE (ix86_compare_op0) == GET_MODE (operands[0])
9481 /* The SSE comparisons do not support the LTGT/UNEQ pair. */
9482 && (!TARGET_IEEE_FP
9483 || (GET_CODE (operands[1]) != LTGT && GET_CODE (operands[1]) != UNEQ))
9484 /* We may be called from the post-reload splitter. */
9485 && (!REG_P (operands[0])
9486 || SSE_REG_P (operands[0])
9487 || REGNO (operands[0]) >= FIRST_PSEUDO_REGISTER))
9488 {
9489 rtx op0 = ix86_compare_op0, op1 = ix86_compare_op1;
9490 code = GET_CODE (operands[1]);
9491
9492 /* See if we have a (cross) match between the comparison operands and the
9493 conditional move operands. */
9494 if (rtx_equal_p (operands[2], op1))
9495 {
9496 rtx tmp = op0;
9497 op0 = op1;
9498 op1 = tmp;
9499 code = reverse_condition_maybe_unordered (code);
9500 }
9501 if (rtx_equal_p (operands[2], op0) && rtx_equal_p (operands[3], op1))
9502 {
9503 /* Check for min operation. */
9504 if (code == LT)
9505 {
9506 operands[0] = force_reg (GET_MODE (operands[0]), operands[0]);
9507 if (memory_operand (op0, VOIDmode))
9508 op0 = force_reg (GET_MODE (operands[0]), op0);
9509 if (GET_MODE (operands[0]) == SFmode)
9510 emit_insn (gen_minsf3 (operands[0], op0, op1));
9511 else
9512 emit_insn (gen_mindf3 (operands[0], op0, op1));
9513 return 1;
9514 }
9515 /* Check for max operation. */
9516 if (code == GT)
9517 {
9518 operands[0] = force_reg (GET_MODE (operands[0]), operands[0]);
9519 if (memory_operand (op0, VOIDmode))
9520 op0 = force_reg (GET_MODE (operands[0]), op0);
9521 if (GET_MODE (operands[0]) == SFmode)
9522 emit_insn (gen_maxsf3 (operands[0], op0, op1));
9523 else
9524 emit_insn (gen_maxdf3 (operands[0], op0, op1));
9525 return 1;
9526 }
9527 }
9528 /* Massage the condition to be an sse_comparison_operator. In case we are
9529 in non-IEEE mode, try to canonicalize the destination operand
9530 to be first in the comparison - this helps reload avoid extra
9531 moves. */
9532 if (!sse_comparison_operator (operands[1], VOIDmode)
9533 || (rtx_equal_p (operands[0], ix86_compare_op1) && !TARGET_IEEE_FP))
9534 {
9535 rtx tmp = ix86_compare_op0;
9536 ix86_compare_op0 = ix86_compare_op1;
9537 ix86_compare_op1 = tmp;
9538 operands[1] = gen_rtx_fmt_ee (swap_condition (GET_CODE (operands[1])),
9539 VOIDmode, ix86_compare_op0,
9540 ix86_compare_op1);
9541 }
9542 /* Similarly, try to arrange for the result to be the first operand of the
9543 conditional move. We also don't support the NE comparison on SSE, so
9544 try to avoid it. */
9545 if ((rtx_equal_p (operands[0], operands[3])
9546 && (!TARGET_IEEE_FP || GET_CODE (operands[1]) != EQ))
9547 || (GET_CODE (operands[1]) == NE && TARGET_IEEE_FP))
9548 {
9549 rtx tmp = operands[2];
9550 operands[2] = operands[3];
9551 operands[3] = tmp;
9552 operands[1] = gen_rtx_fmt_ee (reverse_condition_maybe_unordered
9553 (GET_CODE (operands[1])),
9554 VOIDmode, ix86_compare_op0,
9555 ix86_compare_op1);
9556 }
9557 if (GET_MODE (operands[0]) == SFmode)
9558 emit_insn (gen_sse_movsfcc (operands[0], operands[1],
9559 operands[2], operands[3],
9560 ix86_compare_op0, ix86_compare_op1));
9561 else
9562 emit_insn (gen_sse_movdfcc (operands[0], operands[1],
9563 operands[2], operands[3],
9564 ix86_compare_op0, ix86_compare_op1));
9565 return 1;
9566 }
9567
9568 /* The floating point conditional move instructions don't directly
9569 support conditions resulting from a signed integer comparison. */
9570
9571 code = GET_CODE (operands[1]);
9572 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
9573
9574 /* The floating point conditional move instructions don't directly
9575 support signed integer comparisons. */
9576
9577 if (!fcmov_comparison_operator (compare_op, VOIDmode))
9578 {
9579 if (second_test != NULL || bypass_test != NULL)
9580 abort ();
9581 tmp = gen_reg_rtx (QImode);
9582 ix86_expand_setcc (code, tmp);
9583 code = NE;
9584 ix86_compare_op0 = tmp;
9585 ix86_compare_op1 = const0_rtx;
9586 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
9587 }
9588 if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
9589 {
9590 tmp = gen_reg_rtx (GET_MODE (operands[0]));
9591 emit_move_insn (tmp, operands[3]);
9592 operands[3] = tmp;
9593 }
9594 if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
9595 {
9596 tmp = gen_reg_rtx (GET_MODE (operands[0]));
9597 emit_move_insn (tmp, operands[2]);
9598 operands[2] = tmp;
9599 }
9600
9601 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
9602 gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
9603 compare_op,
9604 operands[2],
9605 operands[3])));
9606 if (bypass_test)
9607 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
9608 gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
9609 bypass_test,
9610 operands[3],
9611 operands[0])));
9612 if (second_test)
9613 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
9614 gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
9615 second_test,
9616 operands[2],
9617 operands[0])));
9618
9619 return 1;
9620 }
9621
9622 /* Split operands 0 and 1 into SImode parts. Similar to split_di, but
9623 works for floating point parameters and non-offsettable memories.
9624 For pushes, it returns just stack offsets; the values will be saved
9625 in the right order. At most three parts are generated. */
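/* For illustration (derived from the size computation and cases handled
   below): on a 32-bit target a DFmode value is split into two SImode parts
   and an XFmode or TFmode value into three, while on a 64-bit target an
   XFmode or TFmode value is split into a DImode part plus an SImode part
   holding the remaining bits.  */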
9626
9627 static int
9628 ix86_split_to_parts (operand, parts, mode)
9629 rtx operand;
9630 rtx *parts;
9631 enum machine_mode mode;
9632 {
9633 int size;
9634
9635 if (!TARGET_64BIT)
9636 size = mode == TFmode ? 3 : (GET_MODE_SIZE (mode) / 4);
9637 else
9638 size = (GET_MODE_SIZE (mode) + 4) / 8;
9639
9640 if (GET_CODE (operand) == REG && MMX_REGNO_P (REGNO (operand)))
9641 abort ();
9642 if (size < 2 || size > 3)
9643 abort ();
9644
9645 /* Optimize constant pool references into immediate operands. This is used
9646 by fp moves, which force all constants to memory to allow combining. */
9647 if (GET_CODE (operand) == MEM && RTX_UNCHANGING_P (operand))
9648 {
9649 rtx tmp = maybe_get_pool_constant (operand);
9650 if (tmp)
9651 operand = tmp;
9652 }
9653
9654 if (GET_CODE (operand) == MEM && !offsettable_memref_p (operand))
9655 {
9656 /* The only non-offsettable memories we handle are pushes. */
9657 if (! push_operand (operand, VOIDmode))
9658 abort ();
9659
9660 operand = copy_rtx (operand);
9661 PUT_MODE (operand, Pmode);
9662 parts[0] = parts[1] = parts[2] = operand;
9663 }
9664 else if (!TARGET_64BIT)
9665 {
9666 if (mode == DImode)
9667 split_di (&operand, 1, &parts[0], &parts[1]);
9668 else
9669 {
9670 if (REG_P (operand))
9671 {
9672 if (!reload_completed)
9673 abort ();
9674 parts[0] = gen_rtx_REG (SImode, REGNO (operand) + 0);
9675 parts[1] = gen_rtx_REG (SImode, REGNO (operand) + 1);
9676 if (size == 3)
9677 parts[2] = gen_rtx_REG (SImode, REGNO (operand) + 2);
9678 }
9679 else if (offsettable_memref_p (operand))
9680 {
9681 operand = adjust_address (operand, SImode, 0);
9682 parts[0] = operand;
9683 parts[1] = adjust_address (operand, SImode, 4);
9684 if (size == 3)
9685 parts[2] = adjust_address (operand, SImode, 8);
9686 }
9687 else if (GET_CODE (operand) == CONST_DOUBLE)
9688 {
9689 REAL_VALUE_TYPE r;
9690 long l[4];
9691
9692 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
9693 switch (mode)
9694 {
9695 case XFmode:
9696 case TFmode:
9697 REAL_VALUE_TO_TARGET_LONG_DOUBLE (r, l);
9698 parts[2] = gen_int_mode (l[2], SImode);
9699 break;
9700 case DFmode:
9701 REAL_VALUE_TO_TARGET_DOUBLE (r, l);
9702 break;
9703 default:
9704 abort ();
9705 }
9706 parts[1] = gen_int_mode (l[1], SImode);
9707 parts[0] = gen_int_mode (l[0], SImode);
9708 }
9709 else
9710 abort ();
9711 }
9712 }
9713 else
9714 {
9715 if (mode == TImode)
9716 split_ti (&operand, 1, &parts[0], &parts[1]);
9717 if (mode == XFmode || mode == TFmode)
9718 {
9719 if (REG_P (operand))
9720 {
9721 if (!reload_completed)
9722 abort ();
9723 parts[0] = gen_rtx_REG (DImode, REGNO (operand) + 0);
9724 parts[1] = gen_rtx_REG (SImode, REGNO (operand) + 1);
9725 }
9726 else if (offsettable_memref_p (operand))
9727 {
9728 operand = adjust_address (operand, DImode, 0);
9729 parts[0] = operand;
9730 parts[1] = adjust_address (operand, SImode, 8);
9731 }
9732 else if (GET_CODE (operand) == CONST_DOUBLE)
9733 {
9734 REAL_VALUE_TYPE r;
9735 long l[3];
9736
9737 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
9738 REAL_VALUE_TO_TARGET_LONG_DOUBLE (r, l);
9739 /* Do not use shift by 32 to avoid warning on 32bit systems. */
9740 if (HOST_BITS_PER_WIDE_INT >= 64)
9741 parts[0]
9742 = gen_int_mode
9743 ((l[0] & (((HOST_WIDE_INT) 2 << 31) - 1))
9744 + ((((HOST_WIDE_INT) l[1]) << 31) << 1),
9745 DImode);
9746 else
9747 parts[0] = immed_double_const (l[0], l[1], DImode);
9748 parts[1] = gen_int_mode (l[2], SImode);
9749 }
9750 else
9751 abort ();
9752 }
9753 }
9754
9755 return size;
9756 }
9757
9758 /* Emit insns to perform a move or push of DI, DF, and XF values.
9759 All required insns are emitted here. Operands 2-4 receive the
9760 destination parts in the correct order; operands 5-7 receive the
9761 corresponding source values. */
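/* An illustrative case (following the push path below, not an exhaustive
   description): pushing a TFmode value on a 32-bit target becomes three
   SImode pushes emitted highest word first, so the words end up on the
   stack in memory order, with an extra 4-byte stack adjustment so that the
   total matches a real 16-byte push.  */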
9762
9763 void
9764 ix86_split_long_move (operands)
9765 rtx operands[];
9766 {
9767 rtx part[2][3];
9768 int nparts;
9769 int push = 0;
9770 int collisions = 0;
9771 enum machine_mode mode = GET_MODE (operands[0]);
9772
9773 /* The DFmode expanders may ask us to move a double.
9774 For a 64-bit target this is a single move. By handling this case
9775 here we simplify the i386.md splitters. */
9776 if (GET_MODE_SIZE (GET_MODE (operands[0])) == 8 && TARGET_64BIT)
9777 {
9778 /* Optimize constant pool references into immediate operands. This is used
9779 by fp moves, which force all constants to memory to allow combining. */
9780
9781 if (GET_CODE (operands[1]) == MEM
9782 && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF
9783 && CONSTANT_POOL_ADDRESS_P (XEXP (operands[1], 0)))
9784 operands[1] = get_pool_constant (XEXP (operands[1], 0));
9785 if (push_operand (operands[0], VOIDmode))
9786 {
9787 operands[0] = copy_rtx (operands[0]);
9788 PUT_MODE (operands[0], Pmode);
9789 }
9790 else
9791 operands[0] = gen_lowpart (DImode, operands[0]);
9792 operands[1] = gen_lowpart (DImode, operands[1]);
9793 emit_move_insn (operands[0], operands[1]);
9794 return;
9795 }
9796
9797 /* The only non-offsettable memory we handle is push. */
9798 if (push_operand (operands[0], VOIDmode))
9799 push = 1;
9800 else if (GET_CODE (operands[0]) == MEM
9801 && ! offsettable_memref_p (operands[0]))
9802 abort ();
9803
9804 nparts = ix86_split_to_parts (operands[1], part[1], GET_MODE (operands[0]));
9805 ix86_split_to_parts (operands[0], part[0], GET_MODE (operands[0]));
9806
9807 /* When emitting a push, take care with source operands located on the stack. */
9808 if (push && GET_CODE (operands[1]) == MEM
9809 && reg_overlap_mentioned_p (stack_pointer_rtx, operands[1]))
9810 {
9811 if (nparts == 3)
9812 part[1][1] = change_address (part[1][1], GET_MODE (part[1][1]),
9813 XEXP (part[1][2], 0));
9814 part[1][0] = change_address (part[1][0], GET_MODE (part[1][0]),
9815 XEXP (part[1][1], 0));
9816 }
9817
9818 /* We need to do the copy in the right order in case an address register
9819 of the source overlaps the destination. */
9820 if (REG_P (part[0][0]) && GET_CODE (part[1][0]) == MEM)
9821 {
9822 if (reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0)))
9823 collisions++;
9824 if (reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
9825 collisions++;
9826 if (nparts == 3
9827 && reg_overlap_mentioned_p (part[0][2], XEXP (part[1][0], 0)))
9828 collisions++;
9829
9830 /* Collision in the middle part can be handled by reordering. */
9831 if (collisions == 1 && nparts == 3
9832 && reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
9833 {
9834 rtx tmp;
9835 tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
9836 tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
9837 }
9838
9839 /* If there are more collisions, we can't handle them by reordering.
9840 Do an lea to the last part and use only one colliding move. */
9841 else if (collisions > 1)
9842 {
9843 collisions = 1;
9844 emit_insn (gen_rtx_SET (VOIDmode, part[0][nparts - 1],
9845 XEXP (part[1][0], 0)));
9846 part[1][0] = change_address (part[1][0],
9847 TARGET_64BIT ? DImode : SImode,
9848 part[0][nparts - 1]);
9849 part[1][1] = adjust_address (part[1][0], VOIDmode, UNITS_PER_WORD);
9850 if (nparts == 3)
9851 part[1][2] = adjust_address (part[1][0], VOIDmode, 8);
9852 }
9853 }
9854
9855 if (push)
9856 {
9857 if (!TARGET_64BIT)
9858 {
9859 if (nparts == 3)
9860 {
9861 /* We use only the first 12 bytes of a TFmode value, but for pushing
9862 we are required to adjust the stack as if we were pushing a real
9863 16-byte value. */
9864 if (mode == TFmode && !TARGET_64BIT)
9865 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
9866 GEN_INT (-4)));
9867 emit_move_insn (part[0][2], part[1][2]);
9868 }
9869 }
9870 else
9871 {
9872 /* In 64-bit mode we don't have a 32-bit push available. If this is a
9873 register, that is OK - we will just use the larger counterpart. We
9874 also retype memory - this comes from an attempt to avoid a REX prefix
9875 when moving the second half of a TFmode value. */
9876 if (GET_MODE (part[1][1]) == SImode)
9877 {
9878 if (GET_CODE (part[1][1]) == MEM)
9879 part[1][1] = adjust_address (part[1][1], DImode, 0);
9880 else if (REG_P (part[1][1]))
9881 part[1][1] = gen_rtx_REG (DImode, REGNO (part[1][1]));
9882 else
9883 abort ();
9884 if (GET_MODE (part[1][0]) == SImode)
9885 part[1][0] = part[1][1];
9886 }
9887 }
9888 emit_move_insn (part[0][1], part[1][1]);
9889 emit_move_insn (part[0][0], part[1][0]);
9890 return;
9891 }
9892
9893 /* Choose the correct order so we do not overwrite the source before it is copied. */
9894 if ((REG_P (part[0][0])
9895 && REG_P (part[1][1])
9896 && (REGNO (part[0][0]) == REGNO (part[1][1])
9897 || (nparts == 3
9898 && REGNO (part[0][0]) == REGNO (part[1][2]))))
9899 || (collisions > 0
9900 && reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0))))
9901 {
9902 if (nparts == 3)
9903 {
9904 operands[2] = part[0][2];
9905 operands[3] = part[0][1];
9906 operands[4] = part[0][0];
9907 operands[5] = part[1][2];
9908 operands[6] = part[1][1];
9909 operands[7] = part[1][0];
9910 }
9911 else
9912 {
9913 operands[2] = part[0][1];
9914 operands[3] = part[0][0];
9915 operands[5] = part[1][1];
9916 operands[6] = part[1][0];
9917 }
9918 }
9919 else
9920 {
9921 if (nparts == 3)
9922 {
9923 operands[2] = part[0][0];
9924 operands[3] = part[0][1];
9925 operands[4] = part[0][2];
9926 operands[5] = part[1][0];
9927 operands[6] = part[1][1];
9928 operands[7] = part[1][2];
9929 }
9930 else
9931 {
9932 operands[2] = part[0][0];
9933 operands[3] = part[0][1];
9934 operands[5] = part[1][0];
9935 operands[6] = part[1][1];
9936 }
9937 }
9938 emit_move_insn (operands[2], operands[5]);
9939 emit_move_insn (operands[3], operands[6]);
9940 if (nparts == 3)
9941 emit_move_insn (operands[4], operands[7]);
9942
9943 return;
9944 }
9945
9946 void
9947 ix86_split_ashldi (operands, scratch)
9948 rtx *operands, scratch;
9949 {
9950 rtx low[2], high[2];
9951 int count;
9952
9953 if (GET_CODE (operands[2]) == CONST_INT)
9954 {
9955 split_di (operands, 2, low, high);
9956 count = INTVAL (operands[2]) & 63;
9957
9958 if (count >= 32)
9959 {
9960 emit_move_insn (high[0], low[1]);
9961 emit_move_insn (low[0], const0_rtx);
9962
9963 if (count > 32)
9964 emit_insn (gen_ashlsi3 (high[0], high[0], GEN_INT (count - 32)));
9965 }
9966 else
9967 {
9968 if (!rtx_equal_p (operands[0], operands[1]))
9969 emit_move_insn (operands[0], operands[1]);
9970 emit_insn (gen_x86_shld_1 (high[0], low[0], GEN_INT (count)));
9971 emit_insn (gen_ashlsi3 (low[0], low[0], GEN_INT (count)));
9972 }
9973 }
9974 else
9975 {
9976 if (!rtx_equal_p (operands[0], operands[1]))
9977 emit_move_insn (operands[0], operands[1]);
9978
9979 split_di (operands, 1, low, high);
9980
9981 emit_insn (gen_x86_shld_1 (high[0], low[0], operands[2]));
9982 emit_insn (gen_ashlsi3 (low[0], low[0], operands[2]));
9983
9984 if (TARGET_CMOVE && (! no_new_pseudos || scratch))
9985 {
9986 if (! no_new_pseudos)
9987 scratch = force_reg (SImode, const0_rtx);
9988 else
9989 emit_move_insn (scratch, const0_rtx);
9990
9991 emit_insn (gen_x86_shift_adj_1 (high[0], low[0], operands[2],
9992 scratch));
9993 }
9994 else
9995 emit_insn (gen_x86_shift_adj_2 (high[0], low[0], operands[2]));
9996 }
9997 }
9998
9999 void
10000 ix86_split_ashrdi (operands, scratch)
10001 rtx *operands, scratch;
10002 {
10003 rtx low[2], high[2];
10004 int count;
10005
10006 if (GET_CODE (operands[2]) == CONST_INT)
10007 {
10008 split_di (operands, 2, low, high);
10009 count = INTVAL (operands[2]) & 63;
10010
10011 if (count >= 32)
10012 {
10013 emit_move_insn (low[0], high[1]);
10014
10015 if (! reload_completed)
10016 emit_insn (gen_ashrsi3 (high[0], low[0], GEN_INT (31)));
10017 else
10018 {
10019 emit_move_insn (high[0], low[0]);
10020 emit_insn (gen_ashrsi3 (high[0], high[0], GEN_INT (31)));
10021 }
10022
10023 if (count > 32)
10024 emit_insn (gen_ashrsi3 (low[0], low[0], GEN_INT (count - 32)));
10025 }
10026 else
10027 {
10028 if (!rtx_equal_p (operands[0], operands[1]))
10029 emit_move_insn (operands[0], operands[1]);
10030 emit_insn (gen_x86_shrd_1 (low[0], high[0], GEN_INT (count)));
10031 emit_insn (gen_ashrsi3 (high[0], high[0], GEN_INT (count)));
10032 }
10033 }
10034 else
10035 {
10036 if (!rtx_equal_p (operands[0], operands[1]))
10037 emit_move_insn (operands[0], operands[1]);
10038
10039 split_di (operands, 1, low, high);
10040
10041 emit_insn (gen_x86_shrd_1 (low[0], high[0], operands[2]));
10042 emit_insn (gen_ashrsi3 (high[0], high[0], operands[2]));
10043
10044 if (TARGET_CMOVE && (! no_new_pseudos || scratch))
10045 {
10046 if (! no_new_pseudos)
10047 scratch = gen_reg_rtx (SImode);
10048 emit_move_insn (scratch, high[0]);
10049 emit_insn (gen_ashrsi3 (scratch, scratch, GEN_INT (31)));
10050 emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
10051 scratch));
10052 }
10053 else
10054 emit_insn (gen_x86_shift_adj_3 (low[0], high[0], operands[2]));
10055 }
10056 }
10057
10058 void
10059 ix86_split_lshrdi (operands, scratch)
10060 rtx *operands, scratch;
10061 {
10062 rtx low[2], high[2];
10063 int count;
10064
10065 if (GET_CODE (operands[2]) == CONST_INT)
10066 {
10067 split_di (operands, 2, low, high);
10068 count = INTVAL (operands[2]) & 63;
10069
10070 if (count >= 32)
10071 {
10072 emit_move_insn (low[0], high[1]);
10073 emit_move_insn (high[0], const0_rtx);
10074
10075 if (count > 32)
10076 emit_insn (gen_lshrsi3 (low[0], low[0], GEN_INT (count - 32)));
10077 }
10078 else
10079 {
10080 if (!rtx_equal_p (operands[0], operands[1]))
10081 emit_move_insn (operands[0], operands[1]);
10082 emit_insn (gen_x86_shrd_1 (low[0], high[0], GEN_INT (count)));
10083 emit_insn (gen_lshrsi3 (high[0], high[0], GEN_INT (count)));
10084 }
10085 }
10086 else
10087 {
10088 if (!rtx_equal_p (operands[0], operands[1]))
10089 emit_move_insn (operands[0], operands[1]);
10090
10091 split_di (operands, 1, low, high);
10092
10093 emit_insn (gen_x86_shrd_1 (low[0], high[0], operands[2]));
10094 emit_insn (gen_lshrsi3 (high[0], high[0], operands[2]));
10095
10096 /* Heh. By reversing the arguments, we can reuse this pattern. */
10097 if (TARGET_CMOVE && (! no_new_pseudos || scratch))
10098 {
10099 if (! no_new_pseudos)
10100 scratch = force_reg (SImode, const0_rtx);
10101 else
10102 emit_move_insn (scratch, const0_rtx);
10103
10104 emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
10105 scratch));
10106 }
10107 else
10108 emit_insn (gen_x86_shift_adj_2 (low[0], high[0], operands[2]));
10109 }
10110 }
10111
10112 /* Helper function for the string operations below. Test VARIABLE for
10113 being aligned to VALUE bytes. If so, jump to the label returned. */
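/* Typical use, as in ix86_expand_movstr and ix86_expand_clrstr below
   (shown only as an illustration):

     rtx label = ix86_expand_aligntest (destreg, 1);
     ... emit the one-byte move for the unaligned case ...
     emit_label (label);

   i.e. the code between the call and emit_label is executed only when the
   tested bits of the variable are nonzero.  */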
10114 static rtx
10115 ix86_expand_aligntest (variable, value)
10116 rtx variable;
10117 int value;
10118 {
10119 rtx label = gen_label_rtx ();
10120 rtx tmpcount = gen_reg_rtx (GET_MODE (variable));
10121 if (GET_MODE (variable) == DImode)
10122 emit_insn (gen_anddi3 (tmpcount, variable, GEN_INT (value)));
10123 else
10124 emit_insn (gen_andsi3 (tmpcount, variable, GEN_INT (value)));
10125 emit_cmp_and_jump_insns (tmpcount, const0_rtx, EQ, 0, GET_MODE (variable),
10126 1, label);
10127 return label;
10128 }
10129
10130 /* Decrement COUNTREG by VALUE. */
10131 static void
10132 ix86_adjust_counter (countreg, value)
10133 rtx countreg;
10134 HOST_WIDE_INT value;
10135 {
10136 if (GET_MODE (countreg) == DImode)
10137 emit_insn (gen_adddi3 (countreg, countreg, GEN_INT (-value)));
10138 else
10139 emit_insn (gen_addsi3 (countreg, countreg, GEN_INT (-value)));
10140 }
10141
10142 /* Zero-extend EXP, which may be in SImode, into a Pmode register. */
10143 rtx
10144 ix86_zero_extend_to_Pmode (exp)
10145 rtx exp;
10146 {
10147 rtx r;
10148 if (GET_MODE (exp) == VOIDmode)
10149 return force_reg (Pmode, exp);
10150 if (GET_MODE (exp) == Pmode)
10151 return copy_to_mode_reg (Pmode, exp);
10152 r = gen_reg_rtx (Pmode);
10153 emit_insn (gen_zero_extendsidi2 (r, exp));
10154 return r;
10155 }
10156
10157 /* Expand string move (memcpy) operation. Use i386 string operations when
10158 profitable. ix86_expand_clrstr contains similar code. */
10159 int
10160 ix86_expand_movstr (dst, src, count_exp, align_exp)
10161 rtx dst, src, count_exp, align_exp;
10162 {
10163 rtx srcreg, destreg, countreg;
10164 enum machine_mode counter_mode;
10165 HOST_WIDE_INT align = 0;
10166 unsigned HOST_WIDE_INT count = 0;
10167 rtx insns;
10168
10169 start_sequence ();
10170
10171 if (GET_CODE (align_exp) == CONST_INT)
10172 align = INTVAL (align_exp);
10173
10174 /* This simple hack, pretending maximal alignment, avoids all of the alignment-handling code and simplifies the code below. */
10175 if (!TARGET_ALIGN_STRINGOPS)
10176 align = 64;
10177
10178 if (GET_CODE (count_exp) == CONST_INT)
10179 count = INTVAL (count_exp);
10180
10181 /* Figure out the proper mode for the counter. For 32 bits it is always
10182 SImode; for 64 bits use SImode when possible, otherwise DImode.
10183 Set count to the number of bytes copied when known at compile time. */
10184 if (!TARGET_64BIT || GET_MODE (count_exp) == SImode
10185 || x86_64_zero_extended_value (count_exp))
10186 counter_mode = SImode;
10187 else
10188 counter_mode = DImode;
10189
10190 if (counter_mode != SImode && counter_mode != DImode)
10191 abort ();
10192
10193 destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
10194 srcreg = copy_to_mode_reg (Pmode, XEXP (src, 0));
10195
10196 emit_insn (gen_cld ());
10197
10198 /* When optimizing for size, emit a simple rep ; movsb instruction for
10199 counts not divisible by 4. */
10200
10201 if ((!optimize || optimize_size) && (count == 0 || (count & 0x03)))
10202 {
10203 countreg = ix86_zero_extend_to_Pmode (count_exp);
10204 if (TARGET_64BIT)
10205 emit_insn (gen_rep_movqi_rex64 (destreg, srcreg, countreg,
10206 destreg, srcreg, countreg));
10207 else
10208 emit_insn (gen_rep_movqi (destreg, srcreg, countreg,
10209 destreg, srcreg, countreg));
10210 }
10211
10212 /* For constant aligned (or small unaligned) copies use rep movsl
10213 followed by code copying the rest. For PentiumPro ensure 8 byte
10214 alignment to allow rep movsl acceleration. */
10215
10216 else if (count != 0
10217 && (align >= 8
10218 || (!TARGET_PENTIUMPRO && !TARGET_64BIT && align >= 4)
10219 || optimize_size || count < (unsigned int) 64))
10220 {
10221 int size = TARGET_64BIT && !optimize_size ? 8 : 4;
10222 if (count & ~(size - 1))
10223 {
10224 countreg = copy_to_mode_reg (counter_mode,
10225 GEN_INT ((count >> (size == 4 ? 2 : 3))
10226 & (TARGET_64BIT ? -1 : 0x3fffffff)));
10227 countreg = ix86_zero_extend_to_Pmode (countreg);
10228 if (size == 4)
10229 {
10230 if (TARGET_64BIT)
10231 emit_insn (gen_rep_movsi_rex64 (destreg, srcreg, countreg,
10232 destreg, srcreg, countreg));
10233 else
10234 emit_insn (gen_rep_movsi (destreg, srcreg, countreg,
10235 destreg, srcreg, countreg));
10236 }
10237 else
10238 emit_insn (gen_rep_movdi_rex64 (destreg, srcreg, countreg,
10239 destreg, srcreg, countreg));
10240 }
10241 if (size == 8 && (count & 0x04))
10242 emit_insn (gen_strmovsi (destreg, srcreg));
10243 if (count & 0x02)
10244 emit_insn (gen_strmovhi (destreg, srcreg));
10245 if (count & 0x01)
10246 emit_insn (gen_strmovqi (destreg, srcreg));
10247 }
10248 /* The generic code, based on the glibc implementation:
10249 - align the destination to 4 bytes (8-byte alignment is used for
10250 PentiumPro, allowing accelerated copying there)
10251 - copy the data using rep movsl
10252 - copy the rest. A rough sketch of the emitted sequence follows. */
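/* Rough sketch of the sequence emitted on the 32-bit path when nothing is
   known about the alignment (illustration only; the string insns implicitly
   use %esi/%edi/%ecx):

     cld
     ; byte/word/dword moves, each guarded by an alignment test, until the
     ;   destination is aligned, decrementing the count to match
     countreg2 = countreg >> 2
     rep movsd
     ; at most one movsw and one movsb for the remaining bytes  */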
10253 else
10254 {
10255 rtx countreg2;
10256 rtx label = NULL;
10257 int desired_alignment = (TARGET_PENTIUMPRO
10258 && (count == 0 || count >= (unsigned int) 260)
10259 ? 8 : UNITS_PER_WORD);
10260
10261 /* In case we don't know anything about the alignment, default to
10262 the library version, since it is usually equally fast and results
10263 in shorter code. */
10264 if (!TARGET_INLINE_ALL_STRINGOPS && align < UNITS_PER_WORD)
10265 {
10266 end_sequence ();
10267 return 0;
10268 }
10269
10270 if (TARGET_SINGLE_STRINGOP)
10271 emit_insn (gen_cld ());
10272
10273 countreg2 = gen_reg_rtx (Pmode);
10274 countreg = copy_to_mode_reg (counter_mode, count_exp);
10275
10276 /* We don't use loops to align the destination or to copy parts smaller
10277 than 4 bytes, because gcc is able to optimize such code better (in
10278 case the destination or the count really is aligned, gcc is often
10279 able to predict the branches) and it is also friendlier to
10280 hardware branch prediction.
10281
10282 Using loops is beneficial for the generic case, because we can
10283 handle small counts using the loops. Many CPUs (such as Athlon)
10284 have large REP prefix setup costs.
10285
10286 This is quite costly. Maybe we can revisit this decision later or
10287 add some customizability to this code. */
10288
10289 if (count == 0 && align < desired_alignment)
10290 {
10291 label = gen_label_rtx ();
10292 emit_cmp_and_jump_insns (countreg, GEN_INT (desired_alignment - 1),
10293 LEU, 0, counter_mode, 1, label);
10294 }
10295 if (align <= 1)
10296 {
10297 rtx label = ix86_expand_aligntest (destreg, 1);
10298 emit_insn (gen_strmovqi (destreg, srcreg));
10299 ix86_adjust_counter (countreg, 1);
10300 emit_label (label);
10301 LABEL_NUSES (label) = 1;
10302 }
10303 if (align <= 2)
10304 {
10305 rtx label = ix86_expand_aligntest (destreg, 2);
10306 emit_insn (gen_strmovhi (destreg, srcreg));
10307 ix86_adjust_counter (countreg, 2);
10308 emit_label (label);
10309 LABEL_NUSES (label) = 1;
10310 }
10311 if (align <= 4 && desired_alignment > 4)
10312 {
10313 rtx label = ix86_expand_aligntest (destreg, 4);
10314 emit_insn (gen_strmovsi (destreg, srcreg));
10315 ix86_adjust_counter (countreg, 4);
10316 emit_label (label);
10317 LABEL_NUSES (label) = 1;
10318 }
10319
10320 if (label && desired_alignment > 4 && !TARGET_64BIT)
10321 {
10322 emit_label (label);
10323 LABEL_NUSES (label) = 1;
10324 label = NULL_RTX;
10325 }
10326 if (!TARGET_SINGLE_STRINGOP)
10327 emit_insn (gen_cld ());
10328 if (TARGET_64BIT)
10329 {
10330 emit_insn (gen_lshrdi3 (countreg2, ix86_zero_extend_to_Pmode (countreg),
10331 GEN_INT (3)));
10332 emit_insn (gen_rep_movdi_rex64 (destreg, srcreg, countreg2,
10333 destreg, srcreg, countreg2));
10334 }
10335 else
10336 {
10337 emit_insn (gen_lshrsi3 (countreg2, countreg, GEN_INT (2)));
10338 emit_insn (gen_rep_movsi (destreg, srcreg, countreg2,
10339 destreg, srcreg, countreg2));
10340 }
10341
10342 if (label)
10343 {
10344 emit_label (label);
10345 LABEL_NUSES (label) = 1;
10346 }
10347 if (TARGET_64BIT && align > 4 && count != 0 && (count & 4))
10348 emit_insn (gen_strmovsi (destreg, srcreg));
10349 if ((align <= 4 || count == 0) && TARGET_64BIT)
10350 {
10351 rtx label = ix86_expand_aligntest (countreg, 4);
10352 emit_insn (gen_strmovsi (destreg, srcreg));
10353 emit_label (label);
10354 LABEL_NUSES (label) = 1;
10355 }
10356 if (align > 2 && count != 0 && (count & 2))
10357 emit_insn (gen_strmovhi (destreg, srcreg));
10358 if (align <= 2 || count == 0)
10359 {
10360 rtx label = ix86_expand_aligntest (countreg, 2);
10361 emit_insn (gen_strmovhi (destreg, srcreg));
10362 emit_label (label);
10363 LABEL_NUSES (label) = 1;
10364 }
10365 if (align > 1 && count != 0 && (count & 1))
10366 emit_insn (gen_strmovqi (destreg, srcreg));
10367 if (align <= 1 || count == 0)
10368 {
10369 rtx label = ix86_expand_aligntest (countreg, 1);
10370 emit_insn (gen_strmovqi (destreg, srcreg));
10371 emit_label (label);
10372 LABEL_NUSES (label) = 1;
10373 }
10374 }
10375
10376 insns = get_insns ();
10377 end_sequence ();
10378
10379 ix86_set_move_mem_attrs (insns, dst, src, destreg, srcreg);
10380 emit_insn (insns);
10381 return 1;
10382 }
10383
10384 /* Expand string clear operation (bzero). Use i386 string operations when
10385 profitable. ix86_expand_movstr contains similar code. */
10386 int
10387 ix86_expand_clrstr (src, count_exp, align_exp)
10388 rtx src, count_exp, align_exp;
10389 {
10390 rtx destreg, zeroreg, countreg;
10391 enum machine_mode counter_mode;
10392 HOST_WIDE_INT align = 0;
10393 unsigned HOST_WIDE_INT count = 0;
10394
10395 if (GET_CODE (align_exp) == CONST_INT)
10396 align = INTVAL (align_exp);
10397
10398 /* This simple hack, pretending maximal alignment, avoids all of the alignment-handling code and simplifies the code below. */
10399 if (!TARGET_ALIGN_STRINGOPS)
10400 align = 32;
10401
10402 if (GET_CODE (count_exp) == CONST_INT)
10403 count = INTVAL (count_exp);
10404 /* Figure out the proper mode for the counter. For 32 bits it is always
10405 SImode; for 64 bits use SImode when possible, otherwise DImode.
10406 Set count to the number of bytes cleared when known at compile time. */
10407 if (!TARGET_64BIT || GET_MODE (count_exp) == SImode
10408 || x86_64_zero_extended_value (count_exp))
10409 counter_mode = SImode;
10410 else
10411 counter_mode = DImode;
10412
10413 destreg = copy_to_mode_reg (Pmode, XEXP (src, 0));
10414
10415 emit_insn (gen_cld ());
10416
10417 /* When optimizing for size, emit a simple rep ; stosb instruction for
10418 counts not divisible by 4. */
10419
10420 if ((!optimize || optimize_size) && (count == 0 || (count & 0x03)))
10421 {
10422 countreg = ix86_zero_extend_to_Pmode (count_exp);
10423 zeroreg = copy_to_mode_reg (QImode, const0_rtx);
10424 if (TARGET_64BIT)
10425 emit_insn (gen_rep_stosqi_rex64 (destreg, countreg, zeroreg,
10426 destreg, countreg));
10427 else
10428 emit_insn (gen_rep_stosqi (destreg, countreg, zeroreg,
10429 destreg, countreg));
10430 }
10431 else if (count != 0
10432 && (align >= 8
10433 || (!TARGET_PENTIUMPRO && !TARGET_64BIT && align >= 4)
10434 || optimize_size || count < (unsigned int) 64))
10435 {
10436 int size = TARGET_64BIT && !optimize_size ? 8 : 4;
10437 zeroreg = copy_to_mode_reg (size == 4 ? SImode : DImode, const0_rtx);
10438 if (count & ~(size - 1))
10439 {
10440 countreg = copy_to_mode_reg (counter_mode,
10441 GEN_INT ((count >> (size == 4 ? 2 : 3))
10442 & (TARGET_64BIT ? -1 : 0x3fffffff)));
10443 countreg = ix86_zero_extend_to_Pmode (countreg);
10444 if (size == 4)
10445 {
10446 if (TARGET_64BIT)
10447 emit_insn (gen_rep_stossi_rex64 (destreg, countreg, zeroreg,
10448 destreg, countreg));
10449 else
10450 emit_insn (gen_rep_stossi (destreg, countreg, zeroreg,
10451 destreg, countreg));
10452 }
10453 else
10454 emit_insn (gen_rep_stosdi_rex64 (destreg, countreg, zeroreg,
10455 destreg, countreg));
10456 }
10457 if (size == 8 && (count & 0x04))
10458 emit_insn (gen_strsetsi (destreg,
10459 gen_rtx_SUBREG (SImode, zeroreg, 0)));
10460 if (count & 0x02)
10461 emit_insn (gen_strsethi (destreg,
10462 gen_rtx_SUBREG (HImode, zeroreg, 0)));
10463 if (count & 0x01)
10464 emit_insn (gen_strsetqi (destreg,
10465 gen_rtx_SUBREG (QImode, zeroreg, 0)));
10466 }
10467 else
10468 {
10469 rtx countreg2;
10470 rtx label = NULL;
10471 /* Compute desired alignment of the string operation. */
10472 int desired_alignment = (TARGET_PENTIUMPRO
10473 && (count == 0 || count >= (unsigned int) 260)
10474 ? 8 : UNITS_PER_WORD);
10475
10476 /* In case we don't know anything about the alignment, default to
10477 the library version, since it is usually equally fast and results in
10478 shorter code. */
10479 if (!TARGET_INLINE_ALL_STRINGOPS && align < UNITS_PER_WORD)
10480 return 0;
10481
10482 if (TARGET_SINGLE_STRINGOP)
10483 emit_insn (gen_cld ());
10484
10485 countreg2 = gen_reg_rtx (Pmode);
10486 countreg = copy_to_mode_reg (counter_mode, count_exp);
10487 zeroreg = copy_to_mode_reg (Pmode, const0_rtx);
10488
10489 if (count == 0 && align < desired_alignment)
10490 {
10491 label = gen_label_rtx ();
10492 emit_cmp_and_jump_insns (countreg, GEN_INT (desired_alignment - 1),
10493 LEU, 0, counter_mode, 1, label);
10494 }
10495 if (align <= 1)
10496 {
10497 rtx label = ix86_expand_aligntest (destreg, 1);
10498 emit_insn (gen_strsetqi (destreg,
10499 gen_rtx_SUBREG (QImode, zeroreg, 0)));
10500 ix86_adjust_counter (countreg, 1);
10501 emit_label (label);
10502 LABEL_NUSES (label) = 1;
10503 }
10504 if (align <= 2)
10505 {
10506 rtx label = ix86_expand_aligntest (destreg, 2);
10507 emit_insn (gen_strsethi (destreg,
10508 gen_rtx_SUBREG (HImode, zeroreg, 0)));
10509 ix86_adjust_counter (countreg, 2);
10510 emit_label (label);
10511 LABEL_NUSES (label) = 1;
10512 }
10513 if (align <= 4 && desired_alignment > 4)
10514 {
10515 rtx label = ix86_expand_aligntest (destreg, 4);
10516 emit_insn (gen_strsetsi (destreg, (TARGET_64BIT
10517 ? gen_rtx_SUBREG (SImode, zeroreg, 0)
10518 : zeroreg)));
10519 ix86_adjust_counter (countreg, 4);
10520 emit_label (label);
10521 LABEL_NUSES (label) = 1;
10522 }
10523
10524 if (label && desired_alignment > 4 && !TARGET_64BIT)
10525 {
10526 emit_label (label);
10527 LABEL_NUSES (label) = 1;
10528 label = NULL_RTX;
10529 }
10530
10531 if (!TARGET_SINGLE_STRINGOP)
10532 emit_insn (gen_cld ());
10533 if (TARGET_64BIT)
10534 {
10535 emit_insn (gen_lshrdi3 (countreg2, ix86_zero_extend_to_Pmode (countreg),
10536 GEN_INT (3)));
10537 emit_insn (gen_rep_stosdi_rex64 (destreg, countreg2, zeroreg,
10538 destreg, countreg2));
10539 }
10540 else
10541 {
10542 emit_insn (gen_lshrsi3 (countreg2, countreg, GEN_INT (2)));
10543 emit_insn (gen_rep_stossi (destreg, countreg2, zeroreg,
10544 destreg, countreg2));
10545 }
10546 if (label)
10547 {
10548 emit_label (label);
10549 LABEL_NUSES (label) = 1;
10550 }
10551
10552 if (TARGET_64BIT && align > 4 && count != 0 && (count & 4))
10553 emit_insn (gen_strsetsi (destreg,
10554 gen_rtx_SUBREG (SImode, zeroreg, 0)));
10555 if (TARGET_64BIT && (align <= 4 || count == 0))
10556 {
10557 rtx label = ix86_expand_aligntest (countreg, 4);
10558 emit_insn (gen_strsetsi (destreg,
10559 gen_rtx_SUBREG (SImode, zeroreg, 0)));
10560 emit_label (label);
10561 LABEL_NUSES (label) = 1;
10562 }
10563 if (align > 2 && count != 0 && (count & 2))
10564 emit_insn (gen_strsethi (destreg,
10565 gen_rtx_SUBREG (HImode, zeroreg, 0)));
10566 if (align <= 2 || count == 0)
10567 {
10568 rtx label = ix86_expand_aligntest (countreg, 2);
10569 emit_insn (gen_strsethi (destreg,
10570 gen_rtx_SUBREG (HImode, zeroreg, 0)));
10571 emit_label (label);
10572 LABEL_NUSES (label) = 1;
10573 }
10574 if (align > 1 && count != 0 && (count & 1))
10575 emit_insn (gen_strsetqi (destreg,
10576 gen_rtx_SUBREG (QImode, zeroreg, 0)));
10577 if (align <= 1 || count == 0)
10578 {
10579 rtx label = ix86_expand_aligntest (countreg, 1);
10580 emit_insn (gen_strsetqi (destreg,
10581 gen_rtx_SUBREG (QImode, zeroreg, 0)));
10582 emit_label (label);
10583 LABEL_NUSES (label) = 1;
10584 }
10585 }
10586 return 1;
10587 }
10588 /* Expand strlen. */
10589 int
10590 ix86_expand_strlen (out, src, eoschar, align)
10591 rtx out, src, eoschar, align;
10592 {
10593 rtx addr, scratch1, scratch2, scratch3, scratch4;
10594
10595 /* The generic case of the strlen expander is long. Avoid expanding
10596 it unless TARGET_INLINE_ALL_STRINGOPS. */
10597
10598 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
10599 && !TARGET_INLINE_ALL_STRINGOPS
10600 && !optimize_size
10601 && (GET_CODE (align) != CONST_INT || INTVAL (align) < 4))
10602 return 0;
10603
10604 addr = force_reg (Pmode, XEXP (src, 0));
10605 scratch1 = gen_reg_rtx (Pmode);
10606
10607 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
10608 && !optimize_size)
10609 {
10610 /* Well it seems that some optimizer does not combine a call like
10611 foo(strlen(bar), strlen(bar));
10612 when the move and the subtraction are done here. It does calculate
10613 the length just once when these instructions are done inside of
10614 output_strlen_unroll(). But I think that since &bar[strlen(bar)] is
10615 often used and I use one fewer register for the lifetime of
10616 output_strlen_unroll() this is better. */
10617
10618 emit_move_insn (out, addr);
10619
10620 ix86_expand_strlensi_unroll_1 (out, align);
10621
10622 /* strlensi_unroll_1 returns the address of the zero at the end of
10623 the string, like memchr(), so compute the length by subtracting
10624 the start address. */
10625 if (TARGET_64BIT)
10626 emit_insn (gen_subdi3 (out, out, addr));
10627 else
10628 emit_insn (gen_subsi3 (out, out, addr));
10629 }
10630 else
10631 {
10632 scratch2 = gen_reg_rtx (Pmode);
10633 scratch3 = gen_reg_rtx (Pmode);
10634 scratch4 = force_reg (Pmode, constm1_rtx);
10635
10636 emit_move_insn (scratch3, addr);
10637 eoschar = force_reg (QImode, eoschar);
10638
10639 emit_insn (gen_cld ());
10640 if (TARGET_64BIT)
10641 {
10642 emit_insn (gen_strlenqi_rex_1 (scratch1, scratch3, eoschar,
10643 align, scratch4, scratch3));
10644 emit_insn (gen_one_cmpldi2 (scratch2, scratch1));
10645 emit_insn (gen_adddi3 (out, scratch2, constm1_rtx));
10646 }
10647 else
10648 {
10649 emit_insn (gen_strlenqi_1 (scratch1, scratch3, eoschar,
10650 align, scratch4, scratch3));
10651 emit_insn (gen_one_cmplsi2 (scratch2, scratch1));
10652 emit_insn (gen_addsi3 (out, scratch2, constm1_rtx));
10653 }
10654 }
10655 return 1;
10656 }
10657
10658 /* Expand the appropriate insns for doing strlen if not just doing
10659 repnz; scasb
10660
10661 out = result, initialized with the start address
10662 align_rtx = alignment of the address.
10663 scratch = scratch register, initialized with the start address when
10664 not aligned, otherwise undefined
10665
10666 This is just the body. It needs the initializations mentioned above and
10667 some address computation at the end. These things are done in i386.md. */
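/* In outline (a summary of the code below, for orientation): compare up to
   three leading bytes one at a time until OUT is 4-byte aligned, then loop
   fetching 4 bytes at a time and using the 0x01010101/0x80808080 trick to
   detect a zero byte, and finally adjust OUT so that it points at the
   terminating zero.  */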
10668
10669 static void
10670 ix86_expand_strlensi_unroll_1 (out, align_rtx)
10671 rtx out, align_rtx;
10672 {
10673 int align;
10674 rtx tmp;
10675 rtx align_2_label = NULL_RTX;
10676 rtx align_3_label = NULL_RTX;
10677 rtx align_4_label = gen_label_rtx ();
10678 rtx end_0_label = gen_label_rtx ();
10679 rtx mem;
10680 rtx tmpreg = gen_reg_rtx (SImode);
10681 rtx scratch = gen_reg_rtx (SImode);
10682
10683 align = 0;
10684 if (GET_CODE (align_rtx) == CONST_INT)
10685 align = INTVAL (align_rtx);
10686
10687 /* Loop to check 1..3 bytes for null to get an aligned pointer. */
10688
10689 /* Is there a known alignment and is it less than 4? */
10690 if (align < 4)
10691 {
10692 rtx scratch1 = gen_reg_rtx (Pmode);
10693 emit_move_insn (scratch1, out);
10694 /* Is there a known alignment and is it not 2? */
10695 if (align != 2)
10696 {
10697 align_3_label = gen_label_rtx (); /* Label when aligned to 3-byte */
10698 align_2_label = gen_label_rtx (); /* Label when aligned to 2-byte */
10699
10700 /* Leave just the 3 lower bits. */
10701 align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (3),
10702 NULL_RTX, 0, OPTAB_WIDEN);
10703
10704 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
10705 Pmode, 1, align_4_label);
10706 emit_cmp_and_jump_insns (align_rtx, GEN_INT (2), EQ, NULL,
10707 Pmode, 1, align_2_label);
10708 emit_cmp_and_jump_insns (align_rtx, GEN_INT (2), GTU, NULL,
10709 Pmode, 1, align_3_label);
10710 }
10711 else
10712 {
10713 /* Since the alignment is 2, we have to check 2 or 0 bytes;
10714 check whether it is aligned to a 4-byte boundary. */
10715
10716 align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (2),
10717 NULL_RTX, 0, OPTAB_WIDEN);
10718
10719 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
10720 Pmode, 1, align_4_label);
10721 }
10722
10723 mem = gen_rtx_MEM (QImode, out);
10724
10725 /* Now compare the bytes. */
10726
10727 /* Compare the first n unaligned bytes on a byte-by-byte basis. */
10728 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL,
10729 QImode, 1, end_0_label);
10730
10731 /* Increment the address. */
10732 if (TARGET_64BIT)
10733 emit_insn (gen_adddi3 (out, out, const1_rtx));
10734 else
10735 emit_insn (gen_addsi3 (out, out, const1_rtx));
10736
10737 /* Not needed with an alignment of 2 */
10738 if (align != 2)
10739 {
10740 emit_label (align_2_label);
10741
10742 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
10743 end_0_label);
10744
10745 if (TARGET_64BIT)
10746 emit_insn (gen_adddi3 (out, out, const1_rtx));
10747 else
10748 emit_insn (gen_addsi3 (out, out, const1_rtx));
10749
10750 emit_label (align_3_label);
10751 }
10752
10753 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
10754 end_0_label);
10755
10756 if (TARGET_64BIT)
10757 emit_insn (gen_adddi3 (out, out, const1_rtx));
10758 else
10759 emit_insn (gen_addsi3 (out, out, const1_rtx));
10760 }
10761
10762 /* Generate a loop to check 4 bytes at a time. It is not a good idea
10763 to align this loop; it only makes programs larger and does not help
10764 to speed them up. */
10765 emit_label (align_4_label);
10766
10767 mem = gen_rtx_MEM (SImode, out);
10768 emit_move_insn (scratch, mem);
10769 if (TARGET_64BIT)
10770 emit_insn (gen_adddi3 (out, out, GEN_INT (4)));
10771 else
10772 emit_insn (gen_addsi3 (out, out, GEN_INT (4)));
10773
10774 /* This formula yields a nonzero result iff one of the bytes is zero.
10775 This saves three branches inside the loop and many cycles. */
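/* A worked example (illustration only), taking the SImode word 0x12340078,
   whose second byte is zero: 0x12340078 + (-0x01010101) is 0x1132FF77,
   ~0x12340078 is 0xEDCBFF87, and their AND is 0x0102FF07; masking with
   0x80808080 leaves 0x00008000, which is nonzero, and the surviving 0x80
   bit marks the zero byte.  A byte can only contribute a set bit to the
   final mask if it is 0x00 itself or if a lower byte was already 0x00 and
   generated a borrow, so the result is nonzero exactly when the word
   contains a zero byte.  */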
10776
10777 emit_insn (gen_addsi3 (tmpreg, scratch, GEN_INT (-0x01010101)));
10778 emit_insn (gen_one_cmplsi2 (scratch, scratch));
10779 emit_insn (gen_andsi3 (tmpreg, tmpreg, scratch));
10780 emit_insn (gen_andsi3 (tmpreg, tmpreg,
10781 gen_int_mode (0x80808080, SImode)));
10782 emit_cmp_and_jump_insns (tmpreg, const0_rtx, EQ, 0, SImode, 1,
10783 align_4_label);
10784
10785 if (TARGET_CMOVE)
10786 {
10787 rtx reg = gen_reg_rtx (SImode);
10788 rtx reg2 = gen_reg_rtx (Pmode);
10789 emit_move_insn (reg, tmpreg);
10790 emit_insn (gen_lshrsi3 (reg, reg, GEN_INT (16)));
10791
10792 /* If zero is not in the first two bytes, move two bytes forward. */
10793 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
10794 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
10795 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
10796 emit_insn (gen_rtx_SET (VOIDmode, tmpreg,
10797 gen_rtx_IF_THEN_ELSE (SImode, tmp,
10798 reg,
10799 tmpreg)));
10800 /* Emit lea manually to avoid clobbering of flags. */
10801 emit_insn (gen_rtx_SET (SImode, reg2,
10802 gen_rtx_PLUS (Pmode, out, GEN_INT (2))));
10803
10804 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
10805 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
10806 emit_insn (gen_rtx_SET (VOIDmode, out,
10807 gen_rtx_IF_THEN_ELSE (Pmode, tmp,
10808 reg2,
10809 out)));
10810
10811 }
10812 else
10813 {
10814 rtx end_2_label = gen_label_rtx ();
10815 /* Is zero in the first two bytes? */
10816
10817 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
10818 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
10819 tmp = gen_rtx_NE (VOIDmode, tmp, const0_rtx);
10820 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
10821 gen_rtx_LABEL_REF (VOIDmode, end_2_label),
10822 pc_rtx);
10823 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
10824 JUMP_LABEL (tmp) = end_2_label;
10825
10826 /* Not in the first two. Move two bytes forward. */
10827 emit_insn (gen_lshrsi3 (tmpreg, tmpreg, GEN_INT (16)));
10828 if (TARGET_64BIT)
10829 emit_insn (gen_adddi3 (out, out, GEN_INT (2)));
10830 else
10831 emit_insn (gen_addsi3 (out, out, GEN_INT (2)));
10832
10833 emit_label (end_2_label);
10834
10835 }
10836
10837 /* Avoid a branch when adjusting OUT for the position of the zero byte. */
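/* Explanatory note on the add/sbb pair below (not generated code): the low
   byte of TMPREG holds the 0x80 marker for the lower of the two remaining
   candidate bytes, and OUT points 4 bytes past the terminating zero when
   that marker is set and 3 bytes past it when it is clear.  Doubling
   TMPREG moves the marker into the carry flag, and the subtract-with-borrow
   then subtracts 3 plus the carry from OUT, leaving OUT pointing exactly
   at the terminating zero without a branch.  */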
10838 tmpreg = gen_lowpart (QImode, tmpreg);
10839 emit_insn (gen_addqi3_cc (tmpreg, tmpreg, tmpreg));
10840 if (TARGET_64BIT)
10841 emit_insn (gen_subdi3_carry_rex64 (out, out, GEN_INT (3)));
10842 else
10843 emit_insn (gen_subsi3_carry (out, out, GEN_INT (3)));
10844
10845 emit_label (end_0_label);
10846 }
10847
10848 void
10849 ix86_expand_call (retval, fnaddr, callarg1, callarg2, pop)
10850 rtx retval, fnaddr, callarg1, callarg2, pop;
10851 {
10852 rtx use = NULL, call;
10853
10854 if (pop == const0_rtx)
10855 pop = NULL;
10856 if (TARGET_64BIT && pop)
10857 abort ();
10858
10859 #if TARGET_MACHO
10860 if (flag_pic && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF)
10861 fnaddr = machopic_indirect_call_target (fnaddr);
10862 #else
10863 /* Static functions and indirect calls don't need the pic register. */
10864 if (! TARGET_64BIT && flag_pic
10865 && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
10866 && ! SYMBOL_REF_FLAG (XEXP (fnaddr, 0)))
10867 use_reg (&use, pic_offset_table_rtx);
10868
10869 if (TARGET_64BIT && INTVAL (callarg2) >= 0)
10870 {
10871 rtx al = gen_rtx_REG (QImode, 0);
10872 emit_move_insn (al, callarg2);
10873 use_reg (&use, al);
10874 }
10875 #endif /* TARGET_MACHO */
10876
10877 if (! call_insn_operand (XEXP (fnaddr, 0), Pmode))
10878 {
10879 fnaddr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0));
10880 fnaddr = gen_rtx_MEM (QImode, fnaddr);
10881 }
10882
10883 call = gen_rtx_CALL (VOIDmode, fnaddr, callarg1);
10884 if (retval)
10885 call = gen_rtx_SET (VOIDmode, retval, call);
10886 if (pop)
10887 {
10888 pop = gen_rtx_PLUS (Pmode, stack_pointer_rtx, pop);
10889 pop = gen_rtx_SET (VOIDmode, stack_pointer_rtx, pop);
10890 call = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, call, pop));
10891 }
10892
10893 call = emit_call_insn (call);
10894 if (use)
10895 CALL_INSN_FUNCTION_USAGE (call) = use;
10896 }
10897
10898 \f
10899 /* Clear stack slot assignments remembered from previous functions.
10900 This is called from INIT_EXPANDERS once before RTL is emitted for each
10901 function. */
10902
10903 static struct machine_function *
10904 ix86_init_machine_status ()
10905 {
10906 return ggc_alloc_cleared (sizeof (struct machine_function));
10907 }
10908
10909 /* Return a MEM corresponding to a stack slot with mode MODE.
10910 Allocate a new slot if necessary.
10911
10912 The RTL for a function can have several slots available: N is
10913 which slot to use. */
10914
10915 rtx
10916 assign_386_stack_local (mode, n)
10917 enum machine_mode mode;
10918 int n;
10919 {
10920 if (n < 0 || n >= MAX_386_STACK_LOCALS)
10921 abort ();
10922
10923 if (ix86_stack_locals[(int) mode][n] == NULL_RTX)
10924 ix86_stack_locals[(int) mode][n]
10925 = assign_stack_local (mode, GET_MODE_SIZE (mode), 0);
10926
10927 return ix86_stack_locals[(int) mode][n];
10928 }
10929
10930 /* Construct the SYMBOL_REF for the tls_get_addr function. */
10931
10932 static GTY(()) rtx ix86_tls_symbol;
10933 rtx
10934 ix86_tls_get_addr ()
10935 {
10936
10937 if (!ix86_tls_symbol)
10938 {
10939 ix86_tls_symbol = gen_rtx_SYMBOL_REF (Pmode,
10940 (TARGET_GNU_TLS && !TARGET_64BIT)
10941 ? "___tls_get_addr"
10942 : "__tls_get_addr");
10943 }
10944
10945 return ix86_tls_symbol;
10946 }
10947 \f
10948 /* Calculate the length of the memory address in the instruction
10949 encoding. Does not include the one-byte modrm, opcode, or prefix. */
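/* Some illustrative counts, following the cases handled below (32-bit
   forms): (reg ebx) needs no extra bytes, while (reg esp) or (reg ebp)
   needs 1 for the longer modrm form; a bare displacement needs 4;
   (plus (reg ebx) (const_int 8)) needs 1 for the 8-bit displacement; and
   an index such as (mult (reg ecx) (const_int 4)) costs 1 more for the
   SIB byte, so base + index + disp8 counts 2.  */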
10950
10951 static int
10952 memory_address_length (addr)
10953 rtx addr;
10954 {
10955 struct ix86_address parts;
10956 rtx base, index, disp;
10957 int len;
10958
10959 if (GET_CODE (addr) == PRE_DEC
10960 || GET_CODE (addr) == POST_INC
10961 || GET_CODE (addr) == PRE_MODIFY
10962 || GET_CODE (addr) == POST_MODIFY)
10963 return 0;
10964
10965 if (! ix86_decompose_address (addr, &parts))
10966 abort ();
10967
10968 base = parts.base;
10969 index = parts.index;
10970 disp = parts.disp;
10971 len = 0;
10972
10973 /* Register Indirect. */
10974 if (base && !index && !disp)
10975 {
10976 /* Special cases: ebp and esp need the two-byte modrm form. */
10977 if (addr == stack_pointer_rtx
10978 || addr == arg_pointer_rtx
10979 || addr == frame_pointer_rtx
10980 || addr == hard_frame_pointer_rtx)
10981 len = 1;
10982 }
10983
10984 /* Direct Addressing. */
10985 else if (disp && !base && !index)
10986 len = 4;
10987
10988 else
10989 {
10990 /* Find the length of the displacement constant. */
10991 if (disp)
10992 {
10993 if (GET_CODE (disp) == CONST_INT
10994 && CONST_OK_FOR_LETTER_P (INTVAL (disp), 'K'))
10995 len = 1;
10996 else
10997 len = 4;
10998 }
10999
11000 /* An index requires the two-byte modrm form. */
11001 if (index)
11002 len += 1;
11003 }
11004
11005 return len;
11006 }
11007
11008 /* Compute the default value for the "length_immediate" attribute. When
11009 SHORTFORM is set, expect that the insn has an 8-bit immediate alternative. */
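/* For illustration: with SHORTFORM set, an immediate such as 8 that
   satisfies the signed 8-bit 'K' constraint counts 1 byte, while 1000 in
   an SImode insn counts 4; HImode immediates count 2, and DImode
   immediates also count 4 because they are encoded as 32-bit
   sign-extended values.  */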
11010 int
11011 ix86_attr_length_immediate_default (insn, shortform)
11012 rtx insn;
11013 int shortform;
11014 {
11015 int len = 0;
11016 int i;
11017 extract_insn_cached (insn);
11018 for (i = recog_data.n_operands - 1; i >= 0; --i)
11019 if (CONSTANT_P (recog_data.operand[i]))
11020 {
11021 if (len)
11022 abort ();
11023 if (shortform
11024 && GET_CODE (recog_data.operand[i]) == CONST_INT
11025 && CONST_OK_FOR_LETTER_P (INTVAL (recog_data.operand[i]), 'K'))
11026 len = 1;
11027 else
11028 {
11029 switch (get_attr_mode (insn))
11030 {
11031 case MODE_QI:
11032 len += 1;
11033 break;
11034 case MODE_HI:
11035 len += 2;
11036 break;
11037 case MODE_SI:
11038 len += 4;
11039 break;
11040 /* Immediates for DImode instructions are encoded as 32-bit sign-extended values. */
11041 case MODE_DI:
11042 len += 4;
11043 break;
11044 default:
11045 fatal_insn ("unknown insn mode", insn);
11046 }
11047 }
11048 }
11049 return len;
11050 }
11051 /* Compute default value for "length_address" attribute. */
11052 int
11053 ix86_attr_length_address_default (insn)
11054 rtx insn;
11055 {
11056 int i;
11057 extract_insn_cached (insn);
11058 for (i = recog_data.n_operands - 1; i >= 0; --i)
11059 if (GET_CODE (recog_data.operand[i]) == MEM)
11060 {
11061 return memory_address_length (XEXP (recog_data.operand[i], 0));
11062 break;
11063 }
11064 return 0;
11065 }
11066 \f
11067 /* Return the maximum number of instructions a cpu can issue. */
11068
11069 static int
11070 ix86_issue_rate ()
11071 {
11072 switch (ix86_cpu)
11073 {
11074 case PROCESSOR_PENTIUM:
11075 case PROCESSOR_K6:
11076 return 2;
11077
11078 case PROCESSOR_PENTIUMPRO:
11079 case PROCESSOR_PENTIUM4:
11080 case PROCESSOR_ATHLON:
11081 return 3;
11082
11083 default:
11084 return 1;
11085 }
11086 }
11087
11088 /* A subroutine of ix86_adjust_cost -- return true iff INSN reads the flags
11089 set by DEP_INSN and nothing else that DEP_INSN sets. */
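/* The canonical case is a compare followed by a conditional jump or setcc:
   the jump reads only the flags that the compare sets, so the two can be
   paired (see the Pentium case of ix86_adjust_cost below, which drops the
   cost to 0 for such pairs).  */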
11090
11091 static int
11092 ix86_flags_dependant (insn, dep_insn, insn_type)
11093 rtx insn, dep_insn;
11094 enum attr_type insn_type;
11095 {
11096 rtx set, set2;
11097
11098 /* Simplify the test for uninteresting insns. */
11099 if (insn_type != TYPE_SETCC
11100 && insn_type != TYPE_ICMOV
11101 && insn_type != TYPE_FCMOV
11102 && insn_type != TYPE_IBR)
11103 return 0;
11104
11105 if ((set = single_set (dep_insn)) != 0)
11106 {
11107 set = SET_DEST (set);
11108 set2 = NULL_RTX;
11109 }
11110 else if (GET_CODE (PATTERN (dep_insn)) == PARALLEL
11111 && XVECLEN (PATTERN (dep_insn), 0) == 2
11112 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 0)) == SET
11113 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 1)) == SET)
11114 {
11115 set = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
11116 set2 = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 1));
11117 }
11118 else
11119 return 0;
11120
11121 if (GET_CODE (set) != REG || REGNO (set) != FLAGS_REG)
11122 return 0;
11123
11124 /* This test is true if the dependent insn reads the flags but
11125 not any other potentially set register. */
11126 if (!reg_overlap_mentioned_p (set, PATTERN (insn)))
11127 return 0;
11128
11129 if (set2 && reg_overlap_mentioned_p (set2, PATTERN (insn)))
11130 return 0;
11131
11132 return 1;
11133 }
11134
11135 /* A subroutine of ix86_adjust_cost -- return true iff INSN has a memory
11136 address with operands set by DEP_INSN. */
11137
11138 static int
11139 ix86_agi_dependant (insn, dep_insn, insn_type)
11140 rtx insn, dep_insn;
11141 enum attr_type insn_type;
11142 {
11143 rtx addr;
11144
11145 if (insn_type == TYPE_LEA
11146 && TARGET_PENTIUM)
11147 {
11148 addr = PATTERN (insn);
11149 if (GET_CODE (addr) == SET)
11150 ;
11151 else if (GET_CODE (addr) == PARALLEL
11152 && GET_CODE (XVECEXP (addr, 0, 0)) == SET)
11153 addr = XVECEXP (addr, 0, 0);
11154 else
11155 abort ();
11156 addr = SET_SRC (addr);
11157 }
11158 else
11159 {
11160 int i;
11161 extract_insn_cached (insn);
11162 for (i = recog_data.n_operands - 1; i >= 0; --i)
11163 if (GET_CODE (recog_data.operand[i]) == MEM)
11164 {
11165 addr = XEXP (recog_data.operand[i], 0);
11166 goto found;
11167 }
11168 return 0;
11169 found:;
11170 }
11171
11172 return modified_in_p (addr, dep_insn);
11173 }
11174
11175 static int
11176 ix86_adjust_cost (insn, link, dep_insn, cost)
11177 rtx insn, link, dep_insn;
11178 int cost;
11179 {
11180 enum attr_type insn_type, dep_insn_type;
11181 enum attr_memory memory, dep_memory;
11182 rtx set, set2;
11183 int dep_insn_code_number;
11184
11185 /* Anti and output dependencies have zero cost on all CPUs. */
11186 if (REG_NOTE_KIND (link) != 0)
11187 return 0;
11188
11189 dep_insn_code_number = recog_memoized (dep_insn);
11190
11191 /* If we can't recognize the insns, we can't really do anything. */
11192 if (dep_insn_code_number < 0 || recog_memoized (insn) < 0)
11193 return cost;
11194
11195 insn_type = get_attr_type (insn);
11196 dep_insn_type = get_attr_type (dep_insn);
11197
11198 switch (ix86_cpu)
11199 {
11200 case PROCESSOR_PENTIUM:
11201 /* Address Generation Interlock adds a cycle of latency. */
11202 if (ix86_agi_dependant (insn, dep_insn, insn_type))
11203 cost += 1;
11204
11205 /* ??? Compares pair with jump/setcc. */
11206 if (ix86_flags_dependant (insn, dep_insn, insn_type))
11207 cost = 0;
11208
11209 /* Floating point stores require the value to be ready one cycle earlier. */
11210 if (insn_type == TYPE_FMOV
11211 && get_attr_memory (insn) == MEMORY_STORE
11212 && !ix86_agi_dependant (insn, dep_insn, insn_type))
11213 cost += 1;
11214 break;
11215
11216 case PROCESSOR_PENTIUMPRO:
11217 memory = get_attr_memory (insn);
11218 dep_memory = get_attr_memory (dep_insn);
11219
11220 /* Since we can't represent delayed latencies of load+operation,
11221 increase the cost here for non-imov insns. */
11222 if (dep_insn_type != TYPE_IMOV
11223 && dep_insn_type != TYPE_FMOV
11224 && (dep_memory == MEMORY_LOAD || dep_memory == MEMORY_BOTH))
11225 cost += 1;
11226
11227 /* INT->FP conversion is expensive. */
11228 if (get_attr_fp_int_src (dep_insn))
11229 cost += 5;
11230
11231 /* There is one cycle extra latency between an FP op and a store. */
11232 if (insn_type == TYPE_FMOV
11233 && (set = single_set (dep_insn)) != NULL_RTX
11234 && (set2 = single_set (insn)) != NULL_RTX
11235 && rtx_equal_p (SET_DEST (set), SET_SRC (set2))
11236 && GET_CODE (SET_DEST (set2)) == MEM)
11237 cost += 1;
11238
11239 /* Show the ability of the reorder buffer to hide the latency of a load
11240 by executing it in parallel with the previous instruction when the
11241 previous instruction is not needed to compute the address. */
11242 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
11243 && !ix86_agi_dependant (insn, dep_insn, insn_type))
11244 {
11245 /* Claim that moves take one cycle, as the core can issue one load
11246 at a time and the next load can start a cycle later. */
11247 if (dep_insn_type == TYPE_IMOV
11248 || dep_insn_type == TYPE_FMOV)
11249 cost = 1;
11250 else if (cost > 1)
11251 cost--;
11252 }
11253 break;
11254
11255 case PROCESSOR_K6:
11256 memory = get_attr_memory (insn);
11257 dep_memory = get_attr_memory (dep_insn);
11258 /* The esp dependency is resolved before the instruction is really
11259 finished. */
11260 if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
11261 && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
11262 return 1;
11263
11264 /* Since we can't represent delayed latencies of load+operation,
11265 increase the cost here for non-imov insns. */
11266 if (dep_memory == MEMORY_LOAD || dep_memory == MEMORY_BOTH)
11267 cost += (dep_insn_type != TYPE_IMOV) ? 2 : 1;
11268
11269 /* INT->FP conversion is expensive. */
11270 if (get_attr_fp_int_src (dep_insn))
11271 cost += 5;
11272
11273 /* Show the ability of the reorder buffer to hide the latency of a load
11274 by executing it in parallel with the previous instruction when the
11275 previous instruction is not needed to compute the address. */
11276 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
11277 && !ix86_agi_dependant (insn, dep_insn, insn_type))
11278 {
11279 /* Claim that moves take one cycle, as the core can issue one load
11280 at a time and the next load can start a cycle later. */
11281 if (dep_insn_type == TYPE_IMOV
11282 || dep_insn_type == TYPE_FMOV)
11283 cost = 1;
11284 else if (cost > 2)
11285 cost -= 2;
11286 else
11287 cost = 1;
11288 }
11289 break;
11290
11291 case PROCESSOR_ATHLON:
11292 memory = get_attr_memory (insn);
11293 dep_memory = get_attr_memory (dep_insn);
11294
11295 if (dep_memory == MEMORY_LOAD || dep_memory == MEMORY_BOTH)
11296 {
11297 if (dep_insn_type == TYPE_IMOV || dep_insn_type == TYPE_FMOV)
11298 cost += 2;
11299 else
11300 cost += 3;
11301 }
11302 /* Show the ability of the reorder buffer to hide the latency of a load
11303 by executing it in parallel with the previous instruction when the
11304 previous instruction is not needed to compute the address. */
11305 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
11306 && !ix86_agi_dependant (insn, dep_insn, insn_type))
11307 {
11308 /* Claim that moves take one cycle, as the core can issue one load
11309 at a time and the next load can start a cycle later. */
11310 if (dep_insn_type == TYPE_IMOV
11311 || dep_insn_type == TYPE_FMOV)
11312 cost = 0;
11313 else if (cost >= 3)
11314 cost -= 3;
11315 else
11316 cost = 0;
11317 }
11318
11319 default:
11320 break;
11321 }
11322
11323 return cost;
11324 }
11325
11326 static union
11327 {
11328 struct ppro_sched_data
11329 {
11330 rtx decode[3];
11331 int issued_this_cycle;
11332 } ppro;
11333 } ix86_sched_data;
11334
11335 static enum attr_ppro_uops
11336 ix86_safe_ppro_uops (insn)
11337 rtx insn;
11338 {
11339 if (recog_memoized (insn) >= 0)
11340 return get_attr_ppro_uops (insn);
11341 else
11342 return PPRO_UOPS_MANY;
11343 }
11344
11345 static void
11346 ix86_dump_ppro_packet (dump)
11347 FILE *dump;
11348 {
11349 if (ix86_sched_data.ppro.decode[0])
11350 {
11351 fprintf (dump, "PPRO packet: %d",
11352 INSN_UID (ix86_sched_data.ppro.decode[0]));
11353 if (ix86_sched_data.ppro.decode[1])
11354 fprintf (dump, " %d", INSN_UID (ix86_sched_data.ppro.decode[1]));
11355 if (ix86_sched_data.ppro.decode[2])
11356 fprintf (dump, " %d", INSN_UID (ix86_sched_data.ppro.decode[2]));
11357 fputc ('\n', dump);
11358 }
11359 }
11360
11361 /* We're beginning a new block. Initialize data structures as necessary. */
11362
11363 static void
11364 ix86_sched_init (dump, sched_verbose, veclen)
11365 FILE *dump ATTRIBUTE_UNUSED;
11366 int sched_verbose ATTRIBUTE_UNUSED;
11367 int veclen ATTRIBUTE_UNUSED;
11368 {
11369 memset (&ix86_sched_data, 0, sizeof (ix86_sched_data));
11370 }
11371
11372 /* Shift INSN to SLOT, and shift everything else down. */
11373
11374 static void
11375 ix86_reorder_insn (insnp, slot)
11376 rtx *insnp, *slot;
11377 {
11378 if (insnp != slot)
11379 {
11380 rtx insn = *insnp;
11381 do
11382 insnp[0] = insnp[1];
11383 while (++insnp != slot);
11384 *insnp = insn;
11385 }
11386 }
11387
11388 static void
11389 ix86_sched_reorder_ppro (ready, e_ready)
11390 rtx *ready;
11391 rtx *e_ready;
11392 {
11393 rtx decode[3];
11394 enum attr_ppro_uops cur_uops;
11395 int issued_this_cycle;
11396 rtx *insnp;
11397 int i;
11398
11399 /* At this point .ppro.decode contains the state of the three
11400 decoders from last "cycle". That is, those insns that were
11401 actually independent. But here we're scheduling for the
11402 decoder, and we may find things that are decodable in the
11403 same cycle. */
11404
11405 memcpy (decode, ix86_sched_data.ppro.decode, sizeof (decode));
11406 issued_this_cycle = 0;
11407
11408 insnp = e_ready;
11409 cur_uops = ix86_safe_ppro_uops (*insnp);
11410
11411 /* If the decoders are empty, and we have a complex insn at the
11412 head of the priority queue, let it issue without complaint. */
11413 if (decode[0] == NULL)
11414 {
11415 if (cur_uops == PPRO_UOPS_MANY)
11416 {
11417 decode[0] = *insnp;
11418 goto ppro_done;
11419 }
11420
11421 /* Otherwise, search for a 2-4 uop insn to issue. */
11422 while (cur_uops != PPRO_UOPS_FEW)
11423 {
11424 if (insnp == ready)
11425 break;
11426 cur_uops = ix86_safe_ppro_uops (*--insnp);
11427 }
11428
11429 /* If so, move it to the head of the line. */
11430 if (cur_uops == PPRO_UOPS_FEW)
11431 ix86_reorder_insn (insnp, e_ready);
11432
11433 /* Issue the head of the queue. */
11434 issued_this_cycle = 1;
11435 decode[0] = *e_ready--;
11436 }
11437
11438 /* Look for simple insns to fill in the other two slots. */
11439 for (i = 1; i < 3; ++i)
11440 if (decode[i] == NULL)
11441 {
11442 if (ready > e_ready)
11443 goto ppro_done;
11444
11445 insnp = e_ready;
11446 cur_uops = ix86_safe_ppro_uops (*insnp);
11447 while (cur_uops != PPRO_UOPS_ONE)
11448 {
11449 if (insnp == ready)
11450 break;
11451 cur_uops = ix86_safe_ppro_uops (*--insnp);
11452 }
11453
11454 /* Found one. Move it to the head of the queue and issue it. */
11455 if (cur_uops == PPRO_UOPS_ONE)
11456 {
11457 ix86_reorder_insn (insnp, e_ready);
11458 decode[i] = *e_ready--;
11459 issued_this_cycle++;
11460 continue;
11461 }
11462
11463 /* ??? Didn't find one. Ideally, here we would do a lazy split
11464 of 2-uop insns, issue one and queue the other. */
11465 }
11466
11467 ppro_done:
11468 if (issued_this_cycle == 0)
11469 issued_this_cycle = 1;
11470 ix86_sched_data.ppro.issued_this_cycle = issued_this_cycle;
11471 }
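/* Worked example: with empty decoders and a ready queue of
   { one, few, one, one } (lowest priority first), the head insn is a
   1-uop insn, so the code above scans back, finds the 2-4 uop insn,
   rotates it to the head and issues it in decoder slot 0; the two
   remaining 1-uop insns are then issued into slots 1 and 2, yielding a
   4-1-1 style packet of three insns this cycle.  */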
11472
11473 /* We are about to begin issuing insns for this clock cycle.
11474 Override the default sort algorithm to better slot instructions. */
11475 static int
11476 ix86_sched_reorder (dump, sched_verbose, ready, n_readyp, clock_var)
11477 FILE *dump ATTRIBUTE_UNUSED;
11478 int sched_verbose ATTRIBUTE_UNUSED;
11479 rtx *ready;
11480 int *n_readyp;
11481 int clock_var ATTRIBUTE_UNUSED;
11482 {
11483 int n_ready = *n_readyp;
11484 rtx *e_ready = ready + n_ready - 1;
11485
11486 /* Make sure to go ahead and initialize key items in
11487 ix86_sched_data if we are not going to bother trying to
11488 reorder the ready queue. */
11489 if (n_ready < 2)
11490 {
11491 ix86_sched_data.ppro.issued_this_cycle = 1;
11492 goto out;
11493 }
11494
11495 switch (ix86_cpu)
11496 {
11497 default:
11498 break;
11499
11500 case PROCESSOR_PENTIUMPRO:
11501 ix86_sched_reorder_ppro (ready, e_ready);
11502 break;
11503 }
11504
11505 out:
11506 return ix86_issue_rate ();
11507 }
11508
11509 /* We are about to issue INSN. Return the number of insns left on the
11510 ready queue that can be issued this cycle. */
11511
11512 static int
11513 ix86_variable_issue (dump, sched_verbose, insn, can_issue_more)
11514 FILE *dump;
11515 int sched_verbose;
11516 rtx insn;
11517 int can_issue_more;
11518 {
11519 int i;
11520 switch (ix86_cpu)
11521 {
11522 default:
11523 return can_issue_more - 1;
11524
11525 case PROCESSOR_PENTIUMPRO:
11526 {
11527 enum attr_ppro_uops uops = ix86_safe_ppro_uops (insn);
11528
11529 if (uops == PPRO_UOPS_MANY)
11530 {
11531 if (sched_verbose)
11532 ix86_dump_ppro_packet (dump);
11533 ix86_sched_data.ppro.decode[0] = insn;
11534 ix86_sched_data.ppro.decode[1] = NULL;
11535 ix86_sched_data.ppro.decode[2] = NULL;
11536 if (sched_verbose)
11537 ix86_dump_ppro_packet (dump);
11538 ix86_sched_data.ppro.decode[0] = NULL;
11539 }
11540 else if (uops == PPRO_UOPS_FEW)
11541 {
11542 if (sched_verbose)
11543 ix86_dump_ppro_packet (dump);
11544 ix86_sched_data.ppro.decode[0] = insn;
11545 ix86_sched_data.ppro.decode[1] = NULL;
11546 ix86_sched_data.ppro.decode[2] = NULL;
11547 }
11548 else
11549 {
11550 for (i = 0; i < 3; ++i)
11551 if (ix86_sched_data.ppro.decode[i] == NULL)
11552 {
11553 ix86_sched_data.ppro.decode[i] = insn;
11554 break;
11555 }
11556 if (i == 3)
11557 abort ();
11558 if (i == 2)
11559 {
11560 if (sched_verbose)
11561 ix86_dump_ppro_packet (dump);
11562 ix86_sched_data.ppro.decode[0] = NULL;
11563 ix86_sched_data.ppro.decode[1] = NULL;
11564 ix86_sched_data.ppro.decode[2] = NULL;
11565 }
11566 }
11567 }
11568 return --ix86_sched_data.ppro.issued_this_cycle;
11569 }
11570 }
11571
11572 static int
11573 ia32_use_dfa_pipeline_interface ()
11574 {
11575 if (ix86_cpu == PROCESSOR_PENTIUM)
11576 return 1;
11577 return 0;
11578 }
11579
11580 /* How many alternative schedules to try. This should be as wide as the
11581 scheduling freedom in the DFA, but no wider. Making this value too
11582 large results in extra work for the scheduler. */
11583
11584 static int
11585 ia32_multipass_dfa_lookahead ()
11586 {
11587 if (ix86_cpu == PROCESSOR_PENTIUM)
11588 return 2;
11589 else
11590 return 0;
11591 }
11592
11593 \f
11594 /* Walk through INSNS and look for MEM references whose address is DSTREG or
11595 SRCREG and set the memory attributes to those of DSTREF and SRCREF, as
11596 appropriate. */
11597
11598 void
11599 ix86_set_move_mem_attrs (insns, dstref, srcref, dstreg, srcreg)
11600 rtx insns;
11601 rtx dstref, srcref, dstreg, srcreg;
11602 {
11603 rtx insn;
11604
11605 for (insn = insns; insn != 0 ; insn = NEXT_INSN (insn))
11606 if (INSN_P (insn))
11607 ix86_set_move_mem_attrs_1 (PATTERN (insn), dstref, srcref,
11608 dstreg, srcreg);
11609 }
11610
11611 /* Subroutine of above to actually do the updating by recursively walking
11612 the rtx. */
11613
11614 static void
11615 ix86_set_move_mem_attrs_1 (x, dstref, srcref, dstreg, srcreg)
11616 rtx x;
11617 rtx dstref, srcref, dstreg, srcreg;
11618 {
11619 enum rtx_code code = GET_CODE (x);
11620 const char *format_ptr = GET_RTX_FORMAT (code);
11621 int i, j;
11622
11623 if (code == MEM && XEXP (x, 0) == dstreg)
11624 MEM_COPY_ATTRIBUTES (x, dstref);
11625 else if (code == MEM && XEXP (x, 0) == srcreg)
11626 MEM_COPY_ATTRIBUTES (x, srcref);
11627
11628 for (i = 0; i < GET_RTX_LENGTH (code); i++, format_ptr++)
11629 {
11630 if (*format_ptr == 'e')
11631 ix86_set_move_mem_attrs_1 (XEXP (x, i), dstref, srcref,
11632 dstreg, srcreg);
11633 else if (*format_ptr == 'E')
11634 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
11635 ix86_set_move_mem_attrs_1 (XVECEXP (x, i, j), dstref, srcref,
11636 dstreg, srcreg);
11637 }
11638 }
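/* Minimal usage sketch (hypothetical): a caller that has emitted a move
   sequence SEQ addressing memory through the registers taken from the
   original MEMs would propagate the MEM attributes like so.  */
#if 0
static void
example_set_move_mem_attrs (seq, dst_mem, src_mem)
     rtx seq, dst_mem, src_mem;
{
  rtx dstreg = XEXP (dst_mem, 0);	/* address register used for stores */
  rtx srcreg = XEXP (src_mem, 0);	/* address register used for loads */

  ix86_set_move_mem_attrs (seq, dst_mem, src_mem, dstreg, srcreg);
}
#endif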
11639 \f
11640 /* Compute the alignment given to a constant that is being placed in memory.
11641 EXP is the constant and ALIGN is the alignment that the object would
11642 ordinarily have.
11643 The value of this function is used instead of that alignment to align
11644 the object. */
11645
11646 int
11647 ix86_constant_alignment (exp, align)
11648 tree exp;
11649 int align;
11650 {
11651 if (TREE_CODE (exp) == REAL_CST)
11652 {
11653 if (TYPE_MODE (TREE_TYPE (exp)) == DFmode && align < 64)
11654 return 64;
11655 else if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (exp))) && align < 128)
11656 return 128;
11657 }
11658 else if (TREE_CODE (exp) == STRING_CST && TREE_STRING_LENGTH (exp) >= 31
11659 && align < 256)
11660 return 256;
11661
11662 return align;
11663 }
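/* Worked examples: a DFmode REAL_CST that would otherwise get 32-bit
   alignment is returned as 64; a REAL_CST whose mode satisfies
   ALIGN_MODE_128 is returned as 128; a STRING_CST of 31 or more
   characters is returned as 256, presumably so that string operations
   can start on an aligned boundary.  Anything else keeps the alignment
   it came in with.  */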
11664
11665 /* Compute the alignment for a static variable.
11666 TYPE is the data type, and ALIGN is the alignment that
11667 the object would ordinarily have. The value of this function is used
11668 instead of that alignment to align the object. */
11669
11670 int
11671 ix86_data_alignment (type, align)
11672 tree type;
11673 int align;
11674 {
11675 if (AGGREGATE_TYPE_P (type)
11676 && TYPE_SIZE (type)
11677 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
11678 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 256
11679 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 256)
11680 return 256;
11681
11682 /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
11683 to a 16-byte boundary. */
11684 if (TARGET_64BIT)
11685 {
11686 if (AGGREGATE_TYPE_P (type)
11687 && TYPE_SIZE (type)
11688 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
11689 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 128
11690 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
11691 return 128;
11692 }
11693
11694 if (TREE_CODE (type) == ARRAY_TYPE)
11695 {
11696 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
11697 return 64;
11698 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
11699 return 128;
11700 }
11701 else if (TREE_CODE (type) == COMPLEX_TYPE)
11702 {
11703
11704 if (TYPE_MODE (type) == DCmode && align < 64)
11705 return 64;
11706 if (TYPE_MODE (type) == XCmode && align < 128)
11707 return 128;
11708 }
11709 else if ((TREE_CODE (type) == RECORD_TYPE
11710 || TREE_CODE (type) == UNION_TYPE
11711 || TREE_CODE (type) == QUAL_UNION_TYPE)
11712 && TYPE_FIELDS (type))
11713 {
11714 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
11715 return 64;
11716 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
11717 return 128;
11718 }
11719 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
11720 || TREE_CODE (type) == INTEGER_TYPE)
11721 {
11722 if (TYPE_MODE (type) == DFmode && align < 64)
11723 return 64;
11724 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
11725 return 128;
11726 }
11727
11728 return align;
11729 }
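/* Worked examples: an aggregate of 256 bits or more is aligned to 256;
   on x86-64 an aggregate of 128 bits or more is aligned to at least 128
   per the ABI; an array of DFmode elements gets 64, and scalar or
   vector types whose mode satisfies ALIGN_MODE_128 get 128.  */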
11730
11731 /* Compute the alignment for a local variable.
11732 TYPE is the data type, and ALIGN is the alignment that
11733 the object would ordinarily have. The value of this function is used
11734 instead of that alignment to align the object. */
11735
11736 int
11737 ix86_local_alignment (type, align)
11738 tree type;
11739 int align;
11740 {
11741 /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
11742 to a 16-byte boundary. */
11743 if (TARGET_64BIT)
11744 {
11745 if (AGGREGATE_TYPE_P (type)
11746 && TYPE_SIZE (type)
11747 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
11748 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 16
11749 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
11750 return 128;
11751 }
11752 if (TREE_CODE (type) == ARRAY_TYPE)
11753 {
11754 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
11755 return 64;
11756 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
11757 return 128;
11758 }
11759 else if (TREE_CODE (type) == COMPLEX_TYPE)
11760 {
11761 if (TYPE_MODE (type) == DCmode && align < 64)
11762 return 64;
11763 if (TYPE_MODE (type) == XCmode && align < 128)
11764 return 128;
11765 }
11766 else if ((TREE_CODE (type) == RECORD_TYPE
11767 || TREE_CODE (type) == UNION_TYPE
11768 || TREE_CODE (type) == QUAL_UNION_TYPE)
11769 && TYPE_FIELDS (type))
11770 {
11771 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
11772 return 64;
11773 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
11774 return 128;
11775 }
11776 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
11777 || TREE_CODE (type) == INTEGER_TYPE)
11778 {
11779
11780 if (TYPE_MODE (type) == DFmode && align < 64)
11781 return 64;
11782 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
11783 return 128;
11784 }
11785 return align;
11786 }
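/* Worked example: a local array of doubles gets its stack slot aligned
   to 64 bits even when the default would be 32, which keeps 8-byte FP
   loads and stores naturally aligned.  */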
11787 \f
11788 /* Emit RTL insns to initialize the variable parts of a trampoline.
11789 FNADDR is an RTX for the address of the function's pure code.
11790 CXT is an RTX for the static chain value for the function. */
11791 void
11792 x86_initialize_trampoline (tramp, fnaddr, cxt)
11793 rtx tramp, fnaddr, cxt;
11794 {
11795 if (!TARGET_64BIT)
11796 {
11797 /* Compute offset from the end of the jmp to the target function. */
11798 rtx disp = expand_binop (SImode, sub_optab, fnaddr,
11799 plus_constant (tramp, 10),
11800 NULL_RTX, 1, OPTAB_DIRECT);
11801 emit_move_insn (gen_rtx_MEM (QImode, tramp),
11802 gen_int_mode (0xb9, QImode));
11803 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 1)), cxt);
11804 emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, 5)),
11805 gen_int_mode (0xe9, QImode));
11806 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 6)), disp);
11807 }
11808 else
11809 {
11810 int offset = 0;
11811 /* Try to load the address using the shorter movl instead of movabs.
11812 We may want to support movq for kernel mode, but the kernel does not use
11813 trampolines at the moment. */
11814 if (x86_64_zero_extended_value (fnaddr))
11815 {
11816 fnaddr = copy_to_mode_reg (DImode, fnaddr);
11817 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
11818 gen_int_mode (0xbb41, HImode));
11819 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, offset + 2)),
11820 gen_lowpart (SImode, fnaddr));
11821 offset += 6;
11822 }
11823 else
11824 {
11825 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
11826 gen_int_mode (0xbb49, HImode));
11827 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
11828 fnaddr);
11829 offset += 10;
11830 }
11831 /* Load static chain using movabs to r10. */
11832 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
11833 gen_int_mode (0xba49, HImode));
11834 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
11835 cxt);
11836 offset += 10;
11837 /* Jump to r11. */
11838 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
11839 gen_int_mode (0xff49, HImode));
11840 emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, offset+2)),
11841 gen_int_mode (0xe3, QImode));
11842 offset += 3;
11843 if (offset > TRAMPOLINE_SIZE)
11844 abort ();
11845 }
11846
11847 #ifdef TRANSFER_FROM_TRAMPOLINE
11848 emit_library_call (gen_rtx (SYMBOL_REF, Pmode, "__enable_execute_stack"),
11849 LCT_NORMAL, VOIDmode, 1, tramp, Pmode);
11850 #endif
11851 }
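/* For reference, the byte sequences emitted above decode as follows
   (immediates are stored little-endian):

     32-bit:    b9 <cxt:4>           movl   $CXT, %ecx
                e9 <disp:4>          jmp    FNADDR          (rel32)

     64-bit:    41 bb <fnaddr:4>     movl   $FNADDR, %r11d  (zero-extended case)
       or       49 bb <fnaddr:8>     movabs $FNADDR, %r11
                49 ba <cxt:8>        movabs $CXT, %r10
                49 ff e3             jmp    *%r11  */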
11852 \f
11853 #define def_builtin(MASK, NAME, TYPE, CODE) \
11854 do { \
11855 if ((MASK) & target_flags) \
11856 builtin_function ((NAME), (TYPE), (CODE), BUILT_IN_MD, \
11857 NULL, NULL_TREE); \
11858 } while (0)
11859
11860 struct builtin_description
11861 {
11862 const unsigned int mask;
11863 const enum insn_code icode;
11864 const char *const name;
11865 const enum ix86_builtins code;
11866 const enum rtx_code comparison;
11867 const unsigned int flag;
11868 };
11869
11870 /* Used for builtins that are enabled both by -msse and -msse2. */
11871 #define MASK_SSE1 (MASK_SSE | MASK_SSE2)
11872
11873 static const struct builtin_description bdesc_comi[] =
11874 {
11875 { MASK_SSE1, CODE_FOR_sse_comi, "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS, UNEQ, 0 },
11876 { MASK_SSE1, CODE_FOR_sse_comi, "__builtin_ia32_comilt", IX86_BUILTIN_COMILTSS, UNLT, 0 },
11877 { MASK_SSE1, CODE_FOR_sse_comi, "__builtin_ia32_comile", IX86_BUILTIN_COMILESS, UNLE, 0 },
11878 { MASK_SSE1, CODE_FOR_sse_comi, "__builtin_ia32_comigt", IX86_BUILTIN_COMIGTSS, GT, 0 },
11879 { MASK_SSE1, CODE_FOR_sse_comi, "__builtin_ia32_comige", IX86_BUILTIN_COMIGESS, GE, 0 },
11880 { MASK_SSE1, CODE_FOR_sse_comi, "__builtin_ia32_comineq", IX86_BUILTIN_COMINEQSS, LTGT, 0 },
11881 { MASK_SSE1, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomieq", IX86_BUILTIN_UCOMIEQSS, UNEQ, 0 },
11882 { MASK_SSE1, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomilt", IX86_BUILTIN_UCOMILTSS, UNLT, 0 },
11883 { MASK_SSE1, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomile", IX86_BUILTIN_UCOMILESS, UNLE, 0 },
11884 { MASK_SSE1, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomigt", IX86_BUILTIN_UCOMIGTSS, GT, 0 },
11885 { MASK_SSE1, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomige", IX86_BUILTIN_UCOMIGESS, GE, 0 },
11886 { MASK_SSE1, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS, LTGT, 0 },
11887 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdeq", IX86_BUILTIN_COMIEQSD, UNEQ, 0 },
11888 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdlt", IX86_BUILTIN_COMILTSD, UNLT, 0 },
11889 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdle", IX86_BUILTIN_COMILESD, UNLE, 0 },
11890 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdgt", IX86_BUILTIN_COMIGTSD, GT, 0 },
11891 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdge", IX86_BUILTIN_COMIGESD, GE, 0 },
11892 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdneq", IX86_BUILTIN_COMINEQSD, LTGT, 0 },
11893 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdeq", IX86_BUILTIN_UCOMIEQSD, UNEQ, 0 },
11894 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdlt", IX86_BUILTIN_UCOMILTSD, UNLT, 0 },
11895 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdle", IX86_BUILTIN_UCOMILESD, UNLE, 0 },
11896 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdgt", IX86_BUILTIN_UCOMIGTSD, GT, 0 },
11897 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdge", IX86_BUILTIN_UCOMIGESD, GE, 0 },
11898 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdneq", IX86_BUILTIN_UCOMINEQSD, LTGT, 0 },
11899 };
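/* Usage sketch (illustrative): each entry in bdesc_comi becomes a builtin
   taking two V4SF operands (V2DF for the SSE2 rows) and returning int,
   assuming the GCC vector extension is available for the argument type:

     typedef float __v4sf __attribute__ ((vector_size (16)));
     int f (__v4sf a, __v4sf b) { return __builtin_ia32_comieq (a, b); }  */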
11900
11901 static const struct builtin_description bdesc_2arg[] =
11902 {
11903 /* SSE */
11904 { MASK_SSE1, CODE_FOR_addv4sf3, "__builtin_ia32_addps", IX86_BUILTIN_ADDPS, 0, 0 },
11905 { MASK_SSE1, CODE_FOR_subv4sf3, "__builtin_ia32_subps", IX86_BUILTIN_SUBPS, 0, 0 },
11906 { MASK_SSE1, CODE_FOR_mulv4sf3, "__builtin_ia32_mulps", IX86_BUILTIN_MULPS, 0, 0 },
11907 { MASK_SSE1, CODE_FOR_divv4sf3, "__builtin_ia32_divps", IX86_BUILTIN_DIVPS, 0, 0 },
11908 { MASK_SSE1, CODE_FOR_vmaddv4sf3, "__builtin_ia32_addss", IX86_BUILTIN_ADDSS, 0, 0 },
11909 { MASK_SSE1, CODE_FOR_vmsubv4sf3, "__builtin_ia32_subss", IX86_BUILTIN_SUBSS, 0, 0 },
11910 { MASK_SSE1, CODE_FOR_vmmulv4sf3, "__builtin_ia32_mulss", IX86_BUILTIN_MULSS, 0, 0 },
11911 { MASK_SSE1, CODE_FOR_vmdivv4sf3, "__builtin_ia32_divss", IX86_BUILTIN_DIVSS, 0, 0 },
11912
11913 { MASK_SSE1, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpeqps", IX86_BUILTIN_CMPEQPS, EQ, 0 },
11914 { MASK_SSE1, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpltps", IX86_BUILTIN_CMPLTPS, LT, 0 },
11915 { MASK_SSE1, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpleps", IX86_BUILTIN_CMPLEPS, LE, 0 },
11916 { MASK_SSE1, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS, LT, 1 },
11917 { MASK_SSE1, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpgeps", IX86_BUILTIN_CMPGEPS, LE, 1 },
11918 { MASK_SSE1, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpunordps", IX86_BUILTIN_CMPUNORDPS, UNORDERED, 0 },
11919 { MASK_SSE1, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpneqps", IX86_BUILTIN_CMPNEQPS, EQ, 0 },
11920 { MASK_SSE1, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpnltps", IX86_BUILTIN_CMPNLTPS, LT, 0 },
11921 { MASK_SSE1, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpnleps", IX86_BUILTIN_CMPNLEPS, LE, 0 },
11922 { MASK_SSE1, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpngtps", IX86_BUILTIN_CMPNGTPS, LT, 1 },
11923 { MASK_SSE1, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpngeps", IX86_BUILTIN_CMPNGEPS, LE, 1 },
11924 { MASK_SSE1, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpordps", IX86_BUILTIN_CMPORDPS, UNORDERED, 0 },
11925 { MASK_SSE1, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpeqss", IX86_BUILTIN_CMPEQSS, EQ, 0 },
11926 { MASK_SSE1, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpltss", IX86_BUILTIN_CMPLTSS, LT, 0 },
11927 { MASK_SSE1, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpless", IX86_BUILTIN_CMPLESS, LE, 0 },
11928 { MASK_SSE1, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpunordss", IX86_BUILTIN_CMPUNORDSS, UNORDERED, 0 },
11929 { MASK_SSE1, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpneqss", IX86_BUILTIN_CMPNEQSS, EQ, 0 },
11930 { MASK_SSE1, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpnltss", IX86_BUILTIN_CMPNLTSS, LT, 0 },
11931 { MASK_SSE1, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS, LE, 0 },
11932 { MASK_SSE1, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS, UNORDERED, 0 },
11933
11934 { MASK_SSE1, CODE_FOR_sminv4sf3, "__builtin_ia32_minps", IX86_BUILTIN_MINPS, 0, 0 },
11935 { MASK_SSE1, CODE_FOR_smaxv4sf3, "__builtin_ia32_maxps", IX86_BUILTIN_MAXPS, 0, 0 },
11936 { MASK_SSE1, CODE_FOR_vmsminv4sf3, "__builtin_ia32_minss", IX86_BUILTIN_MINSS, 0, 0 },
11937 { MASK_SSE1, CODE_FOR_vmsmaxv4sf3, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS, 0, 0 },
11938
11939 { MASK_SSE1, CODE_FOR_sse_andv4sf3, "__builtin_ia32_andps", IX86_BUILTIN_ANDPS, 0, 0 },
11940 { MASK_SSE1, CODE_FOR_sse_nandv4sf3, "__builtin_ia32_andnps", IX86_BUILTIN_ANDNPS, 0, 0 },
11941 { MASK_SSE1, CODE_FOR_sse_iorv4sf3, "__builtin_ia32_orps", IX86_BUILTIN_ORPS, 0, 0 },
11942 { MASK_SSE1, CODE_FOR_sse_xorv4sf3, "__builtin_ia32_xorps", IX86_BUILTIN_XORPS, 0, 0 },
11943
11944 { MASK_SSE1, CODE_FOR_sse_movss, "__builtin_ia32_movss", IX86_BUILTIN_MOVSS, 0, 0 },
11945 { MASK_SSE1, CODE_FOR_sse_movhlps, "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS, 0, 0 },
11946 { MASK_SSE1, CODE_FOR_sse_movlhps, "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS, 0, 0 },
11947 { MASK_SSE1, CODE_FOR_sse_unpckhps, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS, 0, 0 },
11948 { MASK_SSE1, CODE_FOR_sse_unpcklps, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS, 0, 0 },
11949
11950 /* MMX */
11951 { MASK_MMX, CODE_FOR_addv8qi3, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB, 0, 0 },
11952 { MASK_MMX, CODE_FOR_addv4hi3, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW, 0, 0 },
11953 { MASK_MMX, CODE_FOR_addv2si3, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD, 0, 0 },
11954 { MASK_MMX, CODE_FOR_subv8qi3, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB, 0, 0 },
11955 { MASK_MMX, CODE_FOR_subv4hi3, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW, 0, 0 },
11956 { MASK_MMX, CODE_FOR_subv2si3, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD, 0, 0 },
11957
11958 { MASK_MMX, CODE_FOR_ssaddv8qi3, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB, 0, 0 },
11959 { MASK_MMX, CODE_FOR_ssaddv4hi3, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW, 0, 0 },
11960 { MASK_MMX, CODE_FOR_sssubv8qi3, "__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB, 0, 0 },
11961 { MASK_MMX, CODE_FOR_sssubv4hi3, "__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW, 0, 0 },
11962 { MASK_MMX, CODE_FOR_usaddv8qi3, "__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB, 0, 0 },
11963 { MASK_MMX, CODE_FOR_usaddv4hi3, "__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW, 0, 0 },
11964 { MASK_MMX, CODE_FOR_ussubv8qi3, "__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB, 0, 0 },
11965 { MASK_MMX, CODE_FOR_ussubv4hi3, "__builtin_ia32_psubusw", IX86_BUILTIN_PSUBUSW, 0, 0 },
11966
11967 { MASK_MMX, CODE_FOR_mulv4hi3, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW, 0, 0 },
11968 { MASK_MMX, CODE_FOR_smulv4hi3_highpart, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW, 0, 0 },
11969 { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_umulv4hi3_highpart, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW, 0, 0 },
11970
11971 { MASK_MMX, CODE_FOR_mmx_anddi3, "__builtin_ia32_pand", IX86_BUILTIN_PAND, 0, 0 },
11972 { MASK_MMX, CODE_FOR_mmx_nanddi3, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN, 0, 0 },
11973 { MASK_MMX, CODE_FOR_mmx_iordi3, "__builtin_ia32_por", IX86_BUILTIN_POR, 0, 0 },
11974 { MASK_MMX, CODE_FOR_mmx_xordi3, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR, 0, 0 },
11975
11976 { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB, 0, 0 },
11977 { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_mmx_uavgv4hi3, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW, 0, 0 },
11978
11979 { MASK_MMX, CODE_FOR_eqv8qi3, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB, 0, 0 },
11980 { MASK_MMX, CODE_FOR_eqv4hi3, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW, 0, 0 },
11981 { MASK_MMX, CODE_FOR_eqv2si3, "__builtin_ia32_pcmpeqd", IX86_BUILTIN_PCMPEQD, 0, 0 },
11982 { MASK_MMX, CODE_FOR_gtv8qi3, "__builtin_ia32_pcmpgtb", IX86_BUILTIN_PCMPGTB, 0, 0 },
11983 { MASK_MMX, CODE_FOR_gtv4hi3, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW, 0, 0 },
11984 { MASK_MMX, CODE_FOR_gtv2si3, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD, 0, 0 },
11985
11986 { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_umaxv8qi3, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB, 0, 0 },
11987 { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_smaxv4hi3, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW, 0, 0 },
11988 { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_uminv8qi3, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB, 0, 0 },
11989 { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_sminv4hi3, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW, 0, 0 },
11990
11991 { MASK_MMX, CODE_FOR_mmx_punpckhbw, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW, 0, 0 },
11992 { MASK_MMX, CODE_FOR_mmx_punpckhwd, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD, 0, 0 },
11993 { MASK_MMX, CODE_FOR_mmx_punpckhdq, "__builtin_ia32_punpckhdq", IX86_BUILTIN_PUNPCKHDQ, 0, 0 },
11994 { MASK_MMX, CODE_FOR_mmx_punpcklbw, "__builtin_ia32_punpcklbw", IX86_BUILTIN_PUNPCKLBW, 0, 0 },
11995 { MASK_MMX, CODE_FOR_mmx_punpcklwd, "__builtin_ia32_punpcklwd", IX86_BUILTIN_PUNPCKLWD, 0, 0 },
11996 { MASK_MMX, CODE_FOR_mmx_punpckldq, "__builtin_ia32_punpckldq", IX86_BUILTIN_PUNPCKLDQ, 0, 0 },
11997
11998 /* Special. */
11999 { MASK_MMX, CODE_FOR_mmx_packsswb, 0, IX86_BUILTIN_PACKSSWB, 0, 0 },
12000 { MASK_MMX, CODE_FOR_mmx_packssdw, 0, IX86_BUILTIN_PACKSSDW, 0, 0 },
12001 { MASK_MMX, CODE_FOR_mmx_packuswb, 0, IX86_BUILTIN_PACKUSWB, 0, 0 },
12002
12003 { MASK_SSE1, CODE_FOR_cvtpi2ps, 0, IX86_BUILTIN_CVTPI2PS, 0, 0 },
12004 { MASK_SSE1, CODE_FOR_cvtsi2ss, 0, IX86_BUILTIN_CVTSI2SS, 0, 0 },
12005
12006 { MASK_MMX, CODE_FOR_ashlv4hi3, 0, IX86_BUILTIN_PSLLW, 0, 0 },
12007 { MASK_MMX, CODE_FOR_ashlv4hi3, 0, IX86_BUILTIN_PSLLWI, 0, 0 },
12008 { MASK_MMX, CODE_FOR_ashlv2si3, 0, IX86_BUILTIN_PSLLD, 0, 0 },
12009 { MASK_MMX, CODE_FOR_ashlv2si3, 0, IX86_BUILTIN_PSLLDI, 0, 0 },
12010 { MASK_MMX, CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQ, 0, 0 },
12011 { MASK_MMX, CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQI, 0, 0 },
12012
12013 { MASK_MMX, CODE_FOR_lshrv4hi3, 0, IX86_BUILTIN_PSRLW, 0, 0 },
12014 { MASK_MMX, CODE_FOR_lshrv4hi3, 0, IX86_BUILTIN_PSRLWI, 0, 0 },
12015 { MASK_MMX, CODE_FOR_lshrv2si3, 0, IX86_BUILTIN_PSRLD, 0, 0 },
12016 { MASK_MMX, CODE_FOR_lshrv2si3, 0, IX86_BUILTIN_PSRLDI, 0, 0 },
12017 { MASK_MMX, CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQ, 0, 0 },
12018 { MASK_MMX, CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQI, 0, 0 },
12019
12020 { MASK_MMX, CODE_FOR_ashrv4hi3, 0, IX86_BUILTIN_PSRAW, 0, 0 },
12021 { MASK_MMX, CODE_FOR_ashrv4hi3, 0, IX86_BUILTIN_PSRAWI, 0, 0 },
12022 { MASK_MMX, CODE_FOR_ashrv2si3, 0, IX86_BUILTIN_PSRAD, 0, 0 },
12023 { MASK_MMX, CODE_FOR_ashrv2si3, 0, IX86_BUILTIN_PSRADI, 0, 0 },
12024
12025 { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_mmx_psadbw, 0, IX86_BUILTIN_PSADBW, 0, 0 },
12026 { MASK_MMX, CODE_FOR_mmx_pmaddwd, 0, IX86_BUILTIN_PMADDWD, 0, 0 },
12027
12028 /* SSE2 */
12029 { MASK_SSE2, CODE_FOR_addv2df3, "__builtin_ia32_addpd", IX86_BUILTIN_ADDPD, 0, 0 },
12030 { MASK_SSE2, CODE_FOR_subv2df3, "__builtin_ia32_subpd", IX86_BUILTIN_SUBPD, 0, 0 },
12031 { MASK_SSE2, CODE_FOR_mulv2df3, "__builtin_ia32_mulpd", IX86_BUILTIN_MULPD, 0, 0 },
12032 { MASK_SSE2, CODE_FOR_divv2df3, "__builtin_ia32_divpd", IX86_BUILTIN_DIVPD, 0, 0 },
12033 { MASK_SSE2, CODE_FOR_vmaddv2df3, "__builtin_ia32_addsd", IX86_BUILTIN_ADDSD, 0, 0 },
12034 { MASK_SSE2, CODE_FOR_vmsubv2df3, "__builtin_ia32_subsd", IX86_BUILTIN_SUBSD, 0, 0 },
12035 { MASK_SSE2, CODE_FOR_vmmulv2df3, "__builtin_ia32_mulsd", IX86_BUILTIN_MULSD, 0, 0 },
12036 { MASK_SSE2, CODE_FOR_vmdivv2df3, "__builtin_ia32_divsd", IX86_BUILTIN_DIVSD, 0, 0 },
12037
12038 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpeqpd", IX86_BUILTIN_CMPEQPD, EQ, 0 },
12039 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpltpd", IX86_BUILTIN_CMPLTPD, LT, 0 },
12040 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmplepd", IX86_BUILTIN_CMPLEPD, LE, 0 },
12041 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpgtpd", IX86_BUILTIN_CMPGTPD, LT, 1 },
12042 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpgepd", IX86_BUILTIN_CMPGEPD, LE, 1 },
12043 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpunordpd", IX86_BUILTIN_CMPUNORDPD, UNORDERED, 0 },
12044 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpneqpd", IX86_BUILTIN_CMPNEQPD, EQ, 0 },
12045 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpnltpd", IX86_BUILTIN_CMPNLTPD, LT, 0 },
12046 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpnlepd", IX86_BUILTIN_CMPNLEPD, LE, 0 },
12047 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpngtpd", IX86_BUILTIN_CMPNGTPD, LT, 1 },
12048 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpngepd", IX86_BUILTIN_CMPNGEPD, LE, 1 },
12049 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpordpd", IX86_BUILTIN_CMPORDPD, UNORDERED, 0 },
12050 { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmpeqsd", IX86_BUILTIN_CMPEQSD, EQ, 0 },
12051 { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmpltsd", IX86_BUILTIN_CMPLTSD, LT, 0 },
12052 { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmplesd", IX86_BUILTIN_CMPLESD, LE, 0 },
12053 { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmpunordsd", IX86_BUILTIN_CMPUNORDSD, UNORDERED, 0 },
12054 { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpneqsd", IX86_BUILTIN_CMPNEQSD, EQ, 0 },
12055 { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpnltsd", IX86_BUILTIN_CMPNLTSD, LT, 0 },
12056 { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpnlesd", IX86_BUILTIN_CMPNLESD, LE, 0 },
12057 { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpordsd", IX86_BUILTIN_CMPORDSD, UNORDERED, 0 },
12058
12059 { MASK_SSE2, CODE_FOR_sminv2df3, "__builtin_ia32_minpd", IX86_BUILTIN_MINPD, 0, 0 },
12060 { MASK_SSE2, CODE_FOR_smaxv2df3, "__builtin_ia32_maxpd", IX86_BUILTIN_MAXPD, 0, 0 },
12061 { MASK_SSE2, CODE_FOR_vmsminv2df3, "__builtin_ia32_minsd", IX86_BUILTIN_MINSD, 0, 0 },
12062 { MASK_SSE2, CODE_FOR_vmsmaxv2df3, "__builtin_ia32_maxsd", IX86_BUILTIN_MAXSD, 0, 0 },
12063
12064 { MASK_SSE2, CODE_FOR_sse2_andv2df3, "__builtin_ia32_andpd", IX86_BUILTIN_ANDPD, 0, 0 },
12065 { MASK_SSE2, CODE_FOR_sse2_nandv2df3, "__builtin_ia32_andnpd", IX86_BUILTIN_ANDNPD, 0, 0 },
12066 { MASK_SSE2, CODE_FOR_sse2_iorv2df3, "__builtin_ia32_orpd", IX86_BUILTIN_ORPD, 0, 0 },
12067 { MASK_SSE2, CODE_FOR_sse2_xorv2df3, "__builtin_ia32_xorpd", IX86_BUILTIN_XORPD, 0, 0 },
12068
12069 { MASK_SSE2, CODE_FOR_sse2_movsd, "__builtin_ia32_movsd", IX86_BUILTIN_MOVSD, 0, 0 },
12070 { MASK_SSE2, CODE_FOR_sse2_unpckhpd, "__builtin_ia32_unpckhpd", IX86_BUILTIN_UNPCKHPD, 0, 0 },
12071 { MASK_SSE2, CODE_FOR_sse2_unpcklpd, "__builtin_ia32_unpcklpd", IX86_BUILTIN_UNPCKLPD, 0, 0 },
12072
12073 /* SSE2 MMX */
12074 { MASK_SSE2, CODE_FOR_addv16qi3, "__builtin_ia32_paddb128", IX86_BUILTIN_PADDB128, 0, 0 },
12075 { MASK_SSE2, CODE_FOR_addv8hi3, "__builtin_ia32_paddw128", IX86_BUILTIN_PADDW128, 0, 0 },
12076 { MASK_SSE2, CODE_FOR_addv4si3, "__builtin_ia32_paddd128", IX86_BUILTIN_PADDD128, 0, 0 },
12077 { MASK_SSE2, CODE_FOR_addv4si3, "__builtin_ia32_paddq128", IX86_BUILTIN_PADDQ128, 0, 0 },
12078 { MASK_SSE2, CODE_FOR_subv16qi3, "__builtin_ia32_psubb128", IX86_BUILTIN_PSUBB128, 0, 0 },
12079 { MASK_SSE2, CODE_FOR_subv8hi3, "__builtin_ia32_psubw128", IX86_BUILTIN_PSUBW128, 0, 0 },
12080 { MASK_SSE2, CODE_FOR_subv4si3, "__builtin_ia32_psubd128", IX86_BUILTIN_PSUBD128, 0, 0 },
12081 { MASK_SSE2, CODE_FOR_subv4si3, "__builtin_ia32_psubq128", IX86_BUILTIN_PSUBQ128, 0, 0 },
12082
12083 { MASK_SSE2, CODE_FOR_ssaddv16qi3, "__builtin_ia32_paddsb128", IX86_BUILTIN_PADDSB128, 0, 0 },
12084 { MASK_SSE2, CODE_FOR_ssaddv8hi3, "__builtin_ia32_paddsw128", IX86_BUILTIN_PADDSW128, 0, 0 },
12085 { MASK_SSE2, CODE_FOR_sssubv16qi3, "__builtin_ia32_psubsb128", IX86_BUILTIN_PSUBSB128, 0, 0 },
12086 { MASK_SSE2, CODE_FOR_sssubv8hi3, "__builtin_ia32_psubsw128", IX86_BUILTIN_PSUBSW128, 0, 0 },
12087 { MASK_SSE2, CODE_FOR_usaddv16qi3, "__builtin_ia32_paddusb128", IX86_BUILTIN_PADDUSB128, 0, 0 },
12088 { MASK_SSE2, CODE_FOR_usaddv8hi3, "__builtin_ia32_paddusw128", IX86_BUILTIN_PADDUSW128, 0, 0 },
12089 { MASK_SSE2, CODE_FOR_ussubv16qi3, "__builtin_ia32_psubusb128", IX86_BUILTIN_PSUBUSB128, 0, 0 },
12090 { MASK_SSE2, CODE_FOR_ussubv8hi3, "__builtin_ia32_psubusw128", IX86_BUILTIN_PSUBUSW128, 0, 0 },
12091
12092 { MASK_SSE2, CODE_FOR_mulv8hi3, "__builtin_ia32_pmullw128", IX86_BUILTIN_PMULLW128, 0, 0 },
12093 { MASK_SSE2, CODE_FOR_smulv8hi3_highpart, "__builtin_ia32_pmulhw128", IX86_BUILTIN_PMULHW128, 0, 0 },
12094 { MASK_SSE2, CODE_FOR_sse2_umulsidi3, "__builtin_ia32_pmuludq", IX86_BUILTIN_PMULUDQ, 0, 0 },
12095 { MASK_SSE2, CODE_FOR_sse2_umulv2siv2di3, "__builtin_ia32_pmuludq128", IX86_BUILTIN_PMULUDQ128, 0, 0 },
12096
12097 { MASK_SSE2, CODE_FOR_sse2_andv2di3, "__builtin_ia32_pand128", IX86_BUILTIN_PAND128, 0, 0 },
12098 { MASK_SSE2, CODE_FOR_sse2_nandv2di3, "__builtin_ia32_pandn128", IX86_BUILTIN_PANDN128, 0, 0 },
12099 { MASK_SSE2, CODE_FOR_sse2_iorv2di3, "__builtin_ia32_por128", IX86_BUILTIN_POR128, 0, 0 },
12100 { MASK_SSE2, CODE_FOR_sse2_xorv2di3, "__builtin_ia32_pxor128", IX86_BUILTIN_PXOR128, 0, 0 },
12101
12102 { MASK_SSE2, CODE_FOR_sse2_uavgv16qi3, "__builtin_ia32_pavgb128", IX86_BUILTIN_PAVGB128, 0, 0 },
12103 { MASK_SSE2, CODE_FOR_sse2_uavgv8hi3, "__builtin_ia32_pavgw128", IX86_BUILTIN_PAVGW128, 0, 0 },
12104
12105 { MASK_SSE2, CODE_FOR_eqv16qi3, "__builtin_ia32_pcmpeqb128", IX86_BUILTIN_PCMPEQB128, 0, 0 },
12106 { MASK_SSE2, CODE_FOR_eqv8hi3, "__builtin_ia32_pcmpeqw128", IX86_BUILTIN_PCMPEQW128, 0, 0 },
12107 { MASK_SSE2, CODE_FOR_eqv4si3, "__builtin_ia32_pcmpeqd128", IX86_BUILTIN_PCMPEQD128, 0, 0 },
12108 { MASK_SSE2, CODE_FOR_gtv16qi3, "__builtin_ia32_pcmpgtb128", IX86_BUILTIN_PCMPGTB128, 0, 0 },
12109 { MASK_SSE2, CODE_FOR_gtv8hi3, "__builtin_ia32_pcmpgtw128", IX86_BUILTIN_PCMPGTW128, 0, 0 },
12110 { MASK_SSE2, CODE_FOR_gtv4si3, "__builtin_ia32_pcmpgtd128", IX86_BUILTIN_PCMPGTD128, 0, 0 },
12111
12112 { MASK_SSE2, CODE_FOR_umaxv16qi3, "__builtin_ia32_pmaxub128", IX86_BUILTIN_PMAXUB128, 0, 0 },
12113 { MASK_SSE2, CODE_FOR_smaxv8hi3, "__builtin_ia32_pmaxsw128", IX86_BUILTIN_PMAXSW128, 0, 0 },
12114 { MASK_SSE2, CODE_FOR_uminv16qi3, "__builtin_ia32_pminub128", IX86_BUILTIN_PMINUB128, 0, 0 },
12115 { MASK_SSE2, CODE_FOR_sminv8hi3, "__builtin_ia32_pminsw128", IX86_BUILTIN_PMINSW128, 0, 0 },
12116
12117 { MASK_SSE2, CODE_FOR_sse2_punpckhbw, "__builtin_ia32_punpckhbw128", IX86_BUILTIN_PUNPCKHBW128, 0, 0 },
12118 { MASK_SSE2, CODE_FOR_sse2_punpckhwd, "__builtin_ia32_punpckhwd128", IX86_BUILTIN_PUNPCKHWD128, 0, 0 },
12119 { MASK_SSE2, CODE_FOR_sse2_punpckhdq, "__builtin_ia32_punpckhdq128", IX86_BUILTIN_PUNPCKHDQ128, 0, 0 },
12120 { MASK_SSE2, CODE_FOR_sse2_punpckhqdq, "__builtin_ia32_punpckhqdq128", IX86_BUILTIN_PUNPCKHQDQ128, 0, 0 },
12121 { MASK_SSE2, CODE_FOR_sse2_punpcklbw, "__builtin_ia32_punpcklbw128", IX86_BUILTIN_PUNPCKLBW128, 0, 0 },
12122 { MASK_SSE2, CODE_FOR_sse2_punpcklwd, "__builtin_ia32_punpcklwd128", IX86_BUILTIN_PUNPCKLWD128, 0, 0 },
12123 { MASK_SSE2, CODE_FOR_sse2_punpckldq, "__builtin_ia32_punpckldq128", IX86_BUILTIN_PUNPCKLDQ128, 0, 0 },
12124 { MASK_SSE2, CODE_FOR_sse2_punpcklqdq, "__builtin_ia32_punpcklqdq128", IX86_BUILTIN_PUNPCKLQDQ128, 0, 0 },
12125
12126 { MASK_SSE2, CODE_FOR_sse2_packsswb, "__builtin_ia32_packsswb128", IX86_BUILTIN_PACKSSWB128, 0, 0 },
12127 { MASK_SSE2, CODE_FOR_sse2_packssdw, "__builtin_ia32_packssdw128", IX86_BUILTIN_PACKSSDW128, 0, 0 },
12128 { MASK_SSE2, CODE_FOR_sse2_packuswb, "__builtin_ia32_packuswb128", IX86_BUILTIN_PACKUSWB128, 0, 0 },
12129
12130 { MASK_SSE2, CODE_FOR_umulv8hi3_highpart, "__builtin_ia32_pmulhuw128", IX86_BUILTIN_PMULHUW128, 0, 0 },
12131 { MASK_SSE2, CODE_FOR_sse2_psadbw, 0, IX86_BUILTIN_PSADBW128, 0, 0 },
12132
12133 { MASK_SSE2, CODE_FOR_ashlv8hi3_ti, 0, IX86_BUILTIN_PSLLW128, 0, 0 },
12134 { MASK_SSE2, CODE_FOR_ashlv8hi3, 0, IX86_BUILTIN_PSLLWI128, 0, 0 },
12135 { MASK_SSE2, CODE_FOR_ashlv4si3_ti, 0, IX86_BUILTIN_PSLLD128, 0, 0 },
12136 { MASK_SSE2, CODE_FOR_ashlv4si3, 0, IX86_BUILTIN_PSLLDI128, 0, 0 },
12137 { MASK_SSE2, CODE_FOR_ashlv2di3_ti, 0, IX86_BUILTIN_PSLLQ128, 0, 0 },
12138 { MASK_SSE2, CODE_FOR_ashlv2di3, 0, IX86_BUILTIN_PSLLQI128, 0, 0 },
12139
12140 { MASK_SSE2, CODE_FOR_lshrv8hi3_ti, 0, IX86_BUILTIN_PSRLW128, 0, 0 },
12141 { MASK_SSE2, CODE_FOR_lshrv8hi3, 0, IX86_BUILTIN_PSRLWI128, 0, 0 },
12142 { MASK_SSE2, CODE_FOR_lshrv4si3_ti, 0, IX86_BUILTIN_PSRLD128, 0, 0 },
12143 { MASK_SSE2, CODE_FOR_lshrv4si3, 0, IX86_BUILTIN_PSRLDI128, 0, 0 },
12144 { MASK_SSE2, CODE_FOR_lshrv2di3_ti, 0, IX86_BUILTIN_PSRLQ128, 0, 0 },
12145 { MASK_SSE2, CODE_FOR_lshrv2di3, 0, IX86_BUILTIN_PSRLQI128, 0, 0 },
12146
12147 { MASK_SSE2, CODE_FOR_ashrv8hi3_ti, 0, IX86_BUILTIN_PSRAW128, 0, 0 },
12148 { MASK_SSE2, CODE_FOR_ashrv8hi3, 0, IX86_BUILTIN_PSRAWI128, 0, 0 },
12149 { MASK_SSE2, CODE_FOR_ashrv4si3_ti, 0, IX86_BUILTIN_PSRAD128, 0, 0 },
12150 { MASK_SSE2, CODE_FOR_ashrv4si3, 0, IX86_BUILTIN_PSRADI128, 0, 0 },
12151
12152 { MASK_SSE2, CODE_FOR_sse2_pmaddwd, 0, IX86_BUILTIN_PMADDWD128, 0, 0 },
12153
12154 { MASK_SSE2, CODE_FOR_cvtsi2sd, 0, IX86_BUILTIN_CVTSI2SD, 0, 0 },
12155 { MASK_SSE2, CODE_FOR_cvtsd2ss, 0, IX86_BUILTIN_CVTSD2SS, 0, 0 },
12156 { MASK_SSE2, CODE_FOR_cvtss2sd, 0, IX86_BUILTIN_CVTSS2SD, 0, 0 }
12157 };
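/* Usage sketch (illustrative): a typical two-operand entry, once
   registered below, is callable from C with vector arguments, e.g.

     typedef float __v4sf __attribute__ ((vector_size (16)));
     __v4sf add (__v4sf a, __v4sf b) { return __builtin_ia32_addps (a, b); }  */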
12158
12159 static const struct builtin_description bdesc_1arg[] =
12160 {
12161 { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_mmx_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB, 0, 0 },
12162 { MASK_SSE1, CODE_FOR_sse_movmskps, 0, IX86_BUILTIN_MOVMSKPS, 0, 0 },
12163
12164 { MASK_SSE1, CODE_FOR_sqrtv4sf2, 0, IX86_BUILTIN_SQRTPS, 0, 0 },
12165 { MASK_SSE1, CODE_FOR_rsqrtv4sf2, 0, IX86_BUILTIN_RSQRTPS, 0, 0 },
12166 { MASK_SSE1, CODE_FOR_rcpv4sf2, 0, IX86_BUILTIN_RCPPS, 0, 0 },
12167
12168 { MASK_SSE1, CODE_FOR_cvtps2pi, 0, IX86_BUILTIN_CVTPS2PI, 0, 0 },
12169 { MASK_SSE1, CODE_FOR_cvtss2si, 0, IX86_BUILTIN_CVTSS2SI, 0, 0 },
12170 { MASK_SSE1, CODE_FOR_cvttps2pi, 0, IX86_BUILTIN_CVTTPS2PI, 0, 0 },
12171 { MASK_SSE1, CODE_FOR_cvttss2si, 0, IX86_BUILTIN_CVTTSS2SI, 0, 0 },
12172
12173 { MASK_SSE2, CODE_FOR_sse2_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB128, 0, 0 },
12174 { MASK_SSE2, CODE_FOR_sse2_movmskpd, 0, IX86_BUILTIN_MOVMSKPD, 0, 0 },
12175 { MASK_SSE2, CODE_FOR_sse2_movq2dq, 0, IX86_BUILTIN_MOVQ2DQ, 0, 0 },
12176 { MASK_SSE2, CODE_FOR_sse2_movdq2q, 0, IX86_BUILTIN_MOVDQ2Q, 0, 0 },
12177
12178 { MASK_SSE2, CODE_FOR_sqrtv2df2, 0, IX86_BUILTIN_SQRTPD, 0, 0 },
12179
12180 { MASK_SSE2, CODE_FOR_cvtdq2pd, 0, IX86_BUILTIN_CVTDQ2PD, 0, 0 },
12181 { MASK_SSE2, CODE_FOR_cvtdq2ps, 0, IX86_BUILTIN_CVTDQ2PS, 0, 0 },
12182
12183 { MASK_SSE2, CODE_FOR_cvtpd2dq, 0, IX86_BUILTIN_CVTPD2DQ, 0, 0 },
12184 { MASK_SSE2, CODE_FOR_cvtpd2pi, 0, IX86_BUILTIN_CVTPD2PI, 0, 0 },
12185 { MASK_SSE2, CODE_FOR_cvtpd2ps, 0, IX86_BUILTIN_CVTPD2PS, 0, 0 },
12186 { MASK_SSE2, CODE_FOR_cvttpd2dq, 0, IX86_BUILTIN_CVTTPD2DQ, 0, 0 },
12187 { MASK_SSE2, CODE_FOR_cvttpd2pi, 0, IX86_BUILTIN_CVTTPD2PI, 0, 0 },
12188
12189 { MASK_SSE2, CODE_FOR_cvtpi2pd, 0, IX86_BUILTIN_CVTPI2PD, 0, 0 },
12190
12191 { MASK_SSE2, CODE_FOR_cvtsd2si, 0, IX86_BUILTIN_CVTSD2SI, 0, 0 },
12192 { MASK_SSE2, CODE_FOR_cvttsd2si, 0, IX86_BUILTIN_CVTTSD2SI, 0, 0 },
12193
12194 { MASK_SSE2, CODE_FOR_cvtps2dq, 0, IX86_BUILTIN_CVTPS2DQ, 0, 0 },
12195 { MASK_SSE2, CODE_FOR_cvtps2pd, 0, IX86_BUILTIN_CVTPS2PD, 0, 0 },
12196 { MASK_SSE2, CODE_FOR_cvttps2dq, 0, IX86_BUILTIN_CVTTPS2DQ, 0, 0 },
12197
12198 { MASK_SSE2, CODE_FOR_sse2_movq, 0, IX86_BUILTIN_MOVQ, 0, 0 }
12199 };
12200
12201 void
12202 ix86_init_builtins ()
12203 {
12204 if (TARGET_MMX)
12205 ix86_init_mmx_sse_builtins ();
12206 }
12207
12208 /* Set up all the MMX/SSE builtins. This is not called if TARGET_MMX
12209 is zero. Otherwise, if TARGET_SSE is not set, only expand the MMX
12210 builtins. */
12211 static void
12212 ix86_init_mmx_sse_builtins ()
12213 {
12214 const struct builtin_description * d;
12215 size_t i;
12216
12217 tree pchar_type_node = build_pointer_type (char_type_node);
12218 tree pfloat_type_node = build_pointer_type (float_type_node);
12219 tree pv2si_type_node = build_pointer_type (V2SI_type_node);
12220 tree pv2di_type_node = build_pointer_type (V2DI_type_node);
12221 tree pdi_type_node = build_pointer_type (long_long_unsigned_type_node);
12222
12223 /* Comparisons. */
12224 tree int_ftype_v4sf_v4sf
12225 = build_function_type_list (integer_type_node,
12226 V4SF_type_node, V4SF_type_node, NULL_TREE);
12227 tree v4si_ftype_v4sf_v4sf
12228 = build_function_type_list (V4SI_type_node,
12229 V4SF_type_node, V4SF_type_node, NULL_TREE);
12230 /* MMX/SSE/integer conversions. */
12231 tree int_ftype_v4sf
12232 = build_function_type_list (integer_type_node,
12233 V4SF_type_node, NULL_TREE);
12234 tree int_ftype_v8qi
12235 = build_function_type_list (integer_type_node, V8QI_type_node, NULL_TREE);
12236 tree v4sf_ftype_v4sf_int
12237 = build_function_type_list (V4SF_type_node,
12238 V4SF_type_node, integer_type_node, NULL_TREE);
12239 tree v4sf_ftype_v4sf_v2si
12240 = build_function_type_list (V4SF_type_node,
12241 V4SF_type_node, V2SI_type_node, NULL_TREE);
12242 tree int_ftype_v4hi_int
12243 = build_function_type_list (integer_type_node,
12244 V4HI_type_node, integer_type_node, NULL_TREE);
12245 tree v4hi_ftype_v4hi_int_int
12246 = build_function_type_list (V4HI_type_node, V4HI_type_node,
12247 integer_type_node, integer_type_node,
12248 NULL_TREE);
12249 /* Miscellaneous. */
12250 tree v8qi_ftype_v4hi_v4hi
12251 = build_function_type_list (V8QI_type_node,
12252 V4HI_type_node, V4HI_type_node, NULL_TREE);
12253 tree v4hi_ftype_v2si_v2si
12254 = build_function_type_list (V4HI_type_node,
12255 V2SI_type_node, V2SI_type_node, NULL_TREE);
12256 tree v4sf_ftype_v4sf_v4sf_int
12257 = build_function_type_list (V4SF_type_node,
12258 V4SF_type_node, V4SF_type_node,
12259 integer_type_node, NULL_TREE);
12260 tree v2si_ftype_v4hi_v4hi
12261 = build_function_type_list (V2SI_type_node,
12262 V4HI_type_node, V4HI_type_node, NULL_TREE);
12263 tree v4hi_ftype_v4hi_int
12264 = build_function_type_list (V4HI_type_node,
12265 V4HI_type_node, integer_type_node, NULL_TREE);
12266 tree v4hi_ftype_v4hi_di
12267 = build_function_type_list (V4HI_type_node,
12268 V4HI_type_node, long_long_unsigned_type_node,
12269 NULL_TREE);
12270 tree v2si_ftype_v2si_di
12271 = build_function_type_list (V2SI_type_node,
12272 V2SI_type_node, long_long_unsigned_type_node,
12273 NULL_TREE);
12274 tree void_ftype_void
12275 = build_function_type (void_type_node, void_list_node);
12276 tree void_ftype_unsigned
12277 = build_function_type_list (void_type_node, unsigned_type_node, NULL_TREE);
12278 tree unsigned_ftype_void
12279 = build_function_type (unsigned_type_node, void_list_node);
12280 tree di_ftype_void
12281 = build_function_type (long_long_unsigned_type_node, void_list_node);
12282 tree v4sf_ftype_void
12283 = build_function_type (V4SF_type_node, void_list_node);
12284 tree v2si_ftype_v4sf
12285 = build_function_type_list (V2SI_type_node, V4SF_type_node, NULL_TREE);
12286 /* Loads/stores. */
12287 tree void_ftype_v8qi_v8qi_pchar
12288 = build_function_type_list (void_type_node,
12289 V8QI_type_node, V8QI_type_node,
12290 pchar_type_node, NULL_TREE);
12291 tree v4sf_ftype_pfloat
12292 = build_function_type_list (V4SF_type_node, pfloat_type_node, NULL_TREE);
12293 /* @@@ the type is bogus */
12294 tree v4sf_ftype_v4sf_pv2si
12295 = build_function_type_list (V4SF_type_node,
12296 V4SF_type_node, pv2si_type_node, NULL_TREE);
12297 tree void_ftype_pv2si_v4sf
12298 = build_function_type_list (void_type_node,
12299 pv2si_type_node, V4SF_type_node, NULL_TREE);
12300 tree void_ftype_pfloat_v4sf
12301 = build_function_type_list (void_type_node,
12302 pfloat_type_node, V4SF_type_node, NULL_TREE);
12303 tree void_ftype_pdi_di
12304 = build_function_type_list (void_type_node,
12305 pdi_type_node, long_long_unsigned_type_node,
12306 NULL_TREE);
12307 tree void_ftype_pv2di_v2di
12308 = build_function_type_list (void_type_node,
12309 pv2di_type_node, V2DI_type_node, NULL_TREE);
12310 /* Normal vector unops. */
12311 tree v4sf_ftype_v4sf
12312 = build_function_type_list (V4SF_type_node, V4SF_type_node, NULL_TREE);
12313
12314 /* Normal vector binops. */
12315 tree v4sf_ftype_v4sf_v4sf
12316 = build_function_type_list (V4SF_type_node,
12317 V4SF_type_node, V4SF_type_node, NULL_TREE);
12318 tree v8qi_ftype_v8qi_v8qi
12319 = build_function_type_list (V8QI_type_node,
12320 V8QI_type_node, V8QI_type_node, NULL_TREE);
12321 tree v4hi_ftype_v4hi_v4hi
12322 = build_function_type_list (V4HI_type_node,
12323 V4HI_type_node, V4HI_type_node, NULL_TREE);
12324 tree v2si_ftype_v2si_v2si
12325 = build_function_type_list (V2SI_type_node,
12326 V2SI_type_node, V2SI_type_node, NULL_TREE);
12327 tree di_ftype_di_di
12328 = build_function_type_list (long_long_unsigned_type_node,
12329 long_long_unsigned_type_node,
12330 long_long_unsigned_type_node, NULL_TREE);
12331
12332 tree v2si_ftype_v2sf
12333 = build_function_type_list (V2SI_type_node, V2SF_type_node, NULL_TREE);
12334 tree v2sf_ftype_v2si
12335 = build_function_type_list (V2SF_type_node, V2SI_type_node, NULL_TREE);
12336 tree v2si_ftype_v2si
12337 = build_function_type_list (V2SI_type_node, V2SI_type_node, NULL_TREE);
12338 tree v2sf_ftype_v2sf
12339 = build_function_type_list (V2SF_type_node, V2SF_type_node, NULL_TREE);
12340 tree v2sf_ftype_v2sf_v2sf
12341 = build_function_type_list (V2SF_type_node,
12342 V2SF_type_node, V2SF_type_node, NULL_TREE);
12343 tree v2si_ftype_v2sf_v2sf
12344 = build_function_type_list (V2SI_type_node,
12345 V2SF_type_node, V2SF_type_node, NULL_TREE);
12346 tree pint_type_node = build_pointer_type (integer_type_node);
12347 tree pdouble_type_node = build_pointer_type (double_type_node);
12348 tree int_ftype_v2df_v2df
12349 = build_function_type_list (integer_type_node,
12350 V2DF_type_node, V2DF_type_node, NULL_TREE);
12351
12352 tree ti_ftype_void
12353 = build_function_type (intTI_type_node, void_list_node);
12354 tree v2di_ftype_void
12355 = build_function_type (V2DI_type_node, void_list_node);
12356 tree ti_ftype_ti_ti
12357 = build_function_type_list (intTI_type_node,
12358 intTI_type_node, intTI_type_node, NULL_TREE);
12359 tree void_ftype_pvoid
12360 = build_function_type_list (void_type_node, ptr_type_node, NULL_TREE);
12361 tree v2di_ftype_di
12362 = build_function_type_list (V2DI_type_node,
12363 long_long_unsigned_type_node, NULL_TREE);
12364 tree di_ftype_v2di
12365 = build_function_type_list (long_long_unsigned_type_node,
12366 V2DI_type_node, NULL_TREE);
12367 tree v4sf_ftype_v4si
12368 = build_function_type_list (V4SF_type_node, V4SI_type_node, NULL_TREE);
12369 tree v4si_ftype_v4sf
12370 = build_function_type_list (V4SI_type_node, V4SF_type_node, NULL_TREE);
12371 tree v2df_ftype_v4si
12372 = build_function_type_list (V2DF_type_node, V4SI_type_node, NULL_TREE);
12373 tree v4si_ftype_v2df
12374 = build_function_type_list (V4SI_type_node, V2DF_type_node, NULL_TREE);
12375 tree v2si_ftype_v2df
12376 = build_function_type_list (V2SI_type_node, V2DF_type_node, NULL_TREE);
12377 tree v4sf_ftype_v2df
12378 = build_function_type_list (V4SF_type_node, V2DF_type_node, NULL_TREE);
12379 tree v2df_ftype_v2si
12380 = build_function_type_list (V2DF_type_node, V2SI_type_node, NULL_TREE);
12381 tree v2df_ftype_v4sf
12382 = build_function_type_list (V2DF_type_node, V4SF_type_node, NULL_TREE);
12383 tree int_ftype_v2df
12384 = build_function_type_list (integer_type_node, V2DF_type_node, NULL_TREE);
12385 tree v2df_ftype_v2df_int
12386 = build_function_type_list (V2DF_type_node,
12387 V2DF_type_node, integer_type_node, NULL_TREE);
12388 tree v4sf_ftype_v4sf_v2df
12389 = build_function_type_list (V4SF_type_node,
12390 V4SF_type_node, V2DF_type_node, NULL_TREE);
12391 tree v2df_ftype_v2df_v4sf
12392 = build_function_type_list (V2DF_type_node,
12393 V2DF_type_node, V4SF_type_node, NULL_TREE);
12394 tree v2df_ftype_v2df_v2df_int
12395 = build_function_type_list (V2DF_type_node,
12396 V2DF_type_node, V2DF_type_node,
12397 integer_type_node,
12398 NULL_TREE);
12399 tree v2df_ftype_v2df_pv2si
12400 = build_function_type_list (V2DF_type_node,
12401 V2DF_type_node, pv2si_type_node, NULL_TREE);
12402 tree void_ftype_pv2si_v2df
12403 = build_function_type_list (void_type_node,
12404 pv2si_type_node, V2DF_type_node, NULL_TREE);
12405 tree void_ftype_pdouble_v2df
12406 = build_function_type_list (void_type_node,
12407 pdouble_type_node, V2DF_type_node, NULL_TREE);
12408 tree void_ftype_pint_int
12409 = build_function_type_list (void_type_node,
12410 pint_type_node, integer_type_node, NULL_TREE);
12411 tree void_ftype_v16qi_v16qi_pchar
12412 = build_function_type_list (void_type_node,
12413 V16QI_type_node, V16QI_type_node,
12414 pchar_type_node, NULL_TREE);
12415 tree v2df_ftype_pdouble
12416 = build_function_type_list (V2DF_type_node, pdouble_type_node, NULL_TREE);
12417 tree v2df_ftype_v2df_v2df
12418 = build_function_type_list (V2DF_type_node,
12419 V2DF_type_node, V2DF_type_node, NULL_TREE);
12420 tree v16qi_ftype_v16qi_v16qi
12421 = build_function_type_list (V16QI_type_node,
12422 V16QI_type_node, V16QI_type_node, NULL_TREE);
12423 tree v8hi_ftype_v8hi_v8hi
12424 = build_function_type_list (V8HI_type_node,
12425 V8HI_type_node, V8HI_type_node, NULL_TREE);
12426 tree v4si_ftype_v4si_v4si
12427 = build_function_type_list (V4SI_type_node,
12428 V4SI_type_node, V4SI_type_node, NULL_TREE);
12429 tree v2di_ftype_v2di_v2di
12430 = build_function_type_list (V2DI_type_node,
12431 V2DI_type_node, V2DI_type_node, NULL_TREE);
12432 tree v2di_ftype_v2df_v2df
12433 = build_function_type_list (V2DI_type_node,
12434 V2DF_type_node, V2DF_type_node, NULL_TREE);
12435 tree v2df_ftype_v2df
12436 = build_function_type_list (V2DF_type_node, V2DF_type_node, NULL_TREE);
12437 tree v2df_ftype_double
12438 = build_function_type_list (V2DF_type_node, double_type_node, NULL_TREE);
12439 tree v2df_ftype_double_double
12440 = build_function_type_list (V2DF_type_node,
12441 double_type_node, double_type_node, NULL_TREE);
12442 tree int_ftype_v8hi_int
12443 = build_function_type_list (integer_type_node,
12444 V8HI_type_node, integer_type_node, NULL_TREE);
12445 tree v8hi_ftype_v8hi_int_int
12446 = build_function_type_list (V8HI_type_node,
12447 V8HI_type_node, integer_type_node,
12448 integer_type_node, NULL_TREE);
12449 tree v2di_ftype_v2di_int
12450 = build_function_type_list (V2DI_type_node,
12451 V2DI_type_node, integer_type_node, NULL_TREE);
12452 tree v4si_ftype_v4si_int
12453 = build_function_type_list (V4SI_type_node,
12454 V4SI_type_node, integer_type_node, NULL_TREE);
12455 tree v8hi_ftype_v8hi_int
12456 = build_function_type_list (V8HI_type_node,
12457 V8HI_type_node, integer_type_node, NULL_TREE);
12458 tree v8hi_ftype_v8hi_v2di
12459 = build_function_type_list (V8HI_type_node,
12460 V8HI_type_node, V2DI_type_node, NULL_TREE);
12461 tree v4si_ftype_v4si_v2di
12462 = build_function_type_list (V4SI_type_node,
12463 V4SI_type_node, V2DI_type_node, NULL_TREE);
12464 tree v4si_ftype_v8hi_v8hi
12465 = build_function_type_list (V4SI_type_node,
12466 V8HI_type_node, V8HI_type_node, NULL_TREE);
12467 tree di_ftype_v8qi_v8qi
12468 = build_function_type_list (long_long_unsigned_type_node,
12469 V8QI_type_node, V8QI_type_node, NULL_TREE);
12470 tree v2di_ftype_v16qi_v16qi
12471 = build_function_type_list (V2DI_type_node,
12472 V16QI_type_node, V16QI_type_node, NULL_TREE);
12473 tree int_ftype_v16qi
12474 = build_function_type_list (integer_type_node, V16QI_type_node, NULL_TREE);
12475 tree v16qi_ftype_pchar
12476 = build_function_type_list (V16QI_type_node, pchar_type_node, NULL_TREE);
12477 tree void_ftype_pchar_v16qi
12478 = build_function_type_list (void_type_node,
12479 pchar_type_node, V16QI_type_node, NULL_TREE);
12480 tree v4si_ftype_pchar
12481 = build_function_type_list (V4SI_type_node, pchar_type_node, NULL_TREE);
12482 tree void_ftype_pchar_v4si
12483 = build_function_type_list (void_type_node,
12484 pchar_type_node, V4SI_type_node, NULL_TREE);
12485 tree v2di_ftype_v2di
12486 = build_function_type_list (V2DI_type_node, V2DI_type_node, NULL_TREE);
12487
12488 /* Add all builtins that are more or less simple operations on two
12489 operands. */
12490 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
12491 {
12492 /* Use one of the operands; the target can have a different mode for
12493 mask-generating compares. */
12494 enum machine_mode mode;
12495 tree type;
12496
12497 if (d->name == 0)
12498 continue;
12499 mode = insn_data[d->icode].operand[1].mode;
12500
12501 switch (mode)
12502 {
12503 case V16QImode:
12504 type = v16qi_ftype_v16qi_v16qi;
12505 break;
12506 case V8HImode:
12507 type = v8hi_ftype_v8hi_v8hi;
12508 break;
12509 case V4SImode:
12510 type = v4si_ftype_v4si_v4si;
12511 break;
12512 case V2DImode:
12513 type = v2di_ftype_v2di_v2di;
12514 break;
12515 case V2DFmode:
12516 type = v2df_ftype_v2df_v2df;
12517 break;
12518 case TImode:
12519 type = ti_ftype_ti_ti;
12520 break;
12521 case V4SFmode:
12522 type = v4sf_ftype_v4sf_v4sf;
12523 break;
12524 case V8QImode:
12525 type = v8qi_ftype_v8qi_v8qi;
12526 break;
12527 case V4HImode:
12528 type = v4hi_ftype_v4hi_v4hi;
12529 break;
12530 case V2SImode:
12531 type = v2si_ftype_v2si_v2si;
12532 break;
12533 case DImode:
12534 type = di_ftype_di_di;
12535 break;
12536
12537 default:
12538 abort ();
12539 }
12540
12541 /* Override for comparisons. */
12542 if (d->icode == CODE_FOR_maskcmpv4sf3
12543 || d->icode == CODE_FOR_maskncmpv4sf3
12544 || d->icode == CODE_FOR_vmmaskcmpv4sf3
12545 || d->icode == CODE_FOR_vmmaskncmpv4sf3)
12546 type = v4si_ftype_v4sf_v4sf;
12547
12548 if (d->icode == CODE_FOR_maskcmpv2df3
12549 || d->icode == CODE_FOR_maskncmpv2df3
12550 || d->icode == CODE_FOR_vmmaskcmpv2df3
12551 || d->icode == CODE_FOR_vmmaskncmpv2df3)
12552 type = v2di_ftype_v2df_v2df;
12553
12554 def_builtin (d->mask, d->name, type, d->code);
12555 }
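/* Example of the mapping above: CODE_FOR_addv4sf3 has operand[1] mode
   V4SFmode, so __builtin_ia32_addps is registered with type
   v4sf_ftype_v4sf_v4sf; the mask-generating compares are overridden to
   return an integer vector type (v4si or v2di) instead of the operand
   mode.  */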
12556
12557 /* Add the remaining MMX insns with somewhat more complicated types. */
12558 def_builtin (MASK_MMX, "__builtin_ia32_mmx_zero", di_ftype_void, IX86_BUILTIN_MMX_ZERO);
12559 def_builtin (MASK_MMX, "__builtin_ia32_emms", void_ftype_void, IX86_BUILTIN_EMMS);
12560 def_builtin (MASK_MMX, "__builtin_ia32_ldmxcsr", void_ftype_unsigned, IX86_BUILTIN_LDMXCSR);
12561 def_builtin (MASK_MMX, "__builtin_ia32_stmxcsr", unsigned_ftype_void, IX86_BUILTIN_STMXCSR);
12562 def_builtin (MASK_MMX, "__builtin_ia32_psllw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSLLW);
12563 def_builtin (MASK_MMX, "__builtin_ia32_pslld", v2si_ftype_v2si_di, IX86_BUILTIN_PSLLD);
12564 def_builtin (MASK_MMX, "__builtin_ia32_psllq", di_ftype_di_di, IX86_BUILTIN_PSLLQ);
12565
12566 def_builtin (MASK_MMX, "__builtin_ia32_psrlw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRLW);
12567 def_builtin (MASK_MMX, "__builtin_ia32_psrld", v2si_ftype_v2si_di, IX86_BUILTIN_PSRLD);
12568 def_builtin (MASK_MMX, "__builtin_ia32_psrlq", di_ftype_di_di, IX86_BUILTIN_PSRLQ);
12569
12570 def_builtin (MASK_MMX, "__builtin_ia32_psraw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRAW);
12571 def_builtin (MASK_MMX, "__builtin_ia32_psrad", v2si_ftype_v2si_di, IX86_BUILTIN_PSRAD);
12572
12573 def_builtin (MASK_MMX, "__builtin_ia32_pshufw", v4hi_ftype_v4hi_int, IX86_BUILTIN_PSHUFW);
12574 def_builtin (MASK_MMX, "__builtin_ia32_pmaddwd", v2si_ftype_v4hi_v4hi, IX86_BUILTIN_PMADDWD);
12575
12576 /* comi/ucomi insns. */
12577 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
12578 if (d->mask == MASK_SSE2)
12579 def_builtin (d->mask, d->name, int_ftype_v2df_v2df, d->code);
12580 else
12581 def_builtin (d->mask, d->name, int_ftype_v4sf_v4sf, d->code);
12582
12583 def_builtin (MASK_MMX, "__builtin_ia32_packsswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKSSWB);
12584 def_builtin (MASK_MMX, "__builtin_ia32_packssdw", v4hi_ftype_v2si_v2si, IX86_BUILTIN_PACKSSDW);
12585 def_builtin (MASK_MMX, "__builtin_ia32_packuswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKUSWB);
12586
12587 def_builtin (MASK_SSE1, "__builtin_ia32_cvtpi2ps", v4sf_ftype_v4sf_v2si, IX86_BUILTIN_CVTPI2PS);
12588 def_builtin (MASK_SSE1, "__builtin_ia32_cvtps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTPS2PI);
12589 def_builtin (MASK_SSE1, "__builtin_ia32_cvtsi2ss", v4sf_ftype_v4sf_int, IX86_BUILTIN_CVTSI2SS);
12590 def_builtin (MASK_SSE1, "__builtin_ia32_cvtss2si", int_ftype_v4sf, IX86_BUILTIN_CVTSS2SI);
12591 def_builtin (MASK_SSE1, "__builtin_ia32_cvttps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTTPS2PI);
12592 def_builtin (MASK_SSE1, "__builtin_ia32_cvttss2si", int_ftype_v4sf, IX86_BUILTIN_CVTTSS2SI);
12593
12594 def_builtin (MASK_SSE1 | MASK_3DNOW_A, "__builtin_ia32_pextrw", int_ftype_v4hi_int, IX86_BUILTIN_PEXTRW);
12595 def_builtin (MASK_SSE1 | MASK_3DNOW_A, "__builtin_ia32_pinsrw", v4hi_ftype_v4hi_int_int, IX86_BUILTIN_PINSRW);
12596
12597 def_builtin (MASK_SSE1 | MASK_3DNOW_A, "__builtin_ia32_maskmovq", void_ftype_v8qi_v8qi_pchar, IX86_BUILTIN_MASKMOVQ);
12598
12599 def_builtin (MASK_SSE1, "__builtin_ia32_loadaps", v4sf_ftype_pfloat, IX86_BUILTIN_LOADAPS);
12600 def_builtin (MASK_SSE1, "__builtin_ia32_loadups", v4sf_ftype_pfloat, IX86_BUILTIN_LOADUPS);
12601 def_builtin (MASK_SSE1, "__builtin_ia32_loadss", v4sf_ftype_pfloat, IX86_BUILTIN_LOADSS);
12602 def_builtin (MASK_SSE1, "__builtin_ia32_storeaps", void_ftype_pfloat_v4sf, IX86_BUILTIN_STOREAPS);
12603 def_builtin (MASK_SSE1, "__builtin_ia32_storeups", void_ftype_pfloat_v4sf, IX86_BUILTIN_STOREUPS);
12604 def_builtin (MASK_SSE1, "__builtin_ia32_storess", void_ftype_pfloat_v4sf, IX86_BUILTIN_STORESS);
12605
12606 def_builtin (MASK_SSE1, "__builtin_ia32_loadhps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADHPS);
12607 def_builtin (MASK_SSE1, "__builtin_ia32_loadlps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADLPS);
12608 def_builtin (MASK_SSE1, "__builtin_ia32_storehps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STOREHPS);
12609 def_builtin (MASK_SSE1, "__builtin_ia32_storelps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STORELPS);
12610
12611 def_builtin (MASK_SSE1, "__builtin_ia32_movmskps", int_ftype_v4sf, IX86_BUILTIN_MOVMSKPS);
12612 def_builtin (MASK_SSE1 | MASK_3DNOW_A, "__builtin_ia32_pmovmskb", int_ftype_v8qi, IX86_BUILTIN_PMOVMSKB);
12613 def_builtin (MASK_SSE1, "__builtin_ia32_movntps", void_ftype_pfloat_v4sf, IX86_BUILTIN_MOVNTPS);
12614 def_builtin (MASK_SSE1 | MASK_3DNOW_A, "__builtin_ia32_movntq", void_ftype_pdi_di, IX86_BUILTIN_MOVNTQ);
12615
12616 def_builtin (MASK_SSE1 | MASK_3DNOW_A, "__builtin_ia32_sfence", void_ftype_void, IX86_BUILTIN_SFENCE);
12617
12618 def_builtin (MASK_SSE1 | MASK_3DNOW_A, "__builtin_ia32_psadbw", di_ftype_v8qi_v8qi, IX86_BUILTIN_PSADBW);
12619
12620 def_builtin (MASK_SSE1, "__builtin_ia32_rcpps", v4sf_ftype_v4sf, IX86_BUILTIN_RCPPS);
12621 def_builtin (MASK_SSE1, "__builtin_ia32_rcpss", v4sf_ftype_v4sf, IX86_BUILTIN_RCPSS);
12622 def_builtin (MASK_SSE1, "__builtin_ia32_rsqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTPS);
12623 def_builtin (MASK_SSE1, "__builtin_ia32_rsqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTSS);
12624 def_builtin (MASK_SSE1, "__builtin_ia32_sqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTPS);
12625 def_builtin (MASK_SSE1, "__builtin_ia32_sqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTSS);
12626
12627 def_builtin (MASK_SSE1, "__builtin_ia32_shufps", v4sf_ftype_v4sf_v4sf_int, IX86_BUILTIN_SHUFPS);
12628
12629 /* Original 3DNow! */
12630 def_builtin (MASK_3DNOW, "__builtin_ia32_femms", void_ftype_void, IX86_BUILTIN_FEMMS);
12631 def_builtin (MASK_3DNOW, "__builtin_ia32_pavgusb", v8qi_ftype_v8qi_v8qi, IX86_BUILTIN_PAVGUSB);
12632 def_builtin (MASK_3DNOW, "__builtin_ia32_pf2id", v2si_ftype_v2sf, IX86_BUILTIN_PF2ID);
12633 def_builtin (MASK_3DNOW, "__builtin_ia32_pfacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFACC);
12634 def_builtin (MASK_3DNOW, "__builtin_ia32_pfadd", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFADD);
12635 def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpeq", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPEQ);
12636 def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpge", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPGE);
12637 def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpgt", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPGT);
12638 def_builtin (MASK_3DNOW, "__builtin_ia32_pfmax", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMAX);
12639 def_builtin (MASK_3DNOW, "__builtin_ia32_pfmin", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMIN);
12640 def_builtin (MASK_3DNOW, "__builtin_ia32_pfmul", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMUL);
12641 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcp", v2sf_ftype_v2sf, IX86_BUILTIN_PFRCP);
12642 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcpit1", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRCPIT1);
12643 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcpit2", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRCPIT2);
12644 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrsqrt", v2sf_ftype_v2sf, IX86_BUILTIN_PFRSQRT);
12645 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrsqit1", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRSQIT1);
12646 def_builtin (MASK_3DNOW, "__builtin_ia32_pfsub", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFSUB);
12647 def_builtin (MASK_3DNOW, "__builtin_ia32_pfsubr", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFSUBR);
12648 def_builtin (MASK_3DNOW, "__builtin_ia32_pi2fd", v2sf_ftype_v2si, IX86_BUILTIN_PI2FD);
12649 def_builtin (MASK_3DNOW, "__builtin_ia32_pmulhrw", v4hi_ftype_v4hi_v4hi, IX86_BUILTIN_PMULHRW);
12650
12651 /* 3DNow! extension as used in the Athlon CPU. */
12652 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pf2iw", v2si_ftype_v2sf, IX86_BUILTIN_PF2IW);
12653 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pfnacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFNACC);
12654 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pfpnacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFPNACC);
12655 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pi2fw", v2sf_ftype_v2si, IX86_BUILTIN_PI2FW);
12656 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pswapdsf", v2sf_ftype_v2sf, IX86_BUILTIN_PSWAPDSF);
12657 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pswapdsi", v2si_ftype_v2si, IX86_BUILTIN_PSWAPDSI);
12658
12659 def_builtin (MASK_SSE1, "__builtin_ia32_setzerops", v4sf_ftype_void, IX86_BUILTIN_SSE_ZERO);
12660
12661 /* SSE2 */
12662 def_builtin (MASK_SSE2, "__builtin_ia32_pextrw128", int_ftype_v8hi_int, IX86_BUILTIN_PEXTRW128);
12663 def_builtin (MASK_SSE2, "__builtin_ia32_pinsrw128", v8hi_ftype_v8hi_int_int, IX86_BUILTIN_PINSRW128);
12664
12665 def_builtin (MASK_SSE2, "__builtin_ia32_maskmovdqu", void_ftype_v16qi_v16qi_pchar, IX86_BUILTIN_MASKMOVDQU);
12666 def_builtin (MASK_SSE2, "__builtin_ia32_movq2dq", v2di_ftype_di, IX86_BUILTIN_MOVQ2DQ);
12667 def_builtin (MASK_SSE2, "__builtin_ia32_movdq2q", di_ftype_v2di, IX86_BUILTIN_MOVDQ2Q);
12668
12669 def_builtin (MASK_SSE2, "__builtin_ia32_loadapd", v2df_ftype_pdouble, IX86_BUILTIN_LOADAPD);
12670 def_builtin (MASK_SSE2, "__builtin_ia32_loadupd", v2df_ftype_pdouble, IX86_BUILTIN_LOADUPD);
12671 def_builtin (MASK_SSE2, "__builtin_ia32_loadsd", v2df_ftype_pdouble, IX86_BUILTIN_LOADSD);
12672 def_builtin (MASK_SSE2, "__builtin_ia32_storeapd", void_ftype_pdouble_v2df, IX86_BUILTIN_STOREAPD);
12673 def_builtin (MASK_SSE2, "__builtin_ia32_storeupd", void_ftype_pdouble_v2df, IX86_BUILTIN_STOREUPD);
12674 def_builtin (MASK_SSE2, "__builtin_ia32_storesd", void_ftype_pdouble_v2df, IX86_BUILTIN_STORESD);
12675
12676 def_builtin (MASK_SSE2, "__builtin_ia32_loadhpd", v2df_ftype_v2df_pv2si, IX86_BUILTIN_LOADHPD);
12677 def_builtin (MASK_SSE2, "__builtin_ia32_loadlpd", v2df_ftype_v2df_pv2si, IX86_BUILTIN_LOADLPD);
12678 def_builtin (MASK_SSE2, "__builtin_ia32_storehpd", void_ftype_pv2si_v2df, IX86_BUILTIN_STOREHPD);
12679 def_builtin (MASK_SSE2, "__builtin_ia32_storelpd", void_ftype_pv2si_v2df, IX86_BUILTIN_STORELPD);
12680
12681 def_builtin (MASK_SSE2, "__builtin_ia32_movmskpd", int_ftype_v2df, IX86_BUILTIN_MOVMSKPD);
12682 def_builtin (MASK_SSE2, "__builtin_ia32_pmovmskb128", int_ftype_v16qi, IX86_BUILTIN_PMOVMSKB128);
12683 def_builtin (MASK_SSE2, "__builtin_ia32_movnti", void_ftype_pint_int, IX86_BUILTIN_MOVNTI);
12684 def_builtin (MASK_SSE2, "__builtin_ia32_movntpd", void_ftype_pdouble_v2df, IX86_BUILTIN_MOVNTPD);
12685 def_builtin (MASK_SSE2, "__builtin_ia32_movntdq", void_ftype_pv2di_v2di, IX86_BUILTIN_MOVNTDQ);
12686
12687 def_builtin (MASK_SSE2, "__builtin_ia32_pshufd", v4si_ftype_v4si_int, IX86_BUILTIN_PSHUFD);
12688 def_builtin (MASK_SSE2, "__builtin_ia32_pshuflw", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSHUFLW);
12689 def_builtin (MASK_SSE2, "__builtin_ia32_pshufhw", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSHUFHW);
12690 def_builtin (MASK_SSE2, "__builtin_ia32_psadbw128", v2di_ftype_v16qi_v16qi, IX86_BUILTIN_PSADBW128);
12691
12692 def_builtin (MASK_SSE2, "__builtin_ia32_sqrtpd", v2df_ftype_v2df, IX86_BUILTIN_SQRTPD);
12693 def_builtin (MASK_SSE2, "__builtin_ia32_sqrtsd", v2df_ftype_v2df, IX86_BUILTIN_SQRTSD);
12694
12695 def_builtin (MASK_SSE2, "__builtin_ia32_shufpd", v2df_ftype_v2df_v2df_int, IX86_BUILTIN_SHUFPD);
12696
12697 def_builtin (MASK_SSE2, "__builtin_ia32_cvtdq2pd", v2df_ftype_v4si, IX86_BUILTIN_CVTDQ2PD);
12698 def_builtin (MASK_SSE2, "__builtin_ia32_cvtdq2ps", v4sf_ftype_v4si, IX86_BUILTIN_CVTDQ2PS);
12699
12700 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2dq", v4si_ftype_v2df, IX86_BUILTIN_CVTPD2DQ);
12701 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2pi", v2si_ftype_v2df, IX86_BUILTIN_CVTPD2PI);
12702 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2ps", v4sf_ftype_v2df, IX86_BUILTIN_CVTPD2PS);
12703 def_builtin (MASK_SSE2, "__builtin_ia32_cvttpd2dq", v4si_ftype_v2df, IX86_BUILTIN_CVTTPD2DQ);
12704 def_builtin (MASK_SSE2, "__builtin_ia32_cvttpd2pi", v2si_ftype_v2df, IX86_BUILTIN_CVTTPD2PI);
12705
12706 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpi2pd", v2df_ftype_v2si, IX86_BUILTIN_CVTPI2PD);
12707
12708 def_builtin (MASK_SSE2, "__builtin_ia32_cvtsd2si", int_ftype_v2df, IX86_BUILTIN_CVTSD2SI);
12709 def_builtin (MASK_SSE2, "__builtin_ia32_cvttsd2si", int_ftype_v2df, IX86_BUILTIN_CVTTSD2SI);
12710
12711 def_builtin (MASK_SSE2, "__builtin_ia32_cvtps2dq", v4si_ftype_v4sf, IX86_BUILTIN_CVTPS2DQ);
12712 def_builtin (MASK_SSE2, "__builtin_ia32_cvtps2pd", v2df_ftype_v4sf, IX86_BUILTIN_CVTPS2PD);
12713 def_builtin (MASK_SSE2, "__builtin_ia32_cvttps2dq", v4si_ftype_v4sf, IX86_BUILTIN_CVTTPS2DQ);
12714
12715 def_builtin (MASK_SSE2, "__builtin_ia32_cvtsi2sd", v2df_ftype_v2df_int, IX86_BUILTIN_CVTSI2SD);
12716 def_builtin (MASK_SSE2, "__builtin_ia32_cvtsd2ss", v4sf_ftype_v4sf_v2df, IX86_BUILTIN_CVTSD2SS);
12717 def_builtin (MASK_SSE2, "__builtin_ia32_cvtss2sd", v2df_ftype_v2df_v4sf, IX86_BUILTIN_CVTSS2SD);
12718
12719 def_builtin (MASK_SSE2, "__builtin_ia32_setpd1", v2df_ftype_double, IX86_BUILTIN_SETPD1);
12720 def_builtin (MASK_SSE2, "__builtin_ia32_setpd", v2df_ftype_double_double, IX86_BUILTIN_SETPD);
12721 def_builtin (MASK_SSE2, "__builtin_ia32_setzeropd", ti_ftype_void, IX86_BUILTIN_CLRPD);
12722 def_builtin (MASK_SSE2, "__builtin_ia32_loadpd1", v2df_ftype_pdouble, IX86_BUILTIN_LOADPD1);
12723 def_builtin (MASK_SSE2, "__builtin_ia32_loadrpd", v2df_ftype_pdouble, IX86_BUILTIN_LOADRPD);
12724 def_builtin (MASK_SSE2, "__builtin_ia32_storepd1", void_ftype_pdouble_v2df, IX86_BUILTIN_STOREPD1);
12725 def_builtin (MASK_SSE2, "__builtin_ia32_storerpd", void_ftype_pdouble_v2df, IX86_BUILTIN_STORERPD);
12726
12727 def_builtin (MASK_SSE2, "__builtin_ia32_clflush", void_ftype_pvoid, IX86_BUILTIN_CLFLUSH);
12728 def_builtin (MASK_SSE2, "__builtin_ia32_lfence", void_ftype_void, IX86_BUILTIN_LFENCE);
12729 def_builtin (MASK_SSE2, "__builtin_ia32_mfence", void_ftype_void, IX86_BUILTIN_MFENCE);
12730
12731 def_builtin (MASK_SSE2, "__builtin_ia32_loaddqa", v16qi_ftype_pchar, IX86_BUILTIN_LOADDQA);
12732 def_builtin (MASK_SSE2, "__builtin_ia32_loaddqu", v16qi_ftype_pchar, IX86_BUILTIN_LOADDQU);
12733 def_builtin (MASK_SSE2, "__builtin_ia32_loadd", v4si_ftype_pchar, IX86_BUILTIN_LOADD);
12734 def_builtin (MASK_SSE2, "__builtin_ia32_storedqa", void_ftype_pchar_v16qi, IX86_BUILTIN_STOREDQA);
12735 def_builtin (MASK_SSE2, "__builtin_ia32_storedqu", void_ftype_pchar_v16qi, IX86_BUILTIN_STOREDQU);
12736 def_builtin (MASK_SSE2, "__builtin_ia32_stored", void_ftype_pchar_v4si, IX86_BUILTIN_STORED);
12737 def_builtin (MASK_SSE2, "__builtin_ia32_movq", v2di_ftype_v2di, IX86_BUILTIN_MOVQ);
12738
12739 def_builtin (MASK_SSE2, "__builtin_ia32_setzero128", v2di_ftype_void, IX86_BUILTIN_CLRTI);
12740
12741 def_builtin (MASK_SSE2, "__builtin_ia32_psllw128", v8hi_ftype_v8hi_v2di, IX86_BUILTIN_PSLLW128);
12742 def_builtin (MASK_SSE2, "__builtin_ia32_pslld128", v4si_ftype_v4si_v2di, IX86_BUILTIN_PSLLD128);
12743 def_builtin (MASK_SSE2, "__builtin_ia32_psllq128", v2di_ftype_v2di_v2di, IX86_BUILTIN_PSLLQ128);
12744
12745 def_builtin (MASK_SSE2, "__builtin_ia32_psrlw128", v8hi_ftype_v8hi_v2di, IX86_BUILTIN_PSRLW128);
12746 def_builtin (MASK_SSE2, "__builtin_ia32_psrld128", v4si_ftype_v4si_v2di, IX86_BUILTIN_PSRLD128);
12747 def_builtin (MASK_SSE2, "__builtin_ia32_psrlq128", v2di_ftype_v2di_v2di, IX86_BUILTIN_PSRLQ128);
12748
12749 def_builtin (MASK_SSE2, "__builtin_ia32_psraw128", v8hi_ftype_v8hi_v2di, IX86_BUILTIN_PSRAW128);
12750 def_builtin (MASK_SSE2, "__builtin_ia32_psrad128", v4si_ftype_v4si_v2di, IX86_BUILTIN_PSRAD128);
12751
12752 def_builtin (MASK_SSE2, "__builtin_ia32_pslldqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSLLDQI128);
12753 def_builtin (MASK_SSE2, "__builtin_ia32_psllwi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSLLWI128);
12754 def_builtin (MASK_SSE2, "__builtin_ia32_pslldi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSLLDI128);
12755 def_builtin (MASK_SSE2, "__builtin_ia32_psllqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSLLQI128);
12756
12757 def_builtin (MASK_SSE2, "__builtin_ia32_psrldqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSRLDQI128);
12758 def_builtin (MASK_SSE2, "__builtin_ia32_psrlwi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSRLWI128);
12759 def_builtin (MASK_SSE2, "__builtin_ia32_psrldi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSRLDI128);
12760 def_builtin (MASK_SSE2, "__builtin_ia32_psrlqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSRLQI128);
12761
12762 def_builtin (MASK_SSE2, "__builtin_ia32_psrawi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSRAWI128);
12763 def_builtin (MASK_SSE2, "__builtin_ia32_psradi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSRADI128);
12764
12765 def_builtin (MASK_SSE2, "__builtin_ia32_pmaddwd128", v4si_ftype_v8hi_v8hi, IX86_BUILTIN_PMADDWD128);
12766 }
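/* Illustrative sketch only, not part of the compiler: with a vector
   typedef such as the one <xmmintrin.h> is expected to provide, e.g.

	typedef float __v4sf __attribute__ ((mode (V4SF)));

   the definitions above let user code write

	__v4sf x = __builtin_ia32_loadups (p);
	x = __builtin_ia32_shufps (x, x, 0);
	__builtin_ia32_storeups (p, x);

   and ix86_expand_builtin below maps each call onto the corresponding
   machine pattern.  The typedef spelling is an assumption; the builtin
   names and signatures come from the def_builtin calls above.  */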
12767
12768 /* Errors in the source file can cause expand_expr to return const0_rtx
12769 where we expect a vector. To avoid crashing, use one of the vector
12770 clear instructions. */
12771 static rtx
12772 safe_vector_operand (x, mode)
12773 rtx x;
12774 enum machine_mode mode;
12775 {
12776 if (x != const0_rtx)
12777 return x;
12778 x = gen_reg_rtx (mode);
12779
12780 if (VALID_MMX_REG_MODE (mode) || VALID_MMX_REG_MODE_3DNOW (mode))
12781 emit_insn (gen_mmx_clrdi (mode == DImode ? x
12782 : gen_rtx_SUBREG (DImode, x, 0)));
12783 else
12784 emit_insn (gen_sse_clrv4sf (mode == V4SFmode ? x
12785 : gen_rtx_SUBREG (V4SFmode, x, 0)));
12786 return x;
12787 }
12788
12789 /* Subroutine of ix86_expand_builtin to take care of binop insns. */
12790
12791 static rtx
12792 ix86_expand_binop_builtin (icode, arglist, target)
12793 enum insn_code icode;
12794 tree arglist;
12795 rtx target;
12796 {
12797 rtx pat;
12798 tree arg0 = TREE_VALUE (arglist);
12799 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
12800 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
12801 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
12802 enum machine_mode tmode = insn_data[icode].operand[0].mode;
12803 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
12804 enum machine_mode mode1 = insn_data[icode].operand[2].mode;
12805
12806 if (VECTOR_MODE_P (mode0))
12807 op0 = safe_vector_operand (op0, mode0);
12808 if (VECTOR_MODE_P (mode1))
12809 op1 = safe_vector_operand (op1, mode1);
12810
12811 if (! target
12812 || GET_MODE (target) != tmode
12813 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
12814 target = gen_reg_rtx (tmode);
12815
12816 /* In case the insn wants input operands in modes different from
12817 the result, abort. */
12818 if (GET_MODE (op0) != mode0 || GET_MODE (op1) != mode1)
12819 abort ();
12820
12821 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
12822 op0 = copy_to_mode_reg (mode0, op0);
12823 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
12824 op1 = copy_to_mode_reg (mode1, op1);
12825
12826 /* In the commutative cases, both op0 and op1 are nonimmediate_operand,
12827 yet one of the two must not be a memory. This is normally enforced
12828 by expanders, but we didn't bother to create one here. */
12829 if (GET_CODE (op0) == MEM && GET_CODE (op1) == MEM)
12830 op0 = copy_to_mode_reg (mode0, op0);
12831
12832 pat = GEN_FCN (icode) (target, op0, op1);
12833 if (! pat)
12834 return 0;
12835 emit_insn (pat);
12836 return target;
12837 }
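/* As an example (a sketch, assuming the bdesc_2arg table elsewhere in
   this file maps __builtin_ia32_addps to the addv4sf3 pattern), a call
   such as __builtin_ia32_addps (a, b) is expanded by the routine above
   into roughly

	(set (reg:V4SF target) (plus:V4SF (reg:V4SF a') (reg:V4SF b')))

   after both operands have been forced into registers as needed.  */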
12838
12839 /* Subroutine of ix86_expand_builtin to take care of stores. */
12840
12841 static rtx
12842 ix86_expand_store_builtin (icode, arglist)
12843 enum insn_code icode;
12844 tree arglist;
12845 {
12846 rtx pat;
12847 tree arg0 = TREE_VALUE (arglist);
12848 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
12849 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
12850 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
12851 enum machine_mode mode0 = insn_data[icode].operand[0].mode;
12852 enum machine_mode mode1 = insn_data[icode].operand[1].mode;
12853
12854 if (VECTOR_MODE_P (mode1))
12855 op1 = safe_vector_operand (op1, mode1);
12856
12857 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
12858
12859 if (! (*insn_data[icode].operand[1].predicate) (op1, mode1))
12860 op1 = copy_to_mode_reg (mode1, op1);
12861
12862 pat = GEN_FCN (icode) (op0, op1);
12863 if (pat)
12864 emit_insn (pat);
12865 return 0;
12866 }
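/* Sketch: __builtin_ia32_storeups (p, x), declared above with
   void_ftype_pfloat_v4sf, reaches this routine with the movups pattern.
   The pointer argument is wrapped in a MEM, so the emitted insn is
   roughly

	(set (mem:V4SF (reg p')) (reg:V4SF x'))

   and the function returns 0 because the builtin produces no value.  */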
12867
12868 /* Subroutine of ix86_expand_builtin to take care of unop insns. */
12869
12870 static rtx
12871 ix86_expand_unop_builtin (icode, arglist, target, do_load)
12872 enum insn_code icode;
12873 tree arglist;
12874 rtx target;
12875 int do_load;
12876 {
12877 rtx pat;
12878 tree arg0 = TREE_VALUE (arglist);
12879 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
12880 enum machine_mode tmode = insn_data[icode].operand[0].mode;
12881 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
12882
12883 if (! target
12884 || GET_MODE (target) != tmode
12885 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
12886 target = gen_reg_rtx (tmode);
12887 if (do_load)
12888 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
12889 else
12890 {
12891 if (VECTOR_MODE_P (mode0))
12892 op0 = safe_vector_operand (op0, mode0);
12893
12894 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
12895 op0 = copy_to_mode_reg (mode0, op0);
12896 }
12897
12898 pat = GEN_FCN (icode) (target, op0);
12899 if (! pat)
12900 return 0;
12901 emit_insn (pat);
12902 return target;
12903 }
12904
12905 /* Subroutine of ix86_expand_builtin to take care of three special unop insns:
12906 sqrtss, rsqrtss, rcpss. */
12907
12908 static rtx
12909 ix86_expand_unop1_builtin (icode, arglist, target)
12910 enum insn_code icode;
12911 tree arglist;
12912 rtx target;
12913 {
12914 rtx pat;
12915 tree arg0 = TREE_VALUE (arglist);
12916 rtx op1, op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
12917 enum machine_mode tmode = insn_data[icode].operand[0].mode;
12918 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
12919
12920 if (! target
12921 || GET_MODE (target) != tmode
12922 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
12923 target = gen_reg_rtx (tmode);
12924
12925 if (VECTOR_MODE_P (mode0))
12926 op0 = safe_vector_operand (op0, mode0);
12927
12928 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
12929 op0 = copy_to_mode_reg (mode0, op0);
12930
12931 op1 = op0;
12932 if (! (*insn_data[icode].operand[2].predicate) (op1, mode0))
12933 op1 = copy_to_mode_reg (mode0, op1);
12934
12935 pat = GEN_FCN (icode) (target, op0, op1);
12936 if (! pat)
12937 return 0;
12938 emit_insn (pat);
12939 return target;
12940 }
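/* The duplicated operand reflects the shape of the vm* patterns; as a
   sketch (the exact form lives in i386.md and is only assumed here),
   __builtin_ia32_sqrtss (a) becomes roughly

	(set (reg:V4SF target)
	     (vec_merge:V4SF (sqrt:V4SF (reg:V4SF a'))
			     (reg:V4SF a')
			     (const_int 1)))

   so the scalar result goes into element 0 while the other elements are
   taken from the same input, hence op1 = op0 above.  */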
12941
12942 /* Subroutine of ix86_expand_builtin to take care of comparison insns. */
12943
12944 static rtx
12945 ix86_expand_sse_compare (d, arglist, target)
12946 const struct builtin_description *d;
12947 tree arglist;
12948 rtx target;
12949 {
12950 rtx pat;
12951 tree arg0 = TREE_VALUE (arglist);
12952 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
12953 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
12954 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
12955 rtx op2;
12956 enum machine_mode tmode = insn_data[d->icode].operand[0].mode;
12957 enum machine_mode mode0 = insn_data[d->icode].operand[1].mode;
12958 enum machine_mode mode1 = insn_data[d->icode].operand[2].mode;
12959 enum rtx_code comparison = d->comparison;
12960
12961 if (VECTOR_MODE_P (mode0))
12962 op0 = safe_vector_operand (op0, mode0);
12963 if (VECTOR_MODE_P (mode1))
12964 op1 = safe_vector_operand (op1, mode1);
12965
12966 /* Swap operands if we have a comparison that isn't available in
12967 hardware. */
12968 if (d->flag)
12969 {
12970 rtx tmp = gen_reg_rtx (mode1);
12971 emit_move_insn (tmp, op1);
12972 op1 = op0;
12973 op0 = tmp;
12974 }
12975
12976 if (! target
12977 || GET_MODE (target) != tmode
12978 || ! (*insn_data[d->icode].operand[0].predicate) (target, tmode))
12979 target = gen_reg_rtx (tmode);
12980
12981 if (! (*insn_data[d->icode].operand[1].predicate) (op0, mode0))
12982 op0 = copy_to_mode_reg (mode0, op0);
12983 if (! (*insn_data[d->icode].operand[2].predicate) (op1, mode1))
12984 op1 = copy_to_mode_reg (mode1, op1);
12985
12986 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
12987 pat = GEN_FCN (d->icode) (target, op0, op1, op2);
12988 if (! pat)
12989 return 0;
12990 emit_insn (pat);
12991 return target;
12992 }
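/* Example of the swap above (hedged; the flag comes from the builtin
   descriptor tables elsewhere in this file): the cmpps instruction only
   offers the "less than" style predicates, so a descriptor for
   __builtin_ia32_cmpgtps is expected to set d->flag with LT as
   d->comparison, and a > b is then emitted as b < a by exchanging the
   two operands before the comparison rtx is built.  */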
12993
12994 /* Subroutine of ix86_expand_builtin to take care of comi insns. */
12995
12996 static rtx
12997 ix86_expand_sse_comi (d, arglist, target)
12998 const struct builtin_description *d;
12999 tree arglist;
13000 rtx target;
13001 {
13002 rtx pat;
13003 tree arg0 = TREE_VALUE (arglist);
13004 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13005 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13006 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13007 rtx op2;
13008 enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
13009 enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
13010 enum rtx_code comparison = d->comparison;
13011
13012 if (VECTOR_MODE_P (mode0))
13013 op0 = safe_vector_operand (op0, mode0);
13014 if (VECTOR_MODE_P (mode1))
13015 op1 = safe_vector_operand (op1, mode1);
13016
13017 /* Swap operands if we have a comparison that isn't available in
13018 hardware. */
13019 if (d->flag)
13020 {
13021 rtx tmp = op1;
13022 op1 = op0;
13023 op0 = tmp;
13024 }
13025
13026 target = gen_reg_rtx (SImode);
13027 emit_move_insn (target, const0_rtx);
13028 target = gen_rtx_SUBREG (QImode, target, 0);
13029
13030 if (! (*insn_data[d->icode].operand[0].predicate) (op0, mode0))
13031 op0 = copy_to_mode_reg (mode0, op0);
13032 if (! (*insn_data[d->icode].operand[1].predicate) (op1, mode1))
13033 op1 = copy_to_mode_reg (mode1, op1);
13034
13035 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
13036 pat = GEN_FCN (d->icode) (op0, op1);
13037 if (! pat)
13038 return 0;
13039 emit_insn (pat);
13040 emit_insn (gen_rtx_SET (VOIDmode,
13041 gen_rtx_STRICT_LOW_PART (VOIDmode, target),
13042 gen_rtx_fmt_ee (comparison, QImode,
13043 SET_DEST (pat),
13044 const0_rtx)));
13045
13046 return SUBREG_REG (target);
13047 }
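/* Sketch of the result materialization above: once the comi pattern has
   set the flags, the SImode pseudo is zeroed and its low byte receives
   the flag test, roughly

	(set (reg:SI tmp) (const_int 0))
	(set (strict_low_part (subreg:QI (reg:SI tmp) 0))
	     (eq:QI (reg flags) (const_int 0)))

   so e.g. __builtin_ia32_comieq yields an int that is 0 or 1.  Register
   names and the flag mode are illustrative only.  */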
13048
13049 /* Expand an expression EXP that calls a built-in function,
13050 with result going to TARGET if that's convenient
13051 (and in mode MODE if that's convenient).
13052 SUBTARGET may be used as the target for computing one of EXP's operands.
13053 IGNORE is nonzero if the value is to be ignored. */
13054
13055 rtx
13056 ix86_expand_builtin (exp, target, subtarget, mode, ignore)
13057 tree exp;
13058 rtx target;
13059 rtx subtarget ATTRIBUTE_UNUSED;
13060 enum machine_mode mode ATTRIBUTE_UNUSED;
13061 int ignore ATTRIBUTE_UNUSED;
13062 {
13063 const struct builtin_description *d;
13064 size_t i;
13065 enum insn_code icode;
13066 tree fndecl = TREE_OPERAND (TREE_OPERAND (exp, 0), 0);
13067 tree arglist = TREE_OPERAND (exp, 1);
13068 tree arg0, arg1, arg2;
13069 rtx op0, op1, op2, pat;
13070 enum machine_mode tmode, mode0, mode1, mode2;
13071 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
13072
13073 switch (fcode)
13074 {
13075 case IX86_BUILTIN_EMMS:
13076 emit_insn (gen_emms ());
13077 return 0;
13078
13079 case IX86_BUILTIN_SFENCE:
13080 emit_insn (gen_sfence ());
13081 return 0;
13082
13083 case IX86_BUILTIN_PEXTRW:
13084 case IX86_BUILTIN_PEXTRW128:
13085 icode = (fcode == IX86_BUILTIN_PEXTRW
13086 ? CODE_FOR_mmx_pextrw
13087 : CODE_FOR_sse2_pextrw);
13088 arg0 = TREE_VALUE (arglist);
13089 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13090 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13091 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13092 tmode = insn_data[icode].operand[0].mode;
13093 mode0 = insn_data[icode].operand[1].mode;
13094 mode1 = insn_data[icode].operand[2].mode;
13095
13096 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
13097 op0 = copy_to_mode_reg (mode0, op0);
13098 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
13099 {
13100 /* @@@ better error message */
13101 error ("selector must be an immediate");
13102 return gen_reg_rtx (tmode);
13103 }
13104 if (target == 0
13105 || GET_MODE (target) != tmode
13106 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13107 target = gen_reg_rtx (tmode);
13108 pat = GEN_FCN (icode) (target, op0, op1);
13109 if (! pat)
13110 return 0;
13111 emit_insn (pat);
13112 return target;
13113
13114 case IX86_BUILTIN_PINSRW:
13115 case IX86_BUILTIN_PINSRW128:
13116 icode = (fcode == IX86_BUILTIN_PINSRW
13117 ? CODE_FOR_mmx_pinsrw
13118 : CODE_FOR_sse2_pinsrw);
13119 arg0 = TREE_VALUE (arglist);
13120 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13121 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
13122 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13123 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13124 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
13125 tmode = insn_data[icode].operand[0].mode;
13126 mode0 = insn_data[icode].operand[1].mode;
13127 mode1 = insn_data[icode].operand[2].mode;
13128 mode2 = insn_data[icode].operand[3].mode;
13129
13130 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
13131 op0 = copy_to_mode_reg (mode0, op0);
13132 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
13133 op1 = copy_to_mode_reg (mode1, op1);
13134 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
13135 {
13136 /* @@@ better error message */
13137 error ("selector must be an immediate");
13138 return const0_rtx;
13139 }
13140 if (target == 0
13141 || GET_MODE (target) != tmode
13142 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13143 target = gen_reg_rtx (tmode);
13144 pat = GEN_FCN (icode) (target, op0, op1, op2);
13145 if (! pat)
13146 return 0;
13147 emit_insn (pat);
13148 return target;
13149
13150 case IX86_BUILTIN_MASKMOVQ:
13151 case IX86_BUILTIN_MASKMOVDQU:
13152 icode = (fcode == IX86_BUILTIN_MASKMOVQ
13153 ? (TARGET_64BIT ? CODE_FOR_mmx_maskmovq_rex : CODE_FOR_mmx_maskmovq)
13154 : (TARGET_64BIT ? CODE_FOR_sse2_maskmovdqu_rex64
13155 : CODE_FOR_sse2_maskmovdqu));
13156 /* Note the arg order is different from the operand order. */
13157 arg1 = TREE_VALUE (arglist);
13158 arg2 = TREE_VALUE (TREE_CHAIN (arglist));
13159 arg0 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
13160 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13161 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13162 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
13163 mode0 = insn_data[icode].operand[0].mode;
13164 mode1 = insn_data[icode].operand[1].mode;
13165 mode2 = insn_data[icode].operand[2].mode;
13166
13167 if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
13168 op0 = copy_to_mode_reg (mode0, op0);
13169 if (! (*insn_data[icode].operand[1].predicate) (op1, mode1))
13170 op1 = copy_to_mode_reg (mode1, op1);
13171 if (! (*insn_data[icode].operand[2].predicate) (op2, mode2))
13172 op2 = copy_to_mode_reg (mode2, op2);
13173 pat = GEN_FCN (icode) (op0, op1, op2);
13174 if (! pat)
13175 return 0;
13176 emit_insn (pat);
13177 return 0;
13178
13179 case IX86_BUILTIN_SQRTSS:
13180 return ix86_expand_unop1_builtin (CODE_FOR_vmsqrtv4sf2, arglist, target);
13181 case IX86_BUILTIN_RSQRTSS:
13182 return ix86_expand_unop1_builtin (CODE_FOR_vmrsqrtv4sf2, arglist, target);
13183 case IX86_BUILTIN_RCPSS:
13184 return ix86_expand_unop1_builtin (CODE_FOR_vmrcpv4sf2, arglist, target);
13185
13186 case IX86_BUILTIN_LOADAPS:
13187 return ix86_expand_unop_builtin (CODE_FOR_sse_movaps, arglist, target, 1);
13188
13189 case IX86_BUILTIN_LOADUPS:
13190 return ix86_expand_unop_builtin (CODE_FOR_sse_movups, arglist, target, 1);
13191
13192 case IX86_BUILTIN_STOREAPS:
13193 return ix86_expand_store_builtin (CODE_FOR_sse_movaps, arglist);
13194
13195 case IX86_BUILTIN_STOREUPS:
13196 return ix86_expand_store_builtin (CODE_FOR_sse_movups, arglist);
13197
13198 case IX86_BUILTIN_LOADSS:
13199 return ix86_expand_unop_builtin (CODE_FOR_sse_loadss, arglist, target, 1);
13200
13201 case IX86_BUILTIN_STORESS:
13202 return ix86_expand_store_builtin (CODE_FOR_sse_storess, arglist);
13203
13204 case IX86_BUILTIN_LOADHPS:
13205 case IX86_BUILTIN_LOADLPS:
13206 case IX86_BUILTIN_LOADHPD:
13207 case IX86_BUILTIN_LOADLPD:
13208 icode = (fcode == IX86_BUILTIN_LOADHPS ? CODE_FOR_sse_movhps
13209 : fcode == IX86_BUILTIN_LOADLPS ? CODE_FOR_sse_movlps
13210 : fcode == IX86_BUILTIN_LOADHPD ? CODE_FOR_sse2_movhpd
13211 : CODE_FOR_sse2_movlpd);
13212 arg0 = TREE_VALUE (arglist);
13213 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13214 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13215 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13216 tmode = insn_data[icode].operand[0].mode;
13217 mode0 = insn_data[icode].operand[1].mode;
13218 mode1 = insn_data[icode].operand[2].mode;
13219
13220 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
13221 op0 = copy_to_mode_reg (mode0, op0);
13222 op1 = gen_rtx_MEM (mode1, copy_to_mode_reg (Pmode, op1));
13223 if (target == 0
13224 || GET_MODE (target) != tmode
13225 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13226 target = gen_reg_rtx (tmode);
13227 pat = GEN_FCN (icode) (target, op0, op1);
13228 if (! pat)
13229 return 0;
13230 emit_insn (pat);
13231 return target;
13232
13233 case IX86_BUILTIN_STOREHPS:
13234 case IX86_BUILTIN_STORELPS:
13235 case IX86_BUILTIN_STOREHPD:
13236 case IX86_BUILTIN_STORELPD:
13237 icode = (fcode == IX86_BUILTIN_STOREHPS ? CODE_FOR_sse_movhps
13238 : fcode == IX86_BUILTIN_STORELPS ? CODE_FOR_sse_movlps
13239 : fcode == IX86_BUILTIN_STOREHPD ? CODE_FOR_sse2_movhpd
13240 : CODE_FOR_sse2_movlpd);
13241 arg0 = TREE_VALUE (arglist);
13242 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13243 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13244 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13245 mode0 = insn_data[icode].operand[1].mode;
13246 mode1 = insn_data[icode].operand[2].mode;
13247
13248 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
13249 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
13250 op1 = copy_to_mode_reg (mode1, op1);
13251
13252 pat = GEN_FCN (icode) (op0, op0, op1);
13253 if (! pat)
13254 return 0;
13255 emit_insn (pat);
13256 return 0;
13257
13258 case IX86_BUILTIN_MOVNTPS:
13259 return ix86_expand_store_builtin (CODE_FOR_sse_movntv4sf, arglist);
13260 case IX86_BUILTIN_MOVNTQ:
13261 return ix86_expand_store_builtin (CODE_FOR_sse_movntdi, arglist);
13262
13263 case IX86_BUILTIN_LDMXCSR:
13264 op0 = expand_expr (TREE_VALUE (arglist), NULL_RTX, VOIDmode, 0);
13265 target = assign_386_stack_local (SImode, 0);
13266 emit_move_insn (target, op0);
13267 emit_insn (gen_ldmxcsr (target));
13268 return 0;
13269
13270 case IX86_BUILTIN_STMXCSR:
13271 target = assign_386_stack_local (SImode, 0);
13272 emit_insn (gen_stmxcsr (target));
13273 return copy_to_mode_reg (SImode, target);
13274
13275 case IX86_BUILTIN_SHUFPS:
13276 case IX86_BUILTIN_SHUFPD:
13277 icode = (fcode == IX86_BUILTIN_SHUFPS
13278 ? CODE_FOR_sse_shufps
13279 : CODE_FOR_sse2_shufpd);
13280 arg0 = TREE_VALUE (arglist);
13281 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13282 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
13283 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13284 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13285 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
13286 tmode = insn_data[icode].operand[0].mode;
13287 mode0 = insn_data[icode].operand[1].mode;
13288 mode1 = insn_data[icode].operand[2].mode;
13289 mode2 = insn_data[icode].operand[3].mode;
13290
13291 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
13292 op0 = copy_to_mode_reg (mode0, op0);
13293 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
13294 op1 = copy_to_mode_reg (mode1, op1);
13295 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
13296 {
13297 /* @@@ better error message */
13298 error ("mask must be an immediate");
13299 return gen_reg_rtx (tmode);
13300 }
13301 if (target == 0
13302 || GET_MODE (target) != tmode
13303 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13304 target = gen_reg_rtx (tmode);
13305 pat = GEN_FCN (icode) (target, op0, op1, op2);
13306 if (! pat)
13307 return 0;
13308 emit_insn (pat);
13309 return target;
13310
13311 case IX86_BUILTIN_PSHUFW:
13312 case IX86_BUILTIN_PSHUFD:
13313 case IX86_BUILTIN_PSHUFHW:
13314 case IX86_BUILTIN_PSHUFLW:
13315 icode = ( fcode == IX86_BUILTIN_PSHUFHW ? CODE_FOR_sse2_pshufhw
13316 : fcode == IX86_BUILTIN_PSHUFLW ? CODE_FOR_sse2_pshuflw
13317 : fcode == IX86_BUILTIN_PSHUFD ? CODE_FOR_sse2_pshufd
13318 : CODE_FOR_mmx_pshufw);
13319 arg0 = TREE_VALUE (arglist);
13320 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13321 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13322 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13323 tmode = insn_data[icode].operand[0].mode;
13324 mode1 = insn_data[icode].operand[1].mode;
13325 mode2 = insn_data[icode].operand[2].mode;
13326
13327 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
13328 op0 = copy_to_mode_reg (mode1, op0);
13329 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
13330 {
13331 /* @@@ better error message */
13332 error ("mask must be an immediate");
13333 return const0_rtx;
13334 }
13335 if (target == 0
13336 || GET_MODE (target) != tmode
13337 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13338 target = gen_reg_rtx (tmode);
13339 pat = GEN_FCN (icode) (target, op0, op1);
13340 if (! pat)
13341 return 0;
13342 emit_insn (pat);
13343 return target;
13344
13345 case IX86_BUILTIN_PSLLDQI128:
13346 case IX86_BUILTIN_PSRLDQI128:
13347 icode = ( fcode == IX86_BUILTIN_PSLLDQI128 ? CODE_FOR_sse2_ashlti3
13348 : CODE_FOR_sse2_lshrti3);
13349 arg0 = TREE_VALUE (arglist);
13350 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13351 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13352 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13353 tmode = insn_data[icode].operand[0].mode;
13354 mode1 = insn_data[icode].operand[1].mode;
13355 mode2 = insn_data[icode].operand[2].mode;
13356
13357 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
13358 {
13359 op0 = copy_to_reg (op0);
13360 op0 = simplify_gen_subreg (mode1, op0, GET_MODE (op0), 0);
13361 }
13362 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
13363 {
13364 error ("shift must be an immediate");
13365 return const0_rtx;
13366 }
13367 target = gen_reg_rtx (V2DImode);
13368 pat = GEN_FCN (icode) (simplify_gen_subreg (tmode, target, V2DImode, 0), op0, op1);
13369 if (! pat)
13370 return 0;
13371 emit_insn (pat);
13372 return target;
13373
13374 case IX86_BUILTIN_FEMMS:
13375 emit_insn (gen_femms ());
13376 return NULL_RTX;
13377
13378 case IX86_BUILTIN_PAVGUSB:
13379 return ix86_expand_binop_builtin (CODE_FOR_pavgusb, arglist, target);
13380
13381 case IX86_BUILTIN_PF2ID:
13382 return ix86_expand_unop_builtin (CODE_FOR_pf2id, arglist, target, 0);
13383
13384 case IX86_BUILTIN_PFACC:
13385 return ix86_expand_binop_builtin (CODE_FOR_pfacc, arglist, target);
13386
13387 case IX86_BUILTIN_PFADD:
13388 return ix86_expand_binop_builtin (CODE_FOR_addv2sf3, arglist, target);
13389
13390 case IX86_BUILTIN_PFCMPEQ:
13391 return ix86_expand_binop_builtin (CODE_FOR_eqv2sf3, arglist, target);
13392
13393 case IX86_BUILTIN_PFCMPGE:
13394 return ix86_expand_binop_builtin (CODE_FOR_gev2sf3, arglist, target);
13395
13396 case IX86_BUILTIN_PFCMPGT:
13397 return ix86_expand_binop_builtin (CODE_FOR_gtv2sf3, arglist, target);
13398
13399 case IX86_BUILTIN_PFMAX:
13400 return ix86_expand_binop_builtin (CODE_FOR_pfmaxv2sf3, arglist, target);
13401
13402 case IX86_BUILTIN_PFMIN:
13403 return ix86_expand_binop_builtin (CODE_FOR_pfminv2sf3, arglist, target);
13404
13405 case IX86_BUILTIN_PFMUL:
13406 return ix86_expand_binop_builtin (CODE_FOR_mulv2sf3, arglist, target);
13407
13408 case IX86_BUILTIN_PFRCP:
13409 return ix86_expand_unop_builtin (CODE_FOR_pfrcpv2sf2, arglist, target, 0);
13410
13411 case IX86_BUILTIN_PFRCPIT1:
13412 return ix86_expand_binop_builtin (CODE_FOR_pfrcpit1v2sf3, arglist, target);
13413
13414 case IX86_BUILTIN_PFRCPIT2:
13415 return ix86_expand_binop_builtin (CODE_FOR_pfrcpit2v2sf3, arglist, target);
13416
13417 case IX86_BUILTIN_PFRSQIT1:
13418 return ix86_expand_binop_builtin (CODE_FOR_pfrsqit1v2sf3, arglist, target);
13419
13420 case IX86_BUILTIN_PFRSQRT:
13421 return ix86_expand_unop_builtin (CODE_FOR_pfrsqrtv2sf2, arglist, target, 0);
13422
13423 case IX86_BUILTIN_PFSUB:
13424 return ix86_expand_binop_builtin (CODE_FOR_subv2sf3, arglist, target);
13425
13426 case IX86_BUILTIN_PFSUBR:
13427 return ix86_expand_binop_builtin (CODE_FOR_subrv2sf3, arglist, target);
13428
13429 case IX86_BUILTIN_PI2FD:
13430 return ix86_expand_unop_builtin (CODE_FOR_floatv2si2, arglist, target, 0);
13431
13432 case IX86_BUILTIN_PMULHRW:
13433 return ix86_expand_binop_builtin (CODE_FOR_pmulhrwv4hi3, arglist, target);
13434
13435 case IX86_BUILTIN_PF2IW:
13436 return ix86_expand_unop_builtin (CODE_FOR_pf2iw, arglist, target, 0);
13437
13438 case IX86_BUILTIN_PFNACC:
13439 return ix86_expand_binop_builtin (CODE_FOR_pfnacc, arglist, target);
13440
13441 case IX86_BUILTIN_PFPNACC:
13442 return ix86_expand_binop_builtin (CODE_FOR_pfpnacc, arglist, target);
13443
13444 case IX86_BUILTIN_PI2FW:
13445 return ix86_expand_unop_builtin (CODE_FOR_pi2fw, arglist, target, 0);
13446
13447 case IX86_BUILTIN_PSWAPDSI:
13448 return ix86_expand_unop_builtin (CODE_FOR_pswapdv2si2, arglist, target, 0);
13449
13450 case IX86_BUILTIN_PSWAPDSF:
13451 return ix86_expand_unop_builtin (CODE_FOR_pswapdv2sf2, arglist, target, 0);
13452
13453 case IX86_BUILTIN_SSE_ZERO:
13454 target = gen_reg_rtx (V4SFmode);
13455 emit_insn (gen_sse_clrv4sf (target));
13456 return target;
13457
13458 case IX86_BUILTIN_MMX_ZERO:
13459 target = gen_reg_rtx (DImode);
13460 emit_insn (gen_mmx_clrdi (target));
13461 return target;
13462
13463 case IX86_BUILTIN_CLRTI:
13464 target = gen_reg_rtx (V2DImode);
13465 emit_insn (gen_sse2_clrti (simplify_gen_subreg (TImode, target, V2DImode, 0)));
13466 return target;
13467
13468
13469 case IX86_BUILTIN_SQRTSD:
13470 return ix86_expand_unop1_builtin (CODE_FOR_vmsqrtv2df2, arglist, target);
13471 case IX86_BUILTIN_LOADAPD:
13472 return ix86_expand_unop_builtin (CODE_FOR_sse2_movapd, arglist, target, 1);
13473 case IX86_BUILTIN_LOADUPD:
13474 return ix86_expand_unop_builtin (CODE_FOR_sse2_movupd, arglist, target, 1);
13475
13476 case IX86_BUILTIN_STOREAPD:
13477 return ix86_expand_store_builtin (CODE_FOR_sse2_movapd, arglist);
13478 case IX86_BUILTIN_STOREUPD:
13479 return ix86_expand_store_builtin (CODE_FOR_sse2_movupd, arglist);
13480
13481 case IX86_BUILTIN_LOADSD:
13482 return ix86_expand_unop_builtin (CODE_FOR_sse2_loadsd, arglist, target, 1);
13483
13484 case IX86_BUILTIN_STORESD:
13485 return ix86_expand_store_builtin (CODE_FOR_sse2_storesd, arglist);
13486
13487 case IX86_BUILTIN_SETPD1:
13488 target = assign_386_stack_local (DFmode, 0);
13489 arg0 = TREE_VALUE (arglist);
13490 emit_move_insn (adjust_address (target, DFmode, 0),
13491 expand_expr (arg0, NULL_RTX, VOIDmode, 0));
13492 op0 = gen_reg_rtx (V2DFmode);
13493 emit_insn (gen_sse2_loadsd (op0, adjust_address (target, V2DFmode, 0)));
13494 emit_insn (gen_sse2_shufpd (op0, op0, op0, GEN_INT (0)));
13495 return op0;
13496
13497 case IX86_BUILTIN_SETPD:
13498 target = assign_386_stack_local (V2DFmode, 0);
13499 arg0 = TREE_VALUE (arglist);
13500 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13501 emit_move_insn (adjust_address (target, DFmode, 0),
13502 expand_expr (arg0, NULL_RTX, VOIDmode, 0));
13503 emit_move_insn (adjust_address (target, DFmode, 8),
13504 expand_expr (arg1, NULL_RTX, VOIDmode, 0));
13505 op0 = gen_reg_rtx (V2DFmode);
13506 emit_insn (gen_sse2_movapd (op0, target));
13507 return op0;
13508
13509 case IX86_BUILTIN_LOADRPD:
13510 target = ix86_expand_unop_builtin (CODE_FOR_sse2_movapd, arglist,
13511 gen_reg_rtx (V2DFmode), 1);
13512 emit_insn (gen_sse2_shufpd (target, target, target, GEN_INT (1)));
13513 return target;
13514
13515 case IX86_BUILTIN_LOADPD1:
13516 target = ix86_expand_unop_builtin (CODE_FOR_sse2_loadsd, arglist,
13517 gen_reg_rtx (V2DFmode), 1);
13518 emit_insn (gen_sse2_shufpd (target, target, target, const0_rtx));
13519 return target;
13520
13521 case IX86_BUILTIN_STOREPD1:
13522 return ix86_expand_store_builtin (CODE_FOR_sse2_movapd, arglist);
13523 case IX86_BUILTIN_STORERPD:
13524 return ix86_expand_store_builtin (CODE_FOR_sse2_movapd, arglist);
13525
13526 case IX86_BUILTIN_CLRPD:
13527 target = gen_reg_rtx (V2DFmode);
13528 emit_insn (gen_sse_clrv2df (target));
13529 return target;
13530
13531 case IX86_BUILTIN_MFENCE:
13532 emit_insn (gen_sse2_mfence ());
13533 return 0;
13534 case IX86_BUILTIN_LFENCE:
13535 emit_insn (gen_sse2_lfence ());
13536 return 0;
13537
13538 case IX86_BUILTIN_CLFLUSH:
13539 arg0 = TREE_VALUE (arglist);
13540 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13541 icode = CODE_FOR_sse2_clflush;
13542 if (! (*insn_data[icode].operand[0].predicate) (op0, Pmode))
13543 op0 = copy_to_mode_reg (Pmode, op0);
13544
13545 emit_insn (gen_sse2_clflush (op0));
13546 return 0;
13547
13548 case IX86_BUILTIN_MOVNTPD:
13549 return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2df, arglist);
13550 case IX86_BUILTIN_MOVNTDQ:
13551 return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2di, arglist);
13552 case IX86_BUILTIN_MOVNTI:
13553 return ix86_expand_store_builtin (CODE_FOR_sse2_movntsi, arglist);
13554
13555 case IX86_BUILTIN_LOADDQA:
13556 return ix86_expand_unop_builtin (CODE_FOR_sse2_movdqa, arglist, target, 1);
13557 case IX86_BUILTIN_LOADDQU:
13558 return ix86_expand_unop_builtin (CODE_FOR_sse2_movdqu, arglist, target, 1);
13559 case IX86_BUILTIN_LOADD:
13560 return ix86_expand_unop_builtin (CODE_FOR_sse2_loadd, arglist, target, 1);
13561
13562 case IX86_BUILTIN_STOREDQA:
13563 return ix86_expand_store_builtin (CODE_FOR_sse2_movdqa, arglist);
13564 case IX86_BUILTIN_STOREDQU:
13565 return ix86_expand_store_builtin (CODE_FOR_sse2_movdqu, arglist);
13566 case IX86_BUILTIN_STORED:
13567 return ix86_expand_store_builtin (CODE_FOR_sse2_stored, arglist);
13568
13569 default:
13570 break;
13571 }
13572
13573 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
13574 if (d->code == fcode)
13575 {
13576 /* Compares are treated specially. */
13577 if (d->icode == CODE_FOR_maskcmpv4sf3
13578 || d->icode == CODE_FOR_vmmaskcmpv4sf3
13579 || d->icode == CODE_FOR_maskncmpv4sf3
13580 || d->icode == CODE_FOR_vmmaskncmpv4sf3
13581 || d->icode == CODE_FOR_maskcmpv2df3
13582 || d->icode == CODE_FOR_vmmaskcmpv2df3
13583 || d->icode == CODE_FOR_maskncmpv2df3
13584 || d->icode == CODE_FOR_vmmaskncmpv2df3)
13585 return ix86_expand_sse_compare (d, arglist, target);
13586
13587 return ix86_expand_binop_builtin (d->icode, arglist, target);
13588 }
13589
13590 for (i = 0, d = bdesc_1arg; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
13591 if (d->code == fcode)
13592 return ix86_expand_unop_builtin (d->icode, arglist, target, 0);
13593
13594 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
13595 if (d->code == fcode)
13596 return ix86_expand_sse_comi (d, arglist, target);
13597
13598 /* @@@ Should really do something sensible here. */
13599 return 0;
13600 }
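/* Usage sketch: this hook is reached during RTL expansion for every call
   to one of the __builtin_ia32_* functions registered above, e.g.

	__builtin_ia32_emms ();		-> emit_insn (gen_emms ())
	__builtin_ia32_storeups (p, x);	-> ix86_expand_store_builtin

   while two-operand, one-operand and comi builtins that have no explicit
   case fall through to the bdesc_* loops at the end.  */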
13601
13602 /* Store OPERAND to memory after reload is completed. This means
13603 that we can't easily use assign_stack_local. */
13604 rtx
13605 ix86_force_to_memory (mode, operand)
13606 enum machine_mode mode;
13607 rtx operand;
13608 {
13609 rtx result;
13610 if (!reload_completed)
13611 abort ();
13612 if (TARGET_64BIT && TARGET_RED_ZONE)
13613 {
13614 result = gen_rtx_MEM (mode,
13615 gen_rtx_PLUS (Pmode,
13616 stack_pointer_rtx,
13617 GEN_INT (-RED_ZONE_SIZE)));
13618 emit_move_insn (result, operand);
13619 }
13620 else if (TARGET_64BIT && !TARGET_RED_ZONE)
13621 {
13622 switch (mode)
13623 {
13624 case HImode:
13625 case SImode:
13626 operand = gen_lowpart (DImode, operand);
13627 /* FALLTHRU */
13628 case DImode:
13629 emit_insn (
13630 gen_rtx_SET (VOIDmode,
13631 gen_rtx_MEM (DImode,
13632 gen_rtx_PRE_DEC (DImode,
13633 stack_pointer_rtx)),
13634 operand));
13635 break;
13636 default:
13637 abort ();
13638 }
13639 result = gen_rtx_MEM (mode, stack_pointer_rtx);
13640 }
13641 else
13642 {
13643 switch (mode)
13644 {
13645 case DImode:
13646 {
13647 rtx operands[2];
13648 split_di (&operand, 1, operands, operands + 1);
13649 emit_insn (
13650 gen_rtx_SET (VOIDmode,
13651 gen_rtx_MEM (SImode,
13652 gen_rtx_PRE_DEC (Pmode,
13653 stack_pointer_rtx)),
13654 operands[1]));
13655 emit_insn (
13656 gen_rtx_SET (VOIDmode,
13657 gen_rtx_MEM (SImode,
13658 gen_rtx_PRE_DEC (Pmode,
13659 stack_pointer_rtx)),
13660 operands[0]));
13661 }
13662 break;
13663 case HImode:
13664 /* It is better to store HImode values as SImode. */
13665 if (!TARGET_PARTIAL_REG_STALL)
13666 operand = gen_lowpart (SImode, operand);
13667 /* FALLTHRU */
13668 case SImode:
13669 emit_insn (
13670 gen_rtx_SET (VOIDmode,
13671 gen_rtx_MEM (GET_MODE (operand),
13672 gen_rtx_PRE_DEC (SImode,
13673 stack_pointer_rtx)),
13674 operand));
13675 break;
13676 default:
13677 abort ();
13678 }
13679 result = gen_rtx_MEM (mode, stack_pointer_rtx);
13680 }
13681 return result;
13682 }
13683
13684 /* Free the operand from memory. */
13685 void
13686 ix86_free_from_memory (mode)
13687 enum machine_mode mode;
13688 {
13689 if (!TARGET_64BIT || !TARGET_RED_ZONE)
13690 {
13691 int size;
13692
13693 if (mode == DImode || TARGET_64BIT)
13694 size = 8;
13695 else if (mode == HImode && TARGET_PARTIAL_REG_STALL)
13696 size = 2;
13697 else
13698 size = 4;
13699 /* Use LEA to deallocate stack space. In peephole2 it will be converted
13700 to a pop or add instruction if registers are available. */
13701 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
13702 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
13703 GEN_INT (size))));
13704 }
13705 }
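/* The two routines above are intended to be used as a pair; a sketch of
   the expected calling sequence (variable names are illustrative) is

	rtx mem = ix86_force_to_memory (DImode, operand);
	... emit insns that reference MEM instead of OPERAND ...
	ix86_free_from_memory (DImode);

   i.e. the value is pushed (or stored in the red zone on 64-bit) and the
   stack adjustment is undone once the temporary is no longer needed.  */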
13706
13707 /* Put float CONST_DOUBLE in the constant pool instead of fp regs.
13708 QImode must go into class Q_REGS.
13709 Narrow ALL_REGS to GENERAL_REGS. This supports allowing movsf and
13710 movdf to do mem-to-mem moves through integer regs. */
13711 enum reg_class
13712 ix86_preferred_reload_class (x, class)
13713 rtx x;
13714 enum reg_class class;
13715 {
13716 if (GET_CODE (x) == CONST_VECTOR && x != CONST0_RTX (GET_MODE (x)))
13717 return NO_REGS;
13718 if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) != VOIDmode)
13719 {
13720 /* SSE can't load any constant directly yet. */
13721 if (SSE_CLASS_P (class))
13722 return NO_REGS;
13723 /* Floats can load 0 and 1. */
13724 if (MAYBE_FLOAT_CLASS_P (class) && standard_80387_constant_p (x))
13725 {
13726 /* Limit class to non-SSE. Use GENERAL_REGS if possible. */
13727 if (MAYBE_SSE_CLASS_P (class))
13728 return (reg_class_subset_p (class, GENERAL_REGS)
13729 ? GENERAL_REGS : FLOAT_REGS);
13730 else
13731 return class;
13732 }
13733 /* General regs can load everything. */
13734 if (reg_class_subset_p (class, GENERAL_REGS))
13735 return GENERAL_REGS;
13736 /* In case we haven't resolved FLOAT or SSE yet, give up. */
13737 if (MAYBE_FLOAT_CLASS_P (class) || MAYBE_SSE_CLASS_P (class))
13738 return NO_REGS;
13739 }
13740 if (MAYBE_MMX_CLASS_P (class) && CONSTANT_P (x))
13741 return NO_REGS;
13742 if (GET_MODE (x) == QImode && ! reg_class_subset_p (class, Q_REGS))
13743 return Q_REGS;
13744 return class;
13745 }
13746
13747 /* If we are copying between general and FP registers, we need a memory
13748 location. The same is true for SSE and MMX registers.
13749
13750 The macro can't work reliably when one of the CLASSES is a class containing
13751 registers from multiple units (SSE, MMX, integer). We avoid this by never
13752 combining those units in a single alternative in the machine description.
13753 Ensure that this constraint holds to avoid unexpected surprises.
13754
13755 When STRICT is false, we are being called from REGISTER_MOVE_COST, so do not
13756 enforce these sanity checks. */
13757 int
13758 ix86_secondary_memory_needed (class1, class2, mode, strict)
13759 enum reg_class class1, class2;
13760 enum machine_mode mode;
13761 int strict;
13762 {
13763 if (MAYBE_FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class1)
13764 || MAYBE_FLOAT_CLASS_P (class2) != FLOAT_CLASS_P (class2)
13765 || MAYBE_SSE_CLASS_P (class1) != SSE_CLASS_P (class1)
13766 || MAYBE_SSE_CLASS_P (class2) != SSE_CLASS_P (class2)
13767 || MAYBE_MMX_CLASS_P (class1) != MMX_CLASS_P (class1)
13768 || MAYBE_MMX_CLASS_P (class2) != MMX_CLASS_P (class2))
13769 {
13770 if (strict)
13771 abort ();
13772 else
13773 return 1;
13774 }
13775 return (FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class2)
13776 || (SSE_CLASS_P (class1) != SSE_CLASS_P (class2)
13777 && (mode) != SImode)
13778 || (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
13779 && (mode) != SImode));
13780 }
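/* Examples of the test above:

	ix86_secondary_memory_needed (FLOAT_REGS, SSE_REGS, DFmode, 1) == 1
	ix86_secondary_memory_needed (GENERAL_REGS, SSE_REGS, SImode, 1) == 0

   so a DFmode copy between the FP and SSE units goes through memory,
   while an SImode copy between integer and SSE registers does not, since
   a direct movd-style move exists for that case.  */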
13781 /* Return the cost of moving data from a register in class CLASS1 to
13782 one in class CLASS2.
13783
13784 It is not required that the cost always equal 2 when FROM is the same as TO;
13785 on some machines it is expensive to move between registers if they are not
13786 general registers. */
13787 int
13788 ix86_register_move_cost (mode, class1, class2)
13789 enum machine_mode mode;
13790 enum reg_class class1, class2;
13791 {
13792 /* In case we require secondary memory, compute the cost of the store
13793 followed by the load. In order to avoid bad register allocation choices,
13794 this needs to be *at least* as high as the symmetric MEMORY_MOVE_COST. */
13795
13796 if (ix86_secondary_memory_needed (class1, class2, mode, 0))
13797 {
13798 int cost = 1;
13799
13800 cost += MAX (MEMORY_MOVE_COST (mode, class1, 0),
13801 MEMORY_MOVE_COST (mode, class1, 1));
13802 cost += MAX (MEMORY_MOVE_COST (mode, class2, 0),
13803 MEMORY_MOVE_COST (mode, class2, 1));
13804
13805 /* When copying from a general purpose register we may emit multiple
13806 stores followed by a single load, causing a memory size mismatch stall.
13807 Count this as an arbitrarily high cost of 20. */
13808 if (CLASS_MAX_NREGS (class1, mode) > CLASS_MAX_NREGS (class2, mode))
13809 cost += 20;
13810
13811 /* In the case of FP/MMX moves, the registers actually overlap, and we
13812 have to switch modes in order to treat them differently. */
13813 if ((MMX_CLASS_P (class1) && MAYBE_FLOAT_CLASS_P (class2))
13814 || (MMX_CLASS_P (class2) && MAYBE_FLOAT_CLASS_P (class1)))
13815 cost += 20;
13816
13817 return cost;
13818 }
13819
13820 /* Moves between SSE/MMX and integer unit are expensive. */
13821 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
13822 || SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
13823 return ix86_cost->mmxsse_to_integer;
13824 if (MAYBE_FLOAT_CLASS_P (class1))
13825 return ix86_cost->fp_move;
13826 if (MAYBE_SSE_CLASS_P (class1))
13827 return ix86_cost->sse_move;
13828 if (MAYBE_MMX_CLASS_P (class1))
13829 return ix86_cost->mmx_move;
13830 return 2;
13831 }
13832
13833 /* Return 1 if hard register REGNO can hold a value of machine-mode MODE. */
13834 int
13835 ix86_hard_regno_mode_ok (regno, mode)
13836 int regno;
13837 enum machine_mode mode;
13838 {
13839 /* Flags, and only flags, can hold CCmode values. */
13840 if (CC_REGNO_P (regno))
13841 return GET_MODE_CLASS (mode) == MODE_CC;
13842 if (GET_MODE_CLASS (mode) == MODE_CC
13843 || GET_MODE_CLASS (mode) == MODE_RANDOM
13844 || GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
13845 return 0;
13846 if (FP_REGNO_P (regno))
13847 return VALID_FP_MODE_P (mode);
13848 if (SSE_REGNO_P (regno))
13849 return VALID_SSE_REG_MODE (mode);
13850 if (MMX_REGNO_P (regno))
13851 return VALID_MMX_REG_MODE (mode) || VALID_MMX_REG_MODE_3DNOW (mode);
13852 /* We handle both integers and floats in the general purpose registers.
13853 In the future we should be able to handle vector modes as well. */
13854 if (!VALID_INT_MODE_P (mode) && !VALID_FP_MODE_P (mode))
13855 return 0;
13856 /* Take care with QImode values - they can be in non-QI regs, but then
13857 they do cause partial register stalls. */
13858 if (regno < 4 || mode != QImode || TARGET_64BIT)
13859 return 1;
13860 return reload_in_progress || reload_completed || !TARGET_PARTIAL_REG_STALL;
13861 }
13862
13863 /* Return the cost of moving data of mode M between a
13864 register and memory. A value of 2 is the default; this cost is
13865 relative to those in `REGISTER_MOVE_COST'.
13866
13867 If moving between registers and memory is more expensive than
13868 between two registers, you should define this macro to express the
13869 relative cost.
13870
13871 Also model the increased cost of moving QImode registers in non-Q_REGS
13872 classes.
13873 */
13874 int
13875 ix86_memory_move_cost (mode, class, in)
13876 enum machine_mode mode;
13877 enum reg_class class;
13878 int in;
13879 {
13880 if (FLOAT_CLASS_P (class))
13881 {
13882 int index;
13883 switch (mode)
13884 {
13885 case SFmode:
13886 index = 0;
13887 break;
13888 case DFmode:
13889 index = 1;
13890 break;
13891 case XFmode:
13892 case TFmode:
13893 index = 2;
13894 break;
13895 default:
13896 return 100;
13897 }
13898 return in ? ix86_cost->fp_load [index] : ix86_cost->fp_store [index];
13899 }
13900 if (SSE_CLASS_P (class))
13901 {
13902 int index;
13903 switch (GET_MODE_SIZE (mode))
13904 {
13905 case 4:
13906 index = 0;
13907 break;
13908 case 8:
13909 index = 1;
13910 break;
13911 case 16:
13912 index = 2;
13913 break;
13914 default:
13915 return 100;
13916 }
13917 return in ? ix86_cost->sse_load [index] : ix86_cost->sse_store [index];
13918 }
13919 if (MMX_CLASS_P (class))
13920 {
13921 int index;
13922 switch (GET_MODE_SIZE (mode))
13923 {
13924 case 4:
13925 index = 0;
13926 break;
13927 case 8:
13928 index = 1;
13929 break;
13930 default:
13931 return 100;
13932 }
13933 return in ? ix86_cost->mmx_load [index] : ix86_cost->mmx_store [index];
13934 }
13935 switch (GET_MODE_SIZE (mode))
13936 {
13937 case 1:
13938 if (in)
13939 return (Q_CLASS_P (class) ? ix86_cost->int_load[0]
13940 : ix86_cost->movzbl_load);
13941 else
13942 return (Q_CLASS_P (class) ? ix86_cost->int_store[0]
13943 : ix86_cost->int_store[0] + 4);
13944 break;
13945 case 2:
13946 return in ? ix86_cost->int_load[1] : ix86_cost->int_store[1];
13947 default:
13948 /* Compute the number of 32-bit moves needed. TFmode is moved as XFmode. */
13949 if (mode == TFmode)
13950 mode = XFmode;
13951 return ((in ? ix86_cost->int_load[2] : ix86_cost->int_store[2])
13952 * ((int) GET_MODE_SIZE (mode)
13953 + UNITS_PER_WORD -1 ) / UNITS_PER_WORD);
13954 }
13955 }
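/* Worked example for the integer fall-through above, assuming a 32-bit
   target where UNITS_PER_WORD is 4: loading a DImode value into
   GENERAL_REGS costs

	ix86_cost->int_load[2] * ((8 + 4 - 1) / 4)  ==  2 * int_load[2]

   i.e. two 32-bit moves, while a QImode store into a class outside
   Q_REGS is charged int_store[0] + 4 for the extra work involved.  */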
13956
13957 #if defined (DO_GLOBAL_CTORS_BODY) && defined (HAS_INIT_SECTION)
13958 static void
13959 ix86_svr3_asm_out_constructor (symbol, priority)
13960 rtx symbol;
13961 int priority ATTRIBUTE_UNUSED;
13962 {
13963 init_section ();
13964 fputs ("\tpushl $", asm_out_file);
13965 assemble_name (asm_out_file, XSTR (symbol, 0));
13966 fputc ('\n', asm_out_file);
13967 }
13968 #endif
13969
13970 #if TARGET_MACHO
13971
13972 static int current_machopic_label_num;
13973
13974 /* Given a symbol name and its associated stub, write out the
13975 definition of the stub. */
13976
13977 void
13978 machopic_output_stub (file, symb, stub)
13979 FILE *file;
13980 const char *symb, *stub;
13981 {
13982 unsigned int length;
13983 char *binder_name, *symbol_name, lazy_ptr_name[32];
13984 int label = ++current_machopic_label_num;
13985
13986 /* Lose our funky encoding stuff so it doesn't contaminate the stub. */
13987 symb = (*targetm.strip_name_encoding) (symb);
13988
13989 length = strlen (stub);
13990 binder_name = alloca (length + 32);
13991 GEN_BINDER_NAME_FOR_STUB (binder_name, stub, length);
13992
13993 length = strlen (symb);
13994 symbol_name = alloca (length + 32);
13995 GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name, symb, length);
13996
13997 sprintf (lazy_ptr_name, "L%d$lz", label);
13998
13999 if (MACHOPIC_PURE)
14000 machopic_picsymbol_stub_section ();
14001 else
14002 machopic_symbol_stub_section ();
14003
14004 fprintf (file, "%s:\n", stub);
14005 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
14006
14007 if (MACHOPIC_PURE)
14008 {
14009 fprintf (file, "\tcall LPC$%d\nLPC$%d:\tpopl %%eax\n", label, label);
14010 fprintf (file, "\tmovl %s-LPC$%d(%%eax),%%edx\n", lazy_ptr_name, label);
14011 fprintf (file, "\tjmp %%edx\n");
14012 }
14013 else
14014 fprintf (file, "\tjmp *%s\n", lazy_ptr_name);
14015
14016 fprintf (file, "%s:\n", binder_name);
14017
14018 if (MACHOPIC_PURE)
14019 {
14020 fprintf (file, "\tlea %s-LPC$%d(%%eax),%%eax\n", lazy_ptr_name, label);
14021 fprintf (file, "\tpushl %%eax\n");
14022 }
14023 else
14024 fprintf (file, "\t pushl $%s\n", lazy_ptr_name);
14025
14026 fprintf (file, "\tjmp dyld_stub_binding_helper\n");
14027
14028 machopic_lazy_symbol_ptr_section ();
14029 fprintf (file, "%s:\n", lazy_ptr_name);
14030 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
14031 fprintf (file, "\t.long %s\n", binder_name);
14032 }
14033 #endif /* TARGET_MACHO */
14034
14035 /* Order the registers for the register allocator. */
14036
14037 void
14038 x86_order_regs_for_local_alloc ()
14039 {
14040 int pos = 0;
14041 int i;
14042
14043 /* First allocate the local general purpose registers. */
14044 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
14045 if (GENERAL_REGNO_P (i) && call_used_regs[i])
14046 reg_alloc_order [pos++] = i;
14047
14048 /* Global general purpose registers. */
14049 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
14050 if (GENERAL_REGNO_P (i) && !call_used_regs[i])
14051 reg_alloc_order [pos++] = i;
14052
14053 /* x87 registers come first in case we are doing FP math
14054 using them. */
14055 if (!TARGET_SSE_MATH)
14056 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
14057 reg_alloc_order [pos++] = i;
14058
14059 /* SSE registers. */
14060 for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++)
14061 reg_alloc_order [pos++] = i;
14062 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
14063 reg_alloc_order [pos++] = i;
14064
14065 /* x87 registers. */
14066 if (TARGET_SSE_MATH)
14067 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
14068 reg_alloc_order [pos++] = i;
14069
14070 for (i = FIRST_MMX_REG; i <= LAST_MMX_REG; i++)
14071 reg_alloc_order [pos++] = i;
14072
14073 /* Initialize the rest of the array, as we do not allocate some registers
14074 at all. */
14075 while (pos < FIRST_PSEUDO_REGISTER)
14076 reg_alloc_order [pos++] = 0;
14077 }
14078
14079 /* Returns an expression indicating where the this parameter is
14080 located on entry to the FUNCTION. */
14081
14082 static rtx
14083 x86_this_parameter (function)
14084 tree function;
14085 {
14086 tree type = TREE_TYPE (function);
14087
14088 if (TARGET_64BIT)
14089 {
14090 int n = aggregate_value_p (TREE_TYPE (type)) != 0;
14091 return gen_rtx_REG (DImode, x86_64_int_parameter_registers[n]);
14092 }
14093
14094 if (ix86_fntype_regparm (type) > 0)
14095 {
14096 tree parm;
14097
14098 parm = TYPE_ARG_TYPES (type);
14099 /* Figure out whether or not the function has a variable number of
14100 arguments. */
14101 for (; parm; parm = TREE_CHAIN (parm))
14102 if (TREE_VALUE (parm) == void_type_node)
14103 break;
14104 /* If the argument list is fixed (no varargs), the this parameter is in %eax. */
14105 if (parm)
14106 return gen_rtx_REG (SImode, 0);
14107 }
14108
14109 if (aggregate_value_p (TREE_TYPE (type)))
14110 return gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, 8));
14111 else
14112 return gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, 4));
14113 }
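/* A worked summary of the cases above (a sketch; the C++ class is
   hypothetical).  For

	struct S { int a, b, c; };
	struct S C::f ();		/+ aggregate return value +/

   the `this' pointer on entry is found at:

     64-bit:	%rdi, or %rsi for C::f, because the hidden return-value
		pointer occupies the first parameter register.
     32-bit regparm with a fixed argument list:
		%eax (the first regparm register).
     32-bit otherwise:
		4(%esp), or 8(%esp) for C::f, past the return address
		and the hidden return-value pointer.  */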
14114
14115 /* Determine whether x86_output_mi_thunk can succeed. */
14116
14117 static bool
14118 x86_can_output_mi_thunk (thunk, delta, vcall_offset, function)
14119 tree thunk ATTRIBUTE_UNUSED;
14120 HOST_WIDE_INT delta ATTRIBUTE_UNUSED;
14121 HOST_WIDE_INT vcall_offset;
14122 tree function;
14123 {
14124 /* 64-bit can handle anything. */
14125 if (TARGET_64BIT)
14126 return true;
14127
14128 /* For 32-bit, everything's fine if we have one free register. */
14129 if (ix86_fntype_regparm (TREE_TYPE (function)) < 3)
14130 return true;
14131
14132 /* Need a free register for vcall_offset. */
14133 if (vcall_offset)
14134 return false;
14135
14136 /* Need a free register for GOT references. */
14137 if (flag_pic && !(*targetm.binds_local_p) (function))
14138 return false;
14139
14140 /* Otherwise ok. */
14141 return true;
14142 }
14143
14144 /* Output the assembler code for a thunk function. THUNK is the
14145 declaration for the thunk function itself, FUNCTION is the decl for
14146 the target function. DELTA is an immediate constant offset to be
14147 added to THIS. If VCALL_OFFSET is non-zero, the word at
14148 *(*this + vcall_offset) should be added to THIS. */
14149
14150 static void
14151 x86_output_mi_thunk (file, thunk, delta, vcall_offset, function)
14152 FILE *file ATTRIBUTE_UNUSED;
14153 tree thunk ATTRIBUTE_UNUSED;
14154 HOST_WIDE_INT delta;
14155 HOST_WIDE_INT vcall_offset;
14156 tree function;
14157 {
14158 rtx xops[3];
14159 rtx this = x86_this_parameter (function);
14160 rtx this_reg, tmp;
14161
14162 /* If VCALL_OFFSET, we'll need THIS in a register. Might as well
14163 pull it in now and let DELTA benefit. */
14164 if (REG_P (this))
14165 this_reg = this;
14166 else if (vcall_offset)
14167 {
14168 /* Put the this parameter into %eax. */
14169 xops[0] = this;
14170 xops[1] = this_reg = gen_rtx_REG (Pmode, 0);
14171 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
14172 }
14173 else
14174 this_reg = NULL_RTX;
14175
14176 /* Adjust the this parameter by a fixed constant. */
14177 if (delta)
14178 {
14179 xops[0] = GEN_INT (delta);
14180 xops[1] = this_reg ? this_reg : this;
14181 if (TARGET_64BIT)
14182 {
14183 if (!x86_64_general_operand (xops[0], DImode))
14184 {
14185 tmp = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 2 /* R10 */);
14186 xops[1] = tmp;
14187 output_asm_insn ("mov{q}\t{%1, %0|%0, %1}", xops);
14188 xops[0] = tmp;
14189 xops[1] = this;
14190 }
14191 output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops);
14192 }
14193 else
14194 output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops);
14195 }
14196
14197 /* Adjust the this parameter by a value stored in the vtable. */
14198 if (vcall_offset)
14199 {
14200 if (TARGET_64BIT)
14201 tmp = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 2 /* R10 */);
14202 else
14203 tmp = gen_rtx_REG (SImode, 2 /* ECX */);
14204
14205 xops[0] = gen_rtx_MEM (Pmode, this_reg);
14206 xops[1] = tmp;
14207 if (TARGET_64BIT)
14208 output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops);
14209 else
14210 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
14211
14212 /* Adjust the this parameter. */
14213 xops[0] = gen_rtx_MEM (Pmode, plus_constant (tmp, vcall_offset));
14214 if (TARGET_64BIT && !memory_operand (xops[0], Pmode))
14215 {
14216 rtx tmp2 = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 3 /* R11 */);
14217 xops[0] = GEN_INT (vcall_offset);
14218 xops[1] = tmp2;
14219 output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops);
14220 xops[0] = gen_rtx_MEM (Pmode, gen_rtx_PLUS (Pmode, tmp, tmp2));
14221 }
14222 xops[1] = this_reg;
14223 if (TARGET_64BIT)
14224 output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops);
14225 else
14226 output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops);
14227 }
14228
14229 /* If necessary, drop THIS back to its stack slot. */
14230 if (this_reg && this_reg != this)
14231 {
14232 xops[0] = this_reg;
14233 xops[1] = this;
14234 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
14235 }
14236
14237 xops[0] = DECL_RTL (function);
14238 if (TARGET_64BIT)
14239 {
14240 if (!flag_pic || (*targetm.binds_local_p) (function))
14241 output_asm_insn ("jmp\t%P0", xops);
14242 else
14243 {
14244 tmp = XEXP (xops[0], 0);
14245 tmp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, tmp), UNSPEC_GOTPCREL);
14246 tmp = gen_rtx_CONST (Pmode, tmp);
14247 tmp = gen_rtx_MEM (QImode, tmp);
14248 xops[0] = tmp;
14249 output_asm_insn ("jmp\t%A0", xops);
14250 }
14251 }
14252 else
14253 {
14254 if (!flag_pic || (*targetm.binds_local_p) (function))
14255 output_asm_insn ("jmp\t%P0", xops);
14256 else
14257 {
14258 tmp = gen_rtx_REG (SImode, 2 /* ECX */);
14259 output_set_got (tmp);
14260
14261 xops[1] = tmp;
14262 output_asm_insn ("mov{l}\t{%0@GOT(%1), %1|%1, %0@GOT[%1]}", xops);
14263 output_asm_insn ("jmp\t{*}%1", xops);
14264 }
14265 }
14266 }
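/* For illustration (a sketch, assuming the usual AT&T output dialect):
   a 32-bit, non-PIC thunk with the `this' pointer at 4(%esp),
   DELTA = -4 and VCALL_OFFSET = -8 comes out roughly as

	movl	4(%esp), %eax
	addl	$-4, %eax
	movl	(%eax), %ecx
	addl	-8(%ecx), %eax
	movl	%eax, 4(%esp)
	jmp	<target function>

   i.e. adjust `this' by the fixed delta, then by the word found at
   *this + vcall_offset, store it back and tail-call the target.  */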
14267
14268 int
14269 x86_field_alignment (field, computed)
14270 tree field;
14271 int computed;
14272 {
14273 enum machine_mode mode;
14274 tree type = TREE_TYPE (field);
14275
14276 if (TARGET_64BIT || TARGET_ALIGN_DOUBLE)
14277 return computed;
14278 mode = TYPE_MODE (TREE_CODE (type) == ARRAY_TYPE
14279 ? get_inner_array_type (type) : type);
14280 if (mode == DFmode || mode == DCmode
14281 || GET_MODE_CLASS (mode) == MODE_INT
14282 || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
14283 return MIN (32, computed);
14284 return computed;
14285 }
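/* Example of the effect above (illustrative only): on ia32 without
   -malign-double, the 32-bit cap gives the traditional SVR4 i386
   struct layout.  */
#if 0
struct s
{
  int i;		/* offset 0 */
  double d;		/* offset 4, not 8: alignment capped at 32 bits */
};
/* sizeof (struct s) is 12 here; with -malign-double (or on 64-bit,
   where the cap is skipped) d moves to offset 8 and the size is 16.  */
#endif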
14286
14287 /* Output assembler code to FILE to increment profiler label # LABELNO
14288 for profiling a function entry. */
14289 void
14290 x86_function_profiler (file, labelno)
14291 FILE *file;
14292 int labelno;
14293 {
14294 if (TARGET_64BIT)
14295 if (flag_pic)
14296 {
14297 #ifndef NO_PROFILE_COUNTERS
14298 fprintf (file, "\tleaq\t%sP%d(%%rip),%%r11\n", LPREFIX, labelno);
14299 #endif
14300 fprintf (file, "\tcall\t*%s@GOTPCREL(%%rip)\n", MCOUNT_NAME);
14301 }
14302 else
14303 {
14304 #ifndef NO_PROFILE_COUNTERS
14305 fprintf (file, "\tmovq\t$%sP%d,%%r11\n", LPREFIX, labelno);
14306 #endif
14307 fprintf (file, "\tcall\t%s\n", MCOUNT_NAME);
14308 }
14309 else if (flag_pic)
14310 {
14311 #ifndef NO_PROFILE_COUNTERS
14312 fprintf (file, "\tleal\t%sP%d@GOTOFF(%%ebx),%%%s\n",
14313 LPREFIX, labelno, PROFILE_COUNT_REGISTER);
14314 #endif
14315 fprintf (file, "\tcall\t*%s@GOT(%%ebx)\n", MCOUNT_NAME);
14316 }
14317 else
14318 {
14319 #ifndef NO_PROFILE_COUNTERS
14320 fprintf (file, "\tmovl\t$%sP%d,%%%s\n", LPREFIX, labelno,
14321 PROFILE_COUNT_REGISTER);
14322 #endif
14323 fprintf (file, "\tcall\t%s\n", MCOUNT_NAME);
14324 }
14325 }
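/* For illustration (a sketch; MCOUNT_NAME, LPREFIX and
   PROFILE_COUNT_REGISTER are target macros, here assumed to be
   "mcount", ".L" and "edx"): a 32-bit, non-PIC function entry gets

	movl	$.LP0,%edx
	call	mcount

   while 64-bit PIC code loads the counter label %rip-relatively and
   calls mcount through the GOT:

	leaq	.LP0(%rip),%r11
	call	*mcount@GOTPCREL(%rip)
*/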
14326
14327 /* Implement machine specific optimizations.
14328 At the moment we implement a single transformation: AMD Athlon works faster
14329 when RET is not the destination of a conditional jump or directly preceded
14330 by another jump instruction. We avoid the penalty by inserting a NOP just
14331 before the RET instruction in such cases. */
14332 void
14333 x86_machine_dependent_reorg (first)
14334 rtx first ATTRIBUTE_UNUSED;
14335 {
14336 edge e;
14337
14338 if (!TARGET_ATHLON || !optimize || optimize_size)
14339 return;
14340 for (e = EXIT_BLOCK_PTR->pred; e; e = e->pred_next)
14341 {
14342 basic_block bb = e->src;
14343 rtx ret = bb->end;
14344 rtx prev;
14345 bool insert = false;
14346
14347 if (!returnjump_p (ret) || !maybe_hot_bb_p (bb))
14348 continue;
14349 prev = prev_nonnote_insn (ret);
14350 if (prev && GET_CODE (prev) == CODE_LABEL)
14351 {
14352 edge e;
14353 for (e = bb->pred; e; e = e->pred_next)
14354 if (EDGE_FREQUENCY (e) && e->src->index > 0
14355 && !(e->flags & EDGE_FALLTHRU))
14356 insert = true;
14357 }
14358 if (!insert)
14359 {
14360 prev = prev_real_insn (ret);
14361 if (prev && GET_CODE (prev) == JUMP_INSN
14362 && any_condjump_p (prev))
14363 insert = true;
14364 }
14365 if (insert)
14366 emit_insn_before (gen_nop (), ret);
14367 }
14368 }
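/* For example (illustrative only): with a hot return block that is
   entered through a jump, code of the form

	jne	.L2
	...
   .L2:
	ret

   is turned into

   .L2:
	nop
	ret

   so the RET is no longer reached directly by a jump; the same NOP is
   inserted when the RET immediately follows a conditional jump.  */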
14369
14370 #include "gt-i386.h"