1 /* Subroutines used for code generation on IA-32.
2 Copyright (C) 1988, 1992, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
3 2002 Free Software Foundation, Inc.
4
5 This file is part of GNU CC.
6
7 GNU CC is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 2, or (at your option)
10 any later version.
11
12 GNU CC is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
16
17 You should have received a copy of the GNU General Public License
18 along with GNU CC; see the file COPYING. If not, write to
19 the Free Software Foundation, 59 Temple Place - Suite 330,
20 Boston, MA 02111-1307, USA. */
21
22 #include "config.h"
23 #include "system.h"
24 #include "rtl.h"
25 #include "tree.h"
26 #include "tm_p.h"
27 #include "regs.h"
28 #include "hard-reg-set.h"
29 #include "real.h"
30 #include "insn-config.h"
31 #include "conditions.h"
32 #include "output.h"
33 #include "insn-attr.h"
34 #include "flags.h"
35 #include "except.h"
36 #include "function.h"
37 #include "recog.h"
38 #include "expr.h"
39 #include "optabs.h"
40 #include "toplev.h"
41 #include "basic-block.h"
42 #include "ggc.h"
43 #include "target.h"
44 #include "target-def.h"
45 #include "langhooks.h"
46
47 #ifndef CHECK_STACK_LIMIT
48 #define CHECK_STACK_LIMIT (-1)
49 #endif
50
51 /* Processor costs (relative to an add) */
52 static const
 53 struct processor_costs size_cost = {	/* costs for tuning for size */
54 2, /* cost of an add instruction */
55 3, /* cost of a lea instruction */
56 2, /* variable shift costs */
57 3, /* constant shift costs */
58 3, /* cost of starting a multiply */
59 0, /* cost of multiply per each bit set */
60 3, /* cost of a divide/mod */
61 3, /* cost of movsx */
62 3, /* cost of movzx */
63 0, /* "large" insn */
64 2, /* MOVE_RATIO */
65 2, /* cost for loading QImode using movzbl */
66 {2, 2, 2}, /* cost of loading integer registers
67 in QImode, HImode and SImode.
68 Relative to reg-reg move (2). */
69 {2, 2, 2}, /* cost of storing integer registers */
70 2, /* cost of reg,reg fld/fst */
71 {2, 2, 2}, /* cost of loading fp registers
72 in SFmode, DFmode and XFmode */
 73   {2, 2, 2},				/* cost of storing fp registers */
74 3, /* cost of moving MMX register */
75 {3, 3}, /* cost of loading MMX registers
76 in SImode and DImode */
77 {3, 3}, /* cost of storing MMX registers
78 in SImode and DImode */
79 3, /* cost of moving SSE register */
80 {3, 3, 3}, /* cost of loading SSE registers
81 in SImode, DImode and TImode */
82 {3, 3, 3}, /* cost of storing SSE registers
83 in SImode, DImode and TImode */
84 3, /* MMX or SSE register to integer */
85 0, /* size of prefetch block */
86 0, /* number of parallel prefetches */
87 2, /* cost of FADD and FSUB insns. */
88 2, /* cost of FMUL instruction. */
89 2, /* cost of FDIV instruction. */
90 2, /* cost of FABS instruction. */
91 2, /* cost of FCHS instruction. */
92 2, /* cost of FSQRT instruction. */
93 };
94
95 /* Processor costs (relative to an add) */
96 static const
97 struct processor_costs i386_cost = { /* 386 specific costs */
98 1, /* cost of an add instruction */
99 1, /* cost of a lea instruction */
100 3, /* variable shift costs */
101 2, /* constant shift costs */
102 6, /* cost of starting a multiply */
103 1, /* cost of multiply per each bit set */
104 23, /* cost of a divide/mod */
105 3, /* cost of movsx */
106 2, /* cost of movzx */
107 15, /* "large" insn */
108 3, /* MOVE_RATIO */
109 4, /* cost for loading QImode using movzbl */
110 {2, 4, 2}, /* cost of loading integer registers
111 in QImode, HImode and SImode.
112 Relative to reg-reg move (2). */
113 {2, 4, 2}, /* cost of storing integer registers */
114 2, /* cost of reg,reg fld/fst */
115 {8, 8, 8}, /* cost of loading fp registers
116 in SFmode, DFmode and XFmode */
 117   {8, 8, 8},				/* cost of storing fp registers */
118 2, /* cost of moving MMX register */
119 {4, 8}, /* cost of loading MMX registers
120 in SImode and DImode */
121 {4, 8}, /* cost of storing MMX registers
122 in SImode and DImode */
123 2, /* cost of moving SSE register */
124 {4, 8, 16}, /* cost of loading SSE registers
125 in SImode, DImode and TImode */
126 {4, 8, 16}, /* cost of storing SSE registers
127 in SImode, DImode and TImode */
128 3, /* MMX or SSE register to integer */
129 0, /* size of prefetch block */
130 0, /* number of parallel prefetches */
131 23, /* cost of FADD and FSUB insns. */
132 27, /* cost of FMUL instruction. */
133 88, /* cost of FDIV instruction. */
134 22, /* cost of FABS instruction. */
135 24, /* cost of FCHS instruction. */
136 122, /* cost of FSQRT instruction. */
137 };
138
139 static const
140 struct processor_costs i486_cost = { /* 486 specific costs */
141 1, /* cost of an add instruction */
142 1, /* cost of a lea instruction */
143 3, /* variable shift costs */
144 2, /* constant shift costs */
145 12, /* cost of starting a multiply */
146 1, /* cost of multiply per each bit set */
147 40, /* cost of a divide/mod */
148 3, /* cost of movsx */
149 2, /* cost of movzx */
150 15, /* "large" insn */
151 3, /* MOVE_RATIO */
152 4, /* cost for loading QImode using movzbl */
153 {2, 4, 2}, /* cost of loading integer registers
154 in QImode, HImode and SImode.
155 Relative to reg-reg move (2). */
156 {2, 4, 2}, /* cost of storing integer registers */
157 2, /* cost of reg,reg fld/fst */
158 {8, 8, 8}, /* cost of loading fp registers
159 in SFmode, DFmode and XFmode */
 160   {8, 8, 8},				/* cost of storing fp registers */
161 2, /* cost of moving MMX register */
162 {4, 8}, /* cost of loading MMX registers
163 in SImode and DImode */
164 {4, 8}, /* cost of storing MMX registers
165 in SImode and DImode */
166 2, /* cost of moving SSE register */
167 {4, 8, 16}, /* cost of loading SSE registers
168 in SImode, DImode and TImode */
169 {4, 8, 16}, /* cost of storing SSE registers
170 in SImode, DImode and TImode */
171 3, /* MMX or SSE register to integer */
172 0, /* size of prefetch block */
173 0, /* number of parallel prefetches */
174 8, /* cost of FADD and FSUB insns. */
175 16, /* cost of FMUL instruction. */
176 73, /* cost of FDIV instruction. */
177 3, /* cost of FABS instruction. */
178 3, /* cost of FCHS instruction. */
179 83, /* cost of FSQRT instruction. */
180 };
181
182 static const
183 struct processor_costs pentium_cost = {
184 1, /* cost of an add instruction */
185 1, /* cost of a lea instruction */
186 4, /* variable shift costs */
187 1, /* constant shift costs */
188 11, /* cost of starting a multiply */
189 0, /* cost of multiply per each bit set */
190 25, /* cost of a divide/mod */
191 3, /* cost of movsx */
192 2, /* cost of movzx */
193 8, /* "large" insn */
194 6, /* MOVE_RATIO */
195 6, /* cost for loading QImode using movzbl */
196 {2, 4, 2}, /* cost of loading integer registers
197 in QImode, HImode and SImode.
198 Relative to reg-reg move (2). */
199 {2, 4, 2}, /* cost of storing integer registers */
200 2, /* cost of reg,reg fld/fst */
201 {2, 2, 6}, /* cost of loading fp registers
202 in SFmode, DFmode and XFmode */
 203   {4, 4, 6},				/* cost of storing fp registers */
204 8, /* cost of moving MMX register */
205 {8, 8}, /* cost of loading MMX registers
206 in SImode and DImode */
207 {8, 8}, /* cost of storing MMX registers
208 in SImode and DImode */
209 2, /* cost of moving SSE register */
210 {4, 8, 16}, /* cost of loading SSE registers
211 in SImode, DImode and TImode */
212 {4, 8, 16}, /* cost of storing SSE registers
213 in SImode, DImode and TImode */
214 3, /* MMX or SSE register to integer */
215 0, /* size of prefetch block */
216 0, /* number of parallel prefetches */
217 3, /* cost of FADD and FSUB insns. */
218 3, /* cost of FMUL instruction. */
219 39, /* cost of FDIV instruction. */
220 1, /* cost of FABS instruction. */
221 1, /* cost of FCHS instruction. */
222 70, /* cost of FSQRT instruction. */
223 };
224
225 static const
226 struct processor_costs pentiumpro_cost = {
227 1, /* cost of an add instruction */
228 1, /* cost of a lea instruction */
229 1, /* variable shift costs */
230 1, /* constant shift costs */
231 4, /* cost of starting a multiply */
232 0, /* cost of multiply per each bit set */
233 17, /* cost of a divide/mod */
234 1, /* cost of movsx */
235 1, /* cost of movzx */
236 8, /* "large" insn */
237 6, /* MOVE_RATIO */
238 2, /* cost for loading QImode using movzbl */
239 {4, 4, 4}, /* cost of loading integer registers
240 in QImode, HImode and SImode.
241 Relative to reg-reg move (2). */
242 {2, 2, 2}, /* cost of storing integer registers */
243 2, /* cost of reg,reg fld/fst */
244 {2, 2, 6}, /* cost of loading fp registers
245 in SFmode, DFmode and XFmode */
 246   {4, 4, 6},				/* cost of storing fp registers */
247 2, /* cost of moving MMX register */
248 {2, 2}, /* cost of loading MMX registers
249 in SImode and DImode */
250 {2, 2}, /* cost of storing MMX registers
251 in SImode and DImode */
252 2, /* cost of moving SSE register */
253 {2, 2, 8}, /* cost of loading SSE registers
254 in SImode, DImode and TImode */
255 {2, 2, 8}, /* cost of storing SSE registers
256 in SImode, DImode and TImode */
257 3, /* MMX or SSE register to integer */
258 32, /* size of prefetch block */
259 6, /* number of parallel prefetches */
260 3, /* cost of FADD and FSUB insns. */
261 5, /* cost of FMUL instruction. */
262 56, /* cost of FDIV instruction. */
263 2, /* cost of FABS instruction. */
264 2, /* cost of FCHS instruction. */
265 56, /* cost of FSQRT instruction. */
266 };
267
268 static const
269 struct processor_costs k6_cost = {
270 1, /* cost of an add instruction */
271 2, /* cost of a lea instruction */
272 1, /* variable shift costs */
273 1, /* constant shift costs */
274 3, /* cost of starting a multiply */
275 0, /* cost of multiply per each bit set */
276 18, /* cost of a divide/mod */
277 2, /* cost of movsx */
278 2, /* cost of movzx */
279 8, /* "large" insn */
280 4, /* MOVE_RATIO */
281 3, /* cost for loading QImode using movzbl */
282 {4, 5, 4}, /* cost of loading integer registers
283 in QImode, HImode and SImode.
284 Relative to reg-reg move (2). */
285 {2, 3, 2}, /* cost of storing integer registers */
286 4, /* cost of reg,reg fld/fst */
287 {6, 6, 6}, /* cost of loading fp registers
288 in SFmode, DFmode and XFmode */
 289   {4, 4, 4},				/* cost of storing fp registers */
290 2, /* cost of moving MMX register */
291 {2, 2}, /* cost of loading MMX registers
292 in SImode and DImode */
293 {2, 2}, /* cost of storing MMX registers
294 in SImode and DImode */
295 2, /* cost of moving SSE register */
296 {2, 2, 8}, /* cost of loading SSE registers
297 in SImode, DImode and TImode */
298 {2, 2, 8}, /* cost of storing SSE registers
299 in SImode, DImode and TImode */
300 6, /* MMX or SSE register to integer */
301 32, /* size of prefetch block */
302 1, /* number of parallel prefetches */
303 2, /* cost of FADD and FSUB insns. */
304 2, /* cost of FMUL instruction. */
305 56, /* cost of FDIV instruction. */
306 2, /* cost of FABS instruction. */
307 2, /* cost of FCHS instruction. */
308 56, /* cost of FSQRT instruction. */
309 };
310
311 static const
312 struct processor_costs athlon_cost = {
313 1, /* cost of an add instruction */
314 2, /* cost of a lea instruction */
315 1, /* variable shift costs */
316 1, /* constant shift costs */
317 5, /* cost of starting a multiply */
318 0, /* cost of multiply per each bit set */
319 42, /* cost of a divide/mod */
320 1, /* cost of movsx */
321 1, /* cost of movzx */
322 8, /* "large" insn */
323 9, /* MOVE_RATIO */
324 4, /* cost for loading QImode using movzbl */
325 {3, 4, 3}, /* cost of loading integer registers
326 in QImode, HImode and SImode.
327 Relative to reg-reg move (2). */
328 {3, 4, 3}, /* cost of storing integer registers */
329 4, /* cost of reg,reg fld/fst */
330 {4, 4, 12}, /* cost of loading fp registers
331 in SFmode, DFmode and XFmode */
 332   {6, 6, 8},				/* cost of storing fp registers */
333 2, /* cost of moving MMX register */
334 {4, 4}, /* cost of loading MMX registers
335 in SImode and DImode */
336 {4, 4}, /* cost of storing MMX registers
337 in SImode and DImode */
338 2, /* cost of moving SSE register */
339 {4, 4, 6}, /* cost of loading SSE registers
340 in SImode, DImode and TImode */
341 {4, 4, 5}, /* cost of storing SSE registers
342 in SImode, DImode and TImode */
343 5, /* MMX or SSE register to integer */
344 64, /* size of prefetch block */
345 6, /* number of parallel prefetches */
346 4, /* cost of FADD and FSUB insns. */
347 4, /* cost of FMUL instruction. */
348 24, /* cost of FDIV instruction. */
349 2, /* cost of FABS instruction. */
350 2, /* cost of FCHS instruction. */
351 35, /* cost of FSQRT instruction. */
352 };
353
354 static const
355 struct processor_costs pentium4_cost = {
356 1, /* cost of an add instruction */
357 1, /* cost of a lea instruction */
358 8, /* variable shift costs */
359 8, /* constant shift costs */
360 30, /* cost of starting a multiply */
361 0, /* cost of multiply per each bit set */
362 112, /* cost of a divide/mod */
363 1, /* cost of movsx */
364 1, /* cost of movzx */
365 16, /* "large" insn */
366 6, /* MOVE_RATIO */
367 2, /* cost for loading QImode using movzbl */
368 {4, 5, 4}, /* cost of loading integer registers
369 in QImode, HImode and SImode.
370 Relative to reg-reg move (2). */
371 {2, 3, 2}, /* cost of storing integer registers */
372 2, /* cost of reg,reg fld/fst */
373 {2, 2, 6}, /* cost of loading fp registers
374 in SFmode, DFmode and XFmode */
 375   {4, 4, 6},				/* cost of storing fp registers */
376 2, /* cost of moving MMX register */
377 {2, 2}, /* cost of loading MMX registers
378 in SImode and DImode */
379 {2, 2}, /* cost of storing MMX registers
380 in SImode and DImode */
381 12, /* cost of moving SSE register */
382 {12, 12, 12}, /* cost of loading SSE registers
383 in SImode, DImode and TImode */
384 {2, 2, 8}, /* cost of storing SSE registers
385 in SImode, DImode and TImode */
386 10, /* MMX or SSE register to integer */
387 64, /* size of prefetch block */
388 6, /* number of parallel prefetches */
389 5, /* cost of FADD and FSUB insns. */
390 7, /* cost of FMUL instruction. */
391 43, /* cost of FDIV instruction. */
392 2, /* cost of FABS instruction. */
393 2, /* cost of FCHS instruction. */
394 43, /* cost of FSQRT instruction. */
395 };
396
397 const struct processor_costs *ix86_cost = &pentium_cost;
398
399 /* Processor feature/optimization bitmasks. */
400 #define m_386 (1<<PROCESSOR_I386)
401 #define m_486 (1<<PROCESSOR_I486)
402 #define m_PENT (1<<PROCESSOR_PENTIUM)
403 #define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
404 #define m_K6 (1<<PROCESSOR_K6)
405 #define m_ATHLON (1<<PROCESSOR_ATHLON)
406 #define m_PENT4 (1<<PROCESSOR_PENTIUM4)
407
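/* Each x86_* constant below is a bitmask, built from the m_* masks above,
   of the processors for which the named feature or optimization is thought
   to be profitable.  They are normally consumed through the corresponding
   TARGET_* macros in i386.h, roughly of the form

     #define TARGET_USE_LEAVE (x86_use_leave & (1 << ix86_cpu))

   (the exact macro spelling lives in i386.h; the line above is only an
   illustration).  */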
408 const int x86_use_leave = m_386 | m_K6 | m_ATHLON;
409 const int x86_push_memory = m_386 | m_K6 | m_ATHLON | m_PENT4;
410 const int x86_zero_extend_with_and = m_486 | m_PENT;
411 const int x86_movx = m_ATHLON | m_PPRO | m_PENT4 /* m_386 | m_K6 */;
412 const int x86_double_with_add = ~m_386;
413 const int x86_use_bit_test = m_386;
414 const int x86_unroll_strlen = m_486 | m_PENT | m_PPRO | m_ATHLON | m_K6;
415 const int x86_cmove = m_PPRO | m_ATHLON | m_PENT4;
416 const int x86_3dnow_a = m_ATHLON;
417 const int x86_deep_branch = m_PPRO | m_K6 | m_ATHLON | m_PENT4;
418 const int x86_branch_hints = m_PENT4;
419 const int x86_use_sahf = m_PPRO | m_K6 | m_PENT4;
420 const int x86_partial_reg_stall = m_PPRO;
421 const int x86_use_loop = m_K6;
422 const int x86_use_fiop = ~(m_PPRO | m_ATHLON | m_PENT);
423 const int x86_use_mov0 = m_K6;
424 const int x86_use_cltd = ~(m_PENT | m_K6);
425 const int x86_read_modify_write = ~m_PENT;
426 const int x86_read_modify = ~(m_PENT | m_PPRO);
427 const int x86_split_long_moves = m_PPRO;
428 const int x86_promote_QImode = m_K6 | m_PENT | m_386 | m_486 | m_ATHLON;
429 const int x86_fast_prefix = ~(m_PENT | m_486 | m_386);
430 const int x86_single_stringop = m_386 | m_PENT4;
431 const int x86_qimode_math = ~(0);
432 const int x86_promote_qi_regs = 0;
433 const int x86_himode_math = ~(m_PPRO);
434 const int x86_promote_hi_regs = m_PPRO;
435 const int x86_sub_esp_4 = m_ATHLON | m_PPRO | m_PENT4;
436 const int x86_sub_esp_8 = m_ATHLON | m_PPRO | m_386 | m_486 | m_PENT4;
437 const int x86_add_esp_4 = m_ATHLON | m_K6 | m_PENT4;
438 const int x86_add_esp_8 = m_ATHLON | m_PPRO | m_K6 | m_386 | m_486 | m_PENT4;
439 const int x86_integer_DFmode_moves = ~(m_ATHLON | m_PENT4 | m_PPRO);
440 const int x86_partial_reg_dependency = m_ATHLON | m_PENT4;
441 const int x86_memory_mismatch_stall = m_ATHLON | m_PENT4;
442 const int x86_accumulate_outgoing_args = m_ATHLON | m_PENT4 | m_PPRO;
443 const int x86_prologue_using_move = m_ATHLON | m_PENT4 | m_PPRO;
444 const int x86_epilogue_using_move = m_ATHLON | m_PENT4 | m_PPRO;
445 const int x86_decompose_lea = m_PENT4;
446 const int x86_shift1 = ~m_486;
447 const int x86_arch_always_fancy_math_387 = m_PENT | m_PPRO | m_ATHLON | m_PENT4;
448
 449 /* In case the average insn count for a single function invocation is
450 lower than this constant, emit fast (but longer) prologue and
451 epilogue code. */
452 #define FAST_PROLOGUE_INSN_COUNT 30
453
454 /* Set by prologue expander and used by epilogue expander to determine
455 the style used. */
456 static int use_fast_prologue_epilogue;
457
 458 /* Names for the 8-bit (low), 8-bit (high), and 16-bit registers, respectively.  */
459 static const char *const qi_reg_name[] = QI_REGISTER_NAMES;
460 static const char *const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES;
461 static const char *const hi_reg_name[] = HI_REGISTER_NAMES;
462
463 /* Array of the smallest class containing reg number REGNO, indexed by
464 REGNO. Used by REGNO_REG_CLASS in i386.h. */
465
466 enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] =
467 {
468 /* ax, dx, cx, bx */
469 AREG, DREG, CREG, BREG,
470 /* si, di, bp, sp */
471 SIREG, DIREG, NON_Q_REGS, NON_Q_REGS,
472 /* FP registers */
473 FP_TOP_REG, FP_SECOND_REG, FLOAT_REGS, FLOAT_REGS,
474 FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, FLOAT_REGS,
475 /* arg pointer */
476 NON_Q_REGS,
477 /* flags, fpsr, dirflag, frame */
478 NO_REGS, NO_REGS, NO_REGS, NON_Q_REGS,
479 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
480 SSE_REGS, SSE_REGS,
481 MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS,
482 MMX_REGS, MMX_REGS,
483 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
484 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
485 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
486 SSE_REGS, SSE_REGS,
487 };
488
489 /* The "default" register map used in 32bit mode. */
490
491 int const dbx_register_map[FIRST_PSEUDO_REGISTER] =
492 {
493 0, 2, 1, 3, 6, 7, 4, 5, /* general regs */
494 12, 13, 14, 15, 16, 17, 18, 19, /* fp regs */
495 -1, -1, -1, -1, -1, /* arg, flags, fpsr, dir, frame */
496 21, 22, 23, 24, 25, 26, 27, 28, /* SSE */
497 29, 30, 31, 32, 33, 34, 35, 36, /* MMX */
498 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
499 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
500 };
501
502 static int const x86_64_int_parameter_registers[6] =
503 {
504 5 /*RDI*/, 4 /*RSI*/, 1 /*RDX*/, 2 /*RCX*/,
505 FIRST_REX_INT_REG /*R8 */, FIRST_REX_INT_REG + 1 /*R9 */
506 };
507
508 static int const x86_64_int_return_registers[4] =
509 {
 510   0 /*RAX*/, 1 /*RDX*/, 5 /*RDI*/, 4 /*RSI*/
511 };
512
513 /* The "default" register map used in 64bit mode. */
514 int const dbx64_register_map[FIRST_PSEUDO_REGISTER] =
515 {
516 0, 1, 2, 3, 4, 5, 6, 7, /* general regs */
517 33, 34, 35, 36, 37, 38, 39, 40, /* fp regs */
518 -1, -1, -1, -1, -1, /* arg, flags, fpsr, dir, frame */
519 17, 18, 19, 20, 21, 22, 23, 24, /* SSE */
520 41, 42, 43, 44, 45, 46, 47, 48, /* MMX */
521 8,9,10,11,12,13,14,15, /* extended integer registers */
522 25, 26, 27, 28, 29, 30, 31, 32, /* extended SSE registers */
523 };
524
525 /* Define the register numbers to be used in Dwarf debugging information.
526 The SVR4 reference port C compiler uses the following register numbers
527 in its Dwarf output code:
528 0 for %eax (gcc regno = 0)
529 1 for %ecx (gcc regno = 2)
530 2 for %edx (gcc regno = 1)
531 3 for %ebx (gcc regno = 3)
532 4 for %esp (gcc regno = 7)
533 5 for %ebp (gcc regno = 6)
534 6 for %esi (gcc regno = 4)
535 7 for %edi (gcc regno = 5)
536 The following three DWARF register numbers are never generated by
537 the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
538 believes these numbers have these meanings.
539 8 for %eip (no gcc equivalent)
540 9 for %eflags (gcc regno = 17)
541 10 for %trapno (no gcc equivalent)
542 It is not at all clear how we should number the FP stack registers
543 for the x86 architecture. If the version of SDB on x86/svr4 were
544 a bit less brain dead with respect to floating-point then we would
545 have a precedent to follow with respect to DWARF register numbers
546 for x86 FP registers, but the SDB on x86/svr4 is so completely
547 broken with respect to FP registers that it is hardly worth thinking
548 of it as something to strive for compatibility with.
549 The version of x86/svr4 SDB I have at the moment does (partially)
550 seem to believe that DWARF register number 11 is associated with
551 the x86 register %st(0), but that's about all. Higher DWARF
552 register numbers don't seem to be associated with anything in
553 particular, and even for DWARF regno 11, SDB only seems to under-
554 stand that it should say that a variable lives in %st(0) (when
555 asked via an `=' command) if we said it was in DWARF regno 11,
556 but SDB still prints garbage when asked for the value of the
557 variable in question (via a `/' command).
558 (Also note that the labels SDB prints for various FP stack regs
559 when doing an `x' command are all wrong.)
560 Note that these problems generally don't affect the native SVR4
561 C compiler because it doesn't allow the use of -O with -g and
562 because when it is *not* optimizing, it allocates a memory
563 location for each floating-point variable, and the memory
564 location is what gets described in the DWARF AT_location
565 attribute for the variable in question.
566 Regardless of the severe mental illness of the x86/svr4 SDB, we
567 do something sensible here and we use the following DWARF
568 register numbers. Note that these are all stack-top-relative
569 numbers.
570 11 for %st(0) (gcc regno = 8)
571 12 for %st(1) (gcc regno = 9)
572 13 for %st(2) (gcc regno = 10)
573 14 for %st(3) (gcc regno = 11)
574 15 for %st(4) (gcc regno = 12)
575 16 for %st(5) (gcc regno = 13)
576 17 for %st(6) (gcc regno = 14)
577 18 for %st(7) (gcc regno = 15)
578 */
579 int const svr4_dbx_register_map[FIRST_PSEUDO_REGISTER] =
580 {
581 0, 2, 1, 3, 6, 7, 5, 4, /* general regs */
582 11, 12, 13, 14, 15, 16, 17, 18, /* fp regs */
583 -1, 9, -1, -1, -1, /* arg, flags, fpsr, dir, frame */
584 21, 22, 23, 24, 25, 26, 27, 28, /* SSE registers */
585 29, 30, 31, 32, 33, 34, 35, 36, /* MMX registers */
 586   -1, -1, -1, -1, -1, -1, -1, -1,	/* extended integer registers */
 587   -1, -1, -1, -1, -1, -1, -1, -1,	/* extended SSE registers */
588 };
589
590 /* Test and compare insns in i386.md store the information needed to
591 generate branch and scc insns here. */
592
593 rtx ix86_compare_op0 = NULL_RTX;
594 rtx ix86_compare_op1 = NULL_RTX;
595
596 /* The encoding characters for the four TLS models present in ELF. */
597
598 static char const tls_model_chars[] = " GLil";
599
600 #define MAX_386_STACK_LOCALS 3
601 /* Size of the register save area. */
602 #define X86_64_VARARGS_SIZE (REGPARM_MAX * UNITS_PER_WORD + SSE_REGPARM_MAX * 16)
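/* Illustrative arithmetic only, assuming the usual 64-bit definitions in
   i386.h (REGPARM_MAX of 6, SSE_REGPARM_MAX of 8, UNITS_PER_WORD of 8):
   this works out to 6 * 8 + 8 * 16 = 176 bytes, the size of the psABI
   register save area.  */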
603
604 /* Define the structure for the machine field in struct function. */
605 struct machine_function GTY(())
606 {
607 rtx stack_locals[(int) MAX_MACHINE_MODE][MAX_386_STACK_LOCALS];
608 const char *some_ld_name;
609 int save_varrargs_registers;
610 int accesses_prev_frame;
611 };
612
613 #define ix86_stack_locals (cfun->machine->stack_locals)
614 #define ix86_save_varrargs_registers (cfun->machine->save_varrargs_registers)
615
616 /* Structure describing stack frame layout.
617 Stack grows downward:
618
619 [arguments]
620 <- ARG_POINTER
621 saved pc
622
623 saved frame pointer if frame_pointer_needed
624 <- HARD_FRAME_POINTER
625 [saved regs]
626
627 [padding1] \
628 )
629 [va_arg registers] (
630 > to_allocate <- FRAME_POINTER
631 [frame] (
632 )
633 [padding2] /
634 */
635 struct ix86_frame
636 {
637 int nregs;
638 int padding1;
639 int va_arg_size;
640 HOST_WIDE_INT frame;
641 int padding2;
642 int outgoing_arguments_size;
643 int red_zone_size;
644
645 HOST_WIDE_INT to_allocate;
646 /* The offsets relative to ARG_POINTER. */
647 HOST_WIDE_INT frame_pointer_offset;
648 HOST_WIDE_INT hard_frame_pointer_offset;
649 HOST_WIDE_INT stack_pointer_offset;
650 };
651
652 /* Used to enable/disable debugging features. */
653 const char *ix86_debug_arg_string, *ix86_debug_addr_string;
654 /* Code model option as passed by user. */
655 const char *ix86_cmodel_string;
656 /* Parsed value. */
657 enum cmodel ix86_cmodel;
658 /* Asm dialect. */
659 const char *ix86_asm_string;
660 enum asm_dialect ix86_asm_dialect = ASM_ATT;
 661 /* TLS dialect.  */
662 const char *ix86_tls_dialect_string;
663 enum tls_dialect ix86_tls_dialect = TLS_DIALECT_GNU;
664
665 /* Which unit we are generating floating point math for. */
666 enum fpmath_unit ix86_fpmath;
667
668 /* Which cpu are we scheduling for. */
669 enum processor_type ix86_cpu;
670 /* Which instruction set architecture to use. */
671 enum processor_type ix86_arch;
672
673 /* Strings to hold which cpu and instruction set architecture to use. */
674 const char *ix86_cpu_string; /* for -mcpu=<xxx> */
675 const char *ix86_arch_string; /* for -march=<xxx> */
676 const char *ix86_fpmath_string; /* for -mfpmath=<xxx> */
677
678 /* # of registers to use to pass arguments. */
679 const char *ix86_regparm_string;
680
 681 /* True if the SSE prefetch instruction is not a NOP.  */
682 int x86_prefetch_sse;
683
684 /* ix86_regparm_string as a number */
685 int ix86_regparm;
686
687 /* Alignment to use for loops and jumps: */
688
689 /* Power of two alignment for loops. */
690 const char *ix86_align_loops_string;
691
692 /* Power of two alignment for non-loop jumps. */
693 const char *ix86_align_jumps_string;
694
695 /* Power of two alignment for stack boundary in bytes. */
696 const char *ix86_preferred_stack_boundary_string;
697
698 /* Preferred alignment for stack boundary in bits. */
699 int ix86_preferred_stack_boundary;
700
701 /* Values 1-5: see jump.c */
702 int ix86_branch_cost;
703 const char *ix86_branch_cost_string;
704
705 /* Power of two alignment for functions. */
706 const char *ix86_align_funcs_string;
707
708 /* Prefix built by ASM_GENERATE_INTERNAL_LABEL. */
709 static char internal_label_prefix[16];
710 static int internal_label_prefix_len;
711 \f
712 static int local_symbolic_operand PARAMS ((rtx, enum machine_mode));
713 static int tls_symbolic_operand_1 PARAMS ((rtx, enum tls_model));
714 static void output_pic_addr_const PARAMS ((FILE *, rtx, int));
715 static void put_condition_code PARAMS ((enum rtx_code, enum machine_mode,
716 int, int, FILE *));
717 static const char *get_some_local_dynamic_name PARAMS ((void));
718 static int get_some_local_dynamic_name_1 PARAMS ((rtx *, void *));
719 static rtx maybe_get_pool_constant PARAMS ((rtx));
720 static rtx ix86_expand_int_compare PARAMS ((enum rtx_code, rtx, rtx));
721 static enum rtx_code ix86_prepare_fp_compare_args PARAMS ((enum rtx_code,
722 rtx *, rtx *));
723 static rtx get_thread_pointer PARAMS ((void));
724 static void get_pc_thunk_name PARAMS ((char [32], unsigned int));
725 static rtx gen_push PARAMS ((rtx));
726 static int memory_address_length PARAMS ((rtx addr));
727 static int ix86_flags_dependant PARAMS ((rtx, rtx, enum attr_type));
728 static int ix86_agi_dependant PARAMS ((rtx, rtx, enum attr_type));
729 static enum attr_ppro_uops ix86_safe_ppro_uops PARAMS ((rtx));
730 static void ix86_dump_ppro_packet PARAMS ((FILE *));
731 static void ix86_reorder_insn PARAMS ((rtx *, rtx *));
732 static struct machine_function * ix86_init_machine_status PARAMS ((void));
733 static int ix86_split_to_parts PARAMS ((rtx, rtx *, enum machine_mode));
734 static int ix86_nsaved_regs PARAMS ((void));
735 static void ix86_emit_save_regs PARAMS ((void));
736 static void ix86_emit_save_regs_using_mov PARAMS ((rtx, HOST_WIDE_INT));
737 static void ix86_emit_restore_regs_using_mov PARAMS ((rtx, int, int));
738 static void ix86_output_function_epilogue PARAMS ((FILE *, HOST_WIDE_INT));
739 static void ix86_set_move_mem_attrs_1 PARAMS ((rtx, rtx, rtx, rtx, rtx));
740 static void ix86_sched_reorder_ppro PARAMS ((rtx *, rtx *));
741 static HOST_WIDE_INT ix86_GOT_alias_set PARAMS ((void));
742 static void ix86_adjust_counter PARAMS ((rtx, HOST_WIDE_INT));
743 static rtx ix86_expand_aligntest PARAMS ((rtx, int));
744 static void ix86_expand_strlensi_unroll_1 PARAMS ((rtx, rtx));
745 static int ix86_issue_rate PARAMS ((void));
746 static int ix86_adjust_cost PARAMS ((rtx, rtx, rtx, int));
747 static void ix86_sched_init PARAMS ((FILE *, int, int));
748 static int ix86_sched_reorder PARAMS ((FILE *, int, rtx *, int *, int));
749 static int ix86_variable_issue PARAMS ((FILE *, int, rtx, int));
750 static int ia32_use_dfa_pipeline_interface PARAMS ((void));
751 static int ia32_multipass_dfa_lookahead PARAMS ((void));
752 static void ix86_init_mmx_sse_builtins PARAMS ((void));
753 static rtx x86_this_parameter PARAMS ((tree));
754 static void x86_output_mi_thunk PARAMS ((FILE *, tree, HOST_WIDE_INT,
755 HOST_WIDE_INT, tree));
756 static bool x86_can_output_mi_thunk PARAMS ((tree, HOST_WIDE_INT,
757 HOST_WIDE_INT, tree));
758
759 struct ix86_address
760 {
761 rtx base, index, disp;
762 HOST_WIDE_INT scale;
763 };
764
765 static int ix86_decompose_address PARAMS ((rtx, struct ix86_address *));
766
767 static void ix86_encode_section_info PARAMS ((tree, int)) ATTRIBUTE_UNUSED;
768 static const char *ix86_strip_name_encoding PARAMS ((const char *))
769 ATTRIBUTE_UNUSED;
770
771 struct builtin_description;
772 static rtx ix86_expand_sse_comi PARAMS ((const struct builtin_description *,
773 tree, rtx));
774 static rtx ix86_expand_sse_compare PARAMS ((const struct builtin_description *,
775 tree, rtx));
776 static rtx ix86_expand_unop1_builtin PARAMS ((enum insn_code, tree, rtx));
777 static rtx ix86_expand_unop_builtin PARAMS ((enum insn_code, tree, rtx, int));
778 static rtx ix86_expand_binop_builtin PARAMS ((enum insn_code, tree, rtx));
779 static rtx ix86_expand_store_builtin PARAMS ((enum insn_code, tree));
780 static rtx safe_vector_operand PARAMS ((rtx, enum machine_mode));
781 static enum rtx_code ix86_fp_compare_code_to_integer PARAMS ((enum rtx_code));
782 static void ix86_fp_comparison_codes PARAMS ((enum rtx_code code,
783 enum rtx_code *,
784 enum rtx_code *,
785 enum rtx_code *));
786 static rtx ix86_expand_fp_compare PARAMS ((enum rtx_code, rtx, rtx, rtx,
787 rtx *, rtx *));
788 static int ix86_fp_comparison_arithmetics_cost PARAMS ((enum rtx_code code));
789 static int ix86_fp_comparison_fcomi_cost PARAMS ((enum rtx_code code));
790 static int ix86_fp_comparison_sahf_cost PARAMS ((enum rtx_code code));
791 static int ix86_fp_comparison_cost PARAMS ((enum rtx_code code));
792 static unsigned int ix86_select_alt_pic_regnum PARAMS ((void));
793 static int ix86_save_reg PARAMS ((unsigned int, int));
794 static void ix86_compute_frame_layout PARAMS ((struct ix86_frame *));
795 static int ix86_comp_type_attributes PARAMS ((tree, tree));
796 static int ix86_fntype_regparm PARAMS ((tree));
797 const struct attribute_spec ix86_attribute_table[];
798 static tree ix86_handle_cdecl_attribute PARAMS ((tree *, tree, tree, int, bool *));
799 static tree ix86_handle_regparm_attribute PARAMS ((tree *, tree, tree, int, bool *));
800 static int ix86_value_regno PARAMS ((enum machine_mode));
801
802 #if defined (DO_GLOBAL_CTORS_BODY) && defined (HAS_INIT_SECTION)
803 static void ix86_svr3_asm_out_constructor PARAMS ((rtx, int));
804 #endif
805
 806 /* Register class used for passing a given 64-bit part of the argument.
 807    These represent classes as documented by the PS ABI, with the exception
 808    of the SSESF and SSEDF classes, which are basically the SSE class, except
 809    that gcc will use SFmode or DFmode moves instead of DImode to avoid
 810    reformatting penalties.
 811    Similarly we play games with INTEGERSI_CLASS to use cheaper SImode moves
 812    whenever possible (the upper half is only padding).
813 */
814 enum x86_64_reg_class
815 {
816 X86_64_NO_CLASS,
817 X86_64_INTEGER_CLASS,
818 X86_64_INTEGERSI_CLASS,
819 X86_64_SSE_CLASS,
820 X86_64_SSESF_CLASS,
821 X86_64_SSEDF_CLASS,
822 X86_64_SSEUP_CLASS,
823 X86_64_X87_CLASS,
824 X86_64_X87UP_CLASS,
825 X86_64_MEMORY_CLASS
826 };
827 static const char * const x86_64_reg_class_name[] =
828 {"no", "integer", "integerSI", "sse", "sseSF", "sseDF", "sseup", "x87", "x87up", "no"};
829
830 #define MAX_CLASSES 4
831 static int classify_argument PARAMS ((enum machine_mode, tree,
832 enum x86_64_reg_class [MAX_CLASSES],
833 int));
834 static int examine_argument PARAMS ((enum machine_mode, tree, int, int *,
835 int *));
836 static rtx construct_container PARAMS ((enum machine_mode, tree, int, int, int,
837 const int *, int));
838 static enum x86_64_reg_class merge_classes PARAMS ((enum x86_64_reg_class,
839 enum x86_64_reg_class));
840 \f
841 /* Initialize the GCC target structure. */
842 #undef TARGET_ATTRIBUTE_TABLE
843 #define TARGET_ATTRIBUTE_TABLE ix86_attribute_table
844 #ifdef TARGET_DLLIMPORT_DECL_ATTRIBUTES
845 # undef TARGET_MERGE_DECL_ATTRIBUTES
846 # define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
847 #endif
848
849 #undef TARGET_COMP_TYPE_ATTRIBUTES
850 #define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes
851
852 #undef TARGET_INIT_BUILTINS
853 #define TARGET_INIT_BUILTINS ix86_init_builtins
854
855 #undef TARGET_EXPAND_BUILTIN
856 #define TARGET_EXPAND_BUILTIN ix86_expand_builtin
857
858 #undef TARGET_ASM_FUNCTION_EPILOGUE
859 #define TARGET_ASM_FUNCTION_EPILOGUE ix86_output_function_epilogue
860
861 #undef TARGET_ASM_OPEN_PAREN
862 #define TARGET_ASM_OPEN_PAREN ""
863 #undef TARGET_ASM_CLOSE_PAREN
864 #define TARGET_ASM_CLOSE_PAREN ""
865
866 #undef TARGET_ASM_ALIGNED_HI_OP
867 #define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT
868 #undef TARGET_ASM_ALIGNED_SI_OP
869 #define TARGET_ASM_ALIGNED_SI_OP ASM_LONG
870 #ifdef ASM_QUAD
871 #undef TARGET_ASM_ALIGNED_DI_OP
872 #define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD
873 #endif
874
875 #undef TARGET_ASM_UNALIGNED_HI_OP
876 #define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
877 #undef TARGET_ASM_UNALIGNED_SI_OP
878 #define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
879 #undef TARGET_ASM_UNALIGNED_DI_OP
880 #define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP
881
882 #undef TARGET_SCHED_ADJUST_COST
883 #define TARGET_SCHED_ADJUST_COST ix86_adjust_cost
884 #undef TARGET_SCHED_ISSUE_RATE
885 #define TARGET_SCHED_ISSUE_RATE ix86_issue_rate
886 #undef TARGET_SCHED_VARIABLE_ISSUE
887 #define TARGET_SCHED_VARIABLE_ISSUE ix86_variable_issue
888 #undef TARGET_SCHED_INIT
889 #define TARGET_SCHED_INIT ix86_sched_init
890 #undef TARGET_SCHED_REORDER
891 #define TARGET_SCHED_REORDER ix86_sched_reorder
892 #undef TARGET_SCHED_USE_DFA_PIPELINE_INTERFACE
893 #define TARGET_SCHED_USE_DFA_PIPELINE_INTERFACE \
894 ia32_use_dfa_pipeline_interface
895 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
896 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
897 ia32_multipass_dfa_lookahead
898
899 #ifdef HAVE_AS_TLS
900 #undef TARGET_HAVE_TLS
901 #define TARGET_HAVE_TLS true
902 #endif
903
904 #undef TARGET_ASM_OUTPUT_MI_THUNK
905 #define TARGET_ASM_OUTPUT_MI_THUNK x86_output_mi_thunk
906 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
907 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK x86_can_output_mi_thunk
908
909 struct gcc_target targetm = TARGET_INITIALIZER;
910 \f
911 /* Sometimes certain combinations of command options do not make
912 sense on a particular target machine. You can define a macro
913 `OVERRIDE_OPTIONS' to take account of this. This macro, if
914 defined, is executed once just after all the command options have
915 been parsed.
916
917 Don't use this macro to turn on various extra optimizations for
918 `-O'. That is what `OPTIMIZATION_OPTIONS' is for. */
919
920 void
921 override_options ()
922 {
923 int i;
924 /* Comes from final.c -- no real reason to change it. */
925 #define MAX_CODE_ALIGN 16
926
927 static struct ptt
928 {
929 const struct processor_costs *cost; /* Processor costs */
930 const int target_enable; /* Target flags to enable. */
931 const int target_disable; /* Target flags to disable. */
932 const int align_loop; /* Default alignments. */
933 const int align_loop_max_skip;
934 const int align_jump;
935 const int align_jump_max_skip;
936 const int align_func;
937 const int branch_cost;
938 }
939 const processor_target_table[PROCESSOR_max] =
940 {
941 {&i386_cost, 0, 0, 4, 3, 4, 3, 4, 1},
942 {&i486_cost, 0, 0, 16, 15, 16, 15, 16, 1},
943 {&pentium_cost, 0, 0, 16, 7, 16, 7, 16, 1},
944 {&pentiumpro_cost, 0, 0, 16, 15, 16, 7, 16, 1},
945 {&k6_cost, 0, 0, 32, 7, 32, 7, 32, 1},
946 {&athlon_cost, 0, 0, 16, 7, 64, 7, 16, 1},
947 {&pentium4_cost, 0, 0, 0, 0, 0, 0, 0, 1}
948 };
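/* Each row above is meant to correspond to one PROCESSOR_* value; the
   columns follow struct ptt: cost table, target flags to enable, target
   flags to disable, align_loop, align_loop_max_skip, align_jump,
   align_jump_max_skip, align_func and branch_cost.  */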
949
950 static const char * const cpu_names[] = TARGET_CPU_DEFAULT_NAMES;
951 static struct pta
952 {
953 const char *const name; /* processor name or nickname. */
954 const enum processor_type processor;
955 const enum pta_flags
956 {
957 PTA_SSE = 1,
958 PTA_SSE2 = 2,
959 PTA_MMX = 4,
960 PTA_PREFETCH_SSE = 8,
961 PTA_3DNOW = 16,
962 PTA_3DNOW_A = 64
963 } flags;
964 }
965 const processor_alias_table[] =
966 {
967 {"i386", PROCESSOR_I386, 0},
968 {"i486", PROCESSOR_I486, 0},
969 {"i586", PROCESSOR_PENTIUM, 0},
970 {"pentium", PROCESSOR_PENTIUM, 0},
971 {"pentium-mmx", PROCESSOR_PENTIUM, PTA_MMX},
972 {"winchip-c6", PROCESSOR_I486, PTA_MMX},
973 {"winchip2", PROCESSOR_I486, PTA_MMX | PTA_3DNOW},
974 {"c3", PROCESSOR_I486, PTA_MMX | PTA_3DNOW},
975 {"i686", PROCESSOR_PENTIUMPRO, 0},
976 {"pentiumpro", PROCESSOR_PENTIUMPRO, 0},
977 {"pentium2", PROCESSOR_PENTIUMPRO, PTA_MMX},
978 {"pentium3", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_PREFETCH_SSE},
979 {"pentium4", PROCESSOR_PENTIUM4, PTA_SSE | PTA_SSE2 |
980 PTA_MMX | PTA_PREFETCH_SSE},
981 {"k6", PROCESSOR_K6, PTA_MMX},
982 {"k6-2", PROCESSOR_K6, PTA_MMX | PTA_3DNOW},
983 {"k6-3", PROCESSOR_K6, PTA_MMX | PTA_3DNOW},
984 {"athlon", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
985 | PTA_3DNOW_A},
986 {"athlon-tbird", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE
987 | PTA_3DNOW | PTA_3DNOW_A},
988 {"athlon-4", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
989 | PTA_3DNOW_A | PTA_SSE},
990 {"athlon-xp", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
991 | PTA_3DNOW_A | PTA_SSE},
992 {"athlon-mp", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
993 | PTA_3DNOW_A | PTA_SSE},
994 };
995
996 int const pta_size = ARRAY_SIZE (processor_alias_table);
997
 998 /* By default our XFmode is the 80-bit extended format.  If we
999 use TFmode instead, it's also the 80-bit format, but with padding. */
1000 real_format_for_mode[XFmode - QFmode] = &ieee_extended_intel_96_format;
1001 real_format_for_mode[TFmode - QFmode] = &ieee_extended_intel_128_format;
1002
1003 #ifdef SUBTARGET_OVERRIDE_OPTIONS
1004 SUBTARGET_OVERRIDE_OPTIONS;
1005 #endif
1006
1007 if (!ix86_cpu_string && ix86_arch_string)
1008 ix86_cpu_string = ix86_arch_string;
1009 if (!ix86_cpu_string)
1010 ix86_cpu_string = cpu_names [TARGET_CPU_DEFAULT];
1011 if (!ix86_arch_string)
1012 ix86_arch_string = TARGET_64BIT ? "athlon-4" : "i386";
1013
1014 if (ix86_cmodel_string != 0)
1015 {
1016 if (!strcmp (ix86_cmodel_string, "small"))
1017 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
1018 else if (flag_pic)
1019 sorry ("code model %s not supported in PIC mode", ix86_cmodel_string);
1020 else if (!strcmp (ix86_cmodel_string, "32"))
1021 ix86_cmodel = CM_32;
1022 else if (!strcmp (ix86_cmodel_string, "kernel") && !flag_pic)
1023 ix86_cmodel = CM_KERNEL;
1024 else if (!strcmp (ix86_cmodel_string, "medium") && !flag_pic)
1025 ix86_cmodel = CM_MEDIUM;
1026 else if (!strcmp (ix86_cmodel_string, "large") && !flag_pic)
1027 ix86_cmodel = CM_LARGE;
1028 else
1029 error ("bad value (%s) for -mcmodel= switch", ix86_cmodel_string);
1030 }
1031 else
1032 {
1033 ix86_cmodel = CM_32;
1034 if (TARGET_64BIT)
1035 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
1036 }
1037 if (ix86_asm_string != 0)
1038 {
1039 if (!strcmp (ix86_asm_string, "intel"))
1040 ix86_asm_dialect = ASM_INTEL;
1041 else if (!strcmp (ix86_asm_string, "att"))
1042 ix86_asm_dialect = ASM_ATT;
1043 else
1044 error ("bad value (%s) for -masm= switch", ix86_asm_string);
1045 }
1046 if ((TARGET_64BIT == 0) != (ix86_cmodel == CM_32))
1047 error ("code model `%s' not supported in the %s bit mode",
1048 ix86_cmodel_string, TARGET_64BIT ? "64" : "32");
1049 if (ix86_cmodel == CM_LARGE)
1050 sorry ("code model `large' not supported yet");
1051 if ((TARGET_64BIT != 0) != ((target_flags & MASK_64BIT) != 0))
1052 sorry ("%i-bit mode not compiled in",
1053 (target_flags & MASK_64BIT) ? 64 : 32);
1054
1055 for (i = 0; i < pta_size; i++)
1056 if (! strcmp (ix86_arch_string, processor_alias_table[i].name))
1057 {
1058 ix86_arch = processor_alias_table[i].processor;
1059 /* Default cpu tuning to the architecture. */
1060 ix86_cpu = ix86_arch;
1061 if (processor_alias_table[i].flags & PTA_MMX
1062 && !(target_flags_explicit & MASK_MMX))
1063 target_flags |= MASK_MMX;
1064 if (processor_alias_table[i].flags & PTA_3DNOW
1065 && !(target_flags_explicit & MASK_3DNOW))
1066 target_flags |= MASK_3DNOW;
1067 if (processor_alias_table[i].flags & PTA_3DNOW_A
1068 && !(target_flags_explicit & MASK_3DNOW_A))
1069 target_flags |= MASK_3DNOW_A;
1070 if (processor_alias_table[i].flags & PTA_SSE
1071 && !(target_flags_explicit & MASK_SSE))
1072 target_flags |= MASK_SSE;
1073 if (processor_alias_table[i].flags & PTA_SSE2
1074 && !(target_flags_explicit & MASK_SSE2))
1075 target_flags |= MASK_SSE2;
1076 if (processor_alias_table[i].flags & PTA_PREFETCH_SSE)
1077 x86_prefetch_sse = true;
1078 break;
1079 }
1080
1081 if (i == pta_size)
1082 error ("bad value (%s) for -march= switch", ix86_arch_string);
1083
 1084   for (i = 0; i < pta_size; i++)
 1085     if (! strcmp (ix86_cpu_string, processor_alias_table[i].name))
 1086       {
 1087 	ix86_cpu = processor_alias_table[i].processor;
 1088 	if (processor_alias_table[i].flags & PTA_PREFETCH_SSE)
 1089 	  x86_prefetch_sse = true;
 1090 	break;
 1091       }
 1092   if (i == pta_size)
 1093     error ("bad value (%s) for -mcpu= switch", ix86_cpu_string);
1094
1095 if (optimize_size)
1096 ix86_cost = &size_cost;
1097 else
1098 ix86_cost = processor_target_table[ix86_cpu].cost;
1099 target_flags |= processor_target_table[ix86_cpu].target_enable;
1100 target_flags &= ~processor_target_table[ix86_cpu].target_disable;
1101
1102 /* Arrange to set up i386_stack_locals for all functions. */
1103 init_machine_status = ix86_init_machine_status;
1104
1105 /* Validate -mregparm= value. */
1106 if (ix86_regparm_string)
1107 {
1108 i = atoi (ix86_regparm_string);
1109 if (i < 0 || i > REGPARM_MAX)
1110 error ("-mregparm=%d is not between 0 and %d", i, REGPARM_MAX);
1111 else
1112 ix86_regparm = i;
1113 }
1114 else
1115 if (TARGET_64BIT)
1116 ix86_regparm = REGPARM_MAX;
1117
1118 /* If the user has provided any of the -malign-* options,
1119 warn and use that value only if -falign-* is not set.
1120 Remove this code in GCC 3.2 or later. */
1121 if (ix86_align_loops_string)
1122 {
1123 warning ("-malign-loops is obsolete, use -falign-loops");
1124 if (align_loops == 0)
1125 {
1126 i = atoi (ix86_align_loops_string);
1127 if (i < 0 || i > MAX_CODE_ALIGN)
1128 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
1129 else
1130 align_loops = 1 << i;
1131 }
1132 }
1133
1134 if (ix86_align_jumps_string)
1135 {
1136 warning ("-malign-jumps is obsolete, use -falign-jumps");
1137 if (align_jumps == 0)
1138 {
1139 i = atoi (ix86_align_jumps_string);
1140 if (i < 0 || i > MAX_CODE_ALIGN)
1141 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
1142 else
1143 align_jumps = 1 << i;
1144 }
1145 }
1146
1147 if (ix86_align_funcs_string)
1148 {
1149 warning ("-malign-functions is obsolete, use -falign-functions");
1150 if (align_functions == 0)
1151 {
1152 i = atoi (ix86_align_funcs_string);
1153 if (i < 0 || i > MAX_CODE_ALIGN)
1154 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
1155 else
1156 align_functions = 1 << i;
1157 }
1158 }
1159
1160 /* Default align_* from the processor table. */
1161 if (align_loops == 0)
1162 {
1163 align_loops = processor_target_table[ix86_cpu].align_loop;
1164 align_loops_max_skip = processor_target_table[ix86_cpu].align_loop_max_skip;
1165 }
1166 if (align_jumps == 0)
1167 {
1168 align_jumps = processor_target_table[ix86_cpu].align_jump;
1169 align_jumps_max_skip = processor_target_table[ix86_cpu].align_jump_max_skip;
1170 }
1171 if (align_functions == 0)
1172 {
1173 align_functions = processor_target_table[ix86_cpu].align_func;
1174 }
1175
1176 /* Validate -mpreferred-stack-boundary= value, or provide default.
1177 The default of 128 bits is for Pentium III's SSE __m128, but we
1178 don't want additional code to keep the stack aligned when
1179 optimizing for code size. */
1180 ix86_preferred_stack_boundary = (optimize_size
1181 ? TARGET_64BIT ? 128 : 32
1182 : 128);
1183 if (ix86_preferred_stack_boundary_string)
1184 {
1185 i = atoi (ix86_preferred_stack_boundary_string);
1186 if (i < (TARGET_64BIT ? 4 : 2) || i > 12)
1187 error ("-mpreferred-stack-boundary=%d is not between %d and 12", i,
1188 TARGET_64BIT ? 4 : 2);
1189 else
1190 ix86_preferred_stack_boundary = (1 << i) * BITS_PER_UNIT;
1191 }
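/* Illustration: -mpreferred-stack-boundary=4 requests an alignment of
   (1 << 4) * BITS_PER_UNIT = 128 bits, i.e. 16 bytes, which is also the
   default when not optimizing for size.  */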
1192
1193 /* Validate -mbranch-cost= value, or provide default. */
1194 ix86_branch_cost = processor_target_table[ix86_cpu].branch_cost;
1195 if (ix86_branch_cost_string)
1196 {
1197 i = atoi (ix86_branch_cost_string);
1198 if (i < 0 || i > 5)
1199 error ("-mbranch-cost=%d is not between 0 and 5", i);
1200 else
1201 ix86_branch_cost = i;
1202 }
1203
1204 if (ix86_tls_dialect_string)
1205 {
1206 if (strcmp (ix86_tls_dialect_string, "gnu") == 0)
1207 ix86_tls_dialect = TLS_DIALECT_GNU;
1208 else if (strcmp (ix86_tls_dialect_string, "sun") == 0)
1209 ix86_tls_dialect = TLS_DIALECT_SUN;
1210 else
1211 error ("bad value (%s) for -mtls-dialect= switch",
1212 ix86_tls_dialect_string);
1213 }
1214
1215 if (profile_flag)
1216 target_flags &= ~MASK_OMIT_LEAF_FRAME_POINTER;
1217
1218 /* Keep nonleaf frame pointers. */
1219 if (TARGET_OMIT_LEAF_FRAME_POINTER)
1220 flag_omit_frame_pointer = 1;
1221
1222 /* If we're doing fast math, we don't care about comparison order
1223 wrt NaNs. This lets us use a shorter comparison sequence. */
1224 if (flag_unsafe_math_optimizations)
1225 target_flags &= ~MASK_IEEE_FP;
1226
1227 /* If the architecture always has an FPU, turn off NO_FANCY_MATH_387,
1228 since the insns won't need emulation. */
1229 if (x86_arch_always_fancy_math_387 & (1 << ix86_arch))
1230 target_flags &= ~MASK_NO_FANCY_MATH_387;
1231
1232 if (TARGET_64BIT)
1233 {
1234 if (TARGET_ALIGN_DOUBLE)
1235 error ("-malign-double makes no sense in the 64bit mode");
1236 if (TARGET_RTD)
1237 error ("-mrtd calling convention not supported in the 64bit mode");
1238 /* Enable by default the SSE and MMX builtins. */
1239 target_flags |= (MASK_SSE2 | MASK_SSE | MASK_MMX | MASK_128BIT_LONG_DOUBLE);
1240 ix86_fpmath = FPMATH_SSE;
1241 }
1242 else
1243 ix86_fpmath = FPMATH_387;
1244
1245 if (ix86_fpmath_string != 0)
1246 {
1247 if (! strcmp (ix86_fpmath_string, "387"))
1248 ix86_fpmath = FPMATH_387;
1249 else if (! strcmp (ix86_fpmath_string, "sse"))
1250 {
1251 if (!TARGET_SSE)
1252 {
1253 warning ("SSE instruction set disabled, using 387 arithmetics");
1254 ix86_fpmath = FPMATH_387;
1255 }
1256 else
1257 ix86_fpmath = FPMATH_SSE;
1258 }
1259 else if (! strcmp (ix86_fpmath_string, "387,sse")
1260 || ! strcmp (ix86_fpmath_string, "sse,387"))
1261 {
1262 if (!TARGET_SSE)
1263 {
1264 warning ("SSE instruction set disabled, using 387 arithmetics");
1265 ix86_fpmath = FPMATH_387;
1266 }
1267 else if (!TARGET_80387)
1268 {
1269 warning ("387 instruction set disabled, using SSE arithmetics");
1270 ix86_fpmath = FPMATH_SSE;
1271 }
1272 else
1273 ix86_fpmath = FPMATH_SSE | FPMATH_387;
1274 }
1275 else
1276 error ("bad value (%s) for -mfpmath= switch", ix86_fpmath_string);
1277 }
1278
1279 /* It makes no sense to ask for just SSE builtins, so MMX is also turned
1280 on by -msse. */
1281 if (TARGET_SSE)
1282 {
1283 target_flags |= MASK_MMX;
1284 x86_prefetch_sse = true;
1285 }
1286
 1287   /* If it has 3DNow!, it also has MMX, so MMX is also turned on by -m3dnow.  */
1288 if (TARGET_3DNOW)
1289 {
1290 target_flags |= MASK_MMX;
 1291       /* If we are targeting the Athlon architecture, enable the 3DNow!/MMX
1292 extensions it adds. */
1293 if (x86_3dnow_a & (1 << ix86_arch))
1294 target_flags |= MASK_3DNOW_A;
1295 }
1296 if ((x86_accumulate_outgoing_args & CPUMASK)
1297 && !(target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
1298 && !optimize_size)
1299 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
1300
1301 /* Figure out what ASM_GENERATE_INTERNAL_LABEL builds as a prefix. */
1302 {
1303 char *p;
1304 ASM_GENERATE_INTERNAL_LABEL (internal_label_prefix, "LX", 0);
1305 p = strchr (internal_label_prefix, 'X');
1306 internal_label_prefix_len = p - internal_label_prefix;
1307 *p = '\0';
1308 }
1309 }
1310 \f
1311 void
1312 optimization_options (level, size)
1313 int level;
1314 int size ATTRIBUTE_UNUSED;
1315 {
1316 /* For -O2 and beyond, turn off -fschedule-insns by default. It tends to
1317 make the problem with not enough registers even worse. */
1318 #ifdef INSN_SCHEDULING
1319 if (level > 1)
1320 flag_schedule_insns = 0;
1321 #endif
1322 if (TARGET_64BIT && optimize >= 1)
1323 flag_omit_frame_pointer = 1;
1324 if (TARGET_64BIT)
1325 {
1326 flag_pcc_struct_return = 0;
1327 flag_asynchronous_unwind_tables = 1;
1328 }
1329 if (profile_flag)
1330 flag_omit_frame_pointer = 0;
1331 }
1332 \f
1333 /* Table of valid machine attributes. */
1334 const struct attribute_spec ix86_attribute_table[] =
1335 {
1336 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
1337 /* Stdcall attribute says callee is responsible for popping arguments
1338 if they are not variable. */
1339 { "stdcall", 0, 0, false, true, true, ix86_handle_cdecl_attribute },
1340 /* Cdecl attribute says the callee is a normal C declaration */
1341 { "cdecl", 0, 0, false, true, true, ix86_handle_cdecl_attribute },
1342 /* Regparm attribute specifies how many integer arguments are to be
1343 passed in registers. */
1344 { "regparm", 1, 1, false, true, true, ix86_handle_regparm_attribute },
1345 #ifdef TARGET_DLLIMPORT_DECL_ATTRIBUTES
1346 { "dllimport", 0, 0, false, false, false, ix86_handle_dll_attribute },
1347 { "dllexport", 0, 0, false, false, false, ix86_handle_dll_attribute },
1348 { "shared", 0, 0, true, false, false, ix86_handle_shared_attribute },
1349 #endif
1350 { NULL, 0, 0, false, false, false, NULL }
1351 };
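/* Illustrative uses of these attributes in user code (hypothetical
   declarations, not part of this file):

     int __attribute__ ((stdcall)) f (int a, int b);
     int __attribute__ ((regparm (2))) g (int a, int b);

   The first makes f pop its own arguments on return; the second passes
   a and b to g in registers instead of on the stack.  */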
1352
1353 /* Handle a "cdecl" or "stdcall" attribute;
1354 arguments as in struct attribute_spec.handler. */
1355 static tree
1356 ix86_handle_cdecl_attribute (node, name, args, flags, no_add_attrs)
1357 tree *node;
1358 tree name;
1359 tree args ATTRIBUTE_UNUSED;
1360 int flags ATTRIBUTE_UNUSED;
1361 bool *no_add_attrs;
1362 {
1363 if (TREE_CODE (*node) != FUNCTION_TYPE
1364 && TREE_CODE (*node) != METHOD_TYPE
1365 && TREE_CODE (*node) != FIELD_DECL
1366 && TREE_CODE (*node) != TYPE_DECL)
1367 {
1368 warning ("`%s' attribute only applies to functions",
1369 IDENTIFIER_POINTER (name));
1370 *no_add_attrs = true;
1371 }
1372
1373 if (TARGET_64BIT)
1374 {
1375 warning ("`%s' attribute ignored", IDENTIFIER_POINTER (name));
1376 *no_add_attrs = true;
1377 }
1378
1379 return NULL_TREE;
1380 }
1381
1382 /* Handle a "regparm" attribute;
1383 arguments as in struct attribute_spec.handler. */
1384 static tree
1385 ix86_handle_regparm_attribute (node, name, args, flags, no_add_attrs)
1386 tree *node;
1387 tree name;
1388 tree args;
1389 int flags ATTRIBUTE_UNUSED;
1390 bool *no_add_attrs;
1391 {
1392 if (TREE_CODE (*node) != FUNCTION_TYPE
1393 && TREE_CODE (*node) != METHOD_TYPE
1394 && TREE_CODE (*node) != FIELD_DECL
1395 && TREE_CODE (*node) != TYPE_DECL)
1396 {
1397 warning ("`%s' attribute only applies to functions",
1398 IDENTIFIER_POINTER (name));
1399 *no_add_attrs = true;
1400 }
1401 else
1402 {
1403 tree cst;
1404
1405 cst = TREE_VALUE (args);
1406 if (TREE_CODE (cst) != INTEGER_CST)
1407 {
1408 warning ("`%s' attribute requires an integer constant argument",
1409 IDENTIFIER_POINTER (name));
1410 *no_add_attrs = true;
1411 }
1412 else if (compare_tree_int (cst, REGPARM_MAX) > 0)
1413 {
1414 warning ("argument to `%s' attribute larger than %d",
1415 IDENTIFIER_POINTER (name), REGPARM_MAX);
1416 *no_add_attrs = true;
1417 }
1418 }
1419
1420 return NULL_TREE;
1421 }
1422
1423 /* Return 0 if the attributes for two types are incompatible, 1 if they
1424 are compatible, and 2 if they are nearly compatible (which causes a
1425 warning to be generated). */
1426
1427 static int
1428 ix86_comp_type_attributes (type1, type2)
1429 tree type1;
1430 tree type2;
1431 {
1432 /* Check for mismatch of non-default calling convention. */
1433 const char *const rtdstr = TARGET_RTD ? "cdecl" : "stdcall";
1434
1435 if (TREE_CODE (type1) != FUNCTION_TYPE)
1436 return 1;
1437
1438 /* Check for mismatched return types (cdecl vs stdcall). */
1439 if (!lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type1))
1440 != !lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type2)))
1441 return 0;
1442 return 1;
1443 }
1444 \f
 1445 /* Return the regparm value for a function with the indicated TYPE.  */
1446
1447 static int
1448 ix86_fntype_regparm (type)
1449 tree type;
1450 {
1451 tree attr;
1452
1453 attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (type));
1454 if (attr)
1455 return TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
1456 else
1457 return ix86_regparm;
1458 }
1459
1460 /* Value is the number of bytes of arguments automatically
1461 popped when returning from a subroutine call.
1462 FUNDECL is the declaration node of the function (as a tree),
1463 FUNTYPE is the data type of the function (as a tree),
1464 or for a library call it is an identifier node for the subroutine name.
1465 SIZE is the number of bytes of arguments passed on the stack.
1466
1467 On the 80386, the RTD insn may be used to pop them if the number
1468 of args is fixed, but if the number is variable then the caller
1469 must pop them all. RTD can't be used for library calls now
1470 because the library is compiled with the Unix compiler.
1471 Use of RTD is a selectable option, since it is incompatible with
1472 standard Unix calling sequences. If the option is not selected,
1473 the caller must always pop the args.
1474
1475 The attribute stdcall is equivalent to RTD on a per module basis. */
1476
1477 int
1478 ix86_return_pops_args (fundecl, funtype, size)
1479 tree fundecl;
1480 tree funtype;
1481 int size;
1482 {
1483 int rtd = TARGET_RTD && (!fundecl || TREE_CODE (fundecl) != IDENTIFIER_NODE);
1484
1485 /* Cdecl functions override -mrtd, and never pop the stack. */
1486 if (! lookup_attribute ("cdecl", TYPE_ATTRIBUTES (funtype))) {
1487
1488 /* Stdcall functions will pop the stack if not variable args. */
1489 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (funtype)))
1490 rtd = 1;
1491
1492 if (rtd
1493 && (TYPE_ARG_TYPES (funtype) == NULL_TREE
1494 || (TREE_VALUE (tree_last (TYPE_ARG_TYPES (funtype)))
1495 == void_type_node)))
1496 return size;
1497 }
1498
1499 /* Lose any fake structure return argument if it is passed on the stack. */
1500 if (aggregate_value_p (TREE_TYPE (funtype))
1501 && !TARGET_64BIT)
1502 {
1503 int nregs = ix86_fntype_regparm (funtype);
1504
1505 if (!nregs)
1506 return GET_MODE_SIZE (Pmode);
1507 }
1508
1509 return 0;
1510 }
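/* Illustration (hypothetical case): for a fixed-argument function whose
   type carries the stdcall attribute and which takes two ints, SIZE is 8
   and 8 is returned, so the callee pops its own arguments (e.g. with a
   "ret $8"); a plain cdecl function gets 0 and leaves the popping to the
   caller.  */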
1511 \f
1512 /* Argument support functions. */
1513
1514 /* Return true when register may be used to pass function parameters. */
1515 bool
1516 ix86_function_arg_regno_p (regno)
1517 int regno;
1518 {
1519 int i;
1520 if (!TARGET_64BIT)
1521 return (regno < REGPARM_MAX
1522 || (TARGET_SSE && SSE_REGNO_P (regno) && !fixed_regs[regno]));
1523 if (SSE_REGNO_P (regno) && TARGET_SSE)
1524 return true;
1525 /* RAX is used as hidden argument to va_arg functions. */
1526 if (!regno)
1527 return true;
1528 for (i = 0; i < REGPARM_MAX; i++)
1529 if (regno == x86_64_int_parameter_registers[i])
1530 return true;
1531 return false;
1532 }
1533
1534 /* Initialize a variable CUM of type CUMULATIVE_ARGS
1535 for a call to a function whose data type is FNTYPE.
1536 For a library call, FNTYPE is 0. */
1537
1538 void
1539 init_cumulative_args (cum, fntype, libname)
1540 CUMULATIVE_ARGS *cum; /* Argument info to initialize */
1541 tree fntype; /* tree ptr for function decl */
1542 rtx libname; /* SYMBOL_REF of library name or 0 */
1543 {
1544 static CUMULATIVE_ARGS zero_cum;
1545 tree param, next_param;
1546
1547 if (TARGET_DEBUG_ARG)
1548 {
1549 fprintf (stderr, "\ninit_cumulative_args (");
1550 if (fntype)
1551 fprintf (stderr, "fntype code = %s, ret code = %s",
1552 tree_code_name[(int) TREE_CODE (fntype)],
1553 tree_code_name[(int) TREE_CODE (TREE_TYPE (fntype))]);
1554 else
1555 fprintf (stderr, "no fntype");
1556
1557 if (libname)
1558 fprintf (stderr, ", libname = %s", XSTR (libname, 0));
1559 }
1560
1561 *cum = zero_cum;
1562
1563 /* Set up the number of registers to use for passing arguments. */
1564 cum->nregs = ix86_regparm;
1565 cum->sse_nregs = SSE_REGPARM_MAX;
1566 if (fntype && !TARGET_64BIT)
1567 {
1568 tree attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (fntype));
1569
1570 if (attr)
1571 cum->nregs = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
1572 }
1573 cum->maybe_vaarg = false;
1574
1575 /* Determine if this function has variable arguments. This is
1576 indicated by the last argument being 'void_type_node' if there
1577 are no variable arguments. If there are variable arguments, then
1578 we won't pass anything in registers. */
1579
1580 if (cum->nregs)
1581 {
1582 for (param = (fntype) ? TYPE_ARG_TYPES (fntype) : 0;
1583 param != 0; param = next_param)
1584 {
1585 next_param = TREE_CHAIN (param);
1586 if (next_param == 0 && TREE_VALUE (param) != void_type_node)
1587 {
1588 if (!TARGET_64BIT)
1589 cum->nregs = 0;
1590 cum->maybe_vaarg = true;
1591 }
1592 }
1593 }
1594 if ((!fntype && !libname)
1595 || (fntype && !TYPE_ARG_TYPES (fntype)))
1596 cum->maybe_vaarg = 1;
1597
1598 if (TARGET_DEBUG_ARG)
1599 fprintf (stderr, ", nregs=%d )\n", cum->nregs);
1600
1601 return;
1602 }
1603
1604 /* x86-64 register passing implementation. See the x86-64 ABI for details.
1605 The goal of this code is to classify each eightbyte of an incoming argument
1606 by register class and assign registers accordingly. */
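/* Illustrative example (not part of the compiler): under the x86-64 ABI

       struct s { long l; double d; };

   occupies two eightbytes; the first is classified as INTEGER and the
   second as SSE, so the structure is passed in one general purpose
   register and one SSE register.  Aggregates larger than 16 bytes are
   classified as MEMORY and passed on the stack.  */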
1607
1608 /* Return the union class of CLASS1 and CLASS2.
1609 See the x86-64 PS ABI for details. */
1610
1611 static enum x86_64_reg_class
1612 merge_classes (class1, class2)
1613 enum x86_64_reg_class class1, class2;
1614 {
1615 /* Rule #1: If both classes are equal, this is the resulting class. */
1616 if (class1 == class2)
1617 return class1;
1618
1619 /* Rule #2: If one of the classes is NO_CLASS, the resulting class is
1620 the other class. */
1621 if (class1 == X86_64_NO_CLASS)
1622 return class2;
1623 if (class2 == X86_64_NO_CLASS)
1624 return class1;
1625
1626 /* Rule #3: If one of the classes is MEMORY, the result is MEMORY. */
1627 if (class1 == X86_64_MEMORY_CLASS || class2 == X86_64_MEMORY_CLASS)
1628 return X86_64_MEMORY_CLASS;
1629
1630 /* Rule #4: If one of the classes is INTEGER, the result is INTEGER. */
1631 if ((class1 == X86_64_INTEGERSI_CLASS && class2 == X86_64_SSESF_CLASS)
1632 || (class2 == X86_64_INTEGERSI_CLASS && class1 == X86_64_SSESF_CLASS))
1633 return X86_64_INTEGERSI_CLASS;
1634 if (class1 == X86_64_INTEGER_CLASS || class1 == X86_64_INTEGERSI_CLASS
1635 || class2 == X86_64_INTEGER_CLASS || class2 == X86_64_INTEGERSI_CLASS)
1636 return X86_64_INTEGER_CLASS;
1637
1638 /* Rule #5: If one of the classes is X87 or X87UP class, MEMORY is used. */
1639 if (class1 == X86_64_X87_CLASS || class1 == X86_64_X87UP_CLASS
1640 || class2 == X86_64_X87_CLASS || class2 == X86_64_X87UP_CLASS)
1641 return X86_64_MEMORY_CLASS;
1642
1643 /* Rule #6: Otherwise class SSE is used. */
1644 return X86_64_SSE_CLASS;
1645 }
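/* Illustrative example (not part of the compiler): for

       union u { int i; float f; };

   the single eightbyte gets INTEGERSI from the int member and SSESF
   from the float member; rule #4 above merges these to INTEGERSI, so
   the union is passed in a general purpose register.  */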
1646
1647 /* Classify the argument of type TYPE and mode MODE.
1648 CLASSES will be filled by the register class used to pass each word
1649 of the operand. The number of words is returned. In case the parameter
1650 should be passed in memory, 0 is returned. As a special case for zero
1651 sized containers, classes[0] will be NO_CLASS and 1 is returned.
1652
1653 BIT_OFFSET is used internally for handling records; it specifies the
1654 offset of the field in bits, modulo 256, to avoid overflow.
1655
1656 See the x86-64 PS ABI for details.
1657 */
1658
1659 static int
1660 classify_argument (mode, type, classes, bit_offset)
1661 enum machine_mode mode;
1662 tree type;
1663 enum x86_64_reg_class classes[MAX_CLASSES];
1664 int bit_offset;
1665 {
1666 int bytes =
1667 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
1668 int words = (bytes + (bit_offset % 64) / 8 + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
1669
1670 /* Variable sized entities are always passed/returned in memory. */
1671 if (bytes < 0)
1672 return 0;
1673
1674 if (type && AGGREGATE_TYPE_P (type))
1675 {
1676 int i;
1677 tree field;
1678 enum x86_64_reg_class subclasses[MAX_CLASSES];
1679
1680 /* On x86-64 we pass structures larger than 16 bytes on the stack. */
1681 if (bytes > 16)
1682 return 0;
1683
1684 for (i = 0; i < words; i++)
1685 classes[i] = X86_64_NO_CLASS;
1686
1687 /* Zero sized arrays or structures are NO_CLASS. We return 0 to
1688 signal the memory class, so handle this as a special case. */
1689 if (!words)
1690 {
1691 classes[0] = X86_64_NO_CLASS;
1692 return 1;
1693 }
1694
1695 /* Classify each field of record and merge classes. */
1696 if (TREE_CODE (type) == RECORD_TYPE)
1697 {
1698 /* For C++ classes, first merge in the fields of the base classes. */
1699 if (TYPE_BINFO (type) != NULL && TYPE_BINFO_BASETYPES (type) != NULL)
1700 {
1701 tree bases = TYPE_BINFO_BASETYPES (type);
1702 int n_bases = TREE_VEC_LENGTH (bases);
1703 int i;
1704
1705 for (i = 0; i < n_bases; ++i)
1706 {
1707 tree binfo = TREE_VEC_ELT (bases, i);
1708 int num;
1709 int offset = tree_low_cst (BINFO_OFFSET (binfo), 0) * 8;
1710 tree type = BINFO_TYPE (binfo);
1711
1712 num = classify_argument (TYPE_MODE (type),
1713 type, subclasses,
1714 (offset + bit_offset) % 256);
1715 if (!num)
1716 return 0;
1717 for (i = 0; i < num; i++)
1718 {
1719 int pos = (offset + (bit_offset % 64)) / 8 / 8;
1720 classes[i + pos] =
1721 merge_classes (subclasses[i], classes[i + pos]);
1722 }
1723 }
1724 }
1725 /* And now merge the fields of structure. */
1726 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
1727 {
1728 if (TREE_CODE (field) == FIELD_DECL)
1729 {
1730 int num;
1731
1732 /* Bitfields are always classified as integer. Handle them
1733 early, since later code would consider them to be
1734 misaligned integers. */
1735 if (DECL_BIT_FIELD (field))
1736 {
1737 for (i = int_bit_position (field) / 8 / 8;
1738 i < (int_bit_position (field)
1739 + tree_low_cst (DECL_SIZE (field), 0)
1740 + 63) / 8 / 8; i++)
1741 classes[i] =
1742 merge_classes (X86_64_INTEGER_CLASS,
1743 classes[i]);
1744 }
1745 else
1746 {
1747 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
1748 TREE_TYPE (field), subclasses,
1749 (int_bit_position (field)
1750 + bit_offset) % 256);
1751 if (!num)
1752 return 0;
1753 for (i = 0; i < num; i++)
1754 {
1755 int pos =
1756 (int_bit_position (field) + (bit_offset % 64)) / 8 / 8;
1757 classes[i + pos] =
1758 merge_classes (subclasses[i], classes[i + pos]);
1759 }
1760 }
1761 }
1762 }
1763 }
1764 /* Arrays are handled as small records. */
1765 else if (TREE_CODE (type) == ARRAY_TYPE)
1766 {
1767 int num;
1768 num = classify_argument (TYPE_MODE (TREE_TYPE (type)),
1769 TREE_TYPE (type), subclasses, bit_offset);
1770 if (!num)
1771 return 0;
1772
1773 /* The partial classes are now full classes. */
1774 if (subclasses[0] == X86_64_SSESF_CLASS && bytes != 4)
1775 subclasses[0] = X86_64_SSE_CLASS;
1776 if (subclasses[0] == X86_64_INTEGERSI_CLASS && bytes != 4)
1777 subclasses[0] = X86_64_INTEGER_CLASS;
1778
1779 for (i = 0; i < words; i++)
1780 classes[i] = subclasses[i % num];
1781 }
1782 /* Unions are similar to RECORD_TYPE but offset is always 0. */
1783 else if (TREE_CODE (type) == UNION_TYPE
1784 || TREE_CODE (type) == QUAL_UNION_TYPE)
1785 {
1786 /* For C++ classes, first merge in the fields of the base classes. */
1787 if (TYPE_BINFO (type) != NULL && TYPE_BINFO_BASETYPES (type) != NULL)
1788 {
1789 tree bases = TYPE_BINFO_BASETYPES (type);
1790 int n_bases = TREE_VEC_LENGTH (bases);
1791 int i;
1792
1793 for (i = 0; i < n_bases; ++i)
1794 {
1795 tree binfo = TREE_VEC_ELT (bases, i);
1796 int num;
1797 int offset = tree_low_cst (BINFO_OFFSET (binfo), 0) * 8;
1798 tree type = BINFO_TYPE (binfo);
1799
1800 num = classify_argument (TYPE_MODE (type),
1801 type, subclasses,
1802 (offset + (bit_offset % 64)) % 256);
1803 if (!num)
1804 return 0;
1805 for (i = 0; i < num; i++)
1806 {
1807 int pos = (offset + (bit_offset % 64)) / 8 / 8;
1808 classes[i + pos] =
1809 merge_classes (subclasses[i], classes[i + pos]);
1810 }
1811 }
1812 }
1813 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
1814 {
1815 if (TREE_CODE (field) == FIELD_DECL)
1816 {
1817 int num;
1818 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
1819 TREE_TYPE (field), subclasses,
1820 bit_offset);
1821 if (!num)
1822 return 0;
1823 for (i = 0; i < num; i++)
1824 classes[i] = merge_classes (subclasses[i], classes[i]);
1825 }
1826 }
1827 }
1828 else
1829 abort ();
1830
1831 /* Final merger cleanup. */
1832 for (i = 0; i < words; i++)
1833 {
1834 /* If one class is MEMORY, everything should be passed in
1835 memory. */
1836 if (classes[i] == X86_64_MEMORY_CLASS)
1837 return 0;
1838
1839 /* The X86_64_SSEUP_CLASS should be always preceded by
1840 X86_64_SSE_CLASS. */
1841 if (classes[i] == X86_64_SSEUP_CLASS
1842 && (i == 0 || classes[i - 1] != X86_64_SSE_CLASS))
1843 classes[i] = X86_64_SSE_CLASS;
1844
1845 /* X86_64_X87UP_CLASS should be preceded by X86_64_X87_CLASS. */
1846 if (classes[i] == X86_64_X87UP_CLASS
1847 && (i == 0 || classes[i - 1] != X86_64_X87_CLASS))
1848 classes[i] = X86_64_SSE_CLASS;
1849 }
1850 return words;
1851 }
1852
1853 /* Compute the alignment needed. We align all types to their natural
1854 boundaries, with the exception of XFmode, which is aligned to 64 bits. */
1855 if (mode != VOIDmode && mode != BLKmode)
1856 {
1857 int mode_alignment = GET_MODE_BITSIZE (mode);
1858
1859 if (mode == XFmode)
1860 mode_alignment = 128;
1861 else if (mode == XCmode)
1862 mode_alignment = 256;
1863 /* Misaligned fields are always returned in memory. */
1864 if (bit_offset % mode_alignment)
1865 return 0;
1866 }
1867
1868 /* Classification of atomic types. */
1869 switch (mode)
1870 {
1871 case DImode:
1872 case SImode:
1873 case HImode:
1874 case QImode:
1875 case CSImode:
1876 case CHImode:
1877 case CQImode:
1878 if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
1879 classes[0] = X86_64_INTEGERSI_CLASS;
1880 else
1881 classes[0] = X86_64_INTEGER_CLASS;
1882 return 1;
1883 case CDImode:
1884 case TImode:
1885 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
1886 return 2;
1887 case CTImode:
1888 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
1889 classes[2] = classes[3] = X86_64_INTEGER_CLASS;
1890 return 4;
1891 case SFmode:
1892 if (!(bit_offset % 64))
1893 classes[0] = X86_64_SSESF_CLASS;
1894 else
1895 classes[0] = X86_64_SSE_CLASS;
1896 return 1;
1897 case DFmode:
1898 classes[0] = X86_64_SSEDF_CLASS;
1899 return 1;
1900 case TFmode:
1901 classes[0] = X86_64_X87_CLASS;
1902 classes[1] = X86_64_X87UP_CLASS;
1903 return 2;
1904 case TCmode:
1905 classes[0] = X86_64_X87_CLASS;
1906 classes[1] = X86_64_X87UP_CLASS;
1907 classes[2] = X86_64_X87_CLASS;
1908 classes[3] = X86_64_X87UP_CLASS;
1909 return 4;
1910 case DCmode:
1911 classes[0] = X86_64_SSEDF_CLASS;
1912 classes[1] = X86_64_SSEDF_CLASS;
1913 return 2;
1914 case SCmode:
1915 classes[0] = X86_64_SSE_CLASS;
1916 return 1;
1917 case V4SFmode:
1918 case V4SImode:
1919 case V16QImode:
1920 case V8HImode:
1921 case V2DFmode:
1922 case V2DImode:
1923 classes[0] = X86_64_SSE_CLASS;
1924 classes[1] = X86_64_SSEUP_CLASS;
1925 return 2;
1926 case V2SFmode:
1927 case V2SImode:
1928 case V4HImode:
1929 case V8QImode:
1930 return 0;
1931 case BLKmode:
1932 case VOIDmode:
1933 return 0;
1934 default:
1935 abort ();
1936 }
1937 }
1938
1939 /* Examine the argument and set the number of registers required in each
1940 class. Return 0 iff the parameter should be passed in memory. */
1941 static int
1942 examine_argument (mode, type, in_return, int_nregs, sse_nregs)
1943 enum machine_mode mode;
1944 tree type;
1945 int *int_nregs, *sse_nregs;
1946 int in_return;
1947 {
1948 enum x86_64_reg_class class[MAX_CLASSES];
1949 int n = classify_argument (mode, type, class, 0);
1950
1951 *int_nregs = 0;
1952 *sse_nregs = 0;
1953 if (!n)
1954 return 0;
1955 for (n--; n >= 0; n--)
1956 switch (class[n])
1957 {
1958 case X86_64_INTEGER_CLASS:
1959 case X86_64_INTEGERSI_CLASS:
1960 (*int_nregs)++;
1961 break;
1962 case X86_64_SSE_CLASS:
1963 case X86_64_SSESF_CLASS:
1964 case X86_64_SSEDF_CLASS:
1965 (*sse_nregs)++;
1966 break;
1967 case X86_64_NO_CLASS:
1968 case X86_64_SSEUP_CLASS:
1969 break;
1970 case X86_64_X87_CLASS:
1971 case X86_64_X87UP_CLASS:
1972 if (!in_return)
1973 return 0;
1974 break;
1975 case X86_64_MEMORY_CLASS:
1976 abort ();
1977 }
1978 return 1;
1979 }
1980 /* Construct container for the argument used by GCC interface. See
1981 FUNCTION_ARG for the detailed description. */
1982 static rtx
1983 construct_container (mode, type, in_return, nintregs, nsseregs, intreg, sse_regno)
1984 enum machine_mode mode;
1985 tree type;
1986 int in_return;
1987 int nintregs, nsseregs;
1988 const int * intreg;
1989 int sse_regno;
1990 {
1991 enum machine_mode tmpmode;
1992 int bytes =
1993 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
1994 enum x86_64_reg_class class[MAX_CLASSES];
1995 int n;
1996 int i;
1997 int nexps = 0;
1998 int needed_sseregs, needed_intregs;
1999 rtx exp[MAX_CLASSES];
2000 rtx ret;
2001
2002 n = classify_argument (mode, type, class, 0);
2003 if (TARGET_DEBUG_ARG)
2004 {
2005 if (!n)
2006 fprintf (stderr, "Memory class\n");
2007 else
2008 {
2009 fprintf (stderr, "Classes:");
2010 for (i = 0; i < n; i++)
2011 {
2012 fprintf (stderr, " %s", x86_64_reg_class_name[class[i]]);
2013 }
2014 fprintf (stderr, "\n");
2015 }
2016 }
2017 if (!n)
2018 return NULL;
2019 if (!examine_argument (mode, type, in_return, &needed_intregs, &needed_sseregs))
2020 return NULL;
2021 if (needed_intregs > nintregs || needed_sseregs > nsseregs)
2022 return NULL;
2023
2024 /* First construct the simple cases. Avoid SCmode, since we want to use
2025 a single register to pass this type. */
2026 if (n == 1 && mode != SCmode)
2027 switch (class[0])
2028 {
2029 case X86_64_INTEGER_CLASS:
2030 case X86_64_INTEGERSI_CLASS:
2031 return gen_rtx_REG (mode, intreg[0]);
2032 case X86_64_SSE_CLASS:
2033 case X86_64_SSESF_CLASS:
2034 case X86_64_SSEDF_CLASS:
2035 return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
2036 case X86_64_X87_CLASS:
2037 return gen_rtx_REG (mode, FIRST_STACK_REG);
2038 case X86_64_NO_CLASS:
2039 /* Zero sized array, struct or class. */
2040 return NULL;
2041 default:
2042 abort ();
2043 }
2044 if (n == 2 && class[0] == X86_64_SSE_CLASS && class[1] == X86_64_SSEUP_CLASS)
2045 return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
2046 if (n == 2
2047 && class[0] == X86_64_X87_CLASS && class[1] == X86_64_X87UP_CLASS)
2048 return gen_rtx_REG (TFmode, FIRST_STACK_REG);
2049 if (n == 2 && class[0] == X86_64_INTEGER_CLASS
2050 && class[1] == X86_64_INTEGER_CLASS
2051 && (mode == CDImode || mode == TImode)
2052 && intreg[0] + 1 == intreg[1])
2053 return gen_rtx_REG (mode, intreg[0]);
2054 if (n == 4
2055 && class[0] == X86_64_X87_CLASS && class[1] == X86_64_X87UP_CLASS
2056 && class[2] == X86_64_X87_CLASS && class[3] == X86_64_X87UP_CLASS)
2057 return gen_rtx_REG (TCmode, FIRST_STACK_REG);
2058
2059 /* Otherwise figure out the entries of the PARALLEL. */
2060 for (i = 0; i < n; i++)
2061 {
2062 switch (class[i])
2063 {
2064 case X86_64_NO_CLASS:
2065 break;
2066 case X86_64_INTEGER_CLASS:
2067 case X86_64_INTEGERSI_CLASS:
2068 /* Merge TImodes on aligned occasions here too. */
2069 if (i * 8 + 8 > bytes)
2070 tmpmode = mode_for_size ((bytes - i * 8) * BITS_PER_UNIT, MODE_INT, 0);
2071 else if (class[i] == X86_64_INTEGERSI_CLASS)
2072 tmpmode = SImode;
2073 else
2074 tmpmode = DImode;
2075 /* We've requested 24 bytes for which we don't have a mode. Use DImode. */
2076 if (tmpmode == BLKmode)
2077 tmpmode = DImode;
2078 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
2079 gen_rtx_REG (tmpmode, *intreg),
2080 GEN_INT (i*8));
2081 intreg++;
2082 break;
2083 case X86_64_SSESF_CLASS:
2084 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
2085 gen_rtx_REG (SFmode,
2086 SSE_REGNO (sse_regno)),
2087 GEN_INT (i*8));
2088 sse_regno++;
2089 break;
2090 case X86_64_SSEDF_CLASS:
2091 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
2092 gen_rtx_REG (DFmode,
2093 SSE_REGNO (sse_regno)),
2094 GEN_INT (i*8));
2095 sse_regno++;
2096 break;
2097 case X86_64_SSE_CLASS:
2098 if (i < n - 1 && class[i + 1] == X86_64_SSEUP_CLASS)
2099 tmpmode = TImode, i++;
2100 else
2101 tmpmode = DImode;
2102 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
2103 gen_rtx_REG (tmpmode,
2104 SSE_REGNO (sse_regno)),
2105 GEN_INT (i*8));
2106 sse_regno++;
2107 break;
2108 default:
2109 abort ();
2110 }
2111 }
2112 ret = gen_rtx_PARALLEL (mode, rtvec_alloc (nexps));
2113 for (i = 0; i < nexps; i++)
2114 XVECEXP (ret, 0, i) = exp [i];
2115 return ret;
2116 }
2117
2118 /* Update the data in CUM to advance over an argument
2119 of mode MODE and data type TYPE.
2120 (TYPE is null for libcalls where that information may not be available.) */
2121
2122 void
2123 function_arg_advance (cum, mode, type, named)
2124 CUMULATIVE_ARGS *cum; /* current arg information */
2125 enum machine_mode mode; /* current arg mode */
2126 tree type; /* type of the argument or 0 if lib support */
2127 int named; /* whether or not the argument was named */
2128 {
2129 int bytes =
2130 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
2131 int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
2132
2133 if (TARGET_DEBUG_ARG)
2134 fprintf (stderr,
2135 "function_adv (sz=%d, wds=%2d, nregs=%d, mode=%s, named=%d)\n\n",
2136 words, cum->words, cum->nregs, GET_MODE_NAME (mode), named);
2137 if (TARGET_64BIT)
2138 {
2139 int int_nregs, sse_nregs;
2140 if (!examine_argument (mode, type, 0, &int_nregs, &sse_nregs))
2141 cum->words += words;
2142 else if (sse_nregs <= cum->sse_nregs && int_nregs <= cum->nregs)
2143 {
2144 cum->nregs -= int_nregs;
2145 cum->sse_nregs -= sse_nregs;
2146 cum->regno += int_nregs;
2147 cum->sse_regno += sse_nregs;
2148 }
2149 else
2150 cum->words += words;
2151 }
2152 else
2153 {
2154 if (TARGET_SSE && mode == TImode)
2155 {
2156 cum->sse_words += words;
2157 cum->sse_nregs -= 1;
2158 cum->sse_regno += 1;
2159 if (cum->sse_nregs <= 0)
2160 {
2161 cum->sse_nregs = 0;
2162 cum->sse_regno = 0;
2163 }
2164 }
2165 else
2166 {
2167 cum->words += words;
2168 cum->nregs -= words;
2169 cum->regno += words;
2170
2171 if (cum->nregs <= 0)
2172 {
2173 cum->nregs = 0;
2174 cum->regno = 0;
2175 }
2176 }
2177 }
2178 return;
2179 }
2180
2181 /* Define where to put the arguments to a function.
2182 Value is zero to push the argument on the stack,
2183 or a hard register in which to store the argument.
2184
2185 MODE is the argument's machine mode.
2186 TYPE is the data type of the argument (as a tree).
2187 This is null for libcalls where that information may
2188 not be available.
2189 CUM is a variable of type CUMULATIVE_ARGS which gives info about
2190 the preceding args and about the function being called.
2191 NAMED is nonzero if this argument is a named parameter
2192 (otherwise it is an extra parameter matching an ellipsis). */
2193
2194 rtx
2195 function_arg (cum, mode, type, named)
2196 CUMULATIVE_ARGS *cum; /* current arg information */
2197 enum machine_mode mode; /* current arg mode */
2198 tree type; /* type of the argument or 0 if lib support */
2199 int named; /* != 0 for normal args, == 0 for ... args */
2200 {
2201 rtx ret = NULL_RTX;
2202 int bytes =
2203 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
2204 int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
2205
2206 /* Handle a hidden AL argument containing the number of SSE registers for
2207 varargs x86-64 functions. For the i386 ABI just return constm1_rtx to
2208 avoid any AL settings. */
2209 if (mode == VOIDmode)
2210 {
2211 if (TARGET_64BIT)
2212 return GEN_INT (cum->maybe_vaarg
2213 ? (cum->sse_nregs < 0
2214 ? SSE_REGPARM_MAX
2215 : cum->sse_regno)
2216 : -1);
2217 else
2218 return constm1_rtx;
2219 }
2220 if (TARGET_64BIT)
2221 ret = construct_container (mode, type, 0, cum->nregs, cum->sse_nregs,
2222 &x86_64_int_parameter_registers [cum->regno],
2223 cum->sse_regno);
2224 else
2225 switch (mode)
2226 {
2227 /* For now, pass fp/complex values on the stack. */
2228 default:
2229 break;
2230
2231 case BLKmode:
2232 case DImode:
2233 case SImode:
2234 case HImode:
2235 case QImode:
2236 if (words <= cum->nregs)
2237 ret = gen_rtx_REG (mode, cum->regno);
2238 break;
2239 case TImode:
2240 if (cum->sse_nregs)
2241 ret = gen_rtx_REG (mode, cum->sse_regno);
2242 break;
2243 }
2244
2245 if (TARGET_DEBUG_ARG)
2246 {
2247 fprintf (stderr,
2248 "function_arg (size=%d, wds=%2d, nregs=%d, mode=%4s, named=%d, ",
2249 words, cum->words, cum->nregs, GET_MODE_NAME (mode), named);
2250
2251 if (ret)
2252 print_simple_rtl (stderr, ret);
2253 else
2254 fprintf (stderr, ", stack");
2255
2256 fprintf (stderr, " )\n");
2257 }
2258
2259 return ret;
2260 }
2261
2262 /* Gives the alignment boundary, in bits, of an argument with the specified mode
2263 and type. */
2264
2265 int
2266 ix86_function_arg_boundary (mode, type)
2267 enum machine_mode mode;
2268 tree type;
2269 {
2270 int align;
2271 if (!TARGET_64BIT)
2272 return PARM_BOUNDARY;
2273 if (type)
2274 align = TYPE_ALIGN (type);
2275 else
2276 align = GET_MODE_ALIGNMENT (mode);
2277 if (align < PARM_BOUNDARY)
2278 align = PARM_BOUNDARY;
2279 if (align > 128)
2280 align = 128;
2281 return align;
2282 }
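/* Illustrative example (not part of the compiler): on x86-64 an argument
   of a 16 byte vector type such as __m128 has TYPE_ALIGN of 128 and is
   therefore passed on a 16 byte boundary, while a plain int argument is
   only aligned to PARM_BOUNDARY (64 bits).  */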
2283
2284 /* Return true if REGNO is a possible register number for a function value. */
2285 bool
2286 ix86_function_value_regno_p (regno)
2287 int regno;
2288 {
2289 if (!TARGET_64BIT)
2290 {
2291 return ((regno) == 0
2292 || ((regno) == FIRST_FLOAT_REG && TARGET_FLOAT_RETURNS_IN_80387)
2293 || ((regno) == FIRST_SSE_REG && TARGET_SSE));
2294 }
2295 return ((regno) == 0 || (regno) == FIRST_FLOAT_REG
2296 || ((regno) == FIRST_SSE_REG && TARGET_SSE)
2297 || ((regno) == FIRST_FLOAT_REG && TARGET_FLOAT_RETURNS_IN_80387));
2298 }
2299
2300 /* Define how to find the value returned by a function.
2301 VALTYPE is the data type of the value (as a tree).
2302 If the precise function being called is known, FUNC is its FUNCTION_DECL;
2303 otherwise, FUNC is 0. */
2304 rtx
2305 ix86_function_value (valtype)
2306 tree valtype;
2307 {
2308 if (TARGET_64BIT)
2309 {
2310 rtx ret = construct_container (TYPE_MODE (valtype), valtype, 1,
2311 REGPARM_MAX, SSE_REGPARM_MAX,
2312 x86_64_int_return_registers, 0);
2313 /* For zero sized structures, construct_container returns NULL, but we need
2314 to keep the rest of the compiler happy by returning a meaningful value. */
2315 if (!ret)
2316 ret = gen_rtx_REG (TYPE_MODE (valtype), 0);
2317 return ret;
2318 }
2319 else
2320 return gen_rtx_REG (TYPE_MODE (valtype),
2321 ix86_value_regno (TYPE_MODE (valtype)));
2322 }
2323
2324 /* Return nonzero iff TYPE is returned in memory. */
2325 int
2326 ix86_return_in_memory (type)
2327 tree type;
2328 {
2329 int needed_intregs, needed_sseregs;
2330 if (TARGET_64BIT)
2331 {
2332 return !examine_argument (TYPE_MODE (type), type, 1,
2333 &needed_intregs, &needed_sseregs);
2334 }
2335 else
2336 {
2337 if (TYPE_MODE (type) == BLKmode
2338 || (VECTOR_MODE_P (TYPE_MODE (type))
2339 && int_size_in_bytes (type) == 8)
2340 || (int_size_in_bytes (type) > 12 && TYPE_MODE (type) != TImode
2341 && TYPE_MODE (type) != TFmode
2342 && !VECTOR_MODE_P (TYPE_MODE (type))))
2343 return 1;
2344 return 0;
2345 }
2346 }
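/* Illustrative example (not part of the compiler): on x86-64

       struct big { char c[32]; };

   needs more than two eightbytes, so examine_argument fails and the
   value is returned in memory through a hidden pointer, whereas a
   16 byte struct of two longs comes back in RAX and RDX.  */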
2347
2348 /* Define how to find the value returned by a library function
2349 assuming the value has mode MODE. */
2350 rtx
2351 ix86_libcall_value (mode)
2352 enum machine_mode mode;
2353 {
2354 if (TARGET_64BIT)
2355 {
2356 switch (mode)
2357 {
2358 case SFmode:
2359 case SCmode:
2360 case DFmode:
2361 case DCmode:
2362 return gen_rtx_REG (mode, FIRST_SSE_REG);
2363 case TFmode:
2364 case TCmode:
2365 return gen_rtx_REG (mode, FIRST_FLOAT_REG);
2366 default:
2367 return gen_rtx_REG (mode, 0);
2368 }
2369 }
2370 else
2371 return gen_rtx_REG (mode, ix86_value_regno (mode));
2372 }
2373
2374 /* Given a mode, return the register to use for a return value. */
2375
2376 static int
2377 ix86_value_regno (mode)
2378 enum machine_mode mode;
2379 {
2380 if (GET_MODE_CLASS (mode) == MODE_FLOAT && TARGET_FLOAT_RETURNS_IN_80387)
2381 return FIRST_FLOAT_REG;
2382 if (mode == TImode || VECTOR_MODE_P (mode))
2383 return FIRST_SSE_REG;
2384 return 0;
2385 }
2386 \f
2387 /* Create the va_list data type. */
2388
2389 tree
2390 ix86_build_va_list ()
2391 {
2392 tree f_gpr, f_fpr, f_ovf, f_sav, record, type_decl;
2393
2394 /* For i386 we use plain pointer to argument area. */
2395 if (!TARGET_64BIT)
2396 return build_pointer_type (char_type_node);
2397
2398 record = (*lang_hooks.types.make_type) (RECORD_TYPE);
2399 type_decl = build_decl (TYPE_DECL, get_identifier ("__va_list_tag"), record);
2400
2401 f_gpr = build_decl (FIELD_DECL, get_identifier ("gp_offset"),
2402 unsigned_type_node);
2403 f_fpr = build_decl (FIELD_DECL, get_identifier ("fp_offset"),
2404 unsigned_type_node);
2405 f_ovf = build_decl (FIELD_DECL, get_identifier ("overflow_arg_area"),
2406 ptr_type_node);
2407 f_sav = build_decl (FIELD_DECL, get_identifier ("reg_save_area"),
2408 ptr_type_node);
2409
2410 DECL_FIELD_CONTEXT (f_gpr) = record;
2411 DECL_FIELD_CONTEXT (f_fpr) = record;
2412 DECL_FIELD_CONTEXT (f_ovf) = record;
2413 DECL_FIELD_CONTEXT (f_sav) = record;
2414
2415 TREE_CHAIN (record) = type_decl;
2416 TYPE_NAME (record) = type_decl;
2417 TYPE_FIELDS (record) = f_gpr;
2418 TREE_CHAIN (f_gpr) = f_fpr;
2419 TREE_CHAIN (f_fpr) = f_ovf;
2420 TREE_CHAIN (f_ovf) = f_sav;
2421
2422 layout_type (record);
2423
2424 /* The correct type is an array type of one element. */
2425 return build_array_type (record, build_index_type (size_zero_node));
2426 }
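/* The record built above corresponds to this C level layout mandated by
   the x86-64 ABI (sketch for illustration only):

       typedef struct {
         unsigned int gp_offset;
         unsigned int fp_offset;
         void *overflow_arg_area;
         void *reg_save_area;
       } __va_list_tag;
       typedef __va_list_tag __builtin_va_list[1];
*/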
2427
2428 /* Perform any actions needed for a function that is receiving a
2429 variable number of arguments.
2430
2431 CUM is as above.
2432
2433 MODE and TYPE are the mode and type of the current parameter.
2434
2435 PRETEND_SIZE is a variable that should be set to the amount of stack
2436 that must be pushed by the prologue to pretend that our caller pushed
2437 it.
2438
2439 Normally, this macro will push all remaining incoming registers on the
2440 stack and set PRETEND_SIZE to the length of the registers pushed. */
2441
2442 void
2443 ix86_setup_incoming_varargs (cum, mode, type, pretend_size, no_rtl)
2444 CUMULATIVE_ARGS *cum;
2445 enum machine_mode mode;
2446 tree type;
2447 int *pretend_size ATTRIBUTE_UNUSED;
2448 int no_rtl;
2449
2450 {
2451 CUMULATIVE_ARGS next_cum;
2452 rtx save_area = NULL_RTX, mem;
2453 rtx label;
2454 rtx label_ref;
2455 rtx tmp_reg;
2456 rtx nsse_reg;
2457 int set;
2458 tree fntype;
2459 int stdarg_p;
2460 int i;
2461
2462 if (!TARGET_64BIT)
2463 return;
2464
2465 /* Indicate to allocate space on the stack for varargs save area. */
2466 ix86_save_varrargs_registers = 1;
2467
2468 fntype = TREE_TYPE (current_function_decl);
2469 stdarg_p = (TYPE_ARG_TYPES (fntype) != 0
2470 && (TREE_VALUE (tree_last (TYPE_ARG_TYPES (fntype)))
2471 != void_type_node));
2472
2473 /* For varargs, we do not want to skip the dummy va_dcl argument.
2474 For stdargs, we do want to skip the last named argument. */
2475 next_cum = *cum;
2476 if (stdarg_p)
2477 function_arg_advance (&next_cum, mode, type, 1);
2478
2479 if (!no_rtl)
2480 save_area = frame_pointer_rtx;
2481
2482 set = get_varargs_alias_set ();
2483
2484 for (i = next_cum.regno; i < ix86_regparm; i++)
2485 {
2486 mem = gen_rtx_MEM (Pmode,
2487 plus_constant (save_area, i * UNITS_PER_WORD));
2488 set_mem_alias_set (mem, set);
2489 emit_move_insn (mem, gen_rtx_REG (Pmode,
2490 x86_64_int_parameter_registers[i]));
2491 }
2492
2493 if (next_cum.sse_nregs)
2494 {
2495 /* Now emit code to save SSE registers. The AX parameter contains the
2496 number of SSE parameter registers used to call this function. We use
2497 the sse_prologue_save insn template that produces a computed jump across
2498 the SSE saves. We need some preparation work to get this working. */
2499
2500 label = gen_label_rtx ();
2501 label_ref = gen_rtx_LABEL_REF (Pmode, label);
2502
2503 /* Compute the address to jump to:
2504 label - 4*eax + nnamed_sse_arguments*4 */
2505 tmp_reg = gen_reg_rtx (Pmode);
2506 nsse_reg = gen_reg_rtx (Pmode);
2507 emit_insn (gen_zero_extendqidi2 (nsse_reg, gen_rtx_REG (QImode, 0)));
2508 emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
2509 gen_rtx_MULT (Pmode, nsse_reg,
2510 GEN_INT (4))));
2511 if (next_cum.sse_regno)
2512 emit_move_insn
2513 (nsse_reg,
2514 gen_rtx_CONST (DImode,
2515 gen_rtx_PLUS (DImode,
2516 label_ref,
2517 GEN_INT (next_cum.sse_regno * 4))));
2518 else
2519 emit_move_insn (nsse_reg, label_ref);
2520 emit_insn (gen_subdi3 (nsse_reg, nsse_reg, tmp_reg));
2521
2522 /* Compute the address of the memory block we save into. We always use a
2523 pointer pointing 127 bytes after the first byte to store - this is
2524 needed to keep the instruction size limited to 4 bytes. */
2525 tmp_reg = gen_reg_rtx (Pmode);
2526 emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
2527 plus_constant (save_area,
2528 8 * REGPARM_MAX + 127)));
2529 mem = gen_rtx_MEM (BLKmode, plus_constant (tmp_reg, -127));
2530 set_mem_alias_set (mem, set);
2531 set_mem_align (mem, BITS_PER_WORD);
2532
2533 /* And finally do the dirty job! */
2534 emit_insn (gen_sse_prologue_save (mem, nsse_reg,
2535 GEN_INT (next_cum.sse_regno), label));
2536 }
2537
2538 }
2539
2540 /* Implement va_start. */
2541
2542 void
2543 ix86_va_start (valist, nextarg)
2544 tree valist;
2545 rtx nextarg;
2546 {
2547 HOST_WIDE_INT words, n_gpr, n_fpr;
2548 tree f_gpr, f_fpr, f_ovf, f_sav;
2549 tree gpr, fpr, ovf, sav, t;
2550
2551 /* Only 64bit target needs something special. */
2552 if (!TARGET_64BIT)
2553 {
2554 std_expand_builtin_va_start (valist, nextarg);
2555 return;
2556 }
2557
2558 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
2559 f_fpr = TREE_CHAIN (f_gpr);
2560 f_ovf = TREE_CHAIN (f_fpr);
2561 f_sav = TREE_CHAIN (f_ovf);
2562
2563 valist = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (valist)), valist);
2564 gpr = build (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr);
2565 fpr = build (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr);
2566 ovf = build (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf);
2567 sav = build (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav);
2568
2569 /* Count number of gp and fp argument registers used. */
2570 words = current_function_args_info.words;
2571 n_gpr = current_function_args_info.regno;
2572 n_fpr = current_function_args_info.sse_regno;
2573
2574 if (TARGET_DEBUG_ARG)
2575 fprintf (stderr, "va_start: words = %d, n_gpr = %d, n_fpr = %d\n",
2576 (int) words, (int) n_gpr, (int) n_fpr);
2577
2578 t = build (MODIFY_EXPR, TREE_TYPE (gpr), gpr,
2579 build_int_2 (n_gpr * 8, 0));
2580 TREE_SIDE_EFFECTS (t) = 1;
2581 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
2582
2583 t = build (MODIFY_EXPR, TREE_TYPE (fpr), fpr,
2584 build_int_2 (n_fpr * 16 + 8*REGPARM_MAX, 0));
2585 TREE_SIDE_EFFECTS (t) = 1;
2586 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
2587
2588 /* Find the overflow area. */
2589 t = make_tree (TREE_TYPE (ovf), virtual_incoming_args_rtx);
2590 if (words != 0)
2591 t = build (PLUS_EXPR, TREE_TYPE (ovf), t,
2592 build_int_2 (words * UNITS_PER_WORD, 0));
2593 t = build (MODIFY_EXPR, TREE_TYPE (ovf), ovf, t);
2594 TREE_SIDE_EFFECTS (t) = 1;
2595 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
2596
2597 /* Find the register save area.
2598 The prologue of the function saves it right above the stack frame. */
2599 t = make_tree (TREE_TYPE (sav), frame_pointer_rtx);
2600 t = build (MODIFY_EXPR, TREE_TYPE (sav), sav, t);
2601 TREE_SIDE_EFFECTS (t) = 1;
2602 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
2603 }
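/* Illustrative example (not part of the compiler): for

       void f (int n, ...)

   the single named argument consumes one general purpose register, so
   va_start stores gp_offset = 8, fp_offset = 8 * REGPARM_MAX (no SSE
   registers used by named arguments), points overflow_arg_area at the
   first stack-passed argument and reg_save_area at the block saved by
   the prologue.  */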
2604
2605 /* Implement va_arg. */
2606 rtx
2607 ix86_va_arg (valist, type)
2608 tree valist, type;
2609 {
2610 static const int intreg[6] = { 0, 1, 2, 3, 4, 5 };
2611 tree f_gpr, f_fpr, f_ovf, f_sav;
2612 tree gpr, fpr, ovf, sav, t;
2613 int size, rsize;
2614 rtx lab_false, lab_over = NULL_RTX;
2615 rtx addr_rtx, r;
2616 rtx container;
2617
2618 /* Only 64bit target needs something special. */
2619 if (!TARGET_64BIT)
2620 {
2621 return std_expand_builtin_va_arg (valist, type);
2622 }
2623
2624 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
2625 f_fpr = TREE_CHAIN (f_gpr);
2626 f_ovf = TREE_CHAIN (f_fpr);
2627 f_sav = TREE_CHAIN (f_ovf);
2628
2629 valist = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (valist)), valist);
2630 gpr = build (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr);
2631 fpr = build (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr);
2632 ovf = build (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf);
2633 sav = build (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav);
2634
2635 size = int_size_in_bytes (type);
2636 rsize = (size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
2637
2638 container = construct_container (TYPE_MODE (type), type, 0,
2639 REGPARM_MAX, SSE_REGPARM_MAX, intreg, 0);
2640 /*
2641 * Pull the value out of the saved registers ...
2642 */
2643
2644 addr_rtx = gen_reg_rtx (Pmode);
2645
2646 if (container)
2647 {
2648 rtx int_addr_rtx, sse_addr_rtx;
2649 int needed_intregs, needed_sseregs;
2650 int need_temp;
2651
2652 lab_over = gen_label_rtx ();
2653 lab_false = gen_label_rtx ();
2654
2655 examine_argument (TYPE_MODE (type), type, 0,
2656 &needed_intregs, &needed_sseregs);
2657
2658
2659 need_temp = ((needed_intregs && TYPE_ALIGN (type) > 64)
2660 || TYPE_ALIGN (type) > 128);
2661
2662 /* In case we are passing a structure, verify that it is a consecutive
2663 block in the register save area. If not, we need to do moves. */
2664 if (!need_temp && !REG_P (container))
2665 {
2666 /* Verify that all registers are strictly consecutive. */
2667 if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container, 0, 0), 0))))
2668 {
2669 int i;
2670
2671 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
2672 {
2673 rtx slot = XVECEXP (container, 0, i);
2674 if (REGNO (XEXP (slot, 0)) != FIRST_SSE_REG + (unsigned int) i
2675 || INTVAL (XEXP (slot, 1)) != i * 16)
2676 need_temp = 1;
2677 }
2678 }
2679 else
2680 {
2681 int i;
2682
2683 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
2684 {
2685 rtx slot = XVECEXP (container, 0, i);
2686 if (REGNO (XEXP (slot, 0)) != (unsigned int) i
2687 || INTVAL (XEXP (slot, 1)) != i * 8)
2688 need_temp = 1;
2689 }
2690 }
2691 }
2692 if (!need_temp)
2693 {
2694 int_addr_rtx = addr_rtx;
2695 sse_addr_rtx = addr_rtx;
2696 }
2697 else
2698 {
2699 int_addr_rtx = gen_reg_rtx (Pmode);
2700 sse_addr_rtx = gen_reg_rtx (Pmode);
2701 }
2702 /* First ensure that we fit completely in registers. */
2703 if (needed_intregs)
2704 {
2705 emit_cmp_and_jump_insns (expand_expr
2706 (gpr, NULL_RTX, SImode, EXPAND_NORMAL),
2707 GEN_INT ((REGPARM_MAX - needed_intregs +
2708 1) * 8), GE, const1_rtx, SImode,
2709 1, lab_false);
2710 }
2711 if (needed_sseregs)
2712 {
2713 emit_cmp_and_jump_insns (expand_expr
2714 (fpr, NULL_RTX, SImode, EXPAND_NORMAL),
2715 GEN_INT ((SSE_REGPARM_MAX -
2716 needed_sseregs + 1) * 16 +
2717 REGPARM_MAX * 8), GE, const1_rtx,
2718 SImode, 1, lab_false);
2719 }
2720
2721 /* Compute index to start of area used for integer regs. */
2722 if (needed_intregs)
2723 {
2724 t = build (PLUS_EXPR, ptr_type_node, sav, gpr);
2725 r = expand_expr (t, int_addr_rtx, Pmode, EXPAND_NORMAL);
2726 if (r != int_addr_rtx)
2727 emit_move_insn (int_addr_rtx, r);
2728 }
2729 if (needed_sseregs)
2730 {
2731 t = build (PLUS_EXPR, ptr_type_node, sav, fpr);
2732 r = expand_expr (t, sse_addr_rtx, Pmode, EXPAND_NORMAL);
2733 if (r != sse_addr_rtx)
2734 emit_move_insn (sse_addr_rtx, r);
2735 }
2736 if (need_temp)
2737 {
2738 int i;
2739 rtx mem;
2740
2741 /* Never use the memory itself, as it has the alias set. */
2742 addr_rtx = XEXP (assign_temp (type, 0, 1, 0), 0);
2743 mem = gen_rtx_MEM (BLKmode, addr_rtx);
2744 set_mem_alias_set (mem, get_varargs_alias_set ());
2745 set_mem_align (mem, BITS_PER_UNIT);
2746
2747 for (i = 0; i < XVECLEN (container, 0); i++)
2748 {
2749 rtx slot = XVECEXP (container, 0, i);
2750 rtx reg = XEXP (slot, 0);
2751 enum machine_mode mode = GET_MODE (reg);
2752 rtx src_addr;
2753 rtx src_mem;
2754 int src_offset;
2755 rtx dest_mem;
2756
2757 if (SSE_REGNO_P (REGNO (reg)))
2758 {
2759 src_addr = sse_addr_rtx;
2760 src_offset = (REGNO (reg) - FIRST_SSE_REG) * 16;
2761 }
2762 else
2763 {
2764 src_addr = int_addr_rtx;
2765 src_offset = REGNO (reg) * 8;
2766 }
2767 src_mem = gen_rtx_MEM (mode, src_addr);
2768 set_mem_alias_set (src_mem, get_varargs_alias_set ());
2769 src_mem = adjust_address (src_mem, mode, src_offset);
2770 dest_mem = adjust_address (mem, mode, INTVAL (XEXP (slot, 1)));
2771 emit_move_insn (dest_mem, src_mem);
2772 }
2773 }
2774
2775 if (needed_intregs)
2776 {
2777 t =
2778 build (PLUS_EXPR, TREE_TYPE (gpr), gpr,
2779 build_int_2 (needed_intregs * 8, 0));
2780 t = build (MODIFY_EXPR, TREE_TYPE (gpr), gpr, t);
2781 TREE_SIDE_EFFECTS (t) = 1;
2782 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
2783 }
2784 if (needed_sseregs)
2785 {
2786 t =
2787 build (PLUS_EXPR, TREE_TYPE (fpr), fpr,
2788 build_int_2 (needed_sseregs * 16, 0));
2789 t = build (MODIFY_EXPR, TREE_TYPE (fpr), fpr, t);
2790 TREE_SIDE_EFFECTS (t) = 1;
2791 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
2792 }
2793
2794 emit_jump_insn (gen_jump (lab_over));
2795 emit_barrier ();
2796 emit_label (lab_false);
2797 }
2798
2799 /* ... otherwise out of the overflow area. */
2800
2801 /* Care for on-stack alignment if needed. */
2802 if (FUNCTION_ARG_BOUNDARY (VOIDmode, type) <= 64)
2803 t = ovf;
2804 else
2805 {
2806 HOST_WIDE_INT align = FUNCTION_ARG_BOUNDARY (VOIDmode, type) / 8;
2807 t = build (PLUS_EXPR, TREE_TYPE (ovf), ovf, build_int_2 (align - 1, 0));
2808 t = build (BIT_AND_EXPR, TREE_TYPE (t), t, build_int_2 (-align, -1));
2809 }
2810 t = save_expr (t);
2811
2812 r = expand_expr (t, addr_rtx, Pmode, EXPAND_NORMAL);
2813 if (r != addr_rtx)
2814 emit_move_insn (addr_rtx, r);
2815
2816 t =
2817 build (PLUS_EXPR, TREE_TYPE (t), t,
2818 build_int_2 (rsize * UNITS_PER_WORD, 0));
2819 t = build (MODIFY_EXPR, TREE_TYPE (ovf), ovf, t);
2820 TREE_SIDE_EFFECTS (t) = 1;
2821 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
2822
2823 if (container)
2824 emit_label (lab_over);
2825
2826 return addr_rtx;
2827 }
2828 \f
2829 /* Return nonzero if OP is either a i387 or SSE fp register. */
2830 int
2831 any_fp_register_operand (op, mode)
2832 rtx op;
2833 enum machine_mode mode ATTRIBUTE_UNUSED;
2834 {
2835 return ANY_FP_REG_P (op);
2836 }
2837
2838 /* Return nonzero if OP is an i387 fp register. */
2839 int
2840 fp_register_operand (op, mode)
2841 rtx op;
2842 enum machine_mode mode ATTRIBUTE_UNUSED;
2843 {
2844 return FP_REG_P (op);
2845 }
2846
2847 /* Return nonzero if OP is a non-fp register_operand. */
2848 int
2849 register_and_not_any_fp_reg_operand (op, mode)
2850 rtx op;
2851 enum machine_mode mode;
2852 {
2853 return register_operand (op, mode) && !ANY_FP_REG_P (op);
2854 }
2855
2856 /* Return nonzero if OP is a register operand other than an
2857 i387 fp register. */
2858 int
2859 register_and_not_fp_reg_operand (op, mode)
2860 rtx op;
2861 enum machine_mode mode;
2862 {
2863 return register_operand (op, mode) && !FP_REG_P (op);
2864 }
2865
2866 /* Return nonzero if OP is general operand representable on x86_64. */
2867
2868 int
2869 x86_64_general_operand (op, mode)
2870 rtx op;
2871 enum machine_mode mode;
2872 {
2873 if (!TARGET_64BIT)
2874 return general_operand (op, mode);
2875 if (nonimmediate_operand (op, mode))
2876 return 1;
2877 return x86_64_sign_extended_value (op, 1);
2878 }
2879
2880 /* Return nonzero if OP is general operand representable on x86_64
2881 as either sign extended or zero extended constant. */
2882
2883 int
2884 x86_64_szext_general_operand (op, mode)
2885 rtx op;
2886 enum machine_mode mode;
2887 {
2888 if (!TARGET_64BIT)
2889 return general_operand (op, mode);
2890 if (nonimmediate_operand (op, mode))
2891 return 1;
2892 return x86_64_sign_extended_value (op, 1) || x86_64_zero_extended_value (op);
2893 }
2894
2895 /* Return nonzero if OP is nonmemory operand representable on x86_64. */
2896
2897 int
2898 x86_64_nonmemory_operand (op, mode)
2899 rtx op;
2900 enum machine_mode mode;
2901 {
2902 if (!TARGET_64BIT)
2903 return nonmemory_operand (op, mode);
2904 if (register_operand (op, mode))
2905 return 1;
2906 return x86_64_sign_extended_value (op, 1);
2907 }
2908
2909 /* Return nonzero if OP is nonmemory operand acceptable by movabs patterns. */
2910
2911 int
2912 x86_64_movabs_operand (op, mode)
2913 rtx op;
2914 enum machine_mode mode;
2915 {
2916 if (!TARGET_64BIT || !flag_pic)
2917 return nonmemory_operand (op, mode);
2918 if (register_operand (op, mode) || x86_64_sign_extended_value (op, 0))
2919 return 1;
2920 if (CONSTANT_P (op) && !symbolic_reference_mentioned_p (op))
2921 return 1;
2922 return 0;
2923 }
2924
2925 /* Return nonzero if OP is a nonmemory operand representable on x86_64
as either a sign extended or zero extended constant. */
2926
2927 int
2928 x86_64_szext_nonmemory_operand (op, mode)
2929 rtx op;
2930 enum machine_mode mode;
2931 {
2932 if (!TARGET_64BIT)
2933 return nonmemory_operand (op, mode);
2934 if (register_operand (op, mode))
2935 return 1;
2936 return x86_64_sign_extended_value (op, 0) || x86_64_zero_extended_value (op);
2937 }
2938
2939 /* Return nonzero if OP is immediate operand representable on x86_64. */
2940
2941 int
2942 x86_64_immediate_operand (op, mode)
2943 rtx op;
2944 enum machine_mode mode;
2945 {
2946 if (!TARGET_64BIT)
2947 return immediate_operand (op, mode);
2948 return x86_64_sign_extended_value (op, 0);
2949 }
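/* Illustrative example (not part of the compiler): 0x7fffffff can be used
   as an immediate in 64bit arithmetic because it fits a sign extended
   32bit field, while 0x100000000 cannot and must first be loaded into a
   register, e.g. with movabs.  */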
2950
2951 /* Return nonzero if OP is an immediate operand representable on x86_64
as a zero extended 32bit value. */
2952
2953 int
2954 x86_64_zext_immediate_operand (op, mode)
2955 rtx op;
2956 enum machine_mode mode ATTRIBUTE_UNUSED;
2957 {
2958 return x86_64_zero_extended_value (op);
2959 }
2960
2961 /* Return nonzero if OP is (const_int 1), else return zero. */
2962
2963 int
2964 const_int_1_operand (op, mode)
2965 rtx op;
2966 enum machine_mode mode ATTRIBUTE_UNUSED;
2967 {
2968 return (GET_CODE (op) == CONST_INT && INTVAL (op) == 1);
2969 }
2970
2971 /* Return nonzero if OP is CONST_INT >= 1 and <= 31 (a valid operand
2972 for shift & compare patterns, as shifting by 0 does not change flags),
2973 else return zero. */
2974
2975 int
2976 const_int_1_31_operand (op, mode)
2977 rtx op;
2978 enum machine_mode mode ATTRIBUTE_UNUSED;
2979 {
2980 return (GET_CODE (op) == CONST_INT && INTVAL (op) >= 1 && INTVAL (op) <= 31);
2981 }
2982
2983 /* Returns 1 if OP is either a symbol reference or a sum of a symbol
2984 reference and a constant. */
2985
2986 int
2987 symbolic_operand (op, mode)
2988 register rtx op;
2989 enum machine_mode mode ATTRIBUTE_UNUSED;
2990 {
2991 switch (GET_CODE (op))
2992 {
2993 case SYMBOL_REF:
2994 case LABEL_REF:
2995 return 1;
2996
2997 case CONST:
2998 op = XEXP (op, 0);
2999 if (GET_CODE (op) == SYMBOL_REF
3000 || GET_CODE (op) == LABEL_REF
3001 || (GET_CODE (op) == UNSPEC
3002 && (XINT (op, 1) == UNSPEC_GOT
3003 || XINT (op, 1) == UNSPEC_GOTOFF
3004 || XINT (op, 1) == UNSPEC_GOTPCREL)))
3005 return 1;
3006 if (GET_CODE (op) != PLUS
3007 || GET_CODE (XEXP (op, 1)) != CONST_INT)
3008 return 0;
3009
3010 op = XEXP (op, 0);
3011 if (GET_CODE (op) == SYMBOL_REF
3012 || GET_CODE (op) == LABEL_REF)
3013 return 1;
3014 /* Only @GOTOFF gets offsets. */
3015 if (GET_CODE (op) != UNSPEC
3016 || XINT (op, 1) != UNSPEC_GOTOFF)
3017 return 0;
3018
3019 op = XVECEXP (op, 0, 0);
3020 if (GET_CODE (op) == SYMBOL_REF
3021 || GET_CODE (op) == LABEL_REF)
3022 return 1;
3023 return 0;
3024
3025 default:
3026 return 0;
3027 }
3028 }
3029
3030 /* Return true if the operand contains a @GOT or @GOTOFF reference. */
3031
3032 int
3033 pic_symbolic_operand (op, mode)
3034 register rtx op;
3035 enum machine_mode mode ATTRIBUTE_UNUSED;
3036 {
3037 if (GET_CODE (op) != CONST)
3038 return 0;
3039 op = XEXP (op, 0);
3040 if (TARGET_64BIT)
3041 {
3042 if (GET_CODE (XEXP (op, 0)) == UNSPEC)
3043 return 1;
3044 }
3045 else
3046 {
3047 if (GET_CODE (op) == UNSPEC)
3048 return 1;
3049 if (GET_CODE (op) != PLUS
3050 || GET_CODE (XEXP (op, 1)) != CONST_INT)
3051 return 0;
3052 op = XEXP (op, 0);
3053 if (GET_CODE (op) == UNSPEC)
3054 return 1;
3055 }
3056 return 0;
3057 }
3058
3059 /* Return true if OP is a symbolic operand that resolves locally. */
3060
3061 static int
3062 local_symbolic_operand (op, mode)
3063 rtx op;
3064 enum machine_mode mode ATTRIBUTE_UNUSED;
3065 {
3066 if (GET_CODE (op) == LABEL_REF)
3067 return 1;
3068
3069 if (GET_CODE (op) == CONST
3070 && GET_CODE (XEXP (op, 0)) == PLUS
3071 && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST_INT
3072 && (ix86_cmodel != CM_SMALL_PIC
3073 || (INTVAL (XEXP (XEXP (op, 0), 1)) >= -16*1024*1024
3074 && INTVAL (XEXP (XEXP (op, 0), 1)) < 16*1024*1024)))
3075 op = XEXP (XEXP (op, 0), 0);
3076
3077 if (GET_CODE (op) != SYMBOL_REF)
3078 return 0;
3079
3080 /* These we've been told are local by varasm and encode_section_info
3081 respectively. */
3082 if (CONSTANT_POOL_ADDRESS_P (op) || SYMBOL_REF_FLAG (op))
3083 return 1;
3084
3085 /* There is, however, a not insubstantial body of code in the rest of
3086 the compiler that assumes it can just stick the results of
3087 ASM_GENERATE_INTERNAL_LABEL in a symbol_ref and have done. */
3088 /* ??? This is a hack. Should update the body of the compiler to
3089 always create a DECL and invoke targetm.encode_section_info. */
3090 if (strncmp (XSTR (op, 0), internal_label_prefix,
3091 internal_label_prefix_len) == 0)
3092 return 1;
3093
3094 return 0;
3095 }
3096
3097 /* Test for various thread-local symbols. See ix86_encode_section_info. */
3098
3099 int
3100 tls_symbolic_operand (op, mode)
3101 register rtx op;
3102 enum machine_mode mode ATTRIBUTE_UNUSED;
3103 {
3104 const char *symbol_str;
3105
3106 if (GET_CODE (op) != SYMBOL_REF)
3107 return 0;
3108 symbol_str = XSTR (op, 0);
3109
3110 if (symbol_str[0] != '%')
3111 return 0;
3112 return strchr (tls_model_chars, symbol_str[1]) - tls_model_chars;
3113 }
3114
3115 static int
3116 tls_symbolic_operand_1 (op, kind)
3117 rtx op;
3118 enum tls_model kind;
3119 {
3120 const char *symbol_str;
3121
3122 if (GET_CODE (op) != SYMBOL_REF)
3123 return 0;
3124 symbol_str = XSTR (op, 0);
3125
3126 return symbol_str[0] == '%' && symbol_str[1] == tls_model_chars[kind];
3127 }
3128
3129 int
3130 global_dynamic_symbolic_operand (op, mode)
3131 register rtx op;
3132 enum machine_mode mode ATTRIBUTE_UNUSED;
3133 {
3134 return tls_symbolic_operand_1 (op, TLS_MODEL_GLOBAL_DYNAMIC);
3135 }
3136
3137 int
3138 local_dynamic_symbolic_operand (op, mode)
3139 register rtx op;
3140 enum machine_mode mode ATTRIBUTE_UNUSED;
3141 {
3142 return tls_symbolic_operand_1 (op, TLS_MODEL_LOCAL_DYNAMIC);
3143 }
3144
3145 int
3146 initial_exec_symbolic_operand (op, mode)
3147 register rtx op;
3148 enum machine_mode mode ATTRIBUTE_UNUSED;
3149 {
3150 return tls_symbolic_operand_1 (op, TLS_MODEL_INITIAL_EXEC);
3151 }
3152
3153 int
3154 local_exec_symbolic_operand (op, mode)
3155 register rtx op;
3156 enum machine_mode mode ATTRIBUTE_UNUSED;
3157 {
3158 return tls_symbolic_operand_1 (op, TLS_MODEL_LOCAL_EXEC);
3159 }
3160
3161 /* Test for a valid operand for a call instruction. Don't allow the
3162 arg pointer register or virtual regs since they may decay into
3163 reg + const, which the patterns can't handle. */
3164
3165 int
3166 call_insn_operand (op, mode)
3167 rtx op;
3168 enum machine_mode mode ATTRIBUTE_UNUSED;
3169 {
3170 /* Disallow indirect through a virtual register. This leads to
3171 compiler aborts when trying to eliminate them. */
3172 if (GET_CODE (op) == REG
3173 && (op == arg_pointer_rtx
3174 || op == frame_pointer_rtx
3175 || (REGNO (op) >= FIRST_PSEUDO_REGISTER
3176 && REGNO (op) <= LAST_VIRTUAL_REGISTER)))
3177 return 0;
3178
3179 /* Disallow `call 1234'. Due to varying assembler lameness this
3180 gets either rejected or translated to `call .+1234'. */
3181 if (GET_CODE (op) == CONST_INT)
3182 return 0;
3183
3184 /* Explicitly allow SYMBOL_REF even if pic. */
3185 if (GET_CODE (op) == SYMBOL_REF)
3186 return 1;
3187
3188 /* Otherwise we can allow any general_operand in the address. */
3189 return general_operand (op, Pmode);
3190 }
3191
3192 int
3193 constant_call_address_operand (op, mode)
3194 rtx op;
3195 enum machine_mode mode ATTRIBUTE_UNUSED;
3196 {
3197 if (GET_CODE (op) == CONST
3198 && GET_CODE (XEXP (op, 0)) == PLUS
3199 && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST_INT)
3200 op = XEXP (XEXP (op, 0), 0);
3201 return GET_CODE (op) == SYMBOL_REF;
3202 }
3203
3204 /* Match exactly zero and one. */
3205
3206 int
3207 const0_operand (op, mode)
3208 register rtx op;
3209 enum machine_mode mode;
3210 {
3211 return op == CONST0_RTX (mode);
3212 }
3213
3214 int
3215 const1_operand (op, mode)
3216 register rtx op;
3217 enum machine_mode mode ATTRIBUTE_UNUSED;
3218 {
3219 return op == const1_rtx;
3220 }
3221
3222 /* Match 2, 4, or 8. Used for leal multiplicands. */
3223
3224 int
3225 const248_operand (op, mode)
3226 register rtx op;
3227 enum machine_mode mode ATTRIBUTE_UNUSED;
3228 {
3229 return (GET_CODE (op) == CONST_INT
3230 && (INTVAL (op) == 2 || INTVAL (op) == 4 || INTVAL (op) == 8));
3231 }
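/* Illustrative example (not part of the compiler): these constants are
   the scale factors accepted by the lea addressing mode, e.g.

       leal (%ebx,%ecx,4), %eax

   computes eax = ebx + ecx * 4 in one instruction without clobbering
   the flags.  */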
3232
3233 /* True if this is a constant appropriate for an increment or decrement. */
3234
3235 int
3236 incdec_operand (op, mode)
3237 register rtx op;
3238 enum machine_mode mode ATTRIBUTE_UNUSED;
3239 {
3240 /* On the Pentium 4, the inc and dec operations cause an extra dependency
3241 on the flags register, since the carry flag is not set. */
3242 if (TARGET_PENTIUM4 && !optimize_size)
3243 return 0;
3244 return op == const1_rtx || op == constm1_rtx;
3245 }
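/* Illustrative example (not part of the compiler): when tuning for the
   Pentium 4 (e.g. -mcpu=pentium4) the compiler therefore prefers

       addl $1, %eax

   over `incl %eax', because inc leaves the carry flag untouched and so
   creates a partial flag register dependency.  */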
3246
3247 /* Return nonzero if OP is acceptable as operand of DImode shift
3248 expander. */
3249
3250 int
3251 shiftdi_operand (op, mode)
3252 rtx op;
3253 enum machine_mode mode ATTRIBUTE_UNUSED;
3254 {
3255 if (TARGET_64BIT)
3256 return nonimmediate_operand (op, mode);
3257 else
3258 return register_operand (op, mode);
3259 }
3260
3261 /* Return false if this is the stack pointer, or any other fake
3262 register eliminable to the stack pointer. Otherwise, this is
3263 a register operand.
3264
3265 This is used to prevent esp from being used as an index register,
3266 which would only happen in pathological cases. */
3267
3268 int
3269 reg_no_sp_operand (op, mode)
3270 register rtx op;
3271 enum machine_mode mode;
3272 {
3273 rtx t = op;
3274 if (GET_CODE (t) == SUBREG)
3275 t = SUBREG_REG (t);
3276 if (t == stack_pointer_rtx || t == arg_pointer_rtx || t == frame_pointer_rtx)
3277 return 0;
3278
3279 return register_operand (op, mode);
3280 }
3281
3282 int
3283 mmx_reg_operand (op, mode)
3284 register rtx op;
3285 enum machine_mode mode ATTRIBUTE_UNUSED;
3286 {
3287 return MMX_REG_P (op);
3288 }
3289
3290 /* Return false if this is any eliminable register. Otherwise
3291 general_operand. */
3292
3293 int
3294 general_no_elim_operand (op, mode)
3295 register rtx op;
3296 enum machine_mode mode;
3297 {
3298 rtx t = op;
3299 if (GET_CODE (t) == SUBREG)
3300 t = SUBREG_REG (t);
3301 if (t == arg_pointer_rtx || t == frame_pointer_rtx
3302 || t == virtual_incoming_args_rtx || t == virtual_stack_vars_rtx
3303 || t == virtual_stack_dynamic_rtx)
3304 return 0;
3305 if (REG_P (t)
3306 && REGNO (t) >= FIRST_VIRTUAL_REGISTER
3307 && REGNO (t) <= LAST_VIRTUAL_REGISTER)
3308 return 0;
3309
3310 return general_operand (op, mode);
3311 }
3312
3313 /* Return false if this is any eliminable register. Otherwise
3314 register_operand or const_int. */
3315
3316 int
3317 nonmemory_no_elim_operand (op, mode)
3318 register rtx op;
3319 enum machine_mode mode;
3320 {
3321 rtx t = op;
3322 if (GET_CODE (t) == SUBREG)
3323 t = SUBREG_REG (t);
3324 if (t == arg_pointer_rtx || t == frame_pointer_rtx
3325 || t == virtual_incoming_args_rtx || t == virtual_stack_vars_rtx
3326 || t == virtual_stack_dynamic_rtx)
3327 return 0;
3328
3329 return GET_CODE (op) == CONST_INT || register_operand (op, mode);
3330 }
3331
3332 /* Return false if this is any eliminable register or stack register,
3333 otherwise work like register_operand. */
3334
3335 int
3336 index_register_operand (op, mode)
3337 register rtx op;
3338 enum machine_mode mode;
3339 {
3340 rtx t = op;
3341 if (GET_CODE (t) == SUBREG)
3342 t = SUBREG_REG (t);
3343 if (!REG_P (t))
3344 return 0;
3345 if (t == arg_pointer_rtx
3346 || t == frame_pointer_rtx
3347 || t == virtual_incoming_args_rtx
3348 || t == virtual_stack_vars_rtx
3349 || t == virtual_stack_dynamic_rtx
3350 || REGNO (t) == STACK_POINTER_REGNUM)
3351 return 0;
3352
3353 return general_operand (op, mode);
3354 }
3355
3356 /* Return true if op is a Q_REGS class register. */
3357
3358 int
3359 q_regs_operand (op, mode)
3360 register rtx op;
3361 enum machine_mode mode;
3362 {
3363 if (mode != VOIDmode && GET_MODE (op) != mode)
3364 return 0;
3365 if (GET_CODE (op) == SUBREG)
3366 op = SUBREG_REG (op);
3367 return ANY_QI_REG_P (op);
3368 }
3369
3370 /* Return true if op is a NON_Q_REGS class register. */
3371
3372 int
3373 non_q_regs_operand (op, mode)
3374 register rtx op;
3375 enum machine_mode mode;
3376 {
3377 if (mode != VOIDmode && GET_MODE (op) != mode)
3378 return 0;
3379 if (GET_CODE (op) == SUBREG)
3380 op = SUBREG_REG (op);
3381 return NON_QI_REG_P (op);
3382 }
3383
3384 /* Return 1 if OP is a comparison that can be used in the CMPSS/CMPPS
3385 insns. */
3386 int
3387 sse_comparison_operator (op, mode)
3388 rtx op;
3389 enum machine_mode mode ATTRIBUTE_UNUSED;
3390 {
3391 enum rtx_code code = GET_CODE (op);
3392 switch (code)
3393 {
3394 /* Operations supported directly. */
3395 case EQ:
3396 case LT:
3397 case LE:
3398 case UNORDERED:
3399 case NE:
3400 case UNGE:
3401 case UNGT:
3402 case ORDERED:
3403 return 1;
3404 /* These are equivalent to the ones above for non-IEEE comparisons. */
3405 case UNEQ:
3406 case UNLT:
3407 case UNLE:
3408 case LTGT:
3409 case GE:
3410 case GT:
3411 return !TARGET_IEEE_FP;
3412 default:
3413 return 0;
3414 }
3415 }
3416 /* Return 1 if OP is a valid comparison operator in valid mode. */
3417 int
3418 ix86_comparison_operator (op, mode)
3419 register rtx op;
3420 enum machine_mode mode;
3421 {
3422 enum machine_mode inmode;
3423 enum rtx_code code = GET_CODE (op);
3424 if (mode != VOIDmode && GET_MODE (op) != mode)
3425 return 0;
3426 if (GET_RTX_CLASS (code) != '<')
3427 return 0;
3428 inmode = GET_MODE (XEXP (op, 0));
3429
3430 if (inmode == CCFPmode || inmode == CCFPUmode)
3431 {
3432 enum rtx_code second_code, bypass_code;
3433 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
3434 return (bypass_code == NIL && second_code == NIL);
3435 }
3436 switch (code)
3437 {
3438 case EQ: case NE:
3439 return 1;
3440 case LT: case GE:
3441 if (inmode == CCmode || inmode == CCGCmode
3442 || inmode == CCGOCmode || inmode == CCNOmode)
3443 return 1;
3444 return 0;
3445 case LTU: case GTU: case LEU: case ORDERED: case UNORDERED: case GEU:
3446 if (inmode == CCmode)
3447 return 1;
3448 return 0;
3449 case GT: case LE:
3450 if (inmode == CCmode || inmode == CCGCmode || inmode == CCNOmode)
3451 return 1;
3452 return 0;
3453 default:
3454 return 0;
3455 }
3456 }
3457
3458 /* Return 1 if OP is a comparison operator that can be issued by fcmov. */
3459
3460 int
3461 fcmov_comparison_operator (op, mode)
3462 register rtx op;
3463 enum machine_mode mode;
3464 {
3465 enum machine_mode inmode;
3466 enum rtx_code code = GET_CODE (op);
3467 if (mode != VOIDmode && GET_MODE (op) != mode)
3468 return 0;
3469 if (GET_RTX_CLASS (code) != '<')
3470 return 0;
3471 inmode = GET_MODE (XEXP (op, 0));
3472 if (inmode == CCFPmode || inmode == CCFPUmode)
3473 {
3474 enum rtx_code second_code, bypass_code;
3475 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
3476 if (bypass_code != NIL || second_code != NIL)
3477 return 0;
3478 code = ix86_fp_compare_code_to_integer (code);
3479 }
3480 /* The i387 supports only a limited set of condition codes. */
3481 switch (code)
3482 {
3483 case LTU: case GTU: case LEU: case GEU:
3484 if (inmode == CCmode || inmode == CCFPmode || inmode == CCFPUmode)
3485 return 1;
3486 return 0;
3487 case ORDERED: case UNORDERED:
3488 case EQ: case NE:
3489 return 1;
3490 default:
3491 return 0;
3492 }
3493 }
3494
3495 /* Return 1 if OP is a binary operator that can be promoted to wider mode. */
3496
3497 int
3498 promotable_binary_operator (op, mode)
3499 register rtx op;
3500 enum machine_mode mode ATTRIBUTE_UNUSED;
3501 {
3502 switch (GET_CODE (op))
3503 {
3504 case MULT:
3505 /* Modern CPUs have the same latency for HImode and SImode multiplies,
3506 but the 386 and 486 do HImode multiplies faster. */
3507 return ix86_cpu > PROCESSOR_I486;
3508 case PLUS:
3509 case AND:
3510 case IOR:
3511 case XOR:
3512 case ASHIFT:
3513 return 1;
3514 default:
3515 return 0;
3516 }
3517 }
3518
3519 /* Nearly general operand, but accept any const_double, since we wish
3520 to be able to drop them into memory rather than have them get pulled
3521 into registers. */
3522
3523 int
3524 cmp_fp_expander_operand (op, mode)
3525 register rtx op;
3526 enum machine_mode mode;
3527 {
3528 if (mode != VOIDmode && mode != GET_MODE (op))
3529 return 0;
3530 if (GET_CODE (op) == CONST_DOUBLE)
3531 return 1;
3532 return general_operand (op, mode);
3533 }
3534
3535 /* Match an SI or HImode register for a zero_extract. */
3536
3537 int
3538 ext_register_operand (op, mode)
3539 register rtx op;
3540 enum machine_mode mode ATTRIBUTE_UNUSED;
3541 {
3542 int regno;
3543 if ((!TARGET_64BIT || GET_MODE (op) != DImode)
3544 && GET_MODE (op) != SImode && GET_MODE (op) != HImode)
3545 return 0;
3546
3547 if (!register_operand (op, VOIDmode))
3548 return 0;
3549
3550 /* Be careful to accept only registers having upper parts. */
3551 regno = REG_P (op) ? REGNO (op) : REGNO (SUBREG_REG (op));
3552 return (regno > LAST_VIRTUAL_REGISTER || regno < 4);
3553 }
3554
3555 /* Return 1 if this is a valid binary floating-point operation.
3556 OP is the expression matched, and MODE is its mode. */
3557
3558 int
3559 binary_fp_operator (op, mode)
3560 register rtx op;
3561 enum machine_mode mode;
3562 {
3563 if (mode != VOIDmode && mode != GET_MODE (op))
3564 return 0;
3565
3566 switch (GET_CODE (op))
3567 {
3568 case PLUS:
3569 case MINUS:
3570 case MULT:
3571 case DIV:
3572 return GET_MODE_CLASS (GET_MODE (op)) == MODE_FLOAT;
3573
3574 default:
3575 return 0;
3576 }
3577 }
3578
3579 int
3580 mult_operator (op, mode)
3581 register rtx op;
3582 enum machine_mode mode ATTRIBUTE_UNUSED;
3583 {
3584 return GET_CODE (op) == MULT;
3585 }
3586
3587 int
3588 div_operator (op, mode)
3589 register rtx op;
3590 enum machine_mode mode ATTRIBUTE_UNUSED;
3591 {
3592 return GET_CODE (op) == DIV;
3593 }
3594
3595 int
3596 arith_or_logical_operator (op, mode)
3597 rtx op;
3598 enum machine_mode mode;
3599 {
3600 return ((mode == VOIDmode || GET_MODE (op) == mode)
3601 && (GET_RTX_CLASS (GET_CODE (op)) == 'c'
3602 || GET_RTX_CLASS (GET_CODE (op)) == '2'));
3603 }
3604
3605 /* Returns 1 if OP is a memory operand with a displacement. */
3606
3607 int
3608 memory_displacement_operand (op, mode)
3609 register rtx op;
3610 enum machine_mode mode;
3611 {
3612 struct ix86_address parts;
3613
3614 if (! memory_operand (op, mode))
3615 return 0;
3616
3617 if (! ix86_decompose_address (XEXP (op, 0), &parts))
3618 abort ();
3619
3620 return parts.disp != NULL_RTX;
3621 }
3622
3623 /* To avoid problems when jump re-emits comparisons like testqi_ext_ccno_0,
3624 re-recognize the operand to avoid a copy_to_mode_reg that will fail.
3625
3626 ??? It seems likely that this will only work because cmpsi is an
3627 expander, and no actual insns use this. */
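/* Concretely (illustrative, not from the original source), besides ordinary
   nonimmediate operands this also accepts RTL of the form
	(and:SI (zero_extract:SI (reg) (const_int 8) (const_int 8))
		(const_int N))
   i.e. a masked test of bits 8..15, as jump may re-emit for
   testqi_ext_ccno_0.  */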
3628
3629 int
3630 cmpsi_operand (op, mode)
3631 rtx op;
3632 enum machine_mode mode;
3633 {
3634 if (nonimmediate_operand (op, mode))
3635 return 1;
3636
3637 if (GET_CODE (op) == AND
3638 && GET_MODE (op) == SImode
3639 && GET_CODE (XEXP (op, 0)) == ZERO_EXTRACT
3640 && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST_INT
3641 && GET_CODE (XEXP (XEXP (op, 0), 2)) == CONST_INT
3642 && INTVAL (XEXP (XEXP (op, 0), 1)) == 8
3643 && INTVAL (XEXP (XEXP (op, 0), 2)) == 8
3644 && GET_CODE (XEXP (op, 1)) == CONST_INT)
3645 return 1;
3646
3647 return 0;
3648 }
3649
3650 /* Returns 1 if OP is a memory operand that cannot be represented by the
3651 modRM array. */
3652
3653 int
3654 long_memory_operand (op, mode)
3655 register rtx op;
3656 enum machine_mode mode;
3657 {
3658 if (! memory_operand (op, mode))
3659 return 0;
3660
3661 return memory_address_length (op) != 0;
3662 }
3663
3664 /* Return nonzero if the rtx is known aligned. */
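/* Illustrative example (not from the original source): the address
   8(%ebx,%eax,4) is considered aligned only when %ebx is known to be
   4-byte aligned; %eax needs no check since it is scaled by 4, and the
   displacement 8 is already a multiple of 4.  */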
3665
3666 int
3667 aligned_operand (op, mode)
3668 rtx op;
3669 enum machine_mode mode;
3670 {
3671 struct ix86_address parts;
3672
3673 if (!general_operand (op, mode))
3674 return 0;
3675
3676 /* Registers and immediate operands are always "aligned". */
3677 if (GET_CODE (op) != MEM)
3678 return 1;
3679
3680 /* Don't even try to do any aligned optimizations with volatiles. */
3681 if (MEM_VOLATILE_P (op))
3682 return 0;
3683
3684 op = XEXP (op, 0);
3685
3686 /* Pushes and pops are only valid on the stack pointer. */
3687 if (GET_CODE (op) == PRE_DEC
3688 || GET_CODE (op) == POST_INC)
3689 return 1;
3690
3691 /* Decode the address. */
3692 if (! ix86_decompose_address (op, &parts))
3693 abort ();
3694
3695 if (parts.base && GET_CODE (parts.base) == SUBREG)
3696 parts.base = SUBREG_REG (parts.base);
3697 if (parts.index && GET_CODE (parts.index) == SUBREG)
3698 parts.index = SUBREG_REG (parts.index);
3699
3700 /* Look for some component that isn't known to be aligned. */
3701 if (parts.index)
3702 {
3703 if (parts.scale < 4
3704 && REGNO_POINTER_ALIGN (REGNO (parts.index)) < 32)
3705 return 0;
3706 }
3707 if (parts.base)
3708 {
3709 if (REGNO_POINTER_ALIGN (REGNO (parts.base)) < 32)
3710 return 0;
3711 }
3712 if (parts.disp)
3713 {
3714 if (GET_CODE (parts.disp) != CONST_INT
3715 || (INTVAL (parts.disp) & 3) != 0)
3716 return 0;
3717 }
3718
3719 /* Didn't find one -- this must be an aligned address. */
3720 return 1;
3721 }
3722 \f
3723 /* Return true if the constant is something that can be loaded with
3724 a special instruction. Only handle 0.0 and 1.0; others are less
3725 worthwhile. */
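/* The return value presumably tells the move patterns which special load to
   emit: 1 corresponds to 0.0 (fldz) and 2 to 1.0 (fld1).  This note is
   illustrative and not from the original source.  */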
3726
3727 int
3728 standard_80387_constant_p (x)
3729 rtx x;
3730 {
3731 if (GET_CODE (x) != CONST_DOUBLE || !FLOAT_MODE_P (GET_MODE (x)))
3732 return -1;
3733 /* Note that on the 80387 there are other constants, such as pi, that we
3734 could support too; however, on some machines these are much slower to load
3735 as a standard constant than to load from doubles in memory. */
3736 if (x == CONST0_RTX (GET_MODE (x)))
3737 return 1;
3738 if (x == CONST1_RTX (GET_MODE (x)))
3739 return 2;
3740 return 0;
3741 }
3742
3743 /* Return 1 if X is an FP constant we can load into an SSE register
3744 without using memory. */
3745 int
3746 standard_sse_constant_p (x)
3747 rtx x;
3748 {
3749 if (x == const0_rtx)
3750 return 1;
3751 return (x == CONST0_RTX (GET_MODE (x)));
3752 }
3753
3754 /* Returns 1 if OP contains a symbol reference */
3755
3756 int
3757 symbolic_reference_mentioned_p (op)
3758 rtx op;
3759 {
3760 register const char *fmt;
3761 register int i;
3762
3763 if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
3764 return 1;
3765
3766 fmt = GET_RTX_FORMAT (GET_CODE (op));
3767 for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
3768 {
3769 if (fmt[i] == 'E')
3770 {
3771 register int j;
3772
3773 for (j = XVECLEN (op, i) - 1; j >= 0; j--)
3774 if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
3775 return 1;
3776 }
3777
3778 else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
3779 return 1;
3780 }
3781
3782 return 0;
3783 }
3784
3785 /* Return 1 if it is appropriate to emit `ret' instructions in the
3786 body of a function. Do this only if the epilogue is simple, needing a
3787 couple of insns. Prior to reloading, we can't tell how many registers
3788 must be saved, so return 0 then. Return 0 if there is no frame
3789 marker to de-allocate.
3790
3791 If NON_SAVING_SETJMP is defined and true, then it is not possible
3792 for the epilogue to be simple, so return 0. This is a special case
3793 since NON_SAVING_SETJMP will not cause regs_ever_live to change
3794 until final, but jump_optimize may need to know sooner if a
3795 `return' is OK. */
3796
3797 int
3798 ix86_can_use_return_insn_p ()
3799 {
3800 struct ix86_frame frame;
3801
3802 #ifdef NON_SAVING_SETJMP
3803 if (NON_SAVING_SETJMP && current_function_calls_setjmp)
3804 return 0;
3805 #endif
3806
3807 if (! reload_completed || frame_pointer_needed)
3808 return 0;
3809
3810 /* Don't allow more than 32k of pop, since that's all we can do
3811 with one instruction. */
3812 if (current_function_pops_args
3813 && current_function_args_size >= 32768)
3814 return 0;
3815
3816 ix86_compute_frame_layout (&frame);
3817 return frame.to_allocate == 0 && frame.nregs == 0;
3818 }
3819 \f
3820 /* Return 1 if VALUE can be stored in the sign extended immediate field. */
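/* For instance (illustrative, not from the original source): the CONST_INT
   0x7fffffff is accepted, while 0x80000000 is not, because sign extending
   its low 32 bits would yield 0xffffffff80000000 instead of the original
   value.  */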
3821 int
3822 x86_64_sign_extended_value (value, allow_rip)
3823 rtx value;
3824 int allow_rip;
3825 {
3826 switch (GET_CODE (value))
3827 {
3828 /* CONST_DOUBLEs never match, since HOST_BITS_PER_WIDE_INT is known
3829 to be at least 32, and thus all acceptable constants are
3830 represented as CONST_INTs. */
3831 case CONST_INT:
3832 if (HOST_BITS_PER_WIDE_INT == 32)
3833 return 1;
3834 else
3835 {
3836 HOST_WIDE_INT val = trunc_int_for_mode (INTVAL (value), DImode);
3837 return trunc_int_for_mode (val, SImode) == val;
3838 }
3839 break;
3840
3841 /* For certain code models, the symbolic references are known to fit.
3842 In the CM_SMALL_PIC model we know it fits if it is local to the shared
3843 library. Don't count TLS SYMBOL_REFs here, since they should fit
3844 only if inside an UNSPEC handled below. */
3845 case SYMBOL_REF:
3846 return (ix86_cmodel == CM_SMALL || ix86_cmodel == CM_KERNEL
3847 || (allow_rip
3848 && ix86_cmodel == CM_SMALL_PIC
3849 && (CONSTANT_POOL_ADDRESS_P (value)
3850 || SYMBOL_REF_FLAG (value))
3851 && ! tls_symbolic_operand (value, GET_MODE (value))));
3852
3853 /* For certain code models, the code is near as well. */
3854 case LABEL_REF:
3855 return ix86_cmodel != CM_LARGE
3856 && (allow_rip || ix86_cmodel != CM_SMALL_PIC);
3857
3858 /* We also may accept the offsetted memory references in certain special
3859 cases. */
3860 case CONST:
3861 if (GET_CODE (XEXP (value, 0)) == UNSPEC)
3862 switch (XINT (XEXP (value, 0), 1))
3863 {
3864 case UNSPEC_GOTPCREL:
3865 case UNSPEC_DTPOFF:
3866 case UNSPEC_GOTNTPOFF:
3867 case UNSPEC_NTPOFF:
3868 return 1;
3869 default:
3870 break;
3871 }
3872 if (GET_CODE (XEXP (value, 0)) == PLUS)
3873 {
3874 rtx op1 = XEXP (XEXP (value, 0), 0);
3875 rtx op2 = XEXP (XEXP (value, 0), 1);
3876 HOST_WIDE_INT offset;
3877
3878 if (ix86_cmodel == CM_LARGE)
3879 return 0;
3880 if (GET_CODE (op2) != CONST_INT)
3881 return 0;
3882 offset = trunc_int_for_mode (INTVAL (op2), DImode);
3883 switch (GET_CODE (op1))
3884 {
3885 case SYMBOL_REF:
3886 /* For CM_SMALL assume that the latest object is 16MB below
3887 the end of the 31-bit boundary. We may also accept pretty
3888 large negative constants, knowing that all objects are
3889 in the positive half of the address space. */
3890 if (ix86_cmodel == CM_SMALL
3891 && offset < 16*1024*1024
3892 && trunc_int_for_mode (offset, SImode) == offset)
3893 return 1;
3894 /* For CM_KERNEL we know that all objects reside in the
3895 negative half of the 32-bit address space. We may not
3896 accept negative offsets, since they may take us just outside
3897 that range, but we may accept pretty large positive ones. */
3898 if (ix86_cmodel == CM_KERNEL
3899 && offset > 0
3900 && trunc_int_for_mode (offset, SImode) == offset)
3901 return 1;
3902 /* For CM_SMALL_PIC, we can make similar assumptions
3903 as for CM_SMALL model, if we know the symbol is local
3904 to the shared library. Disallow any TLS symbols,
3905 since they should always be enclosed in an UNSPEC. */
3906 if (ix86_cmodel == CM_SMALL_PIC
3907 && allow_rip
3908 && (CONSTANT_POOL_ADDRESS_P (op1)
3909 || SYMBOL_REF_FLAG (op1))
3910 && ! tls_symbolic_operand (op1, GET_MODE (op1))
3911 && offset < 16*1024*1024
3912 && offset >= -16*1024*1024
3913 && trunc_int_for_mode (offset, SImode) == offset)
3914 return 1;
3915 break;
3916 case LABEL_REF:
3917 /* These conditions are similar to SYMBOL_REF ones, just the
3918 constraints for code models differ. */
3919 if ((ix86_cmodel == CM_SMALL || ix86_cmodel == CM_MEDIUM
3920 || (ix86_cmodel == CM_SMALL_PIC && allow_rip
3921 && offset >= -16*1024*1024))
3922 && offset < 16*1024*1024
3923 && trunc_int_for_mode (offset, SImode) == offset)
3924 return 1;
3925 if (ix86_cmodel == CM_KERNEL
3926 && offset > 0
3927 && trunc_int_for_mode (offset, SImode) == offset)
3928 return 1;
3929 break;
3930 case UNSPEC:
3931 switch (XINT (op1, 1))
3932 {
3933 case UNSPEC_DTPOFF:
3934 case UNSPEC_NTPOFF:
3935 if (offset > 0
3936 && trunc_int_for_mode (offset, SImode) == offset)
3937 return 1;
3938 }
3939 break;
3940 default:
3941 return 0;
3942 }
3943 }
3944 return 0;
3945 default:
3946 return 0;
3947 }
3948 }
3949
3950 /* Return 1 if VALUE can be stored in the zero extended immediate field. */
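/* For instance (illustrative, not from the original source): the CONST_INT
   0xffffffff is accepted since its high 32 bits are zero, while negative
   values such as -1 are rejected.  */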
3951 int
3952 x86_64_zero_extended_value (value)
3953 rtx value;
3954 {
3955 switch (GET_CODE (value))
3956 {
3957 case CONST_DOUBLE:
3958 if (HOST_BITS_PER_WIDE_INT == 32)
3959 return (GET_MODE (value) == VOIDmode
3960 && !CONST_DOUBLE_HIGH (value));
3961 else
3962 return 0;
3963 case CONST_INT:
3964 if (HOST_BITS_PER_WIDE_INT == 32)
3965 return INTVAL (value) >= 0;
3966 else
3967 return !(INTVAL (value) & ~(HOST_WIDE_INT) 0xffffffff);
3968 break;
3969
3970 /* For certain code models, the symbolic references are known to fit. */
3971 case SYMBOL_REF:
3972 return ix86_cmodel == CM_SMALL;
3973
3974 /* For certain code models, the code is near as well. */
3975 case LABEL_REF:
3976 return ix86_cmodel == CM_SMALL || ix86_cmodel == CM_MEDIUM;
3977
3978 /* We also may accept the offsetted memory references in certain special
3979 cases. */
3980 case CONST:
3981 if (GET_CODE (XEXP (value, 0)) == PLUS)
3982 {
3983 rtx op1 = XEXP (XEXP (value, 0), 0);
3984 rtx op2 = XEXP (XEXP (value, 0), 1);
3985
3986 if (ix86_cmodel == CM_LARGE)
3987 return 0;
3988 switch (GET_CODE (op1))
3989 {
3990 case SYMBOL_REF:
3991 return 0;
3992 /* For small code model we may accept pretty large positive
3993 offsets, since one bit is available for free. Negative
3994 offsets are limited by the size of NULL pointer area
3995 specified by the ABI. */
3996 if (ix86_cmodel == CM_SMALL
3997 && GET_CODE (op2) == CONST_INT
3998 && trunc_int_for_mode (INTVAL (op2), DImode) > -0x10000
3999 && (trunc_int_for_mode (INTVAL (op2), SImode)
4000 == INTVAL (op2)))
4001 return 1;
4002 /* ??? For the kernel, we may accept adjustment of
4003 -0x10000000, since we know that it will just convert
4004 negative address space to positive, but perhaps this
4005 is not worthwhile. */
4006 break;
4007 case LABEL_REF:
4008 /* These conditions are similar to SYMBOL_REF ones, just the
4009 constraints for code models differ. */
4010 if ((ix86_cmodel == CM_SMALL || ix86_cmodel == CM_MEDIUM)
4011 && GET_CODE (op2) == CONST_INT
4012 && trunc_int_for_mode (INTVAL (op2), DImode) > -0x10000
4013 && (trunc_int_for_mode (INTVAL (op2), SImode)
4014 == INTVAL (op2)))
4015 return 1;
4016 break;
4017 default:
4018 return 0;
4019 }
4020 }
4021 return 0;
4022 default:
4023 return 0;
4024 }
4025 }
4026
4027 /* Value should be nonzero if functions must have frame pointers.
4028 Zero means the frame pointer need not be set up (and parms may
4029 be accessed via the stack pointer) in functions that seem suitable. */
4030
4031 int
4032 ix86_frame_pointer_required ()
4033 {
4034 /* If we accessed previous frames, then the generated code expects
4035 to be able to access the saved ebp value in our frame. */
4036 if (cfun->machine->accesses_prev_frame)
4037 return 1;
4038
4039 /* Several x86 OSes need a frame pointer for other reasons,
4040 usually pertaining to setjmp. */
4041 if (SUBTARGET_FRAME_POINTER_REQUIRED)
4042 return 1;
4043
4044 /* In override_options, TARGET_OMIT_LEAF_FRAME_POINTER turns off
4045 the frame pointer by default. Turn it back on now if we've not
4046 got a leaf function. */
4047 if (TARGET_OMIT_LEAF_FRAME_POINTER
4048 && (!current_function_is_leaf || current_function_profile))
4049 return 1;
4050
4051 return 0;
4052 }
4053
4054 /* Record that the current function accesses previous call frames. */
4055
4056 void
4057 ix86_setup_frame_addresses ()
4058 {
4059 cfun->machine->accesses_prev_frame = 1;
4060 }
4061 \f
4062 #if defined(HAVE_GAS_HIDDEN) && defined(SUPPORTS_ONE_ONLY)
4063 # define USE_HIDDEN_LINKONCE 1
4064 #else
4065 # define USE_HIDDEN_LINKONCE 0
4066 #endif
4067
4068 static int pic_labels_used;
4069
4070 /* Fills in the label name that should be used for a pc thunk for
4071 the given register. */
4072
4073 static void
4074 get_pc_thunk_name (name, regno)
4075 char name[32];
4076 unsigned int regno;
4077 {
4078 if (USE_HIDDEN_LINKONCE)
4079 sprintf (name, "__i686.get_pc_thunk.%s", reg_names[regno]);
4080 else
4081 ASM_GENERATE_INTERNAL_LABEL (name, "LPR", regno);
4082 }
4083
4084
4085 /* Output the pc thunks used for -fpic; each one loads its register with
4086 the return address of the caller and then returns. */
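/* Illustrative example (not from the original source): the thunk emitted
   for %ebx looks roughly like

   __i686.get_pc_thunk.bx:
	movl	(%esp), %ebx
	ret
   */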
4087
4088 void
4089 ix86_asm_file_end (file)
4090 FILE *file;
4091 {
4092 rtx xops[2];
4093 int regno;
4094
4095 for (regno = 0; regno < 8; ++regno)
4096 {
4097 char name[32];
4098
4099 if (! ((pic_labels_used >> regno) & 1))
4100 continue;
4101
4102 get_pc_thunk_name (name, regno);
4103
4104 if (USE_HIDDEN_LINKONCE)
4105 {
4106 tree decl;
4107
4108 decl = build_decl (FUNCTION_DECL, get_identifier (name),
4109 error_mark_node);
4110 TREE_PUBLIC (decl) = 1;
4111 TREE_STATIC (decl) = 1;
4112 DECL_ONE_ONLY (decl) = 1;
4113
4114 (*targetm.asm_out.unique_section) (decl, 0);
4115 named_section (decl, NULL, 0);
4116
4117 (*targetm.asm_out.globalize_label) (file, name);
4118 fputs ("\t.hidden\t", file);
4119 assemble_name (file, name);
4120 fputc ('\n', file);
4121 ASM_DECLARE_FUNCTION_NAME (file, name, decl);
4122 }
4123 else
4124 {
4125 text_section ();
4126 ASM_OUTPUT_LABEL (file, name);
4127 }
4128
4129 xops[0] = gen_rtx_REG (SImode, regno);
4130 xops[1] = gen_rtx_MEM (SImode, stack_pointer_rtx);
4131 output_asm_insn ("mov{l}\t{%1, %0|%0, %1}", xops);
4132 output_asm_insn ("ret", xops);
4133 }
4134 }
4135
4136 /* Emit code for the SET_GOT patterns. */
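/* Illustrative example (not from the original source): for -fpic with deep
   branch prediction, the sequence emitted for %ebx is roughly

	call	__i686.get_pc_thunk.bx
	addl	$_GLOBAL_OFFSET_TABLE_, %ebx

   while the classic form is

	call	.L2
   .L2:	popl	%ebx
	addl	$_GLOBAL_OFFSET_TABLE_+[.-.L2], %ebx
   */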
4137
4138 const char *
4139 output_set_got (dest)
4140 rtx dest;
4141 {
4142 rtx xops[3];
4143
4144 xops[0] = dest;
4145 xops[1] = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME);
4146
4147 if (! TARGET_DEEP_BRANCH_PREDICTION || !flag_pic)
4148 {
4149 xops[2] = gen_rtx_LABEL_REF (Pmode, gen_label_rtx ());
4150
4151 if (!flag_pic)
4152 output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);
4153 else
4154 output_asm_insn ("call\t%a2", xops);
4155
4156 #if TARGET_MACHO
4157 /* Output the "canonical" label name ("Lxx$pb") here too. This
4158 is what will be referred to by the Mach-O PIC subsystem. */
4159 ASM_OUTPUT_LABEL (asm_out_file, machopic_function_base_name ());
4160 #endif
4161 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, "L",
4162 CODE_LABEL_NUMBER (XEXP (xops[2], 0)));
4163
4164 if (flag_pic)
4165 output_asm_insn ("pop{l}\t%0", xops);
4166 }
4167 else
4168 {
4169 char name[32];
4170 get_pc_thunk_name (name, REGNO (dest));
4171 pic_labels_used |= 1 << REGNO (dest);
4172
4173 xops[2] = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
4174 xops[2] = gen_rtx_MEM (QImode, xops[2]);
4175 output_asm_insn ("call\t%X2", xops);
4176 }
4177
4178 if (!flag_pic || TARGET_DEEP_BRANCH_PREDICTION)
4179 output_asm_insn ("add{l}\t{%1, %0|%0, %1}", xops);
4180 else if (!TARGET_MACHO)
4181 output_asm_insn ("add{l}\t{%1+[.-%a2], %0|%0, %a1+(.-%a2)}", xops);
4182
4183 return "";
4184 }
4185
4186 /* Generate a "push" pattern for input ARG. */
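/* On ia32 (Pmode == SImode) the pattern built here is, illustratively,
	(set (mem:SI (pre_dec:SI (reg:SI %esp))) arg)
   i.e. an ordinary push.  */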
4187
4188 static rtx
4189 gen_push (arg)
4190 rtx arg;
4191 {
4192 return gen_rtx_SET (VOIDmode,
4193 gen_rtx_MEM (Pmode,
4194 gen_rtx_PRE_DEC (Pmode,
4195 stack_pointer_rtx)),
4196 arg);
4197 }
4198
4199 /* Return >= 0 if there is an unused call-clobbered register available
4200 for the entire function. */
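/* Note (illustrative, not from the original source): the loop below only
   tries hard registers 0..2, which on ia32 are the call-clobbered %eax,
   %edx and %ecx, on the assumption that a leaf, non-profiled function can
   repurpose one of them as the PIC base.  */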
4201
4202 static unsigned int
4203 ix86_select_alt_pic_regnum ()
4204 {
4205 if (current_function_is_leaf && !current_function_profile)
4206 {
4207 int i;
4208 for (i = 2; i >= 0; --i)
4209 if (!regs_ever_live[i])
4210 return i;
4211 }
4212
4213 return INVALID_REGNUM;
4214 }
4215
4216 /* Return 1 if we need to save REGNO. */
4217 static int
4218 ix86_save_reg (regno, maybe_eh_return)
4219 unsigned int regno;
4220 int maybe_eh_return;
4221 {
4222 if (pic_offset_table_rtx
4223 && regno == REAL_PIC_OFFSET_TABLE_REGNUM
4224 && (regs_ever_live[REAL_PIC_OFFSET_TABLE_REGNUM]
4225 || current_function_profile
4226 || current_function_calls_eh_return))
4227 {
4228 if (ix86_select_alt_pic_regnum () != INVALID_REGNUM)
4229 return 0;
4230 return 1;
4231 }
4232
4233 if (current_function_calls_eh_return && maybe_eh_return)
4234 {
4235 unsigned i;
4236 for (i = 0; ; i++)
4237 {
4238 unsigned test = EH_RETURN_DATA_REGNO (i);
4239 if (test == INVALID_REGNUM)
4240 break;
4241 if (test == regno)
4242 return 1;
4243 }
4244 }
4245
4246 return (regs_ever_live[regno]
4247 && !call_used_regs[regno]
4248 && !fixed_regs[regno]
4249 && (regno != HARD_FRAME_POINTER_REGNUM || !frame_pointer_needed));
4250 }
4251
4252 /* Return number of registers to be saved on the stack. */
4253
4254 static int
4255 ix86_nsaved_regs ()
4256 {
4257 int nregs = 0;
4258 int regno;
4259
4260 for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; regno--)
4261 if (ix86_save_reg (regno, true))
4262 nregs++;
4263 return nregs;
4264 }
4265
4266 /* Return the offset between two registers, one to be eliminated, and the other
4267 its replacement, at the start of a routine. */
4268
4269 HOST_WIDE_INT
4270 ix86_initial_elimination_offset (from, to)
4271 int from;
4272 int to;
4273 {
4274 struct ix86_frame frame;
4275 ix86_compute_frame_layout (&frame);
4276
4277 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
4278 return frame.hard_frame_pointer_offset;
4279 else if (from == FRAME_POINTER_REGNUM
4280 && to == HARD_FRAME_POINTER_REGNUM)
4281 return frame.hard_frame_pointer_offset - frame.frame_pointer_offset;
4282 else
4283 {
4284 if (to != STACK_POINTER_REGNUM)
4285 abort ();
4286 else if (from == ARG_POINTER_REGNUM)
4287 return frame.stack_pointer_offset;
4288 else if (from != FRAME_POINTER_REGNUM)
4289 abort ();
4290 else
4291 return frame.stack_pointer_offset - frame.frame_pointer_offset;
4292 }
4293 }
4294
4295 /* Fill the structure ix86_frame describing the frame of the function being compiled. */
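/* Rough, illustrative sketch (not from the original source) of the layout
   computed below, for the common case of a frame pointer and no red zone;
   higher addresses first:

	return address
	saved %ebp		<- hard frame pointer
	saved registers
	va-arg save area
	padding1
	local variables
	outgoing arguments
	padding2		<- stack pointer after the prologue

   The *_offset fields record the distances of these boundaries from the
   top of the frame.  */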
4296
4297 static void
4298 ix86_compute_frame_layout (frame)
4299 struct ix86_frame *frame;
4300 {
4301 HOST_WIDE_INT total_size;
4302 int stack_alignment_needed = cfun->stack_alignment_needed / BITS_PER_UNIT;
4303 int offset;
4304 int preferred_alignment = cfun->preferred_stack_boundary / BITS_PER_UNIT;
4305 HOST_WIDE_INT size = get_frame_size ();
4306
4307 frame->nregs = ix86_nsaved_regs ();
4308 total_size = size;
4309
4310 /* Skip return address and saved base pointer. */
4311 offset = frame_pointer_needed ? UNITS_PER_WORD * 2 : UNITS_PER_WORD;
4312
4313 frame->hard_frame_pointer_offset = offset;
4314
4315 /* Do some sanity checking of stack_alignment_needed and
4316 preferred_alignment, since the i386 port is the only one using these
4317 features, and they may break easily. */
4318
4319 if (size && !stack_alignment_needed)
4320 abort ();
4321 if (preferred_alignment < STACK_BOUNDARY / BITS_PER_UNIT)
4322 abort ();
4323 if (preferred_alignment > PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT)
4324 abort ();
4325 if (stack_alignment_needed > PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT)
4326 abort ();
4327
4328 if (stack_alignment_needed < STACK_BOUNDARY / BITS_PER_UNIT)
4329 stack_alignment_needed = STACK_BOUNDARY / BITS_PER_UNIT;
4330
4331 /* Register save area */
4332 offset += frame->nregs * UNITS_PER_WORD;
4333
4334 /* Va-arg area */
4335 if (ix86_save_varrargs_registers)
4336 {
4337 offset += X86_64_VARARGS_SIZE;
4338 frame->va_arg_size = X86_64_VARARGS_SIZE;
4339 }
4340 else
4341 frame->va_arg_size = 0;
4342
4343 /* Align start of frame for local function. */
4344 frame->padding1 = ((offset + stack_alignment_needed - 1)
4345 & -stack_alignment_needed) - offset;
4346
4347 offset += frame->padding1;
4348
4349 /* Frame pointer points here. */
4350 frame->frame_pointer_offset = offset;
4351
4352 offset += size;
4353
4354 /* Add outgoing arguments area. Can be skipped if we eliminated
4355 all the function calls as dead code. */
4356 if (ACCUMULATE_OUTGOING_ARGS && !current_function_is_leaf)
4357 {
4358 offset += current_function_outgoing_args_size;
4359 frame->outgoing_arguments_size = current_function_outgoing_args_size;
4360 }
4361 else
4362 frame->outgoing_arguments_size = 0;
4363
4364 /* Align stack boundary. Only needed if we're calling another function
4365 or using alloca. */
4366 if (!current_function_is_leaf || current_function_calls_alloca)
4367 frame->padding2 = ((offset + preferred_alignment - 1)
4368 & -preferred_alignment) - offset;
4369 else
4370 frame->padding2 = 0;
4371
4372 offset += frame->padding2;
4373
4374 /* We've reached end of stack frame. */
4375 frame->stack_pointer_offset = offset;
4376
4377 /* Size prologue needs to allocate. */
4378 frame->to_allocate =
4379 (size + frame->padding1 + frame->padding2
4380 + frame->outgoing_arguments_size + frame->va_arg_size);
4381
4382 if (TARGET_64BIT && TARGET_RED_ZONE && current_function_sp_is_unchanging
4383 && current_function_is_leaf)
4384 {
4385 frame->red_zone_size = frame->to_allocate;
4386 if (frame->red_zone_size > RED_ZONE_SIZE - RED_ZONE_RESERVE)
4387 frame->red_zone_size = RED_ZONE_SIZE - RED_ZONE_RESERVE;
4388 }
4389 else
4390 frame->red_zone_size = 0;
4391 frame->to_allocate -= frame->red_zone_size;
4392 frame->stack_pointer_offset -= frame->red_zone_size;
4393 #if 0
4394 fprintf (stderr, "nregs: %i\n", frame->nregs);
4395 fprintf (stderr, "size: %i\n", size);
4396 fprintf (stderr, "alignment1: %i\n", stack_alignment_needed);
4397 fprintf (stderr, "padding1: %i\n", frame->padding1);
4398 fprintf (stderr, "va_arg: %i\n", frame->va_arg_size);
4399 fprintf (stderr, "padding2: %i\n", frame->padding2);
4400 fprintf (stderr, "to_allocate: %i\n", frame->to_allocate);
4401 fprintf (stderr, "red_zone_size: %i\n", frame->red_zone_size);
4402 fprintf (stderr, "frame_pointer_offset: %i\n", frame->frame_pointer_offset);
4403 fprintf (stderr, "hard_frame_pointer_offset: %i\n",
4404 frame->hard_frame_pointer_offset);
4405 fprintf (stderr, "stack_pointer_offset: %i\n", frame->stack_pointer_offset);
4406 #endif
4407 }
4408
4409 /* Emit code to save registers in the prologue. */
4410
4411 static void
4412 ix86_emit_save_regs ()
4413 {
4414 register int regno;
4415 rtx insn;
4416
4417 for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; regno--)
4418 if (ix86_save_reg (regno, true))
4419 {
4420 insn = emit_insn (gen_push (gen_rtx_REG (Pmode, regno)));
4421 RTX_FRAME_RELATED_P (insn) = 1;
4422 }
4423 }
4424
4425 /* Emit code to save registers using MOV insns. The first register
4426 is stored at POINTER + OFFSET. */
4427 static void
4428 ix86_emit_save_regs_using_mov (pointer, offset)
4429 rtx pointer;
4430 HOST_WIDE_INT offset;
4431 {
4432 int regno;
4433 rtx insn;
4434
4435 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
4436 if (ix86_save_reg (regno, true))
4437 {
4438 insn = emit_move_insn (adjust_address (gen_rtx_MEM (Pmode, pointer),
4439 Pmode, offset),
4440 gen_rtx_REG (Pmode, regno));
4441 RTX_FRAME_RELATED_P (insn) = 1;
4442 offset += UNITS_PER_WORD;
4443 }
4444 }
4445
4446 /* Expand the prologue into a bunch of separate insns. */
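/* Illustrative example (not from the original source): for a simple function
   with a frame pointer and without the move-based save path, the insns
   emitted below correspond to assembly roughly like

	pushl	%ebp
	movl	%esp, %ebp
	pushl	%ebx			# one push per saved register
	subl	$N, %esp		# N = frame.to_allocate

   When use_mov is selected, the subtraction also covers the register save
   area and the registers are stored with moves instead of pushes.  */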
4447
4448 void
4449 ix86_expand_prologue ()
4450 {
4451 rtx insn;
4452 bool pic_reg_used;
4453 struct ix86_frame frame;
4454 int use_mov = 0;
4455 HOST_WIDE_INT allocate;
4456
4457 if (!optimize_size)
4458 {
4459 use_fast_prologue_epilogue
4460 = !expensive_function_p (FAST_PROLOGUE_INSN_COUNT);
4461 if (TARGET_PROLOGUE_USING_MOVE)
4462 use_mov = use_fast_prologue_epilogue;
4463 }
4464 ix86_compute_frame_layout (&frame);
4465
4466 /* Note: AT&T enter does NOT have reversed args. Enter is probably
4467 slower on all targets. Also sdb doesn't like it. */
4468
4469 if (frame_pointer_needed)
4470 {
4471 insn = emit_insn (gen_push (hard_frame_pointer_rtx));
4472 RTX_FRAME_RELATED_P (insn) = 1;
4473
4474 insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
4475 RTX_FRAME_RELATED_P (insn) = 1;
4476 }
4477
4478 allocate = frame.to_allocate;
4479 /* In the case where we are dealing with a single register and an empty
4480 frame, a push is equivalent to the mov+add sequence. */
4481 if (allocate == 0 && frame.nregs <= 1)
4482 use_mov = 0;
4483
4484 if (!use_mov)
4485 ix86_emit_save_regs ();
4486 else
4487 allocate += frame.nregs * UNITS_PER_WORD;
4488
4489 if (allocate == 0)
4490 ;
4491 else if (! TARGET_STACK_PROBE || allocate < CHECK_STACK_LIMIT)
4492 {
4493 insn = emit_insn (gen_pro_epilogue_adjust_stack
4494 (stack_pointer_rtx, stack_pointer_rtx,
4495 GEN_INT (-allocate)));
4496 RTX_FRAME_RELATED_P (insn) = 1;
4497 }
4498 else
4499 {
4500 /* ??? Is this only valid for Win32? */
4501
4502 rtx arg0, sym;
4503
4504 if (TARGET_64BIT)
4505 abort ();
4506
4507 arg0 = gen_rtx_REG (SImode, 0);
4508 emit_move_insn (arg0, GEN_INT (allocate));
4509
4510 sym = gen_rtx_MEM (FUNCTION_MODE,
4511 gen_rtx_SYMBOL_REF (Pmode, "_alloca"));
4512 insn = emit_call_insn (gen_call (sym, const0_rtx, constm1_rtx));
4513
4514 CALL_INSN_FUNCTION_USAGE (insn)
4515 = gen_rtx_EXPR_LIST (VOIDmode, gen_rtx_USE (VOIDmode, arg0),
4516 CALL_INSN_FUNCTION_USAGE (insn));
4517 }
4518 if (use_mov)
4519 {
4520 if (!frame_pointer_needed || !frame.to_allocate)
4521 ix86_emit_save_regs_using_mov (stack_pointer_rtx, frame.to_allocate);
4522 else
4523 ix86_emit_save_regs_using_mov (hard_frame_pointer_rtx,
4524 -frame.nregs * UNITS_PER_WORD);
4525 }
4526
4527 #ifdef SUBTARGET_PROLOGUE
4528 SUBTARGET_PROLOGUE;
4529 #endif
4530
4531 pic_reg_used = false;
4532 if (pic_offset_table_rtx
4533 && (regs_ever_live[REAL_PIC_OFFSET_TABLE_REGNUM]
4534 || current_function_profile))
4535 {
4536 unsigned int alt_pic_reg_used = ix86_select_alt_pic_regnum ();
4537
4538 if (alt_pic_reg_used != INVALID_REGNUM)
4539 REGNO (pic_offset_table_rtx) = alt_pic_reg_used;
4540
4541 pic_reg_used = true;
4542 }
4543
4544 if (pic_reg_used)
4545 {
4546 insn = emit_insn (gen_set_got (pic_offset_table_rtx));
4547
4548 /* Even with accurate pre-reload life analysis, we can wind up
4549 deleting all references to the pic register after reload.
4550 Consider the case where cross-jumping unifies two sides of a branch
4551 controlled by a comparison vs the only read from a global.
4552 In that case, allow the set_got to be deleted, though we're
4553 too late to do anything about the ebx save in the prologue. */
4554 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD, const0_rtx, NULL);
4555 }
4556
4557 /* Prevent function calls from being scheduled before the call to mcount.
4558 In the pic_reg_used case, make sure that the got load isn't deleted. */
4559 if (current_function_profile)
4560 emit_insn (gen_blockage (pic_reg_used ? pic_offset_table_rtx : const0_rtx));
4561 }
4562
4563 /* Emit code to restore saved registers using MOV insns. First register
4564 is restored from POINTER + OFFSET. */
4565 static void
4566 ix86_emit_restore_regs_using_mov (pointer, offset, maybe_eh_return)
4567 rtx pointer;
4568 int offset;
4569 int maybe_eh_return;
4570 {
4571 int regno;
4572
4573 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
4574 if (ix86_save_reg (regno, maybe_eh_return))
4575 {
4576 emit_move_insn (gen_rtx_REG (Pmode, regno),
4577 adjust_address (gen_rtx_MEM (Pmode, pointer),
4578 Pmode, offset));
4579 offset += UNITS_PER_WORD;
4580 }
4581 }
4582
4583 /* Restore function stack, frame, and registers. */
4584
4585 void
4586 ix86_expand_epilogue (style)
4587 int style;
4588 {
4589 int regno;
4590 int sp_valid = !frame_pointer_needed || current_function_sp_is_unchanging;
4591 struct ix86_frame frame;
4592 HOST_WIDE_INT offset;
4593
4594 ix86_compute_frame_layout (&frame);
4595
4596 /* Calculate start of saved registers relative to ebp. Special care
4597 must be taken for the normal return case of a function using
4598 eh_return: the eax and edx registers are marked as saved, but not
4599 restored along this path. */
4600 offset = frame.nregs;
4601 if (current_function_calls_eh_return && style != 2)
4602 offset -= 2;
4603 offset *= -UNITS_PER_WORD;
4604
4605 /* If we're only restoring one register and sp is not valid then
4606 use a move instruction to restore the register, since it's
4607 less work than reloading sp and popping the register.
4608 
4609 The default code results in a stack adjustment using an add/lea insn,
4610 while this code results in a LEAVE instruction (or discrete equivalent),
4611 so it is profitable in some other cases as well, especially when there
4612 are no registers to restore. We also use this code when TARGET_USE_LEAVE
4613 and there is exactly one register to pop. This heuristic may need some
4614 tuning in the future. */
4615 if ((!sp_valid && frame.nregs <= 1)
4616 || (TARGET_EPILOGUE_USING_MOVE
4617 && use_fast_prologue_epilogue
4618 && (frame.nregs > 1 || frame.to_allocate))
4619 || (frame_pointer_needed && !frame.nregs && frame.to_allocate)
4620 || (frame_pointer_needed && TARGET_USE_LEAVE
4621 && use_fast_prologue_epilogue && frame.nregs == 1)
4622 || current_function_calls_eh_return)
4623 {
4624 /* Restore registers. We can use ebp or esp to address the memory
4625 locations. If both are available, default to ebp, since offsets
4626 are known to be small. The only exception is esp pointing directly
4627 to the end of the block of saved registers, where we may simplify
4628 the addressing mode. */
4629
4630 if (!frame_pointer_needed || (sp_valid && !frame.to_allocate))
4631 ix86_emit_restore_regs_using_mov (stack_pointer_rtx,
4632 frame.to_allocate, style == 2);
4633 else
4634 ix86_emit_restore_regs_using_mov (hard_frame_pointer_rtx,
4635 offset, style == 2);
4636
4637 /* eh_return epilogues need %ecx added to the stack pointer. */
4638 if (style == 2)
4639 {
4640 rtx tmp, sa = EH_RETURN_STACKADJ_RTX;
4641
4642 if (frame_pointer_needed)
4643 {
4644 tmp = gen_rtx_PLUS (Pmode, hard_frame_pointer_rtx, sa);
4645 tmp = plus_constant (tmp, UNITS_PER_WORD);
4646 emit_insn (gen_rtx_SET (VOIDmode, sa, tmp));
4647
4648 tmp = gen_rtx_MEM (Pmode, hard_frame_pointer_rtx);
4649 emit_move_insn (hard_frame_pointer_rtx, tmp);
4650
4651 emit_insn (gen_pro_epilogue_adjust_stack
4652 (stack_pointer_rtx, sa, const0_rtx));
4653 }
4654 else
4655 {
4656 tmp = gen_rtx_PLUS (Pmode, stack_pointer_rtx, sa);
4657 tmp = plus_constant (tmp, (frame.to_allocate
4658 + frame.nregs * UNITS_PER_WORD));
4659 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx, tmp));
4660 }
4661 }
4662 else if (!frame_pointer_needed)
4663 emit_insn (gen_pro_epilogue_adjust_stack
4664 (stack_pointer_rtx, stack_pointer_rtx,
4665 GEN_INT (frame.to_allocate
4666 + frame.nregs * UNITS_PER_WORD)));
4667 /* If not an i386, mov & pop is faster than "leave". */
4668 else if (TARGET_USE_LEAVE || optimize_size || !use_fast_prologue_epilogue)
4669 emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ());
4670 else
4671 {
4672 emit_insn (gen_pro_epilogue_adjust_stack (stack_pointer_rtx,
4673 hard_frame_pointer_rtx,
4674 const0_rtx));
4675 if (TARGET_64BIT)
4676 emit_insn (gen_popdi1 (hard_frame_pointer_rtx));
4677 else
4678 emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
4679 }
4680 }
4681 else
4682 {
4683 /* First step is to deallocate the stack frame so that we can
4684 pop the registers. */
4685 if (!sp_valid)
4686 {
4687 if (!frame_pointer_needed)
4688 abort ();
4689 emit_insn (gen_pro_epilogue_adjust_stack (stack_pointer_rtx,
4690 hard_frame_pointer_rtx,
4691 GEN_INT (offset)));
4692 }
4693 else if (frame.to_allocate)
4694 emit_insn (gen_pro_epilogue_adjust_stack
4695 (stack_pointer_rtx, stack_pointer_rtx,
4696 GEN_INT (frame.to_allocate)));
4697
4698 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
4699 if (ix86_save_reg (regno, false))
4700 {
4701 if (TARGET_64BIT)
4702 emit_insn (gen_popdi1 (gen_rtx_REG (Pmode, regno)));
4703 else
4704 emit_insn (gen_popsi1 (gen_rtx_REG (Pmode, regno)));
4705 }
4706 if (frame_pointer_needed)
4707 {
4708 /* Leave results in shorter dependency chains on CPUs that are
4709 able to grok it fast. */
4710 if (TARGET_USE_LEAVE)
4711 emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ());
4712 else if (TARGET_64BIT)
4713 emit_insn (gen_popdi1 (hard_frame_pointer_rtx));
4714 else
4715 emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
4716 }
4717 }
4718
4719 /* Sibcall epilogues don't want a return instruction. */
4720 if (style == 0)
4721 return;
4722
4723 if (current_function_pops_args && current_function_args_size)
4724 {
4725 rtx popc = GEN_INT (current_function_pops_args);
4726
4727 /* i386 can only pop 64K bytes. If asked to pop more, pop
4728 return address, do explicit add, and jump indirectly to the
4729 caller. */
4730
4731 if (current_function_pops_args >= 65536)
4732 {
4733 rtx ecx = gen_rtx_REG (SImode, 2);
4734
4735 /* There is no "pascal" calling convention in the 64-bit ABI. */
4736 if (TARGET_64BIT)
4737 abort ();
4738
4739 emit_insn (gen_popsi1 (ecx));
4740 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, popc));
4741 emit_jump_insn (gen_return_indirect_internal (ecx));
4742 }
4743 else
4744 emit_jump_insn (gen_return_pop_internal (popc));
4745 }
4746 else
4747 emit_jump_insn (gen_return_internal ());
4748 }
4749
4750 /* Reset state modified while compiling the function (restore the real PIC register number). */
4751
4752 static void
4753 ix86_output_function_epilogue (file, size)
4754 FILE *file ATTRIBUTE_UNUSED;
4755 HOST_WIDE_INT size ATTRIBUTE_UNUSED;
4756 {
4757 if (pic_offset_table_rtx)
4758 REGNO (pic_offset_table_rtx) = REAL_PIC_OFFSET_TABLE_REGNUM;
4759 }
4760 \f
4761 /* Extract the parts of an RTL expression that is a valid memory address
4762 for an instruction. Return 0 if the structure of the address is
4763 grossly off. Return -1 if the address contains ASHIFT, so it is not
4764 strictly valid, but it is still used for computing the length of a lea
4765 instruction. */
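/* Illustrative example (not from the original source): the canonical address
   (plus (plus (mult (reg %eax) (const_int 4)) (reg %ebx)) (const_int 12))
   decomposes into index = %eax, scale = 4, base = %ebx and disp = 12,
   i.e. the x86 addressing mode 12(%ebx,%eax,4).  */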
4766
4767 static int
4768 ix86_decompose_address (addr, out)
4769 register rtx addr;
4770 struct ix86_address *out;
4771 {
4772 rtx base = NULL_RTX;
4773 rtx index = NULL_RTX;
4774 rtx disp = NULL_RTX;
4775 HOST_WIDE_INT scale = 1;
4776 rtx scale_rtx = NULL_RTX;
4777 int retval = 1;
4778
4779 if (REG_P (addr) || GET_CODE (addr) == SUBREG)
4780 base = addr;
4781 else if (GET_CODE (addr) == PLUS)
4782 {
4783 rtx op0 = XEXP (addr, 0);
4784 rtx op1 = XEXP (addr, 1);
4785 enum rtx_code code0 = GET_CODE (op0);
4786 enum rtx_code code1 = GET_CODE (op1);
4787
4788 if (code0 == REG || code0 == SUBREG)
4789 {
4790 if (code1 == REG || code1 == SUBREG)
4791 index = op0, base = op1; /* index + base */
4792 else
4793 base = op0, disp = op1; /* base + displacement */
4794 }
4795 else if (code0 == MULT)
4796 {
4797 index = XEXP (op0, 0);
4798 scale_rtx = XEXP (op0, 1);
4799 if (code1 == REG || code1 == SUBREG)
4800 base = op1; /* index*scale + base */
4801 else
4802 disp = op1; /* index*scale + disp */
4803 }
4804 else if (code0 == PLUS && GET_CODE (XEXP (op0, 0)) == MULT)
4805 {
4806 index = XEXP (XEXP (op0, 0), 0); /* index*scale + base + disp */
4807 scale_rtx = XEXP (XEXP (op0, 0), 1);
4808 base = XEXP (op0, 1);
4809 disp = op1;
4810 }
4811 else if (code0 == PLUS)
4812 {
4813 index = XEXP (op0, 0); /* index + base + disp */
4814 base = XEXP (op0, 1);
4815 disp = op1;
4816 }
4817 else
4818 return 0;
4819 }
4820 else if (GET_CODE (addr) == MULT)
4821 {
4822 index = XEXP (addr, 0); /* index*scale */
4823 scale_rtx = XEXP (addr, 1);
4824 }
4825 else if (GET_CODE (addr) == ASHIFT)
4826 {
4827 rtx tmp;
4828
4829 /* We're called for lea too, which implements ashift on occasion. */
4830 index = XEXP (addr, 0);
4831 tmp = XEXP (addr, 1);
4832 if (GET_CODE (tmp) != CONST_INT)
4833 return 0;
4834 scale = INTVAL (tmp);
4835 if ((unsigned HOST_WIDE_INT) scale > 3)
4836 return 0;
4837 scale = 1 << scale;
4838 retval = -1;
4839 }
4840 else
4841 disp = addr; /* displacement */
4842
4843 /* Extract the integral value of scale. */
4844 if (scale_rtx)
4845 {
4846 if (GET_CODE (scale_rtx) != CONST_INT)
4847 return 0;
4848 scale = INTVAL (scale_rtx);
4849 }
4850
4851 /* Allow the arg pointer and stack pointer as index if there is no scaling. */
4852 if (base && index && scale == 1
4853 && (index == arg_pointer_rtx || index == frame_pointer_rtx
4854 || index == stack_pointer_rtx))
4855 {
4856 rtx tmp = base;
4857 base = index;
4858 index = tmp;
4859 }
4860
4861 /* Special case: %ebp cannot be encoded as a base without a displacement. */
4862 if ((base == hard_frame_pointer_rtx
4863 || base == frame_pointer_rtx
4864 || base == arg_pointer_rtx) && !disp)
4865 disp = const0_rtx;
4866
4867 /* Special case: on K6, [%esi] makes the instruction vector decoded.
4868 Avoid this by transforming to [%esi+0]. */
4869 if (ix86_cpu == PROCESSOR_K6 && !optimize_size
4870 && base && !index && !disp
4871 && REG_P (base)
4872 && REGNO_REG_CLASS (REGNO (base)) == SIREG)
4873 disp = const0_rtx;
4874
4875 /* Special case: encode reg+reg instead of reg*2. */
4876 if (!base && index && scale && scale == 2)
4877 base = index, scale = 1;
4878
4879 /* Special case: scaling cannot be encoded without base or displacement. */
4880 if (!base && !disp && index && scale != 1)
4881 disp = const0_rtx;
4882
4883 out->base = base;
4884 out->index = index;
4885 out->disp = disp;
4886 out->scale = scale;
4887
4888 return retval;
4889 }
4890 \f
4891 /* Return the cost of the memory address x.
4892 For i386, it is better to use a complex address than let gcc copy
4893 the address into a reg and make a new pseudo. But not if the address
4894 requires two regs - that would mean more pseudos with longer
4895 lifetimes. */
4896 int
4897 ix86_address_cost (x)
4898 rtx x;
4899 {
4900 struct ix86_address parts;
4901 int cost = 1;
4902
4903 if (!ix86_decompose_address (x, &parts))
4904 abort ();
4905
4906 if (parts.base && GET_CODE (parts.base) == SUBREG)
4907 parts.base = SUBREG_REG (parts.base);
4908 if (parts.index && GET_CODE (parts.index) == SUBREG)
4909 parts.index = SUBREG_REG (parts.index);
4910
4911 /* More complex memory references are better. */
4912 if (parts.disp && parts.disp != const0_rtx)
4913 cost--;
4914
4915 /* Attempt to minimize number of registers in the address. */
4916 if ((parts.base
4917 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER))
4918 || (parts.index
4919 && (!REG_P (parts.index)
4920 || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)))
4921 cost++;
4922
4923 if (parts.base
4924 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER)
4925 && parts.index
4926 && (!REG_P (parts.index) || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)
4927 && parts.base != parts.index)
4928 cost++;
4929
4930 /* The AMD-K6 doesn't like addresses with the ModR/M byte set to 00_xxx_100b,
4931 since its predecode logic can't detect the length of such instructions
4932 and they degenerate to vector decoding. Increase the cost of such
4933 addresses here. The penalty is at least 2 cycles. It may be worthwhile
4934 to split such addresses or even refuse them at all.
4935 
4936 The following addressing modes are affected:
4937 [base+scale*index]
4938 [scale*index+disp]
4939 [base+index]
4940
4941 The first and last cases may be avoidable by explicitly coding the zero
4942 into the memory address, but I don't have an AMD-K6 machine handy to
4943 check this theory. */
4944
4945 if (TARGET_K6
4946 && ((!parts.disp && parts.base && parts.index && parts.scale != 1)
4947 || (parts.disp && !parts.base && parts.index && parts.scale != 1)
4948 || (!parts.disp && parts.base && parts.index && parts.scale == 1)))
4949 cost += 10;
4950
4951 return cost;
4952 }
4953 \f
4954 /* If X is a machine specific address (i.e. a symbol or label being
4955 referenced as a displacement from the GOT implemented using an
4956 UNSPEC), then return the base term. Otherwise return X. */
4957
4958 rtx
4959 ix86_find_base_term (x)
4960 rtx x;
4961 {
4962 rtx term;
4963
4964 if (TARGET_64BIT)
4965 {
4966 if (GET_CODE (x) != CONST)
4967 return x;
4968 term = XEXP (x, 0);
4969 if (GET_CODE (term) == PLUS
4970 && (GET_CODE (XEXP (term, 1)) == CONST_INT
4971 || GET_CODE (XEXP (term, 1)) == CONST_DOUBLE))
4972 term = XEXP (term, 0);
4973 if (GET_CODE (term) != UNSPEC
4974 || XINT (term, 1) != UNSPEC_GOTPCREL)
4975 return x;
4976
4977 term = XVECEXP (term, 0, 0);
4978
4979 if (GET_CODE (term) != SYMBOL_REF
4980 && GET_CODE (term) != LABEL_REF)
4981 return x;
4982
4983 return term;
4984 }
4985
4986 if (GET_CODE (x) != PLUS
4987 || XEXP (x, 0) != pic_offset_table_rtx
4988 || GET_CODE (XEXP (x, 1)) != CONST)
4989 return x;
4990
4991 term = XEXP (XEXP (x, 1), 0);
4992
4993 if (GET_CODE (term) == PLUS && GET_CODE (XEXP (term, 1)) == CONST_INT)
4994 term = XEXP (term, 0);
4995
4996 if (GET_CODE (term) != UNSPEC
4997 || XINT (term, 1) != UNSPEC_GOTOFF)
4998 return x;
4999
5000 term = XVECEXP (term, 0, 0);
5001
5002 if (GET_CODE (term) != SYMBOL_REF
5003 && GET_CODE (term) != LABEL_REF)
5004 return x;
5005
5006 return term;
5007 }
5008 \f
5009 /* Determine if a given RTX is a valid constant. We already know this
5010 satisfies CONSTANT_P. */
5011
5012 bool
5013 legitimate_constant_p (x)
5014 rtx x;
5015 {
5016 rtx inner;
5017
5018 switch (GET_CODE (x))
5019 {
5020 case SYMBOL_REF:
5021 /* TLS symbols are not constant. */
5022 if (tls_symbolic_operand (x, Pmode))
5023 return false;
5024 break;
5025
5026 case CONST:
5027 inner = XEXP (x, 0);
5028
5029 /* Offsets of TLS symbols are never valid.
5030 Discourage CSE from creating them. */
5031 if (GET_CODE (inner) == PLUS
5032 && tls_symbolic_operand (XEXP (inner, 0), Pmode))
5033 return false;
5034
5035 /* Only some unspecs are valid as "constants". */
5036 if (GET_CODE (inner) == UNSPEC)
5037 switch (XINT (inner, 1))
5038 {
5039 case UNSPEC_TPOFF:
5040 return local_exec_symbolic_operand (XVECEXP (inner, 0, 0), Pmode);
5041 default:
5042 return false;
5043 }
5044 break;
5045
5046 default:
5047 break;
5048 }
5049
5050 /* Otherwise we handle everything else in the move patterns. */
5051 return true;
5052 }
5053
5054 /* Determine if a given RTX is a valid constant address. */
5055
5056 bool
5057 constant_address_p (x)
5058 rtx x;
5059 {
5060 switch (GET_CODE (x))
5061 {
5062 case LABEL_REF:
5063 case CONST_INT:
5064 return true;
5065
5066 case CONST_DOUBLE:
5067 return TARGET_64BIT;
5068
5069 case CONST:
5070 /* For Mach-O, really believe the CONST. */
5071 if (TARGET_MACHO)
5072 return true;
5073 /* Otherwise fall through. */
5074 case SYMBOL_REF:
5075 return !flag_pic && legitimate_constant_p (x);
5076
5077 default:
5078 return false;
5079 }
5080 }
5081
5082 /* Nonzero if the constant value X is a legitimate general operand
5083 when generating PIC code. It is given that flag_pic is on and
5084 that X satisfies CONSTANT_P or is a CONST_DOUBLE. */
5085
5086 bool
5087 legitimate_pic_operand_p (x)
5088 rtx x;
5089 {
5090 rtx inner;
5091
5092 switch (GET_CODE (x))
5093 {
5094 case CONST:
5095 inner = XEXP (x, 0);
5096
5097 /* Only some unspecs are valid as "constants". */
5098 if (GET_CODE (inner) == UNSPEC)
5099 switch (XINT (inner, 1))
5100 {
5101 case UNSPEC_TPOFF:
5102 return local_exec_symbolic_operand (XVECEXP (inner, 0, 0), Pmode);
5103 default:
5104 return false;
5105 }
5106 /* FALLTHRU */
5107
5108 case SYMBOL_REF:
5109 case LABEL_REF:
5110 return legitimate_pic_address_disp_p (x);
5111
5112 default:
5113 return true;
5114 }
5115 }
5116
5117 /* Determine if a given CONST RTX is a valid memory displacement
5118 in PIC mode. */
5119
5120 int
5121 legitimate_pic_address_disp_p (disp)
5122 register rtx disp;
5123 {
5124 bool saw_plus;
5125
5126 /* In 64bit mode we can allow direct addresses of symbols and labels
5127 when they are not dynamic symbols. */
5128 if (TARGET_64BIT && local_symbolic_operand (disp, Pmode))
5129 return 1;
5130 if (GET_CODE (disp) != CONST)
5131 return 0;
5132 disp = XEXP (disp, 0);
5133
5134 if (TARGET_64BIT)
5135 {
5136 /* It is unsafe to allow PLUS expressions here; this limits the allowed
5137 distance of GOT references. We should not need these anyway. */
5138 if (GET_CODE (disp) != UNSPEC
5139 || XINT (disp, 1) != UNSPEC_GOTPCREL)
5140 return 0;
5141
5142 if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF
5143 && GET_CODE (XVECEXP (disp, 0, 0)) != LABEL_REF)
5144 return 0;
5145 return 1;
5146 }
5147
5148 saw_plus = false;
5149 if (GET_CODE (disp) == PLUS)
5150 {
5151 if (GET_CODE (XEXP (disp, 1)) != CONST_INT)
5152 return 0;
5153 disp = XEXP (disp, 0);
5154 saw_plus = true;
5155 }
5156
5157 /* Allow {LABEL | SYMBOL}_REF - SYMBOL_REF-FOR-PICBASE for Mach-O. */
5158 if (TARGET_MACHO && GET_CODE (disp) == MINUS)
5159 {
5160 if (GET_CODE (XEXP (disp, 0)) == LABEL_REF
5161 || GET_CODE (XEXP (disp, 0)) == SYMBOL_REF)
5162 if (GET_CODE (XEXP (disp, 1)) == SYMBOL_REF)
5163 {
5164 const char *sym_name = XSTR (XEXP (disp, 1), 0);
5165 if (strstr (sym_name, "$pb") != 0)
5166 return 1;
5167 }
5168 }
5169
5170 if (GET_CODE (disp) != UNSPEC)
5171 return 0;
5172
5173 switch (XINT (disp, 1))
5174 {
5175 case UNSPEC_GOT:
5176 if (saw_plus)
5177 return false;
5178 return GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF;
5179 case UNSPEC_GOTOFF:
5180 return local_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
5181 case UNSPEC_GOTTPOFF:
5182 case UNSPEC_GOTNTPOFF:
5183 case UNSPEC_INDNTPOFF:
5184 if (saw_plus)
5185 return false;
5186 return initial_exec_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
5187 case UNSPEC_NTPOFF:
5188 return local_exec_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
5189 case UNSPEC_DTPOFF:
5190 return local_dynamic_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
5191 }
5192
5193 return 0;
5194 }
5195
5196 /* GO_IF_LEGITIMATE_ADDRESS recognizes an RTL expression that is a valid
5197 memory address for an instruction. The MODE argument is the machine mode
5198 for the MEM expression that wants to use this address.
5199
5200 It only recognizes addresses in canonical form. LEGITIMIZE_ADDRESS should
5201 convert common non-canonical forms to canonical form so that they will
5202 be recognized. */
5203
5204 int
5205 legitimate_address_p (mode, addr, strict)
5206 enum machine_mode mode;
5207 register rtx addr;
5208 int strict;
5209 {
5210 struct ix86_address parts;
5211 rtx base, index, disp;
5212 HOST_WIDE_INT scale;
5213 const char *reason = NULL;
5214 rtx reason_rtx = NULL_RTX;
5215
5216 if (TARGET_DEBUG_ADDR)
5217 {
5218 fprintf (stderr,
5219 "\n======\nGO_IF_LEGITIMATE_ADDRESS, mode = %s, strict = %d\n",
5220 GET_MODE_NAME (mode), strict);
5221 debug_rtx (addr);
5222 }
5223
5224 if (GET_CODE (addr) == UNSPEC && XINT (addr, 1) == UNSPEC_TP)
5225 {
5226 if (TARGET_DEBUG_ADDR)
5227 fprintf (stderr, "Success.\n");
5228 return TRUE;
5229 }
5230
5231 if (ix86_decompose_address (addr, &parts) <= 0)
5232 {
5233 reason = "decomposition failed";
5234 goto report_error;
5235 }
5236
5237 base = parts.base;
5238 index = parts.index;
5239 disp = parts.disp;
5240 scale = parts.scale;
5241
5242 /* Validate base register.
5243
5244 Don't allow SUBREGs here; they can lead to spill failures when the base
5245 is one word out of a two word structure, which is represented internally
5246 as a DImode int. */
5247
5248 if (base)
5249 {
5250 rtx reg;
5251 reason_rtx = base;
5252
5253 if (GET_CODE (base) == SUBREG)
5254 reg = SUBREG_REG (base);
5255 else
5256 reg = base;
5257
5258 if (GET_CODE (reg) != REG)
5259 {
5260 reason = "base is not a register";
5261 goto report_error;
5262 }
5263
5264 if (GET_MODE (base) != Pmode)
5265 {
5266 reason = "base is not in Pmode";
5267 goto report_error;
5268 }
5269
5270 if ((strict && ! REG_OK_FOR_BASE_STRICT_P (reg))
5271 || (! strict && ! REG_OK_FOR_BASE_NONSTRICT_P (reg)))
5272 {
5273 reason = "base is not valid";
5274 goto report_error;
5275 }
5276 }
5277
5278 /* Validate index register.
5279
5280 Don't allow SUBREGs here; they can lead to spill failures when the index
5281 is one word out of a two word structure, which is represented internally
5282 as a DImode int. */
5283
5284 if (index)
5285 {
5286 rtx reg;
5287 reason_rtx = index;
5288
5289 if (GET_CODE (index) == SUBREG)
5290 reg = SUBREG_REG (index);
5291 else
5292 reg = index;
5293
5294 if (GET_CODE (reg) != REG)
5295 {
5296 reason = "index is not a register";
5297 goto report_error;
5298 }
5299
5300 if (GET_MODE (index) != Pmode)
5301 {
5302 reason = "index is not in Pmode";
5303 goto report_error;
5304 }
5305
5306 if ((strict && ! REG_OK_FOR_INDEX_STRICT_P (reg))
5307 || (! strict && ! REG_OK_FOR_INDEX_NONSTRICT_P (reg)))
5308 {
5309 reason = "index is not valid";
5310 goto report_error;
5311 }
5312 }
5313
5314 /* Validate scale factor. */
5315 if (scale != 1)
5316 {
5317 reason_rtx = GEN_INT (scale);
5318 if (!index)
5319 {
5320 reason = "scale without index";
5321 goto report_error;
5322 }
5323
5324 if (scale != 2 && scale != 4 && scale != 8)
5325 {
5326 reason = "scale is not a valid multiplier";
5327 goto report_error;
5328 }
5329 }
5330
5331 /* Validate displacement. */
5332 if (disp)
5333 {
5334 reason_rtx = disp;
5335
5336 if (TARGET_64BIT)
5337 {
5338 if (!x86_64_sign_extended_value (disp, !(index || base)))
5339 {
5340 reason = "displacement is out of range";
5341 goto report_error;
5342 }
5343 }
5344 else
5345 {
5346 if (GET_CODE (disp) == CONST_DOUBLE)
5347 {
5348 reason = "displacement is a const_double";
5349 goto report_error;
5350 }
5351 }
5352
5353 if (GET_CODE (disp) == CONST
5354 && GET_CODE (XEXP (disp, 0)) == UNSPEC)
5355 switch (XINT (XEXP (disp, 0), 1))
5356 {
5357 case UNSPEC_GOT:
5358 case UNSPEC_GOTOFF:
5359 case UNSPEC_GOTPCREL:
5360 if (!flag_pic)
5361 abort ();
5362 goto is_legitimate_pic;
5363
5364 case UNSPEC_GOTTPOFF:
5365 case UNSPEC_GOTNTPOFF:
5366 case UNSPEC_INDNTPOFF:
5367 case UNSPEC_NTPOFF:
5368 case UNSPEC_DTPOFF:
5369 break;
5370
5371 default:
5372 reason = "invalid address unspec";
5373 goto report_error;
5374 }
5375
5376 else if (flag_pic && (SYMBOLIC_CONST (disp)
5377 #if TARGET_MACHO
5378 && !machopic_operand_p (disp)
5379 #endif
5380 ))
5381 {
5382 is_legitimate_pic:
5383 if (TARGET_64BIT && (index || base))
5384 {
5385 /* foo@dtpoff(%rX) is ok. */
5386 if (GET_CODE (disp) != CONST
5387 || GET_CODE (XEXP (disp, 0)) != PLUS
5388 || GET_CODE (XEXP (XEXP (disp, 0), 0)) != UNSPEC
5389 || GET_CODE (XEXP (XEXP (disp, 0), 1)) != CONST_INT
5390 || (XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_DTPOFF
5391 && XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_NTPOFF))
5392 {
5393 reason = "non-constant pic memory reference";
5394 goto report_error;
5395 }
5396 }
5397 else if (! legitimate_pic_address_disp_p (disp))
5398 {
5399 reason = "displacement is an invalid pic construct";
5400 goto report_error;
5401 }
5402
5403 /* This code used to verify that a symbolic pic displacement
5404 includes the pic_offset_table_rtx register.
5405
5406 While this is a good idea, unfortunately these constructs may
5407 be created by the "adds using lea" optimization for incorrect
5408 code like:
5409
5410 int a;
5411 int foo(int i)
5412 {
5413 return *(&a+i);
5414 }
5415
5416 This code is nonsensical, but results in addressing the
5417 GOT table with a pic_offset_table_rtx base. We can't
5418 just refuse it easily, since it gets matched by the
5419 "addsi3" pattern, which later gets split to lea when the
5420 output register differs from the input. While this
5421 could be handled by a separate addsi pattern for this case
5422 that never results in lea, disabling this test seems to be
5423 the easier and correct fix for the crash. */
5424 }
5425 else if (!CONSTANT_ADDRESS_P (disp))
5426 {
5427 reason = "displacement is not constant";
5428 goto report_error;
5429 }
5430 }
5431
5432 /* Everything looks valid. */
5433 if (TARGET_DEBUG_ADDR)
5434 fprintf (stderr, "Success.\n");
5435 return TRUE;
5436
5437 report_error:
5438 if (TARGET_DEBUG_ADDR)
5439 {
5440 fprintf (stderr, "Error: %s\n", reason);
5441 debug_rtx (reason_rtx);
5442 }
5443 return FALSE;
5444 }
5445 \f
5446 /* Return a unique alias set for the GOT. */
5447
5448 static HOST_WIDE_INT
5449 ix86_GOT_alias_set ()
5450 {
5451 static HOST_WIDE_INT set = -1;
5452 if (set == -1)
5453 set = new_alias_set ();
5454 return set;
5455 }
5456
5457 /* Return a legitimate reference for ORIG (an address) using the
5458 register REG. If REG is 0, a new pseudo is generated.
5459
5460 There are two types of references that must be handled:
5461
5462 1. Global data references must load the address from the GOT, via
5463 the PIC reg. An insn is emitted to do this load, and the reg is
5464 returned.
5465
5466 2. Static data references, constant pool addresses, and code labels
5467 compute the address as an offset from the GOT, whose base is in
5468 the PIC reg. Static data objects have SYMBOL_REF_FLAG set to
5469 differentiate them from global data objects. The returned
5470 address is the PIC reg + an unspec constant.
5471
5472 GO_IF_LEGITIMATE_ADDRESS rejects symbolic references unless the PIC
5473 reg also appears in the address. */
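/* Illustrative sketch of the two cases (AT&T syntax, 32-bit):
   a global `bar' goes through its GOT slot,
       movl bar@GOT(%ebx), %reg    ...then dereference %reg...
   while a static `foo' is addressed as an offset from the GOT base,
       leal foo@GOTOFF(%ebx), %reg
   The RTL built below corresponds to these two shapes.  */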
5474
5475 rtx
5476 legitimize_pic_address (orig, reg)
5477 rtx orig;
5478 rtx reg;
5479 {
5480 rtx addr = orig;
5481 rtx new = orig;
5482 rtx base;
5483
5484 #if TARGET_MACHO
5485 if (reg == 0)
5486 reg = gen_reg_rtx (Pmode);
5487 /* Use the generic Mach-O PIC machinery. */
5488 return machopic_legitimize_pic_address (orig, GET_MODE (orig), reg);
5489 #endif
5490
5491 if (local_symbolic_operand (addr, Pmode))
5492 {
5493 /* In 64bit mode we can address such objects directly. */
5494 if (TARGET_64BIT)
5495 new = addr;
5496 else
5497 {
5498 /* This symbol may be referenced via a displacement from the PIC
5499 base address (@GOTOFF). */
5500
5501 if (reload_in_progress)
5502 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
5503 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
5504 new = gen_rtx_CONST (Pmode, new);
5505 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
5506
5507 if (reg != 0)
5508 {
5509 emit_move_insn (reg, new);
5510 new = reg;
5511 }
5512 }
5513 }
5514 else if (GET_CODE (addr) == SYMBOL_REF)
5515 {
5516 if (TARGET_64BIT)
5517 {
5518 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTPCREL);
5519 new = gen_rtx_CONST (Pmode, new);
5520 new = gen_rtx_MEM (Pmode, new);
5521 RTX_UNCHANGING_P (new) = 1;
5522 set_mem_alias_set (new, ix86_GOT_alias_set ());
5523
5524 if (reg == 0)
5525 reg = gen_reg_rtx (Pmode);
5526 /* Use gen_movsi directly; otherwise the address is loaded
5527 into a register for CSE.  We don't want to CSE these addresses;
5528 instead we CSE the addresses loaded from the GOT table, so skip this.  */
5529 emit_insn (gen_movsi (reg, new));
5530 new = reg;
5531 }
5532 else
5533 {
5534 /* This symbol must be referenced via a load from the
5535 Global Offset Table (@GOT). */
5536
5537 if (reload_in_progress)
5538 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
5539 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT);
5540 new = gen_rtx_CONST (Pmode, new);
5541 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
5542 new = gen_rtx_MEM (Pmode, new);
5543 RTX_UNCHANGING_P (new) = 1;
5544 set_mem_alias_set (new, ix86_GOT_alias_set ());
5545
5546 if (reg == 0)
5547 reg = gen_reg_rtx (Pmode);
5548 emit_move_insn (reg, new);
5549 new = reg;
5550 }
5551 }
5552 else
5553 {
5554 if (GET_CODE (addr) == CONST)
5555 {
5556 addr = XEXP (addr, 0);
5557
5558 /* We must match stuff we generated before.  Assume the only
5559 unspecs that can get here are ours.  Not that we could do
5560 anything with them anyway...  */
5561 if (GET_CODE (addr) == UNSPEC
5562 || (GET_CODE (addr) == PLUS
5563 && GET_CODE (XEXP (addr, 0)) == UNSPEC))
5564 return orig;
5565 if (GET_CODE (addr) != PLUS)
5566 abort ();
5567 }
5568 if (GET_CODE (addr) == PLUS)
5569 {
5570 rtx op0 = XEXP (addr, 0), op1 = XEXP (addr, 1);
5571
5572 /* Check first to see if this is a constant offset from a @GOTOFF
5573 symbol reference. */
5574 if (local_symbolic_operand (op0, Pmode)
5575 && GET_CODE (op1) == CONST_INT)
5576 {
5577 if (!TARGET_64BIT)
5578 {
5579 if (reload_in_progress)
5580 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
5581 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op0),
5582 UNSPEC_GOTOFF);
5583 new = gen_rtx_PLUS (Pmode, new, op1);
5584 new = gen_rtx_CONST (Pmode, new);
5585 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
5586
5587 if (reg != 0)
5588 {
5589 emit_move_insn (reg, new);
5590 new = reg;
5591 }
5592 }
5593 else
5594 {
5595 if (INTVAL (op1) < -16*1024*1024
5596 || INTVAL (op1) >= 16*1024*1024)
5597 new = gen_rtx_PLUS (Pmode, op0, force_reg (Pmode, op1));
5598 }
5599 }
5600 else
5601 {
5602 base = legitimize_pic_address (XEXP (addr, 0), reg);
5603 new = legitimize_pic_address (XEXP (addr, 1),
5604 base == reg ? NULL_RTX : reg);
5605
5606 if (GET_CODE (new) == CONST_INT)
5607 new = plus_constant (base, INTVAL (new));
5608 else
5609 {
5610 if (GET_CODE (new) == PLUS && CONSTANT_P (XEXP (new, 1)))
5611 {
5612 base = gen_rtx_PLUS (Pmode, base, XEXP (new, 0));
5613 new = XEXP (new, 1);
5614 }
5615 new = gen_rtx_PLUS (Pmode, base, new);
5616 }
5617 }
5618 }
5619 }
5620 return new;
5621 }
5622
5623 static void
5624 ix86_encode_section_info (decl, first)
5625 tree decl;
5626 int first ATTRIBUTE_UNUSED;
5627 {
5628 bool local_p = (*targetm.binds_local_p) (decl);
5629 rtx rtl, symbol;
5630
5631 rtl = DECL_P (decl) ? DECL_RTL (decl) : TREE_CST_RTL (decl);
5632 if (GET_CODE (rtl) != MEM)
5633 return;
5634 symbol = XEXP (rtl, 0);
5635 if (GET_CODE (symbol) != SYMBOL_REF)
5636 return;
5637
5638 /* For basic x86, if using PIC, mark a SYMBOL_REF for a non-global
5639 symbol so that we may access it directly in the GOT. */
5640
5641 if (flag_pic)
5642 SYMBOL_REF_FLAG (symbol) = local_p;
5643
5644 /* For ELF, encode thread-local data with %[GLil] for "global dynamic",
5645 "local dynamic", "initial exec" or "local exec" TLS models
5646 respectively. */
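/* For example, under the initial-exec model a symbol named "counter"
   is rewritten here to "%icounter", per the %[GLil] scheme above;
   ix86_strip_name_encoding below removes the two-character prefix
   again when the name is printed.  (Illustrative example only.)  */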
5647
5648 if (TREE_CODE (decl) == VAR_DECL && DECL_THREAD_LOCAL (decl))
5649 {
5650 const char *symbol_str;
5651 char *newstr;
5652 size_t len;
5653 enum tls_model kind = decl_tls_model (decl);
5654
5655 if (TARGET_64BIT && ! flag_pic)
5656 {
5657 /* x86-64 doesn't allow non-pic code for shared libraries,
5658 so don't generate GD/LD TLS models for non-pic code. */
5659 switch (kind)
5660 {
5661 case TLS_MODEL_GLOBAL_DYNAMIC:
5662 kind = TLS_MODEL_INITIAL_EXEC; break;
5663 case TLS_MODEL_LOCAL_DYNAMIC:
5664 kind = TLS_MODEL_LOCAL_EXEC; break;
5665 default:
5666 break;
5667 }
5668 }
5669
5670 symbol_str = XSTR (symbol, 0);
5671
5672 if (symbol_str[0] == '%')
5673 {
5674 if (symbol_str[1] == tls_model_chars[kind])
5675 return;
5676 symbol_str += 2;
5677 }
5678 len = strlen (symbol_str) + 1;
5679 newstr = alloca (len + 2);
5680
5681 newstr[0] = '%';
5682 newstr[1] = tls_model_chars[kind];
5683 memcpy (newstr + 2, symbol_str, len);
5684
5685 XSTR (symbol, 0) = ggc_alloc_string (newstr, len + 2 - 1);
5686 }
5687 }
5688
5689 /* Undo the above when printing symbol names. */
5690
5691 static const char *
5692 ix86_strip_name_encoding (str)
5693 const char *str;
5694 {
5695 if (str[0] == '%')
5696 str += 2;
5697 if (str [0] == '*')
5698 str += 1;
5699 return str;
5700 }
5701 \f
5702 /* Load the thread pointer into a register. */
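/* (The UNSPEC_TP memory reference built here is printed as %gs:0 in
   32-bit mode and %fs:0 in 64-bit mode; see print_operand_address
   below.)  */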
5703
5704 static rtx
5705 get_thread_pointer ()
5706 {
5707 rtx tp;
5708
5709 tp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), UNSPEC_TP);
5710 tp = gen_rtx_MEM (Pmode, tp);
5711 RTX_UNCHANGING_P (tp) = 1;
5712 set_mem_alias_set (tp, ix86_GOT_alias_set ());
5713 tp = force_reg (Pmode, tp);
5714
5715 return tp;
5716 }
5717
5718 /* Try machine-dependent ways of modifying an illegitimate address
5719 to be legitimate. If we find one, return the new, valid address.
5720 This macro is used in only one place: `memory_address' in explow.c.
5721
5722 OLDX is the address as it was before break_out_memory_refs was called.
5723 In some cases it is useful to look at this to decide what needs to be done.
5724
5725 MODE and WIN are passed so that this macro can use
5726 GO_IF_LEGITIMATE_ADDRESS.
5727
5728 It is always safe for this macro to do nothing. It exists to recognize
5729 opportunities to optimize the output.
5730
5731 For the 80386, we handle X+REG by loading X into a register R and
5732 using R+REG. R will go in a general reg and indexing will be used.
5733 However, if REG is a broken-out memory address or multiplication,
5734 nothing needs to be done because REG can certainly go in a general reg.
5735
5736 When -fpic is used, special handling is needed for symbolic references.
5737 See comments by legitimize_pic_address in i386.c for details. */
5738
5739 rtx
5740 legitimize_address (x, oldx, mode)
5741 register rtx x;
5742 register rtx oldx ATTRIBUTE_UNUSED;
5743 enum machine_mode mode;
5744 {
5745 int changed = 0;
5746 unsigned log;
5747
5748 if (TARGET_DEBUG_ADDR)
5749 {
5750 fprintf (stderr, "\n==========\nLEGITIMIZE_ADDRESS, mode = %s\n",
5751 GET_MODE_NAME (mode));
5752 debug_rtx (x);
5753 }
5754
5755 log = tls_symbolic_operand (x, mode);
5756 if (log)
5757 {
5758 rtx dest, base, off, pic;
5759 int type;
5760
5761 switch (log)
5762 {
5763 case TLS_MODEL_GLOBAL_DYNAMIC:
5764 dest = gen_reg_rtx (Pmode);
5765 if (TARGET_64BIT)
5766 {
5767 rtx rax = gen_rtx_REG (Pmode, 0), insns;
5768
5769 start_sequence ();
5770 emit_call_insn (gen_tls_global_dynamic_64 (rax, x));
5771 insns = get_insns ();
5772 end_sequence ();
5773
5774 emit_libcall_block (insns, dest, rax, x);
5775 }
5776 else
5777 emit_insn (gen_tls_global_dynamic_32 (dest, x));
5778 break;
5779
5780 case TLS_MODEL_LOCAL_DYNAMIC:
5781 base = gen_reg_rtx (Pmode);
5782 if (TARGET_64BIT)
5783 {
5784 rtx rax = gen_rtx_REG (Pmode, 0), insns, note;
5785
5786 start_sequence ();
5787 emit_call_insn (gen_tls_local_dynamic_base_64 (rax));
5788 insns = get_insns ();
5789 end_sequence ();
5790
5791 note = gen_rtx_EXPR_LIST (VOIDmode, const0_rtx, NULL);
5792 note = gen_rtx_EXPR_LIST (VOIDmode, ix86_tls_get_addr (), note);
5793 emit_libcall_block (insns, base, rax, note);
5794 }
5795 else
5796 emit_insn (gen_tls_local_dynamic_base_32 (base));
5797
5798 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_DTPOFF);
5799 off = gen_rtx_CONST (Pmode, off);
5800
5801 return gen_rtx_PLUS (Pmode, base, off);
5802
5803 case TLS_MODEL_INITIAL_EXEC:
5804 if (TARGET_64BIT)
5805 {
5806 pic = NULL;
5807 type = UNSPEC_GOTNTPOFF;
5808 }
5809 else if (flag_pic)
5810 {
5811 if (reload_in_progress)
5812 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
5813 pic = pic_offset_table_rtx;
5814 type = TARGET_GNU_TLS ? UNSPEC_GOTNTPOFF : UNSPEC_GOTTPOFF;
5815 }
5816 else if (!TARGET_GNU_TLS)
5817 {
5818 pic = gen_reg_rtx (Pmode);
5819 emit_insn (gen_set_got (pic));
5820 type = UNSPEC_GOTTPOFF;
5821 }
5822 else
5823 {
5824 pic = NULL;
5825 type = UNSPEC_INDNTPOFF;
5826 }
5827
5828 base = get_thread_pointer ();
5829
5830 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), type);
5831 off = gen_rtx_CONST (Pmode, off);
5832 if (pic)
5833 off = gen_rtx_PLUS (Pmode, pic, off);
5834 off = gen_rtx_MEM (Pmode, off);
5835 RTX_UNCHANGING_P (off) = 1;
5836 set_mem_alias_set (off, ix86_GOT_alias_set ());
5837 dest = gen_reg_rtx (Pmode);
5838
5839 if (TARGET_64BIT || TARGET_GNU_TLS)
5840 {
5841 emit_move_insn (dest, off);
5842 return gen_rtx_PLUS (Pmode, base, dest);
5843 }
5844 else
5845 emit_insn (gen_subsi3 (dest, base, off));
5846 break;
5847
5848 case TLS_MODEL_LOCAL_EXEC:
5849 base = get_thread_pointer ();
5850
5851 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x),
5852 (TARGET_64BIT || TARGET_GNU_TLS)
5853 ? UNSPEC_NTPOFF : UNSPEC_TPOFF);
5854 off = gen_rtx_CONST (Pmode, off);
5855
5856 if (TARGET_64BIT || TARGET_GNU_TLS)
5857 return gen_rtx_PLUS (Pmode, base, off);
5858 else
5859 {
5860 dest = gen_reg_rtx (Pmode);
5861 emit_insn (gen_subsi3 (dest, base, off));
5862 }
5863 break;
5864
5865 default:
5866 abort ();
5867 }
5868
5869 return dest;
5870 }
5871
5872 if (flag_pic && SYMBOLIC_CONST (x))
5873 return legitimize_pic_address (x, 0);
5874
5875 /* Canonicalize shifts by 0, 1, 2, 3 into multiply */
5876 if (GET_CODE (x) == ASHIFT
5877 && GET_CODE (XEXP (x, 1)) == CONST_INT
5878 && (log = (unsigned) exact_log2 (INTVAL (XEXP (x, 1)))) < 4)
5879 {
5880 changed = 1;
5881 x = gen_rtx_MULT (Pmode, force_reg (Pmode, XEXP (x, 0)),
5882 GEN_INT (1 << log));
5883 }
5884
5885 if (GET_CODE (x) == PLUS)
5886 {
5887 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
5888
5889 if (GET_CODE (XEXP (x, 0)) == ASHIFT
5890 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
5891 && (log = (unsigned) exact_log2 (INTVAL (XEXP (XEXP (x, 0), 1)))) < 4)
5892 {
5893 changed = 1;
5894 XEXP (x, 0) = gen_rtx_MULT (Pmode,
5895 force_reg (Pmode, XEXP (XEXP (x, 0), 0)),
5896 GEN_INT (1 << log));
5897 }
5898
5899 if (GET_CODE (XEXP (x, 1)) == ASHIFT
5900 && GET_CODE (XEXP (XEXP (x, 1), 1)) == CONST_INT
5901 && (log = (unsigned) exact_log2 (INTVAL (XEXP (XEXP (x, 1), 1)))) < 4)
5902 {
5903 changed = 1;
5904 XEXP (x, 1) = gen_rtx_MULT (Pmode,
5905 force_reg (Pmode, XEXP (XEXP (x, 1), 0)),
5906 GEN_INT (1 << log));
5907 }
5908
5909 /* Put multiply first if it isn't already. */
5910 if (GET_CODE (XEXP (x, 1)) == MULT)
5911 {
5912 rtx tmp = XEXP (x, 0);
5913 XEXP (x, 0) = XEXP (x, 1);
5914 XEXP (x, 1) = tmp;
5915 changed = 1;
5916 }
5917
5918 /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
5919 into (plus (plus (mult (reg) (const)) (reg)) (const)). This can be
5920 created by virtual register instantiation, register elimination, and
5921 similar optimizations. */
5922 if (GET_CODE (XEXP (x, 0)) == MULT && GET_CODE (XEXP (x, 1)) == PLUS)
5923 {
5924 changed = 1;
5925 x = gen_rtx_PLUS (Pmode,
5926 gen_rtx_PLUS (Pmode, XEXP (x, 0),
5927 XEXP (XEXP (x, 1), 0)),
5928 XEXP (XEXP (x, 1), 1));
5929 }
5930
5931 /* Canonicalize
5932 (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
5933 into (plus (plus (mult (reg) (const)) (reg)) (const)). */
5934 else if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS
5935 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
5936 && GET_CODE (XEXP (XEXP (x, 0), 1)) == PLUS
5937 && CONSTANT_P (XEXP (x, 1)))
5938 {
5939 rtx constant;
5940 rtx other = NULL_RTX;
5941
5942 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
5943 {
5944 constant = XEXP (x, 1);
5945 other = XEXP (XEXP (XEXP (x, 0), 1), 1);
5946 }
5947 else if (GET_CODE (XEXP (XEXP (XEXP (x, 0), 1), 1)) == CONST_INT)
5948 {
5949 constant = XEXP (XEXP (XEXP (x, 0), 1), 1);
5950 other = XEXP (x, 1);
5951 }
5952 else
5953 constant = 0;
5954
5955 if (constant)
5956 {
5957 changed = 1;
5958 x = gen_rtx_PLUS (Pmode,
5959 gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 0),
5960 XEXP (XEXP (XEXP (x, 0), 1), 0)),
5961 plus_constant (other, INTVAL (constant)));
5962 }
5963 }
5964
5965 if (changed && legitimate_address_p (mode, x, FALSE))
5966 return x;
5967
5968 if (GET_CODE (XEXP (x, 0)) == MULT)
5969 {
5970 changed = 1;
5971 XEXP (x, 0) = force_operand (XEXP (x, 0), 0);
5972 }
5973
5974 if (GET_CODE (XEXP (x, 1)) == MULT)
5975 {
5976 changed = 1;
5977 XEXP (x, 1) = force_operand (XEXP (x, 1), 0);
5978 }
5979
5980 if (changed
5981 && GET_CODE (XEXP (x, 1)) == REG
5982 && GET_CODE (XEXP (x, 0)) == REG)
5983 return x;
5984
5985 if (flag_pic && SYMBOLIC_CONST (XEXP (x, 1)))
5986 {
5987 changed = 1;
5988 x = legitimize_pic_address (x, 0);
5989 }
5990
5991 if (changed && legitimate_address_p (mode, x, FALSE))
5992 return x;
5993
5994 if (GET_CODE (XEXP (x, 0)) == REG)
5995 {
5996 register rtx temp = gen_reg_rtx (Pmode);
5997 register rtx val = force_operand (XEXP (x, 1), temp);
5998 if (val != temp)
5999 emit_move_insn (temp, val);
6000
6001 XEXP (x, 1) = temp;
6002 return x;
6003 }
6004
6005 else if (GET_CODE (XEXP (x, 1)) == REG)
6006 {
6007 register rtx temp = gen_reg_rtx (Pmode);
6008 register rtx val = force_operand (XEXP (x, 0), temp);
6009 if (val != temp)
6010 emit_move_insn (temp, val);
6011
6012 XEXP (x, 0) = temp;
6013 return x;
6014 }
6015 }
6016
6017 return x;
6018 }
6019 \f
6020 /* Print an integer constant expression in assembler syntax. Addition
6021 and subtraction are the only arithmetic that may appear in these
6022 expressions. FILE is the stdio stream to write to, X is the rtx, and
6023 CODE is the operand print code from the output string. */
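/* For example, (unspec [foo] UNSPEC_GOTOFF) is printed as
   "foo@GOTOFF", and a plain SYMBOL_REF printed under the 'P' code
   gets an "@PLT" suffix.  (Summary of the cases handled below.)  */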
6024
6025 static void
6026 output_pic_addr_const (file, x, code)
6027 FILE *file;
6028 rtx x;
6029 int code;
6030 {
6031 char buf[256];
6032
6033 switch (GET_CODE (x))
6034 {
6035 case PC:
6036 if (flag_pic)
6037 putc ('.', file);
6038 else
6039 abort ();
6040 break;
6041
6042 case SYMBOL_REF:
6043 assemble_name (file, XSTR (x, 0));
6044 if (!TARGET_MACHO && code == 'P' && ! SYMBOL_REF_FLAG (x))
6045 fputs ("@PLT", file);
6046 break;
6047
6048 case LABEL_REF:
6049 x = XEXP (x, 0);
6050 /* FALLTHRU */
6051 case CODE_LABEL:
6052 ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (x));
6053 assemble_name (asm_out_file, buf);
6054 break;
6055
6056 case CONST_INT:
6057 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
6058 break;
6059
6060 case CONST:
6061 /* This used to output parentheses around the expression,
6062 but that does not work on the 386 (either ATT or BSD assembler). */
6063 output_pic_addr_const (file, XEXP (x, 0), code);
6064 break;
6065
6066 case CONST_DOUBLE:
6067 if (GET_MODE (x) == VOIDmode)
6068 {
6069 /* We can use %d if the number is <32 bits and positive. */
6070 if (CONST_DOUBLE_HIGH (x) || CONST_DOUBLE_LOW (x) < 0)
6071 fprintf (file, "0x%lx%08lx",
6072 (unsigned long) CONST_DOUBLE_HIGH (x),
6073 (unsigned long) CONST_DOUBLE_LOW (x));
6074 else
6075 fprintf (file, HOST_WIDE_INT_PRINT_DEC, CONST_DOUBLE_LOW (x));
6076 }
6077 else
6078 /* We can't handle floating point constants;
6079 PRINT_OPERAND must handle them. */
6080 output_operand_lossage ("floating constant misused");
6081 break;
6082
6083 case PLUS:
6084 /* Some assemblers need integer constants to appear first. */
6085 if (GET_CODE (XEXP (x, 0)) == CONST_INT)
6086 {
6087 output_pic_addr_const (file, XEXP (x, 0), code);
6088 putc ('+', file);
6089 output_pic_addr_const (file, XEXP (x, 1), code);
6090 }
6091 else if (GET_CODE (XEXP (x, 1)) == CONST_INT)
6092 {
6093 output_pic_addr_const (file, XEXP (x, 1), code);
6094 putc ('+', file);
6095 output_pic_addr_const (file, XEXP (x, 0), code);
6096 }
6097 else
6098 abort ();
6099 break;
6100
6101 case MINUS:
6102 if (!TARGET_MACHO)
6103 putc (ASSEMBLER_DIALECT == ASM_INTEL ? '(' : '[', file);
6104 output_pic_addr_const (file, XEXP (x, 0), code);
6105 putc ('-', file);
6106 output_pic_addr_const (file, XEXP (x, 1), code);
6107 if (!TARGET_MACHO)
6108 putc (ASSEMBLER_DIALECT == ASM_INTEL ? ')' : ']', file);
6109 break;
6110
6111 case UNSPEC:
6112 if (XVECLEN (x, 0) != 1)
6113 abort ();
6114 output_pic_addr_const (file, XVECEXP (x, 0, 0), code);
6115 switch (XINT (x, 1))
6116 {
6117 case UNSPEC_GOT:
6118 fputs ("@GOT", file);
6119 break;
6120 case UNSPEC_GOTOFF:
6121 fputs ("@GOTOFF", file);
6122 break;
6123 case UNSPEC_GOTPCREL:
6124 fputs ("@GOTPCREL(%rip)", file);
6125 break;
6126 case UNSPEC_GOTTPOFF:
6127 /* FIXME: This might be @TPOFF in Sun ld too. */
6128 fputs ("@GOTTPOFF", file);
6129 break;
6130 case UNSPEC_TPOFF:
6131 fputs ("@TPOFF", file);
6132 break;
6133 case UNSPEC_NTPOFF:
6134 if (TARGET_64BIT)
6135 fputs ("@TPOFF", file);
6136 else
6137 fputs ("@NTPOFF", file);
6138 break;
6139 case UNSPEC_DTPOFF:
6140 fputs ("@DTPOFF", file);
6141 break;
6142 case UNSPEC_GOTNTPOFF:
6143 if (TARGET_64BIT)
6144 fputs ("@GOTTPOFF(%rip)", file);
6145 else
6146 fputs ("@GOTNTPOFF", file);
6147 break;
6148 case UNSPEC_INDNTPOFF:
6149 fputs ("@INDNTPOFF", file);
6150 break;
6151 default:
6152 output_operand_lossage ("invalid UNSPEC as operand");
6153 break;
6154 }
6155 break;
6156
6157 default:
6158 output_operand_lossage ("invalid expression as operand");
6159 }
6160 }
6161
6162 /* This is called from dwarfout.c via ASM_OUTPUT_DWARF_ADDR_CONST.
6163 We need to handle our special PIC relocations. */
6164
6165 void
6166 i386_dwarf_output_addr_const (file, x)
6167 FILE *file;
6168 rtx x;
6169 {
6170 #ifdef ASM_QUAD
6171 fprintf (file, "%s", TARGET_64BIT ? ASM_QUAD : ASM_LONG);
6172 #else
6173 if (TARGET_64BIT)
6174 abort ();
6175 fprintf (file, "%s", ASM_LONG);
6176 #endif
6177 if (flag_pic)
6178 output_pic_addr_const (file, x, '\0');
6179 else
6180 output_addr_const (file, x);
6181 fputc ('\n', file);
6182 }
6183
6184 /* This is called from dwarf2out.c via ASM_OUTPUT_DWARF_DTPREL.
6185 We need to emit DTP-relative relocations. */
6186
6187 void
6188 i386_output_dwarf_dtprel (file, size, x)
6189 FILE *file;
6190 int size;
6191 rtx x;
6192 {
6193 fputs (ASM_LONG, file);
6194 output_addr_const (file, x);
6195 fputs ("@DTPOFF", file);
6196 switch (size)
6197 {
6198 case 4:
6199 break;
6200 case 8:
6201 fputs (", 0", file);
6202 break;
6203 default:
6204 abort ();
6205 }
6206 }
6207
6208 /* In the name of slightly smaller debug output, and to cater to
6209 general assembler lossage, recognize PIC+GOTOFF and turn it back
6210 into a direct symbol reference.  */
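/* E.g. (plus (reg %ebx) (const (unspec [foo] UNSPEC_GOTOFF))) is
   rewritten back to the bare (symbol_ref "foo") for debug output.
   (Illustrative example.)  */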
6211
6212 rtx
6213 i386_simplify_dwarf_addr (orig_x)
6214 rtx orig_x;
6215 {
6216 rtx x = orig_x, y;
6217
6218 if (GET_CODE (x) == MEM)
6219 x = XEXP (x, 0);
6220
6221 if (TARGET_64BIT)
6222 {
6223 if (GET_CODE (x) != CONST
6224 || GET_CODE (XEXP (x, 0)) != UNSPEC
6225 || XINT (XEXP (x, 0), 1) != UNSPEC_GOTPCREL
6226 || GET_CODE (orig_x) != MEM)
6227 return orig_x;
6228 return XVECEXP (XEXP (x, 0), 0, 0);
6229 }
6230
6231 if (GET_CODE (x) != PLUS
6232 || GET_CODE (XEXP (x, 1)) != CONST)
6233 return orig_x;
6234
6235 if (GET_CODE (XEXP (x, 0)) == REG
6236 && REGNO (XEXP (x, 0)) == PIC_OFFSET_TABLE_REGNUM)
6237 /* %ebx + GOT/GOTOFF */
6238 y = NULL;
6239 else if (GET_CODE (XEXP (x, 0)) == PLUS)
6240 {
6241 /* %ebx + %reg * scale + GOT/GOTOFF */
6242 y = XEXP (x, 0);
6243 if (GET_CODE (XEXP (y, 0)) == REG
6244 && REGNO (XEXP (y, 0)) == PIC_OFFSET_TABLE_REGNUM)
6245 y = XEXP (y, 1);
6246 else if (GET_CODE (XEXP (y, 1)) == REG
6247 && REGNO (XEXP (y, 1)) == PIC_OFFSET_TABLE_REGNUM)
6248 y = XEXP (y, 0);
6249 else
6250 return orig_x;
6251 if (GET_CODE (y) != REG
6252 && GET_CODE (y) != MULT
6253 && GET_CODE (y) != ASHIFT)
6254 return orig_x;
6255 }
6256 else
6257 return orig_x;
6258
6259 x = XEXP (XEXP (x, 1), 0);
6260 if (GET_CODE (x) == UNSPEC
6261 && ((XINT (x, 1) == UNSPEC_GOT && GET_CODE (orig_x) == MEM)
6262 || (XINT (x, 1) == UNSPEC_GOTOFF && GET_CODE (orig_x) != MEM)))
6263 {
6264 if (y)
6265 return gen_rtx_PLUS (Pmode, y, XVECEXP (x, 0, 0));
6266 return XVECEXP (x, 0, 0);
6267 }
6268
6269 if (GET_CODE (x) == PLUS
6270 && GET_CODE (XEXP (x, 0)) == UNSPEC
6271 && GET_CODE (XEXP (x, 1)) == CONST_INT
6272 && ((XINT (XEXP (x, 0), 1) == UNSPEC_GOT && GET_CODE (orig_x) == MEM)
6273 || (XINT (XEXP (x, 0), 1) == UNSPEC_GOTOFF
6274 && GET_CODE (orig_x) != MEM)))
6275 {
6276 x = gen_rtx_PLUS (VOIDmode, XVECEXP (XEXP (x, 0), 0, 0), XEXP (x, 1));
6277 if (y)
6278 return gen_rtx_PLUS (Pmode, y, x);
6279 return x;
6280 }
6281
6282 return orig_x;
6283 }
6284 \f
6285 static void
6286 put_condition_code (code, mode, reverse, fp, file)
6287 enum rtx_code code;
6288 enum machine_mode mode;
6289 int reverse, fp;
6290 FILE *file;
6291 {
6292 const char *suffix;
6293
6294 if (mode == CCFPmode || mode == CCFPUmode)
6295 {
6296 enum rtx_code second_code, bypass_code;
6297 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
6298 if (bypass_code != NIL || second_code != NIL)
6299 abort ();
6300 code = ix86_fp_compare_code_to_integer (code);
6301 mode = CCmode;
6302 }
6303 if (reverse)
6304 code = reverse_condition (code);
6305
6306 switch (code)
6307 {
6308 case EQ:
6309 suffix = "e";
6310 break;
6311 case NE:
6312 suffix = "ne";
6313 break;
6314 case GT:
6315 if (mode != CCmode && mode != CCNOmode && mode != CCGCmode)
6316 abort ();
6317 suffix = "g";
6318 break;
6319 case GTU:
6320 /* ??? Use "nbe" instead of "a" for fcmov lossage on some assemblers.
6321 Those same assemblers have the same but opposite lossage on cmov.  */
6322 if (mode != CCmode)
6323 abort ();
6324 suffix = fp ? "nbe" : "a";
6325 break;
6326 case LT:
6327 if (mode == CCNOmode || mode == CCGOCmode)
6328 suffix = "s";
6329 else if (mode == CCmode || mode == CCGCmode)
6330 suffix = "l";
6331 else
6332 abort ();
6333 break;
6334 case LTU:
6335 if (mode != CCmode)
6336 abort ();
6337 suffix = "b";
6338 break;
6339 case GE:
6340 if (mode == CCNOmode || mode == CCGOCmode)
6341 suffix = "ns";
6342 else if (mode == CCmode || mode == CCGCmode)
6343 suffix = "ge";
6344 else
6345 abort ();
6346 break;
6347 case GEU:
6348 /* ??? As above. */
6349 if (mode != CCmode)
6350 abort ();
6351 suffix = fp ? "nb" : "ae";
6352 break;
6353 case LE:
6354 if (mode != CCmode && mode != CCGCmode && mode != CCNOmode)
6355 abort ();
6356 suffix = "le";
6357 break;
6358 case LEU:
6359 if (mode != CCmode)
6360 abort ();
6361 suffix = "be";
6362 break;
6363 case UNORDERED:
6364 suffix = fp ? "u" : "p";
6365 break;
6366 case ORDERED:
6367 suffix = fp ? "nu" : "np";
6368 break;
6369 default:
6370 abort ();
6371 }
6372 fputs (suffix, file);
6373 }
6374
6375 void
6376 print_reg (x, code, file)
6377 rtx x;
6378 int code;
6379 FILE *file;
6380 {
6381 if (REGNO (x) == ARG_POINTER_REGNUM
6382 || REGNO (x) == FRAME_POINTER_REGNUM
6383 || REGNO (x) == FLAGS_REG
6384 || REGNO (x) == FPSR_REG)
6385 abort ();
6386
6387 if (ASSEMBLER_DIALECT == ASM_ATT || USER_LABEL_PREFIX[0] == 0)
6388 putc ('%', file);
6389
6390 if (code == 'w' || MMX_REG_P (x))
6391 code = 2;
6392 else if (code == 'b')
6393 code = 1;
6394 else if (code == 'k')
6395 code = 4;
6396 else if (code == 'q')
6397 code = 8;
6398 else if (code == 'y')
6399 code = 3;
6400 else if (code == 'h')
6401 code = 0;
6402 else
6403 code = GET_MODE_SIZE (GET_MODE (x));
6404
6405 /* Irritatingly, the AMD extended registers use a different naming
6406 convention from the normal registers.  */
6407 if (REX_INT_REG_P (x))
6408 {
6409 if (!TARGET_64BIT)
6410 abort ();
6411 switch (code)
6412 {
6413 case 0:
6414 error ("extended registers have no high halves");
6415 break;
6416 case 1:
6417 fprintf (file, "r%ib", REGNO (x) - FIRST_REX_INT_REG + 8);
6418 break;
6419 case 2:
6420 fprintf (file, "r%iw", REGNO (x) - FIRST_REX_INT_REG + 8);
6421 break;
6422 case 4:
6423 fprintf (file, "r%id", REGNO (x) - FIRST_REX_INT_REG + 8);
6424 break;
6425 case 8:
6426 fprintf (file, "r%i", REGNO (x) - FIRST_REX_INT_REG + 8);
6427 break;
6428 default:
6429 error ("unsupported operand size for extended register");
6430 break;
6431 }
6432 return;
6433 }
6434 switch (code)
6435 {
6436 case 3:
6437 if (STACK_TOP_P (x))
6438 {
6439 fputs ("st(0)", file);
6440 break;
6441 }
6442 /* FALLTHRU */
6443 case 8:
6444 case 4:
6445 case 12:
6446 if (! ANY_FP_REG_P (x))
6447 putc (code == 8 && TARGET_64BIT ? 'r' : 'e', file);
6448 /* FALLTHRU */
6449 case 16:
6450 case 2:
6451 fputs (hi_reg_name[REGNO (x)], file);
6452 break;
6453 case 1:
6454 fputs (qi_reg_name[REGNO (x)], file);
6455 break;
6456 case 0:
6457 fputs (qi_high_reg_name[REGNO (x)], file);
6458 break;
6459 default:
6460 abort ();
6461 }
6462 }
6463
6464 /* Locate some local-dynamic symbol still in use by this function
6465 so that we can print its name in some tls_local_dynamic_base
6466 pattern. */
6467
6468 static const char *
6469 get_some_local_dynamic_name ()
6470 {
6471 rtx insn;
6472
6473 if (cfun->machine->some_ld_name)
6474 return cfun->machine->some_ld_name;
6475
6476 for (insn = get_insns (); insn ; insn = NEXT_INSN (insn))
6477 if (INSN_P (insn)
6478 && for_each_rtx (&PATTERN (insn), get_some_local_dynamic_name_1, 0))
6479 return cfun->machine->some_ld_name;
6480
6481 abort ();
6482 }
6483
6484 static int
6485 get_some_local_dynamic_name_1 (px, data)
6486 rtx *px;
6487 void *data ATTRIBUTE_UNUSED;
6488 {
6489 rtx x = *px;
6490
6491 if (GET_CODE (x) == SYMBOL_REF
6492 && local_dynamic_symbolic_operand (x, Pmode))
6493 {
6494 cfun->machine->some_ld_name = XSTR (x, 0);
6495 return 1;
6496 }
6497
6498 return 0;
6499 }
6500
6501 /* Meaning of CODE:
6502 L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
6503 C -- print opcode suffix for set/cmov insn.
6504 c -- like C, but print reversed condition
6505 F,f -- likewise, but for floating-point.
6506 O -- if CMOV_SUN_AS_SYNTAX, expand to "w.", "l." or "q.", otherwise
6507 nothing
6508 R -- print the prefix for register names.
6509 z -- print the opcode suffix for the size of the current operand.
6510 * -- print a star (in certain assembler syntax)
6511 A -- print an absolute memory reference.
6512 w -- print the operand as if it's a "word" (HImode) even if it isn't.
6513 s -- print a shift double count, followed by the assembler's argument
6514 delimiter.
6515 b -- print the QImode name of the register for the indicated operand.
6516 %b0 would print %al if operands[0] is reg 0.
6517 w -- likewise, print the HImode name of the register.
6518 k -- likewise, print the SImode name of the register.
6519 q -- likewise, print the DImode name of the register.
6520 h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
6521 y -- print "st(0)" instead of "st" as a register.
6522 D -- print condition for SSE cmp instruction.
6523 P -- if PIC, print an @PLT suffix.
6524 X -- don't print any sort of PIC '@' suffix for a symbol.
6525 & -- print some in-use local-dynamic symbol name.
6526 */
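/* Illustrative example (not taken from the machine description): a
   template such as "mov%z0\t{%1, %0|%0, %1}" would print "movl" plus
   AT&T operand order for an SImode register destination, and just
   "mov" plus Intel operand order when ASSEMBLER_DIALECT is ASM_INTEL,
   since the 'z' suffix is suppressed for Intel syntax.  */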
6527
6528 void
6529 print_operand (file, x, code)
6530 FILE *file;
6531 rtx x;
6532 int code;
6533 {
6534 if (code)
6535 {
6536 switch (code)
6537 {
6538 case '*':
6539 if (ASSEMBLER_DIALECT == ASM_ATT)
6540 putc ('*', file);
6541 return;
6542
6543 case '&':
6544 assemble_name (file, get_some_local_dynamic_name ());
6545 return;
6546
6547 case 'A':
6548 if (ASSEMBLER_DIALECT == ASM_ATT)
6549 putc ('*', file);
6550 else if (ASSEMBLER_DIALECT == ASM_INTEL)
6551 {
6552 /* Intel syntax.  For absolute addresses, registers should not
6553 be surrounded by brackets.  */
6554 if (GET_CODE (x) != REG)
6555 {
6556 putc ('[', file);
6557 PRINT_OPERAND (file, x, 0);
6558 putc (']', file);
6559 return;
6560 }
6561 }
6562 else
6563 abort ();
6564
6565 PRINT_OPERAND (file, x, 0);
6566 return;
6567
6568
6569 case 'L':
6570 if (ASSEMBLER_DIALECT == ASM_ATT)
6571 putc ('l', file);
6572 return;
6573
6574 case 'W':
6575 if (ASSEMBLER_DIALECT == ASM_ATT)
6576 putc ('w', file);
6577 return;
6578
6579 case 'B':
6580 if (ASSEMBLER_DIALECT == ASM_ATT)
6581 putc ('b', file);
6582 return;
6583
6584 case 'Q':
6585 if (ASSEMBLER_DIALECT == ASM_ATT)
6586 putc ('l', file);
6587 return;
6588
6589 case 'S':
6590 if (ASSEMBLER_DIALECT == ASM_ATT)
6591 putc ('s', file);
6592 return;
6593
6594 case 'T':
6595 if (ASSEMBLER_DIALECT == ASM_ATT)
6596 putc ('t', file);
6597 return;
6598
6599 case 'z':
6600 /* 387 opcodes don't get size suffixes if the operands are
6601 registers. */
6602 if (STACK_REG_P (x))
6603 return;
6604
6605 /* Likewise if using Intel opcodes. */
6606 if (ASSEMBLER_DIALECT == ASM_INTEL)
6607 return;
6608
6609 /* Derive the opcode size suffix from the size of the operand.  */
6610 switch (GET_MODE_SIZE (GET_MODE (x)))
6611 {
6612 case 2:
6613 #ifdef HAVE_GAS_FILDS_FISTS
6614 putc ('s', file);
6615 #endif
6616 return;
6617
6618 case 4:
6619 if (GET_MODE (x) == SFmode)
6620 {
6621 putc ('s', file);
6622 return;
6623 }
6624 else
6625 putc ('l', file);
6626 return;
6627
6628 case 12:
6629 case 16:
6630 putc ('t', file);
6631 return;
6632
6633 case 8:
6634 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
6635 {
6636 #ifdef GAS_MNEMONICS
6637 putc ('q', file);
6638 #else
6639 putc ('l', file);
6640 putc ('l', file);
6641 #endif
6642 }
6643 else
6644 putc ('l', file);
6645 return;
6646
6647 default:
6648 abort ();
6649 }
6650
6651 case 'b':
6652 case 'w':
6653 case 'k':
6654 case 'q':
6655 case 'h':
6656 case 'y':
6657 case 'X':
6658 case 'P':
6659 break;
6660
6661 case 's':
6662 if (GET_CODE (x) == CONST_INT || ! SHIFT_DOUBLE_OMITS_COUNT)
6663 {
6664 PRINT_OPERAND (file, x, 0);
6665 putc (',', file);
6666 }
6667 return;
6668
6669 case 'D':
6670 /* A little bit of braindamage here.  The SSE compare instructions
6671 use completely different names for the comparisons than the
6672 fp conditional moves do.  */
6673 switch (GET_CODE (x))
6674 {
6675 case EQ:
6676 case UNEQ:
6677 fputs ("eq", file);
6678 break;
6679 case LT:
6680 case UNLT:
6681 fputs ("lt", file);
6682 break;
6683 case LE:
6684 case UNLE:
6685 fputs ("le", file);
6686 break;
6687 case UNORDERED:
6688 fputs ("unord", file);
6689 break;
6690 case NE:
6691 case LTGT:
6692 fputs ("neq", file);
6693 break;
6694 case UNGE:
6695 case GE:
6696 fputs ("nlt", file);
6697 break;
6698 case UNGT:
6699 case GT:
6700 fputs ("nle", file);
6701 break;
6702 case ORDERED:
6703 fputs ("ord", file);
6704 break;
6705 default:
6706 abort ();
6707 break;
6708 }
6709 return;
6710 case 'O':
6711 #ifdef CMOV_SUN_AS_SYNTAX
6712 if (ASSEMBLER_DIALECT == ASM_ATT)
6713 {
6714 switch (GET_MODE (x))
6715 {
6716 case HImode: putc ('w', file); break;
6717 case SImode:
6718 case SFmode: putc ('l', file); break;
6719 case DImode:
6720 case DFmode: putc ('q', file); break;
6721 default: abort ();
6722 }
6723 putc ('.', file);
6724 }
6725 #endif
6726 return;
6727 case 'C':
6728 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 0, file);
6729 return;
6730 case 'F':
6731 #ifdef CMOV_SUN_AS_SYNTAX
6732 if (ASSEMBLER_DIALECT == ASM_ATT)
6733 putc ('.', file);
6734 #endif
6735 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 1, file);
6736 return;
6737
6738 /* Like above, but reverse condition */
6739 case 'c':
6740 /* Check to see if argument to %c is really a constant
6741 and not a condition code which needs to be reversed. */
6742 if (GET_RTX_CLASS (GET_CODE (x)) != '<')
6743 {
6744 output_operand_lossage ("operand is neither a constant nor a condition code, invalid operand code 'c'");
6745 return;
6746 }
6747 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 0, file);
6748 return;
6749 case 'f':
6750 #ifdef CMOV_SUN_AS_SYNTAX
6751 if (ASSEMBLER_DIALECT == ASM_ATT)
6752 putc ('.', file);
6753 #endif
6754 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 1, file);
6755 return;
6756 case '+':
6757 {
6758 rtx x;
6759
6760 if (!optimize || optimize_size || !TARGET_BRANCH_PREDICTION_HINTS)
6761 return;
6762
6763 x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
6764 if (x)
6765 {
6766 int pred_val = INTVAL (XEXP (x, 0));
6767
6768 if (pred_val < REG_BR_PROB_BASE * 45 / 100
6769 || pred_val > REG_BR_PROB_BASE * 55 / 100)
6770 {
6771 int taken = pred_val > REG_BR_PROB_BASE / 2;
6772 int cputaken = final_forward_branch_p (current_output_insn) == 0;
6773
6774 /* Emit hints only in the case the default branch prediction
6775 heuristics would fail.  */
6776 if (taken != cputaken)
6777 {
6778 /* We use 3e (DS) prefix for taken branches and
6779 2e (CS) prefix for not taken branches. */
6780 if (taken)
6781 fputs ("ds ; ", file);
6782 else
6783 fputs ("cs ; ", file);
6784 }
6785 }
6786 }
6787 return;
6788 }
6789 default:
6790 output_operand_lossage ("invalid operand code `%c'", code);
6791 }
6792 }
6793
6794 if (GET_CODE (x) == REG)
6795 {
6796 PRINT_REG (x, code, file);
6797 }
6798
6799 else if (GET_CODE (x) == MEM)
6800 {
6801 /* No `byte ptr' prefix for call instructions. */
6802 if (ASSEMBLER_DIALECT == ASM_INTEL && code != 'X' && code != 'P')
6803 {
6804 const char * size;
6805 switch (GET_MODE_SIZE (GET_MODE (x)))
6806 {
6807 case 1: size = "BYTE"; break;
6808 case 2: size = "WORD"; break;
6809 case 4: size = "DWORD"; break;
6810 case 8: size = "QWORD"; break;
6811 case 12: size = "XWORD"; break;
6812 case 16: size = "XMMWORD"; break;
6813 default:
6814 abort ();
6815 }
6816
6817 /* Check for explicit size override (codes 'b', 'w' and 'k') */
6818 if (code == 'b')
6819 size = "BYTE";
6820 else if (code == 'w')
6821 size = "WORD";
6822 else if (code == 'k')
6823 size = "DWORD";
6824
6825 fputs (size, file);
6826 fputs (" PTR ", file);
6827 }
6828
6829 x = XEXP (x, 0);
6830 if (flag_pic && CONSTANT_ADDRESS_P (x))
6831 output_pic_addr_const (file, x, code);
6832 /* Avoid (%rip) for call operands. */
6833 else if (CONSTANT_ADDRESS_P (x) && code == 'P'
6834 && GET_CODE (x) != CONST_INT)
6835 output_addr_const (file, x);
6836 else if (this_is_asm_operands && ! address_operand (x, VOIDmode))
6837 output_operand_lossage ("invalid constraints for operand");
6838 else
6839 output_address (x);
6840 }
6841
6842 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == SFmode)
6843 {
6844 REAL_VALUE_TYPE r;
6845 long l;
6846
6847 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
6848 REAL_VALUE_TO_TARGET_SINGLE (r, l);
6849
6850 if (ASSEMBLER_DIALECT == ASM_ATT)
6851 putc ('$', file);
6852 fprintf (file, "0x%lx", l);
6853 }
6854
6855 /* These float cases don't actually occur as immediate operands. */
6856 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == DFmode)
6857 {
6858 char dstr[30];
6859
6860 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
6861 fprintf (file, "%s", dstr);
6862 }
6863
6864 else if (GET_CODE (x) == CONST_DOUBLE
6865 && (GET_MODE (x) == XFmode || GET_MODE (x) == TFmode))
6866 {
6867 char dstr[30];
6868
6869 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
6870 fprintf (file, "%s", dstr);
6871 }
6872
6873 else
6874 {
6875 if (code != 'P')
6876 {
6877 if (GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST_DOUBLE)
6878 {
6879 if (ASSEMBLER_DIALECT == ASM_ATT)
6880 putc ('$', file);
6881 }
6882 else if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF
6883 || GET_CODE (x) == LABEL_REF)
6884 {
6885 if (ASSEMBLER_DIALECT == ASM_ATT)
6886 putc ('$', file);
6887 else
6888 fputs ("OFFSET FLAT:", file);
6889 }
6890 }
6891 if (GET_CODE (x) == CONST_INT)
6892 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
6893 else if (flag_pic)
6894 output_pic_addr_const (file, x, code);
6895 else
6896 output_addr_const (file, x);
6897 }
6898 }
6899 \f
6900 /* Print a memory operand whose address is ADDR. */
6901
6902 void
6903 print_operand_address (file, addr)
6904 FILE *file;
6905 register rtx addr;
6906 {
6907 struct ix86_address parts;
6908 rtx base, index, disp;
6909 int scale;
6910
6911 if (GET_CODE (addr) == UNSPEC && XINT (addr, 1) == UNSPEC_TP)
6912 {
6913 if (ASSEMBLER_DIALECT == ASM_INTEL)
6914 fputs ("DWORD PTR ", file);
6915 if (ASSEMBLER_DIALECT == ASM_ATT || USER_LABEL_PREFIX[0] == 0)
6916 putc ('%', file);
6917 if (TARGET_64BIT)
6918 fputs ("fs:0", file);
6919 else
6920 fputs ("gs:0", file);
6921 return;
6922 }
6923
6924 if (! ix86_decompose_address (addr, &parts))
6925 abort ();
6926
6927 base = parts.base;
6928 index = parts.index;
6929 disp = parts.disp;
6930 scale = parts.scale;
6931
6932 if (!base && !index)
6933 {
6934 /* A displacement-only address requires special attention.  */
6935
6936 if (GET_CODE (disp) == CONST_INT)
6937 {
6938 if (ASSEMBLER_DIALECT == ASM_INTEL)
6939 {
6940 if (USER_LABEL_PREFIX[0] == 0)
6941 putc ('%', file);
6942 fputs ("ds:", file);
6943 }
6944 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (addr));
6945 }
6946 else if (flag_pic)
6947 output_pic_addr_const (file, addr, 0);
6948 else
6949 output_addr_const (file, addr);
6950
6951 /* Use one byte shorter RIP relative addressing for 64bit mode. */
6952 if (TARGET_64BIT
6953 && ((GET_CODE (addr) == SYMBOL_REF
6954 && ! tls_symbolic_operand (addr, GET_MODE (addr)))
6955 || GET_CODE (addr) == LABEL_REF
6956 || (GET_CODE (addr) == CONST
6957 && GET_CODE (XEXP (addr, 0)) == PLUS
6958 && (GET_CODE (XEXP (XEXP (addr, 0), 0)) == SYMBOL_REF
6959 || GET_CODE (XEXP (XEXP (addr, 0), 0)) == LABEL_REF)
6960 && GET_CODE (XEXP (XEXP (addr, 0), 1)) == CONST_INT)))
6961 fputs ("(%rip)", file);
6962 }
6963 else
6964 {
6965 if (ASSEMBLER_DIALECT == ASM_ATT)
6966 {
6967 if (disp)
6968 {
6969 if (flag_pic)
6970 output_pic_addr_const (file, disp, 0);
6971 else if (GET_CODE (disp) == LABEL_REF)
6972 output_asm_label (disp);
6973 else
6974 output_addr_const (file, disp);
6975 }
6976
6977 putc ('(', file);
6978 if (base)
6979 PRINT_REG (base, 0, file);
6980 if (index)
6981 {
6982 putc (',', file);
6983 PRINT_REG (index, 0, file);
6984 if (scale != 1)
6985 fprintf (file, ",%d", scale);
6986 }
6987 putc (')', file);
6988 }
6989 else
6990 {
6991 rtx offset = NULL_RTX;
6992
6993 if (disp)
6994 {
6995 /* Pull out the offset of a symbol; print any symbol itself. */
6996 if (GET_CODE (disp) == CONST
6997 && GET_CODE (XEXP (disp, 0)) == PLUS
6998 && GET_CODE (XEXP (XEXP (disp, 0), 1)) == CONST_INT)
6999 {
7000 offset = XEXP (XEXP (disp, 0), 1);
7001 disp = gen_rtx_CONST (VOIDmode,
7002 XEXP (XEXP (disp, 0), 0));
7003 }
7004
7005 if (flag_pic)
7006 output_pic_addr_const (file, disp, 0);
7007 else if (GET_CODE (disp) == LABEL_REF)
7008 output_asm_label (disp);
7009 else if (GET_CODE (disp) == CONST_INT)
7010 offset = disp;
7011 else
7012 output_addr_const (file, disp);
7013 }
7014
7015 putc ('[', file);
7016 if (base)
7017 {
7018 PRINT_REG (base, 0, file);
7019 if (offset)
7020 {
7021 if (INTVAL (offset) >= 0)
7022 putc ('+', file);
7023 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
7024 }
7025 }
7026 else if (offset)
7027 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
7028 else
7029 putc ('0', file);
7030
7031 if (index)
7032 {
7033 putc ('+', file);
7034 PRINT_REG (index, 0, file);
7035 if (scale != 1)
7036 fprintf (file, "*%d", scale);
7037 }
7038 putc (']', file);
7039 }
7040 }
7041 }
7042
7043 bool
7044 output_addr_const_extra (file, x)
7045 FILE *file;
7046 rtx x;
7047 {
7048 rtx op;
7049
7050 if (GET_CODE (x) != UNSPEC)
7051 return false;
7052
7053 op = XVECEXP (x, 0, 0);
7054 switch (XINT (x, 1))
7055 {
7056 case UNSPEC_GOTTPOFF:
7057 output_addr_const (file, op);
7058 /* FIXME: This might be @TPOFF in Sun ld. */
7059 fputs ("@GOTTPOFF", file);
7060 break;
7061 case UNSPEC_TPOFF:
7062 output_addr_const (file, op);
7063 fputs ("@TPOFF", file);
7064 break;
7065 case UNSPEC_NTPOFF:
7066 output_addr_const (file, op);
7067 if (TARGET_64BIT)
7068 fputs ("@TPOFF", file);
7069 else
7070 fputs ("@NTPOFF", file);
7071 break;
7072 case UNSPEC_DTPOFF:
7073 output_addr_const (file, op);
7074 fputs ("@DTPOFF", file);
7075 break;
7076 case UNSPEC_GOTNTPOFF:
7077 output_addr_const (file, op);
7078 if (TARGET_64BIT)
7079 fputs ("@GOTTPOFF(%rip)", file);
7080 else
7081 fputs ("@GOTNTPOFF", file);
7082 break;
7083 case UNSPEC_INDNTPOFF:
7084 output_addr_const (file, op);
7085 fputs ("@INDNTPOFF", file);
7086 break;
7087
7088 default:
7089 return false;
7090 }
7091
7092 return true;
7093 }
7094 \f
7095 /* Split one or more DImode RTL references into pairs of SImode
7096 references. The RTL can be REG, offsettable MEM, integer constant, or
7097 CONST_DOUBLE. "operands" is a pointer to an array of DImode RTL to
7098 split and "num" is its length. lo_half and hi_half are output arrays
7099 that parallel "operands". */
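/* For example, a (mem:DI addr) operand is split into the SImode
   words at byte offsets 0 and 4, while a (reg:DI N) pseudo becomes
   (subreg:SI (reg:DI N) 0) and (subreg:SI (reg:DI N) 4).
   (Illustrative; the low word comes first on this target.)  */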
7100
7101 void
7102 split_di (operands, num, lo_half, hi_half)
7103 rtx operands[];
7104 int num;
7105 rtx lo_half[], hi_half[];
7106 {
7107 while (num--)
7108 {
7109 rtx op = operands[num];
7110
7111 /* simplify_subreg refuses to split volatile memory addresses,
7112 but we still have to handle them.  */
7113 if (GET_CODE (op) == MEM)
7114 {
7115 lo_half[num] = adjust_address (op, SImode, 0);
7116 hi_half[num] = adjust_address (op, SImode, 4);
7117 }
7118 else
7119 {
7120 lo_half[num] = simplify_gen_subreg (SImode, op,
7121 GET_MODE (op) == VOIDmode
7122 ? DImode : GET_MODE (op), 0);
7123 hi_half[num] = simplify_gen_subreg (SImode, op,
7124 GET_MODE (op) == VOIDmode
7125 ? DImode : GET_MODE (op), 4);
7126 }
7127 }
7128 }
7129 /* Split one or more TImode RTL references into pairs of DImode
7130 references.  The RTL can be REG, offsettable MEM, integer constant, or
7131 CONST_DOUBLE.  "operands" is a pointer to an array of TImode RTL to
7132 split and "num" is its length.  lo_half and hi_half are output arrays
7133 that parallel "operands".  */
7134
7135 void
7136 split_ti (operands, num, lo_half, hi_half)
7137 rtx operands[];
7138 int num;
7139 rtx lo_half[], hi_half[];
7140 {
7141 while (num--)
7142 {
7143 rtx op = operands[num];
7144
7145 /* simplify_subreg refuses to split volatile memory addresses, but we
7146 still have to handle them.  */
7147 if (GET_CODE (op) == MEM)
7148 {
7149 lo_half[num] = adjust_address (op, DImode, 0);
7150 hi_half[num] = adjust_address (op, DImode, 8);
7151 }
7152 else
7153 {
7154 lo_half[num] = simplify_gen_subreg (DImode, op, TImode, 0);
7155 hi_half[num] = simplify_gen_subreg (DImode, op, TImode, 8);
7156 }
7157 }
7158 }
7159 \f
7160 /* Output code to perform a 387 binary operation in INSN, one of PLUS,
7161 MINUS, MULT or DIV. OPERANDS are the insn operands, where operands[3]
7162 is the expression of the binary operation. The output may either be
7163 emitted here, or returned to the caller, like all output_* functions.
7164
7165 There is no guarantee that the operands are the same mode, as they
7166 might be within FLOAT or FLOAT_EXTEND expressions. */
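/* Illustrative example: for (plus:DF (reg st0) (mem:DF ...)) with the
   destination in st(0), the code below builds "fadd%z2\t%2", which
   prints as "faddl <mem>" for a DFmode memory operand.  */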
7167
7168 #ifndef SYSV386_COMPAT
7169 /* Set to 1 for compatibility with brain-damaged assemblers. No-one
7170 wants to fix the assemblers because that causes incompatibility
7171 with gcc. No-one wants to fix gcc because that causes
7172 incompatibility with assemblers... You can use the option of
7173 -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way. */
7174 #define SYSV386_COMPAT 1
7175 #endif
7176
7177 const char *
7178 output_387_binary_op (insn, operands)
7179 rtx insn;
7180 rtx *operands;
7181 {
7182 static char buf[30];
7183 const char *p;
7184 const char *ssep;
7185 int is_sse = SSE_REG_P (operands[0]) | SSE_REG_P (operands[1]) | SSE_REG_P (operands[2]);
7186
7187 #ifdef ENABLE_CHECKING
7188 /* Even if we do not want to check the inputs, this documents input
7189 constraints. Which helps in understanding the following code. */
7190 if (STACK_REG_P (operands[0])
7191 && ((REG_P (operands[1])
7192 && REGNO (operands[0]) == REGNO (operands[1])
7193 && (STACK_REG_P (operands[2]) || GET_CODE (operands[2]) == MEM))
7194 || (REG_P (operands[2])
7195 && REGNO (operands[0]) == REGNO (operands[2])
7196 && (STACK_REG_P (operands[1]) || GET_CODE (operands[1]) == MEM)))
7197 && (STACK_TOP_P (operands[1]) || STACK_TOP_P (operands[2])))
7198 ; /* ok */
7199 else if (!is_sse)
7200 abort ();
7201 #endif
7202
7203 switch (GET_CODE (operands[3]))
7204 {
7205 case PLUS:
7206 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
7207 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
7208 p = "fiadd";
7209 else
7210 p = "fadd";
7211 ssep = "add";
7212 break;
7213
7214 case MINUS:
7215 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
7216 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
7217 p = "fisub";
7218 else
7219 p = "fsub";
7220 ssep = "sub";
7221 break;
7222
7223 case MULT:
7224 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
7225 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
7226 p = "fimul";
7227 else
7228 p = "fmul";
7229 ssep = "mul";
7230 break;
7231
7232 case DIV:
7233 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
7234 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
7235 p = "fidiv";
7236 else
7237 p = "fdiv";
7238 ssep = "div";
7239 break;
7240
7241 default:
7242 abort ();
7243 }
7244
7245 if (is_sse)
7246 {
7247 strcpy (buf, ssep);
7248 if (GET_MODE (operands[0]) == SFmode)
7249 strcat (buf, "ss\t{%2, %0|%0, %2}");
7250 else
7251 strcat (buf, "sd\t{%2, %0|%0, %2}");
7252 return buf;
7253 }
7254 strcpy (buf, p);
7255
7256 switch (GET_CODE (operands[3]))
7257 {
7258 case MULT:
7259 case PLUS:
7260 if (REG_P (operands[2]) && REGNO (operands[0]) == REGNO (operands[2]))
7261 {
7262 rtx temp = operands[2];
7263 operands[2] = operands[1];
7264 operands[1] = temp;
7265 }
7266
7267 /* We know operands[0] == operands[1].  */
7268
7269 if (GET_CODE (operands[2]) == MEM)
7270 {
7271 p = "%z2\t%2";
7272 break;
7273 }
7274
7275 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
7276 {
7277 if (STACK_TOP_P (operands[0]))
7278 /* How is it that we are storing to a dead operand[2]?
7279 Well, presumably operands[1] is dead too. We can't
7280 store the result to st(0) as st(0) gets popped on this
7281 instruction. Instead store to operands[2] (which I
7282 think has to be st(1)). st(1) will be popped later.
7283 gcc <= 2.8.1 didn't have this check and generated
7284 assembly code that the Unixware assembler rejected. */
7285 p = "p\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
7286 else
7287 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
7288 break;
7289 }
7290
7291 if (STACK_TOP_P (operands[0]))
7292 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
7293 else
7294 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
7295 break;
7296
7297 case MINUS:
7298 case DIV:
7299 if (GET_CODE (operands[1]) == MEM)
7300 {
7301 p = "r%z1\t%1";
7302 break;
7303 }
7304
7305 if (GET_CODE (operands[2]) == MEM)
7306 {
7307 p = "%z2\t%2";
7308 break;
7309 }
7310
7311 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
7312 {
7313 #if SYSV386_COMPAT
7314 /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
7315 derived assemblers, confusingly reverse the direction of
7316 the operation for fsub{r} and fdiv{r} when the
7317 destination register is not st(0). The Intel assembler
7318 doesn't have this brain damage. Read !SYSV386_COMPAT to
7319 figure out what the hardware really does. */
7320 if (STACK_TOP_P (operands[0]))
7321 p = "{p\t%0, %2|rp\t%2, %0}";
7322 else
7323 p = "{rp\t%2, %0|p\t%0, %2}";
7324 #else
7325 if (STACK_TOP_P (operands[0]))
7326 /* As above for fmul/fadd, we can't store to st(0). */
7327 p = "rp\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
7328 else
7329 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
7330 #endif
7331 break;
7332 }
7333
7334 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
7335 {
7336 #if SYSV386_COMPAT
7337 if (STACK_TOP_P (operands[0]))
7338 p = "{rp\t%0, %1|p\t%1, %0}";
7339 else
7340 p = "{p\t%1, %0|rp\t%0, %1}";
7341 #else
7342 if (STACK_TOP_P (operands[0]))
7343 p = "p\t{%0, %1|%1, %0}"; /* st(1) = st(1) op st(0); pop */
7344 else
7345 p = "rp\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2); pop */
7346 #endif
7347 break;
7348 }
7349
7350 if (STACK_TOP_P (operands[0]))
7351 {
7352 if (STACK_TOP_P (operands[1]))
7353 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
7354 else
7355 p = "r\t{%y1, %0|%0, %y1}"; /* st(0) = st(r1) op st(0) */
7356 break;
7357 }
7358 else if (STACK_TOP_P (operands[1]))
7359 {
7360 #if SYSV386_COMPAT
7361 p = "{\t%1, %0|r\t%0, %1}";
7362 #else
7363 p = "r\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2) */
7364 #endif
7365 }
7366 else
7367 {
7368 #if SYSV386_COMPAT
7369 p = "{r\t%2, %0|\t%0, %2}";
7370 #else
7371 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
7372 #endif
7373 }
7374 break;
7375
7376 default:
7377 abort ();
7378 }
7379
7380 strcat (buf, p);
7381 return buf;
7382 }
7383
7384 /* Output code to initialize control word copies used by
7385 trunc?f?i patterns.  NORMAL is set to the current control word, while
7386 ROUND_DOWN is set to the control word that rounds toward zero.  */
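/* (Bits 10-11 of the x87 control word select the rounding mode;
   OR-ing in 0xc00 below sets them to 11b, i.e. round toward zero,
   which is what the truncating conversion patterns need.)  */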
7387 void
7388 emit_i387_cw_initialization (normal, round_down)
7389 rtx normal, round_down;
7390 {
7391 rtx reg = gen_reg_rtx (HImode);
7392
7393 emit_insn (gen_x86_fnstcw_1 (normal));
7394 emit_move_insn (reg, normal);
7395 if (!TARGET_PARTIAL_REG_STALL && !optimize_size
7396 && !TARGET_64BIT)
7397 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0xc)));
7398 else
7399 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0xc00)));
7400 emit_move_insn (round_down, reg);
7401 }
7402
7403 /* Output code for INSN to convert a float to a signed int. OPERANDS
7404 are the insn operands. The output may be [HSD]Imode and the input
7405 operand may be [SDX]Fmode. */
7406
7407 const char *
7408 output_fix_trunc (insn, operands)
7409 rtx insn;
7410 rtx *operands;
7411 {
7412 int stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
7413 int dimode_p = GET_MODE (operands[0]) == DImode;
7414
7415 /* Jump through a hoop or two for DImode, since the hardware has no
7416 non-popping instruction. We used to do this a different way, but
7417 that was somewhat fragile and broke with post-reload splitters. */
7418 if (dimode_p && !stack_top_dies)
7419 output_asm_insn ("fld\t%y1", operands);
7420
7421 if (!STACK_TOP_P (operands[1]))
7422 abort ();
7423
7424 if (GET_CODE (operands[0]) != MEM)
7425 abort ();
7426
7427 output_asm_insn ("fldcw\t%3", operands);
7428 if (stack_top_dies || dimode_p)
7429 output_asm_insn ("fistp%z0\t%0", operands);
7430 else
7431 output_asm_insn ("fist%z0\t%0", operands);
7432 output_asm_insn ("fldcw\t%2", operands);
7433
7434 return "";
7435 }
7436
7437 /* Output code for INSN to compare OPERANDS. EFLAGS_P is 1 when fcomi
7438 should be used and 2 when fnstsw should be used. UNORDERED_P is true
7439 when fucom should be used. */
7440
7441 const char *
7442 output_fp_compare (insn, operands, eflags_p, unordered_p)
7443 rtx insn;
7444 rtx *operands;
7445 int eflags_p, unordered_p;
7446 {
7447 int stack_top_dies;
7448 rtx cmp_op0 = operands[0];
7449 rtx cmp_op1 = operands[1];
7450 int is_sse = SSE_REG_P (operands[0]) | SSE_REG_P (operands[1]);
7451
7452 if (eflags_p == 2)
7453 {
7454 cmp_op0 = cmp_op1;
7455 cmp_op1 = operands[2];
7456 }
7457 if (is_sse)
7458 {
7459 if (GET_MODE (operands[0]) == SFmode)
7460 if (unordered_p)
7461 return "ucomiss\t{%1, %0|%0, %1}";
7462 else
7463 return "comiss\t{%1, %0|%0, %y}";
7464 else
7465 if (unordered_p)
7466 return "ucomisd\t{%1, %0|%0, %1}";
7467 else
7468 return "comisd\t{%1, %0|%0, %y}";
7469 }
7470
7471 if (! STACK_TOP_P (cmp_op0))
7472 abort ();
7473
7474 stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
7475
7476 if (STACK_REG_P (cmp_op1)
7477 && stack_top_dies
7478 && find_regno_note (insn, REG_DEAD, REGNO (cmp_op1))
7479 && REGNO (cmp_op1) != FIRST_STACK_REG)
7480 {
7481 /* If both the top of the 387 stack and the other operand (also a
7482 stack register) die, then this must be an `fcompp' float
7483 compare.  */
7484
7485 if (eflags_p == 1)
7486 {
7487 /* There is no double popping fcomi variant. Fortunately,
7488 eflags is immune from the fstp's cc clobbering. */
7489 if (unordered_p)
7490 output_asm_insn ("fucomip\t{%y1, %0|%0, %y1}", operands);
7491 else
7492 output_asm_insn ("fcomip\t{%y1, %0|%0, %y1}", operands);
7493 return "fstp\t%y0";
7494 }
7495 else
7496 {
7497 if (eflags_p == 2)
7498 {
7499 if (unordered_p)
7500 return "fucompp\n\tfnstsw\t%0";
7501 else
7502 return "fcompp\n\tfnstsw\t%0";
7503 }
7504 else
7505 {
7506 if (unordered_p)
7507 return "fucompp";
7508 else
7509 return "fcompp";
7510 }
7511 }
7512 }
7513 else
7514 {
7515 /* Encoded here as eflags_p | intmode | unordered_p | stack_top_dies. */
7516
7517 static const char * const alt[24] =
7518 {
7519 "fcom%z1\t%y1",
7520 "fcomp%z1\t%y1",
7521 "fucom%z1\t%y1",
7522 "fucomp%z1\t%y1",
7523
7524 "ficom%z1\t%y1",
7525 "ficomp%z1\t%y1",
7526 NULL,
7527 NULL,
7528
7529 "fcomi\t{%y1, %0|%0, %y1}",
7530 "fcomip\t{%y1, %0|%0, %y1}",
7531 "fucomi\t{%y1, %0|%0, %y1}",
7532 "fucomip\t{%y1, %0|%0, %y1}",
7533
7534 NULL,
7535 NULL,
7536 NULL,
7537 NULL,
7538
7539 "fcom%z2\t%y2\n\tfnstsw\t%0",
7540 "fcomp%z2\t%y2\n\tfnstsw\t%0",
7541 "fucom%z2\t%y2\n\tfnstsw\t%0",
7542 "fucomp%z2\t%y2\n\tfnstsw\t%0",
7543
7544 "ficom%z2\t%y2\n\tfnstsw\t%0",
7545 "ficomp%z2\t%y2\n\tfnstsw\t%0",
7546 NULL,
7547 NULL
7548 };
7549
7550 int mask;
7551 const char *ret;
7552
7553 mask = eflags_p << 3;
7554 mask |= (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT) << 2;
7555 mask |= unordered_p << 1;
7556 mask |= stack_top_dies;
7557
7558 if (mask >= 24)
7559 abort ();
7560 ret = alt[mask];
7561 if (ret == NULL)
7562 abort ();
7563
7564 return ret;
7565 }
7566 }
7567
7568 void
7569 ix86_output_addr_vec_elt (file, value)
7570 FILE *file;
7571 int value;
7572 {
7573 const char *directive = ASM_LONG;
7574
7575 if (TARGET_64BIT)
7576 {
7577 #ifdef ASM_QUAD
7578 directive = ASM_QUAD;
7579 #else
7580 abort ();
7581 #endif
7582 }
7583
7584 fprintf (file, "%s%s%d\n", directive, LPREFIX, value);
7585 }
7586
7587 void
7588 ix86_output_addr_diff_elt (file, value, rel)
7589 FILE *file;
7590 int value, rel;
7591 {
7592 if (TARGET_64BIT)
7593 fprintf (file, "%s%s%d-%s%d\n",
7594 ASM_LONG, LPREFIX, value, LPREFIX, rel);
7595 else if (HAVE_AS_GOTOFF_IN_DATA)
7596 fprintf (file, "%s%s%d@GOTOFF\n", ASM_LONG, LPREFIX, value);
7597 #if TARGET_MACHO
7598 else if (TARGET_MACHO)
7599 fprintf (file, "%s%s%d-%s\n", ASM_LONG, LPREFIX, value,
7600 machopic_function_base_name () + 1);
7601 #endif
7602 else
7603 asm_fprintf (file, "%s%U%s+[.-%s%d]\n",
7604 ASM_LONG, GOT_SYMBOL_NAME, LPREFIX, value);
7605 }
7606 \f
7607 /* Generate either "mov $0, reg" or "xor reg, reg", as appropriate
7608 for the target. */
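/* (The xor form is shorter but clobbers the flags, hence the explicit
   CLOBBER of the flags register added below; "mov $0, reg" is used
   instead when TARGET_USE_MOV0 requests it and we are not optimizing
   for size.)  */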
7609
7610 void
7611 ix86_expand_clear (dest)
7612 rtx dest;
7613 {
7614 rtx tmp;
7615
7616 /* We play register width games, which are only valid after reload. */
7617 if (!reload_completed)
7618 abort ();
7619
7620 /* Avoid HImode and its attendant prefix byte. */
7621 if (GET_MODE_SIZE (GET_MODE (dest)) < 4)
7622 dest = gen_rtx_REG (SImode, REGNO (dest));
7623
7624 tmp = gen_rtx_SET (VOIDmode, dest, const0_rtx);
7625
7626 /* This predicate should match that for movsi_xor and movdi_xor_rex64. */
7627 if (reload_completed && (!TARGET_USE_MOV0 || optimize_size))
7628 {
7629 rtx clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, 17));
7630 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
7631 }
7632
7633 emit_insn (tmp);
7634 }
7635
7636 /* X is an unchanging MEM. If it is a constant pool reference, return
7637 the constant pool rtx, else NULL. */
7638
7639 static rtx
7640 maybe_get_pool_constant (x)
7641 rtx x;
7642 {
7643 x = XEXP (x, 0);
7644
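 /* Under 32-bit PIC, a constant pool reference has the form
 (plus pic_register (const (unspec [symbol] UNSPEC_GOTOFF)));
 peel that back to the underlying SYMBOL_REF first. */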
7645 if (flag_pic && ! TARGET_64BIT)
7646 {
7647 if (GET_CODE (x) != PLUS)
7648 return NULL_RTX;
7649 if (XEXP (x, 0) != pic_offset_table_rtx)
7650 return NULL_RTX;
7651 x = XEXP (x, 1);
7652 if (GET_CODE (x) != CONST)
7653 return NULL_RTX;
7654 x = XEXP (x, 0);
7655 if (GET_CODE (x) != UNSPEC)
7656 return NULL_RTX;
7657 if (XINT (x, 1) != UNSPEC_GOTOFF)
7658 return NULL_RTX;
7659 x = XVECEXP (x, 0, 0);
7660 }
7661
7662 if (GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
7663 return get_pool_constant (x);
7664
7665 return NULL_RTX;
7666 }
7667
7668 void
7669 ix86_expand_move (mode, operands)
7670 enum machine_mode mode;
7671 rtx operands[];
7672 {
7673 int strict = (reload_in_progress || reload_completed);
7674 rtx insn, op0, op1, tmp;
7675
7676 op0 = operands[0];
7677 op1 = operands[1];
7678
7679 /* ??? We have a slight problem. We need to say that tls symbols are
7680 not legitimate constants so that reload does not helpfully reload
7681 these constants from a REG_EQUIV, which we cannot handle. (Recall
7682 that general- and local-dynamic address resolution requires a
7683 function call.)
7684
7685 However, if we say that tls symbols are not legitimate constants,
7686 then emit_move_insn helpfully drops them into the constant pool.
7687
7688 It is far easier to work around emit_move_insn than reload. Recognize
7689 the MEM that we would have created and extract the symbol_ref. */
7690
7691 if (mode == Pmode
7692 && GET_CODE (op1) == MEM
7693 && RTX_UNCHANGING_P (op1))
7694 {
7695 tmp = maybe_get_pool_constant (op1);
7696 /* Note that we only care about symbolic constants here, which
7697 unlike CONST_INT will always have a proper mode. */
7698 if (tmp && GET_MODE (tmp) == Pmode)
7699 op1 = tmp;
7700 }
7701
7702 if (tls_symbolic_operand (op1, Pmode))
7703 {
7704 op1 = legitimize_address (op1, op1, VOIDmode);
7705 if (GET_CODE (op0) == MEM)
7706 {
7707 tmp = gen_reg_rtx (mode);
7708 emit_insn (gen_rtx_SET (VOIDmode, tmp, op1));
7709 op1 = tmp;
7710 }
7711 }
7712 else if (flag_pic && mode == Pmode && symbolic_operand (op1, Pmode))
7713 {
7714 #if TARGET_MACHO
7715 if (MACHOPIC_PURE)
7716 {
7717 rtx temp = ((reload_in_progress
7718 || ((op0 && GET_CODE (op0) == REG)
7719 && mode == Pmode))
7720 ? op0 : gen_reg_rtx (Pmode));
7721 op1 = machopic_indirect_data_reference (op1, temp);
7722 op1 = machopic_legitimize_pic_address (op1, mode,
7723 temp == op1 ? 0 : temp);
7724 }
7725 else
7726 {
7727 if (MACHOPIC_INDIRECT)
7728 op1 = machopic_indirect_data_reference (op1, 0);
7729 }
7730 if (op0 != op1)
7731 {
7732 insn = gen_rtx_SET (VOIDmode, op0, op1);
7733 emit_insn (insn);
7734 }
7735 return;
7736 #endif /* TARGET_MACHO */
7737 if (GET_CODE (op0) == MEM)
7738 op1 = force_reg (Pmode, op1);
7739 else
7740 {
7741 rtx temp = op0;
7742 if (GET_CODE (temp) != REG)
7743 temp = gen_reg_rtx (Pmode);
7744 temp = legitimize_pic_address (op1, temp);
7745 if (temp == op0)
7746 return;
7747 op1 = temp;
7748 }
7749 }
7750 else
7751 {
7752 if (GET_CODE (op0) == MEM
7753 && (PUSH_ROUNDING (GET_MODE_SIZE (mode)) != GET_MODE_SIZE (mode)
7754 || !push_operand (op0, mode))
7755 && GET_CODE (op1) == MEM)
7756 op1 = force_reg (mode, op1);
7757
7758 if (push_operand (op0, mode)
7759 && ! general_no_elim_operand (op1, mode))
7760 op1 = copy_to_mode_reg (mode, op1);
7761
7762 /* Force large constants in 64-bit compilation into a register
7763 to get them CSEed. */
7764 if (TARGET_64BIT && mode == DImode
7765 && immediate_operand (op1, mode)
7766 && !x86_64_zero_extended_value (op1)
7767 && !register_operand (op0, mode)
7768 && optimize && !reload_completed && !reload_in_progress)
7769 op1 = copy_to_mode_reg (mode, op1);
7770
7771 if (FLOAT_MODE_P (mode))
7772 {
7773 /* If we are loading a floating point constant to a register,
7774 force the value to memory now, since we'll get better code
7775 out of the back end. */
7776
7777 if (strict)
7778 ;
7779 else if (GET_CODE (op1) == CONST_DOUBLE
7780 && register_operand (op0, mode))
7781 op1 = validize_mem (force_const_mem (mode, op1));
7782 }
7783 }
7784
7785 insn = gen_rtx_SET (VOIDmode, op0, op1);
7786
7787 emit_insn (insn);
7788 }
7789
7790 void
7791 ix86_expand_vector_move (mode, operands)
7792 enum machine_mode mode;
7793 rtx operands[];
7794 {
7795 /* Force constants other than zero into memory. We do not know how
7796 the instructions used to build constants modify the upper 64 bits
7797 of the register; once we have that information we may be able
7798 to handle some of them more efficiently. */
7799 if ((reload_in_progress | reload_completed) == 0
7800 && register_operand (operands[0], mode)
7801 && CONSTANT_P (operands[1]))
7802 {
7803 rtx addr = gen_reg_rtx (Pmode);
7804 emit_move_insn (addr, XEXP (force_const_mem (mode, operands[1]), 0));
7805 operands[1] = gen_rtx_MEM (mode, addr);
7806 }
7807
7808 /* Make operand1 a register if it isn't already. */
7809 if ((reload_in_progress | reload_completed) == 0
7810 && !register_operand (operands[0], mode)
7811 && !register_operand (operands[1], mode))
7812 {
7813 rtx temp = force_reg (GET_MODE (operands[1]), operands[1]);
7814 emit_move_insn (operands[0], temp);
7815 return;
7816 }
7817
7818 emit_insn (gen_rtx_SET (VOIDmode, operands[0], operands[1]));
7819 }
7820
7821 /* Attempt to expand a binary operator. Make the expansion closer to the
7822 actual machine, than just general_operand, which would allow 3 separate
7823 memory references (one output, two input) in a single insn. */
7824
7825 void
7826 ix86_expand_binary_operator (code, mode, operands)
7827 enum rtx_code code;
7828 enum machine_mode mode;
7829 rtx operands[];
7830 {
7831 int matching_memory;
7832 rtx src1, src2, dst, op, clob;
7833
7834 dst = operands[0];
7835 src1 = operands[1];
7836 src2 = operands[2];
7837
7838 /* Recognize <var1> = <value> <op> <var1> for commutative operators */
7839 if (GET_RTX_CLASS (code) == 'c'
7840 && (rtx_equal_p (dst, src2)
7841 || immediate_operand (src1, mode)))
7842 {
7843 rtx temp = src1;
7844 src1 = src2;
7845 src2 = temp;
7846 }
7847
7848 /* If the destination is memory, and we do not have matching source
7849 operands, do things in registers. */
7850 matching_memory = 0;
7851 if (GET_CODE (dst) == MEM)
7852 {
7853 if (rtx_equal_p (dst, src1))
7854 matching_memory = 1;
7855 else if (GET_RTX_CLASS (code) == 'c'
7856 && rtx_equal_p (dst, src2))
7857 matching_memory = 2;
7858 else
7859 dst = gen_reg_rtx (mode);
7860 }
7861
7862 /* Both source operands cannot be in memory. */
7863 if (GET_CODE (src1) == MEM && GET_CODE (src2) == MEM)
7864 {
7865 if (matching_memory != 2)
7866 src2 = force_reg (mode, src2);
7867 else
7868 src1 = force_reg (mode, src1);
7869 }
7870
7871 /* If the operation is not commutative, source 1 cannot be a constant
7872 or non-matching memory. */
7873 if ((CONSTANT_P (src1)
7874 || (!matching_memory && GET_CODE (src1) == MEM))
7875 && GET_RTX_CLASS (code) != 'c')
7876 src1 = force_reg (mode, src1);
7877
7878 /* If optimizing, copy to regs to improve CSE */
7879 if (optimize && ! no_new_pseudos)
7880 {
7881 if (GET_CODE (dst) == MEM)
7882 dst = gen_reg_rtx (mode);
7883 if (GET_CODE (src1) == MEM)
7884 src1 = force_reg (mode, src1);
7885 if (GET_CODE (src2) == MEM)
7886 src2 = force_reg (mode, src2);
7887 }
7888
7889 /* Emit the instruction. */
7890
7891 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, mode, src1, src2));
7892 if (reload_in_progress)
7893 {
7894 /* Reload doesn't know about the flags register, and doesn't know that
7895 it doesn't want to clobber it. We can only do this with PLUS. */
7896 if (code != PLUS)
7897 abort ();
7898 emit_insn (op);
7899 }
7900 else
7901 {
7902 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
7903 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
7904 }
7905
7906 /* Fix up the destination if needed. */
7907 if (dst != operands[0])
7908 emit_move_insn (operands[0], dst);
7909 }
7910
7911 /* Return TRUE or FALSE depending on whether the binary operator meets the
7912 appropriate constraints. */
7913
7914 int
7915 ix86_binary_operator_ok (code, mode, operands)
7916 enum rtx_code code;
7917 enum machine_mode mode ATTRIBUTE_UNUSED;
7918 rtx operands[3];
7919 {
7920 /* Both source operands cannot be in memory. */
7921 if (GET_CODE (operands[1]) == MEM && GET_CODE (operands[2]) == MEM)
7922 return 0;
7923 /* If the operation is not commutative, source 1 cannot be a constant. */
7924 if (CONSTANT_P (operands[1]) && GET_RTX_CLASS (code) != 'c')
7925 return 0;
7926 /* If the destination is memory, we must have a matching source operand. */
7927 if (GET_CODE (operands[0]) == MEM
7928 && ! (rtx_equal_p (operands[0], operands[1])
7929 || (GET_RTX_CLASS (code) == 'c'
7930 && rtx_equal_p (operands[0], operands[2]))))
7931 return 0;
7932 /* If the operation is not commutative and source 1 is memory, we must
7933 have a matching destination. */
7934 if (GET_CODE (operands[1]) == MEM
7935 && GET_RTX_CLASS (code) != 'c'
7936 && ! rtx_equal_p (operands[0], operands[1]))
7937 return 0;
7938 return 1;
7939 }
7940
7941 /* Attempt to expand a unary operator. Make the expansion closer to the
7942 actual machine, than just general_operand, which would allow 2 separate
7943 memory references (one output, one input) in a single insn. */
7944
7945 void
7946 ix86_expand_unary_operator (code, mode, operands)
7947 enum rtx_code code;
7948 enum machine_mode mode;
7949 rtx operands[];
7950 {
7951 int matching_memory;
7952 rtx src, dst, op, clob;
7953
7954 dst = operands[0];
7955 src = operands[1];
7956
7957 /* If the destination is memory, and we do not have matching source
7958 operands, do things in registers. */
7959 matching_memory = 0;
7960 if (GET_CODE (dst) == MEM)
7961 {
7962 if (rtx_equal_p (dst, src))
7963 matching_memory = 1;
7964 else
7965 dst = gen_reg_rtx (mode);
7966 }
7967
7968 /* When source operand is memory, destination must match. */
7969 if (!matching_memory && GET_CODE (src) == MEM)
7970 src = force_reg (mode, src);
7971
7972 /* If optimizing, copy to regs to improve CSE */
7973 if (optimize && ! no_new_pseudos)
7974 {
7975 if (GET_CODE (dst) == MEM)
7976 dst = gen_reg_rtx (mode);
7977 if (GET_CODE (src) == MEM)
7978 src = force_reg (mode, src);
7979 }
7980
7981 /* Emit the instruction. */
7982
7983 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_e (code, mode, src));
7984 if (reload_in_progress || code == NOT)
7985 {
7986 /* Reload doesn't know about the flags register, and doesn't know that
7987 it doesn't want to clobber it. */
7988 if (code != NOT)
7989 abort ();
7990 emit_insn (op);
7991 }
7992 else
7993 {
7994 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
7995 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
7996 }
7997
7998 /* Fix up the destination if needed. */
7999 if (dst != operands[0])
8000 emit_move_insn (operands[0], dst);
8001 }
8002
8003 /* Return TRUE or FALSE depending on whether the unary operator meets the
8004 appropriate constraints. */
8005
8006 int
8007 ix86_unary_operator_ok (code, mode, operands)
8008 enum rtx_code code ATTRIBUTE_UNUSED;
8009 enum machine_mode mode ATTRIBUTE_UNUSED;
8010 rtx operands[2] ATTRIBUTE_UNUSED;
8011 {
8012 /* If one of operands is memory, source and destination must match. */
8013 if ((GET_CODE (operands[0]) == MEM
8014 || GET_CODE (operands[1]) == MEM)
8015 && ! rtx_equal_p (operands[0], operands[1]))
8016 return FALSE;
8017 return TRUE;
8018 }
8019
8020 /* Return TRUE or FALSE depending on whether the first SET in INSN
8021 has source and destination with matching CC modes, and that the
8022 CC mode is at least as constrained as REQ_MODE. */
8023
8024 int
8025 ix86_match_ccmode (insn, req_mode)
8026 rtx insn;
8027 enum machine_mode req_mode;
8028 {
8029 rtx set;
8030 enum machine_mode set_mode;
8031
8032 set = PATTERN (insn);
8033 if (GET_CODE (set) == PARALLEL)
8034 set = XVECEXP (set, 0, 0);
8035 if (GET_CODE (set) != SET)
8036 abort ();
8037 if (GET_CODE (SET_SRC (set)) != COMPARE)
8038 abort ();
8039
8040 set_mode = GET_MODE (SET_DEST (set));
8041 switch (set_mode)
8042 {
8043 case CCNOmode:
8044 if (req_mode != CCNOmode
8045 && (req_mode != CCmode
8046 || XEXP (SET_SRC (set), 1) != const0_rtx))
8047 return 0;
8048 break;
8049 case CCmode:
8050 if (req_mode == CCGCmode)
8051 return 0;
8052 /* FALLTHRU */
8053 case CCGCmode:
8054 if (req_mode == CCGOCmode || req_mode == CCNOmode)
8055 return 0;
8056 /* FALLTHRU */
8057 case CCGOCmode:
8058 if (req_mode == CCZmode)
8059 return 0;
8060 /* FALLTHRU */
8061 case CCZmode:
8062 break;
8063
8064 default:
8065 abort ();
8066 }
8067
8068 return (GET_MODE (SET_SRC (set)) == set_mode);
8069 }
8070
8071 /* Generate insn patterns to do an integer compare of OPERANDS. */
8072
8073 static rtx
8074 ix86_expand_int_compare (code, op0, op1)
8075 enum rtx_code code;
8076 rtx op0, op1;
8077 {
8078 enum machine_mode cmpmode;
8079 rtx tmp, flags;
8080
8081 cmpmode = SELECT_CC_MODE (code, op0, op1);
8082 flags = gen_rtx_REG (cmpmode, FLAGS_REG);
8083
8084 /* This is very simple, but making the interface the same as in the
8085 FP case makes the rest of the code easier. */
8086 tmp = gen_rtx_COMPARE (cmpmode, op0, op1);
8087 emit_insn (gen_rtx_SET (VOIDmode, flags, tmp));
8088
8089 /* Return the test that should be put into the flags user, i.e.
8090 the bcc, scc, or cmov instruction. */
8091 return gen_rtx_fmt_ee (code, VOIDmode, flags, const0_rtx);
8092 }
8093
8094 /* Figure out whether to use ordered or unordered fp comparisons.
8095 Return the appropriate mode to use. */
8096
8097 enum machine_mode
8098 ix86_fp_compare_mode (code)
8099 enum rtx_code code ATTRIBUTE_UNUSED;
8100 {
8101 /* ??? In order to make all comparisons reversible, we do all comparisons
8102 non-trapping when compiling for IEEE. Once gcc is able to distinguish
8103 all forms of trapping and nontrapping comparisons, we can make inequality
8104 comparisons trapping again, since it results in better code when using
8105 FCOM based compares. */
8106 return TARGET_IEEE_FP ? CCFPUmode : CCFPmode;
8107 }
8108
8109 enum machine_mode
8110 ix86_cc_mode (code, op0, op1)
8111 enum rtx_code code;
8112 rtx op0, op1;
8113 {
8114 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_FLOAT)
8115 return ix86_fp_compare_mode (code);
8116 switch (code)
8117 {
8118 /* Only zero flag is needed. */
8119 case EQ: /* ZF=0 */
8120 case NE: /* ZF!=0 */
8121 return CCZmode;
8122 /* Codes needing carry flag. */
8123 case GEU: /* CF=0 */
8124 case GTU: /* CF=0 & ZF=0 */
8125 case LTU: /* CF=1 */
8126 case LEU: /* CF=1 | ZF=1 */
8127 return CCmode;
8128 /* Codes possibly doable only with sign flag when
8129 comparing against zero. */
8130 case GE: /* SF=OF or SF=0 */
8131 case LT: /* SF<>OF or SF=1 */
8132 if (op1 == const0_rtx)
8133 return CCGOCmode;
8134 else
8135 /* For other cases Carry flag is not required. */
8136 return CCGCmode;
8137 /* Codes doable only with the sign flag when comparing
8138 against zero, but we lack a jump instruction for that,
8139 so we need to use relational tests against overflow,
8140 which thus needs to be zero. */
8141 case GT: /* ZF=0 & SF=OF */
8142 case LE: /* ZF=1 | SF<>OF */
8143 if (op1 == const0_rtx)
8144 return CCNOmode;
8145 else
8146 return CCGCmode;
8147 /* The strcmp pattern does (use flags), and combine may ask us for the
8148 proper mode. */
8149 case USE:
8150 return CCmode;
8151 default:
8152 abort ();
8153 }
8154 }
8155
8156 /* Return true if we should use an FCOMI instruction for this fp comparison. */
8157
8158 int
8159 ix86_use_fcomi_compare (code)
8160 enum rtx_code code ATTRIBUTE_UNUSED;
8161 {
8162 enum rtx_code swapped_code = swap_condition (code);
8163 return ((ix86_fp_comparison_cost (code) == ix86_fp_comparison_fcomi_cost (code))
8164 || (ix86_fp_comparison_cost (swapped_code)
8165 == ix86_fp_comparison_fcomi_cost (swapped_code)));
8166 }
8167
8168 /* Swap, force into registers, or otherwise massage the two operands
8169 to a fp comparison. The operands are updated in place; the new
8170 comparison code is returned. */
8171
8172 static enum rtx_code
8173 ix86_prepare_fp_compare_args (code, pop0, pop1)
8174 enum rtx_code code;
8175 rtx *pop0, *pop1;
8176 {
8177 enum machine_mode fpcmp_mode = ix86_fp_compare_mode (code);
8178 rtx op0 = *pop0, op1 = *pop1;
8179 enum machine_mode op_mode = GET_MODE (op0);
8180 int is_sse = SSE_REG_P (op0) | SSE_REG_P (op1);
8181
8182 /* All of the unordered compare instructions only work on registers.
8183 The same is true of the XFmode compare instructions and of the
8184 fcomi compare instructions. */
8185
8186 if (!is_sse
8187 && (fpcmp_mode == CCFPUmode
8188 || op_mode == XFmode
8189 || op_mode == TFmode
8190 || ix86_use_fcomi_compare (code)))
8191 {
8192 op0 = force_reg (op_mode, op0);
8193 op1 = force_reg (op_mode, op1);
8194 }
8195 else
8196 {
8197 /* %%% We only allow op1 in memory; op0 must be st(0). So swap
8198 things around if they appear profitable, otherwise force op0
8199 into a register. */
8200
8201 if (standard_80387_constant_p (op0) == 0
8202 || (GET_CODE (op0) == MEM
8203 && ! (standard_80387_constant_p (op1) == 0
8204 || GET_CODE (op1) == MEM)))
8205 {
8206 rtx tmp;
8207 tmp = op0, op0 = op1, op1 = tmp;
8208 code = swap_condition (code);
8209 }
8210
8211 if (GET_CODE (op0) != REG)
8212 op0 = force_reg (op_mode, op0);
8213
8214 if (CONSTANT_P (op1))
8215 {
8216 if (standard_80387_constant_p (op1))
8217 op1 = force_reg (op_mode, op1);
8218 else
8219 op1 = validize_mem (force_const_mem (op_mode, op1));
8220 }
8221 }
8222
8223 /* Try to rearrange the comparison to make it cheaper. */
8224 if (ix86_fp_comparison_cost (code)
8225 > ix86_fp_comparison_cost (swap_condition (code))
8226 && (GET_CODE (op1) == REG || !no_new_pseudos))
8227 {
8228 rtx tmp;
8229 tmp = op0, op0 = op1, op1 = tmp;
8230 code = swap_condition (code);
8231 if (GET_CODE (op0) != REG)
8232 op0 = force_reg (op_mode, op0);
8233 }
8234
8235 *pop0 = op0;
8236 *pop1 = op1;
8237 return code;
8238 }
8239
8240 /* Convert comparison codes we use to represent FP comparison to integer
8241 code that will result in proper branch. Return UNKNOWN if no such code
8242 is available. */
8243 static enum rtx_code
8244 ix86_fp_compare_code_to_integer (code)
8245 enum rtx_code code;
8246 {
8247 switch (code)
8248 {
8249 case GT:
8250 return GTU;
8251 case GE:
8252 return GEU;
8253 case ORDERED:
8254 case UNORDERED:
8255 return code;
8256 break;
8257 case UNEQ:
8258 return EQ;
8259 break;
8260 case UNLT:
8261 return LTU;
8262 break;
8263 case UNLE:
8264 return LEU;
8265 break;
8266 case LTGT:
8267 return NE;
8268 break;
8269 default:
8270 return UNKNOWN;
8271 }
8272 }
8273
8274 /* Split comparison code CODE into comparisons we can do using branch
8275 instructions. BYPASS_CODE is the comparison code for a branch that will
8276 branch around FIRST_CODE and SECOND_CODE. If one of the branches
8277 is not required, set its value to NIL.
8278 We never require more than two branches. */
8279 static void
8280 ix86_fp_comparison_codes (code, bypass_code, first_code, second_code)
8281 enum rtx_code code, *bypass_code, *first_code, *second_code;
8282 {
8283 *first_code = code;
8284 *bypass_code = NIL;
8285 *second_code = NIL;
8286
8287 /* The fcomi comparison sets flags as follows:
8288
8289 cmp ZF PF CF
8290 > 0 0 0
8291 < 0 0 1
8292 = 1 0 0
8293 un 1 1 1 */
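8294 For example, LT cannot be tested with a single branch: "jb" tests
 CF=1, but CF is also set for unordered operands, so the LT case
 below uses UNLT plus an UNORDERED bypass branch when IEEE
 conformance is required. */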
8294
8295 switch (code)
8296 {
8297 case GT: /* GTU - CF=0 & ZF=0 */
8298 case GE: /* GEU - CF=0 */
8299 case ORDERED: /* PF=0 */
8300 case UNORDERED: /* PF=1 */
8301 case UNEQ: /* EQ - ZF=1 */
8302 case UNLT: /* LTU - CF=1 */
8303 case UNLE: /* LEU - CF=1 | ZF=1 */
8304 case LTGT: /* EQ - ZF=0 */
8305 break;
8306 case LT: /* LTU - CF=1 - fails on unordered */
8307 *first_code = UNLT;
8308 *bypass_code = UNORDERED;
8309 break;
8310 case LE: /* LEU - CF=1 | ZF=1 - fails on unordered */
8311 *first_code = UNLE;
8312 *bypass_code = UNORDERED;
8313 break;
8314 case EQ: /* EQ - ZF=1 - fails on unordered */
8315 *first_code = UNEQ;
8316 *bypass_code = UNORDERED;
8317 break;
8318 case NE: /* NE - ZF=0 - fails on unordered */
8319 *first_code = LTGT;
8320 *second_code = UNORDERED;
8321 break;
8322 case UNGE: /* GEU - CF=0 - fails on unordered */
8323 *first_code = GE;
8324 *second_code = UNORDERED;
8325 break;
8326 case UNGT: /* GTU - CF=0 & ZF=0 - fails on unordered */
8327 *first_code = GT;
8328 *second_code = UNORDERED;
8329 break;
8330 default:
8331 abort ();
8332 }
8333 if (!TARGET_IEEE_FP)
8334 {
8335 *second_code = NIL;
8336 *bypass_code = NIL;
8337 }
8338 }
8339
8340 /* Return the cost of a comparison done with fcom + arithmetic operations on AX.
8341 All following functions use the number of instructions as the cost metric.
8342 In the future this should be tweaked to compute bytes for optimize_size and
8343 take into account performance of various instructions on various CPUs. */
8344 static int
8345 ix86_fp_comparison_arithmetics_cost (code)
8346 enum rtx_code code;
8347 {
8348 if (!TARGET_IEEE_FP)
8349 return 4;
8350 /* The cost of code output by ix86_expand_fp_compare. */
8351 switch (code)
8352 {
8353 case UNLE:
8354 case UNLT:
8355 case LTGT:
8356 case GT:
8357 case GE:
8358 case UNORDERED:
8359 case ORDERED:
8360 case UNEQ:
8361 return 4;
8362 break;
8363 case LT:
8364 case NE:
8365 case EQ:
8366 case UNGE:
8367 return 5;
8368 break;
8369 case LE:
8370 case UNGT:
8371 return 6;
8372 break;
8373 default:
8374 abort ();
8375 }
8376 }
8377
8378 /* Return cost of comparison done using fcomi operation.
8379 See ix86_fp_comparison_arithmetics_cost for the metrics. */
8380 static int
8381 ix86_fp_comparison_fcomi_cost (code)
8382 enum rtx_code code;
8383 {
8384 enum rtx_code bypass_code, first_code, second_code;
8385 /* Return an arbitrarily high cost when the instruction is not supported - this
8386 prevents gcc from using it. */
8387 if (!TARGET_CMOVE)
8388 return 1024;
8389 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
8390 return (bypass_code != NIL || second_code != NIL) + 2;
8391 }
8392
8393 /* Return cost of comparison done using sahf operation.
8394 See ix86_fp_comparison_arithmetics_cost for the metrics. */
8395 static int
8396 ix86_fp_comparison_sahf_cost (code)
8397 enum rtx_code code;
8398 {
8399 enum rtx_code bypass_code, first_code, second_code;
8400 /* Return an arbitrarily high cost when the instruction is not preferred - this
8401 keeps gcc from using it. */
8402 if (!TARGET_USE_SAHF && !optimize_size)
8403 return 1024;
8404 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
8405 return (bypass_code != NIL || second_code != NIL) + 3;
8406 }
8407
8408 /* Compute cost of the comparison done using any method.
8409 See ix86_fp_comparison_arithmetics_cost for the metrics. */
8410 static int
8411 ix86_fp_comparison_cost (code)
8412 enum rtx_code code;
8413 {
8414 int fcomi_cost, sahf_cost, arithmetics_cost = 1024;
8415 int min;
8416
8417 fcomi_cost = ix86_fp_comparison_fcomi_cost (code);
8418 sahf_cost = ix86_fp_comparison_sahf_cost (code);
8419
8420 min = arithmetics_cost = ix86_fp_comparison_arithmetics_cost (code);
8421 if (min > sahf_cost)
8422 min = sahf_cost;
8423 if (min > fcomi_cost)
8424 min = fcomi_cost;
8425 return min;
8426 }
8427
8428 /* Generate insn patterns to do a floating point compare of OPERANDS. */
8429
8430 static rtx
8431 ix86_expand_fp_compare (code, op0, op1, scratch, second_test, bypass_test)
8432 enum rtx_code code;
8433 rtx op0, op1, scratch;
8434 rtx *second_test;
8435 rtx *bypass_test;
8436 {
8437 enum machine_mode fpcmp_mode, intcmp_mode;
8438 rtx tmp, tmp2;
8439 int cost = ix86_fp_comparison_cost (code);
8440 enum rtx_code bypass_code, first_code, second_code;
8441
8442 fpcmp_mode = ix86_fp_compare_mode (code);
8443 code = ix86_prepare_fp_compare_args (code, &op0, &op1);
8444
8445 if (second_test)
8446 *second_test = NULL_RTX;
8447 if (bypass_test)
8448 *bypass_test = NULL_RTX;
8449
8450 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
8451
8452 /* Do fcomi/sahf based test when profitable. */
8453 if ((bypass_code == NIL || bypass_test)
8454 && (second_code == NIL || second_test)
8455 && ix86_fp_comparison_arithmetics_cost (code) > cost)
8456 {
8457 if (TARGET_CMOVE)
8458 {
8459 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
8460 tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
8461 tmp);
8462 emit_insn (tmp);
8463 }
8464 else
8465 {
8466 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
8467 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
8468 if (!scratch)
8469 scratch = gen_reg_rtx (HImode);
8470 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
8471 emit_insn (gen_x86_sahf_1 (scratch));
8472 }
8473
8474 /* The FP codes work out to act like unsigned. */
8475 intcmp_mode = fpcmp_mode;
8476 code = first_code;
8477 if (bypass_code != NIL)
8478 *bypass_test = gen_rtx_fmt_ee (bypass_code, VOIDmode,
8479 gen_rtx_REG (intcmp_mode, FLAGS_REG),
8480 const0_rtx);
8481 if (second_code != NIL)
8482 *second_test = gen_rtx_fmt_ee (second_code, VOIDmode,
8483 gen_rtx_REG (intcmp_mode, FLAGS_REG),
8484 const0_rtx);
8485 }
8486 else
8487 {
8488 /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first. */
8489 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
8490 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
8491 if (!scratch)
8492 scratch = gen_reg_rtx (HImode);
8493 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
8494
8495 /* In the unordered case, we have to check C2 for NaN's, which
8496 doesn't happen to work out to anything nice combination-wise.
8497 So do some bit twiddling on the value we've got in AH to come
8498 up with an appropriate set of condition codes. */
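 /* The high byte of the fnstsw result (AH) holds the FPU condition
 bits: C0 is 0x01, C2 is 0x04 and C3 is 0x40, so the 0x45 masks
 below cover all three. */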
8499
8500 intcmp_mode = CCNOmode;
8501 switch (code)
8502 {
8503 case GT:
8504 case UNGT:
8505 if (code == GT || !TARGET_IEEE_FP)
8506 {
8507 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
8508 code = EQ;
8509 }
8510 else
8511 {
8512 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
8513 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
8514 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x44)));
8515 intcmp_mode = CCmode;
8516 code = GEU;
8517 }
8518 break;
8519 case LT:
8520 case UNLT:
8521 if (code == LT && TARGET_IEEE_FP)
8522 {
8523 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
8524 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x01)));
8525 intcmp_mode = CCmode;
8526 code = EQ;
8527 }
8528 else
8529 {
8530 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x01)));
8531 code = NE;
8532 }
8533 break;
8534 case GE:
8535 case UNGE:
8536 if (code == GE || !TARGET_IEEE_FP)
8537 {
8538 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x05)));
8539 code = EQ;
8540 }
8541 else
8542 {
8543 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
8544 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
8545 GEN_INT (0x01)));
8546 code = NE;
8547 }
8548 break;
8549 case LE:
8550 case UNLE:
8551 if (code == LE && TARGET_IEEE_FP)
8552 {
8553 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
8554 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
8555 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
8556 intcmp_mode = CCmode;
8557 code = LTU;
8558 }
8559 else
8560 {
8561 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
8562 code = NE;
8563 }
8564 break;
8565 case EQ:
8566 case UNEQ:
8567 if (code == EQ && TARGET_IEEE_FP)
8568 {
8569 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
8570 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
8571 intcmp_mode = CCmode;
8572 code = EQ;
8573 }
8574 else
8575 {
8576 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
8577 code = NE;
8578 break;
8579 }
8580 break;
8581 case NE:
8582 case LTGT:
8583 if (code == NE && TARGET_IEEE_FP)
8584 {
8585 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
8586 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
8587 GEN_INT (0x40)));
8588 code = NE;
8589 }
8590 else
8591 {
8592 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
8593 code = EQ;
8594 }
8595 break;
8596
8597 case UNORDERED:
8598 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
8599 code = NE;
8600 break;
8601 case ORDERED:
8602 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
8603 code = EQ;
8604 break;
8605
8606 default:
8607 abort ();
8608 }
8609 }
8610
8611 /* Return the test that should be put into the flags user, i.e.
8612 the bcc, scc, or cmov instruction. */
8613 return gen_rtx_fmt_ee (code, VOIDmode,
8614 gen_rtx_REG (intcmp_mode, FLAGS_REG),
8615 const0_rtx);
8616 }
8617
8618 rtx
8619 ix86_expand_compare (code, second_test, bypass_test)
8620 enum rtx_code code;
8621 rtx *second_test, *bypass_test;
8622 {
8623 rtx op0, op1, ret;
8624 op0 = ix86_compare_op0;
8625 op1 = ix86_compare_op1;
8626
8627 if (second_test)
8628 *second_test = NULL_RTX;
8629 if (bypass_test)
8630 *bypass_test = NULL_RTX;
8631
8632 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_FLOAT)
8633 ret = ix86_expand_fp_compare (code, op0, op1, NULL_RTX,
8634 second_test, bypass_test);
8635 else
8636 ret = ix86_expand_int_compare (code, op0, op1);
8637
8638 return ret;
8639 }
8640
8641 /* Return true if the CODE will result in a nontrivial jump sequence. */
8642 bool
8643 ix86_fp_jump_nontrivial_p (code)
8644 enum rtx_code code;
8645 {
8646 enum rtx_code bypass_code, first_code, second_code;
8647 if (!TARGET_CMOVE)
8648 return true;
8649 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
8650 return bypass_code != NIL || second_code != NIL;
8651 }
8652
8653 void
8654 ix86_expand_branch (code, label)
8655 enum rtx_code code;
8656 rtx label;
8657 {
8658 rtx tmp;
8659
8660 switch (GET_MODE (ix86_compare_op0))
8661 {
8662 case QImode:
8663 case HImode:
8664 case SImode:
8665 simple:
8666 tmp = ix86_expand_compare (code, NULL, NULL);
8667 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
8668 gen_rtx_LABEL_REF (VOIDmode, label),
8669 pc_rtx);
8670 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
8671 return;
8672
8673 case SFmode:
8674 case DFmode:
8675 case XFmode:
8676 case TFmode:
8677 {
8678 rtvec vec;
8679 int use_fcomi;
8680 enum rtx_code bypass_code, first_code, second_code;
8681
8682 code = ix86_prepare_fp_compare_args (code, &ix86_compare_op0,
8683 &ix86_compare_op1);
8684
8685 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
8686
8687 /* Check whether we will use the natural sequence with one jump. If
8688 so, we can expand the jump early. Otherwise delay expansion by
8689 creating a compound insn so as not to confuse the optimizers. */
8690 if (bypass_code == NIL && second_code == NIL
8691 && TARGET_CMOVE)
8692 {
8693 ix86_split_fp_branch (code, ix86_compare_op0, ix86_compare_op1,
8694 gen_rtx_LABEL_REF (VOIDmode, label),
8695 pc_rtx, NULL_RTX);
8696 }
8697 else
8698 {
8699 tmp = gen_rtx_fmt_ee (code, VOIDmode,
8700 ix86_compare_op0, ix86_compare_op1);
8701 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
8702 gen_rtx_LABEL_REF (VOIDmode, label),
8703 pc_rtx);
8704 tmp = gen_rtx_SET (VOIDmode, pc_rtx, tmp);
8705
8706 use_fcomi = ix86_use_fcomi_compare (code);
8707 vec = rtvec_alloc (3 + !use_fcomi);
8708 RTVEC_ELT (vec, 0) = tmp;
8709 RTVEC_ELT (vec, 1)
8710 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, 18));
8711 RTVEC_ELT (vec, 2)
8712 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, 17));
8713 if (! use_fcomi)
8714 RTVEC_ELT (vec, 3)
8715 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (HImode));
8716
8717 emit_jump_insn (gen_rtx_PARALLEL (VOIDmode, vec));
8718 }
8719 return;
8720 }
8721
8722 case DImode:
8723 if (TARGET_64BIT)
8724 goto simple;
8725 /* Expand DImode branch into multiple compare+branch. */
8726 {
8727 rtx lo[2], hi[2], label2;
8728 enum rtx_code code1, code2, code3;
8729
8730 if (CONSTANT_P (ix86_compare_op0) && ! CONSTANT_P (ix86_compare_op1))
8731 {
8732 tmp = ix86_compare_op0;
8733 ix86_compare_op0 = ix86_compare_op1;
8734 ix86_compare_op1 = tmp;
8735 code = swap_condition (code);
8736 }
8737 split_di (&ix86_compare_op0, 1, lo+0, hi+0);
8738 split_di (&ix86_compare_op1, 1, lo+1, hi+1);
8739
8740 /* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to
8741 avoid two branches. This costs one extra insn, so disable when
8742 optimizing for size. */
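 /* Roughly: xorl hi1, hi0; xorl lo1, lo0; orl hi0, lo0; then a single
 je/jne on the result, instead of two compare-and-branch pairs. */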
8743
8744 if ((code == EQ || code == NE)
8745 && (!optimize_size
8746 || hi[1] == const0_rtx || lo[1] == const0_rtx))
8747 {
8748 rtx xor0, xor1;
8749
8750 xor1 = hi[0];
8751 if (hi[1] != const0_rtx)
8752 xor1 = expand_binop (SImode, xor_optab, xor1, hi[1],
8753 NULL_RTX, 0, OPTAB_WIDEN);
8754
8755 xor0 = lo[0];
8756 if (lo[1] != const0_rtx)
8757 xor0 = expand_binop (SImode, xor_optab, xor0, lo[1],
8758 NULL_RTX, 0, OPTAB_WIDEN);
8759
8760 tmp = expand_binop (SImode, ior_optab, xor1, xor0,
8761 NULL_RTX, 0, OPTAB_WIDEN);
8762
8763 ix86_compare_op0 = tmp;
8764 ix86_compare_op1 = const0_rtx;
8765 ix86_expand_branch (code, label);
8766 return;
8767 }
8768
8769 /* Otherwise, if we are doing less-than or greater-or-equal-than,
8770 op1 is a constant and the low word is zero, then we can just
8771 examine the high word. */
8772
8773 if (GET_CODE (hi[1]) == CONST_INT && lo[1] == const0_rtx)
8774 switch (code)
8775 {
8776 case LT: case LTU: case GE: case GEU:
8777 ix86_compare_op0 = hi[0];
8778 ix86_compare_op1 = hi[1];
8779 ix86_expand_branch (code, label);
8780 return;
8781 default:
8782 break;
8783 }
8784
8785 /* Otherwise, we need two or three jumps. */
8786
8787 label2 = gen_label_rtx ();
8788
8789 code1 = code;
8790 code2 = swap_condition (code);
8791 code3 = unsigned_condition (code);
8792
8793 switch (code)
8794 {
8795 case LT: case GT: case LTU: case GTU:
8796 break;
8797
8798 case LE: code1 = LT; code2 = GT; break;
8799 case GE: code1 = GT; code2 = LT; break;
8800 case LEU: code1 = LTU; code2 = GTU; break;
8801 case GEU: code1 = GTU; code2 = LTU; break;
8802
8803 case EQ: code1 = NIL; code2 = NE; break;
8804 case NE: code2 = NIL; break;
8805
8806 default:
8807 abort ();
8808 }
8809
8810 /*
8811 * a < b =>
8812 * if (hi(a) < hi(b)) goto true;
8813 * if (hi(a) > hi(b)) goto false;
8814 * if (lo(a) < lo(b)) goto true;
8815 * false:
8816 */
8817
8818 ix86_compare_op0 = hi[0];
8819 ix86_compare_op1 = hi[1];
8820
8821 if (code1 != NIL)
8822 ix86_expand_branch (code1, label);
8823 if (code2 != NIL)
8824 ix86_expand_branch (code2, label2);
8825
8826 ix86_compare_op0 = lo[0];
8827 ix86_compare_op1 = lo[1];
8828 ix86_expand_branch (code3, label);
8829
8830 if (code2 != NIL)
8831 emit_label (label2);
8832 return;
8833 }
8834
8835 default:
8836 abort ();
8837 }
8838 }
8839
8840 /* Split branch based on floating point condition. */
8841 void
8842 ix86_split_fp_branch (code, op1, op2, target1, target2, tmp)
8843 enum rtx_code code;
8844 rtx op1, op2, target1, target2, tmp;
8845 {
8846 rtx second, bypass;
8847 rtx label = NULL_RTX;
8848 rtx condition;
8849 int bypass_probability = -1, second_probability = -1, probability = -1;
8850 rtx i;
8851
8852 if (target2 != pc_rtx)
8853 {
8854 rtx tmp = target2;
8855 code = reverse_condition_maybe_unordered (code);
8856 target2 = target1;
8857 target1 = tmp;
8858 }
8859
8860 condition = ix86_expand_fp_compare (code, op1, op2,
8861 tmp, &second, &bypass);
8862
8863 if (split_branch_probability >= 0)
8864 {
8865 /* Distribute the probabilities across the jumps.
8866 Assume that BYPASS and SECOND always test
8867 for UNORDERED. */
8868 probability = split_branch_probability;
8869
8870 /* A value of 1 is low enough that the probability does not need
8871 to be updated. Later we may run some experiments and see
8872 if unordered values are more frequent in practice. */
8873 if (bypass)
8874 bypass_probability = 1;
8875 if (second)
8876 second_probability = 1;
8877 }
8878 if (bypass != NULL_RTX)
8879 {
8880 label = gen_label_rtx ();
8881 i = emit_jump_insn (gen_rtx_SET
8882 (VOIDmode, pc_rtx,
8883 gen_rtx_IF_THEN_ELSE (VOIDmode,
8884 bypass,
8885 gen_rtx_LABEL_REF (VOIDmode,
8886 label),
8887 pc_rtx)));
8888 if (bypass_probability >= 0)
8889 REG_NOTES (i)
8890 = gen_rtx_EXPR_LIST (REG_BR_PROB,
8891 GEN_INT (bypass_probability),
8892 REG_NOTES (i));
8893 }
8894 i = emit_jump_insn (gen_rtx_SET
8895 (VOIDmode, pc_rtx,
8896 gen_rtx_IF_THEN_ELSE (VOIDmode,
8897 condition, target1, target2)));
8898 if (probability >= 0)
8899 REG_NOTES (i)
8900 = gen_rtx_EXPR_LIST (REG_BR_PROB,
8901 GEN_INT (probability),
8902 REG_NOTES (i));
8903 if (second != NULL_RTX)
8904 {
8905 i = emit_jump_insn (gen_rtx_SET
8906 (VOIDmode, pc_rtx,
8907 gen_rtx_IF_THEN_ELSE (VOIDmode, second, target1,
8908 target2)));
8909 if (second_probability >= 0)
8910 REG_NOTES (i)
8911 = gen_rtx_EXPR_LIST (REG_BR_PROB,
8912 GEN_INT (second_probability),
8913 REG_NOTES (i));
8914 }
8915 if (label != NULL_RTX)
8916 emit_label (label);
8917 }
8918
8919 int
8920 ix86_expand_setcc (code, dest)
8921 enum rtx_code code;
8922 rtx dest;
8923 {
8924 rtx ret, tmp, tmpreg;
8925 rtx second_test, bypass_test;
8926
8927 if (GET_MODE (ix86_compare_op0) == DImode
8928 && !TARGET_64BIT)
8929 return 0; /* FAIL */
8930
8931 if (GET_MODE (dest) != QImode)
8932 abort ();
8933
8934 ret = ix86_expand_compare (code, &second_test, &bypass_test);
8935 PUT_MODE (ret, QImode);
8936
8937 tmp = dest;
8938 tmpreg = dest;
8939
8940 emit_insn (gen_rtx_SET (VOIDmode, tmp, ret));
8941 if (bypass_test || second_test)
8942 {
8943 rtx test = second_test;
8944 int bypass = 0;
8945 rtx tmp2 = gen_reg_rtx (QImode);
8946 if (bypass_test)
8947 {
8948 if (second_test)
8949 abort ();
8950 test = bypass_test;
8951 bypass = 1;
8952 PUT_CODE (test, reverse_condition_maybe_unordered (GET_CODE (test)));
8953 }
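 /* A bypass test is typically UNORDERED; its reversed form (ORDERED)
 is ANDed in below so the result becomes 0 for unordered operands.
 A second test, if any, is ORed in instead. */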
8954 PUT_MODE (test, QImode);
8955 emit_insn (gen_rtx_SET (VOIDmode, tmp2, test));
8956
8957 if (bypass)
8958 emit_insn (gen_andqi3 (tmp, tmpreg, tmp2));
8959 else
8960 emit_insn (gen_iorqi3 (tmp, tmpreg, tmp2));
8961 }
8962
8963 return 1; /* DONE */
8964 }
8965
8966 int
8967 ix86_expand_int_movcc (operands)
8968 rtx operands[];
8969 {
8970 enum rtx_code code = GET_CODE (operands[1]), compare_code;
8971 rtx compare_seq, compare_op;
8972 rtx second_test, bypass_test;
8973 enum machine_mode mode = GET_MODE (operands[0]);
8974
8975 /* When the compare code is not LTU or GEU, we cannot use the sbbl case.
8976 In case the comparison is done with an immediate, we can convert it to LTU or
8977 GEU by altering the integer. */
8978
8979 if ((code == LEU || code == GTU)
8980 && GET_CODE (ix86_compare_op1) == CONST_INT
8981 && mode != HImode
8982 && INTVAL (ix86_compare_op1) != -1
8983 /* For x86-64, the immediate field in the instruction is 32-bit
8984 signed, so we can't increment a DImode value above 0x7fffffff. */
8985 && (!TARGET_64BIT
8986 || GET_MODE (ix86_compare_op0) != DImode
8987 || INTVAL (ix86_compare_op1) != 0x7fffffff)
8988 && GET_CODE (operands[2]) == CONST_INT
8989 && GET_CODE (operands[3]) == CONST_INT)
8990 {
8991 if (code == LEU)
8992 code = LTU;
8993 else
8994 code = GEU;
8995 ix86_compare_op1 = gen_int_mode (INTVAL (ix86_compare_op1) + 1,
8996 GET_MODE (ix86_compare_op0));
8997 }
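 /* E.g. an unsigned "x <= 41 ? a : b" becomes "x < 42 ? a : b" (LTU),
 which maps directly onto the carry flag for the sbb trick below. */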
8998
8999 start_sequence ();
9000 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
9001 compare_seq = get_insns ();
9002 end_sequence ();
9003
9004 compare_code = GET_CODE (compare_op);
9005
9006 /* Don't attempt mode expansion here -- if we had to expand 5 or 6
9007 HImode insns, we'd be swallowed in word prefix ops. */
9008
9009 if (mode != HImode
9010 && (mode != DImode || TARGET_64BIT)
9011 && GET_CODE (operands[2]) == CONST_INT
9012 && GET_CODE (operands[3]) == CONST_INT)
9013 {
9014 rtx out = operands[0];
9015 HOST_WIDE_INT ct = INTVAL (operands[2]);
9016 HOST_WIDE_INT cf = INTVAL (operands[3]);
9017 HOST_WIDE_INT diff;
9018
9019 if ((compare_code == LTU || compare_code == GEU)
9020 && !second_test && !bypass_test)
9021 {
9022 /* Detect overlap between destination and compare sources. */
9023 rtx tmp = out;
9024
9025 /* To simplify rest of code, restrict to the GEU case. */
9026 if (compare_code == LTU)
9027 {
9028 int tmp = ct;
9029 ct = cf;
9030 cf = tmp;
9031 compare_code = reverse_condition (compare_code);
9032 code = reverse_condition (code);
9033 }
9034 diff = ct - cf;
9035
9036 if (reg_overlap_mentioned_p (out, ix86_compare_op0)
9037 || reg_overlap_mentioned_p (out, ix86_compare_op1))
9038 tmp = gen_reg_rtx (mode);
9039
9040 emit_insn (compare_seq);
9041 if (mode == DImode)
9042 emit_insn (gen_x86_movdicc_0_m1_rex64 (tmp));
9043 else
9044 emit_insn (gen_x86_movsicc_0_m1 (tmp));
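 /* tmp is now -1 (all ones, from sbb) when the GEU condition is false,
 i.e. when the carry flag is set, and 0 when it holds; the cases
 below turn that mask into the required ct/cf values. */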
9045
9046 if (diff == 1)
9047 {
9048 /*
9049 * cmpl op0,op1
9050 * sbbl dest,dest
9051 * [addl dest, ct]
9052 *
9053 * Size 5 - 8.
9054 */
9055 if (ct)
9056 tmp = expand_simple_binop (mode, PLUS,
9057 tmp, GEN_INT (ct),
9058 tmp, 1, OPTAB_DIRECT);
9059 }
9060 else if (cf == -1)
9061 {
9062 /*
9063 * cmpl op0,op1
9064 * sbbl dest,dest
9065 * orl $ct, dest
9066 *
9067 * Size 8.
9068 */
9069 tmp = expand_simple_binop (mode, IOR,
9070 tmp, GEN_INT (ct),
9071 tmp, 1, OPTAB_DIRECT);
9072 }
9073 else if (diff == -1 && ct)
9074 {
9075 /*
9076 * cmpl op0,op1
9077 * sbbl dest,dest
9078 * notl dest
9079 * [addl dest, cf]
9080 *
9081 * Size 8 - 11.
9082 */
9083 tmp = expand_simple_unop (mode, NOT, tmp, tmp, 1);
9084 if (cf)
9085 tmp = expand_simple_binop (mode, PLUS,
9086 tmp, GEN_INT (cf),
9087 tmp, 1, OPTAB_DIRECT);
9088 }
9089 else
9090 {
9091 /*
9092 * cmpl op0,op1
9093 * sbbl dest,dest
9094 * [notl dest]
9095 * andl cf - ct, dest
9096 * [addl dest, ct]
9097 *
9098 * Size 8 - 11.
9099 */
9100
9101 if (cf == 0)
9102 {
9103 cf = ct;
9104 ct = 0;
9105 tmp = expand_simple_unop (mode, NOT, tmp, tmp, 1);
9106 }
9107
9108 tmp = expand_simple_binop (mode, AND,
9109 tmp,
9110 gen_int_mode (cf - ct, mode),
9111 tmp, 1, OPTAB_DIRECT);
9112 if (ct)
9113 tmp = expand_simple_binop (mode, PLUS,
9114 tmp, GEN_INT (ct),
9115 tmp, 1, OPTAB_DIRECT);
9116 }
9117
9118 if (tmp != out)
9119 emit_move_insn (out, tmp);
9120
9121 return 1; /* DONE */
9122 }
9123
9124 diff = ct - cf;
9125 if (diff < 0)
9126 {
9127 HOST_WIDE_INT tmp;
9128 tmp = ct, ct = cf, cf = tmp;
9129 diff = -diff;
9130 if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0)))
9131 {
9132 /* We may be reversing an unordered compare to a normal compare, which
9133 is not valid in general (we may convert a non-trapping condition
9134 to a trapping one); however, on i386 we currently emit all
9135 comparisons unordered. */
9136 compare_code = reverse_condition_maybe_unordered (compare_code);
9137 code = reverse_condition_maybe_unordered (code);
9138 }
9139 else
9140 {
9141 compare_code = reverse_condition (compare_code);
9142 code = reverse_condition (code);
9143 }
9144 }
9145
9146 compare_code = NIL;
9147 if (GET_MODE_CLASS (GET_MODE (ix86_compare_op0)) == MODE_INT
9148 && GET_CODE (ix86_compare_op1) == CONST_INT)
9149 {
9150 if (ix86_compare_op1 == const0_rtx
9151 && (code == LT || code == GE))
9152 compare_code = code;
9153 else if (ix86_compare_op1 == constm1_rtx)
9154 {
9155 if (code == LE)
9156 compare_code = LT;
9157 else if (code == GT)
9158 compare_code = GE;
9159 }
9160 }
9161
9162 /* Optimize dest = (op0 < 0) ? -1 : cf. */
9163 if (compare_code != NIL
9164 && GET_MODE (ix86_compare_op0) == GET_MODE (out)
9165 && (cf == -1 || ct == -1))
9166 {
9167 /* If lea code below could be used, only optimize
9168 if it results in a 2 insn sequence. */
9169
9170 if (! (diff == 1 || diff == 2 || diff == 4 || diff == 8
9171 || diff == 3 || diff == 5 || diff == 9)
9172 || (compare_code == LT && ct == -1)
9173 || (compare_code == GE && cf == -1))
9174 {
9175 /*
9176 * notl op1 (if necessary)
9177 * sarl $31, op1
9178 * orl cf, op1
9179 */
9180 if (ct != -1)
9181 {
9182 cf = ct;
9183 ct = -1;
9184 code = reverse_condition (code);
9185 }
9186
9187 out = emit_store_flag (out, code, ix86_compare_op0,
9188 ix86_compare_op1, VOIDmode, 0, -1);
9189
9190 out = expand_simple_binop (mode, IOR,
9191 out, GEN_INT (cf),
9192 out, 1, OPTAB_DIRECT);
9193 if (out != operands[0])
9194 emit_move_insn (operands[0], out);
9195
9196 return 1; /* DONE */
9197 }
9198 }
9199
9200 if ((diff == 1 || diff == 2 || diff == 4 || diff == 8
9201 || diff == 3 || diff == 5 || diff == 9)
9202 && (mode != DImode || x86_64_sign_extended_value (GEN_INT (cf), 0)))
9203 {
9204 /*
9205 * xorl dest,dest
9206 * cmpl op1,op2
9207 * setcc dest
9208 * lea cf(dest*(ct-cf)),dest
9209 *
9210 * Size 14.
9211 *
9212 * This also catches the degenerate setcc-only case.
9213 */
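 /* For example, with diff == 5 and cf == 7 this emits a setcc giving
 0/1 in dest followed by "leal 7(%dest,%dest,4), %dest". */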
9214
9215 rtx tmp;
9216 int nops;
9217
9218 out = emit_store_flag (out, code, ix86_compare_op0,
9219 ix86_compare_op1, VOIDmode, 0, 1);
9220
9221 nops = 0;
9222 /* On x86_64 the lea instruction operates on Pmode, so we need
9223 to get the arithmetic done in the proper mode to match. */
9224 if (diff == 1)
9225 tmp = out;
9226 else
9227 {
9228 rtx out1;
9229 out1 = out;
9230 tmp = gen_rtx_MULT (mode, out1, GEN_INT (diff & ~1));
9231 nops++;
9232 if (diff & 1)
9233 {
9234 tmp = gen_rtx_PLUS (mode, tmp, out1);
9235 nops++;
9236 }
9237 }
9238 if (cf != 0)
9239 {
9240 tmp = gen_rtx_PLUS (mode, tmp, GEN_INT (cf));
9241 nops++;
9242 }
9243 if (tmp != out
9244 && (GET_CODE (tmp) != SUBREG || SUBREG_REG (tmp) != out))
9245 {
9246 if (nops == 1)
9247 {
9248 rtx clob;
9249
9250 clob = gen_rtx_REG (CCmode, FLAGS_REG);
9251 clob = gen_rtx_CLOBBER (VOIDmode, clob);
9252
9253 tmp = gen_rtx_SET (VOIDmode, out, tmp);
9254 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
9255 emit_insn (tmp);
9256 }
9257 else
9258 emit_insn (gen_rtx_SET (VOIDmode, out, tmp));
9259 }
9260 if (out != operands[0])
9261 emit_move_insn (operands[0], copy_rtx (out));
9262
9263 return 1; /* DONE */
9264 }
9265
9266 /*
9267 * General case: Jumpful:
9268 * xorl dest,dest cmpl op1, op2
9269 * cmpl op1, op2 movl ct, dest
9270 * setcc dest jcc 1f
9271 * decl dest movl cf, dest
9272 * andl (cf-ct),dest 1:
9273 * addl ct,dest
9274 *
9275 * Size 20. Size 14.
9276 *
9277 * This is reasonably steep, but branch mispredict costs are
9278 * high on modern cpus, so consider failing only if optimizing
9279 * for space.
9280 *
9281 * %%% Parameterize branch_cost on the tuning architecture, then
9282 * use that. The 80386 couldn't care less about mispredicts.
9283 */
9284
9285 if (!optimize_size && !TARGET_CMOVE)
9286 {
9287 if (cf == 0)
9288 {
9289 cf = ct;
9290 ct = 0;
9291 if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0)))
9292 /* We may be reversing an unordered compare to a normal compare,
9293 which is not valid in general (we may convert a non-trapping
9294 condition to a trapping one); however, on i386 we currently
9295 emit all comparisons unordered. */
9296 code = reverse_condition_maybe_unordered (code);
9297 else
9298 {
9299 code = reverse_condition (code);
9300 if (compare_code != NIL)
9301 compare_code = reverse_condition (compare_code);
9302 }
9303 }
9304
9305 if (compare_code != NIL)
9306 {
9307 /* notl op1 (if needed)
9308 sarl $31, op1
9309 andl (cf-ct), op1
9310 addl ct, op1
9311
9312 For x < 0 (resp. x <= -1) there will be no notl,
9313 so if possible swap the constants to get rid of the
9314 complement.
9315 True/false will be -1/0 while code below (store flag
9316 followed by decrement) is 0/-1, so the constants need
9317 to be exchanged once more. */
9318
9319 if (compare_code == GE || !cf)
9320 {
9321 code = reverse_condition (code);
9322 compare_code = LT;
9323 }
9324 else
9325 {
9326 HOST_WIDE_INT tmp = cf;
9327 cf = ct;
9328 ct = tmp;
9329 }
9330
9331 out = emit_store_flag (out, code, ix86_compare_op0,
9332 ix86_compare_op1, VOIDmode, 0, -1);
9333 }
9334 else
9335 {
9336 out = emit_store_flag (out, code, ix86_compare_op0,
9337 ix86_compare_op1, VOIDmode, 0, 1);
9338
9339 out = expand_simple_binop (mode, PLUS, out, constm1_rtx,
9340 out, 1, OPTAB_DIRECT);
9341 }
9342
9343 out = expand_simple_binop (mode, AND, out,
9344 gen_int_mode (cf - ct, mode),
9345 out, 1, OPTAB_DIRECT);
9346 if (ct)
9347 out = expand_simple_binop (mode, PLUS, out, GEN_INT (ct),
9348 out, 1, OPTAB_DIRECT);
9349 if (out != operands[0])
9350 emit_move_insn (operands[0], out);
9351
9352 return 1; /* DONE */
9353 }
9354 }
9355
9356 if (!TARGET_CMOVE)
9357 {
9358 /* Try a few things more with specific constants and a variable. */
9359
9360 optab op;
9361 rtx var, orig_out, out, tmp;
9362
9363 if (optimize_size)
9364 return 0; /* FAIL */
9365
9366 /* If one of the two operands is an interesting constant, load a
9367 constant with the above and mask it in with a logical operation. */
9368
9369 if (GET_CODE (operands[2]) == CONST_INT)
9370 {
9371 var = operands[3];
9372 if (INTVAL (operands[2]) == 0)
9373 operands[3] = constm1_rtx, op = and_optab;
9374 else if (INTVAL (operands[2]) == -1)
9375 operands[3] = const0_rtx, op = ior_optab;
9376 else
9377 return 0; /* FAIL */
9378 }
9379 else if (GET_CODE (operands[3]) == CONST_INT)
9380 {
9381 var = operands[2];
9382 if (INTVAL (operands[3]) == 0)
9383 operands[2] = constm1_rtx, op = and_optab;
9384 else if (INTVAL (operands[3]) == -1)
9385 operands[2] = const0_rtx, op = ior_optab;
9386 else
9387 return 0; /* FAIL */
9388 }
9389 else
9390 return 0; /* FAIL */
9391
9392 orig_out = operands[0];
9393 tmp = gen_reg_rtx (mode);
9394 operands[0] = tmp;
9395
9396 /* Recurse to get the constant loaded. */
9397 if (ix86_expand_int_movcc (operands) == 0)
9398 return 0; /* FAIL */
9399
9400 /* Mask in the interesting variable. */
9401 out = expand_binop (mode, op, var, tmp, orig_out, 0,
9402 OPTAB_WIDEN);
9403 if (out != orig_out)
9404 emit_move_insn (orig_out, out);
9405
9406 return 1; /* DONE */
9407 }
9408
9409 /*
9410 * For comparison with above,
9411 *
9412 * movl cf,dest
9413 * movl ct,tmp
9414 * cmpl op1,op2
9415 * cmovcc tmp,dest
9416 *
9417 * Size 15.
9418 */
9419
9420 if (! nonimmediate_operand (operands[2], mode))
9421 operands[2] = force_reg (mode, operands[2]);
9422 if (! nonimmediate_operand (operands[3], mode))
9423 operands[3] = force_reg (mode, operands[3]);
9424
9425 if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
9426 {
9427 rtx tmp = gen_reg_rtx (mode);
9428 emit_move_insn (tmp, operands[3]);
9429 operands[3] = tmp;
9430 }
9431 if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
9432 {
9433 rtx tmp = gen_reg_rtx (mode);
9434 emit_move_insn (tmp, operands[2]);
9435 operands[2] = tmp;
9436 }
9437 if (! register_operand (operands[2], VOIDmode)
9438 && ! register_operand (operands[3], VOIDmode))
9439 operands[2] = force_reg (mode, operands[2]);
9440
9441 emit_insn (compare_seq);
9442 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
9443 gen_rtx_IF_THEN_ELSE (mode,
9444 compare_op, operands[2],
9445 operands[3])));
9446 if (bypass_test)
9447 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
9448 gen_rtx_IF_THEN_ELSE (mode,
9449 bypass_test,
9450 operands[3],
9451 operands[0])));
9452 if (second_test)
9453 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
9454 gen_rtx_IF_THEN_ELSE (mode,
9455 second_test,
9456 operands[2],
9457 operands[0])));
9458
9459 return 1; /* DONE */
9460 }
9461
9462 int
9463 ix86_expand_fp_movcc (operands)
9464 rtx operands[];
9465 {
9466 enum rtx_code code;
9467 rtx tmp;
9468 rtx compare_op, second_test, bypass_test;
9469
9470 /* For SF/DFmode conditional moves based on comparisons
9471 in same mode, we may want to use SSE min/max instructions. */
9472 if (((TARGET_SSE_MATH && GET_MODE (operands[0]) == SFmode)
9473 || (TARGET_SSE2 && TARGET_SSE_MATH && GET_MODE (operands[0]) == DFmode))
9474 && GET_MODE (ix86_compare_op0) == GET_MODE (operands[0])
9475 /* The SSE comparisons do not support the LTGT/UNEQ pair. */
9476 && (!TARGET_IEEE_FP
9477 || (GET_CODE (operands[1]) != LTGT && GET_CODE (operands[1]) != UNEQ))
9478 /* We may be called from the post-reload splitter. */
9479 && (!REG_P (operands[0])
9480 || SSE_REG_P (operands[0])
9481 || REGNO (operands[0]) >= FIRST_PSEUDO_REGISTER))
9482 {
9483 rtx op0 = ix86_compare_op0, op1 = ix86_compare_op1;
9484 code = GET_CODE (operands[1]);
9485
9486 /* See if we have a (cross) match between the comparison operands and the
9487 conditional move operands. */
9488 if (rtx_equal_p (operands[2], op1))
9489 {
9490 rtx tmp = op0;
9491 op0 = op1;
9492 op1 = tmp;
9493 code = reverse_condition_maybe_unordered (code);
9494 }
9495 if (rtx_equal_p (operands[2], op0) && rtx_equal_p (operands[3], op1))
9496 {
9497 /* Check for min operation. */
9498 if (code == LT)
9499 {
9500 operands[0] = force_reg (GET_MODE (operands[0]), operands[0]);
9501 if (memory_operand (op0, VOIDmode))
9502 op0 = force_reg (GET_MODE (operands[0]), op0);
9503 if (GET_MODE (operands[0]) == SFmode)
9504 emit_insn (gen_minsf3 (operands[0], op0, op1));
9505 else
9506 emit_insn (gen_mindf3 (operands[0], op0, op1));
9507 return 1;
9508 }
9509 /* Check for max operation. */
9510 if (code == GT)
9511 {
9512 operands[0] = force_reg (GET_MODE (operands[0]), operands[0]);
9513 if (memory_operand (op0, VOIDmode))
9514 op0 = force_reg (GET_MODE (operands[0]), op0);
9515 if (GET_MODE (operands[0]) == SFmode)
9516 emit_insn (gen_maxsf3 (operands[0], op0, op1));
9517 else
9518 emit_insn (gen_maxdf3 (operands[0], op0, op1));
9519 return 1;
9520 }
9521 }
9522 /* Massage the condition to be a valid sse_comparison_operator. In case we are
9523 in non-IEEE mode, try to canonicalize the destination operand
9524 to be first in the comparison - this helps reload avoid extra
9525 moves. */
9526 if (!sse_comparison_operator (operands[1], VOIDmode)
9527 || (rtx_equal_p (operands[0], ix86_compare_op1) && !TARGET_IEEE_FP))
9528 {
9529 rtx tmp = ix86_compare_op0;
9530 ix86_compare_op0 = ix86_compare_op1;
9531 ix86_compare_op1 = tmp;
9532 operands[1] = gen_rtx_fmt_ee (swap_condition (GET_CODE (operands[1])),
9533 VOIDmode, ix86_compare_op0,
9534 ix86_compare_op1);
9535 }
9536 /* Similarly, try to make the result the first operand of the conditional
9537 move. We also don't support the NE comparison on SSE, so try to
9538 avoid it. */
9539 if ((rtx_equal_p (operands[0], operands[3])
9540 && (!TARGET_IEEE_FP || GET_CODE (operands[1]) != EQ))
9541 || (GET_CODE (operands[1]) == NE && TARGET_IEEE_FP))
9542 {
9543 rtx tmp = operands[2];
9544 operands[2] = operands[3];
9545 operands[3] = tmp;
9546 operands[1] = gen_rtx_fmt_ee (reverse_condition_maybe_unordered
9547 (GET_CODE (operands[1])),
9548 VOIDmode, ix86_compare_op0,
9549 ix86_compare_op1);
9550 }
9551 if (GET_MODE (operands[0]) == SFmode)
9552 emit_insn (gen_sse_movsfcc (operands[0], operands[1],
9553 operands[2], operands[3],
9554 ix86_compare_op0, ix86_compare_op1));
9555 else
9556 emit_insn (gen_sse_movdfcc (operands[0], operands[1],
9557 operands[2], operands[3],
9558 ix86_compare_op0, ix86_compare_op1));
9559 return 1;
9560 }
9561
9562 /* The floating point conditional move instructions don't directly
9563 support conditions resulting from a signed integer comparison. */
9564
9565 code = GET_CODE (operands[1]);
9566 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
9570
9571 if (!fcmov_comparison_operator (compare_op, VOIDmode))
9572 {
9573 if (second_test != NULL || bypass_test != NULL)
9574 abort ();
9575 tmp = gen_reg_rtx (QImode);
9576 ix86_expand_setcc (code, tmp);
9577 code = NE;
9578 ix86_compare_op0 = tmp;
9579 ix86_compare_op1 = const0_rtx;
9580 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
9581 }
9582 if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
9583 {
9584 tmp = gen_reg_rtx (GET_MODE (operands[0]));
9585 emit_move_insn (tmp, operands[3]);
9586 operands[3] = tmp;
9587 }
9588 if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
9589 {
9590 tmp = gen_reg_rtx (GET_MODE (operands[0]));
9591 emit_move_insn (tmp, operands[2]);
9592 operands[2] = tmp;
9593 }
9594
9595 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
9596 gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
9597 compare_op,
9598 operands[2],
9599 operands[3])));
9600 if (bypass_test)
9601 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
9602 gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
9603 bypass_test,
9604 operands[3],
9605 operands[0])));
9606 if (second_test)
9607 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
9608 gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
9609 second_test,
9610 operands[2],
9611 operands[0])));
9612
9613 return 1;
9614 }
9615
9616 /* Split operands 0 and 1 into SImode parts. Similar to split_di, but
9617 works for floating point parameters and non-offsettable memories.
9618 For pushes, it returns just stack offsets; the values will be saved
9619 in the right order. At most three parts are generated. */
9620
9621 static int
9622 ix86_split_to_parts (operand, parts, mode)
9623 rtx operand;
9624 rtx *parts;
9625 enum machine_mode mode;
9626 {
9627 int size;
9628
9629 if (!TARGET_64BIT)
9630 size = mode == TFmode ? 3 : (GET_MODE_SIZE (mode) / 4);
9631 else
9632 size = (GET_MODE_SIZE (mode) + 4) / 8;
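 /* E.g. in 32-bit mode DImode and DFmode split into two SImode parts
 and XFmode/TFmode into three; in 64-bit mode everything handled
 here splits into two parts. */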
9633
9634 if (GET_CODE (operand) == REG && MMX_REGNO_P (REGNO (operand)))
9635 abort ();
9636 if (size < 2 || size > 3)
9637 abort ();
9638
9639 /* Optimize constant pool references to immediates. This is used by fp
9640 moves, which force all constants to memory to allow combining. */
9641 if (GET_CODE (operand) == MEM && RTX_UNCHANGING_P (operand))
9642 {
9643 rtx tmp = maybe_get_pool_constant (operand);
9644 if (tmp)
9645 operand = tmp;
9646 }
9647
9648 if (GET_CODE (operand) == MEM && !offsettable_memref_p (operand))
9649 {
9650 /* The only non-offsettable memories we handle are pushes. */
9651 if (! push_operand (operand, VOIDmode))
9652 abort ();
9653
9654 operand = copy_rtx (operand);
9655 PUT_MODE (operand, Pmode);
9656 parts[0] = parts[1] = parts[2] = operand;
9657 }
9658 else if (!TARGET_64BIT)
9659 {
9660 if (mode == DImode)
9661 split_di (&operand, 1, &parts[0], &parts[1]);
9662 else
9663 {
9664 if (REG_P (operand))
9665 {
9666 if (!reload_completed)
9667 abort ();
9668 parts[0] = gen_rtx_REG (SImode, REGNO (operand) + 0);
9669 parts[1] = gen_rtx_REG (SImode, REGNO (operand) + 1);
9670 if (size == 3)
9671 parts[2] = gen_rtx_REG (SImode, REGNO (operand) + 2);
9672 }
9673 else if (offsettable_memref_p (operand))
9674 {
9675 operand = adjust_address (operand, SImode, 0);
9676 parts[0] = operand;
9677 parts[1] = adjust_address (operand, SImode, 4);
9678 if (size == 3)
9679 parts[2] = adjust_address (operand, SImode, 8);
9680 }
9681 else if (GET_CODE (operand) == CONST_DOUBLE)
9682 {
9683 REAL_VALUE_TYPE r;
9684 long l[4];
9685
9686 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
9687 switch (mode)
9688 {
9689 case XFmode:
9690 case TFmode:
9691 REAL_VALUE_TO_TARGET_LONG_DOUBLE (r, l);
9692 parts[2] = gen_int_mode (l[2], SImode);
9693 break;
9694 case DFmode:
9695 REAL_VALUE_TO_TARGET_DOUBLE (r, l);
9696 break;
9697 default:
9698 abort ();
9699 }
9700 parts[1] = gen_int_mode (l[1], SImode);
9701 parts[0] = gen_int_mode (l[0], SImode);
9702 }
9703 else
9704 abort ();
9705 }
9706 }
9707 else
9708 {
9709 if (mode == TImode)
9710 split_ti (&operand, 1, &parts[0], &parts[1]);
9711 if (mode == XFmode || mode == TFmode)
9712 {
9713 if (REG_P (operand))
9714 {
9715 if (!reload_completed)
9716 abort ();
9717 parts[0] = gen_rtx_REG (DImode, REGNO (operand) + 0);
9718 parts[1] = gen_rtx_REG (SImode, REGNO (operand) + 1);
9719 }
9720 else if (offsettable_memref_p (operand))
9721 {
9722 operand = adjust_address (operand, DImode, 0);
9723 parts[0] = operand;
9724 parts[1] = adjust_address (operand, SImode, 8);
9725 }
9726 else if (GET_CODE (operand) == CONST_DOUBLE)
9727 {
9728 REAL_VALUE_TYPE r;
9729 long l[3];
9730
9731 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
9732 REAL_VALUE_TO_TARGET_LONG_DOUBLE (r, l);
9733 /* Do not use shift by 32 to avoid warning on 32bit systems. */
9734 if (HOST_BITS_PER_WIDE_INT >= 64)
9735 parts[0]
9736 = gen_int_mode
9737 ((l[0] & (((HOST_WIDE_INT) 2 << 31) - 1))
9738 + ((((HOST_WIDE_INT) l[1]) << 31) << 1),
9739 DImode);
9740 else
9741 parts[0] = immed_double_const (l[0], l[1], DImode);
9742 parts[1] = gen_int_mode (l[2], SImode);
9743 }
9744 else
9745 abort ();
9746 }
9747 }
9748
9749 return size;
9750 }
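
/* A worked instance of the DFmode CONST_DOUBLE case above (the numbers are
   illustrative and assume the usual little-endian IEEE layout of this
   target): the constant 1.0 has the image 0x3FF0000000000000, so
   REAL_VALUE_TO_TARGET_DOUBLE yields l[0] = 0x00000000 and l[1] = 0x3FF00000,
   and the two SImode parts become (const_int 0) and (const_int 0x3ff00000).  */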
9751
9752 /* Emit insns to perform a move or push of DI, DF, and XF values.
9753 The value is split into word-sized parts and the required move insns
9754 are emitted directly; operands 2-4 are set to the destination parts
9755 in the correct order and operands 5-7 to the source parts. */
9756
9757 void
9758 ix86_split_long_move (operands)
9759 rtx operands[];
9760 {
9761 rtx part[2][3];
9762 int nparts;
9763 int push = 0;
9764 int collisions = 0;
9765 enum machine_mode mode = GET_MODE (operands[0]);
9766
9767 /* The DFmode expanders may ask us to move a double.
9768 For a 64-bit target this is a single move. By hiding that fact
9769 here we simplify the i386.md splitters. */
9770 if (GET_MODE_SIZE (GET_MODE (operands[0])) == 8 && TARGET_64BIT)
9771 {
9772 /* Optimize constant pool references to immediates. This is used by
9773 fp moves, which force all constants to memory to allow combining. */
9774
9775 if (GET_CODE (operands[1]) == MEM
9776 && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF
9777 && CONSTANT_POOL_ADDRESS_P (XEXP (operands[1], 0)))
9778 operands[1] = get_pool_constant (XEXP (operands[1], 0));
9779 if (push_operand (operands[0], VOIDmode))
9780 {
9781 operands[0] = copy_rtx (operands[0]);
9782 PUT_MODE (operands[0], Pmode);
9783 }
9784 else
9785 operands[0] = gen_lowpart (DImode, operands[0]);
9786 operands[1] = gen_lowpart (DImode, operands[1]);
9787 emit_move_insn (operands[0], operands[1]);
9788 return;
9789 }
9790
9791 /* The only non-offsettable memory we handle is a push. */
9792 if (push_operand (operands[0], VOIDmode))
9793 push = 1;
9794 else if (GET_CODE (operands[0]) == MEM
9795 && ! offsettable_memref_p (operands[0]))
9796 abort ();
9797
9798 nparts = ix86_split_to_parts (operands[1], part[1], GET_MODE (operands[0]));
9799 ix86_split_to_parts (operands[0], part[0], GET_MODE (operands[0]));
9800
9801 /* When emitting a push, take care of source operands that live on the stack. */
9802 if (push && GET_CODE (operands[1]) == MEM
9803 && reg_overlap_mentioned_p (stack_pointer_rtx, operands[1]))
9804 {
9805 if (nparts == 3)
9806 part[1][1] = change_address (part[1][1], GET_MODE (part[1][1]),
9807 XEXP (part[1][2], 0));
9808 part[1][0] = change_address (part[1][0], GET_MODE (part[1][0]),
9809 XEXP (part[1][1], 0));
9810 }
9811
9812 /* We need to do the copy in the right order in case an address register
9813 of the source overlaps the destination. */
9814 if (REG_P (part[0][0]) && GET_CODE (part[1][0]) == MEM)
9815 {
9816 if (reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0)))
9817 collisions++;
9818 if (reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
9819 collisions++;
9820 if (nparts == 3
9821 && reg_overlap_mentioned_p (part[0][2], XEXP (part[1][0], 0)))
9822 collisions++;
9823
9824 /* Collision in the middle part can be handled by reordering. */
9825 if (collisions == 1 && nparts == 3
9826 && reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
9827 {
9828 rtx tmp;
9829 tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
9830 tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
9831 }
9832
9833 /* If there are more collisions, we can't handle them by reordering.
9834 Do an lea to the last part and use only one colliding move. */
9835 else if (collisions > 1)
9836 {
9837 collisions = 1;
9838 emit_insn (gen_rtx_SET (VOIDmode, part[0][nparts - 1],
9839 XEXP (part[1][0], 0)));
9840 part[1][0] = change_address (part[1][0],
9841 TARGET_64BIT ? DImode : SImode,
9842 part[0][nparts - 1]);
9843 part[1][1] = adjust_address (part[1][0], VOIDmode, UNITS_PER_WORD);
9844 if (nparts == 3)
9845 part[1][2] = adjust_address (part[1][0], VOIDmode, 8);
9846 }
9847 }
9848
9849 if (push)
9850 {
9851 if (!TARGET_64BIT)
9852 {
9853 if (nparts == 3)
9854 {
9855 /* We use only the first 12 bytes of the TFmode value, but for pushing
9856 we are required to adjust the stack as if we were pushing a real
9857 16-byte value. */
9858 if (mode == TFmode && !TARGET_64BIT)
9859 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
9860 GEN_INT (-4)));
9861 emit_move_insn (part[0][2], part[1][2]);
9862 }
9863 }
9864 else
9865 {
9866 /* In 64-bit mode we don't have a 32-bit push available. If the operand
9867 is a register, that is OK - we will just use the larger counterpart.
9868 We also retype the memory - this comes from an attempt to avoid a REX
9869 prefix when moving the second half of a TFmode value. */
9870 if (GET_MODE (part[1][1]) == SImode)
9871 {
9872 if (GET_CODE (part[1][1]) == MEM)
9873 part[1][1] = adjust_address (part[1][1], DImode, 0);
9874 else if (REG_P (part[1][1]))
9875 part[1][1] = gen_rtx_REG (DImode, REGNO (part[1][1]));
9876 else
9877 abort ();
9878 if (GET_MODE (part[1][0]) == SImode)
9879 part[1][0] = part[1][1];
9880 }
9881 }
9882 emit_move_insn (part[0][1], part[1][1]);
9883 emit_move_insn (part[0][0], part[1][0]);
9884 return;
9885 }
9886
9887 /* Choose the correct order to avoid overwriting the source before it is copied (see the example after the function). */
9888 if ((REG_P (part[0][0])
9889 && REG_P (part[1][1])
9890 && (REGNO (part[0][0]) == REGNO (part[1][1])
9891 || (nparts == 3
9892 && REGNO (part[0][0]) == REGNO (part[1][2]))))
9893 || (collisions > 0
9894 && reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0))))
9895 {
9896 if (nparts == 3)
9897 {
9898 operands[2] = part[0][2];
9899 operands[3] = part[0][1];
9900 operands[4] = part[0][0];
9901 operands[5] = part[1][2];
9902 operands[6] = part[1][1];
9903 operands[7] = part[1][0];
9904 }
9905 else
9906 {
9907 operands[2] = part[0][1];
9908 operands[3] = part[0][0];
9909 operands[5] = part[1][1];
9910 operands[6] = part[1][0];
9911 }
9912 }
9913 else
9914 {
9915 if (nparts == 3)
9916 {
9917 operands[2] = part[0][0];
9918 operands[3] = part[0][1];
9919 operands[4] = part[0][2];
9920 operands[5] = part[1][0];
9921 operands[6] = part[1][1];
9922 operands[7] = part[1][2];
9923 }
9924 else
9925 {
9926 operands[2] = part[0][0];
9927 operands[3] = part[0][1];
9928 operands[5] = part[1][0];
9929 operands[6] = part[1][1];
9930 }
9931 }
9932 emit_move_insn (operands[2], operands[5]);
9933 emit_move_insn (operands[3], operands[6]);
9934 if (nparts == 3)
9935 emit_move_insn (operands[4], operands[7]);
9936
9937 return;
9938 }
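
/* As an illustration of the ordering above (hypothetical operands, not taken
   from real output): moving a DImode value from memory addressed by %eax into
   the %eax/%edx pair must copy the high word first,

	movl	4(%eax), %edx
	movl	(%eax), %eax

   because copying the low word first would clobber the address register
   before the second load.  */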
9939
9940 void
9941 ix86_split_ashldi (operands, scratch)
9942 rtx *operands, scratch;
9943 {
9944 rtx low[2], high[2];
9945 int count;
9946
9947 if (GET_CODE (operands[2]) == CONST_INT)
9948 {
9949 split_di (operands, 2, low, high);
9950 count = INTVAL (operands[2]) & 63;
9951
9952 if (count >= 32)
9953 {
9954 emit_move_insn (high[0], low[1]);
9955 emit_move_insn (low[0], const0_rtx);
9956
9957 if (count > 32)
9958 emit_insn (gen_ashlsi3 (high[0], high[0], GEN_INT (count - 32)));
9959 }
9960 else
9961 {
9962 if (!rtx_equal_p (operands[0], operands[1]))
9963 emit_move_insn (operands[0], operands[1]);
9964 emit_insn (gen_x86_shld_1 (high[0], low[0], GEN_INT (count)));
9965 emit_insn (gen_ashlsi3 (low[0], low[0], GEN_INT (count)));
9966 }
9967 }
9968 else
9969 {
9970 if (!rtx_equal_p (operands[0], operands[1]))
9971 emit_move_insn (operands[0], operands[1]);
9972
9973 split_di (operands, 1, low, high);
9974
9975 emit_insn (gen_x86_shld_1 (high[0], low[0], operands[2]));
9976 emit_insn (gen_ashlsi3 (low[0], low[0], operands[2]));
9977
9978 if (TARGET_CMOVE && (! no_new_pseudos || scratch))
9979 {
9980 if (! no_new_pseudos)
9981 scratch = force_reg (SImode, const0_rtx);
9982 else
9983 emit_move_insn (scratch, const0_rtx);
9984
9985 emit_insn (gen_x86_shift_adj_1 (high[0], low[0], operands[2],
9986 scratch));
9987 }
9988 else
9989 emit_insn (gen_x86_shift_adj_2 (high[0], low[0], operands[2]));
9990 }
9991 }
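
/* A minimal C-level sketch (illustrative only; the helper name and the use of
   <stdint.h> are not part of this file) of the word-wise computation the
   constant-count path above performs for a 64-bit left shift on a 32-bit
   target.  The count < 32 case is what the shld/shl pair implements.  */
#if 0
#include <stdint.h>

static void
ashldi_by_parts (uint32_t *lo, uint32_t *hi, unsigned int count)
{
  count &= 63;
  if (count >= 32)
    {
      /* The old low word becomes the high word; the low word is cleared.  */
      *hi = *lo << (count - 32);
      *lo = 0;
    }
  else if (count > 0)
    {
      /* shld: shift the high word left, filling from the low word.  */
      *hi = (*hi << count) | (*lo >> (32 - count));
      *lo <<= count;
    }
}
#endif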
9992
9993 void
9994 ix86_split_ashrdi (operands, scratch)
9995 rtx *operands, scratch;
9996 {
9997 rtx low[2], high[2];
9998 int count;
9999
10000 if (GET_CODE (operands[2]) == CONST_INT)
10001 {
10002 split_di (operands, 2, low, high);
10003 count = INTVAL (operands[2]) & 63;
10004
10005 if (count >= 32)
10006 {
10007 emit_move_insn (low[0], high[1]);
10008
10009 if (! reload_completed)
10010 emit_insn (gen_ashrsi3 (high[0], low[0], GEN_INT (31)));
10011 else
10012 {
10013 emit_move_insn (high[0], low[0]);
10014 emit_insn (gen_ashrsi3 (high[0], high[0], GEN_INT (31)));
10015 }
10016
10017 if (count > 32)
10018 emit_insn (gen_ashrsi3 (low[0], low[0], GEN_INT (count - 32)));
10019 }
10020 else
10021 {
10022 if (!rtx_equal_p (operands[0], operands[1]))
10023 emit_move_insn (operands[0], operands[1]);
10024 emit_insn (gen_x86_shrd_1 (low[0], high[0], GEN_INT (count)));
10025 emit_insn (gen_ashrsi3 (high[0], high[0], GEN_INT (count)));
10026 }
10027 }
10028 else
10029 {
10030 if (!rtx_equal_p (operands[0], operands[1]))
10031 emit_move_insn (operands[0], operands[1]);
10032
10033 split_di (operands, 1, low, high);
10034
10035 emit_insn (gen_x86_shrd_1 (low[0], high[0], operands[2]));
10036 emit_insn (gen_ashrsi3 (high[0], high[0], operands[2]));
10037
10038 if (TARGET_CMOVE && (! no_new_pseudos || scratch))
10039 {
10040 if (! no_new_pseudos)
10041 scratch = gen_reg_rtx (SImode);
10042 emit_move_insn (scratch, high[0]);
10043 emit_insn (gen_ashrsi3 (scratch, scratch, GEN_INT (31)));
10044 emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
10045 scratch));
10046 }
10047 else
10048 emit_insn (gen_x86_shift_adj_3 (low[0], high[0], operands[2]));
10049 }
10050 }
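
/* A matching C-level sketch (illustrative only, same caveats as above, and
   assuming arithmetic right shifts of signed values as on this target) of the
   constant-count arithmetic right shift: for counts of 32 or more the high
   word collapses to copies of the sign bit, otherwise the shrd/sar pair is
   used.  */
#if 0
#include <stdint.h>

static void
ashrdi_by_parts (uint32_t *lo, uint32_t *hi, unsigned int count)
{
  count &= 63;
  if (count >= 32)
    {
      /* The old high word, arithmetically shifted, becomes the low word;
	 the high word is filled with the sign bit.  */
      *lo = (uint32_t) ((int32_t) *hi >> (count - 32));
      *hi = (uint32_t) ((int32_t) *hi >> 31);
    }
  else if (count > 0)
    {
      /* shrd: shift the low word right, filling from the high word.  */
      *lo = (*lo >> count) | (*hi << (32 - count));
      *hi = (uint32_t) ((int32_t) *hi >> count);
    }
}
#endif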
10051
10052 void
10053 ix86_split_lshrdi (operands, scratch)
10054 rtx *operands, scratch;
10055 {
10056 rtx low[2], high[2];
10057 int count;
10058
10059 if (GET_CODE (operands[2]) == CONST_INT)
10060 {
10061 split_di (operands, 2, low, high);
10062 count = INTVAL (operands[2]) & 63;
10063
10064 if (count >= 32)
10065 {
10066 emit_move_insn (low[0], high[1]);
10067 emit_move_insn (high[0], const0_rtx);
10068
10069 if (count > 32)
10070 emit_insn (gen_lshrsi3 (low[0], low[0], GEN_INT (count - 32)));
10071 }
10072 else
10073 {
10074 if (!rtx_equal_p (operands[0], operands[1]))
10075 emit_move_insn (operands[0], operands[1]);
10076 emit_insn (gen_x86_shrd_1 (low[0], high[0], GEN_INT (count)));
10077 emit_insn (gen_lshrsi3 (high[0], high[0], GEN_INT (count)));
10078 }
10079 }
10080 else
10081 {
10082 if (!rtx_equal_p (operands[0], operands[1]))
10083 emit_move_insn (operands[0], operands[1]);
10084
10085 split_di (operands, 1, low, high);
10086
10087 emit_insn (gen_x86_shrd_1 (low[0], high[0], operands[2]));
10088 emit_insn (gen_lshrsi3 (high[0], high[0], operands[2]));
10089
10090 /* Heh. By reversing the arguments, we can reuse this pattern. */
10091 if (TARGET_CMOVE && (! no_new_pseudos || scratch))
10092 {
10093 if (! no_new_pseudos)
10094 scratch = force_reg (SImode, const0_rtx);
10095 else
10096 emit_move_insn (scratch, const0_rtx);
10097
10098 emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
10099 scratch));
10100 }
10101 else
10102 emit_insn (gen_x86_shift_adj_2 (low[0], high[0], operands[2]));
10103 }
10104 }
10105
10106 /* Helper function for the string operations below. Test VARIABLE for
10107 alignment to VALUE bytes; if it is aligned, jump to the returned label. */
10108 static rtx
10109 ix86_expand_aligntest (variable, value)
10110 rtx variable;
10111 int value;
10112 {
10113 rtx label = gen_label_rtx ();
10114 rtx tmpcount = gen_reg_rtx (GET_MODE (variable));
10115 if (GET_MODE (variable) == DImode)
10116 emit_insn (gen_anddi3 (tmpcount, variable, GEN_INT (value)));
10117 else
10118 emit_insn (gen_andsi3 (tmpcount, variable, GEN_INT (value)));
10119 emit_cmp_and_jump_insns (tmpcount, const0_rtx, EQ, 0, GET_MODE (variable),
10120 1, label);
10121 return label;
10122 }
10123
10124 /* Decrement COUNTREG by VALUE. */
10125 static void
10126 ix86_adjust_counter (countreg, value)
10127 rtx countreg;
10128 HOST_WIDE_INT value;
10129 {
10130 if (GET_MODE (countreg) == DImode)
10131 emit_insn (gen_adddi3 (countreg, countreg, GEN_INT (-value)));
10132 else
10133 emit_insn (gen_addsi3 (countreg, countreg, GEN_INT (-value)));
10134 }
10135
10136 /* Zero-extend the possibly SImode EXP to a Pmode register. */
10137 rtx
10138 ix86_zero_extend_to_Pmode (exp)
10139 rtx exp;
10140 {
10141 rtx r;
10142 if (GET_MODE (exp) == VOIDmode)
10143 return force_reg (Pmode, exp);
10144 if (GET_MODE (exp) == Pmode)
10145 return copy_to_mode_reg (Pmode, exp);
10146 r = gen_reg_rtx (Pmode);
10147 emit_insn (gen_zero_extendsidi2 (r, exp));
10148 return r;
10149 }
10150
10151 /* Expand string move (memcpy) operation. Use i386 string operations when
10152 profitable. expand_clrstr contains similar code. */
10153 int
10154 ix86_expand_movstr (dst, src, count_exp, align_exp)
10155 rtx dst, src, count_exp, align_exp;
10156 {
10157 rtx srcreg, destreg, countreg;
10158 enum machine_mode counter_mode;
10159 HOST_WIDE_INT align = 0;
10160 unsigned HOST_WIDE_INT count = 0;
10161 rtx insns;
10162
10163 start_sequence ();
10164
10165 if (GET_CODE (align_exp) == CONST_INT)
10166 align = INTVAL (align_exp);
10167
10168 /* This simple hack avoids all inlining code and simplifies code below. */
10169 if (!TARGET_ALIGN_STRINGOPS)
10170 align = 64;
10171
10172 if (GET_CODE (count_exp) == CONST_INT)
10173 count = INTVAL (count_exp);
10174
10175 /* Figure out the proper mode for the counter. For 32-bit targets it is
10176 always SImode; for 64-bit targets use SImode when possible, otherwise
10177 DImode. COUNT is the number of bytes to copy when known at compile time. */
10178 if (!TARGET_64BIT || GET_MODE (count_exp) == SImode
10179 || x86_64_zero_extended_value (count_exp))
10180 counter_mode = SImode;
10181 else
10182 counter_mode = DImode;
10183
10184 if (counter_mode != SImode && counter_mode != DImode)
10185 abort ();
10186
10187 destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
10188 srcreg = copy_to_mode_reg (Pmode, XEXP (src, 0));
10189
10190 emit_insn (gen_cld ());
10191
10192 /* When optimizing for size, emit a simple rep ; movsb instruction for
10193 counts not divisible by 4. */
10194
10195 if ((!optimize || optimize_size) && (count == 0 || (count & 0x03)))
10196 {
10197 countreg = ix86_zero_extend_to_Pmode (count_exp);
10198 if (TARGET_64BIT)
10199 emit_insn (gen_rep_movqi_rex64 (destreg, srcreg, countreg,
10200 destreg, srcreg, countreg));
10201 else
10202 emit_insn (gen_rep_movqi (destreg, srcreg, countreg,
10203 destreg, srcreg, countreg));
10204 }
10205
10206 /* For constant aligned (or small unaligned) copies use rep movsl
10207 followed by code copying the rest. For PentiumPro ensure 8 byte
10208 alignment to allow rep movsl acceleration. */
10209
10210 else if (count != 0
10211 && (align >= 8
10212 || (!TARGET_PENTIUMPRO && !TARGET_64BIT && align >= 4)
10213 || optimize_size || count < (unsigned int) 64))
10214 {
10215 int size = TARGET_64BIT && !optimize_size ? 8 : 4;
10216 if (count & ~(size - 1))
10217 {
10218 countreg = copy_to_mode_reg (counter_mode,
10219 GEN_INT ((count >> (size == 4 ? 2 : 3))
10220 & (TARGET_64BIT ? -1 : 0x3fffffff)));
10221 countreg = ix86_zero_extend_to_Pmode (countreg);
10222 if (size == 4)
10223 {
10224 if (TARGET_64BIT)
10225 emit_insn (gen_rep_movsi_rex64 (destreg, srcreg, countreg,
10226 destreg, srcreg, countreg));
10227 else
10228 emit_insn (gen_rep_movsi (destreg, srcreg, countreg,
10229 destreg, srcreg, countreg));
10230 }
10231 else
10232 emit_insn (gen_rep_movdi_rex64 (destreg, srcreg, countreg,
10233 destreg, srcreg, countreg));
10234 }
10235 if (size == 8 && (count & 0x04))
10236 emit_insn (gen_strmovsi (destreg, srcreg));
10237 if (count & 0x02)
10238 emit_insn (gen_strmovhi (destreg, srcreg));
10239 if (count & 0x01)
10240 emit_insn (gen_strmovqi (destreg, srcreg));
10241 }
10242 /* The generic code, based on the glibc implementation:
10243 - align the destination to 4 bytes (8-byte alignment is used for
10244 PentiumPro, allowing accelerated copying there)
10245 - copy the data using rep movsl
10246 - copy the rest (a C-level sketch of this sequence follows the function). */
10247 else
10248 {
10249 rtx countreg2;
10250 rtx label = NULL;
10251 int desired_alignment = (TARGET_PENTIUMPRO
10252 && (count == 0 || count >= (unsigned int) 260)
10253 ? 8 : UNITS_PER_WORD);
10254
10255 /* In case we don't know anything about the alignment, default to
10256 the library version, since it is usually equally fast and results in
10257 shorter code. */
10258 if (!TARGET_INLINE_ALL_STRINGOPS && align < UNITS_PER_WORD)
10259 {
10260 end_sequence ();
10261 return 0;
10262 }
10263
10264 if (TARGET_SINGLE_STRINGOP)
10265 emit_insn (gen_cld ());
10266
10267 countreg2 = gen_reg_rtx (Pmode);
10268 countreg = copy_to_mode_reg (counter_mode, count_exp);
10269
10270 /* We don't use loops to align the destination and to copy parts smaller
10271 than 4 bytes, because gcc is able to optimize such code better (in
10272 case the destination or the count really is aligned, gcc is often
10273 able to predict the branches) and also it is friendlier to the
10274 hardware branch prediction.
10275 
10276 Using loops is beneficial for the generic case, because we can
10277 handle small counts using the loops. Many CPUs (such as Athlon)
10278 have large REP prefix setup costs.
10279 
10280 This is quite costly. Maybe we can revisit this decision later or
10281 add some customizability to this code. */
10282
10283 if (count == 0 && align < desired_alignment)
10284 {
10285 label = gen_label_rtx ();
10286 emit_cmp_and_jump_insns (countreg, GEN_INT (desired_alignment - 1),
10287 LEU, 0, counter_mode, 1, label);
10288 }
10289 if (align <= 1)
10290 {
10291 rtx label = ix86_expand_aligntest (destreg, 1);
10292 emit_insn (gen_strmovqi (destreg, srcreg));
10293 ix86_adjust_counter (countreg, 1);
10294 emit_label (label);
10295 LABEL_NUSES (label) = 1;
10296 }
10297 if (align <= 2)
10298 {
10299 rtx label = ix86_expand_aligntest (destreg, 2);
10300 emit_insn (gen_strmovhi (destreg, srcreg));
10301 ix86_adjust_counter (countreg, 2);
10302 emit_label (label);
10303 LABEL_NUSES (label) = 1;
10304 }
10305 if (align <= 4 && desired_alignment > 4)
10306 {
10307 rtx label = ix86_expand_aligntest (destreg, 4);
10308 emit_insn (gen_strmovsi (destreg, srcreg));
10309 ix86_adjust_counter (countreg, 4);
10310 emit_label (label);
10311 LABEL_NUSES (label) = 1;
10312 }
10313
10314 if (label && desired_alignment > 4 && !TARGET_64BIT)
10315 {
10316 emit_label (label);
10317 LABEL_NUSES (label) = 1;
10318 label = NULL_RTX;
10319 }
10320 if (!TARGET_SINGLE_STRINGOP)
10321 emit_insn (gen_cld ());
10322 if (TARGET_64BIT)
10323 {
10324 emit_insn (gen_lshrdi3 (countreg2, ix86_zero_extend_to_Pmode (countreg),
10325 GEN_INT (3)));
10326 emit_insn (gen_rep_movdi_rex64 (destreg, srcreg, countreg2,
10327 destreg, srcreg, countreg2));
10328 }
10329 else
10330 {
10331 emit_insn (gen_lshrsi3 (countreg2, countreg, GEN_INT (2)));
10332 emit_insn (gen_rep_movsi (destreg, srcreg, countreg2,
10333 destreg, srcreg, countreg2));
10334 }
10335
10336 if (label)
10337 {
10338 emit_label (label);
10339 LABEL_NUSES (label) = 1;
10340 }
10341 if (TARGET_64BIT && align > 4 && count != 0 && (count & 4))
10342 emit_insn (gen_strmovsi (destreg, srcreg));
10343 if ((align <= 4 || count == 0) && TARGET_64BIT)
10344 {
10345 rtx label = ix86_expand_aligntest (countreg, 4);
10346 emit_insn (gen_strmovsi (destreg, srcreg));
10347 emit_label (label);
10348 LABEL_NUSES (label) = 1;
10349 }
10350 if (align > 2 && count != 0 && (count & 2))
10351 emit_insn (gen_strmovhi (destreg, srcreg));
10352 if (align <= 2 || count == 0)
10353 {
10354 rtx label = ix86_expand_aligntest (countreg, 2);
10355 emit_insn (gen_strmovhi (destreg, srcreg));
10356 emit_label (label);
10357 LABEL_NUSES (label) = 1;
10358 }
10359 if (align > 1 && count != 0 && (count & 1))
10360 emit_insn (gen_strmovqi (destreg, srcreg));
10361 if (align <= 1 || count == 0)
10362 {
10363 rtx label = ix86_expand_aligntest (countreg, 1);
10364 emit_insn (gen_strmovqi (destreg, srcreg));
10365 emit_label (label);
10366 LABEL_NUSES (label) = 1;
10367 }
10368 }
10369
10370 insns = get_insns ();
10371 end_sequence ();
10372
10373 ix86_set_move_mem_attrs (insns, dst, src, destreg, srcreg);
10374 emit_insn (insns);
10375 return 1;
10376 }
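
/* A rough C-level sketch (illustrative only; the helper name and library
   calls are not part of this file, and the 4-byte word assumes a 32-bit
   target) of the generic path above: align the destination, bulk-copy with
   word-sized moves standing in for "rep ; movsl", then finish the tail.  */
#if 0
#include <stddef.h>
#include <string.h>

static void
movstr_sketch (char *dst, const char *src, size_t count)
{
  /* Align the destination, adjusting the remaining count, as the
     ix86_expand_aligntest/ix86_adjust_counter pairs do.  */
  if (count >= 1 && ((size_t) dst & 1))
    { *dst++ = *src++; count--; }
  if (count >= 2 && ((size_t) dst & 2))
    { memcpy (dst, src, 2); dst += 2; src += 2; count -= 2; }

  /* The bulk copy, standing in for "rep ; movsl".  */
  while (count >= 4)
    { memcpy (dst, src, 4); dst += 4; src += 4; count -= 4; }

  /* Copy the remaining tail.  */
  if (count & 2)
    { memcpy (dst, src, 2); dst += 2; src += 2; }
  if (count & 1)
    *dst = *src;
}
#endif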
10377
10378 /* Expand string clear operation (bzero). Use i386 string operations when
10379 profitable. expand_movstr contains similar code. */
10380 int
10381 ix86_expand_clrstr (src, count_exp, align_exp)
10382 rtx src, count_exp, align_exp;
10383 {
10384 rtx destreg, zeroreg, countreg;
10385 enum machine_mode counter_mode;
10386 HOST_WIDE_INT align = 0;
10387 unsigned HOST_WIDE_INT count = 0;
10388
10389 if (GET_CODE (align_exp) == CONST_INT)
10390 align = INTVAL (align_exp);
10391
10392 /* This simple hack avoids all inlining code and simplifies code below. */
10393 if (!TARGET_ALIGN_STRINGOPS)
10394 align = 32;
10395
10396 if (GET_CODE (count_exp) == CONST_INT)
10397 count = INTVAL (count_exp);
10398 /* Figure out the proper mode for the counter. For 32-bit targets it is
10399 always SImode; for 64-bit targets use SImode when possible, otherwise
10400 DImode. COUNT is the number of bytes to clear when known at compile time. */
10401 if (!TARGET_64BIT || GET_MODE (count_exp) == SImode
10402 || x86_64_zero_extended_value (count_exp))
10403 counter_mode = SImode;
10404 else
10405 counter_mode = DImode;
10406
10407 destreg = copy_to_mode_reg (Pmode, XEXP (src, 0));
10408
10409 emit_insn (gen_cld ());
10410
10411 /* When optimizing for size, emit a simple rep ; stosb instruction for
10412 counts not divisible by 4. */
10413
10414 if ((!optimize || optimize_size) && (count == 0 || (count & 0x03)))
10415 {
10416 countreg = ix86_zero_extend_to_Pmode (count_exp);
10417 zeroreg = copy_to_mode_reg (QImode, const0_rtx);
10418 if (TARGET_64BIT)
10419 emit_insn (gen_rep_stosqi_rex64 (destreg, countreg, zeroreg,
10420 destreg, countreg));
10421 else
10422 emit_insn (gen_rep_stosqi (destreg, countreg, zeroreg,
10423 destreg, countreg));
10424 }
10425 else if (count != 0
10426 && (align >= 8
10427 || (!TARGET_PENTIUMPRO && !TARGET_64BIT && align >= 4)
10428 || optimize_size || count < (unsigned int) 64))
10429 {
10430 int size = TARGET_64BIT && !optimize_size ? 8 : 4;
10431 zeroreg = copy_to_mode_reg (size == 4 ? SImode : DImode, const0_rtx);
10432 if (count & ~(size - 1))
10433 {
10434 countreg = copy_to_mode_reg (counter_mode,
10435 GEN_INT ((count >> (size == 4 ? 2 : 3))
10436 & (TARGET_64BIT ? -1 : 0x3fffffff)));
10437 countreg = ix86_zero_extend_to_Pmode (countreg);
10438 if (size == 4)
10439 {
10440 if (TARGET_64BIT)
10441 emit_insn (gen_rep_stossi_rex64 (destreg, countreg, zeroreg,
10442 destreg, countreg));
10443 else
10444 emit_insn (gen_rep_stossi (destreg, countreg, zeroreg,
10445 destreg, countreg));
10446 }
10447 else
10448 emit_insn (gen_rep_stosdi_rex64 (destreg, countreg, zeroreg,
10449 destreg, countreg));
10450 }
10451 if (size == 8 && (count & 0x04))
10452 emit_insn (gen_strsetsi (destreg,
10453 gen_rtx_SUBREG (SImode, zeroreg, 0)));
10454 if (count & 0x02)
10455 emit_insn (gen_strsethi (destreg,
10456 gen_rtx_SUBREG (HImode, zeroreg, 0)));
10457 if (count & 0x01)
10458 emit_insn (gen_strsetqi (destreg,
10459 gen_rtx_SUBREG (QImode, zeroreg, 0)));
10460 }
10461 else
10462 {
10463 rtx countreg2;
10464 rtx label = NULL;
10465 /* Compute desired alignment of the string operation. */
10466 int desired_alignment = (TARGET_PENTIUMPRO
10467 && (count == 0 || count >= (unsigned int) 260)
10468 ? 8 : UNITS_PER_WORD);
10469
10470 /* In case we don't know anything about the alignment, default to
10471 the library version, since it is usually equally fast and results in
10472 shorter code. */
10473 if (!TARGET_INLINE_ALL_STRINGOPS && align < UNITS_PER_WORD)
10474 return 0;
10475
10476 if (TARGET_SINGLE_STRINGOP)
10477 emit_insn (gen_cld ());
10478
10479 countreg2 = gen_reg_rtx (Pmode);
10480 countreg = copy_to_mode_reg (counter_mode, count_exp);
10481 zeroreg = copy_to_mode_reg (Pmode, const0_rtx);
10482
10483 if (count == 0 && align < desired_alignment)
10484 {
10485 label = gen_label_rtx ();
10486 emit_cmp_and_jump_insns (countreg, GEN_INT (desired_alignment - 1),
10487 LEU, 0, counter_mode, 1, label);
10488 }
10489 if (align <= 1)
10490 {
10491 rtx label = ix86_expand_aligntest (destreg, 1);
10492 emit_insn (gen_strsetqi (destreg,
10493 gen_rtx_SUBREG (QImode, zeroreg, 0)));
10494 ix86_adjust_counter (countreg, 1);
10495 emit_label (label);
10496 LABEL_NUSES (label) = 1;
10497 }
10498 if (align <= 2)
10499 {
10500 rtx label = ix86_expand_aligntest (destreg, 2);
10501 emit_insn (gen_strsethi (destreg,
10502 gen_rtx_SUBREG (HImode, zeroreg, 0)));
10503 ix86_adjust_counter (countreg, 2);
10504 emit_label (label);
10505 LABEL_NUSES (label) = 1;
10506 }
10507 if (align <= 4 && desired_alignment > 4)
10508 {
10509 rtx label = ix86_expand_aligntest (destreg, 4);
10510 emit_insn (gen_strsetsi (destreg, (TARGET_64BIT
10511 ? gen_rtx_SUBREG (SImode, zeroreg, 0)
10512 : zeroreg)));
10513 ix86_adjust_counter (countreg, 4);
10514 emit_label (label);
10515 LABEL_NUSES (label) = 1;
10516 }
10517
10518 if (label && desired_alignment > 4 && !TARGET_64BIT)
10519 {
10520 emit_label (label);
10521 LABEL_NUSES (label) = 1;
10522 label = NULL_RTX;
10523 }
10524
10525 if (!TARGET_SINGLE_STRINGOP)
10526 emit_insn (gen_cld ());
10527 if (TARGET_64BIT)
10528 {
10529 emit_insn (gen_lshrdi3 (countreg2, ix86_zero_extend_to_Pmode (countreg),
10530 GEN_INT (3)));
10531 emit_insn (gen_rep_stosdi_rex64 (destreg, countreg2, zeroreg,
10532 destreg, countreg2));
10533 }
10534 else
10535 {
10536 emit_insn (gen_lshrsi3 (countreg2, countreg, GEN_INT (2)));
10537 emit_insn (gen_rep_stossi (destreg, countreg2, zeroreg,
10538 destreg, countreg2));
10539 }
10540 if (label)
10541 {
10542 emit_label (label);
10543 LABEL_NUSES (label) = 1;
10544 }
10545
10546 if (TARGET_64BIT && align > 4 && count != 0 && (count & 4))
10547 emit_insn (gen_strsetsi (destreg,
10548 gen_rtx_SUBREG (SImode, zeroreg, 0)));
10549 if (TARGET_64BIT && (align <= 4 || count == 0))
10550 {
10551 rtx label = ix86_expand_aligntest (countreg, 4);
10552 emit_insn (gen_strsetsi (destreg,
10553 gen_rtx_SUBREG (SImode, zeroreg, 0)));
10554 emit_label (label);
10555 LABEL_NUSES (label) = 1;
10556 }
10557 if (align > 2 && count != 0 && (count & 2))
10558 emit_insn (gen_strsethi (destreg,
10559 gen_rtx_SUBREG (HImode, zeroreg, 0)));
10560 if (align <= 2 || count == 0)
10561 {
10562 rtx label = ix86_expand_aligntest (countreg, 2);
10563 emit_insn (gen_strsethi (destreg,
10564 gen_rtx_SUBREG (HImode, zeroreg, 0)));
10565 emit_label (label);
10566 LABEL_NUSES (label) = 1;
10567 }
10568 if (align > 1 && count != 0 && (count & 1))
10569 emit_insn (gen_strsetqi (destreg,
10570 gen_rtx_SUBREG (QImode, zeroreg, 0)));
10571 if (align <= 1 || count == 0)
10572 {
10573 rtx label = ix86_expand_aligntest (countreg, 1);
10574 emit_insn (gen_strsetqi (destreg,
10575 gen_rtx_SUBREG (QImode, zeroreg, 0)));
10576 emit_label (label);
10577 LABEL_NUSES (label) = 1;
10578 }
10579 }
10580 return 1;
10581 }
10582 /* Expand strlen. */
10583 int
10584 ix86_expand_strlen (out, src, eoschar, align)
10585 rtx out, src, eoschar, align;
10586 {
10587 rtx addr, scratch1, scratch2, scratch3, scratch4;
10588
10589 /* The generic case of the strlen expander is long. Avoid expanding it
10590 unless TARGET_INLINE_ALL_STRINGOPS. */
10591
10592 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
10593 && !TARGET_INLINE_ALL_STRINGOPS
10594 && !optimize_size
10595 && (GET_CODE (align) != CONST_INT || INTVAL (align) < 4))
10596 return 0;
10597
10598 addr = force_reg (Pmode, XEXP (src, 0));
10599 scratch1 = gen_reg_rtx (Pmode);
10600
10601 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
10602 && !optimize_size)
10603 {
10604 /* Well, it seems that some optimizer does not combine a call like
10605 foo(strlen(bar), strlen(bar));
10606 when the move and the subtraction are done here. It does calculate
10607 the length just once when these instructions are done inside
10608 output_strlen_unroll(). But I think that since &bar[strlen(bar)] is
10609 often used, and I use one fewer register for the lifetime of
10610 output_strlen_unroll(), this is better. */
10611
10612 emit_move_insn (out, addr);
10613
10614 ix86_expand_strlensi_unroll_1 (out, align);
10615
10616 /* strlensi_unroll_1 returns the address of the zero at the end of
10617 the string, like memchr(), so compute the length by subtracting
10618 the start address. */
10619 if (TARGET_64BIT)
10620 emit_insn (gen_subdi3 (out, out, addr));
10621 else
10622 emit_insn (gen_subsi3 (out, out, addr));
10623 }
10624 else
10625 {
10626 scratch2 = gen_reg_rtx (Pmode);
10627 scratch3 = gen_reg_rtx (Pmode);
10628 scratch4 = force_reg (Pmode, constm1_rtx);
10629
10630 emit_move_insn (scratch3, addr);
10631 eoschar = force_reg (QImode, eoschar);
10632
10633 emit_insn (gen_cld ());
10634 if (TARGET_64BIT)
10635 {
10636 emit_insn (gen_strlenqi_rex_1 (scratch1, scratch3, eoschar,
10637 align, scratch4, scratch3));
10638 emit_insn (gen_one_cmpldi2 (scratch2, scratch1));
10639 emit_insn (gen_adddi3 (out, scratch2, constm1_rtx));
10640 }
10641 else
10642 {
10643 emit_insn (gen_strlenqi_1 (scratch1, scratch3, eoschar,
10644 align, scratch4, scratch3));
10645 emit_insn (gen_one_cmplsi2 (scratch2, scratch1));
10646 emit_insn (gen_addsi3 (out, scratch2, constm1_rtx));
10647 }
10648 }
10649 return 1;
10650 }
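
/* The arithmetic behind the one_cmpl/add pair above (assuming, as the
   strlenqi patterns arrange, that scratch1 receives the final value of the
   count register): the count starts at -1 and "repnz ; scasb" decrements it
   once per byte scanned, including the terminating zero, so for a string of
   length len it ends at -(len + 2).  In two's complement ~(-(len + 2)) is
   len + 1, and adding -1 yields len.  */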
10651
10652 /* Expand the appropriate insns for doing strlen if not just doing
10653 repnz; scasb
10654
10655 out = result, initialized with the start address
10656 align_rtx = alignment of the address.
10657 scratch = scratch register, initialized with the start address when
10658 not aligned, otherwise undefined
10659 
10660 This is just the body. It needs the initializations mentioned above and
10661 some address computation at the end. These things are done in i386.md. */
10662
10663 static void
10664 ix86_expand_strlensi_unroll_1 (out, align_rtx)
10665 rtx out, align_rtx;
10666 {
10667 int align;
10668 rtx tmp;
10669 rtx align_2_label = NULL_RTX;
10670 rtx align_3_label = NULL_RTX;
10671 rtx align_4_label = gen_label_rtx ();
10672 rtx end_0_label = gen_label_rtx ();
10673 rtx mem;
10674 rtx tmpreg = gen_reg_rtx (SImode);
10675 rtx scratch = gen_reg_rtx (SImode);
10676
10677 align = 0;
10678 if (GET_CODE (align_rtx) == CONST_INT)
10679 align = INTVAL (align_rtx);
10680
10681 /* Loop to check 1..3 bytes for null to get an aligned pointer. */
10682
10683 /* Is there a known alignment and is it less than 4? */
10684 if (align < 4)
10685 {
10686 rtx scratch1 = gen_reg_rtx (Pmode);
10687 emit_move_insn (scratch1, out);
10688 /* Is there a known alignment and is it not 2? */
10689 if (align != 2)
10690 {
10691 align_3_label = gen_label_rtx (); /* Label when aligned to 3-byte */
10692 align_2_label = gen_label_rtx (); /* Label when aligned to 2-byte */
10693
10694 /* Leave just the 3 lower bits. */
10695 align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (3),
10696 NULL_RTX, 0, OPTAB_WIDEN);
10697
10698 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
10699 Pmode, 1, align_4_label);
10700 emit_cmp_and_jump_insns (align_rtx, GEN_INT (2), EQ, NULL,
10701 Pmode, 1, align_2_label);
10702 emit_cmp_and_jump_insns (align_rtx, GEN_INT (2), GTU, NULL,
10703 Pmode, 1, align_3_label);
10704 }
10705 else
10706 {
10707 /* Since the alignment is 2, we have to check 2 or 0 bytes;
10708 check whether it is already aligned to a 4-byte boundary. */
10709
10710 align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (2),
10711 NULL_RTX, 0, OPTAB_WIDEN);
10712
10713 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
10714 Pmode, 1, align_4_label);
10715 }
10716
10717 mem = gen_rtx_MEM (QImode, out);
10718
10719 /* Now compare the bytes. */
10720
10721 /* Compare the first n unaligned bytes on a byte-by-byte basis. */
10722 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL,
10723 QImode, 1, end_0_label);
10724
10725 /* Increment the address. */
10726 if (TARGET_64BIT)
10727 emit_insn (gen_adddi3 (out, out, const1_rtx));
10728 else
10729 emit_insn (gen_addsi3 (out, out, const1_rtx));
10730
10731 /* Not needed with an alignment of 2 */
10732 if (align != 2)
10733 {
10734 emit_label (align_2_label);
10735
10736 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
10737 end_0_label);
10738
10739 if (TARGET_64BIT)
10740 emit_insn (gen_adddi3 (out, out, const1_rtx));
10741 else
10742 emit_insn (gen_addsi3 (out, out, const1_rtx));
10743
10744 emit_label (align_3_label);
10745 }
10746
10747 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
10748 end_0_label);
10749
10750 if (TARGET_64BIT)
10751 emit_insn (gen_adddi3 (out, out, const1_rtx));
10752 else
10753 emit_insn (gen_addsi3 (out, out, const1_rtx));
10754 }
10755
10756 /* Generate a loop to check 4 bytes at a time. It is not a good idea to
10757 align this loop; doing so only makes the program larger and does not
10758 improve speed. */
10759 emit_label (align_4_label);
10760
10761 mem = gen_rtx_MEM (SImode, out);
10762 emit_move_insn (scratch, mem);
10763 if (TARGET_64BIT)
10764 emit_insn (gen_adddi3 (out, out, GEN_INT (4)));
10765 else
10766 emit_insn (gen_addsi3 (out, out, GEN_INT (4)));
10767
10768 /* This formula yields a nonzero result iff one of the bytes is zero.
10769 This saves three branches inside the loop and many cycles. */
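/* For example, for the word 0x41004242, which contains a zero byte:
   0x41004242 - 0x01010101 = 0x3FFF4141 and ~0x41004242 = 0xBEFFBDBD, so
   (0x3FFF4141 & 0xBEFFBDBD) & 0x80808080 = 0x00800000, which is nonzero.
   For a word with no zero byte, such as 0x41424344, the same computation
   yields 0 and the loop below keeps scanning.  */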
10770
10771 emit_insn (gen_addsi3 (tmpreg, scratch, GEN_INT (-0x01010101)));
10772 emit_insn (gen_one_cmplsi2 (scratch, scratch));
10773 emit_insn (gen_andsi3 (tmpreg, tmpreg, scratch));
10774 emit_insn (gen_andsi3 (tmpreg, tmpreg,
10775 gen_int_mode (0x80808080, SImode)));
10776 emit_cmp_and_jump_insns (tmpreg, const0_rtx, EQ, 0, SImode, 1,
10777 align_4_label);
10778
10779 if (TARGET_CMOVE)
10780 {
10781 rtx reg = gen_reg_rtx (SImode);
10782 rtx reg2 = gen_reg_rtx (Pmode);
10783 emit_move_insn (reg, tmpreg);
10784 emit_insn (gen_lshrsi3 (reg, reg, GEN_INT (16)));
10785
10786 /* If zero is not in the first two bytes, move two bytes forward. */
10787 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
10788 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
10789 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
10790 emit_insn (gen_rtx_SET (VOIDmode, tmpreg,
10791 gen_rtx_IF_THEN_ELSE (SImode, tmp,
10792 reg,
10793 tmpreg)));
10794 /* Emit lea manually to avoid clobbering of flags. */
10795 emit_insn (gen_rtx_SET (SImode, reg2,
10796 gen_rtx_PLUS (Pmode, out, GEN_INT (2))));
10797
10798 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
10799 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
10800 emit_insn (gen_rtx_SET (VOIDmode, out,
10801 gen_rtx_IF_THEN_ELSE (Pmode, tmp,
10802 reg2,
10803 out)));
10804
10805 }
10806 else
10807 {
10808 rtx end_2_label = gen_label_rtx ();
10809 /* Is zero in the first two bytes? */
10810
10811 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
10812 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
10813 tmp = gen_rtx_NE (VOIDmode, tmp, const0_rtx);
10814 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
10815 gen_rtx_LABEL_REF (VOIDmode, end_2_label),
10816 pc_rtx);
10817 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
10818 JUMP_LABEL (tmp) = end_2_label;
10819
10820 /* Not in the first two. Move two bytes forward. */
10821 emit_insn (gen_lshrsi3 (tmpreg, tmpreg, GEN_INT (16)));
10822 if (TARGET_64BIT)
10823 emit_insn (gen_adddi3 (out, out, GEN_INT (2)));
10824 else
10825 emit_insn (gen_addsi3 (out, out, GEN_INT (2)));
10826
10827 emit_label (end_2_label);
10828
10829 }
10830
10831 /* Avoid a branch when fixing up the byte position. */
10832 tmpreg = gen_lowpart (QImode, tmpreg);
10833 emit_insn (gen_addqi3_cc (tmpreg, tmpreg, tmpreg));
10834 if (TARGET_64BIT)
10835 emit_insn (gen_subdi3_carry_rex64 (out, out, GEN_INT (3)));
10836 else
10837 emit_insn (gen_subsi3_carry (out, out, GEN_INT (3)));
10838
10839 emit_label (end_0_label);
10840 }
10841
10842 void
10843 ix86_expand_call (retval, fnaddr, callarg1, callarg2, pop)
10844 rtx retval, fnaddr, callarg1, callarg2, pop;
10845 {
10846 rtx use = NULL, call;
10847
10848 if (pop == const0_rtx)
10849 pop = NULL;
10850 if (TARGET_64BIT && pop)
10851 abort ();
10852
10853 #if TARGET_MACHO
10854 if (flag_pic && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF)
10855 fnaddr = machopic_indirect_call_target (fnaddr);
10856 #else
10857 /* Static functions and indirect calls don't need the pic register. */
10858 if (! TARGET_64BIT && flag_pic
10859 && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
10860 && ! SYMBOL_REF_FLAG (XEXP (fnaddr, 0)))
10861 use_reg (&use, pic_offset_table_rtx);
10862
10863 if (TARGET_64BIT && INTVAL (callarg2) >= 0)
10864 {
10865 rtx al = gen_rtx_REG (QImode, 0);
10866 emit_move_insn (al, callarg2);
10867 use_reg (&use, al);
10868 }
10869 #endif /* TARGET_MACHO */
10870
10871 if (! call_insn_operand (XEXP (fnaddr, 0), Pmode))
10872 {
10873 fnaddr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0));
10874 fnaddr = gen_rtx_MEM (QImode, fnaddr);
10875 }
10876
10877 call = gen_rtx_CALL (VOIDmode, fnaddr, callarg1);
10878 if (retval)
10879 call = gen_rtx_SET (VOIDmode, retval, call);
10880 if (pop)
10881 {
10882 pop = gen_rtx_PLUS (Pmode, stack_pointer_rtx, pop);
10883 pop = gen_rtx_SET (VOIDmode, stack_pointer_rtx, pop);
10884 call = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, call, pop));
10885 }
10886
10887 call = emit_call_insn (call);
10888 if (use)
10889 CALL_INSN_FUNCTION_USAGE (call) = use;
10890 }
10891
10892 \f
10893 /* Clear stack slot assignments remembered from previous functions.
10894 This is called from INIT_EXPANDERS once before RTL is emitted for each
10895 function. */
10896
10897 static struct machine_function *
10898 ix86_init_machine_status ()
10899 {
10900 return ggc_alloc_cleared (sizeof (struct machine_function));
10901 }
10902
10903 /* Return a MEM corresponding to a stack slot with mode MODE.
10904 Allocate a new slot if necessary.
10905
10906 The RTL for a function can have several slots available: N is
10907 which slot to use. */
10908
10909 rtx
10910 assign_386_stack_local (mode, n)
10911 enum machine_mode mode;
10912 int n;
10913 {
10914 if (n < 0 || n >= MAX_386_STACK_LOCALS)
10915 abort ();
10916
10917 if (ix86_stack_locals[(int) mode][n] == NULL_RTX)
10918 ix86_stack_locals[(int) mode][n]
10919 = assign_stack_local (mode, GET_MODE_SIZE (mode), 0);
10920
10921 return ix86_stack_locals[(int) mode][n];
10922 }
10923
10924 /* Construct the SYMBOL_REF for the tls_get_addr function. */
10925
10926 static GTY(()) rtx ix86_tls_symbol;
10927 rtx
10928 ix86_tls_get_addr ()
10929 {
10930
10931 if (!ix86_tls_symbol)
10932 {
10933 ix86_tls_symbol = gen_rtx_SYMBOL_REF (Pmode,
10934 (TARGET_GNU_TLS && !TARGET_64BIT)
10935 ? "___tls_get_addr"
10936 : "__tls_get_addr");
10937 }
10938
10939 return ix86_tls_symbol;
10940 }
10941 \f
10942 /* Calculate the length of the memory address in the instruction
10943 encoding. Does not include the one-byte modrm, opcode, or prefix. */
10944
10945 static int
10946 memory_address_length (addr)
10947 rtx addr;
10948 {
10949 struct ix86_address parts;
10950 rtx base, index, disp;
10951 int len;
10952
10953 if (GET_CODE (addr) == PRE_DEC
10954 || GET_CODE (addr) == POST_INC
10955 || GET_CODE (addr) == PRE_MODIFY
10956 || GET_CODE (addr) == POST_MODIFY)
10957 return 0;
10958
10959 if (! ix86_decompose_address (addr, &parts))
10960 abort ();
10961
10962 base = parts.base;
10963 index = parts.index;
10964 disp = parts.disp;
10965 len = 0;
10966
10967 /* Register Indirect. */
10968 if (base && !index && !disp)
10969 {
10970 /* Special cases: ebp and esp need the two-byte modrm form. */
10971 if (addr == stack_pointer_rtx
10972 || addr == arg_pointer_rtx
10973 || addr == frame_pointer_rtx
10974 || addr == hard_frame_pointer_rtx)
10975 len = 1;
10976 }
10977
10978 /* Direct Addressing. */
10979 else if (disp && !base && !index)
10980 len = 4;
10981
10982 else
10983 {
10984 /* Find the length of the displacement constant. */
10985 if (disp)
10986 {
10987 if (GET_CODE (disp) == CONST_INT
10988 && CONST_OK_FOR_LETTER_P (INTVAL (disp), 'K'))
10989 len = 1;
10990 else
10991 len = 4;
10992 }
10993
10994 /* An index requires the two-byte modrm form. */
10995 if (index)
10996 len += 1;
10997 }
10998
10999 return len;
11000 }
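
/* A few illustrative cases (AT&T-syntax operands, examples only) and the
   number of address bytes beyond the modrm byte that the rules above assign:
       (%eax)          -> 0   register indirect
       (%esp)          -> 1   esp/ebp need the two-byte modrm form
       8(%eax)         -> 1   disp8
       0x1234(%eax)    -> 4   disp32
       foo             -> 4   direct addressing
       (%eax,%ecx,4)   -> 1   the index forces a SIB byte  */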
11001
11002 /* Compute the default value for the "length_immediate" attribute. When
11003 SHORTFORM is set, expect that the insn has an 8-bit immediate alternative. */
11004 int
11005 ix86_attr_length_immediate_default (insn, shortform)
11006 rtx insn;
11007 int shortform;
11008 {
11009 int len = 0;
11010 int i;
11011 extract_insn_cached (insn);
11012 for (i = recog_data.n_operands - 1; i >= 0; --i)
11013 if (CONSTANT_P (recog_data.operand[i]))
11014 {
11015 if (len)
11016 abort ();
11017 if (shortform
11018 && GET_CODE (recog_data.operand[i]) == CONST_INT
11019 && CONST_OK_FOR_LETTER_P (INTVAL (recog_data.operand[i]), 'K'))
11020 len = 1;
11021 else
11022 {
11023 switch (get_attr_mode (insn))
11024 {
11025 case MODE_QI:
11026 len+=1;
11027 break;
11028 case MODE_HI:
11029 len+=2;
11030 break;
11031 case MODE_SI:
11032 len+=4;
11033 break;
11034 /* Immediates for DImode instructions are encoded as 32-bit sign-extended values. */
11035 case MODE_DI:
11036 len+=4;
11037 break;
11038 default:
11039 fatal_insn ("unknown insn mode", insn);
11040 }
11041 }
11042 }
11043 return len;
11044 }
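
/* For instance (the operands are illustrative only): "addl $3, %eax" has an
   8-bit immediate alternative, so with SHORTFORM set the result is 1;
   "addl $1000, %eax" needs a full SImode immediate, giving 4; and a DImode
   add with an immediate also gives 4, since such immediates are encoded as
   32-bit sign-extended values.  */
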
11045 /* Compute the default value for the "length_address" attribute. */
11046 int
11047 ix86_attr_length_address_default (insn)
11048 rtx insn;
11049 {
11050 int i;
11051 extract_insn_cached (insn);
11052 for (i = recog_data.n_operands - 1; i >= 0; --i)
11053 if (GET_CODE (recog_data.operand[i]) == MEM)
11054 {
11055 return memory_address_length (XEXP (recog_data.operand[i], 0));
11056 break;
11057 }
11058 return 0;
11059 }
11060 \f
11061 /* Return the maximum number of instructions a cpu can issue. */
11062
11063 static int
11064 ix86_issue_rate ()
11065 {
11066 switch (ix86_cpu)
11067 {
11068 case PROCESSOR_PENTIUM:
11069 case PROCESSOR_K6:
11070 return 2;
11071
11072 case PROCESSOR_PENTIUMPRO:
11073 case PROCESSOR_PENTIUM4:
11074 case PROCESSOR_ATHLON:
11075 return 3;
11076
11077 default:
11078 return 1;
11079 }
11080 }
11081
11082 /* A subroutine of ix86_adjust_cost -- return true iff INSN reads the flags
11083 set by DEP_INSN and nothing else set by DEP_INSN. */
11084
11085 static int
11086 ix86_flags_dependant (insn, dep_insn, insn_type)
11087 rtx insn, dep_insn;
11088 enum attr_type insn_type;
11089 {
11090 rtx set, set2;
11091
11092 /* Simplify the test for uninteresting insns. */
11093 if (insn_type != TYPE_SETCC
11094 && insn_type != TYPE_ICMOV
11095 && insn_type != TYPE_FCMOV
11096 && insn_type != TYPE_IBR)
11097 return 0;
11098
11099 if ((set = single_set (dep_insn)) != 0)
11100 {
11101 set = SET_DEST (set);
11102 set2 = NULL_RTX;
11103 }
11104 else if (GET_CODE (PATTERN (dep_insn)) == PARALLEL
11105 && XVECLEN (PATTERN (dep_insn), 0) == 2
11106 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 0)) == SET
11107 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 1)) == SET)
11108 {
11109 set = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
11110 set2 = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 1));
11111 }
11112 else
11113 return 0;
11114
11115 if (GET_CODE (set) != REG || REGNO (set) != FLAGS_REG)
11116 return 0;
11117
11118 /* This test is true if the dependent insn reads the flags but
11119 not any other potentially set register. */
11120 if (!reg_overlap_mentioned_p (set, PATTERN (insn)))
11121 return 0;
11122
11123 if (set2 && reg_overlap_mentioned_p (set2, PATTERN (insn)))
11124 return 0;
11125
11126 return 1;
11127 }
11128
11129 /* A subroutine of ix86_adjust_cost -- return true iff INSN has a memory
11130 address with operands set by DEP_INSN. */
11131
11132 static int
11133 ix86_agi_dependant (insn, dep_insn, insn_type)
11134 rtx insn, dep_insn;
11135 enum attr_type insn_type;
11136 {
11137 rtx addr;
11138
11139 if (insn_type == TYPE_LEA
11140 && TARGET_PENTIUM)
11141 {
11142 addr = PATTERN (insn);
11143 if (GET_CODE (addr) == SET)
11144 ;
11145 else if (GET_CODE (addr) == PARALLEL
11146 && GET_CODE (XVECEXP (addr, 0, 0)) == SET)
11147 addr = XVECEXP (addr, 0, 0);
11148 else
11149 abort ();
11150 addr = SET_SRC (addr);
11151 }
11152 else
11153 {
11154 int i;
11155 extract_insn_cached (insn);
11156 for (i = recog_data.n_operands - 1; i >= 0; --i)
11157 if (GET_CODE (recog_data.operand[i]) == MEM)
11158 {
11159 addr = XEXP (recog_data.operand[i], 0);
11160 goto found;
11161 }
11162 return 0;
11163 found:;
11164 }
11165
11166 return modified_in_p (addr, dep_insn);
11167 }
11168
11169 static int
11170 ix86_adjust_cost (insn, link, dep_insn, cost)
11171 rtx insn, link, dep_insn;
11172 int cost;
11173 {
11174 enum attr_type insn_type, dep_insn_type;
11175 enum attr_memory memory, dep_memory;
11176 rtx set, set2;
11177 int dep_insn_code_number;
11178
11179 /* Anti and output dependencies have zero cost on all CPUs. */
11180 if (REG_NOTE_KIND (link) != 0)
11181 return 0;
11182
11183 dep_insn_code_number = recog_memoized (dep_insn);
11184
11185 /* If we can't recognize the insns, we can't really do anything. */
11186 if (dep_insn_code_number < 0 || recog_memoized (insn) < 0)
11187 return cost;
11188
11189 insn_type = get_attr_type (insn);
11190 dep_insn_type = get_attr_type (dep_insn);
11191
11192 switch (ix86_cpu)
11193 {
11194 case PROCESSOR_PENTIUM:
11195 /* Address Generation Interlock adds a cycle of latency. */
11196 if (ix86_agi_dependant (insn, dep_insn, insn_type))
11197 cost += 1;
11198
11199 /* ??? Compares pair with jump/setcc. */
11200 if (ix86_flags_dependant (insn, dep_insn, insn_type))
11201 cost = 0;
11202
11203 /* Floating point stores require the value to be ready one cycle earlier. */
11204 if (insn_type == TYPE_FMOV
11205 && get_attr_memory (insn) == MEMORY_STORE
11206 && !ix86_agi_dependant (insn, dep_insn, insn_type))
11207 cost += 1;
11208 break;
11209
11210 case PROCESSOR_PENTIUMPRO:
11211 memory = get_attr_memory (insn);
11212 dep_memory = get_attr_memory (dep_insn);
11213
11214 /* Since we can't represent delayed latencies of load+operation,
11215 increase the cost here for non-imov insns. */
11216 if (dep_insn_type != TYPE_IMOV
11217 && dep_insn_type != TYPE_FMOV
11218 && (dep_memory == MEMORY_LOAD || dep_memory == MEMORY_BOTH))
11219 cost += 1;
11220
11221 /* INT->FP conversion is expensive. */
11222 if (get_attr_fp_int_src (dep_insn))
11223 cost += 5;
11224
11225 /* There is one cycle extra latency between an FP op and a store. */
11226 if (insn_type == TYPE_FMOV
11227 && (set = single_set (dep_insn)) != NULL_RTX
11228 && (set2 = single_set (insn)) != NULL_RTX
11229 && rtx_equal_p (SET_DEST (set), SET_SRC (set2))
11230 && GET_CODE (SET_DEST (set2)) == MEM)
11231 cost += 1;
11232
11233 /* Show ability of reorder buffer to hide latency of load by executing
11234 in parallel with previous instruction in case
11235 previous instruction is not needed to compute the address. */
11236 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
11237 && !ix86_agi_dependant (insn, dep_insn, insn_type))
11238 {
11239 /* Claim moves to take one cycle, as the core can issue one load
11240 at a time and the next load can start a cycle later. */
11241 if (dep_insn_type == TYPE_IMOV
11242 || dep_insn_type == TYPE_FMOV)
11243 cost = 1;
11244 else if (cost > 1)
11245 cost--;
11246 }
11247 break;
11248
11249 case PROCESSOR_K6:
11250 memory = get_attr_memory (insn);
11251 dep_memory = get_attr_memory (dep_insn);
11252 /* The esp dependency is resolved before the instruction is really
11253 finished. */
11254 if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
11255 && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
11256 return 1;
11257
11258 /* Since we can't represent delayed latencies of load+operation,
11259 increase the cost here for non-imov insns. */
11260 if (dep_memory == MEMORY_LOAD || dep_memory == MEMORY_BOTH)
11261 cost += (dep_insn_type != TYPE_IMOV) ? 2 : 1;
11262
11263 /* INT->FP conversion is expensive. */
11264 if (get_attr_fp_int_src (dep_insn))
11265 cost += 5;
11266
11267 /* Show ability of reorder buffer to hide latency of load by executing
11268 in parallel with previous instruction in case
11269 previous instruction is not needed to compute the address. */
11270 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
11271 && !ix86_agi_dependant (insn, dep_insn, insn_type))
11272 {
11273 /* Claim moves to take one cycle, as the core can issue one load
11274 at a time and the next load can start a cycle later. */
11275 if (dep_insn_type == TYPE_IMOV
11276 || dep_insn_type == TYPE_FMOV)
11277 cost = 1;
11278 else if (cost > 2)
11279 cost -= 2;
11280 else
11281 cost = 1;
11282 }
11283 break;
11284
11285 case PROCESSOR_ATHLON:
11286 memory = get_attr_memory (insn);
11287 dep_memory = get_attr_memory (dep_insn);
11288
11289 if (dep_memory == MEMORY_LOAD || dep_memory == MEMORY_BOTH)
11290 {
11291 if (dep_insn_type == TYPE_IMOV || dep_insn_type == TYPE_FMOV)
11292 cost += 2;
11293 else
11294 cost += 3;
11295 }
11296 /* Show ability of reorder buffer to hide latency of load by executing
11297 in parallel with previous instruction in case
11298 previous instruction is not needed to compute the address. */
11299 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
11300 && !ix86_agi_dependant (insn, dep_insn, insn_type))
11301 {
11302 /* Claim moves to take one cycle, as the core can issue one load
11303 at a time and the next load can start a cycle later. */
11304 if (dep_insn_type == TYPE_IMOV
11305 || dep_insn_type == TYPE_FMOV)
11306 cost = 0;
11307 else if (cost >= 3)
11308 cost -= 3;
11309 else
11310 cost = 0;
11311 }
11312
11313 default:
11314 break;
11315 }
11316
11317 return cost;
11318 }
11319
11320 static union
11321 {
11322 struct ppro_sched_data
11323 {
11324 rtx decode[3];
11325 int issued_this_cycle;
11326 } ppro;
11327 } ix86_sched_data;
11328
11329 static enum attr_ppro_uops
11330 ix86_safe_ppro_uops (insn)
11331 rtx insn;
11332 {
11333 if (recog_memoized (insn) >= 0)
11334 return get_attr_ppro_uops (insn);
11335 else
11336 return PPRO_UOPS_MANY;
11337 }
11338
11339 static void
11340 ix86_dump_ppro_packet (dump)
11341 FILE *dump;
11342 {
11343 if (ix86_sched_data.ppro.decode[0])
11344 {
11345 fprintf (dump, "PPRO packet: %d",
11346 INSN_UID (ix86_sched_data.ppro.decode[0]));
11347 if (ix86_sched_data.ppro.decode[1])
11348 fprintf (dump, " %d", INSN_UID (ix86_sched_data.ppro.decode[1]));
11349 if (ix86_sched_data.ppro.decode[2])
11350 fprintf (dump, " %d", INSN_UID (ix86_sched_data.ppro.decode[2]));
11351 fputc ('\n', dump);
11352 }
11353 }
11354
11355 /* We're beginning a new block. Initialize data structures as necessary. */
11356
11357 static void
11358 ix86_sched_init (dump, sched_verbose, veclen)
11359 FILE *dump ATTRIBUTE_UNUSED;
11360 int sched_verbose ATTRIBUTE_UNUSED;
11361 int veclen ATTRIBUTE_UNUSED;
11362 {
11363 memset (&ix86_sched_data, 0, sizeof (ix86_sched_data));
11364 }
11365
11366 /* Shift INSN to SLOT, and shift everything else down. */
11367
11368 static void
11369 ix86_reorder_insn (insnp, slot)
11370 rtx *insnp, *slot;
11371 {
11372 if (insnp != slot)
11373 {
11374 rtx insn = *insnp;
11375 do
11376 insnp[0] = insnp[1];
11377 while (++insnp != slot);
11378 *insnp = insn;
11379 }
11380 }
11381
11382 static void
11383 ix86_sched_reorder_ppro (ready, e_ready)
11384 rtx *ready;
11385 rtx *e_ready;
11386 {
11387 rtx decode[3];
11388 enum attr_ppro_uops cur_uops;
11389 int issued_this_cycle;
11390 rtx *insnp;
11391 int i;
11392
11393 /* At this point .ppro.decode contains the state of the three
11394 decoders from last "cycle". That is, those insns that were
11395 actually independent. But here we're scheduling for the
11396 decoder, and we may find things that are decodable in the
11397 same cycle. */
11398
11399 memcpy (decode, ix86_sched_data.ppro.decode, sizeof (decode));
11400 issued_this_cycle = 0;
11401
11402 insnp = e_ready;
11403 cur_uops = ix86_safe_ppro_uops (*insnp);
11404
11405 /* If the decoders are empty, and we have a complex insn at the
11406 head of the priority queue, let it issue without complaint. */
11407 if (decode[0] == NULL)
11408 {
11409 if (cur_uops == PPRO_UOPS_MANY)
11410 {
11411 decode[0] = *insnp;
11412 goto ppro_done;
11413 }
11414
11415 /* Otherwise, search for a 2-4 uop insn to issue. */
11416 while (cur_uops != PPRO_UOPS_FEW)
11417 {
11418 if (insnp == ready)
11419 break;
11420 cur_uops = ix86_safe_ppro_uops (*--insnp);
11421 }
11422
11423 /* If so, move it to the head of the line. */
11424 if (cur_uops == PPRO_UOPS_FEW)
11425 ix86_reorder_insn (insnp, e_ready);
11426
11427 /* Issue the head of the queue. */
11428 issued_this_cycle = 1;
11429 decode[0] = *e_ready--;
11430 }
11431
11432 /* Look for simple insns to fill in the other two slots. */
11433 for (i = 1; i < 3; ++i)
11434 if (decode[i] == NULL)
11435 {
11436 if (ready > e_ready)
11437 goto ppro_done;
11438
11439 insnp = e_ready;
11440 cur_uops = ix86_safe_ppro_uops (*insnp);
11441 while (cur_uops != PPRO_UOPS_ONE)
11442 {
11443 if (insnp == ready)
11444 break;
11445 cur_uops = ix86_safe_ppro_uops (*--insnp);
11446 }
11447
11448 /* Found one. Move it to the head of the queue and issue it. */
11449 if (cur_uops == PPRO_UOPS_ONE)
11450 {
11451 ix86_reorder_insn (insnp, e_ready);
11452 decode[i] = *e_ready--;
11453 issued_this_cycle++;
11454 continue;
11455 }
11456
11457 /* ??? Didn't find one. Ideally, here we would do a lazy split
11458 of 2-uop insns, issue one and queue the other. */
11459 }
11460
11461 ppro_done:
11462 if (issued_this_cycle == 0)
11463 issued_this_cycle = 1;
11464 ix86_sched_data.ppro.issued_this_cycle = issued_this_cycle;
11465 }
11466
11467 /* We are about to begin issuing insns for this clock cycle.
11468 Override the default sort algorithm to better slot instructions. */
11469 static int
11470 ix86_sched_reorder (dump, sched_verbose, ready, n_readyp, clock_var)
11471 FILE *dump ATTRIBUTE_UNUSED;
11472 int sched_verbose ATTRIBUTE_UNUSED;
11473 rtx *ready;
11474 int *n_readyp;
11475 int clock_var ATTRIBUTE_UNUSED;
11476 {
11477 int n_ready = *n_readyp;
11478 rtx *e_ready = ready + n_ready - 1;
11479
11480   /* Make sure to initialize the key items in ix86_sched_data
11481      even if we are not going to bother trying to reorder the
11482      ready queue.  */
11483 if (n_ready < 2)
11484 {
11485 ix86_sched_data.ppro.issued_this_cycle = 1;
11486 goto out;
11487 }
11488
11489 switch (ix86_cpu)
11490 {
11491 default:
11492 break;
11493
11494 case PROCESSOR_PENTIUMPRO:
11495 ix86_sched_reorder_ppro (ready, e_ready);
11496 break;
11497 }
11498
11499 out:
11500 return ix86_issue_rate ();
11501 }
11502
11503 /* We are about to issue INSN. Return the number of insns left on the
11504 ready queue that can be issued this cycle. */
11505
11506 static int
11507 ix86_variable_issue (dump, sched_verbose, insn, can_issue_more)
11508 FILE *dump;
11509 int sched_verbose;
11510 rtx insn;
11511 int can_issue_more;
11512 {
11513 int i;
11514 switch (ix86_cpu)
11515 {
11516 default:
11517 return can_issue_more - 1;
11518
11519 case PROCESSOR_PENTIUMPRO:
11520 {
11521 enum attr_ppro_uops uops = ix86_safe_ppro_uops (insn);
11522
11523 if (uops == PPRO_UOPS_MANY)
11524 {
11525 if (sched_verbose)
11526 ix86_dump_ppro_packet (dump);
11527 ix86_sched_data.ppro.decode[0] = insn;
11528 ix86_sched_data.ppro.decode[1] = NULL;
11529 ix86_sched_data.ppro.decode[2] = NULL;
11530 if (sched_verbose)
11531 ix86_dump_ppro_packet (dump);
11532 ix86_sched_data.ppro.decode[0] = NULL;
11533 }
11534 else if (uops == PPRO_UOPS_FEW)
11535 {
11536 if (sched_verbose)
11537 ix86_dump_ppro_packet (dump);
11538 ix86_sched_data.ppro.decode[0] = insn;
11539 ix86_sched_data.ppro.decode[1] = NULL;
11540 ix86_sched_data.ppro.decode[2] = NULL;
11541 }
11542 else
11543 {
11544 for (i = 0; i < 3; ++i)
11545 if (ix86_sched_data.ppro.decode[i] == NULL)
11546 {
11547 ix86_sched_data.ppro.decode[i] = insn;
11548 break;
11549 }
11550 if (i == 3)
11551 abort ();
11552 if (i == 2)
11553 {
11554 if (sched_verbose)
11555 ix86_dump_ppro_packet (dump);
11556 ix86_sched_data.ppro.decode[0] = NULL;
11557 ix86_sched_data.ppro.decode[1] = NULL;
11558 ix86_sched_data.ppro.decode[2] = NULL;
11559 }
11560 }
11561 }
11562 return --ix86_sched_data.ppro.issued_this_cycle;
11563 }
11564 }
11565
11566 static int
11567 ia32_use_dfa_pipeline_interface ()
11568 {
11569 if (ix86_cpu == PROCESSOR_PENTIUM)
11570 return 1;
11571 return 0;
11572 }
11573
11574 /* How many alternative schedules to try. This should be as wide as the
11575 scheduling freedom in the DFA, but no wider. Making this value too
11576    large results in extra work for the scheduler.  */
11577
11578 static int
11579 ia32_multipass_dfa_lookahead ()
11580 {
11581 if (ix86_cpu == PROCESSOR_PENTIUM)
11582 return 2;
11583 else
11584 return 0;
11585 }
11586
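/* Note (annotation, not original code): a lookahead of 2 matches the
   Pentium's two-wide U/V pipe issue, which is presumably why only
   PROCESSOR_PENTIUM asks the DFA scheduler to consider alternative
   schedules here; all other processors return 0.  */
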
11587 \f
11588 /* Walk through INSNS and look for MEM references whose address is DSTREG or
11589    SRCREG and set the memory attributes to those of DSTREF and SRCREF, as
11590 appropriate. */
11591
11592 void
11593 ix86_set_move_mem_attrs (insns, dstref, srcref, dstreg, srcreg)
11594 rtx insns;
11595 rtx dstref, srcref, dstreg, srcreg;
11596 {
11597 rtx insn;
11598
11599 for (insn = insns; insn != 0 ; insn = NEXT_INSN (insn))
11600 if (INSN_P (insn))
11601 ix86_set_move_mem_attrs_1 (PATTERN (insn), dstref, srcref,
11602 dstreg, srcreg);
11603 }
11604
11605 /* Subroutine of above to actually do the updating by recursively walking
11606 the rtx. */
11607
11608 static void
11609 ix86_set_move_mem_attrs_1 (x, dstref, srcref, dstreg, srcreg)
11610 rtx x;
11611 rtx dstref, srcref, dstreg, srcreg;
11612 {
11613 enum rtx_code code = GET_CODE (x);
11614 const char *format_ptr = GET_RTX_FORMAT (code);
11615 int i, j;
11616
11617 if (code == MEM && XEXP (x, 0) == dstreg)
11618 MEM_COPY_ATTRIBUTES (x, dstref);
11619 else if (code == MEM && XEXP (x, 0) == srcreg)
11620 MEM_COPY_ATTRIBUTES (x, srcref);
11621
11622 for (i = 0; i < GET_RTX_LENGTH (code); i++, format_ptr++)
11623 {
11624 if (*format_ptr == 'e')
11625 ix86_set_move_mem_attrs_1 (XEXP (x, i), dstref, srcref,
11626 dstreg, srcreg);
11627 else if (*format_ptr == 'E')
11628 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
11629 ix86_set_move_mem_attrs_1 (XVECEXP (x, i, j), dstref, srcref,
11630 dstreg, srcreg);
11631 }
11632 }
11633 \f
11634 /* Compute the alignment given to a constant that is being placed in memory.
11635 EXP is the constant and ALIGN is the alignment that the object would
11636 ordinarily have.
11637 The value of this function is used instead of that alignment to align
11638 the object. */
11639
11640 int
11641 ix86_constant_alignment (exp, align)
11642 tree exp;
11643 int align;
11644 {
11645 if (TREE_CODE (exp) == REAL_CST)
11646 {
11647 if (TYPE_MODE (TREE_TYPE (exp)) == DFmode && align < 64)
11648 return 64;
11649 else if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (exp))) && align < 128)
11650 return 128;
11651 }
11652 else if (TREE_CODE (exp) == STRING_CST && TREE_STRING_LENGTH (exp) >= 31
11653 && align < 256)
11654 return 256;
11655
11656 return align;
11657 }
11658
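/* Worked example (illustrative only): with the rules above,
   ix86_constant_alignment (exp, 32) returns 64 for a REAL_CST of type
   double and 256 for a STRING_CST of length 31 or more, while a constant
   that is already at least that aligned keeps its incoming ALIGN.  */
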
11659 /* Compute the alignment for a static variable.
11660 TYPE is the data type, and ALIGN is the alignment that
11661 the object would ordinarily have. The value of this function is used
11662 instead of that alignment to align the object. */
11663
11664 int
11665 ix86_data_alignment (type, align)
11666 tree type;
11667 int align;
11668 {
11669 if (AGGREGATE_TYPE_P (type)
11670 && TYPE_SIZE (type)
11671 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
11672 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 256
11673 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 256)
11674 return 256;
11675
11676 /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
11677      to a 16-byte boundary.  */
11678 if (TARGET_64BIT)
11679 {
11680 if (AGGREGATE_TYPE_P (type)
11681 && TYPE_SIZE (type)
11682 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
11683 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 128
11684 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
11685 return 128;
11686 }
11687
11688 if (TREE_CODE (type) == ARRAY_TYPE)
11689 {
11690 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
11691 return 64;
11692 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
11693 return 128;
11694 }
11695 else if (TREE_CODE (type) == COMPLEX_TYPE)
11696 {
11697
11698 if (TYPE_MODE (type) == DCmode && align < 64)
11699 return 64;
11700 if (TYPE_MODE (type) == XCmode && align < 128)
11701 return 128;
11702 }
11703 else if ((TREE_CODE (type) == RECORD_TYPE
11704 || TREE_CODE (type) == UNION_TYPE
11705 || TREE_CODE (type) == QUAL_UNION_TYPE)
11706 && TYPE_FIELDS (type))
11707 {
11708 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
11709 return 64;
11710 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
11711 return 128;
11712 }
11713 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
11714 || TREE_CODE (type) == INTEGER_TYPE)
11715 {
11716 if (TYPE_MODE (type) == DFmode && align < 64)
11717 return 64;
11718 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
11719 return 128;
11720 }
11721
11722 return align;
11723 }
11724
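/* Worked example (illustrative only): on x86-64 a static char[20]
   (160 bits, so below the generic 256-bit test but at least 128 bits)
   is given 128-bit alignment by ix86_data_alignment, and any array
   whose element type is DFmode is aligned to at least 64 bits by the
   ARRAY_TYPE case.  */
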
11725 /* Compute the alignment for a local variable.
11726 TYPE is the data type, and ALIGN is the alignment that
11727    the object would ordinarily have.  The value of this function is used
11728 instead of that alignment to align the object. */
11729
11730 int
11731 ix86_local_alignment (type, align)
11732 tree type;
11733 int align;
11734 {
11735 /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
11736      to a 16-byte boundary.  */
11737 if (TARGET_64BIT)
11738 {
11739 if (AGGREGATE_TYPE_P (type)
11740 && TYPE_SIZE (type)
11741 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
11742 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 16
11743 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
11744 return 128;
11745 }
11746 if (TREE_CODE (type) == ARRAY_TYPE)
11747 {
11748 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
11749 return 64;
11750 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
11751 return 128;
11752 }
11753 else if (TREE_CODE (type) == COMPLEX_TYPE)
11754 {
11755 if (TYPE_MODE (type) == DCmode && align < 64)
11756 return 64;
11757 if (TYPE_MODE (type) == XCmode && align < 128)
11758 return 128;
11759 }
11760 else if ((TREE_CODE (type) == RECORD_TYPE
11761 || TREE_CODE (type) == UNION_TYPE
11762 || TREE_CODE (type) == QUAL_UNION_TYPE)
11763 && TYPE_FIELDS (type))
11764 {
11765 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
11766 return 64;
11767 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
11768 return 128;
11769 }
11770 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
11771 || TREE_CODE (type) == INTEGER_TYPE)
11772 {
11773
11774 if (TYPE_MODE (type) == DFmode && align < 64)
11775 return 64;
11776 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
11777 return 128;
11778 }
11779 return align;
11780 }
11781 \f
11782 /* Emit RTL insns to initialize the variable parts of a trampoline.
11783 FNADDR is an RTX for the address of the function's pure code.
11784 CXT is an RTX for the static chain value for the function. */
11785 void
11786 x86_initialize_trampoline (tramp, fnaddr, cxt)
11787 rtx tramp, fnaddr, cxt;
11788 {
11789 if (!TARGET_64BIT)
11790 {
11791 /* Compute offset from the end of the jmp to the target function. */
11792 rtx disp = expand_binop (SImode, sub_optab, fnaddr,
11793 plus_constant (tramp, 10),
11794 NULL_RTX, 1, OPTAB_DIRECT);
11795 emit_move_insn (gen_rtx_MEM (QImode, tramp),
11796 gen_int_mode (0xb9, QImode));
11797 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 1)), cxt);
11798 emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, 5)),
11799 gen_int_mode (0xe9, QImode));
11800 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 6)), disp);
11801 }
11802 else
11803 {
11804 int offset = 0;
11805       /* Try to load the address using the shorter movl instead of movabs.
11806	  We may want to support movq for kernel mode, but the kernel does not
11807	  use trampolines at the moment.  */
11808 if (x86_64_zero_extended_value (fnaddr))
11809 {
11810 fnaddr = copy_to_mode_reg (DImode, fnaddr);
11811 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
11812 gen_int_mode (0xbb41, HImode));
11813 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, offset + 2)),
11814 gen_lowpart (SImode, fnaddr));
11815 offset += 6;
11816 }
11817 else
11818 {
11819 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
11820 gen_int_mode (0xbb49, HImode));
11821 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
11822 fnaddr);
11823 offset += 10;
11824 }
11825 /* Load static chain using movabs to r10. */
11826 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
11827 gen_int_mode (0xba49, HImode));
11828 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
11829 cxt);
11830 offset += 10;
11831       /* Jump to r11.  */
11832 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
11833 gen_int_mode (0xff49, HImode));
11834 emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, offset+2)),
11835 gen_int_mode (0xe3, QImode));
11836 offset += 3;
11837 if (offset > TRAMPOLINE_SIZE)
11838 abort ();
11839 }
11840
11841 #ifdef TRANSFER_FROM_TRAMPOLINE
11842 emit_library_call (gen_rtx (SYMBOL_REF, Pmode, "__enable_execute_stack"),
11843 LCT_NORMAL, VOIDmode, 1, tramp, Pmode);
11844 #endif
11845 }
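
/* Illustrative byte layout of the 32-bit trampoline built above, assuming
   the usual IA-32 encodings 0xb9 = movl $imm32, %ecx and 0xe9 = jmp rel32
   (this comment is an annotation, not original code):

       offset 0   b9 <cxt>    movl  $static_chain, %ecx
       offset 5   e9 <disp>   jmp   fnaddr     ; disp = fnaddr - (tramp + 10)

   The 64-bit variant instead loads FNADDR into %r11 with movl or movabs
   and CXT into %r10 with movabs, then finishes with jmp *%r11.  */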
11846 \f
11847 #define def_builtin(MASK, NAME, TYPE, CODE) \
11848 do { \
11849 if ((MASK) & target_flags) \
11850 builtin_function ((NAME), (TYPE), (CODE), BUILT_IN_MD, \
11851 NULL, NULL_TREE); \
11852 } while (0)
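
/* For example (illustrative only), a two-operand SSE builtin could be
   registered with:

     def_builtin (MASK_SSE1, "__builtin_ia32_addps",
		  v4sf_ftype_v4sf_v4sf, IX86_BUILTIN_ADDPS);

   which creates the builtin only when the corresponding -msse/-msse2
   target flag is enabled; the bdesc_* tables below feed this macro in
   a loop from ix86_init_mmx_sse_builtins.  */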
11853
11854 struct builtin_description
11855 {
11856 const unsigned int mask;
11857 const enum insn_code icode;
11858 const char *const name;
11859 const enum ix86_builtins code;
11860 const enum rtx_code comparison;
11861 const unsigned int flag;
11862 };
11863
11864 /* Used for builtins that are enabled both by -msse and -msse2. */
11865 #define MASK_SSE1 (MASK_SSE | MASK_SSE2)
11866
11867 static const struct builtin_description bdesc_comi[] =
11868 {
11869 { MASK_SSE1, CODE_FOR_sse_comi, "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS, UNEQ, 0 },
11870 { MASK_SSE1, CODE_FOR_sse_comi, "__builtin_ia32_comilt", IX86_BUILTIN_COMILTSS, UNLT, 0 },
11871 { MASK_SSE1, CODE_FOR_sse_comi, "__builtin_ia32_comile", IX86_BUILTIN_COMILESS, UNLE, 0 },
11872 { MASK_SSE1, CODE_FOR_sse_comi, "__builtin_ia32_comigt", IX86_BUILTIN_COMIGTSS, GT, 0 },
11873 { MASK_SSE1, CODE_FOR_sse_comi, "__builtin_ia32_comige", IX86_BUILTIN_COMIGESS, GE, 0 },
11874 { MASK_SSE1, CODE_FOR_sse_comi, "__builtin_ia32_comineq", IX86_BUILTIN_COMINEQSS, LTGT, 0 },
11875 { MASK_SSE1, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomieq", IX86_BUILTIN_UCOMIEQSS, UNEQ, 0 },
11876 { MASK_SSE1, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomilt", IX86_BUILTIN_UCOMILTSS, UNLT, 0 },
11877 { MASK_SSE1, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomile", IX86_BUILTIN_UCOMILESS, UNLE, 0 },
11878 { MASK_SSE1, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomigt", IX86_BUILTIN_UCOMIGTSS, GT, 0 },
11879 { MASK_SSE1, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomige", IX86_BUILTIN_UCOMIGESS, GE, 0 },
11880 { MASK_SSE1, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS, LTGT, 0 },
11881 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdeq", IX86_BUILTIN_COMIEQSD, UNEQ, 0 },
11882 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdlt", IX86_BUILTIN_COMILTSD, UNLT, 0 },
11883 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdle", IX86_BUILTIN_COMILESD, UNLE, 0 },
11884 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdgt", IX86_BUILTIN_COMIGTSD, GT, 0 },
11885 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdge", IX86_BUILTIN_COMIGESD, GE, 0 },
11886 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdneq", IX86_BUILTIN_COMINEQSD, LTGT, 0 },
11887 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdeq", IX86_BUILTIN_UCOMIEQSD, UNEQ, 0 },
11888 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdlt", IX86_BUILTIN_UCOMILTSD, UNLT, 0 },
11889 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdle", IX86_BUILTIN_UCOMILESD, UNLE, 0 },
11890 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdgt", IX86_BUILTIN_UCOMIGTSD, GT, 0 },
11891 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdge", IX86_BUILTIN_UCOMIGESD, GE, 0 },
11892 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdneq", IX86_BUILTIN_UCOMINEQSD, LTGT, 0 },
11893 };
11894
11895 static const struct builtin_description bdesc_2arg[] =
11896 {
11897 /* SSE */
11898 { MASK_SSE1, CODE_FOR_addv4sf3, "__builtin_ia32_addps", IX86_BUILTIN_ADDPS, 0, 0 },
11899 { MASK_SSE1, CODE_FOR_subv4sf3, "__builtin_ia32_subps", IX86_BUILTIN_SUBPS, 0, 0 },
11900 { MASK_SSE1, CODE_FOR_mulv4sf3, "__builtin_ia32_mulps", IX86_BUILTIN_MULPS, 0, 0 },
11901 { MASK_SSE1, CODE_FOR_divv4sf3, "__builtin_ia32_divps", IX86_BUILTIN_DIVPS, 0, 0 },
11902 { MASK_SSE1, CODE_FOR_vmaddv4sf3, "__builtin_ia32_addss", IX86_BUILTIN_ADDSS, 0, 0 },
11903 { MASK_SSE1, CODE_FOR_vmsubv4sf3, "__builtin_ia32_subss", IX86_BUILTIN_SUBSS, 0, 0 },
11904 { MASK_SSE1, CODE_FOR_vmmulv4sf3, "__builtin_ia32_mulss", IX86_BUILTIN_MULSS, 0, 0 },
11905 { MASK_SSE1, CODE_FOR_vmdivv4sf3, "__builtin_ia32_divss", IX86_BUILTIN_DIVSS, 0, 0 },
11906
11907 { MASK_SSE1, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpeqps", IX86_BUILTIN_CMPEQPS, EQ, 0 },
11908 { MASK_SSE1, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpltps", IX86_BUILTIN_CMPLTPS, LT, 0 },
11909 { MASK_SSE1, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpleps", IX86_BUILTIN_CMPLEPS, LE, 0 },
11910 { MASK_SSE1, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS, LT, 1 },
11911 { MASK_SSE1, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpgeps", IX86_BUILTIN_CMPGEPS, LE, 1 },
11912 { MASK_SSE1, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpunordps", IX86_BUILTIN_CMPUNORDPS, UNORDERED, 0 },
11913 { MASK_SSE1, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpneqps", IX86_BUILTIN_CMPNEQPS, EQ, 0 },
11914 { MASK_SSE1, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpnltps", IX86_BUILTIN_CMPNLTPS, LT, 0 },
11915 { MASK_SSE1, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpnleps", IX86_BUILTIN_CMPNLEPS, LE, 0 },
11916 { MASK_SSE1, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpngtps", IX86_BUILTIN_CMPNGTPS, LT, 1 },
11917 { MASK_SSE1, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpngeps", IX86_BUILTIN_CMPNGEPS, LE, 1 },
11918 { MASK_SSE1, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpordps", IX86_BUILTIN_CMPORDPS, UNORDERED, 0 },
11919 { MASK_SSE1, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpeqss", IX86_BUILTIN_CMPEQSS, EQ, 0 },
11920 { MASK_SSE1, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpltss", IX86_BUILTIN_CMPLTSS, LT, 0 },
11921 { MASK_SSE1, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpless", IX86_BUILTIN_CMPLESS, LE, 0 },
11922 { MASK_SSE1, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpunordss", IX86_BUILTIN_CMPUNORDSS, UNORDERED, 0 },
11923 { MASK_SSE1, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpneqss", IX86_BUILTIN_CMPNEQSS, EQ, 0 },
11924 { MASK_SSE1, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpnltss", IX86_BUILTIN_CMPNLTSS, LT, 0 },
11925 { MASK_SSE1, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS, LE, 0 },
11926 { MASK_SSE1, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS, UNORDERED, 0 },
11927
11928 { MASK_SSE1, CODE_FOR_sminv4sf3, "__builtin_ia32_minps", IX86_BUILTIN_MINPS, 0, 0 },
11929 { MASK_SSE1, CODE_FOR_smaxv4sf3, "__builtin_ia32_maxps", IX86_BUILTIN_MAXPS, 0, 0 },
11930 { MASK_SSE1, CODE_FOR_vmsminv4sf3, "__builtin_ia32_minss", IX86_BUILTIN_MINSS, 0, 0 },
11931 { MASK_SSE1, CODE_FOR_vmsmaxv4sf3, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS, 0, 0 },
11932
11933 { MASK_SSE1, CODE_FOR_sse_andv4sf3, "__builtin_ia32_andps", IX86_BUILTIN_ANDPS, 0, 0 },
11934 { MASK_SSE1, CODE_FOR_sse_nandv4sf3, "__builtin_ia32_andnps", IX86_BUILTIN_ANDNPS, 0, 0 },
11935 { MASK_SSE1, CODE_FOR_sse_iorv4sf3, "__builtin_ia32_orps", IX86_BUILTIN_ORPS, 0, 0 },
11936 { MASK_SSE1, CODE_FOR_sse_xorv4sf3, "__builtin_ia32_xorps", IX86_BUILTIN_XORPS, 0, 0 },
11937
11938 { MASK_SSE1, CODE_FOR_sse_movss, "__builtin_ia32_movss", IX86_BUILTIN_MOVSS, 0, 0 },
11939 { MASK_SSE1, CODE_FOR_sse_movhlps, "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS, 0, 0 },
11940 { MASK_SSE1, CODE_FOR_sse_movlhps, "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS, 0, 0 },
11941 { MASK_SSE1, CODE_FOR_sse_unpckhps, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS, 0, 0 },
11942 { MASK_SSE1, CODE_FOR_sse_unpcklps, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS, 0, 0 },
11943
11944 /* MMX */
11945 { MASK_MMX, CODE_FOR_addv8qi3, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB, 0, 0 },
11946 { MASK_MMX, CODE_FOR_addv4hi3, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW, 0, 0 },
11947 { MASK_MMX, CODE_FOR_addv2si3, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD, 0, 0 },
11948 { MASK_MMX, CODE_FOR_subv8qi3, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB, 0, 0 },
11949 { MASK_MMX, CODE_FOR_subv4hi3, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW, 0, 0 },
11950 { MASK_MMX, CODE_FOR_subv2si3, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD, 0, 0 },
11951
11952 { MASK_MMX, CODE_FOR_ssaddv8qi3, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB, 0, 0 },
11953 { MASK_MMX, CODE_FOR_ssaddv4hi3, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW, 0, 0 },
11954 { MASK_MMX, CODE_FOR_sssubv8qi3, "__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB, 0, 0 },
11955 { MASK_MMX, CODE_FOR_sssubv4hi3, "__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW, 0, 0 },
11956 { MASK_MMX, CODE_FOR_usaddv8qi3, "__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB, 0, 0 },
11957 { MASK_MMX, CODE_FOR_usaddv4hi3, "__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW, 0, 0 },
11958 { MASK_MMX, CODE_FOR_ussubv8qi3, "__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB, 0, 0 },
11959 { MASK_MMX, CODE_FOR_ussubv4hi3, "__builtin_ia32_psubusw", IX86_BUILTIN_PSUBUSW, 0, 0 },
11960
11961 { MASK_MMX, CODE_FOR_mulv4hi3, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW, 0, 0 },
11962 { MASK_MMX, CODE_FOR_smulv4hi3_highpart, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW, 0, 0 },
11963 { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_umulv4hi3_highpart, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW, 0, 0 },
11964
11965 { MASK_MMX, CODE_FOR_mmx_anddi3, "__builtin_ia32_pand", IX86_BUILTIN_PAND, 0, 0 },
11966 { MASK_MMX, CODE_FOR_mmx_nanddi3, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN, 0, 0 },
11967 { MASK_MMX, CODE_FOR_mmx_iordi3, "__builtin_ia32_por", IX86_BUILTIN_POR, 0, 0 },
11968 { MASK_MMX, CODE_FOR_mmx_xordi3, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR, 0, 0 },
11969
11970 { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB, 0, 0 },
11971 { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_mmx_uavgv4hi3, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW, 0, 0 },
11972
11973 { MASK_MMX, CODE_FOR_eqv8qi3, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB, 0, 0 },
11974 { MASK_MMX, CODE_FOR_eqv4hi3, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW, 0, 0 },
11975 { MASK_MMX, CODE_FOR_eqv2si3, "__builtin_ia32_pcmpeqd", IX86_BUILTIN_PCMPEQD, 0, 0 },
11976 { MASK_MMX, CODE_FOR_gtv8qi3, "__builtin_ia32_pcmpgtb", IX86_BUILTIN_PCMPGTB, 0, 0 },
11977 { MASK_MMX, CODE_FOR_gtv4hi3, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW, 0, 0 },
11978 { MASK_MMX, CODE_FOR_gtv2si3, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD, 0, 0 },
11979
11980 { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_umaxv8qi3, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB, 0, 0 },
11981 { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_smaxv4hi3, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW, 0, 0 },
11982 { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_uminv8qi3, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB, 0, 0 },
11983 { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_sminv4hi3, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW, 0, 0 },
11984
11985 { MASK_MMX, CODE_FOR_mmx_punpckhbw, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW, 0, 0 },
11986 { MASK_MMX, CODE_FOR_mmx_punpckhwd, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD, 0, 0 },
11987 { MASK_MMX, CODE_FOR_mmx_punpckhdq, "__builtin_ia32_punpckhdq", IX86_BUILTIN_PUNPCKHDQ, 0, 0 },
11988 { MASK_MMX, CODE_FOR_mmx_punpcklbw, "__builtin_ia32_punpcklbw", IX86_BUILTIN_PUNPCKLBW, 0, 0 },
11989 { MASK_MMX, CODE_FOR_mmx_punpcklwd, "__builtin_ia32_punpcklwd", IX86_BUILTIN_PUNPCKLWD, 0, 0 },
11990 { MASK_MMX, CODE_FOR_mmx_punpckldq, "__builtin_ia32_punpckldq", IX86_BUILTIN_PUNPCKLDQ, 0, 0 },
11991
11992 /* Special. */
11993 { MASK_MMX, CODE_FOR_mmx_packsswb, 0, IX86_BUILTIN_PACKSSWB, 0, 0 },
11994 { MASK_MMX, CODE_FOR_mmx_packssdw, 0, IX86_BUILTIN_PACKSSDW, 0, 0 },
11995 { MASK_MMX, CODE_FOR_mmx_packuswb, 0, IX86_BUILTIN_PACKUSWB, 0, 0 },
11996
11997 { MASK_SSE1, CODE_FOR_cvtpi2ps, 0, IX86_BUILTIN_CVTPI2PS, 0, 0 },
11998 { MASK_SSE1, CODE_FOR_cvtsi2ss, 0, IX86_BUILTIN_CVTSI2SS, 0, 0 },
11999
12000 { MASK_MMX, CODE_FOR_ashlv4hi3, 0, IX86_BUILTIN_PSLLW, 0, 0 },
12001 { MASK_MMX, CODE_FOR_ashlv4hi3, 0, IX86_BUILTIN_PSLLWI, 0, 0 },
12002 { MASK_MMX, CODE_FOR_ashlv2si3, 0, IX86_BUILTIN_PSLLD, 0, 0 },
12003 { MASK_MMX, CODE_FOR_ashlv2si3, 0, IX86_BUILTIN_PSLLDI, 0, 0 },
12004 { MASK_MMX, CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQ, 0, 0 },
12005 { MASK_MMX, CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQI, 0, 0 },
12006
12007 { MASK_MMX, CODE_FOR_lshrv4hi3, 0, IX86_BUILTIN_PSRLW, 0, 0 },
12008 { MASK_MMX, CODE_FOR_lshrv4hi3, 0, IX86_BUILTIN_PSRLWI, 0, 0 },
12009 { MASK_MMX, CODE_FOR_lshrv2si3, 0, IX86_BUILTIN_PSRLD, 0, 0 },
12010 { MASK_MMX, CODE_FOR_lshrv2si3, 0, IX86_BUILTIN_PSRLDI, 0, 0 },
12011 { MASK_MMX, CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQ, 0, 0 },
12012 { MASK_MMX, CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQI, 0, 0 },
12013
12014 { MASK_MMX, CODE_FOR_ashrv4hi3, 0, IX86_BUILTIN_PSRAW, 0, 0 },
12015 { MASK_MMX, CODE_FOR_ashrv4hi3, 0, IX86_BUILTIN_PSRAWI, 0, 0 },
12016 { MASK_MMX, CODE_FOR_ashrv2si3, 0, IX86_BUILTIN_PSRAD, 0, 0 },
12017 { MASK_MMX, CODE_FOR_ashrv2si3, 0, IX86_BUILTIN_PSRADI, 0, 0 },
12018
12019 { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_mmx_psadbw, 0, IX86_BUILTIN_PSADBW, 0, 0 },
12020 { MASK_MMX, CODE_FOR_mmx_pmaddwd, 0, IX86_BUILTIN_PMADDWD, 0, 0 },
12021
12022 /* SSE2 */
12023 { MASK_SSE2, CODE_FOR_addv2df3, "__builtin_ia32_addpd", IX86_BUILTIN_ADDPD, 0, 0 },
12024 { MASK_SSE2, CODE_FOR_subv2df3, "__builtin_ia32_subpd", IX86_BUILTIN_SUBPD, 0, 0 },
12025 { MASK_SSE2, CODE_FOR_mulv2df3, "__builtin_ia32_mulpd", IX86_BUILTIN_MULPD, 0, 0 },
12026 { MASK_SSE2, CODE_FOR_divv2df3, "__builtin_ia32_divpd", IX86_BUILTIN_DIVPD, 0, 0 },
12027 { MASK_SSE2, CODE_FOR_vmaddv2df3, "__builtin_ia32_addsd", IX86_BUILTIN_ADDSD, 0, 0 },
12028 { MASK_SSE2, CODE_FOR_vmsubv2df3, "__builtin_ia32_subsd", IX86_BUILTIN_SUBSD, 0, 0 },
12029 { MASK_SSE2, CODE_FOR_vmmulv2df3, "__builtin_ia32_mulsd", IX86_BUILTIN_MULSD, 0, 0 },
12030 { MASK_SSE2, CODE_FOR_vmdivv2df3, "__builtin_ia32_divsd", IX86_BUILTIN_DIVSD, 0, 0 },
12031
12032 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpeqpd", IX86_BUILTIN_CMPEQPD, EQ, 0 },
12033 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpltpd", IX86_BUILTIN_CMPLTPD, LT, 0 },
12034 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmplepd", IX86_BUILTIN_CMPLEPD, LE, 0 },
12035 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpgtpd", IX86_BUILTIN_CMPGTPD, LT, 1 },
12036 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpgepd", IX86_BUILTIN_CMPGEPD, LE, 1 },
12037 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpunordpd", IX86_BUILTIN_CMPUNORDPD, UNORDERED, 0 },
12038 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpneqpd", IX86_BUILTIN_CMPNEQPD, EQ, 0 },
12039 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpnltpd", IX86_BUILTIN_CMPNLTPD, LT, 0 },
12040 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpnlepd", IX86_BUILTIN_CMPNLEPD, LE, 0 },
12041 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpngtpd", IX86_BUILTIN_CMPNGTPD, LT, 1 },
12042 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpngepd", IX86_BUILTIN_CMPNGEPD, LE, 1 },
12043 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpordpd", IX86_BUILTIN_CMPORDPD, UNORDERED, 0 },
12044 { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmpeqsd", IX86_BUILTIN_CMPEQSD, EQ, 0 },
12045 { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmpltsd", IX86_BUILTIN_CMPLTSD, LT, 0 },
12046 { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmplesd", IX86_BUILTIN_CMPLESD, LE, 0 },
12047 { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmpunordsd", IX86_BUILTIN_CMPUNORDSD, UNORDERED, 0 },
12048 { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpneqsd", IX86_BUILTIN_CMPNEQSD, EQ, 0 },
12049 { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpnltsd", IX86_BUILTIN_CMPNLTSD, LT, 0 },
12050 { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpnlesd", IX86_BUILTIN_CMPNLESD, LE, 0 },
12051 { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpordsd", IX86_BUILTIN_CMPORDSD, UNORDERED, 0 },
12052
12053 { MASK_SSE2, CODE_FOR_sminv2df3, "__builtin_ia32_minpd", IX86_BUILTIN_MINPD, 0, 0 },
12054 { MASK_SSE2, CODE_FOR_smaxv2df3, "__builtin_ia32_maxpd", IX86_BUILTIN_MAXPD, 0, 0 },
12055 { MASK_SSE2, CODE_FOR_vmsminv2df3, "__builtin_ia32_minsd", IX86_BUILTIN_MINSD, 0, 0 },
12056 { MASK_SSE2, CODE_FOR_vmsmaxv2df3, "__builtin_ia32_maxsd", IX86_BUILTIN_MAXSD, 0, 0 },
12057
12058 { MASK_SSE2, CODE_FOR_sse2_andv2df3, "__builtin_ia32_andpd", IX86_BUILTIN_ANDPD, 0, 0 },
12059 { MASK_SSE2, CODE_FOR_sse2_nandv2df3, "__builtin_ia32_andnpd", IX86_BUILTIN_ANDNPD, 0, 0 },
12060 { MASK_SSE2, CODE_FOR_sse2_iorv2df3, "__builtin_ia32_orpd", IX86_BUILTIN_ORPD, 0, 0 },
12061 { MASK_SSE2, CODE_FOR_sse2_xorv2df3, "__builtin_ia32_xorpd", IX86_BUILTIN_XORPD, 0, 0 },
12062
12063 { MASK_SSE2, CODE_FOR_sse2_movsd, "__builtin_ia32_movsd", IX86_BUILTIN_MOVSD, 0, 0 },
12064 { MASK_SSE2, CODE_FOR_sse2_unpckhpd, "__builtin_ia32_unpckhpd", IX86_BUILTIN_UNPCKHPD, 0, 0 },
12065 { MASK_SSE2, CODE_FOR_sse2_unpcklpd, "__builtin_ia32_unpcklpd", IX86_BUILTIN_UNPCKLPD, 0, 0 },
12066
12067 /* SSE2 MMX */
12068 { MASK_SSE2, CODE_FOR_addv16qi3, "__builtin_ia32_paddb128", IX86_BUILTIN_PADDB128, 0, 0 },
12069 { MASK_SSE2, CODE_FOR_addv8hi3, "__builtin_ia32_paddw128", IX86_BUILTIN_PADDW128, 0, 0 },
12070 { MASK_SSE2, CODE_FOR_addv4si3, "__builtin_ia32_paddd128", IX86_BUILTIN_PADDD128, 0, 0 },
12071 { MASK_SSE2, CODE_FOR_addv4si3, "__builtin_ia32_paddq128", IX86_BUILTIN_PADDQ128, 0, 0 },
12072 { MASK_SSE2, CODE_FOR_subv16qi3, "__builtin_ia32_psubb128", IX86_BUILTIN_PSUBB128, 0, 0 },
12073 { MASK_SSE2, CODE_FOR_subv8hi3, "__builtin_ia32_psubw128", IX86_BUILTIN_PSUBW128, 0, 0 },
12074 { MASK_SSE2, CODE_FOR_subv4si3, "__builtin_ia32_psubd128", IX86_BUILTIN_PSUBD128, 0, 0 },
12075 { MASK_SSE2, CODE_FOR_subv4si3, "__builtin_ia32_psubq128", IX86_BUILTIN_PSUBQ128, 0, 0 },
12076
12077 { MASK_MMX, CODE_FOR_ssaddv16qi3, "__builtin_ia32_paddsb128", IX86_BUILTIN_PADDSB128, 0, 0 },
12078 { MASK_MMX, CODE_FOR_ssaddv8hi3, "__builtin_ia32_paddsw128", IX86_BUILTIN_PADDSW128, 0, 0 },
12079 { MASK_MMX, CODE_FOR_sssubv16qi3, "__builtin_ia32_psubsb128", IX86_BUILTIN_PSUBSB128, 0, 0 },
12080 { MASK_MMX, CODE_FOR_sssubv8hi3, "__builtin_ia32_psubsw128", IX86_BUILTIN_PSUBSW128, 0, 0 },
12081 { MASK_MMX, CODE_FOR_usaddv16qi3, "__builtin_ia32_paddusb128", IX86_BUILTIN_PADDUSB128, 0, 0 },
12082 { MASK_MMX, CODE_FOR_usaddv8hi3, "__builtin_ia32_paddusw128", IX86_BUILTIN_PADDUSW128, 0, 0 },
12083 { MASK_MMX, CODE_FOR_ussubv16qi3, "__builtin_ia32_psubusb128", IX86_BUILTIN_PSUBUSB128, 0, 0 },
12084 { MASK_MMX, CODE_FOR_ussubv8hi3, "__builtin_ia32_psubusw128", IX86_BUILTIN_PSUBUSW128, 0, 0 },
12085
12086 { MASK_SSE2, CODE_FOR_mulv8hi3, "__builtin_ia32_pmullw128", IX86_BUILTIN_PMULLW128, 0, 0 },
12087 { MASK_SSE2, CODE_FOR_smulv8hi3_highpart, "__builtin_ia32_pmulhw128", IX86_BUILTIN_PMULHW128, 0, 0 },
12088 { MASK_SSE2, CODE_FOR_sse2_umulsidi3, "__builtin_ia32_pmuludq", IX86_BUILTIN_PMULUDQ, 0, 0 },
12089 { MASK_SSE2, CODE_FOR_sse2_umulv2siv2di3, "__builtin_ia32_pmuludq128", IX86_BUILTIN_PMULUDQ128, 0, 0 },
12090
12091 { MASK_SSE2, CODE_FOR_sse2_andv2di3, "__builtin_ia32_pand128", IX86_BUILTIN_PAND128, 0, 0 },
12092 { MASK_SSE2, CODE_FOR_sse2_nandv2di3, "__builtin_ia32_pandn128", IX86_BUILTIN_PANDN128, 0, 0 },
12093 { MASK_SSE2, CODE_FOR_sse2_iorv2di3, "__builtin_ia32_por128", IX86_BUILTIN_POR128, 0, 0 },
12094 { MASK_SSE2, CODE_FOR_sse2_xorv2di3, "__builtin_ia32_pxor128", IX86_BUILTIN_PXOR128, 0, 0 },
12095
12096 { MASK_SSE2, CODE_FOR_sse2_uavgv16qi3, "__builtin_ia32_pavgb128", IX86_BUILTIN_PAVGB128, 0, 0 },
12097 { MASK_SSE2, CODE_FOR_sse2_uavgv8hi3, "__builtin_ia32_pavgw128", IX86_BUILTIN_PAVGW128, 0, 0 },
12098
12099 { MASK_SSE2, CODE_FOR_eqv16qi3, "__builtin_ia32_pcmpeqb128", IX86_BUILTIN_PCMPEQB128, 0, 0 },
12100 { MASK_SSE2, CODE_FOR_eqv8hi3, "__builtin_ia32_pcmpeqw128", IX86_BUILTIN_PCMPEQW128, 0, 0 },
12101 { MASK_SSE2, CODE_FOR_eqv4si3, "__builtin_ia32_pcmpeqd128", IX86_BUILTIN_PCMPEQD128, 0, 0 },
12102 { MASK_SSE2, CODE_FOR_gtv16qi3, "__builtin_ia32_pcmpgtb128", IX86_BUILTIN_PCMPGTB128, 0, 0 },
12103 { MASK_SSE2, CODE_FOR_gtv8hi3, "__builtin_ia32_pcmpgtw128", IX86_BUILTIN_PCMPGTW128, 0, 0 },
12104 { MASK_SSE2, CODE_FOR_gtv4si3, "__builtin_ia32_pcmpgtd128", IX86_BUILTIN_PCMPGTD128, 0, 0 },
12105
12106 { MASK_SSE2, CODE_FOR_umaxv16qi3, "__builtin_ia32_pmaxub128", IX86_BUILTIN_PMAXUB128, 0, 0 },
12107 { MASK_SSE2, CODE_FOR_smaxv8hi3, "__builtin_ia32_pmaxsw128", IX86_BUILTIN_PMAXSW128, 0, 0 },
12108 { MASK_SSE2, CODE_FOR_uminv16qi3, "__builtin_ia32_pminub128", IX86_BUILTIN_PMINUB128, 0, 0 },
12109 { MASK_SSE2, CODE_FOR_sminv8hi3, "__builtin_ia32_pminsw128", IX86_BUILTIN_PMINSW128, 0, 0 },
12110
12111 { MASK_SSE2, CODE_FOR_sse2_punpckhbw, "__builtin_ia32_punpckhbw128", IX86_BUILTIN_PUNPCKHBW128, 0, 0 },
12112 { MASK_SSE2, CODE_FOR_sse2_punpckhwd, "__builtin_ia32_punpckhwd128", IX86_BUILTIN_PUNPCKHWD128, 0, 0 },
12113 { MASK_SSE2, CODE_FOR_sse2_punpckhdq, "__builtin_ia32_punpckhdq128", IX86_BUILTIN_PUNPCKHDQ128, 0, 0 },
12114 { MASK_SSE2, CODE_FOR_sse2_punpckhqdq, "__builtin_ia32_punpckhqdq128", IX86_BUILTIN_PUNPCKHQDQ128, 0, 0 },
12115 { MASK_SSE2, CODE_FOR_sse2_punpcklbw, "__builtin_ia32_punpcklbw128", IX86_BUILTIN_PUNPCKLBW128, 0, 0 },
12116 { MASK_SSE2, CODE_FOR_sse2_punpcklwd, "__builtin_ia32_punpcklwd128", IX86_BUILTIN_PUNPCKLWD128, 0, 0 },
12117 { MASK_SSE2, CODE_FOR_sse2_punpckldq, "__builtin_ia32_punpckldq128", IX86_BUILTIN_PUNPCKLDQ128, 0, 0 },
12118 { MASK_SSE2, CODE_FOR_sse2_punpcklqdq, "__builtin_ia32_punpcklqdq128", IX86_BUILTIN_PUNPCKLQDQ128, 0, 0 },
12119
12120 { MASK_SSE2, CODE_FOR_sse2_packsswb, "__builtin_ia32_packsswb128", IX86_BUILTIN_PACKSSWB128, 0, 0 },
12121 { MASK_SSE2, CODE_FOR_sse2_packssdw, "__builtin_ia32_packssdw128", IX86_BUILTIN_PACKSSDW128, 0, 0 },
12122 { MASK_SSE2, CODE_FOR_sse2_packuswb, "__builtin_ia32_packuswb128", IX86_BUILTIN_PACKUSWB128, 0, 0 },
12123
12124 { MASK_SSE2, CODE_FOR_umulv8hi3_highpart, "__builtin_ia32_pmulhuw128", IX86_BUILTIN_PMULHUW128, 0, 0 },
12125 { MASK_SSE2, CODE_FOR_sse2_psadbw, 0, IX86_BUILTIN_PSADBW128, 0, 0 },
12126
12127 { MASK_SSE2, CODE_FOR_ashlv8hi3_ti, 0, IX86_BUILTIN_PSLLW128, 0, 0 },
12128 { MASK_SSE2, CODE_FOR_ashlv8hi3, 0, IX86_BUILTIN_PSLLWI128, 0, 0 },
12129 { MASK_SSE2, CODE_FOR_ashlv4si3_ti, 0, IX86_BUILTIN_PSLLD128, 0, 0 },
12130 { MASK_SSE2, CODE_FOR_ashlv4si3, 0, IX86_BUILTIN_PSLLDI128, 0, 0 },
12131 { MASK_SSE2, CODE_FOR_ashlv2di3_ti, 0, IX86_BUILTIN_PSLLQ128, 0, 0 },
12132 { MASK_SSE2, CODE_FOR_ashlv2di3, 0, IX86_BUILTIN_PSLLQI128, 0, 0 },
12133
12134 { MASK_SSE2, CODE_FOR_lshrv8hi3_ti, 0, IX86_BUILTIN_PSRLW128, 0, 0 },
12135 { MASK_SSE2, CODE_FOR_lshrv8hi3, 0, IX86_BUILTIN_PSRLWI128, 0, 0 },
12136 { MASK_SSE2, CODE_FOR_lshrv4si3_ti, 0, IX86_BUILTIN_PSRLD128, 0, 0 },
12137 { MASK_SSE2, CODE_FOR_lshrv4si3, 0, IX86_BUILTIN_PSRLDI128, 0, 0 },
12138 { MASK_SSE2, CODE_FOR_lshrv2di3_ti, 0, IX86_BUILTIN_PSRLQ128, 0, 0 },
12139 { MASK_SSE2, CODE_FOR_lshrv2di3, 0, IX86_BUILTIN_PSRLQI128, 0, 0 },
12140
12141 { MASK_SSE2, CODE_FOR_ashrv8hi3_ti, 0, IX86_BUILTIN_PSRAW128, 0, 0 },
12142 { MASK_SSE2, CODE_FOR_ashrv8hi3, 0, IX86_BUILTIN_PSRAWI128, 0, 0 },
12143 { MASK_SSE2, CODE_FOR_ashrv4si3_ti, 0, IX86_BUILTIN_PSRAD128, 0, 0 },
12144 { MASK_SSE2, CODE_FOR_ashrv4si3, 0, IX86_BUILTIN_PSRADI128, 0, 0 },
12145
12146 { MASK_SSE2, CODE_FOR_sse2_pmaddwd, 0, IX86_BUILTIN_PMADDWD128, 0, 0 },
12147
12148 { MASK_SSE2, CODE_FOR_cvtsi2sd, 0, IX86_BUILTIN_CVTSI2SD, 0, 0 },
12149 { MASK_SSE2, CODE_FOR_cvtsd2ss, 0, IX86_BUILTIN_CVTSD2SS, 0, 0 },
12150 { MASK_SSE2, CODE_FOR_cvtss2sd, 0, IX86_BUILTIN_CVTSS2SD, 0, 0 }
12151 };
12152
12153 static const struct builtin_description bdesc_1arg[] =
12154 {
12155 { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_mmx_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB, 0, 0 },
12156 { MASK_SSE1, CODE_FOR_sse_movmskps, 0, IX86_BUILTIN_MOVMSKPS, 0, 0 },
12157
12158 { MASK_SSE1, CODE_FOR_sqrtv4sf2, 0, IX86_BUILTIN_SQRTPS, 0, 0 },
12159 { MASK_SSE1, CODE_FOR_rsqrtv4sf2, 0, IX86_BUILTIN_RSQRTPS, 0, 0 },
12160 { MASK_SSE1, CODE_FOR_rcpv4sf2, 0, IX86_BUILTIN_RCPPS, 0, 0 },
12161
12162 { MASK_SSE1, CODE_FOR_cvtps2pi, 0, IX86_BUILTIN_CVTPS2PI, 0, 0 },
12163 { MASK_SSE1, CODE_FOR_cvtss2si, 0, IX86_BUILTIN_CVTSS2SI, 0, 0 },
12164 { MASK_SSE1, CODE_FOR_cvttps2pi, 0, IX86_BUILTIN_CVTTPS2PI, 0, 0 },
12165 { MASK_SSE1, CODE_FOR_cvttss2si, 0, IX86_BUILTIN_CVTTSS2SI, 0, 0 },
12166
12167 { MASK_SSE2, CODE_FOR_sse2_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB128, 0, 0 },
12168 { MASK_SSE2, CODE_FOR_sse2_movmskpd, 0, IX86_BUILTIN_MOVMSKPD, 0, 0 },
12169 { MASK_SSE2, CODE_FOR_sse2_movq2dq, 0, IX86_BUILTIN_MOVQ2DQ, 0, 0 },
12170 { MASK_SSE2, CODE_FOR_sse2_movdq2q, 0, IX86_BUILTIN_MOVDQ2Q, 0, 0 },
12171
12172 { MASK_SSE2, CODE_FOR_sqrtv2df2, 0, IX86_BUILTIN_SQRTPD, 0, 0 },
12173
12174 { MASK_SSE2, CODE_FOR_cvtdq2pd, 0, IX86_BUILTIN_CVTDQ2PD, 0, 0 },
12175 { MASK_SSE2, CODE_FOR_cvtdq2ps, 0, IX86_BUILTIN_CVTDQ2PS, 0, 0 },
12176
12177 { MASK_SSE2, CODE_FOR_cvtpd2dq, 0, IX86_BUILTIN_CVTPD2DQ, 0, 0 },
12178 { MASK_SSE2, CODE_FOR_cvtpd2pi, 0, IX86_BUILTIN_CVTPD2PI, 0, 0 },
12179 { MASK_SSE2, CODE_FOR_cvtpd2ps, 0, IX86_BUILTIN_CVTPD2PS, 0, 0 },
12180 { MASK_SSE2, CODE_FOR_cvttpd2dq, 0, IX86_BUILTIN_CVTTPD2DQ, 0, 0 },
12181 { MASK_SSE2, CODE_FOR_cvttpd2pi, 0, IX86_BUILTIN_CVTTPD2PI, 0, 0 },
12182
12183 { MASK_SSE2, CODE_FOR_cvtpi2pd, 0, IX86_BUILTIN_CVTPI2PD, 0, 0 },
12184
12185 { MASK_SSE2, CODE_FOR_cvtsd2si, 0, IX86_BUILTIN_CVTSD2SI, 0, 0 },
12186 { MASK_SSE2, CODE_FOR_cvttsd2si, 0, IX86_BUILTIN_CVTTSD2SI, 0, 0 },
12187
12188 { MASK_SSE2, CODE_FOR_cvtps2dq, 0, IX86_BUILTIN_CVTPS2DQ, 0, 0 },
12189 { MASK_SSE2, CODE_FOR_cvtps2pd, 0, IX86_BUILTIN_CVTPS2PD, 0, 0 },
12190 { MASK_SSE2, CODE_FOR_cvttps2dq, 0, IX86_BUILTIN_CVTTPS2DQ, 0, 0 },
12191
12192 { MASK_SSE2, CODE_FOR_sse2_movq, 0, IX86_BUILTIN_MOVQ, 0, 0 }
12193 };
12194
12195 void
12196 ix86_init_builtins ()
12197 {
12198 if (TARGET_MMX)
12199 ix86_init_mmx_sse_builtins ();
12200 }
12201
12202 /* Set up all the MMX/SSE builtins.  This is not called if TARGET_MMX
12203    is zero.  Otherwise, if TARGET_SSE is not set, only the MMX
12204    builtins are defined.  */
12205 static void
12206 ix86_init_mmx_sse_builtins ()
12207 {
12208 const struct builtin_description * d;
12209 size_t i;
12210
12211 tree pchar_type_node = build_pointer_type (char_type_node);
12212 tree pfloat_type_node = build_pointer_type (float_type_node);
12213 tree pv2si_type_node = build_pointer_type (V2SI_type_node);
12214 tree pv2di_type_node = build_pointer_type (V2DI_type_node);
12215 tree pdi_type_node = build_pointer_type (long_long_unsigned_type_node);
12216
12217 /* Comparisons. */
12218 tree int_ftype_v4sf_v4sf
12219 = build_function_type_list (integer_type_node,
12220 V4SF_type_node, V4SF_type_node, NULL_TREE);
12221 tree v4si_ftype_v4sf_v4sf
12222 = build_function_type_list (V4SI_type_node,
12223 V4SF_type_node, V4SF_type_node, NULL_TREE);
12224 /* MMX/SSE/integer conversions. */
12225 tree int_ftype_v4sf
12226 = build_function_type_list (integer_type_node,
12227 V4SF_type_node, NULL_TREE);
12228 tree int_ftype_v8qi
12229 = build_function_type_list (integer_type_node, V8QI_type_node, NULL_TREE);
12230 tree v4sf_ftype_v4sf_int
12231 = build_function_type_list (V4SF_type_node,
12232 V4SF_type_node, integer_type_node, NULL_TREE);
12233 tree v4sf_ftype_v4sf_v2si
12234 = build_function_type_list (V4SF_type_node,
12235 V4SF_type_node, V2SI_type_node, NULL_TREE);
12236 tree int_ftype_v4hi_int
12237 = build_function_type_list (integer_type_node,
12238 V4HI_type_node, integer_type_node, NULL_TREE);
12239 tree v4hi_ftype_v4hi_int_int
12240 = build_function_type_list (V4HI_type_node, V4HI_type_node,
12241 integer_type_node, integer_type_node,
12242 NULL_TREE);
12243 /* Miscellaneous. */
12244 tree v8qi_ftype_v4hi_v4hi
12245 = build_function_type_list (V8QI_type_node,
12246 V4HI_type_node, V4HI_type_node, NULL_TREE);
12247 tree v4hi_ftype_v2si_v2si
12248 = build_function_type_list (V4HI_type_node,
12249 V2SI_type_node, V2SI_type_node, NULL_TREE);
12250 tree v4sf_ftype_v4sf_v4sf_int
12251 = build_function_type_list (V4SF_type_node,
12252 V4SF_type_node, V4SF_type_node,
12253 integer_type_node, NULL_TREE);
12254 tree v2si_ftype_v4hi_v4hi
12255 = build_function_type_list (V2SI_type_node,
12256 V4HI_type_node, V4HI_type_node, NULL_TREE);
12257 tree v4hi_ftype_v4hi_int
12258 = build_function_type_list (V4HI_type_node,
12259 V4HI_type_node, integer_type_node, NULL_TREE);
12260 tree v4hi_ftype_v4hi_di
12261 = build_function_type_list (V4HI_type_node,
12262 V4HI_type_node, long_long_unsigned_type_node,
12263 NULL_TREE);
12264 tree v2si_ftype_v2si_di
12265 = build_function_type_list (V2SI_type_node,
12266 V2SI_type_node, long_long_unsigned_type_node,
12267 NULL_TREE);
12268 tree void_ftype_void
12269 = build_function_type (void_type_node, void_list_node);
12270 tree void_ftype_unsigned
12271 = build_function_type_list (void_type_node, unsigned_type_node, NULL_TREE);
12272 tree unsigned_ftype_void
12273 = build_function_type (unsigned_type_node, void_list_node);
12274 tree di_ftype_void
12275 = build_function_type (long_long_unsigned_type_node, void_list_node);
12276 tree v4sf_ftype_void
12277 = build_function_type (V4SF_type_node, void_list_node);
12278 tree v2si_ftype_v4sf
12279 = build_function_type_list (V2SI_type_node, V4SF_type_node, NULL_TREE);
12280 /* Loads/stores. */
12281 tree void_ftype_v8qi_v8qi_pchar
12282 = build_function_type_list (void_type_node,
12283 V8QI_type_node, V8QI_type_node,
12284 pchar_type_node, NULL_TREE);
12285 tree v4sf_ftype_pfloat
12286 = build_function_type_list (V4SF_type_node, pfloat_type_node, NULL_TREE);
12287 /* @@@ the type is bogus */
12288 tree v4sf_ftype_v4sf_pv2si
12289 = build_function_type_list (V4SF_type_node,
12290 V4SF_type_node, pv2di_type_node, NULL_TREE);
12291 tree void_ftype_pv2si_v4sf
12292 = build_function_type_list (void_type_node,
12293 pv2di_type_node, V4SF_type_node, NULL_TREE);
12294 tree void_ftype_pfloat_v4sf
12295 = build_function_type_list (void_type_node,
12296 pfloat_type_node, V4SF_type_node, NULL_TREE);
12297 tree void_ftype_pdi_di
12298 = build_function_type_list (void_type_node,
12299 pdi_type_node, long_long_unsigned_type_node,
12300 NULL_TREE);
12301 tree void_ftype_pv2di_v2di
12302 = build_function_type_list (void_type_node,
12303 pv2di_type_node, V2DI_type_node, NULL_TREE);
12304 /* Normal vector unops. */
12305 tree v4sf_ftype_v4sf
12306 = build_function_type_list (V4SF_type_node, V4SF_type_node, NULL_TREE);
12307
12308 /* Normal vector binops. */
12309 tree v4sf_ftype_v4sf_v4sf
12310 = build_function_type_list (V4SF_type_node,
12311 V4SF_type_node, V4SF_type_node, NULL_TREE);
12312 tree v8qi_ftype_v8qi_v8qi
12313 = build_function_type_list (V8QI_type_node,
12314 V8QI_type_node, V8QI_type_node, NULL_TREE);
12315 tree v4hi_ftype_v4hi_v4hi
12316 = build_function_type_list (V4HI_type_node,
12317 V4HI_type_node, V4HI_type_node, NULL_TREE);
12318 tree v2si_ftype_v2si_v2si
12319 = build_function_type_list (V2SI_type_node,
12320 V2SI_type_node, V2SI_type_node, NULL_TREE);
12321 tree di_ftype_di_di
12322 = build_function_type_list (long_long_unsigned_type_node,
12323 long_long_unsigned_type_node,
12324 long_long_unsigned_type_node, NULL_TREE);
12325
12326 tree v2si_ftype_v2sf
12327 = build_function_type_list (V2SI_type_node, V2SF_type_node, NULL_TREE);
12328 tree v2sf_ftype_v2si
12329 = build_function_type_list (V2SF_type_node, V2SI_type_node, NULL_TREE);
12330 tree v2si_ftype_v2si
12331 = build_function_type_list (V2SI_type_node, V2SI_type_node, NULL_TREE);
12332 tree v2sf_ftype_v2sf
12333 = build_function_type_list (V2SF_type_node, V2SF_type_node, NULL_TREE);
12334 tree v2sf_ftype_v2sf_v2sf
12335 = build_function_type_list (V2SF_type_node,
12336 V2SF_type_node, V2SF_type_node, NULL_TREE);
12337 tree v2si_ftype_v2sf_v2sf
12338 = build_function_type_list (V2SI_type_node,
12339 V2SF_type_node, V2SF_type_node, NULL_TREE);
12340 tree pint_type_node = build_pointer_type (integer_type_node);
12341 tree pdouble_type_node = build_pointer_type (double_type_node);
12342 tree int_ftype_v2df_v2df
12343 = build_function_type_list (integer_type_node,
12344 V2DF_type_node, V2DF_type_node, NULL_TREE);
12345
12346 tree ti_ftype_void
12347 = build_function_type (intTI_type_node, void_list_node);
12348 tree v2di_ftype_void
12349 = build_function_type (V2DI_type_node, void_list_node);
12350 tree ti_ftype_ti_ti
12351 = build_function_type_list (intTI_type_node,
12352 intTI_type_node, intTI_type_node, NULL_TREE);
12353 tree void_ftype_pvoid
12354 = build_function_type_list (void_type_node, ptr_type_node, NULL_TREE);
12355 tree v2di_ftype_di
12356 = build_function_type_list (V2DI_type_node,
12357 long_long_unsigned_type_node, NULL_TREE);
12358 tree di_ftype_v2di
12359 = build_function_type_list (long_long_unsigned_type_node,
12360 V2DI_type_node, NULL_TREE);
12361 tree v4sf_ftype_v4si
12362 = build_function_type_list (V4SF_type_node, V4SI_type_node, NULL_TREE);
12363 tree v4si_ftype_v4sf
12364 = build_function_type_list (V4SI_type_node, V4SF_type_node, NULL_TREE);
12365 tree v2df_ftype_v4si
12366 = build_function_type_list (V2DF_type_node, V4SI_type_node, NULL_TREE);
12367 tree v4si_ftype_v2df
12368 = build_function_type_list (V4SI_type_node, V2DF_type_node, NULL_TREE);
12369 tree v2si_ftype_v2df
12370 = build_function_type_list (V2SI_type_node, V2DF_type_node, NULL_TREE);
12371 tree v4sf_ftype_v2df
12372 = build_function_type_list (V4SF_type_node, V2DF_type_node, NULL_TREE);
12373 tree v2df_ftype_v2si
12374 = build_function_type_list (V2DF_type_node, V2SI_type_node, NULL_TREE);
12375 tree v2df_ftype_v4sf
12376 = build_function_type_list (V2DF_type_node, V4SF_type_node, NULL_TREE);
12377 tree int_ftype_v2df
12378 = build_function_type_list (integer_type_node, V2DF_type_node, NULL_TREE);
12379 tree v2df_ftype_v2df_int
12380 = build_function_type_list (V2DF_type_node,
12381 V2DF_type_node, integer_type_node, NULL_TREE);
12382 tree v4sf_ftype_v4sf_v2df
12383 = build_function_type_list (V4SF_type_node,
12384 V4SF_type_node, V2DF_type_node, NULL_TREE);
12385 tree v2df_ftype_v2df_v4sf
12386 = build_function_type_list (V2DF_type_node,
12387 V2DF_type_node, V4SF_type_node, NULL_TREE);
12388 tree v2df_ftype_v2df_v2df_int
12389 = build_function_type_list (V2DF_type_node,
12390 V2DF_type_node, V2DF_type_node,
12391 integer_type_node,
12392 NULL_TREE);
12393 tree v2df_ftype_v2df_pv2si
12394 = build_function_type_list (V2DF_type_node,
12395 V2DF_type_node, pv2si_type_node, NULL_TREE);
12396 tree void_ftype_pv2si_v2df
12397 = build_function_type_list (void_type_node,
12398 pv2si_type_node, V2DF_type_node, NULL_TREE);
12399 tree void_ftype_pdouble_v2df
12400 = build_function_type_list (void_type_node,
12401 pdouble_type_node, V2DF_type_node, NULL_TREE);
12402 tree void_ftype_pint_int
12403 = build_function_type_list (void_type_node,
12404 pint_type_node, integer_type_node, NULL_TREE);
12405 tree void_ftype_v16qi_v16qi_pchar
12406 = build_function_type_list (void_type_node,
12407 V16QI_type_node, V16QI_type_node,
12408 pchar_type_node, NULL_TREE);
12409 tree v2df_ftype_pdouble
12410 = build_function_type_list (V2DF_type_node, pdouble_type_node, NULL_TREE);
12411 tree v2df_ftype_v2df_v2df
12412 = build_function_type_list (V2DF_type_node,
12413 V2DF_type_node, V2DF_type_node, NULL_TREE);
12414 tree v16qi_ftype_v16qi_v16qi
12415 = build_function_type_list (V16QI_type_node,
12416 V16QI_type_node, V16QI_type_node, NULL_TREE);
12417 tree v8hi_ftype_v8hi_v8hi
12418 = build_function_type_list (V8HI_type_node,
12419 V8HI_type_node, V8HI_type_node, NULL_TREE);
12420 tree v4si_ftype_v4si_v4si
12421 = build_function_type_list (V4SI_type_node,
12422 V4SI_type_node, V4SI_type_node, NULL_TREE);
12423 tree v2di_ftype_v2di_v2di
12424 = build_function_type_list (V2DI_type_node,
12425 V2DI_type_node, V2DI_type_node, NULL_TREE);
12426 tree v2di_ftype_v2df_v2df
12427 = build_function_type_list (V2DI_type_node,
12428 V2DF_type_node, V2DF_type_node, NULL_TREE);
12429 tree v2df_ftype_v2df
12430 = build_function_type_list (V2DF_type_node, V2DF_type_node, NULL_TREE);
12431 tree v2df_ftype_double
12432 = build_function_type_list (V2DF_type_node, double_type_node, NULL_TREE);
12433 tree v2df_ftype_double_double
12434 = build_function_type_list (V2DF_type_node,
12435 double_type_node, double_type_node, NULL_TREE);
12436 tree int_ftype_v8hi_int
12437 = build_function_type_list (integer_type_node,
12438 V8HI_type_node, integer_type_node, NULL_TREE);
12439 tree v8hi_ftype_v8hi_int_int
12440 = build_function_type_list (V8HI_type_node,
12441 V8HI_type_node, integer_type_node,
12442 integer_type_node, NULL_TREE);
12443 tree v2di_ftype_v2di_int
12444 = build_function_type_list (V2DI_type_node,
12445 V2DI_type_node, integer_type_node, NULL_TREE);
12446 tree v4si_ftype_v4si_int
12447 = build_function_type_list (V4SI_type_node,
12448 V4SI_type_node, integer_type_node, NULL_TREE);
12449 tree v8hi_ftype_v8hi_int
12450 = build_function_type_list (V8HI_type_node,
12451 V8HI_type_node, integer_type_node, NULL_TREE);
12452 tree v8hi_ftype_v8hi_v2di
12453 = build_function_type_list (V8HI_type_node,
12454 V8HI_type_node, V2DI_type_node, NULL_TREE);
12455 tree v4si_ftype_v4si_v2di
12456 = build_function_type_list (V4SI_type_node,
12457 V4SI_type_node, V2DI_type_node, NULL_TREE);
12458 tree v4si_ftype_v8hi_v8hi
12459 = build_function_type_list (V4SI_type_node,
12460 V8HI_type_node, V8HI_type_node, NULL_TREE);
12461 tree di_ftype_v8qi_v8qi
12462 = build_function_type_list (long_long_unsigned_type_node,
12463 V8QI_type_node, V8QI_type_node, NULL_TREE);
12464 tree v2di_ftype_v16qi_v16qi
12465 = build_function_type_list (V2DI_type_node,
12466 V16QI_type_node, V16QI_type_node, NULL_TREE);
12467 tree int_ftype_v16qi
12468 = build_function_type_list (integer_type_node, V16QI_type_node, NULL_TREE);
12469 tree v16qi_ftype_pchar
12470 = build_function_type_list (V16QI_type_node, pchar_type_node, NULL_TREE);
12471 tree void_ftype_pchar_v16qi
12472 = build_function_type_list (void_type_node,
12473 pchar_type_node, V16QI_type_node, NULL_TREE);
12474 tree v4si_ftype_pchar
12475 = build_function_type_list (V4SI_type_node, pchar_type_node, NULL_TREE);
12476 tree void_ftype_pchar_v4si
12477 = build_function_type_list (void_type_node,
12478 pchar_type_node, V4SI_type_node, NULL_TREE);
12479 tree v2di_ftype_v2di
12480 = build_function_type_list (V2DI_type_node, V2DI_type_node, NULL_TREE);
12481
12482 /* Add all builtins that are more or less simple operations on two
12483 operands. */
12484 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
12485 {
12486 /* Use one of the operands; the target can have a different mode for
12487 mask-generating compares. */
12488 enum machine_mode mode;
12489 tree type;
12490
12491 if (d->name == 0)
12492 continue;
12493 mode = insn_data[d->icode].operand[1].mode;
12494
12495 switch (mode)
12496 {
12497 case V16QImode:
12498 type = v16qi_ftype_v16qi_v16qi;
12499 break;
12500 case V8HImode:
12501 type = v8hi_ftype_v8hi_v8hi;
12502 break;
12503 case V4SImode:
12504 type = v4si_ftype_v4si_v4si;
12505 break;
12506 case V2DImode:
12507 type = v2di_ftype_v2di_v2di;
12508 break;
12509 case V2DFmode:
12510 type = v2df_ftype_v2df_v2df;
12511 break;
12512 case TImode:
12513 type = ti_ftype_ti_ti;
12514 break;
12515 case V4SFmode:
12516 type = v4sf_ftype_v4sf_v4sf;
12517 break;
12518 case V8QImode:
12519 type = v8qi_ftype_v8qi_v8qi;
12520 break;
12521 case V4HImode:
12522 type = v4hi_ftype_v4hi_v4hi;
12523 break;
12524 case V2SImode:
12525 type = v2si_ftype_v2si_v2si;
12526 break;
12527 case DImode:
12528 type = di_ftype_di_di;
12529 break;
12530
12531 default:
12532 abort ();
12533 }
12534
12535 /* Override for comparisons. */
12536 if (d->icode == CODE_FOR_maskcmpv4sf3
12537 || d->icode == CODE_FOR_maskncmpv4sf3
12538 || d->icode == CODE_FOR_vmmaskcmpv4sf3
12539 || d->icode == CODE_FOR_vmmaskncmpv4sf3)
12540 type = v4si_ftype_v4sf_v4sf;
12541
12542 if (d->icode == CODE_FOR_maskcmpv2df3
12543 || d->icode == CODE_FOR_maskncmpv2df3
12544 || d->icode == CODE_FOR_vmmaskcmpv2df3
12545 || d->icode == CODE_FOR_vmmaskncmpv2df3)
12546 type = v2di_ftype_v2df_v2df;
12547
12548 def_builtin (d->mask, d->name, type, d->code);
12549 }
12550
12551 /* Add the remaining MMX insns with somewhat more complicated types. */
12552 def_builtin (MASK_MMX, "__builtin_ia32_mmx_zero", di_ftype_void, IX86_BUILTIN_MMX_ZERO);
12553 def_builtin (MASK_MMX, "__builtin_ia32_emms", void_ftype_void, IX86_BUILTIN_EMMS);
12554 def_builtin (MASK_MMX, "__builtin_ia32_ldmxcsr", void_ftype_unsigned, IX86_BUILTIN_LDMXCSR);
12555 def_builtin (MASK_MMX, "__builtin_ia32_stmxcsr", unsigned_ftype_void, IX86_BUILTIN_STMXCSR);
12556 def_builtin (MASK_MMX, "__builtin_ia32_psllw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSLLW);
12557 def_builtin (MASK_MMX, "__builtin_ia32_pslld", v2si_ftype_v2si_di, IX86_BUILTIN_PSLLD);
12558 def_builtin (MASK_MMX, "__builtin_ia32_psllq", di_ftype_di_di, IX86_BUILTIN_PSLLQ);
12559
12560 def_builtin (MASK_MMX, "__builtin_ia32_psrlw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRLW);
12561 def_builtin (MASK_MMX, "__builtin_ia32_psrld", v2si_ftype_v2si_di, IX86_BUILTIN_PSRLD);
12562 def_builtin (MASK_MMX, "__builtin_ia32_psrlq", di_ftype_di_di, IX86_BUILTIN_PSRLQ);
12563
12564 def_builtin (MASK_MMX, "__builtin_ia32_psraw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRAW);
12565 def_builtin (MASK_MMX, "__builtin_ia32_psrad", v2si_ftype_v2si_di, IX86_BUILTIN_PSRAD);
12566
12567 def_builtin (MASK_MMX, "__builtin_ia32_pshufw", v4hi_ftype_v4hi_int, IX86_BUILTIN_PSHUFW);
12568 def_builtin (MASK_MMX, "__builtin_ia32_pmaddwd", v2si_ftype_v4hi_v4hi, IX86_BUILTIN_PMADDWD);
12569
12570 /* comi/ucomi insns. */
12571 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
12572 if (d->mask == MASK_SSE2)
12573 def_builtin (d->mask, d->name, int_ftype_v2df_v2df, d->code);
12574 else
12575 def_builtin (d->mask, d->name, int_ftype_v4sf_v4sf, d->code);
12576
12577 def_builtin (MASK_MMX, "__builtin_ia32_packsswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKSSWB);
12578 def_builtin (MASK_MMX, "__builtin_ia32_packssdw", v4hi_ftype_v2si_v2si, IX86_BUILTIN_PACKSSDW);
12579 def_builtin (MASK_MMX, "__builtin_ia32_packuswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKUSWB);
12580
12581 def_builtin (MASK_SSE1, "__builtin_ia32_cvtpi2ps", v4sf_ftype_v4sf_v2si, IX86_BUILTIN_CVTPI2PS);
12582 def_builtin (MASK_SSE1, "__builtin_ia32_cvtps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTPS2PI);
12583 def_builtin (MASK_SSE1, "__builtin_ia32_cvtsi2ss", v4sf_ftype_v4sf_int, IX86_BUILTIN_CVTSI2SS);
12584 def_builtin (MASK_SSE1, "__builtin_ia32_cvtss2si", int_ftype_v4sf, IX86_BUILTIN_CVTSS2SI);
12585 def_builtin (MASK_SSE1, "__builtin_ia32_cvttps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTTPS2PI);
12586 def_builtin (MASK_SSE1, "__builtin_ia32_cvttss2si", int_ftype_v4sf, IX86_BUILTIN_CVTTSS2SI);
12587
12588 def_builtin (MASK_SSE1 | MASK_3DNOW_A, "__builtin_ia32_pextrw", int_ftype_v4hi_int, IX86_BUILTIN_PEXTRW);
12589 def_builtin (MASK_SSE1 | MASK_3DNOW_A, "__builtin_ia32_pinsrw", v4hi_ftype_v4hi_int_int, IX86_BUILTIN_PINSRW);
12590
12591 def_builtin (MASK_SSE1 | MASK_3DNOW_A, "__builtin_ia32_maskmovq", void_ftype_v8qi_v8qi_pchar, IX86_BUILTIN_MASKMOVQ);
12592
12593 def_builtin (MASK_SSE1, "__builtin_ia32_loadaps", v4sf_ftype_pfloat, IX86_BUILTIN_LOADAPS);
12594 def_builtin (MASK_SSE1, "__builtin_ia32_loadups", v4sf_ftype_pfloat, IX86_BUILTIN_LOADUPS);
12595 def_builtin (MASK_SSE1, "__builtin_ia32_loadss", v4sf_ftype_pfloat, IX86_BUILTIN_LOADSS);
12596 def_builtin (MASK_SSE1, "__builtin_ia32_storeaps", void_ftype_pfloat_v4sf, IX86_BUILTIN_STOREAPS);
12597 def_builtin (MASK_SSE1, "__builtin_ia32_storeups", void_ftype_pfloat_v4sf, IX86_BUILTIN_STOREUPS);
12598 def_builtin (MASK_SSE1, "__builtin_ia32_storess", void_ftype_pfloat_v4sf, IX86_BUILTIN_STORESS);
12599
12600 def_builtin (MASK_SSE1, "__builtin_ia32_loadhps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADHPS);
12601 def_builtin (MASK_SSE1, "__builtin_ia32_loadlps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADLPS);
12602 def_builtin (MASK_SSE1, "__builtin_ia32_storehps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STOREHPS);
12603 def_builtin (MASK_SSE1, "__builtin_ia32_storelps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STORELPS);
12604
12605 def_builtin (MASK_SSE1, "__builtin_ia32_movmskps", int_ftype_v4sf, IX86_BUILTIN_MOVMSKPS);
12606 def_builtin (MASK_SSE1 | MASK_3DNOW_A, "__builtin_ia32_pmovmskb", int_ftype_v8qi, IX86_BUILTIN_PMOVMSKB);
12607 def_builtin (MASK_SSE1, "__builtin_ia32_movntps", void_ftype_pfloat_v4sf, IX86_BUILTIN_MOVNTPS);
12608 def_builtin (MASK_SSE1 | MASK_3DNOW_A, "__builtin_ia32_movntq", void_ftype_pdi_di, IX86_BUILTIN_MOVNTQ);
12609
12610 def_builtin (MASK_SSE1 | MASK_3DNOW_A, "__builtin_ia32_sfence", void_ftype_void, IX86_BUILTIN_SFENCE);
12611
12612 def_builtin (MASK_SSE1 | MASK_3DNOW_A, "__builtin_ia32_psadbw", di_ftype_v8qi_v8qi, IX86_BUILTIN_PSADBW);
12613
12614 def_builtin (MASK_SSE1, "__builtin_ia32_rcpps", v4sf_ftype_v4sf, IX86_BUILTIN_RCPPS);
12615 def_builtin (MASK_SSE1, "__builtin_ia32_rcpss", v4sf_ftype_v4sf, IX86_BUILTIN_RCPSS);
12616 def_builtin (MASK_SSE1, "__builtin_ia32_rsqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTPS);
12617 def_builtin (MASK_SSE1, "__builtin_ia32_rsqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTSS);
12618 def_builtin (MASK_SSE1, "__builtin_ia32_sqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTPS);
12619 def_builtin (MASK_SSE1, "__builtin_ia32_sqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTSS);
12620
12621 def_builtin (MASK_SSE1, "__builtin_ia32_shufps", v4sf_ftype_v4sf_v4sf_int, IX86_BUILTIN_SHUFPS);
12622
12623 /* Original 3DNow! */
12624 def_builtin (MASK_3DNOW, "__builtin_ia32_femms", void_ftype_void, IX86_BUILTIN_FEMMS);
12625 def_builtin (MASK_3DNOW, "__builtin_ia32_pavgusb", v8qi_ftype_v8qi_v8qi, IX86_BUILTIN_PAVGUSB);
12626 def_builtin (MASK_3DNOW, "__builtin_ia32_pf2id", v2si_ftype_v2sf, IX86_BUILTIN_PF2ID);
12627 def_builtin (MASK_3DNOW, "__builtin_ia32_pfacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFACC);
12628 def_builtin (MASK_3DNOW, "__builtin_ia32_pfadd", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFADD);
12629 def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpeq", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPEQ);
12630 def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpge", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPGE);
12631 def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpgt", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPGT);
12632 def_builtin (MASK_3DNOW, "__builtin_ia32_pfmax", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMAX);
12633 def_builtin (MASK_3DNOW, "__builtin_ia32_pfmin", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMIN);
12634 def_builtin (MASK_3DNOW, "__builtin_ia32_pfmul", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMUL);
12635 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcp", v2sf_ftype_v2sf, IX86_BUILTIN_PFRCP);
12636 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcpit1", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRCPIT1);
12637 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcpit2", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRCPIT2);
12638 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrsqrt", v2sf_ftype_v2sf, IX86_BUILTIN_PFRSQRT);
12639 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrsqit1", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRSQIT1);
12640 def_builtin (MASK_3DNOW, "__builtin_ia32_pfsub", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFSUB);
12641 def_builtin (MASK_3DNOW, "__builtin_ia32_pfsubr", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFSUBR);
12642 def_builtin (MASK_3DNOW, "__builtin_ia32_pi2fd", v2sf_ftype_v2si, IX86_BUILTIN_PI2FD);
12643 def_builtin (MASK_3DNOW, "__builtin_ia32_pmulhrw", v4hi_ftype_v4hi_v4hi, IX86_BUILTIN_PMULHRW);
12644
12645 /* 3DNow! extension as used in the Athlon CPU. */
12646 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pf2iw", v2si_ftype_v2sf, IX86_BUILTIN_PF2IW);
12647 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pfnacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFNACC);
12648 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pfpnacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFPNACC);
12649 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pi2fw", v2sf_ftype_v2si, IX86_BUILTIN_PI2FW);
12650 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pswapdsf", v2sf_ftype_v2sf, IX86_BUILTIN_PSWAPDSF);
12651 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pswapdsi", v2si_ftype_v2si, IX86_BUILTIN_PSWAPDSI);
12652
12653 def_builtin (MASK_SSE1, "__builtin_ia32_setzerops", v4sf_ftype_void, IX86_BUILTIN_SSE_ZERO);
12654
12655 /* SSE2 */
12656 def_builtin (MASK_SSE2, "__builtin_ia32_pextrw128", int_ftype_v8hi_int, IX86_BUILTIN_PEXTRW128);
12657 def_builtin (MASK_SSE2, "__builtin_ia32_pinsrw128", v8hi_ftype_v8hi_int_int, IX86_BUILTIN_PINSRW128);
12658
12659 def_builtin (MASK_SSE2, "__builtin_ia32_maskmovdqu", void_ftype_v16qi_v16qi_pchar, IX86_BUILTIN_MASKMOVDQU);
12660 def_builtin (MASK_SSE2, "__builtin_ia32_movq2dq", v2di_ftype_di, IX86_BUILTIN_MOVQ2DQ);
12661 def_builtin (MASK_SSE2, "__builtin_ia32_movdq2q", di_ftype_v2di, IX86_BUILTIN_MOVDQ2Q);
12662
12663 def_builtin (MASK_SSE2, "__builtin_ia32_loadapd", v2df_ftype_pdouble, IX86_BUILTIN_LOADAPD);
12664 def_builtin (MASK_SSE2, "__builtin_ia32_loadupd", v2df_ftype_pdouble, IX86_BUILTIN_LOADUPD);
12665 def_builtin (MASK_SSE2, "__builtin_ia32_loadsd", v2df_ftype_pdouble, IX86_BUILTIN_LOADSD);
12666 def_builtin (MASK_SSE2, "__builtin_ia32_storeapd", void_ftype_pdouble_v2df, IX86_BUILTIN_STOREAPD);
12667 def_builtin (MASK_SSE2, "__builtin_ia32_storeupd", void_ftype_pdouble_v2df, IX86_BUILTIN_STOREUPD);
12668 def_builtin (MASK_SSE2, "__builtin_ia32_storesd", void_ftype_pdouble_v2df, IX86_BUILTIN_STORESD);
12669
12670 def_builtin (MASK_SSE2, "__builtin_ia32_loadhpd", v2df_ftype_v2df_pv2si, IX86_BUILTIN_LOADHPD);
12671 def_builtin (MASK_SSE2, "__builtin_ia32_loadlpd", v2df_ftype_v2df_pv2si, IX86_BUILTIN_LOADLPD);
12672 def_builtin (MASK_SSE2, "__builtin_ia32_storehpd", void_ftype_pv2si_v2df, IX86_BUILTIN_STOREHPD);
12673 def_builtin (MASK_SSE2, "__builtin_ia32_storelpd", void_ftype_pv2si_v2df, IX86_BUILTIN_STORELPD);
12674
12675 def_builtin (MASK_SSE2, "__builtin_ia32_movmskpd", int_ftype_v2df, IX86_BUILTIN_MOVMSKPD);
12676 def_builtin (MASK_SSE2, "__builtin_ia32_pmovmskb128", int_ftype_v16qi, IX86_BUILTIN_PMOVMSKB128);
12677 def_builtin (MASK_SSE2, "__builtin_ia32_movnti", void_ftype_pint_int, IX86_BUILTIN_MOVNTI);
12678 def_builtin (MASK_SSE2, "__builtin_ia32_movntpd", void_ftype_pdouble_v2df, IX86_BUILTIN_MOVNTPD);
12679 def_builtin (MASK_SSE2, "__builtin_ia32_movntdq", void_ftype_pv2di_v2di, IX86_BUILTIN_MOVNTDQ);
12680
12681 def_builtin (MASK_SSE2, "__builtin_ia32_pshufd", v4si_ftype_v4si_int, IX86_BUILTIN_PSHUFD);
12682 def_builtin (MASK_SSE2, "__builtin_ia32_pshuflw", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSHUFLW);
12683 def_builtin (MASK_SSE2, "__builtin_ia32_pshufhw", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSHUFHW);
12684 def_builtin (MASK_SSE2, "__builtin_ia32_psadbw128", v2di_ftype_v16qi_v16qi, IX86_BUILTIN_PSADBW128);
12685
12686 def_builtin (MASK_SSE2, "__builtin_ia32_sqrtpd", v2df_ftype_v2df, IX86_BUILTIN_SQRTPD);
12687 def_builtin (MASK_SSE2, "__builtin_ia32_sqrtsd", v2df_ftype_v2df, IX86_BUILTIN_SQRTSD);
12688
12689 def_builtin (MASK_SSE2, "__builtin_ia32_shufpd", v2df_ftype_v2df_v2df_int, IX86_BUILTIN_SHUFPD);
12690
12691 def_builtin (MASK_SSE2, "__builtin_ia32_cvtdq2pd", v2df_ftype_v4si, IX86_BUILTIN_CVTDQ2PD);
12692 def_builtin (MASK_SSE2, "__builtin_ia32_cvtdq2ps", v4sf_ftype_v4si, IX86_BUILTIN_CVTDQ2PS);
12693
12694 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2dq", v4si_ftype_v2df, IX86_BUILTIN_CVTPD2DQ);
12695 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2pi", v2si_ftype_v2df, IX86_BUILTIN_CVTPD2PI);
12696 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2ps", v4sf_ftype_v2df, IX86_BUILTIN_CVTPD2PS);
12697 def_builtin (MASK_SSE2, "__builtin_ia32_cvttpd2dq", v4si_ftype_v2df, IX86_BUILTIN_CVTTPD2DQ);
12698 def_builtin (MASK_SSE2, "__builtin_ia32_cvttpd2pi", v2si_ftype_v2df, IX86_BUILTIN_CVTTPD2PI);
12699
12700 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpi2pd", v2df_ftype_v2si, IX86_BUILTIN_CVTPI2PD);
12701
12702 def_builtin (MASK_SSE2, "__builtin_ia32_cvtsd2si", int_ftype_v2df, IX86_BUILTIN_CVTSD2SI);
12703 def_builtin (MASK_SSE2, "__builtin_ia32_cvttsd2si", int_ftype_v2df, IX86_BUILTIN_CVTTSD2SI);
12704
12705 def_builtin (MASK_SSE2, "__builtin_ia32_cvtps2dq", v4si_ftype_v4sf, IX86_BUILTIN_CVTPS2DQ);
12706 def_builtin (MASK_SSE2, "__builtin_ia32_cvtps2pd", v2df_ftype_v4sf, IX86_BUILTIN_CVTPS2PD);
12707 def_builtin (MASK_SSE2, "__builtin_ia32_cvttps2dq", v4si_ftype_v4sf, IX86_BUILTIN_CVTTPS2DQ);
12708
12709 def_builtin (MASK_SSE2, "__builtin_ia32_cvtsi2sd", v2df_ftype_v2df_int, IX86_BUILTIN_CVTSI2SD);
12710 def_builtin (MASK_SSE2, "__builtin_ia32_cvtsd2ss", v4sf_ftype_v4sf_v2df, IX86_BUILTIN_CVTSD2SS);
12711 def_builtin (MASK_SSE2, "__builtin_ia32_cvtss2sd", v2df_ftype_v2df_v4sf, IX86_BUILTIN_CVTSS2SD);
12712
12713 def_builtin (MASK_SSE2, "__builtin_ia32_setpd1", v2df_ftype_double, IX86_BUILTIN_SETPD1);
12714 def_builtin (MASK_SSE2, "__builtin_ia32_setpd", v2df_ftype_double_double, IX86_BUILTIN_SETPD);
12715 def_builtin (MASK_SSE2, "__builtin_ia32_setzeropd", ti_ftype_void, IX86_BUILTIN_CLRPD);
12716 def_builtin (MASK_SSE2, "__builtin_ia32_loadpd1", v2df_ftype_pdouble, IX86_BUILTIN_LOADPD1);
12717 def_builtin (MASK_SSE2, "__builtin_ia32_loadrpd", v2df_ftype_pdouble, IX86_BUILTIN_LOADRPD);
12718 def_builtin (MASK_SSE2, "__builtin_ia32_storepd1", void_ftype_pdouble_v2df, IX86_BUILTIN_STOREPD1);
12719 def_builtin (MASK_SSE2, "__builtin_ia32_storerpd", void_ftype_pdouble_v2df, IX86_BUILTIN_STORERPD);
12720
12721 def_builtin (MASK_SSE2, "__builtin_ia32_clflush", void_ftype_pvoid, IX86_BUILTIN_CLFLUSH);
12722 def_builtin (MASK_SSE2, "__builtin_ia32_lfence", void_ftype_void, IX86_BUILTIN_LFENCE);
12723 def_builtin (MASK_SSE2, "__builtin_ia32_mfence", void_ftype_void, IX86_BUILTIN_MFENCE);
12724
12725 def_builtin (MASK_SSE2, "__builtin_ia32_loaddqa", v16qi_ftype_pchar, IX86_BUILTIN_LOADDQA);
12726 def_builtin (MASK_SSE2, "__builtin_ia32_loaddqu", v16qi_ftype_pchar, IX86_BUILTIN_LOADDQU);
12727 def_builtin (MASK_SSE2, "__builtin_ia32_loadd", v4si_ftype_pchar, IX86_BUILTIN_LOADD);
12728 def_builtin (MASK_SSE2, "__builtin_ia32_storedqa", void_ftype_pchar_v16qi, IX86_BUILTIN_STOREDQA);
12729 def_builtin (MASK_SSE2, "__builtin_ia32_storedqu", void_ftype_pchar_v16qi, IX86_BUILTIN_STOREDQU);
12730 def_builtin (MASK_SSE2, "__builtin_ia32_stored", void_ftype_pchar_v4si, IX86_BUILTIN_STORED);
12731 def_builtin (MASK_SSE2, "__builtin_ia32_movq", v2di_ftype_v2di, IX86_BUILTIN_MOVQ);
12732
12733 def_builtin (MASK_SSE1, "__builtin_ia32_setzero128", v2di_ftype_void, IX86_BUILTIN_CLRTI);
12734
12735 def_builtin (MASK_SSE2, "__builtin_ia32_psllw128", v8hi_ftype_v8hi_v2di, IX86_BUILTIN_PSLLW128);
12736 def_builtin (MASK_SSE2, "__builtin_ia32_pslld128", v4si_ftype_v4si_v2di, IX86_BUILTIN_PSLLD128);
12737 def_builtin (MASK_SSE2, "__builtin_ia32_psllq128", v2di_ftype_v2di_v2di, IX86_BUILTIN_PSLLQ128);
12738
12739 def_builtin (MASK_SSE2, "__builtin_ia32_psrlw128", v8hi_ftype_v8hi_v2di, IX86_BUILTIN_PSRLW128);
12740 def_builtin (MASK_SSE2, "__builtin_ia32_psrld128", v4si_ftype_v4si_v2di, IX86_BUILTIN_PSRLD128);
12741 def_builtin (MASK_SSE2, "__builtin_ia32_psrlq128", v2di_ftype_v2di_v2di, IX86_BUILTIN_PSRLQ128);
12742
12743 def_builtin (MASK_SSE2, "__builtin_ia32_psraw128", v8hi_ftype_v8hi_v2di, IX86_BUILTIN_PSRAW128);
12744 def_builtin (MASK_SSE2, "__builtin_ia32_psrad128", v4si_ftype_v4si_v2di, IX86_BUILTIN_PSRAD128);
12745
12746 def_builtin (MASK_SSE2, "__builtin_ia32_pslldqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSLLDQI128);
12747 def_builtin (MASK_SSE2, "__builtin_ia32_psllwi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSLLWI128);
12748 def_builtin (MASK_SSE2, "__builtin_ia32_pslldi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSLLDI128);
12749 def_builtin (MASK_SSE2, "__builtin_ia32_psllqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSLLQI128);
12750
12751 def_builtin (MASK_SSE2, "__builtin_ia32_psrldqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSRLDQI128);
12752 def_builtin (MASK_SSE2, "__builtin_ia32_psrlwi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSRLWI128);
12753 def_builtin (MASK_SSE2, "__builtin_ia32_psrldi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSRLDI128);
12754 def_builtin (MASK_SSE2, "__builtin_ia32_psrlqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSRLQI128);
12755
12756 def_builtin (MASK_SSE2, "__builtin_ia32_psrawi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSRAWI128);
12757 def_builtin (MASK_SSE2, "__builtin_ia32_psradi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSRADI128);
12758
12759 def_builtin (MASK_SSE2, "__builtin_ia32_pmaddwd128", v4si_ftype_v8hi_v8hi, IX86_BUILTIN_PMADDWD128);
12760 }
12761
12762 /* Errors in the source file can cause expand_expr to return const0_rtx
12763 where we expect a vector. To avoid crashing, use one of the vector
12764 clear instructions. */
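/* The fresh pseudo returned here is cleared to zero (via gen_mmx_clrdi or
   gen_sse_clrv4sf below), so callers always receive a well-formed vector
   operand even for erroneous input.  */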
12765 static rtx
12766 safe_vector_operand (x, mode)
12767 rtx x;
12768 enum machine_mode mode;
12769 {
12770 if (x != const0_rtx)
12771 return x;
12772 x = gen_reg_rtx (mode);
12773
12774 if (VALID_MMX_REG_MODE (mode) || VALID_MMX_REG_MODE_3DNOW (mode))
12775 emit_insn (gen_mmx_clrdi (mode == DImode ? x
12776 : gen_rtx_SUBREG (DImode, x, 0)));
12777 else
12778 emit_insn (gen_sse_clrv4sf (mode == V4SFmode ? x
12779 : gen_rtx_SUBREG (V4SFmode, x, 0)));
12780 return x;
12781 }
12782
12783 /* Subroutine of ix86_expand_builtin to take care of binop insns. */
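/* For example, __builtin_ia32_pavgusb below goes through this routine with
   CODE_FOR_pavgusb: each argument is copied into a register when it does not
   already satisfy the insn predicate (and at most one of the two may remain
   a memory operand), then the two-operand pattern is emitted into TARGET.  */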
12784
12785 static rtx
12786 ix86_expand_binop_builtin (icode, arglist, target)
12787 enum insn_code icode;
12788 tree arglist;
12789 rtx target;
12790 {
12791 rtx pat;
12792 tree arg0 = TREE_VALUE (arglist);
12793 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
12794 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
12795 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
12796 enum machine_mode tmode = insn_data[icode].operand[0].mode;
12797 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
12798 enum machine_mode mode1 = insn_data[icode].operand[2].mode;
12799
12800 if (VECTOR_MODE_P (mode0))
12801 op0 = safe_vector_operand (op0, mode0);
12802 if (VECTOR_MODE_P (mode1))
12803 op1 = safe_vector_operand (op1, mode1);
12804
12805 if (! target
12806 || GET_MODE (target) != tmode
12807 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
12808 target = gen_reg_rtx (tmode);
12809
12810 /* In case the insn wants input operands in modes different from
12811 the result, abort. */
12812 if (GET_MODE (op0) != mode0 || GET_MODE (op1) != mode1)
12813 abort ();
12814
12815 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
12816 op0 = copy_to_mode_reg (mode0, op0);
12817 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
12818 op1 = copy_to_mode_reg (mode1, op1);
12819
12820 /* In the commutative cases, both op0 and op1 are nonimmediate_operand,
12821 yet one of the two must not be a memory. This is normally enforced
12822 by expanders, but we didn't bother to create one here. */
12823 if (GET_CODE (op0) == MEM && GET_CODE (op1) == MEM)
12824 op0 = copy_to_mode_reg (mode0, op0);
12825
12826 pat = GEN_FCN (icode) (target, op0, op1);
12827 if (! pat)
12828 return 0;
12829 emit_insn (pat);
12830 return target;
12831 }
12832
12833 /* Subroutine of ix86_expand_builtin to take care of stores. */
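/* The first argument of the builtin is the destination address (wrapped in a
   MEM of the insn's operand 0 mode below); the second is the value being
   stored.  __builtin_ia32_storeaps, expanded with CODE_FOR_sse_movaps, is a
   typical user.  */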
12834
12835 static rtx
12836 ix86_expand_store_builtin (icode, arglist)
12837 enum insn_code icode;
12838 tree arglist;
12839 {
12840 rtx pat;
12841 tree arg0 = TREE_VALUE (arglist);
12842 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
12843 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
12844 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
12845 enum machine_mode mode0 = insn_data[icode].operand[0].mode;
12846 enum machine_mode mode1 = insn_data[icode].operand[1].mode;
12847
12848 if (VECTOR_MODE_P (mode1))
12849 op1 = safe_vector_operand (op1, mode1);
12850
12851 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
12852
12853 if (! (*insn_data[icode].operand[1].predicate) (op1, mode1))
12854 op1 = copy_to_mode_reg (mode1, op1);
12855
12856 pat = GEN_FCN (icode) (op0, op1);
12857 if (pat)
12858 emit_insn (pat);
12859 return 0;
12860 }
12861
12862 /* Subroutine of ix86_expand_builtin to take care of unop insns. */
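/* DO_LOAD distinguishes two flavors: when nonzero the single argument is an
   address and is dereferenced through a MEM (used by the load builtins,
   e.g. __builtin_ia32_loadaps); when zero it is the operand value itself
   (e.g. the __builtin_ia32_pf2id expansion).  */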
12863
12864 static rtx
12865 ix86_expand_unop_builtin (icode, arglist, target, do_load)
12866 enum insn_code icode;
12867 tree arglist;
12868 rtx target;
12869 int do_load;
12870 {
12871 rtx pat;
12872 tree arg0 = TREE_VALUE (arglist);
12873 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
12874 enum machine_mode tmode = insn_data[icode].operand[0].mode;
12875 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
12876
12877 if (! target
12878 || GET_MODE (target) != tmode
12879 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
12880 target = gen_reg_rtx (tmode);
12881 if (do_load)
12882 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
12883 else
12884 {
12885 if (VECTOR_MODE_P (mode0))
12886 op0 = safe_vector_operand (op0, mode0);
12887
12888 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
12889 op0 = copy_to_mode_reg (mode0, op0);
12890 }
12891
12892 pat = GEN_FCN (icode) (target, op0);
12893 if (! pat)
12894 return 0;
12895 emit_insn (pat);
12896 return target;
12897 }
12898
12899 /* Subroutine of ix86_expand_builtin to take care of three special unop insns:
12900 sqrtss, rsqrtss, rcpss. */
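/* These scalar SSE insns use the same value both as the operand computed on
   and as the merge operand, so OP1 is simply set to OP0 below; the upper
   elements of the result therefore come from the (identical) input.  */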
12901
12902 static rtx
12903 ix86_expand_unop1_builtin (icode, arglist, target)
12904 enum insn_code icode;
12905 tree arglist;
12906 rtx target;
12907 {
12908 rtx pat;
12909 tree arg0 = TREE_VALUE (arglist);
12910 rtx op1, op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
12911 enum machine_mode tmode = insn_data[icode].operand[0].mode;
12912 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
12913
12914 if (! target
12915 || GET_MODE (target) != tmode
12916 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
12917 target = gen_reg_rtx (tmode);
12918
12919 if (VECTOR_MODE_P (mode0))
12920 op0 = safe_vector_operand (op0, mode0);
12921
12922 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
12923 op0 = copy_to_mode_reg (mode0, op0);
12924
12925 op1 = op0;
12926 if (! (*insn_data[icode].operand[2].predicate) (op1, mode0))
12927 op1 = copy_to_mode_reg (mode0, op1);
12928
12929 pat = GEN_FCN (icode) (target, op0, op1);
12930 if (! pat)
12931 return 0;
12932 emit_insn (pat);
12933 return target;
12934 }
12935
12936 /* Subroutine of ix86_expand_builtin to take care of comparison insns. */
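/* D->FLAG marks compares the hardware only provides in the opposite sense
   (e.g. GT/GE, which are done as LT/LE); for those the operands are swapped
   below before the comparison rtx is built.  */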
12937
12938 static rtx
12939 ix86_expand_sse_compare (d, arglist, target)
12940 const struct builtin_description *d;
12941 tree arglist;
12942 rtx target;
12943 {
12944 rtx pat;
12945 tree arg0 = TREE_VALUE (arglist);
12946 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
12947 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
12948 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
12949 rtx op2;
12950 enum machine_mode tmode = insn_data[d->icode].operand[0].mode;
12951 enum machine_mode mode0 = insn_data[d->icode].operand[1].mode;
12952 enum machine_mode mode1 = insn_data[d->icode].operand[2].mode;
12953 enum rtx_code comparison = d->comparison;
12954
12955 if (VECTOR_MODE_P (mode0))
12956 op0 = safe_vector_operand (op0, mode0);
12957 if (VECTOR_MODE_P (mode1))
12958 op1 = safe_vector_operand (op1, mode1);
12959
12960 /* Swap operands if we have a comparison that isn't available in
12961 hardware. */
12962 if (d->flag)
12963 {
12964 rtx tmp = gen_reg_rtx (mode1);
12965 emit_move_insn (tmp, op1);
12966 op1 = op0;
12967 op0 = tmp;
12968 }
12969
12970 if (! target
12971 || GET_MODE (target) != tmode
12972 || ! (*insn_data[d->icode].operand[0].predicate) (target, tmode))
12973 target = gen_reg_rtx (tmode);
12974
12975 if (! (*insn_data[d->icode].operand[1].predicate) (op0, mode0))
12976 op0 = copy_to_mode_reg (mode0, op0);
12977 if (! (*insn_data[d->icode].operand[2].predicate) (op1, mode1))
12978 op1 = copy_to_mode_reg (mode1, op1);
12979
12980 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
12981 pat = GEN_FCN (d->icode) (target, op0, op1, op2);
12982 if (! pat)
12983 return 0;
12984 emit_insn (pat);
12985 return target;
12986 }
12987
12988 /* Subroutine of ix86_expand_builtin to take care of comi insns. */
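/* The comi insns only set the flags, so the boolean result is materialized
   by zeroing an SImode pseudo and then setting its QImode low part from the
   requested comparison of the insn's destination (the flags) against zero,
   via the STRICT_LOW_PART store emitted below.  */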
12989
12990 static rtx
12991 ix86_expand_sse_comi (d, arglist, target)
12992 const struct builtin_description *d;
12993 tree arglist;
12994 rtx target;
12995 {
12996 rtx pat;
12997 tree arg0 = TREE_VALUE (arglist);
12998 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
12999 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13000 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13001 rtx op2;
13002 enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
13003 enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
13004 enum rtx_code comparison = d->comparison;
13005
13006 if (VECTOR_MODE_P (mode0))
13007 op0 = safe_vector_operand (op0, mode0);
13008 if (VECTOR_MODE_P (mode1))
13009 op1 = safe_vector_operand (op1, mode1);
13010
13011 /* Swap operands if we have a comparison that isn't available in
13012 hardware. */
13013 if (d->flag)
13014 {
13015 rtx tmp = op1;
13016 op1 = op0;
13017 op0 = tmp;
13018 }
13019
13020 target = gen_reg_rtx (SImode);
13021 emit_move_insn (target, const0_rtx);
13022 target = gen_rtx_SUBREG (QImode, target, 0);
13023
13024 if (! (*insn_data[d->icode].operand[0].predicate) (op0, mode0))
13025 op0 = copy_to_mode_reg (mode0, op0);
13026 if (! (*insn_data[d->icode].operand[1].predicate) (op1, mode1))
13027 op1 = copy_to_mode_reg (mode1, op1);
13028
13029 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
13030 pat = GEN_FCN (d->icode) (op0, op1);
13031 if (! pat)
13032 return 0;
13033 emit_insn (pat);
13034 emit_insn (gen_rtx_SET (VOIDmode,
13035 gen_rtx_STRICT_LOW_PART (VOIDmode, target),
13036 gen_rtx_fmt_ee (comparison, QImode,
13037 SET_DEST (pat),
13038 const0_rtx)));
13039
13040 return SUBREG_REG (target);
13041 }
13042
13043 /* Expand an expression EXP that calls a built-in function,
13044 with result going to TARGET if that's convenient
13045 (and in mode MODE if that's convenient).
13046 SUBTARGET may be used as the target for computing one of EXP's operands.
13047 IGNORE is nonzero if the value is to be ignored. */
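/* Builtins that need special handling (immediate selectors, implicit memory
   operands, multiple emitted insns, ...) are handled by the switch below;
   everything else falls through to the generic bdesc_2arg, bdesc_1arg and
   bdesc_comi tables at the end.  */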
13048
13049 rtx
13050 ix86_expand_builtin (exp, target, subtarget, mode, ignore)
13051 tree exp;
13052 rtx target;
13053 rtx subtarget ATTRIBUTE_UNUSED;
13054 enum machine_mode mode ATTRIBUTE_UNUSED;
13055 int ignore ATTRIBUTE_UNUSED;
13056 {
13057 const struct builtin_description *d;
13058 size_t i;
13059 enum insn_code icode;
13060 tree fndecl = TREE_OPERAND (TREE_OPERAND (exp, 0), 0);
13061 tree arglist = TREE_OPERAND (exp, 1);
13062 tree arg0, arg1, arg2;
13063 rtx op0, op1, op2, pat;
13064 enum machine_mode tmode, mode0, mode1, mode2;
13065 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
13066
13067 switch (fcode)
13068 {
13069 case IX86_BUILTIN_EMMS:
13070 emit_insn (gen_emms ());
13071 return 0;
13072
13073 case IX86_BUILTIN_SFENCE:
13074 emit_insn (gen_sfence ());
13075 return 0;
13076
13077 case IX86_BUILTIN_PEXTRW:
13078 case IX86_BUILTIN_PEXTRW128:
13079 icode = (fcode == IX86_BUILTIN_PEXTRW
13080 ? CODE_FOR_mmx_pextrw
13081 : CODE_FOR_sse2_pextrw);
13082 arg0 = TREE_VALUE (arglist);
13083 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13084 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13085 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13086 tmode = insn_data[icode].operand[0].mode;
13087 mode0 = insn_data[icode].operand[1].mode;
13088 mode1 = insn_data[icode].operand[2].mode;
13089
13090 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
13091 op0 = copy_to_mode_reg (mode0, op0);
13092 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
13093 {
13094 /* @@@ better error message */
13095 error ("selector must be an immediate");
13096 return gen_reg_rtx (tmode);
13097 }
13098 if (target == 0
13099 || GET_MODE (target) != tmode
13100 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13101 target = gen_reg_rtx (tmode);
13102 pat = GEN_FCN (icode) (target, op0, op1);
13103 if (! pat)
13104 return 0;
13105 emit_insn (pat);
13106 return target;
13107
13108 case IX86_BUILTIN_PINSRW:
13109 case IX86_BUILTIN_PINSRW128:
13110 icode = (fcode == IX86_BUILTIN_PINSRW
13111 ? CODE_FOR_mmx_pinsrw
13112 : CODE_FOR_sse2_pinsrw);
13113 arg0 = TREE_VALUE (arglist);
13114 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13115 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
13116 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13117 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13118 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
13119 tmode = insn_data[icode].operand[0].mode;
13120 mode0 = insn_data[icode].operand[1].mode;
13121 mode1 = insn_data[icode].operand[2].mode;
13122 mode2 = insn_data[icode].operand[3].mode;
13123
13124 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
13125 op0 = copy_to_mode_reg (mode0, op0);
13126 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
13127 op1 = copy_to_mode_reg (mode1, op1);
13128 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
13129 {
13130 /* @@@ better error message */
13131 error ("selector must be an immediate");
13132 return const0_rtx;
13133 }
13134 if (target == 0
13135 || GET_MODE (target) != tmode
13136 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13137 target = gen_reg_rtx (tmode);
13138 pat = GEN_FCN (icode) (target, op0, op1, op2);
13139 if (! pat)
13140 return 0;
13141 emit_insn (pat);
13142 return target;
13143
13144 case IX86_BUILTIN_MASKMOVQ:
13145 case IX86_BUILTIN_MASKMOVDQU:
13146 icode = (fcode == IX86_BUILTIN_MASKMOVQ
13147 ? (TARGET_64BIT ? CODE_FOR_mmx_maskmovq_rex : CODE_FOR_mmx_maskmovq)
13148 : CODE_FOR_sse2_maskmovdqu);
13149 /* Note the arg order is different from the operand order. */
13150 arg1 = TREE_VALUE (arglist);
13151 arg2 = TREE_VALUE (TREE_CHAIN (arglist));
13152 arg0 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
13153 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13154 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13155 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
13156 mode0 = insn_data[icode].operand[0].mode;
13157 mode1 = insn_data[icode].operand[1].mode;
13158 mode2 = insn_data[icode].operand[2].mode;
13159
13160 if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
13161 op0 = copy_to_mode_reg (mode0, op0);
13162 if (! (*insn_data[icode].operand[1].predicate) (op1, mode1))
13163 op1 = copy_to_mode_reg (mode1, op1);
13164 if (! (*insn_data[icode].operand[2].predicate) (op2, mode2))
13165 op2 = copy_to_mode_reg (mode2, op2);
13166 pat = GEN_FCN (icode) (op0, op1, op2);
13167 if (! pat)
13168 return 0;
13169 emit_insn (pat);
13170 return 0;
13171
13172 case IX86_BUILTIN_SQRTSS:
13173 return ix86_expand_unop1_builtin (CODE_FOR_vmsqrtv4sf2, arglist, target);
13174 case IX86_BUILTIN_RSQRTSS:
13175 return ix86_expand_unop1_builtin (CODE_FOR_vmrsqrtv4sf2, arglist, target);
13176 case IX86_BUILTIN_RCPSS:
13177 return ix86_expand_unop1_builtin (CODE_FOR_vmrcpv4sf2, arglist, target);
13178
13179 case IX86_BUILTIN_LOADAPS:
13180 return ix86_expand_unop_builtin (CODE_FOR_sse_movaps, arglist, target, 1);
13181
13182 case IX86_BUILTIN_LOADUPS:
13183 return ix86_expand_unop_builtin (CODE_FOR_sse_movups, arglist, target, 1);
13184
13185 case IX86_BUILTIN_STOREAPS:
13186 return ix86_expand_store_builtin (CODE_FOR_sse_movaps, arglist);
13187
13188 case IX86_BUILTIN_STOREUPS:
13189 return ix86_expand_store_builtin (CODE_FOR_sse_movups, arglist);
13190
13191 case IX86_BUILTIN_LOADSS:
13192 return ix86_expand_unop_builtin (CODE_FOR_sse_loadss, arglist, target, 1);
13193
13194 case IX86_BUILTIN_STORESS:
13195 return ix86_expand_store_builtin (CODE_FOR_sse_storess, arglist);
13196
13197 case IX86_BUILTIN_LOADHPS:
13198 case IX86_BUILTIN_LOADLPS:
13199 case IX86_BUILTIN_LOADHPD:
13200 case IX86_BUILTIN_LOADLPD:
13201 icode = (fcode == IX86_BUILTIN_LOADHPS ? CODE_FOR_sse_movhps
13202 : fcode == IX86_BUILTIN_LOADLPS ? CODE_FOR_sse_movlps
13203 : fcode == IX86_BUILTIN_LOADHPD ? CODE_FOR_sse2_movhpd
13204 : CODE_FOR_sse2_movlpd);
13205 arg0 = TREE_VALUE (arglist);
13206 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13207 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13208 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13209 tmode = insn_data[icode].operand[0].mode;
13210 mode0 = insn_data[icode].operand[1].mode;
13211 mode1 = insn_data[icode].operand[2].mode;
13212
13213 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
13214 op0 = copy_to_mode_reg (mode0, op0);
13215 op1 = gen_rtx_MEM (mode1, copy_to_mode_reg (Pmode, op1));
13216 if (target == 0
13217 || GET_MODE (target) != tmode
13218 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13219 target = gen_reg_rtx (tmode);
13220 pat = GEN_FCN (icode) (target, op0, op1);
13221 if (! pat)
13222 return 0;
13223 emit_insn (pat);
13224 return target;
13225
13226 case IX86_BUILTIN_STOREHPS:
13227 case IX86_BUILTIN_STORELPS:
13228 case IX86_BUILTIN_STOREHPD:
13229 case IX86_BUILTIN_STORELPD:
13230 icode = (fcode == IX86_BUILTIN_STOREHPS ? CODE_FOR_sse_movhps
13231 : fcode == IX86_BUILTIN_STORELPS ? CODE_FOR_sse_movlps
13232 : fcode == IX86_BUILTIN_STOREHPD ? CODE_FOR_sse2_movhpd
13233 : CODE_FOR_sse2_movlpd);
13234 arg0 = TREE_VALUE (arglist);
13235 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13236 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13237 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13238 mode0 = insn_data[icode].operand[1].mode;
13239 mode1 = insn_data[icode].operand[2].mode;
13240
13241 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
13242 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
13243 op1 = copy_to_mode_reg (mode1, op1);
13244
13245 pat = GEN_FCN (icode) (op0, op0, op1);
13246 if (! pat)
13247 return 0;
13248 emit_insn (pat);
13249 return 0;
13250
13251 case IX86_BUILTIN_MOVNTPS:
13252 return ix86_expand_store_builtin (CODE_FOR_sse_movntv4sf, arglist);
13253 case IX86_BUILTIN_MOVNTQ:
13254 return ix86_expand_store_builtin (CODE_FOR_sse_movntdi, arglist);
13255
13256 case IX86_BUILTIN_LDMXCSR:
13257 op0 = expand_expr (TREE_VALUE (arglist), NULL_RTX, VOIDmode, 0);
13258 target = assign_386_stack_local (SImode, 0);
13259 emit_move_insn (target, op0);
13260 emit_insn (gen_ldmxcsr (target));
13261 return 0;
13262
13263 case IX86_BUILTIN_STMXCSR:
13264 target = assign_386_stack_local (SImode, 0);
13265 emit_insn (gen_stmxcsr (target));
13266 return copy_to_mode_reg (SImode, target);
13267
13268 case IX86_BUILTIN_SHUFPS:
13269 case IX86_BUILTIN_SHUFPD:
13270 icode = (fcode == IX86_BUILTIN_SHUFPS
13271 ? CODE_FOR_sse_shufps
13272 : CODE_FOR_sse2_shufpd);
13273 arg0 = TREE_VALUE (arglist);
13274 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13275 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
13276 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13277 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13278 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
13279 tmode = insn_data[icode].operand[0].mode;
13280 mode0 = insn_data[icode].operand[1].mode;
13281 mode1 = insn_data[icode].operand[2].mode;
13282 mode2 = insn_data[icode].operand[3].mode;
13283
13284 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
13285 op0 = copy_to_mode_reg (mode0, op0);
13286 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
13287 op1 = copy_to_mode_reg (mode1, op1);
13288 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
13289 {
13290 /* @@@ better error message */
13291 error ("mask must be an immediate");
13292 return gen_reg_rtx (tmode);
13293 }
13294 if (target == 0
13295 || GET_MODE (target) != tmode
13296 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13297 target = gen_reg_rtx (tmode);
13298 pat = GEN_FCN (icode) (target, op0, op1, op2);
13299 if (! pat)
13300 return 0;
13301 emit_insn (pat);
13302 return target;
13303
13304 case IX86_BUILTIN_PSHUFW:
13305 case IX86_BUILTIN_PSHUFD:
13306 case IX86_BUILTIN_PSHUFHW:
13307 case IX86_BUILTIN_PSHUFLW:
13308 icode = ( fcode == IX86_BUILTIN_PSHUFHW ? CODE_FOR_sse2_pshufhw
13309 : fcode == IX86_BUILTIN_PSHUFLW ? CODE_FOR_sse2_pshuflw
13310 : fcode == IX86_BUILTIN_PSHUFD ? CODE_FOR_sse2_pshufd
13311 : CODE_FOR_mmx_pshufw);
13312 arg0 = TREE_VALUE (arglist);
13313 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13314 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13315 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13316 tmode = insn_data[icode].operand[0].mode;
13317 mode1 = insn_data[icode].operand[1].mode;
13318 mode2 = insn_data[icode].operand[2].mode;
13319
13320 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
13321 op0 = copy_to_mode_reg (mode1, op0);
13322 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
13323 {
13324 /* @@@ better error message */
13325 error ("mask must be an immediate");
13326 return const0_rtx;
13327 }
13328 if (target == 0
13329 || GET_MODE (target) != tmode
13330 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13331 target = gen_reg_rtx (tmode);
13332 pat = GEN_FCN (icode) (target, op0, op1);
13333 if (! pat)
13334 return 0;
13335 emit_insn (pat);
13336 return target;
13337
13338 case IX86_BUILTIN_PSLLDQI128:
13339 case IX86_BUILTIN_PSRLDQI128:
13340 icode = ( fcode == IX86_BUILTIN_PSLLDQI128 ? CODE_FOR_sse2_ashlti3
13341 : CODE_FOR_sse2_lshrti3);
13342 arg0 = TREE_VALUE (arglist);
13343 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13344 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13345 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13346 tmode = insn_data[icode].operand[0].mode;
13347 mode1 = insn_data[icode].operand[1].mode;
13348 mode2 = insn_data[icode].operand[2].mode;
13349
13350 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
13351 {
13352 op0 = copy_to_reg (op0);
13353 op0 = simplify_gen_subreg (mode1, op0, GET_MODE (op0), 0);
13354 }
13355 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
13356 {
13357 error ("shift must be an immediate");
13358 return const0_rtx;
13359 }
13360 target = gen_reg_rtx (V2DImode);
13361 pat = GEN_FCN (icode) (simplify_gen_subreg (tmode, target, V2DImode, 0), op0, op1);
13362 if (! pat)
13363 return 0;
13364 emit_insn (pat);
13365 return target;
13366
13367 case IX86_BUILTIN_FEMMS:
13368 emit_insn (gen_femms ());
13369 return NULL_RTX;
13370
13371 case IX86_BUILTIN_PAVGUSB:
13372 return ix86_expand_binop_builtin (CODE_FOR_pavgusb, arglist, target);
13373
13374 case IX86_BUILTIN_PF2ID:
13375 return ix86_expand_unop_builtin (CODE_FOR_pf2id, arglist, target, 0);
13376
13377 case IX86_BUILTIN_PFACC:
13378 return ix86_expand_binop_builtin (CODE_FOR_pfacc, arglist, target);
13379
13380 case IX86_BUILTIN_PFADD:
13381 return ix86_expand_binop_builtin (CODE_FOR_addv2sf3, arglist, target);
13382
13383 case IX86_BUILTIN_PFCMPEQ:
13384 return ix86_expand_binop_builtin (CODE_FOR_eqv2sf3, arglist, target);
13385
13386 case IX86_BUILTIN_PFCMPGE:
13387 return ix86_expand_binop_builtin (CODE_FOR_gev2sf3, arglist, target);
13388
13389 case IX86_BUILTIN_PFCMPGT:
13390 return ix86_expand_binop_builtin (CODE_FOR_gtv2sf3, arglist, target);
13391
13392 case IX86_BUILTIN_PFMAX:
13393 return ix86_expand_binop_builtin (CODE_FOR_pfmaxv2sf3, arglist, target);
13394
13395 case IX86_BUILTIN_PFMIN:
13396 return ix86_expand_binop_builtin (CODE_FOR_pfminv2sf3, arglist, target);
13397
13398 case IX86_BUILTIN_PFMUL:
13399 return ix86_expand_binop_builtin (CODE_FOR_mulv2sf3, arglist, target);
13400
13401 case IX86_BUILTIN_PFRCP:
13402 return ix86_expand_unop_builtin (CODE_FOR_pfrcpv2sf2, arglist, target, 0);
13403
13404 case IX86_BUILTIN_PFRCPIT1:
13405 return ix86_expand_binop_builtin (CODE_FOR_pfrcpit1v2sf3, arglist, target);
13406
13407 case IX86_BUILTIN_PFRCPIT2:
13408 return ix86_expand_binop_builtin (CODE_FOR_pfrcpit2v2sf3, arglist, target);
13409
13410 case IX86_BUILTIN_PFRSQIT1:
13411 return ix86_expand_binop_builtin (CODE_FOR_pfrsqit1v2sf3, arglist, target);
13412
13413 case IX86_BUILTIN_PFRSQRT:
13414 return ix86_expand_unop_builtin (CODE_FOR_pfrsqrtv2sf2, arglist, target, 0);
13415
13416 case IX86_BUILTIN_PFSUB:
13417 return ix86_expand_binop_builtin (CODE_FOR_subv2sf3, arglist, target);
13418
13419 case IX86_BUILTIN_PFSUBR:
13420 return ix86_expand_binop_builtin (CODE_FOR_subrv2sf3, arglist, target);
13421
13422 case IX86_BUILTIN_PI2FD:
13423 return ix86_expand_unop_builtin (CODE_FOR_floatv2si2, arglist, target, 0);
13424
13425 case IX86_BUILTIN_PMULHRW:
13426 return ix86_expand_binop_builtin (CODE_FOR_pmulhrwv4hi3, arglist, target);
13427
13428 case IX86_BUILTIN_PF2IW:
13429 return ix86_expand_unop_builtin (CODE_FOR_pf2iw, arglist, target, 0);
13430
13431 case IX86_BUILTIN_PFNACC:
13432 return ix86_expand_binop_builtin (CODE_FOR_pfnacc, arglist, target);
13433
13434 case IX86_BUILTIN_PFPNACC:
13435 return ix86_expand_binop_builtin (CODE_FOR_pfpnacc, arglist, target);
13436
13437 case IX86_BUILTIN_PI2FW:
13438 return ix86_expand_unop_builtin (CODE_FOR_pi2fw, arglist, target, 0);
13439
13440 case IX86_BUILTIN_PSWAPDSI:
13441 return ix86_expand_unop_builtin (CODE_FOR_pswapdv2si2, arglist, target, 0);
13442
13443 case IX86_BUILTIN_PSWAPDSF:
13444 return ix86_expand_unop_builtin (CODE_FOR_pswapdv2sf2, arglist, target, 0);
13445
13446 case IX86_BUILTIN_SSE_ZERO:
13447 target = gen_reg_rtx (V4SFmode);
13448 emit_insn (gen_sse_clrv4sf (target));
13449 return target;
13450
13451 case IX86_BUILTIN_MMX_ZERO:
13452 target = gen_reg_rtx (DImode);
13453 emit_insn (gen_mmx_clrdi (target));
13454 return target;
13455
13456 case IX86_BUILTIN_CLRTI:
13457 target = gen_reg_rtx (V2DImode);
13458 emit_insn (gen_sse2_clrti (simplify_gen_subreg (TImode, target, V2DImode, 0)));
13459 return target;
13460
13461
13462 case IX86_BUILTIN_SQRTSD:
13463 return ix86_expand_unop1_builtin (CODE_FOR_vmsqrtv2df2, arglist, target);
13464 case IX86_BUILTIN_LOADAPD:
13465 return ix86_expand_unop_builtin (CODE_FOR_sse2_movapd, arglist, target, 1);
13466 case IX86_BUILTIN_LOADUPD:
13467 return ix86_expand_unop_builtin (CODE_FOR_sse2_movupd, arglist, target, 1);
13468
13469 case IX86_BUILTIN_STOREAPD:
13470 return ix86_expand_store_builtin (CODE_FOR_sse2_movapd, arglist);
13471 case IX86_BUILTIN_STOREUPD:
13472 return ix86_expand_store_builtin (CODE_FOR_sse2_movupd, arglist);
13473
13474 case IX86_BUILTIN_LOADSD:
13475 return ix86_expand_unop_builtin (CODE_FOR_sse2_loadsd, arglist, target, 1);
13476
13477 case IX86_BUILTIN_STORESD:
13478 return ix86_expand_store_builtin (CODE_FOR_sse2_storesd, arglist);
13479
13480 case IX86_BUILTIN_SETPD1:
13481 target = assign_386_stack_local (DFmode, 0);
13482 arg0 = TREE_VALUE (arglist);
13483 emit_move_insn (adjust_address (target, DFmode, 0),
13484 expand_expr (arg0, NULL_RTX, VOIDmode, 0));
13485 op0 = gen_reg_rtx (V2DFmode);
13486 emit_insn (gen_sse2_loadsd (op0, adjust_address (target, V2DFmode, 0)));
13487 emit_insn (gen_sse2_shufpd (op0, op0, op0, GEN_INT (0)));
13488 return op0;
13489
13490 case IX86_BUILTIN_SETPD:
13491 target = assign_386_stack_local (V2DFmode, 0);
13492 arg0 = TREE_VALUE (arglist);
13493 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13494 emit_move_insn (adjust_address (target, DFmode, 0),
13495 expand_expr (arg0, NULL_RTX, VOIDmode, 0));
13496 emit_move_insn (adjust_address (target, DFmode, 8),
13497 expand_expr (arg1, NULL_RTX, VOIDmode, 0));
13498 op0 = gen_reg_rtx (V2DFmode);
13499 emit_insn (gen_sse2_movapd (op0, target));
13500 return op0;
13501
13502 case IX86_BUILTIN_LOADRPD:
13503 target = ix86_expand_unop_builtin (CODE_FOR_sse2_movapd, arglist,
13504 gen_reg_rtx (V2DFmode), 1);
13505 emit_insn (gen_sse2_shufpd (target, target, target, GEN_INT (1)));
13506 return target;
13507
13508 case IX86_BUILTIN_LOADPD1:
13509 target = ix86_expand_unop_builtin (CODE_FOR_sse2_loadsd, arglist,
13510 gen_reg_rtx (V2DFmode), 1);
13511 emit_insn (gen_sse2_shufpd (target, target, target, const0_rtx));
13512 return target;
13513
13514 case IX86_BUILTIN_STOREPD1:
13515 return ix86_expand_store_builtin (CODE_FOR_sse2_movapd, arglist);
13516 case IX86_BUILTIN_STORERPD:
13517 return ix86_expand_store_builtin (CODE_FOR_sse2_movapd, arglist);
13518
13519 case IX86_BUILTIN_CLRPD:
13520 target = gen_reg_rtx (V2DFmode);
13521 emit_insn (gen_sse_clrv2df (target));
13522 return target;
13523
13524 case IX86_BUILTIN_MFENCE:
13525 emit_insn (gen_sse2_mfence ());
13526 return 0;
13527 case IX86_BUILTIN_LFENCE:
13528 emit_insn (gen_sse2_lfence ());
13529 return 0;
13530
13531 case IX86_BUILTIN_CLFLUSH:
13532 arg0 = TREE_VALUE (arglist);
13533 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13534 icode = CODE_FOR_sse2_clflush;
13535 if (! (*insn_data[icode].operand[0].predicate) (op0, Pmode))
13536 op0 = copy_to_mode_reg (Pmode, op0);
13537
13538 emit_insn (gen_sse2_clflush (op0));
13539 return 0;
13540
13541 case IX86_BUILTIN_MOVNTPD:
13542 return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2df, arglist);
13543 case IX86_BUILTIN_MOVNTDQ:
13544 return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2di, arglist);
13545 case IX86_BUILTIN_MOVNTI:
13546 return ix86_expand_store_builtin (CODE_FOR_sse2_movntsi, arglist);
13547
13548 case IX86_BUILTIN_LOADDQA:
13549 return ix86_expand_unop_builtin (CODE_FOR_sse2_movdqa, arglist, target, 1);
13550 case IX86_BUILTIN_LOADDQU:
13551 return ix86_expand_unop_builtin (CODE_FOR_sse2_movdqu, arglist, target, 1);
13552 case IX86_BUILTIN_LOADD:
13553 return ix86_expand_unop_builtin (CODE_FOR_sse2_loadd, arglist, target, 1);
13554
13555 case IX86_BUILTIN_STOREDQA:
13556 return ix86_expand_store_builtin (CODE_FOR_sse2_movdqa, arglist);
13557 case IX86_BUILTIN_STOREDQU:
13558 return ix86_expand_store_builtin (CODE_FOR_sse2_movdqu, arglist);
13559 case IX86_BUILTIN_STORED:
13560 return ix86_expand_store_builtin (CODE_FOR_sse2_stored, arglist);
13561
13562 default:
13563 break;
13564 }
13565
13566 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
13567 if (d->code == fcode)
13568 {
13569 /* Compares are treated specially. */
13570 if (d->icode == CODE_FOR_maskcmpv4sf3
13571 || d->icode == CODE_FOR_vmmaskcmpv4sf3
13572 || d->icode == CODE_FOR_maskncmpv4sf3
13573 || d->icode == CODE_FOR_vmmaskncmpv4sf3
13574 || d->icode == CODE_FOR_maskcmpv2df3
13575 || d->icode == CODE_FOR_vmmaskcmpv2df3
13576 || d->icode == CODE_FOR_maskncmpv2df3
13577 || d->icode == CODE_FOR_vmmaskncmpv2df3)
13578 return ix86_expand_sse_compare (d, arglist, target);
13579
13580 return ix86_expand_binop_builtin (d->icode, arglist, target);
13581 }
13582
13583 for (i = 0, d = bdesc_1arg; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
13584 if (d->code == fcode)
13585 return ix86_expand_unop_builtin (d->icode, arglist, target, 0);
13586
13587 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
13588 if (d->code == fcode)
13589 return ix86_expand_sse_comi (d, arglist, target);
13590
13591 /* @@@ Should really do something sensible here. */
13592 return 0;
13593 }
13594
13595 /* Store OPERAND to memory after reload is completed. This means
13596 that we can't easily use assign_stack_local. */
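/* With the 64-bit red zone the operand is simply stored below the stack
   pointer; otherwise it is pushed with PRE_DEC stores, and the space is
   released again by ix86_free_from_memory below.  */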
13597 rtx
13598 ix86_force_to_memory (mode, operand)
13599 enum machine_mode mode;
13600 rtx operand;
13601 {
13602 rtx result;
13603 if (!reload_completed)
13604 abort ();
13605 if (TARGET_64BIT && TARGET_RED_ZONE)
13606 {
13607 result = gen_rtx_MEM (mode,
13608 gen_rtx_PLUS (Pmode,
13609 stack_pointer_rtx,
13610 GEN_INT (-RED_ZONE_SIZE)));
13611 emit_move_insn (result, operand);
13612 }
13613 else if (TARGET_64BIT && !TARGET_RED_ZONE)
13614 {
13615 switch (mode)
13616 {
13617 case HImode:
13618 case SImode:
13619 operand = gen_lowpart (DImode, operand);
13620 /* FALLTHRU */
13621 case DImode:
13622 emit_insn (
13623 gen_rtx_SET (VOIDmode,
13624 gen_rtx_MEM (DImode,
13625 gen_rtx_PRE_DEC (DImode,
13626 stack_pointer_rtx)),
13627 operand));
13628 break;
13629 default:
13630 abort ();
13631 }
13632 result = gen_rtx_MEM (mode, stack_pointer_rtx);
13633 }
13634 else
13635 {
13636 switch (mode)
13637 {
13638 case DImode:
13639 {
13640 rtx operands[2];
13641 split_di (&operand, 1, operands, operands + 1);
13642 emit_insn (
13643 gen_rtx_SET (VOIDmode,
13644 gen_rtx_MEM (SImode,
13645 gen_rtx_PRE_DEC (Pmode,
13646 stack_pointer_rtx)),
13647 operands[1]));
13648 emit_insn (
13649 gen_rtx_SET (VOIDmode,
13650 gen_rtx_MEM (SImode,
13651 gen_rtx_PRE_DEC (Pmode,
13652 stack_pointer_rtx)),
13653 operands[0]));
13654 }
13655 break;
13656 case HImode:
13657 /* It is better to store HImode values as SImode. */
13658 if (!TARGET_PARTIAL_REG_STALL)
13659 operand = gen_lowpart (SImode, operand);
13660 /* FALLTHRU */
13661 case SImode:
13662 emit_insn (
13663 gen_rtx_SET (VOIDmode,
13664 gen_rtx_MEM (GET_MODE (operand),
13665 gen_rtx_PRE_DEC (SImode,
13666 stack_pointer_rtx)),
13667 operand));
13668 break;
13669 default:
13670 abort ();
13671 }
13672 result = gen_rtx_MEM (mode, stack_pointer_rtx);
13673 }
13674 return result;
13675 }
13676
13677 /* Free the operand from memory. */
13678 void
13679 ix86_free_from_memory (mode)
13680 enum machine_mode mode;
13681 {
13682 if (!TARGET_64BIT || !TARGET_RED_ZONE)
13683 {
13684 int size;
13685
13686 if (mode == DImode || TARGET_64BIT)
13687 size = 8;
13688 else if (mode == HImode && TARGET_PARTIAL_REG_STALL)
13689 size = 2;
13690 else
13691 size = 4;
13692 /* Use LEA to deallocate stack space. In peephole2 it will be converted
13693 to a pop or add instruction if registers are available. */
13694 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
13695 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
13696 GEN_INT (size))));
13697 }
13698 }
13699
13700 /* Put float CONST_DOUBLE in the constant pool instead of fp regs.
13701 QImode must go into class Q_REGS.
13702 Narrow ALL_REGS to GENERAL_REGS. This supports allowing movsf and
13703 movdf to do mem-to-mem moves through integer regs. */
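/* For example, reloading one of the constants recognized by
   standard_80387_constant_p (0.0 or 1.0) into a possibly-float class is
   fine, whereas an arbitrary FP constant yields NO_REGS below and is
   therefore forced into the constant pool.  */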
13704 enum reg_class
13705 ix86_preferred_reload_class (x, class)
13706 rtx x;
13707 enum reg_class class;
13708 {
13709 if (GET_CODE (x) == CONST_VECTOR && x != CONST0_RTX (GET_MODE (x)))
13710 return NO_REGS;
13711 if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) != VOIDmode)
13712 {
13713 /* SSE can't load any constant directly yet. */
13714 if (SSE_CLASS_P (class))
13715 return NO_REGS;
13716 /* Floats can load 0 and 1. */
13717 if (MAYBE_FLOAT_CLASS_P (class) && standard_80387_constant_p (x))
13718 {
13719 /* Limit class to non-SSE. Use GENERAL_REGS if possible. */
13720 if (MAYBE_SSE_CLASS_P (class))
13721 return (reg_class_subset_p (class, GENERAL_REGS)
13722 ? GENERAL_REGS : FLOAT_REGS);
13723 else
13724 return class;
13725 }
13726 /* General regs can load everything. */
13727 if (reg_class_subset_p (class, GENERAL_REGS))
13728 return GENERAL_REGS;
13729 /* In case we haven't resolved FLOAT or SSE yet, give up. */
13730 if (MAYBE_FLOAT_CLASS_P (class) || MAYBE_SSE_CLASS_P (class))
13731 return NO_REGS;
13732 }
13733 if (MAYBE_MMX_CLASS_P (class) && CONSTANT_P (x))
13734 return NO_REGS;
13735 if (GET_MODE (x) == QImode && ! reg_class_subset_p (class, Q_REGS))
13736 return Q_REGS;
13737 return class;
13738 }
13739
13740 /* If we are copying between general and FP registers, we need a memory
13741 location. The same is true for SSE and MMX registers.
13742
13743 The macro can't work reliably when one of the CLASSES is a class containing
13744 registers from multiple units (SSE, MMX, integer). We avoid this by never
13745 combining those units in a single alternative in the machine description.
13746 Ensure that this constraint holds to avoid unexpected surprises.
13747
13748 When STRICT is false, we are being called from REGISTER_MOVE_COST, so do not
13749 enforce these sanity checks. */
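/* Note that SImode values are exempted below: they can move directly between
   the integer unit and SSE/MMX registers (movd), so no secondary memory is
   required for them.  */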
13750 int
13751 ix86_secondary_memory_needed (class1, class2, mode, strict)
13752 enum reg_class class1, class2;
13753 enum machine_mode mode;
13754 int strict;
13755 {
13756 if (MAYBE_FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class1)
13757 || MAYBE_FLOAT_CLASS_P (class2) != FLOAT_CLASS_P (class2)
13758 || MAYBE_SSE_CLASS_P (class1) != SSE_CLASS_P (class1)
13759 || MAYBE_SSE_CLASS_P (class2) != SSE_CLASS_P (class2)
13760 || MAYBE_MMX_CLASS_P (class1) != MMX_CLASS_P (class1)
13761 || MAYBE_MMX_CLASS_P (class2) != MMX_CLASS_P (class2))
13762 {
13763 if (strict)
13764 abort ();
13765 else
13766 return 1;
13767 }
13768 return (FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class2)
13769 || (SSE_CLASS_P (class1) != SSE_CLASS_P (class2)
13770 && (mode) != SImode)
13771 || (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
13772 && (mode) != SImode));
13773 }
13774 /* Return the cost of moving data from a register in class CLASS1 to
13775 one in class CLASS2.
13776
13777 It is not required that the cost always equal 2 when FROM is the same as TO;
13778 on some machines it is expensive to move between registers if they are not
13779 general registers. */
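/* When ix86_secondary_memory_needed is true the move is priced as a store
   plus a load from the memory cost tables, with extra penalties for
   size-mismatched general-register copies and for FP/MMX moves that must
   switch the register file's mode.  */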
13780 int
13781 ix86_register_move_cost (mode, class1, class2)
13782 enum machine_mode mode;
13783 enum reg_class class1, class2;
13784 {
13785 /* In case we require secondary memory, compute the cost of the store
13786 followed by the load. To avoid bad register allocation choices, we
13787 need this to be *at least* as high as the symmetric MEMORY_MOVE_COST. */
13788
13789 if (ix86_secondary_memory_needed (class1, class2, mode, 0))
13790 {
13791 int cost = 1;
13792
13793 cost += MAX (MEMORY_MOVE_COST (mode, class1, 0),
13794 MEMORY_MOVE_COST (mode, class1, 1));
13795 cost += MAX (MEMORY_MOVE_COST (mode, class2, 0),
13796 MEMORY_MOVE_COST (mode, class2, 1));
13797
13798 /* In the case of copying from a general purpose register we may emit
13799 multiple stores followed by a single load, causing a memory size
13800 mismatch stall. Count this as an arbitrarily high cost of 20. */
13801 if (CLASS_MAX_NREGS (class1, mode) > CLASS_MAX_NREGS (class2, mode))
13802 cost += 20;
13803
13804 /* In the case of FP/MMX moves, the registers actually overlap, and we
13805 have to switch modes in order to treat them differently. */
13806 if ((MMX_CLASS_P (class1) && MAYBE_FLOAT_CLASS_P (class2))
13807 || (MMX_CLASS_P (class2) && MAYBE_FLOAT_CLASS_P (class1)))
13808 cost += 20;
13809
13810 return cost;
13811 }
13812
13813 /* Moves between SSE/MMX and integer unit are expensive. */
13814 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
13815 || SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
13816 return ix86_cost->mmxsse_to_integer;
13817 if (MAYBE_FLOAT_CLASS_P (class1))
13818 return ix86_cost->fp_move;
13819 if (MAYBE_SSE_CLASS_P (class1))
13820 return ix86_cost->sse_move;
13821 if (MAYBE_MMX_CLASS_P (class1))
13822 return ix86_cost->mmx_move;
13823 return 2;
13824 }
13825
13826 /* Return 1 if hard register REGNO can hold a value of machine-mode MODE. */
13827 int
13828 ix86_hard_regno_mode_ok (regno, mode)
13829 int regno;
13830 enum machine_mode mode;
13831 {
13832 /* Flags and only flags can only hold CCmode values. */
13833 if (CC_REGNO_P (regno))
13834 return GET_MODE_CLASS (mode) == MODE_CC;
13835 if (GET_MODE_CLASS (mode) == MODE_CC
13836 || GET_MODE_CLASS (mode) == MODE_RANDOM
13837 || GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
13838 return 0;
13839 if (FP_REGNO_P (regno))
13840 return VALID_FP_MODE_P (mode);
13841 if (SSE_REGNO_P (regno))
13842 return VALID_SSE_REG_MODE (mode);
13843 if (MMX_REGNO_P (regno))
13844 return VALID_MMX_REG_MODE (mode) || VALID_MMX_REG_MODE_3DNOW (mode);
13845 /* We handle both integers and floats in the general purpose registers.
13846 In the future we should be able to handle vector modes as well. */
13847 if (!VALID_INT_MODE_P (mode) && !VALID_FP_MODE_P (mode))
13848 return 0;
13849 /* Take care with QImode values - they can be in non-QI regs, but then
13850 they do cause partial register stalls. */
13851 if (regno < 4 || mode != QImode || TARGET_64BIT)
13852 return 1;
13853 return reload_in_progress || reload_completed || !TARGET_PARTIAL_REG_STALL;
13854 }
13855
13856 /* Return the cost of moving data of mode M between a
13857 register and memory. A value of 2 is the default; this cost is
13858 relative to those in `REGISTER_MOVE_COST'.
13859
13860 If moving between registers and memory is more expensive than
13861 between two registers, you should define this macro to express the
13862 relative cost.
13863
13864 Also model the increased cost of moving QImode registers in
13865 non-Q_REGS classes.
13866 */
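/* The costs below come straight from the active processor cost table
   (ix86_cost): the fp/sse/mmx load and store arrays are indexed by operand
   size, and sizes the table does not describe get a prohibitive cost
   of 100.  */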
13867 int
13868 ix86_memory_move_cost (mode, class, in)
13869 enum machine_mode mode;
13870 enum reg_class class;
13871 int in;
13872 {
13873 if (FLOAT_CLASS_P (class))
13874 {
13875 int index;
13876 switch (mode)
13877 {
13878 case SFmode:
13879 index = 0;
13880 break;
13881 case DFmode:
13882 index = 1;
13883 break;
13884 case XFmode:
13885 case TFmode:
13886 index = 2;
13887 break;
13888 default:
13889 return 100;
13890 }
13891 return in ? ix86_cost->fp_load [index] : ix86_cost->fp_store [index];
13892 }
13893 if (SSE_CLASS_P (class))
13894 {
13895 int index;
13896 switch (GET_MODE_SIZE (mode))
13897 {
13898 case 4:
13899 index = 0;
13900 break;
13901 case 8:
13902 index = 1;
13903 break;
13904 case 16:
13905 index = 2;
13906 break;
13907 default:
13908 return 100;
13909 }
13910 return in ? ix86_cost->sse_load [index] : ix86_cost->sse_store [index];
13911 }
13912 if (MMX_CLASS_P (class))
13913 {
13914 int index;
13915 switch (GET_MODE_SIZE (mode))
13916 {
13917 case 4:
13918 index = 0;
13919 break;
13920 case 8:
13921 index = 1;
13922 break;
13923 default:
13924 return 100;
13925 }
13926 return in ? ix86_cost->mmx_load [index] : ix86_cost->mmx_store [index];
13927 }
13928 switch (GET_MODE_SIZE (mode))
13929 {
13930 case 1:
13931 if (in)
13932 return (Q_CLASS_P (class) ? ix86_cost->int_load[0]
13933 : ix86_cost->movzbl_load);
13934 else
13935 return (Q_CLASS_P (class) ? ix86_cost->int_store[0]
13936 : ix86_cost->int_store[0] + 4);
13937 break;
13938 case 2:
13939 return in ? ix86_cost->int_load[1] : ix86_cost->int_store[1];
13940 default:
13941 /* Compute the number of 32-bit moves needed. TFmode is moved as XFmode. */
13942 if (mode == TFmode)
13943 mode = XFmode;
13944 return ((in ? ix86_cost->int_load[2] : ix86_cost->int_store[2])
13945 * ((int) GET_MODE_SIZE (mode)
13946 + UNITS_PER_WORD -1 ) / UNITS_PER_WORD);
13947 }
13948 }
13949
13950 #if defined (DO_GLOBAL_CTORS_BODY) && defined (HAS_INIT_SECTION)
13951 static void
13952 ix86_svr3_asm_out_constructor (symbol, priority)
13953 rtx symbol;
13954 int priority ATTRIBUTE_UNUSED;
13955 {
13956 init_section ();
13957 fputs ("\tpushl $", asm_out_file);
13958 assemble_name (asm_out_file, XSTR (symbol, 0));
13959 fputc ('\n', asm_out_file);
13960 }
13961 #endif
13962
13963 #if TARGET_MACHO
13964
13965 static int current_machopic_label_num;
13966
13967 /* Given a symbol name and its associated stub, write out the
13968 definition of the stub. */
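/* In the MACHOPIC_PURE case the stub obtains its own address with a
   call/pop sequence and jumps indirectly through the lazy pointer; the
   binder entry pushes that pointer's address and transfers to
   dyld_stub_binding_helper, which resolves the symbol on first use.  */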
13969
13970 void
13971 machopic_output_stub (file, symb, stub)
13972 FILE *file;
13973 const char *symb, *stub;
13974 {
13975 unsigned int length;
13976 char *binder_name, *symbol_name, lazy_ptr_name[32];
13977 int label = ++current_machopic_label_num;
13978
13979 /* Lose our funky encoding stuff so it doesn't contaminate the stub. */
13980 symb = (*targetm.strip_name_encoding) (symb);
13981
13982 length = strlen (stub);
13983 binder_name = alloca (length + 32);
13984 GEN_BINDER_NAME_FOR_STUB (binder_name, stub, length);
13985
13986 length = strlen (symb);
13987 symbol_name = alloca (length + 32);
13988 GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name, symb, length);
13989
13990 sprintf (lazy_ptr_name, "L%d$lz", label);
13991
13992 if (MACHOPIC_PURE)
13993 machopic_picsymbol_stub_section ();
13994 else
13995 machopic_symbol_stub_section ();
13996
13997 fprintf (file, "%s:\n", stub);
13998 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
13999
14000 if (MACHOPIC_PURE)
14001 {
14002 fprintf (file, "\tcall LPC$%d\nLPC$%d:\tpopl %%eax\n", label, label);
14003 fprintf (file, "\tmovl %s-LPC$%d(%%eax),%%edx\n", lazy_ptr_name, label);
14004 fprintf (file, "\tjmp %%edx\n");
14005 }
14006 else
14007 fprintf (file, "\tjmp *%s\n", lazy_ptr_name);
14008
14009 fprintf (file, "%s:\n", binder_name);
14010
14011 if (MACHOPIC_PURE)
14012 {
14013 fprintf (file, "\tlea %s-LPC$%d(%%eax),%%eax\n", lazy_ptr_name, label);
14014 fprintf (file, "\tpushl %%eax\n");
14015 }
14016 else
14017 fprintf (file, "\t pushl $%s\n", lazy_ptr_name);
14018
14019 fprintf (file, "\tjmp dyld_stub_binding_helper\n");
14020
14021 machopic_lazy_symbol_ptr_section ();
14022 fprintf (file, "%s:\n", lazy_ptr_name);
14023 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
14024 fprintf (file, "\t.long %s\n", binder_name);
14025 }
14026 #endif /* TARGET_MACHO */
14027
14028 /* Order the registers for register allocator. */
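/* Call-clobbered general registers are handed out first so that short-lived
   values do not force prologue/epilogue saves; the FP unit actually used for
   math (x87 or SSE) is preferred over the other one, and MMX registers come
   last.  */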
14029
14030 void
14031 x86_order_regs_for_local_alloc ()
14032 {
14033 int pos = 0;
14034 int i;
14035
14036 /* First allocate the local general purpose registers. */
14037 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
14038 if (GENERAL_REGNO_P (i) && call_used_regs[i])
14039 reg_alloc_order [pos++] = i;
14040
14041 /* Global general purpose registers. */
14042 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
14043 if (GENERAL_REGNO_P (i) && !call_used_regs[i])
14044 reg_alloc_order [pos++] = i;
14045
14046 /* x87 registers come first in case we are doing FP math
14047 using them. */
14048 if (!TARGET_SSE_MATH)
14049 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
14050 reg_alloc_order [pos++] = i;
14051
14052 /* SSE registers. */
14053 for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++)
14054 reg_alloc_order [pos++] = i;
14055 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
14056 reg_alloc_order [pos++] = i;
14057
14058 /* x87 registers. */
14059 if (TARGET_SSE_MATH)
14060 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
14061 reg_alloc_order [pos++] = i;
14062
14063 for (i = FIRST_MMX_REG; i <= LAST_MMX_REG; i++)
14064 reg_alloc_order [pos++] = i;
14065
14066 /* Initialize the rest of the array, as we do not allocate some
14067 registers at all. */
14068 while (pos < FIRST_PSEUDO_REGISTER)
14069 reg_alloc_order [pos++] = 0;
14070 }
14071
14072 /* Returns an expression indicating where the this parameter is
14073 located on entry to the FUNCTION. */
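/* On 32-bit targets an aggregate return value is passed as a hidden first
   argument, which pushes the `this' pointer one word further up the stack
   (hence the 8 vs. 4 byte offsets below); with regparm and a non-variadic
   FUNCTION it is passed in %eax instead.  */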
14074
14075 static rtx
14076 x86_this_parameter (function)
14077 tree function;
14078 {
14079 tree type = TREE_TYPE (function);
14080
14081 if (TARGET_64BIT)
14082 {
14083 int n = aggregate_value_p (TREE_TYPE (type)) != 0;
14084 return gen_rtx_REG (DImode, x86_64_int_parameter_registers[n]);
14085 }
14086
14087 if (ix86_fntype_regparm (type) > 0)
14088 {
14089 tree parm;
14090
14091 parm = TYPE_ARG_TYPES (type);
14092 /* Figure out whether or not the function has a variable number of
14093 arguments. */
14094 for (; parm; parm = TREE_CHAIN (parm))
14095 if (TREE_VALUE (parm) == void_type_node)
14096 break;
14097 /* If not, the this parameter is in %eax. */
14098 if (parm)
14099 return gen_rtx_REG (SImode, 0);
14100 }
14101
14102 if (aggregate_value_p (TREE_TYPE (type)))
14103 return gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, 8));
14104 else
14105 return gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, 4));
14106 }
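
/* Summarizing the cases above (illustrative): in 64-bit mode `this'
   arrives in %rdi, or in %rsi when the function returns an aggregate in
   memory (the hidden return pointer then occupies %rdi); with regparm and
   a fixed argument list it arrives in %eax; otherwise it lives on the
   stack at 4(%esp), or at 8(%esp) when a hidden aggregate-return pointer
   is pushed first.  */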
14107
14108 /* Determine whether x86_output_mi_thunk can succeed. */
14109
14110 static bool
14111 x86_can_output_mi_thunk (thunk, delta, vcall_offset, function)
14112 tree thunk ATTRIBUTE_UNUSED;
14113 HOST_WIDE_INT delta ATTRIBUTE_UNUSED;
14114 HOST_WIDE_INT vcall_offset;
14115 tree function;
14116 {
14117 /* 64-bit can handle anything. */
14118 if (TARGET_64BIT)
14119 return true;
14120
14121 /* For 32-bit, everything's fine if we have one free register. */
14122 if (ix86_fntype_regparm (TREE_TYPE (function)) < 3)
14123 return true;
14124
14125 /* Need a free register for vcall_offset. */
14126 if (vcall_offset)
14127 return false;
14128
14129 /* Need a free register for GOT references. */
14130 if (flag_pic && !(*targetm.binds_local_p) (function))
14131 return false;
14132
14133 /* Otherwise ok. */
14134 return true;
14135 }
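
/* In other words, the only rejected case is ia32 with regparm (3), where
   %eax, %edx and %ecx all carry arguments, combined with either a vcall
   offset or a PIC call to a non-local function; either of those needs a
   scratch register that is not available there.  */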
14136
14137 /* Output the assembler code for a thunk function. THUNK is the
14138 declaration for the thunk function itself, FUNCTION is the decl for
14139 the target function. DELTA is an immediate constant offset to be
14140 added to THIS. If VCALL_OFFSET is non-zero, the word at
14141 *(*this + vcall_offset) should be added to THIS. */
14142
14143 static void
14144 x86_output_mi_thunk (file, thunk, delta, vcall_offset, function)
14145 FILE *file ATTRIBUTE_UNUSED;
14146 tree thunk ATTRIBUTE_UNUSED;
14147 HOST_WIDE_INT delta;
14148 HOST_WIDE_INT vcall_offset;
14149 tree function;
14150 {
14151 rtx xops[3];
14152 rtx this = x86_this_parameter (function);
14153 rtx this_reg, tmp;
14154
14155 /* If VCALL_OFFSET, we'll need THIS in a register. Might as well
14156 pull it in now and let DELTA benefit. */
14157 if (REG_P (this))
14158 this_reg = this;
14159 else if (vcall_offset)
14160 {
14161 /* Put the this parameter into %eax. */
14162 xops[0] = this;
14163 xops[1] = this_reg = gen_rtx_REG (Pmode, 0);
14164 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
14165 }
14166 else
14167 this_reg = NULL_RTX;
14168
14169 /* Adjust the this parameter by a fixed constant. */
14170 if (delta)
14171 {
14172 xops[0] = GEN_INT (delta);
14173 xops[1] = this_reg ? this_reg : this;
14174 if (TARGET_64BIT)
14175 {
14176 if (!x86_64_general_operand (xops[0], DImode))
14177 {
14178 tmp = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 2 /* R10 */);
14179 xops[1] = tmp;
14180 output_asm_insn ("mov{q}\t{%1, %0|%0, %1}", xops);
14181 xops[0] = tmp;
14182 xops[1] = this;
14183 }
14184 output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops);
14185 }
14186 else
14187 output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops);
14188 }
14189
14190 /* Adjust the this parameter by a value stored in the vtable. */
14191 if (vcall_offset)
14192 {
14193 if (TARGET_64BIT)
14194 tmp = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 2 /* R10 */);
14195 else
14196 tmp = gen_rtx_REG (SImode, 2 /* ECX */);
14197
14198 xops[0] = gen_rtx_MEM (Pmode, this_reg);
14199 xops[1] = tmp;
14200 if (TARGET_64BIT)
14201 output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops);
14202 else
14203 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
14204
14205 /* Adjust the this parameter. */
14206 xops[0] = gen_rtx_MEM (Pmode, plus_constant (tmp, vcall_offset));
14207 if (TARGET_64BIT && !memory_operand (xops[0], Pmode))
14208 {
14209 rtx tmp2 = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 3 /* R11 */);
14210 xops[0] = GEN_INT (vcall_offset);
14211 xops[1] = tmp2;
14212 output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops);
14213 xops[0] = gen_rtx_MEM (Pmode, gen_rtx_PLUS (Pmode, tmp, tmp2));
14214 }
14215 xops[1] = this_reg;
14216 if (TARGET_64BIT)
14217 output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops);
14218 else
14219 output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops);
14220 }
14221
14222 /* If necessary, drop THIS back to its stack slot. */
14223 if (this_reg && this_reg != this)
14224 {
14225 xops[0] = this_reg;
14226 xops[1] = this;
14227 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
14228 }
14229
14230 xops[0] = DECL_RTL (function);
14231 if (TARGET_64BIT)
14232 {
14233 if (!flag_pic || (*targetm.binds_local_p) (function))
14234 output_asm_insn ("jmp\t%P0", xops);
14235 else
14236 output_asm_insn ("jmp\t*%P0@GOTPCREL(%%rip)", xops);
14237 }
14238 else
14239 {
14240 if (!flag_pic || (*targetm.binds_local_p) (function))
14241 output_asm_insn ("jmp\t%P0", xops);
14242 else
14243 {
14244 tmp = gen_rtx_REG (SImode, 2 /* ECX */);
14245 output_set_got (tmp);
14246
14247 xops[1] = tmp;
14248 output_asm_insn ("mov{l}\t{%0@GOT(%1), %1|%1, %0@GOT[%1]}", xops);
14249 output_asm_insn ("jmp\t{*}%1", xops);
14250 }
14251 }
14252 }
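
/* A rough example of the output (a sketch, assuming ia32, non-PIC, a
   stack-passed `this', DELTA == 16, VCALL_OFFSET == -12 and an
   illustrative target name _f):

	movl 4(%esp), %eax
	addl $16, %eax
	movl (%eax), %ecx
	addl -12(%ecx), %eax
	movl %eax, 4(%esp)
	jmp _f
   */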
14253
14254 int
14255 x86_field_alignment (field, computed)
14256 tree field;
14257 int computed;
14258 {
14259 enum machine_mode mode;
14260 tree type = TREE_TYPE (field);
14261
14262 if (TARGET_64BIT || TARGET_ALIGN_DOUBLE)
14263 return computed;
14264 mode = TYPE_MODE (TREE_CODE (type) == ARRAY_TYPE
14265 ? get_inner_array_type (type) : type);
14266 if (mode == DFmode || mode == DCmode
14267 || GET_MODE_CLASS (mode) == MODE_INT
14268 || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
14269 return MIN (32, computed);
14270 return computed;
14271 }
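
/* For example (illustrative): without -malign-double, a double (or long
   long) field such as the one in

	struct s { char c; double d; };

   gets only 4-byte alignment on ia32, so d sits at offset 4; in 64-bit
   mode, or with -malign-double, the natural 8-byte alignment is kept and
   d sits at offset 8.  */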
14272
14273 /* Implement machine specific optimizations.
14274 At the moment we implement a single transformation: the AMD Athlon runs
14275 faster when a RET instruction is not the destination of a conditional jump
14276 and is not directly preceded by another jump instruction.  We avoid the
14277 penalty by inserting a NOP just before such RET instructions.  */
14278 void
14279 x86_machine_dependent_reorg (first)
14280 rtx first ATTRIBUTE_UNUSED;
14281 {
14282 edge e;
14283
14284 if (!TARGET_ATHLON || !optimize || optimize_size)
14285 return;
14286 for (e = EXIT_BLOCK_PTR->pred; e; e = e->pred_next)
14287 {
14288 basic_block bb = e->src;
14289 rtx ret = bb->end;
14290 rtx prev;
14291 bool insert = false;
14292
14293 if (!returnjump_p (ret) || !maybe_hot_bb_p (bb))
14294 continue;
14295 prev = prev_nonnote_insn (ret);
14296 if (prev && GET_CODE (prev) == CODE_LABEL)
14297 {
14298 edge e;
14299 for (e = bb->pred; e; e = e->pred_next)
14300 if (EDGE_FREQUENCY (e) && e->src->index > 0
14301 && !(e->flags & EDGE_FALLTHRU))
14302 insert = true;
14303 }
14304 if (!insert)
14305 {
14306 prev = prev_real_insn (ret);
14307 if (prev && GET_CODE (prev) == JUMP_INSN
14308 && any_condjump_p (prev))
14309 insert = true;
14310 }
14311 if (insert)
14312 emit_insn_before (gen_nop (), ret);
14313 }
14314 }
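
/* For example (a sketch): when a RET directly follows a conditional jump
   in the insn stream, as in

	jne .L3
	ret

   the pass inserts a NOP, producing

	jne .L3
	nop
	ret

   so the RET is no longer directly preceded by a jump.  A NOP is also
   inserted when the RET's block starts with a label that is the target of
   a non-fallthru edge, i.e. when the RET itself can be a jump destination.  */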
14315
14316 #include "gt-i386.h"