i386-protos.h (x86_function_profiler): New function
[gcc.git] / gcc / config / i386 / i386.c
1 /* Subroutines used for code generation on IA-32.
2 Copyright (C) 1988, 1992, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
3 2002 Free Software Foundation, Inc.
4
5 This file is part of GNU CC.
6
7 GNU CC is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 2, or (at your option)
10 any later version.
11
12 GNU CC is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
16
17 You should have received a copy of the GNU General Public License
18 along with GNU CC; see the file COPYING. If not, write to
19 the Free Software Foundation, 59 Temple Place - Suite 330,
20 Boston, MA 02111-1307, USA. */
21
22 #include "config.h"
23 #include "system.h"
24 #include "rtl.h"
25 #include "tree.h"
26 #include "tm_p.h"
27 #include "regs.h"
28 #include "hard-reg-set.h"
29 #include "real.h"
30 #include "insn-config.h"
31 #include "conditions.h"
32 #include "output.h"
33 #include "insn-attr.h"
34 #include "flags.h"
35 #include "except.h"
36 #include "function.h"
37 #include "recog.h"
38 #include "expr.h"
39 #include "optabs.h"
40 #include "toplev.h"
41 #include "basic-block.h"
42 #include "ggc.h"
43 #include "target.h"
44 #include "target-def.h"
45 #include "langhooks.h"
46
47 #ifndef CHECK_STACK_LIMIT
48 #define CHECK_STACK_LIMIT (-1)
49 #endif
50
51 /* Processor costs (relative to an add) */
52 static const
53 struct processor_costs size_cost = { /* costs for tuning for size */
54 2, /* cost of an add instruction */
55 3, /* cost of a lea instruction */
56 2, /* variable shift costs */
57 3, /* constant shift costs */
58 3, /* cost of starting a multiply */
59 0, /* cost of multiply per each bit set */
60 3, /* cost of a divide/mod */
61 3, /* cost of movsx */
62 3, /* cost of movzx */
63 0, /* "large" insn */
64 2, /* MOVE_RATIO */
65 2, /* cost for loading QImode using movzbl */
66 {2, 2, 2}, /* cost of loading integer registers
67 in QImode, HImode and SImode.
68 Relative to reg-reg move (2). */
69 {2, 2, 2}, /* cost of storing integer registers */
70 2, /* cost of reg,reg fld/fst */
71 {2, 2, 2}, /* cost of loading fp registers
72 in SFmode, DFmode and XFmode */
73 {2, 2, 2}, /* cost of loading integer registers */
74 3, /* cost of moving MMX register */
75 {3, 3}, /* cost of loading MMX registers
76 in SImode and DImode */
77 {3, 3}, /* cost of storing MMX registers
78 in SImode and DImode */
79 3, /* cost of moving SSE register */
80 {3, 3, 3}, /* cost of loading SSE registers
81 in SImode, DImode and TImode */
82 {3, 3, 3}, /* cost of storing SSE registers
83 in SImode, DImode and TImode */
84 3, /* MMX or SSE register to integer */
85 0, /* size of prefetch block */
86 0, /* number of parallel prefetches */
87 2, /* cost of FADD and FSUB insns. */
88 2, /* cost of FMUL instruction. */
89 2, /* cost of FDIV instruction. */
90 2, /* cost of FABS instruction. */
91 2, /* cost of FCHS instruction. */
92 2, /* cost of FSQRT instruction. */
93 };
94
95 /* Processor costs (relative to an add) */
96 static const
97 struct processor_costs i386_cost = { /* 386 specific costs */
98 1, /* cost of an add instruction */
99 1, /* cost of a lea instruction */
100 3, /* variable shift costs */
101 2, /* constant shift costs */
102 6, /* cost of starting a multiply */
103 1, /* cost of multiply per each bit set */
104 23, /* cost of a divide/mod */
105 3, /* cost of movsx */
106 2, /* cost of movzx */
107 15, /* "large" insn */
108 3, /* MOVE_RATIO */
109 4, /* cost for loading QImode using movzbl */
110 {2, 4, 2}, /* cost of loading integer registers
111 in QImode, HImode and SImode.
112 Relative to reg-reg move (2). */
113 {2, 4, 2}, /* cost of storing integer registers */
114 2, /* cost of reg,reg fld/fst */
115 {8, 8, 8}, /* cost of loading fp registers
116 in SFmode, DFmode and XFmode */
117 {8, 8, 8}, /* cost of loading integer registers */
118 2, /* cost of moving MMX register */
119 {4, 8}, /* cost of loading MMX registers
120 in SImode and DImode */
121 {4, 8}, /* cost of storing MMX registers
122 in SImode and DImode */
123 2, /* cost of moving SSE register */
124 {4, 8, 16}, /* cost of loading SSE registers
125 in SImode, DImode and TImode */
126 {4, 8, 16}, /* cost of storing SSE registers
127 in SImode, DImode and TImode */
128 3, /* MMX or SSE register to integer */
129 0, /* size of prefetch block */
130 0, /* number of parallel prefetches */
131 23, /* cost of FADD and FSUB insns. */
132 27, /* cost of FMUL instruction. */
133 88, /* cost of FDIV instruction. */
134 22, /* cost of FABS instruction. */
135 24, /* cost of FCHS instruction. */
136 122, /* cost of FSQRT instruction. */
137 };
138
139 static const
140 struct processor_costs i486_cost = { /* 486 specific costs */
141 1, /* cost of an add instruction */
142 1, /* cost of a lea instruction */
143 3, /* variable shift costs */
144 2, /* constant shift costs */
145 12, /* cost of starting a multiply */
146 1, /* cost of multiply per each bit set */
147 40, /* cost of a divide/mod */
148 3, /* cost of movsx */
149 2, /* cost of movzx */
150 15, /* "large" insn */
151 3, /* MOVE_RATIO */
152 4, /* cost for loading QImode using movzbl */
153 {2, 4, 2}, /* cost of loading integer registers
154 in QImode, HImode and SImode.
155 Relative to reg-reg move (2). */
156 {2, 4, 2}, /* cost of storing integer registers */
157 2, /* cost of reg,reg fld/fst */
158 {8, 8, 8}, /* cost of loading fp registers
159 in SFmode, DFmode and XFmode */
160 {8, 8, 8}, /* cost of loading integer registers */
161 2, /* cost of moving MMX register */
162 {4, 8}, /* cost of loading MMX registers
163 in SImode and DImode */
164 {4, 8}, /* cost of storing MMX registers
165 in SImode and DImode */
166 2, /* cost of moving SSE register */
167 {4, 8, 16}, /* cost of loading SSE registers
168 in SImode, DImode and TImode */
169 {4, 8, 16}, /* cost of storing SSE registers
170 in SImode, DImode and TImode */
171 3, /* MMX or SSE register to integer */
172 0, /* size of prefetch block */
173 0, /* number of parallel prefetches */
174 8, /* cost of FADD and FSUB insns. */
175 16, /* cost of FMUL instruction. */
176 73, /* cost of FDIV instruction. */
177 3, /* cost of FABS instruction. */
178 3, /* cost of FCHS instruction. */
179 83, /* cost of FSQRT instruction. */
180 };
181
182 static const
183 struct processor_costs pentium_cost = {
184 1, /* cost of an add instruction */
185 1, /* cost of a lea instruction */
186 4, /* variable shift costs */
187 1, /* constant shift costs */
188 11, /* cost of starting a multiply */
189 0, /* cost of multiply per each bit set */
190 25, /* cost of a divide/mod */
191 3, /* cost of movsx */
192 2, /* cost of movzx */
193 8, /* "large" insn */
194 6, /* MOVE_RATIO */
195 6, /* cost for loading QImode using movzbl */
196 {2, 4, 2}, /* cost of loading integer registers
197 in QImode, HImode and SImode.
198 Relative to reg-reg move (2). */
199 {2, 4, 2}, /* cost of storing integer registers */
200 2, /* cost of reg,reg fld/fst */
201 {2, 2, 6}, /* cost of loading fp registers
202 in SFmode, DFmode and XFmode */
203 {4, 4, 6}, /* cost of loading integer registers */
204 8, /* cost of moving MMX register */
205 {8, 8}, /* cost of loading MMX registers
206 in SImode and DImode */
207 {8, 8}, /* cost of storing MMX registers
208 in SImode and DImode */
209 2, /* cost of moving SSE register */
210 {4, 8, 16}, /* cost of loading SSE registers
211 in SImode, DImode and TImode */
212 {4, 8, 16}, /* cost of storing SSE registers
213 in SImode, DImode and TImode */
214 3, /* MMX or SSE register to integer */
215 0, /* size of prefetch block */
216 0, /* number of parallel prefetches */
217 3, /* cost of FADD and FSUB insns. */
218 3, /* cost of FMUL instruction. */
219 39, /* cost of FDIV instruction. */
220 1, /* cost of FABS instruction. */
221 1, /* cost of FCHS instruction. */
222 70, /* cost of FSQRT instruction. */
223 };
224
225 static const
226 struct processor_costs pentiumpro_cost = {
227 1, /* cost of an add instruction */
228 1, /* cost of a lea instruction */
229 1, /* variable shift costs */
230 1, /* constant shift costs */
231 4, /* cost of starting a multiply */
232 0, /* cost of multiply per each bit set */
233 17, /* cost of a divide/mod */
234 1, /* cost of movsx */
235 1, /* cost of movzx */
236 8, /* "large" insn */
237 6, /* MOVE_RATIO */
238 2, /* cost for loading QImode using movzbl */
239 {4, 4, 4}, /* cost of loading integer registers
240 in QImode, HImode and SImode.
241 Relative to reg-reg move (2). */
242 {2, 2, 2}, /* cost of storing integer registers */
243 2, /* cost of reg,reg fld/fst */
244 {2, 2, 6}, /* cost of loading fp registers
245 in SFmode, DFmode and XFmode */
246 {4, 4, 6}, /* cost of loading integer registers */
247 2, /* cost of moving MMX register */
248 {2, 2}, /* cost of loading MMX registers
249 in SImode and DImode */
250 {2, 2}, /* cost of storing MMX registers
251 in SImode and DImode */
252 2, /* cost of moving SSE register */
253 {2, 2, 8}, /* cost of loading SSE registers
254 in SImode, DImode and TImode */
255 {2, 2, 8}, /* cost of storing SSE registers
256 in SImode, DImode and TImode */
257 3, /* MMX or SSE register to integer */
258 32, /* size of prefetch block */
259 6, /* number of parallel prefetches */
260 3, /* cost of FADD and FSUB insns. */
261 5, /* cost of FMUL instruction. */
262 56, /* cost of FDIV instruction. */
263 2, /* cost of FABS instruction. */
264 2, /* cost of FCHS instruction. */
265 56, /* cost of FSQRT instruction. */
266 };
267
268 static const
269 struct processor_costs k6_cost = {
270 1, /* cost of an add instruction */
271 2, /* cost of a lea instruction */
272 1, /* variable shift costs */
273 1, /* constant shift costs */
274 3, /* cost of starting a multiply */
275 0, /* cost of multiply per each bit set */
276 18, /* cost of a divide/mod */
277 2, /* cost of movsx */
278 2, /* cost of movzx */
279 8, /* "large" insn */
280 4, /* MOVE_RATIO */
281 3, /* cost for loading QImode using movzbl */
282 {4, 5, 4}, /* cost of loading integer registers
283 in QImode, HImode and SImode.
284 Relative to reg-reg move (2). */
285 {2, 3, 2}, /* cost of storing integer registers */
286 4, /* cost of reg,reg fld/fst */
287 {6, 6, 6}, /* cost of loading fp registers
288 in SFmode, DFmode and XFmode */
289 {4, 4, 4}, /* cost of loading integer registers */
290 2, /* cost of moving MMX register */
291 {2, 2}, /* cost of loading MMX registers
292 in SImode and DImode */
293 {2, 2}, /* cost of storing MMX registers
294 in SImode and DImode */
295 2, /* cost of moving SSE register */
296 {2, 2, 8}, /* cost of loading SSE registers
297 in SImode, DImode and TImode */
298 {2, 2, 8}, /* cost of storing SSE registers
299 in SImode, DImode and TImode */
300 6, /* MMX or SSE register to integer */
301 32, /* size of prefetch block */
302 1, /* number of parallel prefetches */
303 2, /* cost of FADD and FSUB insns. */
304 2, /* cost of FMUL instruction. */
305 56, /* cost of FDIV instruction. */
306 2, /* cost of FABS instruction. */
307 2, /* cost of FCHS instruction. */
308 56, /* cost of FSQRT instruction. */
309 };
310
311 static const
312 struct processor_costs athlon_cost = {
313 1, /* cost of an add instruction */
314 2, /* cost of a lea instruction */
315 1, /* variable shift costs */
316 1, /* constant shift costs */
317 5, /* cost of starting a multiply */
318 0, /* cost of multiply per each bit set */
319 42, /* cost of a divide/mod */
320 1, /* cost of movsx */
321 1, /* cost of movzx */
322 8, /* "large" insn */
323 9, /* MOVE_RATIO */
324 4, /* cost for loading QImode using movzbl */
325 {3, 4, 3}, /* cost of loading integer registers
326 in QImode, HImode and SImode.
327 Relative to reg-reg move (2). */
328 {3, 4, 3}, /* cost of storing integer registers */
329 4, /* cost of reg,reg fld/fst */
330 {4, 4, 12}, /* cost of loading fp registers
331 in SFmode, DFmode and XFmode */
332 {6, 6, 8}, /* cost of loading integer registers */
333 2, /* cost of moving MMX register */
334 {4, 4}, /* cost of loading MMX registers
335 in SImode and DImode */
336 {4, 4}, /* cost of storing MMX registers
337 in SImode and DImode */
338 2, /* cost of moving SSE register */
339 {4, 4, 6}, /* cost of loading SSE registers
340 in SImode, DImode and TImode */
341 {4, 4, 5}, /* cost of storing SSE registers
342 in SImode, DImode and TImode */
343 5, /* MMX or SSE register to integer */
344 64, /* size of prefetch block */
345 6, /* number of parallel prefetches */
346 4, /* cost of FADD and FSUB insns. */
347 4, /* cost of FMUL instruction. */
348 24, /* cost of FDIV instruction. */
349 2, /* cost of FABS instruction. */
350 2, /* cost of FCHS instruction. */
351 35, /* cost of FSQRT instruction. */
352 };
353
354 static const
355 struct processor_costs pentium4_cost = {
356 1, /* cost of an add instruction */
357 1, /* cost of a lea instruction */
358 8, /* variable shift costs */
359 8, /* constant shift costs */
360 30, /* cost of starting a multiply */
361 0, /* cost of multiply per each bit set */
362 112, /* cost of a divide/mod */
363 1, /* cost of movsx */
364 1, /* cost of movzx */
365 16, /* "large" insn */
366 6, /* MOVE_RATIO */
367 2, /* cost for loading QImode using movzbl */
368 {4, 5, 4}, /* cost of loading integer registers
369 in QImode, HImode and SImode.
370 Relative to reg-reg move (2). */
371 {2, 3, 2}, /* cost of storing integer registers */
372 2, /* cost of reg,reg fld/fst */
373 {2, 2, 6}, /* cost of loading fp registers
374 in SFmode, DFmode and XFmode */
375 {4, 4, 6}, /* cost of loading integer registers */
376 2, /* cost of moving MMX register */
377 {2, 2}, /* cost of loading MMX registers
378 in SImode and DImode */
379 {2, 2}, /* cost of storing MMX registers
380 in SImode and DImode */
381 12, /* cost of moving SSE register */
382 {12, 12, 12}, /* cost of loading SSE registers
383 in SImode, DImode and TImode */
384 {2, 2, 8}, /* cost of storing SSE registers
385 in SImode, DImode and TImode */
386 10, /* MMX or SSE register to integer */
387 64, /* size of prefetch block */
388 6, /* number of parallel prefetches */
389 5, /* cost of FADD and FSUB insns. */
390 7, /* cost of FMUL instruction. */
391 43, /* cost of FDIV instruction. */
392 2, /* cost of FABS instruction. */
393 2, /* cost of FCHS instruction. */
394 43, /* cost of FSQRT instruction. */
395 };
396
397 const struct processor_costs *ix86_cost = &pentium_cost;
398
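/* Illustrative sketch (not part of the original file): the cost tables above
   are consulted through the ix86_cost pointer selected in override_options,
   so tuning decisions reduce to comparing table entries.  The field names
   `add' and `lea' are assumptions here, taken to match the struct
   processor_costs declaration in i386.h.  */
#if 0
static int
example_prefer_lea_p (void)
{
  /* Relative costs: entries are scaled so a register-register add is the
     baseline for the selected processor.  */
  return ix86_cost->lea <= ix86_cost->add;
}
#endif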
399 /* Processor feature/optimization bitmasks. */
400 #define m_386 (1<<PROCESSOR_I386)
401 #define m_486 (1<<PROCESSOR_I486)
402 #define m_PENT (1<<PROCESSOR_PENTIUM)
403 #define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
404 #define m_K6 (1<<PROCESSOR_K6)
405 #define m_ATHLON (1<<PROCESSOR_ATHLON)
406 #define m_PENT4 (1<<PROCESSOR_PENTIUM4)
407
408 const int x86_use_leave = m_386 | m_K6 | m_ATHLON;
409 const int x86_push_memory = m_386 | m_K6 | m_ATHLON | m_PENT4;
410 const int x86_zero_extend_with_and = m_486 | m_PENT;
411 const int x86_movx = m_ATHLON | m_PPRO | m_PENT4 /* m_386 | m_K6 */;
412 const int x86_double_with_add = ~m_386;
413 const int x86_use_bit_test = m_386;
414 const int x86_unroll_strlen = m_486 | m_PENT | m_PPRO | m_ATHLON | m_K6;
415 const int x86_cmove = m_PPRO | m_ATHLON | m_PENT4;
416 const int x86_3dnow_a = m_ATHLON;
417 const int x86_deep_branch = m_PPRO | m_K6 | m_ATHLON | m_PENT4;
418 const int x86_branch_hints = m_PENT4;
419 const int x86_use_sahf = m_PPRO | m_K6 | m_PENT4;
420 const int x86_partial_reg_stall = m_PPRO;
421 const int x86_use_loop = m_K6;
422 const int x86_use_fiop = ~(m_PPRO | m_ATHLON | m_PENT);
423 const int x86_use_mov0 = m_K6;
424 const int x86_use_cltd = ~(m_PENT | m_K6);
425 const int x86_read_modify_write = ~m_PENT;
426 const int x86_read_modify = ~(m_PENT | m_PPRO);
427 const int x86_split_long_moves = m_PPRO;
428 const int x86_promote_QImode = m_K6 | m_PENT | m_386 | m_486 | m_ATHLON;
429 const int x86_fast_prefix = ~(m_PENT | m_486 | m_386);
430 const int x86_single_stringop = m_386 | m_PENT4;
431 const int x86_qimode_math = ~(0);
432 const int x86_promote_qi_regs = 0;
433 const int x86_himode_math = ~(m_PPRO);
434 const int x86_promote_hi_regs = m_PPRO;
435 const int x86_sub_esp_4 = m_ATHLON | m_PPRO | m_PENT4;
436 const int x86_sub_esp_8 = m_ATHLON | m_PPRO | m_386 | m_486 | m_PENT4;
437 const int x86_add_esp_4 = m_ATHLON | m_K6 | m_PENT4;
438 const int x86_add_esp_8 = m_ATHLON | m_PPRO | m_K6 | m_386 | m_486 | m_PENT4;
439 const int x86_integer_DFmode_moves = ~(m_ATHLON | m_PENT4 | m_PPRO);
440 const int x86_partial_reg_dependency = m_ATHLON | m_PENT4;
441 const int x86_memory_mismatch_stall = m_ATHLON | m_PENT4;
442 const int x86_accumulate_outgoing_args = m_ATHLON | m_PENT4 | m_PPRO;
443 const int x86_prologue_using_move = m_ATHLON | m_PENT4 | m_PPRO;
444 const int x86_epilogue_using_move = m_ATHLON | m_PENT4 | m_PPRO;
445 const int x86_decompose_lea = m_PENT4;
446 const int x86_shift1 = ~m_486;
447 const int x86_arch_always_fancy_math_387 = m_PENT | m_PPRO | m_ATHLON | m_PENT4;
448
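/* Illustrative sketch (not in the original source): each tuning variable
   above is a bitmask with one bit per PROCESSOR_* value, so a feature test
   is a single AND against the bit for the selected processor -- the same
   pattern override_options uses below with
   x86_arch_always_fancy_math_387 & (1 << ix86_arch).  The macro name here
   is invented for illustration; the real TARGET_* convenience macros live
   in i386.h.  */
#if 0
#define EXAMPLE_TARGET_USE_LEAVE (x86_use_leave & (1 << ix86_cpu))
#endif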
449 /* In case the average insn count for a single function invocation is
450 lower than this constant, emit fast (but longer) prologue and
451 epilogue code. */
452 #define FAST_PROLOGUE_INSN_COUNT 30
453
454 /* Set by prologue expander and used by epilogue expander to determine
455 the style used. */
456 static int use_fast_prologue_epilogue;
457
458 /* Names for 8-bit (low), 8-bit (high), and 16-bit registers, respectively. */
459 static const char *const qi_reg_name[] = QI_REGISTER_NAMES;
460 static const char *const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES;
461 static const char *const hi_reg_name[] = HI_REGISTER_NAMES;
462
463 /* Array of the smallest class containing reg number REGNO, indexed by
464 REGNO. Used by REGNO_REG_CLASS in i386.h. */
465
466 enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] =
467 {
468 /* ax, dx, cx, bx */
469 AREG, DREG, CREG, BREG,
470 /* si, di, bp, sp */
471 SIREG, DIREG, NON_Q_REGS, NON_Q_REGS,
472 /* FP registers */
473 FP_TOP_REG, FP_SECOND_REG, FLOAT_REGS, FLOAT_REGS,
474 FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, FLOAT_REGS,
475 /* arg pointer */
476 NON_Q_REGS,
477 /* flags, fpsr, dirflag, frame */
478 NO_REGS, NO_REGS, NO_REGS, NON_Q_REGS,
479 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
480 SSE_REGS, SSE_REGS,
481 MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS,
482 MMX_REGS, MMX_REGS,
483 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
484 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
485 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
486 SSE_REGS, SSE_REGS,
487 };
488
489 /* The "default" register map used in 32bit mode. */
490
491 int const dbx_register_map[FIRST_PSEUDO_REGISTER] =
492 {
493 0, 2, 1, 3, 6, 7, 4, 5, /* general regs */
494 12, 13, 14, 15, 16, 17, 18, 19, /* fp regs */
495 -1, -1, -1, -1, -1, /* arg, flags, fpsr, dir, frame */
496 21, 22, 23, 24, 25, 26, 27, 28, /* SSE */
497 29, 30, 31, 32, 33, 34, 35, 36, /* MMX */
498 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
499 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
500 };
501
502 static int const x86_64_int_parameter_registers[6] =
503 {
504 5 /*RDI*/, 4 /*RSI*/, 1 /*RDX*/, 2 /*RCX*/,
505 FIRST_REX_INT_REG /*R8 */, FIRST_REX_INT_REG + 1 /*R9 */
506 };
507
508 static int const x86_64_int_return_registers[4] =
509 {
510 0 /*RAX*/, 1 /*RDX*/, 5 /*RDI*/, 4 /*RSI*/
511 };
512
513 /* The "default" register map used in 64bit mode. */
514 int const dbx64_register_map[FIRST_PSEUDO_REGISTER] =
515 {
516 0, 1, 2, 3, 4, 5, 6, 7, /* general regs */
517 33, 34, 35, 36, 37, 38, 39, 40, /* fp regs */
518 -1, -1, -1, -1, -1, /* arg, flags, fpsr, dir, frame */
519 17, 18, 19, 20, 21, 22, 23, 24, /* SSE */
520 41, 42, 43, 44, 45, 46, 47, 48, /* MMX */
521 8,9,10,11,12,13,14,15, /* extended integer registers */
522 25, 26, 27, 28, 29, 30, 31, 32, /* extended SSE registers */
523 };
524
525 /* Define the register numbers to be used in Dwarf debugging information.
526 The SVR4 reference port C compiler uses the following register numbers
527 in its Dwarf output code:
528 0 for %eax (gcc regno = 0)
529 1 for %ecx (gcc regno = 2)
530 2 for %edx (gcc regno = 1)
531 3 for %ebx (gcc regno = 3)
532 4 for %esp (gcc regno = 7)
533 5 for %ebp (gcc regno = 6)
534 6 for %esi (gcc regno = 4)
535 7 for %edi (gcc regno = 5)
536 The following three DWARF register numbers are never generated by
537 the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
538 believes these numbers have these meanings.
539 8 for %eip (no gcc equivalent)
540 9 for %eflags (gcc regno = 17)
541 10 for %trapno (no gcc equivalent)
542 It is not at all clear how we should number the FP stack registers
543 for the x86 architecture. If the version of SDB on x86/svr4 were
544 a bit less brain dead with respect to floating-point then we would
545 have a precedent to follow with respect to DWARF register numbers
546 for x86 FP registers, but the SDB on x86/svr4 is so completely
547 broken with respect to FP registers that it is hardly worth thinking
548 of it as something to strive for compatibility with.
549 The version of x86/svr4 SDB I have at the moment does (partially)
550 seem to believe that DWARF register number 11 is associated with
551 the x86 register %st(0), but that's about all. Higher DWARF
552 register numbers don't seem to be associated with anything in
553 particular, and even for DWARF regno 11, SDB only seems to under-
554 stand that it should say that a variable lives in %st(0) (when
555 asked via an `=' command) if we said it was in DWARF regno 11,
556 but SDB still prints garbage when asked for the value of the
557 variable in question (via a `/' command).
558 (Also note that the labels SDB prints for various FP stack regs
559 when doing an `x' command are all wrong.)
560 Note that these problems generally don't affect the native SVR4
561 C compiler because it doesn't allow the use of -O with -g and
562 because when it is *not* optimizing, it allocates a memory
563 location for each floating-point variable, and the memory
564 location is what gets described in the DWARF AT_location
565 attribute for the variable in question.
566 Regardless of the severe mental illness of the x86/svr4 SDB, we
567 do something sensible here and we use the following DWARF
568 register numbers. Note that these are all stack-top-relative
569 numbers.
570 11 for %st(0) (gcc regno = 8)
571 12 for %st(1) (gcc regno = 9)
572 13 for %st(2) (gcc regno = 10)
573 14 for %st(3) (gcc regno = 11)
574 15 for %st(4) (gcc regno = 12)
575 16 for %st(5) (gcc regno = 13)
576 17 for %st(6) (gcc regno = 14)
577 18 for %st(7) (gcc regno = 15)
578 */
579 int const svr4_dbx_register_map[FIRST_PSEUDO_REGISTER] =
580 {
581 0, 2, 1, 3, 6, 7, 5, 4, /* general regs */
582 11, 12, 13, 14, 15, 16, 17, 18, /* fp regs */
583 -1, 9, -1, -1, -1, /* arg, flags, fpsr, dir, frame */
584 21, 22, 23, 24, 25, 26, 27, 28, /* SSE registers */
585 29, 30, 31, 32, 33, 34, 35, 36, /* MMX registers */
586 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
587 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
588 };
589
590 /* Test and compare insns in i386.md store the information needed to
591 generate branch and scc insns here. */
592
593 rtx ix86_compare_op0 = NULL_RTX;
594 rtx ix86_compare_op1 = NULL_RTX;
595
596 /* The encoding characters for the four TLS models present in ELF. */
597
598 static char const tls_model_chars[] = " GLil";
599
600 #define MAX_386_STACK_LOCALS 3
601 /* Size of the register save area. */
602 #define X86_64_VARARGS_SIZE (REGPARM_MAX * UNITS_PER_WORD + SSE_REGPARM_MAX * 16)
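/* Worked example (illustrative, assuming the values REGPARM_MAX = 6,
   SSE_REGPARM_MAX = 8 and UNITS_PER_WORD = 8 that i386.h uses in 64-bit
   mode): the register save area is 6 * 8 + 8 * 16 = 48 + 128 = 176 bytes,
   matching the varargs register save area laid out by the x86-64 psABI.  */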
603
604 /* Define the structure for the machine field in struct function. */
605 struct machine_function GTY(())
606 {
607 rtx stack_locals[(int) MAX_MACHINE_MODE][MAX_386_STACK_LOCALS];
608 const char *some_ld_name;
609 int save_varrargs_registers;
610 int accesses_prev_frame;
611 };
612
613 #define ix86_stack_locals (cfun->machine->stack_locals)
614 #define ix86_save_varrargs_registers (cfun->machine->save_varrargs_registers)
615
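/* Illustrative sketch (not in the original source): per-function data hangs
   off cfun->machine, so any pass can cache an rtx in stack_locals or record
   that the varargs register save area is needed through the macros above.
   The helper name below is invented for illustration only.  */
#if 0
static void
example_mark_varargs_save_area_needed (void)
{
  /* cfun->machine is allocated by ix86_init_machine_status, which
     override_options installs as init_machine_status.  */
  ix86_save_varrargs_registers = 1;
}
#endif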
616 /* Structure describing stack frame layout.
617 Stack grows downward:
618
619 [arguments]
620 <- ARG_POINTER
621 saved pc
622
623 saved frame pointer if frame_pointer_needed
624 <- HARD_FRAME_POINTER
625 [saved regs]
626
627 [padding1] \
628 )
629 [va_arg registers] (
630 > to_allocate <- FRAME_POINTER
631 [frame] (
632 )
633 [padding2] /
634 */
635 struct ix86_frame
636 {
637 int nregs;
638 int padding1;
639 int va_arg_size;
640 HOST_WIDE_INT frame;
641 int padding2;
642 int outgoing_arguments_size;
643 int red_zone_size;
644
645 HOST_WIDE_INT to_allocate;
646 /* The offsets relative to ARG_POINTER. */
647 HOST_WIDE_INT frame_pointer_offset;
648 HOST_WIDE_INT hard_frame_pointer_offset;
649 HOST_WIDE_INT stack_pointer_offset;
650 };
651
652 /* Used to enable/disable debugging features. */
653 const char *ix86_debug_arg_string, *ix86_debug_addr_string;
654 /* Code model option as passed by user. */
655 const char *ix86_cmodel_string;
656 /* Parsed value. */
657 enum cmodel ix86_cmodel;
658 /* Asm dialect. */
659 const char *ix86_asm_string;
660 enum asm_dialect ix86_asm_dialect = ASM_ATT;
661 /* TLS dialect. */
662 const char *ix86_tls_dialect_string;
663 enum tls_dialect ix86_tls_dialect = TLS_DIALECT_GNU;
664
665 /* Which unit we are generating floating point math for. */
666 enum fpmath_unit ix86_fpmath;
667
668 /* Which cpu we are scheduling for. */
669 enum processor_type ix86_cpu;
670 /* Which instruction set architecture to use. */
671 enum processor_type ix86_arch;
672
673 /* Strings to hold which cpu and instruction set architecture to use. */
674 const char *ix86_cpu_string; /* for -mcpu=<xxx> */
675 const char *ix86_arch_string; /* for -march=<xxx> */
676 const char *ix86_fpmath_string; /* for -mfpmath=<xxx> */
677
678 /* # of registers to use to pass arguments. */
679 const char *ix86_regparm_string;
680
681 /* true if sse prefetch instruction is not NOOP. */
682 int x86_prefetch_sse;
683
684 /* ix86_regparm_string as a number */
685 int ix86_regparm;
686
687 /* Alignment to use for loops and jumps: */
688
689 /* Power of two alignment for loops. */
690 const char *ix86_align_loops_string;
691
692 /* Power of two alignment for non-loop jumps. */
693 const char *ix86_align_jumps_string;
694
695 /* Power of two alignment for stack boundary in bytes. */
696 const char *ix86_preferred_stack_boundary_string;
697
698 /* Preferred alignment for stack boundary in bits. */
699 int ix86_preferred_stack_boundary;
700
701 /* Values 1-5: see jump.c */
702 int ix86_branch_cost;
703 const char *ix86_branch_cost_string;
704
705 /* Power of two alignment for functions. */
706 const char *ix86_align_funcs_string;
707
708 /* Prefix built by ASM_GENERATE_INTERNAL_LABEL. */
709 static char internal_label_prefix[16];
710 static int internal_label_prefix_len;
711 \f
712 static int local_symbolic_operand PARAMS ((rtx, enum machine_mode));
713 static int tls_symbolic_operand_1 PARAMS ((rtx, enum tls_model));
714 static void output_pic_addr_const PARAMS ((FILE *, rtx, int));
715 static void put_condition_code PARAMS ((enum rtx_code, enum machine_mode,
716 int, int, FILE *));
717 static const char *get_some_local_dynamic_name PARAMS ((void));
718 static int get_some_local_dynamic_name_1 PARAMS ((rtx *, void *));
719 static rtx maybe_get_pool_constant PARAMS ((rtx));
720 static rtx ix86_expand_int_compare PARAMS ((enum rtx_code, rtx, rtx));
721 static enum rtx_code ix86_prepare_fp_compare_args PARAMS ((enum rtx_code,
722 rtx *, rtx *));
723 static rtx get_thread_pointer PARAMS ((void));
724 static void get_pc_thunk_name PARAMS ((char [32], unsigned int));
725 static rtx gen_push PARAMS ((rtx));
726 static int memory_address_length PARAMS ((rtx addr));
727 static int ix86_flags_dependant PARAMS ((rtx, rtx, enum attr_type));
728 static int ix86_agi_dependant PARAMS ((rtx, rtx, enum attr_type));
729 static enum attr_ppro_uops ix86_safe_ppro_uops PARAMS ((rtx));
730 static void ix86_dump_ppro_packet PARAMS ((FILE *));
731 static void ix86_reorder_insn PARAMS ((rtx *, rtx *));
732 static struct machine_function * ix86_init_machine_status PARAMS ((void));
733 static int ix86_split_to_parts PARAMS ((rtx, rtx *, enum machine_mode));
734 static int ix86_nsaved_regs PARAMS ((void));
735 static void ix86_emit_save_regs PARAMS ((void));
736 static void ix86_emit_save_regs_using_mov PARAMS ((rtx, HOST_WIDE_INT));
737 static void ix86_emit_restore_regs_using_mov PARAMS ((rtx, int, int));
738 static void ix86_output_function_epilogue PARAMS ((FILE *, HOST_WIDE_INT));
739 static void ix86_set_move_mem_attrs_1 PARAMS ((rtx, rtx, rtx, rtx, rtx));
740 static void ix86_sched_reorder_ppro PARAMS ((rtx *, rtx *));
741 static HOST_WIDE_INT ix86_GOT_alias_set PARAMS ((void));
742 static void ix86_adjust_counter PARAMS ((rtx, HOST_WIDE_INT));
743 static rtx ix86_expand_aligntest PARAMS ((rtx, int));
744 static void ix86_expand_strlensi_unroll_1 PARAMS ((rtx, rtx));
745 static int ix86_issue_rate PARAMS ((void));
746 static int ix86_adjust_cost PARAMS ((rtx, rtx, rtx, int));
747 static void ix86_sched_init PARAMS ((FILE *, int, int));
748 static int ix86_sched_reorder PARAMS ((FILE *, int, rtx *, int *, int));
749 static int ix86_variable_issue PARAMS ((FILE *, int, rtx, int));
750 static int ia32_use_dfa_pipeline_interface PARAMS ((void));
751 static int ia32_multipass_dfa_lookahead PARAMS ((void));
752 static void ix86_init_mmx_sse_builtins PARAMS ((void));
753 static rtx x86_this_parameter PARAMS ((tree));
754 static void x86_output_mi_thunk PARAMS ((FILE *, tree, HOST_WIDE_INT,
755 HOST_WIDE_INT, tree));
756 static bool x86_can_output_mi_thunk PARAMS ((tree, HOST_WIDE_INT,
757 HOST_WIDE_INT, tree));
758
759 struct ix86_address
760 {
761 rtx base, index, disp;
762 HOST_WIDE_INT scale;
763 };
764
765 static int ix86_decompose_address PARAMS ((rtx, struct ix86_address *));
766
767 static void ix86_encode_section_info PARAMS ((tree, int)) ATTRIBUTE_UNUSED;
768 static const char *ix86_strip_name_encoding PARAMS ((const char *))
769 ATTRIBUTE_UNUSED;
770
771 struct builtin_description;
772 static rtx ix86_expand_sse_comi PARAMS ((const struct builtin_description *,
773 tree, rtx));
774 static rtx ix86_expand_sse_compare PARAMS ((const struct builtin_description *,
775 tree, rtx));
776 static rtx ix86_expand_unop1_builtin PARAMS ((enum insn_code, tree, rtx));
777 static rtx ix86_expand_unop_builtin PARAMS ((enum insn_code, tree, rtx, int));
778 static rtx ix86_expand_binop_builtin PARAMS ((enum insn_code, tree, rtx));
779 static rtx ix86_expand_store_builtin PARAMS ((enum insn_code, tree));
780 static rtx safe_vector_operand PARAMS ((rtx, enum machine_mode));
781 static enum rtx_code ix86_fp_compare_code_to_integer PARAMS ((enum rtx_code));
782 static void ix86_fp_comparison_codes PARAMS ((enum rtx_code code,
783 enum rtx_code *,
784 enum rtx_code *,
785 enum rtx_code *));
786 static rtx ix86_expand_fp_compare PARAMS ((enum rtx_code, rtx, rtx, rtx,
787 rtx *, rtx *));
788 static int ix86_fp_comparison_arithmetics_cost PARAMS ((enum rtx_code code));
789 static int ix86_fp_comparison_fcomi_cost PARAMS ((enum rtx_code code));
790 static int ix86_fp_comparison_sahf_cost PARAMS ((enum rtx_code code));
791 static int ix86_fp_comparison_cost PARAMS ((enum rtx_code code));
792 static unsigned int ix86_select_alt_pic_regnum PARAMS ((void));
793 static int ix86_save_reg PARAMS ((unsigned int, int));
794 static void ix86_compute_frame_layout PARAMS ((struct ix86_frame *));
795 static int ix86_comp_type_attributes PARAMS ((tree, tree));
796 static int ix86_fntype_regparm PARAMS ((tree));
797 const struct attribute_spec ix86_attribute_table[];
798 static tree ix86_handle_cdecl_attribute PARAMS ((tree *, tree, tree, int, bool *));
799 static tree ix86_handle_regparm_attribute PARAMS ((tree *, tree, tree, int, bool *));
800 static int ix86_value_regno PARAMS ((enum machine_mode));
801
802 #if defined (DO_GLOBAL_CTORS_BODY) && defined (HAS_INIT_SECTION)
803 static void ix86_svr3_asm_out_constructor PARAMS ((rtx, int));
804 #endif
805
806 /* Register class used for passing a given 64-bit part of the argument.
807 These represent classes as documented by the psABI, with the exception of
808 the SSESF and SSEDF classes, which are basically the SSE class, except that
809 gcc will use SFmode or DFmode moves instead of DImode to avoid reformatting penalties.
810 
811 Similarly we play games with INTEGERSI_CLASS to use cheaper SImode moves
812 whenever possible (when the upper half contains only padding).
813 */
814 enum x86_64_reg_class
815 {
816 X86_64_NO_CLASS,
817 X86_64_INTEGER_CLASS,
818 X86_64_INTEGERSI_CLASS,
819 X86_64_SSE_CLASS,
820 X86_64_SSESF_CLASS,
821 X86_64_SSEDF_CLASS,
822 X86_64_SSEUP_CLASS,
823 X86_64_X87_CLASS,
824 X86_64_X87UP_CLASS,
825 X86_64_MEMORY_CLASS
826 };
827 static const char * const x86_64_reg_class_name[] =
828 {"no", "integer", "integerSI", "sse", "sseSF", "sseDF", "sseup", "x87", "x87up", "no"};
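/* A worked example (illustrative, following the x86-64 psABI rules that the
   classification code declared below implements): for

       struct s { double d; int i; };

   the first eightbyte holds only the double and gets the SSE class (the
   SSEDF variant above, so DFmode moves can be used), while the second
   eightbyte holds a single 32-bit int and gets INTEGERSI, so the struct is
   passed in one SSE register and one general-purpose register.  */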
829
830 #define MAX_CLASSES 4
831 static int classify_argument PARAMS ((enum machine_mode, tree,
832 enum x86_64_reg_class [MAX_CLASSES],
833 int));
834 static int examine_argument PARAMS ((enum machine_mode, tree, int, int *,
835 int *));
836 static rtx construct_container PARAMS ((enum machine_mode, tree, int, int, int,
837 const int *, int));
838 static enum x86_64_reg_class merge_classes PARAMS ((enum x86_64_reg_class,
839 enum x86_64_reg_class));
840 \f
841 /* Initialize the GCC target structure. */
842 #undef TARGET_ATTRIBUTE_TABLE
843 #define TARGET_ATTRIBUTE_TABLE ix86_attribute_table
844 #ifdef TARGET_DLLIMPORT_DECL_ATTRIBUTES
845 # undef TARGET_MERGE_DECL_ATTRIBUTES
846 # define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
847 #endif
848
849 #undef TARGET_COMP_TYPE_ATTRIBUTES
850 #define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes
851
852 #undef TARGET_INIT_BUILTINS
853 #define TARGET_INIT_BUILTINS ix86_init_builtins
854
855 #undef TARGET_EXPAND_BUILTIN
856 #define TARGET_EXPAND_BUILTIN ix86_expand_builtin
857
858 #undef TARGET_ASM_FUNCTION_EPILOGUE
859 #define TARGET_ASM_FUNCTION_EPILOGUE ix86_output_function_epilogue
860
861 #undef TARGET_ASM_OPEN_PAREN
862 #define TARGET_ASM_OPEN_PAREN ""
863 #undef TARGET_ASM_CLOSE_PAREN
864 #define TARGET_ASM_CLOSE_PAREN ""
865
866 #undef TARGET_ASM_ALIGNED_HI_OP
867 #define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT
868 #undef TARGET_ASM_ALIGNED_SI_OP
869 #define TARGET_ASM_ALIGNED_SI_OP ASM_LONG
870 #ifdef ASM_QUAD
871 #undef TARGET_ASM_ALIGNED_DI_OP
872 #define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD
873 #endif
874
875 #undef TARGET_ASM_UNALIGNED_HI_OP
876 #define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
877 #undef TARGET_ASM_UNALIGNED_SI_OP
878 #define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
879 #undef TARGET_ASM_UNALIGNED_DI_OP
880 #define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP
881
882 #undef TARGET_SCHED_ADJUST_COST
883 #define TARGET_SCHED_ADJUST_COST ix86_adjust_cost
884 #undef TARGET_SCHED_ISSUE_RATE
885 #define TARGET_SCHED_ISSUE_RATE ix86_issue_rate
886 #undef TARGET_SCHED_VARIABLE_ISSUE
887 #define TARGET_SCHED_VARIABLE_ISSUE ix86_variable_issue
888 #undef TARGET_SCHED_INIT
889 #define TARGET_SCHED_INIT ix86_sched_init
890 #undef TARGET_SCHED_REORDER
891 #define TARGET_SCHED_REORDER ix86_sched_reorder
892 #undef TARGET_SCHED_USE_DFA_PIPELINE_INTERFACE
893 #define TARGET_SCHED_USE_DFA_PIPELINE_INTERFACE \
894 ia32_use_dfa_pipeline_interface
895 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
896 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
897 ia32_multipass_dfa_lookahead
898
899 #ifdef HAVE_AS_TLS
900 #undef TARGET_HAVE_TLS
901 #define TARGET_HAVE_TLS true
902 #endif
903
904 #undef TARGET_ASM_OUTPUT_MI_THUNK
905 #define TARGET_ASM_OUTPUT_MI_THUNK x86_output_mi_thunk
906 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
907 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK x86_can_output_mi_thunk
908
909 struct gcc_target targetm = TARGET_INITIALIZER;
910 \f
911 /* Sometimes certain combinations of command options do not make
912 sense on a particular target machine. You can define a macro
913 `OVERRIDE_OPTIONS' to take account of this. This macro, if
914 defined, is executed once just after all the command options have
915 been parsed.
916
917 Don't use this macro to turn on various extra optimizations for
918 `-O'. That is what `OPTIMIZATION_OPTIONS' is for. */
919
920 void
921 override_options ()
922 {
923 int i;
924 /* Comes from final.c -- no real reason to change it. */
925 #define MAX_CODE_ALIGN 16
926
927 static struct ptt
928 {
929 const struct processor_costs *cost; /* Processor costs */
930 const int target_enable; /* Target flags to enable. */
931 const int target_disable; /* Target flags to disable. */
932 const int align_loop; /* Default alignments. */
933 const int align_loop_max_skip;
934 const int align_jump;
935 const int align_jump_max_skip;
936 const int align_func;
937 const int branch_cost;
938 }
939 const processor_target_table[PROCESSOR_max] =
940 {
941 {&i386_cost, 0, 0, 4, 3, 4, 3, 4, 1},
942 {&i486_cost, 0, 0, 16, 15, 16, 15, 16, 1},
943 {&pentium_cost, 0, 0, 16, 7, 16, 7, 16, 1},
944 {&pentiumpro_cost, 0, 0, 16, 15, 16, 7, 16, 1},
945 {&k6_cost, 0, 0, 32, 7, 32, 7, 32, 1},
946 {&athlon_cost, 0, 0, 16, 7, 64, 7, 16, 1},
947 {&pentium4_cost, 0, 0, 0, 0, 0, 0, 0, 1}
948 };
949
950 static const char * const cpu_names[] = TARGET_CPU_DEFAULT_NAMES;
951 static struct pta
952 {
953 const char *const name; /* processor name or nickname. */
954 const enum processor_type processor;
955 const enum pta_flags
956 {
957 PTA_SSE = 1,
958 PTA_SSE2 = 2,
959 PTA_MMX = 4,
960 PTA_PREFETCH_SSE = 8,
961 PTA_3DNOW = 16,
962 PTA_3DNOW_A = 64
963 } flags;
964 }
965 const processor_alias_table[] =
966 {
967 {"i386", PROCESSOR_I386, 0},
968 {"i486", PROCESSOR_I486, 0},
969 {"i586", PROCESSOR_PENTIUM, 0},
970 {"pentium", PROCESSOR_PENTIUM, 0},
971 {"pentium-mmx", PROCESSOR_PENTIUM, PTA_MMX},
972 {"winchip-c6", PROCESSOR_I486, PTA_MMX},
973 {"winchip2", PROCESSOR_I486, PTA_MMX | PTA_3DNOW},
974 {"c3", PROCESSOR_I486, PTA_MMX | PTA_3DNOW},
975 {"i686", PROCESSOR_PENTIUMPRO, 0},
976 {"pentiumpro", PROCESSOR_PENTIUMPRO, 0},
977 {"pentium2", PROCESSOR_PENTIUMPRO, PTA_MMX},
978 {"pentium3", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_PREFETCH_SSE},
979 {"pentium4", PROCESSOR_PENTIUM4, PTA_SSE | PTA_SSE2 |
980 PTA_MMX | PTA_PREFETCH_SSE},
981 {"k6", PROCESSOR_K6, PTA_MMX},
982 {"k6-2", PROCESSOR_K6, PTA_MMX | PTA_3DNOW},
983 {"k6-3", PROCESSOR_K6, PTA_MMX | PTA_3DNOW},
984 {"athlon", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
985 | PTA_3DNOW_A},
986 {"athlon-tbird", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE
987 | PTA_3DNOW | PTA_3DNOW_A},
988 {"athlon-4", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
989 | PTA_3DNOW_A | PTA_SSE},
990 {"athlon-xp", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
991 | PTA_3DNOW_A | PTA_SSE},
992 {"athlon-mp", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
993 | PTA_3DNOW_A | PTA_SSE},
994 };
995
996 int const pta_size = ARRAY_SIZE (processor_alias_table);
997
998 /* By default our XFmode is the 80-bit extended format. If we use
999 TFmode instead, it's also the 80-bit format, but with padding. */
1000 real_format_for_mode[XFmode - QFmode] = &ieee_extended_intel_96_format;
1001 real_format_for_mode[TFmode - QFmode] = &ieee_extended_intel_128_format;
1002
1003 /* Set the default values for switches whose default depends on TARGET_64BIT
1004 in case they weren't overwritten by command line options. */
1005 if (TARGET_64BIT)
1006 {
1007 if (flag_omit_frame_pointer == 2)
1008 flag_omit_frame_pointer = 1;
1009 if (flag_asynchronous_unwind_tables == 2)
1010 flag_asynchronous_unwind_tables = 1;
1011 if (flag_pcc_struct_return == 2)
1012 flag_pcc_struct_return = 0;
1013 }
1014 else
1015 {
1016 if (flag_omit_frame_pointer == 2)
1017 flag_omit_frame_pointer = 0;
1018 if (flag_asynchronous_unwind_tables == 2)
1019 flag_asynchronous_unwind_tables = 0;
1020 if (flag_pcc_struct_return == 2)
1021 flag_pcc_struct_return = 1;
1022 }
1023
1024 #ifdef SUBTARGET_OVERRIDE_OPTIONS
1025 SUBTARGET_OVERRIDE_OPTIONS;
1026 #endif
1027
1028 if (!ix86_cpu_string && ix86_arch_string)
1029 ix86_cpu_string = ix86_arch_string;
1030 if (!ix86_cpu_string)
1031 ix86_cpu_string = cpu_names [TARGET_CPU_DEFAULT];
1032 if (!ix86_arch_string)
1033 ix86_arch_string = TARGET_64BIT ? "athlon-4" : "i386";
1034
1035 if (ix86_cmodel_string != 0)
1036 {
1037 if (!strcmp (ix86_cmodel_string, "small"))
1038 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
1039 else if (flag_pic)
1040 sorry ("code model %s not supported in PIC mode", ix86_cmodel_string);
1041 else if (!strcmp (ix86_cmodel_string, "32"))
1042 ix86_cmodel = CM_32;
1043 else if (!strcmp (ix86_cmodel_string, "kernel") && !flag_pic)
1044 ix86_cmodel = CM_KERNEL;
1045 else if (!strcmp (ix86_cmodel_string, "medium") && !flag_pic)
1046 ix86_cmodel = CM_MEDIUM;
1047 else if (!strcmp (ix86_cmodel_string, "large") && !flag_pic)
1048 ix86_cmodel = CM_LARGE;
1049 else
1050 error ("bad value (%s) for -mcmodel= switch", ix86_cmodel_string);
1051 }
1052 else
1053 {
1054 ix86_cmodel = CM_32;
1055 if (TARGET_64BIT)
1056 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
1057 }
1058 if (ix86_asm_string != 0)
1059 {
1060 if (!strcmp (ix86_asm_string, "intel"))
1061 ix86_asm_dialect = ASM_INTEL;
1062 else if (!strcmp (ix86_asm_string, "att"))
1063 ix86_asm_dialect = ASM_ATT;
1064 else
1065 error ("bad value (%s) for -masm= switch", ix86_asm_string);
1066 }
1067 if ((TARGET_64BIT == 0) != (ix86_cmodel == CM_32))
1068 error ("code model `%s' not supported in the %s bit mode",
1069 ix86_cmodel_string, TARGET_64BIT ? "64" : "32");
1070 if (ix86_cmodel == CM_LARGE)
1071 sorry ("code model `large' not supported yet");
1072 if ((TARGET_64BIT != 0) != ((target_flags & MASK_64BIT) != 0))
1073 sorry ("%i-bit mode not compiled in",
1074 (target_flags & MASK_64BIT) ? 64 : 32);
1075
1076 for (i = 0; i < pta_size; i++)
1077 if (! strcmp (ix86_arch_string, processor_alias_table[i].name))
1078 {
1079 ix86_arch = processor_alias_table[i].processor;
1080 /* Default cpu tuning to the architecture. */
1081 ix86_cpu = ix86_arch;
1082 if (processor_alias_table[i].flags & PTA_MMX
1083 && !(target_flags_explicit & MASK_MMX))
1084 target_flags |= MASK_MMX;
1085 if (processor_alias_table[i].flags & PTA_3DNOW
1086 && !(target_flags_explicit & MASK_3DNOW))
1087 target_flags |= MASK_3DNOW;
1088 if (processor_alias_table[i].flags & PTA_3DNOW_A
1089 && !(target_flags_explicit & MASK_3DNOW_A))
1090 target_flags |= MASK_3DNOW_A;
1091 if (processor_alias_table[i].flags & PTA_SSE
1092 && !(target_flags_explicit & MASK_SSE))
1093 target_flags |= MASK_SSE;
1094 if (processor_alias_table[i].flags & PTA_SSE2
1095 && !(target_flags_explicit & MASK_SSE2))
1096 target_flags |= MASK_SSE2;
1097 if (processor_alias_table[i].flags & PTA_PREFETCH_SSE)
1098 x86_prefetch_sse = true;
1099 break;
1100 }
1101
1102 if (i == pta_size)
1103 error ("bad value (%s) for -march= switch", ix86_arch_string);
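  /* For example (an illustrative invocation, not taken from this file):
     -march=pentium3 matches the alias table entry above, selects
     PROCESSOR_PENTIUMPRO as both ix86_arch and the default ix86_cpu, and
     turns on MASK_MMX and MASK_SSE plus x86_prefetch_sse, unless the user
     explicitly passed -mno-mmx or -mno-sse.  */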
1104
1105 for (i = 0; i < pta_size; i++)
1106 if (! strcmp (ix86_cpu_string, processor_alias_table[i].name))
1107 {
1108 ix86_cpu = processor_alias_table[i].processor;
1109 break;
1110 }
1111 if (processor_alias_table[i].flags & PTA_PREFETCH_SSE)
1112 x86_prefetch_sse = true;
1113 if (i == pta_size)
1114 error ("bad value (%s) for -mcpu= switch", ix86_cpu_string);
1115
1116 if (optimize_size)
1117 ix86_cost = &size_cost;
1118 else
1119 ix86_cost = processor_target_table[ix86_cpu].cost;
1120 target_flags |= processor_target_table[ix86_cpu].target_enable;
1121 target_flags &= ~processor_target_table[ix86_cpu].target_disable;
1122
1123 /* Arrange to set up i386_stack_locals for all functions. */
1124 init_machine_status = ix86_init_machine_status;
1125
1126 /* Validate -mregparm= value. */
1127 if (ix86_regparm_string)
1128 {
1129 i = atoi (ix86_regparm_string);
1130 if (i < 0 || i > REGPARM_MAX)
1131 error ("-mregparm=%d is not between 0 and %d", i, REGPARM_MAX);
1132 else
1133 ix86_regparm = i;
1134 }
1135 else
1136 if (TARGET_64BIT)
1137 ix86_regparm = REGPARM_MAX;
1138
1139 /* If the user has provided any of the -malign-* options,
1140 warn and use that value only if -falign-* is not set.
1141 Remove this code in GCC 3.2 or later. */
1142 if (ix86_align_loops_string)
1143 {
1144 warning ("-malign-loops is obsolete, use -falign-loops");
1145 if (align_loops == 0)
1146 {
1147 i = atoi (ix86_align_loops_string);
1148 if (i < 0 || i > MAX_CODE_ALIGN)
1149 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
1150 else
1151 align_loops = 1 << i;
1152 }
1153 }
1154
1155 if (ix86_align_jumps_string)
1156 {
1157 warning ("-malign-jumps is obsolete, use -falign-jumps");
1158 if (align_jumps == 0)
1159 {
1160 i = atoi (ix86_align_jumps_string);
1161 if (i < 0 || i > MAX_CODE_ALIGN)
1162 error ("-malign-jumps=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
1163 else
1164 align_jumps = 1 << i;
1165 }
1166 }
1167
1168 if (ix86_align_funcs_string)
1169 {
1170 warning ("-malign-functions is obsolete, use -falign-functions");
1171 if (align_functions == 0)
1172 {
1173 i = atoi (ix86_align_funcs_string);
1174 if (i < 0 || i > MAX_CODE_ALIGN)
1175 error ("-malign-functions=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
1176 else
1177 align_functions = 1 << i;
1178 }
1179 }
1180
1181 /* Default align_* from the processor table. */
1182 if (align_loops == 0)
1183 {
1184 align_loops = processor_target_table[ix86_cpu].align_loop;
1185 align_loops_max_skip = processor_target_table[ix86_cpu].align_loop_max_skip;
1186 }
1187 if (align_jumps == 0)
1188 {
1189 align_jumps = processor_target_table[ix86_cpu].align_jump;
1190 align_jumps_max_skip = processor_target_table[ix86_cpu].align_jump_max_skip;
1191 }
1192 if (align_functions == 0)
1193 {
1194 align_functions = processor_target_table[ix86_cpu].align_func;
1195 }
1196
1197 /* Validate -mpreferred-stack-boundary= value, or provide default.
1198 The default of 128 bits is for Pentium III's SSE __m128, but we
1199 don't want additional code to keep the stack aligned when
1200 optimizing for code size. */
1201 ix86_preferred_stack_boundary = (optimize_size
1202 ? TARGET_64BIT ? 128 : 32
1203 : 128);
1204 if (ix86_preferred_stack_boundary_string)
1205 {
1206 i = atoi (ix86_preferred_stack_boundary_string);
1207 if (i < (TARGET_64BIT ? 4 : 2) || i > 12)
1208 error ("-mpreferred-stack-boundary=%d is not between %d and 12", i,
1209 TARGET_64BIT ? 4 : 2);
1210 else
1211 ix86_preferred_stack_boundary = (1 << i) * BITS_PER_UNIT;
1212 }
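  /* Worked example (illustrative): -mpreferred-stack-boundary=4 gives
     (1 << 4) * BITS_PER_UNIT = 16 * 8 = 128 bits, i.e. the default 16-byte
     alignment; the minimum of 2 (or 4 in 64-bit mode) corresponds to
     4-byte (resp. 16-byte) alignment.  */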
1213
1214 /* Validate -mbranch-cost= value, or provide default. */
1215 ix86_branch_cost = processor_target_table[ix86_cpu].branch_cost;
1216 if (ix86_branch_cost_string)
1217 {
1218 i = atoi (ix86_branch_cost_string);
1219 if (i < 0 || i > 5)
1220 error ("-mbranch-cost=%d is not between 0 and 5", i);
1221 else
1222 ix86_branch_cost = i;
1223 }
1224
1225 if (ix86_tls_dialect_string)
1226 {
1227 if (strcmp (ix86_tls_dialect_string, "gnu") == 0)
1228 ix86_tls_dialect = TLS_DIALECT_GNU;
1229 else if (strcmp (ix86_tls_dialect_string, "sun") == 0)
1230 ix86_tls_dialect = TLS_DIALECT_SUN;
1231 else
1232 error ("bad value (%s) for -mtls-dialect= switch",
1233 ix86_tls_dialect_string);
1234 }
1235
1236 /* Keep nonleaf frame pointers. */
1237 if (TARGET_OMIT_LEAF_FRAME_POINTER)
1238 flag_omit_frame_pointer = 1;
1239
1240 /* If we're doing fast math, we don't care about comparison order
1241 wrt NaNs. This lets us use a shorter comparison sequence. */
1242 if (flag_unsafe_math_optimizations)
1243 target_flags &= ~MASK_IEEE_FP;
1244
1245 /* If the architecture always has an FPU, turn off NO_FANCY_MATH_387,
1246 since the insns won't need emulation. */
1247 if (x86_arch_always_fancy_math_387 & (1 << ix86_arch))
1248 target_flags &= ~MASK_NO_FANCY_MATH_387;
1249
1250 if (TARGET_64BIT)
1251 {
1252 if (TARGET_ALIGN_DOUBLE)
1253 error ("-malign-double makes no sense in the 64bit mode");
1254 if (TARGET_RTD)
1255 error ("-mrtd calling convention not supported in the 64bit mode");
1256 /* Enable by default the SSE and MMX builtins. */
1257 target_flags |= (MASK_SSE2 | MASK_SSE | MASK_MMX | MASK_128BIT_LONG_DOUBLE);
1258 ix86_fpmath = FPMATH_SSE;
1259 }
1260 else
1261 ix86_fpmath = FPMATH_387;
1262
1263 if (ix86_fpmath_string != 0)
1264 {
1265 if (! strcmp (ix86_fpmath_string, "387"))
1266 ix86_fpmath = FPMATH_387;
1267 else if (! strcmp (ix86_fpmath_string, "sse"))
1268 {
1269 if (!TARGET_SSE)
1270 {
1271 warning ("SSE instruction set disabled, using 387 arithmetics");
1272 ix86_fpmath = FPMATH_387;
1273 }
1274 else
1275 ix86_fpmath = FPMATH_SSE;
1276 }
1277 else if (! strcmp (ix86_fpmath_string, "387,sse")
1278 || ! strcmp (ix86_fpmath_string, "sse,387"))
1279 {
1280 if (!TARGET_SSE)
1281 {
1282 warning ("SSE instruction set disabled, using 387 arithmetics");
1283 ix86_fpmath = FPMATH_387;
1284 }
1285 else if (!TARGET_80387)
1286 {
1287 warning ("387 instruction set disabled, using SSE arithmetics");
1288 ix86_fpmath = FPMATH_SSE;
1289 }
1290 else
1291 ix86_fpmath = FPMATH_SSE | FPMATH_387;
1292 }
1293 else
1294 error ("bad value (%s) for -mfpmath= switch", ix86_fpmath_string);
1295 }
1296
1297 /* It makes no sense to ask for just SSE builtins, so MMX is also turned
1298 on by -msse. */
1299 if (TARGET_SSE)
1300 {
1301 target_flags |= MASK_MMX;
1302 x86_prefetch_sse = true;
1303 }
1304
1305 /* If it has 3DNow! it also has MMX so MMX is also turned on by -m3dnow */
1306 if (TARGET_3DNOW)
1307 {
1308 target_flags |= MASK_MMX;
1309 /* If we are targeting the Athlon architecture, enable the 3DNow!/MMX
1310 extensions it adds. */
1311 if (x86_3dnow_a & (1 << ix86_arch))
1312 target_flags |= MASK_3DNOW_A;
1313 }
1314 if ((x86_accumulate_outgoing_args & CPUMASK)
1315 && !(target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
1316 && !optimize_size)
1317 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
1318
1319 /* Figure out what ASM_GENERATE_INTERNAL_LABEL builds as a prefix. */
1320 {
1321 char *p;
1322 ASM_GENERATE_INTERNAL_LABEL (internal_label_prefix, "LX", 0);
1323 p = strchr (internal_label_prefix, 'X');
1324 internal_label_prefix_len = p - internal_label_prefix;
1325 *p = '\0';
1326 }
1327 }
1328 \f
1329 void
1330 optimization_options (level, size)
1331 int level;
1332 int size ATTRIBUTE_UNUSED;
1333 {
1334 /* For -O2 and beyond, turn off -fschedule-insns by default. It tends to
1335 make the problem with not enough registers even worse. */
1336 #ifdef INSN_SCHEDULING
1337 if (level > 1)
1338 flag_schedule_insns = 0;
1339 #endif
1340
1341 /* The default values of these switches depend on TARGET_64BIT,
1342 which is not known at this moment. Mark these values with 2 and
1343 let the user override them. If there is no command line option
1344 specifying them, we will set the defaults in override_options. */
1345 if (optimize >= 1)
1346 flag_omit_frame_pointer = 2;
1347 flag_pcc_struct_return = 2;
1348 flag_asynchronous_unwind_tables = 2;
1349 }
1350 \f
1351 /* Table of valid machine attributes. */
1352 const struct attribute_spec ix86_attribute_table[] =
1353 {
1354 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
1355 /* Stdcall attribute says callee is responsible for popping arguments
1356 if they are not variable. */
1357 { "stdcall", 0, 0, false, true, true, ix86_handle_cdecl_attribute },
1358 /* Cdecl attribute says the callee is a normal C declaration */
1359 { "cdecl", 0, 0, false, true, true, ix86_handle_cdecl_attribute },
1360 /* Regparm attribute specifies how many integer arguments are to be
1361 passed in registers. */
1362 { "regparm", 1, 1, false, true, true, ix86_handle_regparm_attribute },
1363 #ifdef TARGET_DLLIMPORT_DECL_ATTRIBUTES
1364 { "dllimport", 0, 0, false, false, false, ix86_handle_dll_attribute },
1365 { "dllexport", 0, 0, false, false, false, ix86_handle_dll_attribute },
1366 { "shared", 0, 0, true, false, false, ix86_handle_shared_attribute },
1367 #endif
1368 { NULL, 0, 0, false, false, false, NULL }
1369 };
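/* Usage sketch (illustrative, not part of this file): in user code these
   attributes are written on function declarations, e.g.

       int __attribute__ ((stdcall)) f (int a, int b);
       int __attribute__ ((regparm (2))) g (int a, int b);

   stdcall makes f pop its own arguments on return; regparm (2) passes the
   first two integer arguments of g in registers.  The handlers below warn
   and drop the attribute when it is applied to something other than a
   function type (or a related declaration) and, for regparm, when the
   argument exceeds REGPARM_MAX.  */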
1370
1371 /* Handle a "cdecl" or "stdcall" attribute;
1372 arguments as in struct attribute_spec.handler. */
1373 static tree
1374 ix86_handle_cdecl_attribute (node, name, args, flags, no_add_attrs)
1375 tree *node;
1376 tree name;
1377 tree args ATTRIBUTE_UNUSED;
1378 int flags ATTRIBUTE_UNUSED;
1379 bool *no_add_attrs;
1380 {
1381 if (TREE_CODE (*node) != FUNCTION_TYPE
1382 && TREE_CODE (*node) != METHOD_TYPE
1383 && TREE_CODE (*node) != FIELD_DECL
1384 && TREE_CODE (*node) != TYPE_DECL)
1385 {
1386 warning ("`%s' attribute only applies to functions",
1387 IDENTIFIER_POINTER (name));
1388 *no_add_attrs = true;
1389 }
1390
1391 if (TARGET_64BIT)
1392 {
1393 warning ("`%s' attribute ignored", IDENTIFIER_POINTER (name));
1394 *no_add_attrs = true;
1395 }
1396
1397 return NULL_TREE;
1398 }
1399
1400 /* Handle a "regparm" attribute;
1401 arguments as in struct attribute_spec.handler. */
1402 static tree
1403 ix86_handle_regparm_attribute (node, name, args, flags, no_add_attrs)
1404 tree *node;
1405 tree name;
1406 tree args;
1407 int flags ATTRIBUTE_UNUSED;
1408 bool *no_add_attrs;
1409 {
1410 if (TREE_CODE (*node) != FUNCTION_TYPE
1411 && TREE_CODE (*node) != METHOD_TYPE
1412 && TREE_CODE (*node) != FIELD_DECL
1413 && TREE_CODE (*node) != TYPE_DECL)
1414 {
1415 warning ("`%s' attribute only applies to functions",
1416 IDENTIFIER_POINTER (name));
1417 *no_add_attrs = true;
1418 }
1419 else
1420 {
1421 tree cst;
1422
1423 cst = TREE_VALUE (args);
1424 if (TREE_CODE (cst) != INTEGER_CST)
1425 {
1426 warning ("`%s' attribute requires an integer constant argument",
1427 IDENTIFIER_POINTER (name));
1428 *no_add_attrs = true;
1429 }
1430 else if (compare_tree_int (cst, REGPARM_MAX) > 0)
1431 {
1432 warning ("argument to `%s' attribute larger than %d",
1433 IDENTIFIER_POINTER (name), REGPARM_MAX);
1434 *no_add_attrs = true;
1435 }
1436 }
1437
1438 return NULL_TREE;
1439 }
1440
1441 /* Return 0 if the attributes for two types are incompatible, 1 if they
1442 are compatible, and 2 if they are nearly compatible (which causes a
1443 warning to be generated). */
1444
1445 static int
1446 ix86_comp_type_attributes (type1, type2)
1447 tree type1;
1448 tree type2;
1449 {
1450 /* Check for mismatch of non-default calling convention. */
1451 const char *const rtdstr = TARGET_RTD ? "cdecl" : "stdcall";
1452
1453 if (TREE_CODE (type1) != FUNCTION_TYPE)
1454 return 1;
1455
1456 /* Check for mismatched calling conventions (cdecl vs stdcall). */
1457 if (!lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type1))
1458 != !lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type2)))
1459 return 0;
1460 return 1;
1461 }
1462 \f
1463 /* Return the regparm value for a function with the indicated TYPE. */
1464
1465 static int
1466 ix86_fntype_regparm (type)
1467 tree type;
1468 {
1469 tree attr;
1470
1471 attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (type));
1472 if (attr)
1473 return TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
1474 else
1475 return ix86_regparm;
1476 }
1477
1478 /* Value is the number of bytes of arguments automatically
1479 popped when returning from a subroutine call.
1480 FUNDECL is the declaration node of the function (as a tree),
1481 FUNTYPE is the data type of the function (as a tree),
1482 or for a library call it is an identifier node for the subroutine name.
1483 SIZE is the number of bytes of arguments passed on the stack.
1484
1485 On the 80386, the RTD insn may be used to pop them if the number
1486 of args is fixed, but if the number is variable then the caller
1487 must pop them all. RTD can't be used for library calls now
1488 because the library is compiled with the Unix compiler.
1489 Use of RTD is a selectable option, since it is incompatible with
1490 standard Unix calling sequences. If the option is not selected,
1491 the caller must always pop the args.
1492
1493 The attribute stdcall is equivalent to RTD on a per module basis. */
1494
1495 int
1496 ix86_return_pops_args (fundecl, funtype, size)
1497 tree fundecl;
1498 tree funtype;
1499 int size;
1500 {
1501 int rtd = TARGET_RTD && (!fundecl || TREE_CODE (fundecl) != IDENTIFIER_NODE);
1502
1503 /* Cdecl functions override -mrtd, and never pop the stack. */
1504 if (! lookup_attribute ("cdecl", TYPE_ATTRIBUTES (funtype))) {
1505
1506 /* Stdcall functions will pop the stack if not variable args. */
1507 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (funtype)))
1508 rtd = 1;
1509
1510 if (rtd
1511 && (TYPE_ARG_TYPES (funtype) == NULL_TREE
1512 || (TREE_VALUE (tree_last (TYPE_ARG_TYPES (funtype)))
1513 == void_type_node)))
1514 return size;
1515 }
1516
1517 /* Lose any fake structure return argument if it is passed on the stack. */
1518 if (aggregate_value_p (TREE_TYPE (funtype))
1519 && !TARGET_64BIT)
1520 {
1521 int nregs = ix86_fntype_regparm (funtype);
1522
1523 if (!nregs)
1524 return GET_MODE_SIZE (Pmode);
1525 }
1526
1527 return 0;
1528 }
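/* Worked example (added; hypothetical declarations, default -mno-rtd):

     int __attribute__ ((stdcall)) f (int a, int b);   fixed args, callee
                                                        pops; returns SIZE (8)
     int g (int a, int b);                              caller pops; returns 0
     int __attribute__ ((stdcall)) h (int a, ...);      varargs, callee cannot
                                                        pop; returns 0

   For functions returning an aggregate in memory, the final block makes
   the callee pop the hidden return-pointer slot (GET_MODE_SIZE (Pmode))
   whenever regparm is not in use, i.e. when that pointer was passed on
   the stack.  */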
1529 \f
1530 /* Argument support functions. */
1531
1532 /* Return true when register may be used to pass function parameters. */
1533 bool
1534 ix86_function_arg_regno_p (regno)
1535 int regno;
1536 {
1537 int i;
1538 if (!TARGET_64BIT)
1539 return (regno < REGPARM_MAX
1540 || (TARGET_SSE && SSE_REGNO_P (regno) && !fixed_regs[regno]));
1541 if (SSE_REGNO_P (regno) && TARGET_SSE)
1542 return true;
1543 /* RAX is used as hidden argument to va_arg functions. */
1544 if (!regno)
1545 return true;
1546 for (i = 0; i < REGPARM_MAX; i++)
1547 if (regno == x86_64_int_parameter_registers[i])
1548 return true;
1549 return false;
1550 }
1551
1552 /* Initialize a variable CUM of type CUMULATIVE_ARGS
1553 for a call to a function whose data type is FNTYPE.
1554 For a library call, FNTYPE is 0. */
1555
1556 void
1557 init_cumulative_args (cum, fntype, libname)
1558 CUMULATIVE_ARGS *cum; /* Argument info to initialize */
1559 tree fntype; /* tree ptr for function decl */
1560 rtx libname; /* SYMBOL_REF of library name or 0 */
1561 {
1562 static CUMULATIVE_ARGS zero_cum;
1563 tree param, next_param;
1564
1565 if (TARGET_DEBUG_ARG)
1566 {
1567 fprintf (stderr, "\ninit_cumulative_args (");
1568 if (fntype)
1569 fprintf (stderr, "fntype code = %s, ret code = %s",
1570 tree_code_name[(int) TREE_CODE (fntype)],
1571 tree_code_name[(int) TREE_CODE (TREE_TYPE (fntype))]);
1572 else
1573 fprintf (stderr, "no fntype");
1574
1575 if (libname)
1576 fprintf (stderr, ", libname = %s", XSTR (libname, 0));
1577 }
1578
1579 *cum = zero_cum;
1580
1581 /* Set up the number of registers to use for passing arguments. */
1582 cum->nregs = ix86_regparm;
1583 cum->sse_nregs = SSE_REGPARM_MAX;
1584 if (fntype && !TARGET_64BIT)
1585 {
1586 tree attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (fntype));
1587
1588 if (attr)
1589 cum->nregs = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
1590 }
1591 cum->maybe_vaarg = false;
1592
1593 /* Determine if this function has variable arguments. This is
1594 indicated by the last argument being 'void_type_node' if there
1595 are no variable arguments. If there are variable arguments, then
1596 we won't pass anything in registers. */
1597
1598 if (cum->nregs)
1599 {
1600 for (param = (fntype) ? TYPE_ARG_TYPES (fntype) : 0;
1601 param != 0; param = next_param)
1602 {
1603 next_param = TREE_CHAIN (param);
1604 if (next_param == 0 && TREE_VALUE (param) != void_type_node)
1605 {
1606 if (!TARGET_64BIT)
1607 cum->nregs = 0;
1608 cum->maybe_vaarg = true;
1609 }
1610 }
1611 }
1612 if ((!fntype && !libname)
1613 || (fntype && !TYPE_ARG_TYPES (fntype)))
1614 cum->maybe_vaarg = 1;
1615
1616 if (TARGET_DEBUG_ARG)
1617 fprintf (stderr, ", nregs=%d )\n", cum->nregs);
1618
1619 return;
1620 }
1621
1622 /* x86-64 register passing implementation. See the x86-64 ABI for details. The goal
1623 of this code is to classify each 8 bytes of an incoming argument by register
1624 class and assign registers accordingly. */
1625
1626 /* Return the union class of CLASS1 and CLASS2.
1627 See the x86-64 PS ABI for details. */
1628
1629 static enum x86_64_reg_class
1630 merge_classes (class1, class2)
1631 enum x86_64_reg_class class1, class2;
1632 {
1633 /* Rule #1: If both classes are equal, this is the resulting class. */
1634 if (class1 == class2)
1635 return class1;
1636
1637 /* Rule #2: If one of the classes is NO_CLASS, the resulting class is
1638 the other class. */
1639 if (class1 == X86_64_NO_CLASS)
1640 return class2;
1641 if (class2 == X86_64_NO_CLASS)
1642 return class1;
1643
1644 /* Rule #3: If one of the classes is MEMORY, the result is MEMORY. */
1645 if (class1 == X86_64_MEMORY_CLASS || class2 == X86_64_MEMORY_CLASS)
1646 return X86_64_MEMORY_CLASS;
1647
1648 /* Rule #4: If one of the classes is INTEGER, the result is INTEGER. */
1649 if ((class1 == X86_64_INTEGERSI_CLASS && class2 == X86_64_SSESF_CLASS)
1650 || (class2 == X86_64_INTEGERSI_CLASS && class1 == X86_64_SSESF_CLASS))
1651 return X86_64_INTEGERSI_CLASS;
1652 if (class1 == X86_64_INTEGER_CLASS || class1 == X86_64_INTEGERSI_CLASS
1653 || class2 == X86_64_INTEGER_CLASS || class2 == X86_64_INTEGERSI_CLASS)
1654 return X86_64_INTEGER_CLASS;
1655
1656 /* Rule #5: If one of the classes is X87 or X87UP class, MEMORY is used. */
1657 if (class1 == X86_64_X87_CLASS || class1 == X86_64_X87UP_CLASS
1658 || class2 == X86_64_X87_CLASS || class2 == X86_64_X87UP_CLASS)
1659 return X86_64_MEMORY_CLASS;
1660
1661 /* Rule #6: Otherwise class SSE is used. */
1662 return X86_64_SSE_CLASS;
1663 }
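/* Small worked example (an added illustration): classifying a hypothetical
   union { int i; float f; } puts INTEGERSI (from the int) and SSESF (from
   the float) into the same eightbyte; rule #4 above merges them to
   X86_64_INTEGERSI_CLASS, so the union travels in a general purpose
   register.  Pairing either class with X86_64_MEMORY_CLASS would instead
   force the whole argument to memory by rule #3.  */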
1664
1665 /* Classify the argument of type TYPE and mode MODE.
1666 CLASSES will be filled by the register class used to pass each word
1667 of the operand. The number of words is returned. In case the parameter
1668 should be passed in memory, 0 is returned. As a special case for zero
1669 sized containers, classes[0] will be NO_CLASS and 1 is returned.
1670
1671 BIT_OFFSET is used internally for handling records and specifies the
1672 offset in bits modulo 256 to avoid overflow cases.
1673
1674 See the x86-64 PS ABI for details.
1675 */
1676
1677 static int
1678 classify_argument (mode, type, classes, bit_offset)
1679 enum machine_mode mode;
1680 tree type;
1681 enum x86_64_reg_class classes[MAX_CLASSES];
1682 int bit_offset;
1683 {
1684 int bytes =
1685 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
1686 int words = (bytes + (bit_offset % 64) / 8 + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
1687
1688 /* Variable sized entities are always passed/returned in memory. */
1689 if (bytes < 0)
1690 return 0;
1691
1692 if (type && AGGREGATE_TYPE_P (type))
1693 {
1694 int i;
1695 tree field;
1696 enum x86_64_reg_class subclasses[MAX_CLASSES];
1697
1698 /* On x86-64 we pass structures larger than 16 bytes on the stack. */
1699 if (bytes > 16)
1700 return 0;
1701
1702 for (i = 0; i < words; i++)
1703 classes[i] = X86_64_NO_CLASS;
1704
1705 /* Zero sized arrays or structures are NO_CLASS. We return 0 to
1706 signal the memory class, so handle them as a special case. */
1707 if (!words)
1708 {
1709 classes[0] = X86_64_NO_CLASS;
1710 return 1;
1711 }
1712
1713 /* Classify each field of record and merge classes. */
1714 if (TREE_CODE (type) == RECORD_TYPE)
1715 {
1716 /* For classes, first merge in the fields of the subclasses. */
1717 if (TYPE_BINFO (type) != NULL && TYPE_BINFO_BASETYPES (type) != NULL)
1718 {
1719 tree bases = TYPE_BINFO_BASETYPES (type);
1720 int n_bases = TREE_VEC_LENGTH (bases);
1721 int i;
1722
1723 for (i = 0; i < n_bases; ++i)
1724 {
1725 tree binfo = TREE_VEC_ELT (bases, i);
1726 int num;
1727 int offset = tree_low_cst (BINFO_OFFSET (binfo), 0) * 8;
1728 tree type = BINFO_TYPE (binfo);
1729
1730 num = classify_argument (TYPE_MODE (type),
1731 type, subclasses,
1732 (offset + bit_offset) % 256);
1733 if (!num)
1734 return 0;
1735 for (i = 0; i < num; i++)
1736 {
1737 int pos = (offset + (bit_offset % 64)) / 8 / 8;
1738 classes[i + pos] =
1739 merge_classes (subclasses[i], classes[i + pos]);
1740 }
1741 }
1742 }
1743 /* And now merge the fields of the structure. */
1744 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
1745 {
1746 if (TREE_CODE (field) == FIELD_DECL)
1747 {
1748 int num;
1749
1750 /* Bitfields are always classified as integer. Handle them
1751 early, since later code would consider them to be
1752 misaligned integers. */
1753 if (DECL_BIT_FIELD (field))
1754 {
1755 for (i = int_bit_position (field) / 8 / 8;
1756 i < (int_bit_position (field)
1757 + tree_low_cst (DECL_SIZE (field), 0)
1758 + 63) / 8 / 8; i++)
1759 classes[i] =
1760 merge_classes (X86_64_INTEGER_CLASS,
1761 classes[i]);
1762 }
1763 else
1764 {
1765 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
1766 TREE_TYPE (field), subclasses,
1767 (int_bit_position (field)
1768 + bit_offset) % 256);
1769 if (!num)
1770 return 0;
1771 for (i = 0; i < num; i++)
1772 {
1773 int pos =
1774 (int_bit_position (field) + (bit_offset % 64)) / 8 / 8;
1775 classes[i + pos] =
1776 merge_classes (subclasses[i], classes[i + pos]);
1777 }
1778 }
1779 }
1780 }
1781 }
1782 /* Arrays are handled as small records. */
1783 else if (TREE_CODE (type) == ARRAY_TYPE)
1784 {
1785 int num;
1786 num = classify_argument (TYPE_MODE (TREE_TYPE (type)),
1787 TREE_TYPE (type), subclasses, bit_offset);
1788 if (!num)
1789 return 0;
1790
1791 /* The partial classes are now full classes. */
1792 if (subclasses[0] == X86_64_SSESF_CLASS && bytes != 4)
1793 subclasses[0] = X86_64_SSE_CLASS;
1794 if (subclasses[0] == X86_64_INTEGERSI_CLASS && bytes != 4)
1795 subclasses[0] = X86_64_INTEGER_CLASS;
1796
1797 for (i = 0; i < words; i++)
1798 classes[i] = subclasses[i % num];
1799 }
1800 /* Unions are similar to RECORD_TYPE but offset is always 0. */
1801 else if (TREE_CODE (type) == UNION_TYPE
1802 || TREE_CODE (type) == QUAL_UNION_TYPE)
1803 {
1804 /* For classes, first merge in the fields of the subclasses. */
1805 if (TYPE_BINFO (type) != NULL && TYPE_BINFO_BASETYPES (type) != NULL)
1806 {
1807 tree bases = TYPE_BINFO_BASETYPES (type);
1808 int n_bases = TREE_VEC_LENGTH (bases);
1809 int i;
1810
1811 for (i = 0; i < n_bases; ++i)
1812 {
1813 tree binfo = TREE_VEC_ELT (bases, i);
1814 int num;
1815 int offset = tree_low_cst (BINFO_OFFSET (binfo), 0) * 8;
1816 tree type = BINFO_TYPE (binfo);
1817
1818 num = classify_argument (TYPE_MODE (type),
1819 type, subclasses,
1820 (offset + (bit_offset % 64)) % 256);
1821 if (!num)
1822 return 0;
1823 for (i = 0; i < num; i++)
1824 {
1825 int pos = (offset + (bit_offset % 64)) / 8 / 8;
1826 classes[i + pos] =
1827 merge_classes (subclasses[i], classes[i + pos]);
1828 }
1829 }
1830 }
1831 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
1832 {
1833 if (TREE_CODE (field) == FIELD_DECL)
1834 {
1835 int num;
1836 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
1837 TREE_TYPE (field), subclasses,
1838 bit_offset);
1839 if (!num)
1840 return 0;
1841 for (i = 0; i < num; i++)
1842 classes[i] = merge_classes (subclasses[i], classes[i]);
1843 }
1844 }
1845 }
1846 else
1847 abort ();
1848
1849 /* Final merger cleanup. */
1850 for (i = 0; i < words; i++)
1851 {
1852 /* If one class is MEMORY, everything should be passed in
1853 memory. */
1854 if (classes[i] == X86_64_MEMORY_CLASS)
1855 return 0;
1856
1857 /* The X86_64_SSEUP_CLASS should be always preceded by
1858 X86_64_SSE_CLASS. */
1859 if (classes[i] == X86_64_SSEUP_CLASS
1860 && (i == 0 || classes[i - 1] != X86_64_SSE_CLASS))
1861 classes[i] = X86_64_SSE_CLASS;
1862
1863 /* X86_64_X87UP_CLASS should be preceded by X86_64_X87_CLASS. */
1864 if (classes[i] == X86_64_X87UP_CLASS
1865 && (i == 0 || classes[i - 1] != X86_64_X87_CLASS))
1866 classes[i] = X86_64_SSE_CLASS;
1867 }
1868 return words;
1869 }
1870
1871 /* Compute alignment needed. We align all types to natural boundaries with
1872 the exception of XFmode, which is aligned to 64 bits. */
1873 if (mode != VOIDmode && mode != BLKmode)
1874 {
1875 int mode_alignment = GET_MODE_BITSIZE (mode);
1876
1877 if (mode == XFmode)
1878 mode_alignment = 128;
1879 else if (mode == XCmode)
1880 mode_alignment = 256;
1881 /* Misaligned fields are always returned in memory. */
1882 if (bit_offset % mode_alignment)
1883 return 0;
1884 }
1885
1886 /* Classification of atomic types. */
1887 switch (mode)
1888 {
1889 case DImode:
1890 case SImode:
1891 case HImode:
1892 case QImode:
1893 case CSImode:
1894 case CHImode:
1895 case CQImode:
1896 if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
1897 classes[0] = X86_64_INTEGERSI_CLASS;
1898 else
1899 classes[0] = X86_64_INTEGER_CLASS;
1900 return 1;
1901 case CDImode:
1902 case TImode:
1903 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
1904 return 2;
1905 case CTImode:
1906 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
1907 classes[2] = classes[3] = X86_64_INTEGER_CLASS;
1908 return 4;
1909 case SFmode:
1910 if (!(bit_offset % 64))
1911 classes[0] = X86_64_SSESF_CLASS;
1912 else
1913 classes[0] = X86_64_SSE_CLASS;
1914 return 1;
1915 case DFmode:
1916 classes[0] = X86_64_SSEDF_CLASS;
1917 return 1;
1918 case TFmode:
1919 classes[0] = X86_64_X87_CLASS;
1920 classes[1] = X86_64_X87UP_CLASS;
1921 return 2;
1922 case TCmode:
1923 classes[0] = X86_64_X87_CLASS;
1924 classes[1] = X86_64_X87UP_CLASS;
1925 classes[2] = X86_64_X87_CLASS;
1926 classes[3] = X86_64_X87UP_CLASS;
1927 return 4;
1928 case DCmode:
1929 classes[0] = X86_64_SSEDF_CLASS;
1930 classes[1] = X86_64_SSEDF_CLASS;
1931 return 2;
1932 case SCmode:
1933 classes[0] = X86_64_SSE_CLASS;
1934 return 1;
1935 case V4SFmode:
1936 case V4SImode:
1937 case V16QImode:
1938 case V8HImode:
1939 case V2DFmode:
1940 case V2DImode:
1941 classes[0] = X86_64_SSE_CLASS;
1942 classes[1] = X86_64_SSEUP_CLASS;
1943 return 2;
1944 case V2SFmode:
1945 case V2SImode:
1946 case V4HImode:
1947 case V8QImode:
1948 return 0;
1949 case BLKmode:
1950 case VOIDmode:
1951 return 0;
1952 default:
1953 abort ();
1954 }
1955 }
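/* Worked examples (added; hypothetical C types, following the rules above):

     struct s1 { int i; float f; };      8 bytes, one eightbyte; the int and
                                         float classes merge to an integer
                                         class, so it travels in one general
                                         purpose register; 1 is returned.
     struct s2 { double d; long l; };    16 bytes, two eightbytes classified
                                         SSEDF then INTEGER; 2 is returned.
     struct s3 { long a, b, c; };        24 bytes > 16, passed in memory;
                                         0 is returned.  */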
1956
1957 /* Examine the argument and return the number of registers required in each
1958 class. Return 0 iff the parameter should be passed in memory. */
1959 static int
1960 examine_argument (mode, type, in_return, int_nregs, sse_nregs)
1961 enum machine_mode mode;
1962 tree type;
1963 int *int_nregs, *sse_nregs;
1964 int in_return;
1965 {
1966 enum x86_64_reg_class class[MAX_CLASSES];
1967 int n = classify_argument (mode, type, class, 0);
1968
1969 *int_nregs = 0;
1970 *sse_nregs = 0;
1971 if (!n)
1972 return 0;
1973 for (n--; n >= 0; n--)
1974 switch (class[n])
1975 {
1976 case X86_64_INTEGER_CLASS:
1977 case X86_64_INTEGERSI_CLASS:
1978 (*int_nregs)++;
1979 break;
1980 case X86_64_SSE_CLASS:
1981 case X86_64_SSESF_CLASS:
1982 case X86_64_SSEDF_CLASS:
1983 (*sse_nregs)++;
1984 break;
1985 case X86_64_NO_CLASS:
1986 case X86_64_SSEUP_CLASS:
1987 break;
1988 case X86_64_X87_CLASS:
1989 case X86_64_X87UP_CLASS:
1990 if (!in_return)
1991 return 0;
1992 break;
1993 case X86_64_MEMORY_CLASS:
1994 abort ();
1995 }
1996 return 1;
1997 }
1998 /* Construct a container for the argument as used by the GCC interface. See
1999 FUNCTION_ARG for the detailed description. */
2000 static rtx
2001 construct_container (mode, type, in_return, nintregs, nsseregs, intreg, sse_regno)
2002 enum machine_mode mode;
2003 tree type;
2004 int in_return;
2005 int nintregs, nsseregs;
2006 const int * intreg;
2007 int sse_regno;
2008 {
2009 enum machine_mode tmpmode;
2010 int bytes =
2011 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
2012 enum x86_64_reg_class class[MAX_CLASSES];
2013 int n;
2014 int i;
2015 int nexps = 0;
2016 int needed_sseregs, needed_intregs;
2017 rtx exp[MAX_CLASSES];
2018 rtx ret;
2019
2020 n = classify_argument (mode, type, class, 0);
2021 if (TARGET_DEBUG_ARG)
2022 {
2023 if (!n)
2024 fprintf (stderr, "Memory class\n");
2025 else
2026 {
2027 fprintf (stderr, "Classes:");
2028 for (i = 0; i < n; i++)
2029 {
2030 fprintf (stderr, " %s", x86_64_reg_class_name[class[i]]);
2031 }
2032 fprintf (stderr, "\n");
2033 }
2034 }
2035 if (!n)
2036 return NULL;
2037 if (!examine_argument (mode, type, in_return, &needed_intregs, &needed_sseregs))
2038 return NULL;
2039 if (needed_intregs > nintregs || needed_sseregs > nsseregs)
2040 return NULL;
2041
2042 /* First construct simple cases. Avoid SCmode, since we want to use
2043 a single register to pass this type. */
2044 if (n == 1 && mode != SCmode)
2045 switch (class[0])
2046 {
2047 case X86_64_INTEGER_CLASS:
2048 case X86_64_INTEGERSI_CLASS:
2049 return gen_rtx_REG (mode, intreg[0]);
2050 case X86_64_SSE_CLASS:
2051 case X86_64_SSESF_CLASS:
2052 case X86_64_SSEDF_CLASS:
2053 return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
2054 case X86_64_X87_CLASS:
2055 return gen_rtx_REG (mode, FIRST_STACK_REG);
2056 case X86_64_NO_CLASS:
2057 /* Zero sized array, struct or class. */
2058 return NULL;
2059 default:
2060 abort ();
2061 }
2062 if (n == 2 && class[0] == X86_64_SSE_CLASS && class[1] == X86_64_SSEUP_CLASS)
2063 return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
2064 if (n == 2
2065 && class[0] == X86_64_X87_CLASS && class[1] == X86_64_X87UP_CLASS)
2066 return gen_rtx_REG (TFmode, FIRST_STACK_REG);
2067 if (n == 2 && class[0] == X86_64_INTEGER_CLASS
2068 && class[1] == X86_64_INTEGER_CLASS
2069 && (mode == CDImode || mode == TImode)
2070 && intreg[0] + 1 == intreg[1])
2071 return gen_rtx_REG (mode, intreg[0]);
2072 if (n == 4
2073 && class[0] == X86_64_X87_CLASS && class[1] == X86_64_X87UP_CLASS
2074 && class[2] == X86_64_X87_CLASS && class[3] == X86_64_X87UP_CLASS)
2075 return gen_rtx_REG (TCmode, FIRST_STACK_REG);
2076
2077 /* Otherwise figure out the entries of the PARALLEL. */
2078 for (i = 0; i < n; i++)
2079 {
2080 switch (class[i])
2081 {
2082 case X86_64_NO_CLASS:
2083 break;
2084 case X86_64_INTEGER_CLASS:
2085 case X86_64_INTEGERSI_CLASS:
2086 /* Merge TImodes on aligned occasions here too. */
2087 if (i * 8 + 8 > bytes)
2088 tmpmode = mode_for_size ((bytes - i * 8) * BITS_PER_UNIT, MODE_INT, 0);
2089 else if (class[i] == X86_64_INTEGERSI_CLASS)
2090 tmpmode = SImode;
2091 else
2092 tmpmode = DImode;
2093 /* We've requested 24 bytes we don't have a mode for. Use DImode. */
2094 if (tmpmode == BLKmode)
2095 tmpmode = DImode;
2096 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
2097 gen_rtx_REG (tmpmode, *intreg),
2098 GEN_INT (i*8));
2099 intreg++;
2100 break;
2101 case X86_64_SSESF_CLASS:
2102 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
2103 gen_rtx_REG (SFmode,
2104 SSE_REGNO (sse_regno)),
2105 GEN_INT (i*8));
2106 sse_regno++;
2107 break;
2108 case X86_64_SSEDF_CLASS:
2109 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
2110 gen_rtx_REG (DFmode,
2111 SSE_REGNO (sse_regno)),
2112 GEN_INT (i*8));
2113 sse_regno++;
2114 break;
2115 case X86_64_SSE_CLASS:
2116 if (i < n - 1 && class[i + 1] == X86_64_SSEUP_CLASS)
2117 tmpmode = TImode;
2118 else
2119 tmpmode = DImode;
2120 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
2121 gen_rtx_REG (tmpmode,
2122 SSE_REGNO (sse_regno)),
2123 GEN_INT (i*8));
2124 if (tmpmode == TImode)
2125 i++;
2126 sse_regno++;
2127 break;
2128 default:
2129 abort ();
2130 }
2131 }
2132 ret = gen_rtx_PARALLEL (mode, rtvec_alloc (nexps));
2133 for (i = 0; i < nexps; i++)
2134 XVECEXP (ret, 0, i) = exp [i];
2135 return ret;
2136 }
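/* Illustration (added; hypothetical argument, assuming no earlier arguments
   have consumed registers): for  struct { double d; long l; }  classified
   above as SSEDF + INTEGER, this builds a PARALLEL pairing (reg:DF xmm0)
   with byte offset 0 and (reg:DI di) with byte offset 8, whereas a plain
   DImode scalar takes the short path and is simply
   gen_rtx_REG (DImode, intreg[0]).  */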
2137
2138 /* Update the data in CUM to advance over an argument
2139 of mode MODE and data type TYPE.
2140 (TYPE is null for libcalls where that information may not be available.) */
2141
2142 void
2143 function_arg_advance (cum, mode, type, named)
2144 CUMULATIVE_ARGS *cum; /* current arg information */
2145 enum machine_mode mode; /* current arg mode */
2146 tree type; /* type of the argument or 0 if lib support */
2147 int named; /* whether or not the argument was named */
2148 {
2149 int bytes =
2150 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
2151 int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
2152
2153 if (TARGET_DEBUG_ARG)
2154 fprintf (stderr,
2155 "function_adv (sz=%d, wds=%2d, nregs=%d, mode=%s, named=%d)\n\n",
2156 words, cum->words, cum->nregs, GET_MODE_NAME (mode), named);
2157 if (TARGET_64BIT)
2158 {
2159 int int_nregs, sse_nregs;
2160 if (!examine_argument (mode, type, 0, &int_nregs, &sse_nregs))
2161 cum->words += words;
2162 else if (sse_nregs <= cum->sse_nregs && int_nregs <= cum->nregs)
2163 {
2164 cum->nregs -= int_nregs;
2165 cum->sse_nregs -= sse_nregs;
2166 cum->regno += int_nregs;
2167 cum->sse_regno += sse_nregs;
2168 }
2169 else
2170 cum->words += words;
2171 }
2172 else
2173 {
2174 if (TARGET_SSE && mode == TImode)
2175 {
2176 cum->sse_words += words;
2177 cum->sse_nregs -= 1;
2178 cum->sse_regno += 1;
2179 if (cum->sse_nregs <= 0)
2180 {
2181 cum->sse_nregs = 0;
2182 cum->sse_regno = 0;
2183 }
2184 }
2185 else
2186 {
2187 cum->words += words;
2188 cum->nregs -= words;
2189 cum->regno += words;
2190
2191 if (cum->nregs <= 0)
2192 {
2193 cum->nregs = 0;
2194 cum->regno = 0;
2195 }
2196 }
2197 }
2198 return;
2199 }
2200
2201 /* Define where to put the arguments to a function.
2202 Value is zero to push the argument on the stack,
2203 or a hard register in which to store the argument.
2204
2205 MODE is the argument's machine mode.
2206 TYPE is the data type of the argument (as a tree).
2207 This is null for libcalls where that information may
2208 not be available.
2209 CUM is a variable of type CUMULATIVE_ARGS which gives info about
2210 the preceding args and about the function being called.
2211 NAMED is nonzero if this argument is a named parameter
2212 (otherwise it is an extra parameter matching an ellipsis). */
2213
2214 rtx
2215 function_arg (cum, mode, type, named)
2216 CUMULATIVE_ARGS *cum; /* current arg information */
2217 enum machine_mode mode; /* current arg mode */
2218 tree type; /* type of the argument or 0 if lib support */
2219 int named; /* != 0 for normal args, == 0 for ... args */
2220 {
2221 rtx ret = NULL_RTX;
2222 int bytes =
2223 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
2224 int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
2225
2226 /* Handle a hidden AL argument containing the number of registers for
2227 varargs x86-64 functions. For the i386 ABI just return constm1_rtx to
2228 avoid any AL settings. */
2229 if (mode == VOIDmode)
2230 {
2231 if (TARGET_64BIT)
2232 return GEN_INT (cum->maybe_vaarg
2233 ? (cum->sse_nregs < 0
2234 ? SSE_REGPARM_MAX
2235 : cum->sse_regno)
2236 : -1);
2237 else
2238 return constm1_rtx;
2239 }
2240 if (TARGET_64BIT)
2241 ret = construct_container (mode, type, 0, cum->nregs, cum->sse_nregs,
2242 &x86_64_int_parameter_registers [cum->regno],
2243 cum->sse_regno);
2244 else
2245 switch (mode)
2246 {
2247 /* For now, pass fp/complex values on the stack. */
2248 default:
2249 break;
2250
2251 case BLKmode:
2252 case DImode:
2253 case SImode:
2254 case HImode:
2255 case QImode:
2256 if (words <= cum->nregs)
2257 ret = gen_rtx_REG (mode, cum->regno);
2258 break;
2259 case TImode:
2260 if (cum->sse_nregs)
2261 ret = gen_rtx_REG (mode, cum->sse_regno);
2262 break;
2263 }
2264
2265 if (TARGET_DEBUG_ARG)
2266 {
2267 fprintf (stderr,
2268 "function_arg (size=%d, wds=%2d, nregs=%d, mode=%4s, named=%d, ",
2269 words, cum->words, cum->nregs, GET_MODE_NAME (mode), named);
2270
2271 if (ret)
2272 print_simple_rtl (stderr, ret);
2273 else
2274 fprintf (stderr, ", stack");
2275
2276 fprintf (stderr, " )\n");
2277 }
2278
2279 return ret;
2280 }
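/* Usage note (added): on ia32 the register file used here starts at regno 0,
   so with e.g. __attribute__ ((regparm (3))) the first three integral
   arguments land in %eax, %edx and %ecx (cum->regno advancing through 0, 1,
   2 as function_arg_advance consumes them); anything that does not fit is
   passed on the stack (NULL return).  On x86-64 the whole decision is
   delegated to construct_container above.  */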
2281
2282 /* Gives the alignment boundary, in bits, of an argument with the specified mode
2283 and type. */
2284
2285 int
2286 ix86_function_arg_boundary (mode, type)
2287 enum machine_mode mode;
2288 tree type;
2289 {
2290 int align;
2291 if (!TARGET_64BIT)
2292 return PARM_BOUNDARY;
2293 if (type)
2294 align = TYPE_ALIGN (type);
2295 else
2296 align = GET_MODE_ALIGNMENT (mode);
2297 if (align < PARM_BOUNDARY)
2298 align = PARM_BOUNDARY;
2299 if (align > 128)
2300 align = 128;
2301 return align;
2302 }
2303
2304 /* Return true if REGNO is a possible register number for a function value. */
2305 bool
2306 ix86_function_value_regno_p (regno)
2307 int regno;
2308 {
2309 if (!TARGET_64BIT)
2310 {
2311 return ((regno) == 0
2312 || ((regno) == FIRST_FLOAT_REG && TARGET_FLOAT_RETURNS_IN_80387)
2313 || ((regno) == FIRST_SSE_REG && TARGET_SSE));
2314 }
2315 return ((regno) == 0 || (regno) == FIRST_FLOAT_REG
2316 || ((regno) == FIRST_SSE_REG && TARGET_SSE)
2317 || ((regno) == FIRST_FLOAT_REG && TARGET_FLOAT_RETURNS_IN_80387));
2318 }
2319
2320 /* Define how to find the value returned by a function.
2321 VALTYPE is the data type of the value (as a tree).
2322 If the precise function being called is known, FUNC is its FUNCTION_DECL;
2323 otherwise, FUNC is 0. */
2324 rtx
2325 ix86_function_value (valtype)
2326 tree valtype;
2327 {
2328 if (TARGET_64BIT)
2329 {
2330 rtx ret = construct_container (TYPE_MODE (valtype), valtype, 1,
2331 REGPARM_MAX, SSE_REGPARM_MAX,
2332 x86_64_int_return_registers, 0);
2333 /* For zero sized structures, construct_container returns NULL, but we need
2334 to keep the rest of the compiler happy by returning a meaningful value. */
2335 if (!ret)
2336 ret = gen_rtx_REG (TYPE_MODE (valtype), 0);
2337 return ret;
2338 }
2339 else
2340 return gen_rtx_REG (TYPE_MODE (valtype),
2341 ix86_value_regno (TYPE_MODE (valtype)));
2342 }
2343
2344 /* Return nonzero if TYPE should be returned in memory. */
2345 int
2346 ix86_return_in_memory (type)
2347 tree type;
2348 {
2349 int needed_intregs, needed_sseregs;
2350 if (TARGET_64BIT)
2351 {
2352 return !examine_argument (TYPE_MODE (type), type, 1,
2353 &needed_intregs, &needed_sseregs);
2354 }
2355 else
2356 {
2357 if (TYPE_MODE (type) == BLKmode
2358 || (VECTOR_MODE_P (TYPE_MODE (type))
2359 && int_size_in_bytes (type) == 8)
2360 || (int_size_in_bytes (type) > 12 && TYPE_MODE (type) != TImode
2361 && TYPE_MODE (type) != TFmode
2362 && !VECTOR_MODE_P (TYPE_MODE (type))))
2363 return 1;
2364 return 0;
2365 }
2366 }
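/* Examples (added; hypothetical types): on ia32 this returns 1 for any
   BLKmode aggregate such as  struct { char c[13]; }  and for most non-vector
   types larger than 12 bytes (TImode and TFmode being the exceptions), while
   an int, a double or an 8-byte struct with a scalar mode yields 0.  On
   x86-64 the answer simply mirrors the argument classifier, so aggregates
   larger than 16 bytes are returned in memory.  */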
2367
2368 /* Define how to find the value returned by a library function
2369 assuming the value has mode MODE. */
2370 rtx
2371 ix86_libcall_value (mode)
2372 enum machine_mode mode;
2373 {
2374 if (TARGET_64BIT)
2375 {
2376 switch (mode)
2377 {
2378 case SFmode:
2379 case SCmode:
2380 case DFmode:
2381 case DCmode:
2382 return gen_rtx_REG (mode, FIRST_SSE_REG);
2383 case TFmode:
2384 case TCmode:
2385 return gen_rtx_REG (mode, FIRST_FLOAT_REG);
2386 default:
2387 return gen_rtx_REG (mode, 0);
2388 }
2389 }
2390 else
2391 return gen_rtx_REG (mode, ix86_value_regno (mode));
2392 }
2393
2394 /* Given a mode, return the register to use for a return value. */
2395
2396 static int
2397 ix86_value_regno (mode)
2398 enum machine_mode mode;
2399 {
2400 if (GET_MODE_CLASS (mode) == MODE_FLOAT && TARGET_FLOAT_RETURNS_IN_80387)
2401 return FIRST_FLOAT_REG;
2402 if (mode == TImode || VECTOR_MODE_P (mode))
2403 return FIRST_SSE_REG;
2404 return 0;
2405 }
2406 \f
2407 /* Create the va_list data type. */
2408
2409 tree
2410 ix86_build_va_list ()
2411 {
2412 tree f_gpr, f_fpr, f_ovf, f_sav, record, type_decl;
2413
2414 /* For i386 we use plain pointer to argument area. */
2415 if (!TARGET_64BIT)
2416 return build_pointer_type (char_type_node);
2417
2418 record = (*lang_hooks.types.make_type) (RECORD_TYPE);
2419 type_decl = build_decl (TYPE_DECL, get_identifier ("__va_list_tag"), record);
2420
2421 f_gpr = build_decl (FIELD_DECL, get_identifier ("gp_offset"),
2422 unsigned_type_node);
2423 f_fpr = build_decl (FIELD_DECL, get_identifier ("fp_offset"),
2424 unsigned_type_node);
2425 f_ovf = build_decl (FIELD_DECL, get_identifier ("overflow_arg_area"),
2426 ptr_type_node);
2427 f_sav = build_decl (FIELD_DECL, get_identifier ("reg_save_area"),
2428 ptr_type_node);
2429
2430 DECL_FIELD_CONTEXT (f_gpr) = record;
2431 DECL_FIELD_CONTEXT (f_fpr) = record;
2432 DECL_FIELD_CONTEXT (f_ovf) = record;
2433 DECL_FIELD_CONTEXT (f_sav) = record;
2434
2435 TREE_CHAIN (record) = type_decl;
2436 TYPE_NAME (record) = type_decl;
2437 TYPE_FIELDS (record) = f_gpr;
2438 TREE_CHAIN (f_gpr) = f_fpr;
2439 TREE_CHAIN (f_fpr) = f_ovf;
2440 TREE_CHAIN (f_ovf) = f_sav;
2441
2442 layout_type (record);
2443
2444 /* The correct type is an array type of one element. */
2445 return build_array_type (record, build_index_type (size_zero_node));
2446 }
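/* For reference (an added note, not in the original file): on x86-64 the
   record built above corresponds to the familiar C declaration

     typedef struct __va_list_tag {
       unsigned int gp_offset;
       unsigned int fp_offset;
       void *overflow_arg_area;
       void *reg_save_area;
     } __va_list_tag;
     typedef __va_list_tag __builtin_va_list[1];

   i.e. a one-element array of the tag, which is why the function returns
   build_array_type of the record.  */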
2447
2448 /* Perform any needed actions for a function that is receiving a
2449 variable number of arguments.
2450
2451 CUM is as above.
2452
2453 MODE and TYPE are the mode and type of the current parameter.
2454
2455 PRETEND_SIZE is a variable that should be set to the amount of stack
2456 that must be pushed by the prolog to pretend that our caller pushed
2457 it.
2458
2459 Normally, this macro will push all remaining incoming registers on the
2460 stack and set PRETEND_SIZE to the length of the registers pushed. */
2461
2462 void
2463 ix86_setup_incoming_varargs (cum, mode, type, pretend_size, no_rtl)
2464 CUMULATIVE_ARGS *cum;
2465 enum machine_mode mode;
2466 tree type;
2467 int *pretend_size ATTRIBUTE_UNUSED;
2468 int no_rtl;
2469
2470 {
2471 CUMULATIVE_ARGS next_cum;
2472 rtx save_area = NULL_RTX, mem;
2473 rtx label;
2474 rtx label_ref;
2475 rtx tmp_reg;
2476 rtx nsse_reg;
2477 int set;
2478 tree fntype;
2479 int stdarg_p;
2480 int i;
2481
2482 if (!TARGET_64BIT)
2483 return;
2484
2485 /* Indicate that we need to allocate space on the stack for the varargs save area. */
2486 ix86_save_varrargs_registers = 1;
2487
2488 fntype = TREE_TYPE (current_function_decl);
2489 stdarg_p = (TYPE_ARG_TYPES (fntype) != 0
2490 && (TREE_VALUE (tree_last (TYPE_ARG_TYPES (fntype)))
2491 != void_type_node));
2492
2493 /* For varargs, we do not want to skip the dummy va_dcl argument.
2494 For stdargs, we do want to skip the last named argument. */
2495 next_cum = *cum;
2496 if (stdarg_p)
2497 function_arg_advance (&next_cum, mode, type, 1);
2498
2499 if (!no_rtl)
2500 save_area = frame_pointer_rtx;
2501
2502 set = get_varargs_alias_set ();
2503
2504 for (i = next_cum.regno; i < ix86_regparm; i++)
2505 {
2506 mem = gen_rtx_MEM (Pmode,
2507 plus_constant (save_area, i * UNITS_PER_WORD));
2508 set_mem_alias_set (mem, set);
2509 emit_move_insn (mem, gen_rtx_REG (Pmode,
2510 x86_64_int_parameter_registers[i]));
2511 }
2512
2513 if (next_cum.sse_nregs)
2514 {
2515 /* Now emit code to save SSE registers. The AX parameter contains the
2516 number of SSE parameter registers used to call this function. We use
2517 the sse_prologue_save insn template that produces a computed jump across
2518 the SSE saves. We need some preparation work to get this working. */
2519
2520 label = gen_label_rtx ();
2521 label_ref = gen_rtx_LABEL_REF (Pmode, label);
2522
2523 /* Compute the address to jump to:
2524 label - 4*eax + nnamed_sse_arguments*4 */
2525 tmp_reg = gen_reg_rtx (Pmode);
2526 nsse_reg = gen_reg_rtx (Pmode);
2527 emit_insn (gen_zero_extendqidi2 (nsse_reg, gen_rtx_REG (QImode, 0)));
2528 emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
2529 gen_rtx_MULT (Pmode, nsse_reg,
2530 GEN_INT (4))));
2531 if (next_cum.sse_regno)
2532 emit_move_insn
2533 (nsse_reg,
2534 gen_rtx_CONST (DImode,
2535 gen_rtx_PLUS (DImode,
2536 label_ref,
2537 GEN_INT (next_cum.sse_regno * 4))));
2538 else
2539 emit_move_insn (nsse_reg, label_ref);
2540 emit_insn (gen_subdi3 (nsse_reg, nsse_reg, tmp_reg));
2541
2542 /* Compute the address of the memory block we save into. We always use a
2543 pointer pointing 127 bytes after the first byte to store - this is needed
2544 to keep the instruction size limited to 4 bytes. */
2545 tmp_reg = gen_reg_rtx (Pmode);
2546 emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
2547 plus_constant (save_area,
2548 8 * REGPARM_MAX + 127)));
2549 mem = gen_rtx_MEM (BLKmode, plus_constant (tmp_reg, -127));
2550 set_mem_alias_set (mem, set);
2551 set_mem_align (mem, BITS_PER_WORD);
2552
2553 /* And finally do the dirty job! */
2554 emit_insn (gen_sse_prologue_save (mem, nsse_reg,
2555 GEN_INT (next_cum.sse_regno), label));
2556 }
2557
2558 }
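/* Layout reminder (added): the register save area filled in above is laid
   out as REGPARM_MAX (6) general purpose slots of 8 bytes followed by
   SSE_REGPARM_MAX (8) slots of 16 bytes, 48 + 128 = 176 bytes in all,
   which is exactly what the gp_offset / fp_offset fields of the va_list
   index into.  */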
2559
2560 /* Implement va_start. */
2561
2562 void
2563 ix86_va_start (valist, nextarg)
2564 tree valist;
2565 rtx nextarg;
2566 {
2567 HOST_WIDE_INT words, n_gpr, n_fpr;
2568 tree f_gpr, f_fpr, f_ovf, f_sav;
2569 tree gpr, fpr, ovf, sav, t;
2570
2571 /* Only 64bit target needs something special. */
2572 if (!TARGET_64BIT)
2573 {
2574 std_expand_builtin_va_start (valist, nextarg);
2575 return;
2576 }
2577
2578 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
2579 f_fpr = TREE_CHAIN (f_gpr);
2580 f_ovf = TREE_CHAIN (f_fpr);
2581 f_sav = TREE_CHAIN (f_ovf);
2582
2583 valist = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (valist)), valist);
2584 gpr = build (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr);
2585 fpr = build (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr);
2586 ovf = build (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf);
2587 sav = build (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav);
2588
2589 /* Count number of gp and fp argument registers used. */
2590 words = current_function_args_info.words;
2591 n_gpr = current_function_args_info.regno;
2592 n_fpr = current_function_args_info.sse_regno;
2593
2594 if (TARGET_DEBUG_ARG)
2595 fprintf (stderr, "va_start: words = %d, n_gpr = %d, n_fpr = %d\n",
2596 (int) words, (int) n_gpr, (int) n_fpr);
2597
2598 t = build (MODIFY_EXPR, TREE_TYPE (gpr), gpr,
2599 build_int_2 (n_gpr * 8, 0));
2600 TREE_SIDE_EFFECTS (t) = 1;
2601 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
2602
2603 t = build (MODIFY_EXPR, TREE_TYPE (fpr), fpr,
2604 build_int_2 (n_fpr * 16 + 8*REGPARM_MAX, 0));
2605 TREE_SIDE_EFFECTS (t) = 1;
2606 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
2607
2608 /* Find the overflow area. */
2609 t = make_tree (TREE_TYPE (ovf), virtual_incoming_args_rtx);
2610 if (words != 0)
2611 t = build (PLUS_EXPR, TREE_TYPE (ovf), t,
2612 build_int_2 (words * UNITS_PER_WORD, 0));
2613 t = build (MODIFY_EXPR, TREE_TYPE (ovf), ovf, t);
2614 TREE_SIDE_EFFECTS (t) = 1;
2615 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
2616
2617 /* Find the register save area.
2618 The prologue of the function saves it right above the stack frame. */
2619 t = make_tree (TREE_TYPE (sav), frame_pointer_rtx);
2620 t = build (MODIFY_EXPR, TREE_TYPE (sav), sav, t);
2621 TREE_SIDE_EFFECTS (t) = 1;
2622 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
2623 }
2624
2625 /* Implement va_arg. */
2626 rtx
2627 ix86_va_arg (valist, type)
2628 tree valist, type;
2629 {
2630 static const int intreg[6] = { 0, 1, 2, 3, 4, 5 };
2631 tree f_gpr, f_fpr, f_ovf, f_sav;
2632 tree gpr, fpr, ovf, sav, t;
2633 int size, rsize;
2634 rtx lab_false, lab_over = NULL_RTX;
2635 rtx addr_rtx, r;
2636 rtx container;
2637
2638 /* Only 64bit target needs something special. */
2639 if (!TARGET_64BIT)
2640 {
2641 return std_expand_builtin_va_arg (valist, type);
2642 }
2643
2644 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
2645 f_fpr = TREE_CHAIN (f_gpr);
2646 f_ovf = TREE_CHAIN (f_fpr);
2647 f_sav = TREE_CHAIN (f_ovf);
2648
2649 valist = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (valist)), valist);
2650 gpr = build (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr);
2651 fpr = build (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr);
2652 ovf = build (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf);
2653 sav = build (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav);
2654
2655 size = int_size_in_bytes (type);
2656 rsize = (size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
2657
2658 container = construct_container (TYPE_MODE (type), type, 0,
2659 REGPARM_MAX, SSE_REGPARM_MAX, intreg, 0);
2660 /*
2661 * Pull the value out of the saved registers ...
2662 */
2663
2664 addr_rtx = gen_reg_rtx (Pmode);
2665
2666 if (container)
2667 {
2668 rtx int_addr_rtx, sse_addr_rtx;
2669 int needed_intregs, needed_sseregs;
2670 int need_temp;
2671
2672 lab_over = gen_label_rtx ();
2673 lab_false = gen_label_rtx ();
2674
2675 examine_argument (TYPE_MODE (type), type, 0,
2676 &needed_intregs, &needed_sseregs);
2677
2678
2679 need_temp = ((needed_intregs && TYPE_ALIGN (type) > 64)
2680 || TYPE_ALIGN (type) > 128);
2681
2682 /* In case we are passing a structure, verify that it is a consecutive block
2683 on the register save area. If not we need to do moves. */
2684 if (!need_temp && !REG_P (container))
2685 {
2686 /* Verify that all registers are strictly consecutive. */
2687 if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container, 0, 0), 0))))
2688 {
2689 int i;
2690
2691 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
2692 {
2693 rtx slot = XVECEXP (container, 0, i);
2694 if (REGNO (XEXP (slot, 0)) != FIRST_SSE_REG + (unsigned int) i
2695 || INTVAL (XEXP (slot, 1)) != i * 16)
2696 need_temp = 1;
2697 }
2698 }
2699 else
2700 {
2701 int i;
2702
2703 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
2704 {
2705 rtx slot = XVECEXP (container, 0, i);
2706 if (REGNO (XEXP (slot, 0)) != (unsigned int) i
2707 || INTVAL (XEXP (slot, 1)) != i * 8)
2708 need_temp = 1;
2709 }
2710 }
2711 }
2712 if (!need_temp)
2713 {
2714 int_addr_rtx = addr_rtx;
2715 sse_addr_rtx = addr_rtx;
2716 }
2717 else
2718 {
2719 int_addr_rtx = gen_reg_rtx (Pmode);
2720 sse_addr_rtx = gen_reg_rtx (Pmode);
2721 }
2722 /* First ensure that we fit completely in registers. */
2723 if (needed_intregs)
2724 {
2725 emit_cmp_and_jump_insns (expand_expr
2726 (gpr, NULL_RTX, SImode, EXPAND_NORMAL),
2727 GEN_INT ((REGPARM_MAX - needed_intregs +
2728 1) * 8), GE, const1_rtx, SImode,
2729 1, lab_false);
2730 }
2731 if (needed_sseregs)
2732 {
2733 emit_cmp_and_jump_insns (expand_expr
2734 (fpr, NULL_RTX, SImode, EXPAND_NORMAL),
2735 GEN_INT ((SSE_REGPARM_MAX -
2736 needed_sseregs + 1) * 16 +
2737 REGPARM_MAX * 8), GE, const1_rtx,
2738 SImode, 1, lab_false);
2739 }
2740
2741 /* Compute index to start of area used for integer regs. */
2742 if (needed_intregs)
2743 {
2744 t = build (PLUS_EXPR, ptr_type_node, sav, gpr);
2745 r = expand_expr (t, int_addr_rtx, Pmode, EXPAND_NORMAL);
2746 if (r != int_addr_rtx)
2747 emit_move_insn (int_addr_rtx, r);
2748 }
2749 if (needed_sseregs)
2750 {
2751 t = build (PLUS_EXPR, ptr_type_node, sav, fpr);
2752 r = expand_expr (t, sse_addr_rtx, Pmode, EXPAND_NORMAL);
2753 if (r != sse_addr_rtx)
2754 emit_move_insn (sse_addr_rtx, r);
2755 }
2756 if (need_temp)
2757 {
2758 int i;
2759 rtx mem;
2760
2761 /* Never use the memory itself, as it has the alias set. */
2762 addr_rtx = XEXP (assign_temp (type, 0, 1, 0), 0);
2763 mem = gen_rtx_MEM (BLKmode, addr_rtx);
2764 set_mem_alias_set (mem, get_varargs_alias_set ());
2765 set_mem_align (mem, BITS_PER_UNIT);
2766
2767 for (i = 0; i < XVECLEN (container, 0); i++)
2768 {
2769 rtx slot = XVECEXP (container, 0, i);
2770 rtx reg = XEXP (slot, 0);
2771 enum machine_mode mode = GET_MODE (reg);
2772 rtx src_addr;
2773 rtx src_mem;
2774 int src_offset;
2775 rtx dest_mem;
2776
2777 if (SSE_REGNO_P (REGNO (reg)))
2778 {
2779 src_addr = sse_addr_rtx;
2780 src_offset = (REGNO (reg) - FIRST_SSE_REG) * 16;
2781 }
2782 else
2783 {
2784 src_addr = int_addr_rtx;
2785 src_offset = REGNO (reg) * 8;
2786 }
2787 src_mem = gen_rtx_MEM (mode, src_addr);
2788 set_mem_alias_set (src_mem, get_varargs_alias_set ());
2789 src_mem = adjust_address (src_mem, mode, src_offset);
2790 dest_mem = adjust_address (mem, mode, INTVAL (XEXP (slot, 1)));
2791 emit_move_insn (dest_mem, src_mem);
2792 }
2793 }
2794
2795 if (needed_intregs)
2796 {
2797 t =
2798 build (PLUS_EXPR, TREE_TYPE (gpr), gpr,
2799 build_int_2 (needed_intregs * 8, 0));
2800 t = build (MODIFY_EXPR, TREE_TYPE (gpr), gpr, t);
2801 TREE_SIDE_EFFECTS (t) = 1;
2802 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
2803 }
2804 if (needed_sseregs)
2805 {
2806 t =
2807 build (PLUS_EXPR, TREE_TYPE (fpr), fpr,
2808 build_int_2 (needed_sseregs * 16, 0));
2809 t = build (MODIFY_EXPR, TREE_TYPE (fpr), fpr, t);
2810 TREE_SIDE_EFFECTS (t) = 1;
2811 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
2812 }
2813
2814 emit_jump_insn (gen_jump (lab_over));
2815 emit_barrier ();
2816 emit_label (lab_false);
2817 }
2818
2819 /* ... otherwise out of the overflow area. */
2820
2821 /* Care for on-stack alignment if needed. */
2822 if (FUNCTION_ARG_BOUNDARY (VOIDmode, type) <= 64)
2823 t = ovf;
2824 else
2825 {
2826 HOST_WIDE_INT align = FUNCTION_ARG_BOUNDARY (VOIDmode, type) / 8;
2827 t = build (PLUS_EXPR, TREE_TYPE (ovf), ovf, build_int_2 (align - 1, 0));
2828 t = build (BIT_AND_EXPR, TREE_TYPE (t), t, build_int_2 (-align, -1));
2829 }
2830 t = save_expr (t);
2831
2832 r = expand_expr (t, addr_rtx, Pmode, EXPAND_NORMAL);
2833 if (r != addr_rtx)
2834 emit_move_insn (addr_rtx, r);
2835
2836 t =
2837 build (PLUS_EXPR, TREE_TYPE (t), t,
2838 build_int_2 (rsize * UNITS_PER_WORD, 0));
2839 t = build (MODIFY_EXPR, TREE_TYPE (ovf), ovf, t);
2840 TREE_SIDE_EFFECTS (t) = 1;
2841 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
2842
2843 if (container)
2844 emit_label (lab_over);
2845
2846 return addr_rtx;
2847 }
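/* Rough sketch (added) of what the 64-bit path above expands to, shown as
   pseudo C for a type needing N_GP integer and N_FP SSE eightbytes:

     if (ap->gp_offset >= (6 - N_GP + 1) * 8
         || ap->fp_offset >= (8 - N_FP + 1) * 16 + 6 * 8)
       goto overflow;                         lab_false
     addr = ap->reg_save_area + offsets;      possibly via a temporary copy
     ap->gp_offset += N_GP * 8;
     ap->fp_offset += N_FP * 16;
     goto done;                               lab_over
   overflow:
     addr = align (ap->overflow_arg_area);
     ap->overflow_arg_area = addr + rsize * UNITS_PER_WORD;
   done:
     return addr;

   The constants 6 and 8 assume the usual x86-64 values of REGPARM_MAX and
   SSE_REGPARM_MAX.  */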
2848 \f
2849 /* Return nonzero if OP is either a i387 or SSE fp register. */
2850 int
2851 any_fp_register_operand (op, mode)
2852 rtx op;
2853 enum machine_mode mode ATTRIBUTE_UNUSED;
2854 {
2855 return ANY_FP_REG_P (op);
2856 }
2857
2858 /* Return nonzero if OP is an i387 fp register. */
2859 int
2860 fp_register_operand (op, mode)
2861 rtx op;
2862 enum machine_mode mode ATTRIBUTE_UNUSED;
2863 {
2864 return FP_REG_P (op);
2865 }
2866
2867 /* Return nonzero if OP is a non-fp register_operand. */
2868 int
2869 register_and_not_any_fp_reg_operand (op, mode)
2870 rtx op;
2871 enum machine_mode mode;
2872 {
2873 return register_operand (op, mode) && !ANY_FP_REG_P (op);
2874 }
2875
2876 /* Return nonzero if OP is a register operand other than an
2877 i387 fp register. */
2878 int
2879 register_and_not_fp_reg_operand (op, mode)
2880 rtx op;
2881 enum machine_mode mode;
2882 {
2883 return register_operand (op, mode) && !FP_REG_P (op);
2884 }
2885
2886 /* Return nonzero if OP is general operand representable on x86_64. */
2887
2888 int
2889 x86_64_general_operand (op, mode)
2890 rtx op;
2891 enum machine_mode mode;
2892 {
2893 if (!TARGET_64BIT)
2894 return general_operand (op, mode);
2895 if (nonimmediate_operand (op, mode))
2896 return 1;
2897 return x86_64_sign_extended_value (op, 1);
2898 }
2899
2900 /* Return nonzero if OP is general operand representable on x86_64
2901 as either sign extended or zero extended constant. */
2902
2903 int
2904 x86_64_szext_general_operand (op, mode)
2905 rtx op;
2906 enum machine_mode mode;
2907 {
2908 if (!TARGET_64BIT)
2909 return general_operand (op, mode);
2910 if (nonimmediate_operand (op, mode))
2911 return 1;
2912 return x86_64_sign_extended_value (op, 1) || x86_64_zero_extended_value (op);
2913 }
2914
2915 /* Return nonzero if OP is nonmemory operand representable on x86_64. */
2916
2917 int
2918 x86_64_nonmemory_operand (op, mode)
2919 rtx op;
2920 enum machine_mode mode;
2921 {
2922 if (!TARGET_64BIT)
2923 return nonmemory_operand (op, mode);
2924 if (register_operand (op, mode))
2925 return 1;
2926 return x86_64_sign_extended_value (op, 1);
2927 }
2928
2929 /* Return nonzero if OP is nonmemory operand acceptable by movabs patterns. */
2930
2931 int
2932 x86_64_movabs_operand (op, mode)
2933 rtx op;
2934 enum machine_mode mode;
2935 {
2936 if (!TARGET_64BIT || !flag_pic)
2937 return nonmemory_operand (op, mode);
2938 if (register_operand (op, mode) || x86_64_sign_extended_value (op, 0))
2939 return 1;
2940 if (CONSTANT_P (op) && !symbolic_reference_mentioned_p (op))
2941 return 1;
2942 return 0;
2943 }
2944
2945 /* Return nonzero if OP is a nonmemory operand representable on x86_64
2945 as either a sign extended or zero extended constant. */
2946
2947 int
2948 x86_64_szext_nonmemory_operand (op, mode)
2949 rtx op;
2950 enum machine_mode mode;
2951 {
2952 if (!TARGET_64BIT)
2953 return nonmemory_operand (op, mode);
2954 if (register_operand (op, mode))
2955 return 1;
2956 return x86_64_sign_extended_value (op, 0) || x86_64_zero_extended_value (op);
2957 }
2958
2959 /* Return nonzero if OP is immediate operand representable on x86_64. */
2960
2961 int
2962 x86_64_immediate_operand (op, mode)
2963 rtx op;
2964 enum machine_mode mode;
2965 {
2966 if (!TARGET_64BIT)
2967 return immediate_operand (op, mode);
2968 return x86_64_sign_extended_value (op, 0);
2969 }
2970
2971 /* Return nonzero if OP is an immediate operand representable on x86_64
2971 as a zero extended value. */
2972
2973 int
2974 x86_64_zext_immediate_operand (op, mode)
2975 rtx op;
2976 enum machine_mode mode ATTRIBUTE_UNUSED;
2977 {
2978 return x86_64_zero_extended_value (op);
2979 }
2980
2981 /* Return nonzero if OP is (const_int 1), else return zero. */
2982
2983 int
2984 const_int_1_operand (op, mode)
2985 rtx op;
2986 enum machine_mode mode ATTRIBUTE_UNUSED;
2987 {
2988 return (GET_CODE (op) == CONST_INT && INTVAL (op) == 1);
2989 }
2990
2991 /* Return nonzero if OP is CONST_INT >= 1 and <= 31 (a valid operand
2992 for shift & compare patterns, as shifting by 0 does not change flags),
2993 else return zero. */
2994
2995 int
2996 const_int_1_31_operand (op, mode)
2997 rtx op;
2998 enum machine_mode mode ATTRIBUTE_UNUSED;
2999 {
3000 return (GET_CODE (op) == CONST_INT && INTVAL (op) >= 1 && INTVAL (op) <= 31);
3001 }
3002
3003 /* Returns 1 if OP is either a symbol reference or a sum of a symbol
3004 reference and a constant. */
3005
3006 int
3007 symbolic_operand (op, mode)
3008 register rtx op;
3009 enum machine_mode mode ATTRIBUTE_UNUSED;
3010 {
3011 switch (GET_CODE (op))
3012 {
3013 case SYMBOL_REF:
3014 case LABEL_REF:
3015 return 1;
3016
3017 case CONST:
3018 op = XEXP (op, 0);
3019 if (GET_CODE (op) == SYMBOL_REF
3020 || GET_CODE (op) == LABEL_REF
3021 || (GET_CODE (op) == UNSPEC
3022 && (XINT (op, 1) == UNSPEC_GOT
3023 || XINT (op, 1) == UNSPEC_GOTOFF
3024 || XINT (op, 1) == UNSPEC_GOTPCREL)))
3025 return 1;
3026 if (GET_CODE (op) != PLUS
3027 || GET_CODE (XEXP (op, 1)) != CONST_INT)
3028 return 0;
3029
3030 op = XEXP (op, 0);
3031 if (GET_CODE (op) == SYMBOL_REF
3032 || GET_CODE (op) == LABEL_REF)
3033 return 1;
3034 /* Only @GOTOFF gets offsets. */
3035 if (GET_CODE (op) != UNSPEC
3036 || XINT (op, 1) != UNSPEC_GOTOFF)
3037 return 0;
3038
3039 op = XVECEXP (op, 0, 0);
3040 if (GET_CODE (op) == SYMBOL_REF
3041 || GET_CODE (op) == LABEL_REF)
3042 return 1;
3043 return 0;
3044
3045 default:
3046 return 0;
3047 }
3048 }
3049
3050 /* Return true if the operand contains a @GOT or @GOTOFF reference. */
3051
3052 int
3053 pic_symbolic_operand (op, mode)
3054 register rtx op;
3055 enum machine_mode mode ATTRIBUTE_UNUSED;
3056 {
3057 if (GET_CODE (op) != CONST)
3058 return 0;
3059 op = XEXP (op, 0);
3060 if (TARGET_64BIT)
3061 {
3062 if (GET_CODE (XEXP (op, 0)) == UNSPEC)
3063 return 1;
3064 }
3065 else
3066 {
3067 if (GET_CODE (op) == UNSPEC)
3068 return 1;
3069 if (GET_CODE (op) != PLUS
3070 || GET_CODE (XEXP (op, 1)) != CONST_INT)
3071 return 0;
3072 op = XEXP (op, 0);
3073 if (GET_CODE (op) == UNSPEC)
3074 return 1;
3075 }
3076 return 0;
3077 }
3078
3079 /* Return true if OP is a symbolic operand that resolves locally. */
3080
3081 static int
3082 local_symbolic_operand (op, mode)
3083 rtx op;
3084 enum machine_mode mode ATTRIBUTE_UNUSED;
3085 {
3086 if (GET_CODE (op) == CONST
3087 && GET_CODE (XEXP (op, 0)) == PLUS
3088 && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST_INT
3089 && (ix86_cmodel != CM_SMALL_PIC
3090 || (INTVAL (XEXP (XEXP (op, 0), 1)) >= -16*1024*1024
3091 && INTVAL (XEXP (XEXP (op, 0), 1)) < 16*1024*1024)))
3092 op = XEXP (XEXP (op, 0), 0);
3093
3094 if (GET_CODE (op) == LABEL_REF)
3095 return 1;
3096
3097 if (GET_CODE (op) != SYMBOL_REF)
3098 return 0;
3099
3100 /* These we've been told are local by varasm and encode_section_info
3101 respectively. */
3102 if (CONSTANT_POOL_ADDRESS_P (op) || SYMBOL_REF_FLAG (op))
3103 return 1;
3104
3105 /* There is, however, a not insubstantial body of code in the rest of
3106 the compiler that assumes it can just stick the results of
3107 ASM_GENERATE_INTERNAL_LABEL in a symbol_ref and have done. */
3108 /* ??? This is a hack. Should update the body of the compiler to
3109 always create a DECL and invoke targetm.encode_section_info. */
3110 if (strncmp (XSTR (op, 0), internal_label_prefix,
3111 internal_label_prefix_len) == 0)
3112 return 1;
3113
3114 return 0;
3115 }
3116
3117 /* Test for various thread-local symbols. See ix86_encode_section_info. */
3118
3119 int
3120 tls_symbolic_operand (op, mode)
3121 register rtx op;
3122 enum machine_mode mode ATTRIBUTE_UNUSED;
3123 {
3124 const char *symbol_str;
3125
3126 if (GET_CODE (op) != SYMBOL_REF)
3127 return 0;
3128 symbol_str = XSTR (op, 0);
3129
3130 if (symbol_str[0] != '%')
3131 return 0;
3132 return strchr (tls_model_chars, symbol_str[1]) - tls_model_chars;
3133 }
3134
3135 static int
3136 tls_symbolic_operand_1 (op, kind)
3137 rtx op;
3138 enum tls_model kind;
3139 {
3140 const char *symbol_str;
3141
3142 if (GET_CODE (op) != SYMBOL_REF)
3143 return 0;
3144 symbol_str = XSTR (op, 0);
3145
3146 return symbol_str[0] == '%' && symbol_str[1] == tls_model_chars[kind];
3147 }
3148
3149 int
3150 global_dynamic_symbolic_operand (op, mode)
3151 register rtx op;
3152 enum machine_mode mode ATTRIBUTE_UNUSED;
3153 {
3154 return tls_symbolic_operand_1 (op, TLS_MODEL_GLOBAL_DYNAMIC);
3155 }
3156
3157 int
3158 local_dynamic_symbolic_operand (op, mode)
3159 register rtx op;
3160 enum machine_mode mode ATTRIBUTE_UNUSED;
3161 {
3162 return tls_symbolic_operand_1 (op, TLS_MODEL_LOCAL_DYNAMIC);
3163 }
3164
3165 int
3166 initial_exec_symbolic_operand (op, mode)
3167 register rtx op;
3168 enum machine_mode mode ATTRIBUTE_UNUSED;
3169 {
3170 return tls_symbolic_operand_1 (op, TLS_MODEL_INITIAL_EXEC);
3171 }
3172
3173 int
3174 local_exec_symbolic_operand (op, mode)
3175 register rtx op;
3176 enum machine_mode mode ATTRIBUTE_UNUSED;
3177 {
3178 return tls_symbolic_operand_1 (op, TLS_MODEL_LOCAL_EXEC);
3179 }
3180
3181 /* Test for a valid operand for a call instruction. Don't allow the
3182 arg pointer register or virtual regs since they may decay into
3183 reg + const, which the patterns can't handle. */
3184
3185 int
3186 call_insn_operand (op, mode)
3187 rtx op;
3188 enum machine_mode mode ATTRIBUTE_UNUSED;
3189 {
3190 /* Disallow indirect through a virtual register. This leads to
3191 compiler aborts when trying to eliminate them. */
3192 if (GET_CODE (op) == REG
3193 && (op == arg_pointer_rtx
3194 || op == frame_pointer_rtx
3195 || (REGNO (op) >= FIRST_PSEUDO_REGISTER
3196 && REGNO (op) <= LAST_VIRTUAL_REGISTER)))
3197 return 0;
3198
3199 /* Disallow `call 1234'. Due to varying assembler lameness this
3200 gets either rejected or translated to `call .+1234'. */
3201 if (GET_CODE (op) == CONST_INT)
3202 return 0;
3203
3204 /* Explicitly allow SYMBOL_REF even if pic. */
3205 if (GET_CODE (op) == SYMBOL_REF)
3206 return 1;
3207
3208 /* Otherwise we can allow any general_operand in the address. */
3209 return general_operand (op, Pmode);
3210 }
3211
3212 int
3213 constant_call_address_operand (op, mode)
3214 rtx op;
3215 enum machine_mode mode ATTRIBUTE_UNUSED;
3216 {
3217 if (GET_CODE (op) == CONST
3218 && GET_CODE (XEXP (op, 0)) == PLUS
3219 && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST_INT)
3220 op = XEXP (XEXP (op, 0), 0);
3221 return GET_CODE (op) == SYMBOL_REF;
3222 }
3223
3224 /* Match exactly zero and one. */
3225
3226 int
3227 const0_operand (op, mode)
3228 register rtx op;
3229 enum machine_mode mode;
3230 {
3231 return op == CONST0_RTX (mode);
3232 }
3233
3234 int
3235 const1_operand (op, mode)
3236 register rtx op;
3237 enum machine_mode mode ATTRIBUTE_UNUSED;
3238 {
3239 return op == const1_rtx;
3240 }
3241
3242 /* Match 2, 4, or 8. Used for leal multiplicands. */
3243
3244 int
3245 const248_operand (op, mode)
3246 register rtx op;
3247 enum machine_mode mode ATTRIBUTE_UNUSED;
3248 {
3249 return (GET_CODE (op) == CONST_INT
3250 && (INTVAL (op) == 2 || INTVAL (op) == 4 || INTVAL (op) == 8));
3251 }
3252
3253 /* True if this is a constant appropriate for an increment or decrement. */
3254
3255 int
3256 incdec_operand (op, mode)
3257 register rtx op;
3258 enum machine_mode mode ATTRIBUTE_UNUSED;
3259 {
3260 /* On Pentium4, the inc and dec operations cause an extra dependency on the
3261 flags register, since the carry flag is not set. */
3262 if (TARGET_PENTIUM4 && !optimize_size)
3263 return 0;
3264 return op == const1_rtx || op == constm1_rtx;
3265 }
3266
3267 /* Return nonzero if OP is acceptable as operand of DImode shift
3268 expander. */
3269
3270 int
3271 shiftdi_operand (op, mode)
3272 rtx op;
3273 enum machine_mode mode ATTRIBUTE_UNUSED;
3274 {
3275 if (TARGET_64BIT)
3276 return nonimmediate_operand (op, mode);
3277 else
3278 return register_operand (op, mode);
3279 }
3280
3281 /* Return false if this is the stack pointer, or any other fake
3282 register eliminable to the stack pointer. Otherwise, this is
3283 a register operand.
3284
3285 This is used to prevent esp from being used as an index reg.
3286 Which would only happen in pathological cases. */
3287
3288 int
3289 reg_no_sp_operand (op, mode)
3290 register rtx op;
3291 enum machine_mode mode;
3292 {
3293 rtx t = op;
3294 if (GET_CODE (t) == SUBREG)
3295 t = SUBREG_REG (t);
3296 if (t == stack_pointer_rtx || t == arg_pointer_rtx || t == frame_pointer_rtx)
3297 return 0;
3298
3299 return register_operand (op, mode);
3300 }
3301
3302 int
3303 mmx_reg_operand (op, mode)
3304 register rtx op;
3305 enum machine_mode mode ATTRIBUTE_UNUSED;
3306 {
3307 return MMX_REG_P (op);
3308 }
3309
3310 /* Return false if this is any eliminable register. Otherwise
3311 general_operand. */
3312
3313 int
3314 general_no_elim_operand (op, mode)
3315 register rtx op;
3316 enum machine_mode mode;
3317 {
3318 rtx t = op;
3319 if (GET_CODE (t) == SUBREG)
3320 t = SUBREG_REG (t);
3321 if (t == arg_pointer_rtx || t == frame_pointer_rtx
3322 || t == virtual_incoming_args_rtx || t == virtual_stack_vars_rtx
3323 || t == virtual_stack_dynamic_rtx)
3324 return 0;
3325 if (REG_P (t)
3326 && REGNO (t) >= FIRST_VIRTUAL_REGISTER
3327 && REGNO (t) <= LAST_VIRTUAL_REGISTER)
3328 return 0;
3329
3330 return general_operand (op, mode);
3331 }
3332
3333 /* Return false if this is any eliminable register. Otherwise
3334 register_operand or const_int. */
3335
3336 int
3337 nonmemory_no_elim_operand (op, mode)
3338 register rtx op;
3339 enum machine_mode mode;
3340 {
3341 rtx t = op;
3342 if (GET_CODE (t) == SUBREG)
3343 t = SUBREG_REG (t);
3344 if (t == arg_pointer_rtx || t == frame_pointer_rtx
3345 || t == virtual_incoming_args_rtx || t == virtual_stack_vars_rtx
3346 || t == virtual_stack_dynamic_rtx)
3347 return 0;
3348
3349 return GET_CODE (op) == CONST_INT || register_operand (op, mode);
3350 }
3351
3352 /* Return false if this is any eliminable register or stack register,
3353 otherwise work like register_operand. */
3354
3355 int
3356 index_register_operand (op, mode)
3357 register rtx op;
3358 enum machine_mode mode;
3359 {
3360 rtx t = op;
3361 if (GET_CODE (t) == SUBREG)
3362 t = SUBREG_REG (t);
3363 if (!REG_P (t))
3364 return 0;
3365 if (t == arg_pointer_rtx
3366 || t == frame_pointer_rtx
3367 || t == virtual_incoming_args_rtx
3368 || t == virtual_stack_vars_rtx
3369 || t == virtual_stack_dynamic_rtx
3370 || REGNO (t) == STACK_POINTER_REGNUM)
3371 return 0;
3372
3373 return general_operand (op, mode);
3374 }
3375
3376 /* Return true if op is a Q_REGS class register. */
3377
3378 int
3379 q_regs_operand (op, mode)
3380 register rtx op;
3381 enum machine_mode mode;
3382 {
3383 if (mode != VOIDmode && GET_MODE (op) != mode)
3384 return 0;
3385 if (GET_CODE (op) == SUBREG)
3386 op = SUBREG_REG (op);
3387 return ANY_QI_REG_P (op);
3388 }
3389
3390 /* Return true if op is a NON_Q_REGS class register. */
3391
3392 int
3393 non_q_regs_operand (op, mode)
3394 register rtx op;
3395 enum machine_mode mode;
3396 {
3397 if (mode != VOIDmode && GET_MODE (op) != mode)
3398 return 0;
3399 if (GET_CODE (op) == SUBREG)
3400 op = SUBREG_REG (op);
3401 return NON_QI_REG_P (op);
3402 }
3403
3404 /* Return 1 if OP is a comparison that can be used in the CMPSS/CMPPS
3405 insns. */
3406 int
3407 sse_comparison_operator (op, mode)
3408 rtx op;
3409 enum machine_mode mode ATTRIBUTE_UNUSED;
3410 {
3411 enum rtx_code code = GET_CODE (op);
3412 switch (code)
3413 {
3414 /* Operations supported directly. */
3415 case EQ:
3416 case LT:
3417 case LE:
3418 case UNORDERED:
3419 case NE:
3420 case UNGE:
3421 case UNGT:
3422 case ORDERED:
3423 return 1;
3424 /* These are equivalent to ones above in non-IEEE comparisons. */
3425 case UNEQ:
3426 case UNLT:
3427 case UNLE:
3428 case LTGT:
3429 case GE:
3430 case GT:
3431 return !TARGET_IEEE_FP;
3432 default:
3433 return 0;
3434 }
3435 }
3436 /* Return 1 if OP is a valid comparison operator in valid mode. */
3437 int
3438 ix86_comparison_operator (op, mode)
3439 register rtx op;
3440 enum machine_mode mode;
3441 {
3442 enum machine_mode inmode;
3443 enum rtx_code code = GET_CODE (op);
3444 if (mode != VOIDmode && GET_MODE (op) != mode)
3445 return 0;
3446 if (GET_RTX_CLASS (code) != '<')
3447 return 0;
3448 inmode = GET_MODE (XEXP (op, 0));
3449
3450 if (inmode == CCFPmode || inmode == CCFPUmode)
3451 {
3452 enum rtx_code second_code, bypass_code;
3453 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
3454 return (bypass_code == NIL && second_code == NIL);
3455 }
3456 switch (code)
3457 {
3458 case EQ: case NE:
3459 return 1;
3460 case LT: case GE:
3461 if (inmode == CCmode || inmode == CCGCmode
3462 || inmode == CCGOCmode || inmode == CCNOmode)
3463 return 1;
3464 return 0;
3465 case LTU: case GTU: case LEU: case ORDERED: case UNORDERED: case GEU:
3466 if (inmode == CCmode)
3467 return 1;
3468 return 0;
3469 case GT: case LE:
3470 if (inmode == CCmode || inmode == CCGCmode || inmode == CCNOmode)
3471 return 1;
3472 return 0;
3473 default:
3474 return 0;
3475 }
3476 }
3477
3478 /* Return 1 if OP is a comparison operator that can be issued by fcmov. */
3479
3480 int
3481 fcmov_comparison_operator (op, mode)
3482 register rtx op;
3483 enum machine_mode mode;
3484 {
3485 enum machine_mode inmode;
3486 enum rtx_code code = GET_CODE (op);
3487 if (mode != VOIDmode && GET_MODE (op) != mode)
3488 return 0;
3489 if (GET_RTX_CLASS (code) != '<')
3490 return 0;
3491 inmode = GET_MODE (XEXP (op, 0));
3492 if (inmode == CCFPmode || inmode == CCFPUmode)
3493 {
3494 enum rtx_code second_code, bypass_code;
3495 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
3496 if (bypass_code != NIL || second_code != NIL)
3497 return 0;
3498 code = ix86_fp_compare_code_to_integer (code);
3499 }
3500 /* The i387 supports only a limited set of condition codes. */
3501 switch (code)
3502 {
3503 case LTU: case GTU: case LEU: case GEU:
3504 if (inmode == CCmode || inmode == CCFPmode || inmode == CCFPUmode)
3505 return 1;
3506 return 0;
3507 case ORDERED: case UNORDERED:
3508 case EQ: case NE:
3509 return 1;
3510 default:
3511 return 0;
3512 }
3513 }
3514
3515 /* Return 1 if OP is a binary operator that can be promoted to wider mode. */
3516
3517 int
3518 promotable_binary_operator (op, mode)
3519 register rtx op;
3520 enum machine_mode mode ATTRIBUTE_UNUSED;
3521 {
3522 switch (GET_CODE (op))
3523 {
3524 case MULT:
3525 /* Modern CPUs have the same latency for HImode and SImode multiplies,
3526 but the 386 and 486 do HImode multiplies faster. */
3527 return ix86_cpu > PROCESSOR_I486;
3528 case PLUS:
3529 case AND:
3530 case IOR:
3531 case XOR:
3532 case ASHIFT:
3533 return 1;
3534 default:
3535 return 0;
3536 }
3537 }
3538
3539 /* Nearly general operand, but accept any const_double, since we wish
3540 to be able to drop them into memory rather than have them get pulled
3541 into registers. */
3542
3543 int
3544 cmp_fp_expander_operand (op, mode)
3545 register rtx op;
3546 enum machine_mode mode;
3547 {
3548 if (mode != VOIDmode && mode != GET_MODE (op))
3549 return 0;
3550 if (GET_CODE (op) == CONST_DOUBLE)
3551 return 1;
3552 return general_operand (op, mode);
3553 }
3554
3555 /* Match an SImode or HImode register (or DImode on 64-bit) for a zero_extract. */
3556
3557 int
3558 ext_register_operand (op, mode)
3559 register rtx op;
3560 enum machine_mode mode ATTRIBUTE_UNUSED;
3561 {
3562 int regno;
3563 if ((!TARGET_64BIT || GET_MODE (op) != DImode)
3564 && GET_MODE (op) != SImode && GET_MODE (op) != HImode)
3565 return 0;
3566
3567 if (!register_operand (op, VOIDmode))
3568 return 0;
3569
3570 /* Be careful to accept only registers having upper parts (%eax, %ebx, %ecx, %edx). */
3571 regno = REG_P (op) ? REGNO (op) : REGNO (SUBREG_REG (op));
3572 return (regno > LAST_VIRTUAL_REGISTER || regno < 4);
3573 }
3574
3575 /* Return 1 if this is a valid binary floating-point operation.
3576 OP is the expression matched, and MODE is its mode. */
3577
3578 int
3579 binary_fp_operator (op, mode)
3580 register rtx op;
3581 enum machine_mode mode;
3582 {
3583 if (mode != VOIDmode && mode != GET_MODE (op))
3584 return 0;
3585
3586 switch (GET_CODE (op))
3587 {
3588 case PLUS:
3589 case MINUS:
3590 case MULT:
3591 case DIV:
3592 return GET_MODE_CLASS (GET_MODE (op)) == MODE_FLOAT;
3593
3594 default:
3595 return 0;
3596 }
3597 }
3598
3599 int
3600 mult_operator (op, mode)
3601 register rtx op;
3602 enum machine_mode mode ATTRIBUTE_UNUSED;
3603 {
3604 return GET_CODE (op) == MULT;
3605 }
3606
3607 int
3608 div_operator (op, mode)
3609 register rtx op;
3610 enum machine_mode mode ATTRIBUTE_UNUSED;
3611 {
3612 return GET_CODE (op) == DIV;
3613 }
3614
3615 int
3616 arith_or_logical_operator (op, mode)
3617 rtx op;
3618 enum machine_mode mode;
3619 {
3620 return ((mode == VOIDmode || GET_MODE (op) == mode)
3621 && (GET_RTX_CLASS (GET_CODE (op)) == 'c'
3622 || GET_RTX_CLASS (GET_CODE (op)) == '2'));
3623 }
3624
3625 /* Returns 1 if OP is a memory operand with a displacement. */
3626
3627 int
3628 memory_displacement_operand (op, mode)
3629 register rtx op;
3630 enum machine_mode mode;
3631 {
3632 struct ix86_address parts;
3633
3634 if (! memory_operand (op, mode))
3635 return 0;
3636
3637 if (! ix86_decompose_address (XEXP (op, 0), &parts))
3638 abort ();
3639
3640 return parts.disp != NULL_RTX;
3641 }
3642
3643 /* To avoid problems when jump re-emits comparisons like testqi_ext_ccno_0,
3644 re-recognize the operand to avoid a copy_to_mode_reg that will fail.
3645
3646 ??? It seems likely that this will only work because cmpsi is an
3647 expander, and no actual insns use this. */
3648
3649 int
3650 cmpsi_operand (op, mode)
3651 rtx op;
3652 enum machine_mode mode;
3653 {
3654 if (nonimmediate_operand (op, mode))
3655 return 1;
3656
3657 if (GET_CODE (op) == AND
3658 && GET_MODE (op) == SImode
3659 && GET_CODE (XEXP (op, 0)) == ZERO_EXTRACT
3660 && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST_INT
3661 && GET_CODE (XEXP (XEXP (op, 0), 2)) == CONST_INT
3662 && INTVAL (XEXP (XEXP (op, 0), 1)) == 8
3663 && INTVAL (XEXP (XEXP (op, 0), 2)) == 8
3664 && GET_CODE (XEXP (op, 1)) == CONST_INT)
3665 return 1;
3666
3667 return 0;
3668 }
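/* Illustrative example (not from the original sources): the special form
   accepted above is roughly the operand jump re-emits for a high-byte test
   such as testqi_ext_ccno_0, e.g.

     (and:SI (zero_extract:SI (reg:SI 0) (const_int 8) (const_int 8))
             (const_int 255))

   i.e. a test of bits 8..15 of %eax -- the %ah byte.  */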
3669
3670 /* Returns 1 if OP is a memory operand that cannot be represented by the
3671 ModR/M byte alone (i.e. one that needs extra address bytes). */
3672
3673 int
3674 long_memory_operand (op, mode)
3675 register rtx op;
3676 enum machine_mode mode;
3677 {
3678 if (! memory_operand (op, mode))
3679 return 0;
3680
3681 return memory_address_length (op) != 0;
3682 }
3683
3684 /* Return nonzero if the rtx is known aligned. */
3685
3686 int
3687 aligned_operand (op, mode)
3688 rtx op;
3689 enum machine_mode mode;
3690 {
3691 struct ix86_address parts;
3692
3693 if (!general_operand (op, mode))
3694 return 0;
3695
3696 /* Registers and immediate operands are always "aligned". */
3697 if (GET_CODE (op) != MEM)
3698 return 1;
3699
3700 /* Don't even try to do any aligned optimizations with volatiles. */
3701 if (MEM_VOLATILE_P (op))
3702 return 0;
3703
3704 op = XEXP (op, 0);
3705
3706 /* Pushes and pops are only valid on the stack pointer. */
3707 if (GET_CODE (op) == PRE_DEC
3708 || GET_CODE (op) == POST_INC)
3709 return 1;
3710
3711 /* Decode the address. */
3712 if (! ix86_decompose_address (op, &parts))
3713 abort ();
3714
3715 if (parts.base && GET_CODE (parts.base) == SUBREG)
3716 parts.base = SUBREG_REG (parts.base);
3717 if (parts.index && GET_CODE (parts.index) == SUBREG)
3718 parts.index = SUBREG_REG (parts.index);
3719
3720 /* Look for some component that isn't known to be aligned. */
3721 if (parts.index)
3722 {
3723 if (parts.scale < 4
3724 && REGNO_POINTER_ALIGN (REGNO (parts.index)) < 32)
3725 return 0;
3726 }
3727 if (parts.base)
3728 {
3729 if (REGNO_POINTER_ALIGN (REGNO (parts.base)) < 32)
3730 return 0;
3731 }
3732 if (parts.disp)
3733 {
3734 if (GET_CODE (parts.disp) != CONST_INT
3735 || (INTVAL (parts.disp) & 3) != 0)
3736 return 0;
3737 }
3738
3739 /* Didn't find one -- this must be an aligned address. */
3740 return 1;
3741 }
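/* Worked example (illustrative, not from the original sources): a MEM whose
   address decomposes into base %ebx, index %eax, scale 4 and displacement 12
   is considered aligned only when %ebx is known to be 32-bit aligned
   (REGNO_POINTER_ALIGN >= 32), the displacement is a multiple of 4, and the
   index either has the same known alignment or is multiplied by a scale of
   4 or more.  */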
3742 \f
3743 /* Return true if the constant is something that can be loaded with
3744 a special instruction. Only handle 0.0 and 1.0; others are less
3745 worthwhile. */
3746
3747 int
3748 standard_80387_constant_p (x)
3749 rtx x;
3750 {
3751 if (GET_CODE (x) != CONST_DOUBLE || !FLOAT_MODE_P (GET_MODE (x)))
3752 return -1;
3753 /* Note that the 80387 supports other constants, such as pi, that we should
3754 support too. On some machines, these are much slower to load as a
3755 standard constant than to load from doubles in memory. */
3756 if (x == CONST0_RTX (GET_MODE (x)))
3757 return 1;
3758 if (x == CONST1_RTX (GET_MODE (x)))
3759 return 2;
3760 return 0;
3761 }
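/* Illustrative sketch (not part of the original sources): a caller such as
   an output template in the machine description would typically map the
   return value onto the dedicated load instructions, along these lines.  */
#if 0
  switch (standard_80387_constant_p (operands[1]))
    {
    case 1:
      return "fldz";	/* Load +0.0.  */
    case 2:
      return "fld1";	/* Load +1.0.  */
    }
#endif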
3762
3763 /* Return 1 if X is an FP constant we can load into an SSE register
3764 without using memory. */
3765 int
3766 standard_sse_constant_p (x)
3767 rtx x;
3768 {
3769 if (x == const0_rtx)
3770 return 1;
3771 return (x == CONST0_RTX (GET_MODE (x)));
3772 }
3773
3774 /* Returns 1 if OP contains a symbol reference */
3775
3776 int
3777 symbolic_reference_mentioned_p (op)
3778 rtx op;
3779 {
3780 register const char *fmt;
3781 register int i;
3782
3783 if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
3784 return 1;
3785
3786 fmt = GET_RTX_FORMAT (GET_CODE (op));
3787 for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
3788 {
3789 if (fmt[i] == 'E')
3790 {
3791 register int j;
3792
3793 for (j = XVECLEN (op, i) - 1; j >= 0; j--)
3794 if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
3795 return 1;
3796 }
3797
3798 else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
3799 return 1;
3800 }
3801
3802 return 0;
3803 }
3804
3805 /* Return 1 if it is appropriate to emit `ret' instructions in the
3806 body of a function. Do this only if the epilogue is simple, needing a
3807 couple of insns. Prior to reloading, we can't tell how many registers
3808 must be saved, so return 0 then. Return 0 if there is no frame
3809 marker to de-allocate.
3810
3811 If NON_SAVING_SETJMP is defined and true, then it is not possible
3812 for the epilogue to be simple, so return 0. This is a special case
3813 since NON_SAVING_SETJMP will not cause regs_ever_live to change
3814 until final, but jump_optimize may need to know sooner if a
3815 `return' is OK. */
3816
3817 int
3818 ix86_can_use_return_insn_p ()
3819 {
3820 struct ix86_frame frame;
3821
3822 #ifdef NON_SAVING_SETJMP
3823 if (NON_SAVING_SETJMP && current_function_calls_setjmp)
3824 return 0;
3825 #endif
3826
3827 if (! reload_completed || frame_pointer_needed)
3828 return 0;
3829
3830 /* Don't allow more than 32k bytes of popped arguments, since that's all
3831 we can do with one instruction. */
3832 if (current_function_pops_args
3833 && current_function_args_size >= 32768)
3834 return 0;
3835
3836 ix86_compute_frame_layout (&frame);
3837 return frame.to_allocate == 0 && frame.nregs == 0;
3838 }
3839 \f
3840 /* Return 1 if VALUE can be stored in the sign extended immediate field. */
3841 int
3842 x86_64_sign_extended_value (value, allow_rip)
3843 rtx value;
3844 int allow_rip;
3845 {
3846 switch (GET_CODE (value))
3847 {
3848 /* CONST_DOUBLEs never match, since HOST_BITS_PER_WIDE_INT is known
3849 to be at least 32, and thus all acceptable constants are
3850 represented as CONST_INTs. */
3851 case CONST_INT:
3852 if (HOST_BITS_PER_WIDE_INT == 32)
3853 return 1;
3854 else
3855 {
3856 HOST_WIDE_INT val = trunc_int_for_mode (INTVAL (value), DImode);
3857 return trunc_int_for_mode (val, SImode) == val;
3858 }
3859 break;
3860
3861 /* For certain code models, the symbolic references are known to fit.
3862 In the CM_SMALL_PIC model we know they fit if they are local to the shared
3863 library. Don't count TLS SYMBOL_REFs here, since they should fit
3864 only if inside an UNSPEC, handled below. */
3865 case SYMBOL_REF:
3866 return (ix86_cmodel == CM_SMALL || ix86_cmodel == CM_KERNEL
3867 || (allow_rip
3868 && ix86_cmodel == CM_SMALL_PIC
3869 && (CONSTANT_POOL_ADDRESS_P (value)
3870 || SYMBOL_REF_FLAG (value))
3871 && ! tls_symbolic_operand (value, GET_MODE (value))));
3872
3873 /* For certain code models, the code is near as well. */
3874 case LABEL_REF:
3875 return ix86_cmodel != CM_LARGE
3876 && (allow_rip || ix86_cmodel != CM_SMALL_PIC);
3877
3878 /* We also may accept the offsetted memory references in certain special
3879 cases. */
3880 case CONST:
3881 if (GET_CODE (XEXP (value, 0)) == UNSPEC)
3882 switch (XINT (XEXP (value, 0), 1))
3883 {
3884 case UNSPEC_GOTPCREL:
3885 case UNSPEC_DTPOFF:
3886 case UNSPEC_GOTNTPOFF:
3887 case UNSPEC_NTPOFF:
3888 return 1;
3889 default:
3890 break;
3891 }
3892 if (GET_CODE (XEXP (value, 0)) == PLUS)
3893 {
3894 rtx op1 = XEXP (XEXP (value, 0), 0);
3895 rtx op2 = XEXP (XEXP (value, 0), 1);
3896 HOST_WIDE_INT offset;
3897
3898 if (ix86_cmodel == CM_LARGE)
3899 return 0;
3900 if (GET_CODE (op2) != CONST_INT)
3901 return 0;
3902 offset = trunc_int_for_mode (INTVAL (op2), DImode);
3903 switch (GET_CODE (op1))
3904 {
3905 case SYMBOL_REF:
3906 /* For CM_SMALL assume that the latest object is 16MB before the
3907 end of the 31-bit boundary. We may also accept pretty
3908 large negative constants, knowing that all objects are
3909 in the positive half of the address space. */
3910 if (ix86_cmodel == CM_SMALL
3911 && offset < 16*1024*1024
3912 && trunc_int_for_mode (offset, SImode) == offset)
3913 return 1;
3914 /* For CM_KERNEL we know that all objects reside in the
3915 negative half of the 32-bit address space. We must not
3916 accept negative offsets, since they may land just outside it,
3917 and we may accept pretty large positive ones. */
3918 if (ix86_cmodel == CM_KERNEL
3919 && offset > 0
3920 && trunc_int_for_mode (offset, SImode) == offset)
3921 return 1;
3922 /* For CM_SMALL_PIC, we can make similar assumptions
3923 as for CM_SMALL model, if we know the symbol is local
3924 to the shared library. Disallow any TLS symbols,
3925 since they should always be enclosed in an UNSPEC. */
3926 if (ix86_cmodel == CM_SMALL_PIC
3927 && allow_rip
3928 && (CONSTANT_POOL_ADDRESS_P (op1)
3929 || SYMBOL_REF_FLAG (op1))
3930 && ! tls_symbolic_operand (op1, GET_MODE (op1))
3931 && offset < 16*1024*1024
3932 && offset >= -16*1024*1024
3933 && trunc_int_for_mode (offset, SImode) == offset)
3934 return 1;
3935 break;
3936 case LABEL_REF:
3937 /* These conditions are similar to SYMBOL_REF ones, just the
3938 constraints for code models differ. */
3939 if ((ix86_cmodel == CM_SMALL || ix86_cmodel == CM_MEDIUM
3940 || (ix86_cmodel == CM_SMALL_PIC && allow_rip
3941 && offset >= -16*1024*1024))
3942 && offset < 16*1024*1024
3943 && trunc_int_for_mode (offset, SImode) == offset)
3944 return 1;
3945 if (ix86_cmodel == CM_KERNEL
3946 && offset > 0
3947 && trunc_int_for_mode (offset, SImode) == offset)
3948 return 1;
3949 break;
3950 case UNSPEC:
3951 switch (XINT (op1, 1))
3952 {
3953 case UNSPEC_DTPOFF:
3954 case UNSPEC_NTPOFF:
3955 if (offset > 0
3956 && trunc_int_for_mode (offset, SImode) == offset)
3957 return 1;
3958 }
3959 break;
3960 default:
3961 return 0;
3962 }
3963 }
3964 return 0;
3965 default:
3966 return 0;
3967 }
3968 }
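/* Illustrative sketch, not part of the original sources: stripped of the RTL
   machinery, the CONST_INT check above amounts to testing that a 64-bit
   value survives truncation to 32 bits followed by sign extension (assuming
   the usual 32-bit int and 64-bit long long).  */
#if 0
static int
fits_signed_32bit_immediate_example (long long value)
{
  /* The CPU stores the immediate in 32 bits and sign-extends it, so VALUE
     is representable iff the truncate/sign-extend round trip is lossless.  */
  return (long long) (int) value == value;
}
#endif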
3969
3970 /* Return 1 if VALUE can be stored in the zero extended immediate field. */
3971 int
3972 x86_64_zero_extended_value (value)
3973 rtx value;
3974 {
3975 switch (GET_CODE (value))
3976 {
3977 case CONST_DOUBLE:
3978 if (HOST_BITS_PER_WIDE_INT == 32)
3979 return (GET_MODE (value) == VOIDmode
3980 && !CONST_DOUBLE_HIGH (value));
3981 else
3982 return 0;
3983 case CONST_INT:
3984 if (HOST_BITS_PER_WIDE_INT == 32)
3985 return INTVAL (value) >= 0;
3986 else
3987 return !(INTVAL (value) & ~(HOST_WIDE_INT) 0xffffffff);
3988 break;
3989
3990 /* For certain code models, the symbolic references are known to fit. */
3991 case SYMBOL_REF:
3992 return ix86_cmodel == CM_SMALL;
3993
3994 /* For certain code models, the code is near as well. */
3995 case LABEL_REF:
3996 return ix86_cmodel == CM_SMALL || ix86_cmodel == CM_MEDIUM;
3997
3998 /* We also may accept the offsetted memory references in certain special
3999 cases. */
4000 case CONST:
4001 if (GET_CODE (XEXP (value, 0)) == PLUS)
4002 {
4003 rtx op1 = XEXP (XEXP (value, 0), 0);
4004 rtx op2 = XEXP (XEXP (value, 0), 1);
4005
4006 if (ix86_cmodel == CM_LARGE)
4007 return 0;
4008 switch (GET_CODE (op1))
4009 {
4010 case SYMBOL_REF:
4011 return 0;
4012 /* For small code model we may accept pretty large positive
4013 offsets, since one bit is available for free. Negative
4014 offsets are limited by the size of NULL pointer area
4015 specified by the ABI. */
4016 if (ix86_cmodel == CM_SMALL
4017 && GET_CODE (op2) == CONST_INT
4018 && trunc_int_for_mode (INTVAL (op2), DImode) > -0x10000
4019 && (trunc_int_for_mode (INTVAL (op2), SImode)
4020 == INTVAL (op2)))
4021 return 1;
4022 /* ??? For the kernel, we may accept adjustment of
4023 -0x10000000, since we know that it will just convert
4024 negative address space to positive, but perhaps this
4025 is not worthwhile. */
4026 break;
4027 case LABEL_REF:
4028 /* These conditions are similar to SYMBOL_REF ones, just the
4029 constraints for code models differ. */
4030 if ((ix86_cmodel == CM_SMALL || ix86_cmodel == CM_MEDIUM)
4031 && GET_CODE (op2) == CONST_INT
4032 && trunc_int_for_mode (INTVAL (op2), DImode) > -0x10000
4033 && (trunc_int_for_mode (INTVAL (op2), SImode)
4034 == INTVAL (op2)))
4035 return 1;
4036 break;
4037 default:
4038 return 0;
4039 }
4040 }
4041 return 0;
4042 default:
4043 return 0;
4044 }
4045 }
4046
4047 /* Value should be nonzero if functions must have frame pointers.
4048 Zero means the frame pointer need not be set up (and parms may
4049 be accessed via the stack pointer) in functions that seem suitable. */
4050
4051 int
4052 ix86_frame_pointer_required ()
4053 {
4054 /* If we accessed previous frames, then the generated code expects
4055 to be able to access the saved ebp value in our frame. */
4056 if (cfun->machine->accesses_prev_frame)
4057 return 1;
4058
4059 /* Several x86 OSes need a frame pointer for other reasons,
4060 usually pertaining to setjmp. */
4061 if (SUBTARGET_FRAME_POINTER_REQUIRED)
4062 return 1;
4063
4064 /* In override_options, TARGET_OMIT_LEAF_FRAME_POINTER turns off
4065 the frame pointer by default. Turn it back on now if we've not
4066 got a leaf function. */
4067 if (TARGET_OMIT_LEAF_FRAME_POINTER
4068 && (!current_function_is_leaf))
4069 return 1;
4070
4071 if (current_function_profile)
4072 return 1;
4073
4074 return 0;
4075 }
4076
4077 /* Record that the current function accesses previous call frames. */
4078
4079 void
4080 ix86_setup_frame_addresses ()
4081 {
4082 cfun->machine->accesses_prev_frame = 1;
4083 }
4084 \f
4085 #if defined(HAVE_GAS_HIDDEN) && defined(SUPPORTS_ONE_ONLY)
4086 # define USE_HIDDEN_LINKONCE 1
4087 #else
4088 # define USE_HIDDEN_LINKONCE 0
4089 #endif
4090
4091 static int pic_labels_used;
4092
4093 /* Fills in the label name that should be used for a pc thunk for
4094 the given register. */
4095
4096 static void
4097 get_pc_thunk_name (name, regno)
4098 char name[32];
4099 unsigned int regno;
4100 {
4101 if (USE_HIDDEN_LINKONCE)
4102 sprintf (name, "__i686.get_pc_thunk.%s", reg_names[regno]);
4103 else
4104 ASM_GENERATE_INTERNAL_LABEL (name, "LPR", regno);
4105 }
4106
4107
4108 /* For each register that was used as a PIC register, this emits a -fpic
4109 thunk that loads it with the return address of the caller and then returns. */
4110
4111 void
4112 ix86_asm_file_end (file)
4113 FILE *file;
4114 {
4115 rtx xops[2];
4116 int regno;
4117
4118 for (regno = 0; regno < 8; ++regno)
4119 {
4120 char name[32];
4121
4122 if (! ((pic_labels_used >> regno) & 1))
4123 continue;
4124
4125 get_pc_thunk_name (name, regno);
4126
4127 if (USE_HIDDEN_LINKONCE)
4128 {
4129 tree decl;
4130
4131 decl = build_decl (FUNCTION_DECL, get_identifier (name),
4132 error_mark_node);
4133 TREE_PUBLIC (decl) = 1;
4134 TREE_STATIC (decl) = 1;
4135 DECL_ONE_ONLY (decl) = 1;
4136
4137 (*targetm.asm_out.unique_section) (decl, 0);
4138 named_section (decl, NULL, 0);
4139
4140 (*targetm.asm_out.globalize_label) (file, name);
4141 fputs ("\t.hidden\t", file);
4142 assemble_name (file, name);
4143 fputc ('\n', file);
4144 ASM_DECLARE_FUNCTION_NAME (file, name, decl);
4145 }
4146 else
4147 {
4148 text_section ();
4149 ASM_OUTPUT_LABEL (file, name);
4150 }
4151
4152 xops[0] = gen_rtx_REG (SImode, regno);
4153 xops[1] = gen_rtx_MEM (SImode, stack_pointer_rtx);
4154 output_asm_insn ("mov{l}\t{%1, %0|%0, %1}", xops);
4155 output_asm_insn ("ret", xops);
4156 }
4157 }
4158
4159 /* Emit code for the SET_GOT patterns. */
4160
4161 const char *
4162 output_set_got (dest)
4163 rtx dest;
4164 {
4165 rtx xops[3];
4166
4167 xops[0] = dest;
4168 xops[1] = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME);
4169
4170 if (! TARGET_DEEP_BRANCH_PREDICTION || !flag_pic)
4171 {
4172 xops[2] = gen_rtx_LABEL_REF (Pmode, gen_label_rtx ());
4173
4174 if (!flag_pic)
4175 output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);
4176 else
4177 output_asm_insn ("call\t%a2", xops);
4178
4179 #if TARGET_MACHO
4180 /* Output the "canonical" label name ("Lxx$pb") here too. This
4181 is what will be referred to by the Mach-O PIC subsystem. */
4182 ASM_OUTPUT_LABEL (asm_out_file, machopic_function_base_name ());
4183 #endif
4184 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, "L",
4185 CODE_LABEL_NUMBER (XEXP (xops[2], 0)));
4186
4187 if (flag_pic)
4188 output_asm_insn ("pop{l}\t%0", xops);
4189 }
4190 else
4191 {
4192 char name[32];
4193 get_pc_thunk_name (name, REGNO (dest));
4194 pic_labels_used |= 1 << REGNO (dest);
4195
4196 xops[2] = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
4197 xops[2] = gen_rtx_MEM (QImode, xops[2]);
4198 output_asm_insn ("call\t%X2", xops);
4199 }
4200
4201 if (!flag_pic || TARGET_DEEP_BRANCH_PREDICTION)
4202 output_asm_insn ("add{l}\t{%1, %0|%0, %1}", xops);
4203 else if (!TARGET_MACHO)
4204 output_asm_insn ("add{l}\t{%1+[.-%a2], %0|%0, %a1+(.-%a2)}", xops);
4205
4206 return "";
4207 }
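/* Illustrative example (not from the original sources): with -fpic the
   sequence emitted above for %ebx corresponds roughly to

	call	.L2
   .L2:	popl	%ebx
	addl	$_GLOBAL_OFFSET_TABLE_+[.-.L2], %ebx

   without deep branch prediction, or, with it, to

	call	__i686.get_pc_thunk.bx
	addl	$_GLOBAL_OFFSET_TABLE_, %ebx

   where the thunk itself is emitted later by ix86_asm_file_end.  */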
4208
4209 /* Generate a "push" pattern for input ARG. */
4210
4211 static rtx
4212 gen_push (arg)
4213 rtx arg;
4214 {
4215 return gen_rtx_SET (VOIDmode,
4216 gen_rtx_MEM (Pmode,
4217 gen_rtx_PRE_DEC (Pmode,
4218 stack_pointer_rtx)),
4219 arg);
4220 }
4221
4222 /* Return the number of an unused call-clobbered register if one is
4223 available for the entire function, otherwise INVALID_REGNUM. */
4224
4225 static unsigned int
4226 ix86_select_alt_pic_regnum ()
4227 {
4228 if (current_function_is_leaf && !current_function_profile)
4229 {
4230 int i;
4231 for (i = 2; i >= 0; --i)
4232 if (!regs_ever_live[i])
4233 return i;
4234 }
4235
4236 return INVALID_REGNUM;
4237 }
4238
4239 /* Return 1 if we need to save REGNO. */
4240 static int
4241 ix86_save_reg (regno, maybe_eh_return)
4242 unsigned int regno;
4243 int maybe_eh_return;
4244 {
4245 if (pic_offset_table_rtx
4246 && regno == REAL_PIC_OFFSET_TABLE_REGNUM
4247 && (regs_ever_live[REAL_PIC_OFFSET_TABLE_REGNUM]
4248 || current_function_profile
4249 || current_function_calls_eh_return))
4250 {
4251 if (ix86_select_alt_pic_regnum () != INVALID_REGNUM)
4252 return 0;
4253 return 1;
4254 }
4255
4256 if (current_function_calls_eh_return && maybe_eh_return)
4257 {
4258 unsigned i;
4259 for (i = 0; ; i++)
4260 {
4261 unsigned test = EH_RETURN_DATA_REGNO (i);
4262 if (test == INVALID_REGNUM)
4263 break;
4264 if (test == regno)
4265 return 1;
4266 }
4267 }
4268
4269 return (regs_ever_live[regno]
4270 && !call_used_regs[regno]
4271 && !fixed_regs[regno]
4272 && (regno != HARD_FRAME_POINTER_REGNUM || !frame_pointer_needed));
4273 }
4274
4275 /* Return number of registers to be saved on the stack. */
4276
4277 static int
4278 ix86_nsaved_regs ()
4279 {
4280 int nregs = 0;
4281 int regno;
4282
4283 for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; regno--)
4284 if (ix86_save_reg (regno, true))
4285 nregs++;
4286 return nregs;
4287 }
4288
4289 /* Return the offset between two registers, one to be eliminated, and the other
4290 its replacement, at the start of a routine. */
4291
4292 HOST_WIDE_INT
4293 ix86_initial_elimination_offset (from, to)
4294 int from;
4295 int to;
4296 {
4297 struct ix86_frame frame;
4298 ix86_compute_frame_layout (&frame);
4299
4300 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
4301 return frame.hard_frame_pointer_offset;
4302 else if (from == FRAME_POINTER_REGNUM
4303 && to == HARD_FRAME_POINTER_REGNUM)
4304 return frame.hard_frame_pointer_offset - frame.frame_pointer_offset;
4305 else
4306 {
4307 if (to != STACK_POINTER_REGNUM)
4308 abort ();
4309 else if (from == ARG_POINTER_REGNUM)
4310 return frame.stack_pointer_offset;
4311 else if (from != FRAME_POINTER_REGNUM)
4312 abort ();
4313 else
4314 return frame.stack_pointer_offset - frame.frame_pointer_offset;
4315 }
4316 }
4317
4318 /* Fill the ix86_frame structure describing the frame of the current function. */
4319
4320 static void
4321 ix86_compute_frame_layout (frame)
4322 struct ix86_frame *frame;
4323 {
4324 HOST_WIDE_INT total_size;
4325 int stack_alignment_needed = cfun->stack_alignment_needed / BITS_PER_UNIT;
4326 int offset;
4327 int preferred_alignment = cfun->preferred_stack_boundary / BITS_PER_UNIT;
4328 HOST_WIDE_INT size = get_frame_size ();
4329
4330 frame->nregs = ix86_nsaved_regs ();
4331 total_size = size;
4332
4333 /* Skip return address and saved base pointer. */
4334 offset = frame_pointer_needed ? UNITS_PER_WORD * 2 : UNITS_PER_WORD;
4335
4336 frame->hard_frame_pointer_offset = offset;
4337
4338 /* Do some sanity checking of stack_alignment_needed and
4339 preferred_alignment, since the i386 port is the only one using these
4340 features, and they may break easily. */
4341
4342 if (size && !stack_alignment_needed)
4343 abort ();
4344 if (preferred_alignment < STACK_BOUNDARY / BITS_PER_UNIT)
4345 abort ();
4346 if (preferred_alignment > PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT)
4347 abort ();
4348 if (stack_alignment_needed > PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT)
4349 abort ();
4350
4351 if (stack_alignment_needed < STACK_BOUNDARY / BITS_PER_UNIT)
4352 stack_alignment_needed = STACK_BOUNDARY / BITS_PER_UNIT;
4353
4354 /* Register save area */
4355 offset += frame->nregs * UNITS_PER_WORD;
4356
4357 /* Va-arg area */
4358 if (ix86_save_varrargs_registers)
4359 {
4360 offset += X86_64_VARARGS_SIZE;
4361 frame->va_arg_size = X86_64_VARARGS_SIZE;
4362 }
4363 else
4364 frame->va_arg_size = 0;
4365
4366 /* Align start of frame for local function. */
4367 frame->padding1 = ((offset + stack_alignment_needed - 1)
4368 & -stack_alignment_needed) - offset;
4369
4370 offset += frame->padding1;
4371
4372 /* Frame pointer points here. */
4373 frame->frame_pointer_offset = offset;
4374
4375 offset += size;
4376
4377 /* Add outgoing arguments area. Can be skipped if we eliminated
4378 all the function calls as dead code. */
4379 if (ACCUMULATE_OUTGOING_ARGS && !current_function_is_leaf)
4380 {
4381 offset += current_function_outgoing_args_size;
4382 frame->outgoing_arguments_size = current_function_outgoing_args_size;
4383 }
4384 else
4385 frame->outgoing_arguments_size = 0;
4386
4387 /* Align stack boundary. Only needed if we're calling another function
4388 or using alloca. */
4389 if (!current_function_is_leaf || current_function_calls_alloca)
4390 frame->padding2 = ((offset + preferred_alignment - 1)
4391 & -preferred_alignment) - offset;
4392 else
4393 frame->padding2 = 0;
4394
4395 offset += frame->padding2;
4396
4397 /* We've reached end of stack frame. */
4398 frame->stack_pointer_offset = offset;
4399
4400 /* Size the prologue needs to allocate. */
4401 frame->to_allocate =
4402 (size + frame->padding1 + frame->padding2
4403 + frame->outgoing_arguments_size + frame->va_arg_size);
4404
4405 if (TARGET_64BIT && TARGET_RED_ZONE && current_function_sp_is_unchanging
4406 && current_function_is_leaf)
4407 {
4408 frame->red_zone_size = frame->to_allocate;
4409 if (frame->red_zone_size > RED_ZONE_SIZE - RED_ZONE_RESERVE)
4410 frame->red_zone_size = RED_ZONE_SIZE - RED_ZONE_RESERVE;
4411 }
4412 else
4413 frame->red_zone_size = 0;
4414 frame->to_allocate -= frame->red_zone_size;
4415 frame->stack_pointer_offset -= frame->red_zone_size;
4416 #if 0
4417 fprintf (stderr, "nregs: %i\n", frame->nregs);
4418 fprintf (stderr, "size: %i\n", size);
4419 fprintf (stderr, "alignment1: %i\n", stack_alignment_needed);
4420 fprintf (stderr, "padding1: %i\n", frame->padding1);
4421 fprintf (stderr, "va_arg: %i\n", frame->va_arg_size);
4422 fprintf (stderr, "padding2: %i\n", frame->padding2);
4423 fprintf (stderr, "to_allocate: %i\n", frame->to_allocate);
4424 fprintf (stderr, "red_zone_size: %i\n", frame->red_zone_size);
4425 fprintf (stderr, "frame_pointer_offset: %i\n", frame->frame_pointer_offset);
4426 fprintf (stderr, "hard_frame_pointer_offset: %i\n",
4427 frame->hard_frame_pointer_offset);
4428 fprintf (stderr, "stack_pointer_offset: %i\n", frame->stack_pointer_offset);
4429 #endif
4430 }
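/* Illustrative sketch of the layout computed above (not part of the original
   sources); higher addresses are at the top, and the ix86_frame offsets grow
   downwards from just above the return address (red zone handling omitted):

	incoming arguments
	return address
	saved %ebp (if frame_pointer_needed)	<- hard frame pointer (%ebp)
	register save area (nregs words)
	va_arg register save area (va_arg_size)
	padding1
	local variables (get_frame_size ())	<- soft frame pointer
	outgoing arguments
	padding2				<- stack pointer (%esp)
*/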
4431
4432 /* Emit code to save registers in the prologue. */
4433
4434 static void
4435 ix86_emit_save_regs ()
4436 {
4437 register int regno;
4438 rtx insn;
4439
4440 for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; regno--)
4441 if (ix86_save_reg (regno, true))
4442 {
4443 insn = emit_insn (gen_push (gen_rtx_REG (Pmode, regno)));
4444 RTX_FRAME_RELATED_P (insn) = 1;
4445 }
4446 }
4447
4448 /* Emit code to save registers using MOV insns. The first register
4449 is saved at POINTER + OFFSET. */
4450 static void
4451 ix86_emit_save_regs_using_mov (pointer, offset)
4452 rtx pointer;
4453 HOST_WIDE_INT offset;
4454 {
4455 int regno;
4456 rtx insn;
4457
4458 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
4459 if (ix86_save_reg (regno, true))
4460 {
4461 insn = emit_move_insn (adjust_address (gen_rtx_MEM (Pmode, pointer),
4462 Pmode, offset),
4463 gen_rtx_REG (Pmode, regno));
4464 RTX_FRAME_RELATED_P (insn) = 1;
4465 offset += UNITS_PER_WORD;
4466 }
4467 }
4468
4469 /* Expand the prologue into a bunch of separate insns. */
4470
4471 void
4472 ix86_expand_prologue ()
4473 {
4474 rtx insn;
4475 bool pic_reg_used;
4476 struct ix86_frame frame;
4477 int use_mov = 0;
4478 HOST_WIDE_INT allocate;
4479
4480 if (!optimize_size)
4481 {
4482 use_fast_prologue_epilogue
4483 = !expensive_function_p (FAST_PROLOGUE_INSN_COUNT);
4484 if (TARGET_PROLOGUE_USING_MOVE)
4485 use_mov = use_fast_prologue_epilogue;
4486 }
4487 ix86_compute_frame_layout (&frame);
4488
4489 /* Note: AT&T enter does NOT have reversed args. Enter is probably
4490 slower on all targets. Also sdb doesn't like it. */
4491
4492 if (frame_pointer_needed)
4493 {
4494 insn = emit_insn (gen_push (hard_frame_pointer_rtx));
4495 RTX_FRAME_RELATED_P (insn) = 1;
4496
4497 insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
4498 RTX_FRAME_RELATED_P (insn) = 1;
4499 }
4500
4501 allocate = frame.to_allocate;
4502 /* When we are dealing with only a single register and an empty frame,
4503 push is equivalent to the mov+add sequence. */
4504 if (allocate == 0 && frame.nregs <= 1)
4505 use_mov = 0;
4506
4507 if (!use_mov)
4508 ix86_emit_save_regs ();
4509 else
4510 allocate += frame.nregs * UNITS_PER_WORD;
4511
4512 if (allocate == 0)
4513 ;
4514 else if (! TARGET_STACK_PROBE || allocate < CHECK_STACK_LIMIT)
4515 {
4516 insn = emit_insn (gen_pro_epilogue_adjust_stack
4517 (stack_pointer_rtx, stack_pointer_rtx,
4518 GEN_INT (-allocate)));
4519 RTX_FRAME_RELATED_P (insn) = 1;
4520 }
4521 else
4522 {
4523 /* ??? Is this only valid for Win32? */
4524
4525 rtx arg0, sym;
4526
4527 if (TARGET_64BIT)
4528 abort ();
4529
4530 arg0 = gen_rtx_REG (SImode, 0);
4531 emit_move_insn (arg0, GEN_INT (allocate));
4532
4533 sym = gen_rtx_MEM (FUNCTION_MODE,
4534 gen_rtx_SYMBOL_REF (Pmode, "_alloca"));
4535 insn = emit_call_insn (gen_call (sym, const0_rtx, constm1_rtx));
4536
4537 CALL_INSN_FUNCTION_USAGE (insn)
4538 = gen_rtx_EXPR_LIST (VOIDmode, gen_rtx_USE (VOIDmode, arg0),
4539 CALL_INSN_FUNCTION_USAGE (insn));
4540 }
4541 if (use_mov)
4542 {
4543 if (!frame_pointer_needed || !frame.to_allocate)
4544 ix86_emit_save_regs_using_mov (stack_pointer_rtx, frame.to_allocate);
4545 else
4546 ix86_emit_save_regs_using_mov (hard_frame_pointer_rtx,
4547 -frame.nregs * UNITS_PER_WORD);
4548 }
4549
4550 #ifdef SUBTARGET_PROLOGUE
4551 SUBTARGET_PROLOGUE;
4552 #endif
4553
4554 pic_reg_used = false;
4555 if (pic_offset_table_rtx
4556 && (regs_ever_live[REAL_PIC_OFFSET_TABLE_REGNUM]
4557 || current_function_profile))
4558 {
4559 unsigned int alt_pic_reg_used = ix86_select_alt_pic_regnum ();
4560
4561 if (alt_pic_reg_used != INVALID_REGNUM)
4562 REGNO (pic_offset_table_rtx) = alt_pic_reg_used;
4563
4564 pic_reg_used = true;
4565 }
4566
4567 if (pic_reg_used)
4568 {
4569 insn = emit_insn (gen_set_got (pic_offset_table_rtx));
4570
4571 /* Even with accurate pre-reload life analysis, we can wind up
4572 deleting all references to the pic register after reload.
4573 Consider if cross-jumping unifies two sides of a branch
4574 controlled by a comparison vs. the only read from a global.
4575 In that case, allow the set_got to be deleted, though we're
4576 too late to do anything about the ebx save in the prologue. */
4577 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD, const0_rtx, NULL);
4578 }
4579
4580 /* Prevent function calls from being scheduled before the call to mcount.
4581 In the pic_reg_used case, make sure that the got load isn't deleted. */
4582 if (current_function_profile)
4583 emit_insn (gen_blockage (pic_reg_used ? pic_offset_table_rtx : const0_rtx));
4584 }
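/* Illustrative example (not from the original sources): for a function that
   needs a frame pointer, saves %esi and %edi and has 24 bytes of locals, the
   insns emitted above correspond roughly to

	pushl	%ebp
	movl	%esp, %ebp
	pushl	%edi
	pushl	%esi
	subl	$24, %esp

   or, when the register-move variant is selected, to a single larger subl
   followed by movl stores of %edi and %esi into the allocated area.  */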
4585
4586 /* Emit code to restore saved registers using MOV insns. First register
4587 is restored from POINTER + OFFSET. */
4588 static void
4589 ix86_emit_restore_regs_using_mov (pointer, offset, maybe_eh_return)
4590 rtx pointer;
4591 int offset;
4592 int maybe_eh_return;
4593 {
4594 int regno;
4595
4596 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
4597 if (ix86_save_reg (regno, maybe_eh_return))
4598 {
4599 emit_move_insn (gen_rtx_REG (Pmode, regno),
4600 adjust_address (gen_rtx_MEM (Pmode, pointer),
4601 Pmode, offset));
4602 offset += UNITS_PER_WORD;
4603 }
4604 }
4605
4606 /* Restore function stack, frame, and registers. */
4607
4608 void
4609 ix86_expand_epilogue (style)
4610 int style;
4611 {
4612 int regno;
4613 int sp_valid = !frame_pointer_needed || current_function_sp_is_unchanging;
4614 struct ix86_frame frame;
4615 HOST_WIDE_INT offset;
4616
4617 ix86_compute_frame_layout (&frame);
4618
4619 /* Calculate start of saved registers relative to ebp. Special care
4620 must be taken for the normal return case of a function using
4621 eh_return: the eax and edx registers are marked as saved, but not
4622 restored along this path. */
4623 offset = frame.nregs;
4624 if (current_function_calls_eh_return && style != 2)
4625 offset -= 2;
4626 offset *= -UNITS_PER_WORD;
4627
4628 /* If we're only restoring one register and sp is not valid then
4629 use a move instruction to restore the register, since it's
4630 less work than reloading sp and popping the register.
4631
4632 The default code results in a stack adjustment using an add/lea
4633 instruction, while this code results in a LEAVE instruction (or discrete
4634 equivalent), so it is profitable in some other cases as well, especially
4635 when there are no registers to restore. We also use this code when
4636 TARGET_USE_LEAVE and there is exactly one register to pop. This heuristic
4637 may need some tuning in the future. */
4638 if ((!sp_valid && frame.nregs <= 1)
4639 || (TARGET_EPILOGUE_USING_MOVE
4640 && use_fast_prologue_epilogue
4641 && (frame.nregs > 1 || frame.to_allocate))
4642 || (frame_pointer_needed && !frame.nregs && frame.to_allocate)
4643 || (frame_pointer_needed && TARGET_USE_LEAVE
4644 && use_fast_prologue_epilogue && frame.nregs == 1)
4645 || current_function_calls_eh_return)
4646 {
4647 /* Restore registers. We can use ebp or esp to address the memory
4648 locations. If both are available, default to ebp, since offsets
4649 are known to be small. The only exception is esp pointing directly
4650 to the end of the block of saved registers, where we may simplify
4651 the addressing mode. */
4652
4653 if (!frame_pointer_needed || (sp_valid && !frame.to_allocate))
4654 ix86_emit_restore_regs_using_mov (stack_pointer_rtx,
4655 frame.to_allocate, style == 2);
4656 else
4657 ix86_emit_restore_regs_using_mov (hard_frame_pointer_rtx,
4658 offset, style == 2);
4659
4660 /* eh_return epilogues need %ecx added to the stack pointer. */
4661 if (style == 2)
4662 {
4663 rtx tmp, sa = EH_RETURN_STACKADJ_RTX;
4664
4665 if (frame_pointer_needed)
4666 {
4667 tmp = gen_rtx_PLUS (Pmode, hard_frame_pointer_rtx, sa);
4668 tmp = plus_constant (tmp, UNITS_PER_WORD);
4669 emit_insn (gen_rtx_SET (VOIDmode, sa, tmp));
4670
4671 tmp = gen_rtx_MEM (Pmode, hard_frame_pointer_rtx);
4672 emit_move_insn (hard_frame_pointer_rtx, tmp);
4673
4674 emit_insn (gen_pro_epilogue_adjust_stack
4675 (stack_pointer_rtx, sa, const0_rtx));
4676 }
4677 else
4678 {
4679 tmp = gen_rtx_PLUS (Pmode, stack_pointer_rtx, sa);
4680 tmp = plus_constant (tmp, (frame.to_allocate
4681 + frame.nregs * UNITS_PER_WORD));
4682 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx, tmp));
4683 }
4684 }
4685 else if (!frame_pointer_needed)
4686 emit_insn (gen_pro_epilogue_adjust_stack
4687 (stack_pointer_rtx, stack_pointer_rtx,
4688 GEN_INT (frame.to_allocate
4689 + frame.nregs * UNITS_PER_WORD)));
4690 /* If not an i386, mov & pop is faster than "leave". */
4691 else if (TARGET_USE_LEAVE || optimize_size || !use_fast_prologue_epilogue)
4692 emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ());
4693 else
4694 {
4695 emit_insn (gen_pro_epilogue_adjust_stack (stack_pointer_rtx,
4696 hard_frame_pointer_rtx,
4697 const0_rtx));
4698 if (TARGET_64BIT)
4699 emit_insn (gen_popdi1 (hard_frame_pointer_rtx));
4700 else
4701 emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
4702 }
4703 }
4704 else
4705 {
4706 /* First step is to deallocate the stack frame so that we can
4707 pop the registers. */
4708 if (!sp_valid)
4709 {
4710 if (!frame_pointer_needed)
4711 abort ();
4712 emit_insn (gen_pro_epilogue_adjust_stack (stack_pointer_rtx,
4713 hard_frame_pointer_rtx,
4714 GEN_INT (offset)));
4715 }
4716 else if (frame.to_allocate)
4717 emit_insn (gen_pro_epilogue_adjust_stack
4718 (stack_pointer_rtx, stack_pointer_rtx,
4719 GEN_INT (frame.to_allocate)));
4720
4721 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
4722 if (ix86_save_reg (regno, false))
4723 {
4724 if (TARGET_64BIT)
4725 emit_insn (gen_popdi1 (gen_rtx_REG (Pmode, regno)));
4726 else
4727 emit_insn (gen_popsi1 (gen_rtx_REG (Pmode, regno)));
4728 }
4729 if (frame_pointer_needed)
4730 {
4731 /* Leave results in shorter dependency chains on CPUs that are
4732 able to grok it fast. */
4733 if (TARGET_USE_LEAVE)
4734 emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ());
4735 else if (TARGET_64BIT)
4736 emit_insn (gen_popdi1 (hard_frame_pointer_rtx));
4737 else
4738 emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
4739 }
4740 }
4741
4742 /* Sibcall epilogues don't want a return instruction. */
4743 if (style == 0)
4744 return;
4745
4746 if (current_function_pops_args && current_function_args_size)
4747 {
4748 rtx popc = GEN_INT (current_function_pops_args);
4749
4750 /* i386 can only pop 64K bytes. If asked to pop more, pop
4751 return address, do explicit add, and jump indirectly to the
4752 caller. */
4753
4754 if (current_function_pops_args >= 65536)
4755 {
4756 rtx ecx = gen_rtx_REG (SImode, 2);
4757
4758 /* There is no "pascal" calling convention in the 64-bit ABI. */
4759 if (TARGET_64BIT)
4760 abort ();
4761
4762 emit_insn (gen_popsi1 (ecx));
4763 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, popc));
4764 emit_jump_insn (gen_return_indirect_internal (ecx));
4765 }
4766 else
4767 emit_jump_insn (gen_return_pop_internal (popc));
4768 }
4769 else
4770 emit_jump_insn (gen_return_internal ());
4771 }
4772
4773 /* Undo the function's potential modifications: reset the PIC register rtx to its real register number. */
4774
4775 static void
4776 ix86_output_function_epilogue (file, size)
4777 FILE *file ATTRIBUTE_UNUSED;
4778 HOST_WIDE_INT size ATTRIBUTE_UNUSED;
4779 {
4780 if (pic_offset_table_rtx)
4781 REGNO (pic_offset_table_rtx) = REAL_PIC_OFFSET_TABLE_REGNUM;
4782 }
4783 \f
4784 /* Extract the parts of an RTL expression that is a valid memory address
4785 for an instruction. Return 0 if the structure of the address is
4786 grossly off. Return -1 if the address contains ASHIFT, so it is not
4787 strictly valid, but is still used for computing the length of a lea instruction.
4788 */
4789
4790 static int
4791 ix86_decompose_address (addr, out)
4792 register rtx addr;
4793 struct ix86_address *out;
4794 {
4795 rtx base = NULL_RTX;
4796 rtx index = NULL_RTX;
4797 rtx disp = NULL_RTX;
4798 HOST_WIDE_INT scale = 1;
4799 rtx scale_rtx = NULL_RTX;
4800 int retval = 1;
4801
4802 if (REG_P (addr) || GET_CODE (addr) == SUBREG)
4803 base = addr;
4804 else if (GET_CODE (addr) == PLUS)
4805 {
4806 rtx op0 = XEXP (addr, 0);
4807 rtx op1 = XEXP (addr, 1);
4808 enum rtx_code code0 = GET_CODE (op0);
4809 enum rtx_code code1 = GET_CODE (op1);
4810
4811 if (code0 == REG || code0 == SUBREG)
4812 {
4813 if (code1 == REG || code1 == SUBREG)
4814 index = op0, base = op1; /* index + base */
4815 else
4816 base = op0, disp = op1; /* base + displacement */
4817 }
4818 else if (code0 == MULT)
4819 {
4820 index = XEXP (op0, 0);
4821 scale_rtx = XEXP (op0, 1);
4822 if (code1 == REG || code1 == SUBREG)
4823 base = op1; /* index*scale + base */
4824 else
4825 disp = op1; /* index*scale + disp */
4826 }
4827 else if (code0 == PLUS && GET_CODE (XEXP (op0, 0)) == MULT)
4828 {
4829 index = XEXP (XEXP (op0, 0), 0); /* index*scale + base + disp */
4830 scale_rtx = XEXP (XEXP (op0, 0), 1);
4831 base = XEXP (op0, 1);
4832 disp = op1;
4833 }
4834 else if (code0 == PLUS)
4835 {
4836 index = XEXP (op0, 0); /* index + base + disp */
4837 base = XEXP (op0, 1);
4838 disp = op1;
4839 }
4840 else
4841 return 0;
4842 }
4843 else if (GET_CODE (addr) == MULT)
4844 {
4845 index = XEXP (addr, 0); /* index*scale */
4846 scale_rtx = XEXP (addr, 1);
4847 }
4848 else if (GET_CODE (addr) == ASHIFT)
4849 {
4850 rtx tmp;
4851
4852 /* We're called for lea too, which implements ashift on occasion. */
4853 index = XEXP (addr, 0);
4854 tmp = XEXP (addr, 1);
4855 if (GET_CODE (tmp) != CONST_INT)
4856 return 0;
4857 scale = INTVAL (tmp);
4858 if ((unsigned HOST_WIDE_INT) scale > 3)
4859 return 0;
4860 scale = 1 << scale;
4861 retval = -1;
4862 }
4863 else
4864 disp = addr; /* displacement */
4865
4866 /* Extract the integral value of scale. */
4867 if (scale_rtx)
4868 {
4869 if (GET_CODE (scale_rtx) != CONST_INT)
4870 return 0;
4871 scale = INTVAL (scale_rtx);
4872 }
4873
4874 /* Allow the arg pointer and stack pointer as the index if there is no scaling. */
4875 if (base && index && scale == 1
4876 && (index == arg_pointer_rtx || index == frame_pointer_rtx
4877 || index == stack_pointer_rtx))
4878 {
4879 rtx tmp = base;
4880 base = index;
4881 index = tmp;
4882 }
4883
4884 /* Special case: %ebp cannot be encoded as a base without a displacement. */
4885 if ((base == hard_frame_pointer_rtx
4886 || base == frame_pointer_rtx
4887 || base == arg_pointer_rtx) && !disp)
4888 disp = const0_rtx;
4889
4890 /* Special case: on K6, [%esi] makes the instruction vector decoded.
4891 Avoid this by transforming to [%esi+0]. */
4892 if (ix86_cpu == PROCESSOR_K6 && !optimize_size
4893 && base && !index && !disp
4894 && REG_P (base)
4895 && REGNO_REG_CLASS (REGNO (base)) == SIREG)
4896 disp = const0_rtx;
4897
4898 /* Special case: encode reg+reg instead of reg*2. */
4899 if (!base && index && scale && scale == 2)
4900 base = index, scale = 1;
4901
4902 /* Special case: scaling cannot be encoded without base or displacement. */
4903 if (!base && !disp && index && scale != 1)
4904 disp = const0_rtx;
4905
4906 out->base = base;
4907 out->index = index;
4908 out->disp = disp;
4909 out->scale = scale;
4910
4911 return retval;
4912 }
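/* Worked example (illustrative, not from the original sources): the canonical
   address of "12(%ebx,%eax,4)",

     (plus:SI (plus:SI (mult:SI (reg:SI %eax) (const_int 4))
		       (reg:SI %ebx))
	      (const_int 12))

   decomposes into base = %ebx, index = %eax, scale = 4 and disp = 12.  */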
4913 \f
4914 /* Return the cost of the memory address X.
4915 For the i386, it is better to use a complex address than to let gcc copy
4916 the address into a reg and make a new pseudo. But not if the address
4917 requires two regs - that would mean more pseudos with longer
4918 lifetimes. */
4919 int
4920 ix86_address_cost (x)
4921 rtx x;
4922 {
4923 struct ix86_address parts;
4924 int cost = 1;
4925
4926 if (!ix86_decompose_address (x, &parts))
4927 abort ();
4928
4929 if (parts.base && GET_CODE (parts.base) == SUBREG)
4930 parts.base = SUBREG_REG (parts.base);
4931 if (parts.index && GET_CODE (parts.index) == SUBREG)
4932 parts.index = SUBREG_REG (parts.index);
4933
4934 /* More complex memory references are better. */
4935 if (parts.disp && parts.disp != const0_rtx)
4936 cost--;
4937
4938 /* Attempt to minimize number of registers in the address. */
4939 if ((parts.base
4940 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER))
4941 || (parts.index
4942 && (!REG_P (parts.index)
4943 || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)))
4944 cost++;
4945
4946 if (parts.base
4947 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER)
4948 && parts.index
4949 && (!REG_P (parts.index) || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)
4950 && parts.base != parts.index)
4951 cost++;
4952
4953 /* The AMD-K6 doesn't like addresses with the ModR/M byte set to 00_xxx_100b,
4954 since its predecode logic can't detect the length of such instructions
4955 and they degenerate to vector decoded. Increase the cost of such
4956 addresses here. The penalty is at least 2 cycles. It may be worthwhile
4957 to split such addresses or even refuse them altogether.
4958
4959 Following addressing modes are affected:
4960 [base+scale*index]
4961 [scale*index+disp]
4962 [base+index]
4963
4964 The first and last cases may be avoidable by explicitly coding a zero
4965 displacement in the memory address, but I don't have an AMD-K6 machine
4966 handy to check this theory. */
4967
4968 if (TARGET_K6
4969 && ((!parts.disp && parts.base && parts.index && parts.scale != 1)
4970 || (parts.disp && !parts.base && parts.index && parts.scale != 1)
4971 || (!parts.disp && parts.base && parts.index && parts.scale == 1)))
4972 cost += 10;
4973
4974 return cost;
4975 }
4976 \f
4977 /* If X is a machine specific address (i.e. a symbol or label being
4978 referenced as a displacement from the GOT implemented using an
4979 UNSPEC), then return the base term. Otherwise return X. */
4980
4981 rtx
4982 ix86_find_base_term (x)
4983 rtx x;
4984 {
4985 rtx term;
4986
4987 if (TARGET_64BIT)
4988 {
4989 if (GET_CODE (x) != CONST)
4990 return x;
4991 term = XEXP (x, 0);
4992 if (GET_CODE (term) == PLUS
4993 && (GET_CODE (XEXP (term, 1)) == CONST_INT
4994 || GET_CODE (XEXP (term, 1)) == CONST_DOUBLE))
4995 term = XEXP (term, 0);
4996 if (GET_CODE (term) != UNSPEC
4997 || XINT (term, 1) != UNSPEC_GOTPCREL)
4998 return x;
4999
5000 term = XVECEXP (term, 0, 0);
5001
5002 if (GET_CODE (term) != SYMBOL_REF
5003 && GET_CODE (term) != LABEL_REF)
5004 return x;
5005
5006 return term;
5007 }
5008
5009 if (GET_CODE (x) != PLUS
5010 || XEXP (x, 0) != pic_offset_table_rtx
5011 || GET_CODE (XEXP (x, 1)) != CONST)
5012 return x;
5013
5014 term = XEXP (XEXP (x, 1), 0);
5015
5016 if (GET_CODE (term) == PLUS && GET_CODE (XEXP (term, 1)) == CONST_INT)
5017 term = XEXP (term, 0);
5018
5019 if (GET_CODE (term) != UNSPEC
5020 || XINT (term, 1) != UNSPEC_GOTOFF)
5021 return x;
5022
5023 term = XVECEXP (term, 0, 0);
5024
5025 if (GET_CODE (term) != SYMBOL_REF
5026 && GET_CODE (term) != LABEL_REF)
5027 return x;
5028
5029 return term;
5030 }
5031 \f
5032 /* Determine if a given RTX is a valid constant. We already know this
5033 satisfies CONSTANT_P. */
5034
5035 bool
5036 legitimate_constant_p (x)
5037 rtx x;
5038 {
5039 rtx inner;
5040
5041 switch (GET_CODE (x))
5042 {
5043 case SYMBOL_REF:
5044 /* TLS symbols are not constant. */
5045 if (tls_symbolic_operand (x, Pmode))
5046 return false;
5047 break;
5048
5049 case CONST:
5050 inner = XEXP (x, 0);
5051
5052 /* Offsets of TLS symbols are never valid.
5053 Discourage CSE from creating them. */
5054 if (GET_CODE (inner) == PLUS
5055 && tls_symbolic_operand (XEXP (inner, 0), Pmode))
5056 return false;
5057
5058 /* Only some unspecs are valid as "constants". */
5059 if (GET_CODE (inner) == UNSPEC)
5060 switch (XINT (inner, 1))
5061 {
5062 case UNSPEC_TPOFF:
5063 return local_exec_symbolic_operand (XVECEXP (inner, 0, 0), Pmode);
5064 default:
5065 return false;
5066 }
5067 break;
5068
5069 default:
5070 break;
5071 }
5072
5073 /* Otherwise we handle everything else in the move patterns. */
5074 return true;
5075 }
5076
5077 /* Determine if a given RTX is a valid constant address. */
5078
5079 bool
5080 constant_address_p (x)
5081 rtx x;
5082 {
5083 switch (GET_CODE (x))
5084 {
5085 case LABEL_REF:
5086 case CONST_INT:
5087 return true;
5088
5089 case CONST_DOUBLE:
5090 return TARGET_64BIT;
5091
5092 case CONST:
5093 /* For Mach-O, really believe the CONST. */
5094 if (TARGET_MACHO)
5095 return true;
5096 /* Otherwise fall through. */
5097 case SYMBOL_REF:
5098 return !flag_pic && legitimate_constant_p (x);
5099
5100 default:
5101 return false;
5102 }
5103 }
5104
5105 /* Nonzero if the constant value X is a legitimate general operand
5106 when generating PIC code. It is given that flag_pic is on and
5107 that X satisfies CONSTANT_P or is a CONST_DOUBLE. */
5108
5109 bool
5110 legitimate_pic_operand_p (x)
5111 rtx x;
5112 {
5113 rtx inner;
5114
5115 switch (GET_CODE (x))
5116 {
5117 case CONST:
5118 inner = XEXP (x, 0);
5119
5120 /* Only some unspecs are valid as "constants". */
5121 if (GET_CODE (inner) == UNSPEC)
5122 switch (XINT (inner, 1))
5123 {
5124 case UNSPEC_TPOFF:
5125 return local_exec_symbolic_operand (XVECEXP (inner, 0, 0), Pmode);
5126 default:
5127 return false;
5128 }
5129 /* FALLTHRU */
5130
5131 case SYMBOL_REF:
5132 case LABEL_REF:
5133 return legitimate_pic_address_disp_p (x);
5134
5135 default:
5136 return true;
5137 }
5138 }
5139
5140 /* Determine if a given CONST RTX is a valid memory displacement
5141 in PIC mode. */
5142
5143 int
5144 legitimate_pic_address_disp_p (disp)
5145 register rtx disp;
5146 {
5147 bool saw_plus;
5148
5149 /* In 64bit mode we can allow direct addresses of symbols and labels
5150 when they are not dynamic symbols. */
5151 if (TARGET_64BIT && local_symbolic_operand (disp, Pmode))
5152 return 1;
5153 if (GET_CODE (disp) != CONST)
5154 return 0;
5155 disp = XEXP (disp, 0);
5156
5157 if (TARGET_64BIT)
5158 {
5159 /* It is unsafe to allow PLUS expressions here; that would lift the limit
5160 on the allowed distance into the GOT table. We should not need these anyway. */
5161 if (GET_CODE (disp) != UNSPEC
5162 || XINT (disp, 1) != UNSPEC_GOTPCREL)
5163 return 0;
5164
5165 if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF
5166 && GET_CODE (XVECEXP (disp, 0, 0)) != LABEL_REF)
5167 return 0;
5168 return 1;
5169 }
5170
5171 saw_plus = false;
5172 if (GET_CODE (disp) == PLUS)
5173 {
5174 if (GET_CODE (XEXP (disp, 1)) != CONST_INT)
5175 return 0;
5176 disp = XEXP (disp, 0);
5177 saw_plus = true;
5178 }
5179
5180 /* Allow {LABEL | SYMBOL}_REF - SYMBOL_REF-FOR-PICBASE for Mach-O. */
5181 if (TARGET_MACHO && GET_CODE (disp) == MINUS)
5182 {
5183 if (GET_CODE (XEXP (disp, 0)) == LABEL_REF
5184 || GET_CODE (XEXP (disp, 0)) == SYMBOL_REF)
5185 if (GET_CODE (XEXP (disp, 1)) == SYMBOL_REF)
5186 {
5187 const char *sym_name = XSTR (XEXP (disp, 1), 0);
5188 if (strstr (sym_name, "$pb") != 0)
5189 return 1;
5190 }
5191 }
5192
5193 if (GET_CODE (disp) != UNSPEC)
5194 return 0;
5195
5196 switch (XINT (disp, 1))
5197 {
5198 case UNSPEC_GOT:
5199 if (saw_plus)
5200 return false;
5201 return GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF;
5202 case UNSPEC_GOTOFF:
5203 return local_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
5204 case UNSPEC_GOTTPOFF:
5205 case UNSPEC_GOTNTPOFF:
5206 case UNSPEC_INDNTPOFF:
5207 if (saw_plus)
5208 return false;
5209 return initial_exec_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
5210 case UNSPEC_NTPOFF:
5211 return local_exec_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
5212 case UNSPEC_DTPOFF:
5213 return local_dynamic_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
5214 }
5215
5216 return 0;
5217 }
5218
5219 /* GO_IF_LEGITIMATE_ADDRESS recognizes an RTL expression that is a valid
5220 memory address for an instruction. The MODE argument is the machine mode
5221 for the MEM expression that wants to use this address.
5222
5223 It only recognizes addresses in canonical form. LEGITIMIZE_ADDRESS should
5224 convert common non-canonical forms to canonical form so that they will
5225 be recognized. */
5226
5227 int
5228 legitimate_address_p (mode, addr, strict)
5229 enum machine_mode mode;
5230 register rtx addr;
5231 int strict;
5232 {
5233 struct ix86_address parts;
5234 rtx base, index, disp;
5235 HOST_WIDE_INT scale;
5236 const char *reason = NULL;
5237 rtx reason_rtx = NULL_RTX;
5238
5239 if (TARGET_DEBUG_ADDR)
5240 {
5241 fprintf (stderr,
5242 "\n======\nGO_IF_LEGITIMATE_ADDRESS, mode = %s, strict = %d\n",
5243 GET_MODE_NAME (mode), strict);
5244 debug_rtx (addr);
5245 }
5246
5247 if (GET_CODE (addr) == UNSPEC && XINT (addr, 1) == UNSPEC_TP)
5248 {
5249 if (TARGET_DEBUG_ADDR)
5250 fprintf (stderr, "Success.\n");
5251 return TRUE;
5252 }
5253
5254 if (ix86_decompose_address (addr, &parts) <= 0)
5255 {
5256 reason = "decomposition failed";
5257 goto report_error;
5258 }
5259
5260 base = parts.base;
5261 index = parts.index;
5262 disp = parts.disp;
5263 scale = parts.scale;
5264
5265 /* Validate base register.
5266
5267 Don't allow SUBREG's here, it can lead to spill failures when the base
5268 is one word out of a two word structure, which is represented internally
5269 as a DImode int. */
5270
5271 if (base)
5272 {
5273 rtx reg;
5274 reason_rtx = base;
5275
5276 if (GET_CODE (base) == SUBREG)
5277 reg = SUBREG_REG (base);
5278 else
5279 reg = base;
5280
5281 if (GET_CODE (reg) != REG)
5282 {
5283 reason = "base is not a register";
5284 goto report_error;
5285 }
5286
5287 if (GET_MODE (base) != Pmode)
5288 {
5289 reason = "base is not in Pmode";
5290 goto report_error;
5291 }
5292
5293 if ((strict && ! REG_OK_FOR_BASE_STRICT_P (reg))
5294 || (! strict && ! REG_OK_FOR_BASE_NONSTRICT_P (reg)))
5295 {
5296 reason = "base is not valid";
5297 goto report_error;
5298 }
5299 }
5300
5301 /* Validate index register.
5302
5303 Don't allow SUBREG's here, it can lead to spill failures when the index
5304 is one word out of a two word structure, which is represented internally
5305 as a DImode int. */
5306
5307 if (index)
5308 {
5309 rtx reg;
5310 reason_rtx = index;
5311
5312 if (GET_CODE (index) == SUBREG)
5313 reg = SUBREG_REG (index);
5314 else
5315 reg = index;
5316
5317 if (GET_CODE (reg) != REG)
5318 {
5319 reason = "index is not a register";
5320 goto report_error;
5321 }
5322
5323 if (GET_MODE (index) != Pmode)
5324 {
5325 reason = "index is not in Pmode";
5326 goto report_error;
5327 }
5328
5329 if ((strict && ! REG_OK_FOR_INDEX_STRICT_P (reg))
5330 || (! strict && ! REG_OK_FOR_INDEX_NONSTRICT_P (reg)))
5331 {
5332 reason = "index is not valid";
5333 goto report_error;
5334 }
5335 }
5336
5337 /* Validate scale factor. */
5338 if (scale != 1)
5339 {
5340 reason_rtx = GEN_INT (scale);
5341 if (!index)
5342 {
5343 reason = "scale without index";
5344 goto report_error;
5345 }
5346
5347 if (scale != 2 && scale != 4 && scale != 8)
5348 {
5349 reason = "scale is not a valid multiplier";
5350 goto report_error;
5351 }
5352 }
5353
5354 /* Validate displacement. */
5355 if (disp)
5356 {
5357 reason_rtx = disp;
5358
5359 if (TARGET_64BIT)
5360 {
5361 if (!x86_64_sign_extended_value (disp, !(index || base)))
5362 {
5363 reason = "displacement is out of range";
5364 goto report_error;
5365 }
5366 }
5367 else
5368 {
5369 if (GET_CODE (disp) == CONST_DOUBLE)
5370 {
5371 reason = "displacement is a const_double";
5372 goto report_error;
5373 }
5374 }
5375
5376 if (GET_CODE (disp) == CONST
5377 && GET_CODE (XEXP (disp, 0)) == UNSPEC)
5378 switch (XINT (XEXP (disp, 0), 1))
5379 {
5380 case UNSPEC_GOT:
5381 case UNSPEC_GOTOFF:
5382 case UNSPEC_GOTPCREL:
5383 if (!flag_pic)
5384 abort ();
5385 goto is_legitimate_pic;
5386
5387 case UNSPEC_GOTTPOFF:
5388 case UNSPEC_GOTNTPOFF:
5389 case UNSPEC_INDNTPOFF:
5390 case UNSPEC_NTPOFF:
5391 case UNSPEC_DTPOFF:
5392 break;
5393
5394 default:
5395 reason = "invalid address unspec";
5396 goto report_error;
5397 }
5398
5399 else if (flag_pic && (SYMBOLIC_CONST (disp)
5400 #if TARGET_MACHO
5401 && !machopic_operand_p (disp)
5402 #endif
5403 ))
5404 {
5405 is_legitimate_pic:
5406 if (TARGET_64BIT && (index || base))
5407 {
5408 /* foo@dtpoff(%rX) is ok. */
5409 if (GET_CODE (disp) != CONST
5410 || GET_CODE (XEXP (disp, 0)) != PLUS
5411 || GET_CODE (XEXP (XEXP (disp, 0), 0)) != UNSPEC
5412 || GET_CODE (XEXP (XEXP (disp, 0), 1)) != CONST_INT
5413 || (XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_DTPOFF
5414 && XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_NTPOFF))
5415 {
5416 reason = "non-constant pic memory reference";
5417 goto report_error;
5418 }
5419 }
5420 else if (! legitimate_pic_address_disp_p (disp))
5421 {
5422 reason = "displacement is an invalid pic construct";
5423 goto report_error;
5424 }
5425
5426 /* This code used to verify that a symbolic pic displacement
5427 includes the pic_offset_table_rtx register.
5428
5429 While this is a good idea, unfortunately these constructs may
5430 be created by the "adds using lea" optimization for incorrect
5431 code like:
5432
5433 int a;
5434 int foo(int i)
5435 {
5436 return *(&a+i);
5437 }
5438
5439 This code is nonsensical, but results in addressing the
5440 GOT table with a pic_offset_table_rtx base. We can't
5441 just refuse it easily, since it gets matched by the
5442 "addsi3" pattern, which later gets split to lea in the
5443 case where the output register differs from the input. While this
5444 could be handled by a separate addsi pattern for this case
5445 that never results in lea, disabling this test seems to be
5446 the easier and correct fix for the crash. */
5447 }
5448 else if (!CONSTANT_ADDRESS_P (disp))
5449 {
5450 reason = "displacement is not constant";
5451 goto report_error;
5452 }
5453 }
5454
5455 /* Everything looks valid. */
5456 if (TARGET_DEBUG_ADDR)
5457 fprintf (stderr, "Success.\n");
5458 return TRUE;
5459
5460 report_error:
5461 if (TARGET_DEBUG_ADDR)
5462 {
5463 fprintf (stderr, "Error: %s\n", reason);
5464 debug_rtx (reason_rtx);
5465 }
5466 return FALSE;
5467 }
5468 \f
5469 /* Return a unique alias set for the GOT. */
5470
5471 static HOST_WIDE_INT
5472 ix86_GOT_alias_set ()
5473 {
5474 static HOST_WIDE_INT set = -1;
5475 if (set == -1)
5476 set = new_alias_set ();
5477 return set;
5478 }
5479
5480 /* Return a legitimate reference for ORIG (an address) using the
5481 register REG. If REG is 0, a new pseudo is generated.
5482
5483 There are two types of references that must be handled:
5484
5485 1. Global data references must load the address from the GOT, via
5486 the PIC reg. An insn is emitted to do this load, and the reg is
5487 returned.
5488
5489 2. Static data references, constant pool addresses, and code labels
5490 compute the address as an offset from the GOT, whose base is in
5491 the PIC reg. Static data objects have SYMBOL_REF_FLAG set to
5492 differentiate them from global data objects. The returned
5493 address is the PIC reg + an unspec constant.
5494
5495 GO_IF_LEGITIMATE_ADDRESS rejects symbolic references unless the PIC
5496 reg also appears in the address. */
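/* An illustrative sketch (added; not part of the original comment): for a
   global symbol the 32-bit result is a GOT load such as
       (mem (plus (reg %ebx) (const (unspec [foo] UNSPEC_GOT))))
   i.e. "movl foo@GOT(%ebx), %reg", while a local symbol becomes the
   constant (plus (reg %ebx) (const (unspec [foo] UNSPEC_GOTOFF))),
   i.e. an address of the form foo@GOTOFF(%ebx).  */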
5497
5498 rtx
5499 legitimize_pic_address (orig, reg)
5500 rtx orig;
5501 rtx reg;
5502 {
5503 rtx addr = orig;
5504 rtx new = orig;
5505 rtx base;
5506
5507 #if TARGET_MACHO
5508 if (reg == 0)
5509 reg = gen_reg_rtx (Pmode);
5510 /* Use the generic Mach-O PIC machinery. */
5511 return machopic_legitimize_pic_address (orig, GET_MODE (orig), reg);
5512 #endif
5513
5514 if (local_symbolic_operand (addr, Pmode))
5515 {
5516 /* In 64bit mode we can address such objects directly. */
5517 if (TARGET_64BIT)
5518 new = addr;
5519 else
5520 {
5521 /* This symbol may be referenced via a displacement from the PIC
5522 base address (@GOTOFF). */
5523
5524 if (reload_in_progress)
5525 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
5526 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
5527 new = gen_rtx_CONST (Pmode, new);
5528 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
5529
5530 if (reg != 0)
5531 {
5532 emit_move_insn (reg, new);
5533 new = reg;
5534 }
5535 }
5536 }
5537 else if (GET_CODE (addr) == SYMBOL_REF)
5538 {
5539 if (TARGET_64BIT)
5540 {
5541 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTPCREL);
5542 new = gen_rtx_CONST (Pmode, new);
5543 new = gen_rtx_MEM (Pmode, new);
5544 RTX_UNCHANGING_P (new) = 1;
5545 set_mem_alias_set (new, ix86_GOT_alias_set ());
5546
5547 if (reg == 0)
5548 reg = gen_reg_rtx (Pmode);
5549 /* Use gen_movsi directly; otherwise the address is loaded
5550 into a register for CSE.  We don't want to CSE these addresses;
5551 instead we CSE the addresses loaded from the GOT table, so skip this. */
5552 emit_insn (gen_movsi (reg, new));
5553 new = reg;
5554 }
5555 else
5556 {
5557 /* This symbol must be referenced via a load from the
5558 Global Offset Table (@GOT). */
5559
5560 if (reload_in_progress)
5561 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
5562 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT);
5563 new = gen_rtx_CONST (Pmode, new);
5564 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
5565 new = gen_rtx_MEM (Pmode, new);
5566 RTX_UNCHANGING_P (new) = 1;
5567 set_mem_alias_set (new, ix86_GOT_alias_set ());
5568
5569 if (reg == 0)
5570 reg = gen_reg_rtx (Pmode);
5571 emit_move_insn (reg, new);
5572 new = reg;
5573 }
5574 }
5575 else
5576 {
5577 if (GET_CODE (addr) == CONST)
5578 {
5579 addr = XEXP (addr, 0);
5580
5581 /* We must match the stuff we generated before.  Assume the only
5582 unspecs that can get here are ours.  Not that we could do
5583 anything with them anyway... */
5584 if (GET_CODE (addr) == UNSPEC
5585 || (GET_CODE (addr) == PLUS
5586 && GET_CODE (XEXP (addr, 0)) == UNSPEC))
5587 return orig;
5588 if (GET_CODE (addr) != PLUS)
5589 abort ();
5590 }
5591 if (GET_CODE (addr) == PLUS)
5592 {
5593 rtx op0 = XEXP (addr, 0), op1 = XEXP (addr, 1);
5594
5595 /* Check first to see if this is a constant offset from a @GOTOFF
5596 symbol reference. */
5597 if (local_symbolic_operand (op0, Pmode)
5598 && GET_CODE (op1) == CONST_INT)
5599 {
5600 if (!TARGET_64BIT)
5601 {
5602 if (reload_in_progress)
5603 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
5604 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op0),
5605 UNSPEC_GOTOFF);
5606 new = gen_rtx_PLUS (Pmode, new, op1);
5607 new = gen_rtx_CONST (Pmode, new);
5608 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
5609
5610 if (reg != 0)
5611 {
5612 emit_move_insn (reg, new);
5613 new = reg;
5614 }
5615 }
5616 else
5617 {
5618 if (INTVAL (op1) < -16*1024*1024
5619 || INTVAL (op1) >= 16*1024*1024)
5620 new = gen_rtx_PLUS (Pmode, op0, force_reg (Pmode, op1));
5621 }
5622 }
5623 else
5624 {
5625 base = legitimize_pic_address (XEXP (addr, 0), reg);
5626 new = legitimize_pic_address (XEXP (addr, 1),
5627 base == reg ? NULL_RTX : reg);
5628
5629 if (GET_CODE (new) == CONST_INT)
5630 new = plus_constant (base, INTVAL (new));
5631 else
5632 {
5633 if (GET_CODE (new) == PLUS && CONSTANT_P (XEXP (new, 1)))
5634 {
5635 base = gen_rtx_PLUS (Pmode, base, XEXP (new, 0));
5636 new = XEXP (new, 1);
5637 }
5638 new = gen_rtx_PLUS (Pmode, base, new);
5639 }
5640 }
5641 }
5642 }
5643 return new;
5644 }
5645
5646 static void
5647 ix86_encode_section_info (decl, first)
5648 tree decl;
5649 int first ATTRIBUTE_UNUSED;
5650 {
5651 bool local_p = (*targetm.binds_local_p) (decl);
5652 rtx rtl, symbol;
5653
5654 rtl = DECL_P (decl) ? DECL_RTL (decl) : TREE_CST_RTL (decl);
5655 if (GET_CODE (rtl) != MEM)
5656 return;
5657 symbol = XEXP (rtl, 0);
5658 if (GET_CODE (symbol) != SYMBOL_REF)
5659 return;
5660
5661 /* For basic x86, if using PIC, mark a SYMBOL_REF for a non-global
5662 symbol so that we may access it directly in the GOT. */
5663
5664 if (flag_pic)
5665 SYMBOL_REF_FLAG (symbol) = local_p;
5666
5667 /* For ELF, encode thread-local data with %[GLil] for "global dynamic",
5668 "local dynamic", "initial exec" or "local exec" TLS models
5669 respectively. */
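/* For example (illustration only): a global-dynamic TLS variable `foo'
   has its SYMBOL_REF name rewritten to "%Gfoo" below, and
   ix86_strip_name_encoding removes the two-character prefix again when
   the name is printed.  */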
5670
5671 if (TREE_CODE (decl) == VAR_DECL && DECL_THREAD_LOCAL (decl))
5672 {
5673 const char *symbol_str;
5674 char *newstr;
5675 size_t len;
5676 enum tls_model kind = decl_tls_model (decl);
5677
5678 if (TARGET_64BIT && ! flag_pic)
5679 {
5680 /* x86-64 doesn't allow non-pic code for shared libraries,
5681 so don't generate GD/LD TLS models for non-pic code. */
5682 switch (kind)
5683 {
5684 case TLS_MODEL_GLOBAL_DYNAMIC:
5685 kind = TLS_MODEL_INITIAL_EXEC; break;
5686 case TLS_MODEL_LOCAL_DYNAMIC:
5687 kind = TLS_MODEL_LOCAL_EXEC; break;
5688 default:
5689 break;
5690 }
5691 }
5692
5693 symbol_str = XSTR (symbol, 0);
5694
5695 if (symbol_str[0] == '%')
5696 {
5697 if (symbol_str[1] == tls_model_chars[kind])
5698 return;
5699 symbol_str += 2;
5700 }
5701 len = strlen (symbol_str) + 1;
5702 newstr = alloca (len + 2);
5703
5704 newstr[0] = '%';
5705 newstr[1] = tls_model_chars[kind];
5706 memcpy (newstr + 2, symbol_str, len);
5707
5708 XSTR (symbol, 0) = ggc_alloc_string (newstr, len + 2 - 1);
5709 }
5710 }
5711
5712 /* Undo the above when printing symbol names. */
5713
5714 static const char *
5715 ix86_strip_name_encoding (str)
5716 const char *str;
5717 {
5718 if (str[0] == '%')
5719 str += 2;
5720 if (str [0] == '*')
5721 str += 1;
5722 return str;
5723 }
5724 \f
5725 /* Load the thread pointer into a register. */
5726
5727 static rtx
5728 get_thread_pointer ()
5729 {
5730 rtx tp;
5731
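  /* Added note: the UNSPEC_TP memory reference built here is printed by
     print_operand_address as %gs:0 (or %fs:0 in 64-bit mode), i.e. a load
     of the thread pointer from the TLS segment.  */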
5732 tp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), UNSPEC_TP);
5733 tp = gen_rtx_MEM (Pmode, tp);
5734 RTX_UNCHANGING_P (tp) = 1;
5735 set_mem_alias_set (tp, ix86_GOT_alias_set ());
5736 tp = force_reg (Pmode, tp);
5737
5738 return tp;
5739 }
5740
5741 /* Try machine-dependent ways of modifying an illegitimate address
5742 to be legitimate. If we find one, return the new, valid address.
5743 This macro is used in only one place: `memory_address' in explow.c.
5744
5745 OLDX is the address as it was before break_out_memory_refs was called.
5746 In some cases it is useful to look at this to decide what needs to be done.
5747
5748 MODE and WIN are passed so that this macro can use
5749 GO_IF_LEGITIMATE_ADDRESS.
5750
5751 It is always safe for this macro to do nothing. It exists to recognize
5752 opportunities to optimize the output.
5753
5754 For the 80386, we handle X+REG by loading X into a register R and
5755 using R+REG. R will go in a general reg and indexing will be used.
5756 However, if REG is a broken-out memory address or multiplication,
5757 nothing needs to be done because REG can certainly go in a general reg.
5758
5759 When -fpic is used, special handling is needed for symbolic references.
5760 See comments by legitimize_pic_address in i386.c for details. */
5761
5762 rtx
5763 legitimize_address (x, oldx, mode)
5764 register rtx x;
5765 register rtx oldx ATTRIBUTE_UNUSED;
5766 enum machine_mode mode;
5767 {
5768 int changed = 0;
5769 unsigned log;
5770
5771 if (TARGET_DEBUG_ADDR)
5772 {
5773 fprintf (stderr, "\n==========\nLEGITIMIZE_ADDRESS, mode = %s\n",
5774 GET_MODE_NAME (mode));
5775 debug_rtx (x);
5776 }
5777
5778 log = tls_symbolic_operand (x, mode);
5779 if (log)
5780 {
5781 rtx dest, base, off, pic;
5782 int type;
5783
5784 switch (log)
5785 {
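    /* Added orientation note: the general- and local-dynamic models
       resolve the address through a call to __tls_get_addr (via the
       tls_global_dynamic_* and tls_local_dynamic_base_* patterns),
       while the initial- and local-exec models compute an offset from
       the thread pointer directly.  */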
5786 case TLS_MODEL_GLOBAL_DYNAMIC:
5787 dest = gen_reg_rtx (Pmode);
5788 if (TARGET_64BIT)
5789 {
5790 rtx rax = gen_rtx_REG (Pmode, 0), insns;
5791
5792 start_sequence ();
5793 emit_call_insn (gen_tls_global_dynamic_64 (rax, x));
5794 insns = get_insns ();
5795 end_sequence ();
5796
5797 emit_libcall_block (insns, dest, rax, x);
5798 }
5799 else
5800 emit_insn (gen_tls_global_dynamic_32 (dest, x));
5801 break;
5802
5803 case TLS_MODEL_LOCAL_DYNAMIC:
5804 base = gen_reg_rtx (Pmode);
5805 if (TARGET_64BIT)
5806 {
5807 rtx rax = gen_rtx_REG (Pmode, 0), insns, note;
5808
5809 start_sequence ();
5810 emit_call_insn (gen_tls_local_dynamic_base_64 (rax));
5811 insns = get_insns ();
5812 end_sequence ();
5813
5814 note = gen_rtx_EXPR_LIST (VOIDmode, const0_rtx, NULL);
5815 note = gen_rtx_EXPR_LIST (VOIDmode, ix86_tls_get_addr (), note);
5816 emit_libcall_block (insns, base, rax, note);
5817 }
5818 else
5819 emit_insn (gen_tls_local_dynamic_base_32 (base));
5820
5821 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_DTPOFF);
5822 off = gen_rtx_CONST (Pmode, off);
5823
5824 return gen_rtx_PLUS (Pmode, base, off);
5825
5826 case TLS_MODEL_INITIAL_EXEC:
5827 if (TARGET_64BIT)
5828 {
5829 pic = NULL;
5830 type = UNSPEC_GOTNTPOFF;
5831 }
5832 else if (flag_pic)
5833 {
5834 if (reload_in_progress)
5835 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
5836 pic = pic_offset_table_rtx;
5837 type = TARGET_GNU_TLS ? UNSPEC_GOTNTPOFF : UNSPEC_GOTTPOFF;
5838 }
5839 else if (!TARGET_GNU_TLS)
5840 {
5841 pic = gen_reg_rtx (Pmode);
5842 emit_insn (gen_set_got (pic));
5843 type = UNSPEC_GOTTPOFF;
5844 }
5845 else
5846 {
5847 pic = NULL;
5848 type = UNSPEC_INDNTPOFF;
5849 }
5850
5851 base = get_thread_pointer ();
5852
5853 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), type);
5854 off = gen_rtx_CONST (Pmode, off);
5855 if (pic)
5856 off = gen_rtx_PLUS (Pmode, pic, off);
5857 off = gen_rtx_MEM (Pmode, off);
5858 RTX_UNCHANGING_P (off) = 1;
5859 set_mem_alias_set (off, ix86_GOT_alias_set ());
5860 dest = gen_reg_rtx (Pmode);
5861
5862 if (TARGET_64BIT || TARGET_GNU_TLS)
5863 {
5864 emit_move_insn (dest, off);
5865 return gen_rtx_PLUS (Pmode, base, dest);
5866 }
5867 else
5868 emit_insn (gen_subsi3 (dest, base, off));
5869 break;
5870
5871 case TLS_MODEL_LOCAL_EXEC:
5872 base = get_thread_pointer ();
5873
5874 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x),
5875 (TARGET_64BIT || TARGET_GNU_TLS)
5876 ? UNSPEC_NTPOFF : UNSPEC_TPOFF);
5877 off = gen_rtx_CONST (Pmode, off);
5878
5879 if (TARGET_64BIT || TARGET_GNU_TLS)
5880 return gen_rtx_PLUS (Pmode, base, off);
5881 else
5882 {
5883 dest = gen_reg_rtx (Pmode);
5884 emit_insn (gen_subsi3 (dest, base, off));
5885 }
5886 break;
5887
5888 default:
5889 abort ();
5890 }
5891
5892 return dest;
5893 }
5894
5895 if (flag_pic && SYMBOLIC_CONST (x))
5896 return legitimize_pic_address (x, 0);
5897
5898 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
5899 if (GET_CODE (x) == ASHIFT
5900 && GET_CODE (XEXP (x, 1)) == CONST_INT
5901 && (log = (unsigned) exact_log2 (INTVAL (XEXP (x, 1)))) < 4)
5902 {
5903 changed = 1;
5904 x = gen_rtx_MULT (Pmode, force_reg (Pmode, XEXP (x, 0)),
5905 GEN_INT (1 << log));
5906 }
5907
5908 if (GET_CODE (x) == PLUS)
5909 {
5910 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
5911
5912 if (GET_CODE (XEXP (x, 0)) == ASHIFT
5913 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
5914 && (log = (unsigned) exact_log2 (INTVAL (XEXP (XEXP (x, 0), 1)))) < 4)
5915 {
5916 changed = 1;
5917 XEXP (x, 0) = gen_rtx_MULT (Pmode,
5918 force_reg (Pmode, XEXP (XEXP (x, 0), 0)),
5919 GEN_INT (1 << log));
5920 }
5921
5922 if (GET_CODE (XEXP (x, 1)) == ASHIFT
5923 && GET_CODE (XEXP (XEXP (x, 1), 1)) == CONST_INT
5924 && (log = (unsigned) exact_log2 (INTVAL (XEXP (XEXP (x, 1), 1)))) < 4)
5925 {
5926 changed = 1;
5927 XEXP (x, 1) = gen_rtx_MULT (Pmode,
5928 force_reg (Pmode, XEXP (XEXP (x, 1), 0)),
5929 GEN_INT (1 << log));
5930 }
5931
5932 /* Put multiply first if it isn't already. */
5933 if (GET_CODE (XEXP (x, 1)) == MULT)
5934 {
5935 rtx tmp = XEXP (x, 0);
5936 XEXP (x, 0) = XEXP (x, 1);
5937 XEXP (x, 1) = tmp;
5938 changed = 1;
5939 }
5940
5941 /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
5942 into (plus (plus (mult (reg) (const)) (reg)) (const)). This can be
5943 created by virtual register instantiation, register elimination, and
5944 similar optimizations. */
5945 if (GET_CODE (XEXP (x, 0)) == MULT && GET_CODE (XEXP (x, 1)) == PLUS)
5946 {
5947 changed = 1;
5948 x = gen_rtx_PLUS (Pmode,
5949 gen_rtx_PLUS (Pmode, XEXP (x, 0),
5950 XEXP (XEXP (x, 1), 0)),
5951 XEXP (XEXP (x, 1), 1));
5952 }
5953
5954 /* Canonicalize
5955 (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
5956 into (plus (plus (mult (reg) (const)) (reg)) (const)). */
5957 else if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS
5958 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
5959 && GET_CODE (XEXP (XEXP (x, 0), 1)) == PLUS
5960 && CONSTANT_P (XEXP (x, 1)))
5961 {
5962 rtx constant;
5963 rtx other = NULL_RTX;
5964
5965 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
5966 {
5967 constant = XEXP (x, 1);
5968 other = XEXP (XEXP (XEXP (x, 0), 1), 1);
5969 }
5970 else if (GET_CODE (XEXP (XEXP (XEXP (x, 0), 1), 1)) == CONST_INT)
5971 {
5972 constant = XEXP (XEXP (XEXP (x, 0), 1), 1);
5973 other = XEXP (x, 1);
5974 }
5975 else
5976 constant = 0;
5977
5978 if (constant)
5979 {
5980 changed = 1;
5981 x = gen_rtx_PLUS (Pmode,
5982 gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 0),
5983 XEXP (XEXP (XEXP (x, 0), 1), 0)),
5984 plus_constant (other, INTVAL (constant)));
5985 }
5986 }
5987
5988 if (changed && legitimate_address_p (mode, x, FALSE))
5989 return x;
5990
5991 if (GET_CODE (XEXP (x, 0)) == MULT)
5992 {
5993 changed = 1;
5994 XEXP (x, 0) = force_operand (XEXP (x, 0), 0);
5995 }
5996
5997 if (GET_CODE (XEXP (x, 1)) == MULT)
5998 {
5999 changed = 1;
6000 XEXP (x, 1) = force_operand (XEXP (x, 1), 0);
6001 }
6002
6003 if (changed
6004 && GET_CODE (XEXP (x, 1)) == REG
6005 && GET_CODE (XEXP (x, 0)) == REG)
6006 return x;
6007
6008 if (flag_pic && SYMBOLIC_CONST (XEXP (x, 1)))
6009 {
6010 changed = 1;
6011 x = legitimize_pic_address (x, 0);
6012 }
6013
6014 if (changed && legitimate_address_p (mode, x, FALSE))
6015 return x;
6016
6017 if (GET_CODE (XEXP (x, 0)) == REG)
6018 {
6019 register rtx temp = gen_reg_rtx (Pmode);
6020 register rtx val = force_operand (XEXP (x, 1), temp);
6021 if (val != temp)
6022 emit_move_insn (temp, val);
6023
6024 XEXP (x, 1) = temp;
6025 return x;
6026 }
6027
6028 else if (GET_CODE (XEXP (x, 1)) == REG)
6029 {
6030 register rtx temp = gen_reg_rtx (Pmode);
6031 register rtx val = force_operand (XEXP (x, 0), temp);
6032 if (val != temp)
6033 emit_move_insn (temp, val);
6034
6035 XEXP (x, 0) = temp;
6036 return x;
6037 }
6038 }
6039
6040 return x;
6041 }
6042 \f
6043 /* Print an integer constant expression in assembler syntax. Addition
6044 and subtraction are the only arithmetic that may appear in these
6045 expressions. FILE is the stdio stream to write to, X is the rtx, and
6046 CODE is the operand print code from the output string. */
6047
6048 static void
6049 output_pic_addr_const (file, x, code)
6050 FILE *file;
6051 rtx x;
6052 int code;
6053 {
6054 char buf[256];
6055
6056 switch (GET_CODE (x))
6057 {
6058 case PC:
6059 if (flag_pic)
6060 putc ('.', file);
6061 else
6062 abort ();
6063 break;
6064
6065 case SYMBOL_REF:
6066 assemble_name (file, XSTR (x, 0));
6067 if (!TARGET_MACHO && code == 'P' && ! SYMBOL_REF_FLAG (x))
6068 fputs ("@PLT", file);
6069 break;
6070
6071 case LABEL_REF:
6072 x = XEXP (x, 0);
6073 /* FALLTHRU */
6074 case CODE_LABEL:
6075 ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (x));
6076 assemble_name (asm_out_file, buf);
6077 break;
6078
6079 case CONST_INT:
6080 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
6081 break;
6082
6083 case CONST:
6084 /* This used to output parentheses around the expression,
6085 but that does not work on the 386 (either ATT or BSD assembler). */
6086 output_pic_addr_const (file, XEXP (x, 0), code);
6087 break;
6088
6089 case CONST_DOUBLE:
6090 if (GET_MODE (x) == VOIDmode)
6091 {
6092 /* We can use %d if the number is <32 bits and positive. */
6093 if (CONST_DOUBLE_HIGH (x) || CONST_DOUBLE_LOW (x) < 0)
6094 fprintf (file, "0x%lx%08lx",
6095 (unsigned long) CONST_DOUBLE_HIGH (x),
6096 (unsigned long) CONST_DOUBLE_LOW (x));
6097 else
6098 fprintf (file, HOST_WIDE_INT_PRINT_DEC, CONST_DOUBLE_LOW (x));
6099 }
6100 else
6101 /* We can't handle floating point constants;
6102 PRINT_OPERAND must handle them. */
6103 output_operand_lossage ("floating constant misused");
6104 break;
6105
6106 case PLUS:
6107 /* Some assemblers need integer constants to appear first. */
6108 if (GET_CODE (XEXP (x, 0)) == CONST_INT)
6109 {
6110 output_pic_addr_const (file, XEXP (x, 0), code);
6111 putc ('+', file);
6112 output_pic_addr_const (file, XEXP (x, 1), code);
6113 }
6114 else if (GET_CODE (XEXP (x, 1)) == CONST_INT)
6115 {
6116 output_pic_addr_const (file, XEXP (x, 1), code);
6117 putc ('+', file);
6118 output_pic_addr_const (file, XEXP (x, 0), code);
6119 }
6120 else
6121 abort ();
6122 break;
6123
6124 case MINUS:
6125 if (!TARGET_MACHO)
6126 putc (ASSEMBLER_DIALECT == ASM_INTEL ? '(' : '[', file);
6127 output_pic_addr_const (file, XEXP (x, 0), code);
6128 putc ('-', file);
6129 output_pic_addr_const (file, XEXP (x, 1), code);
6130 if (!TARGET_MACHO)
6131 putc (ASSEMBLER_DIALECT == ASM_INTEL ? ')' : ']', file);
6132 break;
6133
6134 case UNSPEC:
6135 if (XVECLEN (x, 0) != 1)
6136 abort ();
6137 output_pic_addr_const (file, XVECEXP (x, 0, 0), code);
6138 switch (XINT (x, 1))
6139 {
6140 case UNSPEC_GOT:
6141 fputs ("@GOT", file);
6142 break;
6143 case UNSPEC_GOTOFF:
6144 fputs ("@GOTOFF", file);
6145 break;
6146 case UNSPEC_GOTPCREL:
6147 fputs ("@GOTPCREL(%rip)", file);
6148 break;
6149 case UNSPEC_GOTTPOFF:
6150 /* FIXME: This might be @TPOFF in Sun ld too. */
6151 fputs ("@GOTTPOFF", file);
6152 break;
6153 case UNSPEC_TPOFF:
6154 fputs ("@TPOFF", file);
6155 break;
6156 case UNSPEC_NTPOFF:
6157 if (TARGET_64BIT)
6158 fputs ("@TPOFF", file);
6159 else
6160 fputs ("@NTPOFF", file);
6161 break;
6162 case UNSPEC_DTPOFF:
6163 fputs ("@DTPOFF", file);
6164 break;
6165 case UNSPEC_GOTNTPOFF:
6166 if (TARGET_64BIT)
6167 fputs ("@GOTTPOFF(%rip)", file);
6168 else
6169 fputs ("@GOTNTPOFF", file);
6170 break;
6171 case UNSPEC_INDNTPOFF:
6172 fputs ("@INDNTPOFF", file);
6173 break;
6174 default:
6175 output_operand_lossage ("invalid UNSPEC as operand");
6176 break;
6177 }
6178 break;
6179
6180 default:
6181 output_operand_lossage ("invalid expression as operand");
6182 }
6183 }
6184
6185 /* This is called from dwarfout.c via ASM_OUTPUT_DWARF_ADDR_CONST.
6186 We need to handle our special PIC relocations. */
6187
6188 void
6189 i386_dwarf_output_addr_const (file, x)
6190 FILE *file;
6191 rtx x;
6192 {
6193 #ifdef ASM_QUAD
6194 fprintf (file, "%s", TARGET_64BIT ? ASM_QUAD : ASM_LONG);
6195 #else
6196 if (TARGET_64BIT)
6197 abort ();
6198 fprintf (file, "%s", ASM_LONG);
6199 #endif
6200 if (flag_pic)
6201 output_pic_addr_const (file, x, '\0');
6202 else
6203 output_addr_const (file, x);
6204 fputc ('\n', file);
6205 }
6206
6207 /* This is called from dwarf2out.c via ASM_OUTPUT_DWARF_DTPREL.
6208 We need to emit DTP-relative relocations. */
6209
6210 void
6211 i386_output_dwarf_dtprel (file, size, x)
6212 FILE *file;
6213 int size;
6214 rtx x;
6215 {
6216 fputs (ASM_LONG, file);
6217 output_addr_const (file, x);
6218 fputs ("@DTPOFF", file);
6219 switch (size)
6220 {
6221 case 4:
6222 break;
6223 case 8:
6224 fputs (", 0", file);
6225 break;
6226 default:
6227 abort ();
6228 }
6229 }
6230
6231 /* In the name of slightly smaller debug output, and to cater to
6232 general assembler lossage, recognize PIC+GOTOFF and turn it back
6233 into a direct symbol reference. */
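/* For instance (illustration): an address of the form
   (plus (reg %ebx) (const (unspec [(symbol_ref "foo")] UNSPEC_GOTOFF)))
   is turned back into plain (symbol_ref "foo") here.  */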
6234
6235 rtx
6236 i386_simplify_dwarf_addr (orig_x)
6237 rtx orig_x;
6238 {
6239 rtx x = orig_x, y;
6240
6241 if (GET_CODE (x) == MEM)
6242 x = XEXP (x, 0);
6243
6244 if (TARGET_64BIT)
6245 {
6246 if (GET_CODE (x) != CONST
6247 || GET_CODE (XEXP (x, 0)) != UNSPEC
6248 || XINT (XEXP (x, 0), 1) != UNSPEC_GOTPCREL
6249 || GET_CODE (orig_x) != MEM)
6250 return orig_x;
6251 return XVECEXP (XEXP (x, 0), 0, 0);
6252 }
6253
6254 if (GET_CODE (x) != PLUS
6255 || GET_CODE (XEXP (x, 1)) != CONST)
6256 return orig_x;
6257
6258 if (GET_CODE (XEXP (x, 0)) == REG
6259 && REGNO (XEXP (x, 0)) == PIC_OFFSET_TABLE_REGNUM)
6260 /* %ebx + GOT/GOTOFF */
6261 y = NULL;
6262 else if (GET_CODE (XEXP (x, 0)) == PLUS)
6263 {
6264 /* %ebx + %reg * scale + GOT/GOTOFF */
6265 y = XEXP (x, 0);
6266 if (GET_CODE (XEXP (y, 0)) == REG
6267 && REGNO (XEXP (y, 0)) == PIC_OFFSET_TABLE_REGNUM)
6268 y = XEXP (y, 1);
6269 else if (GET_CODE (XEXP (y, 1)) == REG
6270 && REGNO (XEXP (y, 1)) == PIC_OFFSET_TABLE_REGNUM)
6271 y = XEXP (y, 0);
6272 else
6273 return orig_x;
6274 if (GET_CODE (y) != REG
6275 && GET_CODE (y) != MULT
6276 && GET_CODE (y) != ASHIFT)
6277 return orig_x;
6278 }
6279 else
6280 return orig_x;
6281
6282 x = XEXP (XEXP (x, 1), 0);
6283 if (GET_CODE (x) == UNSPEC
6284 && ((XINT (x, 1) == UNSPEC_GOT && GET_CODE (orig_x) == MEM)
6285 || (XINT (x, 1) == UNSPEC_GOTOFF && GET_CODE (orig_x) != MEM)))
6286 {
6287 if (y)
6288 return gen_rtx_PLUS (Pmode, y, XVECEXP (x, 0, 0));
6289 return XVECEXP (x, 0, 0);
6290 }
6291
6292 if (GET_CODE (x) == PLUS
6293 && GET_CODE (XEXP (x, 0)) == UNSPEC
6294 && GET_CODE (XEXP (x, 1)) == CONST_INT
6295 && ((XINT (XEXP (x, 0), 1) == UNSPEC_GOT && GET_CODE (orig_x) == MEM)
6296 || (XINT (XEXP (x, 0), 1) == UNSPEC_GOTOFF
6297 && GET_CODE (orig_x) != MEM)))
6298 {
6299 x = gen_rtx_PLUS (VOIDmode, XVECEXP (XEXP (x, 0), 0, 0), XEXP (x, 1));
6300 if (y)
6301 return gen_rtx_PLUS (Pmode, y, x);
6302 return x;
6303 }
6304
6305 return orig_x;
6306 }
6307 \f
6308 static void
6309 put_condition_code (code, mode, reverse, fp, file)
6310 enum rtx_code code;
6311 enum machine_mode mode;
6312 int reverse, fp;
6313 FILE *file;
6314 {
6315 const char *suffix;
6316
6317 if (mode == CCFPmode || mode == CCFPUmode)
6318 {
6319 enum rtx_code second_code, bypass_code;
6320 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
6321 if (bypass_code != NIL || second_code != NIL)
6322 abort ();
6323 code = ix86_fp_compare_code_to_integer (code);
6324 mode = CCmode;
6325 }
6326 if (reverse)
6327 code = reverse_condition (code);
6328
6329 switch (code)
6330 {
6331 case EQ:
6332 suffix = "e";
6333 break;
6334 case NE:
6335 suffix = "ne";
6336 break;
6337 case GT:
6338 if (mode != CCmode && mode != CCNOmode && mode != CCGCmode)
6339 abort ();
6340 suffix = "g";
6341 break;
6342 case GTU:
6343 /* ??? Use "nbe" instead of "a" for fcmov lossage on some assemblers.
6344 Those same assemblers have the same but opposite lossage on cmov. */
6345 if (mode != CCmode)
6346 abort ();
6347 suffix = fp ? "nbe" : "a";
6348 break;
6349 case LT:
6350 if (mode == CCNOmode || mode == CCGOCmode)
6351 suffix = "s";
6352 else if (mode == CCmode || mode == CCGCmode)
6353 suffix = "l";
6354 else
6355 abort ();
6356 break;
6357 case LTU:
6358 if (mode != CCmode)
6359 abort ();
6360 suffix = "b";
6361 break;
6362 case GE:
6363 if (mode == CCNOmode || mode == CCGOCmode)
6364 suffix = "ns";
6365 else if (mode == CCmode || mode == CCGCmode)
6366 suffix = "ge";
6367 else
6368 abort ();
6369 break;
6370 case GEU:
6371 /* ??? As above. */
6372 if (mode != CCmode)
6373 abort ();
6374 suffix = fp ? "nb" : "ae";
6375 break;
6376 case LE:
6377 if (mode != CCmode && mode != CCGCmode && mode != CCNOmode)
6378 abort ();
6379 suffix = "le";
6380 break;
6381 case LEU:
6382 if (mode != CCmode)
6383 abort ();
6384 suffix = "be";
6385 break;
6386 case UNORDERED:
6387 suffix = fp ? "u" : "p";
6388 break;
6389 case ORDERED:
6390 suffix = fp ? "nu" : "np";
6391 break;
6392 default:
6393 abort ();
6394 }
6395 fputs (suffix, file);
6396 }
6397
6398 void
6399 print_reg (x, code, file)
6400 rtx x;
6401 int code;
6402 FILE *file;
6403 {
6404 if (REGNO (x) == ARG_POINTER_REGNUM
6405 || REGNO (x) == FRAME_POINTER_REGNUM
6406 || REGNO (x) == FLAGS_REG
6407 || REGNO (x) == FPSR_REG)
6408 abort ();
6409
6410 if (ASSEMBLER_DIALECT == ASM_ATT || USER_LABEL_PREFIX[0] == 0)
6411 putc ('%', file);
6412
6413 if (code == 'w' || MMX_REG_P (x))
6414 code = 2;
6415 else if (code == 'b')
6416 code = 1;
6417 else if (code == 'k')
6418 code = 4;
6419 else if (code == 'q')
6420 code = 8;
6421 else if (code == 'y')
6422 code = 3;
6423 else if (code == 'h')
6424 code = 0;
6425 else
6426 code = GET_MODE_SIZE (GET_MODE (x));
6427
6428 /* Irritatingly, the AMD extended registers use a different naming
6429 convention from the normal registers. */
6430 if (REX_INT_REG_P (x))
6431 {
6432 if (!TARGET_64BIT)
6433 abort ();
6434 switch (code)
6435 {
6436 case 0:
6437 error ("extended registers have no high halves");
6438 break;
6439 case 1:
6440 fprintf (file, "r%ib", REGNO (x) - FIRST_REX_INT_REG + 8);
6441 break;
6442 case 2:
6443 fprintf (file, "r%iw", REGNO (x) - FIRST_REX_INT_REG + 8);
6444 break;
6445 case 4:
6446 fprintf (file, "r%id", REGNO (x) - FIRST_REX_INT_REG + 8);
6447 break;
6448 case 8:
6449 fprintf (file, "r%i", REGNO (x) - FIRST_REX_INT_REG + 8);
6450 break;
6451 default:
6452 error ("unsupported operand size for extended register");
6453 break;
6454 }
6455 return;
6456 }
6457 switch (code)
6458 {
6459 case 3:
6460 if (STACK_TOP_P (x))
6461 {
6462 fputs ("st(0)", file);
6463 break;
6464 }
6465 /* FALLTHRU */
6466 case 8:
6467 case 4:
6468 case 12:
6469 if (! ANY_FP_REG_P (x))
6470 putc (code == 8 && TARGET_64BIT ? 'r' : 'e', file);
6471 /* FALLTHRU */
6472 case 16:
6473 case 2:
6474 fputs (hi_reg_name[REGNO (x)], file);
6475 break;
6476 case 1:
6477 fputs (qi_reg_name[REGNO (x)], file);
6478 break;
6479 case 0:
6480 fputs (qi_high_reg_name[REGNO (x)], file);
6481 break;
6482 default:
6483 abort ();
6484 }
6485 }
6486
6487 /* Locate some local-dynamic symbol still in use by this function
6488 so that we can print its name in some tls_local_dynamic_base
6489 pattern. */
6490
6491 static const char *
6492 get_some_local_dynamic_name ()
6493 {
6494 rtx insn;
6495
6496 if (cfun->machine->some_ld_name)
6497 return cfun->machine->some_ld_name;
6498
6499 for (insn = get_insns (); insn ; insn = NEXT_INSN (insn))
6500 if (INSN_P (insn)
6501 && for_each_rtx (&PATTERN (insn), get_some_local_dynamic_name_1, 0))
6502 return cfun->machine->some_ld_name;
6503
6504 abort ();
6505 }
6506
6507 static int
6508 get_some_local_dynamic_name_1 (px, data)
6509 rtx *px;
6510 void *data ATTRIBUTE_UNUSED;
6511 {
6512 rtx x = *px;
6513
6514 if (GET_CODE (x) == SYMBOL_REF
6515 && local_dynamic_symbolic_operand (x, Pmode))
6516 {
6517 cfun->machine->some_ld_name = XSTR (x, 0);
6518 return 1;
6519 }
6520
6521 return 0;
6522 }
6523
6524 /* Meaning of CODE:
6525 L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
6526 C -- print opcode suffix for set/cmov insn.
6527 c -- like C, but print reversed condition
6528 F,f -- likewise, but for floating-point.
6529 O -- if CMOV_SUN_AS_SYNTAX, expand to "w.", "l." or "q.", otherwise
6530 nothing
6531 R -- print the prefix for register names.
6532 z -- print the opcode suffix for the size of the current operand.
6533 * -- print a star (in certain assembler syntax)
6534 A -- print an absolute memory reference.
6535 w -- print the operand as if it's a "word" (HImode) even if it isn't.
6536 s -- print a shift double count, followed by the assembler's argument
6537 delimiter.
6538 b -- print the QImode name of the register for the indicated operand.
6539 %b0 would print %al if operands[0] is reg 0.
6540 w -- likewise, print the HImode name of the register.
6541 k -- likewise, print the SImode name of the register.
6542 q -- likewise, print the DImode name of the register.
6543 h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
6544 y -- print "st(0)" instead of "st" as a register.
6545 D -- print condition for SSE cmp instruction.
6546 P -- if PIC, print an @PLT suffix.
6547 X -- don't print any sort of PIC '@' suffix for a symbol.
6548 & -- print some in-use local-dynamic symbol name.
6549 */
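/* Illustrative example (hypothetical template, added): for an SImode memory
   operand, "%z1" prints the size suffix 'l' in AT&T syntax (and nothing in
   Intel syntax), while "%k0" prints the SImode name of the register in
   operands[0], e.g. %eax.  */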
6550
6551 void
6552 print_operand (file, x, code)
6553 FILE *file;
6554 rtx x;
6555 int code;
6556 {
6557 if (code)
6558 {
6559 switch (code)
6560 {
6561 case '*':
6562 if (ASSEMBLER_DIALECT == ASM_ATT)
6563 putc ('*', file);
6564 return;
6565
6566 case '&':
6567 assemble_name (file, get_some_local_dynamic_name ());
6568 return;
6569
6570 case 'A':
6571 if (ASSEMBLER_DIALECT == ASM_ATT)
6572 putc ('*', file);
6573 else if (ASSEMBLER_DIALECT == ASM_INTEL)
6574 {
6575 /* Intel syntax.  For absolute addresses, registers should not
6576 be surrounded by brackets. */
6577 if (GET_CODE (x) != REG)
6578 {
6579 putc ('[', file);
6580 PRINT_OPERAND (file, x, 0);
6581 putc (']', file);
6582 return;
6583 }
6584 }
6585 else
6586 abort ();
6587
6588 PRINT_OPERAND (file, x, 0);
6589 return;
6590
6591
6592 case 'L':
6593 if (ASSEMBLER_DIALECT == ASM_ATT)
6594 putc ('l', file);
6595 return;
6596
6597 case 'W':
6598 if (ASSEMBLER_DIALECT == ASM_ATT)
6599 putc ('w', file);
6600 return;
6601
6602 case 'B':
6603 if (ASSEMBLER_DIALECT == ASM_ATT)
6604 putc ('b', file);
6605 return;
6606
6607 case 'Q':
6608 if (ASSEMBLER_DIALECT == ASM_ATT)
6609 putc ('l', file);
6610 return;
6611
6612 case 'S':
6613 if (ASSEMBLER_DIALECT == ASM_ATT)
6614 putc ('s', file);
6615 return;
6616
6617 case 'T':
6618 if (ASSEMBLER_DIALECT == ASM_ATT)
6619 putc ('t', file);
6620 return;
6621
6622 case 'z':
6623 /* 387 opcodes don't get size suffixes if the operands are
6624 registers. */
6625 if (STACK_REG_P (x))
6626 return;
6627
6628 /* Likewise if using Intel opcodes. */
6629 if (ASSEMBLER_DIALECT == ASM_INTEL)
6630 return;
6631
6632 /* Derive the opcode size suffix from the size of the operand. */
6633 switch (GET_MODE_SIZE (GET_MODE (x)))
6634 {
6635 case 2:
6636 #ifdef HAVE_GAS_FILDS_FISTS
6637 putc ('s', file);
6638 #endif
6639 return;
6640
6641 case 4:
6642 if (GET_MODE (x) == SFmode)
6643 {
6644 putc ('s', file);
6645 return;
6646 }
6647 else
6648 putc ('l', file);
6649 return;
6650
6651 case 12:
6652 case 16:
6653 putc ('t', file);
6654 return;
6655
6656 case 8:
6657 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
6658 {
6659 #ifdef GAS_MNEMONICS
6660 putc ('q', file);
6661 #else
6662 putc ('l', file);
6663 putc ('l', file);
6664 #endif
6665 }
6666 else
6667 putc ('l', file);
6668 return;
6669
6670 default:
6671 abort ();
6672 }
6673
6674 case 'b':
6675 case 'w':
6676 case 'k':
6677 case 'q':
6678 case 'h':
6679 case 'y':
6680 case 'X':
6681 case 'P':
6682 break;
6683
6684 case 's':
6685 if (GET_CODE (x) == CONST_INT || ! SHIFT_DOUBLE_OMITS_COUNT)
6686 {
6687 PRINT_OPERAND (file, x, 0);
6688 putc (',', file);
6689 }
6690 return;
6691
6692 case 'D':
6693 /* Little bit of braindamage here.  The SSE compare instructions
6694 use completely different names for the comparisons than the
6695 fp conditional moves do. */
6696 switch (GET_CODE (x))
6697 {
6698 case EQ:
6699 case UNEQ:
6700 fputs ("eq", file);
6701 break;
6702 case LT:
6703 case UNLT:
6704 fputs ("lt", file);
6705 break;
6706 case LE:
6707 case UNLE:
6708 fputs ("le", file);
6709 break;
6710 case UNORDERED:
6711 fputs ("unord", file);
6712 break;
6713 case NE:
6714 case LTGT:
6715 fputs ("neq", file);
6716 break;
6717 case UNGE:
6718 case GE:
6719 fputs ("nlt", file);
6720 break;
6721 case UNGT:
6722 case GT:
6723 fputs ("nle", file);
6724 break;
6725 case ORDERED:
6726 fputs ("ord", file);
6727 break;
6728 default:
6729 abort ();
6730 break;
6731 }
6732 return;
6733 case 'O':
6734 #ifdef CMOV_SUN_AS_SYNTAX
6735 if (ASSEMBLER_DIALECT == ASM_ATT)
6736 {
6737 switch (GET_MODE (x))
6738 {
6739 case HImode: putc ('w', file); break;
6740 case SImode:
6741 case SFmode: putc ('l', file); break;
6742 case DImode:
6743 case DFmode: putc ('q', file); break;
6744 default: abort ();
6745 }
6746 putc ('.', file);
6747 }
6748 #endif
6749 return;
6750 case 'C':
6751 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 0, file);
6752 return;
6753 case 'F':
6754 #ifdef CMOV_SUN_AS_SYNTAX
6755 if (ASSEMBLER_DIALECT == ASM_ATT)
6756 putc ('.', file);
6757 #endif
6758 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 1, file);
6759 return;
6760
6761 /* Like above, but reverse condition */
6762 case 'c':
6763 /* Check to see if argument to %c is really a constant
6764 and not a condition code which needs to be reversed. */
6765 if (GET_RTX_CLASS (GET_CODE (x)) != '<')
6766 {
6767 output_operand_lossage ("operand is neither a constant nor a condition code, invalid operand code 'c'");
6768 return;
6769 }
6770 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 0, file);
6771 return;
6772 case 'f':
6773 #ifdef CMOV_SUN_AS_SYNTAX
6774 if (ASSEMBLER_DIALECT == ASM_ATT)
6775 putc ('.', file);
6776 #endif
6777 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 1, file);
6778 return;
6779 case '+':
6780 {
6781 rtx x;
6782
6783 if (!optimize || optimize_size || !TARGET_BRANCH_PREDICTION_HINTS)
6784 return;
6785
6786 x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
6787 if (x)
6788 {
6789 int pred_val = INTVAL (XEXP (x, 0));
6790
6791 if (pred_val < REG_BR_PROB_BASE * 45 / 100
6792 || pred_val > REG_BR_PROB_BASE * 55 / 100)
6793 {
6794 int taken = pred_val > REG_BR_PROB_BASE / 2;
6795 int cputaken = final_forward_branch_p (current_output_insn) == 0;
6796
6797 /* Emit hints only in the case the default branch prediction
6798 heuristics would fail. */
6799 if (taken != cputaken)
6800 {
6801 /* We use the 3e (DS) prefix for taken branches and
6802 the 2e (CS) prefix for not-taken branches. */
6803 if (taken)
6804 fputs ("ds ; ", file);
6805 else
6806 fputs ("cs ; ", file);
6807 }
6808 }
6809 }
6810 return;
6811 }
6812 default:
6813 output_operand_lossage ("invalid operand code `%c'", code);
6814 }
6815 }
6816
6817 if (GET_CODE (x) == REG)
6818 {
6819 PRINT_REG (x, code, file);
6820 }
6821
6822 else if (GET_CODE (x) == MEM)
6823 {
6824 /* No `byte ptr' prefix for call instructions. */
6825 if (ASSEMBLER_DIALECT == ASM_INTEL && code != 'X' && code != 'P')
6826 {
6827 const char * size;
6828 switch (GET_MODE_SIZE (GET_MODE (x)))
6829 {
6830 case 1: size = "BYTE"; break;
6831 case 2: size = "WORD"; break;
6832 case 4: size = "DWORD"; break;
6833 case 8: size = "QWORD"; break;
6834 case 12: size = "XWORD"; break;
6835 case 16: size = "XMMWORD"; break;
6836 default:
6837 abort ();
6838 }
6839
6840 /* Check for explicit size override (codes 'b', 'w' and 'k') */
6841 if (code == 'b')
6842 size = "BYTE";
6843 else if (code == 'w')
6844 size = "WORD";
6845 else if (code == 'k')
6846 size = "DWORD";
6847
6848 fputs (size, file);
6849 fputs (" PTR ", file);
6850 }
6851
6852 x = XEXP (x, 0);
6853 if (flag_pic && CONSTANT_ADDRESS_P (x))
6854 output_pic_addr_const (file, x, code);
6855 /* Avoid (%rip) for call operands. */
6856 else if (CONSTANT_ADDRESS_P (x) && code == 'P'
6857 && GET_CODE (x) != CONST_INT)
6858 output_addr_const (file, x);
6859 else if (this_is_asm_operands && ! address_operand (x, VOIDmode))
6860 output_operand_lossage ("invalid constraints for operand");
6861 else
6862 output_address (x);
6863 }
6864
6865 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == SFmode)
6866 {
6867 REAL_VALUE_TYPE r;
6868 long l;
6869
6870 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
6871 REAL_VALUE_TO_TARGET_SINGLE (r, l);
6872
6873 if (ASSEMBLER_DIALECT == ASM_ATT)
6874 putc ('$', file);
6875 fprintf (file, "0x%lx", l);
6876 }
6877
6878 /* These float cases don't actually occur as immediate operands. */
6879 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == DFmode)
6880 {
6881 char dstr[30];
6882
6883 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
6884 fprintf (file, "%s", dstr);
6885 }
6886
6887 else if (GET_CODE (x) == CONST_DOUBLE
6888 && (GET_MODE (x) == XFmode || GET_MODE (x) == TFmode))
6889 {
6890 char dstr[30];
6891
6892 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
6893 fprintf (file, "%s", dstr);
6894 }
6895
6896 else
6897 {
6898 if (code != 'P')
6899 {
6900 if (GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST_DOUBLE)
6901 {
6902 if (ASSEMBLER_DIALECT == ASM_ATT)
6903 putc ('$', file);
6904 }
6905 else if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF
6906 || GET_CODE (x) == LABEL_REF)
6907 {
6908 if (ASSEMBLER_DIALECT == ASM_ATT)
6909 putc ('$', file);
6910 else
6911 fputs ("OFFSET FLAT:", file);
6912 }
6913 }
6914 if (GET_CODE (x) == CONST_INT)
6915 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
6916 else if (flag_pic)
6917 output_pic_addr_const (file, x, code);
6918 else
6919 output_addr_const (file, x);
6920 }
6921 }
6922 \f
6923 /* Print a memory operand whose address is ADDR. */
6924
6925 void
6926 print_operand_address (file, addr)
6927 FILE *file;
6928 register rtx addr;
6929 {
6930 struct ix86_address parts;
6931 rtx base, index, disp;
6932 int scale;
6933
6934 if (GET_CODE (addr) == UNSPEC && XINT (addr, 1) == UNSPEC_TP)
6935 {
6936 if (ASSEMBLER_DIALECT == ASM_INTEL)
6937 fputs ("DWORD PTR ", file);
6938 if (ASSEMBLER_DIALECT == ASM_ATT || USER_LABEL_PREFIX[0] == 0)
6939 putc ('%', file);
6940 if (TARGET_64BIT)
6941 fputs ("fs:0", file);
6942 else
6943 fputs ("gs:0", file);
6944 return;
6945 }
6946
6947 if (! ix86_decompose_address (addr, &parts))
6948 abort ();
6949
6950 base = parts.base;
6951 index = parts.index;
6952 disp = parts.disp;
6953 scale = parts.scale;
6954
6955 if (!base && !index)
6956 {
6957 /* A displacement-only address requires special attention. */
6958
6959 if (GET_CODE (disp) == CONST_INT)
6960 {
6961 if (ASSEMBLER_DIALECT == ASM_INTEL)
6962 {
6963 if (USER_LABEL_PREFIX[0] == 0)
6964 putc ('%', file);
6965 fputs ("ds:", file);
6966 }
6967 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (addr));
6968 }
6969 else if (flag_pic)
6970 output_pic_addr_const (file, addr, 0);
6971 else
6972 output_addr_const (file, addr);
6973
6974 /* Use the one-byte-shorter RIP-relative addressing for 64-bit mode. */
6975 if (TARGET_64BIT
6976 && ((GET_CODE (addr) == SYMBOL_REF
6977 && ! tls_symbolic_operand (addr, GET_MODE (addr)))
6978 || GET_CODE (addr) == LABEL_REF
6979 || (GET_CODE (addr) == CONST
6980 && GET_CODE (XEXP (addr, 0)) == PLUS
6981 && (GET_CODE (XEXP (XEXP (addr, 0), 0)) == SYMBOL_REF
6982 || GET_CODE (XEXP (XEXP (addr, 0), 0)) == LABEL_REF)
6983 && GET_CODE (XEXP (XEXP (addr, 0), 1)) == CONST_INT)))
6984 fputs ("(%rip)", file);
6985 }
6986 else
6987 {
6988 if (ASSEMBLER_DIALECT == ASM_ATT)
6989 {
6990 if (disp)
6991 {
6992 if (flag_pic)
6993 output_pic_addr_const (file, disp, 0);
6994 else if (GET_CODE (disp) == LABEL_REF)
6995 output_asm_label (disp);
6996 else
6997 output_addr_const (file, disp);
6998 }
6999
7000 putc ('(', file);
7001 if (base)
7002 PRINT_REG (base, 0, file);
7003 if (index)
7004 {
7005 putc (',', file);
7006 PRINT_REG (index, 0, file);
7007 if (scale != 1)
7008 fprintf (file, ",%d", scale);
7009 }
7010 putc (')', file);
7011 }
7012 else
7013 {
7014 rtx offset = NULL_RTX;
7015
7016 if (disp)
7017 {
7018 /* Pull out the offset of a symbol; print any symbol itself. */
7019 if (GET_CODE (disp) == CONST
7020 && GET_CODE (XEXP (disp, 0)) == PLUS
7021 && GET_CODE (XEXP (XEXP (disp, 0), 1)) == CONST_INT)
7022 {
7023 offset = XEXP (XEXP (disp, 0), 1);
7024 disp = gen_rtx_CONST (VOIDmode,
7025 XEXP (XEXP (disp, 0), 0));
7026 }
7027
7028 if (flag_pic)
7029 output_pic_addr_const (file, disp, 0);
7030 else if (GET_CODE (disp) == LABEL_REF)
7031 output_asm_label (disp);
7032 else if (GET_CODE (disp) == CONST_INT)
7033 offset = disp;
7034 else
7035 output_addr_const (file, disp);
7036 }
7037
7038 putc ('[', file);
7039 if (base)
7040 {
7041 PRINT_REG (base, 0, file);
7042 if (offset)
7043 {
7044 if (INTVAL (offset) >= 0)
7045 putc ('+', file);
7046 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
7047 }
7048 }
7049 else if (offset)
7050 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
7051 else
7052 putc ('0', file);
7053
7054 if (index)
7055 {
7056 putc ('+', file);
7057 PRINT_REG (index, 0, file);
7058 if (scale != 1)
7059 fprintf (file, "*%d", scale);
7060 }
7061 putc (']', file);
7062 }
7063 }
7064 }
7065
7066 bool
7067 output_addr_const_extra (file, x)
7068 FILE *file;
7069 rtx x;
7070 {
7071 rtx op;
7072
7073 if (GET_CODE (x) != UNSPEC)
7074 return false;
7075
7076 op = XVECEXP (x, 0, 0);
7077 switch (XINT (x, 1))
7078 {
7079 case UNSPEC_GOTTPOFF:
7080 output_addr_const (file, op);
7081 /* FIXME: This might be @TPOFF in Sun ld. */
7082 fputs ("@GOTTPOFF", file);
7083 break;
7084 case UNSPEC_TPOFF:
7085 output_addr_const (file, op);
7086 fputs ("@TPOFF", file);
7087 break;
7088 case UNSPEC_NTPOFF:
7089 output_addr_const (file, op);
7090 if (TARGET_64BIT)
7091 fputs ("@TPOFF", file);
7092 else
7093 fputs ("@NTPOFF", file);
7094 break;
7095 case UNSPEC_DTPOFF:
7096 output_addr_const (file, op);
7097 fputs ("@DTPOFF", file);
7098 break;
7099 case UNSPEC_GOTNTPOFF:
7100 output_addr_const (file, op);
7101 if (TARGET_64BIT)
7102 fputs ("@GOTTPOFF(%rip)", file);
7103 else
7104 fputs ("@GOTNTPOFF", file);
7105 break;
7106 case UNSPEC_INDNTPOFF:
7107 output_addr_const (file, op);
7108 fputs ("@INDNTPOFF", file);
7109 break;
7110
7111 default:
7112 return false;
7113 }
7114
7115 return true;
7116 }
7117 \f
7118 /* Split one or more DImode RTL references into pairs of SImode
7119 references. The RTL can be REG, offsettable MEM, integer constant, or
7120 CONST_DOUBLE. "operands" is a pointer to an array of DImode RTL to
7121 split and "num" is its length. lo_half and hi_half are output arrays
7122 that parallel "operands". */
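/* Illustrative note (added): on this little-endian target offset 0 is the
   low word, so splitting the DImode constant 0x0000000100000002 yields a
   lo_half of 2 and a hi_half of 1.  */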
7123
7124 void
7125 split_di (operands, num, lo_half, hi_half)
7126 rtx operands[];
7127 int num;
7128 rtx lo_half[], hi_half[];
7129 {
7130 while (num--)
7131 {
7132 rtx op = operands[num];
7133
7134 /* simplify_subreg refuses to split volatile memory addresses,
7135 but we still have to handle them. */
7136 if (GET_CODE (op) == MEM)
7137 {
7138 lo_half[num] = adjust_address (op, SImode, 0);
7139 hi_half[num] = adjust_address (op, SImode, 4);
7140 }
7141 else
7142 {
7143 lo_half[num] = simplify_gen_subreg (SImode, op,
7144 GET_MODE (op) == VOIDmode
7145 ? DImode : GET_MODE (op), 0);
7146 hi_half[num] = simplify_gen_subreg (SImode, op,
7147 GET_MODE (op) == VOIDmode
7148 ? DImode : GET_MODE (op), 4);
7149 }
7150 }
7151 }
7152 /* Split one or more TImode RTL references into pairs of DImode
7153 references.  The RTL can be REG, offsettable MEM, integer constant, or
7154 CONST_DOUBLE.  "operands" is a pointer to an array of TImode RTL to
7155 split and "num" is its length.  lo_half and hi_half are output arrays
7156 that parallel "operands". */
7157
7158 void
7159 split_ti (operands, num, lo_half, hi_half)
7160 rtx operands[];
7161 int num;
7162 rtx lo_half[], hi_half[];
7163 {
7164 while (num--)
7165 {
7166 rtx op = operands[num];
7167
7168 /* simplify_subreg refuses to split volatile memory addresses, but we
7169 still have to handle them. */
7170 if (GET_CODE (op) == MEM)
7171 {
7172 lo_half[num] = adjust_address (op, DImode, 0);
7173 hi_half[num] = adjust_address (op, DImode, 8);
7174 }
7175 else
7176 {
7177 lo_half[num] = simplify_gen_subreg (DImode, op, TImode, 0);
7178 hi_half[num] = simplify_gen_subreg (DImode, op, TImode, 8);
7179 }
7180 }
7181 }
7182 \f
7183 /* Output code to perform a 387 binary operation in INSN, one of PLUS,
7184 MINUS, MULT or DIV. OPERANDS are the insn operands, where operands[3]
7185 is the expression of the binary operation. The output may either be
7186 emitted here, or returned to the caller, like all output_* functions.
7187
7188 There is no guarantee that the operands are the same mode, as they
7189 might be within FLOAT or FLOAT_EXTEND expressions. */
7190
7191 #ifndef SYSV386_COMPAT
7192 /* Set to 1 for compatibility with brain-damaged assemblers. No-one
7193 wants to fix the assemblers because that causes incompatibility
7194 with gcc. No-one wants to fix gcc because that causes
7195 incompatibility with assemblers... You can use the option of
7196 -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way. */
7197 #define SYSV386_COMPAT 1
7198 #endif
7199
7200 const char *
7201 output_387_binary_op (insn, operands)
7202 rtx insn;
7203 rtx *operands;
7204 {
7205 static char buf[30];
7206 const char *p;
7207 const char *ssep;
7208 int is_sse = SSE_REG_P (operands[0]) | SSE_REG_P (operands[1]) | SSE_REG_P (operands[2]);
7209
7210 #ifdef ENABLE_CHECKING
7211 /* Even if we do not want to check the inputs, this documents the input
7212 constraints, which helps in understanding the following code. */
7213 if (STACK_REG_P (operands[0])
7214 && ((REG_P (operands[1])
7215 && REGNO (operands[0]) == REGNO (operands[1])
7216 && (STACK_REG_P (operands[2]) || GET_CODE (operands[2]) == MEM))
7217 || (REG_P (operands[2])
7218 && REGNO (operands[0]) == REGNO (operands[2])
7219 && (STACK_REG_P (operands[1]) || GET_CODE (operands[1]) == MEM)))
7220 && (STACK_TOP_P (operands[1]) || STACK_TOP_P (operands[2])))
7221 ; /* ok */
7222 else if (!is_sse)
7223 abort ();
7224 #endif
7225
7226 switch (GET_CODE (operands[3]))
7227 {
7228 case PLUS:
7229 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
7230 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
7231 p = "fiadd";
7232 else
7233 p = "fadd";
7234 ssep = "add";
7235 break;
7236
7237 case MINUS:
7238 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
7239 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
7240 p = "fisub";
7241 else
7242 p = "fsub";
7243 ssep = "sub";
7244 break;
7245
7246 case MULT:
7247 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
7248 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
7249 p = "fimul";
7250 else
7251 p = "fmul";
7252 ssep = "mul";
7253 break;
7254
7255 case DIV:
7256 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
7257 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
7258 p = "fidiv";
7259 else
7260 p = "fdiv";
7261 ssep = "div";
7262 break;
7263
7264 default:
7265 abort ();
7266 }
7267
7268 if (is_sse)
7269 {
7270 strcpy (buf, ssep);
7271 if (GET_MODE (operands[0]) == SFmode)
7272 strcat (buf, "ss\t{%2, %0|%0, %2}");
7273 else
7274 strcat (buf, "sd\t{%2, %0|%0, %2}");
7275 return buf;
7276 }
7277 strcpy (buf, p);
7278
7279 switch (GET_CODE (operands[3]))
7280 {
7281 case MULT:
7282 case PLUS:
7283 if (REG_P (operands[2]) && REGNO (operands[0]) == REGNO (operands[2]))
7284 {
7285 rtx temp = operands[2];
7286 operands[2] = operands[1];
7287 operands[1] = temp;
7288 }
7289
7290 /* We know operands[0] == operands[1]. */
7291
7292 if (GET_CODE (operands[2]) == MEM)
7293 {
7294 p = "%z2\t%2";
7295 break;
7296 }
7297
7298 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
7299 {
7300 if (STACK_TOP_P (operands[0]))
7301 /* How is it that we are storing to a dead operand[2]?
7302 Well, presumably operands[1] is dead too. We can't
7303 store the result to st(0) as st(0) gets popped on this
7304 instruction. Instead store to operands[2] (which I
7305 think has to be st(1)). st(1) will be popped later.
7306 gcc <= 2.8.1 didn't have this check and generated
7307 assembly code that the Unixware assembler rejected. */
7308 p = "p\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
7309 else
7310 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
7311 break;
7312 }
7313
7314 if (STACK_TOP_P (operands[0]))
7315 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
7316 else
7317 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
7318 break;
7319
7320 case MINUS:
7321 case DIV:
7322 if (GET_CODE (operands[1]) == MEM)
7323 {
7324 p = "r%z1\t%1";
7325 break;
7326 }
7327
7328 if (GET_CODE (operands[2]) == MEM)
7329 {
7330 p = "%z2\t%2";
7331 break;
7332 }
7333
7334 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
7335 {
7336 #if SYSV386_COMPAT
7337 /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
7338 derived assemblers, confusingly reverse the direction of
7339 the operation for fsub{r} and fdiv{r} when the
7340 destination register is not st(0). The Intel assembler
7341 doesn't have this brain damage. Read !SYSV386_COMPAT to
7342 figure out what the hardware really does. */
7343 if (STACK_TOP_P (operands[0]))
7344 p = "{p\t%0, %2|rp\t%2, %0}";
7345 else
7346 p = "{rp\t%2, %0|p\t%0, %2}";
7347 #else
7348 if (STACK_TOP_P (operands[0]))
7349 /* As above for fmul/fadd, we can't store to st(0). */
7350 p = "rp\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
7351 else
7352 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
7353 #endif
7354 break;
7355 }
7356
7357 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
7358 {
7359 #if SYSV386_COMPAT
7360 if (STACK_TOP_P (operands[0]))
7361 p = "{rp\t%0, %1|p\t%1, %0}";
7362 else
7363 p = "{p\t%1, %0|rp\t%0, %1}";
7364 #else
7365 if (STACK_TOP_P (operands[0]))
7366 p = "p\t{%0, %1|%1, %0}"; /* st(1) = st(1) op st(0); pop */
7367 else
7368 p = "rp\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2); pop */
7369 #endif
7370 break;
7371 }
7372
7373 if (STACK_TOP_P (operands[0]))
7374 {
7375 if (STACK_TOP_P (operands[1]))
7376 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
7377 else
7378 p = "r\t{%y1, %0|%0, %y1}"; /* st(0) = st(r1) op st(0) */
7379 break;
7380 }
7381 else if (STACK_TOP_P (operands[1]))
7382 {
7383 #if SYSV386_COMPAT
7384 p = "{\t%1, %0|r\t%0, %1}";
7385 #else
7386 p = "r\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2) */
7387 #endif
7388 }
7389 else
7390 {
7391 #if SYSV386_COMPAT
7392 p = "{r\t%2, %0|\t%0, %2}";
7393 #else
7394 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
7395 #endif
7396 }
7397 break;
7398
7399 default:
7400 abort ();
7401 }
7402
7403 strcat (buf, p);
7404 return buf;
7405 }
7406
7407 /* Output code to initialize the control word copies used by
7408 the trunc?f?i patterns.  NORMAL is set to the current control word,
7409 while ROUND_DOWN is set to a control word that rounds downwards. */
7410 void
7411 emit_i387_cw_initialization (normal, round_down)
7412 rtx normal, round_down;
7413 {
7414 rtx reg = gen_reg_rtx (HImode);
7415
7416 emit_insn (gen_x86_fnstcw_1 (normal));
7417 emit_move_insn (reg, normal);
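  /* Added note: bits 10-11 of the 387 control word are the rounding-control
     field.  Both branches below force those bits to 1 (0xc00 in the word,
     or 0xc in its high byte), selecting round-toward-zero for the fist
     truncation.  */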
7418 if (!TARGET_PARTIAL_REG_STALL && !optimize_size
7419 && !TARGET_64BIT)
7420 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0xc)));
7421 else
7422 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0xc00)));
7423 emit_move_insn (round_down, reg);
7424 }
7425
7426 /* Output code for INSN to convert a float to a signed int. OPERANDS
7427 are the insn operands. The output may be [HSD]Imode and the input
7428 operand may be [SDX]Fmode. */
7429
7430 const char *
7431 output_fix_trunc (insn, operands)
7432 rtx insn;
7433 rtx *operands;
7434 {
7435 int stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
7436 int dimode_p = GET_MODE (operands[0]) == DImode;
7437
7438 /* Jump through a hoop or two for DImode, since the hardware has no
7439 non-popping instruction. We used to do this a different way, but
7440 that was somewhat fragile and broke with post-reload splitters. */
7441 if (dimode_p && !stack_top_dies)
7442 output_asm_insn ("fld\t%y1", operands);
7443
7444 if (!STACK_TOP_P (operands[1]))
7445 abort ();
7446
7447 if (GET_CODE (operands[0]) != MEM)
7448 abort ();
7449
7450 output_asm_insn ("fldcw\t%3", operands);
7451 if (stack_top_dies || dimode_p)
7452 output_asm_insn ("fistp%z0\t%0", operands);
7453 else
7454 output_asm_insn ("fist%z0\t%0", operands);
7455 output_asm_insn ("fldcw\t%2", operands);
7456
7457 return "";
7458 }
7459
7460 /* Output code for INSN to compare OPERANDS. EFLAGS_P is 1 when fcomi
7461 should be used and 2 when fnstsw should be used. UNORDERED_P is true
7462 when fucom should be used. */
7463
7464 const char *
7465 output_fp_compare (insn, operands, eflags_p, unordered_p)
7466 rtx insn;
7467 rtx *operands;
7468 int eflags_p, unordered_p;
7469 {
7470 int stack_top_dies;
7471 rtx cmp_op0 = operands[0];
7472 rtx cmp_op1 = operands[1];
7473 int is_sse = SSE_REG_P (operands[0]) | SSE_REG_P (operands[1]);
7474
7475 if (eflags_p == 2)
7476 {
7477 cmp_op0 = cmp_op1;
7478 cmp_op1 = operands[2];
7479 }
7480 if (is_sse)
7481 {
7482 if (GET_MODE (operands[0]) == SFmode)
7483 if (unordered_p)
7484 return "ucomiss\t{%1, %0|%0, %1}";
7485 else
7486 return "comiss\t{%1, %0|%0, %y}";
7487 else
7488 if (unordered_p)
7489 return "ucomisd\t{%1, %0|%0, %1}";
7490 else
7491 return "comisd\t{%1, %0|%0, %y}";
7492 }
7493
7494 if (! STACK_TOP_P (cmp_op0))
7495 abort ();
7496
7497 stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
7498
7499 if (STACK_REG_P (cmp_op1)
7500 && stack_top_dies
7501 && find_regno_note (insn, REG_DEAD, REGNO (cmp_op1))
7502 && REGNO (cmp_op1) != FIRST_STACK_REG)
7503 {
7504 /* If the top of the 387 stack dies, and the other operand
7505 is also a stack register that dies, then this must be an
7506 `fcompp' float compare. */
7507
7508 if (eflags_p == 1)
7509 {
7510 /* There is no double popping fcomi variant. Fortunately,
7511 eflags is immune from the fstp's cc clobbering. */
7512 if (unordered_p)
7513 output_asm_insn ("fucomip\t{%y1, %0|%0, %y1}", operands);
7514 else
7515 output_asm_insn ("fcomip\t{%y1, %0|%0, %y1}", operands);
7516 return "fstp\t%y0";
7517 }
7518 else
7519 {
7520 if (eflags_p == 2)
7521 {
7522 if (unordered_p)
7523 return "fucompp\n\tfnstsw\t%0";
7524 else
7525 return "fcompp\n\tfnstsw\t%0";
7526 }
7527 else
7528 {
7529 if (unordered_p)
7530 return "fucompp";
7531 else
7532 return "fcompp";
7533 }
7534 }
7535 }
7536 else
7537 {
7538 /* Encoded here as eflags_p | intmode | unordered_p | stack_top_dies. */
7539
7540 static const char * const alt[24] =
7541 {
7542 "fcom%z1\t%y1",
7543 "fcomp%z1\t%y1",
7544 "fucom%z1\t%y1",
7545 "fucomp%z1\t%y1",
7546
7547 "ficom%z1\t%y1",
7548 "ficomp%z1\t%y1",
7549 NULL,
7550 NULL,
7551
7552 "fcomi\t{%y1, %0|%0, %y1}",
7553 "fcomip\t{%y1, %0|%0, %y1}",
7554 "fucomi\t{%y1, %0|%0, %y1}",
7555 "fucomip\t{%y1, %0|%0, %y1}",
7556
7557 NULL,
7558 NULL,
7559 NULL,
7560 NULL,
7561
7562 "fcom%z2\t%y2\n\tfnstsw\t%0",
7563 "fcomp%z2\t%y2\n\tfnstsw\t%0",
7564 "fucom%z2\t%y2\n\tfnstsw\t%0",
7565 "fucomp%z2\t%y2\n\tfnstsw\t%0",
7566
7567 "ficom%z2\t%y2\n\tfnstsw\t%0",
7568 "ficomp%z2\t%y2\n\tfnstsw\t%0",
7569 NULL,
7570 NULL
7571 };
7572
7573 int mask;
7574 const char *ret;
7575
7576 mask = eflags_p << 3;
7577 mask |= (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT) << 2;
7578 mask |= unordered_p << 1;
7579 mask |= stack_top_dies;
7580
7581 if (mask >= 24)
7582 abort ();
7583 ret = alt[mask];
7584 if (ret == NULL)
7585 abort ();
7586
7587 return ret;
7588 }
7589 }
7590
7591 void
7592 ix86_output_addr_vec_elt (file, value)
7593 FILE *file;
7594 int value;
7595 {
7596 const char *directive = ASM_LONG;
7597
7598 if (TARGET_64BIT)
7599 {
7600 #ifdef ASM_QUAD
7601 directive = ASM_QUAD;
7602 #else
7603 abort ();
7604 #endif
7605 }
7606
7607 fprintf (file, "%s%s%d\n", directive, LPREFIX, value);
7608 }
7609
7610 void
7611 ix86_output_addr_diff_elt (file, value, rel)
7612 FILE *file;
7613 int value, rel;
7614 {
7615 if (TARGET_64BIT)
7616 fprintf (file, "%s%s%d-%s%d\n",
7617 ASM_LONG, LPREFIX, value, LPREFIX, rel);
7618 else if (HAVE_AS_GOTOFF_IN_DATA)
7619 fprintf (file, "%s%s%d@GOTOFF\n", ASM_LONG, LPREFIX, value);
7620 #if TARGET_MACHO
7621 else if (TARGET_MACHO)
7622 fprintf (file, "%s%s%d-%s\n", ASM_LONG, LPREFIX, value,
7623 machopic_function_base_name () + 1);
7624 #endif
7625 else
7626 asm_fprintf (file, "%s%U%s+[.-%s%d]\n",
7627 ASM_LONG, GOT_SYMBOL_NAME, LPREFIX, value);
7628 }
7629 \f
7630 /* Generate either "mov $0, reg" or "xor reg, reg", as appropriate
7631 for the target. */
7632
7633 void
7634 ix86_expand_clear (dest)
7635 rtx dest;
7636 {
7637 rtx tmp;
7638
7639 /* We play register width games, which are only valid after reload. */
7640 if (!reload_completed)
7641 abort ();
7642
7643 /* Avoid HImode and its attendant prefix byte. */
7644 if (GET_MODE_SIZE (GET_MODE (dest)) < 4)
7645 dest = gen_rtx_REG (SImode, REGNO (dest));
7646
7647 tmp = gen_rtx_SET (VOIDmode, dest, const0_rtx);
7648
7649 /* This predicate should match that for movsi_xor and movdi_xor_rex64. */
7650 if (reload_completed && (!TARGET_USE_MOV0 || optimize_size))
7651 {
7652 rtx clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, 17));
7653 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
7654 }
7655
7656 emit_insn (tmp);
7657 }
7658
7659 /* X is an unchanging MEM. If it is a constant pool reference, return
7660 the constant pool rtx, else NULL. */
7661
7662 static rtx
7663 maybe_get_pool_constant (x)
7664 rtx x;
7665 {
7666 x = XEXP (x, 0);
7667
7668 if (flag_pic && ! TARGET_64BIT)
7669 {
7670 if (GET_CODE (x) != PLUS)
7671 return NULL_RTX;
7672 if (XEXP (x, 0) != pic_offset_table_rtx)
7673 return NULL_RTX;
7674 x = XEXP (x, 1);
7675 if (GET_CODE (x) != CONST)
7676 return NULL_RTX;
7677 x = XEXP (x, 0);
7678 if (GET_CODE (x) != UNSPEC)
7679 return NULL_RTX;
7680 if (XINT (x, 1) != UNSPEC_GOTOFF)
7681 return NULL_RTX;
7682 x = XVECEXP (x, 0, 0);
7683 }
7684
7685 if (GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
7686 return get_pool_constant (x);
7687
7688 return NULL_RTX;
7689 }
7690
7691 void
7692 ix86_expand_move (mode, operands)
7693 enum machine_mode mode;
7694 rtx operands[];
7695 {
7696 int strict = (reload_in_progress || reload_completed);
7697 rtx insn, op0, op1, tmp;
7698
7699 op0 = operands[0];
7700 op1 = operands[1];
7701
7702 /* ??? We have a slight problem. We need to say that tls symbols are
7703 not legitimate constants so that reload does not helpfully reload
7704 these constants from a REG_EQUIV, which we cannot handle. (Recall
7705 that general- and local-dynamic address resolution requires a
7706 function call.)
7707
7708 However, if we say that tls symbols are not legitimate constants,
7709 then emit_move_insn will helpfully drop them into the constant pool.
7710
7711 It is far easier to work around emit_move_insn than reload. Recognize
7712 the MEM that we would have created and extract the symbol_ref. */
7713
7714 if (mode == Pmode
7715 && GET_CODE (op1) == MEM
7716 && RTX_UNCHANGING_P (op1))
7717 {
7718 tmp = maybe_get_pool_constant (op1);
7719 /* Note that we only care about symbolic constants here, which
7720 unlike CONST_INT will always have a proper mode. */
7721 if (tmp && GET_MODE (tmp) == Pmode)
7722 op1 = tmp;
7723 }
7724
7725 if (tls_symbolic_operand (op1, Pmode))
7726 {
7727 op1 = legitimize_address (op1, op1, VOIDmode);
7728 if (GET_CODE (op0) == MEM)
7729 {
7730 tmp = gen_reg_rtx (mode);
7731 emit_insn (gen_rtx_SET (VOIDmode, tmp, op1));
7732 op1 = tmp;
7733 }
7734 }
7735 else if (flag_pic && mode == Pmode && symbolic_operand (op1, Pmode))
7736 {
7737 #if TARGET_MACHO
7738 if (MACHOPIC_PURE)
7739 {
7740 rtx temp = ((reload_in_progress
7741 || ((op0 && GET_CODE (op0) == REG)
7742 && mode == Pmode))
7743 ? op0 : gen_reg_rtx (Pmode));
7744 op1 = machopic_indirect_data_reference (op1, temp);
7745 op1 = machopic_legitimize_pic_address (op1, mode,
7746 temp == op1 ? 0 : temp);
7747 }
7748 else
7749 {
7750 if (MACHOPIC_INDIRECT)
7751 op1 = machopic_indirect_data_reference (op1, 0);
7752 }
7753 if (op0 != op1)
7754 {
7755 insn = gen_rtx_SET (VOIDmode, op0, op1);
7756 emit_insn (insn);
7757 }
7758 return;
7759 #endif /* TARGET_MACHO */
7760 if (GET_CODE (op0) == MEM)
7761 op1 = force_reg (Pmode, op1);
7762 else
7763 {
7764 rtx temp = op0;
7765 if (GET_CODE (temp) != REG)
7766 temp = gen_reg_rtx (Pmode);
7767 temp = legitimize_pic_address (op1, temp);
7768 if (temp == op0)
7769 return;
7770 op1 = temp;
7771 }
7772 }
7773 else
7774 {
7775 if (GET_CODE (op0) == MEM
7776 && (PUSH_ROUNDING (GET_MODE_SIZE (mode)) != GET_MODE_SIZE (mode)
7777 || !push_operand (op0, mode))
7778 && GET_CODE (op1) == MEM)
7779 op1 = force_reg (mode, op1);
7780
7781 if (push_operand (op0, mode)
7782 && ! general_no_elim_operand (op1, mode))
7783 op1 = copy_to_mode_reg (mode, op1);
7784
7785 /* Force large constants in 64bit compilation into register
7786 to get them CSEed. */
7787 if (TARGET_64BIT && mode == DImode
7788 && immediate_operand (op1, mode)
7789 && !x86_64_zero_extended_value (op1)
7790 && !register_operand (op0, mode)
7791 && optimize && !reload_completed && !reload_in_progress)
7792 op1 = copy_to_mode_reg (mode, op1);
7793
7794 if (FLOAT_MODE_P (mode))
7795 {
7796 /* If we are loading a floating point constant to a register,
7797 force the value to memory now, since we'll get better code
7798 out of the back end. */
7799
7800 if (strict)
7801 ;
7802 else if (GET_CODE (op1) == CONST_DOUBLE
7803 && register_operand (op0, mode))
7804 op1 = validize_mem (force_const_mem (mode, op1));
7805 }
7806 }
7807
7808 insn = gen_rtx_SET (VOIDmode, op0, op1);
7809
7810 emit_insn (insn);
7811 }
7812
7813 void
7814 ix86_expand_vector_move (mode, operands)
7815 enum machine_mode mode;
7816 rtx operands[];
7817 {
7818 /* Force constants other than zero into memory. We do not know how
7819 the instructions used to build constants modify the upper 64 bits
7820 of the register; once we have that information, we may be able
7821 to handle some of them more efficiently. */
7822 if ((reload_in_progress | reload_completed) == 0
7823 && register_operand (operands[0], mode)
7824 && CONSTANT_P (operands[1]))
7825 operands[1] = force_const_mem (mode, operands[1]);
7826
7827 /* Make operand1 a register if neither operand is a register already. */
7828 if (!no_new_pseudos
7829 && !register_operand (operands[0], mode)
7830 && !register_operand (operands[1], mode))
7831 {
7832 rtx temp = force_reg (GET_MODE (operands[1]), operands[1]);
7833 emit_move_insn (operands[0], temp);
7834 return;
7835 }
7836
7837 emit_insn (gen_rtx_SET (VOIDmode, operands[0], operands[1]));
7838 }
7839
7840 /* Attempt to expand a binary operator. Make the expansion closer to the
7841 actual machine than just general_operand, which would allow 3 separate
7842 memory references (one output, two input) in a single insn. */
7843
7844 void
7845 ix86_expand_binary_operator (code, mode, operands)
7846 enum rtx_code code;
7847 enum machine_mode mode;
7848 rtx operands[];
7849 {
7850 int matching_memory;
7851 rtx src1, src2, dst, op, clob;
7852
7853 dst = operands[0];
7854 src1 = operands[1];
7855 src2 = operands[2];
7856
7857 /* Recognize <var1> = <value> <op> <var1> for commutative operators */
7858 if (GET_RTX_CLASS (code) == 'c'
7859 && (rtx_equal_p (dst, src2)
7860 || immediate_operand (src1, mode)))
7861 {
7862 rtx temp = src1;
7863 src1 = src2;
7864 src2 = temp;
7865 }
7866
7867 /* If the destination is memory, and we do not have matching source
7868 operands, do things in registers. */
7869 matching_memory = 0;
7870 if (GET_CODE (dst) == MEM)
7871 {
7872 if (rtx_equal_p (dst, src1))
7873 matching_memory = 1;
7874 else if (GET_RTX_CLASS (code) == 'c'
7875 && rtx_equal_p (dst, src2))
7876 matching_memory = 2;
7877 else
7878 dst = gen_reg_rtx (mode);
7879 }
7880
7881 /* The source operands cannot both be in memory. */
7882 if (GET_CODE (src1) == MEM && GET_CODE (src2) == MEM)
7883 {
7884 if (matching_memory != 2)
7885 src2 = force_reg (mode, src2);
7886 else
7887 src1 = force_reg (mode, src1);
7888 }
7889
7890 /* If the operation is not commutative, source 1 cannot be a constant
7891 or non-matching memory. */
7892 if ((CONSTANT_P (src1)
7893 || (!matching_memory && GET_CODE (src1) == MEM))
7894 && GET_RTX_CLASS (code) != 'c')
7895 src1 = force_reg (mode, src1);
7896
7897 /* If optimizing, copy to regs to improve CSE */
7898 if (optimize && ! no_new_pseudos)
7899 {
7900 if (GET_CODE (dst) == MEM)
7901 dst = gen_reg_rtx (mode);
7902 if (GET_CODE (src1) == MEM)
7903 src1 = force_reg (mode, src1);
7904 if (GET_CODE (src2) == MEM)
7905 src2 = force_reg (mode, src2);
7906 }
7907
7908 /* Emit the instruction. */
7909
7910 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, mode, src1, src2));
7911 if (reload_in_progress)
7912 {
7913 /* Reload doesn't know about the flags register, and doesn't know that
7914 it doesn't want to clobber it. We can only do this with PLUS. */
7915 if (code != PLUS)
7916 abort ();
7917 emit_insn (op);
7918 }
7919 else
7920 {
7921 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
7922 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
7923 }
7924
7925 /* Fix up the destination if needed. */
7926 if (dst != operands[0])
7927 emit_move_insn (operands[0], dst);
7928 }
7929
7930 /* Return TRUE or FALSE depending on whether the binary operator meets the
7931 appropriate constraints. */
7932
7933 int
7934 ix86_binary_operator_ok (code, mode, operands)
7935 enum rtx_code code;
7936 enum machine_mode mode ATTRIBUTE_UNUSED;
7937 rtx operands[3];
7938 {
7939 /* The source operands cannot both be in memory. */
7940 if (GET_CODE (operands[1]) == MEM && GET_CODE (operands[2]) == MEM)
7941 return 0;
7942 /* If the operation is not commutative, source 1 cannot be a constant. */
7943 if (CONSTANT_P (operands[1]) && GET_RTX_CLASS (code) != 'c')
7944 return 0;
7945 /* If the destination is memory, we must have a matching source operand. */
7946 if (GET_CODE (operands[0]) == MEM
7947 && ! (rtx_equal_p (operands[0], operands[1])
7948 || (GET_RTX_CLASS (code) == 'c'
7949 && rtx_equal_p (operands[0], operands[2]))))
7950 return 0;
7951 /* If the operation is not commutative and source 1 is memory, we must
7952 have a matching destination. */
7953 if (GET_CODE (operands[1]) == MEM
7954 && GET_RTX_CLASS (code) != 'c'
7955 && ! rtx_equal_p (operands[0], operands[1]))
7956 return 0;
7957 return 1;
7958 }
7959
7960 /* Attempt to expand a unary operator. Make the expansion closer to the
7961 actual machine than just general_operand, which would allow 2 separate
7962 memory references (one output, one input) in a single insn. */
7963
7964 void
7965 ix86_expand_unary_operator (code, mode, operands)
7966 enum rtx_code code;
7967 enum machine_mode mode;
7968 rtx operands[];
7969 {
7970 int matching_memory;
7971 rtx src, dst, op, clob;
7972
7973 dst = operands[0];
7974 src = operands[1];
7975
7976 /* If the destination is memory, and we do not have matching source
7977 operands, do things in registers. */
7978 matching_memory = 0;
7979 if (GET_CODE (dst) == MEM)
7980 {
7981 if (rtx_equal_p (dst, src))
7982 matching_memory = 1;
7983 else
7984 dst = gen_reg_rtx (mode);
7985 }
7986
7987 /* When source operand is memory, destination must match. */
7988 if (!matching_memory && GET_CODE (src) == MEM)
7989 src = force_reg (mode, src);
7990
7991 /* If optimizing, copy to regs to improve CSE */
7992 if (optimize && ! no_new_pseudos)
7993 {
7994 if (GET_CODE (dst) == MEM)
7995 dst = gen_reg_rtx (mode);
7996 if (GET_CODE (src) == MEM)
7997 src = force_reg (mode, src);
7998 }
7999
8000 /* Emit the instruction. */
8001
8002 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_e (code, mode, src));
8003 if (reload_in_progress || code == NOT)
8004 {
8005 /* Reload doesn't know about the flags register, and doesn't know that
8006 it doesn't want to clobber it. */
8007 if (code != NOT)
8008 abort ();
8009 emit_insn (op);
8010 }
8011 else
8012 {
8013 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
8014 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
8015 }
8016
8017 /* Fix up the destination if needed. */
8018 if (dst != operands[0])
8019 emit_move_insn (operands[0], dst);
8020 }
8021
8022 /* Return TRUE or FALSE depending on whether the unary operator meets the
8023 appropriate constraints. */
8024
8025 int
8026 ix86_unary_operator_ok (code, mode, operands)
8027 enum rtx_code code ATTRIBUTE_UNUSED;
8028 enum machine_mode mode ATTRIBUTE_UNUSED;
8029 rtx operands[2] ATTRIBUTE_UNUSED;
8030 {
8031 /* If one of operands is memory, source and destination must match. */
8032 if ((GET_CODE (operands[0]) == MEM
8033 || GET_CODE (operands[1]) == MEM)
8034 && ! rtx_equal_p (operands[0], operands[1]))
8035 return FALSE;
8036 return TRUE;
8037 }
8038
8039 /* Return TRUE or FALSE depending on whether the first SET in INSN
8040 has source and destination with matching CC modes, and whether the
8041 CC mode is at least as constrained as REQ_MODE. */
8042
8043 int
8044 ix86_match_ccmode (insn, req_mode)
8045 rtx insn;
8046 enum machine_mode req_mode;
8047 {
8048 rtx set;
8049 enum machine_mode set_mode;
8050
8051 set = PATTERN (insn);
8052 if (GET_CODE (set) == PARALLEL)
8053 set = XVECEXP (set, 0, 0);
8054 if (GET_CODE (set) != SET)
8055 abort ();
8056 if (GET_CODE (SET_SRC (set)) != COMPARE)
8057 abort ();
8058
8059 set_mode = GET_MODE (SET_DEST (set));
8060 switch (set_mode)
8061 {
8062 case CCNOmode:
8063 if (req_mode != CCNOmode
8064 && (req_mode != CCmode
8065 || XEXP (SET_SRC (set), 1) != const0_rtx))
8066 return 0;
8067 break;
8068 case CCmode:
8069 if (req_mode == CCGCmode)
8070 return 0;
8071 /* FALLTHRU */
8072 case CCGCmode:
8073 if (req_mode == CCGOCmode || req_mode == CCNOmode)
8074 return 0;
8075 /* FALLTHRU */
8076 case CCGOCmode:
8077 if (req_mode == CCZmode)
8078 return 0;
8079 /* FALLTHRU */
8080 case CCZmode:
8081 break;
8082
8083 default:
8084 abort ();
8085 }
8086
8087 return (GET_MODE (SET_SRC (set)) == set_mode);
8088 }
8089
8090 /* Generate insn patterns to do an integer compare of OPERANDS. */
8091
8092 static rtx
8093 ix86_expand_int_compare (code, op0, op1)
8094 enum rtx_code code;
8095 rtx op0, op1;
8096 {
8097 enum machine_mode cmpmode;
8098 rtx tmp, flags;
8099
8100 cmpmode = SELECT_CC_MODE (code, op0, op1);
8101 flags = gen_rtx_REG (cmpmode, FLAGS_REG);
8102
8103 /* This is very simple, but making the interface the same as in the
8104 FP case makes the rest of the code easier. */
8105 tmp = gen_rtx_COMPARE (cmpmode, op0, op1);
8106 emit_insn (gen_rtx_SET (VOIDmode, flags, tmp));
8107
8108 /* Return the test that should be put into the flags user, i.e.
8109 the bcc, scc, or cmov instruction. */
8110 return gen_rtx_fmt_ee (code, VOIDmode, flags, const0_rtx);
8111 }
8112
8113 /* Figure out whether to use ordered or unordered fp comparisons.
8114 Return the appropriate mode to use. */
8115
8116 enum machine_mode
8117 ix86_fp_compare_mode (code)
8118 enum rtx_code code ATTRIBUTE_UNUSED;
8119 {
8120 /* ??? In order to make all comparisons reversible, we do all comparisons
8121 non-trapping when compiling for IEEE. Once gcc is able to distinguish
8122 all forms of trapping and nontrapping comparisons, we can make inequality
8123 comparisons trapping again, since it results in better code when using
8124 FCOM based compares. */
8125 return TARGET_IEEE_FP ? CCFPUmode : CCFPmode;
8126 }
8127
8128 enum machine_mode
8129 ix86_cc_mode (code, op0, op1)
8130 enum rtx_code code;
8131 rtx op0, op1;
8132 {
8133 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_FLOAT)
8134 return ix86_fp_compare_mode (code);
8135 switch (code)
8136 {
8137 /* Only zero flag is needed. */
8138 case EQ: /* ZF=0 */
8139 case NE: /* ZF!=0 */
8140 return CCZmode;
8141 /* Codes needing carry flag. */
8142 case GEU: /* CF=0 */
8143 case GTU: /* CF=0 & ZF=0 */
8144 case LTU: /* CF=1 */
8145 case LEU: /* CF=1 | ZF=1 */
8146 return CCmode;
8147 /* Codes possibly doable only with sign flag when
8148 comparing against zero. */
8149 case GE: /* SF=OF or SF=0 */
8150 case LT: /* SF<>OF or SF=1 */
8151 if (op1 == const0_rtx)
8152 return CCGOCmode;
8153 else
8154 /* For other cases Carry flag is not required. */
8155 return CCGCmode;
8156 /* Codes doable only with sign flag when comparing
8157 against zero, but we miss the jump instruction for it,
8158 so we need to use relational tests against overflow,
8159 which thus needs to be zero. */
8160 case GT: /* ZF=0 & SF=OF */
8161 case LE: /* ZF=1 | SF<>OF */
8162 if (op1 == const0_rtx)
8163 return CCNOmode;
8164 else
8165 return CCGCmode;
8166 /* The strcmp pattern does (use flags), and combine may ask us for the
8167 proper mode. */
8168 case USE:
8169 return CCmode;
8170 default:
8171 abort ();
8172 }
8173 }
8174
8175 /* Return true if we should use an FCOMI instruction for this fp comparison. */
8176
8177 int
8178 ix86_use_fcomi_compare (code)
8179 enum rtx_code code ATTRIBUTE_UNUSED;
8180 {
8181 enum rtx_code swapped_code = swap_condition (code);
8182 return ((ix86_fp_comparison_cost (code) == ix86_fp_comparison_fcomi_cost (code))
8183 || (ix86_fp_comparison_cost (swapped_code)
8184 == ix86_fp_comparison_fcomi_cost (swapped_code)));
8185 }
8186
8187 /* Swap, force into registers, or otherwise massage the two operands
8188 to a fp comparison. The operands are updated in place; the new
8189 comparison code is returned. */
8190
8191 static enum rtx_code
8192 ix86_prepare_fp_compare_args (code, pop0, pop1)
8193 enum rtx_code code;
8194 rtx *pop0, *pop1;
8195 {
8196 enum machine_mode fpcmp_mode = ix86_fp_compare_mode (code);
8197 rtx op0 = *pop0, op1 = *pop1;
8198 enum machine_mode op_mode = GET_MODE (op0);
8199 int is_sse = SSE_REG_P (op0) | SSE_REG_P (op1);
8200
8201 /* All of the unordered compare instructions only work on registers.
8202 The same is true of the XFmode compare instructions and of the
8203 fcomi compare instructions. */
8204
8205 if (!is_sse
8206 && (fpcmp_mode == CCFPUmode
8207 || op_mode == XFmode
8208 || op_mode == TFmode
8209 || ix86_use_fcomi_compare (code)))
8210 {
8211 op0 = force_reg (op_mode, op0);
8212 op1 = force_reg (op_mode, op1);
8213 }
8214 else
8215 {
8216 /* %%% We only allow op1 in memory; op0 must be st(0). So swap
8217 things around if they appear profitable, otherwise force op0
8218 into a register. */
8219
8220 if (standard_80387_constant_p (op0) == 0
8221 || (GET_CODE (op0) == MEM
8222 && ! (standard_80387_constant_p (op1) == 0
8223 || GET_CODE (op1) == MEM)))
8224 {
8225 rtx tmp;
8226 tmp = op0, op0 = op1, op1 = tmp;
8227 code = swap_condition (code);
8228 }
8229
8230 if (GET_CODE (op0) != REG)
8231 op0 = force_reg (op_mode, op0);
8232
8233 if (CONSTANT_P (op1))
8234 {
8235 if (standard_80387_constant_p (op1))
8236 op1 = force_reg (op_mode, op1);
8237 else
8238 op1 = validize_mem (force_const_mem (op_mode, op1));
8239 }
8240 }
8241
8242 /* Try to rearrange the comparison to make it cheaper. */
8243 if (ix86_fp_comparison_cost (code)
8244 > ix86_fp_comparison_cost (swap_condition (code))
8245 && (GET_CODE (op1) == REG || !no_new_pseudos))
8246 {
8247 rtx tmp;
8248 tmp = op0, op0 = op1, op1 = tmp;
8249 code = swap_condition (code);
8250 if (GET_CODE (op0) != REG)
8251 op0 = force_reg (op_mode, op0);
8252 }
8253
8254 *pop0 = op0;
8255 *pop1 = op1;
8256 return code;
8257 }
8258
8259 /* Convert comparison codes we use to represent FP comparison to integer
8260 code that will result in proper branch. Return UNKNOWN if no such code
8261 is available. */
8262 static enum rtx_code
8263 ix86_fp_compare_code_to_integer (code)
8264 enum rtx_code code;
8265 {
8266 switch (code)
8267 {
8268 case GT:
8269 return GTU;
8270 case GE:
8271 return GEU;
8272 case ORDERED:
8273 case UNORDERED:
8274 return code;
8275 break;
8276 case UNEQ:
8277 return EQ;
8278 break;
8279 case UNLT:
8280 return LTU;
8281 break;
8282 case UNLE:
8283 return LEU;
8284 break;
8285 case LTGT:
8286 return NE;
8287 break;
8288 default:
8289 return UNKNOWN;
8290 }
8291 }
8292
8293 /* Split comparison code CODE into comparisons we can do using branch
8294 instructions. BYPASS_CODE is comparison code for branch that will
8295 branch around FIRST_CODE and SECOND_CODE. If some of branches
8296 is not required, set value to NIL.
8297 We never require more than two branches. */
8298 static void
8299 ix86_fp_comparison_codes (code, bypass_code, first_code, second_code)
8300 enum rtx_code code, *bypass_code, *first_code, *second_code;
8301 {
8302 *first_code = code;
8303 *bypass_code = NIL;
8304 *second_code = NIL;
8305
8306 /* The fcomi comparison sets flags as follows:
8307
8308 cmp ZF PF CF
8309 > 0 0 0
8310 < 0 0 1
8311 = 1 0 0
8312 un 1 1 1 */
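/* Illustrative note: under IEEE a plain LT cannot be tested with a
   single branch, because CF=1 both for "less than" and for unordered
   operands; it is therefore split below into a bypass branch on
   UNORDERED plus a branch on UNLT.  */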
8313
8314 switch (code)
8315 {
8316 case GT: /* GTU - CF=0 & ZF=0 */
8317 case GE: /* GEU - CF=0 */
8318 case ORDERED: /* PF=0 */
8319 case UNORDERED: /* PF=1 */
8320 case UNEQ: /* EQ - ZF=1 */
8321 case UNLT: /* LTU - CF=1 */
8322 case UNLE: /* LEU - CF=1 | ZF=1 */
8323 case LTGT: /* EQ - ZF=0 */
8324 break;
8325 case LT: /* LTU - CF=1 - fails on unordered */
8326 *first_code = UNLT;
8327 *bypass_code = UNORDERED;
8328 break;
8329 case LE: /* LEU - CF=1 | ZF=1 - fails on unordered */
8330 *first_code = UNLE;
8331 *bypass_code = UNORDERED;
8332 break;
8333 case EQ: /* EQ - ZF=1 - fails on unordered */
8334 *first_code = UNEQ;
8335 *bypass_code = UNORDERED;
8336 break;
8337 case NE: /* NE - ZF=0 - fails on unordered */
8338 *first_code = LTGT;
8339 *second_code = UNORDERED;
8340 break;
8341 case UNGE: /* GEU - CF=0 - fails on unordered */
8342 *first_code = GE;
8343 *second_code = UNORDERED;
8344 break;
8345 case UNGT: /* GTU - CF=0 & ZF=0 - fails on unordered */
8346 *first_code = GT;
8347 *second_code = UNORDERED;
8348 break;
8349 default:
8350 abort ();
8351 }
8352 if (!TARGET_IEEE_FP)
8353 {
8354 *second_code = NIL;
8355 *bypass_code = NIL;
8356 }
8357 }
8358
8359 /* Return the cost of a comparison done with fcom + arithmetic operations on AX.
8360 All following functions use the number of instructions as a cost metric.
8361 In the future this should be tweaked to compute bytes for optimize_size and
8362 take into account the performance of various instructions on various CPUs. */
8363 static int
8364 ix86_fp_comparison_arithmetics_cost (code)
8365 enum rtx_code code;
8366 {
8367 if (!TARGET_IEEE_FP)
8368 return 4;
8369 /* The cost of code output by ix86_expand_fp_compare. */
8370 switch (code)
8371 {
8372 case UNLE:
8373 case UNLT:
8374 case LTGT:
8375 case GT:
8376 case GE:
8377 case UNORDERED:
8378 case ORDERED:
8379 case UNEQ:
8380 return 4;
8381 break;
8382 case LT:
8383 case NE:
8384 case EQ:
8385 case UNGE:
8386 return 5;
8387 break;
8388 case LE:
8389 case UNGT:
8390 return 6;
8391 break;
8392 default:
8393 abort ();
8394 }
8395 }
8396
8397 /* Return cost of comparison done using fcomi operation.
8398 See ix86_fp_comparison_arithmetics_cost for the metrics. */
8399 static int
8400 ix86_fp_comparison_fcomi_cost (code)
8401 enum rtx_code code;
8402 {
8403 enum rtx_code bypass_code, first_code, second_code;
8404 /* Return an arbitrarily high cost when the instruction is not supported -
8405 this prevents gcc from using it. */
8406 if (!TARGET_CMOVE)
8407 return 1024;
8408 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
8409 return (bypass_code != NIL || second_code != NIL) + 2;
8410 }
8411
8412 /* Return cost of comparison done using sahf operation.
8413 See ix86_fp_comparison_arithmetics_cost for the metrics. */
8414 static int
8415 ix86_fp_comparison_sahf_cost (code)
8416 enum rtx_code code;
8417 {
8418 enum rtx_code bypass_code, first_code, second_code;
8419 /* Return an arbitrarily high cost when the instruction is not preferred -
8420 this prevents gcc from using it. */
8421 if (!TARGET_USE_SAHF && !optimize_size)
8422 return 1024;
8423 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
8424 return (bypass_code != NIL || second_code != NIL) + 3;
8425 }
8426
8427 /* Compute cost of the comparison done using any method.
8428 See ix86_fp_comparison_arithmetics_cost for the metrics. */
8429 static int
8430 ix86_fp_comparison_cost (code)
8431 enum rtx_code code;
8432 {
8433 int fcomi_cost, sahf_cost, arithmetics_cost = 1024;
8434 int min;
8435
8436 fcomi_cost = ix86_fp_comparison_fcomi_cost (code);
8437 sahf_cost = ix86_fp_comparison_sahf_cost (code);
8438
8439 min = arithmetics_cost = ix86_fp_comparison_arithmetics_cost (code);
8440 if (min > sahf_cost)
8441 min = sahf_cost;
8442 if (min > fcomi_cost)
8443 min = fcomi_cost;
8444 return min;
8445 }
8446
8447 /* Generate insn patterns to do a floating point compare of OPERANDS. */
8448
8449 static rtx
8450 ix86_expand_fp_compare (code, op0, op1, scratch, second_test, bypass_test)
8451 enum rtx_code code;
8452 rtx op0, op1, scratch;
8453 rtx *second_test;
8454 rtx *bypass_test;
8455 {
8456 enum machine_mode fpcmp_mode, intcmp_mode;
8457 rtx tmp, tmp2;
8458 int cost = ix86_fp_comparison_cost (code);
8459 enum rtx_code bypass_code, first_code, second_code;
8460
8461 fpcmp_mode = ix86_fp_compare_mode (code);
8462 code = ix86_prepare_fp_compare_args (code, &op0, &op1);
8463
8464 if (second_test)
8465 *second_test = NULL_RTX;
8466 if (bypass_test)
8467 *bypass_test = NULL_RTX;
8468
8469 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
8470
8471 /* Do fcomi/sahf based test when profitable. */
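/* Clarifying note: "profitable" here means the fcom + AH-arithmetic
   sequence costed above is strictly more expensive than the best
   fcomi/sahf sequence, and any bypass/second branches that sequence
   needs can be handed back to the caller.  */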
8472 if ((bypass_code == NIL || bypass_test)
8473 && (second_code == NIL || second_test)
8474 && ix86_fp_comparison_arithmetics_cost (code) > cost)
8475 {
8476 if (TARGET_CMOVE)
8477 {
8478 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
8479 tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
8480 tmp);
8481 emit_insn (tmp);
8482 }
8483 else
8484 {
8485 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
8486 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
8487 if (!scratch)
8488 scratch = gen_reg_rtx (HImode);
8489 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
8490 emit_insn (gen_x86_sahf_1 (scratch));
8491 }
8492
8493 /* The FP codes work out to act like unsigned. */
8494 intcmp_mode = fpcmp_mode;
8495 code = first_code;
8496 if (bypass_code != NIL)
8497 *bypass_test = gen_rtx_fmt_ee (bypass_code, VOIDmode,
8498 gen_rtx_REG (intcmp_mode, FLAGS_REG),
8499 const0_rtx);
8500 if (second_code != NIL)
8501 *second_test = gen_rtx_fmt_ee (second_code, VOIDmode,
8502 gen_rtx_REG (intcmp_mode, FLAGS_REG),
8503 const0_rtx);
8504 }
8505 else
8506 {
8507 /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first. */
8508 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
8509 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
8510 if (!scratch)
8511 scratch = gen_reg_rtx (HImode);
8512 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
8513
8514 /* In the unordered case, we have to check C2 for NaN's, which
8515 doesn't happen to work out to anything nice combination-wise.
8516 So do some bit twiddling on the value we've got in AH to come
8517 up with an appropriate set of condition codes. */
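/* For reference: after fnstsw %ax, AH holds the FPU condition bits
   C0, C2 and C3 in bits 0, 2 and 6, so the masks below mean
   0x01 = C0 (carry), 0x04 = C2 (NaN/unordered), 0x40 = C3 (zero/equal)
   and 0x45 = C0|C2|C3.  */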
8518
8519 intcmp_mode = CCNOmode;
8520 switch (code)
8521 {
8522 case GT:
8523 case UNGT:
8524 if (code == GT || !TARGET_IEEE_FP)
8525 {
8526 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
8527 code = EQ;
8528 }
8529 else
8530 {
8531 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
8532 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
8533 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x44)));
8534 intcmp_mode = CCmode;
8535 code = GEU;
8536 }
8537 break;
8538 case LT:
8539 case UNLT:
8540 if (code == LT && TARGET_IEEE_FP)
8541 {
8542 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
8543 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x01)));
8544 intcmp_mode = CCmode;
8545 code = EQ;
8546 }
8547 else
8548 {
8549 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x01)));
8550 code = NE;
8551 }
8552 break;
8553 case GE:
8554 case UNGE:
8555 if (code == GE || !TARGET_IEEE_FP)
8556 {
8557 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x05)));
8558 code = EQ;
8559 }
8560 else
8561 {
8562 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
8563 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
8564 GEN_INT (0x01)));
8565 code = NE;
8566 }
8567 break;
8568 case LE:
8569 case UNLE:
8570 if (code == LE && TARGET_IEEE_FP)
8571 {
8572 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
8573 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
8574 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
8575 intcmp_mode = CCmode;
8576 code = LTU;
8577 }
8578 else
8579 {
8580 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
8581 code = NE;
8582 }
8583 break;
8584 case EQ:
8585 case UNEQ:
8586 if (code == EQ && TARGET_IEEE_FP)
8587 {
8588 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
8589 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
8590 intcmp_mode = CCmode;
8591 code = EQ;
8592 }
8593 else
8594 {
8595 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
8596 code = NE;
8597 break;
8598 }
8599 break;
8600 case NE:
8601 case LTGT:
8602 if (code == NE && TARGET_IEEE_FP)
8603 {
8604 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
8605 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
8606 GEN_INT (0x40)));
8607 code = NE;
8608 }
8609 else
8610 {
8611 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
8612 code = EQ;
8613 }
8614 break;
8615
8616 case UNORDERED:
8617 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
8618 code = NE;
8619 break;
8620 case ORDERED:
8621 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
8622 code = EQ;
8623 break;
8624
8625 default:
8626 abort ();
8627 }
8628 }
8629
8630 /* Return the test that should be put into the flags user, i.e.
8631 the bcc, scc, or cmov instruction. */
8632 return gen_rtx_fmt_ee (code, VOIDmode,
8633 gen_rtx_REG (intcmp_mode, FLAGS_REG),
8634 const0_rtx);
8635 }
8636
8637 rtx
8638 ix86_expand_compare (code, second_test, bypass_test)
8639 enum rtx_code code;
8640 rtx *second_test, *bypass_test;
8641 {
8642 rtx op0, op1, ret;
8643 op0 = ix86_compare_op0;
8644 op1 = ix86_compare_op1;
8645
8646 if (second_test)
8647 *second_test = NULL_RTX;
8648 if (bypass_test)
8649 *bypass_test = NULL_RTX;
8650
8651 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_FLOAT)
8652 ret = ix86_expand_fp_compare (code, op0, op1, NULL_RTX,
8653 second_test, bypass_test);
8654 else
8655 ret = ix86_expand_int_compare (code, op0, op1);
8656
8657 return ret;
8658 }
8659
8660 /* Return true if the CODE will result in a nontrivial jump sequence. */
8661 bool
8662 ix86_fp_jump_nontrivial_p (code)
8663 enum rtx_code code;
8664 {
8665 enum rtx_code bypass_code, first_code, second_code;
8666 if (!TARGET_CMOVE)
8667 return true;
8668 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
8669 return bypass_code != NIL || second_code != NIL;
8670 }
8671
8672 void
8673 ix86_expand_branch (code, label)
8674 enum rtx_code code;
8675 rtx label;
8676 {
8677 rtx tmp;
8678
8679 switch (GET_MODE (ix86_compare_op0))
8680 {
8681 case QImode:
8682 case HImode:
8683 case SImode:
8684 simple:
8685 tmp = ix86_expand_compare (code, NULL, NULL);
8686 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
8687 gen_rtx_LABEL_REF (VOIDmode, label),
8688 pc_rtx);
8689 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
8690 return;
8691
8692 case SFmode:
8693 case DFmode:
8694 case XFmode:
8695 case TFmode:
8696 {
8697 rtvec vec;
8698 int use_fcomi;
8699 enum rtx_code bypass_code, first_code, second_code;
8700
8701 code = ix86_prepare_fp_compare_args (code, &ix86_compare_op0,
8702 &ix86_compare_op1);
8703
8704 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
8705
8706 /* Check whether we will use the natural sequence with one jump. If
8707 so, we can expand the jump early. Otherwise delay expansion by
8708 creating a compound insn so as not to confuse the optimizers. */
8709 if (bypass_code == NIL && second_code == NIL
8710 && TARGET_CMOVE)
8711 {
8712 ix86_split_fp_branch (code, ix86_compare_op0, ix86_compare_op1,
8713 gen_rtx_LABEL_REF (VOIDmode, label),
8714 pc_rtx, NULL_RTX);
8715 }
8716 else
8717 {
8718 tmp = gen_rtx_fmt_ee (code, VOIDmode,
8719 ix86_compare_op0, ix86_compare_op1);
8720 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
8721 gen_rtx_LABEL_REF (VOIDmode, label),
8722 pc_rtx);
8723 tmp = gen_rtx_SET (VOIDmode, pc_rtx, tmp);
8724
8725 use_fcomi = ix86_use_fcomi_compare (code);
8726 vec = rtvec_alloc (3 + !use_fcomi);
8727 RTVEC_ELT (vec, 0) = tmp;
8728 RTVEC_ELT (vec, 1)
8729 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, 18));
8730 RTVEC_ELT (vec, 2)
8731 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, 17));
8732 if (! use_fcomi)
8733 RTVEC_ELT (vec, 3)
8734 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (HImode));
8735
8736 emit_jump_insn (gen_rtx_PARALLEL (VOIDmode, vec));
8737 }
8738 return;
8739 }
8740
8741 case DImode:
8742 if (TARGET_64BIT)
8743 goto simple;
8744 /* Expand DImode branch into multiple compare+branch. */
8745 {
8746 rtx lo[2], hi[2], label2;
8747 enum rtx_code code1, code2, code3;
8748
8749 if (CONSTANT_P (ix86_compare_op0) && ! CONSTANT_P (ix86_compare_op1))
8750 {
8751 tmp = ix86_compare_op0;
8752 ix86_compare_op0 = ix86_compare_op1;
8753 ix86_compare_op1 = tmp;
8754 code = swap_condition (code);
8755 }
8756 split_di (&ix86_compare_op0, 1, lo+0, hi+0);
8757 split_di (&ix86_compare_op1, 1, lo+1, hi+1);
8758
8759 /* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to
8760 avoid two branches. This costs one extra insn, so disable when
8761 optimizing for size. */
8762
8763 if ((code == EQ || code == NE)
8764 && (!optimize_size
8765 || hi[1] == const0_rtx || lo[1] == const0_rtx))
8766 {
8767 rtx xor0, xor1;
8768
8769 xor1 = hi[0];
8770 if (hi[1] != const0_rtx)
8771 xor1 = expand_binop (SImode, xor_optab, xor1, hi[1],
8772 NULL_RTX, 0, OPTAB_WIDEN);
8773
8774 xor0 = lo[0];
8775 if (lo[1] != const0_rtx)
8776 xor0 = expand_binop (SImode, xor_optab, xor0, lo[1],
8777 NULL_RTX, 0, OPTAB_WIDEN);
8778
8779 tmp = expand_binop (SImode, ior_optab, xor1, xor0,
8780 NULL_RTX, 0, OPTAB_WIDEN);
8781
8782 ix86_compare_op0 = tmp;
8783 ix86_compare_op1 = const0_rtx;
8784 ix86_expand_branch (code, label);
8785 return;
8786 }
8787
8788 /* Otherwise, if we are doing less-than or greater-or-equal-than,
8789 op1 is a constant and the low word is zero, then we can just
8790 examine the high word. */
8791
8792 if (GET_CODE (hi[1]) == CONST_INT && lo[1] == const0_rtx)
8793 switch (code)
8794 {
8795 case LT: case LTU: case GE: case GEU:
8796 ix86_compare_op0 = hi[0];
8797 ix86_compare_op1 = hi[1];
8798 ix86_expand_branch (code, label);
8799 return;
8800 default:
8801 break;
8802 }
8803
8804 /* Otherwise, we need two or three jumps. */
8805
8806 label2 = gen_label_rtx ();
8807
8808 code1 = code;
8809 code2 = swap_condition (code);
8810 code3 = unsigned_condition (code);
8811
8812 switch (code)
8813 {
8814 case LT: case GT: case LTU: case GTU:
8815 break;
8816
8817 case LE: code1 = LT; code2 = GT; break;
8818 case GE: code1 = GT; code2 = LT; break;
8819 case LEU: code1 = LTU; code2 = GTU; break;
8820 case GEU: code1 = GTU; code2 = LTU; break;
8821
8822 case EQ: code1 = NIL; code2 = NE; break;
8823 case NE: code2 = NIL; break;
8824
8825 default:
8826 abort ();
8827 }
8828
8829 /*
8830 * a < b =>
8831 * if (hi(a) < hi(b)) goto true;
8832 * if (hi(a) > hi(b)) goto false;
8833 * if (lo(a) < lo(b)) goto true;
8834 * false:
8835 */
8836
8837 ix86_compare_op0 = hi[0];
8838 ix86_compare_op1 = hi[1];
8839
8840 if (code1 != NIL)
8841 ix86_expand_branch (code1, label);
8842 if (code2 != NIL)
8843 ix86_expand_branch (code2, label2);
8844
8845 ix86_compare_op0 = lo[0];
8846 ix86_compare_op1 = lo[1];
8847 ix86_expand_branch (code3, label);
8848
8849 if (code2 != NIL)
8850 emit_label (label2);
8851 return;
8852 }
8853
8854 default:
8855 abort ();
8856 }
8857 }
8858
8859 /* Split branch based on floating point condition. */
8860 void
8861 ix86_split_fp_branch (code, op1, op2, target1, target2, tmp)
8862 enum rtx_code code;
8863 rtx op1, op2, target1, target2, tmp;
8864 {
8865 rtx second, bypass;
8866 rtx label = NULL_RTX;
8867 rtx condition;
8868 int bypass_probability = -1, second_probability = -1, probability = -1;
8869 rtx i;
8870
8871 if (target2 != pc_rtx)
8872 {
8873 rtx tmp = target2;
8874 code = reverse_condition_maybe_unordered (code);
8875 target2 = target1;
8876 target1 = tmp;
8877 }
8878
8879 condition = ix86_expand_fp_compare (code, op1, op2,
8880 tmp, &second, &bypass);
8881
8882 if (split_branch_probability >= 0)
8883 {
8884 /* Distribute the probabilities across the jumps.
8885 Assume that BYPASS and SECOND always test
8886 for UNORDERED. */
8887 probability = split_branch_probability;
8888
8889 /* A value of 1 is low enough that there is no need to update the
8890 probability. Later we may run some experiments and see
8891 if unordered values are more frequent in practice. */
8892 if (bypass)
8893 bypass_probability = 1;
8894 if (second)
8895 second_probability = 1;
8896 }
8897 if (bypass != NULL_RTX)
8898 {
8899 label = gen_label_rtx ();
8900 i = emit_jump_insn (gen_rtx_SET
8901 (VOIDmode, pc_rtx,
8902 gen_rtx_IF_THEN_ELSE (VOIDmode,
8903 bypass,
8904 gen_rtx_LABEL_REF (VOIDmode,
8905 label),
8906 pc_rtx)));
8907 if (bypass_probability >= 0)
8908 REG_NOTES (i)
8909 = gen_rtx_EXPR_LIST (REG_BR_PROB,
8910 GEN_INT (bypass_probability),
8911 REG_NOTES (i));
8912 }
8913 i = emit_jump_insn (gen_rtx_SET
8914 (VOIDmode, pc_rtx,
8915 gen_rtx_IF_THEN_ELSE (VOIDmode,
8916 condition, target1, target2)));
8917 if (probability >= 0)
8918 REG_NOTES (i)
8919 = gen_rtx_EXPR_LIST (REG_BR_PROB,
8920 GEN_INT (probability),
8921 REG_NOTES (i));
8922 if (second != NULL_RTX)
8923 {
8924 i = emit_jump_insn (gen_rtx_SET
8925 (VOIDmode, pc_rtx,
8926 gen_rtx_IF_THEN_ELSE (VOIDmode, second, target1,
8927 target2)));
8928 if (second_probability >= 0)
8929 REG_NOTES (i)
8930 = gen_rtx_EXPR_LIST (REG_BR_PROB,
8931 GEN_INT (second_probability),
8932 REG_NOTES (i));
8933 }
8934 if (label != NULL_RTX)
8935 emit_label (label);
8936 }
8937
8938 int
8939 ix86_expand_setcc (code, dest)
8940 enum rtx_code code;
8941 rtx dest;
8942 {
8943 rtx ret, tmp, tmpreg;
8944 rtx second_test, bypass_test;
8945
8946 if (GET_MODE (ix86_compare_op0) == DImode
8947 && !TARGET_64BIT)
8948 return 0; /* FAIL */
8949
8950 if (GET_MODE (dest) != QImode)
8951 abort ();
8952
8953 ret = ix86_expand_compare (code, &second_test, &bypass_test);
8954 PUT_MODE (ret, QImode);
8955
8956 tmp = dest;
8957 tmpreg = dest;
8958
8959 emit_insn (gen_rtx_SET (VOIDmode, tmp, ret));
8960 if (bypass_test || second_test)
8961 {
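/* Note: a bypass test means the main result is only valid when the
   (reversed) bypass condition holds, so it is ANDed in below; a second
   test contributes an alternative way to be true, so it is ORed in
   instead.  */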
8962 rtx test = second_test;
8963 int bypass = 0;
8964 rtx tmp2 = gen_reg_rtx (QImode);
8965 if (bypass_test)
8966 {
8967 if (second_test)
8968 abort ();
8969 test = bypass_test;
8970 bypass = 1;
8971 PUT_CODE (test, reverse_condition_maybe_unordered (GET_CODE (test)));
8972 }
8973 PUT_MODE (test, QImode);
8974 emit_insn (gen_rtx_SET (VOIDmode, tmp2, test));
8975
8976 if (bypass)
8977 emit_insn (gen_andqi3 (tmp, tmpreg, tmp2));
8978 else
8979 emit_insn (gen_iorqi3 (tmp, tmpreg, tmp2));
8980 }
8981
8982 return 1; /* DONE */
8983 }
8984
8985 int
8986 ix86_expand_int_movcc (operands)
8987 rtx operands[];
8988 {
8989 enum rtx_code code = GET_CODE (operands[1]), compare_code;
8990 rtx compare_seq, compare_op;
8991 rtx second_test, bypass_test;
8992 enum machine_mode mode = GET_MODE (operands[0]);
8993
8994 /* When the compare code is not LTU or GEU, we cannot use the sbbl case.
8995 In case the comparison is done with an immediate, we can convert it to
8996 LTU or GEU by altering the integer. */
8997
8998 if ((code == LEU || code == GTU)
8999 && GET_CODE (ix86_compare_op1) == CONST_INT
9000 && mode != HImode
9001 && INTVAL (ix86_compare_op1) != -1
9002 /* For x86-64, the immediate field in the instruction is 32-bit
9003 signed, so we can't increment a DImode value above 0x7fffffff. */
9004 && (!TARGET_64BIT
9005 || GET_MODE (ix86_compare_op0) != DImode
9006 || INTVAL (ix86_compare_op1) != 0x7fffffff)
9007 && GET_CODE (operands[2]) == CONST_INT
9008 && GET_CODE (operands[3]) == CONST_INT)
9009 {
9010 if (code == LEU)
9011 code = LTU;
9012 else
9013 code = GEU;
9014 ix86_compare_op1 = gen_int_mode (INTVAL (ix86_compare_op1) + 1,
9015 GET_MODE (ix86_compare_op0));
9016 }
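/* For instance: "x <= 41" unsigned becomes "x < 42" unsigned, which
   the sbb-based sequences below can handle.  */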
9017
9018 start_sequence ();
9019 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
9020 compare_seq = get_insns ();
9021 end_sequence ();
9022
9023 compare_code = GET_CODE (compare_op);
9024
9025 /* Don't attempt mode expansion here -- if we had to expand 5 or 6
9026 HImode insns, we'd be swallowed in word prefix ops. */
9027
9028 if (mode != HImode
9029 && (mode != DImode || TARGET_64BIT)
9030 && GET_CODE (operands[2]) == CONST_INT
9031 && GET_CODE (operands[3]) == CONST_INT)
9032 {
9033 rtx out = operands[0];
9034 HOST_WIDE_INT ct = INTVAL (operands[2]);
9035 HOST_WIDE_INT cf = INTVAL (operands[3]);
9036 HOST_WIDE_INT diff;
9037
9038 if ((compare_code == LTU || compare_code == GEU)
9039 && !second_test && !bypass_test)
9040 {
9041 /* Detect overlap between destination and compare sources. */
9042 rtx tmp = out;
9043
9044 /* To simplify the rest of the code, restrict to the GEU case. */
9045 if (compare_code == LTU)
9046 {
9047 int tmp = ct;
9048 ct = cf;
9049 cf = tmp;
9050 compare_code = reverse_condition (compare_code);
9051 code = reverse_condition (code);
9052 }
9053 diff = ct - cf;
9054
9055 if (reg_overlap_mentioned_p (out, ix86_compare_op0)
9056 || reg_overlap_mentioned_p (out, ix86_compare_op1))
9057 tmp = gen_reg_rtx (mode);
9058
9059 emit_insn (compare_seq);
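/* Note: the movcc_0_m1 patterns below expand to "sbb reg,reg", which
   sets reg to all ones when the carry flag is set and to zero
   otherwise.  */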
9060 if (mode == DImode)
9061 emit_insn (gen_x86_movdicc_0_m1_rex64 (tmp));
9062 else
9063 emit_insn (gen_x86_movsicc_0_m1 (tmp));
9064
9065 if (diff == 1)
9066 {
9067 /*
9068 * cmpl op0,op1
9069 * sbbl dest,dest
9070 * [addl dest, ct]
9071 *
9072 * Size 5 - 8.
9073 */
9074 if (ct)
9075 tmp = expand_simple_binop (mode, PLUS,
9076 tmp, GEN_INT (ct),
9077 tmp, 1, OPTAB_DIRECT);
9078 }
9079 else if (cf == -1)
9080 {
9081 /*
9082 * cmpl op0,op1
9083 * sbbl dest,dest
9084 * orl $ct, dest
9085 *
9086 * Size 8.
9087 */
9088 tmp = expand_simple_binop (mode, IOR,
9089 tmp, GEN_INT (ct),
9090 tmp, 1, OPTAB_DIRECT);
9091 }
9092 else if (diff == -1 && ct)
9093 {
9094 /*
9095 * cmpl op0,op1
9096 * sbbl dest,dest
9097 * notl dest
9098 * [addl dest, cf]
9099 *
9100 * Size 8 - 11.
9101 */
9102 tmp = expand_simple_unop (mode, NOT, tmp, tmp, 1);
9103 if (cf)
9104 tmp = expand_simple_binop (mode, PLUS,
9105 tmp, GEN_INT (cf),
9106 tmp, 1, OPTAB_DIRECT);
9107 }
9108 else
9109 {
9110 /*
9111 * cmpl op0,op1
9112 * sbbl dest,dest
9113 * [notl dest]
9114 * andl cf - ct, dest
9115 * [addl dest, ct]
9116 *
9117 * Size 8 - 11.
9118 */
9119
9120 if (cf == 0)
9121 {
9122 cf = ct;
9123 ct = 0;
9124 tmp = expand_simple_unop (mode, NOT, tmp, tmp, 1);
9125 }
9126
9127 tmp = expand_simple_binop (mode, AND,
9128 tmp,
9129 gen_int_mode (cf - ct, mode),
9130 tmp, 1, OPTAB_DIRECT);
9131 if (ct)
9132 tmp = expand_simple_binop (mode, PLUS,
9133 tmp, GEN_INT (ct),
9134 tmp, 1, OPTAB_DIRECT);
9135 }
9136
9137 if (tmp != out)
9138 emit_move_insn (out, tmp);
9139
9140 return 1; /* DONE */
9141 }
9142
9143 diff = ct - cf;
9144 if (diff < 0)
9145 {
9146 HOST_WIDE_INT tmp;
9147 tmp = ct, ct = cf, cf = tmp;
9148 diff = -diff;
9149 if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0)))
9150 {
9151 /* We may be reversing an unordered compare to a normal compare, which
9152 is not valid in general (we may convert a non-trapping condition
9153 to a trapping one); however, on i386 we currently emit all
9154 comparisons unordered. */
9155 compare_code = reverse_condition_maybe_unordered (compare_code);
9156 code = reverse_condition_maybe_unordered (code);
9157 }
9158 else
9159 {
9160 compare_code = reverse_condition (compare_code);
9161 code = reverse_condition (code);
9162 }
9163 }
9164
9165 compare_code = NIL;
9166 if (GET_MODE_CLASS (GET_MODE (ix86_compare_op0)) == MODE_INT
9167 && GET_CODE (ix86_compare_op1) == CONST_INT)
9168 {
9169 if (ix86_compare_op1 == const0_rtx
9170 && (code == LT || code == GE))
9171 compare_code = code;
9172 else if (ix86_compare_op1 == constm1_rtx)
9173 {
9174 if (code == LE)
9175 compare_code = LT;
9176 else if (code == GT)
9177 compare_code = GE;
9178 }
9179 }
9180
9181 /* Optimize dest = (op0 < 0) ? -1 : cf. */
9182 if (compare_code != NIL
9183 && GET_MODE (ix86_compare_op0) == GET_MODE (out)
9184 && (cf == -1 || ct == -1))
9185 {
9186 /* If the lea code below could be used, only optimize
9187 if it results in a 2-insn sequence. */
9188
9189 if (! (diff == 1 || diff == 2 || diff == 4 || diff == 8
9190 || diff == 3 || diff == 5 || diff == 9)
9191 || (compare_code == LT && ct == -1)
9192 || (compare_code == GE && cf == -1))
9193 {
9194 /*
9195 * notl op1 (if necessary)
9196 * sarl $31, op1
9197 * orl cf, op1
9198 */
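/* Note: "sarl $31" broadcasts the sign bit, yielding -1 for a negative
   value and 0 otherwise; or-ing cf in then leaves -1 untouched and
   turns 0 into cf.  */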
9199 if (ct != -1)
9200 {
9201 cf = ct;
9202 ct = -1;
9203 code = reverse_condition (code);
9204 }
9205
9206 out = emit_store_flag (out, code, ix86_compare_op0,
9207 ix86_compare_op1, VOIDmode, 0, -1);
9208
9209 out = expand_simple_binop (mode, IOR,
9210 out, GEN_INT (cf),
9211 out, 1, OPTAB_DIRECT);
9212 if (out != operands[0])
9213 emit_move_insn (operands[0], out);
9214
9215 return 1; /* DONE */
9216 }
9217 }
9218
9219 if ((diff == 1 || diff == 2 || diff == 4 || diff == 8
9220 || diff == 3 || diff == 5 || diff == 9)
9221 && (mode != DImode || x86_64_sign_extended_value (GEN_INT (cf), 0)))
9222 {
9223 /*
9224 * xorl dest,dest
9225 * cmpl op1,op2
9226 * setcc dest
9227 * lea cf(dest*(ct-cf)),dest
9228 *
9229 * Size 14.
9230 *
9231 * This also catches the degenerate setcc-only case.
9232 */
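/*
 * Illustrative example: for dest = (a < b) ? 5 : 2 we have ct = 5,
 * cf = 2, diff = 3, so after "setcc dest" the lea computes
 * 2 + dest*3, i.e. "lea 2(dest,dest,2), dest".
 */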
9233
9234 rtx tmp;
9235 int nops;
9236
9237 out = emit_store_flag (out, code, ix86_compare_op0,
9238 ix86_compare_op1, VOIDmode, 0, 1);
9239
9240 nops = 0;
9241 /* On x86_64 the lea instruction operates on Pmode, so we need
9242 to get the arithmetic done in the proper mode to match. */
9243 if (diff == 1)
9244 tmp = out;
9245 else
9246 {
9247 rtx out1;
9248 out1 = out;
9249 tmp = gen_rtx_MULT (mode, out1, GEN_INT (diff & ~1));
9250 nops++;
9251 if (diff & 1)
9252 {
9253 tmp = gen_rtx_PLUS (mode, tmp, out1);
9254 nops++;
9255 }
9256 }
9257 if (cf != 0)
9258 {
9259 tmp = gen_rtx_PLUS (mode, tmp, GEN_INT (cf));
9260 nops++;
9261 }
9262 if (tmp != out
9263 && (GET_CODE (tmp) != SUBREG || SUBREG_REG (tmp) != out))
9264 {
9265 if (nops == 1)
9266 {
9267 rtx clob;
9268
9269 clob = gen_rtx_REG (CCmode, FLAGS_REG);
9270 clob = gen_rtx_CLOBBER (VOIDmode, clob);
9271
9272 tmp = gen_rtx_SET (VOIDmode, out, tmp);
9273 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
9274 emit_insn (tmp);
9275 }
9276 else
9277 emit_insn (gen_rtx_SET (VOIDmode, out, tmp));
9278 }
9279 if (out != operands[0])
9280 emit_move_insn (operands[0], copy_rtx (out));
9281
9282 return 1; /* DONE */
9283 }
9284
9285 /*
9286 * General case: Jumpful:
9287 * xorl dest,dest cmpl op1, op2
9288 * cmpl op1, op2 movl ct, dest
9289 * setcc dest jcc 1f
9290 * decl dest movl cf, dest
9291 * andl (cf-ct),dest 1:
9292 * addl ct,dest
9293 *
9294 * Size 20. Size 14.
9295 *
9296 * This is reasonably steep, but branch mispredict costs are
9297 * high on modern cpus, so consider failing only if optimizing
9298 * for space.
9299 *
9300 * %%% Parameterize branch_cost on the tuning architecture, then
9301 * use that. The 80386 couldn't care less about mispredicts.
9302 */
9303
9304 if (!optimize_size && !TARGET_CMOVE)
9305 {
9306 if (cf == 0)
9307 {
9308 cf = ct;
9309 ct = 0;
9310 if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0)))
9311 /* We may be reversing an unordered compare to a normal compare,
9312 which is not valid in general (we may convert a non-trapping
9313 condition to a trapping one); however, on i386 we currently
9314 emit all comparisons unordered. */
9315 code = reverse_condition_maybe_unordered (code);
9316 else
9317 {
9318 code = reverse_condition (code);
9319 if (compare_code != NIL)
9320 compare_code = reverse_condition (compare_code);
9321 }
9322 }
9323
9324 if (compare_code != NIL)
9325 {
9326 /* notl op1 (if needed)
9327 sarl $31, op1
9328 andl (cf-ct), op1
9329 addl ct, op1
9330
9331 For x < 0 (resp. x <= -1) there will be no notl,
9332 so if possible swap the constants to get rid of the
9333 complement.
9334 True/false will be -1/0 while code below (store flag
9335 followed by decrement) is 0/-1, so the constants need
9336 to be exchanged once more. */
9337
9338 if (compare_code == GE || !cf)
9339 {
9340 code = reverse_condition (code);
9341 compare_code = LT;
9342 }
9343 else
9344 {
9345 HOST_WIDE_INT tmp = cf;
9346 cf = ct;
9347 ct = tmp;
9348 }
9349
9350 out = emit_store_flag (out, code, ix86_compare_op0,
9351 ix86_compare_op1, VOIDmode, 0, -1);
9352 }
9353 else
9354 {
9355 out = emit_store_flag (out, code, ix86_compare_op0,
9356 ix86_compare_op1, VOIDmode, 0, 1);
9357
9358 out = expand_simple_binop (mode, PLUS, out, constm1_rtx,
9359 out, 1, OPTAB_DIRECT);
9360 }
9361
9362 out = expand_simple_binop (mode, AND, out,
9363 gen_int_mode (cf - ct, mode),
9364 out, 1, OPTAB_DIRECT);
9365 if (ct)
9366 out = expand_simple_binop (mode, PLUS, out, GEN_INT (ct),
9367 out, 1, OPTAB_DIRECT);
9368 if (out != operands[0])
9369 emit_move_insn (operands[0], out);
9370
9371 return 1; /* DONE */
9372 }
9373 }
9374
9375 if (!TARGET_CMOVE)
9376 {
9377 /* Try a few more things with specific constants and a variable. */
9378
9379 optab op;
9380 rtx var, orig_out, out, tmp;
9381
9382 if (optimize_size)
9383 return 0; /* FAIL */
9384
9385 /* If one of the two operands is an interesting constant, load a
9386 constant with the above and mask it in with a logical operation. */
9387
9388 if (GET_CODE (operands[2]) == CONST_INT)
9389 {
9390 var = operands[3];
9391 if (INTVAL (operands[2]) == 0)
9392 operands[3] = constm1_rtx, op = and_optab;
9393 else if (INTVAL (operands[2]) == -1)
9394 operands[3] = const0_rtx, op = ior_optab;
9395 else
9396 return 0; /* FAIL */
9397 }
9398 else if (GET_CODE (operands[3]) == CONST_INT)
9399 {
9400 var = operands[2];
9401 if (INTVAL (operands[3]) == 0)
9402 operands[2] = constm1_rtx, op = and_optab;
9403 else if (INTVAL (operands[3]) == -1)
9404 operands[2] = const0_rtx, op = ior_optab;
9405 else
9406 return 0; /* FAIL */
9407 }
9408 else
9409 return 0; /* FAIL */
9410
9411 orig_out = operands[0];
9412 tmp = gen_reg_rtx (mode);
9413 operands[0] = tmp;
9414
9415 /* Recurse to get the constant loaded. */
9416 if (ix86_expand_int_movcc (operands) == 0)
9417 return 0; /* FAIL */
9418
9419 /* Mask in the interesting variable. */
9420 out = expand_binop (mode, op, var, tmp, orig_out, 0,
9421 OPTAB_WIDEN);
9422 if (out != orig_out)
9423 emit_move_insn (orig_out, out);
9424
9425 return 1; /* DONE */
9426 }
9427
9428 /*
9429 * For comparison with above,
9430 *
9431 * movl cf,dest
9432 * movl ct,tmp
9433 * cmpl op1,op2
9434 * cmovcc tmp,dest
9435 *
9436 * Size 15.
9437 */
9438
9439 if (! nonimmediate_operand (operands[2], mode))
9440 operands[2] = force_reg (mode, operands[2]);
9441 if (! nonimmediate_operand (operands[3], mode))
9442 operands[3] = force_reg (mode, operands[3]);
9443
9444 if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
9445 {
9446 rtx tmp = gen_reg_rtx (mode);
9447 emit_move_insn (tmp, operands[3]);
9448 operands[3] = tmp;
9449 }
9450 if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
9451 {
9452 rtx tmp = gen_reg_rtx (mode);
9453 emit_move_insn (tmp, operands[2]);
9454 operands[2] = tmp;
9455 }
9456 if (! register_operand (operands[2], VOIDmode)
9457 && ! register_operand (operands[3], VOIDmode))
9458 operands[2] = force_reg (mode, operands[2]);
9459
9460 emit_insn (compare_seq);
9461 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
9462 gen_rtx_IF_THEN_ELSE (mode,
9463 compare_op, operands[2],
9464 operands[3])));
9465 if (bypass_test)
9466 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
9467 gen_rtx_IF_THEN_ELSE (mode,
9468 bypass_test,
9469 operands[3],
9470 operands[0])));
9471 if (second_test)
9472 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
9473 gen_rtx_IF_THEN_ELSE (mode,
9474 second_test,
9475 operands[2],
9476 operands[0])));
9477
9478 return 1; /* DONE */
9479 }
9480
9481 int
9482 ix86_expand_fp_movcc (operands)
9483 rtx operands[];
9484 {
9485 enum rtx_code code;
9486 rtx tmp;
9487 rtx compare_op, second_test, bypass_test;
9488
9489 /* For SF/DFmode conditional moves based on comparisons
9490 in the same mode, we may want to use SSE min/max instructions. */
9491 if (((TARGET_SSE_MATH && GET_MODE (operands[0]) == SFmode)
9492 || (TARGET_SSE2 && TARGET_SSE_MATH && GET_MODE (operands[0]) == DFmode))
9493 && GET_MODE (ix86_compare_op0) == GET_MODE (operands[0])
9494 /* The SSE comparisons do not support the LTGT/UNEQ pair. */
9495 && (!TARGET_IEEE_FP
9496 || (GET_CODE (operands[1]) != LTGT && GET_CODE (operands[1]) != UNEQ))
9497 /* We may be called from the post-reload splitter. */
9498 && (!REG_P (operands[0])
9499 || SSE_REG_P (operands[0])
9500 || REGNO (operands[0]) >= FIRST_PSEUDO_REGISTER))
9501 {
9502 rtx op0 = ix86_compare_op0, op1 = ix86_compare_op1;
9503 code = GET_CODE (operands[1]);
9504
9505 /* See if we have (cross) match between comparison operands and
9506 conditional move operands. */
9507 if (rtx_equal_p (operands[2], op1))
9508 {
9509 rtx tmp = op0;
9510 op0 = op1;
9511 op1 = tmp;
9512 code = reverse_condition_maybe_unordered (code);
9513 }
9514 if (rtx_equal_p (operands[2], op0) && rtx_equal_p (operands[3], op1))
9515 {
9516 /* Check for min operation. */
9517 if (code == LT)
9518 {
9519 operands[0] = force_reg (GET_MODE (operands[0]), operands[0]);
9520 if (memory_operand (op0, VOIDmode))
9521 op0 = force_reg (GET_MODE (operands[0]), op0);
9522 if (GET_MODE (operands[0]) == SFmode)
9523 emit_insn (gen_minsf3 (operands[0], op0, op1));
9524 else
9525 emit_insn (gen_mindf3 (operands[0], op0, op1));
9526 return 1;
9527 }
9528 /* Check for max operation. */
9529 if (code == GT)
9530 {
9531 operands[0] = force_reg (GET_MODE (operands[0]), operands[0]);
9532 if (memory_operand (op0, VOIDmode))
9533 op0 = force_reg (GET_MODE (operands[0]), op0);
9534 if (GET_MODE (operands[0]) == SFmode)
9535 emit_insn (gen_maxsf3 (operands[0], op0, op1));
9536 else
9537 emit_insn (gen_maxdf3 (operands[0], op0, op1));
9538 return 1;
9539 }
9540 }
9541 /* Arrange for the condition to be an sse_comparison_operator. In case
9542 we are in non-IEEE mode, try to canonicalize the destination operand
9543 to be first in the comparison - this helps reload avoid extra
9544 moves. */
9545 if (!sse_comparison_operator (operands[1], VOIDmode)
9546 || (rtx_equal_p (operands[0], ix86_compare_op1) && !TARGET_IEEE_FP))
9547 {
9548 rtx tmp = ix86_compare_op0;
9549 ix86_compare_op0 = ix86_compare_op1;
9550 ix86_compare_op1 = tmp;
9551 operands[1] = gen_rtx_fmt_ee (swap_condition (GET_CODE (operands[1])),
9552 VOIDmode, ix86_compare_op0,
9553 ix86_compare_op1);
9554 }
9555 /* Similarly, try to arrange for the result to be the first operand of
9556 the conditional move. We also don't support the NE comparison on SSE,
9557 so try to avoid it. */
9558 if ((rtx_equal_p (operands[0], operands[3])
9559 && (!TARGET_IEEE_FP || GET_CODE (operands[1]) != EQ))
9560 || (GET_CODE (operands[1]) == NE && TARGET_IEEE_FP))
9561 {
9562 rtx tmp = operands[2];
9563 operands[2] = operands[3];
9564 operands[3] = tmp;
9565 operands[1] = gen_rtx_fmt_ee (reverse_condition_maybe_unordered
9566 (GET_CODE (operands[1])),
9567 VOIDmode, ix86_compare_op0,
9568 ix86_compare_op1);
9569 }
9570 if (GET_MODE (operands[0]) == SFmode)
9571 emit_insn (gen_sse_movsfcc (operands[0], operands[1],
9572 operands[2], operands[3],
9573 ix86_compare_op0, ix86_compare_op1));
9574 else
9575 emit_insn (gen_sse_movdfcc (operands[0], operands[1],
9576 operands[2], operands[3],
9577 ix86_compare_op0, ix86_compare_op1));
9578 return 1;
9579 }
9580
9581 /* The floating point conditional move instructions don't directly
9582 support conditions resulting from a signed integer comparison. */
9583
9584 code = GET_CODE (operands[1]);
9585 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
9586
9587 /* The floating point conditional move instructions don't directly
9588 support signed integer comparisons. */
9589
9590 if (!fcmov_comparison_operator (compare_op, VOIDmode))
9591 {
9592 if (second_test != NULL || bypass_test != NULL)
9593 abort ();
9594 tmp = gen_reg_rtx (QImode);
9595 ix86_expand_setcc (code, tmp);
9596 code = NE;
9597 ix86_compare_op0 = tmp;
9598 ix86_compare_op1 = const0_rtx;
9599 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
9600 }
9601 if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
9602 {
9603 tmp = gen_reg_rtx (GET_MODE (operands[0]));
9604 emit_move_insn (tmp, operands[3]);
9605 operands[3] = tmp;
9606 }
9607 if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
9608 {
9609 tmp = gen_reg_rtx (GET_MODE (operands[0]));
9610 emit_move_insn (tmp, operands[2]);
9611 operands[2] = tmp;
9612 }
9613
9614 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
9615 gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
9616 compare_op,
9617 operands[2],
9618 operands[3])));
9619 if (bypass_test)
9620 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
9621 gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
9622 bypass_test,
9623 operands[3],
9624 operands[0])));
9625 if (second_test)
9626 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
9627 gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
9628 second_test,
9629 operands[2],
9630 operands[0])));
9631
9632 return 1;
9633 }
9634
9635 /* Split OPERAND into word-sized parts stored in PARTS. Similar to split_di,
9636 but works for floating point parameters and non-offsettable memories.
9637 For pushes, it returns just stack offsets; the values will be saved
9638 in the right order. At most three parts are generated. */
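/* For illustration only (mirroring the register case handled below, not an
   exhaustive list): on a 32-bit target a DFmode register value comes back as
   parts[0] = (reg:SI n) and parts[1] = (reg:SI n+1), an XFmode/TFmode value
   adds parts[2] = (reg:SI n+2), while on a 64-bit target an XFmode/TFmode
   value is split into a DImode part plus an SImode upper part.  */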
9639
9640 static int
9641 ix86_split_to_parts (operand, parts, mode)
9642 rtx operand;
9643 rtx *parts;
9644 enum machine_mode mode;
9645 {
9646 int size;
9647
9648 if (!TARGET_64BIT)
9649 size = mode == TFmode ? 3 : (GET_MODE_SIZE (mode) / 4);
9650 else
9651 size = (GET_MODE_SIZE (mode) + 4) / 8;
9652
9653 if (GET_CODE (operand) == REG && MMX_REGNO_P (REGNO (operand)))
9654 abort ();
9655 if (size < 2 || size > 3)
9656 abort ();
9657
9658 /* Optimize constant pool reference to immediates. This is used by fp
9659 moves, that force all constants to memory to allow combining. */
9660 if (GET_CODE (operand) == MEM && RTX_UNCHANGING_P (operand))
9661 {
9662 rtx tmp = maybe_get_pool_constant (operand);
9663 if (tmp)
9664 operand = tmp;
9665 }
9666
9667 if (GET_CODE (operand) == MEM && !offsettable_memref_p (operand))
9668 {
9669 /* The only non-offsettable memories we handle are pushes. */
9670 if (! push_operand (operand, VOIDmode))
9671 abort ();
9672
9673 operand = copy_rtx (operand);
9674 PUT_MODE (operand, Pmode);
9675 parts[0] = parts[1] = parts[2] = operand;
9676 }
9677 else if (!TARGET_64BIT)
9678 {
9679 if (mode == DImode)
9680 split_di (&operand, 1, &parts[0], &parts[1]);
9681 else
9682 {
9683 if (REG_P (operand))
9684 {
9685 if (!reload_completed)
9686 abort ();
9687 parts[0] = gen_rtx_REG (SImode, REGNO (operand) + 0);
9688 parts[1] = gen_rtx_REG (SImode, REGNO (operand) + 1);
9689 if (size == 3)
9690 parts[2] = gen_rtx_REG (SImode, REGNO (operand) + 2);
9691 }
9692 else if (offsettable_memref_p (operand))
9693 {
9694 operand = adjust_address (operand, SImode, 0);
9695 parts[0] = operand;
9696 parts[1] = adjust_address (operand, SImode, 4);
9697 if (size == 3)
9698 parts[2] = adjust_address (operand, SImode, 8);
9699 }
9700 else if (GET_CODE (operand) == CONST_DOUBLE)
9701 {
9702 REAL_VALUE_TYPE r;
9703 long l[4];
9704
9705 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
9706 switch (mode)
9707 {
9708 case XFmode:
9709 case TFmode:
9710 REAL_VALUE_TO_TARGET_LONG_DOUBLE (r, l);
9711 parts[2] = gen_int_mode (l[2], SImode);
9712 break;
9713 case DFmode:
9714 REAL_VALUE_TO_TARGET_DOUBLE (r, l);
9715 break;
9716 default:
9717 abort ();
9718 }
9719 parts[1] = gen_int_mode (l[1], SImode);
9720 parts[0] = gen_int_mode (l[0], SImode);
9721 }
9722 else
9723 abort ();
9724 }
9725 }
9726 else
9727 {
9728 if (mode == TImode)
9729 split_ti (&operand, 1, &parts[0], &parts[1]);
9730 if (mode == XFmode || mode == TFmode)
9731 {
9732 if (REG_P (operand))
9733 {
9734 if (!reload_completed)
9735 abort ();
9736 parts[0] = gen_rtx_REG (DImode, REGNO (operand) + 0);
9737 parts[1] = gen_rtx_REG (SImode, REGNO (operand) + 1);
9738 }
9739 else if (offsettable_memref_p (operand))
9740 {
9741 operand = adjust_address (operand, DImode, 0);
9742 parts[0] = operand;
9743 parts[1] = adjust_address (operand, SImode, 8);
9744 }
9745 else if (GET_CODE (operand) == CONST_DOUBLE)
9746 {
9747 REAL_VALUE_TYPE r;
9748 long l[3];
9749
9750 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
9751 REAL_VALUE_TO_TARGET_LONG_DOUBLE (r, l);
9752 /* Do not use shift by 32 to avoid warning on 32bit systems. */
9753 if (HOST_BITS_PER_WIDE_INT >= 64)
9754 parts[0]
9755 = gen_int_mode
9756 ((l[0] & (((HOST_WIDE_INT) 2 << 31) - 1))
9757 + ((((HOST_WIDE_INT) l[1]) << 31) << 1),
9758 DImode);
9759 else
9760 parts[0] = immed_double_const (l[0], l[1], DImode);
9761 parts[1] = gen_int_mode (l[2], SImode);
9762 }
9763 else
9764 abort ();
9765 }
9766 }
9767
9768 return size;
9769 }
9770
9771 /* Emit insns to perform a move or push of DI, DF, and XF values.
9772 All required insns are emitted here. Operands 2-4 are used to hold
9773 the destination parts and operands 5-7 the source parts, in the
9774 correct order for the emitted moves. */
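/* A minimal sketch of the common 32-bit case handled below (illustrative
   only): a DImode move such as
     (set (reg:DI 0) (mem:DI addr))
   is split into the SImode moves
     (set (reg:SI 0) (mem:SI addr)) and (set (reg:SI 1) (mem:SI addr+4)),
   emitted in an order chosen so that no destination part is written before
   the source parts that still depend on it have been read.  */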
9775
9776 void
9777 ix86_split_long_move (operands)
9778 rtx operands[];
9779 {
9780 rtx part[2][3];
9781 int nparts;
9782 int push = 0;
9783 int collisions = 0;
9784 enum machine_mode mode = GET_MODE (operands[0]);
9785
9786 /* The DFmode expanders may ask us to move a double.
9787 For a 64-bit target this is a single move. By handling that case
9788 here we simplify the i386.md splitters. */
9789 if (GET_MODE_SIZE (GET_MODE (operands[0])) == 8 && TARGET_64BIT)
9790 {
9791 /* Optimize constant pool reference to immediates. This is used by
9792 fp moves, that force all constants to memory to allow combining. */
9793
9794 if (GET_CODE (operands[1]) == MEM
9795 && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF
9796 && CONSTANT_POOL_ADDRESS_P (XEXP (operands[1], 0)))
9797 operands[1] = get_pool_constant (XEXP (operands[1], 0));
9798 if (push_operand (operands[0], VOIDmode))
9799 {
9800 operands[0] = copy_rtx (operands[0]);
9801 PUT_MODE (operands[0], Pmode);
9802 }
9803 else
9804 operands[0] = gen_lowpart (DImode, operands[0]);
9805 operands[1] = gen_lowpart (DImode, operands[1]);
9806 emit_move_insn (operands[0], operands[1]);
9807 return;
9808 }
9809
9810 /* The only non-offsettable memory we handle is push. */
9811 if (push_operand (operands[0], VOIDmode))
9812 push = 1;
9813 else if (GET_CODE (operands[0]) == MEM
9814 && ! offsettable_memref_p (operands[0]))
9815 abort ();
9816
9817 nparts = ix86_split_to_parts (operands[1], part[1], GET_MODE (operands[0]));
9818 ix86_split_to_parts (operands[0], part[0], GET_MODE (operands[0]));
9819
9820 /* When emitting a push, take care of source operands on the stack. */
9821 if (push && GET_CODE (operands[1]) == MEM
9822 && reg_overlap_mentioned_p (stack_pointer_rtx, operands[1]))
9823 {
9824 if (nparts == 3)
9825 part[1][1] = change_address (part[1][1], GET_MODE (part[1][1]),
9826 XEXP (part[1][2], 0));
9827 part[1][0] = change_address (part[1][0], GET_MODE (part[1][0]),
9828 XEXP (part[1][1], 0));
9829 }
9830
9831 /* We need to do the copy in the right order in case an address register
9832 of the source overlaps the destination. */
9833 if (REG_P (part[0][0]) && GET_CODE (part[1][0]) == MEM)
9834 {
9835 if (reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0)))
9836 collisions++;
9837 if (reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
9838 collisions++;
9839 if (nparts == 3
9840 && reg_overlap_mentioned_p (part[0][2], XEXP (part[1][0], 0)))
9841 collisions++;
9842
9843 /* Collision in the middle part can be handled by reordering. */
9844 if (collisions == 1 && nparts == 3
9845 && reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
9846 {
9847 rtx tmp;
9848 tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
9849 tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
9850 }
9851
9852 /* If there are more collisions, we can't handle it by reordering.
9853 Do an lea to the last part and use only one colliding move. */
9854 else if (collisions > 1)
9855 {
9856 collisions = 1;
9857 emit_insn (gen_rtx_SET (VOIDmode, part[0][nparts - 1],
9858 XEXP (part[1][0], 0)));
9859 part[1][0] = change_address (part[1][0],
9860 TARGET_64BIT ? DImode : SImode,
9861 part[0][nparts - 1]);
9862 part[1][1] = adjust_address (part[1][0], VOIDmode, UNITS_PER_WORD);
9863 if (nparts == 3)
9864 part[1][2] = adjust_address (part[1][0], VOIDmode, 8);
9865 }
9866 }
9867
9868 if (push)
9869 {
9870 if (!TARGET_64BIT)
9871 {
9872 if (nparts == 3)
9873 {
9874 /* We use only the first 12 bytes of the TFmode value, but for pushing
9875 we are required to adjust the stack as if we were pushing a real
9876 16-byte value. */
9877 if (mode == TFmode && !TARGET_64BIT)
9878 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
9879 GEN_INT (-4)));
9880 emit_move_insn (part[0][2], part[1][2]);
9881 }
9882 }
9883 else
9884 {
9885 /* In 64-bit mode we don't have a 32-bit push available. If the operand
9886 is a register, that is OK - we just use the larger counterpart. We also
9887 retype memory - this comes from an attempt to avoid the REX prefix on
9888 moving the second half of a TFmode value. */
9889 if (GET_MODE (part[1][1]) == SImode)
9890 {
9891 if (GET_CODE (part[1][1]) == MEM)
9892 part[1][1] = adjust_address (part[1][1], DImode, 0);
9893 else if (REG_P (part[1][1]))
9894 part[1][1] = gen_rtx_REG (DImode, REGNO (part[1][1]));
9895 else
9896 abort ();
9897 if (GET_MODE (part[1][0]) == SImode)
9898 part[1][0] = part[1][1];
9899 }
9900 }
9901 emit_move_insn (part[0][1], part[1][1]);
9902 emit_move_insn (part[0][0], part[1][0]);
9903 return;
9904 }
9905
9906 /* Choose the correct order so we do not overwrite the source before it is copied. */
9907 if ((REG_P (part[0][0])
9908 && REG_P (part[1][1])
9909 && (REGNO (part[0][0]) == REGNO (part[1][1])
9910 || (nparts == 3
9911 && REGNO (part[0][0]) == REGNO (part[1][2]))))
9912 || (collisions > 0
9913 && reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0))))
9914 {
9915 if (nparts == 3)
9916 {
9917 operands[2] = part[0][2];
9918 operands[3] = part[0][1];
9919 operands[4] = part[0][0];
9920 operands[5] = part[1][2];
9921 operands[6] = part[1][1];
9922 operands[7] = part[1][0];
9923 }
9924 else
9925 {
9926 operands[2] = part[0][1];
9927 operands[3] = part[0][0];
9928 operands[5] = part[1][1];
9929 operands[6] = part[1][0];
9930 }
9931 }
9932 else
9933 {
9934 if (nparts == 3)
9935 {
9936 operands[2] = part[0][0];
9937 operands[3] = part[0][1];
9938 operands[4] = part[0][2];
9939 operands[5] = part[1][0];
9940 operands[6] = part[1][1];
9941 operands[7] = part[1][2];
9942 }
9943 else
9944 {
9945 operands[2] = part[0][0];
9946 operands[3] = part[0][1];
9947 operands[5] = part[1][0];
9948 operands[6] = part[1][1];
9949 }
9950 }
9951 emit_move_insn (operands[2], operands[5]);
9952 emit_move_insn (operands[3], operands[6]);
9953 if (nparts == 3)
9954 emit_move_insn (operands[4], operands[7]);
9955
9956 return;
9957 }
9958
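/* The three splitters below lower DImode shifts to SImode operations for
   targets without a 64-bit shift.  A worked example of the constant case in
   ix86_split_ashldi (illustrative): for a shift count of 40, the low word is
   moved into the high word, the low word is cleared, and the high word is
   shifted left by the remaining 8 bits.  SCRATCH, when available, lets the
   variable-count case use the cmove-based adjustment pattern.  */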
9959 void
9960 ix86_split_ashldi (operands, scratch)
9961 rtx *operands, scratch;
9962 {
9963 rtx low[2], high[2];
9964 int count;
9965
9966 if (GET_CODE (operands[2]) == CONST_INT)
9967 {
9968 split_di (operands, 2, low, high);
9969 count = INTVAL (operands[2]) & 63;
9970
9971 if (count >= 32)
9972 {
9973 emit_move_insn (high[0], low[1]);
9974 emit_move_insn (low[0], const0_rtx);
9975
9976 if (count > 32)
9977 emit_insn (gen_ashlsi3 (high[0], high[0], GEN_INT (count - 32)));
9978 }
9979 else
9980 {
9981 if (!rtx_equal_p (operands[0], operands[1]))
9982 emit_move_insn (operands[0], operands[1]);
9983 emit_insn (gen_x86_shld_1 (high[0], low[0], GEN_INT (count)));
9984 emit_insn (gen_ashlsi3 (low[0], low[0], GEN_INT (count)));
9985 }
9986 }
9987 else
9988 {
9989 if (!rtx_equal_p (operands[0], operands[1]))
9990 emit_move_insn (operands[0], operands[1]);
9991
9992 split_di (operands, 1, low, high);
9993
9994 emit_insn (gen_x86_shld_1 (high[0], low[0], operands[2]));
9995 emit_insn (gen_ashlsi3 (low[0], low[0], operands[2]));
9996
9997 if (TARGET_CMOVE && (! no_new_pseudos || scratch))
9998 {
9999 if (! no_new_pseudos)
10000 scratch = force_reg (SImode, const0_rtx);
10001 else
10002 emit_move_insn (scratch, const0_rtx);
10003
10004 emit_insn (gen_x86_shift_adj_1 (high[0], low[0], operands[2],
10005 scratch));
10006 }
10007 else
10008 emit_insn (gen_x86_shift_adj_2 (high[0], low[0], operands[2]));
10009 }
10010 }
10011
10012 void
10013 ix86_split_ashrdi (operands, scratch)
10014 rtx *operands, scratch;
10015 {
10016 rtx low[2], high[2];
10017 int count;
10018
10019 if (GET_CODE (operands[2]) == CONST_INT)
10020 {
10021 split_di (operands, 2, low, high);
10022 count = INTVAL (operands[2]) & 63;
10023
10024 if (count >= 32)
10025 {
10026 emit_move_insn (low[0], high[1]);
10027
10028 if (! reload_completed)
10029 emit_insn (gen_ashrsi3 (high[0], low[0], GEN_INT (31)));
10030 else
10031 {
10032 emit_move_insn (high[0], low[0]);
10033 emit_insn (gen_ashrsi3 (high[0], high[0], GEN_INT (31)));
10034 }
10035
10036 if (count > 32)
10037 emit_insn (gen_ashrsi3 (low[0], low[0], GEN_INT (count - 32)));
10038 }
10039 else
10040 {
10041 if (!rtx_equal_p (operands[0], operands[1]))
10042 emit_move_insn (operands[0], operands[1]);
10043 emit_insn (gen_x86_shrd_1 (low[0], high[0], GEN_INT (count)));
10044 emit_insn (gen_ashrsi3 (high[0], high[0], GEN_INT (count)));
10045 }
10046 }
10047 else
10048 {
10049 if (!rtx_equal_p (operands[0], operands[1]))
10050 emit_move_insn (operands[0], operands[1]);
10051
10052 split_di (operands, 1, low, high);
10053
10054 emit_insn (gen_x86_shrd_1 (low[0], high[0], operands[2]));
10055 emit_insn (gen_ashrsi3 (high[0], high[0], operands[2]));
10056
10057 if (TARGET_CMOVE && (! no_new_pseudos || scratch))
10058 {
10059 if (! no_new_pseudos)
10060 scratch = gen_reg_rtx (SImode);
10061 emit_move_insn (scratch, high[0]);
10062 emit_insn (gen_ashrsi3 (scratch, scratch, GEN_INT (31)));
10063 emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
10064 scratch));
10065 }
10066 else
10067 emit_insn (gen_x86_shift_adj_3 (low[0], high[0], operands[2]));
10068 }
10069 }
10070
10071 void
10072 ix86_split_lshrdi (operands, scratch)
10073 rtx *operands, scratch;
10074 {
10075 rtx low[2], high[2];
10076 int count;
10077
10078 if (GET_CODE (operands[2]) == CONST_INT)
10079 {
10080 split_di (operands, 2, low, high);
10081 count = INTVAL (operands[2]) & 63;
10082
10083 if (count >= 32)
10084 {
10085 emit_move_insn (low[0], high[1]);
10086 emit_move_insn (high[0], const0_rtx);
10087
10088 if (count > 32)
10089 emit_insn (gen_lshrsi3 (low[0], low[0], GEN_INT (count - 32)));
10090 }
10091 else
10092 {
10093 if (!rtx_equal_p (operands[0], operands[1]))
10094 emit_move_insn (operands[0], operands[1]);
10095 emit_insn (gen_x86_shrd_1 (low[0], high[0], GEN_INT (count)));
10096 emit_insn (gen_lshrsi3 (high[0], high[0], GEN_INT (count)));
10097 }
10098 }
10099 else
10100 {
10101 if (!rtx_equal_p (operands[0], operands[1]))
10102 emit_move_insn (operands[0], operands[1]);
10103
10104 split_di (operands, 1, low, high);
10105
10106 emit_insn (gen_x86_shrd_1 (low[0], high[0], operands[2]));
10107 emit_insn (gen_lshrsi3 (high[0], high[0], operands[2]));
10108
10109 /* Heh. By reversing the arguments, we can reuse this pattern. */
10110 if (TARGET_CMOVE && (! no_new_pseudos || scratch))
10111 {
10112 if (! no_new_pseudos)
10113 scratch = force_reg (SImode, const0_rtx);
10114 else
10115 emit_move_insn (scratch, const0_rtx);
10116
10117 emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
10118 scratch));
10119 }
10120 else
10121 emit_insn (gen_x86_shift_adj_2 (low[0], high[0], operands[2]));
10122 }
10123 }
10124
10125 /* Helper function for the string operations below. Test whether the bits of
10126 VALUE are clear in VARIABLE; if so, jump to the returned label. */
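/* Typical use in the expanders below (a sketch of the existing callers, not
   new code):
     rtx label = ix86_expand_aligntest (destreg, 1);
     emit_insn (gen_strmovqi (destreg, srcreg));
     ix86_adjust_counter (countreg, 1);
     emit_label (label);
   The single-byte copy is skipped when the destination address already has
   its low bit clear, i.e. is already 2-byte aligned.  */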
10127 static rtx
10128 ix86_expand_aligntest (variable, value)
10129 rtx variable;
10130 int value;
10131 {
10132 rtx label = gen_label_rtx ();
10133 rtx tmpcount = gen_reg_rtx (GET_MODE (variable));
10134 if (GET_MODE (variable) == DImode)
10135 emit_insn (gen_anddi3 (tmpcount, variable, GEN_INT (value)));
10136 else
10137 emit_insn (gen_andsi3 (tmpcount, variable, GEN_INT (value)));
10138 emit_cmp_and_jump_insns (tmpcount, const0_rtx, EQ, 0, GET_MODE (variable),
10139 1, label);
10140 return label;
10141 }
10142
10143 /* Decrease COUNTREG by VALUE. */
10144 static void
10145 ix86_adjust_counter (countreg, value)
10146 rtx countreg;
10147 HOST_WIDE_INT value;
10148 {
10149 if (GET_MODE (countreg) == DImode)
10150 emit_insn (gen_adddi3 (countreg, countreg, GEN_INT (-value)));
10151 else
10152 emit_insn (gen_addsi3 (countreg, countreg, GEN_INT (-value)));
10153 }
10154
10155 /* Zero extend possibly SImode EXP to Pmode register. */
10156 rtx
10157 ix86_zero_extend_to_Pmode (exp)
10158 rtx exp;
10159 {
10160 rtx r;
10161 if (GET_MODE (exp) == VOIDmode)
10162 return force_reg (Pmode, exp);
10163 if (GET_MODE (exp) == Pmode)
10164 return copy_to_mode_reg (Pmode, exp);
10165 r = gen_reg_rtx (Pmode);
10166 emit_insn (gen_zero_extendsidi2 (r, exp));
10167 return r;
10168 }
10169
10170 /* Expand string move (memcpy) operation. Use i386 string operations when
10171 profitable. expand_clrstr contains similar code. */
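/* Overview of the strategy chosen below (a summary of this function, not a
   specification): when not optimizing, or optimizing for size, and the count
   is unknown or not a multiple of 4, emit a single rep movsb; for a known
   count that is small or sufficiently aligned, emit rep movsl (rep movsq on
   64-bit) plus at most one trailing movsl, movsw and movsb; otherwise align
   the destination first and then use rep movsl, falling back to the library
   call when the alignment is unknown and inlining was not requested.  */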
10172 int
10173 ix86_expand_movstr (dst, src, count_exp, align_exp)
10174 rtx dst, src, count_exp, align_exp;
10175 {
10176 rtx srcreg, destreg, countreg;
10177 enum machine_mode counter_mode;
10178 HOST_WIDE_INT align = 0;
10179 unsigned HOST_WIDE_INT count = 0;
10180 rtx insns;
10181
10182 start_sequence ();
10183
10184 if (GET_CODE (align_exp) == CONST_INT)
10185 align = INTVAL (align_exp);
10186
10187 /* This simple hack avoids all inlining code and simplifies code below. */
10188 if (!TARGET_ALIGN_STRINGOPS)
10189 align = 64;
10190
10191 if (GET_CODE (count_exp) == CONST_INT)
10192 count = INTVAL (count_exp);
10193
10194 /* Figure out proper mode for counter. For 32bits it is always SImode,
10195 for 64bits use SImode when possible, otherwise DImode.
10196 Set count to number of bytes copied when known at compile time. */
10197 if (!TARGET_64BIT || GET_MODE (count_exp) == SImode
10198 || x86_64_zero_extended_value (count_exp))
10199 counter_mode = SImode;
10200 else
10201 counter_mode = DImode;
10202
10203 if (counter_mode != SImode && counter_mode != DImode)
10204 abort ();
10205
10206 destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
10207 srcreg = copy_to_mode_reg (Pmode, XEXP (src, 0));
10208
10209 emit_insn (gen_cld ());
10210
10211 /* When optimizing for size emit simple rep ; movsb instruction for
10212 counts not divisible by 4. */
10213
10214 if ((!optimize || optimize_size) && (count == 0 || (count & 0x03)))
10215 {
10216 countreg = ix86_zero_extend_to_Pmode (count_exp);
10217 if (TARGET_64BIT)
10218 emit_insn (gen_rep_movqi_rex64 (destreg, srcreg, countreg,
10219 destreg, srcreg, countreg));
10220 else
10221 emit_insn (gen_rep_movqi (destreg, srcreg, countreg,
10222 destreg, srcreg, countreg));
10223 }
10224
10225 /* For constant aligned (or small unaligned) copies use rep movsl
10226 followed by code copying the rest. For PentiumPro ensure 8 byte
10227 alignment to allow rep movsl acceleration. */
10228
10229 else if (count != 0
10230 && (align >= 8
10231 || (!TARGET_PENTIUMPRO && !TARGET_64BIT && align >= 4)
10232 || optimize_size || count < (unsigned int) 64))
10233 {
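/* Worked example (illustrative): for count == 23 with size == 4 this emits
   rep movsl with a count of 5 (20 bytes), then a movsw for the 0x02 bit and
   a movsb for the 0x01 bit, copying the remaining 3 bytes.  */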
10234 int size = TARGET_64BIT && !optimize_size ? 8 : 4;
10235 if (count & ~(size - 1))
10236 {
10237 countreg = copy_to_mode_reg (counter_mode,
10238 GEN_INT ((count >> (size == 4 ? 2 : 3))
10239 & (TARGET_64BIT ? -1 : 0x3fffffff)));
10240 countreg = ix86_zero_extend_to_Pmode (countreg);
10241 if (size == 4)
10242 {
10243 if (TARGET_64BIT)
10244 emit_insn (gen_rep_movsi_rex64 (destreg, srcreg, countreg,
10245 destreg, srcreg, countreg));
10246 else
10247 emit_insn (gen_rep_movsi (destreg, srcreg, countreg,
10248 destreg, srcreg, countreg));
10249 }
10250 else
10251 emit_insn (gen_rep_movdi_rex64 (destreg, srcreg, countreg,
10252 destreg, srcreg, countreg));
10253 }
10254 if (size == 8 && (count & 0x04))
10255 emit_insn (gen_strmovsi (destreg, srcreg));
10256 if (count & 0x02)
10257 emit_insn (gen_strmovhi (destreg, srcreg));
10258 if (count & 0x01)
10259 emit_insn (gen_strmovqi (destreg, srcreg));
10260 }
10261 /* The generic code based on the glibc implementation:
10262 - align destination to 4 bytes (8 byte alignment is used for PentiumPro
10263 allowing accelerated copying there)
10264 - copy the data using rep movsl
10265 - copy the rest. */
10266 else
10267 {
10268 rtx countreg2;
10269 rtx label = NULL;
10270 int desired_alignment = (TARGET_PENTIUMPRO
10271 && (count == 0 || count >= (unsigned int) 260)
10272 ? 8 : UNITS_PER_WORD);
10273
10274 /* In case we don't know anything about the alignment, default to the
10275 library version, since it is usually equally fast and results in
10276 shorter code. */
10277 if (!TARGET_INLINE_ALL_STRINGOPS && align < UNITS_PER_WORD)
10278 {
10279 end_sequence ();
10280 return 0;
10281 }
10282
10283 if (TARGET_SINGLE_STRINGOP)
10284 emit_insn (gen_cld ());
10285
10286 countreg2 = gen_reg_rtx (Pmode);
10287 countreg = copy_to_mode_reg (counter_mode, count_exp);
10288
10289 /* We don't use loops to align destination and to copy parts smaller
10290 than 4 bytes, because gcc is able to optimize such code better (in
10291 the case the destination or the count really is aligned, gcc is often
10292 able to predict the branches) and also it is friendlier to the
10293 hardware branch prediction.
10294
10295 Using loops is beneficial for the generic case, because we can
10296 handle small counts using the loops. Many CPUs (such as Athlon)
10297 have large REP prefix setup costs.
10298
10299 This is quite costly. Maybe we can revisit this decision later or
10300 add some customizability to this code. */
10301
10302 if (count == 0 && align < desired_alignment)
10303 {
10304 label = gen_label_rtx ();
10305 emit_cmp_and_jump_insns (countreg, GEN_INT (desired_alignment - 1),
10306 LEU, 0, counter_mode, 1, label);
10307 }
10308 if (align <= 1)
10309 {
10310 rtx label = ix86_expand_aligntest (destreg, 1);
10311 emit_insn (gen_strmovqi (destreg, srcreg));
10312 ix86_adjust_counter (countreg, 1);
10313 emit_label (label);
10314 LABEL_NUSES (label) = 1;
10315 }
10316 if (align <= 2)
10317 {
10318 rtx label = ix86_expand_aligntest (destreg, 2);
10319 emit_insn (gen_strmovhi (destreg, srcreg));
10320 ix86_adjust_counter (countreg, 2);
10321 emit_label (label);
10322 LABEL_NUSES (label) = 1;
10323 }
10324 if (align <= 4 && desired_alignment > 4)
10325 {
10326 rtx label = ix86_expand_aligntest (destreg, 4);
10327 emit_insn (gen_strmovsi (destreg, srcreg));
10328 ix86_adjust_counter (countreg, 4);
10329 emit_label (label);
10330 LABEL_NUSES (label) = 1;
10331 }
10332
10333 if (label && desired_alignment > 4 && !TARGET_64BIT)
10334 {
10335 emit_label (label);
10336 LABEL_NUSES (label) = 1;
10337 label = NULL_RTX;
10338 }
10339 if (!TARGET_SINGLE_STRINGOP)
10340 emit_insn (gen_cld ());
10341 if (TARGET_64BIT)
10342 {
10343 emit_insn (gen_lshrdi3 (countreg2, ix86_zero_extend_to_Pmode (countreg),
10344 GEN_INT (3)));
10345 emit_insn (gen_rep_movdi_rex64 (destreg, srcreg, countreg2,
10346 destreg, srcreg, countreg2));
10347 }
10348 else
10349 {
10350 emit_insn (gen_lshrsi3 (countreg2, countreg, GEN_INT (2)));
10351 emit_insn (gen_rep_movsi (destreg, srcreg, countreg2,
10352 destreg, srcreg, countreg2));
10353 }
10354
10355 if (label)
10356 {
10357 emit_label (label);
10358 LABEL_NUSES (label) = 1;
10359 }
10360 if (TARGET_64BIT && align > 4 && count != 0 && (count & 4))
10361 emit_insn (gen_strmovsi (destreg, srcreg));
10362 if ((align <= 4 || count == 0) && TARGET_64BIT)
10363 {
10364 rtx label = ix86_expand_aligntest (countreg, 4);
10365 emit_insn (gen_strmovsi (destreg, srcreg));
10366 emit_label (label);
10367 LABEL_NUSES (label) = 1;
10368 }
10369 if (align > 2 && count != 0 && (count & 2))
10370 emit_insn (gen_strmovhi (destreg, srcreg));
10371 if (align <= 2 || count == 0)
10372 {
10373 rtx label = ix86_expand_aligntest (countreg, 2);
10374 emit_insn (gen_strmovhi (destreg, srcreg));
10375 emit_label (label);
10376 LABEL_NUSES (label) = 1;
10377 }
10378 if (align > 1 && count != 0 && (count & 1))
10379 emit_insn (gen_strmovqi (destreg, srcreg));
10380 if (align <= 1 || count == 0)
10381 {
10382 rtx label = ix86_expand_aligntest (countreg, 1);
10383 emit_insn (gen_strmovqi (destreg, srcreg));
10384 emit_label (label);
10385 LABEL_NUSES (label) = 1;
10386 }
10387 }
10388
10389 insns = get_insns ();
10390 end_sequence ();
10391
10392 ix86_set_move_mem_attrs (insns, dst, src, destreg, srcreg);
10393 emit_insn (insns);
10394 return 1;
10395 }
10396
10397 /* Expand string clear operation (bzero). Use i386 string operations when
10398 profitable. expand_movstr contains similar code. */
10399 int
10400 ix86_expand_clrstr (src, count_exp, align_exp)
10401 rtx src, count_exp, align_exp;
10402 {
10403 rtx destreg, zeroreg, countreg;
10404 enum machine_mode counter_mode;
10405 HOST_WIDE_INT align = 0;
10406 unsigned HOST_WIDE_INT count = 0;
10407
10408 if (GET_CODE (align_exp) == CONST_INT)
10409 align = INTVAL (align_exp);
10410
10411 /* This simple hack avoids all inlining code and simplifies code below. */
10412 if (!TARGET_ALIGN_STRINGOPS)
10413 align = 32;
10414
10415 if (GET_CODE (count_exp) == CONST_INT)
10416 count = INTVAL (count_exp);
10417 /* Figure out proper mode for counter. For 32bits it is always SImode,
10418 for 64bits use SImode when possible, otherwise DImode.
10419 Set count to the number of bytes cleared when known at compile time. */
10420 if (!TARGET_64BIT || GET_MODE (count_exp) == SImode
10421 || x86_64_zero_extended_value (count_exp))
10422 counter_mode = SImode;
10423 else
10424 counter_mode = DImode;
10425
10426 destreg = copy_to_mode_reg (Pmode, XEXP (src, 0));
10427
10428 emit_insn (gen_cld ());
10429
10430 /* When optimizing for size emit a simple rep ; stosb instruction for
10431 counts not divisible by 4. */
10432
10433 if ((!optimize || optimize_size) && (count == 0 || (count & 0x03)))
10434 {
10435 countreg = ix86_zero_extend_to_Pmode (count_exp);
10436 zeroreg = copy_to_mode_reg (QImode, const0_rtx);
10437 if (TARGET_64BIT)
10438 emit_insn (gen_rep_stosqi_rex64 (destreg, countreg, zeroreg,
10439 destreg, countreg));
10440 else
10441 emit_insn (gen_rep_stosqi (destreg, countreg, zeroreg,
10442 destreg, countreg));
10443 }
10444 else if (count != 0
10445 && (align >= 8
10446 || (!TARGET_PENTIUMPRO && !TARGET_64BIT && align >= 4)
10447 || optimize_size || count < (unsigned int) 64))
10448 {
10449 int size = TARGET_64BIT && !optimize_size ? 8 : 4;
10450 zeroreg = copy_to_mode_reg (size == 4 ? SImode : DImode, const0_rtx);
10451 if (count & ~(size - 1))
10452 {
10453 countreg = copy_to_mode_reg (counter_mode,
10454 GEN_INT ((count >> (size == 4 ? 2 : 3))
10455 & (TARGET_64BIT ? -1 : 0x3fffffff)));
10456 countreg = ix86_zero_extend_to_Pmode (countreg);
10457 if (size == 4)
10458 {
10459 if (TARGET_64BIT)
10460 emit_insn (gen_rep_stossi_rex64 (destreg, countreg, zeroreg,
10461 destreg, countreg));
10462 else
10463 emit_insn (gen_rep_stossi (destreg, countreg, zeroreg,
10464 destreg, countreg));
10465 }
10466 else
10467 emit_insn (gen_rep_stosdi_rex64 (destreg, countreg, zeroreg,
10468 destreg, countreg));
10469 }
10470 if (size == 8 && (count & 0x04))
10471 emit_insn (gen_strsetsi (destreg,
10472 gen_rtx_SUBREG (SImode, zeroreg, 0)));
10473 if (count & 0x02)
10474 emit_insn (gen_strsethi (destreg,
10475 gen_rtx_SUBREG (HImode, zeroreg, 0)));
10476 if (count & 0x01)
10477 emit_insn (gen_strsetqi (destreg,
10478 gen_rtx_SUBREG (QImode, zeroreg, 0)));
10479 }
10480 else
10481 {
10482 rtx countreg2;
10483 rtx label = NULL;
10484 /* Compute desired alignment of the string operation. */
10485 int desired_alignment = (TARGET_PENTIUMPRO
10486 && (count == 0 || count >= (unsigned int) 260)
10487 ? 8 : UNITS_PER_WORD);
10488
10489 /* In case we don't know anything about the alignment, default to the
10490 library version, since it is usually equally fast and results in
10491 shorter code. */
10492 if (!TARGET_INLINE_ALL_STRINGOPS && align < UNITS_PER_WORD)
10493 return 0;
10494
10495 if (TARGET_SINGLE_STRINGOP)
10496 emit_insn (gen_cld ());
10497
10498 countreg2 = gen_reg_rtx (Pmode);
10499 countreg = copy_to_mode_reg (counter_mode, count_exp);
10500 zeroreg = copy_to_mode_reg (Pmode, const0_rtx);
10501
10502 if (count == 0 && align < desired_alignment)
10503 {
10504 label = gen_label_rtx ();
10505 emit_cmp_and_jump_insns (countreg, GEN_INT (desired_alignment - 1),
10506 LEU, 0, counter_mode, 1, label);
10507 }
10508 if (align <= 1)
10509 {
10510 rtx label = ix86_expand_aligntest (destreg, 1);
10511 emit_insn (gen_strsetqi (destreg,
10512 gen_rtx_SUBREG (QImode, zeroreg, 0)));
10513 ix86_adjust_counter (countreg, 1);
10514 emit_label (label);
10515 LABEL_NUSES (label) = 1;
10516 }
10517 if (align <= 2)
10518 {
10519 rtx label = ix86_expand_aligntest (destreg, 2);
10520 emit_insn (gen_strsethi (destreg,
10521 gen_rtx_SUBREG (HImode, zeroreg, 0)));
10522 ix86_adjust_counter (countreg, 2);
10523 emit_label (label);
10524 LABEL_NUSES (label) = 1;
10525 }
10526 if (align <= 4 && desired_alignment > 4)
10527 {
10528 rtx label = ix86_expand_aligntest (destreg, 4);
10529 emit_insn (gen_strsetsi (destreg, (TARGET_64BIT
10530 ? gen_rtx_SUBREG (SImode, zeroreg, 0)
10531 : zeroreg)));
10532 ix86_adjust_counter (countreg, 4);
10533 emit_label (label);
10534 LABEL_NUSES (label) = 1;
10535 }
10536
10537 if (label && desired_alignment > 4 && !TARGET_64BIT)
10538 {
10539 emit_label (label);
10540 LABEL_NUSES (label) = 1;
10541 label = NULL_RTX;
10542 }
10543
10544 if (!TARGET_SINGLE_STRINGOP)
10545 emit_insn (gen_cld ());
10546 if (TARGET_64BIT)
10547 {
10548 emit_insn (gen_lshrdi3 (countreg2, ix86_zero_extend_to_Pmode (countreg),
10549 GEN_INT (3)));
10550 emit_insn (gen_rep_stosdi_rex64 (destreg, countreg2, zeroreg,
10551 destreg, countreg2));
10552 }
10553 else
10554 {
10555 emit_insn (gen_lshrsi3 (countreg2, countreg, GEN_INT (2)));
10556 emit_insn (gen_rep_stossi (destreg, countreg2, zeroreg,
10557 destreg, countreg2));
10558 }
10559 if (label)
10560 {
10561 emit_label (label);
10562 LABEL_NUSES (label) = 1;
10563 }
10564
10565 if (TARGET_64BIT && align > 4 && count != 0 && (count & 4))
10566 emit_insn (gen_strsetsi (destreg,
10567 gen_rtx_SUBREG (SImode, zeroreg, 0)));
10568 if (TARGET_64BIT && (align <= 4 || count == 0))
10569 {
10570 rtx label = ix86_expand_aligntest (countreg, 4);
10571 emit_insn (gen_strsetsi (destreg,
10572 gen_rtx_SUBREG (SImode, zeroreg, 0)));
10573 emit_label (label);
10574 LABEL_NUSES (label) = 1;
10575 }
10576 if (align > 2 && count != 0 && (count & 2))
10577 emit_insn (gen_strsethi (destreg,
10578 gen_rtx_SUBREG (HImode, zeroreg, 0)));
10579 if (align <= 2 || count == 0)
10580 {
10581 rtx label = ix86_expand_aligntest (countreg, 2);
10582 emit_insn (gen_strsethi (destreg,
10583 gen_rtx_SUBREG (HImode, zeroreg, 0)));
10584 emit_label (label);
10585 LABEL_NUSES (label) = 1;
10586 }
10587 if (align > 1 && count != 0 && (count & 1))
10588 emit_insn (gen_strsetqi (destreg,
10589 gen_rtx_SUBREG (QImode, zeroreg, 0)));
10590 if (align <= 1 || count == 0)
10591 {
10592 rtx label = ix86_expand_aligntest (countreg, 1);
10593 emit_insn (gen_strsetqi (destreg,
10594 gen_rtx_SUBREG (QImode, zeroreg, 0)));
10595 emit_label (label);
10596 LABEL_NUSES (label) = 1;
10597 }
10598 }
10599 return 1;
10600 }
10601 /* Expand strlen. */
10602 int
10603 ix86_expand_strlen (out, src, eoschar, align)
10604 rtx out, src, eoschar, align;
10605 {
10606 rtx addr, scratch1, scratch2, scratch3, scratch4;
10607
10608 /* The generic case of the strlen expander is long. Avoid expanding it
10609 unless TARGET_INLINE_ALL_STRINGOPS. */
10610
10611 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
10612 && !TARGET_INLINE_ALL_STRINGOPS
10613 && !optimize_size
10614 && (GET_CODE (align) != CONST_INT || INTVAL (align) < 4))
10615 return 0;
10616
10617 addr = force_reg (Pmode, XEXP (src, 0));
10618 scratch1 = gen_reg_rtx (Pmode);
10619
10620 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
10621 && !optimize_size)
10622 {
10623 /* Well it seems that some optimizer does not combine a call like
10624 foo(strlen(bar), strlen(bar));
10625 when the move and the subtraction are done here. It does calculate
10626 the length just once when these instructions are done inside of
10627 output_strlen_unroll(). But I think since &bar[strlen(bar)] is
10628 often used and I use one fewer register for the lifetime of
10629 output_strlen_unroll() this is better. */
10630
10631 emit_move_insn (out, addr);
10632
10633 ix86_expand_strlensi_unroll_1 (out, align);
10634
10635 /* strlensi_unroll_1 returns the address of the zero at the end of
10636 the string, like memchr(), so compute the length by subtracting
10637 the start address. */
10638 if (TARGET_64BIT)
10639 emit_insn (gen_subdi3 (out, out, addr));
10640 else
10641 emit_insn (gen_subsi3 (out, out, addr));
10642 }
10643 else
10644 {
10645 scratch2 = gen_reg_rtx (Pmode);
10646 scratch3 = gen_reg_rtx (Pmode);
10647 scratch4 = force_reg (Pmode, constm1_rtx);
10648
10649 emit_move_insn (scratch3, addr);
10650 eoschar = force_reg (QImode, eoschar);
10651
10652 emit_insn (gen_cld ());
10653 if (TARGET_64BIT)
10654 {
10655 emit_insn (gen_strlenqi_rex_1 (scratch1, scratch3, eoschar,
10656 align, scratch4, scratch3));
10657 emit_insn (gen_one_cmpldi2 (scratch2, scratch1));
10658 emit_insn (gen_adddi3 (out, scratch2, constm1_rtx));
10659 }
10660 else
10661 {
10662 emit_insn (gen_strlenqi_1 (scratch1, scratch3, eoschar,
10663 align, scratch4, scratch3));
10664 emit_insn (gen_one_cmplsi2 (scratch2, scratch1));
10665 emit_insn (gen_addsi3 (out, scratch2, constm1_rtx));
10666 }
10667 }
10668 return 1;
10669 }
10670
10671 /* Expand the appropriate insns for doing strlen if not just doing
10672 repnz; scasb
10673
10674 out = result, initialized with the start address
10675 align_rtx = alignment of the address.
10676 scratch = scratch register, initialized with the start address when
10677 not aligned, otherwise undefined
10678
10679 This is just the body. It needs the initialisations mentioned above and
10680 some address computing at the end. These things are done in i386.md. */
10681
10682 static void
10683 ix86_expand_strlensi_unroll_1 (out, align_rtx)
10684 rtx out, align_rtx;
10685 {
10686 int align;
10687 rtx tmp;
10688 rtx align_2_label = NULL_RTX;
10689 rtx align_3_label = NULL_RTX;
10690 rtx align_4_label = gen_label_rtx ();
10691 rtx end_0_label = gen_label_rtx ();
10692 rtx mem;
10693 rtx tmpreg = gen_reg_rtx (SImode);
10694 rtx scratch = gen_reg_rtx (SImode);
10695
10696 align = 0;
10697 if (GET_CODE (align_rtx) == CONST_INT)
10698 align = INTVAL (align_rtx);
10699
10700 /* Loop to check 1..3 bytes for null to get an aligned pointer. */
10701
10702 /* Is there a known alignment and is it less than 4? */
10703 if (align < 4)
10704 {
10705 rtx scratch1 = gen_reg_rtx (Pmode);
10706 emit_move_insn (scratch1, out);
10707 /* Is there a known alignment and is it not 2? */
10708 if (align != 2)
10709 {
10710 align_3_label = gen_label_rtx (); /* Label when aligned to 3-byte */
10711 align_2_label = gen_label_rtx (); /* Label when aligned to 2-byte */
10712
10713 /* Leave just the 3 lower bits. */
10714 align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (3),
10715 NULL_RTX, 0, OPTAB_WIDEN);
10716
10717 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
10718 Pmode, 1, align_4_label);
10719 emit_cmp_and_jump_insns (align_rtx, GEN_INT (2), EQ, NULL,
10720 Pmode, 1, align_2_label);
10721 emit_cmp_and_jump_insns (align_rtx, GEN_INT (2), GTU, NULL,
10722 Pmode, 1, align_3_label);
10723 }
10724 else
10725 {
10726 /* Since the alignment is 2, we have to check 2 or 0 bytes;
10727 check whether it is aligned to 4 bytes. */
10728
10729 align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (2),
10730 NULL_RTX, 0, OPTAB_WIDEN);
10731
10732 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
10733 Pmode, 1, align_4_label);
10734 }
10735
10736 mem = gen_rtx_MEM (QImode, out);
10737
10738 /* Now compare the bytes. */
10739
10740 /* Compare the first n unaligned bytes on a byte-by-byte basis. */
10741 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL,
10742 QImode, 1, end_0_label);
10743
10744 /* Increment the address. */
10745 if (TARGET_64BIT)
10746 emit_insn (gen_adddi3 (out, out, const1_rtx));
10747 else
10748 emit_insn (gen_addsi3 (out, out, const1_rtx));
10749
10750 /* Not needed with an alignment of 2 */
10751 if (align != 2)
10752 {
10753 emit_label (align_2_label);
10754
10755 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
10756 end_0_label);
10757
10758 if (TARGET_64BIT)
10759 emit_insn (gen_adddi3 (out, out, const1_rtx));
10760 else
10761 emit_insn (gen_addsi3 (out, out, const1_rtx));
10762
10763 emit_label (align_3_label);
10764 }
10765
10766 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
10767 end_0_label);
10768
10769 if (TARGET_64BIT)
10770 emit_insn (gen_adddi3 (out, out, const1_rtx));
10771 else
10772 emit_insn (gen_addsi3 (out, out, const1_rtx));
10773 }
10774
10775 /* Generate a loop to check 4 bytes at a time. It is not a good idea to
10776 align this loop: it only makes the program larger and does not help
10777 to speed it up. */
10778 emit_label (align_4_label);
10779
10780 mem = gen_rtx_MEM (SImode, out);
10781 emit_move_insn (scratch, mem);
10782 if (TARGET_64BIT)
10783 emit_insn (gen_adddi3 (out, out, GEN_INT (4)));
10784 else
10785 emit_insn (gen_addsi3 (out, out, GEN_INT (4)));
10786
10787 /* This formula yields a nonzero result iff one of the bytes is zero.
10788 This saves three branches inside loop and many cycles. */
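/* In scalar C the emitted test is roughly (illustrative only):
     ((x - 0x01010101) & ~x & 0x80808080) != 0
   The subtraction borrows into the high bit of any zero byte, the complement
   keeps only bytes whose high bit was clear, and the 0x80808080 mask extracts
   the resulting flag bits.  For example x = 0x41004141 yields 0x00800000,
   flagging the zero byte, while a word with no zero byte yields 0.  */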
10789
10790 emit_insn (gen_addsi3 (tmpreg, scratch, GEN_INT (-0x01010101)));
10791 emit_insn (gen_one_cmplsi2 (scratch, scratch));
10792 emit_insn (gen_andsi3 (tmpreg, tmpreg, scratch));
10793 emit_insn (gen_andsi3 (tmpreg, tmpreg,
10794 gen_int_mode (0x80808080, SImode)));
10795 emit_cmp_and_jump_insns (tmpreg, const0_rtx, EQ, 0, SImode, 1,
10796 align_4_label);
10797
10798 if (TARGET_CMOVE)
10799 {
10800 rtx reg = gen_reg_rtx (SImode);
10801 rtx reg2 = gen_reg_rtx (Pmode);
10802 emit_move_insn (reg, tmpreg);
10803 emit_insn (gen_lshrsi3 (reg, reg, GEN_INT (16)));
10804
10805 /* If zero is not in the first two bytes, move two bytes forward. */
10806 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
10807 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
10808 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
10809 emit_insn (gen_rtx_SET (VOIDmode, tmpreg,
10810 gen_rtx_IF_THEN_ELSE (SImode, tmp,
10811 reg,
10812 tmpreg)));
10813 /* Emit lea manually to avoid clobbering of flags. */
10814 emit_insn (gen_rtx_SET (SImode, reg2,
10815 gen_rtx_PLUS (Pmode, out, GEN_INT (2))));
10816
10817 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
10818 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
10819 emit_insn (gen_rtx_SET (VOIDmode, out,
10820 gen_rtx_IF_THEN_ELSE (Pmode, tmp,
10821 reg2,
10822 out)));
10823
10824 }
10825 else
10826 {
10827 rtx end_2_label = gen_label_rtx ();
10828 /* Is zero in the first two bytes? */
10829
10830 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
10831 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
10832 tmp = gen_rtx_NE (VOIDmode, tmp, const0_rtx);
10833 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
10834 gen_rtx_LABEL_REF (VOIDmode, end_2_label),
10835 pc_rtx);
10836 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
10837 JUMP_LABEL (tmp) = end_2_label;
10838
10839 /* Not in the first two. Move two bytes forward. */
10840 emit_insn (gen_lshrsi3 (tmpreg, tmpreg, GEN_INT (16)));
10841 if (TARGET_64BIT)
10842 emit_insn (gen_adddi3 (out, out, GEN_INT (2)));
10843 else
10844 emit_insn (gen_addsi3 (out, out, GEN_INT (2)));
10845
10846 emit_label (end_2_label);
10847
10848 }
10849
10850 /* Avoid a branch when fixing up the final byte position. */
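/* How the branch-free fix-up below works (a sketch, not normative): at this
   point OUT is 4 bytes past the start of the word (6 after the two-byte
   adjustment above), and bit 7 of the low byte of TMPREG is set exactly when
   the zero byte is the lower of the two remaining candidates.  Adding that
   byte to itself copies the bit into the carry flag, so the following
   subtract-with-borrow of 3 leaves OUT pointing at the zero byte.  */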
10851 tmpreg = gen_lowpart (QImode, tmpreg);
10852 emit_insn (gen_addqi3_cc (tmpreg, tmpreg, tmpreg));
10853 if (TARGET_64BIT)
10854 emit_insn (gen_subdi3_carry_rex64 (out, out, GEN_INT (3)));
10855 else
10856 emit_insn (gen_subsi3_carry (out, out, GEN_INT (3)));
10857
10858 emit_label (end_0_label);
10859 }
10860
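/* Expand a call.  RETVAL is the value register or NULL for a call with no
   result, FNADDR is the MEM giving the function address, CALLARG1 becomes
   the second operand of the CALL rtx, CALLARG2 is assumed here (from the
   code below) to be the SSE register count placed in %al for 64-bit varargs
   calls, and POP is the number of bytes to pop from the stack on return, or
   const0_rtx for none.  */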
10861 void
10862 ix86_expand_call (retval, fnaddr, callarg1, callarg2, pop)
10863 rtx retval, fnaddr, callarg1, callarg2, pop;
10864 {
10865 rtx use = NULL, call;
10866
10867 if (pop == const0_rtx)
10868 pop = NULL;
10869 if (TARGET_64BIT && pop)
10870 abort ();
10871
10872 #if TARGET_MACHO
10873 if (flag_pic && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF)
10874 fnaddr = machopic_indirect_call_target (fnaddr);
10875 #else
10876 /* Static functions and indirect calls don't need the pic register. */
10877 if (! TARGET_64BIT && flag_pic
10878 && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
10879 && ! SYMBOL_REF_FLAG (XEXP (fnaddr, 0)))
10880 use_reg (&use, pic_offset_table_rtx);
10881
10882 if (TARGET_64BIT && INTVAL (callarg2) >= 0)
10883 {
10884 rtx al = gen_rtx_REG (QImode, 0);
10885 emit_move_insn (al, callarg2);
10886 use_reg (&use, al);
10887 }
10888 #endif /* TARGET_MACHO */
10889
10890 if (! call_insn_operand (XEXP (fnaddr, 0), Pmode))
10891 {
10892 fnaddr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0));
10893 fnaddr = gen_rtx_MEM (QImode, fnaddr);
10894 }
10895
10896 call = gen_rtx_CALL (VOIDmode, fnaddr, callarg1);
10897 if (retval)
10898 call = gen_rtx_SET (VOIDmode, retval, call);
10899 if (pop)
10900 {
10901 pop = gen_rtx_PLUS (Pmode, stack_pointer_rtx, pop);
10902 pop = gen_rtx_SET (VOIDmode, stack_pointer_rtx, pop);
10903 call = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, call, pop));
10904 }
10905
10906 call = emit_call_insn (call);
10907 if (use)
10908 CALL_INSN_FUNCTION_USAGE (call) = use;
10909 }
10910
10911 \f
10912 /* Clear stack slot assignments remembered from previous functions.
10913 This is called from INIT_EXPANDERS once before RTL is emitted for each
10914 function. */
10915
10916 static struct machine_function *
10917 ix86_init_machine_status ()
10918 {
10919 return ggc_alloc_cleared (sizeof (struct machine_function));
10920 }
10921
10922 /* Return a MEM corresponding to a stack slot with mode MODE.
10923 Allocate a new slot if necessary.
10924
10925 The RTL for a function can have several slots available: N is
10926 which slot to use. */
10927
10928 rtx
10929 assign_386_stack_local (mode, n)
10930 enum machine_mode mode;
10931 int n;
10932 {
10933 if (n < 0 || n >= MAX_386_STACK_LOCALS)
10934 abort ();
10935
10936 if (ix86_stack_locals[(int) mode][n] == NULL_RTX)
10937 ix86_stack_locals[(int) mode][n]
10938 = assign_stack_local (mode, GET_MODE_SIZE (mode), 0);
10939
10940 return ix86_stack_locals[(int) mode][n];
10941 }
10942
10943 /* Construct the SYMBOL_REF for the tls_get_addr function. */
10944
10945 static GTY(()) rtx ix86_tls_symbol;
10946 rtx
10947 ix86_tls_get_addr ()
10948 {
10949
10950 if (!ix86_tls_symbol)
10951 {
10952 ix86_tls_symbol = gen_rtx_SYMBOL_REF (Pmode,
10953 (TARGET_GNU_TLS && !TARGET_64BIT)
10954 ? "___tls_get_addr"
10955 : "__tls_get_addr");
10956 }
10957
10958 return ix86_tls_symbol;
10959 }
10960 \f
10961 /* Calculate the length of the memory address in the instruction
10962 encoding. Does not include the one-byte modrm, opcode, or prefix. */
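/* A few concrete cases of the length computed below (illustrative, 32-bit
   addressing): (%eax) costs 0 extra bytes; (%esp) and (%ebp) cost 1 because
   they need the longer modrm form; 4(%eax) costs 1 for the 8-bit
   displacement; a bare absolute address costs 4; and any index register adds
   1 byte for the SIB.  */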
10963
10964 static int
10965 memory_address_length (addr)
10966 rtx addr;
10967 {
10968 struct ix86_address parts;
10969 rtx base, index, disp;
10970 int len;
10971
10972 if (GET_CODE (addr) == PRE_DEC
10973 || GET_CODE (addr) == POST_INC
10974 || GET_CODE (addr) == PRE_MODIFY
10975 || GET_CODE (addr) == POST_MODIFY)
10976 return 0;
10977
10978 if (! ix86_decompose_address (addr, &parts))
10979 abort ();
10980
10981 base = parts.base;
10982 index = parts.index;
10983 disp = parts.disp;
10984 len = 0;
10985
10986 /* Register Indirect. */
10987 if (base && !index && !disp)
10988 {
10989 /* Special cases: ebp and esp need the two-byte modrm form. */
10990 if (addr == stack_pointer_rtx
10991 || addr == arg_pointer_rtx
10992 || addr == frame_pointer_rtx
10993 || addr == hard_frame_pointer_rtx)
10994 len = 1;
10995 }
10996
10997 /* Direct Addressing. */
10998 else if (disp && !base && !index)
10999 len = 4;
11000
11001 else
11002 {
11003 /* Find the length of the displacement constant. */
11004 if (disp)
11005 {
11006 if (GET_CODE (disp) == CONST_INT
11007 && CONST_OK_FOR_LETTER_P (INTVAL (disp), 'K'))
11008 len = 1;
11009 else
11010 len = 4;
11011 }
11012
11013 /* An index requires the two-byte modrm form. */
11014 if (index)
11015 len += 1;
11016 }
11017
11018 return len;
11019 }
11020
11021 /* Compute the default value for the "length_immediate" attribute. When
11022 SHORTFORM is set, expect that the insn has an 8-bit immediate alternative. */
11023 int
11024 ix86_attr_length_immediate_default (insn, shortform)
11025 rtx insn;
11026 int shortform;
11027 {
11028 int len = 0;
11029 int i;
11030 extract_insn_cached (insn);
11031 for (i = recog_data.n_operands - 1; i >= 0; --i)
11032 if (CONSTANT_P (recog_data.operand[i]))
11033 {
11034 if (len)
11035 abort ();
11036 if (shortform
11037 && GET_CODE (recog_data.operand[i]) == CONST_INT
11038 && CONST_OK_FOR_LETTER_P (INTVAL (recog_data.operand[i]), 'K'))
11039 len = 1;
11040 else
11041 {
11042 switch (get_attr_mode (insn))
11043 {
11044 case MODE_QI:
11045 len+=1;
11046 break;
11047 case MODE_HI:
11048 len+=2;
11049 break;
11050 case MODE_SI:
11051 len+=4;
11052 break;
11053 /* Immediates for DImode instructions are encoded as 32bit sign extended values. */
11054 case MODE_DI:
11055 len+=4;
11056 break;
11057 default:
11058 fatal_insn ("unknown insn mode", insn);
11059 }
11060 }
11061 }
11062 return len;
11063 }
11064 /* Compute default value for "length_address" attribute. */
11065 int
11066 ix86_attr_length_address_default (insn)
11067 rtx insn;
11068 {
11069 int i;
11070 extract_insn_cached (insn);
11071 for (i = recog_data.n_operands - 1; i >= 0; --i)
11072 if (GET_CODE (recog_data.operand[i]) == MEM)
11073 {
11074 return memory_address_length (XEXP (recog_data.operand[i], 0));
11075 break;
11076 }
11077 return 0;
11078 }
11079 \f
11080 /* Return the maximum number of instructions a cpu can issue. */
11081
11082 static int
11083 ix86_issue_rate ()
11084 {
11085 switch (ix86_cpu)
11086 {
11087 case PROCESSOR_PENTIUM:
11088 case PROCESSOR_K6:
11089 return 2;
11090
11091 case PROCESSOR_PENTIUMPRO:
11092 case PROCESSOR_PENTIUM4:
11093 case PROCESSOR_ATHLON:
11094 return 3;
11095
11096 default:
11097 return 1;
11098 }
11099 }
11100
11101 /* A subroutine of ix86_adjust_cost -- return true iff INSN reads the flags
11102 set by DEP_INSN and nothing else that DEP_INSN sets. */
11103
11104 static int
11105 ix86_flags_dependant (insn, dep_insn, insn_type)
11106 rtx insn, dep_insn;
11107 enum attr_type insn_type;
11108 {
11109 rtx set, set2;
11110
11111 /* Simplify the test for uninteresting insns. */
11112 if (insn_type != TYPE_SETCC
11113 && insn_type != TYPE_ICMOV
11114 && insn_type != TYPE_FCMOV
11115 && insn_type != TYPE_IBR)
11116 return 0;
11117
11118 if ((set = single_set (dep_insn)) != 0)
11119 {
11120 set = SET_DEST (set);
11121 set2 = NULL_RTX;
11122 }
11123 else if (GET_CODE (PATTERN (dep_insn)) == PARALLEL
11124 && XVECLEN (PATTERN (dep_insn), 0) == 2
11125 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 0)) == SET
11126 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 1)) == SET)
11127 {
11128 set = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
11129 set2 = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 1));
11130 }
11131 else
11132 return 0;
11133
11134 if (GET_CODE (set) != REG || REGNO (set) != FLAGS_REG)
11135 return 0;
11136
11137 /* This test is true if the dependent insn reads the flags but
11138 not any other potentially set register. */
11139 if (!reg_overlap_mentioned_p (set, PATTERN (insn)))
11140 return 0;
11141
11142 if (set2 && reg_overlap_mentioned_p (set2, PATTERN (insn)))
11143 return 0;
11144
11145 return 1;
11146 }
11147
11148 /* A subroutine of ix86_adjust_cost -- return true iff INSN has a memory
11149 address with operands set by DEP_INSN. */
11150
11151 static int
11152 ix86_agi_dependant (insn, dep_insn, insn_type)
11153 rtx insn, dep_insn;
11154 enum attr_type insn_type;
11155 {
11156 rtx addr;
11157
11158 if (insn_type == TYPE_LEA
11159 && TARGET_PENTIUM)
11160 {
11161 addr = PATTERN (insn);
11162 if (GET_CODE (addr) == SET)
11163 ;
11164 else if (GET_CODE (addr) == PARALLEL
11165 && GET_CODE (XVECEXP (addr, 0, 0)) == SET)
11166 addr = XVECEXP (addr, 0, 0);
11167 else
11168 abort ();
11169 addr = SET_SRC (addr);
11170 }
11171 else
11172 {
11173 int i;
11174 extract_insn_cached (insn);
11175 for (i = recog_data.n_operands - 1; i >= 0; --i)
11176 if (GET_CODE (recog_data.operand[i]) == MEM)
11177 {
11178 addr = XEXP (recog_data.operand[i], 0);
11179 goto found;
11180 }
11181 return 0;
11182 found:;
11183 }
11184
11185 return modified_in_p (addr, dep_insn);
11186 }
11187
11188 static int
11189 ix86_adjust_cost (insn, link, dep_insn, cost)
11190 rtx insn, link, dep_insn;
11191 int cost;
11192 {
11193 enum attr_type insn_type, dep_insn_type;
11194 enum attr_memory memory, dep_memory;
11195 rtx set, set2;
11196 int dep_insn_code_number;
11197
11198 /* Anti and output dependencies have zero cost on all CPUs. */
11199 if (REG_NOTE_KIND (link) != 0)
11200 return 0;
11201
11202 dep_insn_code_number = recog_memoized (dep_insn);
11203
11204 /* If we can't recognize the insns, we can't really do anything. */
11205 if (dep_insn_code_number < 0 || recog_memoized (insn) < 0)
11206 return cost;
11207
11208 insn_type = get_attr_type (insn);
11209 dep_insn_type = get_attr_type (dep_insn);
11210
11211 switch (ix86_cpu)
11212 {
11213 case PROCESSOR_PENTIUM:
11214 /* Address Generation Interlock adds a cycle of latency. */
11215 if (ix86_agi_dependant (insn, dep_insn, insn_type))
11216 cost += 1;
11217
11218 /* ??? Compares pair with jump/setcc. */
11219 if (ix86_flags_dependant (insn, dep_insn, insn_type))
11220 cost = 0;
11221
11222 /* Floating point stores require the value to be ready one cycle earlier. */
11223 if (insn_type == TYPE_FMOV
11224 && get_attr_memory (insn) == MEMORY_STORE
11225 && !ix86_agi_dependant (insn, dep_insn, insn_type))
11226 cost += 1;
11227 break;
11228
11229 case PROCESSOR_PENTIUMPRO:
11230 memory = get_attr_memory (insn);
11231 dep_memory = get_attr_memory (dep_insn);
11232
11233 /* Since we can't represent delayed latencies of load+operation,
11234 increase the cost here for non-imov insns. */
11235 if (dep_insn_type != TYPE_IMOV
11236 && dep_insn_type != TYPE_FMOV
11237 && (dep_memory == MEMORY_LOAD || dep_memory == MEMORY_BOTH))
11238 cost += 1;
11239
11240 /* INT->FP conversion is expensive. */
11241 if (get_attr_fp_int_src (dep_insn))
11242 cost += 5;
11243
11244 /* There is one cycle extra latency between an FP op and a store. */
11245 if (insn_type == TYPE_FMOV
11246 && (set = single_set (dep_insn)) != NULL_RTX
11247 && (set2 = single_set (insn)) != NULL_RTX
11248 && rtx_equal_p (SET_DEST (set), SET_SRC (set2))
11249 && GET_CODE (SET_DEST (set2)) == MEM)
11250 cost += 1;
11251
11252 /* Model the ability of the reorder buffer to hide the latency of a load
11253 by executing it in parallel with the previous instruction when the
11254 previous instruction is not needed to compute the address. */
11255 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
11256 && !ix86_agi_dependant (insn, dep_insn, insn_type))
11257 {
11258 /* Claim that moves take one cycle, as the core can issue one load
11259 at a time and the next load can start a cycle later. */
11260 if (dep_insn_type == TYPE_IMOV
11261 || dep_insn_type == TYPE_FMOV)
11262 cost = 1;
11263 else if (cost > 1)
11264 cost--;
11265 }
11266 break;
11267
11268 case PROCESSOR_K6:
11269 memory = get_attr_memory (insn);
11270 dep_memory = get_attr_memory (dep_insn);
11271 /* The esp dependency is resolved before the instruction is really
11272 finished. */
11273 if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
11274 && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
11275 return 1;
11276
11277 /* Since we can't represent delayed latencies of load+operation,
11278 increase the cost here for non-imov insns. */
11279 if (dep_memory == MEMORY_LOAD || dep_memory == MEMORY_BOTH)
11280 cost += (dep_insn_type != TYPE_IMOV) ? 2 : 1;
11281
11282 /* INT->FP conversion is expensive. */
11283 if (get_attr_fp_int_src (dep_insn))
11284 cost += 5;
11285
11286 /* Model the ability of the reorder buffer to hide the latency of a load
11287 by executing it in parallel with the previous instruction when the
11288 previous instruction is not needed to compute the address. */
11289 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
11290 && !ix86_agi_dependant (insn, dep_insn, insn_type))
11291 {
11292 /* Claim that moves take one cycle, as the core can issue one load
11293 at a time and the next load can start a cycle later. */
11294 if (dep_insn_type == TYPE_IMOV
11295 || dep_insn_type == TYPE_FMOV)
11296 cost = 1;
11297 else if (cost > 2)
11298 cost -= 2;
11299 else
11300 cost = 1;
11301 }
11302 break;
11303
11304 case PROCESSOR_ATHLON:
11305 memory = get_attr_memory (insn);
11306 dep_memory = get_attr_memory (dep_insn);
11307
11308 if (dep_memory == MEMORY_LOAD || dep_memory == MEMORY_BOTH)
11309 {
11310 if (dep_insn_type == TYPE_IMOV || dep_insn_type == TYPE_FMOV)
11311 cost += 2;
11312 else
11313 cost += 3;
11314 }
11315 /* Model the ability of the reorder buffer to hide the latency of a load
11316 by executing it in parallel with the previous instruction when the
11317 previous instruction is not needed to compute the address. */
11318 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
11319 && !ix86_agi_dependant (insn, dep_insn, insn_type))
11320 {
11321 /* Claim that moves take one cycle, as the core can issue one load
11322 at a time and the next load can start a cycle later. */
11323 if (dep_insn_type == TYPE_IMOV
11324 || dep_insn_type == TYPE_FMOV)
11325 cost = 0;
11326 else if (cost >= 3)
11327 cost -= 3;
11328 else
11329 cost = 0;
11330 }
11331
11332 default:
11333 break;
11334 }
11335
11336 return cost;
11337 }
11338
11339 static union
11340 {
11341 struct ppro_sched_data
11342 {
11343 rtx decode[3];
11344 int issued_this_cycle;
11345 } ppro;
11346 } ix86_sched_data;
11347
11348 static enum attr_ppro_uops
11349 ix86_safe_ppro_uops (insn)
11350 rtx insn;
11351 {
11352 if (recog_memoized (insn) >= 0)
11353 return get_attr_ppro_uops (insn);
11354 else
11355 return PPRO_UOPS_MANY;
11356 }
11357
11358 static void
11359 ix86_dump_ppro_packet (dump)
11360 FILE *dump;
11361 {
11362 if (ix86_sched_data.ppro.decode[0])
11363 {
11364 fprintf (dump, "PPRO packet: %d",
11365 INSN_UID (ix86_sched_data.ppro.decode[0]));
11366 if (ix86_sched_data.ppro.decode[1])
11367 fprintf (dump, " %d", INSN_UID (ix86_sched_data.ppro.decode[1]));
11368 if (ix86_sched_data.ppro.decode[2])
11369 fprintf (dump, " %d", INSN_UID (ix86_sched_data.ppro.decode[2]));
11370 fputc ('\n', dump);
11371 }
11372 }
11373
11374 /* We're beginning a new block. Initialize data structures as necessary. */
11375
11376 static void
11377 ix86_sched_init (dump, sched_verbose, veclen)
11378 FILE *dump ATTRIBUTE_UNUSED;
11379 int sched_verbose ATTRIBUTE_UNUSED;
11380 int veclen ATTRIBUTE_UNUSED;
11381 {
11382 memset (&ix86_sched_data, 0, sizeof (ix86_sched_data));
11383 }
11384
11385 /* Shift INSN to SLOT, and shift everything else down. */
11386
11387 static void
11388 ix86_reorder_insn (insnp, slot)
11389 rtx *insnp, *slot;
11390 {
11391 if (insnp != slot)
11392 {
11393 rtx insn = *insnp;
11394 do
11395 insnp[0] = insnp[1];
11396 while (++insnp != slot);
11397 *insnp = insn;
11398 }
11399 }
11400
11401 static void
11402 ix86_sched_reorder_ppro (ready, e_ready)
11403 rtx *ready;
11404 rtx *e_ready;
11405 {
11406 rtx decode[3];
11407 enum attr_ppro_uops cur_uops;
11408 int issued_this_cycle;
11409 rtx *insnp;
11410 int i;
11411
11412 /* At this point .ppro.decode contains the state of the three
11413 decoders from last "cycle". That is, those insns that were
11414 actually independent. But here we're scheduling for the
11415 decoder, and we may find things that are decodable in the
11416 same cycle. */
11417
11418 memcpy (decode, ix86_sched_data.ppro.decode, sizeof (decode));
11419 issued_this_cycle = 0;
11420
11421 insnp = e_ready;
11422 cur_uops = ix86_safe_ppro_uops (*insnp);
11423
11424 /* If the decoders are empty, and we have a complex insn at the
11425 head of the priority queue, let it issue without complaint. */
11426 if (decode[0] == NULL)
11427 {
11428 if (cur_uops == PPRO_UOPS_MANY)
11429 {
11430 decode[0] = *insnp;
11431 goto ppro_done;
11432 }
11433
11434 /* Otherwise, search for a 2-4 uop insn to issue. */
11435 while (cur_uops != PPRO_UOPS_FEW)
11436 {
11437 if (insnp == ready)
11438 break;
11439 cur_uops = ix86_safe_ppro_uops (*--insnp);
11440 }
11441
11442 /* If so, move it to the head of the line. */
11443 if (cur_uops == PPRO_UOPS_FEW)
11444 ix86_reorder_insn (insnp, e_ready);
11445
11446 /* Issue the head of the queue. */
11447 issued_this_cycle = 1;
11448 decode[0] = *e_ready--;
11449 }
11450
11451 /* Look for simple insns to fill in the other two slots. */
11452 for (i = 1; i < 3; ++i)
11453 if (decode[i] == NULL)
11454 {
11455 if (ready > e_ready)
11456 goto ppro_done;
11457
11458 insnp = e_ready;
11459 cur_uops = ix86_safe_ppro_uops (*insnp);
11460 while (cur_uops != PPRO_UOPS_ONE)
11461 {
11462 if (insnp == ready)
11463 break;
11464 cur_uops = ix86_safe_ppro_uops (*--insnp);
11465 }
11466
11467 /* Found one. Move it to the head of the queue and issue it. */
11468 if (cur_uops == PPRO_UOPS_ONE)
11469 {
11470 ix86_reorder_insn (insnp, e_ready);
11471 decode[i] = *e_ready--;
11472 issued_this_cycle++;
11473 continue;
11474 }
11475
11476 /* ??? Didn't find one. Ideally, here we would do a lazy split
11477 of 2-uop insns, issue one and queue the other. */
11478 }
11479
11480 ppro_done:
11481 if (issued_this_cycle == 0)
11482 issued_this_cycle = 1;
11483 ix86_sched_data.ppro.issued_this_cycle = issued_this_cycle;
11484 }
11485
11486 /* We are about to begin issuing insns for this clock cycle.
11487 Override the default sort algorithm to better slot instructions. */
11488 static int
11489 ix86_sched_reorder (dump, sched_verbose, ready, n_readyp, clock_var)
11490 FILE *dump ATTRIBUTE_UNUSED;
11491 int sched_verbose ATTRIBUTE_UNUSED;
11492 rtx *ready;
11493 int *n_readyp;
11494 int clock_var ATTRIBUTE_UNUSED;
11495 {
11496 int n_ready = *n_readyp;
11497 rtx *e_ready = ready + n_ready - 1;
11498
11499 /* Make sure to go ahead and initialize key items in
11500 ix86_sched_data if we are not going to bother trying to
11501 reorder the ready queue. */
11502 if (n_ready < 2)
11503 {
11504 ix86_sched_data.ppro.issued_this_cycle = 1;
11505 goto out;
11506 }
11507
11508 switch (ix86_cpu)
11509 {
11510 default:
11511 break;
11512
11513 case PROCESSOR_PENTIUMPRO:
11514 ix86_sched_reorder_ppro (ready, e_ready);
11515 break;
11516 }
11517
11518 out:
11519 return ix86_issue_rate ();
11520 }
11521
11522 /* We are about to issue INSN. Return the number of insns left on the
11523 ready queue that can be issued this cycle. */
11524
11525 static int
11526 ix86_variable_issue (dump, sched_verbose, insn, can_issue_more)
11527 FILE *dump;
11528 int sched_verbose;
11529 rtx insn;
11530 int can_issue_more;
11531 {
11532 int i;
11533 switch (ix86_cpu)
11534 {
11535 default:
11536 return can_issue_more - 1;
11537
11538 case PROCESSOR_PENTIUMPRO:
11539 {
11540 enum attr_ppro_uops uops = ix86_safe_ppro_uops (insn);
11541
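	  /* A microcoded insn closes out any packet in progress and then
	     occupies a packet by itself; a 2-4 uop insn starts a new packet
	     in decoder 0; a single-uop insn fills the next free slot, and
	     filling slot 2 completes the packet.  */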
11542 if (uops == PPRO_UOPS_MANY)
11543 {
11544 if (sched_verbose)
11545 ix86_dump_ppro_packet (dump);
11546 ix86_sched_data.ppro.decode[0] = insn;
11547 ix86_sched_data.ppro.decode[1] = NULL;
11548 ix86_sched_data.ppro.decode[2] = NULL;
11549 if (sched_verbose)
11550 ix86_dump_ppro_packet (dump);
11551 ix86_sched_data.ppro.decode[0] = NULL;
11552 }
11553 else if (uops == PPRO_UOPS_FEW)
11554 {
11555 if (sched_verbose)
11556 ix86_dump_ppro_packet (dump);
11557 ix86_sched_data.ppro.decode[0] = insn;
11558 ix86_sched_data.ppro.decode[1] = NULL;
11559 ix86_sched_data.ppro.decode[2] = NULL;
11560 }
11561 else
11562 {
11563 for (i = 0; i < 3; ++i)
11564 if (ix86_sched_data.ppro.decode[i] == NULL)
11565 {
11566 ix86_sched_data.ppro.decode[i] = insn;
11567 break;
11568 }
11569 if (i == 3)
11570 abort ();
11571 if (i == 2)
11572 {
11573 if (sched_verbose)
11574 ix86_dump_ppro_packet (dump);
11575 ix86_sched_data.ppro.decode[0] = NULL;
11576 ix86_sched_data.ppro.decode[1] = NULL;
11577 ix86_sched_data.ppro.decode[2] = NULL;
11578 }
11579 }
11580 }
11581 return --ix86_sched_data.ppro.issued_this_cycle;
11582 }
11583 }
11584
11585 static int
11586 ia32_use_dfa_pipeline_interface ()
11587 {
11588 if (ix86_cpu == PROCESSOR_PENTIUM)
11589 return 1;
11590 return 0;
11591 }
11592
11593 /* How many alternative schedules to try. This should be as wide as the
11594 scheduling freedom in the DFA, but no wider. Making this value too
11595 large results in extra work for the scheduler. */
11596
11597 static int
11598 ia32_multipass_dfa_lookahead ()
11599 {
11600 if (ix86_cpu == PROCESSOR_PENTIUM)
11601 return 2;
11602 else
11603 return 0;
11604 }
11605
11606 \f
11607 /* Walk through INSNS and look for MEM references whose address is DSTREG or
11608 SRCREG and set the memory attributes to those of DSTREF or SRCREF, as
11609 appropriate. */
11610
11611 void
11612 ix86_set_move_mem_attrs (insns, dstref, srcref, dstreg, srcreg)
11613 rtx insns;
11614 rtx dstref, srcref, dstreg, srcreg;
11615 {
11616 rtx insn;
11617
11618 for (insn = insns; insn != 0 ; insn = NEXT_INSN (insn))
11619 if (INSN_P (insn))
11620 ix86_set_move_mem_attrs_1 (PATTERN (insn), dstref, srcref,
11621 dstreg, srcreg);
11622 }
11623
11624 /* Subroutine of above to actually do the updating by recursively walking
11625 the rtx. */
11626
11627 static void
11628 ix86_set_move_mem_attrs_1 (x, dstref, srcref, dstreg, srcreg)
11629 rtx x;
11630 rtx dstref, srcref, dstreg, srcreg;
11631 {
11632 enum rtx_code code = GET_CODE (x);
11633 const char *format_ptr = GET_RTX_FORMAT (code);
11634 int i, j;
11635
11636 if (code == MEM && XEXP (x, 0) == dstreg)
11637 MEM_COPY_ATTRIBUTES (x, dstref);
11638 else if (code == MEM && XEXP (x, 0) == srcreg)
11639 MEM_COPY_ATTRIBUTES (x, srcref);
11640
11641 for (i = 0; i < GET_RTX_LENGTH (code); i++, format_ptr++)
11642 {
11643 if (*format_ptr == 'e')
11644 ix86_set_move_mem_attrs_1 (XEXP (x, i), dstref, srcref,
11645 dstreg, srcreg);
11646 else if (*format_ptr == 'E')
11647 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
11648 ix86_set_move_mem_attrs_1 (XVECEXP (x, i, j), dstref, srcref,
11649 dstreg, srcreg);
11650 }
11651 }
11652 \f
11653 /* Compute the alignment given to a constant that is being placed in memory.
11654 EXP is the constant and ALIGN is the alignment that the object would
11655 ordinarily have.
11656 The value of this function is used instead of that alignment to align
11657 the object. */
11658
11659 int
11660 ix86_constant_alignment (exp, align)
11661 tree exp;
11662 int align;
11663 {
11664 if (TREE_CODE (exp) == REAL_CST)
11665 {
11666 if (TYPE_MODE (TREE_TYPE (exp)) == DFmode && align < 64)
11667 return 64;
11668 else if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (exp))) && align < 128)
11669 return 128;
11670 }
11671 else if (TREE_CODE (exp) == STRING_CST && TREE_STRING_LENGTH (exp) >= 31
11672 && align < 256)
11673 return 256;
11674
11675 return align;
11676 }
11677
11678 /* Compute the alignment for a static variable.
11679 TYPE is the data type, and ALIGN is the alignment that
11680 the object would ordinarily have. The value of this function is used
11681 instead of that alignment to align the object. */
11682
11683 int
11684 ix86_data_alignment (type, align)
11685 tree type;
11686 int align;
11687 {
11688 if (AGGREGATE_TYPE_P (type)
11689 && TYPE_SIZE (type)
11690 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
11691 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 256
11692 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 256)
11693 return 256;
11694
11695 /* The x86-64 ABI requires arrays greater than 16 bytes to be aligned
11696 to a 16-byte boundary. */
11697 if (TARGET_64BIT)
11698 {
11699 if (AGGREGATE_TYPE_P (type)
11700 && TYPE_SIZE (type)
11701 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
11702 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 128
11703 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
11704 return 128;
11705 }
11706
11707 if (TREE_CODE (type) == ARRAY_TYPE)
11708 {
11709 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
11710 return 64;
11711 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
11712 return 128;
11713 }
11714 else if (TREE_CODE (type) == COMPLEX_TYPE)
11715 {
11716
11717 if (TYPE_MODE (type) == DCmode && align < 64)
11718 return 64;
11719 if (TYPE_MODE (type) == XCmode && align < 128)
11720 return 128;
11721 }
11722 else if ((TREE_CODE (type) == RECORD_TYPE
11723 || TREE_CODE (type) == UNION_TYPE
11724 || TREE_CODE (type) == QUAL_UNION_TYPE)
11725 && TYPE_FIELDS (type))
11726 {
11727 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
11728 return 64;
11729 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
11730 return 128;
11731 }
11732 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
11733 || TREE_CODE (type) == INTEGER_TYPE)
11734 {
11735 if (TYPE_MODE (type) == DFmode && align < 64)
11736 return 64;
11737 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
11738 return 128;
11739 }
11740
11741 return align;
11742 }
11743
11744 /* Compute the alignment for a local variable.
11745 TYPE is the data type, and ALIGN is the alignment that
11746 the object would ordinarily have. The value of this function is used
11747 instead of that alignment to align the object. */
11748
11749 int
11750 ix86_local_alignment (type, align)
11751 tree type;
11752 int align;
11753 {
11754 /* The x86-64 ABI requires arrays greater than 16 bytes to be aligned
11755 to a 16-byte boundary. */
11756 if (TARGET_64BIT)
11757 {
11758 if (AGGREGATE_TYPE_P (type)
11759 && TYPE_SIZE (type)
11760 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
11761 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 16
11762 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
11763 return 128;
11764 }
11765 if (TREE_CODE (type) == ARRAY_TYPE)
11766 {
11767 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
11768 return 64;
11769 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
11770 return 128;
11771 }
11772 else if (TREE_CODE (type) == COMPLEX_TYPE)
11773 {
11774 if (TYPE_MODE (type) == DCmode && align < 64)
11775 return 64;
11776 if (TYPE_MODE (type) == XCmode && align < 128)
11777 return 128;
11778 }
11779 else if ((TREE_CODE (type) == RECORD_TYPE
11780 || TREE_CODE (type) == UNION_TYPE
11781 || TREE_CODE (type) == QUAL_UNION_TYPE)
11782 && TYPE_FIELDS (type))
11783 {
11784 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
11785 return 64;
11786 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
11787 return 128;
11788 }
11789 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
11790 || TREE_CODE (type) == INTEGER_TYPE)
11791 {
11792
11793 if (TYPE_MODE (type) == DFmode && align < 64)
11794 return 64;
11795 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
11796 return 128;
11797 }
11798 return align;
11799 }
11800 \f
11801 /* Emit RTL insns to initialize the variable parts of a trampoline.
11802 FNADDR is an RTX for the address of the function's pure code.
11803 CXT is an RTX for the static chain value for the function. */
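/* The trampoline emitted below decodes, for !TARGET_64BIT, as

	movl	$CXT, %ecx		opcode 0xb9, imm32
	jmp	FNADDR			opcode 0xe9, rel32 displacement

   and for TARGET_64BIT as

	movl	$FNADDR, %r11d		0x41 0xbb, imm32 (zero-extendable case)
     or	movabs	$FNADDR, %r11		0x49 0xbb, imm64
	movabs	$CXT, %r10		0x49 0xba, imm64
	jmp	*%r11			0x49 0xff 0xe3

   The HImode stores write the opcode bytes little-endian, which is why the
   constants below appear byte-swapped (e.g. 0xbb41 for 0x41 0xbb).  */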
11804 void
11805 x86_initialize_trampoline (tramp, fnaddr, cxt)
11806 rtx tramp, fnaddr, cxt;
11807 {
11808 if (!TARGET_64BIT)
11809 {
11810 /* Compute offset from the end of the jmp to the target function. */
11811 rtx disp = expand_binop (SImode, sub_optab, fnaddr,
11812 plus_constant (tramp, 10),
11813 NULL_RTX, 1, OPTAB_DIRECT);
11814 emit_move_insn (gen_rtx_MEM (QImode, tramp),
11815 gen_int_mode (0xb9, QImode));
11816 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 1)), cxt);
11817 emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, 5)),
11818 gen_int_mode (0xe9, QImode));
11819 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 6)), disp);
11820 }
11821 else
11822 {
11823 int offset = 0;
11824 /* Try to load the address using the shorter movl instead of movabs.
11825 We may want to support movq for kernel mode, but the kernel does not
11826 use trampolines at the moment. */
11827 if (x86_64_zero_extended_value (fnaddr))
11828 {
11829 fnaddr = copy_to_mode_reg (DImode, fnaddr);
11830 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
11831 gen_int_mode (0xbb41, HImode));
11832 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, offset + 2)),
11833 gen_lowpart (SImode, fnaddr));
11834 offset += 6;
11835 }
11836 else
11837 {
11838 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
11839 gen_int_mode (0xbb49, HImode));
11840 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
11841 fnaddr);
11842 offset += 10;
11843 }
11844 /* Load static chain using movabs to r10. */
11845 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
11846 gen_int_mode (0xba49, HImode));
11847 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
11848 cxt);
11849 offset += 10;
11850 /* Jump to r11. */
11851 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
11852 gen_int_mode (0xff49, HImode));
11853 emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, offset+2)),
11854 gen_int_mode (0xe3, QImode));
11855 offset += 3;
11856 if (offset > TRAMPOLINE_SIZE)
11857 abort ();
11858 }
11859
11860 #ifdef TRANSFER_FROM_TRAMPOLINE
11861 emit_library_call (gen_rtx (SYMBOL_REF, Pmode, "__enable_execute_stack"),
11862 LCT_NORMAL, VOIDmode, 1, tramp, Pmode);
11863 #endif
11864 }
11865 \f
11866 #define def_builtin(MASK, NAME, TYPE, CODE) \
11867 do { \
11868 if ((MASK) & target_flags) \
11869 builtin_function ((NAME), (TYPE), (CODE), BUILT_IN_MD, \
11870 NULL, NULL_TREE); \
11871 } while (0)
11872
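/* Describes one MMX/SSE builtin: MASK is the set of target_flags bits, any
   one of which enables the builtin; ICODE is the insn pattern used to expand
   it; NAME is the __builtin_ia32_* identifier, or 0 for entries that are
   registered by hand below; CODE is the IX86_BUILTIN_* value.  For the
   comparison builtins, COMPARISON is the rtx comparison code to generate and
   a nonzero FLAG means the operands are swapped (e.g. GT is implemented as
   LT with exchanged operands).  */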
11873 struct builtin_description
11874 {
11875 const unsigned int mask;
11876 const enum insn_code icode;
11877 const char *const name;
11878 const enum ix86_builtins code;
11879 const enum rtx_code comparison;
11880 const unsigned int flag;
11881 };
11882
11883 /* Used for builtins that are enabled both by -msse and -msse2. */
11884 #define MASK_SSE1 (MASK_SSE | MASK_SSE2)
11885
11886 static const struct builtin_description bdesc_comi[] =
11887 {
11888 { MASK_SSE1, CODE_FOR_sse_comi, "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS, UNEQ, 0 },
11889 { MASK_SSE1, CODE_FOR_sse_comi, "__builtin_ia32_comilt", IX86_BUILTIN_COMILTSS, UNLT, 0 },
11890 { MASK_SSE1, CODE_FOR_sse_comi, "__builtin_ia32_comile", IX86_BUILTIN_COMILESS, UNLE, 0 },
11891 { MASK_SSE1, CODE_FOR_sse_comi, "__builtin_ia32_comigt", IX86_BUILTIN_COMIGTSS, GT, 0 },
11892 { MASK_SSE1, CODE_FOR_sse_comi, "__builtin_ia32_comige", IX86_BUILTIN_COMIGESS, GE, 0 },
11893 { MASK_SSE1, CODE_FOR_sse_comi, "__builtin_ia32_comineq", IX86_BUILTIN_COMINEQSS, LTGT, 0 },
11894 { MASK_SSE1, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomieq", IX86_BUILTIN_UCOMIEQSS, UNEQ, 0 },
11895 { MASK_SSE1, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomilt", IX86_BUILTIN_UCOMILTSS, UNLT, 0 },
11896 { MASK_SSE1, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomile", IX86_BUILTIN_UCOMILESS, UNLE, 0 },
11897 { MASK_SSE1, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomigt", IX86_BUILTIN_UCOMIGTSS, GT, 0 },
11898 { MASK_SSE1, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomige", IX86_BUILTIN_UCOMIGESS, GE, 0 },
11899 { MASK_SSE1, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS, LTGT, 0 },
11900 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdeq", IX86_BUILTIN_COMIEQSD, UNEQ, 0 },
11901 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdlt", IX86_BUILTIN_COMILTSD, UNLT, 0 },
11902 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdle", IX86_BUILTIN_COMILESD, UNLE, 0 },
11903 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdgt", IX86_BUILTIN_COMIGTSD, GT, 0 },
11904 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdge", IX86_BUILTIN_COMIGESD, GE, 0 },
11905 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdneq", IX86_BUILTIN_COMINEQSD, LTGT, 0 },
11906 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdeq", IX86_BUILTIN_UCOMIEQSD, UNEQ, 0 },
11907 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdlt", IX86_BUILTIN_UCOMILTSD, UNLT, 0 },
11908 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdle", IX86_BUILTIN_UCOMILESD, UNLE, 0 },
11909 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdgt", IX86_BUILTIN_UCOMIGTSD, GT, 0 },
11910 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdge", IX86_BUILTIN_UCOMIGESD, GE, 0 },
11911 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdneq", IX86_BUILTIN_UCOMINEQSD, LTGT, 0 },
11912 };
11913
11914 static const struct builtin_description bdesc_2arg[] =
11915 {
11916 /* SSE */
11917 { MASK_SSE1, CODE_FOR_addv4sf3, "__builtin_ia32_addps", IX86_BUILTIN_ADDPS, 0, 0 },
11918 { MASK_SSE1, CODE_FOR_subv4sf3, "__builtin_ia32_subps", IX86_BUILTIN_SUBPS, 0, 0 },
11919 { MASK_SSE1, CODE_FOR_mulv4sf3, "__builtin_ia32_mulps", IX86_BUILTIN_MULPS, 0, 0 },
11920 { MASK_SSE1, CODE_FOR_divv4sf3, "__builtin_ia32_divps", IX86_BUILTIN_DIVPS, 0, 0 },
11921 { MASK_SSE1, CODE_FOR_vmaddv4sf3, "__builtin_ia32_addss", IX86_BUILTIN_ADDSS, 0, 0 },
11922 { MASK_SSE1, CODE_FOR_vmsubv4sf3, "__builtin_ia32_subss", IX86_BUILTIN_SUBSS, 0, 0 },
11923 { MASK_SSE1, CODE_FOR_vmmulv4sf3, "__builtin_ia32_mulss", IX86_BUILTIN_MULSS, 0, 0 },
11924 { MASK_SSE1, CODE_FOR_vmdivv4sf3, "__builtin_ia32_divss", IX86_BUILTIN_DIVSS, 0, 0 },
11925
11926 { MASK_SSE1, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpeqps", IX86_BUILTIN_CMPEQPS, EQ, 0 },
11927 { MASK_SSE1, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpltps", IX86_BUILTIN_CMPLTPS, LT, 0 },
11928 { MASK_SSE1, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpleps", IX86_BUILTIN_CMPLEPS, LE, 0 },
11929 { MASK_SSE1, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS, LT, 1 },
11930 { MASK_SSE1, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpgeps", IX86_BUILTIN_CMPGEPS, LE, 1 },
11931 { MASK_SSE1, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpunordps", IX86_BUILTIN_CMPUNORDPS, UNORDERED, 0 },
11932 { MASK_SSE1, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpneqps", IX86_BUILTIN_CMPNEQPS, EQ, 0 },
11933 { MASK_SSE1, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpnltps", IX86_BUILTIN_CMPNLTPS, LT, 0 },
11934 { MASK_SSE1, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpnleps", IX86_BUILTIN_CMPNLEPS, LE, 0 },
11935 { MASK_SSE1, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpngtps", IX86_BUILTIN_CMPNGTPS, LT, 1 },
11936 { MASK_SSE1, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpngeps", IX86_BUILTIN_CMPNGEPS, LE, 1 },
11937 { MASK_SSE1, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpordps", IX86_BUILTIN_CMPORDPS, UNORDERED, 0 },
11938 { MASK_SSE1, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpeqss", IX86_BUILTIN_CMPEQSS, EQ, 0 },
11939 { MASK_SSE1, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpltss", IX86_BUILTIN_CMPLTSS, LT, 0 },
11940 { MASK_SSE1, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpless", IX86_BUILTIN_CMPLESS, LE, 0 },
11941 { MASK_SSE1, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpunordss", IX86_BUILTIN_CMPUNORDSS, UNORDERED, 0 },
11942 { MASK_SSE1, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpneqss", IX86_BUILTIN_CMPNEQSS, EQ, 0 },
11943 { MASK_SSE1, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpnltss", IX86_BUILTIN_CMPNLTSS, LT, 0 },
11944 { MASK_SSE1, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS, LE, 0 },
11945 { MASK_SSE1, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS, UNORDERED, 0 },
11946
11947 { MASK_SSE1, CODE_FOR_sminv4sf3, "__builtin_ia32_minps", IX86_BUILTIN_MINPS, 0, 0 },
11948 { MASK_SSE1, CODE_FOR_smaxv4sf3, "__builtin_ia32_maxps", IX86_BUILTIN_MAXPS, 0, 0 },
11949 { MASK_SSE1, CODE_FOR_vmsminv4sf3, "__builtin_ia32_minss", IX86_BUILTIN_MINSS, 0, 0 },
11950 { MASK_SSE1, CODE_FOR_vmsmaxv4sf3, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS, 0, 0 },
11951
11952 { MASK_SSE1, CODE_FOR_sse_andv4sf3, "__builtin_ia32_andps", IX86_BUILTIN_ANDPS, 0, 0 },
11953 { MASK_SSE1, CODE_FOR_sse_nandv4sf3, "__builtin_ia32_andnps", IX86_BUILTIN_ANDNPS, 0, 0 },
11954 { MASK_SSE1, CODE_FOR_sse_iorv4sf3, "__builtin_ia32_orps", IX86_BUILTIN_ORPS, 0, 0 },
11955 { MASK_SSE1, CODE_FOR_sse_xorv4sf3, "__builtin_ia32_xorps", IX86_BUILTIN_XORPS, 0, 0 },
11956
11957 { MASK_SSE1, CODE_FOR_sse_movss, "__builtin_ia32_movss", IX86_BUILTIN_MOVSS, 0, 0 },
11958 { MASK_SSE1, CODE_FOR_sse_movhlps, "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS, 0, 0 },
11959 { MASK_SSE1, CODE_FOR_sse_movlhps, "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS, 0, 0 },
11960 { MASK_SSE1, CODE_FOR_sse_unpckhps, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS, 0, 0 },
11961 { MASK_SSE1, CODE_FOR_sse_unpcklps, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS, 0, 0 },
11962
11963 /* MMX */
11964 { MASK_MMX, CODE_FOR_addv8qi3, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB, 0, 0 },
11965 { MASK_MMX, CODE_FOR_addv4hi3, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW, 0, 0 },
11966 { MASK_MMX, CODE_FOR_addv2si3, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD, 0, 0 },
11967 { MASK_MMX, CODE_FOR_subv8qi3, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB, 0, 0 },
11968 { MASK_MMX, CODE_FOR_subv4hi3, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW, 0, 0 },
11969 { MASK_MMX, CODE_FOR_subv2si3, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD, 0, 0 },
11970
11971 { MASK_MMX, CODE_FOR_ssaddv8qi3, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB, 0, 0 },
11972 { MASK_MMX, CODE_FOR_ssaddv4hi3, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW, 0, 0 },
11973 { MASK_MMX, CODE_FOR_sssubv8qi3, "__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB, 0, 0 },
11974 { MASK_MMX, CODE_FOR_sssubv4hi3, "__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW, 0, 0 },
11975 { MASK_MMX, CODE_FOR_usaddv8qi3, "__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB, 0, 0 },
11976 { MASK_MMX, CODE_FOR_usaddv4hi3, "__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW, 0, 0 },
11977 { MASK_MMX, CODE_FOR_ussubv8qi3, "__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB, 0, 0 },
11978 { MASK_MMX, CODE_FOR_ussubv4hi3, "__builtin_ia32_psubusw", IX86_BUILTIN_PSUBUSW, 0, 0 },
11979
11980 { MASK_MMX, CODE_FOR_mulv4hi3, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW, 0, 0 },
11981 { MASK_MMX, CODE_FOR_smulv4hi3_highpart, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW, 0, 0 },
11982 { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_umulv4hi3_highpart, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW, 0, 0 },
11983
11984 { MASK_MMX, CODE_FOR_mmx_anddi3, "__builtin_ia32_pand", IX86_BUILTIN_PAND, 0, 0 },
11985 { MASK_MMX, CODE_FOR_mmx_nanddi3, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN, 0, 0 },
11986 { MASK_MMX, CODE_FOR_mmx_iordi3, "__builtin_ia32_por", IX86_BUILTIN_POR, 0, 0 },
11987 { MASK_MMX, CODE_FOR_mmx_xordi3, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR, 0, 0 },
11988
11989 { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB, 0, 0 },
11990 { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_mmx_uavgv4hi3, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW, 0, 0 },
11991
11992 { MASK_MMX, CODE_FOR_eqv8qi3, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB, 0, 0 },
11993 { MASK_MMX, CODE_FOR_eqv4hi3, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW, 0, 0 },
11994 { MASK_MMX, CODE_FOR_eqv2si3, "__builtin_ia32_pcmpeqd", IX86_BUILTIN_PCMPEQD, 0, 0 },
11995 { MASK_MMX, CODE_FOR_gtv8qi3, "__builtin_ia32_pcmpgtb", IX86_BUILTIN_PCMPGTB, 0, 0 },
11996 { MASK_MMX, CODE_FOR_gtv4hi3, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW, 0, 0 },
11997 { MASK_MMX, CODE_FOR_gtv2si3, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD, 0, 0 },
11998
11999 { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_umaxv8qi3, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB, 0, 0 },
12000 { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_smaxv4hi3, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW, 0, 0 },
12001 { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_uminv8qi3, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB, 0, 0 },
12002 { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_sminv4hi3, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW, 0, 0 },
12003
12004 { MASK_MMX, CODE_FOR_mmx_punpckhbw, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW, 0, 0 },
12005 { MASK_MMX, CODE_FOR_mmx_punpckhwd, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD, 0, 0 },
12006 { MASK_MMX, CODE_FOR_mmx_punpckhdq, "__builtin_ia32_punpckhdq", IX86_BUILTIN_PUNPCKHDQ, 0, 0 },
12007 { MASK_MMX, CODE_FOR_mmx_punpcklbw, "__builtin_ia32_punpcklbw", IX86_BUILTIN_PUNPCKLBW, 0, 0 },
12008 { MASK_MMX, CODE_FOR_mmx_punpcklwd, "__builtin_ia32_punpcklwd", IX86_BUILTIN_PUNPCKLWD, 0, 0 },
12009 { MASK_MMX, CODE_FOR_mmx_punpckldq, "__builtin_ia32_punpckldq", IX86_BUILTIN_PUNPCKLDQ, 0, 0 },
12010
12011 /* Special. */
12012 { MASK_MMX, CODE_FOR_mmx_packsswb, 0, IX86_BUILTIN_PACKSSWB, 0, 0 },
12013 { MASK_MMX, CODE_FOR_mmx_packssdw, 0, IX86_BUILTIN_PACKSSDW, 0, 0 },
12014 { MASK_MMX, CODE_FOR_mmx_packuswb, 0, IX86_BUILTIN_PACKUSWB, 0, 0 },
12015
12016 { MASK_SSE1, CODE_FOR_cvtpi2ps, 0, IX86_BUILTIN_CVTPI2PS, 0, 0 },
12017 { MASK_SSE1, CODE_FOR_cvtsi2ss, 0, IX86_BUILTIN_CVTSI2SS, 0, 0 },
12018
12019 { MASK_MMX, CODE_FOR_ashlv4hi3, 0, IX86_BUILTIN_PSLLW, 0, 0 },
12020 { MASK_MMX, CODE_FOR_ashlv4hi3, 0, IX86_BUILTIN_PSLLWI, 0, 0 },
12021 { MASK_MMX, CODE_FOR_ashlv2si3, 0, IX86_BUILTIN_PSLLD, 0, 0 },
12022 { MASK_MMX, CODE_FOR_ashlv2si3, 0, IX86_BUILTIN_PSLLDI, 0, 0 },
12023 { MASK_MMX, CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQ, 0, 0 },
12024 { MASK_MMX, CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQI, 0, 0 },
12025
12026 { MASK_MMX, CODE_FOR_lshrv4hi3, 0, IX86_BUILTIN_PSRLW, 0, 0 },
12027 { MASK_MMX, CODE_FOR_lshrv4hi3, 0, IX86_BUILTIN_PSRLWI, 0, 0 },
12028 { MASK_MMX, CODE_FOR_lshrv2si3, 0, IX86_BUILTIN_PSRLD, 0, 0 },
12029 { MASK_MMX, CODE_FOR_lshrv2si3, 0, IX86_BUILTIN_PSRLDI, 0, 0 },
12030 { MASK_MMX, CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQ, 0, 0 },
12031 { MASK_MMX, CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQI, 0, 0 },
12032
12033 { MASK_MMX, CODE_FOR_ashrv4hi3, 0, IX86_BUILTIN_PSRAW, 0, 0 },
12034 { MASK_MMX, CODE_FOR_ashrv4hi3, 0, IX86_BUILTIN_PSRAWI, 0, 0 },
12035 { MASK_MMX, CODE_FOR_ashrv2si3, 0, IX86_BUILTIN_PSRAD, 0, 0 },
12036 { MASK_MMX, CODE_FOR_ashrv2si3, 0, IX86_BUILTIN_PSRADI, 0, 0 },
12037
12038 { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_mmx_psadbw, 0, IX86_BUILTIN_PSADBW, 0, 0 },
12039 { MASK_MMX, CODE_FOR_mmx_pmaddwd, 0, IX86_BUILTIN_PMADDWD, 0, 0 },
12040
12041 /* SSE2 */
12042 { MASK_SSE2, CODE_FOR_addv2df3, "__builtin_ia32_addpd", IX86_BUILTIN_ADDPD, 0, 0 },
12043 { MASK_SSE2, CODE_FOR_subv2df3, "__builtin_ia32_subpd", IX86_BUILTIN_SUBPD, 0, 0 },
12044 { MASK_SSE2, CODE_FOR_mulv2df3, "__builtin_ia32_mulpd", IX86_BUILTIN_MULPD, 0, 0 },
12045 { MASK_SSE2, CODE_FOR_divv2df3, "__builtin_ia32_divpd", IX86_BUILTIN_DIVPD, 0, 0 },
12046 { MASK_SSE2, CODE_FOR_vmaddv2df3, "__builtin_ia32_addsd", IX86_BUILTIN_ADDSD, 0, 0 },
12047 { MASK_SSE2, CODE_FOR_vmsubv2df3, "__builtin_ia32_subsd", IX86_BUILTIN_SUBSD, 0, 0 },
12048 { MASK_SSE2, CODE_FOR_vmmulv2df3, "__builtin_ia32_mulsd", IX86_BUILTIN_MULSD, 0, 0 },
12049 { MASK_SSE2, CODE_FOR_vmdivv2df3, "__builtin_ia32_divsd", IX86_BUILTIN_DIVSD, 0, 0 },
12050
12051 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpeqpd", IX86_BUILTIN_CMPEQPD, EQ, 0 },
12052 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpltpd", IX86_BUILTIN_CMPLTPD, LT, 0 },
12053 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmplepd", IX86_BUILTIN_CMPLEPD, LE, 0 },
12054 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpgtpd", IX86_BUILTIN_CMPGTPD, LT, 1 },
12055 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpgepd", IX86_BUILTIN_CMPGEPD, LE, 1 },
12056 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpunordpd", IX86_BUILTIN_CMPUNORDPD, UNORDERED, 0 },
12057 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpneqpd", IX86_BUILTIN_CMPNEQPD, EQ, 0 },
12058 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpnltpd", IX86_BUILTIN_CMPNLTPD, LT, 0 },
12059 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpnlepd", IX86_BUILTIN_CMPNLEPD, LE, 0 },
12060 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpngtpd", IX86_BUILTIN_CMPNGTPD, LT, 1 },
12061 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpngepd", IX86_BUILTIN_CMPNGEPD, LE, 1 },
12062 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpordpd", IX86_BUILTIN_CMPORDPD, UNORDERED, 0 },
12063 { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmpeqsd", IX86_BUILTIN_CMPEQSD, EQ, 0 },
12064 { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmpltsd", IX86_BUILTIN_CMPLTSD, LT, 0 },
12065 { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmplesd", IX86_BUILTIN_CMPLESD, LE, 0 },
12066 { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmpunordsd", IX86_BUILTIN_CMPUNORDSD, UNORDERED, 0 },
12067 { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpneqsd", IX86_BUILTIN_CMPNEQSD, EQ, 0 },
12068 { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpnltsd", IX86_BUILTIN_CMPNLTSD, LT, 0 },
12069 { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpnlesd", IX86_BUILTIN_CMPNLESD, LE, 0 },
12070 { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpordsd", IX86_BUILTIN_CMPORDSD, UNORDERED, 0 },
12071
12072 { MASK_SSE2, CODE_FOR_sminv2df3, "__builtin_ia32_minpd", IX86_BUILTIN_MINPD, 0, 0 },
12073 { MASK_SSE2, CODE_FOR_smaxv2df3, "__builtin_ia32_maxpd", IX86_BUILTIN_MAXPD, 0, 0 },
12074 { MASK_SSE2, CODE_FOR_vmsminv2df3, "__builtin_ia32_minsd", IX86_BUILTIN_MINSD, 0, 0 },
12075 { MASK_SSE2, CODE_FOR_vmsmaxv2df3, "__builtin_ia32_maxsd", IX86_BUILTIN_MAXSD, 0, 0 },
12076
12077 { MASK_SSE2, CODE_FOR_sse2_andv2df3, "__builtin_ia32_andpd", IX86_BUILTIN_ANDPD, 0, 0 },
12078 { MASK_SSE2, CODE_FOR_sse2_nandv2df3, "__builtin_ia32_andnpd", IX86_BUILTIN_ANDNPD, 0, 0 },
12079 { MASK_SSE2, CODE_FOR_sse2_iorv2df3, "__builtin_ia32_orpd", IX86_BUILTIN_ORPD, 0, 0 },
12080 { MASK_SSE2, CODE_FOR_sse2_xorv2df3, "__builtin_ia32_xorpd", IX86_BUILTIN_XORPD, 0, 0 },
12081
12082 { MASK_SSE2, CODE_FOR_sse2_movsd, "__builtin_ia32_movsd", IX86_BUILTIN_MOVSD, 0, 0 },
12083 { MASK_SSE2, CODE_FOR_sse2_unpckhpd, "__builtin_ia32_unpckhpd", IX86_BUILTIN_UNPCKHPD, 0, 0 },
12084 { MASK_SSE2, CODE_FOR_sse2_unpcklpd, "__builtin_ia32_unpcklpd", IX86_BUILTIN_UNPCKLPD, 0, 0 },
12085
12086 /* SSE2 MMX */
12087 { MASK_SSE2, CODE_FOR_addv16qi3, "__builtin_ia32_paddb128", IX86_BUILTIN_PADDB128, 0, 0 },
12088 { MASK_SSE2, CODE_FOR_addv8hi3, "__builtin_ia32_paddw128", IX86_BUILTIN_PADDW128, 0, 0 },
12089 { MASK_SSE2, CODE_FOR_addv4si3, "__builtin_ia32_paddd128", IX86_BUILTIN_PADDD128, 0, 0 },
12090 { MASK_SSE2, CODE_FOR_addv4si3, "__builtin_ia32_paddq128", IX86_BUILTIN_PADDQ128, 0, 0 },
12091 { MASK_SSE2, CODE_FOR_subv16qi3, "__builtin_ia32_psubb128", IX86_BUILTIN_PSUBB128, 0, 0 },
12092 { MASK_SSE2, CODE_FOR_subv8hi3, "__builtin_ia32_psubw128", IX86_BUILTIN_PSUBW128, 0, 0 },
12093 { MASK_SSE2, CODE_FOR_subv4si3, "__builtin_ia32_psubd128", IX86_BUILTIN_PSUBD128, 0, 0 },
12094 { MASK_SSE2, CODE_FOR_subv4si3, "__builtin_ia32_psubq128", IX86_BUILTIN_PSUBQ128, 0, 0 },
12095
12096 { MASK_SSE2, CODE_FOR_ssaddv16qi3, "__builtin_ia32_paddsb128", IX86_BUILTIN_PADDSB128, 0, 0 },
12097 { MASK_SSE2, CODE_FOR_ssaddv8hi3, "__builtin_ia32_paddsw128", IX86_BUILTIN_PADDSW128, 0, 0 },
12098 { MASK_SSE2, CODE_FOR_sssubv16qi3, "__builtin_ia32_psubsb128", IX86_BUILTIN_PSUBSB128, 0, 0 },
12099 { MASK_SSE2, CODE_FOR_sssubv8hi3, "__builtin_ia32_psubsw128", IX86_BUILTIN_PSUBSW128, 0, 0 },
12100 { MASK_SSE2, CODE_FOR_usaddv16qi3, "__builtin_ia32_paddusb128", IX86_BUILTIN_PADDUSB128, 0, 0 },
12101 { MASK_SSE2, CODE_FOR_usaddv8hi3, "__builtin_ia32_paddusw128", IX86_BUILTIN_PADDUSW128, 0, 0 },
12102 { MASK_SSE2, CODE_FOR_ussubv16qi3, "__builtin_ia32_psubusb128", IX86_BUILTIN_PSUBUSB128, 0, 0 },
12103 { MASK_SSE2, CODE_FOR_ussubv8hi3, "__builtin_ia32_psubusw128", IX86_BUILTIN_PSUBUSW128, 0, 0 },
12104
12105 { MASK_SSE2, CODE_FOR_mulv8hi3, "__builtin_ia32_pmullw128", IX86_BUILTIN_PMULLW128, 0, 0 },
12106 { MASK_SSE2, CODE_FOR_smulv8hi3_highpart, "__builtin_ia32_pmulhw128", IX86_BUILTIN_PMULHW128, 0, 0 },
12107 { MASK_SSE2, CODE_FOR_sse2_umulsidi3, "__builtin_ia32_pmuludq", IX86_BUILTIN_PMULUDQ, 0, 0 },
12108 { MASK_SSE2, CODE_FOR_sse2_umulv2siv2di3, "__builtin_ia32_pmuludq128", IX86_BUILTIN_PMULUDQ128, 0, 0 },
12109
12110 { MASK_SSE2, CODE_FOR_sse2_andv2di3, "__builtin_ia32_pand128", IX86_BUILTIN_PAND128, 0, 0 },
12111 { MASK_SSE2, CODE_FOR_sse2_nandv2di3, "__builtin_ia32_pandn128", IX86_BUILTIN_PANDN128, 0, 0 },
12112 { MASK_SSE2, CODE_FOR_sse2_iorv2di3, "__builtin_ia32_por128", IX86_BUILTIN_POR128, 0, 0 },
12113 { MASK_SSE2, CODE_FOR_sse2_xorv2di3, "__builtin_ia32_pxor128", IX86_BUILTIN_PXOR128, 0, 0 },
12114
12115 { MASK_SSE2, CODE_FOR_sse2_uavgv16qi3, "__builtin_ia32_pavgb128", IX86_BUILTIN_PAVGB128, 0, 0 },
12116 { MASK_SSE2, CODE_FOR_sse2_uavgv8hi3, "__builtin_ia32_pavgw128", IX86_BUILTIN_PAVGW128, 0, 0 },
12117
12118 { MASK_SSE2, CODE_FOR_eqv16qi3, "__builtin_ia32_pcmpeqb128", IX86_BUILTIN_PCMPEQB128, 0, 0 },
12119 { MASK_SSE2, CODE_FOR_eqv8hi3, "__builtin_ia32_pcmpeqw128", IX86_BUILTIN_PCMPEQW128, 0, 0 },
12120 { MASK_SSE2, CODE_FOR_eqv4si3, "__builtin_ia32_pcmpeqd128", IX86_BUILTIN_PCMPEQD128, 0, 0 },
12121 { MASK_SSE2, CODE_FOR_gtv16qi3, "__builtin_ia32_pcmpgtb128", IX86_BUILTIN_PCMPGTB128, 0, 0 },
12122 { MASK_SSE2, CODE_FOR_gtv8hi3, "__builtin_ia32_pcmpgtw128", IX86_BUILTIN_PCMPGTW128, 0, 0 },
12123 { MASK_SSE2, CODE_FOR_gtv4si3, "__builtin_ia32_pcmpgtd128", IX86_BUILTIN_PCMPGTD128, 0, 0 },
12124
12125 { MASK_SSE2, CODE_FOR_umaxv16qi3, "__builtin_ia32_pmaxub128", IX86_BUILTIN_PMAXUB128, 0, 0 },
12126 { MASK_SSE2, CODE_FOR_smaxv8hi3, "__builtin_ia32_pmaxsw128", IX86_BUILTIN_PMAXSW128, 0, 0 },
12127 { MASK_SSE2, CODE_FOR_uminv16qi3, "__builtin_ia32_pminub128", IX86_BUILTIN_PMINUB128, 0, 0 },
12128 { MASK_SSE2, CODE_FOR_sminv8hi3, "__builtin_ia32_pminsw128", IX86_BUILTIN_PMINSW128, 0, 0 },
12129
12130 { MASK_SSE2, CODE_FOR_sse2_punpckhbw, "__builtin_ia32_punpckhbw128", IX86_BUILTIN_PUNPCKHBW128, 0, 0 },
12131 { MASK_SSE2, CODE_FOR_sse2_punpckhwd, "__builtin_ia32_punpckhwd128", IX86_BUILTIN_PUNPCKHWD128, 0, 0 },
12132 { MASK_SSE2, CODE_FOR_sse2_punpckhdq, "__builtin_ia32_punpckhdq128", IX86_BUILTIN_PUNPCKHDQ128, 0, 0 },
12133 { MASK_SSE2, CODE_FOR_sse2_punpckhqdq, "__builtin_ia32_punpckhqdq128", IX86_BUILTIN_PUNPCKHQDQ128, 0, 0 },
12134 { MASK_SSE2, CODE_FOR_sse2_punpcklbw, "__builtin_ia32_punpcklbw128", IX86_BUILTIN_PUNPCKLBW128, 0, 0 },
12135 { MASK_SSE2, CODE_FOR_sse2_punpcklwd, "__builtin_ia32_punpcklwd128", IX86_BUILTIN_PUNPCKLWD128, 0, 0 },
12136 { MASK_SSE2, CODE_FOR_sse2_punpckldq, "__builtin_ia32_punpckldq128", IX86_BUILTIN_PUNPCKLDQ128, 0, 0 },
12137 { MASK_SSE2, CODE_FOR_sse2_punpcklqdq, "__builtin_ia32_punpcklqdq128", IX86_BUILTIN_PUNPCKLQDQ128, 0, 0 },
12138
12139 { MASK_SSE2, CODE_FOR_sse2_packsswb, "__builtin_ia32_packsswb128", IX86_BUILTIN_PACKSSWB128, 0, 0 },
12140 { MASK_SSE2, CODE_FOR_sse2_packssdw, "__builtin_ia32_packssdw128", IX86_BUILTIN_PACKSSDW128, 0, 0 },
12141 { MASK_SSE2, CODE_FOR_sse2_packuswb, "__builtin_ia32_packuswb128", IX86_BUILTIN_PACKUSWB128, 0, 0 },
12142
12143 { MASK_SSE2, CODE_FOR_umulv8hi3_highpart, "__builtin_ia32_pmulhuw128", IX86_BUILTIN_PMULHUW128, 0, 0 },
12144 { MASK_SSE2, CODE_FOR_sse2_psadbw, 0, IX86_BUILTIN_PSADBW128, 0, 0 },
12145
12146 { MASK_SSE2, CODE_FOR_ashlv8hi3_ti, 0, IX86_BUILTIN_PSLLW128, 0, 0 },
12147 { MASK_SSE2, CODE_FOR_ashlv8hi3, 0, IX86_BUILTIN_PSLLWI128, 0, 0 },
12148 { MASK_SSE2, CODE_FOR_ashlv4si3_ti, 0, IX86_BUILTIN_PSLLD128, 0, 0 },
12149 { MASK_SSE2, CODE_FOR_ashlv4si3, 0, IX86_BUILTIN_PSLLDI128, 0, 0 },
12150 { MASK_SSE2, CODE_FOR_ashlv2di3_ti, 0, IX86_BUILTIN_PSLLQ128, 0, 0 },
12151 { MASK_SSE2, CODE_FOR_ashlv2di3, 0, IX86_BUILTIN_PSLLQI128, 0, 0 },
12152
12153 { MASK_SSE2, CODE_FOR_lshrv8hi3_ti, 0, IX86_BUILTIN_PSRLW128, 0, 0 },
12154 { MASK_SSE2, CODE_FOR_lshrv8hi3, 0, IX86_BUILTIN_PSRLWI128, 0, 0 },
12155 { MASK_SSE2, CODE_FOR_lshrv4si3_ti, 0, IX86_BUILTIN_PSRLD128, 0, 0 },
12156 { MASK_SSE2, CODE_FOR_lshrv4si3, 0, IX86_BUILTIN_PSRLDI128, 0, 0 },
12157 { MASK_SSE2, CODE_FOR_lshrv2di3_ti, 0, IX86_BUILTIN_PSRLQ128, 0, 0 },
12158 { MASK_SSE2, CODE_FOR_lshrv2di3, 0, IX86_BUILTIN_PSRLQI128, 0, 0 },
12159
12160 { MASK_SSE2, CODE_FOR_ashrv8hi3_ti, 0, IX86_BUILTIN_PSRAW128, 0, 0 },
12161 { MASK_SSE2, CODE_FOR_ashrv8hi3, 0, IX86_BUILTIN_PSRAWI128, 0, 0 },
12162 { MASK_SSE2, CODE_FOR_ashrv4si3_ti, 0, IX86_BUILTIN_PSRAD128, 0, 0 },
12163 { MASK_SSE2, CODE_FOR_ashrv4si3, 0, IX86_BUILTIN_PSRADI128, 0, 0 },
12164
12165 { MASK_SSE2, CODE_FOR_sse2_pmaddwd, 0, IX86_BUILTIN_PMADDWD128, 0, 0 },
12166
12167 { MASK_SSE2, CODE_FOR_cvtsi2sd, 0, IX86_BUILTIN_CVTSI2SD, 0, 0 },
12168 { MASK_SSE2, CODE_FOR_cvtsd2ss, 0, IX86_BUILTIN_CVTSD2SS, 0, 0 },
12169 { MASK_SSE2, CODE_FOR_cvtss2sd, 0, IX86_BUILTIN_CVTSS2SD, 0, 0 }
12170 };
12171
12172 static const struct builtin_description bdesc_1arg[] =
12173 {
12174 { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_mmx_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB, 0, 0 },
12175 { MASK_SSE1, CODE_FOR_sse_movmskps, 0, IX86_BUILTIN_MOVMSKPS, 0, 0 },
12176
12177 { MASK_SSE1, CODE_FOR_sqrtv4sf2, 0, IX86_BUILTIN_SQRTPS, 0, 0 },
12178 { MASK_SSE1, CODE_FOR_rsqrtv4sf2, 0, IX86_BUILTIN_RSQRTPS, 0, 0 },
12179 { MASK_SSE1, CODE_FOR_rcpv4sf2, 0, IX86_BUILTIN_RCPPS, 0, 0 },
12180
12181 { MASK_SSE1, CODE_FOR_cvtps2pi, 0, IX86_BUILTIN_CVTPS2PI, 0, 0 },
12182 { MASK_SSE1, CODE_FOR_cvtss2si, 0, IX86_BUILTIN_CVTSS2SI, 0, 0 },
12183 { MASK_SSE1, CODE_FOR_cvttps2pi, 0, IX86_BUILTIN_CVTTPS2PI, 0, 0 },
12184 { MASK_SSE1, CODE_FOR_cvttss2si, 0, IX86_BUILTIN_CVTTSS2SI, 0, 0 },
12185
12186 { MASK_SSE2, CODE_FOR_sse2_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB128, 0, 0 },
12187 { MASK_SSE2, CODE_FOR_sse2_movmskpd, 0, IX86_BUILTIN_MOVMSKPD, 0, 0 },
12188 { MASK_SSE2, CODE_FOR_sse2_movq2dq, 0, IX86_BUILTIN_MOVQ2DQ, 0, 0 },
12189 { MASK_SSE2, CODE_FOR_sse2_movdq2q, 0, IX86_BUILTIN_MOVDQ2Q, 0, 0 },
12190
12191 { MASK_SSE2, CODE_FOR_sqrtv2df2, 0, IX86_BUILTIN_SQRTPD, 0, 0 },
12192
12193 { MASK_SSE2, CODE_FOR_cvtdq2pd, 0, IX86_BUILTIN_CVTDQ2PD, 0, 0 },
12194 { MASK_SSE2, CODE_FOR_cvtdq2ps, 0, IX86_BUILTIN_CVTDQ2PS, 0, 0 },
12195
12196 { MASK_SSE2, CODE_FOR_cvtpd2dq, 0, IX86_BUILTIN_CVTPD2DQ, 0, 0 },
12197 { MASK_SSE2, CODE_FOR_cvtpd2pi, 0, IX86_BUILTIN_CVTPD2PI, 0, 0 },
12198 { MASK_SSE2, CODE_FOR_cvtpd2ps, 0, IX86_BUILTIN_CVTPD2PS, 0, 0 },
12199 { MASK_SSE2, CODE_FOR_cvttpd2dq, 0, IX86_BUILTIN_CVTTPD2DQ, 0, 0 },
12200 { MASK_SSE2, CODE_FOR_cvttpd2pi, 0, IX86_BUILTIN_CVTTPD2PI, 0, 0 },
12201
12202 { MASK_SSE2, CODE_FOR_cvtpi2pd, 0, IX86_BUILTIN_CVTPI2PD, 0, 0 },
12203
12204 { MASK_SSE2, CODE_FOR_cvtsd2si, 0, IX86_BUILTIN_CVTSD2SI, 0, 0 },
12205 { MASK_SSE2, CODE_FOR_cvttsd2si, 0, IX86_BUILTIN_CVTTSD2SI, 0, 0 },
12206
12207 { MASK_SSE2, CODE_FOR_cvtps2dq, 0, IX86_BUILTIN_CVTPS2DQ, 0, 0 },
12208 { MASK_SSE2, CODE_FOR_cvtps2pd, 0, IX86_BUILTIN_CVTPS2PD, 0, 0 },
12209 { MASK_SSE2, CODE_FOR_cvttps2dq, 0, IX86_BUILTIN_CVTTPS2DQ, 0, 0 },
12210
12211 { MASK_SSE2, CODE_FOR_sse2_movq, 0, IX86_BUILTIN_MOVQ, 0, 0 }
12212 };
12213
12214 void
12215 ix86_init_builtins ()
12216 {
12217 if (TARGET_MMX)
12218 ix86_init_mmx_sse_builtins ();
12219 }
12220
12221 /* Set up all the MMX/SSE builtins. This is not called if TARGET_MMX
12222 is zero. Otherwise, if TARGET_SSE is not set, only the MMX builtins
12223 are registered. */
12224 static void
12225 ix86_init_mmx_sse_builtins ()
12226 {
12227 const struct builtin_description * d;
12228 size_t i;
12229
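  /* The function type nodes below follow the naming convention
     RET_ftype_ARG1_ARG2...; e.g. v4sf_ftype_v4sf_int is the type of a
     function taking a V4SF vector and an int and returning a V4SF vector.  */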
12230 tree pchar_type_node = build_pointer_type (char_type_node);
12231 tree pfloat_type_node = build_pointer_type (float_type_node);
12232 tree pv2si_type_node = build_pointer_type (V2SI_type_node);
12233 tree pv2di_type_node = build_pointer_type (V2DI_type_node);
12234 tree pdi_type_node = build_pointer_type (long_long_unsigned_type_node);
12235
12236 /* Comparisons. */
12237 tree int_ftype_v4sf_v4sf
12238 = build_function_type_list (integer_type_node,
12239 V4SF_type_node, V4SF_type_node, NULL_TREE);
12240 tree v4si_ftype_v4sf_v4sf
12241 = build_function_type_list (V4SI_type_node,
12242 V4SF_type_node, V4SF_type_node, NULL_TREE);
12243 /* MMX/SSE/integer conversions. */
12244 tree int_ftype_v4sf
12245 = build_function_type_list (integer_type_node,
12246 V4SF_type_node, NULL_TREE);
12247 tree int_ftype_v8qi
12248 = build_function_type_list (integer_type_node, V8QI_type_node, NULL_TREE);
12249 tree v4sf_ftype_v4sf_int
12250 = build_function_type_list (V4SF_type_node,
12251 V4SF_type_node, integer_type_node, NULL_TREE);
12252 tree v4sf_ftype_v4sf_v2si
12253 = build_function_type_list (V4SF_type_node,
12254 V4SF_type_node, V2SI_type_node, NULL_TREE);
12255 tree int_ftype_v4hi_int
12256 = build_function_type_list (integer_type_node,
12257 V4HI_type_node, integer_type_node, NULL_TREE);
12258 tree v4hi_ftype_v4hi_int_int
12259 = build_function_type_list (V4HI_type_node, V4HI_type_node,
12260 integer_type_node, integer_type_node,
12261 NULL_TREE);
12262 /* Miscellaneous. */
12263 tree v8qi_ftype_v4hi_v4hi
12264 = build_function_type_list (V8QI_type_node,
12265 V4HI_type_node, V4HI_type_node, NULL_TREE);
12266 tree v4hi_ftype_v2si_v2si
12267 = build_function_type_list (V4HI_type_node,
12268 V2SI_type_node, V2SI_type_node, NULL_TREE);
12269 tree v4sf_ftype_v4sf_v4sf_int
12270 = build_function_type_list (V4SF_type_node,
12271 V4SF_type_node, V4SF_type_node,
12272 integer_type_node, NULL_TREE);
12273 tree v2si_ftype_v4hi_v4hi
12274 = build_function_type_list (V2SI_type_node,
12275 V4HI_type_node, V4HI_type_node, NULL_TREE);
12276 tree v4hi_ftype_v4hi_int
12277 = build_function_type_list (V4HI_type_node,
12278 V4HI_type_node, integer_type_node, NULL_TREE);
12279 tree v4hi_ftype_v4hi_di
12280 = build_function_type_list (V4HI_type_node,
12281 V4HI_type_node, long_long_unsigned_type_node,
12282 NULL_TREE);
12283 tree v2si_ftype_v2si_di
12284 = build_function_type_list (V2SI_type_node,
12285 V2SI_type_node, long_long_unsigned_type_node,
12286 NULL_TREE);
12287 tree void_ftype_void
12288 = build_function_type (void_type_node, void_list_node);
12289 tree void_ftype_unsigned
12290 = build_function_type_list (void_type_node, unsigned_type_node, NULL_TREE);
12291 tree unsigned_ftype_void
12292 = build_function_type (unsigned_type_node, void_list_node);
12293 tree di_ftype_void
12294 = build_function_type (long_long_unsigned_type_node, void_list_node);
12295 tree v4sf_ftype_void
12296 = build_function_type (V4SF_type_node, void_list_node);
12297 tree v2si_ftype_v4sf
12298 = build_function_type_list (V2SI_type_node, V4SF_type_node, NULL_TREE);
12299 /* Loads/stores. */
12300 tree void_ftype_v8qi_v8qi_pchar
12301 = build_function_type_list (void_type_node,
12302 V8QI_type_node, V8QI_type_node,
12303 pchar_type_node, NULL_TREE);
12304 tree v4sf_ftype_pfloat
12305 = build_function_type_list (V4SF_type_node, pfloat_type_node, NULL_TREE);
12306 /* @@@ the type is bogus */
12307 tree v4sf_ftype_v4sf_pv2si
12308 = build_function_type_list (V4SF_type_node,
12309 V4SF_type_node, pv2si_type_node, NULL_TREE);
12310 tree void_ftype_pv2si_v4sf
12311 = build_function_type_list (void_type_node,
12312 pv2si_type_node, V4SF_type_node, NULL_TREE);
12313 tree void_ftype_pfloat_v4sf
12314 = build_function_type_list (void_type_node,
12315 pfloat_type_node, V4SF_type_node, NULL_TREE);
12316 tree void_ftype_pdi_di
12317 = build_function_type_list (void_type_node,
12318 pdi_type_node, long_long_unsigned_type_node,
12319 NULL_TREE);
12320 tree void_ftype_pv2di_v2di
12321 = build_function_type_list (void_type_node,
12322 pv2di_type_node, V2DI_type_node, NULL_TREE);
12323 /* Normal vector unops. */
12324 tree v4sf_ftype_v4sf
12325 = build_function_type_list (V4SF_type_node, V4SF_type_node, NULL_TREE);
12326
12327 /* Normal vector binops. */
12328 tree v4sf_ftype_v4sf_v4sf
12329 = build_function_type_list (V4SF_type_node,
12330 V4SF_type_node, V4SF_type_node, NULL_TREE);
12331 tree v8qi_ftype_v8qi_v8qi
12332 = build_function_type_list (V8QI_type_node,
12333 V8QI_type_node, V8QI_type_node, NULL_TREE);
12334 tree v4hi_ftype_v4hi_v4hi
12335 = build_function_type_list (V4HI_type_node,
12336 V4HI_type_node, V4HI_type_node, NULL_TREE);
12337 tree v2si_ftype_v2si_v2si
12338 = build_function_type_list (V2SI_type_node,
12339 V2SI_type_node, V2SI_type_node, NULL_TREE);
12340 tree di_ftype_di_di
12341 = build_function_type_list (long_long_unsigned_type_node,
12342 long_long_unsigned_type_node,
12343 long_long_unsigned_type_node, NULL_TREE);
12344
12345 tree v2si_ftype_v2sf
12346 = build_function_type_list (V2SI_type_node, V2SF_type_node, NULL_TREE);
12347 tree v2sf_ftype_v2si
12348 = build_function_type_list (V2SF_type_node, V2SI_type_node, NULL_TREE);
12349 tree v2si_ftype_v2si
12350 = build_function_type_list (V2SI_type_node, V2SI_type_node, NULL_TREE);
12351 tree v2sf_ftype_v2sf
12352 = build_function_type_list (V2SF_type_node, V2SF_type_node, NULL_TREE);
12353 tree v2sf_ftype_v2sf_v2sf
12354 = build_function_type_list (V2SF_type_node,
12355 V2SF_type_node, V2SF_type_node, NULL_TREE);
12356 tree v2si_ftype_v2sf_v2sf
12357 = build_function_type_list (V2SI_type_node,
12358 V2SF_type_node, V2SF_type_node, NULL_TREE);
12359 tree pint_type_node = build_pointer_type (integer_type_node);
12360 tree pdouble_type_node = build_pointer_type (double_type_node);
12361 tree int_ftype_v2df_v2df
12362 = build_function_type_list (integer_type_node,
12363 V2DF_type_node, V2DF_type_node, NULL_TREE);
12364
12365 tree ti_ftype_void
12366 = build_function_type (intTI_type_node, void_list_node);
12367 tree v2di_ftype_void
12368 = build_function_type (V2DI_type_node, void_list_node);
12369 tree ti_ftype_ti_ti
12370 = build_function_type_list (intTI_type_node,
12371 intTI_type_node, intTI_type_node, NULL_TREE);
12372 tree void_ftype_pvoid
12373 = build_function_type_list (void_type_node, ptr_type_node, NULL_TREE);
12374 tree v2di_ftype_di
12375 = build_function_type_list (V2DI_type_node,
12376 long_long_unsigned_type_node, NULL_TREE);
12377 tree di_ftype_v2di
12378 = build_function_type_list (long_long_unsigned_type_node,
12379 V2DI_type_node, NULL_TREE);
12380 tree v4sf_ftype_v4si
12381 = build_function_type_list (V4SF_type_node, V4SI_type_node, NULL_TREE);
12382 tree v4si_ftype_v4sf
12383 = build_function_type_list (V4SI_type_node, V4SF_type_node, NULL_TREE);
12384 tree v2df_ftype_v4si
12385 = build_function_type_list (V2DF_type_node, V4SI_type_node, NULL_TREE);
12386 tree v4si_ftype_v2df
12387 = build_function_type_list (V4SI_type_node, V2DF_type_node, NULL_TREE);
12388 tree v2si_ftype_v2df
12389 = build_function_type_list (V2SI_type_node, V2DF_type_node, NULL_TREE);
12390 tree v4sf_ftype_v2df
12391 = build_function_type_list (V4SF_type_node, V2DF_type_node, NULL_TREE);
12392 tree v2df_ftype_v2si
12393 = build_function_type_list (V2DF_type_node, V2SI_type_node, NULL_TREE);
12394 tree v2df_ftype_v4sf
12395 = build_function_type_list (V2DF_type_node, V4SF_type_node, NULL_TREE);
12396 tree int_ftype_v2df
12397 = build_function_type_list (integer_type_node, V2DF_type_node, NULL_TREE);
12398 tree v2df_ftype_v2df_int
12399 = build_function_type_list (V2DF_type_node,
12400 V2DF_type_node, integer_type_node, NULL_TREE);
12401 tree v4sf_ftype_v4sf_v2df
12402 = build_function_type_list (V4SF_type_node,
12403 V4SF_type_node, V2DF_type_node, NULL_TREE);
12404 tree v2df_ftype_v2df_v4sf
12405 = build_function_type_list (V2DF_type_node,
12406 V2DF_type_node, V4SF_type_node, NULL_TREE);
12407 tree v2df_ftype_v2df_v2df_int
12408 = build_function_type_list (V2DF_type_node,
12409 V2DF_type_node, V2DF_type_node,
12410 integer_type_node,
12411 NULL_TREE);
12412 tree v2df_ftype_v2df_pv2si
12413 = build_function_type_list (V2DF_type_node,
12414 V2DF_type_node, pv2si_type_node, NULL_TREE);
12415 tree void_ftype_pv2si_v2df
12416 = build_function_type_list (void_type_node,
12417 pv2si_type_node, V2DF_type_node, NULL_TREE);
12418 tree void_ftype_pdouble_v2df
12419 = build_function_type_list (void_type_node,
12420 pdouble_type_node, V2DF_type_node, NULL_TREE);
12421 tree void_ftype_pint_int
12422 = build_function_type_list (void_type_node,
12423 pint_type_node, integer_type_node, NULL_TREE);
12424 tree void_ftype_v16qi_v16qi_pchar
12425 = build_function_type_list (void_type_node,
12426 V16QI_type_node, V16QI_type_node,
12427 pchar_type_node, NULL_TREE);
12428 tree v2df_ftype_pdouble
12429 = build_function_type_list (V2DF_type_node, pdouble_type_node, NULL_TREE);
12430 tree v2df_ftype_v2df_v2df
12431 = build_function_type_list (V2DF_type_node,
12432 V2DF_type_node, V2DF_type_node, NULL_TREE);
12433 tree v16qi_ftype_v16qi_v16qi
12434 = build_function_type_list (V16QI_type_node,
12435 V16QI_type_node, V16QI_type_node, NULL_TREE);
12436 tree v8hi_ftype_v8hi_v8hi
12437 = build_function_type_list (V8HI_type_node,
12438 V8HI_type_node, V8HI_type_node, NULL_TREE);
12439 tree v4si_ftype_v4si_v4si
12440 = build_function_type_list (V4SI_type_node,
12441 V4SI_type_node, V4SI_type_node, NULL_TREE);
12442 tree v2di_ftype_v2di_v2di
12443 = build_function_type_list (V2DI_type_node,
12444 V2DI_type_node, V2DI_type_node, NULL_TREE);
12445 tree v2di_ftype_v2df_v2df
12446 = build_function_type_list (V2DI_type_node,
12447 V2DF_type_node, V2DF_type_node, NULL_TREE);
12448 tree v2df_ftype_v2df
12449 = build_function_type_list (V2DF_type_node, V2DF_type_node, NULL_TREE);
12450 tree v2df_ftype_double
12451 = build_function_type_list (V2DF_type_node, double_type_node, NULL_TREE);
12452 tree v2df_ftype_double_double
12453 = build_function_type_list (V2DF_type_node,
12454 double_type_node, double_type_node, NULL_TREE);
12455 tree int_ftype_v8hi_int
12456 = build_function_type_list (integer_type_node,
12457 V8HI_type_node, integer_type_node, NULL_TREE);
12458 tree v8hi_ftype_v8hi_int_int
12459 = build_function_type_list (V8HI_type_node,
12460 V8HI_type_node, integer_type_node,
12461 integer_type_node, NULL_TREE);
12462 tree v2di_ftype_v2di_int
12463 = build_function_type_list (V2DI_type_node,
12464 V2DI_type_node, integer_type_node, NULL_TREE);
12465 tree v4si_ftype_v4si_int
12466 = build_function_type_list (V4SI_type_node,
12467 V4SI_type_node, integer_type_node, NULL_TREE);
12468 tree v8hi_ftype_v8hi_int
12469 = build_function_type_list (V8HI_type_node,
12470 V8HI_type_node, integer_type_node, NULL_TREE);
12471 tree v8hi_ftype_v8hi_v2di
12472 = build_function_type_list (V8HI_type_node,
12473 V8HI_type_node, V2DI_type_node, NULL_TREE);
12474 tree v4si_ftype_v4si_v2di
12475 = build_function_type_list (V4SI_type_node,
12476 V4SI_type_node, V2DI_type_node, NULL_TREE);
12477 tree v4si_ftype_v8hi_v8hi
12478 = build_function_type_list (V4SI_type_node,
12479 V8HI_type_node, V8HI_type_node, NULL_TREE);
12480 tree di_ftype_v8qi_v8qi
12481 = build_function_type_list (long_long_unsigned_type_node,
12482 V8QI_type_node, V8QI_type_node, NULL_TREE);
12483 tree v2di_ftype_v16qi_v16qi
12484 = build_function_type_list (V2DI_type_node,
12485 V16QI_type_node, V16QI_type_node, NULL_TREE);
12486 tree int_ftype_v16qi
12487 = build_function_type_list (integer_type_node, V16QI_type_node, NULL_TREE);
12488 tree v16qi_ftype_pchar
12489 = build_function_type_list (V16QI_type_node, pchar_type_node, NULL_TREE);
12490 tree void_ftype_pchar_v16qi
12491 = build_function_type_list (void_type_node,
12492 pchar_type_node, V16QI_type_node, NULL_TREE);
12493 tree v4si_ftype_pchar
12494 = build_function_type_list (V4SI_type_node, pchar_type_node, NULL_TREE);
12495 tree void_ftype_pchar_v4si
12496 = build_function_type_list (void_type_node,
12497 pchar_type_node, V4SI_type_node, NULL_TREE);
12498 tree v2di_ftype_v2di
12499 = build_function_type_list (V2DI_type_node, V2DI_type_node, NULL_TREE);
12500
12501 /* Add all builtins that are more or less simple operations on two
12502 operands. */
12503 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
12504 {
12505 /* Use one of the operands; the target can have a different mode for
12506 mask-generating compares. */
12507 enum machine_mode mode;
12508 tree type;
12509
12510 if (d->name == 0)
12511 continue;
12512 mode = insn_data[d->icode].operand[1].mode;
12513
12514 switch (mode)
12515 {
12516 case V16QImode:
12517 type = v16qi_ftype_v16qi_v16qi;
12518 break;
12519 case V8HImode:
12520 type = v8hi_ftype_v8hi_v8hi;
12521 break;
12522 case V4SImode:
12523 type = v4si_ftype_v4si_v4si;
12524 break;
12525 case V2DImode:
12526 type = v2di_ftype_v2di_v2di;
12527 break;
12528 case V2DFmode:
12529 type = v2df_ftype_v2df_v2df;
12530 break;
12531 case TImode:
12532 type = ti_ftype_ti_ti;
12533 break;
12534 case V4SFmode:
12535 type = v4sf_ftype_v4sf_v4sf;
12536 break;
12537 case V8QImode:
12538 type = v8qi_ftype_v8qi_v8qi;
12539 break;
12540 case V4HImode:
12541 type = v4hi_ftype_v4hi_v4hi;
12542 break;
12543 case V2SImode:
12544 type = v2si_ftype_v2si_v2si;
12545 break;
12546 case DImode:
12547 type = di_ftype_di_di;
12548 break;
12549
12550 default:
12551 abort ();
12552 }
12553
12554 /* Override for comparisons. */
12555 if (d->icode == CODE_FOR_maskcmpv4sf3
12556 || d->icode == CODE_FOR_maskncmpv4sf3
12557 || d->icode == CODE_FOR_vmmaskcmpv4sf3
12558 || d->icode == CODE_FOR_vmmaskncmpv4sf3)
12559 type = v4si_ftype_v4sf_v4sf;
12560
12561 if (d->icode == CODE_FOR_maskcmpv2df3
12562 || d->icode == CODE_FOR_maskncmpv2df3
12563 || d->icode == CODE_FOR_vmmaskcmpv2df3
12564 || d->icode == CODE_FOR_vmmaskncmpv2df3)
12565 type = v2di_ftype_v2df_v2df;
12566
12567 def_builtin (d->mask, d->name, type, d->code);
12568 }
12569
12570 /* Add the remaining MMX insns with somewhat more complicated types. */
12571 def_builtin (MASK_MMX, "__builtin_ia32_mmx_zero", di_ftype_void, IX86_BUILTIN_MMX_ZERO);
12572 def_builtin (MASK_MMX, "__builtin_ia32_emms", void_ftype_void, IX86_BUILTIN_EMMS);
12573 def_builtin (MASK_MMX, "__builtin_ia32_ldmxcsr", void_ftype_unsigned, IX86_BUILTIN_LDMXCSR);
12574 def_builtin (MASK_MMX, "__builtin_ia32_stmxcsr", unsigned_ftype_void, IX86_BUILTIN_STMXCSR);
12575 def_builtin (MASK_MMX, "__builtin_ia32_psllw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSLLW);
12576 def_builtin (MASK_MMX, "__builtin_ia32_pslld", v2si_ftype_v2si_di, IX86_BUILTIN_PSLLD);
12577 def_builtin (MASK_MMX, "__builtin_ia32_psllq", di_ftype_di_di, IX86_BUILTIN_PSLLQ);
12578
12579 def_builtin (MASK_MMX, "__builtin_ia32_psrlw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRLW);
12580 def_builtin (MASK_MMX, "__builtin_ia32_psrld", v2si_ftype_v2si_di, IX86_BUILTIN_PSRLD);
12581 def_builtin (MASK_MMX, "__builtin_ia32_psrlq", di_ftype_di_di, IX86_BUILTIN_PSRLQ);
12582
12583 def_builtin (MASK_MMX, "__builtin_ia32_psraw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRAW);
12584 def_builtin (MASK_MMX, "__builtin_ia32_psrad", v2si_ftype_v2si_di, IX86_BUILTIN_PSRAD);
12585
12586 def_builtin (MASK_MMX, "__builtin_ia32_pshufw", v4hi_ftype_v4hi_int, IX86_BUILTIN_PSHUFW);
12587 def_builtin (MASK_MMX, "__builtin_ia32_pmaddwd", v2si_ftype_v4hi_v4hi, IX86_BUILTIN_PMADDWD);
12588
12589 /* comi/ucomi insns. */
12590 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
12591 if (d->mask == MASK_SSE2)
12592 def_builtin (d->mask, d->name, int_ftype_v2df_v2df, d->code);
12593 else
12594 def_builtin (d->mask, d->name, int_ftype_v4sf_v4sf, d->code);
12595
12596 def_builtin (MASK_MMX, "__builtin_ia32_packsswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKSSWB);
12597 def_builtin (MASK_MMX, "__builtin_ia32_packssdw", v4hi_ftype_v2si_v2si, IX86_BUILTIN_PACKSSDW);
12598 def_builtin (MASK_MMX, "__builtin_ia32_packuswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKUSWB);
12599
12600 def_builtin (MASK_SSE1, "__builtin_ia32_cvtpi2ps", v4sf_ftype_v4sf_v2si, IX86_BUILTIN_CVTPI2PS);
12601 def_builtin (MASK_SSE1, "__builtin_ia32_cvtps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTPS2PI);
12602 def_builtin (MASK_SSE1, "__builtin_ia32_cvtsi2ss", v4sf_ftype_v4sf_int, IX86_BUILTIN_CVTSI2SS);
12603 def_builtin (MASK_SSE1, "__builtin_ia32_cvtss2si", int_ftype_v4sf, IX86_BUILTIN_CVTSS2SI);
12604 def_builtin (MASK_SSE1, "__builtin_ia32_cvttps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTTPS2PI);
12605 def_builtin (MASK_SSE1, "__builtin_ia32_cvttss2si", int_ftype_v4sf, IX86_BUILTIN_CVTTSS2SI);
12606
12607 def_builtin (MASK_SSE1 | MASK_3DNOW_A, "__builtin_ia32_pextrw", int_ftype_v4hi_int, IX86_BUILTIN_PEXTRW);
12608 def_builtin (MASK_SSE1 | MASK_3DNOW_A, "__builtin_ia32_pinsrw", v4hi_ftype_v4hi_int_int, IX86_BUILTIN_PINSRW);
12609
12610 def_builtin (MASK_SSE1 | MASK_3DNOW_A, "__builtin_ia32_maskmovq", void_ftype_v8qi_v8qi_pchar, IX86_BUILTIN_MASKMOVQ);
12611
12612 def_builtin (MASK_SSE1, "__builtin_ia32_loadaps", v4sf_ftype_pfloat, IX86_BUILTIN_LOADAPS);
12613 def_builtin (MASK_SSE1, "__builtin_ia32_loadups", v4sf_ftype_pfloat, IX86_BUILTIN_LOADUPS);
12614 def_builtin (MASK_SSE1, "__builtin_ia32_loadss", v4sf_ftype_pfloat, IX86_BUILTIN_LOADSS);
12615 def_builtin (MASK_SSE1, "__builtin_ia32_storeaps", void_ftype_pfloat_v4sf, IX86_BUILTIN_STOREAPS);
12616 def_builtin (MASK_SSE1, "__builtin_ia32_storeups", void_ftype_pfloat_v4sf, IX86_BUILTIN_STOREUPS);
12617 def_builtin (MASK_SSE1, "__builtin_ia32_storess", void_ftype_pfloat_v4sf, IX86_BUILTIN_STORESS);
12618
12619 def_builtin (MASK_SSE1, "__builtin_ia32_loadhps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADHPS);
12620 def_builtin (MASK_SSE1, "__builtin_ia32_loadlps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADLPS);
12621 def_builtin (MASK_SSE1, "__builtin_ia32_storehps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STOREHPS);
12622 def_builtin (MASK_SSE1, "__builtin_ia32_storelps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STORELPS);
12623
12624 def_builtin (MASK_SSE1, "__builtin_ia32_movmskps", int_ftype_v4sf, IX86_BUILTIN_MOVMSKPS);
12625 def_builtin (MASK_SSE1 | MASK_3DNOW_A, "__builtin_ia32_pmovmskb", int_ftype_v8qi, IX86_BUILTIN_PMOVMSKB);
12626 def_builtin (MASK_SSE1, "__builtin_ia32_movntps", void_ftype_pfloat_v4sf, IX86_BUILTIN_MOVNTPS);
12627 def_builtin (MASK_SSE1 | MASK_3DNOW_A, "__builtin_ia32_movntq", void_ftype_pdi_di, IX86_BUILTIN_MOVNTQ);
12628
12629 def_builtin (MASK_SSE1 | MASK_3DNOW_A, "__builtin_ia32_sfence", void_ftype_void, IX86_BUILTIN_SFENCE);
12630
12631 def_builtin (MASK_SSE1 | MASK_3DNOW_A, "__builtin_ia32_psadbw", di_ftype_v8qi_v8qi, IX86_BUILTIN_PSADBW);
12632
12633 def_builtin (MASK_SSE1, "__builtin_ia32_rcpps", v4sf_ftype_v4sf, IX86_BUILTIN_RCPPS);
12634 def_builtin (MASK_SSE1, "__builtin_ia32_rcpss", v4sf_ftype_v4sf, IX86_BUILTIN_RCPSS);
12635 def_builtin (MASK_SSE1, "__builtin_ia32_rsqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTPS);
12636 def_builtin (MASK_SSE1, "__builtin_ia32_rsqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTSS);
12637 def_builtin (MASK_SSE1, "__builtin_ia32_sqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTPS);
12638 def_builtin (MASK_SSE1, "__builtin_ia32_sqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTSS);
12639
12640 def_builtin (MASK_SSE1, "__builtin_ia32_shufps", v4sf_ftype_v4sf_v4sf_int, IX86_BUILTIN_SHUFPS);
12641
12642 /* Original 3DNow! */
12643 def_builtin (MASK_3DNOW, "__builtin_ia32_femms", void_ftype_void, IX86_BUILTIN_FEMMS);
12644 def_builtin (MASK_3DNOW, "__builtin_ia32_pavgusb", v8qi_ftype_v8qi_v8qi, IX86_BUILTIN_PAVGUSB);
12645 def_builtin (MASK_3DNOW, "__builtin_ia32_pf2id", v2si_ftype_v2sf, IX86_BUILTIN_PF2ID);
12646 def_builtin (MASK_3DNOW, "__builtin_ia32_pfacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFACC);
12647 def_builtin (MASK_3DNOW, "__builtin_ia32_pfadd", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFADD);
12648 def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpeq", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPEQ);
12649 def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpge", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPGE);
12650 def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpgt", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPGT);
12651 def_builtin (MASK_3DNOW, "__builtin_ia32_pfmax", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMAX);
12652 def_builtin (MASK_3DNOW, "__builtin_ia32_pfmin", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMIN);
12653 def_builtin (MASK_3DNOW, "__builtin_ia32_pfmul", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMUL);
12654 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcp", v2sf_ftype_v2sf, IX86_BUILTIN_PFRCP);
12655 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcpit1", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRCPIT1);
12656 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcpit2", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRCPIT2);
12657 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrsqrt", v2sf_ftype_v2sf, IX86_BUILTIN_PFRSQRT);
12658 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrsqit1", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRSQIT1);
12659 def_builtin (MASK_3DNOW, "__builtin_ia32_pfsub", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFSUB);
12660 def_builtin (MASK_3DNOW, "__builtin_ia32_pfsubr", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFSUBR);
12661 def_builtin (MASK_3DNOW, "__builtin_ia32_pi2fd", v2sf_ftype_v2si, IX86_BUILTIN_PI2FD);
12662 def_builtin (MASK_3DNOW, "__builtin_ia32_pmulhrw", v4hi_ftype_v4hi_v4hi, IX86_BUILTIN_PMULHRW);
12663
12664 /* 3DNow! extension as used in the Athlon CPU. */
12665 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pf2iw", v2si_ftype_v2sf, IX86_BUILTIN_PF2IW);
12666 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pfnacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFNACC);
12667 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pfpnacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFPNACC);
12668 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pi2fw", v2sf_ftype_v2si, IX86_BUILTIN_PI2FW);
12669 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pswapdsf", v2sf_ftype_v2sf, IX86_BUILTIN_PSWAPDSF);
12670 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pswapdsi", v2si_ftype_v2si, IX86_BUILTIN_PSWAPDSI);
12671
12672 def_builtin (MASK_SSE1, "__builtin_ia32_setzerops", v4sf_ftype_void, IX86_BUILTIN_SSE_ZERO);
12673
12674 /* SSE2 */
12675 def_builtin (MASK_SSE2, "__builtin_ia32_pextrw128", int_ftype_v8hi_int, IX86_BUILTIN_PEXTRW128);
12676 def_builtin (MASK_SSE2, "__builtin_ia32_pinsrw128", v8hi_ftype_v8hi_int_int, IX86_BUILTIN_PINSRW128);
12677
12678 def_builtin (MASK_SSE2, "__builtin_ia32_maskmovdqu", void_ftype_v16qi_v16qi_pchar, IX86_BUILTIN_MASKMOVDQU);
12679 def_builtin (MASK_SSE2, "__builtin_ia32_movq2dq", v2di_ftype_di, IX86_BUILTIN_MOVQ2DQ);
12680 def_builtin (MASK_SSE2, "__builtin_ia32_movdq2q", di_ftype_v2di, IX86_BUILTIN_MOVDQ2Q);
12681
12682 def_builtin (MASK_SSE2, "__builtin_ia32_loadapd", v2df_ftype_pdouble, IX86_BUILTIN_LOADAPD);
12683 def_builtin (MASK_SSE2, "__builtin_ia32_loadupd", v2df_ftype_pdouble, IX86_BUILTIN_LOADUPD);
12684 def_builtin (MASK_SSE2, "__builtin_ia32_loadsd", v2df_ftype_pdouble, IX86_BUILTIN_LOADSD);
12685 def_builtin (MASK_SSE2, "__builtin_ia32_storeapd", void_ftype_pdouble_v2df, IX86_BUILTIN_STOREAPD);
12686 def_builtin (MASK_SSE2, "__builtin_ia32_storeupd", void_ftype_pdouble_v2df, IX86_BUILTIN_STOREUPD);
12687 def_builtin (MASK_SSE2, "__builtin_ia32_storesd", void_ftype_pdouble_v2df, IX86_BUILTIN_STORESD);
12688
12689 def_builtin (MASK_SSE2, "__builtin_ia32_loadhpd", v2df_ftype_v2df_pv2si, IX86_BUILTIN_LOADHPD);
12690 def_builtin (MASK_SSE2, "__builtin_ia32_loadlpd", v2df_ftype_v2df_pv2si, IX86_BUILTIN_LOADLPD);
12691 def_builtin (MASK_SSE2, "__builtin_ia32_storehpd", void_ftype_pv2si_v2df, IX86_BUILTIN_STOREHPD);
12692 def_builtin (MASK_SSE2, "__builtin_ia32_storelpd", void_ftype_pv2si_v2df, IX86_BUILTIN_STORELPD);
12693
12694 def_builtin (MASK_SSE2, "__builtin_ia32_movmskpd", int_ftype_v2df, IX86_BUILTIN_MOVMSKPD);
12695 def_builtin (MASK_SSE2, "__builtin_ia32_pmovmskb128", int_ftype_v16qi, IX86_BUILTIN_PMOVMSKB128);
12696 def_builtin (MASK_SSE2, "__builtin_ia32_movnti", void_ftype_pint_int, IX86_BUILTIN_MOVNTI);
12697 def_builtin (MASK_SSE2, "__builtin_ia32_movntpd", void_ftype_pdouble_v2df, IX86_BUILTIN_MOVNTPD);
12698 def_builtin (MASK_SSE2, "__builtin_ia32_movntdq", void_ftype_pv2di_v2di, IX86_BUILTIN_MOVNTDQ);
12699
12700 def_builtin (MASK_SSE2, "__builtin_ia32_pshufd", v4si_ftype_v4si_int, IX86_BUILTIN_PSHUFD);
12701 def_builtin (MASK_SSE2, "__builtin_ia32_pshuflw", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSHUFLW);
12702 def_builtin (MASK_SSE2, "__builtin_ia32_pshufhw", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSHUFHW);
12703 def_builtin (MASK_SSE2, "__builtin_ia32_psadbw128", v2di_ftype_v16qi_v16qi, IX86_BUILTIN_PSADBW128);
12704
12705 def_builtin (MASK_SSE2, "__builtin_ia32_sqrtpd", v2df_ftype_v2df, IX86_BUILTIN_SQRTPD);
12706 def_builtin (MASK_SSE2, "__builtin_ia32_sqrtsd", v2df_ftype_v2df, IX86_BUILTIN_SQRTSD);
12707
12708 def_builtin (MASK_SSE2, "__builtin_ia32_shufpd", v2df_ftype_v2df_v2df_int, IX86_BUILTIN_SHUFPD);
12709
12710 def_builtin (MASK_SSE2, "__builtin_ia32_cvtdq2pd", v2df_ftype_v4si, IX86_BUILTIN_CVTDQ2PD);
12711 def_builtin (MASK_SSE2, "__builtin_ia32_cvtdq2ps", v4sf_ftype_v4si, IX86_BUILTIN_CVTDQ2PS);
12712
12713 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2dq", v4si_ftype_v2df, IX86_BUILTIN_CVTPD2DQ);
12714 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2pi", v2si_ftype_v2df, IX86_BUILTIN_CVTPD2PI);
12715 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2ps", v4sf_ftype_v2df, IX86_BUILTIN_CVTPD2PS);
12716 def_builtin (MASK_SSE2, "__builtin_ia32_cvttpd2dq", v4si_ftype_v2df, IX86_BUILTIN_CVTTPD2DQ);
12717 def_builtin (MASK_SSE2, "__builtin_ia32_cvttpd2pi", v2si_ftype_v2df, IX86_BUILTIN_CVTTPD2PI);
12718
12719 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpi2pd", v2df_ftype_v2si, IX86_BUILTIN_CVTPI2PD);
12720
12721 def_builtin (MASK_SSE2, "__builtin_ia32_cvtsd2si", int_ftype_v2df, IX86_BUILTIN_CVTSD2SI);
12722 def_builtin (MASK_SSE2, "__builtin_ia32_cvttsd2si", int_ftype_v2df, IX86_BUILTIN_CVTTSD2SI);
12723
12724 def_builtin (MASK_SSE2, "__builtin_ia32_cvtps2dq", v4si_ftype_v4sf, IX86_BUILTIN_CVTPS2DQ);
12725 def_builtin (MASK_SSE2, "__builtin_ia32_cvtps2pd", v2df_ftype_v4sf, IX86_BUILTIN_CVTPS2PD);
12726 def_builtin (MASK_SSE2, "__builtin_ia32_cvttps2dq", v4si_ftype_v4sf, IX86_BUILTIN_CVTTPS2DQ);
12727
12728 def_builtin (MASK_SSE2, "__builtin_ia32_cvtsi2sd", v2df_ftype_v2df_int, IX86_BUILTIN_CVTSI2SD);
12729 def_builtin (MASK_SSE2, "__builtin_ia32_cvtsd2ss", v4sf_ftype_v4sf_v2df, IX86_BUILTIN_CVTSD2SS);
12730 def_builtin (MASK_SSE2, "__builtin_ia32_cvtss2sd", v2df_ftype_v2df_v4sf, IX86_BUILTIN_CVTSS2SD);
12731
12732 def_builtin (MASK_SSE2, "__builtin_ia32_setpd1", v2df_ftype_double, IX86_BUILTIN_SETPD1);
12733 def_builtin (MASK_SSE2, "__builtin_ia32_setpd", v2df_ftype_double_double, IX86_BUILTIN_SETPD);
12734 def_builtin (MASK_SSE2, "__builtin_ia32_setzeropd", ti_ftype_void, IX86_BUILTIN_CLRPD);
12735 def_builtin (MASK_SSE2, "__builtin_ia32_loadpd1", v2df_ftype_pdouble, IX86_BUILTIN_LOADPD1);
12736 def_builtin (MASK_SSE2, "__builtin_ia32_loadrpd", v2df_ftype_pdouble, IX86_BUILTIN_LOADRPD);
12737 def_builtin (MASK_SSE2, "__builtin_ia32_storepd1", void_ftype_pdouble_v2df, IX86_BUILTIN_STOREPD1);
12738 def_builtin (MASK_SSE2, "__builtin_ia32_storerpd", void_ftype_pdouble_v2df, IX86_BUILTIN_STORERPD);
12739
12740 def_builtin (MASK_SSE2, "__builtin_ia32_clflush", void_ftype_pvoid, IX86_BUILTIN_CLFLUSH);
12741 def_builtin (MASK_SSE2, "__builtin_ia32_lfence", void_ftype_void, IX86_BUILTIN_LFENCE);
12742 def_builtin (MASK_SSE2, "__builtin_ia32_mfence", void_ftype_void, IX86_BUILTIN_MFENCE);
12743
12744 def_builtin (MASK_SSE2, "__builtin_ia32_loaddqa", v16qi_ftype_pchar, IX86_BUILTIN_LOADDQA);
12745 def_builtin (MASK_SSE2, "__builtin_ia32_loaddqu", v16qi_ftype_pchar, IX86_BUILTIN_LOADDQU);
12746 def_builtin (MASK_SSE2, "__builtin_ia32_loadd", v4si_ftype_pchar, IX86_BUILTIN_LOADD);
12747 def_builtin (MASK_SSE2, "__builtin_ia32_storedqa", void_ftype_pchar_v16qi, IX86_BUILTIN_STOREDQA);
12748 def_builtin (MASK_SSE2, "__builtin_ia32_storedqu", void_ftype_pchar_v16qi, IX86_BUILTIN_STOREDQU);
12749 def_builtin (MASK_SSE2, "__builtin_ia32_stored", void_ftype_pchar_v4si, IX86_BUILTIN_STORED);
12750 def_builtin (MASK_SSE2, "__builtin_ia32_movq", v2di_ftype_v2di, IX86_BUILTIN_MOVQ);
12751
12752 def_builtin (MASK_SSE1, "__builtin_ia32_setzero128", v2di_ftype_void, IX86_BUILTIN_CLRTI);
12753
12754 def_builtin (MASK_SSE2, "__builtin_ia32_psllw128", v8hi_ftype_v8hi_v2di, IX86_BUILTIN_PSLLW128);
12755 def_builtin (MASK_SSE2, "__builtin_ia32_pslld128", v4si_ftype_v4si_v2di, IX86_BUILTIN_PSLLD128);
12756 def_builtin (MASK_SSE2, "__builtin_ia32_psllq128", v2di_ftype_v2di_v2di, IX86_BUILTIN_PSLLQ128);
12757
12758 def_builtin (MASK_SSE2, "__builtin_ia32_psrlw128", v8hi_ftype_v8hi_v2di, IX86_BUILTIN_PSRLW128);
12759 def_builtin (MASK_SSE2, "__builtin_ia32_psrld128", v4si_ftype_v4si_v2di, IX86_BUILTIN_PSRLD128);
12760 def_builtin (MASK_SSE2, "__builtin_ia32_psrlq128", v2di_ftype_v2di_v2di, IX86_BUILTIN_PSRLQ128);
12761
12762 def_builtin (MASK_SSE2, "__builtin_ia32_psraw128", v8hi_ftype_v8hi_v2di, IX86_BUILTIN_PSRAW128);
12763 def_builtin (MASK_SSE2, "__builtin_ia32_psrad128", v4si_ftype_v4si_v2di, IX86_BUILTIN_PSRAD128);
12764
12765 def_builtin (MASK_SSE2, "__builtin_ia32_pslldqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSLLDQI128);
12766 def_builtin (MASK_SSE2, "__builtin_ia32_psllwi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSLLWI128);
12767 def_builtin (MASK_SSE2, "__builtin_ia32_pslldi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSLLDI128);
12768 def_builtin (MASK_SSE2, "__builtin_ia32_psllqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSLLQI128);
12769
12770 def_builtin (MASK_SSE2, "__builtin_ia32_psrldqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSRLDQI128);
12771 def_builtin (MASK_SSE2, "__builtin_ia32_psrlwi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSRLWI128);
12772 def_builtin (MASK_SSE2, "__builtin_ia32_psrldi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSRLDI128);
12773 def_builtin (MASK_SSE2, "__builtin_ia32_psrlqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSRLQI128);
12774
12775 def_builtin (MASK_SSE2, "__builtin_ia32_psrawi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSRAWI128);
12776 def_builtin (MASK_SSE2, "__builtin_ia32_psradi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSRADI128);
12777
12778 def_builtin (MASK_SSE2, "__builtin_ia32_pmaddwd128", v4si_ftype_v8hi_v8hi, IX86_BUILTIN_PMADDWD128);
12779 }
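
/* Illustrative sketch (user-level code, not part of the compiler) of how
   one of the builtins defined above is reached from a translation unit
   compiled with -msse:

       __v4sf c = __builtin_ia32_shufps (a, b, 0x44);

   where __v4sf is the 16-byte vector-of-float type used by xmmintrin.h.
   The third argument must be a compile-time constant; otherwise
   ix86_expand_builtin below reports "mask must be an immediate".  */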
12780
12781 /* Errors in the source file can cause expand_expr to return const0_rtx
12782 where we expect a vector. To avoid crashing, use one of the vector
12783 clear instructions. */
12784 static rtx
12785 safe_vector_operand (x, mode)
12786 rtx x;
12787 enum machine_mode mode;
12788 {
12789 if (x != const0_rtx)
12790 return x;
12791 x = gen_reg_rtx (mode);
12792
12793 if (VALID_MMX_REG_MODE (mode) || VALID_MMX_REG_MODE_3DNOW (mode))
12794 emit_insn (gen_mmx_clrdi (mode == DImode ? x
12795 : gen_rtx_SUBREG (DImode, x, 0)));
12796 else
12797 emit_insn (gen_sse_clrv4sf (mode == V4SFmode ? x
12798 : gen_rtx_SUBREG (V4SFmode, x, 0)));
12799 return x;
12800 }
12801
12802 /* Subroutine of ix86_expand_builtin to take care of binop insns. */
12803
12804 static rtx
12805 ix86_expand_binop_builtin (icode, arglist, target)
12806 enum insn_code icode;
12807 tree arglist;
12808 rtx target;
12809 {
12810 rtx pat;
12811 tree arg0 = TREE_VALUE (arglist);
12812 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
12813 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
12814 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
12815 enum machine_mode tmode = insn_data[icode].operand[0].mode;
12816 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
12817 enum machine_mode mode1 = insn_data[icode].operand[2].mode;
12818
12819 if (VECTOR_MODE_P (mode0))
12820 op0 = safe_vector_operand (op0, mode0);
12821 if (VECTOR_MODE_P (mode1))
12822 op1 = safe_vector_operand (op1, mode1);
12823
12824 if (! target
12825 || GET_MODE (target) != tmode
12826 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
12827 target = gen_reg_rtx (tmode);
12828
12829 /* In case the insn wants input operands in modes different from
12830 the result, abort. */
12831 if (GET_MODE (op0) != mode0 || GET_MODE (op1) != mode1)
12832 abort ();
12833
12834 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
12835 op0 = copy_to_mode_reg (mode0, op0);
12836 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
12837 op1 = copy_to_mode_reg (mode1, op1);
12838
12839 /* In the commutative cases, both op0 and op1 are nonimmediate_operand,
12840 yet one of the two must not be a memory. This is normally enforced
12841 by expanders, but we didn't bother to create one here. */
12842 if (GET_CODE (op0) == MEM && GET_CODE (op1) == MEM)
12843 op0 = copy_to_mode_reg (mode0, op0);
12844
12845 pat = GEN_FCN (icode) (target, op0, op1);
12846 if (! pat)
12847 return 0;
12848 emit_insn (pat);
12849 return target;
12850 }
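
/* Illustrative note: a two-operand builtin such as __builtin_ia32_pmaddwd
   (defined above) is expected to end up here via ix86_expand_builtin; the
   two arguments are expanded, coerced into the modes the insn pattern
   demands, and a fresh TMODE register is used as the target unless the
   caller already supplied a suitable one.  */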
12851
12852 /* Subroutine of ix86_expand_builtin to take care of stores. */
12853
12854 static rtx
12855 ix86_expand_store_builtin (icode, arglist)
12856 enum insn_code icode;
12857 tree arglist;
12858 {
12859 rtx pat;
12860 tree arg0 = TREE_VALUE (arglist);
12861 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
12862 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
12863 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
12864 enum machine_mode mode0 = insn_data[icode].operand[0].mode;
12865 enum machine_mode mode1 = insn_data[icode].operand[1].mode;
12866
12867 if (VECTOR_MODE_P (mode1))
12868 op1 = safe_vector_operand (op1, mode1);
12869
12870 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
12871
12872 if (! (*insn_data[icode].operand[1].predicate) (op1, mode1))
12873 op1 = copy_to_mode_reg (mode1, op1);
12874
12875 pat = GEN_FCN (icode) (op0, op1);
12876 if (pat)
12877 emit_insn (pat);
12878 return 0;
12879 }
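
/* Descriptive note: for the store builtins the first argument is the
   pointer (turned into a MEM in the mode the insn expects) and the second
   is the value being stored.  The builtins have no result, so this helper
   always returns 0.  */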
12880
12881 /* Subroutine of ix86_expand_builtin to take care of unop insns. */
12882
12883 static rtx
12884 ix86_expand_unop_builtin (icode, arglist, target, do_load)
12885 enum insn_code icode;
12886 tree arglist;
12887 rtx target;
12888 int do_load;
12889 {
12890 rtx pat;
12891 tree arg0 = TREE_VALUE (arglist);
12892 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
12893 enum machine_mode tmode = insn_data[icode].operand[0].mode;
12894 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
12895
12896 if (! target
12897 || GET_MODE (target) != tmode
12898 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
12899 target = gen_reg_rtx (tmode);
12900 if (do_load)
12901 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
12902 else
12903 {
12904 if (VECTOR_MODE_P (mode0))
12905 op0 = safe_vector_operand (op0, mode0);
12906
12907 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
12908 op0 = copy_to_mode_reg (mode0, op0);
12909 }
12910
12911 pat = GEN_FCN (icode) (target, op0);
12912 if (! pat)
12913 return 0;
12914 emit_insn (pat);
12915 return target;
12916 }
12917
12918 /* Subroutine of ix86_expand_builtin to take care of three special unop insns:
12919 sqrtss, rsqrtss, rcpss. */
12920
12921 static rtx
12922 ix86_expand_unop1_builtin (icode, arglist, target)
12923 enum insn_code icode;
12924 tree arglist;
12925 rtx target;
12926 {
12927 rtx pat;
12928 tree arg0 = TREE_VALUE (arglist);
12929 rtx op1, op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
12930 enum machine_mode tmode = insn_data[icode].operand[0].mode;
12931 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
12932
12933 if (! target
12934 || GET_MODE (target) != tmode
12935 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
12936 target = gen_reg_rtx (tmode);
12937
12938 if (VECTOR_MODE_P (mode0))
12939 op0 = safe_vector_operand (op0, mode0);
12940
12941 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
12942 op0 = copy_to_mode_reg (mode0, op0);
12943
12944 op1 = op0;
12945 if (! (*insn_data[icode].operand[2].predicate) (op1, mode0))
12946 op1 = copy_to_mode_reg (mode0, op1);
12947
12948 pat = GEN_FCN (icode) (target, op0, op1);
12949 if (! pat)
12950 return 0;
12951 emit_insn (pat);
12952 return target;
12953 }
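
/* Descriptive note: the scalar patterns handled here combine the computed
   low element with the upper elements of a second vector input.  Passing
   the same value as both operands (op1 = op0 above) therefore keeps the
   upper elements of the argument unchanged in the result; see the vm*
   patterns in the machine description for the authoritative RTL.  */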
12954
12955 /* Subroutine of ix86_expand_builtin to take care of comparison insns. */
12956
12957 static rtx
12958 ix86_expand_sse_compare (d, arglist, target)
12959 const struct builtin_description *d;
12960 tree arglist;
12961 rtx target;
12962 {
12963 rtx pat;
12964 tree arg0 = TREE_VALUE (arglist);
12965 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
12966 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
12967 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
12968 rtx op2;
12969 enum machine_mode tmode = insn_data[d->icode].operand[0].mode;
12970 enum machine_mode mode0 = insn_data[d->icode].operand[1].mode;
12971 enum machine_mode mode1 = insn_data[d->icode].operand[2].mode;
12972 enum rtx_code comparison = d->comparison;
12973
12974 if (VECTOR_MODE_P (mode0))
12975 op0 = safe_vector_operand (op0, mode0);
12976 if (VECTOR_MODE_P (mode1))
12977 op1 = safe_vector_operand (op1, mode1);
12978
12979 /* Swap operands if we have a comparison that isn't available in
12980 hardware. */
12981 if (d->flag)
12982 {
12983 rtx tmp = gen_reg_rtx (mode1);
12984 emit_move_insn (tmp, op1);
12985 op1 = op0;
12986 op0 = tmp;
12987 }
12988
12989 if (! target
12990 || GET_MODE (target) != tmode
12991 || ! (*insn_data[d->icode].operand[0].predicate) (target, tmode))
12992 target = gen_reg_rtx (tmode);
12993
12994 if (! (*insn_data[d->icode].operand[1].predicate) (op0, mode0))
12995 op0 = copy_to_mode_reg (mode0, op0);
12996 if (! (*insn_data[d->icode].operand[2].predicate) (op1, mode1))
12997 op1 = copy_to_mode_reg (mode1, op1);
12998
12999 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
13000 pat = GEN_FCN (d->icode) (target, op0, op1, op2);
13001 if (! pat)
13002 return 0;
13003 emit_insn (pat);
13004 return target;
13005 }
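
/* Illustrative note on d->flag: the hardware only provides one direction
   of the ordered comparisons, so a "greater than" style builtin is mapped
   onto the corresponding "less than" pattern with the operands swapped.
   The flag itself is set where the builtin description tables are
   defined.  */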
13006
13007 /* Subroutine of ix86_expand_builtin to take care of comi insns. */
13008
13009 static rtx
13010 ix86_expand_sse_comi (d, arglist, target)
13011 const struct builtin_description *d;
13012 tree arglist;
13013 rtx target;
13014 {
13015 rtx pat;
13016 tree arg0 = TREE_VALUE (arglist);
13017 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13018 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13019 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13020 rtx op2;
13021 enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
13022 enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
13023 enum rtx_code comparison = d->comparison;
13024
13025 if (VECTOR_MODE_P (mode0))
13026 op0 = safe_vector_operand (op0, mode0);
13027 if (VECTOR_MODE_P (mode1))
13028 op1 = safe_vector_operand (op1, mode1);
13029
13030 /* Swap operands if we have a comparison that isn't available in
13031 hardware. */
13032 if (d->flag)
13033 {
13034 rtx tmp = op1;
13035 op1 = op0;
13036 op0 = tmp;
13037 }
13038
13039 target = gen_reg_rtx (SImode);
13040 emit_move_insn (target, const0_rtx);
13041 target = gen_rtx_SUBREG (QImode, target, 0);
13042
13043 if (! (*insn_data[d->icode].operand[0].predicate) (op0, mode0))
13044 op0 = copy_to_mode_reg (mode0, op0);
13045 if (! (*insn_data[d->icode].operand[1].predicate) (op1, mode1))
13046 op1 = copy_to_mode_reg (mode1, op1);
13047
13048 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
13049 pat = GEN_FCN (d->icode) (op0, op1);
13050 if (! pat)
13051 return 0;
13052 emit_insn (pat);
13053 emit_insn (gen_rtx_SET (VOIDmode,
13054 gen_rtx_STRICT_LOW_PART (VOIDmode, target),
13055 gen_rtx_fmt_ee (comparison, QImode,
13056 SET_DEST (pat),
13057 const0_rtx)));
13058
13059 return SUBREG_REG (target);
13060 }
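
/* Rough sketch of the code emitted above (illustrative only; the exact
   instructions depend on d->icode and d->comparison):

       xorl     %eax, %eax
       ucomiss  %xmm1, %xmm0
       sete     %al

   i.e. the flags set by the comi/ucomi insn are materialized into the low
   byte of a zeroed integer register, so the builtin yields 0 or 1.  */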
13061
13062 /* Expand an expression EXP that calls a built-in function,
13063 with result going to TARGET if that's convenient
13064 (and in mode MODE if that's convenient).
13065 SUBTARGET may be used as the target for computing one of EXP's operands.
13066 IGNORE is nonzero if the value is to be ignored. */
13067
13068 rtx
13069 ix86_expand_builtin (exp, target, subtarget, mode, ignore)
13070 tree exp;
13071 rtx target;
13072 rtx subtarget ATTRIBUTE_UNUSED;
13073 enum machine_mode mode ATTRIBUTE_UNUSED;
13074 int ignore ATTRIBUTE_UNUSED;
13075 {
13076 const struct builtin_description *d;
13077 size_t i;
13078 enum insn_code icode;
13079 tree fndecl = TREE_OPERAND (TREE_OPERAND (exp, 0), 0);
13080 tree arglist = TREE_OPERAND (exp, 1);
13081 tree arg0, arg1, arg2;
13082 rtx op0, op1, op2, pat;
13083 enum machine_mode tmode, mode0, mode1, mode2;
13084 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
13085
13086 switch (fcode)
13087 {
13088 case IX86_BUILTIN_EMMS:
13089 emit_insn (gen_emms ());
13090 return 0;
13091
13092 case IX86_BUILTIN_SFENCE:
13093 emit_insn (gen_sfence ());
13094 return 0;
13095
13096 case IX86_BUILTIN_PEXTRW:
13097 case IX86_BUILTIN_PEXTRW128:
13098 icode = (fcode == IX86_BUILTIN_PEXTRW
13099 ? CODE_FOR_mmx_pextrw
13100 : CODE_FOR_sse2_pextrw);
13101 arg0 = TREE_VALUE (arglist);
13102 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13103 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13104 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13105 tmode = insn_data[icode].operand[0].mode;
13106 mode0 = insn_data[icode].operand[1].mode;
13107 mode1 = insn_data[icode].operand[2].mode;
13108
13109 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
13110 op0 = copy_to_mode_reg (mode0, op0);
13111 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
13112 {
13113 /* @@@ better error message */
13114 error ("selector must be an immediate");
13115 return gen_reg_rtx (tmode);
13116 }
13117 if (target == 0
13118 || GET_MODE (target) != tmode
13119 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13120 target = gen_reg_rtx (tmode);
13121 pat = GEN_FCN (icode) (target, op0, op1);
13122 if (! pat)
13123 return 0;
13124 emit_insn (pat);
13125 return target;
13126
13127 case IX86_BUILTIN_PINSRW:
13128 case IX86_BUILTIN_PINSRW128:
13129 icode = (fcode == IX86_BUILTIN_PINSRW
13130 ? CODE_FOR_mmx_pinsrw
13131 : CODE_FOR_sse2_pinsrw);
13132 arg0 = TREE_VALUE (arglist);
13133 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13134 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
13135 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13136 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13137 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
13138 tmode = insn_data[icode].operand[0].mode;
13139 mode0 = insn_data[icode].operand[1].mode;
13140 mode1 = insn_data[icode].operand[2].mode;
13141 mode2 = insn_data[icode].operand[3].mode;
13142
13143 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
13144 op0 = copy_to_mode_reg (mode0, op0);
13145 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
13146 op1 = copy_to_mode_reg (mode1, op1);
13147 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
13148 {
13149 /* @@@ better error message */
13150 error ("selector must be an immediate");
13151 return const0_rtx;
13152 }
13153 if (target == 0
13154 || GET_MODE (target) != tmode
13155 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13156 target = gen_reg_rtx (tmode);
13157 pat = GEN_FCN (icode) (target, op0, op1, op2);
13158 if (! pat)
13159 return 0;
13160 emit_insn (pat);
13161 return target;
13162
13163 case IX86_BUILTIN_MASKMOVQ:
13164 case IX86_BUILTIN_MASKMOVDQU:
13165 icode = (fcode == IX86_BUILTIN_MASKMOVQ
13166 ? (TARGET_64BIT ? CODE_FOR_mmx_maskmovq_rex : CODE_FOR_mmx_maskmovq)
13167 : (TARGET_64BIT ? CODE_FOR_sse2_maskmovdqu_rex64
13168 : CODE_FOR_sse2_maskmovdqu));
13169 /* Note the arg order is different from the operand order. */
13170 arg1 = TREE_VALUE (arglist);
13171 arg2 = TREE_VALUE (TREE_CHAIN (arglist));
13172 arg0 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
13173 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13174 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13175 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
13176 mode0 = insn_data[icode].operand[0].mode;
13177 mode1 = insn_data[icode].operand[1].mode;
13178 mode2 = insn_data[icode].operand[2].mode;
13179
13180 if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
13181 op0 = copy_to_mode_reg (mode0, op0);
13182 if (! (*insn_data[icode].operand[1].predicate) (op1, mode1))
13183 op1 = copy_to_mode_reg (mode1, op1);
13184 if (! (*insn_data[icode].operand[2].predicate) (op2, mode2))
13185 op2 = copy_to_mode_reg (mode2, op2);
13186 pat = GEN_FCN (icode) (op0, op1, op2);
13187 if (! pat)
13188 return 0;
13189 emit_insn (pat);
13190 return 0;
13191
13192 case IX86_BUILTIN_SQRTSS:
13193 return ix86_expand_unop1_builtin (CODE_FOR_vmsqrtv4sf2, arglist, target);
13194 case IX86_BUILTIN_RSQRTSS:
13195 return ix86_expand_unop1_builtin (CODE_FOR_vmrsqrtv4sf2, arglist, target);
13196 case IX86_BUILTIN_RCPSS:
13197 return ix86_expand_unop1_builtin (CODE_FOR_vmrcpv4sf2, arglist, target);
13198
13199 case IX86_BUILTIN_LOADAPS:
13200 return ix86_expand_unop_builtin (CODE_FOR_sse_movaps, arglist, target, 1);
13201
13202 case IX86_BUILTIN_LOADUPS:
13203 return ix86_expand_unop_builtin (CODE_FOR_sse_movups, arglist, target, 1);
13204
13205 case IX86_BUILTIN_STOREAPS:
13206 return ix86_expand_store_builtin (CODE_FOR_sse_movaps, arglist);
13207
13208 case IX86_BUILTIN_STOREUPS:
13209 return ix86_expand_store_builtin (CODE_FOR_sse_movups, arglist);
13210
13211 case IX86_BUILTIN_LOADSS:
13212 return ix86_expand_unop_builtin (CODE_FOR_sse_loadss, arglist, target, 1);
13213
13214 case IX86_BUILTIN_STORESS:
13215 return ix86_expand_store_builtin (CODE_FOR_sse_storess, arglist);
13216
13217 case IX86_BUILTIN_LOADHPS:
13218 case IX86_BUILTIN_LOADLPS:
13219 case IX86_BUILTIN_LOADHPD:
13220 case IX86_BUILTIN_LOADLPD:
13221 icode = (fcode == IX86_BUILTIN_LOADHPS ? CODE_FOR_sse_movhps
13222 : fcode == IX86_BUILTIN_LOADLPS ? CODE_FOR_sse_movlps
13223 : fcode == IX86_BUILTIN_LOADHPD ? CODE_FOR_sse2_movhpd
13224 : CODE_FOR_sse2_movlpd);
13225 arg0 = TREE_VALUE (arglist);
13226 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13227 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13228 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13229 tmode = insn_data[icode].operand[0].mode;
13230 mode0 = insn_data[icode].operand[1].mode;
13231 mode1 = insn_data[icode].operand[2].mode;
13232
13233 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
13234 op0 = copy_to_mode_reg (mode0, op0);
13235 op1 = gen_rtx_MEM (mode1, copy_to_mode_reg (Pmode, op1));
13236 if (target == 0
13237 || GET_MODE (target) != tmode
13238 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13239 target = gen_reg_rtx (tmode);
13240 pat = GEN_FCN (icode) (target, op0, op1);
13241 if (! pat)
13242 return 0;
13243 emit_insn (pat);
13244 return target;
13245
13246 case IX86_BUILTIN_STOREHPS:
13247 case IX86_BUILTIN_STORELPS:
13248 case IX86_BUILTIN_STOREHPD:
13249 case IX86_BUILTIN_STORELPD:
13250 icode = (fcode == IX86_BUILTIN_STOREHPS ? CODE_FOR_sse_movhps
13251 : fcode == IX86_BUILTIN_STORELPS ? CODE_FOR_sse_movlps
13252 : fcode == IX86_BUILTIN_STOREHPD ? CODE_FOR_sse2_movhpd
13253 : CODE_FOR_sse2_movlpd);
13254 arg0 = TREE_VALUE (arglist);
13255 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13256 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13257 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13258 mode0 = insn_data[icode].operand[1].mode;
13259 mode1 = insn_data[icode].operand[2].mode;
13260
13261 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
13262 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
13263 op1 = copy_to_mode_reg (mode1, op1);
13264
13265 pat = GEN_FCN (icode) (op0, op0, op1);
13266 if (! pat)
13267 return 0;
13268 emit_insn (pat);
13269 return 0;
13270
13271 case IX86_BUILTIN_MOVNTPS:
13272 return ix86_expand_store_builtin (CODE_FOR_sse_movntv4sf, arglist);
13273 case IX86_BUILTIN_MOVNTQ:
13274 return ix86_expand_store_builtin (CODE_FOR_sse_movntdi, arglist);
13275
13276 case IX86_BUILTIN_LDMXCSR:
13277 op0 = expand_expr (TREE_VALUE (arglist), NULL_RTX, VOIDmode, 0);
13278 target = assign_386_stack_local (SImode, 0);
13279 emit_move_insn (target, op0);
13280 emit_insn (gen_ldmxcsr (target));
13281 return 0;
13282
13283 case IX86_BUILTIN_STMXCSR:
13284 target = assign_386_stack_local (SImode, 0);
13285 emit_insn (gen_stmxcsr (target));
13286 return copy_to_mode_reg (SImode, target);
13287
13288 case IX86_BUILTIN_SHUFPS:
13289 case IX86_BUILTIN_SHUFPD:
13290 icode = (fcode == IX86_BUILTIN_SHUFPS
13291 ? CODE_FOR_sse_shufps
13292 : CODE_FOR_sse2_shufpd);
13293 arg0 = TREE_VALUE (arglist);
13294 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13295 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
13296 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13297 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13298 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
13299 tmode = insn_data[icode].operand[0].mode;
13300 mode0 = insn_data[icode].operand[1].mode;
13301 mode1 = insn_data[icode].operand[2].mode;
13302 mode2 = insn_data[icode].operand[3].mode;
13303
13304 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
13305 op0 = copy_to_mode_reg (mode0, op0);
13306 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
13307 op1 = copy_to_mode_reg (mode1, op1);
13308 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
13309 {
13310 /* @@@ better error message */
13311 error ("mask must be an immediate");
13312 return gen_reg_rtx (tmode);
13313 }
13314 if (target == 0
13315 || GET_MODE (target) != tmode
13316 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13317 target = gen_reg_rtx (tmode);
13318 pat = GEN_FCN (icode) (target, op0, op1, op2);
13319 if (! pat)
13320 return 0;
13321 emit_insn (pat);
13322 return target;
13323
13324 case IX86_BUILTIN_PSHUFW:
13325 case IX86_BUILTIN_PSHUFD:
13326 case IX86_BUILTIN_PSHUFHW:
13327 case IX86_BUILTIN_PSHUFLW:
13328 icode = ( fcode == IX86_BUILTIN_PSHUFHW ? CODE_FOR_sse2_pshufhw
13329 : fcode == IX86_BUILTIN_PSHUFLW ? CODE_FOR_sse2_pshuflw
13330 : fcode == IX86_BUILTIN_PSHUFD ? CODE_FOR_sse2_pshufd
13331 : CODE_FOR_mmx_pshufw);
13332 arg0 = TREE_VALUE (arglist);
13333 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13334 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13335 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13336 tmode = insn_data[icode].operand[0].mode;
13337 mode1 = insn_data[icode].operand[1].mode;
13338 mode2 = insn_data[icode].operand[2].mode;
13339
13340 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
13341 op0 = copy_to_mode_reg (mode1, op0);
13342 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
13343 {
13344 /* @@@ better error message */
13345 error ("mask must be an immediate");
13346 return const0_rtx;
13347 }
13348 if (target == 0
13349 || GET_MODE (target) != tmode
13350 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13351 target = gen_reg_rtx (tmode);
13352 pat = GEN_FCN (icode) (target, op0, op1);
13353 if (! pat)
13354 return 0;
13355 emit_insn (pat);
13356 return target;
13357
13358 case IX86_BUILTIN_PSLLDQI128:
13359 case IX86_BUILTIN_PSRLDQI128:
13360 icode = ( fcode == IX86_BUILTIN_PSLLDQI128 ? CODE_FOR_sse2_ashlti3
13361 : CODE_FOR_sse2_lshrti3);
13362 arg0 = TREE_VALUE (arglist);
13363 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13364 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13365 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13366 tmode = insn_data[icode].operand[0].mode;
13367 mode1 = insn_data[icode].operand[1].mode;
13368 mode2 = insn_data[icode].operand[2].mode;
13369
13370 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
13371 {
13372 op0 = copy_to_reg (op0);
13373 op0 = simplify_gen_subreg (mode1, op0, GET_MODE (op0), 0);
13374 }
13375 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
13376 {
13377 error ("shift must be an immediate");
13378 return const0_rtx;
13379 }
13380 target = gen_reg_rtx (V2DImode);
13381 pat = GEN_FCN (icode) (simplify_gen_subreg (tmode, target, V2DImode, 0), op0, op1);
13382 if (! pat)
13383 return 0;
13384 emit_insn (pat);
13385 return target;
13386
13387 case IX86_BUILTIN_FEMMS:
13388 emit_insn (gen_femms ());
13389 return NULL_RTX;
13390
13391 case IX86_BUILTIN_PAVGUSB:
13392 return ix86_expand_binop_builtin (CODE_FOR_pavgusb, arglist, target);
13393
13394 case IX86_BUILTIN_PF2ID:
13395 return ix86_expand_unop_builtin (CODE_FOR_pf2id, arglist, target, 0);
13396
13397 case IX86_BUILTIN_PFACC:
13398 return ix86_expand_binop_builtin (CODE_FOR_pfacc, arglist, target);
13399
13400 case IX86_BUILTIN_PFADD:
13401 return ix86_expand_binop_builtin (CODE_FOR_addv2sf3, arglist, target);
13402
13403 case IX86_BUILTIN_PFCMPEQ:
13404 return ix86_expand_binop_builtin (CODE_FOR_eqv2sf3, arglist, target);
13405
13406 case IX86_BUILTIN_PFCMPGE:
13407 return ix86_expand_binop_builtin (CODE_FOR_gev2sf3, arglist, target);
13408
13409 case IX86_BUILTIN_PFCMPGT:
13410 return ix86_expand_binop_builtin (CODE_FOR_gtv2sf3, arglist, target);
13411
13412 case IX86_BUILTIN_PFMAX:
13413 return ix86_expand_binop_builtin (CODE_FOR_pfmaxv2sf3, arglist, target);
13414
13415 case IX86_BUILTIN_PFMIN:
13416 return ix86_expand_binop_builtin (CODE_FOR_pfminv2sf3, arglist, target);
13417
13418 case IX86_BUILTIN_PFMUL:
13419 return ix86_expand_binop_builtin (CODE_FOR_mulv2sf3, arglist, target);
13420
13421 case IX86_BUILTIN_PFRCP:
13422 return ix86_expand_unop_builtin (CODE_FOR_pfrcpv2sf2, arglist, target, 0);
13423
13424 case IX86_BUILTIN_PFRCPIT1:
13425 return ix86_expand_binop_builtin (CODE_FOR_pfrcpit1v2sf3, arglist, target);
13426
13427 case IX86_BUILTIN_PFRCPIT2:
13428 return ix86_expand_binop_builtin (CODE_FOR_pfrcpit2v2sf3, arglist, target);
13429
13430 case IX86_BUILTIN_PFRSQIT1:
13431 return ix86_expand_binop_builtin (CODE_FOR_pfrsqit1v2sf3, arglist, target);
13432
13433 case IX86_BUILTIN_PFRSQRT:
13434 return ix86_expand_unop_builtin (CODE_FOR_pfrsqrtv2sf2, arglist, target, 0);
13435
13436 case IX86_BUILTIN_PFSUB:
13437 return ix86_expand_binop_builtin (CODE_FOR_subv2sf3, arglist, target);
13438
13439 case IX86_BUILTIN_PFSUBR:
13440 return ix86_expand_binop_builtin (CODE_FOR_subrv2sf3, arglist, target);
13441
13442 case IX86_BUILTIN_PI2FD:
13443 return ix86_expand_unop_builtin (CODE_FOR_floatv2si2, arglist, target, 0);
13444
13445 case IX86_BUILTIN_PMULHRW:
13446 return ix86_expand_binop_builtin (CODE_FOR_pmulhrwv4hi3, arglist, target);
13447
13448 case IX86_BUILTIN_PF2IW:
13449 return ix86_expand_unop_builtin (CODE_FOR_pf2iw, arglist, target, 0);
13450
13451 case IX86_BUILTIN_PFNACC:
13452 return ix86_expand_binop_builtin (CODE_FOR_pfnacc, arglist, target);
13453
13454 case IX86_BUILTIN_PFPNACC:
13455 return ix86_expand_binop_builtin (CODE_FOR_pfpnacc, arglist, target);
13456
13457 case IX86_BUILTIN_PI2FW:
13458 return ix86_expand_unop_builtin (CODE_FOR_pi2fw, arglist, target, 0);
13459
13460 case IX86_BUILTIN_PSWAPDSI:
13461 return ix86_expand_unop_builtin (CODE_FOR_pswapdv2si2, arglist, target, 0);
13462
13463 case IX86_BUILTIN_PSWAPDSF:
13464 return ix86_expand_unop_builtin (CODE_FOR_pswapdv2sf2, arglist, target, 0);
13465
13466 case IX86_BUILTIN_SSE_ZERO:
13467 target = gen_reg_rtx (V4SFmode);
13468 emit_insn (gen_sse_clrv4sf (target));
13469 return target;
13470
13471 case IX86_BUILTIN_MMX_ZERO:
13472 target = gen_reg_rtx (DImode);
13473 emit_insn (gen_mmx_clrdi (target));
13474 return target;
13475
13476 case IX86_BUILTIN_CLRTI:
13477 target = gen_reg_rtx (V2DImode);
13478 emit_insn (gen_sse2_clrti (simplify_gen_subreg (TImode, target, V2DImode, 0)));
13479 return target;
13480
13481
13482 case IX86_BUILTIN_SQRTSD:
13483 return ix86_expand_unop1_builtin (CODE_FOR_vmsqrtv2df2, arglist, target);
13484 case IX86_BUILTIN_LOADAPD:
13485 return ix86_expand_unop_builtin (CODE_FOR_sse2_movapd, arglist, target, 1);
13486 case IX86_BUILTIN_LOADUPD:
13487 return ix86_expand_unop_builtin (CODE_FOR_sse2_movupd, arglist, target, 1);
13488
13489 case IX86_BUILTIN_STOREAPD:
13490 return ix86_expand_store_builtin (CODE_FOR_sse2_movapd, arglist);
13491 case IX86_BUILTIN_STOREUPD:
13492 return ix86_expand_store_builtin (CODE_FOR_sse2_movupd, arglist);
13493
13494 case IX86_BUILTIN_LOADSD:
13495 return ix86_expand_unop_builtin (CODE_FOR_sse2_loadsd, arglist, target, 1);
13496
13497 case IX86_BUILTIN_STORESD:
13498 return ix86_expand_store_builtin (CODE_FOR_sse2_storesd, arglist);
13499
13500 case IX86_BUILTIN_SETPD1:
13501 target = assign_386_stack_local (DFmode, 0);
13502 arg0 = TREE_VALUE (arglist);
13503 emit_move_insn (adjust_address (target, DFmode, 0),
13504 expand_expr (arg0, NULL_RTX, VOIDmode, 0));
13505 op0 = gen_reg_rtx (V2DFmode);
13506 emit_insn (gen_sse2_loadsd (op0, adjust_address (target, V2DFmode, 0)));
13507 emit_insn (gen_sse2_shufpd (op0, op0, op0, GEN_INT (0)));
13508 return op0;
13509
13510 case IX86_BUILTIN_SETPD:
13511 target = assign_386_stack_local (V2DFmode, 0);
13512 arg0 = TREE_VALUE (arglist);
13513 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13514 emit_move_insn (adjust_address (target, DFmode, 0),
13515 expand_expr (arg0, NULL_RTX, VOIDmode, 0));
13516 emit_move_insn (adjust_address (target, DFmode, 8),
13517 expand_expr (arg1, NULL_RTX, VOIDmode, 0));
13518 op0 = gen_reg_rtx (V2DFmode);
13519 emit_insn (gen_sse2_movapd (op0, target));
13520 return op0;
13521
13522 case IX86_BUILTIN_LOADRPD:
13523 target = ix86_expand_unop_builtin (CODE_FOR_sse2_movapd, arglist,
13524 gen_reg_rtx (V2DFmode), 1);
13525 emit_insn (gen_sse2_shufpd (target, target, target, GEN_INT (1)));
13526 return target;
13527
13528 case IX86_BUILTIN_LOADPD1:
13529 target = ix86_expand_unop_builtin (CODE_FOR_sse2_loadsd, arglist,
13530 gen_reg_rtx (V2DFmode), 1);
13531 emit_insn (gen_sse2_shufpd (target, target, target, const0_rtx));
13532 return target;
13533
13534 case IX86_BUILTIN_STOREPD1:
13535 return ix86_expand_store_builtin (CODE_FOR_sse2_movapd, arglist);
13536 case IX86_BUILTIN_STORERPD:
13537 return ix86_expand_store_builtin (CODE_FOR_sse2_movapd, arglist);
13538
13539 case IX86_BUILTIN_CLRPD:
13540 target = gen_reg_rtx (V2DFmode);
13541 emit_insn (gen_sse_clrv2df (target));
13542 return target;
13543
13544 case IX86_BUILTIN_MFENCE:
13545 emit_insn (gen_sse2_mfence ());
13546 return 0;
13547 case IX86_BUILTIN_LFENCE:
13548 emit_insn (gen_sse2_lfence ());
13549 return 0;
13550
13551 case IX86_BUILTIN_CLFLUSH:
13552 arg0 = TREE_VALUE (arglist);
13553 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13554 icode = CODE_FOR_sse2_clflush;
13555 if (! (*insn_data[icode].operand[0].predicate) (op0, Pmode))
13556 op0 = copy_to_mode_reg (Pmode, op0);
13557
13558 emit_insn (gen_sse2_clflush (op0));
13559 return 0;
13560
13561 case IX86_BUILTIN_MOVNTPD:
13562 return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2df, arglist);
13563 case IX86_BUILTIN_MOVNTDQ:
13564 return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2di, arglist);
13565 case IX86_BUILTIN_MOVNTI:
13566 return ix86_expand_store_builtin (CODE_FOR_sse2_movntsi, arglist);
13567
13568 case IX86_BUILTIN_LOADDQA:
13569 return ix86_expand_unop_builtin (CODE_FOR_sse2_movdqa, arglist, target, 1);
13570 case IX86_BUILTIN_LOADDQU:
13571 return ix86_expand_unop_builtin (CODE_FOR_sse2_movdqu, arglist, target, 1);
13572 case IX86_BUILTIN_LOADD:
13573 return ix86_expand_unop_builtin (CODE_FOR_sse2_loadd, arglist, target, 1);
13574
13575 case IX86_BUILTIN_STOREDQA:
13576 return ix86_expand_store_builtin (CODE_FOR_sse2_movdqa, arglist);
13577 case IX86_BUILTIN_STOREDQU:
13578 return ix86_expand_store_builtin (CODE_FOR_sse2_movdqu, arglist);
13579 case IX86_BUILTIN_STORED:
13580 return ix86_expand_store_builtin (CODE_FOR_sse2_stored, arglist);
13581
13582 default:
13583 break;
13584 }
13585
13586 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
13587 if (d->code == fcode)
13588 {
13589 /* Compares are treated specially. */
13590 if (d->icode == CODE_FOR_maskcmpv4sf3
13591 || d->icode == CODE_FOR_vmmaskcmpv4sf3
13592 || d->icode == CODE_FOR_maskncmpv4sf3
13593 || d->icode == CODE_FOR_vmmaskncmpv4sf3
13594 || d->icode == CODE_FOR_maskcmpv2df3
13595 || d->icode == CODE_FOR_vmmaskcmpv2df3
13596 || d->icode == CODE_FOR_maskncmpv2df3
13597 || d->icode == CODE_FOR_vmmaskncmpv2df3)
13598 return ix86_expand_sse_compare (d, arglist, target);
13599
13600 return ix86_expand_binop_builtin (d->icode, arglist, target);
13601 }
13602
13603 for (i = 0, d = bdesc_1arg; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
13604 if (d->code == fcode)
13605 return ix86_expand_unop_builtin (d->icode, arglist, target, 0);
13606
13607 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
13608 if (d->code == fcode)
13609 return ix86_expand_sse_comi (d, arglist, target);
13610
13611 /* @@@ Should really do something sensible here. */
13612 return 0;
13613 }
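
/* Descriptive note: builtins without an explicit case above are looked up
   in the description tables: two-operand builtins in bdesc_2arg (with the
   SSE/SSE2 compare patterns routed to ix86_expand_sse_compare),
   one-operand builtins in bdesc_1arg, and the comi/ucomi family in
   bdesc_comi.  */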
13614
13615 /* Store OPERAND to memory after reload is completed. This means
13616 that we can't easily use assign_stack_local. */
13617 rtx
13618 ix86_force_to_memory (mode, operand)
13619 enum machine_mode mode;
13620 rtx operand;
13621 {
13622 rtx result;
13623 if (!reload_completed)
13624 abort ();
13625 if (TARGET_64BIT && TARGET_RED_ZONE)
13626 {
13627 result = gen_rtx_MEM (mode,
13628 gen_rtx_PLUS (Pmode,
13629 stack_pointer_rtx,
13630 GEN_INT (-RED_ZONE_SIZE)));
13631 emit_move_insn (result, operand);
13632 }
13633 else if (TARGET_64BIT && !TARGET_RED_ZONE)
13634 {
13635 switch (mode)
13636 {
13637 case HImode:
13638 case SImode:
13639 operand = gen_lowpart (DImode, operand);
13640 /* FALLTHRU */
13641 case DImode:
13642 emit_insn (
13643 gen_rtx_SET (VOIDmode,
13644 gen_rtx_MEM (DImode,
13645 gen_rtx_PRE_DEC (DImode,
13646 stack_pointer_rtx)),
13647 operand));
13648 break;
13649 default:
13650 abort ();
13651 }
13652 result = gen_rtx_MEM (mode, stack_pointer_rtx);
13653 }
13654 else
13655 {
13656 switch (mode)
13657 {
13658 case DImode:
13659 {
13660 rtx operands[2];
13661 split_di (&operand, 1, operands, operands + 1);
13662 emit_insn (
13663 gen_rtx_SET (VOIDmode,
13664 gen_rtx_MEM (SImode,
13665 gen_rtx_PRE_DEC (Pmode,
13666 stack_pointer_rtx)),
13667 operands[1]));
13668 emit_insn (
13669 gen_rtx_SET (VOIDmode,
13670 gen_rtx_MEM (SImode,
13671 gen_rtx_PRE_DEC (Pmode,
13672 stack_pointer_rtx)),
13673 operands[0]));
13674 }
13675 break;
13676 case HImode:
13677 /* It is better to store HImode values as SImode. */
13678 if (!TARGET_PARTIAL_REG_STALL)
13679 operand = gen_lowpart (SImode, operand);
13680 /* FALLTHRU */
13681 case SImode:
13682 emit_insn (
13683 gen_rtx_SET (VOIDmode,
13684 gen_rtx_MEM (GET_MODE (operand),
13685 gen_rtx_PRE_DEC (SImode,
13686 stack_pointer_rtx)),
13687 operand));
13688 break;
13689 default:
13690 abort ();
13691 }
13692 result = gen_rtx_MEM (mode, stack_pointer_rtx);
13693 }
13694 return result;
13695 }
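
/* Illustrative note on the 32-bit DImode path above: split_di leaves the
   low word in operands[0] and the high word in operands[1], so the high
   word is pushed first and the low word second, e.g.

       pushl  high-word
       pushl  low-word          ; result MEM is (%esp)

   which leaves the value laid out in memory in the usual little-endian
   order.  */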
13696
13697 /* Free the operand from memory. */
13698 void
13699 ix86_free_from_memory (mode)
13700 enum machine_mode mode;
13701 {
13702 if (!TARGET_64BIT || !TARGET_RED_ZONE)
13703 {
13704 int size;
13705
13706 if (mode == DImode || TARGET_64BIT)
13707 size = 8;
13708 else if (mode == HImode && TARGET_PARTIAL_REG_STALL)
13709 size = 2;
13710 else
13711 size = 4;
13712 /* Use LEA to deallocate stack space. In peephole2 it will be converted
13713 to a pop or add instruction if registers are available. */
13714 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
13715 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
13716 GEN_INT (size))));
13717 }
13718 }
13719
13720 /* Put float CONST_DOUBLE in the constant pool instead of fp regs.
13721 QImode must go into class Q_REGS.
13722 Narrow ALL_REGS to GENERAL_REGS. This supports allowing movsf and
13723 movdf to do mem-to-mem moves through integer regs. */
13724 enum reg_class
13725 ix86_preferred_reload_class (x, class)
13726 rtx x;
13727 enum reg_class class;
13728 {
13729 if (GET_CODE (x) == CONST_VECTOR && x != CONST0_RTX (GET_MODE (x)))
13730 return NO_REGS;
13731 if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) != VOIDmode)
13732 {
13733 /* SSE can't load any constant directly yet. */
13734 if (SSE_CLASS_P (class))
13735 return NO_REGS;
13736 /* Floats can load 0 and 1. */
13737 if (MAYBE_FLOAT_CLASS_P (class) && standard_80387_constant_p (x))
13738 {
13739 /* Limit class to non-SSE. Use GENERAL_REGS if possible. */
13740 if (MAYBE_SSE_CLASS_P (class))
13741 return (reg_class_subset_p (class, GENERAL_REGS)
13742 ? GENERAL_REGS : FLOAT_REGS);
13743 else
13744 return class;
13745 }
13746 /* General regs can load everything. */
13747 if (reg_class_subset_p (class, GENERAL_REGS))
13748 return GENERAL_REGS;
13749 /* In case we haven't resolved FLOAT or SSE yet, give up. */
13750 if (MAYBE_FLOAT_CLASS_P (class) || MAYBE_SSE_CLASS_P (class))
13751 return NO_REGS;
13752 }
13753 if (MAYBE_MMX_CLASS_P (class) && CONSTANT_P (x))
13754 return NO_REGS;
13755 if (GET_MODE (x) == QImode && ! reg_class_subset_p (class, Q_REGS))
13756 return Q_REGS;
13757 return class;
13758 }
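
/* Example of the policy above (illustrative): a CONST_DOUBLE such as 0.0
   or 1.0 may stay in an x87 register class, since standard_80387_constant_p
   accepts it, whereas an arbitrary double headed for an SSE class gets
   NO_REGS and is therefore forced to the constant pool.  */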
13759
13760 /* If we are copying between general and FP registers, we need a memory
13761 location. The same is true for SSE and MMX registers.
13762
13763 The macro can't work reliably when one of the CLASSES is a class containing
13764 registers from multiple units (SSE, MMX, integer). We avoid this by never
13765 combining those units in a single alternative in the machine description.
13766 Ensure that this constraint holds to avoid unexpected surprises.
13767
13768 When STRICT is false, we are being called from REGISTER_MOVE_COST, so do not
13769 enforce these sanity checks. */
13770 int
13771 ix86_secondary_memory_needed (class1, class2, mode, strict)
13772 enum reg_class class1, class2;
13773 enum machine_mode mode;
13774 int strict;
13775 {
13776 if (MAYBE_FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class1)
13777 || MAYBE_FLOAT_CLASS_P (class2) != FLOAT_CLASS_P (class2)
13778 || MAYBE_SSE_CLASS_P (class1) != SSE_CLASS_P (class1)
13779 || MAYBE_SSE_CLASS_P (class2) != SSE_CLASS_P (class2)
13780 || MAYBE_MMX_CLASS_P (class1) != MMX_CLASS_P (class1)
13781 || MAYBE_MMX_CLASS_P (class2) != MMX_CLASS_P (class2))
13782 {
13783 if (strict)
13784 abort ();
13785 else
13786 return 1;
13787 }
13788 return (FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class2)
13789 || (SSE_CLASS_P (class1) != SSE_CLASS_P (class2)
13790 && (mode) != SImode)
13791 || (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
13792 && (mode) != SImode));
13793 }
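
/* Consequence of the check above (illustrative): copying a DImode value
   between an SSE register and a general register has to go through
   memory, while an SImode copy is exempted because a direct move between
   the register files exists for that width.  */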
13794 /* Return the cost of moving data from a register in class CLASS1 to
13795 one in class CLASS2.
13796
13797 It is not required that the cost always equal 2 when FROM is the same as TO;
13798 on some machines it is expensive to move between registers if they are not
13799 general registers. */
13800 int
13801 ix86_register_move_cost (mode, class1, class2)
13802 enum machine_mode mode;
13803 enum reg_class class1, class2;
13804 {
13805 /* In case we require secondary memory, compute the cost of the store followed
13806 by the load. In order to avoid bad register allocation choices, we need
13807 this to be *at least* as high as the symmetric MEMORY_MOVE_COST. */
13808
13809 if (ix86_secondary_memory_needed (class1, class2, mode, 0))
13810 {
13811 int cost = 1;
13812
13813 cost += MAX (MEMORY_MOVE_COST (mode, class1, 0),
13814 MEMORY_MOVE_COST (mode, class1, 1));
13815 cost += MAX (MEMORY_MOVE_COST (mode, class2, 0),
13816 MEMORY_MOVE_COST (mode, class2, 1));
13817
13818 /* In the case of copying from a general purpose register we may emit multiple
13819 stores followed by a single load, causing a memory size mismatch stall.
13820 Count this as an arbitrarily high cost of 20. */
13821 if (CLASS_MAX_NREGS (class1, mode) > CLASS_MAX_NREGS (class2, mode))
13822 cost += 20;
13823
13824 /* In the case of FP/MMX moves, the registers actually overlap, and we
13825 have to switch modes in order to treat them differently. */
13826 if ((MMX_CLASS_P (class1) && MAYBE_FLOAT_CLASS_P (class2))
13827 || (MMX_CLASS_P (class2) && MAYBE_FLOAT_CLASS_P (class1)))
13828 cost += 20;
13829
13830 return cost;
13831 }
13832
13833 /* Moves between SSE/MMX and integer unit are expensive. */
13834 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
13835 || SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
13836 return ix86_cost->mmxsse_to_integer;
13837 if (MAYBE_FLOAT_CLASS_P (class1))
13838 return ix86_cost->fp_move;
13839 if (MAYBE_SSE_CLASS_P (class1))
13840 return ix86_cost->sse_move;
13841 if (MAYBE_MMX_CLASS_P (class1))
13842 return ix86_cost->mmx_move;
13843 return 2;
13844 }
13845
13846 /* Return 1 if hard register REGNO can hold a value of machine-mode MODE. */
13847 int
13848 ix86_hard_regno_mode_ok (regno, mode)
13849 int regno;
13850 enum machine_mode mode;
13851 {
13852 /* Flags, and only flags, can hold CCmode values. */
13853 if (CC_REGNO_P (regno))
13854 return GET_MODE_CLASS (mode) == MODE_CC;
13855 if (GET_MODE_CLASS (mode) == MODE_CC
13856 || GET_MODE_CLASS (mode) == MODE_RANDOM
13857 || GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
13858 return 0;
13859 if (FP_REGNO_P (regno))
13860 return VALID_FP_MODE_P (mode);
13861 if (SSE_REGNO_P (regno))
13862 return VALID_SSE_REG_MODE (mode);
13863 if (MMX_REGNO_P (regno))
13864 return VALID_MMX_REG_MODE (mode) || VALID_MMX_REG_MODE_3DNOW (mode);
13865 /* We handle both integer and floating point values in the general purpose
13866 registers. In the future we should be able to handle vector modes as well. */
13867 if (!VALID_INT_MODE_P (mode) && !VALID_FP_MODE_P (mode))
13868 return 0;
13869 /* Take care with QImode values - they can be in non-QI regs, but then
13870 they cause partial register stalls. */
13871 if (regno < 4 || mode != QImode || TARGET_64BIT)
13872 return 1;
13873 return reload_in_progress || reload_completed || !TARGET_PARTIAL_REG_STALL;
13874 }
13875
13876 /* Return the cost of moving data of mode M between a
13877 register and memory. A value of 2 is the default; this cost is
13878 relative to those in `REGISTER_MOVE_COST'.
13879
13880 If moving between registers and memory is more expensive than
13881 between two registers, you should define this macro to express the
13882 relative cost.
13883
13884 Also model the increased cost of moving QImode registers in
13885 non-Q_REGS classes.
13886 */
13887 int
13888 ix86_memory_move_cost (mode, class, in)
13889 enum machine_mode mode;
13890 enum reg_class class;
13891 int in;
13892 {
13893 if (FLOAT_CLASS_P (class))
13894 {
13895 int index;
13896 switch (mode)
13897 {
13898 case SFmode:
13899 index = 0;
13900 break;
13901 case DFmode:
13902 index = 1;
13903 break;
13904 case XFmode:
13905 case TFmode:
13906 index = 2;
13907 break;
13908 default:
13909 return 100;
13910 }
13911 return in ? ix86_cost->fp_load [index] : ix86_cost->fp_store [index];
13912 }
13913 if (SSE_CLASS_P (class))
13914 {
13915 int index;
13916 switch (GET_MODE_SIZE (mode))
13917 {
13918 case 4:
13919 index = 0;
13920 break;
13921 case 8:
13922 index = 1;
13923 break;
13924 case 16:
13925 index = 2;
13926 break;
13927 default:
13928 return 100;
13929 }
13930 return in ? ix86_cost->sse_load [index] : ix86_cost->sse_store [index];
13931 }
13932 if (MMX_CLASS_P (class))
13933 {
13934 int index;
13935 switch (GET_MODE_SIZE (mode))
13936 {
13937 case 4:
13938 index = 0;
13939 break;
13940 case 8:
13941 index = 1;
13942 break;
13943 default:
13944 return 100;
13945 }
13946 return in ? ix86_cost->mmx_load [index] : ix86_cost->mmx_store [index];
13947 }
13948 switch (GET_MODE_SIZE (mode))
13949 {
13950 case 1:
13951 if (in)
13952 return (Q_CLASS_P (class) ? ix86_cost->int_load[0]
13953 : ix86_cost->movzbl_load);
13954 else
13955 return (Q_CLASS_P (class) ? ix86_cost->int_store[0]
13956 : ix86_cost->int_store[0] + 4);
13957 break;
13958 case 2:
13959 return in ? ix86_cost->int_load[1] : ix86_cost->int_store[1];
13960 default:
13961 /* Compute the number of 32-bit moves needed. TFmode is moved as XFmode. */
13962 if (mode == TFmode)
13963 mode = XFmode;
13964 return ((in ? ix86_cost->int_load[2] : ix86_cost->int_store[2])
13965 * ((int) GET_MODE_SIZE (mode)
13966 + UNITS_PER_WORD -1 ) / UNITS_PER_WORD);
13967 }
13968 }
13969
13970 #if defined (DO_GLOBAL_CTORS_BODY) && defined (HAS_INIT_SECTION)
13971 static void
13972 ix86_svr3_asm_out_constructor (symbol, priority)
13973 rtx symbol;
13974 int priority ATTRIBUTE_UNUSED;
13975 {
13976 init_section ();
13977 fputs ("\tpushl $", asm_out_file);
13978 assemble_name (asm_out_file, XSTR (symbol, 0));
13979 fputc ('\n', asm_out_file);
13980 }
13981 #endif
13982
13983 #if TARGET_MACHO
13984
13985 static int current_machopic_label_num;
13986
13987 /* Given a symbol name and its associated stub, write out the
13988 definition of the stub. */
13989
13990 void
13991 machopic_output_stub (file, symb, stub)
13992 FILE *file;
13993 const char *symb, *stub;
13994 {
13995 unsigned int length;
13996 char *binder_name, *symbol_name, lazy_ptr_name[32];
13997 int label = ++current_machopic_label_num;
13998
13999 /* Lose our funky encoding stuff so it doesn't contaminate the stub. */
14000 symb = (*targetm.strip_name_encoding) (symb);
14001
14002 length = strlen (stub);
14003 binder_name = alloca (length + 32);
14004 GEN_BINDER_NAME_FOR_STUB (binder_name, stub, length);
14005
14006 length = strlen (symb);
14007 symbol_name = alloca (length + 32);
14008 GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name, symb, length);
14009
14010 sprintf (lazy_ptr_name, "L%d$lz", label);
14011
14012 if (MACHOPIC_PURE)
14013 machopic_picsymbol_stub_section ();
14014 else
14015 machopic_symbol_stub_section ();
14016
14017 fprintf (file, "%s:\n", stub);
14018 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
14019
14020 if (MACHOPIC_PURE)
14021 {
14022 fprintf (file, "\tcall LPC$%d\nLPC$%d:\tpopl %%eax\n", label, label);
14023 fprintf (file, "\tmovl %s-LPC$%d(%%eax),%%edx\n", lazy_ptr_name, label);
14024 fprintf (file, "\tjmp %%edx\n");
14025 }
14026 else
14027 fprintf (file, "\tjmp *%s\n", lazy_ptr_name);
14028
14029 fprintf (file, "%s:\n", binder_name);
14030
14031 if (MACHOPIC_PURE)
14032 {
14033 fprintf (file, "\tlea %s-LPC$%d(%%eax),%%eax\n", lazy_ptr_name, label);
14034 fprintf (file, "\tpushl %%eax\n");
14035 }
14036 else
14037 fprintf (file, "\t pushl $%s\n", lazy_ptr_name);
14038
14039 fprintf (file, "\tjmp dyld_stub_binding_helper\n");
14040
14041 machopic_lazy_symbol_ptr_section ();
14042 fprintf (file, "%s:\n", lazy_ptr_name);
14043 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
14044 fprintf (file, "\t.long %s\n", binder_name);
14045 }
14046 #endif /* TARGET_MACHO */
14047
14048 /* Order the registers for register allocator. */
14049
14050 void
14051 x86_order_regs_for_local_alloc ()
14052 {
14053 int pos = 0;
14054 int i;
14055
14056 /* First allocate the local general purpose registers. */
14057 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
14058 if (GENERAL_REGNO_P (i) && call_used_regs[i])
14059 reg_alloc_order [pos++] = i;
14060
14061 /* Global general purpose registers. */
14062 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
14063 if (GENERAL_REGNO_P (i) && !call_used_regs[i])
14064 reg_alloc_order [pos++] = i;
14065
14066 /* x87 registers come first in case we are doing FP math
14067 using them. */
14068 if (!TARGET_SSE_MATH)
14069 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
14070 reg_alloc_order [pos++] = i;
14071
14072 /* SSE registers. */
14073 for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++)
14074 reg_alloc_order [pos++] = i;
14075 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
14076 reg_alloc_order [pos++] = i;
14077
14078 /* x87 registers. */
14079 if (TARGET_SSE_MATH)
14080 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
14081 reg_alloc_order [pos++] = i;
14082
14083 for (i = FIRST_MMX_REG; i <= LAST_MMX_REG; i++)
14084 reg_alloc_order [pos++] = i;
14085
14086 /* Initialize the rest of the array, as we do not allocate some registers
14087 at all. */
14088 while (pos < FIRST_PSEUDO_REGISTER)
14089 reg_alloc_order [pos++] = 0;
14090 }
14091
14092 /* Return an RTX indicating where the `this' parameter is located
14093 on entry to FUNCTION. */
14094
14095 static rtx
14096 x86_this_parameter (function)
14097 tree function;
14098 {
14099 tree type = TREE_TYPE (function);
14100
14101 if (TARGET_64BIT)
14102 {
14103 int n = aggregate_value_p (TREE_TYPE (type)) != 0;
14104 return gen_rtx_REG (DImode, x86_64_int_parameter_registers[n]);
14105 }
14106
14107 if (ix86_fntype_regparm (type) > 0)
14108 {
14109 tree parm;
14110
14111 parm = TYPE_ARG_TYPES (type);
14112 /* Figure out whether the function takes a variable number of
14113 arguments by looking for the terminating void_type_node. */
14114 for (; parm; parm = TREE_CHAIN (parm))
14115 if (TREE_VALUE (parm) == void_type_node)
14116 break;
14117 /* If void-terminated (i.e. not varargs), the this parameter is in %eax. */
14118 if (parm)
14119 return gen_rtx_REG (SImode, 0);
14120 }
14121
14122 if (aggregate_value_p (TREE_TYPE (type)))
14123 return gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, 8));
14124 else
14125 return gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, 4));
14126 }
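
/* Illustrative summary (a sketch restating the cases above): on 64-bit
   targets `this' arrives in the first or second integer argument register
   (%rdi, or %rsi when the return value is an aggregate returned via a
   hidden pointer); in 32-bit regparm functions with a fixed argument list
   it arrives in %eax; otherwise it is on the stack at 4(%esp), or 8(%esp)
   when a hidden aggregate-return pointer occupies the first slot.  */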
14127
14128 /* Determine whether x86_output_mi_thunk can succeed. */
14129
14130 static bool
14131 x86_can_output_mi_thunk (thunk, delta, vcall_offset, function)
14132 tree thunk ATTRIBUTE_UNUSED;
14133 HOST_WIDE_INT delta ATTRIBUTE_UNUSED;
14134 HOST_WIDE_INT vcall_offset;
14135 tree function;
14136 {
14137 /* 64-bit can handle anything. */
14138 if (TARGET_64BIT)
14139 return true;
14140
14141 /* For 32-bit, everything's fine if we have one free register. */
14142 if (ix86_fntype_regparm (TREE_TYPE (function)) < 3)
14143 return true;
14144
14145 /* Need a free register for vcall_offset. */
14146 if (vcall_offset)
14147 return false;
14148
14149 /* Need a free register for GOT references. */
14150 if (flag_pic && !(*targetm.binds_local_p) (function))
14151 return false;
14152
14153 /* Otherwise ok. */
14154 return true;
14155 }
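
/* In short (restating the checks above): 64-bit always works; 32-bit works
   whenever fewer than three register parameters leave a scratch register
   free; with all three argument registers taken, the thunk cannot be
   emitted if it needs a scratch for VCALL_OFFSET or for PIC references to
   a FUNCTION that does not bind locally.  */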
14156
14157 /* Output the assembler code for a thunk function. THUNK is the
14158 declaration for the thunk function itself, FUNCTION is the decl for
14159 the target function. DELTA is an immediate constant offset to be
14160 added to THIS. If VCALL_OFFSET is non-zero, the word at
14161 *(*this + vcall_offset) should be added to THIS. */
14162
14163 static void
14164 x86_output_mi_thunk (file, thunk, delta, vcall_offset, function)
14165 FILE *file ATTRIBUTE_UNUSED;
14166 tree thunk ATTRIBUTE_UNUSED;
14167 HOST_WIDE_INT delta;
14168 HOST_WIDE_INT vcall_offset;
14169 tree function;
14170 {
14171 rtx xops[3];
14172 rtx this = x86_this_parameter (function);
14173 rtx this_reg, tmp;
14174
14175 /* If VCALL_OFFSET, we'll need THIS in a register. Might as well
14176 pull it in now and let DELTA benefit. */
14177 if (REG_P (this))
14178 this_reg = this;
14179 else if (vcall_offset)
14180 {
14181 /* Put the this parameter into %eax. */
14182 xops[0] = this;
14183 xops[1] = this_reg = gen_rtx_REG (Pmode, 0);
14184 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
14185 }
14186 else
14187 this_reg = NULL_RTX;
14188
14189 /* Adjust the this parameter by a fixed constant. */
14190 if (delta)
14191 {
14192 xops[0] = GEN_INT (delta);
14193 xops[1] = this_reg ? this_reg : this;
14194 if (TARGET_64BIT)
14195 {
14196 if (!x86_64_general_operand (xops[0], DImode))
14197 {
14198 tmp = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 2 /* R10 */);
14199 xops[1] = tmp;
14200 output_asm_insn ("mov{q}\t{%1, %0|%0, %1}", xops);
14201 xops[0] = tmp;
14202 xops[1] = this;
14203 }
14204 output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops);
14205 }
14206 else
14207 output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops);
14208 }
14209
14210 /* Adjust the this parameter by a value stored in the vtable. */
14211 if (vcall_offset)
14212 {
14213 if (TARGET_64BIT)
14214 tmp = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 2 /* R10 */);
14215 else
14216 tmp = gen_rtx_REG (SImode, 2 /* ECX */);
14217
14218 xops[0] = gen_rtx_MEM (Pmode, this_reg);
14219 xops[1] = tmp;
14220 if (TARGET_64BIT)
14221 output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops);
14222 else
14223 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
14224
14225 /* Adjust the this parameter. */
14226 xops[0] = gen_rtx_MEM (Pmode, plus_constant (tmp, vcall_offset));
14227 if (TARGET_64BIT && !memory_operand (xops[0], Pmode))
14228 {
14229 rtx tmp2 = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 3 /* R11 */);
14230 xops[0] = GEN_INT (vcall_offset);
14231 xops[1] = tmp2;
14232 output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops);
14233 xops[0] = gen_rtx_MEM (Pmode, gen_rtx_PLUS (Pmode, tmp, tmp2));
14234 }
14235 xops[1] = this_reg;
14236 if (TARGET_64BIT)
14237 output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops);
14238 else
14239 output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops);
14240 }
14241
14242 /* If necessary, drop THIS back to its stack slot. */
14243 if (this_reg && this_reg != this)
14244 {
14245 xops[0] = this_reg;
14246 xops[1] = this;
14247 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
14248 }
14249
14250 xops[0] = DECL_RTL (function);
14251 if (TARGET_64BIT)
14252 {
14253 if (!flag_pic || (*targetm.binds_local_p) (function))
14254 output_asm_insn ("jmp\t%P0", xops);
14255 else
14256 {
14257 tmp = XEXP (xops[0], 0);
14258 tmp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, tmp), UNSPEC_GOTPCREL);
14259 tmp = gen_rtx_CONST (Pmode, tmp);
14260 tmp = gen_rtx_MEM (QImode, tmp);
14261 xops[0] = tmp;
14262 output_asm_insn ("jmp\t%A0", xops);
14263 }
14264 }
14265 else
14266 {
14267 if (!flag_pic || (*targetm.binds_local_p) (function))
14268 output_asm_insn ("jmp\t%P0", xops);
14269 else
14270 {
14271 tmp = gen_rtx_REG (SImode, 2 /* ECX */);
14272 output_set_got (tmp);
14273
14274 xops[1] = tmp;
14275 output_asm_insn ("mov{l}\t{%0@GOT(%1), %1|%1, %0@GOT[%1]}", xops);
14276 output_asm_insn ("jmp\t{*}%1", xops);
14277 }
14278 }
14279 }
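
/* For illustration only: for a 32-bit, non-PIC thunk with DELTA = -4,
   VCALL_OFFSET = 0 and a stack-passed `this', the code above would emit
   something like

	add{l}	$-4, 4(%esp)
	jmp	target_function

   where "target_function" stands for FUNCTION's assembler name; the exact
   mnemonic spelling depends on the assembler dialect selected by the
   {}/| operand syntax.  */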
14280
14281 int
14282 x86_field_alignment (field, computed)
14283 tree field;
14284 int computed;
14285 {
14286 enum machine_mode mode;
14287 tree type = TREE_TYPE (field);
14288
14289 if (TARGET_64BIT || TARGET_ALIGN_DOUBLE)
14290 return computed;
14291 mode = TYPE_MODE (TREE_CODE (type) == ARRAY_TYPE
14292 ? get_inner_array_type (type) : type);
14293 if (mode == DFmode || mode == DCmode
14294 || GET_MODE_CLASS (mode) == MODE_INT
14295 || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
14296 return MIN (32, computed);
14297 return computed;
14298 }
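
/* Example of the effect (illustrative): on 32-bit targets without
   -malign-double, a `double' or `long long' structure field whose natural
   alignment would be 64 bits is capped at 32-bit alignment, matching the
   traditional ix86 struct layout; 64-bit targets keep the full computed
   alignment.  */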
14299
14300 /* Output assembler code to FILE to increment profiler label # LABELNO
14301 for profiling a function entry. */
14302 void
14303 x86_function_profiler (file, labelno)
14304 FILE *file;
14305 int labelno;
14306 {
14307 if (TARGET_64BIT)
14308 if (flag_pic)
14309 {
14310 #ifndef NO_PROFILE_COUNTERS
14311 fprintf (file, "\tleaq\t%sP%d(%%rip),%%r11\n", LPREFIX, labelno);
14312 #endif
14313 fprintf (file, "\tcall\t*%s@GOTPCREL(%%rip)\n", MCOUNT_NAME);
14314 }
14315 else
14316 {
14317 #ifndef NO_PROFILE_COUNTERS
14318 fprintf (file, "\tmovq\t$%sP%d,%%r11\n", LPREFIX, labelno);
14319 #endif
14320 fprintf (file, "\tcall\t%s\n", MCOUNT_NAME);
14321 }
14322 else if (flag_pic)
14323 {
14324 #ifndef NO_PROFILE_COUNTERS
14325 fprintf (file, "\tleal\t%sP%d@GOTOFF(%%ebx),%%%s\n",
14326 LPREFIX, labelno, PROFILE_COUNT_REGISTER);
14327 #endif
14328 fprintf (file, "\tcall\t*%s@GOT(%%ebx)\n", MCOUNT_NAME);
14329 }
14330 else
14331 {
14332 #ifndef NO_PROFILE_COUNTERS
14333 fprintf (file, "\tmovl\t$%sP%d,%%%s\n", LPREFIX, labelno,
14334 PROFILE_COUNT_REGISTER);
14335 #endif
14336 fprintf (file, "\tcall\t%s\n", MCOUNT_NAME);
14337 }
14338 }
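
/* For illustration only: on a 32-bit, non-PIC target with profile counters
   enabled this would emit roughly

	movl	$LP0, %edx
	call	mcount

   where "LP0" stands for LPREFIX and label number 0, "%edx" for whatever
   PROFILE_COUNT_REGISTER names, and "mcount" for MCOUNT_NAME; all three are
   target-configuration macros, so the exact strings vary.  */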
14339
14340 /* Implement machine specific optimizations.
14341 At the moment we implement a single transformation: AMD Athlon works
14342 faster when RET is not the destination of a conditional jump or directly
14343 preceded by another jump instruction. We avoid the penalty by inserting
14344 a NOP just before the RET instruction in such cases. */
14345 void
14346 x86_machine_dependent_reorg (first)
14347 rtx first ATTRIBUTE_UNUSED;
14348 {
14349 edge e;
14350
14351 if (!TARGET_ATHLON || !optimize || optimize_size)
14352 return;
14353 for (e = EXIT_BLOCK_PTR->pred; e; e = e->pred_next)
14354 {
14355 basic_block bb = e->src;
14356 rtx ret = bb->end;
14357 rtx prev;
14358 bool insert = false;
14359
14360 if (!returnjump_p (ret) || !maybe_hot_bb_p (bb))
14361 continue;
14362 prev = prev_nonnote_insn (ret);
14363 if (prev && GET_CODE (prev) == CODE_LABEL)
14364 {
14365 edge e;
14366 for (e = bb->pred; e; e = e->pred_next)
14367 if (EDGE_FREQUENCY (e) && e->src->index > 0
14368 && !(e->flags & EDGE_FALLTHRU))
14369 insert = 1;
14370 }
14371 if (!insert)
14372 {
14373 prev = prev_real_insn (ret);
14374 if (prev && GET_CODE (prev) == JUMP_INSN
14375 && any_condjump_p (prev))
14376 insert = 1;
14377 }
14378 if (insert)
14379 emit_insn_before (gen_nop (), ret);
14380 }
14381 }
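
/* For illustration only: the transformation above turns a sequence such as

	jne	.L5
	ret

   into

	jne	.L5
	nop
	ret

   so that the RET no longer directly follows a (conditional) jump or a
   reachable label, avoiding the Athlon branch-prediction penalty described
   in the comment above.  */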
14382
14383 #include "gt-i386.h"