1 /* Subroutines used for code generation on IA-32.
2 Copyright (C) 1988, 1992, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
3 2002, 2003, 2004 Free Software Foundation, Inc.
4
5 This file is part of GCC.
6
7 GCC is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 2, or (at your option)
10 any later version.
11
12 GCC is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
16
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING. If not, write to
19 the Free Software Foundation, 59 Temple Place - Suite 330,
20 Boston, MA 02111-1307, USA. */
21
22 #include "config.h"
23 #include "system.h"
24 #include "coretypes.h"
25 #include "tm.h"
26 #include "rtl.h"
27 #include "tree.h"
28 #include "tm_p.h"
29 #include "regs.h"
30 #include "hard-reg-set.h"
31 #include "real.h"
32 #include "insn-config.h"
33 #include "conditions.h"
34 #include "output.h"
35 #include "insn-codes.h"
36 #include "insn-attr.h"
37 #include "flags.h"
38 #include "except.h"
39 #include "function.h"
40 #include "recog.h"
41 #include "expr.h"
42 #include "optabs.h"
43 #include "toplev.h"
44 #include "basic-block.h"
45 #include "ggc.h"
46 #include "target.h"
47 #include "target-def.h"
48 #include "langhooks.h"
49 #include "cgraph.h"
50 #include "tree-gimple.h"
51
52 #ifndef CHECK_STACK_LIMIT
53 #define CHECK_STACK_LIMIT (-1)
54 #endif
55
56 /* Return index of given mode in mult and division cost tables. */
57 #define MODE_INDEX(mode) \
58 ((mode) == QImode ? 0 \
59 : (mode) == HImode ? 1 \
60 : (mode) == SImode ? 2 \
61 : (mode) == DImode ? 3 \
62 : 4)
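/* For example, MODE_INDEX (SImode) is 2 and MODE_INDEX (DImode) is 3, while
   any other mode (TImode, say) falls through to index 4.  The result is used
   to subscript the five-entry multiply-start and divide/mod cost arrays in
   the processor cost tables below, so QImode through DImode operations can
   be costed individually.  */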
63
64 /* Processor costs (relative to an add) */
65 static const
66 struct processor_costs size_cost = { /* costs for tuning for size */
67 2, /* cost of an add instruction */
68 3, /* cost of a lea instruction */
69 2, /* variable shift costs */
70 3, /* constant shift costs */
71 {3, 3, 3, 3, 5}, /* cost of starting a multiply */
72 0, /* cost of multiply per each bit set */
73 {3, 3, 3, 3, 5}, /* cost of a divide/mod */
74 3, /* cost of movsx */
75 3, /* cost of movzx */
76 0, /* "large" insn */
77 2, /* MOVE_RATIO */
78 2, /* cost for loading QImode using movzbl */
79 {2, 2, 2}, /* cost of loading integer registers
80 in QImode, HImode and SImode.
81 Relative to reg-reg move (2). */
82 {2, 2, 2}, /* cost of storing integer registers */
83 2, /* cost of reg,reg fld/fst */
84 {2, 2, 2}, /* cost of loading fp registers
85 in SFmode, DFmode and XFmode */
86 {2, 2, 2}, /* cost of storing fp registers in SFmode, DFmode and XFmode */
87 3, /* cost of moving MMX register */
88 {3, 3}, /* cost of loading MMX registers
89 in SImode and DImode */
90 {3, 3}, /* cost of storing MMX registers
91 in SImode and DImode */
92 3, /* cost of moving SSE register */
93 {3, 3, 3}, /* cost of loading SSE registers
94 in SImode, DImode and TImode */
95 {3, 3, 3}, /* cost of storing SSE registers
96 in SImode, DImode and TImode */
97 3, /* MMX or SSE register to integer */
98 0, /* size of prefetch block */
99 0, /* number of parallel prefetches */
100 1, /* Branch cost */
101 2, /* cost of FADD and FSUB insns. */
102 2, /* cost of FMUL instruction. */
103 2, /* cost of FDIV instruction. */
104 2, /* cost of FABS instruction. */
105 2, /* cost of FCHS instruction. */
106 2, /* cost of FSQRT instruction. */
107 };
108
109 /* Processor costs (relative to an add) */
110 static const
111 struct processor_costs i386_cost = { /* 386 specific costs */
112 1, /* cost of an add instruction */
113 1, /* cost of a lea instruction */
114 3, /* variable shift costs */
115 2, /* constant shift costs */
116 {6, 6, 6, 6, 6}, /* cost of starting a multiply */
117 1, /* cost of multiply per each bit set */
118 {23, 23, 23, 23, 23}, /* cost of a divide/mod */
119 3, /* cost of movsx */
120 2, /* cost of movzx */
121 15, /* "large" insn */
122 3, /* MOVE_RATIO */
123 4, /* cost for loading QImode using movzbl */
124 {2, 4, 2}, /* cost of loading integer registers
125 in QImode, HImode and SImode.
126 Relative to reg-reg move (2). */
127 {2, 4, 2}, /* cost of storing integer registers */
128 2, /* cost of reg,reg fld/fst */
129 {8, 8, 8}, /* cost of loading fp registers
130 in SFmode, DFmode and XFmode */
131 {8, 8, 8}, /* cost of storing fp registers in SFmode, DFmode and XFmode */
132 2, /* cost of moving MMX register */
133 {4, 8}, /* cost of loading MMX registers
134 in SImode and DImode */
135 {4, 8}, /* cost of storing MMX registers
136 in SImode and DImode */
137 2, /* cost of moving SSE register */
138 {4, 8, 16}, /* cost of loading SSE registers
139 in SImode, DImode and TImode */
140 {4, 8, 16}, /* cost of storing SSE registers
141 in SImode, DImode and TImode */
142 3, /* MMX or SSE register to integer */
143 0, /* size of prefetch block */
144 0, /* number of parallel prefetches */
145 1, /* Branch cost */
146 23, /* cost of FADD and FSUB insns. */
147 27, /* cost of FMUL instruction. */
148 88, /* cost of FDIV instruction. */
149 22, /* cost of FABS instruction. */
150 24, /* cost of FCHS instruction. */
151 122, /* cost of FSQRT instruction. */
152 };
153
154 static const
155 struct processor_costs i486_cost = { /* 486 specific costs */
156 1, /* cost of an add instruction */
157 1, /* cost of a lea instruction */
158 3, /* variable shift costs */
159 2, /* constant shift costs */
160 {12, 12, 12, 12, 12}, /* cost of starting a multiply */
161 1, /* cost of multiply per each bit set */
162 {40, 40, 40, 40, 40}, /* cost of a divide/mod */
163 3, /* cost of movsx */
164 2, /* cost of movzx */
165 15, /* "large" insn */
166 3, /* MOVE_RATIO */
167 4, /* cost for loading QImode using movzbl */
168 {2, 4, 2}, /* cost of loading integer registers
169 in QImode, HImode and SImode.
170 Relative to reg-reg move (2). */
171 {2, 4, 2}, /* cost of storing integer registers */
172 2, /* cost of reg,reg fld/fst */
173 {8, 8, 8}, /* cost of loading fp registers
174 in SFmode, DFmode and XFmode */
175 {8, 8, 8}, /* cost of storing fp registers in SFmode, DFmode and XFmode */
176 2, /* cost of moving MMX register */
177 {4, 8}, /* cost of loading MMX registers
178 in SImode and DImode */
179 {4, 8}, /* cost of storing MMX registers
180 in SImode and DImode */
181 2, /* cost of moving SSE register */
182 {4, 8, 16}, /* cost of loading SSE registers
183 in SImode, DImode and TImode */
184 {4, 8, 16}, /* cost of storing SSE registers
185 in SImode, DImode and TImode */
186 3, /* MMX or SSE register to integer */
187 0, /* size of prefetch block */
188 0, /* number of parallel prefetches */
189 1, /* Branch cost */
190 8, /* cost of FADD and FSUB insns. */
191 16, /* cost of FMUL instruction. */
192 73, /* cost of FDIV instruction. */
193 3, /* cost of FABS instruction. */
194 3, /* cost of FCHS instruction. */
195 83, /* cost of FSQRT instruction. */
196 };
197
198 static const
199 struct processor_costs pentium_cost = {
200 1, /* cost of an add instruction */
201 1, /* cost of a lea instruction */
202 4, /* variable shift costs */
203 1, /* constant shift costs */
204 {11, 11, 11, 11, 11}, /* cost of starting a multiply */
205 0, /* cost of multiply per each bit set */
206 {25, 25, 25, 25, 25}, /* cost of a divide/mod */
207 3, /* cost of movsx */
208 2, /* cost of movzx */
209 8, /* "large" insn */
210 6, /* MOVE_RATIO */
211 6, /* cost for loading QImode using movzbl */
212 {2, 4, 2}, /* cost of loading integer registers
213 in QImode, HImode and SImode.
214 Relative to reg-reg move (2). */
215 {2, 4, 2}, /* cost of storing integer registers */
216 2, /* cost of reg,reg fld/fst */
217 {2, 2, 6}, /* cost of loading fp registers
218 in SFmode, DFmode and XFmode */
219 {4, 4, 6}, /* cost of storing fp registers in SFmode, DFmode and XFmode */
220 8, /* cost of moving MMX register */
221 {8, 8}, /* cost of loading MMX registers
222 in SImode and DImode */
223 {8, 8}, /* cost of storing MMX registers
224 in SImode and DImode */
225 2, /* cost of moving SSE register */
226 {4, 8, 16}, /* cost of loading SSE registers
227 in SImode, DImode and TImode */
228 {4, 8, 16}, /* cost of storing SSE registers
229 in SImode, DImode and TImode */
230 3, /* MMX or SSE register to integer */
231 0, /* size of prefetch block */
232 0, /* number of parallel prefetches */
233 2, /* Branch cost */
234 3, /* cost of FADD and FSUB insns. */
235 3, /* cost of FMUL instruction. */
236 39, /* cost of FDIV instruction. */
237 1, /* cost of FABS instruction. */
238 1, /* cost of FCHS instruction. */
239 70, /* cost of FSQRT instruction. */
240 };
241
242 static const
243 struct processor_costs pentiumpro_cost = {
244 1, /* cost of an add instruction */
245 1, /* cost of a lea instruction */
246 1, /* variable shift costs */
247 1, /* constant shift costs */
248 {4, 4, 4, 4, 4}, /* cost of starting a multiply */
249 0, /* cost of multiply per each bit set */
250 {17, 17, 17, 17, 17}, /* cost of a divide/mod */
251 1, /* cost of movsx */
252 1, /* cost of movzx */
253 8, /* "large" insn */
254 6, /* MOVE_RATIO */
255 2, /* cost for loading QImode using movzbl */
256 {4, 4, 4}, /* cost of loading integer registers
257 in QImode, HImode and SImode.
258 Relative to reg-reg move (2). */
259 {2, 2, 2}, /* cost of storing integer registers */
260 2, /* cost of reg,reg fld/fst */
261 {2, 2, 6}, /* cost of loading fp registers
262 in SFmode, DFmode and XFmode */
263 {4, 4, 6}, /* cost of storing fp registers in SFmode, DFmode and XFmode */
264 2, /* cost of moving MMX register */
265 {2, 2}, /* cost of loading MMX registers
266 in SImode and DImode */
267 {2, 2}, /* cost of storing MMX registers
268 in SImode and DImode */
269 2, /* cost of moving SSE register */
270 {2, 2, 8}, /* cost of loading SSE registers
271 in SImode, DImode and TImode */
272 {2, 2, 8}, /* cost of storing SSE registers
273 in SImode, DImode and TImode */
274 3, /* MMX or SSE register to integer */
275 32, /* size of prefetch block */
276 6, /* number of parallel prefetches */
277 2, /* Branch cost */
278 3, /* cost of FADD and FSUB insns. */
279 5, /* cost of FMUL instruction. */
280 56, /* cost of FDIV instruction. */
281 2, /* cost of FABS instruction. */
282 2, /* cost of FCHS instruction. */
283 56, /* cost of FSQRT instruction. */
284 };
285
286 static const
287 struct processor_costs k6_cost = {
288 1, /* cost of an add instruction */
289 2, /* cost of a lea instruction */
290 1, /* variable shift costs */
291 1, /* constant shift costs */
292 {3, 3, 3, 3, 3}, /* cost of starting a multiply */
293 0, /* cost of multiply per each bit set */
294 {18, 18, 18, 18, 18}, /* cost of a divide/mod */
295 2, /* cost of movsx */
296 2, /* cost of movzx */
297 8, /* "large" insn */
298 4, /* MOVE_RATIO */
299 3, /* cost for loading QImode using movzbl */
300 {4, 5, 4}, /* cost of loading integer registers
301 in QImode, HImode and SImode.
302 Relative to reg-reg move (2). */
303 {2, 3, 2}, /* cost of storing integer registers */
304 4, /* cost of reg,reg fld/fst */
305 {6, 6, 6}, /* cost of loading fp registers
306 in SFmode, DFmode and XFmode */
307 {4, 4, 4}, /* cost of storing fp registers in SFmode, DFmode and XFmode */
308 2, /* cost of moving MMX register */
309 {2, 2}, /* cost of loading MMX registers
310 in SImode and DImode */
311 {2, 2}, /* cost of storing MMX registers
312 in SImode and DImode */
313 2, /* cost of moving SSE register */
314 {2, 2, 8}, /* cost of loading SSE registers
315 in SImode, DImode and TImode */
316 {2, 2, 8}, /* cost of storing SSE registers
317 in SImode, DImode and TImode */
318 6, /* MMX or SSE register to integer */
319 32, /* size of prefetch block */
320 1, /* number of parallel prefetches */
321 1, /* Branch cost */
322 2, /* cost of FADD and FSUB insns. */
323 2, /* cost of FMUL instruction. */
324 56, /* cost of FDIV instruction. */
325 2, /* cost of FABS instruction. */
326 2, /* cost of FCHS instruction. */
327 56, /* cost of FSQRT instruction. */
328 };
329
330 static const
331 struct processor_costs athlon_cost = {
332 1, /* cost of an add instruction */
333 2, /* cost of a lea instruction */
334 1, /* variable shift costs */
335 1, /* constant shift costs */
336 {5, 5, 5, 5, 5}, /* cost of starting a multiply */
337 0, /* cost of multiply per each bit set */
338 {18, 26, 42, 74, 74}, /* cost of a divide/mod */
339 1, /* cost of movsx */
340 1, /* cost of movzx */
341 8, /* "large" insn */
342 9, /* MOVE_RATIO */
343 4, /* cost for loading QImode using movzbl */
344 {3, 4, 3}, /* cost of loading integer registers
345 in QImode, HImode and SImode.
346 Relative to reg-reg move (2). */
347 {3, 4, 3}, /* cost of storing integer registers */
348 4, /* cost of reg,reg fld/fst */
349 {4, 4, 12}, /* cost of loading fp registers
350 in SFmode, DFmode and XFmode */
351 {6, 6, 8}, /* cost of storing fp registers in SFmode, DFmode and XFmode */
352 2, /* cost of moving MMX register */
353 {4, 4}, /* cost of loading MMX registers
354 in SImode and DImode */
355 {4, 4}, /* cost of storing MMX registers
356 in SImode and DImode */
357 2, /* cost of moving SSE register */
358 {4, 4, 6}, /* cost of loading SSE registers
359 in SImode, DImode and TImode */
360 {4, 4, 5}, /* cost of storing SSE registers
361 in SImode, DImode and TImode */
362 5, /* MMX or SSE register to integer */
363 64, /* size of prefetch block */
364 6, /* number of parallel prefetches */
365 2, /* Branch cost */
366 4, /* cost of FADD and FSUB insns. */
367 4, /* cost of FMUL instruction. */
368 24, /* cost of FDIV instruction. */
369 2, /* cost of FABS instruction. */
370 2, /* cost of FCHS instruction. */
371 35, /* cost of FSQRT instruction. */
372 };
373
374 static const
375 struct processor_costs k8_cost = {
376 1, /* cost of an add instruction */
377 2, /* cost of a lea instruction */
378 1, /* variable shift costs */
379 1, /* constant shift costs */
380 {3, 4, 3, 4, 5}, /* cost of starting a multiply */
381 0, /* cost of multiply per each bit set */
382 {18, 26, 42, 74, 74}, /* cost of a divide/mod */
383 1, /* cost of movsx */
384 1, /* cost of movzx */
385 8, /* "large" insn */
386 9, /* MOVE_RATIO */
387 4, /* cost for loading QImode using movzbl */
388 {3, 4, 3}, /* cost of loading integer registers
389 in QImode, HImode and SImode.
390 Relative to reg-reg move (2). */
391 {3, 4, 3}, /* cost of storing integer registers */
392 4, /* cost of reg,reg fld/fst */
393 {4, 4, 12}, /* cost of loading fp registers
394 in SFmode, DFmode and XFmode */
395 {6, 6, 8}, /* cost of storing fp registers in SFmode, DFmode and XFmode */
396 2, /* cost of moving MMX register */
397 {3, 3}, /* cost of loading MMX registers
398 in SImode and DImode */
399 {4, 4}, /* cost of storing MMX registers
400 in SImode and DImode */
401 2, /* cost of moving SSE register */
402 {4, 3, 6}, /* cost of loading SSE registers
403 in SImode, DImode and TImode */
404 {4, 4, 5}, /* cost of storing SSE registers
405 in SImode, DImode and TImode */
406 5, /* MMX or SSE register to integer */
407 64, /* size of prefetch block */
408 6, /* number of parallel prefetches */
409 2, /* Branch cost */
410 4, /* cost of FADD and FSUB insns. */
411 4, /* cost of FMUL instruction. */
412 19, /* cost of FDIV instruction. */
413 2, /* cost of FABS instruction. */
414 2, /* cost of FCHS instruction. */
415 35, /* cost of FSQRT instruction. */
416 };
417
418 static const
419 struct processor_costs pentium4_cost = {
420 1, /* cost of an add instruction */
421 3, /* cost of a lea instruction */
422 4, /* variable shift costs */
423 4, /* constant shift costs */
424 {15, 15, 15, 15, 15}, /* cost of starting a multiply */
425 0, /* cost of multiply per each bit set */
426 {56, 56, 56, 56, 56}, /* cost of a divide/mod */
427 1, /* cost of movsx */
428 1, /* cost of movzx */
429 16, /* "large" insn */
430 6, /* MOVE_RATIO */
431 2, /* cost for loading QImode using movzbl */
432 {4, 5, 4}, /* cost of loading integer registers
433 in QImode, HImode and SImode.
434 Relative to reg-reg move (2). */
435 {2, 3, 2}, /* cost of storing integer registers */
436 2, /* cost of reg,reg fld/fst */
437 {2, 2, 6}, /* cost of loading fp registers
438 in SFmode, DFmode and XFmode */
439 {4, 4, 6}, /* cost of storing fp registers in SFmode, DFmode and XFmode */
440 2, /* cost of moving MMX register */
441 {2, 2}, /* cost of loading MMX registers
442 in SImode and DImode */
443 {2, 2}, /* cost of storing MMX registers
444 in SImode and DImode */
445 12, /* cost of moving SSE register */
446 {12, 12, 12}, /* cost of loading SSE registers
447 in SImode, DImode and TImode */
448 {2, 2, 8}, /* cost of storing SSE registers
449 in SImode, DImode and TImode */
450 10, /* MMX or SSE register to integer */
451 64, /* size of prefetch block */
452 6, /* number of parallel prefetches */
453 2, /* Branch cost */
454 5, /* cost of FADD and FSUB insns. */
455 7, /* cost of FMUL instruction. */
456 43, /* cost of FDIV instruction. */
457 2, /* cost of FABS instruction. */
458 2, /* cost of FCHS instruction. */
459 43, /* cost of FSQRT instruction. */
460 };
461
462 static const
463 struct processor_costs nocona_cost = {
464 1, /* cost of an add instruction */
465 1, /* cost of a lea instruction */
466 1, /* variable shift costs */
467 1, /* constant shift costs */
468 {10, 10, 10, 10, 10}, /* cost of starting a multiply */
469 0, /* cost of multiply per each bit set */
470 {66, 66, 66, 66, 66}, /* cost of a divide/mod */
471 1, /* cost of movsx */
472 1, /* cost of movzx */
473 16, /* "large" insn */
474 9, /* MOVE_RATIO */
475 4, /* cost for loading QImode using movzbl */
476 {4, 4, 4}, /* cost of loading integer registers
477 in QImode, HImode and SImode.
478 Relative to reg-reg move (2). */
479 {4, 4, 4}, /* cost of storing integer registers */
480 3, /* cost of reg,reg fld/fst */
481 {12, 12, 12}, /* cost of loading fp registers
482 in SFmode, DFmode and XFmode */
483 {4, 4, 4}, /* cost of storing fp registers in SFmode, DFmode and XFmode */
484 6, /* cost of moving MMX register */
485 {12, 12}, /* cost of loading MMX registers
486 in SImode and DImode */
487 {12, 12}, /* cost of storing MMX registers
488 in SImode and DImode */
489 6, /* cost of moving SSE register */
490 {12, 12, 12}, /* cost of loading SSE registers
491 in SImode, DImode and TImode */
492 {12, 12, 12}, /* cost of storing SSE registers
493 in SImode, DImode and TImode */
494 8, /* MMX or SSE register to integer */
495 128, /* size of prefetch block */
496 8, /* number of parallel prefetches */
497 1, /* Branch cost */
498 6, /* cost of FADD and FSUB insns. */
499 8, /* cost of FMUL instruction. */
500 40, /* cost of FDIV instruction. */
501 3, /* cost of FABS instruction. */
502 3, /* cost of FCHS instruction. */
503 44, /* cost of FSQRT instruction. */
504 };
505
506 const struct processor_costs *ix86_cost = &pentium_cost;
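/* ix86_cost initially points at the Pentium table only as a safe default;
   override_options below repoints it, either at size_cost when optimizing
   for size or at the table selected by -mtune= via processor_target_table.  */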
507
508 /* Processor feature/optimization bitmasks. */
509 #define m_386 (1<<PROCESSOR_I386)
510 #define m_486 (1<<PROCESSOR_I486)
511 #define m_PENT (1<<PROCESSOR_PENTIUM)
512 #define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
513 #define m_K6 (1<<PROCESSOR_K6)
514 #define m_ATHLON (1<<PROCESSOR_ATHLON)
515 #define m_PENT4 (1<<PROCESSOR_PENTIUM4)
516 #define m_K8 (1<<PROCESSOR_K8)
517 #define m_ATHLON_K8 (m_K8 | m_ATHLON)
518 #define m_NOCONA (1<<PROCESSOR_NOCONA)
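/* Each x86_* feature word below is consumed by ANDing it with a one-hot CPU
   mask: override_options tests (x86_3dnow_a & (1 << ix86_arch)) and
   (x86_accumulate_outgoing_args & TUNEMASK), and the TARGET_* convenience
   macros in i386.h wrap the same pattern for the rest of the backend.  */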
519
520 const int x86_use_leave = m_386 | m_K6 | m_ATHLON_K8;
521 const int x86_push_memory = m_386 | m_K6 | m_ATHLON_K8 | m_PENT4 | m_NOCONA;
522 const int x86_zero_extend_with_and = m_486 | m_PENT;
523 const int x86_movx = m_ATHLON_K8 | m_PPRO | m_PENT4 | m_NOCONA /* m_386 | m_K6 */;
524 const int x86_double_with_add = ~m_386;
525 const int x86_use_bit_test = m_386;
526 const int x86_unroll_strlen = m_486 | m_PENT | m_PPRO | m_ATHLON_K8 | m_K6;
527 const int x86_cmove = m_PPRO | m_ATHLON_K8 | m_PENT4 | m_NOCONA;
528 const int x86_3dnow_a = m_ATHLON_K8;
529 const int x86_deep_branch = m_PPRO | m_K6 | m_ATHLON_K8 | m_PENT4 | m_NOCONA;
530 const int x86_branch_hints = m_PENT4 | m_NOCONA;
531 const int x86_use_sahf = m_PPRO | m_K6 | m_PENT4 | m_NOCONA;
532 const int x86_partial_reg_stall = m_PPRO;
533 const int x86_use_loop = m_K6;
534 const int x86_use_fiop = ~(m_PPRO | m_ATHLON_K8 | m_PENT);
535 const int x86_use_mov0 = m_K6;
536 const int x86_use_cltd = ~(m_PENT | m_K6);
537 const int x86_read_modify_write = ~m_PENT;
538 const int x86_read_modify = ~(m_PENT | m_PPRO);
539 const int x86_split_long_moves = m_PPRO;
540 const int x86_promote_QImode = m_K6 | m_PENT | m_386 | m_486 | m_ATHLON_K8;
541 const int x86_fast_prefix = ~(m_PENT | m_486 | m_386);
542 const int x86_single_stringop = m_386 | m_PENT4 | m_NOCONA;
543 const int x86_qimode_math = ~(0);
544 const int x86_promote_qi_regs = 0;
545 const int x86_himode_math = ~(m_PPRO);
546 const int x86_promote_hi_regs = m_PPRO;
547 const int x86_sub_esp_4 = m_ATHLON_K8 | m_PPRO | m_PENT4 | m_NOCONA;
548 const int x86_sub_esp_8 = m_ATHLON_K8 | m_PPRO | m_386 | m_486 | m_PENT4 | m_NOCONA;
549 const int x86_add_esp_4 = m_ATHLON_K8 | m_K6 | m_PENT4 | m_NOCONA;
550 const int x86_add_esp_8 = m_ATHLON_K8 | m_PPRO | m_K6 | m_386 | m_486 | m_PENT4 | m_NOCONA;
551 const int x86_integer_DFmode_moves = ~(m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_PPRO);
552 const int x86_partial_reg_dependency = m_ATHLON_K8 | m_PENT4 | m_NOCONA;
553 const int x86_memory_mismatch_stall = m_ATHLON_K8 | m_PENT4 | m_NOCONA;
554 const int x86_accumulate_outgoing_args = m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_PPRO;
555 const int x86_prologue_using_move = m_ATHLON_K8 | m_PPRO;
556 const int x86_epilogue_using_move = m_ATHLON_K8 | m_PPRO;
557 const int x86_decompose_lea = m_PENT4 | m_NOCONA;
558 const int x86_shift1 = ~m_486;
559 const int x86_arch_always_fancy_math_387 = m_PENT | m_PPRO | m_ATHLON_K8 | m_PENT4 | m_NOCONA;
560 const int x86_sse_partial_reg_dependency = m_PENT4 | m_NOCONA | m_PPRO;
561 /* Set for machines where the type and dependencies are resolved on SSE register
562 parts instead of whole registers, so we may maintain just the lower part of
563 scalar values in the proper format, leaving the upper part undefined. */
564 const int x86_sse_partial_regs = m_ATHLON_K8;
565 /* Athlon optimizes partial-register FPS special case, thus avoiding the
566 need for extra instructions beforehand */
567 const int x86_sse_partial_regs_for_cvtsd2ss = 0;
568 const int x86_sse_typeless_stores = m_ATHLON_K8;
569 const int x86_sse_load0_by_pxor = m_PPRO | m_PENT4 | m_NOCONA;
570 const int x86_use_ffreep = m_ATHLON_K8;
571 const int x86_rep_movl_optimal = m_386 | m_PENT | m_PPRO | m_K6;
572 const int x86_inter_unit_moves = ~(m_ATHLON_K8);
573 const int x86_ext_80387_constants = m_K6 | m_ATHLON | m_PENT4 | m_NOCONA | m_PPRO;
574 /* Some CPU cores are not able to predict more than 4 branch instructions in
575 the 16-byte window. */
576 const int x86_four_jump_limit = m_PPRO | m_ATHLON_K8 | m_PENT4 | m_NOCONA;
577
578 /* If the average insn count for a single function invocation is
579 lower than this constant, emit fast (but longer) prologue and
580 epilogue code. */
581 #define FAST_PROLOGUE_INSN_COUNT 20
582
583 /* Names for the 8-bit (low), 8-bit (high), and 16-bit registers, respectively. */
584 static const char *const qi_reg_name[] = QI_REGISTER_NAMES;
585 static const char *const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES;
586 static const char *const hi_reg_name[] = HI_REGISTER_NAMES;
587
588 /* Array of the smallest class containing reg number REGNO, indexed by
589 REGNO. Used by REGNO_REG_CLASS in i386.h. */
590
591 enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] =
592 {
593 /* ax, dx, cx, bx */
594 AREG, DREG, CREG, BREG,
595 /* si, di, bp, sp */
596 SIREG, DIREG, NON_Q_REGS, NON_Q_REGS,
597 /* FP registers */
598 FP_TOP_REG, FP_SECOND_REG, FLOAT_REGS, FLOAT_REGS,
599 FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, FLOAT_REGS,
600 /* arg pointer */
601 NON_Q_REGS,
602 /* flags, fpsr, dirflag, frame */
603 NO_REGS, NO_REGS, NO_REGS, NON_Q_REGS,
604 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
605 SSE_REGS, SSE_REGS,
606 MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS,
607 MMX_REGS, MMX_REGS,
608 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
609 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
610 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
611 SSE_REGS, SSE_REGS,
612 };
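/* For example, REGNO_REG_CLASS (0) is AREG for %eax and REGNO_REG_CLASS (1)
   is DREG for %edx, while %ebp and %esp fall into NON_Q_REGS because they
   have no addressable 8-bit low parts in 32-bit mode.  */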
613
614 /* The "default" register map used in 32bit mode. */
615
616 int const dbx_register_map[FIRST_PSEUDO_REGISTER] =
617 {
618 0, 2, 1, 3, 6, 7, 4, 5, /* general regs */
619 12, 13, 14, 15, 16, 17, 18, 19, /* fp regs */
620 -1, -1, -1, -1, -1, /* arg, flags, fpsr, dir, frame */
621 21, 22, 23, 24, 25, 26, 27, 28, /* SSE */
622 29, 30, 31, 32, 33, 34, 35, 36, /* MMX */
623 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
624 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
625 };
626
627 static int const x86_64_int_parameter_registers[6] =
628 {
629 5 /*RDI*/, 4 /*RSI*/, 1 /*RDX*/, 2 /*RCX*/,
630 FIRST_REX_INT_REG /*R8 */, FIRST_REX_INT_REG + 1 /*R9 */
631 };
632
633 static int const x86_64_int_return_registers[4] =
634 {
635 0 /*RAX*/, 1 /*RDX*/, 5 /*RDI*/, 4 /*RSI*/
636 };
637
638 /* The "default" register map used in 64bit mode. */
639 int const dbx64_register_map[FIRST_PSEUDO_REGISTER] =
640 {
641 0, 1, 2, 3, 4, 5, 6, 7, /* general regs */
642 33, 34, 35, 36, 37, 38, 39, 40, /* fp regs */
643 -1, -1, -1, -1, -1, /* arg, flags, fpsr, dir, frame */
644 17, 18, 19, 20, 21, 22, 23, 24, /* SSE */
645 41, 42, 43, 44, 45, 46, 47, 48, /* MMX */
646 8,9,10,11,12,13,14,15, /* extended integer registers */
647 25, 26, 27, 28, 29, 30, 31, 32, /* extended SSE registers */
648 };
649
650 /* Define the register numbers to be used in Dwarf debugging information.
651 The SVR4 reference port C compiler uses the following register numbers
652 in its Dwarf output code:
653 0 for %eax (gcc regno = 0)
654 1 for %ecx (gcc regno = 2)
655 2 for %edx (gcc regno = 1)
656 3 for %ebx (gcc regno = 3)
657 4 for %esp (gcc regno = 7)
658 5 for %ebp (gcc regno = 6)
659 6 for %esi (gcc regno = 4)
660 7 for %edi (gcc regno = 5)
661 The following three DWARF register numbers are never generated by
662 the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
663 believes these numbers have these meanings.
664 8 for %eip (no gcc equivalent)
665 9 for %eflags (gcc regno = 17)
666 10 for %trapno (no gcc equivalent)
667 It is not at all clear how we should number the FP stack registers
668 for the x86 architecture. If the version of SDB on x86/svr4 were
669 a bit less brain dead with respect to floating-point then we would
670 have a precedent to follow with respect to DWARF register numbers
671 for x86 FP registers, but the SDB on x86/svr4 is so completely
672 broken with respect to FP registers that it is hardly worth thinking
673 of it as something to strive for compatibility with.
674 The version of x86/svr4 SDB I have at the moment does (partially)
675 seem to believe that DWARF register number 11 is associated with
676 the x86 register %st(0), but that's about all. Higher DWARF
677 register numbers don't seem to be associated with anything in
678 particular, and even for DWARF regno 11, SDB only seems to under-
679 stand that it should say that a variable lives in %st(0) (when
680 asked via an `=' command) if we said it was in DWARF regno 11,
681 but SDB still prints garbage when asked for the value of the
682 variable in question (via a `/' command).
683 (Also note that the labels SDB prints for various FP stack regs
684 when doing an `x' command are all wrong.)
685 Note that these problems generally don't affect the native SVR4
686 C compiler because it doesn't allow the use of -O with -g and
687 because when it is *not* optimizing, it allocates a memory
688 location for each floating-point variable, and the memory
689 location is what gets described in the DWARF AT_location
690 attribute for the variable in question.
691 Regardless of the severe mental illness of the x86/svr4 SDB, we
692 do something sensible here and we use the following DWARF
693 register numbers. Note that these are all stack-top-relative
694 numbers.
695 11 for %st(0) (gcc regno = 8)
696 12 for %st(1) (gcc regno = 9)
697 13 for %st(2) (gcc regno = 10)
698 14 for %st(3) (gcc regno = 11)
699 15 for %st(4) (gcc regno = 12)
700 16 for %st(5) (gcc regno = 13)
701 17 for %st(6) (gcc regno = 14)
702 18 for %st(7) (gcc regno = 15)
703 */
704 int const svr4_dbx_register_map[FIRST_PSEUDO_REGISTER] =
705 {
706 0, 2, 1, 3, 6, 7, 5, 4, /* general regs */
707 11, 12, 13, 14, 15, 16, 17, 18, /* fp regs */
708 -1, 9, -1, -1, -1, /* arg, flags, fpsr, dir, frame */
709 21, 22, 23, 24, 25, 26, 27, 28, /* SSE registers */
710 29, 30, 31, 32, 33, 34, 35, 36, /* MMX registers */
711 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
712 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
713 };
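/* So, following the numbering described above, %ecx (gcc regno 2) is emitted
   as DWARF register 1, %eflags (gcc regno 17) as register 9, and %st(0)
   (gcc regno 8) as the stack-top-relative register 11.  */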
714
715 /* Test and compare insns in i386.md store the information needed to
716 generate branch and scc insns here. */
717
718 rtx ix86_compare_op0 = NULL_RTX;
719 rtx ix86_compare_op1 = NULL_RTX;
720
721 #define MAX_386_STACK_LOCALS 3
722 /* Size of the register save area. */
723 #define X86_64_VARARGS_SIZE (REGPARM_MAX * UNITS_PER_WORD + SSE_REGPARM_MAX * 16)
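/* With the 64-bit values of REGPARM_MAX (six integer argument registers) and
   SSE_REGPARM_MAX (eight SSE argument registers) from i386.h, and
   UNITS_PER_WORD of 8, this is 6*8 + 8*16 = 176 bytes: the register save
   area the x86-64 ABI requires for variadic functions.  */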
724
725 /* Define the structure for the machine field in struct function. */
726
727 struct stack_local_entry GTY(())
728 {
729 unsigned short mode;
730 unsigned short n;
731 rtx rtl;
732 struct stack_local_entry *next;
733 };
734
735 /* Structure describing stack frame layout.
736 Stack grows downward:
737
738 [arguments]
739 <- ARG_POINTER
740 saved pc
741
742 saved frame pointer if frame_pointer_needed
743 <- HARD_FRAME_POINTER
744 [saved regs]
745
746 [padding1] \
747 )
748 [va_arg registers] (
749 > to_allocate <- FRAME_POINTER
750 [frame] (
751 )
752 [padding2] /
753 */
754 struct ix86_frame
755 {
756 int nregs;
757 int padding1;
758 int va_arg_size;
759 HOST_WIDE_INT frame;
760 int padding2;
761 int outgoing_arguments_size;
762 int red_zone_size;
763
764 HOST_WIDE_INT to_allocate;
765 /* The offsets relative to ARG_POINTER. */
766 HOST_WIDE_INT frame_pointer_offset;
767 HOST_WIDE_INT hard_frame_pointer_offset;
768 HOST_WIDE_INT stack_pointer_offset;
769
770 /* When save_regs_using_mov is set, emit prologue using
771 move instead of push instructions. */
772 bool save_regs_using_mov;
773 };
774
775 /* Used to enable/disable debugging features. */
776 const char *ix86_debug_arg_string, *ix86_debug_addr_string;
777 /* Code model option as passed by user. */
778 const char *ix86_cmodel_string;
779 /* Parsed value. */
780 enum cmodel ix86_cmodel;
781 /* Asm dialect. */
782 const char *ix86_asm_string;
783 enum asm_dialect ix86_asm_dialect = ASM_ATT;
784 /* TLS dialect. */
785 const char *ix86_tls_dialect_string;
786 enum tls_dialect ix86_tls_dialect = TLS_DIALECT_GNU;
787
788 /* Which unit we are generating floating point math for. */
789 enum fpmath_unit ix86_fpmath;
790
791 /* Which CPU we are scheduling for. */
792 enum processor_type ix86_tune;
793 /* Which instruction set architecture to use. */
794 enum processor_type ix86_arch;
795
796 /* Strings to hold which cpu and instruction set architecture to use. */
797 const char *ix86_tune_string; /* for -mtune=<xxx> */
798 const char *ix86_arch_string; /* for -march=<xxx> */
799 const char *ix86_fpmath_string; /* for -mfpmath=<xxx> */
800
801 /* # of registers to use to pass arguments. */
802 const char *ix86_regparm_string;
803
804 /* True if the SSE prefetch instruction is not a NOP. */
805 int x86_prefetch_sse;
806
807 /* ix86_regparm_string as a number */
808 int ix86_regparm;
809
810 /* Alignment to use for loops and jumps: */
811
812 /* Power of two alignment for loops. */
813 const char *ix86_align_loops_string;
814
815 /* Power of two alignment for non-loop jumps. */
816 const char *ix86_align_jumps_string;
817
818 /* Power of two alignment for stack boundary in bytes. */
819 const char *ix86_preferred_stack_boundary_string;
820
821 /* Preferred alignment for stack boundary in bits. */
822 int ix86_preferred_stack_boundary;
823
824 /* Values 1-5: see jump.c */
825 int ix86_branch_cost;
826 const char *ix86_branch_cost_string;
827
828 /* Power of two alignment for functions. */
829 const char *ix86_align_funcs_string;
830
831 /* Prefix built by ASM_GENERATE_INTERNAL_LABEL. */
832 static char internal_label_prefix[16];
833 static int internal_label_prefix_len;
834 \f
835 static int local_symbolic_operand (rtx, enum machine_mode);
836 static int tls_symbolic_operand_1 (rtx, enum tls_model);
837 static void output_pic_addr_const (FILE *, rtx, int);
838 static void put_condition_code (enum rtx_code, enum machine_mode,
839 int, int, FILE *);
840 static const char *get_some_local_dynamic_name (void);
841 static int get_some_local_dynamic_name_1 (rtx *, void *);
842 static rtx maybe_get_pool_constant (rtx);
843 static rtx ix86_expand_int_compare (enum rtx_code, rtx, rtx);
844 static enum rtx_code ix86_prepare_fp_compare_args (enum rtx_code, rtx *,
845 rtx *);
846 static bool ix86_fixed_condition_code_regs (unsigned int *, unsigned int *);
847 static enum machine_mode ix86_cc_modes_compatible (enum machine_mode,
848 enum machine_mode);
849 static rtx get_thread_pointer (int);
850 static rtx legitimize_tls_address (rtx, enum tls_model, int);
851 static void get_pc_thunk_name (char [32], unsigned int);
852 static rtx gen_push (rtx);
853 static int memory_address_length (rtx addr);
854 static int ix86_flags_dependant (rtx, rtx, enum attr_type);
855 static int ix86_agi_dependant (rtx, rtx, enum attr_type);
856 static struct machine_function * ix86_init_machine_status (void);
857 static int ix86_split_to_parts (rtx, rtx *, enum machine_mode);
858 static int ix86_nsaved_regs (void);
859 static void ix86_emit_save_regs (void);
860 static void ix86_emit_save_regs_using_mov (rtx, HOST_WIDE_INT);
861 static void ix86_emit_restore_regs_using_mov (rtx, HOST_WIDE_INT, int);
862 static void ix86_output_function_epilogue (FILE *, HOST_WIDE_INT);
863 static HOST_WIDE_INT ix86_GOT_alias_set (void);
864 static void ix86_adjust_counter (rtx, HOST_WIDE_INT);
865 static rtx ix86_expand_aligntest (rtx, int);
866 static void ix86_expand_strlensi_unroll_1 (rtx, rtx, rtx);
867 static int ix86_issue_rate (void);
868 static int ix86_adjust_cost (rtx, rtx, rtx, int);
869 static int ia32_multipass_dfa_lookahead (void);
870 static void ix86_init_mmx_sse_builtins (void);
871 static rtx x86_this_parameter (tree);
872 static void x86_output_mi_thunk (FILE *, tree, HOST_WIDE_INT,
873 HOST_WIDE_INT, tree);
874 static bool x86_can_output_mi_thunk (tree, HOST_WIDE_INT, HOST_WIDE_INT, tree);
875 static void x86_file_start (void);
876 static void ix86_reorg (void);
877 static bool ix86_expand_carry_flag_compare (enum rtx_code, rtx, rtx, rtx*);
878 static tree ix86_build_builtin_va_list (void);
879 static void ix86_setup_incoming_varargs (CUMULATIVE_ARGS *, enum machine_mode,
880 tree, int *, int);
881 static tree ix86_gimplify_va_arg (tree, tree, tree *, tree *);
882
883 struct ix86_address
884 {
885 rtx base, index, disp;
886 HOST_WIDE_INT scale;
887 enum ix86_address_seg { SEG_DEFAULT, SEG_FS, SEG_GS } seg;
888 };
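/* An ix86_address describes the canonical x86 addressing form
   base + index*scale + disp, optionally with a segment override.  For
   instance, the operand 4(%ebx,%ecx,8) decomposes into base = %ebx,
   index = %ecx, scale = 8, disp = 4 and seg = SEG_DEFAULT.  */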
889
890 static int ix86_decompose_address (rtx, struct ix86_address *);
891 static int ix86_address_cost (rtx);
892 static bool ix86_cannot_force_const_mem (rtx);
893 static rtx ix86_delegitimize_address (rtx);
894
895 struct builtin_description;
896 static rtx ix86_expand_sse_comi (const struct builtin_description *,
897 tree, rtx);
898 static rtx ix86_expand_sse_compare (const struct builtin_description *,
899 tree, rtx);
900 static rtx ix86_expand_unop1_builtin (enum insn_code, tree, rtx);
901 static rtx ix86_expand_unop_builtin (enum insn_code, tree, rtx, int);
902 static rtx ix86_expand_binop_builtin (enum insn_code, tree, rtx);
903 static rtx ix86_expand_store_builtin (enum insn_code, tree);
904 static rtx safe_vector_operand (rtx, enum machine_mode);
905 static enum rtx_code ix86_fp_compare_code_to_integer (enum rtx_code);
906 static void ix86_fp_comparison_codes (enum rtx_code code, enum rtx_code *,
907 enum rtx_code *, enum rtx_code *);
908 static rtx ix86_expand_fp_compare (enum rtx_code, rtx, rtx, rtx, rtx *, rtx *);
909 static int ix86_fp_comparison_arithmetics_cost (enum rtx_code code);
910 static int ix86_fp_comparison_fcomi_cost (enum rtx_code code);
911 static int ix86_fp_comparison_sahf_cost (enum rtx_code code);
912 static int ix86_fp_comparison_cost (enum rtx_code code);
913 static unsigned int ix86_select_alt_pic_regnum (void);
914 static int ix86_save_reg (unsigned int, int);
915 static void ix86_compute_frame_layout (struct ix86_frame *);
916 static int ix86_comp_type_attributes (tree, tree);
917 static int ix86_function_regparm (tree, tree);
918 const struct attribute_spec ix86_attribute_table[];
919 static bool ix86_function_ok_for_sibcall (tree, tree);
920 static tree ix86_handle_cdecl_attribute (tree *, tree, tree, int, bool *);
921 static tree ix86_handle_regparm_attribute (tree *, tree, tree, int, bool *);
922 static int ix86_value_regno (enum machine_mode);
923 static bool contains_128bit_aligned_vector_p (tree);
924 static rtx ix86_struct_value_rtx (tree, int);
925 static bool ix86_ms_bitfield_layout_p (tree);
926 static tree ix86_handle_struct_attribute (tree *, tree, tree, int, bool *);
927 static int extended_reg_mentioned_1 (rtx *, void *);
928 static bool ix86_rtx_costs (rtx, int, int, int *);
929 static int min_insn_size (rtx);
930 static tree ix86_md_asm_clobbers (tree clobbers);
931
932 #if defined (DO_GLOBAL_CTORS_BODY) && defined (HAS_INIT_SECTION)
933 static void ix86_svr3_asm_out_constructor (rtx, int);
934 #endif
935
936 /* Register class used for passing a given 64-bit part of the argument.
937 These represent classes as documented by the psABI, with the exception
938 of the SSESF and SSEDF classes, which are basically the SSE class, except that
939 gcc will use SFmode or DFmode moves instead of DImode to avoid reformatting penalties.
940 
941 Similarly we play games with INTEGERSI_CLASS to use cheaper SImode moves
942 whenever possible (i.e. when the upper half is just padding).
943 */
944 enum x86_64_reg_class
945 {
946 X86_64_NO_CLASS,
947 X86_64_INTEGER_CLASS,
948 X86_64_INTEGERSI_CLASS,
949 X86_64_SSE_CLASS,
950 X86_64_SSESF_CLASS,
951 X86_64_SSEDF_CLASS,
952 X86_64_SSEUP_CLASS,
953 X86_64_X87_CLASS,
954 X86_64_X87UP_CLASS,
955 X86_64_MEMORY_CLASS
956 };
957 static const char * const x86_64_reg_class_name[] =
958 {"no", "integer", "integerSI", "sse", "sseSF", "sseDF", "sseup", "x87", "x87up", "no"};
959
960 #define MAX_CLASSES 4
961 static int classify_argument (enum machine_mode, tree,
962 enum x86_64_reg_class [MAX_CLASSES], int);
963 static int examine_argument (enum machine_mode, tree, int, int *, int *);
964 static rtx construct_container (enum machine_mode, tree, int, int, int,
965 const int *, int);
966 static enum x86_64_reg_class merge_classes (enum x86_64_reg_class,
967 enum x86_64_reg_class);
968
969 /* Table of constants used by fldpi, fldln2, etc.... */
970 static REAL_VALUE_TYPE ext_80387_constants_table [5];
971 static bool ext_80387_constants_init = 0;
972 static void init_ext_80387_constants (void);
973 \f
974 /* Initialize the GCC target structure. */
975 #undef TARGET_ATTRIBUTE_TABLE
976 #define TARGET_ATTRIBUTE_TABLE ix86_attribute_table
977 #ifdef TARGET_DLLIMPORT_DECL_ATTRIBUTES
978 # undef TARGET_MERGE_DECL_ATTRIBUTES
979 # define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
980 #endif
981
982 #undef TARGET_COMP_TYPE_ATTRIBUTES
983 #define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes
984
985 #undef TARGET_INIT_BUILTINS
986 #define TARGET_INIT_BUILTINS ix86_init_builtins
987
988 #undef TARGET_EXPAND_BUILTIN
989 #define TARGET_EXPAND_BUILTIN ix86_expand_builtin
990
991 #undef TARGET_ASM_FUNCTION_EPILOGUE
992 #define TARGET_ASM_FUNCTION_EPILOGUE ix86_output_function_epilogue
993
994 #undef TARGET_ASM_OPEN_PAREN
995 #define TARGET_ASM_OPEN_PAREN ""
996 #undef TARGET_ASM_CLOSE_PAREN
997 #define TARGET_ASM_CLOSE_PAREN ""
998
999 #undef TARGET_ASM_ALIGNED_HI_OP
1000 #define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT
1001 #undef TARGET_ASM_ALIGNED_SI_OP
1002 #define TARGET_ASM_ALIGNED_SI_OP ASM_LONG
1003 #ifdef ASM_QUAD
1004 #undef TARGET_ASM_ALIGNED_DI_OP
1005 #define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD
1006 #endif
1007
1008 #undef TARGET_ASM_UNALIGNED_HI_OP
1009 #define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
1010 #undef TARGET_ASM_UNALIGNED_SI_OP
1011 #define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
1012 #undef TARGET_ASM_UNALIGNED_DI_OP
1013 #define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP
1014
1015 #undef TARGET_SCHED_ADJUST_COST
1016 #define TARGET_SCHED_ADJUST_COST ix86_adjust_cost
1017 #undef TARGET_SCHED_ISSUE_RATE
1018 #define TARGET_SCHED_ISSUE_RATE ix86_issue_rate
1019 #undef TARGET_SCHED_USE_DFA_PIPELINE_INTERFACE
1020 #define TARGET_SCHED_USE_DFA_PIPELINE_INTERFACE hook_int_void_1
1021 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
1022 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
1023 ia32_multipass_dfa_lookahead
1024
1025 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
1026 #define TARGET_FUNCTION_OK_FOR_SIBCALL ix86_function_ok_for_sibcall
1027
1028 #ifdef HAVE_AS_TLS
1029 #undef TARGET_HAVE_TLS
1030 #define TARGET_HAVE_TLS true
1031 #endif
1032 #undef TARGET_CANNOT_FORCE_CONST_MEM
1033 #define TARGET_CANNOT_FORCE_CONST_MEM ix86_cannot_force_const_mem
1034
1035 #undef TARGET_DELEGITIMIZE_ADDRESS
1036 #define TARGET_DELEGITIMIZE_ADDRESS ix86_delegitimize_address
1037
1038 #undef TARGET_MS_BITFIELD_LAYOUT_P
1039 #define TARGET_MS_BITFIELD_LAYOUT_P ix86_ms_bitfield_layout_p
1040
1041 #undef TARGET_ASM_OUTPUT_MI_THUNK
1042 #define TARGET_ASM_OUTPUT_MI_THUNK x86_output_mi_thunk
1043 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
1044 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK x86_can_output_mi_thunk
1045
1046 #undef TARGET_ASM_FILE_START
1047 #define TARGET_ASM_FILE_START x86_file_start
1048
1049 #undef TARGET_RTX_COSTS
1050 #define TARGET_RTX_COSTS ix86_rtx_costs
1051 #undef TARGET_ADDRESS_COST
1052 #define TARGET_ADDRESS_COST ix86_address_cost
1053
1054 #undef TARGET_FIXED_CONDITION_CODE_REGS
1055 #define TARGET_FIXED_CONDITION_CODE_REGS ix86_fixed_condition_code_regs
1056 #undef TARGET_CC_MODES_COMPATIBLE
1057 #define TARGET_CC_MODES_COMPATIBLE ix86_cc_modes_compatible
1058
1059 #undef TARGET_MACHINE_DEPENDENT_REORG
1060 #define TARGET_MACHINE_DEPENDENT_REORG ix86_reorg
1061
1062 #undef TARGET_BUILD_BUILTIN_VA_LIST
1063 #define TARGET_BUILD_BUILTIN_VA_LIST ix86_build_builtin_va_list
1064
1065 #undef TARGET_MD_ASM_CLOBBERS
1066 #define TARGET_MD_ASM_CLOBBERS ix86_md_asm_clobbers
1067
1068 #undef TARGET_PROMOTE_PROTOTYPES
1069 #define TARGET_PROMOTE_PROTOTYPES hook_bool_tree_true
1070 #undef TARGET_STRUCT_VALUE_RTX
1071 #define TARGET_STRUCT_VALUE_RTX ix86_struct_value_rtx
1072 #undef TARGET_SETUP_INCOMING_VARARGS
1073 #define TARGET_SETUP_INCOMING_VARARGS ix86_setup_incoming_varargs
1074
1075 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
1076 #define TARGET_GIMPLIFY_VA_ARG_EXPR ix86_gimplify_va_arg
1077
1078 struct gcc_target targetm = TARGET_INITIALIZER;
1079
1080 \f
1081 /* The svr4 ABI for the i386 says that records and unions are returned
1082 in memory. */
1083 #ifndef DEFAULT_PCC_STRUCT_RETURN
1084 #define DEFAULT_PCC_STRUCT_RETURN 1
1085 #endif
1086
1087 /* Sometimes certain combinations of command options do not make
1088 sense on a particular target machine. You can define a macro
1089 `OVERRIDE_OPTIONS' to take account of this. This macro, if
1090 defined, is executed once just after all the command options have
1091 been parsed.
1092
1093 Don't use this macro to turn on various extra optimizations for
1094 `-O'. That is what `OPTIMIZATION_OPTIONS' is for. */
1095
1096 void
1097 override_options (void)
1098 {
1099 int i;
1100 /* Comes from final.c -- no real reason to change it. */
1101 #define MAX_CODE_ALIGN 16
1102
1103 static struct ptt
1104 {
1105 const struct processor_costs *cost; /* Processor costs */
1106 const int target_enable; /* Target flags to enable. */
1107 const int target_disable; /* Target flags to disable. */
1108 const int align_loop; /* Default alignments. */
1109 const int align_loop_max_skip;
1110 const int align_jump;
1111 const int align_jump_max_skip;
1112 const int align_func;
1113 }
1114 const processor_target_table[PROCESSOR_max] =
1115 {
1116 {&i386_cost, 0, 0, 4, 3, 4, 3, 4},
1117 {&i486_cost, 0, 0, 16, 15, 16, 15, 16},
1118 {&pentium_cost, 0, 0, 16, 7, 16, 7, 16},
1119 {&pentiumpro_cost, 0, 0, 16, 15, 16, 7, 16},
1120 {&k6_cost, 0, 0, 32, 7, 32, 7, 32},
1121 {&athlon_cost, 0, 0, 16, 7, 16, 7, 16},
1122 {&pentium4_cost, 0, 0, 0, 0, 0, 0, 0},
1123 {&k8_cost, 0, 0, 16, 7, 16, 7, 16},
1124 {&nocona_cost, 0, 0, 0, 0, 0, 0, 0}
1125 };
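/* Reading one row against the fields above: the k6 entry
   {&k6_cost, 0, 0, 32, 7, 32, 7, 32} selects the K6 cost table, enables and
   disables no target flags, aligns loops and jumps to 32 bytes while
   skipping at most 7 bytes of padding for each, and aligns functions to
   32 bytes.  */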
1126
1127 static const char * const cpu_names[] = TARGET_CPU_DEFAULT_NAMES;
1128 static struct pta
1129 {
1130 const char *const name; /* processor name or nickname. */
1131 const enum processor_type processor;
1132 const enum pta_flags
1133 {
1134 PTA_SSE = 1,
1135 PTA_SSE2 = 2,
1136 PTA_SSE3 = 4,
1137 PTA_MMX = 8,
1138 PTA_PREFETCH_SSE = 16,
1139 PTA_3DNOW = 32,
1140 PTA_3DNOW_A = 64,
1141 PTA_64BIT = 128
1142 } flags;
1143 }
1144 const processor_alias_table[] =
1145 {
1146 {"i386", PROCESSOR_I386, 0},
1147 {"i486", PROCESSOR_I486, 0},
1148 {"i586", PROCESSOR_PENTIUM, 0},
1149 {"pentium", PROCESSOR_PENTIUM, 0},
1150 {"pentium-mmx", PROCESSOR_PENTIUM, PTA_MMX},
1151 {"winchip-c6", PROCESSOR_I486, PTA_MMX},
1152 {"winchip2", PROCESSOR_I486, PTA_MMX | PTA_3DNOW},
1153 {"c3", PROCESSOR_I486, PTA_MMX | PTA_3DNOW},
1154 {"c3-2", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_PREFETCH_SSE | PTA_SSE},
1155 {"i686", PROCESSOR_PENTIUMPRO, 0},
1156 {"pentiumpro", PROCESSOR_PENTIUMPRO, 0},
1157 {"pentium2", PROCESSOR_PENTIUMPRO, PTA_MMX},
1158 {"pentium3", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_PREFETCH_SSE},
1159 {"pentium3m", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_PREFETCH_SSE},
1160 {"pentium-m", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_PREFETCH_SSE | PTA_SSE2},
1161 {"pentium4", PROCESSOR_PENTIUM4, PTA_SSE | PTA_SSE2
1162 | PTA_MMX | PTA_PREFETCH_SSE},
1163 {"pentium4m", PROCESSOR_PENTIUM4, PTA_SSE | PTA_SSE2
1164 | PTA_MMX | PTA_PREFETCH_SSE},
1165 {"prescott", PROCESSOR_NOCONA, PTA_SSE | PTA_SSE2 | PTA_SSE3
1166 | PTA_MMX | PTA_PREFETCH_SSE},
1167 {"nocona", PROCESSOR_NOCONA, PTA_SSE | PTA_SSE2 | PTA_SSE3 | PTA_64BIT
1168 | PTA_MMX | PTA_PREFETCH_SSE},
1169 {"k6", PROCESSOR_K6, PTA_MMX},
1170 {"k6-2", PROCESSOR_K6, PTA_MMX | PTA_3DNOW},
1171 {"k6-3", PROCESSOR_K6, PTA_MMX | PTA_3DNOW},
1172 {"athlon", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1173 | PTA_3DNOW_A},
1174 {"athlon-tbird", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE
1175 | PTA_3DNOW | PTA_3DNOW_A},
1176 {"athlon-4", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1177 | PTA_3DNOW_A | PTA_SSE},
1178 {"athlon-xp", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1179 | PTA_3DNOW_A | PTA_SSE},
1180 {"athlon-mp", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1181 | PTA_3DNOW_A | PTA_SSE},
1182 {"x86-64", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_64BIT
1183 | PTA_SSE | PTA_SSE2 },
1184 {"k8", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
1185 | PTA_3DNOW_A | PTA_SSE | PTA_SSE2},
1186 {"opteron", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
1187 | PTA_3DNOW_A | PTA_SSE | PTA_SSE2},
1188 {"athlon64", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
1189 | PTA_3DNOW_A | PTA_SSE | PTA_SSE2},
1190 {"athlon-fx", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
1191 | PTA_3DNOW_A | PTA_SSE | PTA_SSE2},
1192 };
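/* So, for example, -march=athlon-xp selects PROCESSOR_ATHLON and implies
   MMX, 3DNow!, the Athlon 3DNow! extensions, SSE and the SSE prefetch
   instructions, while -march=nocona implies MMX, SSE through SSE3, SSE
   prefetch and, via PTA_64BIT, x86-64 capability.  */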
1193
1194 int const pta_size = ARRAY_SIZE (processor_alias_table);
1195
1196 /* Set the default values for switches whose default depends on TARGET_64BIT
1197 in case they weren't overridden by command line options. */
1198 if (TARGET_64BIT)
1199 {
1200 if (flag_omit_frame_pointer == 2)
1201 flag_omit_frame_pointer = 1;
1202 if (flag_asynchronous_unwind_tables == 2)
1203 flag_asynchronous_unwind_tables = 1;
1204 if (flag_pcc_struct_return == 2)
1205 flag_pcc_struct_return = 0;
1206 }
1207 else
1208 {
1209 if (flag_omit_frame_pointer == 2)
1210 flag_omit_frame_pointer = 0;
1211 if (flag_asynchronous_unwind_tables == 2)
1212 flag_asynchronous_unwind_tables = 0;
1213 if (flag_pcc_struct_return == 2)
1214 flag_pcc_struct_return = DEFAULT_PCC_STRUCT_RETURN;
1215 }
1216
1217 #ifdef SUBTARGET_OVERRIDE_OPTIONS
1218 SUBTARGET_OVERRIDE_OPTIONS;
1219 #endif
1220
1221 if (!ix86_tune_string && ix86_arch_string)
1222 ix86_tune_string = ix86_arch_string;
1223 if (!ix86_tune_string)
1224 ix86_tune_string = cpu_names [TARGET_CPU_DEFAULT];
1225 if (!ix86_arch_string)
1226 ix86_arch_string = TARGET_64BIT ? "x86-64" : "i386";
1227
1228 if (ix86_cmodel_string != 0)
1229 {
1230 if (!strcmp (ix86_cmodel_string, "small"))
1231 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
1232 else if (flag_pic)
1233 sorry ("code model %s not supported in PIC mode", ix86_cmodel_string);
1234 else if (!strcmp (ix86_cmodel_string, "32"))
1235 ix86_cmodel = CM_32;
1236 else if (!strcmp (ix86_cmodel_string, "kernel") && !flag_pic)
1237 ix86_cmodel = CM_KERNEL;
1238 else if (!strcmp (ix86_cmodel_string, "medium") && !flag_pic)
1239 ix86_cmodel = CM_MEDIUM;
1240 else if (!strcmp (ix86_cmodel_string, "large") && !flag_pic)
1241 ix86_cmodel = CM_LARGE;
1242 else
1243 error ("bad value (%s) for -mcmodel= switch", ix86_cmodel_string);
1244 }
1245 else
1246 {
1247 ix86_cmodel = CM_32;
1248 if (TARGET_64BIT)
1249 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
1250 }
1251 if (ix86_asm_string != 0)
1252 {
1253 if (!strcmp (ix86_asm_string, "intel"))
1254 ix86_asm_dialect = ASM_INTEL;
1255 else if (!strcmp (ix86_asm_string, "att"))
1256 ix86_asm_dialect = ASM_ATT;
1257 else
1258 error ("bad value (%s) for -masm= switch", ix86_asm_string);
1259 }
1260 if ((TARGET_64BIT == 0) != (ix86_cmodel == CM_32))
1261 error ("code model `%s' not supported in the %s bit mode",
1262 ix86_cmodel_string, TARGET_64BIT ? "64" : "32");
1263 if (ix86_cmodel == CM_LARGE)
1264 sorry ("code model `large' not supported yet");
1265 if ((TARGET_64BIT != 0) != ((target_flags & MASK_64BIT) != 0))
1266 sorry ("%i-bit mode not compiled in",
1267 (target_flags & MASK_64BIT) ? 64 : 32);
1268
1269 for (i = 0; i < pta_size; i++)
1270 if (! strcmp (ix86_arch_string, processor_alias_table[i].name))
1271 {
1272 ix86_arch = processor_alias_table[i].processor;
1273 /* Default cpu tuning to the architecture. */
1274 ix86_tune = ix86_arch;
1275 if (processor_alias_table[i].flags & PTA_MMX
1276 && !(target_flags_explicit & MASK_MMX))
1277 target_flags |= MASK_MMX;
1278 if (processor_alias_table[i].flags & PTA_3DNOW
1279 && !(target_flags_explicit & MASK_3DNOW))
1280 target_flags |= MASK_3DNOW;
1281 if (processor_alias_table[i].flags & PTA_3DNOW_A
1282 && !(target_flags_explicit & MASK_3DNOW_A))
1283 target_flags |= MASK_3DNOW_A;
1284 if (processor_alias_table[i].flags & PTA_SSE
1285 && !(target_flags_explicit & MASK_SSE))
1286 target_flags |= MASK_SSE;
1287 if (processor_alias_table[i].flags & PTA_SSE2
1288 && !(target_flags_explicit & MASK_SSE2))
1289 target_flags |= MASK_SSE2;
1290 if (processor_alias_table[i].flags & PTA_SSE3
1291 && !(target_flags_explicit & MASK_SSE3))
1292 target_flags |= MASK_SSE3;
1293 if (processor_alias_table[i].flags & PTA_PREFETCH_SSE)
1294 x86_prefetch_sse = true;
1295 if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
1296 error ("CPU you selected does not support x86-64 instruction set");
1297 break;
1298 }
1299
1300 if (i == pta_size)
1301 error ("bad value (%s) for -march= switch", ix86_arch_string);
1302
1303 for (i = 0; i < pta_size; i++)
1304 if (! strcmp (ix86_tune_string, processor_alias_table[i].name))
1305 {
1306 ix86_tune = processor_alias_table[i].processor;
1307 if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
1308 error ("CPU you selected does not support x86-64 instruction set");
1309 break;
1310 }
1311 if (i == pta_size)
1312 error ("bad value (%s) for -mtune= switch", ix86_tune_string);
1313
1314 if (optimize_size)
1315 ix86_cost = &size_cost;
1316 else
1317 ix86_cost = processor_target_table[ix86_tune].cost;
1318 target_flags |= processor_target_table[ix86_tune].target_enable;
1319 target_flags &= ~processor_target_table[ix86_tune].target_disable;
1320
1321 /* Arrange to set up i386_stack_locals for all functions. */
1322 init_machine_status = ix86_init_machine_status;
1323
1324 /* Validate -mregparm= value. */
1325 if (ix86_regparm_string)
1326 {
1327 i = atoi (ix86_regparm_string);
1328 if (i < 0 || i > REGPARM_MAX)
1329 error ("-mregparm=%d is not between 0 and %d", i, REGPARM_MAX);
1330 else
1331 ix86_regparm = i;
1332 }
1333 else
1334 if (TARGET_64BIT)
1335 ix86_regparm = REGPARM_MAX;
1336
1337 /* If the user has provided any of the -malign-* options,
1338 warn and use that value only if -falign-* is not set.
1339 Remove this code in GCC 3.2 or later. */
1340 if (ix86_align_loops_string)
1341 {
1342 warning ("-malign-loops is obsolete, use -falign-loops");
1343 if (align_loops == 0)
1344 {
1345 i = atoi (ix86_align_loops_string);
1346 if (i < 0 || i > MAX_CODE_ALIGN)
1347 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
1348 else
1349 align_loops = 1 << i;
1350 }
1351 }
1352
1353 if (ix86_align_jumps_string)
1354 {
1355 warning ("-malign-jumps is obsolete, use -falign-jumps");
1356 if (align_jumps == 0)
1357 {
1358 i = atoi (ix86_align_jumps_string);
1359 if (i < 0 || i > MAX_CODE_ALIGN)
1360 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
1361 else
1362 align_jumps = 1 << i;
1363 }
1364 }
1365
1366 if (ix86_align_funcs_string)
1367 {
1368 warning ("-malign-functions is obsolete, use -falign-functions");
1369 if (align_functions == 0)
1370 {
1371 i = atoi (ix86_align_funcs_string);
1372 if (i < 0 || i > MAX_CODE_ALIGN)
1373 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
1374 else
1375 align_functions = 1 << i;
1376 }
1377 }
1378
1379 /* Default align_* from the processor table. */
1380 if (align_loops == 0)
1381 {
1382 align_loops = processor_target_table[ix86_tune].align_loop;
1383 align_loops_max_skip = processor_target_table[ix86_tune].align_loop_max_skip;
1384 }
1385 if (align_jumps == 0)
1386 {
1387 align_jumps = processor_target_table[ix86_tune].align_jump;
1388 align_jumps_max_skip = processor_target_table[ix86_tune].align_jump_max_skip;
1389 }
1390 if (align_functions == 0)
1391 {
1392 align_functions = processor_target_table[ix86_tune].align_func;
1393 }
1394
1395 /* Validate -mpreferred-stack-boundary= value, or provide default.
1396 The default of 128 bits is for Pentium III's SSE __m128, but we
1397 don't want additional code to keep the stack aligned when
1398 optimizing for code size. */
1399 ix86_preferred_stack_boundary = (optimize_size
1400 ? TARGET_64BIT ? 128 : 32
1401 : 128);
1402 if (ix86_preferred_stack_boundary_string)
1403 {
1404 i = atoi (ix86_preferred_stack_boundary_string);
1405 if (i < (TARGET_64BIT ? 4 : 2) || i > 12)
1406 error ("-mpreferred-stack-boundary=%d is not between %d and 12", i,
1407 TARGET_64BIT ? 4 : 2);
1408 else
1409 ix86_preferred_stack_boundary = (1 << i) * BITS_PER_UNIT;
1410 }
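/* The option value is the log2 of the boundary in bytes, so
   -mpreferred-stack-boundary=4 yields (1 << 4) * BITS_PER_UNIT = 128 bits,
   i.e. the 16-byte alignment needed for SSE __m128 spills.  */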
1411
1412 /* Validate -mbranch-cost= value, or provide default. */
1413 ix86_branch_cost = processor_target_table[ix86_tune].cost->branch_cost;
1414 if (ix86_branch_cost_string)
1415 {
1416 i = atoi (ix86_branch_cost_string);
1417 if (i < 0 || i > 5)
1418 error ("-mbranch-cost=%d is not between 0 and 5", i);
1419 else
1420 ix86_branch_cost = i;
1421 }
1422
1423 if (ix86_tls_dialect_string)
1424 {
1425 if (strcmp (ix86_tls_dialect_string, "gnu") == 0)
1426 ix86_tls_dialect = TLS_DIALECT_GNU;
1427 else if (strcmp (ix86_tls_dialect_string, "sun") == 0)
1428 ix86_tls_dialect = TLS_DIALECT_SUN;
1429 else
1430 error ("bad value (%s) for -mtls-dialect= switch",
1431 ix86_tls_dialect_string);
1432 }
1433
1434 /* Keep nonleaf frame pointers. */
1435 if (TARGET_OMIT_LEAF_FRAME_POINTER)
1436 flag_omit_frame_pointer = 1;
1437
1438 /* If we're doing fast math, we don't care about comparison order
1439 wrt NaNs. This lets us use a shorter comparison sequence. */
1440 if (flag_unsafe_math_optimizations)
1441 target_flags &= ~MASK_IEEE_FP;
1442
1443 /* If the architecture always has an FPU, turn off NO_FANCY_MATH_387,
1444 since the insns won't need emulation. */
1445 if (x86_arch_always_fancy_math_387 & (1 << ix86_arch))
1446 target_flags &= ~MASK_NO_FANCY_MATH_387;
1447
1448 /* Turn on SSE2 builtins for -msse3. */
1449 if (TARGET_SSE3)
1450 target_flags |= MASK_SSE2;
1451
1452 /* Turn on SSE builtins for -msse2. */
1453 if (TARGET_SSE2)
1454 target_flags |= MASK_SSE;
1455
1456 if (TARGET_64BIT)
1457 {
1458 if (TARGET_ALIGN_DOUBLE)
1459 error ("-malign-double makes no sense in the 64bit mode");
1460 if (TARGET_RTD)
1461 error ("-mrtd calling convention not supported in the 64bit mode");
1462 /* Enable by default the SSE and MMX builtins. */
1463 target_flags |= (MASK_SSE2 | MASK_SSE | MASK_MMX | MASK_128BIT_LONG_DOUBLE);
1464 ix86_fpmath = FPMATH_SSE;
1465 }
1466 else
1467 {
1468 ix86_fpmath = FPMATH_387;
1469       /* The i386 ABI does not specify a red zone.  It still makes sense to use it
1470          when the programmer takes care to keep the stack from being destroyed.  */
1471 if (!(target_flags_explicit & MASK_NO_RED_ZONE))
1472 target_flags |= MASK_NO_RED_ZONE;
1473 }
1474
1475 if (ix86_fpmath_string != 0)
1476 {
1477 if (! strcmp (ix86_fpmath_string, "387"))
1478 ix86_fpmath = FPMATH_387;
1479 else if (! strcmp (ix86_fpmath_string, "sse"))
1480 {
1481 if (!TARGET_SSE)
1482 {
1483 warning ("SSE instruction set disabled, using 387 arithmetics");
1484 ix86_fpmath = FPMATH_387;
1485 }
1486 else
1487 ix86_fpmath = FPMATH_SSE;
1488 }
1489 else if (! strcmp (ix86_fpmath_string, "387,sse")
1490 || ! strcmp (ix86_fpmath_string, "sse,387"))
1491 {
1492 if (!TARGET_SSE)
1493 {
1494 warning ("SSE instruction set disabled, using 387 arithmetics");
1495 ix86_fpmath = FPMATH_387;
1496 }
1497 else if (!TARGET_80387)
1498 {
1499 warning ("387 instruction set disabled, using SSE arithmetics");
1500 ix86_fpmath = FPMATH_SSE;
1501 }
1502 else
1503 ix86_fpmath = FPMATH_SSE | FPMATH_387;
1504 }
1505 else
1506 error ("bad value (%s) for -mfpmath= switch", ix86_fpmath_string);
1507 }
1508
1509 /* It makes no sense to ask for just SSE builtins, so MMX is also turned
1510 on by -msse. */
1511 if (TARGET_SSE)
1512 {
1513 target_flags |= MASK_MMX;
1514 x86_prefetch_sse = true;
1515 }
1516
1517   /* If it has 3DNow!, it also has MMX, so MMX is also turned on by -m3dnow.  */
1518 if (TARGET_3DNOW)
1519 {
1520 target_flags |= MASK_MMX;
1521 /* If we are targeting the Athlon architecture, enable the 3Dnow/MMX
1522 extensions it adds. */
1523 if (x86_3dnow_a & (1 << ix86_arch))
1524 target_flags |= MASK_3DNOW_A;
1525 }
1526 if ((x86_accumulate_outgoing_args & TUNEMASK)
1527 && !(target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
1528 && !optimize_size)
1529 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
1530
1531 /* Figure out what ASM_GENERATE_INTERNAL_LABEL builds as a prefix. */
1532 {
1533 char *p;
1534 ASM_GENERATE_INTERNAL_LABEL (internal_label_prefix, "LX", 0);
1535 p = strchr (internal_label_prefix, 'X');
1536 internal_label_prefix_len = p - internal_label_prefix;
1537 *p = '\0';
1538 }
1539 }
1540 \f
1541 void
1542 optimization_options (int level, int size ATTRIBUTE_UNUSED)
1543 {
1544 /* For -O2 and beyond, turn off -fschedule-insns by default. It tends to
1545 make the problem with not enough registers even worse. */
1546 #ifdef INSN_SCHEDULING
1547 if (level > 1)
1548 flag_schedule_insns = 0;
1549 #endif
1550
1551   /* The default values of these switches depend on TARGET_64BIT,
1552      which is not known at this moment.  Mark these values with 2 and
1553      let the user override them.  If no command line option
1554      specifies them, we will set the defaults in override_options.  */
1555 if (optimize >= 1)
1556 flag_omit_frame_pointer = 2;
1557 flag_pcc_struct_return = 2;
1558 flag_asynchronous_unwind_tables = 2;
1559 }
1560 \f
1561 /* Table of valid machine attributes. */
1562 const struct attribute_spec ix86_attribute_table[] =
1563 {
1564 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
1565 /* Stdcall attribute says callee is responsible for popping arguments
1566 if they are not variable. */
1567 { "stdcall", 0, 0, false, true, true, ix86_handle_cdecl_attribute },
1568 /* Fastcall attribute says callee is responsible for popping arguments
1569 if they are not variable. */
1570 { "fastcall", 0, 0, false, true, true, ix86_handle_cdecl_attribute },
1571 /* Cdecl attribute says the callee is a normal C declaration */
1572 { "cdecl", 0, 0, false, true, true, ix86_handle_cdecl_attribute },
1573 /* Regparm attribute specifies how many integer arguments are to be
1574 passed in registers. */
1575 { "regparm", 1, 1, false, true, true, ix86_handle_regparm_attribute },
1576 #ifdef TARGET_DLLIMPORT_DECL_ATTRIBUTES
1577 { "dllimport", 0, 0, false, false, false, ix86_handle_dll_attribute },
1578 { "dllexport", 0, 0, false, false, false, ix86_handle_dll_attribute },
1579 { "shared", 0, 0, true, false, false, ix86_handle_shared_attribute },
1580 #endif
1581 { "ms_struct", 0, 0, false, false, false, ix86_handle_struct_attribute },
1582 { "gcc_struct", 0, 0, false, false, false, ix86_handle_struct_attribute },
1583 { NULL, 0, 0, false, false, false, NULL }
1584 };
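/* These attributes appear in user code as, for instance:

     int __attribute__ ((stdcall)) f (int a, int b);
     int __attribute__ ((fastcall)) g (int a, int b);
     int __attribute__ ((regparm (3))) h (int a, int b, int c);

   With stdcall the callee pops its arguments; with fastcall the first two
   integer arguments go in ECX and EDX; regparm (N) asks for up to N integer
   arguments in registers (conventionally EAX, EDX and ECX on IA-32).  See
   ix86_function_regparm and ix86_return_pops_args below.  */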
1585
1586 /* Decide whether we can make a sibling call to a function. DECL is the
1587 declaration of the function being targeted by the call and EXP is the
1588 CALL_EXPR representing the call. */
1589
1590 static bool
1591 ix86_function_ok_for_sibcall (tree decl, tree exp)
1592 {
1593 /* If we are generating position-independent code, we cannot sibcall
1594 optimize any indirect call, or a direct call to a global function,
1595 as the PLT requires %ebx be live. */
1596 if (!TARGET_64BIT && flag_pic && (!decl || TREE_PUBLIC (decl)))
1597 return false;
1598
1599 /* If we are returning floats on the 80387 register stack, we cannot
1600 make a sibcall from a function that doesn't return a float to a
1601 function that does or, conversely, from a function that does return
1602 a float to a function that doesn't; the necessary stack adjustment
1603 would not be executed. */
1604 if (STACK_REG_P (ix86_function_value (TREE_TYPE (exp)))
1605 != STACK_REG_P (ix86_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)))))
1606 return false;
1607
1608 /* If this call is indirect, we'll need to be able to use a call-clobbered
1609 register for the address of the target function. Make sure that all
1610 such registers are not used for passing parameters. */
1611 if (!decl && !TARGET_64BIT)
1612 {
1613 tree type;
1614
1615       /* We're looking at the CALL_EXPR; we need the type of the function.  */
1616 type = TREE_OPERAND (exp, 0); /* pointer expression */
1617 type = TREE_TYPE (type); /* pointer type */
1618 type = TREE_TYPE (type); /* function type */
1619
1620 if (ix86_function_regparm (type, NULL) >= 3)
1621 {
1622 /* ??? Need to count the actual number of registers to be used,
1623 not the possible number of registers. Fix later. */
1624 return false;
1625 }
1626 }
1627
1628 /* Otherwise okay. That also includes certain types of indirect calls. */
1629 return true;
1630 }
1631
1632 /* Handle a "cdecl", "stdcall", or "fastcall" attribute;
1633 arguments as in struct attribute_spec.handler. */
1634 static tree
1635 ix86_handle_cdecl_attribute (tree *node, tree name,
1636 tree args ATTRIBUTE_UNUSED,
1637 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
1638 {
1639 if (TREE_CODE (*node) != FUNCTION_TYPE
1640 && TREE_CODE (*node) != METHOD_TYPE
1641 && TREE_CODE (*node) != FIELD_DECL
1642 && TREE_CODE (*node) != TYPE_DECL)
1643 {
1644 warning ("`%s' attribute only applies to functions",
1645 IDENTIFIER_POINTER (name));
1646 *no_add_attrs = true;
1647 }
1648 else
1649 {
1650 if (is_attribute_p ("fastcall", name))
1651 {
1652 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
1653 {
1654 error ("fastcall and stdcall attributes are not compatible");
1655 }
1656 else if (lookup_attribute ("regparm", TYPE_ATTRIBUTES (*node)))
1657 {
1658 error ("fastcall and regparm attributes are not compatible");
1659 }
1660 }
1661 else if (is_attribute_p ("stdcall", name))
1662 {
1663 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
1664 {
1665 error ("fastcall and stdcall attributes are not compatible");
1666 }
1667 }
1668 }
1669
1670 if (TARGET_64BIT)
1671 {
1672 warning ("`%s' attribute ignored", IDENTIFIER_POINTER (name));
1673 *no_add_attrs = true;
1674 }
1675
1676 return NULL_TREE;
1677 }
1678
1679 /* Handle a "regparm" attribute;
1680 arguments as in struct attribute_spec.handler. */
1681 static tree
1682 ix86_handle_regparm_attribute (tree *node, tree name, tree args,
1683 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
1684 {
1685 if (TREE_CODE (*node) != FUNCTION_TYPE
1686 && TREE_CODE (*node) != METHOD_TYPE
1687 && TREE_CODE (*node) != FIELD_DECL
1688 && TREE_CODE (*node) != TYPE_DECL)
1689 {
1690 warning ("`%s' attribute only applies to functions",
1691 IDENTIFIER_POINTER (name));
1692 *no_add_attrs = true;
1693 }
1694 else
1695 {
1696 tree cst;
1697
1698 cst = TREE_VALUE (args);
1699 if (TREE_CODE (cst) != INTEGER_CST)
1700 {
1701 warning ("`%s' attribute requires an integer constant argument",
1702 IDENTIFIER_POINTER (name));
1703 *no_add_attrs = true;
1704 }
1705 else if (compare_tree_int (cst, REGPARM_MAX) > 0)
1706 {
1707 warning ("argument to `%s' attribute larger than %d",
1708 IDENTIFIER_POINTER (name), REGPARM_MAX);
1709 *no_add_attrs = true;
1710 }
1711
1712 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
1713 {
1714 error ("fastcall and regparm attributes are not compatible");
1715 }
1716 }
1717
1718 return NULL_TREE;
1719 }
1720
1721 /* Return 0 if the attributes for two types are incompatible, 1 if they
1722 are compatible, and 2 if they are nearly compatible (which causes a
1723 warning to be generated). */
1724
1725 static int
1726 ix86_comp_type_attributes (tree type1, tree type2)
1727 {
1728 /* Check for mismatch of non-default calling convention. */
1729 const char *const rtdstr = TARGET_RTD ? "cdecl" : "stdcall";
1730
1731 if (TREE_CODE (type1) != FUNCTION_TYPE)
1732 return 1;
1733
1734 /* Check for mismatched fastcall types */
1735 if (!lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type1))
1736 != !lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type2)))
1737 return 0;
1738
1739 /* Check for mismatched return types (cdecl vs stdcall). */
1740 if (!lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type1))
1741 != !lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type2)))
1742 return 0;
1743 if (ix86_function_regparm (type1, NULL)
1744 != ix86_function_regparm (type2, NULL))
1745 return 0;
1746 return 1;
1747 }
1748 \f
1749 /* Return the regparm value for a function with the indicated TYPE and DECL.
1750 DECL may be NULL when calling function indirectly
1751 or considering a libcall. */
1752
1753 static int
1754 ix86_function_regparm (tree type, tree decl)
1755 {
1756 tree attr;
1757 int regparm = ix86_regparm;
1758 bool user_convention = false;
1759
1760 if (!TARGET_64BIT)
1761 {
1762 attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (type));
1763 if (attr)
1764 {
1765 regparm = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
1766 user_convention = true;
1767 }
1768
1769 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type)))
1770 {
1771 regparm = 2;
1772 user_convention = true;
1773 }
1774
1775 /* Use register calling convention for local functions when possible. */
1776 if (!TARGET_64BIT && !user_convention && decl
1777 && flag_unit_at_a_time && !profile_flag)
1778 {
1779 struct cgraph_local_info *i = cgraph_local_info (decl);
1780 if (i && i->local)
1781 {
1782 /* We can't use regparm(3) for nested functions as these use
1783 	     the static chain pointer in the third argument register.  */
1784 if (DECL_CONTEXT (decl) && !DECL_NO_STATIC_CHAIN (decl))
1785 regparm = 2;
1786 else
1787 regparm = 3;
1788 }
1789 }
1790 }
1791 return regparm;
1792 }
1793
1794 /* Return true if EAX is live at the start of the function. Used by
1795 ix86_expand_prologue to determine if we need special help before
1796 calling allocate_stack_worker. */
1797
1798 static bool
1799 ix86_eax_live_at_start_p (void)
1800 {
1801 /* Cheat. Don't bother working forward from ix86_function_regparm
1802 to the function type to whether an actual argument is located in
1803 eax. Instead just look at cfg info, which is still close enough
1804 to correct at this point. This gives false positives for broken
1805 functions that might use uninitialized data that happens to be
1806 allocated in eax, but who cares? */
1807 return REGNO_REG_SET_P (ENTRY_BLOCK_PTR->global_live_at_end, 0);
1808 }
1809
1810 /* Value is the number of bytes of arguments automatically
1811 popped when returning from a subroutine call.
1812 FUNDECL is the declaration node of the function (as a tree),
1813 FUNTYPE is the data type of the function (as a tree),
1814 or for a library call it is an identifier node for the subroutine name.
1815 SIZE is the number of bytes of arguments passed on the stack.
1816
1817 On the 80386, the RTD insn may be used to pop them if the number
1818 of args is fixed, but if the number is variable then the caller
1819 must pop them all. RTD can't be used for library calls now
1820 because the library is compiled with the Unix compiler.
1821 Use of RTD is a selectable option, since it is incompatible with
1822 standard Unix calling sequences. If the option is not selected,
1823 the caller must always pop the args.
1824
1825 The attribute stdcall is equivalent to RTD on a per module basis. */
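
   For example, a function declared
     void __attribute__ ((stdcall)) f (int a, int b);
   has a fixed argument list, so the code below returns SIZE (8 here) and the
   callee pops its own arguments, typically with a "ret $8".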
1826
1827 int
1828 ix86_return_pops_args (tree fundecl, tree funtype, int size)
1829 {
1830 int rtd = TARGET_RTD && (!fundecl || TREE_CODE (fundecl) != IDENTIFIER_NODE);
1831
1832 /* Cdecl functions override -mrtd, and never pop the stack. */
1833 if (! lookup_attribute ("cdecl", TYPE_ATTRIBUTES (funtype))) {
1834
1835 /* Stdcall and fastcall functions will pop the stack if not
1836 variable args. */
1837 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (funtype))
1838 || lookup_attribute ("fastcall", TYPE_ATTRIBUTES (funtype)))
1839 rtd = 1;
1840
1841 if (rtd
1842 && (TYPE_ARG_TYPES (funtype) == NULL_TREE
1843 || (TREE_VALUE (tree_last (TYPE_ARG_TYPES (funtype)))
1844 == void_type_node)))
1845 return size;
1846 }
1847
1848 /* Lose any fake structure return argument if it is passed on the stack. */
1849 if (aggregate_value_p (TREE_TYPE (funtype), fundecl)
1850 && !TARGET_64BIT)
1851 {
1852 int nregs = ix86_function_regparm (funtype, fundecl);
1853
1854 if (!nregs)
1855 return GET_MODE_SIZE (Pmode);
1856 }
1857
1858 return 0;
1859 }
1860 \f
1861 /* Argument support functions. */
1862
1863 /* Return true when register may be used to pass function parameters. */
1864 bool
1865 ix86_function_arg_regno_p (int regno)
1866 {
1867 int i;
1868 if (!TARGET_64BIT)
1869 return (regno < REGPARM_MAX
1870 || (TARGET_SSE && SSE_REGNO_P (regno) && !fixed_regs[regno]));
1871 if (SSE_REGNO_P (regno) && TARGET_SSE)
1872 return true;
1873 /* RAX is used as hidden argument to va_arg functions. */
1874 if (!regno)
1875 return true;
1876 for (i = 0; i < REGPARM_MAX; i++)
1877 if (regno == x86_64_int_parameter_registers[i])
1878 return true;
1879 return false;
1880 }
1881
1882 /* Initialize a variable CUM of type CUMULATIVE_ARGS
1883 for a call to a function whose data type is FNTYPE.
1884 For a library call, FNTYPE is 0. */
1885
1886 void
1887 init_cumulative_args (CUMULATIVE_ARGS *cum, /* Argument info to initialize */
1888 tree fntype, /* tree ptr for function decl */
1889 rtx libname, /* SYMBOL_REF of library name or 0 */
1890 tree fndecl)
1891 {
1892 static CUMULATIVE_ARGS zero_cum;
1893 tree param, next_param;
1894
1895 if (TARGET_DEBUG_ARG)
1896 {
1897 fprintf (stderr, "\ninit_cumulative_args (");
1898 if (fntype)
1899 fprintf (stderr, "fntype code = %s, ret code = %s",
1900 tree_code_name[(int) TREE_CODE (fntype)],
1901 tree_code_name[(int) TREE_CODE (TREE_TYPE (fntype))]);
1902 else
1903 fprintf (stderr, "no fntype");
1904
1905 if (libname)
1906 fprintf (stderr, ", libname = %s", XSTR (libname, 0));
1907 }
1908
1909 *cum = zero_cum;
1910
1911 /* Set up the number of registers to use for passing arguments. */
1912 if (fntype)
1913 cum->nregs = ix86_function_regparm (fntype, fndecl);
1914 else
1915 cum->nregs = ix86_regparm;
1916 cum->sse_nregs = SSE_REGPARM_MAX;
1917 cum->mmx_nregs = MMX_REGPARM_MAX;
1918 cum->warn_sse = true;
1919 cum->warn_mmx = true;
1920 cum->maybe_vaarg = false;
1921
1922   /* Use the ecx and edx registers if the function has the fastcall attribute.  */
1923 if (fntype && !TARGET_64BIT)
1924 {
1925 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype)))
1926 {
1927 cum->nregs = 2;
1928 cum->fastcall = 1;
1929 }
1930 }
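  /* For example, given
       void __attribute__ ((fastcall)) f (int a, int b, int c);
     the two register slots set up above hold A (in ECX) and B (in EDX),
     while C goes on the stack; see the fastcall handling in function_arg.  */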
1931
1932
1933 /* Determine if this function has variable arguments. This is
1934      indicated by the last argument being 'void_type_node' if there
1935      are no variable arguments.  If there are variable arguments, then
1936      we won't pass anything in registers.  */
1937
1938 if (cum->nregs || !TARGET_MMX || !TARGET_SSE)
1939 {
1940 for (param = (fntype) ? TYPE_ARG_TYPES (fntype) : 0;
1941 param != 0; param = next_param)
1942 {
1943 next_param = TREE_CHAIN (param);
1944 if (next_param == 0 && TREE_VALUE (param) != void_type_node)
1945 {
1946 if (!TARGET_64BIT)
1947 {
1948 cum->nregs = 0;
1949 cum->sse_nregs = 0;
1950 cum->mmx_nregs = 0;
1951 cum->warn_sse = 0;
1952 cum->warn_mmx = 0;
1953 cum->fastcall = 0;
1954 }
1955 cum->maybe_vaarg = true;
1956 }
1957 }
1958 }
1959 if ((!fntype && !libname)
1960 || (fntype && !TYPE_ARG_TYPES (fntype)))
1961 cum->maybe_vaarg = 1;
1962
1963 if (TARGET_DEBUG_ARG)
1964 fprintf (stderr, ", nregs=%d )\n", cum->nregs);
1965
1966 return;
1967 }
1968
1969 /* x86-64 register passing implementation.  See the x86-64 ABI for details.  The goal
1970    of this code is to classify each eightbyte of the incoming argument by register
1971    class and assign registers accordingly.  */
1972
1973 /* Return the union class of CLASS1 and CLASS2.
1974 See the x86-64 PS ABI for details. */
1975
1976 static enum x86_64_reg_class
1977 merge_classes (enum x86_64_reg_class class1, enum x86_64_reg_class class2)
1978 {
1979 /* Rule #1: If both classes are equal, this is the resulting class. */
1980 if (class1 == class2)
1981 return class1;
1982
1983 /* Rule #2: If one of the classes is NO_CLASS, the resulting class is
1984 the other class. */
1985 if (class1 == X86_64_NO_CLASS)
1986 return class2;
1987 if (class2 == X86_64_NO_CLASS)
1988 return class1;
1989
1990 /* Rule #3: If one of the classes is MEMORY, the result is MEMORY. */
1991 if (class1 == X86_64_MEMORY_CLASS || class2 == X86_64_MEMORY_CLASS)
1992 return X86_64_MEMORY_CLASS;
1993
1994 /* Rule #4: If one of the classes is INTEGER, the result is INTEGER. */
1995 if ((class1 == X86_64_INTEGERSI_CLASS && class2 == X86_64_SSESF_CLASS)
1996 || (class2 == X86_64_INTEGERSI_CLASS && class1 == X86_64_SSESF_CLASS))
1997 return X86_64_INTEGERSI_CLASS;
1998 if (class1 == X86_64_INTEGER_CLASS || class1 == X86_64_INTEGERSI_CLASS
1999 || class2 == X86_64_INTEGER_CLASS || class2 == X86_64_INTEGERSI_CLASS)
2000 return X86_64_INTEGER_CLASS;
2001
2002 /* Rule #5: If one of the classes is X87 or X87UP class, MEMORY is used. */
2003 if (class1 == X86_64_X87_CLASS || class1 == X86_64_X87UP_CLASS
2004 || class2 == X86_64_X87_CLASS || class2 == X86_64_X87UP_CLASS)
2005 return X86_64_MEMORY_CLASS;
2006
2007 /* Rule #6: Otherwise class SSE is used. */
2008 return X86_64_SSE_CLASS;
2009 }
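
/* For example, merging X86_64_INTEGERSI_CLASS with X86_64_SSESF_CLASS yields
   X86_64_INTEGERSI_CLASS (rule #4), while merging X86_64_SSE_CLASS with
   X86_64_X87_CLASS yields X86_64_MEMORY_CLASS (rule #5), which forces the
   whole argument onto the stack.  */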
2010
2011 /* Classify the argument of type TYPE and mode MODE.
2012 CLASSES will be filled by the register class used to pass each word
2013 of the operand. The number of words is returned. In case the parameter
2014 should be passed in memory, 0 is returned. As a special case for zero
2015 sized containers, classes[0] will be NO_CLASS and 1 is returned.
2016
2017    BIT_OFFSET is used internally for handling records; it specifies the
2018    offset in bits modulo 256 to avoid overflow cases.
2019
2020 See the x86-64 PS ABI for details.
2021 */
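
/* As a worked example, the 16-byte structure
     struct s { double d; int i; };
   classifies its first eightbyte as X86_64_SSEDF_CLASS and its second as
   X86_64_INTEGER_CLASS, so examine_argument reports one SSE and one integer
   register and construct_container splits the value accordingly.  */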
2022
2023 static int
2024 classify_argument (enum machine_mode mode, tree type,
2025 enum x86_64_reg_class classes[MAX_CLASSES], int bit_offset)
2026 {
2027 HOST_WIDE_INT bytes =
2028 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
2029 int words = (bytes + (bit_offset % 64) / 8 + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
2030
2031 /* Variable sized entities are always passed/returned in memory. */
2032 if (bytes < 0)
2033 return 0;
2034
2035 if (mode != VOIDmode
2036 && MUST_PASS_IN_STACK (mode, type))
2037 return 0;
2038
2039 if (type && AGGREGATE_TYPE_P (type))
2040 {
2041 int i;
2042 tree field;
2043 enum x86_64_reg_class subclasses[MAX_CLASSES];
2044
2045 /* On x86-64 we pass structures larger than 16 bytes on the stack. */
2046 if (bytes > 16)
2047 return 0;
2048
2049 for (i = 0; i < words; i++)
2050 classes[i] = X86_64_NO_CLASS;
2051
2052 /* Zero sized arrays or structures are NO_CLASS. We return 0 to
2053 	 signal the memory class, so handle this as a special case.  */
2054 if (!words)
2055 {
2056 classes[0] = X86_64_NO_CLASS;
2057 return 1;
2058 }
2059
2060 /* Classify each field of record and merge classes. */
2061 if (TREE_CODE (type) == RECORD_TYPE)
2062 {
2063 	  /* For classes, first merge in the fields of the base classes.  */
2064 if (TYPE_BINFO (type) && BINFO_BASE_BINFOS (TYPE_BINFO (type)))
2065 {
2066 tree bases = BINFO_BASE_BINFOS (TYPE_BINFO (type));
2067 int n_bases = BINFO_N_BASE_BINFOS (TYPE_BINFO (type));
2068 int i;
2069
2070 for (i = 0; i < n_bases; ++i)
2071 {
2072 tree binfo = TREE_VEC_ELT (bases, i);
2073 int num;
2074 int offset = tree_low_cst (BINFO_OFFSET (binfo), 0) * 8;
2075 tree type = BINFO_TYPE (binfo);
2076
2077 num = classify_argument (TYPE_MODE (type),
2078 type, subclasses,
2079 (offset + bit_offset) % 256);
2080 if (!num)
2081 return 0;
2082 for (i = 0; i < num; i++)
2083 {
2084 int pos = (offset + (bit_offset % 64)) / 8 / 8;
2085 classes[i + pos] =
2086 merge_classes (subclasses[i], classes[i + pos]);
2087 }
2088 }
2089 }
2090 	  /* And now merge in the fields of the structure.  */
2091 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
2092 {
2093 if (TREE_CODE (field) == FIELD_DECL)
2094 {
2095 int num;
2096
2097 /* Bitfields are always classified as integer. Handle them
2098 early, since later code would consider them to be
2099 misaligned integers. */
2100 if (DECL_BIT_FIELD (field))
2101 {
2102 for (i = int_bit_position (field) / 8 / 8;
2103 i < (int_bit_position (field)
2104 + tree_low_cst (DECL_SIZE (field), 0)
2105 + 63) / 8 / 8; i++)
2106 classes[i] =
2107 merge_classes (X86_64_INTEGER_CLASS,
2108 classes[i]);
2109 }
2110 else
2111 {
2112 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
2113 TREE_TYPE (field), subclasses,
2114 (int_bit_position (field)
2115 + bit_offset) % 256);
2116 if (!num)
2117 return 0;
2118 for (i = 0; i < num; i++)
2119 {
2120 int pos =
2121 (int_bit_position (field) + (bit_offset % 64)) / 8 / 8;
2122 classes[i + pos] =
2123 merge_classes (subclasses[i], classes[i + pos]);
2124 }
2125 }
2126 }
2127 }
2128 }
2129 /* Arrays are handled as small records. */
2130 else if (TREE_CODE (type) == ARRAY_TYPE)
2131 {
2132 int num;
2133 num = classify_argument (TYPE_MODE (TREE_TYPE (type)),
2134 TREE_TYPE (type), subclasses, bit_offset);
2135 if (!num)
2136 return 0;
2137
2138 /* The partial classes are now full classes. */
2139 if (subclasses[0] == X86_64_SSESF_CLASS && bytes != 4)
2140 subclasses[0] = X86_64_SSE_CLASS;
2141 if (subclasses[0] == X86_64_INTEGERSI_CLASS && bytes != 4)
2142 subclasses[0] = X86_64_INTEGER_CLASS;
2143
2144 for (i = 0; i < words; i++)
2145 classes[i] = subclasses[i % num];
2146 }
2147 /* Unions are similar to RECORD_TYPE but offset is always 0. */
2148 else if (TREE_CODE (type) == UNION_TYPE
2149 || TREE_CODE (type) == QUAL_UNION_TYPE)
2150 {
2151 	  /* For classes, first merge in the fields of the base classes.  */
2152 if (TYPE_BINFO (type) && BINFO_BASE_BINFOS (TYPE_BINFO (type)))
2153 {
2154 tree bases = BINFO_BASE_BINFOS (TYPE_BINFO (type));
2155 int n_bases = BINFO_N_BASE_BINFOS (TYPE_BINFO (type));
2156 int i;
2157
2158 for (i = 0; i < n_bases; ++i)
2159 {
2160 tree binfo = TREE_VEC_ELT (bases, i);
2161 int num;
2162 int offset = tree_low_cst (BINFO_OFFSET (binfo), 0) * 8;
2163 tree type = BINFO_TYPE (binfo);
2164
2165 num = classify_argument (TYPE_MODE (type),
2166 type, subclasses,
2167 (offset + (bit_offset % 64)) % 256);
2168 if (!num)
2169 return 0;
2170 for (i = 0; i < num; i++)
2171 {
2172 int pos = (offset + (bit_offset % 64)) / 8 / 8;
2173 classes[i + pos] =
2174 merge_classes (subclasses[i], classes[i + pos]);
2175 }
2176 }
2177 }
2178 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
2179 {
2180 if (TREE_CODE (field) == FIELD_DECL)
2181 {
2182 int num;
2183 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
2184 TREE_TYPE (field), subclasses,
2185 bit_offset);
2186 if (!num)
2187 return 0;
2188 for (i = 0; i < num; i++)
2189 classes[i] = merge_classes (subclasses[i], classes[i]);
2190 }
2191 }
2192 }
2193 else if (TREE_CODE (type) == SET_TYPE)
2194 {
2195 if (bytes <= 4)
2196 {
2197 classes[0] = X86_64_INTEGERSI_CLASS;
2198 return 1;
2199 }
2200 else if (bytes <= 8)
2201 {
2202 classes[0] = X86_64_INTEGER_CLASS;
2203 return 1;
2204 }
2205 else if (bytes <= 12)
2206 {
2207 classes[0] = X86_64_INTEGER_CLASS;
2208 classes[1] = X86_64_INTEGERSI_CLASS;
2209 return 2;
2210 }
2211 else
2212 {
2213 classes[0] = X86_64_INTEGER_CLASS;
2214 classes[1] = X86_64_INTEGER_CLASS;
2215 return 2;
2216 }
2217 }
2218 else
2219 abort ();
2220
2221 /* Final merger cleanup. */
2222 for (i = 0; i < words; i++)
2223 {
2224 /* If one class is MEMORY, everything should be passed in
2225 memory. */
2226 if (classes[i] == X86_64_MEMORY_CLASS)
2227 return 0;
2228
2229 /* The X86_64_SSEUP_CLASS should be always preceded by
2230 X86_64_SSE_CLASS. */
2231 if (classes[i] == X86_64_SSEUP_CLASS
2232 && (i == 0 || classes[i - 1] != X86_64_SSE_CLASS))
2233 classes[i] = X86_64_SSE_CLASS;
2234
2235 /* X86_64_X87UP_CLASS should be preceded by X86_64_X87_CLASS. */
2236 if (classes[i] == X86_64_X87UP_CLASS
2237 && (i == 0 || classes[i - 1] != X86_64_X87_CLASS))
2238 classes[i] = X86_64_SSE_CLASS;
2239 }
2240 return words;
2241 }
2242
2243   /* Compute the alignment needed.  We align all types to their natural boundaries,
2244      with the exception of XFmode, which is aligned to 128 bits.  */
2245 if (mode != VOIDmode && mode != BLKmode)
2246 {
2247 int mode_alignment = GET_MODE_BITSIZE (mode);
2248
2249 if (mode == XFmode)
2250 mode_alignment = 128;
2251 else if (mode == XCmode)
2252 mode_alignment = 256;
2253 if (COMPLEX_MODE_P (mode))
2254 mode_alignment /= 2;
2255 /* Misaligned fields are always returned in memory. */
2256 if (bit_offset % mode_alignment)
2257 return 0;
2258 }
2259
2260 /* Classification of atomic types. */
2261 switch (mode)
2262 {
2263 case DImode:
2264 case SImode:
2265 case HImode:
2266 case QImode:
2267 case CSImode:
2268 case CHImode:
2269 case CQImode:
2270 if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
2271 classes[0] = X86_64_INTEGERSI_CLASS;
2272 else
2273 classes[0] = X86_64_INTEGER_CLASS;
2274 return 1;
2275 case CDImode:
2276 case TImode:
2277 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
2278 return 2;
2279 case CTImode:
2280 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
2281 classes[2] = classes[3] = X86_64_INTEGER_CLASS;
2282 return 4;
2283 case SFmode:
2284 if (!(bit_offset % 64))
2285 classes[0] = X86_64_SSESF_CLASS;
2286 else
2287 classes[0] = X86_64_SSE_CLASS;
2288 return 1;
2289 case DFmode:
2290 classes[0] = X86_64_SSEDF_CLASS;
2291 return 1;
2292 case XFmode:
2293 classes[0] = X86_64_X87_CLASS;
2294 classes[1] = X86_64_X87UP_CLASS;
2295 return 2;
2296 case TFmode:
2297 case TCmode:
2298 return 0;
2299 case XCmode:
2300 classes[0] = X86_64_X87_CLASS;
2301 classes[1] = X86_64_X87UP_CLASS;
2302 classes[2] = X86_64_X87_CLASS;
2303 classes[3] = X86_64_X87UP_CLASS;
2304 return 4;
2305 case DCmode:
2306 classes[0] = X86_64_SSEDF_CLASS;
2307 classes[1] = X86_64_SSEDF_CLASS;
2308 return 2;
2309 case SCmode:
2310 classes[0] = X86_64_SSE_CLASS;
2311 return 1;
2312 case V4SFmode:
2313 case V4SImode:
2314 case V16QImode:
2315 case V8HImode:
2316 case V2DFmode:
2317 case V2DImode:
2318 classes[0] = X86_64_SSE_CLASS;
2319 classes[1] = X86_64_SSEUP_CLASS;
2320 return 2;
2321 case V2SFmode:
2322 case V2SImode:
2323 case V4HImode:
2324 case V8QImode:
2325 return 0;
2326 case BLKmode:
2327 case VOIDmode:
2328 return 0;
2329 default:
2330 abort ();
2331 }
2332 }
2333
2334 /* Examine the argument and set the number of registers required in each
2335    class.  Return 0 iff the parameter should be passed in memory.  */
2336 static int
2337 examine_argument (enum machine_mode mode, tree type, int in_return,
2338 int *int_nregs, int *sse_nregs)
2339 {
2340 enum x86_64_reg_class class[MAX_CLASSES];
2341 int n = classify_argument (mode, type, class, 0);
2342
2343 *int_nregs = 0;
2344 *sse_nregs = 0;
2345 if (!n)
2346 return 0;
2347 for (n--; n >= 0; n--)
2348 switch (class[n])
2349 {
2350 case X86_64_INTEGER_CLASS:
2351 case X86_64_INTEGERSI_CLASS:
2352 (*int_nregs)++;
2353 break;
2354 case X86_64_SSE_CLASS:
2355 case X86_64_SSESF_CLASS:
2356 case X86_64_SSEDF_CLASS:
2357 (*sse_nregs)++;
2358 break;
2359 case X86_64_NO_CLASS:
2360 case X86_64_SSEUP_CLASS:
2361 break;
2362 case X86_64_X87_CLASS:
2363 case X86_64_X87UP_CLASS:
2364 if (!in_return)
2365 return 0;
2366 break;
2367 case X86_64_MEMORY_CLASS:
2368 abort ();
2369 }
2370 return 1;
2371 }
2372 /* Construct container for the argument used by GCC interface. See
2373 FUNCTION_ARG for the detailed description. */
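/* Continuing the struct { double d; int i; } example from classify_argument,
   the container built here is a PARALLEL holding a DFmode SSE register at
   byte offset 0 and a DImode integer register at byte offset 8 (xmm0 and rdi
   when no earlier arguments have consumed those registers).  */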
2374 static rtx
2375 construct_container (enum machine_mode mode, tree type, int in_return,
2376 int nintregs, int nsseregs, const int * intreg,
2377 int sse_regno)
2378 {
2379 enum machine_mode tmpmode;
2380 int bytes =
2381 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
2382 enum x86_64_reg_class class[MAX_CLASSES];
2383 int n;
2384 int i;
2385 int nexps = 0;
2386 int needed_sseregs, needed_intregs;
2387 rtx exp[MAX_CLASSES];
2388 rtx ret;
2389
2390 n = classify_argument (mode, type, class, 0);
2391 if (TARGET_DEBUG_ARG)
2392 {
2393 if (!n)
2394 fprintf (stderr, "Memory class\n");
2395 else
2396 {
2397 fprintf (stderr, "Classes:");
2398 for (i = 0; i < n; i++)
2399 {
2400 fprintf (stderr, " %s", x86_64_reg_class_name[class[i]]);
2401 }
2402 fprintf (stderr, "\n");
2403 }
2404 }
2405 if (!n)
2406 return NULL;
2407 if (!examine_argument (mode, type, in_return, &needed_intregs, &needed_sseregs))
2408 return NULL;
2409 if (needed_intregs > nintregs || needed_sseregs > nsseregs)
2410 return NULL;
2411
2412 /* First construct simple cases. Avoid SCmode, since we want to use
2413      a single register to pass this type.  */
2414 if (n == 1 && mode != SCmode)
2415 switch (class[0])
2416 {
2417 case X86_64_INTEGER_CLASS:
2418 case X86_64_INTEGERSI_CLASS:
2419 return gen_rtx_REG (mode, intreg[0]);
2420 case X86_64_SSE_CLASS:
2421 case X86_64_SSESF_CLASS:
2422 case X86_64_SSEDF_CLASS:
2423 return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
2424 case X86_64_X87_CLASS:
2425 return gen_rtx_REG (mode, FIRST_STACK_REG);
2426 case X86_64_NO_CLASS:
2427 /* Zero sized array, struct or class. */
2428 return NULL;
2429 default:
2430 abort ();
2431 }
2432 if (n == 2 && class[0] == X86_64_SSE_CLASS && class[1] == X86_64_SSEUP_CLASS
2433 && mode != BLKmode)
2434 return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
2435 if (n == 2
2436 && class[0] == X86_64_X87_CLASS && class[1] == X86_64_X87UP_CLASS)
2437 return gen_rtx_REG (XFmode, FIRST_STACK_REG);
2438 if (n == 2 && class[0] == X86_64_INTEGER_CLASS
2439 && class[1] == X86_64_INTEGER_CLASS
2440 && (mode == CDImode || mode == TImode || mode == TFmode)
2441 && intreg[0] + 1 == intreg[1])
2442 return gen_rtx_REG (mode, intreg[0]);
2443 if (n == 4
2444 && class[0] == X86_64_X87_CLASS && class[1] == X86_64_X87UP_CLASS
2445 && class[2] == X86_64_X87_CLASS && class[3] == X86_64_X87UP_CLASS
2446 && mode != BLKmode)
2447 return gen_rtx_REG (XCmode, FIRST_STACK_REG);
2448
2449 /* Otherwise figure out the entries of the PARALLEL. */
2450 for (i = 0; i < n; i++)
2451 {
2452 switch (class[i])
2453 {
2454 case X86_64_NO_CLASS:
2455 break;
2456 case X86_64_INTEGER_CLASS:
2457 case X86_64_INTEGERSI_CLASS:
2458 /* Merge TImodes on aligned occasions here too. */
2459 if (i * 8 + 8 > bytes)
2460 tmpmode = mode_for_size ((bytes - i * 8) * BITS_PER_UNIT, MODE_INT, 0);
2461 else if (class[i] == X86_64_INTEGERSI_CLASS)
2462 tmpmode = SImode;
2463 else
2464 tmpmode = DImode;
2465 	  /* We've requested a size we don't have a mode for (e.g. 24 bits).  Use DImode.  */
2466 if (tmpmode == BLKmode)
2467 tmpmode = DImode;
2468 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
2469 gen_rtx_REG (tmpmode, *intreg),
2470 GEN_INT (i*8));
2471 intreg++;
2472 break;
2473 case X86_64_SSESF_CLASS:
2474 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
2475 gen_rtx_REG (SFmode,
2476 SSE_REGNO (sse_regno)),
2477 GEN_INT (i*8));
2478 sse_regno++;
2479 break;
2480 case X86_64_SSEDF_CLASS:
2481 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
2482 gen_rtx_REG (DFmode,
2483 SSE_REGNO (sse_regno)),
2484 GEN_INT (i*8));
2485 sse_regno++;
2486 break;
2487 case X86_64_SSE_CLASS:
2488 if (i < n - 1 && class[i + 1] == X86_64_SSEUP_CLASS)
2489 tmpmode = TImode;
2490 else
2491 tmpmode = DImode;
2492 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
2493 gen_rtx_REG (tmpmode,
2494 SSE_REGNO (sse_regno)),
2495 GEN_INT (i*8));
2496 if (tmpmode == TImode)
2497 i++;
2498 sse_regno++;
2499 break;
2500 default:
2501 abort ();
2502 }
2503 }
2504 ret = gen_rtx_PARALLEL (mode, rtvec_alloc (nexps));
2505 for (i = 0; i < nexps; i++)
2506 XVECEXP (ret, 0, i) = exp [i];
2507 return ret;
2508 }
2509
2510 /* Update the data in CUM to advance over an argument
2511 of mode MODE and data type TYPE.
2512 (TYPE is null for libcalls where that information may not be available.) */
2513
2514 void
2515 function_arg_advance (CUMULATIVE_ARGS *cum, /* current arg information */
2516 enum machine_mode mode, /* current arg mode */
2517 tree type, /* type of the argument or 0 if lib support */
2518 int named) /* whether or not the argument was named */
2519 {
2520 int bytes =
2521 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
2522 int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
2523
2524 if (TARGET_DEBUG_ARG)
2525 fprintf (stderr,
2526 "function_adv (sz=%d, wds=%2d, nregs=%d, ssenregs=%d, mode=%s, named=%d)\n\n",
2527 words, cum->words, cum->nregs, cum->sse_nregs, GET_MODE_NAME (mode), named);
2528 if (TARGET_64BIT)
2529 {
2530 int int_nregs, sse_nregs;
2531 if (!examine_argument (mode, type, 0, &int_nregs, &sse_nregs))
2532 cum->words += words;
2533 else if (sse_nregs <= cum->sse_nregs && int_nregs <= cum->nregs)
2534 {
2535 cum->nregs -= int_nregs;
2536 cum->sse_nregs -= sse_nregs;
2537 cum->regno += int_nregs;
2538 cum->sse_regno += sse_nregs;
2539 }
2540 else
2541 cum->words += words;
2542 }
2543 else
2544 {
2545 if (TARGET_SSE && SSE_REG_MODE_P (mode)
2546 && (!type || !AGGREGATE_TYPE_P (type)))
2547 {
2548 cum->sse_words += words;
2549 cum->sse_nregs -= 1;
2550 cum->sse_regno += 1;
2551 if (cum->sse_nregs <= 0)
2552 {
2553 cum->sse_nregs = 0;
2554 cum->sse_regno = 0;
2555 }
2556 }
2557 else if (TARGET_MMX && MMX_REG_MODE_P (mode)
2558 && (!type || !AGGREGATE_TYPE_P (type)))
2559 {
2560 cum->mmx_words += words;
2561 cum->mmx_nregs -= 1;
2562 cum->mmx_regno += 1;
2563 if (cum->mmx_nregs <= 0)
2564 {
2565 cum->mmx_nregs = 0;
2566 cum->mmx_regno = 0;
2567 }
2568 }
2569 else
2570 {
2571 cum->words += words;
2572 cum->nregs -= words;
2573 cum->regno += words;
2574
2575 if (cum->nregs <= 0)
2576 {
2577 cum->nregs = 0;
2578 cum->regno = 0;
2579 }
2580 }
2581 }
2582 return;
2583 }
2584
2585 /* Define where to put the arguments to a function.
2586 Value is zero to push the argument on the stack,
2587 or a hard register in which to store the argument.
2588
2589 MODE is the argument's machine mode.
2590 TYPE is the data type of the argument (as a tree).
2591 This is null for libcalls where that information may
2592 not be available.
2593 CUM is a variable of type CUMULATIVE_ARGS which gives info about
2594 the preceding args and about the function being called.
2595 NAMED is nonzero if this argument is a named parameter
2596 (otherwise it is an extra parameter matching an ellipsis). */
2597
2598 rtx
2599 function_arg (CUMULATIVE_ARGS *cum, /* current arg information */
2600 enum machine_mode mode, /* current arg mode */
2601 tree type, /* type of the argument or 0 if lib support */
2602 int named) /* != 0 for normal args, == 0 for ... args */
2603 {
2604 rtx ret = NULL_RTX;
2605 int bytes =
2606 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
2607 int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
2608 static bool warnedsse, warnedmmx;
2609
2610   /* Handle a hidden AL argument containing the number of registers for varargs
2611      x86-64 functions.  For the i386 ABI just return constm1_rtx to avoid
2612      any AL settings.  */
2613 if (mode == VOIDmode)
2614 {
2615 if (TARGET_64BIT)
2616 return GEN_INT (cum->maybe_vaarg
2617 ? (cum->sse_nregs < 0
2618 ? SSE_REGPARM_MAX
2619 : cum->sse_regno)
2620 : -1);
2621 else
2622 return constm1_rtx;
2623 }
2624 if (TARGET_64BIT)
2625 ret = construct_container (mode, type, 0, cum->nregs, cum->sse_nregs,
2626 &x86_64_int_parameter_registers [cum->regno],
2627 cum->sse_regno);
2628 else
2629 switch (mode)
2630 {
2631 /* For now, pass fp/complex values on the stack. */
2632 default:
2633 break;
2634
2635 case BLKmode:
2636 if (bytes < 0)
2637 break;
2638 /* FALLTHRU */
2639 case DImode:
2640 case SImode:
2641 case HImode:
2642 case QImode:
2643 if (words <= cum->nregs)
2644 {
2645 int regno = cum->regno;
2646
2647 /* Fastcall allocates the first two DWORD (SImode) or
2648 smaller arguments to ECX and EDX. */
2649 if (cum->fastcall)
2650 {
2651 if (mode == BLKmode || mode == DImode)
2652 break;
2653
2654 	      /* ECX, not EAX, is the first allocated register.  */
2655 if (regno == 0)
2656 regno = 2;
2657 }
2658 ret = gen_rtx_REG (mode, regno);
2659 }
2660 break;
2661 case TImode:
2662 case V16QImode:
2663 case V8HImode:
2664 case V4SImode:
2665 case V2DImode:
2666 case V4SFmode:
2667 case V2DFmode:
2668 if (!type || !AGGREGATE_TYPE_P (type))
2669 {
2670 	  if (!TARGET_SSE && !warnedsse && cum->warn_sse)
2671 {
2672 warnedsse = true;
2673 warning ("SSE vector argument without SSE enabled "
2674 "changes the ABI");
2675 }
2676 if (cum->sse_nregs)
2677 ret = gen_rtx_REG (mode, cum->sse_regno + FIRST_SSE_REG);
2678 }
2679 break;
2680 case V8QImode:
2681 case V4HImode:
2682 case V2SImode:
2683 case V2SFmode:
2684 if (!type || !AGGREGATE_TYPE_P (type))
2685 {
2686 if (!TARGET_MMX && !warnedmmx && cum->warn_mmx)
2687 {
2688 warnedmmx = true;
2689 warning ("MMX vector argument without MMX enabled "
2690 "changes the ABI");
2691 }
2692 if (cum->mmx_nregs)
2693 ret = gen_rtx_REG (mode, cum->mmx_regno + FIRST_MMX_REG);
2694 }
2695 break;
2696 }
2697
2698 if (TARGET_DEBUG_ARG)
2699 {
2700 fprintf (stderr,
2701 "function_arg (size=%d, wds=%2d, nregs=%d, mode=%4s, named=%d, ",
2702 words, cum->words, cum->nregs, GET_MODE_NAME (mode), named);
2703
2704 if (ret)
2705 print_simple_rtl (stderr, ret);
2706 else
2707 fprintf (stderr, ", stack");
2708
2709 fprintf (stderr, " )\n");
2710 }
2711
2712 return ret;
2713 }
2714
2715 /* A C expression that indicates when an argument must be passed by
2716 reference. If nonzero for an argument, a copy of that argument is
2717 made in memory and a pointer to the argument is passed instead of
2718 the argument itself. The pointer is passed in whatever way is
2719 appropriate for passing a pointer to that type. */
2720
2721 int
2722 function_arg_pass_by_reference (CUMULATIVE_ARGS *cum ATTRIBUTE_UNUSED,
2723 enum machine_mode mode ATTRIBUTE_UNUSED,
2724 tree type, int named ATTRIBUTE_UNUSED)
2725 {
2726 if (!TARGET_64BIT)
2727 return 0;
2728
2729 if (type && int_size_in_bytes (type) == -1)
2730 {
2731 if (TARGET_DEBUG_ARG)
2732 fprintf (stderr, "function_arg_pass_by_reference\n");
2733 return 1;
2734 }
2735
2736 return 0;
2737 }
2738
2739 /* Return true when TYPE should be 128bit aligned for the 32bit argument passing
2740    ABI.  */
2741 static bool
2742 contains_128bit_aligned_vector_p (tree type)
2743 {
2744 enum machine_mode mode = TYPE_MODE (type);
2745 if (SSE_REG_MODE_P (mode)
2746 && (!TYPE_USER_ALIGN (type) || TYPE_ALIGN (type) > 128))
2747 return true;
2748 if (TYPE_ALIGN (type) < 128)
2749 return false;
2750
2751 if (AGGREGATE_TYPE_P (type))
2752 {
2753 /* Walk the aggregates recursively. */
2754 if (TREE_CODE (type) == RECORD_TYPE
2755 || TREE_CODE (type) == UNION_TYPE
2756 || TREE_CODE (type) == QUAL_UNION_TYPE)
2757 {
2758 tree field;
2759
2760 if (TYPE_BINFO (type) && BINFO_BASE_BINFOS (TYPE_BINFO (type)))
2761 {
2762 tree bases = BINFO_BASE_BINFOS (TYPE_BINFO (type));
2763 int n_bases = BINFO_N_BASE_BINFOS (TYPE_BINFO (type));
2764 int i;
2765
2766 for (i = 0; i < n_bases; ++i)
2767 {
2768 tree binfo = TREE_VEC_ELT (bases, i);
2769 tree type = BINFO_TYPE (binfo);
2770
2771 if (contains_128bit_aligned_vector_p (type))
2772 return true;
2773 }
2774 }
2775 	  /* And now walk the fields of the structure.  */
2776 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
2777 {
2778 if (TREE_CODE (field) == FIELD_DECL
2779 && contains_128bit_aligned_vector_p (TREE_TYPE (field)))
2780 return true;
2781 }
2782 }
2783       /* Just for use if some languages pass arrays by value.  */
2784 else if (TREE_CODE (type) == ARRAY_TYPE)
2785 {
2786 if (contains_128bit_aligned_vector_p (TREE_TYPE (type)))
2787 return true;
2788 }
2789 else
2790 abort ();
2791 }
2792 return false;
2793 }
2794
2795 /* Gives the alignment boundary, in bits, of an argument with the
2796 specified mode and type. */
2797
2798 int
2799 ix86_function_arg_boundary (enum machine_mode mode, tree type)
2800 {
2801 int align;
2802 if (type)
2803 align = TYPE_ALIGN (type);
2804 else
2805 align = GET_MODE_ALIGNMENT (mode);
2806 if (align < PARM_BOUNDARY)
2807 align = PARM_BOUNDARY;
2808 if (!TARGET_64BIT)
2809 {
2810 /* i386 ABI defines all arguments to be 4 byte aligned. We have to
2811 make an exception for SSE modes since these require 128bit
2812 alignment.
2813
2814 The handling here differs from field_alignment. ICC aligns MMX
2815 arguments to 4 byte boundaries, while structure fields are aligned
2816 to 8 byte boundaries. */
2817 if (!type)
2818 {
2819 if (!SSE_REG_MODE_P (mode))
2820 align = PARM_BOUNDARY;
2821 }
2822 else
2823 {
2824 if (!contains_128bit_aligned_vector_p (type))
2825 align = PARM_BOUNDARY;
2826 }
2827 }
2828 if (align > 128)
2829 align = 128;
2830 return align;
2831 }
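
/* For example, on IA-32 an __m128 (V4SFmode) argument is aligned to 128 bits,
   while a plain double argument only gets PARM_BOUNDARY (32 bits), even though
   GET_MODE_ALIGNMENT (DFmode) is 64.  */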
2832
2833 /* Return true if N is a possible register number of function value. */
2834 bool
2835 ix86_function_value_regno_p (int regno)
2836 {
2837 if (!TARGET_64BIT)
2838 {
2839 return ((regno) == 0
2840 || ((regno) == FIRST_FLOAT_REG && TARGET_FLOAT_RETURNS_IN_80387)
2841 || ((regno) == FIRST_SSE_REG && TARGET_SSE));
2842 }
2843 return ((regno) == 0 || (regno) == FIRST_FLOAT_REG
2844 || ((regno) == FIRST_SSE_REG && TARGET_SSE)
2845 || ((regno) == FIRST_FLOAT_REG && TARGET_FLOAT_RETURNS_IN_80387));
2846 }
2847
2848 /* Define how to find the value returned by a function.
2849 VALTYPE is the data type of the value (as a tree).
2850 If the precise function being called is known, FUNC is its FUNCTION_DECL;
2851 otherwise, FUNC is 0. */
2852 rtx
2853 ix86_function_value (tree valtype)
2854 {
2855 if (TARGET_64BIT)
2856 {
2857 rtx ret = construct_container (TYPE_MODE (valtype), valtype, 1,
2858 REGPARM_MAX, SSE_REGPARM_MAX,
2859 x86_64_int_return_registers, 0);
2860       /* For zero sized structures, construct_container returns NULL, but we need
2861          to keep the rest of the compiler happy by returning a meaningful value.  */
2862 if (!ret)
2863 ret = gen_rtx_REG (TYPE_MODE (valtype), 0);
2864 return ret;
2865 }
2866 else
2867 return gen_rtx_REG (TYPE_MODE (valtype),
2868 ix86_value_regno (TYPE_MODE (valtype)));
2869 }
2870
2871 /* Return nonzero iff TYPE is returned in memory.  */
2872 int
2873 ix86_return_in_memory (tree type)
2874 {
2875 int needed_intregs, needed_sseregs, size;
2876 enum machine_mode mode = TYPE_MODE (type);
2877
2878 if (TARGET_64BIT)
2879 return !examine_argument (mode, type, 1, &needed_intregs, &needed_sseregs);
2880
2881 if (mode == BLKmode)
2882 return 1;
2883
2884 size = int_size_in_bytes (type);
2885
2886 if (MS_AGGREGATE_RETURN && AGGREGATE_TYPE_P (type) && size <= 8)
2887 return 0;
2888
2889 if (VECTOR_MODE_P (mode) || mode == TImode)
2890 {
2891 /* User-created vectors small enough to fit in EAX. */
2892 if (size < 8)
2893 return 0;
2894
2895 /* MMX/3dNow values are returned on the stack, since we've
2896 got to EMMS/FEMMS before returning. */
2897 if (size == 8)
2898 return 1;
2899
2900 /* SSE values are returned in XMM0, except when it doesn't exist. */
2901 if (size == 16)
2902 return (TARGET_SSE ? 0 : 1);
2903 }
2904
2905 if (mode == XFmode)
2906 return 0;
2907
2908 if (size > 12)
2909 return 1;
2910 return 0;
2911 }
2912
2913 /* When returning SSE vector types, we have a choice of either
2914 (1) being abi incompatible with a -march switch, or
2915 (2) generating an error.
2916 Given no good solution, I think the safest thing is one warning.
2917 The user won't be able to use -Werror, but....
2918
2919 Choose the STRUCT_VALUE_RTX hook because that's (at present) only
2920 called in response to actually generating a caller or callee that
2921 uses such a type. As opposed to RETURN_IN_MEMORY, which is called
2922 via aggregate_value_p for general type probing from tree-ssa. */
2923
2924 static rtx
2925 ix86_struct_value_rtx (tree type, int incoming ATTRIBUTE_UNUSED)
2926 {
2927 static bool warned;
2928
2929 if (!TARGET_SSE && type && !warned)
2930 {
2931 /* Look at the return type of the function, not the function type. */
2932 enum machine_mode mode = TYPE_MODE (TREE_TYPE (type));
2933
2934 if (mode == TImode
2935 || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
2936 {
2937 warned = true;
2938 warning ("SSE vector return without SSE enabled changes the ABI");
2939 }
2940 }
2941
2942 return NULL;
2943 }
2944
2945 /* Define how to find the value returned by a library function
2946 assuming the value has mode MODE. */
2947 rtx
2948 ix86_libcall_value (enum machine_mode mode)
2949 {
2950 if (TARGET_64BIT)
2951 {
2952 switch (mode)
2953 {
2954 case SFmode:
2955 case SCmode:
2956 case DFmode:
2957 case DCmode:
2958 return gen_rtx_REG (mode, FIRST_SSE_REG);
2959 case XFmode:
2960 case XCmode:
2961 return gen_rtx_REG (mode, FIRST_FLOAT_REG);
2962 case TFmode:
2963 case TCmode:
2964 return NULL;
2965 default:
2966 return gen_rtx_REG (mode, 0);
2967 }
2968 }
2969 else
2970 return gen_rtx_REG (mode, ix86_value_regno (mode));
2971 }
2972
2973 /* Given a mode, return the register to use for a return value. */
2974
2975 static int
2976 ix86_value_regno (enum machine_mode mode)
2977 {
2978 /* Floating point return values in %st(0). */
2979 if (GET_MODE_CLASS (mode) == MODE_FLOAT && TARGET_FLOAT_RETURNS_IN_80387)
2980 return FIRST_FLOAT_REG;
2981 /* 16-byte vector modes in %xmm0. See ix86_return_in_memory for where
2982 we prevent this case when sse is not available. */
2983 if (mode == TImode || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
2984 return FIRST_SSE_REG;
2985 /* Everything else in %eax. */
2986 return 0;
2987 }
2988 \f
2989 /* Create the va_list data type. */
2990
2991 static tree
2992 ix86_build_builtin_va_list (void)
2993 {
2994 tree f_gpr, f_fpr, f_ovf, f_sav, record, type_decl;
2995
2996 /* For i386 we use plain pointer to argument area. */
2997 if (!TARGET_64BIT)
2998 return build_pointer_type (char_type_node);
2999
3000 record = (*lang_hooks.types.make_type) (RECORD_TYPE);
3001 type_decl = build_decl (TYPE_DECL, get_identifier ("__va_list_tag"), record);
3002
3003 f_gpr = build_decl (FIELD_DECL, get_identifier ("gp_offset"),
3004 unsigned_type_node);
3005 f_fpr = build_decl (FIELD_DECL, get_identifier ("fp_offset"),
3006 unsigned_type_node);
3007 f_ovf = build_decl (FIELD_DECL, get_identifier ("overflow_arg_area"),
3008 ptr_type_node);
3009 f_sav = build_decl (FIELD_DECL, get_identifier ("reg_save_area"),
3010 ptr_type_node);
3011
3012 DECL_FIELD_CONTEXT (f_gpr) = record;
3013 DECL_FIELD_CONTEXT (f_fpr) = record;
3014 DECL_FIELD_CONTEXT (f_ovf) = record;
3015 DECL_FIELD_CONTEXT (f_sav) = record;
3016
3017 TREE_CHAIN (record) = type_decl;
3018 TYPE_NAME (record) = type_decl;
3019 TYPE_FIELDS (record) = f_gpr;
3020 TREE_CHAIN (f_gpr) = f_fpr;
3021 TREE_CHAIN (f_fpr) = f_ovf;
3022 TREE_CHAIN (f_ovf) = f_sav;
3023
3024 layout_type (record);
3025
3026 /* The correct type is an array type of one element. */
3027 return build_array_type (record, build_index_type (size_zero_node));
3028 }
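
/* The record built above corresponds to the usual x86-64 va_list layout:

     typedef struct {
       unsigned int gp_offset;
       unsigned int fp_offset;
       void *overflow_arg_area;
       void *reg_save_area;
     } __va_list_tag;
     typedef __va_list_tag va_list[1];

   gp_offset and fp_offset index into reg_save_area; overflow_arg_area points
   at the stack-passed arguments.  */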
3029
3030 /* Worker function for TARGET_SETUP_INCOMING_VARARGS. */
3031
3032 static void
3033 ix86_setup_incoming_varargs (CUMULATIVE_ARGS *cum, enum machine_mode mode,
3034 tree type, int *pretend_size ATTRIBUTE_UNUSED,
3035 int no_rtl)
3036 {
3037 CUMULATIVE_ARGS next_cum;
3038 rtx save_area = NULL_RTX, mem;
3039 rtx label;
3040 rtx label_ref;
3041 rtx tmp_reg;
3042 rtx nsse_reg;
3043 int set;
3044 tree fntype;
3045 int stdarg_p;
3046 int i;
3047
3048 if (!TARGET_64BIT)
3049 return;
3050
3051   /* Indicate that we need to allocate stack space for the varargs save area.  */
3052 ix86_save_varrargs_registers = 1;
3053
3054 cfun->stack_alignment_needed = 128;
3055
3056 fntype = TREE_TYPE (current_function_decl);
3057 stdarg_p = (TYPE_ARG_TYPES (fntype) != 0
3058 && (TREE_VALUE (tree_last (TYPE_ARG_TYPES (fntype)))
3059 != void_type_node));
3060
3061 /* For varargs, we do not want to skip the dummy va_dcl argument.
3062 For stdargs, we do want to skip the last named argument. */
3063 next_cum = *cum;
3064 if (stdarg_p)
3065 function_arg_advance (&next_cum, mode, type, 1);
3066
3067 if (!no_rtl)
3068 save_area = frame_pointer_rtx;
3069
3070 set = get_varargs_alias_set ();
3071
3072 for (i = next_cum.regno; i < ix86_regparm; i++)
3073 {
3074 mem = gen_rtx_MEM (Pmode,
3075 plus_constant (save_area, i * UNITS_PER_WORD));
3076 set_mem_alias_set (mem, set);
3077 emit_move_insn (mem, gen_rtx_REG (Pmode,
3078 x86_64_int_parameter_registers[i]));
3079 }
3080
3081 if (next_cum.sse_nregs)
3082 {
3083       /* Now emit code to save the SSE registers.  The AX parameter contains the
3084 	 number of SSE parameter registers used to call this function.  We use the
3085 	 sse_prologue_save insn template, which produces a computed jump across the
3086 	 SSE saves.  We need some preparation work to get this working.  */
3087
3088 label = gen_label_rtx ();
3089 label_ref = gen_rtx_LABEL_REF (Pmode, label);
3090
3091       /* Compute the address to jump to:
3092 	 label - eax*4 + nnamed_sse_arguments*4  */
3093 tmp_reg = gen_reg_rtx (Pmode);
3094 nsse_reg = gen_reg_rtx (Pmode);
3095 emit_insn (gen_zero_extendqidi2 (nsse_reg, gen_rtx_REG (QImode, 0)));
3096 emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
3097 gen_rtx_MULT (Pmode, nsse_reg,
3098 GEN_INT (4))));
3099 if (next_cum.sse_regno)
3100 emit_move_insn
3101 (nsse_reg,
3102 gen_rtx_CONST (DImode,
3103 gen_rtx_PLUS (DImode,
3104 label_ref,
3105 GEN_INT (next_cum.sse_regno * 4))));
3106 else
3107 emit_move_insn (nsse_reg, label_ref);
3108 emit_insn (gen_subdi3 (nsse_reg, nsse_reg, tmp_reg));
3109
3110       /* Compute the address of the memory block we save into.  We always use a
3111 	 pointer pointing 127 bytes after the first byte to store - this keeps
3112 	 each save instruction limited to 4 bytes, since the displacement then fits in a signed byte.  */
3113 tmp_reg = gen_reg_rtx (Pmode);
3114 emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
3115 plus_constant (save_area,
3116 8 * REGPARM_MAX + 127)));
3117 mem = gen_rtx_MEM (BLKmode, plus_constant (tmp_reg, -127));
3118 set_mem_alias_set (mem, set);
3119 set_mem_align (mem, BITS_PER_WORD);
3120
3121 /* And finally do the dirty job! */
3122 emit_insn (gen_sse_prologue_save (mem, nsse_reg,
3123 GEN_INT (next_cum.sse_regno), label));
3124 }
3125
3126 }
3127
3128 /* Implement va_start. */
3129
3130 void
3131 ix86_va_start (tree valist, rtx nextarg)
3132 {
3133 HOST_WIDE_INT words, n_gpr, n_fpr;
3134 tree f_gpr, f_fpr, f_ovf, f_sav;
3135 tree gpr, fpr, ovf, sav, t;
3136
3137 /* Only 64bit target needs something special. */
3138 if (!TARGET_64BIT)
3139 {
3140 std_expand_builtin_va_start (valist, nextarg);
3141 return;
3142 }
3143
3144 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
3145 f_fpr = TREE_CHAIN (f_gpr);
3146 f_ovf = TREE_CHAIN (f_fpr);
3147 f_sav = TREE_CHAIN (f_ovf);
3148
3149 valist = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (valist)), valist);
3150 gpr = build (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr, NULL_TREE);
3151 fpr = build (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
3152 ovf = build (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
3153 sav = build (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
3154
3155 /* Count number of gp and fp argument registers used. */
3156 words = current_function_args_info.words;
3157 n_gpr = current_function_args_info.regno;
3158 n_fpr = current_function_args_info.sse_regno;
3159
3160 if (TARGET_DEBUG_ARG)
3161 fprintf (stderr, "va_start: words = %d, n_gpr = %d, n_fpr = %d\n",
3162 (int) words, (int) n_gpr, (int) n_fpr);
3163
3164 t = build (MODIFY_EXPR, TREE_TYPE (gpr), gpr,
3165 build_int_2 (n_gpr * 8, 0));
3166 TREE_SIDE_EFFECTS (t) = 1;
3167 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
3168
3169 t = build (MODIFY_EXPR, TREE_TYPE (fpr), fpr,
3170 build_int_2 (n_fpr * 16 + 8*REGPARM_MAX, 0));
3171 TREE_SIDE_EFFECTS (t) = 1;
3172 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
3173
3174 /* Find the overflow area. */
3175 t = make_tree (TREE_TYPE (ovf), virtual_incoming_args_rtx);
3176 if (words != 0)
3177 t = build (PLUS_EXPR, TREE_TYPE (ovf), t,
3178 build_int_2 (words * UNITS_PER_WORD, 0));
3179 t = build (MODIFY_EXPR, TREE_TYPE (ovf), ovf, t);
3180 TREE_SIDE_EFFECTS (t) = 1;
3181 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
3182
3183 /* Find the register save area.
3184      The function prologue saves it right above the stack frame.  */
3185 t = make_tree (TREE_TYPE (sav), frame_pointer_rtx);
3186 t = build (MODIFY_EXPR, TREE_TYPE (sav), sav, t);
3187 TREE_SIDE_EFFECTS (t) = 1;
3188 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
3189 }
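
/* For example, in a function declared
     void f (int a, double b, ...)
   the named arguments consume one GP and one SSE register, so the code above
   stores gp_offset = 8 and fp_offset = 16 + 8*REGPARM_MAX (64 with the six
   x86-64 integer argument registers), and overflow_arg_area points just past
   any named arguments that were passed on the stack.  */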
3190
3191 /* Implement va_arg. */
3192
3193 tree
3194 ix86_gimplify_va_arg (tree valist, tree type, tree *pre_p, tree *post_p)
3195 {
3196 static const int intreg[6] = { 0, 1, 2, 3, 4, 5 };
3197 tree f_gpr, f_fpr, f_ovf, f_sav;
3198 tree gpr, fpr, ovf, sav, t;
3199 int size, rsize;
3200 tree lab_false, lab_over = NULL_TREE;
3201 tree addr, t2;
3202 rtx container;
3203 int indirect_p = 0;
3204 tree ptrtype;
3205
3206 /* Only 64bit target needs something special. */
3207 if (!TARGET_64BIT)
3208 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
3209
3210 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
3211 f_fpr = TREE_CHAIN (f_gpr);
3212 f_ovf = TREE_CHAIN (f_fpr);
3213 f_sav = TREE_CHAIN (f_ovf);
3214
3215 valist = build_fold_indirect_ref (valist);
3216 gpr = build (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr, NULL_TREE);
3217 fpr = build (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
3218 ovf = build (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
3219 sav = build (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
3220
3221 size = int_size_in_bytes (type);
3222 if (size == -1)
3223 {
3224 /* Variable-size types are passed by reference. */
3225 indirect_p = 1;
3226 type = build_pointer_type (type);
3227 size = int_size_in_bytes (type);
3228 }
3229 rsize = (size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
3230
3231 container = construct_container (TYPE_MODE (type), type, 0,
3232 REGPARM_MAX, SSE_REGPARM_MAX, intreg, 0);
3233 /*
3234 * Pull the value out of the saved registers ...
3235 */
3236
3237 addr = create_tmp_var (ptr_type_node, "addr");
3238 DECL_POINTER_ALIAS_SET (addr) = get_varargs_alias_set ();
3239
3240 if (container)
3241 {
3242 int needed_intregs, needed_sseregs;
3243 bool need_temp;
3244 tree int_addr, sse_addr;
3245
3246 lab_false = create_artificial_label ();
3247 lab_over = create_artificial_label ();
3248
3249 examine_argument (TYPE_MODE (type), type, 0,
3250 &needed_intregs, &needed_sseregs);
3251
3252 need_temp = (!REG_P (container)
3253 && ((needed_intregs && TYPE_ALIGN (type) > 64)
3254 || TYPE_ALIGN (type) > 128));
3255
3256       /* If we are passing a structure, verify that it is a consecutive block
3257 	 in the register save area.  If not, we need to do moves.  */
3258 if (!need_temp && !REG_P (container))
3259 {
3260 /* Verify that all registers are strictly consecutive */
3261 if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container, 0, 0), 0))))
3262 {
3263 int i;
3264
3265 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
3266 {
3267 rtx slot = XVECEXP (container, 0, i);
3268 if (REGNO (XEXP (slot, 0)) != FIRST_SSE_REG + (unsigned int) i
3269 || INTVAL (XEXP (slot, 1)) != i * 16)
3270 need_temp = 1;
3271 }
3272 }
3273 else
3274 {
3275 int i;
3276
3277 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
3278 {
3279 rtx slot = XVECEXP (container, 0, i);
3280 if (REGNO (XEXP (slot, 0)) != (unsigned int) i
3281 || INTVAL (XEXP (slot, 1)) != i * 8)
3282 need_temp = 1;
3283 }
3284 }
3285 }
3286 if (!need_temp)
3287 {
3288 int_addr = addr;
3289 sse_addr = addr;
3290 }
3291 else
3292 {
3293 int_addr = create_tmp_var (ptr_type_node, "int_addr");
3294 DECL_POINTER_ALIAS_SET (int_addr) = get_varargs_alias_set ();
3295 sse_addr = create_tmp_var (ptr_type_node, "sse_addr");
3296 DECL_POINTER_ALIAS_SET (sse_addr) = get_varargs_alias_set ();
3297 }
3298 /* First ensure that we fit completely in registers. */
3299 if (needed_intregs)
3300 {
3301 t = build_int_2 ((REGPARM_MAX - needed_intregs + 1) * 8, 0);
3302 TREE_TYPE (t) = TREE_TYPE (gpr);
3303 t = build2 (GE_EXPR, boolean_type_node, gpr, t);
3304 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
3305 t = build (COND_EXPR, void_type_node, t, t2, NULL_TREE);
3306 gimplify_and_add (t, pre_p);
3307 }
3308 if (needed_sseregs)
3309 {
3310 t = build_int_2 ((SSE_REGPARM_MAX - needed_sseregs + 1) * 16
3311 + REGPARM_MAX * 8, 0);
3312 TREE_TYPE (t) = TREE_TYPE (fpr);
3313 t = build2 (GE_EXPR, boolean_type_node, fpr, t);
3314 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
3315 t = build (COND_EXPR, void_type_node, t, t2, NULL_TREE);
3316 gimplify_and_add (t, pre_p);
3317 }
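      /* With the usual REGPARM_MAX of 6 and SSE_REGPARM_MAX of 8, the save
         area holds six 8-byte integer slots followed by eight 16-byte SSE
         slots, so gp_offset saturates at 48 and fp_offset at 176.  */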
3318
3319 /* Compute index to start of area used for integer regs. */
3320 if (needed_intregs)
3321 {
3322 /* int_addr = gpr + sav; */
3323 t = build2 (PLUS_EXPR, ptr_type_node, sav, gpr);
3324 t = build2 (MODIFY_EXPR, void_type_node, int_addr, t);
3325 gimplify_and_add (t, pre_p);
3326 }
3327 if (needed_sseregs)
3328 {
3329 /* sse_addr = fpr + sav; */
3330 t = build2 (PLUS_EXPR, ptr_type_node, sav, fpr);
3331 t = build2 (MODIFY_EXPR, void_type_node, sse_addr, t);
3332 gimplify_and_add (t, pre_p);
3333 }
3334 if (need_temp)
3335 {
3336 int i;
3337 tree temp = create_tmp_var (type, "va_arg_tmp");
3338
3339 /* addr = &temp; */
3340 t = build1 (ADDR_EXPR, build_pointer_type (type), temp);
3341 t = build2 (MODIFY_EXPR, void_type_node, addr, t);
3342 gimplify_and_add (t, pre_p);
3343
3344 for (i = 0; i < XVECLEN (container, 0); i++)
3345 {
3346 rtx slot = XVECEXP (container, 0, i);
3347 rtx reg = XEXP (slot, 0);
3348 enum machine_mode mode = GET_MODE (reg);
3349 tree piece_type = lang_hooks.types.type_for_mode (mode, 1);
3350 tree addr_type = build_pointer_type (piece_type);
3351 tree src_addr, src;
3352 int src_offset;
3353 tree dest_addr, dest;
3354
3355 if (SSE_REGNO_P (REGNO (reg)))
3356 {
3357 src_addr = sse_addr;
3358 src_offset = (REGNO (reg) - FIRST_SSE_REG) * 16;
3359 }
3360 else
3361 {
3362 src_addr = int_addr;
3363 src_offset = REGNO (reg) * 8;
3364 }
3365 src_addr = fold_convert (addr_type, src_addr);
3366 src_addr = fold (build2 (PLUS_EXPR, addr_type, src_addr,
3367 size_int (src_offset)));
3368 src = build_fold_indirect_ref (src_addr);
3369
3370 dest_addr = fold_convert (addr_type, addr);
3371 dest_addr = fold (build2 (PLUS_EXPR, addr_type, dest_addr,
3372 size_int (INTVAL (XEXP (slot, 1)))));
3373 dest = build_fold_indirect_ref (dest_addr);
3374
3375 t = build2 (MODIFY_EXPR, void_type_node, dest, src);
3376 gimplify_and_add (t, pre_p);
3377 }
3378 }
3379
3380 if (needed_intregs)
3381 {
3382 t = build2 (PLUS_EXPR, TREE_TYPE (gpr), gpr,
3383 build_int_2 (needed_intregs * 8, 0));
3384 t = build2 (MODIFY_EXPR, TREE_TYPE (gpr), gpr, t);
3385 gimplify_and_add (t, pre_p);
3386 }
3387 if (needed_sseregs)
3388 {
3389 t =
3390 build2 (PLUS_EXPR, TREE_TYPE (fpr), fpr,
3391 build_int_2 (needed_sseregs * 16, 0));
3392 t = build2 (MODIFY_EXPR, TREE_TYPE (fpr), fpr, t);
3393 gimplify_and_add (t, pre_p);
3394 }
3395
3396 t = build1 (GOTO_EXPR, void_type_node, lab_over);
3397 gimplify_and_add (t, pre_p);
3398
3399 t = build1 (LABEL_EXPR, void_type_node, lab_false);
3400 append_to_statement_list (t, pre_p);
3401 }
3402
3403 /* ... otherwise out of the overflow area. */
3404
3405 /* Care for on-stack alignment if needed. */
3406 if (FUNCTION_ARG_BOUNDARY (VOIDmode, type) <= 64)
3407 t = ovf;
3408 else
3409 {
3410 HOST_WIDE_INT align = FUNCTION_ARG_BOUNDARY (VOIDmode, type) / 8;
3411 t = build (PLUS_EXPR, TREE_TYPE (ovf), ovf, build_int_2 (align - 1, 0));
3412 t = build (BIT_AND_EXPR, TREE_TYPE (t), t, build_int_2 (-align, -1));
3413 }
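  /* This is the usual round-up idiom: for a 16-byte boundary, adding 15 and
     masking with -16 advances ovf to the next 16-byte aligned address.  */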
3414 gimplify_expr (&t, pre_p, NULL, is_gimple_val, fb_rvalue);
3415
3416 t2 = build2 (MODIFY_EXPR, void_type_node, addr, t);
3417 gimplify_and_add (t2, pre_p);
3418
3419 t = build2 (PLUS_EXPR, TREE_TYPE (t), t,
3420 build_int_2 (rsize * UNITS_PER_WORD, 0));
3421 t = build2 (MODIFY_EXPR, TREE_TYPE (ovf), ovf, t);
3422 gimplify_and_add (t, pre_p);
3423
3424 if (container)
3425 {
3426 t = build1 (LABEL_EXPR, void_type_node, lab_over);
3427 append_to_statement_list (t, pre_p);
3428 }
3429
3430 ptrtype = build_pointer_type (type);
3431 addr = fold_convert (ptrtype, addr);
3432
3433 if (indirect_p)
3434 addr = build_fold_indirect_ref (addr);
3435 return build_fold_indirect_ref (addr);
3436 }
3437 \f
3438 /* Return nonzero if OP is either an i387 or SSE fp register. */
3439 int
3440 any_fp_register_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3441 {
3442 return ANY_FP_REG_P (op);
3443 }
3444
3445 /* Return nonzero if OP is an i387 fp register. */
3446 int
3447 fp_register_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3448 {
3449 return FP_REG_P (op);
3450 }
3451
3452 /* Return nonzero if OP is a non-fp register_operand. */
3453 int
3454 register_and_not_any_fp_reg_operand (rtx op, enum machine_mode mode)
3455 {
3456 return register_operand (op, mode) && !ANY_FP_REG_P (op);
3457 }
3458
3459 /* Return nonzero if OP is a register operand other than an
3460 i387 fp register. */
3461 int
3462 register_and_not_fp_reg_operand (rtx op, enum machine_mode mode)
3463 {
3464 return register_operand (op, mode) && !FP_REG_P (op);
3465 }
3466
3467 /* Return nonzero if OP is a general operand representable on x86_64. */
3468
3469 int
3470 x86_64_general_operand (rtx op, enum machine_mode mode)
3471 {
3472 if (!TARGET_64BIT)
3473 return general_operand (op, mode);
3474 if (nonimmediate_operand (op, mode))
3475 return 1;
3476 return x86_64_sign_extended_value (op);
3477 }
3478
3479 /* Return nonzero if OP is a general operand representable on x86_64
3480 as either a sign-extended or zero-extended constant. */
3481
3482 int
3483 x86_64_szext_general_operand (rtx op, enum machine_mode mode)
3484 {
3485 if (!TARGET_64BIT)
3486 return general_operand (op, mode);
3487 if (nonimmediate_operand (op, mode))
3488 return 1;
3489 return x86_64_sign_extended_value (op) || x86_64_zero_extended_value (op);
3490 }
3491
3492 /* Return nonzero if OP is a nonmemory operand representable on x86_64. */
3493
3494 int
3495 x86_64_nonmemory_operand (rtx op, enum machine_mode mode)
3496 {
3497 if (!TARGET_64BIT)
3498 return nonmemory_operand (op, mode);
3499 if (register_operand (op, mode))
3500 return 1;
3501 return x86_64_sign_extended_value (op);
3502 }
3503
3504 /* Return nonzero if OP is a nonmemory operand acceptable to movabs patterns. */
3505
3506 int
3507 x86_64_movabs_operand (rtx op, enum machine_mode mode)
3508 {
3509 if (!TARGET_64BIT || !flag_pic)
3510 return nonmemory_operand (op, mode);
3511 if (register_operand (op, mode) || x86_64_sign_extended_value (op))
3512 return 1;
3513 if (CONSTANT_P (op) && !symbolic_reference_mentioned_p (op))
3514 return 1;
3515 return 0;
3516 }
3517
3518 /* Return nonzero if OPNUM's MEM should be matched
3519 in movabs* patterns. */
3520
3521 int
3522 ix86_check_movabs (rtx insn, int opnum)
3523 {
3524 rtx set, mem;
3525
3526 set = PATTERN (insn);
3527 if (GET_CODE (set) == PARALLEL)
3528 set = XVECEXP (set, 0, 0);
3529 if (GET_CODE (set) != SET)
3530 abort ();
3531 mem = XEXP (set, opnum);
3532 while (GET_CODE (mem) == SUBREG)
3533 mem = SUBREG_REG (mem);
3534 if (GET_CODE (mem) != MEM)
3535 abort ();
3536 return (volatile_ok || !MEM_VOLATILE_P (mem));
3537 }
3538
3539 /* Return nonzero if OP is a nonmemory operand representable on x86_64 as a sign- or zero-extended constant. */
3540
3541 int
3542 x86_64_szext_nonmemory_operand (rtx op, enum machine_mode mode)
3543 {
3544 if (!TARGET_64BIT)
3545 return nonmemory_operand (op, mode);
3546 if (register_operand (op, mode))
3547 return 1;
3548 return x86_64_sign_extended_value (op) || x86_64_zero_extended_value (op);
3549 }
3550
3551 /* Return nonzero if OP is an immediate operand representable on x86_64. */
3552
3553 int
3554 x86_64_immediate_operand (rtx op, enum machine_mode mode)
3555 {
3556 if (!TARGET_64BIT)
3557 return immediate_operand (op, mode);
3558 return x86_64_sign_extended_value (op);
3559 }
3560
3561 /* Return nonzero if OP is an immediate operand representable on x86_64 as a zero-extended constant. */
3562
3563 int
3564 x86_64_zext_immediate_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3565 {
3566 return x86_64_zero_extended_value (op);
3567 }
3568
3569 /* Return nonzero if OP is CONST_INT >= 1 and <= 31 (a valid operand
3570 for shift & compare patterns, as shifting by 0 does not change flags),
3571 else return zero. */
3572
3573 int
3574 const_int_1_31_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3575 {
3576 return (GET_CODE (op) == CONST_INT && INTVAL (op) >= 1 && INTVAL (op) <= 31);
3577 }
3578
3579 /* Returns 1 if OP is either a symbol reference or a sum of a symbol
3580 reference and a constant. */
3581
3582 int
3583 symbolic_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3584 {
3585 switch (GET_CODE (op))
3586 {
3587 case SYMBOL_REF:
3588 case LABEL_REF:
3589 return 1;
3590
3591 case CONST:
3592 op = XEXP (op, 0);
3593 if (GET_CODE (op) == SYMBOL_REF
3594 || GET_CODE (op) == LABEL_REF
3595 || (GET_CODE (op) == UNSPEC
3596 && (XINT (op, 1) == UNSPEC_GOT
3597 || XINT (op, 1) == UNSPEC_GOTOFF
3598 || XINT (op, 1) == UNSPEC_GOTPCREL)))
3599 return 1;
3600 if (GET_CODE (op) != PLUS
3601 || GET_CODE (XEXP (op, 1)) != CONST_INT)
3602 return 0;
3603
3604 op = XEXP (op, 0);
3605 if (GET_CODE (op) == SYMBOL_REF
3606 || GET_CODE (op) == LABEL_REF)
3607 return 1;
3608 /* Only @GOTOFF gets offsets. */
3609 if (GET_CODE (op) != UNSPEC
3610 || XINT (op, 1) != UNSPEC_GOTOFF)
3611 return 0;
3612
3613 op = XVECEXP (op, 0, 0);
3614 if (GET_CODE (op) == SYMBOL_REF
3615 || GET_CODE (op) == LABEL_REF)
3616 return 1;
3617 return 0;
3618
3619 default:
3620 return 0;
3621 }
3622 }
3623
3624 /* Return true if the operand contains a @GOT or @GOTOFF reference. */
3625
3626 int
3627 pic_symbolic_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3628 {
3629 if (GET_CODE (op) != CONST)
3630 return 0;
3631 op = XEXP (op, 0);
3632 if (TARGET_64BIT)
3633 {
3634 if (GET_CODE (op) == UNSPEC
3635 && XINT (op, 1) == UNSPEC_GOTPCREL)
3636 return 1;
3637 if (GET_CODE (op) == PLUS
3638 && GET_CODE (XEXP (op, 0)) == UNSPEC
3639 && XINT (XEXP (op, 0), 1) == UNSPEC_GOTPCREL)
3640 return 1;
3641 }
3642 else
3643 {
3644 if (GET_CODE (op) == UNSPEC)
3645 return 1;
3646 if (GET_CODE (op) != PLUS
3647 || GET_CODE (XEXP (op, 1)) != CONST_INT)
3648 return 0;
3649 op = XEXP (op, 0);
3650 if (GET_CODE (op) == UNSPEC)
3651 return 1;
3652 }
3653 return 0;
3654 }
3655
3656 /* Return true if OP is a symbolic operand that resolves locally. */
3657
3658 static int
3659 local_symbolic_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3660 {
3661 if (GET_CODE (op) == CONST
3662 && GET_CODE (XEXP (op, 0)) == PLUS
3663 && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST_INT)
3664 op = XEXP (XEXP (op, 0), 0);
3665
3666 if (GET_CODE (op) == LABEL_REF)
3667 return 1;
3668
3669 if (GET_CODE (op) != SYMBOL_REF)
3670 return 0;
3671
3672 if (SYMBOL_REF_LOCAL_P (op))
3673 return 1;
3674
3675 /* There is, however, a not insubstantial body of code in the rest of
3676 the compiler that assumes it can just stick the results of
3677 ASM_GENERATE_INTERNAL_LABEL in a symbol_ref and have done. */
3678 /* ??? This is a hack. Should update the body of the compiler to
3679 always create a DECL and invoke targetm.encode_section_info. */
3680 if (strncmp (XSTR (op, 0), internal_label_prefix,
3681 internal_label_prefix_len) == 0)
3682 return 1;
3683
3684 return 0;
3685 }
3686
3687 /* Test for various thread-local symbols. */
3688
3689 int
3690 tls_symbolic_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3691 {
3692 if (GET_CODE (op) != SYMBOL_REF)
3693 return 0;
3694 return SYMBOL_REF_TLS_MODEL (op);
3695 }
3696
3697 static inline int
3698 tls_symbolic_operand_1 (rtx op, enum tls_model kind)
3699 {
3700 if (GET_CODE (op) != SYMBOL_REF)
3701 return 0;
3702 return SYMBOL_REF_TLS_MODEL (op) == kind;
3703 }
3704
3705 int
3706 global_dynamic_symbolic_operand (rtx op,
3707 enum machine_mode mode ATTRIBUTE_UNUSED)
3708 {
3709 return tls_symbolic_operand_1 (op, TLS_MODEL_GLOBAL_DYNAMIC);
3710 }
3711
3712 int
3713 local_dynamic_symbolic_operand (rtx op,
3714 enum machine_mode mode ATTRIBUTE_UNUSED)
3715 {
3716 return tls_symbolic_operand_1 (op, TLS_MODEL_LOCAL_DYNAMIC);
3717 }
3718
3719 int
3720 initial_exec_symbolic_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3721 {
3722 return tls_symbolic_operand_1 (op, TLS_MODEL_INITIAL_EXEC);
3723 }
3724
3725 int
3726 local_exec_symbolic_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3727 {
3728 return tls_symbolic_operand_1 (op, TLS_MODEL_LOCAL_EXEC);
3729 }
3730
3731 /* Test for a valid operand for a call instruction. Don't allow the
3732 arg pointer register or virtual regs since they may decay into
3733 reg + const, which the patterns can't handle. */
3734
3735 int
3736 call_insn_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3737 {
3738 /* Disallow indirect through a virtual register. This leads to
3739 compiler aborts when trying to eliminate them. */
3740 if (GET_CODE (op) == REG
3741 && (op == arg_pointer_rtx
3742 || op == frame_pointer_rtx
3743 || (REGNO (op) >= FIRST_PSEUDO_REGISTER
3744 && REGNO (op) <= LAST_VIRTUAL_REGISTER)))
3745 return 0;
3746
3747 /* Disallow `call 1234'. Due to varying assembler lameness this
3748 gets either rejected or translated to `call .+1234'. */
3749 if (GET_CODE (op) == CONST_INT)
3750 return 0;
3751
3752 /* Explicitly allow SYMBOL_REF even if pic. */
3753 if (GET_CODE (op) == SYMBOL_REF)
3754 return 1;
3755
3756 /* Otherwise we can allow any general_operand in the address. */
3757 return general_operand (op, Pmode);
3758 }
3759
3760 /* Test for a valid operand for a call instruction. Don't allow the
3761 arg pointer register or virtual regs since they may decay into
3762 reg + const, which the patterns can't handle. */
3763
3764 int
3765 sibcall_insn_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3766 {
3767 /* Disallow indirect through a virtual register. This leads to
3768 compiler aborts when trying to eliminate them. */
3769 if (GET_CODE (op) == REG
3770 && (op == arg_pointer_rtx
3771 || op == frame_pointer_rtx
3772 || (REGNO (op) >= FIRST_PSEUDO_REGISTER
3773 && REGNO (op) <= LAST_VIRTUAL_REGISTER)))
3774 return 0;
3775
3776 /* Explicitly allow SYMBOL_REF even if pic. */
3777 if (GET_CODE (op) == SYMBOL_REF)
3778 return 1;
3779
3780 /* Otherwise we can only allow register operands. */
3781 return register_operand (op, Pmode);
3782 }
3783
3784 int
3785 constant_call_address_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3786 {
3787 if (GET_CODE (op) == CONST
3788 && GET_CODE (XEXP (op, 0)) == PLUS
3789 && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST_INT)
3790 op = XEXP (XEXP (op, 0), 0);
3791 return GET_CODE (op) == SYMBOL_REF;
3792 }
3793
3794 /* Match exactly zero and one. */
3795
3796 int
3797 const0_operand (rtx op, enum machine_mode mode)
3798 {
3799 return op == CONST0_RTX (mode);
3800 }
3801
3802 int
3803 const1_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3804 {
3805 return op == const1_rtx;
3806 }
3807
3808 /* Match 2, 4, or 8. Used for leal multiplicands. */
3809
3810 int
3811 const248_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3812 {
3813 return (GET_CODE (op) == CONST_INT
3814 && (INTVAL (op) == 2 || INTVAL (op) == 4 || INTVAL (op) == 8));
3815 }
3816
3817 int
3818 const_0_to_3_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3819 {
3820 return (GET_CODE (op) == CONST_INT && INTVAL (op) >= 0 && INTVAL (op) < 4);
3821 }
3822
3823 int
3824 const_0_to_7_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3825 {
3826 return (GET_CODE (op) == CONST_INT && INTVAL (op) >= 0 && INTVAL (op) < 8);
3827 }
3828
3829 int
3830 const_0_to_15_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3831 {
3832 return (GET_CODE (op) == CONST_INT && INTVAL (op) >= 0 && INTVAL (op) < 16);
3833 }
3834
3835 int
3836 const_0_to_255_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3837 {
3838 return (GET_CODE (op) == CONST_INT && INTVAL (op) >= 0 && INTVAL (op) < 256);
3839 }
3840
3841
3842 /* True if this is a constant appropriate for an increment or decrement. */
3843
3844 int
3845 incdec_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3846 {
3847 /* On Pentium 4, the inc and dec operations cause an extra dependency on
3848 the flags register, since the carry flag is not set. */
3849 if ((TARGET_PENTIUM4 || TARGET_NOCONA) && !optimize_size)
3850 return 0;
3851 return op == const1_rtx || op == constm1_rtx;
3852 }
3853
3854 /* Return nonzero if OP is acceptable as operand of DImode shift
3855 expander. */
3856
3857 int
3858 shiftdi_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3859 {
3860 if (TARGET_64BIT)
3861 return nonimmediate_operand (op, mode);
3862 else
3863 return register_operand (op, mode);
3864 }
3865
3866 /* Return false if this is the stack pointer, or any other fake
3867 register eliminable to the stack pointer. Otherwise, this is
3868 a register operand.
3869
3870 This is used to prevent esp from being used as an index reg,
3871 which would only happen in pathological cases. */
3872
3873 int
3874 reg_no_sp_operand (rtx op, enum machine_mode mode)
3875 {
3876 rtx t = op;
3877 if (GET_CODE (t) == SUBREG)
3878 t = SUBREG_REG (t);
3879 if (t == stack_pointer_rtx || t == arg_pointer_rtx || t == frame_pointer_rtx)
3880 return 0;
3881
3882 return register_operand (op, mode);
3883 }
3884
3885 int
3886 mmx_reg_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3887 {
3888 return MMX_REG_P (op);
3889 }
3890
3891 /* Return false if this is any eliminable register. Otherwise
3892 general_operand. */
3893
3894 int
3895 general_no_elim_operand (rtx op, enum machine_mode mode)
3896 {
3897 rtx t = op;
3898 if (GET_CODE (t) == SUBREG)
3899 t = SUBREG_REG (t);
3900 if (t == arg_pointer_rtx || t == frame_pointer_rtx
3901 || t == virtual_incoming_args_rtx || t == virtual_stack_vars_rtx
3902 || t == virtual_stack_dynamic_rtx)
3903 return 0;
3904 if (REG_P (t)
3905 && REGNO (t) >= FIRST_VIRTUAL_REGISTER
3906 && REGNO (t) <= LAST_VIRTUAL_REGISTER)
3907 return 0;
3908
3909 return general_operand (op, mode);
3910 }
3911
3912 /* Return false if this is any eliminable register. Otherwise
3913 register_operand or const_int. */
3914
3915 int
3916 nonmemory_no_elim_operand (rtx op, enum machine_mode mode)
3917 {
3918 rtx t = op;
3919 if (GET_CODE (t) == SUBREG)
3920 t = SUBREG_REG (t);
3921 if (t == arg_pointer_rtx || t == frame_pointer_rtx
3922 || t == virtual_incoming_args_rtx || t == virtual_stack_vars_rtx
3923 || t == virtual_stack_dynamic_rtx)
3924 return 0;
3925
3926 return GET_CODE (op) == CONST_INT || register_operand (op, mode);
3927 }
3928
3929 /* Return false if this is any eliminable register or stack register,
3930 otherwise work like register_operand. */
3931
3932 int
3933 index_register_operand (rtx op, enum machine_mode mode)
3934 {
3935 rtx t = op;
3936 if (GET_CODE (t) == SUBREG)
3937 t = SUBREG_REG (t);
3938 if (!REG_P (t))
3939 return 0;
3940 if (t == arg_pointer_rtx
3941 || t == frame_pointer_rtx
3942 || t == virtual_incoming_args_rtx
3943 || t == virtual_stack_vars_rtx
3944 || t == virtual_stack_dynamic_rtx
3945 || REGNO (t) == STACK_POINTER_REGNUM)
3946 return 0;
3947
3948 return general_operand (op, mode);
3949 }
3950
3951 /* Return true if op is a Q_REGS class register. */
3952
3953 int
3954 q_regs_operand (rtx op, enum machine_mode mode)
3955 {
3956 if (mode != VOIDmode && GET_MODE (op) != mode)
3957 return 0;
3958 if (GET_CODE (op) == SUBREG)
3959 op = SUBREG_REG (op);
3960 return ANY_QI_REG_P (op);
3961 }
3962
3963 /* Return true if op is the flags register. */
3964
3965 int
3966 flags_reg_operand (rtx op, enum machine_mode mode)
3967 {
3968 if (mode != VOIDmode && GET_MODE (op) != mode)
3969 return 0;
3970 return REG_P (op) && REGNO (op) == FLAGS_REG && GET_MODE (op) != VOIDmode;
3971 }
3972
3973 /* Return true if op is a NON_Q_REGS class register. */
3974
3975 int
3976 non_q_regs_operand (rtx op, enum machine_mode mode)
3977 {
3978 if (mode != VOIDmode && GET_MODE (op) != mode)
3979 return 0;
3980 if (GET_CODE (op) == SUBREG)
3981 op = SUBREG_REG (op);
3982 return NON_QI_REG_P (op);
3983 }
3984
3985 int
3986 zero_extended_scalar_load_operand (rtx op,
3987 enum machine_mode mode ATTRIBUTE_UNUSED)
3988 {
3989 unsigned n_elts;
3990 if (GET_CODE (op) != MEM)
3991 return 0;
3992 op = maybe_get_pool_constant (op);
3993 if (!op)
3994 return 0;
3995 if (GET_CODE (op) != CONST_VECTOR)
3996 return 0;
3997 n_elts =
3998 (GET_MODE_SIZE (GET_MODE (op)) /
3999 GET_MODE_SIZE (GET_MODE_INNER (GET_MODE (op))));
4000 for (n_elts--; n_elts > 0; n_elts--)
4001 {
4002 rtx elt = CONST_VECTOR_ELT (op, n_elts);
4003 if (elt != CONST0_RTX (GET_MODE_INNER (GET_MODE (op))))
4004 return 0;
4005 }
4006 return 1;
4007 }
4008
4009 /* Return 1 when OP is an operand acceptable for a standard SSE move. */
4010 int
4011 vector_move_operand (rtx op, enum machine_mode mode)
4012 {
4013 if (nonimmediate_operand (op, mode))
4014 return 1;
4015 if (GET_MODE (op) != mode && mode != VOIDmode)
4016 return 0;
4017 return (op == CONST0_RTX (GET_MODE (op)));
4018 }
4019
4020 /* Return true if op is a valid address and does not contain
4021 a segment override. */
4022
4023 int
4024 no_seg_address_operand (rtx op, enum machine_mode mode)
4025 {
4026 struct ix86_address parts;
4027
4028 if (! address_operand (op, mode))
4029 return 0;
4030
4031 if (! ix86_decompose_address (op, &parts))
4032 abort ();
4033
4034 return parts.seg == SEG_DEFAULT;
4035 }
4036
4037 /* Return 1 if OP is a comparison that can be used in the CMPSS/CMPPS
4038 insns. */
4039 int
4040 sse_comparison_operator (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
4041 {
4042 enum rtx_code code = GET_CODE (op);
4043 switch (code)
4044 {
4045 /* Operations supported directly. */
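    /* These map one-to-one onto the eight CMPSS/CMPPS immediate predicates:
       eq, lt, le, unord, neq, nlt (for UNGE), nle (for UNGT) and ord.  */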
4046 case EQ:
4047 case LT:
4048 case LE:
4049 case UNORDERED:
4050 case NE:
4051 case UNGE:
4052 case UNGT:
4053 case ORDERED:
4054 return 1;
4055 /* These are equivalent to the ones above in non-IEEE comparisons. */
4056 case UNEQ:
4057 case UNLT:
4058 case UNLE:
4059 case LTGT:
4060 case GE:
4061 case GT:
4062 return !TARGET_IEEE_FP;
4063 default:
4064 return 0;
4065 }
4066 }
4067 /* Return 1 if OP is a valid comparison operator in valid mode. */
4068 int
4069 ix86_comparison_operator (rtx op, enum machine_mode mode)
4070 {
4071 enum machine_mode inmode;
4072 enum rtx_code code = GET_CODE (op);
4073 if (mode != VOIDmode && GET_MODE (op) != mode)
4074 return 0;
4075 if (!COMPARISON_P (op))
4076 return 0;
4077 inmode = GET_MODE (XEXP (op, 0));
4078
4079 if (inmode == CCFPmode || inmode == CCFPUmode)
4080 {
4081 enum rtx_code second_code, bypass_code;
4082 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
4083 return (bypass_code == NIL && second_code == NIL);
4084 }
4085 switch (code)
4086 {
4087 case EQ: case NE:
4088 return 1;
4089 case LT: case GE:
4090 if (inmode == CCmode || inmode == CCGCmode
4091 || inmode == CCGOCmode || inmode == CCNOmode)
4092 return 1;
4093 return 0;
4094 case LTU: case GTU: case LEU: case ORDERED: case UNORDERED: case GEU:
4095 if (inmode == CCmode)
4096 return 1;
4097 return 0;
4098 case GT: case LE:
4099 if (inmode == CCmode || inmode == CCGCmode || inmode == CCNOmode)
4100 return 1;
4101 return 0;
4102 default:
4103 return 0;
4104 }
4105 }
4106
4107 /* Return 1 if OP is a valid comparison operator testing whether the
4108 carry flag is set. */
4109 int
4110 ix86_carry_flag_operator (rtx op, enum machine_mode mode)
4111 {
4112 enum machine_mode inmode;
4113 enum rtx_code code = GET_CODE (op);
4114
4115 if (mode != VOIDmode && GET_MODE (op) != mode)
4116 return 0;
4117 if (!COMPARISON_P (op))
4118 return 0;
4119 inmode = GET_MODE (XEXP (op, 0));
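  /* Hard register 17 is FLAGS_REG; only a test of the flags register
     against zero qualifies.  */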
4120 if (GET_CODE (XEXP (op, 0)) != REG
4121 || REGNO (XEXP (op, 0)) != 17
4122 || XEXP (op, 1) != const0_rtx)
4123 return 0;
4124
4125 if (inmode == CCFPmode || inmode == CCFPUmode)
4126 {
4127 enum rtx_code second_code, bypass_code;
4128
4129 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
4130 if (bypass_code != NIL || second_code != NIL)
4131 return 0;
4132 code = ix86_fp_compare_code_to_integer (code);
4133 }
4134 else if (inmode != CCmode)
4135 return 0;
4136 return code == LTU;
4137 }
4138
4139 /* Return 1 if OP is a comparison operator that can be issued by fcmov. */
4140
4141 int
4142 fcmov_comparison_operator (rtx op, enum machine_mode mode)
4143 {
4144 enum machine_mode inmode;
4145 enum rtx_code code = GET_CODE (op);
4146
4147 if (mode != VOIDmode && GET_MODE (op) != mode)
4148 return 0;
4149 if (!COMPARISON_P (op))
4150 return 0;
4151 inmode = GET_MODE (XEXP (op, 0));
4152 if (inmode == CCFPmode || inmode == CCFPUmode)
4153 {
4154 enum rtx_code second_code, bypass_code;
4155
4156 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
4157 if (bypass_code != NIL || second_code != NIL)
4158 return 0;
4159 code = ix86_fp_compare_code_to_integer (code);
4160 }
4161 /* The i387 supports only a limited set of condition codes. */
4162 switch (code)
4163 {
4164 case LTU: case GTU: case LEU: case GEU:
4165 if (inmode == CCmode || inmode == CCFPmode || inmode == CCFPUmode)
4166 return 1;
4167 return 0;
4168 case ORDERED: case UNORDERED:
4169 case EQ: case NE:
4170 return 1;
4171 default:
4172 return 0;
4173 }
4174 }
4175
4176 /* Return 1 if OP is a binary operator that can be promoted to a wider mode. */
4177
4178 int
4179 promotable_binary_operator (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
4180 {
4181 switch (GET_CODE (op))
4182 {
4183 case MULT:
4184 /* Modern CPUs have the same latency for HImode and SImode multiplies,
4185 but the 386 and 486 do HImode multiplies faster. */
4186 return ix86_tune > PROCESSOR_I486;
4187 case PLUS:
4188 case AND:
4189 case IOR:
4190 case XOR:
4191 case ASHIFT:
4192 return 1;
4193 default:
4194 return 0;
4195 }
4196 }
4197
4198 /* Nearly general operand, but accept any const_double, since we wish
4199 to be able to drop them into memory rather than have them get pulled
4200 into registers. */
4201
4202 int
4203 cmp_fp_expander_operand (rtx op, enum machine_mode mode)
4204 {
4205 if (mode != VOIDmode && mode != GET_MODE (op))
4206 return 0;
4207 if (GET_CODE (op) == CONST_DOUBLE)
4208 return 1;
4209 return general_operand (op, mode);
4210 }
4211
4212 /* Match an SI or HImode register for a zero_extract. */
4213
4214 int
4215 ext_register_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
4216 {
4217 int regno;
4218 if ((!TARGET_64BIT || GET_MODE (op) != DImode)
4219 && GET_MODE (op) != SImode && GET_MODE (op) != HImode)
4220 return 0;
4221
4222 if (!register_operand (op, VOIDmode))
4223 return 0;
4224
4225 /* Be careful to accept only registers having upper parts. */
4226 regno = REG_P (op) ? REGNO (op) : REGNO (SUBREG_REG (op));
4227 return (regno > LAST_VIRTUAL_REGISTER || regno < 4);
4228 }
4229
4230 /* Return 1 if this is a valid binary floating-point operation.
4231 OP is the expression matched, and MODE is its mode. */
4232
4233 int
4234 binary_fp_operator (rtx op, enum machine_mode mode)
4235 {
4236 if (mode != VOIDmode && mode != GET_MODE (op))
4237 return 0;
4238
4239 switch (GET_CODE (op))
4240 {
4241 case PLUS:
4242 case MINUS:
4243 case MULT:
4244 case DIV:
4245 return GET_MODE_CLASS (GET_MODE (op)) == MODE_FLOAT;
4246
4247 default:
4248 return 0;
4249 }
4250 }
4251
4252 int
4253 mult_operator (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
4254 {
4255 return GET_CODE (op) == MULT;
4256 }
4257
4258 int
4259 div_operator (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
4260 {
4261 return GET_CODE (op) == DIV;
4262 }
4263
4264 int
4265 arith_or_logical_operator (rtx op, enum machine_mode mode)
4266 {
4267 return ((mode == VOIDmode || GET_MODE (op) == mode)
4268 && ARITHMETIC_P (op));
4269 }
4270
4271 /* Returns 1 if OP is a memory operand with a displacement. */
4272
4273 int
4274 memory_displacement_operand (rtx op, enum machine_mode mode)
4275 {
4276 struct ix86_address parts;
4277
4278 if (! memory_operand (op, mode))
4279 return 0;
4280
4281 if (! ix86_decompose_address (XEXP (op, 0), &parts))
4282 abort ();
4283
4284 return parts.disp != NULL_RTX;
4285 }
4286
4287 /* To avoid problems when jump re-emits comparisons like testqi_ext_ccno_0,
4288 re-recognize the operand to avoid a copy_to_mode_reg that will fail.
4289
4290 ??? It seems likely that this will only work because cmpsi is an
4291 expander, and no actual insns use this. */
4292
4293 int
4294 cmpsi_operand (rtx op, enum machine_mode mode)
4295 {
4296 if (nonimmediate_operand (op, mode))
4297 return 1;
4298
4299 if (GET_CODE (op) == AND
4300 && GET_MODE (op) == SImode
4301 && GET_CODE (XEXP (op, 0)) == ZERO_EXTRACT
4302 && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST_INT
4303 && GET_CODE (XEXP (XEXP (op, 0), 2)) == CONST_INT
4304 && INTVAL (XEXP (XEXP (op, 0), 1)) == 8
4305 && INTVAL (XEXP (XEXP (op, 0), 2)) == 8
4306 && GET_CODE (XEXP (op, 1)) == CONST_INT)
4307 return 1;
4308
4309 return 0;
4310 }
4311
4312 /* Returns 1 if OP is a memory operand that cannot be represented by the
4313 modRM array. */
4314
4315 int
4316 long_memory_operand (rtx op, enum machine_mode mode)
4317 {
4318 if (! memory_operand (op, mode))
4319 return 0;
4320
4321 return memory_address_length (op) != 0;
4322 }
4323
4324 /* Return nonzero if the rtx is known to be aligned. */
4325
4326 int
4327 aligned_operand (rtx op, enum machine_mode mode)
4328 {
4329 struct ix86_address parts;
4330
4331 if (!general_operand (op, mode))
4332 return 0;
4333
4334 /* Registers and immediate operands are always "aligned". */
4335 if (GET_CODE (op) != MEM)
4336 return 1;
4337
4338 /* Don't even try to do any aligned optimizations with volatiles. */
4339 if (MEM_VOLATILE_P (op))
4340 return 0;
4341
4342 op = XEXP (op, 0);
4343
4344 /* Pushes and pops are only valid on the stack pointer. */
4345 if (GET_CODE (op) == PRE_DEC
4346 || GET_CODE (op) == POST_INC)
4347 return 1;
4348
4349 /* Decode the address. */
4350 if (! ix86_decompose_address (op, &parts))
4351 abort ();
4352
4353 /* Look for some component that isn't known to be aligned. */
4354 if (parts.index)
4355 {
4356 if (parts.scale < 4
4357 && REGNO_POINTER_ALIGN (REGNO (parts.index)) < 32)
4358 return 0;
4359 }
4360 if (parts.base)
4361 {
4362 if (REGNO_POINTER_ALIGN (REGNO (parts.base)) < 32)
4363 return 0;
4364 }
4365 if (parts.disp)
4366 {
4367 if (GET_CODE (parts.disp) != CONST_INT
4368 || (INTVAL (parts.disp) & 3) != 0)
4369 return 0;
4370 }
4371
4372 /* Didn't find one -- this must be an aligned address. */
4373 return 1;
4374 }
4375 \f
4376 /* Initialize the table of extra 80387 mathematical constants. */
4377
4378 static void
4379 init_ext_80387_constants (void)
4380 {
4381 static const char * cst[5] =
4382 {
4383 "0.3010299956639811952256464283594894482", /* 0: fldlg2 */
4384 "0.6931471805599453094286904741849753009", /* 1: fldln2 */
4385 "1.4426950408889634073876517827983434472", /* 2: fldl2e */
4386 "3.3219280948873623478083405569094566090", /* 3: fldl2t */
4387 "3.1415926535897932385128089594061862044", /* 4: fldpi */
4388 };
4389 int i;
4390
4391 for (i = 0; i < 5; i++)
4392 {
4393 real_from_string (&ext_80387_constants_table[i], cst[i]);
4394 /* Ensure each constant is rounded to XFmode precision. */
4395 real_convert (&ext_80387_constants_table[i],
4396 XFmode, &ext_80387_constants_table[i]);
4397 }
4398
4399 ext_80387_constants_init = 1;
4400 }
4401
4402 /* Return true if the constant is something that can be loaded with
4403 a special instruction. */
4404
4405 int
4406 standard_80387_constant_p (rtx x)
4407 {
4408 if (GET_CODE (x) != CONST_DOUBLE || !FLOAT_MODE_P (GET_MODE (x)))
4409 return -1;
4410
4411 if (x == CONST0_RTX (GET_MODE (x)))
4412 return 1;
4413 if (x == CONST1_RTX (GET_MODE (x)))
4414 return 2;
4415
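  /* Nonzero return values follow standard_80387_constant_opcode: 1 selects
     fldz, 2 selects fld1, and 3..7 index ext_80387_constants_table.  */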
4416 /* For XFmode constants, try to find a special 80387 instruction when
4417 optimizing for size or on those CPUs that benefit from them. */
4418 if (GET_MODE (x) == XFmode
4419 && (optimize_size || x86_ext_80387_constants & TUNEMASK))
4420 {
4421 REAL_VALUE_TYPE r;
4422 int i;
4423
4424 if (! ext_80387_constants_init)
4425 init_ext_80387_constants ();
4426
4427 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
4428 for (i = 0; i < 5; i++)
4429 if (real_identical (&r, &ext_80387_constants_table[i]))
4430 return i + 3;
4431 }
4432
4433 return 0;
4434 }
4435
4436 /* Return the opcode of the special instruction to be used to load
4437 the constant X. */
4438
4439 const char *
4440 standard_80387_constant_opcode (rtx x)
4441 {
4442 switch (standard_80387_constant_p (x))
4443 {
4444 case 1:
4445 return "fldz";
4446 case 2:
4447 return "fld1";
4448 case 3:
4449 return "fldlg2";
4450 case 4:
4451 return "fldln2";
4452 case 5:
4453 return "fldl2e";
4454 case 6:
4455 return "fldl2t";
4456 case 7:
4457 return "fldpi";
4458 }
4459 abort ();
4460 }
4461
4462 /* Return the CONST_DOUBLE representing the 80387 constant that is
4463 loaded by the specified special instruction. The argument IDX
4464 matches the return value from standard_80387_constant_p. */
4465
4466 rtx
4467 standard_80387_constant_rtx (int idx)
4468 {
4469 int i;
4470
4471 if (! ext_80387_constants_init)
4472 init_ext_80387_constants ();
4473
4474 switch (idx)
4475 {
4476 case 3:
4477 case 4:
4478 case 5:
4479 case 6:
4480 case 7:
4481 i = idx - 3;
4482 break;
4483
4484 default:
4485 abort ();
4486 }
4487
4488 return CONST_DOUBLE_FROM_REAL_VALUE (ext_80387_constants_table[i],
4489 XFmode);
4490 }
4491
4492 /* Return 1 if X is an FP constant we can load into an SSE register
4493 without using memory. */
4494 int
4495 standard_sse_constant_p (rtx x)
4496 {
4497 if (x == const0_rtx)
4498 return 1;
4499 return (x == CONST0_RTX (GET_MODE (x)));
4500 }
4501
4502 /* Returns 1 if OP contains a symbol reference. */
4503
4504 int
4505 symbolic_reference_mentioned_p (rtx op)
4506 {
4507 const char *fmt;
4508 int i;
4509
4510 if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
4511 return 1;
4512
4513 fmt = GET_RTX_FORMAT (GET_CODE (op));
4514 for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
4515 {
4516 if (fmt[i] == 'E')
4517 {
4518 int j;
4519
4520 for (j = XVECLEN (op, i) - 1; j >= 0; j--)
4521 if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
4522 return 1;
4523 }
4524
4525 else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
4526 return 1;
4527 }
4528
4529 return 0;
4530 }
4531
4532 /* Return 1 if it is appropriate to emit `ret' instructions in the
4533 body of a function. Do this only if the epilogue is simple, needing a
4534 couple of insns. Prior to reloading, we can't tell how many registers
4535 must be saved, so return 0 then. Return 0 if there is no frame
4536 marker to de-allocate.
4537
4538 If NON_SAVING_SETJMP is defined and true, then it is not possible
4539 for the epilogue to be simple, so return 0. This is a special case
4540 since NON_SAVING_SETJMP will not cause regs_ever_live to change
4541 until final, but jump_optimize may need to know sooner if a
4542 `return' is OK. */
4543
4544 int
4545 ix86_can_use_return_insn_p (void)
4546 {
4547 struct ix86_frame frame;
4548
4549 #ifdef NON_SAVING_SETJMP
4550 if (NON_SAVING_SETJMP && current_function_calls_setjmp)
4551 return 0;
4552 #endif
4553
4554 if (! reload_completed || frame_pointer_needed)
4555 return 0;
4556
4557 /* Don't allow more than 32k of arguments to be popped, since that's
4558 all we can do with one instruction. */
4559 if (current_function_pops_args
4560 && current_function_args_size >= 32768)
4561 return 0;
4562
4563 ix86_compute_frame_layout (&frame);
4564 return frame.to_allocate == 0 && frame.nregs == 0;
4565 }
4566 \f
4567 /* Return 1 if VALUE can be stored in the sign extended immediate field. */
4568 int
4569 x86_64_sign_extended_value (rtx value)
4570 {
4571 switch (GET_CODE (value))
4572 {
4573 /* CONST_DOUBLEs never match, since HOST_BITS_PER_WIDE_INT is known
4574 to be at least 32 and thus all acceptable constants are
4575 represented as CONST_INTs. */
4576 case CONST_INT:
4577 if (HOST_BITS_PER_WIDE_INT == 32)
4578 return 1;
4579 else
4580 {
4581 HOST_WIDE_INT val = trunc_int_for_mode (INTVAL (value), DImode);
4582 return trunc_int_for_mode (val, SImode) == val;
4583 }
4584 break;
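    /* That is, the constant must fit the 32-bit immediate field that
       64-bit instructions sign-extend, e.g. the imm32 of addq.  */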
4585
4586 /* For certain code models, the symbolic references are known to fit.
4587 In the CM_SMALL_PIC model we know it fits if it is local to the shared
4588 library. Don't count TLS SYMBOL_REFs here, since they should fit
4589 only inside an UNSPEC, handled below. */
4590 case SYMBOL_REF:
4591 /* TLS symbols are not constant. */
4592 if (tls_symbolic_operand (value, Pmode))
4593 return false;
4594 return (ix86_cmodel == CM_SMALL || ix86_cmodel == CM_KERNEL);
4595
4596 /* For certain code models, the code is near as well. */
4597 case LABEL_REF:
4598 return (ix86_cmodel == CM_SMALL || ix86_cmodel == CM_MEDIUM
4599 || ix86_cmodel == CM_KERNEL);
4600
4601 /* We also may accept the offsetted memory references in certain special
4602 cases. */
4603 case CONST:
4604 if (GET_CODE (XEXP (value, 0)) == UNSPEC)
4605 switch (XINT (XEXP (value, 0), 1))
4606 {
4607 case UNSPEC_GOTPCREL:
4608 case UNSPEC_DTPOFF:
4609 case UNSPEC_GOTNTPOFF:
4610 case UNSPEC_NTPOFF:
4611 return 1;
4612 default:
4613 break;
4614 }
4615 if (GET_CODE (XEXP (value, 0)) == PLUS)
4616 {
4617 rtx op1 = XEXP (XEXP (value, 0), 0);
4618 rtx op2 = XEXP (XEXP (value, 0), 1);
4619 HOST_WIDE_INT offset;
4620
4621 if (ix86_cmodel == CM_LARGE)
4622 return 0;
4623 if (GET_CODE (op2) != CONST_INT)
4624 return 0;
4625 offset = trunc_int_for_mode (INTVAL (op2), DImode);
4626 switch (GET_CODE (op1))
4627 {
4628 case SYMBOL_REF:
4629 /* For CM_SMALL assume that the latest object is 16MB below the
4630 end of the 31-bit boundary. We may also accept pretty
4631 large negative constants, knowing that all objects are
4632 in the positive half of the address space. */
4633 if (ix86_cmodel == CM_SMALL
4634 && offset < 16*1024*1024
4635 && trunc_int_for_mode (offset, SImode) == offset)
4636 return 1;
4637 /* For CM_KERNEL we know that all objects reside in the
4638 negative half of the 32-bit address space. We must not
4639 accept negative offsets, since they may be just off,
4640 but we may accept pretty large positive ones. */
4641 if (ix86_cmodel == CM_KERNEL
4642 && offset > 0
4643 && trunc_int_for_mode (offset, SImode) == offset)
4644 return 1;
4645 break;
4646 case LABEL_REF:
4647 /* These conditions are similar to SYMBOL_REF ones, just the
4648 constraints for code models differ. */
4649 if ((ix86_cmodel == CM_SMALL || ix86_cmodel == CM_MEDIUM)
4650 && offset < 16*1024*1024
4651 && trunc_int_for_mode (offset, SImode) == offset)
4652 return 1;
4653 if (ix86_cmodel == CM_KERNEL
4654 && offset > 0
4655 && trunc_int_for_mode (offset, SImode) == offset)
4656 return 1;
4657 break;
4658 case UNSPEC:
4659 switch (XINT (op1, 1))
4660 {
4661 case UNSPEC_DTPOFF:
4662 case UNSPEC_NTPOFF:
4663 if (offset > 0
4664 && trunc_int_for_mode (offset, SImode) == offset)
4665 return 1;
4666 }
4667 break;
4668 default:
4669 return 0;
4670 }
4671 }
4672 return 0;
4673 default:
4674 return 0;
4675 }
4676 }
4677
4678 /* Return 1 if VALUE can be stored in the zero extended immediate field. */
4679 int
4680 x86_64_zero_extended_value (rtx value)
4681 {
4682 switch (GET_CODE (value))
4683 {
4684 case CONST_DOUBLE:
4685 if (HOST_BITS_PER_WIDE_INT == 32)
4686 return (GET_MODE (value) == VOIDmode
4687 && !CONST_DOUBLE_HIGH (value));
4688 else
4689 return 0;
4690 case CONST_INT:
4691 if (HOST_BITS_PER_WIDE_INT == 32)
4692 return INTVAL (value) >= 0;
4693 else
4694 return !(INTVAL (value) & ~(HOST_WIDE_INT) 0xffffffff);
4695 break;
4696
4697 /* For certain code models, the symbolic references are known to fit. */
4698 case SYMBOL_REF:
4699 /* TLS symbols are not constant. */
4700 if (tls_symbolic_operand (value, Pmode))
4701 return false;
4702 return ix86_cmodel == CM_SMALL;
4703
4704 /* For certain code models, the code is near as well. */
4705 case LABEL_REF:
4706 return ix86_cmodel == CM_SMALL || ix86_cmodel == CM_MEDIUM;
4707
4708 /* We also may accept the offsetted memory references in certain special
4709 cases. */
4710 case CONST:
4711 if (GET_CODE (XEXP (value, 0)) == PLUS)
4712 {
4713 rtx op1 = XEXP (XEXP (value, 0), 0);
4714 rtx op2 = XEXP (XEXP (value, 0), 1);
4715
4716 if (ix86_cmodel == CM_LARGE)
4717 return 0;
4718 switch (GET_CODE (op1))
4719 {
4720 case SYMBOL_REF:
4721 return 0;
4722 /* For the small code model we may accept pretty large positive
4723 offsets, since one bit is available for free. Negative
4724 offsets are limited by the size of the NULL pointer area
4725 specified by the ABI. */
4726 if (ix86_cmodel == CM_SMALL
4727 && GET_CODE (op2) == CONST_INT
4728 && trunc_int_for_mode (INTVAL (op2), DImode) > -0x10000
4729 && (trunc_int_for_mode (INTVAL (op2), SImode)
4730 == INTVAL (op2)))
4731 return 1;
4732 /* ??? For the kernel, we may accept adjustment of
4733 -0x10000000, since we know that it will just convert
4734 negative address space to positive, but perhaps this
4735 is not worthwhile. */
4736 break;
4737 case LABEL_REF:
4738 /* These conditions are similar to SYMBOL_REF ones, just the
4739 constraints for code models differ. */
4740 if ((ix86_cmodel == CM_SMALL || ix86_cmodel == CM_MEDIUM)
4741 && GET_CODE (op2) == CONST_INT
4742 && trunc_int_for_mode (INTVAL (op2), DImode) > -0x10000
4743 && (trunc_int_for_mode (INTVAL (op2), SImode)
4744 == INTVAL (op2)))
4745 return 1;
4746 break;
4747 default:
4748 return 0;
4749 }
4750 }
4751 return 0;
4752 default:
4753 return 0;
4754 }
4755 }
4756
4757 /* Value should be nonzero if functions must have frame pointers.
4758 Zero means the frame pointer need not be set up (and parms may
4759 be accessed via the stack pointer) in functions that seem suitable. */
4760
4761 int
4762 ix86_frame_pointer_required (void)
4763 {
4764 /* If we accessed previous frames, then the generated code expects
4765 to be able to access the saved ebp value in our frame. */
4766 if (cfun->machine->accesses_prev_frame)
4767 return 1;
4768
4769 /* Several x86 OSes need a frame pointer for other reasons,
4770 usually pertaining to setjmp. */
4771 if (SUBTARGET_FRAME_POINTER_REQUIRED)
4772 return 1;
4773
4774 /* In override_options, TARGET_OMIT_LEAF_FRAME_POINTER turns off
4775 the frame pointer by default. Turn it back on now if we've not
4776 got a leaf function. */
4777 if (TARGET_OMIT_LEAF_FRAME_POINTER
4778 && (!current_function_is_leaf))
4779 return 1;
4780
4781 if (current_function_profile)
4782 return 1;
4783
4784 return 0;
4785 }
4786
4787 /* Record that the current function accesses previous call frames. */
4788
4789 void
4790 ix86_setup_frame_addresses (void)
4791 {
4792 cfun->machine->accesses_prev_frame = 1;
4793 }
4794 \f
4795 #if defined(HAVE_GAS_HIDDEN) && defined(SUPPORTS_ONE_ONLY)
4796 # define USE_HIDDEN_LINKONCE 1
4797 #else
4798 # define USE_HIDDEN_LINKONCE 0
4799 #endif
4800
4801 static int pic_labels_used;
4802
4803 /* Fills in the label name that should be used for a pc thunk for
4804 the given register. */
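/* For instance, the thunk for %ebx is named "__i686.get_pc_thunk.bx" when
   hidden linkonce sections are available.  */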
4805
4806 static void
4807 get_pc_thunk_name (char name[32], unsigned int regno)
4808 {
4809 if (USE_HIDDEN_LINKONCE)
4810 sprintf (name, "__i686.get_pc_thunk.%s", reg_names[regno]);
4811 else
4812 ASM_GENERATE_INTERNAL_LABEL (name, "LPR", regno);
4813 }
4814
4815
4816 /* For -fpic, output the pc thunks used in this translation unit; each
4817 thunk loads its register with the return address of the caller and then returns. */
4818
4819 void
4820 ix86_file_end (void)
4821 {
4822 rtx xops[2];
4823 int regno;
4824
4825 for (regno = 0; regno < 8; ++regno)
4826 {
4827 char name[32];
4828
4829 if (! ((pic_labels_used >> regno) & 1))
4830 continue;
4831
4832 get_pc_thunk_name (name, regno);
4833
4834 if (USE_HIDDEN_LINKONCE)
4835 {
4836 tree decl;
4837
4838 decl = build_decl (FUNCTION_DECL, get_identifier (name),
4839 error_mark_node);
4840 TREE_PUBLIC (decl) = 1;
4841 TREE_STATIC (decl) = 1;
4842 DECL_ONE_ONLY (decl) = 1;
4843
4844 (*targetm.asm_out.unique_section) (decl, 0);
4845 named_section (decl, NULL, 0);
4846
4847 (*targetm.asm_out.globalize_label) (asm_out_file, name);
4848 fputs ("\t.hidden\t", asm_out_file);
4849 assemble_name (asm_out_file, name);
4850 fputc ('\n', asm_out_file);
4851 ASM_DECLARE_FUNCTION_NAME (asm_out_file, name, decl);
4852 }
4853 else
4854 {
4855 text_section ();
4856 ASM_OUTPUT_LABEL (asm_out_file, name);
4857 }
4858
4859 xops[0] = gen_rtx_REG (SImode, regno);
4860 xops[1] = gen_rtx_MEM (SImode, stack_pointer_rtx);
4861 output_asm_insn ("mov{l}\t{%1, %0|%0, %1}", xops);
4862 output_asm_insn ("ret", xops);
4863 }
4864
4865 if (NEED_INDICATE_EXEC_STACK)
4866 file_end_indicate_exec_stack ();
4867 }
4868
4869 /* Emit code for the SET_GOT patterns. */
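/* With PIC and deep branch prediction the sequence is roughly
	call	__i686.get_pc_thunk.bx
	addl	$_GLOBAL_OFFSET_TABLE_, %ebx
   otherwise a call to a local label followed by a pop is emitted.  */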
4870
4871 const char *
4872 output_set_got (rtx dest)
4873 {
4874 rtx xops[3];
4875
4876 xops[0] = dest;
4877 xops[1] = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME);
4878
4879 if (! TARGET_DEEP_BRANCH_PREDICTION || !flag_pic)
4880 {
4881 xops[2] = gen_rtx_LABEL_REF (Pmode, gen_label_rtx ());
4882
4883 if (!flag_pic)
4884 output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);
4885 else
4886 output_asm_insn ("call\t%a2", xops);
4887
4888 #if TARGET_MACHO
4889 /* Output the "canonical" label name ("Lxx$pb") here too. This
4890 is what will be referred to by the Mach-O PIC subsystem. */
4891 ASM_OUTPUT_LABEL (asm_out_file, machopic_function_base_name ());
4892 #endif
4893 (*targetm.asm_out.internal_label) (asm_out_file, "L",
4894 CODE_LABEL_NUMBER (XEXP (xops[2], 0)));
4895
4896 if (flag_pic)
4897 output_asm_insn ("pop{l}\t%0", xops);
4898 }
4899 else
4900 {
4901 char name[32];
4902 get_pc_thunk_name (name, REGNO (dest));
4903 pic_labels_used |= 1 << REGNO (dest);
4904
4905 xops[2] = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
4906 xops[2] = gen_rtx_MEM (QImode, xops[2]);
4907 output_asm_insn ("call\t%X2", xops);
4908 }
4909
4910 if (!flag_pic || TARGET_DEEP_BRANCH_PREDICTION)
4911 output_asm_insn ("add{l}\t{%1, %0|%0, %1}", xops);
4912 else if (!TARGET_MACHO)
4913 output_asm_insn ("add{l}\t{%1+[.-%a2], %0|%0, %a1+(.-%a2)}", xops);
4914
4915 return "";
4916 }
4917
4918 /* Generate a "push" pattern for input ARG. */
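/* The returned RTX is (set (mem:Pmode (pre_dec:Pmode sp)) arg), i.e. an
   ordinary push instruction once emitted.  */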
4919
4920 static rtx
4921 gen_push (rtx arg)
4922 {
4923 return gen_rtx_SET (VOIDmode,
4924 gen_rtx_MEM (Pmode,
4925 gen_rtx_PRE_DEC (Pmode,
4926 stack_pointer_rtx)),
4927 arg);
4928 }
4929
4930 /* Return the number of an unused call-clobbered register if one is available
4931 for the entire function, or INVALID_REGNUM otherwise. */
4932
4933 static unsigned int
4934 ix86_select_alt_pic_regnum (void)
4935 {
4936 if (current_function_is_leaf && !current_function_profile)
4937 {
4938 int i;
4939 for (i = 2; i >= 0; --i)
4940 if (!regs_ever_live[i])
4941 return i;
4942 }
4943
4944 return INVALID_REGNUM;
4945 }
4946
4947 /* Return 1 if we need to save REGNO. */
4948 static int
4949 ix86_save_reg (unsigned int regno, int maybe_eh_return)
4950 {
4951 if (pic_offset_table_rtx
4952 && regno == REAL_PIC_OFFSET_TABLE_REGNUM
4953 && (regs_ever_live[REAL_PIC_OFFSET_TABLE_REGNUM]
4954 || current_function_profile
4955 || current_function_calls_eh_return
4956 || current_function_uses_const_pool))
4957 {
4958 if (ix86_select_alt_pic_regnum () != INVALID_REGNUM)
4959 return 0;
4960 return 1;
4961 }
4962
4963 if (current_function_calls_eh_return && maybe_eh_return)
4964 {
4965 unsigned i;
4966 for (i = 0; ; i++)
4967 {
4968 unsigned test = EH_RETURN_DATA_REGNO (i);
4969 if (test == INVALID_REGNUM)
4970 break;
4971 if (test == regno)
4972 return 1;
4973 }
4974 }
4975
4976 return (regs_ever_live[regno]
4977 && !call_used_regs[regno]
4978 && !fixed_regs[regno]
4979 && (regno != HARD_FRAME_POINTER_REGNUM || !frame_pointer_needed));
4980 }
4981
4982 /* Return number of registers to be saved on the stack. */
4983
4984 static int
4985 ix86_nsaved_regs (void)
4986 {
4987 int nregs = 0;
4988 int regno;
4989
4990 for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; regno--)
4991 if (ix86_save_reg (regno, true))
4992 nregs++;
4993 return nregs;
4994 }
4995
4996 /* Return the offset between two registers, one to be eliminated, and the other
4997 its replacement, at the start of a routine. */
4998
4999 HOST_WIDE_INT
5000 ix86_initial_elimination_offset (int from, int to)
5001 {
5002 struct ix86_frame frame;
5003 ix86_compute_frame_layout (&frame);
5004
5005 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
5006 return frame.hard_frame_pointer_offset;
5007 else if (from == FRAME_POINTER_REGNUM
5008 && to == HARD_FRAME_POINTER_REGNUM)
5009 return frame.hard_frame_pointer_offset - frame.frame_pointer_offset;
5010 else
5011 {
5012 if (to != STACK_POINTER_REGNUM)
5013 abort ();
5014 else if (from == ARG_POINTER_REGNUM)
5015 return frame.stack_pointer_offset;
5016 else if (from != FRAME_POINTER_REGNUM)
5017 abort ();
5018 else
5019 return frame.stack_pointer_offset - frame.frame_pointer_offset;
5020 }
5021 }
5022
5023 /* Fill in the ix86_frame structure describing the frame of the current function. */
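/* Roughly, from high to low addresses the frame consists of: return address,
   saved frame pointer (if any), register save area, va-arg save area,
   padding1, local variables, outgoing argument area and padding2; the red
   zone, when usable, is carved out of the low end afterwards.  */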
5024
5025 static void
5026 ix86_compute_frame_layout (struct ix86_frame *frame)
5027 {
5028 HOST_WIDE_INT total_size;
5029 int stack_alignment_needed = cfun->stack_alignment_needed / BITS_PER_UNIT;
5030 HOST_WIDE_INT offset;
5031 int preferred_alignment = cfun->preferred_stack_boundary / BITS_PER_UNIT;
5032 HOST_WIDE_INT size = get_frame_size ();
5033
5034 frame->nregs = ix86_nsaved_regs ();
5035 total_size = size;
5036
5037 /* During reload iteration the number of registers saved can change.
5038 Recompute the value as needed. Do not recompute when the number of registers
5039 didn't change, as reload does multiple calls to this function and does not
5040 expect the decision to change within a single iteration. */
5041 if (!optimize_size
5042 && cfun->machine->use_fast_prologue_epilogue_nregs != frame->nregs)
5043 {
5044 int count = frame->nregs;
5045
5046 cfun->machine->use_fast_prologue_epilogue_nregs = count;
5047 /* The fast prologue uses move instead of push to save registers. This
5048 is significantly longer, but also executes faster as modern hardware
5049 can execute the moves in parallel, but can't do that for push/pop.
5050
5051 Be careful about choosing which prologue to emit: when the function takes
5052 many instructions to execute we may use the slow version, as well as
5053 when the function is known to be outside a hot spot (this is known
5054 only with profile feedback). Weight the size of the function by the number
5055 of registers to save, as it is cheap to use one or two push instructions
5056 but very slow to use many of them. */
5057 if (count)
5058 count = (count - 1) * FAST_PROLOGUE_INSN_COUNT;
5059 if (cfun->function_frequency < FUNCTION_FREQUENCY_NORMAL
5060 || (flag_branch_probabilities
5061 && cfun->function_frequency < FUNCTION_FREQUENCY_HOT))
5062 cfun->machine->use_fast_prologue_epilogue = false;
5063 else
5064 cfun->machine->use_fast_prologue_epilogue
5065 = !expensive_function_p (count);
5066 }
5067 if (TARGET_PROLOGUE_USING_MOVE
5068 && cfun->machine->use_fast_prologue_epilogue)
5069 frame->save_regs_using_mov = true;
5070 else
5071 frame->save_regs_using_mov = false;
5072
5073
5074 /* Skip return address and saved base pointer. */
5075 offset = frame_pointer_needed ? UNITS_PER_WORD * 2 : UNITS_PER_WORD;
5076
5077 frame->hard_frame_pointer_offset = offset;
5078
5079 /* Do some sanity checking of stack_alignment_needed and
5080 preferred_alignment, since the i386 port is the only one using these
5081 features and they may break easily. */
5082
5083 if (size && !stack_alignment_needed)
5084 abort ();
5085 if (preferred_alignment < STACK_BOUNDARY / BITS_PER_UNIT)
5086 abort ();
5087 if (preferred_alignment > PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT)
5088 abort ();
5089 if (stack_alignment_needed > PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT)
5090 abort ();
5091
5092 if (stack_alignment_needed < STACK_BOUNDARY / BITS_PER_UNIT)
5093 stack_alignment_needed = STACK_BOUNDARY / BITS_PER_UNIT;
5094
5095 /* Register save area */
5096 offset += frame->nregs * UNITS_PER_WORD;
5097
5098 /* Va-arg area */
5099 if (ix86_save_varrargs_registers)
5100 {
5101 offset += X86_64_VARARGS_SIZE;
5102 frame->va_arg_size = X86_64_VARARGS_SIZE;
5103 }
5104 else
5105 frame->va_arg_size = 0;
5106
5107 /* Align start of frame for local function. */
5108 frame->padding1 = ((offset + stack_alignment_needed - 1)
5109 & -stack_alignment_needed) - offset;
5110
5111 offset += frame->padding1;
5112
5113 /* Frame pointer points here. */
5114 frame->frame_pointer_offset = offset;
5115
5116 offset += size;
5117
5118 /* Add the outgoing arguments area. It can be skipped if we eliminated
5119 all the function calls as dead code.
5120 Skipping is, however, impossible when the function calls alloca: the alloca
5121 expander assumes that the last current_function_outgoing_args_size bytes
5122 of the stack frame are unused. */
5123 if (ACCUMULATE_OUTGOING_ARGS
5124 && (!current_function_is_leaf || current_function_calls_alloca))
5125 {
5126 offset += current_function_outgoing_args_size;
5127 frame->outgoing_arguments_size = current_function_outgoing_args_size;
5128 }
5129 else
5130 frame->outgoing_arguments_size = 0;
5131
5132 /* Align stack boundary. Only needed if we're calling another function
5133 or using alloca. */
5134 if (!current_function_is_leaf || current_function_calls_alloca)
5135 frame->padding2 = ((offset + preferred_alignment - 1)
5136 & -preferred_alignment) - offset;
5137 else
5138 frame->padding2 = 0;
5139
5140 offset += frame->padding2;
5141
5142 /* We've reached end of stack frame. */
5143 frame->stack_pointer_offset = offset;
5144
5145 /* Size the prologue needs to allocate. */
5146 frame->to_allocate =
5147 (size + frame->padding1 + frame->padding2
5148 + frame->outgoing_arguments_size + frame->va_arg_size);
5149
5150 if ((!frame->to_allocate && frame->nregs <= 1)
5151 || (TARGET_64BIT && frame->to_allocate >= (HOST_WIDE_INT) 0x80000000))
5152 frame->save_regs_using_mov = false;
5153
5154 if (TARGET_RED_ZONE && current_function_sp_is_unchanging
5155 && current_function_is_leaf)
5156 {
5157 frame->red_zone_size = frame->to_allocate;
5158 if (frame->save_regs_using_mov)
5159 frame->red_zone_size += frame->nregs * UNITS_PER_WORD;
5160 if (frame->red_zone_size > RED_ZONE_SIZE - RED_ZONE_RESERVE)
5161 frame->red_zone_size = RED_ZONE_SIZE - RED_ZONE_RESERVE;
5162 }
5163 else
5164 frame->red_zone_size = 0;
5165 frame->to_allocate -= frame->red_zone_size;
5166 frame->stack_pointer_offset -= frame->red_zone_size;
5167 #if 0
5168 fprintf (stderr, "nregs: %i\n", frame->nregs);
5169 fprintf (stderr, "size: %i\n", size);
5170 fprintf (stderr, "alignment1: %i\n", stack_alignment_needed);
5171 fprintf (stderr, "padding1: %i\n", frame->padding1);
5172 fprintf (stderr, "va_arg: %i\n", frame->va_arg_size);
5173 fprintf (stderr, "padding2: %i\n", frame->padding2);
5174 fprintf (stderr, "to_allocate: %i\n", frame->to_allocate);
5175 fprintf (stderr, "red_zone_size: %i\n", frame->red_zone_size);
5176 fprintf (stderr, "frame_pointer_offset: %i\n", frame->frame_pointer_offset);
5177 fprintf (stderr, "hard_frame_pointer_offset: %i\n",
5178 frame->hard_frame_pointer_offset);
5179 fprintf (stderr, "stack_pointer_offset: %i\n", frame->stack_pointer_offset);
5180 #endif
5181 }
5182
5183 /* Emit code to save registers in the prologue. */
5184
5185 static void
5186 ix86_emit_save_regs (void)
5187 {
5188 int regno;
5189 rtx insn;
5190
5191 for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; regno--)
5192 if (ix86_save_reg (regno, true))
5193 {
5194 insn = emit_insn (gen_push (gen_rtx_REG (Pmode, regno)));
5195 RTX_FRAME_RELATED_P (insn) = 1;
5196 }
5197 }
5198
5199 /* Emit code to save registers using MOV insns. The first register
5200 is stored at POINTER + OFFSET. */
5201 static void
5202 ix86_emit_save_regs_using_mov (rtx pointer, HOST_WIDE_INT offset)
5203 {
5204 int regno;
5205 rtx insn;
5206
5207 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
5208 if (ix86_save_reg (regno, true))
5209 {
5210 insn = emit_move_insn (adjust_address (gen_rtx_MEM (Pmode, pointer),
5211 Pmode, offset),
5212 gen_rtx_REG (Pmode, regno));
5213 RTX_FRAME_RELATED_P (insn) = 1;
5214 offset += UNITS_PER_WORD;
5215 }
5216 }
5217
5218 /* Expand prologue or epilogue stack adjustment.
5219 The pattern exists to put a dependency on all ebp-based memory accesses.
5220 STYLE should be negative if instructions should be marked as frame related,
5221 zero if the %r11 register is live and cannot be freely used, and positive
5222 otherwise. */
5223
5224 static void
5225 pro_epilogue_adjust_stack (rtx dest, rtx src, rtx offset, int style)
5226 {
5227 rtx insn;
5228
5229 if (! TARGET_64BIT)
5230 insn = emit_insn (gen_pro_epilogue_adjust_stack_1 (dest, src, offset));
5231 else if (x86_64_immediate_operand (offset, DImode))
5232 insn = emit_insn (gen_pro_epilogue_adjust_stack_rex64 (dest, src, offset));
5233 else
5234 {
5235 rtx r11;
5236 /* r11 is used by indirect sibcall return as well, set before the
5237 epilogue and used after the epilogue. ATM indirect sibcall
5238 shouldn't be used together with huge frame sizes in one
5239 function because of the frame_size check in sibcall.c. */
5240 if (style == 0)
5241 abort ();
5242 r11 = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 3 /* R11 */);
5243 insn = emit_insn (gen_rtx_SET (DImode, r11, offset));
5244 if (style < 0)
5245 RTX_FRAME_RELATED_P (insn) = 1;
5246 insn = emit_insn (gen_pro_epilogue_adjust_stack_rex64_2 (dest, src, r11,
5247 offset));
5248 }
5249 if (style < 0)
5250 RTX_FRAME_RELATED_P (insn) = 1;
5251 }
5252
5253 /* Expand the prologue into a bunch of separate insns. */
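/* Roughly speaking (illustrative only; the details depend on the target
   flags handled below): for an ia32 function that needs a frame pointer
   and saves its registers with pushes, the insns emitted here correspond to
        pushl  %ebp
        movl   %esp, %ebp
        pushl  <callee-saved regs>
        subl   $to_allocate, %esp
   with the register saves turned into moves, and possibly reordered against
   the stack adjustment, when save_regs_using_mov is set.  */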
5254
5255 void
5256 ix86_expand_prologue (void)
5257 {
5258 rtx insn;
5259 bool pic_reg_used;
5260 struct ix86_frame frame;
5261 HOST_WIDE_INT allocate;
5262
5263 ix86_compute_frame_layout (&frame);
5264
5265 /* Note: AT&T enter does NOT have reversed args. Enter is probably
5266 slower on all targets. Also sdb doesn't like it. */
5267
5268 if (frame_pointer_needed)
5269 {
5270 insn = emit_insn (gen_push (hard_frame_pointer_rtx));
5271 RTX_FRAME_RELATED_P (insn) = 1;
5272
5273 insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
5274 RTX_FRAME_RELATED_P (insn) = 1;
5275 }
5276
5277 allocate = frame.to_allocate;
5278
5279 if (!frame.save_regs_using_mov)
5280 ix86_emit_save_regs ();
5281 else
5282 allocate += frame.nregs * UNITS_PER_WORD;
5283
5284 /* When using the red zone we may start register saving before allocating
5285 the stack frame, saving one cycle of the prologue. */
5286 if (TARGET_RED_ZONE && frame.save_regs_using_mov)
5287 ix86_emit_save_regs_using_mov (frame_pointer_needed ? hard_frame_pointer_rtx
5288 : stack_pointer_rtx,
5289 -frame.nregs * UNITS_PER_WORD);
5290
5291 if (allocate == 0)
5292 ;
5293 else if (! TARGET_STACK_PROBE || allocate < CHECK_STACK_LIMIT)
5294 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
5295 GEN_INT (-allocate), -1);
5296 else
5297 {
5298 /* Only valid for Win32. */
5299 rtx eax = gen_rtx_REG (SImode, 0);
5300 bool eax_live = ix86_eax_live_at_start_p ();
5301
5302 if (TARGET_64BIT)
5303 abort ();
5304
5305 if (eax_live)
5306 {
5307 emit_insn (gen_push (eax));
5308 allocate -= 4;
5309 }
5310
5311 insn = emit_move_insn (eax, GEN_INT (allocate));
5312 RTX_FRAME_RELATED_P (insn) = 1;
5313
5314 insn = emit_insn (gen_allocate_stack_worker (eax));
5315 RTX_FRAME_RELATED_P (insn) = 1;
5316
5317 if (eax_live)
5318 {
5319 rtx t = plus_constant (stack_pointer_rtx, allocate);
5320 emit_move_insn (eax, gen_rtx_MEM (SImode, t));
5321 }
5322 }
5323
5324 if (frame.save_regs_using_mov && !TARGET_RED_ZONE)
5325 {
5326 if (!frame_pointer_needed || !frame.to_allocate)
5327 ix86_emit_save_regs_using_mov (stack_pointer_rtx, frame.to_allocate);
5328 else
5329 ix86_emit_save_regs_using_mov (hard_frame_pointer_rtx,
5330 -frame.nregs * UNITS_PER_WORD);
5331 }
5332
5333 pic_reg_used = false;
5334 if (pic_offset_table_rtx
5335 && (regs_ever_live[REAL_PIC_OFFSET_TABLE_REGNUM]
5336 || current_function_profile))
5337 {
5338 unsigned int alt_pic_reg_used = ix86_select_alt_pic_regnum ();
5339
5340 if (alt_pic_reg_used != INVALID_REGNUM)
5341 REGNO (pic_offset_table_rtx) = alt_pic_reg_used;
5342
5343 pic_reg_used = true;
5344 }
5345
5346 if (pic_reg_used)
5347 {
5348 insn = emit_insn (gen_set_got (pic_offset_table_rtx));
5349
5350 /* Even with accurate pre-reload life analysis, we can wind up
5351 deleting all references to the pic register after reload.
5352 Consider if cross-jumping unifies two sides of a branch
5353 controlled by a comparison vs the only read from a global.
5354 In which case, allow the set_got to be deleted, though we're
5355 too late to do anything about the ebx save in the prologue. */
5356 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD, const0_rtx, NULL);
5357 }
5358
5359 /* Prevent function calls from being scheduled before the call to mcount.
5360 In the pic_reg_used case, make sure that the got load isn't deleted. */
5361 if (current_function_profile)
5362 emit_insn (gen_blockage (pic_reg_used ? pic_offset_table_rtx : const0_rtx));
5363 }
5364
5365 /* Emit code to restore saved registers using MOV insns. First register
5366 is restored from POINTER + OFFSET. */
5367 static void
5368 ix86_emit_restore_regs_using_mov (rtx pointer, HOST_WIDE_INT offset,
5369 int maybe_eh_return)
5370 {
5371 int regno;
5372 rtx base_address = gen_rtx_MEM (Pmode, pointer);
5373
5374 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
5375 if (ix86_save_reg (regno, maybe_eh_return))
5376 {
5377 /* Ensure that adjust_address won't be forced to produce a pointer
5378 out of the range allowed by the x86-64 instruction set. */
5379 if (TARGET_64BIT && offset != trunc_int_for_mode (offset, SImode))
5380 {
5381 rtx r11;
5382
5383 r11 = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 3 /* R11 */);
5384 emit_move_insn (r11, GEN_INT (offset));
5385 emit_insn (gen_adddi3 (r11, r11, pointer));
5386 base_address = gen_rtx_MEM (Pmode, r11);
5387 offset = 0;
5388 }
5389 emit_move_insn (gen_rtx_REG (Pmode, regno),
5390 adjust_address (base_address, Pmode, offset));
5391 offset += UNITS_PER_WORD;
5392 }
5393 }
5394
5395 /* Restore function stack, frame, and registers. */
5396
5397 void
5398 ix86_expand_epilogue (int style)
5399 {
5400 int regno;
5401 int sp_valid = !frame_pointer_needed || current_function_sp_is_unchanging;
5402 struct ix86_frame frame;
5403 HOST_WIDE_INT offset;
5404
5405 ix86_compute_frame_layout (&frame);
5406
5407 /* Calculate start of saved registers relative to ebp. Special care
5408 must be taken for the normal return case of a function using
5409 eh_return: the eax and edx registers are marked as saved, but not
5410 restored along this path. */
5411 offset = frame.nregs;
5412 if (current_function_calls_eh_return && style != 2)
5413 offset -= 2;
5414 offset *= -UNITS_PER_WORD;
5415
5416 /* If we're only restoring one register and sp is not valid then
5417 use a move instruction to restore the register, since it's
5418 less work than reloading sp and popping the register.
5419
5420 The default code results in a stack adjustment using an add/lea instruction,
5421 while this code results in a LEAVE instruction (or discrete equivalent),
5422 so it is profitable in some other cases as well, especially when there
5423 are no registers to restore. We also use this code when TARGET_USE_LEAVE
5424 and there is exactly one register to pop. This heuristic may need some
5425 tuning in the future. */
5426 if ((!sp_valid && frame.nregs <= 1)
5427 || (TARGET_EPILOGUE_USING_MOVE
5428 && cfun->machine->use_fast_prologue_epilogue
5429 && (frame.nregs > 1 || frame.to_allocate))
5430 || (frame_pointer_needed && !frame.nregs && frame.to_allocate)
5431 || (frame_pointer_needed && TARGET_USE_LEAVE
5432 && cfun->machine->use_fast_prologue_epilogue
5433 && frame.nregs == 1)
5434 || current_function_calls_eh_return)
5435 {
5436 /* Restore registers. We can use ebp or esp to address the memory
5437 locations. If both are available, default to ebp, since offsets
5438 are known to be small. The only exception is esp pointing directly to the
5439 end of the block of saved registers, where we may simplify the addressing
5440 mode. */
5441
5442 if (!frame_pointer_needed || (sp_valid && !frame.to_allocate))
5443 ix86_emit_restore_regs_using_mov (stack_pointer_rtx,
5444 frame.to_allocate, style == 2);
5445 else
5446 ix86_emit_restore_regs_using_mov (hard_frame_pointer_rtx,
5447 offset, style == 2);
5448
5449 /* eh_return epilogues need %ecx added to the stack pointer. */
5450 if (style == 2)
5451 {
5452 rtx tmp, sa = EH_RETURN_STACKADJ_RTX;
5453
5454 if (frame_pointer_needed)
5455 {
5456 tmp = gen_rtx_PLUS (Pmode, hard_frame_pointer_rtx, sa);
5457 tmp = plus_constant (tmp, UNITS_PER_WORD);
5458 emit_insn (gen_rtx_SET (VOIDmode, sa, tmp));
5459
5460 tmp = gen_rtx_MEM (Pmode, hard_frame_pointer_rtx);
5461 emit_move_insn (hard_frame_pointer_rtx, tmp);
5462
5463 pro_epilogue_adjust_stack (stack_pointer_rtx, sa,
5464 const0_rtx, style);
5465 }
5466 else
5467 {
5468 tmp = gen_rtx_PLUS (Pmode, stack_pointer_rtx, sa);
5469 tmp = plus_constant (tmp, (frame.to_allocate
5470 + frame.nregs * UNITS_PER_WORD));
5471 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx, tmp));
5472 }
5473 }
5474 else if (!frame_pointer_needed)
5475 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
5476 GEN_INT (frame.to_allocate
5477 + frame.nregs * UNITS_PER_WORD),
5478 style);
5479 /* If not an i386, mov & pop is faster than "leave". */
5480 else if (TARGET_USE_LEAVE || optimize_size
5481 || !cfun->machine->use_fast_prologue_epilogue)
5482 emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ());
5483 else
5484 {
5485 pro_epilogue_adjust_stack (stack_pointer_rtx,
5486 hard_frame_pointer_rtx,
5487 const0_rtx, style);
5488 if (TARGET_64BIT)
5489 emit_insn (gen_popdi1 (hard_frame_pointer_rtx));
5490 else
5491 emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
5492 }
5493 }
5494 else
5495 {
5496 /* The first step is to deallocate the stack frame so that we can
5497 pop the registers. */
5498 if (!sp_valid)
5499 {
5500 if (!frame_pointer_needed)
5501 abort ();
5502 pro_epilogue_adjust_stack (stack_pointer_rtx,
5503 hard_frame_pointer_rtx,
5504 GEN_INT (offset), style);
5505 }
5506 else if (frame.to_allocate)
5507 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
5508 GEN_INT (frame.to_allocate), style);
5509
5510 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
5511 if (ix86_save_reg (regno, false))
5512 {
5513 if (TARGET_64BIT)
5514 emit_insn (gen_popdi1 (gen_rtx_REG (Pmode, regno)));
5515 else
5516 emit_insn (gen_popsi1 (gen_rtx_REG (Pmode, regno)));
5517 }
5518 if (frame_pointer_needed)
5519 {
5520 /* Leave results in shorter dependency chains on CPUs that are
5521 able to grok it fast. */
5522 if (TARGET_USE_LEAVE)
5523 emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ());
5524 else if (TARGET_64BIT)
5525 emit_insn (gen_popdi1 (hard_frame_pointer_rtx));
5526 else
5527 emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
5528 }
5529 }
5530
5531 /* Sibcall epilogues don't want a return instruction. */
5532 if (style == 0)
5533 return;
5534
5535 if (current_function_pops_args && current_function_args_size)
5536 {
5537 rtx popc = GEN_INT (current_function_pops_args);
5538
5539 /* The i386 can only pop 64K bytes. If asked to pop more, pop the
5540 return address, do an explicit add, and jump indirectly to the
5541 caller. */
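/* Illustratively, the >64K case below emits the ia32 equivalent of
        popl   %ecx
        addl   $current_function_pops_args, %esp
        jmp    *%ecx
   while the common case is a plain ret with a pop count.  */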
5542
5543 if (current_function_pops_args >= 65536)
5544 {
5545 rtx ecx = gen_rtx_REG (SImode, 2);
5546
5547 /* There is no "pascal" calling convention in the 64bit ABI. */
5548 if (TARGET_64BIT)
5549 abort ();
5550
5551 emit_insn (gen_popsi1 (ecx));
5552 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, popc));
5553 emit_jump_insn (gen_return_indirect_internal (ecx));
5554 }
5555 else
5556 emit_jump_insn (gen_return_pop_internal (popc));
5557 }
5558 else
5559 emit_jump_insn (gen_return_internal ());
5560 }
5561
5562 /* Reset from the function's potential modifications. */
5563
5564 static void
5565 ix86_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
5566 HOST_WIDE_INT size ATTRIBUTE_UNUSED)
5567 {
5568 if (pic_offset_table_rtx)
5569 REGNO (pic_offset_table_rtx) = REAL_PIC_OFFSET_TABLE_REGNUM;
5570 }
5571 \f
5572 /* Extract the parts of an RTL expression that is a valid memory address
5573 for an instruction. Return 0 if the structure of the address is
5574 grossly off. Return -1 if the address contains ASHIFT, so it is not
5575 strictly valid, but still used for computing the length of an lea instruction. */
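/* For illustration: (plus:SI (reg:SI %ebx) (mult:SI (reg:SI %eax) (const_int 4)))
   decomposes into base = %ebx, index = %eax, scale = 4 and a null
   displacement, i.e. an address printed as (%ebx,%eax,4); an ASHIFT form
   such as (ashift (reg) (const_int 2)) is accepted with scale 4 but makes
   the function return -1.  */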
5576
5577 static int
5578 ix86_decompose_address (rtx addr, struct ix86_address *out)
5579 {
5580 rtx base = NULL_RTX;
5581 rtx index = NULL_RTX;
5582 rtx disp = NULL_RTX;
5583 HOST_WIDE_INT scale = 1;
5584 rtx scale_rtx = NULL_RTX;
5585 int retval = 1;
5586 enum ix86_address_seg seg = SEG_DEFAULT;
5587
5588 if (GET_CODE (addr) == REG || GET_CODE (addr) == SUBREG)
5589 base = addr;
5590 else if (GET_CODE (addr) == PLUS)
5591 {
5592 rtx addends[4], op;
5593 int n = 0, i;
5594
5595 op = addr;
5596 do
5597 {
5598 if (n >= 4)
5599 return 0;
5600 addends[n++] = XEXP (op, 1);
5601 op = XEXP (op, 0);
5602 }
5603 while (GET_CODE (op) == PLUS);
5604 if (n >= 4)
5605 return 0;
5606 addends[n] = op;
5607
5608 for (i = n; i >= 0; --i)
5609 {
5610 op = addends[i];
5611 switch (GET_CODE (op))
5612 {
5613 case MULT:
5614 if (index)
5615 return 0;
5616 index = XEXP (op, 0);
5617 scale_rtx = XEXP (op, 1);
5618 break;
5619
5620 case UNSPEC:
5621 if (XINT (op, 1) == UNSPEC_TP
5622 && TARGET_TLS_DIRECT_SEG_REFS
5623 && seg == SEG_DEFAULT)
5624 seg = TARGET_64BIT ? SEG_FS : SEG_GS;
5625 else
5626 return 0;
5627 break;
5628
5629 case REG:
5630 case SUBREG:
5631 if (!base)
5632 base = op;
5633 else if (!index)
5634 index = op;
5635 else
5636 return 0;
5637 break;
5638
5639 case CONST:
5640 case CONST_INT:
5641 case SYMBOL_REF:
5642 case LABEL_REF:
5643 if (disp)
5644 return 0;
5645 disp = op;
5646 break;
5647
5648 default:
5649 return 0;
5650 }
5651 }
5652 }
5653 else if (GET_CODE (addr) == MULT)
5654 {
5655 index = XEXP (addr, 0); /* index*scale */
5656 scale_rtx = XEXP (addr, 1);
5657 }
5658 else if (GET_CODE (addr) == ASHIFT)
5659 {
5660 rtx tmp;
5661
5662 /* We're called for lea too, which implements ashift on occasion. */
5663 index = XEXP (addr, 0);
5664 tmp = XEXP (addr, 1);
5665 if (GET_CODE (tmp) != CONST_INT)
5666 return 0;
5667 scale = INTVAL (tmp);
5668 if ((unsigned HOST_WIDE_INT) scale > 3)
5669 return 0;
5670 scale = 1 << scale;
5671 retval = -1;
5672 }
5673 else
5674 disp = addr; /* displacement */
5675
5676 /* Extract the integral value of scale. */
5677 if (scale_rtx)
5678 {
5679 if (GET_CODE (scale_rtx) != CONST_INT)
5680 return 0;
5681 scale = INTVAL (scale_rtx);
5682 }
5683
5684 /* Allow the arg pointer and stack pointer as index if there is no scaling. */
5685 if (base && index && scale == 1
5686 && (index == arg_pointer_rtx
5687 || index == frame_pointer_rtx
5688 || (REG_P (index) && REGNO (index) == STACK_POINTER_REGNUM)))
5689 {
5690 rtx tmp = base;
5691 base = index;
5692 index = tmp;
5693 }
5694
5695 /* Special case: %ebp cannot be encoded as a base without a displacement. */
5696 if ((base == hard_frame_pointer_rtx
5697 || base == frame_pointer_rtx
5698 || base == arg_pointer_rtx) && !disp)
5699 disp = const0_rtx;
5700
5701 /* Special case: on K6, [%esi] makes the instruction vector decoded.
5702 Avoid this by transforming to [%esi+0]. */
5703 if (ix86_tune == PROCESSOR_K6 && !optimize_size
5704 && base && !index && !disp
5705 && REG_P (base)
5706 && REGNO_REG_CLASS (REGNO (base)) == SIREG)
5707 disp = const0_rtx;
5708
5709 /* Special case: encode reg+reg instead of reg*2. */
5710 if (!base && index && scale && scale == 2)
5711 base = index, scale = 1;
5712
5713 /* Special case: scaling cannot be encoded without base or displacement. */
5714 if (!base && !disp && index && scale != 1)
5715 disp = const0_rtx;
5716
5717 out->base = base;
5718 out->index = index;
5719 out->disp = disp;
5720 out->scale = scale;
5721 out->seg = seg;
5722
5723 return retval;
5724 }
5725 \f
5726 /* Return the cost of the memory address x.
5727 For i386, it is better to use a complex address than let gcc copy
5728 the address into a reg and make a new pseudo. But not if the address
5729 requires two regs - that would mean more pseudos with longer
5730 lifetimes. */
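/* Example, for illustration: with hard registers and not tuning for K6,
   a bare (reg) address gets cost 1 while (plus (reg) (const_int 4)) gets
   cost 0, reflecting the preference above for folding the displacement
   into the address; pseudo registers and two-register addresses push the
   cost back up.  */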
5731 static int
5732 ix86_address_cost (rtx x)
5733 {
5734 struct ix86_address parts;
5735 int cost = 1;
5736
5737 if (!ix86_decompose_address (x, &parts))
5738 abort ();
5739
5740 /* More complex memory references are better. */
5741 if (parts.disp && parts.disp != const0_rtx)
5742 cost--;
5743 if (parts.seg != SEG_DEFAULT)
5744 cost--;
5745
5746 /* Attempt to minimize number of registers in the address. */
5747 if ((parts.base
5748 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER))
5749 || (parts.index
5750 && (!REG_P (parts.index)
5751 || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)))
5752 cost++;
5753
5754 if (parts.base
5755 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER)
5756 && parts.index
5757 && (!REG_P (parts.index) || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)
5758 && parts.base != parts.index)
5759 cost++;
5760
5761 /* The AMD K6 doesn't like addresses with ModR/M set to 00_xxx_100b,
5762 since its predecode logic can't detect the length of such instructions
5763 and they degenerate to vector decoding. Increase the cost of such
5764 addresses here. The penalty is at least 2 cycles. It may be worthwhile
5765 to split such addresses or even refuse such addresses at all.
5766
5767 The following addressing modes are affected:
5768 [base+scale*index]
5769 [scale*index+disp]
5770 [base+index]
5771
5772 The first and last cases may be avoidable by explicitly coding the zero in
5773 the memory address, but I don't have an AMD K6 machine handy to check this
5774 theory. */
5775
5776 if (TARGET_K6
5777 && ((!parts.disp && parts.base && parts.index && parts.scale != 1)
5778 || (parts.disp && !parts.base && parts.index && parts.scale != 1)
5779 || (!parts.disp && parts.base && parts.index && parts.scale == 1)))
5780 cost += 10;
5781
5782 return cost;
5783 }
5784 \f
5785 /* If X is a machine specific address (i.e. a symbol or label being
5786 referenced as a displacement from the GOT implemented using an
5787 UNSPEC), then return the base term. Otherwise return X. */
5788
5789 rtx
5790 ix86_find_base_term (rtx x)
5791 {
5792 rtx term;
5793
5794 if (TARGET_64BIT)
5795 {
5796 if (GET_CODE (x) != CONST)
5797 return x;
5798 term = XEXP (x, 0);
5799 if (GET_CODE (term) == PLUS
5800 && (GET_CODE (XEXP (term, 1)) == CONST_INT
5801 || GET_CODE (XEXP (term, 1)) == CONST_DOUBLE))
5802 term = XEXP (term, 0);
5803 if (GET_CODE (term) != UNSPEC
5804 || XINT (term, 1) != UNSPEC_GOTPCREL)
5805 return x;
5806
5807 term = XVECEXP (term, 0, 0);
5808
5809 if (GET_CODE (term) != SYMBOL_REF
5810 && GET_CODE (term) != LABEL_REF)
5811 return x;
5812
5813 return term;
5814 }
5815
5816 term = ix86_delegitimize_address (x);
5817
5818 if (GET_CODE (term) != SYMBOL_REF
5819 && GET_CODE (term) != LABEL_REF)
5820 return x;
5821
5822 return term;
5823 }
5824 \f
5825 /* Determine if a given RTX is a valid constant. We already know this
5826 satisfies CONSTANT_P. */
5827
5828 bool
5829 legitimate_constant_p (rtx x)
5830 {
5831 rtx inner;
5832
5833 switch (GET_CODE (x))
5834 {
5835 case SYMBOL_REF:
5836 /* TLS symbols are not constant. */
5837 if (tls_symbolic_operand (x, Pmode))
5838 return false;
5839 break;
5840
5841 case CONST:
5842 inner = XEXP (x, 0);
5843
5844 /* Offsets of TLS symbols are never valid.
5845 Discourage CSE from creating them. */
5846 if (GET_CODE (inner) == PLUS
5847 && tls_symbolic_operand (XEXP (inner, 0), Pmode))
5848 return false;
5849
5850 if (GET_CODE (inner) == PLUS
5851 || GET_CODE (inner) == MINUS)
5852 {
5853 if (GET_CODE (XEXP (inner, 1)) != CONST_INT)
5854 return false;
5855 inner = XEXP (inner, 0);
5856 }
5857
5858 /* Only some unspecs are valid as "constants". */
5859 if (GET_CODE (inner) == UNSPEC)
5860 switch (XINT (inner, 1))
5861 {
5862 case UNSPEC_TPOFF:
5863 case UNSPEC_NTPOFF:
5864 return local_exec_symbolic_operand (XVECEXP (inner, 0, 0), Pmode);
5865 case UNSPEC_DTPOFF:
5866 return local_dynamic_symbolic_operand (XVECEXP (inner, 0, 0), Pmode);
5867 default:
5868 return false;
5869 }
5870 break;
5871
5872 default:
5873 break;
5874 }
5875
5876 /* Otherwise we handle everything else in the move patterns. */
5877 return true;
5878 }
5879
5880 /* Determine if it's legal to put X into the constant pool. This
5881 is not possible for the address of thread-local symbols, which
5882 is checked above. */
5883
5884 static bool
5885 ix86_cannot_force_const_mem (rtx x)
5886 {
5887 return !legitimate_constant_p (x);
5888 }
5889
5890 /* Determine if a given RTX is a valid constant address. */
5891
5892 bool
5893 constant_address_p (rtx x)
5894 {
5895 return CONSTANT_P (x) && legitimate_address_p (Pmode, x, 1);
5896 }
5897
5898 /* Nonzero if the constant value X is a legitimate general operand
5899 when generating PIC code. It is given that flag_pic is on and
5900 that X satisfies CONSTANT_P or is a CONST_DOUBLE. */
5901
5902 bool
5903 legitimate_pic_operand_p (rtx x)
5904 {
5905 rtx inner;
5906
5907 switch (GET_CODE (x))
5908 {
5909 case CONST:
5910 inner = XEXP (x, 0);
5911
5912 /* Only some unspecs are valid as "constants". */
5913 if (GET_CODE (inner) == UNSPEC)
5914 switch (XINT (inner, 1))
5915 {
5916 case UNSPEC_TPOFF:
5917 return local_exec_symbolic_operand (XVECEXP (inner, 0, 0), Pmode);
5918 default:
5919 return false;
5920 }
5921 /* FALLTHRU */
5922
5923 case SYMBOL_REF:
5924 case LABEL_REF:
5925 return legitimate_pic_address_disp_p (x);
5926
5927 default:
5928 return true;
5929 }
5930 }
5931
5932 /* Determine if a given CONST RTX is a valid memory displacement
5933 in PIC mode. */
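/* Examples of displacements accepted below on ia32, for illustration:
   (const (unspec [(symbol_ref "x")] UNSPEC_GOTOFF)) for a local x@GOTOFF,
   optionally inside a PLUS with a CONST_INT offset, and
   (const (unspec [(symbol_ref "x")] UNSPEC_GOT)) for x@GOT, which is
   rejected if wrapped in a PLUS.  */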
5934
5935 int
5936 legitimate_pic_address_disp_p (rtx disp)
5937 {
5938 bool saw_plus;
5939
5940 /* In 64bit mode we can allow direct addresses of symbols and labels
5941 when they are not dynamic symbols. */
5942 if (TARGET_64BIT)
5943 {
5944 /* TLS references should always be enclosed in UNSPEC. */
5945 if (tls_symbolic_operand (disp, GET_MODE (disp)))
5946 return 0;
5947 if (GET_CODE (disp) == SYMBOL_REF
5948 && ix86_cmodel == CM_SMALL_PIC
5949 && SYMBOL_REF_LOCAL_P (disp))
5950 return 1;
5951 if (GET_CODE (disp) == LABEL_REF)
5952 return 1;
5953 if (GET_CODE (disp) == CONST
5954 && GET_CODE (XEXP (disp, 0)) == PLUS)
5955 {
5956 rtx op0 = XEXP (XEXP (disp, 0), 0);
5957 rtx op1 = XEXP (XEXP (disp, 0), 1);
5958
5959 /* TLS references should always be enclosed in UNSPEC. */
5960 if (tls_symbolic_operand (op0, GET_MODE (op0)))
5961 return 0;
5962 if (((GET_CODE (op0) == SYMBOL_REF
5963 && ix86_cmodel == CM_SMALL_PIC
5964 && SYMBOL_REF_LOCAL_P (op0))
5965 || GET_CODE (op0) == LABEL_REF)
5966 && GET_CODE (op1) == CONST_INT
5967 && INTVAL (op1) < 16*1024*1024
5968 && INTVAL (op1) >= -16*1024*1024)
5969 return 1;
5970 }
5971 }
5972 if (GET_CODE (disp) != CONST)
5973 return 0;
5974 disp = XEXP (disp, 0);
5975
5976 if (TARGET_64BIT)
5977 {
5978 /* It is unsafe to allow PLUS expressions here; that would limit the allowed
5979 distance of GOT references. We should not need these anyway. */
5980 if (GET_CODE (disp) != UNSPEC
5981 || XINT (disp, 1) != UNSPEC_GOTPCREL)
5982 return 0;
5983
5984 if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF
5985 && GET_CODE (XVECEXP (disp, 0, 0)) != LABEL_REF)
5986 return 0;
5987 return 1;
5988 }
5989
5990 saw_plus = false;
5991 if (GET_CODE (disp) == PLUS)
5992 {
5993 if (GET_CODE (XEXP (disp, 1)) != CONST_INT)
5994 return 0;
5995 disp = XEXP (disp, 0);
5996 saw_plus = true;
5997 }
5998
5999 /* Allow {LABEL | SYMBOL}_REF - SYMBOL_REF-FOR-PICBASE for Mach-O. */
6000 if (TARGET_MACHO && GET_CODE (disp) == MINUS)
6001 {
6002 if (GET_CODE (XEXP (disp, 0)) == LABEL_REF
6003 || GET_CODE (XEXP (disp, 0)) == SYMBOL_REF)
6004 if (GET_CODE (XEXP (disp, 1)) == SYMBOL_REF)
6005 {
6006 const char *sym_name = XSTR (XEXP (disp, 1), 0);
6007 if (! strcmp (sym_name, "<pic base>"))
6008 return 1;
6009 }
6010 }
6011
6012 if (GET_CODE (disp) != UNSPEC)
6013 return 0;
6014
6015 switch (XINT (disp, 1))
6016 {
6017 case UNSPEC_GOT:
6018 if (saw_plus)
6019 return false;
6020 return GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF;
6021 case UNSPEC_GOTOFF:
6022 if (GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
6023 || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF)
6024 return local_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
6025 return false;
6026 case UNSPEC_GOTTPOFF:
6027 case UNSPEC_GOTNTPOFF:
6028 case UNSPEC_INDNTPOFF:
6029 if (saw_plus)
6030 return false;
6031 return initial_exec_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
6032 case UNSPEC_NTPOFF:
6033 return local_exec_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
6034 case UNSPEC_DTPOFF:
6035 return local_dynamic_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
6036 }
6037
6038 return 0;
6039 }
6040
6041 /* GO_IF_LEGITIMATE_ADDRESS recognizes an RTL expression that is a valid
6042 memory address for an instruction. The MODE argument is the machine mode
6043 for the MEM expression that wants to use this address.
6044
6045 It only recognizes addresses in canonical form. LEGITIMIZE_ADDRESS should
6046 convert common non-canonical forms to canonical form so that they will
6047 be recognized. */
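/* For illustration: (plus (reg %ebx) (mult (reg %eax) (const_int 4))) is
   accepted, while a scale of 3, a scale without an index, or a base or
   index register not in Pmode is rejected with one of the "reason"
   strings below.  */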
6048
6049 int
6050 legitimate_address_p (enum machine_mode mode, rtx addr, int strict)
6051 {
6052 struct ix86_address parts;
6053 rtx base, index, disp;
6054 HOST_WIDE_INT scale;
6055 const char *reason = NULL;
6056 rtx reason_rtx = NULL_RTX;
6057
6058 if (TARGET_DEBUG_ADDR)
6059 {
6060 fprintf (stderr,
6061 "\n======\nGO_IF_LEGITIMATE_ADDRESS, mode = %s, strict = %d\n",
6062 GET_MODE_NAME (mode), strict);
6063 debug_rtx (addr);
6064 }
6065
6066 if (ix86_decompose_address (addr, &parts) <= 0)
6067 {
6068 reason = "decomposition failed";
6069 goto report_error;
6070 }
6071
6072 base = parts.base;
6073 index = parts.index;
6074 disp = parts.disp;
6075 scale = parts.scale;
6076
6077 /* Validate base register.
6078
6079 Don't allow SUBREGs here; they can lead to spill failures when the base
6080 is one word out of a two word structure, which is represented internally
6081 as a DImode int. */
6082
6083 if (base)
6084 {
6085 reason_rtx = base;
6086
6087 if (GET_CODE (base) != REG)
6088 {
6089 reason = "base is not a register";
6090 goto report_error;
6091 }
6092
6093 if (GET_MODE (base) != Pmode)
6094 {
6095 reason = "base is not in Pmode";
6096 goto report_error;
6097 }
6098
6099 if ((strict && ! REG_OK_FOR_BASE_STRICT_P (base))
6100 || (! strict && ! REG_OK_FOR_BASE_NONSTRICT_P (base)))
6101 {
6102 reason = "base is not valid";
6103 goto report_error;
6104 }
6105 }
6106
6107 /* Validate index register.
6108
6109 Don't allow SUBREGs here; they can lead to spill failures when the index
6110 is one word out of a two word structure, which is represented internally
6111 as a DImode int. */
6112
6113 if (index)
6114 {
6115 reason_rtx = index;
6116
6117 if (GET_CODE (index) != REG)
6118 {
6119 reason = "index is not a register";
6120 goto report_error;
6121 }
6122
6123 if (GET_MODE (index) != Pmode)
6124 {
6125 reason = "index is not in Pmode";
6126 goto report_error;
6127 }
6128
6129 if ((strict && ! REG_OK_FOR_INDEX_STRICT_P (index))
6130 || (! strict && ! REG_OK_FOR_INDEX_NONSTRICT_P (index)))
6131 {
6132 reason = "index is not valid";
6133 goto report_error;
6134 }
6135 }
6136
6137 /* Validate scale factor. */
6138 if (scale != 1)
6139 {
6140 reason_rtx = GEN_INT (scale);
6141 if (!index)
6142 {
6143 reason = "scale without index";
6144 goto report_error;
6145 }
6146
6147 if (scale != 2 && scale != 4 && scale != 8)
6148 {
6149 reason = "scale is not a valid multiplier";
6150 goto report_error;
6151 }
6152 }
6153
6154 /* Validate displacement. */
6155 if (disp)
6156 {
6157 reason_rtx = disp;
6158
6159 if (GET_CODE (disp) == CONST
6160 && GET_CODE (XEXP (disp, 0)) == UNSPEC)
6161 switch (XINT (XEXP (disp, 0), 1))
6162 {
6163 case UNSPEC_GOT:
6164 case UNSPEC_GOTOFF:
6165 case UNSPEC_GOTPCREL:
6166 if (!flag_pic)
6167 abort ();
6168 goto is_legitimate_pic;
6169
6170 case UNSPEC_GOTTPOFF:
6171 case UNSPEC_GOTNTPOFF:
6172 case UNSPEC_INDNTPOFF:
6173 case UNSPEC_NTPOFF:
6174 case UNSPEC_DTPOFF:
6175 break;
6176
6177 default:
6178 reason = "invalid address unspec";
6179 goto report_error;
6180 }
6181
6182 else if (flag_pic && (SYMBOLIC_CONST (disp)
6183 #if TARGET_MACHO
6184 && !machopic_operand_p (disp)
6185 #endif
6186 ))
6187 {
6188 is_legitimate_pic:
6189 if (TARGET_64BIT && (index || base))
6190 {
6191 /* foo@dtpoff(%rX) is ok. */
6192 if (GET_CODE (disp) != CONST
6193 || GET_CODE (XEXP (disp, 0)) != PLUS
6194 || GET_CODE (XEXP (XEXP (disp, 0), 0)) != UNSPEC
6195 || GET_CODE (XEXP (XEXP (disp, 0), 1)) != CONST_INT
6196 || (XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_DTPOFF
6197 && XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_NTPOFF))
6198 {
6199 reason = "non-constant pic memory reference";
6200 goto report_error;
6201 }
6202 }
6203 else if (! legitimate_pic_address_disp_p (disp))
6204 {
6205 reason = "displacement is an invalid pic construct";
6206 goto report_error;
6207 }
6208
6209 /* This code used to verify that a symbolic pic displacement
6210 includes the pic_offset_table_rtx register.
6211
6212 While this is a good idea, unfortunately these constructs may
6213 be created by "adds using lea" optimization for incorrect
6214 code like:
6215
6216 int a;
6217 int foo(int i)
6218 {
6219 return *(&a+i);
6220 }
6221
6222 This code is nonsensical, but results in addressing the
6223 GOT table with pic_offset_table_rtx as the base. We can't
6224 just refuse it easily, since it gets matched by the
6225 "addsi3" pattern, which later gets split to an lea when the
6226 output register differs from the input. While this
6227 could be handled by a separate addsi pattern for this case
6228 that never results in an lea, disabling this test seems to be
6229 the easier and correct fix for the crash. */
6230 }
6231 else if (GET_CODE (disp) != LABEL_REF
6232 && GET_CODE (disp) != CONST_INT
6233 && (GET_CODE (disp) != CONST
6234 || !legitimate_constant_p (disp))
6235 && (GET_CODE (disp) != SYMBOL_REF
6236 || !legitimate_constant_p (disp)))
6237 {
6238 reason = "displacement is not constant";
6239 goto report_error;
6240 }
6241 else if (TARGET_64BIT && !x86_64_sign_extended_value (disp))
6242 {
6243 reason = "displacement is out of range";
6244 goto report_error;
6245 }
6246 }
6247
6248 /* Everything looks valid. */
6249 if (TARGET_DEBUG_ADDR)
6250 fprintf (stderr, "Success.\n");
6251 return TRUE;
6252
6253 report_error:
6254 if (TARGET_DEBUG_ADDR)
6255 {
6256 fprintf (stderr, "Error: %s\n", reason);
6257 debug_rtx (reason_rtx);
6258 }
6259 return FALSE;
6260 }
6261 \f
6262 /* Return a unique alias set for the GOT. */
6263
6264 static HOST_WIDE_INT
6265 ix86_GOT_alias_set (void)
6266 {
6267 static HOST_WIDE_INT set = -1;
6268 if (set == -1)
6269 set = new_alias_set ();
6270 return set;
6271 }
6272
6273 /* Return a legitimate reference for ORIG (an address) using the
6274 register REG. If REG is 0, a new pseudo is generated.
6275
6276 There are two types of references that must be handled:
6277
6278 1. Global data references must load the address from the GOT, via
6279 the PIC reg. An insn is emitted to do this load, and the reg is
6280 returned.
6281
6282 2. Static data references, constant pool addresses, and code labels
6283 compute the address as an offset from the GOT, whose base is in
6284 the PIC reg. Static data objects have SYMBOL_FLAG_LOCAL set to
6285 differentiate them from global data objects. The returned
6286 address is the PIC reg + an unspec constant.
6287
6288 GO_IF_LEGITIMATE_ADDRESS rejects symbolic references unless the PIC
6289 reg also appears in the address. */
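/* Illustrative results on ia32 with -fpic: a local symbol "x" becomes
   (plus pic_offset_table_rtx (const (unspec [x] UNSPEC_GOTOFF))),
   i.e. x@GOTOFF(%ebx), while a global symbol becomes a load from
   x@GOT(%ebx) into REG (or into a fresh pseudo if REG is 0).  */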
6290
6291 static rtx
6292 legitimize_pic_address (rtx orig, rtx reg)
6293 {
6294 rtx addr = orig;
6295 rtx new = orig;
6296 rtx base;
6297
6298 #if TARGET_MACHO
6299 if (reg == 0)
6300 reg = gen_reg_rtx (Pmode);
6301 /* Use the generic Mach-O PIC machinery. */
6302 return machopic_legitimize_pic_address (orig, GET_MODE (orig), reg);
6303 #endif
6304
6305 if (TARGET_64BIT && legitimate_pic_address_disp_p (addr))
6306 new = addr;
6307 else if (!TARGET_64BIT && local_symbolic_operand (addr, Pmode))
6308 {
6309 /* This symbol may be referenced via a displacement from the PIC
6310 base address (@GOTOFF). */
6311
6312 if (reload_in_progress)
6313 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
6314 if (GET_CODE (addr) == CONST)
6315 addr = XEXP (addr, 0);
6316 if (GET_CODE (addr) == PLUS)
6317 {
6318 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)), UNSPEC_GOTOFF);
6319 new = gen_rtx_PLUS (Pmode, new, XEXP (addr, 1));
6320 }
6321 else
6322 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
6323 new = gen_rtx_CONST (Pmode, new);
6324 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
6325
6326 if (reg != 0)
6327 {
6328 emit_move_insn (reg, new);
6329 new = reg;
6330 }
6331 }
6332 else if (GET_CODE (addr) == SYMBOL_REF)
6333 {
6334 if (TARGET_64BIT)
6335 {
6336 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTPCREL);
6337 new = gen_rtx_CONST (Pmode, new);
6338 new = gen_rtx_MEM (Pmode, new);
6339 RTX_UNCHANGING_P (new) = 1;
6340 set_mem_alias_set (new, ix86_GOT_alias_set ());
6341
6342 if (reg == 0)
6343 reg = gen_reg_rtx (Pmode);
6344 /* Use gen_movsi directly, otherwise the address is loaded
6345 into a register for CSE. We don't want to CSE these addresses;
6346 instead we CSE addresses from the GOT table, so skip this. */
6347 emit_insn (gen_movsi (reg, new));
6348 new = reg;
6349 }
6350 else
6351 {
6352 /* This symbol must be referenced via a load from the
6353 Global Offset Table (@GOT). */
6354
6355 if (reload_in_progress)
6356 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
6357 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT);
6358 new = gen_rtx_CONST (Pmode, new);
6359 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
6360 new = gen_rtx_MEM (Pmode, new);
6361 RTX_UNCHANGING_P (new) = 1;
6362 set_mem_alias_set (new, ix86_GOT_alias_set ());
6363
6364 if (reg == 0)
6365 reg = gen_reg_rtx (Pmode);
6366 emit_move_insn (reg, new);
6367 new = reg;
6368 }
6369 }
6370 else
6371 {
6372 if (GET_CODE (addr) == CONST)
6373 {
6374 addr = XEXP (addr, 0);
6375
6376 /* We must match stuff we generate before. Assume the only
6377 unspecs that can get here are ours. Not that we could do
6378 anything with them anyway.... */
6379 if (GET_CODE (addr) == UNSPEC
6380 || (GET_CODE (addr) == PLUS
6381 && GET_CODE (XEXP (addr, 0)) == UNSPEC))
6382 return orig;
6383 if (GET_CODE (addr) != PLUS)
6384 abort ();
6385 }
6386 if (GET_CODE (addr) == PLUS)
6387 {
6388 rtx op0 = XEXP (addr, 0), op1 = XEXP (addr, 1);
6389
6390 /* Check first to see if this is a constant offset from a @GOTOFF
6391 symbol reference. */
6392 if (local_symbolic_operand (op0, Pmode)
6393 && GET_CODE (op1) == CONST_INT)
6394 {
6395 if (!TARGET_64BIT)
6396 {
6397 if (reload_in_progress)
6398 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
6399 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op0),
6400 UNSPEC_GOTOFF);
6401 new = gen_rtx_PLUS (Pmode, new, op1);
6402 new = gen_rtx_CONST (Pmode, new);
6403 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
6404
6405 if (reg != 0)
6406 {
6407 emit_move_insn (reg, new);
6408 new = reg;
6409 }
6410 }
6411 else
6412 {
6413 if (INTVAL (op1) < -16*1024*1024
6414 || INTVAL (op1) >= 16*1024*1024)
6415 new = gen_rtx_PLUS (Pmode, op0, force_reg (Pmode, op1));
6416 }
6417 }
6418 else
6419 {
6420 base = legitimize_pic_address (XEXP (addr, 0), reg);
6421 new = legitimize_pic_address (XEXP (addr, 1),
6422 base == reg ? NULL_RTX : reg);
6423
6424 if (GET_CODE (new) == CONST_INT)
6425 new = plus_constant (base, INTVAL (new));
6426 else
6427 {
6428 if (GET_CODE (new) == PLUS && CONSTANT_P (XEXP (new, 1)))
6429 {
6430 base = gen_rtx_PLUS (Pmode, base, XEXP (new, 0));
6431 new = XEXP (new, 1);
6432 }
6433 new = gen_rtx_PLUS (Pmode, base, new);
6434 }
6435 }
6436 }
6437 }
6438 return new;
6439 }
6440 \f
6441 /* Load the thread pointer. If TO_REG is true, force it into a register. */
6442
6443 static rtx
6444 get_thread_pointer (int to_reg)
6445 {
6446 rtx tp, reg, insn;
6447
6448 tp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), UNSPEC_TP);
6449 if (!to_reg)
6450 return tp;
6451
6452 reg = gen_reg_rtx (Pmode);
6453 insn = gen_rtx_SET (VOIDmode, reg, tp);
6454 insn = emit_insn (insn);
6455
6456 return reg;
6457 }
6458
6459 /* A subroutine of legitimize_address and ix86_expand_move. FOR_MOV is
6460 false if we expect this to be used for a memory address and true if
6461 we expect to load the address into a register. */
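/* Sketch of the local-exec case below, for illustration (ia32, GNU TLS):
   the address built is (plus <thread pointer> (const (unspec [x] UNSPEC_NTPOFF))),
   which with TARGET_TLS_DIRECT_SEG_REFS can be used directly as a
   %gs-relative memory operand, roughly "movl %gs:x@NTPOFF, %eax";
   without GNU TLS the offset is instead subtracted from the thread
   pointer with a subsi3.  */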
6462
6463 static rtx
6464 legitimize_tls_address (rtx x, enum tls_model model, int for_mov)
6465 {
6466 rtx dest, base, off, pic;
6467 int type;
6468
6469 switch (model)
6470 {
6471 case TLS_MODEL_GLOBAL_DYNAMIC:
6472 dest = gen_reg_rtx (Pmode);
6473 if (TARGET_64BIT)
6474 {
6475 rtx rax = gen_rtx_REG (Pmode, 0), insns;
6476
6477 start_sequence ();
6478 emit_call_insn (gen_tls_global_dynamic_64 (rax, x));
6479 insns = get_insns ();
6480 end_sequence ();
6481
6482 emit_libcall_block (insns, dest, rax, x);
6483 }
6484 else
6485 emit_insn (gen_tls_global_dynamic_32 (dest, x));
6486 break;
6487
6488 case TLS_MODEL_LOCAL_DYNAMIC:
6489 base = gen_reg_rtx (Pmode);
6490 if (TARGET_64BIT)
6491 {
6492 rtx rax = gen_rtx_REG (Pmode, 0), insns, note;
6493
6494 start_sequence ();
6495 emit_call_insn (gen_tls_local_dynamic_base_64 (rax));
6496 insns = get_insns ();
6497 end_sequence ();
6498
6499 note = gen_rtx_EXPR_LIST (VOIDmode, const0_rtx, NULL);
6500 note = gen_rtx_EXPR_LIST (VOIDmode, ix86_tls_get_addr (), note);
6501 emit_libcall_block (insns, base, rax, note);
6502 }
6503 else
6504 emit_insn (gen_tls_local_dynamic_base_32 (base));
6505
6506 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_DTPOFF);
6507 off = gen_rtx_CONST (Pmode, off);
6508
6509 return gen_rtx_PLUS (Pmode, base, off);
6510
6511 case TLS_MODEL_INITIAL_EXEC:
6512 if (TARGET_64BIT)
6513 {
6514 pic = NULL;
6515 type = UNSPEC_GOTNTPOFF;
6516 }
6517 else if (flag_pic)
6518 {
6519 if (reload_in_progress)
6520 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
6521 pic = pic_offset_table_rtx;
6522 type = TARGET_GNU_TLS ? UNSPEC_GOTNTPOFF : UNSPEC_GOTTPOFF;
6523 }
6524 else if (!TARGET_GNU_TLS)
6525 {
6526 pic = gen_reg_rtx (Pmode);
6527 emit_insn (gen_set_got (pic));
6528 type = UNSPEC_GOTTPOFF;
6529 }
6530 else
6531 {
6532 pic = NULL;
6533 type = UNSPEC_INDNTPOFF;
6534 }
6535
6536 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), type);
6537 off = gen_rtx_CONST (Pmode, off);
6538 if (pic)
6539 off = gen_rtx_PLUS (Pmode, pic, off);
6540 off = gen_rtx_MEM (Pmode, off);
6541 RTX_UNCHANGING_P (off) = 1;
6542 set_mem_alias_set (off, ix86_GOT_alias_set ());
6543
6544 if (TARGET_64BIT || TARGET_GNU_TLS)
6545 {
6546 base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
6547 off = force_reg (Pmode, off);
6548 return gen_rtx_PLUS (Pmode, base, off);
6549 }
6550 else
6551 {
6552 base = get_thread_pointer (true);
6553 dest = gen_reg_rtx (Pmode);
6554 emit_insn (gen_subsi3 (dest, base, off));
6555 }
6556 break;
6557
6558 case TLS_MODEL_LOCAL_EXEC:
6559 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x),
6560 (TARGET_64BIT || TARGET_GNU_TLS)
6561 ? UNSPEC_NTPOFF : UNSPEC_TPOFF);
6562 off = gen_rtx_CONST (Pmode, off);
6563
6564 if (TARGET_64BIT || TARGET_GNU_TLS)
6565 {
6566 base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
6567 return gen_rtx_PLUS (Pmode, base, off);
6568 }
6569 else
6570 {
6571 base = get_thread_pointer (true);
6572 dest = gen_reg_rtx (Pmode);
6573 emit_insn (gen_subsi3 (dest, base, off));
6574 }
6575 break;
6576
6577 default:
6578 abort ();
6579 }
6580
6581 return dest;
6582 }
6583
6584 /* Try machine-dependent ways of modifying an illegitimate address
6585 to be legitimate. If we find one, return the new, valid address.
6586 This macro is used in only one place: `memory_address' in explow.c.
6587
6588 OLDX is the address as it was before break_out_memory_refs was called.
6589 In some cases it is useful to look at this to decide what needs to be done.
6590
6591 MODE and WIN are passed so that this macro can use
6592 GO_IF_LEGITIMATE_ADDRESS.
6593
6594 It is always safe for this macro to do nothing. It exists to recognize
6595 opportunities to optimize the output.
6596
6597 For the 80386, we handle X+REG by loading X into a register R and
6598 using R+REG. R will go in a general reg and indexing will be used.
6599 However, if REG is a broken-out memory address or multiplication,
6600 nothing needs to be done because REG can certainly go in a general reg.
6601
6602 When -fpic is used, special handling is needed for symbolic references.
6603 See comments by legitimize_pic_address in i386.c for details. */
6604
6605 rtx
6606 legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED, enum machine_mode mode)
6607 {
6608 int changed = 0;
6609 unsigned log;
6610
6611 if (TARGET_DEBUG_ADDR)
6612 {
6613 fprintf (stderr, "\n==========\nLEGITIMIZE_ADDRESS, mode = %s\n",
6614 GET_MODE_NAME (mode));
6615 debug_rtx (x);
6616 }
6617
6618 log = tls_symbolic_operand (x, mode);
6619 if (log)
6620 return legitimize_tls_address (x, log, false);
6621 if (GET_CODE (x) == CONST
6622 && GET_CODE (XEXP (x, 0)) == PLUS
6623 && (log = tls_symbolic_operand (XEXP (XEXP (x, 0), 0), Pmode)))
6624 {
6625 rtx t = legitimize_tls_address (XEXP (XEXP (x, 0), 0), log, false);
6626 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (x, 0), 1));
6627 }
6628
6629 if (flag_pic && SYMBOLIC_CONST (x))
6630 return legitimize_pic_address (x, 0);
6631
6632 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
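/* E.g. (ashift (reg) (const_int 3)) becomes (mult (reg) (const_int 8)),
   with the shifted operand forced into a register.  */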
6633 if (GET_CODE (x) == ASHIFT
6634 && GET_CODE (XEXP (x, 1)) == CONST_INT
6635 && (log = (unsigned) exact_log2 (INTVAL (XEXP (x, 1)))) < 4)
6636 {
6637 changed = 1;
6638 x = gen_rtx_MULT (Pmode, force_reg (Pmode, XEXP (x, 0)),
6639 GEN_INT (1 << log));
6640 }
6641
6642 if (GET_CODE (x) == PLUS)
6643 {
6644 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
6645
6646 if (GET_CODE (XEXP (x, 0)) == ASHIFT
6647 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
6648 && (log = (unsigned) exact_log2 (INTVAL (XEXP (XEXP (x, 0), 1)))) < 4)
6649 {
6650 changed = 1;
6651 XEXP (x, 0) = gen_rtx_MULT (Pmode,
6652 force_reg (Pmode, XEXP (XEXP (x, 0), 0)),
6653 GEN_INT (1 << log));
6654 }
6655
6656 if (GET_CODE (XEXP (x, 1)) == ASHIFT
6657 && GET_CODE (XEXP (XEXP (x, 1), 1)) == CONST_INT
6658 && (log = (unsigned) exact_log2 (INTVAL (XEXP (XEXP (x, 1), 1)))) < 4)
6659 {
6660 changed = 1;
6661 XEXP (x, 1) = gen_rtx_MULT (Pmode,
6662 force_reg (Pmode, XEXP (XEXP (x, 1), 0)),
6663 GEN_INT (1 << log));
6664 }
6665
6666 /* Put multiply first if it isn't already. */
6667 if (GET_CODE (XEXP (x, 1)) == MULT)
6668 {
6669 rtx tmp = XEXP (x, 0);
6670 XEXP (x, 0) = XEXP (x, 1);
6671 XEXP (x, 1) = tmp;
6672 changed = 1;
6673 }
6674
6675 /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
6676 into (plus (plus (mult (reg) (const)) (reg)) (const)). This can be
6677 created by virtual register instantiation, register elimination, and
6678 similar optimizations. */
6679 if (GET_CODE (XEXP (x, 0)) == MULT && GET_CODE (XEXP (x, 1)) == PLUS)
6680 {
6681 changed = 1;
6682 x = gen_rtx_PLUS (Pmode,
6683 gen_rtx_PLUS (Pmode, XEXP (x, 0),
6684 XEXP (XEXP (x, 1), 0)),
6685 XEXP (XEXP (x, 1), 1));
6686 }
6687
6688 /* Canonicalize
6689 (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
6690 into (plus (plus (mult (reg) (const)) (reg)) (const)). */
6691 else if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS
6692 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
6693 && GET_CODE (XEXP (XEXP (x, 0), 1)) == PLUS
6694 && CONSTANT_P (XEXP (x, 1)))
6695 {
6696 rtx constant;
6697 rtx other = NULL_RTX;
6698
6699 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
6700 {
6701 constant = XEXP (x, 1);
6702 other = XEXP (XEXP (XEXP (x, 0), 1), 1);
6703 }
6704 else if (GET_CODE (XEXP (XEXP (XEXP (x, 0), 1), 1)) == CONST_INT)
6705 {
6706 constant = XEXP (XEXP (XEXP (x, 0), 1), 1);
6707 other = XEXP (x, 1);
6708 }
6709 else
6710 constant = 0;
6711
6712 if (constant)
6713 {
6714 changed = 1;
6715 x = gen_rtx_PLUS (Pmode,
6716 gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 0),
6717 XEXP (XEXP (XEXP (x, 0), 1), 0)),
6718 plus_constant (other, INTVAL (constant)));
6719 }
6720 }
6721
6722 if (changed && legitimate_address_p (mode, x, FALSE))
6723 return x;
6724
6725 if (GET_CODE (XEXP (x, 0)) == MULT)
6726 {
6727 changed = 1;
6728 XEXP (x, 0) = force_operand (XEXP (x, 0), 0);
6729 }
6730
6731 if (GET_CODE (XEXP (x, 1)) == MULT)
6732 {
6733 changed = 1;
6734 XEXP (x, 1) = force_operand (XEXP (x, 1), 0);
6735 }
6736
6737 if (changed
6738 && GET_CODE (XEXP (x, 1)) == REG
6739 && GET_CODE (XEXP (x, 0)) == REG)
6740 return x;
6741
6742 if (flag_pic && SYMBOLIC_CONST (XEXP (x, 1)))
6743 {
6744 changed = 1;
6745 x = legitimize_pic_address (x, 0);
6746 }
6747
6748 if (changed && legitimate_address_p (mode, x, FALSE))
6749 return x;
6750
6751 if (GET_CODE (XEXP (x, 0)) == REG)
6752 {
6753 rtx temp = gen_reg_rtx (Pmode);
6754 rtx val = force_operand (XEXP (x, 1), temp);
6755 if (val != temp)
6756 emit_move_insn (temp, val);
6757
6758 XEXP (x, 1) = temp;
6759 return x;
6760 }
6761
6762 else if (GET_CODE (XEXP (x, 1)) == REG)
6763 {
6764 rtx temp = gen_reg_rtx (Pmode);
6765 rtx val = force_operand (XEXP (x, 0), temp);
6766 if (val != temp)
6767 emit_move_insn (temp, val);
6768
6769 XEXP (x, 0) = temp;
6770 return x;
6771 }
6772 }
6773
6774 return x;
6775 }
6776 \f
6777 /* Print an integer constant expression in assembler syntax. Addition
6778 and subtraction are the only arithmetic that may appear in these
6779 expressions. FILE is the stdio stream to write to, X is the rtx, and
6780 CODE is the operand print code from the output string. */
6781
6782 static void
6783 output_pic_addr_const (FILE *file, rtx x, int code)
6784 {
6785 char buf[256];
6786
6787 switch (GET_CODE (x))
6788 {
6789 case PC:
6790 if (flag_pic)
6791 putc ('.', file);
6792 else
6793 abort ();
6794 break;
6795
6796 case SYMBOL_REF:
6797 /* Mark the decl as referenced so that cgraph will output the function. */
6798 if (SYMBOL_REF_DECL (x))
6799 mark_decl_referenced (SYMBOL_REF_DECL (x));
6800
6801 assemble_name (file, XSTR (x, 0));
6802 if (!TARGET_MACHO && code == 'P' && ! SYMBOL_REF_LOCAL_P (x))
6803 fputs ("@PLT", file);
6804 break;
6805
6806 case LABEL_REF:
6807 x = XEXP (x, 0);
6808 /* FALLTHRU */
6809 case CODE_LABEL:
6810 ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (x));
6811 assemble_name (asm_out_file, buf);
6812 break;
6813
6814 case CONST_INT:
6815 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
6816 break;
6817
6818 case CONST:
6819 /* This used to output parentheses around the expression,
6820 but that does not work on the 386 (either ATT or BSD assembler). */
6821 output_pic_addr_const (file, XEXP (x, 0), code);
6822 break;
6823
6824 case CONST_DOUBLE:
6825 if (GET_MODE (x) == VOIDmode)
6826 {
6827 /* We can use %d if the number is <32 bits and positive. */
6828 if (CONST_DOUBLE_HIGH (x) || CONST_DOUBLE_LOW (x) < 0)
6829 fprintf (file, "0x%lx%08lx",
6830 (unsigned long) CONST_DOUBLE_HIGH (x),
6831 (unsigned long) CONST_DOUBLE_LOW (x));
6832 else
6833 fprintf (file, HOST_WIDE_INT_PRINT_DEC, CONST_DOUBLE_LOW (x));
6834 }
6835 else
6836 /* We can't handle floating point constants;
6837 PRINT_OPERAND must handle them. */
6838 output_operand_lossage ("floating constant misused");
6839 break;
6840
6841 case PLUS:
6842 /* Some assemblers need integer constants to appear first. */
6843 if (GET_CODE (XEXP (x, 0)) == CONST_INT)
6844 {
6845 output_pic_addr_const (file, XEXP (x, 0), code);
6846 putc ('+', file);
6847 output_pic_addr_const (file, XEXP (x, 1), code);
6848 }
6849 else if (GET_CODE (XEXP (x, 1)) == CONST_INT)
6850 {
6851 output_pic_addr_const (file, XEXP (x, 1), code);
6852 putc ('+', file);
6853 output_pic_addr_const (file, XEXP (x, 0), code);
6854 }
6855 else
6856 abort ();
6857 break;
6858
6859 case MINUS:
6860 if (!TARGET_MACHO)
6861 putc (ASSEMBLER_DIALECT == ASM_INTEL ? '(' : '[', file);
6862 output_pic_addr_const (file, XEXP (x, 0), code);
6863 putc ('-', file);
6864 output_pic_addr_const (file, XEXP (x, 1), code);
6865 if (!TARGET_MACHO)
6866 putc (ASSEMBLER_DIALECT == ASM_INTEL ? ')' : ']', file);
6867 break;
6868
6869 case UNSPEC:
6870 if (XVECLEN (x, 0) != 1)
6871 abort ();
6872 output_pic_addr_const (file, XVECEXP (x, 0, 0), code);
6873 switch (XINT (x, 1))
6874 {
6875 case UNSPEC_GOT:
6876 fputs ("@GOT", file);
6877 break;
6878 case UNSPEC_GOTOFF:
6879 fputs ("@GOTOFF", file);
6880 break;
6881 case UNSPEC_GOTPCREL:
6882 fputs ("@GOTPCREL(%rip)", file);
6883 break;
6884 case UNSPEC_GOTTPOFF:
6885 /* FIXME: This might be @TPOFF in Sun ld too. */
6886 fputs ("@GOTTPOFF", file);
6887 break;
6888 case UNSPEC_TPOFF:
6889 fputs ("@TPOFF", file);
6890 break;
6891 case UNSPEC_NTPOFF:
6892 if (TARGET_64BIT)
6893 fputs ("@TPOFF", file);
6894 else
6895 fputs ("@NTPOFF", file);
6896 break;
6897 case UNSPEC_DTPOFF:
6898 fputs ("@DTPOFF", file);
6899 break;
6900 case UNSPEC_GOTNTPOFF:
6901 if (TARGET_64BIT)
6902 fputs ("@GOTTPOFF(%rip)", file);
6903 else
6904 fputs ("@GOTNTPOFF", file);
6905 break;
6906 case UNSPEC_INDNTPOFF:
6907 fputs ("@INDNTPOFF", file);
6908 break;
6909 default:
6910 output_operand_lossage ("invalid UNSPEC as operand");
6911 break;
6912 }
6913 break;
6914
6915 default:
6916 output_operand_lossage ("invalid expression as operand");
6917 }
6918 }
6919
6920 /* This is called from dwarfout.c via ASM_OUTPUT_DWARF_ADDR_CONST.
6921 We need to handle our special PIC relocations. */
6922
6923 void
6924 i386_dwarf_output_addr_const (FILE *file, rtx x)
6925 {
6926 #ifdef ASM_QUAD
6927 fprintf (file, "%s", TARGET_64BIT ? ASM_QUAD : ASM_LONG);
6928 #else
6929 if (TARGET_64BIT)
6930 abort ();
6931 fprintf (file, "%s", ASM_LONG);
6932 #endif
6933 if (flag_pic)
6934 output_pic_addr_const (file, x, '\0');
6935 else
6936 output_addr_const (file, x);
6937 fputc ('\n', file);
6938 }
6939
6940 /* This is called from dwarf2out.c via ASM_OUTPUT_DWARF_DTPREL.
6941 We need to emit DTP-relative relocations. */
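/* Illustrative output, assuming ASM_LONG expands to the usual .long
   directive: for SIZE == 4 this emits something like ".long x@DTPOFF",
   and for SIZE == 8 ".long x@DTPOFF, 0".  */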
6942
6943 void
6944 i386_output_dwarf_dtprel (FILE *file, int size, rtx x)
6945 {
6946 fputs (ASM_LONG, file);
6947 output_addr_const (file, x);
6948 fputs ("@DTPOFF", file);
6949 switch (size)
6950 {
6951 case 4:
6952 break;
6953 case 8:
6954 fputs (", 0", file);
6955 break;
6956 default:
6957 abort ();
6958 }
6959 }
6960
6961 /* In the name of slightly smaller debug output, and to cater to
6962 general assembler lossage, recognize PIC+GOTOFF and turn it back
6963 into a direct symbol reference. */
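/* Illustration: a memory address of the form
   (plus (reg %ebx) (const (unspec [(symbol_ref "x")] UNSPEC_GOTOFF)))
   is turned back into plain (symbol_ref "x"), plus any index term and
   constant offset that were present.  */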
6964
6965 static rtx
6966 ix86_delegitimize_address (rtx orig_x)
6967 {
6968 rtx x = orig_x, y;
6969
6970 if (GET_CODE (x) == MEM)
6971 x = XEXP (x, 0);
6972
6973 if (TARGET_64BIT)
6974 {
6975 if (GET_CODE (x) != CONST
6976 || GET_CODE (XEXP (x, 0)) != UNSPEC
6977 || XINT (XEXP (x, 0), 1) != UNSPEC_GOTPCREL
6978 || GET_CODE (orig_x) != MEM)
6979 return orig_x;
6980 return XVECEXP (XEXP (x, 0), 0, 0);
6981 }
6982
6983 if (GET_CODE (x) != PLUS
6984 || GET_CODE (XEXP (x, 1)) != CONST)
6985 return orig_x;
6986
6987 if (GET_CODE (XEXP (x, 0)) == REG
6988 && REGNO (XEXP (x, 0)) == PIC_OFFSET_TABLE_REGNUM)
6989 /* %ebx + GOT/GOTOFF */
6990 y = NULL;
6991 else if (GET_CODE (XEXP (x, 0)) == PLUS)
6992 {
6993 /* %ebx + %reg * scale + GOT/GOTOFF */
6994 y = XEXP (x, 0);
6995 if (GET_CODE (XEXP (y, 0)) == REG
6996 && REGNO (XEXP (y, 0)) == PIC_OFFSET_TABLE_REGNUM)
6997 y = XEXP (y, 1);
6998 else if (GET_CODE (XEXP (y, 1)) == REG
6999 && REGNO (XEXP (y, 1)) == PIC_OFFSET_TABLE_REGNUM)
7000 y = XEXP (y, 0);
7001 else
7002 return orig_x;
7003 if (GET_CODE (y) != REG
7004 && GET_CODE (y) != MULT
7005 && GET_CODE (y) != ASHIFT)
7006 return orig_x;
7007 }
7008 else
7009 return orig_x;
7010
7011 x = XEXP (XEXP (x, 1), 0);
7012 if (GET_CODE (x) == UNSPEC
7013 && ((XINT (x, 1) == UNSPEC_GOT && GET_CODE (orig_x) == MEM)
7014 || (XINT (x, 1) == UNSPEC_GOTOFF && GET_CODE (orig_x) != MEM)))
7015 {
7016 if (y)
7017 return gen_rtx_PLUS (Pmode, y, XVECEXP (x, 0, 0));
7018 return XVECEXP (x, 0, 0);
7019 }
7020
7021 if (GET_CODE (x) == PLUS
7022 && GET_CODE (XEXP (x, 0)) == UNSPEC
7023 && GET_CODE (XEXP (x, 1)) == CONST_INT
7024 && ((XINT (XEXP (x, 0), 1) == UNSPEC_GOT && GET_CODE (orig_x) == MEM)
7025 || (XINT (XEXP (x, 0), 1) == UNSPEC_GOTOFF
7026 && GET_CODE (orig_x) != MEM)))
7027 {
7028 x = gen_rtx_PLUS (VOIDmode, XVECEXP (XEXP (x, 0), 0, 0), XEXP (x, 1));
7029 if (y)
7030 return gen_rtx_PLUS (Pmode, y, x);
7031 return x;
7032 }
7033
7034 return orig_x;
7035 }
7036 \f
7037 static void
7038 put_condition_code (enum rtx_code code, enum machine_mode mode, int reverse,
7039 int fp, FILE *file)
7040 {
7041 const char *suffix;
7042
7043 if (mode == CCFPmode || mode == CCFPUmode)
7044 {
7045 enum rtx_code second_code, bypass_code;
7046 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
7047 if (bypass_code != NIL || second_code != NIL)
7048 abort ();
7049 code = ix86_fp_compare_code_to_integer (code);
7050 mode = CCmode;
7051 }
7052 if (reverse)
7053 code = reverse_condition (code);
7054
7055 switch (code)
7056 {
7057 case EQ:
7058 suffix = "e";
7059 break;
7060 case NE:
7061 suffix = "ne";
7062 break;
7063 case GT:
7064 if (mode != CCmode && mode != CCNOmode && mode != CCGCmode)
7065 abort ();
7066 suffix = "g";
7067 break;
7068 case GTU:
7069 /* ??? Use "nbe" instead of "a" for fcmov lossage on some assemblers.
7070 Those same assemblers have the same but opposite lossage on cmov. */
7071 if (mode != CCmode)
7072 abort ();
7073 suffix = fp ? "nbe" : "a";
7074 break;
7075 case LT:
7076 if (mode == CCNOmode || mode == CCGOCmode)
7077 suffix = "s";
7078 else if (mode == CCmode || mode == CCGCmode)
7079 suffix = "l";
7080 else
7081 abort ();
7082 break;
7083 case LTU:
7084 if (mode != CCmode)
7085 abort ();
7086 suffix = "b";
7087 break;
7088 case GE:
7089 if (mode == CCNOmode || mode == CCGOCmode)
7090 suffix = "ns";
7091 else if (mode == CCmode || mode == CCGCmode)
7092 suffix = "ge";
7093 else
7094 abort ();
7095 break;
7096 case GEU:
7097 /* ??? As above. */
7098 if (mode != CCmode)
7099 abort ();
7100 suffix = fp ? "nb" : "ae";
7101 break;
7102 case LE:
7103 if (mode != CCmode && mode != CCGCmode && mode != CCNOmode)
7104 abort ();
7105 suffix = "le";
7106 break;
7107 case LEU:
7108 if (mode != CCmode)
7109 abort ();
7110 suffix = "be";
7111 break;
7112 case UNORDERED:
7113 suffix = fp ? "u" : "p";
7114 break;
7115 case ORDERED:
7116 suffix = fp ? "nu" : "np";
7117 break;
7118 default:
7119 abort ();
7120 }
7121 fputs (suffix, file);
7122 }
7123
7124 /* Print the name of register X to FILE based on its machine mode and number.
7125 If CODE is 'w', pretend the mode is HImode.
7126 If CODE is 'b', pretend the mode is QImode.
7127 If CODE is 'k', pretend the mode is SImode.
7128 If CODE is 'q', pretend the mode is DImode.
7129 If CODE is 'h', pretend the reg is the `high' byte register.
7130 If CODE is 'y', print "st(0)" instead of "st", if the reg is stack op. */
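/* For example (illustrative): for hard register 0, code 'b' prints "al",
   'w' prints "ax", 'k' prints "eax" and, on 64-bit targets, 'q' prints
   "rax"; the '%' prefix is emitted first for AT&T-style output.  */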
7131
7132 void
7133 print_reg (rtx x, int code, FILE *file)
7134 {
7135 if (REGNO (x) == ARG_POINTER_REGNUM
7136 || REGNO (x) == FRAME_POINTER_REGNUM
7137 || REGNO (x) == FLAGS_REG
7138 || REGNO (x) == FPSR_REG)
7139 abort ();
7140
7141 if (ASSEMBLER_DIALECT == ASM_ATT || USER_LABEL_PREFIX[0] == 0)
7142 putc ('%', file);
7143
7144 if (code == 'w' || MMX_REG_P (x))
7145 code = 2;
7146 else if (code == 'b')
7147 code = 1;
7148 else if (code == 'k')
7149 code = 4;
7150 else if (code == 'q')
7151 code = 8;
7152 else if (code == 'y')
7153 code = 3;
7154 else if (code == 'h')
7155 code = 0;
7156 else
7157 code = GET_MODE_SIZE (GET_MODE (x));
7158
7159 /* Irritatingly, AMD extended registers use a different naming convention
7160 from the normal registers. */
7161 if (REX_INT_REG_P (x))
7162 {
7163 if (!TARGET_64BIT)
7164 abort ();
7165 switch (code)
7166 {
7167 case 0:
7168 error ("extended registers have no high halves");
7169 break;
7170 case 1:
7171 fprintf (file, "r%ib", REGNO (x) - FIRST_REX_INT_REG + 8);
7172 break;
7173 case 2:
7174 fprintf (file, "r%iw", REGNO (x) - FIRST_REX_INT_REG + 8);
7175 break;
7176 case 4:
7177 fprintf (file, "r%id", REGNO (x) - FIRST_REX_INT_REG + 8);
7178 break;
7179 case 8:
7180 fprintf (file, "r%i", REGNO (x) - FIRST_REX_INT_REG + 8);
7181 break;
7182 default:
7183 error ("unsupported operand size for extended register");
7184 break;
7185 }
7186 return;
7187 }
7188 switch (code)
7189 {
7190 case 3:
7191 if (STACK_TOP_P (x))
7192 {
7193 fputs ("st(0)", file);
7194 break;
7195 }
7196 /* FALLTHRU */
7197 case 8:
7198 case 4:
7199 case 12:
7200 if (! ANY_FP_REG_P (x))
7201 putc (code == 8 && TARGET_64BIT ? 'r' : 'e', file);
7202 /* FALLTHRU */
7203 case 16:
7204 case 2:
7205 normal:
7206 fputs (hi_reg_name[REGNO (x)], file);
7207 break;
7208 case 1:
7209 if (REGNO (x) >= ARRAY_SIZE (qi_reg_name))
7210 goto normal;
7211 fputs (qi_reg_name[REGNO (x)], file);
7212 break;
7213 case 0:
7214 if (REGNO (x) >= ARRAY_SIZE (qi_high_reg_name))
7215 goto normal;
7216 fputs (qi_high_reg_name[REGNO (x)], file);
7217 break;
7218 default:
7219 abort ();
7220 }
7221 }
7222
7223 /* Locate some local-dynamic symbol still in use by this function
7224 so that we can print its name in some tls_local_dynamic_base
7225 pattern. */
7226
7227 static const char *
7228 get_some_local_dynamic_name (void)
7229 {
7230 rtx insn;
7231
7232 if (cfun->machine->some_ld_name)
7233 return cfun->machine->some_ld_name;
7234
7235 for (insn = get_insns (); insn ; insn = NEXT_INSN (insn))
7236 if (INSN_P (insn)
7237 && for_each_rtx (&PATTERN (insn), get_some_local_dynamic_name_1, 0))
7238 return cfun->machine->some_ld_name;
7239
7240 abort ();
7241 }
7242
7243 static int
7244 get_some_local_dynamic_name_1 (rtx *px, void *data ATTRIBUTE_UNUSED)
7245 {
7246 rtx x = *px;
7247
7248 if (GET_CODE (x) == SYMBOL_REF
7249 && local_dynamic_symbolic_operand (x, Pmode))
7250 {
7251 cfun->machine->some_ld_name = XSTR (x, 0);
7252 return 1;
7253 }
7254
7255 return 0;
7256 }
7257
7258 /* Meaning of CODE:
7259 L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
7260 C -- print opcode suffix for set/cmov insn.
7261 c -- like C, but print reversed condition
7262 F,f -- likewise, but for floating-point.
7263 O -- if HAVE_AS_IX86_CMOV_SUN_SYNTAX, expand to "w.", "l." or "q.",
7264 otherwise nothing
7265 R -- print the prefix for register names.
7266 z -- print the opcode suffix for the size of the current operand.
7267 * -- print a star (in certain assembler syntax)
7268 A -- print an absolute memory reference.
7269 w -- print the operand as if it's a "word" (HImode) even if it isn't.
7270 	   s -- print a shift double count, followed by the assembler's argument
7271 delimiter.
7272 b -- print the QImode name of the register for the indicated operand.
7273 %b0 would print %al if operands[0] is reg 0.
7274 w -- likewise, print the HImode name of the register.
7275 k -- likewise, print the SImode name of the register.
7276 q -- likewise, print the DImode name of the register.
7277 h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
7278 y -- print "st(0)" instead of "st" as a register.
7279 D -- print condition for SSE cmp instruction.
7280 P -- if PIC, print an @PLT suffix.
7281 X -- don't print any sort of PIC '@' suffix for a symbol.
7282 & -- print some in-use local-dynamic symbol name.
7283 */
7284
7285 void
7286 print_operand (FILE *file, rtx x, int code)
7287 {
7288 if (code)
7289 {
7290 switch (code)
7291 {
7292 case '*':
7293 if (ASSEMBLER_DIALECT == ASM_ATT)
7294 putc ('*', file);
7295 return;
7296
7297 case '&':
7298 assemble_name (file, get_some_local_dynamic_name ());
7299 return;
7300
7301 case 'A':
7302 if (ASSEMBLER_DIALECT == ASM_ATT)
7303 putc ('*', file);
7304 else if (ASSEMBLER_DIALECT == ASM_INTEL)
7305 {
7306 /* Intel syntax. For absolute addresses, registers should not
7307 	       be surrounded by brackets.  */
7308 if (GET_CODE (x) != REG)
7309 {
7310 putc ('[', file);
7311 PRINT_OPERAND (file, x, 0);
7312 putc (']', file);
7313 return;
7314 }
7315 }
7316 else
7317 abort ();
7318
7319 PRINT_OPERAND (file, x, 0);
7320 return;
7321
7322
7323 case 'L':
7324 if (ASSEMBLER_DIALECT == ASM_ATT)
7325 putc ('l', file);
7326 return;
7327
7328 case 'W':
7329 if (ASSEMBLER_DIALECT == ASM_ATT)
7330 putc ('w', file);
7331 return;
7332
7333 case 'B':
7334 if (ASSEMBLER_DIALECT == ASM_ATT)
7335 putc ('b', file);
7336 return;
7337
7338 case 'Q':
7339 if (ASSEMBLER_DIALECT == ASM_ATT)
7340 putc ('l', file);
7341 return;
7342
7343 case 'S':
7344 if (ASSEMBLER_DIALECT == ASM_ATT)
7345 putc ('s', file);
7346 return;
7347
7348 case 'T':
7349 if (ASSEMBLER_DIALECT == ASM_ATT)
7350 putc ('t', file);
7351 return;
7352
7353 case 'z':
7354 /* 387 opcodes don't get size suffixes if the operands are
7355 registers. */
7356 if (STACK_REG_P (x))
7357 return;
7358
7359 /* Likewise if using Intel opcodes. */
7360 if (ASSEMBLER_DIALECT == ASM_INTEL)
7361 return;
7362
7363 	  /* Derive the opcode suffix from the size of the operand.  */
7364 switch (GET_MODE_SIZE (GET_MODE (x)))
7365 {
7366 case 2:
7367 #ifdef HAVE_GAS_FILDS_FISTS
7368 putc ('s', file);
7369 #endif
7370 return;
7371
7372 case 4:
7373 if (GET_MODE (x) == SFmode)
7374 {
7375 putc ('s', file);
7376 return;
7377 }
7378 else
7379 putc ('l', file);
7380 return;
7381
7382 case 12:
7383 case 16:
7384 putc ('t', file);
7385 return;
7386
7387 case 8:
7388 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
7389 {
7390 #ifdef GAS_MNEMONICS
7391 putc ('q', file);
7392 #else
7393 putc ('l', file);
7394 putc ('l', file);
7395 #endif
7396 }
7397 else
7398 putc ('l', file);
7399 return;
7400
7401 default:
7402 abort ();
7403 }
7404
7405 case 'b':
7406 case 'w':
7407 case 'k':
7408 case 'q':
7409 case 'h':
7410 case 'y':
7411 case 'X':
7412 case 'P':
7413 break;
7414
7415 case 's':
7416 if (GET_CODE (x) == CONST_INT || ! SHIFT_DOUBLE_OMITS_COUNT)
7417 {
7418 PRINT_OPERAND (file, x, 0);
7419 putc (',', file);
7420 }
7421 return;
7422
7423 case 'D':
7424 	  /* Little bit of braindamage here.  The SSE compare instructions
7425 	     use completely different names for the comparisons than the
7426 	     fp conditional moves do.  */
7427 switch (GET_CODE (x))
7428 {
7429 case EQ:
7430 case UNEQ:
7431 fputs ("eq", file);
7432 break;
7433 case LT:
7434 case UNLT:
7435 fputs ("lt", file);
7436 break;
7437 case LE:
7438 case UNLE:
7439 fputs ("le", file);
7440 break;
7441 case UNORDERED:
7442 fputs ("unord", file);
7443 break;
7444 case NE:
7445 case LTGT:
7446 fputs ("neq", file);
7447 break;
7448 case UNGE:
7449 case GE:
7450 fputs ("nlt", file);
7451 break;
7452 case UNGT:
7453 case GT:
7454 fputs ("nle", file);
7455 break;
7456 case ORDERED:
7457 fputs ("ord", file);
7458 break;
7459 default:
7460 abort ();
7461 break;
7462 }
7463 return;
7464 case 'O':
7465 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
7466 if (ASSEMBLER_DIALECT == ASM_ATT)
7467 {
7468 switch (GET_MODE (x))
7469 {
7470 case HImode: putc ('w', file); break;
7471 case SImode:
7472 case SFmode: putc ('l', file); break;
7473 case DImode:
7474 case DFmode: putc ('q', file); break;
7475 default: abort ();
7476 }
7477 putc ('.', file);
7478 }
7479 #endif
7480 return;
7481 case 'C':
7482 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 0, file);
7483 return;
7484 case 'F':
7485 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
7486 if (ASSEMBLER_DIALECT == ASM_ATT)
7487 putc ('.', file);
7488 #endif
7489 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 1, file);
7490 return;
7491
7492 /* Like above, but reverse condition */
7493 case 'c':
7494 /* Check to see if argument to %c is really a constant
7495 and not a condition code which needs to be reversed. */
7496 if (!COMPARISON_P (x))
7497 {
7498 output_operand_lossage ("operand is neither a constant nor a condition code, invalid operand code 'c'");
7499 return;
7500 }
7501 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 0, file);
7502 return;
7503 case 'f':
7504 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
7505 if (ASSEMBLER_DIALECT == ASM_ATT)
7506 putc ('.', file);
7507 #endif
7508 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 1, file);
7509 return;
7510 case '+':
7511 {
7512 rtx x;
7513
7514 if (!optimize || optimize_size || !TARGET_BRANCH_PREDICTION_HINTS)
7515 return;
7516
7517 x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
7518 if (x)
7519 {
7520 int pred_val = INTVAL (XEXP (x, 0));
7521
7522 if (pred_val < REG_BR_PROB_BASE * 45 / 100
7523 || pred_val > REG_BR_PROB_BASE * 55 / 100)
7524 {
7525 int taken = pred_val > REG_BR_PROB_BASE / 2;
7526 int cputaken = final_forward_branch_p (current_output_insn) == 0;
7527
7528 	  /* Emit hints only in the case where the default branch prediction
7529 	     heuristics would fail.  */
7530 if (taken != cputaken)
7531 {
7532 /* We use 3e (DS) prefix for taken branches and
7533 2e (CS) prefix for not taken branches. */
7534 if (taken)
7535 fputs ("ds ; ", file);
7536 else
7537 fputs ("cs ; ", file);
7538 }
7539 }
7540 }
7541 return;
7542 }
7543 default:
7544 output_operand_lossage ("invalid operand code `%c'", code);
7545 }
7546 }
7547
7548 if (GET_CODE (x) == REG)
7549 print_reg (x, code, file);
7550
7551 else if (GET_CODE (x) == MEM)
7552 {
7553 /* No `byte ptr' prefix for call instructions. */
7554 if (ASSEMBLER_DIALECT == ASM_INTEL && code != 'X' && code != 'P')
7555 {
7556 const char * size;
7557 switch (GET_MODE_SIZE (GET_MODE (x)))
7558 {
7559 case 1: size = "BYTE"; break;
7560 case 2: size = "WORD"; break;
7561 case 4: size = "DWORD"; break;
7562 case 8: size = "QWORD"; break;
7563 case 12: size = "XWORD"; break;
7564 case 16: size = "XMMWORD"; break;
7565 default:
7566 abort ();
7567 }
7568
7569 /* Check for explicit size override (codes 'b', 'w' and 'k') */
7570 if (code == 'b')
7571 size = "BYTE";
7572 else if (code == 'w')
7573 size = "WORD";
7574 else if (code == 'k')
7575 size = "DWORD";
7576
7577 fputs (size, file);
7578 fputs (" PTR ", file);
7579 }
7580
7581 x = XEXP (x, 0);
7582 /* Avoid (%rip) for call operands. */
7583 if (CONSTANT_ADDRESS_P (x) && code == 'P'
7584 && GET_CODE (x) != CONST_INT)
7585 output_addr_const (file, x);
7586 else if (this_is_asm_operands && ! address_operand (x, VOIDmode))
7587 output_operand_lossage ("invalid constraints for operand");
7588 else
7589 output_address (x);
7590 }
7591
7592 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == SFmode)
7593 {
7594 REAL_VALUE_TYPE r;
7595 long l;
7596
7597 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
7598 REAL_VALUE_TO_TARGET_SINGLE (r, l);
7599
7600 if (ASSEMBLER_DIALECT == ASM_ATT)
7601 putc ('$', file);
7602 fprintf (file, "0x%08lx", l);
7603 }
7604
7605 /* These float cases don't actually occur as immediate operands. */
7606 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == DFmode)
7607 {
7608 char dstr[30];
7609
7610 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
7611 fprintf (file, "%s", dstr);
7612 }
7613
7614 else if (GET_CODE (x) == CONST_DOUBLE
7615 && GET_MODE (x) == XFmode)
7616 {
7617 char dstr[30];
7618
7619 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
7620 fprintf (file, "%s", dstr);
7621 }
7622
7623 else
7624 {
7625 if (code != 'P')
7626 {
7627 if (GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST_DOUBLE)
7628 {
7629 if (ASSEMBLER_DIALECT == ASM_ATT)
7630 putc ('$', file);
7631 }
7632 else if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF
7633 || GET_CODE (x) == LABEL_REF)
7634 {
7635 if (ASSEMBLER_DIALECT == ASM_ATT)
7636 putc ('$', file);
7637 else
7638 fputs ("OFFSET FLAT:", file);
7639 }
7640 }
7641 if (GET_CODE (x) == CONST_INT)
7642 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
7643 else if (flag_pic)
7644 output_pic_addr_const (file, x, code);
7645 else
7646 output_addr_const (file, x);
7647 }
7648 }
7649 \f
7650 /* Print a memory operand whose address is ADDR. */
7651
7652 void
7653 print_operand_address (FILE *file, rtx addr)
7654 {
7655 struct ix86_address parts;
7656 rtx base, index, disp;
7657 int scale;
7658
7659 if (! ix86_decompose_address (addr, &parts))
7660 abort ();
7661
7662 base = parts.base;
7663 index = parts.index;
7664 disp = parts.disp;
7665 scale = parts.scale;
7666
7667 switch (parts.seg)
7668 {
7669 case SEG_DEFAULT:
7670 break;
7671 case SEG_FS:
7672 case SEG_GS:
7673 if (USER_LABEL_PREFIX[0] == 0)
7674 putc ('%', file);
7675 fputs ((parts.seg == SEG_FS ? "fs:" : "gs:"), file);
7676 break;
7677 default:
7678 abort ();
7679 }
7680
7681 if (!base && !index)
7682 {
7683 /* Displacement only requires special attention. */
7684
7685 if (GET_CODE (disp) == CONST_INT)
7686 {
7687 if (ASSEMBLER_DIALECT == ASM_INTEL && parts.seg == SEG_DEFAULT)
7688 {
7689 if (USER_LABEL_PREFIX[0] == 0)
7690 putc ('%', file);
7691 fputs ("ds:", file);
7692 }
7693 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (disp));
7694 }
7695 else if (flag_pic)
7696 output_pic_addr_const (file, disp, 0);
7697 else
7698 output_addr_const (file, disp);
7699
7700 	  /* Use the one byte shorter RIP relative addressing for 64-bit mode.  */
7701 if (TARGET_64BIT
7702 && ((GET_CODE (disp) == SYMBOL_REF
7703 && ! tls_symbolic_operand (disp, GET_MODE (disp)))
7704 || GET_CODE (disp) == LABEL_REF
7705 || (GET_CODE (disp) == CONST
7706 && GET_CODE (XEXP (disp, 0)) == PLUS
7707 && (GET_CODE (XEXP (XEXP (disp, 0), 0)) == SYMBOL_REF
7708 || GET_CODE (XEXP (XEXP (disp, 0), 0)) == LABEL_REF)
7709 && GET_CODE (XEXP (XEXP (disp, 0), 1)) == CONST_INT)))
7710 fputs ("(%rip)", file);
7711 }
7712 else
7713 {
7714 if (ASSEMBLER_DIALECT == ASM_ATT)
7715 {
7716 if (disp)
7717 {
7718 if (flag_pic)
7719 output_pic_addr_const (file, disp, 0);
7720 else if (GET_CODE (disp) == LABEL_REF)
7721 output_asm_label (disp);
7722 else
7723 output_addr_const (file, disp);
7724 }
7725
7726 putc ('(', file);
7727 if (base)
7728 print_reg (base, 0, file);
7729 if (index)
7730 {
7731 putc (',', file);
7732 print_reg (index, 0, file);
7733 if (scale != 1)
7734 fprintf (file, ",%d", scale);
7735 }
7736 putc (')', file);
7737 }
7738 else
7739 {
7740 rtx offset = NULL_RTX;
7741
7742 if (disp)
7743 {
7744 /* Pull out the offset of a symbol; print any symbol itself. */
7745 if (GET_CODE (disp) == CONST
7746 && GET_CODE (XEXP (disp, 0)) == PLUS
7747 && GET_CODE (XEXP (XEXP (disp, 0), 1)) == CONST_INT)
7748 {
7749 offset = XEXP (XEXP (disp, 0), 1);
7750 disp = gen_rtx_CONST (VOIDmode,
7751 XEXP (XEXP (disp, 0), 0));
7752 }
7753
7754 if (flag_pic)
7755 output_pic_addr_const (file, disp, 0);
7756 else if (GET_CODE (disp) == LABEL_REF)
7757 output_asm_label (disp);
7758 else if (GET_CODE (disp) == CONST_INT)
7759 offset = disp;
7760 else
7761 output_addr_const (file, disp);
7762 }
7763
7764 putc ('[', file);
7765 if (base)
7766 {
7767 print_reg (base, 0, file);
7768 if (offset)
7769 {
7770 if (INTVAL (offset) >= 0)
7771 putc ('+', file);
7772 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
7773 }
7774 }
7775 else if (offset)
7776 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
7777 else
7778 putc ('0', file);
7779
7780 if (index)
7781 {
7782 putc ('+', file);
7783 print_reg (index, 0, file);
7784 if (scale != 1)
7785 fprintf (file, "*%d", scale);
7786 }
7787 putc (']', file);
7788 }
7789 }
7790 }
7791
7792 bool
7793 output_addr_const_extra (FILE *file, rtx x)
7794 {
7795 rtx op;
7796
7797 if (GET_CODE (x) != UNSPEC)
7798 return false;
7799
7800 op = XVECEXP (x, 0, 0);
7801 switch (XINT (x, 1))
7802 {
7803 case UNSPEC_GOTTPOFF:
7804 output_addr_const (file, op);
7805 /* FIXME: This might be @TPOFF in Sun ld. */
7806 fputs ("@GOTTPOFF", file);
7807 break;
7808 case UNSPEC_TPOFF:
7809 output_addr_const (file, op);
7810 fputs ("@TPOFF", file);
7811 break;
7812 case UNSPEC_NTPOFF:
7813 output_addr_const (file, op);
7814 if (TARGET_64BIT)
7815 fputs ("@TPOFF", file);
7816 else
7817 fputs ("@NTPOFF", file);
7818 break;
7819 case UNSPEC_DTPOFF:
7820 output_addr_const (file, op);
7821 fputs ("@DTPOFF", file);
7822 break;
7823 case UNSPEC_GOTNTPOFF:
7824 output_addr_const (file, op);
7825 if (TARGET_64BIT)
7826 fputs ("@GOTTPOFF(%rip)", file);
7827 else
7828 fputs ("@GOTNTPOFF", file);
7829 break;
7830 case UNSPEC_INDNTPOFF:
7831 output_addr_const (file, op);
7832 fputs ("@INDNTPOFF", file);
7833 break;
7834
7835 default:
7836 return false;
7837 }
7838
7839 return true;
7840 }
7841 \f
7842 /* Split one or more DImode RTL references into pairs of SImode
7843 references. The RTL can be REG, offsettable MEM, integer constant, or
7844 CONST_DOUBLE. "operands" is a pointer to an array of DImode RTL to
7845 split and "num" is its length. lo_half and hi_half are output arrays
7846 that parallel "operands". */
7847
7848 void
7849 split_di (rtx operands[], int num, rtx lo_half[], rtx hi_half[])
7850 {
7851 while (num--)
7852 {
7853 rtx op = operands[num];
7854
7855 	  /* simplify_subreg refuses to split volatile memory addresses,
7856 	     but we still have to handle them.  */
7857 if (GET_CODE (op) == MEM)
7858 {
7859 lo_half[num] = adjust_address (op, SImode, 0);
7860 hi_half[num] = adjust_address (op, SImode, 4);
7861 }
7862 else
7863 {
7864 lo_half[num] = simplify_gen_subreg (SImode, op,
7865 GET_MODE (op) == VOIDmode
7866 ? DImode : GET_MODE (op), 0);
7867 hi_half[num] = simplify_gen_subreg (SImode, op,
7868 GET_MODE (op) == VOIDmode
7869 ? DImode : GET_MODE (op), 4);
7870 }
7871 }
7872 }
7873 	/* Split one or more TImode RTL references into pairs of DImode
7874 	   references.  The RTL can be REG, offsettable MEM, integer constant, or
7875 	   CONST_DOUBLE.  "operands" is a pointer to an array of TImode RTL to
7876 	   split and "num" is its length.  lo_half and hi_half are output arrays
7877 	   that parallel "operands".  */
7878
7879 void
7880 split_ti (rtx operands[], int num, rtx lo_half[], rtx hi_half[])
7881 {
7882 while (num--)
7883 {
7884 rtx op = operands[num];
7885
7886 	  /* simplify_subreg refuses to split volatile memory addresses, but we
7887 	     still have to handle them.  */
7888 if (GET_CODE (op) == MEM)
7889 {
7890 lo_half[num] = adjust_address (op, DImode, 0);
7891 hi_half[num] = adjust_address (op, DImode, 8);
7892 }
7893 else
7894 {
7895 lo_half[num] = simplify_gen_subreg (DImode, op, TImode, 0);
7896 hi_half[num] = simplify_gen_subreg (DImode, op, TImode, 8);
7897 }
7898 }
7899 }
7900 \f
7901 /* Output code to perform a 387 binary operation in INSN, one of PLUS,
7902 MINUS, MULT or DIV. OPERANDS are the insn operands, where operands[3]
7903 is the expression of the binary operation. The output may either be
7904 emitted here, or returned to the caller, like all output_* functions.
7905
7906 There is no guarantee that the operands are the same mode, as they
7907 might be within FLOAT or FLOAT_EXTEND expressions. */
7908
7909 #ifndef SYSV386_COMPAT
7910 /* Set to 1 for compatibility with brain-damaged assemblers. No-one
7911 wants to fix the assemblers because that causes incompatibility
7912 with gcc. No-one wants to fix gcc because that causes
7913 incompatibility with assemblers... You can use the option of
7914 -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way. */
7915 #define SYSV386_COMPAT 1
7916 #endif
7917
7918 const char *
7919 output_387_binary_op (rtx insn, rtx *operands)
7920 {
7921 static char buf[30];
7922 const char *p;
7923 const char *ssep;
7924 int is_sse = SSE_REG_P (operands[0]) | SSE_REG_P (operands[1]) | SSE_REG_P (operands[2]);
7925
7926 #ifdef ENABLE_CHECKING
7927 	  /* Even if we do not want to check the inputs, this documents the input
7928 	     constraints, which helps in understanding the following code.  */
7929 if (STACK_REG_P (operands[0])
7930 && ((REG_P (operands[1])
7931 && REGNO (operands[0]) == REGNO (operands[1])
7932 && (STACK_REG_P (operands[2]) || GET_CODE (operands[2]) == MEM))
7933 || (REG_P (operands[2])
7934 && REGNO (operands[0]) == REGNO (operands[2])
7935 && (STACK_REG_P (operands[1]) || GET_CODE (operands[1]) == MEM)))
7936 && (STACK_TOP_P (operands[1]) || STACK_TOP_P (operands[2])))
7937 ; /* ok */
7938 else if (!is_sse)
7939 abort ();
7940 #endif
7941
7942 switch (GET_CODE (operands[3]))
7943 {
7944 case PLUS:
7945 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
7946 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
7947 p = "fiadd";
7948 else
7949 p = "fadd";
7950 ssep = "add";
7951 break;
7952
7953 case MINUS:
7954 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
7955 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
7956 p = "fisub";
7957 else
7958 p = "fsub";
7959 ssep = "sub";
7960 break;
7961
7962 case MULT:
7963 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
7964 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
7965 p = "fimul";
7966 else
7967 p = "fmul";
7968 ssep = "mul";
7969 break;
7970
7971 case DIV:
7972 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
7973 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
7974 p = "fidiv";
7975 else
7976 p = "fdiv";
7977 ssep = "div";
7978 break;
7979
7980 default:
7981 abort ();
7982 }
7983
7984 if (is_sse)
7985 {
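	      /* For instance, a PLUS in SFmode yields "addss\t{%2, %0|%0, %2}";
	         the braces select between the AT&T and Intel operand orders.  */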
7986 strcpy (buf, ssep);
7987 if (GET_MODE (operands[0]) == SFmode)
7988 strcat (buf, "ss\t{%2, %0|%0, %2}");
7989 else
7990 strcat (buf, "sd\t{%2, %0|%0, %2}");
7991 return buf;
7992 }
7993 strcpy (buf, p);
7994
7995 switch (GET_CODE (operands[3]))
7996 {
7997 case MULT:
7998 case PLUS:
7999 if (REG_P (operands[2]) && REGNO (operands[0]) == REGNO (operands[2]))
8000 {
8001 rtx temp = operands[2];
8002 operands[2] = operands[1];
8003 operands[1] = temp;
8004 }
8005
8006 	      /* We know operands[0] == operands[1].  */
8007
8008 if (GET_CODE (operands[2]) == MEM)
8009 {
8010 p = "%z2\t%2";
8011 break;
8012 }
8013
8014 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
8015 {
8016 if (STACK_TOP_P (operands[0]))
8017 /* How is it that we are storing to a dead operand[2]?
8018 Well, presumably operands[1] is dead too. We can't
8019 store the result to st(0) as st(0) gets popped on this
8020 instruction. Instead store to operands[2] (which I
8021 think has to be st(1)). st(1) will be popped later.
8022 gcc <= 2.8.1 didn't have this check and generated
8023 assembly code that the Unixware assembler rejected. */
8024 p = "p\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
8025 else
8026 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
8027 break;
8028 }
8029
8030 if (STACK_TOP_P (operands[0]))
8031 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
8032 else
8033 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
8034 break;
8035
8036 case MINUS:
8037 case DIV:
8038 if (GET_CODE (operands[1]) == MEM)
8039 {
8040 p = "r%z1\t%1";
8041 break;
8042 }
8043
8044 if (GET_CODE (operands[2]) == MEM)
8045 {
8046 p = "%z2\t%2";
8047 break;
8048 }
8049
8050 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
8051 {
8052 #if SYSV386_COMPAT
8053 /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
8054 derived assemblers, confusingly reverse the direction of
8055 the operation for fsub{r} and fdiv{r} when the
8056 destination register is not st(0). The Intel assembler
8057 doesn't have this brain damage. Read !SYSV386_COMPAT to
8058 figure out what the hardware really does. */
8059 if (STACK_TOP_P (operands[0]))
8060 p = "{p\t%0, %2|rp\t%2, %0}";
8061 else
8062 p = "{rp\t%2, %0|p\t%0, %2}";
8063 #else
8064 if (STACK_TOP_P (operands[0]))
8065 /* As above for fmul/fadd, we can't store to st(0). */
8066 p = "rp\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
8067 else
8068 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
8069 #endif
8070 break;
8071 }
8072
8073 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
8074 {
8075 #if SYSV386_COMPAT
8076 if (STACK_TOP_P (operands[0]))
8077 p = "{rp\t%0, %1|p\t%1, %0}";
8078 else
8079 p = "{p\t%1, %0|rp\t%0, %1}";
8080 #else
8081 if (STACK_TOP_P (operands[0]))
8082 p = "p\t{%0, %1|%1, %0}"; /* st(1) = st(1) op st(0); pop */
8083 else
8084 p = "rp\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2); pop */
8085 #endif
8086 break;
8087 }
8088
8089 if (STACK_TOP_P (operands[0]))
8090 {
8091 if (STACK_TOP_P (operands[1]))
8092 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
8093 else
8094 p = "r\t{%y1, %0|%0, %y1}"; /* st(0) = st(r1) op st(0) */
8095 break;
8096 }
8097 else if (STACK_TOP_P (operands[1]))
8098 {
8099 #if SYSV386_COMPAT
8100 p = "{\t%1, %0|r\t%0, %1}";
8101 #else
8102 p = "r\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2) */
8103 #endif
8104 }
8105 else
8106 {
8107 #if SYSV386_COMPAT
8108 p = "{r\t%2, %0|\t%0, %2}";
8109 #else
8110 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
8111 #endif
8112 }
8113 break;
8114
8115 default:
8116 abort ();
8117 }
8118
8119 strcat (buf, p);
8120 return buf;
8121 }
8122
8123 	/* Output code to initialize the control word copies used by the trunc?f?i
8124 	   patterns.  NORMAL is set to the current control word, while ROUND_DOWN is
8125 	   set to a control word that rounds towards zero, as truncation requires.  */
8126 void
8127 emit_i387_cw_initialization (rtx normal, rtx round_down)
8128 {
8129 rtx reg = gen_reg_rtx (HImode);
8130
8131 emit_insn (gen_x86_fnstcw_1 (normal));
8132 emit_move_insn (reg, normal);
8133 if (!TARGET_PARTIAL_REG_STALL && !optimize_size
8134 && !TARGET_64BIT)
8135 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0xc)));
8136 else
8137 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0xc00)));
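	  /* Both paths set the two rounding-control bits (the 0xc00 mask of the
	     FPU control word), i.e. RC = 11, round towards zero.  */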
8138 emit_move_insn (round_down, reg);
8139 }
8140
8141 /* Output code for INSN to convert a float to a signed int. OPERANDS
8142 are the insn operands. The output may be [HSD]Imode and the input
8143 operand may be [SDX]Fmode. */
8144
8145 const char *
8146 output_fix_trunc (rtx insn, rtx *operands)
8147 {
8148 int stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
8149 int dimode_p = GET_MODE (operands[0]) == DImode;
8150
8151 /* Jump through a hoop or two for DImode, since the hardware has no
8152 non-popping instruction. We used to do this a different way, but
8153 that was somewhat fragile and broke with post-reload splitters. */
8154 if (dimode_p && !stack_top_dies)
8155 output_asm_insn ("fld\t%y1", operands);
8156
8157 if (!STACK_TOP_P (operands[1]))
8158 abort ();
8159
8160 if (GET_CODE (operands[0]) != MEM)
8161 abort ();
8162
8163 output_asm_insn ("fldcw\t%3", operands);
8164 if (stack_top_dies || dimode_p)
8165 output_asm_insn ("fistp%z0\t%0", operands);
8166 else
8167 output_asm_insn ("fist%z0\t%0", operands);
8168 output_asm_insn ("fldcw\t%2", operands);
8169
8170 return "";
8171 }
8172
8173 /* Output code for INSN to compare OPERANDS. EFLAGS_P is 1 when fcomi
8174 should be used and 2 when fnstsw should be used. UNORDERED_P is true
8175 when fucom should be used. */
8176
8177 const char *
8178 output_fp_compare (rtx insn, rtx *operands, int eflags_p, int unordered_p)
8179 {
8180 int stack_top_dies;
8181 rtx cmp_op0 = operands[0];
8182 rtx cmp_op1 = operands[1];
8183 int is_sse = SSE_REG_P (operands[0]) | SSE_REG_P (operands[1]);
8184
8185 if (eflags_p == 2)
8186 {
8187 cmp_op0 = cmp_op1;
8188 cmp_op1 = operands[2];
8189 }
8190 if (is_sse)
8191 {
8192 if (GET_MODE (operands[0]) == SFmode)
8193 if (unordered_p)
8194 return "ucomiss\t{%1, %0|%0, %1}";
8195 else
8196 return "comiss\t{%1, %0|%0, %1}";
8197 else
8198 if (unordered_p)
8199 return "ucomisd\t{%1, %0|%0, %1}";
8200 else
8201 return "comisd\t{%1, %0|%0, %1}";
8202 }
8203
8204 if (! STACK_TOP_P (cmp_op0))
8205 abort ();
8206
8207 stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
8208
8209 if (STACK_REG_P (cmp_op1)
8210 && stack_top_dies
8211 && find_regno_note (insn, REG_DEAD, REGNO (cmp_op1))
8212 && REGNO (cmp_op1) != FIRST_STACK_REG)
8213 {
8214 	      /* If the top of the 387 stack dies, and the other operand
8215 	         is also a stack register that dies, then this must be a
8216 	         `fcompp' float compare
8217
8218 if (eflags_p == 1)
8219 {
8220 /* There is no double popping fcomi variant. Fortunately,
8221 eflags is immune from the fstp's cc clobbering. */
8222 if (unordered_p)
8223 output_asm_insn ("fucomip\t{%y1, %0|%0, %y1}", operands);
8224 else
8225 output_asm_insn ("fcomip\t{%y1, %0|%0, %y1}", operands);
8226 return "fstp\t%y0";
8227 }
8228 else
8229 {
8230 if (eflags_p == 2)
8231 {
8232 if (unordered_p)
8233 return "fucompp\n\tfnstsw\t%0";
8234 else
8235 return "fcompp\n\tfnstsw\t%0";
8236 }
8237 else
8238 {
8239 if (unordered_p)
8240 return "fucompp";
8241 else
8242 return "fcompp";
8243 }
8244 }
8245 }
8246 else
8247 {
8248 /* Encoded here as eflags_p | intmode | unordered_p | stack_top_dies. */
8249
8250 static const char * const alt[24] =
8251 {
8252 "fcom%z1\t%y1",
8253 "fcomp%z1\t%y1",
8254 "fucom%z1\t%y1",
8255 "fucomp%z1\t%y1",
8256
8257 "ficom%z1\t%y1",
8258 "ficomp%z1\t%y1",
8259 NULL,
8260 NULL,
8261
8262 "fcomi\t{%y1, %0|%0, %y1}",
8263 "fcomip\t{%y1, %0|%0, %y1}",
8264 "fucomi\t{%y1, %0|%0, %y1}",
8265 "fucomip\t{%y1, %0|%0, %y1}",
8266
8267 NULL,
8268 NULL,
8269 NULL,
8270 NULL,
8271
8272 "fcom%z2\t%y2\n\tfnstsw\t%0",
8273 "fcomp%z2\t%y2\n\tfnstsw\t%0",
8274 "fucom%z2\t%y2\n\tfnstsw\t%0",
8275 "fucomp%z2\t%y2\n\tfnstsw\t%0",
8276
8277 "ficom%z2\t%y2\n\tfnstsw\t%0",
8278 "ficomp%z2\t%y2\n\tfnstsw\t%0",
8279 NULL,
8280 NULL
8281 };
8282
8283 int mask;
8284 const char *ret;
8285
8286 mask = eflags_p << 3;
8287 mask |= (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT) << 2;
8288 mask |= unordered_p << 1;
8289 mask |= stack_top_dies;
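	      /* For example, an fcomi compare (eflags_p == 1) against a float
	         register, with the top of the stack dying, gives mask == 9 and
	         selects "fcomip\t{%y1, %0|%0, %y1}" from the table above.  */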
8290
8291 if (mask >= 24)
8292 abort ();
8293 ret = alt[mask];
8294 if (ret == NULL)
8295 abort ();
8296
8297 return ret;
8298 }
8299 }
8300
8301 void
8302 ix86_output_addr_vec_elt (FILE *file, int value)
8303 {
8304 const char *directive = ASM_LONG;
8305
8306 if (TARGET_64BIT)
8307 {
8308 #ifdef ASM_QUAD
8309 directive = ASM_QUAD;
8310 #else
8311 abort ();
8312 #endif
8313 }
8314
8315 fprintf (file, "%s%s%d\n", directive, LPREFIX, value);
8316 }
8317
8318 void
8319 ix86_output_addr_diff_elt (FILE *file, int value, int rel)
8320 {
8321 if (TARGET_64BIT)
8322 fprintf (file, "%s%s%d-%s%d\n",
8323 ASM_LONG, LPREFIX, value, LPREFIX, rel);
8324 else if (HAVE_AS_GOTOFF_IN_DATA)
8325 fprintf (file, "%s%s%d@GOTOFF\n", ASM_LONG, LPREFIX, value);
8326 #if TARGET_MACHO
8327 else if (TARGET_MACHO)
8328 {
8329 fprintf (file, "%s%s%d-", ASM_LONG, LPREFIX, value);
8330 machopic_output_function_base_name (file);
8331 fprintf(file, "\n");
8332 }
8333 #endif
8334 else
8335 asm_fprintf (file, "%s%U%s+[.-%s%d]\n",
8336 ASM_LONG, GOT_SYMBOL_NAME, LPREFIX, value);
8337 }
8338 \f
8339 /* Generate either "mov $0, reg" or "xor reg, reg", as appropriate
8340 for the target. */
8341
8342 void
8343 ix86_expand_clear (rtx dest)
8344 {
8345 rtx tmp;
8346
8347 /* We play register width games, which are only valid after reload. */
8348 if (!reload_completed)
8349 abort ();
8350
8351 /* Avoid HImode and its attendant prefix byte. */
8352 if (GET_MODE_SIZE (GET_MODE (dest)) < 4)
8353 dest = gen_rtx_REG (SImode, REGNO (dest));
8354
8355 tmp = gen_rtx_SET (VOIDmode, dest, const0_rtx);
8356
8357 /* This predicate should match that for movsi_xor and movdi_xor_rex64. */
8358 if (reload_completed && (!TARGET_USE_MOV0 || optimize_size))
8359 {
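	      /* Hard register 17 is the flags register (FLAGS_REG) on this port.  */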
8360 rtx clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, 17));
8361 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
8362 }
8363
8364 emit_insn (tmp);
8365 }
8366
8367 /* X is an unchanging MEM. If it is a constant pool reference, return
8368 the constant pool rtx, else NULL. */
8369
8370 static rtx
8371 maybe_get_pool_constant (rtx x)
8372 {
8373 x = ix86_delegitimize_address (XEXP (x, 0));
8374
8375 if (GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
8376 return get_pool_constant (x);
8377
8378 return NULL_RTX;
8379 }
8380
8381 void
8382 ix86_expand_move (enum machine_mode mode, rtx operands[])
8383 {
8384 int strict = (reload_in_progress || reload_completed);
8385 rtx op0, op1;
8386 enum tls_model model;
8387
8388 op0 = operands[0];
8389 op1 = operands[1];
8390
8391 model = tls_symbolic_operand (op1, Pmode);
8392 if (model)
8393 {
8394 op1 = legitimize_tls_address (op1, model, true);
8395 op1 = force_operand (op1, op0);
8396 if (op1 == op0)
8397 return;
8398 }
8399
8400 if (flag_pic && mode == Pmode && symbolic_operand (op1, Pmode))
8401 {
8402 #if TARGET_MACHO
8403 if (MACHOPIC_PURE)
8404 {
8405 rtx temp = ((reload_in_progress
8406 || ((op0 && GET_CODE (op0) == REG)
8407 && mode == Pmode))
8408 ? op0 : gen_reg_rtx (Pmode));
8409 op1 = machopic_indirect_data_reference (op1, temp);
8410 op1 = machopic_legitimize_pic_address (op1, mode,
8411 temp == op1 ? 0 : temp);
8412 }
8413 else if (MACHOPIC_INDIRECT)
8414 op1 = machopic_indirect_data_reference (op1, 0);
8415 if (op0 == op1)
8416 return;
8417 #else
8418 if (GET_CODE (op0) == MEM)
8419 op1 = force_reg (Pmode, op1);
8420 else
8421 op1 = legitimize_address (op1, op1, Pmode);
8422 #endif /* TARGET_MACHO */
8423 }
8424 else
8425 {
8426 if (GET_CODE (op0) == MEM
8427 && (PUSH_ROUNDING (GET_MODE_SIZE (mode)) != GET_MODE_SIZE (mode)
8428 || !push_operand (op0, mode))
8429 && GET_CODE (op1) == MEM)
8430 op1 = force_reg (mode, op1);
8431
8432 if (push_operand (op0, mode)
8433 && ! general_no_elim_operand (op1, mode))
8434 op1 = copy_to_mode_reg (mode, op1);
8435
8436 /* Force large constants in 64bit compilation into register
8437 to get them CSEed. */
8438 if (TARGET_64BIT && mode == DImode
8439 && immediate_operand (op1, mode)
8440 && !x86_64_zero_extended_value (op1)
8441 && !register_operand (op0, mode)
8442 && optimize && !reload_completed && !reload_in_progress)
8443 op1 = copy_to_mode_reg (mode, op1);
8444
8445 if (FLOAT_MODE_P (mode))
8446 {
8447 /* If we are loading a floating point constant to a register,
8448 force the value to memory now, since we'll get better code
8449 out the back end. */
8450
8451 if (strict)
8452 ;
8453 else if (GET_CODE (op1) == CONST_DOUBLE)
8454 {
8455 op1 = validize_mem (force_const_mem (mode, op1));
8456 if (!register_operand (op0, mode))
8457 {
8458 rtx temp = gen_reg_rtx (mode);
8459 emit_insn (gen_rtx_SET (VOIDmode, temp, op1));
8460 emit_move_insn (op0, temp);
8461 return;
8462 }
8463 }
8464 }
8465 }
8466
8467 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
8468 }
8469
8470 void
8471 ix86_expand_vector_move (enum machine_mode mode, rtx operands[])
8472 {
8473 /* Force constants other than zero into memory. We do not know how
8474 the instructions used to build constants modify the upper 64 bits
8475 	     of the register; once we have that information we may be able
8476 to handle some of them more efficiently. */
8477 if ((reload_in_progress | reload_completed) == 0
8478 && register_operand (operands[0], mode)
8479 && CONSTANT_P (operands[1]) && operands[1] != CONST0_RTX (mode))
8480 operands[1] = validize_mem (force_const_mem (mode, operands[1]));
8481
8482 /* Make operand1 a register if it isn't already. */
8483 if (!no_new_pseudos
8484 && !register_operand (operands[0], mode)
8485 && !register_operand (operands[1], mode))
8486 {
8487 rtx temp = force_reg (GET_MODE (operands[1]), operands[1]);
8488 emit_move_insn (operands[0], temp);
8489 return;
8490 }
8491
8492 emit_insn (gen_rtx_SET (VOIDmode, operands[0], operands[1]));
8493 }
8494
8495 /* Attempt to expand a binary operator. Make the expansion closer to the
8496 	   actual machine, than just general_operand, which would allow 3 separate
8497 memory references (one output, two input) in a single insn. */
8498
8499 void
8500 ix86_expand_binary_operator (enum rtx_code code, enum machine_mode mode,
8501 rtx operands[])
8502 {
8503 int matching_memory;
8504 rtx src1, src2, dst, op, clob;
8505
8506 dst = operands[0];
8507 src1 = operands[1];
8508 src2 = operands[2];
8509
8510 /* Recognize <var1> = <value> <op> <var1> for commutative operators */
8511 if (GET_RTX_CLASS (code) == RTX_COMM_ARITH
8512 && (rtx_equal_p (dst, src2)
8513 || immediate_operand (src1, mode)))
8514 {
8515 rtx temp = src1;
8516 src1 = src2;
8517 src2 = temp;
8518 }
8519
8520 /* If the destination is memory, and we do not have matching source
8521 operands, do things in registers. */
8522 matching_memory = 0;
8523 if (GET_CODE (dst) == MEM)
8524 {
8525 if (rtx_equal_p (dst, src1))
8526 matching_memory = 1;
8527 else if (GET_RTX_CLASS (code) == RTX_COMM_ARITH
8528 && rtx_equal_p (dst, src2))
8529 matching_memory = 2;
8530 else
8531 dst = gen_reg_rtx (mode);
8532 }
8533
8534 /* Both source operands cannot be in memory. */
8535 if (GET_CODE (src1) == MEM && GET_CODE (src2) == MEM)
8536 {
8537 if (matching_memory != 2)
8538 src2 = force_reg (mode, src2);
8539 else
8540 src1 = force_reg (mode, src1);
8541 }
8542
8543 	  /* If the operation is not commutative, source 1 cannot be a constant
8544 or non-matching memory. */
8545 if ((CONSTANT_P (src1)
8546 || (!matching_memory && GET_CODE (src1) == MEM))
8547 && GET_RTX_CLASS (code) != RTX_COMM_ARITH)
8548 src1 = force_reg (mode, src1);
8549
8550 /* If optimizing, copy to regs to improve CSE */
8551 if (optimize && ! no_new_pseudos)
8552 {
8553 if (GET_CODE (dst) == MEM)
8554 dst = gen_reg_rtx (mode);
8555 if (GET_CODE (src1) == MEM)
8556 src1 = force_reg (mode, src1);
8557 if (GET_CODE (src2) == MEM)
8558 src2 = force_reg (mode, src2);
8559 }
8560
8561 /* Emit the instruction. */
8562
8563 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, mode, src1, src2));
8564 if (reload_in_progress)
8565 {
8566 /* Reload doesn't know about the flags register, and doesn't know that
8567 it doesn't want to clobber it. We can only do this with PLUS. */
8568 if (code != PLUS)
8569 abort ();
8570 emit_insn (op);
8571 }
8572 else
8573 {
8574 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
8575 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
8576 }
8577
8578 /* Fix up the destination if needed. */
8579 if (dst != operands[0])
8580 emit_move_insn (operands[0], dst);
8581 }
8582
8583 /* Return TRUE or FALSE depending on whether the binary operator meets the
8584 appropriate constraints. */
8585
8586 int
8587 ix86_binary_operator_ok (enum rtx_code code,
8588 enum machine_mode mode ATTRIBUTE_UNUSED,
8589 rtx operands[3])
8590 {
8591 /* Both source operands cannot be in memory. */
8592 if (GET_CODE (operands[1]) == MEM && GET_CODE (operands[2]) == MEM)
8593 return 0;
8594 	  /* If the operation is not commutative, source 1 cannot be a constant.  */
8595 if (CONSTANT_P (operands[1]) && GET_RTX_CLASS (code) != RTX_COMM_ARITH)
8596 return 0;
8597 /* If the destination is memory, we must have a matching source operand. */
8598 if (GET_CODE (operands[0]) == MEM
8599 && ! (rtx_equal_p (operands[0], operands[1])
8600 || (GET_RTX_CLASS (code) == RTX_COMM_ARITH
8601 && rtx_equal_p (operands[0], operands[2]))))
8602 return 0;
8603 	  /* If the operation is not commutative and source 1 is memory, we must
8604 have a matching destination. */
8605 if (GET_CODE (operands[1]) == MEM
8606 && GET_RTX_CLASS (code) != RTX_COMM_ARITH
8607 && ! rtx_equal_p (operands[0], operands[1]))
8608 return 0;
8609 return 1;
8610 }
8611
8612 /* Attempt to expand a unary operator. Make the expansion closer to the
8613 	   actual machine, than just general_operand, which would allow 2 separate
8614 memory references (one output, one input) in a single insn. */
8615
8616 void
8617 ix86_expand_unary_operator (enum rtx_code code, enum machine_mode mode,
8618 rtx operands[])
8619 {
8620 int matching_memory;
8621 rtx src, dst, op, clob;
8622
8623 dst = operands[0];
8624 src = operands[1];
8625
8626 /* If the destination is memory, and we do not have matching source
8627 operands, do things in registers. */
8628 matching_memory = 0;
8629 if (GET_CODE (dst) == MEM)
8630 {
8631 if (rtx_equal_p (dst, src))
8632 matching_memory = 1;
8633 else
8634 dst = gen_reg_rtx (mode);
8635 }
8636
8637 /* When source operand is memory, destination must match. */
8638 if (!matching_memory && GET_CODE (src) == MEM)
8639 src = force_reg (mode, src);
8640
8641 /* If optimizing, copy to regs to improve CSE */
8642 if (optimize && ! no_new_pseudos)
8643 {
8644 if (GET_CODE (dst) == MEM)
8645 dst = gen_reg_rtx (mode);
8646 if (GET_CODE (src) == MEM)
8647 src = force_reg (mode, src);
8648 }
8649
8650 /* Emit the instruction. */
8651
8652 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_e (code, mode, src));
8653 if (reload_in_progress || code == NOT)
8654 {
8655 /* Reload doesn't know about the flags register, and doesn't know that
8656 it doesn't want to clobber it. */
8657 if (code != NOT)
8658 abort ();
8659 emit_insn (op);
8660 }
8661 else
8662 {
8663 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
8664 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
8665 }
8666
8667 /* Fix up the destination if needed. */
8668 if (dst != operands[0])
8669 emit_move_insn (operands[0], dst);
8670 }
8671
8672 /* Return TRUE or FALSE depending on whether the unary operator meets the
8673 appropriate constraints. */
8674
8675 int
8676 ix86_unary_operator_ok (enum rtx_code code ATTRIBUTE_UNUSED,
8677 enum machine_mode mode ATTRIBUTE_UNUSED,
8678 rtx operands[2] ATTRIBUTE_UNUSED)
8679 {
8680 /* If one of operands is memory, source and destination must match. */
8681 if ((GET_CODE (operands[0]) == MEM
8682 || GET_CODE (operands[1]) == MEM)
8683 && ! rtx_equal_p (operands[0], operands[1]))
8684 return FALSE;
8685 return TRUE;
8686 }
8687
8688 /* Return TRUE or FALSE depending on whether the first SET in INSN
8689 has source and destination with matching CC modes, and that the
8690 CC mode is at least as constrained as REQ_MODE. */
8691
8692 int
8693 ix86_match_ccmode (rtx insn, enum machine_mode req_mode)
8694 {
8695 rtx set;
8696 enum machine_mode set_mode;
8697
8698 set = PATTERN (insn);
8699 if (GET_CODE (set) == PARALLEL)
8700 set = XVECEXP (set, 0, 0);
8701 if (GET_CODE (set) != SET)
8702 abort ();
8703 if (GET_CODE (SET_SRC (set)) != COMPARE)
8704 abort ();
8705
8706 set_mode = GET_MODE (SET_DEST (set));
8707 switch (set_mode)
8708 {
8709 case CCNOmode:
8710 if (req_mode != CCNOmode
8711 && (req_mode != CCmode
8712 || XEXP (SET_SRC (set), 1) != const0_rtx))
8713 return 0;
8714 break;
8715 case CCmode:
8716 if (req_mode == CCGCmode)
8717 return 0;
8718 /* FALLTHRU */
8719 case CCGCmode:
8720 if (req_mode == CCGOCmode || req_mode == CCNOmode)
8721 return 0;
8722 /* FALLTHRU */
8723 case CCGOCmode:
8724 if (req_mode == CCZmode)
8725 return 0;
8726 /* FALLTHRU */
8727 case CCZmode:
8728 break;
8729
8730 default:
8731 abort ();
8732 }
8733
8734 return (GET_MODE (SET_SRC (set)) == set_mode);
8735 }
8736
8737 /* Generate insn patterns to do an integer compare of OPERANDS. */
8738
8739 static rtx
8740 ix86_expand_int_compare (enum rtx_code code, rtx op0, rtx op1)
8741 {
8742 enum machine_mode cmpmode;
8743 rtx tmp, flags;
8744
8745 cmpmode = SELECT_CC_MODE (code, op0, op1);
8746 flags = gen_rtx_REG (cmpmode, FLAGS_REG);
8747
8748 /* This is very simple, but making the interface the same as in the
8749 FP case makes the rest of the code easier. */
8750 tmp = gen_rtx_COMPARE (cmpmode, op0, op1);
8751 emit_insn (gen_rtx_SET (VOIDmode, flags, tmp));
8752
8753 /* Return the test that should be put into the flags user, i.e.
8754 the bcc, scc, or cmov instruction. */
8755 return gen_rtx_fmt_ee (code, VOIDmode, flags, const0_rtx);
8756 }
8757
8758 /* Figure out whether to use ordered or unordered fp comparisons.
8759 Return the appropriate mode to use. */
8760
8761 enum machine_mode
8762 ix86_fp_compare_mode (enum rtx_code code ATTRIBUTE_UNUSED)
8763 {
8764 /* ??? In order to make all comparisons reversible, we do all comparisons
8765 	     non-trapping when compiling for IEEE.  Once gcc is able to distinguish
8766 	     between the trapping and nontrapping forms of all comparisons, we can make
8767 	     inequality comparisons trapping again, since that results in better code when using
8768 FCOM based compares. */
8769 return TARGET_IEEE_FP ? CCFPUmode : CCFPmode;
8770 }
8771
8772 enum machine_mode
8773 ix86_cc_mode (enum rtx_code code, rtx op0, rtx op1)
8774 {
8775 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_FLOAT)
8776 return ix86_fp_compare_mode (code);
8777 switch (code)
8778 {
8779 /* Only zero flag is needed. */
8780 case EQ: /* ZF=0 */
8781 case NE: /* ZF!=0 */
8782 return CCZmode;
8783 /* Codes needing carry flag. */
8784 case GEU: /* CF=0 */
8785 case GTU: /* CF=0 & ZF=0 */
8786 case LTU: /* CF=1 */
8787 case LEU: /* CF=1 | ZF=1 */
8788 return CCmode;
8789 /* Codes possibly doable only with sign flag when
8790 comparing against zero. */
8791 case GE: /* SF=OF or SF=0 */
8792 case LT: /* SF<>OF or SF=1 */
8793 if (op1 == const0_rtx)
8794 return CCGOCmode;
8795 else
8796 /* For other cases Carry flag is not required. */
8797 return CCGCmode;
8798 	      /* Codes doable only with the sign flag when comparing
8799 	         against zero, but we lack a jump instruction for that,
8800 	         so we need to use relational tests against overflow,
8801 	         which thus needs to be zero.  */
8802 case GT: /* ZF=0 & SF=OF */
8803 case LE: /* ZF=1 | SF<>OF */
8804 if (op1 == const0_rtx)
8805 return CCNOmode;
8806 else
8807 return CCGCmode;
8808 	      /* The strcmp pattern does a (use flags), and combine may ask us for the
8809 	         proper mode.  */
8810 case USE:
8811 return CCmode;
8812 default:
8813 abort ();
8814 }
8815 }
8816
8817 /* Return the fixed registers used for condition codes. */
8818
8819 static bool
8820 ix86_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
8821 {
8822 *p1 = FLAGS_REG;
8823 *p2 = FPSR_REG;
8824 return true;
8825 }
8826
8827 /* If two condition code modes are compatible, return a condition code
8828 mode which is compatible with both. Otherwise, return
8829 VOIDmode. */
8830
8831 static enum machine_mode
8832 ix86_cc_modes_compatible (enum machine_mode m1, enum machine_mode m2)
8833 {
8834 if (m1 == m2)
8835 return m1;
8836
8837 if (GET_MODE_CLASS (m1) != MODE_CC || GET_MODE_CLASS (m2) != MODE_CC)
8838 return VOIDmode;
8839
8840 if ((m1 == CCGCmode && m2 == CCGOCmode)
8841 || (m1 == CCGOCmode && m2 == CCGCmode))
8842 return CCGCmode;
8843
8844 switch (m1)
8845 {
8846 default:
8847 abort ();
8848
8849 case CCmode:
8850 case CCGCmode:
8851 case CCGOCmode:
8852 case CCNOmode:
8853 case CCZmode:
8854 switch (m2)
8855 {
8856 default:
8857 return VOIDmode;
8858
8859 case CCmode:
8860 case CCGCmode:
8861 case CCGOCmode:
8862 case CCNOmode:
8863 case CCZmode:
8864 return CCmode;
8865 }
8866
8867 case CCFPmode:
8868 case CCFPUmode:
8869 /* These are only compatible with themselves, which we already
8870 checked above. */
8871 return VOIDmode;
8872 }
8873 }
8874
8875 /* Return true if we should use an FCOMI instruction for this fp comparison. */
8876
8877 int
8878 ix86_use_fcomi_compare (enum rtx_code code ATTRIBUTE_UNUSED)
8879 {
8880 enum rtx_code swapped_code = swap_condition (code);
8881 return ((ix86_fp_comparison_cost (code) == ix86_fp_comparison_fcomi_cost (code))
8882 || (ix86_fp_comparison_cost (swapped_code)
8883 == ix86_fp_comparison_fcomi_cost (swapped_code)));
8884 }
8885
8886 /* Swap, force into registers, or otherwise massage the two operands
8887 to a fp comparison. The operands are updated in place; the new
8888 comparison code is returned. */
8889
8890 static enum rtx_code
8891 ix86_prepare_fp_compare_args (enum rtx_code code, rtx *pop0, rtx *pop1)
8892 {
8893 enum machine_mode fpcmp_mode = ix86_fp_compare_mode (code);
8894 rtx op0 = *pop0, op1 = *pop1;
8895 enum machine_mode op_mode = GET_MODE (op0);
8896 int is_sse = SSE_REG_P (op0) | SSE_REG_P (op1);
8897
8898 /* All of the unordered compare instructions only work on registers.
8899 The same is true of the XFmode compare instructions. The same is
8900 true of the fcomi compare instructions. */
8901
8902 if (!is_sse
8903 && (fpcmp_mode == CCFPUmode
8904 || op_mode == XFmode
8905 || ix86_use_fcomi_compare (code)))
8906 {
8907 op0 = force_reg (op_mode, op0);
8908 op1 = force_reg (op_mode, op1);
8909 }
8910 else
8911 {
8912 /* %%% We only allow op1 in memory; op0 must be st(0). So swap
8913 things around if they appear profitable, otherwise force op0
8914 into a register. */
8915
8916 if (standard_80387_constant_p (op0) == 0
8917 || (GET_CODE (op0) == MEM
8918 && ! (standard_80387_constant_p (op1) == 0
8919 || GET_CODE (op1) == MEM)))
8920 {
8921 rtx tmp;
8922 tmp = op0, op0 = op1, op1 = tmp;
8923 code = swap_condition (code);
8924 }
8925
8926 if (GET_CODE (op0) != REG)
8927 op0 = force_reg (op_mode, op0);
8928
8929 if (CONSTANT_P (op1))
8930 {
8931 if (standard_80387_constant_p (op1))
8932 op1 = force_reg (op_mode, op1);
8933 else
8934 op1 = validize_mem (force_const_mem (op_mode, op1));
8935 }
8936 }
8937
8938 /* Try to rearrange the comparison to make it cheaper. */
8939 if (ix86_fp_comparison_cost (code)
8940 > ix86_fp_comparison_cost (swap_condition (code))
8941 && (GET_CODE (op1) == REG || !no_new_pseudos))
8942 {
8943 rtx tmp;
8944 tmp = op0, op0 = op1, op1 = tmp;
8945 code = swap_condition (code);
8946 if (GET_CODE (op0) != REG)
8947 op0 = force_reg (op_mode, op0);
8948 }
8949
8950 *pop0 = op0;
8951 *pop1 = op1;
8952 return code;
8953 }
8954
8955 	/* Convert the comparison codes we use to represent FP comparisons into the
8956 	   integer codes that will result in a proper branch.  Return UNKNOWN if no
8957 	   such code is available.  */
8958 static enum rtx_code
8959 ix86_fp_compare_code_to_integer (enum rtx_code code)
8960 {
8961 switch (code)
8962 {
8963 case GT:
8964 return GTU;
8965 case GE:
8966 return GEU;
8967 case ORDERED:
8968 case UNORDERED:
8969 return code;
8970 break;
8971 case UNEQ:
8972 return EQ;
8973 break;
8974 case UNLT:
8975 return LTU;
8976 break;
8977 case UNLE:
8978 return LEU;
8979 break;
8980 case LTGT:
8981 return NE;
8982 break;
8983 default:
8984 return UNKNOWN;
8985 }
8986 }
8987
8988 /* Split comparison code CODE into comparisons we can do using branch
8989 	   instructions.  BYPASS_CODE is the comparison code for the branch that will
8990 	   branch around FIRST_CODE and SECOND_CODE.  If one of the branches is
8991 	   not required, its value is set to NIL.
8992 We never require more than two branches. */
8993 static void
8994 ix86_fp_comparison_codes (enum rtx_code code, enum rtx_code *bypass_code,
8995 enum rtx_code *first_code,
8996 enum rtx_code *second_code)
8997 {
8998 *first_code = code;
8999 *bypass_code = NIL;
9000 *second_code = NIL;
9001
9002 /* The fcomi comparison sets flags as follows:
9003
9004 cmp ZF PF CF
9005 > 0 0 0
9006 < 0 0 1
9007 = 1 0 0
9008 un 1 1 1 */
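	  /* For example, a plain CF=1 test for LT would also accept unordered
	     operands (CF=1 there as well), so under IEEE we test UNLT and branch
	     around the result on UNORDERED.  */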
9009
9010 switch (code)
9011 {
9012 case GT: /* GTU - CF=0 & ZF=0 */
9013 case GE: /* GEU - CF=0 */
9014 case ORDERED: /* PF=0 */
9015 case UNORDERED: /* PF=1 */
9016 case UNEQ: /* EQ - ZF=1 */
9017 case UNLT: /* LTU - CF=1 */
9018 case UNLE: /* LEU - CF=1 | ZF=1 */
9019 	    case LTGT:			/* NE - ZF=0 */
9020 break;
9021 case LT: /* LTU - CF=1 - fails on unordered */
9022 *first_code = UNLT;
9023 *bypass_code = UNORDERED;
9024 break;
9025 case LE: /* LEU - CF=1 | ZF=1 - fails on unordered */
9026 *first_code = UNLE;
9027 *bypass_code = UNORDERED;
9028 break;
9029 case EQ: /* EQ - ZF=1 - fails on unordered */
9030 *first_code = UNEQ;
9031 *bypass_code = UNORDERED;
9032 break;
9033 case NE: /* NE - ZF=0 - fails on unordered */
9034 *first_code = LTGT;
9035 *second_code = UNORDERED;
9036 break;
9037 case UNGE: /* GEU - CF=0 - fails on unordered */
9038 *first_code = GE;
9039 *second_code = UNORDERED;
9040 break;
9041 case UNGT: /* GTU - CF=0 & ZF=0 - fails on unordered */
9042 *first_code = GT;
9043 *second_code = UNORDERED;
9044 break;
9045 default:
9046 abort ();
9047 }
9048 if (!TARGET_IEEE_FP)
9049 {
9050 *second_code = NIL;
9051 *bypass_code = NIL;
9052 }
9053 }
9054
9055 	/* Return the cost of a comparison done with fcom + arithmetic operations on AX.
9056 	   All of the following functions use the number of instructions as a cost metric.
9057 In future this should be tweaked to compute bytes for optimize_size and
9058 take into account performance of various instructions on various CPUs. */
9059 static int
9060 ix86_fp_comparison_arithmetics_cost (enum rtx_code code)
9061 {
9062 if (!TARGET_IEEE_FP)
9063 return 4;
9064 /* The cost of code output by ix86_expand_fp_compare. */
9065 switch (code)
9066 {
9067 case UNLE:
9068 case UNLT:
9069 case LTGT:
9070 case GT:
9071 case GE:
9072 case UNORDERED:
9073 case ORDERED:
9074 case UNEQ:
9075 return 4;
9076 break;
9077 case LT:
9078 case NE:
9079 case EQ:
9080 case UNGE:
9081 return 5;
9082 break;
9083 case LE:
9084 case UNGT:
9085 return 6;
9086 break;
9087 default:
9088 abort ();
9089 }
9090 }
9091
9092 /* Return cost of comparison done using fcomi operation.
9093 See ix86_fp_comparison_arithmetics_cost for the metrics. */
9094 static int
9095 ix86_fp_comparison_fcomi_cost (enum rtx_code code)
9096 {
9097 enum rtx_code bypass_code, first_code, second_code;
9098 /* Return arbitrarily high cost when instruction is not supported - this
9099 prevents gcc from using it. */
9100 if (!TARGET_CMOVE)
9101 return 1024;
9102 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
9103 return (bypass_code != NIL || second_code != NIL) + 2;
9104 }
9105
9106 /* Return cost of comparison done using sahf operation.
9107 See ix86_fp_comparison_arithmetics_cost for the metrics. */
9108 static int
9109 ix86_fp_comparison_sahf_cost (enum rtx_code code)
9110 {
9111 enum rtx_code bypass_code, first_code, second_code;
9112 /* Return arbitrarily high cost when instruction is not preferred - this
9113 	     prevents gcc from using it.  */
9114 if (!TARGET_USE_SAHF && !optimize_size)
9115 return 1024;
9116 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
9117 return (bypass_code != NIL || second_code != NIL) + 3;
9118 }
9119
9120 /* Compute cost of the comparison done using any method.
9121 See ix86_fp_comparison_arithmetics_cost for the metrics. */
9122 static int
9123 ix86_fp_comparison_cost (enum rtx_code code)
9124 {
9125 int fcomi_cost, sahf_cost, arithmetics_cost = 1024;
9126 int min;
9127
9128 fcomi_cost = ix86_fp_comparison_fcomi_cost (code);
9129 sahf_cost = ix86_fp_comparison_sahf_cost (code);
9130
9131 min = arithmetics_cost = ix86_fp_comparison_arithmetics_cost (code);
9132 if (min > sahf_cost)
9133 min = sahf_cost;
9134 if (min > fcomi_cost)
9135 min = fcomi_cost;
9136 return min;
9137 }
9138
9139 /* Generate insn patterns to do a floating point compare of OPERANDS. */
9140
9141 static rtx
9142 ix86_expand_fp_compare (enum rtx_code code, rtx op0, rtx op1, rtx scratch,
9143 rtx *second_test, rtx *bypass_test)
9144 {
9145 enum machine_mode fpcmp_mode, intcmp_mode;
9146 rtx tmp, tmp2;
9147 int cost = ix86_fp_comparison_cost (code);
9148 enum rtx_code bypass_code, first_code, second_code;
9149
9150 fpcmp_mode = ix86_fp_compare_mode (code);
9151 code = ix86_prepare_fp_compare_args (code, &op0, &op1);
9152
9153 if (second_test)
9154 *second_test = NULL_RTX;
9155 if (bypass_test)
9156 *bypass_test = NULL_RTX;
9157
9158 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
9159
9160 /* Do fcomi/sahf based test when profitable. */
9161 if ((bypass_code == NIL || bypass_test)
9162 && (second_code == NIL || second_test)
9163 && ix86_fp_comparison_arithmetics_cost (code) > cost)
9164 {
9165 if (TARGET_CMOVE)
9166 {
9167 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
9168 tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
9169 tmp);
9170 emit_insn (tmp);
9171 }
9172 else
9173 {
9174 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
9175 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
9176 if (!scratch)
9177 scratch = gen_reg_rtx (HImode);
9178 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
9179 emit_insn (gen_x86_sahf_1 (scratch));
9180 }
9181
9182 /* The FP codes work out to act like unsigned. */
9183 intcmp_mode = fpcmp_mode;
9184 code = first_code;
9185 if (bypass_code != NIL)
9186 *bypass_test = gen_rtx_fmt_ee (bypass_code, VOIDmode,
9187 gen_rtx_REG (intcmp_mode, FLAGS_REG),
9188 const0_rtx);
9189 if (second_code != NIL)
9190 *second_test = gen_rtx_fmt_ee (second_code, VOIDmode,
9191 gen_rtx_REG (intcmp_mode, FLAGS_REG),
9192 const0_rtx);
9193 }
9194 else
9195 {
9196 /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first. */
9197 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
9198 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
9199 if (!scratch)
9200 scratch = gen_reg_rtx (HImode);
9201 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
9202
9203 /* In the unordered case, we have to check C2 for NaN's, which
9204 doesn't happen to work out to anything nice combination-wise.
9205 So do some bit twiddling on the value we've got in AH to come
9206 up with an appropriate set of condition codes. */
9207
9208 intcmp_mode = CCNOmode;
9209 switch (code)
9210 {
9211 case GT:
9212 case UNGT:
9213 if (code == GT || !TARGET_IEEE_FP)
9214 {
9215 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
9216 code = EQ;
9217 }
9218 else
9219 {
9220 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
9221 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
9222 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x44)));
9223 intcmp_mode = CCmode;
9224 code = GEU;
9225 }
9226 break;
9227 case LT:
9228 case UNLT:
9229 if (code == LT && TARGET_IEEE_FP)
9230 {
9231 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
9232 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x01)));
9233 intcmp_mode = CCmode;
9234 code = EQ;
9235 }
9236 else
9237 {
9238 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x01)));
9239 code = NE;
9240 }
9241 break;
9242 case GE:
9243 case UNGE:
9244 if (code == GE || !TARGET_IEEE_FP)
9245 {
9246 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x05)));
9247 code = EQ;
9248 }
9249 else
9250 {
9251 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
9252 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
9253 GEN_INT (0x01)));
9254 code = NE;
9255 }
9256 break;
9257 case LE:
9258 case UNLE:
9259 if (code == LE && TARGET_IEEE_FP)
9260 {
9261 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
9262 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
9263 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
9264 intcmp_mode = CCmode;
9265 code = LTU;
9266 }
9267 else
9268 {
9269 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
9270 code = NE;
9271 }
9272 break;
9273 case EQ:
9274 case UNEQ:
9275 if (code == EQ && TARGET_IEEE_FP)
9276 {
9277 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
9278 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
9279 intcmp_mode = CCmode;
9280 code = EQ;
9281 }
9282 else
9283 {
9284 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
9285 code = NE;
9286 break;
9287 }
9288 break;
9289 case NE:
9290 case LTGT:
9291 if (code == NE && TARGET_IEEE_FP)
9292 {
9293 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
9294 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
9295 GEN_INT (0x40)));
9296 code = NE;
9297 }
9298 else
9299 {
9300 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
9301 code = EQ;
9302 }
9303 break;
9304
9305 case UNORDERED:
9306 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
9307 code = NE;
9308 break;
9309 case ORDERED:
9310 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
9311 code = EQ;
9312 break;
9313
9314 default:
9315 abort ();
9316 }
9317 }
9318
9319 /* Return the test that should be put into the flags user, i.e.
9320 the bcc, scc, or cmov instruction. */
9321 return gen_rtx_fmt_ee (code, VOIDmode,
9322 gen_rtx_REG (intcmp_mode, FLAGS_REG),
9323 const0_rtx);
9324 }
9325
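/* Expand the comparison of ix86_compare_op0 with ix86_compare_op1 using
   comparison code CODE and return an rtx suitable for a flags user.
   For FP comparisons, *SECOND_TEST and *BYPASS_TEST (when non-NULL) may
   be set to additional tests that must be combined with the primary one. */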
9326 rtx
9327 ix86_expand_compare (enum rtx_code code, rtx *second_test, rtx *bypass_test)
9328 {
9329 rtx op0, op1, ret;
9330 op0 = ix86_compare_op0;
9331 op1 = ix86_compare_op1;
9332
9333 if (second_test)
9334 *second_test = NULL_RTX;
9335 if (bypass_test)
9336 *bypass_test = NULL_RTX;
9337
9338 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_FLOAT)
9339 ret = ix86_expand_fp_compare (code, op0, op1, NULL_RTX,
9340 second_test, bypass_test);
9341 else
9342 ret = ix86_expand_int_compare (code, op0, op1);
9343
9344 return ret;
9345 }
9346
9347 /* Return true if the CODE will result in a nontrivial jump sequence. */
9348 bool
9349 ix86_fp_jump_nontrivial_p (enum rtx_code code)
9350 {
9351 enum rtx_code bypass_code, first_code, second_code;
9352 if (!TARGET_CMOVE)
9353 return true;
9354 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
9355 return bypass_code != NIL || second_code != NIL;
9356 }
9357
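/* Expand a conditional branch to LABEL using comparison code CODE and the
   operands stored in ix86_compare_op0 and ix86_compare_op1. */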
9358 void
9359 ix86_expand_branch (enum rtx_code code, rtx label)
9360 {
9361 rtx tmp;
9362
9363 switch (GET_MODE (ix86_compare_op0))
9364 {
9365 case QImode:
9366 case HImode:
9367 case SImode:
9368 simple:
9369 tmp = ix86_expand_compare (code, NULL, NULL);
9370 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
9371 gen_rtx_LABEL_REF (VOIDmode, label),
9372 pc_rtx);
9373 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
9374 return;
9375
9376 case SFmode:
9377 case DFmode:
9378 case XFmode:
9379 {
9380 rtvec vec;
9381 int use_fcomi;
9382 enum rtx_code bypass_code, first_code, second_code;
9383
9384 code = ix86_prepare_fp_compare_args (code, &ix86_compare_op0,
9385 &ix86_compare_op1);
9386
9387 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
9388
9389 /* Check whether we will use the natural sequence with one jump. If
9390 so, we can expand the jump early. Otherwise delay expansion by
9391 creating a compound insn so as not to confuse the optimizers. */
9392 if (bypass_code == NIL && second_code == NIL
9393 && TARGET_CMOVE)
9394 {
9395 ix86_split_fp_branch (code, ix86_compare_op0, ix86_compare_op1,
9396 gen_rtx_LABEL_REF (VOIDmode, label),
9397 pc_rtx, NULL_RTX);
9398 }
9399 else
9400 {
9401 tmp = gen_rtx_fmt_ee (code, VOIDmode,
9402 ix86_compare_op0, ix86_compare_op1);
9403 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
9404 gen_rtx_LABEL_REF (VOIDmode, label),
9405 pc_rtx);
9406 tmp = gen_rtx_SET (VOIDmode, pc_rtx, tmp);
9407
9408 use_fcomi = ix86_use_fcomi_compare (code);
9409 vec = rtvec_alloc (3 + !use_fcomi);
9410 RTVEC_ELT (vec, 0) = tmp;
9411 RTVEC_ELT (vec, 1)
9412 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, 18));
9413 RTVEC_ELT (vec, 2)
9414 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, 17));
9415 if (! use_fcomi)
9416 RTVEC_ELT (vec, 3)
9417 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (HImode));
9418
9419 emit_jump_insn (gen_rtx_PARALLEL (VOIDmode, vec));
9420 }
9421 return;
9422 }
9423
9424 case DImode:
9425 if (TARGET_64BIT)
9426 goto simple;
9427 /* Expand DImode branch into multiple compare+branch. */
9428 {
9429 rtx lo[2], hi[2], label2;
9430 enum rtx_code code1, code2, code3;
9431
9432 if (CONSTANT_P (ix86_compare_op0) && ! CONSTANT_P (ix86_compare_op1))
9433 {
9434 tmp = ix86_compare_op0;
9435 ix86_compare_op0 = ix86_compare_op1;
9436 ix86_compare_op1 = tmp;
9437 code = swap_condition (code);
9438 }
9439 split_di (&ix86_compare_op0, 1, lo+0, hi+0);
9440 split_di (&ix86_compare_op1, 1, lo+1, hi+1);
9441
9442 /* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to
9443 avoid two branches. This costs one extra insn, so disable when
9444 optimizing for size. */
9445
9446 if ((code == EQ || code == NE)
9447 && (!optimize_size
9448 || hi[1] == const0_rtx || lo[1] == const0_rtx))
9449 {
9450 rtx xor0, xor1;
9451
9452 xor1 = hi[0];
9453 if (hi[1] != const0_rtx)
9454 xor1 = expand_binop (SImode, xor_optab, xor1, hi[1],
9455 NULL_RTX, 0, OPTAB_WIDEN);
9456
9457 xor0 = lo[0];
9458 if (lo[1] != const0_rtx)
9459 xor0 = expand_binop (SImode, xor_optab, xor0, lo[1],
9460 NULL_RTX, 0, OPTAB_WIDEN);
9461
9462 tmp = expand_binop (SImode, ior_optab, xor1, xor0,
9463 NULL_RTX, 0, OPTAB_WIDEN);
9464
9465 ix86_compare_op0 = tmp;
9466 ix86_compare_op1 = const0_rtx;
9467 ix86_expand_branch (code, label);
9468 return;
9469 }
9470
9471 /* Otherwise, if we are doing a less-than or greater-or-equal-than
9472 comparison, op1 is a constant and the low word is zero, then we
9473 can just examine the high word. */
9474
9475 if (GET_CODE (hi[1]) == CONST_INT && lo[1] == const0_rtx)
9476 switch (code)
9477 {
9478 case LT: case LTU: case GE: case GEU:
9479 ix86_compare_op0 = hi[0];
9480 ix86_compare_op1 = hi[1];
9481 ix86_expand_branch (code, label);
9482 return;
9483 default:
9484 break;
9485 }
9486
9487 /* Otherwise, we need two or three jumps. */
9488
9489 label2 = gen_label_rtx ();
9490
9491 code1 = code;
9492 code2 = swap_condition (code);
9493 code3 = unsigned_condition (code);
9494
9495 switch (code)
9496 {
9497 case LT: case GT: case LTU: case GTU:
9498 break;
9499
9500 case LE: code1 = LT; code2 = GT; break;
9501 case GE: code1 = GT; code2 = LT; break;
9502 case LEU: code1 = LTU; code2 = GTU; break;
9503 case GEU: code1 = GTU; code2 = LTU; break;
9504
9505 case EQ: code1 = NIL; code2 = NE; break;
9506 case NE: code2 = NIL; break;
9507
9508 default:
9509 abort ();
9510 }
9511
9512 /*
9513 * a < b =>
9514 * if (hi(a) < hi(b)) goto true;
9515 * if (hi(a) > hi(b)) goto false;
9516 * if (lo(a) < lo(b)) goto true;
9517 * false:
9518 */
9519
9520 ix86_compare_op0 = hi[0];
9521 ix86_compare_op1 = hi[1];
9522
9523 if (code1 != NIL)
9524 ix86_expand_branch (code1, label);
9525 if (code2 != NIL)
9526 ix86_expand_branch (code2, label2);
9527
9528 ix86_compare_op0 = lo[0];
9529 ix86_compare_op1 = lo[1];
9530 ix86_expand_branch (code3, label);
9531
9532 if (code2 != NIL)
9533 emit_label (label2);
9534 return;
9535 }
9536
9537 default:
9538 abort ();
9539 }
9540 }
9541
9542 /* Split branch based on floating point condition. */
9543 void
9544 ix86_split_fp_branch (enum rtx_code code, rtx op1, rtx op2,
9545 rtx target1, rtx target2, rtx tmp)
9546 {
9547 rtx second, bypass;
9548 rtx label = NULL_RTX;
9549 rtx condition;
9550 int bypass_probability = -1, second_probability = -1, probability = -1;
9551 rtx i;
9552
9553 if (target2 != pc_rtx)
9554 {
9555 rtx tmp = target2;
9556 code = reverse_condition_maybe_unordered (code);
9557 target2 = target1;
9558 target1 = tmp;
9559 }
9560
9561 condition = ix86_expand_fp_compare (code, op1, op2,
9562 tmp, &second, &bypass);
9563
9564 if (split_branch_probability >= 0)
9565 {
9566 /* Distribute the probabilities across the jumps.
9567 Assume that BYPASS and SECOND always test
9568 for UNORDERED. */
9569 probability = split_branch_probability;
9570
9571 /* A value of 1 is low enough that the probability need not be
9572 updated. Later we may run some experiments and see whether
9573 unordered values are more frequent in practice. */
9574 if (bypass)
9575 bypass_probability = 1;
9576 if (second)
9577 second_probability = 1;
9578 }
9579 if (bypass != NULL_RTX)
9580 {
9581 label = gen_label_rtx ();
9582 i = emit_jump_insn (gen_rtx_SET
9583 (VOIDmode, pc_rtx,
9584 gen_rtx_IF_THEN_ELSE (VOIDmode,
9585 bypass,
9586 gen_rtx_LABEL_REF (VOIDmode,
9587 label),
9588 pc_rtx)));
9589 if (bypass_probability >= 0)
9590 REG_NOTES (i)
9591 = gen_rtx_EXPR_LIST (REG_BR_PROB,
9592 GEN_INT (bypass_probability),
9593 REG_NOTES (i));
9594 }
9595 i = emit_jump_insn (gen_rtx_SET
9596 (VOIDmode, pc_rtx,
9597 gen_rtx_IF_THEN_ELSE (VOIDmode,
9598 condition, target1, target2)));
9599 if (probability >= 0)
9600 REG_NOTES (i)
9601 = gen_rtx_EXPR_LIST (REG_BR_PROB,
9602 GEN_INT (probability),
9603 REG_NOTES (i));
9604 if (second != NULL_RTX)
9605 {
9606 i = emit_jump_insn (gen_rtx_SET
9607 (VOIDmode, pc_rtx,
9608 gen_rtx_IF_THEN_ELSE (VOIDmode, second, target1,
9609 target2)));
9610 if (second_probability >= 0)
9611 REG_NOTES (i)
9612 = gen_rtx_EXPR_LIST (REG_BR_PROB,
9613 GEN_INT (second_probability),
9614 REG_NOTES (i));
9615 }
9616 if (label != NULL_RTX)
9617 emit_label (label);
9618 }
9619
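/* Expand a setcc of comparison CODE into DEST, which must be a QImode
   register. Return 1 when the expansion succeeded and 0 when it must be
   done by generic code (currently DImode comparisons on 32-bit targets). */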
9620 int
9621 ix86_expand_setcc (enum rtx_code code, rtx dest)
9622 {
9623 rtx ret, tmp, tmpreg, equiv;
9624 rtx second_test, bypass_test;
9625
9626 if (GET_MODE (ix86_compare_op0) == DImode
9627 && !TARGET_64BIT)
9628 return 0; /* FAIL */
9629
9630 if (GET_MODE (dest) != QImode)
9631 abort ();
9632
9633 ret = ix86_expand_compare (code, &second_test, &bypass_test);
9634 PUT_MODE (ret, QImode);
9635
9636 tmp = dest;
9637 tmpreg = dest;
9638
9639 emit_insn (gen_rtx_SET (VOIDmode, tmp, ret));
9640 if (bypass_test || second_test)
9641 {
9642 rtx test = second_test;
9643 int bypass = 0;
9644 rtx tmp2 = gen_reg_rtx (QImode);
9645 if (bypass_test)
9646 {
9647 if (second_test)
9648 abort ();
9649 test = bypass_test;
9650 bypass = 1;
9651 PUT_CODE (test, reverse_condition_maybe_unordered (GET_CODE (test)));
9652 }
9653 PUT_MODE (test, QImode);
9654 emit_insn (gen_rtx_SET (VOIDmode, tmp2, test));
9655
9656 if (bypass)
9657 emit_insn (gen_andqi3 (tmp, tmpreg, tmp2));
9658 else
9659 emit_insn (gen_iorqi3 (tmp, tmpreg, tmp2));
9660 }
9661
9662 /* Attach a REG_EQUAL note describing the comparison result. */
9663 equiv = simplify_gen_relational (code, QImode,
9664 GET_MODE (ix86_compare_op0),
9665 ix86_compare_op0, ix86_compare_op1);
9666 set_unique_reg_note (get_last_insn (), REG_EQUAL, equiv);
9667
9668 return 1; /* DONE */
9669 }
9670
9671 /* Expand a comparison setting or clearing the carry flag. Return true
9672 when successful and set *POP to the comparison operation. */
9673 static bool
9674 ix86_expand_carry_flag_compare (enum rtx_code code, rtx op0, rtx op1, rtx *pop)
9675 {
9676 enum machine_mode mode =
9677 GET_MODE (op0) != VOIDmode ? GET_MODE (op0) : GET_MODE (op1);
9678
9679 /* Do not handle DImode compares, which go through a special path on
9680 32-bit targets. FP compares are handled below. */
9681 if ((mode == DImode && !TARGET_64BIT))
9682 return false;
9683 if (FLOAT_MODE_P (mode))
9684 {
9685 rtx second_test = NULL, bypass_test = NULL;
9686 rtx compare_op, compare_seq;
9687
9688 /* Shortcut: the following common codes never translate into carry-flag compares. */
9689 if (code == EQ || code == NE || code == UNEQ || code == LTGT
9690 || code == ORDERED || code == UNORDERED)
9691 return false;
9692
9693 /* These comparisons require the zero flag; swap the operands so they don't. */
9694 if ((code == GT || code == UNLE || code == LE || code == UNGT)
9695 && !TARGET_IEEE_FP)
9696 {
9697 rtx tmp = op0;
9698 op0 = op1;
9699 op1 = tmp;
9700 code = swap_condition (code);
9701 }
9702
9703 /* Try to expand the comparison and verify that we end up with a carry
9704 flag based comparison. This fails only when we decide to expand the
9705 comparison using arithmetic, which is not a common scenario. */
9706 start_sequence ();
9707 compare_op = ix86_expand_fp_compare (code, op0, op1, NULL_RTX,
9708 &second_test, &bypass_test);
9709 compare_seq = get_insns ();
9710 end_sequence ();
9711
9712 if (second_test || bypass_test)
9713 return false;
9714 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
9715 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
9716 code = ix86_fp_compare_code_to_integer (GET_CODE (compare_op));
9717 else
9718 code = GET_CODE (compare_op);
9719 if (code != LTU && code != GEU)
9720 return false;
9721 emit_insn (compare_seq);
9722 *pop = compare_op;
9723 return true;
9724 }
9725 if (!INTEGRAL_MODE_P (mode))
9726 return false;
9727 switch (code)
9728 {
9729 case LTU:
9730 case GEU:
9731 break;
9732
9733 /* Convert a==0 into (unsigned)a<1. */
9734 case EQ:
9735 case NE:
9736 if (op1 != const0_rtx)
9737 return false;
9738 op1 = const1_rtx;
9739 code = (code == EQ ? LTU : GEU);
9740 break;
9741
9742 /* Convert a>b into b<a or a>=b+1. */
9743 case GTU:
9744 case LEU:
9745 if (GET_CODE (op1) == CONST_INT)
9746 {
9747 op1 = gen_int_mode (INTVAL (op1) + 1, GET_MODE (op0));
9748 /* Bail out on overflow. We still can swap operands but that
9749 would force loading of the constant into register. */
9750 if (op1 == const0_rtx
9751 || !x86_64_immediate_operand (op1, GET_MODE (op1)))
9752 return false;
9753 code = (code == GTU ? GEU : LTU);
9754 }
9755 else
9756 {
9757 rtx tmp = op1;
9758 op1 = op0;
9759 op0 = tmp;
9760 code = (code == GTU ? LTU : GEU);
9761 }
9762 break;
9763
9764 /* Convert a>=0 into (unsigned)a<0x80000000. */
9765 case LT:
9766 case GE:
9767 if (mode == DImode || op1 != const0_rtx)
9768 return false;
9769 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
9770 code = (code == LT ? GEU : LTU);
9771 break;
9772 case LE:
9773 case GT:
9774 if (mode == DImode || op1 != constm1_rtx)
9775 return false;
9776 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
9777 code = (code == LE ? GEU : LTU);
9778 break;
9779
9780 default:
9781 return false;
9782 }
9783 /* Swapping operands may cause the constant to appear as the first operand. */
9784 if (!nonimmediate_operand (op0, VOIDmode))
9785 {
9786 if (no_new_pseudos)
9787 return false;
9788 op0 = force_reg (mode, op0);
9789 }
9790 ix86_compare_op0 = op0;
9791 ix86_compare_op1 = op1;
9792 *pop = ix86_expand_compare (code, NULL, NULL);
9793 if (GET_CODE (*pop) != LTU && GET_CODE (*pop) != GEU)
9794 abort ();
9795 return true;
9796 }
9797
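/* Expand an integer conditional move. operands[0] is the destination,
   operands[1] the comparison (whose operands are in ix86_compare_op0 and
   ix86_compare_op1), and operands[2]/operands[3] the values for the true
   and false arms. Return 1 when the expansion succeeded and 0 when it
   should be left to generic code. */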
9798 int
9799 ix86_expand_int_movcc (rtx operands[])
9800 {
9801 enum rtx_code code = GET_CODE (operands[1]), compare_code;
9802 rtx compare_seq, compare_op;
9803 rtx second_test, bypass_test;
9804 enum machine_mode mode = GET_MODE (operands[0]);
9805 bool sign_bit_compare_p = false;
9806
9807 start_sequence ();
9808 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
9809 compare_seq = get_insns ();
9810 end_sequence ();
9811
9812 compare_code = GET_CODE (compare_op);
9813
9814 if ((ix86_compare_op1 == const0_rtx && (code == GE || code == LT))
9815 || (ix86_compare_op1 == constm1_rtx && (code == GT || code == LE)))
9816 sign_bit_compare_p = true;
9817
9818 /* Don't attempt mode expansion here -- if we had to expand 5 or 6
9819 HImode insns, we'd be swallowed in word prefix ops. */
9820
9821 if ((mode != HImode || TARGET_FAST_PREFIX)
9822 && (mode != DImode || TARGET_64BIT)
9823 && GET_CODE (operands[2]) == CONST_INT
9824 && GET_CODE (operands[3]) == CONST_INT)
9825 {
9826 rtx out = operands[0];
9827 HOST_WIDE_INT ct = INTVAL (operands[2]);
9828 HOST_WIDE_INT cf = INTVAL (operands[3]);
9829 HOST_WIDE_INT diff;
9830
9831 diff = ct - cf;
9832 /* Sign-bit compares are better done using shifts than by using
9833 sbb. */
9834 if (sign_bit_compare_p
9835 || ix86_expand_carry_flag_compare (code, ix86_compare_op0,
9836 ix86_compare_op1, &compare_op))
9837 {
9838 /* Detect overlap between destination and compare sources. */
9839 rtx tmp = out;
9840
9841 if (!sign_bit_compare_p)
9842 {
9843 bool fpcmp = false;
9844
9845 compare_code = GET_CODE (compare_op);
9846
9847 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
9848 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
9849 {
9850 fpcmp = true;
9851 compare_code = ix86_fp_compare_code_to_integer (compare_code);
9852 }
9853
9854 /* To simplify the rest of the code, restrict to the GEU case. */
9855 if (compare_code == LTU)
9856 {
9857 HOST_WIDE_INT tmp = ct;
9858 ct = cf;
9859 cf = tmp;
9860 compare_code = reverse_condition (compare_code);
9861 code = reverse_condition (code);
9862 }
9863 else
9864 {
9865 if (fpcmp)
9866 PUT_CODE (compare_op,
9867 reverse_condition_maybe_unordered
9868 (GET_CODE (compare_op)));
9869 else
9870 PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
9871 }
9872 diff = ct - cf;
9873
9874 if (reg_overlap_mentioned_p (out, ix86_compare_op0)
9875 || reg_overlap_mentioned_p (out, ix86_compare_op1))
9876 tmp = gen_reg_rtx (mode);
9877
9878 if (mode == DImode)
9879 emit_insn (gen_x86_movdicc_0_m1_rex64 (tmp, compare_op));
9880 else
9881 emit_insn (gen_x86_movsicc_0_m1 (gen_lowpart (SImode, tmp), compare_op));
9882 }
9883 else
9884 {
9885 if (code == GT || code == GE)
9886 code = reverse_condition (code);
9887 else
9888 {
9889 HOST_WIDE_INT tmp = ct;
9890 ct = cf;
9891 cf = tmp;
9892 diff = ct - cf;
9893 }
9894 tmp = emit_store_flag (tmp, code, ix86_compare_op0,
9895 ix86_compare_op1, VOIDmode, 0, -1);
9896 }
9897
9898 if (diff == 1)
9899 {
9900 /*
9901 * cmpl op0,op1
9902 * sbbl dest,dest
9903 * [addl dest, ct]
9904 *
9905 * Size 5 - 8.
9906 */
9907 if (ct)
9908 tmp = expand_simple_binop (mode, PLUS,
9909 tmp, GEN_INT (ct),
9910 copy_rtx (tmp), 1, OPTAB_DIRECT);
9911 }
9912 else if (cf == -1)
9913 {
9914 /*
9915 * cmpl op0,op1
9916 * sbbl dest,dest
9917 * orl $ct, dest
9918 *
9919 * Size 8.
9920 */
9921 tmp = expand_simple_binop (mode, IOR,
9922 tmp, GEN_INT (ct),
9923 copy_rtx (tmp), 1, OPTAB_DIRECT);
9924 }
9925 else if (diff == -1 && ct)
9926 {
9927 /*
9928 * cmpl op0,op1
9929 * sbbl dest,dest
9930 * notl dest
9931 * [addl dest, cf]
9932 *
9933 * Size 8 - 11.
9934 */
9935 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
9936 if (cf)
9937 tmp = expand_simple_binop (mode, PLUS,
9938 copy_rtx (tmp), GEN_INT (cf),
9939 copy_rtx (tmp), 1, OPTAB_DIRECT);
9940 }
9941 else
9942 {
9943 /*
9944 * cmpl op0,op1
9945 * sbbl dest,dest
9946 * [notl dest]
9947 * andl cf - ct, dest
9948 * [addl dest, ct]
9949 *
9950 * Size 8 - 11.
9951 */
9952
9953 if (cf == 0)
9954 {
9955 cf = ct;
9956 ct = 0;
9957 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
9958 }
9959
9960 tmp = expand_simple_binop (mode, AND,
9961 copy_rtx (tmp),
9962 gen_int_mode (cf - ct, mode),
9963 copy_rtx (tmp), 1, OPTAB_DIRECT);
9964 if (ct)
9965 tmp = expand_simple_binop (mode, PLUS,
9966 copy_rtx (tmp), GEN_INT (ct),
9967 copy_rtx (tmp), 1, OPTAB_DIRECT);
9968 }
9969
9970 if (!rtx_equal_p (tmp, out))
9971 emit_move_insn (copy_rtx (out), copy_rtx (tmp));
9972
9973 return 1; /* DONE */
9974 }
9975
9976 if (diff < 0)
9977 {
9978 HOST_WIDE_INT tmp;
9979 tmp = ct, ct = cf, cf = tmp;
9980 diff = -diff;
9981 if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0)))
9982 {
9983 /* We may be reversing an unordered compare to a normal compare, which
9984 is not valid in general (we may convert a non-trapping condition
9985 to a trapping one); however, on i386 we currently emit all
9986 comparisons unordered. */
9987 compare_code = reverse_condition_maybe_unordered (compare_code);
9988 code = reverse_condition_maybe_unordered (code);
9989 }
9990 else
9991 {
9992 compare_code = reverse_condition (compare_code);
9993 code = reverse_condition (code);
9994 }
9995 }
9996
9997 compare_code = NIL;
9998 if (GET_MODE_CLASS (GET_MODE (ix86_compare_op0)) == MODE_INT
9999 && GET_CODE (ix86_compare_op1) == CONST_INT)
10000 {
10001 if (ix86_compare_op1 == const0_rtx
10002 && (code == LT || code == GE))
10003 compare_code = code;
10004 else if (ix86_compare_op1 == constm1_rtx)
10005 {
10006 if (code == LE)
10007 compare_code = LT;
10008 else if (code == GT)
10009 compare_code = GE;
10010 }
10011 }
10012
10013 /* Optimize dest = (op0 < 0) ? -1 : cf. */
10014 if (compare_code != NIL
10015 && GET_MODE (ix86_compare_op0) == GET_MODE (out)
10016 && (cf == -1 || ct == -1))
10017 {
10018 /* If the lea code below could be used, only optimize
10019 if it results in a 2-insn sequence. */
10020
10021 if (! (diff == 1 || diff == 2 || diff == 4 || diff == 8
10022 || diff == 3 || diff == 5 || diff == 9)
10023 || (compare_code == LT && ct == -1)
10024 || (compare_code == GE && cf == -1))
10025 {
10026 /*
10027 * notl op1 (if necessary)
10028 * sarl $31, op1
10029 * orl cf, op1
10030 */
10031 if (ct != -1)
10032 {
10033 cf = ct;
10034 ct = -1;
10035 code = reverse_condition (code);
10036 }
10037
10038 out = emit_store_flag (out, code, ix86_compare_op0,
10039 ix86_compare_op1, VOIDmode, 0, -1);
10040
10041 out = expand_simple_binop (mode, IOR,
10042 out, GEN_INT (cf),
10043 out, 1, OPTAB_DIRECT);
10044 if (out != operands[0])
10045 emit_move_insn (operands[0], out);
10046
10047 return 1; /* DONE */
10048 }
10049 }
10050
10051
10052 if ((diff == 1 || diff == 2 || diff == 4 || diff == 8
10053 || diff == 3 || diff == 5 || diff == 9)
10054 && ((mode != QImode && mode != HImode) || !TARGET_PARTIAL_REG_STALL)
10055 && (mode != DImode || x86_64_sign_extended_value (GEN_INT (cf))))
10056 {
10057 /*
10058 * xorl dest,dest
10059 * cmpl op1,op2
10060 * setcc dest
10061 * lea cf(dest*(ct-cf)),dest
10062 *
10063 * Size 14.
10064 *
10065 * This also catches the degenerate setcc-only case.
10066 */
10067
10068 rtx tmp;
10069 int nops;
10070
10071 out = emit_store_flag (out, code, ix86_compare_op0,
10072 ix86_compare_op1, VOIDmode, 0, 1);
10073
10074 nops = 0;
10075 /* On x86_64 the lea instruction operates on Pmode, so we need
10076 to get the arithmetic done in the proper mode to match. */
10077 if (diff == 1)
10078 tmp = copy_rtx (out);
10079 else
10080 {
10081 rtx out1;
10082 out1 = copy_rtx (out);
10083 tmp = gen_rtx_MULT (mode, out1, GEN_INT (diff & ~1));
10084 nops++;
10085 if (diff & 1)
10086 {
10087 tmp = gen_rtx_PLUS (mode, tmp, out1);
10088 nops++;
10089 }
10090 }
10091 if (cf != 0)
10092 {
10093 tmp = gen_rtx_PLUS (mode, tmp, GEN_INT (cf));
10094 nops++;
10095 }
10096 if (!rtx_equal_p (tmp, out))
10097 {
10098 if (nops == 1)
10099 out = force_operand (tmp, copy_rtx (out));
10100 else
10101 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (out), copy_rtx (tmp)));
10102 }
10103 if (!rtx_equal_p (out, operands[0]))
10104 emit_move_insn (operands[0], copy_rtx (out));
10105
10106 return 1; /* DONE */
10107 }
10108
10109 /*
10110 * General case: Jumpful:
10111 * xorl dest,dest cmpl op1, op2
10112 * cmpl op1, op2 movl ct, dest
10113 * setcc dest jcc 1f
10114 * decl dest movl cf, dest
10115 * andl (cf-ct),dest 1:
10116 * addl ct,dest
10117 *
10118 * Size 20. Size 14.
10119 *
10120 * This is reasonably steep, but branch mispredict costs are
10121 * high on modern cpus, so consider failing only if optimizing
10122 * for space.
10123 */
10124
10125 if ((!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
10126 && BRANCH_COST >= 2)
10127 {
10128 if (cf == 0)
10129 {
10130 cf = ct;
10131 ct = 0;
10132 if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0)))
10133 /* We may be reversing an unordered compare to a normal compare,
10134 which is not valid in general (we may convert a non-trapping
10135 condition to a trapping one); however, on i386 we currently
10136 emit all comparisons unordered. */
10137 code = reverse_condition_maybe_unordered (code);
10138 else
10139 {
10140 code = reverse_condition (code);
10141 if (compare_code != NIL)
10142 compare_code = reverse_condition (compare_code);
10143 }
10144 }
10145
10146 if (compare_code != NIL)
10147 {
10148 /* notl op1 (if needed)
10149 sarl $31, op1
10150 andl (cf-ct), op1
10151 addl ct, op1
10152
10153 For x < 0 (resp. x <= -1) there will be no notl,
10154 so if possible swap the constants to get rid of the
10155 complement.
10156 True/false will be -1/0 while code below (store flag
10157 followed by decrement) is 0/-1, so the constants need
10158 to be exchanged once more. */
10159
10160 if (compare_code == GE || !cf)
10161 {
10162 code = reverse_condition (code);
10163 compare_code = LT;
10164 }
10165 else
10166 {
10167 HOST_WIDE_INT tmp = cf;
10168 cf = ct;
10169 ct = tmp;
10170 }
10171
10172 out = emit_store_flag (out, code, ix86_compare_op0,
10173 ix86_compare_op1, VOIDmode, 0, -1);
10174 }
10175 else
10176 {
10177 out = emit_store_flag (out, code, ix86_compare_op0,
10178 ix86_compare_op1, VOIDmode, 0, 1);
10179
10180 out = expand_simple_binop (mode, PLUS, copy_rtx (out), constm1_rtx,
10181 copy_rtx (out), 1, OPTAB_DIRECT);
10182 }
10183
10184 out = expand_simple_binop (mode, AND, copy_rtx (out),
10185 gen_int_mode (cf - ct, mode),
10186 copy_rtx (out), 1, OPTAB_DIRECT);
10187 if (ct)
10188 out = expand_simple_binop (mode, PLUS, copy_rtx (out), GEN_INT (ct),
10189 copy_rtx (out), 1, OPTAB_DIRECT);
10190 if (!rtx_equal_p (out, operands[0]))
10191 emit_move_insn (operands[0], copy_rtx (out));
10192
10193 return 1; /* DONE */
10194 }
10195 }
10196
10197 if (!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
10198 {
10199 /* Try a few things more with specific constants and a variable. */
10200
10201 optab op;
10202 rtx var, orig_out, out, tmp;
10203
10204 if (BRANCH_COST <= 2)
10205 return 0; /* FAIL */
10206
10207 /* If one of the two operands is an interesting constant (0 or -1), recurse
10208 to load the 0/-1 mask and combine it in with a logical operation. */
10209
10210 if (GET_CODE (operands[2]) == CONST_INT)
10211 {
10212 var = operands[3];
10213 if (INTVAL (operands[2]) == 0 && operands[3] != constm1_rtx)
10214 operands[3] = constm1_rtx, op = and_optab;
10215 else if (INTVAL (operands[2]) == -1 && operands[3] != const0_rtx)
10216 operands[3] = const0_rtx, op = ior_optab;
10217 else
10218 return 0; /* FAIL */
10219 }
10220 else if (GET_CODE (operands[3]) == CONST_INT)
10221 {
10222 var = operands[2];
10223 if (INTVAL (operands[3]) == 0 && operands[2] != constm1_rtx)
10224 operands[2] = constm1_rtx, op = and_optab;
10225 else if (INTVAL (operands[3]) == -1 && operands[2] != const0_rtx)
10226 operands[2] = const0_rtx, op = ior_optab;
10227 else
10228 return 0; /* FAIL */
10229 }
10230 else
10231 return 0; /* FAIL */
10232
10233 orig_out = operands[0];
10234 tmp = gen_reg_rtx (mode);
10235 operands[0] = tmp;
10236
10237 /* Recurse to get the constant loaded. */
10238 if (ix86_expand_int_movcc (operands) == 0)
10239 return 0; /* FAIL */
10240
10241 /* Mask in the interesting variable. */
10242 out = expand_binop (mode, op, var, tmp, orig_out, 0,
10243 OPTAB_WIDEN);
10244 if (!rtx_equal_p (out, orig_out))
10245 emit_move_insn (copy_rtx (orig_out), copy_rtx (out));
10246
10247 return 1; /* DONE */
10248 }
10249
10250 /*
10251 * For comparison with above,
10252 *
10253 * movl cf,dest
10254 * movl ct,tmp
10255 * cmpl op1,op2
10256 * cmovcc tmp,dest
10257 *
10258 * Size 15.
10259 */
10260
10261 if (! nonimmediate_operand (operands[2], mode))
10262 operands[2] = force_reg (mode, operands[2]);
10263 if (! nonimmediate_operand (operands[3], mode))
10264 operands[3] = force_reg (mode, operands[3]);
10265
10266 if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
10267 {
10268 rtx tmp = gen_reg_rtx (mode);
10269 emit_move_insn (tmp, operands[3]);
10270 operands[3] = tmp;
10271 }
10272 if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
10273 {
10274 rtx tmp = gen_reg_rtx (mode);
10275 emit_move_insn (tmp, operands[2]);
10276 operands[2] = tmp;
10277 }
10278
10279 if (! register_operand (operands[2], VOIDmode)
10280 && (mode == QImode
10281 || ! register_operand (operands[3], VOIDmode)))
10282 operands[2] = force_reg (mode, operands[2]);
10283
10284 if (mode == QImode
10285 && ! register_operand (operands[3], VOIDmode))
10286 operands[3] = force_reg (mode, operands[3]);
10287
10288 emit_insn (compare_seq);
10289 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
10290 gen_rtx_IF_THEN_ELSE (mode,
10291 compare_op, operands[2],
10292 operands[3])));
10293 if (bypass_test)
10294 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (operands[0]),
10295 gen_rtx_IF_THEN_ELSE (mode,
10296 bypass_test,
10297 copy_rtx (operands[3]),
10298 copy_rtx (operands[0]))));
10299 if (second_test)
10300 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (operands[0]),
10301 gen_rtx_IF_THEN_ELSE (mode,
10302 second_test,
10303 copy_rtx (operands[2]),
10304 copy_rtx (operands[0]))));
10305
10306 return 1; /* DONE */
10307 }
10308
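/* Expand a conditional move with floating point data. operands[0] is the
   destination, operands[1] the comparison and operands[2]/operands[3] the
   two arms. Return nonzero when the expansion succeeded. */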
10309 int
10310 ix86_expand_fp_movcc (rtx operands[])
10311 {
10312 enum rtx_code code;
10313 rtx tmp;
10314 rtx compare_op, second_test, bypass_test;
10315
10316 /* For SF/DFmode conditional moves based on comparisons
10317 in the same mode, we may want to use SSE min/max instructions. */
10318 if (((TARGET_SSE_MATH && GET_MODE (operands[0]) == SFmode)
10319 || (TARGET_SSE2 && TARGET_SSE_MATH && GET_MODE (operands[0]) == DFmode))
10320 && GET_MODE (ix86_compare_op0) == GET_MODE (operands[0])
10321 /* The SSE comparisons do not support the LTGT/UNEQ pair. */
10322 && (!TARGET_IEEE_FP
10323 || (GET_CODE (operands[1]) != LTGT && GET_CODE (operands[1]) != UNEQ))
10324 /* We may be called from the post-reload splitter. */
10325 && (!REG_P (operands[0])
10326 || SSE_REG_P (operands[0])
10327 || REGNO (operands[0]) >= FIRST_PSEUDO_REGISTER))
10328 {
10329 rtx op0 = ix86_compare_op0, op1 = ix86_compare_op1;
10330 code = GET_CODE (operands[1]);
10331
10332 /* See if we have (cross) match between comparison operands and
10333 conditional move operands. */
10334 if (rtx_equal_p (operands[2], op1))
10335 {
10336 rtx tmp = op0;
10337 op0 = op1;
10338 op1 = tmp;
10339 code = reverse_condition_maybe_unordered (code);
10340 }
10341 if (rtx_equal_p (operands[2], op0) && rtx_equal_p (operands[3], op1))
10342 {
10343 /* Check for min operation. */
10344 if (code == LT || code == UNLE)
10345 {
10346 if (code == UNLE)
10347 {
10348 rtx tmp = op0;
10349 op0 = op1;
10350 op1 = tmp;
10351 }
10352 operands[0] = force_reg (GET_MODE (operands[0]), operands[0]);
10353 if (memory_operand (op0, VOIDmode))
10354 op0 = force_reg (GET_MODE (operands[0]), op0);
10355 if (GET_MODE (operands[0]) == SFmode)
10356 emit_insn (gen_minsf3 (operands[0], op0, op1));
10357 else
10358 emit_insn (gen_mindf3 (operands[0], op0, op1));
10359 return 1;
10360 }
10361 /* Check for max operation. */
10362 if (code == GT || code == UNGE)
10363 {
10364 if (code == UNGE)
10365 {
10366 rtx tmp = op0;
10367 op0 = op1;
10368 op1 = tmp;
10369 }
10370 operands[0] = force_reg (GET_MODE (operands[0]), operands[0]);
10371 if (memory_operand (op0, VOIDmode))
10372 op0 = force_reg (GET_MODE (operands[0]), op0);
10373 if (GET_MODE (operands[0]) == SFmode)
10374 emit_insn (gen_maxsf3 (operands[0], op0, op1));
10375 else
10376 emit_insn (gen_maxdf3 (operands[0], op0, op1));
10377 return 1;
10378 }
10379 }
10380 /* Arrange for the condition to be an sse_comparison_operator. In
10381 non-IEEE mode, try to canonicalize the destination operand
10382 to be first in the comparison - this helps reload avoid extra
10383 moves. */
10384 if (!sse_comparison_operator (operands[1], VOIDmode)
10385 || (rtx_equal_p (operands[0], ix86_compare_op1) && !TARGET_IEEE_FP))
10386 {
10387 rtx tmp = ix86_compare_op0;
10388 ix86_compare_op0 = ix86_compare_op1;
10389 ix86_compare_op1 = tmp;
10390 operands[1] = gen_rtx_fmt_ee (swap_condition (GET_CODE (operands[1])),
10391 VOIDmode, ix86_compare_op0,
10392 ix86_compare_op1);
10393 }
10394 /* Similarly, try to arrange for the result to be the first operand of the
10395 conditional move. SSE also does not support the NE comparison, so try
10396 to avoid it. */
10397 if ((rtx_equal_p (operands[0], operands[3])
10398 && (!TARGET_IEEE_FP || GET_CODE (operands[1]) != EQ))
10399 || (GET_CODE (operands[1]) == NE && TARGET_IEEE_FP))
10400 {
10401 rtx tmp = operands[2];
10402 operands[2] = operands[3];
10403 operands[3] = tmp;
10404 operands[1] = gen_rtx_fmt_ee (reverse_condition_maybe_unordered
10405 (GET_CODE (operands[1])),
10406 VOIDmode, ix86_compare_op0,
10407 ix86_compare_op1);
10408 }
10409 if (GET_MODE (operands[0]) == SFmode)
10410 emit_insn (gen_sse_movsfcc (operands[0], operands[1],
10411 operands[2], operands[3],
10412 ix86_compare_op0, ix86_compare_op1));
10413 else
10414 emit_insn (gen_sse_movdfcc (operands[0], operands[1],
10415 operands[2], operands[3],
10416 ix86_compare_op0, ix86_compare_op1));
10417 return 1;
10418 }
10419
10420 /* The floating point conditional move instructions don't directly
10421 support conditions resulting from a signed integer comparison. */
10422
10423 code = GET_CODE (operands[1]);
10424 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
10425
10426 /* The floating point conditional move instructions don't directly
10427 support signed integer comparisons. */
10428
10429 if (!fcmov_comparison_operator (compare_op, VOIDmode))
10430 {
10431 if (second_test != NULL || bypass_test != NULL)
10432 abort ();
10433 tmp = gen_reg_rtx (QImode);
10434 ix86_expand_setcc (code, tmp);
10435 code = NE;
10436 ix86_compare_op0 = tmp;
10437 ix86_compare_op1 = const0_rtx;
10438 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
10439 }
10440 if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
10441 {
10442 tmp = gen_reg_rtx (GET_MODE (operands[0]));
10443 emit_move_insn (tmp, operands[3]);
10444 operands[3] = tmp;
10445 }
10446 if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
10447 {
10448 tmp = gen_reg_rtx (GET_MODE (operands[0]));
10449 emit_move_insn (tmp, operands[2]);
10450 operands[2] = tmp;
10451 }
10452
10453 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
10454 gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
10455 compare_op,
10456 operands[2],
10457 operands[3])));
10458 if (bypass_test)
10459 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
10460 gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
10461 bypass_test,
10462 operands[3],
10463 operands[0])));
10464 if (second_test)
10465 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
10466 gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
10467 second_test,
10468 operands[2],
10469 operands[0])));
10470
10471 return 1;
10472 }
10473
10474 /* Expand a conditional increment or decrement using adc/sbb instructions.
10475 The default case, using setcc followed by a conditional move, can be
10476 done by generic code. */
10477 int
10478 ix86_expand_int_addcc (rtx operands[])
10479 {
10480 enum rtx_code code = GET_CODE (operands[1]);
10481 rtx compare_op;
10482 rtx val = const0_rtx;
10483 bool fpcmp = false;
10484 enum machine_mode mode = GET_MODE (operands[0]);
10485
10486 if (operands[3] != const1_rtx
10487 && operands[3] != constm1_rtx)
10488 return 0;
10489 if (!ix86_expand_carry_flag_compare (code, ix86_compare_op0,
10490 ix86_compare_op1, &compare_op))
10491 return 0;
10492 code = GET_CODE (compare_op);
10493
10494 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
10495 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
10496 {
10497 fpcmp = true;
10498 code = ix86_fp_compare_code_to_integer (code);
10499 }
10500
10501 if (code != LTU)
10502 {
10503 val = constm1_rtx;
10504 if (fpcmp)
10505 PUT_CODE (compare_op,
10506 reverse_condition_maybe_unordered
10507 (GET_CODE (compare_op)));
10508 else
10509 PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
10510 }
10511 PUT_MODE (compare_op, mode);
10512
10513 /* Construct either adc or sbb insn. */
10514 if ((code == LTU) == (operands[3] == constm1_rtx))
10515 {
10516 switch (GET_MODE (operands[0]))
10517 {
10518 case QImode:
10519 emit_insn (gen_subqi3_carry (operands[0], operands[2], val, compare_op));
10520 break;
10521 case HImode:
10522 emit_insn (gen_subhi3_carry (operands[0], operands[2], val, compare_op));
10523 break;
10524 case SImode:
10525 emit_insn (gen_subsi3_carry (operands[0], operands[2], val, compare_op));
10526 break;
10527 case DImode:
10528 emit_insn (gen_subdi3_carry_rex64 (operands[0], operands[2], val, compare_op));
10529 break;
10530 default:
10531 abort ();
10532 }
10533 }
10534 else
10535 {
10536 switch (GET_MODE (operands[0]))
10537 {
10538 case QImode:
10539 emit_insn (gen_addqi3_carry (operands[0], operands[2], val, compare_op));
10540 break;
10541 case HImode:
10542 emit_insn (gen_addhi3_carry (operands[0], operands[2], val, compare_op));
10543 break;
10544 case SImode:
10545 emit_insn (gen_addsi3_carry (operands[0], operands[2], val, compare_op));
10546 break;
10547 case DImode:
10548 emit_insn (gen_adddi3_carry_rex64 (operands[0], operands[2], val, compare_op));
10549 break;
10550 default:
10551 abort ();
10552 }
10553 }
10554 return 1; /* DONE */
10555 }
10556
10557
10558 /* Split operands 0 and 1 into SImode parts. Similar to split_di, but
10559 works for floating point parameters and non-offsettable memories.
10560 For pushes, it returns just stack offsets; the values will be saved
10561 in the right order. At most three parts are generated. */
10562
10563 static int
10564 ix86_split_to_parts (rtx operand, rtx *parts, enum machine_mode mode)
10565 {
10566 int size;
10567
10568 if (!TARGET_64BIT)
10569 size = mode==XFmode ? 3 : GET_MODE_SIZE (mode) / 4;
10570 else
10571 size = (GET_MODE_SIZE (mode) + 4) / 8;
10572
10573 if (GET_CODE (operand) == REG && MMX_REGNO_P (REGNO (operand)))
10574 abort ();
10575 if (size < 2 || size > 3)
10576 abort ();
10577
10578 /* Optimize constant pool references into immediates. This is used by fp
10579 moves, which force all constants to memory to allow combining. */
10580 if (GET_CODE (operand) == MEM && RTX_UNCHANGING_P (operand))
10581 {
10582 rtx tmp = maybe_get_pool_constant (operand);
10583 if (tmp)
10584 operand = tmp;
10585 }
10586
10587 if (GET_CODE (operand) == MEM && !offsettable_memref_p (operand))
10588 {
10589 /* The only non-offsettable memories we handle are pushes. */
10590 if (! push_operand (operand, VOIDmode))
10591 abort ();
10592
10593 operand = copy_rtx (operand);
10594 PUT_MODE (operand, Pmode);
10595 parts[0] = parts[1] = parts[2] = operand;
10596 }
10597 else if (!TARGET_64BIT)
10598 {
10599 if (mode == DImode)
10600 split_di (&operand, 1, &parts[0], &parts[1]);
10601 else
10602 {
10603 if (REG_P (operand))
10604 {
10605 if (!reload_completed)
10606 abort ();
10607 parts[0] = gen_rtx_REG (SImode, REGNO (operand) + 0);
10608 parts[1] = gen_rtx_REG (SImode, REGNO (operand) + 1);
10609 if (size == 3)
10610 parts[2] = gen_rtx_REG (SImode, REGNO (operand) + 2);
10611 }
10612 else if (offsettable_memref_p (operand))
10613 {
10614 operand = adjust_address (operand, SImode, 0);
10615 parts[0] = operand;
10616 parts[1] = adjust_address (operand, SImode, 4);
10617 if (size == 3)
10618 parts[2] = adjust_address (operand, SImode, 8);
10619 }
10620 else if (GET_CODE (operand) == CONST_DOUBLE)
10621 {
10622 REAL_VALUE_TYPE r;
10623 long l[4];
10624
10625 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
10626 switch (mode)
10627 {
10628 case XFmode:
10629 REAL_VALUE_TO_TARGET_LONG_DOUBLE (r, l);
10630 parts[2] = gen_int_mode (l[2], SImode);
10631 break;
10632 case DFmode:
10633 REAL_VALUE_TO_TARGET_DOUBLE (r, l);
10634 break;
10635 default:
10636 abort ();
10637 }
10638 parts[1] = gen_int_mode (l[1], SImode);
10639 parts[0] = gen_int_mode (l[0], SImode);
10640 }
10641 else
10642 abort ();
10643 }
10644 }
10645 else
10646 {
10647 if (mode == TImode)
10648 split_ti (&operand, 1, &parts[0], &parts[1]);
10649 if (mode == XFmode || mode == TFmode)
10650 {
10651 enum machine_mode upper_mode = mode==XFmode ? SImode : DImode;
10652 if (REG_P (operand))
10653 {
10654 if (!reload_completed)
10655 abort ();
10656 parts[0] = gen_rtx_REG (DImode, REGNO (operand) + 0);
10657 parts[1] = gen_rtx_REG (upper_mode, REGNO (operand) + 1);
10658 }
10659 else if (offsettable_memref_p (operand))
10660 {
10661 operand = adjust_address (operand, DImode, 0);
10662 parts[0] = operand;
10663 parts[1] = adjust_address (operand, upper_mode, 8);
10664 }
10665 else if (GET_CODE (operand) == CONST_DOUBLE)
10666 {
10667 REAL_VALUE_TYPE r;
10668 long l[3];
10669
10670 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
10671 real_to_target (l, &r, mode);
10672 /* Do not use shift by 32 to avoid warning on 32bit systems. */
10673 if (HOST_BITS_PER_WIDE_INT >= 64)
10674 parts[0]
10675 = gen_int_mode
10676 ((l[0] & (((HOST_WIDE_INT) 2 << 31) - 1))
10677 + ((((HOST_WIDE_INT) l[1]) << 31) << 1),
10678 DImode);
10679 else
10680 parts[0] = immed_double_const (l[0], l[1], DImode);
10681 if (upper_mode == SImode)
10682 parts[1] = gen_int_mode (l[2], SImode);
10683 else if (HOST_BITS_PER_WIDE_INT >= 64)
10684 parts[1]
10685 = gen_int_mode
10686 ((l[2] & (((HOST_WIDE_INT) 2 << 31) - 1))
10687 + ((((HOST_WIDE_INT) l[3]) << 31) << 1),
10688 DImode);
10689 else
10690 parts[1] = immed_double_const (l[2], l[3], DImode);
10691 }
10692 else
10693 abort ();
10694 }
10695 }
10696
10697 return size;
10698 }
10699
10700 /* Emit insns to perform a move or push of DI, DF, and XF values.
10701 All required insns are emitted directly. Operands 2-4 contain the
10702 input values in the correct order; operands 5-7 contain the output
10703 values. */
10704
10705 void
10706 ix86_split_long_move (rtx operands[])
10707 {
10708 rtx part[2][3];
10709 int nparts;
10710 int push = 0;
10711 int collisions = 0;
10712 enum machine_mode mode = GET_MODE (operands[0]);
10713
10714 /* The DFmode expanders may ask us to move a double.
10715 For a 64-bit target this is a single move. By hiding that fact
10716 here we simplify the i386.md splitters. */
10717 if (GET_MODE_SIZE (GET_MODE (operands[0])) == 8 && TARGET_64BIT)
10718 {
10719 /* Optimize constant pool references into immediates. This is used by
10720 fp moves, which force all constants to memory to allow combining. */
10721
10722 if (GET_CODE (operands[1]) == MEM
10723 && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF
10724 && CONSTANT_POOL_ADDRESS_P (XEXP (operands[1], 0)))
10725 operands[1] = get_pool_constant (XEXP (operands[1], 0));
10726 if (push_operand (operands[0], VOIDmode))
10727 {
10728 operands[0] = copy_rtx (operands[0]);
10729 PUT_MODE (operands[0], Pmode);
10730 }
10731 else
10732 operands[0] = gen_lowpart (DImode, operands[0]);
10733 operands[1] = gen_lowpart (DImode, operands[1]);
10734 emit_move_insn (operands[0], operands[1]);
10735 return;
10736 }
10737
10738 /* The only non-offsettable memory we handle is push. */
10739 if (push_operand (operands[0], VOIDmode))
10740 push = 1;
10741 else if (GET_CODE (operands[0]) == MEM
10742 && ! offsettable_memref_p (operands[0]))
10743 abort ();
10744
10745 nparts = ix86_split_to_parts (operands[1], part[1], GET_MODE (operands[0]));
10746 ix86_split_to_parts (operands[0], part[0], GET_MODE (operands[0]));
10747
10748 /* When emitting a push, take care of source operands on the stack. */
10749 if (push && GET_CODE (operands[1]) == MEM
10750 && reg_overlap_mentioned_p (stack_pointer_rtx, operands[1]))
10751 {
10752 if (nparts == 3)
10753 part[1][1] = change_address (part[1][1], GET_MODE (part[1][1]),
10754 XEXP (part[1][2], 0));
10755 part[1][0] = change_address (part[1][0], GET_MODE (part[1][0]),
10756 XEXP (part[1][1], 0));
10757 }
10758
10759 /* We need to do the copy in the right order in case an address register
10760 of the source overlaps the destination. */
10761 if (REG_P (part[0][0]) && GET_CODE (part[1][0]) == MEM)
10762 {
10763 if (reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0)))
10764 collisions++;
10765 if (reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
10766 collisions++;
10767 if (nparts == 3
10768 && reg_overlap_mentioned_p (part[0][2], XEXP (part[1][0], 0)))
10769 collisions++;
10770
10771 /* Collision in the middle part can be handled by reordering. */
10772 if (collisions == 1 && nparts == 3
10773 && reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
10774 {
10775 rtx tmp;
10776 tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
10777 tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
10778 }
10779
10780 /* If there are more collisions, we can't handle them by reordering.
10781 Do an lea to the last part and use only one colliding move. */
10782 else if (collisions > 1)
10783 {
10784 rtx base;
10785
10786 collisions = 1;
10787
10788 base = part[0][nparts - 1];
10789
10790 /* Handle the case when the last part isn't valid for lea.
10791 Happens in 64-bit mode storing the 12-byte XFmode. */
10792 if (GET_MODE (base) != Pmode)
10793 base = gen_rtx_REG (Pmode, REGNO (base));
10794
10795 emit_insn (gen_rtx_SET (VOIDmode, base, XEXP (part[1][0], 0)));
10796 part[1][0] = replace_equiv_address (part[1][0], base);
10797 part[1][1] = replace_equiv_address (part[1][1],
10798 plus_constant (base, UNITS_PER_WORD));
10799 if (nparts == 3)
10800 part[1][2] = replace_equiv_address (part[1][2],
10801 plus_constant (base, 8));
10802 }
10803 }
10804
10805 if (push)
10806 {
10807 if (!TARGET_64BIT)
10808 {
10809 if (nparts == 3)
10810 {
10811 if (TARGET_128BIT_LONG_DOUBLE && mode == XFmode)
10812 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, GEN_INT (-4)));
10813 emit_move_insn (part[0][2], part[1][2]);
10814 }
10815 }
10816 else
10817 {
10818 /* In 64-bit mode we don't have a 32-bit push available. If this is a
10819 register, that is OK - we will just use the larger counterpart. We also
10820 retype memory - these come from the attempt to avoid a REX prefix on
10821 moving the second half of a TFmode value. */
10822 if (GET_MODE (part[1][1]) == SImode)
10823 {
10824 if (GET_CODE (part[1][1]) == MEM)
10825 part[1][1] = adjust_address (part[1][1], DImode, 0);
10826 else if (REG_P (part[1][1]))
10827 part[1][1] = gen_rtx_REG (DImode, REGNO (part[1][1]));
10828 else
10829 abort ();
10830 if (GET_MODE (part[1][0]) == SImode)
10831 part[1][0] = part[1][1];
10832 }
10833 }
10834 emit_move_insn (part[0][1], part[1][1]);
10835 emit_move_insn (part[0][0], part[1][0]);
10836 return;
10837 }
10838
10839 /* Choose correct order to not overwrite the source before it is copied. */
10840 if ((REG_P (part[0][0])
10841 && REG_P (part[1][1])
10842 && (REGNO (part[0][0]) == REGNO (part[1][1])
10843 || (nparts == 3
10844 && REGNO (part[0][0]) == REGNO (part[1][2]))))
10845 || (collisions > 0
10846 && reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0))))
10847 {
10848 if (nparts == 3)
10849 {
10850 operands[2] = part[0][2];
10851 operands[3] = part[0][1];
10852 operands[4] = part[0][0];
10853 operands[5] = part[1][2];
10854 operands[6] = part[1][1];
10855 operands[7] = part[1][0];
10856 }
10857 else
10858 {
10859 operands[2] = part[0][1];
10860 operands[3] = part[0][0];
10861 operands[5] = part[1][1];
10862 operands[6] = part[1][0];
10863 }
10864 }
10865 else
10866 {
10867 if (nparts == 3)
10868 {
10869 operands[2] = part[0][0];
10870 operands[3] = part[0][1];
10871 operands[4] = part[0][2];
10872 operands[5] = part[1][0];
10873 operands[6] = part[1][1];
10874 operands[7] = part[1][2];
10875 }
10876 else
10877 {
10878 operands[2] = part[0][0];
10879 operands[3] = part[0][1];
10880 operands[5] = part[1][0];
10881 operands[6] = part[1][1];
10882 }
10883 }
10884 emit_move_insn (operands[2], operands[5]);
10885 emit_move_insn (operands[3], operands[6]);
10886 if (nparts == 3)
10887 emit_move_insn (operands[4], operands[7]);
10888
10889 return;
10890 }
10891
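/* Split a DImode left shift into SImode insns. operands[0] and operands[1]
   are the destination and source, operands[2] is the shift count. SCRATCH
   is an SImode scratch register that may be used for the variable-count
   case when new pseudos cannot be created. */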
10892 void
10893 ix86_split_ashldi (rtx *operands, rtx scratch)
10894 {
10895 rtx low[2], high[2];
10896 int count;
10897
10898 if (GET_CODE (operands[2]) == CONST_INT)
10899 {
10900 split_di (operands, 2, low, high);
10901 count = INTVAL (operands[2]) & 63;
10902
10903 if (count >= 32)
10904 {
10905 emit_move_insn (high[0], low[1]);
10906 emit_move_insn (low[0], const0_rtx);
10907
10908 if (count > 32)
10909 emit_insn (gen_ashlsi3 (high[0], high[0], GEN_INT (count - 32)));
10910 }
10911 else
10912 {
10913 if (!rtx_equal_p (operands[0], operands[1]))
10914 emit_move_insn (operands[0], operands[1]);
10915 emit_insn (gen_x86_shld_1 (high[0], low[0], GEN_INT (count)));
10916 emit_insn (gen_ashlsi3 (low[0], low[0], GEN_INT (count)));
10917 }
10918 }
10919 else
10920 {
10921 if (!rtx_equal_p (operands[0], operands[1]))
10922 emit_move_insn (operands[0], operands[1]);
10923
10924 split_di (operands, 1, low, high);
10925
10926 emit_insn (gen_x86_shld_1 (high[0], low[0], operands[2]));
10927 emit_insn (gen_ashlsi3 (low[0], low[0], operands[2]));
10928
10929 if (TARGET_CMOVE && (! no_new_pseudos || scratch))
10930 {
10931 if (! no_new_pseudos)
10932 scratch = force_reg (SImode, const0_rtx);
10933 else
10934 emit_move_insn (scratch, const0_rtx);
10935
10936 emit_insn (gen_x86_shift_adj_1 (high[0], low[0], operands[2],
10937 scratch));
10938 }
10939 else
10940 emit_insn (gen_x86_shift_adj_2 (high[0], low[0], operands[2]));
10941 }
10942 }
10943
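/* Split a DImode arithmetic right shift into SImode insns; see
   ix86_split_ashldi above for the operand conventions. */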
10944 void
10945 ix86_split_ashrdi (rtx *operands, rtx scratch)
10946 {
10947 rtx low[2], high[2];
10948 int count;
10949
10950 if (GET_CODE (operands[2]) == CONST_INT)
10951 {
10952 split_di (operands, 2, low, high);
10953 count = INTVAL (operands[2]) & 63;
10954
10955 if (count == 63)
10956 {
10957 emit_move_insn (high[0], high[1]);
10958 emit_insn (gen_ashrsi3 (high[0], high[0], GEN_INT (31)));
10959 emit_move_insn (low[0], high[0]);
10960
10961 }
10962 else if (count >= 32)
10963 {
10964 emit_move_insn (low[0], high[1]);
10965
10966 if (! reload_completed)
10967 emit_insn (gen_ashrsi3 (high[0], low[0], GEN_INT (31)));
10968 else
10969 {
10970 emit_move_insn (high[0], low[0]);
10971 emit_insn (gen_ashrsi3 (high[0], high[0], GEN_INT (31)));
10972 }
10973
10974 if (count > 32)
10975 emit_insn (gen_ashrsi3 (low[0], low[0], GEN_INT (count - 32)));
10976 }
10977 else
10978 {
10979 if (!rtx_equal_p (operands[0], operands[1]))
10980 emit_move_insn (operands[0], operands[1]);
10981 emit_insn (gen_x86_shrd_1 (low[0], high[0], GEN_INT (count)));
10982 emit_insn (gen_ashrsi3 (high[0], high[0], GEN_INT (count)));
10983 }
10984 }
10985 else
10986 {
10987 if (!rtx_equal_p (operands[0], operands[1]))
10988 emit_move_insn (operands[0], operands[1]);
10989
10990 split_di (operands, 1, low, high);
10991
10992 emit_insn (gen_x86_shrd_1 (low[0], high[0], operands[2]));
10993 emit_insn (gen_ashrsi3 (high[0], high[0], operands[2]));
10994
10995 if (TARGET_CMOVE && (! no_new_pseudos || scratch))
10996 {
10997 if (! no_new_pseudos)
10998 scratch = gen_reg_rtx (SImode);
10999 emit_move_insn (scratch, high[0]);
11000 emit_insn (gen_ashrsi3 (scratch, scratch, GEN_INT (31)));
11001 emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
11002 scratch));
11003 }
11004 else
11005 emit_insn (gen_x86_shift_adj_3 (low[0], high[0], operands[2]));
11006 }
11007 }
11008
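/* Split a DImode logical right shift into SImode insns; see
   ix86_split_ashldi above for the operand conventions. */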
11009 void
11010 ix86_split_lshrdi (rtx *operands, rtx scratch)
11011 {
11012 rtx low[2], high[2];
11013 int count;
11014
11015 if (GET_CODE (operands[2]) == CONST_INT)
11016 {
11017 split_di (operands, 2, low, high);
11018 count = INTVAL (operands[2]) & 63;
11019
11020 if (count >= 32)
11021 {
11022 emit_move_insn (low[0], high[1]);
11023 emit_move_insn (high[0], const0_rtx);
11024
11025 if (count > 32)
11026 emit_insn (gen_lshrsi3 (low[0], low[0], GEN_INT (count - 32)));
11027 }
11028 else
11029 {
11030 if (!rtx_equal_p (operands[0], operands[1]))
11031 emit_move_insn (operands[0], operands[1]);
11032 emit_insn (gen_x86_shrd_1 (low[0], high[0], GEN_INT (count)));
11033 emit_insn (gen_lshrsi3 (high[0], high[0], GEN_INT (count)));
11034 }
11035 }
11036 else
11037 {
11038 if (!rtx_equal_p (operands[0], operands[1]))
11039 emit_move_insn (operands[0], operands[1]);
11040
11041 split_di (operands, 1, low, high);
11042
11043 emit_insn (gen_x86_shrd_1 (low[0], high[0], operands[2]));
11044 emit_insn (gen_lshrsi3 (high[0], high[0], operands[2]));
11045
11046 /* Heh. By reversing the arguments, we can reuse this pattern. */
11047 if (TARGET_CMOVE && (! no_new_pseudos || scratch))
11048 {
11049 if (! no_new_pseudos)
11050 scratch = force_reg (SImode, const0_rtx);
11051 else
11052 emit_move_insn (scratch, const0_rtx);
11053
11054 emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
11055 scratch));
11056 }
11057 else
11058 emit_insn (gen_x86_shift_adj_2 (low[0], high[0], operands[2]));
11059 }
11060 }
11061
11062 /* Helper function for the string operations below. Test whether VARIABLE
11063 is aligned to VALUE bytes. If so, jump to the returned label. */
11064 static rtx
11065 ix86_expand_aligntest (rtx variable, int value)
11066 {
11067 rtx label = gen_label_rtx ();
11068 rtx tmpcount = gen_reg_rtx (GET_MODE (variable));
11069 if (GET_MODE (variable) == DImode)
11070 emit_insn (gen_anddi3 (tmpcount, variable, GEN_INT (value)));
11071 else
11072 emit_insn (gen_andsi3 (tmpcount, variable, GEN_INT (value)));
11073 emit_cmp_and_jump_insns (tmpcount, const0_rtx, EQ, 0, GET_MODE (variable),
11074 1, label);
11075 return label;
11076 }
11077
11078 /* Decrease COUNTREG by VALUE. */
11079 static void
11080 ix86_adjust_counter (rtx countreg, HOST_WIDE_INT value)
11081 {
11082 if (GET_MODE (countreg) == DImode)
11083 emit_insn (gen_adddi3 (countreg, countreg, GEN_INT (-value)));
11084 else
11085 emit_insn (gen_addsi3 (countreg, countreg, GEN_INT (-value)));
11086 }
11087
11088 /* Zero extend a possibly SImode EXP into a Pmode register. */
11089 rtx
11090 ix86_zero_extend_to_Pmode (rtx exp)
11091 {
11092 rtx r;
11093 if (GET_MODE (exp) == VOIDmode)
11094 return force_reg (Pmode, exp);
11095 if (GET_MODE (exp) == Pmode)
11096 return copy_to_mode_reg (Pmode, exp);
11097 r = gen_reg_rtx (Pmode);
11098 emit_insn (gen_zero_extendsidi2 (r, exp));
11099 return r;
11100 }
11101
11102 /* Expand string move (memcpy) operation. Use i386 string operations when
11103 profitable. expand_clrmem contains similar code. */
11104 int
11105 ix86_expand_movmem (rtx dst, rtx src, rtx count_exp, rtx align_exp)
11106 {
11107 rtx srcreg, destreg, countreg, srcexp, destexp;
11108 enum machine_mode counter_mode;
11109 HOST_WIDE_INT align = 0;
11110 unsigned HOST_WIDE_INT count = 0;
11111
11112 if (GET_CODE (align_exp) == CONST_INT)
11113 align = INTVAL (align_exp);
11114
11115 /* Can't use any of this if the user has appropriated esi or edi. */
11116 if (global_regs[4] || global_regs[5])
11117 return 0;
11118
11119 /* This simple hack avoids all inlining code and simplifies code below. */
11120 if (!TARGET_ALIGN_STRINGOPS)
11121 align = 64;
11122
11123 if (GET_CODE (count_exp) == CONST_INT)
11124 {
11125 count = INTVAL (count_exp);
11126 if (!TARGET_INLINE_ALL_STRINGOPS && count > 64)
11127 return 0;
11128 }
11129
11130 /* Figure out the proper mode for the counter. For 32-bit it is always
11131 SImode; for 64-bit use SImode when possible, otherwise DImode.
11132 Set count to the number of bytes copied when known at compile time. */
11133 if (!TARGET_64BIT || GET_MODE (count_exp) == SImode
11134 || x86_64_zero_extended_value (count_exp))
11135 counter_mode = SImode;
11136 else
11137 counter_mode = DImode;
11138
11139 if (counter_mode != SImode && counter_mode != DImode)
11140 abort ();
11141
11142 destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
11143 if (destreg != XEXP (dst, 0))
11144 dst = replace_equiv_address_nv (dst, destreg);
11145 srcreg = copy_to_mode_reg (Pmode, XEXP (src, 0));
11146 if (srcreg != XEXP (src, 0))
11147 src = replace_equiv_address_nv (src, srcreg);
11148
11149 /* When optimizing for size, emit a simple rep ; movsb instruction for
11150 counts not divisible by 4. */
11151
11152 if ((!optimize || optimize_size) && (count == 0 || (count & 0x03)))
11153 {
11154 emit_insn (gen_cld ());
11155 countreg = ix86_zero_extend_to_Pmode (count_exp);
11156 destexp = gen_rtx_PLUS (Pmode, destreg, countreg);
11157 srcexp = gen_rtx_PLUS (Pmode, srcreg, countreg);
11158 emit_insn (gen_rep_mov (destreg, dst, srcreg, src, countreg,
11159 destexp, srcexp));
11160 }
11161
11162 /* For constant aligned (or small unaligned) copies use rep movsl
11163 followed by code copying the rest. For PentiumPro ensure 8 byte
11164 alignment to allow rep movsl acceleration. */
11165
11166 else if (count != 0
11167 && (align >= 8
11168 || (!TARGET_PENTIUMPRO && !TARGET_64BIT && align >= 4)
11169 || optimize_size || count < (unsigned int) 64))
11170 {
11171 unsigned HOST_WIDE_INT offset = 0;
11172 int size = TARGET_64BIT && !optimize_size ? 8 : 4;
11173 rtx srcmem, dstmem;
11174
11175 emit_insn (gen_cld ());
11176 if (count & ~(size - 1))
11177 {
11178 countreg = copy_to_mode_reg (counter_mode,
11179 GEN_INT ((count >> (size == 4 ? 2 : 3))
11180 & (TARGET_64BIT ? -1 : 0x3fffffff)));
11181 countreg = ix86_zero_extend_to_Pmode (countreg);
11182
11183 destexp = gen_rtx_ASHIFT (Pmode, countreg,
11184 GEN_INT (size == 4 ? 2 : 3));
11185 srcexp = gen_rtx_PLUS (Pmode, destexp, srcreg);
11186 destexp = gen_rtx_PLUS (Pmode, destexp, destreg);
11187
11188 emit_insn (gen_rep_mov (destreg, dst, srcreg, src,
11189 countreg, destexp, srcexp));
11190 offset = count & ~(size - 1);
11191 }
11192 if (size == 8 && (count & 0x04))
11193 {
11194 srcmem = adjust_automodify_address_nv (src, SImode, srcreg,
11195 offset);
11196 dstmem = adjust_automodify_address_nv (dst, SImode, destreg,
11197 offset);
11198 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
11199 offset += 4;
11200 }
11201 if (count & 0x02)
11202 {
11203 srcmem = adjust_automodify_address_nv (src, HImode, srcreg,
11204 offset);
11205 dstmem = adjust_automodify_address_nv (dst, HImode, destreg,
11206 offset);
11207 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
11208 offset += 2;
11209 }
11210 if (count & 0x01)
11211 {
11212 srcmem = adjust_automodify_address_nv (src, QImode, srcreg,
11213 offset);
11214 dstmem = adjust_automodify_address_nv (dst, QImode, destreg,
11215 offset);
11216 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
11217 }
11218 }
11219 /* The generic code based on the glibc implementation:
11220 - align destination to 4 bytes (8 byte alignment is used for PentiumPro
11221 allowing accelerated copying there)
11222 - copy the data using rep movsl
11223 - copy the rest. */
11224 else
11225 {
11226 rtx countreg2;
11227 rtx label = NULL;
11228 rtx srcmem, dstmem;
11229 int desired_alignment = (TARGET_PENTIUMPRO
11230 && (count == 0 || count >= (unsigned int) 260)
11231 ? 8 : UNITS_PER_WORD);
11232 /* Get rid of MEM_OFFSETs, they won't be accurate. */
11233 dst = change_address (dst, BLKmode, destreg);
11234 src = change_address (src, BLKmode, srcreg);
11235
11236 /* In case we don't know anything about the alignment, default to
11237 the library version, since it is usually equally fast and results in
11238 shorter code.
11239 
11240 Also emit a call when we know that the count is large and the call
11241 overhead will not be important. */
11242 if (!TARGET_INLINE_ALL_STRINGOPS
11243 && (align < UNITS_PER_WORD || !TARGET_REP_MOVL_OPTIMAL))
11244 return 0;
11245
11246 if (TARGET_SINGLE_STRINGOP)
11247 emit_insn (gen_cld ());
11248
11249 countreg2 = gen_reg_rtx (Pmode);
11250 countreg = copy_to_mode_reg (counter_mode, count_exp);
11251
11252 /* We don't use loops to align the destination or to copy parts smaller
11253 than 4 bytes, because gcc is able to optimize such code better (in
11254 case the destination or the count really is aligned, gcc is often
11255 able to predict the branches) and it is also friendlier to
11256 hardware branch prediction.
11257 
11258 Using loops is beneficial for the generic case, because we can
11259 handle small counts using the loops. Many CPUs (such as Athlon)
11260 have large REP prefix setup costs.
11261 
11262 This is quite costly. Maybe we can revisit this decision later or
11263 add some customizability to this code. */
11264
11265 if (count == 0 && align < desired_alignment)
11266 {
11267 label = gen_label_rtx ();
11268 emit_cmp_and_jump_insns (countreg, GEN_INT (desired_alignment - 1),
11269 LEU, 0, counter_mode, 1, label);
11270 }
11271 if (align <= 1)
11272 {
11273 rtx label = ix86_expand_aligntest (destreg, 1);
11274 srcmem = change_address (src, QImode, srcreg);
11275 dstmem = change_address (dst, QImode, destreg);
11276 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
11277 ix86_adjust_counter (countreg, 1);
11278 emit_label (label);
11279 LABEL_NUSES (label) = 1;
11280 }
11281 if (align <= 2)
11282 {
11283 rtx label = ix86_expand_aligntest (destreg, 2);
11284 srcmem = change_address (src, HImode, srcreg);
11285 dstmem = change_address (dst, HImode, destreg);
11286 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
11287 ix86_adjust_counter (countreg, 2);
11288 emit_label (label);
11289 LABEL_NUSES (label) = 1;
11290 }
11291 if (align <= 4 && desired_alignment > 4)
11292 {
11293 rtx label = ix86_expand_aligntest (destreg, 4);
11294 srcmem = change_address (src, SImode, srcreg);
11295 dstmem = change_address (dst, SImode, destreg);
11296 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
11297 ix86_adjust_counter (countreg, 4);
11298 emit_label (label);
11299 LABEL_NUSES (label) = 1;
11300 }
11301
11302 if (label && desired_alignment > 4 && !TARGET_64BIT)
11303 {
11304 emit_label (label);
11305 LABEL_NUSES (label) = 1;
11306 label = NULL_RTX;
11307 }
11308 if (!TARGET_SINGLE_STRINGOP)
11309 emit_insn (gen_cld ());
11310 if (TARGET_64BIT)
11311 {
11312 emit_insn (gen_lshrdi3 (countreg2, ix86_zero_extend_to_Pmode (countreg),
11313 GEN_INT (3)));
11314 destexp = gen_rtx_ASHIFT (Pmode, countreg2, GEN_INT (3));
11315 }
11316 else
11317 {
11318 emit_insn (gen_lshrsi3 (countreg2, countreg, const2_rtx));
11319 destexp = gen_rtx_ASHIFT (Pmode, countreg2, const2_rtx);
11320 }
11321 srcexp = gen_rtx_PLUS (Pmode, destexp, srcreg);
11322 destexp = gen_rtx_PLUS (Pmode, destexp, destreg);
11323 emit_insn (gen_rep_mov (destreg, dst, srcreg, src,
11324 countreg2, destexp, srcexp));
11325
11326 if (label)
11327 {
11328 emit_label (label);
11329 LABEL_NUSES (label) = 1;
11330 }
11331 if (TARGET_64BIT && align > 4 && count != 0 && (count & 4))
11332 {
11333 srcmem = change_address (src, SImode, srcreg);
11334 dstmem = change_address (dst, SImode, destreg);
11335 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
11336 }
11337 if ((align <= 4 || count == 0) && TARGET_64BIT)
11338 {
11339 rtx label = ix86_expand_aligntest (countreg, 4);
11340 srcmem = change_address (src, SImode, srcreg);
11341 dstmem = change_address (dst, SImode, destreg);
11342 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
11343 emit_label (label);
11344 LABEL_NUSES (label) = 1;
11345 }
11346 if (align > 2 && count != 0 && (count & 2))
11347 {
11348 srcmem = change_address (src, HImode, srcreg);
11349 dstmem = change_address (dst, HImode, destreg);
11350 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
11351 }
11352 if (align <= 2 || count == 0)
11353 {
11354 rtx label = ix86_expand_aligntest (countreg, 2);
11355 srcmem = change_address (src, HImode, srcreg);
11356 dstmem = change_address (dst, HImode, destreg);
11357 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
11358 emit_label (label);
11359 LABEL_NUSES (label) = 1;
11360 }
11361 if (align > 1 && count != 0 && (count & 1))
11362 {
11363 srcmem = change_address (src, QImode, srcreg);
11364 dstmem = change_address (dst, QImode, destreg);
11365 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
11366 }
11367 if (align <= 1 || count == 0)
11368 {
11369 rtx label = ix86_expand_aligntest (countreg, 1);
11370 srcmem = change_address (src, QImode, srcreg);
11371 dstmem = change_address (dst, QImode, destreg);
11372 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
11373 emit_label (label);
11374 LABEL_NUSES (label) = 1;
11375 }
11376 }
11377
11378 return 1;
11379 }
11380
11381 /* Expand string clear operation (bzero). Use i386 string operations when
11382 profitable. ix86_expand_movmem contains similar code. */
11383 int
11384 ix86_expand_clrmem (rtx dst, rtx count_exp, rtx align_exp)
11385 {
11386 rtx destreg, zeroreg, countreg, destexp;
11387 enum machine_mode counter_mode;
11388 HOST_WIDE_INT align = 0;
11389 unsigned HOST_WIDE_INT count = 0;
11390
11391 if (GET_CODE (align_exp) == CONST_INT)
11392 align = INTVAL (align_exp);
11393
11394 /* Can't use any of this if the user has appropriated esi. */
11395 if (global_regs[4])
11396 return 0;
11397
11398 /* This simple hack avoids all inlining code and simplifies code below. */
11399 if (!TARGET_ALIGN_STRINGOPS)
11400 align = 32;
11401
11402 if (GET_CODE (count_exp) == CONST_INT)
11403 {
11404 count = INTVAL (count_exp);
11405 if (!TARGET_INLINE_ALL_STRINGOPS && count > 64)
11406 return 0;
11407 }
11408 /* Figure out the proper mode for the counter. For 32-bit targets it is always
11409 SImode; for 64-bit targets use SImode when possible, otherwise DImode.
11410 Set count to the number of bytes cleared when known at compile time. */
11411 if (!TARGET_64BIT || GET_MODE (count_exp) == SImode
11412 || x86_64_zero_extended_value (count_exp))
11413 counter_mode = SImode;
11414 else
11415 counter_mode = DImode;
11416
11417 destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
11418 if (destreg != XEXP (dst, 0))
11419 dst = replace_equiv_address_nv (dst, destreg);
11420
11421 emit_insn (gen_cld ());
11422
11423 /* When optimizing for size, emit a simple rep ; stosb instruction for
11424 counts not divisible by 4. */
11425
11426 if ((!optimize || optimize_size) && (count == 0 || (count & 0x03)))
11427 {
11428 countreg = ix86_zero_extend_to_Pmode (count_exp);
11429 zeroreg = copy_to_mode_reg (QImode, const0_rtx);
11430 destexp = gen_rtx_PLUS (Pmode, destreg, countreg);
11431 emit_insn (gen_rep_stos (destreg, countreg, dst, zeroreg, destexp));
11432 }
11433 else if (count != 0
11434 && (align >= 8
11435 || (!TARGET_PENTIUMPRO && !TARGET_64BIT && align >= 4)
11436 || optimize_size || count < (unsigned int) 64))
11437 {
11438 int size = TARGET_64BIT && !optimize_size ? 8 : 4;
11439 unsigned HOST_WIDE_INT offset = 0;
11440
11441 zeroreg = copy_to_mode_reg (size == 4 ? SImode : DImode, const0_rtx);
11442 if (count & ~(size - 1))
11443 {
11444 countreg = copy_to_mode_reg (counter_mode,
11445 GEN_INT ((count >> (size == 4 ? 2 : 3))
11446 & (TARGET_64BIT ? -1 : 0x3fffffff)));
11447 countreg = ix86_zero_extend_to_Pmode (countreg);
11448 destexp = gen_rtx_ASHIFT (Pmode, countreg, GEN_INT (size == 4 ? 2 : 3));
11449 destexp = gen_rtx_PLUS (Pmode, destexp, destreg);
11450 emit_insn (gen_rep_stos (destreg, countreg, dst, zeroreg, destexp));
11451 offset = count & ~(size - 1);
11452 }
11453 if (size == 8 && (count & 0x04))
11454 {
11455 rtx mem = adjust_automodify_address_nv (dst, SImode, destreg,
11456 offset);
11457 emit_insn (gen_strset (destreg, mem,
11458 gen_rtx_SUBREG (SImode, zeroreg, 0)));
11459 offset += 4;
11460 }
11461 if (count & 0x02)
11462 {
11463 rtx mem = adjust_automodify_address_nv (dst, HImode, destreg,
11464 offset);
11465 emit_insn (gen_strset (destreg, mem,
11466 gen_rtx_SUBREG (HImode, zeroreg, 0)));
11467 offset += 2;
11468 }
11469 if (count & 0x01)
11470 {
11471 rtx mem = adjust_automodify_address_nv (dst, QImode, destreg,
11472 offset);
11473 emit_insn (gen_strset (destreg, mem,
11474 gen_rtx_SUBREG (QImode, zeroreg, 0)));
11475 }
11476 }
11477 else
11478 {
11479 rtx countreg2;
11480 rtx label = NULL;
11481 /* Compute desired alignment of the string operation. */
11482 int desired_alignment = (TARGET_PENTIUMPRO
11483 && (count == 0 || count >= (unsigned int) 260)
11484 ? 8 : UNITS_PER_WORD);
11485
11486 /* In case we don't know anything about the alignment, default to
11487 the library version, since it is usually equally fast and results in
11488 shorter code.
11489 
11490 Also emit a call when we know that the count is large and the call
11491 overhead will not be important. */
11492 if (!TARGET_INLINE_ALL_STRINGOPS
11493 && (align < UNITS_PER_WORD || !TARGET_REP_MOVL_OPTIMAL))
11494 return 0;
11495
11496 if (TARGET_SINGLE_STRINGOP)
11497 emit_insn (gen_cld ());
11498
11499 countreg2 = gen_reg_rtx (Pmode);
11500 countreg = copy_to_mode_reg (counter_mode, count_exp);
11501 zeroreg = copy_to_mode_reg (Pmode, const0_rtx);
11502 /* Get rid of MEM_OFFSET, it won't be accurate. */
11503 dst = change_address (dst, BLKmode, destreg);
11504
11505 if (count == 0 && align < desired_alignment)
11506 {
11507 label = gen_label_rtx ();
11508 emit_cmp_and_jump_insns (countreg, GEN_INT (desired_alignment - 1),
11509 LEU, 0, counter_mode, 1, label);
11510 }
11511 if (align <= 1)
11512 {
11513 rtx label = ix86_expand_aligntest (destreg, 1);
11514 emit_insn (gen_strset (destreg, dst,
11515 gen_rtx_SUBREG (QImode, zeroreg, 0)));
11516 ix86_adjust_counter (countreg, 1);
11517 emit_label (label);
11518 LABEL_NUSES (label) = 1;
11519 }
11520 if (align <= 2)
11521 {
11522 rtx label = ix86_expand_aligntest (destreg, 2);
11523 emit_insn (gen_strset (destreg, dst,
11524 gen_rtx_SUBREG (HImode, zeroreg, 0)));
11525 ix86_adjust_counter (countreg, 2);
11526 emit_label (label);
11527 LABEL_NUSES (label) = 1;
11528 }
11529 if (align <= 4 && desired_alignment > 4)
11530 {
11531 rtx label = ix86_expand_aligntest (destreg, 4);
11532 emit_insn (gen_strset (destreg, dst,
11533 (TARGET_64BIT
11534 ? gen_rtx_SUBREG (SImode, zeroreg, 0)
11535 : zeroreg)));
11536 ix86_adjust_counter (countreg, 4);
11537 emit_label (label);
11538 LABEL_NUSES (label) = 1;
11539 }
11540
11541 if (label && desired_alignment > 4 && !TARGET_64BIT)
11542 {
11543 emit_label (label);
11544 LABEL_NUSES (label) = 1;
11545 label = NULL_RTX;
11546 }
11547
11548 if (!TARGET_SINGLE_STRINGOP)
11549 emit_insn (gen_cld ());
11550 if (TARGET_64BIT)
11551 {
11552 emit_insn (gen_lshrdi3 (countreg2, ix86_zero_extend_to_Pmode (countreg),
11553 GEN_INT (3)));
11554 destexp = gen_rtx_ASHIFT (Pmode, countreg2, GEN_INT (3));
11555 }
11556 else
11557 {
11558 emit_insn (gen_lshrsi3 (countreg2, countreg, const2_rtx));
11559 destexp = gen_rtx_ASHIFT (Pmode, countreg2, const2_rtx);
11560 }
11561 destexp = gen_rtx_PLUS (Pmode, destexp, destreg);
11562 emit_insn (gen_rep_stos (destreg, countreg2, dst, zeroreg, destexp));
11563
11564 if (label)
11565 {
11566 emit_label (label);
11567 LABEL_NUSES (label) = 1;
11568 }
11569
11570 if (TARGET_64BIT && align > 4 && count != 0 && (count & 4))
11571 emit_insn (gen_strset (destreg, dst,
11572 gen_rtx_SUBREG (SImode, zeroreg, 0)));
11573 if (TARGET_64BIT && (align <= 4 || count == 0))
11574 {
11575 rtx label = ix86_expand_aligntest (countreg, 4);
11576 emit_insn (gen_strset (destreg, dst,
11577 gen_rtx_SUBREG (SImode, zeroreg, 0)));
11578 emit_label (label);
11579 LABEL_NUSES (label) = 1;
11580 }
11581 if (align > 2 && count != 0 && (count & 2))
11582 emit_insn (gen_strset (destreg, dst,
11583 gen_rtx_SUBREG (HImode, zeroreg, 0)));
11584 if (align <= 2 || count == 0)
11585 {
11586 rtx label = ix86_expand_aligntest (countreg, 2);
11587 emit_insn (gen_strset (destreg, dst,
11588 gen_rtx_SUBREG (HImode, zeroreg, 0)));
11589 emit_label (label);
11590 LABEL_NUSES (label) = 1;
11591 }
11592 if (align > 1 && count != 0 && (count & 1))
11593 emit_insn (gen_strset (destreg, dst,
11594 gen_rtx_SUBREG (QImode, zeroreg, 0)));
11595 if (align <= 1 || count == 0)
11596 {
11597 rtx label = ix86_expand_aligntest (countreg, 1);
11598 emit_insn (gen_strset (destreg, dst,
11599 gen_rtx_SUBREG (QImode, zeroreg, 0)));
11600 emit_label (label);
11601 LABEL_NUSES (label) = 1;
11602 }
11603 }
11604 return 1;
11605 }
11606
11607 /* Expand strlen. */
11608 int
11609 ix86_expand_strlen (rtx out, rtx src, rtx eoschar, rtx align)
11610 {
11611 rtx addr, scratch1, scratch2, scratch3, scratch4;
11612
11613 /* The generic case of the strlen expander is long. Avoid expanding it
11614 unless TARGET_INLINE_ALL_STRINGOPS. */
11615
11616 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
11617 && !TARGET_INLINE_ALL_STRINGOPS
11618 && !optimize_size
11619 && (GET_CODE (align) != CONST_INT || INTVAL (align) < 4))
11620 return 0;
11621
11622 addr = force_reg (Pmode, XEXP (src, 0));
11623 scratch1 = gen_reg_rtx (Pmode);
11624
11625 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
11626 && !optimize_size)
11627 {
11628 /* Well, it seems that some optimizer does not combine a call like
11629 foo(strlen(bar), strlen(bar));
11630 when the move and the subtraction are done here. It does calculate
11631 the length just once when these instructions are done inside of
11632 output_strlen_unroll(). But I think that since &bar[strlen(bar)] is
11633 often used and I use one fewer register for the lifetime of
11634 output_strlen_unroll(), this is better. */
11635
11636 emit_move_insn (out, addr);
11637
11638 ix86_expand_strlensi_unroll_1 (out, src, align);
11639
11640 /* strlensi_unroll_1 returns the address of the zero at the end of
11641 the string, like memchr(), so compute the length by subtracting
11642 the start address. */
11643 if (TARGET_64BIT)
11644 emit_insn (gen_subdi3 (out, out, addr));
11645 else
11646 emit_insn (gen_subsi3 (out, out, addr));
11647 }
11648 else
11649 {
11650 rtx unspec;
11651 scratch2 = gen_reg_rtx (Pmode);
11652 scratch3 = gen_reg_rtx (Pmode);
11653 scratch4 = force_reg (Pmode, constm1_rtx);
11654
11655 emit_move_insn (scratch3, addr);
11656 eoschar = force_reg (QImode, eoschar);
11657
11658 emit_insn (gen_cld ());
11659 src = replace_equiv_address_nv (src, scratch3);
11660
11661 /* If .md starts supporting :P, this can be done in .md. */
11662 unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (4, src, eoschar, align,
11663 scratch4), UNSPEC_SCAS);
11664 emit_insn (gen_strlenqi_1 (scratch1, scratch3, unspec));
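/* The repnz ; scasb loop emitted for the unspec above starts the count
   register at -1 (scratch4) and decrements it once per byte examined,
   including the terminating character.  The final count is therefore
   -(length + 2), and the length is recovered below as ~count - 1.  */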
11665 if (TARGET_64BIT)
11666 {
11667 emit_insn (gen_one_cmpldi2 (scratch2, scratch1));
11668 emit_insn (gen_adddi3 (out, scratch2, constm1_rtx));
11669 }
11670 else
11671 {
11672 emit_insn (gen_one_cmplsi2 (scratch2, scratch1));
11673 emit_insn (gen_addsi3 (out, scratch2, constm1_rtx));
11674 }
11675 }
11676 return 1;
11677 }
11678
11679 /* Expand the appropriate insns for doing strlen if not just doing
11680 repnz; scasb
11681
11682 out = result, initialized with the start address
11683 align_rtx = alignment of the address.
11684 scratch = scratch register, initialized with the start address when
11685 not aligned; otherwise undefined
11686
11687 This is just the body. It needs the initializations mentioned above and
11688 some address computing at the end. These things are done in i386.md. */
11689
11690 static void
11691 ix86_expand_strlensi_unroll_1 (rtx out, rtx src, rtx align_rtx)
11692 {
11693 int align;
11694 rtx tmp;
11695 rtx align_2_label = NULL_RTX;
11696 rtx align_3_label = NULL_RTX;
11697 rtx align_4_label = gen_label_rtx ();
11698 rtx end_0_label = gen_label_rtx ();
11699 rtx mem;
11700 rtx tmpreg = gen_reg_rtx (SImode);
11701 rtx scratch = gen_reg_rtx (SImode);
11702 rtx cmp;
11703
11704 align = 0;
11705 if (GET_CODE (align_rtx) == CONST_INT)
11706 align = INTVAL (align_rtx);
11707
11708 /* Loop to check 1..3 bytes for null to get an aligned pointer. */
11709
11710 /* Is there a known alignment and is it less than 4? */
11711 if (align < 4)
11712 {
11713 rtx scratch1 = gen_reg_rtx (Pmode);
11714 emit_move_insn (scratch1, out);
11715 /* Is there a known alignment and is it not 2? */
11716 if (align != 2)
11717 {
11718 align_3_label = gen_label_rtx (); /* Label when aligned to 3-byte */
11719 align_2_label = gen_label_rtx (); /* Label when aligned to 2-byte */
11720
11721 /* Leave just the two lower bits. */
11722 align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (3),
11723 NULL_RTX, 0, OPTAB_WIDEN);
11724
11725 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
11726 Pmode, 1, align_4_label);
11727 emit_cmp_and_jump_insns (align_rtx, const2_rtx, EQ, NULL,
11728 Pmode, 1, align_2_label);
11729 emit_cmp_and_jump_insns (align_rtx, const2_rtx, GTU, NULL,
11730 Pmode, 1, align_3_label);
11731 }
11732 else
11733 {
11734 /* Since the alignment is 2, we have to check 2 or 0 bytes;
11735 check whether it is aligned to a 4-byte boundary. */
11736
11737 align_rtx = expand_binop (Pmode, and_optab, scratch1, const2_rtx,
11738 NULL_RTX, 0, OPTAB_WIDEN);
11739
11740 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
11741 Pmode, 1, align_4_label);
11742 }
11743
11744 mem = change_address (src, QImode, out);
11745
11746 /* Now compare the bytes. */
11747
11748 /* Compare the first n unaligned bytes one byte at a time. */
11749 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL,
11750 QImode, 1, end_0_label);
11751
11752 /* Increment the address. */
11753 if (TARGET_64BIT)
11754 emit_insn (gen_adddi3 (out, out, const1_rtx));
11755 else
11756 emit_insn (gen_addsi3 (out, out, const1_rtx));
11757
11758 /* Not needed with an alignment of 2 */
11759 if (align != 2)
11760 {
11761 emit_label (align_2_label);
11762
11763 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
11764 end_0_label);
11765
11766 if (TARGET_64BIT)
11767 emit_insn (gen_adddi3 (out, out, const1_rtx));
11768 else
11769 emit_insn (gen_addsi3 (out, out, const1_rtx));
11770
11771 emit_label (align_3_label);
11772 }
11773
11774 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
11775 end_0_label);
11776
11777 if (TARGET_64BIT)
11778 emit_insn (gen_adddi3 (out, out, const1_rtx));
11779 else
11780 emit_insn (gen_addsi3 (out, out, const1_rtx));
11781 }
11782
11783 /* Generate a loop to check 4 bytes at a time. It is not a good idea to
11784 align this loop; doing so only makes programs larger and does not
11785 speed them up. */
11786 emit_label (align_4_label);
11787
11788 mem = change_address (src, SImode, out);
11789 emit_move_insn (scratch, mem);
11790 if (TARGET_64BIT)
11791 emit_insn (gen_adddi3 (out, out, GEN_INT (4)));
11792 else
11793 emit_insn (gen_addsi3 (out, out, GEN_INT (4)));
11794
11795 /* This formula yields a nonzero result iff one of the bytes is zero.
11796 This saves three branches inside the loop and many cycles. */
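/* Roughly: subtracting 0x01010101 borrows out of every zero byte, and
   ANDing with the complement of the original word keeps the 0x80 bit only
   where the byte's own top bit was clear, so the masked result is nonzero
   exactly when the word contains a zero byte.  E.g. for 0x41410041
   ('A', 0, 'A', 'A' from the low byte up):
   (0x41410041 - 0x01010101) & ~0x41410041 & 0x80808080 == 0x00008000.  */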
11797
11798 emit_insn (gen_addsi3 (tmpreg, scratch, GEN_INT (-0x01010101)));
11799 emit_insn (gen_one_cmplsi2 (scratch, scratch));
11800 emit_insn (gen_andsi3 (tmpreg, tmpreg, scratch));
11801 emit_insn (gen_andsi3 (tmpreg, tmpreg,
11802 gen_int_mode (0x80808080, SImode)));
11803 emit_cmp_and_jump_insns (tmpreg, const0_rtx, EQ, 0, SImode, 1,
11804 align_4_label);
11805
11806 if (TARGET_CMOVE)
11807 {
11808 rtx reg = gen_reg_rtx (SImode);
11809 rtx reg2 = gen_reg_rtx (Pmode);
11810 emit_move_insn (reg, tmpreg);
11811 emit_insn (gen_lshrsi3 (reg, reg, GEN_INT (16)));
11812
11813 /* If zero is not in the first two bytes, move two bytes forward. */
11814 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
11815 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
11816 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
11817 emit_insn (gen_rtx_SET (VOIDmode, tmpreg,
11818 gen_rtx_IF_THEN_ELSE (SImode, tmp,
11819 reg,
11820 tmpreg)));
11821 /* Emit lea manually to avoid clobbering of flags. */
11822 emit_insn (gen_rtx_SET (SImode, reg2,
11823 gen_rtx_PLUS (Pmode, out, const2_rtx)));
11824
11825 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
11826 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
11827 emit_insn (gen_rtx_SET (VOIDmode, out,
11828 gen_rtx_IF_THEN_ELSE (Pmode, tmp,
11829 reg2,
11830 out)));
11831
11832 }
11833 else
11834 {
11835 rtx end_2_label = gen_label_rtx ();
11836 /* Is zero in the first two bytes? */
11837
11838 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
11839 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
11840 tmp = gen_rtx_NE (VOIDmode, tmp, const0_rtx);
11841 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
11842 gen_rtx_LABEL_REF (VOIDmode, end_2_label),
11843 pc_rtx);
11844 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
11845 JUMP_LABEL (tmp) = end_2_label;
11846
11847 /* Not in the first two. Move two bytes forward. */
11848 emit_insn (gen_lshrsi3 (tmpreg, tmpreg, GEN_INT (16)));
11849 if (TARGET_64BIT)
11850 emit_insn (gen_adddi3 (out, out, const2_rtx));
11851 else
11852 emit_insn (gen_addsi3 (out, out, const2_rtx));
11853
11854 emit_label (end_2_label);
11855
11856 }
11857
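/* At this point OUT is 4 plus the start of the 16-bit halfword containing
   the zero byte, and the low byte of TMPREG is 0x80 exactly when the zero
   is the first byte of that halfword.  Doubling that byte moves the 0x80
   into the carry flag, so the subtract-with-borrow below computes
   OUT - 3 - carry, i.e. the address of the zero byte, without a branch.  */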
11858 /* Avoid a branch in fixing up the final byte. */
11859 tmpreg = gen_lowpart (QImode, tmpreg);
11860 emit_insn (gen_addqi3_cc (tmpreg, tmpreg, tmpreg));
11861 cmp = gen_rtx_LTU (Pmode, gen_rtx_REG (CCmode, 17), const0_rtx);
11862 if (TARGET_64BIT)
11863 emit_insn (gen_subdi3_carry_rex64 (out, out, GEN_INT (3), cmp));
11864 else
11865 emit_insn (gen_subsi3_carry (out, out, GEN_INT (3), cmp));
11866
11867 emit_label (end_0_label);
11868 }
11869
11870 void
11871 ix86_expand_call (rtx retval, rtx fnaddr, rtx callarg1,
11872 rtx callarg2 ATTRIBUTE_UNUSED,
11873 rtx pop, int sibcall)
11874 {
11875 rtx use = NULL, call;
11876
11877 if (pop == const0_rtx)
11878 pop = NULL;
11879 if (TARGET_64BIT && pop)
11880 abort ();
11881
11882 #if TARGET_MACHO
11883 if (flag_pic && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF)
11884 fnaddr = machopic_indirect_call_target (fnaddr);
11885 #else
11886 /* Static functions and indirect calls don't need the pic register. */
11887 if (! TARGET_64BIT && flag_pic
11888 && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
11889 && ! SYMBOL_REF_LOCAL_P (XEXP (fnaddr, 0)))
11890 use_reg (&use, pic_offset_table_rtx);
11891
11892 if (TARGET_64BIT && INTVAL (callarg2) >= 0)
11893 {
11894 rtx al = gen_rtx_REG (QImode, 0);
11895 emit_move_insn (al, callarg2);
11896 use_reg (&use, al);
11897 }
11898 #endif /* TARGET_MACHO */
11899
11900 if (! call_insn_operand (XEXP (fnaddr, 0), Pmode))
11901 {
11902 fnaddr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0));
11903 fnaddr = gen_rtx_MEM (QImode, fnaddr);
11904 }
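/* For a 64-bit sibcall through a non-constant address, load the target
   into R11 first; R11 is call-clobbered and is not used to pass arguments,
   so it can safely hold the address while the tail call is made.  */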
11905 if (sibcall && TARGET_64BIT
11906 && !constant_call_address_operand (XEXP (fnaddr, 0), Pmode))
11907 {
11908 rtx addr;
11909 addr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0));
11910 fnaddr = gen_rtx_REG (Pmode, FIRST_REX_INT_REG + 3 /* R11 */);
11911 emit_move_insn (fnaddr, addr);
11912 fnaddr = gen_rtx_MEM (QImode, fnaddr);
11913 }
11914
11915 call = gen_rtx_CALL (VOIDmode, fnaddr, callarg1);
11916 if (retval)
11917 call = gen_rtx_SET (VOIDmode, retval, call);
11918 if (pop)
11919 {
11920 pop = gen_rtx_PLUS (Pmode, stack_pointer_rtx, pop);
11921 pop = gen_rtx_SET (VOIDmode, stack_pointer_rtx, pop);
11922 call = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, call, pop));
11923 }
11924
11925 call = emit_call_insn (call);
11926 if (use)
11927 CALL_INSN_FUNCTION_USAGE (call) = use;
11928 }
11929
11930 \f
11931 /* Clear stack slot assignments remembered from previous functions.
11932 This is called from INIT_EXPANDERS once before RTL is emitted for each
11933 function. */
11934
11935 static struct machine_function *
11936 ix86_init_machine_status (void)
11937 {
11938 struct machine_function *f;
11939
11940 f = ggc_alloc_cleared (sizeof (struct machine_function));
11941 f->use_fast_prologue_epilogue_nregs = -1;
11942
11943 return f;
11944 }
11945
11946 /* Return a MEM corresponding to a stack slot with mode MODE.
11947 Allocate a new slot if necessary.
11948
11949 The RTL for a function can have several slots available: N is
11950 which slot to use. */
11951
11952 rtx
11953 assign_386_stack_local (enum machine_mode mode, int n)
11954 {
11955 struct stack_local_entry *s;
11956
11957 if (n < 0 || n >= MAX_386_STACK_LOCALS)
11958 abort ();
11959
11960 for (s = ix86_stack_locals; s; s = s->next)
11961 if (s->mode == mode && s->n == n)
11962 return s->rtl;
11963
11964 s = (struct stack_local_entry *)
11965 ggc_alloc (sizeof (struct stack_local_entry));
11966 s->n = n;
11967 s->mode = mode;
11968 s->rtl = assign_stack_local (mode, GET_MODE_SIZE (mode), 0);
11969
11970 s->next = ix86_stack_locals;
11971 ix86_stack_locals = s;
11972 return s->rtl;
11973 }
11974
11975 /* Construct the SYMBOL_REF for the tls_get_addr function. */
11976
11977 static GTY(()) rtx ix86_tls_symbol;
11978 rtx
11979 ix86_tls_get_addr (void)
11980 {
11981
11982 if (!ix86_tls_symbol)
11983 {
11984 ix86_tls_symbol = gen_rtx_SYMBOL_REF (Pmode,
11985 (TARGET_GNU_TLS && !TARGET_64BIT)
11986 ? "___tls_get_addr"
11987 : "__tls_get_addr");
11988 }
11989
11990 return ix86_tls_symbol;
11991 }
11992 \f
11993 /* Calculate the length of the memory address in the instruction
11994 encoding. Does not include the one-byte modrm, opcode, or prefix. */
11995
11996 static int
11997 memory_address_length (rtx addr)
11998 {
11999 struct ix86_address parts;
12000 rtx base, index, disp;
12001 int len;
12002
12003 if (GET_CODE (addr) == PRE_DEC
12004 || GET_CODE (addr) == POST_INC
12005 || GET_CODE (addr) == PRE_MODIFY
12006 || GET_CODE (addr) == POST_MODIFY)
12007 return 0;
12008
12009 if (! ix86_decompose_address (addr, &parts))
12010 abort ();
12011
12012 base = parts.base;
12013 index = parts.index;
12014 disp = parts.disp;
12015 len = 0;
12016
12017 /* Rule of thumb:
12018 - esp as the base always wants an index,
12019 - ebp as the base always wants a displacement. */
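/* For example (counting only bytes beyond the opcode and the mandatory
   modrm byte): (%ecx) costs nothing extra, (%esp) needs a SIB byte,
   a direct address needs a 4-byte displacement, 8(%ebp) fits in a 1-byte
   displacement, and an index register always adds one (SIB) byte.  */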
12020
12021 /* Register Indirect. */
12022 if (base && !index && !disp)
12023 {
12024 /* esp (for its index) and ebp (for its displacement) need
12025 the two-byte modrm form. */
12026 if (addr == stack_pointer_rtx
12027 || addr == arg_pointer_rtx
12028 || addr == frame_pointer_rtx
12029 || addr == hard_frame_pointer_rtx)
12030 len = 1;
12031 }
12032
12033 /* Direct Addressing. */
12034 else if (disp && !base && !index)
12035 len = 4;
12036
12037 else
12038 {
12039 /* Find the length of the displacement constant. */
12040 if (disp)
12041 {
12042 if (GET_CODE (disp) == CONST_INT
12043 && CONST_OK_FOR_LETTER_P (INTVAL (disp), 'K')
12044 && base)
12045 len = 1;
12046 else
12047 len = 4;
12048 }
12049 /* ebp always wants a displacement. */
12050 else if (base == hard_frame_pointer_rtx)
12051 len = 1;
12052
12053 /* An index requires the two-byte modrm form.... */
12054 if (index
12055 /* ...like esp, which always wants an index. */
12056 || base == stack_pointer_rtx
12057 || base == arg_pointer_rtx
12058 || base == frame_pointer_rtx)
12059 len += 1;
12060 }
12061
12062 return len;
12063 }
12064
12065 /* Compute the default value for the "length_immediate" attribute. When
12066 SHORTFORM is set, expect that the insn has an 8-bit immediate alternative. */
12067 int
12068 ix86_attr_length_immediate_default (rtx insn, int shortform)
12069 {
12070 int len = 0;
12071 int i;
12072 extract_insn_cached (insn);
12073 for (i = recog_data.n_operands - 1; i >= 0; --i)
12074 if (CONSTANT_P (recog_data.operand[i]))
12075 {
12076 if (len)
12077 abort ();
12078 if (shortform
12079 && GET_CODE (recog_data.operand[i]) == CONST_INT
12080 && CONST_OK_FOR_LETTER_P (INTVAL (recog_data.operand[i]), 'K'))
12081 len = 1;
12082 else
12083 {
12084 switch (get_attr_mode (insn))
12085 {
12086 case MODE_QI:
12087 len+=1;
12088 break;
12089 case MODE_HI:
12090 len+=2;
12091 break;
12092 case MODE_SI:
12093 len+=4;
12094 break;
12095 /* Immediates for DImode instructions are encoded as 32-bit sign-extended values. */
12096 case MODE_DI:
12097 len+=4;
12098 break;
12099 default:
12100 fatal_insn ("unknown insn mode", insn);
12101 }
12102 }
12103 }
12104 return len;
12105 }
12106 /* Compute default value for "length_address" attribute. */
12107 int
12108 ix86_attr_length_address_default (rtx insn)
12109 {
12110 int i;
12111
12112 if (get_attr_type (insn) == TYPE_LEA)
12113 {
12114 rtx set = PATTERN (insn);
12115 if (GET_CODE (set) == SET)
12116 ;
12117 else if (GET_CODE (set) == PARALLEL
12118 && GET_CODE (XVECEXP (set, 0, 0)) == SET)
12119 set = XVECEXP (set, 0, 0);
12120 else
12121 {
12122 #ifdef ENABLE_CHECKING
12123 abort ();
12124 #endif
12125 return 0;
12126 }
12127
12128 return memory_address_length (SET_SRC (set));
12129 }
12130
12131 extract_insn_cached (insn);
12132 for (i = recog_data.n_operands - 1; i >= 0; --i)
12133 if (GET_CODE (recog_data.operand[i]) == MEM)
12134 {
12135 return memory_address_length (XEXP (recog_data.operand[i], 0));
12137 }
12138 return 0;
12139 }
12140 \f
12141 /* Return the maximum number of instructions a cpu can issue. */
12142
12143 static int
12144 ix86_issue_rate (void)
12145 {
12146 switch (ix86_tune)
12147 {
12148 case PROCESSOR_PENTIUM:
12149 case PROCESSOR_K6:
12150 return 2;
12151
12152 case PROCESSOR_PENTIUMPRO:
12153 case PROCESSOR_PENTIUM4:
12154 case PROCESSOR_ATHLON:
12155 case PROCESSOR_K8:
12156 case PROCESSOR_NOCONA:
12157 return 3;
12158
12159 default:
12160 return 1;
12161 }
12162 }
12163
12164 /* A subroutine of ix86_adjust_cost -- return true iff INSN reads the flags
12165 set by DEP_INSN and nothing else set by DEP_INSN. */
12166
12167 static int
12168 ix86_flags_dependant (rtx insn, rtx dep_insn, enum attr_type insn_type)
12169 {
12170 rtx set, set2;
12171
12172 /* Simplify the test for uninteresting insns. */
12173 if (insn_type != TYPE_SETCC
12174 && insn_type != TYPE_ICMOV
12175 && insn_type != TYPE_FCMOV
12176 && insn_type != TYPE_IBR)
12177 return 0;
12178
12179 if ((set = single_set (dep_insn)) != 0)
12180 {
12181 set = SET_DEST (set);
12182 set2 = NULL_RTX;
12183 }
12184 else if (GET_CODE (PATTERN (dep_insn)) == PARALLEL
12185 && XVECLEN (PATTERN (dep_insn), 0) == 2
12186 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 0)) == SET
12187 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 1)) == SET)
12188 {
12189 set = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
12190 set2 = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 1));
12191 }
12192 else
12193 return 0;
12194
12195 if (GET_CODE (set) != REG || REGNO (set) != FLAGS_REG)
12196 return 0;
12197
12198 /* This test is true if the dependent insn reads the flags but
12199 not any other potentially set register. */
12200 if (!reg_overlap_mentioned_p (set, PATTERN (insn)))
12201 return 0;
12202
12203 if (set2 && reg_overlap_mentioned_p (set2, PATTERN (insn)))
12204 return 0;
12205
12206 return 1;
12207 }
12208
12209 /* A subroutine of ix86_adjust_cost -- return true iff INSN has a memory
12210 address with operands set by DEP_INSN. */
12211
12212 static int
12213 ix86_agi_dependant (rtx insn, rtx dep_insn, enum attr_type insn_type)
12214 {
12215 rtx addr;
12216
12217 if (insn_type == TYPE_LEA
12218 && TARGET_PENTIUM)
12219 {
12220 addr = PATTERN (insn);
12221 if (GET_CODE (addr) == SET)
12222 ;
12223 else if (GET_CODE (addr) == PARALLEL
12224 && GET_CODE (XVECEXP (addr, 0, 0)) == SET)
12225 addr = XVECEXP (addr, 0, 0);
12226 else
12227 abort ();
12228 addr = SET_SRC (addr);
12229 }
12230 else
12231 {
12232 int i;
12233 extract_insn_cached (insn);
12234 for (i = recog_data.n_operands - 1; i >= 0; --i)
12235 if (GET_CODE (recog_data.operand[i]) == MEM)
12236 {
12237 addr = XEXP (recog_data.operand[i], 0);
12238 goto found;
12239 }
12240 return 0;
12241 found:;
12242 }
12243
12244 return modified_in_p (addr, dep_insn);
12245 }
12246
12247 static int
12248 ix86_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost)
12249 {
12250 enum attr_type insn_type, dep_insn_type;
12251 enum attr_memory memory;
12252 rtx set, set2;
12253 int dep_insn_code_number;
12254
12255 /* Anti and output dependencies have zero cost on all CPUs. */
12256 if (REG_NOTE_KIND (link) != 0)
12257 return 0;
12258
12259 dep_insn_code_number = recog_memoized (dep_insn);
12260
12261 /* If we can't recognize the insns, we can't really do anything. */
12262 if (dep_insn_code_number < 0 || recog_memoized (insn) < 0)
12263 return cost;
12264
12265 insn_type = get_attr_type (insn);
12266 dep_insn_type = get_attr_type (dep_insn);
12267
12268 switch (ix86_tune)
12269 {
12270 case PROCESSOR_PENTIUM:
12271 /* Address Generation Interlock adds a cycle of latency. */
12272 if (ix86_agi_dependant (insn, dep_insn, insn_type))
12273 cost += 1;
12274
12275 /* ??? Compares pair with jump/setcc. */
12276 if (ix86_flags_dependant (insn, dep_insn, insn_type))
12277 cost = 0;
12278
12279 /* Floating point stores require value to be ready one cycle earlier. */
12280 if (insn_type == TYPE_FMOV
12281 && get_attr_memory (insn) == MEMORY_STORE
12282 && !ix86_agi_dependant (insn, dep_insn, insn_type))
12283 cost += 1;
12284 break;
12285
12286 case PROCESSOR_PENTIUMPRO:
12287 memory = get_attr_memory (insn);
12288
12289 /* INT->FP conversion is expensive. */
12290 if (get_attr_fp_int_src (dep_insn))
12291 cost += 5;
12292
12293 /* There is one cycle extra latency between an FP op and a store. */
12294 if (insn_type == TYPE_FMOV
12295 && (set = single_set (dep_insn)) != NULL_RTX
12296 && (set2 = single_set (insn)) != NULL_RTX
12297 && rtx_equal_p (SET_DEST (set), SET_SRC (set2))
12298 && GET_CODE (SET_DEST (set2)) == MEM)
12299 cost += 1;
12300
12301 /* Model the ability of the reorder buffer to hide the latency of a load
12302 by executing it in parallel with the previous instruction, when the
12303 previous instruction is not needed to compute the address. */
12304 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
12305 && !ix86_agi_dependant (insn, dep_insn, insn_type))
12306 {
12307 /* Claim moves to take one cycle, as the core can issue one load
12308 at a time and the next load can start a cycle later. */
12309 if (dep_insn_type == TYPE_IMOV
12310 || dep_insn_type == TYPE_FMOV)
12311 cost = 1;
12312 else if (cost > 1)
12313 cost--;
12314 }
12315 break;
12316
12317 case PROCESSOR_K6:
12318 memory = get_attr_memory (insn);
12319
12320 /* The esp dependency is resolved before the instruction is really
12321 finished. */
12322 if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
12323 && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
12324 return 1;
12325
12326 /* INT->FP conversion is expensive. */
12327 if (get_attr_fp_int_src (dep_insn))
12328 cost += 5;
12329
12330 /* Model the ability of the reorder buffer to hide the latency of a load
12331 by executing it in parallel with the previous instruction, when the
12332 previous instruction is not needed to compute the address. */
12333 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
12334 && !ix86_agi_dependant (insn, dep_insn, insn_type))
12335 {
12336 /* Claim moves to take one cycle, as the core can issue one load
12337 at a time and the next load can start a cycle later. */
12338 if (dep_insn_type == TYPE_IMOV
12339 || dep_insn_type == TYPE_FMOV)
12340 cost = 1;
12341 else if (cost > 2)
12342 cost -= 2;
12343 else
12344 cost = 1;
12345 }
12346 break;
12347
12348 case PROCESSOR_ATHLON:
12349 case PROCESSOR_K8:
12350 memory = get_attr_memory (insn);
12351
12352 /* Model the ability of the reorder buffer to hide the latency of a load
12353 by executing it in parallel with the previous instruction, when the
12354 previous instruction is not needed to compute the address. */
12355 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
12356 && !ix86_agi_dependant (insn, dep_insn, insn_type))
12357 {
12358 enum attr_unit unit = get_attr_unit (insn);
12359 int loadcost = 3;
12360
12361 /* Because of the difference between the length of integer and
12362 floating unit pipeline preparation stages, the memory operands
12363 for floating point are cheaper.
12364
12365 ??? For Athlon the difference is most probably 2. */
12366 if (unit == UNIT_INTEGER || unit == UNIT_UNKNOWN)
12367 loadcost = 3;
12368 else
12369 loadcost = TARGET_ATHLON ? 2 : 0;
12370
12371 if (cost >= loadcost)
12372 cost -= loadcost;
12373 else
12374 cost = 0;
12375 }
12376
12377 default:
12378 break;
12379 }
12380
12381 return cost;
12382 }
12383
12384 /* How many alternative schedules to try. This should be as wide as the
12385 scheduling freedom in the DFA, but no wider. Making this value too
12386 large results in extra work for the scheduler. */
12387
12388 static int
12389 ia32_multipass_dfa_lookahead (void)
12390 {
12391 if (ix86_tune == PROCESSOR_PENTIUM)
12392 return 2;
12393
12394 if (ix86_tune == PROCESSOR_PENTIUMPRO
12395 || ix86_tune == PROCESSOR_K6)
12396 return 1;
12397
12398 else
12399 return 0;
12400 }
12401
12402 \f
12403 /* Compute the alignment given to a constant that is being placed in memory.
12404 EXP is the constant and ALIGN is the alignment that the object would
12405 ordinarily have.
12406 The value of this function is used instead of that alignment to align
12407 the object. */
12408
12409 int
12410 ix86_constant_alignment (tree exp, int align)
12411 {
12412 if (TREE_CODE (exp) == REAL_CST)
12413 {
12414 if (TYPE_MODE (TREE_TYPE (exp)) == DFmode && align < 64)
12415 return 64;
12416 else if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (exp))) && align < 128)
12417 return 128;
12418 }
12419 else if (!optimize_size && TREE_CODE (exp) == STRING_CST
12420 && TREE_STRING_LENGTH (exp) >= 31 && align < BITS_PER_WORD)
12421 return BITS_PER_WORD;
12422
12423 return align;
12424 }
12425
12426 /* Compute the alignment for a static variable.
12427 TYPE is the data type, and ALIGN is the alignment that
12428 the object would ordinarily have. The value of this function is used
12429 instead of that alignment to align the object. */
12430
12431 int
12432 ix86_data_alignment (tree type, int align)
12433 {
12434 if (AGGREGATE_TYPE_P (type)
12435 && TYPE_SIZE (type)
12436 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
12437 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 256
12438 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 256)
12439 return 256;
12440
12441 /* The x86-64 ABI requires arrays greater than 16 bytes to be aligned
12442 to a 16-byte boundary. */
12443 if (TARGET_64BIT)
12444 {
12445 if (AGGREGATE_TYPE_P (type)
12446 && TYPE_SIZE (type)
12447 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
12448 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 128
12449 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
12450 return 128;
12451 }
12452
12453 if (TREE_CODE (type) == ARRAY_TYPE)
12454 {
12455 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
12456 return 64;
12457 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
12458 return 128;
12459 }
12460 else if (TREE_CODE (type) == COMPLEX_TYPE)
12461 {
12462
12463 if (TYPE_MODE (type) == DCmode && align < 64)
12464 return 64;
12465 if (TYPE_MODE (type) == XCmode && align < 128)
12466 return 128;
12467 }
12468 else if ((TREE_CODE (type) == RECORD_TYPE
12469 || TREE_CODE (type) == UNION_TYPE
12470 || TREE_CODE (type) == QUAL_UNION_TYPE)
12471 && TYPE_FIELDS (type))
12472 {
12473 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
12474 return 64;
12475 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
12476 return 128;
12477 }
12478 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
12479 || TREE_CODE (type) == INTEGER_TYPE)
12480 {
12481 if (TYPE_MODE (type) == DFmode && align < 64)
12482 return 64;
12483 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
12484 return 128;
12485 }
12486
12487 return align;
12488 }
12489
12490 /* Compute the alignment for a local variable.
12491 TYPE is the data type, and ALIGN is the alignment that
12492 the object would ordinarily have. The value of this macro is used
12493 instead of that alignment to align the object. */
12494
12495 int
12496 ix86_local_alignment (tree type, int align)
12497 {
12498 /* The x86-64 ABI requires arrays greater than 16 bytes to be aligned
12499 to a 16-byte boundary. */
12500 if (TARGET_64BIT)
12501 {
12502 if (AGGREGATE_TYPE_P (type)
12503 && TYPE_SIZE (type)
12504 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
12505 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 16
12506 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
12507 return 128;
12508 }
12509 if (TREE_CODE (type) == ARRAY_TYPE)
12510 {
12511 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
12512 return 64;
12513 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
12514 return 128;
12515 }
12516 else if (TREE_CODE (type) == COMPLEX_TYPE)
12517 {
12518 if (TYPE_MODE (type) == DCmode && align < 64)
12519 return 64;
12520 if (TYPE_MODE (type) == XCmode && align < 128)
12521 return 128;
12522 }
12523 else if ((TREE_CODE (type) == RECORD_TYPE
12524 || TREE_CODE (type) == UNION_TYPE
12525 || TREE_CODE (type) == QUAL_UNION_TYPE)
12526 && TYPE_FIELDS (type))
12527 {
12528 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
12529 return 64;
12530 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
12531 return 128;
12532 }
12533 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
12534 || TREE_CODE (type) == INTEGER_TYPE)
12535 {
12536
12537 if (TYPE_MODE (type) == DFmode && align < 64)
12538 return 64;
12539 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
12540 return 128;
12541 }
12542 return align;
12543 }
12544 \f
12545 /* Emit RTL insns to initialize the variable parts of a trampoline.
12546 FNADDR is an RTX for the address of the function's pure code.
12547 CXT is an RTX for the static chain value for the function. */
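/* Roughly, the bytes emitted below decode as follows.  32-bit (the static
   chain lives in %ecx):
   b9 <cxt:4> e9 <rel32:4>          movl $cxt, %ecx; jmp fnaddr
   64-bit (static chain in %r10, %r11 used as a scratch register):
   41 bb <imm32> or 49 bb <imm64>   movl/movabs $fnaddr, %r11
   49 ba <imm64>                    movabs $cxt, %r10
   49 ff e3                         jmp *%r11  */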
12548 void
12549 x86_initialize_trampoline (rtx tramp, rtx fnaddr, rtx cxt)
12550 {
12551 if (!TARGET_64BIT)
12552 {
12553 /* Compute offset from the end of the jmp to the target function. */
12554 rtx disp = expand_binop (SImode, sub_optab, fnaddr,
12555 plus_constant (tramp, 10),
12556 NULL_RTX, 1, OPTAB_DIRECT);
12557 emit_move_insn (gen_rtx_MEM (QImode, tramp),
12558 gen_int_mode (0xb9, QImode));
12559 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 1)), cxt);
12560 emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, 5)),
12561 gen_int_mode (0xe9, QImode));
12562 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 6)), disp);
12563 }
12564 else
12565 {
12566 int offset = 0;
12567 /* Try to load the address using the shorter movl instead of movabs.
12568 We may want to support movq for kernel mode, but the kernel does not
12569 use trampolines at the moment. */
12570 if (x86_64_zero_extended_value (fnaddr))
12571 {
12572 fnaddr = copy_to_mode_reg (DImode, fnaddr);
12573 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
12574 gen_int_mode (0xbb41, HImode));
12575 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, offset + 2)),
12576 gen_lowpart (SImode, fnaddr));
12577 offset += 6;
12578 }
12579 else
12580 {
12581 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
12582 gen_int_mode (0xbb49, HImode));
12583 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
12584 fnaddr);
12585 offset += 10;
12586 }
12587 /* Load static chain using movabs to r10. */
12588 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
12589 gen_int_mode (0xba49, HImode));
12590 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
12591 cxt);
12592 offset += 10;
12593 /* Jump through r11. */
12594 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
12595 gen_int_mode (0xff49, HImode));
12596 emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, offset+2)),
12597 gen_int_mode (0xe3, QImode));
12598 offset += 3;
12599 if (offset > TRAMPOLINE_SIZE)
12600 abort ();
12601 }
12602
12603 #ifdef TRANSFER_FROM_TRAMPOLINE
12604 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"),
12605 LCT_NORMAL, VOIDmode, 1, tramp, Pmode);
12606 #endif
12607 }
12608 \f
12609 #define def_builtin(MASK, NAME, TYPE, CODE) \
12610 do { \
12611 if ((MASK) & target_flags \
12612 && (!((MASK) & MASK_64BIT) || TARGET_64BIT)) \
12613 builtin_function ((NAME), (TYPE), (CODE), BUILT_IN_MD, \
12614 NULL, NULL_TREE); \
12615 } while (0)
12616
12617 struct builtin_description
12618 {
12619 const unsigned int mask;
12620 const enum insn_code icode;
12621 const char *const name;
12622 const enum ix86_builtins code;
12623 const enum rtx_code comparison;
12624 const unsigned int flag;
12625 };
12626
12627 static const struct builtin_description bdesc_comi[] =
12628 {
12629 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS, UNEQ, 0 },
12630 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comilt", IX86_BUILTIN_COMILTSS, UNLT, 0 },
12631 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comile", IX86_BUILTIN_COMILESS, UNLE, 0 },
12632 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comigt", IX86_BUILTIN_COMIGTSS, GT, 0 },
12633 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comige", IX86_BUILTIN_COMIGESS, GE, 0 },
12634 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comineq", IX86_BUILTIN_COMINEQSS, LTGT, 0 },
12635 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomieq", IX86_BUILTIN_UCOMIEQSS, UNEQ, 0 },
12636 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomilt", IX86_BUILTIN_UCOMILTSS, UNLT, 0 },
12637 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomile", IX86_BUILTIN_UCOMILESS, UNLE, 0 },
12638 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomigt", IX86_BUILTIN_UCOMIGTSS, GT, 0 },
12639 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomige", IX86_BUILTIN_UCOMIGESS, GE, 0 },
12640 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS, LTGT, 0 },
12641 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdeq", IX86_BUILTIN_COMIEQSD, UNEQ, 0 },
12642 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdlt", IX86_BUILTIN_COMILTSD, UNLT, 0 },
12643 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdle", IX86_BUILTIN_COMILESD, UNLE, 0 },
12644 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdgt", IX86_BUILTIN_COMIGTSD, GT, 0 },
12645 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdge", IX86_BUILTIN_COMIGESD, GE, 0 },
12646 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdneq", IX86_BUILTIN_COMINEQSD, LTGT, 0 },
12647 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdeq", IX86_BUILTIN_UCOMIEQSD, UNEQ, 0 },
12648 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdlt", IX86_BUILTIN_UCOMILTSD, UNLT, 0 },
12649 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdle", IX86_BUILTIN_UCOMILESD, UNLE, 0 },
12650 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdgt", IX86_BUILTIN_UCOMIGTSD, GT, 0 },
12651 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdge", IX86_BUILTIN_UCOMIGESD, GE, 0 },
12652 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdneq", IX86_BUILTIN_UCOMINEQSD, LTGT, 0 },
12653 };
12654
12655 static const struct builtin_description bdesc_2arg[] =
12656 {
12657 /* SSE */
12658 { MASK_SSE, CODE_FOR_addv4sf3, "__builtin_ia32_addps", IX86_BUILTIN_ADDPS, 0, 0 },
12659 { MASK_SSE, CODE_FOR_subv4sf3, "__builtin_ia32_subps", IX86_BUILTIN_SUBPS, 0, 0 },
12660 { MASK_SSE, CODE_FOR_mulv4sf3, "__builtin_ia32_mulps", IX86_BUILTIN_MULPS, 0, 0 },
12661 { MASK_SSE, CODE_FOR_divv4sf3, "__builtin_ia32_divps", IX86_BUILTIN_DIVPS, 0, 0 },
12662 { MASK_SSE, CODE_FOR_vmaddv4sf3, "__builtin_ia32_addss", IX86_BUILTIN_ADDSS, 0, 0 },
12663 { MASK_SSE, CODE_FOR_vmsubv4sf3, "__builtin_ia32_subss", IX86_BUILTIN_SUBSS, 0, 0 },
12664 { MASK_SSE, CODE_FOR_vmmulv4sf3, "__builtin_ia32_mulss", IX86_BUILTIN_MULSS, 0, 0 },
12665 { MASK_SSE, CODE_FOR_vmdivv4sf3, "__builtin_ia32_divss", IX86_BUILTIN_DIVSS, 0, 0 },
12666
12667 { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpeqps", IX86_BUILTIN_CMPEQPS, EQ, 0 },
12668 { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpltps", IX86_BUILTIN_CMPLTPS, LT, 0 },
12669 { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpleps", IX86_BUILTIN_CMPLEPS, LE, 0 },
12670 { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS, LT, 1 },
12671 { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpgeps", IX86_BUILTIN_CMPGEPS, LE, 1 },
12672 { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpunordps", IX86_BUILTIN_CMPUNORDPS, UNORDERED, 0 },
12673 { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpneqps", IX86_BUILTIN_CMPNEQPS, EQ, 0 },
12674 { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpnltps", IX86_BUILTIN_CMPNLTPS, LT, 0 },
12675 { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpnleps", IX86_BUILTIN_CMPNLEPS, LE, 0 },
12676 { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpngtps", IX86_BUILTIN_CMPNGTPS, LT, 1 },
12677 { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpngeps", IX86_BUILTIN_CMPNGEPS, LE, 1 },
12678 { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpordps", IX86_BUILTIN_CMPORDPS, UNORDERED, 0 },
12679 { MASK_SSE, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpeqss", IX86_BUILTIN_CMPEQSS, EQ, 0 },
12680 { MASK_SSE, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpltss", IX86_BUILTIN_CMPLTSS, LT, 0 },
12681 { MASK_SSE, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpless", IX86_BUILTIN_CMPLESS, LE, 0 },
12682 { MASK_SSE, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpunordss", IX86_BUILTIN_CMPUNORDSS, UNORDERED, 0 },
12683 { MASK_SSE, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpneqss", IX86_BUILTIN_CMPNEQSS, EQ, 0 },
12684 { MASK_SSE, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpnltss", IX86_BUILTIN_CMPNLTSS, LT, 0 },
12685 { MASK_SSE, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS, LE, 0 },
12686 { MASK_SSE, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS, UNORDERED, 0 },
12687
12688 { MASK_SSE, CODE_FOR_sminv4sf3, "__builtin_ia32_minps", IX86_BUILTIN_MINPS, 0, 0 },
12689 { MASK_SSE, CODE_FOR_smaxv4sf3, "__builtin_ia32_maxps", IX86_BUILTIN_MAXPS, 0, 0 },
12690 { MASK_SSE, CODE_FOR_vmsminv4sf3, "__builtin_ia32_minss", IX86_BUILTIN_MINSS, 0, 0 },
12691 { MASK_SSE, CODE_FOR_vmsmaxv4sf3, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS, 0, 0 },
12692
12693 { MASK_SSE, CODE_FOR_sse_andv4sf3, "__builtin_ia32_andps", IX86_BUILTIN_ANDPS, 0, 0 },
12694 { MASK_SSE, CODE_FOR_sse_nandv4sf3, "__builtin_ia32_andnps", IX86_BUILTIN_ANDNPS, 0, 0 },
12695 { MASK_SSE, CODE_FOR_sse_iorv4sf3, "__builtin_ia32_orps", IX86_BUILTIN_ORPS, 0, 0 },
12696 { MASK_SSE, CODE_FOR_sse_xorv4sf3, "__builtin_ia32_xorps", IX86_BUILTIN_XORPS, 0, 0 },
12697
12698 { MASK_SSE, CODE_FOR_sse_movss, "__builtin_ia32_movss", IX86_BUILTIN_MOVSS, 0, 0 },
12699 { MASK_SSE, CODE_FOR_sse_movhlps, "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS, 0, 0 },
12700 { MASK_SSE, CODE_FOR_sse_movlhps, "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS, 0, 0 },
12701 { MASK_SSE, CODE_FOR_sse_unpckhps, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS, 0, 0 },
12702 { MASK_SSE, CODE_FOR_sse_unpcklps, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS, 0, 0 },
12703
12704 /* MMX */
12705 { MASK_MMX, CODE_FOR_addv8qi3, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB, 0, 0 },
12706 { MASK_MMX, CODE_FOR_addv4hi3, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW, 0, 0 },
12707 { MASK_MMX, CODE_FOR_addv2si3, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD, 0, 0 },
12708 { MASK_MMX, CODE_FOR_mmx_adddi3, "__builtin_ia32_paddq", IX86_BUILTIN_PADDQ, 0, 0 },
12709 { MASK_MMX, CODE_FOR_subv8qi3, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB, 0, 0 },
12710 { MASK_MMX, CODE_FOR_subv4hi3, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW, 0, 0 },
12711 { MASK_MMX, CODE_FOR_subv2si3, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD, 0, 0 },
12712 { MASK_MMX, CODE_FOR_mmx_subdi3, "__builtin_ia32_psubq", IX86_BUILTIN_PSUBQ, 0, 0 },
12713
12714 { MASK_MMX, CODE_FOR_ssaddv8qi3, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB, 0, 0 },
12715 { MASK_MMX, CODE_FOR_ssaddv4hi3, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW, 0, 0 },
12716 { MASK_MMX, CODE_FOR_sssubv8qi3, "__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB, 0, 0 },
12717 { MASK_MMX, CODE_FOR_sssubv4hi3, "__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW, 0, 0 },
12718 { MASK_MMX, CODE_FOR_usaddv8qi3, "__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB, 0, 0 },
12719 { MASK_MMX, CODE_FOR_usaddv4hi3, "__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW, 0, 0 },
12720 { MASK_MMX, CODE_FOR_ussubv8qi3, "__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB, 0, 0 },
12721 { MASK_MMX, CODE_FOR_ussubv4hi3, "__builtin_ia32_psubusw", IX86_BUILTIN_PSUBUSW, 0, 0 },
12722
12723 { MASK_MMX, CODE_FOR_mulv4hi3, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW, 0, 0 },
12724 { MASK_MMX, CODE_FOR_smulv4hi3_highpart, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW, 0, 0 },
12725 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_umulv4hi3_highpart, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW, 0, 0 },
12726
12727 { MASK_MMX, CODE_FOR_mmx_anddi3, "__builtin_ia32_pand", IX86_BUILTIN_PAND, 0, 0 },
12728 { MASK_MMX, CODE_FOR_mmx_nanddi3, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN, 0, 0 },
12729 { MASK_MMX, CODE_FOR_mmx_iordi3, "__builtin_ia32_por", IX86_BUILTIN_POR, 0, 0 },
12730 { MASK_MMX, CODE_FOR_mmx_xordi3, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR, 0, 0 },
12731
12732 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB, 0, 0 },
12733 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_uavgv4hi3, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW, 0, 0 },
12734
12735 { MASK_MMX, CODE_FOR_eqv8qi3, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB, 0, 0 },
12736 { MASK_MMX, CODE_FOR_eqv4hi3, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW, 0, 0 },
12737 { MASK_MMX, CODE_FOR_eqv2si3, "__builtin_ia32_pcmpeqd", IX86_BUILTIN_PCMPEQD, 0, 0 },
12738 { MASK_MMX, CODE_FOR_gtv8qi3, "__builtin_ia32_pcmpgtb", IX86_BUILTIN_PCMPGTB, 0, 0 },
12739 { MASK_MMX, CODE_FOR_gtv4hi3, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW, 0, 0 },
12740 { MASK_MMX, CODE_FOR_gtv2si3, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD, 0, 0 },
12741
12742 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_umaxv8qi3, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB, 0, 0 },
12743 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_smaxv4hi3, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW, 0, 0 },
12744 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_uminv8qi3, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB, 0, 0 },
12745 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_sminv4hi3, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW, 0, 0 },
12746
12747 { MASK_MMX, CODE_FOR_mmx_punpckhbw, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW, 0, 0 },
12748 { MASK_MMX, CODE_FOR_mmx_punpckhwd, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD, 0, 0 },
12749 { MASK_MMX, CODE_FOR_mmx_punpckhdq, "__builtin_ia32_punpckhdq", IX86_BUILTIN_PUNPCKHDQ, 0, 0 },
12750 { MASK_MMX, CODE_FOR_mmx_punpcklbw, "__builtin_ia32_punpcklbw", IX86_BUILTIN_PUNPCKLBW, 0, 0 },
12751 { MASK_MMX, CODE_FOR_mmx_punpcklwd, "__builtin_ia32_punpcklwd", IX86_BUILTIN_PUNPCKLWD, 0, 0 },
12752 { MASK_MMX, CODE_FOR_mmx_punpckldq, "__builtin_ia32_punpckldq", IX86_BUILTIN_PUNPCKLDQ, 0, 0 },
12753
12754 /* Special. */
12755 { MASK_MMX, CODE_FOR_mmx_packsswb, 0, IX86_BUILTIN_PACKSSWB, 0, 0 },
12756 { MASK_MMX, CODE_FOR_mmx_packssdw, 0, IX86_BUILTIN_PACKSSDW, 0, 0 },
12757 { MASK_MMX, CODE_FOR_mmx_packuswb, 0, IX86_BUILTIN_PACKUSWB, 0, 0 },
12758
12759 { MASK_SSE, CODE_FOR_cvtpi2ps, 0, IX86_BUILTIN_CVTPI2PS, 0, 0 },
12760 { MASK_SSE, CODE_FOR_cvtsi2ss, 0, IX86_BUILTIN_CVTSI2SS, 0, 0 },
12761 { MASK_SSE | MASK_64BIT, CODE_FOR_cvtsi2ssq, 0, IX86_BUILTIN_CVTSI642SS, 0, 0 },
12762
12763 { MASK_MMX, CODE_FOR_ashlv4hi3, 0, IX86_BUILTIN_PSLLW, 0, 0 },
12764 { MASK_MMX, CODE_FOR_ashlv4hi3, 0, IX86_BUILTIN_PSLLWI, 0, 0 },
12765 { MASK_MMX, CODE_FOR_ashlv2si3, 0, IX86_BUILTIN_PSLLD, 0, 0 },
12766 { MASK_MMX, CODE_FOR_ashlv2si3, 0, IX86_BUILTIN_PSLLDI, 0, 0 },
12767 { MASK_MMX, CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQ, 0, 0 },
12768 { MASK_MMX, CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQI, 0, 0 },
12769
12770 { MASK_MMX, CODE_FOR_lshrv4hi3, 0, IX86_BUILTIN_PSRLW, 0, 0 },
12771 { MASK_MMX, CODE_FOR_lshrv4hi3, 0, IX86_BUILTIN_PSRLWI, 0, 0 },
12772 { MASK_MMX, CODE_FOR_lshrv2si3, 0, IX86_BUILTIN_PSRLD, 0, 0 },
12773 { MASK_MMX, CODE_FOR_lshrv2si3, 0, IX86_BUILTIN_PSRLDI, 0, 0 },
12774 { MASK_MMX, CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQ, 0, 0 },
12775 { MASK_MMX, CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQI, 0, 0 },
12776
12777 { MASK_MMX, CODE_FOR_ashrv4hi3, 0, IX86_BUILTIN_PSRAW, 0, 0 },
12778 { MASK_MMX, CODE_FOR_ashrv4hi3, 0, IX86_BUILTIN_PSRAWI, 0, 0 },
12779 { MASK_MMX, CODE_FOR_ashrv2si3, 0, IX86_BUILTIN_PSRAD, 0, 0 },
12780 { MASK_MMX, CODE_FOR_ashrv2si3, 0, IX86_BUILTIN_PSRADI, 0, 0 },
12781
12782 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_psadbw, 0, IX86_BUILTIN_PSADBW, 0, 0 },
12783 { MASK_MMX, CODE_FOR_mmx_pmaddwd, 0, IX86_BUILTIN_PMADDWD, 0, 0 },
12784
12785 /* SSE2 */
12786 { MASK_SSE2, CODE_FOR_addv2df3, "__builtin_ia32_addpd", IX86_BUILTIN_ADDPD, 0, 0 },
12787 { MASK_SSE2, CODE_FOR_subv2df3, "__builtin_ia32_subpd", IX86_BUILTIN_SUBPD, 0, 0 },
12788 { MASK_SSE2, CODE_FOR_mulv2df3, "__builtin_ia32_mulpd", IX86_BUILTIN_MULPD, 0, 0 },
12789 { MASK_SSE2, CODE_FOR_divv2df3, "__builtin_ia32_divpd", IX86_BUILTIN_DIVPD, 0, 0 },
12790 { MASK_SSE2, CODE_FOR_vmaddv2df3, "__builtin_ia32_addsd", IX86_BUILTIN_ADDSD, 0, 0 },
12791 { MASK_SSE2, CODE_FOR_vmsubv2df3, "__builtin_ia32_subsd", IX86_BUILTIN_SUBSD, 0, 0 },
12792 { MASK_SSE2, CODE_FOR_vmmulv2df3, "__builtin_ia32_mulsd", IX86_BUILTIN_MULSD, 0, 0 },
12793 { MASK_SSE2, CODE_FOR_vmdivv2df3, "__builtin_ia32_divsd", IX86_BUILTIN_DIVSD, 0, 0 },
12794
12795 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpeqpd", IX86_BUILTIN_CMPEQPD, EQ, 0 },
12796 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpltpd", IX86_BUILTIN_CMPLTPD, LT, 0 },
12797 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmplepd", IX86_BUILTIN_CMPLEPD, LE, 0 },
12798 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpgtpd", IX86_BUILTIN_CMPGTPD, LT, 1 },
12799 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpgepd", IX86_BUILTIN_CMPGEPD, LE, 1 },
12800 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpunordpd", IX86_BUILTIN_CMPUNORDPD, UNORDERED, 0 },
12801 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpneqpd", IX86_BUILTIN_CMPNEQPD, EQ, 0 },
12802 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpnltpd", IX86_BUILTIN_CMPNLTPD, LT, 0 },
12803 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpnlepd", IX86_BUILTIN_CMPNLEPD, LE, 0 },
12804 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpngtpd", IX86_BUILTIN_CMPNGTPD, LT, 1 },
12805 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpngepd", IX86_BUILTIN_CMPNGEPD, LE, 1 },
12806 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpordpd", IX86_BUILTIN_CMPORDPD, UNORDERED, 0 },
12807 { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmpeqsd", IX86_BUILTIN_CMPEQSD, EQ, 0 },
12808 { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmpltsd", IX86_BUILTIN_CMPLTSD, LT, 0 },
12809 { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmplesd", IX86_BUILTIN_CMPLESD, LE, 0 },
12810 { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmpunordsd", IX86_BUILTIN_CMPUNORDSD, UNORDERED, 0 },
12811 { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpneqsd", IX86_BUILTIN_CMPNEQSD, EQ, 0 },
12812 { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpnltsd", IX86_BUILTIN_CMPNLTSD, LT, 0 },
12813 { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpnlesd", IX86_BUILTIN_CMPNLESD, LE, 0 },
12814 { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpordsd", IX86_BUILTIN_CMPORDSD, UNORDERED, 0 },
12815
12816 { MASK_SSE2, CODE_FOR_sminv2df3, "__builtin_ia32_minpd", IX86_BUILTIN_MINPD, 0, 0 },
12817 { MASK_SSE2, CODE_FOR_smaxv2df3, "__builtin_ia32_maxpd", IX86_BUILTIN_MAXPD, 0, 0 },
12818 { MASK_SSE2, CODE_FOR_vmsminv2df3, "__builtin_ia32_minsd", IX86_BUILTIN_MINSD, 0, 0 },
12819 { MASK_SSE2, CODE_FOR_vmsmaxv2df3, "__builtin_ia32_maxsd", IX86_BUILTIN_MAXSD, 0, 0 },
12820
12821 { MASK_SSE2, CODE_FOR_sse2_andv2df3, "__builtin_ia32_andpd", IX86_BUILTIN_ANDPD, 0, 0 },
12822 { MASK_SSE2, CODE_FOR_sse2_nandv2df3, "__builtin_ia32_andnpd", IX86_BUILTIN_ANDNPD, 0, 0 },
12823 { MASK_SSE2, CODE_FOR_sse2_iorv2df3, "__builtin_ia32_orpd", IX86_BUILTIN_ORPD, 0, 0 },
12824 { MASK_SSE2, CODE_FOR_sse2_xorv2df3, "__builtin_ia32_xorpd", IX86_BUILTIN_XORPD, 0, 0 },
12825
12826 { MASK_SSE2, CODE_FOR_sse2_movsd, "__builtin_ia32_movsd", IX86_BUILTIN_MOVSD, 0, 0 },
12827 { MASK_SSE2, CODE_FOR_sse2_unpckhpd, "__builtin_ia32_unpckhpd", IX86_BUILTIN_UNPCKHPD, 0, 0 },
12828 { MASK_SSE2, CODE_FOR_sse2_unpcklpd, "__builtin_ia32_unpcklpd", IX86_BUILTIN_UNPCKLPD, 0, 0 },
12829
12830 /* SSE2 MMX */
12831 { MASK_SSE2, CODE_FOR_addv16qi3, "__builtin_ia32_paddb128", IX86_BUILTIN_PADDB128, 0, 0 },
12832 { MASK_SSE2, CODE_FOR_addv8hi3, "__builtin_ia32_paddw128", IX86_BUILTIN_PADDW128, 0, 0 },
12833 { MASK_SSE2, CODE_FOR_addv4si3, "__builtin_ia32_paddd128", IX86_BUILTIN_PADDD128, 0, 0 },
12834 { MASK_SSE2, CODE_FOR_addv2di3, "__builtin_ia32_paddq128", IX86_BUILTIN_PADDQ128, 0, 0 },
12835 { MASK_SSE2, CODE_FOR_subv16qi3, "__builtin_ia32_psubb128", IX86_BUILTIN_PSUBB128, 0, 0 },
12836 { MASK_SSE2, CODE_FOR_subv8hi3, "__builtin_ia32_psubw128", IX86_BUILTIN_PSUBW128, 0, 0 },
12837 { MASK_SSE2, CODE_FOR_subv4si3, "__builtin_ia32_psubd128", IX86_BUILTIN_PSUBD128, 0, 0 },
12838 { MASK_SSE2, CODE_FOR_subv2di3, "__builtin_ia32_psubq128", IX86_BUILTIN_PSUBQ128, 0, 0 },
12839
12840 { MASK_SSE2, CODE_FOR_ssaddv16qi3, "__builtin_ia32_paddsb128", IX86_BUILTIN_PADDSB128, 0, 0 },
12841 { MASK_SSE2, CODE_FOR_ssaddv8hi3, "__builtin_ia32_paddsw128", IX86_BUILTIN_PADDSW128, 0, 0 },
12842 { MASK_SSE2, CODE_FOR_sssubv16qi3, "__builtin_ia32_psubsb128", IX86_BUILTIN_PSUBSB128, 0, 0 },
12843 { MASK_SSE2, CODE_FOR_sssubv8hi3, "__builtin_ia32_psubsw128", IX86_BUILTIN_PSUBSW128, 0, 0 },
12844 { MASK_SSE2, CODE_FOR_usaddv16qi3, "__builtin_ia32_paddusb128", IX86_BUILTIN_PADDUSB128, 0, 0 },
12845 { MASK_SSE2, CODE_FOR_usaddv8hi3, "__builtin_ia32_paddusw128", IX86_BUILTIN_PADDUSW128, 0, 0 },
12846 { MASK_SSE2, CODE_FOR_ussubv16qi3, "__builtin_ia32_psubusb128", IX86_BUILTIN_PSUBUSB128, 0, 0 },
12847 { MASK_SSE2, CODE_FOR_ussubv8hi3, "__builtin_ia32_psubusw128", IX86_BUILTIN_PSUBUSW128, 0, 0 },
12848
12849 { MASK_SSE2, CODE_FOR_mulv8hi3, "__builtin_ia32_pmullw128", IX86_BUILTIN_PMULLW128, 0, 0 },
12850 { MASK_SSE2, CODE_FOR_smulv8hi3_highpart, "__builtin_ia32_pmulhw128", IX86_BUILTIN_PMULHW128, 0, 0 },
12851 { MASK_SSE2, CODE_FOR_sse2_umulsidi3, "__builtin_ia32_pmuludq", IX86_BUILTIN_PMULUDQ, 0, 0 },
12852 { MASK_SSE2, CODE_FOR_sse2_umulv2siv2di3, "__builtin_ia32_pmuludq128", IX86_BUILTIN_PMULUDQ128, 0, 0 },
12853
12854 { MASK_SSE2, CODE_FOR_sse2_andv2di3, "__builtin_ia32_pand128", IX86_BUILTIN_PAND128, 0, 0 },
12855 { MASK_SSE2, CODE_FOR_sse2_nandv2di3, "__builtin_ia32_pandn128", IX86_BUILTIN_PANDN128, 0, 0 },
12856 { MASK_SSE2, CODE_FOR_sse2_iorv2di3, "__builtin_ia32_por128", IX86_BUILTIN_POR128, 0, 0 },
12857 { MASK_SSE2, CODE_FOR_sse2_xorv2di3, "__builtin_ia32_pxor128", IX86_BUILTIN_PXOR128, 0, 0 },
12858
12859 { MASK_SSE2, CODE_FOR_sse2_uavgv16qi3, "__builtin_ia32_pavgb128", IX86_BUILTIN_PAVGB128, 0, 0 },
12860 { MASK_SSE2, CODE_FOR_sse2_uavgv8hi3, "__builtin_ia32_pavgw128", IX86_BUILTIN_PAVGW128, 0, 0 },
12861
12862 { MASK_SSE2, CODE_FOR_eqv16qi3, "__builtin_ia32_pcmpeqb128", IX86_BUILTIN_PCMPEQB128, 0, 0 },
12863 { MASK_SSE2, CODE_FOR_eqv8hi3, "__builtin_ia32_pcmpeqw128", IX86_BUILTIN_PCMPEQW128, 0, 0 },
12864 { MASK_SSE2, CODE_FOR_eqv4si3, "__builtin_ia32_pcmpeqd128", IX86_BUILTIN_PCMPEQD128, 0, 0 },
12865 { MASK_SSE2, CODE_FOR_gtv16qi3, "__builtin_ia32_pcmpgtb128", IX86_BUILTIN_PCMPGTB128, 0, 0 },
12866 { MASK_SSE2, CODE_FOR_gtv8hi3, "__builtin_ia32_pcmpgtw128", IX86_BUILTIN_PCMPGTW128, 0, 0 },
12867 { MASK_SSE2, CODE_FOR_gtv4si3, "__builtin_ia32_pcmpgtd128", IX86_BUILTIN_PCMPGTD128, 0, 0 },
12868
12869 { MASK_SSE2, CODE_FOR_umaxv16qi3, "__builtin_ia32_pmaxub128", IX86_BUILTIN_PMAXUB128, 0, 0 },
12870 { MASK_SSE2, CODE_FOR_smaxv8hi3, "__builtin_ia32_pmaxsw128", IX86_BUILTIN_PMAXSW128, 0, 0 },
12871 { MASK_SSE2, CODE_FOR_uminv16qi3, "__builtin_ia32_pminub128", IX86_BUILTIN_PMINUB128, 0, 0 },
12872 { MASK_SSE2, CODE_FOR_sminv8hi3, "__builtin_ia32_pminsw128", IX86_BUILTIN_PMINSW128, 0, 0 },
12873
12874 { MASK_SSE2, CODE_FOR_sse2_punpckhbw, "__builtin_ia32_punpckhbw128", IX86_BUILTIN_PUNPCKHBW128, 0, 0 },
12875 { MASK_SSE2, CODE_FOR_sse2_punpckhwd, "__builtin_ia32_punpckhwd128", IX86_BUILTIN_PUNPCKHWD128, 0, 0 },
12876 { MASK_SSE2, CODE_FOR_sse2_punpckhdq, "__builtin_ia32_punpckhdq128", IX86_BUILTIN_PUNPCKHDQ128, 0, 0 },
12877 { MASK_SSE2, CODE_FOR_sse2_punpckhqdq, "__builtin_ia32_punpckhqdq128", IX86_BUILTIN_PUNPCKHQDQ128, 0, 0 },
12878 { MASK_SSE2, CODE_FOR_sse2_punpcklbw, "__builtin_ia32_punpcklbw128", IX86_BUILTIN_PUNPCKLBW128, 0, 0 },
12879 { MASK_SSE2, CODE_FOR_sse2_punpcklwd, "__builtin_ia32_punpcklwd128", IX86_BUILTIN_PUNPCKLWD128, 0, 0 },
12880 { MASK_SSE2, CODE_FOR_sse2_punpckldq, "__builtin_ia32_punpckldq128", IX86_BUILTIN_PUNPCKLDQ128, 0, 0 },
12881 { MASK_SSE2, CODE_FOR_sse2_punpcklqdq, "__builtin_ia32_punpcklqdq128", IX86_BUILTIN_PUNPCKLQDQ128, 0, 0 },
12882
12883 { MASK_SSE2, CODE_FOR_sse2_packsswb, "__builtin_ia32_packsswb128", IX86_BUILTIN_PACKSSWB128, 0, 0 },
12884 { MASK_SSE2, CODE_FOR_sse2_packssdw, "__builtin_ia32_packssdw128", IX86_BUILTIN_PACKSSDW128, 0, 0 },
12885 { MASK_SSE2, CODE_FOR_sse2_packuswb, "__builtin_ia32_packuswb128", IX86_BUILTIN_PACKUSWB128, 0, 0 },
12886
12887 { MASK_SSE2, CODE_FOR_umulv8hi3_highpart, "__builtin_ia32_pmulhuw128", IX86_BUILTIN_PMULHUW128, 0, 0 },
12888 { MASK_SSE2, CODE_FOR_sse2_psadbw, 0, IX86_BUILTIN_PSADBW128, 0, 0 },
12889
12890 { MASK_SSE2, CODE_FOR_ashlv8hi3_ti, 0, IX86_BUILTIN_PSLLW128, 0, 0 },
12891 { MASK_SSE2, CODE_FOR_ashlv8hi3, 0, IX86_BUILTIN_PSLLWI128, 0, 0 },
12892 { MASK_SSE2, CODE_FOR_ashlv4si3_ti, 0, IX86_BUILTIN_PSLLD128, 0, 0 },
12893 { MASK_SSE2, CODE_FOR_ashlv4si3, 0, IX86_BUILTIN_PSLLDI128, 0, 0 },
12894 { MASK_SSE2, CODE_FOR_ashlv2di3_ti, 0, IX86_BUILTIN_PSLLQ128, 0, 0 },
12895 { MASK_SSE2, CODE_FOR_ashlv2di3, 0, IX86_BUILTIN_PSLLQI128, 0, 0 },
12896
12897 { MASK_SSE2, CODE_FOR_lshrv8hi3_ti, 0, IX86_BUILTIN_PSRLW128, 0, 0 },
12898 { MASK_SSE2, CODE_FOR_lshrv8hi3, 0, IX86_BUILTIN_PSRLWI128, 0, 0 },
12899 { MASK_SSE2, CODE_FOR_lshrv4si3_ti, 0, IX86_BUILTIN_PSRLD128, 0, 0 },
12900 { MASK_SSE2, CODE_FOR_lshrv4si3, 0, IX86_BUILTIN_PSRLDI128, 0, 0 },
12901 { MASK_SSE2, CODE_FOR_lshrv2di3_ti, 0, IX86_BUILTIN_PSRLQ128, 0, 0 },
12902 { MASK_SSE2, CODE_FOR_lshrv2di3, 0, IX86_BUILTIN_PSRLQI128, 0, 0 },
12903
12904 { MASK_SSE2, CODE_FOR_ashrv8hi3_ti, 0, IX86_BUILTIN_PSRAW128, 0, 0 },
12905 { MASK_SSE2, CODE_FOR_ashrv8hi3, 0, IX86_BUILTIN_PSRAWI128, 0, 0 },
12906 { MASK_SSE2, CODE_FOR_ashrv4si3_ti, 0, IX86_BUILTIN_PSRAD128, 0, 0 },
12907 { MASK_SSE2, CODE_FOR_ashrv4si3, 0, IX86_BUILTIN_PSRADI128, 0, 0 },
12908
12909 { MASK_SSE2, CODE_FOR_sse2_pmaddwd, 0, IX86_BUILTIN_PMADDWD128, 0, 0 },
12910
12911 { MASK_SSE2, CODE_FOR_cvtsi2sd, 0, IX86_BUILTIN_CVTSI2SD, 0, 0 },
12912 { MASK_SSE2 | MASK_64BIT, CODE_FOR_cvtsi2sdq, 0, IX86_BUILTIN_CVTSI642SD, 0, 0 },
12913 { MASK_SSE2, CODE_FOR_cvtsd2ss, 0, IX86_BUILTIN_CVTSD2SS, 0, 0 },
12914 { MASK_SSE2, CODE_FOR_cvtss2sd, 0, IX86_BUILTIN_CVTSS2SD, 0, 0 },
12915
12916 /* SSE3 */
12917 { MASK_SSE3, CODE_FOR_addsubv4sf3, "__builtin_ia32_addsubps", IX86_BUILTIN_ADDSUBPS, 0, 0 },
12918 { MASK_SSE3, CODE_FOR_addsubv2df3, "__builtin_ia32_addsubpd", IX86_BUILTIN_ADDSUBPD, 0, 0 },
12919 { MASK_SSE3, CODE_FOR_haddv4sf3, "__builtin_ia32_haddps", IX86_BUILTIN_HADDPS, 0, 0 },
12920 { MASK_SSE3, CODE_FOR_haddv2df3, "__builtin_ia32_haddpd", IX86_BUILTIN_HADDPD, 0, 0 },
12921 { MASK_SSE3, CODE_FOR_hsubv4sf3, "__builtin_ia32_hsubps", IX86_BUILTIN_HSUBPS, 0, 0 },
12922 { MASK_SSE3, CODE_FOR_hsubv2df3, "__builtin_ia32_hsubpd", IX86_BUILTIN_HSUBPD, 0, 0 }
12923 };
12924
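/* Illustrative sketch, not part of the tables themselves: each bdesc_2arg
   entry ties one user-visible builtin to a named insn pattern, and
   ix86_expand_binop_builtin below performs the expansion.  Assuming the
   usual GNU vector typedefs, the "__builtin_ia32_paddw" entry lets user
   code such as

     typedef short __v4hi __attribute__ ((vector_size (8)));

     __v4hi
     add_words (__v4hi a, __v4hi b)
     {
       return __builtin_ia32_paddw (a, b);
     }

   expand through CODE_FOR_addv4hi3 into a single paddw instruction when
   -mmmx is in effect.  */
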
12925 static const struct builtin_description bdesc_1arg[] =
12926 {
12927 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB, 0, 0 },
12928 { MASK_SSE, CODE_FOR_sse_movmskps, 0, IX86_BUILTIN_MOVMSKPS, 0, 0 },
12929
12930 { MASK_SSE, CODE_FOR_sqrtv4sf2, 0, IX86_BUILTIN_SQRTPS, 0, 0 },
12931 { MASK_SSE, CODE_FOR_rsqrtv4sf2, 0, IX86_BUILTIN_RSQRTPS, 0, 0 },
12932 { MASK_SSE, CODE_FOR_rcpv4sf2, 0, IX86_BUILTIN_RCPPS, 0, 0 },
12933
12934 { MASK_SSE, CODE_FOR_cvtps2pi, 0, IX86_BUILTIN_CVTPS2PI, 0, 0 },
12935 { MASK_SSE, CODE_FOR_cvtss2si, 0, IX86_BUILTIN_CVTSS2SI, 0, 0 },
12936 { MASK_SSE | MASK_64BIT, CODE_FOR_cvtss2siq, 0, IX86_BUILTIN_CVTSS2SI64, 0, 0 },
12937 { MASK_SSE, CODE_FOR_cvttps2pi, 0, IX86_BUILTIN_CVTTPS2PI, 0, 0 },
12938 { MASK_SSE, CODE_FOR_cvttss2si, 0, IX86_BUILTIN_CVTTSS2SI, 0, 0 },
12939 { MASK_SSE | MASK_64BIT, CODE_FOR_cvttss2siq, 0, IX86_BUILTIN_CVTTSS2SI64, 0, 0 },
12940
12941 { MASK_SSE2, CODE_FOR_sse2_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB128, 0, 0 },
12942 { MASK_SSE2, CODE_FOR_sse2_movmskpd, 0, IX86_BUILTIN_MOVMSKPD, 0, 0 },
12943 { MASK_SSE2, CODE_FOR_sse2_movq2dq, 0, IX86_BUILTIN_MOVQ2DQ, 0, 0 },
12944 { MASK_SSE2, CODE_FOR_sse2_movdq2q, 0, IX86_BUILTIN_MOVDQ2Q, 0, 0 },
12945
12946 { MASK_SSE2, CODE_FOR_sqrtv2df2, 0, IX86_BUILTIN_SQRTPD, 0, 0 },
12947
12948 { MASK_SSE2, CODE_FOR_cvtdq2pd, 0, IX86_BUILTIN_CVTDQ2PD, 0, 0 },
12949 { MASK_SSE2, CODE_FOR_cvtdq2ps, 0, IX86_BUILTIN_CVTDQ2PS, 0, 0 },
12950
12951 { MASK_SSE2, CODE_FOR_cvtpd2dq, 0, IX86_BUILTIN_CVTPD2DQ, 0, 0 },
12952 { MASK_SSE2, CODE_FOR_cvtpd2pi, 0, IX86_BUILTIN_CVTPD2PI, 0, 0 },
12953 { MASK_SSE2, CODE_FOR_cvtpd2ps, 0, IX86_BUILTIN_CVTPD2PS, 0, 0 },
12954 { MASK_SSE2, CODE_FOR_cvttpd2dq, 0, IX86_BUILTIN_CVTTPD2DQ, 0, 0 },
12955 { MASK_SSE2, CODE_FOR_cvttpd2pi, 0, IX86_BUILTIN_CVTTPD2PI, 0, 0 },
12956
12957 { MASK_SSE2, CODE_FOR_cvtpi2pd, 0, IX86_BUILTIN_CVTPI2PD, 0, 0 },
12958
12959 { MASK_SSE2, CODE_FOR_cvtsd2si, 0, IX86_BUILTIN_CVTSD2SI, 0, 0 },
12960 { MASK_SSE2, CODE_FOR_cvttsd2si, 0, IX86_BUILTIN_CVTTSD2SI, 0, 0 },
12961 { MASK_SSE2 | MASK_64BIT, CODE_FOR_cvtsd2siq, 0, IX86_BUILTIN_CVTSD2SI64, 0, 0 },
12962 { MASK_SSE2 | MASK_64BIT, CODE_FOR_cvttsd2siq, 0, IX86_BUILTIN_CVTTSD2SI64, 0, 0 },
12963
12964 { MASK_SSE2, CODE_FOR_cvtps2dq, 0, IX86_BUILTIN_CVTPS2DQ, 0, 0 },
12965 { MASK_SSE2, CODE_FOR_cvtps2pd, 0, IX86_BUILTIN_CVTPS2PD, 0, 0 },
12966 { MASK_SSE2, CODE_FOR_cvttps2dq, 0, IX86_BUILTIN_CVTTPS2DQ, 0, 0 },
12967
12968 { MASK_SSE2, CODE_FOR_sse2_movq, 0, IX86_BUILTIN_MOVQ, 0, 0 },
12969
12970 /* SSE3 */
12971 { MASK_SSE3, CODE_FOR_movshdup, 0, IX86_BUILTIN_MOVSHDUP, 0, 0 },
12972 { MASK_SSE3, CODE_FOR_movsldup, 0, IX86_BUILTIN_MOVSLDUP, 0, 0 },
12973 { MASK_SSE3, CODE_FOR_movddup, 0, IX86_BUILTIN_MOVDDUP, 0, 0 }
12974 };
12975
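/* Similarly, a bdesc_1arg entry drives ix86_expand_unop_builtin.  As an
   illustrative example, the IX86_BUILTIN_MOVMSKPS entry makes a call like

     typedef float __v4sf __attribute__ ((vector_size (16)));

     int
     sign_bits (__v4sf x)
     {
       return __builtin_ia32_movmskps (x);
     }

   expand through CODE_FOR_sse_movmskps, collecting the four sign bits of
   the vector into an integer (int_ftype_v4sf, registered below).  */
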
12976 void
12977 ix86_init_builtins (void)
12978 {
12979 if (TARGET_MMX)
12980 ix86_init_mmx_sse_builtins ();
12981 }
12982
12983 /* Set up all the MMX/SSE builtins.  This is not called if TARGET_MMX
12984 is zero.  Otherwise, if TARGET_SSE is not set, only the MMX
12985 builtins are defined.  */
12986 static void
12987 ix86_init_mmx_sse_builtins (void)
12988 {
12989 const struct builtin_description * d;
12990 size_t i;
12991
12992 tree V16QI_type_node = build_vector_type_for_mode (intQI_type_node, V16QImode);
12993 tree V2SI_type_node = build_vector_type_for_mode (intSI_type_node, V2SImode);
12994 tree V2SF_type_node = build_vector_type_for_mode (float_type_node, V2SFmode);
12995 tree V2DI_type_node = build_vector_type_for_mode (intDI_type_node, V2DImode);
12996 tree V2DF_type_node = build_vector_type_for_mode (double_type_node, V2DFmode);
12997 tree V4SF_type_node = build_vector_type_for_mode (float_type_node, V4SFmode);
12998 tree V4SI_type_node = build_vector_type_for_mode (intSI_type_node, V4SImode);
12999 tree V4HI_type_node = build_vector_type_for_mode (intHI_type_node, V4HImode);
13000 tree V8QI_type_node = build_vector_type_for_mode (intQI_type_node, V8QImode);
13001 tree V8HI_type_node = build_vector_type_for_mode (intHI_type_node, V8HImode);
13002
13003 tree pchar_type_node = build_pointer_type (char_type_node);
13004 tree pcchar_type_node = build_pointer_type (
13005 build_type_variant (char_type_node, 1, 0));
13006 tree pfloat_type_node = build_pointer_type (float_type_node);
13007 tree pcfloat_type_node = build_pointer_type (
13008 build_type_variant (float_type_node, 1, 0));
13009 tree pv2si_type_node = build_pointer_type (V2SI_type_node);
13010 tree pv2di_type_node = build_pointer_type (V2DI_type_node);
13011 tree pdi_type_node = build_pointer_type (long_long_unsigned_type_node);
13012
13013 /* Comparisons. */
13014 tree int_ftype_v4sf_v4sf
13015 = build_function_type_list (integer_type_node,
13016 V4SF_type_node, V4SF_type_node, NULL_TREE);
13017 tree v4si_ftype_v4sf_v4sf
13018 = build_function_type_list (V4SI_type_node,
13019 V4SF_type_node, V4SF_type_node, NULL_TREE);
13020 /* MMX/SSE/integer conversions. */
13021 tree int_ftype_v4sf
13022 = build_function_type_list (integer_type_node,
13023 V4SF_type_node, NULL_TREE);
13024 tree int64_ftype_v4sf
13025 = build_function_type_list (long_long_integer_type_node,
13026 V4SF_type_node, NULL_TREE);
13027 tree int_ftype_v8qi
13028 = build_function_type_list (integer_type_node, V8QI_type_node, NULL_TREE);
13029 tree v4sf_ftype_v4sf_int
13030 = build_function_type_list (V4SF_type_node,
13031 V4SF_type_node, integer_type_node, NULL_TREE);
13032 tree v4sf_ftype_v4sf_int64
13033 = build_function_type_list (V4SF_type_node,
13034 V4SF_type_node, long_long_integer_type_node,
13035 NULL_TREE);
13036 tree v4sf_ftype_v4sf_v2si
13037 = build_function_type_list (V4SF_type_node,
13038 V4SF_type_node, V2SI_type_node, NULL_TREE);
13039 tree int_ftype_v4hi_int
13040 = build_function_type_list (integer_type_node,
13041 V4HI_type_node, integer_type_node, NULL_TREE);
13042 tree v4hi_ftype_v4hi_int_int
13043 = build_function_type_list (V4HI_type_node, V4HI_type_node,
13044 integer_type_node, integer_type_node,
13045 NULL_TREE);
13046 /* Miscellaneous. */
13047 tree v8qi_ftype_v4hi_v4hi
13048 = build_function_type_list (V8QI_type_node,
13049 V4HI_type_node, V4HI_type_node, NULL_TREE);
13050 tree v4hi_ftype_v2si_v2si
13051 = build_function_type_list (V4HI_type_node,
13052 V2SI_type_node, V2SI_type_node, NULL_TREE);
13053 tree v4sf_ftype_v4sf_v4sf_int
13054 = build_function_type_list (V4SF_type_node,
13055 V4SF_type_node, V4SF_type_node,
13056 integer_type_node, NULL_TREE);
13057 tree v2si_ftype_v4hi_v4hi
13058 = build_function_type_list (V2SI_type_node,
13059 V4HI_type_node, V4HI_type_node, NULL_TREE);
13060 tree v4hi_ftype_v4hi_int
13061 = build_function_type_list (V4HI_type_node,
13062 V4HI_type_node, integer_type_node, NULL_TREE);
13063 tree v4hi_ftype_v4hi_di
13064 = build_function_type_list (V4HI_type_node,
13065 V4HI_type_node, long_long_unsigned_type_node,
13066 NULL_TREE);
13067 tree v2si_ftype_v2si_di
13068 = build_function_type_list (V2SI_type_node,
13069 V2SI_type_node, long_long_unsigned_type_node,
13070 NULL_TREE);
13071 tree void_ftype_void
13072 = build_function_type (void_type_node, void_list_node);
13073 tree void_ftype_unsigned
13074 = build_function_type_list (void_type_node, unsigned_type_node, NULL_TREE);
13075 tree void_ftype_unsigned_unsigned
13076 = build_function_type_list (void_type_node, unsigned_type_node,
13077 unsigned_type_node, NULL_TREE);
13078 tree void_ftype_pcvoid_unsigned_unsigned
13079 = build_function_type_list (void_type_node, const_ptr_type_node,
13080 unsigned_type_node, unsigned_type_node,
13081 NULL_TREE);
13082 tree unsigned_ftype_void
13083 = build_function_type (unsigned_type_node, void_list_node);
13084 tree di_ftype_void
13085 = build_function_type (long_long_unsigned_type_node, void_list_node);
13086 tree v4sf_ftype_void
13087 = build_function_type (V4SF_type_node, void_list_node);
13088 tree v2si_ftype_v4sf
13089 = build_function_type_list (V2SI_type_node, V4SF_type_node, NULL_TREE);
13090 /* Loads/stores. */
13091 tree void_ftype_v8qi_v8qi_pchar
13092 = build_function_type_list (void_type_node,
13093 V8QI_type_node, V8QI_type_node,
13094 pchar_type_node, NULL_TREE);
13095 tree v4sf_ftype_pcfloat
13096 = build_function_type_list (V4SF_type_node, pcfloat_type_node, NULL_TREE);
13097 /* @@@ the type is bogus */
13098 tree v4sf_ftype_v4sf_pv2si
13099 = build_function_type_list (V4SF_type_node,
13100 V4SF_type_node, pv2si_type_node, NULL_TREE);
13101 tree void_ftype_pv2si_v4sf
13102 = build_function_type_list (void_type_node,
13103 pv2si_type_node, V4SF_type_node, NULL_TREE);
13104 tree void_ftype_pfloat_v4sf
13105 = build_function_type_list (void_type_node,
13106 pfloat_type_node, V4SF_type_node, NULL_TREE);
13107 tree void_ftype_pdi_di
13108 = build_function_type_list (void_type_node,
13109 pdi_type_node, long_long_unsigned_type_node,
13110 NULL_TREE);
13111 tree void_ftype_pv2di_v2di
13112 = build_function_type_list (void_type_node,
13113 pv2di_type_node, V2DI_type_node, NULL_TREE);
13114 /* Normal vector unops. */
13115 tree v4sf_ftype_v4sf
13116 = build_function_type_list (V4SF_type_node, V4SF_type_node, NULL_TREE);
13117
13118 /* Normal vector binops. */
13119 tree v4sf_ftype_v4sf_v4sf
13120 = build_function_type_list (V4SF_type_node,
13121 V4SF_type_node, V4SF_type_node, NULL_TREE);
13122 tree v8qi_ftype_v8qi_v8qi
13123 = build_function_type_list (V8QI_type_node,
13124 V8QI_type_node, V8QI_type_node, NULL_TREE);
13125 tree v4hi_ftype_v4hi_v4hi
13126 = build_function_type_list (V4HI_type_node,
13127 V4HI_type_node, V4HI_type_node, NULL_TREE);
13128 tree v2si_ftype_v2si_v2si
13129 = build_function_type_list (V2SI_type_node,
13130 V2SI_type_node, V2SI_type_node, NULL_TREE);
13131 tree di_ftype_di_di
13132 = build_function_type_list (long_long_unsigned_type_node,
13133 long_long_unsigned_type_node,
13134 long_long_unsigned_type_node, NULL_TREE);
13135
13136 tree v2si_ftype_v2sf
13137 = build_function_type_list (V2SI_type_node, V2SF_type_node, NULL_TREE);
13138 tree v2sf_ftype_v2si
13139 = build_function_type_list (V2SF_type_node, V2SI_type_node, NULL_TREE);
13140 tree v2si_ftype_v2si
13141 = build_function_type_list (V2SI_type_node, V2SI_type_node, NULL_TREE);
13142 tree v2sf_ftype_v2sf
13143 = build_function_type_list (V2SF_type_node, V2SF_type_node, NULL_TREE);
13144 tree v2sf_ftype_v2sf_v2sf
13145 = build_function_type_list (V2SF_type_node,
13146 V2SF_type_node, V2SF_type_node, NULL_TREE);
13147 tree v2si_ftype_v2sf_v2sf
13148 = build_function_type_list (V2SI_type_node,
13149 V2SF_type_node, V2SF_type_node, NULL_TREE);
13150 tree pint_type_node = build_pointer_type (integer_type_node);
13151 tree pcint_type_node = build_pointer_type (
13152 build_type_variant (integer_type_node, 1, 0));
13153 tree pdouble_type_node = build_pointer_type (double_type_node);
13154 tree pcdouble_type_node = build_pointer_type (
13155 build_type_variant (double_type_node, 1, 0));
13156 tree int_ftype_v2df_v2df
13157 = build_function_type_list (integer_type_node,
13158 V2DF_type_node, V2DF_type_node, NULL_TREE);
13159
13160 tree ti_ftype_void
13161 = build_function_type (intTI_type_node, void_list_node);
13162 tree v2di_ftype_void
13163 = build_function_type (V2DI_type_node, void_list_node);
13164 tree ti_ftype_ti_ti
13165 = build_function_type_list (intTI_type_node,
13166 intTI_type_node, intTI_type_node, NULL_TREE);
13167 tree void_ftype_pcvoid
13168 = build_function_type_list (void_type_node, const_ptr_type_node, NULL_TREE);
13169 tree v2di_ftype_di
13170 = build_function_type_list (V2DI_type_node,
13171 long_long_unsigned_type_node, NULL_TREE);
13172 tree di_ftype_v2di
13173 = build_function_type_list (long_long_unsigned_type_node,
13174 V2DI_type_node, NULL_TREE);
13175 tree v4sf_ftype_v4si
13176 = build_function_type_list (V4SF_type_node, V4SI_type_node, NULL_TREE);
13177 tree v4si_ftype_v4sf
13178 = build_function_type_list (V4SI_type_node, V4SF_type_node, NULL_TREE);
13179 tree v2df_ftype_v4si
13180 = build_function_type_list (V2DF_type_node, V4SI_type_node, NULL_TREE);
13181 tree v4si_ftype_v2df
13182 = build_function_type_list (V4SI_type_node, V2DF_type_node, NULL_TREE);
13183 tree v2si_ftype_v2df
13184 = build_function_type_list (V2SI_type_node, V2DF_type_node, NULL_TREE);
13185 tree v4sf_ftype_v2df
13186 = build_function_type_list (V4SF_type_node, V2DF_type_node, NULL_TREE);
13187 tree v2df_ftype_v2si
13188 = build_function_type_list (V2DF_type_node, V2SI_type_node, NULL_TREE);
13189 tree v2df_ftype_v4sf
13190 = build_function_type_list (V2DF_type_node, V4SF_type_node, NULL_TREE);
13191 tree int_ftype_v2df
13192 = build_function_type_list (integer_type_node, V2DF_type_node, NULL_TREE);
13193 tree int64_ftype_v2df
13194 = build_function_type_list (long_long_integer_type_node,
13195 V2DF_type_node, NULL_TREE);
13196 tree v2df_ftype_v2df_int
13197 = build_function_type_list (V2DF_type_node,
13198 V2DF_type_node, integer_type_node, NULL_TREE);
13199 tree v2df_ftype_v2df_int64
13200 = build_function_type_list (V2DF_type_node,
13201 V2DF_type_node, long_long_integer_type_node,
13202 NULL_TREE);
13203 tree v4sf_ftype_v4sf_v2df
13204 = build_function_type_list (V4SF_type_node,
13205 V4SF_type_node, V2DF_type_node, NULL_TREE);
13206 tree v2df_ftype_v2df_v4sf
13207 = build_function_type_list (V2DF_type_node,
13208 V2DF_type_node, V4SF_type_node, NULL_TREE);
13209 tree v2df_ftype_v2df_v2df_int
13210 = build_function_type_list (V2DF_type_node,
13211 V2DF_type_node, V2DF_type_node,
13212 integer_type_node,
13213 NULL_TREE);
13214 tree v2df_ftype_v2df_pv2si
13215 = build_function_type_list (V2DF_type_node,
13216 V2DF_type_node, pv2si_type_node, NULL_TREE);
13217 tree void_ftype_pv2si_v2df
13218 = build_function_type_list (void_type_node,
13219 pv2si_type_node, V2DF_type_node, NULL_TREE);
13220 tree void_ftype_pdouble_v2df
13221 = build_function_type_list (void_type_node,
13222 pdouble_type_node, V2DF_type_node, NULL_TREE);
13223 tree void_ftype_pint_int
13224 = build_function_type_list (void_type_node,
13225 pint_type_node, integer_type_node, NULL_TREE);
13226 tree void_ftype_v16qi_v16qi_pchar
13227 = build_function_type_list (void_type_node,
13228 V16QI_type_node, V16QI_type_node,
13229 pchar_type_node, NULL_TREE);
13230 tree v2df_ftype_pcdouble
13231 = build_function_type_list (V2DF_type_node, pcdouble_type_node, NULL_TREE);
13232 tree v2df_ftype_v2df_v2df
13233 = build_function_type_list (V2DF_type_node,
13234 V2DF_type_node, V2DF_type_node, NULL_TREE);
13235 tree v16qi_ftype_v16qi_v16qi
13236 = build_function_type_list (V16QI_type_node,
13237 V16QI_type_node, V16QI_type_node, NULL_TREE);
13238 tree v8hi_ftype_v8hi_v8hi
13239 = build_function_type_list (V8HI_type_node,
13240 V8HI_type_node, V8HI_type_node, NULL_TREE);
13241 tree v4si_ftype_v4si_v4si
13242 = build_function_type_list (V4SI_type_node,
13243 V4SI_type_node, V4SI_type_node, NULL_TREE);
13244 tree v2di_ftype_v2di_v2di
13245 = build_function_type_list (V2DI_type_node,
13246 V2DI_type_node, V2DI_type_node, NULL_TREE);
13247 tree v2di_ftype_v2df_v2df
13248 = build_function_type_list (V2DI_type_node,
13249 V2DF_type_node, V2DF_type_node, NULL_TREE);
13250 tree v2df_ftype_v2df
13251 = build_function_type_list (V2DF_type_node, V2DF_type_node, NULL_TREE);
13252 tree v2df_ftype_double
13253 = build_function_type_list (V2DF_type_node, double_type_node, NULL_TREE);
13254 tree v2df_ftype_double_double
13255 = build_function_type_list (V2DF_type_node,
13256 double_type_node, double_type_node, NULL_TREE);
13257 tree int_ftype_v8hi_int
13258 = build_function_type_list (integer_type_node,
13259 V8HI_type_node, integer_type_node, NULL_TREE);
13260 tree v8hi_ftype_v8hi_int_int
13261 = build_function_type_list (V8HI_type_node,
13262 V8HI_type_node, integer_type_node,
13263 integer_type_node, NULL_TREE);
13264 tree v2di_ftype_v2di_int
13265 = build_function_type_list (V2DI_type_node,
13266 V2DI_type_node, integer_type_node, NULL_TREE);
13267 tree v4si_ftype_v4si_int
13268 = build_function_type_list (V4SI_type_node,
13269 V4SI_type_node, integer_type_node, NULL_TREE);
13270 tree v8hi_ftype_v8hi_int
13271 = build_function_type_list (V8HI_type_node,
13272 V8HI_type_node, integer_type_node, NULL_TREE);
13273 tree v8hi_ftype_v8hi_v2di
13274 = build_function_type_list (V8HI_type_node,
13275 V8HI_type_node, V2DI_type_node, NULL_TREE);
13276 tree v4si_ftype_v4si_v2di
13277 = build_function_type_list (V4SI_type_node,
13278 V4SI_type_node, V2DI_type_node, NULL_TREE);
13279 tree v4si_ftype_v8hi_v8hi
13280 = build_function_type_list (V4SI_type_node,
13281 V8HI_type_node, V8HI_type_node, NULL_TREE);
13282 tree di_ftype_v8qi_v8qi
13283 = build_function_type_list (long_long_unsigned_type_node,
13284 V8QI_type_node, V8QI_type_node, NULL_TREE);
13285 tree v2di_ftype_v16qi_v16qi
13286 = build_function_type_list (V2DI_type_node,
13287 V16QI_type_node, V16QI_type_node, NULL_TREE);
13288 tree int_ftype_v16qi
13289 = build_function_type_list (integer_type_node, V16QI_type_node, NULL_TREE);
13290 tree v16qi_ftype_pcchar
13291 = build_function_type_list (V16QI_type_node, pcchar_type_node, NULL_TREE);
13292 tree void_ftype_pchar_v16qi
13293 = build_function_type_list (void_type_node,
13294 pchar_type_node, V16QI_type_node, NULL_TREE);
13295 tree v4si_ftype_pcint
13296 = build_function_type_list (V4SI_type_node, pcint_type_node, NULL_TREE);
13297 tree void_ftype_pcint_v4si
13298 = build_function_type_list (void_type_node,
13299 pcint_type_node, V4SI_type_node, NULL_TREE);
13300 tree v2di_ftype_v2di
13301 = build_function_type_list (V2DI_type_node, V2DI_type_node, NULL_TREE);
13302
13303 tree float80_type;
13304 tree float128_type;
13305
13306 /* The __float80 type. */
13307 if (TYPE_MODE (long_double_type_node) == XFmode)
13308 (*lang_hooks.types.register_builtin_type) (long_double_type_node,
13309 "__float80");
13310 else
13311 {
13312 /* The __float80 type. */
13313 float80_type = make_node (REAL_TYPE);
13314 TYPE_PRECISION (float80_type) = 96;
13315 layout_type (float80_type);
13316 (*lang_hooks.types.register_builtin_type) (float80_type, "__float80");
13317 }
13318
13319 float128_type = make_node (REAL_TYPE);
13320 TYPE_PRECISION (float128_type) = 128;
13321 layout_type (float128_type);
13322 (*lang_hooks.types.register_builtin_type) (float128_type, "__float128");
13323
13324 /* Add all builtins that are more or less simple operations on two
13325 operands. */
13326 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
13327 {
13328 /* Use one of the operands; the target can have a different mode for
13329 mask-generating compares. */
13330 enum machine_mode mode;
13331 tree type;
13332
13333 if (d->name == 0)
13334 continue;
13335 mode = insn_data[d->icode].operand[1].mode;
13336
13337 switch (mode)
13338 {
13339 case V16QImode:
13340 type = v16qi_ftype_v16qi_v16qi;
13341 break;
13342 case V8HImode:
13343 type = v8hi_ftype_v8hi_v8hi;
13344 break;
13345 case V4SImode:
13346 type = v4si_ftype_v4si_v4si;
13347 break;
13348 case V2DImode:
13349 type = v2di_ftype_v2di_v2di;
13350 break;
13351 case V2DFmode:
13352 type = v2df_ftype_v2df_v2df;
13353 break;
13354 case TImode:
13355 type = ti_ftype_ti_ti;
13356 break;
13357 case V4SFmode:
13358 type = v4sf_ftype_v4sf_v4sf;
13359 break;
13360 case V8QImode:
13361 type = v8qi_ftype_v8qi_v8qi;
13362 break;
13363 case V4HImode:
13364 type = v4hi_ftype_v4hi_v4hi;
13365 break;
13366 case V2SImode:
13367 type = v2si_ftype_v2si_v2si;
13368 break;
13369 case DImode:
13370 type = di_ftype_di_di;
13371 break;
13372
13373 default:
13374 abort ();
13375 }
13376
13377 /* Override for comparisons. */
13378 if (d->icode == CODE_FOR_maskcmpv4sf3
13379 || d->icode == CODE_FOR_maskncmpv4sf3
13380 || d->icode == CODE_FOR_vmmaskcmpv4sf3
13381 || d->icode == CODE_FOR_vmmaskncmpv4sf3)
13382 type = v4si_ftype_v4sf_v4sf;
13383
13384 if (d->icode == CODE_FOR_maskcmpv2df3
13385 || d->icode == CODE_FOR_maskncmpv2df3
13386 || d->icode == CODE_FOR_vmmaskcmpv2df3
13387 || d->icode == CODE_FOR_vmmaskncmpv2df3)
13388 type = v2di_ftype_v2df_v2df;
13389
13390 def_builtin (d->mask, d->name, type, d->code);
13391 }
13392
13393 /* Add the remaining MMX insns with somewhat more complicated types. */
13394 def_builtin (MASK_MMX, "__builtin_ia32_mmx_zero", di_ftype_void, IX86_BUILTIN_MMX_ZERO);
13395 def_builtin (MASK_MMX, "__builtin_ia32_emms", void_ftype_void, IX86_BUILTIN_EMMS);
13396 def_builtin (MASK_MMX, "__builtin_ia32_psllw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSLLW);
13397 def_builtin (MASK_MMX, "__builtin_ia32_pslld", v2si_ftype_v2si_di, IX86_BUILTIN_PSLLD);
13398 def_builtin (MASK_MMX, "__builtin_ia32_psllq", di_ftype_di_di, IX86_BUILTIN_PSLLQ);
13399
13400 def_builtin (MASK_MMX, "__builtin_ia32_psrlw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRLW);
13401 def_builtin (MASK_MMX, "__builtin_ia32_psrld", v2si_ftype_v2si_di, IX86_BUILTIN_PSRLD);
13402 def_builtin (MASK_MMX, "__builtin_ia32_psrlq", di_ftype_di_di, IX86_BUILTIN_PSRLQ);
13403
13404 def_builtin (MASK_MMX, "__builtin_ia32_psraw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRAW);
13405 def_builtin (MASK_MMX, "__builtin_ia32_psrad", v2si_ftype_v2si_di, IX86_BUILTIN_PSRAD);
13406
13407 def_builtin (MASK_MMX, "__builtin_ia32_pshufw", v4hi_ftype_v4hi_int, IX86_BUILTIN_PSHUFW);
13408 def_builtin (MASK_MMX, "__builtin_ia32_pmaddwd", v2si_ftype_v4hi_v4hi, IX86_BUILTIN_PMADDWD);
13409
13410 /* comi/ucomi insns. */
13411 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
13412 if (d->mask == MASK_SSE2)
13413 def_builtin (d->mask, d->name, int_ftype_v2df_v2df, d->code);
13414 else
13415 def_builtin (d->mask, d->name, int_ftype_v4sf_v4sf, d->code);
13416
13417 def_builtin (MASK_MMX, "__builtin_ia32_packsswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKSSWB);
13418 def_builtin (MASK_MMX, "__builtin_ia32_packssdw", v4hi_ftype_v2si_v2si, IX86_BUILTIN_PACKSSDW);
13419 def_builtin (MASK_MMX, "__builtin_ia32_packuswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKUSWB);
13420
13421 def_builtin (MASK_SSE, "__builtin_ia32_ldmxcsr", void_ftype_unsigned, IX86_BUILTIN_LDMXCSR);
13422 def_builtin (MASK_SSE, "__builtin_ia32_stmxcsr", unsigned_ftype_void, IX86_BUILTIN_STMXCSR);
13423 def_builtin (MASK_SSE, "__builtin_ia32_cvtpi2ps", v4sf_ftype_v4sf_v2si, IX86_BUILTIN_CVTPI2PS);
13424 def_builtin (MASK_SSE, "__builtin_ia32_cvtps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTPS2PI);
13425 def_builtin (MASK_SSE, "__builtin_ia32_cvtsi2ss", v4sf_ftype_v4sf_int, IX86_BUILTIN_CVTSI2SS);
13426 def_builtin (MASK_SSE | MASK_64BIT, "__builtin_ia32_cvtsi642ss", v4sf_ftype_v4sf_int64, IX86_BUILTIN_CVTSI642SS);
13427 def_builtin (MASK_SSE, "__builtin_ia32_cvtss2si", int_ftype_v4sf, IX86_BUILTIN_CVTSS2SI);
13428 def_builtin (MASK_SSE | MASK_64BIT, "__builtin_ia32_cvtss2si64", int64_ftype_v4sf, IX86_BUILTIN_CVTSS2SI64);
13429 def_builtin (MASK_SSE, "__builtin_ia32_cvttps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTTPS2PI);
13430 def_builtin (MASK_SSE, "__builtin_ia32_cvttss2si", int_ftype_v4sf, IX86_BUILTIN_CVTTSS2SI);
13431 def_builtin (MASK_SSE | MASK_64BIT, "__builtin_ia32_cvttss2si64", int64_ftype_v4sf, IX86_BUILTIN_CVTTSS2SI64);
13432
13433 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_pextrw", int_ftype_v4hi_int, IX86_BUILTIN_PEXTRW);
13434 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_pinsrw", v4hi_ftype_v4hi_int_int, IX86_BUILTIN_PINSRW);
13435
13436 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_maskmovq", void_ftype_v8qi_v8qi_pchar, IX86_BUILTIN_MASKMOVQ);
13437
13438 def_builtin (MASK_SSE, "__builtin_ia32_loadaps", v4sf_ftype_pcfloat, IX86_BUILTIN_LOADAPS);
13439 def_builtin (MASK_SSE, "__builtin_ia32_loadups", v4sf_ftype_pcfloat, IX86_BUILTIN_LOADUPS);
13440 def_builtin (MASK_SSE, "__builtin_ia32_loadss", v4sf_ftype_pcfloat, IX86_BUILTIN_LOADSS);
13441 def_builtin (MASK_SSE, "__builtin_ia32_storeaps", void_ftype_pfloat_v4sf, IX86_BUILTIN_STOREAPS);
13442 def_builtin (MASK_SSE, "__builtin_ia32_storeups", void_ftype_pfloat_v4sf, IX86_BUILTIN_STOREUPS);
13443 def_builtin (MASK_SSE, "__builtin_ia32_storess", void_ftype_pfloat_v4sf, IX86_BUILTIN_STORESS);
13444
13445 def_builtin (MASK_SSE, "__builtin_ia32_loadhps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADHPS);
13446 def_builtin (MASK_SSE, "__builtin_ia32_loadlps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADLPS);
13447 def_builtin (MASK_SSE, "__builtin_ia32_storehps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STOREHPS);
13448 def_builtin (MASK_SSE, "__builtin_ia32_storelps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STORELPS);
13449
13450 def_builtin (MASK_SSE, "__builtin_ia32_movmskps", int_ftype_v4sf, IX86_BUILTIN_MOVMSKPS);
13451 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_pmovmskb", int_ftype_v8qi, IX86_BUILTIN_PMOVMSKB);
13452 def_builtin (MASK_SSE, "__builtin_ia32_movntps", void_ftype_pfloat_v4sf, IX86_BUILTIN_MOVNTPS);
13453 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_movntq", void_ftype_pdi_di, IX86_BUILTIN_MOVNTQ);
13454
13455 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_sfence", void_ftype_void, IX86_BUILTIN_SFENCE);
13456
13457 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_psadbw", di_ftype_v8qi_v8qi, IX86_BUILTIN_PSADBW);
13458
13459 def_builtin (MASK_SSE, "__builtin_ia32_rcpps", v4sf_ftype_v4sf, IX86_BUILTIN_RCPPS);
13460 def_builtin (MASK_SSE, "__builtin_ia32_rcpss", v4sf_ftype_v4sf, IX86_BUILTIN_RCPSS);
13461 def_builtin (MASK_SSE, "__builtin_ia32_rsqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTPS);
13462 def_builtin (MASK_SSE, "__builtin_ia32_rsqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTSS);
13463 def_builtin (MASK_SSE, "__builtin_ia32_sqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTPS);
13464 def_builtin (MASK_SSE, "__builtin_ia32_sqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTSS);
13465
13466 def_builtin (MASK_SSE, "__builtin_ia32_shufps", v4sf_ftype_v4sf_v4sf_int, IX86_BUILTIN_SHUFPS);
13467
13468 /* Original 3DNow! */
13469 def_builtin (MASK_3DNOW, "__builtin_ia32_femms", void_ftype_void, IX86_BUILTIN_FEMMS);
13470 def_builtin (MASK_3DNOW, "__builtin_ia32_pavgusb", v8qi_ftype_v8qi_v8qi, IX86_BUILTIN_PAVGUSB);
13471 def_builtin (MASK_3DNOW, "__builtin_ia32_pf2id", v2si_ftype_v2sf, IX86_BUILTIN_PF2ID);
13472 def_builtin (MASK_3DNOW, "__builtin_ia32_pfacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFACC);
13473 def_builtin (MASK_3DNOW, "__builtin_ia32_pfadd", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFADD);
13474 def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpeq", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPEQ);
13475 def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpge", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPGE);
13476 def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpgt", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPGT);
13477 def_builtin (MASK_3DNOW, "__builtin_ia32_pfmax", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMAX);
13478 def_builtin (MASK_3DNOW, "__builtin_ia32_pfmin", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMIN);
13479 def_builtin (MASK_3DNOW, "__builtin_ia32_pfmul", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMUL);
13480 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcp", v2sf_ftype_v2sf, IX86_BUILTIN_PFRCP);
13481 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcpit1", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRCPIT1);
13482 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcpit2", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRCPIT2);
13483 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrsqrt", v2sf_ftype_v2sf, IX86_BUILTIN_PFRSQRT);
13484 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrsqit1", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRSQIT1);
13485 def_builtin (MASK_3DNOW, "__builtin_ia32_pfsub", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFSUB);
13486 def_builtin (MASK_3DNOW, "__builtin_ia32_pfsubr", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFSUBR);
13487 def_builtin (MASK_3DNOW, "__builtin_ia32_pi2fd", v2sf_ftype_v2si, IX86_BUILTIN_PI2FD);
13488 def_builtin (MASK_3DNOW, "__builtin_ia32_pmulhrw", v4hi_ftype_v4hi_v4hi, IX86_BUILTIN_PMULHRW);
13489
13490 /* 3DNow! extension as used in the Athlon CPU. */
13491 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pf2iw", v2si_ftype_v2sf, IX86_BUILTIN_PF2IW);
13492 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pfnacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFNACC);
13493 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pfpnacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFPNACC);
13494 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pi2fw", v2sf_ftype_v2si, IX86_BUILTIN_PI2FW);
13495 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pswapdsf", v2sf_ftype_v2sf, IX86_BUILTIN_PSWAPDSF);
13496 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pswapdsi", v2si_ftype_v2si, IX86_BUILTIN_PSWAPDSI);
13497
13498 def_builtin (MASK_SSE, "__builtin_ia32_setzerops", v4sf_ftype_void, IX86_BUILTIN_SSE_ZERO);
13499
13500 /* SSE2 */
13501 def_builtin (MASK_SSE2, "__builtin_ia32_pextrw128", int_ftype_v8hi_int, IX86_BUILTIN_PEXTRW128);
13502 def_builtin (MASK_SSE2, "__builtin_ia32_pinsrw128", v8hi_ftype_v8hi_int_int, IX86_BUILTIN_PINSRW128);
13503
13504 def_builtin (MASK_SSE2, "__builtin_ia32_maskmovdqu", void_ftype_v16qi_v16qi_pchar, IX86_BUILTIN_MASKMOVDQU);
13505 def_builtin (MASK_SSE2, "__builtin_ia32_movq2dq", v2di_ftype_di, IX86_BUILTIN_MOVQ2DQ);
13506 def_builtin (MASK_SSE2, "__builtin_ia32_movdq2q", di_ftype_v2di, IX86_BUILTIN_MOVDQ2Q);
13507
13508 def_builtin (MASK_SSE2, "__builtin_ia32_loadapd", v2df_ftype_pcdouble, IX86_BUILTIN_LOADAPD);
13509 def_builtin (MASK_SSE2, "__builtin_ia32_loadupd", v2df_ftype_pcdouble, IX86_BUILTIN_LOADUPD);
13510 def_builtin (MASK_SSE2, "__builtin_ia32_loadsd", v2df_ftype_pcdouble, IX86_BUILTIN_LOADSD);
13511 def_builtin (MASK_SSE2, "__builtin_ia32_storeapd", void_ftype_pdouble_v2df, IX86_BUILTIN_STOREAPD);
13512 def_builtin (MASK_SSE2, "__builtin_ia32_storeupd", void_ftype_pdouble_v2df, IX86_BUILTIN_STOREUPD);
13513 def_builtin (MASK_SSE2, "__builtin_ia32_storesd", void_ftype_pdouble_v2df, IX86_BUILTIN_STORESD);
13514
13515 def_builtin (MASK_SSE2, "__builtin_ia32_loadhpd", v2df_ftype_v2df_pv2si, IX86_BUILTIN_LOADHPD);
13516 def_builtin (MASK_SSE2, "__builtin_ia32_loadlpd", v2df_ftype_v2df_pv2si, IX86_BUILTIN_LOADLPD);
13517 def_builtin (MASK_SSE2, "__builtin_ia32_storehpd", void_ftype_pv2si_v2df, IX86_BUILTIN_STOREHPD);
13518 def_builtin (MASK_SSE2, "__builtin_ia32_storelpd", void_ftype_pv2si_v2df, IX86_BUILTIN_STORELPD);
13519
13520 def_builtin (MASK_SSE2, "__builtin_ia32_movmskpd", int_ftype_v2df, IX86_BUILTIN_MOVMSKPD);
13521 def_builtin (MASK_SSE2, "__builtin_ia32_pmovmskb128", int_ftype_v16qi, IX86_BUILTIN_PMOVMSKB128);
13522 def_builtin (MASK_SSE2, "__builtin_ia32_movnti", void_ftype_pint_int, IX86_BUILTIN_MOVNTI);
13523 def_builtin (MASK_SSE2, "__builtin_ia32_movntpd", void_ftype_pdouble_v2df, IX86_BUILTIN_MOVNTPD);
13524 def_builtin (MASK_SSE2, "__builtin_ia32_movntdq", void_ftype_pv2di_v2di, IX86_BUILTIN_MOVNTDQ);
13525
13526 def_builtin (MASK_SSE2, "__builtin_ia32_pshufd", v4si_ftype_v4si_int, IX86_BUILTIN_PSHUFD);
13527 def_builtin (MASK_SSE2, "__builtin_ia32_pshuflw", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSHUFLW);
13528 def_builtin (MASK_SSE2, "__builtin_ia32_pshufhw", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSHUFHW);
13529 def_builtin (MASK_SSE2, "__builtin_ia32_psadbw128", v2di_ftype_v16qi_v16qi, IX86_BUILTIN_PSADBW128);
13530
13531 def_builtin (MASK_SSE2, "__builtin_ia32_sqrtpd", v2df_ftype_v2df, IX86_BUILTIN_SQRTPD);
13532 def_builtin (MASK_SSE2, "__builtin_ia32_sqrtsd", v2df_ftype_v2df, IX86_BUILTIN_SQRTSD);
13533
13534 def_builtin (MASK_SSE2, "__builtin_ia32_shufpd", v2df_ftype_v2df_v2df_int, IX86_BUILTIN_SHUFPD);
13535
13536 def_builtin (MASK_SSE2, "__builtin_ia32_cvtdq2pd", v2df_ftype_v4si, IX86_BUILTIN_CVTDQ2PD);
13537 def_builtin (MASK_SSE2, "__builtin_ia32_cvtdq2ps", v4sf_ftype_v4si, IX86_BUILTIN_CVTDQ2PS);
13538
13539 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2dq", v4si_ftype_v2df, IX86_BUILTIN_CVTPD2DQ);
13540 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2pi", v2si_ftype_v2df, IX86_BUILTIN_CVTPD2PI);
13541 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2ps", v4sf_ftype_v2df, IX86_BUILTIN_CVTPD2PS);
13542 def_builtin (MASK_SSE2, "__builtin_ia32_cvttpd2dq", v4si_ftype_v2df, IX86_BUILTIN_CVTTPD2DQ);
13543 def_builtin (MASK_SSE2, "__builtin_ia32_cvttpd2pi", v2si_ftype_v2df, IX86_BUILTIN_CVTTPD2PI);
13544
13545 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpi2pd", v2df_ftype_v2si, IX86_BUILTIN_CVTPI2PD);
13546
13547 def_builtin (MASK_SSE2, "__builtin_ia32_cvtsd2si", int_ftype_v2df, IX86_BUILTIN_CVTSD2SI);
13548 def_builtin (MASK_SSE2, "__builtin_ia32_cvttsd2si", int_ftype_v2df, IX86_BUILTIN_CVTTSD2SI);
13549 def_builtin (MASK_SSE2 | MASK_64BIT, "__builtin_ia32_cvtsd2si64", int64_ftype_v2df, IX86_BUILTIN_CVTSD2SI64);
13550 def_builtin (MASK_SSE2 | MASK_64BIT, "__builtin_ia32_cvttsd2si64", int64_ftype_v2df, IX86_BUILTIN_CVTTSD2SI64);
13551
13552 def_builtin (MASK_SSE2, "__builtin_ia32_cvtps2dq", v4si_ftype_v4sf, IX86_BUILTIN_CVTPS2DQ);
13553 def_builtin (MASK_SSE2, "__builtin_ia32_cvtps2pd", v2df_ftype_v4sf, IX86_BUILTIN_CVTPS2PD);
13554 def_builtin (MASK_SSE2, "__builtin_ia32_cvttps2dq", v4si_ftype_v4sf, IX86_BUILTIN_CVTTPS2DQ);
13555
13556 def_builtin (MASK_SSE2, "__builtin_ia32_cvtsi2sd", v2df_ftype_v2df_int, IX86_BUILTIN_CVTSI2SD);
13557 def_builtin (MASK_SSE2 | MASK_64BIT, "__builtin_ia32_cvtsi642sd", v2df_ftype_v2df_int64, IX86_BUILTIN_CVTSI642SD);
13558 def_builtin (MASK_SSE2, "__builtin_ia32_cvtsd2ss", v4sf_ftype_v4sf_v2df, IX86_BUILTIN_CVTSD2SS);
13559 def_builtin (MASK_SSE2, "__builtin_ia32_cvtss2sd", v2df_ftype_v2df_v4sf, IX86_BUILTIN_CVTSS2SD);
13560
13561 def_builtin (MASK_SSE2, "__builtin_ia32_setpd1", v2df_ftype_double, IX86_BUILTIN_SETPD1);
13562 def_builtin (MASK_SSE2, "__builtin_ia32_setpd", v2df_ftype_double_double, IX86_BUILTIN_SETPD);
13563 def_builtin (MASK_SSE2, "__builtin_ia32_setzeropd", ti_ftype_void, IX86_BUILTIN_CLRPD);
13564 def_builtin (MASK_SSE2, "__builtin_ia32_loadpd1", v2df_ftype_pcdouble, IX86_BUILTIN_LOADPD1);
13565 def_builtin (MASK_SSE2, "__builtin_ia32_loadrpd", v2df_ftype_pcdouble, IX86_BUILTIN_LOADRPD);
13566 def_builtin (MASK_SSE2, "__builtin_ia32_storepd1", void_ftype_pdouble_v2df, IX86_BUILTIN_STOREPD1);
13567 def_builtin (MASK_SSE2, "__builtin_ia32_storerpd", void_ftype_pdouble_v2df, IX86_BUILTIN_STORERPD);
13568
13569 def_builtin (MASK_SSE2, "__builtin_ia32_clflush", void_ftype_pcvoid, IX86_BUILTIN_CLFLUSH);
13570 def_builtin (MASK_SSE2, "__builtin_ia32_lfence", void_ftype_void, IX86_BUILTIN_LFENCE);
13571 def_builtin (MASK_SSE2, "__builtin_ia32_mfence", void_ftype_void, IX86_BUILTIN_MFENCE);
13572
13573 def_builtin (MASK_SSE2, "__builtin_ia32_loaddqa", v16qi_ftype_pcchar, IX86_BUILTIN_LOADDQA);
13574 def_builtin (MASK_SSE2, "__builtin_ia32_loaddqu", v16qi_ftype_pcchar, IX86_BUILTIN_LOADDQU);
13575 def_builtin (MASK_SSE2, "__builtin_ia32_loadd", v4si_ftype_pcint, IX86_BUILTIN_LOADD);
13576 def_builtin (MASK_SSE2, "__builtin_ia32_storedqa", void_ftype_pchar_v16qi, IX86_BUILTIN_STOREDQA);
13577 def_builtin (MASK_SSE2, "__builtin_ia32_storedqu", void_ftype_pchar_v16qi, IX86_BUILTIN_STOREDQU);
13578 def_builtin (MASK_SSE2, "__builtin_ia32_stored", void_ftype_pcint_v4si, IX86_BUILTIN_STORED);
13579 def_builtin (MASK_SSE2, "__builtin_ia32_movq", v2di_ftype_v2di, IX86_BUILTIN_MOVQ);
13580
13581 def_builtin (MASK_SSE, "__builtin_ia32_setzero128", v2di_ftype_void, IX86_BUILTIN_CLRTI);
13582
13583 def_builtin (MASK_SSE2, "__builtin_ia32_psllw128", v8hi_ftype_v8hi_v2di, IX86_BUILTIN_PSLLW128);
13584 def_builtin (MASK_SSE2, "__builtin_ia32_pslld128", v4si_ftype_v4si_v2di, IX86_BUILTIN_PSLLD128);
13585 def_builtin (MASK_SSE2, "__builtin_ia32_psllq128", v2di_ftype_v2di_v2di, IX86_BUILTIN_PSLLQ128);
13586
13587 def_builtin (MASK_SSE2, "__builtin_ia32_psrlw128", v8hi_ftype_v8hi_v2di, IX86_BUILTIN_PSRLW128);
13588 def_builtin (MASK_SSE2, "__builtin_ia32_psrld128", v4si_ftype_v4si_v2di, IX86_BUILTIN_PSRLD128);
13589 def_builtin (MASK_SSE2, "__builtin_ia32_psrlq128", v2di_ftype_v2di_v2di, IX86_BUILTIN_PSRLQ128);
13590
13591 def_builtin (MASK_SSE2, "__builtin_ia32_psraw128", v8hi_ftype_v8hi_v2di, IX86_BUILTIN_PSRAW128);
13592 def_builtin (MASK_SSE2, "__builtin_ia32_psrad128", v4si_ftype_v4si_v2di, IX86_BUILTIN_PSRAD128);
13593
13594 def_builtin (MASK_SSE2, "__builtin_ia32_pslldqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSLLDQI128);
13595 def_builtin (MASK_SSE2, "__builtin_ia32_psllwi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSLLWI128);
13596 def_builtin (MASK_SSE2, "__builtin_ia32_pslldi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSLLDI128);
13597 def_builtin (MASK_SSE2, "__builtin_ia32_psllqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSLLQI128);
13598
13599 def_builtin (MASK_SSE2, "__builtin_ia32_psrldqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSRLDQI128);
13600 def_builtin (MASK_SSE2, "__builtin_ia32_psrlwi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSRLWI128);
13601 def_builtin (MASK_SSE2, "__builtin_ia32_psrldi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSRLDI128);
13602 def_builtin (MASK_SSE2, "__builtin_ia32_psrlqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSRLQI128);
13603
13604 def_builtin (MASK_SSE2, "__builtin_ia32_psrawi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSRAWI128);
13605 def_builtin (MASK_SSE2, "__builtin_ia32_psradi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSRADI128);
13606
13607 def_builtin (MASK_SSE2, "__builtin_ia32_pmaddwd128", v4si_ftype_v8hi_v8hi, IX86_BUILTIN_PMADDWD128);
13608
13609 /* Prescott New Instructions. */
13610 def_builtin (MASK_SSE3, "__builtin_ia32_monitor",
13611 void_ftype_pcvoid_unsigned_unsigned,
13612 IX86_BUILTIN_MONITOR);
13613 def_builtin (MASK_SSE3, "__builtin_ia32_mwait",
13614 void_ftype_unsigned_unsigned,
13615 IX86_BUILTIN_MWAIT);
13616 def_builtin (MASK_SSE3, "__builtin_ia32_movshdup",
13617 v4sf_ftype_v4sf,
13618 IX86_BUILTIN_MOVSHDUP);
13619 def_builtin (MASK_SSE3, "__builtin_ia32_movsldup",
13620 v4sf_ftype_v4sf,
13621 IX86_BUILTIN_MOVSLDUP);
13622 def_builtin (MASK_SSE3, "__builtin_ia32_lddqu",
13623 v16qi_ftype_pcchar, IX86_BUILTIN_LDDQU);
13624 def_builtin (MASK_SSE3, "__builtin_ia32_loadddup",
13625 v2df_ftype_pcdouble, IX86_BUILTIN_LOADDDUP);
13626 def_builtin (MASK_SSE3, "__builtin_ia32_movddup",
13627 v2df_ftype_v2df, IX86_BUILTIN_MOVDDUP);
13628 }
13629
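/* A minimal sketch of how a further builtin would be registered here,
   assuming a hypothetical IX86_BUILTIN_FOO enumerator and reusing one of
   the type nodes built above:

     def_builtin (MASK_SSE, "__builtin_ia32_foo",
                  v4sf_ftype_v4sf_v4sf, IX86_BUILTIN_FOO);

   The mask gates registration on the corresponding -m flags, the type
   node fixes the prototype seen by the front end, and the enumerator is
   what ix86_expand_builtin later dispatches on.  */
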
13630 /* Errors in the source file can cause expand_expr to return const0_rtx
13631 where we expect a vector. To avoid crashing, use one of the vector
13632 clear instructions. */
13633 static rtx
13634 safe_vector_operand (rtx x, enum machine_mode mode)
13635 {
13636 if (x != const0_rtx)
13637 return x;
13638 x = gen_reg_rtx (mode);
13639
13640 if (VALID_MMX_REG_MODE (mode) || VALID_MMX_REG_MODE_3DNOW (mode))
13641 emit_insn (gen_mmx_clrdi (mode == DImode ? x
13642 : gen_rtx_SUBREG (DImode, x, 0)));
13643 else
13644 emit_insn (gen_sse_clrv4sf (mode == V4SFmode ? x
13645 : gen_rtx_SUBREG (V4SFmode, x, 0),
13646 CONST0_RTX (V4SFmode)));
13647 return x;
13648 }
13649
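/* Illustrative only: a type error in the source, e.g. a call such as

     __builtin_ia32_addps (1, 2)

   can leave expand_expr handing back const0_rtx where a vector value was
   expected; safe_vector_operand above then substitutes a cleared MMX or
   SSE register so that the expanders below still see a well-formed
   vector operand.  (The exact front-end diagnosis is beside the point
   here.)  */
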
13650 /* Subroutine of ix86_expand_builtin to take care of binop insns. */
13651
13652 static rtx
13653 ix86_expand_binop_builtin (enum insn_code icode, tree arglist, rtx target)
13654 {
13655 rtx pat;
13656 tree arg0 = TREE_VALUE (arglist);
13657 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13658 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13659 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13660 enum machine_mode tmode = insn_data[icode].operand[0].mode;
13661 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
13662 enum machine_mode mode1 = insn_data[icode].operand[2].mode;
13663
13664 if (VECTOR_MODE_P (mode0))
13665 op0 = safe_vector_operand (op0, mode0);
13666 if (VECTOR_MODE_P (mode1))
13667 op1 = safe_vector_operand (op1, mode1);
13668
13669 if (! target
13670 || GET_MODE (target) != tmode
13671 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13672 target = gen_reg_rtx (tmode);
13673
13674 if (GET_MODE (op1) == SImode && mode1 == TImode)
13675 {
13676 rtx x = gen_reg_rtx (V4SImode);
13677 emit_insn (gen_sse2_loadd (x, op1));
13678 op1 = gen_lowpart (TImode, x);
13679 }
13680
13681 /* In case the insn wants input operands in modes different from
13682 the result, abort. */
13683 if ((GET_MODE (op0) != mode0 && GET_MODE (op0) != VOIDmode)
13684 || (GET_MODE (op1) != mode1 && GET_MODE (op1) != VOIDmode))
13685 abort ();
13686
13687 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
13688 op0 = copy_to_mode_reg (mode0, op0);
13689 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
13690 op1 = copy_to_mode_reg (mode1, op1);
13691
13692 /* In the commutative cases, both op0 and op1 are nonimmediate_operand,
13693 yet one of the two must not be a memory. This is normally enforced
13694 by expanders, but we didn't bother to create one here. */
13695 if (GET_CODE (op0) == MEM && GET_CODE (op1) == MEM)
13696 op0 = copy_to_mode_reg (mode0, op0);
13697
13698 pat = GEN_FCN (icode) (target, op0, op1);
13699 if (! pat)
13700 return 0;
13701 emit_insn (pat);
13702 return target;
13703 }
13704
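/* A hedged sketch of the MEM/MEM fix-up above: a call such as

     unsigned long long
     and64 (unsigned long long *p, unsigned long long *q)
     {
       return __builtin_ia32_pand (*p, *q);
     }

   may expand with both operands as memory references; because the
   commutative pattern accepts at most one memory operand, op0 is copied
   into a register first, as done just before GEN_FCN above.  */
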
13705 /* Subroutine of ix86_expand_builtin to take care of stores. */
13706
13707 static rtx
13708 ix86_expand_store_builtin (enum insn_code icode, tree arglist)
13709 {
13710 rtx pat;
13711 tree arg0 = TREE_VALUE (arglist);
13712 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13713 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13714 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13715 enum machine_mode mode0 = insn_data[icode].operand[0].mode;
13716 enum machine_mode mode1 = insn_data[icode].operand[1].mode;
13717
13718 if (VECTOR_MODE_P (mode1))
13719 op1 = safe_vector_operand (op1, mode1);
13720
13721 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
13722 op1 = copy_to_mode_reg (mode1, op1);
13723
13724 pat = GEN_FCN (icode) (op0, op1);
13725 if (pat)
13726 emit_insn (pat);
13727 return 0;
13728 }
13729
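/* A hedged example of a store expansion handled above: with

     typedef float __v4sf __attribute__ ((vector_size (16)));

     void
     put (float *p, __v4sf v)
     {
       __builtin_ia32_storeaps (p, v);
     }

   op0 becomes a MEM built from the pointer argument, op1 is forced into
   a register, and the corresponding aligned-store pattern is emitted;
   since the builtin yields no value, 0 is returned.  */
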
13730 /* Subroutine of ix86_expand_builtin to take care of unop insns. */
13731
13732 static rtx
13733 ix86_expand_unop_builtin (enum insn_code icode, tree arglist,
13734 rtx target, int do_load)
13735 {
13736 rtx pat;
13737 tree arg0 = TREE_VALUE (arglist);
13738 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13739 enum machine_mode tmode = insn_data[icode].operand[0].mode;
13740 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
13741
13742 if (! target
13743 || GET_MODE (target) != tmode
13744 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13745 target = gen_reg_rtx (tmode);
13746 if (do_load)
13747 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
13748 else
13749 {
13750 if (VECTOR_MODE_P (mode0))
13751 op0 = safe_vector_operand (op0, mode0);
13752
13753 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
13754 op0 = copy_to_mode_reg (mode0, op0);
13755 }
13756
13757 pat = GEN_FCN (icode) (target, op0);
13758 if (! pat)
13759 return 0;
13760 emit_insn (pat);
13761 return target;
13762 }
13763
13764 /* Subroutine of ix86_expand_builtin to take care of three special unop insns:
13765 sqrtss, rsqrtss, rcpss. */
13766
13767 static rtx
13768 ix86_expand_unop1_builtin (enum insn_code icode, tree arglist, rtx target)
13769 {
13770 rtx pat;
13771 tree arg0 = TREE_VALUE (arglist);
13772 rtx op1, op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13773 enum machine_mode tmode = insn_data[icode].operand[0].mode;
13774 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
13775
13776 if (! target
13777 || GET_MODE (target) != tmode
13778 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13779 target = gen_reg_rtx (tmode);
13780
13781 if (VECTOR_MODE_P (mode0))
13782 op0 = safe_vector_operand (op0, mode0);
13783
13784 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
13785 op0 = copy_to_mode_reg (mode0, op0);
13786
13787 op1 = op0;
13788 if (! (*insn_data[icode].operand[2].predicate) (op1, mode0))
13789 op1 = copy_to_mode_reg (mode0, op1);
13790
13791 pat = GEN_FCN (icode) (target, op0, op1);
13792 if (! pat)
13793 return 0;
13794 emit_insn (pat);
13795 return target;
13796 }
13797
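/* Sketch of why op0 is duplicated above: the scalar patterns take two
   inputs so that the upper elements of the result can be carried over
   from the first one.  A call like

     __v4sf
     approx_recip (__v4sf x)
     {
       return __builtin_ia32_rcpss (x);
     }

   therefore expands with x supplied as both op0 and op1, replacing only
   the low element with the approximate reciprocal.  (Illustrative;
   assumes the usual __v4sf vector typedef.)  */
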
13798 /* Subroutine of ix86_expand_builtin to take care of comparison insns. */
13799
13800 static rtx
13801 ix86_expand_sse_compare (const struct builtin_description *d, tree arglist,
13802 rtx target)
13803 {
13804 rtx pat;
13805 tree arg0 = TREE_VALUE (arglist);
13806 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13807 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13808 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13809 rtx op2;
13810 enum machine_mode tmode = insn_data[d->icode].operand[0].mode;
13811 enum machine_mode mode0 = insn_data[d->icode].operand[1].mode;
13812 enum machine_mode mode1 = insn_data[d->icode].operand[2].mode;
13813 enum rtx_code comparison = d->comparison;
13814
13815 if (VECTOR_MODE_P (mode0))
13816 op0 = safe_vector_operand (op0, mode0);
13817 if (VECTOR_MODE_P (mode1))
13818 op1 = safe_vector_operand (op1, mode1);
13819
13820 /* Swap operands if we have a comparison that isn't available in
13821 hardware. */
13822 if (d->flag)
13823 {
13824 rtx tmp = gen_reg_rtx (mode1);
13825 emit_move_insn (tmp, op1);
13826 op1 = op0;
13827 op0 = tmp;
13828 }
13829
13830 if (! target
13831 || GET_MODE (target) != tmode
13832 || ! (*insn_data[d->icode].operand[0].predicate) (target, tmode))
13833 target = gen_reg_rtx (tmode);
13834
13835 if (! (*insn_data[d->icode].operand[1].predicate) (op0, mode0))
13836 op0 = copy_to_mode_reg (mode0, op0);
13837 if (! (*insn_data[d->icode].operand[2].predicate) (op1, mode1))
13838 op1 = copy_to_mode_reg (mode1, op1);
13839
13840 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
13841 pat = GEN_FCN (d->icode) (target, op0, op1, op2);
13842 if (! pat)
13843 return 0;
13844 emit_insn (pat);
13845 return target;
13846 }
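/* Editor's note (illustrative): D->FLAG is set for the comparison builtins
   the hardware cannot encode directly.  SSE's CMPPS/CMPPD provide eq/lt/le
   (and their negations) but not gt/ge, so, assuming the usual intrinsic
   mapping, something like

       __m128 f (__m128 a, __m128 b) { return _mm_cmpgt_ps (a, b); }

   is expanded by swapping the operands and emitting the LT form of the
   mask-compare pattern selected by D->ICODE.  */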
13847
13848 /* Subroutine of ix86_expand_builtin to take care of comi insns. */
13849
13850 static rtx
13851 ix86_expand_sse_comi (const struct builtin_description *d, tree arglist,
13852 rtx target)
13853 {
13854 rtx pat;
13855 tree arg0 = TREE_VALUE (arglist);
13856 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13857 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13858 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13859 rtx op2;
13860 enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
13861 enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
13862 enum rtx_code comparison = d->comparison;
13863
13864 if (VECTOR_MODE_P (mode0))
13865 op0 = safe_vector_operand (op0, mode0);
13866 if (VECTOR_MODE_P (mode1))
13867 op1 = safe_vector_operand (op1, mode1);
13868
13869 /* Swap operands if we have a comparison that isn't available in
13870 hardware. */
13871 if (d->flag)
13872 {
13873 rtx tmp = op1;
13874 op1 = op0;
13875 op0 = tmp;
13876 }
13877
13878 target = gen_reg_rtx (SImode);
13879 emit_move_insn (target, const0_rtx);
13880 target = gen_rtx_SUBREG (QImode, target, 0);
13881
13882 if (! (*insn_data[d->icode].operand[0].predicate) (op0, mode0))
13883 op0 = copy_to_mode_reg (mode0, op0);
13884 if (! (*insn_data[d->icode].operand[1].predicate) (op1, mode1))
13885 op1 = copy_to_mode_reg (mode1, op1);
13886
13887 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
13888 pat = GEN_FCN (d->icode) (op0, op1);
13889 if (! pat)
13890 return 0;
13891 emit_insn (pat);
13892 emit_insn (gen_rtx_SET (VOIDmode,
13893 gen_rtx_STRICT_LOW_PART (VOIDmode, target),
13894 gen_rtx_fmt_ee (comparison, QImode,
13895 SET_DEST (pat),
13896 const0_rtx)));
13897
13898 return SUBREG_REG (target);
13899 }
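/* Editor's note (illustrative): the comi builtins produce an int, not a
   vector.  The sequence above emits the comiss/comisd compare and then a
   setcc into the low byte of a zeroed SImode pseudo via STRICT_LOW_PART, so
   that, assuming the usual intrinsic mapping, code such as

       int f (__m128 a, __m128 b) { return _mm_comilt_ss (a, b); }

   yields roughly "comiss b, a; setb %al", with the result returned in the
   full SImode register that was cleared beforehand.  */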
13900
13901 /* Expand an expression EXP that calls a built-in function,
13902 with result going to TARGET if that's convenient
13903 (and in mode MODE if that's convenient).
13904 SUBTARGET may be used as the target for computing one of EXP's operands.
13905 IGNORE is nonzero if the value is to be ignored. */
13906
13907 rtx
13908 ix86_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
13909 enum machine_mode mode ATTRIBUTE_UNUSED,
13910 int ignore ATTRIBUTE_UNUSED)
13911 {
13912 const struct builtin_description *d;
13913 size_t i;
13914 enum insn_code icode;
13915 tree fndecl = TREE_OPERAND (TREE_OPERAND (exp, 0), 0);
13916 tree arglist = TREE_OPERAND (exp, 1);
13917 tree arg0, arg1, arg2;
13918 rtx op0, op1, op2, pat;
13919 enum machine_mode tmode, mode0, mode1, mode2;
13920 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
13921
13922 switch (fcode)
13923 {
13924 case IX86_BUILTIN_EMMS:
13925 emit_insn (gen_emms ());
13926 return 0;
13927
13928 case IX86_BUILTIN_SFENCE:
13929 emit_insn (gen_sfence ());
13930 return 0;
13931
13932 case IX86_BUILTIN_PEXTRW:
13933 case IX86_BUILTIN_PEXTRW128:
13934 icode = (fcode == IX86_BUILTIN_PEXTRW
13935 ? CODE_FOR_mmx_pextrw
13936 : CODE_FOR_sse2_pextrw);
13937 arg0 = TREE_VALUE (arglist);
13938 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13939 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13940 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13941 tmode = insn_data[icode].operand[0].mode;
13942 mode0 = insn_data[icode].operand[1].mode;
13943 mode1 = insn_data[icode].operand[2].mode;
13944
13945 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
13946 op0 = copy_to_mode_reg (mode0, op0);
13947 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
13948 {
13949 error ("selector must be an integer constant in the range 0..%i",
13950 fcode == IX86_BUILTIN_PEXTRW ? 3:7);
13951 return gen_reg_rtx (tmode);
13952 }
13953 if (target == 0
13954 || GET_MODE (target) != tmode
13955 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13956 target = gen_reg_rtx (tmode);
13957 pat = GEN_FCN (icode) (target, op0, op1);
13958 if (! pat)
13959 return 0;
13960 emit_insn (pat);
13961 return target;
13962
13963 case IX86_BUILTIN_PINSRW:
13964 case IX86_BUILTIN_PINSRW128:
13965 icode = (fcode == IX86_BUILTIN_PINSRW
13966 ? CODE_FOR_mmx_pinsrw
13967 : CODE_FOR_sse2_pinsrw);
13968 arg0 = TREE_VALUE (arglist);
13969 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13970 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
13971 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13972 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13973 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
13974 tmode = insn_data[icode].operand[0].mode;
13975 mode0 = insn_data[icode].operand[1].mode;
13976 mode1 = insn_data[icode].operand[2].mode;
13977 mode2 = insn_data[icode].operand[3].mode;
13978
13979 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
13980 op0 = copy_to_mode_reg (mode0, op0);
13981 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
13982 op1 = copy_to_mode_reg (mode1, op1);
13983 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
13984 {
13985 error ("selector must be an integer constant in the range 0..%i",
13986 fcode == IX86_BUILTIN_PINSRW ? 15:255);
13987 return const0_rtx;
13988 }
13989 if (target == 0
13990 || GET_MODE (target) != tmode
13991 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13992 target = gen_reg_rtx (tmode);
13993 pat = GEN_FCN (icode) (target, op0, op1, op2);
13994 if (! pat)
13995 return 0;
13996 emit_insn (pat);
13997 return target;
13998
13999 case IX86_BUILTIN_MASKMOVQ:
14000 case IX86_BUILTIN_MASKMOVDQU:
14001 icode = (fcode == IX86_BUILTIN_MASKMOVQ
14002 ? (TARGET_64BIT ? CODE_FOR_mmx_maskmovq_rex : CODE_FOR_mmx_maskmovq)
14003 : (TARGET_64BIT ? CODE_FOR_sse2_maskmovdqu_rex64
14004 : CODE_FOR_sse2_maskmovdqu));
14005 /* Note the arg order is different from the operand order. */
14006 arg1 = TREE_VALUE (arglist);
14007 arg2 = TREE_VALUE (TREE_CHAIN (arglist));
14008 arg0 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
14009 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14010 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
14011 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
14012 mode0 = insn_data[icode].operand[0].mode;
14013 mode1 = insn_data[icode].operand[1].mode;
14014 mode2 = insn_data[icode].operand[2].mode;
14015
14016 if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
14017 op0 = copy_to_mode_reg (mode0, op0);
14018 if (! (*insn_data[icode].operand[1].predicate) (op1, mode1))
14019 op1 = copy_to_mode_reg (mode1, op1);
14020 if (! (*insn_data[icode].operand[2].predicate) (op2, mode2))
14021 op2 = copy_to_mode_reg (mode2, op2);
14022 pat = GEN_FCN (icode) (op0, op1, op2);
14023 if (! pat)
14024 return 0;
14025 emit_insn (pat);
14026 return 0;
14027
14028 case IX86_BUILTIN_SQRTSS:
14029 return ix86_expand_unop1_builtin (CODE_FOR_vmsqrtv4sf2, arglist, target);
14030 case IX86_BUILTIN_RSQRTSS:
14031 return ix86_expand_unop1_builtin (CODE_FOR_vmrsqrtv4sf2, arglist, target);
14032 case IX86_BUILTIN_RCPSS:
14033 return ix86_expand_unop1_builtin (CODE_FOR_vmrcpv4sf2, arglist, target);
14034
14035 case IX86_BUILTIN_LOADAPS:
14036 return ix86_expand_unop_builtin (CODE_FOR_sse_movaps, arglist, target, 1);
14037
14038 case IX86_BUILTIN_LOADUPS:
14039 return ix86_expand_unop_builtin (CODE_FOR_sse_movups, arglist, target, 1);
14040
14041 case IX86_BUILTIN_STOREAPS:
14042 return ix86_expand_store_builtin (CODE_FOR_sse_movaps, arglist);
14043
14044 case IX86_BUILTIN_STOREUPS:
14045 return ix86_expand_store_builtin (CODE_FOR_sse_movups, arglist);
14046
14047 case IX86_BUILTIN_LOADSS:
14048 return ix86_expand_unop_builtin (CODE_FOR_sse_loadss, arglist, target, 1);
14049
14050 case IX86_BUILTIN_STORESS:
14051 return ix86_expand_store_builtin (CODE_FOR_sse_storess, arglist);
14052
14053 case IX86_BUILTIN_LOADHPS:
14054 case IX86_BUILTIN_LOADLPS:
14055 case IX86_BUILTIN_LOADHPD:
14056 case IX86_BUILTIN_LOADLPD:
14057 icode = (fcode == IX86_BUILTIN_LOADHPS ? CODE_FOR_sse_movhps
14058 : fcode == IX86_BUILTIN_LOADLPS ? CODE_FOR_sse_movlps
14059 : fcode == IX86_BUILTIN_LOADHPD ? CODE_FOR_sse2_movhpd
14060 : CODE_FOR_sse2_movsd);
14061 arg0 = TREE_VALUE (arglist);
14062 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14063 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14064 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
14065 tmode = insn_data[icode].operand[0].mode;
14066 mode0 = insn_data[icode].operand[1].mode;
14067 mode1 = insn_data[icode].operand[2].mode;
14068
14069 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
14070 op0 = copy_to_mode_reg (mode0, op0);
14071 op1 = gen_rtx_MEM (mode1, copy_to_mode_reg (Pmode, op1));
14072 if (target == 0
14073 || GET_MODE (target) != tmode
14074 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
14075 target = gen_reg_rtx (tmode);
14076 pat = GEN_FCN (icode) (target, op0, op1);
14077 if (! pat)
14078 return 0;
14079 emit_insn (pat);
14080 return target;
14081
14082 case IX86_BUILTIN_STOREHPS:
14083 case IX86_BUILTIN_STORELPS:
14084 case IX86_BUILTIN_STOREHPD:
14085 case IX86_BUILTIN_STORELPD:
14086 icode = (fcode == IX86_BUILTIN_STOREHPS ? CODE_FOR_sse_movhps
14087 : fcode == IX86_BUILTIN_STORELPS ? CODE_FOR_sse_movlps
14088 : fcode == IX86_BUILTIN_STOREHPD ? CODE_FOR_sse2_movhpd
14089 : CODE_FOR_sse2_movsd);
14090 arg0 = TREE_VALUE (arglist);
14091 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14092 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14093 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
14094 mode0 = insn_data[icode].operand[1].mode;
14095 mode1 = insn_data[icode].operand[2].mode;
14096
14097 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
14098 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
14099 op1 = copy_to_mode_reg (mode1, op1);
14100
14101 pat = GEN_FCN (icode) (op0, op0, op1);
14102 if (! pat)
14103 return 0;
14104 emit_insn (pat);
14105 return 0;
14106
14107 case IX86_BUILTIN_MOVNTPS:
14108 return ix86_expand_store_builtin (CODE_FOR_sse_movntv4sf, arglist);
14109 case IX86_BUILTIN_MOVNTQ:
14110 return ix86_expand_store_builtin (CODE_FOR_sse_movntdi, arglist);
14111
14112 case IX86_BUILTIN_LDMXCSR:
14113 op0 = expand_expr (TREE_VALUE (arglist), NULL_RTX, VOIDmode, 0);
14114 target = assign_386_stack_local (SImode, 0);
14115 emit_move_insn (target, op0);
14116 emit_insn (gen_ldmxcsr (target));
14117 return 0;
14118
14119 case IX86_BUILTIN_STMXCSR:
14120 target = assign_386_stack_local (SImode, 0);
14121 emit_insn (gen_stmxcsr (target));
14122 return copy_to_mode_reg (SImode, target);
14123
14124 case IX86_BUILTIN_SHUFPS:
14125 case IX86_BUILTIN_SHUFPD:
14126 icode = (fcode == IX86_BUILTIN_SHUFPS
14127 ? CODE_FOR_sse_shufps
14128 : CODE_FOR_sse2_shufpd);
14129 arg0 = TREE_VALUE (arglist);
14130 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14131 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
14132 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14133 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
14134 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
14135 tmode = insn_data[icode].operand[0].mode;
14136 mode0 = insn_data[icode].operand[1].mode;
14137 mode1 = insn_data[icode].operand[2].mode;
14138 mode2 = insn_data[icode].operand[3].mode;
14139
14140 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
14141 op0 = copy_to_mode_reg (mode0, op0);
14142 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
14143 op1 = copy_to_mode_reg (mode1, op1);
14144 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
14145 {
14146 /* @@@ better error message */
14147 error ("mask must be an immediate");
14148 return gen_reg_rtx (tmode);
14149 }
14150 if (target == 0
14151 || GET_MODE (target) != tmode
14152 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
14153 target = gen_reg_rtx (tmode);
14154 pat = GEN_FCN (icode) (target, op0, op1, op2);
14155 if (! pat)
14156 return 0;
14157 emit_insn (pat);
14158 return target;
14159
14160 case IX86_BUILTIN_PSHUFW:
14161 case IX86_BUILTIN_PSHUFD:
14162 case IX86_BUILTIN_PSHUFHW:
14163 case IX86_BUILTIN_PSHUFLW:
14164 icode = ( fcode == IX86_BUILTIN_PSHUFHW ? CODE_FOR_sse2_pshufhw
14165 : fcode == IX86_BUILTIN_PSHUFLW ? CODE_FOR_sse2_pshuflw
14166 : fcode == IX86_BUILTIN_PSHUFD ? CODE_FOR_sse2_pshufd
14167 : CODE_FOR_mmx_pshufw);
14168 arg0 = TREE_VALUE (arglist);
14169 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14170 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14171 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
14172 tmode = insn_data[icode].operand[0].mode;
14173 mode1 = insn_data[icode].operand[1].mode;
14174 mode2 = insn_data[icode].operand[2].mode;
14175
14176 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
14177 op0 = copy_to_mode_reg (mode1, op0);
14178 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
14179 {
14180 /* @@@ better error message */
14181 error ("mask must be an immediate");
14182 return const0_rtx;
14183 }
14184 if (target == 0
14185 || GET_MODE (target) != tmode
14186 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
14187 target = gen_reg_rtx (tmode);
14188 pat = GEN_FCN (icode) (target, op0, op1);
14189 if (! pat)
14190 return 0;
14191 emit_insn (pat);
14192 return target;
14193
14194 case IX86_BUILTIN_PSLLDQI128:
14195 case IX86_BUILTIN_PSRLDQI128:
14196 icode = ( fcode == IX86_BUILTIN_PSLLDQI128 ? CODE_FOR_sse2_ashlti3
14197 : CODE_FOR_sse2_lshrti3);
14198 arg0 = TREE_VALUE (arglist);
14199 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14200 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14201 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
14202 tmode = insn_data[icode].operand[0].mode;
14203 mode1 = insn_data[icode].operand[1].mode;
14204 mode2 = insn_data[icode].operand[2].mode;
14205
14206 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
14207 {
14208 op0 = copy_to_reg (op0);
14209 op0 = simplify_gen_subreg (mode1, op0, GET_MODE (op0), 0);
14210 }
14211 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
14212 {
14213 error ("shift must be an immediate");
14214 return const0_rtx;
14215 }
14216 target = gen_reg_rtx (V2DImode);
14217 pat = GEN_FCN (icode) (simplify_gen_subreg (tmode, target, V2DImode, 0), op0, op1);
14218 if (! pat)
14219 return 0;
14220 emit_insn (pat);
14221 return target;
14222
14223 case IX86_BUILTIN_FEMMS:
14224 emit_insn (gen_femms ());
14225 return NULL_RTX;
14226
14227 case IX86_BUILTIN_PAVGUSB:
14228 return ix86_expand_binop_builtin (CODE_FOR_pavgusb, arglist, target);
14229
14230 case IX86_BUILTIN_PF2ID:
14231 return ix86_expand_unop_builtin (CODE_FOR_pf2id, arglist, target, 0);
14232
14233 case IX86_BUILTIN_PFACC:
14234 return ix86_expand_binop_builtin (CODE_FOR_pfacc, arglist, target);
14235
14236 case IX86_BUILTIN_PFADD:
14237 return ix86_expand_binop_builtin (CODE_FOR_addv2sf3, arglist, target);
14238
14239 case IX86_BUILTIN_PFCMPEQ:
14240 return ix86_expand_binop_builtin (CODE_FOR_eqv2sf3, arglist, target);
14241
14242 case IX86_BUILTIN_PFCMPGE:
14243 return ix86_expand_binop_builtin (CODE_FOR_gev2sf3, arglist, target);
14244
14245 case IX86_BUILTIN_PFCMPGT:
14246 return ix86_expand_binop_builtin (CODE_FOR_gtv2sf3, arglist, target);
14247
14248 case IX86_BUILTIN_PFMAX:
14249 return ix86_expand_binop_builtin (CODE_FOR_pfmaxv2sf3, arglist, target);
14250
14251 case IX86_BUILTIN_PFMIN:
14252 return ix86_expand_binop_builtin (CODE_FOR_pfminv2sf3, arglist, target);
14253
14254 case IX86_BUILTIN_PFMUL:
14255 return ix86_expand_binop_builtin (CODE_FOR_mulv2sf3, arglist, target);
14256
14257 case IX86_BUILTIN_PFRCP:
14258 return ix86_expand_unop_builtin (CODE_FOR_pfrcpv2sf2, arglist, target, 0);
14259
14260 case IX86_BUILTIN_PFRCPIT1:
14261 return ix86_expand_binop_builtin (CODE_FOR_pfrcpit1v2sf3, arglist, target);
14262
14263 case IX86_BUILTIN_PFRCPIT2:
14264 return ix86_expand_binop_builtin (CODE_FOR_pfrcpit2v2sf3, arglist, target);
14265
14266 case IX86_BUILTIN_PFRSQIT1:
14267 return ix86_expand_binop_builtin (CODE_FOR_pfrsqit1v2sf3, arglist, target);
14268
14269 case IX86_BUILTIN_PFRSQRT:
14270 return ix86_expand_unop_builtin (CODE_FOR_pfrsqrtv2sf2, arglist, target, 0);
14271
14272 case IX86_BUILTIN_PFSUB:
14273 return ix86_expand_binop_builtin (CODE_FOR_subv2sf3, arglist, target);
14274
14275 case IX86_BUILTIN_PFSUBR:
14276 return ix86_expand_binop_builtin (CODE_FOR_subrv2sf3, arglist, target);
14277
14278 case IX86_BUILTIN_PI2FD:
14279 return ix86_expand_unop_builtin (CODE_FOR_floatv2si2, arglist, target, 0);
14280
14281 case IX86_BUILTIN_PMULHRW:
14282 return ix86_expand_binop_builtin (CODE_FOR_pmulhrwv4hi3, arglist, target);
14283
14284 case IX86_BUILTIN_PF2IW:
14285 return ix86_expand_unop_builtin (CODE_FOR_pf2iw, arglist, target, 0);
14286
14287 case IX86_BUILTIN_PFNACC:
14288 return ix86_expand_binop_builtin (CODE_FOR_pfnacc, arglist, target);
14289
14290 case IX86_BUILTIN_PFPNACC:
14291 return ix86_expand_binop_builtin (CODE_FOR_pfpnacc, arglist, target);
14292
14293 case IX86_BUILTIN_PI2FW:
14294 return ix86_expand_unop_builtin (CODE_FOR_pi2fw, arglist, target, 0);
14295
14296 case IX86_BUILTIN_PSWAPDSI:
14297 return ix86_expand_unop_builtin (CODE_FOR_pswapdv2si2, arglist, target, 0);
14298
14299 case IX86_BUILTIN_PSWAPDSF:
14300 return ix86_expand_unop_builtin (CODE_FOR_pswapdv2sf2, arglist, target, 0);
14301
14302 case IX86_BUILTIN_SSE_ZERO:
14303 target = gen_reg_rtx (V4SFmode);
14304 emit_insn (gen_sse_clrv4sf (target, CONST0_RTX (V4SFmode)));
14305 return target;
14306
14307 case IX86_BUILTIN_MMX_ZERO:
14308 target = gen_reg_rtx (DImode);
14309 emit_insn (gen_mmx_clrdi (target));
14310 return target;
14311
14312 case IX86_BUILTIN_CLRTI:
14313 target = gen_reg_rtx (V2DImode);
14314 emit_insn (gen_sse2_clrti (simplify_gen_subreg (TImode, target, V2DImode, 0)));
14315 return target;
14316
14317
14318 case IX86_BUILTIN_SQRTSD:
14319 return ix86_expand_unop1_builtin (CODE_FOR_vmsqrtv2df2, arglist, target);
14320 case IX86_BUILTIN_LOADAPD:
14321 return ix86_expand_unop_builtin (CODE_FOR_sse2_movapd, arglist, target, 1);
14322 case IX86_BUILTIN_LOADUPD:
14323 return ix86_expand_unop_builtin (CODE_FOR_sse2_movupd, arglist, target, 1);
14324
14325 case IX86_BUILTIN_STOREAPD:
14326 return ix86_expand_store_builtin (CODE_FOR_sse2_movapd, arglist);
14327 case IX86_BUILTIN_STOREUPD:
14328 return ix86_expand_store_builtin (CODE_FOR_sse2_movupd, arglist);
14329
14330 case IX86_BUILTIN_LOADSD:
14331 return ix86_expand_unop_builtin (CODE_FOR_sse2_loadsd, arglist, target, 1);
14332
14333 case IX86_BUILTIN_STORESD:
14334 return ix86_expand_store_builtin (CODE_FOR_sse2_storesd, arglist);
14335
14336 case IX86_BUILTIN_SETPD1:
14337 target = assign_386_stack_local (DFmode, 0);
14338 arg0 = TREE_VALUE (arglist);
14339 emit_move_insn (adjust_address (target, DFmode, 0),
14340 expand_expr (arg0, NULL_RTX, VOIDmode, 0));
14341 op0 = gen_reg_rtx (V2DFmode);
14342 emit_insn (gen_sse2_loadsd (op0, adjust_address (target, V2DFmode, 0)));
14343 emit_insn (gen_sse2_shufpd (op0, op0, op0, const0_rtx));
14344 return op0;
14345
14346 case IX86_BUILTIN_SETPD:
14347 target = assign_386_stack_local (V2DFmode, 0);
14348 arg0 = TREE_VALUE (arglist);
14349 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14350 emit_move_insn (adjust_address (target, DFmode, 0),
14351 expand_expr (arg0, NULL_RTX, VOIDmode, 0));
14352 emit_move_insn (adjust_address (target, DFmode, 8),
14353 expand_expr (arg1, NULL_RTX, VOIDmode, 0));
14354 op0 = gen_reg_rtx (V2DFmode);
14355 emit_insn (gen_sse2_movapd (op0, target));
14356 return op0;
14357
14358 case IX86_BUILTIN_LOADRPD:
14359 target = ix86_expand_unop_builtin (CODE_FOR_sse2_movapd, arglist,
14360 gen_reg_rtx (V2DFmode), 1);
14361 emit_insn (gen_sse2_shufpd (target, target, target, const1_rtx));
14362 return target;
14363
14364 case IX86_BUILTIN_LOADPD1:
14365 target = ix86_expand_unop_builtin (CODE_FOR_sse2_loadsd, arglist,
14366 gen_reg_rtx (V2DFmode), 1);
14367 emit_insn (gen_sse2_shufpd (target, target, target, const0_rtx));
14368 return target;
14369
14370 case IX86_BUILTIN_STOREPD1:
14371 return ix86_expand_store_builtin (CODE_FOR_sse2_movapd, arglist);
14372 case IX86_BUILTIN_STORERPD:
14373 return ix86_expand_store_builtin (CODE_FOR_sse2_movapd, arglist);
14374
14375 case IX86_BUILTIN_CLRPD:
14376 target = gen_reg_rtx (V2DFmode);
14377 emit_insn (gen_sse_clrv2df (target));
14378 return target;
14379
14380 case IX86_BUILTIN_MFENCE:
14381 emit_insn (gen_sse2_mfence ());
14382 return 0;
14383 case IX86_BUILTIN_LFENCE:
14384 emit_insn (gen_sse2_lfence ());
14385 return 0;
14386
14387 case IX86_BUILTIN_CLFLUSH:
14388 arg0 = TREE_VALUE (arglist);
14389 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14390 icode = CODE_FOR_sse2_clflush;
14391 if (! (*insn_data[icode].operand[0].predicate) (op0, Pmode))
14392 op0 = copy_to_mode_reg (Pmode, op0);
14393
14394 emit_insn (gen_sse2_clflush (op0));
14395 return 0;
14396
14397 case IX86_BUILTIN_MOVNTPD:
14398 return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2df, arglist);
14399 case IX86_BUILTIN_MOVNTDQ:
14400 return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2di, arglist);
14401 case IX86_BUILTIN_MOVNTI:
14402 return ix86_expand_store_builtin (CODE_FOR_sse2_movntsi, arglist);
14403
14404 case IX86_BUILTIN_LOADDQA:
14405 return ix86_expand_unop_builtin (CODE_FOR_sse2_movdqa, arglist, target, 1);
14406 case IX86_BUILTIN_LOADDQU:
14407 return ix86_expand_unop_builtin (CODE_FOR_sse2_movdqu, arglist, target, 1);
14408 case IX86_BUILTIN_LOADD:
14409 return ix86_expand_unop_builtin (CODE_FOR_sse2_loadd, arglist, target, 1);
14410
14411 case IX86_BUILTIN_STOREDQA:
14412 return ix86_expand_store_builtin (CODE_FOR_sse2_movdqa, arglist);
14413 case IX86_BUILTIN_STOREDQU:
14414 return ix86_expand_store_builtin (CODE_FOR_sse2_movdqu, arglist);
14415 case IX86_BUILTIN_STORED:
14416 return ix86_expand_store_builtin (CODE_FOR_sse2_stored, arglist);
14417
14418 case IX86_BUILTIN_MONITOR:
14419 arg0 = TREE_VALUE (arglist);
14420 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14421 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
14422 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14423 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
14424 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
14425 if (!REG_P (op0))
14426 op0 = copy_to_mode_reg (SImode, op0);
14427 if (!REG_P (op1))
14428 op1 = copy_to_mode_reg (SImode, op1);
14429 if (!REG_P (op2))
14430 op2 = copy_to_mode_reg (SImode, op2);
14431 emit_insn (gen_monitor (op0, op1, op2));
14432 return 0;
14433
14434 case IX86_BUILTIN_MWAIT:
14435 arg0 = TREE_VALUE (arglist);
14436 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14437 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14438 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
14439 if (!REG_P (op0))
14440 op0 = copy_to_mode_reg (SImode, op0);
14441 if (!REG_P (op1))
14442 op1 = copy_to_mode_reg (SImode, op1);
14443 emit_insn (gen_mwait (op0, op1));
14444 return 0;
14445
14446 case IX86_BUILTIN_LOADDDUP:
14447 return ix86_expand_unop_builtin (CODE_FOR_loadddup, arglist, target, 1);
14448
14449 case IX86_BUILTIN_LDDQU:
14450 return ix86_expand_unop_builtin (CODE_FOR_lddqu, arglist, target,
14451 1);
14452
14453 default:
14454 break;
14455 }
14456
14457 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
14458 if (d->code == fcode)
14459 {
14460 /* Compares are treated specially. */
14461 if (d->icode == CODE_FOR_maskcmpv4sf3
14462 || d->icode == CODE_FOR_vmmaskcmpv4sf3
14463 || d->icode == CODE_FOR_maskncmpv4sf3
14464 || d->icode == CODE_FOR_vmmaskncmpv4sf3
14465 || d->icode == CODE_FOR_maskcmpv2df3
14466 || d->icode == CODE_FOR_vmmaskcmpv2df3
14467 || d->icode == CODE_FOR_maskncmpv2df3
14468 || d->icode == CODE_FOR_vmmaskncmpv2df3)
14469 return ix86_expand_sse_compare (d, arglist, target);
14470
14471 return ix86_expand_binop_builtin (d->icode, arglist, target);
14472 }
14473
14474 for (i = 0, d = bdesc_1arg; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
14475 if (d->code == fcode)
14476 return ix86_expand_unop_builtin (d->icode, arglist, target, 0);
14477
14478 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
14479 if (d->code == fcode)
14480 return ix86_expand_sse_comi (d, arglist, target);
14481
14482 /* @@@ Should really do something sensible here. */
14483 return 0;
14484 }
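/* Editor's note (illustrative): most two-operand builtins are not listed in
   the switch above but are found in the bdesc_2arg table scanned at the end.
   For example, __builtin_ia32_addps is described there (with CODE_FOR_addv4sf3
   in this era), so, assuming the usual intrinsic mapping,

       __m128 f (__m128 a, __m128 b) { return _mm_add_ps (a, b); }

   is handled entirely by ix86_expand_binop_builtin.  Only builtins that need
   special operand handling (selectors, implicit memory operands, flag
   outputs) get explicit cases in the switch.  */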
14485
14486 /* Store OPERAND to the memory after reload is completed. This means
14487 that we can't easily use assign_stack_local. */
14488 rtx
14489 ix86_force_to_memory (enum machine_mode mode, rtx operand)
14490 {
14491 rtx result;
14492 if (!reload_completed)
14493 abort ();
14494 if (TARGET_RED_ZONE)
14495 {
14496 result = gen_rtx_MEM (mode,
14497 gen_rtx_PLUS (Pmode,
14498 stack_pointer_rtx,
14499 GEN_INT (-RED_ZONE_SIZE)));
14500 emit_move_insn (result, operand);
14501 }
14502 else if (!TARGET_RED_ZONE && TARGET_64BIT)
14503 {
14504 switch (mode)
14505 {
14506 case HImode:
14507 case SImode:
14508 operand = gen_lowpart (DImode, operand);
14509 /* FALLTHRU */
14510 case DImode:
14511 emit_insn (
14512 gen_rtx_SET (VOIDmode,
14513 gen_rtx_MEM (DImode,
14514 gen_rtx_PRE_DEC (DImode,
14515 stack_pointer_rtx)),
14516 operand));
14517 break;
14518 default:
14519 abort ();
14520 }
14521 result = gen_rtx_MEM (mode, stack_pointer_rtx);
14522 }
14523 else
14524 {
14525 switch (mode)
14526 {
14527 case DImode:
14528 {
14529 rtx operands[2];
14530 split_di (&operand, 1, operands, operands + 1);
14531 emit_insn (
14532 gen_rtx_SET (VOIDmode,
14533 gen_rtx_MEM (SImode,
14534 gen_rtx_PRE_DEC (Pmode,
14535 stack_pointer_rtx)),
14536 operands[1]));
14537 emit_insn (
14538 gen_rtx_SET (VOIDmode,
14539 gen_rtx_MEM (SImode,
14540 gen_rtx_PRE_DEC (Pmode,
14541 stack_pointer_rtx)),
14542 operands[0]));
14543 }
14544 break;
14545 case HImode:
14546 /* It is better to store HImode values as SImode. */
14547 if (!TARGET_PARTIAL_REG_STALL)
14548 operand = gen_lowpart (SImode, operand);
14549 /* FALLTHRU */
14550 case SImode:
14551 emit_insn (
14552 gen_rtx_SET (VOIDmode,
14553 gen_rtx_MEM (GET_MODE (operand),
14554 gen_rtx_PRE_DEC (SImode,
14555 stack_pointer_rtx)),
14556 operand));
14557 break;
14558 default:
14559 abort ();
14560 }
14561 result = gen_rtx_MEM (mode, stack_pointer_rtx);
14562 }
14563 return result;
14564 }
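/* Illustrative sketch (editor's note, not part of the compiler): on 32-bit
   targets without a red zone the SImode case above amounts to emitting

       (set (mem:SI (pre_dec:SI (reg:SI sp))) (reg:SI operand))

   i.e. a "pushl", and returning (mem:SI (reg:SI sp)) so the caller can use
   the just-pushed slot; ix86_free_from_memory below then releases it by
   adding the slot size back to the stack pointer.  */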
14565
14566 /* Free operand from the memory. */
14567 void
14568 ix86_free_from_memory (enum machine_mode mode)
14569 {
14570 if (!TARGET_RED_ZONE)
14571 {
14572 int size;
14573
14574 if (mode == DImode || TARGET_64BIT)
14575 size = 8;
14576 else if (mode == HImode && TARGET_PARTIAL_REG_STALL)
14577 size = 2;
14578 else
14579 size = 4;
14580 /* Use LEA to deallocate stack space. In peephole2 it will be converted
14581 to a pop or add instruction if registers are available. */
14582 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
14583 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
14584 GEN_INT (size))));
14585 }
14586 }
14587
14588 /* Put float CONST_DOUBLE in the constant pool instead of fp regs.
14589 QImode must go into class Q_REGS.
14590 Narrow ALL_REGS to GENERAL_REGS. This allows movsf and
14591 movdf to do mem-to-mem moves through integer regs. */
14592 enum reg_class
14593 ix86_preferred_reload_class (rtx x, enum reg_class class)
14594 {
14595 if (GET_CODE (x) == CONST_VECTOR && x != CONST0_RTX (GET_MODE (x)))
14596 return NO_REGS;
14597 if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) != VOIDmode)
14598 {
14599 /* SSE can't load any constant directly yet. */
14600 if (SSE_CLASS_P (class))
14601 return NO_REGS;
14602 /* Floats can load 0 and 1. */
14603 if (MAYBE_FLOAT_CLASS_P (class) && standard_80387_constant_p (x))
14604 {
14605 /* Limit class to non-SSE. Use GENERAL_REGS if possible. */
14606 if (MAYBE_SSE_CLASS_P (class))
14607 return (reg_class_subset_p (class, GENERAL_REGS)
14608 ? GENERAL_REGS : FLOAT_REGS);
14609 else
14610 return class;
14611 }
14612 /* General regs can load everything. */
14613 if (reg_class_subset_p (class, GENERAL_REGS))
14614 return GENERAL_REGS;
14615 /* In case we haven't resolved FLOAT or SSE yet, give up. */
14616 if (MAYBE_FLOAT_CLASS_P (class) || MAYBE_SSE_CLASS_P (class))
14617 return NO_REGS;
14618 }
14619 if (MAYBE_MMX_CLASS_P (class) && CONSTANT_P (x))
14620 return NO_REGS;
14621 if (GET_MODE (x) == QImode && ! reg_class_subset_p (class, Q_REGS))
14622 return Q_REGS;
14623 return class;
14624 }
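/* Editor's example (illustrative): reloading the constant 1.0 into an x87
   register is fine (standard_80387_constant_p recognizes it, so fld1 can be
   used), but reloading an arbitrary double such as 3.14159 into SSE_REGS
   returns NO_REGS above, which forces the constant into the constant pool
   and loads it from memory instead.  */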
14625
14626 /* If we are copying between general and FP registers, we need a memory
14627 location. The same is true for SSE and MMX registers.
14628
14629 The macro can't work reliably when one of the CLASSES is a class containing
14630 registers from multiple units (SSE, MMX, integer). We avoid this by never
14631 combining those units in a single alternative in the machine description.
14632 Ensure that this constraint holds to avoid unexpected surprises.
14633
14634 When STRICT is false, we are being called from REGISTER_MOVE_COST, so do not
14635 enforce these sanity checks. */
14636 int
14637 ix86_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
14638 enum machine_mode mode, int strict)
14639 {
14640 if (MAYBE_FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class1)
14641 || MAYBE_FLOAT_CLASS_P (class2) != FLOAT_CLASS_P (class2)
14642 || MAYBE_SSE_CLASS_P (class1) != SSE_CLASS_P (class1)
14643 || MAYBE_SSE_CLASS_P (class2) != SSE_CLASS_P (class2)
14644 || MAYBE_MMX_CLASS_P (class1) != MMX_CLASS_P (class1)
14645 || MAYBE_MMX_CLASS_P (class2) != MMX_CLASS_P (class2))
14646 {
14647 if (strict)
14648 abort ();
14649 else
14650 return 1;
14651 }
14652 return (FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class2)
14653 || ((SSE_CLASS_P (class1) != SSE_CLASS_P (class2)
14654 || MMX_CLASS_P (class1) != MMX_CLASS_P (class2))
14655 && ((mode != SImode && (mode != DImode || !TARGET_64BIT))
14656 || (!TARGET_INTER_UNIT_MOVES && !optimize_size))));
14657 }
14658 /* Return the cost of moving data from a register in class CLASS1 to
14659 one in class CLASS2.
14660
14661 It is not required that the cost always equal 2 when FROM is the same as TO;
14662 on some machines it is expensive to move between registers if they are not
14663 general registers. */
14664 int
14665 ix86_register_move_cost (enum machine_mode mode, enum reg_class class1,
14666 enum reg_class class2)
14667 {
14668 /* In case we require secondary memory, compute the cost of the store followed
14669 by the load. In order to avoid bad register allocation choices, we need
14670 this to be *at least* as high as the symmetric MEMORY_MOVE_COST. */
14671
14672 if (ix86_secondary_memory_needed (class1, class2, mode, 0))
14673 {
14674 int cost = 1;
14675
14676 cost += MAX (MEMORY_MOVE_COST (mode, class1, 0),
14677 MEMORY_MOVE_COST (mode, class1, 1));
14678 cost += MAX (MEMORY_MOVE_COST (mode, class2, 0),
14679 MEMORY_MOVE_COST (mode, class2, 1));
14680
14681 /* In case of copying from a general purpose register we may emit multiple
14682 stores followed by a single load, causing a memory size mismatch stall.
14683 Count this as an arbitrarily high cost of 20. */
14684 if (CLASS_MAX_NREGS (class1, mode) > CLASS_MAX_NREGS (class2, mode))
14685 cost += 20;
14686
14687 /* In the case of FP/MMX moves, the registers actually overlap, and we
14688 have to switch modes in order to treat them differently. */
14689 if ((MMX_CLASS_P (class1) && MAYBE_FLOAT_CLASS_P (class2))
14690 || (MMX_CLASS_P (class2) && MAYBE_FLOAT_CLASS_P (class1)))
14691 cost += 20;
14692
14693 return cost;
14694 }
14695
14696 /* Moves between SSE/MMX and integer unit are expensive. */
14697 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
14698 || SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
14699 return ix86_cost->mmxsse_to_integer;
14700 if (MAYBE_FLOAT_CLASS_P (class1))
14701 return ix86_cost->fp_move;
14702 if (MAYBE_SSE_CLASS_P (class1))
14703 return ix86_cost->sse_move;
14704 if (MAYBE_MMX_CLASS_P (class1))
14705 return ix86_cost->mmx_move;
14706 return 2;
14707 }
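/* Editor's example (illustrative): moving a DFmode value between
   GENERAL_REGS and SSE_REGS needs secondary memory, so the cost above
   becomes 1 plus the worse of load/store for each class, plus a 20 penalty
   when the source class needs more registers than the destination (several
   32-bit stores followed by one wide load, the size-mismatch case).  */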
14708
14709 /* Return 1 if hard register REGNO can hold a value of machine-mode MODE. */
14710 int
14711 ix86_hard_regno_mode_ok (int regno, enum machine_mode mode)
14712 {
14713 /* Flags and only flags can only hold CCmode values. */
14714 if (CC_REGNO_P (regno))
14715 return GET_MODE_CLASS (mode) == MODE_CC;
14716 if (GET_MODE_CLASS (mode) == MODE_CC
14717 || GET_MODE_CLASS (mode) == MODE_RANDOM
14718 || GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
14719 return 0;
14720 if (FP_REGNO_P (regno))
14721 return VALID_FP_MODE_P (mode);
14722 if (SSE_REGNO_P (regno))
14723 return (TARGET_SSE ? VALID_SSE_REG_MODE (mode) : 0);
14724 if (MMX_REGNO_P (regno))
14725 return (TARGET_MMX
14726 ? VALID_MMX_REG_MODE (mode) || VALID_MMX_REG_MODE_3DNOW (mode) : 0);
14727 /* We handle both integer and floating point values in the general purpose
14728 registers. In the future we should be able to handle vector modes as well. */
14729 if (!VALID_INT_MODE_P (mode) && !VALID_FP_MODE_P (mode))
14730 return 0;
14731 /* Take care with QImode values - they can be in non-QI regs, but then
14732 they do cause partial register stalls. */
14733 if (regno < 4 || mode != QImode || TARGET_64BIT)
14734 return 1;
14735 return reload_in_progress || reload_completed || !TARGET_PARTIAL_REG_STALL;
14736 }
14737
14738 /* Return the cost of moving data of mode M between a
14739 register and memory. A value of 2 is the default; this cost is
14740 relative to those in `REGISTER_MOVE_COST'.
14741
14742 If moving between registers and memory is more expensive than
14743 between two registers, you should define this macro to express the
14744 relative cost.
14745
14746 We also model the increased cost of moving QImode registers in
14747 non Q_REGS classes.
14748 */
14749 int
14750 ix86_memory_move_cost (enum machine_mode mode, enum reg_class class, int in)
14751 {
14752 if (FLOAT_CLASS_P (class))
14753 {
14754 int index;
14755 switch (mode)
14756 {
14757 case SFmode:
14758 index = 0;
14759 break;
14760 case DFmode:
14761 index = 1;
14762 break;
14763 case XFmode:
14764 index = 2;
14765 break;
14766 default:
14767 return 100;
14768 }
14769 return in ? ix86_cost->fp_load [index] : ix86_cost->fp_store [index];
14770 }
14771 if (SSE_CLASS_P (class))
14772 {
14773 int index;
14774 switch (GET_MODE_SIZE (mode))
14775 {
14776 case 4:
14777 index = 0;
14778 break;
14779 case 8:
14780 index = 1;
14781 break;
14782 case 16:
14783 index = 2;
14784 break;
14785 default:
14786 return 100;
14787 }
14788 return in ? ix86_cost->sse_load [index] : ix86_cost->sse_store [index];
14789 }
14790 if (MMX_CLASS_P (class))
14791 {
14792 int index;
14793 switch (GET_MODE_SIZE (mode))
14794 {
14795 case 4:
14796 index = 0;
14797 break;
14798 case 8:
14799 index = 1;
14800 break;
14801 default:
14802 return 100;
14803 }
14804 return in ? ix86_cost->mmx_load [index] : ix86_cost->mmx_store [index];
14805 }
14806 switch (GET_MODE_SIZE (mode))
14807 {
14808 case 1:
14809 if (in)
14810 return (Q_CLASS_P (class) ? ix86_cost->int_load[0]
14811 : ix86_cost->movzbl_load);
14812 else
14813 return (Q_CLASS_P (class) ? ix86_cost->int_store[0]
14814 : ix86_cost->int_store[0] + 4);
14815 break;
14816 case 2:
14817 return in ? ix86_cost->int_load[1] : ix86_cost->int_store[1];
14818 default:
14819 /* Compute number of 32bit moves needed. TFmode is moved as XFmode. */
14820 if (mode == TFmode)
14821 mode = XFmode;
14822 return ((in ? ix86_cost->int_load[2] : ix86_cost->int_store[2])
14823 * (((int) GET_MODE_SIZE (mode)
14824 + UNITS_PER_WORD - 1) / UNITS_PER_WORD));
14825 }
14826 }
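/* Editor's example (illustrative): for an integer-class DImode value on a
   32-bit target the default case above charges one 32-bit move per word, so
   the cost is (in ? int_load[2] : int_store[2]) * ceil (8 / UNITS_PER_WORD),
   i.e. two SImode-sized moves when UNITS_PER_WORD is 4.  */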
14827
14828 /* Compute a (partial) cost for rtx X. Return true if the complete
14829 cost has been computed, and false if subexpressions should be
14830 scanned. In either case, *TOTAL contains the cost result. */
14831
14832 static bool
14833 ix86_rtx_costs (rtx x, int code, int outer_code, int *total)
14834 {
14835 enum machine_mode mode = GET_MODE (x);
14836
14837 switch (code)
14838 {
14839 case CONST_INT:
14840 case CONST:
14841 case LABEL_REF:
14842 case SYMBOL_REF:
14843 if (TARGET_64BIT && !x86_64_sign_extended_value (x))
14844 *total = 3;
14845 else if (TARGET_64BIT && !x86_64_zero_extended_value (x))
14846 *total = 2;
14847 else if (flag_pic && SYMBOLIC_CONST (x)
14848 && (!TARGET_64BIT
14849 || (GET_CODE (x) != LABEL_REF
14850 && (GET_CODE (x) != SYMBOL_REF
14851 || !SYMBOL_REF_LOCAL_P (x)))))
14852 *total = 1;
14853 else
14854 *total = 0;
14855 return true;
14856
14857 case CONST_DOUBLE:
14858 if (mode == VOIDmode)
14859 *total = 0;
14860 else
14861 switch (standard_80387_constant_p (x))
14862 {
14863 case 1: /* 0.0 */
14864 *total = 1;
14865 break;
14866 default: /* Other constants */
14867 *total = 2;
14868 break;
14869 case 0:
14870 case -1:
14871 /* Start with (MEM (SYMBOL_REF)), since that's where
14872 it'll probably end up. Add a penalty for size. */
14873 *total = (COSTS_N_INSNS (1)
14874 + (flag_pic != 0 && !TARGET_64BIT)
14875 + (mode == SFmode ? 0 : mode == DFmode ? 1 : 2));
14876 break;
14877 }
14878 return true;
14879
14880 case ZERO_EXTEND:
14881 /* Zero extension is often completely free on x86_64, so make
14882 it as cheap as possible. */
14883 if (TARGET_64BIT && mode == DImode
14884 && GET_MODE (XEXP (x, 0)) == SImode)
14885 *total = 1;
14886 else if (TARGET_ZERO_EXTEND_WITH_AND)
14887 *total = COSTS_N_INSNS (ix86_cost->add);
14888 else
14889 *total = COSTS_N_INSNS (ix86_cost->movzx);
14890 return false;
14891
14892 case SIGN_EXTEND:
14893 *total = COSTS_N_INSNS (ix86_cost->movsx);
14894 return false;
14895
14896 case ASHIFT:
14897 if (GET_CODE (XEXP (x, 1)) == CONST_INT
14898 && (GET_MODE (XEXP (x, 0)) != DImode || TARGET_64BIT))
14899 {
14900 HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
14901 if (value == 1)
14902 {
14903 *total = COSTS_N_INSNS (ix86_cost->add);
14904 return false;
14905 }
14906 if ((value == 2 || value == 3)
14907 && ix86_cost->lea <= ix86_cost->shift_const)
14908 {
14909 *total = COSTS_N_INSNS (ix86_cost->lea);
14910 return false;
14911 }
14912 }
14913 /* FALLTHRU */
14914
14915 case ROTATE:
14916 case ASHIFTRT:
14917 case LSHIFTRT:
14918 case ROTATERT:
14919 if (!TARGET_64BIT && GET_MODE (XEXP (x, 0)) == DImode)
14920 {
14921 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
14922 {
14923 if (INTVAL (XEXP (x, 1)) > 32)
14924 *total = COSTS_N_INSNS (ix86_cost->shift_const + 2);
14925 else
14926 *total = COSTS_N_INSNS (ix86_cost->shift_const * 2);
14927 }
14928 else
14929 {
14930 if (GET_CODE (XEXP (x, 1)) == AND)
14931 *total = COSTS_N_INSNS (ix86_cost->shift_var * 2);
14932 else
14933 *total = COSTS_N_INSNS (ix86_cost->shift_var * 6 + 2);
14934 }
14935 }
14936 else
14937 {
14938 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
14939 *total = COSTS_N_INSNS (ix86_cost->shift_const);
14940 else
14941 *total = COSTS_N_INSNS (ix86_cost->shift_var);
14942 }
14943 return false;
14944
14945 case MULT:
14946 if (FLOAT_MODE_P (mode))
14947 {
14948 *total = COSTS_N_INSNS (ix86_cost->fmul);
14949 return false;
14950 }
14951 else
14952 {
14953 rtx op0 = XEXP (x, 0);
14954 rtx op1 = XEXP (x, 1);
14955 int nbits;
14956 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
14957 {
14958 unsigned HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
14959 for (nbits = 0; value != 0; value &= value - 1)
14960 nbits++;
14961 }
14962 else
14963 /* This is arbitrary. */
14964 nbits = 7;
14965
14966 /* Compute costs correctly for widening multiplication. */
14967 if ((GET_CODE (op0) == SIGN_EXTEND || GET_CODE (op0) == ZERO_EXTEND)
14968 && GET_MODE_SIZE (GET_MODE (XEXP (op0, 0))) * 2
14969 == GET_MODE_SIZE (mode))
14970 {
14971 int is_mulwiden = 0;
14972 enum machine_mode inner_mode = GET_MODE (op0);
14973
14974 if (GET_CODE (op0) == GET_CODE (op1))
14975 is_mulwiden = 1, op1 = XEXP (op1, 0);
14976 else if (GET_CODE (op1) == CONST_INT)
14977 {
14978 if (GET_CODE (op0) == SIGN_EXTEND)
14979 is_mulwiden = trunc_int_for_mode (INTVAL (op1), inner_mode)
14980 == INTVAL (op1);
14981 else
14982 is_mulwiden = !(INTVAL (op1) & ~GET_MODE_MASK (inner_mode));
14983 }
14984
14985 if (is_mulwiden)
14986 op0 = XEXP (op0, 0), mode = GET_MODE (op0);
14987 }
14988
14989 *total = COSTS_N_INSNS (ix86_cost->mult_init[MODE_INDEX (mode)]
14990 + nbits * ix86_cost->mult_bit)
14991 + rtx_cost (op0, outer_code) + rtx_cost (op1, outer_code);
14992
14993 return true;
14994 }
14995
14996 case DIV:
14997 case UDIV:
14998 case MOD:
14999 case UMOD:
15000 if (FLOAT_MODE_P (mode))
15001 *total = COSTS_N_INSNS (ix86_cost->fdiv);
15002 else
15003 *total = COSTS_N_INSNS (ix86_cost->divide[MODE_INDEX (mode)]);
15004 return false;
15005
15006 case PLUS:
15007 if (FLOAT_MODE_P (mode))
15008 *total = COSTS_N_INSNS (ix86_cost->fadd);
15009 else if (GET_MODE_CLASS (mode) == MODE_INT
15010 && GET_MODE_BITSIZE (mode) <= GET_MODE_BITSIZE (Pmode))
15011 {
15012 if (GET_CODE (XEXP (x, 0)) == PLUS
15013 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
15014 && GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)) == CONST_INT
15015 && CONSTANT_P (XEXP (x, 1)))
15016 {
15017 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (XEXP (x, 0), 0), 1));
15018 if (val == 2 || val == 4 || val == 8)
15019 {
15020 *total = COSTS_N_INSNS (ix86_cost->lea);
15021 *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code);
15022 *total += rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0),
15023 outer_code);
15024 *total += rtx_cost (XEXP (x, 1), outer_code);
15025 return true;
15026 }
15027 }
15028 else if (GET_CODE (XEXP (x, 0)) == MULT
15029 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT)
15030 {
15031 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (x, 0), 1));
15032 if (val == 2 || val == 4 || val == 8)
15033 {
15034 *total = COSTS_N_INSNS (ix86_cost->lea);
15035 *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code);
15036 *total += rtx_cost (XEXP (x, 1), outer_code);
15037 return true;
15038 }
15039 }
15040 else if (GET_CODE (XEXP (x, 0)) == PLUS)
15041 {
15042 *total = COSTS_N_INSNS (ix86_cost->lea);
15043 *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code);
15044 *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code);
15045 *total += rtx_cost (XEXP (x, 1), outer_code);
15046 return true;
15047 }
15048 }
15049 /* FALLTHRU */
15050
15051 case MINUS:
15052 if (FLOAT_MODE_P (mode))
15053 {
15054 *total = COSTS_N_INSNS (ix86_cost->fadd);
15055 return false;
15056 }
15057 /* FALLTHRU */
15058
15059 case AND:
15060 case IOR:
15061 case XOR:
15062 if (!TARGET_64BIT && mode == DImode)
15063 {
15064 *total = (COSTS_N_INSNS (ix86_cost->add) * 2
15065 + (rtx_cost (XEXP (x, 0), outer_code)
15066 << (GET_MODE (XEXP (x, 0)) != DImode))
15067 + (rtx_cost (XEXP (x, 1), outer_code)
15068 << (GET_MODE (XEXP (x, 1)) != DImode)));
15069 return true;
15070 }
15071 /* FALLTHRU */
15072
15073 case NEG:
15074 if (FLOAT_MODE_P (mode))
15075 {
15076 *total = COSTS_N_INSNS (ix86_cost->fchs);
15077 return false;
15078 }
15079 /* FALLTHRU */
15080
15081 case NOT:
15082 if (!TARGET_64BIT && mode == DImode)
15083 *total = COSTS_N_INSNS (ix86_cost->add * 2);
15084 else
15085 *total = COSTS_N_INSNS (ix86_cost->add);
15086 return false;
15087
15088 case FLOAT_EXTEND:
15089 if (!TARGET_SSE_MATH || !VALID_SSE_REG_MODE (mode))
15090 *total = 0;
15091 return false;
15092
15093 case ABS:
15094 if (FLOAT_MODE_P (mode))
15095 *total = COSTS_N_INSNS (ix86_cost->fabs);
15096 return false;
15097
15098 case SQRT:
15099 if (FLOAT_MODE_P (mode))
15100 *total = COSTS_N_INSNS (ix86_cost->fsqrt);
15101 return false;
15102
15103 case UNSPEC:
15104 if (XINT (x, 1) == UNSPEC_TP)
15105 *total = 0;
15106 return false;
15107
15108 default:
15109 return false;
15110 }
15111 }
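/* Editor's example (illustrative): an address computation such as
   (plus (mult reg 4) const) matches the PLUS case above and is charged a
   single LEA rather than a shift plus an add, while a left shift by 1 in the
   ASHIFT case is charged as an ordinary add.  These costs steer expand and
   combine toward LEA-friendly address forms.  */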
15112
15113 #if defined (DO_GLOBAL_CTORS_BODY) && defined (HAS_INIT_SECTION)
15114 static void
15115 ix86_svr3_asm_out_constructor (rtx symbol, int priority ATTRIBUTE_UNUSED)
15116 {
15117 init_section ();
15118 fputs ("\tpushl $", asm_out_file);
15119 assemble_name (asm_out_file, XSTR (symbol, 0));
15120 fputc ('\n', asm_out_file);
15121 }
15122 #endif
15123
15124 #if TARGET_MACHO
15125
15126 static int current_machopic_label_num;
15127
15128 /* Given a symbol name and its associated stub, write out the
15129 definition of the stub. */
15130
15131 void
15132 machopic_output_stub (FILE *file, const char *symb, const char *stub)
15133 {
15134 unsigned int length;
15135 char *binder_name, *symbol_name, lazy_ptr_name[32];
15136 int label = ++current_machopic_label_num;
15137
15138 /* Lose our funky encoding stuff so it doesn't contaminate the stub. */
15139 symb = (*targetm.strip_name_encoding) (symb);
15140
15141 length = strlen (stub);
15142 binder_name = alloca (length + 32);
15143 GEN_BINDER_NAME_FOR_STUB (binder_name, stub, length);
15144
15145 length = strlen (symb);
15146 symbol_name = alloca (length + 32);
15147 GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name, symb, length);
15148
15149 sprintf (lazy_ptr_name, "L%d$lz", label);
15150
15151 if (MACHOPIC_PURE)
15152 machopic_picsymbol_stub_section ();
15153 else
15154 machopic_symbol_stub_section ();
15155
15156 fprintf (file, "%s:\n", stub);
15157 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
15158
15159 if (MACHOPIC_PURE)
15160 {
15161 fprintf (file, "\tcall LPC$%d\nLPC$%d:\tpopl %%eax\n", label, label);
15162 fprintf (file, "\tmovl %s-LPC$%d(%%eax),%%edx\n", lazy_ptr_name, label);
15163 fprintf (file, "\tjmp %%edx\n");
15164 }
15165 else
15166 fprintf (file, "\tjmp *%s\n", lazy_ptr_name);
15167
15168 fprintf (file, "%s:\n", binder_name);
15169
15170 if (MACHOPIC_PURE)
15171 {
15172 fprintf (file, "\tlea %s-LPC$%d(%%eax),%%eax\n", lazy_ptr_name, label);
15173 fprintf (file, "\tpushl %%eax\n");
15174 }
15175 else
15176 fprintf (file, "\t pushl $%s\n", lazy_ptr_name);
15177
15178 fprintf (file, "\tjmp dyld_stub_binding_helper\n");
15179
15180 machopic_lazy_symbol_ptr_section ();
15181 fprintf (file, "%s:\n", lazy_ptr_name);
15182 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
15183 fprintf (file, "\t.long %s\n", binder_name);
15184 }
15185 #endif /* TARGET_MACHO */
15186
15187 /* Order the registers for the register allocator. */
15188
15189 void
15190 x86_order_regs_for_local_alloc (void)
15191 {
15192 int pos = 0;
15193 int i;
15194
15195 /* First allocate the local general purpose registers. */
15196 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
15197 if (GENERAL_REGNO_P (i) && call_used_regs[i])
15198 reg_alloc_order [pos++] = i;
15199
15200 /* Global general purpose registers. */
15201 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
15202 if (GENERAL_REGNO_P (i) && !call_used_regs[i])
15203 reg_alloc_order [pos++] = i;
15204
15205 /* x87 registers come first in case we are doing FP math
15206 using them. */
15207 if (!TARGET_SSE_MATH)
15208 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
15209 reg_alloc_order [pos++] = i;
15210
15211 /* SSE registers. */
15212 for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++)
15213 reg_alloc_order [pos++] = i;
15214 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
15215 reg_alloc_order [pos++] = i;
15216
15217 /* x87 registers. */
15218 if (TARGET_SSE_MATH)
15219 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
15220 reg_alloc_order [pos++] = i;
15221
15222 for (i = FIRST_MMX_REG; i <= LAST_MMX_REG; i++)
15223 reg_alloc_order [pos++] = i;
15224
15225 /* Initialize the rest of the array, as we do not allocate some registers
15226 at all. */
15227 while (pos < FIRST_PSEUDO_REGISTER)
15228 reg_alloc_order [pos++] = 0;
15229 }
15230
15231 #ifndef TARGET_USE_MS_BITFIELD_LAYOUT
15232 #define TARGET_USE_MS_BITFIELD_LAYOUT 0
15233 #endif
15234
15235 /* Handle a "ms_struct" or "gcc_struct" attribute; arguments as in
15236 struct attribute_spec.handler. */
15237 static tree
15238 ix86_handle_struct_attribute (tree *node, tree name,
15239 tree args ATTRIBUTE_UNUSED,
15240 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
15241 {
15242 tree *type = NULL;
15243 if (DECL_P (*node))
15244 {
15245 if (TREE_CODE (*node) == TYPE_DECL)
15246 type = &TREE_TYPE (*node);
15247 }
15248 else
15249 type = node;
15250
15251 if (!(type && (TREE_CODE (*type) == RECORD_TYPE
15252 || TREE_CODE (*type) == UNION_TYPE)))
15253 {
15254 warning ("`%s' attribute ignored", IDENTIFIER_POINTER (name));
15255 *no_add_attrs = true;
15256 }
15257
15258 else if ((is_attribute_p ("ms_struct", name)
15259 && lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (*type)))
15260 || ((is_attribute_p ("gcc_struct", name)
15261 && lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (*type)))))
15262 {
15263 warning ("`%s' incompatible attribute ignored",
15264 IDENTIFIER_POINTER (name));
15265 *no_add_attrs = true;
15266 }
15267
15268 return NULL_TREE;
15269 }
15270
15271 static bool
15272 ix86_ms_bitfield_layout_p (tree record_type)
15273 {
15274 return (TARGET_USE_MS_BITFIELD_LAYOUT &&
15275 !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type)))
15276 || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type));
15277 }
15278
15279 /* Returns an expression indicating where the this parameter is
15280 located on entry to the FUNCTION. */
15281
15282 static rtx
15283 x86_this_parameter (tree function)
15284 {
15285 tree type = TREE_TYPE (function);
15286
15287 if (TARGET_64BIT)
15288 {
15289 int n = aggregate_value_p (TREE_TYPE (type), type) != 0;
15290 return gen_rtx_REG (DImode, x86_64_int_parameter_registers[n]);
15291 }
15292
15293 if (ix86_function_regparm (type, function) > 0)
15294 {
15295 tree parm;
15296
15297 parm = TYPE_ARG_TYPES (type);
15298 /* Figure out whether or not the function has a variable number of
15299 arguments. */
15300 for (; parm; parm = TREE_CHAIN (parm))
15301 if (TREE_VALUE (parm) == void_type_node)
15302 break;
15303 /* If not, the this parameter is in the first argument. */
15304 if (parm)
15305 {
15306 int regno = 0;
15307 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type)))
15308 regno = 2;
15309 return gen_rtx_REG (SImode, regno);
15310 }
15311 }
15312
15313 if (aggregate_value_p (TREE_TYPE (type), type))
15314 return gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, 8));
15315 else
15316 return gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, 4));
15317 }
15318
15319 /* Determine whether x86_output_mi_thunk can succeed. */
15320
15321 static bool
15322 x86_can_output_mi_thunk (tree thunk ATTRIBUTE_UNUSED,
15323 HOST_WIDE_INT delta ATTRIBUTE_UNUSED,
15324 HOST_WIDE_INT vcall_offset, tree function)
15325 {
15326 /* 64-bit can handle anything. */
15327 if (TARGET_64BIT)
15328 return true;
15329
15330 /* For 32-bit, everything's fine if we have one free register. */
15331 if (ix86_function_regparm (TREE_TYPE (function), function) < 3)
15332 return true;
15333
15334 /* Need a free register for vcall_offset. */
15335 if (vcall_offset)
15336 return false;
15337
15338 /* Need a free register for GOT references. */
15339 if (flag_pic && !(*targetm.binds_local_p) (function))
15340 return false;
15341
15342 /* Otherwise ok. */
15343 return true;
15344 }
15345
15346 /* Output the assembler code for a thunk function. THUNK_DECL is the
15347 declaration for the thunk function itself, FUNCTION is the decl for
15348 the target function. DELTA is an immediate constant offset to be
15349 added to THIS. If VCALL_OFFSET is nonzero, the word at
15350 *(*this + vcall_offset) should be added to THIS. */
15351
15352 static void
15353 x86_output_mi_thunk (FILE *file ATTRIBUTE_UNUSED,
15354 tree thunk ATTRIBUTE_UNUSED, HOST_WIDE_INT delta,
15355 HOST_WIDE_INT vcall_offset, tree function)
15356 {
15357 rtx xops[3];
15358 rtx this = x86_this_parameter (function);
15359 rtx this_reg, tmp;
15360
15361 /* If VCALL_OFFSET, we'll need THIS in a register. Might as well
15362 pull it in now and let DELTA benefit. */
15363 if (REG_P (this))
15364 this_reg = this;
15365 else if (vcall_offset)
15366 {
15367 /* Put the this parameter into %eax. */
15368 xops[0] = this;
15369 xops[1] = this_reg = gen_rtx_REG (Pmode, 0);
15370 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
15371 }
15372 else
15373 this_reg = NULL_RTX;
15374
15375 /* Adjust the this parameter by a fixed constant. */
15376 if (delta)
15377 {
15378 xops[0] = GEN_INT (delta);
15379 xops[1] = this_reg ? this_reg : this;
15380 if (TARGET_64BIT)
15381 {
15382 if (!x86_64_general_operand (xops[0], DImode))
15383 {
15384 tmp = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 2 /* R10 */);
15385 xops[1] = tmp;
15386 output_asm_insn ("mov{q}\t{%1, %0|%0, %1}", xops);
15387 xops[0] = tmp;
15388 xops[1] = this;
15389 }
15390 output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops);
15391 }
15392 else
15393 output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops);
15394 }
15395
15396 /* Adjust the this parameter by a value stored in the vtable. */
15397 if (vcall_offset)
15398 {
15399 if (TARGET_64BIT)
15400 tmp = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 2 /* R10 */);
15401 else
15402 {
15403 int tmp_regno = 2 /* ECX */;
15404 if (lookup_attribute ("fastcall",
15405 TYPE_ATTRIBUTES (TREE_TYPE (function))))
15406 tmp_regno = 0 /* EAX */;
15407 tmp = gen_rtx_REG (SImode, tmp_regno);
15408 }
15409
15410 xops[0] = gen_rtx_MEM (Pmode, this_reg);
15411 xops[1] = tmp;
15412 if (TARGET_64BIT)
15413 output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops);
15414 else
15415 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
15416
15417 /* Adjust the this parameter. */
15418 xops[0] = gen_rtx_MEM (Pmode, plus_constant (tmp, vcall_offset));
15419 if (TARGET_64BIT && !memory_operand (xops[0], Pmode))
15420 {
15421 rtx tmp2 = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 3 /* R11 */);
15422 xops[0] = GEN_INT (vcall_offset);
15423 xops[1] = tmp2;
15424 output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops);
15425 xops[0] = gen_rtx_MEM (Pmode, gen_rtx_PLUS (Pmode, tmp, tmp2));
15426 }
15427 xops[1] = this_reg;
15428 if (TARGET_64BIT)
15429 output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops);
15430 else
15431 output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops);
15432 }
15433
15434 /* If necessary, drop THIS back to its stack slot. */
15435 if (this_reg && this_reg != this)
15436 {
15437 xops[0] = this_reg;
15438 xops[1] = this;
15439 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
15440 }
15441
15442 xops[0] = XEXP (DECL_RTL (function), 0);
15443 if (TARGET_64BIT)
15444 {
15445 if (!flag_pic || (*targetm.binds_local_p) (function))
15446 output_asm_insn ("jmp\t%P0", xops);
15447 else
15448 {
15449 tmp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, xops[0]), UNSPEC_GOTPCREL);
15450 tmp = gen_rtx_CONST (Pmode, tmp);
15451 tmp = gen_rtx_MEM (QImode, tmp);
15452 xops[0] = tmp;
15453 output_asm_insn ("jmp\t%A0", xops);
15454 }
15455 }
15456 else
15457 {
15458 if (!flag_pic || (*targetm.binds_local_p) (function))
15459 output_asm_insn ("jmp\t%P0", xops);
15460 else
15461 #if TARGET_MACHO
15462 if (TARGET_MACHO)
15463 {
15464 const char *ip = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (function));
15465 tmp = gen_rtx_SYMBOL_REF (Pmode, machopic_stub_name (ip));
15466 tmp = gen_rtx_MEM (QImode, tmp);
15467 xops[0] = tmp;
15468 output_asm_insn ("jmp\t%0", xops);
15469 }
15470 else
15471 #endif /* TARGET_MACHO */
15472 {
15473 tmp = gen_rtx_REG (SImode, 2 /* ECX */);
15474 output_set_got (tmp);
15475
15476 xops[1] = tmp;
15477 output_asm_insn ("mov{l}\t{%0@GOT(%1), %1|%1, %0@GOT[%1]}", xops);
15478 output_asm_insn ("jmp\t{*}%1", xops);
15479 }
15480 }
15481 }
15482
15483 static void
15484 x86_file_start (void)
15485 {
15486 default_file_start ();
15487 if (X86_FILE_START_VERSION_DIRECTIVE)
15488 fputs ("\t.version\t\"01.01\"\n", asm_out_file);
15489 if (X86_FILE_START_FLTUSED)
15490 fputs ("\t.global\t__fltused\n", asm_out_file);
15491 if (ix86_asm_dialect == ASM_INTEL)
15492 fputs ("\t.intel_syntax\n", asm_out_file);
15493 }
15494
15495 int
15496 x86_field_alignment (tree field, int computed)
15497 {
15498 enum machine_mode mode;
15499 tree type = TREE_TYPE (field);
15500
15501 if (TARGET_64BIT || TARGET_ALIGN_DOUBLE)
15502 return computed;
15503 mode = TYPE_MODE (TREE_CODE (type) == ARRAY_TYPE
15504 ? get_inner_array_type (type) : type);
15505 if (mode == DFmode || mode == DCmode
15506 || GET_MODE_CLASS (mode) == MODE_INT
15507 || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
15508 return MIN (32, computed);
15509 return computed;
15510 }
15511
15512 /* Output assembler code to FILE to increment profiler label # LABELNO
15513 for profiling a function entry. */
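/* As an illustration (assuming an ELF target where MCOUNT_NAME is
   "mcount", PROFILE_COUNT_REGISTER is "edx", LPREFIX is ".L" and profile
   counters are enabled), the non-PIC ia32 case below would emit roughly:

	movl	$.LP0, %edx
	call	mcount  */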
15514 void
15515 x86_function_profiler (FILE *file, int labelno ATTRIBUTE_UNUSED)
15516 {
15517 if (TARGET_64BIT)
15518 if (flag_pic)
15519 {
15520 #ifndef NO_PROFILE_COUNTERS
15521 fprintf (file, "\tleaq\t%sP%d(%%rip),%%r11\n", LPREFIX, labelno);
15522 #endif
15523 fprintf (file, "\tcall\t*%s@GOTPCREL(%%rip)\n", MCOUNT_NAME);
15524 }
15525 else
15526 {
15527 #ifndef NO_PROFILE_COUNTERS
15528 fprintf (file, "\tmovq\t$%sP%d,%%r11\n", LPREFIX, labelno);
15529 #endif
15530 fprintf (file, "\tcall\t%s\n", MCOUNT_NAME);
15531 }
15532 else if (flag_pic)
15533 {
15534 #ifndef NO_PROFILE_COUNTERS
15535 fprintf (file, "\tleal\t%sP%d@GOTOFF(%%ebx),%%%s\n",
15536 LPREFIX, labelno, PROFILE_COUNT_REGISTER);
15537 #endif
15538 fprintf (file, "\tcall\t*%s@GOT(%%ebx)\n", MCOUNT_NAME);
15539 }
15540 else
15541 {
15542 #ifndef NO_PROFILE_COUNTERS
15543 fprintf (file, "\tmovl\t$%sP%d,%%%s\n", LPREFIX, labelno,
15544 PROFILE_COUNT_REGISTER);
15545 #endif
15546 fprintf (file, "\tcall\t%s\n", MCOUNT_NAME);
15547 }
15548 }
15549
15550 /* We don't have exact information about insn sizes, but we may quite
15551 safely assume that we know about all 1-byte insns and about memory
15552 address sizes. This is enough to eliminate unnecessary padding in
15553 99% of cases. */
15554
15555 static int
15556 min_insn_size (rtx insn)
15557 {
15558 int l = 0;
15559
15560 if (!INSN_P (insn) || !active_insn_p (insn))
15561 return 0;
15562
15563 /* Discard alignments we've emitted, and jump tables. */
15564 if (GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
15565 && XINT (PATTERN (insn), 1) == UNSPECV_ALIGN)
15566 return 0;
15567 if (GET_CODE (insn) == JUMP_INSN
15568 && (GET_CODE (PATTERN (insn)) == ADDR_VEC
15569 || GET_CODE (PATTERN (insn)) == ADDR_DIFF_VEC))
15570 return 0;
15571
15572 /* Important case - direct calls are always 5 bytes.
15573 It is common to have many calls in a row. */
15574 if (GET_CODE (insn) == CALL_INSN
15575 && symbolic_reference_mentioned_p (PATTERN (insn))
15576 && !SIBLING_CALL_P (insn))
15577 return 5;
15578 if (get_attr_length (insn) <= 1)
15579 return 1;
15580
15581 /* For normal instructions we may rely on the address sizes and on the
15582 presence of a symbol requiring 4 bytes of encoding.
15583 This is not the case for jumps, where references are PC-relative. */
15584 if (GET_CODE (insn) != JUMP_INSN)
15585 {
15586 l = get_attr_length_address (insn);
15587 if (l < 4 && symbolic_reference_mentioned_p (PATTERN (insn)))
15588 l = 4;
15589 }
15590 if (l)
15591 return 1 + l;
15592 else
15593 return 2;
15594 }
15595
15596 /* The AMD K8 core mispredicts jumps when there are more than 3 jumps in a
15597 16-byte window. */
15598
15599 static void
15600 ix86_avoid_jump_misspredicts (void)
15601 {
15602 rtx insn, start = get_insns ();
15603 int nbytes = 0, njumps = 0;
15604 int isjump = 0;
15605
15606 /* Look for all minimal intervals of instructions containing 4 jumps.
15607 The intervals are bounded by START and INSN. NBYTES is the total
15608 size of the instructions in the interval, including INSN and not
15609 including START. When NBYTES is smaller than 16 bytes, it is possible
15610 that the end of START and all of INSN end up in the same 16-byte page.
15611
15612 The smallest page offset at which INSN can start is the case where START
15613 ends at offset 0; the offset of INSN is then NBYTES - sizeof (INSN).
15614 We add a p2align to the 16-byte window with maxskip
15615 15 - NBYTES + sizeof (INSN). */
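     /* For instance, if the interval holds 8 bytes of instructions and INSN
	is a 2-byte jump, the p2align emitted before INSN allows up to
	15 - 8 + 2 = 9 bytes of padding.  */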
15616 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
15617 {
15618
15619 nbytes += min_insn_size (insn);
15620 if (dump_file)
15621 fprintf (dump_file, "Insn %i estimated to %i bytes\n",
15622 INSN_UID (insn), min_insn_size (insn));
15623 if ((GET_CODE (insn) == JUMP_INSN
15624 && GET_CODE (PATTERN (insn)) != ADDR_VEC
15625 && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC)
15626 || GET_CODE (insn) == CALL_INSN)
15627 njumps++;
15628 else
15629 continue;
15630
15631 while (njumps > 3)
15632 {
15633 start = NEXT_INSN (start);
15634 if ((GET_CODE (start) == JUMP_INSN
15635 && GET_CODE (PATTERN (start)) != ADDR_VEC
15636 && GET_CODE (PATTERN (start)) != ADDR_DIFF_VEC)
15637 || GET_CODE (start) == CALL_INSN)
15638 njumps--, isjump = 1;
15639 else
15640 isjump = 0;
15641 nbytes -= min_insn_size (start);
15642 }
15643 if (njumps < 0)
15644 abort ();
15645 if (dump_file)
15646 fprintf (dump_file, "Interval %i to %i has %i bytes\n",
15647 INSN_UID (start), INSN_UID (insn), nbytes);
15648
15649 if (njumps == 3 && isjump && nbytes < 16)
15650 {
15651 int padsize = 15 - nbytes + min_insn_size (insn);
15652
15653 if (dump_file)
15654 fprintf (dump_file, "Padding insn %i by %i bytes!\n",
15655 INSN_UID (insn), padsize);
15656 emit_insn_before (gen_align (GEN_INT (padsize)), insn);
15657 }
15658 }
15659 }
15660
15661 /* AMD Athlon works faster
15662 when RET is not the destination of a conditional jump or directly preceded
15663 by another jump instruction. We avoid the penalty by inserting a NOP just
15664 before the RET instruction in such cases. */
15665 static void
15666 ix86_pad_returns (void)
15667 {
15668 edge e;
15669
15670 for (e = EXIT_BLOCK_PTR->pred; e; e = e->pred_next)
15671 {
15672 basic_block bb = e->src;
15673 rtx ret = BB_END (bb);
15674 rtx prev;
15675 bool replace = false;
15676
15677 if (GET_CODE (ret) != JUMP_INSN || GET_CODE (PATTERN (ret)) != RETURN
15678 || !maybe_hot_bb_p (bb))
15679 continue;
15680 for (prev = PREV_INSN (ret); prev; prev = PREV_INSN (prev))
15681 if (active_insn_p (prev) || GET_CODE (prev) == CODE_LABEL)
15682 break;
15683 if (prev && GET_CODE (prev) == CODE_LABEL)
15684 {
15685 edge e;
15686 for (e = bb->pred; e; e = e->pred_next)
15687 if (EDGE_FREQUENCY (e) && e->src->index >= 0
15688 && !(e->flags & EDGE_FALLTHRU))
15689 replace = true;
15690 }
15691 if (!replace)
15692 {
15693 prev = prev_active_insn (ret);
15694 if (prev
15695 && ((GET_CODE (prev) == JUMP_INSN && any_condjump_p (prev))
15696 || GET_CODE (prev) == CALL_INSN))
15697 replace = true;
15698 /* Empty functions get a branch mispredict even when the jump destination
15699 is not visible to us. */
15700 if (!prev && cfun->function_frequency > FUNCTION_FREQUENCY_UNLIKELY_EXECUTED)
15701 replace = true;
15702 }
15703 if (replace)
15704 {
15705 emit_insn_before (gen_return_internal_long (), ret);
15706 delete_insn (ret);
15707 }
15708 }
15709 }
15710
15711 /* Implement machine-specific optimizations. We implement padding of returns
15712 for K8 CPUs and a pass to avoid 4 jumps in a single 16-byte window. */
15713 static void
15714 ix86_reorg (void)
15715 {
15716 if (TARGET_ATHLON_K8 && optimize && !optimize_size)
15717 ix86_pad_returns ();
15718 if (TARGET_FOUR_JUMP_LIMIT && optimize && !optimize_size)
15719 ix86_avoid_jump_misspredicts ();
15720 }
15721
15722 /* Return nonzero when a QImode register that must be encoded via a REX prefix
15723 is used. */
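/* (In 64-bit mode, QImode references to hard registers 4 and up - %spl,
   %bpl, %sil, %dil and the %r8b-%r15b family - are only encodable with a
   REX prefix.)  */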
15724 bool
15725 x86_extended_QIreg_mentioned_p (rtx insn)
15726 {
15727 int i;
15728 extract_insn_cached (insn);
15729 for (i = 0; i < recog_data.n_operands; i++)
15730 if (REG_P (recog_data.operand[i])
15731 && REGNO (recog_data.operand[i]) >= 4)
15732 return true;
15733 return false;
15734 }
15735
15736 /* Return nonzero when P points to a register encoded via a REX prefix.
15737 Called via for_each_rtx. */
15738 static int
15739 extended_reg_mentioned_1 (rtx *p, void *data ATTRIBUTE_UNUSED)
15740 {
15741 unsigned int regno;
15742 if (!REG_P (*p))
15743 return 0;
15744 regno = REGNO (*p);
15745 return REX_INT_REGNO_P (regno) || REX_SSE_REGNO_P (regno);
15746 }
15747
15748 /* Return true when INSN mentions a register that must be encoded using a
15749 REX prefix. */
15750 bool
15751 x86_extended_reg_mentioned_p (rtx insn)
15752 {
15753 return for_each_rtx (&PATTERN (insn), extended_reg_mentioned_1, NULL);
15754 }
15755
15756 /* Generate an unsigned DImode/SImode to FP conversion. This is the same code
15757 optabs would emit if we didn't have TFmode patterns. */
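/* For inputs with the sign bit set (which a signed conversion would treat
   as negative), the code below halves the value with a logical shift,
   ORs the discarded low bit back in so that the final rounding is
   unaffected, converts the result, and then doubles it.  */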
15758
15759 void
15760 x86_emit_floatuns (rtx operands[2])
15761 {
15762 rtx neglab, donelab, i0, i1, f0, in, out;
15763 enum machine_mode mode, inmode;
15764
15765 inmode = GET_MODE (operands[1]);
15766 if (inmode != SImode
15767 && inmode != DImode)
15768 abort ();
15769
15770 out = operands[0];
15771 in = force_reg (inmode, operands[1]);
15772 mode = GET_MODE (out);
15773 neglab = gen_label_rtx ();
15774 donelab = gen_label_rtx ();
15775 i1 = gen_reg_rtx (Pmode);
15776 f0 = gen_reg_rtx (mode);
15777
15778 emit_cmp_and_jump_insns (in, const0_rtx, LT, const0_rtx, Pmode, 0, neglab);
15779
15780 emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_FLOAT (mode, in)));
15781 emit_jump_insn (gen_jump (donelab));
15782 emit_barrier ();
15783
15784 emit_label (neglab);
15785
15786 i0 = expand_simple_binop (Pmode, LSHIFTRT, in, const1_rtx, NULL, 1, OPTAB_DIRECT);
15787 i1 = expand_simple_binop (Pmode, AND, in, const1_rtx, NULL, 1, OPTAB_DIRECT);
15788 i0 = expand_simple_binop (Pmode, IOR, i0, i1, i0, 1, OPTAB_DIRECT);
15789 expand_float (f0, i0, 0);
15790 emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_PLUS (mode, f0, f0)));
15791
15792 emit_label (donelab);
15793 }
15794
15795 /* Return true if we do not know how to pass TYPE solely in registers. */
15796 bool
15797 ix86_must_pass_in_stack (enum machine_mode mode, tree type)
15798 {
15799 if (default_must_pass_in_stack (mode, type))
15800 return true;
15801 return (!TARGET_64BIT && type && mode == TImode);
15802 }
15803
15804 /* Initialize vector TARGET via VALS. */
15805 void
15806 ix86_expand_vector_init (rtx target, rtx vals)
15807 {
15808 enum machine_mode mode = GET_MODE (target);
15809 int elt_size = GET_MODE_SIZE (GET_MODE_INNER (mode));
15810 int n_elts = (GET_MODE_SIZE (mode) / elt_size);
15811 int i;
15812
15813 for (i = n_elts - 1; i >= 0; i--)
15814 if (GET_CODE (XVECEXP (vals, 0, i)) != CONST_INT
15815 && GET_CODE (XVECEXP (vals, 0, i)) != CONST_DOUBLE)
15816 break;
15817
15818 /* A few special cases first...
15819 ... constants are best loaded from the constant pool. */
15820 if (i < 0)
15821 {
15822 emit_move_insn (target, gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
15823 return;
15824 }
15825
15826 /* ... values where only the first field is non-constant are best loaded
15827 from the pool and overwritten with a move later. */
15828 if (!i)
15829 {
15830 rtx op = simplify_gen_subreg (mode, XVECEXP (vals, 0, 0),
15831 GET_MODE_INNER (mode), 0);
15832
15833 op = force_reg (mode, op);
15834 XVECEXP (vals, 0, 0) = CONST0_RTX (GET_MODE_INNER (mode));
15835 emit_move_insn (target, gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
15836 switch (GET_MODE (target))
15837 {
15838 case V2DFmode:
15839 emit_insn (gen_sse2_movsd (target, target, op));
15840 break;
15841 case V4SFmode:
15842 emit_insn (gen_sse_movss (target, target, op));
15843 break;
15844 default:
15845 break;
15846 }
15847 return;
15848 }
15849
15850 /* And the general case, built from interleave (unpack) operations. */
15851 switch (GET_MODE (target))
15852 {
15853 case V2DFmode:
15854 {
15855 rtx vecop0 =
15856 simplify_gen_subreg (V2DFmode, XVECEXP (vals, 0, 0), DFmode, 0);
15857 rtx vecop1 =
15858 simplify_gen_subreg (V2DFmode, XVECEXP (vals, 0, 1), DFmode, 0);
15859
15860 vecop0 = force_reg (V2DFmode, vecop0);
15861 vecop1 = force_reg (V2DFmode, vecop1);
15862 emit_insn (gen_sse2_unpcklpd (target, vecop0, vecop1));
15863 }
15864 break;
15865 case V4SFmode:
15866 {
15867 rtx vecop0 =
15868 simplify_gen_subreg (V4SFmode, XVECEXP (vals, 0, 0), SFmode, 0);
15869 rtx vecop1 =
15870 simplify_gen_subreg (V4SFmode, XVECEXP (vals, 0, 1), SFmode, 0);
15871 rtx vecop2 =
15872 simplify_gen_subreg (V4SFmode, XVECEXP (vals, 0, 2), SFmode, 0);
15873 rtx vecop3 =
15874 simplify_gen_subreg (V4SFmode, XVECEXP (vals, 0, 3), SFmode, 0);
15875 rtx tmp1 = gen_reg_rtx (V4SFmode);
15876 rtx tmp2 = gen_reg_rtx (V4SFmode);
15877
15878 vecop0 = force_reg (V4SFmode, vecop0);
15879 vecop1 = force_reg (V4SFmode, vecop1);
15880 vecop2 = force_reg (V4SFmode, vecop2);
15881 vecop3 = force_reg (V4SFmode, vecop3);
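	/* Each vecopN carries value N in its low lane.  unpcklps interleaves
	   the low lanes of its operands, so tmp1 = { v1, v3, x, x } and
	   tmp2 = { v0, v2, x, x }; the final unpcklps then produces
	   { v0, v1, v2, v3 }.  */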
15882 emit_insn (gen_sse_unpcklps (tmp1, vecop1, vecop3));
15883 emit_insn (gen_sse_unpcklps (tmp2, vecop0, vecop2));
15884 emit_insn (gen_sse_unpcklps (target, tmp2, tmp1));
15885 }
15886 break;
15887 default:
15888 abort ();
15889 }
15890 }
15891
15892 /* Worker function for TARGET_MD_ASM_CLOBBERS.
15893
15894 We clobber the flags, the FP status word and the direction flag for every
15895 asm, to maintain source compatibility with the old cc0-based compiler. */
15896
15897 static tree
15898 ix86_md_asm_clobbers (tree clobbers)
15899 {
15900 clobbers = tree_cons (NULL_TREE, build_string (5, "flags"),
15901 clobbers);
15902 clobbers = tree_cons (NULL_TREE, build_string (4, "fpsr"),
15903 clobbers);
15904 clobbers = tree_cons (NULL_TREE, build_string (7, "dirflag"),
15905 clobbers);
15906 return clobbers;
15907 }
15908
15909 /* Worker function for REVERSE_CONDITION. */
15910
15911 enum rtx_code
15912 ix86_reverse_condition (enum rtx_code code, enum machine_mode mode)
15913 {
15914 return (mode != CCFPmode && mode != CCFPUmode
15915 ? reverse_condition (code)
15916 : reverse_condition_maybe_unordered (code));
15917 }
15918
15919 /* Output code to perform an x87 FP register move, from OPERANDS[1]
15920 to OPERANDS[0]. */
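/* When the source register dies and the destination is %st(0), the move
   reduces to popping a dead value off the register stack; on CPUs where
   TARGET_USE_FFREEP holds, ffreep does that more cheaply than fstp.  */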
15921
15922 const char *
15923 output_387_reg_move (rtx insn, rtx *operands)
15924 {
15925 if (REG_P (operands[1])
15926 && find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
15927 {
15928 if (REGNO (operands[0]) == FIRST_STACK_REG
15929 && TARGET_USE_FFREEP)
15930 return "ffreep\t%y0";
15931 return "fstp\t%y0";
15932 }
15933 if (STACK_TOP_P (operands[0]))
15934 return "fld%z1\t%y1";
15935 return "fst\t%y0";
15936 }
15937
15938 /* Output code to perform a conditional jump to LABEL if the C2 flag in
15939 the FP status register is set. */
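/* C2 is bit 10 of the FPU status word, i.e. bit 2 (mask 0x04) of the high
   byte stored by fnstsw.  With SAHF that byte is copied into EFLAGS, where
   the bit becomes PF and is tested via the UNORDERED condition; otherwise
   it is tested directly in the high byte of the status register.  */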
15940
15941 void
15942 ix86_emit_fp_unordered_jump (rtx label)
15943 {
15944 rtx reg = gen_reg_rtx (HImode);
15945 rtx temp;
15946
15947 emit_insn (gen_x86_fnstsw_1 (reg));
15948
15949 if (TARGET_USE_SAHF)
15950 {
15951 emit_insn (gen_x86_sahf_1 (reg));
15952
15953 temp = gen_rtx_REG (CCmode, FLAGS_REG);
15954 temp = gen_rtx_UNORDERED (VOIDmode, temp, const0_rtx);
15955 }
15956 else
15957 {
15958 emit_insn (gen_testqi_ext_ccno_0 (reg, GEN_INT (0x04)));
15959
15960 temp = gen_rtx_REG (CCNOmode, FLAGS_REG);
15961 temp = gen_rtx_NE (VOIDmode, temp, const0_rtx);
15962 }
15963
15964 temp = gen_rtx_IF_THEN_ELSE (VOIDmode, temp,
15965 gen_rtx_LABEL_REF (VOIDmode, label),
15966 pc_rtx);
15967 temp = gen_rtx_SET (VOIDmode, pc_rtx, temp);
15968 emit_jump_insn (temp);
15969 }
15970
15971 /* Output code to perform a log1p XFmode calculation. */
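/* The threshold constant below is 1 - sqrt(2)/2 ~= 0.29289: fyl2xp1 is
   only specified for |x| below that bound, so for larger arguments we
   fall back to fyl2x on 1 + OP1.  In both paths the Y operand is loaded
   with ln(2) (fldln2) so that the base-2 logarithm becomes a natural
   logarithm.  */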
15972
15973 void
ix86_emit_i387_log1p (rtx op0, rtx op1)
15974 {
15975 rtx label1 = gen_label_rtx ();
15976 rtx label2 = gen_label_rtx ();
15977
15978 rtx tmp = gen_reg_rtx (XFmode);
15979 rtx tmp2 = gen_reg_rtx (XFmode);
15980
15981 emit_insn (gen_absxf2 (tmp, op1));
15982 emit_insn (gen_cmpxf (tmp,
15983 CONST_DOUBLE_FROM_REAL_VALUE (
15984 REAL_VALUE_ATOF ("0.29289321881345247561810596348408353", XFmode),
15985 XFmode)));
15986 emit_jump_insn (gen_bge (label1));
15987
15988 emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
15989 emit_insn (gen_fyl2xp1_xf3 (op0, tmp2, op1));
15990 emit_jump (label2);
15991
15992 emit_label (label1);
15993 emit_move_insn (tmp, CONST1_RTX (XFmode));
15994 emit_insn (gen_addxf3 (tmp, op1, tmp));
15995 emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
15996 emit_insn (gen_fyl2x_xf3 (op0, tmp2, tmp));
15997
15998 emit_label (label2);
15999 }
16000
16001 #include "gt-i386.h"