1 /* Subroutines used for code generation on IA-32.
2 Copyright (C) 1988, 1992, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
3 2002, 2003, 2004, 2005, 2006, 2007 Free Software Foundation, Inc.
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 2, or (at your option)
12 GCC is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING. If not, write to
19 the Free Software Foundation, 51 Franklin Street, Fifth Floor,
20 Boston, MA 02110-1301, USA. */
24 #include "coretypes.h"
30 #include "hard-reg-set.h"
32 #include "insn-config.h"
33 #include "conditions.h"
35 #include "insn-codes.h"
36 #include "insn-attr.h"
44 #include "basic-block.h"
47 #include "target-def.h"
48 #include "langhooks.h"
50 #include "tree-gimple.h"
52 #include "tm-constrs.h"
/* Default stack-probe limit: -1 disables explicit stack-limit checking
   unless the target configuration overrides it.
   NOTE(review): the matching "#endif" for this "#ifndef" was dropped by
   extraction — restore it before compiling.  */
55 #ifndef CHECK_STACK_LIMIT
56 #define CHECK_STACK_LIMIT (-1)
59 /* Return index of given mode in mult and division cost tables. */
/* Maps QI/HI/SI/DImode to indices 0..3 of the per-mode cost arrays below.
   NOTE(review): the final alternative (the "other modes" index and closing
   paren, ": 4)") was dropped by extraction — restore it before compiling.  */
60 #define MODE_INDEX(mode) \
61 ((mode) == QImode ? 0 \
62 : (mode) == HImode ? 1 \
63 : (mode) == SImode ? 2 \
64 : (mode) == DImode ? 3 \
67 /* Processor costs (relative to an add) */
68 /* We assume COSTS_N_INSNS is defined as (N)*4 and an addition is 2 bytes. */
/* Size-tuning analogue of COSTS_N_INSNS: cost expressed in code bytes,
   with an add (2 bytes) as the unit.  Used by size_cost below.  */
69 #define COSTS_N_BYTES(N) ((N) * 2)
71 #define DUMMY_STRINGOP_ALGS {libcall, {{-1, libcall}}}
/* Cost table used when optimizing for size (-Os): costs are code bytes,
   not cycles.
   NOTE(review): extraction artifact — each line carries a stray leading
   number, and gaps in that numbering mark initializer lines dropped by
   extraction (e.g. "large insn"/MOVE_RATIO and the branch/stringop-size
   fields, plus the closing "};").  Restore the missing fields from
   upstream gcc/config/i386/i386.c before compiling.  */
74 struct processor_costs size_cost
= { /* costs for tuning for size */
75 COSTS_N_BYTES (2), /* cost of an add instruction */
76 COSTS_N_BYTES (3), /* cost of a lea instruction */
77 COSTS_N_BYTES (2), /* variable shift costs */
78 COSTS_N_BYTES (3), /* constant shift costs */
79 {COSTS_N_BYTES (3), /* cost of starting multiply for QI */
80 COSTS_N_BYTES (3), /* HI */
81 COSTS_N_BYTES (3), /* SI */
82 COSTS_N_BYTES (3), /* DI */
83 COSTS_N_BYTES (5)}, /* other */
84 0, /* cost of multiply per each bit set */
85 {COSTS_N_BYTES (3), /* cost of a divide/mod for QI */
86 COSTS_N_BYTES (3), /* HI */
87 COSTS_N_BYTES (3), /* SI */
88 COSTS_N_BYTES (3), /* DI */
89 COSTS_N_BYTES (5)}, /* other */
90 COSTS_N_BYTES (3), /* cost of movsx */
91 COSTS_N_BYTES (3), /* cost of movzx */
94 2, /* cost for loading QImode using movzbl */
95 {2, 2, 2}, /* cost of loading integer registers
96 in QImode, HImode and SImode.
97 Relative to reg-reg move (2). */
98 {2, 2, 2}, /* cost of storing integer registers */
99 2, /* cost of reg,reg fld/fst */
100 {2, 2, 2}, /* cost of loading fp registers
101 in SFmode, DFmode and XFmode */
102 {2, 2, 2}, /* cost of storing fp registers
103 in SFmode, DFmode and XFmode */
104 3, /* cost of moving MMX register */
105 {3, 3}, /* cost of loading MMX registers
106 in SImode and DImode */
107 {3, 3}, /* cost of storing MMX registers
108 in SImode and DImode */
109 3, /* cost of moving SSE register */
110 {3, 3, 3}, /* cost of loading SSE registers
111 in SImode, DImode and TImode */
112 {3, 3, 3}, /* cost of storing SSE registers
113 in SImode, DImode and TImode */
114 3, /* MMX or SSE register to integer */
115 0, /* size of prefetch block */
116 0, /* number of parallel prefetches */
118 COSTS_N_BYTES (2), /* cost of FADD and FSUB insns. */
119 COSTS_N_BYTES (2), /* cost of FMUL instruction. */
120 COSTS_N_BYTES (2), /* cost of FDIV instruction. */
121 COSTS_N_BYTES (2), /* cost of FABS instruction. */
122 COSTS_N_BYTES (2), /* cost of FCHS instruction. */
123 COSTS_N_BYTES (2), /* cost of FSQRT instruction. */
/* memcpy / memset stringop algorithm tables: rep movsb/stosb is the
   smallest encoding, so it is chosen for every size when tuning for size.  */
124 {{rep_prefix_1_byte
, {{-1, rep_prefix_1_byte
}}},
125 {rep_prefix_1_byte
, {{-1, rep_prefix_1_byte
}}}},
126 {{rep_prefix_1_byte
, {{-1, rep_prefix_1_byte
}}},
127 {rep_prefix_1_byte
, {{-1, rep_prefix_1_byte
}}}}
130 /* Processor costs (relative to an add) */
/* Cycle-based cost table for the original Intel 386.
   NOTE(review): gaps in the residue line numbers mark fields dropped by
   extraction (incl. MOVE_RATIO/branch fields and the closing "};");
   restore from upstream gcc/config/i386/i386.c before compiling.  */
132 struct processor_costs i386_cost
= { /* 386 specific costs */
133 COSTS_N_INSNS (1), /* cost of an add instruction */
134 COSTS_N_INSNS (1), /* cost of a lea instruction */
135 COSTS_N_INSNS (3), /* variable shift costs */
136 COSTS_N_INSNS (2), /* constant shift costs */
137 {COSTS_N_INSNS (6), /* cost of starting multiply for QI */
138 COSTS_N_INSNS (6), /* HI */
139 COSTS_N_INSNS (6), /* SI */
140 COSTS_N_INSNS (6), /* DI */
141 COSTS_N_INSNS (6)}, /* other */
142 COSTS_N_INSNS (1), /* cost of multiply per each bit set */
143 {COSTS_N_INSNS (23), /* cost of a divide/mod for QI */
144 COSTS_N_INSNS (23), /* HI */
145 COSTS_N_INSNS (23), /* SI */
146 COSTS_N_INSNS (23), /* DI */
147 COSTS_N_INSNS (23)}, /* other */
148 COSTS_N_INSNS (3), /* cost of movsx */
149 COSTS_N_INSNS (2), /* cost of movzx */
150 15, /* "large" insn */
152 4, /* cost for loading QImode using movzbl */
153 {2, 4, 2}, /* cost of loading integer registers
154 in QImode, HImode and SImode.
155 Relative to reg-reg move (2). */
156 {2, 4, 2}, /* cost of storing integer registers */
157 2, /* cost of reg,reg fld/fst */
158 {8, 8, 8}, /* cost of loading fp registers
159 in SFmode, DFmode and XFmode */
160 {8, 8, 8}, /* cost of storing fp registers
161 in SFmode, DFmode and XFmode */
162 2, /* cost of moving MMX register */
163 {4, 8}, /* cost of loading MMX registers
164 in SImode and DImode */
165 {4, 8}, /* cost of storing MMX registers
166 in SImode and DImode */
167 2, /* cost of moving SSE register */
168 {4, 8, 16}, /* cost of loading SSE registers
169 in SImode, DImode and TImode */
170 {4, 8, 16}, /* cost of storing SSE registers
171 in SImode, DImode and TImode */
172 3, /* MMX or SSE register to integer */
173 0, /* size of prefetch block */
174 0, /* number of parallel prefetches */
176 COSTS_N_INSNS (23), /* cost of FADD and FSUB insns. */
177 COSTS_N_INSNS (27), /* cost of FMUL instruction. */
178 COSTS_N_INSNS (88), /* cost of FDIV instruction. */
179 COSTS_N_INSNS (22), /* cost of FABS instruction. */
180 COSTS_N_INSNS (24), /* cost of FCHS instruction. */
181 COSTS_N_INSNS (122), /* cost of FSQRT instruction. */
/* memcpy / memset: always rep movsb/stosb; the DUMMY entry fills the
   unused 64-bit slot (the 386 is 32-bit only).  */
182 {{rep_prefix_1_byte
, {{-1, rep_prefix_1_byte
}}},
183 DUMMY_STRINGOP_ALGS
},
184 {{rep_prefix_1_byte
, {{-1, rep_prefix_1_byte
}}},
185 DUMMY_STRINGOP_ALGS
},
/* Cycle-based cost table for the Intel 486.
   NOTE(review): gaps in the residue line numbers mark fields dropped by
   extraction; the trailing memset stringop entry and closing "};" are
   also missing — restore from upstream gcc/config/i386/i386.c.  */
189 struct processor_costs i486_cost
= { /* 486 specific costs */
190 COSTS_N_INSNS (1), /* cost of an add instruction */
191 COSTS_N_INSNS (1), /* cost of a lea instruction */
192 COSTS_N_INSNS (3), /* variable shift costs */
193 COSTS_N_INSNS (2), /* constant shift costs */
194 {COSTS_N_INSNS (12), /* cost of starting multiply for QI */
195 COSTS_N_INSNS (12), /* HI */
196 COSTS_N_INSNS (12), /* SI */
197 COSTS_N_INSNS (12), /* DI */
198 COSTS_N_INSNS (12)}, /* other */
199 1, /* cost of multiply per each bit set */
200 {COSTS_N_INSNS (40), /* cost of a divide/mod for QI */
201 COSTS_N_INSNS (40), /* HI */
202 COSTS_N_INSNS (40), /* SI */
203 COSTS_N_INSNS (40), /* DI */
204 COSTS_N_INSNS (40)}, /* other */
205 COSTS_N_INSNS (3), /* cost of movsx */
206 COSTS_N_INSNS (2), /* cost of movzx */
207 15, /* "large" insn */
209 4, /* cost for loading QImode using movzbl */
210 {2, 4, 2}, /* cost of loading integer registers
211 in QImode, HImode and SImode.
212 Relative to reg-reg move (2). */
213 {2, 4, 2}, /* cost of storing integer registers */
214 2, /* cost of reg,reg fld/fst */
215 {8, 8, 8}, /* cost of loading fp registers
216 in SFmode, DFmode and XFmode */
217 {8, 8, 8}, /* cost of storing fp registers
218 in SFmode, DFmode and XFmode */
219 2, /* cost of moving MMX register */
220 {4, 8}, /* cost of loading MMX registers
221 in SImode and DImode */
222 {4, 8}, /* cost of storing MMX registers
223 in SImode and DImode */
224 2, /* cost of moving SSE register */
225 {4, 8, 16}, /* cost of loading SSE registers
226 in SImode, DImode and TImode */
227 {4, 8, 16}, /* cost of storing SSE registers
228 in SImode, DImode and TImode */
229 3, /* MMX or SSE register to integer */
230 0, /* size of prefetch block */
231 0, /* number of parallel prefetches */
233 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
234 COSTS_N_INSNS (16), /* cost of FMUL instruction. */
235 COSTS_N_INSNS (73), /* cost of FDIV instruction. */
236 COSTS_N_INSNS (3), /* cost of FABS instruction. */
237 COSTS_N_INSNS (3), /* cost of FCHS instruction. */
238 COSTS_N_INSNS (83), /* cost of FSQRT instruction. */
/* memcpy / memset: rep movsl (4-byte rep prefix) for all sizes.  */
239 {{rep_prefix_4_byte
, {{-1, rep_prefix_4_byte
}}},
240 DUMMY_STRINGOP_ALGS
},
241 {{rep_prefix_4_byte
, {{-1, rep_prefix_4_byte
}}},
/* Cycle-based cost table for the Intel Pentium (P5).
   NOTE(review): gaps in the residue line numbers mark fields dropped by
   extraction; the trailing memset stringop entry and closing "};" are
   also missing — restore from upstream gcc/config/i386/i386.c.  */
246 struct processor_costs pentium_cost
= {
247 COSTS_N_INSNS (1), /* cost of an add instruction */
248 COSTS_N_INSNS (1), /* cost of a lea instruction */
249 COSTS_N_INSNS (4), /* variable shift costs */
250 COSTS_N_INSNS (1), /* constant shift costs */
251 {COSTS_N_INSNS (11), /* cost of starting multiply for QI */
252 COSTS_N_INSNS (11), /* HI */
253 COSTS_N_INSNS (11), /* SI */
254 COSTS_N_INSNS (11), /* DI */
255 COSTS_N_INSNS (11)}, /* other */
256 0, /* cost of multiply per each bit set */
257 {COSTS_N_INSNS (25), /* cost of a divide/mod for QI */
258 COSTS_N_INSNS (25), /* HI */
259 COSTS_N_INSNS (25), /* SI */
260 COSTS_N_INSNS (25), /* DI */
261 COSTS_N_INSNS (25)}, /* other */
262 COSTS_N_INSNS (3), /* cost of movsx */
263 COSTS_N_INSNS (2), /* cost of movzx */
264 8, /* "large" insn */
266 6, /* cost for loading QImode using movzbl */
267 {2, 4, 2}, /* cost of loading integer registers
268 in QImode, HImode and SImode.
269 Relative to reg-reg move (2). */
270 {2, 4, 2}, /* cost of storing integer registers */
271 2, /* cost of reg,reg fld/fst */
272 {2, 2, 6}, /* cost of loading fp registers
273 in SFmode, DFmode and XFmode */
274 {4, 4, 6}, /* cost of storing fp registers
275 in SFmode, DFmode and XFmode */
276 8, /* cost of moving MMX register */
277 {8, 8}, /* cost of loading MMX registers
278 in SImode and DImode */
279 {8, 8}, /* cost of storing MMX registers
280 in SImode and DImode */
281 2, /* cost of moving SSE register */
282 {4, 8, 16}, /* cost of loading SSE registers
283 in SImode, DImode and TImode */
284 {4, 8, 16}, /* cost of storing SSE registers
285 in SImode, DImode and TImode */
286 3, /* MMX or SSE register to integer */
287 0, /* size of prefetch block */
288 0, /* number of parallel prefetches */
290 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
291 COSTS_N_INSNS (3), /* cost of FMUL instruction. */
292 COSTS_N_INSNS (39), /* cost of FDIV instruction. */
293 COSTS_N_INSNS (1), /* cost of FABS instruction. */
294 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
295 COSTS_N_INSNS (70), /* cost of FSQRT instruction. */
/* memcpy: rep movsl up to 256 bytes, then a library call.  */
296 {{libcall
, {{256, rep_prefix_4_byte
}, {-1, libcall
}}},
297 DUMMY_STRINGOP_ALGS
},
/* memset: rep stosl for all sizes.  */
298 {{libcall
, {{-1, rep_prefix_4_byte
}}},
/* Cycle-based cost table for the Intel PentiumPro / P6 family.
   NOTE(review): gaps in the residue line numbers mark fields dropped by
   extraction; the trailing entries and closing "};" are also missing —
   restore from upstream gcc/config/i386/i386.c.  */
303 struct processor_costs pentiumpro_cost
= {
304 COSTS_N_INSNS (1), /* cost of an add instruction */
305 COSTS_N_INSNS (1), /* cost of a lea instruction */
306 COSTS_N_INSNS (1), /* variable shift costs */
307 COSTS_N_INSNS (1), /* constant shift costs */
308 {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
309 COSTS_N_INSNS (4), /* HI */
310 COSTS_N_INSNS (4), /* SI */
311 COSTS_N_INSNS (4), /* DI */
312 COSTS_N_INSNS (4)}, /* other */
313 0, /* cost of multiply per each bit set */
314 {COSTS_N_INSNS (17), /* cost of a divide/mod for QI */
315 COSTS_N_INSNS (17), /* HI */
316 COSTS_N_INSNS (17), /* SI */
317 COSTS_N_INSNS (17), /* DI */
318 COSTS_N_INSNS (17)}, /* other */
319 COSTS_N_INSNS (1), /* cost of movsx */
320 COSTS_N_INSNS (1), /* cost of movzx */
321 8, /* "large" insn */
323 2, /* cost for loading QImode using movzbl */
324 {4, 4, 4}, /* cost of loading integer registers
325 in QImode, HImode and SImode.
326 Relative to reg-reg move (2). */
327 {2, 2, 2}, /* cost of storing integer registers */
328 2, /* cost of reg,reg fld/fst */
329 {2, 2, 6}, /* cost of loading fp registers
330 in SFmode, DFmode and XFmode */
331 {4, 4, 6}, /* cost of storing fp registers
332 in SFmode, DFmode and XFmode */
333 2, /* cost of moving MMX register */
334 {2, 2}, /* cost of loading MMX registers
335 in SImode and DImode */
336 {2, 2}, /* cost of storing MMX registers
337 in SImode and DImode */
338 2, /* cost of moving SSE register */
339 {2, 2, 8}, /* cost of loading SSE registers
340 in SImode, DImode and TImode */
341 {2, 2, 8}, /* cost of storing SSE registers
342 in SImode, DImode and TImode */
343 3, /* MMX or SSE register to integer */
344 32, /* size of prefetch block */
345 6, /* number of parallel prefetches */
347 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
348 COSTS_N_INSNS (5), /* cost of FMUL instruction. */
349 COSTS_N_INSNS (56), /* cost of FDIV instruction. */
350 COSTS_N_INSNS (2), /* cost of FABS instruction. */
351 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
352 COSTS_N_INSNS (56), /* cost of FSQRT instruction. */
353 /* PentiumPro has optimized rep instructions for blocks aligned by 8 bytes (we ensure
354 the alignment). For small blocks inline loop is still a noticeable win, for bigger
355 blocks either rep movsl or rep movsb is way to go. Rep movsb has apparently
356 more expensive startup time in CPU, but after 4K the difference is down in the noise.
   (NOTE(review): remainder of this comment was lost in extraction;
   terminator restored so the initializers below are not commented out.)  */
358 {{rep_prefix_4_byte
, {{128, loop
}, {1024, unrolled_loop
},
359 {8192, rep_prefix_4_byte
}, {-1, rep_prefix_1_byte
}}},
360 DUMMY_STRINGOP_ALGS
},
361 {{rep_prefix_4_byte
, {{1024, unrolled_loop
},
362 {8192, rep_prefix_4_byte
}, {-1, libcall
}}},
/* Cycle-based cost table for the AMD Geode.
   NOTE(review): gaps in the residue line numbers mark fields dropped by
   extraction; the trailing entries and closing "};" are also missing —
   restore from upstream gcc/config/i386/i386.c.  */
367 struct processor_costs geode_cost
= {
368 COSTS_N_INSNS (1), /* cost of an add instruction */
369 COSTS_N_INSNS (1), /* cost of a lea instruction */
370 COSTS_N_INSNS (2), /* variable shift costs */
371 COSTS_N_INSNS (1), /* constant shift costs */
372 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
373 COSTS_N_INSNS (4), /* HI */
374 COSTS_N_INSNS (7), /* SI */
375 COSTS_N_INSNS (7), /* DI */
376 COSTS_N_INSNS (7)}, /* other */
377 0, /* cost of multiply per each bit set */
378 {COSTS_N_INSNS (15), /* cost of a divide/mod for QI */
379 COSTS_N_INSNS (23), /* HI */
380 COSTS_N_INSNS (39), /* SI */
381 COSTS_N_INSNS (39), /* DI */
382 COSTS_N_INSNS (39)}, /* other */
383 COSTS_N_INSNS (1), /* cost of movsx */
384 COSTS_N_INSNS (1), /* cost of movzx */
385 8, /* "large" insn */
387 1, /* cost for loading QImode using movzbl */
388 {1, 1, 1}, /* cost of loading integer registers
389 in QImode, HImode and SImode.
390 Relative to reg-reg move (2). */
391 {1, 1, 1}, /* cost of storing integer registers */
392 1, /* cost of reg,reg fld/fst */
393 {1, 1, 1}, /* cost of loading fp registers
394 in SFmode, DFmode and XFmode */
395 {4, 6, 6}, /* cost of storing fp registers
396 in SFmode, DFmode and XFmode */
398 1, /* cost of moving MMX register */
399 {1, 1}, /* cost of loading MMX registers
400 in SImode and DImode */
401 {1, 1}, /* cost of storing MMX registers
402 in SImode and DImode */
403 1, /* cost of moving SSE register */
404 {1, 1, 1}, /* cost of loading SSE registers
405 in SImode, DImode and TImode */
406 {1, 1, 1}, /* cost of storing SSE registers
407 in SImode, DImode and TImode */
408 1, /* MMX or SSE register to integer */
409 32, /* size of prefetch block */
410 1, /* number of parallel prefetches */
412 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
413 COSTS_N_INSNS (11), /* cost of FMUL instruction. */
414 COSTS_N_INSNS (47), /* cost of FDIV instruction. */
415 COSTS_N_INSNS (1), /* cost of FABS instruction. */
416 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
417 COSTS_N_INSNS (54), /* cost of FSQRT instruction. */
/* memcpy / memset: rep movsl / stosl up to 256 bytes, then libcall.  */
418 {{libcall
, {{256, rep_prefix_4_byte
}, {-1, libcall
}}},
419 DUMMY_STRINGOP_ALGS
},
420 {{libcall
, {{256, rep_prefix_4_byte
}, {-1, libcall
}}},
/* Cycle-based cost table for the AMD K6.
   NOTE(review): gaps in the residue line numbers mark fields dropped by
   extraction; the trailing entries and closing "};" are also missing —
   restore from upstream gcc/config/i386/i386.c.  */
425 struct processor_costs k6_cost
= {
426 COSTS_N_INSNS (1), /* cost of an add instruction */
427 COSTS_N_INSNS (2), /* cost of a lea instruction */
428 COSTS_N_INSNS (1), /* variable shift costs */
429 COSTS_N_INSNS (1), /* constant shift costs */
430 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
431 COSTS_N_INSNS (3), /* HI */
432 COSTS_N_INSNS (3), /* SI */
433 COSTS_N_INSNS (3), /* DI */
434 COSTS_N_INSNS (3)}, /* other */
435 0, /* cost of multiply per each bit set */
436 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
437 COSTS_N_INSNS (18), /* HI */
438 COSTS_N_INSNS (18), /* SI */
439 COSTS_N_INSNS (18), /* DI */
440 COSTS_N_INSNS (18)}, /* other */
441 COSTS_N_INSNS (2), /* cost of movsx */
442 COSTS_N_INSNS (2), /* cost of movzx */
443 8, /* "large" insn */
445 3, /* cost for loading QImode using movzbl */
446 {4, 5, 4}, /* cost of loading integer registers
447 in QImode, HImode and SImode.
448 Relative to reg-reg move (2). */
449 {2, 3, 2}, /* cost of storing integer registers */
450 4, /* cost of reg,reg fld/fst */
451 {6, 6, 6}, /* cost of loading fp registers
452 in SFmode, DFmode and XFmode */
453 {4, 4, 4}, /* cost of storing fp registers
454 in SFmode, DFmode and XFmode */
455 2, /* cost of moving MMX register */
456 {2, 2}, /* cost of loading MMX registers
457 in SImode and DImode */
458 {2, 2}, /* cost of storing MMX registers
459 in SImode and DImode */
460 2, /* cost of moving SSE register */
461 {2, 2, 8}, /* cost of loading SSE registers
462 in SImode, DImode and TImode */
463 {2, 2, 8}, /* cost of storing SSE registers
464 in SImode, DImode and TImode */
465 6, /* MMX or SSE register to integer */
466 32, /* size of prefetch block */
467 1, /* number of parallel prefetches */
469 COSTS_N_INSNS (2), /* cost of FADD and FSUB insns. */
470 COSTS_N_INSNS (2), /* cost of FMUL instruction. */
471 COSTS_N_INSNS (56), /* cost of FDIV instruction. */
472 COSTS_N_INSNS (2), /* cost of FABS instruction. */
473 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
474 COSTS_N_INSNS (56), /* cost of FSQRT instruction. */
/* memcpy / memset: rep movsl / stosl up to 256 bytes, then libcall.  */
475 {{libcall
, {{256, rep_prefix_4_byte
}, {-1, libcall
}}},
476 DUMMY_STRINGOP_ALGS
},
477 {{libcall
, {{256, rep_prefix_4_byte
}, {-1, libcall
}}},
/* Cycle-based cost table for the AMD Athlon (K7).
   NOTE(review): gaps in the residue line numbers mark fields dropped by
   extraction; the trailing entries and closing "};" are also missing —
   restore from upstream gcc/config/i386/i386.c.  */
482 struct processor_costs athlon_cost
= {
483 COSTS_N_INSNS (1), /* cost of an add instruction */
484 COSTS_N_INSNS (2), /* cost of a lea instruction */
485 COSTS_N_INSNS (1), /* variable shift costs */
486 COSTS_N_INSNS (1), /* constant shift costs */
487 {COSTS_N_INSNS (5), /* cost of starting multiply for QI */
488 COSTS_N_INSNS (5), /* HI */
489 COSTS_N_INSNS (5), /* SI */
490 COSTS_N_INSNS (5), /* DI */
491 COSTS_N_INSNS (5)}, /* other */
492 0, /* cost of multiply per each bit set */
493 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
494 COSTS_N_INSNS (26), /* HI */
495 COSTS_N_INSNS (42), /* SI */
496 COSTS_N_INSNS (74), /* DI */
497 COSTS_N_INSNS (74)}, /* other */
498 COSTS_N_INSNS (1), /* cost of movsx */
499 COSTS_N_INSNS (1), /* cost of movzx */
500 8, /* "large" insn */
502 4, /* cost for loading QImode using movzbl */
503 {3, 4, 3}, /* cost of loading integer registers
504 in QImode, HImode and SImode.
505 Relative to reg-reg move (2). */
506 {3, 4, 3}, /* cost of storing integer registers */
507 4, /* cost of reg,reg fld/fst */
508 {4, 4, 12}, /* cost of loading fp registers
509 in SFmode, DFmode and XFmode */
510 {6, 6, 8}, /* cost of storing fp registers
511 in SFmode, DFmode and XFmode */
512 2, /* cost of moving MMX register */
513 {4, 4}, /* cost of loading MMX registers
514 in SImode and DImode */
515 {4, 4}, /* cost of storing MMX registers
516 in SImode and DImode */
517 2, /* cost of moving SSE register */
518 {4, 4, 6}, /* cost of loading SSE registers
519 in SImode, DImode and TImode */
520 {4, 4, 5}, /* cost of storing SSE registers
521 in SImode, DImode and TImode */
522 5, /* MMX or SSE register to integer */
523 64, /* size of prefetch block */
524 6, /* number of parallel prefetches */
526 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
527 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
528 COSTS_N_INSNS (24), /* cost of FDIV instruction. */
529 COSTS_N_INSNS (2), /* cost of FABS instruction. */
530 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
531 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
532 /* For some reason, Athlon deals better with REP prefix (relative to loops)
533 compared to K8. Alignment becomes important after 8 bytes for memcpy and
534 128 bytes for memset. */
535 {{libcall
, {{2048, rep_prefix_4_byte
}, {-1, libcall
}}},
536 DUMMY_STRINGOP_ALGS
},
537 {{libcall
, {{2048, rep_prefix_4_byte
}, {-1, libcall
}}},
/* Cycle-based cost table for the AMD K8 (Opteron/Athlon 64).
   NOTE(review): gaps in the residue line numbers mark fields dropped by
   extraction (incl. the closing "};") — restore from upstream
   gcc/config/i386/i386.c before compiling.  */
542 struct processor_costs k8_cost
= {
543 COSTS_N_INSNS (1), /* cost of an add instruction */
544 COSTS_N_INSNS (2), /* cost of a lea instruction */
545 COSTS_N_INSNS (1), /* variable shift costs */
546 COSTS_N_INSNS (1), /* constant shift costs */
547 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
548 COSTS_N_INSNS (4), /* HI */
549 COSTS_N_INSNS (3), /* SI */
550 COSTS_N_INSNS (4), /* DI */
551 COSTS_N_INSNS (5)}, /* other */
552 0, /* cost of multiply per each bit set */
553 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
554 COSTS_N_INSNS (26), /* HI */
555 COSTS_N_INSNS (42), /* SI */
556 COSTS_N_INSNS (74), /* DI */
557 COSTS_N_INSNS (74)}, /* other */
558 COSTS_N_INSNS (1), /* cost of movsx */
559 COSTS_N_INSNS (1), /* cost of movzx */
560 8, /* "large" insn */
562 4, /* cost for loading QImode using movzbl */
563 {3, 4, 3}, /* cost of loading integer registers
564 in QImode, HImode and SImode.
565 Relative to reg-reg move (2). */
566 {3, 4, 3}, /* cost of storing integer registers */
567 4, /* cost of reg,reg fld/fst */
568 {4, 4, 12}, /* cost of loading fp registers
569 in SFmode, DFmode and XFmode */
570 {6, 6, 8}, /* cost of storing fp registers
571 in SFmode, DFmode and XFmode */
572 2, /* cost of moving MMX register */
573 {3, 3}, /* cost of loading MMX registers
574 in SImode and DImode */
575 {4, 4}, /* cost of storing MMX registers
576 in SImode and DImode */
577 2, /* cost of moving SSE register */
578 {4, 3, 6}, /* cost of loading SSE registers
579 in SImode, DImode and TImode */
580 {4, 4, 5}, /* cost of storing SSE registers
581 in SImode, DImode and TImode */
582 5, /* MMX or SSE register to integer */
583 64, /* size of prefetch block */
584 /* New AMD processors never drop prefetches; if they cannot be performed
585 immediately, they are queued. We set number of simultaneous prefetches
586 to a large constant to reflect this (it probably is not a good idea not
587 to limit number of prefetches at all, as their execution also takes some
   time).  NOTE(review): comment terminator restored after extraction.  */
589 100, /* number of parallel prefetches */
591 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
592 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
593 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
594 COSTS_N_INSNS (2), /* cost of FABS instruction. */
595 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
596 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
597 /* K8 has optimized REP instruction for medium sized blocks, but for very small
598 blocks it is better to use loop. For large blocks, libcall can do
599 nontemporary accesses and beat inline considerably. */
600 {{libcall
, {{6, loop
}, {14, unrolled_loop
}, {-1, rep_prefix_4_byte
}}},
601 {libcall
, {{16, loop
}, {8192, rep_prefix_8_byte
}, {-1, libcall
}}}},
602 {{libcall
, {{8, loop
}, {24, unrolled_loop
},
603 {2048, rep_prefix_4_byte
}, {-1, libcall
}}},
604 {libcall
, {{48, unrolled_loop
}, {8192, rep_prefix_8_byte
}, {-1, libcall
}}}}
/* Cycle-based cost table for AMD Family 10h (Barcelona).
   NOTE(review): gaps in the residue line numbers mark fields dropped by
   extraction (incl. the closing "};") — restore from upstream
   gcc/config/i386/i386.c before compiling.  */
607 struct processor_costs amdfam10_cost
= {
608 COSTS_N_INSNS (1), /* cost of an add instruction */
609 COSTS_N_INSNS (2), /* cost of a lea instruction */
610 COSTS_N_INSNS (1), /* variable shift costs */
611 COSTS_N_INSNS (1), /* constant shift costs */
612 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
613 COSTS_N_INSNS (4), /* HI */
614 COSTS_N_INSNS (3), /* SI */
615 COSTS_N_INSNS (4), /* DI */
616 COSTS_N_INSNS (5)}, /* other */
617 0, /* cost of multiply per each bit set */
618 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
619 COSTS_N_INSNS (35), /* HI */
620 COSTS_N_INSNS (51), /* SI */
621 COSTS_N_INSNS (83), /* DI */
622 COSTS_N_INSNS (83)}, /* other */
623 COSTS_N_INSNS (1), /* cost of movsx */
624 COSTS_N_INSNS (1), /* cost of movzx */
625 8, /* "large" insn */
627 4, /* cost for loading QImode using movzbl */
628 {3, 4, 3}, /* cost of loading integer registers
629 in QImode, HImode and SImode.
630 Relative to reg-reg move (2). */
631 {3, 4, 3}, /* cost of storing integer registers */
632 4, /* cost of reg,reg fld/fst */
633 {4, 4, 12}, /* cost of loading fp registers
634 in SFmode, DFmode and XFmode */
635 {6, 6, 8}, /* cost of storing fp registers
636 in SFmode, DFmode and XFmode */
637 2, /* cost of moving MMX register */
638 {3, 3}, /* cost of loading MMX registers
639 in SImode and DImode */
640 {4, 4}, /* cost of storing MMX registers
641 in SImode and DImode */
642 2, /* cost of moving SSE register */
643 {4, 4, 3}, /* cost of loading SSE registers
644 in SImode, DImode and TImode */
645 {4, 4, 5}, /* cost of storing SSE registers
646 in SImode, DImode and TImode */
647 3, /* MMX or SSE register to integer */
/* NOTE(review): the lines below are a fragment of an instruction-latency
   comment whose "/ *" and "* /" delimiters were lost in extraction; they
   are re-wrapped here so they do not parse as code:
649 MOVD reg64, xmmreg Double FSTORE 4
650 MOVD reg32, xmmreg Double FSTORE 4
652 MOVD reg64, xmmreg Double FADD 3
654 MOVD reg32, xmmreg Double FADD 3
*/
656 64, /* size of prefetch block */
657 /* New AMD processors never drop prefetches; if they cannot be performed
658 immediately, they are queued. We set number of simultaneous prefetches
659 to a large constant to reflect this (it probably is not a good idea not
660 to limit number of prefetches at all, as their execution also takes some
   time).  NOTE(review): comment terminator restored after extraction.  */
662 100, /* number of parallel prefetches */
664 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
665 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
666 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
667 COSTS_N_INSNS (2), /* cost of FABS instruction. */
668 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
669 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
671 /* AMDFAM10 has optimized REP instruction for medium sized blocks, but for
672 very small blocks it is better to use loop. For large blocks, libcall can
673 do nontemporary accesses and beat inline considerably. */
674 {{libcall
, {{6, loop
}, {14, unrolled_loop
}, {-1, rep_prefix_4_byte
}}},
675 {libcall
, {{16, loop
}, {8192, rep_prefix_8_byte
}, {-1, libcall
}}}},
676 {{libcall
, {{8, loop
}, {24, unrolled_loop
},
677 {2048, rep_prefix_4_byte
}, {-1, libcall
}}},
678 {libcall
, {{48, unrolled_loop
}, {8192, rep_prefix_8_byte
}, {-1, libcall
}}}}
/* Cycle-based cost table for the Intel Pentium 4 (NetBurst).
   NOTE(review): gaps in the residue line numbers mark lines dropped by
   extraction (incl. part of the memset stringop table and the closing
   "};") — restore from upstream gcc/config/i386/i386.c.  */
682 struct processor_costs pentium4_cost
= {
683 COSTS_N_INSNS (1), /* cost of an add instruction */
684 COSTS_N_INSNS (3), /* cost of a lea instruction */
685 COSTS_N_INSNS (4), /* variable shift costs */
686 COSTS_N_INSNS (4), /* constant shift costs */
687 {COSTS_N_INSNS (15), /* cost of starting multiply for QI */
688 COSTS_N_INSNS (15), /* HI */
689 COSTS_N_INSNS (15), /* SI */
690 COSTS_N_INSNS (15), /* DI */
691 COSTS_N_INSNS (15)}, /* other */
692 0, /* cost of multiply per each bit set */
693 {COSTS_N_INSNS (56), /* cost of a divide/mod for QI */
694 COSTS_N_INSNS (56), /* HI */
695 COSTS_N_INSNS (56), /* SI */
696 COSTS_N_INSNS (56), /* DI */
697 COSTS_N_INSNS (56)}, /* other */
698 COSTS_N_INSNS (1), /* cost of movsx */
699 COSTS_N_INSNS (1), /* cost of movzx */
700 16, /* "large" insn */
702 2, /* cost for loading QImode using movzbl */
703 {4, 5, 4}, /* cost of loading integer registers
704 in QImode, HImode and SImode.
705 Relative to reg-reg move (2). */
706 {2, 3, 2}, /* cost of storing integer registers */
707 2, /* cost of reg,reg fld/fst */
708 {2, 2, 6}, /* cost of loading fp registers
709 in SFmode, DFmode and XFmode */
710 {4, 4, 6}, /* cost of storing fp registers
711 in SFmode, DFmode and XFmode */
712 2, /* cost of moving MMX register */
713 {2, 2}, /* cost of loading MMX registers
714 in SImode and DImode */
715 {2, 2}, /* cost of storing MMX registers
716 in SImode and DImode */
717 12, /* cost of moving SSE register */
718 {12, 12, 12}, /* cost of loading SSE registers
719 in SImode, DImode and TImode */
720 {2, 2, 8}, /* cost of storing SSE registers
721 in SImode, DImode and TImode */
722 10, /* MMX or SSE register to integer */
723 64, /* size of prefetch block */
724 6, /* number of parallel prefetches */
726 COSTS_N_INSNS (5), /* cost of FADD and FSUB insns. */
727 COSTS_N_INSNS (7), /* cost of FMUL instruction. */
728 COSTS_N_INSNS (43), /* cost of FDIV instruction. */
729 COSTS_N_INSNS (2), /* cost of FABS instruction. */
730 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
731 COSTS_N_INSNS (43), /* cost of FSQRT instruction. */
/* memcpy: byte loop up to 12 bytes, then rep movsl.  */
732 {{libcall
, {{12, loop_1_byte
}, {-1, rep_prefix_4_byte
}}},
733 DUMMY_STRINGOP_ALGS
},
/* memset table — NOTE(review): its final "{-1, ...}" entry was dropped
   by extraction.  */
734 {{libcall
, {{6, loop_1_byte
}, {48, loop
}, {20480, rep_prefix_4_byte
},
736 DUMMY_STRINGOP_ALGS
},
/* Cycle-based cost table for the Intel Nocona (64-bit Pentium 4 / Xeon).
   NOTE(review): gaps in the residue line numbers mark lines dropped by
   extraction (incl. part of the memset table and the closing "};") —
   restore from upstream gcc/config/i386/i386.c.  */
740 struct processor_costs nocona_cost
= {
741 COSTS_N_INSNS (1), /* cost of an add instruction */
742 COSTS_N_INSNS (1), /* cost of a lea instruction */
743 COSTS_N_INSNS (1), /* variable shift costs */
744 COSTS_N_INSNS (1), /* constant shift costs */
745 {COSTS_N_INSNS (10), /* cost of starting multiply for QI */
746 COSTS_N_INSNS (10), /* HI */
747 COSTS_N_INSNS (10), /* SI */
748 COSTS_N_INSNS (10), /* DI */
749 COSTS_N_INSNS (10)}, /* other */
750 0, /* cost of multiply per each bit set */
751 {COSTS_N_INSNS (66), /* cost of a divide/mod for QI */
752 COSTS_N_INSNS (66), /* HI */
753 COSTS_N_INSNS (66), /* SI */
754 COSTS_N_INSNS (66), /* DI */
755 COSTS_N_INSNS (66)}, /* other */
756 COSTS_N_INSNS (1), /* cost of movsx */
757 COSTS_N_INSNS (1), /* cost of movzx */
758 16, /* "large" insn */
760 4, /* cost for loading QImode using movzbl */
761 {4, 4, 4}, /* cost of loading integer registers
762 in QImode, HImode and SImode.
763 Relative to reg-reg move (2). */
764 {4, 4, 4}, /* cost of storing integer registers */
765 3, /* cost of reg,reg fld/fst */
766 {12, 12, 12}, /* cost of loading fp registers
767 in SFmode, DFmode and XFmode */
768 {4, 4, 4}, /* cost of storing fp registers
769 in SFmode, DFmode and XFmode */
770 6, /* cost of moving MMX register */
771 {12, 12}, /* cost of loading MMX registers
772 in SImode and DImode */
773 {12, 12}, /* cost of storing MMX registers
774 in SImode and DImode */
775 6, /* cost of moving SSE register */
776 {12, 12, 12}, /* cost of loading SSE registers
777 in SImode, DImode and TImode */
778 {12, 12, 12}, /* cost of storing SSE registers
779 in SImode, DImode and TImode */
780 8, /* MMX or SSE register to integer */
781 128, /* size of prefetch block */
782 8, /* number of parallel prefetches */
784 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
785 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
786 COSTS_N_INSNS (40), /* cost of FDIV instruction. */
787 COSTS_N_INSNS (3), /* cost of FABS instruction. */
788 COSTS_N_INSNS (3), /* cost of FCHS instruction. */
789 COSTS_N_INSNS (44), /* cost of FSQRT instruction. */
/* memcpy: 32-bit variant then 64-bit variant.  */
790 {{libcall
, {{12, loop_1_byte
}, {-1, rep_prefix_4_byte
}}},
791 {libcall
, {{32, loop
}, {20000, rep_prefix_8_byte
},
792 {100000, unrolled_loop
}, {-1, libcall
}}}},
/* memset — NOTE(review): the terminator of the 32-bit entry (original
   line 794) was dropped by extraction.  */
793 {{libcall
, {{6, loop_1_byte
}, {48, loop
}, {20480, rep_prefix_4_byte
},
795 {libcall
, {{24, loop
}, {64, unrolled_loop
},
796 {8192, rep_prefix_8_byte
}, {-1, libcall
}}}}
/* Cycle-based cost table for the Intel Core 2.
   NOTE(review): gaps in the residue line numbers mark lines dropped by
   extraction (incl. the closing "};") — restore from upstream
   gcc/config/i386/i386.c before compiling.  */
800 struct processor_costs core2_cost
= {
801 COSTS_N_INSNS (1), /* cost of an add instruction */
802 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
803 COSTS_N_INSNS (1), /* variable shift costs */
804 COSTS_N_INSNS (1), /* constant shift costs */
805 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
806 COSTS_N_INSNS (3), /* HI */
807 COSTS_N_INSNS (3), /* SI */
808 COSTS_N_INSNS (3), /* DI */
809 COSTS_N_INSNS (3)}, /* other */
810 0, /* cost of multiply per each bit set */
811 {COSTS_N_INSNS (22), /* cost of a divide/mod for QI */
812 COSTS_N_INSNS (22), /* HI */
813 COSTS_N_INSNS (22), /* SI */
814 COSTS_N_INSNS (22), /* DI */
815 COSTS_N_INSNS (22)}, /* other */
816 COSTS_N_INSNS (1), /* cost of movsx */
817 COSTS_N_INSNS (1), /* cost of movzx */
818 8, /* "large" insn */
820 2, /* cost for loading QImode using movzbl */
821 {6, 6, 6}, /* cost of loading integer registers
822 in QImode, HImode and SImode.
823 Relative to reg-reg move (2). */
824 {4, 4, 4}, /* cost of storing integer registers */
825 2, /* cost of reg,reg fld/fst */
826 {6, 6, 6}, /* cost of loading fp registers
827 in SFmode, DFmode and XFmode */
828 {4, 4, 4}, /* cost of loading integer registers */
829 2, /* cost of moving MMX register */
830 {6, 6}, /* cost of loading MMX registers
831 in SImode and DImode */
832 {4, 4}, /* cost of storing MMX registers
833 in SImode and DImode */
834 2, /* cost of moving SSE register */
835 {6, 6, 6}, /* cost of loading SSE registers
836 in SImode, DImode and TImode */
837 {4, 4, 4}, /* cost of storing SSE registers
838 in SImode, DImode and TImode */
839 2, /* MMX or SSE register to integer */
840 128, /* size of prefetch block */
841 8, /* number of parallel prefetches */
843 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
844 COSTS_N_INSNS (5), /* cost of FMUL instruction. */
845 COSTS_N_INSNS (32), /* cost of FDIV instruction. */
846 COSTS_N_INSNS (1), /* cost of FABS instruction. */
847 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
848 COSTS_N_INSNS (58), /* cost of FSQRT instruction. */
/* memcpy: 32-bit variant then 64-bit variant.  */
849 {{libcall
, {{11, loop
}, {-1, rep_prefix_4_byte
}}},
850 {libcall
, {{32, loop
}, {64, rep_prefix_4_byte
},
851 {8192, rep_prefix_8_byte
}, {-1, libcall
}}}},
/* memset: 32-bit variant then 64-bit variant.  */
852 {{libcall
, {{8, loop
}, {15, unrolled_loop
},
853 {2048, rep_prefix_4_byte
}, {-1, libcall
}}},
854 {libcall
, {{24, loop
}, {32, unrolled_loop
},
855 {8192, rep_prefix_8_byte
}, {-1, libcall
}}}}
/* Generic64 should produce code tuned for Nocona and K8.  NOTE(review):
   some initializer fields (e.g. MOVE_RATIO, branch cost) are missing
   from this excerpt — restore from upstream i386.c before compiling.  */
struct processor_costs generic64_cost = {
  COSTS_N_INSNS (1),		/* cost of an add instruction */
  /* On all chips taken into consideration lea is 2 cycles and more.  With
     this cost however our current implementation of synth_mult results in
     use of unnecessary temporary registers causing regression on several
     SPECfp benchmarks.  */
  COSTS_N_INSNS (1) + 1,	/* cost of a lea instruction */
  COSTS_N_INSNS (1),		/* variable shift costs */
  COSTS_N_INSNS (1),		/* constant shift costs */
  {COSTS_N_INSNS (3),		/* cost of starting multiply for QI */
   COSTS_N_INSNS (4),		/* HI */
   COSTS_N_INSNS (3),		/* SI */
   COSTS_N_INSNS (4),		/* DI */
   COSTS_N_INSNS (2)},		/* other */
  0,				/* cost of multiply per each bit set */
  {COSTS_N_INSNS (18),		/* cost of a divide/mod for QI */
   COSTS_N_INSNS (26),		/* HI */
   COSTS_N_INSNS (42),		/* SI */
   COSTS_N_INSNS (74),		/* DI */
   COSTS_N_INSNS (74)},		/* other */
  COSTS_N_INSNS (1),		/* cost of movsx */
  COSTS_N_INSNS (1),		/* cost of movzx */
  8,				/* "large" insn */
  4,				/* cost for loading QImode using movzbl */
  {4, 4, 4},			/* cost of loading integer registers
				   in QImode, HImode and SImode.
				   Relative to reg-reg move (2).  */
  {4, 4, 4},			/* cost of storing integer registers */
  4,				/* cost of reg,reg fld/fst */
  {12, 12, 12},			/* cost of loading fp registers
				   in SFmode, DFmode and XFmode */
  {6, 6, 8},			/* cost of storing fp registers
				   in SFmode, DFmode and XFmode */
  2,				/* cost of moving MMX register */
  {8, 8},			/* cost of loading MMX registers
				   in SImode and DImode */
  {8, 8},			/* cost of storing MMX registers
				   in SImode and DImode */
  2,				/* cost of moving SSE register */
  {8, 8, 8},			/* cost of loading SSE registers
				   in SImode, DImode and TImode */
  {8, 8, 8},			/* cost of storing SSE registers
				   in SImode, DImode and TImode */
  5,				/* MMX or SSE register to integer */
  64,				/* size of prefetch block */
  6,				/* number of parallel prefetches */
  /* Benchmarks shows large regressions on K8 sixtrack benchmark when this
     value is increased to perhaps more appropriate value of 5.  */
  COSTS_N_INSNS (8),		/* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (8),		/* cost of FMUL instruction.  */
  COSTS_N_INSNS (20),		/* cost of FDIV instruction.  */
  COSTS_N_INSNS (8),		/* cost of FABS instruction.  */
  COSTS_N_INSNS (8),		/* cost of FCHS instruction.  */
  COSTS_N_INSNS (40),		/* cost of FSQRT instruction.  */
  /* memcpy strategy: 32-bit slot unused (DUMMY), 64-bit uses rep movsq
     for mid-size blocks and a libcall above 8192 bytes.  */
  {DUMMY_STRINGOP_ALGS,
   {libcall, {{32, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  /* memset strategy, same shape.  */
  {DUMMY_STRINGOP_ALGS,
   {libcall, {{32, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}}
/* Generic32 should produce code tuned for Athlon, PPro, Pentium4, Nocona
   and K8.  NOTE(review): some initializer fields (e.g. MOVE_RATIO, branch
   cost) are missing from this excerpt — restore from upstream i386.c.  */
struct processor_costs generic32_cost = {
  COSTS_N_INSNS (1),		/* cost of an add instruction */
  COSTS_N_INSNS (1) + 1,	/* cost of a lea instruction */
  COSTS_N_INSNS (1),		/* variable shift costs */
  COSTS_N_INSNS (1),		/* constant shift costs */
  {COSTS_N_INSNS (3),		/* cost of starting multiply for QI */
   COSTS_N_INSNS (4),		/* HI */
   COSTS_N_INSNS (3),		/* SI */
   COSTS_N_INSNS (4),		/* DI */
   COSTS_N_INSNS (2)},		/* other */
  0,				/* cost of multiply per each bit set */
  {COSTS_N_INSNS (18),		/* cost of a divide/mod for QI */
   COSTS_N_INSNS (26),		/* HI */
   COSTS_N_INSNS (42),		/* SI */
   COSTS_N_INSNS (74),		/* DI */
   COSTS_N_INSNS (74)},		/* other */
  COSTS_N_INSNS (1),		/* cost of movsx */
  COSTS_N_INSNS (1),		/* cost of movzx */
  8,				/* "large" insn */
  4,				/* cost for loading QImode using movzbl */
  {4, 4, 4},			/* cost of loading integer registers
				   in QImode, HImode and SImode.
				   Relative to reg-reg move (2).  */
  {4, 4, 4},			/* cost of storing integer registers */
  4,				/* cost of reg,reg fld/fst */
  {12, 12, 12},			/* cost of loading fp registers
				   in SFmode, DFmode and XFmode */
  {6, 6, 8},			/* cost of storing fp registers
				   in SFmode, DFmode and XFmode */
  2,				/* cost of moving MMX register */
  {8, 8},			/* cost of loading MMX registers
				   in SImode and DImode */
  {8, 8},			/* cost of storing MMX registers
				   in SImode and DImode */
  2,				/* cost of moving SSE register */
  {8, 8, 8},			/* cost of loading SSE registers
				   in SImode, DImode and TImode */
  {8, 8, 8},			/* cost of storing SSE registers
				   in SImode, DImode and TImode */
  5,				/* MMX or SSE register to integer */
  64,				/* size of prefetch block */
  6,				/* number of parallel prefetches */
  COSTS_N_INSNS (8),		/* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (8),		/* cost of FMUL instruction.  */
  COSTS_N_INSNS (20),		/* cost of FDIV instruction.  */
  COSTS_N_INSNS (8),		/* cost of FABS instruction.  */
  COSTS_N_INSNS (8),		/* cost of FCHS instruction.  */
  COSTS_N_INSNS (40),		/* cost of FSQRT instruction.  */
  /* memcpy strategy: only the 32-bit slot is populated; the 64-bit slot
     is unused (DUMMY) since generic32 never compiles 64-bit code.  */
  {{libcall, {{32, loop}, {8192, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  /* memset strategy, same shape.  */
  {{libcall, {{32, loop}, {8192, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
/* Cost table in effect for the current tuning target; initialized to the
   Pentium costs.  NOTE(review): the assignment that repoints this at the
   table selected by -mtune is not in this excerpt — confirm in
   override_options.  */
const struct processor_costs *ix86_cost = &pentium_cost;
/* Processor feature/optimization bitmasks.  Each m_* constant is a
   one-bit mask keyed by the PROCESSOR_* enum; composite masks OR
   together related CPU families for use in the tuning tables below.  */
#define m_386 (1<<PROCESSOR_I386)
#define m_486 (1<<PROCESSOR_I486)
#define m_PENT (1<<PROCESSOR_PENTIUM)
#define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
#define m_PENT4 (1<<PROCESSOR_PENTIUM4)
#define m_NOCONA (1<<PROCESSOR_NOCONA)
#define m_CORE2 (1<<PROCESSOR_CORE2)
#define m_GEODE (1<<PROCESSOR_GEODE)
#define m_K6 (1<<PROCESSOR_K6)
#define m_K6_GEODE (m_K6 | m_GEODE)
#define m_K8 (1<<PROCESSOR_K8)
#define m_ATHLON (1<<PROCESSOR_ATHLON)
#define m_ATHLON_K8 (m_K8 | m_ATHLON)
#define m_AMDFAM10 (1<<PROCESSOR_AMDFAM10)
#define m_ATHLON_K8_AMDFAM10 (m_K8 | m_ATHLON | m_AMDFAM10)
#define m_GENERIC32 (1<<PROCESSOR_GENERIC32)
#define m_GENERIC64 (1<<PROCESSOR_GENERIC64)
/* Generic instruction choice should be common subset of supported CPUs
   (PPro/PENT4/NOCONA/CORE2/Athlon/K8).  */
#define m_GENERIC (m_GENERIC32 | m_GENERIC64)
/* Feature tests against the various tunings.  Indexed by X86_TUNE_*; each
   entry is a mask of the CPUs the tuning applies to.  NOTE(review): the
   initializer values for several entries (marked below) are missing from
   this excerpt and must be restored from upstream i386.c before this
   array can compile.  */
unsigned int ix86_tune_features[X86_TUNE_LAST] = {
  /* X86_TUNE_USE_LEAVE: Leave does not affect Nocona SPEC2000 results
     negatively, so enabling for Generic64 seems like good code size
     tradeoff.  We can't enable it for 32bit generic because it does not
     work well with PPro base chips.  */
  m_386 | m_K6_GEODE | m_ATHLON_K8_AMDFAM10 | m_CORE2 | m_GENERIC64,

  /* X86_TUNE_PUSH_MEMORY */
  m_386 | m_K6_GEODE | m_ATHLON_K8_AMDFAM10 | m_PENT4
  | m_NOCONA | m_CORE2 | m_GENERIC,

  /* X86_TUNE_ZERO_EXTEND_WITH_AND */
  /* (value missing in this excerpt) */

  /* X86_TUNE_USE_BIT_TEST */
  /* (value missing in this excerpt) */

  /* X86_TUNE_UNROLL_STRLEN */
  m_486 | m_PENT | m_PPRO | m_ATHLON_K8_AMDFAM10 | m_K6 | m_CORE2 | m_GENERIC,

  /* X86_TUNE_DEEP_BRANCH_PREDICTION */
  m_PPRO | m_K6_GEODE | m_ATHLON_K8_AMDFAM10 | m_PENT4
  | m_NOCONA | m_CORE2 | m_GENERIC,

  /* X86_TUNE_BRANCH_PREDICTION_HINTS: Branch hints were put in P4 based
     on simulation result.  But after P4 was made, no performance benefit
     was observed with branch hints.  It also increases the code size.
     As a result, icc never generates branch hints.  */
  /* (value missing in this excerpt) */

  /* X86_TUNE_DOUBLE_WITH_ADD */
  /* (value missing in this excerpt) */

  /* X86_TUNE_USE_SAHF */
  m_PPRO | m_K6_GEODE | m_K8 | m_AMDFAM10 | m_PENT4
  | m_NOCONA | m_CORE2 | m_GENERIC,

  /* X86_TUNE_MOVX: Enable to zero extend integer registers to avoid
     partial dependencies.  */
  m_ATHLON_K8_AMDFAM10 | m_PPRO | m_PENT4 | m_NOCONA
  | m_CORE2 | m_GENERIC | m_GEODE /* m_386 | m_K6 */,

  /* X86_TUNE_PARTIAL_REG_STALL: We probably ought to watch for partial
     register stalls on Generic32 compilation setting as well.  However
     in current implementation the partial register stalls are not eliminated
     very well - they can be introduced via subregs synthesized by combine
     and can happen in caller/callee saving sequences.  Because this option
     pays back little on PPro based chips and is in conflict with partial reg
     dependencies used by Athlon/P4 based chips, it is better to leave it off
     for generic32 for now.  */
  /* (value missing in this excerpt) */

  /* X86_TUNE_PARTIAL_FLAG_REG_STALL */
  m_CORE2 | m_GENERIC,

  /* X86_TUNE_USE_HIMODE_FIOP */
  m_386 | m_486 | m_K6_GEODE,

  /* X86_TUNE_USE_SIMODE_FIOP */
  ~(m_PPRO | m_ATHLON_K8_AMDFAM10 | m_PENT | m_CORE2 | m_GENERIC),

  /* X86_TUNE_USE_MOV0 */
  /* (value missing in this excerpt) */

  /* X86_TUNE_USE_CLTD */
  ~(m_PENT | m_K6 | m_CORE2 | m_GENERIC),

  /* X86_TUNE_USE_XCHGB: Use xchgb %rh,%rl instead of rolw/rorw $8,rx.  */
  /* (value missing in this excerpt) */

  /* X86_TUNE_SPLIT_LONG_MOVES */
  /* (value missing in this excerpt) */

  /* X86_TUNE_READ_MODIFY_WRITE */
  /* (value missing in this excerpt) */

  /* X86_TUNE_READ_MODIFY */
  /* (value missing in this excerpt) */

  /* X86_TUNE_PROMOTE_QIMODE */
  m_K6_GEODE | m_PENT | m_386 | m_486 | m_ATHLON_K8_AMDFAM10 | m_CORE2
  | m_GENERIC /* | m_PENT4 ? */,

  /* X86_TUNE_FAST_PREFIX */
  ~(m_PENT | m_486 | m_386),

  /* X86_TUNE_SINGLE_STRINGOP */
  m_386 | m_PENT4 | m_NOCONA,

  /* X86_TUNE_QIMODE_MATH */
  /* (value missing in this excerpt) */

  /* X86_TUNE_HIMODE_MATH: On PPro this flag is meant to avoid partial
     register stalls.  Just like X86_TUNE_PARTIAL_REG_STALL this option
     might be considered for Generic32 if our scheme for avoiding partial
     stalls was more effective.  */
  /* (value missing in this excerpt) */

  /* X86_TUNE_PROMOTE_QI_REGS */
  /* (value missing in this excerpt) */

  /* X86_TUNE_PROMOTE_HI_REGS */
  /* (value missing in this excerpt) */

  /* X86_TUNE_ADD_ESP_4: Enable if add/sub is preferred over 1/2 push/pop.  */
  m_ATHLON_K8_AMDFAM10 | m_K6_GEODE | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,

  /* X86_TUNE_ADD_ESP_8 */
  m_ATHLON_K8_AMDFAM10 | m_PPRO | m_K6_GEODE | m_386
  | m_486 | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,

  /* X86_TUNE_SUB_ESP_4 */
  m_ATHLON_K8_AMDFAM10 | m_PPRO | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,

  /* X86_TUNE_SUB_ESP_8 */
  m_ATHLON_K8_AMDFAM10 | m_PPRO | m_386 | m_486
  | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,

  /* X86_TUNE_INTEGER_DFMODE_MOVES: Enable if integer moves are preferred
     for DFmode copies */
  ~(m_ATHLON_K8_AMDFAM10 | m_PENT4 | m_NOCONA | m_PPRO | m_CORE2
    | m_GENERIC | m_GEODE),

  /* X86_TUNE_PARTIAL_REG_DEPENDENCY */
  m_ATHLON_K8_AMDFAM10 | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,

  /* X86_TUNE_SSE_PARTIAL_REG_DEPENDENCY: In the Generic model we have a
     conflict here in between PPro/Pentium4 based chips that thread 128bit
     SSE registers as single units versus K8 based chips that divide SSE
     registers to two 64bit halves.  This knob promotes all store destinations
     to be 128bit to allow register renaming on 128bit SSE units, but usually
     results in one extra microop on 64bit SSE units.  Experimental results
     shows that disabling this option on P4 brings over 20% SPECfp regression,
     while enabling it on K8 brings roughly 2.4% regression that can be partly
     masked by careful scheduling of moves.  */
  m_PENT4 | m_NOCONA | m_PPRO | m_CORE2 | m_GENERIC | m_AMDFAM10,

  /* X86_TUNE_SSE_UNALIGNED_MOVE_OPTIMAL */
  /* (value missing in this excerpt) */

  /* X86_TUNE_SSE_SPLIT_REGS: Set for machines where the type and dependencies
     are resolved on SSE register parts instead of whole registers, so we may
     maintain just lower part of scalar values in proper format leaving the
     upper part undefined.  */
  /* (value missing in this excerpt) */

  /* X86_TUNE_SSE_TYPELESS_STORES */
  m_ATHLON_K8_AMDFAM10,

  /* X86_TUNE_SSE_LOAD0_BY_PXOR */
  m_PPRO | m_PENT4 | m_NOCONA,

  /* X86_TUNE_MEMORY_MISMATCH_STALL */
  m_ATHLON_K8_AMDFAM10 | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,

  /* X86_TUNE_PROLOGUE_USING_MOVE */
  m_ATHLON_K8 | m_PPRO | m_CORE2 | m_GENERIC,

  /* X86_TUNE_EPILOGUE_USING_MOVE */
  m_ATHLON_K8 | m_PPRO | m_CORE2 | m_GENERIC,

  /* X86_TUNE_SHIFT1 */
  /* (value missing in this excerpt) */

  /* X86_TUNE_USE_FFREEP */
  m_ATHLON_K8_AMDFAM10,

  /* X86_TUNE_INTER_UNIT_MOVES */
  ~(m_ATHLON_K8_AMDFAM10 | m_GENERIC),

  /* X86_TUNE_FOUR_JUMP_LIMIT: Some CPU cores are not able to predict more
     than 4 branch instructions in the 16 byte window.  */
  m_PPRO | m_ATHLON_K8_AMDFAM10 | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,

  /* X86_TUNE_SCHEDULE */
  m_PPRO | m_ATHLON_K8_AMDFAM10 | m_K6_GEODE | m_PENT | m_CORE2 | m_GENERIC,

  /* X86_TUNE_USE_BT */
  m_ATHLON_K8_AMDFAM10,

  /* X86_TUNE_USE_INCDEC */
  ~(m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC),

  /* X86_TUNE_PAD_RETURNS */
  m_ATHLON_K8_AMDFAM10 | m_CORE2 | m_GENERIC,

  /* X86_TUNE_EXT_80387_CONSTANTS */
  m_K6_GEODE | m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_PPRO | m_CORE2 | m_GENERIC,

  /* X86_TUNE_SHORTEN_X87_SSE */
  /* (value missing in this excerpt) */

  /* X86_TUNE_AVOID_VECTOR_DECODE */
  /* (value missing in this excerpt) */

  /* X86_TUNE_SLOW_IMUL_IMM32_MEM (imul of 32-bit constant and memory is vector
     path on AMD machines) */
  m_K8 | m_GENERIC64 | m_AMDFAM10,

  /* X86_TUNE_SLOW_IMUL_IMM8 (imul of 8-bit constant is vector path on AMD
     machines) */
  m_K8 | m_GENERIC64 | m_AMDFAM10,

  /* X86_TUNE_MOVE_M1_VIA_OR (on pentiums, it is faster to load -1 via OR
     than a MOV) */
  /* (value missing in this excerpt) */

  /* X86_TUNE_NOT_UNPAIRABLE (NOT is not pairable on Pentium, while XOR is, but
     one byte longer).  */
  /* (value missing in this excerpt) */

  /* X86_TUNE_NOT_VECTORMODE (On AMD K6, NOT is vector decoded with memory
     operand that cannot be represented using a modRM byte.  The XOR
     replacement is long decoded, so this split helps here as well).  */
  /* (value missing in this excerpt) */
/* Feature tests against the various architecture variations, indexed by
   X86_ARCH_*.  NOTE(review): initializer values for CMPXCHG, CMPXCHG8B,
   XADD and BSWAP are missing from this excerpt — restore from upstream.  */
unsigned int ix86_arch_features[X86_ARCH_LAST] = {
  /* X86_ARCH_CMOVE */
  m_PPRO | m_GEODE | m_ATHLON_K8_AMDFAM10 | m_PENT4 | m_NOCONA,

  /* X86_ARCH_CMPXCHG: Compare and exchange was added for 80486.  */
  /* X86_ARCH_CMPXCHG8B: Compare and exchange 8 bytes was added for pentium.  */
  /* X86_ARCH_XADD: Exchange and add was added for 80486.  */
  /* X86_ARCH_BSWAP: Byteswap was added for 80486.  */
/* CPUs for which accumulating outgoing arguments is preferred.
   NOTE(review): use site not in this excerpt — confirm.  */
static const unsigned int x86_accumulate_outgoing_args
  = m_ATHLON_K8_AMDFAM10 | m_PENT4 | m_NOCONA | m_PPRO | m_CORE2 | m_GENERIC;

/* CPUs whose 80387 is assumed usable for "fancy" math.
   NOTE(review): semantics inferred from name — confirm at use site.  */
static const unsigned int x86_arch_always_fancy_math_387
  = m_PENT | m_PPRO | m_ATHLON_K8_AMDFAM10 | m_PENT4
  | m_NOCONA | m_CORE2 | m_GENERIC;

/* String-operation algorithm override; no_stringop means none forced.
   NOTE(review): setter not in this excerpt — presumably set from a
   command-line option; confirm.  */
static enum stringop_alg stringop_alg = no_stringop;

/* In case the average insn count for single function invocation is
   lower than this constant, emit fast (but longer) prologue and
   epilogue code.  */
#define FAST_PROLOGUE_INSN_COUNT 20

/* Names for 8 (low), 8 (high), and 16-bit registers, respectively.  */
static const char *const qi_reg_name[] = QI_REGISTER_NAMES;
static const char *const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES;
static const char *const hi_reg_name[] = HI_REGISTER_NAMES;
/* Array of the smallest class containing reg number REGNO, indexed by
   REGNO.  Used by REGNO_REG_CLASS in i386.h.  NOTE(review): the opening
   and closing braces of this initializer are missing from this excerpt.  */
enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] =
  /* ax, dx, cx, bx */
  AREG, DREG, CREG, BREG,
  /* si, di, bp, sp */
  SIREG, DIREG, NON_Q_REGS, NON_Q_REGS,
  /* FP stack registers: st(0), st(1), then the rest */
  FP_TOP_REG, FP_SECOND_REG, FLOAT_REGS, FLOAT_REGS,
  FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, FLOAT_REGS,
  /* flags, fpsr, fpcr, frame */
  NO_REGS, NO_REGS, NO_REGS, NON_Q_REGS,
  /* SSE registers */
  SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
  /* MMX registers */
  MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS,
  /* extended integer registers */
  NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
  NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
  /* extended SSE registers */
  SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
1288 /* The "default" register map used in 32bit mode. */
1290 int const dbx_register_map
[FIRST_PSEUDO_REGISTER
] =
1292 0, 2, 1, 3, 6, 7, 4, 5, /* general regs */
1293 12, 13, 14, 15, 16, 17, 18, 19, /* fp regs */
1294 -1, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
1295 21, 22, 23, 24, 25, 26, 27, 28, /* SSE */
1296 29, 30, 31, 32, 33, 34, 35, 36, /* MMX */
1297 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
1298 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
1301 static int const x86_64_int_parameter_registers
[6] =
1303 5 /*RDI*/, 4 /*RSI*/, 1 /*RDX*/, 2 /*RCX*/,
1304 FIRST_REX_INT_REG
/*R8 */, FIRST_REX_INT_REG
+ 1 /*R9 */
1307 static int const x86_64_int_return_registers
[4] =
1309 0 /*RAX*/, 1 /*RDI*/, 5 /*RDI*/, 4 /*RSI*/
1312 /* The "default" register map used in 64bit mode. */
1313 int const dbx64_register_map
[FIRST_PSEUDO_REGISTER
] =
1315 0, 1, 2, 3, 4, 5, 6, 7, /* general regs */
1316 33, 34, 35, 36, 37, 38, 39, 40, /* fp regs */
1317 -1, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
1318 17, 18, 19, 20, 21, 22, 23, 24, /* SSE */
1319 41, 42, 43, 44, 45, 46, 47, 48, /* MMX */
1320 8,9,10,11,12,13,14,15, /* extended integer registers */
1321 25, 26, 27, 28, 29, 30, 31, 32, /* extended SSE registers */
1324 /* Define the register numbers to be used in Dwarf debugging information.
1325 The SVR4 reference port C compiler uses the following register numbers
1326 in its Dwarf output code:
1327 0 for %eax (gcc regno = 0)
1328 1 for %ecx (gcc regno = 2)
1329 2 for %edx (gcc regno = 1)
1330 3 for %ebx (gcc regno = 3)
1331 4 for %esp (gcc regno = 7)
1332 5 for %ebp (gcc regno = 6)
1333 6 for %esi (gcc regno = 4)
1334 7 for %edi (gcc regno = 5)
1335 The following three DWARF register numbers are never generated by
1336 the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
1337 believes these numbers have these meanings.
1338 8 for %eip (no gcc equivalent)
1339 9 for %eflags (gcc regno = 17)
1340 10 for %trapno (no gcc equivalent)
1341 It is not at all clear how we should number the FP stack registers
1342 for the x86 architecture. If the version of SDB on x86/svr4 were
1343 a bit less brain dead with respect to floating-point then we would
1344 have a precedent to follow with respect to DWARF register numbers
1345 for x86 FP registers, but the SDB on x86/svr4 is so completely
1346 broken with respect to FP registers that it is hardly worth thinking
1347 of it as something to strive for compatibility with.
1348 The version of x86/svr4 SDB I have at the moment does (partially)
1349 seem to believe that DWARF register number 11 is associated with
1350 the x86 register %st(0), but that's about all. Higher DWARF
1351 register numbers don't seem to be associated with anything in
1352 particular, and even for DWARF regno 11, SDB only seems to under-
1353 stand that it should say that a variable lives in %st(0) (when
1354 asked via an `=' command) if we said it was in DWARF regno 11,
1355 but SDB still prints garbage when asked for the value of the
1356 variable in question (via a `/' command).
1357 (Also note that the labels SDB prints for various FP stack regs
1358 when doing an `x' command are all wrong.)
1359 Note that these problems generally don't affect the native SVR4
1360 C compiler because it doesn't allow the use of -O with -g and
1361 because when it is *not* optimizing, it allocates a memory
1362 location for each floating-point variable, and the memory
1363 location is what gets described in the DWARF AT_location
1364 attribute for the variable in question.
1365 Regardless of the severe mental illness of the x86/svr4 SDB, we
1366 do something sensible here and we use the following DWARF
1367 register numbers. Note that these are all stack-top-relative
1369 11 for %st(0) (gcc regno = 8)
1370 12 for %st(1) (gcc regno = 9)
1371 13 for %st(2) (gcc regno = 10)
1372 14 for %st(3) (gcc regno = 11)
1373 15 for %st(4) (gcc regno = 12)
1374 16 for %st(5) (gcc regno = 13)
1375 17 for %st(6) (gcc regno = 14)
1376 18 for %st(7) (gcc regno = 15)
/* DWARF register numbers per the SVR4 convention documented in the long
   comment above.  NOTE(review): initializer braces missing from this
   excerpt.  */
int const svr4_dbx_register_map[FIRST_PSEUDO_REGISTER] =
  0, 2, 1, 3, 6, 7, 5, 4,		/* general regs */
  11, 12, 13, 14, 15, 16, 17, 18,	/* fp regs */
  -1, 9, -1, -1, -1,			/* arg, flags, fpsr, fpcr, frame */
  21, 22, 23, 24, 25, 26, 27, 28,	/* SSE registers */
  29, 30, 31, 32, 33, 34, 35, 36,	/* MMX registers */
  -1, -1, -1, -1, -1, -1, -1, -1,	/* extended integer registers */
  -1, -1, -1, -1, -1, -1, -1, -1,	/* extended SSE registers */
/* Test and compare insns in i386.md store the information needed to
   generate branch and scc insns here.  */
rtx ix86_compare_op0 = NULL_RTX;	/* first operand of pending compare */
rtx ix86_compare_op1 = NULL_RTX;	/* second operand of pending compare */
/* NOTE(review): presumably a pre-computed flags rtx for the pending
   compare — confirm against its uses in i386.md.  */
rtx ix86_compare_emitted = NULL_RTX;
/* Size of the register save area.  */
#define X86_64_VARARGS_SIZE (REGPARM_MAX * UNITS_PER_WORD + SSE_REGPARM_MAX * 16)

/* Define the structure for the machine field in struct function.
   NOTE(review): braces and additional members of this GTY struct are
   missing from this excerpt.  */
struct stack_local_entry GTY(())
  unsigned short mode;			/* machine mode of the stack slot */
  struct stack_local_entry *next;	/* next entry in a singly linked list */

/* Structure describing stack frame layout.
   Stack grows downward:
   saved frame pointer if frame_pointer_needed
   <- HARD_FRAME_POINTER
   [va_arg registers] (
   > to_allocate <- FRAME_POINTER
   NOTE(review): struct declaration line and several members are missing
   from this excerpt; only the fields below are visible.  */
  HOST_WIDE_INT frame;			/* size of the frame area */
  int outgoing_arguments_size;
  HOST_WIDE_INT to_allocate;		/* stack space to allocate in prologue */
  /* The offsets relative to ARG_POINTER.  */
  HOST_WIDE_INT frame_pointer_offset;
  HOST_WIDE_INT hard_frame_pointer_offset;
  HOST_WIDE_INT stack_pointer_offset;
  /* When save_regs_using_mov is set, emit prologue using
     move instead of push instructions.  */
  bool save_regs_using_mov;
/* Code model option.  */
enum cmodel ix86_cmodel;
/* Assembly dialect to emit; defaults to AT&T syntax.  */
enum asm_dialect ix86_asm_dialect = ASM_ATT;
/* Thread-local-storage dialect; defaults to the GNU scheme.  */
enum tls_dialect ix86_tls_dialect = TLS_DIALECT_GNU;

/* Which unit we are generating floating point math for.  */
enum fpmath_unit ix86_fpmath;

/* Which cpu are we scheduling for.  */
enum processor_type ix86_tune;

/* Which instruction set architecture to use.  */
enum processor_type ix86_arch;

/* true if sse prefetch instruction is not NOOP.  */
int x86_prefetch_sse;

/* ix86_regparm_string as a number */
static int ix86_regparm;

/* -mstackrealign option */
extern int ix86_force_align_arg_pointer;
static const char ix86_force_align_arg_pointer_string[] = "force_align_arg_pointer";

/* Preferred alignment for stack boundary in bits.  */
unsigned int ix86_preferred_stack_boundary;

/* Values 1-5: see jump.c */
int ix86_branch_cost;

/* Variables which are this size or smaller are put in the data/bss
   or ldata/lbss sections.  */
int ix86_section_threshold = 65536;

/* Prefix built by ASM_GENERATE_INTERNAL_LABEL.  */
char internal_label_prefix[16];
int internal_label_prefix_len;	/* length of internal_label_prefix */
/* Forward declarations of local helpers.  NOTE(review): prototypes that
   end below with a trailing comma had their continuation lines dropped
   from this excerpt; restore the remaining parameters from upstream
   i386.c before compiling.  */
static bool ix86_handle_option (size_t, const char *, int);
static void output_pic_addr_const (FILE *, rtx, int);
static void put_condition_code (enum rtx_code, enum machine_mode,
static const char *get_some_local_dynamic_name (void);
static int get_some_local_dynamic_name_1 (rtx *, void *);
static rtx ix86_expand_int_compare (enum rtx_code, rtx, rtx);
static enum rtx_code ix86_prepare_fp_compare_args (enum rtx_code, rtx *,
static bool ix86_fixed_condition_code_regs (unsigned int *, unsigned int *);
static enum machine_mode ix86_cc_modes_compatible (enum machine_mode,
static rtx get_thread_pointer (int);
static rtx legitimize_tls_address (rtx, enum tls_model, int);
static void get_pc_thunk_name (char [32], unsigned int);
static rtx gen_push (rtx);
static int ix86_flags_dependent (rtx, rtx, enum attr_type);
static int ix86_agi_dependent (rtx, rtx, enum attr_type);
static struct machine_function * ix86_init_machine_status (void);
static int ix86_split_to_parts (rtx, rtx *, enum machine_mode);
static int ix86_nsaved_regs (void);
static void ix86_emit_save_regs (void);
static void ix86_emit_save_regs_using_mov (rtx, HOST_WIDE_INT);
static void ix86_emit_restore_regs_using_mov (rtx, HOST_WIDE_INT, int);
static void ix86_output_function_epilogue (FILE *, HOST_WIDE_INT);
static HOST_WIDE_INT ix86_GOT_alias_set (void);
static void ix86_adjust_counter (rtx, HOST_WIDE_INT);
static void ix86_expand_strlensi_unroll_1 (rtx, rtx, rtx);
static int ix86_issue_rate (void);
static int ix86_adjust_cost (rtx, rtx, rtx, int);
static int ia32_multipass_dfa_lookahead (void);
static void ix86_init_mmx_sse_builtins (void);
static rtx x86_this_parameter (tree);
static void x86_output_mi_thunk (FILE *, tree, HOST_WIDE_INT,
				 HOST_WIDE_INT, tree);
static bool x86_can_output_mi_thunk (tree, HOST_WIDE_INT, HOST_WIDE_INT, tree);
static void x86_file_start (void);
static void ix86_reorg (void);
static bool ix86_expand_carry_flag_compare (enum rtx_code, rtx, rtx, rtx *);
static tree ix86_build_builtin_va_list (void);
static void ix86_setup_incoming_varargs (CUMULATIVE_ARGS *, enum machine_mode,
static tree ix86_gimplify_va_arg (tree, tree, tree *, tree *);
static bool ix86_scalar_mode_supported_p (enum machine_mode);
static bool ix86_vector_mode_supported_p (enum machine_mode);
static int ix86_address_cost (rtx);
static bool ix86_cannot_force_const_mem (rtx);
static rtx ix86_delegitimize_address (rtx);
static void i386_output_dwarf_dtprel (FILE *, int, rtx) ATTRIBUTE_UNUSED;
struct builtin_description;
static rtx ix86_expand_sse_comi (const struct builtin_description *,
static rtx ix86_expand_sse_compare (const struct builtin_description *,
static rtx ix86_expand_unop1_builtin (enum insn_code, tree, rtx);
static rtx ix86_expand_unop_builtin (enum insn_code, tree, rtx, int);
static rtx ix86_expand_binop_builtin (enum insn_code, tree, rtx);
static rtx ix86_expand_store_builtin (enum insn_code, tree);
static rtx safe_vector_operand (rtx, enum machine_mode);
static rtx ix86_expand_fp_compare (enum rtx_code, rtx, rtx, rtx, rtx *, rtx *);
static int ix86_fp_comparison_arithmetics_cost (enum rtx_code code);
static int ix86_fp_comparison_fcomi_cost (enum rtx_code code);
static int ix86_fp_comparison_sahf_cost (enum rtx_code code);
static int ix86_fp_comparison_cost (enum rtx_code code);
static unsigned int ix86_select_alt_pic_regnum (void);
static int ix86_save_reg (unsigned int, int);
static void ix86_compute_frame_layout (struct ix86_frame *);
static int ix86_comp_type_attributes (tree, tree);
static int ix86_function_regparm (tree, tree);
const struct attribute_spec ix86_attribute_table[];
static bool ix86_function_ok_for_sibcall (tree, tree);
static tree ix86_handle_cconv_attribute (tree *, tree, tree, int, bool *);
static bool contains_128bit_aligned_vector_p (tree);
static rtx ix86_struct_value_rtx (tree, int);
static bool ix86_ms_bitfield_layout_p (tree);
static tree ix86_handle_struct_attribute (tree *, tree, tree, int, bool *);
static int extended_reg_mentioned_1 (rtx *, void *);
static bool ix86_rtx_costs (rtx, int, int, int *);
static int min_insn_size (rtx);
static tree ix86_md_asm_clobbers (tree outputs, tree inputs, tree clobbers);
static bool ix86_must_pass_in_stack (enum machine_mode mode, tree type);
static bool ix86_pass_by_reference (CUMULATIVE_ARGS *, enum machine_mode,
static void ix86_init_builtins (void);
static rtx ix86_expand_builtin (tree, rtx, rtx, enum machine_mode, int);
static tree ix86_builtin_vectorized_function (enum built_in_function, tree, tree);
static tree ix86_builtin_conversion (enum tree_code, tree);
static const char *ix86_mangle_fundamental_type (tree);
static tree ix86_stack_protect_fail (void);
static rtx ix86_internal_arg_pointer (void);
static void ix86_dwarf_handle_frame_unspec (const char *, rtx, int);
static bool ix86_expand_vector_init_one_nonzero (bool, enum machine_mode,
static rtx ix86_function_value (tree, tree, bool);

/* This function is only used on Solaris.  */
static void i386_solaris_elf_named_section (const char *, unsigned int, tree)
/* Register class used for passing given 64bit part of the argument.
   These represent classes as documented by the PS ABI, with the exception
   of SSESF, SSEDF classes, that are basically SSE class, just gcc will
   use SF or DFmode move instead of DImode to avoid reformatting penalties.
   Similarly we play games with INTEGERSI_CLASS to use cheaper SImode moves
   whenever possible (upper half does contain padding).
   NOTE(review): the enum braces and several enumerators are missing from
   this excerpt — compare against x86_64_reg_class_name below, which names
   eleven classes.  */
enum x86_64_reg_class
  X86_64_INTEGER_CLASS,
  X86_64_INTEGERSI_CLASS,
  X86_64_COMPLEX_X87_CLASS,

/* Printable names for the classes above, in enum order.  */
static const char * const x86_64_reg_class_name[] = {
  "no", "integer", "integerSI", "sse", "sseSF", "sseDF",
  "sseup", "x87", "x87up", "cplx87", "no"

#define MAX_CLASSES 4

/* Table of constants used by fldpi, fldln2, etc....  */
static REAL_VALUE_TYPE ext_80387_constants_table [5];
static bool ext_80387_constants_init = 0;	/* set once the table is filled */
static void init_ext_80387_constants (void);
static bool ix86_in_large_data_p (tree) ATTRIBUTE_UNUSED;
static void ix86_encode_section_info (tree, rtx, int) ATTRIBUTE_UNUSED;
static void x86_64_elf_unique_section (tree decl, int reloc) ATTRIBUTE_UNUSED;
static section *x86_64_elf_select_section (tree decl, int reloc,
					   unsigned HOST_WIDE_INT align)
/* Initialize the GCC target structure.  Each TARGET_* hook macro is
   redefined to point at the i386-specific implementation; targetm is
   built from these below.  NOTE(review): the #endif lines closing the
   conditional groups are missing from this excerpt.  */
#undef TARGET_ATTRIBUTE_TABLE
#define TARGET_ATTRIBUTE_TABLE ix86_attribute_table
#if TARGET_DLLIMPORT_DECL_ATTRIBUTES
# undef TARGET_MERGE_DECL_ATTRIBUTES
# define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes

#undef TARGET_COMP_TYPE_ATTRIBUTES
#define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes

#undef TARGET_INIT_BUILTINS
#define TARGET_INIT_BUILTINS ix86_init_builtins
#undef TARGET_EXPAND_BUILTIN
#define TARGET_EXPAND_BUILTIN ix86_expand_builtin

#undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
#define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION ix86_builtin_vectorized_function
#undef TARGET_VECTORIZE_BUILTIN_CONVERSION
#define TARGET_VECTORIZE_BUILTIN_CONVERSION ix86_builtin_conversion

#undef TARGET_ASM_FUNCTION_EPILOGUE
#define TARGET_ASM_FUNCTION_EPILOGUE ix86_output_function_epilogue

#undef TARGET_ENCODE_SECTION_INFO
#ifndef SUBTARGET_ENCODE_SECTION_INFO
#define TARGET_ENCODE_SECTION_INFO ix86_encode_section_info
#define TARGET_ENCODE_SECTION_INFO SUBTARGET_ENCODE_SECTION_INFO

#undef TARGET_ASM_OPEN_PAREN
#define TARGET_ASM_OPEN_PAREN ""
#undef TARGET_ASM_CLOSE_PAREN
#define TARGET_ASM_CLOSE_PAREN ""

#undef TARGET_ASM_ALIGNED_HI_OP
#define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT
#undef TARGET_ASM_ALIGNED_SI_OP
#define TARGET_ASM_ALIGNED_SI_OP ASM_LONG
#undef TARGET_ASM_ALIGNED_DI_OP
#define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD

#undef TARGET_ASM_UNALIGNED_HI_OP
#define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
#undef TARGET_ASM_UNALIGNED_SI_OP
#define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
#undef TARGET_ASM_UNALIGNED_DI_OP
#define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP

#undef TARGET_SCHED_ADJUST_COST
#define TARGET_SCHED_ADJUST_COST ix86_adjust_cost
#undef TARGET_SCHED_ISSUE_RATE
#define TARGET_SCHED_ISSUE_RATE ix86_issue_rate
#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
  ia32_multipass_dfa_lookahead

#undef TARGET_FUNCTION_OK_FOR_SIBCALL
#define TARGET_FUNCTION_OK_FOR_SIBCALL ix86_function_ok_for_sibcall

#undef TARGET_HAVE_TLS
#define TARGET_HAVE_TLS true

#undef TARGET_CANNOT_FORCE_CONST_MEM
#define TARGET_CANNOT_FORCE_CONST_MEM ix86_cannot_force_const_mem
#undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
#define TARGET_USE_BLOCKS_FOR_CONSTANT_P hook_bool_mode_rtx_true

#undef TARGET_DELEGITIMIZE_ADDRESS
#define TARGET_DELEGITIMIZE_ADDRESS ix86_delegitimize_address

#undef TARGET_MS_BITFIELD_LAYOUT_P
#define TARGET_MS_BITFIELD_LAYOUT_P ix86_ms_bitfield_layout_p

#undef TARGET_BINDS_LOCAL_P
#define TARGET_BINDS_LOCAL_P darwin_binds_local_p
1715 #undef TARGET_ASM_OUTPUT_MI_THUNK
1716 #define TARGET_ASM_OUTPUT_MI_THUNK x86_output_mi_thunk
1717 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
1718 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK x86_can_output_mi_thunk
1720 #undef TARGET_ASM_FILE_START
1721 #define TARGET_ASM_FILE_START x86_file_start
1723 #undef TARGET_DEFAULT_TARGET_FLAGS
1724 #define TARGET_DEFAULT_TARGET_FLAGS \
1726 | TARGET_64BIT_DEFAULT \
1727 | TARGET_SUBTARGET_DEFAULT \
1728 | TARGET_TLS_DIRECT_SEG_REFS_DEFAULT)
1730 #undef TARGET_HANDLE_OPTION
1731 #define TARGET_HANDLE_OPTION ix86_handle_option
1733 #undef TARGET_RTX_COSTS
1734 #define TARGET_RTX_COSTS ix86_rtx_costs
1735 #undef TARGET_ADDRESS_COST
1736 #define TARGET_ADDRESS_COST ix86_address_cost
1738 #undef TARGET_FIXED_CONDITION_CODE_REGS
1739 #define TARGET_FIXED_CONDITION_CODE_REGS ix86_fixed_condition_code_regs
1740 #undef TARGET_CC_MODES_COMPATIBLE
1741 #define TARGET_CC_MODES_COMPATIBLE ix86_cc_modes_compatible
1743 #undef TARGET_MACHINE_DEPENDENT_REORG
1744 #define TARGET_MACHINE_DEPENDENT_REORG ix86_reorg
1746 #undef TARGET_BUILD_BUILTIN_VA_LIST
1747 #define TARGET_BUILD_BUILTIN_VA_LIST ix86_build_builtin_va_list
1749 #undef TARGET_MD_ASM_CLOBBERS
1750 #define TARGET_MD_ASM_CLOBBERS ix86_md_asm_clobbers
1752 #undef TARGET_PROMOTE_PROTOTYPES
1753 #define TARGET_PROMOTE_PROTOTYPES hook_bool_tree_true
1754 #undef TARGET_STRUCT_VALUE_RTX
1755 #define TARGET_STRUCT_VALUE_RTX ix86_struct_value_rtx
1756 #undef TARGET_SETUP_INCOMING_VARARGS
1757 #define TARGET_SETUP_INCOMING_VARARGS ix86_setup_incoming_varargs
1758 #undef TARGET_MUST_PASS_IN_STACK
1759 #define TARGET_MUST_PASS_IN_STACK ix86_must_pass_in_stack
1760 #undef TARGET_PASS_BY_REFERENCE
1761 #define TARGET_PASS_BY_REFERENCE ix86_pass_by_reference
1762 #undef TARGET_INTERNAL_ARG_POINTER
1763 #define TARGET_INTERNAL_ARG_POINTER ix86_internal_arg_pointer
1764 #undef TARGET_DWARF_HANDLE_FRAME_UNSPEC
1765 #define TARGET_DWARF_HANDLE_FRAME_UNSPEC ix86_dwarf_handle_frame_unspec
1767 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
1768 #define TARGET_GIMPLIFY_VA_ARG_EXPR ix86_gimplify_va_arg
1770 #undef TARGET_SCALAR_MODE_SUPPORTED_P
1771 #define TARGET_SCALAR_MODE_SUPPORTED_P ix86_scalar_mode_supported_p
1773 #undef TARGET_VECTOR_MODE_SUPPORTED_P
1774 #define TARGET_VECTOR_MODE_SUPPORTED_P ix86_vector_mode_supported_p
1777 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
1778 #define TARGET_ASM_OUTPUT_DWARF_DTPREL i386_output_dwarf_dtprel
1781 #ifdef SUBTARGET_INSERT_ATTRIBUTES
1782 #undef TARGET_INSERT_ATTRIBUTES
1783 #define TARGET_INSERT_ATTRIBUTES SUBTARGET_INSERT_ATTRIBUTES
1786 #undef TARGET_MANGLE_FUNDAMENTAL_TYPE
1787 #define TARGET_MANGLE_FUNDAMENTAL_TYPE ix86_mangle_fundamental_type
1789 #undef TARGET_STACK_PROTECT_FAIL
1790 #define TARGET_STACK_PROTECT_FAIL ix86_stack_protect_fail
1792 #undef TARGET_FUNCTION_VALUE
1793 #define TARGET_FUNCTION_VALUE ix86_function_value
/* The single global target-hook vector; TARGET_INITIALIZER picks up
   all the TARGET_* macro overrides defined above.  */
1795 struct gcc_target targetm
= TARGET_INITIALIZER
;
1798 /* The svr4 ABI for the i386 says that records and unions are returned
1800 #ifndef DEFAULT_PCC_STRUCT_RETURN
1801 #define DEFAULT_PCC_STRUCT_RETURN 1
1804 /* Implement TARGET_HANDLE_OPTION. */
/* Handle -m option CODE with argument ARG/VALUE.
   NOTE(review): lossy extraction -- the return type, the switch on
   CODE, the OPT_m* case labels, the "if (!value)" guards and the
   return statements are all missing from this chunk.  What survives
   is the body of the "option disabled" paths: each pair of statements
   clears the mask(s) implied by the disabled ISA from target_flags and
   records them in target_flags_explicit so later defaulting code will
   not re-enable them.  Presumably the cases are (in order) -mno-3dnow-a,
   -mno-3dnow, -mno-sse, -mno-sse2, -mno-sse3 -- TODO confirm against
   the unmangled source.  */
1807 ix86_handle_option (size_t code
, const char *arg ATTRIBUTE_UNUSED
, int value
)
/* Disabling 3dNOW!A: clear only its own mask.  */
1814 target_flags
&= ~MASK_3DNOW_A
;
1815 target_flags_explicit
|= MASK_3DNOW_A
;
/* Disabling 3dNOW!: also drops the 3dNOW!A extension.  */
1822 target_flags
&= ~(MASK_3DNOW
| MASK_3DNOW_A
);
1823 target_flags_explicit
|= MASK_3DNOW
| MASK_3DNOW_A
;
/* Disabling SSE: every later SSE level depends on the earlier ones,
   so clear the whole dependent chain.  */
1830 target_flags
&= ~(MASK_SSE2
| MASK_SSE3
| MASK_SSE4A
);
1831 target_flags_explicit
|= MASK_SSE2
| MASK_SSE3
| MASK_SSE4A
;
1838 target_flags
&= ~(MASK_SSE3
| MASK_SSE4A
);
1839 target_flags_explicit
|= MASK_SSE3
| MASK_SSE4A
;
1846 target_flags
&= ~MASK_SSE4A
;
1847 target_flags_explicit
|= MASK_SSE4A
;
1856 /* Sometimes certain combinations of command options do not make
1857 sense on a particular target machine. You can define a macro
1858 `OVERRIDE_OPTIONS' to take account of this. This macro, if
1859 defined, is executed once just after all the command options have
1862 Don't use this macro to turn on various extra optimizations for
1863 `-O'. That is what `OPTIMIZATION_OPTIONS' is for. */
1866 override_options (void)
1869 int ix86_tune_defaulted
= 0;
1870 unsigned int ix86_arch_mask
, ix86_tune_mask
;
1872 /* Comes from final.c -- no real reason to change it. */
1873 #define MAX_CODE_ALIGN 16
1877 const struct processor_costs
*cost
; /* Processor costs */
1878 const int target_enable
; /* Target flags to enable. */
1879 const int target_disable
; /* Target flags to disable. */
1880 const int align_loop
; /* Default alignments. */
1881 const int align_loop_max_skip
;
1882 const int align_jump
;
1883 const int align_jump_max_skip
;
1884 const int align_func
;
1886 const processor_target_table
[PROCESSOR_max
] =
1888 {&i386_cost
, 0, 0, 4, 3, 4, 3, 4},
1889 {&i486_cost
, 0, 0, 16, 15, 16, 15, 16},
1890 {&pentium_cost
, 0, 0, 16, 7, 16, 7, 16},
1891 {&pentiumpro_cost
, 0, 0, 16, 15, 16, 7, 16},
1892 {&geode_cost
, 0, 0, 0, 0, 0, 0, 0},
1893 {&k6_cost
, 0, 0, 32, 7, 32, 7, 32},
1894 {&athlon_cost
, 0, 0, 16, 7, 16, 7, 16},
1895 {&pentium4_cost
, 0, 0, 0, 0, 0, 0, 0},
1896 {&k8_cost
, 0, 0, 16, 7, 16, 7, 16},
1897 {&nocona_cost
, 0, 0, 0, 0, 0, 0, 0},
1898 {&core2_cost
, 0, 0, 16, 7, 16, 7, 16},
1899 {&generic32_cost
, 0, 0, 16, 7, 16, 7, 16},
1900 {&generic64_cost
, 0, 0, 16, 7, 16, 7, 16},
1901 {&amdfam10_cost
, 0, 0, 32, 7, 32, 7, 32}
1904 static const char * const cpu_names
[] = TARGET_CPU_DEFAULT_NAMES
;
1907 const char *const name
; /* processor name or nickname. */
1908 const enum processor_type processor
;
1909 const enum pta_flags
1915 PTA_PREFETCH_SSE
= 1 << 4,
1917 PTA_3DNOW_A
= 1 << 6,
1921 PTA_POPCNT
= 1 << 10,
1923 PTA_SSE4A
= 1 << 12,
1924 PTA_NO_SAHF
= 1 << 13
1927 const processor_alias_table
[] =
1929 {"i386", PROCESSOR_I386
, 0},
1930 {"i486", PROCESSOR_I486
, 0},
1931 {"i586", PROCESSOR_PENTIUM
, 0},
1932 {"pentium", PROCESSOR_PENTIUM
, 0},
1933 {"pentium-mmx", PROCESSOR_PENTIUM
, PTA_MMX
},
1934 {"winchip-c6", PROCESSOR_I486
, PTA_MMX
},
1935 {"winchip2", PROCESSOR_I486
, PTA_MMX
| PTA_3DNOW
},
1936 {"c3", PROCESSOR_I486
, PTA_MMX
| PTA_3DNOW
},
1937 {"c3-2", PROCESSOR_PENTIUMPRO
, PTA_MMX
| PTA_PREFETCH_SSE
| PTA_SSE
},
1938 {"i686", PROCESSOR_PENTIUMPRO
, 0},
1939 {"pentiumpro", PROCESSOR_PENTIUMPRO
, 0},
1940 {"pentium2", PROCESSOR_PENTIUMPRO
, PTA_MMX
},
1941 {"pentium3", PROCESSOR_PENTIUMPRO
, PTA_MMX
| PTA_SSE
| PTA_PREFETCH_SSE
},
1942 {"pentium3m", PROCESSOR_PENTIUMPRO
, PTA_MMX
| PTA_SSE
| PTA_PREFETCH_SSE
},
1943 {"pentium-m", PROCESSOR_PENTIUMPRO
, PTA_MMX
| PTA_SSE
| PTA_PREFETCH_SSE
| PTA_SSE2
},
1944 {"pentium4", PROCESSOR_PENTIUM4
, PTA_SSE
| PTA_SSE2
1945 | PTA_MMX
| PTA_PREFETCH_SSE
},
1946 {"pentium4m", PROCESSOR_PENTIUM4
, PTA_SSE
| PTA_SSE2
1947 | PTA_MMX
| PTA_PREFETCH_SSE
},
1948 {"prescott", PROCESSOR_NOCONA
, PTA_SSE
| PTA_SSE2
| PTA_SSE3
1949 | PTA_MMX
| PTA_PREFETCH_SSE
},
1950 {"nocona", PROCESSOR_NOCONA
, PTA_SSE
| PTA_SSE2
| PTA_SSE3
| PTA_64BIT
1951 | PTA_MMX
| PTA_PREFETCH_SSE
1952 | PTA_CX16
| PTA_NO_SAHF
},
1953 {"core2", PROCESSOR_CORE2
, PTA_SSE
| PTA_SSE2
| PTA_SSE3
| PTA_SSSE3
1954 | PTA_64BIT
| PTA_MMX
1955 | PTA_PREFETCH_SSE
| PTA_CX16
},
1956 {"geode", PROCESSOR_GEODE
, PTA_MMX
| PTA_PREFETCH_SSE
| PTA_3DNOW
1958 {"k6", PROCESSOR_K6
, PTA_MMX
},
1959 {"k6-2", PROCESSOR_K6
, PTA_MMX
| PTA_3DNOW
},
1960 {"k6-3", PROCESSOR_K6
, PTA_MMX
| PTA_3DNOW
},
1961 {"athlon", PROCESSOR_ATHLON
, PTA_MMX
| PTA_PREFETCH_SSE
| PTA_3DNOW
1963 {"athlon-tbird", PROCESSOR_ATHLON
, PTA_MMX
| PTA_PREFETCH_SSE
1964 | PTA_3DNOW
| PTA_3DNOW_A
},
1965 {"athlon-4", PROCESSOR_ATHLON
, PTA_MMX
| PTA_PREFETCH_SSE
| PTA_3DNOW
1966 | PTA_3DNOW_A
| PTA_SSE
},
1967 {"athlon-xp", PROCESSOR_ATHLON
, PTA_MMX
| PTA_PREFETCH_SSE
| PTA_3DNOW
1968 | PTA_3DNOW_A
| PTA_SSE
},
1969 {"athlon-mp", PROCESSOR_ATHLON
, PTA_MMX
| PTA_PREFETCH_SSE
| PTA_3DNOW
1970 | PTA_3DNOW_A
| PTA_SSE
},
1971 {"x86-64", PROCESSOR_K8
, PTA_MMX
| PTA_PREFETCH_SSE
| PTA_64BIT
1972 | PTA_SSE
| PTA_SSE2
| PTA_NO_SAHF
},
1973 {"k8", PROCESSOR_K8
, PTA_MMX
| PTA_PREFETCH_SSE
| PTA_3DNOW
| PTA_64BIT
1974 | PTA_3DNOW_A
| PTA_SSE
| PTA_SSE2
1976 {"opteron", PROCESSOR_K8
, PTA_MMX
| PTA_PREFETCH_SSE
| PTA_3DNOW
1977 | PTA_64BIT
| PTA_3DNOW_A
| PTA_SSE
1978 | PTA_SSE2
| PTA_NO_SAHF
},
1979 {"athlon64", PROCESSOR_K8
, PTA_MMX
| PTA_PREFETCH_SSE
| PTA_3DNOW
1980 | PTA_64BIT
| PTA_3DNOW_A
| PTA_SSE
1981 | PTA_SSE2
| PTA_NO_SAHF
},
1982 {"athlon-fx", PROCESSOR_K8
, PTA_MMX
| PTA_PREFETCH_SSE
| PTA_3DNOW
1983 | PTA_64BIT
| PTA_3DNOW_A
| PTA_SSE
1984 | PTA_SSE2
| PTA_NO_SAHF
},
1985 {"amdfam10", PROCESSOR_AMDFAM10
, PTA_MMX
| PTA_PREFETCH_SSE
| PTA_3DNOW
1986 | PTA_64BIT
| PTA_3DNOW_A
| PTA_SSE
1987 | PTA_SSE2
| PTA_SSE3
| PTA_POPCNT
1988 | PTA_ABM
| PTA_SSE4A
| PTA_CX16
},
1989 {"generic32", PROCESSOR_GENERIC32
, 0 /* flags are only used for -march switch. */ },
1990 {"generic64", PROCESSOR_GENERIC64
, PTA_64BIT
/* flags are only used for -march switch. */ },
1993 int const pta_size
= ARRAY_SIZE (processor_alias_table
);
1995 #ifdef SUBTARGET_OVERRIDE_OPTIONS
1996 SUBTARGET_OVERRIDE_OPTIONS
;
1999 #ifdef SUBSUBTARGET_OVERRIDE_OPTIONS
2000 SUBSUBTARGET_OVERRIDE_OPTIONS
;
2003 /* -fPIC is the default for x86_64. */
2004 if (TARGET_MACHO
&& TARGET_64BIT
)
2007 /* Set the default values for switches whose default depends on TARGET_64BIT
2008 in case they weren't overwritten by command line options. */
2011 /* Mach-O doesn't support omitting the frame pointer for now. */
2012 if (flag_omit_frame_pointer
== 2)
2013 flag_omit_frame_pointer
= (TARGET_MACHO
? 0 : 1);
2014 if (flag_asynchronous_unwind_tables
== 2)
2015 flag_asynchronous_unwind_tables
= 1;
2016 if (flag_pcc_struct_return
== 2)
2017 flag_pcc_struct_return
= 0;
2021 if (flag_omit_frame_pointer
== 2)
2022 flag_omit_frame_pointer
= 0;
2023 if (flag_asynchronous_unwind_tables
== 2)
2024 flag_asynchronous_unwind_tables
= 0;
2025 if (flag_pcc_struct_return
== 2)
2026 flag_pcc_struct_return
= DEFAULT_PCC_STRUCT_RETURN
;
2029 /* Need to check -mtune=generic first. */
2030 if (ix86_tune_string
)
2032 if (!strcmp (ix86_tune_string
, "generic")
2033 || !strcmp (ix86_tune_string
, "i686")
2034 /* As special support for cross compilers we read -mtune=native
2035 as -mtune=generic. With native compilers we won't see the
2036 -mtune=native, as it was changed by the driver. */
2037 || !strcmp (ix86_tune_string
, "native"))
2040 ix86_tune_string
= "generic64";
2042 ix86_tune_string
= "generic32";
2044 else if (!strncmp (ix86_tune_string
, "generic", 7))
2045 error ("bad value (%s) for -mtune= switch", ix86_tune_string
);
2049 if (ix86_arch_string
)
2050 ix86_tune_string
= ix86_arch_string
;
2051 if (!ix86_tune_string
)
2053 ix86_tune_string
= cpu_names
[TARGET_CPU_DEFAULT
];
2054 ix86_tune_defaulted
= 1;
2057 /* ix86_tune_string is set to ix86_arch_string or defaulted. We
2058 need to use a sensible tune option. */
2059 if (!strcmp (ix86_tune_string
, "generic")
2060 || !strcmp (ix86_tune_string
, "x86-64")
2061 || !strcmp (ix86_tune_string
, "i686"))
2064 ix86_tune_string
= "generic64";
2066 ix86_tune_string
= "generic32";
2069 if (ix86_stringop_string
)
2071 if (!strcmp (ix86_stringop_string
, "rep_byte"))
2072 stringop_alg
= rep_prefix_1_byte
;
2073 else if (!strcmp (ix86_stringop_string
, "libcall"))
2074 stringop_alg
= libcall
;
2075 else if (!strcmp (ix86_stringop_string
, "rep_4byte"))
2076 stringop_alg
= rep_prefix_4_byte
;
2077 else if (!strcmp (ix86_stringop_string
, "rep_8byte"))
2078 stringop_alg
= rep_prefix_8_byte
;
2079 else if (!strcmp (ix86_stringop_string
, "byte_loop"))
2080 stringop_alg
= loop_1_byte
;
2081 else if (!strcmp (ix86_stringop_string
, "loop"))
2082 stringop_alg
= loop
;
2083 else if (!strcmp (ix86_stringop_string
, "unrolled_loop"))
2084 stringop_alg
= unrolled_loop
;
2086 error ("bad value (%s) for -mstringop-strategy= switch", ix86_stringop_string
);
2088 if (!strcmp (ix86_tune_string
, "x86-64"))
2089 warning (OPT_Wdeprecated
, "-mtune=x86-64 is deprecated. Use -mtune=k8 or "
2090 "-mtune=generic instead as appropriate.");
2092 if (!ix86_arch_string
)
2093 ix86_arch_string
= TARGET_64BIT
? "x86-64" : "i386";
2094 if (!strcmp (ix86_arch_string
, "generic"))
2095 error ("generic CPU can be used only for -mtune= switch");
2096 if (!strncmp (ix86_arch_string
, "generic", 7))
2097 error ("bad value (%s) for -march= switch", ix86_arch_string
);
2099 if (ix86_cmodel_string
!= 0)
2101 if (!strcmp (ix86_cmodel_string
, "small"))
2102 ix86_cmodel
= flag_pic
? CM_SMALL_PIC
: CM_SMALL
;
2103 else if (!strcmp (ix86_cmodel_string
, "medium"))
2104 ix86_cmodel
= flag_pic
? CM_MEDIUM_PIC
: CM_MEDIUM
;
2105 else if (!strcmp (ix86_cmodel_string
, "large"))
2106 ix86_cmodel
= flag_pic
? CM_LARGE_PIC
: CM_LARGE
;
2108 error ("code model %s does not support PIC mode", ix86_cmodel_string
);
2109 else if (!strcmp (ix86_cmodel_string
, "32"))
2110 ix86_cmodel
= CM_32
;
2111 else if (!strcmp (ix86_cmodel_string
, "kernel") && !flag_pic
)
2112 ix86_cmodel
= CM_KERNEL
;
2114 error ("bad value (%s) for -mcmodel= switch", ix86_cmodel_string
);
2118 ix86_cmodel
= CM_32
;
2120 ix86_cmodel
= flag_pic
? CM_SMALL_PIC
: CM_SMALL
;
2122 if (ix86_asm_string
!= 0)
2125 && !strcmp (ix86_asm_string
, "intel"))
2126 ix86_asm_dialect
= ASM_INTEL
;
2127 else if (!strcmp (ix86_asm_string
, "att"))
2128 ix86_asm_dialect
= ASM_ATT
;
2130 error ("bad value (%s) for -masm= switch", ix86_asm_string
);
2132 if ((TARGET_64BIT
== 0) != (ix86_cmodel
== CM_32
))
2133 error ("code model %qs not supported in the %s bit mode",
2134 ix86_cmodel_string
, TARGET_64BIT
? "64" : "32");
2135 if ((TARGET_64BIT
!= 0) != ((target_flags
& MASK_64BIT
) != 0))
2136 sorry ("%i-bit mode not compiled in",
2137 (target_flags
& MASK_64BIT
) ? 64 : 32);
2139 for (i
= 0; i
< pta_size
; i
++)
2140 if (! strcmp (ix86_arch_string
, processor_alias_table
[i
].name
))
2142 ix86_arch
= processor_alias_table
[i
].processor
;
2143 /* Default cpu tuning to the architecture. */
2144 ix86_tune
= ix86_arch
;
2145 if (processor_alias_table
[i
].flags
& PTA_MMX
2146 && !(target_flags_explicit
& MASK_MMX
))
2147 target_flags
|= MASK_MMX
;
2148 if (processor_alias_table
[i
].flags
& PTA_3DNOW
2149 && !(target_flags_explicit
& MASK_3DNOW
))
2150 target_flags
|= MASK_3DNOW
;
2151 if (processor_alias_table
[i
].flags
& PTA_3DNOW_A
2152 && !(target_flags_explicit
& MASK_3DNOW_A
))
2153 target_flags
|= MASK_3DNOW_A
;
2154 if (processor_alias_table
[i
].flags
& PTA_SSE
2155 && !(target_flags_explicit
& MASK_SSE
))
2156 target_flags
|= MASK_SSE
;
2157 if (processor_alias_table
[i
].flags
& PTA_SSE2
2158 && !(target_flags_explicit
& MASK_SSE2
))
2159 target_flags
|= MASK_SSE2
;
2160 if (processor_alias_table
[i
].flags
& PTA_SSE3
2161 && !(target_flags_explicit
& MASK_SSE3
))
2162 target_flags
|= MASK_SSE3
;
2163 if (processor_alias_table
[i
].flags
& PTA_SSSE3
2164 && !(target_flags_explicit
& MASK_SSSE3
))
2165 target_flags
|= MASK_SSSE3
;
2166 if (processor_alias_table
[i
].flags
& PTA_PREFETCH_SSE
)
2167 x86_prefetch_sse
= true;
2168 if (processor_alias_table
[i
].flags
& PTA_CX16
)
2169 x86_cmpxchg16b
= true;
2170 if (processor_alias_table
[i
].flags
& PTA_POPCNT
2171 && !(target_flags_explicit
& MASK_POPCNT
))
2172 target_flags
|= MASK_POPCNT
;
2173 if (processor_alias_table
[i
].flags
& PTA_ABM
2174 && !(target_flags_explicit
& MASK_ABM
))
2175 target_flags
|= MASK_ABM
;
2176 if (processor_alias_table
[i
].flags
& PTA_SSE4A
2177 && !(target_flags_explicit
& MASK_SSE4A
))
2178 target_flags
|= MASK_SSE4A
;
2179 if (!(TARGET_64BIT
&& (processor_alias_table
[i
].flags
& PTA_NO_SAHF
)))
2181 if (TARGET_64BIT
&& !(processor_alias_table
[i
].flags
& PTA_64BIT
))
2182 error ("CPU you selected does not support x86-64 "
2188 error ("bad value (%s) for -march= switch", ix86_arch_string
);
2190 ix86_arch_mask
= 1u << ix86_arch
;
2191 for (i
= 0; i
< X86_ARCH_LAST
; ++i
)
2192 ix86_arch_features
[i
] &= ix86_arch_mask
;
2194 for (i
= 0; i
< pta_size
; i
++)
2195 if (! strcmp (ix86_tune_string
, processor_alias_table
[i
].name
))
2197 ix86_tune
= processor_alias_table
[i
].processor
;
2198 if (TARGET_64BIT
&& !(processor_alias_table
[i
].flags
& PTA_64BIT
))
2200 if (ix86_tune_defaulted
)
2202 ix86_tune_string
= "x86-64";
2203 for (i
= 0; i
< pta_size
; i
++)
2204 if (! strcmp (ix86_tune_string
,
2205 processor_alias_table
[i
].name
))
2207 ix86_tune
= processor_alias_table
[i
].processor
;
2210 error ("CPU you selected does not support x86-64 "
2213 /* Intel CPUs have always interpreted SSE prefetch instructions as
2214 NOPs; so, we can enable SSE prefetch instructions even when
2215 -mtune (rather than -march) points us to a processor that has them.
2216 However, the VIA C3 gives a SIGILL, so we only do that for i686 and
2217 higher processors. */
2218 if (TARGET_CMOVE
&& (processor_alias_table
[i
].flags
& PTA_PREFETCH_SSE
))
2219 x86_prefetch_sse
= true;
2223 error ("bad value (%s) for -mtune= switch", ix86_tune_string
);
2225 ix86_tune_mask
= 1u << ix86_tune
;
2226 for (i
= 0; i
< X86_TUNE_LAST
; ++i
)
2227 ix86_tune_features
[i
] &= ix86_tune_mask
;
2230 ix86_cost
= &size_cost
;
2232 ix86_cost
= processor_target_table
[ix86_tune
].cost
;
2233 target_flags
|= processor_target_table
[ix86_tune
].target_enable
;
2234 target_flags
&= ~processor_target_table
[ix86_tune
].target_disable
;
2236 /* Arrange to set up i386_stack_locals for all functions. */
2237 init_machine_status
= ix86_init_machine_status
;
2239 /* Validate -mregparm= value. */
2240 if (ix86_regparm_string
)
2242 i
= atoi (ix86_regparm_string
);
2243 if (i
< 0 || i
> REGPARM_MAX
)
2244 error ("-mregparm=%d is not between 0 and %d", i
, REGPARM_MAX
);
2250 ix86_regparm
= REGPARM_MAX
;
2252 /* If the user has provided any of the -malign-* options,
2253 warn and use that value only if -falign-* is not set.
2254 Remove this code in GCC 3.2 or later. */
2255 if (ix86_align_loops_string
)
2257 warning (0, "-malign-loops is obsolete, use -falign-loops");
2258 if (align_loops
== 0)
2260 i
= atoi (ix86_align_loops_string
);
2261 if (i
< 0 || i
> MAX_CODE_ALIGN
)
2262 error ("-malign-loops=%d is not between 0 and %d", i
, MAX_CODE_ALIGN
);
2264 align_loops
= 1 << i
;
2268 if (ix86_align_jumps_string
)
2270 warning (0, "-malign-jumps is obsolete, use -falign-jumps");
2271 if (align_jumps
== 0)
2273 i
= atoi (ix86_align_jumps_string
);
2274 if (i
< 0 || i
> MAX_CODE_ALIGN
)
2275 error ("-malign-loops=%d is not between 0 and %d", i
, MAX_CODE_ALIGN
);
2277 align_jumps
= 1 << i
;
2281 if (ix86_align_funcs_string
)
2283 warning (0, "-malign-functions is obsolete, use -falign-functions");
2284 if (align_functions
== 0)
2286 i
= atoi (ix86_align_funcs_string
);
2287 if (i
< 0 || i
> MAX_CODE_ALIGN
)
2288 error ("-malign-loops=%d is not between 0 and %d", i
, MAX_CODE_ALIGN
);
2290 align_functions
= 1 << i
;
2294 /* Default align_* from the processor table. */
2295 if (align_loops
== 0)
2297 align_loops
= processor_target_table
[ix86_tune
].align_loop
;
2298 align_loops_max_skip
= processor_target_table
[ix86_tune
].align_loop_max_skip
;
2300 if (align_jumps
== 0)
2302 align_jumps
= processor_target_table
[ix86_tune
].align_jump
;
2303 align_jumps_max_skip
= processor_target_table
[ix86_tune
].align_jump_max_skip
;
2305 if (align_functions
== 0)
2307 align_functions
= processor_target_table
[ix86_tune
].align_func
;
2310 /* Validate -mbranch-cost= value, or provide default. */
2311 ix86_branch_cost
= ix86_cost
->branch_cost
;
2312 if (ix86_branch_cost_string
)
2314 i
= atoi (ix86_branch_cost_string
);
2316 error ("-mbranch-cost=%d is not between 0 and 5", i
);
2318 ix86_branch_cost
= i
;
2320 if (ix86_section_threshold_string
)
2322 i
= atoi (ix86_section_threshold_string
);
2324 error ("-mlarge-data-threshold=%d is negative", i
);
2326 ix86_section_threshold
= i
;
2329 if (ix86_tls_dialect_string
)
2331 if (strcmp (ix86_tls_dialect_string
, "gnu") == 0)
2332 ix86_tls_dialect
= TLS_DIALECT_GNU
;
2333 else if (strcmp (ix86_tls_dialect_string
, "gnu2") == 0)
2334 ix86_tls_dialect
= TLS_DIALECT_GNU2
;
2335 else if (strcmp (ix86_tls_dialect_string
, "sun") == 0)
2336 ix86_tls_dialect
= TLS_DIALECT_SUN
;
2338 error ("bad value (%s) for -mtls-dialect= switch",
2339 ix86_tls_dialect_string
);
2342 /* Keep nonleaf frame pointers. */
2343 if (flag_omit_frame_pointer
)
2344 target_flags
&= ~MASK_OMIT_LEAF_FRAME_POINTER
;
2345 else if (TARGET_OMIT_LEAF_FRAME_POINTER
)
2346 flag_omit_frame_pointer
= 1;
2348 /* If we're doing fast math, we don't care about comparison order
2349 wrt NaNs. This lets us use a shorter comparison sequence. */
2350 if (flag_finite_math_only
)
2351 target_flags
&= ~MASK_IEEE_FP
;
2353 /* If the architecture always has an FPU, turn off NO_FANCY_MATH_387,
2354 since the insns won't need emulation. */
2355 if (x86_arch_always_fancy_math_387
& ix86_arch_mask
)
2356 target_flags
&= ~MASK_NO_FANCY_MATH_387
;
2358 /* Likewise, if the target doesn't have a 387, or we've specified
2359 software floating point, don't use 387 inline intrinsics. */
2361 target_flags
|= MASK_NO_FANCY_MATH_387
;
2363 /* Turn on SSE3 builtins for -mssse3. */
2365 target_flags
|= MASK_SSE3
;
2367 /* Turn on SSE3 builtins for -msse4a. */
2369 target_flags
|= MASK_SSE3
;
2371 /* Turn on SSE2 builtins for -msse3. */
2373 target_flags
|= MASK_SSE2
;
2375 /* Turn on SSE builtins for -msse2. */
2377 target_flags
|= MASK_SSE
;
2379 /* Turn on MMX builtins for -msse. */
2382 target_flags
|= MASK_MMX
& ~target_flags_explicit
;
2383 x86_prefetch_sse
= true;
2386 /* Turn on MMX builtins for 3Dnow. */
2388 target_flags
|= MASK_MMX
;
2390 /* Turn on POPCNT builtins for -mabm. */
2392 target_flags
|= MASK_POPCNT
;
2396 if (TARGET_ALIGN_DOUBLE
)
2397 error ("-malign-double makes no sense in the 64bit mode");
2399 error ("-mrtd calling convention not supported in the 64bit mode");
2401 /* Enable by default the SSE and MMX builtins. Do allow the user to
2402 explicitly disable any of these. In particular, disabling SSE and
2403 MMX for kernel code is extremely useful. */
2405 |= ((MASK_SSE2
| MASK_SSE
| MASK_MMX
| MASK_128BIT_LONG_DOUBLE
)
2406 & ~target_flags_explicit
);
2410 /* i386 ABI does not specify red zone. It still makes sense to use it
2411 when programmer takes care to stack from being destroyed. */
2412 if (!(target_flags_explicit
& MASK_NO_RED_ZONE
))
2413 target_flags
|= MASK_NO_RED_ZONE
;
2416 /* Validate -mpreferred-stack-boundary= value, or provide default.
2417 The default of 128 bits is for Pentium III's SSE __m128. We can't
2418 change it because of optimize_size. Otherwise, we can't mix object
2419 files compiled with -Os and -On. */
2420 ix86_preferred_stack_boundary
= 128;
2421 if (ix86_preferred_stack_boundary_string
)
2423 i
= atoi (ix86_preferred_stack_boundary_string
);
2424 if (i
< (TARGET_64BIT
? 4 : 2) || i
> 12)
2425 error ("-mpreferred-stack-boundary=%d is not between %d and 12", i
,
2426 TARGET_64BIT
? 4 : 2);
2428 ix86_preferred_stack_boundary
= (1 << i
) * BITS_PER_UNIT
;
2431 /* Accept -msseregparm only if at least SSE support is enabled. */
2432 if (TARGET_SSEREGPARM
2434 error ("-msseregparm used without SSE enabled");
2436 ix86_fpmath
= TARGET_FPMATH_DEFAULT
;
2437 if (ix86_fpmath_string
!= 0)
2439 if (! strcmp (ix86_fpmath_string
, "387"))
2440 ix86_fpmath
= FPMATH_387
;
2441 else if (! strcmp (ix86_fpmath_string
, "sse"))
2445 warning (0, "SSE instruction set disabled, using 387 arithmetics");
2446 ix86_fpmath
= FPMATH_387
;
2449 ix86_fpmath
= FPMATH_SSE
;
2451 else if (! strcmp (ix86_fpmath_string
, "387,sse")
2452 || ! strcmp (ix86_fpmath_string
, "sse,387"))
2456 warning (0, "SSE instruction set disabled, using 387 arithmetics");
2457 ix86_fpmath
= FPMATH_387
;
2459 else if (!TARGET_80387
)
2461 warning (0, "387 instruction set disabled, using SSE arithmetics");
2462 ix86_fpmath
= FPMATH_SSE
;
2465 ix86_fpmath
= FPMATH_SSE
| FPMATH_387
;
2468 error ("bad value (%s) for -mfpmath= switch", ix86_fpmath_string
);
2471 /* If the i387 is disabled, then do not return values in it. */
2473 target_flags
&= ~MASK_FLOAT_RETURNS
;
2475 if ((x86_accumulate_outgoing_args
& ix86_tune_mask
)
2476 && !(target_flags_explicit
& MASK_ACCUMULATE_OUTGOING_ARGS
)
2478 target_flags
|= MASK_ACCUMULATE_OUTGOING_ARGS
;
2480 /* ??? Unwind info is not correct around the CFG unless either a frame
2481 pointer is present or M_A_O_A is set. Fixing this requires rewriting
2482 unwind info generation to be aware of the CFG and propagating states
2484 if ((flag_unwind_tables
|| flag_asynchronous_unwind_tables
2485 || flag_exceptions
|| flag_non_call_exceptions
)
2486 && flag_omit_frame_pointer
2487 && !(target_flags
& MASK_ACCUMULATE_OUTGOING_ARGS
))
2489 if (target_flags_explicit
& MASK_ACCUMULATE_OUTGOING_ARGS
)
2490 warning (0, "unwind tables currently require either a frame pointer "
2491 "or -maccumulate-outgoing-args for correctness");
2492 target_flags
|= MASK_ACCUMULATE_OUTGOING_ARGS
;
2495 /* For sane SSE instruction set generation we need fcomi instruction.
2496 It is safe to enable all CMOVE instructions. */
2500 /* Figure out what ASM_GENERATE_INTERNAL_LABEL builds as a prefix. */
2503 ASM_GENERATE_INTERNAL_LABEL (internal_label_prefix
, "LX", 0);
2504 p
= strchr (internal_label_prefix
, 'X');
2505 internal_label_prefix_len
= p
- internal_label_prefix
;
2509 /* When scheduling description is not available, disable scheduler pass
2510 so it won't slow down the compilation and make x87 code slower. */
2511 if (!TARGET_SCHEDULE
)
2512 flag_schedule_insns_after_reload
= flag_schedule_insns
= 0;
2514 if (!PARAM_SET_P (PARAM_SIMULTANEOUS_PREFETCHES
))
2515 set_param_value ("simultaneous-prefetches",
2516 ix86_cost
->simultaneous_prefetches
);
2517 if (!PARAM_SET_P (PARAM_L1_CACHE_LINE_SIZE
))
2518 set_param_value ("l1-cache-line-size", ix86_cost
->prefetch_block
);
2521 /* switch to the appropriate section for output of DECL.
2522 DECL is either a `VAR_DECL' node or a constant of some sort.
2523 RELOC indicates whether forming the initial value of DECL requires
2524 link-time relocations. */
/* Implements TARGET_ASM_SELECT_SECTION for x86-64 ELF.  For the medium
   code model, objects above -mlarge-data-threshold go into ".l"-prefixed
   large-data sections; everything else falls through to
   default_elf_select_section.
   NOTE(review): lossy extraction -- the return type line, braces,
   "break;" statements and several case labels (e.g. SECCAT_DATA, the
   BSS cases) are missing from this chunk.  */
2527 x86_64_elf_select_section (tree decl
, int reloc
,
2528 unsigned HOST_WIDE_INT align
)
/* Large-data path applies only to the medium code models.  */
2530 if ((ix86_cmodel
== CM_MEDIUM
|| ix86_cmodel
== CM_MEDIUM_PIC
)
2531 && ix86_in_large_data_p (decl
))
2533 const char *sname
= NULL
;
2534 unsigned int flags
= SECTION_WRITE
;
/* Pick the ".ldata" variant matching the decl's relocation category.  */
2535 switch (categorize_decl_for_section (decl
, reloc
))
2540 case SECCAT_DATA_REL
:
2541 sname
= ".ldata.rel";
2543 case SECCAT_DATA_REL_LOCAL
:
2544 sname
= ".ldata.rel.local";
2546 case SECCAT_DATA_REL_RO
:
2547 sname
= ".ldata.rel.ro";
2549 case SECCAT_DATA_REL_RO_LOCAL
:
2550 sname
= ".ldata.rel.ro.local";
/* BSS-category decls additionally get the SECTION_BSS flag.  */
2554 flags
|= SECTION_BSS
;
2557 case SECCAT_RODATA_MERGE_STR
:
2558 case SECCAT_RODATA_MERGE_STR_INIT
:
2559 case SECCAT_RODATA_MERGE_CONST
:
2563 case SECCAT_SRODATA
:
2570 /* We don't split these for medium model. Place them into
2571 default sections and hope for best. */
2576 /* We might get called with string constants, but get_named_section
2577 doesn't like them as they are not DECLs. Also, we need to set
2578 flags in that case. */
2580 return get_section (sname
, flags
, NULL
);
2581 return get_named_section (decl
, sname
, reloc
);
/* Not large data: defer to the generic ELF section chooser.  */
2584 return default_elf_select_section (decl
, reloc
, align
);
2587 /* Build up a unique section name, expressed as a
2588 STRING_CST node, and assign it to DECL_SECTION_NAME (decl).
2589 RELOC indicates whether the initial value of EXP requires
2590 link-time relocations. */
/* Implements TARGET_ASM_UNIQUE_SECTION for x86-64 ELF.  Mirrors
   x86_64_elf_select_section: medium-model large data gets a per-decl
   section named with a ".l"-prefixed (or .gnu.linkonce.l*) prefix plus
   the decl's stripped assembler name; otherwise defers to
   default_unique_section.
   NOTE(review): lossy extraction -- the return type line, braces,
   "break;"s, some case labels (e.g. the BSS case before line 2612) and
   the declarations of plen/nlen/name/string are missing here.  */
2593 x86_64_elf_unique_section (tree decl
, int reloc
)
2595 if ((ix86_cmodel
== CM_MEDIUM
|| ix86_cmodel
== CM_MEDIUM_PIC
)
2596 && ix86_in_large_data_p (decl
))
2598 const char *prefix
= NULL
;
2599 /* We only need to use .gnu.linkonce if we don't have COMDAT groups. */
2600 bool one_only
= DECL_ONE_ONLY (decl
) && !HAVE_COMDAT_GROUP
;
2602 switch (categorize_decl_for_section (decl
, reloc
))
2605 case SECCAT_DATA_REL
:
2606 case SECCAT_DATA_REL_LOCAL
:
2607 case SECCAT_DATA_REL_RO
:
2608 case SECCAT_DATA_REL_RO_LOCAL
:
2609 prefix
= one_only
? ".gnu.linkonce.ld." : ".ldata.";
2612 prefix
= one_only
? ".gnu.linkonce.lb." : ".lbss.";
2615 case SECCAT_RODATA_MERGE_STR
:
2616 case SECCAT_RODATA_MERGE_STR_INIT
:
2617 case SECCAT_RODATA_MERGE_CONST
:
2618 prefix
= one_only
? ".gnu.linkonce.lr." : ".lrodata.";
2620 case SECCAT_SRODATA
:
2627 /* We don't split these for medium model. Place them into
2628 default sections and hope for best. */
/* Concatenate prefix + stripped assembler name into a stack buffer
   and install it as the decl's section name.  */
2636 plen
= strlen (prefix
);
2638 name
= IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl
));
2639 name
= targetm
.strip_name_encoding (name
);
2640 nlen
= strlen (name
);
2642 string
= alloca (nlen
+ plen
+ 1);
2643 memcpy (string
, prefix
, plen
);
2644 memcpy (string
+ plen
, name
, nlen
+ 1);
2646 DECL_SECTION_NAME (decl
) = build_string (nlen
+ plen
, string
);
/* Fallback for everything that is not medium-model large data.  */
2650 default_unique_section (decl
, reloc
);
2653 #ifdef COMMON_ASM_OP
2654 /* This says how to output assembler code to declare an
2655 uninitialized external linkage data object.
2657 For medium model x86-64 we need to use .largecomm opcode for
/* Emit a .comm (or, for medium-model large objects, .largecomm)
   directive for NAME with the given SIZE and bit alignment.
   NOTE(review): lossy extraction -- the return type line, the "int
   align" parameter line, braces and the "else" between the two
   fprintf calls are missing from this chunk.  */
2660 x86_elf_aligned_common (FILE *file
,
2661 const char *name
, unsigned HOST_WIDE_INT size
,
/* Large objects under the medium code model use .largecomm.  */
2664 if ((ix86_cmodel
== CM_MEDIUM
|| ix86_cmodel
== CM_MEDIUM_PIC
)
2665 && size
> (unsigned int)ix86_section_threshold
)
2666 fprintf (file
, ".largecomm\t");
2668 fprintf (file
, "%s", COMMON_ASM_OP
);
2669 assemble_name (file
, name
);
/* Alignment is given to the assembler in bytes, hence BITS_PER_UNIT.  */
2670 fprintf (file
, ","HOST_WIDE_INT_PRINT_UNSIGNED
",%u\n",
2671 size
, align
/ BITS_PER_UNIT
);
2674 /* Utility function for targets to use in implementing
2675 ASM_OUTPUT_ALIGNED_BSS. */
/* Switch to the proper BSS section (.lbss for medium-model large
   objects, the normal bss_section otherwise), align, declare/label
   NAME, and reserve SIZE bytes.
   NOTE(review): lossy extraction -- the return type line, the "int
   align" parameter line, braces and the "else" keywords are missing
   from this chunk.  */
2678 x86_output_aligned_bss (FILE *file
, tree decl ATTRIBUTE_UNUSED
,
2679 const char *name
, unsigned HOST_WIDE_INT size
,
2682 if ((ix86_cmodel
== CM_MEDIUM
|| ix86_cmodel
== CM_MEDIUM_PIC
)
2683 && size
> (unsigned int)ix86_section_threshold
)
2684 switch_to_section (get_named_section (decl
, ".lbss", 0));
2686 switch_to_section (bss_section
);
2687 ASM_OUTPUT_ALIGN (file
, floor_log2 (align
/ BITS_PER_UNIT
));
2688 #ifdef ASM_DECLARE_OBJECT_NAME
2689 last_assemble_variable_decl
= decl
;
2690 ASM_DECLARE_OBJECT_NAME (file
, name
, decl
);
2692 /* Standard thing is just output label for the object. */
2693 ASM_OUTPUT_LABEL (file
, name
);
2694 #endif /* ASM_DECLARE_OBJECT_NAME */
/* Reserve at least one byte so the label refers to real storage.  */
2695 ASM_OUTPUT_SKIP (file
, size
? size
: 1);
2699 optimization_options (int level
, int size ATTRIBUTE_UNUSED
)
2701 /* For -O2 and beyond, turn off -fschedule-insns by default. It tends to
2702 make the problem with not enough registers even worse. */
2703 #ifdef INSN_SCHEDULING
2705 flag_schedule_insns
= 0;
2709 /* The Darwin libraries never set errno, so we might as well
2710 avoid calling them when that's the only reason we would. */
2711 flag_errno_math
= 0;
2713 /* The default values of these switches depend on the TARGET_64BIT
2714 that is not known at this moment. Mark these values with 2 and
2715 let user the to override these. In case there is no command line option
2716 specifying them, we will set the defaults in override_options. */
2718 flag_omit_frame_pointer
= 2;
2719 flag_pcc_struct_return
= 2;
2720 flag_asynchronous_unwind_tables
= 2;
2721 #ifdef SUBTARGET_OPTIMIZATION_OPTIONS
2722 SUBTARGET_OPTIMIZATION_OPTIONS
;
2726 /* Table of valid machine attributes. */
2727 const struct attribute_spec ix86_attribute_table
[] =
2729 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
2730 /* Stdcall attribute says callee is responsible for popping arguments
2731 if they are not variable. */
2732 { "stdcall", 0, 0, false, true, true, ix86_handle_cconv_attribute
},
2733 /* Fastcall attribute says callee is responsible for popping arguments
2734 if they are not variable. */
2735 { "fastcall", 0, 0, false, true, true, ix86_handle_cconv_attribute
},
2736 /* Cdecl attribute says the callee is a normal C declaration */
2737 { "cdecl", 0, 0, false, true, true, ix86_handle_cconv_attribute
},
2738 /* Regparm attribute specifies how many integer arguments are to be
2739 passed in registers. */
2740 { "regparm", 1, 1, false, true, true, ix86_handle_cconv_attribute
},
2741 /* Sseregparm attribute says we are using x86_64 calling conventions
2742 for FP arguments. */
2743 { "sseregparm", 0, 0, false, true, true, ix86_handle_cconv_attribute
},
2744 /* force_align_arg_pointer says this function realigns the stack at entry. */
2745 { (const char *)&ix86_force_align_arg_pointer_string
, 0, 0,
2746 false, true, true, ix86_handle_cconv_attribute
},
2747 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
2748 { "dllimport", 0, 0, false, false, false, handle_dll_attribute
},
2749 { "dllexport", 0, 0, false, false, false, handle_dll_attribute
},
2750 { "shared", 0, 0, true, false, false, ix86_handle_shared_attribute
},
2752 { "ms_struct", 0, 0, false, false, false, ix86_handle_struct_attribute
},
2753 { "gcc_struct", 0, 0, false, false, false, ix86_handle_struct_attribute
},
2754 #ifdef SUBTARGET_ATTRIBUTE_TABLE
2755 SUBTARGET_ATTRIBUTE_TABLE
,
2757 { NULL
, 0, 0, false, false, false, NULL
}
2760 /* Decide whether we can make a sibling call to a function. DECL is the
2761 declaration of the function being targeted by the call and EXP is the
2762 CALL_EXPR representing the call. */
2765 ix86_function_ok_for_sibcall (tree decl
, tree exp
)
2770 /* If we are generating position-independent code, we cannot sibcall
2771 optimize any indirect call, or a direct call to a global function,
2772 as the PLT requires %ebx be live. */
2773 if (!TARGET_64BIT
&& flag_pic
&& (!decl
|| !targetm
.binds_local_p (decl
)))
2780 func
= TREE_TYPE (CALL_EXPR_FN (exp
));
2781 if (POINTER_TYPE_P (func
))
2782 func
= TREE_TYPE (func
);
2785 /* Check that the return value locations are the same. Like
2786 if we are returning floats on the 80387 register stack, we cannot
2787 make a sibcall from a function that doesn't return a float to a
2788 function that does or, conversely, from a function that does return
2789 a float to a function that doesn't; the necessary stack adjustment
2790 would not be executed. This is also the place we notice
2791 differences in the return value ABI. Note that it is ok for one
2792 of the functions to have void return type as long as the return
2793 value of the other is passed in a register. */
2794 a
= ix86_function_value (TREE_TYPE (exp
), func
, false);
2795 b
= ix86_function_value (TREE_TYPE (DECL_RESULT (cfun
->decl
)),
2797 if (STACK_REG_P (a
) || STACK_REG_P (b
))
2799 if (!rtx_equal_p (a
, b
))
2802 else if (VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun
->decl
))))
2804 else if (!rtx_equal_p (a
, b
))
2807 /* If this call is indirect, we'll need to be able to use a call-clobbered
2808 register for the address of the target function. Make sure that all
2809 such registers are not used for passing parameters. */
2810 if (!decl
&& !TARGET_64BIT
)
2814 /* We're looking at the CALL_EXPR, we need the type of the function. */
2815 type
= CALL_EXPR_FN (exp
); /* pointer expression */
2816 type
= TREE_TYPE (type
); /* pointer type */
2817 type
= TREE_TYPE (type
); /* function type */
2819 if (ix86_function_regparm (type
, NULL
) >= 3)
2821 /* ??? Need to count the actual number of registers to be used,
2822 not the possible number of registers. Fix later. */
2827 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
2828 /* Dllimport'd functions are also called indirectly. */
2829 if (decl
&& DECL_DLLIMPORT_P (decl
)
2830 && ix86_function_regparm (TREE_TYPE (decl
), NULL
) >= 3)
2834 /* If we forced aligned the stack, then sibcalling would unalign the
2835 stack, which may break the called function. */
2836 if (cfun
->machine
->force_align_arg_pointer
)
2839 /* Otherwise okay. That also includes certain types of indirect calls. */
2843 /* Handle "cdecl", "stdcall", "fastcall", "regparm" and "sseregparm"
2844 calling convention attributes;
2845 arguments as in struct attribute_spec.handler. */
2848 ix86_handle_cconv_attribute (tree
*node
, tree name
,
2850 int flags ATTRIBUTE_UNUSED
,
2853 if (TREE_CODE (*node
) != FUNCTION_TYPE
2854 && TREE_CODE (*node
) != METHOD_TYPE
2855 && TREE_CODE (*node
) != FIELD_DECL
2856 && TREE_CODE (*node
) != TYPE_DECL
)
2858 warning (OPT_Wattributes
, "%qs attribute only applies to functions",
2859 IDENTIFIER_POINTER (name
));
2860 *no_add_attrs
= true;
2864 /* Can combine regparm with all attributes but fastcall. */
2865 if (is_attribute_p ("regparm", name
))
2869 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node
)))
2871 error ("fastcall and regparm attributes are not compatible");
2874 cst
= TREE_VALUE (args
);
2875 if (TREE_CODE (cst
) != INTEGER_CST
)
2877 warning (OPT_Wattributes
,
2878 "%qs attribute requires an integer constant argument",
2879 IDENTIFIER_POINTER (name
));
2880 *no_add_attrs
= true;
2882 else if (compare_tree_int (cst
, REGPARM_MAX
) > 0)
2884 warning (OPT_Wattributes
, "argument to %qs attribute larger than %d",
2885 IDENTIFIER_POINTER (name
), REGPARM_MAX
);
2886 *no_add_attrs
= true;
2890 && lookup_attribute (ix86_force_align_arg_pointer_string
,
2891 TYPE_ATTRIBUTES (*node
))
2892 && compare_tree_int (cst
, REGPARM_MAX
-1))
2894 error ("%s functions limited to %d register parameters",
2895 ix86_force_align_arg_pointer_string
, REGPARM_MAX
-1);
2903 warning (OPT_Wattributes
, "%qs attribute ignored",
2904 IDENTIFIER_POINTER (name
));
2905 *no_add_attrs
= true;
2909 /* Can combine fastcall with stdcall (redundant) and sseregparm. */
2910 if (is_attribute_p ("fastcall", name
))
2912 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node
)))
2914 error ("fastcall and cdecl attributes are not compatible");
2916 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node
)))
2918 error ("fastcall and stdcall attributes are not compatible");
2920 if (lookup_attribute ("regparm", TYPE_ATTRIBUTES (*node
)))
2922 error ("fastcall and regparm attributes are not compatible");
2926 /* Can combine stdcall with fastcall (redundant), regparm and
2928 else if (is_attribute_p ("stdcall", name
))
2930 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node
)))
2932 error ("stdcall and cdecl attributes are not compatible");
2934 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node
)))
2936 error ("stdcall and fastcall attributes are not compatible");
2940 /* Can combine cdecl with regparm and sseregparm. */
2941 else if (is_attribute_p ("cdecl", name
))
2943 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node
)))
2945 error ("stdcall and cdecl attributes are not compatible");
2947 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node
)))
2949 error ("fastcall and cdecl attributes are not compatible");
2953 /* Can combine sseregparm with all attributes. */
2958 /* Return 0 if the attributes for two types are incompatible, 1 if they
2959 are compatible, and 2 if they are nearly compatible (which causes a
2960 warning to be generated). */
2963 ix86_comp_type_attributes (tree type1
, tree type2
)
2965 /* Check for mismatch of non-default calling convention. */
2966 const char *const rtdstr
= TARGET_RTD
? "cdecl" : "stdcall";
2968 if (TREE_CODE (type1
) != FUNCTION_TYPE
)
2971 /* Check for mismatched fastcall/regparm types. */
2972 if ((!lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type1
))
2973 != !lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type2
)))
2974 || (ix86_function_regparm (type1
, NULL
)
2975 != ix86_function_regparm (type2
, NULL
)))
2978 /* Check for mismatched sseregparm types. */
2979 if (!lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type1
))
2980 != !lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type2
)))
2983 /* Check for mismatched return types (cdecl vs stdcall). */
2984 if (!lookup_attribute (rtdstr
, TYPE_ATTRIBUTES (type1
))
2985 != !lookup_attribute (rtdstr
, TYPE_ATTRIBUTES (type2
)))
2991 /* Return the regparm value for a function with the indicated TYPE and DECL.
2992 DECL may be NULL when calling function indirectly
2993 or considering a libcall. */
2996 ix86_function_regparm (tree type
, tree decl
)
2999 int regparm
= ix86_regparm
;
3004 attr
= lookup_attribute ("regparm", TYPE_ATTRIBUTES (type
));
3006 return TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr
)));
3008 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type
)))
3011 /* Use register calling convention for local functions when possible. */
3012 if (decl
&& flag_unit_at_a_time
&& !profile_flag
)
3014 struct cgraph_local_info
*i
= cgraph_local_info (decl
);
3017 int local_regparm
, globals
= 0, regno
;
3020 /* Make sure no regparm register is taken by a
3021 global register variable. */
3022 for (local_regparm
= 0; local_regparm
< 3; local_regparm
++)
3023 if (global_regs
[local_regparm
])
3026 /* We can't use regparm(3) for nested functions as these use
3027 static chain pointer in third argument. */
3028 if (local_regparm
== 3
3029 && decl_function_context (decl
)
3030 && !DECL_NO_STATIC_CHAIN (decl
))
3033 /* If the function realigns its stackpointer, the prologue will
3034 clobber %ecx. If we've already generated code for the callee,
3035 the callee DECL_STRUCT_FUNCTION is gone, so we fall back to
3036 scanning the attributes for the self-realigning property. */
3037 f
= DECL_STRUCT_FUNCTION (decl
);
3038 if (local_regparm
== 3
3039 && (f
? !!f
->machine
->force_align_arg_pointer
3040 : !!lookup_attribute (ix86_force_align_arg_pointer_string
,
3041 TYPE_ATTRIBUTES (TREE_TYPE (decl
)))))
3044 /* Each global register variable increases register preassure,
3045 so the more global reg vars there are, the smaller regparm
3046 optimization use, unless requested by the user explicitly. */
3047 for (regno
= 0; regno
< 6; regno
++)
3048 if (global_regs
[regno
])
3051 = globals
< local_regparm
? local_regparm
- globals
: 0;
3053 if (local_regparm
> regparm
)
3054 regparm
= local_regparm
;
3061 /* Return 1 or 2, if we can pass up to SSE_REGPARM_MAX SFmode (1) and
3062 DFmode (2) arguments in SSE registers for a function with the
3063 indicated TYPE and DECL. DECL may be NULL when calling function
3064 indirectly or considering a libcall. Otherwise return 0. */
3067 ix86_function_sseregparm (tree type
, tree decl
)
3069 gcc_assert (!TARGET_64BIT
);
3071 /* Use SSE registers to pass SFmode and DFmode arguments if requested
3072 by the sseregparm attribute. */
3073 if (TARGET_SSEREGPARM
3074 || (type
&& lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type
))))
3079 error ("Calling %qD with attribute sseregparm without "
3080 "SSE/SSE2 enabled", decl
);
3082 error ("Calling %qT with attribute sseregparm without "
3083 "SSE/SSE2 enabled", type
);
3090 /* For local functions, pass up to SSE_REGPARM_MAX SFmode
3091 (and DFmode for SSE2) arguments in SSE registers. */
3092 if (decl
&& TARGET_SSE_MATH
&& flag_unit_at_a_time
&& !profile_flag
)
3094 struct cgraph_local_info
*i
= cgraph_local_info (decl
);
3096 return TARGET_SSE2
? 2 : 1;
3102 /* Return true if EAX is live at the start of the function. Used by
3103 ix86_expand_prologue to determine if we need special help before
3104 calling allocate_stack_worker. */
3107 ix86_eax_live_at_start_p (void)
3109 /* Cheat. Don't bother working forward from ix86_function_regparm
3110 to the function type to whether an actual argument is located in
3111 eax. Instead just look at cfg info, which is still close enough
3112 to correct at this point. This gives false positives for broken
3113 functions that might use uninitialized data that happens to be
3114 allocated in eax, but who cares? */
3115 return REGNO_REG_SET_P (ENTRY_BLOCK_PTR
->il
.rtl
->global_live_at_end
, 0);
3118 /* Return true if TYPE has a variable argument list. */
3121 type_has_variadic_args_p (tree type
)
3125 for (t
= TYPE_ARG_TYPES (type
); t
; t
= TREE_CHAIN (t
))
3126 if (t
== void_list_node
)
3131 /* Value is the number of bytes of arguments automatically
3132 popped when returning from a subroutine call.
3133 FUNDECL is the declaration node of the function (as a tree),
3134 FUNTYPE is the data type of the function (as a tree),
3135 or for a library call it is an identifier node for the subroutine name.
3136 SIZE is the number of bytes of arguments passed on the stack.
3138 On the 80386, the RTD insn may be used to pop them if the number
3139 of args is fixed, but if the number is variable then the caller
3140 must pop them all. RTD can't be used for library calls now
3141 because the library is compiled with the Unix compiler.
3142 Use of RTD is a selectable option, since it is incompatible with
3143 standard Unix calling sequences. If the option is not selected,
3144 the caller must always pop the args.
3146 The attribute stdcall is equivalent to RTD on a per module basis. */
3149 ix86_return_pops_args (tree fundecl
, tree funtype
, int size
)
3153 /* None of the 64-bit ABIs pop arguments. */
3157 rtd
= TARGET_RTD
&& (!fundecl
|| TREE_CODE (fundecl
) != IDENTIFIER_NODE
);
3159 /* Cdecl functions override -mrtd, and never pop the stack. */
3160 if (! lookup_attribute ("cdecl", TYPE_ATTRIBUTES (funtype
)))
3162 /* Stdcall and fastcall functions will pop the stack if not
3164 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (funtype
))
3165 || lookup_attribute ("fastcall", TYPE_ATTRIBUTES (funtype
)))
3168 if (rtd
&& ! type_has_variadic_args_p (funtype
))
3172 /* Lose any fake structure return argument if it is passed on the stack. */
3173 if (aggregate_value_p (TREE_TYPE (funtype
), fundecl
)
3174 && !KEEP_AGGREGATE_RETURN_POINTER
)
3176 int nregs
= ix86_function_regparm (funtype
, fundecl
);
3178 return GET_MODE_SIZE (Pmode
);
3184 /* Argument support functions. */
3186 /* Return true when register may be used to pass function parameters. */
3188 ix86_function_arg_regno_p (int regno
)
3195 return (regno
< REGPARM_MAX
3196 || (TARGET_SSE
&& SSE_REGNO_P (regno
) && !fixed_regs
[regno
]));
3198 return (regno
< REGPARM_MAX
3199 || (TARGET_MMX
&& MMX_REGNO_P (regno
)
3200 && (regno
< FIRST_MMX_REG
+ MMX_REGPARM_MAX
))
3201 || (TARGET_SSE
&& SSE_REGNO_P (regno
)
3202 && (regno
< FIRST_SSE_REG
+ SSE_REGPARM_MAX
)));
3207 if (SSE_REGNO_P (regno
) && TARGET_SSE
)
3212 if (TARGET_SSE
&& SSE_REGNO_P (regno
)
3213 && (regno
< FIRST_SSE_REG
+ SSE_REGPARM_MAX
))
3217 /* RAX is used as hidden argument to va_arg functions. */
3221 for (i
= 0; i
< REGPARM_MAX
; i
++)
3222 if (regno
== x86_64_int_parameter_registers
[i
])
3227 /* Return if we do not know how to pass TYPE solely in registers. */
3230 ix86_must_pass_in_stack (enum machine_mode mode
, tree type
)
3232 if (must_pass_in_stack_var_size_or_pad (mode
, type
))
3235 /* For 32-bit, we want TImode aggregates to go on the stack. But watch out!
3236 The layout_type routine is crafty and tries to trick us into passing
3237 currently unsupported vector types on the stack by using TImode. */
3238 return (!TARGET_64BIT
&& mode
== TImode
3239 && type
&& TREE_CODE (type
) != VECTOR_TYPE
);
3242 /* Initialize a variable CUM of type CUMULATIVE_ARGS
3243 for a call to a function whose data type is FNTYPE.
3244 For a library call, FNTYPE is 0. */
3247 init_cumulative_args (CUMULATIVE_ARGS
*cum
, /* Argument info to initialize */
3248 tree fntype
, /* tree ptr for function decl */
3249 rtx libname
, /* SYMBOL_REF of library name or 0 */
3252 memset (cum
, 0, sizeof (*cum
));
3254 /* Set up the number of registers to use for passing arguments. */
3255 cum
->nregs
= ix86_regparm
;
3257 cum
->sse_nregs
= SSE_REGPARM_MAX
;
3259 cum
->mmx_nregs
= MMX_REGPARM_MAX
;
3260 cum
->warn_sse
= true;
3261 cum
->warn_mmx
= true;
3262 cum
->maybe_vaarg
= (fntype
? type_has_variadic_args_p (fntype
) : !libname
);
3266 /* If there are variable arguments, then we won't pass anything
3267 in registers in 32-bit mode. */
3268 if (cum
->maybe_vaarg
)
3278 /* Use ecx and edx registers if function has fastcall attribute,
3279 else look for regparm information. */
3282 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype
)))
3288 cum
->nregs
= ix86_function_regparm (fntype
, fndecl
);
3291 /* Set up the number of SSE registers used for passing SFmode
3292 and DFmode arguments. Warn for mismatching ABI. */
3293 cum
->float_in_sse
= ix86_function_sseregparm (fntype
, fndecl
);
3297 /* Return the "natural" mode for TYPE. In most cases, this is just TYPE_MODE.
3298 But in the case of vector types, it is some vector mode.
3300 When we have only some of our vector isa extensions enabled, then there
3301 are some modes for which vector_mode_supported_p is false. For these
3302 modes, the generic vector support in gcc will choose some non-vector mode
3303 in order to implement the type. By computing the natural mode, we'll
3304 select the proper ABI location for the operand and not depend on whatever
3305 the middle-end decides to do with these vector types. */
3307 static enum machine_mode
3308 type_natural_mode (tree type
)
3310 enum machine_mode mode
= TYPE_MODE (type
);
3312 if (TREE_CODE (type
) == VECTOR_TYPE
&& !VECTOR_MODE_P (mode
))
3314 HOST_WIDE_INT size
= int_size_in_bytes (type
);
3315 if ((size
== 8 || size
== 16)
3316 /* ??? Generic code allows us to create width 1 vectors. Ignore. */
3317 && TYPE_VECTOR_SUBPARTS (type
) > 1)
3319 enum machine_mode innermode
= TYPE_MODE (TREE_TYPE (type
));
3321 if (TREE_CODE (TREE_TYPE (type
)) == REAL_TYPE
)
3322 mode
= MIN_MODE_VECTOR_FLOAT
;
3324 mode
= MIN_MODE_VECTOR_INT
;
3326 /* Get the mode which has this inner mode and number of units. */
3327 for (; mode
!= VOIDmode
; mode
= GET_MODE_WIDER_MODE (mode
))
3328 if (GET_MODE_NUNITS (mode
) == TYPE_VECTOR_SUBPARTS (type
)
3329 && GET_MODE_INNER (mode
) == innermode
)
3339 /* We want to pass a value in REGNO whose "natural" mode is MODE. However,
3340 this may not agree with the mode that the type system has chosen for the
3341 register, which is ORIG_MODE. If ORIG_MODE is not BLKmode, then we can
3342 go ahead and use it. Otherwise we have to build a PARALLEL instead. */
3345 gen_reg_or_parallel (enum machine_mode mode
, enum machine_mode orig_mode
,
3350 if (orig_mode
!= BLKmode
)
3351 tmp
= gen_rtx_REG (orig_mode
, regno
);
3354 tmp
= gen_rtx_REG (mode
, regno
);
3355 tmp
= gen_rtx_EXPR_LIST (VOIDmode
, tmp
, const0_rtx
);
3356 tmp
= gen_rtx_PARALLEL (orig_mode
, gen_rtvec (1, tmp
));
3362 /* x86-64 register passing implementation. See x86-64 ABI for details. Goal
3363 of this code is to classify each 8bytes of incoming argument by the register
3364 class and assign registers accordingly. */
3366 /* Return the union class of CLASS1 and CLASS2.
3367 See the x86-64 PS ABI for details. */
3369 static enum x86_64_reg_class
3370 merge_classes (enum x86_64_reg_class class1
, enum x86_64_reg_class class2
)
3372 /* Rule #1: If both classes are equal, this is the resulting class. */
3373 if (class1
== class2
)
3376 /* Rule #2: If one of the classes is NO_CLASS, the resulting class is
3378 if (class1
== X86_64_NO_CLASS
)
3380 if (class2
== X86_64_NO_CLASS
)
3383 /* Rule #3: If one of the classes is MEMORY, the result is MEMORY. */
3384 if (class1
== X86_64_MEMORY_CLASS
|| class2
== X86_64_MEMORY_CLASS
)
3385 return X86_64_MEMORY_CLASS
;
3387 /* Rule #4: If one of the classes is INTEGER, the result is INTEGER. */
3388 if ((class1
== X86_64_INTEGERSI_CLASS
&& class2
== X86_64_SSESF_CLASS
)
3389 || (class2
== X86_64_INTEGERSI_CLASS
&& class1
== X86_64_SSESF_CLASS
))
3390 return X86_64_INTEGERSI_CLASS
;
3391 if (class1
== X86_64_INTEGER_CLASS
|| class1
== X86_64_INTEGERSI_CLASS
3392 || class2
== X86_64_INTEGER_CLASS
|| class2
== X86_64_INTEGERSI_CLASS
)
3393 return X86_64_INTEGER_CLASS
;
3395 /* Rule #5: If one of the classes is X87, X87UP, or COMPLEX_X87 class,
3397 if (class1
== X86_64_X87_CLASS
3398 || class1
== X86_64_X87UP_CLASS
3399 || class1
== X86_64_COMPLEX_X87_CLASS
3400 || class2
== X86_64_X87_CLASS
3401 || class2
== X86_64_X87UP_CLASS
3402 || class2
== X86_64_COMPLEX_X87_CLASS
)
3403 return X86_64_MEMORY_CLASS
;
3405 /* Rule #6: Otherwise class SSE is used. */
3406 return X86_64_SSE_CLASS
;
3409 /* Classify the argument of type TYPE and mode MODE.
3410 CLASSES will be filled by the register class used to pass each word
3411 of the operand. The number of words is returned. In case the parameter
3412 should be passed in memory, 0 is returned. As a special case for zero
3413 sized containers, classes[0] will be NO_CLASS and 1 is returned.
3415 BIT_OFFSET is used internally for handling records and specifies offset
3416 of the offset in bits modulo 256 to avoid overflow cases.
3418 See the x86-64 PS ABI for details.
3422 classify_argument (enum machine_mode mode
, tree type
,
3423 enum x86_64_reg_class classes
[MAX_CLASSES
], int bit_offset
)
3425 HOST_WIDE_INT bytes
=
3426 (mode
== BLKmode
) ? int_size_in_bytes (type
) : (int) GET_MODE_SIZE (mode
);
3427 int words
= (bytes
+ (bit_offset
% 64) / 8 + UNITS_PER_WORD
- 1) / UNITS_PER_WORD
;
3429 /* Variable sized entities are always passed/returned in memory. */
3433 if (mode
!= VOIDmode
3434 && targetm
.calls
.must_pass_in_stack (mode
, type
))
3437 if (type
&& AGGREGATE_TYPE_P (type
))
3441 enum x86_64_reg_class subclasses
[MAX_CLASSES
];
3443 /* On x86-64 we pass structures larger than 16 bytes on the stack. */
3447 for (i
= 0; i
< words
; i
++)
3448 classes
[i
] = X86_64_NO_CLASS
;
3450 /* Zero sized arrays or structures are NO_CLASS. We return 0 to
3451 signalize memory class, so handle it as special case. */
3454 classes
[0] = X86_64_NO_CLASS
;
3458 /* Classify each field of record and merge classes. */
3459 switch (TREE_CODE (type
))
3462 /* And now merge the fields of structure. */
3463 for (field
= TYPE_FIELDS (type
); field
; field
= TREE_CHAIN (field
))
3465 if (TREE_CODE (field
) == FIELD_DECL
)
3469 if (TREE_TYPE (field
) == error_mark_node
)
3472 /* Bitfields are always classified as integer. Handle them
3473 early, since later code would consider them to be
3474 misaligned integers. */
3475 if (DECL_BIT_FIELD (field
))
3477 for (i
= (int_bit_position (field
) + (bit_offset
% 64)) / 8 / 8;
3478 i
< ((int_bit_position (field
) + (bit_offset
% 64))
3479 + tree_low_cst (DECL_SIZE (field
), 0)
3482 merge_classes (X86_64_INTEGER_CLASS
,
3487 num
= classify_argument (TYPE_MODE (TREE_TYPE (field
)),
3488 TREE_TYPE (field
), subclasses
,
3489 (int_bit_position (field
)
3490 + bit_offset
) % 256);
3493 for (i
= 0; i
< num
; i
++)
3496 (int_bit_position (field
) + (bit_offset
% 64)) / 8 / 8;
3498 merge_classes (subclasses
[i
], classes
[i
+ pos
]);
3506 /* Arrays are handled as small records. */
3509 num
= classify_argument (TYPE_MODE (TREE_TYPE (type
)),
3510 TREE_TYPE (type
), subclasses
, bit_offset
);
3514 /* The partial classes are now full classes. */
3515 if (subclasses
[0] == X86_64_SSESF_CLASS
&& bytes
!= 4)
3516 subclasses
[0] = X86_64_SSE_CLASS
;
3517 if (subclasses
[0] == X86_64_INTEGERSI_CLASS
&& bytes
!= 4)
3518 subclasses
[0] = X86_64_INTEGER_CLASS
;
3520 for (i
= 0; i
< words
; i
++)
3521 classes
[i
] = subclasses
[i
% num
];
3526 case QUAL_UNION_TYPE
:
3527 /* Unions are similar to RECORD_TYPE but offset is always 0.
3529 for (field
= TYPE_FIELDS (type
); field
; field
= TREE_CHAIN (field
))
3531 if (TREE_CODE (field
) == FIELD_DECL
)
3535 if (TREE_TYPE (field
) == error_mark_node
)
3538 num
= classify_argument (TYPE_MODE (TREE_TYPE (field
)),
3539 TREE_TYPE (field
), subclasses
,
3543 for (i
= 0; i
< num
; i
++)
3544 classes
[i
] = merge_classes (subclasses
[i
], classes
[i
]);
3553 /* Final merger cleanup. */
3554 for (i
= 0; i
< words
; i
++)
3556 /* If one class is MEMORY, everything should be passed in
3558 if (classes
[i
] == X86_64_MEMORY_CLASS
)
3561 /* The X86_64_SSEUP_CLASS should be always preceded by
3562 X86_64_SSE_CLASS. */
3563 if (classes
[i
] == X86_64_SSEUP_CLASS
3564 && (i
== 0 || classes
[i
- 1] != X86_64_SSE_CLASS
))
3565 classes
[i
] = X86_64_SSE_CLASS
;
3567 /* X86_64_X87UP_CLASS should be preceded by X86_64_X87_CLASS. */
3568 if (classes
[i
] == X86_64_X87UP_CLASS
3569 && (i
== 0 || classes
[i
- 1] != X86_64_X87_CLASS
))
3570 classes
[i
] = X86_64_SSE_CLASS
;
3575 /* Compute alignment needed. We align all types to natural boundaries with
3576 exception of XFmode that is aligned to 64bits. */
3577 if (mode
!= VOIDmode
&& mode
!= BLKmode
)
3579 int mode_alignment
= GET_MODE_BITSIZE (mode
);
3582 mode_alignment
= 128;
3583 else if (mode
== XCmode
)
3584 mode_alignment
= 256;
3585 if (COMPLEX_MODE_P (mode
))
3586 mode_alignment
/= 2;
3587 /* Misaligned fields are always returned in memory. */
3588 if (bit_offset
% mode_alignment
)
3592 /* for V1xx modes, just use the base mode */
3593 if (VECTOR_MODE_P (mode
)
3594 && GET_MODE_SIZE (GET_MODE_INNER (mode
)) == bytes
)
3595 mode
= GET_MODE_INNER (mode
);
3597 /* Classification of atomic types. */
3602 classes
[0] = X86_64_SSE_CLASS
;
3605 classes
[0] = X86_64_SSE_CLASS
;
3606 classes
[1] = X86_64_SSEUP_CLASS
;
3615 if (bit_offset
+ GET_MODE_BITSIZE (mode
) <= 32)
3616 classes
[0] = X86_64_INTEGERSI_CLASS
;
3618 classes
[0] = X86_64_INTEGER_CLASS
;
3622 classes
[0] = classes
[1] = X86_64_INTEGER_CLASS
;
3627 if (!(bit_offset
% 64))
3628 classes
[0] = X86_64_SSESF_CLASS
;
3630 classes
[0] = X86_64_SSE_CLASS
;
3633 classes
[0] = X86_64_SSEDF_CLASS
;
3636 classes
[0] = X86_64_X87_CLASS
;
3637 classes
[1] = X86_64_X87UP_CLASS
;
3640 classes
[0] = X86_64_SSE_CLASS
;
3641 classes
[1] = X86_64_SSEUP_CLASS
;
3644 classes
[0] = X86_64_SSE_CLASS
;
3647 classes
[0] = X86_64_SSEDF_CLASS
;
3648 classes
[1] = X86_64_SSEDF_CLASS
;
3651 classes
[0] = X86_64_COMPLEX_X87_CLASS
;
3654 /* This modes is larger than 16 bytes. */
3662 classes
[0] = X86_64_SSE_CLASS
;
3663 classes
[1] = X86_64_SSEUP_CLASS
;
3669 classes
[0] = X86_64_SSE_CLASS
;
3675 gcc_assert (VECTOR_MODE_P (mode
));
3680 gcc_assert (GET_MODE_CLASS (GET_MODE_INNER (mode
)) == MODE_INT
);
3682 if (bit_offset
+ GET_MODE_BITSIZE (mode
) <= 32)
3683 classes
[0] = X86_64_INTEGERSI_CLASS
;
3685 classes
[0] = X86_64_INTEGER_CLASS
;
3686 classes
[1] = X86_64_INTEGER_CLASS
;
3687 return 1 + (bytes
> 8);
3691 /* Examine the argument and return set number of register required in each
3692 class. Return 0 iff parameter should be passed in memory. */
3694 examine_argument (enum machine_mode mode
, tree type
, int in_return
,
3695 int *int_nregs
, int *sse_nregs
)
3697 enum x86_64_reg_class
class[MAX_CLASSES
];
3698 int n
= classify_argument (mode
, type
, class, 0);
3704 for (n
--; n
>= 0; n
--)
3707 case X86_64_INTEGER_CLASS
:
3708 case X86_64_INTEGERSI_CLASS
:
3711 case X86_64_SSE_CLASS
:
3712 case X86_64_SSESF_CLASS
:
3713 case X86_64_SSEDF_CLASS
:
3716 case X86_64_NO_CLASS
:
3717 case X86_64_SSEUP_CLASS
:
3719 case X86_64_X87_CLASS
:
3720 case X86_64_X87UP_CLASS
:
3724 case X86_64_COMPLEX_X87_CLASS
:
3725 return in_return
? 2 : 0;
3726 case X86_64_MEMORY_CLASS
:
3732 /* Construct container for the argument used by GCC interface. See
3733 FUNCTION_ARG for the detailed description. */
3736 construct_container (enum machine_mode mode
, enum machine_mode orig_mode
,
3737 tree type
, int in_return
, int nintregs
, int nsseregs
,
3738 const int *intreg
, int sse_regno
)
3740 /* The following variables hold the static issued_error state. */
3741 static bool issued_sse_arg_error
;
3742 static bool issued_sse_ret_error
;
3743 static bool issued_x87_ret_error
;
3745 enum machine_mode tmpmode
;
3747 (mode
== BLKmode
) ? int_size_in_bytes (type
) : (int) GET_MODE_SIZE (mode
);
3748 enum x86_64_reg_class
class[MAX_CLASSES
];
3752 int needed_sseregs
, needed_intregs
;
3753 rtx exp
[MAX_CLASSES
];
3756 n
= classify_argument (mode
, type
, class, 0);
3759 if (!examine_argument (mode
, type
, in_return
, &needed_intregs
,
3762 if (needed_intregs
> nintregs
|| needed_sseregs
> nsseregs
)
3765 /* We allowed the user to turn off SSE for kernel mode. Don't crash if
3766 some less clueful developer tries to use floating-point anyway. */
3767 if (needed_sseregs
&& !TARGET_SSE
)
3771 if (!issued_sse_ret_error
)
3773 error ("SSE register return with SSE disabled");
3774 issued_sse_ret_error
= true;
3777 else if (!issued_sse_arg_error
)
3779 error ("SSE register argument with SSE disabled");
3780 issued_sse_arg_error
= true;
3785 /* Likewise, error if the ABI requires us to return values in the
3786 x87 registers and the user specified -mno-80387. */
3787 if (!TARGET_80387
&& in_return
)
3788 for (i
= 0; i
< n
; i
++)
3789 if (class[i
] == X86_64_X87_CLASS
3790 || class[i
] == X86_64_X87UP_CLASS
3791 || class[i
] == X86_64_COMPLEX_X87_CLASS
)
3793 if (!issued_x87_ret_error
)
3795 error ("x87 register return with x87 disabled");
3796 issued_x87_ret_error
= true;
3801 /* First construct simple cases. Avoid SCmode, since we want to use
3802 single register to pass this type. */
3803 if (n
== 1 && mode
!= SCmode
)
3806 case X86_64_INTEGER_CLASS
:
3807 case X86_64_INTEGERSI_CLASS
:
3808 return gen_rtx_REG (mode
, intreg
[0]);
3809 case X86_64_SSE_CLASS
:
3810 case X86_64_SSESF_CLASS
:
3811 case X86_64_SSEDF_CLASS
:
3812 return gen_reg_or_parallel (mode
, orig_mode
, SSE_REGNO (sse_regno
));
3813 case X86_64_X87_CLASS
:
3814 case X86_64_COMPLEX_X87_CLASS
:
3815 return gen_rtx_REG (mode
, FIRST_STACK_REG
);
3816 case X86_64_NO_CLASS
:
3817 /* Zero sized array, struct or class. */
3822 if (n
== 2 && class[0] == X86_64_SSE_CLASS
&& class[1] == X86_64_SSEUP_CLASS
3824 return gen_rtx_REG (mode
, SSE_REGNO (sse_regno
));
3827 && class[0] == X86_64_X87_CLASS
&& class[1] == X86_64_X87UP_CLASS
)
3828 return gen_rtx_REG (XFmode
, FIRST_STACK_REG
);
3829 if (n
== 2 && class[0] == X86_64_INTEGER_CLASS
3830 && class[1] == X86_64_INTEGER_CLASS
3831 && (mode
== CDImode
|| mode
== TImode
|| mode
== TFmode
)
3832 && intreg
[0] + 1 == intreg
[1])
3833 return gen_rtx_REG (mode
, intreg
[0]);
3835 /* Otherwise figure out the entries of the PARALLEL. */
3836 for (i
= 0; i
< n
; i
++)
3840 case X86_64_NO_CLASS
:
3842 case X86_64_INTEGER_CLASS
:
3843 case X86_64_INTEGERSI_CLASS
:
3844 /* Merge TImodes on aligned occasions here too. */
3845 if (i
* 8 + 8 > bytes
)
3846 tmpmode
= mode_for_size ((bytes
- i
* 8) * BITS_PER_UNIT
, MODE_INT
, 0);
3847 else if (class[i
] == X86_64_INTEGERSI_CLASS
)
3851 /* We've requested 24 bytes we don't have mode for. Use DImode. */
3852 if (tmpmode
== BLKmode
)
3854 exp
[nexps
++] = gen_rtx_EXPR_LIST (VOIDmode
,
3855 gen_rtx_REG (tmpmode
, *intreg
),
3859 case X86_64_SSESF_CLASS
:
3860 exp
[nexps
++] = gen_rtx_EXPR_LIST (VOIDmode
,
3861 gen_rtx_REG (SFmode
,
3862 SSE_REGNO (sse_regno
)),
3866 case X86_64_SSEDF_CLASS
:
3867 exp
[nexps
++] = gen_rtx_EXPR_LIST (VOIDmode
,
3868 gen_rtx_REG (DFmode
,
3869 SSE_REGNO (sse_regno
)),
3873 case X86_64_SSE_CLASS
:
3874 if (i
< n
- 1 && class[i
+ 1] == X86_64_SSEUP_CLASS
)
3878 exp
[nexps
++] = gen_rtx_EXPR_LIST (VOIDmode
,
3879 gen_rtx_REG (tmpmode
,
3880 SSE_REGNO (sse_regno
)),
3882 if (tmpmode
== TImode
)
3891 /* Empty aligned struct, union or class. */
3895 ret
= gen_rtx_PARALLEL (mode
, rtvec_alloc (nexps
));
3896 for (i
= 0; i
< nexps
; i
++)
3897 XVECEXP (ret
, 0, i
) = exp
[i
];
3901 /* Update the data in CUM to advance over an argument of mode MODE
3902 and data type TYPE. (TYPE is null for libcalls where that information
3903 may not be available.) */
3906 function_arg_advance_32 (CUMULATIVE_ARGS
*cum
, enum machine_mode mode
,
3907 tree type
, HOST_WIDE_INT bytes
, HOST_WIDE_INT words
)
3923 cum
->words
+= words
;
3924 cum
->nregs
-= words
;
3925 cum
->regno
+= words
;
3927 if (cum
->nregs
<= 0)
3935 if (cum
->float_in_sse
< 2)
3938 if (cum
->float_in_sse
< 1)
3949 if (!type
|| !AGGREGATE_TYPE_P (type
))
3951 cum
->sse_words
+= words
;
3952 cum
->sse_nregs
-= 1;
3953 cum
->sse_regno
+= 1;
3954 if (cum
->sse_nregs
<= 0)
3966 if (!type
|| !AGGREGATE_TYPE_P (type
))
3968 cum
->mmx_words
+= words
;
3969 cum
->mmx_nregs
-= 1;
3970 cum
->mmx_regno
+= 1;
3971 if (cum
->mmx_nregs
<= 0)
3982 function_arg_advance_64 (CUMULATIVE_ARGS
*cum
, enum machine_mode mode
,
3983 tree type
, HOST_WIDE_INT words
)
3985 int int_nregs
, sse_nregs
;
3987 if (!examine_argument (mode
, type
, 0, &int_nregs
, &sse_nregs
))
3988 cum
->words
+= words
;
3989 else if (sse_nregs
<= cum
->sse_nregs
&& int_nregs
<= cum
->nregs
)
3991 cum
->nregs
-= int_nregs
;
3992 cum
->sse_nregs
-= sse_nregs
;
3993 cum
->regno
+= int_nregs
;
3994 cum
->sse_regno
+= sse_nregs
;
3997 cum
->words
+= words
;
4001 function_arg_advance (CUMULATIVE_ARGS
*cum
, enum machine_mode mode
,
4002 tree type
, int named ATTRIBUTE_UNUSED
)
4004 HOST_WIDE_INT bytes
, words
;
4006 if (mode
== BLKmode
)
4007 bytes
= int_size_in_bytes (type
);
4009 bytes
= GET_MODE_SIZE (mode
);
4010 words
= (bytes
+ UNITS_PER_WORD
- 1) / UNITS_PER_WORD
;
4013 mode
= type_natural_mode (type
);
4016 function_arg_advance_64 (cum
, mode
, type
, words
);
4018 function_arg_advance_32 (cum
, mode
, type
, bytes
, words
);
4021 /* Define where to put the arguments to a function.
4022 Value is zero to push the argument on the stack,
4023 or a hard register in which to store the argument.
4025 MODE is the argument's machine mode.
4026 TYPE is the data type of the argument (as a tree).
4027 This is null for libcalls where that information may
4029 CUM is a variable of type CUMULATIVE_ARGS which gives info about
4030 the preceding args and about the function being called.
4031 NAMED is nonzero if this argument is a named parameter
4032 (otherwise it is an extra parameter matching an ellipsis). */
4035 function_arg_32 (CUMULATIVE_ARGS
*cum
, enum machine_mode mode
,
4036 enum machine_mode orig_mode
, tree type
,
4037 HOST_WIDE_INT bytes
, HOST_WIDE_INT words
)
4039 static bool warnedsse
, warnedmmx
;
4041 /* Avoid the AL settings for the Unix64 ABI. */
4042 if (mode
== VOIDmode
)
4058 if (words
<= cum
->nregs
)
4060 int regno
= cum
->regno
;
4062 /* Fastcall allocates the first two DWORD (SImode) or
4063 smaller arguments to ECX and EDX. */
4066 if (mode
== BLKmode
|| mode
== DImode
)
4069 /* ECX not EAX is the first allocated register. */
4073 return gen_rtx_REG (mode
, regno
);
4078 if (cum
->float_in_sse
< 2)
4081 if (cum
->float_in_sse
< 1)
4091 if (!type
|| !AGGREGATE_TYPE_P (type
))
4093 if (!TARGET_SSE
&& !warnedsse
&& cum
->warn_sse
)
4096 warning (0, "SSE vector argument without SSE enabled "
4100 return gen_reg_or_parallel (mode
, orig_mode
,
4101 cum
->sse_regno
+ FIRST_SSE_REG
);
4109 if (!type
|| !AGGREGATE_TYPE_P (type
))
4111 if (!TARGET_MMX
&& !warnedmmx
&& cum
->warn_mmx
)
4114 warning (0, "MMX vector argument without MMX enabled "
4118 return gen_reg_or_parallel (mode
, orig_mode
,
4119 cum
->mmx_regno
+ FIRST_MMX_REG
);
4128 function_arg_64 (CUMULATIVE_ARGS
*cum
, enum machine_mode mode
,
4129 enum machine_mode orig_mode
, tree type
)
4131 /* Handle a hidden AL argument containing number of registers
4132 for varargs x86-64 functions. */
4133 if (mode
== VOIDmode
)
4134 return GEN_INT (cum
->maybe_vaarg
4135 ? (cum
->sse_nregs
< 0
4140 return construct_container (mode
, orig_mode
, type
, 0, cum
->nregs
,
4142 &x86_64_int_parameter_registers
[cum
->regno
],
4147 function_arg (CUMULATIVE_ARGS
*cum
, enum machine_mode omode
,
4148 tree type
, int named ATTRIBUTE_UNUSED
)
4150 enum machine_mode mode
= omode
;
4151 HOST_WIDE_INT bytes
, words
;
4153 if (mode
== BLKmode
)
4154 bytes
= int_size_in_bytes (type
);
4156 bytes
= GET_MODE_SIZE (mode
);
4157 words
= (bytes
+ UNITS_PER_WORD
- 1) / UNITS_PER_WORD
;
4159 /* To simplify the code below, represent vector types with a vector mode
4160 even if MMX/SSE are not active. */
4161 if (type
&& TREE_CODE (type
) == VECTOR_TYPE
)
4162 mode
= type_natural_mode (type
);
4165 return function_arg_64 (cum
, mode
, omode
, type
);
4167 return function_arg_32 (cum
, mode
, omode
, type
, bytes
, words
);
4170 /* A C expression that indicates when an argument must be passed by
4171 reference. If nonzero for an argument, a copy of that argument is
4172 made in memory and a pointer to the argument is passed instead of
4173 the argument itself. The pointer is passed in whatever way is
4174 appropriate for passing a pointer to that type. */
4177 ix86_pass_by_reference (CUMULATIVE_ARGS
*cum ATTRIBUTE_UNUSED
,
4178 enum machine_mode mode ATTRIBUTE_UNUSED
,
4179 tree type
, bool named ATTRIBUTE_UNUSED
)
4181 if (TARGET_64BIT
&& type
&& int_size_in_bytes (type
) == -1)
4187 /* Return true when TYPE should be 128bit aligned for 32bit argument passing
4188 ABI. Only called if TARGET_SSE. */
4190 contains_128bit_aligned_vector_p (tree type
)
4192 enum machine_mode mode
= TYPE_MODE (type
);
4193 if (SSE_REG_MODE_P (mode
)
4194 && (!TYPE_USER_ALIGN (type
) || TYPE_ALIGN (type
) > 128))
4196 if (TYPE_ALIGN (type
) < 128)
4199 if (AGGREGATE_TYPE_P (type
))
4201 /* Walk the aggregates recursively. */
4202 switch (TREE_CODE (type
))
4206 case QUAL_UNION_TYPE
:
4210 /* Walk all the structure fields. */
4211 for (field
= TYPE_FIELDS (type
); field
; field
= TREE_CHAIN (field
))
4213 if (TREE_CODE (field
) == FIELD_DECL
4214 && contains_128bit_aligned_vector_p (TREE_TYPE (field
)))
4221 /* Just for use if some languages passes arrays by value. */
4222 if (contains_128bit_aligned_vector_p (TREE_TYPE (type
)))
4233 /* Gives the alignment boundary, in bits, of an argument with the
4234 specified mode and type. */
4237 ix86_function_arg_boundary (enum machine_mode mode
, tree type
)
4241 align
= TYPE_ALIGN (type
);
4243 align
= GET_MODE_ALIGNMENT (mode
);
4244 if (align
< PARM_BOUNDARY
)
4245 align
= PARM_BOUNDARY
;
4248 /* i386 ABI defines all arguments to be 4 byte aligned. We have to
4249 make an exception for SSE modes since these require 128bit
4252 The handling here differs from field_alignment. ICC aligns MMX
4253 arguments to 4 byte boundaries, while structure fields are aligned
4254 to 8 byte boundaries. */
4256 align
= PARM_BOUNDARY
;
4259 if (!SSE_REG_MODE_P (mode
))
4260 align
= PARM_BOUNDARY
;
4264 if (!contains_128bit_aligned_vector_p (type
))
4265 align
= PARM_BOUNDARY
;
4273 /* Return true if N is a possible register number of function value. */
4276 ix86_function_value_regno_p (int regno
)
4283 case FIRST_FLOAT_REG
:
4284 return TARGET_FLOAT_RETURNS_IN_80387
;
4290 if (TARGET_MACHO
|| TARGET_64BIT
)
4298 /* Define how to find the value returned by a function.
4299 VALTYPE is the data type of the value (as a tree).
4300 If the precise function being called is known, FUNC is its FUNCTION_DECL;
4301 otherwise, FUNC is 0. */
4304 function_value_32 (enum machine_mode orig_mode
, enum machine_mode mode
,
4305 tree fntype
, tree fn
)
4309 /* 8-byte vector modes in %mm0. See ix86_return_in_memory for where
4310 we normally prevent this case when mmx is not available. However
4311 some ABIs may require the result to be returned like DImode. */
4312 if (VECTOR_MODE_P (mode
) && GET_MODE_SIZE (mode
) == 8)
4313 regno
= TARGET_MMX
? FIRST_MMX_REG
: 0;
4315 /* 16-byte vector modes in %xmm0. See ix86_return_in_memory for where
4316 we prevent this case when sse is not available. However some ABIs
4317 may require the result to be returned like integer TImode. */
4318 else if (mode
== TImode
4319 || (VECTOR_MODE_P (mode
) && GET_MODE_SIZE (mode
) == 16))
4320 regno
= TARGET_SSE
? FIRST_SSE_REG
: 0;
4322 /* Decimal floating point values can go in %eax, unlike other float modes. */
4323 else if (DECIMAL_FLOAT_MODE_P (mode
))
4326 /* Most things go in %eax, except (unless -mno-fp-ret-in-387) fp values. */
4327 else if (!SCALAR_FLOAT_MODE_P (mode
) || !TARGET_FLOAT_RETURNS_IN_80387
)
4330 /* Floating point return values in %st(0), except for local functions when
4331 SSE math is enabled or for functions with sseregparm attribute. */
4334 regno
= FIRST_FLOAT_REG
;
4336 if ((fn
|| fntype
) && (mode
== SFmode
|| mode
== DFmode
))
4338 int sse_level
= ix86_function_sseregparm (fntype
, fn
);
4339 if ((sse_level
>= 1 && mode
== SFmode
)
4340 || (sse_level
== 2 && mode
== DFmode
))
4341 regno
= FIRST_SSE_REG
;
4345 return gen_rtx_REG (orig_mode
, regno
);
4349 function_value_64 (enum machine_mode orig_mode
, enum machine_mode mode
,
4354 /* Handle libcalls, which don't provide a type node. */
4355 if (valtype
== NULL
)
4367 return gen_rtx_REG (mode
, FIRST_SSE_REG
);
4370 return gen_rtx_REG (mode
, FIRST_FLOAT_REG
);
4374 return gen_rtx_REG (mode
, 0);
4378 ret
= construct_container (mode
, orig_mode
, valtype
, 1,
4379 REGPARM_MAX
, SSE_REGPARM_MAX
,
4380 x86_64_int_return_registers
, 0);
4382 /* For zero sized structures, construct_container returns NULL, but we
4383 need to keep rest of compiler happy by returning meaningful value. */
4385 ret
= gen_rtx_REG (orig_mode
, 0);
4391 ix86_function_value_1 (tree valtype
, tree fntype_or_decl
,
4392 enum machine_mode orig_mode
, enum machine_mode mode
)
4397 if (fntype_or_decl
&& DECL_P (fntype_or_decl
))
4398 fn
= fntype_or_decl
;
4399 fntype
= fn
? TREE_TYPE (fn
) : fntype_or_decl
;
4402 return function_value_64 (orig_mode
, mode
, valtype
);
4404 return function_value_32 (orig_mode
, mode
, fntype
, fn
);
4408 ix86_function_value (tree valtype
, tree fntype_or_decl
,
4409 bool outgoing ATTRIBUTE_UNUSED
)
4411 enum machine_mode mode
, orig_mode
;
4413 orig_mode
= TYPE_MODE (valtype
);
4414 mode
= type_natural_mode (valtype
);
4415 return ix86_function_value_1 (valtype
, fntype_or_decl
, orig_mode
, mode
);
4419 ix86_libcall_value (enum machine_mode mode
)
4421 return ix86_function_value_1 (NULL
, NULL
, mode
, mode
);
4424 /* Return true iff type is returned in memory. */
4427 return_in_memory_32 (tree type
, enum machine_mode mode
)
4431 if (mode
== BLKmode
)
4434 size
= int_size_in_bytes (type
);
4436 if (MS_AGGREGATE_RETURN
&& AGGREGATE_TYPE_P (type
) && size
<= 8)
4439 if (VECTOR_MODE_P (mode
) || mode
== TImode
)
4441 /* User-created vectors small enough to fit in EAX. */
4445 /* MMX/3dNow values are returned in MM0,
4446 except when it doesn't exits. */
4448 return (TARGET_MMX
? 0 : 1);
4450 /* SSE values are returned in XMM0, except when it doesn't exist. */
4452 return (TARGET_SSE
? 0 : 1);
4467 return_in_memory_64 (tree type
, enum machine_mode mode
)
4469 int needed_intregs
, needed_sseregs
;
4470 return !examine_argument (mode
, type
, 1, &needed_intregs
, &needed_sseregs
);
4474 ix86_return_in_memory (tree type
)
4476 enum machine_mode mode
= type_natural_mode (type
);
4479 return return_in_memory_64 (type
, mode
);
4481 return return_in_memory_32 (type
, mode
);
4484 /* When returning SSE vector types, we have a choice of either
4485 (1) being abi incompatible with a -march switch, or
4486 (2) generating an error.
4487 Given no good solution, I think the safest thing is one warning.
4488 The user won't be able to use -Werror, but....
4490 Choose the STRUCT_VALUE_RTX hook because that's (at present) only
4491 called in response to actually generating a caller or callee that
4492 uses such a type. As opposed to RETURN_IN_MEMORY, which is called
4493 via aggregate_value_p for general type probing from tree-ssa. */
4496 ix86_struct_value_rtx (tree type
, int incoming ATTRIBUTE_UNUSED
)
4498 static bool warnedsse
, warnedmmx
;
4500 if (!TARGET_64BIT
&& type
)
4502 /* Look at the return type of the function, not the function type. */
4503 enum machine_mode mode
= TYPE_MODE (TREE_TYPE (type
));
4505 if (!TARGET_SSE
&& !warnedsse
)
4508 || (VECTOR_MODE_P (mode
) && GET_MODE_SIZE (mode
) == 16))
4511 warning (0, "SSE vector return without SSE enabled "
4516 if (!TARGET_MMX
&& !warnedmmx
)
4518 if (VECTOR_MODE_P (mode
) && GET_MODE_SIZE (mode
) == 8)
4521 warning (0, "MMX vector return without MMX enabled "
4531 /* Create the va_list data type. */
4534 ix86_build_builtin_va_list (void)
4536 tree f_gpr
, f_fpr
, f_ovf
, f_sav
, record
, type_decl
;
4538 /* For i386 we use plain pointer to argument area. */
4540 return build_pointer_type (char_type_node
);
4542 record
= (*lang_hooks
.types
.make_type
) (RECORD_TYPE
);
4543 type_decl
= build_decl (TYPE_DECL
, get_identifier ("__va_list_tag"), record
);
4545 f_gpr
= build_decl (FIELD_DECL
, get_identifier ("gp_offset"),
4546 unsigned_type_node
);
4547 f_fpr
= build_decl (FIELD_DECL
, get_identifier ("fp_offset"),
4548 unsigned_type_node
);
4549 f_ovf
= build_decl (FIELD_DECL
, get_identifier ("overflow_arg_area"),
4551 f_sav
= build_decl (FIELD_DECL
, get_identifier ("reg_save_area"),
4554 va_list_gpr_counter_field
= f_gpr
;
4555 va_list_fpr_counter_field
= f_fpr
;
4557 DECL_FIELD_CONTEXT (f_gpr
) = record
;
4558 DECL_FIELD_CONTEXT (f_fpr
) = record
;
4559 DECL_FIELD_CONTEXT (f_ovf
) = record
;
4560 DECL_FIELD_CONTEXT (f_sav
) = record
;
4562 TREE_CHAIN (record
) = type_decl
;
4563 TYPE_NAME (record
) = type_decl
;
4564 TYPE_FIELDS (record
) = f_gpr
;
4565 TREE_CHAIN (f_gpr
) = f_fpr
;
4566 TREE_CHAIN (f_fpr
) = f_ovf
;
4567 TREE_CHAIN (f_ovf
) = f_sav
;
4569 layout_type (record
);
4571 /* The correct type is an array type of one element. */
4572 return build_array_type (record
, build_index_type (size_zero_node
));
4575 /* Worker function for TARGET_SETUP_INCOMING_VARARGS. */
4578 setup_incoming_varargs_64 (CUMULATIVE_ARGS
*cum
)
4588 if (! cfun
->va_list_gpr_size
&& ! cfun
->va_list_fpr_size
)
4591 /* Indicate to allocate space on the stack for varargs save area. */
4592 ix86_save_varrargs_registers
= 1;
4593 cfun
->stack_alignment_needed
= 128;
4595 save_area
= frame_pointer_rtx
;
4596 set
= get_varargs_alias_set ();
4598 for (i
= cum
->regno
;
4600 && i
< cum
->regno
+ cfun
->va_list_gpr_size
/ UNITS_PER_WORD
;
4603 mem
= gen_rtx_MEM (Pmode
,
4604 plus_constant (save_area
, i
* UNITS_PER_WORD
));
4605 MEM_NOTRAP_P (mem
) = 1;
4606 set_mem_alias_set (mem
, set
);
4607 emit_move_insn (mem
, gen_rtx_REG (Pmode
,
4608 x86_64_int_parameter_registers
[i
]));
4611 if (cum
->sse_nregs
&& cfun
->va_list_fpr_size
)
4613 /* Now emit code to save SSE registers. The AX parameter contains number
4614 of SSE parameter registers used to call this function. We use
4615 sse_prologue_save insn template that produces computed jump across
4616 SSE saves. We need some preparation work to get this working. */
4618 label
= gen_label_rtx ();
4619 label_ref
= gen_rtx_LABEL_REF (Pmode
, label
);
4621 /* Compute address to jump to :
4622 label - 5*eax + nnamed_sse_arguments*5 */
4623 tmp_reg
= gen_reg_rtx (Pmode
);
4624 nsse_reg
= gen_reg_rtx (Pmode
);
4625 emit_insn (gen_zero_extendqidi2 (nsse_reg
, gen_rtx_REG (QImode
, 0)));
4626 emit_insn (gen_rtx_SET (VOIDmode
, tmp_reg
,
4627 gen_rtx_MULT (Pmode
, nsse_reg
,
4632 gen_rtx_CONST (DImode
,
4633 gen_rtx_PLUS (DImode
,
4635 GEN_INT (cum
->sse_regno
* 4))));
4637 emit_move_insn (nsse_reg
, label_ref
);
4638 emit_insn (gen_subdi3 (nsse_reg
, nsse_reg
, tmp_reg
));
4640 /* Compute address of memory block we save into. We always use pointer
4641 pointing 127 bytes after first byte to store - this is needed to keep
4642 instruction size limited by 4 bytes. */
4643 tmp_reg
= gen_reg_rtx (Pmode
);
4644 emit_insn (gen_rtx_SET (VOIDmode
, tmp_reg
,
4645 plus_constant (save_area
,
4646 8 * REGPARM_MAX
+ 127)));
4647 mem
= gen_rtx_MEM (BLKmode
, plus_constant (tmp_reg
, -127));
4648 MEM_NOTRAP_P (mem
) = 1;
4649 set_mem_alias_set (mem
, set
);
4650 set_mem_align (mem
, BITS_PER_WORD
);
4652 /* And finally do the dirty job! */
4653 emit_insn (gen_sse_prologue_save (mem
, nsse_reg
,
4654 GEN_INT (cum
->sse_regno
), label
));
4659 ix86_setup_incoming_varargs (CUMULATIVE_ARGS
*cum
, enum machine_mode mode
,
4660 tree type
, int *pretend_size ATTRIBUTE_UNUSED
,
4663 CUMULATIVE_ARGS next_cum
;
4667 /* This argument doesn't appear to be used anymore. Which is good,
4668 because the old code here didn't suppress rtl generation. */
4669 gcc_assert (!no_rtl
);
4674 fntype
= TREE_TYPE (current_function_decl
);
4675 stdarg_p
= (TYPE_ARG_TYPES (fntype
) != 0
4676 && (TREE_VALUE (tree_last (TYPE_ARG_TYPES (fntype
)))
4677 != void_type_node
));
4679 /* For varargs, we do not want to skip the dummy va_dcl argument.
4680 For stdargs, we do want to skip the last named argument. */
4683 function_arg_advance (&next_cum
, mode
, type
, 1);
4685 setup_incoming_varargs_64 (&next_cum
);
4688 /* Implement va_start. */
4691 ix86_va_start (tree valist
, rtx nextarg
)
4693 HOST_WIDE_INT words
, n_gpr
, n_fpr
;
4694 tree f_gpr
, f_fpr
, f_ovf
, f_sav
;
4695 tree gpr
, fpr
, ovf
, sav
, t
;
4698 /* Only 64bit target needs something special. */
4701 std_expand_builtin_va_start (valist
, nextarg
);
4705 f_gpr
= TYPE_FIELDS (TREE_TYPE (va_list_type_node
));
4706 f_fpr
= TREE_CHAIN (f_gpr
);
4707 f_ovf
= TREE_CHAIN (f_fpr
);
4708 f_sav
= TREE_CHAIN (f_ovf
);
4710 valist
= build1 (INDIRECT_REF
, TREE_TYPE (TREE_TYPE (valist
)), valist
);
4711 gpr
= build3 (COMPONENT_REF
, TREE_TYPE (f_gpr
), valist
, f_gpr
, NULL_TREE
);
4712 fpr
= build3 (COMPONENT_REF
, TREE_TYPE (f_fpr
), valist
, f_fpr
, NULL_TREE
);
4713 ovf
= build3 (COMPONENT_REF
, TREE_TYPE (f_ovf
), valist
, f_ovf
, NULL_TREE
);
4714 sav
= build3 (COMPONENT_REF
, TREE_TYPE (f_sav
), valist
, f_sav
, NULL_TREE
);
4716 /* Count number of gp and fp argument registers used. */
4717 words
= current_function_args_info
.words
;
4718 n_gpr
= current_function_args_info
.regno
;
4719 n_fpr
= current_function_args_info
.sse_regno
;
4721 if (cfun
->va_list_gpr_size
)
4723 type
= TREE_TYPE (gpr
);
4724 t
= build2 (GIMPLE_MODIFY_STMT
, type
, gpr
,
4725 build_int_cst (type
, n_gpr
* 8));
4726 TREE_SIDE_EFFECTS (t
) = 1;
4727 expand_expr (t
, const0_rtx
, VOIDmode
, EXPAND_NORMAL
);
4730 if (cfun
->va_list_fpr_size
)
4732 type
= TREE_TYPE (fpr
);
4733 t
= build2 (GIMPLE_MODIFY_STMT
, type
, fpr
,
4734 build_int_cst (type
, n_fpr
* 16 + 8*REGPARM_MAX
));
4735 TREE_SIDE_EFFECTS (t
) = 1;
4736 expand_expr (t
, const0_rtx
, VOIDmode
, EXPAND_NORMAL
);
4739 /* Find the overflow area. */
4740 type
= TREE_TYPE (ovf
);
4741 t
= make_tree (type
, virtual_incoming_args_rtx
);
4743 t
= build2 (PLUS_EXPR
, type
, t
,
4744 build_int_cst (type
, words
* UNITS_PER_WORD
));
4745 t
= build2 (GIMPLE_MODIFY_STMT
, type
, ovf
, t
);
4746 TREE_SIDE_EFFECTS (t
) = 1;
4747 expand_expr (t
, const0_rtx
, VOIDmode
, EXPAND_NORMAL
);
4749 if (cfun
->va_list_gpr_size
|| cfun
->va_list_fpr_size
)
4751 /* Find the register save area.
4752 Prologue of the function save it right above stack frame. */
4753 type
= TREE_TYPE (sav
);
4754 t
= make_tree (type
, frame_pointer_rtx
);
4755 t
= build2 (GIMPLE_MODIFY_STMT
, type
, sav
, t
);
4756 TREE_SIDE_EFFECTS (t
) = 1;
4757 expand_expr (t
, const0_rtx
, VOIDmode
, EXPAND_NORMAL
);
4761 /* Implement va_arg. */
4764 ix86_gimplify_va_arg (tree valist
, tree type
, tree
*pre_p
, tree
*post_p
)
4766 static const int intreg
[6] = { 0, 1, 2, 3, 4, 5 };
4767 tree f_gpr
, f_fpr
, f_ovf
, f_sav
;
4768 tree gpr
, fpr
, ovf
, sav
, t
;
4770 tree lab_false
, lab_over
= NULL_TREE
;
4775 enum machine_mode nat_mode
;
4777 /* Only 64bit target needs something special. */
4779 return std_gimplify_va_arg_expr (valist
, type
, pre_p
, post_p
);
4781 f_gpr
= TYPE_FIELDS (TREE_TYPE (va_list_type_node
));
4782 f_fpr
= TREE_CHAIN (f_gpr
);
4783 f_ovf
= TREE_CHAIN (f_fpr
);
4784 f_sav
= TREE_CHAIN (f_ovf
);
4786 valist
= build_va_arg_indirect_ref (valist
);
4787 gpr
= build3 (COMPONENT_REF
, TREE_TYPE (f_gpr
), valist
, f_gpr
, NULL_TREE
);
4788 fpr
= build3 (COMPONENT_REF
, TREE_TYPE (f_fpr
), valist
, f_fpr
, NULL_TREE
);
4789 ovf
= build3 (COMPONENT_REF
, TREE_TYPE (f_ovf
), valist
, f_ovf
, NULL_TREE
);
4790 sav
= build3 (COMPONENT_REF
, TREE_TYPE (f_sav
), valist
, f_sav
, NULL_TREE
);
4792 indirect_p
= pass_by_reference (NULL
, TYPE_MODE (type
), type
, false);
4794 type
= build_pointer_type (type
);
4795 size
= int_size_in_bytes (type
);
4796 rsize
= (size
+ UNITS_PER_WORD
- 1) / UNITS_PER_WORD
;
4798 nat_mode
= type_natural_mode (type
);
4799 container
= construct_container (nat_mode
, TYPE_MODE (type
), type
, 0,
4800 REGPARM_MAX
, SSE_REGPARM_MAX
, intreg
, 0);
4802 /* Pull the value out of the saved registers. */
4804 addr
= create_tmp_var (ptr_type_node
, "addr");
4805 DECL_POINTER_ALIAS_SET (addr
) = get_varargs_alias_set ();
4809 int needed_intregs
, needed_sseregs
;
4811 tree int_addr
, sse_addr
;
4813 lab_false
= create_artificial_label ();
4814 lab_over
= create_artificial_label ();
4816 examine_argument (nat_mode
, type
, 0, &needed_intregs
, &needed_sseregs
);
4818 need_temp
= (!REG_P (container
)
4819 && ((needed_intregs
&& TYPE_ALIGN (type
) > 64)
4820 || TYPE_ALIGN (type
) > 128));
4822 /* In case we are passing structure, verify that it is consecutive block
4823 on the register save area. If not we need to do moves. */
4824 if (!need_temp
&& !REG_P (container
))
4826 /* Verify that all registers are strictly consecutive */
4827 if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container
, 0, 0), 0))))
4831 for (i
= 0; i
< XVECLEN (container
, 0) && !need_temp
; i
++)
4833 rtx slot
= XVECEXP (container
, 0, i
);
4834 if (REGNO (XEXP (slot
, 0)) != FIRST_SSE_REG
+ (unsigned int) i
4835 || INTVAL (XEXP (slot
, 1)) != i
* 16)
4843 for (i
= 0; i
< XVECLEN (container
, 0) && !need_temp
; i
++)
4845 rtx slot
= XVECEXP (container
, 0, i
);
4846 if (REGNO (XEXP (slot
, 0)) != (unsigned int) i
4847 || INTVAL (XEXP (slot
, 1)) != i
* 8)
4859 int_addr
= create_tmp_var (ptr_type_node
, "int_addr");
4860 DECL_POINTER_ALIAS_SET (int_addr
) = get_varargs_alias_set ();
4861 sse_addr
= create_tmp_var (ptr_type_node
, "sse_addr");
4862 DECL_POINTER_ALIAS_SET (sse_addr
) = get_varargs_alias_set ();
4865 /* First ensure that we fit completely in registers. */
4868 t
= build_int_cst (TREE_TYPE (gpr
),
4869 (REGPARM_MAX
- needed_intregs
+ 1) * 8);
4870 t
= build2 (GE_EXPR
, boolean_type_node
, gpr
, t
);
4871 t2
= build1 (GOTO_EXPR
, void_type_node
, lab_false
);
4872 t
= build3 (COND_EXPR
, void_type_node
, t
, t2
, NULL_TREE
);
4873 gimplify_and_add (t
, pre_p
);
4877 t
= build_int_cst (TREE_TYPE (fpr
),
4878 (SSE_REGPARM_MAX
- needed_sseregs
+ 1) * 16
4880 t
= build2 (GE_EXPR
, boolean_type_node
, fpr
, t
);
4881 t2
= build1 (GOTO_EXPR
, void_type_node
, lab_false
);
4882 t
= build3 (COND_EXPR
, void_type_node
, t
, t2
, NULL_TREE
);
4883 gimplify_and_add (t
, pre_p
);
4886 /* Compute index to start of area used for integer regs. */
4889 /* int_addr = gpr + sav; */
4890 t
= fold_convert (ptr_type_node
, gpr
);
4891 t
= build2 (PLUS_EXPR
, ptr_type_node
, sav
, t
);
4892 t
= build2 (GIMPLE_MODIFY_STMT
, void_type_node
, int_addr
, t
);
4893 gimplify_and_add (t
, pre_p
);
4897 /* sse_addr = fpr + sav; */
4898 t
= fold_convert (ptr_type_node
, fpr
);
4899 t
= build2 (PLUS_EXPR
, ptr_type_node
, sav
, t
);
4900 t
= build2 (GIMPLE_MODIFY_STMT
, void_type_node
, sse_addr
, t
);
4901 gimplify_and_add (t
, pre_p
);
4906 tree temp
= create_tmp_var (type
, "va_arg_tmp");
4909 t
= build1 (ADDR_EXPR
, build_pointer_type (type
), temp
);
4910 t
= build2 (GIMPLE_MODIFY_STMT
, void_type_node
, addr
, t
);
4911 gimplify_and_add (t
, pre_p
);
4913 for (i
= 0; i
< XVECLEN (container
, 0); i
++)
4915 rtx slot
= XVECEXP (container
, 0, i
);
4916 rtx reg
= XEXP (slot
, 0);
4917 enum machine_mode mode
= GET_MODE (reg
);
4918 tree piece_type
= lang_hooks
.types
.type_for_mode (mode
, 1);
4919 tree addr_type
= build_pointer_type (piece_type
);
4922 tree dest_addr
, dest
;
4924 if (SSE_REGNO_P (REGNO (reg
)))
4926 src_addr
= sse_addr
;
4927 src_offset
= (REGNO (reg
) - FIRST_SSE_REG
) * 16;
4931 src_addr
= int_addr
;
4932 src_offset
= REGNO (reg
) * 8;
4934 src_addr
= fold_convert (addr_type
, src_addr
);
4935 src_addr
= fold_build2 (PLUS_EXPR
, addr_type
, src_addr
,
4936 size_int (src_offset
));
4937 src
= build_va_arg_indirect_ref (src_addr
);
4939 dest_addr
= fold_convert (addr_type
, addr
);
4940 dest_addr
= fold_build2 (PLUS_EXPR
, addr_type
, dest_addr
,
4941 size_int (INTVAL (XEXP (slot
, 1))));
4942 dest
= build_va_arg_indirect_ref (dest_addr
);
4944 t
= build2 (GIMPLE_MODIFY_STMT
, void_type_node
, dest
, src
);
4945 gimplify_and_add (t
, pre_p
);
4951 t
= build2 (PLUS_EXPR
, TREE_TYPE (gpr
), gpr
,
4952 build_int_cst (TREE_TYPE (gpr
), needed_intregs
* 8));
4953 t
= build2 (GIMPLE_MODIFY_STMT
, TREE_TYPE (gpr
), gpr
, t
);
4954 gimplify_and_add (t
, pre_p
);
4958 t
= build2 (PLUS_EXPR
, TREE_TYPE (fpr
), fpr
,
4959 build_int_cst (TREE_TYPE (fpr
), needed_sseregs
* 16));
4960 t
= build2 (GIMPLE_MODIFY_STMT
, TREE_TYPE (fpr
), fpr
, t
);
4961 gimplify_and_add (t
, pre_p
);
4964 t
= build1 (GOTO_EXPR
, void_type_node
, lab_over
);
4965 gimplify_and_add (t
, pre_p
);
4967 t
= build1 (LABEL_EXPR
, void_type_node
, lab_false
);
4968 append_to_statement_list (t
, pre_p
);
4971 /* ... otherwise out of the overflow area. */
4973 /* Care for on-stack alignment if needed. */
4974 if (FUNCTION_ARG_BOUNDARY (VOIDmode
, type
) <= 64
4975 || integer_zerop (TYPE_SIZE (type
)))
4979 HOST_WIDE_INT align
= FUNCTION_ARG_BOUNDARY (VOIDmode
, type
) / 8;
4980 t
= build2 (PLUS_EXPR
, TREE_TYPE (ovf
), ovf
,
4981 build_int_cst (TREE_TYPE (ovf
), align
- 1));
4982 t
= build2 (BIT_AND_EXPR
, TREE_TYPE (t
), t
,
4983 build_int_cst (TREE_TYPE (t
), -align
));
4985 gimplify_expr (&t
, pre_p
, NULL
, is_gimple_val
, fb_rvalue
);
4987 t2
= build2 (GIMPLE_MODIFY_STMT
, void_type_node
, addr
, t
);
4988 gimplify_and_add (t2
, pre_p
);
4990 t
= build2 (PLUS_EXPR
, TREE_TYPE (t
), t
,
4991 build_int_cst (TREE_TYPE (t
), rsize
* UNITS_PER_WORD
));
4992 t
= build2 (GIMPLE_MODIFY_STMT
, TREE_TYPE (ovf
), ovf
, t
);
4993 gimplify_and_add (t
, pre_p
);
4997 t
= build1 (LABEL_EXPR
, void_type_node
, lab_over
);
4998 append_to_statement_list (t
, pre_p
);
5001 ptrtype
= build_pointer_type (type
);
5002 addr
= fold_convert (ptrtype
, addr
);
5005 addr
= build_va_arg_indirect_ref (addr
);
5006 return build_va_arg_indirect_ref (addr
);
5009 /* Return nonzero if OPNUM's MEM should be matched
5010 in movabs* patterns. */
5013 ix86_check_movabs (rtx insn
, int opnum
)
5017 set
= PATTERN (insn
);
5018 if (GET_CODE (set
) == PARALLEL
)
5019 set
= XVECEXP (set
, 0, 0);
5020 gcc_assert (GET_CODE (set
) == SET
);
5021 mem
= XEXP (set
, opnum
);
5022 while (GET_CODE (mem
) == SUBREG
)
5023 mem
= SUBREG_REG (mem
);
5024 gcc_assert (MEM_P (mem
));
5025 return (volatile_ok
|| !MEM_VOLATILE_P (mem
));
5028 /* Initialize the table of extra 80387 mathematical constants. */
5031 init_ext_80387_constants (void)
5033 static const char * cst
[5] =
5035 "0.3010299956639811952256464283594894482", /* 0: fldlg2 */
5036 "0.6931471805599453094286904741849753009", /* 1: fldln2 */
5037 "1.4426950408889634073876517827983434472", /* 2: fldl2e */
5038 "3.3219280948873623478083405569094566090", /* 3: fldl2t */
5039 "3.1415926535897932385128089594061862044", /* 4: fldpi */
5043 for (i
= 0; i
< 5; i
++)
5045 real_from_string (&ext_80387_constants_table
[i
], cst
[i
]);
5046 /* Ensure each constant is rounded to XFmode precision. */
5047 real_convert (&ext_80387_constants_table
[i
],
5048 XFmode
, &ext_80387_constants_table
[i
]);
5051 ext_80387_constants_init
= 1;
5054 /* Return true if the constant is something that can be loaded with
5055 a special instruction. */
5058 standard_80387_constant_p (rtx x
)
5062 if (GET_CODE (x
) != CONST_DOUBLE
|| !FLOAT_MODE_P (GET_MODE (x
)))
5065 if (x
== CONST0_RTX (GET_MODE (x
)))
5067 if (x
== CONST1_RTX (GET_MODE (x
)))
5070 REAL_VALUE_FROM_CONST_DOUBLE (r
, x
);
5072 /* For XFmode constants, try to find a special 80387 instruction when
5073 optimizing for size or on those CPUs that benefit from them. */
5074 if (GET_MODE (x
) == XFmode
5075 && (optimize_size
|| TARGET_EXT_80387_CONSTANTS
))
5079 if (! ext_80387_constants_init
)
5080 init_ext_80387_constants ();
5082 for (i
= 0; i
< 5; i
++)
5083 if (real_identical (&r
, &ext_80387_constants_table
[i
]))
5087 /* Load of the constant -0.0 or -1.0 will be split as
5088 fldz;fchs or fld1;fchs sequence. */
5089 if (real_isnegzero (&r
))
5091 if (real_identical (&r
, &dconstm1
))
5097 /* Return the opcode of the special instruction to be used to load
5101 standard_80387_constant_opcode (rtx x
)
5103 switch (standard_80387_constant_p (x
))
5127 /* Return the CONST_DOUBLE representing the 80387 constant that is
5128 loaded by the specified special instruction. The argument IDX
5129 matches the return value from standard_80387_constant_p. */
5132 standard_80387_constant_rtx (int idx
)
5136 if (! ext_80387_constants_init
)
5137 init_ext_80387_constants ();
5153 return CONST_DOUBLE_FROM_REAL_VALUE (ext_80387_constants_table
[i
],
5157 /* Return 1 if mode is a valid mode for sse. */
5159 standard_sse_mode_p (enum machine_mode mode
)
5176 /* Return 1 if X is FP constant we can load to SSE register w/o using memory.
5179 standard_sse_constant_p (rtx x
)
5181 enum machine_mode mode
= GET_MODE (x
);
5183 if (x
== const0_rtx
|| x
== CONST0_RTX (GET_MODE (x
)))
5185 if (vector_all_ones_operand (x
, mode
)
5186 && standard_sse_mode_p (mode
))
5187 return TARGET_SSE2
? 2 : -1;
5192 /* Return the opcode of the special instruction to be used to load
5196 standard_sse_constant_opcode (rtx insn
, rtx x
)
5198 switch (standard_sse_constant_p (x
))
5201 if (get_attr_mode (insn
) == MODE_V4SF
)
5202 return "xorps\t%0, %0";
5203 else if (get_attr_mode (insn
) == MODE_V2DF
)
5204 return "xorpd\t%0, %0";
5206 return "pxor\t%0, %0";
5208 return "pcmpeqd\t%0, %0";
5213 /* Returns 1 if OP contains a symbol reference */
5216 symbolic_reference_mentioned_p (rtx op
)
5221 if (GET_CODE (op
) == SYMBOL_REF
|| GET_CODE (op
) == LABEL_REF
)
5224 fmt
= GET_RTX_FORMAT (GET_CODE (op
));
5225 for (i
= GET_RTX_LENGTH (GET_CODE (op
)) - 1; i
>= 0; i
--)
5231 for (j
= XVECLEN (op
, i
) - 1; j
>= 0; j
--)
5232 if (symbolic_reference_mentioned_p (XVECEXP (op
, i
, j
)))
5236 else if (fmt
[i
] == 'e' && symbolic_reference_mentioned_p (XEXP (op
, i
)))
5243 /* Return 1 if it is appropriate to emit `ret' instructions in the
5244 body of a function. Do this only if the epilogue is simple, needing a
5245 couple of insns. Prior to reloading, we can't tell how many registers
5246 must be saved, so return 0 then. Return 0 if there is no frame
5247 marker to de-allocate. */
5250 ix86_can_use_return_insn_p (void)
5252 struct ix86_frame frame
;
5254 if (! reload_completed
|| frame_pointer_needed
)
5257 /* Don't allow more than 32 pop, since that's all we can do
5258 with one instruction. */
5259 if (current_function_pops_args
5260 && current_function_args_size
>= 32768)
5263 ix86_compute_frame_layout (&frame
);
5264 return frame
.to_allocate
== 0 && frame
.nregs
== 0;
5267 /* Value should be nonzero if functions must have frame pointers.
5268 Zero means the frame pointer need not be set up (and parms may
5269 be accessed via the stack pointer) in functions that seem suitable. */
5272 ix86_frame_pointer_required (void)
5274 /* If we accessed previous frames, then the generated code expects
5275 to be able to access the saved ebp value in our frame. */
5276 if (cfun
->machine
->accesses_prev_frame
)
5279 /* Several x86 os'es need a frame pointer for other reasons,
5280 usually pertaining to setjmp. */
5281 if (SUBTARGET_FRAME_POINTER_REQUIRED
)
5284 /* In override_options, TARGET_OMIT_LEAF_FRAME_POINTER turns off
5285 the frame pointer by default. Turn it back on now if we've not
5286 got a leaf function. */
5287 if (TARGET_OMIT_LEAF_FRAME_POINTER
5288 && (!current_function_is_leaf
5289 || ix86_current_function_calls_tls_descriptor
))
5292 if (current_function_profile
)
5298 /* Record that the current function accesses previous call frames. */
5301 ix86_setup_frame_addresses (void)
5303 cfun
->machine
->accesses_prev_frame
= 1;
/* Choose whether PC-thunk helpers can be emitted in hidden link-once
   sections (one copy kept at link time) rather than as local labels.
   NOTE(review): the #else/#endif lines of this conditional are missing
   from this extraction — confirm against the original file.  */
5306 #if (defined(HAVE_GAS_HIDDEN) && (SUPPORTS_ONE_ONLY - 0)) || TARGET_MACHO
5307 # define USE_HIDDEN_LINKONCE 1
5309 # define USE_HIDDEN_LINKONCE 0
/* Bitmask: bit REGNO set when a __i686.get_pc_thunk.* for that register
   has been requested and must be emitted at end of file.  */
5312 static int pic_labels_used
;
5314 /* Fills in the label name that should be used for a pc thunk for
5315 the given register. */
5318 get_pc_thunk_name (char name
[32], unsigned int regno
)
5320 gcc_assert (!TARGET_64BIT
);
5322 if (USE_HIDDEN_LINKONCE
)
5323 sprintf (name
, "__i686.get_pc_thunk.%s", reg_names
[regno
]);
5325 ASM_GENERATE_INTERNAL_LABEL (name
, "LPR", regno
);
5329 /* This function generates code for -fpic that loads %ebx with
5330 the return address of the caller and then returns. */
5333 ix86_file_end (void)
5338 for (regno
= 0; regno
< 8; ++regno
)
5342 if (! ((pic_labels_used
>> regno
) & 1))
5345 get_pc_thunk_name (name
, regno
);
5350 switch_to_section (darwin_sections
[text_coal_section
]);
5351 fputs ("\t.weak_definition\t", asm_out_file
);
5352 assemble_name (asm_out_file
, name
);
5353 fputs ("\n\t.private_extern\t", asm_out_file
);
5354 assemble_name (asm_out_file
, name
);
5355 fputs ("\n", asm_out_file
);
5356 ASM_OUTPUT_LABEL (asm_out_file
, name
);
5360 if (USE_HIDDEN_LINKONCE
)
5364 decl
= build_decl (FUNCTION_DECL
, get_identifier (name
),
5366 TREE_PUBLIC (decl
) = 1;
5367 TREE_STATIC (decl
) = 1;
5368 DECL_ONE_ONLY (decl
) = 1;
5370 (*targetm
.asm_out
.unique_section
) (decl
, 0);
5371 switch_to_section (get_named_section (decl
, NULL
, 0));
5373 (*targetm
.asm_out
.globalize_label
) (asm_out_file
, name
);
5374 fputs ("\t.hidden\t", asm_out_file
);
5375 assemble_name (asm_out_file
, name
);
5376 fputc ('\n', asm_out_file
);
5377 ASM_DECLARE_FUNCTION_NAME (asm_out_file
, name
, decl
);
5381 switch_to_section (text_section
);
5382 ASM_OUTPUT_LABEL (asm_out_file
, name
);
5385 xops
[0] = gen_rtx_REG (SImode
, regno
);
5386 xops
[1] = gen_rtx_MEM (SImode
, stack_pointer_rtx
);
5387 output_asm_insn ("mov{l}\t{%1, %0|%0, %1}", xops
);
5388 output_asm_insn ("ret", xops
);
5391 if (NEED_INDICATE_EXEC_STACK
)
5392 file_end_indicate_exec_stack ();
5395 /* Emit code for the SET_GOT patterns. */
5398 output_set_got (rtx dest
, rtx label ATTRIBUTE_UNUSED
)
5404 if (TARGET_VXWORKS_RTP
&& flag_pic
)
5406 /* Load (*VXWORKS_GOTT_BASE) into the PIC register. */
5407 xops
[2] = gen_rtx_MEM (Pmode
,
5408 gen_rtx_SYMBOL_REF (Pmode
, VXWORKS_GOTT_BASE
));
5409 output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops
);
5411 /* Load (*VXWORKS_GOTT_BASE)[VXWORKS_GOTT_INDEX] into the PIC register.
5412 Use %P and a local symbol in order to print VXWORKS_GOTT_INDEX as
5413 an unadorned address. */
5414 xops
[2] = gen_rtx_SYMBOL_REF (Pmode
, VXWORKS_GOTT_INDEX
);
5415 SYMBOL_REF_FLAGS (xops
[2]) |= SYMBOL_FLAG_LOCAL
;
5416 output_asm_insn ("mov{l}\t{%P2(%0), %0|%0, DWORD PTR %P2[%0]}", xops
);
5420 xops
[1] = gen_rtx_SYMBOL_REF (Pmode
, GOT_SYMBOL_NAME
);
5422 if (! TARGET_DEEP_BRANCH_PREDICTION
|| !flag_pic
)
5424 xops
[2] = gen_rtx_LABEL_REF (Pmode
, label
? label
: gen_label_rtx ());
5427 output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops
);
5429 output_asm_insn ("call\t%a2", xops
);
5432 /* Output the Mach-O "canonical" label name ("Lxx$pb") here too. This
5433 is what will be referenced by the Mach-O PIC subsystem. */
5435 ASM_OUTPUT_LABEL (asm_out_file
, machopic_function_base_name ());
5438 (*targetm
.asm_out
.internal_label
) (asm_out_file
, "L",
5439 CODE_LABEL_NUMBER (XEXP (xops
[2], 0)));
5442 output_asm_insn ("pop{l}\t%0", xops
);
5447 get_pc_thunk_name (name
, REGNO (dest
));
5448 pic_labels_used
|= 1 << REGNO (dest
);
5450 xops
[2] = gen_rtx_SYMBOL_REF (Pmode
, ggc_strdup (name
));
5451 xops
[2] = gen_rtx_MEM (QImode
, xops
[2]);
5452 output_asm_insn ("call\t%X2", xops
);
5453 /* Output the Mach-O "canonical" label name ("Lxx$pb") here too. This
5454 is what will be referenced by the Mach-O PIC subsystem. */
5457 ASM_OUTPUT_LABEL (asm_out_file
, machopic_function_base_name ());
5459 targetm
.asm_out
.internal_label (asm_out_file
, "L",
5460 CODE_LABEL_NUMBER (label
));
5467 if (!flag_pic
|| TARGET_DEEP_BRANCH_PREDICTION
)
5468 output_asm_insn ("add{l}\t{%1, %0|%0, %1}", xops
);
5470 output_asm_insn ("add{l}\t{%1+[.-%a2], %0|%0, %1+(.-%a2)}", xops
);
5475 /* Generate an "push" pattern for input ARG. */
5480 return gen_rtx_SET (VOIDmode
,
5482 gen_rtx_PRE_DEC (Pmode
,
5483 stack_pointer_rtx
)),
5487 /* Return >= 0 if there is an unused call-clobbered register available
5488 for the entire function. */
5491 ix86_select_alt_pic_regnum (void)
5493 if (current_function_is_leaf
&& !current_function_profile
5494 && !ix86_current_function_calls_tls_descriptor
)
5497 for (i
= 2; i
>= 0; --i
)
5498 if (!regs_ever_live
[i
])
5502 return INVALID_REGNUM
;
5505 /* Return 1 if we need to save REGNO. */
5507 ix86_save_reg (unsigned int regno
, int maybe_eh_return
)
5509 if (pic_offset_table_rtx
5510 && regno
== REAL_PIC_OFFSET_TABLE_REGNUM
5511 && (regs_ever_live
[REAL_PIC_OFFSET_TABLE_REGNUM
]
5512 || current_function_profile
5513 || current_function_calls_eh_return
5514 || current_function_uses_const_pool
))
5516 if (ix86_select_alt_pic_regnum () != INVALID_REGNUM
)
5521 if (current_function_calls_eh_return
&& maybe_eh_return
)
5526 unsigned test
= EH_RETURN_DATA_REGNO (i
);
5527 if (test
== INVALID_REGNUM
)
5534 if (cfun
->machine
->force_align_arg_pointer
5535 && regno
== REGNO (cfun
->machine
->force_align_arg_pointer
))
5538 return (regs_ever_live
[regno
]
5539 && !call_used_regs
[regno
]
5540 && !fixed_regs
[regno
]
5541 && (regno
!= HARD_FRAME_POINTER_REGNUM
|| !frame_pointer_needed
));
5544 /* Return number of registers to be saved on the stack. */
5547 ix86_nsaved_regs (void)
5552 for (regno
= FIRST_PSEUDO_REGISTER
- 1; regno
>= 0; regno
--)
5553 if (ix86_save_reg (regno
, true))
5558 /* Return the offset between two registers, one to be eliminated, and the other
5559 its replacement, at the start of a routine. */
5562 ix86_initial_elimination_offset (int from
, int to
)
5564 struct ix86_frame frame
;
5565 ix86_compute_frame_layout (&frame
);
5567 if (from
== ARG_POINTER_REGNUM
&& to
== HARD_FRAME_POINTER_REGNUM
)
5568 return frame
.hard_frame_pointer_offset
;
5569 else if (from
== FRAME_POINTER_REGNUM
5570 && to
== HARD_FRAME_POINTER_REGNUM
)
5571 return frame
.hard_frame_pointer_offset
- frame
.frame_pointer_offset
;
5574 gcc_assert (to
== STACK_POINTER_REGNUM
);
5576 if (from
== ARG_POINTER_REGNUM
)
5577 return frame
.stack_pointer_offset
;
5579 gcc_assert (from
== FRAME_POINTER_REGNUM
);
5580 return frame
.stack_pointer_offset
- frame
.frame_pointer_offset
;
5584 /* Fill structure ix86_frame about frame of currently computed function. */
5587 ix86_compute_frame_layout (struct ix86_frame
*frame
)
5589 HOST_WIDE_INT total_size
;
5590 unsigned int stack_alignment_needed
;
5591 HOST_WIDE_INT offset
;
5592 unsigned int preferred_alignment
;
5593 HOST_WIDE_INT size
= get_frame_size ();
5595 frame
->nregs
= ix86_nsaved_regs ();
5598 stack_alignment_needed
= cfun
->stack_alignment_needed
/ BITS_PER_UNIT
;
5599 preferred_alignment
= cfun
->preferred_stack_boundary
/ BITS_PER_UNIT
;
5601 /* During reload iteration the amount of registers saved can change.
5602 Recompute the value as needed. Do not recompute when amount of registers
5603 didn't change as reload does multiple calls to the function and does not
5604 expect the decision to change within single iteration. */
5606 && cfun
->machine
->use_fast_prologue_epilogue_nregs
!= frame
->nregs
)
5608 int count
= frame
->nregs
;
5610 cfun
->machine
->use_fast_prologue_epilogue_nregs
= count
;
5611 /* The fast prologue uses move instead of push to save registers. This
5612 is significantly longer, but also executes faster as modern hardware
5613 can execute the moves in parallel, but can't do that for push/pop.
5615 Be careful about choosing what prologue to emit: When function takes
5616 many instructions to execute we may use slow version as well as in
5617 case function is known to be outside hot spot (this is known with
5618 feedback only). Weight the size of function by number of registers
5619 to save as it is cheap to use one or two push instructions but very
5620 slow to use many of them. */
5622 count
= (count
- 1) * FAST_PROLOGUE_INSN_COUNT
;
5623 if (cfun
->function_frequency
< FUNCTION_FREQUENCY_NORMAL
5624 || (flag_branch_probabilities
5625 && cfun
->function_frequency
< FUNCTION_FREQUENCY_HOT
))
5626 cfun
->machine
->use_fast_prologue_epilogue
= false;
5628 cfun
->machine
->use_fast_prologue_epilogue
5629 = !expensive_function_p (count
);
5631 if (TARGET_PROLOGUE_USING_MOVE
5632 && cfun
->machine
->use_fast_prologue_epilogue
)
5633 frame
->save_regs_using_mov
= true;
5635 frame
->save_regs_using_mov
= false;
5638 /* Skip return address and saved base pointer. */
5639 offset
= frame_pointer_needed
? UNITS_PER_WORD
* 2 : UNITS_PER_WORD
;
5641 frame
->hard_frame_pointer_offset
= offset
;
5643 /* Do some sanity checking of stack_alignment_needed and
5644 preferred_alignment, since i386 port is the only using those features
5645 that may break easily. */
5647 gcc_assert (!size
|| stack_alignment_needed
);
5648 gcc_assert (preferred_alignment
>= STACK_BOUNDARY
/ BITS_PER_UNIT
);
5649 gcc_assert (preferred_alignment
<= PREFERRED_STACK_BOUNDARY
/ BITS_PER_UNIT
);
5650 gcc_assert (stack_alignment_needed
5651 <= PREFERRED_STACK_BOUNDARY
/ BITS_PER_UNIT
);
5653 if (stack_alignment_needed
< STACK_BOUNDARY
/ BITS_PER_UNIT
)
5654 stack_alignment_needed
= STACK_BOUNDARY
/ BITS_PER_UNIT
;
5656 /* Register save area */
5657 offset
+= frame
->nregs
* UNITS_PER_WORD
;
5660 if (ix86_save_varrargs_registers
)
5662 offset
+= X86_64_VARARGS_SIZE
;
5663 frame
->va_arg_size
= X86_64_VARARGS_SIZE
;
5666 frame
->va_arg_size
= 0;
5668 /* Align start of frame for local function. */
5669 frame
->padding1
= ((offset
+ stack_alignment_needed
- 1)
5670 & -stack_alignment_needed
) - offset
;
5672 offset
+= frame
->padding1
;
5674 /* Frame pointer points here. */
5675 frame
->frame_pointer_offset
= offset
;
5679 /* Add outgoing arguments area. Can be skipped if we eliminated
5680 all the function calls as dead code.
5681 Skipping is however impossible when function calls alloca. Alloca
5682 expander assumes that last current_function_outgoing_args_size
5683 of stack frame are unused. */
5684 if (ACCUMULATE_OUTGOING_ARGS
5685 && (!current_function_is_leaf
|| current_function_calls_alloca
5686 || ix86_current_function_calls_tls_descriptor
))
5688 offset
+= current_function_outgoing_args_size
;
5689 frame
->outgoing_arguments_size
= current_function_outgoing_args_size
;
5692 frame
->outgoing_arguments_size
= 0;
5694 /* Align stack boundary. Only needed if we're calling another function
5696 if (!current_function_is_leaf
|| current_function_calls_alloca
5697 || ix86_current_function_calls_tls_descriptor
)
5698 frame
->padding2
= ((offset
+ preferred_alignment
- 1)
5699 & -preferred_alignment
) - offset
;
5701 frame
->padding2
= 0;
5703 offset
+= frame
->padding2
;
5705 /* We've reached end of stack frame. */
5706 frame
->stack_pointer_offset
= offset
;
5708 /* Size prologue needs to allocate. */
5709 frame
->to_allocate
=
5710 (size
+ frame
->padding1
+ frame
->padding2
5711 + frame
->outgoing_arguments_size
+ frame
->va_arg_size
);
5713 if ((!frame
->to_allocate
&& frame
->nregs
<= 1)
5714 || (TARGET_64BIT
&& frame
->to_allocate
>= (HOST_WIDE_INT
) 0x80000000))
5715 frame
->save_regs_using_mov
= false;
5717 if (TARGET_RED_ZONE
&& current_function_sp_is_unchanging
5718 && current_function_is_leaf
5719 && !ix86_current_function_calls_tls_descriptor
)
5721 frame
->red_zone_size
= frame
->to_allocate
;
5722 if (frame
->save_regs_using_mov
)
5723 frame
->red_zone_size
+= frame
->nregs
* UNITS_PER_WORD
;
5724 if (frame
->red_zone_size
> RED_ZONE_SIZE
- RED_ZONE_RESERVE
)
5725 frame
->red_zone_size
= RED_ZONE_SIZE
- RED_ZONE_RESERVE
;
5728 frame
->red_zone_size
= 0;
5729 frame
->to_allocate
-= frame
->red_zone_size
;
5730 frame
->stack_pointer_offset
-= frame
->red_zone_size
;
5732 fprintf (stderr
, "\n");
5733 fprintf (stderr
, "nregs: %ld\n", (long)frame
->nregs
);
5734 fprintf (stderr
, "size: %ld\n", (long)size
);
5735 fprintf (stderr
, "alignment1: %ld\n", (long)stack_alignment_needed
);
5736 fprintf (stderr
, "padding1: %ld\n", (long)frame
->padding1
);
5737 fprintf (stderr
, "va_arg: %ld\n", (long)frame
->va_arg_size
);
5738 fprintf (stderr
, "padding2: %ld\n", (long)frame
->padding2
);
5739 fprintf (stderr
, "to_allocate: %ld\n", (long)frame
->to_allocate
);
5740 fprintf (stderr
, "red_zone_size: %ld\n", (long)frame
->red_zone_size
);
5741 fprintf (stderr
, "frame_pointer_offset: %ld\n", (long)frame
->frame_pointer_offset
);
5742 fprintf (stderr
, "hard_frame_pointer_offset: %ld\n",
5743 (long)frame
->hard_frame_pointer_offset
);
5744 fprintf (stderr
, "stack_pointer_offset: %ld\n", (long)frame
->stack_pointer_offset
);
5745 fprintf (stderr
, "current_function_is_leaf: %ld\n", (long)current_function_is_leaf
);
5746 fprintf (stderr
, "current_function_calls_alloca: %ld\n", (long)current_function_calls_alloca
);
5747 fprintf (stderr
, "x86_current_function_calls_tls_descriptor: %ld\n", (long)ix86_current_function_calls_tls_descriptor
);
5751 /* Emit code to save registers in the prologue. */
5754 ix86_emit_save_regs (void)
5759 for (regno
= FIRST_PSEUDO_REGISTER
; regno
-- > 0; )
5760 if (ix86_save_reg (regno
, true))
5762 insn
= emit_insn (gen_push (gen_rtx_REG (Pmode
, regno
)));
5763 RTX_FRAME_RELATED_P (insn
) = 1;
5767 /* Emit code to save registers using MOV insns. First register
5768 is restored from POINTER + OFFSET. */
5770 ix86_emit_save_regs_using_mov (rtx pointer
, HOST_WIDE_INT offset
)
5775 for (regno
= 0; regno
< FIRST_PSEUDO_REGISTER
; regno
++)
5776 if (ix86_save_reg (regno
, true))
5778 insn
= emit_move_insn (adjust_address (gen_rtx_MEM (Pmode
, pointer
),
5780 gen_rtx_REG (Pmode
, regno
));
5781 RTX_FRAME_RELATED_P (insn
) = 1;
5782 offset
+= UNITS_PER_WORD
;
5786 /* Expand prologue or epilogue stack adjustment.
5787 The pattern exist to put a dependency on all ebp-based memory accesses.
5788 STYLE should be negative if instructions should be marked as frame related,
5789 zero if %r11 register is live and cannot be freely used and positive
5793 pro_epilogue_adjust_stack (rtx dest
, rtx src
, rtx offset
, int style
)
5798 insn
= emit_insn (gen_pro_epilogue_adjust_stack_1 (dest
, src
, offset
));
5799 else if (x86_64_immediate_operand (offset
, DImode
))
5800 insn
= emit_insn (gen_pro_epilogue_adjust_stack_rex64 (dest
, src
, offset
));
5804 /* r11 is used by indirect sibcall return as well, set before the
5805 epilogue and used after the epilogue. ATM indirect sibcall
5806 shouldn't be used together with huge frame sizes in one
5807 function because of the frame_size check in sibcall.c. */
5809 r11
= gen_rtx_REG (DImode
, R11_REG
);
5810 insn
= emit_insn (gen_rtx_SET (DImode
, r11
, offset
));
5812 RTX_FRAME_RELATED_P (insn
) = 1;
5813 insn
= emit_insn (gen_pro_epilogue_adjust_stack_rex64_2 (dest
, src
, r11
,
5817 RTX_FRAME_RELATED_P (insn
) = 1;
5820 /* Handle the TARGET_INTERNAL_ARG_POINTER hook. */
5823 ix86_internal_arg_pointer (void)
5825 bool has_force_align_arg_pointer
=
5826 (0 != lookup_attribute (ix86_force_align_arg_pointer_string
,
5827 TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl
))));
5828 if ((FORCE_PREFERRED_STACK_BOUNDARY_IN_MAIN
5829 && DECL_NAME (current_function_decl
)
5830 && MAIN_NAME_P (DECL_NAME (current_function_decl
))
5831 && DECL_FILE_SCOPE_P (current_function_decl
))
5832 || ix86_force_align_arg_pointer
5833 || has_force_align_arg_pointer
)
5835 /* Nested functions can't realign the stack due to a register
5837 if (DECL_CONTEXT (current_function_decl
)
5838 && TREE_CODE (DECL_CONTEXT (current_function_decl
)) == FUNCTION_DECL
)
5840 if (ix86_force_align_arg_pointer
)
5841 warning (0, "-mstackrealign ignored for nested functions");
5842 if (has_force_align_arg_pointer
)
5843 error ("%s not supported for nested functions",
5844 ix86_force_align_arg_pointer_string
);
5845 return virtual_incoming_args_rtx
;
5847 cfun
->machine
->force_align_arg_pointer
= gen_rtx_REG (Pmode
, 2);
5848 return copy_to_reg (cfun
->machine
->force_align_arg_pointer
);
5851 return virtual_incoming_args_rtx
;
5854 /* Handle the TARGET_DWARF_HANDLE_FRAME_UNSPEC hook.
5855 This is called from dwarf2out.c to emit call frame instructions
5856 for frame-related insns containing UNSPECs and UNSPEC_VOLATILEs. */
5858 ix86_dwarf_handle_frame_unspec (const char *label
, rtx pattern
, int index
)
5860 rtx unspec
= SET_SRC (pattern
);
5861 gcc_assert (GET_CODE (unspec
) == UNSPEC
);
5865 case UNSPEC_REG_SAVE
:
5866 dwarf2out_reg_save_reg (label
, XVECEXP (unspec
, 0, 0),
5867 SET_DEST (pattern
));
5869 case UNSPEC_DEF_CFA
:
5870 dwarf2out_def_cfa (label
, REGNO (SET_DEST (pattern
)),
5871 INTVAL (XVECEXP (unspec
, 0, 0)));
5878 /* Expand the prologue into a bunch of separate insns. */
5881 ix86_expand_prologue (void)
5885 struct ix86_frame frame
;
5886 HOST_WIDE_INT allocate
;
5888 ix86_compute_frame_layout (&frame
);
5890 if (cfun
->machine
->force_align_arg_pointer
)
5894 /* Grab the argument pointer. */
5895 x
= plus_constant (stack_pointer_rtx
, 4);
5896 y
= cfun
->machine
->force_align_arg_pointer
;
5897 insn
= emit_insn (gen_rtx_SET (VOIDmode
, y
, x
));
5898 RTX_FRAME_RELATED_P (insn
) = 1;
5900 /* The unwind info consists of two parts: install the fafp as the cfa,
5901 and record the fafp as the "save register" of the stack pointer.
5902 The later is there in order that the unwinder can see where it
5903 should restore the stack pointer across the and insn. */
5904 x
= gen_rtx_UNSPEC (VOIDmode
, gen_rtvec (1, const0_rtx
), UNSPEC_DEF_CFA
);
5905 x
= gen_rtx_SET (VOIDmode
, y
, x
);
5906 RTX_FRAME_RELATED_P (x
) = 1;
5907 y
= gen_rtx_UNSPEC (VOIDmode
, gen_rtvec (1, stack_pointer_rtx
),
5909 y
= gen_rtx_SET (VOIDmode
, cfun
->machine
->force_align_arg_pointer
, y
);
5910 RTX_FRAME_RELATED_P (y
) = 1;
5911 x
= gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (2, x
, y
));
5912 x
= gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR
, x
, NULL
);
5913 REG_NOTES (insn
) = x
;
5915 /* Align the stack. */
5916 emit_insn (gen_andsi3 (stack_pointer_rtx
, stack_pointer_rtx
,
5919 /* And here we cheat like madmen with the unwind info. We force the
5920 cfa register back to sp+4, which is exactly what it was at the
5921 start of the function. Re-pushing the return address results in
5922 the return at the same spot relative to the cfa, and thus is
5923 correct wrt the unwind info. */
5924 x
= cfun
->machine
->force_align_arg_pointer
;
5925 x
= gen_frame_mem (Pmode
, plus_constant (x
, -4));
5926 insn
= emit_insn (gen_push (x
));
5927 RTX_FRAME_RELATED_P (insn
) = 1;
5930 x
= gen_rtx_UNSPEC (VOIDmode
, gen_rtvec (1, x
), UNSPEC_DEF_CFA
);
5931 x
= gen_rtx_SET (VOIDmode
, stack_pointer_rtx
, x
);
5932 x
= gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR
, x
, NULL
);
5933 REG_NOTES (insn
) = x
;
5936 /* Note: AT&T enter does NOT have reversed args. Enter is probably
5937 slower on all targets. Also sdb doesn't like it. */
5939 if (frame_pointer_needed
)
5941 insn
= emit_insn (gen_push (hard_frame_pointer_rtx
));
5942 RTX_FRAME_RELATED_P (insn
) = 1;
5944 insn
= emit_move_insn (hard_frame_pointer_rtx
, stack_pointer_rtx
);
5945 RTX_FRAME_RELATED_P (insn
) = 1;
5948 allocate
= frame
.to_allocate
;
5950 if (!frame
.save_regs_using_mov
)
5951 ix86_emit_save_regs ();
5953 allocate
+= frame
.nregs
* UNITS_PER_WORD
;
5955 /* When using red zone we may start register saving before allocating
5956 the stack frame saving one cycle of the prologue. */
5957 if (TARGET_RED_ZONE
&& frame
.save_regs_using_mov
)
5958 ix86_emit_save_regs_using_mov (frame_pointer_needed
? hard_frame_pointer_rtx
5959 : stack_pointer_rtx
,
5960 -frame
.nregs
* UNITS_PER_WORD
);
5964 else if (! TARGET_STACK_PROBE
|| allocate
< CHECK_STACK_LIMIT
)
5965 pro_epilogue_adjust_stack (stack_pointer_rtx
, stack_pointer_rtx
,
5966 GEN_INT (-allocate
), -1);
5969 /* Only valid for Win32. */
5970 rtx eax
= gen_rtx_REG (SImode
, 0);
5971 bool eax_live
= ix86_eax_live_at_start_p ();
5974 gcc_assert (!TARGET_64BIT
);
5978 emit_insn (gen_push (eax
));
5982 emit_move_insn (eax
, GEN_INT (allocate
));
5984 insn
= emit_insn (gen_allocate_stack_worker (eax
));
5985 RTX_FRAME_RELATED_P (insn
) = 1;
5986 t
= gen_rtx_PLUS (Pmode
, stack_pointer_rtx
, GEN_INT (-allocate
));
5987 t
= gen_rtx_SET (VOIDmode
, stack_pointer_rtx
, t
);
5988 REG_NOTES (insn
) = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR
,
5989 t
, REG_NOTES (insn
));
5993 if (frame_pointer_needed
)
5994 t
= plus_constant (hard_frame_pointer_rtx
,
5997 - frame
.nregs
* UNITS_PER_WORD
);
5999 t
= plus_constant (stack_pointer_rtx
, allocate
);
6000 emit_move_insn (eax
, gen_rtx_MEM (SImode
, t
));
6004 if (frame
.save_regs_using_mov
&& !TARGET_RED_ZONE
)
6006 if (!frame_pointer_needed
|| !frame
.to_allocate
)
6007 ix86_emit_save_regs_using_mov (stack_pointer_rtx
, frame
.to_allocate
);
6009 ix86_emit_save_regs_using_mov (hard_frame_pointer_rtx
,
6010 -frame
.nregs
* UNITS_PER_WORD
);
6013 pic_reg_used
= false;
6014 if (pic_offset_table_rtx
6015 && (regs_ever_live
[REAL_PIC_OFFSET_TABLE_REGNUM
]
6016 || current_function_profile
))
6018 unsigned int alt_pic_reg_used
= ix86_select_alt_pic_regnum ();
6020 if (alt_pic_reg_used
!= INVALID_REGNUM
)
6021 REGNO (pic_offset_table_rtx
) = alt_pic_reg_used
;
6023 pic_reg_used
= true;
6030 if (ix86_cmodel
== CM_LARGE_PIC
)
6032 rtx tmp_reg
= gen_rtx_REG (DImode
,
6033 FIRST_REX_INT_REG
+ 3 /* R11 */);
6034 rtx label
= gen_label_rtx ();
6036 LABEL_PRESERVE_P (label
) = 1;
6037 gcc_assert (REGNO (pic_offset_table_rtx
) != REGNO (tmp_reg
));
6038 insn
= emit_insn (gen_set_rip_rex64 (pic_offset_table_rtx
, label
));
6039 REG_NOTES (insn
) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD
, const0_rtx
, NULL
);
6040 insn
= emit_insn (gen_set_got_offset_rex64 (tmp_reg
, label
));
6041 REG_NOTES (insn
) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD
, const0_rtx
, NULL
);
6042 insn
= emit_insn (gen_adddi3 (pic_offset_table_rtx
,
6043 pic_offset_table_rtx
, tmp_reg
));
6046 insn
= emit_insn (gen_set_got_rex64 (pic_offset_table_rtx
));
6049 insn
= emit_insn (gen_set_got (pic_offset_table_rtx
));
6051 /* Even with accurate pre-reload life analysis, we can wind up
6052 deleting all references to the pic register after reload.
6053 Consider if cross-jumping unifies two sides of a branch
6054 controlled by a comparison vs the only read from a global.
6055 In which case, allow the set_got to be deleted, though we're
6056 too late to do anything about the ebx save in the prologue. */
6057 REG_NOTES (insn
) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD
, const0_rtx
, NULL
);
6060 /* Prevent function calls from be scheduled before the call to mcount.
6061 In the pic_reg_used case, make sure that the got load isn't deleted. */
6062 if (current_function_profile
)
6063 emit_insn (gen_blockage (pic_reg_used
? pic_offset_table_rtx
: const0_rtx
));
6066 /* Emit code to restore saved registers using MOV insns. First register
6067 is restored from POINTER + OFFSET. */
6069 ix86_emit_restore_regs_using_mov (rtx pointer
, HOST_WIDE_INT offset
,
6070 int maybe_eh_return
)
6073 rtx base_address
= gen_rtx_MEM (Pmode
, pointer
);
6075 for (regno
= 0; regno
< FIRST_PSEUDO_REGISTER
; regno
++)
6076 if (ix86_save_reg (regno
, maybe_eh_return
))
6078 /* Ensure that adjust_address won't be forced to produce pointer
6079 out of range allowed by x86-64 instruction set. */
6080 if (TARGET_64BIT
&& offset
!= trunc_int_for_mode (offset
, SImode
))
6084 r11
= gen_rtx_REG (DImode
, R11_REG
);
6085 emit_move_insn (r11
, GEN_INT (offset
));
6086 emit_insn (gen_adddi3 (r11
, r11
, pointer
));
6087 base_address
= gen_rtx_MEM (Pmode
, r11
);
6090 emit_move_insn (gen_rtx_REG (Pmode
, regno
),
6091 adjust_address (base_address
, Pmode
, offset
));
6092 offset
+= UNITS_PER_WORD
;
6096 /* Restore function stack, frame, and registers. */
6099 ix86_expand_epilogue (int style
)
6102 int sp_valid
= !frame_pointer_needed
|| current_function_sp_is_unchanging
;
6103 struct ix86_frame frame
;
6104 HOST_WIDE_INT offset
;
6106 ix86_compute_frame_layout (&frame
);
6108 /* Calculate start of saved registers relative to ebp. Special care
6109 must be taken for the normal return case of a function using
6110 eh_return: the eax and edx registers are marked as saved, but not
6111 restored along this path. */
6112 offset
= frame
.nregs
;
6113 if (current_function_calls_eh_return
&& style
!= 2)
6115 offset
*= -UNITS_PER_WORD
;
6117 /* If we're only restoring one register and sp is not valid then
6118 using a move instruction to restore the register since it's
6119 less work than reloading sp and popping the register.
6121 The default code result in stack adjustment using add/lea instruction,
6122 while this code results in LEAVE instruction (or discrete equivalent),
6123 so it is profitable in some other cases as well. Especially when there
6124 are no registers to restore. We also use this code when TARGET_USE_LEAVE
6125 and there is exactly one register to pop. This heuristic may need some
6126 tuning in future. */
6127 if ((!sp_valid
&& frame
.nregs
<= 1)
6128 || (TARGET_EPILOGUE_USING_MOVE
6129 && cfun
->machine
->use_fast_prologue_epilogue
6130 && (frame
.nregs
> 1 || frame
.to_allocate
))
6131 || (frame_pointer_needed
&& !frame
.nregs
&& frame
.to_allocate
)
6132 || (frame_pointer_needed
&& TARGET_USE_LEAVE
6133 && cfun
->machine
->use_fast_prologue_epilogue
6134 && frame
.nregs
== 1)
6135 || current_function_calls_eh_return
)
6137 /* Restore registers. We can use ebp or esp to address the memory
6138 locations. If both are available, default to ebp, since offsets
6139 are known to be small. Only exception is esp pointing directly to the
6140 end of block of saved registers, where we may simplify addressing
6143 if (!frame_pointer_needed
|| (sp_valid
&& !frame
.to_allocate
))
6144 ix86_emit_restore_regs_using_mov (stack_pointer_rtx
,
6145 frame
.to_allocate
, style
== 2);
6147 ix86_emit_restore_regs_using_mov (hard_frame_pointer_rtx
,
6148 offset
, style
== 2);
6150 /* eh_return epilogues need %ecx added to the stack pointer. */
6153 rtx tmp
, sa
= EH_RETURN_STACKADJ_RTX
;
6155 if (frame_pointer_needed
)
6157 tmp
= gen_rtx_PLUS (Pmode
, hard_frame_pointer_rtx
, sa
);
6158 tmp
= plus_constant (tmp
, UNITS_PER_WORD
);
6159 emit_insn (gen_rtx_SET (VOIDmode
, sa
, tmp
));
6161 tmp
= gen_rtx_MEM (Pmode
, hard_frame_pointer_rtx
);
6162 emit_move_insn (hard_frame_pointer_rtx
, tmp
);
6164 pro_epilogue_adjust_stack (stack_pointer_rtx
, sa
,
6169 tmp
= gen_rtx_PLUS (Pmode
, stack_pointer_rtx
, sa
);
6170 tmp
= plus_constant (tmp
, (frame
.to_allocate
6171 + frame
.nregs
* UNITS_PER_WORD
));
6172 emit_insn (gen_rtx_SET (VOIDmode
, stack_pointer_rtx
, tmp
));
6175 else if (!frame_pointer_needed
)
6176 pro_epilogue_adjust_stack (stack_pointer_rtx
, stack_pointer_rtx
,
6177 GEN_INT (frame
.to_allocate
6178 + frame
.nregs
* UNITS_PER_WORD
),
6180 /* If not an i386, mov & pop is faster than "leave". */
6181 else if (TARGET_USE_LEAVE
|| optimize_size
6182 || !cfun
->machine
->use_fast_prologue_epilogue
)
6183 emit_insn (TARGET_64BIT
? gen_leave_rex64 () : gen_leave ());
6186 pro_epilogue_adjust_stack (stack_pointer_rtx
,
6187 hard_frame_pointer_rtx
,
6190 emit_insn (gen_popdi1 (hard_frame_pointer_rtx
));
6192 emit_insn (gen_popsi1 (hard_frame_pointer_rtx
));
6197 /* First step is to deallocate the stack frame so that we can
6198 pop the registers. */
6201 gcc_assert (frame_pointer_needed
);
6202 pro_epilogue_adjust_stack (stack_pointer_rtx
,
6203 hard_frame_pointer_rtx
,
6204 GEN_INT (offset
), style
);
6206 else if (frame
.to_allocate
)
6207 pro_epilogue_adjust_stack (stack_pointer_rtx
, stack_pointer_rtx
,
6208 GEN_INT (frame
.to_allocate
), style
);
6210 for (regno
= 0; regno
< FIRST_PSEUDO_REGISTER
; regno
++)
6211 if (ix86_save_reg (regno
, false))
6214 emit_insn (gen_popdi1 (gen_rtx_REG (Pmode
, regno
)));
6216 emit_insn (gen_popsi1 (gen_rtx_REG (Pmode
, regno
)));
6218 if (frame_pointer_needed
)
6220 /* Leave results in shorter dependency chains on CPUs that are
6221 able to grok it fast. */
6222 if (TARGET_USE_LEAVE
)
6223 emit_insn (TARGET_64BIT
? gen_leave_rex64 () : gen_leave ());
6224 else if (TARGET_64BIT
)
6225 emit_insn (gen_popdi1 (hard_frame_pointer_rtx
));
6227 emit_insn (gen_popsi1 (hard_frame_pointer_rtx
));
6231 if (cfun
->machine
->force_align_arg_pointer
)
6233 emit_insn (gen_addsi3 (stack_pointer_rtx
,
6234 cfun
->machine
->force_align_arg_pointer
,
6238 /* Sibcall epilogues don't want a return instruction. */
6242 if (current_function_pops_args
&& current_function_args_size
)
6244 rtx popc
= GEN_INT (current_function_pops_args
);
6246 /* i386 can only pop 64K bytes. If asked to pop more, pop
6247 return address, do explicit add, and jump indirectly to the
6250 if (current_function_pops_args
>= 65536)
6252 rtx ecx
= gen_rtx_REG (SImode
, 2);
6254 /* There is no "pascal" calling convention in 64bit ABI. */
6255 gcc_assert (!TARGET_64BIT
);
6257 emit_insn (gen_popsi1 (ecx
));
6258 emit_insn (gen_addsi3 (stack_pointer_rtx
, stack_pointer_rtx
, popc
));
6259 emit_jump_insn (gen_return_indirect_internal (ecx
));
6262 emit_jump_insn (gen_return_pop_internal (popc
));
6265 emit_jump_insn (gen_return_internal ());
6268 /* Reset from the function's potential modifications. */
6271 ix86_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED
,
6272 HOST_WIDE_INT size ATTRIBUTE_UNUSED
)
6274 if (pic_offset_table_rtx
)
6275 REGNO (pic_offset_table_rtx
) = REAL_PIC_OFFSET_TABLE_REGNUM
;
6277 /* Mach-O doesn't support labels at the end of objects, so if
6278 it looks like we might want one, insert a NOP. */
6280 rtx insn
= get_last_insn ();
6283 && NOTE_LINE_NUMBER (insn
) != NOTE_INSN_DELETED_LABEL
)
6284 insn
= PREV_INSN (insn
);
6288 && NOTE_LINE_NUMBER (insn
) == NOTE_INSN_DELETED_LABEL
)))
6289 fputs ("\tnop\n", file
);
6295 /* Extract the parts of an RTL expression that is a valid memory address
6296 for an instruction. Return 0 if the structure of the address is
6297 grossly off. Return -1 if the address contains ASHIFT, so it is not
6298 strictly valid, but still used for computing length of lea instruction. */
6301 ix86_decompose_address (rtx addr
, struct ix86_address
*out
)
6303 rtx base
= NULL_RTX
, index
= NULL_RTX
, disp
= NULL_RTX
;
6304 rtx base_reg
, index_reg
;
6305 HOST_WIDE_INT scale
= 1;
6306 rtx scale_rtx
= NULL_RTX
;
6308 enum ix86_address_seg seg
= SEG_DEFAULT
;
6310 if (REG_P (addr
) || GET_CODE (addr
) == SUBREG
)
6312 else if (GET_CODE (addr
) == PLUS
)
6322 addends
[n
++] = XEXP (op
, 1);
6325 while (GET_CODE (op
) == PLUS
);
6330 for (i
= n
; i
>= 0; --i
)
6333 switch (GET_CODE (op
))
6338 index
= XEXP (op
, 0);
6339 scale_rtx
= XEXP (op
, 1);
6343 if (XINT (op
, 1) == UNSPEC_TP
6344 && TARGET_TLS_DIRECT_SEG_REFS
6345 && seg
== SEG_DEFAULT
)
6346 seg
= TARGET_64BIT
? SEG_FS
: SEG_GS
;
6375 else if (GET_CODE (addr
) == MULT
)
6377 index
= XEXP (addr
, 0); /* index*scale */
6378 scale_rtx
= XEXP (addr
, 1);
6380 else if (GET_CODE (addr
) == ASHIFT
)
6384 /* We're called for lea too, which implements ashift on occasion. */
6385 index
= XEXP (addr
, 0);
6386 tmp
= XEXP (addr
, 1);
6387 if (!CONST_INT_P (tmp
))
6389 scale
= INTVAL (tmp
);
6390 if ((unsigned HOST_WIDE_INT
) scale
> 3)
6396 disp
= addr
; /* displacement */
6398 /* Extract the integral value of scale. */
6401 if (!CONST_INT_P (scale_rtx
))
6403 scale
= INTVAL (scale_rtx
);
6406 base_reg
= base
&& GET_CODE (base
) == SUBREG
? SUBREG_REG (base
) : base
;
6407 index_reg
= index
&& GET_CODE (index
) == SUBREG
? SUBREG_REG (index
) : index
;
6409 /* Allow arg pointer and stack pointer as index if there is not scaling. */
6410 if (base_reg
&& index_reg
&& scale
== 1
6411 && (index_reg
== arg_pointer_rtx
6412 || index_reg
== frame_pointer_rtx
6413 || (REG_P (index_reg
) && REGNO (index_reg
) == STACK_POINTER_REGNUM
)))
6416 tmp
= base
, base
= index
, index
= tmp
;
6417 tmp
= base_reg
, base_reg
= index_reg
, index_reg
= tmp
;
6420 /* Special case: %ebp cannot be encoded as a base without a displacement. */
6421 if ((base_reg
== hard_frame_pointer_rtx
6422 || base_reg
== frame_pointer_rtx
6423 || base_reg
== arg_pointer_rtx
) && !disp
)
6426 /* Special case: on K6, [%esi] makes the instruction vector decoded.
6427 Avoid this by transforming to [%esi+0]. */
6428 if (ix86_tune
== PROCESSOR_K6
&& !optimize_size
6429 && base_reg
&& !index_reg
&& !disp
6431 && REGNO_REG_CLASS (REGNO (base_reg
)) == SIREG
)
6434 /* Special case: encode reg+reg instead of reg*2. */
6435 if (!base
&& index
&& scale
&& scale
== 2)
6436 base
= index
, base_reg
= index_reg
, scale
= 1;
6438 /* Special case: scaling cannot be encoded without base or displacement. */
6439 if (!base
&& !disp
&& index
&& scale
!= 1)
6451 /* Return cost of the memory address x.
6452 For i386, it is better to use a complex address than let gcc copy
6453 the address into a reg and make a new pseudo. But not if the address
6454 requires to two regs - that would mean more pseudos with longer
6457 ix86_address_cost (rtx x
)
6459 struct ix86_address parts
;
6461 int ok
= ix86_decompose_address (x
, &parts
);
6465 if (parts
.base
&& GET_CODE (parts
.base
) == SUBREG
)
6466 parts
.base
= SUBREG_REG (parts
.base
);
6467 if (parts
.index
&& GET_CODE (parts
.index
) == SUBREG
)
6468 parts
.index
= SUBREG_REG (parts
.index
);
6470 /* More complex memory references are better. */
6471 if (parts
.disp
&& parts
.disp
!= const0_rtx
)
6473 if (parts
.seg
!= SEG_DEFAULT
)
6476 /* Attempt to minimize number of registers in the address. */
6478 && (!REG_P (parts
.base
) || REGNO (parts
.base
) >= FIRST_PSEUDO_REGISTER
))
6480 && (!REG_P (parts
.index
)
6481 || REGNO (parts
.index
) >= FIRST_PSEUDO_REGISTER
)))
6485 && (!REG_P (parts
.base
) || REGNO (parts
.base
) >= FIRST_PSEUDO_REGISTER
)
6487 && (!REG_P (parts
.index
) || REGNO (parts
.index
) >= FIRST_PSEUDO_REGISTER
)
6488 && parts
.base
!= parts
.index
)
6491 /* AMD-K6 don't like addresses with ModR/M set to 00_xxx_100b,
6492 since it's predecode logic can't detect the length of instructions
6493 and it degenerates to vector decoded. Increase cost of such
6494 addresses here. The penalty is minimally 2 cycles. It may be worthwhile
6495 to split such addresses or even refuse such addresses at all.
6497 Following addressing modes are affected:
6502 The first and last case may be avoidable by explicitly coding the zero in
6503 memory address, but I don't have AMD-K6 machine handy to check this
6507 && ((!parts
.disp
&& parts
.base
&& parts
.index
&& parts
.scale
!= 1)
6508 || (parts
.disp
&& !parts
.base
&& parts
.index
&& parts
.scale
!= 1)
6509 || (!parts
.disp
&& parts
.base
&& parts
.index
&& parts
.scale
== 1)))
6515 /* If X is a machine specific address (i.e. a symbol or label being
6516 referenced as a displacement from the GOT implemented using an
6517 UNSPEC), then return the base term. Otherwise return X. */
6520 ix86_find_base_term (rtx x
)
6526 if (GET_CODE (x
) != CONST
)
6529 if (GET_CODE (term
) == PLUS
6530 && (CONST_INT_P (XEXP (term
, 1))
6531 || GET_CODE (XEXP (term
, 1)) == CONST_DOUBLE
))
6532 term
= XEXP (term
, 0);
6533 if (GET_CODE (term
) != UNSPEC
6534 || XINT (term
, 1) != UNSPEC_GOTPCREL
)
6537 term
= XVECEXP (term
, 0, 0);
6539 if (GET_CODE (term
) != SYMBOL_REF
6540 && GET_CODE (term
) != LABEL_REF
)
6546 term
= ix86_delegitimize_address (x
);
6548 if (GET_CODE (term
) != SYMBOL_REF
6549 && GET_CODE (term
) != LABEL_REF
)
6555 /* Allow {LABEL | SYMBOL}_REF - SYMBOL_REF-FOR-PICBASE for Mach-O as
6556 this is used for to form addresses to local data when -fPIC is in
6560 darwin_local_data_pic (rtx disp
)
6562 if (GET_CODE (disp
) == MINUS
)
6564 if (GET_CODE (XEXP (disp
, 0)) == LABEL_REF
6565 || GET_CODE (XEXP (disp
, 0)) == SYMBOL_REF
)
6566 if (GET_CODE (XEXP (disp
, 1)) == SYMBOL_REF
)
6568 const char *sym_name
= XSTR (XEXP (disp
, 1), 0);
6569 if (! strcmp (sym_name
, "<pic base>"))
6577 /* Determine if a given RTX is a valid constant. We already know this
6578 satisfies CONSTANT_P. */
6581 legitimate_constant_p (rtx x
)
6583 switch (GET_CODE (x
))
6588 if (GET_CODE (x
) == PLUS
)
6590 if (!CONST_INT_P (XEXP (x
, 1)))
6595 if (TARGET_MACHO
&& darwin_local_data_pic (x
))
6598 /* Only some unspecs are valid as "constants". */
6599 if (GET_CODE (x
) == UNSPEC
)
6600 switch (XINT (x
, 1))
6605 return TARGET_64BIT
;
6608 x
= XVECEXP (x
, 0, 0);
6609 return (GET_CODE (x
) == SYMBOL_REF
6610 && SYMBOL_REF_TLS_MODEL (x
) == TLS_MODEL_LOCAL_EXEC
);
6612 x
= XVECEXP (x
, 0, 0);
6613 return (GET_CODE (x
) == SYMBOL_REF
6614 && SYMBOL_REF_TLS_MODEL (x
) == TLS_MODEL_LOCAL_DYNAMIC
);
6619 /* We must have drilled down to a symbol. */
6620 if (GET_CODE (x
) == LABEL_REF
)
6622 if (GET_CODE (x
) != SYMBOL_REF
)
6627 /* TLS symbols are never valid. */
6628 if (SYMBOL_REF_TLS_MODEL (x
))
6633 if (GET_MODE (x
) == TImode
6634 && x
!= CONST0_RTX (TImode
)
6640 if (x
== CONST0_RTX (GET_MODE (x
)))
6648 /* Otherwise we handle everything else in the move patterns. */
6652 /* Determine if it's legal to put X into the constant pool. This
6653 is not possible for the address of thread-local symbols, which
6654 is checked above. */
6657 ix86_cannot_force_const_mem (rtx x
)
6659 /* We can always put integral constants and vectors in memory. */
6660 switch (GET_CODE (x
))
6670 return !legitimate_constant_p (x
);
6673 /* Determine if a given RTX is a valid constant address. */
6676 constant_address_p (rtx x
)
6678 return CONSTANT_P (x
) && legitimate_address_p (Pmode
, x
, 1);
6681 /* Nonzero if the constant value X is a legitimate general operand
6682 when generating PIC code. It is given that flag_pic is on and
6683 that X satisfies CONSTANT_P or is a CONST_DOUBLE. */
6686 legitimate_pic_operand_p (rtx x
)
6690 switch (GET_CODE (x
))
6693 inner
= XEXP (x
, 0);
6694 if (GET_CODE (inner
) == PLUS
6695 && CONST_INT_P (XEXP (inner
, 1)))
6696 inner
= XEXP (inner
, 0);
6698 /* Only some unspecs are valid as "constants". */
6699 if (GET_CODE (inner
) == UNSPEC
)
6700 switch (XINT (inner
, 1))
6705 return TARGET_64BIT
;
6707 x
= XVECEXP (inner
, 0, 0);
6708 return (GET_CODE (x
) == SYMBOL_REF
6709 && SYMBOL_REF_TLS_MODEL (x
) == TLS_MODEL_LOCAL_EXEC
);
6717 return legitimate_pic_address_disp_p (x
);
6724 /* Determine if a given CONST RTX is a valid memory displacement
6728 legitimate_pic_address_disp_p (rtx disp
)
6732 /* In 64bit mode we can allow direct addresses of symbols and labels
6733 when they are not dynamic symbols. */
6736 rtx op0
= disp
, op1
;
6738 switch (GET_CODE (disp
))
6744 if (GET_CODE (XEXP (disp
, 0)) != PLUS
)
6746 op0
= XEXP (XEXP (disp
, 0), 0);
6747 op1
= XEXP (XEXP (disp
, 0), 1);
6748 if (!CONST_INT_P (op1
)
6749 || INTVAL (op1
) >= 16*1024*1024
6750 || INTVAL (op1
) < -16*1024*1024)
6752 if (GET_CODE (op0
) == LABEL_REF
)
6754 if (GET_CODE (op0
) != SYMBOL_REF
)
6759 /* TLS references should always be enclosed in UNSPEC. */
6760 if (SYMBOL_REF_TLS_MODEL (op0
))
6762 if (!SYMBOL_REF_FAR_ADDR_P (op0
) && SYMBOL_REF_LOCAL_P (op0
)
6763 && ix86_cmodel
!= CM_LARGE_PIC
)
6771 if (GET_CODE (disp
) != CONST
)
6773 disp
= XEXP (disp
, 0);
6777 /* We are unsafe to allow PLUS expressions. This limit allowed distance
6778 of GOT tables. We should not need these anyway. */
6779 if (GET_CODE (disp
) != UNSPEC
6780 || (XINT (disp
, 1) != UNSPEC_GOTPCREL
6781 && XINT (disp
, 1) != UNSPEC_GOTOFF
6782 && XINT (disp
, 1) != UNSPEC_PLTOFF
))
6785 if (GET_CODE (XVECEXP (disp
, 0, 0)) != SYMBOL_REF
6786 && GET_CODE (XVECEXP (disp
, 0, 0)) != LABEL_REF
)
6792 if (GET_CODE (disp
) == PLUS
)
6794 if (!CONST_INT_P (XEXP (disp
, 1)))
6796 disp
= XEXP (disp
, 0);
6800 if (TARGET_MACHO
&& darwin_local_data_pic (disp
))
6803 if (GET_CODE (disp
) != UNSPEC
)
6806 switch (XINT (disp
, 1))
6811 /* We need to check for both symbols and labels because VxWorks loads
6812 text labels with @GOT rather than @GOTOFF. See gotoff_operand for
6814 return (GET_CODE (XVECEXP (disp
, 0, 0)) == SYMBOL_REF
6815 || GET_CODE (XVECEXP (disp
, 0, 0)) == LABEL_REF
);
6817 /* Refuse GOTOFF in 64bit mode since it is always 64bit when used.
6818 While ABI specify also 32bit relocation but we don't produce it in
6819 small PIC model at all. */
6820 if ((GET_CODE (XVECEXP (disp
, 0, 0)) == SYMBOL_REF
6821 || GET_CODE (XVECEXP (disp
, 0, 0)) == LABEL_REF
)
6823 return gotoff_operand (XVECEXP (disp
, 0, 0), Pmode
);
6825 case UNSPEC_GOTTPOFF
:
6826 case UNSPEC_GOTNTPOFF
:
6827 case UNSPEC_INDNTPOFF
:
6830 disp
= XVECEXP (disp
, 0, 0);
6831 return (GET_CODE (disp
) == SYMBOL_REF
6832 && SYMBOL_REF_TLS_MODEL (disp
) == TLS_MODEL_INITIAL_EXEC
);
6834 disp
= XVECEXP (disp
, 0, 0);
6835 return (GET_CODE (disp
) == SYMBOL_REF
6836 && SYMBOL_REF_TLS_MODEL (disp
) == TLS_MODEL_LOCAL_EXEC
);
6838 disp
= XVECEXP (disp
, 0, 0);
6839 return (GET_CODE (disp
) == SYMBOL_REF
6840 && SYMBOL_REF_TLS_MODEL (disp
) == TLS_MODEL_LOCAL_DYNAMIC
);
6846 /* GO_IF_LEGITIMATE_ADDRESS recognizes an RTL expression that is a valid
6847 memory address for an instruction. The MODE argument is the machine mode
6848 for the MEM expression that wants to use this address.
6850 It only recognizes address in canonical form. LEGITIMIZE_ADDRESS should
6851 convert common non-canonical forms to canonical form so that they will
6855 legitimate_address_p (enum machine_mode mode ATTRIBUTE_UNUSED
,
6856 rtx addr
, int strict
)
6858 struct ix86_address parts
;
6859 rtx base
, index
, disp
;
6860 HOST_WIDE_INT scale
;
6861 const char *reason
= NULL
;
6862 rtx reason_rtx
= NULL_RTX
;
6864 if (ix86_decompose_address (addr
, &parts
) <= 0)
6866 reason
= "decomposition failed";
6871 index
= parts
.index
;
6873 scale
= parts
.scale
;
6875 /* Validate base register.
6877 Don't allow SUBREG's that span more than a word here. It can lead to spill
6878 failures when the base is one word out of a two word structure, which is
6879 represented internally as a DImode int. */
6888 else if (GET_CODE (base
) == SUBREG
6889 && REG_P (SUBREG_REG (base
))
6890 && GET_MODE_SIZE (GET_MODE (SUBREG_REG (base
)))
6892 reg
= SUBREG_REG (base
);
6895 reason
= "base is not a register";
6899 if (GET_MODE (base
) != Pmode
)
6901 reason
= "base is not in Pmode";
6905 if ((strict
&& ! REG_OK_FOR_BASE_STRICT_P (reg
))
6906 || (! strict
&& ! REG_OK_FOR_BASE_NONSTRICT_P (reg
)))
6908 reason
= "base is not valid";
6913 /* Validate index register.
6915 Don't allow SUBREG's that span more than a word here -- same as above. */
6924 else if (GET_CODE (index
) == SUBREG
6925 && REG_P (SUBREG_REG (index
))
6926 && GET_MODE_SIZE (GET_MODE (SUBREG_REG (index
)))
6928 reg
= SUBREG_REG (index
);
6931 reason
= "index is not a register";
6935 if (GET_MODE (index
) != Pmode
)
6937 reason
= "index is not in Pmode";
6941 if ((strict
&& ! REG_OK_FOR_INDEX_STRICT_P (reg
))
6942 || (! strict
&& ! REG_OK_FOR_INDEX_NONSTRICT_P (reg
)))
6944 reason
= "index is not valid";
6949 /* Validate scale factor. */
6952 reason_rtx
= GEN_INT (scale
);
6955 reason
= "scale without index";
6959 if (scale
!= 2 && scale
!= 4 && scale
!= 8)
6961 reason
= "scale is not a valid multiplier";
6966 /* Validate displacement. */
6971 if (GET_CODE (disp
) == CONST
6972 && GET_CODE (XEXP (disp
, 0)) == UNSPEC
)
6973 switch (XINT (XEXP (disp
, 0), 1))
6975 /* Refuse GOTOFF and GOT in 64bit mode since it is always 64bit when
6976 used. While ABI specify also 32bit relocations, we don't produce
6977 them at all and use IP relative instead. */
6980 gcc_assert (flag_pic
);
6982 goto is_legitimate_pic
;
6983 reason
= "64bit address unspec";
6986 case UNSPEC_GOTPCREL
:
6987 gcc_assert (flag_pic
);
6988 goto is_legitimate_pic
;
6990 case UNSPEC_GOTTPOFF
:
6991 case UNSPEC_GOTNTPOFF
:
6992 case UNSPEC_INDNTPOFF
:
6998 reason
= "invalid address unspec";
7002 else if (SYMBOLIC_CONST (disp
)
7006 && MACHOPIC_INDIRECT
7007 && !machopic_operand_p (disp
)
7013 if (TARGET_64BIT
&& (index
|| base
))
7015 /* foo@dtpoff(%rX) is ok. */
7016 if (GET_CODE (disp
) != CONST
7017 || GET_CODE (XEXP (disp
, 0)) != PLUS
7018 || GET_CODE (XEXP (XEXP (disp
, 0), 0)) != UNSPEC
7019 || !CONST_INT_P (XEXP (XEXP (disp
, 0), 1))
7020 || (XINT (XEXP (XEXP (disp
, 0), 0), 1) != UNSPEC_DTPOFF
7021 && XINT (XEXP (XEXP (disp
, 0), 0), 1) != UNSPEC_NTPOFF
))
7023 reason
= "non-constant pic memory reference";
7027 else if (! legitimate_pic_address_disp_p (disp
))
7029 reason
= "displacement is an invalid pic construct";
7033 /* This code used to verify that a symbolic pic displacement
7034 includes the pic_offset_table_rtx register.
7036 While this is good idea, unfortunately these constructs may
7037 be created by "adds using lea" optimization for incorrect
7046 This code is nonsensical, but results in addressing
7047 GOT table with pic_offset_table_rtx base. We can't
7048 just refuse it easily, since it gets matched by
7049 "addsi3" pattern, that later gets split to lea in the
7050 case output register differs from input. While this
7051 can be handled by separate addsi pattern for this case
7052 that never results in lea, this seems to be easier and
7053 correct fix for crash to disable this test. */
7055 else if (GET_CODE (disp
) != LABEL_REF
7056 && !CONST_INT_P (disp
)
7057 && (GET_CODE (disp
) != CONST
7058 || !legitimate_constant_p (disp
))
7059 && (GET_CODE (disp
) != SYMBOL_REF
7060 || !legitimate_constant_p (disp
)))
7062 reason
= "displacement is not constant";
7065 else if (TARGET_64BIT
7066 && !x86_64_immediate_operand (disp
, VOIDmode
))
7068 reason
= "displacement is out of range";
7073 /* Everything looks valid. */
7080 /* Return a unique alias set for the GOT. */
7082 static HOST_WIDE_INT
7083 ix86_GOT_alias_set (void)
7085 static HOST_WIDE_INT set
= -1;
7087 set
= new_alias_set ();
7091 /* Return a legitimate reference for ORIG (an address) using the
7092 register REG. If REG is 0, a new pseudo is generated.
7094 There are two types of references that must be handled:
7096 1. Global data references must load the address from the GOT, via
7097 the PIC reg. An insn is emitted to do this load, and the reg is
7100 2. Static data references, constant pool addresses, and code labels
7101 compute the address as an offset from the GOT, whose base is in
7102 the PIC reg. Static data objects have SYMBOL_FLAG_LOCAL set to
7103 differentiate them from global data objects. The returned
7104 address is the PIC reg + an unspec constant.
7106 GO_IF_LEGITIMATE_ADDRESS rejects symbolic references unless the PIC
7107 reg also appears in the address. */
7110 legitimize_pic_address (rtx orig
, rtx reg
)
7117 if (TARGET_MACHO
&& !TARGET_64BIT
)
7120 reg
= gen_reg_rtx (Pmode
);
7121 /* Use the generic Mach-O PIC machinery. */
7122 return machopic_legitimize_pic_address (orig
, GET_MODE (orig
), reg
);
7126 if (TARGET_64BIT
&& legitimate_pic_address_disp_p (addr
))
7128 else if (TARGET_64BIT
7129 && ix86_cmodel
!= CM_SMALL_PIC
7130 && gotoff_operand (addr
, Pmode
))
7133 /* This symbol may be referenced via a displacement from the PIC
7134 base address (@GOTOFF). */
7136 if (reload_in_progress
)
7137 regs_ever_live
[PIC_OFFSET_TABLE_REGNUM
] = 1;
7138 if (GET_CODE (addr
) == CONST
)
7139 addr
= XEXP (addr
, 0);
7140 if (GET_CODE (addr
) == PLUS
)
7142 new = gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, XEXP (addr
, 0)), UNSPEC_GOTOFF
);
7143 new = gen_rtx_PLUS (Pmode
, new, XEXP (addr
, 1));
7146 new = gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, addr
), UNSPEC_GOTOFF
);
7147 new = gen_rtx_CONST (Pmode
, new);
7149 tmpreg
= gen_reg_rtx (Pmode
);
7152 emit_move_insn (tmpreg
, new);
7156 new = expand_simple_binop (Pmode
, PLUS
, reg
, pic_offset_table_rtx
,
7157 tmpreg
, 1, OPTAB_DIRECT
);
7160 else new = gen_rtx_PLUS (Pmode
, pic_offset_table_rtx
, tmpreg
);
7162 else if (!TARGET_64BIT
&& gotoff_operand (addr
, Pmode
))
7164 /* This symbol may be referenced via a displacement from the PIC
7165 base address (@GOTOFF). */
7167 if (reload_in_progress
)
7168 regs_ever_live
[PIC_OFFSET_TABLE_REGNUM
] = 1;
7169 if (GET_CODE (addr
) == CONST
)
7170 addr
= XEXP (addr
, 0);
7171 if (GET_CODE (addr
) == PLUS
)
7173 new = gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, XEXP (addr
, 0)), UNSPEC_GOTOFF
);
7174 new = gen_rtx_PLUS (Pmode
, new, XEXP (addr
, 1));
7177 new = gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, addr
), UNSPEC_GOTOFF
);
7178 new = gen_rtx_CONST (Pmode
, new);
7179 new = gen_rtx_PLUS (Pmode
, pic_offset_table_rtx
, new);
7183 emit_move_insn (reg
, new);
7187 else if ((GET_CODE (addr
) == SYMBOL_REF
&& SYMBOL_REF_TLS_MODEL (addr
) == 0)
7188 /* We can't use @GOTOFF for text labels on VxWorks;
7189 see gotoff_operand. */
7190 || (TARGET_VXWORKS_RTP
&& GET_CODE (addr
) == LABEL_REF
))
7192 if (TARGET_64BIT
&& ix86_cmodel
!= CM_LARGE_PIC
)
7194 new = gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, addr
), UNSPEC_GOTPCREL
);
7195 new = gen_rtx_CONST (Pmode
, new);
7196 new = gen_const_mem (Pmode
, new);
7197 set_mem_alias_set (new, ix86_GOT_alias_set ());
7200 reg
= gen_reg_rtx (Pmode
);
7201 /* Use directly gen_movsi, otherwise the address is loaded
7202 into register for CSE. We don't want to CSE this addresses,
7203 instead we CSE addresses from the GOT table, so skip this. */
7204 emit_insn (gen_movsi (reg
, new));
7209 /* This symbol must be referenced via a load from the
7210 Global Offset Table (@GOT). */
7212 if (reload_in_progress
)
7213 regs_ever_live
[PIC_OFFSET_TABLE_REGNUM
] = 1;
7214 new = gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, addr
), UNSPEC_GOT
);
7215 new = gen_rtx_CONST (Pmode
, new);
7217 new = force_reg (Pmode
, new);
7218 new = gen_rtx_PLUS (Pmode
, pic_offset_table_rtx
, new);
7219 new = gen_const_mem (Pmode
, new);
7220 set_mem_alias_set (new, ix86_GOT_alias_set ());
7223 reg
= gen_reg_rtx (Pmode
);
7224 emit_move_insn (reg
, new);
7230 if (CONST_INT_P (addr
)
7231 && !x86_64_immediate_operand (addr
, VOIDmode
))
7235 emit_move_insn (reg
, addr
);
7239 new = force_reg (Pmode
, addr
);
7241 else if (GET_CODE (addr
) == CONST
)
7243 addr
= XEXP (addr
, 0);
7245 /* We must match stuff we generate before. Assume the only
7246 unspecs that can get here are ours. Not that we could do
7247 anything with them anyway.... */
7248 if (GET_CODE (addr
) == UNSPEC
7249 || (GET_CODE (addr
) == PLUS
7250 && GET_CODE (XEXP (addr
, 0)) == UNSPEC
))
7252 gcc_assert (GET_CODE (addr
) == PLUS
);
7254 if (GET_CODE (addr
) == PLUS
)
7256 rtx op0
= XEXP (addr
, 0), op1
= XEXP (addr
, 1);
7258 /* Check first to see if this is a constant offset from a @GOTOFF
7259 symbol reference. */
7260 if (gotoff_operand (op0
, Pmode
)
7261 && CONST_INT_P (op1
))
7265 if (reload_in_progress
)
7266 regs_ever_live
[PIC_OFFSET_TABLE_REGNUM
] = 1;
7267 new = gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, op0
),
7269 new = gen_rtx_PLUS (Pmode
, new, op1
);
7270 new = gen_rtx_CONST (Pmode
, new);
7271 new = gen_rtx_PLUS (Pmode
, pic_offset_table_rtx
, new);
7275 emit_move_insn (reg
, new);
7281 if (INTVAL (op1
) < -16*1024*1024
7282 || INTVAL (op1
) >= 16*1024*1024)
7284 if (!x86_64_immediate_operand (op1
, Pmode
))
7285 op1
= force_reg (Pmode
, op1
);
7286 new = gen_rtx_PLUS (Pmode
, force_reg (Pmode
, op0
), op1
);
7292 base
= legitimize_pic_address (XEXP (addr
, 0), reg
);
7293 new = legitimize_pic_address (XEXP (addr
, 1),
7294 base
== reg
? NULL_RTX
: reg
);
7296 if (CONST_INT_P (new))
7297 new = plus_constant (base
, INTVAL (new));
7300 if (GET_CODE (new) == PLUS
&& CONSTANT_P (XEXP (new, 1)))
7302 base
= gen_rtx_PLUS (Pmode
, base
, XEXP (new, 0));
7303 new = XEXP (new, 1);
7305 new = gen_rtx_PLUS (Pmode
, base
, new);
7313 /* Load the thread pointer. If TO_REG is true, force it into a register. */
7316 get_thread_pointer (int to_reg
)
7320 tp
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, const0_rtx
), UNSPEC_TP
);
7324 reg
= gen_reg_rtx (Pmode
);
7325 insn
= gen_rtx_SET (VOIDmode
, reg
, tp
);
7326 insn
= emit_insn (insn
);
7331 /* A subroutine of legitimize_address and ix86_expand_move. FOR_MOV is
7332 false if we expect this to be used for a memory address and true if
7333 we expect to load the address into a register. */
7336 legitimize_tls_address (rtx x
, enum tls_model model
, int for_mov
)
7338 rtx dest
, base
, off
, pic
, tp
;
7343 case TLS_MODEL_GLOBAL_DYNAMIC
:
7344 dest
= gen_reg_rtx (Pmode
);
7345 tp
= TARGET_GNU2_TLS
? get_thread_pointer (1) : 0;
7347 if (TARGET_64BIT
&& ! TARGET_GNU2_TLS
)
7349 rtx rax
= gen_rtx_REG (Pmode
, 0), insns
;
7352 emit_call_insn (gen_tls_global_dynamic_64 (rax
, x
));
7353 insns
= get_insns ();
7356 emit_libcall_block (insns
, dest
, rax
, x
);
7358 else if (TARGET_64BIT
&& TARGET_GNU2_TLS
)
7359 emit_insn (gen_tls_global_dynamic_64 (dest
, x
));
7361 emit_insn (gen_tls_global_dynamic_32 (dest
, x
));
7363 if (TARGET_GNU2_TLS
)
7365 dest
= force_reg (Pmode
, gen_rtx_PLUS (Pmode
, tp
, dest
));
7367 set_unique_reg_note (get_last_insn (), REG_EQUIV
, x
);
7371 case TLS_MODEL_LOCAL_DYNAMIC
:
7372 base
= gen_reg_rtx (Pmode
);
7373 tp
= TARGET_GNU2_TLS
? get_thread_pointer (1) : 0;
7375 if (TARGET_64BIT
&& ! TARGET_GNU2_TLS
)
7377 rtx rax
= gen_rtx_REG (Pmode
, 0), insns
, note
;
7380 emit_call_insn (gen_tls_local_dynamic_base_64 (rax
));
7381 insns
= get_insns ();
7384 note
= gen_rtx_EXPR_LIST (VOIDmode
, const0_rtx
, NULL
);
7385 note
= gen_rtx_EXPR_LIST (VOIDmode
, ix86_tls_get_addr (), note
);
7386 emit_libcall_block (insns
, base
, rax
, note
);
7388 else if (TARGET_64BIT
&& TARGET_GNU2_TLS
)
7389 emit_insn (gen_tls_local_dynamic_base_64 (base
));
7391 emit_insn (gen_tls_local_dynamic_base_32 (base
));
7393 if (TARGET_GNU2_TLS
)
7395 rtx x
= ix86_tls_module_base ();
7397 set_unique_reg_note (get_last_insn (), REG_EQUIV
,
7398 gen_rtx_MINUS (Pmode
, x
, tp
));
7401 off
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, x
), UNSPEC_DTPOFF
);
7402 off
= gen_rtx_CONST (Pmode
, off
);
7404 dest
= force_reg (Pmode
, gen_rtx_PLUS (Pmode
, base
, off
));
7406 if (TARGET_GNU2_TLS
)
7408 dest
= force_reg (Pmode
, gen_rtx_PLUS (Pmode
, dest
, tp
));
7410 set_unique_reg_note (get_last_insn (), REG_EQUIV
, x
);
7415 case TLS_MODEL_INITIAL_EXEC
:
7419 type
= UNSPEC_GOTNTPOFF
;
7423 if (reload_in_progress
)
7424 regs_ever_live
[PIC_OFFSET_TABLE_REGNUM
] = 1;
7425 pic
= pic_offset_table_rtx
;
7426 type
= TARGET_ANY_GNU_TLS
? UNSPEC_GOTNTPOFF
: UNSPEC_GOTTPOFF
;
7428 else if (!TARGET_ANY_GNU_TLS
)
7430 pic
= gen_reg_rtx (Pmode
);
7431 emit_insn (gen_set_got (pic
));
7432 type
= UNSPEC_GOTTPOFF
;
7437 type
= UNSPEC_INDNTPOFF
;
7440 off
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, x
), type
);
7441 off
= gen_rtx_CONST (Pmode
, off
);
7443 off
= gen_rtx_PLUS (Pmode
, pic
, off
);
7444 off
= gen_const_mem (Pmode
, off
);
7445 set_mem_alias_set (off
, ix86_GOT_alias_set ());
7447 if (TARGET_64BIT
|| TARGET_ANY_GNU_TLS
)
7449 base
= get_thread_pointer (for_mov
|| !TARGET_TLS_DIRECT_SEG_REFS
);
7450 off
= force_reg (Pmode
, off
);
7451 return gen_rtx_PLUS (Pmode
, base
, off
);
7455 base
= get_thread_pointer (true);
7456 dest
= gen_reg_rtx (Pmode
);
7457 emit_insn (gen_subsi3 (dest
, base
, off
));
7461 case TLS_MODEL_LOCAL_EXEC
:
7462 off
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, x
),
7463 (TARGET_64BIT
|| TARGET_ANY_GNU_TLS
)
7464 ? UNSPEC_NTPOFF
: UNSPEC_TPOFF
);
7465 off
= gen_rtx_CONST (Pmode
, off
);
7467 if (TARGET_64BIT
|| TARGET_ANY_GNU_TLS
)
7469 base
= get_thread_pointer (for_mov
|| !TARGET_TLS_DIRECT_SEG_REFS
);
7470 return gen_rtx_PLUS (Pmode
, base
, off
);
7474 base
= get_thread_pointer (true);
7475 dest
= gen_reg_rtx (Pmode
);
7476 emit_insn (gen_subsi3 (dest
, base
, off
));
7487 /* Try machine-dependent ways of modifying an illegitimate address
7488 to be legitimate. If we find one, return the new, valid address.
7489 This macro is used in only one place: `memory_address' in explow.c.
7491 OLDX is the address as it was before break_out_memory_refs was called.
7492 In some cases it is useful to look at this to decide what needs to be done.
7494 MODE and WIN are passed so that this macro can use
7495 GO_IF_LEGITIMATE_ADDRESS.
7497 It is always safe for this macro to do nothing. It exists to recognize
7498 opportunities to optimize the output.
7500 For the 80386, we handle X+REG by loading X into a register R and
7501 using R+REG. R will go in a general reg and indexing will be used.
7502 However, if REG is a broken-out memory address or multiplication,
7503 nothing needs to be done because REG can certainly go in a general reg.
7505 When -fpic is used, special handling is needed for symbolic references.
7506 See comments by legitimize_pic_address in i386.c for details. */
7509 legitimize_address (rtx x
, rtx oldx ATTRIBUTE_UNUSED
, enum machine_mode mode
)
7514 log
= GET_CODE (x
) == SYMBOL_REF
? SYMBOL_REF_TLS_MODEL (x
) : 0;
7516 return legitimize_tls_address (x
, log
, false);
7517 if (GET_CODE (x
) == CONST
7518 && GET_CODE (XEXP (x
, 0)) == PLUS
7519 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == SYMBOL_REF
7520 && (log
= SYMBOL_REF_TLS_MODEL (XEXP (XEXP (x
, 0), 0))))
7522 rtx t
= legitimize_tls_address (XEXP (XEXP (x
, 0), 0), log
, false);
7523 return gen_rtx_PLUS (Pmode
, t
, XEXP (XEXP (x
, 0), 1));
7526 if (flag_pic
&& SYMBOLIC_CONST (x
))
7527 return legitimize_pic_address (x
, 0);
7529 /* Canonicalize shifts by 0, 1, 2, 3 into multiply */
7530 if (GET_CODE (x
) == ASHIFT
7531 && CONST_INT_P (XEXP (x
, 1))
7532 && (unsigned HOST_WIDE_INT
) INTVAL (XEXP (x
, 1)) < 4)
7535 log
= INTVAL (XEXP (x
, 1));
7536 x
= gen_rtx_MULT (Pmode
, force_reg (Pmode
, XEXP (x
, 0)),
7537 GEN_INT (1 << log
));
7540 if (GET_CODE (x
) == PLUS
)
7542 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
7544 if (GET_CODE (XEXP (x
, 0)) == ASHIFT
7545 && CONST_INT_P (XEXP (XEXP (x
, 0), 1))
7546 && (unsigned HOST_WIDE_INT
) INTVAL (XEXP (XEXP (x
, 0), 1)) < 4)
7549 log
= INTVAL (XEXP (XEXP (x
, 0), 1));
7550 XEXP (x
, 0) = gen_rtx_MULT (Pmode
,
7551 force_reg (Pmode
, XEXP (XEXP (x
, 0), 0)),
7552 GEN_INT (1 << log
));
7555 if (GET_CODE (XEXP (x
, 1)) == ASHIFT
7556 && CONST_INT_P (XEXP (XEXP (x
, 1), 1))
7557 && (unsigned HOST_WIDE_INT
) INTVAL (XEXP (XEXP (x
, 1), 1)) < 4)
7560 log
= INTVAL (XEXP (XEXP (x
, 1), 1));
7561 XEXP (x
, 1) = gen_rtx_MULT (Pmode
,
7562 force_reg (Pmode
, XEXP (XEXP (x
, 1), 0)),
7563 GEN_INT (1 << log
));
7566 /* Put multiply first if it isn't already. */
7567 if (GET_CODE (XEXP (x
, 1)) == MULT
)
7569 rtx tmp
= XEXP (x
, 0);
7570 XEXP (x
, 0) = XEXP (x
, 1);
7575 /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
7576 into (plus (plus (mult (reg) (const)) (reg)) (const)). This can be
7577 created by virtual register instantiation, register elimination, and
7578 similar optimizations. */
7579 if (GET_CODE (XEXP (x
, 0)) == MULT
&& GET_CODE (XEXP (x
, 1)) == PLUS
)
7582 x
= gen_rtx_PLUS (Pmode
,
7583 gen_rtx_PLUS (Pmode
, XEXP (x
, 0),
7584 XEXP (XEXP (x
, 1), 0)),
7585 XEXP (XEXP (x
, 1), 1));
7589 (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
7590 into (plus (plus (mult (reg) (const)) (reg)) (const)). */
7591 else if (GET_CODE (x
) == PLUS
&& GET_CODE (XEXP (x
, 0)) == PLUS
7592 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == MULT
7593 && GET_CODE (XEXP (XEXP (x
, 0), 1)) == PLUS
7594 && CONSTANT_P (XEXP (x
, 1)))
7597 rtx other
= NULL_RTX
;
7599 if (CONST_INT_P (XEXP (x
, 1)))
7601 constant
= XEXP (x
, 1);
7602 other
= XEXP (XEXP (XEXP (x
, 0), 1), 1);
7604 else if (CONST_INT_P (XEXP (XEXP (XEXP (x
, 0), 1), 1)))
7606 constant
= XEXP (XEXP (XEXP (x
, 0), 1), 1);
7607 other
= XEXP (x
, 1);
7615 x
= gen_rtx_PLUS (Pmode
,
7616 gen_rtx_PLUS (Pmode
, XEXP (XEXP (x
, 0), 0),
7617 XEXP (XEXP (XEXP (x
, 0), 1), 0)),
7618 plus_constant (other
, INTVAL (constant
)));
7622 if (changed
&& legitimate_address_p (mode
, x
, FALSE
))
7625 if (GET_CODE (XEXP (x
, 0)) == MULT
)
7628 XEXP (x
, 0) = force_operand (XEXP (x
, 0), 0);
7631 if (GET_CODE (XEXP (x
, 1)) == MULT
)
7634 XEXP (x
, 1) = force_operand (XEXP (x
, 1), 0);
7638 && REG_P (XEXP (x
, 1))
7639 && REG_P (XEXP (x
, 0)))
7642 if (flag_pic
&& SYMBOLIC_CONST (XEXP (x
, 1)))
7645 x
= legitimize_pic_address (x
, 0);
7648 if (changed
&& legitimate_address_p (mode
, x
, FALSE
))
7651 if (REG_P (XEXP (x
, 0)))
7653 rtx temp
= gen_reg_rtx (Pmode
);
7654 rtx val
= force_operand (XEXP (x
, 1), temp
);
7656 emit_move_insn (temp
, val
);
7662 else if (REG_P (XEXP (x
, 1)))
7664 rtx temp
= gen_reg_rtx (Pmode
);
7665 rtx val
= force_operand (XEXP (x
, 0), temp
);
7667 emit_move_insn (temp
, val
);
7677 /* Print an integer constant expression in assembler syntax. Addition
7678 and subtraction are the only arithmetic that may appear in these
7679 expressions. FILE is the stdio stream to write to, X is the rtx, and
7680 CODE is the operand print code from the output string. */
7683 output_pic_addr_const (FILE *file
, rtx x
, int code
)
7687 switch (GET_CODE (x
))
7690 gcc_assert (flag_pic
);
7695 if (! TARGET_MACHO
|| TARGET_64BIT
)
7696 output_addr_const (file
, x
);
7699 const char *name
= XSTR (x
, 0);
7701 /* Mark the decl as referenced so that cgraph will output the function. */
7702 if (SYMBOL_REF_DECL (x
))
7703 mark_decl_referenced (SYMBOL_REF_DECL (x
));
7706 if (MACHOPIC_INDIRECT
7707 && machopic_classify_symbol (x
) == MACHOPIC_UNDEFINED_FUNCTION
)
7708 name
= machopic_indirection_name (x
, /*stub_p=*/true);
7710 assemble_name (file
, name
);
7712 if (!TARGET_MACHO
&& code
== 'P' && ! SYMBOL_REF_LOCAL_P (x
))
7713 fputs ("@PLT", file
);
7720 ASM_GENERATE_INTERNAL_LABEL (buf
, "L", CODE_LABEL_NUMBER (x
));
7721 assemble_name (asm_out_file
, buf
);
7725 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, INTVAL (x
));
7729 /* This used to output parentheses around the expression,
7730 but that does not work on the 386 (either ATT or BSD assembler). */
7731 output_pic_addr_const (file
, XEXP (x
, 0), code
);
7735 if (GET_MODE (x
) == VOIDmode
)
7737 /* We can use %d if the number is <32 bits and positive. */
7738 if (CONST_DOUBLE_HIGH (x
) || CONST_DOUBLE_LOW (x
) < 0)
7739 fprintf (file
, "0x%lx%08lx",
7740 (unsigned long) CONST_DOUBLE_HIGH (x
),
7741 (unsigned long) CONST_DOUBLE_LOW (x
));
7743 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, CONST_DOUBLE_LOW (x
));
7746 /* We can't handle floating point constants;
7747 PRINT_OPERAND must handle them. */
7748 output_operand_lossage ("floating constant misused");
7752 /* Some assemblers need integer constants to appear first. */
7753 if (CONST_INT_P (XEXP (x
, 0)))
7755 output_pic_addr_const (file
, XEXP (x
, 0), code
);
7757 output_pic_addr_const (file
, XEXP (x
, 1), code
);
7761 gcc_assert (CONST_INT_P (XEXP (x
, 1)));
7762 output_pic_addr_const (file
, XEXP (x
, 1), code
);
7764 output_pic_addr_const (file
, XEXP (x
, 0), code
);
7770 putc (ASSEMBLER_DIALECT
== ASM_INTEL
? '(' : '[', file
);
7771 output_pic_addr_const (file
, XEXP (x
, 0), code
);
7773 output_pic_addr_const (file
, XEXP (x
, 1), code
);
7775 putc (ASSEMBLER_DIALECT
== ASM_INTEL
? ')' : ']', file
);
7779 gcc_assert (XVECLEN (x
, 0) == 1);
7780 output_pic_addr_const (file
, XVECEXP (x
, 0, 0), code
);
7781 switch (XINT (x
, 1))
7784 fputs ("@GOT", file
);
7787 fputs ("@GOTOFF", file
);
7790 fputs ("@PLTOFF", file
);
7792 case UNSPEC_GOTPCREL
:
7793 fputs ("@GOTPCREL(%rip)", file
);
7795 case UNSPEC_GOTTPOFF
:
7796 /* FIXME: This might be @TPOFF in Sun ld too. */
7797 fputs ("@GOTTPOFF", file
);
7800 fputs ("@TPOFF", file
);
7804 fputs ("@TPOFF", file
);
7806 fputs ("@NTPOFF", file
);
7809 fputs ("@DTPOFF", file
);
7811 case UNSPEC_GOTNTPOFF
:
7813 fputs ("@GOTTPOFF(%rip)", file
);
7815 fputs ("@GOTNTPOFF", file
);
7817 case UNSPEC_INDNTPOFF
:
7818 fputs ("@INDNTPOFF", file
);
7821 output_operand_lossage ("invalid UNSPEC as operand");
7827 output_operand_lossage ("invalid expression as operand");
7831 /* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
7832 We need to emit DTP-relative relocations. */
7835 i386_output_dwarf_dtprel (FILE *file
, int size
, rtx x
)
7837 fputs (ASM_LONG
, file
);
7838 output_addr_const (file
, x
);
7839 fputs ("@DTPOFF", file
);
7845 fputs (", 0", file
);
7852 /* In the name of slightly smaller debug output, and to cater to
7853 general assembler lossage, recognize PIC+GOTOFF and turn it back
7854 into a direct symbol reference.
7856 On Darwin, this is necessary to avoid a crash, because Darwin
7857 has a different PIC label for each routine but the DWARF debugging
7858 information is not associated with any particular routine, so it's
7859 necessary to remove references to the PIC label from RTL stored by
7860 the DWARF output code. */
7863 ix86_delegitimize_address (rtx orig_x
)
7866 /* reg_addend is NULL or a multiple of some register. */
7867 rtx reg_addend
= NULL_RTX
;
7868 /* const_addend is NULL or a const_int. */
7869 rtx const_addend
= NULL_RTX
;
7870 /* This is the result, or NULL. */
7871 rtx result
= NULL_RTX
;
7878 if (GET_CODE (x
) != CONST
7879 || GET_CODE (XEXP (x
, 0)) != UNSPEC
7880 || XINT (XEXP (x
, 0), 1) != UNSPEC_GOTPCREL
7883 return XVECEXP (XEXP (x
, 0), 0, 0);
7886 if (GET_CODE (x
) != PLUS
7887 || GET_CODE (XEXP (x
, 1)) != CONST
)
7890 if (REG_P (XEXP (x
, 0))
7891 && REGNO (XEXP (x
, 0)) == PIC_OFFSET_TABLE_REGNUM
)
7892 /* %ebx + GOT/GOTOFF */
7894 else if (GET_CODE (XEXP (x
, 0)) == PLUS
)
7896 /* %ebx + %reg * scale + GOT/GOTOFF */
7897 reg_addend
= XEXP (x
, 0);
7898 if (REG_P (XEXP (reg_addend
, 0))
7899 && REGNO (XEXP (reg_addend
, 0)) == PIC_OFFSET_TABLE_REGNUM
)
7900 reg_addend
= XEXP (reg_addend
, 1);
7901 else if (REG_P (XEXP (reg_addend
, 1))
7902 && REGNO (XEXP (reg_addend
, 1)) == PIC_OFFSET_TABLE_REGNUM
)
7903 reg_addend
= XEXP (reg_addend
, 0);
7906 if (!REG_P (reg_addend
)
7907 && GET_CODE (reg_addend
) != MULT
7908 && GET_CODE (reg_addend
) != ASHIFT
)
7914 x
= XEXP (XEXP (x
, 1), 0);
7915 if (GET_CODE (x
) == PLUS
7916 && CONST_INT_P (XEXP (x
, 1)))
7918 const_addend
= XEXP (x
, 1);
7922 if (GET_CODE (x
) == UNSPEC
7923 && ((XINT (x
, 1) == UNSPEC_GOT
&& MEM_P (orig_x
))
7924 || (XINT (x
, 1) == UNSPEC_GOTOFF
&& !MEM_P (orig_x
))))
7925 result
= XVECEXP (x
, 0, 0);
7927 if (TARGET_MACHO
&& darwin_local_data_pic (x
)
7929 result
= XEXP (x
, 0);
7935 result
= gen_rtx_PLUS (Pmode
, result
, const_addend
);
7937 result
= gen_rtx_PLUS (Pmode
, reg_addend
, result
);
7942 put_condition_code (enum rtx_code code
, enum machine_mode mode
, int reverse
,
7947 if (mode
== CCFPmode
|| mode
== CCFPUmode
)
7949 enum rtx_code second_code
, bypass_code
;
7950 ix86_fp_comparison_codes (code
, &bypass_code
, &code
, &second_code
);
7951 gcc_assert (bypass_code
== UNKNOWN
&& second_code
== UNKNOWN
);
7952 code
= ix86_fp_compare_code_to_integer (code
);
7956 code
= reverse_condition (code
);
7967 gcc_assert (mode
== CCmode
|| mode
== CCNOmode
|| mode
== CCGCmode
);
7971 /* ??? Use "nbe" instead of "a" for fcmov lossage on some assemblers.
7972 Those same assemblers have the same but opposite lossage on cmov. */
7973 gcc_assert (mode
== CCmode
);
7974 suffix
= fp
? "nbe" : "a";
7994 gcc_assert (mode
== CCmode
);
8016 gcc_assert (mode
== CCmode
);
8017 suffix
= fp
? "nb" : "ae";
8020 gcc_assert (mode
== CCmode
|| mode
== CCGCmode
|| mode
== CCNOmode
);
8024 gcc_assert (mode
== CCmode
);
8028 suffix
= fp
? "u" : "p";
8031 suffix
= fp
? "nu" : "np";
8036 fputs (suffix
, file
);
8039 /* Print the name of register X to FILE based on its machine mode and number.
8040 If CODE is 'w', pretend the mode is HImode.
8041 If CODE is 'b', pretend the mode is QImode.
8042 If CODE is 'k', pretend the mode is SImode.
8043 If CODE is 'q', pretend the mode is DImode.
8044 If CODE is 'h', pretend the reg is the 'high' byte register.
8045 If CODE is 'y', print "st(0)" instead of "st", if the reg is stack op. */
8048 print_reg (rtx x
, int code
, FILE *file
)
8050 gcc_assert (REGNO (x
) != ARG_POINTER_REGNUM
8051 && REGNO (x
) != FRAME_POINTER_REGNUM
8052 && REGNO (x
) != FLAGS_REG
8053 && REGNO (x
) != FPSR_REG
8054 && REGNO (x
) != FPCR_REG
);
8056 if (ASSEMBLER_DIALECT
== ASM_ATT
|| USER_LABEL_PREFIX
[0] == 0)
8059 if (code
== 'w' || MMX_REG_P (x
))
8061 else if (code
== 'b')
8063 else if (code
== 'k')
8065 else if (code
== 'q')
8067 else if (code
== 'y')
8069 else if (code
== 'h')
8072 code
= GET_MODE_SIZE (GET_MODE (x
));
8074 /* Irritatingly, AMD extended registers use different naming convention
8075 from the normal registers. */
8076 if (REX_INT_REG_P (x
))
8078 gcc_assert (TARGET_64BIT
);
8082 error ("extended registers have no high halves");
8085 fprintf (file
, "r%ib", REGNO (x
) - FIRST_REX_INT_REG
+ 8);
8088 fprintf (file
, "r%iw", REGNO (x
) - FIRST_REX_INT_REG
+ 8);
8091 fprintf (file
, "r%id", REGNO (x
) - FIRST_REX_INT_REG
+ 8);
8094 fprintf (file
, "r%i", REGNO (x
) - FIRST_REX_INT_REG
+ 8);
8097 error ("unsupported operand size for extended register");
8105 if (STACK_TOP_P (x
))
8107 fputs ("st(0)", file
);
8114 if (! ANY_FP_REG_P (x
))
8115 putc (code
== 8 && TARGET_64BIT
? 'r' : 'e', file
);
8120 fputs (hi_reg_name
[REGNO (x
)], file
);
8123 if (REGNO (x
) >= ARRAY_SIZE (qi_reg_name
))
8125 fputs (qi_reg_name
[REGNO (x
)], file
);
8128 if (REGNO (x
) >= ARRAY_SIZE (qi_high_reg_name
))
8130 fputs (qi_high_reg_name
[REGNO (x
)], file
);
8137 /* Locate some local-dynamic symbol still in use by this function
8138 so that we can print its name in some tls_local_dynamic_base
8142 get_some_local_dynamic_name (void)
8146 if (cfun
->machine
->some_ld_name
)
8147 return cfun
->machine
->some_ld_name
;
8149 for (insn
= get_insns (); insn
; insn
= NEXT_INSN (insn
))
8151 && for_each_rtx (&PATTERN (insn
), get_some_local_dynamic_name_1
, 0))
8152 return cfun
->machine
->some_ld_name
;
8158 get_some_local_dynamic_name_1 (rtx
*px
, void *data ATTRIBUTE_UNUSED
)
8162 if (GET_CODE (x
) == SYMBOL_REF
8163 && SYMBOL_REF_TLS_MODEL (x
) == TLS_MODEL_LOCAL_DYNAMIC
)
8165 cfun
->machine
->some_ld_name
= XSTR (x
, 0);
8173 L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
8174 C -- print opcode suffix for set/cmov insn.
8175 c -- like C, but print reversed condition
8176 F,f -- likewise, but for floating-point.
8177 O -- if HAVE_AS_IX86_CMOV_SUN_SYNTAX, expand to "w.", "l." or "q.",
8179 R -- print the prefix for register names.
8180 z -- print the opcode suffix for the size of the current operand.
8181 * -- print a star (in certain assembler syntax)
8182 A -- print an absolute memory reference.
8183 w -- print the operand as if it's a "word" (HImode) even if it isn't.
8184 s -- print a shift double count, followed by the assemblers argument
8186 b -- print the QImode name of the register for the indicated operand.
8187 %b0 would print %al if operands[0] is reg 0.
8188 w -- likewise, print the HImode name of the register.
8189 k -- likewise, print the SImode name of the register.
8190 q -- likewise, print the DImode name of the register.
8191 h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
8192 y -- print "st(0)" instead of "st" as a register.
8193 D -- print condition for SSE cmp instruction.
8194 P -- if PIC, print an @PLT suffix.
8195 X -- don't print any sort of PIC '@' suffix for a symbol.
8196 & -- print some in-use local-dynamic symbol name.
8197 H -- print a memory address offset by 8; used for sse high-parts
8201 print_operand (FILE *file
, rtx x
, int code
)
8208 if (ASSEMBLER_DIALECT
== ASM_ATT
)
8213 assemble_name (file
, get_some_local_dynamic_name ());
8217 switch (ASSEMBLER_DIALECT
)
8224 /* Intel syntax. For absolute addresses, registers should not
8225 be surrounded by braces. */
8229 PRINT_OPERAND (file
, x
, 0);
8239 PRINT_OPERAND (file
, x
, 0);
8244 if (ASSEMBLER_DIALECT
== ASM_ATT
)
8249 if (ASSEMBLER_DIALECT
== ASM_ATT
)
8254 if (ASSEMBLER_DIALECT
== ASM_ATT
)
8259 if (ASSEMBLER_DIALECT
== ASM_ATT
)
8264 if (ASSEMBLER_DIALECT
== ASM_ATT
)
8269 if (ASSEMBLER_DIALECT
== ASM_ATT
)
8274 /* 387 opcodes don't get size suffixes if the operands are
8276 if (STACK_REG_P (x
))
8279 /* Likewise if using Intel opcodes. */
8280 if (ASSEMBLER_DIALECT
== ASM_INTEL
)
8283 /* This is the size of op from size of operand. */
8284 switch (GET_MODE_SIZE (GET_MODE (x
)))
8291 #ifdef HAVE_GAS_FILDS_FISTS
8297 if (GET_MODE (x
) == SFmode
)
8312 if (GET_MODE_CLASS (GET_MODE (x
)) == MODE_INT
)
8314 #ifdef GAS_MNEMONICS
8340 if (CONST_INT_P (x
) || ! SHIFT_DOUBLE_OMITS_COUNT
)
8342 PRINT_OPERAND (file
, x
, 0);
8348 /* Little bit of braindamage here. The SSE compare instructions
8349 does use completely different names for the comparisons that the
8350 fp conditional moves. */
8351 switch (GET_CODE (x
))
8366 fputs ("unord", file
);
8370 fputs ("neq", file
);
8374 fputs ("nlt", file
);
8378 fputs ("nle", file
);
8381 fputs ("ord", file
);
8388 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
8389 if (ASSEMBLER_DIALECT
== ASM_ATT
)
8391 switch (GET_MODE (x
))
8393 case HImode
: putc ('w', file
); break;
8395 case SFmode
: putc ('l', file
); break;
8397 case DFmode
: putc ('q', file
); break;
8398 default: gcc_unreachable ();
8405 put_condition_code (GET_CODE (x
), GET_MODE (XEXP (x
, 0)), 0, 0, file
);
8408 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
8409 if (ASSEMBLER_DIALECT
== ASM_ATT
)
8412 put_condition_code (GET_CODE (x
), GET_MODE (XEXP (x
, 0)), 0, 1, file
);
8415 /* Like above, but reverse condition */
8417 /* Check to see if argument to %c is really a constant
8418 and not a condition code which needs to be reversed. */
8419 if (!COMPARISON_P (x
))
8421 output_operand_lossage ("operand is neither a constant nor a condition code, invalid operand code 'c'");
8424 put_condition_code (GET_CODE (x
), GET_MODE (XEXP (x
, 0)), 1, 0, file
);
8427 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
8428 if (ASSEMBLER_DIALECT
== ASM_ATT
)
8431 put_condition_code (GET_CODE (x
), GET_MODE (XEXP (x
, 0)), 1, 1, file
);
8435 /* It doesn't actually matter what mode we use here, as we're
8436 only going to use this for printing. */
8437 x
= adjust_address_nv (x
, DImode
, 8);
8444 if (!optimize
|| optimize_size
|| !TARGET_BRANCH_PREDICTION_HINTS
)
8447 x
= find_reg_note (current_output_insn
, REG_BR_PROB
, 0);
8450 int pred_val
= INTVAL (XEXP (x
, 0));
8452 if (pred_val
< REG_BR_PROB_BASE
* 45 / 100
8453 || pred_val
> REG_BR_PROB_BASE
* 55 / 100)
8455 int taken
= pred_val
> REG_BR_PROB_BASE
/ 2;
8456 int cputaken
= final_forward_branch_p (current_output_insn
) == 0;
8458 /* Emit hints only in the case default branch prediction
8459 heuristics would fail. */
8460 if (taken
!= cputaken
)
8462 /* We use 3e (DS) prefix for taken branches and
8463 2e (CS) prefix for not taken branches. */
8465 fputs ("ds ; ", file
);
8467 fputs ("cs ; ", file
);
8474 output_operand_lossage ("invalid operand code '%c'", code
);
8479 print_reg (x
, code
, file
);
8483 /* No `byte ptr' prefix for call instructions. */
8484 if (ASSEMBLER_DIALECT
== ASM_INTEL
&& code
!= 'X' && code
!= 'P')
8487 switch (GET_MODE_SIZE (GET_MODE (x
)))
8489 case 1: size
= "BYTE"; break;
8490 case 2: size
= "WORD"; break;
8491 case 4: size
= "DWORD"; break;
8492 case 8: size
= "QWORD"; break;
8493 case 12: size
= "XWORD"; break;
8494 case 16: size
= "XMMWORD"; break;
8499 /* Check for explicit size override (codes 'b', 'w' and 'k') */
8502 else if (code
== 'w')
8504 else if (code
== 'k')
8508 fputs (" PTR ", file
);
8512 /* Avoid (%rip) for call operands. */
8513 if (CONSTANT_ADDRESS_P (x
) && code
== 'P'
8514 && !CONST_INT_P (x
))
8515 output_addr_const (file
, x
);
8516 else if (this_is_asm_operands
&& ! address_operand (x
, VOIDmode
))
8517 output_operand_lossage ("invalid constraints for operand");
8522 else if (GET_CODE (x
) == CONST_DOUBLE
&& GET_MODE (x
) == SFmode
)
8527 REAL_VALUE_FROM_CONST_DOUBLE (r
, x
);
8528 REAL_VALUE_TO_TARGET_SINGLE (r
, l
);
8530 if (ASSEMBLER_DIALECT
== ASM_ATT
)
8532 fprintf (file
, "0x%08lx", l
);
8535 /* These float cases don't actually occur as immediate operands. */
8536 else if (GET_CODE (x
) == CONST_DOUBLE
&& GET_MODE (x
) == DFmode
)
8540 real_to_decimal (dstr
, CONST_DOUBLE_REAL_VALUE (x
), sizeof (dstr
), 0, 1);
8541 fprintf (file
, "%s", dstr
);
8544 else if (GET_CODE (x
) == CONST_DOUBLE
8545 && GET_MODE (x
) == XFmode
)
8549 real_to_decimal (dstr
, CONST_DOUBLE_REAL_VALUE (x
), sizeof (dstr
), 0, 1);
8550 fprintf (file
, "%s", dstr
);
8555 /* We have patterns that allow zero sets of memory, for instance.
8556 In 64-bit mode, we should probably support all 8-byte vectors,
8557 since we can in fact encode that into an immediate. */
8558 if (GET_CODE (x
) == CONST_VECTOR
)
8560 gcc_assert (x
== CONST0_RTX (GET_MODE (x
)));
8566 if (CONST_INT_P (x
) || GET_CODE (x
) == CONST_DOUBLE
)
8568 if (ASSEMBLER_DIALECT
== ASM_ATT
)
8571 else if (GET_CODE (x
) == CONST
|| GET_CODE (x
) == SYMBOL_REF
8572 || GET_CODE (x
) == LABEL_REF
)
8574 if (ASSEMBLER_DIALECT
== ASM_ATT
)
8577 fputs ("OFFSET FLAT:", file
);
8580 if (CONST_INT_P (x
))
8581 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, INTVAL (x
));
8583 output_pic_addr_const (file
, x
, code
);
8585 output_addr_const (file
, x
);
8589 /* Print a memory operand whose address is ADDR. */
8592 print_operand_address (FILE *file
, rtx addr
)
8594 struct ix86_address parts
;
8595 rtx base
, index
, disp
;
8597 int ok
= ix86_decompose_address (addr
, &parts
);
8602 index
= parts
.index
;
8604 scale
= parts
.scale
;
8612 if (USER_LABEL_PREFIX
[0] == 0)
8614 fputs ((parts
.seg
== SEG_FS
? "fs:" : "gs:"), file
);
8620 if (!base
&& !index
)
8622 /* Displacement only requires special attention. */
8624 if (CONST_INT_P (disp
))
8626 if (ASSEMBLER_DIALECT
== ASM_INTEL
&& parts
.seg
== SEG_DEFAULT
)
8628 if (USER_LABEL_PREFIX
[0] == 0)
8630 fputs ("ds:", file
);
8632 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, INTVAL (disp
));
8635 output_pic_addr_const (file
, disp
, 0);
8637 output_addr_const (file
, disp
);
8639 /* Use one byte shorter RIP relative addressing for 64bit mode. */
8642 if (GET_CODE (disp
) == CONST
8643 && GET_CODE (XEXP (disp
, 0)) == PLUS
8644 && CONST_INT_P (XEXP (XEXP (disp
, 0), 1)))
8645 disp
= XEXP (XEXP (disp
, 0), 0);
8646 if (GET_CODE (disp
) == LABEL_REF
8647 || (GET_CODE (disp
) == SYMBOL_REF
8648 && SYMBOL_REF_TLS_MODEL (disp
) == 0))
8649 fputs ("(%rip)", file
);
8654 if (ASSEMBLER_DIALECT
== ASM_ATT
)
8659 output_pic_addr_const (file
, disp
, 0);
8660 else if (GET_CODE (disp
) == LABEL_REF
)
8661 output_asm_label (disp
);
8663 output_addr_const (file
, disp
);
8668 print_reg (base
, 0, file
);
8672 print_reg (index
, 0, file
);
8674 fprintf (file
, ",%d", scale
);
8680 rtx offset
= NULL_RTX
;
8684 /* Pull out the offset of a symbol; print any symbol itself. */
8685 if (GET_CODE (disp
) == CONST
8686 && GET_CODE (XEXP (disp
, 0)) == PLUS
8687 && CONST_INT_P (XEXP (XEXP (disp
, 0), 1)))
8689 offset
= XEXP (XEXP (disp
, 0), 1);
8690 disp
= gen_rtx_CONST (VOIDmode
,
8691 XEXP (XEXP (disp
, 0), 0));
8695 output_pic_addr_const (file
, disp
, 0);
8696 else if (GET_CODE (disp
) == LABEL_REF
)
8697 output_asm_label (disp
);
8698 else if (CONST_INT_P (disp
))
8701 output_addr_const (file
, disp
);
8707 print_reg (base
, 0, file
);
8710 if (INTVAL (offset
) >= 0)
8712 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, INTVAL (offset
));
8716 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, INTVAL (offset
));
8723 print_reg (index
, 0, file
);
8725 fprintf (file
, "*%d", scale
);
8733 output_addr_const_extra (FILE *file
, rtx x
)
8737 if (GET_CODE (x
) != UNSPEC
)
8740 op
= XVECEXP (x
, 0, 0);
8741 switch (XINT (x
, 1))
8743 case UNSPEC_GOTTPOFF
:
8744 output_addr_const (file
, op
);
8745 /* FIXME: This might be @TPOFF in Sun ld. */
8746 fputs ("@GOTTPOFF", file
);
8749 output_addr_const (file
, op
);
8750 fputs ("@TPOFF", file
);
8753 output_addr_const (file
, op
);
8755 fputs ("@TPOFF", file
);
8757 fputs ("@NTPOFF", file
);
8760 output_addr_const (file
, op
);
8761 fputs ("@DTPOFF", file
);
8763 case UNSPEC_GOTNTPOFF
:
8764 output_addr_const (file
, op
);
8766 fputs ("@GOTTPOFF(%rip)", file
);
8768 fputs ("@GOTNTPOFF", file
);
8770 case UNSPEC_INDNTPOFF
:
8771 output_addr_const (file
, op
);
8772 fputs ("@INDNTPOFF", file
);
8782 /* Split one or more DImode RTL references into pairs of SImode
8783 references. The RTL can be REG, offsettable MEM, integer constant, or
8784 CONST_DOUBLE. "operands" is a pointer to an array of DImode RTL to
8785 split and "num" is its length. lo_half and hi_half are output arrays
8786 that parallel "operands". */
8789 split_di (rtx operands
[], int num
, rtx lo_half
[], rtx hi_half
[])
8793 rtx op
= operands
[num
];
8795 /* simplify_subreg refuse to split volatile memory addresses,
8796 but we still have to handle it. */
8799 lo_half
[num
] = adjust_address (op
, SImode
, 0);
8800 hi_half
[num
] = adjust_address (op
, SImode
, 4);
8804 lo_half
[num
] = simplify_gen_subreg (SImode
, op
,
8805 GET_MODE (op
) == VOIDmode
8806 ? DImode
: GET_MODE (op
), 0);
8807 hi_half
[num
] = simplify_gen_subreg (SImode
, op
,
8808 GET_MODE (op
) == VOIDmode
8809 ? DImode
: GET_MODE (op
), 4);
8813 /* Split one or more TImode RTL references into pairs of DImode
8814 references. The RTL can be REG, offsettable MEM, integer constant, or
8815 CONST_DOUBLE. "operands" is a pointer to an array of DImode RTL to
8816 split and "num" is its length. lo_half and hi_half are output arrays
8817 that parallel "operands". */
8820 split_ti (rtx operands
[], int num
, rtx lo_half
[], rtx hi_half
[])
8824 rtx op
= operands
[num
];
8826 /* simplify_subreg refuse to split volatile memory addresses, but we
8827 still have to handle it. */
8830 lo_half
[num
] = adjust_address (op
, DImode
, 0);
8831 hi_half
[num
] = adjust_address (op
, DImode
, 8);
8835 lo_half
[num
] = simplify_gen_subreg (DImode
, op
, TImode
, 0);
8836 hi_half
[num
] = simplify_gen_subreg (DImode
, op
, TImode
, 8);
8841 /* Output code to perform a 387 binary operation in INSN, one of PLUS,
8842 MINUS, MULT or DIV. OPERANDS are the insn operands, where operands[3]
8843 is the expression of the binary operation. The output may either be
8844 emitted here, or returned to the caller, like all output_* functions.
8846 There is no guarantee that the operands are the same mode, as they
8847 might be within FLOAT or FLOAT_EXTEND expressions. */
8849 #ifndef SYSV386_COMPAT
8850 /* Set to 1 for compatibility with brain-damaged assemblers. No-one
8851 wants to fix the assemblers because that causes incompatibility
8852 with gcc. No-one wants to fix gcc because that causes
8853 incompatibility with assemblers... You can use the option of
8854 -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way. */
8855 #define SYSV386_COMPAT 1
8859 output_387_binary_op (rtx insn
, rtx
*operands
)
8861 static char buf
[30];
8864 int is_sse
= SSE_REG_P (operands
[0]) || SSE_REG_P (operands
[1]) || SSE_REG_P (operands
[2]);
8866 #ifdef ENABLE_CHECKING
8867 /* Even if we do not want to check the inputs, this documents input
8868 constraints. Which helps in understanding the following code. */
8869 if (STACK_REG_P (operands
[0])
8870 && ((REG_P (operands
[1])
8871 && REGNO (operands
[0]) == REGNO (operands
[1])
8872 && (STACK_REG_P (operands
[2]) || MEM_P (operands
[2])))
8873 || (REG_P (operands
[2])
8874 && REGNO (operands
[0]) == REGNO (operands
[2])
8875 && (STACK_REG_P (operands
[1]) || MEM_P (operands
[1]))))
8876 && (STACK_TOP_P (operands
[1]) || STACK_TOP_P (operands
[2])))
8879 gcc_assert (is_sse
);
8882 switch (GET_CODE (operands
[3]))
8885 if (GET_MODE_CLASS (GET_MODE (operands
[1])) == MODE_INT
8886 || GET_MODE_CLASS (GET_MODE (operands
[2])) == MODE_INT
)
8894 if (GET_MODE_CLASS (GET_MODE (operands
[1])) == MODE_INT
8895 || GET_MODE_CLASS (GET_MODE (operands
[2])) == MODE_INT
)
8903 if (GET_MODE_CLASS (GET_MODE (operands
[1])) == MODE_INT
8904 || GET_MODE_CLASS (GET_MODE (operands
[2])) == MODE_INT
)
8912 if (GET_MODE_CLASS (GET_MODE (operands
[1])) == MODE_INT
8913 || GET_MODE_CLASS (GET_MODE (operands
[2])) == MODE_INT
)
8927 if (GET_MODE (operands
[0]) == SFmode
)
8928 strcat (buf
, "ss\t{%2, %0|%0, %2}");
8930 strcat (buf
, "sd\t{%2, %0|%0, %2}");
8935 switch (GET_CODE (operands
[3]))
8939 if (REG_P (operands
[2]) && REGNO (operands
[0]) == REGNO (operands
[2]))
8941 rtx temp
= operands
[2];
8942 operands
[2] = operands
[1];
8946 /* know operands[0] == operands[1]. */
8948 if (MEM_P (operands
[2]))
8954 if (find_regno_note (insn
, REG_DEAD
, REGNO (operands
[2])))
8956 if (STACK_TOP_P (operands
[0]))
8957 /* How is it that we are storing to a dead operand[2]?
8958 Well, presumably operands[1] is dead too. We can't
8959 store the result to st(0) as st(0) gets popped on this
8960 instruction. Instead store to operands[2] (which I
8961 think has to be st(1)). st(1) will be popped later.
8962 gcc <= 2.8.1 didn't have this check and generated
8963 assembly code that the Unixware assembler rejected. */
8964 p
= "p\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
8966 p
= "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
8970 if (STACK_TOP_P (operands
[0]))
8971 p
= "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
8973 p
= "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
8978 if (MEM_P (operands
[1]))
8984 if (MEM_P (operands
[2]))
8990 if (find_regno_note (insn
, REG_DEAD
, REGNO (operands
[2])))
8993 /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
8994 derived assemblers, confusingly reverse the direction of
8995 the operation for fsub{r} and fdiv{r} when the
8996 destination register is not st(0). The Intel assembler
8997 doesn't have this brain damage. Read !SYSV386_COMPAT to
8998 figure out what the hardware really does. */
8999 if (STACK_TOP_P (operands
[0]))
9000 p
= "{p\t%0, %2|rp\t%2, %0}";
9002 p
= "{rp\t%2, %0|p\t%0, %2}";
9004 if (STACK_TOP_P (operands
[0]))
9005 /* As above for fmul/fadd, we can't store to st(0). */
9006 p
= "rp\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
9008 p
= "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
9013 if (find_regno_note (insn
, REG_DEAD
, REGNO (operands
[1])))
9016 if (STACK_TOP_P (operands
[0]))
9017 p
= "{rp\t%0, %1|p\t%1, %0}";
9019 p
= "{p\t%1, %0|rp\t%0, %1}";
9021 if (STACK_TOP_P (operands
[0]))
9022 p
= "p\t{%0, %1|%1, %0}"; /* st(1) = st(1) op st(0); pop */
9024 p
= "rp\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2); pop */
9029 if (STACK_TOP_P (operands
[0]))
9031 if (STACK_TOP_P (operands
[1]))
9032 p
= "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
9034 p
= "r\t{%y1, %0|%0, %y1}"; /* st(0) = st(r1) op st(0) */
9037 else if (STACK_TOP_P (operands
[1]))
9040 p
= "{\t%1, %0|r\t%0, %1}";
9042 p
= "r\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2) */
9048 p
= "{r\t%2, %0|\t%0, %2}";
9050 p
= "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
9063 /* Return needed mode for entity in optimize_mode_switching pass. */
9066 ix86_mode_needed (int entity
, rtx insn
)
9068 enum attr_i387_cw mode
;
9070 /* The mode UNINITIALIZED is used to store control word after a
9071 function call or ASM pattern. The mode ANY specify that function
9072 has no requirements on the control word and make no changes in the
9073 bits we are interested in. */
9076 || (NONJUMP_INSN_P (insn
)
9077 && (asm_noperands (PATTERN (insn
)) >= 0
9078 || GET_CODE (PATTERN (insn
)) == ASM_INPUT
)))
9079 return I387_CW_UNINITIALIZED
;
9081 if (recog_memoized (insn
) < 0)
9084 mode
= get_attr_i387_cw (insn
);
9089 if (mode
== I387_CW_TRUNC
)
9094 if (mode
== I387_CW_FLOOR
)
9099 if (mode
== I387_CW_CEIL
)
9104 if (mode
== I387_CW_MASK_PM
)
9115 /* Output code to initialize control word copies used by trunc?f?i and
9116 rounding patterns. CURRENT_MODE is set to current control word,
9117 while NEW_MODE is set to new control word. */
9120 emit_i387_cw_initialization (int mode
)
9122 rtx stored_mode
= assign_386_stack_local (HImode
, SLOT_CW_STORED
);
9127 rtx reg
= gen_reg_rtx (HImode
);
9129 emit_insn (gen_x86_fnstcw_1 (stored_mode
));
9130 emit_move_insn (reg
, copy_rtx (stored_mode
));
9132 if (TARGET_64BIT
|| TARGET_PARTIAL_REG_STALL
|| optimize_size
)
9137 /* round toward zero (truncate) */
9138 emit_insn (gen_iorhi3 (reg
, reg
, GEN_INT (0x0c00)));
9139 slot
= SLOT_CW_TRUNC
;
9143 /* round down toward -oo */
9144 emit_insn (gen_andhi3 (reg
, reg
, GEN_INT (~0x0c00)));
9145 emit_insn (gen_iorhi3 (reg
, reg
, GEN_INT (0x0400)));
9146 slot
= SLOT_CW_FLOOR
;
9150 /* round up toward +oo */
9151 emit_insn (gen_andhi3 (reg
, reg
, GEN_INT (~0x0c00)));
9152 emit_insn (gen_iorhi3 (reg
, reg
, GEN_INT (0x0800)));
9153 slot
= SLOT_CW_CEIL
;
9156 case I387_CW_MASK_PM
:
9157 /* mask precision exception for nearbyint() */
9158 emit_insn (gen_iorhi3 (reg
, reg
, GEN_INT (0x0020)));
9159 slot
= SLOT_CW_MASK_PM
;
9171 /* round toward zero (truncate) */
9172 emit_insn (gen_movsi_insv_1 (reg
, GEN_INT (0xc)));
9173 slot
= SLOT_CW_TRUNC
;
9177 /* round down toward -oo */
9178 emit_insn (gen_movsi_insv_1 (reg
, GEN_INT (0x4)));
9179 slot
= SLOT_CW_FLOOR
;
9183 /* round up toward +oo */
9184 emit_insn (gen_movsi_insv_1 (reg
, GEN_INT (0x8)));
9185 slot
= SLOT_CW_CEIL
;
9188 case I387_CW_MASK_PM
:
9189 /* mask precision exception for nearbyint() */
9190 emit_insn (gen_iorhi3 (reg
, reg
, GEN_INT (0x0020)));
9191 slot
= SLOT_CW_MASK_PM
;
9199 gcc_assert (slot
< MAX_386_STACK_LOCALS
);
9201 new_mode
= assign_386_stack_local (HImode
, slot
);
9202 emit_move_insn (new_mode
, reg
);
9205 /* Output code for INSN to convert a float to a signed int. OPERANDS
9206 are the insn operands. The output may be [HSD]Imode and the input
9207 operand may be [SDX]Fmode. */
9210 output_fix_trunc (rtx insn
, rtx
*operands
, int fisttp
)
9212 int stack_top_dies
= find_regno_note (insn
, REG_DEAD
, FIRST_STACK_REG
) != 0;
9213 int dimode_p
= GET_MODE (operands
[0]) == DImode
;
9214 int round_mode
= get_attr_i387_cw (insn
);
9216 /* Jump through a hoop or two for DImode, since the hardware has no
9217 non-popping instruction. We used to do this a different way, but
9218 that was somewhat fragile and broke with post-reload splitters. */
9219 if ((dimode_p
|| fisttp
) && !stack_top_dies
)
9220 output_asm_insn ("fld\t%y1", operands
);
9222 gcc_assert (STACK_TOP_P (operands
[1]));
9223 gcc_assert (MEM_P (operands
[0]));
9226 output_asm_insn ("fisttp%z0\t%0", operands
);
9229 if (round_mode
!= I387_CW_ANY
)
9230 output_asm_insn ("fldcw\t%3", operands
);
9231 if (stack_top_dies
|| dimode_p
)
9232 output_asm_insn ("fistp%z0\t%0", operands
);
9234 output_asm_insn ("fist%z0\t%0", operands
);
9235 if (round_mode
!= I387_CW_ANY
)
9236 output_asm_insn ("fldcw\t%2", operands
);
9242 /* Output code for x87 ffreep insn. The OPNO argument, which may only
9243 have the values zero or one, indicates the ffreep insn's operand
9244 from the OPERANDS array. */
9247 output_387_ffreep (rtx
*operands ATTRIBUTE_UNUSED
, int opno
)
9249 if (TARGET_USE_FFREEP
)
9250 #if HAVE_AS_IX86_FFREEP
9251 return opno
? "ffreep\t%y1" : "ffreep\t%y0";
9254 static char retval
[] = ".word\t0xc_df";
9255 int regno
= REGNO (operands
[opno
]);
9257 gcc_assert (FP_REGNO_P (regno
));
9259 retval
[9] = '0' + (regno
- FIRST_STACK_REG
);
9264 return opno
? "fstp\t%y1" : "fstp\t%y0";
9268 /* Output code for INSN to compare OPERANDS. EFLAGS_P is 1 when fcomi
9269 should be used. UNORDERED_P is true when fucom should be used. */
9272 output_fp_compare (rtx insn
, rtx
*operands
, int eflags_p
, int unordered_p
)
9275 rtx cmp_op0
, cmp_op1
;
9276 int is_sse
= SSE_REG_P (operands
[0]) || SSE_REG_P (operands
[1]);
9280 cmp_op0
= operands
[0];
9281 cmp_op1
= operands
[1];
9285 cmp_op0
= operands
[1];
9286 cmp_op1
= operands
[2];
9291 if (GET_MODE (operands
[0]) == SFmode
)
9293 return "ucomiss\t{%1, %0|%0, %1}";
9295 return "comiss\t{%1, %0|%0, %1}";
9298 return "ucomisd\t{%1, %0|%0, %1}";
9300 return "comisd\t{%1, %0|%0, %1}";
9303 gcc_assert (STACK_TOP_P (cmp_op0
));
9305 stack_top_dies
= find_regno_note (insn
, REG_DEAD
, FIRST_STACK_REG
) != 0;
9307 if (cmp_op1
== CONST0_RTX (GET_MODE (cmp_op1
)))
9311 output_asm_insn ("ftst\n\tfnstsw\t%0", operands
);
9312 return output_387_ffreep (operands
, 1);
9315 return "ftst\n\tfnstsw\t%0";
9318 if (STACK_REG_P (cmp_op1
)
9320 && find_regno_note (insn
, REG_DEAD
, REGNO (cmp_op1
))
9321 && REGNO (cmp_op1
) != FIRST_STACK_REG
)
9323 /* If both the top of the 387 stack dies, and the other operand
9324 is also a stack register that dies, then this must be a
9325 `fcompp' float compare */
9329 /* There is no double popping fcomi variant. Fortunately,
9330 eflags is immune from the fstp's cc clobbering. */
9332 output_asm_insn ("fucomip\t{%y1, %0|%0, %y1}", operands
);
9334 output_asm_insn ("fcomip\t{%y1, %0|%0, %y1}", operands
);
9335 return output_387_ffreep (operands
, 0);
9340 return "fucompp\n\tfnstsw\t%0";
9342 return "fcompp\n\tfnstsw\t%0";
9347 /* Encoded here as eflags_p | intmode | unordered_p | stack_top_dies. */
9349 static const char * const alt
[16] =
9351 "fcom%z2\t%y2\n\tfnstsw\t%0",
9352 "fcomp%z2\t%y2\n\tfnstsw\t%0",
9353 "fucom%z2\t%y2\n\tfnstsw\t%0",
9354 "fucomp%z2\t%y2\n\tfnstsw\t%0",
9356 "ficom%z2\t%y2\n\tfnstsw\t%0",
9357 "ficomp%z2\t%y2\n\tfnstsw\t%0",
9361 "fcomi\t{%y1, %0|%0, %y1}",
9362 "fcomip\t{%y1, %0|%0, %y1}",
9363 "fucomi\t{%y1, %0|%0, %y1}",
9364 "fucomip\t{%y1, %0|%0, %y1}",
9375 mask
= eflags_p
<< 3;
9376 mask
|= (GET_MODE_CLASS (GET_MODE (cmp_op1
)) == MODE_INT
) << 2;
9377 mask
|= unordered_p
<< 1;
9378 mask
|= stack_top_dies
;
9380 gcc_assert (mask
< 16);
9389 ix86_output_addr_vec_elt (FILE *file
, int value
)
9391 const char *directive
= ASM_LONG
;
9395 directive
= ASM_QUAD
;
9397 gcc_assert (!TARGET_64BIT
);
9400 fprintf (file
, "%s%s%d\n", directive
, LPREFIX
, value
);
9404 ix86_output_addr_diff_elt (FILE *file
, int value
, int rel
)
9406 const char *directive
= ASM_LONG
;
9409 if (TARGET_64BIT
&& CASE_VECTOR_MODE
== DImode
)
9410 directive
= ASM_QUAD
;
9412 gcc_assert (!TARGET_64BIT
);
9414 /* We can't use @GOTOFF for text labels on VxWorks; see gotoff_operand. */
9415 if (TARGET_64BIT
|| TARGET_VXWORKS_RTP
)
9416 fprintf (file
, "%s%s%d-%s%d\n",
9417 directive
, LPREFIX
, value
, LPREFIX
, rel
);
9418 else if (HAVE_AS_GOTOFF_IN_DATA
)
9419 fprintf (file
, "%s%s%d@GOTOFF\n", ASM_LONG
, LPREFIX
, value
);
9421 else if (TARGET_MACHO
)
9423 fprintf (file
, "%s%s%d-", ASM_LONG
, LPREFIX
, value
);
9424 machopic_output_function_base_name (file
);
9425 fprintf(file
, "\n");
9429 asm_fprintf (file
, "%s%U%s+[.-%s%d]\n",
9430 ASM_LONG
, GOT_SYMBOL_NAME
, LPREFIX
, value
);
9433 /* Generate either "mov $0, reg" or "xor reg, reg", as appropriate
9437 ix86_expand_clear (rtx dest
)
9441 /* We play register width games, which are only valid after reload. */
9442 gcc_assert (reload_completed
);
9444 /* Avoid HImode and its attendant prefix byte. */
9445 if (GET_MODE_SIZE (GET_MODE (dest
)) < 4)
9446 dest
= gen_rtx_REG (SImode
, REGNO (dest
));
9448 tmp
= gen_rtx_SET (VOIDmode
, dest
, const0_rtx
);
9450 /* This predicate should match that for movsi_xor and movdi_xor_rex64. */
9451 if (reload_completed
&& (!TARGET_USE_MOV0
|| optimize_size
))
9453 rtx clob
= gen_rtx_CLOBBER (VOIDmode
, gen_rtx_REG (CCmode
, 17));
9454 tmp
= gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (2, tmp
, clob
));
9460 /* X is an unchanging MEM. If it is a constant pool reference, return
9461 the constant pool rtx, else NULL. */
9464 maybe_get_pool_constant (rtx x
)
9466 x
= ix86_delegitimize_address (XEXP (x
, 0));
9468 if (GET_CODE (x
) == SYMBOL_REF
&& CONSTANT_POOL_ADDRESS_P (x
))
9469 return get_pool_constant (x
);
9475 ix86_expand_move (enum machine_mode mode
, rtx operands
[])
9477 int strict
= (reload_in_progress
|| reload_completed
);
9479 enum tls_model model
;
9484 if (GET_CODE (op1
) == SYMBOL_REF
)
9486 model
= SYMBOL_REF_TLS_MODEL (op1
);
9489 op1
= legitimize_tls_address (op1
, model
, true);
9490 op1
= force_operand (op1
, op0
);
9495 else if (GET_CODE (op1
) == CONST
9496 && GET_CODE (XEXP (op1
, 0)) == PLUS
9497 && GET_CODE (XEXP (XEXP (op1
, 0), 0)) == SYMBOL_REF
)
9499 model
= SYMBOL_REF_TLS_MODEL (XEXP (XEXP (op1
, 0), 0));
9502 rtx addend
= XEXP (XEXP (op1
, 0), 1);
9503 op1
= legitimize_tls_address (XEXP (XEXP (op1
, 0), 0), model
, true);
9504 op1
= force_operand (op1
, NULL
);
9505 op1
= expand_simple_binop (Pmode
, PLUS
, op1
, addend
,
9506 op0
, 1, OPTAB_DIRECT
);
9512 if (flag_pic
&& mode
== Pmode
&& symbolic_operand (op1
, Pmode
))
9514 if (TARGET_MACHO
&& !TARGET_64BIT
)
9519 rtx temp
= ((reload_in_progress
9520 || ((op0
&& REG_P (op0
))
9522 ? op0
: gen_reg_rtx (Pmode
));
9523 op1
= machopic_indirect_data_reference (op1
, temp
);
9524 op1
= machopic_legitimize_pic_address (op1
, mode
,
9525 temp
== op1
? 0 : temp
);
9527 else if (MACHOPIC_INDIRECT
)
9528 op1
= machopic_indirect_data_reference (op1
, 0);
9536 op1
= force_reg (Pmode
, op1
);
9537 else if (!TARGET_64BIT
|| !x86_64_movabs_operand (op1
, Pmode
))
9539 rtx reg
= no_new_pseudos
? op0
: NULL_RTX
;
9540 op1
= legitimize_pic_address (op1
, reg
);
9549 && (PUSH_ROUNDING (GET_MODE_SIZE (mode
)) != GET_MODE_SIZE (mode
)
9550 || !push_operand (op0
, mode
))
9552 op1
= force_reg (mode
, op1
);
9554 if (push_operand (op0
, mode
)
9555 && ! general_no_elim_operand (op1
, mode
))
9556 op1
= copy_to_mode_reg (mode
, op1
);
9558 /* Force large constants in 64bit compilation into register
9559 to get them CSEed. */
9560 if (TARGET_64BIT
&& mode
== DImode
9561 && immediate_operand (op1
, mode
)
9562 && !x86_64_zext_immediate_operand (op1
, VOIDmode
)
9563 && !register_operand (op0
, mode
)
9564 && optimize
&& !reload_completed
&& !reload_in_progress
)
9565 op1
= copy_to_mode_reg (mode
, op1
);
9567 if (FLOAT_MODE_P (mode
))
9569 /* If we are loading a floating point constant to a register,
9570 force the value to memory now, since we'll get better code
9571 out the back end. */
9575 else if (GET_CODE (op1
) == CONST_DOUBLE
)
9577 op1
= validize_mem (force_const_mem (mode
, op1
));
9578 if (!register_operand (op0
, mode
))
9580 rtx temp
= gen_reg_rtx (mode
);
9581 emit_insn (gen_rtx_SET (VOIDmode
, temp
, op1
));
9582 emit_move_insn (op0
, temp
);
9589 emit_insn (gen_rtx_SET (VOIDmode
, op0
, op1
));
9593 ix86_expand_vector_move (enum machine_mode mode
, rtx operands
[])
9595 rtx op0
= operands
[0], op1
= operands
[1];
9597 /* Force constants other than zero into memory. We do not know how
9598 the instructions used to build constants modify the upper 64 bits
9599 of the register, once we have that information we may be able
9600 to handle some of them more efficiently. */
9601 if ((reload_in_progress
| reload_completed
) == 0
9602 && register_operand (op0
, mode
)
9604 && standard_sse_constant_p (op1
) <= 0)
9605 op1
= validize_mem (force_const_mem (mode
, op1
));
9607 /* Make operand1 a register if it isn't already. */
9609 && !register_operand (op0
, mode
)
9610 && !register_operand (op1
, mode
))
9612 emit_move_insn (op0
, force_reg (GET_MODE (op0
), op1
));
9616 emit_insn (gen_rtx_SET (VOIDmode
, op0
, op1
));
9619 /* Implement the movmisalign patterns for SSE. Non-SSE modes go
9620 straight to ix86_expand_vector_move. */
9621 /* Code generation for scalar reg-reg moves of single and double precision data:
9622 if (x86_sse_partial_reg_dependency == true | x86_sse_split_regs == true)
9626 if (x86_sse_partial_reg_dependency == true)
9631 Code generation for scalar loads of double precision data:
9632 if (x86_sse_split_regs == true)
9633 movlpd mem, reg (gas syntax)
9637 Code generation for unaligned packed loads of single precision data
9638 (x86_sse_unaligned_move_optimal overrides x86_sse_partial_reg_dependency):
9639 if (x86_sse_unaligned_move_optimal)
9642 if (x86_sse_partial_reg_dependency == true)
9654 Code generation for unaligned packed loads of double precision data
9655 (x86_sse_unaligned_move_optimal overrides x86_sse_split_regs):
9656 if (x86_sse_unaligned_move_optimal)
9659 if (x86_sse_split_regs == true)
9672 ix86_expand_vector_move_misalign (enum machine_mode mode
, rtx operands
[])
9681 /* If we're optimizing for size, movups is the smallest. */
9684 op0
= gen_lowpart (V4SFmode
, op0
);
9685 op1
= gen_lowpart (V4SFmode
, op1
);
9686 emit_insn (gen_sse_movups (op0
, op1
));
9690 /* ??? If we have typed data, then it would appear that using
9691 movdqu is the only way to get unaligned data loaded with
9693 if (TARGET_SSE2
&& GET_MODE_CLASS (mode
) == MODE_VECTOR_INT
)
9695 op0
= gen_lowpart (V16QImode
, op0
);
9696 op1
= gen_lowpart (V16QImode
, op1
);
9697 emit_insn (gen_sse2_movdqu (op0
, op1
));
9701 if (TARGET_SSE2
&& mode
== V2DFmode
)
9705 if (TARGET_SSE_UNALIGNED_MOVE_OPTIMAL
)
9707 op0
= gen_lowpart (V2DFmode
, op0
);
9708 op1
= gen_lowpart (V2DFmode
, op1
);
9709 emit_insn (gen_sse2_movupd (op0
, op1
));
9713 /* When SSE registers are split into halves, we can avoid
9714 writing to the top half twice. */
9715 if (TARGET_SSE_SPLIT_REGS
)
9717 emit_insn (gen_rtx_CLOBBER (VOIDmode
, op0
));
9722 /* ??? Not sure about the best option for the Intel chips.
9723 The following would seem to satisfy; the register is
9724 entirely cleared, breaking the dependency chain. We
9725 then store to the upper half, with a dependency depth
9726 of one. A rumor has it that Intel recommends two movsd
9727 followed by an unpacklpd, but this is unconfirmed. And
9728 given that the dependency depth of the unpacklpd would
9729 still be one, I'm not sure why this would be better. */
9730 zero
= CONST0_RTX (V2DFmode
);
9733 m
= adjust_address (op1
, DFmode
, 0);
9734 emit_insn (gen_sse2_loadlpd (op0
, zero
, m
));
9735 m
= adjust_address (op1
, DFmode
, 8);
9736 emit_insn (gen_sse2_loadhpd (op0
, op0
, m
));
9740 if (TARGET_SSE_UNALIGNED_MOVE_OPTIMAL
)
9742 op0
= gen_lowpart (V4SFmode
, op0
);
9743 op1
= gen_lowpart (V4SFmode
, op1
);
9744 emit_insn (gen_sse_movups (op0
, op1
));
9748 if (TARGET_SSE_PARTIAL_REG_DEPENDENCY
)
9749 emit_move_insn (op0
, CONST0_RTX (mode
));
9751 emit_insn (gen_rtx_CLOBBER (VOIDmode
, op0
));
9753 if (mode
!= V4SFmode
)
9754 op0
= gen_lowpart (V4SFmode
, op0
);
9755 m
= adjust_address (op1
, V2SFmode
, 0);
9756 emit_insn (gen_sse_loadlps (op0
, op0
, m
));
9757 m
= adjust_address (op1
, V2SFmode
, 8);
9758 emit_insn (gen_sse_loadhps (op0
, op0
, m
));
9761 else if (MEM_P (op0
))
9763 /* If we're optimizing for size, movups is the smallest. */
9766 op0
= gen_lowpart (V4SFmode
, op0
);
9767 op1
= gen_lowpart (V4SFmode
, op1
);
9768 emit_insn (gen_sse_movups (op0
, op1
));
9772 /* ??? Similar to above, only less clear because of quote
9773 typeless stores unquote. */
9774 if (TARGET_SSE2
&& !TARGET_SSE_TYPELESS_STORES
9775 && GET_MODE_CLASS (mode
) == MODE_VECTOR_INT
)
9777 op0
= gen_lowpart (V16QImode
, op0
);
9778 op1
= gen_lowpart (V16QImode
, op1
);
9779 emit_insn (gen_sse2_movdqu (op0
, op1
));
9783 if (TARGET_SSE2
&& mode
== V2DFmode
)
9785 m
= adjust_address (op0
, DFmode
, 0);
9786 emit_insn (gen_sse2_storelpd (m
, op1
));
9787 m
= adjust_address (op0
, DFmode
, 8);
9788 emit_insn (gen_sse2_storehpd (m
, op1
));
9792 if (mode
!= V4SFmode
)
9793 op1
= gen_lowpart (V4SFmode
, op1
);
9794 m
= adjust_address (op0
, V2SFmode
, 0);
9795 emit_insn (gen_sse_storelps (m
, op1
));
9796 m
= adjust_address (op0
, V2SFmode
, 8);
9797 emit_insn (gen_sse_storehps (m
, op1
));
9804 /* Expand a push in MODE. This is some mode for which we do not support
9805 proper push instructions, at least from the registers that we expect
9806 the value to live in. */
9809 ix86_expand_push (enum machine_mode mode
, rtx x
)
9813 tmp
= expand_simple_binop (Pmode
, PLUS
, stack_pointer_rtx
,
9814 GEN_INT (-GET_MODE_SIZE (mode
)),
9815 stack_pointer_rtx
, 1, OPTAB_DIRECT
);
9816 if (tmp
!= stack_pointer_rtx
)
9817 emit_move_insn (stack_pointer_rtx
, tmp
);
9819 tmp
= gen_rtx_MEM (mode
, stack_pointer_rtx
);
9820 emit_move_insn (tmp
, x
);
9823 /* Helper function of ix86_fixup_binary_operands to canonicalize
9824 operand order. Returns true if the operands should be swapped. */
9827 ix86_swap_binary_operands_p (enum rtx_code code
, enum machine_mode mode
,
9830 rtx dst
= operands
[0];
9831 rtx src1
= operands
[1];
9832 rtx src2
= operands
[2];
9834 /* If the operation is not commutative, we can't do anything. */
9835 if (GET_RTX_CLASS (code
) != RTX_COMM_ARITH
)
9838 /* Highest priority is that src1 should match dst. */
9839 if (rtx_equal_p (dst
, src1
))
9841 if (rtx_equal_p (dst
, src2
))
9844 /* Next highest priority is that immediate constants come second. */
9845 if (immediate_operand (src2
, mode
))
9847 if (immediate_operand (src1
, mode
))
9850 /* Lowest priority is that memory references should come second. */
9860 /* Fix up OPERANDS to satisfy ix86_binary_operator_ok. Return the
9861 destination to use for the operation. If different from the true
9862 destination in operands[0], a copy operation will be required. */
9865 ix86_fixup_binary_operands (enum rtx_code code
, enum machine_mode mode
,
9868 rtx dst
= operands
[0];
9869 rtx src1
= operands
[1];
9870 rtx src2
= operands
[2];
9872 /* Canonicalize operand order. */
9873 if (ix86_swap_binary_operands_p (code
, mode
, operands
))
9880 /* Both source operands cannot be in memory. */
9881 if (MEM_P (src1
) && MEM_P (src2
))
9883 /* Optimization: Only read from memory once. */
9884 if (rtx_equal_p (src1
, src2
))
9886 src2
= force_reg (mode
, src2
);
9890 src2
= force_reg (mode
, src2
);
9893 /* If the destination is memory, and we do not have matching source
9894 operands, do things in registers. */
9895 if (MEM_P (dst
) && !rtx_equal_p (dst
, src1
))
9896 dst
= gen_reg_rtx (mode
);
9898 /* Source 1 cannot be a constant. */
9899 if (CONSTANT_P (src1
))
9900 src1
= force_reg (mode
, src1
);
9902 /* Source 1 cannot be a non-matching memory. */
9903 if (MEM_P (src1
) && !rtx_equal_p (dst
, src1
))
9904 src1
= force_reg (mode
, src1
);
9911 /* Similarly, but assume that the destination has already been
9915 ix86_fixup_binary_operands_no_copy (enum rtx_code code
,
9916 enum machine_mode mode
, rtx operands
[])
9918 rtx dst
= ix86_fixup_binary_operands (code
, mode
, operands
);
9919 gcc_assert (dst
== operands
[0]);
9922 /* Attempt to expand a binary operator. Make the expansion closer to the
9923 actual machine, then just general_operand, which will allow 3 separate
9924 memory references (one output, two input) in a single insn. */
9927 ix86_expand_binary_operator (enum rtx_code code
, enum machine_mode mode
,
9930 rtx src1
, src2
, dst
, op
, clob
;
9932 dst
= ix86_fixup_binary_operands (code
, mode
, operands
);
9936 /* Emit the instruction. */
9938 op
= gen_rtx_SET (VOIDmode
, dst
, gen_rtx_fmt_ee (code
, mode
, src1
, src2
));
9939 if (reload_in_progress
)
9941 /* Reload doesn't know about the flags register, and doesn't know that
9942 it doesn't want to clobber it. We can only do this with PLUS. */
9943 gcc_assert (code
== PLUS
);
9948 clob
= gen_rtx_CLOBBER (VOIDmode
, gen_rtx_REG (CCmode
, FLAGS_REG
));
9949 emit_insn (gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (2, op
, clob
)));
9952 /* Fix up the destination if needed. */
9953 if (dst
!= operands
[0])
9954 emit_move_insn (operands
[0], dst
);
9957 /* Return TRUE or FALSE depending on whether the binary operator meets the
9958 appropriate constraints. */
9961 ix86_binary_operator_ok (enum rtx_code code
, enum machine_mode mode
,
9964 rtx dst
= operands
[0];
9965 rtx src1
= operands
[1];
9966 rtx src2
= operands
[2];
9968 /* Both source operands cannot be in memory. */
9969 if (MEM_P (src1
) && MEM_P (src2
))
9972 /* Canonicalize operand order for commutative operators. */
9973 if (ix86_swap_binary_operands_p (code
, mode
, operands
))
9980 /* If the destination is memory, we must have a matching source operand. */
9981 if (MEM_P (dst
) && !rtx_equal_p (dst
, src1
))
9984 /* Source 1 cannot be a constant. */
9985 if (CONSTANT_P (src1
))
9988 /* Source 1 cannot be a non-matching memory. */
9989 if (MEM_P (src1
) && !rtx_equal_p (dst
, src1
))
9995 /* Attempt to expand a unary operator. Make the expansion closer to the
9996 actual machine, then just general_operand, which will allow 2 separate
9997 memory references (one output, one input) in a single insn. */
10000 ix86_expand_unary_operator (enum rtx_code code
, enum machine_mode mode
,
10003 int matching_memory
;
10004 rtx src
, dst
, op
, clob
;
10009 /* If the destination is memory, and we do not have matching source
10010 operands, do things in registers. */
10011 matching_memory
= 0;
10014 if (rtx_equal_p (dst
, src
))
10015 matching_memory
= 1;
10017 dst
= gen_reg_rtx (mode
);
10020 /* When source operand is memory, destination must match. */
10021 if (MEM_P (src
) && !matching_memory
)
10022 src
= force_reg (mode
, src
);
10024 /* Emit the instruction. */
10026 op
= gen_rtx_SET (VOIDmode
, dst
, gen_rtx_fmt_e (code
, mode
, src
));
10027 if (reload_in_progress
|| code
== NOT
)
10029 /* Reload doesn't know about the flags register, and doesn't know that
10030 it doesn't want to clobber it. */
10031 gcc_assert (code
== NOT
);
10036 clob
= gen_rtx_CLOBBER (VOIDmode
, gen_rtx_REG (CCmode
, FLAGS_REG
));
10037 emit_insn (gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (2, op
, clob
)));
10040 /* Fix up the destination if needed. */
10041 if (dst
!= operands
[0])
10042 emit_move_insn (operands
[0], dst
);
10045 /* Return TRUE or FALSE depending on whether the unary operator meets the
10046 appropriate constraints. */
10049 ix86_unary_operator_ok (enum rtx_code code ATTRIBUTE_UNUSED
,
10050 enum machine_mode mode ATTRIBUTE_UNUSED
,
10051 rtx operands
[2] ATTRIBUTE_UNUSED
)
10053 /* If one of operands is memory, source and destination must match. */
10054 if ((MEM_P (operands
[0])
10055 || MEM_P (operands
[1]))
10056 && ! rtx_equal_p (operands
[0], operands
[1]))
10061 /* Post-reload splitter for converting an SF or DFmode value in an
10062 SSE register into an unsigned SImode. */
10065 ix86_split_convert_uns_si_sse (rtx operands
[])
10067 enum machine_mode vecmode
;
10068 rtx value
, large
, zero_or_two31
, input
, two31
, x
;
10070 large
= operands
[1];
10071 zero_or_two31
= operands
[2];
10072 input
= operands
[3];
10073 two31
= operands
[4];
10074 vecmode
= GET_MODE (large
);
10075 value
= gen_rtx_REG (vecmode
, REGNO (operands
[0]));
10077 /* Load up the value into the low element. We must ensure that the other
10078 elements are valid floats -- zero is the easiest such value. */
10081 if (vecmode
== V4SFmode
)
10082 emit_insn (gen_vec_setv4sf_0 (value
, CONST0_RTX (V4SFmode
), input
));
10084 emit_insn (gen_sse2_loadlpd (value
, CONST0_RTX (V2DFmode
), input
));
10088 input
= gen_rtx_REG (vecmode
, REGNO (input
));
10089 emit_move_insn (value
, CONST0_RTX (vecmode
));
10090 if (vecmode
== V4SFmode
)
10091 emit_insn (gen_sse_movss (value
, value
, input
));
10093 emit_insn (gen_sse2_movsd (value
, value
, input
));
10096 emit_move_insn (large
, two31
);
10097 emit_move_insn (zero_or_two31
, MEM_P (two31
) ? large
: two31
);
10099 x
= gen_rtx_fmt_ee (LE
, vecmode
, large
, value
);
10100 emit_insn (gen_rtx_SET (VOIDmode
, large
, x
));
10102 x
= gen_rtx_AND (vecmode
, zero_or_two31
, large
);
10103 emit_insn (gen_rtx_SET (VOIDmode
, zero_or_two31
, x
));
10105 x
= gen_rtx_MINUS (vecmode
, value
, zero_or_two31
);
10106 emit_insn (gen_rtx_SET (VOIDmode
, value
, x
));
10108 large
= gen_rtx_REG (V4SImode
, REGNO (large
));
10109 emit_insn (gen_ashlv4si3 (large
, large
, GEN_INT (31)));
10111 x
= gen_rtx_REG (V4SImode
, REGNO (value
));
10112 if (vecmode
== V4SFmode
)
10113 emit_insn (gen_sse2_cvttps2dq (x
, value
));
10115 emit_insn (gen_sse2_cvttpd2dq (x
, value
));
10118 emit_insn (gen_xorv4si3 (value
, value
, large
));
10121 /* Convert an unsigned DImode value into a DFmode, using only SSE.
10122 Expects the 64-bit DImode to be supplied in a pair of integral
10123 registers. Requires SSE2; will use SSE3 if available. For x86_32,
10124 -mfpmath=sse, !optimize_size only. */
10127 ix86_expand_convert_uns_didf_sse (rtx target
, rtx input
)
10129 REAL_VALUE_TYPE bias_lo_rvt
, bias_hi_rvt
;
10130 rtx int_xmm
, fp_xmm
;
10131 rtx biases
, exponents
;
10134 int_xmm
= gen_reg_rtx (V4SImode
);
10135 if (TARGET_INTER_UNIT_MOVES
)
10136 emit_insn (gen_movdi_to_sse (int_xmm
, input
));
10137 else if (TARGET_SSE_SPLIT_REGS
)
10139 emit_insn (gen_rtx_CLOBBER (VOIDmode
, int_xmm
));
10140 emit_move_insn (gen_lowpart (DImode
, int_xmm
), input
);
10144 x
= gen_reg_rtx (V2DImode
);
10145 ix86_expand_vector_init_one_nonzero (false, V2DImode
, x
, input
, 0);
10146 emit_move_insn (int_xmm
, gen_lowpart (V4SImode
, x
));
10149 x
= gen_rtx_CONST_VECTOR (V4SImode
,
10150 gen_rtvec (4, GEN_INT (0x43300000UL
),
10151 GEN_INT (0x45300000UL
),
10152 const0_rtx
, const0_rtx
));
10153 exponents
= validize_mem (force_const_mem (V4SImode
, x
));
10155 /* int_xmm = {0x45300000UL, fp_xmm/hi, 0x43300000, fp_xmm/lo } */
10156 emit_insn (gen_sse2_punpckldq (int_xmm
, int_xmm
, exponents
));
10158 /* Concatenating (juxtaposing) (0x43300000UL ## fp_value_low_xmm)
10159 yields a valid DF value equal to (0x1.0p52 + double(fp_value_lo_xmm)).
10160 Similarly (0x45300000UL ## fp_value_hi_xmm) yields
10161 (0x1.0p84 + double(fp_value_hi_xmm)).
10162 Note these exponents differ by 32. */
10164 fp_xmm
= copy_to_mode_reg (V2DFmode
, gen_lowpart (V2DFmode
, int_xmm
));
10166 /* Subtract off those 0x1.0p52 and 0x1.0p84 biases, to produce values
10167 in [0,2**32-1] and [0]+[2**32,2**64-1] respectively. */
10168 real_ldexp (&bias_lo_rvt
, &dconst1
, 52);
10169 real_ldexp (&bias_hi_rvt
, &dconst1
, 84);
10170 biases
= const_double_from_real_value (bias_lo_rvt
, DFmode
);
10171 x
= const_double_from_real_value (bias_hi_rvt
, DFmode
);
10172 biases
= gen_rtx_CONST_VECTOR (V2DFmode
, gen_rtvec (2, biases
, x
));
10173 biases
= validize_mem (force_const_mem (V2DFmode
, biases
));
10174 emit_insn (gen_subv2df3 (fp_xmm
, fp_xmm
, biases
));
10176 /* Add the upper and lower DFmode values together. */
10178 emit_insn (gen_sse3_haddv2df3 (fp_xmm
, fp_xmm
, fp_xmm
));
10181 x
= copy_to_mode_reg (V2DFmode
, fp_xmm
);
10182 emit_insn (gen_sse2_unpckhpd (fp_xmm
, fp_xmm
, fp_xmm
));
10183 emit_insn (gen_addv2df3 (fp_xmm
, fp_xmm
, x
));
10186 ix86_expand_vector_extract (false, target
, fp_xmm
, 0);
10189 /* Convert an unsigned SImode value into a DFmode. Only currently used
10190 for SSE, but applicable anywhere. */
10193 ix86_expand_convert_uns_sidf_sse (rtx target
, rtx input
)
10195 REAL_VALUE_TYPE TWO31r
;
10198 x
= expand_simple_binop (SImode
, PLUS
, input
, GEN_INT (-2147483647 - 1),
10199 NULL
, 1, OPTAB_DIRECT
);
10201 fp
= gen_reg_rtx (DFmode
);
10202 emit_insn (gen_floatsidf2 (fp
, x
));
10204 real_ldexp (&TWO31r
, &dconst1
, 31);
10205 x
= const_double_from_real_value (TWO31r
, DFmode
);
10207 x
= expand_simple_binop (DFmode
, PLUS
, fp
, x
, target
, 0, OPTAB_DIRECT
);
10209 emit_move_insn (target
, x
);
10212 /* Convert a signed DImode value into a DFmode. Only used for SSE in
10213 32-bit mode; otherwise we have a direct convert instruction. */
10216 ix86_expand_convert_sign_didf_sse (rtx target
, rtx input
)
10218 REAL_VALUE_TYPE TWO32r
;
10219 rtx fp_lo
, fp_hi
, x
;
10221 fp_lo
= gen_reg_rtx (DFmode
);
10222 fp_hi
= gen_reg_rtx (DFmode
);
10224 emit_insn (gen_floatsidf2 (fp_hi
, gen_highpart (SImode
, input
)));
10226 real_ldexp (&TWO32r
, &dconst1
, 32);
10227 x
= const_double_from_real_value (TWO32r
, DFmode
);
10228 fp_hi
= expand_simple_binop (DFmode
, MULT
, fp_hi
, x
, fp_hi
, 0, OPTAB_DIRECT
);
10230 ix86_expand_convert_uns_sidf_sse (fp_lo
, gen_lowpart (SImode
, input
));
10232 x
= expand_simple_binop (DFmode
, PLUS
, fp_hi
, fp_lo
, target
,
10235 emit_move_insn (target
, x
);
10238 /* Convert an unsigned SImode value into a SFmode, using only SSE.
10239 For x86_32, -mfpmath=sse, !optimize_size only. */
10241 ix86_expand_convert_uns_sisf_sse (rtx target
, rtx input
)
10243 REAL_VALUE_TYPE ONE16r
;
10244 rtx fp_hi
, fp_lo
, int_hi
, int_lo
, x
;
10246 real_ldexp (&ONE16r
, &dconst1
, 16);
10247 x
= const_double_from_real_value (ONE16r
, SFmode
);
10248 int_lo
= expand_simple_binop (SImode
, AND
, input
, GEN_INT(0xffff),
10249 NULL
, 0, OPTAB_DIRECT
);
10250 int_hi
= expand_simple_binop (SImode
, LSHIFTRT
, input
, GEN_INT(16),
10251 NULL
, 0, OPTAB_DIRECT
);
10252 fp_hi
= gen_reg_rtx (SFmode
);
10253 fp_lo
= gen_reg_rtx (SFmode
);
10254 emit_insn (gen_floatsisf2 (fp_hi
, int_hi
));
10255 emit_insn (gen_floatsisf2 (fp_lo
, int_lo
));
10256 fp_hi
= expand_simple_binop (SFmode
, MULT
, fp_hi
, x
, fp_hi
,
10258 fp_hi
= expand_simple_binop (SFmode
, PLUS
, fp_hi
, fp_lo
, target
,
10260 if (!rtx_equal_p (target
, fp_hi
))
10261 emit_move_insn (target
, fp_hi
);
10264 /* A subroutine of ix86_build_signbit_mask_vector. If VECT is true,
10265 then replicate the value for all elements of the vector
10269 ix86_build_const_vector (enum machine_mode mode
, bool vect
, rtx value
)
10276 v
= gen_rtvec (4, value
, value
, value
, value
);
10278 v
= gen_rtvec (4, value
, CONST0_RTX (SFmode
),
10279 CONST0_RTX (SFmode
), CONST0_RTX (SFmode
));
10280 return gen_rtx_CONST_VECTOR (V4SFmode
, v
);
10284 v
= gen_rtvec (2, value
, value
);
10286 v
= gen_rtvec (2, value
, CONST0_RTX (DFmode
));
10287 return gen_rtx_CONST_VECTOR (V2DFmode
, v
);
10290 gcc_unreachable ();
10294 /* A subroutine of ix86_expand_fp_absneg_operator and copysign expanders.
10295 Create a mask for the sign bit in MODE for an SSE register. If VECT is
10296 true, then replicate the mask for all elements of the vector register.
10297 If INVERT is true, then create a mask excluding the sign bit. */
10300 ix86_build_signbit_mask (enum machine_mode mode
, bool vect
, bool invert
)
10302 enum machine_mode vec_mode
;
10303 HOST_WIDE_INT hi
, lo
;
10308 /* Find the sign bit, sign extended to 2*HWI. */
10309 if (mode
== SFmode
)
10310 lo
= 0x80000000, hi
= lo
< 0;
10311 else if (HOST_BITS_PER_WIDE_INT
>= 64)
10312 lo
= (HOST_WIDE_INT
)1 << shift
, hi
= -1;
10314 lo
= 0, hi
= (HOST_WIDE_INT
)1 << (shift
- HOST_BITS_PER_WIDE_INT
);
10317 lo
= ~lo
, hi
= ~hi
;
10319 /* Force this value into the low part of a fp vector constant. */
10320 mask
= immed_double_const (lo
, hi
, mode
== SFmode
? SImode
: DImode
);
10321 mask
= gen_lowpart (mode
, mask
);
10323 v
= ix86_build_const_vector (mode
, vect
, mask
);
10324 vec_mode
= (mode
== SFmode
) ? V4SFmode
: V2DFmode
;
10325 return force_reg (vec_mode
, v
);
10328 /* Generate code for floating point ABS or NEG. */
10331 ix86_expand_fp_absneg_operator (enum rtx_code code
, enum machine_mode mode
,
10334 rtx mask
, set
, use
, clob
, dst
, src
;
10335 bool matching_memory
;
10336 bool use_sse
= false;
10337 bool vector_mode
= VECTOR_MODE_P (mode
);
10338 enum machine_mode elt_mode
= mode
;
10342 elt_mode
= GET_MODE_INNER (mode
);
10345 else if (TARGET_SSE_MATH
)
10346 use_sse
= SSE_FLOAT_MODE_P (mode
);
10348 /* NEG and ABS performed with SSE use bitwise mask operations.
10349 Create the appropriate mask now. */
10351 mask
= ix86_build_signbit_mask (elt_mode
, vector_mode
, code
== ABS
);
10358 /* If the destination is memory, and we don't have matching source
10359 operands or we're using the x87, do things in registers. */
10360 matching_memory
= false;
10363 if (use_sse
&& rtx_equal_p (dst
, src
))
10364 matching_memory
= true;
10366 dst
= gen_reg_rtx (mode
);
10368 if (MEM_P (src
) && !matching_memory
)
10369 src
= force_reg (mode
, src
);
10373 set
= gen_rtx_fmt_ee (code
== NEG
? XOR
: AND
, mode
, src
, mask
);
10374 set
= gen_rtx_SET (VOIDmode
, dst
, set
);
10379 set
= gen_rtx_fmt_e (code
, mode
, src
);
10380 set
= gen_rtx_SET (VOIDmode
, dst
, set
);
10383 use
= gen_rtx_USE (VOIDmode
, mask
);
10384 clob
= gen_rtx_CLOBBER (VOIDmode
, gen_rtx_REG (CCmode
, FLAGS_REG
));
10385 emit_insn (gen_rtx_PARALLEL (VOIDmode
,
10386 gen_rtvec (3, set
, use
, clob
)));
10392 if (dst
!= operands
[0])
10393 emit_move_insn (operands
[0], dst
);
10396 /* Expand a copysign operation. Special case operand 0 being a constant. */
10399 ix86_expand_copysign (rtx operands
[])
10401 enum machine_mode mode
, vmode
;
10402 rtx dest
, op0
, op1
, mask
, nmask
;
10404 dest
= operands
[0];
10408 mode
= GET_MODE (dest
);
10409 vmode
= mode
== SFmode
? V4SFmode
: V2DFmode
;
10411 if (GET_CODE (op0
) == CONST_DOUBLE
)
10415 if (real_isneg (CONST_DOUBLE_REAL_VALUE (op0
)))
10416 op0
= simplify_unary_operation (ABS
, mode
, op0
, mode
);
10418 if (op0
== CONST0_RTX (mode
))
10419 op0
= CONST0_RTX (vmode
);
10422 if (mode
== SFmode
)
10423 v
= gen_rtvec (4, op0
, CONST0_RTX (SFmode
),
10424 CONST0_RTX (SFmode
), CONST0_RTX (SFmode
));
10426 v
= gen_rtvec (2, op0
, CONST0_RTX (DFmode
));
10427 op0
= force_reg (vmode
, gen_rtx_CONST_VECTOR (vmode
, v
));
10430 mask
= ix86_build_signbit_mask (mode
, 0, 0);
10432 if (mode
== SFmode
)
10433 emit_insn (gen_copysignsf3_const (dest
, op0
, op1
, mask
));
10435 emit_insn (gen_copysigndf3_const (dest
, op0
, op1
, mask
));
10439 nmask
= ix86_build_signbit_mask (mode
, 0, 1);
10440 mask
= ix86_build_signbit_mask (mode
, 0, 0);
10442 if (mode
== SFmode
)
10443 emit_insn (gen_copysignsf3_var (dest
, NULL
, op0
, op1
, nmask
, mask
));
10445 emit_insn (gen_copysigndf3_var (dest
, NULL
, op0
, op1
, nmask
, mask
));
10449 /* Deconstruct a copysign operation into bit masks. Operand 0 is known to
10450 be a constant, and so has already been expanded into a vector constant. */
10453 ix86_split_copysign_const (rtx operands
[])
10455 enum machine_mode mode
, vmode
;
10456 rtx dest
, op0
, op1
, mask
, x
;
10458 dest
= operands
[0];
10461 mask
= operands
[3];
10463 mode
= GET_MODE (dest
);
10464 vmode
= GET_MODE (mask
);
10466 dest
= simplify_gen_subreg (vmode
, dest
, mode
, 0);
10467 x
= gen_rtx_AND (vmode
, dest
, mask
);
10468 emit_insn (gen_rtx_SET (VOIDmode
, dest
, x
));
10470 if (op0
!= CONST0_RTX (vmode
))
10472 x
= gen_rtx_IOR (vmode
, dest
, op0
);
10473 emit_insn (gen_rtx_SET (VOIDmode
, dest
, x
));
10477 /* Deconstruct a copysign operation into bit masks. Operand 0 is variable,
10478 so we have to do two masks. */
10481 ix86_split_copysign_var (rtx operands
[])
10483 enum machine_mode mode
, vmode
;
10484 rtx dest
, scratch
, op0
, op1
, mask
, nmask
, x
;
10486 dest
= operands
[0];
10487 scratch
= operands
[1];
10490 nmask
= operands
[4];
10491 mask
= operands
[5];
10493 mode
= GET_MODE (dest
);
10494 vmode
= GET_MODE (mask
);
10496 if (rtx_equal_p (op0
, op1
))
10498 /* Shouldn't happen often (it's useless, obviously), but when it does
10499 we'd generate incorrect code if we continue below. */
10500 emit_move_insn (dest
, op0
);
10504 if (REG_P (mask
) && REGNO (dest
) == REGNO (mask
)) /* alternative 0 */
10506 gcc_assert (REGNO (op1
) == REGNO (scratch
));
10508 x
= gen_rtx_AND (vmode
, scratch
, mask
);
10509 emit_insn (gen_rtx_SET (VOIDmode
, scratch
, x
));
10512 op0
= simplify_gen_subreg (vmode
, op0
, mode
, 0);
10513 x
= gen_rtx_NOT (vmode
, dest
);
10514 x
= gen_rtx_AND (vmode
, x
, op0
);
10515 emit_insn (gen_rtx_SET (VOIDmode
, dest
, x
));
10519 if (REGNO (op1
) == REGNO (scratch
)) /* alternative 1,3 */
10521 x
= gen_rtx_AND (vmode
, scratch
, mask
);
10523 else /* alternative 2,4 */
10525 gcc_assert (REGNO (mask
) == REGNO (scratch
));
10526 op1
= simplify_gen_subreg (vmode
, op1
, mode
, 0);
10527 x
= gen_rtx_AND (vmode
, scratch
, op1
);
10529 emit_insn (gen_rtx_SET (VOIDmode
, scratch
, x
));
10531 if (REGNO (op0
) == REGNO (dest
)) /* alternative 1,2 */
10533 dest
= simplify_gen_subreg (vmode
, op0
, mode
, 0);
10534 x
= gen_rtx_AND (vmode
, dest
, nmask
);
10536 else /* alternative 3,4 */
10538 gcc_assert (REGNO (nmask
) == REGNO (dest
));
10540 op0
= simplify_gen_subreg (vmode
, op0
, mode
, 0);
10541 x
= gen_rtx_AND (vmode
, dest
, op0
);
10543 emit_insn (gen_rtx_SET (VOIDmode
, dest
, x
));
10546 x
= gen_rtx_IOR (vmode
, dest
, scratch
);
10547 emit_insn (gen_rtx_SET (VOIDmode
, dest
, x
));
10550 /* Return TRUE or FALSE depending on whether the first SET in INSN
10551 has source and destination with matching CC modes, and that the
10552 CC mode is at least as constrained as REQ_MODE. */
10555 ix86_match_ccmode (rtx insn
, enum machine_mode req_mode
)
10558 enum machine_mode set_mode
;
10560 set
= PATTERN (insn
);
10561 if (GET_CODE (set
) == PARALLEL
)
10562 set
= XVECEXP (set
, 0, 0);
10563 gcc_assert (GET_CODE (set
) == SET
);
10564 gcc_assert (GET_CODE (SET_SRC (set
)) == COMPARE
);
10566 set_mode
= GET_MODE (SET_DEST (set
));
10570 if (req_mode
!= CCNOmode
10571 && (req_mode
!= CCmode
10572 || XEXP (SET_SRC (set
), 1) != const0_rtx
))
10576 if (req_mode
== CCGCmode
)
10580 if (req_mode
== CCGOCmode
|| req_mode
== CCNOmode
)
10584 if (req_mode
== CCZmode
)
10591 gcc_unreachable ();
10594 return (GET_MODE (SET_SRC (set
)) == set_mode
);
10597 /* Generate insn patterns to do an integer compare of OPERANDS. */
10600 ix86_expand_int_compare (enum rtx_code code
, rtx op0
, rtx op1
)
10602 enum machine_mode cmpmode
;
10605 cmpmode
= SELECT_CC_MODE (code
, op0
, op1
);
10606 flags
= gen_rtx_REG (cmpmode
, FLAGS_REG
);
10608 /* This is very simple, but making the interface the same as in the
10609 FP case makes the rest of the code easier. */
10610 tmp
= gen_rtx_COMPARE (cmpmode
, op0
, op1
);
10611 emit_insn (gen_rtx_SET (VOIDmode
, flags
, tmp
));
10613 /* Return the test that should be put into the flags user, i.e.
10614 the bcc, scc, or cmov instruction. */
10615 return gen_rtx_fmt_ee (code
, VOIDmode
, flags
, const0_rtx
);
10618 /* Figure out whether to use ordered or unordered fp comparisons.
10619 Return the appropriate mode to use. */
10622 ix86_fp_compare_mode (enum rtx_code code ATTRIBUTE_UNUSED
)
10624 /* ??? In order to make all comparisons reversible, we do all comparisons
10625 non-trapping when compiling for IEEE. Once gcc is able to distinguish
10626 all forms trapping and nontrapping comparisons, we can make inequality
10627 comparisons trapping again, since it results in better code when using
10628 FCOM based compares. */
10629 return TARGET_IEEE_FP
? CCFPUmode
: CCFPmode
;
10633 ix86_cc_mode (enum rtx_code code
, rtx op0
, rtx op1
)
10635 if (SCALAR_FLOAT_MODE_P (GET_MODE (op0
)))
10636 return ix86_fp_compare_mode (code
);
10639 /* Only zero flag is needed. */
10640 case EQ
: /* ZF=0 */
10641 case NE
: /* ZF!=0 */
10643 /* Codes needing carry flag. */
10644 case GEU
: /* CF=0 */
10645 case GTU
: /* CF=0 & ZF=0 */
10646 case LTU
: /* CF=1 */
10647 case LEU
: /* CF=1 | ZF=1 */
10649 /* Codes possibly doable only with sign flag when
10650 comparing against zero. */
10651 case GE
: /* SF=OF or SF=0 */
10652 case LT
: /* SF<>OF or SF=1 */
10653 if (op1
== const0_rtx
)
10656 /* For other cases Carry flag is not required. */
10658 /* Codes doable only with sign flag when comparing
10659 against zero, but we miss jump instruction for it
10660 so we need to use relational tests against overflow
10661 that thus needs to be zero. */
10662 case GT
: /* ZF=0 & SF=OF */
10663 case LE
: /* ZF=1 | SF<>OF */
10664 if (op1
== const0_rtx
)
10668 /* strcmp pattern do (use flags) and combine may ask us for proper
10673 gcc_unreachable ();
10677 /* Return the fixed registers used for condition codes. */
10680 ix86_fixed_condition_code_regs (unsigned int *p1
, unsigned int *p2
)
10687 /* If two condition code modes are compatible, return a condition code
10688 mode which is compatible with both. Otherwise, return
10691 static enum machine_mode
10692 ix86_cc_modes_compatible (enum machine_mode m1
, enum machine_mode m2
)
10697 if (GET_MODE_CLASS (m1
) != MODE_CC
|| GET_MODE_CLASS (m2
) != MODE_CC
)
10700 if ((m1
== CCGCmode
&& m2
== CCGOCmode
)
10701 || (m1
== CCGOCmode
&& m2
== CCGCmode
))
10707 gcc_unreachable ();
10729 /* These are only compatible with themselves, which we already
10735 /* Return true if we should use an FCOMI instruction for this fp comparison. */
10738 ix86_use_fcomi_compare (enum rtx_code code ATTRIBUTE_UNUSED
)
10740 enum rtx_code swapped_code
= swap_condition (code
);
10741 return ((ix86_fp_comparison_cost (code
) == ix86_fp_comparison_fcomi_cost (code
))
10742 || (ix86_fp_comparison_cost (swapped_code
)
10743 == ix86_fp_comparison_fcomi_cost (swapped_code
)));
10746 /* Swap, force into registers, or otherwise massage the two operands
10747 to a fp comparison. The operands are updated in place; the new
10748 comparison code is returned. */
10750 static enum rtx_code
10751 ix86_prepare_fp_compare_args (enum rtx_code code
, rtx
*pop0
, rtx
*pop1
)
10753 enum machine_mode fpcmp_mode
= ix86_fp_compare_mode (code
);
10754 rtx op0
= *pop0
, op1
= *pop1
;
10755 enum machine_mode op_mode
= GET_MODE (op0
);
10756 int is_sse
= TARGET_SSE_MATH
&& SSE_FLOAT_MODE_P (op_mode
);
10758 /* All of the unordered compare instructions only work on registers.
10759 The same is true of the fcomi compare instructions. The XFmode
10760 compare instructions require registers except when comparing
10761 against zero or when converting operand 1 from fixed point to
10765 && (fpcmp_mode
== CCFPUmode
10766 || (op_mode
== XFmode
10767 && ! (standard_80387_constant_p (op0
) == 1
10768 || standard_80387_constant_p (op1
) == 1)
10769 && GET_CODE (op1
) != FLOAT
)
10770 || ix86_use_fcomi_compare (code
)))
10772 op0
= force_reg (op_mode
, op0
);
10773 op1
= force_reg (op_mode
, op1
);
10777 /* %%% We only allow op1 in memory; op0 must be st(0). So swap
10778 things around if they appear profitable, otherwise force op0
10779 into a register. */
10781 if (standard_80387_constant_p (op0
) == 0
10783 && ! (standard_80387_constant_p (op1
) == 0
10787 tmp
= op0
, op0
= op1
, op1
= tmp
;
10788 code
= swap_condition (code
);
10792 op0
= force_reg (op_mode
, op0
);
10794 if (CONSTANT_P (op1
))
10796 int tmp
= standard_80387_constant_p (op1
);
10798 op1
= validize_mem (force_const_mem (op_mode
, op1
));
10802 op1
= force_reg (op_mode
, op1
);
10805 op1
= force_reg (op_mode
, op1
);
10809 /* Try to rearrange the comparison to make it cheaper. */
10810 if (ix86_fp_comparison_cost (code
)
10811 > ix86_fp_comparison_cost (swap_condition (code
))
10812 && (REG_P (op1
) || !no_new_pseudos
))
10815 tmp
= op0
, op0
= op1
, op1
= tmp
;
10816 code
= swap_condition (code
);
10818 op0
= force_reg (op_mode
, op0
);
10826 /* Convert comparison codes we use to represent FP comparison to integer
10827 code that will result in proper branch. Return UNKNOWN if no such code
10831 ix86_fp_compare_code_to_integer (enum rtx_code code
)
10860 /* Split comparison code CODE into comparisons we can do using branch
10861 instructions. BYPASS_CODE is comparison code for branch that will
10862 branch around FIRST_CODE and SECOND_CODE. If some of branches
10863 is not required, set value to UNKNOWN.
10864 We never require more than two branches. */
10867 ix86_fp_comparison_codes (enum rtx_code code
, enum rtx_code
*bypass_code
,
10868 enum rtx_code
*first_code
,
10869 enum rtx_code
*second_code
)
10871 *first_code
= code
;
10872 *bypass_code
= UNKNOWN
;
10873 *second_code
= UNKNOWN
;
10875 /* The fcomi comparison sets flags as follows:
10885 case GT
: /* GTU - CF=0 & ZF=0 */
10886 case GE
: /* GEU - CF=0 */
10887 case ORDERED
: /* PF=0 */
10888 case UNORDERED
: /* PF=1 */
10889 case UNEQ
: /* EQ - ZF=1 */
10890 case UNLT
: /* LTU - CF=1 */
10891 case UNLE
: /* LEU - CF=1 | ZF=1 */
10892 case LTGT
: /* EQ - ZF=0 */
10894 case LT
: /* LTU - CF=1 - fails on unordered */
10895 *first_code
= UNLT
;
10896 *bypass_code
= UNORDERED
;
10898 case LE
: /* LEU - CF=1 | ZF=1 - fails on unordered */
10899 *first_code
= UNLE
;
10900 *bypass_code
= UNORDERED
;
10902 case EQ
: /* EQ - ZF=1 - fails on unordered */
10903 *first_code
= UNEQ
;
10904 *bypass_code
= UNORDERED
;
10906 case NE
: /* NE - ZF=0 - fails on unordered */
10907 *first_code
= LTGT
;
10908 *second_code
= UNORDERED
;
10910 case UNGE
: /* GEU - CF=0 - fails on unordered */
10912 *second_code
= UNORDERED
;
10914 case UNGT
: /* GTU - CF=0 & ZF=0 - fails on unordered */
10916 *second_code
= UNORDERED
;
10919 gcc_unreachable ();
10921 if (!TARGET_IEEE_FP
)
10923 *second_code
= UNKNOWN
;
10924 *bypass_code
= UNKNOWN
;
10928 /* Return cost of comparison done fcom + arithmetics operations on AX.
10929 All following functions do use number of instructions as a cost metrics.
10930 In future this should be tweaked to compute bytes for optimize_size and
10931 take into account performance of various instructions on various CPUs. */
10933 ix86_fp_comparison_arithmetics_cost (enum rtx_code code
)
10935 if (!TARGET_IEEE_FP
)
10937 /* The cost of code output by ix86_expand_fp_compare. */
10961 gcc_unreachable ();
10965 /* Return cost of comparison done using fcomi operation.
10966 See ix86_fp_comparison_arithmetics_cost for the metrics. */
10968 ix86_fp_comparison_fcomi_cost (enum rtx_code code
)
10970 enum rtx_code bypass_code
, first_code
, second_code
;
10971 /* Return arbitrarily high cost when instruction is not supported - this
10972 prevents gcc from using it. */
10975 ix86_fp_comparison_codes (code
, &bypass_code
, &first_code
, &second_code
);
10976 return (bypass_code
!= UNKNOWN
|| second_code
!= UNKNOWN
) + 2;
10979 /* Return cost of comparison done using sahf operation.
10980 See ix86_fp_comparison_arithmetics_cost for the metrics. */
10982 ix86_fp_comparison_sahf_cost (enum rtx_code code
)
10984 enum rtx_code bypass_code
, first_code
, second_code
;
10985 /* Return arbitrarily high cost when instruction is not preferred - this
10986 avoids gcc from using it. */
10987 if (!(TARGET_SAHF
&& (TARGET_USE_SAHF
|| optimize_size
)))
10989 ix86_fp_comparison_codes (code
, &bypass_code
, &first_code
, &second_code
);
10990 return (bypass_code
!= UNKNOWN
|| second_code
!= UNKNOWN
) + 3;
10993 /* Compute cost of the comparison done using any method.
10994 See ix86_fp_comparison_arithmetics_cost for the metrics. */
10996 ix86_fp_comparison_cost (enum rtx_code code
)
10998 int fcomi_cost
, sahf_cost
, arithmetics_cost
= 1024;
11001 fcomi_cost
= ix86_fp_comparison_fcomi_cost (code
);
11002 sahf_cost
= ix86_fp_comparison_sahf_cost (code
);
11004 min
= arithmetics_cost
= ix86_fp_comparison_arithmetics_cost (code
);
11005 if (min
> sahf_cost
)
11007 if (min
> fcomi_cost
)
11012 /* Generate insn patterns to do a floating point compare of OPERANDS. */
11015 ix86_expand_fp_compare (enum rtx_code code
, rtx op0
, rtx op1
, rtx scratch
,
11016 rtx
*second_test
, rtx
*bypass_test
)
11018 enum machine_mode fpcmp_mode
, intcmp_mode
;
11020 int cost
= ix86_fp_comparison_cost (code
);
11021 enum rtx_code bypass_code
, first_code
, second_code
;
11023 fpcmp_mode
= ix86_fp_compare_mode (code
);
11024 code
= ix86_prepare_fp_compare_args (code
, &op0
, &op1
);
11027 *second_test
= NULL_RTX
;
11029 *bypass_test
= NULL_RTX
;
11031 ix86_fp_comparison_codes (code
, &bypass_code
, &first_code
, &second_code
);
11033 /* Do fcomi/sahf based test when profitable. */
11034 if ((TARGET_CMOVE
|| TARGET_SAHF
)
11035 && (bypass_code
== UNKNOWN
|| bypass_test
)
11036 && (second_code
== UNKNOWN
|| second_test
)
11037 && ix86_fp_comparison_arithmetics_cost (code
) > cost
)
11041 tmp
= gen_rtx_COMPARE (fpcmp_mode
, op0
, op1
);
11042 tmp
= gen_rtx_SET (VOIDmode
, gen_rtx_REG (fpcmp_mode
, FLAGS_REG
),
11048 tmp
= gen_rtx_COMPARE (fpcmp_mode
, op0
, op1
);
11049 tmp2
= gen_rtx_UNSPEC (HImode
, gen_rtvec (1, tmp
), UNSPEC_FNSTSW
);
11051 scratch
= gen_reg_rtx (HImode
);
11052 emit_insn (gen_rtx_SET (VOIDmode
, scratch
, tmp2
));
11053 emit_insn (gen_x86_sahf_1 (scratch
));
11056 /* The FP codes work out to act like unsigned. */
11057 intcmp_mode
= fpcmp_mode
;
11059 if (bypass_code
!= UNKNOWN
)
11060 *bypass_test
= gen_rtx_fmt_ee (bypass_code
, VOIDmode
,
11061 gen_rtx_REG (intcmp_mode
, FLAGS_REG
),
11063 if (second_code
!= UNKNOWN
)
11064 *second_test
= gen_rtx_fmt_ee (second_code
, VOIDmode
,
11065 gen_rtx_REG (intcmp_mode
, FLAGS_REG
),
11070 /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first. */
11071 tmp
= gen_rtx_COMPARE (fpcmp_mode
, op0
, op1
);
11072 tmp2
= gen_rtx_UNSPEC (HImode
, gen_rtvec (1, tmp
), UNSPEC_FNSTSW
);
11074 scratch
= gen_reg_rtx (HImode
);
11075 emit_insn (gen_rtx_SET (VOIDmode
, scratch
, tmp2
));
11077 /* In the unordered case, we have to check C2 for NaN's, which
11078 doesn't happen to work out to anything nice combination-wise.
11079 So do some bit twiddling on the value we've got in AH to come
11080 up with an appropriate set of condition codes. */
11082 intcmp_mode
= CCNOmode
;
11087 if (code
== GT
|| !TARGET_IEEE_FP
)
11089 emit_insn (gen_testqi_ext_ccno_0 (scratch
, GEN_INT (0x45)));
11094 emit_insn (gen_andqi_ext_0 (scratch
, scratch
, GEN_INT (0x45)));
11095 emit_insn (gen_addqi_ext_1 (scratch
, scratch
, constm1_rtx
));
11096 emit_insn (gen_cmpqi_ext_3 (scratch
, GEN_INT (0x44)));
11097 intcmp_mode
= CCmode
;
11103 if (code
== LT
&& TARGET_IEEE_FP
)
11105 emit_insn (gen_andqi_ext_0 (scratch
, scratch
, GEN_INT (0x45)));
11106 emit_insn (gen_cmpqi_ext_3 (scratch
, GEN_INT (0x01)));
11107 intcmp_mode
= CCmode
;
11112 emit_insn (gen_testqi_ext_ccno_0 (scratch
, GEN_INT (0x01)));
11118 if (code
== GE
|| !TARGET_IEEE_FP
)
11120 emit_insn (gen_testqi_ext_ccno_0 (scratch
, GEN_INT (0x05)));
11125 emit_insn (gen_andqi_ext_0 (scratch
, scratch
, GEN_INT (0x45)));
11126 emit_insn (gen_xorqi_cc_ext_1 (scratch
, scratch
,
11133 if (code
== LE
&& TARGET_IEEE_FP
)
11135 emit_insn (gen_andqi_ext_0 (scratch
, scratch
, GEN_INT (0x45)));
11136 emit_insn (gen_addqi_ext_1 (scratch
, scratch
, constm1_rtx
));
11137 emit_insn (gen_cmpqi_ext_3 (scratch
, GEN_INT (0x40)));
11138 intcmp_mode
= CCmode
;
11143 emit_insn (gen_testqi_ext_ccno_0 (scratch
, GEN_INT (0x45)));
11149 if (code
== EQ
&& TARGET_IEEE_FP
)
11151 emit_insn (gen_andqi_ext_0 (scratch
, scratch
, GEN_INT (0x45)));
11152 emit_insn (gen_cmpqi_ext_3 (scratch
, GEN_INT (0x40)));
11153 intcmp_mode
= CCmode
;
11158 emit_insn (gen_testqi_ext_ccno_0 (scratch
, GEN_INT (0x40)));
11165 if (code
== NE
&& TARGET_IEEE_FP
)
11167 emit_insn (gen_andqi_ext_0 (scratch
, scratch
, GEN_INT (0x45)));
11168 emit_insn (gen_xorqi_cc_ext_1 (scratch
, scratch
,
11174 emit_insn (gen_testqi_ext_ccno_0 (scratch
, GEN_INT (0x40)));
11180 emit_insn (gen_testqi_ext_ccno_0 (scratch
, GEN_INT (0x04)));
11184 emit_insn (gen_testqi_ext_ccno_0 (scratch
, GEN_INT (0x04)));
11189 gcc_unreachable ();
11193 /* Return the test that should be put into the flags user, i.e.
11194 the bcc, scc, or cmov instruction. */
11195 return gen_rtx_fmt_ee (code
, VOIDmode
,
11196 gen_rtx_REG (intcmp_mode
, FLAGS_REG
),
11201 ix86_expand_compare (enum rtx_code code
, rtx
*second_test
, rtx
*bypass_test
)
11204 op0
= ix86_compare_op0
;
11205 op1
= ix86_compare_op1
;
11208 *second_test
= NULL_RTX
;
11210 *bypass_test
= NULL_RTX
;
11212 if (ix86_compare_emitted
)
11214 ret
= gen_rtx_fmt_ee (code
, VOIDmode
, ix86_compare_emitted
, const0_rtx
);
11215 ix86_compare_emitted
= NULL_RTX
;
11217 else if (SCALAR_FLOAT_MODE_P (GET_MODE (op0
)))
11218 ret
= ix86_expand_fp_compare (code
, op0
, op1
, NULL_RTX
,
11219 second_test
, bypass_test
);
11221 ret
= ix86_expand_int_compare (code
, op0
, op1
);
11226 /* Return true if the CODE will result in nontrivial jump sequence. */
11228 ix86_fp_jump_nontrivial_p (enum rtx_code code
)
11230 enum rtx_code bypass_code
, first_code
, second_code
;
11233 ix86_fp_comparison_codes (code
, &bypass_code
, &first_code
, &second_code
);
11234 return bypass_code
!= UNKNOWN
|| second_code
!= UNKNOWN
;
11238 ix86_expand_branch (enum rtx_code code
, rtx label
)
11242 /* If we have emitted a compare insn, go straight to simple.
11243 ix86_expand_compare won't emit anything if ix86_compare_emitted
11245 if (ix86_compare_emitted
)
11248 switch (GET_MODE (ix86_compare_op0
))
11254 tmp
= ix86_expand_compare (code
, NULL
, NULL
);
11255 tmp
= gen_rtx_IF_THEN_ELSE (VOIDmode
, tmp
,
11256 gen_rtx_LABEL_REF (VOIDmode
, label
),
11258 emit_jump_insn (gen_rtx_SET (VOIDmode
, pc_rtx
, tmp
));
11267 enum rtx_code bypass_code
, first_code
, second_code
;
11269 code
= ix86_prepare_fp_compare_args (code
, &ix86_compare_op0
,
11270 &ix86_compare_op1
);
11272 ix86_fp_comparison_codes (code
, &bypass_code
, &first_code
, &second_code
);
11274 /* Check whether we will use the natural sequence with one jump. If
11275 so, we can expand jump early. Otherwise delay expansion by
11276 creating compound insn to not confuse optimizers. */
11277 if (bypass_code
== UNKNOWN
&& second_code
== UNKNOWN
11280 ix86_split_fp_branch (code
, ix86_compare_op0
, ix86_compare_op1
,
11281 gen_rtx_LABEL_REF (VOIDmode
, label
),
11282 pc_rtx
, NULL_RTX
, NULL_RTX
);
11286 tmp
= gen_rtx_fmt_ee (code
, VOIDmode
,
11287 ix86_compare_op0
, ix86_compare_op1
);
11288 tmp
= gen_rtx_IF_THEN_ELSE (VOIDmode
, tmp
,
11289 gen_rtx_LABEL_REF (VOIDmode
, label
),
11291 tmp
= gen_rtx_SET (VOIDmode
, pc_rtx
, tmp
);
11293 use_fcomi
= ix86_use_fcomi_compare (code
);
11294 vec
= rtvec_alloc (3 + !use_fcomi
);
11295 RTVEC_ELT (vec
, 0) = tmp
;
11297 = gen_rtx_CLOBBER (VOIDmode
, gen_rtx_REG (CCFPmode
, 18));
11299 = gen_rtx_CLOBBER (VOIDmode
, gen_rtx_REG (CCFPmode
, 17));
11302 = gen_rtx_CLOBBER (VOIDmode
, gen_rtx_SCRATCH (HImode
));
11304 emit_jump_insn (gen_rtx_PARALLEL (VOIDmode
, vec
));
11313 /* Expand DImode branch into multiple compare+branch. */
11315 rtx lo
[2], hi
[2], label2
;
11316 enum rtx_code code1
, code2
, code3
;
11317 enum machine_mode submode
;
11319 if (CONSTANT_P (ix86_compare_op0
) && ! CONSTANT_P (ix86_compare_op1
))
11321 tmp
= ix86_compare_op0
;
11322 ix86_compare_op0
= ix86_compare_op1
;
11323 ix86_compare_op1
= tmp
;
11324 code
= swap_condition (code
);
11326 if (GET_MODE (ix86_compare_op0
) == DImode
)
11328 split_di (&ix86_compare_op0
, 1, lo
+0, hi
+0);
11329 split_di (&ix86_compare_op1
, 1, lo
+1, hi
+1);
11334 split_ti (&ix86_compare_op0
, 1, lo
+0, hi
+0);
11335 split_ti (&ix86_compare_op1
, 1, lo
+1, hi
+1);
11339 /* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to
11340 avoid two branches. This costs one extra insn, so disable when
11341 optimizing for size. */
11343 if ((code
== EQ
|| code
== NE
)
11345 || hi
[1] == const0_rtx
|| lo
[1] == const0_rtx
))
11350 if (hi
[1] != const0_rtx
)
11351 xor1
= expand_binop (submode
, xor_optab
, xor1
, hi
[1],
11352 NULL_RTX
, 0, OPTAB_WIDEN
);
11355 if (lo
[1] != const0_rtx
)
11356 xor0
= expand_binop (submode
, xor_optab
, xor0
, lo
[1],
11357 NULL_RTX
, 0, OPTAB_WIDEN
);
11359 tmp
= expand_binop (submode
, ior_optab
, xor1
, xor0
,
11360 NULL_RTX
, 0, OPTAB_WIDEN
);
11362 ix86_compare_op0
= tmp
;
11363 ix86_compare_op1
= const0_rtx
;
11364 ix86_expand_branch (code
, label
);
11368 /* Otherwise, if we are doing less-than or greater-or-equal-than,
11369 op1 is a constant and the low word is zero, then we can just
11370 examine the high word. */
11372 if (CONST_INT_P (hi
[1]) && lo
[1] == const0_rtx
)
11375 case LT
: case LTU
: case GE
: case GEU
:
11376 ix86_compare_op0
= hi
[0];
11377 ix86_compare_op1
= hi
[1];
11378 ix86_expand_branch (code
, label
);
11384 /* Otherwise, we need two or three jumps. */
11386 label2
= gen_label_rtx ();
11389 code2
= swap_condition (code
);
11390 code3
= unsigned_condition (code
);
11394 case LT
: case GT
: case LTU
: case GTU
:
11397 case LE
: code1
= LT
; code2
= GT
; break;
11398 case GE
: code1
= GT
; code2
= LT
; break;
11399 case LEU
: code1
= LTU
; code2
= GTU
; break;
11400 case GEU
: code1
= GTU
; code2
= LTU
; break;
11402 case EQ
: code1
= UNKNOWN
; code2
= NE
; break;
11403 case NE
: code2
= UNKNOWN
; break;
11406 gcc_unreachable ();
11411 * if (hi(a) < hi(b)) goto true;
11412 * if (hi(a) > hi(b)) goto false;
11413 * if (lo(a) < lo(b)) goto true;
11417 ix86_compare_op0
= hi
[0];
11418 ix86_compare_op1
= hi
[1];
11420 if (code1
!= UNKNOWN
)
11421 ix86_expand_branch (code1
, label
);
11422 if (code2
!= UNKNOWN
)
11423 ix86_expand_branch (code2
, label2
);
11425 ix86_compare_op0
= lo
[0];
11426 ix86_compare_op1
= lo
[1];
11427 ix86_expand_branch (code3
, label
);
11429 if (code2
!= UNKNOWN
)
11430 emit_label (label2
);
11435 gcc_unreachable ();
11439 /* Split branch based on floating point condition. */
11441 ix86_split_fp_branch (enum rtx_code code
, rtx op1
, rtx op2
,
11442 rtx target1
, rtx target2
, rtx tmp
, rtx pushed
)
11444 rtx second
, bypass
;
11445 rtx label
= NULL_RTX
;
11447 int bypass_probability
= -1, second_probability
= -1, probability
= -1;
11450 if (target2
!= pc_rtx
)
11453 code
= reverse_condition_maybe_unordered (code
);
11458 condition
= ix86_expand_fp_compare (code
, op1
, op2
,
11459 tmp
, &second
, &bypass
);
11461 /* Remove pushed operand from stack. */
11463 ix86_free_from_memory (GET_MODE (pushed
));
11465 if (split_branch_probability
>= 0)
11467 /* Distribute the probabilities across the jumps.
11468 Assume the BYPASS and SECOND to be always test
11470 probability
= split_branch_probability
;
11472 /* Value of 1 is low enough to make no need for probability
11473 to be updated. Later we may run some experiments and see
11474 if unordered values are more frequent in practice. */
11476 bypass_probability
= 1;
11478 second_probability
= 1;
11480 if (bypass
!= NULL_RTX
)
11482 label
= gen_label_rtx ();
11483 i
= emit_jump_insn (gen_rtx_SET
11485 gen_rtx_IF_THEN_ELSE (VOIDmode
,
11487 gen_rtx_LABEL_REF (VOIDmode
,
11490 if (bypass_probability
>= 0)
11492 = gen_rtx_EXPR_LIST (REG_BR_PROB
,
11493 GEN_INT (bypass_probability
),
11496 i
= emit_jump_insn (gen_rtx_SET
11498 gen_rtx_IF_THEN_ELSE (VOIDmode
,
11499 condition
, target1
, target2
)));
11500 if (probability
>= 0)
11502 = gen_rtx_EXPR_LIST (REG_BR_PROB
,
11503 GEN_INT (probability
),
11505 if (second
!= NULL_RTX
)
11507 i
= emit_jump_insn (gen_rtx_SET
11509 gen_rtx_IF_THEN_ELSE (VOIDmode
, second
, target1
,
11511 if (second_probability
>= 0)
11513 = gen_rtx_EXPR_LIST (REG_BR_PROB
,
11514 GEN_INT (second_probability
),
11517 if (label
!= NULL_RTX
)
11518 emit_label (label
);
11522 ix86_expand_setcc (enum rtx_code code
, rtx dest
)
11524 rtx ret
, tmp
, tmpreg
, equiv
;
11525 rtx second_test
, bypass_test
;
11527 if (GET_MODE (ix86_compare_op0
) == (TARGET_64BIT
? TImode
: DImode
))
11528 return 0; /* FAIL */
11530 gcc_assert (GET_MODE (dest
) == QImode
);
11532 ret
= ix86_expand_compare (code
, &second_test
, &bypass_test
);
11533 PUT_MODE (ret
, QImode
);
11538 emit_insn (gen_rtx_SET (VOIDmode
, tmp
, ret
));
11539 if (bypass_test
|| second_test
)
11541 rtx test
= second_test
;
11543 rtx tmp2
= gen_reg_rtx (QImode
);
11546 gcc_assert (!second_test
);
11547 test
= bypass_test
;
11549 PUT_CODE (test
, reverse_condition_maybe_unordered (GET_CODE (test
)));
11551 PUT_MODE (test
, QImode
);
11552 emit_insn (gen_rtx_SET (VOIDmode
, tmp2
, test
));
11555 emit_insn (gen_andqi3 (tmp
, tmpreg
, tmp2
));
11557 emit_insn (gen_iorqi3 (tmp
, tmpreg
, tmp2
));
11560 /* Attach a REG_EQUAL note describing the comparison result. */
11561 if (ix86_compare_op0
&& ix86_compare_op1
)
11563 equiv
= simplify_gen_relational (code
, QImode
,
11564 GET_MODE (ix86_compare_op0
),
11565 ix86_compare_op0
, ix86_compare_op1
);
11566 set_unique_reg_note (get_last_insn (), REG_EQUAL
, equiv
);
11569 return 1; /* DONE */
11572 /* Expand comparison setting or clearing carry flag. Return true when
11573 successful and set pop for the operation. */
11575 ix86_expand_carry_flag_compare (enum rtx_code code
, rtx op0
, rtx op1
, rtx
*pop
)
11577 enum machine_mode mode
=
11578 GET_MODE (op0
) != VOIDmode
? GET_MODE (op0
) : GET_MODE (op1
);
11580 /* Do not handle DImode compares that go through special path. Also we can't
11581 deal with FP compares yet. This is possible to add. */
11582 if (mode
== (TARGET_64BIT
? TImode
: DImode
))
11584 if (FLOAT_MODE_P (mode
))
11586 rtx second_test
= NULL
, bypass_test
= NULL
;
11587 rtx compare_op
, compare_seq
;
11589 /* Shortcut: following common codes never translate into carry flag compares. */
11590 if (code
== EQ
|| code
== NE
|| code
== UNEQ
|| code
== LTGT
11591 || code
== ORDERED
|| code
== UNORDERED
)
11594 /* These comparisons require zero flag; swap operands so they won't. */
11595 if ((code
== GT
|| code
== UNLE
|| code
== LE
|| code
== UNGT
)
11596 && !TARGET_IEEE_FP
)
11601 code
= swap_condition (code
);
11604 /* Try to expand the comparison and verify that we end up with carry flag
11605 based comparison. This is fails to be true only when we decide to expand
11606 comparison using arithmetic that is not too common scenario. */
11608 compare_op
= ix86_expand_fp_compare (code
, op0
, op1
, NULL_RTX
,
11609 &second_test
, &bypass_test
);
11610 compare_seq
= get_insns ();
11613 if (second_test
|| bypass_test
)
11615 if (GET_MODE (XEXP (compare_op
, 0)) == CCFPmode
11616 || GET_MODE (XEXP (compare_op
, 0)) == CCFPUmode
)
11617 code
= ix86_fp_compare_code_to_integer (GET_CODE (compare_op
));
11619 code
= GET_CODE (compare_op
);
11620 if (code
!= LTU
&& code
!= GEU
)
11622 emit_insn (compare_seq
);
11626 if (!INTEGRAL_MODE_P (mode
))
11634 /* Convert a==0 into (unsigned)a<1. */
11637 if (op1
!= const0_rtx
)
11640 code
= (code
== EQ
? LTU
: GEU
);
11643 /* Convert a>b into b<a or a>=b-1. */
11646 if (CONST_INT_P (op1
))
11648 op1
= gen_int_mode (INTVAL (op1
) + 1, GET_MODE (op0
));
11649 /* Bail out on overflow. We still can swap operands but that
11650 would force loading of the constant into register. */
11651 if (op1
== const0_rtx
11652 || !x86_64_immediate_operand (op1
, GET_MODE (op1
)))
11654 code
= (code
== GTU
? GEU
: LTU
);
11661 code
= (code
== GTU
? LTU
: GEU
);
11665 /* Convert a>=0 into (unsigned)a<0x80000000. */
11668 if (mode
== DImode
|| op1
!= const0_rtx
)
11670 op1
= gen_int_mode (1 << (GET_MODE_BITSIZE (mode
) - 1), mode
);
11671 code
= (code
== LT
? GEU
: LTU
);
11675 if (mode
== DImode
|| op1
!= constm1_rtx
)
11677 op1
= gen_int_mode (1 << (GET_MODE_BITSIZE (mode
) - 1), mode
);
11678 code
= (code
== LE
? GEU
: LTU
);
11684 /* Swapping operands may cause constant to appear as first operand. */
11685 if (!nonimmediate_operand (op0
, VOIDmode
))
11687 if (no_new_pseudos
)
11689 op0
= force_reg (mode
, op0
);
11691 ix86_compare_op0
= op0
;
11692 ix86_compare_op1
= op1
;
11693 *pop
= ix86_expand_compare (code
, NULL
, NULL
);
11694 gcc_assert (GET_CODE (*pop
) == LTU
|| GET_CODE (*pop
) == GEU
);
11699 ix86_expand_int_movcc (rtx operands
[])
11701 enum rtx_code code
= GET_CODE (operands
[1]), compare_code
;
11702 rtx compare_seq
, compare_op
;
11703 rtx second_test
, bypass_test
;
11704 enum machine_mode mode
= GET_MODE (operands
[0]);
11705 bool sign_bit_compare_p
= false;;
11708 compare_op
= ix86_expand_compare (code
, &second_test
, &bypass_test
);
11709 compare_seq
= get_insns ();
11712 compare_code
= GET_CODE (compare_op
);
11714 if ((ix86_compare_op1
== const0_rtx
&& (code
== GE
|| code
== LT
))
11715 || (ix86_compare_op1
== constm1_rtx
&& (code
== GT
|| code
== LE
)))
11716 sign_bit_compare_p
= true;
11718 /* Don't attempt mode expansion here -- if we had to expand 5 or 6
11719 HImode insns, we'd be swallowed in word prefix ops. */
11721 if ((mode
!= HImode
|| TARGET_FAST_PREFIX
)
11722 && (mode
!= (TARGET_64BIT
? TImode
: DImode
))
11723 && CONST_INT_P (operands
[2])
11724 && CONST_INT_P (operands
[3]))
11726 rtx out
= operands
[0];
11727 HOST_WIDE_INT ct
= INTVAL (operands
[2]);
11728 HOST_WIDE_INT cf
= INTVAL (operands
[3]);
11729 HOST_WIDE_INT diff
;
11732 /* Sign bit compares are better done using shifts than we do by using
11734 if (sign_bit_compare_p
11735 || ix86_expand_carry_flag_compare (code
, ix86_compare_op0
,
11736 ix86_compare_op1
, &compare_op
))
11738 /* Detect overlap between destination and compare sources. */
11741 if (!sign_bit_compare_p
)
11743 bool fpcmp
= false;
11745 compare_code
= GET_CODE (compare_op
);
11747 if (GET_MODE (XEXP (compare_op
, 0)) == CCFPmode
11748 || GET_MODE (XEXP (compare_op
, 0)) == CCFPUmode
)
11751 compare_code
= ix86_fp_compare_code_to_integer (compare_code
);
11754 /* To simplify rest of code, restrict to the GEU case. */
11755 if (compare_code
== LTU
)
11757 HOST_WIDE_INT tmp
= ct
;
11760 compare_code
= reverse_condition (compare_code
);
11761 code
= reverse_condition (code
);
11766 PUT_CODE (compare_op
,
11767 reverse_condition_maybe_unordered
11768 (GET_CODE (compare_op
)));
11770 PUT_CODE (compare_op
, reverse_condition (GET_CODE (compare_op
)));
11774 if (reg_overlap_mentioned_p (out
, ix86_compare_op0
)
11775 || reg_overlap_mentioned_p (out
, ix86_compare_op1
))
11776 tmp
= gen_reg_rtx (mode
);
11778 if (mode
== DImode
)
11779 emit_insn (gen_x86_movdicc_0_m1_rex64 (tmp
, compare_op
));
11781 emit_insn (gen_x86_movsicc_0_m1 (gen_lowpart (SImode
, tmp
), compare_op
));
11785 if (code
== GT
|| code
== GE
)
11786 code
= reverse_condition (code
);
11789 HOST_WIDE_INT tmp
= ct
;
11794 tmp
= emit_store_flag (tmp
, code
, ix86_compare_op0
,
11795 ix86_compare_op1
, VOIDmode
, 0, -1);
11808 tmp
= expand_simple_binop (mode
, PLUS
,
11810 copy_rtx (tmp
), 1, OPTAB_DIRECT
);
11821 tmp
= expand_simple_binop (mode
, IOR
,
11823 copy_rtx (tmp
), 1, OPTAB_DIRECT
);
11825 else if (diff
== -1 && ct
)
11835 tmp
= expand_simple_unop (mode
, NOT
, tmp
, copy_rtx (tmp
), 1);
11837 tmp
= expand_simple_binop (mode
, PLUS
,
11838 copy_rtx (tmp
), GEN_INT (cf
),
11839 copy_rtx (tmp
), 1, OPTAB_DIRECT
);
11847 * andl cf - ct, dest
11857 tmp
= expand_simple_unop (mode
, NOT
, tmp
, copy_rtx (tmp
), 1);
11860 tmp
= expand_simple_binop (mode
, AND
,
11862 gen_int_mode (cf
- ct
, mode
),
11863 copy_rtx (tmp
), 1, OPTAB_DIRECT
);
11865 tmp
= expand_simple_binop (mode
, PLUS
,
11866 copy_rtx (tmp
), GEN_INT (ct
),
11867 copy_rtx (tmp
), 1, OPTAB_DIRECT
);
11870 if (!rtx_equal_p (tmp
, out
))
11871 emit_move_insn (copy_rtx (out
), copy_rtx (tmp
));
11873 return 1; /* DONE */
11879 tmp
= ct
, ct
= cf
, cf
= tmp
;
11881 if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0
)))
11883 /* We may be reversing unordered compare to normal compare, that
11884 is not valid in general (we may convert non-trapping condition
11885 to trapping one), however on i386 we currently emit all
11886 comparisons unordered. */
11887 compare_code
= reverse_condition_maybe_unordered (compare_code
);
11888 code
= reverse_condition_maybe_unordered (code
);
11892 compare_code
= reverse_condition (compare_code
);
11893 code
= reverse_condition (code
);
11897 compare_code
= UNKNOWN
;
11898 if (GET_MODE_CLASS (GET_MODE (ix86_compare_op0
)) == MODE_INT
11899 && CONST_INT_P (ix86_compare_op1
))
11901 if (ix86_compare_op1
== const0_rtx
11902 && (code
== LT
|| code
== GE
))
11903 compare_code
= code
;
11904 else if (ix86_compare_op1
== constm1_rtx
)
11908 else if (code
== GT
)
11913 /* Optimize dest = (op0 < 0) ? -1 : cf. */
11914 if (compare_code
!= UNKNOWN
11915 && GET_MODE (ix86_compare_op0
) == GET_MODE (out
)
11916 && (cf
== -1 || ct
== -1))
11918 /* If lea code below could be used, only optimize
11919 if it results in a 2 insn sequence. */
11921 if (! (diff
== 1 || diff
== 2 || diff
== 4 || diff
== 8
11922 || diff
== 3 || diff
== 5 || diff
== 9)
11923 || (compare_code
== LT
&& ct
== -1)
11924 || (compare_code
== GE
&& cf
== -1))
11927 * notl op1 (if necessary)
11935 code
= reverse_condition (code
);
11938 out
= emit_store_flag (out
, code
, ix86_compare_op0
,
11939 ix86_compare_op1
, VOIDmode
, 0, -1);
11941 out
= expand_simple_binop (mode
, IOR
,
11943 out
, 1, OPTAB_DIRECT
);
11944 if (out
!= operands
[0])
11945 emit_move_insn (operands
[0], out
);
11947 return 1; /* DONE */
11952 if ((diff
== 1 || diff
== 2 || diff
== 4 || diff
== 8
11953 || diff
== 3 || diff
== 5 || diff
== 9)
11954 && ((mode
!= QImode
&& mode
!= HImode
) || !TARGET_PARTIAL_REG_STALL
)
11956 || x86_64_immediate_operand (GEN_INT (cf
), VOIDmode
)))
11962 * lea cf(dest*(ct-cf)),dest
11966 * This also catches the degenerate setcc-only case.
11972 out
= emit_store_flag (out
, code
, ix86_compare_op0
,
11973 ix86_compare_op1
, VOIDmode
, 0, 1);
11976 /* On x86_64 the lea instruction operates on Pmode, so we need
11977 to get arithmetics done in proper mode to match. */
11979 tmp
= copy_rtx (out
);
11983 out1
= copy_rtx (out
);
11984 tmp
= gen_rtx_MULT (mode
, out1
, GEN_INT (diff
& ~1));
11988 tmp
= gen_rtx_PLUS (mode
, tmp
, out1
);
11994 tmp
= gen_rtx_PLUS (mode
, tmp
, GEN_INT (cf
));
11997 if (!rtx_equal_p (tmp
, out
))
12000 out
= force_operand (tmp
, copy_rtx (out
));
12002 emit_insn (gen_rtx_SET (VOIDmode
, copy_rtx (out
), copy_rtx (tmp
)));
12004 if (!rtx_equal_p (out
, operands
[0]))
12005 emit_move_insn (operands
[0], copy_rtx (out
));
12007 return 1; /* DONE */
12011 * General case: Jumpful:
12012 * xorl dest,dest cmpl op1, op2
12013 * cmpl op1, op2 movl ct, dest
12014 * setcc dest jcc 1f
12015 * decl dest movl cf, dest
12016 * andl (cf-ct),dest 1:
12019 * Size 20. Size 14.
12021 * This is reasonably steep, but branch mispredict costs are
12022 * high on modern cpus, so consider failing only if optimizing
12026 if ((!TARGET_CMOVE
|| (mode
== QImode
&& TARGET_PARTIAL_REG_STALL
))
12027 && BRANCH_COST
>= 2)
12033 if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0
)))
12034 /* We may be reversing unordered compare to normal compare,
12035 that is not valid in general (we may convert non-trapping
12036 condition to trapping one), however on i386 we currently
12037 emit all comparisons unordered. */
12038 code
= reverse_condition_maybe_unordered (code
);
12041 code
= reverse_condition (code
);
12042 if (compare_code
!= UNKNOWN
)
12043 compare_code
= reverse_condition (compare_code
);
12047 if (compare_code
!= UNKNOWN
)
12049 /* notl op1 (if needed)
12054 For x < 0 (resp. x <= -1) there will be no notl,
12055 so if possible swap the constants to get rid of the
12057 True/false will be -1/0 while code below (store flag
12058 followed by decrement) is 0/-1, so the constants need
12059 to be exchanged once more. */
12061 if (compare_code
== GE
|| !cf
)
12063 code
= reverse_condition (code
);
12068 HOST_WIDE_INT tmp
= cf
;
12073 out
= emit_store_flag (out
, code
, ix86_compare_op0
,
12074 ix86_compare_op1
, VOIDmode
, 0, -1);
12078 out
= emit_store_flag (out
, code
, ix86_compare_op0
,
12079 ix86_compare_op1
, VOIDmode
, 0, 1);
12081 out
= expand_simple_binop (mode
, PLUS
, copy_rtx (out
), constm1_rtx
,
12082 copy_rtx (out
), 1, OPTAB_DIRECT
);
12085 out
= expand_simple_binop (mode
, AND
, copy_rtx (out
),
12086 gen_int_mode (cf
- ct
, mode
),
12087 copy_rtx (out
), 1, OPTAB_DIRECT
);
12089 out
= expand_simple_binop (mode
, PLUS
, copy_rtx (out
), GEN_INT (ct
),
12090 copy_rtx (out
), 1, OPTAB_DIRECT
);
12091 if (!rtx_equal_p (out
, operands
[0]))
12092 emit_move_insn (operands
[0], copy_rtx (out
));
12094 return 1; /* DONE */
12098 if (!TARGET_CMOVE
|| (mode
== QImode
&& TARGET_PARTIAL_REG_STALL
))
12100 /* Try a few things more with specific constants and a variable. */
12103 rtx var
, orig_out
, out
, tmp
;
12105 if (BRANCH_COST
<= 2)
12106 return 0; /* FAIL */
12108 /* If one of the two operands is an interesting constant, load a
12109 constant with the above and mask it in with a logical operation. */
12111 if (CONST_INT_P (operands
[2]))
12114 if (INTVAL (operands
[2]) == 0 && operands
[3] != constm1_rtx
)
12115 operands
[3] = constm1_rtx
, op
= and_optab
;
12116 else if (INTVAL (operands
[2]) == -1 && operands
[3] != const0_rtx
)
12117 operands
[3] = const0_rtx
, op
= ior_optab
;
12119 return 0; /* FAIL */
12121 else if (CONST_INT_P (operands
[3]))
12124 if (INTVAL (operands
[3]) == 0 && operands
[2] != constm1_rtx
)
12125 operands
[2] = constm1_rtx
, op
= and_optab
;
12126 else if (INTVAL (operands
[3]) == -1 && operands
[3] != const0_rtx
)
12127 operands
[2] = const0_rtx
, op
= ior_optab
;
12129 return 0; /* FAIL */
12132 return 0; /* FAIL */
12134 orig_out
= operands
[0];
12135 tmp
= gen_reg_rtx (mode
);
12138 /* Recurse to get the constant loaded. */
12139 if (ix86_expand_int_movcc (operands
) == 0)
12140 return 0; /* FAIL */
12142 /* Mask in the interesting variable. */
12143 out
= expand_binop (mode
, op
, var
, tmp
, orig_out
, 0,
12145 if (!rtx_equal_p (out
, orig_out
))
12146 emit_move_insn (copy_rtx (orig_out
), copy_rtx (out
));
12148 return 1; /* DONE */
12152 * For comparison with above,
12162 if (! nonimmediate_operand (operands
[2], mode
))
12163 operands
[2] = force_reg (mode
, operands
[2]);
12164 if (! nonimmediate_operand (operands
[3], mode
))
12165 operands
[3] = force_reg (mode
, operands
[3]);
12167 if (bypass_test
&& reg_overlap_mentioned_p (operands
[0], operands
[3]))
12169 rtx tmp
= gen_reg_rtx (mode
);
12170 emit_move_insn (tmp
, operands
[3]);
12173 if (second_test
&& reg_overlap_mentioned_p (operands
[0], operands
[2]))
12175 rtx tmp
= gen_reg_rtx (mode
);
12176 emit_move_insn (tmp
, operands
[2]);
12180 if (! register_operand (operands
[2], VOIDmode
)
12182 || ! register_operand (operands
[3], VOIDmode
)))
12183 operands
[2] = force_reg (mode
, operands
[2]);
12186 && ! register_operand (operands
[3], VOIDmode
))
12187 operands
[3] = force_reg (mode
, operands
[3]);
12189 emit_insn (compare_seq
);
12190 emit_insn (gen_rtx_SET (VOIDmode
, operands
[0],
12191 gen_rtx_IF_THEN_ELSE (mode
,
12192 compare_op
, operands
[2],
12195 emit_insn (gen_rtx_SET (VOIDmode
, copy_rtx (operands
[0]),
12196 gen_rtx_IF_THEN_ELSE (mode
,
12198 copy_rtx (operands
[3]),
12199 copy_rtx (operands
[0]))));
12201 emit_insn (gen_rtx_SET (VOIDmode
, copy_rtx (operands
[0]),
12202 gen_rtx_IF_THEN_ELSE (mode
,
12204 copy_rtx (operands
[2]),
12205 copy_rtx (operands
[0]))));
12207 return 1; /* DONE */
12210 /* Swap, force into registers, or otherwise massage the two operands
12211 to an sse comparison with a mask result. Thus we differ a bit from
12212 ix86_prepare_fp_compare_args which expects to produce a flags result.
12214 The DEST operand exists to help determine whether to commute commutative
12215 operators. The POP0/POP1 operands are updated in place. The new
12216 comparison code is returned, or UNKNOWN if not implementable. */
12218 static enum rtx_code
12219 ix86_prepare_sse_fp_compare_args (rtx dest
, enum rtx_code code
,
12220 rtx
*pop0
, rtx
*pop1
)
12228 /* We have no LTGT as an operator. We could implement it with
12229 NE & ORDERED, but this requires an extra temporary. It's
12230 not clear that it's worth it. */
12237 /* These are supported directly. */
12244 /* For commutative operators, try to canonicalize the destination
12245 operand to be first in the comparison - this helps reload to
12246 avoid extra moves. */
12247 if (!dest
|| !rtx_equal_p (dest
, *pop1
))
12255 /* These are not supported directly. Swap the comparison operands
12256 to transform into something that is supported. */
12260 code
= swap_condition (code
);
12264 gcc_unreachable ();
12270 /* Detect conditional moves that exactly match min/max operational
12271 semantics. Note that this is IEEE safe, as long as we don't
12272 interchange the operands.
12274 Returns FALSE if this conditional move doesn't match a MIN/MAX,
12275 and TRUE if the operation is successful and instructions are emitted. */
12278 ix86_expand_sse_fp_minmax (rtx dest
, enum rtx_code code
, rtx cmp_op0
,
12279 rtx cmp_op1
, rtx if_true
, rtx if_false
)
12281 enum machine_mode mode
;
12287 else if (code
== UNGE
)
12290 if_true
= if_false
;
12296 if (rtx_equal_p (cmp_op0
, if_true
) && rtx_equal_p (cmp_op1
, if_false
))
12298 else if (rtx_equal_p (cmp_op1
, if_true
) && rtx_equal_p (cmp_op0
, if_false
))
12303 mode
= GET_MODE (dest
);
12305 /* We want to check HONOR_NANS and HONOR_SIGNED_ZEROS here,
12306 but MODE may be a vector mode and thus not appropriate. */
12307 if (!flag_finite_math_only
|| !flag_unsafe_math_optimizations
)
12309 int u
= is_min
? UNSPEC_IEEE_MIN
: UNSPEC_IEEE_MAX
;
12312 if_true
= force_reg (mode
, if_true
);
12313 v
= gen_rtvec (2, if_true
, if_false
);
12314 tmp
= gen_rtx_UNSPEC (mode
, v
, u
);
12318 code
= is_min
? SMIN
: SMAX
;
12319 tmp
= gen_rtx_fmt_ee (code
, mode
, if_true
, if_false
);
12322 emit_insn (gen_rtx_SET (VOIDmode
, dest
, tmp
));
12326 /* Expand an sse vector comparison. Return the register with the result. */
12329 ix86_expand_sse_cmp (rtx dest
, enum rtx_code code
, rtx cmp_op0
, rtx cmp_op1
,
12330 rtx op_true
, rtx op_false
)
12332 enum machine_mode mode
= GET_MODE (dest
);
12335 cmp_op0
= force_reg (mode
, cmp_op0
);
12336 if (!nonimmediate_operand (cmp_op1
, mode
))
12337 cmp_op1
= force_reg (mode
, cmp_op1
);
12340 || reg_overlap_mentioned_p (dest
, op_true
)
12341 || reg_overlap_mentioned_p (dest
, op_false
))
12342 dest
= gen_reg_rtx (mode
);
12344 x
= gen_rtx_fmt_ee (code
, mode
, cmp_op0
, cmp_op1
);
12345 emit_insn (gen_rtx_SET (VOIDmode
, dest
, x
));
12350 /* Expand DEST = CMP ? OP_TRUE : OP_FALSE into a sequence of logical
12351 operations. This is used for both scalar and vector conditional moves. */
12354 ix86_expand_sse_movcc (rtx dest
, rtx cmp
, rtx op_true
, rtx op_false
)
12356 enum machine_mode mode
= GET_MODE (dest
);
12359 if (op_false
== CONST0_RTX (mode
))
12361 op_true
= force_reg (mode
, op_true
);
12362 x
= gen_rtx_AND (mode
, cmp
, op_true
);
12363 emit_insn (gen_rtx_SET (VOIDmode
, dest
, x
));
12365 else if (op_true
== CONST0_RTX (mode
))
12367 op_false
= force_reg (mode
, op_false
);
12368 x
= gen_rtx_NOT (mode
, cmp
);
12369 x
= gen_rtx_AND (mode
, x
, op_false
);
12370 emit_insn (gen_rtx_SET (VOIDmode
, dest
, x
));
12374 op_true
= force_reg (mode
, op_true
);
12375 op_false
= force_reg (mode
, op_false
);
12377 t2
= gen_reg_rtx (mode
);
12379 t3
= gen_reg_rtx (mode
);
12383 x
= gen_rtx_AND (mode
, op_true
, cmp
);
12384 emit_insn (gen_rtx_SET (VOIDmode
, t2
, x
));
12386 x
= gen_rtx_NOT (mode
, cmp
);
12387 x
= gen_rtx_AND (mode
, x
, op_false
);
12388 emit_insn (gen_rtx_SET (VOIDmode
, t3
, x
));
12390 x
= gen_rtx_IOR (mode
, t3
, t2
);
12391 emit_insn (gen_rtx_SET (VOIDmode
, dest
, x
));
12395 /* Expand a floating-point conditional move. Return true if successful. */
12398 ix86_expand_fp_movcc (rtx operands
[])
12400 enum machine_mode mode
= GET_MODE (operands
[0]);
12401 enum rtx_code code
= GET_CODE (operands
[1]);
12402 rtx tmp
, compare_op
, second_test
, bypass_test
;
12404 if (TARGET_SSE_MATH
&& SSE_FLOAT_MODE_P (mode
))
12406 enum machine_mode cmode
;
12408 /* Since we've no cmove for sse registers, don't force bad register
12409 allocation just to gain access to it. Deny movcc when the
12410 comparison mode doesn't match the move mode. */
12411 cmode
= GET_MODE (ix86_compare_op0
);
12412 if (cmode
== VOIDmode
)
12413 cmode
= GET_MODE (ix86_compare_op1
);
12417 code
= ix86_prepare_sse_fp_compare_args (operands
[0], code
,
12419 &ix86_compare_op1
);
12420 if (code
== UNKNOWN
)
12423 if (ix86_expand_sse_fp_minmax (operands
[0], code
, ix86_compare_op0
,
12424 ix86_compare_op1
, operands
[2],
12428 tmp
= ix86_expand_sse_cmp (operands
[0], code
, ix86_compare_op0
,
12429 ix86_compare_op1
, operands
[2], operands
[3]);
12430 ix86_expand_sse_movcc (operands
[0], tmp
, operands
[2], operands
[3]);
12434 /* The floating point conditional move instructions don't directly
12435 support conditions resulting from a signed integer comparison. */
12437 compare_op
= ix86_expand_compare (code
, &second_test
, &bypass_test
);
12439 /* The floating point conditional move instructions don't directly
12440 support signed integer comparisons. */
12442 if (!fcmov_comparison_operator (compare_op
, VOIDmode
))
12444 gcc_assert (!second_test
&& !bypass_test
);
12445 tmp
= gen_reg_rtx (QImode
);
12446 ix86_expand_setcc (code
, tmp
);
12448 ix86_compare_op0
= tmp
;
12449 ix86_compare_op1
= const0_rtx
;
12450 compare_op
= ix86_expand_compare (code
, &second_test
, &bypass_test
);
12452 if (bypass_test
&& reg_overlap_mentioned_p (operands
[0], operands
[3]))
12454 tmp
= gen_reg_rtx (mode
);
12455 emit_move_insn (tmp
, operands
[3]);
12458 if (second_test
&& reg_overlap_mentioned_p (operands
[0], operands
[2]))
12460 tmp
= gen_reg_rtx (mode
);
12461 emit_move_insn (tmp
, operands
[2]);
12465 emit_insn (gen_rtx_SET (VOIDmode
, operands
[0],
12466 gen_rtx_IF_THEN_ELSE (mode
, compare_op
,
12467 operands
[2], operands
[3])));
12469 emit_insn (gen_rtx_SET (VOIDmode
, operands
[0],
12470 gen_rtx_IF_THEN_ELSE (mode
, bypass_test
,
12471 operands
[3], operands
[0])));
12473 emit_insn (gen_rtx_SET (VOIDmode
, operands
[0],
12474 gen_rtx_IF_THEN_ELSE (mode
, second_test
,
12475 operands
[2], operands
[0])));
12480 /* Expand a floating-point vector conditional move; a vcond operation
12481 rather than a movcc operation. */
12484 ix86_expand_fp_vcond (rtx operands
[])
12486 enum rtx_code code
= GET_CODE (operands
[3]);
12489 code
= ix86_prepare_sse_fp_compare_args (operands
[0], code
,
12490 &operands
[4], &operands
[5]);
12491 if (code
== UNKNOWN
)
12494 if (ix86_expand_sse_fp_minmax (operands
[0], code
, operands
[4],
12495 operands
[5], operands
[1], operands
[2]))
12498 cmp
= ix86_expand_sse_cmp (operands
[0], code
, operands
[4], operands
[5],
12499 operands
[1], operands
[2]);
12500 ix86_expand_sse_movcc (operands
[0], cmp
, operands
[1], operands
[2]);
12504 /* Expand a signed integral vector conditional move. */
12507 ix86_expand_int_vcond (rtx operands
[])
12509 enum machine_mode mode
= GET_MODE (operands
[0]);
12510 enum rtx_code code
= GET_CODE (operands
[3]);
12511 bool negate
= false;
12514 cop0
= operands
[4];
12515 cop1
= operands
[5];
12517 /* Canonicalize the comparison to EQ, GT, GTU. */
12528 code
= reverse_condition (code
);
12534 code
= reverse_condition (code
);
12540 code
= swap_condition (code
);
12541 x
= cop0
, cop0
= cop1
, cop1
= x
;
12545 gcc_unreachable ();
12548 /* Unsigned parallel compare is not supported by the hardware. Play some
12549 tricks to turn this into a signed comparison against 0. */
12552 cop0
= force_reg (mode
, cop0
);
12560 /* Perform a parallel modulo subtraction. */
12561 t1
= gen_reg_rtx (mode
);
12562 emit_insn (gen_subv4si3 (t1
, cop0
, cop1
));
12564 /* Extract the original sign bit of op0. */
12565 mask
= GEN_INT (-0x80000000);
12566 mask
= gen_rtx_CONST_VECTOR (mode
,
12567 gen_rtvec (4, mask
, mask
, mask
, mask
));
12568 mask
= force_reg (mode
, mask
);
12569 t2
= gen_reg_rtx (mode
);
12570 emit_insn (gen_andv4si3 (t2
, cop0
, mask
));
12572 /* XOR it back into the result of the subtraction. This results
12573 in the sign bit set iff we saw unsigned underflow. */
12574 x
= gen_reg_rtx (mode
);
12575 emit_insn (gen_xorv4si3 (x
, t1
, t2
));
12583 /* Perform a parallel unsigned saturating subtraction. */
12584 x
= gen_reg_rtx (mode
);
12585 emit_insn (gen_rtx_SET (VOIDmode
, x
,
12586 gen_rtx_US_MINUS (mode
, cop0
, cop1
)));
12593 gcc_unreachable ();
12597 cop1
= CONST0_RTX (mode
);
12600 x
= ix86_expand_sse_cmp (operands
[0], code
, cop0
, cop1
,
12601 operands
[1+negate
], operands
[2-negate
]);
12603 ix86_expand_sse_movcc (operands
[0], x
, operands
[1+negate
],
12604 operands
[2-negate
]);
12608 /* Unpack OP[1] into the next wider integer vector type. UNSIGNED_P is
12609 true if we should do zero extension, else sign extension. HIGH_P is
12610 true if we want the N/2 high elements, else the low elements. */
12613 ix86_expand_sse_unpack (rtx operands
[2], bool unsigned_p
, bool high_p
)
12615 enum machine_mode imode
= GET_MODE (operands
[1]);
12616 rtx (*unpack
)(rtx
, rtx
, rtx
);
12623 unpack
= gen_vec_interleave_highv16qi
;
12625 unpack
= gen_vec_interleave_lowv16qi
;
12629 unpack
= gen_vec_interleave_highv8hi
;
12631 unpack
= gen_vec_interleave_lowv8hi
;
12635 unpack
= gen_vec_interleave_highv4si
;
12637 unpack
= gen_vec_interleave_lowv4si
;
12640 gcc_unreachable ();
12643 dest
= gen_lowpart (imode
, operands
[0]);
12646 se
= force_reg (imode
, CONST0_RTX (imode
));
12648 se
= ix86_expand_sse_cmp (gen_reg_rtx (imode
), GT
, CONST0_RTX (imode
),
12649 operands
[1], pc_rtx
, pc_rtx
);
12651 emit_insn (unpack (dest
, operands
[1], se
));
12654 /* Expand conditional increment or decrement using adb/sbb instructions.
12655 The default case using setcc followed by the conditional move can be
12656 done by generic code. */
12658 ix86_expand_int_addcc (rtx operands
[])
12660 enum rtx_code code
= GET_CODE (operands
[1]);
12662 rtx val
= const0_rtx
;
12663 bool fpcmp
= false;
12664 enum machine_mode mode
= GET_MODE (operands
[0]);
12666 if (operands
[3] != const1_rtx
12667 && operands
[3] != constm1_rtx
)
12669 if (!ix86_expand_carry_flag_compare (code
, ix86_compare_op0
,
12670 ix86_compare_op1
, &compare_op
))
12672 code
= GET_CODE (compare_op
);
12674 if (GET_MODE (XEXP (compare_op
, 0)) == CCFPmode
12675 || GET_MODE (XEXP (compare_op
, 0)) == CCFPUmode
)
12678 code
= ix86_fp_compare_code_to_integer (code
);
12685 PUT_CODE (compare_op
,
12686 reverse_condition_maybe_unordered
12687 (GET_CODE (compare_op
)));
12689 PUT_CODE (compare_op
, reverse_condition (GET_CODE (compare_op
)));
12691 PUT_MODE (compare_op
, mode
);
12693 /* Construct either adc or sbb insn. */
12694 if ((code
== LTU
) == (operands
[3] == constm1_rtx
))
12696 switch (GET_MODE (operands
[0]))
12699 emit_insn (gen_subqi3_carry (operands
[0], operands
[2], val
, compare_op
));
12702 emit_insn (gen_subhi3_carry (operands
[0], operands
[2], val
, compare_op
));
12705 emit_insn (gen_subsi3_carry (operands
[0], operands
[2], val
, compare_op
));
12708 emit_insn (gen_subdi3_carry_rex64 (operands
[0], operands
[2], val
, compare_op
));
12711 gcc_unreachable ();
12716 switch (GET_MODE (operands
[0]))
12719 emit_insn (gen_addqi3_carry (operands
[0], operands
[2], val
, compare_op
));
12722 emit_insn (gen_addhi3_carry (operands
[0], operands
[2], val
, compare_op
));
12725 emit_insn (gen_addsi3_carry (operands
[0], operands
[2], val
, compare_op
));
12728 emit_insn (gen_adddi3_carry_rex64 (operands
[0], operands
[2], val
, compare_op
));
12731 gcc_unreachable ();
12734 return 1; /* DONE */
12738 /* Split operands 0 and 1 into SImode parts. Similar to split_di, but
12739 works for floating pointer parameters and nonoffsetable memories.
12740 For pushes, it returns just stack offsets; the values will be saved
12741 in the right order. Maximally three parts are generated. */
12744 ix86_split_to_parts (rtx operand
, rtx
*parts
, enum machine_mode mode
)
12749 size
= mode
==XFmode
? 3 : GET_MODE_SIZE (mode
) / 4;
12751 size
= (GET_MODE_SIZE (mode
) + 4) / 8;
12753 gcc_assert (!REG_P (operand
) || !MMX_REGNO_P (REGNO (operand
)));
12754 gcc_assert (size
>= 2 && size
<= 3);
12756 /* Optimize constant pool reference to immediates. This is used by fp
12757 moves, that force all constants to memory to allow combining. */
12758 if (MEM_P (operand
) && MEM_READONLY_P (operand
))
12760 rtx tmp
= maybe_get_pool_constant (operand
);
12765 if (MEM_P (operand
) && !offsettable_memref_p (operand
))
12767 /* The only non-offsetable memories we handle are pushes. */
12768 int ok
= push_operand (operand
, VOIDmode
);
12772 operand
= copy_rtx (operand
);
12773 PUT_MODE (operand
, Pmode
);
12774 parts
[0] = parts
[1] = parts
[2] = operand
;
12778 if (GET_CODE (operand
) == CONST_VECTOR
)
12780 enum machine_mode imode
= int_mode_for_mode (mode
);
12781 /* Caution: if we looked through a constant pool memory above,
12782 the operand may actually have a different mode now. That's
12783 ok, since we want to pun this all the way back to an integer. */
12784 operand
= simplify_subreg (imode
, operand
, GET_MODE (operand
), 0);
12785 gcc_assert (operand
!= NULL
);
12791 if (mode
== DImode
)
12792 split_di (&operand
, 1, &parts
[0], &parts
[1]);
12795 if (REG_P (operand
))
12797 gcc_assert (reload_completed
);
12798 parts
[0] = gen_rtx_REG (SImode
, REGNO (operand
) + 0);
12799 parts
[1] = gen_rtx_REG (SImode
, REGNO (operand
) + 1);
12801 parts
[2] = gen_rtx_REG (SImode
, REGNO (operand
) + 2);
12803 else if (offsettable_memref_p (operand
))
12805 operand
= adjust_address (operand
, SImode
, 0);
12806 parts
[0] = operand
;
12807 parts
[1] = adjust_address (operand
, SImode
, 4);
12809 parts
[2] = adjust_address (operand
, SImode
, 8);
12811 else if (GET_CODE (operand
) == CONST_DOUBLE
)
12816 REAL_VALUE_FROM_CONST_DOUBLE (r
, operand
);
12820 REAL_VALUE_TO_TARGET_LONG_DOUBLE (r
, l
);
12821 parts
[2] = gen_int_mode (l
[2], SImode
);
12824 REAL_VALUE_TO_TARGET_DOUBLE (r
, l
);
12827 gcc_unreachable ();
12829 parts
[1] = gen_int_mode (l
[1], SImode
);
12830 parts
[0] = gen_int_mode (l
[0], SImode
);
12833 gcc_unreachable ();
12838 if (mode
== TImode
)
12839 split_ti (&operand
, 1, &parts
[0], &parts
[1]);
12840 if (mode
== XFmode
|| mode
== TFmode
)
12842 enum machine_mode upper_mode
= mode
==XFmode
? SImode
: DImode
;
12843 if (REG_P (operand
))
12845 gcc_assert (reload_completed
);
12846 parts
[0] = gen_rtx_REG (DImode
, REGNO (operand
) + 0);
12847 parts
[1] = gen_rtx_REG (upper_mode
, REGNO (operand
) + 1);
12849 else if (offsettable_memref_p (operand
))
12851 operand
= adjust_address (operand
, DImode
, 0);
12852 parts
[0] = operand
;
12853 parts
[1] = adjust_address (operand
, upper_mode
, 8);
12855 else if (GET_CODE (operand
) == CONST_DOUBLE
)
12860 REAL_VALUE_FROM_CONST_DOUBLE (r
, operand
);
12861 real_to_target (l
, &r
, mode
);
12863 /* Do not use shift by 32 to avoid warning on 32bit systems. */
12864 if (HOST_BITS_PER_WIDE_INT
>= 64)
12867 ((l
[0] & (((HOST_WIDE_INT
) 2 << 31) - 1))
12868 + ((((HOST_WIDE_INT
) l
[1]) << 31) << 1),
12871 parts
[0] = immed_double_const (l
[0], l
[1], DImode
);
12873 if (upper_mode
== SImode
)
12874 parts
[1] = gen_int_mode (l
[2], SImode
);
12875 else if (HOST_BITS_PER_WIDE_INT
>= 64)
12878 ((l
[2] & (((HOST_WIDE_INT
) 2 << 31) - 1))
12879 + ((((HOST_WIDE_INT
) l
[3]) << 31) << 1),
12882 parts
[1] = immed_double_const (l
[2], l
[3], DImode
);
12885 gcc_unreachable ();
12892 /* Emit insns to perform a move or push of DI, DF, and XF values.
12893 Return false when normal moves are needed; true when all required
12894 insns have been emitted. Operands 2-4 contain the input values
12895 int the correct order; operands 5-7 contain the output values. */
12898 ix86_split_long_move (rtx operands
[])
12903 int collisions
= 0;
12904 enum machine_mode mode
= GET_MODE (operands
[0]);
12906 /* The DFmode expanders may ask us to move double.
12907 For 64bit target this is single move. By hiding the fact
12908 here we simplify i386.md splitters. */
12909 if (GET_MODE_SIZE (GET_MODE (operands
[0])) == 8 && TARGET_64BIT
)
12911 /* Optimize constant pool reference to immediates. This is used by
12912 fp moves, that force all constants to memory to allow combining. */
12914 if (MEM_P (operands
[1])
12915 && GET_CODE (XEXP (operands
[1], 0)) == SYMBOL_REF
12916 && CONSTANT_POOL_ADDRESS_P (XEXP (operands
[1], 0)))
12917 operands
[1] = get_pool_constant (XEXP (operands
[1], 0));
12918 if (push_operand (operands
[0], VOIDmode
))
12920 operands
[0] = copy_rtx (operands
[0]);
12921 PUT_MODE (operands
[0], Pmode
);
12924 operands
[0] = gen_lowpart (DImode
, operands
[0]);
12925 operands
[1] = gen_lowpart (DImode
, operands
[1]);
12926 emit_move_insn (operands
[0], operands
[1]);
12930 /* The only non-offsettable memory we handle is push. */
12931 if (push_operand (operands
[0], VOIDmode
))
12934 gcc_assert (!MEM_P (operands
[0])
12935 || offsettable_memref_p (operands
[0]));
12937 nparts
= ix86_split_to_parts (operands
[1], part
[1], GET_MODE (operands
[0]));
12938 ix86_split_to_parts (operands
[0], part
[0], GET_MODE (operands
[0]));
12940 /* When emitting push, take care for source operands on the stack. */
12941 if (push
&& MEM_P (operands
[1])
12942 && reg_overlap_mentioned_p (stack_pointer_rtx
, operands
[1]))
12945 part
[1][1] = change_address (part
[1][1], GET_MODE (part
[1][1]),
12946 XEXP (part
[1][2], 0));
12947 part
[1][0] = change_address (part
[1][0], GET_MODE (part
[1][0]),
12948 XEXP (part
[1][1], 0));
12951 /* We need to do copy in the right order in case an address register
12952 of the source overlaps the destination. */
12953 if (REG_P (part
[0][0]) && MEM_P (part
[1][0]))
12955 if (reg_overlap_mentioned_p (part
[0][0], XEXP (part
[1][0], 0)))
12957 if (reg_overlap_mentioned_p (part
[0][1], XEXP (part
[1][0], 0)))
12960 && reg_overlap_mentioned_p (part
[0][2], XEXP (part
[1][0], 0)))
12963 /* Collision in the middle part can be handled by reordering. */
12964 if (collisions
== 1 && nparts
== 3
12965 && reg_overlap_mentioned_p (part
[0][1], XEXP (part
[1][0], 0)))
12968 tmp
= part
[0][1]; part
[0][1] = part
[0][2]; part
[0][2] = tmp
;
12969 tmp
= part
[1][1]; part
[1][1] = part
[1][2]; part
[1][2] = tmp
;
12972 /* If there are more collisions, we can't handle it by reordering.
12973 Do an lea to the last part and use only one colliding move. */
12974 else if (collisions
> 1)
12980 base
= part
[0][nparts
- 1];
12982 /* Handle the case when the last part isn't valid for lea.
12983 Happens in 64-bit mode storing the 12-byte XFmode. */
12984 if (GET_MODE (base
) != Pmode
)
12985 base
= gen_rtx_REG (Pmode
, REGNO (base
));
12987 emit_insn (gen_rtx_SET (VOIDmode
, base
, XEXP (part
[1][0], 0)));
12988 part
[1][0] = replace_equiv_address (part
[1][0], base
);
12989 part
[1][1] = replace_equiv_address (part
[1][1],
12990 plus_constant (base
, UNITS_PER_WORD
));
12992 part
[1][2] = replace_equiv_address (part
[1][2],
12993 plus_constant (base
, 8));
13003 if (TARGET_128BIT_LONG_DOUBLE
&& mode
== XFmode
)
13004 emit_insn (gen_addsi3 (stack_pointer_rtx
, stack_pointer_rtx
, GEN_INT (-4)));
13005 emit_move_insn (part
[0][2], part
[1][2]);
13010 /* In 64bit mode we don't have 32bit push available. In case this is
13011 register, it is OK - we will just use larger counterpart. We also
13012 retype memory - these comes from attempt to avoid REX prefix on
13013 moving of second half of TFmode value. */
13014 if (GET_MODE (part
[1][1]) == SImode
)
13016 switch (GET_CODE (part
[1][1]))
13019 part
[1][1] = adjust_address (part
[1][1], DImode
, 0);
13023 part
[1][1] = gen_rtx_REG (DImode
, REGNO (part
[1][1]));
13027 gcc_unreachable ();
13030 if (GET_MODE (part
[1][0]) == SImode
)
13031 part
[1][0] = part
[1][1];
13034 emit_move_insn (part
[0][1], part
[1][1]);
13035 emit_move_insn (part
[0][0], part
[1][0]);
13039 /* Choose correct order to not overwrite the source before it is copied. */
13040 if ((REG_P (part
[0][0])
13041 && REG_P (part
[1][1])
13042 && (REGNO (part
[0][0]) == REGNO (part
[1][1])
13044 && REGNO (part
[0][0]) == REGNO (part
[1][2]))))
13046 && reg_overlap_mentioned_p (part
[0][0], XEXP (part
[1][0], 0))))
13050 operands
[2] = part
[0][2];
13051 operands
[3] = part
[0][1];
13052 operands
[4] = part
[0][0];
13053 operands
[5] = part
[1][2];
13054 operands
[6] = part
[1][1];
13055 operands
[7] = part
[1][0];
13059 operands
[2] = part
[0][1];
13060 operands
[3] = part
[0][0];
13061 operands
[5] = part
[1][1];
13062 operands
[6] = part
[1][0];
13069 operands
[2] = part
[0][0];
13070 operands
[3] = part
[0][1];
13071 operands
[4] = part
[0][2];
13072 operands
[5] = part
[1][0];
13073 operands
[6] = part
[1][1];
13074 operands
[7] = part
[1][2];
13078 operands
[2] = part
[0][0];
13079 operands
[3] = part
[0][1];
13080 operands
[5] = part
[1][0];
13081 operands
[6] = part
[1][1];
13085 /* If optimizing for size, attempt to locally unCSE nonzero constants. */
13088 if (CONST_INT_P (operands
[5])
13089 && operands
[5] != const0_rtx
13090 && REG_P (operands
[2]))
13092 if (CONST_INT_P (operands
[6])
13093 && INTVAL (operands
[6]) == INTVAL (operands
[5]))
13094 operands
[6] = operands
[2];
13097 && CONST_INT_P (operands
[7])
13098 && INTVAL (operands
[7]) == INTVAL (operands
[5]))
13099 operands
[7] = operands
[2];
13103 && CONST_INT_P (operands
[6])
13104 && operands
[6] != const0_rtx
13105 && REG_P (operands
[3])
13106 && CONST_INT_P (operands
[7])
13107 && INTVAL (operands
[7]) == INTVAL (operands
[6]))
13108 operands
[7] = operands
[3];
13111 emit_move_insn (operands
[2], operands
[5]);
13112 emit_move_insn (operands
[3], operands
[6]);
13114 emit_move_insn (operands
[4], operands
[7]);
13119 /* Helper function of ix86_split_ashl used to generate an SImode/DImode
13120 left shift by a constant, either using a single shift or
13121 a sequence of add instructions. */
13124 ix86_expand_ashl_const (rtx operand
, int count
, enum machine_mode mode
)
13128 emit_insn ((mode
== DImode
13130 : gen_adddi3
) (operand
, operand
, operand
));
13132 else if (!optimize_size
13133 && count
* ix86_cost
->add
<= ix86_cost
->shift_const
)
13136 for (i
=0; i
<count
; i
++)
13138 emit_insn ((mode
== DImode
13140 : gen_adddi3
) (operand
, operand
, operand
));
13144 emit_insn ((mode
== DImode
13146 : gen_ashldi3
) (operand
, operand
, GEN_INT (count
)));
13150 ix86_split_ashl (rtx
*operands
, rtx scratch
, enum machine_mode mode
)
13152 rtx low
[2], high
[2];
13154 const int single_width
= mode
== DImode
? 32 : 64;
13156 if (CONST_INT_P (operands
[2]))
13158 (mode
== DImode
? split_di
: split_ti
) (operands
, 2, low
, high
);
13159 count
= INTVAL (operands
[2]) & (single_width
* 2 - 1);
13161 if (count
>= single_width
)
13163 emit_move_insn (high
[0], low
[1]);
13164 emit_move_insn (low
[0], const0_rtx
);
13166 if (count
> single_width
)
13167 ix86_expand_ashl_const (high
[0], count
- single_width
, mode
);
13171 if (!rtx_equal_p (operands
[0], operands
[1]))
13172 emit_move_insn (operands
[0], operands
[1]);
13173 emit_insn ((mode
== DImode
13175 : gen_x86_64_shld
) (high
[0], low
[0], GEN_INT (count
)));
13176 ix86_expand_ashl_const (low
[0], count
, mode
);
13181 (mode
== DImode
? split_di
: split_ti
) (operands
, 1, low
, high
);
13183 if (operands
[1] == const1_rtx
)
13185 /* Assuming we've chosen a QImode capable registers, then 1 << N
13186 can be done with two 32/64-bit shifts, no branches, no cmoves. */
13187 if (ANY_QI_REG_P (low
[0]) && ANY_QI_REG_P (high
[0]))
13189 rtx s
, d
, flags
= gen_rtx_REG (CCZmode
, FLAGS_REG
);
13191 ix86_expand_clear (low
[0]);
13192 ix86_expand_clear (high
[0]);
13193 emit_insn (gen_testqi_ccz_1 (operands
[2], GEN_INT (single_width
)));
13195 d
= gen_lowpart (QImode
, low
[0]);
13196 d
= gen_rtx_STRICT_LOW_PART (VOIDmode
, d
);
13197 s
= gen_rtx_EQ (QImode
, flags
, const0_rtx
);
13198 emit_insn (gen_rtx_SET (VOIDmode
, d
, s
));
13200 d
= gen_lowpart (QImode
, high
[0]);
13201 d
= gen_rtx_STRICT_LOW_PART (VOIDmode
, d
);
13202 s
= gen_rtx_NE (QImode
, flags
, const0_rtx
);
13203 emit_insn (gen_rtx_SET (VOIDmode
, d
, s
));
13206 /* Otherwise, we can get the same results by manually performing
13207 a bit extract operation on bit 5/6, and then performing the two
13208 shifts. The two methods of getting 0/1 into low/high are exactly
13209 the same size. Avoiding the shift in the bit extract case helps
13210 pentium4 a bit; no one else seems to care much either way. */
13215 if (TARGET_PARTIAL_REG_STALL
&& !optimize_size
)
13216 x
= gen_rtx_ZERO_EXTEND (mode
== DImode
? SImode
: DImode
, operands
[2]);
13218 x
= gen_lowpart (mode
== DImode
? SImode
: DImode
, operands
[2]);
13219 emit_insn (gen_rtx_SET (VOIDmode
, high
[0], x
));
13221 emit_insn ((mode
== DImode
13223 : gen_lshrdi3
) (high
[0], high
[0], GEN_INT (mode
== DImode
? 5 : 6)));
13224 emit_insn ((mode
== DImode
13226 : gen_anddi3
) (high
[0], high
[0], GEN_INT (1)));
13227 emit_move_insn (low
[0], high
[0]);
13228 emit_insn ((mode
== DImode
13230 : gen_xordi3
) (low
[0], low
[0], GEN_INT (1)));
13233 emit_insn ((mode
== DImode
13235 : gen_ashldi3
) (low
[0], low
[0], operands
[2]));
13236 emit_insn ((mode
== DImode
13238 : gen_ashldi3
) (high
[0], high
[0], operands
[2]));
13242 if (operands
[1] == constm1_rtx
)
13244 /* For -1 << N, we can avoid the shld instruction, because we
13245 know that we're shifting 0...31/63 ones into a -1. */
13246 emit_move_insn (low
[0], constm1_rtx
);
13248 emit_move_insn (high
[0], low
[0]);
13250 emit_move_insn (high
[0], constm1_rtx
);
13254 if (!rtx_equal_p (operands
[0], operands
[1]))
13255 emit_move_insn (operands
[0], operands
[1]);
13257 (mode
== DImode
? split_di
: split_ti
) (operands
, 1, low
, high
);
13258 emit_insn ((mode
== DImode
13260 : gen_x86_64_shld
) (high
[0], low
[0], operands
[2]));
13263 emit_insn ((mode
== DImode
? gen_ashlsi3
: gen_ashldi3
) (low
[0], low
[0], operands
[2]));
13265 if (TARGET_CMOVE
&& scratch
)
13267 ix86_expand_clear (scratch
);
13268 emit_insn ((mode
== DImode
13269 ? gen_x86_shift_adj_1
13270 : gen_x86_64_shift_adj
) (high
[0], low
[0], operands
[2], scratch
));
13273 emit_insn (gen_x86_shift_adj_2 (high
[0], low
[0], operands
[2]));
13277 ix86_split_ashr (rtx
*operands
, rtx scratch
, enum machine_mode mode
)
13279 rtx low
[2], high
[2];
13281 const int single_width
= mode
== DImode
? 32 : 64;
13283 if (CONST_INT_P (operands
[2]))
13285 (mode
== DImode
? split_di
: split_ti
) (operands
, 2, low
, high
);
13286 count
= INTVAL (operands
[2]) & (single_width
* 2 - 1);
13288 if (count
== single_width
* 2 - 1)
13290 emit_move_insn (high
[0], high
[1]);
13291 emit_insn ((mode
== DImode
13293 : gen_ashrdi3
) (high
[0], high
[0],
13294 GEN_INT (single_width
- 1)));
13295 emit_move_insn (low
[0], high
[0]);
13298 else if (count
>= single_width
)
13300 emit_move_insn (low
[0], high
[1]);
13301 emit_move_insn (high
[0], low
[0]);
13302 emit_insn ((mode
== DImode
13304 : gen_ashrdi3
) (high
[0], high
[0],
13305 GEN_INT (single_width
- 1)));
13306 if (count
> single_width
)
13307 emit_insn ((mode
== DImode
13309 : gen_ashrdi3
) (low
[0], low
[0],
13310 GEN_INT (count
- single_width
)));
13314 if (!rtx_equal_p (operands
[0], operands
[1]))
13315 emit_move_insn (operands
[0], operands
[1]);
13316 emit_insn ((mode
== DImode
13318 : gen_x86_64_shrd
) (low
[0], high
[0], GEN_INT (count
)));
13319 emit_insn ((mode
== DImode
13321 : gen_ashrdi3
) (high
[0], high
[0], GEN_INT (count
)));
13326 if (!rtx_equal_p (operands
[0], operands
[1]))
13327 emit_move_insn (operands
[0], operands
[1]);
13329 (mode
== DImode
? split_di
: split_ti
) (operands
, 1, low
, high
);
13331 emit_insn ((mode
== DImode
13333 : gen_x86_64_shrd
) (low
[0], high
[0], operands
[2]));
13334 emit_insn ((mode
== DImode
13336 : gen_ashrdi3
) (high
[0], high
[0], operands
[2]));
13338 if (TARGET_CMOVE
&& scratch
)
13340 emit_move_insn (scratch
, high
[0]);
13341 emit_insn ((mode
== DImode
13343 : gen_ashrdi3
) (scratch
, scratch
,
13344 GEN_INT (single_width
- 1)));
13345 emit_insn ((mode
== DImode
13346 ? gen_x86_shift_adj_1
13347 : gen_x86_64_shift_adj
) (low
[0], high
[0], operands
[2],
13351 emit_insn (gen_x86_shift_adj_3 (low
[0], high
[0], operands
[2]));
13356 ix86_split_lshr (rtx
*operands
, rtx scratch
, enum machine_mode mode
)
13358 rtx low
[2], high
[2];
13360 const int single_width
= mode
== DImode
? 32 : 64;
13362 if (CONST_INT_P (operands
[2]))
13364 (mode
== DImode
? split_di
: split_ti
) (operands
, 2, low
, high
);
13365 count
= INTVAL (operands
[2]) & (single_width
* 2 - 1);
13367 if (count
>= single_width
)
13369 emit_move_insn (low
[0], high
[1]);
13370 ix86_expand_clear (high
[0]);
13372 if (count
> single_width
)
13373 emit_insn ((mode
== DImode
13375 : gen_lshrdi3
) (low
[0], low
[0],
13376 GEN_INT (count
- single_width
)));
13380 if (!rtx_equal_p (operands
[0], operands
[1]))
13381 emit_move_insn (operands
[0], operands
[1]);
13382 emit_insn ((mode
== DImode
13384 : gen_x86_64_shrd
) (low
[0], high
[0], GEN_INT (count
)));
13385 emit_insn ((mode
== DImode
13387 : gen_lshrdi3
) (high
[0], high
[0], GEN_INT (count
)));
13392 if (!rtx_equal_p (operands
[0], operands
[1]))
13393 emit_move_insn (operands
[0], operands
[1]);
13395 (mode
== DImode
? split_di
: split_ti
) (operands
, 1, low
, high
);
13397 emit_insn ((mode
== DImode
13399 : gen_x86_64_shrd
) (low
[0], high
[0], operands
[2]));
13400 emit_insn ((mode
== DImode
13402 : gen_lshrdi3
) (high
[0], high
[0], operands
[2]));
13404 /* Heh. By reversing the arguments, we can reuse this pattern. */
13405 if (TARGET_CMOVE
&& scratch
)
13407 ix86_expand_clear (scratch
);
13408 emit_insn ((mode
== DImode
13409 ? gen_x86_shift_adj_1
13410 : gen_x86_64_shift_adj
) (low
[0], high
[0], operands
[2],
13414 emit_insn (gen_x86_shift_adj_2 (low
[0], high
[0], operands
[2]));
13418 /* Predict just emitted jump instruction to be taken with probability PROB. */
13420 predict_jump (int prob
)
13422 rtx insn
= get_last_insn ();
13423 gcc_assert (JUMP_P (insn
));
13425 = gen_rtx_EXPR_LIST (REG_BR_PROB
,
13430 /* Helper function for the string operations below. Dest VARIABLE whether
13431 it is aligned to VALUE bytes. If true, jump to the label. */
13433 ix86_expand_aligntest (rtx variable
, int value
, bool epilogue
)
13435 rtx label
= gen_label_rtx ();
13436 rtx tmpcount
= gen_reg_rtx (GET_MODE (variable
));
13437 if (GET_MODE (variable
) == DImode
)
13438 emit_insn (gen_anddi3 (tmpcount
, variable
, GEN_INT (value
)));
13440 emit_insn (gen_andsi3 (tmpcount
, variable
, GEN_INT (value
)));
13441 emit_cmp_and_jump_insns (tmpcount
, const0_rtx
, EQ
, 0, GET_MODE (variable
),
13444 predict_jump (REG_BR_PROB_BASE
* 50 / 100);
13446 predict_jump (REG_BR_PROB_BASE
* 90 / 100);
13450 /* Adjust COUNTER by the VALUE. */
13452 ix86_adjust_counter (rtx countreg
, HOST_WIDE_INT value
)
13454 if (GET_MODE (countreg
) == DImode
)
13455 emit_insn (gen_adddi3 (countreg
, countreg
, GEN_INT (-value
)));
13457 emit_insn (gen_addsi3 (countreg
, countreg
, GEN_INT (-value
)));
13460 /* Zero extend possibly SImode EXP to Pmode register. */
13462 ix86_zero_extend_to_Pmode (rtx exp
)
13465 if (GET_MODE (exp
) == VOIDmode
)
13466 return force_reg (Pmode
, exp
);
13467 if (GET_MODE (exp
) == Pmode
)
13468 return copy_to_mode_reg (Pmode
, exp
);
13469 r
= gen_reg_rtx (Pmode
);
13470 emit_insn (gen_zero_extendsidi2 (r
, exp
));
13474 /* Divide COUNTREG by SCALE. */
13476 scale_counter (rtx countreg
, int scale
)
13479 rtx piece_size_mask
;
13483 if (CONST_INT_P (countreg
))
13484 return GEN_INT (INTVAL (countreg
) / scale
);
13485 gcc_assert (REG_P (countreg
));
13487 piece_size_mask
= GEN_INT (scale
- 1);
13488 sc
= expand_simple_binop (GET_MODE (countreg
), LSHIFTRT
, countreg
,
13489 GEN_INT (exact_log2 (scale
)),
13490 NULL
, 1, OPTAB_DIRECT
);
13494 /* Return mode for the memcpy/memset loop counter. Preffer SImode over DImode
13495 for constant loop counts. */
13497 static enum machine_mode
13498 counter_mode (rtx count_exp
)
13500 if (GET_MODE (count_exp
) != VOIDmode
)
13501 return GET_MODE (count_exp
);
13502 if (GET_CODE (count_exp
) != CONST_INT
)
13504 if (TARGET_64BIT
&& (INTVAL (count_exp
) & ~0xffffffff))
13509 /* When SRCPTR is non-NULL, output simple loop to move memory
13510 pointer to SRCPTR to DESTPTR via chunks of MODE unrolled UNROLL times,
13511 overall size is COUNT specified in bytes. When SRCPTR is NULL, output the
13512 equivalent loop to set memory by VALUE (supposed to be in MODE).
13514 The size is rounded down to whole number of chunk size moved at once.
13515 SRCMEM and DESTMEM provide MEMrtx to feed proper aliasing info. */
13519 expand_set_or_movmem_via_loop (rtx destmem
, rtx srcmem
,
13520 rtx destptr
, rtx srcptr
, rtx value
,
13521 rtx count
, enum machine_mode mode
, int unroll
,
13524 rtx out_label
, top_label
, iter
, tmp
;
13525 enum machine_mode iter_mode
= counter_mode (count
);
13526 rtx piece_size
= GEN_INT (GET_MODE_SIZE (mode
) * unroll
);
13527 rtx piece_size_mask
= GEN_INT (~((GET_MODE_SIZE (mode
) * unroll
) - 1));
13533 top_label
= gen_label_rtx ();
13534 out_label
= gen_label_rtx ();
13535 iter
= gen_reg_rtx (iter_mode
);
13537 size
= expand_simple_binop (iter_mode
, AND
, count
, piece_size_mask
,
13538 NULL
, 1, OPTAB_DIRECT
);
13539 /* Those two should combine. */
13540 if (piece_size
== const1_rtx
)
13542 emit_cmp_and_jump_insns (size
, const0_rtx
, EQ
, NULL_RTX
, iter_mode
,
13544 predict_jump (REG_BR_PROB_BASE
* 10 / 100);
13546 emit_move_insn (iter
, const0_rtx
);
13548 emit_label (top_label
);
13550 tmp
= convert_modes (Pmode
, iter_mode
, iter
, true);
13551 x_addr
= gen_rtx_PLUS (Pmode
, destptr
, tmp
);
13552 destmem
= change_address (destmem
, mode
, x_addr
);
13556 y_addr
= gen_rtx_PLUS (Pmode
, srcptr
, copy_rtx (tmp
));
13557 srcmem
= change_address (srcmem
, mode
, y_addr
);
13559 /* When unrolling for chips that reorder memory reads and writes,
13560 we can save registers by using single temporary.
13561 Also using 4 temporaries is overkill in 32bit mode. */
13562 if (!TARGET_64BIT
&& 0)
13564 for (i
= 0; i
< unroll
; i
++)
13569 adjust_address (copy_rtx (destmem
), mode
, GET_MODE_SIZE (mode
));
13571 adjust_address (copy_rtx (srcmem
), mode
, GET_MODE_SIZE (mode
));
13573 emit_move_insn (destmem
, srcmem
);
13579 gcc_assert (unroll
<= 4);
13580 for (i
= 0; i
< unroll
; i
++)
13582 tmpreg
[i
] = gen_reg_rtx (mode
);
13586 adjust_address (copy_rtx (srcmem
), mode
, GET_MODE_SIZE (mode
));
13588 emit_move_insn (tmpreg
[i
], srcmem
);
13590 for (i
= 0; i
< unroll
; i
++)
13595 adjust_address (copy_rtx (destmem
), mode
, GET_MODE_SIZE (mode
));
13597 emit_move_insn (destmem
, tmpreg
[i
]);
13602 for (i
= 0; i
< unroll
; i
++)
13606 adjust_address (copy_rtx (destmem
), mode
, GET_MODE_SIZE (mode
));
13607 emit_move_insn (destmem
, value
);
13610 tmp
= expand_simple_binop (iter_mode
, PLUS
, iter
, piece_size
, iter
,
13611 true, OPTAB_LIB_WIDEN
);
13613 emit_move_insn (iter
, tmp
);
13615 emit_cmp_and_jump_insns (iter
, size
, LT
, NULL_RTX
, iter_mode
,
13617 if (expected_size
!= -1)
13619 expected_size
/= GET_MODE_SIZE (mode
) * unroll
;
13620 if (expected_size
== 0)
13622 else if (expected_size
> REG_BR_PROB_BASE
)
13623 predict_jump (REG_BR_PROB_BASE
- 1);
13625 predict_jump (REG_BR_PROB_BASE
- (REG_BR_PROB_BASE
+ expected_size
/ 2) / expected_size
);
13628 predict_jump (REG_BR_PROB_BASE
* 80 / 100);
13629 iter
= ix86_zero_extend_to_Pmode (iter
);
13630 tmp
= expand_simple_binop (Pmode
, PLUS
, destptr
, iter
, destptr
,
13631 true, OPTAB_LIB_WIDEN
);
13632 if (tmp
!= destptr
)
13633 emit_move_insn (destptr
, tmp
);
13636 tmp
= expand_simple_binop (Pmode
, PLUS
, srcptr
, iter
, srcptr
,
13637 true, OPTAB_LIB_WIDEN
);
13639 emit_move_insn (srcptr
, tmp
);
13641 emit_label (out_label
);
13644 /* Output "rep; mov" instruction.
13645 Arguments have same meaning as for previous function */
13647 expand_movmem_via_rep_mov (rtx destmem
, rtx srcmem
,
13648 rtx destptr
, rtx srcptr
,
13650 enum machine_mode mode
)
13656 /* If the size is known, it is shorter to use rep movs. */
13657 if (mode
== QImode
&& CONST_INT_P (count
)
13658 && !(INTVAL (count
) & 3))
13661 if (destptr
!= XEXP (destmem
, 0) || GET_MODE (destmem
) != BLKmode
)
13662 destmem
= adjust_automodify_address_nv (destmem
, BLKmode
, destptr
, 0);
13663 if (srcptr
!= XEXP (srcmem
, 0) || GET_MODE (srcmem
) != BLKmode
)
13664 srcmem
= adjust_automodify_address_nv (srcmem
, BLKmode
, srcptr
, 0);
13665 countreg
= ix86_zero_extend_to_Pmode (scale_counter (count
, GET_MODE_SIZE (mode
)));
13666 if (mode
!= QImode
)
13668 destexp
= gen_rtx_ASHIFT (Pmode
, countreg
,
13669 GEN_INT (exact_log2 (GET_MODE_SIZE (mode
))));
13670 destexp
= gen_rtx_PLUS (Pmode
, destexp
, destptr
);
13671 srcexp
= gen_rtx_ASHIFT (Pmode
, countreg
,
13672 GEN_INT (exact_log2 (GET_MODE_SIZE (mode
))));
13673 srcexp
= gen_rtx_PLUS (Pmode
, srcexp
, srcptr
);
13677 destexp
= gen_rtx_PLUS (Pmode
, destptr
, countreg
);
13678 srcexp
= gen_rtx_PLUS (Pmode
, srcptr
, countreg
);
13680 emit_insn (gen_rep_mov (destptr
, destmem
, srcptr
, srcmem
, countreg
,
13684 /* Output "rep; stos" instruction.
13685 Arguments have same meaning as for previous function */
13687 expand_setmem_via_rep_stos (rtx destmem
, rtx destptr
, rtx value
,
13689 enum machine_mode mode
)
13694 if (destptr
!= XEXP (destmem
, 0) || GET_MODE (destmem
) != BLKmode
)
13695 destmem
= adjust_automodify_address_nv (destmem
, BLKmode
, destptr
, 0);
13696 value
= force_reg (mode
, gen_lowpart (mode
, value
));
13697 countreg
= ix86_zero_extend_to_Pmode (scale_counter (count
, GET_MODE_SIZE (mode
)));
13698 if (mode
!= QImode
)
13700 destexp
= gen_rtx_ASHIFT (Pmode
, countreg
,
13701 GEN_INT (exact_log2 (GET_MODE_SIZE (mode
))));
13702 destexp
= gen_rtx_PLUS (Pmode
, destexp
, destptr
);
13705 destexp
= gen_rtx_PLUS (Pmode
, destptr
, countreg
);
13706 emit_insn (gen_rep_stos (destptr
, countreg
, destmem
, value
, destexp
));
13710 emit_strmov (rtx destmem
, rtx srcmem
,
13711 rtx destptr
, rtx srcptr
, enum machine_mode mode
, int offset
)
13713 rtx src
= adjust_automodify_address_nv (srcmem
, mode
, srcptr
, offset
);
13714 rtx dest
= adjust_automodify_address_nv (destmem
, mode
, destptr
, offset
);
13715 emit_insn (gen_strmov (destptr
, dest
, srcptr
, src
));
13718 /* Output code to copy at most count & (max_size - 1) bytes from SRC to DEST. */
13720 expand_movmem_epilogue (rtx destmem
, rtx srcmem
,
13721 rtx destptr
, rtx srcptr
, rtx count
, int max_size
)
13724 if (CONST_INT_P (count
))
13726 HOST_WIDE_INT countval
= INTVAL (count
);
13729 if ((countval
& 0x10) && max_size
> 16)
13733 emit_strmov (destmem
, srcmem
, destptr
, srcptr
, DImode
, offset
);
13734 emit_strmov (destmem
, srcmem
, destptr
, srcptr
, DImode
, offset
+ 8);
13737 gcc_unreachable ();
13740 if ((countval
& 0x08) && max_size
> 8)
13743 emit_strmov (destmem
, srcmem
, destptr
, srcptr
, DImode
, offset
);
13746 emit_strmov (destmem
, srcmem
, destptr
, srcptr
, SImode
, offset
);
13747 emit_strmov (destmem
, srcmem
, destptr
, srcptr
, SImode
, offset
+ 4);
13751 if ((countval
& 0x04) && max_size
> 4)
13753 emit_strmov (destmem
, srcmem
, destptr
, srcptr
, SImode
, offset
);
13756 if ((countval
& 0x02) && max_size
> 2)
13758 emit_strmov (destmem
, srcmem
, destptr
, srcptr
, HImode
, offset
);
13761 if ((countval
& 0x01) && max_size
> 1)
13763 emit_strmov (destmem
, srcmem
, destptr
, srcptr
, QImode
, offset
);
13770 count
= expand_simple_binop (GET_MODE (count
), AND
, count
, GEN_INT (max_size
- 1),
13771 count
, 1, OPTAB_DIRECT
);
13772 expand_set_or_movmem_via_loop (destmem
, srcmem
, destptr
, srcptr
, NULL
,
13773 count
, QImode
, 1, 4);
13777 /* When there are stringops, we can cheaply increase dest and src pointers.
13778 Otherwise we save code size by maintaining offset (zero is readily
13779 available from preceding rep operation) and using x86 addressing modes.
13781 if (TARGET_SINGLE_STRINGOP
)
13785 rtx label
= ix86_expand_aligntest (count
, 4, true);
13786 src
= change_address (srcmem
, SImode
, srcptr
);
13787 dest
= change_address (destmem
, SImode
, destptr
);
13788 emit_insn (gen_strmov (destptr
, dest
, srcptr
, src
));
13789 emit_label (label
);
13790 LABEL_NUSES (label
) = 1;
13794 rtx label
= ix86_expand_aligntest (count
, 2, true);
13795 src
= change_address (srcmem
, HImode
, srcptr
);
13796 dest
= change_address (destmem
, HImode
, destptr
);
13797 emit_insn (gen_strmov (destptr
, dest
, srcptr
, src
));
13798 emit_label (label
);
13799 LABEL_NUSES (label
) = 1;
13803 rtx label
= ix86_expand_aligntest (count
, 1, true);
13804 src
= change_address (srcmem
, QImode
, srcptr
);
13805 dest
= change_address (destmem
, QImode
, destptr
);
13806 emit_insn (gen_strmov (destptr
, dest
, srcptr
, src
));
13807 emit_label (label
);
13808 LABEL_NUSES (label
) = 1;
13813 rtx offset
= force_reg (Pmode
, const0_rtx
);
13818 rtx label
= ix86_expand_aligntest (count
, 4, true);
13819 src
= change_address (srcmem
, SImode
, srcptr
);
13820 dest
= change_address (destmem
, SImode
, destptr
);
13821 emit_move_insn (dest
, src
);
13822 tmp
= expand_simple_binop (Pmode
, PLUS
, offset
, GEN_INT (4), NULL
,
13823 true, OPTAB_LIB_WIDEN
);
13825 emit_move_insn (offset
, tmp
);
13826 emit_label (label
);
13827 LABEL_NUSES (label
) = 1;
13831 rtx label
= ix86_expand_aligntest (count
, 2, true);
13832 tmp
= gen_rtx_PLUS (Pmode
, srcptr
, offset
);
13833 src
= change_address (srcmem
, HImode
, tmp
);
13834 tmp
= gen_rtx_PLUS (Pmode
, destptr
, offset
);
13835 dest
= change_address (destmem
, HImode
, tmp
);
13836 emit_move_insn (dest
, src
);
13837 tmp
= expand_simple_binop (Pmode
, PLUS
, offset
, GEN_INT (2), tmp
,
13838 true, OPTAB_LIB_WIDEN
);
13840 emit_move_insn (offset
, tmp
);
13841 emit_label (label
);
13842 LABEL_NUSES (label
) = 1;
13846 rtx label
= ix86_expand_aligntest (count
, 1, true);
13847 tmp
= gen_rtx_PLUS (Pmode
, srcptr
, offset
);
13848 src
= change_address (srcmem
, QImode
, tmp
);
13849 tmp
= gen_rtx_PLUS (Pmode
, destptr
, offset
);
13850 dest
= change_address (destmem
, QImode
, tmp
);
13851 emit_move_insn (dest
, src
);
13852 emit_label (label
);
13853 LABEL_NUSES (label
) = 1;
13858 /* Output code to set at most count & (max_size - 1) bytes starting by DEST. */
13860 expand_setmem_epilogue_via_loop (rtx destmem
, rtx destptr
, rtx value
,
13861 rtx count
, int max_size
)
13864 expand_simple_binop (counter_mode (count
), AND
, count
,
13865 GEN_INT (max_size
- 1), count
, 1, OPTAB_DIRECT
);
13866 expand_set_or_movmem_via_loop (destmem
, NULL
, destptr
, NULL
,
13867 gen_lowpart (QImode
, value
), count
, QImode
,
13871 /* Output code to set at most count & (max_size - 1) bytes starting by DEST. */
13873 expand_setmem_epilogue (rtx destmem
, rtx destptr
, rtx value
, rtx count
, int max_size
)
13877 if (CONST_INT_P (count
))
13879 HOST_WIDE_INT countval
= INTVAL (count
);
13882 if ((countval
& 0x10) && max_size
> 16)
13886 dest
= adjust_automodify_address_nv (destmem
, DImode
, destptr
, offset
);
13887 emit_insn (gen_strset (destptr
, dest
, value
));
13888 dest
= adjust_automodify_address_nv (destmem
, DImode
, destptr
, offset
+ 8);
13889 emit_insn (gen_strset (destptr
, dest
, value
));
13892 gcc_unreachable ();
13895 if ((countval
& 0x08) && max_size
> 8)
13899 dest
= adjust_automodify_address_nv (destmem
, DImode
, destptr
, offset
);
13900 emit_insn (gen_strset (destptr
, dest
, value
));
13904 dest
= adjust_automodify_address_nv (destmem
, SImode
, destptr
, offset
);
13905 emit_insn (gen_strset (destptr
, dest
, value
));
13906 dest
= adjust_automodify_address_nv (destmem
, SImode
, destptr
, offset
+ 4);
13907 emit_insn (gen_strset (destptr
, dest
, value
));
13911 if ((countval
& 0x04) && max_size
> 4)
13913 dest
= adjust_automodify_address_nv (destmem
, SImode
, destptr
, offset
);
13914 emit_insn (gen_strset (destptr
, dest
, gen_lowpart (SImode
, value
)));
13917 if ((countval
& 0x02) && max_size
> 2)
13919 dest
= adjust_automodify_address_nv (destmem
, HImode
, destptr
, offset
);
13920 emit_insn (gen_strset (destptr
, dest
, gen_lowpart (HImode
, value
)));
13923 if ((countval
& 0x01) && max_size
> 1)
13925 dest
= adjust_automodify_address_nv (destmem
, QImode
, destptr
, offset
);
13926 emit_insn (gen_strset (destptr
, dest
, gen_lowpart (QImode
, value
)));
13933 expand_setmem_epilogue_via_loop (destmem
, destptr
, value
, count
, max_size
);
13938 rtx label
= ix86_expand_aligntest (count
, 16, true);
13941 dest
= change_address (destmem
, DImode
, destptr
);
13942 emit_insn (gen_strset (destptr
, dest
, value
));
13943 emit_insn (gen_strset (destptr
, dest
, value
));
13947 dest
= change_address (destmem
, SImode
, destptr
);
13948 emit_insn (gen_strset (destptr
, dest
, value
));
13949 emit_insn (gen_strset (destptr
, dest
, value
));
13950 emit_insn (gen_strset (destptr
, dest
, value
));
13951 emit_insn (gen_strset (destptr
, dest
, value
));
13953 emit_label (label
);
13954 LABEL_NUSES (label
) = 1;
13958 rtx label
= ix86_expand_aligntest (count
, 8, true);
13961 dest
= change_address (destmem
, DImode
, destptr
);
13962 emit_insn (gen_strset (destptr
, dest
, value
));
13966 dest
= change_address (destmem
, SImode
, destptr
);
13967 emit_insn (gen_strset (destptr
, dest
, value
));
13968 emit_insn (gen_strset (destptr
, dest
, value
));
13970 emit_label (label
);
13971 LABEL_NUSES (label
) = 1;
13975 rtx label
= ix86_expand_aligntest (count
, 4, true);
13976 dest
= change_address (destmem
, SImode
, destptr
);
13977 emit_insn (gen_strset (destptr
, dest
, gen_lowpart (SImode
, value
)));
13978 emit_label (label
);
13979 LABEL_NUSES (label
) = 1;
13983 rtx label
= ix86_expand_aligntest (count
, 2, true);
13984 dest
= change_address (destmem
, HImode
, destptr
);
13985 emit_insn (gen_strset (destptr
, dest
, gen_lowpart (HImode
, value
)));
13986 emit_label (label
);
13987 LABEL_NUSES (label
) = 1;
13991 rtx label
= ix86_expand_aligntest (count
, 1, true);
13992 dest
= change_address (destmem
, QImode
, destptr
);
13993 emit_insn (gen_strset (destptr
, dest
, gen_lowpart (QImode
, value
)));
13994 emit_label (label
);
13995 LABEL_NUSES (label
) = 1;
13999 /* Copy enough from DEST to SRC to align DEST known to by aligned by ALIGN to
14000 DESIRED_ALIGNMENT. */
14002 expand_movmem_prologue (rtx destmem
, rtx srcmem
,
14003 rtx destptr
, rtx srcptr
, rtx count
,
14004 int align
, int desired_alignment
)
14006 if (align
<= 1 && desired_alignment
> 1)
14008 rtx label
= ix86_expand_aligntest (destptr
, 1, false);
14009 srcmem
= change_address (srcmem
, QImode
, srcptr
);
14010 destmem
= change_address (destmem
, QImode
, destptr
);
14011 emit_insn (gen_strmov (destptr
, destmem
, srcptr
, srcmem
));
14012 ix86_adjust_counter (count
, 1);
14013 emit_label (label
);
14014 LABEL_NUSES (label
) = 1;
14016 if (align
<= 2 && desired_alignment
> 2)
14018 rtx label
= ix86_expand_aligntest (destptr
, 2, false);
14019 srcmem
= change_address (srcmem
, HImode
, srcptr
);
14020 destmem
= change_address (destmem
, HImode
, destptr
);
14021 emit_insn (gen_strmov (destptr
, destmem
, srcptr
, srcmem
));
14022 ix86_adjust_counter (count
, 2);
14023 emit_label (label
);
14024 LABEL_NUSES (label
) = 1;
14026 if (align
<= 4 && desired_alignment
> 4)
14028 rtx label
= ix86_expand_aligntest (destptr
, 4, false);
14029 srcmem
= change_address (srcmem
, SImode
, srcptr
);
14030 destmem
= change_address (destmem
, SImode
, destptr
);
14031 emit_insn (gen_strmov (destptr
, destmem
, srcptr
, srcmem
));
14032 ix86_adjust_counter (count
, 4);
14033 emit_label (label
);
14034 LABEL_NUSES (label
) = 1;
14036 gcc_assert (desired_alignment
<= 8);
14039 /* Set enough from DEST to align DEST known to by aligned by ALIGN to
14040 DESIRED_ALIGNMENT. */
14042 expand_setmem_prologue (rtx destmem
, rtx destptr
, rtx value
, rtx count
,
14043 int align
, int desired_alignment
)
14045 if (align
<= 1 && desired_alignment
> 1)
14047 rtx label
= ix86_expand_aligntest (destptr
, 1, false);
14048 destmem
= change_address (destmem
, QImode
, destptr
);
14049 emit_insn (gen_strset (destptr
, destmem
, gen_lowpart (QImode
, value
)));
14050 ix86_adjust_counter (count
, 1);
14051 emit_label (label
);
14052 LABEL_NUSES (label
) = 1;
14054 if (align
<= 2 && desired_alignment
> 2)
14056 rtx label
= ix86_expand_aligntest (destptr
, 2, false);
14057 destmem
= change_address (destmem
, HImode
, destptr
);
14058 emit_insn (gen_strset (destptr
, destmem
, gen_lowpart (HImode
, value
)));
14059 ix86_adjust_counter (count
, 2);
14060 emit_label (label
);
14061 LABEL_NUSES (label
) = 1;
14063 if (align
<= 4 && desired_alignment
> 4)
14065 rtx label
= ix86_expand_aligntest (destptr
, 4, false);
14066 destmem
= change_address (destmem
, SImode
, destptr
);
14067 emit_insn (gen_strset (destptr
, destmem
, gen_lowpart (SImode
, value
)));
14068 ix86_adjust_counter (count
, 4);
14069 emit_label (label
);
14070 LABEL_NUSES (label
) = 1;
14072 gcc_assert (desired_alignment
<= 8);
14075 /* Given COUNT and EXPECTED_SIZE, decide on codegen of string operation. */
14076 static enum stringop_alg
14077 decide_alg (HOST_WIDE_INT count
, HOST_WIDE_INT expected_size
, bool memset
,
14078 int *dynamic_check
)
14080 const struct stringop_algs
* algs
;
14082 *dynamic_check
= -1;
14084 algs
= &ix86_cost
->memset
[TARGET_64BIT
!= 0];
14086 algs
= &ix86_cost
->memcpy
[TARGET_64BIT
!= 0];
14087 if (stringop_alg
!= no_stringop
)
14088 return stringop_alg
;
14089 /* rep; movq or rep; movl is the smallest variant. */
14090 else if (optimize_size
)
14092 if (!count
|| (count
& 3))
14093 return rep_prefix_1_byte
;
14095 return rep_prefix_4_byte
;
14097 /* Very tiny blocks are best handled via the loop, REP is expensive to setup.
14099 else if (expected_size
!= -1 && expected_size
< 4)
14100 return loop_1_byte
;
14101 else if (expected_size
!= -1)
14104 enum stringop_alg alg
= libcall
;
14105 for (i
= 0; i
< NAX_STRINGOP_ALGS
; i
++)
14107 gcc_assert (algs
->size
[i
].max
);
14108 if (algs
->size
[i
].max
>= expected_size
|| algs
->size
[i
].max
== -1)
14110 if (algs
->size
[i
].alg
!= libcall
)
14111 alg
= algs
->size
[i
].alg
;
14112 /* Honor TARGET_INLINE_ALL_STRINGOPS by picking
14113 last non-libcall inline algorithm. */
14114 if (TARGET_INLINE_ALL_STRINGOPS
)
14116 /* When the current size is best to be copied by a libcall,
14117 but we are still forced to inline, run the heuristic bellow
14118 that will pick code for medium sized blocks. */
14119 if (alg
!= libcall
)
14124 return algs
->size
[i
].alg
;
14127 gcc_assert (TARGET_INLINE_ALL_STRINGOPS
);
14129 /* When asked to inline the call anyway, try to pick meaningful choice.
14130 We look for maximal size of block that is faster to copy by hand and
14131 take blocks of at most of that size guessing that average size will
14132 be roughly half of the block.
14134 If this turns out to be bad, we might simply specify the preferred
14135 choice in ix86_costs. */
14136 if ((TARGET_INLINE_ALL_STRINGOPS
|| TARGET_INLINE_STRINGOPS_DYNAMICALLY
)
14137 && algs
->unknown_size
== libcall
)
14140 enum stringop_alg alg
;
14143 for (i
= 0; i
< NAX_STRINGOP_ALGS
; i
++)
14144 if (algs
->size
[i
].alg
!= libcall
&& algs
->size
[i
].alg
)
14145 max
= algs
->size
[i
].max
;
14148 alg
= decide_alg (count
, max
/ 2, memset
, dynamic_check
);
14149 gcc_assert (*dynamic_check
== -1);
14150 gcc_assert (alg
!= libcall
);
14151 if (TARGET_INLINE_STRINGOPS_DYNAMICALLY
)
14152 *dynamic_check
= max
;
14155 return algs
->unknown_size
;
14158 /* Decide on alignment. We know that the operand is already aligned to ALIGN
14159 (ALIGN can be based on profile feedback and thus it is not 100% guaranteed). */
14161 decide_alignment (int align
,
14162 enum stringop_alg alg
,
14165 int desired_align
= 0;
14169 gcc_unreachable ();
14171 case unrolled_loop
:
14172 desired_align
= GET_MODE_SIZE (Pmode
);
14174 case rep_prefix_8_byte
:
14177 case rep_prefix_4_byte
:
14178 /* PentiumPro has special logic triggering for 8 byte aligned blocks.
14179 copying whole cacheline at once. */
14180 if (TARGET_PENTIUMPRO
)
14185 case rep_prefix_1_byte
:
14186 /* PentiumPro has special logic triggering for 8 byte aligned blocks.
14187 copying whole cacheline at once. */
14188 if (TARGET_PENTIUMPRO
)
14202 if (desired_align
< align
)
14203 desired_align
= align
;
14204 if (expected_size
!= -1 && expected_size
< 4)
14205 desired_align
= align
;
14206 return desired_align
;
/* Return the smallest power of 2 greater than VAL.  */
static int
smallest_pow2_greater_than (int val)
{
  int ret = 1;
  while (ret <= val)
    ret <<= 1;
  return ret;
}
14219 /* Expand string move (memcpy) operation. Use i386 string operations when
14220 profitable. expand_clrmem contains similar code. The code depends upon
14221 architecture, block size and alignment, but always has the same
14224 1) Prologue guard: Conditional that jumps up to epilogues for small
14225 blocks that can be handled by epilogue alone. This is faster but
14226 also needed for correctness, since prologue assume the block is larger
14227 than the desired alignment.
14229 Optional dynamic check for size and libcall for large
14230 blocks is emitted here too, with -minline-stringops-dynamically.
14232 2) Prologue: copy first few bytes in order to get destination aligned
14233 to DESIRED_ALIGN. It is emitted only when ALIGN is less than
14234 DESIRED_ALIGN and and up to DESIRED_ALIGN - ALIGN bytes can be copied.
14235 We emit either a jump tree on power of two sized blocks, or a byte loop.
14237 3) Main body: the copying loop itself, copying in SIZE_NEEDED chunks
14238 with specified algorithm.
14240 4) Epilogue: code copying tail of the block that is too small to be
14241 handled by main body (or up to size guarded by prologue guard). */
14244 ix86_expand_movmem (rtx dst
, rtx src
, rtx count_exp
, rtx align_exp
,
14245 rtx expected_align_exp
, rtx expected_size_exp
)
14251 rtx jump_around_label
= NULL
;
14252 HOST_WIDE_INT align
= 1;
14253 unsigned HOST_WIDE_INT count
= 0;
14254 HOST_WIDE_INT expected_size
= -1;
14255 int size_needed
= 0, epilogue_size_needed
;
14256 int desired_align
= 0;
14257 enum stringop_alg alg
;
14260 if (CONST_INT_P (align_exp
))
14261 align
= INTVAL (align_exp
);
14262 /* i386 can do misaligned access on reasonably increased cost. */
14263 if (CONST_INT_P (expected_align_exp
)
14264 && INTVAL (expected_align_exp
) > align
)
14265 align
= INTVAL (expected_align_exp
);
14266 if (CONST_INT_P (count_exp
))
14267 count
= expected_size
= INTVAL (count_exp
);
14268 if (CONST_INT_P (expected_size_exp
) && count
== 0)
14269 expected_size
= INTVAL (expected_size_exp
);
14271 /* Step 0: Decide on preferred algorithm, desired alignment and
14272 size of chunks to be copied by main loop. */
14274 alg
= decide_alg (count
, expected_size
, false, &dynamic_check
);
14275 desired_align
= decide_alignment (align
, alg
, expected_size
);
14277 if (!TARGET_ALIGN_STRINGOPS
)
14278 align
= desired_align
;
14280 if (alg
== libcall
)
14282 gcc_assert (alg
!= no_stringop
);
14284 count_exp
= copy_to_mode_reg (GET_MODE (count_exp
), count_exp
);
14285 destreg
= copy_to_mode_reg (Pmode
, XEXP (dst
, 0));
14286 srcreg
= copy_to_mode_reg (Pmode
, XEXP (src
, 0));
14291 gcc_unreachable ();
14293 size_needed
= GET_MODE_SIZE (Pmode
);
14295 case unrolled_loop
:
14296 size_needed
= GET_MODE_SIZE (Pmode
) * (TARGET_64BIT
? 4 : 2);
14298 case rep_prefix_8_byte
:
14301 case rep_prefix_4_byte
:
14304 case rep_prefix_1_byte
:
14310 epilogue_size_needed
= size_needed
;
14312 /* Step 1: Prologue guard. */
14314 /* Alignment code needs count to be in register. */
14315 if (CONST_INT_P (count_exp
) && desired_align
> align
)
14317 enum machine_mode mode
= SImode
;
14318 if (TARGET_64BIT
&& (count
& ~0xffffffff))
14320 count_exp
= force_reg (mode
, count_exp
);
14322 gcc_assert (desired_align
>= 1 && align
>= 1);
14324 /* Ensure that alignment prologue won't copy past end of block. */
14325 if (size_needed
> 1 || (desired_align
> 1 && desired_align
> align
))
14327 epilogue_size_needed
= MAX (size_needed
- 1, desired_align
- align
);
14328 /* Epilogue always copies COUNT_EXP & EPILOGUE_SIZE_NEEDED bytes.
14329 Make sure it is power of 2. */
14330 epilogue_size_needed
= smallest_pow2_greater_than (epilogue_size_needed
);
14332 label
= gen_label_rtx ();
14333 emit_cmp_and_jump_insns (count_exp
,
14334 GEN_INT (epilogue_size_needed
),
14335 LTU
, 0, counter_mode (count_exp
), 1, label
);
14336 if (GET_CODE (count_exp
) == CONST_INT
)
14338 else if (expected_size
== -1 || expected_size
< epilogue_size_needed
)
14339 predict_jump (REG_BR_PROB_BASE
* 60 / 100);
14341 predict_jump (REG_BR_PROB_BASE
* 20 / 100);
14343 /* Emit code to decide on runtime whether library call or inline should be
14345 if (dynamic_check
!= -1)
14347 rtx hot_label
= gen_label_rtx ();
14348 jump_around_label
= gen_label_rtx ();
14349 emit_cmp_and_jump_insns (count_exp
, GEN_INT (dynamic_check
- 1),
14350 LEU
, 0, GET_MODE (count_exp
), 1, hot_label
);
14351 predict_jump (REG_BR_PROB_BASE
* 90 / 100);
14352 emit_block_move_via_libcall (dst
, src
, count_exp
, false);
14353 emit_jump (jump_around_label
);
14354 emit_label (hot_label
);
14357 /* Step 2: Alignment prologue. */
14359 if (desired_align
> align
)
14361 /* Except for the first move in epilogue, we no longer know
14362 constant offset in aliasing info. It don't seems to worth
14363 the pain to maintain it for the first move, so throw away
14365 src
= change_address (src
, BLKmode
, srcreg
);
14366 dst
= change_address (dst
, BLKmode
, destreg
);
14367 expand_movmem_prologue (dst
, src
, destreg
, srcreg
, count_exp
, align
,
14370 if (label
&& size_needed
== 1)
14372 emit_label (label
);
14373 LABEL_NUSES (label
) = 1;
14377 /* Step 3: Main loop. */
14383 gcc_unreachable ();
14385 expand_set_or_movmem_via_loop (dst
, src
, destreg
, srcreg
, NULL
,
14386 count_exp
, QImode
, 1, expected_size
);
14389 expand_set_or_movmem_via_loop (dst
, src
, destreg
, srcreg
, NULL
,
14390 count_exp
, Pmode
, 1, expected_size
);
14392 case unrolled_loop
:
14393 /* Unroll only by factor of 2 in 32bit mode, since we don't have enough
14394 registers for 4 temporaries anyway. */
14395 expand_set_or_movmem_via_loop (dst
, src
, destreg
, srcreg
, NULL
,
14396 count_exp
, Pmode
, TARGET_64BIT
? 4 : 2,
14399 case rep_prefix_8_byte
:
14400 expand_movmem_via_rep_mov (dst
, src
, destreg
, srcreg
, count_exp
,
14403 case rep_prefix_4_byte
:
14404 expand_movmem_via_rep_mov (dst
, src
, destreg
, srcreg
, count_exp
,
14407 case rep_prefix_1_byte
:
14408 expand_movmem_via_rep_mov (dst
, src
, destreg
, srcreg
, count_exp
,
14412 /* Adjust properly the offset of src and dest memory for aliasing. */
14413 if (CONST_INT_P (count_exp
))
14415 src
= adjust_automodify_address_nv (src
, BLKmode
, srcreg
,
14416 (count
/ size_needed
) * size_needed
);
14417 dst
= adjust_automodify_address_nv (dst
, BLKmode
, destreg
,
14418 (count
/ size_needed
) * size_needed
);
14422 src
= change_address (src
, BLKmode
, srcreg
);
14423 dst
= change_address (dst
, BLKmode
, destreg
);
14426 /* Step 4: Epilogue to copy the remaining bytes. */
14430 /* When the main loop is done, COUNT_EXP might hold original count,
14431 while we want to copy only COUNT_EXP & SIZE_NEEDED bytes.
14432 Epilogue code will actually copy COUNT_EXP & EPILOGUE_SIZE_NEEDED
14433 bytes. Compensate if needed. */
14435 if (size_needed
< epilogue_size_needed
)
14438 expand_simple_binop (counter_mode (count_exp
), AND
, count_exp
,
14439 GEN_INT (size_needed
- 1), count_exp
, 1,
14441 if (tmp
!= count_exp
)
14442 emit_move_insn (count_exp
, tmp
);
14444 emit_label (label
);
14445 LABEL_NUSES (label
) = 1;
14448 if (count_exp
!= const0_rtx
&& epilogue_size_needed
> 1)
14449 expand_movmem_epilogue (dst
, src
, destreg
, srcreg
, count_exp
,
14450 epilogue_size_needed
);
14451 if (jump_around_label
)
14452 emit_label (jump_around_label
);
14456 /* Helper function for memcpy. For QImode value 0xXY produce
14457 0xXYXYXYXY of wide specified by MODE. This is essentially
14458 a * 0x10101010, but we can do slightly better than
14459 synth_mult by unwinding the sequence by hand on CPUs with
14462 promote_duplicated_reg (enum machine_mode mode
, rtx val
)
14464 enum machine_mode valmode
= GET_MODE (val
);
14466 int nops
= mode
== DImode
? 3 : 2;
14468 gcc_assert (mode
== SImode
|| mode
== DImode
);
14469 if (val
== const0_rtx
)
14470 return copy_to_mode_reg (mode
, const0_rtx
);
14471 if (CONST_INT_P (val
))
14473 HOST_WIDE_INT v
= INTVAL (val
) & 255;
14477 if (mode
== DImode
)
14478 v
|= (v
<< 16) << 16;
14479 return copy_to_mode_reg (mode
, gen_int_mode (v
, mode
));
14482 if (valmode
== VOIDmode
)
14484 if (valmode
!= QImode
)
14485 val
= gen_lowpart (QImode
, val
);
14486 if (mode
== QImode
)
14488 if (!TARGET_PARTIAL_REG_STALL
)
14490 if (ix86_cost
->mult_init
[mode
== DImode
? 3 : 2]
14491 + ix86_cost
->mult_bit
* (mode
== DImode
? 8 : 4)
14492 <= (ix86_cost
->shift_const
+ ix86_cost
->add
) * nops
14493 + (COSTS_N_INSNS (TARGET_PARTIAL_REG_STALL
== 0)))
14495 rtx reg
= convert_modes (mode
, QImode
, val
, true);
14496 tmp
= promote_duplicated_reg (mode
, const1_rtx
);
14497 return expand_simple_binop (mode
, MULT
, reg
, tmp
, NULL
, 1,
14502 rtx reg
= convert_modes (mode
, QImode
, val
, true);
14504 if (!TARGET_PARTIAL_REG_STALL
)
14505 if (mode
== SImode
)
14506 emit_insn (gen_movsi_insv_1 (reg
, reg
));
14508 emit_insn (gen_movdi_insv_1_rex64 (reg
, reg
));
14511 tmp
= expand_simple_binop (mode
, ASHIFT
, reg
, GEN_INT (8),
14512 NULL
, 1, OPTAB_DIRECT
);
14514 expand_simple_binop (mode
, IOR
, reg
, tmp
, reg
, 1, OPTAB_DIRECT
);
14516 tmp
= expand_simple_binop (mode
, ASHIFT
, reg
, GEN_INT (16),
14517 NULL
, 1, OPTAB_DIRECT
);
14518 reg
= expand_simple_binop (mode
, IOR
, reg
, tmp
, reg
, 1, OPTAB_DIRECT
);
14519 if (mode
== SImode
)
14521 tmp
= expand_simple_binop (mode
, ASHIFT
, reg
, GEN_INT (32),
14522 NULL
, 1, OPTAB_DIRECT
);
14523 reg
= expand_simple_binop (mode
, IOR
, reg
, tmp
, reg
, 1, OPTAB_DIRECT
);
14528 /* Duplicate value VAL using promote_duplicated_reg into maximal size that will
14529 be needed by main loop copying SIZE_NEEDED chunks and prologue getting
14530 alignment from ALIGN to DESIRED_ALIGN. */
14532 promote_duplicated_reg_to_size (rtx val
, int size_needed
, int desired_align
, int align
)
14537 && (size_needed
> 4 || (desired_align
> align
&& desired_align
> 4)))
14538 promoted_val
= promote_duplicated_reg (DImode
, val
);
14539 else if (size_needed
> 2 || (desired_align
> align
&& desired_align
> 2))
14540 promoted_val
= promote_duplicated_reg (SImode
, val
);
14541 else if (size_needed
> 1 || (desired_align
> align
&& desired_align
> 1))
14542 promoted_val
= promote_duplicated_reg (HImode
, val
);
14544 promoted_val
= val
;
14546 return promoted_val
;
14549 /* Expand string clear operation (bzero). Use i386 string operations when
14550 profitable. See expand_movmem comment for explanation of individual
14551 steps performed. */
14553 ix86_expand_setmem (rtx dst
, rtx count_exp
, rtx val_exp
, rtx align_exp
,
14554 rtx expected_align_exp
, rtx expected_size_exp
)
14559 rtx jump_around_label
= NULL
;
14560 HOST_WIDE_INT align
= 1;
14561 unsigned HOST_WIDE_INT count
= 0;
14562 HOST_WIDE_INT expected_size
= -1;
14563 int size_needed
= 0, epilogue_size_needed
;
14564 int desired_align
= 0;
14565 enum stringop_alg alg
;
14566 rtx promoted_val
= NULL
;
14567 bool force_loopy_epilogue
= false;
14570 if (CONST_INT_P (align_exp
))
14571 align
= INTVAL (align_exp
);
14572 /* i386 can do misaligned access on reasonably increased cost. */
14573 if (CONST_INT_P (expected_align_exp
)
14574 && INTVAL (expected_align_exp
) > align
)
14575 align
= INTVAL (expected_align_exp
);
14576 if (CONST_INT_P (count_exp
))
14577 count
= expected_size
= INTVAL (count_exp
);
14578 if (CONST_INT_P (expected_size_exp
) && count
== 0)
14579 expected_size
= INTVAL (expected_size_exp
);
14581 /* Step 0: Decide on preferred algorithm, desired alignment and
14582 size of chunks to be copied by main loop. */
14584 alg
= decide_alg (count
, expected_size
, true, &dynamic_check
);
14585 desired_align
= decide_alignment (align
, alg
, expected_size
);
14587 if (!TARGET_ALIGN_STRINGOPS
)
14588 align
= desired_align
;
14590 if (alg
== libcall
)
14592 gcc_assert (alg
!= no_stringop
);
14594 count_exp
= copy_to_mode_reg (counter_mode (count_exp
), count_exp
);
14595 destreg
= copy_to_mode_reg (Pmode
, XEXP (dst
, 0));
14600 gcc_unreachable ();
14602 size_needed
= GET_MODE_SIZE (Pmode
);
14604 case unrolled_loop
:
14605 size_needed
= GET_MODE_SIZE (Pmode
) * 4;
14607 case rep_prefix_8_byte
:
14610 case rep_prefix_4_byte
:
14613 case rep_prefix_1_byte
:
14618 epilogue_size_needed
= size_needed
;
14620 /* Step 1: Prologue guard. */
14622 /* Alignment code needs count to be in register. */
14623 if (CONST_INT_P (count_exp
) && desired_align
> align
)
14625 enum machine_mode mode
= SImode
;
14626 if (TARGET_64BIT
&& (count
& ~0xffffffff))
14628 count_exp
= force_reg (mode
, count_exp
);
14630 /* Do the cheap promotion to allow better CSE across the
14631 main loop and epilogue (ie one load of the big constant in the
14632 front of all code. */
14633 if (CONST_INT_P (val_exp
))
14634 promoted_val
= promote_duplicated_reg_to_size (val_exp
, size_needed
,
14635 desired_align
, align
);
14636 /* Ensure that alignment prologue won't copy past end of block. */
14637 if (size_needed
> 1 || (desired_align
> 1 && desired_align
> align
))
14639 epilogue_size_needed
= MAX (size_needed
- 1, desired_align
- align
);
14640 /* Epilogue always copies COUNT_EXP & EPILOGUE_SIZE_NEEDED bytes.
14641 Make sure it is power of 2. */
14642 epilogue_size_needed
= smallest_pow2_greater_than (epilogue_size_needed
);
14644 /* To improve performance of small blocks, we jump around the VAL
14645 promoting mode. This mean that if the promoted VAL is not constant,
14646 we might not use it in the epilogue and have to use byte
14648 if (epilogue_size_needed
> 2 && !promoted_val
)
14649 force_loopy_epilogue
= true;
14650 label
= gen_label_rtx ();
14651 emit_cmp_and_jump_insns (count_exp
,
14652 GEN_INT (epilogue_size_needed
),
14653 LTU
, 0, counter_mode (count_exp
), 1, label
);
14654 if (GET_CODE (count_exp
) == CONST_INT
)
14656 else if (expected_size
== -1 || expected_size
<= epilogue_size_needed
)
14657 predict_jump (REG_BR_PROB_BASE
* 60 / 100);
14659 predict_jump (REG_BR_PROB_BASE
* 20 / 100);
14661 if (dynamic_check
!= -1)
14663 rtx hot_label
= gen_label_rtx ();
14664 jump_around_label
= gen_label_rtx ();
14665 emit_cmp_and_jump_insns (count_exp
, GEN_INT (dynamic_check
- 1),
14666 LEU
, 0, counter_mode (count_exp
), 1, hot_label
);
14667 predict_jump (REG_BR_PROB_BASE
* 90 / 100);
14668 set_storage_via_libcall (dst
, count_exp
, val_exp
, false);
14669 emit_jump (jump_around_label
);
14670 emit_label (hot_label
);
14673 /* Step 2: Alignment prologue. */
14675 /* Do the expensive promotion once we branched off the small blocks. */
14677 promoted_val
= promote_duplicated_reg_to_size (val_exp
, size_needed
,
14678 desired_align
, align
);
14679 gcc_assert (desired_align
>= 1 && align
>= 1);
14681 if (desired_align
> align
)
14683 /* Except for the first move in epilogue, we no longer know
14684 constant offset in aliasing info. It don't seems to worth
14685 the pain to maintain it for the first move, so throw away
14687 dst
= change_address (dst
, BLKmode
, destreg
);
14688 expand_setmem_prologue (dst
, destreg
, promoted_val
, count_exp
, align
,
14691 if (label
&& size_needed
== 1)
14693 emit_label (label
);
14694 LABEL_NUSES (label
) = 1;
14698 /* Step 3: Main loop. */
14704 gcc_unreachable ();
14706 expand_set_or_movmem_via_loop (dst
, NULL
, destreg
, NULL
, promoted_val
,
14707 count_exp
, QImode
, 1, expected_size
);
14710 expand_set_or_movmem_via_loop (dst
, NULL
, destreg
, NULL
, promoted_val
,
14711 count_exp
, Pmode
, 1, expected_size
);
14713 case unrolled_loop
:
14714 expand_set_or_movmem_via_loop (dst
, NULL
, destreg
, NULL
, promoted_val
,
14715 count_exp
, Pmode
, 4, expected_size
);
14717 case rep_prefix_8_byte
:
14718 expand_setmem_via_rep_stos (dst
, destreg
, promoted_val
, count_exp
,
14721 case rep_prefix_4_byte
:
14722 expand_setmem_via_rep_stos (dst
, destreg
, promoted_val
, count_exp
,
14725 case rep_prefix_1_byte
:
14726 expand_setmem_via_rep_stos (dst
, destreg
, promoted_val
, count_exp
,
14730 /* Adjust properly the offset of src and dest memory for aliasing. */
14731 if (CONST_INT_P (count_exp
))
14732 dst
= adjust_automodify_address_nv (dst
, BLKmode
, destreg
,
14733 (count
/ size_needed
) * size_needed
);
14735 dst
= change_address (dst
, BLKmode
, destreg
);
14737 /* Step 4: Epilogue to copy the remaining bytes. */
14741 /* When the main loop is done, COUNT_EXP might hold original count,
14742 while we want to copy only COUNT_EXP & SIZE_NEEDED bytes.
14743 Epilogue code will actually copy COUNT_EXP & EPILOGUE_SIZE_NEEDED
14744 bytes. Compensate if needed. */
14746 if (size_needed
< desired_align
- align
)
14749 expand_simple_binop (counter_mode (count_exp
), AND
, count_exp
,
14750 GEN_INT (size_needed
- 1), count_exp
, 1,
14752 size_needed
= desired_align
- align
+ 1;
14753 if (tmp
!= count_exp
)
14754 emit_move_insn (count_exp
, tmp
);
14756 emit_label (label
);
14757 LABEL_NUSES (label
) = 1;
14759 if (count_exp
!= const0_rtx
&& epilogue_size_needed
> 1)
14761 if (force_loopy_epilogue
)
14762 expand_setmem_epilogue_via_loop (dst
, destreg
, val_exp
, count_exp
,
14765 expand_setmem_epilogue (dst
, destreg
, promoted_val
, count_exp
,
14768 if (jump_around_label
)
14769 emit_label (jump_around_label
);
14773 /* Expand strlen. */
14775 ix86_expand_strlen (rtx out
, rtx src
, rtx eoschar
, rtx align
)
14777 rtx addr
, scratch1
, scratch2
, scratch3
, scratch4
;
14779 /* The generic case of strlen expander is long. Avoid it's
14780 expanding unless TARGET_INLINE_ALL_STRINGOPS. */
14782 if (TARGET_UNROLL_STRLEN
&& eoschar
== const0_rtx
&& optimize
> 1
14783 && !TARGET_INLINE_ALL_STRINGOPS
14785 && (!CONST_INT_P (align
) || INTVAL (align
) < 4))
14788 addr
= force_reg (Pmode
, XEXP (src
, 0));
14789 scratch1
= gen_reg_rtx (Pmode
);
14791 if (TARGET_UNROLL_STRLEN
&& eoschar
== const0_rtx
&& optimize
> 1
14794 /* Well it seems that some optimizer does not combine a call like
14795 foo(strlen(bar), strlen(bar));
14796 when the move and the subtraction is done here. It does calculate
14797 the length just once when these instructions are done inside of
14798 output_strlen_unroll(). But I think since &bar[strlen(bar)] is
14799 often used and I use one fewer register for the lifetime of
14800 output_strlen_unroll() this is better. */
14802 emit_move_insn (out
, addr
);
14804 ix86_expand_strlensi_unroll_1 (out
, src
, align
);
14806 /* strlensi_unroll_1 returns the address of the zero at the end of
14807 the string, like memchr(), so compute the length by subtracting
14808 the start address. */
14810 emit_insn (gen_subdi3 (out
, out
, addr
));
14812 emit_insn (gen_subsi3 (out
, out
, addr
));
14817 scratch2
= gen_reg_rtx (Pmode
);
14818 scratch3
= gen_reg_rtx (Pmode
);
14819 scratch4
= force_reg (Pmode
, constm1_rtx
);
14821 emit_move_insn (scratch3
, addr
);
14822 eoschar
= force_reg (QImode
, eoschar
);
14824 src
= replace_equiv_address_nv (src
, scratch3
);
14826 /* If .md starts supporting :P, this can be done in .md. */
14827 unspec
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (4, src
, eoschar
, align
,
14828 scratch4
), UNSPEC_SCAS
);
14829 emit_insn (gen_strlenqi_1 (scratch1
, scratch3
, unspec
));
14832 emit_insn (gen_one_cmpldi2 (scratch2
, scratch1
));
14833 emit_insn (gen_adddi3 (out
, scratch2
, constm1_rtx
));
14837 emit_insn (gen_one_cmplsi2 (scratch2
, scratch1
));
14838 emit_insn (gen_addsi3 (out
, scratch2
, constm1_rtx
));
14844 /* Expand the appropriate insns for doing strlen if not just doing
14847 out = result, initialized with the start address
14848 align_rtx = alignment of the address.
14849 scratch = scratch register, initialized with the startaddress when
14850 not aligned, otherwise undefined
14852 This is just the body. It needs the initializations mentioned above and
14853 some address computing at the end. These things are done in i386.md. */
14856 ix86_expand_strlensi_unroll_1 (rtx out
, rtx src
, rtx align_rtx
)
14860 rtx align_2_label
= NULL_RTX
;
14861 rtx align_3_label
= NULL_RTX
;
14862 rtx align_4_label
= gen_label_rtx ();
14863 rtx end_0_label
= gen_label_rtx ();
14865 rtx tmpreg
= gen_reg_rtx (SImode
);
14866 rtx scratch
= gen_reg_rtx (SImode
);
14870 if (CONST_INT_P (align_rtx
))
14871 align
= INTVAL (align_rtx
);
14873 /* Loop to check 1..3 bytes for null to get an aligned pointer. */
14875 /* Is there a known alignment and is it less than 4? */
14878 rtx scratch1
= gen_reg_rtx (Pmode
);
14879 emit_move_insn (scratch1
, out
);
14880 /* Is there a known alignment and is it not 2? */
14883 align_3_label
= gen_label_rtx (); /* Label when aligned to 3-byte */
14884 align_2_label
= gen_label_rtx (); /* Label when aligned to 2-byte */
14886 /* Leave just the 3 lower bits. */
14887 align_rtx
= expand_binop (Pmode
, and_optab
, scratch1
, GEN_INT (3),
14888 NULL_RTX
, 0, OPTAB_WIDEN
);
14890 emit_cmp_and_jump_insns (align_rtx
, const0_rtx
, EQ
, NULL
,
14891 Pmode
, 1, align_4_label
);
14892 emit_cmp_and_jump_insns (align_rtx
, const2_rtx
, EQ
, NULL
,
14893 Pmode
, 1, align_2_label
);
14894 emit_cmp_and_jump_insns (align_rtx
, const2_rtx
, GTU
, NULL
,
14895 Pmode
, 1, align_3_label
);
14899 /* Since the alignment is 2, we have to check 2 or 0 bytes;
14900 check if is aligned to 4 - byte. */
14902 align_rtx
= expand_binop (Pmode
, and_optab
, scratch1
, const2_rtx
,
14903 NULL_RTX
, 0, OPTAB_WIDEN
);
14905 emit_cmp_and_jump_insns (align_rtx
, const0_rtx
, EQ
, NULL
,
14906 Pmode
, 1, align_4_label
);
14909 mem
= change_address (src
, QImode
, out
);
14911 /* Now compare the bytes. */
14913 /* Compare the first n unaligned byte on a byte per byte basis. */
14914 emit_cmp_and_jump_insns (mem
, const0_rtx
, EQ
, NULL
,
14915 QImode
, 1, end_0_label
);
14917 /* Increment the address. */
14919 emit_insn (gen_adddi3 (out
, out
, const1_rtx
));
14921 emit_insn (gen_addsi3 (out
, out
, const1_rtx
));
14923 /* Not needed with an alignment of 2 */
14926 emit_label (align_2_label
);
14928 emit_cmp_and_jump_insns (mem
, const0_rtx
, EQ
, NULL
, QImode
, 1,
14932 emit_insn (gen_adddi3 (out
, out
, const1_rtx
));
14934 emit_insn (gen_addsi3 (out
, out
, const1_rtx
));
14936 emit_label (align_3_label
);
14939 emit_cmp_and_jump_insns (mem
, const0_rtx
, EQ
, NULL
, QImode
, 1,
14943 emit_insn (gen_adddi3 (out
, out
, const1_rtx
));
14945 emit_insn (gen_addsi3 (out
, out
, const1_rtx
));
14948 /* Generate loop to check 4 bytes at a time. It is not a good idea to
14949 align this loop. It gives only huge programs, but does not help to
14951 emit_label (align_4_label
);
14953 mem
= change_address (src
, SImode
, out
);
14954 emit_move_insn (scratch
, mem
);
14956 emit_insn (gen_adddi3 (out
, out
, GEN_INT (4)));
14958 emit_insn (gen_addsi3 (out
, out
, GEN_INT (4)));
14960 /* This formula yields a nonzero result iff one of the bytes is zero.
14961 This saves three branches inside loop and many cycles. */
14963 emit_insn (gen_addsi3 (tmpreg
, scratch
, GEN_INT (-0x01010101)));
14964 emit_insn (gen_one_cmplsi2 (scratch
, scratch
));
14965 emit_insn (gen_andsi3 (tmpreg
, tmpreg
, scratch
));
14966 emit_insn (gen_andsi3 (tmpreg
, tmpreg
,
14967 gen_int_mode (0x80808080, SImode
)));
14968 emit_cmp_and_jump_insns (tmpreg
, const0_rtx
, EQ
, 0, SImode
, 1,
14973 rtx reg
= gen_reg_rtx (SImode
);
14974 rtx reg2
= gen_reg_rtx (Pmode
);
14975 emit_move_insn (reg
, tmpreg
);
14976 emit_insn (gen_lshrsi3 (reg
, reg
, GEN_INT (16)));
14978 /* If zero is not in the first two bytes, move two bytes forward. */
14979 emit_insn (gen_testsi_ccno_1 (tmpreg
, GEN_INT (0x8080)));
14980 tmp
= gen_rtx_REG (CCNOmode
, FLAGS_REG
);
14981 tmp
= gen_rtx_EQ (VOIDmode
, tmp
, const0_rtx
);
14982 emit_insn (gen_rtx_SET (VOIDmode
, tmpreg
,
14983 gen_rtx_IF_THEN_ELSE (SImode
, tmp
,
14986 /* Emit lea manually to avoid clobbering of flags. */
14987 emit_insn (gen_rtx_SET (SImode
, reg2
,
14988 gen_rtx_PLUS (Pmode
, out
, const2_rtx
)));
14990 tmp
= gen_rtx_REG (CCNOmode
, FLAGS_REG
);
14991 tmp
= gen_rtx_EQ (VOIDmode
, tmp
, const0_rtx
);
14992 emit_insn (gen_rtx_SET (VOIDmode
, out
,
14993 gen_rtx_IF_THEN_ELSE (Pmode
, tmp
,
15000 rtx end_2_label
= gen_label_rtx ();
15001 /* Is zero in the first two bytes? */
15003 emit_insn (gen_testsi_ccno_1 (tmpreg
, GEN_INT (0x8080)));
15004 tmp
= gen_rtx_REG (CCNOmode
, FLAGS_REG
);
15005 tmp
= gen_rtx_NE (VOIDmode
, tmp
, const0_rtx
);
15006 tmp
= gen_rtx_IF_THEN_ELSE (VOIDmode
, tmp
,
15007 gen_rtx_LABEL_REF (VOIDmode
, end_2_label
),
15009 tmp
= emit_jump_insn (gen_rtx_SET (VOIDmode
, pc_rtx
, tmp
));
15010 JUMP_LABEL (tmp
) = end_2_label
;
15012 /* Not in the first two. Move two bytes forward. */
15013 emit_insn (gen_lshrsi3 (tmpreg
, tmpreg
, GEN_INT (16)));
15015 emit_insn (gen_adddi3 (out
, out
, const2_rtx
));
15017 emit_insn (gen_addsi3 (out
, out
, const2_rtx
));
15019 emit_label (end_2_label
);
15023 /* Avoid branch in fixing the byte. */
15024 tmpreg
= gen_lowpart (QImode
, tmpreg
);
15025 emit_insn (gen_addqi3_cc (tmpreg
, tmpreg
, tmpreg
));
15026 cmp
= gen_rtx_LTU (Pmode
, gen_rtx_REG (CCmode
, 17), const0_rtx
);
15028 emit_insn (gen_subdi3_carry_rex64 (out
, out
, GEN_INT (3), cmp
));
15030 emit_insn (gen_subsi3_carry (out
, out
, GEN_INT (3), cmp
));
15032 emit_label (end_0_label
);
15035 /* For given symbol (function) construct code to compute address of it's PLT
15036 entry in large x86-64 PIC model. */
15038 construct_plt_address (rtx symbol
)
15040 rtx tmp
= gen_reg_rtx (Pmode
);
15041 rtx unspec
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, symbol
), UNSPEC_PLTOFF
);
15043 gcc_assert (GET_CODE (symbol
) == SYMBOL_REF
);
15044 gcc_assert (ix86_cmodel
== CM_LARGE_PIC
);
15046 emit_move_insn (tmp
, gen_rtx_CONST (Pmode
, unspec
));
15047 emit_insn (gen_adddi3 (tmp
, tmp
, pic_offset_table_rtx
));
15052 ix86_expand_call (rtx retval
, rtx fnaddr
, rtx callarg1
,
15053 rtx callarg2 ATTRIBUTE_UNUSED
,
15054 rtx pop
, int sibcall
)
15056 rtx use
= NULL
, call
;
15058 if (pop
== const0_rtx
)
15060 gcc_assert (!TARGET_64BIT
|| !pop
);
15062 if (TARGET_MACHO
&& !TARGET_64BIT
)
15065 if (flag_pic
&& GET_CODE (XEXP (fnaddr
, 0)) == SYMBOL_REF
)
15066 fnaddr
= machopic_indirect_call_target (fnaddr
);
15071 /* Static functions and indirect calls don't need the pic register. */
15072 if (flag_pic
&& (!TARGET_64BIT
|| ix86_cmodel
== CM_LARGE_PIC
)
15073 && GET_CODE (XEXP (fnaddr
, 0)) == SYMBOL_REF
15074 && ! SYMBOL_REF_LOCAL_P (XEXP (fnaddr
, 0)))
15075 use_reg (&use
, pic_offset_table_rtx
);
15078 if (TARGET_64BIT
&& INTVAL (callarg2
) >= 0)
15080 rtx al
= gen_rtx_REG (QImode
, 0);
15081 emit_move_insn (al
, callarg2
);
15082 use_reg (&use
, al
);
15085 if (ix86_cmodel
== CM_LARGE_PIC
15086 && GET_CODE (fnaddr
) == MEM
15087 && GET_CODE (XEXP (fnaddr
, 0)) == SYMBOL_REF
15088 && !local_symbolic_operand (XEXP (fnaddr
, 0), VOIDmode
))
15089 fnaddr
= gen_rtx_MEM (QImode
, construct_plt_address (XEXP (fnaddr
, 0)));
15090 else if (! call_insn_operand (XEXP (fnaddr
, 0), Pmode
))
15092 fnaddr
= copy_to_mode_reg (Pmode
, XEXP (fnaddr
, 0));
15093 fnaddr
= gen_rtx_MEM (QImode
, fnaddr
);
15095 if (sibcall
&& TARGET_64BIT
15096 && !constant_call_address_operand (XEXP (fnaddr
, 0), Pmode
))
15099 addr
= copy_to_mode_reg (Pmode
, XEXP (fnaddr
, 0));
15100 fnaddr
= gen_rtx_REG (Pmode
, R11_REG
);
15101 emit_move_insn (fnaddr
, addr
);
15102 fnaddr
= gen_rtx_MEM (QImode
, fnaddr
);
15105 call
= gen_rtx_CALL (VOIDmode
, fnaddr
, callarg1
);
15107 call
= gen_rtx_SET (VOIDmode
, retval
, call
);
15110 pop
= gen_rtx_PLUS (Pmode
, stack_pointer_rtx
, pop
);
15111 pop
= gen_rtx_SET (VOIDmode
, stack_pointer_rtx
, pop
);
15112 call
= gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (2, call
, pop
));
15115 call
= emit_call_insn (call
);
15117 CALL_INSN_FUNCTION_USAGE (call
) = use
;
15121 /* Clear stack slot assignments remembered from previous functions.
15122 This is called from INIT_EXPANDERS once before RTL is emitted for each
15125 static struct machine_function
*
15126 ix86_init_machine_status (void)
15128 struct machine_function
*f
;
15130 f
= ggc_alloc_cleared (sizeof (struct machine_function
));
15131 f
->use_fast_prologue_epilogue_nregs
= -1;
15132 f
->tls_descriptor_call_expanded_p
= 0;
15137 /* Return a MEM corresponding to a stack slot with mode MODE.
15138 Allocate a new slot if necessary.
15140 The RTL for a function can have several slots available: N is
15141 which slot to use. */
15144 assign_386_stack_local (enum machine_mode mode
, enum ix86_stack_slot n
)
15146 struct stack_local_entry
*s
;
15148 gcc_assert (n
< MAX_386_STACK_LOCALS
);
15150 for (s
= ix86_stack_locals
; s
; s
= s
->next
)
15151 if (s
->mode
== mode
&& s
->n
== n
)
15152 return copy_rtx (s
->rtl
);
15154 s
= (struct stack_local_entry
*)
15155 ggc_alloc (sizeof (struct stack_local_entry
));
15158 s
->rtl
= assign_stack_local (mode
, GET_MODE_SIZE (mode
), 0);
15160 s
->next
= ix86_stack_locals
;
15161 ix86_stack_locals
= s
;
15165 /* Construct the SYMBOL_REF for the tls_get_addr function. */
15167 static GTY(()) rtx ix86_tls_symbol
;
15169 ix86_tls_get_addr (void)
15172 if (!ix86_tls_symbol
)
15174 ix86_tls_symbol
= gen_rtx_SYMBOL_REF (Pmode
,
15175 (TARGET_ANY_GNU_TLS
15177 ? "___tls_get_addr"
15178 : "__tls_get_addr");
15181 return ix86_tls_symbol
;
15184 /* Construct the SYMBOL_REF for the _TLS_MODULE_BASE_ symbol. */
15186 static GTY(()) rtx ix86_tls_module_base_symbol
;
15188 ix86_tls_module_base (void)
15191 if (!ix86_tls_module_base_symbol
)
15193 ix86_tls_module_base_symbol
= gen_rtx_SYMBOL_REF (Pmode
,
15194 "_TLS_MODULE_BASE_");
15195 SYMBOL_REF_FLAGS (ix86_tls_module_base_symbol
)
15196 |= TLS_MODEL_GLOBAL_DYNAMIC
<< SYMBOL_FLAG_TLS_SHIFT
;
15199 return ix86_tls_module_base_symbol
;
15202 /* Calculate the length of the memory address in the instruction
15203 encoding. Does not include the one-byte modrm, opcode, or prefix. */
15206 memory_address_length (rtx addr
)
15208 struct ix86_address parts
;
15209 rtx base
, index
, disp
;
15213 if (GET_CODE (addr
) == PRE_DEC
15214 || GET_CODE (addr
) == POST_INC
15215 || GET_CODE (addr
) == PRE_MODIFY
15216 || GET_CODE (addr
) == POST_MODIFY
)
15219 ok
= ix86_decompose_address (addr
, &parts
);
15222 if (parts
.base
&& GET_CODE (parts
.base
) == SUBREG
)
15223 parts
.base
= SUBREG_REG (parts
.base
);
15224 if (parts
.index
&& GET_CODE (parts
.index
) == SUBREG
)
15225 parts
.index
= SUBREG_REG (parts
.index
);
15228 index
= parts
.index
;
15233 - esp as the base always wants an index,
15234 - ebp as the base always wants a displacement. */
15236 /* Register Indirect. */
15237 if (base
&& !index
&& !disp
)
15239 /* esp (for its index) and ebp (for its displacement) need
15240 the two-byte modrm form. */
15241 if (addr
== stack_pointer_rtx
15242 || addr
== arg_pointer_rtx
15243 || addr
== frame_pointer_rtx
15244 || addr
== hard_frame_pointer_rtx
)
15248 /* Direct Addressing. */
15249 else if (disp
&& !base
&& !index
)
15254 /* Find the length of the displacement constant. */
15257 if (base
&& satisfies_constraint_K (disp
))
15262 /* ebp always wants a displacement. */
15263 else if (base
== hard_frame_pointer_rtx
)
15266 /* An index requires the two-byte modrm form.... */
15268 /* ...like esp, which always wants an index. */
15269 || base
== stack_pointer_rtx
15270 || base
== arg_pointer_rtx
15271 || base
== frame_pointer_rtx
)
15278 /* Compute default value for "length_immediate" attribute. When SHORTFORM
15279 is set, expect that insn have 8bit immediate alternative. */
15281 ix86_attr_length_immediate_default (rtx insn
, int shortform
)
15285 extract_insn_cached (insn
);
15286 for (i
= recog_data
.n_operands
- 1; i
>= 0; --i
)
15287 if (CONSTANT_P (recog_data
.operand
[i
]))
15290 if (shortform
&& satisfies_constraint_K (recog_data
.operand
[i
]))
15294 switch (get_attr_mode (insn
))
15305 /* Immediates for DImode instructions are encoded as 32bit sign extended values. */
15310 fatal_insn ("unknown insn mode", insn
);
15316 /* Compute default value for "length_address" attribute. */
15318 ix86_attr_length_address_default (rtx insn
)
15322 if (get_attr_type (insn
) == TYPE_LEA
)
15324 rtx set
= PATTERN (insn
);
15326 if (GET_CODE (set
) == PARALLEL
)
15327 set
= XVECEXP (set
, 0, 0);
15329 gcc_assert (GET_CODE (set
) == SET
);
15331 return memory_address_length (SET_SRC (set
));
15334 extract_insn_cached (insn
);
15335 for (i
= recog_data
.n_operands
- 1; i
>= 0; --i
)
15336 if (MEM_P (recog_data
.operand
[i
]))
15338 return memory_address_length (XEXP (recog_data
.operand
[i
], 0));
15344 /* Return the maximum number of instructions a cpu can issue. */
15347 ix86_issue_rate (void)
15351 case PROCESSOR_PENTIUM
:
15355 case PROCESSOR_PENTIUMPRO
:
15356 case PROCESSOR_PENTIUM4
:
15357 case PROCESSOR_ATHLON
:
15359 case PROCESSOR_AMDFAM10
:
15360 case PROCESSOR_NOCONA
:
15361 case PROCESSOR_GENERIC32
:
15362 case PROCESSOR_GENERIC64
:
15365 case PROCESSOR_CORE2
:
15373 /* A subroutine of ix86_adjust_cost -- return true iff INSN reads flags set
15374 by DEP_INSN and nothing set by DEP_INSN. */
15377 ix86_flags_dependent (rtx insn
, rtx dep_insn
, enum attr_type insn_type
)
15381 /* Simplify the test for uninteresting insns. */
15382 if (insn_type
!= TYPE_SETCC
15383 && insn_type
!= TYPE_ICMOV
15384 && insn_type
!= TYPE_FCMOV
15385 && insn_type
!= TYPE_IBR
)
15388 if ((set
= single_set (dep_insn
)) != 0)
15390 set
= SET_DEST (set
);
15393 else if (GET_CODE (PATTERN (dep_insn
)) == PARALLEL
15394 && XVECLEN (PATTERN (dep_insn
), 0) == 2
15395 && GET_CODE (XVECEXP (PATTERN (dep_insn
), 0, 0)) == SET
15396 && GET_CODE (XVECEXP (PATTERN (dep_insn
), 0, 1)) == SET
)
15398 set
= SET_DEST (XVECEXP (PATTERN (dep_insn
), 0, 0));
15399 set2
= SET_DEST (XVECEXP (PATTERN (dep_insn
), 0, 0));
15404 if (!REG_P (set
) || REGNO (set
) != FLAGS_REG
)
15407 /* This test is true if the dependent insn reads the flags but
15408 not any other potentially set register. */
15409 if (!reg_overlap_mentioned_p (set
, PATTERN (insn
)))
15412 if (set2
&& reg_overlap_mentioned_p (set2
, PATTERN (insn
)))
15418 /* A subroutine of ix86_adjust_cost -- return true iff INSN has a memory
15419 address with operands set by DEP_INSN. */
15422 ix86_agi_dependent (rtx insn
, rtx dep_insn
, enum attr_type insn_type
)
15426 if (insn_type
== TYPE_LEA
15429 addr
= PATTERN (insn
);
15431 if (GET_CODE (addr
) == PARALLEL
)
15432 addr
= XVECEXP (addr
, 0, 0);
15434 gcc_assert (GET_CODE (addr
) == SET
);
15436 addr
= SET_SRC (addr
);
15441 extract_insn_cached (insn
);
15442 for (i
= recog_data
.n_operands
- 1; i
>= 0; --i
)
15443 if (MEM_P (recog_data
.operand
[i
]))
15445 addr
= XEXP (recog_data
.operand
[i
], 0);
15452 return modified_in_p (addr
, dep_insn
);
15456 ix86_adjust_cost (rtx insn
, rtx link
, rtx dep_insn
, int cost
)
15458 enum attr_type insn_type
, dep_insn_type
;
15459 enum attr_memory memory
;
15461 int dep_insn_code_number
;
15463 /* Anti and output dependencies have zero cost on all CPUs. */
15464 if (REG_NOTE_KIND (link
) != 0)
15467 dep_insn_code_number
= recog_memoized (dep_insn
);
15469 /* If we can't recognize the insns, we can't really do anything. */
15470 if (dep_insn_code_number
< 0 || recog_memoized (insn
) < 0)
15473 insn_type
= get_attr_type (insn
);
15474 dep_insn_type
= get_attr_type (dep_insn
);
15478 case PROCESSOR_PENTIUM
:
15479 /* Address Generation Interlock adds a cycle of latency. */
15480 if (ix86_agi_dependent (insn
, dep_insn
, insn_type
))
15483 /* ??? Compares pair with jump/setcc. */
15484 if (ix86_flags_dependent (insn
, dep_insn
, insn_type
))
15487 /* Floating point stores require value to be ready one cycle earlier. */
15488 if (insn_type
== TYPE_FMOV
15489 && get_attr_memory (insn
) == MEMORY_STORE
15490 && !ix86_agi_dependent (insn
, dep_insn
, insn_type
))
15494 case PROCESSOR_PENTIUMPRO
:
15495 memory
= get_attr_memory (insn
);
15497 /* INT->FP conversion is expensive. */
15498 if (get_attr_fp_int_src (dep_insn
))
15501 /* There is one cycle extra latency between an FP op and a store. */
15502 if (insn_type
== TYPE_FMOV
15503 && (set
= single_set (dep_insn
)) != NULL_RTX
15504 && (set2
= single_set (insn
)) != NULL_RTX
15505 && rtx_equal_p (SET_DEST (set
), SET_SRC (set2
))
15506 && MEM_P (SET_DEST (set2
)))
15509 /* Show ability of reorder buffer to hide latency of load by executing
15510 in parallel with previous instruction in case
15511 previous instruction is not needed to compute the address. */
15512 if ((memory
== MEMORY_LOAD
|| memory
== MEMORY_BOTH
)
15513 && !ix86_agi_dependent (insn
, dep_insn
, insn_type
))
15515 /* Claim moves to take one cycle, as core can issue one load
15516 at time and the next load can start cycle later. */
15517 if (dep_insn_type
== TYPE_IMOV
15518 || dep_insn_type
== TYPE_FMOV
)
15526 memory
= get_attr_memory (insn
);
15528 /* The esp dependency is resolved before the instruction is really
15530 if ((insn_type
== TYPE_PUSH
|| insn_type
== TYPE_POP
)
15531 && (dep_insn_type
== TYPE_PUSH
|| dep_insn_type
== TYPE_POP
))
15534 /* INT->FP conversion is expensive. */
15535 if (get_attr_fp_int_src (dep_insn
))
15538 /* Show ability of reorder buffer to hide latency of load by executing
15539 in parallel with previous instruction in case
15540 previous instruction is not needed to compute the address. */
15541 if ((memory
== MEMORY_LOAD
|| memory
== MEMORY_BOTH
)
15542 && !ix86_agi_dependent (insn
, dep_insn
, insn_type
))
15544 /* Claim moves to take one cycle, as core can issue one load
15545 at time and the next load can start cycle later. */
15546 if (dep_insn_type
== TYPE_IMOV
15547 || dep_insn_type
== TYPE_FMOV
)
15556 case PROCESSOR_ATHLON
:
15558 case PROCESSOR_AMDFAM10
:
15559 case PROCESSOR_GENERIC32
:
15560 case PROCESSOR_GENERIC64
:
15561 memory
= get_attr_memory (insn
);
15563 /* Show ability of reorder buffer to hide latency of load by executing
15564 in parallel with previous instruction in case
15565 previous instruction is not needed to compute the address. */
15566 if ((memory
== MEMORY_LOAD
|| memory
== MEMORY_BOTH
)
15567 && !ix86_agi_dependent (insn
, dep_insn
, insn_type
))
15569 enum attr_unit unit
= get_attr_unit (insn
);
15572 /* Because of the difference between the length of integer and
15573 floating unit pipeline preparation stages, the memory operands
15574 for floating point are cheaper.
15576 ??? For Athlon it the difference is most probably 2. */
15577 if (unit
== UNIT_INTEGER
|| unit
== UNIT_UNKNOWN
)
15580 loadcost
= TARGET_ATHLON
? 2 : 0;
15582 if (cost
>= loadcost
)
15595 /* How many alternative schedules to try. This should be as wide as the
15596 scheduling freedom in the DFA, but no wider. Making this value too
15597 large results extra work for the scheduler. */
15600 ia32_multipass_dfa_lookahead (void)
15602 if (ix86_tune
== PROCESSOR_PENTIUM
)
15605 if (ix86_tune
== PROCESSOR_PENTIUMPRO
15606 || ix86_tune
== PROCESSOR_K6
)
15614 /* Compute the alignment given to a constant that is being placed in memory.
15615 EXP is the constant and ALIGN is the alignment that the object would
15617 The value of this function is used instead of that alignment to align
15621 ix86_constant_alignment (tree exp
, int align
)
15623 if (TREE_CODE (exp
) == REAL_CST
)
15625 if (TYPE_MODE (TREE_TYPE (exp
)) == DFmode
&& align
< 64)
15627 else if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (exp
))) && align
< 128)
15630 else if (!optimize_size
&& TREE_CODE (exp
) == STRING_CST
15631 && TREE_STRING_LENGTH (exp
) >= 31 && align
< BITS_PER_WORD
)
15632 return BITS_PER_WORD
;
15637 /* Compute the alignment for a static variable.
15638 TYPE is the data type, and ALIGN is the alignment that
15639 the object would ordinarily have. The value of this function is used
15640 instead of that alignment to align the object. */
15643 ix86_data_alignment (tree type
, int align
)
15645 int max_align
= optimize_size
? BITS_PER_WORD
: MIN (256, MAX_OFILE_ALIGNMENT
);
15647 if (AGGREGATE_TYPE_P (type
)
15648 && TYPE_SIZE (type
)
15649 && TREE_CODE (TYPE_SIZE (type
)) == INTEGER_CST
15650 && (TREE_INT_CST_LOW (TYPE_SIZE (type
)) >= (unsigned) max_align
15651 || TREE_INT_CST_HIGH (TYPE_SIZE (type
)))
15652 && align
< max_align
)
15655 /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
15656 to 16byte boundary. */
15659 if (AGGREGATE_TYPE_P (type
)
15660 && TYPE_SIZE (type
)
15661 && TREE_CODE (TYPE_SIZE (type
)) == INTEGER_CST
15662 && (TREE_INT_CST_LOW (TYPE_SIZE (type
)) >= 128
15663 || TREE_INT_CST_HIGH (TYPE_SIZE (type
))) && align
< 128)
15667 if (TREE_CODE (type
) == ARRAY_TYPE
)
15669 if (TYPE_MODE (TREE_TYPE (type
)) == DFmode
&& align
< 64)
15671 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type
))) && align
< 128)
15674 else if (TREE_CODE (type
) == COMPLEX_TYPE
)
15677 if (TYPE_MODE (type
) == DCmode
&& align
< 64)
15679 if (TYPE_MODE (type
) == XCmode
&& align
< 128)
15682 else if ((TREE_CODE (type
) == RECORD_TYPE
15683 || TREE_CODE (type
) == UNION_TYPE
15684 || TREE_CODE (type
) == QUAL_UNION_TYPE
)
15685 && TYPE_FIELDS (type
))
15687 if (DECL_MODE (TYPE_FIELDS (type
)) == DFmode
&& align
< 64)
15689 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type
))) && align
< 128)
15692 else if (TREE_CODE (type
) == REAL_TYPE
|| TREE_CODE (type
) == VECTOR_TYPE
15693 || TREE_CODE (type
) == INTEGER_TYPE
)
15695 if (TYPE_MODE (type
) == DFmode
&& align
< 64)
15697 if (ALIGN_MODE_128 (TYPE_MODE (type
)) && align
< 128)
15704 /* Compute the alignment for a local variable.
15705 TYPE is the data type, and ALIGN is the alignment that
15706 the object would ordinarily have. The value of this macro is used
15707 instead of that alignment to align the object. */
15710 ix86_local_alignment (tree type
, int align
)
15712 /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
15713 to 16byte boundary. */
15716 if (AGGREGATE_TYPE_P (type
)
15717 && TYPE_SIZE (type
)
15718 && TREE_CODE (TYPE_SIZE (type
)) == INTEGER_CST
15719 && (TREE_INT_CST_LOW (TYPE_SIZE (type
)) >= 16
15720 || TREE_INT_CST_HIGH (TYPE_SIZE (type
))) && align
< 128)
15723 if (TREE_CODE (type
) == ARRAY_TYPE
)
15725 if (TYPE_MODE (TREE_TYPE (type
)) == DFmode
&& align
< 64)
15727 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type
))) && align
< 128)
15730 else if (TREE_CODE (type
) == COMPLEX_TYPE
)
15732 if (TYPE_MODE (type
) == DCmode
&& align
< 64)
15734 if (TYPE_MODE (type
) == XCmode
&& align
< 128)
15737 else if ((TREE_CODE (type
) == RECORD_TYPE
15738 || TREE_CODE (type
) == UNION_TYPE
15739 || TREE_CODE (type
) == QUAL_UNION_TYPE
)
15740 && TYPE_FIELDS (type
))
15742 if (DECL_MODE (TYPE_FIELDS (type
)) == DFmode
&& align
< 64)
15744 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type
))) && align
< 128)
15747 else if (TREE_CODE (type
) == REAL_TYPE
|| TREE_CODE (type
) == VECTOR_TYPE
15748 || TREE_CODE (type
) == INTEGER_TYPE
)
15751 if (TYPE_MODE (type
) == DFmode
&& align
< 64)
15753 if (ALIGN_MODE_128 (TYPE_MODE (type
)) && align
< 128)
15759 /* Emit RTL insns to initialize the variable parts of a trampoline.
15760 FNADDR is an RTX for the address of the function's pure code.
15761 CXT is an RTX for the static chain value for the function. */
15763 x86_initialize_trampoline (rtx tramp
, rtx fnaddr
, rtx cxt
)
15767 /* Compute offset from the end of the jmp to the target function. */
15768 rtx disp
= expand_binop (SImode
, sub_optab
, fnaddr
,
15769 plus_constant (tramp
, 10),
15770 NULL_RTX
, 1, OPTAB_DIRECT
);
15771 emit_move_insn (gen_rtx_MEM (QImode
, tramp
),
15772 gen_int_mode (0xb9, QImode
));
15773 emit_move_insn (gen_rtx_MEM (SImode
, plus_constant (tramp
, 1)), cxt
);
15774 emit_move_insn (gen_rtx_MEM (QImode
, plus_constant (tramp
, 5)),
15775 gen_int_mode (0xe9, QImode
));
15776 emit_move_insn (gen_rtx_MEM (SImode
, plus_constant (tramp
, 6)), disp
);
15781 /* Try to load address using shorter movl instead of movabs.
15782 We may want to support movq for kernel mode, but kernel does not use
15783 trampolines at the moment. */
15784 if (x86_64_zext_immediate_operand (fnaddr
, VOIDmode
))
15786 fnaddr
= copy_to_mode_reg (DImode
, fnaddr
);
15787 emit_move_insn (gen_rtx_MEM (HImode
, plus_constant (tramp
, offset
)),
15788 gen_int_mode (0xbb41, HImode
));
15789 emit_move_insn (gen_rtx_MEM (SImode
, plus_constant (tramp
, offset
+ 2)),
15790 gen_lowpart (SImode
, fnaddr
));
15795 emit_move_insn (gen_rtx_MEM (HImode
, plus_constant (tramp
, offset
)),
15796 gen_int_mode (0xbb49, HImode
));
15797 emit_move_insn (gen_rtx_MEM (DImode
, plus_constant (tramp
, offset
+ 2)),
15801 /* Load static chain using movabs to r10. */
15802 emit_move_insn (gen_rtx_MEM (HImode
, plus_constant (tramp
, offset
)),
15803 gen_int_mode (0xba49, HImode
));
15804 emit_move_insn (gen_rtx_MEM (DImode
, plus_constant (tramp
, offset
+ 2)),
15807 /* Jump to the r11 */
15808 emit_move_insn (gen_rtx_MEM (HImode
, plus_constant (tramp
, offset
)),
15809 gen_int_mode (0xff49, HImode
));
15810 emit_move_insn (gen_rtx_MEM (QImode
, plus_constant (tramp
, offset
+2)),
15811 gen_int_mode (0xe3, QImode
));
15813 gcc_assert (offset
<= TRAMPOLINE_SIZE
);
15816 #ifdef ENABLE_EXECUTE_STACK
15817 emit_library_call (gen_rtx_SYMBOL_REF (Pmode
, "__enable_execute_stack"),
15818 LCT_NORMAL
, VOIDmode
, 1, tramp
, Pmode
);
15822 /* Codes for all the SSE/MMX builtins. */
15825 IX86_BUILTIN_ADDPS
,
15826 IX86_BUILTIN_ADDSS
,
15827 IX86_BUILTIN_DIVPS
,
15828 IX86_BUILTIN_DIVSS
,
15829 IX86_BUILTIN_MULPS
,
15830 IX86_BUILTIN_MULSS
,
15831 IX86_BUILTIN_SUBPS
,
15832 IX86_BUILTIN_SUBSS
,
15834 IX86_BUILTIN_CMPEQPS
,
15835 IX86_BUILTIN_CMPLTPS
,
15836 IX86_BUILTIN_CMPLEPS
,
15837 IX86_BUILTIN_CMPGTPS
,
15838 IX86_BUILTIN_CMPGEPS
,
15839 IX86_BUILTIN_CMPNEQPS
,
15840 IX86_BUILTIN_CMPNLTPS
,
15841 IX86_BUILTIN_CMPNLEPS
,
15842 IX86_BUILTIN_CMPNGTPS
,
15843 IX86_BUILTIN_CMPNGEPS
,
15844 IX86_BUILTIN_CMPORDPS
,
15845 IX86_BUILTIN_CMPUNORDPS
,
15846 IX86_BUILTIN_CMPEQSS
,
15847 IX86_BUILTIN_CMPLTSS
,
15848 IX86_BUILTIN_CMPLESS
,
15849 IX86_BUILTIN_CMPNEQSS
,
15850 IX86_BUILTIN_CMPNLTSS
,
15851 IX86_BUILTIN_CMPNLESS
,
15852 IX86_BUILTIN_CMPNGTSS
,
15853 IX86_BUILTIN_CMPNGESS
,
15854 IX86_BUILTIN_CMPORDSS
,
15855 IX86_BUILTIN_CMPUNORDSS
,
15857 IX86_BUILTIN_COMIEQSS
,
15858 IX86_BUILTIN_COMILTSS
,
15859 IX86_BUILTIN_COMILESS
,
15860 IX86_BUILTIN_COMIGTSS
,
15861 IX86_BUILTIN_COMIGESS
,
15862 IX86_BUILTIN_COMINEQSS
,
15863 IX86_BUILTIN_UCOMIEQSS
,
15864 IX86_BUILTIN_UCOMILTSS
,
15865 IX86_BUILTIN_UCOMILESS
,
15866 IX86_BUILTIN_UCOMIGTSS
,
15867 IX86_BUILTIN_UCOMIGESS
,
15868 IX86_BUILTIN_UCOMINEQSS
,
15870 IX86_BUILTIN_CVTPI2PS
,
15871 IX86_BUILTIN_CVTPS2PI
,
15872 IX86_BUILTIN_CVTSI2SS
,
15873 IX86_BUILTIN_CVTSI642SS
,
15874 IX86_BUILTIN_CVTSS2SI
,
15875 IX86_BUILTIN_CVTSS2SI64
,
15876 IX86_BUILTIN_CVTTPS2PI
,
15877 IX86_BUILTIN_CVTTSS2SI
,
15878 IX86_BUILTIN_CVTTSS2SI64
,
15880 IX86_BUILTIN_MAXPS
,
15881 IX86_BUILTIN_MAXSS
,
15882 IX86_BUILTIN_MINPS
,
15883 IX86_BUILTIN_MINSS
,
15885 IX86_BUILTIN_LOADUPS
,
15886 IX86_BUILTIN_STOREUPS
,
15887 IX86_BUILTIN_MOVSS
,
15889 IX86_BUILTIN_MOVHLPS
,
15890 IX86_BUILTIN_MOVLHPS
,
15891 IX86_BUILTIN_LOADHPS
,
15892 IX86_BUILTIN_LOADLPS
,
15893 IX86_BUILTIN_STOREHPS
,
15894 IX86_BUILTIN_STORELPS
,
15896 IX86_BUILTIN_MASKMOVQ
,
15897 IX86_BUILTIN_MOVMSKPS
,
15898 IX86_BUILTIN_PMOVMSKB
,
15900 IX86_BUILTIN_MOVNTPS
,
15901 IX86_BUILTIN_MOVNTQ
,
15903 IX86_BUILTIN_LOADDQU
,
15904 IX86_BUILTIN_STOREDQU
,
15906 IX86_BUILTIN_PACKSSWB
,
15907 IX86_BUILTIN_PACKSSDW
,
15908 IX86_BUILTIN_PACKUSWB
,
15910 IX86_BUILTIN_PADDB
,
15911 IX86_BUILTIN_PADDW
,
15912 IX86_BUILTIN_PADDD
,
15913 IX86_BUILTIN_PADDQ
,
15914 IX86_BUILTIN_PADDSB
,
15915 IX86_BUILTIN_PADDSW
,
15916 IX86_BUILTIN_PADDUSB
,
15917 IX86_BUILTIN_PADDUSW
,
15918 IX86_BUILTIN_PSUBB
,
15919 IX86_BUILTIN_PSUBW
,
15920 IX86_BUILTIN_PSUBD
,
15921 IX86_BUILTIN_PSUBQ
,
15922 IX86_BUILTIN_PSUBSB
,
15923 IX86_BUILTIN_PSUBSW
,
15924 IX86_BUILTIN_PSUBUSB
,
15925 IX86_BUILTIN_PSUBUSW
,
15928 IX86_BUILTIN_PANDN
,
15932 IX86_BUILTIN_PAVGB
,
15933 IX86_BUILTIN_PAVGW
,
15935 IX86_BUILTIN_PCMPEQB
,
15936 IX86_BUILTIN_PCMPEQW
,
15937 IX86_BUILTIN_PCMPEQD
,
15938 IX86_BUILTIN_PCMPGTB
,
15939 IX86_BUILTIN_PCMPGTW
,
15940 IX86_BUILTIN_PCMPGTD
,
15942 IX86_BUILTIN_PMADDWD
,
15944 IX86_BUILTIN_PMAXSW
,
15945 IX86_BUILTIN_PMAXUB
,
15946 IX86_BUILTIN_PMINSW
,
15947 IX86_BUILTIN_PMINUB
,
15949 IX86_BUILTIN_PMULHUW
,
15950 IX86_BUILTIN_PMULHW
,
15951 IX86_BUILTIN_PMULLW
,
15953 IX86_BUILTIN_PSADBW
,
15954 IX86_BUILTIN_PSHUFW
,
15956 IX86_BUILTIN_PSLLW
,
15957 IX86_BUILTIN_PSLLD
,
15958 IX86_BUILTIN_PSLLQ
,
15959 IX86_BUILTIN_PSRAW
,
15960 IX86_BUILTIN_PSRAD
,
15961 IX86_BUILTIN_PSRLW
,
15962 IX86_BUILTIN_PSRLD
,
15963 IX86_BUILTIN_PSRLQ
,
15964 IX86_BUILTIN_PSLLWI
,
15965 IX86_BUILTIN_PSLLDI
,
15966 IX86_BUILTIN_PSLLQI
,
15967 IX86_BUILTIN_PSRAWI
,
15968 IX86_BUILTIN_PSRADI
,
15969 IX86_BUILTIN_PSRLWI
,
15970 IX86_BUILTIN_PSRLDI
,
15971 IX86_BUILTIN_PSRLQI
,
15973 IX86_BUILTIN_PUNPCKHBW
,
15974 IX86_BUILTIN_PUNPCKHWD
,
15975 IX86_BUILTIN_PUNPCKHDQ
,
15976 IX86_BUILTIN_PUNPCKLBW
,
15977 IX86_BUILTIN_PUNPCKLWD
,
15978 IX86_BUILTIN_PUNPCKLDQ
,
15980 IX86_BUILTIN_SHUFPS
,
15982 IX86_BUILTIN_RCPPS
,
15983 IX86_BUILTIN_RCPSS
,
15984 IX86_BUILTIN_RSQRTPS
,
15985 IX86_BUILTIN_RSQRTSS
,
15986 IX86_BUILTIN_SQRTPS
,
15987 IX86_BUILTIN_SQRTSS
,
15989 IX86_BUILTIN_UNPCKHPS
,
15990 IX86_BUILTIN_UNPCKLPS
,
15992 IX86_BUILTIN_ANDPS
,
15993 IX86_BUILTIN_ANDNPS
,
15995 IX86_BUILTIN_XORPS
,
15998 IX86_BUILTIN_LDMXCSR
,
15999 IX86_BUILTIN_STMXCSR
,
16000 IX86_BUILTIN_SFENCE
,
16002 /* 3DNow! Original */
16003 IX86_BUILTIN_FEMMS
,
16004 IX86_BUILTIN_PAVGUSB
,
16005 IX86_BUILTIN_PF2ID
,
16006 IX86_BUILTIN_PFACC
,
16007 IX86_BUILTIN_PFADD
,
16008 IX86_BUILTIN_PFCMPEQ
,
16009 IX86_BUILTIN_PFCMPGE
,
16010 IX86_BUILTIN_PFCMPGT
,
16011 IX86_BUILTIN_PFMAX
,
16012 IX86_BUILTIN_PFMIN
,
16013 IX86_BUILTIN_PFMUL
,
16014 IX86_BUILTIN_PFRCP
,
16015 IX86_BUILTIN_PFRCPIT1
,
16016 IX86_BUILTIN_PFRCPIT2
,
16017 IX86_BUILTIN_PFRSQIT1
,
16018 IX86_BUILTIN_PFRSQRT
,
16019 IX86_BUILTIN_PFSUB
,
16020 IX86_BUILTIN_PFSUBR
,
16021 IX86_BUILTIN_PI2FD
,
16022 IX86_BUILTIN_PMULHRW
,
16024 /* 3DNow! Athlon Extensions */
16025 IX86_BUILTIN_PF2IW
,
16026 IX86_BUILTIN_PFNACC
,
16027 IX86_BUILTIN_PFPNACC
,
16028 IX86_BUILTIN_PI2FW
,
16029 IX86_BUILTIN_PSWAPDSI
,
16030 IX86_BUILTIN_PSWAPDSF
,
16033 IX86_BUILTIN_ADDPD
,
16034 IX86_BUILTIN_ADDSD
,
16035 IX86_BUILTIN_DIVPD
,
16036 IX86_BUILTIN_DIVSD
,
16037 IX86_BUILTIN_MULPD
,
16038 IX86_BUILTIN_MULSD
,
16039 IX86_BUILTIN_SUBPD
,
16040 IX86_BUILTIN_SUBSD
,
16042 IX86_BUILTIN_CMPEQPD
,
16043 IX86_BUILTIN_CMPLTPD
,
16044 IX86_BUILTIN_CMPLEPD
,
16045 IX86_BUILTIN_CMPGTPD
,
16046 IX86_BUILTIN_CMPGEPD
,
16047 IX86_BUILTIN_CMPNEQPD
,
16048 IX86_BUILTIN_CMPNLTPD
,
16049 IX86_BUILTIN_CMPNLEPD
,
16050 IX86_BUILTIN_CMPNGTPD
,
16051 IX86_BUILTIN_CMPNGEPD
,
16052 IX86_BUILTIN_CMPORDPD
,
16053 IX86_BUILTIN_CMPUNORDPD
,
16054 IX86_BUILTIN_CMPNEPD
,
16055 IX86_BUILTIN_CMPEQSD
,
16056 IX86_BUILTIN_CMPLTSD
,
16057 IX86_BUILTIN_CMPLESD
,
16058 IX86_BUILTIN_CMPNEQSD
,
16059 IX86_BUILTIN_CMPNLTSD
,
16060 IX86_BUILTIN_CMPNLESD
,
16061 IX86_BUILTIN_CMPORDSD
,
16062 IX86_BUILTIN_CMPUNORDSD
,
16063 IX86_BUILTIN_CMPNESD
,
16065 IX86_BUILTIN_COMIEQSD
,
16066 IX86_BUILTIN_COMILTSD
,
16067 IX86_BUILTIN_COMILESD
,
16068 IX86_BUILTIN_COMIGTSD
,
16069 IX86_BUILTIN_COMIGESD
,
16070 IX86_BUILTIN_COMINEQSD
,
16071 IX86_BUILTIN_UCOMIEQSD
,
16072 IX86_BUILTIN_UCOMILTSD
,
16073 IX86_BUILTIN_UCOMILESD
,
16074 IX86_BUILTIN_UCOMIGTSD
,
16075 IX86_BUILTIN_UCOMIGESD
,
16076 IX86_BUILTIN_UCOMINEQSD
,
16078 IX86_BUILTIN_MAXPD
,
16079 IX86_BUILTIN_MAXSD
,
16080 IX86_BUILTIN_MINPD
,
16081 IX86_BUILTIN_MINSD
,
16083 IX86_BUILTIN_ANDPD
,
16084 IX86_BUILTIN_ANDNPD
,
16086 IX86_BUILTIN_XORPD
,
16088 IX86_BUILTIN_SQRTPD
,
16089 IX86_BUILTIN_SQRTSD
,
16091 IX86_BUILTIN_UNPCKHPD
,
16092 IX86_BUILTIN_UNPCKLPD
,
16094 IX86_BUILTIN_SHUFPD
,
16096 IX86_BUILTIN_LOADUPD
,
16097 IX86_BUILTIN_STOREUPD
,
16098 IX86_BUILTIN_MOVSD
,
16100 IX86_BUILTIN_LOADHPD
,
16101 IX86_BUILTIN_LOADLPD
,
16103 IX86_BUILTIN_CVTDQ2PD
,
16104 IX86_BUILTIN_CVTDQ2PS
,
16106 IX86_BUILTIN_CVTPD2DQ
,
16107 IX86_BUILTIN_CVTPD2PI
,
16108 IX86_BUILTIN_CVTPD2PS
,
16109 IX86_BUILTIN_CVTTPD2DQ
,
16110 IX86_BUILTIN_CVTTPD2PI
,
16112 IX86_BUILTIN_CVTPI2PD
,
16113 IX86_BUILTIN_CVTSI2SD
,
16114 IX86_BUILTIN_CVTSI642SD
,
16116 IX86_BUILTIN_CVTSD2SI
,
16117 IX86_BUILTIN_CVTSD2SI64
,
16118 IX86_BUILTIN_CVTSD2SS
,
16119 IX86_BUILTIN_CVTSS2SD
,
16120 IX86_BUILTIN_CVTTSD2SI
,
16121 IX86_BUILTIN_CVTTSD2SI64
,
16123 IX86_BUILTIN_CVTPS2DQ
,
16124 IX86_BUILTIN_CVTPS2PD
,
16125 IX86_BUILTIN_CVTTPS2DQ
,
16127 IX86_BUILTIN_MOVNTI
,
16128 IX86_BUILTIN_MOVNTPD
,
16129 IX86_BUILTIN_MOVNTDQ
,
16132 IX86_BUILTIN_MASKMOVDQU
,
16133 IX86_BUILTIN_MOVMSKPD
,
16134 IX86_BUILTIN_PMOVMSKB128
,
16136 IX86_BUILTIN_PACKSSWB128
,
16137 IX86_BUILTIN_PACKSSDW128
,
16138 IX86_BUILTIN_PACKUSWB128
,
16140 IX86_BUILTIN_PADDB128
,
16141 IX86_BUILTIN_PADDW128
,
16142 IX86_BUILTIN_PADDD128
,
16143 IX86_BUILTIN_PADDQ128
,
16144 IX86_BUILTIN_PADDSB128
,
16145 IX86_BUILTIN_PADDSW128
,
16146 IX86_BUILTIN_PADDUSB128
,
16147 IX86_BUILTIN_PADDUSW128
,
16148 IX86_BUILTIN_PSUBB128
,
16149 IX86_BUILTIN_PSUBW128
,
16150 IX86_BUILTIN_PSUBD128
,
16151 IX86_BUILTIN_PSUBQ128
,
16152 IX86_BUILTIN_PSUBSB128
,
16153 IX86_BUILTIN_PSUBSW128
,
16154 IX86_BUILTIN_PSUBUSB128
,
16155 IX86_BUILTIN_PSUBUSW128
,
16157 IX86_BUILTIN_PAND128
,
16158 IX86_BUILTIN_PANDN128
,
16159 IX86_BUILTIN_POR128
,
16160 IX86_BUILTIN_PXOR128
,
16162 IX86_BUILTIN_PAVGB128
,
16163 IX86_BUILTIN_PAVGW128
,
16165 IX86_BUILTIN_PCMPEQB128
,
16166 IX86_BUILTIN_PCMPEQW128
,
16167 IX86_BUILTIN_PCMPEQD128
,
16168 IX86_BUILTIN_PCMPGTB128
,
16169 IX86_BUILTIN_PCMPGTW128
,
16170 IX86_BUILTIN_PCMPGTD128
,
16172 IX86_BUILTIN_PMADDWD128
,
16174 IX86_BUILTIN_PMAXSW128
,
16175 IX86_BUILTIN_PMAXUB128
,
16176 IX86_BUILTIN_PMINSW128
,
16177 IX86_BUILTIN_PMINUB128
,
16179 IX86_BUILTIN_PMULUDQ
,
16180 IX86_BUILTIN_PMULUDQ128
,
16181 IX86_BUILTIN_PMULHUW128
,
16182 IX86_BUILTIN_PMULHW128
,
16183 IX86_BUILTIN_PMULLW128
,
16185 IX86_BUILTIN_PSADBW128
,
16186 IX86_BUILTIN_PSHUFHW
,
16187 IX86_BUILTIN_PSHUFLW
,
16188 IX86_BUILTIN_PSHUFD
,
16190 IX86_BUILTIN_PSLLW128
,
16191 IX86_BUILTIN_PSLLD128
,
16192 IX86_BUILTIN_PSLLQ128
,
16193 IX86_BUILTIN_PSRAW128
,
16194 IX86_BUILTIN_PSRAD128
,
16195 IX86_BUILTIN_PSRLW128
,
16196 IX86_BUILTIN_PSRLD128
,
16197 IX86_BUILTIN_PSRLQ128
,
16198 IX86_BUILTIN_PSLLDQI128
,
16199 IX86_BUILTIN_PSLLWI128
,
16200 IX86_BUILTIN_PSLLDI128
,
16201 IX86_BUILTIN_PSLLQI128
,
16202 IX86_BUILTIN_PSRAWI128
,
16203 IX86_BUILTIN_PSRADI128
,
16204 IX86_BUILTIN_PSRLDQI128
,
16205 IX86_BUILTIN_PSRLWI128
,
16206 IX86_BUILTIN_PSRLDI128
,
16207 IX86_BUILTIN_PSRLQI128
,
16209 IX86_BUILTIN_PUNPCKHBW128
,
16210 IX86_BUILTIN_PUNPCKHWD128
,
16211 IX86_BUILTIN_PUNPCKHDQ128
,
16212 IX86_BUILTIN_PUNPCKHQDQ128
,
16213 IX86_BUILTIN_PUNPCKLBW128
,
16214 IX86_BUILTIN_PUNPCKLWD128
,
16215 IX86_BUILTIN_PUNPCKLDQ128
,
16216 IX86_BUILTIN_PUNPCKLQDQ128
,
16218 IX86_BUILTIN_CLFLUSH
,
16219 IX86_BUILTIN_MFENCE
,
16220 IX86_BUILTIN_LFENCE
,
16222 /* Prescott New Instructions. */
16223 IX86_BUILTIN_ADDSUBPS
,
16224 IX86_BUILTIN_HADDPS
,
16225 IX86_BUILTIN_HSUBPS
,
16226 IX86_BUILTIN_MOVSHDUP
,
16227 IX86_BUILTIN_MOVSLDUP
,
16228 IX86_BUILTIN_ADDSUBPD
,
16229 IX86_BUILTIN_HADDPD
,
16230 IX86_BUILTIN_HSUBPD
,
16231 IX86_BUILTIN_LDDQU
,
16233 IX86_BUILTIN_MONITOR
,
16234 IX86_BUILTIN_MWAIT
,
16237 IX86_BUILTIN_PHADDW
,
16238 IX86_BUILTIN_PHADDD
,
16239 IX86_BUILTIN_PHADDSW
,
16240 IX86_BUILTIN_PHSUBW
,
16241 IX86_BUILTIN_PHSUBD
,
16242 IX86_BUILTIN_PHSUBSW
,
16243 IX86_BUILTIN_PMADDUBSW
,
16244 IX86_BUILTIN_PMULHRSW
,
16245 IX86_BUILTIN_PSHUFB
,
16246 IX86_BUILTIN_PSIGNB
,
16247 IX86_BUILTIN_PSIGNW
,
16248 IX86_BUILTIN_PSIGND
,
16249 IX86_BUILTIN_PALIGNR
,
16250 IX86_BUILTIN_PABSB
,
16251 IX86_BUILTIN_PABSW
,
16252 IX86_BUILTIN_PABSD
,
16254 IX86_BUILTIN_PHADDW128
,
16255 IX86_BUILTIN_PHADDD128
,
16256 IX86_BUILTIN_PHADDSW128
,
16257 IX86_BUILTIN_PHSUBW128
,
16258 IX86_BUILTIN_PHSUBD128
,
16259 IX86_BUILTIN_PHSUBSW128
,
16260 IX86_BUILTIN_PMADDUBSW128
,
16261 IX86_BUILTIN_PMULHRSW128
,
16262 IX86_BUILTIN_PSHUFB128
,
16263 IX86_BUILTIN_PSIGNB128
,
16264 IX86_BUILTIN_PSIGNW128
,
16265 IX86_BUILTIN_PSIGND128
,
16266 IX86_BUILTIN_PALIGNR128
,
16267 IX86_BUILTIN_PABSB128
,
16268 IX86_BUILTIN_PABSW128
,
16269 IX86_BUILTIN_PABSD128
,
16271 /* AMDFAM10 - SSE4A New Instructions. */
16272 IX86_BUILTIN_MOVNTSD
,
16273 IX86_BUILTIN_MOVNTSS
,
16274 IX86_BUILTIN_EXTRQI
,
16275 IX86_BUILTIN_EXTRQ
,
16276 IX86_BUILTIN_INSERTQI
,
16277 IX86_BUILTIN_INSERTQ
,
16279 IX86_BUILTIN_VEC_INIT_V2SI
,
16280 IX86_BUILTIN_VEC_INIT_V4HI
,
16281 IX86_BUILTIN_VEC_INIT_V8QI
,
16282 IX86_BUILTIN_VEC_EXT_V2DF
,
16283 IX86_BUILTIN_VEC_EXT_V2DI
,
16284 IX86_BUILTIN_VEC_EXT_V4SF
,
16285 IX86_BUILTIN_VEC_EXT_V4SI
,
16286 IX86_BUILTIN_VEC_EXT_V8HI
,
16287 IX86_BUILTIN_VEC_EXT_V2SI
,
16288 IX86_BUILTIN_VEC_EXT_V4HI
,
16289 IX86_BUILTIN_VEC_SET_V8HI
,
16290 IX86_BUILTIN_VEC_SET_V4HI
,
16295 /* Table for the ix86 builtin decls. */
16296 static GTY(()) tree ix86_builtins
[(int) IX86_BUILTIN_MAX
];
16298 /* Add a ix86 target builtin function with CODE, NAME and TYPE. Do so,
16299 * if the target_flags include one of MASK. Stores the function decl
16300 * in the ix86_builtins array.
16301 * Returns the function decl or NULL_TREE, if the builtin was not added. */
16304 def_builtin (int mask
, const char *name
, tree type
, enum ix86_builtins code
)
16306 tree decl
= NULL_TREE
;
16308 if (mask
& target_flags
16309 && (!(mask
& MASK_64BIT
) || TARGET_64BIT
))
16311 decl
= add_builtin_function (name
, type
, code
, BUILT_IN_MD
,
16313 ix86_builtins
[(int) code
] = decl
;
16319 /* Like def_builtin, but also marks the function decl "const". */
16322 def_builtin_const (int mask
, const char *name
, tree type
,
16323 enum ix86_builtins code
)
16325 tree decl
= def_builtin (mask
, name
, type
, code
);
16327 TREE_READONLY (decl
) = 1;
16331 /* Bits for builtin_description.flag. */
16333 /* Set when we don't support the comparison natively, and should
16334 swap_comparison in order to support it. */
16335 #define BUILTIN_DESC_SWAP_OPERANDS 1
16337 struct builtin_description
16339 const unsigned int mask
;
16340 const enum insn_code icode
;
16341 const char *const name
;
16342 const enum ix86_builtins code
;
16343 const enum rtx_code comparison
;
16344 const unsigned int flag
;
16347 static const struct builtin_description bdesc_comi
[] =
16349 { MASK_SSE
, CODE_FOR_sse_comi
, "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS
, UNEQ
, 0 },
16350 { MASK_SSE
, CODE_FOR_sse_comi
, "__builtin_ia32_comilt", IX86_BUILTIN_COMILTSS
, UNLT
, 0 },
16351 { MASK_SSE
, CODE_FOR_sse_comi
, "__builtin_ia32_comile", IX86_BUILTIN_COMILESS
, UNLE
, 0 },
16352 { MASK_SSE
, CODE_FOR_sse_comi
, "__builtin_ia32_comigt", IX86_BUILTIN_COMIGTSS
, GT
, 0 },
16353 { MASK_SSE
, CODE_FOR_sse_comi
, "__builtin_ia32_comige", IX86_BUILTIN_COMIGESS
, GE
, 0 },
16354 { MASK_SSE
, CODE_FOR_sse_comi
, "__builtin_ia32_comineq", IX86_BUILTIN_COMINEQSS
, LTGT
, 0 },
16355 { MASK_SSE
, CODE_FOR_sse_ucomi
, "__builtin_ia32_ucomieq", IX86_BUILTIN_UCOMIEQSS
, UNEQ
, 0 },
16356 { MASK_SSE
, CODE_FOR_sse_ucomi
, "__builtin_ia32_ucomilt", IX86_BUILTIN_UCOMILTSS
, UNLT
, 0 },
16357 { MASK_SSE
, CODE_FOR_sse_ucomi
, "__builtin_ia32_ucomile", IX86_BUILTIN_UCOMILESS
, UNLE
, 0 },
16358 { MASK_SSE
, CODE_FOR_sse_ucomi
, "__builtin_ia32_ucomigt", IX86_BUILTIN_UCOMIGTSS
, GT
, 0 },
16359 { MASK_SSE
, CODE_FOR_sse_ucomi
, "__builtin_ia32_ucomige", IX86_BUILTIN_UCOMIGESS
, GE
, 0 },
16360 { MASK_SSE
, CODE_FOR_sse_ucomi
, "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS
, LTGT
, 0 },
16361 { MASK_SSE2
, CODE_FOR_sse2_comi
, "__builtin_ia32_comisdeq", IX86_BUILTIN_COMIEQSD
, UNEQ
, 0 },
16362 { MASK_SSE2
, CODE_FOR_sse2_comi
, "__builtin_ia32_comisdlt", IX86_BUILTIN_COMILTSD
, UNLT
, 0 },
16363 { MASK_SSE2
, CODE_FOR_sse2_comi
, "__builtin_ia32_comisdle", IX86_BUILTIN_COMILESD
, UNLE
, 0 },
16364 { MASK_SSE2
, CODE_FOR_sse2_comi
, "__builtin_ia32_comisdgt", IX86_BUILTIN_COMIGTSD
, GT
, 0 },
16365 { MASK_SSE2
, CODE_FOR_sse2_comi
, "__builtin_ia32_comisdge", IX86_BUILTIN_COMIGESD
, GE
, 0 },
16366 { MASK_SSE2
, CODE_FOR_sse2_comi
, "__builtin_ia32_comisdneq", IX86_BUILTIN_COMINEQSD
, LTGT
, 0 },
16367 { MASK_SSE2
, CODE_FOR_sse2_ucomi
, "__builtin_ia32_ucomisdeq", IX86_BUILTIN_UCOMIEQSD
, UNEQ
, 0 },
16368 { MASK_SSE2
, CODE_FOR_sse2_ucomi
, "__builtin_ia32_ucomisdlt", IX86_BUILTIN_UCOMILTSD
, UNLT
, 0 },
16369 { MASK_SSE2
, CODE_FOR_sse2_ucomi
, "__builtin_ia32_ucomisdle", IX86_BUILTIN_UCOMILESD
, UNLE
, 0 },
16370 { MASK_SSE2
, CODE_FOR_sse2_ucomi
, "__builtin_ia32_ucomisdgt", IX86_BUILTIN_UCOMIGTSD
, GT
, 0 },
16371 { MASK_SSE2
, CODE_FOR_sse2_ucomi
, "__builtin_ia32_ucomisdge", IX86_BUILTIN_UCOMIGESD
, GE
, 0 },
16372 { MASK_SSE2
, CODE_FOR_sse2_ucomi
, "__builtin_ia32_ucomisdneq", IX86_BUILTIN_UCOMINEQSD
, LTGT
, 0 },
16375 static const struct builtin_description bdesc_2arg
[] =
16378 { MASK_SSE
, CODE_FOR_addv4sf3
, "__builtin_ia32_addps", IX86_BUILTIN_ADDPS
, 0, 0 },
16379 { MASK_SSE
, CODE_FOR_subv4sf3
, "__builtin_ia32_subps", IX86_BUILTIN_SUBPS
, 0, 0 },
16380 { MASK_SSE
, CODE_FOR_mulv4sf3
, "__builtin_ia32_mulps", IX86_BUILTIN_MULPS
, 0, 0 },
16381 { MASK_SSE
, CODE_FOR_divv4sf3
, "__builtin_ia32_divps", IX86_BUILTIN_DIVPS
, 0, 0 },
16382 { MASK_SSE
, CODE_FOR_sse_vmaddv4sf3
, "__builtin_ia32_addss", IX86_BUILTIN_ADDSS
, 0, 0 },
16383 { MASK_SSE
, CODE_FOR_sse_vmsubv4sf3
, "__builtin_ia32_subss", IX86_BUILTIN_SUBSS
, 0, 0 },
16384 { MASK_SSE
, CODE_FOR_sse_vmmulv4sf3
, "__builtin_ia32_mulss", IX86_BUILTIN_MULSS
, 0, 0 },
16385 { MASK_SSE
, CODE_FOR_sse_vmdivv4sf3
, "__builtin_ia32_divss", IX86_BUILTIN_DIVSS
, 0, 0 },
16387 { MASK_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpeqps", IX86_BUILTIN_CMPEQPS
, EQ
, 0 },
16388 { MASK_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpltps", IX86_BUILTIN_CMPLTPS
, LT
, 0 },
16389 { MASK_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpleps", IX86_BUILTIN_CMPLEPS
, LE
, 0 },
16390 { MASK_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS
, LT
,
16391 BUILTIN_DESC_SWAP_OPERANDS
},
16392 { MASK_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpgeps", IX86_BUILTIN_CMPGEPS
, LE
,
16393 BUILTIN_DESC_SWAP_OPERANDS
},
16394 { MASK_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpunordps", IX86_BUILTIN_CMPUNORDPS
, UNORDERED
, 0 },
16395 { MASK_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpneqps", IX86_BUILTIN_CMPNEQPS
, NE
, 0 },
16396 { MASK_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpnltps", IX86_BUILTIN_CMPNLTPS
, UNGE
, 0 },
16397 { MASK_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpnleps", IX86_BUILTIN_CMPNLEPS
, UNGT
, 0 },
16398 { MASK_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpngtps", IX86_BUILTIN_CMPNGTPS
, UNGE
,
16399 BUILTIN_DESC_SWAP_OPERANDS
},
16400 { MASK_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpngeps", IX86_BUILTIN_CMPNGEPS
, UNGT
,
16401 BUILTIN_DESC_SWAP_OPERANDS
},
16402 { MASK_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpordps", IX86_BUILTIN_CMPORDPS
, ORDERED
, 0 },
16403 { MASK_SSE
, CODE_FOR_sse_vmmaskcmpv4sf3
, "__builtin_ia32_cmpeqss", IX86_BUILTIN_CMPEQSS
, EQ
, 0 },
16404 { MASK_SSE
, CODE_FOR_sse_vmmaskcmpv4sf3
, "__builtin_ia32_cmpltss", IX86_BUILTIN_CMPLTSS
, LT
, 0 },
16405 { MASK_SSE
, CODE_FOR_sse_vmmaskcmpv4sf3
, "__builtin_ia32_cmpless", IX86_BUILTIN_CMPLESS
, LE
, 0 },
16406 { MASK_SSE
, CODE_FOR_sse_vmmaskcmpv4sf3
, "__builtin_ia32_cmpunordss", IX86_BUILTIN_CMPUNORDSS
, UNORDERED
, 0 },
16407 { MASK_SSE
, CODE_FOR_sse_vmmaskcmpv4sf3
, "__builtin_ia32_cmpneqss", IX86_BUILTIN_CMPNEQSS
, NE
, 0 },
16408 { MASK_SSE
, CODE_FOR_sse_vmmaskcmpv4sf3
, "__builtin_ia32_cmpnltss", IX86_BUILTIN_CMPNLTSS
, UNGE
, 0 },
16409 { MASK_SSE
, CODE_FOR_sse_vmmaskcmpv4sf3
, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS
, UNGT
, 0 },
16410 { MASK_SSE
, CODE_FOR_sse_vmmaskcmpv4sf3
, "__builtin_ia32_cmpngtss", IX86_BUILTIN_CMPNGTSS
, UNGE
,
16411 BUILTIN_DESC_SWAP_OPERANDS
},
16412 { MASK_SSE
, CODE_FOR_sse_vmmaskcmpv4sf3
, "__builtin_ia32_cmpngess", IX86_BUILTIN_CMPNGESS
, UNGT
,
16413 BUILTIN_DESC_SWAP_OPERANDS
},
16414 { MASK_SSE
, CODE_FOR_sse_vmmaskcmpv4sf3
, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS
, UNORDERED
, 0 },
16416 { MASK_SSE
, CODE_FOR_sminv4sf3
, "__builtin_ia32_minps", IX86_BUILTIN_MINPS
, 0, 0 },
16417 { MASK_SSE
, CODE_FOR_smaxv4sf3
, "__builtin_ia32_maxps", IX86_BUILTIN_MAXPS
, 0, 0 },
16418 { MASK_SSE
, CODE_FOR_sse_vmsminv4sf3
, "__builtin_ia32_minss", IX86_BUILTIN_MINSS
, 0, 0 },
16419 { MASK_SSE
, CODE_FOR_sse_vmsmaxv4sf3
, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS
, 0, 0 },
16421 { MASK_SSE
, CODE_FOR_andv4sf3
, "__builtin_ia32_andps", IX86_BUILTIN_ANDPS
, 0, 0 },
16422 { MASK_SSE
, CODE_FOR_sse_nandv4sf3
, "__builtin_ia32_andnps", IX86_BUILTIN_ANDNPS
, 0, 0 },
16423 { MASK_SSE
, CODE_FOR_iorv4sf3
, "__builtin_ia32_orps", IX86_BUILTIN_ORPS
, 0, 0 },
16424 { MASK_SSE
, CODE_FOR_xorv4sf3
, "__builtin_ia32_xorps", IX86_BUILTIN_XORPS
, 0, 0 },
16426 { MASK_SSE
, CODE_FOR_sse_movss
, "__builtin_ia32_movss", IX86_BUILTIN_MOVSS
, 0, 0 },
16427 { MASK_SSE
, CODE_FOR_sse_movhlps
, "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS
, 0, 0 },
16428 { MASK_SSE
, CODE_FOR_sse_movlhps
, "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS
, 0, 0 },
16429 { MASK_SSE
, CODE_FOR_sse_unpckhps
, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS
, 0, 0 },
16430 { MASK_SSE
, CODE_FOR_sse_unpcklps
, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS
, 0, 0 },
16433 { MASK_MMX
, CODE_FOR_mmx_addv8qi3
, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB
, 0, 0 },
16434 { MASK_MMX
, CODE_FOR_mmx_addv4hi3
, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW
, 0, 0 },
16435 { MASK_MMX
, CODE_FOR_mmx_addv2si3
, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD
, 0, 0 },
16436 { MASK_SSE2
, CODE_FOR_mmx_adddi3
, "__builtin_ia32_paddq", IX86_BUILTIN_PADDQ
, 0, 0 },
16437 { MASK_MMX
, CODE_FOR_mmx_subv8qi3
, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB
, 0, 0 },
16438 { MASK_MMX
, CODE_FOR_mmx_subv4hi3
, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW
, 0, 0 },
16439 { MASK_MMX
, CODE_FOR_mmx_subv2si3
, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD
, 0, 0 },
16440 { MASK_SSE2
, CODE_FOR_mmx_subdi3
, "__builtin_ia32_psubq", IX86_BUILTIN_PSUBQ
, 0, 0 },
16442 { MASK_MMX
, CODE_FOR_mmx_ssaddv8qi3
, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB
, 0, 0 },
16443 { MASK_MMX
, CODE_FOR_mmx_ssaddv4hi3
, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW
, 0, 0 },
16444 { MASK_MMX
, CODE_FOR_mmx_sssubv8qi3
, "__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB
, 0, 0 },
16445 { MASK_MMX
, CODE_FOR_mmx_sssubv4hi3
, "__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW
, 0, 0 },
16446 { MASK_MMX
, CODE_FOR_mmx_usaddv8qi3
, "__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB
, 0, 0 },
16447 { MASK_MMX
, CODE_FOR_mmx_usaddv4hi3
, "__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW
, 0, 0 },
16448 { MASK_MMX
, CODE_FOR_mmx_ussubv8qi3
, "__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB
, 0, 0 },
16449 { MASK_MMX
, CODE_FOR_mmx_ussubv4hi3
, "__builtin_ia32_psubusw", IX86_BUILTIN_PSUBUSW
, 0, 0 },
16451 { MASK_MMX
, CODE_FOR_mmx_mulv4hi3
, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW
, 0, 0 },
16452 { MASK_MMX
, CODE_FOR_mmx_smulv4hi3_highpart
, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW
, 0, 0 },
16453 { MASK_SSE
| MASK_3DNOW_A
, CODE_FOR_mmx_umulv4hi3_highpart
, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW
, 0, 0 },
16455 { MASK_MMX
, CODE_FOR_mmx_andv2si3
, "__builtin_ia32_pand", IX86_BUILTIN_PAND
, 0, 0 },
16456 { MASK_MMX
, CODE_FOR_mmx_nandv2si3
, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN
, 0, 0 },
16457 { MASK_MMX
, CODE_FOR_mmx_iorv2si3
, "__builtin_ia32_por", IX86_BUILTIN_POR
, 0, 0 },
16458 { MASK_MMX
, CODE_FOR_mmx_xorv2si3
, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR
, 0, 0 },
16460 { MASK_SSE
| MASK_3DNOW_A
, CODE_FOR_mmx_uavgv8qi3
, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB
, 0, 0 },
16461 { MASK_SSE
| MASK_3DNOW_A
, CODE_FOR_mmx_uavgv4hi3
, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW
, 0, 0 },
16463 { MASK_MMX
, CODE_FOR_mmx_eqv8qi3
, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB
, 0, 0 },
16464 { MASK_MMX
, CODE_FOR_mmx_eqv4hi3
, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW
, 0, 0 },
16465 { MASK_MMX
, CODE_FOR_mmx_eqv2si3
, "__builtin_ia32_pcmpeqd", IX86_BUILTIN_PCMPEQD
, 0, 0 },
16466 { MASK_MMX
, CODE_FOR_mmx_gtv8qi3
, "__builtin_ia32_pcmpgtb", IX86_BUILTIN_PCMPGTB
, 0, 0 },
16467 { MASK_MMX
, CODE_FOR_mmx_gtv4hi3
, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW
, 0, 0 },
16468 { MASK_MMX
, CODE_FOR_mmx_gtv2si3
, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD
, 0, 0 },
16470 { MASK_SSE
| MASK_3DNOW_A
, CODE_FOR_mmx_umaxv8qi3
, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB
, 0, 0 },
16471 { MASK_SSE
| MASK_3DNOW_A
, CODE_FOR_mmx_smaxv4hi3
, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW
, 0, 0 },
16472 { MASK_SSE
| MASK_3DNOW_A
, CODE_FOR_mmx_uminv8qi3
, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB
, 0, 0 },
16473 { MASK_SSE
| MASK_3DNOW_A
, CODE_FOR_mmx_sminv4hi3
, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW
, 0, 0 },
16475 { MASK_MMX
, CODE_FOR_mmx_punpckhbw
, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW
, 0, 0 },
16476 { MASK_MMX
, CODE_FOR_mmx_punpckhwd
, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD
, 0, 0 },
16477 { MASK_MMX
, CODE_FOR_mmx_punpckhdq
, "__builtin_ia32_punpckhdq", IX86_BUILTIN_PUNPCKHDQ
, 0, 0 },
16478 { MASK_MMX
, CODE_FOR_mmx_punpcklbw
, "__builtin_ia32_punpcklbw", IX86_BUILTIN_PUNPCKLBW
, 0, 0 },
16479 { MASK_MMX
, CODE_FOR_mmx_punpcklwd
, "__builtin_ia32_punpcklwd", IX86_BUILTIN_PUNPCKLWD
, 0, 0 },
16480 { MASK_MMX
, CODE_FOR_mmx_punpckldq
, "__builtin_ia32_punpckldq", IX86_BUILTIN_PUNPCKLDQ
, 0, 0 },
16483 { MASK_MMX
, CODE_FOR_mmx_packsswb
, 0, IX86_BUILTIN_PACKSSWB
, 0, 0 },
16484 { MASK_MMX
, CODE_FOR_mmx_packssdw
, 0, IX86_BUILTIN_PACKSSDW
, 0, 0 },
16485 { MASK_MMX
, CODE_FOR_mmx_packuswb
, 0, IX86_BUILTIN_PACKUSWB
, 0, 0 },
16487 { MASK_SSE
, CODE_FOR_sse_cvtpi2ps
, 0, IX86_BUILTIN_CVTPI2PS
, 0, 0 },
16488 { MASK_SSE
, CODE_FOR_sse_cvtsi2ss
, 0, IX86_BUILTIN_CVTSI2SS
, 0, 0 },
16489 { MASK_SSE
| MASK_64BIT
, CODE_FOR_sse_cvtsi2ssq
, 0, IX86_BUILTIN_CVTSI642SS
, 0, 0 },
16491 { MASK_MMX
, CODE_FOR_mmx_ashlv4hi3
, 0, IX86_BUILTIN_PSLLW
, 0, 0 },
16492 { MASK_MMX
, CODE_FOR_mmx_ashlv4hi3
, 0, IX86_BUILTIN_PSLLWI
, 0, 0 },
16493 { MASK_MMX
, CODE_FOR_mmx_ashlv2si3
, 0, IX86_BUILTIN_PSLLD
, 0, 0 },
16494 { MASK_MMX
, CODE_FOR_mmx_ashlv2si3
, 0, IX86_BUILTIN_PSLLDI
, 0, 0 },
16495 { MASK_MMX
, CODE_FOR_mmx_ashldi3
, 0, IX86_BUILTIN_PSLLQ
, 0, 0 },
16496 { MASK_MMX
, CODE_FOR_mmx_ashldi3
, 0, IX86_BUILTIN_PSLLQI
, 0, 0 },
16498 { MASK_MMX
, CODE_FOR_mmx_lshrv4hi3
, 0, IX86_BUILTIN_PSRLW
, 0, 0 },
16499 { MASK_MMX
, CODE_FOR_mmx_lshrv4hi3
, 0, IX86_BUILTIN_PSRLWI
, 0, 0 },
16500 { MASK_MMX
, CODE_FOR_mmx_lshrv2si3
, 0, IX86_BUILTIN_PSRLD
, 0, 0 },
16501 { MASK_MMX
, CODE_FOR_mmx_lshrv2si3
, 0, IX86_BUILTIN_PSRLDI
, 0, 0 },
16502 { MASK_MMX
, CODE_FOR_mmx_lshrdi3
, 0, IX86_BUILTIN_PSRLQ
, 0, 0 },
16503 { MASK_MMX
, CODE_FOR_mmx_lshrdi3
, 0, IX86_BUILTIN_PSRLQI
, 0, 0 },
16505 { MASK_MMX
, CODE_FOR_mmx_ashrv4hi3
, 0, IX86_BUILTIN_PSRAW
, 0, 0 },
16506 { MASK_MMX
, CODE_FOR_mmx_ashrv4hi3
, 0, IX86_BUILTIN_PSRAWI
, 0, 0 },
16507 { MASK_MMX
, CODE_FOR_mmx_ashrv2si3
, 0, IX86_BUILTIN_PSRAD
, 0, 0 },
16508 { MASK_MMX
, CODE_FOR_mmx_ashrv2si3
, 0, IX86_BUILTIN_PSRADI
, 0, 0 },
16510 { MASK_SSE
| MASK_3DNOW_A
, CODE_FOR_mmx_psadbw
, 0, IX86_BUILTIN_PSADBW
, 0, 0 },
16511 { MASK_MMX
, CODE_FOR_mmx_pmaddwd
, 0, IX86_BUILTIN_PMADDWD
, 0, 0 },
16514 { MASK_SSE2
, CODE_FOR_addv2df3
, "__builtin_ia32_addpd", IX86_BUILTIN_ADDPD
, 0, 0 },
16515 { MASK_SSE2
, CODE_FOR_subv2df3
, "__builtin_ia32_subpd", IX86_BUILTIN_SUBPD
, 0, 0 },
16516 { MASK_SSE2
, CODE_FOR_mulv2df3
, "__builtin_ia32_mulpd", IX86_BUILTIN_MULPD
, 0, 0 },
16517 { MASK_SSE2
, CODE_FOR_divv2df3
, "__builtin_ia32_divpd", IX86_BUILTIN_DIVPD
, 0, 0 },
16518 { MASK_SSE2
, CODE_FOR_sse2_vmaddv2df3
, "__builtin_ia32_addsd", IX86_BUILTIN_ADDSD
, 0, 0 },
16519 { MASK_SSE2
, CODE_FOR_sse2_vmsubv2df3
, "__builtin_ia32_subsd", IX86_BUILTIN_SUBSD
, 0, 0 },
16520 { MASK_SSE2
, CODE_FOR_sse2_vmmulv2df3
, "__builtin_ia32_mulsd", IX86_BUILTIN_MULSD
, 0, 0 },
16521 { MASK_SSE2
, CODE_FOR_sse2_vmdivv2df3
, "__builtin_ia32_divsd", IX86_BUILTIN_DIVSD
, 0, 0 },
16523 { MASK_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmpeqpd", IX86_BUILTIN_CMPEQPD
, EQ
, 0 },
16524 { MASK_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmpltpd", IX86_BUILTIN_CMPLTPD
, LT
, 0 },
16525 { MASK_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmplepd", IX86_BUILTIN_CMPLEPD
, LE
, 0 },
16526 { MASK_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmpgtpd", IX86_BUILTIN_CMPGTPD
, LT
,
16527 BUILTIN_DESC_SWAP_OPERANDS
},
16528 { MASK_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmpgepd", IX86_BUILTIN_CMPGEPD
, LE
,
16529 BUILTIN_DESC_SWAP_OPERANDS
},
16530 { MASK_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmpunordpd", IX86_BUILTIN_CMPUNORDPD
, UNORDERED
, 0 },
16531 { MASK_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmpneqpd", IX86_BUILTIN_CMPNEQPD
, NE
, 0 },
16532 { MASK_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmpnltpd", IX86_BUILTIN_CMPNLTPD
, UNGE
, 0 },
16533 { MASK_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmpnlepd", IX86_BUILTIN_CMPNLEPD
, UNGT
, 0 },
16534 { MASK_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmpngtpd", IX86_BUILTIN_CMPNGTPD
, UNGE
,
16535 BUILTIN_DESC_SWAP_OPERANDS
},
16536 { MASK_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmpngepd", IX86_BUILTIN_CMPNGEPD
, UNGT
,
16537 BUILTIN_DESC_SWAP_OPERANDS
},
16538 { MASK_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmpordpd", IX86_BUILTIN_CMPORDPD
, ORDERED
, 0 },
16539 { MASK_SSE2
, CODE_FOR_sse2_vmmaskcmpv2df3
, "__builtin_ia32_cmpeqsd", IX86_BUILTIN_CMPEQSD
, EQ
, 0 },
16540 { MASK_SSE2
, CODE_FOR_sse2_vmmaskcmpv2df3
, "__builtin_ia32_cmpltsd", IX86_BUILTIN_CMPLTSD
, LT
, 0 },
16541 { MASK_SSE2
, CODE_FOR_sse2_vmmaskcmpv2df3
, "__builtin_ia32_cmplesd", IX86_BUILTIN_CMPLESD
, LE
, 0 },
16542 { MASK_SSE2
, CODE_FOR_sse2_vmmaskcmpv2df3
, "__builtin_ia32_cmpunordsd", IX86_BUILTIN_CMPUNORDSD
, UNORDERED
, 0 },
16543 { MASK_SSE2
, CODE_FOR_sse2_vmmaskcmpv2df3
, "__builtin_ia32_cmpneqsd", IX86_BUILTIN_CMPNEQSD
, NE
, 0 },
16544 { MASK_SSE2
, CODE_FOR_sse2_vmmaskcmpv2df3
, "__builtin_ia32_cmpnltsd", IX86_BUILTIN_CMPNLTSD
, UNGE
, 0 },
16545 { MASK_SSE2
, CODE_FOR_sse2_vmmaskcmpv2df3
, "__builtin_ia32_cmpnlesd", IX86_BUILTIN_CMPNLESD
, UNGT
, 0 },
16546 { MASK_SSE2
, CODE_FOR_sse2_vmmaskcmpv2df3
, "__builtin_ia32_cmpordsd", IX86_BUILTIN_CMPORDSD
, ORDERED
, 0 },
16548 { MASK_SSE2
, CODE_FOR_sminv2df3
, "__builtin_ia32_minpd", IX86_BUILTIN_MINPD
, 0, 0 },
16549 { MASK_SSE2
, CODE_FOR_smaxv2df3
, "__builtin_ia32_maxpd", IX86_BUILTIN_MAXPD
, 0, 0 },
16550 { MASK_SSE2
, CODE_FOR_sse2_vmsminv2df3
, "__builtin_ia32_minsd", IX86_BUILTIN_MINSD
, 0, 0 },
16551 { MASK_SSE2
, CODE_FOR_sse2_vmsmaxv2df3
, "__builtin_ia32_maxsd", IX86_BUILTIN_MAXSD
, 0, 0 },
16553 { MASK_SSE2
, CODE_FOR_andv2df3
, "__builtin_ia32_andpd", IX86_BUILTIN_ANDPD
, 0, 0 },
16554 { MASK_SSE2
, CODE_FOR_sse2_nandv2df3
, "__builtin_ia32_andnpd", IX86_BUILTIN_ANDNPD
, 0, 0 },
16555 { MASK_SSE2
, CODE_FOR_iorv2df3
, "__builtin_ia32_orpd", IX86_BUILTIN_ORPD
, 0, 0 },
16556 { MASK_SSE2
, CODE_FOR_xorv2df3
, "__builtin_ia32_xorpd", IX86_BUILTIN_XORPD
, 0, 0 },
16558 { MASK_SSE2
, CODE_FOR_sse2_movsd
, "__builtin_ia32_movsd", IX86_BUILTIN_MOVSD
, 0, 0 },
16559 { MASK_SSE2
, CODE_FOR_sse2_unpckhpd
, "__builtin_ia32_unpckhpd", IX86_BUILTIN_UNPCKHPD
, 0, 0 },
16560 { MASK_SSE2
, CODE_FOR_sse2_unpcklpd
, "__builtin_ia32_unpcklpd", IX86_BUILTIN_UNPCKLPD
, 0, 0 },
16563 { MASK_SSE2
, CODE_FOR_addv16qi3
, "__builtin_ia32_paddb128", IX86_BUILTIN_PADDB128
, 0, 0 },
16564 { MASK_SSE2
, CODE_FOR_addv8hi3
, "__builtin_ia32_paddw128", IX86_BUILTIN_PADDW128
, 0, 0 },
16565 { MASK_SSE2
, CODE_FOR_addv4si3
, "__builtin_ia32_paddd128", IX86_BUILTIN_PADDD128
, 0, 0 },
16566 { MASK_SSE2
, CODE_FOR_addv2di3
, "__builtin_ia32_paddq128", IX86_BUILTIN_PADDQ128
, 0, 0 },
16567 { MASK_SSE2
, CODE_FOR_subv16qi3
, "__builtin_ia32_psubb128", IX86_BUILTIN_PSUBB128
, 0, 0 },
16568 { MASK_SSE2
, CODE_FOR_subv8hi3
, "__builtin_ia32_psubw128", IX86_BUILTIN_PSUBW128
, 0, 0 },
16569 { MASK_SSE2
, CODE_FOR_subv4si3
, "__builtin_ia32_psubd128", IX86_BUILTIN_PSUBD128
, 0, 0 },
16570 { MASK_SSE2
, CODE_FOR_subv2di3
, "__builtin_ia32_psubq128", IX86_BUILTIN_PSUBQ128
, 0, 0 },
16572 { MASK_MMX
, CODE_FOR_sse2_ssaddv16qi3
, "__builtin_ia32_paddsb128", IX86_BUILTIN_PADDSB128
, 0, 0 },
16573 { MASK_MMX
, CODE_FOR_sse2_ssaddv8hi3
, "__builtin_ia32_paddsw128", IX86_BUILTIN_PADDSW128
, 0, 0 },
16574 { MASK_MMX
, CODE_FOR_sse2_sssubv16qi3
, "__builtin_ia32_psubsb128", IX86_BUILTIN_PSUBSB128
, 0, 0 },
16575 { MASK_MMX
, CODE_FOR_sse2_sssubv8hi3
, "__builtin_ia32_psubsw128", IX86_BUILTIN_PSUBSW128
, 0, 0 },
16576 { MASK_MMX
, CODE_FOR_sse2_usaddv16qi3
, "__builtin_ia32_paddusb128", IX86_BUILTIN_PADDUSB128
, 0, 0 },
16577 { MASK_MMX
, CODE_FOR_sse2_usaddv8hi3
, "__builtin_ia32_paddusw128", IX86_BUILTIN_PADDUSW128
, 0, 0 },
16578 { MASK_MMX
, CODE_FOR_sse2_ussubv16qi3
, "__builtin_ia32_psubusb128", IX86_BUILTIN_PSUBUSB128
, 0, 0 },
16579 { MASK_MMX
, CODE_FOR_sse2_ussubv8hi3
, "__builtin_ia32_psubusw128", IX86_BUILTIN_PSUBUSW128
, 0, 0 },
16581 { MASK_SSE2
, CODE_FOR_mulv8hi3
, "__builtin_ia32_pmullw128", IX86_BUILTIN_PMULLW128
, 0, 0 },
16582 { MASK_SSE2
, CODE_FOR_smulv8hi3_highpart
, "__builtin_ia32_pmulhw128", IX86_BUILTIN_PMULHW128
, 0, 0 },
16584 { MASK_SSE2
, CODE_FOR_andv2di3
, "__builtin_ia32_pand128", IX86_BUILTIN_PAND128
, 0, 0 },
16585 { MASK_SSE2
, CODE_FOR_sse2_nandv2di3
, "__builtin_ia32_pandn128", IX86_BUILTIN_PANDN128
, 0, 0 },
16586 { MASK_SSE2
, CODE_FOR_iorv2di3
, "__builtin_ia32_por128", IX86_BUILTIN_POR128
, 0, 0 },
16587 { MASK_SSE2
, CODE_FOR_xorv2di3
, "__builtin_ia32_pxor128", IX86_BUILTIN_PXOR128
, 0, 0 },
16589 { MASK_SSE2
, CODE_FOR_sse2_uavgv16qi3
, "__builtin_ia32_pavgb128", IX86_BUILTIN_PAVGB128
, 0, 0 },
16590 { MASK_SSE2
, CODE_FOR_sse2_uavgv8hi3
, "__builtin_ia32_pavgw128", IX86_BUILTIN_PAVGW128
, 0, 0 },
16592 { MASK_SSE2
, CODE_FOR_sse2_eqv16qi3
, "__builtin_ia32_pcmpeqb128", IX86_BUILTIN_PCMPEQB128
, 0, 0 },
16593 { MASK_SSE2
, CODE_FOR_sse2_eqv8hi3
, "__builtin_ia32_pcmpeqw128", IX86_BUILTIN_PCMPEQW128
, 0, 0 },
16594 { MASK_SSE2
, CODE_FOR_sse2_eqv4si3
, "__builtin_ia32_pcmpeqd128", IX86_BUILTIN_PCMPEQD128
, 0, 0 },
16595 { MASK_SSE2
, CODE_FOR_sse2_gtv16qi3
, "__builtin_ia32_pcmpgtb128", IX86_BUILTIN_PCMPGTB128
, 0, 0 },
16596 { MASK_SSE2
, CODE_FOR_sse2_gtv8hi3
, "__builtin_ia32_pcmpgtw128", IX86_BUILTIN_PCMPGTW128
, 0, 0 },
16597 { MASK_SSE2
, CODE_FOR_sse2_gtv4si3
, "__builtin_ia32_pcmpgtd128", IX86_BUILTIN_PCMPGTD128
, 0, 0 },
16599 { MASK_SSE2
, CODE_FOR_umaxv16qi3
, "__builtin_ia32_pmaxub128", IX86_BUILTIN_PMAXUB128
, 0, 0 },
16600 { MASK_SSE2
, CODE_FOR_smaxv8hi3
, "__builtin_ia32_pmaxsw128", IX86_BUILTIN_PMAXSW128
, 0, 0 },
16601 { MASK_SSE2
, CODE_FOR_uminv16qi3
, "__builtin_ia32_pminub128", IX86_BUILTIN_PMINUB128
, 0, 0 },
16602 { MASK_SSE2
, CODE_FOR_sminv8hi3
, "__builtin_ia32_pminsw128", IX86_BUILTIN_PMINSW128
, 0, 0 },
16604 { MASK_SSE2
, CODE_FOR_sse2_punpckhbw
, "__builtin_ia32_punpckhbw128", IX86_BUILTIN_PUNPCKHBW128
, 0, 0 },
16605 { MASK_SSE2
, CODE_FOR_sse2_punpckhwd
, "__builtin_ia32_punpckhwd128", IX86_BUILTIN_PUNPCKHWD128
, 0, 0 },
16606 { MASK_SSE2
, CODE_FOR_sse2_punpckhdq
, "__builtin_ia32_punpckhdq128", IX86_BUILTIN_PUNPCKHDQ128
, 0, 0 },
16607 { MASK_SSE2
, CODE_FOR_sse2_punpckhqdq
, "__builtin_ia32_punpckhqdq128", IX86_BUILTIN_PUNPCKHQDQ128
, 0, 0 },
16608 { MASK_SSE2
, CODE_FOR_sse2_punpcklbw
, "__builtin_ia32_punpcklbw128", IX86_BUILTIN_PUNPCKLBW128
, 0, 0 },
16609 { MASK_SSE2
, CODE_FOR_sse2_punpcklwd
, "__builtin_ia32_punpcklwd128", IX86_BUILTIN_PUNPCKLWD128
, 0, 0 },
16610 { MASK_SSE2
, CODE_FOR_sse2_punpckldq
, "__builtin_ia32_punpckldq128", IX86_BUILTIN_PUNPCKLDQ128
, 0, 0 },
16611 { MASK_SSE2
, CODE_FOR_sse2_punpcklqdq
, "__builtin_ia32_punpcklqdq128", IX86_BUILTIN_PUNPCKLQDQ128
, 0, 0 },
16613 { MASK_SSE2
, CODE_FOR_sse2_packsswb
, "__builtin_ia32_packsswb128", IX86_BUILTIN_PACKSSWB128
, 0, 0 },
16614 { MASK_SSE2
, CODE_FOR_sse2_packssdw
, "__builtin_ia32_packssdw128", IX86_BUILTIN_PACKSSDW128
, 0, 0 },
16615 { MASK_SSE2
, CODE_FOR_sse2_packuswb
, "__builtin_ia32_packuswb128", IX86_BUILTIN_PACKUSWB128
, 0, 0 },
16617 { MASK_SSE2
, CODE_FOR_umulv8hi3_highpart
, "__builtin_ia32_pmulhuw128", IX86_BUILTIN_PMULHUW128
, 0, 0 },
16618 { MASK_SSE2
, CODE_FOR_sse2_psadbw
, 0, IX86_BUILTIN_PSADBW128
, 0, 0 },
16620 { MASK_SSE2
, CODE_FOR_sse2_umulsidi3
, 0, IX86_BUILTIN_PMULUDQ
, 0, 0 },
16621 { MASK_SSE2
, CODE_FOR_sse2_umulv2siv2di3
, 0, IX86_BUILTIN_PMULUDQ128
, 0, 0 },
16623 { MASK_SSE2
, CODE_FOR_ashlv8hi3
, 0, IX86_BUILTIN_PSLLWI128
, 0, 0 },
16624 { MASK_SSE2
, CODE_FOR_ashlv4si3
, 0, IX86_BUILTIN_PSLLDI128
, 0, 0 },
16625 { MASK_SSE2
, CODE_FOR_ashlv2di3
, 0, IX86_BUILTIN_PSLLQI128
, 0, 0 },
16627 { MASK_SSE2
, CODE_FOR_lshrv8hi3
, 0, IX86_BUILTIN_PSRLWI128
, 0, 0 },
16628 { MASK_SSE2
, CODE_FOR_lshrv4si3
, 0, IX86_BUILTIN_PSRLDI128
, 0, 0 },
16629 { MASK_SSE2
, CODE_FOR_lshrv2di3
, 0, IX86_BUILTIN_PSRLQI128
, 0, 0 },
16631 { MASK_SSE2
, CODE_FOR_ashrv8hi3
, 0, IX86_BUILTIN_PSRAWI128
, 0, 0 },
16632 { MASK_SSE2
, CODE_FOR_ashrv4si3
, 0, IX86_BUILTIN_PSRADI128
, 0, 0 },
16634 { MASK_SSE2
, CODE_FOR_sse2_pmaddwd
, 0, IX86_BUILTIN_PMADDWD128
, 0, 0 },
16636 { MASK_SSE2
, CODE_FOR_sse2_cvtsi2sd
, 0, IX86_BUILTIN_CVTSI2SD
, 0, 0 },
16637 { MASK_SSE2
| MASK_64BIT
, CODE_FOR_sse2_cvtsi2sdq
, 0, IX86_BUILTIN_CVTSI642SD
, 0, 0 },
16638 { MASK_SSE2
, CODE_FOR_sse2_cvtsd2ss
, 0, IX86_BUILTIN_CVTSD2SS
, 0, 0 },
16639 { MASK_SSE2
, CODE_FOR_sse2_cvtss2sd
, 0, IX86_BUILTIN_CVTSS2SD
, 0, 0 },
16642 { MASK_SSE3
, CODE_FOR_sse3_addsubv4sf3
, "__builtin_ia32_addsubps", IX86_BUILTIN_ADDSUBPS
, 0, 0 },
16643 { MASK_SSE3
, CODE_FOR_sse3_addsubv2df3
, "__builtin_ia32_addsubpd", IX86_BUILTIN_ADDSUBPD
, 0, 0 },
16644 { MASK_SSE3
, CODE_FOR_sse3_haddv4sf3
, "__builtin_ia32_haddps", IX86_BUILTIN_HADDPS
, 0, 0 },
16645 { MASK_SSE3
, CODE_FOR_sse3_haddv2df3
, "__builtin_ia32_haddpd", IX86_BUILTIN_HADDPD
, 0, 0 },
16646 { MASK_SSE3
, CODE_FOR_sse3_hsubv4sf3
, "__builtin_ia32_hsubps", IX86_BUILTIN_HSUBPS
, 0, 0 },
16647 { MASK_SSE3
, CODE_FOR_sse3_hsubv2df3
, "__builtin_ia32_hsubpd", IX86_BUILTIN_HSUBPD
, 0, 0 },
16650 { MASK_SSSE3
, CODE_FOR_ssse3_phaddwv8hi3
, "__builtin_ia32_phaddw128", IX86_BUILTIN_PHADDW128
, 0, 0 },
16651 { MASK_SSSE3
, CODE_FOR_ssse3_phaddwv4hi3
, "__builtin_ia32_phaddw", IX86_BUILTIN_PHADDW
, 0, 0 },
16652 { MASK_SSSE3
, CODE_FOR_ssse3_phadddv4si3
, "__builtin_ia32_phaddd128", IX86_BUILTIN_PHADDD128
, 0, 0 },
16653 { MASK_SSSE3
, CODE_FOR_ssse3_phadddv2si3
, "__builtin_ia32_phaddd", IX86_BUILTIN_PHADDD
, 0, 0 },
16654 { MASK_SSSE3
, CODE_FOR_ssse3_phaddswv8hi3
, "__builtin_ia32_phaddsw128", IX86_BUILTIN_PHADDSW128
, 0, 0 },
16655 { MASK_SSSE3
, CODE_FOR_ssse3_phaddswv4hi3
, "__builtin_ia32_phaddsw", IX86_BUILTIN_PHADDSW
, 0, 0 },
16656 { MASK_SSSE3
, CODE_FOR_ssse3_phsubwv8hi3
, "__builtin_ia32_phsubw128", IX86_BUILTIN_PHSUBW128
, 0, 0 },
16657 { MASK_SSSE3
, CODE_FOR_ssse3_phsubwv4hi3
, "__builtin_ia32_phsubw", IX86_BUILTIN_PHSUBW
, 0, 0 },
16658 { MASK_SSSE3
, CODE_FOR_ssse3_phsubdv4si3
, "__builtin_ia32_phsubd128", IX86_BUILTIN_PHSUBD128
, 0, 0 },
16659 { MASK_SSSE3
, CODE_FOR_ssse3_phsubdv2si3
, "__builtin_ia32_phsubd", IX86_BUILTIN_PHSUBD
, 0, 0 },
16660 { MASK_SSSE3
, CODE_FOR_ssse3_phsubswv8hi3
, "__builtin_ia32_phsubsw128", IX86_BUILTIN_PHSUBSW128
, 0, 0 },
16661 { MASK_SSSE3
, CODE_FOR_ssse3_phsubswv4hi3
, "__builtin_ia32_phsubsw", IX86_BUILTIN_PHSUBSW
, 0, 0 },
16662 { MASK_SSSE3
, CODE_FOR_ssse3_pmaddubswv8hi3
, "__builtin_ia32_pmaddubsw128", IX86_BUILTIN_PMADDUBSW128
, 0, 0 },
16663 { MASK_SSSE3
, CODE_FOR_ssse3_pmaddubswv4hi3
, "__builtin_ia32_pmaddubsw", IX86_BUILTIN_PMADDUBSW
, 0, 0 },
16664 { MASK_SSSE3
, CODE_FOR_ssse3_pmulhrswv8hi3
, "__builtin_ia32_pmulhrsw128", IX86_BUILTIN_PMULHRSW128
, 0, 0 },
16665 { MASK_SSSE3
, CODE_FOR_ssse3_pmulhrswv4hi3
, "__builtin_ia32_pmulhrsw", IX86_BUILTIN_PMULHRSW
, 0, 0 },
16666 { MASK_SSSE3
, CODE_FOR_ssse3_pshufbv16qi3
, "__builtin_ia32_pshufb128", IX86_BUILTIN_PSHUFB128
, 0, 0 },
16667 { MASK_SSSE3
, CODE_FOR_ssse3_pshufbv8qi3
, "__builtin_ia32_pshufb", IX86_BUILTIN_PSHUFB
, 0, 0 },
16668 { MASK_SSSE3
, CODE_FOR_ssse3_psignv16qi3
, "__builtin_ia32_psignb128", IX86_BUILTIN_PSIGNB128
, 0, 0 },
16669 { MASK_SSSE3
, CODE_FOR_ssse3_psignv8qi3
, "__builtin_ia32_psignb", IX86_BUILTIN_PSIGNB
, 0, 0 },
16670 { MASK_SSSE3
, CODE_FOR_ssse3_psignv8hi3
, "__builtin_ia32_psignw128", IX86_BUILTIN_PSIGNW128
, 0, 0 },
16671 { MASK_SSSE3
, CODE_FOR_ssse3_psignv4hi3
, "__builtin_ia32_psignw", IX86_BUILTIN_PSIGNW
, 0, 0 },
16672 { MASK_SSSE3
, CODE_FOR_ssse3_psignv4si3
, "__builtin_ia32_psignd128", IX86_BUILTIN_PSIGND128
, 0, 0 },
16673 { MASK_SSSE3
, CODE_FOR_ssse3_psignv2si3
, "__builtin_ia32_psignd", IX86_BUILTIN_PSIGND
, 0, 0 }
16676 static const struct builtin_description bdesc_1arg
[] =
16678 { MASK_SSE
| MASK_3DNOW_A
, CODE_FOR_mmx_pmovmskb
, 0, IX86_BUILTIN_PMOVMSKB
, 0, 0 },
16679 { MASK_SSE
, CODE_FOR_sse_movmskps
, 0, IX86_BUILTIN_MOVMSKPS
, 0, 0 },
16681 { MASK_SSE
, CODE_FOR_sqrtv4sf2
, 0, IX86_BUILTIN_SQRTPS
, 0, 0 },
16682 { MASK_SSE
, CODE_FOR_sse_rsqrtv4sf2
, 0, IX86_BUILTIN_RSQRTPS
, 0, 0 },
16683 { MASK_SSE
, CODE_FOR_sse_rcpv4sf2
, 0, IX86_BUILTIN_RCPPS
, 0, 0 },
16685 { MASK_SSE
, CODE_FOR_sse_cvtps2pi
, 0, IX86_BUILTIN_CVTPS2PI
, 0, 0 },
16686 { MASK_SSE
, CODE_FOR_sse_cvtss2si
, 0, IX86_BUILTIN_CVTSS2SI
, 0, 0 },
16687 { MASK_SSE
| MASK_64BIT
, CODE_FOR_sse_cvtss2siq
, 0, IX86_BUILTIN_CVTSS2SI64
, 0, 0 },
16688 { MASK_SSE
, CODE_FOR_sse_cvttps2pi
, 0, IX86_BUILTIN_CVTTPS2PI
, 0, 0 },
16689 { MASK_SSE
, CODE_FOR_sse_cvttss2si
, 0, IX86_BUILTIN_CVTTSS2SI
, 0, 0 },
16690 { MASK_SSE
| MASK_64BIT
, CODE_FOR_sse_cvttss2siq
, 0, IX86_BUILTIN_CVTTSS2SI64
, 0, 0 },
16692 { MASK_SSE2
, CODE_FOR_sse2_pmovmskb
, 0, IX86_BUILTIN_PMOVMSKB128
, 0, 0 },
16693 { MASK_SSE2
, CODE_FOR_sse2_movmskpd
, 0, IX86_BUILTIN_MOVMSKPD
, 0, 0 },
16695 { MASK_SSE2
, CODE_FOR_sqrtv2df2
, 0, IX86_BUILTIN_SQRTPD
, 0, 0 },
16697 { MASK_SSE2
, CODE_FOR_sse2_cvtdq2pd
, 0, IX86_BUILTIN_CVTDQ2PD
, 0, 0 },
16698 { MASK_SSE2
, CODE_FOR_sse2_cvtdq2ps
, 0, IX86_BUILTIN_CVTDQ2PS
, 0, 0 },
16700 { MASK_SSE2
, CODE_FOR_sse2_cvtpd2dq
, 0, IX86_BUILTIN_CVTPD2DQ
, 0, 0 },
16701 { MASK_SSE2
, CODE_FOR_sse2_cvtpd2pi
, 0, IX86_BUILTIN_CVTPD2PI
, 0, 0 },
16702 { MASK_SSE2
, CODE_FOR_sse2_cvtpd2ps
, 0, IX86_BUILTIN_CVTPD2PS
, 0, 0 },
16703 { MASK_SSE2
, CODE_FOR_sse2_cvttpd2dq
, 0, IX86_BUILTIN_CVTTPD2DQ
, 0, 0 },
16704 { MASK_SSE2
, CODE_FOR_sse2_cvttpd2pi
, 0, IX86_BUILTIN_CVTTPD2PI
, 0, 0 },
16706 { MASK_SSE2
, CODE_FOR_sse2_cvtpi2pd
, 0, IX86_BUILTIN_CVTPI2PD
, 0, 0 },
16708 { MASK_SSE2
, CODE_FOR_sse2_cvtsd2si
, 0, IX86_BUILTIN_CVTSD2SI
, 0, 0 },
16709 { MASK_SSE2
, CODE_FOR_sse2_cvttsd2si
, 0, IX86_BUILTIN_CVTTSD2SI
, 0, 0 },
16710 { MASK_SSE2
| MASK_64BIT
, CODE_FOR_sse2_cvtsd2siq
, 0, IX86_BUILTIN_CVTSD2SI64
, 0, 0 },
16711 { MASK_SSE2
| MASK_64BIT
, CODE_FOR_sse2_cvttsd2siq
, 0, IX86_BUILTIN_CVTTSD2SI64
, 0, 0 },
16713 { MASK_SSE2
, CODE_FOR_sse2_cvtps2dq
, 0, IX86_BUILTIN_CVTPS2DQ
, 0, 0 },
16714 { MASK_SSE2
, CODE_FOR_sse2_cvtps2pd
, 0, IX86_BUILTIN_CVTPS2PD
, 0, 0 },
16715 { MASK_SSE2
, CODE_FOR_sse2_cvttps2dq
, 0, IX86_BUILTIN_CVTTPS2DQ
, 0, 0 },
16718 { MASK_SSE3
, CODE_FOR_sse3_movshdup
, "__builtin_ia32_movshdup", IX86_BUILTIN_MOVSHDUP
, 0, 0 },
16719 { MASK_SSE3
, CODE_FOR_sse3_movsldup
, "__builtin_ia32_movsldup", IX86_BUILTIN_MOVSLDUP
, 0, 0 },
16722 { MASK_SSSE3
, CODE_FOR_absv16qi2
, "__builtin_ia32_pabsb128", IX86_BUILTIN_PABSB128
, 0, 0 },
16723 { MASK_SSSE3
, CODE_FOR_absv8qi2
, "__builtin_ia32_pabsb", IX86_BUILTIN_PABSB
, 0, 0 },
16724 { MASK_SSSE3
, CODE_FOR_absv8hi2
, "__builtin_ia32_pabsw128", IX86_BUILTIN_PABSW128
, 0, 0 },
16725 { MASK_SSSE3
, CODE_FOR_absv4hi2
, "__builtin_ia32_pabsw", IX86_BUILTIN_PABSW
, 0, 0 },
16726 { MASK_SSSE3
, CODE_FOR_absv4si2
, "__builtin_ia32_pabsd128", IX86_BUILTIN_PABSD128
, 0, 0 },
16727 { MASK_SSSE3
, CODE_FOR_absv2si2
, "__builtin_ia32_pabsd", IX86_BUILTIN_PABSD
, 0, 0 },
/* NOTE(review): this span was truncated by extraction — the function's
   storage class/return type, its braces, and the guard condition around
   the ix86_init_mmx_sse_builtins () call are missing from the visible
   text (the comment below says that routine must not run when TARGET_MMX
   is zero, so presumably the call sits under an if (TARGET_MMX) guard —
   confirm against the full file).  Lines kept byte-identical.  */
16731 ix86_init_builtins (void)
16734 ix86_init_mmx_sse_builtins ();
/* Set up all the MMX/SSE builtins.  This is not called if TARGET_MMX
   is zero.  Otherwise, if TARGET_SSE is not set, only expand the MMX
   portion of the MMX/SSE builtins.  */
16741 ix86_init_mmx_sse_builtins (void)
16743 const struct builtin_description
* d
;
16746 tree V16QI_type_node
= build_vector_type_for_mode (char_type_node
, V16QImode
);
16747 tree V2SI_type_node
= build_vector_type_for_mode (intSI_type_node
, V2SImode
);
16748 tree V2SF_type_node
= build_vector_type_for_mode (float_type_node
, V2SFmode
);
16749 tree V2DI_type_node
16750 = build_vector_type_for_mode (long_long_integer_type_node
, V2DImode
);
16751 tree V2DF_type_node
= build_vector_type_for_mode (double_type_node
, V2DFmode
);
16752 tree V4SF_type_node
= build_vector_type_for_mode (float_type_node
, V4SFmode
);
16753 tree V4SI_type_node
= build_vector_type_for_mode (intSI_type_node
, V4SImode
);
16754 tree V4HI_type_node
= build_vector_type_for_mode (intHI_type_node
, V4HImode
);
16755 tree V8QI_type_node
= build_vector_type_for_mode (char_type_node
, V8QImode
);
16756 tree V8HI_type_node
= build_vector_type_for_mode (intHI_type_node
, V8HImode
);
16758 tree pchar_type_node
= build_pointer_type (char_type_node
);
16759 tree pcchar_type_node
= build_pointer_type (
16760 build_type_variant (char_type_node
, 1, 0));
16761 tree pfloat_type_node
= build_pointer_type (float_type_node
);
16762 tree pcfloat_type_node
= build_pointer_type (
16763 build_type_variant (float_type_node
, 1, 0));
16764 tree pv2si_type_node
= build_pointer_type (V2SI_type_node
);
16765 tree pv2di_type_node
= build_pointer_type (V2DI_type_node
);
16766 tree pdi_type_node
= build_pointer_type (long_long_unsigned_type_node
);
16769 tree int_ftype_v4sf_v4sf
16770 = build_function_type_list (integer_type_node
,
16771 V4SF_type_node
, V4SF_type_node
, NULL_TREE
);
16772 tree v4si_ftype_v4sf_v4sf
16773 = build_function_type_list (V4SI_type_node
,
16774 V4SF_type_node
, V4SF_type_node
, NULL_TREE
);
16775 /* MMX/SSE/integer conversions. */
16776 tree int_ftype_v4sf
16777 = build_function_type_list (integer_type_node
,
16778 V4SF_type_node
, NULL_TREE
);
16779 tree int64_ftype_v4sf
16780 = build_function_type_list (long_long_integer_type_node
,
16781 V4SF_type_node
, NULL_TREE
);
16782 tree int_ftype_v8qi
16783 = build_function_type_list (integer_type_node
, V8QI_type_node
, NULL_TREE
);
16784 tree v4sf_ftype_v4sf_int
16785 = build_function_type_list (V4SF_type_node
,
16786 V4SF_type_node
, integer_type_node
, NULL_TREE
);
16787 tree v4sf_ftype_v4sf_int64
16788 = build_function_type_list (V4SF_type_node
,
16789 V4SF_type_node
, long_long_integer_type_node
,
16791 tree v4sf_ftype_v4sf_v2si
16792 = build_function_type_list (V4SF_type_node
,
16793 V4SF_type_node
, V2SI_type_node
, NULL_TREE
);
16795 /* Miscellaneous. */
16796 tree v8qi_ftype_v4hi_v4hi
16797 = build_function_type_list (V8QI_type_node
,
16798 V4HI_type_node
, V4HI_type_node
, NULL_TREE
);
16799 tree v4hi_ftype_v2si_v2si
16800 = build_function_type_list (V4HI_type_node
,
16801 V2SI_type_node
, V2SI_type_node
, NULL_TREE
);
16802 tree v4sf_ftype_v4sf_v4sf_int
16803 = build_function_type_list (V4SF_type_node
,
16804 V4SF_type_node
, V4SF_type_node
,
16805 integer_type_node
, NULL_TREE
);
16806 tree v2si_ftype_v4hi_v4hi
16807 = build_function_type_list (V2SI_type_node
,
16808 V4HI_type_node
, V4HI_type_node
, NULL_TREE
);
16809 tree v4hi_ftype_v4hi_int
16810 = build_function_type_list (V4HI_type_node
,
16811 V4HI_type_node
, integer_type_node
, NULL_TREE
);
16812 tree v4hi_ftype_v4hi_di
16813 = build_function_type_list (V4HI_type_node
,
16814 V4HI_type_node
, long_long_unsigned_type_node
,
16816 tree v2si_ftype_v2si_di
16817 = build_function_type_list (V2SI_type_node
,
16818 V2SI_type_node
, long_long_unsigned_type_node
,
16820 tree void_ftype_void
16821 = build_function_type (void_type_node
, void_list_node
);
16822 tree void_ftype_unsigned
16823 = build_function_type_list (void_type_node
, unsigned_type_node
, NULL_TREE
);
16824 tree void_ftype_unsigned_unsigned
16825 = build_function_type_list (void_type_node
, unsigned_type_node
,
16826 unsigned_type_node
, NULL_TREE
);
16827 tree void_ftype_pcvoid_unsigned_unsigned
16828 = build_function_type_list (void_type_node
, const_ptr_type_node
,
16829 unsigned_type_node
, unsigned_type_node
,
16831 tree unsigned_ftype_void
16832 = build_function_type (unsigned_type_node
, void_list_node
);
16833 tree v2si_ftype_v4sf
16834 = build_function_type_list (V2SI_type_node
, V4SF_type_node
, NULL_TREE
);
16835 /* Loads/stores. */
16836 tree void_ftype_v8qi_v8qi_pchar
16837 = build_function_type_list (void_type_node
,
16838 V8QI_type_node
, V8QI_type_node
,
16839 pchar_type_node
, NULL_TREE
);
16840 tree v4sf_ftype_pcfloat
16841 = build_function_type_list (V4SF_type_node
, pcfloat_type_node
, NULL_TREE
);
16842 /* @@@ the type is bogus */
16843 tree v4sf_ftype_v4sf_pv2si
16844 = build_function_type_list (V4SF_type_node
,
16845 V4SF_type_node
, pv2si_type_node
, NULL_TREE
);
16846 tree void_ftype_pv2si_v4sf
16847 = build_function_type_list (void_type_node
,
16848 pv2si_type_node
, V4SF_type_node
, NULL_TREE
);
16849 tree void_ftype_pfloat_v4sf
16850 = build_function_type_list (void_type_node
,
16851 pfloat_type_node
, V4SF_type_node
, NULL_TREE
);
16852 tree void_ftype_pdi_di
16853 = build_function_type_list (void_type_node
,
16854 pdi_type_node
, long_long_unsigned_type_node
,
16856 tree void_ftype_pv2di_v2di
16857 = build_function_type_list (void_type_node
,
16858 pv2di_type_node
, V2DI_type_node
, NULL_TREE
);
16859 /* Normal vector unops. */
16860 tree v4sf_ftype_v4sf
16861 = build_function_type_list (V4SF_type_node
, V4SF_type_node
, NULL_TREE
);
16862 tree v16qi_ftype_v16qi
16863 = build_function_type_list (V16QI_type_node
, V16QI_type_node
, NULL_TREE
);
16864 tree v8hi_ftype_v8hi
16865 = build_function_type_list (V8HI_type_node
, V8HI_type_node
, NULL_TREE
);
16866 tree v4si_ftype_v4si
16867 = build_function_type_list (V4SI_type_node
, V4SI_type_node
, NULL_TREE
);
16868 tree v8qi_ftype_v8qi
16869 = build_function_type_list (V8QI_type_node
, V8QI_type_node
, NULL_TREE
);
16870 tree v4hi_ftype_v4hi
16871 = build_function_type_list (V4HI_type_node
, V4HI_type_node
, NULL_TREE
);
16873 /* Normal vector binops. */
16874 tree v4sf_ftype_v4sf_v4sf
16875 = build_function_type_list (V4SF_type_node
,
16876 V4SF_type_node
, V4SF_type_node
, NULL_TREE
);
16877 tree v8qi_ftype_v8qi_v8qi
16878 = build_function_type_list (V8QI_type_node
,
16879 V8QI_type_node
, V8QI_type_node
, NULL_TREE
);
16880 tree v4hi_ftype_v4hi_v4hi
16881 = build_function_type_list (V4HI_type_node
,
16882 V4HI_type_node
, V4HI_type_node
, NULL_TREE
);
16883 tree v2si_ftype_v2si_v2si
16884 = build_function_type_list (V2SI_type_node
,
16885 V2SI_type_node
, V2SI_type_node
, NULL_TREE
);
16886 tree di_ftype_di_di
16887 = build_function_type_list (long_long_unsigned_type_node
,
16888 long_long_unsigned_type_node
,
16889 long_long_unsigned_type_node
, NULL_TREE
);
16891 tree di_ftype_di_di_int
16892 = build_function_type_list (long_long_unsigned_type_node
,
16893 long_long_unsigned_type_node
,
16894 long_long_unsigned_type_node
,
16895 integer_type_node
, NULL_TREE
);
16897 tree v2si_ftype_v2sf
16898 = build_function_type_list (V2SI_type_node
, V2SF_type_node
, NULL_TREE
);
16899 tree v2sf_ftype_v2si
16900 = build_function_type_list (V2SF_type_node
, V2SI_type_node
, NULL_TREE
);
16901 tree v2si_ftype_v2si
16902 = build_function_type_list (V2SI_type_node
, V2SI_type_node
, NULL_TREE
);
16903 tree v2sf_ftype_v2sf
16904 = build_function_type_list (V2SF_type_node
, V2SF_type_node
, NULL_TREE
);
16905 tree v2sf_ftype_v2sf_v2sf
16906 = build_function_type_list (V2SF_type_node
,
16907 V2SF_type_node
, V2SF_type_node
, NULL_TREE
);
16908 tree v2si_ftype_v2sf_v2sf
16909 = build_function_type_list (V2SI_type_node
,
16910 V2SF_type_node
, V2SF_type_node
, NULL_TREE
);
16911 tree pint_type_node
= build_pointer_type (integer_type_node
);
16912 tree pdouble_type_node
= build_pointer_type (double_type_node
);
16913 tree pcdouble_type_node
= build_pointer_type (
16914 build_type_variant (double_type_node
, 1, 0));
16915 tree int_ftype_v2df_v2df
16916 = build_function_type_list (integer_type_node
,
16917 V2DF_type_node
, V2DF_type_node
, NULL_TREE
);
16919 tree void_ftype_pcvoid
16920 = build_function_type_list (void_type_node
, const_ptr_type_node
, NULL_TREE
);
16921 tree v4sf_ftype_v4si
16922 = build_function_type_list (V4SF_type_node
, V4SI_type_node
, NULL_TREE
);
16923 tree v4si_ftype_v4sf
16924 = build_function_type_list (V4SI_type_node
, V4SF_type_node
, NULL_TREE
);
16925 tree v2df_ftype_v4si
16926 = build_function_type_list (V2DF_type_node
, V4SI_type_node
, NULL_TREE
);
16927 tree v4si_ftype_v2df
16928 = build_function_type_list (V4SI_type_node
, V2DF_type_node
, NULL_TREE
);
16929 tree v2si_ftype_v2df
16930 = build_function_type_list (V2SI_type_node
, V2DF_type_node
, NULL_TREE
);
16931 tree v4sf_ftype_v2df
16932 = build_function_type_list (V4SF_type_node
, V2DF_type_node
, NULL_TREE
);
16933 tree v2df_ftype_v2si
16934 = build_function_type_list (V2DF_type_node
, V2SI_type_node
, NULL_TREE
);
16935 tree v2df_ftype_v4sf
16936 = build_function_type_list (V2DF_type_node
, V4SF_type_node
, NULL_TREE
);
16937 tree int_ftype_v2df
16938 = build_function_type_list (integer_type_node
, V2DF_type_node
, NULL_TREE
);
16939 tree int64_ftype_v2df
16940 = build_function_type_list (long_long_integer_type_node
,
16941 V2DF_type_node
, NULL_TREE
);
16942 tree v2df_ftype_v2df_int
16943 = build_function_type_list (V2DF_type_node
,
16944 V2DF_type_node
, integer_type_node
, NULL_TREE
);
16945 tree v2df_ftype_v2df_int64
16946 = build_function_type_list (V2DF_type_node
,
16947 V2DF_type_node
, long_long_integer_type_node
,
16949 tree v4sf_ftype_v4sf_v2df
16950 = build_function_type_list (V4SF_type_node
,
16951 V4SF_type_node
, V2DF_type_node
, NULL_TREE
);
16952 tree v2df_ftype_v2df_v4sf
16953 = build_function_type_list (V2DF_type_node
,
16954 V2DF_type_node
, V4SF_type_node
, NULL_TREE
);
16955 tree v2df_ftype_v2df_v2df_int
16956 = build_function_type_list (V2DF_type_node
,
16957 V2DF_type_node
, V2DF_type_node
,
16960 tree v2df_ftype_v2df_pcdouble
16961 = build_function_type_list (V2DF_type_node
,
16962 V2DF_type_node
, pcdouble_type_node
, NULL_TREE
);
16963 tree void_ftype_pdouble_v2df
16964 = build_function_type_list (void_type_node
,
16965 pdouble_type_node
, V2DF_type_node
, NULL_TREE
);
16966 tree void_ftype_pint_int
16967 = build_function_type_list (void_type_node
,
16968 pint_type_node
, integer_type_node
, NULL_TREE
);
16969 tree void_ftype_v16qi_v16qi_pchar
16970 = build_function_type_list (void_type_node
,
16971 V16QI_type_node
, V16QI_type_node
,
16972 pchar_type_node
, NULL_TREE
);
16973 tree v2df_ftype_pcdouble
16974 = build_function_type_list (V2DF_type_node
, pcdouble_type_node
, NULL_TREE
);
16975 tree v2df_ftype_v2df_v2df
16976 = build_function_type_list (V2DF_type_node
,
16977 V2DF_type_node
, V2DF_type_node
, NULL_TREE
);
16978 tree v16qi_ftype_v16qi_v16qi
16979 = build_function_type_list (V16QI_type_node
,
16980 V16QI_type_node
, V16QI_type_node
, NULL_TREE
);
16981 tree v8hi_ftype_v8hi_v8hi
16982 = build_function_type_list (V8HI_type_node
,
16983 V8HI_type_node
, V8HI_type_node
, NULL_TREE
);
16984 tree v4si_ftype_v4si_v4si
16985 = build_function_type_list (V4SI_type_node
,
16986 V4SI_type_node
, V4SI_type_node
, NULL_TREE
);
16987 tree v2di_ftype_v2di_v2di
16988 = build_function_type_list (V2DI_type_node
,
16989 V2DI_type_node
, V2DI_type_node
, NULL_TREE
);
16990 tree v2di_ftype_v2df_v2df
16991 = build_function_type_list (V2DI_type_node
,
16992 V2DF_type_node
, V2DF_type_node
, NULL_TREE
);
16993 tree v2df_ftype_v2df
16994 = build_function_type_list (V2DF_type_node
, V2DF_type_node
, NULL_TREE
);
16995 tree v2di_ftype_v2di_int
16996 = build_function_type_list (V2DI_type_node
,
16997 V2DI_type_node
, integer_type_node
, NULL_TREE
);
16998 tree v2di_ftype_v2di_v2di_int
16999 = build_function_type_list (V2DI_type_node
, V2DI_type_node
,
17000 V2DI_type_node
, integer_type_node
, NULL_TREE
);
17001 tree v4si_ftype_v4si_int
17002 = build_function_type_list (V4SI_type_node
,
17003 V4SI_type_node
, integer_type_node
, NULL_TREE
);
17004 tree v8hi_ftype_v8hi_int
17005 = build_function_type_list (V8HI_type_node
,
17006 V8HI_type_node
, integer_type_node
, NULL_TREE
);
17007 tree v8hi_ftype_v8hi_v2di
17008 = build_function_type_list (V8HI_type_node
,
17009 V8HI_type_node
, V2DI_type_node
, NULL_TREE
);
17010 tree v4si_ftype_v4si_v2di
17011 = build_function_type_list (V4SI_type_node
,
17012 V4SI_type_node
, V2DI_type_node
, NULL_TREE
);
17013 tree v4si_ftype_v8hi_v8hi
17014 = build_function_type_list (V4SI_type_node
,
17015 V8HI_type_node
, V8HI_type_node
, NULL_TREE
);
17016 tree di_ftype_v8qi_v8qi
17017 = build_function_type_list (long_long_unsigned_type_node
,
17018 V8QI_type_node
, V8QI_type_node
, NULL_TREE
);
17019 tree di_ftype_v2si_v2si
17020 = build_function_type_list (long_long_unsigned_type_node
,
17021 V2SI_type_node
, V2SI_type_node
, NULL_TREE
);
17022 tree v2di_ftype_v16qi_v16qi
17023 = build_function_type_list (V2DI_type_node
,
17024 V16QI_type_node
, V16QI_type_node
, NULL_TREE
);
17025 tree v2di_ftype_v4si_v4si
17026 = build_function_type_list (V2DI_type_node
,
17027 V4SI_type_node
, V4SI_type_node
, NULL_TREE
);
17028 tree int_ftype_v16qi
17029 = build_function_type_list (integer_type_node
, V16QI_type_node
, NULL_TREE
);
17030 tree v16qi_ftype_pcchar
17031 = build_function_type_list (V16QI_type_node
, pcchar_type_node
, NULL_TREE
);
17032 tree void_ftype_pchar_v16qi
17033 = build_function_type_list (void_type_node
,
17034 pchar_type_node
, V16QI_type_node
, NULL_TREE
);
17036 tree v2di_ftype_v2di_unsigned_unsigned
17037 = build_function_type_list (V2DI_type_node
, V2DI_type_node
,
17038 unsigned_type_node
, unsigned_type_node
,
17040 tree v2di_ftype_v2di_v2di_unsigned_unsigned
17041 = build_function_type_list (V2DI_type_node
, V2DI_type_node
, V2DI_type_node
,
17042 unsigned_type_node
, unsigned_type_node
,
17044 tree v2di_ftype_v2di_v16qi
17045 = build_function_type_list (V2DI_type_node
, V2DI_type_node
, V16QI_type_node
,
17049 tree float128_type
;
17052 /* The __float80 type. */
17053 if (TYPE_MODE (long_double_type_node
) == XFmode
)
17054 (*lang_hooks
.types
.register_builtin_type
) (long_double_type_node
,
17058 /* The __float80 type. */
17059 float80_type
= make_node (REAL_TYPE
);
17060 TYPE_PRECISION (float80_type
) = 80;
17061 layout_type (float80_type
);
17062 (*lang_hooks
.types
.register_builtin_type
) (float80_type
, "__float80");
17067 float128_type
= make_node (REAL_TYPE
);
17068 TYPE_PRECISION (float128_type
) = 128;
17069 layout_type (float128_type
);
17070 (*lang_hooks
.types
.register_builtin_type
) (float128_type
, "__float128");
17073 /* Add all builtins that are more or less simple operations on two
17075 for (i
= 0, d
= bdesc_2arg
; i
< ARRAY_SIZE (bdesc_2arg
); i
++, d
++)
17077 /* Use one of the operands; the target can have a different mode for
17078 mask-generating compares. */
17079 enum machine_mode mode
;
17084 mode
= insn_data
[d
->icode
].operand
[1].mode
;
17089 type
= v16qi_ftype_v16qi_v16qi
;
17092 type
= v8hi_ftype_v8hi_v8hi
;
17095 type
= v4si_ftype_v4si_v4si
;
17098 type
= v2di_ftype_v2di_v2di
;
17101 type
= v2df_ftype_v2df_v2df
;
17104 type
= v4sf_ftype_v4sf_v4sf
;
17107 type
= v8qi_ftype_v8qi_v8qi
;
17110 type
= v4hi_ftype_v4hi_v4hi
;
17113 type
= v2si_ftype_v2si_v2si
;
17116 type
= di_ftype_di_di
;
17120 gcc_unreachable ();
17123 /* Override for comparisons. */
17124 if (d
->icode
== CODE_FOR_sse_maskcmpv4sf3
17125 || d
->icode
== CODE_FOR_sse_vmmaskcmpv4sf3
)
17126 type
= v4si_ftype_v4sf_v4sf
;
17128 if (d
->icode
== CODE_FOR_sse2_maskcmpv2df3
17129 || d
->icode
== CODE_FOR_sse2_vmmaskcmpv2df3
)
17130 type
= v2di_ftype_v2df_v2df
;
17132 def_builtin (d
->mask
, d
->name
, type
, d
->code
);
17135 /* Add all builtins that are more or less simple operations on 1 operand. */
17136 for (i
= 0, d
= bdesc_1arg
; i
< ARRAY_SIZE (bdesc_1arg
); i
++, d
++)
17138 enum machine_mode mode
;
17143 mode
= insn_data
[d
->icode
].operand
[1].mode
;
17148 type
= v16qi_ftype_v16qi
;
17151 type
= v8hi_ftype_v8hi
;
17154 type
= v4si_ftype_v4si
;
17157 type
= v2df_ftype_v2df
;
17160 type
= v4sf_ftype_v4sf
;
17163 type
= v8qi_ftype_v8qi
;
17166 type
= v4hi_ftype_v4hi
;
17169 type
= v2si_ftype_v2si
;
17176 def_builtin (d
->mask
, d
->name
, type
, d
->code
);
17179 /* Add the remaining MMX insns with somewhat more complicated types. */
17180 def_builtin (MASK_MMX
, "__builtin_ia32_emms", void_ftype_void
, IX86_BUILTIN_EMMS
);
17181 def_builtin (MASK_MMX
, "__builtin_ia32_psllw", v4hi_ftype_v4hi_di
, IX86_BUILTIN_PSLLW
);
17182 def_builtin (MASK_MMX
, "__builtin_ia32_pslld", v2si_ftype_v2si_di
, IX86_BUILTIN_PSLLD
);
17183 def_builtin (MASK_MMX
, "__builtin_ia32_psllq", di_ftype_di_di
, IX86_BUILTIN_PSLLQ
);
17185 def_builtin (MASK_MMX
, "__builtin_ia32_psrlw", v4hi_ftype_v4hi_di
, IX86_BUILTIN_PSRLW
);
17186 def_builtin (MASK_MMX
, "__builtin_ia32_psrld", v2si_ftype_v2si_di
, IX86_BUILTIN_PSRLD
);
17187 def_builtin (MASK_MMX
, "__builtin_ia32_psrlq", di_ftype_di_di
, IX86_BUILTIN_PSRLQ
);
17189 def_builtin (MASK_MMX
, "__builtin_ia32_psraw", v4hi_ftype_v4hi_di
, IX86_BUILTIN_PSRAW
);
17190 def_builtin (MASK_MMX
, "__builtin_ia32_psrad", v2si_ftype_v2si_di
, IX86_BUILTIN_PSRAD
);
17192 def_builtin (MASK_SSE
| MASK_3DNOW_A
, "__builtin_ia32_pshufw", v4hi_ftype_v4hi_int
, IX86_BUILTIN_PSHUFW
);
17193 def_builtin (MASK_MMX
, "__builtin_ia32_pmaddwd", v2si_ftype_v4hi_v4hi
, IX86_BUILTIN_PMADDWD
);
17195 /* comi/ucomi insns. */
17196 for (i
= 0, d
= bdesc_comi
; i
< ARRAY_SIZE (bdesc_comi
); i
++, d
++)
17197 if (d
->mask
== MASK_SSE2
)
17198 def_builtin (d
->mask
, d
->name
, int_ftype_v2df_v2df
, d
->code
);
17200 def_builtin (d
->mask
, d
->name
, int_ftype_v4sf_v4sf
, d
->code
);
17202 def_builtin (MASK_MMX
, "__builtin_ia32_packsswb", v8qi_ftype_v4hi_v4hi
, IX86_BUILTIN_PACKSSWB
);
17203 def_builtin (MASK_MMX
, "__builtin_ia32_packssdw", v4hi_ftype_v2si_v2si
, IX86_BUILTIN_PACKSSDW
);
17204 def_builtin (MASK_MMX
, "__builtin_ia32_packuswb", v8qi_ftype_v4hi_v4hi
, IX86_BUILTIN_PACKUSWB
);
17206 def_builtin (MASK_SSE
, "__builtin_ia32_ldmxcsr", void_ftype_unsigned
, IX86_BUILTIN_LDMXCSR
);
17207 def_builtin (MASK_SSE
, "__builtin_ia32_stmxcsr", unsigned_ftype_void
, IX86_BUILTIN_STMXCSR
);
17208 def_builtin_const (MASK_SSE
, "__builtin_ia32_cvtpi2ps", v4sf_ftype_v4sf_v2si
, IX86_BUILTIN_CVTPI2PS
);
17209 def_builtin_const (MASK_SSE
, "__builtin_ia32_cvtps2pi", v2si_ftype_v4sf
, IX86_BUILTIN_CVTPS2PI
);
17210 def_builtin_const (MASK_SSE
, "__builtin_ia32_cvtsi2ss", v4sf_ftype_v4sf_int
, IX86_BUILTIN_CVTSI2SS
);
17211 def_builtin_const (MASK_SSE
| MASK_64BIT
, "__builtin_ia32_cvtsi642ss", v4sf_ftype_v4sf_int64
, IX86_BUILTIN_CVTSI642SS
);
17212 def_builtin_const (MASK_SSE
, "__builtin_ia32_cvtss2si", int_ftype_v4sf
, IX86_BUILTIN_CVTSS2SI
);
17213 def_builtin_const (MASK_SSE
| MASK_64BIT
, "__builtin_ia32_cvtss2si64", int64_ftype_v4sf
, IX86_BUILTIN_CVTSS2SI64
);
17214 def_builtin_const (MASK_SSE
, "__builtin_ia32_cvttps2pi", v2si_ftype_v4sf
, IX86_BUILTIN_CVTTPS2PI
);
17215 def_builtin_const (MASK_SSE
, "__builtin_ia32_cvttss2si", int_ftype_v4sf
, IX86_BUILTIN_CVTTSS2SI
);
17216 def_builtin_const (MASK_SSE
| MASK_64BIT
, "__builtin_ia32_cvttss2si64", int64_ftype_v4sf
, IX86_BUILTIN_CVTTSS2SI64
);
17218 def_builtin (MASK_SSE
| MASK_3DNOW_A
, "__builtin_ia32_maskmovq", void_ftype_v8qi_v8qi_pchar
, IX86_BUILTIN_MASKMOVQ
);
17220 def_builtin (MASK_SSE
, "__builtin_ia32_loadups", v4sf_ftype_pcfloat
, IX86_BUILTIN_LOADUPS
);
17221 def_builtin (MASK_SSE
, "__builtin_ia32_storeups", void_ftype_pfloat_v4sf
, IX86_BUILTIN_STOREUPS
);
17223 def_builtin (MASK_SSE
, "__builtin_ia32_loadhps", v4sf_ftype_v4sf_pv2si
, IX86_BUILTIN_LOADHPS
);
17224 def_builtin (MASK_SSE
, "__builtin_ia32_loadlps", v4sf_ftype_v4sf_pv2si
, IX86_BUILTIN_LOADLPS
);
17225 def_builtin (MASK_SSE
, "__builtin_ia32_storehps", void_ftype_pv2si_v4sf
, IX86_BUILTIN_STOREHPS
);
17226 def_builtin (MASK_SSE
, "__builtin_ia32_storelps", void_ftype_pv2si_v4sf
, IX86_BUILTIN_STORELPS
);
17228 def_builtin (MASK_SSE
, "__builtin_ia32_movmskps", int_ftype_v4sf
, IX86_BUILTIN_MOVMSKPS
);
17229 def_builtin (MASK_SSE
| MASK_3DNOW_A
, "__builtin_ia32_pmovmskb", int_ftype_v8qi
, IX86_BUILTIN_PMOVMSKB
);
17230 def_builtin (MASK_SSE
, "__builtin_ia32_movntps", void_ftype_pfloat_v4sf
, IX86_BUILTIN_MOVNTPS
);
17231 def_builtin (MASK_SSE
| MASK_3DNOW_A
, "__builtin_ia32_movntq", void_ftype_pdi_di
, IX86_BUILTIN_MOVNTQ
);
17233 def_builtin (MASK_SSE
| MASK_3DNOW_A
, "__builtin_ia32_sfence", void_ftype_void
, IX86_BUILTIN_SFENCE
);
17235 def_builtin (MASK_SSE
| MASK_3DNOW_A
, "__builtin_ia32_psadbw", di_ftype_v8qi_v8qi
, IX86_BUILTIN_PSADBW
);
17237 def_builtin (MASK_SSE
, "__builtin_ia32_rcpps", v4sf_ftype_v4sf
, IX86_BUILTIN_RCPPS
);
17238 def_builtin (MASK_SSE
, "__builtin_ia32_rcpss", v4sf_ftype_v4sf
, IX86_BUILTIN_RCPSS
);
17239 def_builtin (MASK_SSE
, "__builtin_ia32_rsqrtps", v4sf_ftype_v4sf
, IX86_BUILTIN_RSQRTPS
);
17240 def_builtin (MASK_SSE
, "__builtin_ia32_rsqrtss", v4sf_ftype_v4sf
, IX86_BUILTIN_RSQRTSS
);
17241 def_builtin_const (MASK_SSE
, "__builtin_ia32_sqrtps", v4sf_ftype_v4sf
, IX86_BUILTIN_SQRTPS
);
17242 def_builtin_const (MASK_SSE
, "__builtin_ia32_sqrtss", v4sf_ftype_v4sf
, IX86_BUILTIN_SQRTSS
);
17244 def_builtin (MASK_SSE
, "__builtin_ia32_shufps", v4sf_ftype_v4sf_v4sf_int
, IX86_BUILTIN_SHUFPS
);
17246 /* Original 3DNow! */
17247 def_builtin (MASK_3DNOW
, "__builtin_ia32_femms", void_ftype_void
, IX86_BUILTIN_FEMMS
);
17248 def_builtin (MASK_3DNOW
, "__builtin_ia32_pavgusb", v8qi_ftype_v8qi_v8qi
, IX86_BUILTIN_PAVGUSB
);
17249 def_builtin (MASK_3DNOW
, "__builtin_ia32_pf2id", v2si_ftype_v2sf
, IX86_BUILTIN_PF2ID
);
17250 def_builtin (MASK_3DNOW
, "__builtin_ia32_pfacc", v2sf_ftype_v2sf_v2sf
, IX86_BUILTIN_PFACC
);
17251 def_builtin (MASK_3DNOW
, "__builtin_ia32_pfadd", v2sf_ftype_v2sf_v2sf
, IX86_BUILTIN_PFADD
);
17252 def_builtin (MASK_3DNOW
, "__builtin_ia32_pfcmpeq", v2si_ftype_v2sf_v2sf
, IX86_BUILTIN_PFCMPEQ
);
17253 def_builtin (MASK_3DNOW
, "__builtin_ia32_pfcmpge", v2si_ftype_v2sf_v2sf
, IX86_BUILTIN_PFCMPGE
);
17254 def_builtin (MASK_3DNOW
, "__builtin_ia32_pfcmpgt", v2si_ftype_v2sf_v2sf
, IX86_BUILTIN_PFCMPGT
);
17255 def_builtin (MASK_3DNOW
, "__builtin_ia32_pfmax", v2sf_ftype_v2sf_v2sf
, IX86_BUILTIN_PFMAX
);
17256 def_builtin (MASK_3DNOW
, "__builtin_ia32_pfmin", v2sf_ftype_v2sf_v2sf
, IX86_BUILTIN_PFMIN
);
17257 def_builtin (MASK_3DNOW
, "__builtin_ia32_pfmul", v2sf_ftype_v2sf_v2sf
, IX86_BUILTIN_PFMUL
);
17258 def_builtin (MASK_3DNOW
, "__builtin_ia32_pfrcp", v2sf_ftype_v2sf
, IX86_BUILTIN_PFRCP
);
17259 def_builtin (MASK_3DNOW
, "__builtin_ia32_pfrcpit1", v2sf_ftype_v2sf_v2sf
, IX86_BUILTIN_PFRCPIT1
);
17260 def_builtin (MASK_3DNOW
, "__builtin_ia32_pfrcpit2", v2sf_ftype_v2sf_v2sf
, IX86_BUILTIN_PFRCPIT2
);
17261 def_builtin (MASK_3DNOW
, "__builtin_ia32_pfrsqrt", v2sf_ftype_v2sf
, IX86_BUILTIN_PFRSQRT
);
17262 def_builtin (MASK_3DNOW
, "__builtin_ia32_pfrsqit1", v2sf_ftype_v2sf_v2sf
, IX86_BUILTIN_PFRSQIT1
);
17263 def_builtin (MASK_3DNOW
, "__builtin_ia32_pfsub", v2sf_ftype_v2sf_v2sf
, IX86_BUILTIN_PFSUB
);
17264 def_builtin (MASK_3DNOW
, "__builtin_ia32_pfsubr", v2sf_ftype_v2sf_v2sf
, IX86_BUILTIN_PFSUBR
);
17265 def_builtin (MASK_3DNOW
, "__builtin_ia32_pi2fd", v2sf_ftype_v2si
, IX86_BUILTIN_PI2FD
);
17266 def_builtin (MASK_3DNOW
, "__builtin_ia32_pmulhrw", v4hi_ftype_v4hi_v4hi
, IX86_BUILTIN_PMULHRW
);
17268 /* 3DNow! extension as used in the Athlon CPU. */
17269 def_builtin (MASK_3DNOW_A
, "__builtin_ia32_pf2iw", v2si_ftype_v2sf
, IX86_BUILTIN_PF2IW
);
17270 def_builtin (MASK_3DNOW_A
, "__builtin_ia32_pfnacc", v2sf_ftype_v2sf_v2sf
, IX86_BUILTIN_PFNACC
);
17271 def_builtin (MASK_3DNOW_A
, "__builtin_ia32_pfpnacc", v2sf_ftype_v2sf_v2sf
, IX86_BUILTIN_PFPNACC
);
17272 def_builtin (MASK_3DNOW_A
, "__builtin_ia32_pi2fw", v2sf_ftype_v2si
, IX86_BUILTIN_PI2FW
);
17273 def_builtin (MASK_3DNOW_A
, "__builtin_ia32_pswapdsf", v2sf_ftype_v2sf
, IX86_BUILTIN_PSWAPDSF
);
17274 def_builtin (MASK_3DNOW_A
, "__builtin_ia32_pswapdsi", v2si_ftype_v2si
, IX86_BUILTIN_PSWAPDSI
);
17277 def_builtin (MASK_SSE2
, "__builtin_ia32_maskmovdqu", void_ftype_v16qi_v16qi_pchar
, IX86_BUILTIN_MASKMOVDQU
);
17279 def_builtin (MASK_SSE2
, "__builtin_ia32_loadupd", v2df_ftype_pcdouble
, IX86_BUILTIN_LOADUPD
);
17280 def_builtin (MASK_SSE2
, "__builtin_ia32_storeupd", void_ftype_pdouble_v2df
, IX86_BUILTIN_STOREUPD
);
17282 def_builtin (MASK_SSE2
, "__builtin_ia32_loadhpd", v2df_ftype_v2df_pcdouble
, IX86_BUILTIN_LOADHPD
);
17283 def_builtin (MASK_SSE2
, "__builtin_ia32_loadlpd", v2df_ftype_v2df_pcdouble
, IX86_BUILTIN_LOADLPD
);
17285 def_builtin (MASK_SSE2
, "__builtin_ia32_movmskpd", int_ftype_v2df
, IX86_BUILTIN_MOVMSKPD
);
17286 def_builtin (MASK_SSE2
, "__builtin_ia32_pmovmskb128", int_ftype_v16qi
, IX86_BUILTIN_PMOVMSKB128
);
17287 def_builtin (MASK_SSE2
, "__builtin_ia32_movnti", void_ftype_pint_int
, IX86_BUILTIN_MOVNTI
);
17288 def_builtin (MASK_SSE2
, "__builtin_ia32_movntpd", void_ftype_pdouble_v2df
, IX86_BUILTIN_MOVNTPD
);
17289 def_builtin (MASK_SSE2
, "__builtin_ia32_movntdq", void_ftype_pv2di_v2di
, IX86_BUILTIN_MOVNTDQ
);
17291 def_builtin (MASK_SSE2
, "__builtin_ia32_pshufd", v4si_ftype_v4si_int
, IX86_BUILTIN_PSHUFD
);
17292 def_builtin (MASK_SSE2
, "__builtin_ia32_pshuflw", v8hi_ftype_v8hi_int
, IX86_BUILTIN_PSHUFLW
);
17293 def_builtin (MASK_SSE2
, "__builtin_ia32_pshufhw", v8hi_ftype_v8hi_int
, IX86_BUILTIN_PSHUFHW
);
17294 def_builtin (MASK_SSE2
, "__builtin_ia32_psadbw128", v2di_ftype_v16qi_v16qi
, IX86_BUILTIN_PSADBW128
);
17296 def_builtin_const (MASK_SSE2
, "__builtin_ia32_sqrtpd", v2df_ftype_v2df
, IX86_BUILTIN_SQRTPD
);
17297 def_builtin_const (MASK_SSE2
, "__builtin_ia32_sqrtsd", v2df_ftype_v2df
, IX86_BUILTIN_SQRTSD
);
17299 def_builtin (MASK_SSE2
, "__builtin_ia32_shufpd", v2df_ftype_v2df_v2df_int
, IX86_BUILTIN_SHUFPD
);
17301 def_builtin_const (MASK_SSE2
, "__builtin_ia32_cvtdq2pd", v2df_ftype_v4si
, IX86_BUILTIN_CVTDQ2PD
);
17302 def_builtin_const (MASK_SSE2
, "__builtin_ia32_cvtdq2ps", v4sf_ftype_v4si
, IX86_BUILTIN_CVTDQ2PS
);
17304 def_builtin_const (MASK_SSE2
, "__builtin_ia32_cvtpd2dq", v4si_ftype_v2df
, IX86_BUILTIN_CVTPD2DQ
);
17305 def_builtin_const (MASK_SSE2
, "__builtin_ia32_cvtpd2pi", v2si_ftype_v2df
, IX86_BUILTIN_CVTPD2PI
);
17306 def_builtin_const (MASK_SSE2
, "__builtin_ia32_cvtpd2ps", v4sf_ftype_v2df
, IX86_BUILTIN_CVTPD2PS
);
17307 def_builtin_const (MASK_SSE2
, "__builtin_ia32_cvttpd2dq", v4si_ftype_v2df
, IX86_BUILTIN_CVTTPD2DQ
);
17308 def_builtin_const (MASK_SSE2
, "__builtin_ia32_cvttpd2pi", v2si_ftype_v2df
, IX86_BUILTIN_CVTTPD2PI
);
17310 def_builtin_const (MASK_SSE2
, "__builtin_ia32_cvtpi2pd", v2df_ftype_v2si
, IX86_BUILTIN_CVTPI2PD
);
17312 def_builtin_const (MASK_SSE2
, "__builtin_ia32_cvtsd2si", int_ftype_v2df
, IX86_BUILTIN_CVTSD2SI
);
17313 def_builtin_const (MASK_SSE2
, "__builtin_ia32_cvttsd2si", int_ftype_v2df
, IX86_BUILTIN_CVTTSD2SI
);
17314 def_builtin_const (MASK_SSE2
| MASK_64BIT
, "__builtin_ia32_cvtsd2si64", int64_ftype_v2df
, IX86_BUILTIN_CVTSD2SI64
);
17315 def_builtin_const (MASK_SSE2
| MASK_64BIT
, "__builtin_ia32_cvttsd2si64", int64_ftype_v2df
, IX86_BUILTIN_CVTTSD2SI64
);
17317 def_builtin_const (MASK_SSE2
, "__builtin_ia32_cvtps2dq", v4si_ftype_v4sf
, IX86_BUILTIN_CVTPS2DQ
);
17318 def_builtin_const (MASK_SSE2
, "__builtin_ia32_cvtps2pd", v2df_ftype_v4sf
, IX86_BUILTIN_CVTPS2PD
);
17319 def_builtin_const (MASK_SSE2
, "__builtin_ia32_cvttps2dq", v4si_ftype_v4sf
, IX86_BUILTIN_CVTTPS2DQ
);
17321 def_builtin_const (MASK_SSE2
, "__builtin_ia32_cvtsi2sd", v2df_ftype_v2df_int
, IX86_BUILTIN_CVTSI2SD
);
17322 def_builtin_const (MASK_SSE2
| MASK_64BIT
, "__builtin_ia32_cvtsi642sd", v2df_ftype_v2df_int64
, IX86_BUILTIN_CVTSI642SD
);
17323 def_builtin_const (MASK_SSE2
, "__builtin_ia32_cvtsd2ss", v4sf_ftype_v4sf_v2df
, IX86_BUILTIN_CVTSD2SS
);
17324 def_builtin_const (MASK_SSE2
, "__builtin_ia32_cvtss2sd", v2df_ftype_v2df_v4sf
, IX86_BUILTIN_CVTSS2SD
);
17326 def_builtin (MASK_SSE2
, "__builtin_ia32_clflush", void_ftype_pcvoid
, IX86_BUILTIN_CLFLUSH
);
17327 def_builtin (MASK_SSE2
, "__builtin_ia32_lfence", void_ftype_void
, IX86_BUILTIN_LFENCE
);
17328 def_builtin (MASK_SSE2
, "__builtin_ia32_mfence", void_ftype_void
, IX86_BUILTIN_MFENCE
);
17330 def_builtin (MASK_SSE2
, "__builtin_ia32_loaddqu", v16qi_ftype_pcchar
, IX86_BUILTIN_LOADDQU
);
17331 def_builtin (MASK_SSE2
, "__builtin_ia32_storedqu", void_ftype_pchar_v16qi
, IX86_BUILTIN_STOREDQU
);
17333 def_builtin (MASK_SSE2
, "__builtin_ia32_pmuludq", di_ftype_v2si_v2si
, IX86_BUILTIN_PMULUDQ
);
17334 def_builtin (MASK_SSE2
, "__builtin_ia32_pmuludq128", v2di_ftype_v4si_v4si
, IX86_BUILTIN_PMULUDQ128
);
17336 def_builtin (MASK_SSE2
, "__builtin_ia32_psllw128", v8hi_ftype_v8hi_v2di
, IX86_BUILTIN_PSLLW128
);
17337 def_builtin (MASK_SSE2
, "__builtin_ia32_pslld128", v4si_ftype_v4si_v2di
, IX86_BUILTIN_PSLLD128
);
17338 def_builtin (MASK_SSE2
, "__builtin_ia32_psllq128", v2di_ftype_v2di_v2di
, IX86_BUILTIN_PSLLQ128
);
17340 def_builtin (MASK_SSE2
, "__builtin_ia32_psrlw128", v8hi_ftype_v8hi_v2di
, IX86_BUILTIN_PSRLW128
);
17341 def_builtin (MASK_SSE2
, "__builtin_ia32_psrld128", v4si_ftype_v4si_v2di
, IX86_BUILTIN_PSRLD128
);
17342 def_builtin (MASK_SSE2
, "__builtin_ia32_psrlq128", v2di_ftype_v2di_v2di
, IX86_BUILTIN_PSRLQ128
);
17344 def_builtin (MASK_SSE2
, "__builtin_ia32_psraw128", v8hi_ftype_v8hi_v2di
, IX86_BUILTIN_PSRAW128
);
17345 def_builtin (MASK_SSE2
, "__builtin_ia32_psrad128", v4si_ftype_v4si_v2di
, IX86_BUILTIN_PSRAD128
);
17347 def_builtin (MASK_SSE2
, "__builtin_ia32_pslldqi128", v2di_ftype_v2di_int
, IX86_BUILTIN_PSLLDQI128
);
17348 def_builtin (MASK_SSE2
, "__builtin_ia32_psllwi128", v8hi_ftype_v8hi_int
, IX86_BUILTIN_PSLLWI128
);
17349 def_builtin (MASK_SSE2
, "__builtin_ia32_pslldi128", v4si_ftype_v4si_int
, IX86_BUILTIN_PSLLDI128
);
17350 def_builtin (MASK_SSE2
, "__builtin_ia32_psllqi128", v2di_ftype_v2di_int
, IX86_BUILTIN_PSLLQI128
);
17352 def_builtin (MASK_SSE2
, "__builtin_ia32_psrldqi128", v2di_ftype_v2di_int
, IX86_BUILTIN_PSRLDQI128
);
17353 def_builtin (MASK_SSE2
, "__builtin_ia32_psrlwi128", v8hi_ftype_v8hi_int
, IX86_BUILTIN_PSRLWI128
);
17354 def_builtin (MASK_SSE2
, "__builtin_ia32_psrldi128", v4si_ftype_v4si_int
, IX86_BUILTIN_PSRLDI128
);
17355 def_builtin (MASK_SSE2
, "__builtin_ia32_psrlqi128", v2di_ftype_v2di_int
, IX86_BUILTIN_PSRLQI128
);
17357 def_builtin (MASK_SSE2
, "__builtin_ia32_psrawi128", v8hi_ftype_v8hi_int
, IX86_BUILTIN_PSRAWI128
);
17358 def_builtin (MASK_SSE2
, "__builtin_ia32_psradi128", v4si_ftype_v4si_int
, IX86_BUILTIN_PSRADI128
);
17360 def_builtin (MASK_SSE2
, "__builtin_ia32_pmaddwd128", v4si_ftype_v8hi_v8hi
, IX86_BUILTIN_PMADDWD128
);
17362 /* Prescott New Instructions. */
17363 def_builtin (MASK_SSE3
, "__builtin_ia32_monitor",
17364 void_ftype_pcvoid_unsigned_unsigned
,
17365 IX86_BUILTIN_MONITOR
);
17366 def_builtin (MASK_SSE3
, "__builtin_ia32_mwait",
17367 void_ftype_unsigned_unsigned
,
17368 IX86_BUILTIN_MWAIT
);
17369 def_builtin (MASK_SSE3
, "__builtin_ia32_lddqu",
17370 v16qi_ftype_pcchar
, IX86_BUILTIN_LDDQU
);
17373 def_builtin (MASK_SSSE3
, "__builtin_ia32_palignr128",
17374 v2di_ftype_v2di_v2di_int
, IX86_BUILTIN_PALIGNR128
);
17375 def_builtin (MASK_SSSE3
, "__builtin_ia32_palignr", di_ftype_di_di_int
,
17376 IX86_BUILTIN_PALIGNR
);
17378 /* AMDFAM10 SSE4A New built-ins */
17379 def_builtin (MASK_SSE4A
, "__builtin_ia32_movntsd",
17380 void_ftype_pdouble_v2df
, IX86_BUILTIN_MOVNTSD
);
17381 def_builtin (MASK_SSE4A
, "__builtin_ia32_movntss",
17382 void_ftype_pfloat_v4sf
, IX86_BUILTIN_MOVNTSS
);
17383 def_builtin (MASK_SSE4A
, "__builtin_ia32_extrqi",
17384 v2di_ftype_v2di_unsigned_unsigned
, IX86_BUILTIN_EXTRQI
);
17385 def_builtin (MASK_SSE4A
, "__builtin_ia32_extrq",
17386 v2di_ftype_v2di_v16qi
, IX86_BUILTIN_EXTRQ
);
17387 def_builtin (MASK_SSE4A
, "__builtin_ia32_insertqi",
17388 v2di_ftype_v2di_v2di_unsigned_unsigned
, IX86_BUILTIN_INSERTQI
);
17389 def_builtin (MASK_SSE4A
, "__builtin_ia32_insertq",
17390 v2di_ftype_v2di_v2di
, IX86_BUILTIN_INSERTQ
);
17392 /* Access to the vec_init patterns. */
17393 ftype
= build_function_type_list (V2SI_type_node
, integer_type_node
,
17394 integer_type_node
, NULL_TREE
);
17395 def_builtin (MASK_MMX
, "__builtin_ia32_vec_init_v2si",
17396 ftype
, IX86_BUILTIN_VEC_INIT_V2SI
);
17398 ftype
= build_function_type_list (V4HI_type_node
, short_integer_type_node
,
17399 short_integer_type_node
,
17400 short_integer_type_node
,
17401 short_integer_type_node
, NULL_TREE
);
17402 def_builtin (MASK_MMX
, "__builtin_ia32_vec_init_v4hi",
17403 ftype
, IX86_BUILTIN_VEC_INIT_V4HI
);
17405 ftype
= build_function_type_list (V8QI_type_node
, char_type_node
,
17406 char_type_node
, char_type_node
,
17407 char_type_node
, char_type_node
,
17408 char_type_node
, char_type_node
,
17409 char_type_node
, NULL_TREE
);
17410 def_builtin (MASK_MMX
, "__builtin_ia32_vec_init_v8qi",
17411 ftype
, IX86_BUILTIN_VEC_INIT_V8QI
);
17413 /* Access to the vec_extract patterns. */
17414 ftype
= build_function_type_list (double_type_node
, V2DF_type_node
,
17415 integer_type_node
, NULL_TREE
);
17416 def_builtin (MASK_SSE
, "__builtin_ia32_vec_ext_v2df",
17417 ftype
, IX86_BUILTIN_VEC_EXT_V2DF
);
17419 ftype
= build_function_type_list (long_long_integer_type_node
,
17420 V2DI_type_node
, integer_type_node
,
17422 def_builtin (MASK_SSE
, "__builtin_ia32_vec_ext_v2di",
17423 ftype
, IX86_BUILTIN_VEC_EXT_V2DI
);
17425 ftype
= build_function_type_list (float_type_node
, V4SF_type_node
,
17426 integer_type_node
, NULL_TREE
);
17427 def_builtin (MASK_SSE
, "__builtin_ia32_vec_ext_v4sf",
17428 ftype
, IX86_BUILTIN_VEC_EXT_V4SF
);
17430 ftype
= build_function_type_list (intSI_type_node
, V4SI_type_node
,
17431 integer_type_node
, NULL_TREE
);
17432 def_builtin (MASK_SSE
, "__builtin_ia32_vec_ext_v4si",
17433 ftype
, IX86_BUILTIN_VEC_EXT_V4SI
);
17435 ftype
= build_function_type_list (intHI_type_node
, V8HI_type_node
,
17436 integer_type_node
, NULL_TREE
);
17437 def_builtin (MASK_SSE
, "__builtin_ia32_vec_ext_v8hi",
17438 ftype
, IX86_BUILTIN_VEC_EXT_V8HI
);
17440 ftype
= build_function_type_list (intHI_type_node
, V4HI_type_node
,
17441 integer_type_node
, NULL_TREE
);
17442 def_builtin (MASK_SSE
| MASK_3DNOW_A
, "__builtin_ia32_vec_ext_v4hi",
17443 ftype
, IX86_BUILTIN_VEC_EXT_V4HI
);
17445 ftype
= build_function_type_list (intSI_type_node
, V2SI_type_node
,
17446 integer_type_node
, NULL_TREE
);
17447 def_builtin (MASK_MMX
, "__builtin_ia32_vec_ext_v2si",
17448 ftype
, IX86_BUILTIN_VEC_EXT_V2SI
);
17450 /* Access to the vec_set patterns. */
17451 ftype
= build_function_type_list (V8HI_type_node
, V8HI_type_node
,
17453 integer_type_node
, NULL_TREE
);
17454 def_builtin (MASK_SSE
, "__builtin_ia32_vec_set_v8hi",
17455 ftype
, IX86_BUILTIN_VEC_SET_V8HI
);
17457 ftype
= build_function_type_list (V4HI_type_node
, V4HI_type_node
,
17459 integer_type_node
, NULL_TREE
);
17460 def_builtin (MASK_SSE
| MASK_3DNOW_A
, "__builtin_ia32_vec_set_v4hi",
17461 ftype
, IX86_BUILTIN_VEC_SET_V4HI
);
17464 /* Errors in the source file can cause expand_expr to return const0_rtx
17465 where we expect a vector. To avoid crashing, use one of the vector
17466 clear instructions. */
17468 safe_vector_operand (rtx x
, enum machine_mode mode
)
17470 if (x
== const0_rtx
)
17471 x
= CONST0_RTX (mode
);
17475 /* Subroutine of ix86_expand_builtin to take care of binop insns. */
17478 ix86_expand_binop_builtin (enum insn_code icode
, tree exp
, rtx target
)
17481 tree arg0
= CALL_EXPR_ARG (exp
, 0);
17482 tree arg1
= CALL_EXPR_ARG (exp
, 1);
17483 rtx op0
= expand_normal (arg0
);
17484 rtx op1
= expand_normal (arg1
);
17485 enum machine_mode tmode
= insn_data
[icode
].operand
[0].mode
;
17486 enum machine_mode mode0
= insn_data
[icode
].operand
[1].mode
;
17487 enum machine_mode mode1
= insn_data
[icode
].operand
[2].mode
;
17489 if (VECTOR_MODE_P (mode0
))
17490 op0
= safe_vector_operand (op0
, mode0
);
17491 if (VECTOR_MODE_P (mode1
))
17492 op1
= safe_vector_operand (op1
, mode1
);
17494 if (optimize
|| !target
17495 || GET_MODE (target
) != tmode
17496 || ! (*insn_data
[icode
].operand
[0].predicate
) (target
, tmode
))
17497 target
= gen_reg_rtx (tmode
);
17499 if (GET_MODE (op1
) == SImode
&& mode1
== TImode
)
17501 rtx x
= gen_reg_rtx (V4SImode
);
17502 emit_insn (gen_sse2_loadd (x
, op1
));
17503 op1
= gen_lowpart (TImode
, x
);
17506 /* The insn must want input operands in the same modes as the
17508 gcc_assert ((GET_MODE (op0
) == mode0
|| GET_MODE (op0
) == VOIDmode
)
17509 && (GET_MODE (op1
) == mode1
|| GET_MODE (op1
) == VOIDmode
));
17511 if (!(*insn_data
[icode
].operand
[1].predicate
) (op0
, mode0
))
17512 op0
= copy_to_mode_reg (mode0
, op0
);
17513 if (!(*insn_data
[icode
].operand
[2].predicate
) (op1
, mode1
))
17514 op1
= copy_to_mode_reg (mode1
, op1
);
17516 /* ??? Using ix86_fixup_binary_operands is problematic when
17517 we've got mismatched modes. Fake it. */
17523 if (tmode
== mode0
&& tmode
== mode1
)
17525 target
= ix86_fixup_binary_operands (UNKNOWN
, tmode
, xops
);
17529 else if (optimize
|| !ix86_binary_operator_ok (UNKNOWN
, tmode
, xops
))
17531 op0
= force_reg (mode0
, op0
);
17532 op1
= force_reg (mode1
, op1
);
17533 target
= gen_reg_rtx (tmode
);
17536 pat
= GEN_FCN (icode
) (target
, op0
, op1
);
17543 /* Subroutine of ix86_expand_builtin to take care of stores. */
17546 ix86_expand_store_builtin (enum insn_code icode
, tree exp
)
17549 tree arg0
= CALL_EXPR_ARG (exp
, 0);
17550 tree arg1
= CALL_EXPR_ARG (exp
, 1);
17551 rtx op0
= expand_normal (arg0
);
17552 rtx op1
= expand_normal (arg1
);
17553 enum machine_mode mode0
= insn_data
[icode
].operand
[0].mode
;
17554 enum machine_mode mode1
= insn_data
[icode
].operand
[1].mode
;
17556 if (VECTOR_MODE_P (mode1
))
17557 op1
= safe_vector_operand (op1
, mode1
);
17559 op0
= gen_rtx_MEM (mode0
, copy_to_mode_reg (Pmode
, op0
));
17560 op1
= copy_to_mode_reg (mode1
, op1
);
17562 pat
= GEN_FCN (icode
) (op0
, op1
);
17568 /* Subroutine of ix86_expand_builtin to take care of unop insns. */
17571 ix86_expand_unop_builtin (enum insn_code icode
, tree exp
,
17572 rtx target
, int do_load
)
17575 tree arg0
= CALL_EXPR_ARG (exp
, 0);
17576 rtx op0
= expand_normal (arg0
);
17577 enum machine_mode tmode
= insn_data
[icode
].operand
[0].mode
;
17578 enum machine_mode mode0
= insn_data
[icode
].operand
[1].mode
;
17580 if (optimize
|| !target
17581 || GET_MODE (target
) != tmode
17582 || ! (*insn_data
[icode
].operand
[0].predicate
) (target
, tmode
))
17583 target
= gen_reg_rtx (tmode
);
17585 op0
= gen_rtx_MEM (mode0
, copy_to_mode_reg (Pmode
, op0
));
17588 if (VECTOR_MODE_P (mode0
))
17589 op0
= safe_vector_operand (op0
, mode0
);
17591 if ((optimize
&& !register_operand (op0
, mode0
))
17592 || ! (*insn_data
[icode
].operand
[1].predicate
) (op0
, mode0
))
17593 op0
= copy_to_mode_reg (mode0
, op0
);
17596 pat
= GEN_FCN (icode
) (target
, op0
);
17603 /* Subroutine of ix86_expand_builtin to take care of three special unop insns:
17604 sqrtss, rsqrtss, rcpss. */
17607 ix86_expand_unop1_builtin (enum insn_code icode
, tree exp
, rtx target
)
17610 tree arg0
= CALL_EXPR_ARG (exp
, 0);
17611 rtx op1
, op0
= expand_normal (arg0
);
17612 enum machine_mode tmode
= insn_data
[icode
].operand
[0].mode
;
17613 enum machine_mode mode0
= insn_data
[icode
].operand
[1].mode
;
17615 if (optimize
|| !target
17616 || GET_MODE (target
) != tmode
17617 || ! (*insn_data
[icode
].operand
[0].predicate
) (target
, tmode
))
17618 target
= gen_reg_rtx (tmode
);
17620 if (VECTOR_MODE_P (mode0
))
17621 op0
= safe_vector_operand (op0
, mode0
);
17623 if ((optimize
&& !register_operand (op0
, mode0
))
17624 || ! (*insn_data
[icode
].operand
[1].predicate
) (op0
, mode0
))
17625 op0
= copy_to_mode_reg (mode0
, op0
);
17628 if (! (*insn_data
[icode
].operand
[2].predicate
) (op1
, mode0
))
17629 op1
= copy_to_mode_reg (mode0
, op1
);
17631 pat
= GEN_FCN (icode
) (target
, op0
, op1
);
17638 /* Subroutine of ix86_expand_builtin to take care of comparison insns. */
17641 ix86_expand_sse_compare (const struct builtin_description
*d
, tree exp
,
17645 tree arg0
= CALL_EXPR_ARG (exp
, 0);
17646 tree arg1
= CALL_EXPR_ARG (exp
, 1);
17647 rtx op0
= expand_normal (arg0
);
17648 rtx op1
= expand_normal (arg1
);
17650 enum machine_mode tmode
= insn_data
[d
->icode
].operand
[0].mode
;
17651 enum machine_mode mode0
= insn_data
[d
->icode
].operand
[1].mode
;
17652 enum machine_mode mode1
= insn_data
[d
->icode
].operand
[2].mode
;
17653 enum rtx_code comparison
= d
->comparison
;
17655 if (VECTOR_MODE_P (mode0
))
17656 op0
= safe_vector_operand (op0
, mode0
);
17657 if (VECTOR_MODE_P (mode1
))
17658 op1
= safe_vector_operand (op1
, mode1
);
17660 /* Swap operands if we have a comparison that isn't available in
17662 if (d
->flag
& BUILTIN_DESC_SWAP_OPERANDS
)
17664 rtx tmp
= gen_reg_rtx (mode1
);
17665 emit_move_insn (tmp
, op1
);
17670 if (optimize
|| !target
17671 || GET_MODE (target
) != tmode
17672 || ! (*insn_data
[d
->icode
].operand
[0].predicate
) (target
, tmode
))
17673 target
= gen_reg_rtx (tmode
);
17675 if ((optimize
&& !register_operand (op0
, mode0
))
17676 || ! (*insn_data
[d
->icode
].operand
[1].predicate
) (op0
, mode0
))
17677 op0
= copy_to_mode_reg (mode0
, op0
);
17678 if ((optimize
&& !register_operand (op1
, mode1
))
17679 || ! (*insn_data
[d
->icode
].operand
[2].predicate
) (op1
, mode1
))
17680 op1
= copy_to_mode_reg (mode1
, op1
);
17682 op2
= gen_rtx_fmt_ee (comparison
, mode0
, op0
, op1
);
17683 pat
= GEN_FCN (d
->icode
) (target
, op0
, op1
, op2
);
17690 /* Subroutine of ix86_expand_builtin to take care of comi insns. */
17693 ix86_expand_sse_comi (const struct builtin_description
*d
, tree exp
,
17697 tree arg0
= CALL_EXPR_ARG (exp
, 0);
17698 tree arg1
= CALL_EXPR_ARG (exp
, 1);
17699 rtx op0
= expand_normal (arg0
);
17700 rtx op1
= expand_normal (arg1
);
17702 enum machine_mode mode0
= insn_data
[d
->icode
].operand
[0].mode
;
17703 enum machine_mode mode1
= insn_data
[d
->icode
].operand
[1].mode
;
17704 enum rtx_code comparison
= d
->comparison
;
17706 if (VECTOR_MODE_P (mode0
))
17707 op0
= safe_vector_operand (op0
, mode0
);
17708 if (VECTOR_MODE_P (mode1
))
17709 op1
= safe_vector_operand (op1
, mode1
);
17711 /* Swap operands if we have a comparison that isn't available in
17713 if (d
->flag
& BUILTIN_DESC_SWAP_OPERANDS
)
17720 target
= gen_reg_rtx (SImode
);
17721 emit_move_insn (target
, const0_rtx
);
17722 target
= gen_rtx_SUBREG (QImode
, target
, 0);
17724 if ((optimize
&& !register_operand (op0
, mode0
))
17725 || !(*insn_data
[d
->icode
].operand
[0].predicate
) (op0
, mode0
))
17726 op0
= copy_to_mode_reg (mode0
, op0
);
17727 if ((optimize
&& !register_operand (op1
, mode1
))
17728 || !(*insn_data
[d
->icode
].operand
[1].predicate
) (op1
, mode1
))
17729 op1
= copy_to_mode_reg (mode1
, op1
);
17731 op2
= gen_rtx_fmt_ee (comparison
, mode0
, op0
, op1
);
17732 pat
= GEN_FCN (d
->icode
) (op0
, op1
);
17736 emit_insn (gen_rtx_SET (VOIDmode
,
17737 gen_rtx_STRICT_LOW_PART (VOIDmode
, target
),
17738 gen_rtx_fmt_ee (comparison
, QImode
,
17742 return SUBREG_REG (target
);
17745 /* Return the integer constant in ARG. Constrain it to be in the range
17746 of the subparts of VEC_TYPE; issue an error if not. */
17749 get_element_number (tree vec_type
, tree arg
)
17751 unsigned HOST_WIDE_INT elt
, max
= TYPE_VECTOR_SUBPARTS (vec_type
) - 1;
17753 if (!host_integerp (arg
, 1)
17754 || (elt
= tree_low_cst (arg
, 1), elt
> max
))
17756 error ("selector must be an integer constant in the range 0..%wi", max
);
17763 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
17764 ix86_expand_vector_init. We DO have language-level syntax for this, in
17765 the form of (type){ init-list }. Except that since we can't place emms
17766 instructions from inside the compiler, we can't allow the use of MMX
17767 registers unless the user explicitly asks for it. So we do *not* define
17768 vec_set/vec_extract/vec_init patterns for MMX modes in mmx.md. Instead
17769 we have builtins invoked by mmintrin.h that gives us license to emit
17770 these sorts of instructions. */
17773 ix86_expand_vec_init_builtin (tree type
, tree exp
, rtx target
)
17775 enum machine_mode tmode
= TYPE_MODE (type
);
17776 enum machine_mode inner_mode
= GET_MODE_INNER (tmode
);
17777 int i
, n_elt
= GET_MODE_NUNITS (tmode
);
17778 rtvec v
= rtvec_alloc (n_elt
);
17780 gcc_assert (VECTOR_MODE_P (tmode
));
17781 gcc_assert (call_expr_nargs (exp
) == n_elt
);
17783 for (i
= 0; i
< n_elt
; ++i
)
17785 rtx x
= expand_normal (CALL_EXPR_ARG (exp
, i
));
17786 RTVEC_ELT (v
, i
) = gen_lowpart (inner_mode
, x
);
17789 if (!target
|| !register_operand (target
, tmode
))
17790 target
= gen_reg_rtx (tmode
);
17792 ix86_expand_vector_init (true, target
, gen_rtx_PARALLEL (tmode
, v
));
17796 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
17797 ix86_expand_vector_extract. They would be redundant (for non-MMX) if we
17798 had a language-level syntax for referencing vector elements. */
17801 ix86_expand_vec_ext_builtin (tree exp
, rtx target
)
17803 enum machine_mode tmode
, mode0
;
17808 arg0
= CALL_EXPR_ARG (exp
, 0);
17809 arg1
= CALL_EXPR_ARG (exp
, 1);
17811 op0
= expand_normal (arg0
);
17812 elt
= get_element_number (TREE_TYPE (arg0
), arg1
);
17814 tmode
= TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0
)));
17815 mode0
= TYPE_MODE (TREE_TYPE (arg0
));
17816 gcc_assert (VECTOR_MODE_P (mode0
));
17818 op0
= force_reg (mode0
, op0
);
17820 if (optimize
|| !target
|| !register_operand (target
, tmode
))
17821 target
= gen_reg_rtx (tmode
);
17823 ix86_expand_vector_extract (true, target
, op0
, elt
);
17828 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
17829 ix86_expand_vector_set. They would be redundant (for non-MMX) if we had
17830 a language-level syntax for referencing vector elements. */
17833 ix86_expand_vec_set_builtin (tree exp
)
17835 enum machine_mode tmode
, mode1
;
17836 tree arg0
, arg1
, arg2
;
17840 arg0
= CALL_EXPR_ARG (exp
, 0);
17841 arg1
= CALL_EXPR_ARG (exp
, 1);
17842 arg2
= CALL_EXPR_ARG (exp
, 2);
17844 tmode
= TYPE_MODE (TREE_TYPE (arg0
));
17845 mode1
= TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0
)));
17846 gcc_assert (VECTOR_MODE_P (tmode
));
17848 op0
= expand_expr (arg0
, NULL_RTX
, tmode
, 0);
17849 op1
= expand_expr (arg1
, NULL_RTX
, mode1
, 0);
17850 elt
= get_element_number (TREE_TYPE (arg0
), arg2
);
17852 if (GET_MODE (op1
) != mode1
&& GET_MODE (op1
) != VOIDmode
)
17853 op1
= convert_modes (mode1
, GET_MODE (op1
), op1
, true);
17855 op0
= force_reg (tmode
, op0
);
17856 op1
= force_reg (mode1
, op1
);
17858 ix86_expand_vector_set (true, op0
, op1
, elt
);
17863 /* Expand an expression EXP that calls a built-in function,
17864 with result going to TARGET if that's convenient
17865 (and in mode MODE if that's convenient).
17866 SUBTARGET may be used as the target for computing one of EXP's operands.
17867 IGNORE is nonzero if the value is to be ignored. */
17870 ix86_expand_builtin (tree exp
, rtx target
, rtx subtarget ATTRIBUTE_UNUSED
,
17871 enum machine_mode mode ATTRIBUTE_UNUSED
,
17872 int ignore ATTRIBUTE_UNUSED
)
17874 const struct builtin_description
*d
;
17876 enum insn_code icode
;
17877 tree fndecl
= TREE_OPERAND (CALL_EXPR_FN (exp
), 0);
17878 tree arg0
, arg1
, arg2
, arg3
;
17879 rtx op0
, op1
, op2
, op3
, pat
;
17880 enum machine_mode tmode
, mode0
, mode1
, mode2
, mode3
, mode4
;
17881 unsigned int fcode
= DECL_FUNCTION_CODE (fndecl
);
17885 case IX86_BUILTIN_EMMS
:
17886 emit_insn (gen_mmx_emms ());
17889 case IX86_BUILTIN_SFENCE
:
17890 emit_insn (gen_sse_sfence ());
17893 case IX86_BUILTIN_MASKMOVQ
:
17894 case IX86_BUILTIN_MASKMOVDQU
:
17895 icode
= (fcode
== IX86_BUILTIN_MASKMOVQ
17896 ? CODE_FOR_mmx_maskmovq
17897 : CODE_FOR_sse2_maskmovdqu
);
17898 /* Note the arg order is different from the operand order. */
17899 arg1
= CALL_EXPR_ARG (exp
, 0);
17900 arg2
= CALL_EXPR_ARG (exp
, 1);
17901 arg0
= CALL_EXPR_ARG (exp
, 2);
17902 op0
= expand_normal (arg0
);
17903 op1
= expand_normal (arg1
);
17904 op2
= expand_normal (arg2
);
17905 mode0
= insn_data
[icode
].operand
[0].mode
;
17906 mode1
= insn_data
[icode
].operand
[1].mode
;
17907 mode2
= insn_data
[icode
].operand
[2].mode
;
17909 op0
= force_reg (Pmode
, op0
);
17910 op0
= gen_rtx_MEM (mode1
, op0
);
17912 if (! (*insn_data
[icode
].operand
[0].predicate
) (op0
, mode0
))
17913 op0
= copy_to_mode_reg (mode0
, op0
);
17914 if (! (*insn_data
[icode
].operand
[1].predicate
) (op1
, mode1
))
17915 op1
= copy_to_mode_reg (mode1
, op1
);
17916 if (! (*insn_data
[icode
].operand
[2].predicate
) (op2
, mode2
))
17917 op2
= copy_to_mode_reg (mode2
, op2
);
17918 pat
= GEN_FCN (icode
) (op0
, op1
, op2
);
17924 case IX86_BUILTIN_SQRTSS
:
17925 return ix86_expand_unop1_builtin (CODE_FOR_sse_vmsqrtv4sf2
, exp
, target
);
17926 case IX86_BUILTIN_RSQRTSS
:
17927 return ix86_expand_unop1_builtin (CODE_FOR_sse_vmrsqrtv4sf2
, exp
, target
);
17928 case IX86_BUILTIN_RCPSS
:
17929 return ix86_expand_unop1_builtin (CODE_FOR_sse_vmrcpv4sf2
, exp
, target
);
17931 case IX86_BUILTIN_LOADUPS
:
17932 return ix86_expand_unop_builtin (CODE_FOR_sse_movups
, exp
, target
, 1);
17934 case IX86_BUILTIN_STOREUPS
:
17935 return ix86_expand_store_builtin (CODE_FOR_sse_movups
, exp
);
17937 case IX86_BUILTIN_LOADHPS
:
17938 case IX86_BUILTIN_LOADLPS
:
17939 case IX86_BUILTIN_LOADHPD
:
17940 case IX86_BUILTIN_LOADLPD
:
17941 icode
= (fcode
== IX86_BUILTIN_LOADHPS
? CODE_FOR_sse_loadhps
17942 : fcode
== IX86_BUILTIN_LOADLPS
? CODE_FOR_sse_loadlps
17943 : fcode
== IX86_BUILTIN_LOADHPD
? CODE_FOR_sse2_loadhpd
17944 : CODE_FOR_sse2_loadlpd
);
17945 arg0
= CALL_EXPR_ARG (exp
, 0);
17946 arg1
= CALL_EXPR_ARG (exp
, 1);
17947 op0
= expand_normal (arg0
);
17948 op1
= expand_normal (arg1
);
17949 tmode
= insn_data
[icode
].operand
[0].mode
;
17950 mode0
= insn_data
[icode
].operand
[1].mode
;
17951 mode1
= insn_data
[icode
].operand
[2].mode
;
17953 op0
= force_reg (mode0
, op0
);
17954 op1
= gen_rtx_MEM (mode1
, copy_to_mode_reg (Pmode
, op1
));
17955 if (optimize
|| target
== 0
17956 || GET_MODE (target
) != tmode
17957 || !register_operand (target
, tmode
))
17958 target
= gen_reg_rtx (tmode
);
17959 pat
= GEN_FCN (icode
) (target
, op0
, op1
);
17965 case IX86_BUILTIN_STOREHPS
:
17966 case IX86_BUILTIN_STORELPS
:
17967 icode
= (fcode
== IX86_BUILTIN_STOREHPS
? CODE_FOR_sse_storehps
17968 : CODE_FOR_sse_storelps
);
17969 arg0
= CALL_EXPR_ARG (exp
, 0);
17970 arg1
= CALL_EXPR_ARG (exp
, 1);
17971 op0
= expand_normal (arg0
);
17972 op1
= expand_normal (arg1
);
17973 mode0
= insn_data
[icode
].operand
[0].mode
;
17974 mode1
= insn_data
[icode
].operand
[1].mode
;
17976 op0
= gen_rtx_MEM (mode0
, copy_to_mode_reg (Pmode
, op0
));
17977 op1
= force_reg (mode1
, op1
);
17979 pat
= GEN_FCN (icode
) (op0
, op1
);
17985 case IX86_BUILTIN_MOVNTPS
:
17986 return ix86_expand_store_builtin (CODE_FOR_sse_movntv4sf
, exp
);
17987 case IX86_BUILTIN_MOVNTQ
:
17988 return ix86_expand_store_builtin (CODE_FOR_sse_movntdi
, exp
);
17990 case IX86_BUILTIN_LDMXCSR
:
17991 op0
= expand_normal (CALL_EXPR_ARG (exp
, 0));
17992 target
= assign_386_stack_local (SImode
, SLOT_TEMP
);
17993 emit_move_insn (target
, op0
);
17994 emit_insn (gen_sse_ldmxcsr (target
));
17997 case IX86_BUILTIN_STMXCSR
:
17998 target
= assign_386_stack_local (SImode
, SLOT_TEMP
);
17999 emit_insn (gen_sse_stmxcsr (target
));
18000 return copy_to_mode_reg (SImode
, target
);
18002 case IX86_BUILTIN_SHUFPS
:
18003 case IX86_BUILTIN_SHUFPD
:
18004 icode
= (fcode
== IX86_BUILTIN_SHUFPS
18005 ? CODE_FOR_sse_shufps
18006 : CODE_FOR_sse2_shufpd
);
18007 arg0
= CALL_EXPR_ARG (exp
, 0);
18008 arg1
= CALL_EXPR_ARG (exp
, 1);
18009 arg2
= CALL_EXPR_ARG (exp
, 2);
18010 op0
= expand_normal (arg0
);
18011 op1
= expand_normal (arg1
);
18012 op2
= expand_normal (arg2
);
18013 tmode
= insn_data
[icode
].operand
[0].mode
;
18014 mode0
= insn_data
[icode
].operand
[1].mode
;
18015 mode1
= insn_data
[icode
].operand
[2].mode
;
18016 mode2
= insn_data
[icode
].operand
[3].mode
;
18018 if (! (*insn_data
[icode
].operand
[1].predicate
) (op0
, mode0
))
18019 op0
= copy_to_mode_reg (mode0
, op0
);
18020 if ((optimize
&& !register_operand (op1
, mode1
))
18021 || !(*insn_data
[icode
].operand
[2].predicate
) (op1
, mode1
))
18022 op1
= copy_to_mode_reg (mode1
, op1
);
18023 if (! (*insn_data
[icode
].operand
[3].predicate
) (op2
, mode2
))
18025 /* @@@ better error message */
18026 error ("mask must be an immediate");
18027 return gen_reg_rtx (tmode
);
18029 if (optimize
|| target
== 0
18030 || GET_MODE (target
) != tmode
18031 || ! (*insn_data
[icode
].operand
[0].predicate
) (target
, tmode
))
18032 target
= gen_reg_rtx (tmode
);
18033 pat
= GEN_FCN (icode
) (target
, op0
, op1
, op2
);
18039 case IX86_BUILTIN_PSHUFW
:
18040 case IX86_BUILTIN_PSHUFD
:
18041 case IX86_BUILTIN_PSHUFHW
:
18042 case IX86_BUILTIN_PSHUFLW
:
18043 icode
= ( fcode
== IX86_BUILTIN_PSHUFHW
? CODE_FOR_sse2_pshufhw
18044 : fcode
== IX86_BUILTIN_PSHUFLW
? CODE_FOR_sse2_pshuflw
18045 : fcode
== IX86_BUILTIN_PSHUFD
? CODE_FOR_sse2_pshufd
18046 : CODE_FOR_mmx_pshufw
);
18047 arg0
= CALL_EXPR_ARG (exp
, 0);
18048 arg1
= CALL_EXPR_ARG (exp
, 1);
18049 op0
= expand_normal (arg0
);
18050 op1
= expand_normal (arg1
);
18051 tmode
= insn_data
[icode
].operand
[0].mode
;
18052 mode1
= insn_data
[icode
].operand
[1].mode
;
18053 mode2
= insn_data
[icode
].operand
[2].mode
;
18055 if (! (*insn_data
[icode
].operand
[1].predicate
) (op0
, mode1
))
18056 op0
= copy_to_mode_reg (mode1
, op0
);
18057 if (! (*insn_data
[icode
].operand
[2].predicate
) (op1
, mode2
))
18059 /* @@@ better error message */
18060 error ("mask must be an immediate");
18064 || GET_MODE (target
) != tmode
18065 || ! (*insn_data
[icode
].operand
[0].predicate
) (target
, tmode
))
18066 target
= gen_reg_rtx (tmode
);
18067 pat
= GEN_FCN (icode
) (target
, op0
, op1
);
18073 case IX86_BUILTIN_PSLLDQI128
:
18074 case IX86_BUILTIN_PSRLDQI128
:
18075 icode
= ( fcode
== IX86_BUILTIN_PSLLDQI128
? CODE_FOR_sse2_ashlti3
18076 : CODE_FOR_sse2_lshrti3
);
18077 arg0
= CALL_EXPR_ARG (exp
, 0);
18078 arg1
= CALL_EXPR_ARG (exp
, 1);
18079 op0
= expand_normal (arg0
);
18080 op1
= expand_normal (arg1
);
18081 tmode
= insn_data
[icode
].operand
[0].mode
;
18082 mode1
= insn_data
[icode
].operand
[1].mode
;
18083 mode2
= insn_data
[icode
].operand
[2].mode
;
18085 if (! (*insn_data
[icode
].operand
[1].predicate
) (op0
, mode1
))
18087 op0
= copy_to_reg (op0
);
18088 op0
= simplify_gen_subreg (mode1
, op0
, GET_MODE (op0
), 0);
18090 if (! (*insn_data
[icode
].operand
[2].predicate
) (op1
, mode2
))
18092 error ("shift must be an immediate");
18095 target
= gen_reg_rtx (V2DImode
);
18096 pat
= GEN_FCN (icode
) (simplify_gen_subreg (tmode
, target
, V2DImode
, 0), op0
, op1
);
18102 case IX86_BUILTIN_FEMMS
:
18103 emit_insn (gen_mmx_femms ());
18106 case IX86_BUILTIN_PAVGUSB
:
18107 return ix86_expand_binop_builtin (CODE_FOR_mmx_uavgv8qi3
, exp
, target
);
18109 case IX86_BUILTIN_PF2ID
:
18110 return ix86_expand_unop_builtin (CODE_FOR_mmx_pf2id
, exp
, target
, 0);
18112 case IX86_BUILTIN_PFACC
:
18113 return ix86_expand_binop_builtin (CODE_FOR_mmx_haddv2sf3
, exp
, target
);
18115 case IX86_BUILTIN_PFADD
:
18116 return ix86_expand_binop_builtin (CODE_FOR_mmx_addv2sf3
, exp
, target
);
18118 case IX86_BUILTIN_PFCMPEQ
:
18119 return ix86_expand_binop_builtin (CODE_FOR_mmx_eqv2sf3
, exp
, target
);
18121 case IX86_BUILTIN_PFCMPGE
:
18122 return ix86_expand_binop_builtin (CODE_FOR_mmx_gev2sf3
, exp
, target
);
18124 case IX86_BUILTIN_PFCMPGT
:
18125 return ix86_expand_binop_builtin (CODE_FOR_mmx_gtv2sf3
, exp
, target
);
18127 case IX86_BUILTIN_PFMAX
:
18128 return ix86_expand_binop_builtin (CODE_FOR_mmx_smaxv2sf3
, exp
, target
);
18130 case IX86_BUILTIN_PFMIN
:
18131 return ix86_expand_binop_builtin (CODE_FOR_mmx_sminv2sf3
, exp
, target
);
18133 case IX86_BUILTIN_PFMUL
:
18134 return ix86_expand_binop_builtin (CODE_FOR_mmx_mulv2sf3
, exp
, target
);
18136 case IX86_BUILTIN_PFRCP
:
18137 return ix86_expand_unop_builtin (CODE_FOR_mmx_rcpv2sf2
, exp
, target
, 0);
18139 case IX86_BUILTIN_PFRCPIT1
:
18140 return ix86_expand_binop_builtin (CODE_FOR_mmx_rcpit1v2sf3
, exp
, target
);
18142 case IX86_BUILTIN_PFRCPIT2
:
18143 return ix86_expand_binop_builtin (CODE_FOR_mmx_rcpit2v2sf3
, exp
, target
);
18145 case IX86_BUILTIN_PFRSQIT1
:
18146 return ix86_expand_binop_builtin (CODE_FOR_mmx_rsqit1v2sf3
, exp
, target
);
18148 case IX86_BUILTIN_PFRSQRT
:
18149 return ix86_expand_unop_builtin (CODE_FOR_mmx_rsqrtv2sf2
, exp
, target
, 0);
18151 case IX86_BUILTIN_PFSUB
:
18152 return ix86_expand_binop_builtin (CODE_FOR_mmx_subv2sf3
, exp
, target
);
18154 case IX86_BUILTIN_PFSUBR
:
18155 return ix86_expand_binop_builtin (CODE_FOR_mmx_subrv2sf3
, exp
, target
);
18157 case IX86_BUILTIN_PI2FD
:
18158 return ix86_expand_unop_builtin (CODE_FOR_mmx_floatv2si2
, exp
, target
, 0);
18160 case IX86_BUILTIN_PMULHRW
:
18161 return ix86_expand_binop_builtin (CODE_FOR_mmx_pmulhrwv4hi3
, exp
, target
);
18163 case IX86_BUILTIN_PF2IW
:
18164 return ix86_expand_unop_builtin (CODE_FOR_mmx_pf2iw
, exp
, target
, 0);
18166 case IX86_BUILTIN_PFNACC
:
18167 return ix86_expand_binop_builtin (CODE_FOR_mmx_hsubv2sf3
, exp
, target
);
18169 case IX86_BUILTIN_PFPNACC
:
18170 return ix86_expand_binop_builtin (CODE_FOR_mmx_addsubv2sf3
, exp
, target
);
18172 case IX86_BUILTIN_PI2FW
:
18173 return ix86_expand_unop_builtin (CODE_FOR_mmx_pi2fw
, exp
, target
, 0);
18175 case IX86_BUILTIN_PSWAPDSI
:
18176 return ix86_expand_unop_builtin (CODE_FOR_mmx_pswapdv2si2
, exp
, target
, 0);
18178 case IX86_BUILTIN_PSWAPDSF
:
18179 return ix86_expand_unop_builtin (CODE_FOR_mmx_pswapdv2sf2
, exp
, target
, 0);
18181 case IX86_BUILTIN_SQRTSD
:
18182 return ix86_expand_unop1_builtin (CODE_FOR_sse2_vmsqrtv2df2
, exp
, target
);
18183 case IX86_BUILTIN_LOADUPD
:
18184 return ix86_expand_unop_builtin (CODE_FOR_sse2_movupd
, exp
, target
, 1);
18185 case IX86_BUILTIN_STOREUPD
:
18186 return ix86_expand_store_builtin (CODE_FOR_sse2_movupd
, exp
);
18188 case IX86_BUILTIN_MFENCE
:
18189 emit_insn (gen_sse2_mfence ());
18191 case IX86_BUILTIN_LFENCE
:
18192 emit_insn (gen_sse2_lfence ());
18195 case IX86_BUILTIN_CLFLUSH
:
18196 arg0
= CALL_EXPR_ARG (exp
, 0);
18197 op0
= expand_normal (arg0
);
18198 icode
= CODE_FOR_sse2_clflush
;
18199 if (! (*insn_data
[icode
].operand
[0].predicate
) (op0
, Pmode
))
18200 op0
= copy_to_mode_reg (Pmode
, op0
);
18202 emit_insn (gen_sse2_clflush (op0
));
18205 case IX86_BUILTIN_MOVNTPD
:
18206 return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2df
, exp
);
18207 case IX86_BUILTIN_MOVNTDQ
:
18208 return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2di
, exp
);
18209 case IX86_BUILTIN_MOVNTI
:
18210 return ix86_expand_store_builtin (CODE_FOR_sse2_movntsi
, exp
);
18212 case IX86_BUILTIN_LOADDQU
:
18213 return ix86_expand_unop_builtin (CODE_FOR_sse2_movdqu
, exp
, target
, 1);
18214 case IX86_BUILTIN_STOREDQU
:
18215 return ix86_expand_store_builtin (CODE_FOR_sse2_movdqu
, exp
);
18217 case IX86_BUILTIN_MONITOR
:
18218 arg0
= CALL_EXPR_ARG (exp
, 0);
18219 arg1
= CALL_EXPR_ARG (exp
, 1);
18220 arg2
= CALL_EXPR_ARG (exp
, 2);
18221 op0
= expand_normal (arg0
);
18222 op1
= expand_normal (arg1
);
18223 op2
= expand_normal (arg2
);
18225 op0
= copy_to_mode_reg (Pmode
, op0
);
18227 op1
= copy_to_mode_reg (SImode
, op1
);
18229 op2
= copy_to_mode_reg (SImode
, op2
);
18231 emit_insn (gen_sse3_monitor (op0
, op1
, op2
));
18233 emit_insn (gen_sse3_monitor64 (op0
, op1
, op2
));
18236 case IX86_BUILTIN_MWAIT
:
18237 arg0
= CALL_EXPR_ARG (exp
, 0);
18238 arg1
= CALL_EXPR_ARG (exp
, 1);
18239 op0
= expand_normal (arg0
);
18240 op1
= expand_normal (arg1
);
18242 op0
= copy_to_mode_reg (SImode
, op0
);
18244 op1
= copy_to_mode_reg (SImode
, op1
);
18245 emit_insn (gen_sse3_mwait (op0
, op1
));
18248 case IX86_BUILTIN_LDDQU
:
18249 return ix86_expand_unop_builtin (CODE_FOR_sse3_lddqu
, exp
,
18252 case IX86_BUILTIN_PALIGNR
:
18253 case IX86_BUILTIN_PALIGNR128
:
18254 if (fcode
== IX86_BUILTIN_PALIGNR
)
18256 icode
= CODE_FOR_ssse3_palignrdi
;
18261 icode
= CODE_FOR_ssse3_palignrti
;
18264 arg0
= CALL_EXPR_ARG (exp
, 0);
18265 arg1
= CALL_EXPR_ARG (exp
, 1);
18266 arg2
= CALL_EXPR_ARG (exp
, 2);
18267 op0
= expand_expr (arg0
, NULL_RTX
, VOIDmode
, 0);
18268 op1
= expand_expr (arg1
, NULL_RTX
, VOIDmode
, 0);
18269 op2
= expand_expr (arg2
, NULL_RTX
, VOIDmode
, 0);
18270 tmode
= insn_data
[icode
].operand
[0].mode
;
18271 mode1
= insn_data
[icode
].operand
[1].mode
;
18272 mode2
= insn_data
[icode
].operand
[2].mode
;
18273 mode3
= insn_data
[icode
].operand
[3].mode
;
18275 if (! (*insn_data
[icode
].operand
[1].predicate
) (op0
, mode1
))
18277 op0
= copy_to_reg (op0
);
18278 op0
= simplify_gen_subreg (mode1
, op0
, GET_MODE (op0
), 0);
18280 if (! (*insn_data
[icode
].operand
[2].predicate
) (op1
, mode2
))
18282 op1
= copy_to_reg (op1
);
18283 op1
= simplify_gen_subreg (mode2
, op1
, GET_MODE (op1
), 0);
18285 if (! (*insn_data
[icode
].operand
[3].predicate
) (op2
, mode3
))
18287 error ("shift must be an immediate");
18290 target
= gen_reg_rtx (mode
);
18291 pat
= GEN_FCN (icode
) (simplify_gen_subreg (tmode
, target
, mode
, 0),
18298 case IX86_BUILTIN_MOVNTSD
:
18299 return ix86_expand_store_builtin (CODE_FOR_sse4a_vmmovntv2df
, exp
);
18301 case IX86_BUILTIN_MOVNTSS
:
18302 return ix86_expand_store_builtin (CODE_FOR_sse4a_vmmovntv4sf
, exp
);
18304 case IX86_BUILTIN_INSERTQ
:
18305 case IX86_BUILTIN_EXTRQ
:
18306 icode
= (fcode
== IX86_BUILTIN_EXTRQ
18307 ? CODE_FOR_sse4a_extrq
18308 : CODE_FOR_sse4a_insertq
);
18309 arg0
= CALL_EXPR_ARG (exp
, 0);
18310 arg1
= CALL_EXPR_ARG (exp
, 1);
18311 op0
= expand_normal (arg0
);
18312 op1
= expand_normal (arg1
);
18313 tmode
= insn_data
[icode
].operand
[0].mode
;
18314 mode1
= insn_data
[icode
].operand
[1].mode
;
18315 mode2
= insn_data
[icode
].operand
[2].mode
;
18316 if (! (*insn_data
[icode
].operand
[1].predicate
) (op0
, mode1
))
18317 op0
= copy_to_mode_reg (mode1
, op0
);
18318 if (! (*insn_data
[icode
].operand
[2].predicate
) (op1
, mode2
))
18319 op1
= copy_to_mode_reg (mode2
, op1
);
18320 if (optimize
|| target
== 0
18321 || GET_MODE (target
) != tmode
18322 || ! (*insn_data
[icode
].operand
[0].predicate
) (target
, tmode
))
18323 target
= gen_reg_rtx (tmode
);
18324 pat
= GEN_FCN (icode
) (target
, op0
, op1
);
18330 case IX86_BUILTIN_EXTRQI
:
18331 icode
= CODE_FOR_sse4a_extrqi
;
18332 arg0
= CALL_EXPR_ARG (exp
, 0);
18333 arg1
= CALL_EXPR_ARG (exp
, 1);
18334 arg2
= CALL_EXPR_ARG (exp
, 2);
18335 op0
= expand_normal (arg0
);
18336 op1
= expand_normal (arg1
);
18337 op2
= expand_normal (arg2
);
18338 tmode
= insn_data
[icode
].operand
[0].mode
;
18339 mode1
= insn_data
[icode
].operand
[1].mode
;
18340 mode2
= insn_data
[icode
].operand
[2].mode
;
18341 mode3
= insn_data
[icode
].operand
[3].mode
;
18342 if (! (*insn_data
[icode
].operand
[1].predicate
) (op0
, mode1
))
18343 op0
= copy_to_mode_reg (mode1
, op0
);
18344 if (! (*insn_data
[icode
].operand
[2].predicate
) (op1
, mode2
))
18346 error ("index mask must be an immediate");
18347 return gen_reg_rtx (tmode
);
18349 if (! (*insn_data
[icode
].operand
[3].predicate
) (op2
, mode3
))
18351 error ("length mask must be an immediate");
18352 return gen_reg_rtx (tmode
);
18354 if (optimize
|| target
== 0
18355 || GET_MODE (target
) != tmode
18356 || ! (*insn_data
[icode
].operand
[0].predicate
) (target
, tmode
))
18357 target
= gen_reg_rtx (tmode
);
18358 pat
= GEN_FCN (icode
) (target
, op0
, op1
, op2
);
18364 case IX86_BUILTIN_INSERTQI
:
18365 icode
= CODE_FOR_sse4a_insertqi
;
18366 arg0
= CALL_EXPR_ARG (exp
, 0);
18367 arg1
= CALL_EXPR_ARG (exp
, 1);
18368 arg2
= CALL_EXPR_ARG (exp
, 2);
18369 arg3
= CALL_EXPR_ARG (exp
, 3);
18370 op0
= expand_normal (arg0
);
18371 op1
= expand_normal (arg1
);
18372 op2
= expand_normal (arg2
);
18373 op3
= expand_normal (arg3
);
18374 tmode
= insn_data
[icode
].operand
[0].mode
;
18375 mode1
= insn_data
[icode
].operand
[1].mode
;
18376 mode2
= insn_data
[icode
].operand
[2].mode
;
18377 mode3
= insn_data
[icode
].operand
[3].mode
;
18378 mode4
= insn_data
[icode
].operand
[4].mode
;
18380 if (! (*insn_data
[icode
].operand
[1].predicate
) (op0
, mode1
))
18381 op0
= copy_to_mode_reg (mode1
, op0
);
18383 if (! (*insn_data
[icode
].operand
[2].predicate
) (op1
, mode2
))
18384 op1
= copy_to_mode_reg (mode2
, op1
);
18386 if (! (*insn_data
[icode
].operand
[3].predicate
) (op2
, mode3
))
18388 error ("index mask must be an immediate");
18389 return gen_reg_rtx (tmode
);
18391 if (! (*insn_data
[icode
].operand
[4].predicate
) (op3
, mode4
))
18393 error ("length mask must be an immediate");
18394 return gen_reg_rtx (tmode
);
18396 if (optimize
|| target
== 0
18397 || GET_MODE (target
) != tmode
18398 || ! (*insn_data
[icode
].operand
[0].predicate
) (target
, tmode
))
18399 target
= gen_reg_rtx (tmode
);
18400 pat
= GEN_FCN (icode
) (target
, op0
, op1
, op2
, op3
);
18406 case IX86_BUILTIN_VEC_INIT_V2SI
:
18407 case IX86_BUILTIN_VEC_INIT_V4HI
:
18408 case IX86_BUILTIN_VEC_INIT_V8QI
:
18409 return ix86_expand_vec_init_builtin (TREE_TYPE (exp
), exp
, target
);
18411 case IX86_BUILTIN_VEC_EXT_V2DF
:
18412 case IX86_BUILTIN_VEC_EXT_V2DI
:
18413 case IX86_BUILTIN_VEC_EXT_V4SF
:
18414 case IX86_BUILTIN_VEC_EXT_V4SI
:
18415 case IX86_BUILTIN_VEC_EXT_V8HI
:
18416 case IX86_BUILTIN_VEC_EXT_V2SI
:
18417 case IX86_BUILTIN_VEC_EXT_V4HI
:
18418 return ix86_expand_vec_ext_builtin (exp
, target
);
18420 case IX86_BUILTIN_VEC_SET_V8HI
:
18421 case IX86_BUILTIN_VEC_SET_V4HI
:
18422 return ix86_expand_vec_set_builtin (exp
);
18428 for (i
= 0, d
= bdesc_2arg
; i
< ARRAY_SIZE (bdesc_2arg
); i
++, d
++)
18429 if (d
->code
== fcode
)
18431 /* Compares are treated specially. */
18432 if (d
->icode
== CODE_FOR_sse_maskcmpv4sf3
18433 || d
->icode
== CODE_FOR_sse_vmmaskcmpv4sf3
18434 || d
->icode
== CODE_FOR_sse2_maskcmpv2df3
18435 || d
->icode
== CODE_FOR_sse2_vmmaskcmpv2df3
)
18436 return ix86_expand_sse_compare (d
, exp
, target
);
18438 return ix86_expand_binop_builtin (d
->icode
, exp
, target
);
18441 for (i
= 0, d
= bdesc_1arg
; i
< ARRAY_SIZE (bdesc_1arg
); i
++, d
++)
18442 if (d
->code
== fcode
)
18443 return ix86_expand_unop_builtin (d
->icode
, exp
, target
, 0);
18445 for (i
= 0, d
= bdesc_comi
; i
< ARRAY_SIZE (bdesc_comi
); i
++, d
++)
18446 if (d
->code
== fcode
)
18447 return ix86_expand_sse_comi (d
, exp
, target
);
18449 gcc_unreachable ();
18452 /* Returns a function decl for a vectorized version of the builtin function
18453 with builtin function code FN and the result vector type TYPE, or NULL_TREE
18454 if it is not available. */
18457 ix86_builtin_vectorized_function (enum built_in_function fn
, tree type_out
,
18460 enum machine_mode in_mode
, out_mode
;
18463 if (TREE_CODE (type_out
) != VECTOR_TYPE
18464 || TREE_CODE (type_in
) != VECTOR_TYPE
)
18467 out_mode
= TYPE_MODE (TREE_TYPE (type_out
));
18468 out_n
= TYPE_VECTOR_SUBPARTS (type_out
);
18469 in_mode
= TYPE_MODE (TREE_TYPE (type_in
));
18470 in_n
= TYPE_VECTOR_SUBPARTS (type_in
);
18474 case BUILT_IN_SQRT
:
18475 if (out_mode
== DFmode
&& out_n
== 2
18476 && in_mode
== DFmode
&& in_n
== 2)
18477 return ix86_builtins
[IX86_BUILTIN_SQRTPD
];
18480 case BUILT_IN_SQRTF
:
18481 if (out_mode
== SFmode
&& out_n
== 4
18482 && in_mode
== SFmode
&& in_n
== 4)
18483 return ix86_builtins
[IX86_BUILTIN_SQRTPS
];
18486 case BUILT_IN_LRINTF
:
18487 if (out_mode
== SImode
&& out_n
== 4
18488 && in_mode
== SFmode
&& in_n
== 4)
18489 return ix86_builtins
[IX86_BUILTIN_CVTPS2DQ
];
18499 /* Returns a decl of a function that implements conversion of the
18500 input vector of type TYPE, or NULL_TREE if it is not available. */
18503 ix86_builtin_conversion (enum tree_code code
, tree type
)
18505 if (TREE_CODE (type
) != VECTOR_TYPE
)
18511 switch (TYPE_MODE (type
))
18514 return ix86_builtins
[IX86_BUILTIN_CVTDQ2PS
];
18519 case FIX_TRUNC_EXPR
:
18520 switch (TYPE_MODE (type
))
18523 return ix86_builtins
[IX86_BUILTIN_CVTTPS2DQ
];
18533 /* Store OPERAND to the memory after reload is completed. This means
18534 that we can't easily use assign_stack_local. */
18536 ix86_force_to_memory (enum machine_mode mode
, rtx operand
)
18540 gcc_assert (reload_completed
);
18541 if (TARGET_RED_ZONE
)
18543 result
= gen_rtx_MEM (mode
,
18544 gen_rtx_PLUS (Pmode
,
18546 GEN_INT (-RED_ZONE_SIZE
)));
18547 emit_move_insn (result
, operand
);
18549 else if (!TARGET_RED_ZONE
&& TARGET_64BIT
)
18555 operand
= gen_lowpart (DImode
, operand
);
18559 gen_rtx_SET (VOIDmode
,
18560 gen_rtx_MEM (DImode
,
18561 gen_rtx_PRE_DEC (DImode
,
18562 stack_pointer_rtx
)),
18566 gcc_unreachable ();
18568 result
= gen_rtx_MEM (mode
, stack_pointer_rtx
);
18577 split_di (&operand
, 1, operands
, operands
+ 1);
18579 gen_rtx_SET (VOIDmode
,
18580 gen_rtx_MEM (SImode
,
18581 gen_rtx_PRE_DEC (Pmode
,
18582 stack_pointer_rtx
)),
18585 gen_rtx_SET (VOIDmode
,
18586 gen_rtx_MEM (SImode
,
18587 gen_rtx_PRE_DEC (Pmode
,
18588 stack_pointer_rtx
)),
18593 /* Store HImodes as SImodes. */
18594 operand
= gen_lowpart (SImode
, operand
);
18598 gen_rtx_SET (VOIDmode
,
18599 gen_rtx_MEM (GET_MODE (operand
),
18600 gen_rtx_PRE_DEC (SImode
,
18601 stack_pointer_rtx
)),
18605 gcc_unreachable ();
18607 result
= gen_rtx_MEM (mode
, stack_pointer_rtx
);
18612 /* Free operand from the memory. */
18614 ix86_free_from_memory (enum machine_mode mode
)
18616 if (!TARGET_RED_ZONE
)
18620 if (mode
== DImode
|| TARGET_64BIT
)
18624 /* Use LEA to deallocate stack space. In peephole2 it will be converted
18625 to pop or add instruction if registers are available. */
18626 emit_insn (gen_rtx_SET (VOIDmode
, stack_pointer_rtx
,
18627 gen_rtx_PLUS (Pmode
, stack_pointer_rtx
,
18632 /* Put float CONST_DOUBLE in the constant pool instead of fp regs.
18633 QImode must go into class Q_REGS.
18634 Narrow ALL_REGS to GENERAL_REGS. This supports allowing movsf and
18635 movdf to do mem-to-mem moves through integer regs. */
18637 ix86_preferred_reload_class (rtx x
, enum reg_class
class)
18639 enum machine_mode mode
= GET_MODE (x
);
18641 /* We're only allowed to return a subclass of CLASS. Many of the
18642 following checks fail for NO_REGS, so eliminate that early. */
18643 if (class == NO_REGS
)
18646 /* All classes can load zeros. */
18647 if (x
== CONST0_RTX (mode
))
18650 /* Force constants into memory if we are loading a (nonzero) constant into
18651 an MMX or SSE register. This is because there are no MMX/SSE instructions
18652 to load from a constant. */
18654 && (MAYBE_MMX_CLASS_P (class) || MAYBE_SSE_CLASS_P (class)))
18657 /* Prefer SSE regs only, if we can use them for math. */
18658 if (TARGET_SSE_MATH
&& !TARGET_MIX_SSE_I387
&& SSE_FLOAT_MODE_P (mode
))
18659 return SSE_CLASS_P (class) ? class : NO_REGS
;
18661 /* Floating-point constants need more complex checks. */
18662 if (GET_CODE (x
) == CONST_DOUBLE
&& GET_MODE (x
) != VOIDmode
)
18664 /* General regs can load everything. */
18665 if (reg_class_subset_p (class, GENERAL_REGS
))
18668 /* Floats can load 0 and 1 plus some others. Note that we eliminated
18669 zero above. We only want to wind up preferring 80387 registers if
18670 we plan on doing computation with them. */
18672 && standard_80387_constant_p (x
))
18674 /* Limit class to non-sse. */
18675 if (class == FLOAT_SSE_REGS
)
18677 if (class == FP_TOP_SSE_REGS
)
18679 if (class == FP_SECOND_SSE_REGS
)
18680 return FP_SECOND_REG
;
18681 if (class == FLOAT_INT_REGS
|| class == FLOAT_REGS
)
18688 /* Generally when we see PLUS here, it's the function invariant
18689 (plus soft-fp const_int). Which can only be computed into general
18691 if (GET_CODE (x
) == PLUS
)
18692 return reg_class_subset_p (class, GENERAL_REGS
) ? class : NO_REGS
;
18694 /* QImode constants are easy to load, but non-constant QImode data
18695 must go into Q_REGS. */
18696 if (GET_MODE (x
) == QImode
&& !CONSTANT_P (x
))
18698 if (reg_class_subset_p (class, Q_REGS
))
18700 if (reg_class_subset_p (Q_REGS
, class))
18708 /* Discourage putting floating-point values in SSE registers unless
18709 SSE math is being used, and likewise for the 387 registers. */
18711 ix86_preferred_output_reload_class (rtx x
, enum reg_class
class)
18713 enum machine_mode mode
= GET_MODE (x
);
18715 /* Restrict the output reload class to the register bank that we are doing
18716 math on. If we would like not to return a subset of CLASS, reject this
18717 alternative: if reload cannot do this, it will still use its choice. */
18718 mode
= GET_MODE (x
);
18719 if (TARGET_SSE_MATH
&& SSE_FLOAT_MODE_P (mode
))
18720 return MAYBE_SSE_CLASS_P (class) ? SSE_REGS
: NO_REGS
;
18722 if (TARGET_80387
&& SCALAR_FLOAT_MODE_P (mode
))
18724 if (class == FP_TOP_SSE_REGS
)
18726 else if (class == FP_SECOND_SSE_REGS
)
18727 return FP_SECOND_REG
;
18729 return FLOAT_CLASS_P (class) ? class : NO_REGS
;
18735 /* If we are copying between general and FP registers, we need a memory
18736 location. The same is true for SSE and MMX registers.
18738 The macro can't work reliably when one of the CLASSES is class containing
18739 registers from multiple units (SSE, MMX, integer). We avoid this by never
18740 combining those units in single alternative in the machine description.
18741 Ensure that this constraint holds to avoid unexpected surprises.
18743 When STRICT is false, we are being called from REGISTER_MOVE_COST, so do not
18744 enforce these sanity checks. */
18747 ix86_secondary_memory_needed (enum reg_class class1
, enum reg_class class2
,
18748 enum machine_mode mode
, int strict
)
18750 if (MAYBE_FLOAT_CLASS_P (class1
) != FLOAT_CLASS_P (class1
)
18751 || MAYBE_FLOAT_CLASS_P (class2
) != FLOAT_CLASS_P (class2
)
18752 || MAYBE_SSE_CLASS_P (class1
) != SSE_CLASS_P (class1
)
18753 || MAYBE_SSE_CLASS_P (class2
) != SSE_CLASS_P (class2
)
18754 || MAYBE_MMX_CLASS_P (class1
) != MMX_CLASS_P (class1
)
18755 || MAYBE_MMX_CLASS_P (class2
) != MMX_CLASS_P (class2
))
18757 gcc_assert (!strict
);
18761 if (FLOAT_CLASS_P (class1
) != FLOAT_CLASS_P (class2
))
18764 /* ??? This is a lie. We do have moves between mmx/general, and for
18765 mmx/sse2. But by saying we need secondary memory we discourage the
18766 register allocator from using the mmx registers unless needed. */
18767 if (MMX_CLASS_P (class1
) != MMX_CLASS_P (class2
))
18770 if (SSE_CLASS_P (class1
) != SSE_CLASS_P (class2
))
18772 /* SSE1 doesn't have any direct moves from other classes. */
18776 /* If the target says that inter-unit moves are more expensive
18777 than moving through memory, then don't generate them. */
18778 if (!TARGET_INTER_UNIT_MOVES
)
18781 /* Between SSE and general, we have moves no larger than word size. */
18782 if (GET_MODE_SIZE (mode
) > UNITS_PER_WORD
)
18789 /* Return true if the registers in CLASS cannot represent the change from
18790 modes FROM to TO. */
18793 ix86_cannot_change_mode_class (enum machine_mode from
, enum machine_mode to
,
18794 enum reg_class
class)
18799 /* x87 registers can't do subreg at all, as all values are reformatted
18800 to extended precision. */
18801 if (MAYBE_FLOAT_CLASS_P (class))
18804 if (MAYBE_SSE_CLASS_P (class) || MAYBE_MMX_CLASS_P (class))
18806 /* Vector registers do not support QI or HImode loads. If we don't
18807 disallow a change to these modes, reload will assume it's ok to
18808 drop the subreg from (subreg:SI (reg:HI 100) 0). This affects
18809 the vec_dupv4hi pattern. */
18810 if (GET_MODE_SIZE (from
) < 4)
18813 /* Vector registers do not support subreg with nonzero offsets, which
18814 are otherwise valid for integer registers. Since we can't see
18815 whether we have a nonzero offset from here, prohibit all
18816 nonparadoxical subregs changing size. */
18817 if (GET_MODE_SIZE (to
) < GET_MODE_SIZE (from
))
18824 /* Return the cost of moving data from a register in class CLASS1 to
18825 one in class CLASS2.
18827 It is not required that the cost always equal 2 when FROM is the same as TO;
18828 on some machines it is expensive to move between registers if they are not
18829 general registers. */
18832 ix86_register_move_cost (enum machine_mode mode
, enum reg_class class1
,
18833 enum reg_class class2
)
18835 /* In case we require secondary memory, compute cost of the store followed
18836 by load. In order to avoid bad register allocation choices, we need
18837 for this to be *at least* as high as the symmetric MEMORY_MOVE_COST. */
18839 if (ix86_secondary_memory_needed (class1
, class2
, mode
, 0))
18843 cost
+= MAX (MEMORY_MOVE_COST (mode
, class1
, 0),
18844 MEMORY_MOVE_COST (mode
, class1
, 1));
18845 cost
+= MAX (MEMORY_MOVE_COST (mode
, class2
, 0),
18846 MEMORY_MOVE_COST (mode
, class2
, 1));
18848 /* In case of copying from general_purpose_register we may emit multiple
18849 stores followed by single load causing memory size mismatch stall.
18850 Count this as arbitrarily high cost of 20. */
18851 if (CLASS_MAX_NREGS (class1
, mode
) > CLASS_MAX_NREGS (class2
, mode
))
18854 /* In the case of FP/MMX moves, the registers actually overlap, and we
18855 have to switch modes in order to treat them differently. */
18856 if ((MMX_CLASS_P (class1
) && MAYBE_FLOAT_CLASS_P (class2
))
18857 || (MMX_CLASS_P (class2
) && MAYBE_FLOAT_CLASS_P (class1
)))
18863 /* Moves between SSE/MMX and integer unit are expensive. */
18864 if (MMX_CLASS_P (class1
) != MMX_CLASS_P (class2
)
18865 || SSE_CLASS_P (class1
) != SSE_CLASS_P (class2
))
18866 return ix86_cost
->mmxsse_to_integer
;
18867 if (MAYBE_FLOAT_CLASS_P (class1
))
18868 return ix86_cost
->fp_move
;
18869 if (MAYBE_SSE_CLASS_P (class1
))
18870 return ix86_cost
->sse_move
;
18871 if (MAYBE_MMX_CLASS_P (class1
))
18872 return ix86_cost
->mmx_move
;
18876 /* Return 1 if hard register REGNO can hold a value of machine-mode MODE. */
18879 ix86_hard_regno_mode_ok (int regno
, enum machine_mode mode
)
18881 /* Flags and only flags can only hold CCmode values. */
18882 if (CC_REGNO_P (regno
))
18883 return GET_MODE_CLASS (mode
) == MODE_CC
;
18884 if (GET_MODE_CLASS (mode
) == MODE_CC
18885 || GET_MODE_CLASS (mode
) == MODE_RANDOM
18886 || GET_MODE_CLASS (mode
) == MODE_PARTIAL_INT
)
18888 if (FP_REGNO_P (regno
))
18889 return VALID_FP_MODE_P (mode
);
18890 if (SSE_REGNO_P (regno
))
18892 /* We implement the move patterns for all vector modes into and
18893 out of SSE registers, even when no operation instructions
18895 return (VALID_SSE_REG_MODE (mode
)
18896 || VALID_SSE2_REG_MODE (mode
)
18897 || VALID_MMX_REG_MODE (mode
)
18898 || VALID_MMX_REG_MODE_3DNOW (mode
));
18900 if (MMX_REGNO_P (regno
))
18902 /* We implement the move patterns for 3DNOW modes even in MMX mode,
18903 so if the register is available at all, then we can move data of
18904 the given mode into or out of it. */
18905 return (VALID_MMX_REG_MODE (mode
)
18906 || VALID_MMX_REG_MODE_3DNOW (mode
));
18909 if (mode
== QImode
)
18911 /* Take care for QImode values - they can be in non-QI regs,
18912 but then they do cause partial register stalls. */
18913 if (regno
< 4 || TARGET_64BIT
)
18915 if (!TARGET_PARTIAL_REG_STALL
)
18917 return reload_in_progress
|| reload_completed
;
18919 /* We handle both integer and floats in the general purpose registers. */
18920 else if (VALID_INT_MODE_P (mode
))
18922 else if (VALID_FP_MODE_P (mode
))
18924 /* Lots of MMX code casts 8 byte vector modes to DImode. If we then go
18925 on to use that value in smaller contexts, this can easily force a
18926 pseudo to be allocated to GENERAL_REGS. Since this is no worse than
18927 supporting DImode, allow it. */
18928 else if (VALID_MMX_REG_MODE_3DNOW (mode
) || VALID_MMX_REG_MODE (mode
))
18934 /* A subroutine of ix86_modes_tieable_p. Return true if MODE is a
18935 tieable integer mode. */
18938 ix86_tieable_integer_mode_p (enum machine_mode mode
)
18947 return TARGET_64BIT
|| !TARGET_PARTIAL_REG_STALL
;
18950 return TARGET_64BIT
;
18957 /* Return true if MODE1 is accessible in a register that can hold MODE2
18958 without copying. That is, all register classes that can hold MODE2
18959 can also hold MODE1. */
18962 ix86_modes_tieable_p (enum machine_mode mode1
, enum machine_mode mode2
)
18964 if (mode1
== mode2
)
18967 if (ix86_tieable_integer_mode_p (mode1
)
18968 && ix86_tieable_integer_mode_p (mode2
))
18971 /* MODE2 being XFmode implies fp stack or general regs, which means we
18972 can tie any smaller floating point modes to it. Note that we do not
18973 tie this with TFmode. */
18974 if (mode2
== XFmode
)
18975 return mode1
== SFmode
|| mode1
== DFmode
;
18977 /* MODE2 being DFmode implies fp stack, general or sse regs, which means
18978 that we can tie it with SFmode. */
18979 if (mode2
== DFmode
)
18980 return mode1
== SFmode
;
18982 /* If MODE2 is only appropriate for an SSE register, then tie with
18983 any other mode acceptable to SSE registers. */
18984 if (GET_MODE_SIZE (mode2
) == 16
18985 && ix86_hard_regno_mode_ok (FIRST_SSE_REG
, mode2
))
18986 return (GET_MODE_SIZE (mode1
) == 16
18987 && ix86_hard_regno_mode_ok (FIRST_SSE_REG
, mode1
));
18989 /* If MODE2 is appropriate for an MMX register, then tie
18990 with any other mode acceptable to MMX registers. */
18991 if (GET_MODE_SIZE (mode2
) == 8
18992 && ix86_hard_regno_mode_ok (FIRST_MMX_REG
, mode2
))
18993 return (GET_MODE_SIZE (mode1
) == 8
18994 && ix86_hard_regno_mode_ok (FIRST_MMX_REG
, mode1
));
18999 /* Return the cost of moving data of mode M between a
19000 register and memory. A value of 2 is the default; this cost is
19001 relative to those in `REGISTER_MOVE_COST'.
19003 If moving between registers and memory is more expensive than
19004 between two registers, you should define this macro to express the
19007 Model also increased moving costs of QImode registers in non
19011 ix86_memory_move_cost (enum machine_mode mode
, enum reg_class
class, int in
)
19013 if (FLOAT_CLASS_P (class))
19030 return in
? ix86_cost
->fp_load
[index
] : ix86_cost
->fp_store
[index
];
19032 if (SSE_CLASS_P (class))
19035 switch (GET_MODE_SIZE (mode
))
19049 return in
? ix86_cost
->sse_load
[index
] : ix86_cost
->sse_store
[index
];
19051 if (MMX_CLASS_P (class))
19054 switch (GET_MODE_SIZE (mode
))
19065 return in
? ix86_cost
->mmx_load
[index
] : ix86_cost
->mmx_store
[index
];
19067 switch (GET_MODE_SIZE (mode
))
19071 return (Q_CLASS_P (class) ? ix86_cost
->int_load
[0]
19072 : ix86_cost
->movzbl_load
);
19074 return (Q_CLASS_P (class) ? ix86_cost
->int_store
[0]
19075 : ix86_cost
->int_store
[0] + 4);
19078 return in
? ix86_cost
->int_load
[1] : ix86_cost
->int_store
[1];
19080 /* Compute number of 32bit moves needed. TFmode is moved as XFmode. */
19081 if (mode
== TFmode
)
19083 return ((in
? ix86_cost
->int_load
[2] : ix86_cost
->int_store
[2])
19084 * (((int) GET_MODE_SIZE (mode
)
19085 + UNITS_PER_WORD
- 1) / UNITS_PER_WORD
));
19089 /* Compute a (partial) cost for rtx X. Return true if the complete
19090 cost has been computed, and false if subexpressions should be
19091 scanned. In either case, *TOTAL contains the cost result. */
19094 ix86_rtx_costs (rtx x
, int code
, int outer_code
, int *total
)
19096 enum machine_mode mode
= GET_MODE (x
);
19104 if (TARGET_64BIT
&& !x86_64_immediate_operand (x
, VOIDmode
))
19106 else if (TARGET_64BIT
&& !x86_64_zext_immediate_operand (x
, VOIDmode
))
19108 else if (flag_pic
&& SYMBOLIC_CONST (x
)
19110 || (!GET_CODE (x
) != LABEL_REF
19111 && (GET_CODE (x
) != SYMBOL_REF
19112 || !SYMBOL_REF_LOCAL_P (x
)))))
19119 if (mode
== VOIDmode
)
19122 switch (standard_80387_constant_p (x
))
19127 default: /* Other constants */
19132 /* Start with (MEM (SYMBOL_REF)), since that's where
19133 it'll probably end up. Add a penalty for size. */
19134 *total
= (COSTS_N_INSNS (1)
19135 + (flag_pic
!= 0 && !TARGET_64BIT
)
19136 + (mode
== SFmode
? 0 : mode
== DFmode
? 1 : 2));
19142 /* The zero extensions is often completely free on x86_64, so make
19143 it as cheap as possible. */
19144 if (TARGET_64BIT
&& mode
== DImode
19145 && GET_MODE (XEXP (x
, 0)) == SImode
)
19147 else if (TARGET_ZERO_EXTEND_WITH_AND
)
19148 *total
= ix86_cost
->add
;
19150 *total
= ix86_cost
->movzx
;
19154 *total
= ix86_cost
->movsx
;
19158 if (CONST_INT_P (XEXP (x
, 1))
19159 && (GET_MODE (XEXP (x
, 0)) != DImode
|| TARGET_64BIT
))
19161 HOST_WIDE_INT value
= INTVAL (XEXP (x
, 1));
19164 *total
= ix86_cost
->add
;
19167 if ((value
== 2 || value
== 3)
19168 && ix86_cost
->lea
<= ix86_cost
->shift_const
)
19170 *total
= ix86_cost
->lea
;
19180 if (!TARGET_64BIT
&& GET_MODE (XEXP (x
, 0)) == DImode
)
19182 if (CONST_INT_P (XEXP (x
, 1)))
19184 if (INTVAL (XEXP (x
, 1)) > 32)
19185 *total
= ix86_cost
->shift_const
+ COSTS_N_INSNS (2);
19187 *total
= ix86_cost
->shift_const
* 2;
19191 if (GET_CODE (XEXP (x
, 1)) == AND
)
19192 *total
= ix86_cost
->shift_var
* 2;
19194 *total
= ix86_cost
->shift_var
* 6 + COSTS_N_INSNS (2);
19199 if (CONST_INT_P (XEXP (x
, 1)))
19200 *total
= ix86_cost
->shift_const
;
19202 *total
= ix86_cost
->shift_var
;
19207 if (FLOAT_MODE_P (mode
))
19209 *total
= ix86_cost
->fmul
;
19214 rtx op0
= XEXP (x
, 0);
19215 rtx op1
= XEXP (x
, 1);
19217 if (CONST_INT_P (XEXP (x
, 1)))
19219 unsigned HOST_WIDE_INT value
= INTVAL (XEXP (x
, 1));
19220 for (nbits
= 0; value
!= 0; value
&= value
- 1)
19224 /* This is arbitrary. */
19227 /* Compute costs correctly for widening multiplication. */
19228 if ((GET_CODE (op0
) == SIGN_EXTEND
|| GET_CODE (op1
) == ZERO_EXTEND
)
19229 && GET_MODE_SIZE (GET_MODE (XEXP (op0
, 0))) * 2
19230 == GET_MODE_SIZE (mode
))
19232 int is_mulwiden
= 0;
19233 enum machine_mode inner_mode
= GET_MODE (op0
);
19235 if (GET_CODE (op0
) == GET_CODE (op1
))
19236 is_mulwiden
= 1, op1
= XEXP (op1
, 0);
19237 else if (CONST_INT_P (op1
))
19239 if (GET_CODE (op0
) == SIGN_EXTEND
)
19240 is_mulwiden
= trunc_int_for_mode (INTVAL (op1
), inner_mode
)
19243 is_mulwiden
= !(INTVAL (op1
) & ~GET_MODE_MASK (inner_mode
));
19247 op0
= XEXP (op0
, 0), mode
= GET_MODE (op0
);
19250 *total
= (ix86_cost
->mult_init
[MODE_INDEX (mode
)]
19251 + nbits
* ix86_cost
->mult_bit
19252 + rtx_cost (op0
, outer_code
) + rtx_cost (op1
, outer_code
));
19261 if (FLOAT_MODE_P (mode
))
19262 *total
= ix86_cost
->fdiv
;
19264 *total
= ix86_cost
->divide
[MODE_INDEX (mode
)];
19268 if (FLOAT_MODE_P (mode
))
19269 *total
= ix86_cost
->fadd
;
19270 else if (GET_MODE_CLASS (mode
) == MODE_INT
19271 && GET_MODE_BITSIZE (mode
) <= GET_MODE_BITSIZE (Pmode
))
19273 if (GET_CODE (XEXP (x
, 0)) == PLUS
19274 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == MULT
19275 && CONST_INT_P (XEXP (XEXP (XEXP (x
, 0), 0), 1))
19276 && CONSTANT_P (XEXP (x
, 1)))
19278 HOST_WIDE_INT val
= INTVAL (XEXP (XEXP (XEXP (x
, 0), 0), 1));
19279 if (val
== 2 || val
== 4 || val
== 8)
19281 *total
= ix86_cost
->lea
;
19282 *total
+= rtx_cost (XEXP (XEXP (x
, 0), 1), outer_code
);
19283 *total
+= rtx_cost (XEXP (XEXP (XEXP (x
, 0), 0), 0),
19285 *total
+= rtx_cost (XEXP (x
, 1), outer_code
);
19289 else if (GET_CODE (XEXP (x
, 0)) == MULT
19290 && CONST_INT_P (XEXP (XEXP (x
, 0), 1)))
19292 HOST_WIDE_INT val
= INTVAL (XEXP (XEXP (x
, 0), 1));
19293 if (val
== 2 || val
== 4 || val
== 8)
19295 *total
= ix86_cost
->lea
;
19296 *total
+= rtx_cost (XEXP (XEXP (x
, 0), 0), outer_code
);
19297 *total
+= rtx_cost (XEXP (x
, 1), outer_code
);
19301 else if (GET_CODE (XEXP (x
, 0)) == PLUS
)
19303 *total
= ix86_cost
->lea
;
19304 *total
+= rtx_cost (XEXP (XEXP (x
, 0), 0), outer_code
);
19305 *total
+= rtx_cost (XEXP (XEXP (x
, 0), 1), outer_code
);
19306 *total
+= rtx_cost (XEXP (x
, 1), outer_code
);
19313 if (FLOAT_MODE_P (mode
))
19315 *total
= ix86_cost
->fadd
;
19323 if (!TARGET_64BIT
&& mode
== DImode
)
19325 *total
= (ix86_cost
->add
* 2
19326 + (rtx_cost (XEXP (x
, 0), outer_code
)
19327 << (GET_MODE (XEXP (x
, 0)) != DImode
))
19328 + (rtx_cost (XEXP (x
, 1), outer_code
)
19329 << (GET_MODE (XEXP (x
, 1)) != DImode
)));
19335 if (FLOAT_MODE_P (mode
))
19337 *total
= ix86_cost
->fchs
;
19343 if (!TARGET_64BIT
&& mode
== DImode
)
19344 *total
= ix86_cost
->add
* 2;
19346 *total
= ix86_cost
->add
;
19350 if (GET_CODE (XEXP (x
, 0)) == ZERO_EXTRACT
19351 && XEXP (XEXP (x
, 0), 1) == const1_rtx
19352 && CONST_INT_P (XEXP (XEXP (x
, 0), 2))
19353 && XEXP (x
, 1) == const0_rtx
)
19355 /* This kind of construct is implemented using test[bwl].
19356 Treat it as if we had an AND. */
19357 *total
= (ix86_cost
->add
19358 + rtx_cost (XEXP (XEXP (x
, 0), 0), outer_code
)
19359 + rtx_cost (const1_rtx
, outer_code
));
19365 if (!TARGET_SSE_MATH
19367 || (mode
== DFmode
&& !TARGET_SSE2
))
19372 if (FLOAT_MODE_P (mode
))
19373 *total
= ix86_cost
->fabs
;
19377 if (FLOAT_MODE_P (mode
))
19378 *total
= ix86_cost
->fsqrt
;
19382 if (XINT (x
, 1) == UNSPEC_TP
)
19393 static int current_machopic_label_num
;
19395 /* Given a symbol name and its associated stub, write out the
19396 definition of the stub. */
19399 machopic_output_stub (FILE *file
, const char *symb
, const char *stub
)
19401 unsigned int length
;
19402 char *binder_name
, *symbol_name
, lazy_ptr_name
[32];
19403 int label
= ++current_machopic_label_num
;
19405 /* For 64-bit we shouldn't get here. */
19406 gcc_assert (!TARGET_64BIT
);
19408 /* Lose our funky encoding stuff so it doesn't contaminate the stub. */
19409 symb
= (*targetm
.strip_name_encoding
) (symb
);
19411 length
= strlen (stub
);
19412 binder_name
= alloca (length
+ 32);
19413 GEN_BINDER_NAME_FOR_STUB (binder_name
, stub
, length
);
19415 length
= strlen (symb
);
19416 symbol_name
= alloca (length
+ 32);
19417 GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name
, symb
, length
);
19419 sprintf (lazy_ptr_name
, "L%d$lz", label
);
19422 switch_to_section (darwin_sections
[machopic_picsymbol_stub_section
]);
19424 switch_to_section (darwin_sections
[machopic_symbol_stub_section
]);
19426 fprintf (file
, "%s:\n", stub
);
19427 fprintf (file
, "\t.indirect_symbol %s\n", symbol_name
);
19431 fprintf (file
, "\tcall\tLPC$%d\nLPC$%d:\tpopl\t%%eax\n", label
, label
);
19432 fprintf (file
, "\tmovl\t%s-LPC$%d(%%eax),%%edx\n", lazy_ptr_name
, label
);
19433 fprintf (file
, "\tjmp\t*%%edx\n");
19436 fprintf (file
, "\tjmp\t*%s\n", lazy_ptr_name
);
19438 fprintf (file
, "%s:\n", binder_name
);
19442 fprintf (file
, "\tlea\t%s-LPC$%d(%%eax),%%eax\n", lazy_ptr_name
, label
);
19443 fprintf (file
, "\tpushl\t%%eax\n");
19446 fprintf (file
, "\tpushl\t$%s\n", lazy_ptr_name
);
19448 fprintf (file
, "\tjmp\tdyld_stub_binding_helper\n");
19450 switch_to_section (darwin_sections
[machopic_lazy_symbol_ptr_section
]);
19451 fprintf (file
, "%s:\n", lazy_ptr_name
);
19452 fprintf (file
, "\t.indirect_symbol %s\n", symbol_name
);
19453 fprintf (file
, "\t.long %s\n", binder_name
);
19457 darwin_x86_file_end (void)
19459 darwin_file_end ();
19462 #endif /* TARGET_MACHO */
19464 /* Order the registers for register allocator. */
19467 x86_order_regs_for_local_alloc (void)
19472 /* First allocate the local general purpose registers. */
19473 for (i
= 0; i
< FIRST_PSEUDO_REGISTER
; i
++)
19474 if (GENERAL_REGNO_P (i
) && call_used_regs
[i
])
19475 reg_alloc_order
[pos
++] = i
;
19477 /* Global general purpose registers. */
19478 for (i
= 0; i
< FIRST_PSEUDO_REGISTER
; i
++)
19479 if (GENERAL_REGNO_P (i
) && !call_used_regs
[i
])
19480 reg_alloc_order
[pos
++] = i
;
19482 /* x87 registers come first in case we are doing FP math
19484 if (!TARGET_SSE_MATH
)
19485 for (i
= FIRST_STACK_REG
; i
<= LAST_STACK_REG
; i
++)
19486 reg_alloc_order
[pos
++] = i
;
19488 /* SSE registers. */
19489 for (i
= FIRST_SSE_REG
; i
<= LAST_SSE_REG
; i
++)
19490 reg_alloc_order
[pos
++] = i
;
19491 for (i
= FIRST_REX_SSE_REG
; i
<= LAST_REX_SSE_REG
; i
++)
19492 reg_alloc_order
[pos
++] = i
;
19494 /* x87 registers. */
19495 if (TARGET_SSE_MATH
)
19496 for (i
= FIRST_STACK_REG
; i
<= LAST_STACK_REG
; i
++)
19497 reg_alloc_order
[pos
++] = i
;
19499 for (i
= FIRST_MMX_REG
; i
<= LAST_MMX_REG
; i
++)
19500 reg_alloc_order
[pos
++] = i
;
19502 /* Initialize the rest of array as we do not allocate some registers
19504 while (pos
< FIRST_PSEUDO_REGISTER
)
19505 reg_alloc_order
[pos
++] = 0;
19508 /* Handle a "ms_struct" or "gcc_struct" attribute; arguments as in
19509 struct attribute_spec.handler. */
19511 ix86_handle_struct_attribute (tree
*node
, tree name
,
19512 tree args ATTRIBUTE_UNUSED
,
19513 int flags ATTRIBUTE_UNUSED
, bool *no_add_attrs
)
19516 if (DECL_P (*node
))
19518 if (TREE_CODE (*node
) == TYPE_DECL
)
19519 type
= &TREE_TYPE (*node
);
19524 if (!(type
&& (TREE_CODE (*type
) == RECORD_TYPE
19525 || TREE_CODE (*type
) == UNION_TYPE
)))
19527 warning (OPT_Wattributes
, "%qs attribute ignored",
19528 IDENTIFIER_POINTER (name
));
19529 *no_add_attrs
= true;
19532 else if ((is_attribute_p ("ms_struct", name
)
19533 && lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (*type
)))
19534 || ((is_attribute_p ("gcc_struct", name
)
19535 && lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (*type
)))))
19537 warning (OPT_Wattributes
, "%qs incompatible attribute ignored",
19538 IDENTIFIER_POINTER (name
));
19539 *no_add_attrs
= true;
19546 ix86_ms_bitfield_layout_p (tree record_type
)
19548 return (TARGET_MS_BITFIELD_LAYOUT
&&
19549 !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type
)))
19550 || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type
));
19553 /* Returns an expression indicating where the this parameter is
19554 located on entry to the FUNCTION. */
19557 x86_this_parameter (tree function
)
19559 tree type
= TREE_TYPE (function
);
19563 int n
= aggregate_value_p (TREE_TYPE (type
), type
) != 0;
19564 return gen_rtx_REG (DImode
, x86_64_int_parameter_registers
[n
]);
19567 if (ix86_function_regparm (type
, function
) > 0)
19571 parm
= TYPE_ARG_TYPES (type
);
19572 /* Figure out whether or not the function has a variable number of
19574 for (; parm
; parm
= TREE_CHAIN (parm
))
19575 if (TREE_VALUE (parm
) == void_type_node
)
19577 /* If not, the this parameter is in the first argument. */
19581 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type
)))
19583 return gen_rtx_REG (SImode
, regno
);
19587 if (aggregate_value_p (TREE_TYPE (type
), type
))
19588 return gen_rtx_MEM (SImode
, plus_constant (stack_pointer_rtx
, 8));
19590 return gen_rtx_MEM (SImode
, plus_constant (stack_pointer_rtx
, 4));
19593 /* Determine whether x86_output_mi_thunk can succeed. */
19596 x86_can_output_mi_thunk (tree thunk ATTRIBUTE_UNUSED
,
19597 HOST_WIDE_INT delta ATTRIBUTE_UNUSED
,
19598 HOST_WIDE_INT vcall_offset
, tree function
)
19600 /* 64-bit can handle anything. */
19604 /* For 32-bit, everything's fine if we have one free register. */
19605 if (ix86_function_regparm (TREE_TYPE (function
), function
) < 3)
19608 /* Need a free register for vcall_offset. */
19612 /* Need a free register for GOT references. */
19613 if (flag_pic
&& !(*targetm
.binds_local_p
) (function
))
19616 /* Otherwise ok. */
19620 /* Output the assembler code for a thunk function. THUNK_DECL is the
19621 declaration for the thunk function itself, FUNCTION is the decl for
19622 the target function. DELTA is an immediate constant offset to be
19623 added to THIS. If VCALL_OFFSET is nonzero, the word at
19624 *(*this + vcall_offset) should be added to THIS. */
19627 x86_output_mi_thunk (FILE *file ATTRIBUTE_UNUSED
,
19628 tree thunk ATTRIBUTE_UNUSED
, HOST_WIDE_INT delta
,
19629 HOST_WIDE_INT vcall_offset
, tree function
)
19632 rtx
this = x86_this_parameter (function
);
19635 /* If VCALL_OFFSET, we'll need THIS in a register. Might as well
19636 pull it in now and let DELTA benefit. */
19639 else if (vcall_offset
)
19641 /* Put the this parameter into %eax. */
19643 xops
[1] = this_reg
= gen_rtx_REG (Pmode
, 0);
19644 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops
);
19647 this_reg
= NULL_RTX
;
19649 /* Adjust the this parameter by a fixed constant. */
19652 xops
[0] = GEN_INT (delta
);
19653 xops
[1] = this_reg
? this_reg
: this;
19656 if (!x86_64_general_operand (xops
[0], DImode
))
19658 tmp
= gen_rtx_REG (DImode
, R10_REG
);
19660 output_asm_insn ("mov{q}\t{%1, %0|%0, %1}", xops
);
19664 output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops
);
19667 output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops
);
19670 /* Adjust the this parameter by a value stored in the vtable. */
19674 tmp
= gen_rtx_REG (DImode
, R10_REG
);
19677 int tmp_regno
= 2 /* ECX */;
19678 if (lookup_attribute ("fastcall",
19679 TYPE_ATTRIBUTES (TREE_TYPE (function
))))
19680 tmp_regno
= 0 /* EAX */;
19681 tmp
= gen_rtx_REG (SImode
, tmp_regno
);
19684 xops
[0] = gen_rtx_MEM (Pmode
, this_reg
);
19687 output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops
);
19689 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops
);
19691 /* Adjust the this parameter. */
19692 xops
[0] = gen_rtx_MEM (Pmode
, plus_constant (tmp
, vcall_offset
));
19693 if (TARGET_64BIT
&& !memory_operand (xops
[0], Pmode
))
19695 rtx tmp2
= gen_rtx_REG (DImode
, R11_REG
);
19696 xops
[0] = GEN_INT (vcall_offset
);
19698 output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops
);
19699 xops
[0] = gen_rtx_MEM (Pmode
, gen_rtx_PLUS (Pmode
, tmp
, tmp2
));
19701 xops
[1] = this_reg
;
19703 output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops
);
19705 output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops
);
19708 /* If necessary, drop THIS back to its stack slot. */
19709 if (this_reg
&& this_reg
!= this)
19711 xops
[0] = this_reg
;
19713 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops
);
19716 xops
[0] = XEXP (DECL_RTL (function
), 0);
19719 if (!flag_pic
|| (*targetm
.binds_local_p
) (function
))
19720 output_asm_insn ("jmp\t%P0", xops
);
19723 tmp
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, xops
[0]), UNSPEC_GOTPCREL
);
19724 tmp
= gen_rtx_CONST (Pmode
, tmp
);
19725 tmp
= gen_rtx_MEM (QImode
, tmp
);
19727 output_asm_insn ("jmp\t%A0", xops
);
19732 if (!flag_pic
|| (*targetm
.binds_local_p
) (function
))
19733 output_asm_insn ("jmp\t%P0", xops
);
19738 rtx sym_ref
= XEXP (DECL_RTL (function
), 0);
19739 tmp
= (gen_rtx_SYMBOL_REF
19741 machopic_indirection_name (sym_ref
, /*stub_p=*/true)));
19742 tmp
= gen_rtx_MEM (QImode
, tmp
);
19744 output_asm_insn ("jmp\t%0", xops
);
19747 #endif /* TARGET_MACHO */
19749 tmp
= gen_rtx_REG (SImode
, 2 /* ECX */);
19750 output_set_got (tmp
, NULL_RTX
);
19753 output_asm_insn ("mov{l}\t{%0@GOT(%1), %1|%1, %0@GOT[%1]}", xops
);
19754 output_asm_insn ("jmp\t{*}%1", xops
);
19760 x86_file_start (void)
19762 default_file_start ();
19764 darwin_file_start ();
19766 if (X86_FILE_START_VERSION_DIRECTIVE
)
19767 fputs ("\t.version\t\"01.01\"\n", asm_out_file
);
19768 if (X86_FILE_START_FLTUSED
)
19769 fputs ("\t.global\t__fltused\n", asm_out_file
);
19770 if (ix86_asm_dialect
== ASM_INTEL
)
19771 fputs ("\t.intel_syntax\n", asm_out_file
);
19775 x86_field_alignment (tree field
, int computed
)
19777 enum machine_mode mode
;
19778 tree type
= TREE_TYPE (field
);
19780 if (TARGET_64BIT
|| TARGET_ALIGN_DOUBLE
)
19782 mode
= TYPE_MODE (TREE_CODE (type
) == ARRAY_TYPE
19783 ? get_inner_array_type (type
) : type
);
19784 if (mode
== DFmode
|| mode
== DCmode
19785 || GET_MODE_CLASS (mode
) == MODE_INT
19786 || GET_MODE_CLASS (mode
) == MODE_COMPLEX_INT
)
19787 return MIN (32, computed
);
19791 /* Output assembler code to FILE to increment profiler label # LABELNO
19792 for profiling a function entry. */
19794 x86_function_profiler (FILE *file
, int labelno ATTRIBUTE_UNUSED
)
19799 #ifndef NO_PROFILE_COUNTERS
19800 fprintf (file
, "\tleaq\t%sP%d@(%%rip),%%r11\n", LPREFIX
, labelno
);
19802 fprintf (file
, "\tcall\t*%s@GOTPCREL(%%rip)\n", MCOUNT_NAME
);
19806 #ifndef NO_PROFILE_COUNTERS
19807 fprintf (file
, "\tmovq\t$%sP%d,%%r11\n", LPREFIX
, labelno
);
19809 fprintf (file
, "\tcall\t%s\n", MCOUNT_NAME
);
19813 #ifndef NO_PROFILE_COUNTERS
19814 fprintf (file
, "\tleal\t%sP%d@GOTOFF(%%ebx),%%%s\n",
19815 LPREFIX
, labelno
, PROFILE_COUNT_REGISTER
);
19817 fprintf (file
, "\tcall\t*%s@GOT(%%ebx)\n", MCOUNT_NAME
);
19821 #ifndef NO_PROFILE_COUNTERS
19822 fprintf (file
, "\tmovl\t$%sP%d,%%%s\n", LPREFIX
, labelno
,
19823 PROFILE_COUNT_REGISTER
);
19825 fprintf (file
, "\tcall\t%s\n", MCOUNT_NAME
);
19829 /* We don't have exact information about the insn sizes, but we may assume
19830 quite safely that we are informed about all 1 byte insns and memory
19831 address sizes. This is enough to eliminate unnecessary padding in
19835 min_insn_size (rtx insn
)
19839 if (!INSN_P (insn
) || !active_insn_p (insn
))
19842 /* Discard alignments we've emit and jump instructions. */
19843 if (GET_CODE (PATTERN (insn
)) == UNSPEC_VOLATILE
19844 && XINT (PATTERN (insn
), 1) == UNSPECV_ALIGN
)
19847 && (GET_CODE (PATTERN (insn
)) == ADDR_VEC
19848 || GET_CODE (PATTERN (insn
)) == ADDR_DIFF_VEC
))
19851 /* Important case - calls are always 5 bytes.
19852 It is common to have many calls in the row. */
19854 && symbolic_reference_mentioned_p (PATTERN (insn
))
19855 && !SIBLING_CALL_P (insn
))
19857 if (get_attr_length (insn
) <= 1)
19860 /* For normal instructions we may rely on the sizes of addresses
19861 and the presence of symbol to require 4 bytes of encoding.
19862 This is not the case for jumps where references are PC relative. */
19863 if (!JUMP_P (insn
))
19865 l
= get_attr_length_address (insn
);
19866 if (l
< 4 && symbolic_reference_mentioned_p (PATTERN (insn
)))
19875 /* AMD K8 core mispredicts jumps when there are more than 3 jumps in 16 byte
19879 ix86_avoid_jump_misspredicts (void)
19881 rtx insn
, start
= get_insns ();
19882 int nbytes
= 0, njumps
= 0;
19885 /* Look for all minimal intervals of instructions containing 4 jumps.
19886 The intervals are bounded by START and INSN. NBYTES is the total
19887 size of instructions in the interval including INSN and not including
19888 START. When the NBYTES is smaller than 16 bytes, it is possible
19889 that the end of START and INSN ends up in the same 16byte page.
19891 The smallest offset in the page INSN can start is the case where START
19892 ends on the offset 0. Offset of INSN is then NBYTES - sizeof (INSN).
19893 We add p2align to 16byte window with maxskip 17 - NBYTES + sizeof (INSN).
19895 for (insn
= get_insns (); insn
; insn
= NEXT_INSN (insn
))
19898 nbytes
+= min_insn_size (insn
);
19900 fprintf(dump_file
, "Insn %i estimated to %i bytes\n",
19901 INSN_UID (insn
), min_insn_size (insn
));
19903 && GET_CODE (PATTERN (insn
)) != ADDR_VEC
19904 && GET_CODE (PATTERN (insn
)) != ADDR_DIFF_VEC
)
19912 start
= NEXT_INSN (start
);
19913 if ((JUMP_P (start
)
19914 && GET_CODE (PATTERN (start
)) != ADDR_VEC
19915 && GET_CODE (PATTERN (start
)) != ADDR_DIFF_VEC
)
19917 njumps
--, isjump
= 1;
19920 nbytes
-= min_insn_size (start
);
19922 gcc_assert (njumps
>= 0);
19924 fprintf (dump_file
, "Interval %i to %i has %i bytes\n",
19925 INSN_UID (start
), INSN_UID (insn
), nbytes
);
19927 if (njumps
== 3 && isjump
&& nbytes
< 16)
19929 int padsize
= 15 - nbytes
+ min_insn_size (insn
);
19932 fprintf (dump_file
, "Padding insn %i by %i bytes!\n",
19933 INSN_UID (insn
), padsize
);
19934 emit_insn_before (gen_align (GEN_INT (padsize
)), insn
);
19939 /* AMD Athlon works faster
19940 when RET is not destination of conditional jump or directly preceded
19941 by other jump instruction. We avoid the penalty by inserting NOP just
19942 before the RET instructions in such cases. */
19944 ix86_pad_returns (void)
19949 FOR_EACH_EDGE (e
, ei
, EXIT_BLOCK_PTR
->preds
)
19951 basic_block bb
= e
->src
;
19952 rtx ret
= BB_END (bb
);
19954 bool replace
= false;
19956 if (!JUMP_P (ret
) || GET_CODE (PATTERN (ret
)) != RETURN
19957 || !maybe_hot_bb_p (bb
))
19959 for (prev
= PREV_INSN (ret
); prev
; prev
= PREV_INSN (prev
))
19960 if (active_insn_p (prev
) || LABEL_P (prev
))
19962 if (prev
&& LABEL_P (prev
))
19967 FOR_EACH_EDGE (e
, ei
, bb
->preds
)
19968 if (EDGE_FREQUENCY (e
) && e
->src
->index
>= 0
19969 && !(e
->flags
& EDGE_FALLTHRU
))
19974 prev
= prev_active_insn (ret
);
19976 && ((JUMP_P (prev
) && any_condjump_p (prev
))
19979 /* Empty functions get branch mispredict even when the jump destination
19980 is not visible to us. */
19981 if (!prev
&& cfun
->function_frequency
> FUNCTION_FREQUENCY_UNLIKELY_EXECUTED
)
19986 emit_insn_before (gen_return_internal_long (), ret
);
19992 /* Implement machine specific optimizations. We implement padding of returns
19993 for K8 CPUs and pass to avoid 4 jumps in the single 16 byte window. */
19997 if (TARGET_PAD_RETURNS
&& optimize
&& !optimize_size
)
19998 ix86_pad_returns ();
19999 if (TARGET_FOUR_JUMP_LIMIT
&& optimize
&& !optimize_size
)
20000 ix86_avoid_jump_misspredicts ();
20003 /* Return nonzero when QImode register that must be represented via REX prefix
20006 x86_extended_QIreg_mentioned_p (rtx insn
)
20009 extract_insn_cached (insn
);
20010 for (i
= 0; i
< recog_data
.n_operands
; i
++)
20011 if (REG_P (recog_data
.operand
[i
])
20012 && REGNO (recog_data
.operand
[i
]) >= 4)
20017 /* Return nonzero when P points to register encoded via REX prefix.
20018 Called via for_each_rtx. */
20020 extended_reg_mentioned_1 (rtx
*p
, void *data ATTRIBUTE_UNUSED
)
20022 unsigned int regno
;
20025 regno
= REGNO (*p
);
20026 return REX_INT_REGNO_P (regno
) || REX_SSE_REGNO_P (regno
);
20029 /* Return true when INSN mentions register that must be encoded using REX
20032 x86_extended_reg_mentioned_p (rtx insn
)
20034 return for_each_rtx (&PATTERN (insn
), extended_reg_mentioned_1
, NULL
);
20037 /* Generate an unsigned DImode/SImode to FP conversion. This is the same code
20038 optabs would emit if we didn't have TFmode patterns. */
20041 x86_emit_floatuns (rtx operands
[2])
20043 rtx neglab
, donelab
, i0
, i1
, f0
, in
, out
;
20044 enum machine_mode mode
, inmode
;
20046 inmode
= GET_MODE (operands
[1]);
20047 gcc_assert (inmode
== SImode
|| inmode
== DImode
);
20050 in
= force_reg (inmode
, operands
[1]);
20051 mode
= GET_MODE (out
);
20052 neglab
= gen_label_rtx ();
20053 donelab
= gen_label_rtx ();
20054 f0
= gen_reg_rtx (mode
);
20056 emit_cmp_and_jump_insns (in
, const0_rtx
, LT
, const0_rtx
, inmode
, 0, neglab
);
20058 expand_float (out
, in
, 0);
20060 emit_jump_insn (gen_jump (donelab
));
20063 emit_label (neglab
);
20065 i0
= expand_simple_binop (inmode
, LSHIFTRT
, in
, const1_rtx
, NULL
,
20067 i1
= expand_simple_binop (inmode
, AND
, in
, const1_rtx
, NULL
,
20069 i0
= expand_simple_binop (inmode
, IOR
, i0
, i1
, i0
, 1, OPTAB_DIRECT
);
20071 expand_float (f0
, i0
, 0);
20073 emit_insn (gen_rtx_SET (VOIDmode
, out
, gen_rtx_PLUS (mode
, f0
, f0
)));
20075 emit_label (donelab
);
20078 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
20079 with all elements equal to VAR. Return true if successful. */
20082 ix86_expand_vector_init_duplicate (bool mmx_ok
, enum machine_mode mode
,
20083 rtx target
, rtx val
)
20085 enum machine_mode smode
, wsmode
, wvmode
;
20100 val
= force_reg (GET_MODE_INNER (mode
), val
);
20101 x
= gen_rtx_VEC_DUPLICATE (mode
, val
);
20102 emit_insn (gen_rtx_SET (VOIDmode
, target
, x
));
20108 if (TARGET_SSE
|| TARGET_3DNOW_A
)
20110 val
= gen_lowpart (SImode
, val
);
20111 x
= gen_rtx_TRUNCATE (HImode
, val
);
20112 x
= gen_rtx_VEC_DUPLICATE (mode
, x
);
20113 emit_insn (gen_rtx_SET (VOIDmode
, target
, x
));
20135 /* Extend HImode to SImode using a paradoxical SUBREG. */
20136 tmp1
= gen_reg_rtx (SImode
);
20137 emit_move_insn (tmp1
, gen_lowpart (SImode
, val
));
20138 /* Insert the SImode value as low element of V4SImode vector. */
20139 tmp2
= gen_reg_rtx (V4SImode
);
20140 tmp1
= gen_rtx_VEC_MERGE (V4SImode
,
20141 gen_rtx_VEC_DUPLICATE (V4SImode
, tmp1
),
20142 CONST0_RTX (V4SImode
),
20144 emit_insn (gen_rtx_SET (VOIDmode
, tmp2
, tmp1
));
20145 /* Cast the V4SImode vector back to a V8HImode vector. */
20146 tmp1
= gen_reg_rtx (V8HImode
);
20147 emit_move_insn (tmp1
, gen_lowpart (V8HImode
, tmp2
));
20148 /* Duplicate the low short through the whole low SImode word. */
20149 emit_insn (gen_sse2_punpcklwd (tmp1
, tmp1
, tmp1
));
20150 /* Cast the V8HImode vector back to a V4SImode vector. */
20151 tmp2
= gen_reg_rtx (V4SImode
);
20152 emit_move_insn (tmp2
, gen_lowpart (V4SImode
, tmp1
));
20153 /* Replicate the low element of the V4SImode vector. */
20154 emit_insn (gen_sse2_pshufd (tmp2
, tmp2
, const0_rtx
));
20155 /* Cast the V2SImode back to V8HImode, and store in target. */
20156 emit_move_insn (target
, gen_lowpart (V8HImode
, tmp2
));
20167 /* Extend QImode to SImode using a paradoxical SUBREG. */
20168 tmp1
= gen_reg_rtx (SImode
);
20169 emit_move_insn (tmp1
, gen_lowpart (SImode
, val
));
20170 /* Insert the SImode value as low element of V4SImode vector. */
20171 tmp2
= gen_reg_rtx (V4SImode
);
20172 tmp1
= gen_rtx_VEC_MERGE (V4SImode
,
20173 gen_rtx_VEC_DUPLICATE (V4SImode
, tmp1
),
20174 CONST0_RTX (V4SImode
),
20176 emit_insn (gen_rtx_SET (VOIDmode
, tmp2
, tmp1
));
20177 /* Cast the V4SImode vector back to a V16QImode vector. */
20178 tmp1
= gen_reg_rtx (V16QImode
);
20179 emit_move_insn (tmp1
, gen_lowpart (V16QImode
, tmp2
));
20180 /* Duplicate the low byte through the whole low SImode word. */
20181 emit_insn (gen_sse2_punpcklbw (tmp1
, tmp1
, tmp1
));
20182 emit_insn (gen_sse2_punpcklbw (tmp1
, tmp1
, tmp1
));
20183 /* Cast the V16QImode vector back to a V4SImode vector. */
20184 tmp2
= gen_reg_rtx (V4SImode
);
20185 emit_move_insn (tmp2
, gen_lowpart (V4SImode
, tmp1
));
20186 /* Replicate the low element of the V4SImode vector. */
20187 emit_insn (gen_sse2_pshufd (tmp2
, tmp2
, const0_rtx
));
20188 /* Cast the V2SImode back to V16QImode, and store in target. */
20189 emit_move_insn (target
, gen_lowpart (V16QImode
, tmp2
));
20197 /* Replicate the value once into the next wider mode and recurse. */
20198 val
= convert_modes (wsmode
, smode
, val
, true);
20199 x
= expand_simple_binop (wsmode
, ASHIFT
, val
,
20200 GEN_INT (GET_MODE_BITSIZE (smode
)),
20201 NULL_RTX
, 1, OPTAB_LIB_WIDEN
);
20202 val
= expand_simple_binop (wsmode
, IOR
, val
, x
, x
, 1, OPTAB_LIB_WIDEN
);
20204 x
= gen_reg_rtx (wvmode
);
20205 if (!ix86_expand_vector_init_duplicate (mmx_ok
, wvmode
, x
, val
))
20206 gcc_unreachable ();
20207 emit_move_insn (target
, gen_lowpart (mode
, x
));
20215 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
20216 whose ONE_VAR element is VAR, and other elements are zero. Return true
20220 ix86_expand_vector_init_one_nonzero (bool mmx_ok
, enum machine_mode mode
,
20221 rtx target
, rtx var
, int one_var
)
20223 enum machine_mode vsimode
;
20239 var
= force_reg (GET_MODE_INNER (mode
), var
);
20240 x
= gen_rtx_VEC_CONCAT (mode
, var
, CONST0_RTX (GET_MODE_INNER (mode
)));
20241 emit_insn (gen_rtx_SET (VOIDmode
, target
, x
));
20246 if (!REG_P (target
) || REGNO (target
) < FIRST_PSEUDO_REGISTER
)
20247 new_target
= gen_reg_rtx (mode
);
20249 new_target
= target
;
20250 var
= force_reg (GET_MODE_INNER (mode
), var
);
20251 x
= gen_rtx_VEC_DUPLICATE (mode
, var
);
20252 x
= gen_rtx_VEC_MERGE (mode
, x
, CONST0_RTX (mode
), const1_rtx
);
20253 emit_insn (gen_rtx_SET (VOIDmode
, new_target
, x
));
20256 /* We need to shuffle the value to the correct position, so
20257 create a new pseudo to store the intermediate result. */
20259 /* With SSE2, we can use the integer shuffle insns. */
20260 if (mode
!= V4SFmode
&& TARGET_SSE2
)
20262 emit_insn (gen_sse2_pshufd_1 (new_target
, new_target
,
20264 GEN_INT (one_var
== 1 ? 0 : 1),
20265 GEN_INT (one_var
== 2 ? 0 : 1),
20266 GEN_INT (one_var
== 3 ? 0 : 1)));
20267 if (target
!= new_target
)
20268 emit_move_insn (target
, new_target
);
20272 /* Otherwise convert the intermediate result to V4SFmode and
20273 use the SSE1 shuffle instructions. */
20274 if (mode
!= V4SFmode
)
20276 tmp
= gen_reg_rtx (V4SFmode
);
20277 emit_move_insn (tmp
, gen_lowpart (V4SFmode
, new_target
));
20282 emit_insn (gen_sse_shufps_1 (tmp
, tmp
, tmp
,
20284 GEN_INT (one_var
== 1 ? 0 : 1),
20285 GEN_INT (one_var
== 2 ? 0+4 : 1+4),
20286 GEN_INT (one_var
== 3 ? 0+4 : 1+4)));
20288 if (mode
!= V4SFmode
)
20289 emit_move_insn (target
, gen_lowpart (V4SImode
, tmp
));
20290 else if (tmp
!= target
)
20291 emit_move_insn (target
, tmp
);
20293 else if (target
!= new_target
)
20294 emit_move_insn (target
, new_target
);
20299 vsimode
= V4SImode
;
20305 vsimode
= V2SImode
;
20311 /* Zero extend the variable element to SImode and recurse. */
20312 var
= convert_modes (SImode
, GET_MODE_INNER (mode
), var
, true);
20314 x
= gen_reg_rtx (vsimode
);
20315 if (!ix86_expand_vector_init_one_nonzero (mmx_ok
, vsimode
, x
,
20317 gcc_unreachable ();
20319 emit_move_insn (target
, gen_lowpart (mode
, x
));
20327 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
20328 consisting of the values in VALS. It is known that all elements
20329 except ONE_VAR are constants. Return true if successful. */
20332 ix86_expand_vector_init_one_var (bool mmx_ok
, enum machine_mode mode
,
20333 rtx target
, rtx vals
, int one_var
)
20335 rtx var
= XVECEXP (vals
, 0, one_var
);
20336 enum machine_mode wmode
;
20339 const_vec
= copy_rtx (vals
);
20340 XVECEXP (const_vec
, 0, one_var
) = CONST0_RTX (GET_MODE_INNER (mode
));
20341 const_vec
= gen_rtx_CONST_VECTOR (mode
, XVEC (const_vec
, 0));
20349 /* For the two element vectors, it's just as easy to use
20350 the general case. */
20366 /* There's no way to set one QImode entry easily. Combine
20367 the variable value with its adjacent constant value, and
20368 promote to an HImode set. */
20369 x
= XVECEXP (vals
, 0, one_var
^ 1);
20372 var
= convert_modes (HImode
, QImode
, var
, true);
20373 var
= expand_simple_binop (HImode
, ASHIFT
, var
, GEN_INT (8),
20374 NULL_RTX
, 1, OPTAB_LIB_WIDEN
);
20375 x
= GEN_INT (INTVAL (x
) & 0xff);
20379 var
= convert_modes (HImode
, QImode
, var
, true);
20380 x
= gen_int_mode (INTVAL (x
) << 8, HImode
);
20382 if (x
!= const0_rtx
)
20383 var
= expand_simple_binop (HImode
, IOR
, var
, x
, var
,
20384 1, OPTAB_LIB_WIDEN
);
20386 x
= gen_reg_rtx (wmode
);
20387 emit_move_insn (x
, gen_lowpart (wmode
, const_vec
));
20388 ix86_expand_vector_set (mmx_ok
, x
, var
, one_var
>> 1);
20390 emit_move_insn (target
, gen_lowpart (mode
, x
));
20397 emit_move_insn (target
, const_vec
);
20398 ix86_expand_vector_set (mmx_ok
, target
, var
, one_var
);
20402 /* A subroutine of ix86_expand_vector_init. Handle the most general case:
20403 all values variable, and none identical. */
20406 ix86_expand_vector_init_general (bool mmx_ok
, enum machine_mode mode
,
20407 rtx target
, rtx vals
)
20409 enum machine_mode half_mode
= GET_MODE_INNER (mode
);
20410 rtx op0
= NULL
, op1
= NULL
;
20411 bool use_vec_concat
= false;
20417 if (!mmx_ok
&& !TARGET_SSE
)
20423 /* For the two element vectors, we always implement VEC_CONCAT. */
20424 op0
= XVECEXP (vals
, 0, 0);
20425 op1
= XVECEXP (vals
, 0, 1);
20426 use_vec_concat
= true;
20430 half_mode
= V2SFmode
;
20433 half_mode
= V2SImode
;
20439 /* For V4SF and V4SI, we implement a concat of two V2 vectors.
20440 Recurse to load the two halves. */
20442 op0
= gen_reg_rtx (half_mode
);
20443 v
= gen_rtvec (2, XVECEXP (vals
, 0, 0), XVECEXP (vals
, 0, 1));
20444 ix86_expand_vector_init (false, op0
, gen_rtx_PARALLEL (half_mode
, v
));
20446 op1
= gen_reg_rtx (half_mode
);
20447 v
= gen_rtvec (2, XVECEXP (vals
, 0, 2), XVECEXP (vals
, 0, 3));
20448 ix86_expand_vector_init (false, op1
, gen_rtx_PARALLEL (half_mode
, v
));
20450 use_vec_concat
= true;
20461 gcc_unreachable ();
20464 if (use_vec_concat
)
20466 if (!register_operand (op0
, half_mode
))
20467 op0
= force_reg (half_mode
, op0
);
20468 if (!register_operand (op1
, half_mode
))
20469 op1
= force_reg (half_mode
, op1
);
20471 emit_insn (gen_rtx_SET (VOIDmode
, target
,
20472 gen_rtx_VEC_CONCAT (mode
, op0
, op1
)));
20476 int i
, j
, n_elts
, n_words
, n_elt_per_word
;
20477 enum machine_mode inner_mode
;
20478 rtx words
[4], shift
;
20480 inner_mode
= GET_MODE_INNER (mode
);
20481 n_elts
= GET_MODE_NUNITS (mode
);
20482 n_words
= GET_MODE_SIZE (mode
) / UNITS_PER_WORD
;
20483 n_elt_per_word
= n_elts
/ n_words
;
20484 shift
= GEN_INT (GET_MODE_BITSIZE (inner_mode
));
20486 for (i
= 0; i
< n_words
; ++i
)
20488 rtx word
= NULL_RTX
;
20490 for (j
= 0; j
< n_elt_per_word
; ++j
)
20492 rtx elt
= XVECEXP (vals
, 0, (i
+1)*n_elt_per_word
- j
- 1);
20493 elt
= convert_modes (word_mode
, inner_mode
, elt
, true);
20499 word
= expand_simple_binop (word_mode
, ASHIFT
, word
, shift
,
20500 word
, 1, OPTAB_LIB_WIDEN
);
20501 word
= expand_simple_binop (word_mode
, IOR
, word
, elt
,
20502 word
, 1, OPTAB_LIB_WIDEN
);
20510 emit_move_insn (target
, gen_lowpart (mode
, words
[0]));
20511 else if (n_words
== 2)
20513 rtx tmp
= gen_reg_rtx (mode
);
20514 emit_insn (gen_rtx_CLOBBER (VOIDmode
, tmp
));
20515 emit_move_insn (gen_lowpart (word_mode
, tmp
), words
[0]);
20516 emit_move_insn (gen_highpart (word_mode
, tmp
), words
[1]);
20517 emit_move_insn (target
, tmp
);
20519 else if (n_words
== 4)
20521 rtx tmp
= gen_reg_rtx (V4SImode
);
20522 vals
= gen_rtx_PARALLEL (V4SImode
, gen_rtvec_v (4, words
));
20523 ix86_expand_vector_init_general (false, V4SImode
, tmp
, vals
);
20524 emit_move_insn (target
, gen_lowpart (mode
, tmp
));
20527 gcc_unreachable ();
20531 /* Initialize vector TARGET via VALS. Suppress the use of MMX
20532 instructions unless MMX_OK is true. */
20535 ix86_expand_vector_init (bool mmx_ok
, rtx target
, rtx vals
)
20537 enum machine_mode mode
= GET_MODE (target
);
20538 enum machine_mode inner_mode
= GET_MODE_INNER (mode
);
20539 int n_elts
= GET_MODE_NUNITS (mode
);
20540 int n_var
= 0, one_var
= -1;
20541 bool all_same
= true, all_const_zero
= true;
20545 for (i
= 0; i
< n_elts
; ++i
)
20547 x
= XVECEXP (vals
, 0, i
);
20548 if (!CONSTANT_P (x
))
20549 n_var
++, one_var
= i
;
20550 else if (x
!= CONST0_RTX (inner_mode
))
20551 all_const_zero
= false;
20552 if (i
> 0 && !rtx_equal_p (x
, XVECEXP (vals
, 0, 0)))
20556 /* Constants are best loaded from the constant pool. */
20559 emit_move_insn (target
, gen_rtx_CONST_VECTOR (mode
, XVEC (vals
, 0)));
20563 /* If all values are identical, broadcast the value. */
20565 && ix86_expand_vector_init_duplicate (mmx_ok
, mode
, target
,
20566 XVECEXP (vals
, 0, 0)))
20569 /* Values where only one field is non-constant are best loaded from
20570 the pool and overwritten via move later. */
20574 && ix86_expand_vector_init_one_nonzero (mmx_ok
, mode
, target
,
20575 XVECEXP (vals
, 0, one_var
),
20579 if (ix86_expand_vector_init_one_var (mmx_ok
, mode
, target
, vals
, one_var
))
20583 ix86_expand_vector_init_general (mmx_ok
, mode
, target
, vals
);
20587 ix86_expand_vector_set (bool mmx_ok
, rtx target
, rtx val
, int elt
)
20589 enum machine_mode mode
= GET_MODE (target
);
20590 enum machine_mode inner_mode
= GET_MODE_INNER (mode
);
20591 bool use_vec_merge
= false;
20600 tmp
= gen_reg_rtx (GET_MODE_INNER (mode
));
20601 ix86_expand_vector_extract (true, tmp
, target
, 1 - elt
);
20603 tmp
= gen_rtx_VEC_CONCAT (mode
, tmp
, val
);
20605 tmp
= gen_rtx_VEC_CONCAT (mode
, val
, tmp
);
20606 emit_insn (gen_rtx_SET (VOIDmode
, target
, tmp
));
20616 /* For the two element vectors, we implement a VEC_CONCAT with
20617 the extraction of the other element. */
20619 tmp
= gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (1, GEN_INT (1 - elt
)));
20620 tmp
= gen_rtx_VEC_SELECT (inner_mode
, target
, tmp
);
20623 op0
= val
, op1
= tmp
;
20625 op0
= tmp
, op1
= val
;
20627 tmp
= gen_rtx_VEC_CONCAT (mode
, op0
, op1
);
20628 emit_insn (gen_rtx_SET (VOIDmode
, target
, tmp
));
20636 use_vec_merge
= true;
20640 /* tmp = target = A B C D */
20641 tmp
= copy_to_reg (target
);
20642 /* target = A A B B */
20643 emit_insn (gen_sse_unpcklps (target
, target
, target
));
20644 /* target = X A B B */
20645 ix86_expand_vector_set (false, target
, val
, 0);
20646 /* target = A X C D */
20647 emit_insn (gen_sse_shufps_1 (target
, target
, tmp
,
20648 GEN_INT (1), GEN_INT (0),
20649 GEN_INT (2+4), GEN_INT (3+4)));
20653 /* tmp = target = A B C D */
20654 tmp
= copy_to_reg (target
);
20655 /* tmp = X B C D */
20656 ix86_expand_vector_set (false, tmp
, val
, 0);
20657 /* target = A B X D */
20658 emit_insn (gen_sse_shufps_1 (target
, target
, tmp
,
20659 GEN_INT (0), GEN_INT (1),
20660 GEN_INT (0+4), GEN_INT (3+4)));
20664 /* tmp = target = A B C D */
20665 tmp
= copy_to_reg (target
);
20666 /* tmp = X B C D */
20667 ix86_expand_vector_set (false, tmp
, val
, 0);
20668 /* target = A B X D */
20669 emit_insn (gen_sse_shufps_1 (target
, target
, tmp
,
20670 GEN_INT (0), GEN_INT (1),
20671 GEN_INT (2+4), GEN_INT (0+4)));
20675 gcc_unreachable ();
20680 /* Element 0 handled by vec_merge below. */
20683 use_vec_merge
= true;
20689 /* With SSE2, use integer shuffles to swap element 0 and ELT,
20690 store into element 0, then shuffle them back. */
20694 order
[0] = GEN_INT (elt
);
20695 order
[1] = const1_rtx
;
20696 order
[2] = const2_rtx
;
20697 order
[3] = GEN_INT (3);
20698 order
[elt
] = const0_rtx
;
20700 emit_insn (gen_sse2_pshufd_1 (target
, target
, order
[0],
20701 order
[1], order
[2], order
[3]));
20703 ix86_expand_vector_set (false, target
, val
, 0);
20705 emit_insn (gen_sse2_pshufd_1 (target
, target
, order
[0],
20706 order
[1], order
[2], order
[3]));
20710 /* For SSE1, we have to reuse the V4SF code. */
20711 ix86_expand_vector_set (false, gen_lowpart (V4SFmode
, target
),
20712 gen_lowpart (SFmode
, val
), elt
);
20717 use_vec_merge
= TARGET_SSE2
;
20720 use_vec_merge
= mmx_ok
&& (TARGET_SSE
|| TARGET_3DNOW_A
);
20731 tmp
= gen_rtx_VEC_DUPLICATE (mode
, val
);
20732 tmp
= gen_rtx_VEC_MERGE (mode
, tmp
, target
, GEN_INT (1 << elt
));
20733 emit_insn (gen_rtx_SET (VOIDmode
, target
, tmp
));
20737 rtx mem
= assign_stack_temp (mode
, GET_MODE_SIZE (mode
), false);
20739 emit_move_insn (mem
, target
);
20741 tmp
= adjust_address (mem
, inner_mode
, elt
*GET_MODE_SIZE (inner_mode
));
20742 emit_move_insn (tmp
, val
);
20744 emit_move_insn (target
, mem
);
20749 ix86_expand_vector_extract (bool mmx_ok
, rtx target
, rtx vec
, int elt
)
20751 enum machine_mode mode
= GET_MODE (vec
);
20752 enum machine_mode inner_mode
= GET_MODE_INNER (mode
);
20753 bool use_vec_extr
= false;
20766 use_vec_extr
= true;
20778 tmp
= gen_reg_rtx (mode
);
20779 emit_insn (gen_sse_shufps_1 (tmp
, vec
, vec
,
20780 GEN_INT (elt
), GEN_INT (elt
),
20781 GEN_INT (elt
+4), GEN_INT (elt
+4)));
20785 tmp
= gen_reg_rtx (mode
);
20786 emit_insn (gen_sse_unpckhps (tmp
, vec
, vec
));
20790 gcc_unreachable ();
20793 use_vec_extr
= true;
20808 tmp
= gen_reg_rtx (mode
);
20809 emit_insn (gen_sse2_pshufd_1 (tmp
, vec
,
20810 GEN_INT (elt
), GEN_INT (elt
),
20811 GEN_INT (elt
), GEN_INT (elt
)));
20815 tmp
= gen_reg_rtx (mode
);
20816 emit_insn (gen_sse2_punpckhdq (tmp
, vec
, vec
));
20820 gcc_unreachable ();
20823 use_vec_extr
= true;
20828 /* For SSE1, we have to reuse the V4SF code. */
20829 ix86_expand_vector_extract (false, gen_lowpart (SFmode
, target
),
20830 gen_lowpart (V4SFmode
, vec
), elt
);
20836 use_vec_extr
= TARGET_SSE2
;
20839 use_vec_extr
= mmx_ok
&& (TARGET_SSE
|| TARGET_3DNOW_A
);
20844 /* ??? Could extract the appropriate HImode element and shift. */
20851 tmp
= gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (1, GEN_INT (elt
)));
20852 tmp
= gen_rtx_VEC_SELECT (inner_mode
, vec
, tmp
);
20854 /* Let the rtl optimizers know about the zero extension performed. */
20855 if (inner_mode
== HImode
)
20857 tmp
= gen_rtx_ZERO_EXTEND (SImode
, tmp
);
20858 target
= gen_lowpart (SImode
, target
);
20861 emit_insn (gen_rtx_SET (VOIDmode
, target
, tmp
));
20865 rtx mem
= assign_stack_temp (mode
, GET_MODE_SIZE (mode
), false);
20867 emit_move_insn (mem
, vec
);
20869 tmp
= adjust_address (mem
, inner_mode
, elt
*GET_MODE_SIZE (inner_mode
));
20870 emit_move_insn (target
, tmp
);
20874 /* Expand a vector reduction on V4SFmode for SSE1. FN is the binary
20875 pattern to reduce; DEST is the destination; IN is the input vector. */
20878 ix86_expand_reduc_v4sf (rtx (*fn
) (rtx
, rtx
, rtx
), rtx dest
, rtx in
)
20880 rtx tmp1
, tmp2
, tmp3
;
20882 tmp1
= gen_reg_rtx (V4SFmode
);
20883 tmp2
= gen_reg_rtx (V4SFmode
);
20884 tmp3
= gen_reg_rtx (V4SFmode
);
20886 emit_insn (gen_sse_movhlps (tmp1
, in
, in
));
20887 emit_insn (fn (tmp2
, tmp1
, in
));
20889 emit_insn (gen_sse_shufps_1 (tmp3
, tmp2
, tmp2
,
20890 GEN_INT (1), GEN_INT (1),
20891 GEN_INT (1+4), GEN_INT (1+4)));
20892 emit_insn (fn (dest
, tmp2
, tmp3
));
20895 /* Target hook for scalar_mode_supported_p. */
20897 ix86_scalar_mode_supported_p (enum machine_mode mode
)
20899 if (DECIMAL_FLOAT_MODE_P (mode
))
20902 return default_scalar_mode_supported_p (mode
);
20905 /* Implements target hook vector_mode_supported_p. */
20907 ix86_vector_mode_supported_p (enum machine_mode mode
)
20909 if (TARGET_SSE
&& VALID_SSE_REG_MODE (mode
))
20911 if (TARGET_SSE2
&& VALID_SSE2_REG_MODE (mode
))
20913 if (TARGET_MMX
&& VALID_MMX_REG_MODE (mode
))
20915 if (TARGET_3DNOW
&& VALID_MMX_REG_MODE_3DNOW (mode
))
20920 /* Worker function for TARGET_MD_ASM_CLOBBERS.
20922 We do this in the new i386 backend to maintain source compatibility
20923 with the old cc0-based compiler. */
20926 ix86_md_asm_clobbers (tree outputs ATTRIBUTE_UNUSED
,
20927 tree inputs ATTRIBUTE_UNUSED
,
20930 clobbers
= tree_cons (NULL_TREE
, build_string (5, "flags"),
20932 clobbers
= tree_cons (NULL_TREE
, build_string (4, "fpsr"),
20937 /* Return true if this goes in small data/bss. */
20940 ix86_in_large_data_p (tree exp
)
20942 if (ix86_cmodel
!= CM_MEDIUM
&& ix86_cmodel
!= CM_MEDIUM_PIC
)
20945 /* Functions are never large data. */
20946 if (TREE_CODE (exp
) == FUNCTION_DECL
)
20949 if (TREE_CODE (exp
) == VAR_DECL
&& DECL_SECTION_NAME (exp
))
20951 const char *section
= TREE_STRING_POINTER (DECL_SECTION_NAME (exp
));
20952 if (strcmp (section
, ".ldata") == 0
20953 || strcmp (section
, ".lbss") == 0)
20959 HOST_WIDE_INT size
= int_size_in_bytes (TREE_TYPE (exp
));
20961 /* If this is an incomplete type with size 0, then we can't put it
20962 in data because it might be too big when completed. */
20963 if (!size
|| size
> ix86_section_threshold
)
20970 ix86_encode_section_info (tree decl
, rtx rtl
, int first
)
20972 default_encode_section_info (decl
, rtl
, first
);
20974 if (TREE_CODE (decl
) == VAR_DECL
20975 && (TREE_STATIC (decl
) || DECL_EXTERNAL (decl
))
20976 && ix86_in_large_data_p (decl
))
20977 SYMBOL_REF_FLAGS (XEXP (rtl
, 0)) |= SYMBOL_FLAG_FAR_ADDR
;
20980 /* Worker function for REVERSE_CONDITION. */
20983 ix86_reverse_condition (enum rtx_code code
, enum machine_mode mode
)
20985 return (mode
!= CCFPmode
&& mode
!= CCFPUmode
20986 ? reverse_condition (code
)
20987 : reverse_condition_maybe_unordered (code
));
20990 /* Output code to perform an x87 FP register move, from OPERANDS[1]
20994 output_387_reg_move (rtx insn
, rtx
*operands
)
20996 if (REG_P (operands
[1])
20997 && find_regno_note (insn
, REG_DEAD
, REGNO (operands
[1])))
20999 if (REGNO (operands
[0]) == FIRST_STACK_REG
)
21000 return output_387_ffreep (operands
, 0);
21001 return "fstp\t%y0";
21003 if (STACK_TOP_P (operands
[0]))
21004 return "fld%z1\t%y1";
21008 /* Output code to perform a conditional jump to LABEL, if C2 flag in
21009 FP status register is set. */
21012 ix86_emit_fp_unordered_jump (rtx label
)
21014 rtx reg
= gen_reg_rtx (HImode
);
21017 emit_insn (gen_x86_fnstsw_1 (reg
));
21019 if (TARGET_SAHF
&& (TARGET_USE_SAHF
|| optimize_size
))
21021 emit_insn (gen_x86_sahf_1 (reg
));
21023 temp
= gen_rtx_REG (CCmode
, FLAGS_REG
);
21024 temp
= gen_rtx_UNORDERED (VOIDmode
, temp
, const0_rtx
);
21028 emit_insn (gen_testqi_ext_ccno_0 (reg
, GEN_INT (0x04)));
21030 temp
= gen_rtx_REG (CCNOmode
, FLAGS_REG
);
21031 temp
= gen_rtx_NE (VOIDmode
, temp
, const0_rtx
);
21034 temp
= gen_rtx_IF_THEN_ELSE (VOIDmode
, temp
,
21035 gen_rtx_LABEL_REF (VOIDmode
, label
),
21037 temp
= gen_rtx_SET (VOIDmode
, pc_rtx
, temp
);
21039 emit_jump_insn (temp
);
21040 predict_jump (REG_BR_PROB_BASE
* 10 / 100);
21043 /* Output code to perform a log1p XFmode calculation. */
21045 void ix86_emit_i387_log1p (rtx op0
, rtx op1
)
21047 rtx label1
= gen_label_rtx ();
21048 rtx label2
= gen_label_rtx ();
21050 rtx tmp
= gen_reg_rtx (XFmode
);
21051 rtx tmp2
= gen_reg_rtx (XFmode
);
21053 emit_insn (gen_absxf2 (tmp
, op1
));
21054 emit_insn (gen_cmpxf (tmp
,
21055 CONST_DOUBLE_FROM_REAL_VALUE (
21056 REAL_VALUE_ATOF ("0.29289321881345247561810596348408353", XFmode
),
21058 emit_jump_insn (gen_bge (label1
));
21060 emit_move_insn (tmp2
, standard_80387_constant_rtx (4)); /* fldln2 */
21061 emit_insn (gen_fyl2xp1xf3_i387 (op0
, op1
, tmp2
));
21062 emit_jump (label2
);
21064 emit_label (label1
);
21065 emit_move_insn (tmp
, CONST1_RTX (XFmode
));
21066 emit_insn (gen_addxf3 (tmp
, op1
, tmp
));
21067 emit_move_insn (tmp2
, standard_80387_constant_rtx (4)); /* fldln2 */
21068 emit_insn (gen_fyl2xxf3_i387 (op0
, tmp
, tmp2
));
21070 emit_label (label2
);
21073 /* Solaris implementation of TARGET_ASM_NAMED_SECTION. */
21076 i386_solaris_elf_named_section (const char *name
, unsigned int flags
,
21079 /* With Binutils 2.15, the "@unwind" marker must be specified on
21080 every occurrence of the ".eh_frame" section, not just the first
21083 && strcmp (name
, ".eh_frame") == 0)
21085 fprintf (asm_out_file
, "\t.section\t%s,\"%s\",@unwind\n", name
,
21086 flags
& SECTION_WRITE
? "aw" : "a");
21089 default_elf_asm_named_section (name
, flags
, decl
);
21092 /* Return the mangling of TYPE if it is an extended fundamental type. */
21094 static const char *
21095 ix86_mangle_fundamental_type (tree type
)
21097 switch (TYPE_MODE (type
))
21100 /* __float128 is "g". */
21103 /* "long double" or __float80 is "e". */
21110 /* For 32-bit code we can save PIC register setup by using
21111 __stack_chk_fail_local hidden function instead of calling
21112 __stack_chk_fail directly. 64-bit code doesn't need to setup any PIC
21113 register, so it is better to call __stack_chk_fail directly. */
21116 ix86_stack_protect_fail (void)
21118 return TARGET_64BIT
21119 ? default_external_stack_protect_fail ()
21120 : default_hidden_stack_protect_fail ();
21123 /* Select a format to encode pointers in exception handling data. CODE
21124 is 0 for data, 1 for code labels, 2 for function pointers. GLOBAL is
21125 true if the symbol may be affected by dynamic relocations.
21127 ??? All x86 object file formats are capable of representing this.
21128 After all, the relocation needed is the same as for the call insn.
21129 Whether or not a particular assembler allows us to enter such, I
21130 guess we'll have to see. */
21132 asm_preferred_eh_data_format (int code
, int global
)
21136 int type
= DW_EH_PE_sdata8
;
21138 || ix86_cmodel
== CM_SMALL_PIC
21139 || (ix86_cmodel
== CM_MEDIUM_PIC
&& (global
|| code
)))
21140 type
= DW_EH_PE_sdata4
;
21141 return (global
? DW_EH_PE_indirect
: 0) | DW_EH_PE_pcrel
| type
;
21143 if (ix86_cmodel
== CM_SMALL
21144 || (ix86_cmodel
== CM_MEDIUM
&& code
))
21145 return DW_EH_PE_udata4
;
21146 return DW_EH_PE_absptr
;
21149 /* Expand copysign from SIGN to the positive value ABS_VALUE
21150 storing in RESULT. If MASK is non-null, it shall be a mask to mask out
21153 ix86_sse_copysign_to_positive (rtx result
, rtx abs_value
, rtx sign
, rtx mask
)
21155 enum machine_mode mode
= GET_MODE (sign
);
21156 rtx sgn
= gen_reg_rtx (mode
);
21157 if (mask
== NULL_RTX
)
21159 mask
= ix86_build_signbit_mask (mode
, VECTOR_MODE_P (mode
), false);
21160 if (!VECTOR_MODE_P (mode
))
21162 /* We need to generate a scalar mode mask in this case. */
21163 rtx tmp
= gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (1, const0_rtx
));
21164 tmp
= gen_rtx_VEC_SELECT (mode
, mask
, tmp
);
21165 mask
= gen_reg_rtx (mode
);
21166 emit_insn (gen_rtx_SET (VOIDmode
, mask
, tmp
));
21170 mask
= gen_rtx_NOT (mode
, mask
);
21171 emit_insn (gen_rtx_SET (VOIDmode
, sgn
,
21172 gen_rtx_AND (mode
, mask
, sign
)));
21173 emit_insn (gen_rtx_SET (VOIDmode
, result
,
21174 gen_rtx_IOR (mode
, abs_value
, sgn
)));
21177 /* Expand fabs (OP0) and return a new rtx that holds the result. The
21178 mask for masking out the sign-bit is stored in *SMASK, if that is
21181 ix86_expand_sse_fabs (rtx op0
, rtx
*smask
)
21183 enum machine_mode mode
= GET_MODE (op0
);
21186 xa
= gen_reg_rtx (mode
);
21187 mask
= ix86_build_signbit_mask (mode
, VECTOR_MODE_P (mode
), true);
21188 if (!VECTOR_MODE_P (mode
))
21190 /* We need to generate a scalar mode mask in this case. */
21191 rtx tmp
= gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (1, const0_rtx
));
21192 tmp
= gen_rtx_VEC_SELECT (mode
, mask
, tmp
);
21193 mask
= gen_reg_rtx (mode
);
21194 emit_insn (gen_rtx_SET (VOIDmode
, mask
, tmp
));
21196 emit_insn (gen_rtx_SET (VOIDmode
, xa
,
21197 gen_rtx_AND (mode
, op0
, mask
)));
21205 /* Expands a comparison of OP0 with OP1 using comparison code CODE,
21206 swapping the operands if SWAP_OPERANDS is true. The expanded
21207 code is a forward jump to a newly created label in case the
21208 comparison is true. The generated label rtx is returned. */
21210 ix86_expand_sse_compare_and_jump (enum rtx_code code
, rtx op0
, rtx op1
,
21211 bool swap_operands
)
21222 label
= gen_label_rtx ();
21223 tmp
= gen_rtx_REG (CCFPUmode
, FLAGS_REG
);
21224 emit_insn (gen_rtx_SET (VOIDmode
, tmp
,
21225 gen_rtx_COMPARE (CCFPUmode
, op0
, op1
)));
21226 tmp
= gen_rtx_fmt_ee (code
, VOIDmode
, tmp
, const0_rtx
);
21227 tmp
= gen_rtx_IF_THEN_ELSE (VOIDmode
, tmp
,
21228 gen_rtx_LABEL_REF (VOIDmode
, label
), pc_rtx
);
21229 tmp
= emit_jump_insn (gen_rtx_SET (VOIDmode
, pc_rtx
, tmp
));
21230 JUMP_LABEL (tmp
) = label
;
21235 /* Expand a mask generating SSE comparison instruction comparing OP0 with OP1
21236 using comparison code CODE. Operands are swapped for the comparison if
21237 SWAP_OPERANDS is true. Returns a rtx for the generated mask. */
21239 ix86_expand_sse_compare_mask (enum rtx_code code
, rtx op0
, rtx op1
,
21240 bool swap_operands
)
21242 enum machine_mode mode
= GET_MODE (op0
);
21243 rtx mask
= gen_reg_rtx (mode
);
21252 if (mode
== DFmode
)
21253 emit_insn (gen_sse2_maskcmpdf3 (mask
, op0
, op1
,
21254 gen_rtx_fmt_ee (code
, mode
, op0
, op1
)));
21256 emit_insn (gen_sse_maskcmpsf3 (mask
, op0
, op1
,
21257 gen_rtx_fmt_ee (code
, mode
, op0
, op1
)));
21262 /* Generate and return a rtx of mode MODE for 2**n where n is the number
21263 of bits of the mantissa of MODE, which must be one of DFmode or SFmode. */
21265 ix86_gen_TWO52 (enum machine_mode mode
)
21267 REAL_VALUE_TYPE TWO52r
;
21270 real_ldexp (&TWO52r
, &dconst1
, mode
== DFmode
? 52 : 23);
21271 TWO52
= const_double_from_real_value (TWO52r
, mode
);
21272 TWO52
= force_reg (mode
, TWO52
);
21277 /* Expand SSE sequence for computing lround from OP1 storing
21280 ix86_expand_lround (rtx op0
, rtx op1
)
21282 /* C code for the stuff we're doing below:
21283 tmp = op1 + copysign (nextafter (0.5, 0.0), op1)
21286 enum machine_mode mode
= GET_MODE (op1
);
21287 const struct real_format
*fmt
;
21288 REAL_VALUE_TYPE pred_half
, half_minus_pred_half
;
21291 /* load nextafter (0.5, 0.0) */
21292 fmt
= REAL_MODE_FORMAT (mode
);
21293 real_2expN (&half_minus_pred_half
, -(fmt
->p
) - 1);
21294 REAL_ARITHMETIC (pred_half
, MINUS_EXPR
, dconsthalf
, half_minus_pred_half
);
21296 /* adj = copysign (0.5, op1) */
21297 adj
= force_reg (mode
, const_double_from_real_value (pred_half
, mode
));
21298 ix86_sse_copysign_to_positive (adj
, adj
, force_reg (mode
, op1
), NULL_RTX
);
21300 /* adj = op1 + adj */
21301 adj
= expand_simple_binop (mode
, PLUS
, adj
, op1
, NULL_RTX
, 0, OPTAB_DIRECT
);
21303 /* op0 = (imode)adj */
21304 expand_fix (op0
, adj
, 0);
21307 /* Expand SSE2 sequence for computing lround from OPERAND1 storing
21310 ix86_expand_lfloorceil (rtx op0
, rtx op1
, bool do_floor
)
21312 /* C code for the stuff we're doing below (for do_floor):
21314 xi -= (double)xi > op1 ? 1 : 0;
21317 enum machine_mode fmode
= GET_MODE (op1
);
21318 enum machine_mode imode
= GET_MODE (op0
);
21319 rtx ireg
, freg
, label
, tmp
;
21321 /* reg = (long)op1 */
21322 ireg
= gen_reg_rtx (imode
);
21323 expand_fix (ireg
, op1
, 0);
21325 /* freg = (double)reg */
21326 freg
= gen_reg_rtx (fmode
);
21327 expand_float (freg
, ireg
, 0);
21329 /* ireg = (freg > op1) ? ireg - 1 : ireg */
21330 label
= ix86_expand_sse_compare_and_jump (UNLE
,
21331 freg
, op1
, !do_floor
);
21332 tmp
= expand_simple_binop (imode
, do_floor
? MINUS
: PLUS
,
21333 ireg
, const1_rtx
, NULL_RTX
, 0, OPTAB_DIRECT
);
21334 emit_move_insn (ireg
, tmp
);
21336 emit_label (label
);
21337 LABEL_NUSES (label
) = 1;
21339 emit_move_insn (op0
, ireg
);
21342 /* Expand rint (IEEE round to nearest) rounding OPERAND1 and storing the
21343 result in OPERAND0. */
21345 ix86_expand_rint (rtx operand0
, rtx operand1
)
21347 /* C code for the stuff we're doing below:
21348 xa = fabs (operand1);
21349 if (!isless (xa, 2**52))
21351 xa = xa + 2**52 - 2**52;
21352 return copysign (xa, operand1);
21354 enum machine_mode mode
= GET_MODE (operand0
);
21355 rtx res
, xa
, label
, TWO52
, mask
;
21357 res
= gen_reg_rtx (mode
);
21358 emit_move_insn (res
, operand1
);
21360 /* xa = abs (operand1) */
21361 xa
= ix86_expand_sse_fabs (res
, &mask
);
21363 /* if (!isless (xa, TWO52)) goto label; */
21364 TWO52
= ix86_gen_TWO52 (mode
);
21365 label
= ix86_expand_sse_compare_and_jump (UNLE
, TWO52
, xa
, false);
21367 xa
= expand_simple_binop (mode
, PLUS
, xa
, TWO52
, NULL_RTX
, 0, OPTAB_DIRECT
);
21368 xa
= expand_simple_binop (mode
, MINUS
, xa
, TWO52
, xa
, 0, OPTAB_DIRECT
);
21370 ix86_sse_copysign_to_positive (res
, xa
, res
, mask
);
21372 emit_label (label
);
21373 LABEL_NUSES (label
) = 1;
21375 emit_move_insn (operand0
, res
);
21378 /* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing
21381 ix86_expand_floorceildf_32 (rtx operand0
, rtx operand1
, bool do_floor
)
21383 /* C code for the stuff we expand below.
21384 double xa = fabs (x), x2;
21385 if (!isless (xa, TWO52))
21387 xa = xa + TWO52 - TWO52;
21388 x2 = copysign (xa, x);
21397 enum machine_mode mode
= GET_MODE (operand0
);
21398 rtx xa
, TWO52
, tmp
, label
, one
, res
, mask
;
21400 TWO52
= ix86_gen_TWO52 (mode
);
21402 /* Temporary for holding the result, initialized to the input
21403 operand to ease control flow. */
21404 res
= gen_reg_rtx (mode
);
21405 emit_move_insn (res
, operand1
);
21407 /* xa = abs (operand1) */
21408 xa
= ix86_expand_sse_fabs (res
, &mask
);
21410 /* if (!isless (xa, TWO52)) goto label; */
21411 label
= ix86_expand_sse_compare_and_jump (UNLE
, TWO52
, xa
, false);
21413 /* xa = xa + TWO52 - TWO52; */
21414 xa
= expand_simple_binop (mode
, PLUS
, xa
, TWO52
, NULL_RTX
, 0, OPTAB_DIRECT
);
21415 xa
= expand_simple_binop (mode
, MINUS
, xa
, TWO52
, xa
, 0, OPTAB_DIRECT
);
21417 /* xa = copysign (xa, operand1) */
21418 ix86_sse_copysign_to_positive (xa
, xa
, res
, mask
);
21420 /* generate 1.0 or -1.0 */
21421 one
= force_reg (mode
,
21422 const_double_from_real_value (do_floor
21423 ? dconst1
: dconstm1
, mode
));
21425 /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
21426 tmp
= ix86_expand_sse_compare_mask (UNGT
, xa
, res
, !do_floor
);
21427 emit_insn (gen_rtx_SET (VOIDmode
, tmp
,
21428 gen_rtx_AND (mode
, one
, tmp
)));
21429 /* We always need to subtract here to preserve signed zero. */
21430 tmp
= expand_simple_binop (mode
, MINUS
,
21431 xa
, tmp
, NULL_RTX
, 0, OPTAB_DIRECT
);
21432 emit_move_insn (res
, tmp
);
21434 emit_label (label
);
21435 LABEL_NUSES (label
) = 1;
21437 emit_move_insn (operand0
, res
);
21440 /* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing
21443 ix86_expand_floorceil (rtx operand0
, rtx operand1
, bool do_floor
)
21445 /* C code for the stuff we expand below.
21446 double xa = fabs (x), x2;
21447 if (!isless (xa, TWO52))
21449 x2 = (double)(long)x;
21456 if (HONOR_SIGNED_ZEROS (mode))
21457 return copysign (x2, x);
21460 enum machine_mode mode
= GET_MODE (operand0
);
21461 rtx xa
, xi
, TWO52
, tmp
, label
, one
, res
, mask
;
21463 TWO52
= ix86_gen_TWO52 (mode
);
21465 /* Temporary for holding the result, initialized to the input
21466 operand to ease control flow. */
21467 res
= gen_reg_rtx (mode
);
21468 emit_move_insn (res
, operand1
);
21470 /* xa = abs (operand1) */
21471 xa
= ix86_expand_sse_fabs (res
, &mask
);
21473 /* if (!isless (xa, TWO52)) goto label; */
21474 label
= ix86_expand_sse_compare_and_jump (UNLE
, TWO52
, xa
, false);
21476 /* xa = (double)(long)x */
21477 xi
= gen_reg_rtx (mode
== DFmode
? DImode
: SImode
);
21478 expand_fix (xi
, res
, 0);
21479 expand_float (xa
, xi
, 0);
21482 one
= force_reg (mode
, const_double_from_real_value (dconst1
, mode
));
21484 /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
21485 tmp
= ix86_expand_sse_compare_mask (UNGT
, xa
, res
, !do_floor
);
21486 emit_insn (gen_rtx_SET (VOIDmode
, tmp
,
21487 gen_rtx_AND (mode
, one
, tmp
)));
21488 tmp
= expand_simple_binop (mode
, do_floor
? MINUS
: PLUS
,
21489 xa
, tmp
, NULL_RTX
, 0, OPTAB_DIRECT
);
21490 emit_move_insn (res
, tmp
);
21492 if (HONOR_SIGNED_ZEROS (mode
))
21493 ix86_sse_copysign_to_positive (res
, res
, force_reg (mode
, operand1
), mask
);
21495 emit_label (label
);
21496 LABEL_NUSES (label
) = 1;
21498 emit_move_insn (operand0
, res
);
21501 /* Expand SSE sequence for computing round from OPERAND1 storing
21502 into OPERAND0. Sequence that works without relying on DImode truncation
21503 via cvttsd2siq that is only available on 64bit targets. */
21505 ix86_expand_rounddf_32 (rtx operand0
, rtx operand1
)
21507 /* C code for the stuff we expand below.
21508 double xa = fabs (x), xa2, x2;
21509 if (!isless (xa, TWO52))
21511 Using the absolute value and copying back sign makes
21512 -0.0 -> -0.0 correct.
21513 xa2 = xa + TWO52 - TWO52;
21518 else if (dxa > 0.5)
21520 x2 = copysign (xa2, x);
21523 enum machine_mode mode
= GET_MODE (operand0
);
21524 rtx xa
, xa2
, dxa
, TWO52
, tmp
, label
, half
, mhalf
, one
, res
, mask
;
21526 TWO52
= ix86_gen_TWO52 (mode
);
21528 /* Temporary for holding the result, initialized to the input
21529 operand to ease control flow. */
21530 res
= gen_reg_rtx (mode
);
21531 emit_move_insn (res
, operand1
);
21533 /* xa = abs (operand1) */
21534 xa
= ix86_expand_sse_fabs (res
, &mask
);
21536 /* if (!isless (xa, TWO52)) goto label; */
21537 label
= ix86_expand_sse_compare_and_jump (UNLE
, TWO52
, xa
, false);
21539 /* xa2 = xa + TWO52 - TWO52; */
21540 xa2
= expand_simple_binop (mode
, PLUS
, xa
, TWO52
, NULL_RTX
, 0, OPTAB_DIRECT
);
21541 xa2
= expand_simple_binop (mode
, MINUS
, xa2
, TWO52
, xa2
, 0, OPTAB_DIRECT
);
21543 /* dxa = xa2 - xa; */
21544 dxa
= expand_simple_binop (mode
, MINUS
, xa2
, xa
, NULL_RTX
, 0, OPTAB_DIRECT
);
21546 /* generate 0.5, 1.0 and -0.5 */
21547 half
= force_reg (mode
, const_double_from_real_value (dconsthalf
, mode
));
21548 one
= expand_simple_binop (mode
, PLUS
, half
, half
, NULL_RTX
, 0, OPTAB_DIRECT
);
21549 mhalf
= expand_simple_binop (mode
, MINUS
, half
, one
, NULL_RTX
,
21553 tmp
= gen_reg_rtx (mode
);
21554 /* xa2 = xa2 - (dxa > 0.5 ? 1 : 0) */
21555 tmp
= ix86_expand_sse_compare_mask (UNGT
, dxa
, half
, false);
21556 emit_insn (gen_rtx_SET (VOIDmode
, tmp
,
21557 gen_rtx_AND (mode
, one
, tmp
)));
21558 xa2
= expand_simple_binop (mode
, MINUS
, xa2
, tmp
, NULL_RTX
, 0, OPTAB_DIRECT
);
21559 /* xa2 = xa2 + (dxa <= -0.5 ? 1 : 0) */
21560 tmp
= ix86_expand_sse_compare_mask (UNGE
, mhalf
, dxa
, false);
21561 emit_insn (gen_rtx_SET (VOIDmode
, tmp
,
21562 gen_rtx_AND (mode
, one
, tmp
)));
21563 xa2
= expand_simple_binop (mode
, PLUS
, xa2
, tmp
, NULL_RTX
, 0, OPTAB_DIRECT
);
21565 /* res = copysign (xa2, operand1) */
21566 ix86_sse_copysign_to_positive (res
, xa2
, force_reg (mode
, operand1
), mask
);
21568 emit_label (label
);
21569 LABEL_NUSES (label
) = 1;
21571 emit_move_insn (operand0
, res
);
21574 /* Expand SSE sequence for computing trunc from OPERAND1 storing
21577 ix86_expand_trunc (rtx operand0
, rtx operand1
)
21579 /* C code for SSE variant we expand below.
21580 double xa = fabs (x), x2;
21581 if (!isless (xa, TWO52))
21583 x2 = (double)(long)x;
21584 if (HONOR_SIGNED_ZEROS (mode))
21585 return copysign (x2, x);
21588 enum machine_mode mode
= GET_MODE (operand0
);
21589 rtx xa
, xi
, TWO52
, label
, res
, mask
;
21591 TWO52
= ix86_gen_TWO52 (mode
);
21593 /* Temporary for holding the result, initialized to the input
21594 operand to ease control flow. */
21595 res
= gen_reg_rtx (mode
);
21596 emit_move_insn (res
, operand1
);
21598 /* xa = abs (operand1) */
21599 xa
= ix86_expand_sse_fabs (res
, &mask
);
21601 /* if (!isless (xa, TWO52)) goto label; */
21602 label
= ix86_expand_sse_compare_and_jump (UNLE
, TWO52
, xa
, false);
21604 /* x = (double)(long)x */
21605 xi
= gen_reg_rtx (mode
== DFmode
? DImode
: SImode
);
21606 expand_fix (xi
, res
, 0);
21607 expand_float (res
, xi
, 0);
21609 if (HONOR_SIGNED_ZEROS (mode
))
21610 ix86_sse_copysign_to_positive (res
, res
, force_reg (mode
, operand1
), mask
);
21612 emit_label (label
);
21613 LABEL_NUSES (label
) = 1;
21615 emit_move_insn (operand0
, res
);
21618 /* Expand SSE sequence for computing trunc from OPERAND1 storing
21621 ix86_expand_truncdf_32 (rtx operand0
, rtx operand1
)
21623 enum machine_mode mode
= GET_MODE (operand0
);
21624 rtx xa
, mask
, TWO52
, label
, one
, res
, smask
, tmp
;
21626 /* C code for SSE variant we expand below.
21627 double xa = fabs (x), x2;
21628 if (!isless (xa, TWO52))
21630 xa2 = xa + TWO52 - TWO52;
21634 x2 = copysign (xa2, x);
21638 TWO52
= ix86_gen_TWO52 (mode
);
21640 /* Temporary for holding the result, initialized to the input
21641 operand to ease control flow. */
21642 res
= gen_reg_rtx (mode
);
21643 emit_move_insn (res
, operand1
);
21645 /* xa = abs (operand1) */
21646 xa
= ix86_expand_sse_fabs (res
, &smask
);
21648 /* if (!isless (xa, TWO52)) goto label; */
21649 label
= ix86_expand_sse_compare_and_jump (UNLE
, TWO52
, xa
, false);
21651 /* res = xa + TWO52 - TWO52; */
21652 tmp
= expand_simple_binop (mode
, PLUS
, xa
, TWO52
, NULL_RTX
, 0, OPTAB_DIRECT
);
21653 tmp
= expand_simple_binop (mode
, MINUS
, tmp
, TWO52
, tmp
, 0, OPTAB_DIRECT
);
21654 emit_move_insn (res
, tmp
);
21657 one
= force_reg (mode
, const_double_from_real_value (dconst1
, mode
));
21659 /* Compensate: res = xa2 - (res > xa ? 1 : 0) */
21660 mask
= ix86_expand_sse_compare_mask (UNGT
, res
, xa
, false);
21661 emit_insn (gen_rtx_SET (VOIDmode
, mask
,
21662 gen_rtx_AND (mode
, mask
, one
)));
21663 tmp
= expand_simple_binop (mode
, MINUS
,
21664 res
, mask
, NULL_RTX
, 0, OPTAB_DIRECT
);
21665 emit_move_insn (res
, tmp
);
21667 /* res = copysign (res, operand1) */
21668 ix86_sse_copysign_to_positive (res
, res
, force_reg (mode
, operand1
), smask
);
21670 emit_label (label
);
21671 LABEL_NUSES (label
) = 1;
21673 emit_move_insn (operand0
, res
);
21676 /* Expand SSE sequence for computing round from OPERAND1 storing
21679 ix86_expand_round (rtx operand0
, rtx operand1
)
21681 /* C code for the stuff we're doing below:
21682 double xa = fabs (x);
21683 if (!isless (xa, TWO52))
21685 xa = (double)(long)(xa + nextafter (0.5, 0.0));
21686 return copysign (xa, x);
21688 enum machine_mode mode
= GET_MODE (operand0
);
21689 rtx res
, TWO52
, xa
, label
, xi
, half
, mask
;
21690 const struct real_format
*fmt
;
21691 REAL_VALUE_TYPE pred_half
, half_minus_pred_half
;
21693 /* Temporary for holding the result, initialized to the input
21694 operand to ease control flow. */
21695 res
= gen_reg_rtx (mode
);
21696 emit_move_insn (res
, operand1
);
21698 TWO52
= ix86_gen_TWO52 (mode
);
21699 xa
= ix86_expand_sse_fabs (res
, &mask
);
21700 label
= ix86_expand_sse_compare_and_jump (UNLE
, TWO52
, xa
, false);
21702 /* load nextafter (0.5, 0.0) */
21703 fmt
= REAL_MODE_FORMAT (mode
);
21704 real_2expN (&half_minus_pred_half
, -(fmt
->p
) - 1);
21705 REAL_ARITHMETIC (pred_half
, MINUS_EXPR
, dconsthalf
, half_minus_pred_half
);
21707 /* xa = xa + 0.5 */
21708 half
= force_reg (mode
, const_double_from_real_value (pred_half
, mode
));
21709 xa
= expand_simple_binop (mode
, PLUS
, xa
, half
, NULL_RTX
, 0, OPTAB_DIRECT
);
21711 /* xa = (double)(int64_t)xa */
21712 xi
= gen_reg_rtx (mode
== DFmode
? DImode
: SImode
);
21713 expand_fix (xi
, xa
, 0);
21714 expand_float (xa
, xi
, 0);
21716 /* res = copysign (xa, operand1) */
21717 ix86_sse_copysign_to_positive (res
, xa
, force_reg (mode
, operand1
), mask
);
21719 emit_label (label
);
21720 LABEL_NUSES (label
) = 1;
21722 emit_move_insn (operand0
, res
);
21725 #include "gt-i386.h"